Files
unity/crnlib/crn_dxt_hc.h
T
Alexander Suvorov b8349dfac8 Use block encoding to store intermediate selectors after endpoint quantization
This change simplifies further modification of the code.

Explanation:
This change is required for further optimization of the quantization code.

Testing:
The modified algorithm has been tested on the Kodak test set using 64-bit build with default settings (running on Windows 10, i7-4790, 3.6GHz). All the decompressed test images are identical to the images being compressed and decompressed using original version of Crunch.

[Compressing Kodak set without mipmaps]
Original: 1582222 bytes / 28.935 sec
Modified: 1494501 bytes / 24.528 sec
Improvement: 5.54% (compression ratio) / 15.23% (compression time)

[Compressing Kodak set with mipmaps]
Original: 2065243 bytes / 36.982 sec
Modified: 1945365 bytes / 32.308 sec
Improvement: 5.80% (compression ratio) / 12.64% (compression time)
2017-05-18 13:44:04 +02:00

397 lines
13 KiB
C++

// File: crn_dxt_hc.h
// See Copyright Notice and license at the end of inc/crnlib.h
#pragma once
#include "crn_dxt1.h"
#include "crn_dxt5a.h"
#include "crn_dxt_endpoint_refiner.h"
#include "crn_image.h"
#include "crn_dxt.h"
#include "crn_image.h"
#include "crn_dxt_hc_common.h"
#include "crn_tree_clusterizer.h"
#include "crn_threading.h"
#define CRN_NO_FUNCTION_DEFINITIONS
#include "../inc/crnlib.h"
namespace crnlib {
const uint cTotalCompressionPhases = 25;
class dxt_hc {
public:
dxt_hc();
~dxt_hc();
struct endpoint_indices_details {
union {
struct {
uint16 color;
uint16 alpha0;
uint16 alpha1;
};
uint16 component[3];
};
uint8 reference;
};
struct selector_indices_details {
union {
struct {
uint16 color;
uint16 alpha0;
uint16 alpha1;
};
uint16 component[3];
};
};
struct chunk_details {
uint block_index[2][2];
};
crnlib::vector<chunk_details> m_chunk_details;
crnlib::vector<uint64> m_block_selectors[3];
crnlib::vector<crnlib::vector<color_quad_u8>> m_blocks;
crnlib::vector<endpoint_indices_details> m_endpoint_indices;
struct pixel_chunk {
pixel_chunk() { clear(); }
dxt_pixel_block m_blocks[cChunkBlockHeight][cChunkBlockWidth];
const color_quad_u8& operator()(uint cx, uint cy) const {
CRNLIB_ASSERT((cx < cChunkPixelWidth) && (cy < cChunkPixelHeight));
return m_blocks[cy >> cBlockPixelHeightShift][cx >> cBlockPixelWidthShift].m_pixels
[cy & (cBlockPixelHeight - 1)][cx & (cBlockPixelWidth - 1)];
}
color_quad_u8& operator()(uint cx, uint cy) {
CRNLIB_ASSERT((cx < cChunkPixelWidth) && (cy < cChunkPixelHeight));
return m_blocks[cy >> cBlockPixelHeightShift][cx >> cBlockPixelWidthShift].m_pixels
[cy & (cBlockPixelHeight - 1)][cx & (cBlockPixelWidth - 1)];
}
inline void clear() {
utils::zero_object(*this);
m_weight = 1.0f;
}
float m_weight;
uint m_legacy_index;
};
typedef crnlib::vector<pixel_chunk> pixel_chunk_vec;
struct params {
params()
: m_color_endpoint_codebook_size(3072),
m_color_selector_codebook_size(3072),
m_alpha_endpoint_codebook_size(3072),
m_alpha_selector_codebook_size(3072),
m_adaptive_tile_color_psnr_derating(2.0f), // was 3.4f
m_adaptive_tile_alpha_psnr_derating(2.0f),
m_adaptive_tile_color_alpha_weighting_ratio(3.0f),
m_num_levels(0),
m_format(cDXT1),
m_hierarchical(true),
m_perceptual(true),
m_debugging(false),
m_pProgress_func(NULL),
m_pProgress_func_data(NULL) {
m_alpha_component_indices[0] = 3;
m_alpha_component_indices[1] = 0;
for (uint i = 0; i < cCRNMaxLevels; i++) {
m_levels[i].m_first_chunk = 0;
m_levels[i].m_num_chunks = 0;
}
}
// Valid range for codebook sizes: [32,8192] (non-power of two values are okay)
uint m_color_endpoint_codebook_size;
uint m_color_selector_codebook_size;
uint m_alpha_endpoint_codebook_size;
uint m_alpha_selector_codebook_size;
// Higher values cause fewer 8x4, 4x8, and 4x4 blocks to be utilized less often (lower quality/smaller files).
// Lower values cause the encoder to use large tiles less often (better quality/larger files).
// Valid range: [0.0,100.0].
// A value of 0 will cause the encoder to only use tiles larger than 4x4 if doing so would incur to quality loss.
float m_adaptive_tile_color_psnr_derating;
float m_adaptive_tile_alpha_psnr_derating;
float m_adaptive_tile_color_alpha_weighting_ratio;
uint m_alpha_component_indices[2];
struct miplevel_desc {
uint m_first_chunk;
uint m_num_chunks;
uint m_chunk_width;
};
// The mip level data is optional!
miplevel_desc m_levels[cCRNMaxLevels];
uint m_num_levels;
crnlib::vector<endpoint_indices_details> *m_endpoint_indices;
crnlib::vector<selector_indices_details> *m_selector_indices;
dxt_format m_format;
// If m_hierarchical is false, only 4x4 blocks will be used by the encoder (leading to higher quality/larger files).
bool m_hierarchical;
// If m_perceptual is true, perceptual color metrics will be used by the encoder.
bool m_perceptual;
bool m_debugging;
crn_progress_callback_func m_pProgress_func;
void* m_pProgress_func_data;
};
void clear();
// Main compression function
bool compress(const params& p, uint num_chunks, const pixel_chunk* pChunks, task_pool& task_pool);
// Output accessors
inline uint get_num_chunks() const { return m_num_chunks; }
struct selectors {
selectors() { utils::zero_object(*this); }
uint8 m_selectors[cBlockPixelHeight][cBlockPixelWidth];
uint8 get_by_index(uint i) const {
CRNLIB_ASSERT(i < (cBlockPixelWidth * cBlockPixelHeight));
const uint8* p = (const uint8*)m_selectors;
return *(p + i);
}
void set_by_index(uint i, uint v) {
CRNLIB_ASSERT(i < (cBlockPixelWidth * cBlockPixelHeight));
uint8* p = (uint8*)m_selectors;
*(p + i) = static_cast<uint8>(v);
}
};
typedef crnlib::vector<selectors> selectors_vec;
// Color endpoints
inline uint get_color_endpoint_codebook_size() const { return m_color_endpoints.size(); }
inline uint get_color_endpoint(uint codebook_index) const { return m_color_endpoints[codebook_index]; }
const crnlib::vector<uint>& get_color_endpoint_vec() const { return m_color_endpoints; }
// Color selectors
uint get_color_selector_codebook_size() const { return m_color_selectors.size(); }
const selectors& get_color_selectors(uint codebook_index) const { return m_color_selectors[codebook_index]; }
const crnlib::vector<selectors>& get_color_selectors_vec() const { return m_color_selectors; }
// Alpha endpoints
inline uint get_alpha_endpoint_codebook_size() const { return m_alpha_endpoints.size(); }
inline uint get_alpha_endpoint(uint codebook_index) const { return m_alpha_endpoints[codebook_index]; }
const crnlib::vector<uint>& get_alpha_endpoint_vec() const { return m_alpha_endpoints; }
// Alpha selectors
uint get_alpha_selector_codebook_size() const { return m_alpha_selectors.size(); }
const selectors& get_alpha_selectors(uint codebook_index) const { return m_alpha_selectors[codebook_index]; }
const crnlib::vector<selectors>& get_alpha_selectors_vec() const { return m_alpha_selectors; }
private:
params m_params;
uint m_num_chunks;
const pixel_chunk* m_pChunks;
uint m_num_alpha_blocks; // 0, 1, or 2
bool m_has_color_blocks;
bool m_has_alpha0_blocks;
bool m_has_alpha1_blocks;
struct compressed_tile {
uint m_endpoint_cluster_index;
uint m_first_endpoint;
uint m_second_endpoint;
uint8 m_pixel_width;
uint8 m_pixel_height;
uint8 m_layout_index;
};
struct compressed_chunk {
compressed_chunk() { utils::zero_object(*this); }
uint8 m_encoding_index;
uint8 m_num_tiles;
compressed_tile m_tiles[cChunkMaxTiles];
compressed_tile m_quantized_tiles[cChunkMaxTiles];
uint16 m_endpoint_cluster_index[cChunkMaxTiles];
uint16 m_selector_cluster_index[cChunkBlockHeight][cChunkBlockWidth];
};
typedef crnlib::vector<compressed_chunk> compressed_chunk_vec;
enum {
cColorChunks = 0,
cAlpha0Chunks = 1,
cAlpha1Chunks = 2,
cNumCompressedChunkVecs = 3
};
compressed_chunk_vec m_compressed_chunks[cNumCompressedChunkVecs];
volatile atomic32_t m_encoding_hist[cNumChunkEncodings];
atomic32_t m_total_tiles;
void compress_dxt1_block(
dxt1_endpoint_optimizer::results& results,
uint chunk_index, const image_u8& chunk, uint x_ofs, uint y_ofs, uint width, uint height,
uint8* pSelectors);
void compress_dxt5_block(
dxt5_endpoint_optimizer::results& results,
uint chunk_index, const image_u8& chunk, uint x_ofs, uint y_ofs, uint width, uint height, uint component_index,
uint8* pAlpha_selectors);
void determine_compressed_chunks_task(uint64 data, void* pData_ptr);
bool determine_compressed_chunks();
struct tile_cluster {
tile_cluster()
: m_first_endpoint(0), m_second_endpoint(0), m_error(0) {}
// first = chunk, second = tile
// if an alpha tile, second's upper 16 bits contains the alpha index (0 or 1)
crnlib::vector<std::pair<uint, uint> > m_tiles;
crnlib::vector<color_quad_u8> m_pixels;
crnlib::vector<uint8> m_selectors;
struct {
bool result;
uint first_endpoint;
uint second_endpoint;
uint64 error;
} m_refined;
uint m_first_endpoint;
uint m_second_endpoint;
uint64 m_error;
};
typedef crnlib::vector<tile_cluster> tile_cluster_vec;
tile_cluster_vec m_color_clusters;
tile_cluster_vec m_alpha_clusters;
selectors_vec m_color_selectors;
selectors_vec m_alpha_selectors;
// For each selector, this array indicates every chunk/tile/tile block that use this color selector.
struct block_id {
block_id() { utils::zero_object(*this); }
block_id(uint chunk_index, uint alpha_index, uint tile_index, uint block_x, uint block_y)
: m_chunk_index(chunk_index), m_alpha_index((uint8)alpha_index), m_tile_index((uint8)tile_index), m_block_x((uint8)block_x), m_block_y((uint8)block_y) {}
uint m_chunk_index;
uint8 m_alpha_index;
uint8 m_tile_index;
uint8 m_block_x;
uint8 m_block_y;
};
typedef crnlib::vector<crnlib::vector<block_id> > chunk_blocks_using_selectors_vec;
chunk_blocks_using_selectors_vec m_chunk_blocks_using_color_selectors;
chunk_blocks_using_selectors_vec m_chunk_blocks_using_alpha_selectors; // second's upper 16 bits contain alpha index!
crnlib::vector<uint> m_color_endpoints; // not valid until end, only for user access
crnlib::vector<uint> m_alpha_endpoints; // not valid until end, only for user access
crn_thread_id_t m_main_thread_id;
bool m_canceled;
task_pool* m_pTask_pool;
int m_prev_phase_index;
int m_prev_percentage_complete;
typedef vec<6, float> vec6F;
typedef vec<16, float> vec16F;
typedef tree_clusterizer<vec2F> vec2F_tree_vq;
typedef tree_clusterizer<vec6F> vec6F_tree_vq;
typedef tree_clusterizer<vec16F> vec16F_tree_vq;
struct assign_color_endpoint_clusters_state {
CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(assign_color_endpoint_clusters_state);
assign_color_endpoint_clusters_state(vec6F_tree_vq& vq, crnlib::vector<crnlib::vector<vec6F> >& training_vecs)
: m_vq(vq), m_training_vecs(training_vecs) {}
vec6F_tree_vq& m_vq;
crnlib::vector<crnlib::vector<vec6F> >& m_training_vecs;
};
struct create_selector_codebook_state {
CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(create_selector_codebook_state);
create_selector_codebook_state(dxt_hc& hc, bool alpha_blocks, uint comp_index_start, uint comp_index_end, vec16F_tree_vq& selector_vq, chunk_blocks_using_selectors_vec& chunk_blocks_using_selectors, selectors_vec& selectors_cb)
: m_hc(hc),
m_alpha_blocks(alpha_blocks),
m_comp_index_start(comp_index_start),
m_comp_index_end(comp_index_end),
m_selector_vq(selector_vq),
m_chunk_blocks_using_selectors(chunk_blocks_using_selectors),
m_selectors_cb(selectors_cb) {
}
dxt_hc& m_hc;
bool m_alpha_blocks;
uint m_comp_index_start;
uint m_comp_index_end;
vec16F_tree_vq& m_selector_vq;
chunk_blocks_using_selectors_vec& m_chunk_blocks_using_selectors;
selectors_vec& m_selectors_cb;
mutable spinlock m_chunk_blocks_using_selectors_lock;
};
void assign_color_endpoint_clusters_task(uint64 data, void* pData_ptr);
bool determine_color_endpoint_clusters();
struct determine_alpha_endpoint_clusters_state {
vec2F_tree_vq m_vq;
crnlib::vector<crnlib::vector<vec2F> > m_training_vecs[2];
};
void determine_alpha_endpoint_clusters_task(uint64 data, void* pData_ptr);
bool determine_alpha_endpoint_clusters();
void determine_color_endpoint_codebook_task(uint64 data, void* pData_ptr);
bool determine_color_endpoint_codebook();
void determine_alpha_endpoint_codebook_task(uint64 data, void* pData_ptr);
bool determine_alpha_endpoint_codebook();
void create_selector_codebook_task(uint64 data, void* pData_ptr);
bool create_selector_codebook(bool alpha_blocks);
bool refine_quantized_color_endpoints();
bool refine_quantized_color_selectors();
bool refine_quantized_alpha_endpoints();
bool refine_quantized_alpha_selectors();
bool initialize_blocks(const params& p);
bool create_block_encodings(const params& p);
bool update_progress(uint phase_index, uint subphase_index, uint subphase_total);
bool compress_internal(const params& p, uint num_chunks, const pixel_chunk* pChunks);
};
CRNLIB_DEFINE_BITWISE_COPYABLE(dxt_hc::pixel_chunk);
CRNLIB_DEFINE_BITWISE_COPYABLE(dxt_hc::selectors);
} // namespace crnlib