Switch from chunk encoding to block encoding while performing image quantization
This change improves compression speed and simplifies further modification of the code. Testing: The modified algorithm has been tested on the Kodak test set using a 64-bit build with default settings (running on Windows 10, i7-4790, 3.6GHz). All the decompressed test images are identical to the images produced by compressing and decompressing with the original version of Crunch. [Compressing Kodak set without mipmaps] Original: 1582222 bytes / 28.947 sec Modified: 1494501 bytes / 17.642 sec Improvement: 5.54% (compression ratio) / 39.05% (compression time) [Compressing Kodak set with mipmaps] Original: 2065243 bytes / 36.965 sec Modified: 1945365 bytes / 22.989 sec Improvement: 5.80% (compression ratio) / 37.81% (compression time)
This commit is contained in:
Binary file not shown.
+30
-60
@@ -841,50 +841,6 @@ bool crn_comp::alias_images() {
|
||||
return true;
|
||||
}
|
||||
|
||||
void crn_comp::append_chunks(const image_u8& img, uint num_chunks_x, uint num_chunks_y, dxt_hc::pixel_chunk_vec& chunks, float weight) {
|
||||
for (uint y = 0; y < num_chunks_y; y++) {
|
||||
for (uint legacy_index = chunks.size(), x = 0; x < num_chunks_x; x++) {
|
||||
chunks.resize(chunks.size() + 1);
|
||||
|
||||
dxt_hc::pixel_chunk& chunk = chunks.back();
|
||||
chunk.m_weight = weight;
|
||||
chunk.m_legacy_index = legacy_index + (y & 1 ? num_chunks_x - 1 - x : x);
|
||||
|
||||
for (uint cy = 0; cy < cChunkPixelHeight; cy++) {
|
||||
uint py = y * cChunkPixelHeight + cy;
|
||||
py = math::minimum(py, img.get_height() - 1);
|
||||
|
||||
for (uint cx = 0; cx < cChunkPixelWidth; cx++) {
|
||||
uint px = x * cChunkPixelWidth + cx;
|
||||
px = math::minimum(px, img.get_width() - 1);
|
||||
|
||||
chunk(cx, cy) = img(px, py);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void crn_comp::create_chunks() {
|
||||
m_chunks.reserve(m_total_chunks);
|
||||
m_chunks.resize(0);
|
||||
|
||||
for (uint level = 0; level < m_pParams->m_levels; level++) {
|
||||
for (uint face = 0; face < m_pParams->m_faces; face++) {
|
||||
if (!face) {
|
||||
CRNLIB_ASSERT(m_levels[level].m_first_chunk == m_chunks.size());
|
||||
}
|
||||
|
||||
float mip_weight = math::minimum(12.0f, powf(1.3f, static_cast<float>(level)));
|
||||
//float mip_weight = 1.0f;
|
||||
|
||||
append_chunks(m_images[face][level], m_levels[level].m_chunk_width, m_levels[level].m_chunk_height, m_chunks, mip_weight);
|
||||
}
|
||||
}
|
||||
|
||||
CRNLIB_ASSERT(m_chunks.size() == m_total_chunks);
|
||||
}
|
||||
|
||||
void crn_comp::clear() {
|
||||
m_pParams = NULL;
|
||||
|
||||
@@ -903,8 +859,6 @@ void crn_comp::clear() {
|
||||
|
||||
m_total_chunks = 0;
|
||||
|
||||
m_chunks.clear();
|
||||
|
||||
utils::zero_object(m_crn_header);
|
||||
|
||||
m_comp_data.clear();
|
||||
@@ -931,7 +885,7 @@ void crn_comp::clear() {
|
||||
m_packed_alpha_selectors.clear();
|
||||
}
|
||||
|
||||
bool crn_comp::quantize_chunks() {
|
||||
bool crn_comp::quantize_images() {
|
||||
dxt_hc::params params;
|
||||
|
||||
params.m_adaptive_tile_alpha_psnr_derating = m_pParams->m_crn_adaptive_tile_alpha_psnr_derating;
|
||||
@@ -964,10 +918,8 @@ bool crn_comp::quantize_chunks() {
|
||||
|
||||
float alpha_endpoint_quality = powf(quality, 2.1f * alpha_quality_power_mul);
|
||||
float alpha_selector_quality = powf(quality, 1.65f * alpha_quality_power_mul);
|
||||
params.m_alpha_endpoint_codebook_size = math::clamp<uint>(math::float_to_uint(.5f + math::lerp<float>(math::maximum<float>(24, cCRNMinPaletteSize), (float)max_codebook_entries, alpha_endpoint_quality)), cCRNMinPaletteSize, cCRNMaxPaletteSize);
|
||||
;
|
||||
params.m_alpha_endpoint_codebook_size = math::clamp<uint>(math::float_to_uint(.5f + math::lerp<float>(math::maximum<float>(24, cCRNMinPaletteSize), (float)max_codebook_entries, alpha_endpoint_quality)), cCRNMinPaletteSize, cCRNMaxPaletteSize);
|
||||
params.m_alpha_selector_codebook_size = math::clamp<uint>(math::float_to_uint(.5f + math::lerp<float>(math::maximum<float>(48, cCRNMinPaletteSize), (float)max_codebook_entries, alpha_selector_quality)), cCRNMinPaletteSize, cCRNMaxPaletteSize);
|
||||
;
|
||||
}
|
||||
|
||||
if (m_pParams->m_flags & cCRNCompFlagDebugging) {
|
||||
@@ -1058,18 +1010,39 @@ bool crn_comp::quantize_chunks() {
|
||||
|
||||
params.m_num_levels = m_pParams->m_levels;
|
||||
for (uint i = 0; i < m_pParams->m_levels; i++) {
|
||||
params.m_levels[i].m_first_chunk = m_levels[i].m_first_chunk;
|
||||
params.m_levels[i].m_num_chunks = m_levels[i].m_num_chunks;
|
||||
params.m_levels[i].m_chunk_width = m_levels[i].m_chunk_width;
|
||||
params.m_levels[i].m_first_block = m_levels[i].m_first_chunk << 2;
|
||||
params.m_levels[i].m_num_blocks = m_levels[i].m_num_chunks << 2;
|
||||
params.m_levels[i].m_block_width = m_levels[i].m_chunk_width << 1;
|
||||
params.m_levels[i].m_weight = math::minimum(12.0f, powf(1.3f, (float)i));
|
||||
}
|
||||
params.m_num_faces = m_pParams->m_faces;
|
||||
|
||||
params.m_endpoint_indices = &m_endpoint_indices;
|
||||
params.m_selector_indices = &m_selector_indices;
|
||||
|
||||
if (!m_hvq.compress(params, m_total_chunks, &m_chunks[0], m_task_pool))
|
||||
return false;
|
||||
params.m_num_blocks = m_total_chunks << 2;
|
||||
params.m_blocks = (color_quad_u8(*)[16])crnlib_malloc(params.m_num_blocks * 16 * sizeof(color_quad_u8));
|
||||
for (uint b = 0, level = 0; level < m_pParams->m_levels; level++) {
|
||||
for (uint face = 0; face < m_pParams->m_faces; face++) {
|
||||
image_u8& image = m_images[face][level];
|
||||
uint width = image.get_width();
|
||||
uint height = image.get_height();
|
||||
uint blockWidth = (width + 7 & ~7) >> 2;
|
||||
uint blockHeight = (height + 7 & ~7) >> 2;
|
||||
for (uint by = 0; by < blockHeight; by++) {
|
||||
for (uint y0 = by << 2, bx = 0; bx < blockWidth; bx++, b++) {
|
||||
for (uint t = 0, x0 = bx << 2, dy = 0; dy < 4; dy++) {
|
||||
for (uint y = math::minimum<uint>(y0 + dy, height - 1), dx = 0; dx < 4; dx++, t++)
|
||||
params.m_blocks[b][t] = image(math::minimum<uint>(x0 + dx, width - 1), y);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
bool result = m_hvq.compress(params, m_task_pool);
|
||||
crnlib_free(params.m_blocks);
|
||||
|
||||
return true;
|
||||
return result;
|
||||
}
|
||||
|
||||
struct optimize_color_endpoint_codebook_params {
|
||||
@@ -1463,10 +1436,7 @@ bool crn_comp::update_progress(uint phase_index, uint subphase_index, uint subph
|
||||
bool crn_comp::compress_internal() {
|
||||
if (!alias_images())
|
||||
return false;
|
||||
|
||||
create_chunks();
|
||||
|
||||
if (!quantize_chunks())
|
||||
if (!quantize_images())
|
||||
return false;
|
||||
|
||||
crnlib::vector<uint> endpoint_remap[2];
|
||||
|
||||
+1
-5
@@ -69,7 +69,6 @@ class crn_comp : public itexture_comp {
|
||||
crnlib::vector<dxt_hc::selector_indices_details> m_selector_indices;
|
||||
|
||||
uint m_total_chunks;
|
||||
dxt_hc::pixel_chunk_vec m_chunks;
|
||||
|
||||
crnd::crn_header m_crn_header;
|
||||
crnlib::vector<uint8> m_comp_data;
|
||||
@@ -94,8 +93,6 @@ class crn_comp : public itexture_comp {
|
||||
|
||||
void clear();
|
||||
|
||||
void append_chunks(const image_u8& img, uint num_chunks_x, uint num_chunks_y, dxt_hc::pixel_chunk_vec& chunks, float weight);
|
||||
|
||||
static float color_endpoint_similarity_func(uint index_a, uint index_b, void* pContext);
|
||||
static float alpha_endpoint_similarity_func(uint index_a, uint index_b, void* pContext);
|
||||
void sort_color_endpoint_codebook(crnlib::vector<uint>& remapping, const crnlib::vector<uint>& endpoints);
|
||||
@@ -117,8 +114,7 @@ class crn_comp : public itexture_comp {
|
||||
uint trial_index);
|
||||
|
||||
bool alias_images();
|
||||
void create_chunks();
|
||||
bool quantize_chunks();
|
||||
bool quantize_images();
|
||||
|
||||
bool pack_chunks(
|
||||
uint group,
|
||||
|
||||
+295
-590
File diff suppressed because it is too large
Load Diff
+55
-119
@@ -45,22 +45,20 @@ class dxt_hc {
|
||||
};
|
||||
};
|
||||
|
||||
struct chunk_details {
|
||||
uint block_index[2][2];
|
||||
};
|
||||
crnlib::vector<chunk_details> m_chunk_details;
|
||||
|
||||
struct tile_details {
|
||||
crnlib::vector<color_quad_u8> pixels;
|
||||
uint weight;
|
||||
float weight;
|
||||
vec<6, float> color_endpoint;
|
||||
vec<2, float> alpha_endpoints[2];
|
||||
uint16 cluster_indices[3];
|
||||
};
|
||||
crnlib::vector<tile_details> m_tiles;
|
||||
uint m_total_tiles;
|
||||
uint m_num_tiles;
|
||||
float m_color_derating[cCRNMaxLevels][8];
|
||||
float m_alpha_derating[8];
|
||||
|
||||
crnlib::vector<crnlib::vector<color_quad_u8>> m_blocks;
|
||||
color_quad_u8 (*m_blocks)[16];
|
||||
uint m_num_blocks;
|
||||
crnlib::vector<float> m_block_weights;
|
||||
crnlib::vector<uint8> m_block_encodings;
|
||||
crnlib::vector<uint64> m_block_selectors[3];
|
||||
@@ -72,114 +70,70 @@ class dxt_hc {
|
||||
crnlib::vector<endpoint_indices_details> m_endpoint_indices;
|
||||
crnlib::vector<selector_indices_details> m_selector_indices;
|
||||
|
||||
struct pixel_chunk {
|
||||
pixel_chunk() { clear(); }
|
||||
|
||||
dxt_pixel_block m_blocks[cChunkBlockHeight][cChunkBlockWidth];
|
||||
|
||||
const color_quad_u8& operator()(uint cx, uint cy) const {
|
||||
CRNLIB_ASSERT((cx < cChunkPixelWidth) && (cy < cChunkPixelHeight));
|
||||
|
||||
return m_blocks[cy >> cBlockPixelHeightShift][cx >> cBlockPixelWidthShift].m_pixels
|
||||
[cy & (cBlockPixelHeight - 1)][cx & (cBlockPixelWidth - 1)];
|
||||
}
|
||||
|
||||
color_quad_u8& operator()(uint cx, uint cy) {
|
||||
CRNLIB_ASSERT((cx < cChunkPixelWidth) && (cy < cChunkPixelHeight));
|
||||
|
||||
return m_blocks[cy >> cBlockPixelHeightShift][cx >> cBlockPixelWidthShift].m_pixels
|
||||
[cy & (cBlockPixelHeight - 1)][cx & (cBlockPixelWidth - 1)];
|
||||
}
|
||||
|
||||
inline void clear() {
|
||||
utils::zero_object(*this);
|
||||
m_weight = 1.0f;
|
||||
}
|
||||
|
||||
float m_weight;
|
||||
uint m_legacy_index;
|
||||
};
|
||||
|
||||
typedef crnlib::vector<pixel_chunk> pixel_chunk_vec;
|
||||
|
||||
struct params {
|
||||
params()
|
||||
: m_color_endpoint_codebook_size(3072),
|
||||
: m_blocks(0),
|
||||
m_num_blocks(0),
|
||||
m_num_levels(0),
|
||||
m_num_faces(0),
|
||||
m_format(cDXT1),
|
||||
m_perceptual(true),
|
||||
m_hierarchical(true),
|
||||
m_color_endpoint_codebook_size(3072),
|
||||
m_color_selector_codebook_size(3072),
|
||||
m_alpha_endpoint_codebook_size(3072),
|
||||
m_alpha_selector_codebook_size(3072),
|
||||
m_adaptive_tile_color_psnr_derating(2.0f), // was 3.4f
|
||||
m_adaptive_tile_color_psnr_derating(2.0f),
|
||||
m_adaptive_tile_alpha_psnr_derating(2.0f),
|
||||
m_adaptive_tile_color_alpha_weighting_ratio(3.0f),
|
||||
m_num_levels(0),
|
||||
m_format(cDXT1),
|
||||
m_hierarchical(true),
|
||||
m_perceptual(true),
|
||||
m_debugging(false),
|
||||
m_pProgress_func(NULL),
|
||||
m_pProgress_func_data(NULL) {
|
||||
m_pProgress_func(0),
|
||||
m_pProgress_func_data(0),
|
||||
m_endpoint_indices(0),
|
||||
m_selector_indices(0) {
|
||||
m_alpha_component_indices[0] = 3;
|
||||
m_alpha_component_indices[1] = 0;
|
||||
|
||||
for (uint i = 0; i < cCRNMaxLevels; i++) {
|
||||
m_levels[i].m_first_chunk = 0;
|
||||
m_levels[i].m_num_chunks = 0;
|
||||
m_levels[i].m_first_block = 0;
|
||||
m_levels[i].m_num_blocks = 0;
|
||||
m_levels[i].m_block_width = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// Valid range for codebook sizes: [32,8192] (non-power of two values are okay)
|
||||
color_quad_u8 (*m_blocks)[16];
|
||||
uint m_num_blocks;
|
||||
uint m_num_levels;
|
||||
uint m_num_faces;
|
||||
|
||||
struct {
|
||||
uint m_first_block;
|
||||
uint m_num_blocks;
|
||||
uint m_block_width;
|
||||
float m_weight;
|
||||
} m_levels[cCRNMaxLevels];
|
||||
|
||||
dxt_format m_format;
|
||||
bool m_perceptual;
|
||||
bool m_hierarchical;
|
||||
|
||||
uint m_color_endpoint_codebook_size;
|
||||
uint m_color_selector_codebook_size;
|
||||
|
||||
uint m_alpha_endpoint_codebook_size;
|
||||
uint m_alpha_selector_codebook_size;
|
||||
|
||||
// Higher values cause 8x4, 4x8, and 4x4 blocks to be utilized less often (lower quality/smaller files).
|
||||
// Lower values cause the encoder to use large tiles less often (better quality/larger files).
|
||||
// Valid range: [0.0,100.0].
|
||||
// A value of 0 will cause the encoder to only use tiles larger than 4x4 if doing so would incur no quality loss.
|
||||
float m_adaptive_tile_color_psnr_derating;
|
||||
|
||||
float m_adaptive_tile_alpha_psnr_derating;
|
||||
|
||||
float m_adaptive_tile_color_alpha_weighting_ratio;
|
||||
|
||||
uint m_alpha_component_indices[2];
|
||||
|
||||
struct miplevel_desc {
|
||||
uint m_first_chunk;
|
||||
uint m_num_chunks;
|
||||
uint m_chunk_width;
|
||||
};
|
||||
// The mip level data is optional!
|
||||
miplevel_desc m_levels[cCRNMaxLevels];
|
||||
uint m_num_levels;
|
||||
bool m_debugging;
|
||||
crn_progress_callback_func m_pProgress_func;
|
||||
void* m_pProgress_func_data;
|
||||
|
||||
crnlib::vector<endpoint_indices_details> *m_endpoint_indices;
|
||||
crnlib::vector<selector_indices_details> *m_selector_indices;
|
||||
|
||||
dxt_format m_format;
|
||||
|
||||
// If m_hierarchical is false, only 4x4 blocks will be used by the encoder (leading to higher quality/larger files).
|
||||
bool m_hierarchical;
|
||||
|
||||
// If m_perceptual is true, perceptual color metrics will be used by the encoder.
|
||||
bool m_perceptual;
|
||||
|
||||
bool m_debugging;
|
||||
|
||||
crn_progress_callback_func m_pProgress_func;
|
||||
void* m_pProgress_func_data;
|
||||
};
|
||||
|
||||
void clear();
|
||||
|
||||
// Main compression function
|
||||
bool compress(const params& p, uint num_chunks, const pixel_chunk* pChunks, task_pool& task_pool);
|
||||
|
||||
// Output accessors
|
||||
inline uint get_num_chunks() const { return m_num_chunks; }
|
||||
|
||||
struct selectors {
|
||||
selectors() { utils::zero_object(*this); }
|
||||
|
||||
@@ -198,6 +152,9 @@ class dxt_hc {
|
||||
};
|
||||
typedef crnlib::vector<selectors> selectors_vec;
|
||||
|
||||
void clear();
|
||||
bool compress(const params& p, task_pool& task_pool);
|
||||
|
||||
// Color endpoints
|
||||
inline uint get_color_endpoint_codebook_size() const { return m_color_endpoints.size(); }
|
||||
inline uint get_color_endpoint(uint codebook_index) const { return m_color_endpoints[codebook_index]; }
|
||||
@@ -221,34 +178,18 @@ class dxt_hc {
|
||||
private:
|
||||
params m_params;
|
||||
|
||||
uint m_num_chunks;
|
||||
const pixel_chunk* m_pChunks;
|
||||
|
||||
uint m_num_alpha_blocks;
|
||||
bool m_has_color_blocks;
|
||||
bool m_has_alpha0_blocks;
|
||||
bool m_has_alpha1_blocks;
|
||||
|
||||
enum {
|
||||
cColorChunks = 0,
|
||||
cAlpha0Chunks = 1,
|
||||
cAlpha1Chunks = 2,
|
||||
cNumCompressedChunkVecs = 3
|
||||
cColorBlocks = 0,
|
||||
cAlpha0Blocks = 1,
|
||||
cAlpha1Blocks = 2,
|
||||
cNumCompressedComponents = 3
|
||||
};
|
||||
|
||||
void compress_dxt1_block(
|
||||
dxt1_endpoint_optimizer::results& results,
|
||||
uint chunk_index, const image_u8& chunk, uint x_ofs, uint y_ofs, uint width, uint height,
|
||||
uint8* pSelectors);
|
||||
|
||||
void compress_dxt5_block(
|
||||
dxt5_endpoint_optimizer::results& results,
|
||||
uint chunk_index, const image_u8& chunk, uint x_ofs, uint y_ofs, uint width, uint height, uint component_index,
|
||||
uint8* pAlpha_selectors);
|
||||
|
||||
void determine_compressed_chunks_task(uint64 data, void* pData_ptr);
|
||||
bool determine_compressed_chunks();
|
||||
|
||||
struct endpoint_cluster {
|
||||
endpoint_cluster() : m_first_endpoint(0), m_second_endpoint(0) {}
|
||||
crnlib::vector<uint> m_blocks[3];
|
||||
@@ -283,30 +224,25 @@ class dxt_hc {
|
||||
typedef tree_clusterizer<vec6F> vec6F_tree_vq;
|
||||
typedef tree_clusterizer<vec16F> vec16F_tree_vq;
|
||||
|
||||
void determine_color_endpoint_clusters_task(uint64 data, void* pData_ptr);
|
||||
bool determine_color_endpoint_clusters();
|
||||
|
||||
void determine_alpha_endpoint_clusters_task(uint64 data, void* pData_ptr);
|
||||
bool determine_alpha_endpoint_clusters();
|
||||
void determine_tiles_task(uint64 data, void* pData_ptr);
|
||||
|
||||
void determine_color_endpoint_codebook_task(uint64 data, void* pData_ptr);
|
||||
bool determine_color_endpoint_codebook();
|
||||
void determine_color_endpoint_clusters_task(uint64 data, void* pData_ptr);
|
||||
void determine_color_endpoints();
|
||||
|
||||
void determine_alpha_endpoint_codebook_task(uint64 data, void* pData_ptr);
|
||||
bool determine_alpha_endpoint_codebook();
|
||||
void determine_alpha_endpoint_clusters_task(uint64 data, void* pData_ptr);
|
||||
void determine_alpha_endpoints();
|
||||
|
||||
void create_color_selector_codebook_task(uint64 data, void* pData_ptr);
|
||||
bool create_color_selector_codebook();
|
||||
void create_color_selector_codebook();
|
||||
|
||||
void create_alpha_selector_codebook_task(uint64 data, void* pData_ptr);
|
||||
bool create_alpha_selector_codebook();
|
||||
void create_alpha_selector_codebook();
|
||||
|
||||
bool initialize_blocks(const params& p);
|
||||
bool create_block_encodings(const params& p);
|
||||
bool update_progress(uint phase_index, uint subphase_index, uint subphase_total);
|
||||
};
|
||||
|
||||
CRNLIB_DEFINE_BITWISE_COPYABLE(dxt_hc::pixel_chunk);
|
||||
CRNLIB_DEFINE_BITWISE_COPYABLE(dxt_hc::selectors);
|
||||
|
||||
} // namespace crnlib
|
||||
|
||||
Reference in New Issue
Block a user