Switch from chunk encoding to block encoding while performing image quantization

This change improves compression speed and simplifies further modification of the code.

Testing:
The modified algorithm has been tested on the Kodak test set using a 64-bit build with default settings (running on Windows 10, i7-4790, 3.6GHz). All the decompressed test images are identical to the images compressed and decompressed using the original version of Crunch.

[Compressing Kodak set without mipmaps]
Original: 1582222 bytes / 28.947 sec
Modified: 1494501 bytes / 17.642 sec
Improvement: 5.54% (compression ratio) / 39.05% (compression time)

[Compressing Kodak set with mipmaps]
Original: 2065243 bytes / 36.965 sec
Modified: 1945365 bytes / 22.989 sec
Improvement: 5.80% (compression ratio) / 37.81% (compression time)
This commit is contained in:
Alexander Suvorov
2017-06-02 18:13:49 +02:00
parent cd9ba9b615
commit e7d458aa22
5 changed files with 381 additions and 774 deletions
Binary file not shown.
+30 -60
View File
@@ -841,50 +841,6 @@ bool crn_comp::alias_images() {
return true;
}
// Appends num_chunks_x * num_chunks_y chunks built from `img` to the end of `chunks`,
// visiting chunk rows from top to bottom and, within a row, chunk columns from left to right.
// Every appended chunk gets `weight` as its m_weight and is filled with
// cChunkPixelWidth x cChunkPixelHeight pixels read from `img`.
void crn_comp::append_chunks(const image_u8& img, uint num_chunks_x, uint num_chunks_y, dxt_hc::pixel_chunk_vec& chunks, float weight) {
for (uint y = 0; y < num_chunks_y; y++) {
// legacy_index is captured once per chunk row: it is the number of chunks already stored when the row starts.
for (uint legacy_index = chunks.size(), x = 0; x < num_chunks_x; x++) {
chunks.resize(chunks.size() + 1);
dxt_hc::pixel_chunk& chunk = chunks.back();
chunk.m_weight = weight;
// Odd chunk rows are numbered right to left, even rows left to right.
chunk.m_legacy_index = legacy_index + (y & 1 ? num_chunks_x - 1 - x : x);
for (uint cy = 0; cy < cChunkPixelHeight; cy++) {
uint py = y * cChunkPixelHeight + cy;
// Rows past the bottom of the image reuse the last image row.
// NOTE(review): presumably the image is never empty; get_height() - 1 would misbehave for a zero-height image - confirm.
py = math::minimum(py, img.get_height() - 1);
for (uint cx = 0; cx < cChunkPixelWidth; cx++) {
uint px = x * cChunkPixelWidth + cx;
// Columns past the right edge of the image reuse the last image column.
px = math::minimum(px, img.get_width() - 1);
chunk(cx, cy) = img(px, py);
}
}
}
}
}
// Rebuilds m_chunks from m_images: for every mip level (outer loop) and every face (inner loop)
// the image's chunks are appended via append_chunks(). The per-chunk weight depends only on the
// level: min(12, 1.3^level).
void crn_comp::create_chunks() {
// Reserve room for all chunks up front, then empty the vector before refilling it.
m_chunks.reserve(m_total_chunks);
m_chunks.resize(0);
for (uint level = 0; level < m_pParams->m_levels; level++) {
for (uint face = 0; face < m_pParams->m_faces; face++) {
if (!face) {
// At the first face of a level, the running chunk count must match the level's recorded first-chunk offset.
CRNLIB_ASSERT(m_levels[level].m_first_chunk == m_chunks.size());
}
float mip_weight = math::minimum(12.0f, powf(1.3f, static_cast<float>(level)));
//float mip_weight = 1.0f;
append_chunks(m_images[face][level], m_levels[level].m_chunk_width, m_levels[level].m_chunk_height, m_chunks, mip_weight);
}
}
// The total number of chunks produced must equal the precomputed m_total_chunks.
CRNLIB_ASSERT(m_chunks.size() == m_total_chunks);
}
void crn_comp::clear() {
m_pParams = NULL;
@@ -903,8 +859,6 @@ void crn_comp::clear() {
m_total_chunks = 0;
m_chunks.clear();
utils::zero_object(m_crn_header);
m_comp_data.clear();
@@ -931,7 +885,7 @@ void crn_comp::clear() {
m_packed_alpha_selectors.clear();
}
bool crn_comp::quantize_chunks() {
bool crn_comp::quantize_images() {
dxt_hc::params params;
params.m_adaptive_tile_alpha_psnr_derating = m_pParams->m_crn_adaptive_tile_alpha_psnr_derating;
@@ -964,10 +918,8 @@ bool crn_comp::quantize_chunks() {
float alpha_endpoint_quality = powf(quality, 2.1f * alpha_quality_power_mul);
float alpha_selector_quality = powf(quality, 1.65f * alpha_quality_power_mul);
params.m_alpha_endpoint_codebook_size = math::clamp<uint>(math::float_to_uint(.5f + math::lerp<float>(math::maximum<float>(24, cCRNMinPaletteSize), (float)max_codebook_entries, alpha_endpoint_quality)), cCRNMinPaletteSize, cCRNMaxPaletteSize);
;
params.m_alpha_endpoint_codebook_size = math::clamp<uint>(math::float_to_uint(.5f + math::lerp<float>(math::maximum<float>(24, cCRNMinPaletteSize), (float)max_codebook_entries, alpha_endpoint_quality)), cCRNMinPaletteSize, cCRNMaxPaletteSize);
params.m_alpha_selector_codebook_size = math::clamp<uint>(math::float_to_uint(.5f + math::lerp<float>(math::maximum<float>(48, cCRNMinPaletteSize), (float)max_codebook_entries, alpha_selector_quality)), cCRNMinPaletteSize, cCRNMaxPaletteSize);
;
}
if (m_pParams->m_flags & cCRNCompFlagDebugging) {
@@ -1058,18 +1010,39 @@ bool crn_comp::quantize_chunks() {
params.m_num_levels = m_pParams->m_levels;
for (uint i = 0; i < m_pParams->m_levels; i++) {
params.m_levels[i].m_first_chunk = m_levels[i].m_first_chunk;
params.m_levels[i].m_num_chunks = m_levels[i].m_num_chunks;
params.m_levels[i].m_chunk_width = m_levels[i].m_chunk_width;
params.m_levels[i].m_first_block = m_levels[i].m_first_chunk << 2;
params.m_levels[i].m_num_blocks = m_levels[i].m_num_chunks << 2;
params.m_levels[i].m_block_width = m_levels[i].m_chunk_width << 1;
params.m_levels[i].m_weight = math::minimum(12.0f, powf(1.3f, (float)i));
}
params.m_num_faces = m_pParams->m_faces;
params.m_endpoint_indices = &m_endpoint_indices;
params.m_selector_indices = &m_selector_indices;
if (!m_hvq.compress(params, m_total_chunks, &m_chunks[0], m_task_pool))
return false;
params.m_num_blocks = m_total_chunks << 2;
params.m_blocks = (color_quad_u8(*)[16])crnlib_malloc(params.m_num_blocks * 16 * sizeof(color_quad_u8));
for (uint b = 0, level = 0; level < m_pParams->m_levels; level++) {
for (uint face = 0; face < m_pParams->m_faces; face++) {
image_u8& image = m_images[face][level];
uint width = image.get_width();
uint height = image.get_height();
uint blockWidth = (width + 7 & ~7) >> 2;
uint blockHeight = (height + 7 & ~7) >> 2;
for (uint by = 0; by < blockHeight; by++) {
for (uint y0 = by << 2, bx = 0; bx < blockWidth; bx++, b++) {
for (uint t = 0, x0 = bx << 2, dy = 0; dy < 4; dy++) {
for (uint y = math::minimum<uint>(y0 + dy, height - 1), dx = 0; dx < 4; dx++, t++)
params.m_blocks[b][t] = image(math::minimum<uint>(x0 + dx, width - 1), y);
}
}
}
}
}
bool result = m_hvq.compress(params, m_task_pool);
crnlib_free(params.m_blocks);
return true;
return result;
}
struct optimize_color_endpoint_codebook_params {
@@ -1463,10 +1436,7 @@ bool crn_comp::update_progress(uint phase_index, uint subphase_index, uint subph
bool crn_comp::compress_internal() {
if (!alias_images())
return false;
create_chunks();
if (!quantize_chunks())
if (!quantize_images())
return false;
crnlib::vector<uint> endpoint_remap[2];
+1 -5
View File
@@ -69,7 +69,6 @@ class crn_comp : public itexture_comp {
crnlib::vector<dxt_hc::selector_indices_details> m_selector_indices;
uint m_total_chunks;
dxt_hc::pixel_chunk_vec m_chunks;
crnd::crn_header m_crn_header;
crnlib::vector<uint8> m_comp_data;
@@ -94,8 +93,6 @@ class crn_comp : public itexture_comp {
void clear();
void append_chunks(const image_u8& img, uint num_chunks_x, uint num_chunks_y, dxt_hc::pixel_chunk_vec& chunks, float weight);
static float color_endpoint_similarity_func(uint index_a, uint index_b, void* pContext);
static float alpha_endpoint_similarity_func(uint index_a, uint index_b, void* pContext);
void sort_color_endpoint_codebook(crnlib::vector<uint>& remapping, const crnlib::vector<uint>& endpoints);
@@ -117,8 +114,7 @@ class crn_comp : public itexture_comp {
uint trial_index);
bool alias_images();
void create_chunks();
bool quantize_chunks();
bool quantize_images();
bool pack_chunks(
uint group,
+295 -590
View File
File diff suppressed because it is too large Load Diff
+55 -119
View File
@@ -45,22 +45,20 @@ class dxt_hc {
};
};
struct chunk_details {
uint block_index[2][2];
};
crnlib::vector<chunk_details> m_chunk_details;
struct tile_details {
crnlib::vector<color_quad_u8> pixels;
uint weight;
float weight;
vec<6, float> color_endpoint;
vec<2, float> alpha_endpoints[2];
uint16 cluster_indices[3];
};
crnlib::vector<tile_details> m_tiles;
uint m_total_tiles;
uint m_num_tiles;
float m_color_derating[cCRNMaxLevels][8];
float m_alpha_derating[8];
crnlib::vector<crnlib::vector<color_quad_u8>> m_blocks;
color_quad_u8 (*m_blocks)[16];
uint m_num_blocks;
crnlib::vector<float> m_block_weights;
crnlib::vector<uint8> m_block_encodings;
crnlib::vector<uint64> m_block_selectors[3];
@@ -72,114 +70,70 @@ class dxt_hc {
crnlib::vector<endpoint_indices_details> m_endpoint_indices;
crnlib::vector<selector_indices_details> m_selector_indices;
// A chunk of pixels stored as a cChunkBlockHeight x cChunkBlockWidth grid of dxt_pixel_block,
// together with a weight and a legacy index.
struct pixel_chunk {
// A newly constructed chunk is zeroed and has weight 1.0f (see clear()).
pixel_chunk() { clear(); }
dxt_pixel_block m_blocks[cChunkBlockHeight][cChunkBlockWidth];
// Read-only access to the pixel at chunk-relative coordinates (cx, cy).
// The shifted coordinates select the block; the masked low bits select the pixel inside that block.
// NOTE(review): the masks assume the block dimensions are powers of two consistent with the shift constants - confirm.
const color_quad_u8& operator()(uint cx, uint cy) const {
CRNLIB_ASSERT((cx < cChunkPixelWidth) && (cy < cChunkPixelHeight));
return m_blocks[cy >> cBlockPixelHeightShift][cx >> cBlockPixelWidthShift].m_pixels
[cy & (cBlockPixelHeight - 1)][cx & (cBlockPixelWidth - 1)];
}
// Mutable access to the pixel at chunk-relative coordinates (cx, cy); same addressing as the const overload.
color_quad_u8& operator()(uint cx, uint cy) {
CRNLIB_ASSERT((cx < cChunkPixelWidth) && (cy < cChunkPixelHeight));
return m_blocks[cy >> cBlockPixelHeightShift][cx >> cBlockPixelWidthShift].m_pixels
[cy & (cBlockPixelHeight - 1)][cx & (cBlockPixelWidth - 1)];
}
// Zeroes the whole object (pixels, weight and legacy index), then sets the weight to 1.0f.
inline void clear() {
utils::zero_object(*this);
m_weight = 1.0f;
}
float m_weight;
uint m_legacy_index;
};
struct params {
params()
: m_color_endpoint_codebook_size(3072),
: m_blocks(0),
m_num_blocks(0),
m_num_levels(0),
m_num_faces(0),
m_format(cDXT1),
m_perceptual(true),
m_hierarchical(true),
m_color_endpoint_codebook_size(3072),
m_color_selector_codebook_size(3072),
m_alpha_endpoint_codebook_size(3072),
m_alpha_selector_codebook_size(3072),
m_adaptive_tile_color_psnr_derating(2.0f), // was 3.4f
m_adaptive_tile_color_psnr_derating(2.0f),
m_adaptive_tile_alpha_psnr_derating(2.0f),
m_adaptive_tile_color_alpha_weighting_ratio(3.0f),
m_num_levels(0),
m_format(cDXT1),
m_hierarchical(true),
m_perceptual(true),
m_debugging(false),
m_pProgress_func(NULL),
m_pProgress_func_data(NULL) {
m_pProgress_func(0),
m_pProgress_func_data(0),
m_endpoint_indices(0),
m_selector_indices(0) {
m_alpha_component_indices[0] = 3;
m_alpha_component_indices[1] = 0;
for (uint i = 0; i < cCRNMaxLevels; i++) {
m_levels[i].m_first_chunk = 0;
m_levels[i].m_num_chunks = 0;
m_levels[i].m_first_block = 0;
m_levels[i].m_num_blocks = 0;
m_levels[i].m_block_width = 0;
}
}
// Valid range for codebook sizes: [32,8192] (non-power of two values are okay)
color_quad_u8 (*m_blocks)[16];
uint m_num_blocks;
uint m_num_levels;
uint m_num_faces;
struct {
uint m_first_block;
uint m_num_blocks;
uint m_block_width;
float m_weight;
} m_levels[cCRNMaxLevels];
dxt_format m_format;
bool m_perceptual;
bool m_hierarchical;
uint m_color_endpoint_codebook_size;
uint m_color_selector_codebook_size;
uint m_alpha_endpoint_codebook_size;
uint m_alpha_selector_codebook_size;
// Higher values cause 8x4, 4x8, and 4x4 blocks to be utilized less often (lower quality/smaller files).
// Lower values cause the encoder to use large tiles less often (better quality/larger files).
// Valid range: [0.0,100.0].
// A value of 0 will cause the encoder to only use tiles larger than 4x4 if doing so would incur no quality loss.
float m_adaptive_tile_color_psnr_derating;
float m_adaptive_tile_alpha_psnr_derating;
float m_adaptive_tile_color_alpha_weighting_ratio;
uint m_alpha_component_indices[2];
struct miplevel_desc {
uint m_first_chunk;
uint m_num_chunks;
uint m_chunk_width;
};
// The mip level data is optional!
miplevel_desc m_levels[cCRNMaxLevels];
uint m_num_levels;
bool m_debugging;
crn_progress_callback_func m_pProgress_func;
void* m_pProgress_func_data;
crnlib::vector<endpoint_indices_details> *m_endpoint_indices;
crnlib::vector<selector_indices_details> *m_selector_indices;
dxt_format m_format;
// If m_hierarchical is false, only 4x4 blocks will be used by the encoder (leading to higher quality/larger files).
bool m_hierarchical;
// If m_perceptual is true, perceptual color metrics will be used by the encoder.
bool m_perceptual;
bool m_debugging;
crn_progress_callback_func m_pProgress_func;
void* m_pProgress_func_data;
};
void clear();
// Main compression function
bool compress(const params& p, uint num_chunks, const pixel_chunk* pChunks, task_pool& task_pool);
// Output accessors
inline uint get_num_chunks() const { return m_num_chunks; }
struct selectors {
selectors() { utils::zero_object(*this); }
@@ -198,6 +152,9 @@ class dxt_hc {
};
typedef crnlib::vector<selectors> selectors_vec;
void clear();
bool compress(const params& p, task_pool& task_pool);
// Color endpoints
inline uint get_color_endpoint_codebook_size() const { return m_color_endpoints.size(); }
inline uint get_color_endpoint(uint codebook_index) const { return m_color_endpoints[codebook_index]; }
@@ -221,34 +178,18 @@ class dxt_hc {
private:
params m_params;
uint m_num_chunks;
const pixel_chunk* m_pChunks;
uint m_num_alpha_blocks;
bool m_has_color_blocks;
bool m_has_alpha0_blocks;
bool m_has_alpha1_blocks;
enum {
cColorChunks = 0,
cAlpha0Chunks = 1,
cAlpha1Chunks = 2,
cNumCompressedChunkVecs = 3
cColorBlocks = 0,
cAlpha0Blocks = 1,
cAlpha1Blocks = 2,
cNumCompressedComponents = 3
};
void compress_dxt1_block(
dxt1_endpoint_optimizer::results& results,
uint chunk_index, const image_u8& chunk, uint x_ofs, uint y_ofs, uint width, uint height,
uint8* pSelectors);
void compress_dxt5_block(
dxt5_endpoint_optimizer::results& results,
uint chunk_index, const image_u8& chunk, uint x_ofs, uint y_ofs, uint width, uint height, uint component_index,
uint8* pAlpha_selectors);
void determine_compressed_chunks_task(uint64 data, void* pData_ptr);
bool determine_compressed_chunks();
struct endpoint_cluster {
endpoint_cluster() : m_first_endpoint(0), m_second_endpoint(0) {}
crnlib::vector<uint> m_blocks[3];
@@ -283,30 +224,25 @@ class dxt_hc {
typedef tree_clusterizer<vec6F> vec6F_tree_vq;
typedef tree_clusterizer<vec16F> vec16F_tree_vq;
void determine_color_endpoint_clusters_task(uint64 data, void* pData_ptr);
bool determine_color_endpoint_clusters();
void determine_alpha_endpoint_clusters_task(uint64 data, void* pData_ptr);
bool determine_alpha_endpoint_clusters();
void determine_tiles_task(uint64 data, void* pData_ptr);
void determine_color_endpoint_codebook_task(uint64 data, void* pData_ptr);
bool determine_color_endpoint_codebook();
void determine_color_endpoint_clusters_task(uint64 data, void* pData_ptr);
void determine_color_endpoints();
void determine_alpha_endpoint_codebook_task(uint64 data, void* pData_ptr);
bool determine_alpha_endpoint_codebook();
void determine_alpha_endpoint_clusters_task(uint64 data, void* pData_ptr);
void determine_alpha_endpoints();
void create_color_selector_codebook_task(uint64 data, void* pData_ptr);
bool create_color_selector_codebook();
void create_color_selector_codebook();
void create_alpha_selector_codebook_task(uint64 data, void* pData_ptr);
bool create_alpha_selector_codebook();
void create_alpha_selector_codebook();
bool initialize_blocks(const params& p);
bool create_block_encodings(const params& p);
bool update_progress(uint phase_index, uint subphase_index, uint subphase_total);
};
CRNLIB_DEFINE_BITWISE_COPYABLE(dxt_hc::pixel_chunk);
CRNLIB_DEFINE_BITWISE_COPYABLE(dxt_hc::selectors);
} // namespace crnlib