Use block encoding to store intermediate selectors after endpoint quantization
This change simplifies further modification of the code. Explanation: This change is required for further optimization of the quantization code. Testing: The modified algorithm has been tested on the Kodak test set using a 64-bit build with default settings (running on Windows 10, i7-4790, 3.6GHz). All the decompressed test images are identical to the images compressed and decompressed using the original version of Crunch. [Compressing Kodak set without mipmaps] Original: 1582222 bytes / 28.935 sec Modified: 1494501 bytes / 24.528 sec Improvement: 5.54% (compression ratio) / 15.23% (compression time) [Compressing Kodak set with mipmaps] Original: 2065243 bytes / 36.982 sec Modified: 1945365 bytes / 32.308 sec Improvement: 5.80% (compression ratio) / 12.64% (compression time)
This commit is contained in:
Binary file not shown.
+8
-1
@@ -341,6 +341,10 @@ void dxt1_endpoint_optimizer::return_solution(results& res, const potential_solu
|
||||
invert_selectors = (solution.m_coords.m_low_color < solution.m_coords.m_high_color);
|
||||
}
|
||||
|
||||
res.m_alternate_rounding = solution.m_alternate_rounding;
|
||||
res.m_enforce_selector = solution.m_enforce_selector;
|
||||
res.m_enforced_selector = solution.m_enforced_selector;
|
||||
res.m_reordered = invert_selectors;
|
||||
if (invert_selectors) {
|
||||
res.m_low_color = solution.m_coords.m_high_color;
|
||||
res.m_high_color = solution.m_coords.m_low_color;
|
||||
@@ -1539,11 +1543,13 @@ bool dxt1_endpoint_optimizer::evaluate_solution_uber(
|
||||
solution.m_error = trial_error;
|
||||
solution.m_alpha_block = (block_type != 0);
|
||||
solution.m_selectors = m_trial_selectors;
|
||||
solution.m_alternate_rounding = alternate_rounding;
|
||||
solution.m_valid = true;
|
||||
}
|
||||
}
|
||||
|
||||
if ((!solution.m_alpha_block) && (solution.m_coords.m_low_color == solution.m_coords.m_high_color)) {
|
||||
solution.m_enforce_selector = !solution.m_alpha_block && solution.m_coords.m_low_color == solution.m_coords.m_high_color;
|
||||
if (solution.m_enforce_selector) {
|
||||
uint s;
|
||||
if ((solution.m_coords.m_low_color & 31) != 31) {
|
||||
solution.m_coords.m_low_color++;
|
||||
@@ -1555,6 +1561,7 @@ bool dxt1_endpoint_optimizer::evaluate_solution_uber(
|
||||
|
||||
for (uint i = 0; i < m_unique_colors.size(); i++)
|
||||
solution.m_selectors[i] = static_cast<uint8>(s);
|
||||
solution.m_enforced_selector = s;
|
||||
}
|
||||
|
||||
if ((pBest_solution) && (solution.m_error < pBest_solution->m_error)) {
|
||||
|
||||
@@ -156,6 +156,10 @@ class dxt1_endpoint_optimizer {
|
||||
|
||||
uint8* m_pSelectors;
|
||||
bool m_alpha_block;
|
||||
bool m_reordered;
|
||||
bool m_alternate_rounding;
|
||||
bool m_enforce_selector;
|
||||
uint8 m_enforced_selector;
|
||||
};
|
||||
|
||||
struct solution {
|
||||
@@ -248,6 +252,9 @@ class dxt1_endpoint_optimizer {
|
||||
uint64 m_error;
|
||||
bool m_alpha_block;
|
||||
bool m_valid;
|
||||
bool m_alternate_rounding;
|
||||
bool m_enforce_selector;
|
||||
uint8 m_enforced_selector;
|
||||
|
||||
void clear() {
|
||||
m_coords.clear();
|
||||
|
||||
@@ -46,6 +46,7 @@ bool dxt5_endpoint_optimizer::compute(const params& p, results& r) {
|
||||
|
||||
if (m_unique_values.size() == 1) {
|
||||
r.m_block_type = 0;
|
||||
r.m_reordered = false;
|
||||
r.m_error = 0;
|
||||
r.m_first_endpoint = m_unique_values[0];
|
||||
r.m_second_endpoint = m_unique_values[0];
|
||||
@@ -101,6 +102,7 @@ bool dxt5_endpoint_optimizer::compute(const params& p, results& r) {
|
||||
}
|
||||
}
|
||||
|
||||
m_pResults->m_reordered = false;
|
||||
if (m_pResults->m_first_endpoint == m_pResults->m_second_endpoint) {
|
||||
for (uint i = 0; i < m_best_selectors.size(); i++)
|
||||
m_best_selectors[i] = 0;
|
||||
@@ -112,11 +114,13 @@ bool dxt5_endpoint_optimizer::compute(const params& p, results& r) {
|
||||
|
||||
if (m_pResults->m_first_endpoint > m_pResults->m_second_endpoint) {
|
||||
utils::swap(m_pResults->m_first_endpoint, m_pResults->m_second_endpoint);
|
||||
m_pResults->m_reordered = true;
|
||||
for (uint i = 0; i < m_best_selectors.size(); i++)
|
||||
m_best_selectors[i] = g_six_alpha_invert_table[m_best_selectors[i]];
|
||||
}
|
||||
} else if (!(m_pResults->m_first_endpoint > m_pResults->m_second_endpoint)) {
|
||||
utils::swap(m_pResults->m_first_endpoint, m_pResults->m_second_endpoint);
|
||||
m_pResults->m_reordered = true;
|
||||
for (uint i = 0; i < m_best_selectors.size(); i++)
|
||||
m_best_selectors[i] = g_eight_alpha_invert_table[m_best_selectors[i]];
|
||||
}
|
||||
|
||||
@@ -38,6 +38,7 @@ class dxt5_endpoint_optimizer {
|
||||
uint8 m_second_endpoint;
|
||||
|
||||
uint8 m_block_type; // 1 if 6-alpha, otherwise 8-alpha
|
||||
bool m_reordered;
|
||||
};
|
||||
|
||||
bool compute(const params& p, results& r);
|
||||
|
||||
+178
-193
@@ -10,6 +10,18 @@
|
||||
#define CRNLIB_ENABLE_DEBUG_MESSAGES 0
|
||||
|
||||
namespace crnlib {
|
||||
|
||||
// Block-to-tile lookup for a chunk of 2x2 blocks.
//
// g_tile_map[encoding_index][by][bx] is the index of the tile that the block at
// row `by`, column `bx` of a chunk belongs to when the chunk uses encoding
// `encoding_index`. Callers compare the value against a tile index or use it to
// index a chunk's quantized tiles. The table is never written, so it is const.
static const uint8 g_tile_map[8][2][2] = {
    {{0, 0}, {0, 0}},
    {{0, 0}, {1, 1}},
    {{0, 1}, {0, 1}},
    {{0, 0}, {1, 2}},
    {{1, 2}, {0, 0}},
    {{0, 1}, {0, 2}},
    {{1, 0}, {2, 0}},
    {{0, 1}, {2, 3}},
};
|
||||
|
||||
static color_quad_u8 g_tile_layout_colors[cNumChunkTileLayouts] =
|
||||
{
|
||||
color_quad_u8(255, 90, 32, 255),
|
||||
@@ -76,6 +88,43 @@ void dxt_hc::clear() {
|
||||
|
||||
m_prev_phase_index = -1;
|
||||
m_prev_percentage_complete = -1;
|
||||
|
||||
m_chunk_details.clear();
|
||||
m_blocks.clear();
|
||||
for (uint c = 0; c < 3; c++)
|
||||
m_block_selectors[c].clear();
|
||||
m_endpoint_indices.clear();
|
||||
|
||||
}
|
||||
|
||||
bool dxt_hc::initialize_blocks(const params& p) {
|
||||
m_chunk_details.resize(m_num_chunks);
|
||||
m_blocks.resize(m_num_chunks << 2);
|
||||
for (uint c = 0; c < 3; c++)
|
||||
m_block_selectors[c].resize(m_blocks.size());
|
||||
m_endpoint_indices.resize(m_blocks.size());
|
||||
|
||||
for (uint level = 0; level < p.m_num_levels; level++) {
|
||||
uint first_chunk = p.m_levels[level].m_first_chunk;
|
||||
uint end_chunk = p.m_levels[level].m_first_chunk + p.m_levels[level].m_num_chunks;
|
||||
uint chunk_width = p.m_levels[level].m_chunk_width;
|
||||
uint block_width = chunk_width << 1;
|
||||
for (uint b = first_chunk << 2, cy = 0, chunk_base = first_chunk; chunk_base < end_chunk; chunk_base += chunk_width, cy++) {
|
||||
for (uint by = 0; by < 2; by++) {
|
||||
for (uint cx = 0; cx < chunk_width; cx++) {
|
||||
for (uint bx = 0; bx < 2; bx++, b++) {
|
||||
const pixel_chunk& chunk = m_pChunks[chunk_base + cx];
|
||||
m_chunk_details[chunk_base + cx].block_index[by][bx] = b;
|
||||
for (uint t = 0, y = 0; y < 4; y++) {
|
||||
for (uint x = 0; x < 4; x++, t++)
|
||||
m_blocks[b].push_back(chunk(bx << 2 | x, by << 2 | y));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool dxt_hc::compress(const params& p, uint num_chunks, const pixel_chunk* pChunks, task_pool& task_pool) {
|
||||
@@ -130,6 +179,7 @@ bool dxt_hc::compress_internal(const params& p, uint num_chunks, const pixel_chu
|
||||
}
|
||||
}
|
||||
|
||||
initialize_blocks(p);
|
||||
determine_compressed_chunks();
|
||||
|
||||
if (m_has_color_blocks) {
|
||||
@@ -506,7 +556,6 @@ void dxt_hc::determine_compressed_chunks_task(uint64 data, void* pData_ptr) {
|
||||
output.m_tiles[t].m_first_endpoint = color_results.m_low_color;
|
||||
output.m_tiles[t].m_second_endpoint = color_results.m_high_color;
|
||||
|
||||
memcpy(output.m_tiles[t].m_selectors, pColor_selectors, cChunkPixelWidth * cChunkPixelHeight);
|
||||
} else {
|
||||
const uint a = q - cAlpha0Chunks;
|
||||
|
||||
@@ -517,7 +566,6 @@ void dxt_hc::determine_compressed_chunks_task(uint64 data, void* pData_ptr) {
|
||||
output.m_tiles[t].m_first_endpoint = alpha_results.m_first_endpoint;
|
||||
output.m_tiles[t].m_second_endpoint = alpha_results.m_second_endpoint;
|
||||
|
||||
memcpy(output.m_tiles[t].m_selectors, pAlpha_selectors, cChunkPixelWidth * cChunkPixelHeight);
|
||||
}
|
||||
} // t
|
||||
} // q
|
||||
@@ -926,22 +974,67 @@ void dxt_hc::determine_color_endpoint_codebook_task(uint64 data, void* pData_ptr
|
||||
cluster.m_second_endpoint = results.m_high_color;
|
||||
cluster.m_error = results.m_error;
|
||||
|
||||
dxt_endpoint_refiner refiner;
|
||||
dxt_endpoint_refiner::params p;
|
||||
dxt_endpoint_refiner::results r;
|
||||
p.m_perceptual = m_params.m_perceptual;
|
||||
p.m_pSelectors = cluster.m_selectors.get_ptr();
|
||||
p.m_pPixels = cluster.m_pixels.get_ptr();
|
||||
p.m_num_pixels = cluster.m_pixels.size();
|
||||
p.m_dxt1_selectors = true;
|
||||
p.m_error_to_beat = cluster.m_error;
|
||||
p.m_block_index = cluster_index;
|
||||
cluster.m_refined.result = refiner.refine(p, r);
|
||||
cluster.m_refined.first_endpoint = r.m_low_color;
|
||||
cluster.m_refined.second_endpoint = r.m_high_color;
|
||||
cluster.m_refined.error = r.m_error;
|
||||
color_quad_u8 color_values[4];
|
||||
color_values[0] = dxt1_block::unpack_color(results.m_low_color, true);
|
||||
color_values[3] = dxt1_block::unpack_color(results.m_high_color, true);
|
||||
for (uint c = 0; c < 3; c++) {
|
||||
color_values[1].c[c] = ((color_values[0].c[c] << 1) + color_values[3].c[c] + (results.m_alternate_rounding ? 1 : 0)) / 3;
|
||||
color_values[2].c[c] = ((color_values[3].c[c] << 1) + color_values[0].c[c] + (results.m_alternate_rounding ? 1 : 0)) / 3;
|
||||
}
|
||||
|
||||
uint pixel_index = 0;
|
||||
uint8 color_order[4];
|
||||
for (uint8 i = 0; i < 4; i++)
|
||||
color_order[i] = results.m_reordered ? 3 - g_dxt1_to_linear[i] : g_dxt1_to_linear[i];
|
||||
|
||||
uint endpoint_weight = color::color_distance(m_params.m_perceptual, color_values[0], color_values[3], false) / 2000;
|
||||
float encoding_weight[8];
|
||||
for (uint i = 0; i < 8; i++)
|
||||
encoding_weight[i] = math::lerp(1.15f, 1.0f, i / 7.0f);
|
||||
|
||||
for (uint t = 0; t < cluster.m_tiles.size(); t++) {
|
||||
const uint chunk_index = cluster.m_tiles[t].first;
|
||||
const uint tile_index = cluster.m_tiles[t].second;
|
||||
compressed_chunk& chunk = m_compressed_chunks[cColorChunks][chunk_index];
|
||||
uint8 encoding_index = chunk.m_encoding_index;
|
||||
uint weight = (uint)(math::clamp<uint>(endpoint_weight * m_pChunks[chunk_index].m_weight, 1, 2048) * encoding_weight[encoding_index]);
|
||||
for (uint by = 0; by < 2; by++) {
|
||||
for (uint bx = 0; bx < 2; bx++) {
|
||||
if (g_tile_map[encoding_index][by][bx] == tile_index) {
|
||||
uint b = m_chunk_details[chunk_index].block_index[by][bx];
|
||||
uint64 selector = 0;
|
||||
for (uint sh = 0, p = 0; p < 16; p++, sh += 3) {
|
||||
uint8 s_best;
|
||||
for (uint32 error_best = UINT_MAX, t = 0; t < 4; t++) {
|
||||
uint8 s = color_order[t];
|
||||
uint32 error = color::color_distance(m_params.m_perceptual, (color_quad_u8&)m_blocks[b][p], color_values[s], false);
|
||||
if (error < error_best) {
|
||||
s_best = s;
|
||||
error_best = error;
|
||||
}
|
||||
}
|
||||
selector |= (uint64)s_best << sh;
|
||||
}
|
||||
m_block_selectors[cColorChunks][b] = selector | (uint64)weight << 48;
|
||||
m_endpoint_indices[b].component[0] = cluster_index;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
dxt_endpoint_refiner refiner;
|
||||
dxt_endpoint_refiner::params refinerParams;
|
||||
dxt_endpoint_refiner::results refinerResults;
|
||||
refinerParams.m_perceptual = m_params.m_perceptual;
|
||||
refinerParams.m_pSelectors = cluster.m_selectors.get_ptr();
|
||||
refinerParams.m_pPixels = cluster.m_pixels.get_ptr();
|
||||
refinerParams.m_num_pixels = cluster.m_pixels.size();
|
||||
refinerParams.m_dxt1_selectors = true;
|
||||
refinerParams.m_error_to_beat = cluster.m_error;
|
||||
refinerParams.m_block_index = cluster_index;
|
||||
cluster.m_refined.result = refiner.refine(refinerParams, refinerResults);
|
||||
cluster.m_refined.first_endpoint = refinerResults.m_low_color;
|
||||
cluster.m_refined.second_endpoint = refinerResults.m_high_color;
|
||||
cluster.m_refined.error = refinerResults.m_error;
|
||||
|
||||
for (uint t = 0; t < cluster.m_tiles.size(); t++) {
|
||||
const uint chunk_index = cluster.m_tiles[t].first;
|
||||
@@ -970,10 +1063,6 @@ void dxt_hc::determine_color_endpoint_codebook_task(uint64 data, void* pData_ptr
|
||||
quantized_tile.m_pixel_width = tile.m_pixel_width;
|
||||
quantized_tile.m_pixel_height = tile.m_pixel_height;
|
||||
quantized_tile.m_layout_index = tile.m_layout_index;
|
||||
|
||||
memcpy(quantized_tile.m_selectors, &cluster.m_selectors[pixel_index], total_pixels);
|
||||
|
||||
pixel_index += total_pixels;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1037,6 +1126,49 @@ void dxt_hc::determine_alpha_endpoint_codebook_task(uint64 data, void* pData_ptr
|
||||
cluster.m_second_endpoint = results.m_second_endpoint;
|
||||
cluster.m_error = results.m_error;
|
||||
|
||||
int delta = cluster.m_second_endpoint - cluster.m_first_endpoint;
|
||||
uint8 alpha_values[8];
|
||||
uint8 alpha_order[8];
|
||||
for (uint sum = cluster.m_first_endpoint * 7, i = 0; i < 8; i++, sum += delta) {
|
||||
alpha_values[i] = (uint8)(sum / 7);
|
||||
alpha_order[i] = results.m_reordered ? 7 - g_dxt5_to_linear[i] : g_dxt5_to_linear[i];
|
||||
}
|
||||
uint64 encoding_weight[8];
|
||||
for (uint endpoint_weight = math::clamp<uint>(delta * delta >> 3, 1, 2048), i = 0; i < 8; i++)
|
||||
encoding_weight[i] = (uint)(endpoint_weight * math::lerp(1.15f, 1.0f, i / 7.0f));
|
||||
|
||||
for (uint tile_iter = 0; tile_iter < cluster.m_tiles.size(); tile_iter++) {
|
||||
const uint chunk_index = cluster.m_tiles[tile_iter].first;
|
||||
const uint tile_index = cluster.m_tiles[tile_iter].second & 0xFFFFU;
|
||||
const uint alpha_index = cluster.m_tiles[tile_iter].second >> 16U;
|
||||
compressed_chunk& chunk = m_compressed_chunks[cAlpha0Chunks + alpha_index][chunk_index];
|
||||
uint component_index = m_params.m_alpha_component_indices[alpha_index];
|
||||
uint8 encoding_index = chunk.m_encoding_index;
|
||||
for (uint by = 0; by < 2; by++) {
|
||||
for (uint bx = 0; bx < 2; bx++) {
|
||||
if (g_tile_map[encoding_index][by][bx] == tile_index) {
|
||||
uint b = m_chunk_details[chunk_index].block_index[by][bx];
|
||||
uint64 selector = 0;
|
||||
for (uint sh = 0, p = 0; p < 16; p++, sh += 3) {
|
||||
uint8 s_best;
|
||||
for (uint32 error_best = UINT_MAX, t = 0; t < 8; t++) {
|
||||
uint8 s = alpha_order[t];
|
||||
int delta = m_blocks[b][p][component_index] - alpha_values[s];
|
||||
uint32 error = delta >= 0 ? delta : -delta;
|
||||
if (error < error_best) {
|
||||
s_best = s;
|
||||
error_best = error;
|
||||
}
|
||||
}
|
||||
selector |= (uint64)s_best << sh;
|
||||
}
|
||||
m_block_selectors[cAlpha0Chunks + alpha_index][b] = selector | encoding_weight[encoding_index] << 48;
|
||||
m_endpoint_indices[b].component[cAlpha0Chunks + alpha_index] = cluster_index;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
dxt_endpoint_refiner refiner;
|
||||
dxt_endpoint_refiner::params p;
|
||||
dxt_endpoint_refiner::results r;
|
||||
@@ -1052,8 +1184,6 @@ void dxt_hc::determine_alpha_endpoint_codebook_task(uint64 data, void* pData_ptr
|
||||
cluster.m_refined.second_endpoint = r.m_high_color;
|
||||
cluster.m_refined.error = r.m_error;
|
||||
|
||||
uint pixel_index = 0;
|
||||
|
||||
for (uint tile_iter = 0; tile_iter < cluster.m_tiles.size(); tile_iter++) {
|
||||
const uint chunk_index = cluster.m_tiles[tile_iter].first;
|
||||
const uint tile_index = cluster.m_tiles[tile_iter].second & 0xFFFFU;
|
||||
@@ -1074,18 +1204,12 @@ void dxt_hc::determine_alpha_endpoint_codebook_task(uint64 data, void* pData_ptr
|
||||
|
||||
compressed_tile& quantized_tile = chunk.m_quantized_tiles[tile_index];
|
||||
|
||||
const uint total_pixels = tile.m_pixel_width * tile.m_pixel_height;
|
||||
|
||||
quantized_tile.m_endpoint_cluster_index = cluster_index;
|
||||
quantized_tile.m_first_endpoint = results.m_first_endpoint;
|
||||
quantized_tile.m_second_endpoint = results.m_second_endpoint;
|
||||
quantized_tile.m_pixel_width = tile.m_pixel_width;
|
||||
quantized_tile.m_pixel_height = tile.m_pixel_height;
|
||||
quantized_tile.m_layout_index = tile.m_layout_index;
|
||||
|
||||
memcpy(quantized_tile.m_selectors, &cluster.m_selectors[pixel_index], total_pixels);
|
||||
|
||||
pixel_index += total_pixels;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1269,165 +1393,37 @@ void dxt_hc::create_selector_codebook_task(uint64 data, void* pData_ptr) {
|
||||
}
|
||||
|
||||
bool dxt_hc::create_selector_codebook(bool alpha_blocks) {
|
||||
#if CRNLIB_ENABLE_DEBUG_MESSAGES
|
||||
if (m_params.m_debugging)
|
||||
console::info("Computing selector training vectors");
|
||||
#endif
|
||||
|
||||
const uint cColorDistToWeight = 2000;
|
||||
const uint cAlphaErrorToWeight = 8;
|
||||
|
||||
vec16F_tree_vq selector_vq;
|
||||
vec16F v;
|
||||
uint c_start = alpha_blocks ? cAlpha0Chunks : cColorChunks;
|
||||
uint c_end = alpha_blocks ? cAlpha0Chunks + m_num_alpha_blocks - 1 : cColorChunks;
|
||||
float scale = alpha_blocks ? 0.125f : 0.25f;
|
||||
|
||||
uint comp_index_start = cColorChunks;
|
||||
uint comp_index_end = cColorChunks;
|
||||
if (alpha_blocks) {
|
||||
comp_index_start = cAlpha0Chunks;
|
||||
comp_index_end = cAlpha0Chunks + m_num_alpha_blocks - 1;
|
||||
for (uint c = c_start; c <= c_end; c++) {
|
||||
for (uint b = 0; b < m_blocks.size(); b++) {
|
||||
uint64 selector = m_block_selectors[c][b];
|
||||
for (uint8 p = 0; p < 16; p++, selector >>= 3)
|
||||
v[p] = ((selector & 7) + 0.5f) * scale;
|
||||
selector_vq.add_training_vec(v, selector);
|
||||
}
|
||||
}
|
||||
|
||||
crnlib::vector<vec16F> training_vecs[cNumCompressedChunkVecs][4];
|
||||
|
||||
for (uint comp_chunk_index = comp_index_start; comp_chunk_index <= comp_index_end; comp_chunk_index++) {
|
||||
for (uint i = 0; i < 4; i++)
|
||||
training_vecs[comp_chunk_index][i].resize(m_num_chunks);
|
||||
|
||||
for (uint chunk_index = 0; chunk_index < m_num_chunks; chunk_index++) {
|
||||
if ((chunk_index & 63) == 0) {
|
||||
if (!update_progress(9 + comp_chunk_index, chunk_index, m_num_chunks))
|
||||
return false;
|
||||
}
|
||||
|
||||
const compressed_chunk& chunk = m_compressed_chunks[comp_chunk_index][chunk_index];
|
||||
|
||||
uint8 block_selectors[cChunkBlockWidth][cChunkBlockHeight][cBlockPixelWidth * cBlockPixelHeight];
|
||||
uint block_weight[cChunkBlockWidth][cChunkBlockHeight];
|
||||
|
||||
for (uint tile_index = 0; tile_index < chunk.m_num_tiles; tile_index++) {
|
||||
const compressed_tile& quantized_tile = chunk.m_quantized_tiles[tile_index];
|
||||
|
||||
uint weight;
|
||||
if (comp_chunk_index == cColorChunks) {
|
||||
const color_quad_u8 first_color(dxt1_block::unpack_color(static_cast<uint16>(quantized_tile.m_first_endpoint), true));
|
||||
const color_quad_u8 second_color(dxt1_block::unpack_color(static_cast<uint16>(quantized_tile.m_second_endpoint), true));
|
||||
const uint dist = color::color_distance(m_params.m_perceptual, first_color, second_color, false);
|
||||
|
||||
weight = dist / cColorDistToWeight;
|
||||
|
||||
weight = static_cast<uint>(weight * m_pChunks[chunk_index].m_weight);
|
||||
} else {
|
||||
int first_endpoint = quantized_tile.m_first_endpoint;
|
||||
int second_endpoint = quantized_tile.m_second_endpoint;
|
||||
int error = first_endpoint - second_endpoint;
|
||||
error = error * error;
|
||||
|
||||
weight = static_cast<uint>(error / cAlphaErrorToWeight);
|
||||
}
|
||||
|
||||
const uint cMaxWeight = 2048;
|
||||
|
||||
weight = math::clamp<uint>(weight, 1U, cMaxWeight);
|
||||
|
||||
// umm, this is a hack
|
||||
float f = math::lerp(1.15f, 1.0f, chunk.m_encoding_index / float(cNumChunkEncodings - 1));
|
||||
weight = (uint)(weight * f);
|
||||
|
||||
const chunk_tile_desc& layout = g_chunk_tile_layouts[quantized_tile.m_layout_index];
|
||||
|
||||
for (uint y = 0; y < (layout.m_height >> 2); y++)
|
||||
for (uint x = 0; x < (layout.m_width >> 2); x++)
|
||||
block_weight[x + (layout.m_x_ofs >> 2)][y + (layout.m_y_ofs >> 2)] = weight;
|
||||
|
||||
const uint8* pSelectors = quantized_tile.m_selectors;
|
||||
|
||||
for (uint y = 0; y < layout.m_height; y++) {
|
||||
const uint cy = y + layout.m_y_ofs;
|
||||
|
||||
for (uint x = 0; x < layout.m_width; x++) {
|
||||
const uint selector = pSelectors[x + y * layout.m_width];
|
||||
|
||||
if (comp_chunk_index == cColorChunks)
|
||||
CRNLIB_ASSERT(selector < cDXT1SelectorValues);
|
||||
else
|
||||
CRNLIB_ASSERT(selector < cDXT5SelectorValues);
|
||||
|
||||
const uint cx = x + layout.m_x_ofs;
|
||||
|
||||
block_selectors[cx >> 2][cy >> 2][(cx & 3) + (cy & 3) * 4] = static_cast<uint8>(selector);
|
||||
} // x
|
||||
} // y
|
||||
} // tile_index
|
||||
|
||||
vec16F v;
|
||||
for (uint y = 0; y < cChunkBlockHeight; y++) {
|
||||
for (uint x = 0; x < cChunkBlockWidth; x++) {
|
||||
for (uint i = 0; i < cBlockPixelWidth * cBlockPixelHeight; i++) {
|
||||
uint s = block_selectors[x][y][i];
|
||||
|
||||
float f;
|
||||
|
||||
if (comp_chunk_index == cColorChunks) {
|
||||
CRNLIB_ASSERT(s < cDXT1SelectorValues);
|
||||
f = (g_dxt1_to_linear[s] + .5f) * 1.0f / 4.0f;
|
||||
} else {
|
||||
CRNLIB_ASSERT(s < cDXT5SelectorValues);
|
||||
f = (g_dxt5_to_linear[s] + .5f) * 1.0f / 8.0f;
|
||||
}
|
||||
|
||||
CRNLIB_ASSERT((f >= 0.0f) && (f <= 1.0f));
|
||||
|
||||
v[i] = f;
|
||||
} // i
|
||||
|
||||
selector_vq.add_training_vec(v, block_weight[x][y]);
|
||||
|
||||
training_vecs[comp_chunk_index][x + y * 2][chunk_index] = v;
|
||||
} // x
|
||||
} // y
|
||||
|
||||
} // chunk_index
|
||||
|
||||
} // comp_chunk_index
|
||||
|
||||
timer t;
|
||||
t.start();
|
||||
|
||||
selector_vq.generate_codebook(alpha_blocks ? m_params.m_alpha_selector_codebook_size : m_params.m_color_selector_codebook_size);
|
||||
|
||||
#if CRNLIB_ENABLE_DEBUG_MESSAGES
|
||||
if (m_params.m_debugging) {
|
||||
double total_time = t.get_elapsed_secs();
|
||||
console::info("Codebook gen time: %3.3fs, Selector codebook size: %u", total_time, selector_vq.get_codebook_size());
|
||||
}
|
||||
#endif
|
||||
|
||||
selectors_vec& selectors_cb = alpha_blocks ? m_alpha_selectors : m_color_selectors;
|
||||
|
||||
selectors_cb.resize(selector_vq.get_codebook_size());
|
||||
|
||||
for (uint i = 0; i < selector_vq.get_codebook_size(); i++) {
|
||||
const vec16F& v = selector_vq.get_codebook_entry(i);
|
||||
|
||||
for (uint j = 0; j < cBlockPixelWidth * cBlockPixelHeight; j++) {
|
||||
int s;
|
||||
if (alpha_blocks) {
|
||||
s = math::clamp<int>(static_cast<int>(v[j] * 8.0f), 0, 7);
|
||||
s = g_dxt5_from_linear[s];
|
||||
} else {
|
||||
s = math::clamp<int>(static_cast<int>(v[j] * 4.0f), 0, 3);
|
||||
s = g_dxt1_from_linear[s];
|
||||
}
|
||||
|
||||
selectors_cb[i].m_selectors[j >> 2][j & 3] = static_cast<uint8>(s);
|
||||
} // j
|
||||
} // i
|
||||
for (uint j = 0; j < 16; j++)
|
||||
selectors_cb[i].m_selectors[j >> 2][j & 3] = alpha_blocks ? g_dxt5_from_linear[(int)(v[j] * 8.0f)] : g_dxt1_from_linear[(int)(v[j] * 4.0f)];
|
||||
}
|
||||
|
||||
chunk_blocks_using_selectors_vec& chunk_blocks_using_selectors = alpha_blocks ? m_chunk_blocks_using_alpha_selectors : m_chunk_blocks_using_color_selectors;
|
||||
|
||||
chunk_blocks_using_selectors.clear();
|
||||
chunk_blocks_using_selectors.resize(selectors_cb.size());
|
||||
|
||||
create_selector_codebook_state state(*this, alpha_blocks, comp_index_start, comp_index_end, selector_vq, chunk_blocks_using_selectors, selectors_cb);
|
||||
create_selector_codebook_state state(*this, alpha_blocks, c_start, c_end, selector_vq, chunk_blocks_using_selectors, selectors_cb);
|
||||
|
||||
for (uint i = 0; i <= m_pTask_pool->get_num_threads(); i++)
|
||||
m_pTask_pool->queue_object_task(this, &dxt_hc::create_selector_codebook_task, i, &state);
|
||||
@@ -1819,22 +1815,11 @@ bool dxt_hc::refine_quantized_alpha_endpoints() {
|
||||
}
|
||||
|
||||
bool dxt_hc::create_block_encodings(const params& p) {
|
||||
crnlib::vector<endpoint_indices_details>& m_endpoint_indices = *p.m_endpoint_indices;
|
||||
crnlib::vector<selector_indices_details>& m_selector_indices = *p.m_selector_indices;
|
||||
crnlib::vector<endpoint_indices_details>& endpoint_indices = *p.m_endpoint_indices;
|
||||
crnlib::vector<selector_indices_details>& selector_indices = *p.m_selector_indices;
|
||||
|
||||
uint8 tile_map[8][2][2] = {
|
||||
{{ 0, 0 }, { 0, 0 }},
|
||||
{{ 0, 0 }, { 1, 1 }},
|
||||
{{ 0, 1 }, { 0, 1 }},
|
||||
{{ 0, 0 }, { 1, 2 }},
|
||||
{{ 1, 2 }, { 0, 0 }},
|
||||
{{ 0, 1 }, { 0, 2 }},
|
||||
{{ 1, 0 }, { 2, 0 }},
|
||||
{{ 0, 1 }, { 2, 3 }},
|
||||
};
|
||||
|
||||
m_endpoint_indices.resize(m_num_chunks << 2);
|
||||
m_selector_indices.resize(m_num_chunks << 2);
|
||||
endpoint_indices.resize(m_num_chunks << 2);
|
||||
selector_indices.resize(m_num_chunks << 2);
|
||||
bool hasBlocks[cNumCompressedChunkVecs] = {m_has_color_blocks, m_num_alpha_blocks > 0, m_num_alpha_blocks > 1};
|
||||
|
||||
for (uint level = 0; level < p.m_num_levels; level++) {
|
||||
@@ -1851,14 +1836,14 @@ bool dxt_hc::create_block_encodings(const params& p) {
|
||||
for (uint c = 0; c < cNumCompressedChunkVecs; c++) {
|
||||
if (hasBlocks[c]) {
|
||||
const compressed_chunk& chunk = m_compressed_chunks[c][chunk_base + cx];
|
||||
uint16 endpoint_index = chunk.m_quantized_tiles[tile_map[chunk.m_encoding_index][by][bx]].m_endpoint_cluster_index;
|
||||
left_match = left_match && endpoint_index == m_endpoint_indices[b - 1].component[c];
|
||||
top_match = top_match && endpoint_index == m_endpoint_indices[b - block_width].component[c];
|
||||
m_endpoint_indices[b].component[c] = endpoint_index;
|
||||
m_selector_indices[b].component[c] = chunk.m_selector_cluster_index[by][bx];
|
||||
uint16 endpoint_index = chunk.m_quantized_tiles[g_tile_map[chunk.m_encoding_index][by][bx]].m_endpoint_cluster_index;
|
||||
left_match = left_match && endpoint_index == endpoint_indices[b - 1].component[c];
|
||||
top_match = top_match && endpoint_index == endpoint_indices[b - block_width].component[c];
|
||||
endpoint_indices[b].component[c] = endpoint_index;
|
||||
selector_indices[b].component[c] = chunk.m_selector_cluster_index[by][bx];
|
||||
}
|
||||
}
|
||||
m_endpoint_indices[b].reference = left_match ? 1 : top_match ? 2 : 0;
|
||||
endpoint_indices[b].reference = left_match ? 1 : top_match ? 2 : 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
+10
-2
@@ -45,6 +45,15 @@ class dxt_hc {
|
||||
};
|
||||
};
|
||||
|
||||
struct chunk_details {
|
||||
uint block_index[2][2];
|
||||
};
|
||||
crnlib::vector<chunk_details> m_chunk_details;
|
||||
|
||||
crnlib::vector<uint64> m_block_selectors[3];
|
||||
crnlib::vector<crnlib::vector<color_quad_u8>> m_blocks;
|
||||
crnlib::vector<endpoint_indices_details> m_endpoint_indices;
|
||||
|
||||
struct pixel_chunk {
|
||||
pixel_chunk() { clear(); }
|
||||
|
||||
@@ -207,8 +216,6 @@ class dxt_hc {
|
||||
uint m_first_endpoint;
|
||||
uint m_second_endpoint;
|
||||
|
||||
uint8 m_selectors[cChunkPixelWidth * cChunkPixelHeight];
|
||||
|
||||
uint8 m_pixel_width;
|
||||
uint8 m_pixel_height;
|
||||
|
||||
@@ -377,6 +384,7 @@ class dxt_hc {
|
||||
bool refine_quantized_color_selectors();
|
||||
bool refine_quantized_alpha_endpoints();
|
||||
bool refine_quantized_alpha_selectors();
|
||||
bool initialize_blocks(const params& p);
|
||||
bool create_block_encodings(const params& p);
|
||||
bool update_progress(uint phase_index, uint subphase_index, uint subphase_total);
|
||||
bool compress_internal(const params& p, uint num_chunks, const pixel_chunk* pChunks);
|
||||
|
||||
Reference in New Issue
Block a user