Move alpha endpoint refinement into the alpha endpoint optimization thread

This change improves compression speed when an alpha channel is used.

Explanation:
As the alpha endpoint refinement does not depend on the alpha selector codebook computation, it can be safely moved into the alpha endpoint optimization thread.

Testing:
The modified algorithm has been tested on the Kodak test set using a 64-bit build with default settings (running on Windows 10, i7-4790, 3.6GHz). All the decompressed test images are identical to the images compressed and decompressed using the original version of Crunch.

[Compressing Kodak set without mipmaps]
Original: 1582222 bytes / 28.912 sec
Modified: 1494501 bytes / 24.128 sec
Improvement: 5.54% (compression ratio) / 16.55% (compression time)

[Compressing Kodak set with mipmaps]
Original: 2065243 bytes / 36.985 sec
Modified: 1945365 bytes / 31.741 sec
Improvement: 5.80% (compression ratio) / 14.18% (compression time)
This commit is contained in:
Alexander Suvorov
2017-05-12 14:06:53 +02:00
parent 9c289fc621
commit 1ef829ed6f
2 changed files with 40 additions and 154 deletions
Binary file not shown.
+40 -154
View File
@@ -853,12 +853,19 @@ bool dxt_hc::determine_alpha_endpoint_clusters() {
return false;
for (uint a = 0; a < m_num_alpha_blocks; a++) {
uint component_index = m_params.m_alpha_component_indices[a];
for (uint i = 0; i < m_num_chunks; i++) {
int chunk_index = m_pChunks[i].m_legacy_index;
compressed_chunk& chunk = m_compressed_chunks[cAlpha0Chunks + a][chunk_index];
for (uint tile_index = 0; tile_index < chunk.m_num_tiles; tile_index++) {
const uint cluster_index = chunk.m_endpoint_cluster_index[tile_index];
m_alpha_clusters[cluster_index].m_tiles.push_back(std::make_pair(chunk_index, tile_index | (a << 16)));
const compressed_tile& tile = chunk.m_tiles[tile_index];
const chunk_tile_desc& layout = g_chunk_tile_layouts[tile.m_layout_index];
for (uint y = 0; y < layout.m_height; y++)
for (uint x = 0; x < layout.m_width; x++)
m_alpha_clusters[cluster_index].m_pixels.push_back(color_quad_u8(m_pChunks[chunk_index](layout.m_x_ofs + x, layout.m_y_ofs + y)[component_index]));
}
}
}
@@ -990,16 +997,8 @@ bool dxt_hc::determine_color_endpoint_codebook() {
void dxt_hc::determine_alpha_endpoint_codebook_task(uint64 data, void* pData_ptr) {
pData_ptr;
const uint thread_index = static_cast<uint>(data);
crnlib::vector<color_quad_u8> pixels;
pixels.reserve(512);
crnlib::vector<uint8> selectors;
selectors.reserve(512);
uint total_empty_clusters = 0;
for (uint cluster_index = 0; cluster_index < m_alpha_clusters.size(); cluster_index++) {
if (m_canceled)
return;
@@ -1015,63 +1014,44 @@ void dxt_hc::determine_alpha_endpoint_codebook_task(uint64 data, void* pData_ptr
}
tile_cluster& cluster = m_alpha_clusters[cluster_index];
if (cluster.m_tiles.empty()) {
total_empty_clusters++;
if (cluster.m_pixels.empty())
continue;
}
pixels.resize(0);
for (uint tile_iter = 0; tile_iter < cluster.m_tiles.size(); tile_iter++) {
const uint chunk_index = cluster.m_tiles[tile_iter].first;
const uint tile_index = cluster.m_tiles[tile_iter].second & 0xFFFFU;
const uint alpha_index = cluster.m_tiles[tile_iter].second >> 16U;
CRNLIB_ASSERT(chunk_index < m_num_chunks);
CRNLIB_ASSERT(tile_index < cChunkMaxTiles);
CRNLIB_ASSERT(alpha_index < m_num_alpha_blocks);
const compressed_chunk& chunk = m_compressed_chunks[cAlpha0Chunks + alpha_index][chunk_index];
CRNLIB_ASSERT(chunk.m_endpoint_cluster_index[tile_index] == cluster_index);
CRNLIB_ASSERT(tile_index < chunk.m_num_tiles);
const compressed_tile& tile = chunk.m_tiles[tile_index];
const chunk_tile_desc& layout = g_chunk_tile_layouts[tile.m_layout_index];
color_quad_u8 c(cClear);
for (uint y = 0; y < layout.m_height; y++) {
for (uint x = 0; x < layout.m_width; x++) {
c[0] = m_pChunks[chunk_index](layout.m_x_ofs + x, layout.m_y_ofs + y)[m_params.m_alpha_component_indices[alpha_index]];
pixels.push_back(c);
}
}
}
selectors.resize(pixels.size());
cluster.m_selectors.resize(cluster.m_pixels.size());
dxt5_endpoint_optimizer::params params;
params.m_block_index = cluster_index;
params.m_pPixels = &pixels[0];
params.m_num_pixels = pixels.size();
params.m_pPixels = cluster.m_pixels.get_ptr();
params.m_num_pixels = cluster.m_pixels.size();
params.m_comp_index = 0;
params.m_quality = cCRNDXTQualityUber;
params.m_use_both_block_types = false;
dxt5_endpoint_optimizer::results results;
results.m_pSelectors = &selectors[0];
results.m_pSelectors = cluster.m_selectors.get_ptr();
dxt5_endpoint_optimizer optimizer;
const bool all_transparent = optimizer.compute(params, results);
all_transparent;
optimizer.compute(params, results);
cluster.m_first_endpoint = results.m_first_endpoint;
cluster.m_second_endpoint = results.m_second_endpoint;
cluster.m_error = results.m_error;
dxt_endpoint_refiner refiner;
dxt_endpoint_refiner::params p;
dxt_endpoint_refiner::results r;
p.m_perceptual = m_params.m_perceptual;
p.m_pSelectors = cluster.m_selectors.get_ptr();
p.m_pPixels = cluster.m_pixels.get_ptr();
p.m_num_pixels = cluster.m_pixels.size();
p.m_dxt1_selectors = false;
p.m_error_to_beat = cluster.m_error;
p.m_block_index = cluster_index;
cluster.m_refined.result = refiner.refine(p, r);
cluster.m_refined.first_endpoint = r.m_low_color;
cluster.m_refined.second_endpoint = r.m_high_color;
cluster.m_refined.error = r.m_error;
uint pixel_index = 0;
for (uint tile_iter = 0; tile_iter < cluster.m_tiles.size(); tile_iter++) {
@@ -1103,18 +1083,11 @@ void dxt_hc::determine_alpha_endpoint_codebook_task(uint64 data, void* pData_ptr
quantized_tile.m_pixel_height = tile.m_pixel_height;
quantized_tile.m_layout_index = tile.m_layout_index;
memcpy(quantized_tile.m_selectors, &selectors[pixel_index], total_pixels);
memcpy(quantized_tile.m_selectors, &cluster.m_selectors[pixel_index], total_pixels);
pixel_index += total_pixels;
}
} // cluster_index
#if CRNLIB_ENABLE_DEBUG_MESSAGES
if (m_params.m_debugging) {
if (total_empty_clusters)
console::warning("Total empty alpha clusters: %u", total_empty_clusters);
}
#endif
}
bool dxt_hc::determine_alpha_endpoint_codebook() {
@@ -1783,110 +1756,23 @@ bool dxt_hc::refine_quantized_alpha_endpoints() {
if (!m_num_alpha_blocks)
return true;
uint total_refined_tiles = 0;
uint total_refined_pixels = 0;
#if CRNLIB_ENABLE_DEBUG_MESSAGES
if (m_params.m_debugging)
console::info("Refining quantized alpha endpoints");
#endif
for (uint cluster_index = 0; cluster_index < m_alpha_clusters.size(); cluster_index++) {
if ((cluster_index & 255) == 0) {
if (!update_progress(18, cluster_index, m_alpha_clusters.size()))
return false;
}
tile_cluster& cluster = m_alpha_clusters[cluster_index];
uint total_pixels = 0;
for (uint tile_iter = 0; tile_iter < cluster.m_tiles.size(); tile_iter++) {
const uint chunk_index = cluster.m_tiles[tile_iter].first;
const uint tile_index = cluster.m_tiles[tile_iter].second & 0xFFFFU;
const uint alpha_index = cluster.m_tiles[tile_iter].second >> 16U;
compressed_chunk& chunk = m_compressed_chunks[cAlpha0Chunks + alpha_index][chunk_index];
compressed_tile& tile = chunk.m_quantized_tiles[tile_index];
CRNLIB_ASSERT(tile.m_first_endpoint == cluster.m_first_endpoint);
CRNLIB_ASSERT(tile.m_second_endpoint == cluster.m_second_endpoint);
total_pixels += (tile.m_pixel_width * tile.m_pixel_height);
}
if (!total_pixels)
continue;
crnlib::vector<color_quad_u8> pixels;
crnlib::vector<uint8> selectors;
pixels.reserve(total_pixels);
selectors.reserve(total_pixels);
for (uint tile_iter = 0; tile_iter < cluster.m_tiles.size(); tile_iter++) {
const uint chunk_index = cluster.m_tiles[tile_iter].first;
const uint tile_index = cluster.m_tiles[tile_iter].second & 0xFFFFU;
const uint alpha_index = cluster.m_tiles[tile_iter].second >> 16U;
compressed_chunk& chunk = m_compressed_chunks[cAlpha0Chunks + alpha_index][chunk_index];
compressed_tile& tile = chunk.m_quantized_tiles[tile_index];
const pixel_chunk& src_pixels = m_pChunks[chunk_index];
CRNLIB_ASSERT(tile.m_first_endpoint == cluster.m_first_endpoint);
CRNLIB_ASSERT(tile.m_second_endpoint == cluster.m_second_endpoint);
const chunk_tile_desc& tile_layout = g_chunk_tile_layouts[tile.m_layout_index];
for (uint y = 0; y < tile.m_pixel_height; y++) {
for (uint x = 0; x < tile.m_pixel_width; x++) {
selectors.push_back(tile.m_selectors[x + y * tile.m_pixel_width]);
pixels.push_back(color_quad_u8(src_pixels(x + tile_layout.m_x_ofs, y + tile_layout.m_y_ofs)[m_params.m_alpha_component_indices[alpha_index]]));
}
if (cluster.m_refined.result) {
cluster.m_error = cluster.m_refined.error;
cluster.m_first_endpoint = cluster.m_refined.first_endpoint;
cluster.m_second_endpoint = cluster.m_refined.second_endpoint;
for (uint tile_iter = 0; tile_iter < cluster.m_tiles.size(); tile_iter++) {
const uint chunk_index = cluster.m_tiles[tile_iter].first;
const uint tile_index = cluster.m_tiles[tile_iter].second & 0xFFFFU;
const uint alpha_index = cluster.m_tiles[tile_iter].second >> 16U;
compressed_tile& tile = m_compressed_chunks[cAlpha0Chunks + alpha_index][chunk_index].m_quantized_tiles[tile_index];
tile.m_first_endpoint = cluster.m_first_endpoint;
tile.m_second_endpoint = cluster.m_second_endpoint;
}
}
dxt_endpoint_refiner refiner;
dxt_endpoint_refiner::params p;
dxt_endpoint_refiner::results r;
p.m_perceptual = m_params.m_perceptual;
p.m_pSelectors = &selectors[0];
p.m_pPixels = &pixels[0];
p.m_num_pixels = total_pixels;
p.m_dxt1_selectors = false;
p.m_error_to_beat = cluster.m_error;
p.m_block_index = cluster_index;
if (!refiner.refine(p, r))
continue;
total_refined_tiles++;
total_refined_pixels += total_pixels;
cluster.m_error = r.m_error;
cluster.m_first_endpoint = r.m_low_color;
cluster.m_second_endpoint = r.m_high_color;
for (uint tile_iter = 0; tile_iter < cluster.m_tiles.size(); tile_iter++) {
const uint chunk_index = cluster.m_tiles[tile_iter].first;
const uint tile_index = cluster.m_tiles[tile_iter].second & 0xFFFFU;
const uint alpha_index = cluster.m_tiles[tile_iter].second >> 16U;
compressed_chunk& chunk = m_compressed_chunks[cAlpha0Chunks + alpha_index][chunk_index];
compressed_tile& tile = chunk.m_quantized_tiles[tile_index];
tile.m_first_endpoint = r.m_low_color;
tile.m_second_endpoint = r.m_high_color;
}
}
#if CRNLIB_ENABLE_DEBUG_MESSAGES
if (m_params.m_debugging)
console::info("Total refined pixels: %u, endpoints: %u out of %u", total_refined_pixels, total_refined_tiles, m_alpha_clusters.size());
#endif
uint cluster_count = 0;
hash_map<uint32, uint> packed_clusters;
for (uint i = 0; i < m_alpha_clusters.size(); i++) {