Move alpha endpoint refinement into the alpha endpoint optimization thread
This change improves compression speed when using alpha channel. Explanation: As the alpha endpoint refinement does not depend on the alpha selector codebook computation, it can be safely moved into the alpha endpoint optimization thread. Testing: The modified algorithm has been tested on the Kodak test set using 64-bit build with default settings (running on Windows 10, i7-4790, 3.6GHz). All the decompressed test images are identical to the images being compressed and decompressed using original version of Crunch. [Compressing Kodak set without mipmaps] Original: 1582222 bytes / 28.912 sec Modified: 1494501 bytes / 24.128 sec Improvement: 5.54% (compression ratio) / 16.55% (compression time) [Compressing Kodak set with mipmaps] Original: 2065243 bytes / 36.985 sec Modified: 1945365 bytes / 31.741 sec Improvement: 5.80% (compression ratio) / 14.18% (compression time)
This commit is contained in:
Binary file not shown.
+40
-154
@@ -853,12 +853,19 @@ bool dxt_hc::determine_alpha_endpoint_clusters() {
|
||||
return false;
|
||||
|
||||
for (uint a = 0; a < m_num_alpha_blocks; a++) {
|
||||
uint component_index = m_params.m_alpha_component_indices[a];
|
||||
for (uint i = 0; i < m_num_chunks; i++) {
|
||||
int chunk_index = m_pChunks[i].m_legacy_index;
|
||||
compressed_chunk& chunk = m_compressed_chunks[cAlpha0Chunks + a][chunk_index];
|
||||
for (uint tile_index = 0; tile_index < chunk.m_num_tiles; tile_index++) {
|
||||
const uint cluster_index = chunk.m_endpoint_cluster_index[tile_index];
|
||||
m_alpha_clusters[cluster_index].m_tiles.push_back(std::make_pair(chunk_index, tile_index | (a << 16)));
|
||||
|
||||
const compressed_tile& tile = chunk.m_tiles[tile_index];
|
||||
const chunk_tile_desc& layout = g_chunk_tile_layouts[tile.m_layout_index];
|
||||
for (uint y = 0; y < layout.m_height; y++)
|
||||
for (uint x = 0; x < layout.m_width; x++)
|
||||
m_alpha_clusters[cluster_index].m_pixels.push_back(color_quad_u8(m_pChunks[chunk_index](layout.m_x_ofs + x, layout.m_y_ofs + y)[component_index]));
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -990,16 +997,8 @@ bool dxt_hc::determine_color_endpoint_codebook() {
|
||||
|
||||
void dxt_hc::determine_alpha_endpoint_codebook_task(uint64 data, void* pData_ptr) {
|
||||
pData_ptr;
|
||||
|
||||
const uint thread_index = static_cast<uint>(data);
|
||||
|
||||
crnlib::vector<color_quad_u8> pixels;
|
||||
pixels.reserve(512);
|
||||
|
||||
crnlib::vector<uint8> selectors;
|
||||
selectors.reserve(512);
|
||||
|
||||
uint total_empty_clusters = 0;
|
||||
for (uint cluster_index = 0; cluster_index < m_alpha_clusters.size(); cluster_index++) {
|
||||
if (m_canceled)
|
||||
return;
|
||||
@@ -1015,63 +1014,44 @@ void dxt_hc::determine_alpha_endpoint_codebook_task(uint64 data, void* pData_ptr
|
||||
}
|
||||
|
||||
tile_cluster& cluster = m_alpha_clusters[cluster_index];
|
||||
if (cluster.m_tiles.empty()) {
|
||||
total_empty_clusters++;
|
||||
if (cluster.m_pixels.empty())
|
||||
continue;
|
||||
}
|
||||
|
||||
pixels.resize(0);
|
||||
|
||||
for (uint tile_iter = 0; tile_iter < cluster.m_tiles.size(); tile_iter++) {
|
||||
const uint chunk_index = cluster.m_tiles[tile_iter].first;
|
||||
const uint tile_index = cluster.m_tiles[tile_iter].second & 0xFFFFU;
|
||||
const uint alpha_index = cluster.m_tiles[tile_iter].second >> 16U;
|
||||
|
||||
CRNLIB_ASSERT(chunk_index < m_num_chunks);
|
||||
CRNLIB_ASSERT(tile_index < cChunkMaxTiles);
|
||||
CRNLIB_ASSERT(alpha_index < m_num_alpha_blocks);
|
||||
|
||||
const compressed_chunk& chunk = m_compressed_chunks[cAlpha0Chunks + alpha_index][chunk_index];
|
||||
|
||||
CRNLIB_ASSERT(chunk.m_endpoint_cluster_index[tile_index] == cluster_index);
|
||||
|
||||
CRNLIB_ASSERT(tile_index < chunk.m_num_tiles);
|
||||
const compressed_tile& tile = chunk.m_tiles[tile_index];
|
||||
|
||||
const chunk_tile_desc& layout = g_chunk_tile_layouts[tile.m_layout_index];
|
||||
|
||||
color_quad_u8 c(cClear);
|
||||
|
||||
for (uint y = 0; y < layout.m_height; y++) {
|
||||
for (uint x = 0; x < layout.m_width; x++) {
|
||||
c[0] = m_pChunks[chunk_index](layout.m_x_ofs + x, layout.m_y_ofs + y)[m_params.m_alpha_component_indices[alpha_index]];
|
||||
|
||||
pixels.push_back(c);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
selectors.resize(pixels.size());
|
||||
cluster.m_selectors.resize(cluster.m_pixels.size());
|
||||
|
||||
dxt5_endpoint_optimizer::params params;
|
||||
params.m_block_index = cluster_index;
|
||||
params.m_pPixels = &pixels[0];
|
||||
params.m_num_pixels = pixels.size();
|
||||
params.m_pPixels = cluster.m_pixels.get_ptr();
|
||||
params.m_num_pixels = cluster.m_pixels.size();
|
||||
params.m_comp_index = 0;
|
||||
params.m_quality = cCRNDXTQualityUber;
|
||||
params.m_use_both_block_types = false;
|
||||
|
||||
dxt5_endpoint_optimizer::results results;
|
||||
results.m_pSelectors = &selectors[0];
|
||||
results.m_pSelectors = cluster.m_selectors.get_ptr();
|
||||
|
||||
dxt5_endpoint_optimizer optimizer;
|
||||
const bool all_transparent = optimizer.compute(params, results);
|
||||
all_transparent;
|
||||
optimizer.compute(params, results);
|
||||
|
||||
cluster.m_first_endpoint = results.m_first_endpoint;
|
||||
cluster.m_second_endpoint = results.m_second_endpoint;
|
||||
cluster.m_error = results.m_error;
|
||||
|
||||
dxt_endpoint_refiner refiner;
|
||||
dxt_endpoint_refiner::params p;
|
||||
dxt_endpoint_refiner::results r;
|
||||
p.m_perceptual = m_params.m_perceptual;
|
||||
p.m_pSelectors = cluster.m_selectors.get_ptr();
|
||||
p.m_pPixels = cluster.m_pixels.get_ptr();
|
||||
p.m_num_pixels = cluster.m_pixels.size();
|
||||
p.m_dxt1_selectors = false;
|
||||
p.m_error_to_beat = cluster.m_error;
|
||||
p.m_block_index = cluster_index;
|
||||
cluster.m_refined.result = refiner.refine(p, r);
|
||||
cluster.m_refined.first_endpoint = r.m_low_color;
|
||||
cluster.m_refined.second_endpoint = r.m_high_color;
|
||||
cluster.m_refined.error = r.m_error;
|
||||
|
||||
uint pixel_index = 0;
|
||||
|
||||
for (uint tile_iter = 0; tile_iter < cluster.m_tiles.size(); tile_iter++) {
|
||||
@@ -1103,18 +1083,11 @@ void dxt_hc::determine_alpha_endpoint_codebook_task(uint64 data, void* pData_ptr
|
||||
quantized_tile.m_pixel_height = tile.m_pixel_height;
|
||||
quantized_tile.m_layout_index = tile.m_layout_index;
|
||||
|
||||
memcpy(quantized_tile.m_selectors, &selectors[pixel_index], total_pixels);
|
||||
memcpy(quantized_tile.m_selectors, &cluster.m_selectors[pixel_index], total_pixels);
|
||||
|
||||
pixel_index += total_pixels;
|
||||
}
|
||||
} // cluster_index
|
||||
|
||||
#if CRNLIB_ENABLE_DEBUG_MESSAGES
|
||||
if (m_params.m_debugging) {
|
||||
if (total_empty_clusters)
|
||||
console::warning("Total empty alpha clusters: %u", total_empty_clusters);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
bool dxt_hc::determine_alpha_endpoint_codebook() {
|
||||
@@ -1783,110 +1756,23 @@ bool dxt_hc::refine_quantized_alpha_endpoints() {
|
||||
if (!m_num_alpha_blocks)
|
||||
return true;
|
||||
|
||||
uint total_refined_tiles = 0;
|
||||
uint total_refined_pixels = 0;
|
||||
#if CRNLIB_ENABLE_DEBUG_MESSAGES
|
||||
if (m_params.m_debugging)
|
||||
console::info("Refining quantized alpha endpoints");
|
||||
#endif
|
||||
|
||||
for (uint cluster_index = 0; cluster_index < m_alpha_clusters.size(); cluster_index++) {
|
||||
if ((cluster_index & 255) == 0) {
|
||||
if (!update_progress(18, cluster_index, m_alpha_clusters.size()))
|
||||
return false;
|
||||
}
|
||||
|
||||
tile_cluster& cluster = m_alpha_clusters[cluster_index];
|
||||
|
||||
uint total_pixels = 0;
|
||||
for (uint tile_iter = 0; tile_iter < cluster.m_tiles.size(); tile_iter++) {
|
||||
const uint chunk_index = cluster.m_tiles[tile_iter].first;
|
||||
const uint tile_index = cluster.m_tiles[tile_iter].second & 0xFFFFU;
|
||||
const uint alpha_index = cluster.m_tiles[tile_iter].second >> 16U;
|
||||
|
||||
compressed_chunk& chunk = m_compressed_chunks[cAlpha0Chunks + alpha_index][chunk_index];
|
||||
compressed_tile& tile = chunk.m_quantized_tiles[tile_index];
|
||||
|
||||
CRNLIB_ASSERT(tile.m_first_endpoint == cluster.m_first_endpoint);
|
||||
CRNLIB_ASSERT(tile.m_second_endpoint == cluster.m_second_endpoint);
|
||||
|
||||
total_pixels += (tile.m_pixel_width * tile.m_pixel_height);
|
||||
}
|
||||
|
||||
if (!total_pixels)
|
||||
continue;
|
||||
|
||||
crnlib::vector<color_quad_u8> pixels;
|
||||
crnlib::vector<uint8> selectors;
|
||||
|
||||
pixels.reserve(total_pixels);
|
||||
selectors.reserve(total_pixels);
|
||||
|
||||
for (uint tile_iter = 0; tile_iter < cluster.m_tiles.size(); tile_iter++) {
|
||||
const uint chunk_index = cluster.m_tiles[tile_iter].first;
|
||||
const uint tile_index = cluster.m_tiles[tile_iter].second & 0xFFFFU;
|
||||
const uint alpha_index = cluster.m_tiles[tile_iter].second >> 16U;
|
||||
|
||||
compressed_chunk& chunk = m_compressed_chunks[cAlpha0Chunks + alpha_index][chunk_index];
|
||||
compressed_tile& tile = chunk.m_quantized_tiles[tile_index];
|
||||
|
||||
const pixel_chunk& src_pixels = m_pChunks[chunk_index];
|
||||
|
||||
CRNLIB_ASSERT(tile.m_first_endpoint == cluster.m_first_endpoint);
|
||||
CRNLIB_ASSERT(tile.m_second_endpoint == cluster.m_second_endpoint);
|
||||
|
||||
const chunk_tile_desc& tile_layout = g_chunk_tile_layouts[tile.m_layout_index];
|
||||
|
||||
for (uint y = 0; y < tile.m_pixel_height; y++) {
|
||||
for (uint x = 0; x < tile.m_pixel_width; x++) {
|
||||
selectors.push_back(tile.m_selectors[x + y * tile.m_pixel_width]);
|
||||
|
||||
pixels.push_back(color_quad_u8(src_pixels(x + tile_layout.m_x_ofs, y + tile_layout.m_y_ofs)[m_params.m_alpha_component_indices[alpha_index]]));
|
||||
}
|
||||
if (cluster.m_refined.result) {
|
||||
cluster.m_error = cluster.m_refined.error;
|
||||
cluster.m_first_endpoint = cluster.m_refined.first_endpoint;
|
||||
cluster.m_second_endpoint = cluster.m_refined.second_endpoint;
|
||||
for (uint tile_iter = 0; tile_iter < cluster.m_tiles.size(); tile_iter++) {
|
||||
const uint chunk_index = cluster.m_tiles[tile_iter].first;
|
||||
const uint tile_index = cluster.m_tiles[tile_iter].second & 0xFFFFU;
|
||||
const uint alpha_index = cluster.m_tiles[tile_iter].second >> 16U;
|
||||
compressed_tile& tile = m_compressed_chunks[cAlpha0Chunks + alpha_index][chunk_index].m_quantized_tiles[tile_index];
|
||||
tile.m_first_endpoint = cluster.m_first_endpoint;
|
||||
tile.m_second_endpoint = cluster.m_second_endpoint;
|
||||
}
|
||||
}
|
||||
|
||||
dxt_endpoint_refiner refiner;
|
||||
dxt_endpoint_refiner::params p;
|
||||
dxt_endpoint_refiner::results r;
|
||||
|
||||
p.m_perceptual = m_params.m_perceptual;
|
||||
p.m_pSelectors = &selectors[0];
|
||||
p.m_pPixels = &pixels[0];
|
||||
p.m_num_pixels = total_pixels;
|
||||
p.m_dxt1_selectors = false;
|
||||
p.m_error_to_beat = cluster.m_error;
|
||||
p.m_block_index = cluster_index;
|
||||
|
||||
if (!refiner.refine(p, r))
|
||||
continue;
|
||||
|
||||
total_refined_tiles++;
|
||||
total_refined_pixels += total_pixels;
|
||||
|
||||
cluster.m_error = r.m_error;
|
||||
|
||||
cluster.m_first_endpoint = r.m_low_color;
|
||||
cluster.m_second_endpoint = r.m_high_color;
|
||||
|
||||
for (uint tile_iter = 0; tile_iter < cluster.m_tiles.size(); tile_iter++) {
|
||||
const uint chunk_index = cluster.m_tiles[tile_iter].first;
|
||||
const uint tile_index = cluster.m_tiles[tile_iter].second & 0xFFFFU;
|
||||
const uint alpha_index = cluster.m_tiles[tile_iter].second >> 16U;
|
||||
|
||||
compressed_chunk& chunk = m_compressed_chunks[cAlpha0Chunks + alpha_index][chunk_index];
|
||||
compressed_tile& tile = chunk.m_quantized_tiles[tile_index];
|
||||
|
||||
tile.m_first_endpoint = r.m_low_color;
|
||||
tile.m_second_endpoint = r.m_high_color;
|
||||
}
|
||||
}
|
||||
|
||||
#if CRNLIB_ENABLE_DEBUG_MESSAGES
|
||||
if (m_params.m_debugging)
|
||||
console::info("Total refined pixels: %u, endpoints: %u out of %u", total_refined_pixels, total_refined_tiles, m_alpha_clusters.size());
|
||||
#endif
|
||||
|
||||
uint cluster_count = 0;
|
||||
hash_map<uint32, uint> packed_clusters;
|
||||
for (uint i = 0; i < m_alpha_clusters.size(); i++) {
|
||||
|
||||
Reference in New Issue
Block a user