Files
unity/crnlib/crn_dxt_hc.cpp
T
Alexander Suvorov 13b1faa48d Reorder chunks in each scanline in the left-to-right manner
This change slightly improves compression ratio and compression time.

Explanation:
The efficiency of the Crunch encoding scheme depends on the similarity between neighbouring chunks. For this reason, in the original version of Crunch the order of chunks is reversed after each scanline, so that there is no jump from one side of the image to the other at the image borders. The problem here is that inside each chunk the blocks are always ordered in the usual top-to-bottom, left-to-right manner, regardless of the chunk scanning order. While on the forward scan we normally need to perform a diagonal jump (+1, +1) in order to get to the next chunk, on the reverse scan we normally need to perform a much larger (-3, +1) jump, which usually defeats the advantage of not having a discontinuity at the image borders.

Note:
This modification alters the output format and makes it incompatible with the previous revisions.

Testing:
The modified algorithm has been tested on the Kodak test set using 64-bit build with default settings (running on Windows 10, i7-4790, 3.6GHz). All the decompressed test images are identical to the images being compressed and decompressed using original version of Crunch.

[Compressing Kodak set without mipmaps]
Original: 1582222 bytes / 28.882 sec
Modified: 1579618 bytes / 28.743 sec
Improvement: 0.16% (compression ratio) / 0.48% (compression time)

[Compressing Kodak set with mipmaps]
Original: 2065243 bytes / 36.920 sec
Modified: 2061499 bytes / 36.833 sec
Improvement: 0.18% (compression ratio) / 0.24% (compression time)
2017-04-27 11:08:16 +02:00

2287 lines
82 KiB
C++

// File: crn_dxt_hc.cpp
// See Copyright Notice and license at the end of inc/crnlib.h
#include "crn_core.h"
#include "crn_dxt_hc.h"
#include "crn_image_utils.h"
#include "crn_console.h"
#include "crn_dxt_fast.h"
#define CRNLIB_USE_FAST_DXT 1
#define CRNLIB_ENABLE_DEBUG_MESSAGES 0
namespace crnlib {
// Debug palette: one color per chunk tile layout, indexed by layout index.
// determine_compressed_chunks_task() uses it to paint m_dbg_chunk_pixels_tile_vis
// (only when m_params.m_debugging is set), so each tile of the chosen encoding is
// filled with the color of its layout.
static color_quad_u8 g_tile_layout_colors[cNumChunkTileLayouts] =
{
color_quad_u8(255, 90, 32, 255),
color_quad_u8(64, 210, 192, 255),
color_quad_u8(128, 16, 225, 255),
color_quad_u8(255, 192, 200, 255),
color_quad_u8(255, 128, 200, 255),
color_quad_u8(255, 0, 0, 255),
color_quad_u8(0, 255, 0, 255),
color_quad_u8(0, 0, 255, 255),
color_quad_u8(255, 0, 255, 255)};
// Constructs an idle compressor: no chunks attached, no block-type flags set, no task
// pool borrowed, and both progress trackers at -1. The constructing thread is recorded
// as the main thread; compress() overwrites that with its own caller.
dxt_hc::dxt_hc()
    : m_num_chunks(0),
      m_pChunks(NULL),
      m_num_alpha_blocks(0),
      m_has_color_blocks(false),
      m_has_alpha0_blocks(false),
      m_has_alpha1_blocks(false),
      m_main_thread_id(crn_get_current_thread_id()),
      m_canceled(false),
      m_pTask_pool(NULL),
      m_prev_phase_index(-1),
      m_prev_percentage_complete(-1) {
  utils::zero_object(m_encoding_hist);
  // m_total_tiles is not covered by the initializer list above; give it a defined value
  // so a freshly constructed object never carries an indeterminate tile count.
  // (Assigned in the body, like clear() does, to stay independent of declaration order.)
  m_total_tiles = 0;
}
// Destructor: performs no explicit cleanup of its own.
// NOTE(review): m_pChunks and m_pTask_pool are assigned from caller-supplied arguments
// elsewhere in this file and look non-owning, which would explain the empty body - confirm.
dxt_hc::~dxt_hc() {
}
// Drops all per-compression state so the object can be reused for another compress() call.
// Resets the chunk view, block-type flags, compressed chunk vectors, histograms, clusters,
// codebooks, debug images and the cancel/progress trackers.
// It does not touch m_params, m_pTask_pool or m_main_thread_id.
void dxt_hc::clear() {
  // Input chunk view: the pointer is simply dropped here, nothing is freed.
  m_num_chunks = 0;
  m_pChunks = NULL;

  m_chunk_encoding.clear();

  // Block-type configuration derived from the texture format.
  m_num_alpha_blocks = 0;
  m_has_color_blocks = false;
  m_has_alpha0_blocks = false;
  m_has_alpha1_blocks = false;

  // Per-chunk compression results and statistics.
  for (uint i = 0; i < cNumCompressedChunkVecs; i++)
    m_compressed_chunks[i].clear();
  utils::zero_object(m_encoding_hist);
  m_total_tiles = 0;

  // Endpoint clusters and the endpoint/selector codebooks.
  // (Each container is cleared exactly once; the earlier code cleared the selector
  // vectors and one debug vector twice.)
  m_color_clusters.clear();
  m_alpha_clusters.clear();
  m_color_selectors.clear();
  m_alpha_selectors.clear();
  m_chunk_blocks_using_color_selectors.clear();
  m_chunk_blocks_using_alpha_selectors.clear();
  m_color_endpoints.clear();
  m_alpha_endpoints.clear();

  // Debug visualization images.
  m_dbg_chunk_pixels.clear();
  m_dbg_chunk_pixels_tile_vis.clear();
  m_dbg_chunk_pixels_color_quantized.clear();
  m_dbg_chunk_pixels_alpha_quantized.clear();
  m_dbg_chunk_pixels_quantized_color_selectors.clear();
  m_dbg_chunk_pixels_orig_color_selectors.clear();
  m_dbg_chunk_pixels_final_color_selectors.clear();
  m_dbg_chunk_pixels_quantized_alpha_selectors.clear();
  m_dbg_chunk_pixels_orig_alpha_selectors.clear();
  m_dbg_chunk_pixels_final_alpha_selectors.clear();
  m_dbg_chunk_pixels_final.clear();

  // Cancellation and progress reporting state.
  m_canceled = false;
  m_prev_phase_index = -1;
  m_prev_percentage_complete = -1;
}
// Public entry point: compresses num_chunks pixel chunks using the settings in p.
// The task pool is borrowed only for the duration of the call.
// Returns whatever compress_internal() returns.
bool dxt_hc::compress(const params& p, uint num_chunks, const pixel_chunk* pChunks, task_pool& task_pool) {
  // Worker tasks compare their thread id against this one to decide who reports progress.
  m_main_thread_id = crn_get_current_thread_id();
  m_pTask_pool = &task_pool;

  const bool succeeded = compress_internal(p, num_chunks, pChunks);

  // Do not keep a pointer to the caller's pool once the work is done.
  m_pTask_pool = NULL;
  return succeeded;
}
bool dxt_hc::compress_internal(const params& p, uint num_chunks, const pixel_chunk* pChunks) {
if ((!num_chunks) || (!pChunks))
return false;
if ((m_params.m_format == cDXT1A) || (m_params.m_format == cDXT3))
return false;
clear();
m_params = p;
m_num_chunks = num_chunks;
m_pChunks = pChunks;
switch (m_params.m_format) {
case cDXT1: {
m_has_color_blocks = true;
break;
}
case cDXT5: {
m_has_color_blocks = true;
m_has_alpha0_blocks = true;
m_num_alpha_blocks = 1;
break;
}
case cDXT5A: {
m_has_alpha0_blocks = true;
m_num_alpha_blocks = 1;
break;
}
case cDXN_XY:
case cDXN_YX: {
m_has_alpha0_blocks = true;
m_has_alpha1_blocks = true;
m_num_alpha_blocks = 2;
break;
}
default: {
return false;
}
}
determine_compressed_chunks();
if (m_has_color_blocks) {
if (!determine_color_endpoint_clusters())
return false;
if (!determine_color_endpoint_codebook())
return false;
}
if (m_num_alpha_blocks) {
if (!determine_alpha_endpoint_clusters())
return false;
if (!determine_alpha_endpoint_codebook())
return false;
}
create_quantized_debug_images();
if (m_has_color_blocks) {
if (!create_selector_codebook(false))
return false;
}
if (m_num_alpha_blocks) {
if (!create_selector_codebook(true))
return false;
}
if (m_has_color_blocks) {
if (!refine_quantized_color_selectors())
return false;
if (!refine_quantized_color_endpoints())
return false;
}
if (m_num_alpha_blocks) {
if (!refine_quantized_alpha_endpoints())
return false;
if (!refine_quantized_alpha_selectors())
return false;
}
create_final_debug_image();
if (!create_chunk_encodings())
return false;
return true;
}
// Compresses one rectangular tile of a chunk into a pair of 16-bit color endpoints
// plus per-pixel selectors.
//
//   results          - receives the endpoints, error and selector pointer.
//   chunk_index      - only forwarded to the optimizer in the non-fast build.
//   chunk            - source pixels of the chunk.
//   x_ofs, y_ofs     - top-left corner of the tile inside the chunk.
//   width, height    - tile size in pixels; width * height must not exceed
//                      cChunkPixelWidth * cChunkPixelHeight (size of the local buffer).
//   pColor_Selectors - output buffer receiving one selector per tile pixel, row-major.
void dxt_hc::compress_dxt1_block(
    dxt1_endpoint_optimizer::results& results,
    uint chunk_index, const image_u8& chunk, uint x_ofs, uint y_ofs, uint width, uint height,
    uint8* pColor_Selectors) {
  chunk_index;

  // Gather the tile's pixels into a tightly packed row-major buffer.
  color_quad_u8 pixels[cChunkPixelWidth * cChunkPixelHeight];
  color_quad_u8* pDst = pixels;
  for (uint row = 0; row < height; row++) {
    for (uint col = 0; col < width; col++)
      *pDst++ = chunk(x_ofs + col, y_ofs + row);
  }
  const uint num_pixels = width * height;

//double s = image_utils::compute_std_dev(width * height, pixels, 0, 3);
#if CRNLIB_USE_FAST_DXT
  // Fast path: the fast compressor reports no error figure, so m_error is set to INT_MAX.
  uint low16, high16;
  dxt_fast::compress_color_block(num_pixels, pixels, low16, high16, pColor_Selectors);
  results.m_pSelectors = pColor_Selectors;
  results.m_low_color = static_cast<uint16>(low16);
  results.m_high_color = static_cast<uint16>(high16);
  results.m_alpha_block = false;
  results.m_error = INT_MAX;
#else
  // Slow path: run the full DXT1 endpoint optimizer.
  results.m_pSelectors = pColor_Selectors;

  dxt1_endpoint_optimizer::params params;
  params.m_block_index = chunk_index;
  params.m_pPixels = pixels;
  params.m_num_pixels = num_pixels;
  params.m_pixels_have_alpha = false;
  params.m_use_alpha_blocks = false;
  params.m_perceptual = m_params.m_perceptual;
  params.m_highest_quality = false;  //false;
  params.m_endpoint_caching = false;

  dxt1_endpoint_optimizer optimizer;
  optimizer.compute(params, results);
#endif
}
// Compresses one component of a rectangular chunk tile with the DXT5 alpha endpoint
// optimizer.
//
//   results          - receives the endpoints, block type and selector pointer.
//   chunk_index      - forwarded to the optimizer as the block index.
//   chunk            - source pixels of the chunk.
//   x_ofs, y_ofs     - top-left corner of the tile inside the chunk.
//   width, height    - tile size in pixels; width * height must not exceed
//                      cChunkPixelWidth * cChunkPixelHeight (size of the local buffer).
//   component_index  - which pixel component is treated as the alpha channel.
//   pAlpha_selectors - output buffer receiving one selector per tile pixel, row-major.
void dxt_hc::compress_dxt5_block(
    dxt5_endpoint_optimizer::results& results,
    uint chunk_index, const image_u8& chunk, uint x_ofs, uint y_ofs, uint width, uint height, uint component_index,
    uint8* pAlpha_selectors) {
  chunk_index;

  // Gather the tile's pixels into a tightly packed row-major buffer.
  color_quad_u8 pixels[cChunkPixelWidth * cChunkPixelHeight];
  color_quad_u8* pDst = pixels;
  for (uint row = 0; row < height; row++) {
    for (uint col = 0; col < width; col++)
      *pDst++ = chunk(x_ofs + col, y_ofs + row);
  }
  const uint num_pixels = width * height;

#if 0  //CRNLIB_USE_FAST_DXT
  uint low, high;
  dxt_fast::compress_alpha_block(num_pixels, pixels, low, high, pAlpha_selectors, component_index);
  results.m_pSelectors = pAlpha_selectors;
  results.m_error = INT_MAX;
  results.m_first_endpoint = static_cast<uint8>(low);
  results.m_second_endpoint = static_cast<uint8>(high);
  results.m_block_type = 0;
#else
  // The fast alpha path above is disabled; always use the endpoint optimizer.
  results.m_pSelectors = pAlpha_selectors;

  dxt5_endpoint_optimizer::params params;
  params.m_block_index = chunk_index;
  params.m_pPixels = pixels;
  params.m_num_pixels = num_pixels;
  params.m_comp_index = component_index;
  params.m_use_both_block_types = false;
  params.m_quality = cCRNDXTQualityNormal;

  dxt5_endpoint_optimizer optimizer;
  optimizer.compute(params, results);
#endif
}
// Worker task for determine_compressed_chunks().
//
//   data      - index of this task (0..num_threads); selects which chunks it handles.
//   pData_ptr - unused.
//
// For every chunk owned by this task it:
//   1. compresses each candidate tile layout (color and/or alpha),
//   2. reconstructs each candidate chunk encoding from those layouts,
//   3. in hierarchical mode, scores the encodings by derated PSNR and keeps the best one
//      (otherwise the last encoding is always used),
//   4. stores the chosen tiles in m_compressed_chunks and updates the shared statistics.
// Returns early if the job is canceled or update_progress() returns false.
void dxt_hc::determine_compressed_chunks_task(uint64 data, void* pData_ptr) {
  pData_ptr;
  const uint thread_index = static_cast<uint>(data);

  // Scratch images: the source chunk plus one reconstruction per candidate encoding.
  image_u8 orig_chunk;
  image_u8 decomp_chunk[cNumChunkEncodings];
  orig_chunk.resize(cChunkPixelWidth, cChunkPixelHeight);
  for (uint i = 0; i < cNumChunkEncodings; i++)
    decomp_chunk[i].resize(cChunkPixelWidth, cChunkPixelHeight);

  // Per-layout compression results; the alpha arrays have one slot per alpha block.
  image_utils::error_metrics color_error_metrics[cNumChunkEncodings];
  dxt1_endpoint_optimizer::results color_optimizer_results[cNumChunkTileLayouts];
  uint8 layout_color_selectors[cNumChunkTileLayouts][cChunkPixelWidth * cChunkPixelHeight];
  image_utils::error_metrics alpha_error_metrics[2][cNumChunkEncodings];
  dxt5_endpoint_optimizer::results alpha_optimizer_results[2][cNumChunkTileLayouts];
  uint8 layout_alpha_selectors[2][cNumChunkTileLayouts][cChunkPixelWidth * cChunkPixelHeight];

  // Non-hierarchical mode only evaluates the layouts starting at
  // cFirst4x4ChunkTileLayout and only the last encoding.
  uint first_layout = 0;
  uint last_layout = cNumChunkTileLayouts;
  uint first_encoding = 0;
  uint last_encoding = cNumChunkEncodings;
  if (!m_params.m_hierarchical) {
    first_layout = cFirst4x4ChunkTileLayout;
    first_encoding = cNumChunkEncodings - 1;
  }

  for (uint chunk_index = 0; chunk_index < m_num_chunks; chunk_index++) {
    if (m_canceled)
      return;
    // Only the task running on the main thread reports progress.
    if ((crn_get_current_thread_id() == m_main_thread_id) && ((chunk_index & 511) == 0)) {
      if (!update_progress(0, chunk_index, m_num_chunks))
        return;
    }
    // Chunks are dealt out round-robin across (num_threads + 1) tasks.
    if (m_pTask_pool->get_num_threads()) {
      if ((chunk_index % (m_pTask_pool->get_num_threads() + 1)) != thread_index)
        continue;
    }

    // Find the level whose chunk range contains this chunk (defaults to level 0).
    uint level_index = 0;
    for (uint i = 0; i < m_params.m_num_levels; i++) {
      if ((chunk_index >= m_params.m_levels[i].m_first_chunk) && (chunk_index < m_params.m_levels[i].m_first_chunk + m_params.m_levels[i].m_num_chunks)) {
        level_index = i;
        break;
      }
    }

    for (uint cy = 0; cy < cChunkPixelHeight; cy++)
      for (uint cx = 0; cx < cChunkPixelWidth; cx++)
        orig_chunk(cx, cy) = m_pChunks[chunk_index](cx, cy);

    // Compress every candidate layout once; encodings below are assembled from these.
    if (m_has_color_blocks) {
      for (uint l = first_layout; l < last_layout; l++) {
        utils::zero_object(layout_color_selectors[l]);
        compress_dxt1_block(
            color_optimizer_results[l], chunk_index,
            orig_chunk,
            g_chunk_tile_layouts[l].m_x_ofs, g_chunk_tile_layouts[l].m_y_ofs,
            g_chunk_tile_layouts[l].m_width, g_chunk_tile_layouts[l].m_height,
            layout_color_selectors[l]);
      }
    }

    float alpha_layout_std_dev[2][cNumChunkTileLayouts];
    utils::zero_object(alpha_layout_std_dev);
    for (uint a = 0; a < m_num_alpha_blocks; a++) {
      for (uint l = first_layout; l < last_layout; l++) {
        utils::zero_object(layout_alpha_selectors[a][l]);
        compress_dxt5_block(
            alpha_optimizer_results[a][l], chunk_index,
            orig_chunk,
            g_chunk_tile_layouts[l].m_x_ofs, g_chunk_tile_layouts[l].m_y_ofs,
            g_chunk_tile_layouts[l].m_width, g_chunk_tile_layouts[l].m_height,
            m_params.m_alpha_component_indices[a],
            layout_alpha_selectors[a][l]);

        // Standard deviation of this alpha component over the layout's pixels.
        // The previous code wrapped this in a second loop over `a` that shadowed the
        // outer index and recomputed every component on each outer pass; computing only
        // the current (a, l) entry fills the array with the same values.
        float mean = 0.0f;
        float variance = 0.0f;
        for (uint cy = 0; cy < g_chunk_tile_layouts[l].m_height; cy++) {
          for (uint cx = 0; cx < g_chunk_tile_layouts[l].m_width; cx++) {
            uint s = orig_chunk(cx + g_chunk_tile_layouts[l].m_x_ofs, cy + g_chunk_tile_layouts[l].m_y_ofs)[m_params.m_alpha_component_indices[a]];
            mean += s;
            variance += s * s;
          }  // cx
        }    // cy
        float scale = 1.0f / (g_chunk_tile_layouts[l].m_width * g_chunk_tile_layouts[l].m_height);
        mean *= scale;
        variance *= scale;
        variance -= mean * mean;
        alpha_layout_std_dev[a][l] = sqrt(variance);
      }  // l
    }    // a

    // Reconstruct each candidate encoding from the per-layout results and measure it.
    for (uint e = first_encoding; e < last_encoding; e++) {
      for (uint t = 0; t < g_chunk_encodings[e].m_num_tiles; t++) {
        const uint layout_index = g_chunk_encodings[e].m_tiles[t].m_layout_index;
        CRNLIB_ASSERT((layout_index >= first_layout) && (layout_index < last_layout));
        if (m_has_color_blocks) {
          const dxt1_endpoint_optimizer::results& color_results = color_optimizer_results[layout_index];
          const uint8* pColor_selectors = layout_color_selectors[layout_index];
          color_quad_u8 block_colors[cDXT1SelectorValues];
          CRNLIB_ASSERT(color_results.m_low_color >= color_results.m_high_color);
          // it's okay if color_results.m_low_color == color_results.m_high_color, because in this case only selector 0 should be used
          dxt1_block::get_block_colors4(block_colors, color_results.m_low_color, color_results.m_high_color);
          for (uint cy = 0; cy < g_chunk_encodings[e].m_tiles[t].m_height; cy++) {
            for (uint cx = 0; cx < g_chunk_encodings[e].m_tiles[t].m_width; cx++) {
              uint s = pColor_selectors[cx + cy * g_chunk_encodings[e].m_tiles[t].m_width];
              CRNLIB_ASSERT(s < cDXT1SelectorValues);
              decomp_chunk[e](cx + g_chunk_encodings[e].m_tiles[t].m_x_ofs, cy + g_chunk_encodings[e].m_tiles[t].m_y_ofs) = block_colors[s];
            }
          }
        }
        for (uint a = 0; a < m_num_alpha_blocks; a++) {
          const dxt5_endpoint_optimizer::results& alpha_results = alpha_optimizer_results[a][layout_index];
          const uint8* pAlpha_selectors = layout_alpha_selectors[a][layout_index];
          uint block_values[cDXT5SelectorValues];
          CRNLIB_ASSERT(alpha_results.m_first_endpoint >= alpha_results.m_second_endpoint);
          dxt5_block::get_block_values8(block_values, alpha_results.m_first_endpoint, alpha_results.m_second_endpoint);
          for (uint cy = 0; cy < g_chunk_encodings[e].m_tiles[t].m_height; cy++) {
            for (uint cx = 0; cx < g_chunk_encodings[e].m_tiles[t].m_width; cx++) {
              uint s = pAlpha_selectors[cx + cy * g_chunk_encodings[e].m_tiles[t].m_width];
              CRNLIB_ASSERT(s < cDXT5SelectorValues);
              decomp_chunk[e](cx + g_chunk_encodings[e].m_tiles[t].m_x_ofs, cy + g_chunk_encodings[e].m_tiles[t].m_y_ofs)[m_params.m_alpha_component_indices[a]] =
                  static_cast<uint8>(block_values[s]);
            }
          }
        }
      }  // t
      // Error metrics are only needed when there is a choice between encodings.
      if (m_params.m_hierarchical) {
        if (m_has_color_blocks)
          color_error_metrics[e].compute(decomp_chunk[e], orig_chunk, 0, 3);
        for (uint a = 0; a < m_num_alpha_blocks; a++)
          alpha_error_metrics[a][e].compute(decomp_chunk[e], orig_chunk, m_params.m_alpha_component_indices[a], 1);
      }
    }  // e

    // Pick the encoding with the highest quality score: PSNR minus a derating that
    // grows with the number of tiles in the encoding.
    uint best_encoding = cNumChunkEncodings - 1;
    if (m_params.m_hierarchical) {
      float quality[cNumChunkEncodings];
      utils::zero_object(quality);
      float best_quality = 0.0f;
      best_encoding = 0;
      for (uint e = 0; e < cNumChunkEncodings; e++) {
        if (m_has_color_blocks) {
          float adaptive_tile_color_psnr_derating = m_params.m_adaptive_tile_color_psnr_derating;
          // Levels past the first use a smaller color derating (divided by 3^level, floor .25).
          if ((level_index) && (adaptive_tile_color_psnr_derating > .25f)) {
            //adaptive_tile_color_psnr_derating = math::lerp(adaptive_tile_color_psnr_derating * .5f, .3f, (level_index - 1) / math::maximum(1.0f, float(m_params.m_num_levels - 2)));
            adaptive_tile_color_psnr_derating = math::maximum(.25f, adaptive_tile_color_psnr_derating / powf(3.0f, static_cast<float>(level_index)));
          }
          float color_derating = math::lerp(0.0f, adaptive_tile_color_psnr_derating, (g_chunk_encodings[e].m_num_tiles - 1) / 3.0f);
          quality[e] = (float)math::maximum<double>(color_error_metrics[e].mPeakSNR - color_derating, 0.0f);
        }
        if (m_num_alpha_blocks) {
          quality[e] *= m_params.m_adaptive_tile_color_alpha_weighting_ratio;
          float alpha_derating = math::lerp(0.0f, m_params.m_adaptive_tile_alpha_psnr_derating, (g_chunk_encodings[e].m_num_tiles - 1) / 3.0f);
          // max_std_dev is only consumed by the disabled block below.
          float max_std_dev = 0.0f;
          for (uint a = 0; a < m_num_alpha_blocks; a++) {
            quality[e] += (float)math::maximum<double>(alpha_error_metrics[a][e].mPeakSNR - alpha_derating, 0.0f);
            for (uint t = 0; t < g_chunk_encodings[e].m_num_tiles; t++) {
              float std_dev = alpha_layout_std_dev[a][g_chunk_encodings[e].m_tiles[t].m_layout_index];
              max_std_dev = math::maximum(max_std_dev, std_dev);
            }
          }
#if 0
          // rg [4/28/09] - disabling this because it breaks dxt5_xgbr normal maps
          const float l = 6.0f;
          const float k = .5f;
          if (max_std_dev > l)
          {
            float s = max_std_dev - l;
            quality[e] -= (k * s);
          }
#endif
        }
        if (quality[e] > best_quality) {
          best_quality = quality[e];
          best_encoding = e;
        }
      }
    }

    // Shared statistics are updated atomically because several tasks run concurrently.
    atomic_increment32(&m_encoding_hist[best_encoding]);
    atomic_exchange_add32(&m_total_tiles, g_chunk_encodings[best_encoding].m_num_tiles);

    // Publish the winning encoding's tiles into each active compressed-chunk vector.
    for (uint q = 0; q < cNumCompressedChunkVecs; q++) {
      if (q == cColorChunks) {
        if (!m_has_color_blocks)
          continue;
      } else if (q > m_num_alpha_blocks)
        continue;
      compressed_chunk& output = m_compressed_chunks[q][chunk_index];
      output.m_encoding_index = static_cast<uint8>(best_encoding);
      output.m_num_tiles = static_cast<uint8>(g_chunk_encodings[best_encoding].m_num_tiles);
      for (uint t = 0; t < g_chunk_encodings[best_encoding].m_num_tiles; t++) {
        const uint layout_index = g_chunk_encodings[best_encoding].m_tiles[t].m_layout_index;
        output.m_tiles[t].m_layout_index = static_cast<uint8>(layout_index);
        output.m_tiles[t].m_pixel_width = static_cast<uint8>(g_chunk_encodings[best_encoding].m_tiles[t].m_width);
        output.m_tiles[t].m_pixel_height = static_cast<uint8>(g_chunk_encodings[best_encoding].m_tiles[t].m_height);
        if (q == cColorChunks) {
          const dxt1_endpoint_optimizer::results& color_results = color_optimizer_results[layout_index];
          const uint8* pColor_selectors = layout_color_selectors[layout_index];
          output.m_tiles[t].m_endpoint_cluster_index = 0;
          output.m_tiles[t].m_first_endpoint = color_results.m_low_color;
          output.m_tiles[t].m_second_endpoint = color_results.m_high_color;
          memcpy(output.m_tiles[t].m_selectors, pColor_selectors, cChunkPixelWidth * cChunkPixelHeight);
          output.m_tiles[t].m_alpha_encoding = color_results.m_alpha_block;
        } else {
          const uint a = q - cAlpha0Chunks;
          const dxt5_endpoint_optimizer::results& alpha_results = alpha_optimizer_results[a][layout_index];
          const uint8* pAlpha_selectors = layout_alpha_selectors[a][layout_index];
          output.m_tiles[t].m_endpoint_cluster_index = 0;
          output.m_tiles[t].m_first_endpoint = alpha_results.m_first_endpoint;
          output.m_tiles[t].m_second_endpoint = alpha_results.m_second_endpoint;
          memcpy(output.m_tiles[t].m_selectors, pAlpha_selectors, cChunkPixelWidth * cChunkPixelHeight);
          output.m_tiles[t].m_alpha_encoding = alpha_results.m_block_type != 0;
        }
      }  // t
    }    // q

    // Debug output: the reconstructed chunk and a color-coded map of the chosen layouts.
    if (m_params.m_debugging) {
      for (uint y = 0; y < cChunkPixelHeight; y++)
        for (uint x = 0; x < cChunkPixelWidth; x++)
          m_dbg_chunk_pixels[chunk_index](x, y) = decomp_chunk[best_encoding](x, y);
      for (uint t = 0; t < g_chunk_encodings[best_encoding].m_num_tiles; t++) {
        const uint layout_index = g_chunk_encodings[best_encoding].m_tiles[t].m_layout_index;
        const chunk_tile_desc& tile_desc = g_chunk_tile_layouts[layout_index];
        for (uint ty = 0; ty < tile_desc.m_height; ty++)
          for (uint tx = 0; tx < tile_desc.m_width; tx++)
            m_dbg_chunk_pixels_tile_vis[chunk_index](tile_desc.m_x_ofs + tx, tile_desc.m_y_ofs + ty) = g_tile_layout_colors[layout_index];
      }
    }
  }  // chunk_index
}
bool dxt_hc::determine_compressed_chunks() {
utils::zero_object(m_encoding_hist);
for (uint i = 0; i < cNumCompressedChunkVecs; i++)
m_compressed_chunks[i].clear();
if (m_has_color_blocks)
m_compressed_chunks[cColorChunks].resize(m_num_chunks);
for (uint a = 0; a < m_num_alpha_blocks; a++)
m_compressed_chunks[cAlpha0Chunks + a].resize(m_num_chunks);
if (m_params.m_debugging) {
m_dbg_chunk_pixels.resize(m_num_chunks);
m_dbg_chunk_pixels_tile_vis.resize(m_num_chunks);
for (uint i = 0; i < m_num_chunks; i++) {
m_dbg_chunk_pixels[i].clear();
m_dbg_chunk_pixels_tile_vis[i].clear();
}
}
m_total_tiles = 0;
for (uint i = 0; i <= m_pTask_pool->get_num_threads(); i++)
m_pTask_pool->queue_object_task(this, &dxt_hc::determine_compressed_chunks_task, i);
m_pTask_pool->join();
if (m_canceled)
return false;
#if CRNLIB_ENABLE_DEBUG_MESSAGES
if (m_params.m_debugging) {
console::info("Total Pixels: %u, Chunks: %u, Blocks: %u, Adapted Tiles: %u", m_num_chunks * cChunkPixelWidth * cChunkPixelHeight, m_num_chunks, m_num_chunks * cChunkBlockWidth * cChunkBlockHeight, m_total_tiles);
console::info("Chunk encoding type symbol_histogram: ");
for (uint e = 0; e < cNumChunkEncodings; e++)
console::info("%u ", m_encoding_hist[e]);
console::info("Blocks per chunk encoding type: ");
for (uint e = 0; e < cNumChunkEncodings; e++)
console::info("%u ", m_encoding_hist[e] * cChunkBlockWidth * cChunkBlockHeight);
}
#endif
return true;
}
// Worker task for determine_color_endpoint_clusters().
//
//   data      - index of this task; selects which chunks it handles.
//   pData_ptr - points at an assign_color_endpoint_clusters_state holding the trained
//               vector quantizer and the per-tile training vectors.
//
// For each tile of each owned color chunk, looks up the best codebook entry for the
// tile's training vector and stores it as the tile's endpoint cluster index.
void dxt_hc::assign_color_endpoint_clusters_task(uint64 data, void* pData_ptr) {
  assign_color_endpoint_clusters_state& state = *static_cast<assign_color_endpoint_clusters_state*>(pData_ptr);
  const uint thread_index = static_cast<uint>(data);

  for (uint chunk_index = 0; chunk_index < m_num_chunks; chunk_index++) {
    if (m_canceled)
      return;

    // Progress is reported every 64 chunks, and only from the main thread.
    if (((chunk_index & 63) == 0) && (crn_get_current_thread_id() == m_main_thread_id)) {
      if (!update_progress(2, chunk_index, m_num_chunks))
        return;
    }

    // Skip chunks that belong to another task (round-robin over num_threads + 1 tasks).
    const uint num_threads = m_pTask_pool->get_num_threads();
    if (num_threads && ((chunk_index % (num_threads + 1)) != thread_index))
      continue;

    compressed_chunk& chunk = m_compressed_chunks[cColorChunks][chunk_index];
    for (uint tile_index = 0; tile_index < chunk.m_num_tiles; tile_index++) {
      const uint best_entry = state.m_vq.find_best_codebook_entry_fs(state.m_training_vecs[chunk_index][tile_index]);
      chunk.m_endpoint_cluster_index[tile_index] = static_cast<uint16>(best_entry);
    }
  }
}
bool dxt_hc::determine_color_endpoint_clusters() {
if (!m_has_color_blocks)
return true;
#if CRNLIB_ENABLE_DEBUG_MESSAGES
if (m_params.m_debugging)
console::info("Generating color training vectors");
#endif
const float r_scale = .5f;
const float b_scale = .25f;
vec6F_tree_vq vq;
crnlib::vector<crnlib::vector<vec6F> > training_vecs;
training_vecs.resize(m_num_chunks);
for (uint chunk_index = 0; chunk_index < m_num_chunks; chunk_index++) {
if ((chunk_index & 255) == 0) {
if (!update_progress(1, chunk_index, m_num_chunks))
return false;
}
const compressed_chunk& chunk = m_compressed_chunks[cColorChunks][chunk_index];
training_vecs[chunk_index].resize(chunk.m_num_tiles);
for (uint tile_index = 0; tile_index < chunk.m_num_tiles; tile_index++) {
const compressed_tile& tile = chunk.m_tiles[tile_index];
const chunk_tile_desc& layout = g_chunk_tile_layouts[tile.m_layout_index];
tree_clusterizer<vec3F> palettizer;
for (uint y = 0; y < layout.m_height; y++) {
for (uint x = 0; x < layout.m_width; x++) {
const color_quad_u8& c = m_pChunks[chunk_index](layout.m_x_ofs + x, layout.m_y_ofs + y);
vec3F v;
if (m_params.m_perceptual) {
v.set(c[0] * 1.0f / 255.0f, c[1] * 1.0f / 255.0f, c[2] * 1.0f / 255.0f);
v[0] *= r_scale;
v[2] *= b_scale;
} else {
v.set(c[0] * 1.0f / 255.0f, c[1] * 1.0f / 255.0f, c[2] * 1.0f / 255.0f);
}
palettizer.add_training_vec(v, 1);
}
}
palettizer.generate_codebook(2);
uint tile_weight = tile.m_pixel_width * tile.m_pixel_height;
tile_weight = static_cast<uint>(tile_weight * m_pChunks[chunk_index].m_weight);
vec3F v[2];
utils::zero_object(v);
for (uint i = 0; i < palettizer.get_codebook_size(); i++)
v[i] = palettizer.get_codebook_entry(i);
if (palettizer.get_codebook_size() == 1)
v[1] = v[0];
if (v[0].length() > v[1].length())
utils::swap(v[0], v[1]);
vec6F vv;
for (uint i = 0; i < 2; i++) {
vv[i * 3 + 0] = v[i][0];
vv[i * 3 + 1] = v[i][1];
vv[i * 3 + 2] = v[i][2];
}
vq.add_training_vec(vv, tile_weight);
training_vecs[chunk_index][tile_index] = vv;
}
}
#if CRNLIB_ENABLE_DEBUG_MESSAGES
if (m_params.m_debugging)
console::info("Begin color cluster analysis");
timer t;
t.start();
#endif
uint codebook_size = math::minimum<uint>(m_total_tiles, m_params.m_color_endpoint_codebook_size);
vq.generate_codebook(codebook_size);
#if CRNLIB_ENABLE_DEBUG_MESSAGES
if (m_params.m_debugging) {
double total_time = t.get_elapsed_secs();
console::info("Codebook gen time: %3.3fs, Total color clusters: %u", total_time, vq.get_codebook_size());
}
#endif
m_color_clusters.resize(vq.get_codebook_size());
#if CRNLIB_ENABLE_DEBUG_MESSAGES
if (m_params.m_debugging)
console::info("Begin color cluster assignment");
#endif
assign_color_endpoint_clusters_state state(vq, training_vecs);
for (uint i = 0; i <= m_pTask_pool->get_num_threads(); i++)
m_pTask_pool->queue_object_task(this, &dxt_hc::assign_color_endpoint_clusters_task, i, &state);
m_pTask_pool->join();
if (m_canceled)
return false;
for (uint i = 0; i < m_num_chunks; i++) {
int chunk_index = m_pChunks[i].m_legacy_index;
compressed_chunk& chunk = m_compressed_chunks[cColorChunks][chunk_index];
for (uint tile_index = 0; tile_index < chunk.m_num_tiles; tile_index++) {
uint cluster_index = chunk.m_endpoint_cluster_index[tile_index];
m_color_clusters[cluster_index].m_tiles.push_back(std::make_pair(chunk_index, tile_index));
}
}
#if CRNLIB_ENABLE_DEBUG_MESSAGES
if (m_params.m_debugging)
console::info("Completed color cluster assignment");
#endif
return true;
}
// Worker task for determine_alpha_endpoint_clusters().
//
//   data      - index of this task; selects which chunks it handles.
//   pData_ptr - points at a determine_alpha_endpoint_clusters_state holding the trained
//               vector quantizer and the per-tile training vectors of each alpha block.
//
// For every alpha block and each owned chunk, assigns each tile the index of the best
// matching codebook entry for its training vector.
void dxt_hc::determine_alpha_endpoint_clusters_task(uint64 data, void* pData_ptr) {
  const determine_alpha_endpoint_clusters_state& state = *static_cast<determine_alpha_endpoint_clusters_state*>(pData_ptr);
  const uint thread_index = static_cast<uint>(data);

  for (uint a = 0; a < m_num_alpha_blocks; a++) {
    for (uint chunk_index = 0; chunk_index < m_num_chunks; chunk_index++) {
      if (m_canceled)
        return;

      // Progress is reported every 64 chunks, and only from the main thread.
      if (((chunk_index & 63) == 0) && (crn_get_current_thread_id() == m_main_thread_id)) {
        if (!update_progress(7, m_num_chunks * a + chunk_index, m_num_chunks * m_num_alpha_blocks))
          return;
      }

      // Skip chunks that belong to another task (round-robin over num_threads + 1 tasks).
      const uint num_threads = m_pTask_pool->get_num_threads();
      if (num_threads && ((chunk_index % (num_threads + 1)) != thread_index))
        continue;

      compressed_chunk& chunk = m_compressed_chunks[cAlpha0Chunks + a][chunk_index];
      for (uint tile_index = 0; tile_index < chunk.m_num_tiles; tile_index++) {
        const uint best_entry = state.m_vq.find_best_codebook_entry_fs(state.m_training_vecs[a][chunk_index][tile_index]);
        chunk.m_endpoint_cluster_index[tile_index] = static_cast<uint16>(best_entry);
      }
    }
  }
}
bool dxt_hc::determine_alpha_endpoint_clusters() {
if (!m_num_alpha_blocks)
return true;
#if CRNLIB_ENABLE_DEBUG_MESSAGES
if (m_params.m_debugging)
console::info("Generating alpha training vectors");
#endif
determine_alpha_endpoint_clusters_state state;
for (uint a = 0; a < m_num_alpha_blocks; a++) {
state.m_training_vecs[a].resize(m_num_chunks);
for (uint chunk_index = 0; chunk_index < m_num_chunks; chunk_index++) {
if ((chunk_index & 63) == 0) {
if (!update_progress(6, m_num_chunks * a + chunk_index, m_num_chunks * m_num_alpha_blocks))
return false;
}
const compressed_chunk& chunk = m_compressed_chunks[cAlpha0Chunks + a][chunk_index];
state.m_training_vecs[a][chunk_index].resize(chunk.m_num_tiles);
for (uint tile_index = 0; tile_index < chunk.m_num_tiles; tile_index++) {
const compressed_tile& tile = chunk.m_tiles[tile_index];
const chunk_tile_desc& layout = g_chunk_tile_layouts[tile.m_layout_index];
tree_clusterizer<vec1F> palettizer;
for (uint y = 0; y < layout.m_height; y++) {
for (uint x = 0; x < layout.m_width; x++) {
uint c = m_pChunks[chunk_index](layout.m_x_ofs + x, layout.m_y_ofs + y)[m_params.m_alpha_component_indices[a]];
vec1F v(c * 1.0f / 255.0f);
palettizer.add_training_vec(v, 1);
}
}
palettizer.generate_codebook(2);
const uint tile_weight = tile.m_pixel_width * tile.m_pixel_height;
vec1F v[2];
utils::zero_object(v);
for (uint i = 0; i < palettizer.get_codebook_size(); i++)
v[i] = palettizer.get_codebook_entry(i);
if (palettizer.get_codebook_size() == 1)
v[1] = v[0];
if (v[0] > v[1])
utils::swap(v[0], v[1]);
vec2F vv(v[0][0], v[1][0]);
state.m_vq.add_training_vec(vv, tile_weight);
state.m_training_vecs[a][chunk_index][tile_index] = vv;
} // tile_index
} // chunk_index
} // a
#if CRNLIB_ENABLE_DEBUG_MESSAGES
if (m_params.m_debugging)
console::info("Begin alpha cluster analysis");
timer t;
t.start();
#endif
uint codebook_size = math::minimum<uint>(m_total_tiles, m_params.m_alpha_endpoint_codebook_size);
state.m_vq.generate_codebook(codebook_size);
#if CRNLIB_ENABLE_DEBUG_MESSAGES
if (m_params.m_debugging) {
double total_time = t.get_elapsed_secs();
console::info("Codebook gen time: %3.3fs, Total alpha clusters: %u", total_time, state.m_vq.get_codebook_size());
}
#endif
m_alpha_clusters.resize(state.m_vq.get_codebook_size());
#if CRNLIB_ENABLE_DEBUG_MESSAGES
if (m_params.m_debugging)
console::info("Begin alpha cluster assignment");
#endif
for (uint i = 0; i <= m_pTask_pool->get_num_threads(); i++)
m_pTask_pool->queue_object_task(this, &dxt_hc::determine_alpha_endpoint_clusters_task, i, &state);
m_pTask_pool->join();
if (m_canceled)
return false;
for (uint a = 0; a < m_num_alpha_blocks; a++) {
for (uint i = 0; i < m_num_chunks; i++) {
int chunk_index = m_pChunks[i].m_legacy_index;
compressed_chunk& chunk = m_compressed_chunks[cAlpha0Chunks + a][chunk_index];
for (uint tile_index = 0; tile_index < chunk.m_num_tiles; tile_index++) {
const uint cluster_index = chunk.m_endpoint_cluster_index[tile_index];
m_alpha_clusters[cluster_index].m_tiles.push_back(std::make_pair(chunk_index, tile_index | (a << 16)));
}
}
}
#if CRNLIB_ENABLE_DEBUG_MESSAGES
if (m_params.m_debugging)
console::info("Completed alpha cluster assignment");
#endif
return true;
}
// Worker task for determine_color_endpoint_codebook().
//
//   data      - index of this task; selects which clusters it handles.
//   pData_ptr - unused.
//
// For every owned, non-empty color cluster it gathers the pixels of all member tiles,
// runs the DXT1 endpoint optimizer once over the combined pixel set, stores the
// resulting endpoints on the cluster, and fills in the quantized tile (endpoints,
// geometry and selectors) of every member tile.
void dxt_hc::determine_color_endpoint_codebook_task(uint64 data, void* pData_ptr) {
  pData_ptr;
  const uint thread_index = static_cast<uint>(data);
  if (!m_has_color_blocks)
    return;

  crnlib::vector<color_quad_u8> pixels;
  pixels.reserve(512);
  crnlib::vector<uint8> selectors;

  // Running total over all processed clusters; only referenced by the disabled assert below.
  uint total_pixels = 0;
  uint total_empty_clusters = 0;

  for (uint cluster_index = 0; cluster_index < m_color_clusters.size(); cluster_index++) {
    if (m_canceled)
      return;
    // Only the task running on the main thread reports progress.
    if ((crn_get_current_thread_id() == m_main_thread_id) && ((cluster_index & 63) == 0)) {
      if (!update_progress(3, cluster_index, m_color_clusters.size()))
        return;
    }
    // Clusters are dealt out round-robin across (num_threads + 1) tasks.
    if (m_pTask_pool->get_num_threads()) {
      if ((cluster_index % (m_pTask_pool->get_num_threads() + 1)) != thread_index)
        continue;
    }

    tile_cluster& cluster = m_color_clusters[cluster_index];
    if (cluster.m_tiles.empty()) {
      total_empty_clusters++;
      continue;
    }

    // Gather the source pixels of every tile in the cluster, tile after tile.
    pixels.resize(0);
    for (uint t = 0; t < cluster.m_tiles.size(); t++) {
      const uint chunk_index = cluster.m_tiles[t].first;
      const uint tile_index = cluster.m_tiles[t].second;
      CRNLIB_ASSERT(chunk_index < m_num_chunks);
      CRNLIB_ASSERT(tile_index < cChunkMaxTiles);
      const compressed_chunk& chunk = m_compressed_chunks[cColorChunks][chunk_index];
      CRNLIB_ASSERT(tile_index < chunk.m_num_tiles);
      const compressed_tile& tile = chunk.m_tiles[tile_index];
      const chunk_tile_desc& layout = g_chunk_tile_layouts[tile.m_layout_index];
      for (uint y = 0; y < layout.m_height; y++)
        for (uint x = 0; x < layout.m_width; x++)
          pixels.push_back(m_pChunks[chunk_index](layout.m_x_ofs + x, layout.m_y_ofs + y));
    }
    total_pixels += pixels.size();
    selectors.resize(pixels.size());

    // One optimizer run over the whole cluster yields the shared endpoints.
    dxt1_endpoint_optimizer::params params;
    params.m_block_index = cluster_index;
    params.m_pPixels = &pixels[0];
    params.m_num_pixels = pixels.size();
    params.m_pixels_have_alpha = false;
    params.m_use_alpha_blocks = false;
    params.m_perceptual = m_params.m_perceptual;
    params.m_quality = cCRNDXTQualityUber;
    params.m_endpoint_caching = false;
    dxt1_endpoint_optimizer::results results;
    results.m_pSelectors = &selectors[0];
    dxt1_endpoint_optimizer optimizer;
    const bool all_transparent = optimizer.compute(params, results);
    all_transparent;

    cluster.m_first_endpoint = results.m_low_color;
    cluster.m_second_endpoint = results.m_high_color;
    cluster.m_alpha_encoding = results.m_alpha_block;
    cluster.m_error = results.m_error;

    // Hand each tile its slice of the selector array, in the same order the pixels
    // were gathered above.
    uint pixel_index = 0;
    for (uint t = 0; t < cluster.m_tiles.size(); t++) {
      const uint chunk_index = cluster.m_tiles[t].first;
      const uint tile_index = cluster.m_tiles[t].second;
      CRNLIB_ASSERT(chunk_index < m_num_chunks);
      compressed_chunk& chunk = m_compressed_chunks[cColorChunks][chunk_index];
      CRNLIB_ASSERT(tile_index < chunk.m_num_tiles);
      CRNLIB_ASSERT(chunk.m_endpoint_cluster_index[tile_index] == cluster_index);
      const compressed_tile& tile = chunk.m_tiles[tile_index];
      compressed_tile& quantized_tile = chunk.m_quantized_tiles[tile_index];

      // Named differently from the outer accumulator, which the old local shadowed.
      const uint tile_pixel_count = tile.m_pixel_width * tile.m_pixel_height;

      quantized_tile.m_endpoint_cluster_index = cluster_index;
      quantized_tile.m_first_endpoint = results.m_low_color;
      quantized_tile.m_second_endpoint = results.m_high_color;
      //quantized_tile.m_error = results.m_error;
      quantized_tile.m_alpha_encoding = results.m_alpha_block;
      quantized_tile.m_pixel_width = tile.m_pixel_width;
      quantized_tile.m_pixel_height = tile.m_pixel_height;
      quantized_tile.m_layout_index = tile.m_layout_index;
      memcpy(quantized_tile.m_selectors, &selectors[pixel_index], tile_pixel_count);
      pixel_index += tile_pixel_count;
    }
  }
//CRNLIB_ASSERT(total_pixels == (m_num_chunks * cChunkPixelWidth * cChunkPixelHeight));
#if CRNLIB_ENABLE_DEBUG_MESSAGES
  if (m_params.m_debugging) {
    if (total_empty_clusters)
      console::warning("Total empty color clusters: %u", total_empty_clusters);
  }
#endif
}
bool dxt_hc::determine_color_endpoint_codebook() {
if (!m_has_color_blocks)
return true;
#if CRNLIB_ENABLE_DEBUG_MESSAGES
if (m_params.m_debugging)
console::info("Computing optimal color cluster endpoints");
#endif
for (uint i = 0; i <= m_pTask_pool->get_num_threads(); i++)
m_pTask_pool->queue_object_task(this, &dxt_hc::determine_color_endpoint_codebook_task, i, NULL);
m_pTask_pool->join();
return !m_canceled;
}
// Task-pool worker that computes optimized alpha endpoints for a subset of m_alpha_clusters.
//   data      - this task's index; when the pool reports a non-zero thread count, only clusters
//               whose index modulo (get_num_threads() + 1) equals it are processed.
//   pData_ptr - unused.
// For every non-empty cluster owned by this task, the selected alpha component of each member tile
// is gathered, run through dxt5_endpoint_optimizer, and the resulting endpoints, block type, error
// and per-pixel selectors are written to the cluster and to its quantized tiles.
// Returns early if m_canceled is set or update_progress() returns false.
void dxt_hc::determine_alpha_endpoint_codebook_task(uint64 data, void* pData_ptr) {
  (void)pData_ptr;
  const uint task_id = static_cast<uint>(data);

  crnlib::vector<color_quad_u8> gathered_pixels;
  gathered_pixels.reserve(512);
  crnlib::vector<uint8> gathered_selectors;
  gathered_selectors.reserve(512);

  uint num_empty_clusters = 0;

  for (uint cluster_index = 0; cluster_index < m_alpha_clusters.size(); ++cluster_index) {
    if (m_canceled)
      return;

    // Progress is reported from the main thread only, once every 64 clusters.
    if (crn_get_current_thread_id() == m_main_thread_id) {
      if (((cluster_index & 63) == 0) && !update_progress(8, cluster_index, m_alpha_clusters.size()))
        return;
    }

    // Clusters are interleaved across the queued tasks.
    const uint pool_threads = m_pTask_pool->get_num_threads();
    if (pool_threads && ((cluster_index % (pool_threads + 1)) != task_id))
      continue;

    tile_cluster& cluster = m_alpha_clusters[cluster_index];
    if (cluster.m_tiles.empty()) {
      ++num_empty_clusters;
      continue;
    }

    // Gather the alpha samples of every tile in this cluster, tile after tile, row-major within a tile.
    gathered_pixels.resize(0);
    for (uint t = 0; t < cluster.m_tiles.size(); ++t) {
      const uint chunk_index = cluster.m_tiles[t].first;
      // The pair's second member packs the tile index (low 16 bits) and the alpha block index (upper bits).
      const uint tile_index = cluster.m_tiles[t].second & 0xFFFFU;
      const uint alpha_index = cluster.m_tiles[t].second >> 16U;
      CRNLIB_ASSERT(chunk_index < m_num_chunks);
      CRNLIB_ASSERT(tile_index < cChunkMaxTiles);
      CRNLIB_ASSERT(alpha_index < m_num_alpha_blocks);

      const compressed_chunk& src_chunk = m_compressed_chunks[cAlpha0Chunks + alpha_index][chunk_index];
      CRNLIB_ASSERT(src_chunk.m_endpoint_cluster_index[tile_index] == cluster_index);
      CRNLIB_ASSERT(tile_index < src_chunk.m_num_tiles);

      const compressed_tile& src_tile = src_chunk.m_tiles[tile_index];
      const chunk_tile_desc& layout = g_chunk_tile_layouts[src_tile.m_layout_index];
      const uint comp = m_params.m_alpha_component_indices[alpha_index];

      // Only component 0 of the sample carries data; the optimizer is told to read component 0 below.
      color_quad_u8 sample(cClear);
      for (uint row = 0; row < layout.m_height; ++row) {
        const uint src_y = layout.m_y_ofs + row;
        for (uint col = 0; col < layout.m_width; ++col) {
          sample[0] = m_pChunks[chunk_index](layout.m_x_ofs + col, src_y)[comp];
          gathered_pixels.push_back(sample);
        }
      }
    }

    gathered_selectors.resize(gathered_pixels.size());

    dxt5_endpoint_optimizer::params opt_params;
    opt_params.m_block_index = cluster_index;
    opt_params.m_pPixels = &gathered_pixels[0];
    opt_params.m_num_pixels = gathered_pixels.size();
    opt_params.m_comp_index = 0;
    opt_params.m_quality = cCRNDXTQualityUber;
    opt_params.m_use_both_block_types = false;

    dxt5_endpoint_optimizer::results opt_results;
    opt_results.m_pSelectors = &gathered_selectors[0];

    dxt5_endpoint_optimizer optimizer;
    const bool all_transparent = optimizer.compute(opt_params, opt_results);
    (void)all_transparent;

    cluster.m_first_endpoint = opt_results.m_first_endpoint;
    cluster.m_second_endpoint = opt_results.m_second_endpoint;
    cluster.m_alpha_encoding = opt_results.m_block_type != 0;
    cluster.m_error = opt_results.m_error;

    // Hand the shared endpoints and each tile's slice of the selectors back to the quantized tiles,
    // walking the tiles in the same order used while gathering.
    uint selector_ofs = 0;
    for (uint t = 0; t < cluster.m_tiles.size(); ++t) {
      const uint chunk_index = cluster.m_tiles[t].first;
      const uint tile_index = cluster.m_tiles[t].second & 0xFFFFU;
      const uint alpha_index = cluster.m_tiles[t].second >> 16U;
      CRNLIB_ASSERT(chunk_index < m_num_chunks);
      CRNLIB_ASSERT(tile_index < cChunkMaxTiles);
      CRNLIB_ASSERT(alpha_index < m_num_alpha_blocks);

      compressed_chunk& dst_chunk = m_compressed_chunks[cAlpha0Chunks + alpha_index][chunk_index];
      CRNLIB_ASSERT(dst_chunk.m_endpoint_cluster_index[tile_index] == cluster_index);
      CRNLIB_ASSERT(tile_index < dst_chunk.m_num_tiles);

      const compressed_tile& src_tile = dst_chunk.m_tiles[tile_index];
      const chunk_tile_desc& layout = g_chunk_tile_layouts[src_tile.m_layout_index];
      (void)layout;

      compressed_tile& dst_tile = dst_chunk.m_quantized_tiles[tile_index];
      const uint tile_pixel_count = src_tile.m_pixel_width * src_tile.m_pixel_height;

      dst_tile.m_endpoint_cluster_index = cluster_index;
      dst_tile.m_first_endpoint = opt_results.m_first_endpoint;
      dst_tile.m_second_endpoint = opt_results.m_second_endpoint;
      //dst_tile.m_error = opt_results.m_error;
      dst_tile.m_alpha_encoding = opt_results.m_block_type != 0;
      dst_tile.m_pixel_width = src_tile.m_pixel_width;
      dst_tile.m_pixel_height = src_tile.m_pixel_height;
      dst_tile.m_layout_index = src_tile.m_layout_index;

      memcpy(dst_tile.m_selectors, &gathered_selectors[selector_ofs], tile_pixel_count);
      selector_ofs += tile_pixel_count;
    }
  }  // cluster_index

#if CRNLIB_ENABLE_DEBUG_MESSAGES
  if (m_params.m_debugging) {
    if (num_empty_clusters)
      console::warning("Total empty alpha clusters: %u", num_empty_clusters);
  }
#endif
}
bool dxt_hc::determine_alpha_endpoint_codebook() {
if (!m_num_alpha_blocks)
return true;
#if CRNLIB_ENABLE_DEBUG_MESSAGES
if (m_params.m_debugging)
console::info("Computing optimal alpha cluster endpoints");
#endif
for (uint i = 0; i <= m_pTask_pool->get_num_threads(); i++)
m_pTask_pool->queue_object_task(this, &dxt_hc::determine_alpha_endpoint_codebook_task, i, NULL);
m_pTask_pool->join();
return !m_canceled;
}
void dxt_hc::create_quantized_debug_images() {
if (!m_params.m_debugging)
return;
if (m_has_color_blocks) {
m_dbg_chunk_pixels_color_quantized.resize(m_num_chunks);
m_dbg_chunk_pixels_quantized_color_selectors.resize(m_num_chunks);
m_dbg_chunk_pixels_orig_color_selectors.resize(m_num_chunks);
for (uint i = 0; i < m_num_chunks; i++) {
m_dbg_chunk_pixels_color_quantized[i].clear();
m_dbg_chunk_pixels_quantized_color_selectors[i].clear();
m_dbg_chunk_pixels_orig_color_selectors[i].clear();
}
}
if (m_num_alpha_blocks) {
m_dbg_chunk_pixels_alpha_quantized.resize(m_num_chunks);
m_dbg_chunk_pixels_quantized_alpha_selectors.resize(m_num_chunks);
m_dbg_chunk_pixels_orig_alpha_selectors.resize(m_num_chunks);
for (uint i = 0; i < m_num_chunks; i++) {
m_dbg_chunk_pixels_alpha_quantized[i].clear();
m_dbg_chunk_pixels_quantized_alpha_selectors[i].clear();
m_dbg_chunk_pixels_orig_alpha_selectors[i].clear();
}
}
for (uint chunk_index = 0; chunk_index < m_num_chunks; chunk_index++) {
if (m_has_color_blocks) {
pixel_chunk& output_chunk_color_quantized = m_dbg_chunk_pixels_color_quantized[chunk_index];
pixel_chunk& output_chunk_selectors = m_dbg_chunk_pixels_quantized_color_selectors[chunk_index];
pixel_chunk& output_chunk_orig_selectors = m_dbg_chunk_pixels_orig_color_selectors[chunk_index];
const compressed_chunk& color_chunk = m_compressed_chunks[cColorChunks][chunk_index];
for (uint tile_index = 0; tile_index < color_chunk.m_num_tiles; tile_index++) {
const compressed_tile& quantized_tile = color_chunk.m_quantized_tiles[tile_index];
const chunk_tile_desc& layout = g_chunk_tile_layouts[quantized_tile.m_layout_index];
const uint8* pColor_Selectors = quantized_tile.m_selectors;
color_quad_u8 block_colors[cDXT1SelectorValues];
CRNLIB_ASSERT(quantized_tile.m_first_endpoint >= quantized_tile.m_second_endpoint);
dxt1_block::get_block_colors(block_colors, static_cast<uint16>(quantized_tile.m_first_endpoint), static_cast<uint16>(quantized_tile.m_second_endpoint));
for (uint y = 0; y < layout.m_height; y++) {
for (uint x = 0; x < layout.m_width; x++) {
const uint selector = pColor_Selectors[x + y * layout.m_width];
output_chunk_selectors(x + layout.m_x_ofs, y + layout.m_y_ofs) = selector * 255 / (cDXT1SelectorValues - 1);
output_chunk_orig_selectors(x + layout.m_x_ofs, y + layout.m_y_ofs) = color_chunk.m_tiles[tile_index].m_selectors[x + y * layout.m_width] * 255 / (cDXT1SelectorValues - 1);
output_chunk_color_quantized(x + layout.m_x_ofs, y + layout.m_y_ofs) = block_colors[selector];
}
}
}
}
for (uint a = 0; a < m_num_alpha_blocks; a++) {
pixel_chunk& output_chunk_alpha_quantized = m_dbg_chunk_pixels_alpha_quantized[chunk_index];
pixel_chunk& output_chunk_selectors = m_dbg_chunk_pixels_quantized_alpha_selectors[chunk_index];
pixel_chunk& output_chunk_orig_selectors = m_dbg_chunk_pixels_orig_alpha_selectors[chunk_index];
const compressed_chunk& alpha_chunk = m_compressed_chunks[cAlpha0Chunks + a][chunk_index];
for (uint tile_index = 0; tile_index < alpha_chunk.m_num_tiles; tile_index++) {
const compressed_tile& quantized_tile = alpha_chunk.m_quantized_tiles[tile_index];
const chunk_tile_desc& layout = g_chunk_tile_layouts[quantized_tile.m_layout_index];
const uint8* pAlpha_selectors = quantized_tile.m_selectors;
uint block_values[cDXT5SelectorValues];
CRNLIB_ASSERT(quantized_tile.m_first_endpoint >= quantized_tile.m_second_endpoint);
dxt5_block::get_block_values(block_values, quantized_tile.m_first_endpoint, quantized_tile.m_second_endpoint);
for (uint y = 0; y < layout.m_height; y++) {
for (uint x = 0; x < layout.m_width; x++) {
const uint selector = pAlpha_selectors[x + y * layout.m_width];
CRNLIB_ASSERT(selector < cDXT5SelectorValues);
output_chunk_selectors(x + layout.m_x_ofs, y + layout.m_y_ofs)[m_params.m_alpha_component_indices[a]] = static_cast<uint8>(selector * 255 / (cDXT5SelectorValues - 1));
output_chunk_orig_selectors(x + layout.m_x_ofs, y + layout.m_y_ofs)[m_params.m_alpha_component_indices[a]] = static_cast<uint8>(alpha_chunk.m_tiles[tile_index].m_selectors[x + y * layout.m_width] * 255 / (cDXT5SelectorValues - 1));
output_chunk_alpha_quantized(x + layout.m_x_ofs, y + layout.m_y_ofs)[m_params.m_alpha_component_indices[a]] = static_cast<uint8>(block_values[selector]);
}
}
}
} // a
}
}
// Task-pool worker that assigns every block of the chunks owned by this task to the best-matching
// entry of the selector codebook in state.m_selectors_cb (exhaustive search).
//   data      - this task's index; when the pool reports a non-zero thread count, only chunks whose
//               index modulo (get_num_threads() + 1) equals it are processed.
//   pData_ptr - points to the create_selector_codebook_state shared by all tasks.
// For each block the winning codebook index is stored in chunk.m_selector_cluster_index and a
// block_id is appended to state.m_chunk_blocks_using_selectors[winner] under the state's spinlock.
// Returns early if m_canceled is set or update_progress() returns false.
void dxt_hc::create_selector_codebook_task(uint64 data, void* pData_ptr) {
const uint thread_index = static_cast<uint>(data);
const create_selector_codebook_state& state = *static_cast<create_selector_codebook_state*>(pData_ptr);
for (uint comp_chunk_index = state.m_comp_index_start; comp_chunk_index <= state.m_comp_index_end; comp_chunk_index++) {
// For alpha, derive which alpha block this component is and which source pixel component it reads.
const uint alpha_index = state.m_alpha_blocks ? (comp_chunk_index - cAlpha0Chunks) : 0;
const uint alpha_pixel_comp = state.m_alpha_blocks ? m_params.m_alpha_component_indices[alpha_index] : 0;
for (uint chunk_index = 0; chunk_index < m_num_chunks; chunk_index++) {
if (m_canceled)
return;
// Progress is reported from the main thread only, once every 128 chunks.
if ((crn_get_current_thread_id() == m_main_thread_id) && ((chunk_index & 127) == 0)) {
if (!update_progress(12 + comp_chunk_index, chunk_index, m_num_chunks))
return;
}
// Chunks are interleaved across the queued tasks.
if (m_pTask_pool->get_num_threads()) {
if ((chunk_index % (m_pTask_pool->get_num_threads() + 1)) != thread_index)
continue;
}
compressed_chunk& chunk = m_compressed_chunks[comp_chunk_index][chunk_index];
for (uint tile_index = 0; tile_index < chunk.m_num_tiles; tile_index++) {
compressed_tile& quantized_tile = chunk.m_quantized_tiles[tile_index];
const chunk_tile_desc& layout = g_chunk_tile_layouts[quantized_tile.m_layout_index];
// Convert the tile's pixel extent and offset into block units (pixels / 4).
const uint tile_blocks_x = layout.m_width >> 2;
const uint tile_blocks_y = layout.m_height >> 2;
const uint tile_block_ofs_x = layout.m_x_ofs >> 2;
const uint tile_block_ofs_y = layout.m_y_ofs >> 2;
if (state.m_alpha_blocks) {
// Alpha path: error is the sum of squared differences against the tile's interpolated values.
uint block_values[cDXT5SelectorValues];
dxt5_block::get_block_values(block_values, quantized_tile.m_first_endpoint, quantized_tile.m_second_endpoint);
for (uint by = 0; by < tile_blocks_y; by++) {
for (uint bx = 0; bx < tile_blocks_x; bx++) {
// The #if 0 branch is a disabled alternative lookup; the #else branch is what gets compiled.
#if 0
uint best_index = selector_vq.find_best_codebook_entry_fs(training_vecs[comp_chunk_index][(tile_block_ofs_x+bx)+(tile_block_ofs_y+by)*2][chunk_index]);
#else
const dxt_pixel_block& block = m_pChunks[chunk_index].m_blocks[tile_block_ofs_y + by][tile_block_ofs_x + bx];
uint best_error = UINT_MAX;
uint best_index = 0;
for (uint i = 0; i < state.m_selectors_cb.size(); i++) {
const selectors& s = state.m_selectors_cb[i];
uint total_error = 0;
for (uint y = 0; y < cBlockPixelHeight; y++) {
for (uint x = 0; x < cBlockPixelWidth; x++) {
int a = block.m_pixels[y][x][alpha_pixel_comp];
int b = block_values[s.m_selectors[y][x]];
int error = a - b;
error *= error;
total_error += error;
// Stop scoring this entry as soon as it is already worse than the best one found so far.
if (total_error > best_error)
goto early_out;
} // x
} //y
early_out:
// Strict '<' keeps the lowest-index entry on ties; a zero error cannot be improved, so stop searching.
if (total_error < best_error) {
best_error = total_error;
best_index = i;
if (best_error == 0)
break;
}
} // i
#endif
CRNLIB_ASSERT((tile_block_ofs_x + bx) < 2);
CRNLIB_ASSERT((tile_block_ofs_y + by) < 2);
chunk.m_selector_cluster_index[tile_block_ofs_y + by][tile_block_ofs_x + bx] = static_cast<uint16>(best_index);
{
// The per-entry user lists are shared by all tasks; the append is done while holding the spinlock.
scoped_spinlock lock(state.m_chunk_blocks_using_selectors_lock);
state.m_chunk_blocks_using_selectors[best_index].push_back(block_id(chunk_index, alpha_index, tile_index, tile_block_ofs_x + bx, tile_block_ofs_y + by));
}
// std::make_pair(chunk_index, (tile_index << 16) | ((tile_block_ofs_y + by) << 8) | (tile_block_ofs_x + bx) ) );
} // bx
} // by
} else {
// Color path: error is the sum of color::color_distance() against the tile's four block colors.
color_quad_u8 block_colors[cDXT1SelectorValues];
dxt1_block::get_block_colors4(block_colors, static_cast<uint16>(quantized_tile.m_first_endpoint), static_cast<uint16>(quantized_tile.m_second_endpoint));
// Equal endpoints are treated as an alpha-style block here: selector 3 is penalized below.
const bool block_with_alpha = quantized_tile.m_first_endpoint == quantized_tile.m_second_endpoint;
for (uint by = 0; by < tile_blocks_y; by++) {
for (uint bx = 0; bx < tile_blocks_x; bx++) {
const dxt_pixel_block& block = m_pChunks[chunk_index].m_blocks[tile_block_ofs_y + by][tile_block_ofs_x + bx];
uint best_error = UINT_MAX;
uint best_index = 0;
for (uint i = 0; i < state.m_selectors_cb.size(); i++) {
const selectors& s = state.m_selectors_cb[i];
uint total_error = 0;
for (uint y = 0; y < cBlockPixelHeight; y++) {
for (uint x = 0; x < cBlockPixelWidth; x++) {
const color_quad_u8& a = block.m_pixels[y][x];
uint selector_index = s.m_selectors[y][x];
// Large fixed penalty so entries using selector 3 on such a block are effectively never chosen.
if ((block_with_alpha) && (selector_index == 3))
total_error += 999999;
const color_quad_u8& b = block_colors[selector_index];
uint error = color::color_distance(m_params.m_perceptual, a, b, false);
total_error += error;
// Stop scoring this entry as soon as it is already worse than the best one found so far.
if (total_error > best_error)
goto early_out2;
} // x
} //y
early_out2:
// Strict '<' keeps the lowest-index entry on ties; a zero error cannot be improved, so stop searching.
if (total_error < best_error) {
best_error = total_error;
best_index = i;
if (best_error == 0)
break;
}
} // i
CRNLIB_ASSERT((tile_block_ofs_x + bx) < 2);
CRNLIB_ASSERT((tile_block_ofs_y + by) < 2);
chunk.m_selector_cluster_index[tile_block_ofs_y + by][tile_block_ofs_x + bx] = static_cast<uint16>(best_index);
{
// The per-entry user lists are shared by all tasks; the append is done while holding the spinlock.
scoped_spinlock lock(state.m_chunk_blocks_using_selectors_lock);
state.m_chunk_blocks_using_selectors[best_index].push_back(block_id(chunk_index, 0, tile_index, tile_block_ofs_x + bx, tile_block_ofs_y + by));
}
// std::make_pair(chunk_index, (tile_index << 16) | ((tile_block_ofs_y + by) << 8) | (tile_block_ofs_x + bx) ) );
} // bx
} // by
} // if alpha_blocks
} // tile_index
} // chunk_index
} // comp_chunk_index
}
bool dxt_hc::create_selector_codebook(bool alpha_blocks) {
#if CRNLIB_ENABLE_DEBUG_MESSAGES
if (m_params.m_debugging)
console::info("Computing selector training vectors");
#endif
const uint cColorDistToWeight = 2000;
const uint cAlphaErrorToWeight = 8;
vec16F_tree_vq selector_vq;
uint comp_index_start = cColorChunks;
uint comp_index_end = cColorChunks;
if (alpha_blocks) {
comp_index_start = cAlpha0Chunks;
comp_index_end = cAlpha0Chunks + m_num_alpha_blocks - 1;
}
crnlib::vector<vec16F> training_vecs[cNumCompressedChunkVecs][4];
for (uint comp_chunk_index = comp_index_start; comp_chunk_index <= comp_index_end; comp_chunk_index++) {
for (uint i = 0; i < 4; i++)
training_vecs[comp_chunk_index][i].resize(m_num_chunks);
for (uint chunk_index = 0; chunk_index < m_num_chunks; chunk_index++) {
if ((chunk_index & 63) == 0) {
if (!update_progress(9 + comp_chunk_index, chunk_index, m_num_chunks))
return false;
}
const compressed_chunk& chunk = m_compressed_chunks[comp_chunk_index][chunk_index];
uint8 block_selectors[cChunkBlockWidth][cChunkBlockHeight][cBlockPixelWidth * cBlockPixelHeight];
uint block_weight[cChunkBlockWidth][cChunkBlockHeight];
for (uint tile_index = 0; tile_index < chunk.m_num_tiles; tile_index++) {
const compressed_tile& quantized_tile = chunk.m_quantized_tiles[tile_index];
uint weight;
if (comp_chunk_index == cColorChunks) {
const color_quad_u8 first_color(dxt1_block::unpack_color(static_cast<uint16>(quantized_tile.m_first_endpoint), true));
const color_quad_u8 second_color(dxt1_block::unpack_color(static_cast<uint16>(quantized_tile.m_second_endpoint), true));
const uint dist = color::color_distance(m_params.m_perceptual, first_color, second_color, false);
weight = dist / cColorDistToWeight;
weight = static_cast<uint>(weight * m_pChunks[chunk_index].m_weight);
} else {
int first_endpoint = quantized_tile.m_first_endpoint;
int second_endpoint = quantized_tile.m_second_endpoint;
int error = first_endpoint - second_endpoint;
error = error * error;
weight = static_cast<uint>(error / cAlphaErrorToWeight);
}
const uint cMaxWeight = 2048;
weight = math::clamp<uint>(weight, 1U, cMaxWeight);
// umm, this is a hack
float f = math::lerp(1.15f, 1.0f, chunk.m_encoding_index / float(cNumChunkEncodings - 1));
weight = (uint)(weight * f);
const chunk_tile_desc& layout = g_chunk_tile_layouts[quantized_tile.m_layout_index];
for (uint y = 0; y < (layout.m_height >> 2); y++)
for (uint x = 0; x < (layout.m_width >> 2); x++)
block_weight[x + (layout.m_x_ofs >> 2)][y + (layout.m_y_ofs >> 2)] = weight;
const uint8* pSelectors = quantized_tile.m_selectors;
for (uint y = 0; y < layout.m_height; y++) {
const uint cy = y + layout.m_y_ofs;
for (uint x = 0; x < layout.m_width; x++) {
const uint selector = pSelectors[x + y * layout.m_width];
if (comp_chunk_index == cColorChunks)
CRNLIB_ASSERT(selector < cDXT1SelectorValues);
else
CRNLIB_ASSERT(selector < cDXT5SelectorValues);
const uint cx = x + layout.m_x_ofs;
block_selectors[cx >> 2][cy >> 2][(cx & 3) + (cy & 3) * 4] = static_cast<uint8>(selector);
} // x
} // y
} // tile_index
vec16F v;
for (uint y = 0; y < cChunkBlockHeight; y++) {
for (uint x = 0; x < cChunkBlockWidth; x++) {
for (uint i = 0; i < cBlockPixelWidth * cBlockPixelHeight; i++) {
uint s = block_selectors[x][y][i];
float f;
if (comp_chunk_index == cColorChunks) {
CRNLIB_ASSERT(s < cDXT1SelectorValues);
f = (g_dxt1_to_linear[s] + .5f) * 1.0f / 4.0f;
} else {
CRNLIB_ASSERT(s < cDXT5SelectorValues);
f = (g_dxt5_to_linear[s] + .5f) * 1.0f / 8.0f;
}
CRNLIB_ASSERT((f >= 0.0f) && (f <= 1.0f));
v[i] = f;
} // i
selector_vq.add_training_vec(v, block_weight[x][y]);
training_vecs[comp_chunk_index][x + y * 2][chunk_index] = v;
} // x
} // y
} // chunk_index
} // comp_chunk_index
timer t;
t.start();
selector_vq.generate_codebook(alpha_blocks ? m_params.m_alpha_selector_codebook_size : m_params.m_color_selector_codebook_size);
#if CRNLIB_ENABLE_DEBUG_MESSAGES
if (m_params.m_debugging) {
double total_time = t.get_elapsed_secs();
console::info("Codebook gen time: %3.3fs, Selector codebook size: %u", total_time, selector_vq.get_codebook_size());
}
#endif
selectors_vec& selectors_cb = alpha_blocks ? m_alpha_selectors : m_color_selectors;
selectors_cb.resize(selector_vq.get_codebook_size());
for (uint i = 0; i < selector_vq.get_codebook_size(); i++) {
const vec16F& v = selector_vq.get_codebook_entry(i);
for (uint j = 0; j < cBlockPixelWidth * cBlockPixelHeight; j++) {
int s;
if (alpha_blocks) {
s = math::clamp<int>(static_cast<int>(v[j] * 8.0f), 0, 7);
s = g_dxt5_from_linear[s];
} else {
s = math::clamp<int>(static_cast<int>(v[j] * 4.0f), 0, 3);
s = g_dxt1_from_linear[s];
}
selectors_cb[i].m_selectors[j >> 2][j & 3] = static_cast<uint8>(s);
} // j
} // i
chunk_blocks_using_selectors_vec& chunk_blocks_using_selectors = alpha_blocks ? m_chunk_blocks_using_alpha_selectors : m_chunk_blocks_using_color_selectors;
chunk_blocks_using_selectors.clear();
chunk_blocks_using_selectors.resize(selectors_cb.size());
create_selector_codebook_state state(*this, alpha_blocks, comp_index_start, comp_index_end, selector_vq, chunk_blocks_using_selectors, selectors_cb);
for (uint i = 0; i <= m_pTask_pool->get_num_threads(); i++)
m_pTask_pool->queue_object_task(this, &dxt_hc::create_selector_codebook_task, i, &state);
m_pTask_pool->join();
return !m_canceled;
}
bool dxt_hc::refine_quantized_color_selectors() {
if (!m_has_color_blocks)
return true;
#if CRNLIB_ENABLE_DEBUG_MESSAGES
if (m_params.m_debugging)
console::info("Refining quantized color selectors");
#endif
uint total_refined_selectors = 0;
uint total_refined_pixels = 0;
uint total_selectors = 0;
for (uint selector_index = 0; selector_index < m_color_selectors.size(); selector_index++) {
if ((selector_index & 255) == 0) {
if (!update_progress(15, selector_index, m_color_selectors.size()))
return false;
}
if (m_chunk_blocks_using_color_selectors[selector_index].empty())
continue;
selectors& sel = m_color_selectors[selector_index];
for (uint y = 0; y < cBlockPixelHeight; y++) {
for (uint x = 0; x < cBlockPixelWidth; x++) {
uint best_s = 0;
uint best_error = UINT_MAX;
for (uint s = 0; s < cDXT1SelectorValues; s++) {
uint total_error = 0;
for (uint block_iter = 0; block_iter < m_chunk_blocks_using_color_selectors[selector_index].size(); block_iter++) {
const block_id& id = m_chunk_blocks_using_color_selectors[selector_index][block_iter];
const uint chunk_index = id.m_chunk_index;
const uint tile_index = id.m_tile_index;
const uint chunk_block_x = id.m_block_x;
const uint chunk_block_y = id.m_block_y;
CRNLIB_ASSERT((chunk_block_x < cChunkBlockWidth) && (chunk_block_y < cChunkBlockHeight));
const compressed_chunk& chunk = m_compressed_chunks[cColorChunks][chunk_index];
CRNLIB_ASSERT(tile_index < chunk.m_num_tiles);
CRNLIB_ASSERT(chunk.m_selector_cluster_index[chunk_block_y][chunk_block_x] == selector_index);
const compressed_tile& tile = chunk.m_quantized_tiles[tile_index];
//const chunk_tile_desc& tile_desc = g_chunk_tile_layouts[tile.m_layout_index];
color_quad_u8 block_colors[cDXT1SelectorValues];
CRNLIB_ASSERT(tile.m_first_endpoint >= tile.m_second_endpoint);
dxt1_block::get_block_colors4(block_colors, static_cast<uint16>(tile.m_first_endpoint), static_cast<uint16>(tile.m_second_endpoint));
if ((tile.m_first_endpoint == tile.m_second_endpoint) && (s == 3))
total_error += 999999;
const color_quad_u8& orig_pixel = m_pChunks[chunk_index](chunk_block_x * cBlockPixelWidth + x, chunk_block_y * cBlockPixelHeight + y);
const color_quad_u8& quantized_pixel = block_colors[s];
const uint error = color::color_distance(m_params.m_perceptual, orig_pixel, quantized_pixel, false);
total_error += error;
} // block_iter
if (total_error < best_error) {
best_error = total_error;
best_s = s;
}
} // s
if (sel.m_selectors[y][x] != best_s) {
total_refined_selectors++;
total_refined_pixels += m_chunk_blocks_using_color_selectors[selector_index].size();
sel.m_selectors[y][x] = static_cast<uint8>(best_s);
}
total_selectors++;
} //x
} //y
} // selector_index
#if CRNLIB_ENABLE_DEBUG_MESSAGES
if (m_params.m_debugging)
console::info("Total refined pixels: %u, selectors: %u out of %u", total_refined_pixels, total_refined_selectors, total_selectors);
#endif
return true;
}
bool dxt_hc::refine_quantized_alpha_selectors() {
if (!m_num_alpha_blocks)
return true;
#if CRNLIB_ENABLE_DEBUG_MESSAGES
if (m_params.m_debugging)
console::info("Refining quantized alpha selectors");
#endif
uint total_refined_selectors = 0;
uint total_refined_pixels = 0;
uint total_selectors = 0;
for (uint selector_index = 0; selector_index < m_alpha_selectors.size(); selector_index++) {
if ((selector_index & 255) == 0) {
if (!update_progress(16, selector_index, m_alpha_selectors.size()))
return false;
}
if (m_chunk_blocks_using_alpha_selectors[selector_index].empty())
continue;
selectors& sel = m_alpha_selectors[selector_index];
for (uint y = 0; y < cBlockPixelHeight; y++) {
for (uint x = 0; x < cBlockPixelWidth; x++) {
uint best_s = 0;
uint best_error = UINT_MAX;
for (uint s = 0; s < cDXT5SelectorValues; s++) {
uint total_error = 0;
for (uint block_iter = 0; block_iter < m_chunk_blocks_using_alpha_selectors[selector_index].size(); block_iter++) {
const block_id& id = m_chunk_blocks_using_alpha_selectors[selector_index][block_iter];
const uint chunk_index = id.m_chunk_index;
const uint tile_index = id.m_tile_index;
const uint chunk_block_x = id.m_block_x;
const uint chunk_block_y = id.m_block_y;
const uint alpha_index = id.m_alpha_index;
CRNLIB_ASSERT(alpha_index < m_num_alpha_blocks);
CRNLIB_ASSERT((chunk_block_x < cChunkBlockWidth) && (chunk_block_y < cChunkBlockHeight));
const compressed_chunk& chunk = m_compressed_chunks[alpha_index + cAlpha0Chunks][chunk_index];
CRNLIB_ASSERT(tile_index < chunk.m_num_tiles);
CRNLIB_ASSERT(chunk.m_selector_cluster_index[chunk_block_y][chunk_block_x] == selector_index);
const compressed_tile& tile = chunk.m_quantized_tiles[tile_index];
//const chunk_tile_desc& tile_desc = g_chunk_tile_layouts[tile.m_layout_index];
uint block_values[cDXT5SelectorValues];
CRNLIB_ASSERT(tile.m_first_endpoint >= tile.m_second_endpoint);
dxt5_block::get_block_values(block_values, tile.m_first_endpoint, tile.m_second_endpoint);
int orig_value = m_pChunks[chunk_index](chunk_block_x * cBlockPixelWidth + x, chunk_block_y * cBlockPixelHeight + y)[m_params.m_alpha_component_indices[alpha_index]];
int quantized_value = block_values[s];
int error = (orig_value - quantized_value);
error *= error;
total_error += error;
} // block_iter
if (total_error < best_error) {
best_error = total_error;
best_s = s;
}
} // s
if (sel.m_selectors[y][x] != best_s) {
total_refined_selectors++;
total_refined_pixels += m_chunk_blocks_using_alpha_selectors[selector_index].size();
sel.m_selectors[y][x] = static_cast<uint8>(best_s);
}
total_selectors++;
} //x
} //y
} // selector_index
#if CRNLIB_ENABLE_DEBUG_MESSAGES
if (m_params.m_debugging)
console::info("Total refined pixels: %u, selectors: %u out of %u", total_refined_pixels, total_refined_selectors, total_selectors);
#endif
return true;
}
bool dxt_hc::refine_quantized_color_endpoints() {
if (!m_has_color_blocks)
return true;
uint total_refined_tiles = 0;
uint total_refined_pixels = 0;
#if CRNLIB_ENABLE_DEBUG_MESSAGES
if (m_params.m_debugging)
console::info("Refining quantized color endpoints");
#endif
for (uint cluster_index = 0; cluster_index < m_color_clusters.size(); cluster_index++) {
if ((cluster_index & 255) == 0) {
if (!update_progress(17, cluster_index, m_color_clusters.size()))
return false;
}
tile_cluster& cluster = m_color_clusters[cluster_index];
uint total_pixels = 0;
for (uint tile_iter = 0; tile_iter < cluster.m_tiles.size(); tile_iter++) {
const uint chunk_index = cluster.m_tiles[tile_iter].first;
const uint tile_index = cluster.m_tiles[tile_iter].second;
compressed_chunk& chunk = m_compressed_chunks[cColorChunks][chunk_index];
compressed_tile& tile = chunk.m_quantized_tiles[tile_index];
CRNLIB_ASSERT(tile.m_first_endpoint == cluster.m_first_endpoint);
CRNLIB_ASSERT(tile.m_second_endpoint == cluster.m_second_endpoint);
total_pixels += (tile.m_pixel_width * tile.m_pixel_height);
}
if (!total_pixels)
continue;
crnlib::vector<color_quad_u8> pixels;
crnlib::vector<uint8> selectors;
pixels.reserve(total_pixels);
selectors.reserve(total_pixels);
for (uint tile_iter = 0; tile_iter < cluster.m_tiles.size(); tile_iter++) {
const uint chunk_index = cluster.m_tiles[tile_iter].first;
const uint tile_index = cluster.m_tiles[tile_iter].second;
compressed_chunk& chunk = m_compressed_chunks[cColorChunks][chunk_index];
compressed_tile& tile = chunk.m_quantized_tiles[tile_index];
const pixel_chunk& src_pixels = m_pChunks[chunk_index];
CRNLIB_ASSERT(tile.m_first_endpoint == cluster.m_first_endpoint);
CRNLIB_ASSERT(tile.m_second_endpoint == cluster.m_second_endpoint);
const chunk_tile_desc& tile_layout = g_chunk_tile_layouts[tile.m_layout_index];
for (uint y = 0; y < tile.m_pixel_height; y++) {
for (uint x = 0; x < tile.m_pixel_width; x++) {
selectors.push_back(tile.m_selectors[x + y * tile.m_pixel_width]);
pixels.push_back(src_pixels(x + tile_layout.m_x_ofs, y + tile_layout.m_y_ofs));
}
}
}
dxt_endpoint_refiner refiner;
dxt_endpoint_refiner::params p;
dxt_endpoint_refiner::results r;
p.m_perceptual = m_params.m_perceptual;
p.m_pSelectors = &selectors[0];
p.m_pPixels = &pixels[0];
p.m_num_pixels = total_pixels;
p.m_dxt1_selectors = true;
p.m_error_to_beat = cluster.m_error;
p.m_block_index = cluster_index;
if (!refiner.refine(p, r))
continue;
total_refined_tiles++;
total_refined_pixels += total_pixels;
cluster.m_error = r.m_error;
cluster.m_first_endpoint = r.m_low_color;
cluster.m_second_endpoint = r.m_high_color;
for (uint tile_iter = 0; tile_iter < cluster.m_tiles.size(); tile_iter++) {
const uint chunk_index = cluster.m_tiles[tile_iter].first;
const uint tile_index = cluster.m_tiles[tile_iter].second;
compressed_chunk& chunk = m_compressed_chunks[cColorChunks][chunk_index];
compressed_tile& tile = chunk.m_quantized_tiles[tile_index];
tile.m_first_endpoint = r.m_low_color;
tile.m_second_endpoint = r.m_high_color;
}
}
#if CRNLIB_ENABLE_DEBUG_MESSAGES
if (m_params.m_debugging)
console::info("Total refined pixels: %u, endpoints: %u out of %u", total_refined_pixels, total_refined_tiles, m_color_clusters.size());
#endif
return true;
}
bool dxt_hc::refine_quantized_alpha_endpoints() {
if (!m_num_alpha_blocks)
return true;
uint total_refined_tiles = 0;
uint total_refined_pixels = 0;
#if CRNLIB_ENABLE_DEBUG_MESSAGES
if (m_params.m_debugging)
console::info("Refining quantized alpha endpoints");
#endif
for (uint cluster_index = 0; cluster_index < m_alpha_clusters.size(); cluster_index++) {
if ((cluster_index & 255) == 0) {
if (!update_progress(18, cluster_index, m_alpha_clusters.size()))
return false;
}
tile_cluster& cluster = m_alpha_clusters[cluster_index];
uint total_pixels = 0;
for (uint tile_iter = 0; tile_iter < cluster.m_tiles.size(); tile_iter++) {
const uint chunk_index = cluster.m_tiles[tile_iter].first;
const uint tile_index = cluster.m_tiles[tile_iter].second & 0xFFFFU;
const uint alpha_index = cluster.m_tiles[tile_iter].second >> 16U;
compressed_chunk& chunk = m_compressed_chunks[cAlpha0Chunks + alpha_index][chunk_index];
compressed_tile& tile = chunk.m_quantized_tiles[tile_index];
CRNLIB_ASSERT(tile.m_first_endpoint == cluster.m_first_endpoint);
CRNLIB_ASSERT(tile.m_second_endpoint == cluster.m_second_endpoint);
total_pixels += (tile.m_pixel_width * tile.m_pixel_height);
}
if (!total_pixels)
continue;
crnlib::vector<color_quad_u8> pixels;
crnlib::vector<uint8> selectors;
pixels.reserve(total_pixels);
selectors.reserve(total_pixels);
for (uint tile_iter = 0; tile_iter < cluster.m_tiles.size(); tile_iter++) {
const uint chunk_index = cluster.m_tiles[tile_iter].first;
const uint tile_index = cluster.m_tiles[tile_iter].second & 0xFFFFU;
const uint alpha_index = cluster.m_tiles[tile_iter].second >> 16U;
compressed_chunk& chunk = m_compressed_chunks[cAlpha0Chunks + alpha_index][chunk_index];
compressed_tile& tile = chunk.m_quantized_tiles[tile_index];
const pixel_chunk& src_pixels = m_pChunks[chunk_index];
CRNLIB_ASSERT(tile.m_first_endpoint == cluster.m_first_endpoint);
CRNLIB_ASSERT(tile.m_second_endpoint == cluster.m_second_endpoint);
const chunk_tile_desc& tile_layout = g_chunk_tile_layouts[tile.m_layout_index];
for (uint y = 0; y < tile.m_pixel_height; y++) {
for (uint x = 0; x < tile.m_pixel_width; x++) {
selectors.push_back(tile.m_selectors[x + y * tile.m_pixel_width]);
pixels.push_back(color_quad_u8(src_pixels(x + tile_layout.m_x_ofs, y + tile_layout.m_y_ofs)[m_params.m_alpha_component_indices[alpha_index]]));
}
}
}
dxt_endpoint_refiner refiner;
dxt_endpoint_refiner::params p;
dxt_endpoint_refiner::results r;
p.m_perceptual = m_params.m_perceptual;
p.m_pSelectors = &selectors[0];
p.m_pPixels = &pixels[0];
p.m_num_pixels = total_pixels;
p.m_dxt1_selectors = false;
p.m_error_to_beat = cluster.m_error;
p.m_block_index = cluster_index;
if (!refiner.refine(p, r))
continue;
total_refined_tiles++;
total_refined_pixels += total_pixels;
cluster.m_error = r.m_error;
cluster.m_first_endpoint = r.m_low_color;
cluster.m_second_endpoint = r.m_high_color;
for (uint tile_iter = 0; tile_iter < cluster.m_tiles.size(); tile_iter++) {
const uint chunk_index = cluster.m_tiles[tile_iter].first;
const uint tile_index = cluster.m_tiles[tile_iter].second & 0xFFFFU;
const uint alpha_index = cluster.m_tiles[tile_iter].second >> 16U;
compressed_chunk& chunk = m_compressed_chunks[cAlpha0Chunks + alpha_index][chunk_index];
compressed_tile& tile = chunk.m_quantized_tiles[tile_index];
tile.m_first_endpoint = r.m_low_color;
tile.m_second_endpoint = r.m_high_color;
}
}
#if CRNLIB_ENABLE_DEBUG_MESSAGES
if (m_params.m_debugging)
console::info("Total refined pixels: %u, endpoints: %u out of %u", total_refined_pixels, total_refined_tiles, m_alpha_clusters.size());
#endif
return true;
}
void dxt_hc::create_final_debug_image() {
if (!m_params.m_debugging)
return;
m_dbg_chunk_pixels_final.resize(m_num_chunks);
for (uint i = 0; i < m_num_chunks; i++)
m_dbg_chunk_pixels_final[i].clear();
if (m_has_color_blocks) {
m_dbg_chunk_pixels_final_color_selectors.resize(m_num_chunks);
for (uint i = 0; i < m_num_chunks; i++)
m_dbg_chunk_pixels_final_color_selectors[i].clear();
}
if (m_num_alpha_blocks) {
m_dbg_chunk_pixels_final_alpha_selectors.resize(m_num_chunks);
for (uint i = 0; i < m_num_chunks; i++)
m_dbg_chunk_pixels_final_alpha_selectors[i].clear();
}
for (uint chunk_index = 0; chunk_index < m_num_chunks; chunk_index++) {
pixel_chunk& output_chunk_final = m_dbg_chunk_pixels_final[chunk_index];
if (m_has_color_blocks) {
const compressed_chunk& chunk = m_compressed_chunks[cColorChunks][chunk_index];
pixel_chunk& output_chunk_quantized_color_selectors = m_dbg_chunk_pixels_final_color_selectors[chunk_index];
for (uint tile_index = 0; tile_index < chunk.m_num_tiles; tile_index++) {
const compressed_tile& quantized_tile = chunk.m_quantized_tiles[tile_index];
const chunk_tile_desc& layout = g_chunk_tile_layouts[quantized_tile.m_layout_index];
color_quad_u8 block_colors[cDXT1SelectorValues];
dxt1_block::get_block_colors(block_colors, static_cast<uint16>(quantized_tile.m_first_endpoint), static_cast<uint16>(quantized_tile.m_second_endpoint));
for (uint y = 0; y < layout.m_height; y++) {
for (uint x = 0; x < layout.m_width; x++) {
const uint chunk_x_ofs = x + layout.m_x_ofs;
const uint chunk_y_ofs = y + layout.m_y_ofs;
const uint block_x = chunk_x_ofs >> 2;
const uint block_y = chunk_y_ofs >> 2;
const selectors& s = m_color_selectors[chunk.m_selector_cluster_index[block_y][block_x]];
uint selector = s.m_selectors[chunk_y_ofs & 3][chunk_x_ofs & 3];
output_chunk_final(x + layout.m_x_ofs, y + layout.m_y_ofs) = block_colors[selector];
output_chunk_quantized_color_selectors(x + layout.m_x_ofs, y + layout.m_y_ofs) = g_tile_layout_colors[selector];
}
}
}
}
if (m_num_alpha_blocks) {
pixel_chunk& output_chunk_quantized_alpha_selectors = m_dbg_chunk_pixels_final_alpha_selectors[chunk_index];
for (uint a = 0; a < m_num_alpha_blocks; a++) {
const compressed_chunk& chunk = m_compressed_chunks[cAlpha0Chunks + a][chunk_index];
for (uint tile_index = 0; tile_index < chunk.m_num_tiles; tile_index++) {
const compressed_tile& quantized_tile = chunk.m_quantized_tiles[tile_index];
const chunk_tile_desc& layout = g_chunk_tile_layouts[quantized_tile.m_layout_index];
uint block_values[cDXT5SelectorValues];
// purposely call the general version to debug single color alpah6 blocks
CRNLIB_ASSERT(quantized_tile.m_first_endpoint >= quantized_tile.m_second_endpoint);
dxt5_block::get_block_values(block_values, quantized_tile.m_first_endpoint, quantized_tile.m_second_endpoint);
for (uint y = 0; y < layout.m_height; y++) {
for (uint x = 0; x < layout.m_width; x++) {
const uint chunk_x_ofs = x + layout.m_x_ofs;
const uint chunk_y_ofs = y + layout.m_y_ofs;
const uint block_x = chunk_x_ofs >> 2;
const uint block_y = chunk_y_ofs >> 2;
const selectors& s = m_alpha_selectors[chunk.m_selector_cluster_index[block_y][block_x]];
uint selector = s.m_selectors[chunk_y_ofs & 3][chunk_x_ofs & 3];
CRNLIB_ASSERT(selector < cDXT5SelectorValues);
output_chunk_final(x + layout.m_x_ofs, y + layout.m_y_ofs)[m_params.m_alpha_component_indices[a]] = static_cast<uint8>(block_values[selector]);
output_chunk_quantized_alpha_selectors(x + layout.m_x_ofs, y + layout.m_y_ofs)[m_params.m_alpha_component_indices[a]] = static_cast<uint8>(selector * 255 / (cDXT5SelectorValues - 1));
} //x
} // y
} // tile_index
} // a
}
} // chunk_index
}
bool dxt_hc::create_chunk_encodings() {
m_chunk_encoding.resize(m_num_chunks);
for (uint chunk_index = 0; chunk_index < m_num_chunks; chunk_index++) {
if ((chunk_index & 255) == 0) {
if (!update_progress(19, chunk_index, m_num_chunks))
return false;
}
chunk_encoding& encoding = m_chunk_encoding[chunk_index];
for (uint q = 0; q < cNumCompressedChunkVecs; q++) {
bool skip = true;
if (q == cColorChunks) {
if (m_has_color_blocks)
skip = false;
} else if (q <= m_num_alpha_blocks)
skip = false;
if (skip)
continue;
CRNLIB_ASSERT(!m_compressed_chunks[q].empty());
const compressed_chunk& chunk = m_compressed_chunks[q][chunk_index];
CRNLIB_ASSERT(chunk.m_encoding_index < cNumChunkEncodings);
encoding.m_encoding_index = static_cast<uint8>(chunk.m_encoding_index);
CRNLIB_ASSERT(chunk.m_num_tiles <= cChunkMaxTiles);
encoding.m_num_tiles = static_cast<uint8>(chunk.m_num_tiles);
for (uint tile_index = 0; tile_index < chunk.m_num_tiles; tile_index++) {
const compressed_tile& quantized_tile = chunk.m_quantized_tiles[tile_index];
if (!q) {
CRNLIB_ASSERT(quantized_tile.m_endpoint_cluster_index < m_color_clusters.size());
} else {
CRNLIB_ASSERT(quantized_tile.m_endpoint_cluster_index < m_alpha_clusters.size());
}
encoding.m_endpoint_indices[q][tile_index] = static_cast<uint16>(quantized_tile.m_endpoint_cluster_index);
}
for (uint y = 0; y < cChunkBlockHeight; y++) {
for (uint x = 0; x < cChunkBlockWidth; x++) {
const uint selector_index = chunk.m_selector_cluster_index[y][x];
if (!q) {
CRNLIB_ASSERT(selector_index < m_color_selectors.size());
} else {
CRNLIB_ASSERT(selector_index < m_alpha_selectors.size());
}
encoding.m_selector_indices[q][y][x] = static_cast<uint16>(selector_index);
}
}
} // q
} // chunk_index
if (m_has_color_blocks) {
m_color_endpoints.resize(m_color_clusters.size());
for (uint i = 0; i < m_color_clusters.size(); i++)
m_color_endpoints[i] = dxt1_block::pack_endpoints(m_color_clusters[i].m_first_endpoint, m_color_clusters[i].m_second_endpoint);
}
if (m_num_alpha_blocks) {
m_alpha_endpoints.resize(m_alpha_clusters.size());
for (uint i = 0; i < m_alpha_clusters.size(); i++)
m_alpha_endpoints[i] = dxt5_block::pack_endpoints(m_alpha_clusters[i].m_first_endpoint, m_alpha_clusters[i].m_second_endpoint);
}
return true;
}
// Assembles a grid of pixel chunks into a single debug image.
//   num_chunks_x, num_chunks_y - dimensions of the chunk grid
//   chunks           - source chunks; if empty, img is set to black (without being resized) and the function returns
//   pChunk_encodings - optional; when non-null, each tile rectangle of the chunk's encoding is passed to
//                      img.unclipped_fill_box() with gray (128, 128, 128, 255)
//   img              - destination, resized to the chunk grid's pixel dimensions
//   serpentine_scan  - when true, chunks on odd rows are read in right-to-left order
//   comp_index       - when >= 0, only that component of each source pixel is written; otherwise the whole pixel is copied
void dxt_hc::create_debug_image_from_chunks(uint num_chunks_x, uint num_chunks_y, const pixel_chunk_vec& chunks, const chunk_encoding_vec* pChunk_encodings, image_u8& img, bool serpentine_scan, int comp_index) {
  if (chunks.empty()) {
    img.set_all(color_quad_u8::make_black());
    return;
  }

  img.resize(num_chunks_x * cChunkPixelWidth, num_chunks_y * cChunkPixelHeight);

  for (uint y = 0; y < num_chunks_y; y++) {
    const uint base_y = y * cChunkPixelHeight;

    for (uint x = 0; x < num_chunks_x; x++) {
      const uint base_x = x * cChunkPixelWidth;

      uint c = x + y * num_chunks_x;
      if ((serpentine_scan) && (y & 1))
        c = (num_chunks_x - 1 - x) + y * num_chunks_x;

      if (comp_index >= 0) {
        for (uint cy = 0; cy < cChunkPixelHeight; cy++)
          for (uint cx = 0; cx < cChunkPixelWidth; cx++)
            img(base_x + cx, base_y + cy) = chunks[c](cx, cy)[comp_index];
      } else {
        for (uint cy = 0; cy < cChunkPixelHeight; cy++)
          for (uint cx = 0; cx < cChunkPixelWidth; cx++)
            img(base_x + cx, base_y + cy) = chunks[c](cx, cy);
      }

      if (pChunk_encodings) {
        const chunk_encoding& chunk = (*pChunk_encodings)[c];
        const chunk_encoding_desc& encoding_desc = g_chunk_encodings[chunk.m_encoding_index];
        CRNLIB_ASSERT(chunk.m_num_tiles == encoding_desc.m_num_tiles);

        for (uint t = 0; t < chunk.m_num_tiles; t++) {
          const chunk_tile_desc& tile_desc = encoding_desc.m_tiles[t];
          // Position the overlay with the same chunk-size constants as the pixel copy
          // above (previously a hard-coded 8), so the two always stay aligned.
          img.unclipped_fill_box(
              base_x + tile_desc.m_x_ofs, base_y + tile_desc.m_y_ofs,
              tile_desc.m_width + 1, tile_desc.m_height + 1, color_quad_u8(128, 128, 128, 255));
        }
      }
    }
  }
}
// Forwards progress to the user-supplied callback, if any.
//   phase_index    - current compression phase
//   subphase_index - current step within the phase
//   subphase_total - number of steps in the phase
// Returns false once the callback has requested cancellation (and latches
// m_canceled); returns true otherwise, including when no callback is installed.
// The callback is skipped when neither the phase nor the integer percentage has
// changed since the previous report.
bool dxt_hc::update_progress(uint phase_index, uint subphase_index, uint subphase_total) {
  CRNLIB_ASSERT(crn_get_current_thread_id() == m_main_thread_id);

  if (!m_params.m_pProgress_func)
    return true;

#if CRNLIB_ENABLE_DEBUG_MESSAGES
  if (m_params.m_debugging)
    return true;
#endif

  // A phase with 0 or 1 steps is reported as complete; this also avoids dividing by zero.
  const int percentage_complete = (subphase_total > 1) ? ((100 * subphase_index) / (subphase_total - 1)) : 100;
  const int phase = static_cast<int>(phase_index);

  if ((phase == m_prev_phase_index) && (m_prev_percentage_complete == percentage_complete))
    return !m_canceled;

  // Remember the phase as well as the percentage. This function previously never
  // wrote m_prev_phase_index, so the duplicate filter above could only match if
  // some other code happened to set it.
  m_prev_phase_index = phase;
  m_prev_percentage_complete = percentage_complete;

  const bool keep_going = (*m_params.m_pProgress_func)(phase_index, cTotalCompressionPhases, subphase_index, subphase_total, m_params.m_pProgress_func_data) != 0;
  if (!keep_going) {
    m_canceled = true;
    return false;
  }

  return true;
}
} // namespace crnlib