d0b6f5759b
This change simplifies further modification of the code. Explanation: Considering that chunks are no longer used in the output format, it makes sense to also remove chunk related code from the intermediate processing. This modification also allows to use endpoint references from the leftmost block to the rightmost block in the previous scanline (wrapped reference to the left). Testing: The modified algorithm has been tested on the Kodak test set using 64-bit build with default settings (running on Windows 10, i7-4790, 3.6GHz). All the decompressed test images are identical to the images being compressed and decompressed using original version of Crunch. [Compressing Kodak set without mipmaps] Original: 1582222 bytes / 28.846 sec Modified: 1494501 bytes / 25.628 sec Improvement: 5.54% (compression ratio) / 11.16% (compression time) [Compressing Kodak set with mipmaps] Original: 2065243 bytes / 36.869 sec Modified: 1945365 bytes / 33.497 sec Improvement: 5.80% (compression ratio) / 9.15% (compression time)
2377 lines
89 KiB
C++
2377 lines
89 KiB
C++
// File: crn_dxt_hc.cpp
|
|
// See Copyright Notice and license at the end of inc/crnlib.h
|
|
#include "crn_core.h"
|
|
#include "crn_dxt_hc.h"
|
|
#include "crn_image_utils.h"
|
|
#include "crn_console.h"
|
|
#include "crn_dxt_fast.h"
|
|
|
|
#define CRNLIB_USE_FAST_DXT 1
|
|
#define CRNLIB_ENABLE_DEBUG_MESSAGES 0
|
|
|
|
namespace crnlib {
|
|
static color_quad_u8 g_tile_layout_colors[cNumChunkTileLayouts] =
|
|
{
|
|
color_quad_u8(255, 90, 32, 255),
|
|
color_quad_u8(64, 210, 192, 255),
|
|
color_quad_u8(128, 16, 225, 255),
|
|
color_quad_u8(255, 192, 200, 255),
|
|
|
|
color_quad_u8(255, 128, 200, 255),
|
|
|
|
color_quad_u8(255, 0, 0, 255),
|
|
color_quad_u8(0, 255, 0, 255),
|
|
color_quad_u8(0, 0, 255, 255),
|
|
color_quad_u8(255, 0, 255, 255)};
|
|
|
|
dxt_hc::dxt_hc()
|
|
: m_num_chunks(0),
|
|
m_pChunks(NULL),
|
|
m_num_alpha_blocks(0),
|
|
m_has_color_blocks(false),
|
|
m_has_alpha0_blocks(false),
|
|
m_has_alpha1_blocks(false),
|
|
m_main_thread_id(crn_get_current_thread_id()),
|
|
m_canceled(false),
|
|
m_pTask_pool(NULL),
|
|
m_prev_phase_index(-1),
|
|
m_prev_percentage_complete(-1) {
|
|
utils::zero_object(m_encoding_hist);
|
|
}
|
|
|
|
dxt_hc::~dxt_hc() {
|
|
}
|
|
|
|
void dxt_hc::clear() {
|
|
m_num_chunks = 0;
|
|
m_pChunks = NULL;
|
|
|
|
m_num_alpha_blocks = 0;
|
|
m_has_color_blocks = false;
|
|
m_has_alpha0_blocks = false;
|
|
m_has_alpha1_blocks = false;
|
|
|
|
m_color_selectors.clear();
|
|
|
|
m_alpha_selectors.clear();
|
|
for (uint i = 0; i < cNumCompressedChunkVecs; i++)
|
|
m_compressed_chunks[i].clear();
|
|
|
|
utils::zero_object(m_encoding_hist);
|
|
|
|
m_total_tiles = 0;
|
|
|
|
m_color_clusters.clear();
|
|
m_alpha_clusters.clear();
|
|
m_color_selectors.clear();
|
|
m_alpha_selectors.clear();
|
|
|
|
m_chunk_blocks_using_color_selectors.clear();
|
|
m_chunk_blocks_using_alpha_selectors.clear();
|
|
|
|
m_color_endpoints.clear();
|
|
m_alpha_endpoints.clear();
|
|
|
|
m_dbg_chunk_pixels.clear();
|
|
m_dbg_chunk_pixels_tile_vis.clear();
|
|
m_dbg_chunk_pixels_color_quantized.clear();
|
|
m_dbg_chunk_pixels_alpha_quantized.clear();
|
|
|
|
m_dbg_chunk_pixels_quantized_color_selectors.clear();
|
|
m_dbg_chunk_pixels_orig_color_selectors.clear();
|
|
m_dbg_chunk_pixels_final_color_selectors.clear();
|
|
m_dbg_chunk_pixels_final_alpha_selectors.clear();
|
|
|
|
m_dbg_chunk_pixels_quantized_alpha_selectors.clear();
|
|
m_dbg_chunk_pixels_orig_alpha_selectors.clear();
|
|
m_dbg_chunk_pixels_final_alpha_selectors.clear();
|
|
|
|
m_dbg_chunk_pixels_final.clear();
|
|
|
|
m_canceled = false;
|
|
|
|
m_prev_phase_index = -1;
|
|
m_prev_percentage_complete = -1;
|
|
}
|
|
|
|
bool dxt_hc::compress(const params& p, uint num_chunks, const pixel_chunk* pChunks, task_pool& task_pool) {
|
|
m_pTask_pool = &task_pool;
|
|
m_main_thread_id = crn_get_current_thread_id();
|
|
|
|
bool result = compress_internal(p, num_chunks, pChunks);
|
|
|
|
m_pTask_pool = NULL;
|
|
|
|
return result;
|
|
}
|
|
|
|
bool dxt_hc::compress_internal(const params& p, uint num_chunks, const pixel_chunk* pChunks) {
|
|
if ((!num_chunks) || (!pChunks))
|
|
return false;
|
|
if ((m_params.m_format == cDXT1A) || (m_params.m_format == cDXT3))
|
|
return false;
|
|
|
|
clear();
|
|
|
|
m_params = p;
|
|
|
|
m_num_chunks = num_chunks;
|
|
m_pChunks = pChunks;
|
|
|
|
switch (m_params.m_format) {
|
|
case cDXT1: {
|
|
m_has_color_blocks = true;
|
|
break;
|
|
}
|
|
case cDXT5: {
|
|
m_has_color_blocks = true;
|
|
m_has_alpha0_blocks = true;
|
|
m_num_alpha_blocks = 1;
|
|
break;
|
|
}
|
|
case cDXT5A: {
|
|
m_has_alpha0_blocks = true;
|
|
m_num_alpha_blocks = 1;
|
|
break;
|
|
}
|
|
case cDXN_XY:
|
|
case cDXN_YX: {
|
|
m_has_alpha0_blocks = true;
|
|
m_has_alpha1_blocks = true;
|
|
m_num_alpha_blocks = 2;
|
|
break;
|
|
}
|
|
default: {
|
|
return false;
|
|
}
|
|
}
|
|
|
|
determine_compressed_chunks();
|
|
|
|
if (m_has_color_blocks) {
|
|
if (!determine_color_endpoint_clusters())
|
|
return false;
|
|
if (!determine_color_endpoint_codebook())
|
|
return false;
|
|
}
|
|
|
|
if (m_num_alpha_blocks) {
|
|
if (!determine_alpha_endpoint_clusters())
|
|
return false;
|
|
if (!determine_alpha_endpoint_codebook())
|
|
return false;
|
|
}
|
|
|
|
create_quantized_debug_images();
|
|
|
|
if (m_has_color_blocks) {
|
|
if (!create_selector_codebook(false))
|
|
return false;
|
|
}
|
|
|
|
if (m_num_alpha_blocks) {
|
|
if (!create_selector_codebook(true))
|
|
return false;
|
|
}
|
|
|
|
if (m_has_color_blocks) {
|
|
if (!refine_quantized_color_selectors())
|
|
return false;
|
|
|
|
if (!refine_quantized_color_endpoints())
|
|
return false;
|
|
}
|
|
|
|
if (m_num_alpha_blocks) {
|
|
if (!refine_quantized_alpha_endpoints())
|
|
return false;
|
|
|
|
if (!refine_quantized_alpha_selectors())
|
|
return false;
|
|
}
|
|
|
|
create_final_debug_image();
|
|
|
|
if (!create_block_encodings(p))
|
|
return false;
|
|
|
|
return true;
|
|
}
|
|
|
|
void dxt_hc::compress_dxt1_block(
|
|
dxt1_endpoint_optimizer::results& results,
|
|
uint chunk_index, const image_u8& chunk, uint x_ofs, uint y_ofs, uint width, uint height,
|
|
uint8* pColor_Selectors) {
|
|
chunk_index;
|
|
|
|
color_quad_u8 pixels[cChunkPixelWidth * cChunkPixelHeight];
|
|
|
|
for (uint y = 0; y < height; y++)
|
|
for (uint x = 0; x < width; x++)
|
|
pixels[x + y * width] = chunk(x_ofs + x, y_ofs + y);
|
|
|
|
//double s = image_utils::compute_std_dev(width * height, pixels, 0, 3);
|
|
|
|
#if CRNLIB_USE_FAST_DXT
|
|
uint low16, high16;
|
|
dxt_fast::compress_color_block(width * height, pixels, low16, high16, pColor_Selectors);
|
|
results.m_low_color = static_cast<uint16>(low16);
|
|
results.m_high_color = static_cast<uint16>(high16);
|
|
results.m_alpha_block = false;
|
|
results.m_error = INT_MAX;
|
|
results.m_pSelectors = pColor_Selectors;
|
|
#else
|
|
dxt1_endpoint_optimizer optimizer;
|
|
|
|
dxt1_endpoint_optimizer::params params;
|
|
params.m_block_index = chunk_index;
|
|
params.m_pPixels = pixels;
|
|
params.m_num_pixels = width * height;
|
|
params.m_pixels_have_alpha = false;
|
|
params.m_use_alpha_blocks = false;
|
|
params.m_perceptual = m_params.m_perceptual;
|
|
params.m_highest_quality = false; //false;
|
|
params.m_endpoint_caching = false;
|
|
|
|
results.m_pSelectors = pColor_Selectors;
|
|
|
|
optimizer.compute(params, results);
|
|
#endif
|
|
}
|
|
|
|
void dxt_hc::compress_dxt5_block(
|
|
dxt5_endpoint_optimizer::results& results,
|
|
uint chunk_index, const image_u8& chunk, uint x_ofs, uint y_ofs, uint width, uint height, uint component_index,
|
|
uint8* pAlpha_selectors) {
|
|
chunk_index;
|
|
|
|
color_quad_u8 pixels[cChunkPixelWidth * cChunkPixelHeight];
|
|
|
|
for (uint y = 0; y < height; y++)
|
|
for (uint x = 0; x < width; x++)
|
|
pixels[x + y * width] = chunk(x_ofs + x, y_ofs + y);
|
|
|
|
#if 0 //CRNLIB_USE_FAST_DXT
|
|
uint low, high;
|
|
dxt_fast::compress_alpha_block(width * height, pixels, low, high, pAlpha_selectors, component_index);
|
|
results.m_pSelectors = pAlpha_selectors;
|
|
results.m_error = INT_MAX;
|
|
results.m_first_endpoint = static_cast<uint8>(low);
|
|
results.m_second_endpoint = static_cast<uint8>(high);
|
|
results.m_block_type = 0;
|
|
#else
|
|
dxt5_endpoint_optimizer optimizer;
|
|
dxt5_endpoint_optimizer::params params;
|
|
params.m_block_index = chunk_index;
|
|
params.m_pPixels = pixels;
|
|
params.m_num_pixels = width * height;
|
|
params.m_comp_index = component_index;
|
|
params.m_use_both_block_types = false;
|
|
params.m_quality = cCRNDXTQualityNormal;
|
|
|
|
results.m_pSelectors = pAlpha_selectors;
|
|
|
|
optimizer.compute(params, results);
|
|
#endif
|
|
}
|
|
|
|
void dxt_hc::determine_compressed_chunks_task(uint64 data, void* pData_ptr) {
|
|
pData_ptr;
|
|
const uint thread_index = static_cast<uint>(data);
|
|
|
|
image_u8 orig_chunk;
|
|
image_u8 decomp_chunk[cNumChunkEncodings];
|
|
|
|
orig_chunk.resize(cChunkPixelWidth, cChunkPixelHeight);
|
|
for (uint i = 0; i < cNumChunkEncodings; i++)
|
|
decomp_chunk[i].resize(cChunkPixelWidth, cChunkPixelHeight);
|
|
|
|
image_utils::error_metrics color_error_metrics[cNumChunkEncodings];
|
|
dxt1_endpoint_optimizer::results color_optimizer_results[cNumChunkTileLayouts];
|
|
uint8 layout_color_selectors[cNumChunkTileLayouts][cChunkPixelWidth * cChunkPixelHeight];
|
|
|
|
image_utils::error_metrics alpha_error_metrics[2][cNumChunkEncodings];
|
|
dxt5_endpoint_optimizer::results alpha_optimizer_results[2][cNumChunkTileLayouts];
|
|
uint8 layout_alpha_selectors[2][cNumChunkTileLayouts][cChunkPixelWidth * cChunkPixelHeight];
|
|
|
|
uint first_layout = 0;
|
|
uint last_layout = cNumChunkTileLayouts;
|
|
|
|
uint first_encoding = 0;
|
|
uint last_encoding = cNumChunkEncodings;
|
|
|
|
if (!m_params.m_hierarchical) {
|
|
first_layout = cFirst4x4ChunkTileLayout;
|
|
first_encoding = cNumChunkEncodings - 1;
|
|
}
|
|
|
|
for (uint chunk_index = 0; chunk_index < m_num_chunks; chunk_index++) {
|
|
if (m_canceled)
|
|
return;
|
|
|
|
if ((crn_get_current_thread_id() == m_main_thread_id) && ((chunk_index & 511) == 0)) {
|
|
if (!update_progress(0, chunk_index, m_num_chunks))
|
|
return;
|
|
}
|
|
|
|
if (m_pTask_pool->get_num_threads()) {
|
|
if ((chunk_index % (m_pTask_pool->get_num_threads() + 1)) != thread_index)
|
|
continue;
|
|
}
|
|
|
|
uint level_index = 0;
|
|
for (uint i = 0; i < m_params.m_num_levels; i++) {
|
|
if ((chunk_index >= m_params.m_levels[i].m_first_chunk) && (chunk_index < m_params.m_levels[i].m_first_chunk + m_params.m_levels[i].m_num_chunks)) {
|
|
level_index = i;
|
|
break;
|
|
}
|
|
}
|
|
|
|
for (uint cy = 0; cy < cChunkPixelHeight; cy++)
|
|
for (uint cx = 0; cx < cChunkPixelWidth; cx++)
|
|
orig_chunk(cx, cy) = m_pChunks[chunk_index](cx, cy);
|
|
|
|
if (m_has_color_blocks) {
|
|
for (uint l = first_layout; l < last_layout; l++) {
|
|
utils::zero_object(layout_color_selectors[l]);
|
|
|
|
compress_dxt1_block(
|
|
color_optimizer_results[l], chunk_index,
|
|
orig_chunk,
|
|
g_chunk_tile_layouts[l].m_x_ofs, g_chunk_tile_layouts[l].m_y_ofs,
|
|
g_chunk_tile_layouts[l].m_width, g_chunk_tile_layouts[l].m_height,
|
|
layout_color_selectors[l]);
|
|
}
|
|
}
|
|
|
|
float alpha_layout_std_dev[2][cNumChunkTileLayouts];
|
|
utils::zero_object(alpha_layout_std_dev);
|
|
|
|
for (uint a = 0; a < m_num_alpha_blocks; a++) {
|
|
for (uint l = first_layout; l < last_layout; l++) {
|
|
utils::zero_object(layout_alpha_selectors[a][l]);
|
|
|
|
compress_dxt5_block(
|
|
alpha_optimizer_results[a][l], chunk_index,
|
|
orig_chunk,
|
|
g_chunk_tile_layouts[l].m_x_ofs, g_chunk_tile_layouts[l].m_y_ofs,
|
|
g_chunk_tile_layouts[l].m_width, g_chunk_tile_layouts[l].m_height,
|
|
m_params.m_alpha_component_indices[a],
|
|
layout_alpha_selectors[a][l]);
|
|
|
|
for (uint a = 0; a < m_num_alpha_blocks; a++) {
|
|
float mean = 0.0f;
|
|
float variance = 0.0f;
|
|
|
|
for (uint cy = 0; cy < g_chunk_tile_layouts[l].m_height; cy++) {
|
|
for (uint cx = 0; cx < g_chunk_tile_layouts[l].m_width; cx++) {
|
|
uint s = orig_chunk(cx + g_chunk_tile_layouts[l].m_x_ofs, cy + g_chunk_tile_layouts[l].m_y_ofs)[m_params.m_alpha_component_indices[a]];
|
|
|
|
mean += s;
|
|
variance += s * s;
|
|
} // cx
|
|
} //cy
|
|
|
|
float scale = 1.0f / (g_chunk_tile_layouts[l].m_width * g_chunk_tile_layouts[l].m_height);
|
|
|
|
mean *= scale;
|
|
variance *= scale;
|
|
|
|
variance -= mean * mean;
|
|
|
|
alpha_layout_std_dev[a][l] = sqrt(variance);
|
|
|
|
} //a
|
|
}
|
|
}
|
|
|
|
for (uint e = first_encoding; e < last_encoding; e++) {
|
|
for (uint t = 0; t < g_chunk_encodings[e].m_num_tiles; t++) {
|
|
const uint layout_index = g_chunk_encodings[e].m_tiles[t].m_layout_index;
|
|
CRNLIB_ASSERT((layout_index >= first_layout) && (layout_index < last_layout));
|
|
|
|
if (m_has_color_blocks) {
|
|
const dxt1_endpoint_optimizer::results& color_results = color_optimizer_results[layout_index];
|
|
const uint8* pColor_selectors = layout_color_selectors[layout_index];
|
|
|
|
color_quad_u8 block_colors[cDXT1SelectorValues];
|
|
CRNLIB_ASSERT(color_results.m_low_color >= color_results.m_high_color);
|
|
// it's okay if color_results.m_low_color == color_results.m_high_color, because in this case only selector 0 should be used
|
|
dxt1_block::get_block_colors4(block_colors, color_results.m_low_color, color_results.m_high_color);
|
|
|
|
for (uint cy = 0; cy < g_chunk_encodings[e].m_tiles[t].m_height; cy++) {
|
|
for (uint cx = 0; cx < g_chunk_encodings[e].m_tiles[t].m_width; cx++) {
|
|
uint s = pColor_selectors[cx + cy * g_chunk_encodings[e].m_tiles[t].m_width];
|
|
CRNLIB_ASSERT(s < cDXT1SelectorValues);
|
|
|
|
decomp_chunk[e](cx + g_chunk_encodings[e].m_tiles[t].m_x_ofs, cy + g_chunk_encodings[e].m_tiles[t].m_y_ofs) = block_colors[s];
|
|
}
|
|
}
|
|
}
|
|
|
|
for (uint a = 0; a < m_num_alpha_blocks; a++) {
|
|
const dxt5_endpoint_optimizer::results& alpha_results = alpha_optimizer_results[a][layout_index];
|
|
const uint8* pAlpha_selectors = layout_alpha_selectors[a][layout_index];
|
|
|
|
uint block_values[cDXT5SelectorValues];
|
|
CRNLIB_ASSERT(alpha_results.m_first_endpoint >= alpha_results.m_second_endpoint);
|
|
dxt5_block::get_block_values8(block_values, alpha_results.m_first_endpoint, alpha_results.m_second_endpoint);
|
|
|
|
for (uint cy = 0; cy < g_chunk_encodings[e].m_tiles[t].m_height; cy++) {
|
|
for (uint cx = 0; cx < g_chunk_encodings[e].m_tiles[t].m_width; cx++) {
|
|
uint s = pAlpha_selectors[cx + cy * g_chunk_encodings[e].m_tiles[t].m_width];
|
|
CRNLIB_ASSERT(s < cDXT5SelectorValues);
|
|
|
|
decomp_chunk[e](cx + g_chunk_encodings[e].m_tiles[t].m_x_ofs, cy + g_chunk_encodings[e].m_tiles[t].m_y_ofs)[m_params.m_alpha_component_indices[a]] =
|
|
static_cast<uint8>(block_values[s]);
|
|
}
|
|
}
|
|
}
|
|
} // t
|
|
|
|
if (m_params.m_hierarchical) {
|
|
if (m_has_color_blocks)
|
|
color_error_metrics[e].compute(decomp_chunk[e], orig_chunk, 0, 3);
|
|
|
|
for (uint a = 0; a < m_num_alpha_blocks; a++)
|
|
alpha_error_metrics[a][e].compute(decomp_chunk[e], orig_chunk, m_params.m_alpha_component_indices[a], 1);
|
|
}
|
|
} // e
|
|
|
|
uint best_encoding = cNumChunkEncodings - 1;
|
|
|
|
if (m_params.m_hierarchical) {
|
|
float quality[cNumChunkEncodings];
|
|
utils::zero_object(quality);
|
|
|
|
float best_quality = 0.0f;
|
|
|
|
best_encoding = 0;
|
|
|
|
for (uint e = 0; e < cNumChunkEncodings; e++) {
|
|
if (m_has_color_blocks) {
|
|
float adaptive_tile_color_psnr_derating = m_params.m_adaptive_tile_color_psnr_derating;
|
|
if ((level_index) && (adaptive_tile_color_psnr_derating > .25f)) {
|
|
//adaptive_tile_color_psnr_derating = math::lerp(adaptive_tile_color_psnr_derating * .5f, .3f, (level_index - 1) / math::maximum(1.0f, float(m_params.m_num_levels - 2)));
|
|
adaptive_tile_color_psnr_derating = math::maximum(.25f, adaptive_tile_color_psnr_derating / powf(3.0f, static_cast<float>(level_index)));
|
|
}
|
|
|
|
float color_derating = math::lerp(0.0f, adaptive_tile_color_psnr_derating, (g_chunk_encodings[e].m_num_tiles - 1) / 3.0f);
|
|
quality[e] = (float)math::maximum<double>(color_error_metrics[e].mPeakSNR - color_derating, 0.0f);
|
|
}
|
|
|
|
if (m_num_alpha_blocks) {
|
|
quality[e] *= m_params.m_adaptive_tile_color_alpha_weighting_ratio;
|
|
float alpha_derating = math::lerp(0.0f, m_params.m_adaptive_tile_alpha_psnr_derating, (g_chunk_encodings[e].m_num_tiles - 1) / 3.0f);
|
|
|
|
float max_std_dev = 0.0f;
|
|
|
|
for (uint a = 0; a < m_num_alpha_blocks; a++) {
|
|
quality[e] += (float)math::maximum<double>(alpha_error_metrics[a][e].mPeakSNR - alpha_derating, 0.0f);
|
|
|
|
for (uint t = 0; t < g_chunk_encodings[e].m_num_tiles; t++) {
|
|
float std_dev = alpha_layout_std_dev[a][g_chunk_encodings[e].m_tiles[t].m_layout_index];
|
|
max_std_dev = math::maximum(max_std_dev, std_dev);
|
|
}
|
|
}
|
|
|
|
#if 0
|
|
// rg [4/28/09] - disabling this because it's fucking up dxt5_xgbr normal maps
|
|
const float l = 6.0f;
|
|
const float k = .5f;
|
|
|
|
if (max_std_dev > l)
|
|
{
|
|
float s = max_std_dev - l;
|
|
quality[e] -= (k * s);
|
|
}
|
|
#endif
|
|
}
|
|
|
|
if (quality[e] > best_quality) {
|
|
best_quality = quality[e];
|
|
best_encoding = e;
|
|
}
|
|
}
|
|
}
|
|
|
|
atomic_increment32(&m_encoding_hist[best_encoding]);
|
|
|
|
atomic_exchange_add32(&m_total_tiles, g_chunk_encodings[best_encoding].m_num_tiles);
|
|
|
|
for (uint q = 0; q < cNumCompressedChunkVecs; q++) {
|
|
if (q == cColorChunks) {
|
|
if (!m_has_color_blocks)
|
|
continue;
|
|
} else if (q > m_num_alpha_blocks)
|
|
continue;
|
|
|
|
compressed_chunk& output = m_compressed_chunks[q][chunk_index];
|
|
|
|
output.m_encoding_index = static_cast<uint8>(best_encoding);
|
|
output.m_num_tiles = static_cast<uint8>(g_chunk_encodings[best_encoding].m_num_tiles);
|
|
|
|
for (uint t = 0; t < g_chunk_encodings[best_encoding].m_num_tiles; t++) {
|
|
const uint layout_index = g_chunk_encodings[best_encoding].m_tiles[t].m_layout_index;
|
|
|
|
output.m_tiles[t].m_layout_index = static_cast<uint8>(layout_index);
|
|
output.m_tiles[t].m_pixel_width = static_cast<uint8>(g_chunk_encodings[best_encoding].m_tiles[t].m_width);
|
|
output.m_tiles[t].m_pixel_height = static_cast<uint8>(g_chunk_encodings[best_encoding].m_tiles[t].m_height);
|
|
|
|
if (q == cColorChunks) {
|
|
const dxt1_endpoint_optimizer::results& color_results = color_optimizer_results[layout_index];
|
|
const uint8* pColor_selectors = layout_color_selectors[layout_index];
|
|
|
|
output.m_tiles[t].m_endpoint_cluster_index = 0;
|
|
output.m_tiles[t].m_first_endpoint = color_results.m_low_color;
|
|
output.m_tiles[t].m_second_endpoint = color_results.m_high_color;
|
|
|
|
memcpy(output.m_tiles[t].m_selectors, pColor_selectors, cChunkPixelWidth * cChunkPixelHeight);
|
|
output.m_tiles[t].m_alpha_encoding = color_results.m_alpha_block;
|
|
} else {
|
|
const uint a = q - cAlpha0Chunks;
|
|
|
|
const dxt5_endpoint_optimizer::results& alpha_results = alpha_optimizer_results[a][layout_index];
|
|
const uint8* pAlpha_selectors = layout_alpha_selectors[a][layout_index];
|
|
|
|
output.m_tiles[t].m_endpoint_cluster_index = 0;
|
|
output.m_tiles[t].m_first_endpoint = alpha_results.m_first_endpoint;
|
|
output.m_tiles[t].m_second_endpoint = alpha_results.m_second_endpoint;
|
|
|
|
memcpy(output.m_tiles[t].m_selectors, pAlpha_selectors, cChunkPixelWidth * cChunkPixelHeight);
|
|
output.m_tiles[t].m_alpha_encoding = alpha_results.m_block_type != 0;
|
|
}
|
|
} // t
|
|
} // q
|
|
|
|
if (m_params.m_debugging) {
|
|
for (uint y = 0; y < cChunkPixelHeight; y++)
|
|
for (uint x = 0; x < cChunkPixelWidth; x++)
|
|
m_dbg_chunk_pixels[chunk_index](x, y) = decomp_chunk[best_encoding](x, y);
|
|
|
|
for (uint t = 0; t < g_chunk_encodings[best_encoding].m_num_tiles; t++) {
|
|
const uint layout_index = g_chunk_encodings[best_encoding].m_tiles[t].m_layout_index;
|
|
|
|
const chunk_tile_desc& tile_desc = g_chunk_tile_layouts[layout_index];
|
|
|
|
for (uint ty = 0; ty < tile_desc.m_height; ty++)
|
|
for (uint tx = 0; tx < tile_desc.m_width; tx++)
|
|
m_dbg_chunk_pixels_tile_vis[chunk_index](tile_desc.m_x_ofs + tx, tile_desc.m_y_ofs + ty) = g_tile_layout_colors[layout_index];
|
|
}
|
|
}
|
|
|
|
} // chunk_index
|
|
}
|
|
|
|
bool dxt_hc::determine_compressed_chunks() {
|
|
utils::zero_object(m_encoding_hist);
|
|
|
|
for (uint i = 0; i < cNumCompressedChunkVecs; i++)
|
|
m_compressed_chunks[i].clear();
|
|
|
|
if (m_has_color_blocks)
|
|
m_compressed_chunks[cColorChunks].resize(m_num_chunks);
|
|
|
|
for (uint a = 0; a < m_num_alpha_blocks; a++)
|
|
m_compressed_chunks[cAlpha0Chunks + a].resize(m_num_chunks);
|
|
|
|
if (m_params.m_debugging) {
|
|
m_dbg_chunk_pixels.resize(m_num_chunks);
|
|
m_dbg_chunk_pixels_tile_vis.resize(m_num_chunks);
|
|
|
|
for (uint i = 0; i < m_num_chunks; i++) {
|
|
m_dbg_chunk_pixels[i].clear();
|
|
m_dbg_chunk_pixels_tile_vis[i].clear();
|
|
}
|
|
}
|
|
|
|
m_total_tiles = 0;
|
|
|
|
for (uint i = 0; i <= m_pTask_pool->get_num_threads(); i++)
|
|
m_pTask_pool->queue_object_task(this, &dxt_hc::determine_compressed_chunks_task, i);
|
|
|
|
m_pTask_pool->join();
|
|
if (m_canceled)
|
|
return false;
|
|
|
|
#if CRNLIB_ENABLE_DEBUG_MESSAGES
|
|
if (m_params.m_debugging) {
|
|
console::info("Total Pixels: %u, Chunks: %u, Blocks: %u, Adapted Tiles: %u", m_num_chunks * cChunkPixelWidth * cChunkPixelHeight, m_num_chunks, m_num_chunks * cChunkBlockWidth * cChunkBlockHeight, m_total_tiles);
|
|
|
|
console::info("Chunk encoding type symbol_histogram: ");
|
|
for (uint e = 0; e < cNumChunkEncodings; e++)
|
|
console::info("%u ", m_encoding_hist[e]);
|
|
|
|
console::info("Blocks per chunk encoding type: ");
|
|
for (uint e = 0; e < cNumChunkEncodings; e++)
|
|
console::info("%u ", m_encoding_hist[e] * cChunkBlockWidth * cChunkBlockHeight);
|
|
}
|
|
#endif
|
|
|
|
return true;
|
|
}
|
|
|
|
void dxt_hc::assign_color_endpoint_clusters_task(uint64 data, void* pData_ptr) {
|
|
const uint thread_index = (uint)data;
|
|
assign_color_endpoint_clusters_state& state = *static_cast<assign_color_endpoint_clusters_state*>(pData_ptr);
|
|
|
|
for (uint chunk_index = 0; chunk_index < m_num_chunks; chunk_index++) {
|
|
if (m_canceled)
|
|
return;
|
|
|
|
if ((crn_get_current_thread_id() == m_main_thread_id) && ((chunk_index & 63) == 0)) {
|
|
if (!update_progress(2, chunk_index, m_num_chunks))
|
|
return;
|
|
}
|
|
|
|
if (m_pTask_pool->get_num_threads()) {
|
|
if ((chunk_index % (m_pTask_pool->get_num_threads() + 1)) != thread_index)
|
|
continue;
|
|
}
|
|
|
|
compressed_chunk& chunk = m_compressed_chunks[cColorChunks][chunk_index];
|
|
|
|
for (uint tile_index = 0; tile_index < chunk.m_num_tiles; tile_index++) {
|
|
uint cluster_index = state.m_vq.find_best_codebook_entry_fs(state.m_training_vecs[chunk_index][tile_index]);
|
|
|
|
chunk.m_endpoint_cluster_index[tile_index] = static_cast<uint16>(cluster_index);
|
|
}
|
|
}
|
|
}
|
|
|
|
bool dxt_hc::determine_color_endpoint_clusters() {
|
|
if (!m_has_color_blocks)
|
|
return true;
|
|
|
|
#if CRNLIB_ENABLE_DEBUG_MESSAGES
|
|
if (m_params.m_debugging)
|
|
console::info("Generating color training vectors");
|
|
#endif
|
|
|
|
const float r_scale = .5f;
|
|
const float b_scale = .25f;
|
|
|
|
vec6F_tree_vq vq;
|
|
|
|
crnlib::vector<crnlib::vector<vec6F> > training_vecs;
|
|
|
|
training_vecs.resize(m_num_chunks);
|
|
|
|
for (uint chunk_index = 0; chunk_index < m_num_chunks; chunk_index++) {
|
|
if ((chunk_index & 255) == 0) {
|
|
if (!update_progress(1, chunk_index, m_num_chunks))
|
|
return false;
|
|
}
|
|
|
|
const compressed_chunk& chunk = m_compressed_chunks[cColorChunks][chunk_index];
|
|
|
|
training_vecs[chunk_index].resize(chunk.m_num_tiles);
|
|
|
|
for (uint tile_index = 0; tile_index < chunk.m_num_tiles; tile_index++) {
|
|
const compressed_tile& tile = chunk.m_tiles[tile_index];
|
|
|
|
const chunk_tile_desc& layout = g_chunk_tile_layouts[tile.m_layout_index];
|
|
|
|
tree_clusterizer<vec3F> palettizer;
|
|
for (uint y = 0; y < layout.m_height; y++) {
|
|
for (uint x = 0; x < layout.m_width; x++) {
|
|
const color_quad_u8& c = m_pChunks[chunk_index](layout.m_x_ofs + x, layout.m_y_ofs + y);
|
|
|
|
vec3F v;
|
|
if (m_params.m_perceptual) {
|
|
v.set(c[0] * 1.0f / 255.0f, c[1] * 1.0f / 255.0f, c[2] * 1.0f / 255.0f);
|
|
v[0] *= r_scale;
|
|
v[2] *= b_scale;
|
|
} else {
|
|
v.set(c[0] * 1.0f / 255.0f, c[1] * 1.0f / 255.0f, c[2] * 1.0f / 255.0f);
|
|
}
|
|
|
|
palettizer.add_training_vec(v, 1);
|
|
}
|
|
}
|
|
|
|
palettizer.generate_codebook(2);
|
|
|
|
uint tile_weight = tile.m_pixel_width * tile.m_pixel_height;
|
|
tile_weight = static_cast<uint>(tile_weight * m_pChunks[chunk_index].m_weight);
|
|
|
|
vec3F v[2];
|
|
utils::zero_object(v);
|
|
|
|
for (uint i = 0; i < palettizer.get_codebook_size(); i++)
|
|
v[i] = palettizer.get_codebook_entry(i);
|
|
|
|
if (palettizer.get_codebook_size() == 1)
|
|
v[1] = v[0];
|
|
if (v[0].length() > v[1].length())
|
|
utils::swap(v[0], v[1]);
|
|
|
|
vec6F vv;
|
|
for (uint i = 0; i < 2; i++) {
|
|
vv[i * 3 + 0] = v[i][0];
|
|
vv[i * 3 + 1] = v[i][1];
|
|
vv[i * 3 + 2] = v[i][2];
|
|
}
|
|
|
|
vq.add_training_vec(vv, tile_weight);
|
|
|
|
training_vecs[chunk_index][tile_index] = vv;
|
|
}
|
|
}
|
|
|
|
#if CRNLIB_ENABLE_DEBUG_MESSAGES
|
|
if (m_params.m_debugging)
|
|
console::info("Begin color cluster analysis");
|
|
timer t;
|
|
t.start();
|
|
#endif
|
|
|
|
uint codebook_size = math::minimum<uint>(m_total_tiles, m_params.m_color_endpoint_codebook_size);
|
|
vq.generate_codebook(codebook_size);
|
|
|
|
#if CRNLIB_ENABLE_DEBUG_MESSAGES
|
|
if (m_params.m_debugging) {
|
|
double total_time = t.get_elapsed_secs();
|
|
console::info("Codebook gen time: %3.3fs, Total color clusters: %u", total_time, vq.get_codebook_size());
|
|
}
|
|
#endif
|
|
|
|
m_color_clusters.resize(vq.get_codebook_size());
|
|
|
|
#if CRNLIB_ENABLE_DEBUG_MESSAGES
|
|
if (m_params.m_debugging)
|
|
console::info("Begin color cluster assignment");
|
|
#endif
|
|
|
|
assign_color_endpoint_clusters_state state(vq, training_vecs);
|
|
|
|
for (uint i = 0; i <= m_pTask_pool->get_num_threads(); i++)
|
|
m_pTask_pool->queue_object_task(this, &dxt_hc::assign_color_endpoint_clusters_task, i, &state);
|
|
|
|
m_pTask_pool->join();
|
|
if (m_canceled)
|
|
return false;
|
|
|
|
for (uint i = 0; i < m_num_chunks; i++) {
|
|
int chunk_index = m_pChunks[i].m_legacy_index;
|
|
compressed_chunk& chunk = m_compressed_chunks[cColorChunks][chunk_index];
|
|
for (uint tile_index = 0; tile_index < chunk.m_num_tiles; tile_index++) {
|
|
uint cluster_index = chunk.m_endpoint_cluster_index[tile_index];
|
|
m_color_clusters[cluster_index].m_tiles.push_back(std::make_pair(chunk_index, tile_index));
|
|
}
|
|
}
|
|
|
|
#if CRNLIB_ENABLE_DEBUG_MESSAGES
|
|
if (m_params.m_debugging)
|
|
console::info("Completed color cluster assignment");
|
|
#endif
|
|
|
|
return true;
|
|
}
|
|
|
|
void dxt_hc::determine_alpha_endpoint_clusters_task(uint64 data, void* pData_ptr) {
|
|
const uint thread_index = static_cast<uint>(data);
|
|
const determine_alpha_endpoint_clusters_state& state = *static_cast<determine_alpha_endpoint_clusters_state*>(pData_ptr);
|
|
|
|
for (uint a = 0; a < m_num_alpha_blocks; a++) {
|
|
for (uint chunk_index = 0; chunk_index < m_num_chunks; chunk_index++) {
|
|
if (m_canceled)
|
|
return;
|
|
|
|
if ((crn_get_current_thread_id() == m_main_thread_id) && ((chunk_index & 63) == 0)) {
|
|
if (!update_progress(7, m_num_chunks * a + chunk_index, m_num_chunks * m_num_alpha_blocks))
|
|
return;
|
|
}
|
|
|
|
if (m_pTask_pool->get_num_threads()) {
|
|
if ((chunk_index % (m_pTask_pool->get_num_threads() + 1)) != thread_index)
|
|
continue;
|
|
}
|
|
|
|
compressed_chunk& chunk = m_compressed_chunks[cAlpha0Chunks + a][chunk_index];
|
|
|
|
for (uint tile_index = 0; tile_index < chunk.m_num_tiles; tile_index++) {
|
|
uint cluster_index = state.m_vq.find_best_codebook_entry_fs(state.m_training_vecs[a][chunk_index][tile_index]);
|
|
|
|
chunk.m_endpoint_cluster_index[tile_index] = static_cast<uint16>(cluster_index);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
bool dxt_hc::determine_alpha_endpoint_clusters() {
|
|
if (!m_num_alpha_blocks)
|
|
return true;
|
|
|
|
#if CRNLIB_ENABLE_DEBUG_MESSAGES
|
|
if (m_params.m_debugging)
|
|
console::info("Generating alpha training vectors");
|
|
#endif
|
|
|
|
determine_alpha_endpoint_clusters_state state;
|
|
|
|
for (uint a = 0; a < m_num_alpha_blocks; a++) {
|
|
state.m_training_vecs[a].resize(m_num_chunks);
|
|
|
|
for (uint chunk_index = 0; chunk_index < m_num_chunks; chunk_index++) {
|
|
if ((chunk_index & 63) == 0) {
|
|
if (!update_progress(6, m_num_chunks * a + chunk_index, m_num_chunks * m_num_alpha_blocks))
|
|
return false;
|
|
}
|
|
|
|
const compressed_chunk& chunk = m_compressed_chunks[cAlpha0Chunks + a][chunk_index];
|
|
|
|
state.m_training_vecs[a][chunk_index].resize(chunk.m_num_tiles);
|
|
|
|
for (uint tile_index = 0; tile_index < chunk.m_num_tiles; tile_index++) {
|
|
const compressed_tile& tile = chunk.m_tiles[tile_index];
|
|
|
|
const chunk_tile_desc& layout = g_chunk_tile_layouts[tile.m_layout_index];
|
|
|
|
tree_clusterizer<vec1F> palettizer;
|
|
|
|
for (uint y = 0; y < layout.m_height; y++) {
|
|
for (uint x = 0; x < layout.m_width; x++) {
|
|
uint c = m_pChunks[chunk_index](layout.m_x_ofs + x, layout.m_y_ofs + y)[m_params.m_alpha_component_indices[a]];
|
|
|
|
vec1F v(c * 1.0f / 255.0f);
|
|
|
|
palettizer.add_training_vec(v, 1);
|
|
}
|
|
}
|
|
palettizer.generate_codebook(2);
|
|
|
|
const uint tile_weight = tile.m_pixel_width * tile.m_pixel_height;
|
|
|
|
vec1F v[2];
|
|
utils::zero_object(v);
|
|
|
|
for (uint i = 0; i < palettizer.get_codebook_size(); i++)
|
|
v[i] = palettizer.get_codebook_entry(i);
|
|
|
|
if (palettizer.get_codebook_size() == 1)
|
|
v[1] = v[0];
|
|
if (v[0] > v[1])
|
|
utils::swap(v[0], v[1]);
|
|
|
|
vec2F vv(v[0][0], v[1][0]);
|
|
|
|
state.m_vq.add_training_vec(vv, tile_weight);
|
|
|
|
state.m_training_vecs[a][chunk_index][tile_index] = vv;
|
|
|
|
} // tile_index
|
|
} // chunk_index
|
|
} // a
|
|
|
|
#if CRNLIB_ENABLE_DEBUG_MESSAGES
|
|
if (m_params.m_debugging)
|
|
console::info("Begin alpha cluster analysis");
|
|
timer t;
|
|
t.start();
|
|
#endif
|
|
|
|
uint codebook_size = math::minimum<uint>(m_total_tiles, m_params.m_alpha_endpoint_codebook_size);
|
|
state.m_vq.generate_codebook(codebook_size);
|
|
|
|
#if CRNLIB_ENABLE_DEBUG_MESSAGES
|
|
if (m_params.m_debugging) {
|
|
double total_time = t.get_elapsed_secs();
|
|
console::info("Codebook gen time: %3.3fs, Total alpha clusters: %u", total_time, state.m_vq.get_codebook_size());
|
|
}
|
|
#endif
|
|
|
|
m_alpha_clusters.resize(state.m_vq.get_codebook_size());
|
|
|
|
#if CRNLIB_ENABLE_DEBUG_MESSAGES
|
|
if (m_params.m_debugging)
|
|
console::info("Begin alpha cluster assignment");
|
|
#endif
|
|
|
|
for (uint i = 0; i <= m_pTask_pool->get_num_threads(); i++)
|
|
m_pTask_pool->queue_object_task(this, &dxt_hc::determine_alpha_endpoint_clusters_task, i, &state);
|
|
|
|
m_pTask_pool->join();
|
|
if (m_canceled)
|
|
return false;
|
|
|
|
for (uint a = 0; a < m_num_alpha_blocks; a++) {
|
|
for (uint i = 0; i < m_num_chunks; i++) {
|
|
int chunk_index = m_pChunks[i].m_legacy_index;
|
|
compressed_chunk& chunk = m_compressed_chunks[cAlpha0Chunks + a][chunk_index];
|
|
for (uint tile_index = 0; tile_index < chunk.m_num_tiles; tile_index++) {
|
|
const uint cluster_index = chunk.m_endpoint_cluster_index[tile_index];
|
|
m_alpha_clusters[cluster_index].m_tiles.push_back(std::make_pair(chunk_index, tile_index | (a << 16)));
|
|
}
|
|
}
|
|
}
|
|
|
|
#if CRNLIB_ENABLE_DEBUG_MESSAGES
|
|
if (m_params.m_debugging)
|
|
console::info("Completed alpha cluster assignment");
|
|
#endif
|
|
|
|
return true;
|
|
}
|
|
|
|
void dxt_hc::determine_color_endpoint_codebook_task(uint64 data, void* pData_ptr) {
|
|
pData_ptr;
|
|
const uint thread_index = static_cast<uint>(data);
|
|
|
|
if (!m_has_color_blocks)
|
|
return;
|
|
|
|
crnlib::vector<color_quad_u8> pixels;
|
|
pixels.reserve(512);
|
|
|
|
crnlib::vector<uint8> selectors;
|
|
|
|
uint total_pixels = 0;
|
|
|
|
uint total_empty_clusters = 0;
|
|
for (uint cluster_index = 0; cluster_index < m_color_clusters.size(); cluster_index++) {
|
|
if (m_canceled)
|
|
return;
|
|
|
|
if ((crn_get_current_thread_id() == m_main_thread_id) && ((cluster_index & 63) == 0)) {
|
|
if (!update_progress(3, cluster_index, m_color_clusters.size()))
|
|
return;
|
|
}
|
|
|
|
if (m_pTask_pool->get_num_threads()) {
|
|
if ((cluster_index % (m_pTask_pool->get_num_threads() + 1)) != thread_index)
|
|
continue;
|
|
}
|
|
|
|
tile_cluster& cluster = m_color_clusters[cluster_index];
|
|
if (cluster.m_tiles.empty()) {
|
|
total_empty_clusters++;
|
|
continue;
|
|
}
|
|
|
|
pixels.resize(0);
|
|
|
|
for (uint t = 0; t < cluster.m_tiles.size(); t++) {
|
|
const uint chunk_index = cluster.m_tiles[t].first;
|
|
const uint tile_index = cluster.m_tiles[t].second;
|
|
CRNLIB_ASSERT(chunk_index < m_num_chunks);
|
|
CRNLIB_ASSERT(tile_index < cChunkMaxTiles);
|
|
|
|
const compressed_chunk& chunk = m_compressed_chunks[cColorChunks][chunk_index];
|
|
|
|
CRNLIB_ASSERT(tile_index < chunk.m_num_tiles);
|
|
const compressed_tile& tile = chunk.m_tiles[tile_index];
|
|
|
|
const chunk_tile_desc& layout = g_chunk_tile_layouts[tile.m_layout_index];
|
|
|
|
for (uint y = 0; y < layout.m_height; y++)
|
|
for (uint x = 0; x < layout.m_width; x++)
|
|
pixels.push_back(m_pChunks[chunk_index](layout.m_x_ofs + x, layout.m_y_ofs + y));
|
|
}
|
|
|
|
total_pixels += pixels.size();
|
|
|
|
selectors.resize(pixels.size());
|
|
|
|
dxt1_endpoint_optimizer::params params;
|
|
params.m_block_index = cluster_index;
|
|
params.m_pPixels = &pixels[0];
|
|
params.m_num_pixels = pixels.size();
|
|
params.m_pixels_have_alpha = false;
|
|
params.m_use_alpha_blocks = false;
|
|
params.m_perceptual = m_params.m_perceptual;
|
|
params.m_quality = cCRNDXTQualityUber;
|
|
params.m_endpoint_caching = false;
|
|
|
|
dxt1_endpoint_optimizer::results results;
|
|
results.m_pSelectors = &selectors[0];
|
|
|
|
dxt1_endpoint_optimizer optimizer;
|
|
const bool all_transparent = optimizer.compute(params, results);
|
|
all_transparent;
|
|
|
|
cluster.m_first_endpoint = results.m_low_color;
|
|
cluster.m_second_endpoint = results.m_high_color;
|
|
cluster.m_alpha_encoding = results.m_alpha_block;
|
|
cluster.m_error = results.m_error;
|
|
|
|
uint pixel_index = 0;
|
|
|
|
for (uint t = 0; t < cluster.m_tiles.size(); t++) {
|
|
const uint chunk_index = cluster.m_tiles[t].first;
|
|
const uint tile_index = cluster.m_tiles[t].second;
|
|
|
|
CRNLIB_ASSERT(chunk_index < m_num_chunks);
|
|
|
|
compressed_chunk& chunk = m_compressed_chunks[cColorChunks][chunk_index];
|
|
|
|
CRNLIB_ASSERT(tile_index < chunk.m_num_tiles);
|
|
|
|
CRNLIB_ASSERT(chunk.m_endpoint_cluster_index[tile_index] == cluster_index);
|
|
|
|
const compressed_tile& tile = chunk.m_tiles[tile_index];
|
|
|
|
const chunk_tile_desc& layout = g_chunk_tile_layouts[tile.m_layout_index];
|
|
layout;
|
|
|
|
compressed_tile& quantized_tile = chunk.m_quantized_tiles[tile_index];
|
|
|
|
const uint total_pixels = tile.m_pixel_width * tile.m_pixel_height;
|
|
|
|
quantized_tile.m_endpoint_cluster_index = cluster_index;
|
|
quantized_tile.m_first_endpoint = results.m_low_color;
|
|
quantized_tile.m_second_endpoint = results.m_high_color;
|
|
//quantized_tile.m_error = results.m_error;
|
|
quantized_tile.m_alpha_encoding = results.m_alpha_block;
|
|
quantized_tile.m_pixel_width = tile.m_pixel_width;
|
|
quantized_tile.m_pixel_height = tile.m_pixel_height;
|
|
quantized_tile.m_layout_index = tile.m_layout_index;
|
|
|
|
memcpy(quantized_tile.m_selectors, &selectors[pixel_index], total_pixels);
|
|
|
|
pixel_index += total_pixels;
|
|
}
|
|
}
|
|
|
|
//CRNLIB_ASSERT(total_pixels == (m_num_chunks * cChunkPixelWidth * cChunkPixelHeight));
|
|
|
|
#if CRNLIB_ENABLE_DEBUG_MESSAGES
|
|
if (m_params.m_debugging) {
|
|
if (total_empty_clusters)
|
|
console::warning("Total empty color clusters: %u", total_empty_clusters);
|
|
}
|
|
#endif
|
|
}
|
|
|
|
bool dxt_hc::determine_color_endpoint_codebook() {
|
|
if (!m_has_color_blocks)
|
|
return true;
|
|
|
|
#if CRNLIB_ENABLE_DEBUG_MESSAGES
|
|
if (m_params.m_debugging)
|
|
console::info("Computing optimal color cluster endpoints");
|
|
#endif
|
|
|
|
for (uint i = 0; i <= m_pTask_pool->get_num_threads(); i++)
|
|
m_pTask_pool->queue_object_task(this, &dxt_hc::determine_color_endpoint_codebook_task, i, NULL);
|
|
|
|
m_pTask_pool->join();
|
|
|
|
return !m_canceled;
|
|
}
|
|
|
|
void dxt_hc::determine_alpha_endpoint_codebook_task(uint64 data, void* pData_ptr) {
|
|
pData_ptr;
|
|
|
|
const uint thread_index = static_cast<uint>(data);
|
|
|
|
crnlib::vector<color_quad_u8> pixels;
|
|
pixels.reserve(512);
|
|
|
|
crnlib::vector<uint8> selectors;
|
|
selectors.reserve(512);
|
|
|
|
uint total_empty_clusters = 0;
|
|
for (uint cluster_index = 0; cluster_index < m_alpha_clusters.size(); cluster_index++) {
|
|
if (m_canceled)
|
|
return;
|
|
|
|
if ((crn_get_current_thread_id() == m_main_thread_id) && ((cluster_index & 63) == 0)) {
|
|
if (!update_progress(8, cluster_index, m_alpha_clusters.size()))
|
|
return;
|
|
}
|
|
|
|
if (m_pTask_pool->get_num_threads()) {
|
|
if ((cluster_index % (m_pTask_pool->get_num_threads() + 1)) != thread_index)
|
|
continue;
|
|
}
|
|
|
|
tile_cluster& cluster = m_alpha_clusters[cluster_index];
|
|
if (cluster.m_tiles.empty()) {
|
|
total_empty_clusters++;
|
|
continue;
|
|
}
|
|
|
|
pixels.resize(0);
|
|
|
|
for (uint tile_iter = 0; tile_iter < cluster.m_tiles.size(); tile_iter++) {
|
|
const uint chunk_index = cluster.m_tiles[tile_iter].first;
|
|
const uint tile_index = cluster.m_tiles[tile_iter].second & 0xFFFFU;
|
|
const uint alpha_index = cluster.m_tiles[tile_iter].second >> 16U;
|
|
|
|
CRNLIB_ASSERT(chunk_index < m_num_chunks);
|
|
CRNLIB_ASSERT(tile_index < cChunkMaxTiles);
|
|
CRNLIB_ASSERT(alpha_index < m_num_alpha_blocks);
|
|
|
|
const compressed_chunk& chunk = m_compressed_chunks[cAlpha0Chunks + alpha_index][chunk_index];
|
|
|
|
CRNLIB_ASSERT(chunk.m_endpoint_cluster_index[tile_index] == cluster_index);
|
|
|
|
CRNLIB_ASSERT(tile_index < chunk.m_num_tiles);
|
|
const compressed_tile& tile = chunk.m_tiles[tile_index];
|
|
|
|
const chunk_tile_desc& layout = g_chunk_tile_layouts[tile.m_layout_index];
|
|
|
|
color_quad_u8 c(cClear);
|
|
|
|
for (uint y = 0; y < layout.m_height; y++) {
|
|
for (uint x = 0; x < layout.m_width; x++) {
|
|
c[0] = m_pChunks[chunk_index](layout.m_x_ofs + x, layout.m_y_ofs + y)[m_params.m_alpha_component_indices[alpha_index]];
|
|
|
|
pixels.push_back(c);
|
|
}
|
|
}
|
|
}
|
|
|
|
selectors.resize(pixels.size());
|
|
|
|
dxt5_endpoint_optimizer::params params;
|
|
params.m_block_index = cluster_index;
|
|
params.m_pPixels = &pixels[0];
|
|
params.m_num_pixels = pixels.size();
|
|
params.m_comp_index = 0;
|
|
params.m_quality = cCRNDXTQualityUber;
|
|
params.m_use_both_block_types = false;
|
|
|
|
dxt5_endpoint_optimizer::results results;
|
|
results.m_pSelectors = &selectors[0];
|
|
|
|
dxt5_endpoint_optimizer optimizer;
|
|
const bool all_transparent = optimizer.compute(params, results);
|
|
all_transparent;
|
|
|
|
cluster.m_first_endpoint = results.m_first_endpoint;
|
|
cluster.m_second_endpoint = results.m_second_endpoint;
|
|
cluster.m_alpha_encoding = results.m_block_type != 0;
|
|
cluster.m_error = results.m_error;
|
|
|
|
uint pixel_index = 0;
|
|
|
|
for (uint tile_iter = 0; tile_iter < cluster.m_tiles.size(); tile_iter++) {
|
|
const uint chunk_index = cluster.m_tiles[tile_iter].first;
|
|
const uint tile_index = cluster.m_tiles[tile_iter].second & 0xFFFFU;
|
|
const uint alpha_index = cluster.m_tiles[tile_iter].second >> 16U;
|
|
CRNLIB_ASSERT(chunk_index < m_num_chunks);
|
|
CRNLIB_ASSERT(tile_index < cChunkMaxTiles);
|
|
CRNLIB_ASSERT(alpha_index < m_num_alpha_blocks);
|
|
|
|
compressed_chunk& chunk = m_compressed_chunks[cAlpha0Chunks + alpha_index][chunk_index];
|
|
|
|
CRNLIB_ASSERT(chunk.m_endpoint_cluster_index[tile_index] == cluster_index);
|
|
|
|
CRNLIB_ASSERT(tile_index < chunk.m_num_tiles);
|
|
const compressed_tile& tile = chunk.m_tiles[tile_index];
|
|
|
|
const chunk_tile_desc& layout = g_chunk_tile_layouts[tile.m_layout_index];
|
|
layout;
|
|
|
|
compressed_tile& quantized_tile = chunk.m_quantized_tiles[tile_index];
|
|
|
|
const uint total_pixels = tile.m_pixel_width * tile.m_pixel_height;
|
|
|
|
quantized_tile.m_endpoint_cluster_index = cluster_index;
|
|
quantized_tile.m_first_endpoint = results.m_first_endpoint;
|
|
quantized_tile.m_second_endpoint = results.m_second_endpoint;
|
|
//quantized_tile.m_error = results.m_error;
|
|
quantized_tile.m_alpha_encoding = results.m_block_type != 0;
|
|
quantized_tile.m_pixel_width = tile.m_pixel_width;
|
|
quantized_tile.m_pixel_height = tile.m_pixel_height;
|
|
quantized_tile.m_layout_index = tile.m_layout_index;
|
|
|
|
memcpy(quantized_tile.m_selectors, &selectors[pixel_index], total_pixels);
|
|
|
|
pixel_index += total_pixels;
|
|
}
|
|
} // cluster_index
|
|
|
|
#if CRNLIB_ENABLE_DEBUG_MESSAGES
|
|
if (m_params.m_debugging) {
|
|
if (total_empty_clusters)
|
|
console::warning("Total empty alpha clusters: %u", total_empty_clusters);
|
|
}
|
|
#endif
|
|
}
|
|
|
|
bool dxt_hc::determine_alpha_endpoint_codebook() {
|
|
if (!m_num_alpha_blocks)
|
|
return true;
|
|
|
|
#if CRNLIB_ENABLE_DEBUG_MESSAGES
|
|
if (m_params.m_debugging)
|
|
console::info("Computing optimal alpha cluster endpoints");
|
|
#endif
|
|
|
|
for (uint i = 0; i <= m_pTask_pool->get_num_threads(); i++)
|
|
m_pTask_pool->queue_object_task(this, &dxt_hc::determine_alpha_endpoint_codebook_task, i, NULL);
|
|
|
|
m_pTask_pool->join();
|
|
|
|
return !m_canceled;
|
|
}
|
|
|
|
void dxt_hc::create_quantized_debug_images() {
|
|
if (!m_params.m_debugging)
|
|
return;
|
|
|
|
if (m_has_color_blocks) {
|
|
m_dbg_chunk_pixels_color_quantized.resize(m_num_chunks);
|
|
m_dbg_chunk_pixels_quantized_color_selectors.resize(m_num_chunks);
|
|
m_dbg_chunk_pixels_orig_color_selectors.resize(m_num_chunks);
|
|
|
|
for (uint i = 0; i < m_num_chunks; i++) {
|
|
m_dbg_chunk_pixels_color_quantized[i].clear();
|
|
m_dbg_chunk_pixels_quantized_color_selectors[i].clear();
|
|
m_dbg_chunk_pixels_orig_color_selectors[i].clear();
|
|
}
|
|
}
|
|
|
|
if (m_num_alpha_blocks) {
|
|
m_dbg_chunk_pixels_alpha_quantized.resize(m_num_chunks);
|
|
m_dbg_chunk_pixels_quantized_alpha_selectors.resize(m_num_chunks);
|
|
m_dbg_chunk_pixels_orig_alpha_selectors.resize(m_num_chunks);
|
|
|
|
for (uint i = 0; i < m_num_chunks; i++) {
|
|
m_dbg_chunk_pixels_alpha_quantized[i].clear();
|
|
m_dbg_chunk_pixels_quantized_alpha_selectors[i].clear();
|
|
m_dbg_chunk_pixels_orig_alpha_selectors[i].clear();
|
|
}
|
|
}
|
|
|
|
for (uint chunk_index = 0; chunk_index < m_num_chunks; chunk_index++) {
|
|
if (m_has_color_blocks) {
|
|
pixel_chunk& output_chunk_color_quantized = m_dbg_chunk_pixels_color_quantized[chunk_index];
|
|
pixel_chunk& output_chunk_selectors = m_dbg_chunk_pixels_quantized_color_selectors[chunk_index];
|
|
pixel_chunk& output_chunk_orig_selectors = m_dbg_chunk_pixels_orig_color_selectors[chunk_index];
|
|
|
|
const compressed_chunk& color_chunk = m_compressed_chunks[cColorChunks][chunk_index];
|
|
|
|
for (uint tile_index = 0; tile_index < color_chunk.m_num_tiles; tile_index++) {
|
|
const compressed_tile& quantized_tile = color_chunk.m_quantized_tiles[tile_index];
|
|
|
|
const chunk_tile_desc& layout = g_chunk_tile_layouts[quantized_tile.m_layout_index];
|
|
|
|
const uint8* pColor_Selectors = quantized_tile.m_selectors;
|
|
|
|
color_quad_u8 block_colors[cDXT1SelectorValues];
|
|
CRNLIB_ASSERT(quantized_tile.m_first_endpoint >= quantized_tile.m_second_endpoint);
|
|
dxt1_block::get_block_colors(block_colors, static_cast<uint16>(quantized_tile.m_first_endpoint), static_cast<uint16>(quantized_tile.m_second_endpoint));
|
|
|
|
for (uint y = 0; y < layout.m_height; y++) {
|
|
for (uint x = 0; x < layout.m_width; x++) {
|
|
const uint selector = pColor_Selectors[x + y * layout.m_width];
|
|
|
|
output_chunk_selectors(x + layout.m_x_ofs, y + layout.m_y_ofs) = selector * 255 / (cDXT1SelectorValues - 1);
|
|
|
|
output_chunk_orig_selectors(x + layout.m_x_ofs, y + layout.m_y_ofs) = color_chunk.m_tiles[tile_index].m_selectors[x + y * layout.m_width] * 255 / (cDXT1SelectorValues - 1);
|
|
|
|
output_chunk_color_quantized(x + layout.m_x_ofs, y + layout.m_y_ofs) = block_colors[selector];
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
for (uint a = 0; a < m_num_alpha_blocks; a++) {
|
|
pixel_chunk& output_chunk_alpha_quantized = m_dbg_chunk_pixels_alpha_quantized[chunk_index];
|
|
pixel_chunk& output_chunk_selectors = m_dbg_chunk_pixels_quantized_alpha_selectors[chunk_index];
|
|
pixel_chunk& output_chunk_orig_selectors = m_dbg_chunk_pixels_orig_alpha_selectors[chunk_index];
|
|
|
|
const compressed_chunk& alpha_chunk = m_compressed_chunks[cAlpha0Chunks + a][chunk_index];
|
|
|
|
for (uint tile_index = 0; tile_index < alpha_chunk.m_num_tiles; tile_index++) {
|
|
const compressed_tile& quantized_tile = alpha_chunk.m_quantized_tiles[tile_index];
|
|
|
|
const chunk_tile_desc& layout = g_chunk_tile_layouts[quantized_tile.m_layout_index];
|
|
|
|
const uint8* pAlpha_selectors = quantized_tile.m_selectors;
|
|
|
|
uint block_values[cDXT5SelectorValues];
|
|
CRNLIB_ASSERT(quantized_tile.m_first_endpoint >= quantized_tile.m_second_endpoint);
|
|
dxt5_block::get_block_values(block_values, quantized_tile.m_first_endpoint, quantized_tile.m_second_endpoint);
|
|
|
|
for (uint y = 0; y < layout.m_height; y++) {
|
|
for (uint x = 0; x < layout.m_width; x++) {
|
|
const uint selector = pAlpha_selectors[x + y * layout.m_width];
|
|
|
|
CRNLIB_ASSERT(selector < cDXT5SelectorValues);
|
|
|
|
output_chunk_selectors(x + layout.m_x_ofs, y + layout.m_y_ofs)[m_params.m_alpha_component_indices[a]] = static_cast<uint8>(selector * 255 / (cDXT5SelectorValues - 1));
|
|
|
|
output_chunk_orig_selectors(x + layout.m_x_ofs, y + layout.m_y_ofs)[m_params.m_alpha_component_indices[a]] = static_cast<uint8>(alpha_chunk.m_tiles[tile_index].m_selectors[x + y * layout.m_width] * 255 / (cDXT5SelectorValues - 1));
|
|
|
|
output_chunk_alpha_quantized(x + layout.m_x_ofs, y + layout.m_y_ofs)[m_params.m_alpha_component_indices[a]] = static_cast<uint8>(block_values[selector]);
|
|
}
|
|
}
|
|
}
|
|
} // a
|
|
}
|
|
}
|
|
|
|
void dxt_hc::create_selector_codebook_task(uint64 data, void* pData_ptr) {
|
|
const uint thread_index = static_cast<uint>(data);
|
|
const create_selector_codebook_state& state = *static_cast<create_selector_codebook_state*>(pData_ptr);
|
|
|
|
for (uint comp_chunk_index = state.m_comp_index_start; comp_chunk_index <= state.m_comp_index_end; comp_chunk_index++) {
|
|
const uint alpha_index = state.m_alpha_blocks ? (comp_chunk_index - cAlpha0Chunks) : 0;
|
|
const uint alpha_pixel_comp = state.m_alpha_blocks ? m_params.m_alpha_component_indices[alpha_index] : 0;
|
|
|
|
for (uint chunk_index = 0; chunk_index < m_num_chunks; chunk_index++) {
|
|
if (m_canceled)
|
|
return;
|
|
|
|
if ((crn_get_current_thread_id() == m_main_thread_id) && ((chunk_index & 127) == 0)) {
|
|
if (!update_progress(12 + comp_chunk_index, chunk_index, m_num_chunks))
|
|
return;
|
|
}
|
|
|
|
if (m_pTask_pool->get_num_threads()) {
|
|
if ((chunk_index % (m_pTask_pool->get_num_threads() + 1)) != thread_index)
|
|
continue;
|
|
}
|
|
|
|
compressed_chunk& chunk = m_compressed_chunks[comp_chunk_index][chunk_index];
|
|
|
|
for (uint tile_index = 0; tile_index < chunk.m_num_tiles; tile_index++) {
|
|
compressed_tile& quantized_tile = chunk.m_quantized_tiles[tile_index];
|
|
|
|
const chunk_tile_desc& layout = g_chunk_tile_layouts[quantized_tile.m_layout_index];
|
|
|
|
const uint tile_blocks_x = layout.m_width >> 2;
|
|
const uint tile_blocks_y = layout.m_height >> 2;
|
|
|
|
const uint tile_block_ofs_x = layout.m_x_ofs >> 2;
|
|
const uint tile_block_ofs_y = layout.m_y_ofs >> 2;
|
|
|
|
if (state.m_alpha_blocks) {
|
|
uint block_values[cDXT5SelectorValues];
|
|
dxt5_block::get_block_values(block_values, quantized_tile.m_first_endpoint, quantized_tile.m_second_endpoint);
|
|
|
|
for (uint by = 0; by < tile_blocks_y; by++) {
|
|
for (uint bx = 0; bx < tile_blocks_x; bx++) {
|
|
#if 0
|
|
uint best_index = selector_vq.find_best_codebook_entry_fs(training_vecs[comp_chunk_index][(tile_block_ofs_x+bx)+(tile_block_ofs_y+by)*2][chunk_index]);
|
|
#else
|
|
const dxt_pixel_block& block = m_pChunks[chunk_index].m_blocks[tile_block_ofs_y + by][tile_block_ofs_x + bx];
|
|
|
|
uint best_error = UINT_MAX;
|
|
uint best_index = 0;
|
|
|
|
for (uint i = 0; i < state.m_selectors_cb.size(); i++) {
|
|
const selectors& s = state.m_selectors_cb[i];
|
|
|
|
uint total_error = 0;
|
|
|
|
for (uint y = 0; y < cBlockPixelHeight; y++) {
|
|
for (uint x = 0; x < cBlockPixelWidth; x++) {
|
|
int a = block.m_pixels[y][x][alpha_pixel_comp];
|
|
int b = block_values[s.m_selectors[y][x]];
|
|
int error = a - b;
|
|
error *= error;
|
|
|
|
total_error += error;
|
|
if (total_error > best_error)
|
|
goto early_out;
|
|
} // x
|
|
} //y
|
|
|
|
early_out:
|
|
if (total_error < best_error) {
|
|
best_error = total_error;
|
|
best_index = i;
|
|
|
|
if (best_error == 0)
|
|
break;
|
|
}
|
|
} // i
|
|
#endif
|
|
|
|
CRNLIB_ASSERT((tile_block_ofs_x + bx) < 2);
|
|
CRNLIB_ASSERT((tile_block_ofs_y + by) < 2);
|
|
|
|
chunk.m_selector_cluster_index[tile_block_ofs_y + by][tile_block_ofs_x + bx] = static_cast<uint16>(best_index);
|
|
|
|
{
|
|
scoped_spinlock lock(state.m_chunk_blocks_using_selectors_lock);
|
|
state.m_chunk_blocks_using_selectors[best_index].push_back(block_id(chunk_index, alpha_index, tile_index, tile_block_ofs_x + bx, tile_block_ofs_y + by));
|
|
}
|
|
// std::make_pair(chunk_index, (tile_index << 16) | ((tile_block_ofs_y + by) << 8) | (tile_block_ofs_x + bx) ) );
|
|
|
|
} // bx
|
|
} // by
|
|
|
|
} else {
|
|
color_quad_u8 block_colors[cDXT1SelectorValues];
|
|
dxt1_block::get_block_colors4(block_colors, static_cast<uint16>(quantized_tile.m_first_endpoint), static_cast<uint16>(quantized_tile.m_second_endpoint));
|
|
|
|
const bool block_with_alpha = quantized_tile.m_first_endpoint == quantized_tile.m_second_endpoint;
|
|
|
|
for (uint by = 0; by < tile_blocks_y; by++) {
|
|
for (uint bx = 0; bx < tile_blocks_x; bx++) {
|
|
const dxt_pixel_block& block = m_pChunks[chunk_index].m_blocks[tile_block_ofs_y + by][tile_block_ofs_x + bx];
|
|
|
|
uint best_error = UINT_MAX;
|
|
uint best_index = 0;
|
|
|
|
for (uint i = 0; i < state.m_selectors_cb.size(); i++) {
|
|
const selectors& s = state.m_selectors_cb[i];
|
|
|
|
uint total_error = 0;
|
|
|
|
for (uint y = 0; y < cBlockPixelHeight; y++) {
|
|
for (uint x = 0; x < cBlockPixelWidth; x++) {
|
|
const color_quad_u8& a = block.m_pixels[y][x];
|
|
|
|
uint selector_index = s.m_selectors[y][x];
|
|
if ((block_with_alpha) && (selector_index == 3))
|
|
total_error += 999999;
|
|
|
|
const color_quad_u8& b = block_colors[selector_index];
|
|
|
|
uint error = color::color_distance(m_params.m_perceptual, a, b, false);
|
|
|
|
total_error += error;
|
|
if (total_error > best_error)
|
|
goto early_out2;
|
|
} // x
|
|
} //y
|
|
|
|
early_out2:
|
|
if (total_error < best_error) {
|
|
best_error = total_error;
|
|
best_index = i;
|
|
|
|
if (best_error == 0)
|
|
break;
|
|
}
|
|
} // i
|
|
|
|
CRNLIB_ASSERT((tile_block_ofs_x + bx) < 2);
|
|
CRNLIB_ASSERT((tile_block_ofs_y + by) < 2);
|
|
|
|
chunk.m_selector_cluster_index[tile_block_ofs_y + by][tile_block_ofs_x + bx] = static_cast<uint16>(best_index);
|
|
|
|
{
|
|
scoped_spinlock lock(state.m_chunk_blocks_using_selectors_lock);
|
|
state.m_chunk_blocks_using_selectors[best_index].push_back(block_id(chunk_index, 0, tile_index, tile_block_ofs_x + bx, tile_block_ofs_y + by));
|
|
}
|
|
// std::make_pair(chunk_index, (tile_index << 16) | ((tile_block_ofs_y + by) << 8) | (tile_block_ofs_x + bx) ) );
|
|
|
|
} // bx
|
|
} // by
|
|
|
|
} // if alpha_blocks
|
|
|
|
} // tile_index
|
|
|
|
} // chunk_index
|
|
|
|
} // comp_chunk_index
|
|
}
|
|
|
|
bool dxt_hc::create_selector_codebook(bool alpha_blocks) {
|
|
#if CRNLIB_ENABLE_DEBUG_MESSAGES
|
|
if (m_params.m_debugging)
|
|
console::info("Computing selector training vectors");
|
|
#endif
|
|
|
|
const uint cColorDistToWeight = 2000;
|
|
const uint cAlphaErrorToWeight = 8;
|
|
|
|
vec16F_tree_vq selector_vq;
|
|
|
|
uint comp_index_start = cColorChunks;
|
|
uint comp_index_end = cColorChunks;
|
|
if (alpha_blocks) {
|
|
comp_index_start = cAlpha0Chunks;
|
|
comp_index_end = cAlpha0Chunks + m_num_alpha_blocks - 1;
|
|
}
|
|
|
|
crnlib::vector<vec16F> training_vecs[cNumCompressedChunkVecs][4];
|
|
|
|
for (uint comp_chunk_index = comp_index_start; comp_chunk_index <= comp_index_end; comp_chunk_index++) {
|
|
for (uint i = 0; i < 4; i++)
|
|
training_vecs[comp_chunk_index][i].resize(m_num_chunks);
|
|
|
|
for (uint chunk_index = 0; chunk_index < m_num_chunks; chunk_index++) {
|
|
if ((chunk_index & 63) == 0) {
|
|
if (!update_progress(9 + comp_chunk_index, chunk_index, m_num_chunks))
|
|
return false;
|
|
}
|
|
|
|
const compressed_chunk& chunk = m_compressed_chunks[comp_chunk_index][chunk_index];
|
|
|
|
uint8 block_selectors[cChunkBlockWidth][cChunkBlockHeight][cBlockPixelWidth * cBlockPixelHeight];
|
|
uint block_weight[cChunkBlockWidth][cChunkBlockHeight];
|
|
|
|
for (uint tile_index = 0; tile_index < chunk.m_num_tiles; tile_index++) {
|
|
const compressed_tile& quantized_tile = chunk.m_quantized_tiles[tile_index];
|
|
|
|
uint weight;
|
|
if (comp_chunk_index == cColorChunks) {
|
|
const color_quad_u8 first_color(dxt1_block::unpack_color(static_cast<uint16>(quantized_tile.m_first_endpoint), true));
|
|
const color_quad_u8 second_color(dxt1_block::unpack_color(static_cast<uint16>(quantized_tile.m_second_endpoint), true));
|
|
const uint dist = color::color_distance(m_params.m_perceptual, first_color, second_color, false);
|
|
|
|
weight = dist / cColorDistToWeight;
|
|
|
|
weight = static_cast<uint>(weight * m_pChunks[chunk_index].m_weight);
|
|
} else {
|
|
int first_endpoint = quantized_tile.m_first_endpoint;
|
|
int second_endpoint = quantized_tile.m_second_endpoint;
|
|
int error = first_endpoint - second_endpoint;
|
|
error = error * error;
|
|
|
|
weight = static_cast<uint>(error / cAlphaErrorToWeight);
|
|
}
|
|
|
|
const uint cMaxWeight = 2048;
|
|
|
|
weight = math::clamp<uint>(weight, 1U, cMaxWeight);
|
|
|
|
// umm, this is a hack
|
|
float f = math::lerp(1.15f, 1.0f, chunk.m_encoding_index / float(cNumChunkEncodings - 1));
|
|
weight = (uint)(weight * f);
|
|
|
|
const chunk_tile_desc& layout = g_chunk_tile_layouts[quantized_tile.m_layout_index];
|
|
|
|
for (uint y = 0; y < (layout.m_height >> 2); y++)
|
|
for (uint x = 0; x < (layout.m_width >> 2); x++)
|
|
block_weight[x + (layout.m_x_ofs >> 2)][y + (layout.m_y_ofs >> 2)] = weight;
|
|
|
|
const uint8* pSelectors = quantized_tile.m_selectors;
|
|
|
|
for (uint y = 0; y < layout.m_height; y++) {
|
|
const uint cy = y + layout.m_y_ofs;
|
|
|
|
for (uint x = 0; x < layout.m_width; x++) {
|
|
const uint selector = pSelectors[x + y * layout.m_width];
|
|
|
|
if (comp_chunk_index == cColorChunks)
|
|
CRNLIB_ASSERT(selector < cDXT1SelectorValues);
|
|
else
|
|
CRNLIB_ASSERT(selector < cDXT5SelectorValues);
|
|
|
|
const uint cx = x + layout.m_x_ofs;
|
|
|
|
block_selectors[cx >> 2][cy >> 2][(cx & 3) + (cy & 3) * 4] = static_cast<uint8>(selector);
|
|
} // x
|
|
} // y
|
|
} // tile_index
|
|
|
|
vec16F v;
|
|
for (uint y = 0; y < cChunkBlockHeight; y++) {
|
|
for (uint x = 0; x < cChunkBlockWidth; x++) {
|
|
for (uint i = 0; i < cBlockPixelWidth * cBlockPixelHeight; i++) {
|
|
uint s = block_selectors[x][y][i];
|
|
|
|
float f;
|
|
|
|
if (comp_chunk_index == cColorChunks) {
|
|
CRNLIB_ASSERT(s < cDXT1SelectorValues);
|
|
f = (g_dxt1_to_linear[s] + .5f) * 1.0f / 4.0f;
|
|
} else {
|
|
CRNLIB_ASSERT(s < cDXT5SelectorValues);
|
|
f = (g_dxt5_to_linear[s] + .5f) * 1.0f / 8.0f;
|
|
}
|
|
|
|
CRNLIB_ASSERT((f >= 0.0f) && (f <= 1.0f));
|
|
|
|
v[i] = f;
|
|
} // i
|
|
|
|
selector_vq.add_training_vec(v, block_weight[x][y]);
|
|
|
|
training_vecs[comp_chunk_index][x + y * 2][chunk_index] = v;
|
|
} // x
|
|
} // y
|
|
|
|
} // chunk_index
|
|
|
|
} // comp_chunk_index
|
|
|
|
timer t;
|
|
t.start();
|
|
|
|
selector_vq.generate_codebook(alpha_blocks ? m_params.m_alpha_selector_codebook_size : m_params.m_color_selector_codebook_size);
|
|
|
|
#if CRNLIB_ENABLE_DEBUG_MESSAGES
|
|
if (m_params.m_debugging) {
|
|
double total_time = t.get_elapsed_secs();
|
|
console::info("Codebook gen time: %3.3fs, Selector codebook size: %u", total_time, selector_vq.get_codebook_size());
|
|
}
|
|
#endif
|
|
|
|
selectors_vec& selectors_cb = alpha_blocks ? m_alpha_selectors : m_color_selectors;
|
|
|
|
selectors_cb.resize(selector_vq.get_codebook_size());
|
|
|
|
for (uint i = 0; i < selector_vq.get_codebook_size(); i++) {
|
|
const vec16F& v = selector_vq.get_codebook_entry(i);
|
|
|
|
for (uint j = 0; j < cBlockPixelWidth * cBlockPixelHeight; j++) {
|
|
int s;
|
|
if (alpha_blocks) {
|
|
s = math::clamp<int>(static_cast<int>(v[j] * 8.0f), 0, 7);
|
|
s = g_dxt5_from_linear[s];
|
|
} else {
|
|
s = math::clamp<int>(static_cast<int>(v[j] * 4.0f), 0, 3);
|
|
s = g_dxt1_from_linear[s];
|
|
}
|
|
|
|
selectors_cb[i].m_selectors[j >> 2][j & 3] = static_cast<uint8>(s);
|
|
} // j
|
|
} // i
|
|
|
|
chunk_blocks_using_selectors_vec& chunk_blocks_using_selectors = alpha_blocks ? m_chunk_blocks_using_alpha_selectors : m_chunk_blocks_using_color_selectors;
|
|
|
|
chunk_blocks_using_selectors.clear();
|
|
chunk_blocks_using_selectors.resize(selectors_cb.size());
|
|
|
|
create_selector_codebook_state state(*this, alpha_blocks, comp_index_start, comp_index_end, selector_vq, chunk_blocks_using_selectors, selectors_cb);
|
|
|
|
for (uint i = 0; i <= m_pTask_pool->get_num_threads(); i++)
|
|
m_pTask_pool->queue_object_task(this, &dxt_hc::create_selector_codebook_task, i, &state);
|
|
|
|
m_pTask_pool->join();
|
|
|
|
return !m_canceled;
|
|
}
|
|
|
|
bool dxt_hc::refine_quantized_color_selectors() {
|
|
if (!m_has_color_blocks)
|
|
return true;
|
|
|
|
#if CRNLIB_ENABLE_DEBUG_MESSAGES
|
|
if (m_params.m_debugging)
|
|
console::info("Refining quantized color selectors");
|
|
#endif
|
|
|
|
uint total_refined_selectors = 0;
|
|
uint total_refined_pixels = 0;
|
|
uint total_selectors = 0;
|
|
|
|
for (uint selector_index = 0; selector_index < m_color_selectors.size(); selector_index++) {
|
|
if ((selector_index & 255) == 0) {
|
|
if (!update_progress(15, selector_index, m_color_selectors.size()))
|
|
return false;
|
|
}
|
|
|
|
if (m_chunk_blocks_using_color_selectors[selector_index].empty())
|
|
continue;
|
|
|
|
selectors& sel = m_color_selectors[selector_index];
|
|
|
|
for (uint y = 0; y < cBlockPixelHeight; y++) {
|
|
for (uint x = 0; x < cBlockPixelWidth; x++) {
|
|
uint best_s = 0;
|
|
uint best_error = UINT_MAX;
|
|
|
|
for (uint s = 0; s < cDXT1SelectorValues; s++) {
|
|
uint total_error = 0;
|
|
|
|
for (uint block_iter = 0; block_iter < m_chunk_blocks_using_color_selectors[selector_index].size(); block_iter++) {
|
|
const block_id& id = m_chunk_blocks_using_color_selectors[selector_index][block_iter];
|
|
const uint chunk_index = id.m_chunk_index;
|
|
const uint tile_index = id.m_tile_index;
|
|
const uint chunk_block_x = id.m_block_x;
|
|
const uint chunk_block_y = id.m_block_y;
|
|
|
|
CRNLIB_ASSERT((chunk_block_x < cChunkBlockWidth) && (chunk_block_y < cChunkBlockHeight));
|
|
|
|
const compressed_chunk& chunk = m_compressed_chunks[cColorChunks][chunk_index];
|
|
CRNLIB_ASSERT(tile_index < chunk.m_num_tiles);
|
|
|
|
CRNLIB_ASSERT(chunk.m_selector_cluster_index[chunk_block_y][chunk_block_x] == selector_index);
|
|
|
|
const compressed_tile& tile = chunk.m_quantized_tiles[tile_index];
|
|
|
|
//const chunk_tile_desc& tile_desc = g_chunk_tile_layouts[tile.m_layout_index];
|
|
|
|
color_quad_u8 block_colors[cDXT1SelectorValues];
|
|
CRNLIB_ASSERT(tile.m_first_endpoint >= tile.m_second_endpoint);
|
|
dxt1_block::get_block_colors4(block_colors, static_cast<uint16>(tile.m_first_endpoint), static_cast<uint16>(tile.m_second_endpoint));
|
|
|
|
if ((tile.m_first_endpoint == tile.m_second_endpoint) && (s == 3))
|
|
total_error += 999999;
|
|
|
|
const color_quad_u8& orig_pixel = m_pChunks[chunk_index](chunk_block_x * cBlockPixelWidth + x, chunk_block_y * cBlockPixelHeight + y);
|
|
const color_quad_u8& quantized_pixel = block_colors[s];
|
|
|
|
const uint error = color::color_distance(m_params.m_perceptual, orig_pixel, quantized_pixel, false);
|
|
total_error += error;
|
|
|
|
} // block_iter
|
|
|
|
if (total_error < best_error) {
|
|
best_error = total_error;
|
|
best_s = s;
|
|
}
|
|
|
|
} // s
|
|
|
|
if (sel.m_selectors[y][x] != best_s) {
|
|
total_refined_selectors++;
|
|
total_refined_pixels += m_chunk_blocks_using_color_selectors[selector_index].size();
|
|
sel.m_selectors[y][x] = static_cast<uint8>(best_s);
|
|
}
|
|
|
|
total_selectors++;
|
|
|
|
} //x
|
|
|
|
} //y
|
|
|
|
} // selector_index
|
|
|
|
#if CRNLIB_ENABLE_DEBUG_MESSAGES
|
|
if (m_params.m_debugging)
|
|
console::info("Total refined pixels: %u, selectors: %u out of %u", total_refined_pixels, total_refined_selectors, total_selectors);
|
|
#endif
|
|
|
|
uint selector_count = 0;
|
|
hash_map<uint32, uint> packed_selectors;
|
|
for (uint i = 0; i < m_color_selectors.size(); i++) {
|
|
if (m_chunk_blocks_using_color_selectors[i].size()) {
|
|
uint32 packed_selector = 0;
|
|
for (uint s = 0; s < 16; s++)
|
|
packed_selector |= m_color_selectors[i].get_by_index(s) << (s << 1);
|
|
hash_map<uint32, uint>::insert_result insert_result = packed_selectors.insert(packed_selector, selector_count);
|
|
if (insert_result.second) {
|
|
if (selector_count != i) {
|
|
m_color_selectors[selector_count] = m_color_selectors[i];
|
|
m_chunk_blocks_using_color_selectors[selector_count].swap(m_chunk_blocks_using_color_selectors[i]);
|
|
for (uint b = 0; b < m_chunk_blocks_using_color_selectors[selector_count].size(); b++) {
|
|
const block_id& id = m_chunk_blocks_using_color_selectors[selector_count][b];
|
|
m_compressed_chunks[cColorChunks][id.m_chunk_index].m_selector_cluster_index[id.m_block_y][id.m_block_x] = selector_count;
|
|
}
|
|
}
|
|
selector_count++;
|
|
} else {
|
|
for (uint b = 0; b < m_chunk_blocks_using_color_selectors[i].size(); b++) {
|
|
const block_id& id = m_chunk_blocks_using_color_selectors[i][b];
|
|
m_compressed_chunks[cColorChunks][id.m_chunk_index].m_selector_cluster_index[id.m_block_y][id.m_block_x] = insert_result.first->second;
|
|
}
|
|
m_chunk_blocks_using_color_selectors[insert_result.first->second].append(m_chunk_blocks_using_color_selectors[i]);
|
|
m_chunk_blocks_using_color_selectors[i].clear();
|
|
}
|
|
}
|
|
}
|
|
m_color_selectors.resize(selector_count);
|
|
m_chunk_blocks_using_color_selectors.resize(selector_count);
|
|
|
|
return true;
|
|
}
|
|
|
|
bool dxt_hc::refine_quantized_alpha_selectors() {
|
|
if (!m_num_alpha_blocks)
|
|
return true;
|
|
|
|
#if CRNLIB_ENABLE_DEBUG_MESSAGES
|
|
if (m_params.m_debugging)
|
|
console::info("Refining quantized alpha selectors");
|
|
#endif
|
|
|
|
uint total_refined_selectors = 0;
|
|
uint total_refined_pixels = 0;
|
|
uint total_selectors = 0;
|
|
|
|
for (uint selector_index = 0; selector_index < m_alpha_selectors.size(); selector_index++) {
|
|
if ((selector_index & 255) == 0) {
|
|
if (!update_progress(16, selector_index, m_alpha_selectors.size()))
|
|
return false;
|
|
}
|
|
|
|
if (m_chunk_blocks_using_alpha_selectors[selector_index].empty())
|
|
continue;
|
|
|
|
selectors& sel = m_alpha_selectors[selector_index];
|
|
|
|
for (uint y = 0; y < cBlockPixelHeight; y++) {
|
|
for (uint x = 0; x < cBlockPixelWidth; x++) {
|
|
uint best_s = 0;
|
|
uint best_error = UINT_MAX;
|
|
|
|
for (uint s = 0; s < cDXT5SelectorValues; s++) {
|
|
uint total_error = 0;
|
|
|
|
for (uint block_iter = 0; block_iter < m_chunk_blocks_using_alpha_selectors[selector_index].size(); block_iter++) {
|
|
const block_id& id = m_chunk_blocks_using_alpha_selectors[selector_index][block_iter];
|
|
const uint chunk_index = id.m_chunk_index;
|
|
const uint tile_index = id.m_tile_index;
|
|
const uint chunk_block_x = id.m_block_x;
|
|
const uint chunk_block_y = id.m_block_y;
|
|
const uint alpha_index = id.m_alpha_index;
|
|
CRNLIB_ASSERT(alpha_index < m_num_alpha_blocks);
|
|
|
|
CRNLIB_ASSERT((chunk_block_x < cChunkBlockWidth) && (chunk_block_y < cChunkBlockHeight));
|
|
|
|
const compressed_chunk& chunk = m_compressed_chunks[alpha_index + cAlpha0Chunks][chunk_index];
|
|
CRNLIB_ASSERT(tile_index < chunk.m_num_tiles);
|
|
|
|
CRNLIB_ASSERT(chunk.m_selector_cluster_index[chunk_block_y][chunk_block_x] == selector_index);
|
|
|
|
const compressed_tile& tile = chunk.m_quantized_tiles[tile_index];
|
|
|
|
//const chunk_tile_desc& tile_desc = g_chunk_tile_layouts[tile.m_layout_index];
|
|
|
|
uint block_values[cDXT5SelectorValues];
|
|
CRNLIB_ASSERT(tile.m_first_endpoint >= tile.m_second_endpoint);
|
|
dxt5_block::get_block_values(block_values, tile.m_first_endpoint, tile.m_second_endpoint);
|
|
|
|
int orig_value = m_pChunks[chunk_index](chunk_block_x * cBlockPixelWidth + x, chunk_block_y * cBlockPixelHeight + y)[m_params.m_alpha_component_indices[alpha_index]];
|
|
int quantized_value = block_values[s];
|
|
|
|
int error = (orig_value - quantized_value);
|
|
error *= error;
|
|
|
|
total_error += error;
|
|
|
|
} // block_iter
|
|
|
|
if (total_error < best_error) {
|
|
best_error = total_error;
|
|
best_s = s;
|
|
}
|
|
|
|
} // s
|
|
|
|
if (sel.m_selectors[y][x] != best_s) {
|
|
total_refined_selectors++;
|
|
total_refined_pixels += m_chunk_blocks_using_alpha_selectors[selector_index].size();
|
|
sel.m_selectors[y][x] = static_cast<uint8>(best_s);
|
|
}
|
|
|
|
total_selectors++;
|
|
|
|
} //x
|
|
|
|
} //y
|
|
|
|
} // selector_index
|
|
|
|
#if CRNLIB_ENABLE_DEBUG_MESSAGES
|
|
if (m_params.m_debugging)
|
|
console::info("Total refined pixels: %u, selectors: %u out of %u", total_refined_pixels, total_refined_selectors, total_selectors);
|
|
#endif
|
|
|
|
uint selector_count = 0;
|
|
hash_map<uint64, uint> packed_selectors;
|
|
for (uint i = 0; i < m_alpha_selectors.size(); i++) {
|
|
if (m_chunk_blocks_using_alpha_selectors[i].size()) {
|
|
uint64 packed_selector = 0;
|
|
for (uint s = 0; s < 16; s++)
|
|
packed_selector |= (uint64)m_alpha_selectors[i].get_by_index(s) << (s << 2);
|
|
hash_map<uint64, uint>::insert_result insert_result = packed_selectors.insert(packed_selector, selector_count);
|
|
if (insert_result.second) {
|
|
if (selector_count != i) {
|
|
m_alpha_selectors[selector_count] = m_alpha_selectors[i];
|
|
m_chunk_blocks_using_alpha_selectors[selector_count].swap(m_chunk_blocks_using_alpha_selectors[i]);
|
|
for (uint b = 0; b < m_chunk_blocks_using_alpha_selectors[selector_count].size(); b++) {
|
|
const block_id& id = m_chunk_blocks_using_alpha_selectors[selector_count][b];
|
|
m_compressed_chunks[cAlpha0Chunks + id.m_alpha_index][id.m_chunk_index].m_selector_cluster_index[id.m_block_y][id.m_block_x] = selector_count;
|
|
}
|
|
}
|
|
selector_count++;
|
|
} else {
|
|
for (uint b = 0; b < m_chunk_blocks_using_alpha_selectors[i].size(); b++) {
|
|
const block_id& id = m_chunk_blocks_using_alpha_selectors[i][b];
|
|
m_compressed_chunks[cAlpha0Chunks + id.m_alpha_index][id.m_chunk_index].m_selector_cluster_index[id.m_block_y][id.m_block_x] = insert_result.first->second;
|
|
}
|
|
m_chunk_blocks_using_alpha_selectors[insert_result.first->second].append(m_chunk_blocks_using_alpha_selectors[i]);
|
|
m_chunk_blocks_using_alpha_selectors[i].clear();
|
|
}
|
|
}
|
|
}
|
|
m_alpha_selectors.resize(selector_count);
|
|
m_chunk_blocks_using_alpha_selectors.resize(selector_count);
|
|
|
|
return true;
|
|
}
|
|
|
|
bool dxt_hc::refine_quantized_color_endpoints() {
|
|
if (!m_has_color_blocks)
|
|
return true;
|
|
|
|
uint total_refined_tiles = 0;
|
|
uint total_refined_pixels = 0;
|
|
|
|
#if CRNLIB_ENABLE_DEBUG_MESSAGES
|
|
if (m_params.m_debugging)
|
|
console::info("Refining quantized color endpoints");
|
|
#endif
|
|
|
|
for (uint cluster_index = 0; cluster_index < m_color_clusters.size(); cluster_index++) {
|
|
if ((cluster_index & 255) == 0) {
|
|
if (!update_progress(17, cluster_index, m_color_clusters.size()))
|
|
return false;
|
|
}
|
|
|
|
tile_cluster& cluster = m_color_clusters[cluster_index];
|
|
|
|
uint total_pixels = 0;
|
|
for (uint tile_iter = 0; tile_iter < cluster.m_tiles.size(); tile_iter++) {
|
|
const uint chunk_index = cluster.m_tiles[tile_iter].first;
|
|
const uint tile_index = cluster.m_tiles[tile_iter].second;
|
|
|
|
compressed_chunk& chunk = m_compressed_chunks[cColorChunks][chunk_index];
|
|
compressed_tile& tile = chunk.m_quantized_tiles[tile_index];
|
|
|
|
CRNLIB_ASSERT(tile.m_first_endpoint == cluster.m_first_endpoint);
|
|
CRNLIB_ASSERT(tile.m_second_endpoint == cluster.m_second_endpoint);
|
|
|
|
total_pixels += (tile.m_pixel_width * tile.m_pixel_height);
|
|
}
|
|
|
|
if (!total_pixels)
|
|
continue;
|
|
|
|
crnlib::vector<color_quad_u8> pixels;
|
|
crnlib::vector<uint8> selectors;
|
|
|
|
pixels.reserve(total_pixels);
|
|
selectors.reserve(total_pixels);
|
|
|
|
for (uint tile_iter = 0; tile_iter < cluster.m_tiles.size(); tile_iter++) {
|
|
const uint chunk_index = cluster.m_tiles[tile_iter].first;
|
|
const uint tile_index = cluster.m_tiles[tile_iter].second;
|
|
|
|
compressed_chunk& chunk = m_compressed_chunks[cColorChunks][chunk_index];
|
|
compressed_tile& tile = chunk.m_quantized_tiles[tile_index];
|
|
|
|
const pixel_chunk& src_pixels = m_pChunks[chunk_index];
|
|
|
|
CRNLIB_ASSERT(tile.m_first_endpoint == cluster.m_first_endpoint);
|
|
CRNLIB_ASSERT(tile.m_second_endpoint == cluster.m_second_endpoint);
|
|
|
|
const chunk_tile_desc& tile_layout = g_chunk_tile_layouts[tile.m_layout_index];
|
|
|
|
for (uint y = 0; y < tile.m_pixel_height; y++) {
|
|
for (uint x = 0; x < tile.m_pixel_width; x++) {
|
|
selectors.push_back(tile.m_selectors[x + y * tile.m_pixel_width]);
|
|
|
|
pixels.push_back(src_pixels(x + tile_layout.m_x_ofs, y + tile_layout.m_y_ofs));
|
|
}
|
|
}
|
|
}
|
|
|
|
dxt_endpoint_refiner refiner;
|
|
dxt_endpoint_refiner::params p;
|
|
dxt_endpoint_refiner::results r;
|
|
|
|
p.m_perceptual = m_params.m_perceptual;
|
|
p.m_pSelectors = &selectors[0];
|
|
p.m_pPixels = &pixels[0];
|
|
p.m_num_pixels = total_pixels;
|
|
p.m_dxt1_selectors = true;
|
|
p.m_error_to_beat = cluster.m_error;
|
|
p.m_block_index = cluster_index;
|
|
|
|
if (!refiner.refine(p, r))
|
|
continue;
|
|
|
|
total_refined_tiles++;
|
|
total_refined_pixels += total_pixels;
|
|
|
|
cluster.m_error = r.m_error;
|
|
|
|
cluster.m_first_endpoint = r.m_low_color;
|
|
cluster.m_second_endpoint = r.m_high_color;
|
|
|
|
for (uint tile_iter = 0; tile_iter < cluster.m_tiles.size(); tile_iter++) {
|
|
const uint chunk_index = cluster.m_tiles[tile_iter].first;
|
|
const uint tile_index = cluster.m_tiles[tile_iter].second;
|
|
|
|
compressed_chunk& chunk = m_compressed_chunks[cColorChunks][chunk_index];
|
|
compressed_tile& tile = chunk.m_quantized_tiles[tile_index];
|
|
|
|
tile.m_first_endpoint = r.m_low_color;
|
|
tile.m_second_endpoint = r.m_high_color;
|
|
}
|
|
}
|
|
|
|
#if CRNLIB_ENABLE_DEBUG_MESSAGES
|
|
if (m_params.m_debugging)
|
|
console::info("Total refined pixels: %u, endpoints: %u out of %u", total_refined_pixels, total_refined_tiles, m_color_clusters.size());
|
|
#endif
|
|
|
|
uint cluster_count = 0;
|
|
hash_map<uint64, uint> packed_clusters;
|
|
for (uint i = 0; i < m_color_clusters.size(); i++) {
|
|
tile_cluster& cluster = m_color_clusters[i];
|
|
if (cluster.m_tiles.size()) {
|
|
uint64 packed_cluster = cluster.m_first_endpoint | cluster.m_second_endpoint << 16 | (cluster.m_alpha_encoding ? 1ULL << 32 : 0);
|
|
hash_map<uint64, uint>::insert_result insert_result = packed_clusters.insert(packed_cluster, cluster_count);
|
|
if (insert_result.second) {
|
|
if (cluster_count != i) {
|
|
tile_cluster& destination_cluster = m_color_clusters[cluster_count];
|
|
destination_cluster.m_alpha_encoding = cluster.m_alpha_encoding;
|
|
destination_cluster.m_error = cluster.m_error;
|
|
destination_cluster.m_first_endpoint = cluster.m_first_endpoint;
|
|
destination_cluster.m_second_endpoint = cluster.m_second_endpoint;
|
|
destination_cluster.m_tiles.swap(cluster.m_tiles);
|
|
for (uint t = 0; t < destination_cluster.m_tiles.size(); t++) {
|
|
const uint chunk_index = destination_cluster.m_tiles[t].first;
|
|
const uint tile_index = destination_cluster.m_tiles[t].second;
|
|
compressed_tile& tile = m_compressed_chunks[cColorChunks][chunk_index].m_quantized_tiles[tile_index];
|
|
tile.m_first_endpoint = destination_cluster.m_first_endpoint;
|
|
tile.m_second_endpoint = destination_cluster.m_second_endpoint;
|
|
tile.m_endpoint_cluster_index = cluster_count;
|
|
}
|
|
}
|
|
cluster_count++;
|
|
} else {
|
|
tile_cluster& destination_cluster = m_color_clusters[insert_result.first->second];
|
|
for (uint t = 0; t < cluster.m_tiles.size(); t++) {
|
|
const uint chunk_index = cluster.m_tiles[t].first;
|
|
const uint tile_index = cluster.m_tiles[t].second;
|
|
compressed_tile& tile = m_compressed_chunks[cColorChunks][chunk_index].m_quantized_tiles[tile_index];
|
|
tile.m_endpoint_cluster_index = insert_result.first->second;
|
|
}
|
|
destination_cluster.m_error += cluster.m_error;
|
|
destination_cluster.m_tiles.append(cluster.m_tiles);
|
|
cluster.m_tiles.clear();
|
|
}
|
|
}
|
|
}
|
|
m_color_clusters.resize(cluster_count);
|
|
|
|
return true;
|
|
}
|
|
|
|
bool dxt_hc::refine_quantized_alpha_endpoints() {
|
|
if (!m_num_alpha_blocks)
|
|
return true;
|
|
|
|
uint total_refined_tiles = 0;
|
|
uint total_refined_pixels = 0;
|
|
#if CRNLIB_ENABLE_DEBUG_MESSAGES
|
|
if (m_params.m_debugging)
|
|
console::info("Refining quantized alpha endpoints");
|
|
#endif
|
|
|
|
for (uint cluster_index = 0; cluster_index < m_alpha_clusters.size(); cluster_index++) {
|
|
if ((cluster_index & 255) == 0) {
|
|
if (!update_progress(18, cluster_index, m_alpha_clusters.size()))
|
|
return false;
|
|
}
|
|
|
|
tile_cluster& cluster = m_alpha_clusters[cluster_index];
|
|
|
|
uint total_pixels = 0;
|
|
for (uint tile_iter = 0; tile_iter < cluster.m_tiles.size(); tile_iter++) {
|
|
const uint chunk_index = cluster.m_tiles[tile_iter].first;
|
|
const uint tile_index = cluster.m_tiles[tile_iter].second & 0xFFFFU;
|
|
const uint alpha_index = cluster.m_tiles[tile_iter].second >> 16U;
|
|
|
|
compressed_chunk& chunk = m_compressed_chunks[cAlpha0Chunks + alpha_index][chunk_index];
|
|
compressed_tile& tile = chunk.m_quantized_tiles[tile_index];
|
|
|
|
CRNLIB_ASSERT(tile.m_first_endpoint == cluster.m_first_endpoint);
|
|
CRNLIB_ASSERT(tile.m_second_endpoint == cluster.m_second_endpoint);
|
|
|
|
total_pixels += (tile.m_pixel_width * tile.m_pixel_height);
|
|
}
|
|
|
|
if (!total_pixels)
|
|
continue;
|
|
|
|
crnlib::vector<color_quad_u8> pixels;
|
|
crnlib::vector<uint8> selectors;
|
|
|
|
pixels.reserve(total_pixels);
|
|
selectors.reserve(total_pixels);
|
|
|
|
for (uint tile_iter = 0; tile_iter < cluster.m_tiles.size(); tile_iter++) {
|
|
const uint chunk_index = cluster.m_tiles[tile_iter].first;
|
|
const uint tile_index = cluster.m_tiles[tile_iter].second & 0xFFFFU;
|
|
const uint alpha_index = cluster.m_tiles[tile_iter].second >> 16U;
|
|
|
|
compressed_chunk& chunk = m_compressed_chunks[cAlpha0Chunks + alpha_index][chunk_index];
|
|
compressed_tile& tile = chunk.m_quantized_tiles[tile_index];
|
|
|
|
const pixel_chunk& src_pixels = m_pChunks[chunk_index];
|
|
|
|
CRNLIB_ASSERT(tile.m_first_endpoint == cluster.m_first_endpoint);
|
|
CRNLIB_ASSERT(tile.m_second_endpoint == cluster.m_second_endpoint);
|
|
|
|
const chunk_tile_desc& tile_layout = g_chunk_tile_layouts[tile.m_layout_index];
|
|
|
|
for (uint y = 0; y < tile.m_pixel_height; y++) {
|
|
for (uint x = 0; x < tile.m_pixel_width; x++) {
|
|
selectors.push_back(tile.m_selectors[x + y * tile.m_pixel_width]);
|
|
|
|
pixels.push_back(color_quad_u8(src_pixels(x + tile_layout.m_x_ofs, y + tile_layout.m_y_ofs)[m_params.m_alpha_component_indices[alpha_index]]));
|
|
}
|
|
}
|
|
}
|
|
|
|
dxt_endpoint_refiner refiner;
|
|
dxt_endpoint_refiner::params p;
|
|
dxt_endpoint_refiner::results r;
|
|
|
|
p.m_perceptual = m_params.m_perceptual;
|
|
p.m_pSelectors = &selectors[0];
|
|
p.m_pPixels = &pixels[0];
|
|
p.m_num_pixels = total_pixels;
|
|
p.m_dxt1_selectors = false;
|
|
p.m_error_to_beat = cluster.m_error;
|
|
p.m_block_index = cluster_index;
|
|
|
|
if (!refiner.refine(p, r))
|
|
continue;
|
|
|
|
total_refined_tiles++;
|
|
total_refined_pixels += total_pixels;
|
|
|
|
cluster.m_error = r.m_error;
|
|
|
|
cluster.m_first_endpoint = r.m_low_color;
|
|
cluster.m_second_endpoint = r.m_high_color;
|
|
|
|
for (uint tile_iter = 0; tile_iter < cluster.m_tiles.size(); tile_iter++) {
|
|
const uint chunk_index = cluster.m_tiles[tile_iter].first;
|
|
const uint tile_index = cluster.m_tiles[tile_iter].second & 0xFFFFU;
|
|
const uint alpha_index = cluster.m_tiles[tile_iter].second >> 16U;
|
|
|
|
compressed_chunk& chunk = m_compressed_chunks[cAlpha0Chunks + alpha_index][chunk_index];
|
|
compressed_tile& tile = chunk.m_quantized_tiles[tile_index];
|
|
|
|
tile.m_first_endpoint = r.m_low_color;
|
|
tile.m_second_endpoint = r.m_high_color;
|
|
}
|
|
}
|
|
|
|
#if CRNLIB_ENABLE_DEBUG_MESSAGES
|
|
if (m_params.m_debugging)
|
|
console::info("Total refined pixels: %u, endpoints: %u out of %u", total_refined_pixels, total_refined_tiles, m_alpha_clusters.size());
|
|
#endif
|
|
|
|
uint cluster_count = 0;
|
|
hash_map<uint64, uint> packed_clusters;
|
|
for (uint i = 0; i < m_alpha_clusters.size(); i++) {
|
|
tile_cluster& cluster = m_alpha_clusters[i];
|
|
if (cluster.m_tiles.size()) {
|
|
uint64 packed_cluster = cluster.m_first_endpoint | cluster.m_second_endpoint << 16 | (cluster.m_alpha_encoding ? 1ULL << 32 : 0);
|
|
hash_map<uint64, uint>::insert_result insert_result = packed_clusters.insert(packed_cluster, cluster_count);
|
|
if (insert_result.second) {
|
|
if (cluster_count != i) {
|
|
tile_cluster& destination_cluster = m_alpha_clusters[cluster_count];
|
|
destination_cluster.m_alpha_encoding = cluster.m_alpha_encoding;
|
|
destination_cluster.m_error = cluster.m_error;
|
|
destination_cluster.m_first_endpoint = cluster.m_first_endpoint;
|
|
destination_cluster.m_second_endpoint = cluster.m_second_endpoint;
|
|
destination_cluster.m_tiles.swap(cluster.m_tiles);
|
|
for (uint t = 0; t < destination_cluster.m_tiles.size(); t++) {
|
|
const uint chunk_index = destination_cluster.m_tiles[t].first;
|
|
const uint tile_index = destination_cluster.m_tiles[t].second & 0xFFFFU;
|
|
const uint alpha_index = destination_cluster.m_tiles[t].second >> 16U;
|
|
compressed_tile& tile = m_compressed_chunks[cAlpha0Chunks + alpha_index][chunk_index].m_quantized_tiles[tile_index];
|
|
tile.m_first_endpoint = destination_cluster.m_first_endpoint;
|
|
tile.m_second_endpoint = destination_cluster.m_second_endpoint;
|
|
tile.m_endpoint_cluster_index = cluster_count;
|
|
}
|
|
}
|
|
cluster_count++;
|
|
} else {
|
|
tile_cluster& destination_cluster = m_alpha_clusters[insert_result.first->second];
|
|
for (uint t = 0; t < cluster.m_tiles.size(); t++) {
|
|
const uint chunk_index = cluster.m_tiles[t].first;
|
|
const uint tile_index = cluster.m_tiles[t].second & 0xFFFFU;
|
|
const uint alpha_index = cluster.m_tiles[t].second >> 16U;
|
|
compressed_tile& tile = m_compressed_chunks[cAlpha0Chunks + alpha_index][chunk_index].m_quantized_tiles[tile_index];
|
|
tile.m_endpoint_cluster_index = insert_result.first->second;
|
|
}
|
|
destination_cluster.m_error += cluster.m_error;
|
|
destination_cluster.m_tiles.append(cluster.m_tiles);
|
|
cluster.m_tiles.clear();
|
|
}
|
|
}
|
|
}
|
|
m_alpha_clusters.resize(cluster_count);
|
|
|
|
return true;
|
|
}
|
|
|
|
void dxt_hc::create_final_debug_image() {
|
|
if (!m_params.m_debugging)
|
|
return;
|
|
|
|
m_dbg_chunk_pixels_final.resize(m_num_chunks);
|
|
for (uint i = 0; i < m_num_chunks; i++)
|
|
m_dbg_chunk_pixels_final[i].clear();
|
|
|
|
if (m_has_color_blocks) {
|
|
m_dbg_chunk_pixels_final_color_selectors.resize(m_num_chunks);
|
|
for (uint i = 0; i < m_num_chunks; i++)
|
|
m_dbg_chunk_pixels_final_color_selectors[i].clear();
|
|
}
|
|
|
|
if (m_num_alpha_blocks) {
|
|
m_dbg_chunk_pixels_final_alpha_selectors.resize(m_num_chunks);
|
|
for (uint i = 0; i < m_num_chunks; i++)
|
|
m_dbg_chunk_pixels_final_alpha_selectors[i].clear();
|
|
}
|
|
|
|
for (uint chunk_index = 0; chunk_index < m_num_chunks; chunk_index++) {
|
|
pixel_chunk& output_chunk_final = m_dbg_chunk_pixels_final[chunk_index];
|
|
|
|
if (m_has_color_blocks) {
|
|
const compressed_chunk& chunk = m_compressed_chunks[cColorChunks][chunk_index];
|
|
|
|
pixel_chunk& output_chunk_quantized_color_selectors = m_dbg_chunk_pixels_final_color_selectors[chunk_index];
|
|
|
|
for (uint tile_index = 0; tile_index < chunk.m_num_tiles; tile_index++) {
|
|
const compressed_tile& quantized_tile = chunk.m_quantized_tiles[tile_index];
|
|
|
|
const chunk_tile_desc& layout = g_chunk_tile_layouts[quantized_tile.m_layout_index];
|
|
|
|
color_quad_u8 block_colors[cDXT1SelectorValues];
|
|
dxt1_block::get_block_colors(block_colors, static_cast<uint16>(quantized_tile.m_first_endpoint), static_cast<uint16>(quantized_tile.m_second_endpoint));
|
|
|
|
for (uint y = 0; y < layout.m_height; y++) {
|
|
for (uint x = 0; x < layout.m_width; x++) {
|
|
const uint chunk_x_ofs = x + layout.m_x_ofs;
|
|
const uint chunk_y_ofs = y + layout.m_y_ofs;
|
|
const uint block_x = chunk_x_ofs >> 2;
|
|
const uint block_y = chunk_y_ofs >> 2;
|
|
const selectors& s = m_color_selectors[chunk.m_selector_cluster_index[block_y][block_x]];
|
|
|
|
uint selector = s.m_selectors[chunk_y_ofs & 3][chunk_x_ofs & 3];
|
|
|
|
output_chunk_final(x + layout.m_x_ofs, y + layout.m_y_ofs) = block_colors[selector];
|
|
output_chunk_quantized_color_selectors(x + layout.m_x_ofs, y + layout.m_y_ofs) = g_tile_layout_colors[selector];
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if (m_num_alpha_blocks) {
|
|
pixel_chunk& output_chunk_quantized_alpha_selectors = m_dbg_chunk_pixels_final_alpha_selectors[chunk_index];
|
|
|
|
for (uint a = 0; a < m_num_alpha_blocks; a++) {
|
|
const compressed_chunk& chunk = m_compressed_chunks[cAlpha0Chunks + a][chunk_index];
|
|
|
|
for (uint tile_index = 0; tile_index < chunk.m_num_tiles; tile_index++) {
|
|
const compressed_tile& quantized_tile = chunk.m_quantized_tiles[tile_index];
|
|
|
|
const chunk_tile_desc& layout = g_chunk_tile_layouts[quantized_tile.m_layout_index];
|
|
|
|
uint block_values[cDXT5SelectorValues];
|
|
|
|
// purposely call the general version to debug single color alpah6 blocks
|
|
CRNLIB_ASSERT(quantized_tile.m_first_endpoint >= quantized_tile.m_second_endpoint);
|
|
dxt5_block::get_block_values(block_values, quantized_tile.m_first_endpoint, quantized_tile.m_second_endpoint);
|
|
|
|
for (uint y = 0; y < layout.m_height; y++) {
|
|
for (uint x = 0; x < layout.m_width; x++) {
|
|
const uint chunk_x_ofs = x + layout.m_x_ofs;
|
|
const uint chunk_y_ofs = y + layout.m_y_ofs;
|
|
const uint block_x = chunk_x_ofs >> 2;
|
|
const uint block_y = chunk_y_ofs >> 2;
|
|
const selectors& s = m_alpha_selectors[chunk.m_selector_cluster_index[block_y][block_x]];
|
|
|
|
uint selector = s.m_selectors[chunk_y_ofs & 3][chunk_x_ofs & 3];
|
|
|
|
CRNLIB_ASSERT(selector < cDXT5SelectorValues);
|
|
|
|
output_chunk_final(x + layout.m_x_ofs, y + layout.m_y_ofs)[m_params.m_alpha_component_indices[a]] = static_cast<uint8>(block_values[selector]);
|
|
|
|
output_chunk_quantized_alpha_selectors(x + layout.m_x_ofs, y + layout.m_y_ofs)[m_params.m_alpha_component_indices[a]] = static_cast<uint8>(selector * 255 / (cDXT5SelectorValues - 1));
|
|
} //x
|
|
} // y
|
|
} // tile_index
|
|
|
|
} // a
|
|
}
|
|
} // chunk_index
|
|
}
|
|
|
|
bool dxt_hc::create_block_encodings(const params& p) {
|
|
crnlib::vector<endpoint_indices_details>& m_endpoint_indices = *p.m_endpoint_indices;
|
|
crnlib::vector<selector_indices_details>& m_selector_indices = *p.m_selector_indices;
|
|
|
|
uint8 tile_map[8][2][2] = {
|
|
{{ 0, 0 }, { 0, 0 }},
|
|
{{ 0, 0 }, { 1, 1 }},
|
|
{{ 0, 1 }, { 0, 1 }},
|
|
{{ 0, 0 }, { 1, 2 }},
|
|
{{ 1, 2 }, { 0, 0 }},
|
|
{{ 0, 1 }, { 0, 2 }},
|
|
{{ 1, 0 }, { 2, 0 }},
|
|
{{ 0, 1 }, { 2, 3 }},
|
|
};
|
|
|
|
m_endpoint_indices.resize(m_num_chunks << 2);
|
|
m_selector_indices.resize(m_num_chunks << 2);
|
|
bool hasBlocks[cNumCompressedChunkVecs] = {m_has_color_blocks, m_num_alpha_blocks > 0, m_num_alpha_blocks > 1};
|
|
|
|
for (uint level = 0; level < p.m_num_levels; level++) {
|
|
uint first_chunk = p.m_levels[level].m_first_chunk;
|
|
uint end_chunk = p.m_levels[level].m_first_chunk + p.m_levels[level].m_num_chunks;
|
|
uint chunk_width = p.m_levels[level].m_chunk_width;
|
|
uint block_width = chunk_width << 1;
|
|
for (uint b = first_chunk << 2, cy = 0, chunk_base = first_chunk; chunk_base < end_chunk; chunk_base += chunk_width, cy++) {
|
|
for (uint by = 0; by < 2; by++) {
|
|
for (uint cx = 0; cx < chunk_width; cx++) {
|
|
for (uint bx = 0; bx < 2; bx++, b++) {
|
|
bool top_match = cy || by;
|
|
bool left_match = top_match || cx || bx;
|
|
for (uint c = 0; c < cNumCompressedChunkVecs; c++) {
|
|
if (hasBlocks[c]) {
|
|
const compressed_chunk& chunk = m_compressed_chunks[c][chunk_base + cx];
|
|
uint16 endpoint_index = chunk.m_quantized_tiles[tile_map[chunk.m_encoding_index][by][bx]].m_endpoint_cluster_index;
|
|
left_match = left_match && endpoint_index == m_endpoint_indices[b - 1].component[c];
|
|
top_match = top_match && endpoint_index == m_endpoint_indices[b - block_width].component[c];
|
|
m_endpoint_indices[b].component[c] = endpoint_index;
|
|
m_selector_indices[b].component[c] = chunk.m_selector_cluster_index[by][bx];
|
|
}
|
|
}
|
|
m_endpoint_indices[b].reference = left_match ? 1 : top_match ? 2 : 0;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if (m_has_color_blocks) {
|
|
m_color_endpoints.resize(m_color_clusters.size());
|
|
for (uint i = 0; i < m_color_clusters.size(); i++)
|
|
m_color_endpoints[i] = dxt1_block::pack_endpoints(m_color_clusters[i].m_first_endpoint, m_color_clusters[i].m_second_endpoint);
|
|
}
|
|
|
|
if (m_num_alpha_blocks) {
|
|
m_alpha_endpoints.resize(m_alpha_clusters.size());
|
|
for (uint i = 0; i < m_alpha_clusters.size(); i++)
|
|
m_alpha_endpoints[i] = dxt5_block::pack_endpoints(m_alpha_clusters[i].m_first_endpoint, m_alpha_clusters[i].m_second_endpoint);
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool dxt_hc::update_progress(uint phase_index, uint subphase_index, uint subphase_total) {
|
|
CRNLIB_ASSERT(crn_get_current_thread_id() == m_main_thread_id);
|
|
|
|
if (!m_params.m_pProgress_func)
|
|
return true;
|
|
|
|
#if CRNLIB_ENABLE_DEBUG_MESSAGES
|
|
if (m_params.m_debugging)
|
|
return true;
|
|
#endif
|
|
|
|
const int percentage_complete = (subphase_total > 1) ? ((100 * subphase_index) / (subphase_total - 1)) : 100;
|
|
if (((int)phase_index == m_prev_phase_index) && (m_prev_percentage_complete == percentage_complete))
|
|
return !m_canceled;
|
|
|
|
m_prev_percentage_complete = percentage_complete;
|
|
|
|
bool status = (*m_params.m_pProgress_func)(phase_index, cTotalCompressionPhases, subphase_index, subphase_total, m_params.m_pProgress_func_data) != 0;
|
|
if (!status) {
|
|
m_canceled = true;
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
} // namespace crnlib
|