Files
unity/crnlib/crn_qdxt1.cpp
T
Alexander Suvorov 3e12aff909 Fix miscellaneous compiler warnings
DXT Testing:

The modified algorithm has been tested on the Kodak test set using a 64-bit build with default settings (running on Windows 10, i7-4790, 3.6GHz). All decompressed test images are identical to the images compressed and decompressed using the original version of Crunch (revision ea9b8d8).

[Compressing Kodak set without mipmaps using DXT1 encoding]
Original: 1582222 bytes / 28.866 sec
Modified: 1468204 bytes / 11.858 sec
Improvement: 7.21% (compression ratio) / 58.92% (compression time)

[Compressing Kodak set with mipmaps using DXT1 encoding]
Original: 2065243 bytes / 36.878 sec
Modified: 1914805 bytes / 15.625 sec
Improvement: 7.28% (compression ratio) / 57.63% (compression time)

ETC Testing:

The modified algorithm has been tested on the Kodak test set using a 64-bit build with default settings (running on Windows 10, i7-4790, 3.6GHz). The ETC1 quantization parameters have been selected so that ETC1 compression gives approximately the same average Luma PSNR as the corresponding DXT1 compression (which is equal to 34.044 dB for the Kodak test set compressed without mipmaps using DXT1 encoding and default quality settings).

[Compressing Kodak set without mipmaps using ETC1 encoding]
Total size: 1607858 bytes
Total time: 17.181 sec
Average bitrate: 1.363 bpp
Average Luma PSNR: 34.050 dB
2017-09-11 13:52:21 +02:00

841 lines
30 KiB
C++

// File: crn_qdxt1.cpp
// See Copyright Notice and license at the end of inc/crnlib.h
#include "crn_core.h"
#include "crn_qdxt1.h"
#include "crn_dxt1.h"
#include "crn_dxt_fast.h"
#include "crn_image_utils.h"
#include "crn_dxt_hc_common.h"
// Set to 1 to compile in the per-mip-level "debug_%u.tga" dump inside
// qdxt1::init(). The dump is additionally guarded by init()'s local
// `debugging` flag, so both must be enabled for a file to be written.
#define GENERATE_DEBUG_IMAGES 0
namespace crnlib {
// Constructs an idle quantizer bound to the given task pool.
// The pool is kept by pointer (m_pTask_pool) and is also handed to the selector
// clusterizer. All scalar members start at the same values clear() resets them
// to: no blocks attached, progress range 0..100, nothing reported yet (-1).
qdxt1::qdxt1(task_pool& task_pool)
: m_pTask_pool(&task_pool),
m_main_thread_id(0),
m_canceled(false),
m_progress_start(0),
m_progress_range(100),
m_num_blocks(0),
m_pBlocks(NULL),
m_pDst_elements(NULL),
m_elements_per_block(0),
m_max_selector_clusters(0),
m_prev_percentage_complete(-1),
m_selector_clusterizer(task_pool) {
}
// Destructor: performs no explicit work; the source and destination block
// pointers are never freed here.
qdxt1::~qdxt1() {
}
void qdxt1::clear() {
m_main_thread_id = 0;
m_num_blocks = 0;
m_pBlocks = 0;
m_pDst_elements = NULL;
m_elements_per_block = 0;
m_params.clear();
m_endpoint_clusterizer.clear();
m_endpoint_cluster_indices.clear();
m_max_selector_clusters = 0;
m_canceled = false;
m_progress_start = 0;
m_progress_range = 100;
m_selector_clusterizer.clear();
for (uint i = 0; i <= qdxt1_params::cMaxQuality; i++)
m_cached_selector_cluster_indices[i].clear();
m_cluster_hash.clear();
m_prev_percentage_complete = -1;
}
// Binds the n source blocks in pBlocks, trains the endpoint clusterizer,
// builds its codebook and derives the selector cluster budget.
//
// One weighted 6-component training vector (low RGB followed by high RGB, as
// returned by dxt_fast::find_representative_colors) is produced per block:
//  - hierarchical mode with mip descriptors: every chunk of every mip level is
//    evaluated against all chunk encodings, and all blocks covered by a tile of
//    the winning encoding share that tile's vector and weight;
//  - otherwise: each block is analysed on its own.
//
// Progress is reported as 0-75% while gathering vectors, 75-95% while the
// codebook is generated and 95-100% while counting selector patterns.
//
// Any previous state is discarded via clear() first. pBlocks is stored, not
// copied. Returns false if the progress callback requests cancellation or if
// generate_codebook() fails; true otherwise.
bool qdxt1::init(uint n, const dxt_pixel_block* pBlocks, const qdxt1_params& params) {
clear();
CRNLIB_ASSERT(n && pBlocks);
m_main_thread_id = crn_get_current_thread_id();
m_num_blocks = n;
m_pBlocks = pBlocks;
m_params = params;
m_endpoint_clusterizer.reserve_training_vecs(m_num_blocks);
// Phase 1 (training vector gathering) maps onto 0..75% of init's progress.
m_progress_start = 0;
m_progress_range = 75;
// Compile-time switch for the debug image path below; always off here.
const bool debugging = false;
image_u8 debug_img;
if ((m_params.m_hierarchical) && (m_params.m_num_mips)) {
// Hierarchical path: vectors are written by block index, so the array is
// sized up front instead of being appended to.
vec6F_clusterizer::training_vec_array& training_vecs = m_endpoint_clusterizer.get_training_vecs();
training_vecs.resize(m_num_blocks);
// Histogram of winning encodings; only consumed by the disabled trace below.
uint encoding_hist[cNumChunkEncodings];
utils::zero_object(encoding_hist);
uint total_processed_blocks = 0;
uint next_progress_threshold = 512;
for (uint level = 0; level < m_params.m_num_mips; level++) {
const qdxt1_params::mip_desc& level_desc = m_params.m_mip_desc[level];
// Number of chunks needed to cover the level, rounding partial chunks up.
const uint num_chunks_x = (level_desc.m_block_width + cChunkBlockWidth - 1) / cChunkBlockWidth;
const uint num_chunks_y = (level_desc.m_block_height + cChunkBlockHeight - 1) / cChunkBlockHeight;
// Level size in pixels (4 pixels per block along each axis).
const uint level_width = level_desc.m_block_width * 4;
const uint level_height = level_desc.m_block_height * 4;
if (debugging)
debug_img.resize(num_chunks_x * cChunkPixelWidth, num_chunks_y * cChunkPixelHeight);
// PSNR penalty budget for multi-tile encodings: 1.5 at level 0; at deeper
// levels it is divided by 3.1^level but never drops below 0.25.
float adaptive_tile_color_psnr_derating = 1.5f; // was 2.4f
if ((level) && (adaptive_tile_color_psnr_derating > .25f)) {
adaptive_tile_color_psnr_derating = math::maximum(.25f, adaptive_tile_color_psnr_derating / powf(3.1f, static_cast<float>(level))); // was 3.0f
}
for (uint chunk_y = 0; chunk_y < num_chunks_y; chunk_y++) {
for (uint chunk_x = 0; chunk_x < num_chunks_x; chunk_x++) {
// Gather the chunk's pixels from the source blocks. Coordinates past the
// level's right/bottom edge are clamped to the last column/row.
color_quad_u8 chunk_pixels[cChunkPixelWidth * cChunkPixelHeight];
for (uint y = 0; y < cChunkPixelHeight; y++) {
const uint pix_y = math::minimum<uint>(chunk_y * cChunkPixelHeight + y, level_height - 1);
const uint outer_block_index = level_desc.m_first_block + ((pix_y >> 2) * level_desc.m_block_width);
for (uint x = 0; x < cChunkPixelWidth; x++) {
const uint pix_x = math::minimum<uint>(chunk_x * cChunkPixelWidth + x, level_width - 1);
const uint block_index = outer_block_index + (pix_x >> 2);
const dxt_pixel_block& block = m_pBlocks[block_index];
const color_quad_u8& p = block.m_pixels[pix_y & 3][pix_x & 3];
// NOTE(review): the literal row stride 8 presumably equals cChunkPixelWidth - confirm.
chunk_pixels[x + y * 8] = p;
}
}
// Outcome of fast-compressing one candidate tile layout.
struct layout_results {
uint m_low_color;
uint m_high_color;
uint8 m_selectors[cChunkPixelWidth * cChunkPixelHeight];
uint64 m_error;
//float m_penalty;
};
// Fast-compress every tile layout once and record its summed
// color::elucidian_distance error against the source pixels.
layout_results layouts[cNumChunkTileLayouts];
for (uint l = 0; l < cNumChunkTileLayouts; l++) {
const uint width = g_chunk_tile_layouts[l].m_width;
const uint height = g_chunk_tile_layouts[l].m_height;
const uint x_ofs = g_chunk_tile_layouts[l].m_x_ofs;
const uint y_ofs = g_chunk_tile_layouts[l].m_y_ofs;
// Copy the layout's rectangle out of the chunk into a tightly packed buffer.
color_quad_u8 layout_pixels[cChunkPixelWidth * cChunkPixelHeight];
for (uint y = 0; y < height; y++)
for (uint x = 0; x < width; x++)
layout_pixels[x + y * width] = chunk_pixels[(x_ofs + x) + (y_ofs + y) * cChunkPixelWidth];
const uint n = width * height;
dxt_fast::compress_color_block(n, layout_pixels, layouts[l].m_low_color, layouts[l].m_high_color, layouts[l].m_selectors);
color_quad_u8 c[4];
dxt1_block::get_block_colors(c, static_cast<uint16>(layouts[l].m_low_color), static_cast<uint16>(layouts[l].m_high_color));
uint64 error = 0;
for (uint i = 0; i < n; i++)
error += color::elucidian_distance(layout_pixels[i], c[layouts[l].m_selectors[i]], false);
layouts[l].m_error = error;
#if 0
if ((width > 4) || (height > 4))
{
const uint dist = color::elucidian_distance(
dxt1_block::unpack_color(static_cast<uint16>(layouts[l].m_low_color), true),
dxt1_block::unpack_color(static_cast<uint16>(layouts[l].m_high_color), true), false);
layouts[l].m_penalty = math::clamp((sqrt((float)dist) - 75.0f) / 150.0f, 0.0f, 2.0f);
if ((width == 8) && (height == 8))
layouts[l].m_penalty *= 2.0f;
}
else
{
layouts[l].m_penalty = 0.0f;
}
#endif
}
// Pick the chunk encoding with the highest derated PSNR.
double best_peak_snr = -1.0f;
uint best_encoding = 0;
for (uint e = 0; e < cNumChunkEncodings; e++) {
const chunk_encoding_desc& encoding_desc = g_chunk_encodings[e];
// An encoding's error is the sum of the errors of the layouts it is built from.
double total_error = 0;
for (uint t = 0; t < encoding_desc.m_num_tiles; t++)
total_error += (double)layouts[encoding_desc.m_tiles[t].m_layout_index].m_error;
// NOTE(review): the 64 * 3 divisor presumably means 64 chunk pixels x 3 color channels - confirm.
//double mean_squared = total_error * (1.0f / (16.0f * 3.0f));
double mean_squared = total_error * (1.0f / (64.0f * 3.0f));
double root_mean_squared = sqrt(mean_squared);
// A zero error keeps the 999999 sentinel; otherwise the PSNR is clamped to [0, 500].
double peak_snr = 999999.0f;
if (mean_squared)
peak_snr = math::clamp<double>(log10(255.0f / root_mean_squared) * 20.0f, 0.0f, 500.0f);
//if (level)
// adaptive_tile_color_psnr_derating = math::lerp(adaptive_tile_color_psnr_derating * .5f, .3f, math::maximum((level - 1) / float(m_params.m_num_mips - 2), 1.0f));
// The interpolation factor is (tiles - 1) / 3: 0 for a single-tile encoding,
// 1 for a four-tile encoding, so encodings with more tiles are penalized more.
float color_derating = math::lerp(0.0f, adaptive_tile_color_psnr_derating, (g_chunk_encodings[e].m_num_tiles - 1) / 3.0f);
peak_snr = peak_snr - color_derating;
//for (uint t = 0; t < encoding_desc.m_num_tiles; t++)
// peak_snr -= (double)layouts[encoding_desc.m_tiles[t].m_layout_index].m_penalty;
if (peak_snr > best_peak_snr) {
best_peak_snr = peak_snr;
best_encoding = e;
}
}
encoding_hist[best_encoding]++;
// Emit one training vector per tile of the winning encoding and assign it
// to every block the tile covers.
const chunk_encoding_desc& encoding_desc = g_chunk_encodings[best_encoding];
for (uint t = 0; t < encoding_desc.m_num_tiles; t++) {
const chunk_tile_desc& tile_desc = encoding_desc.m_tiles[t];
uint layout_index = tile_desc.m_layout_index;
const layout_results& layout = layouts[layout_index];
// c is only filled in (and only read) when debugging is enabled.
color_quad_u8 c[4];
if (debugging)
dxt1_block::get_block_colors(c, static_cast<uint16>(layout.m_low_color), static_cast<uint16>(layout.m_high_color));
color_quad_u8 tile_pixels[cChunkPixelWidth * cChunkPixelHeight];
for (uint y = 0; y < tile_desc.m_height; y++) {
const uint pix_y = y + tile_desc.m_y_ofs;
for (uint x = 0; x < tile_desc.m_width; x++) {
const uint pix_x = x + tile_desc.m_x_ofs;
tile_pixels[x + y * tile_desc.m_width] = chunk_pixels[pix_x + pix_y * cChunkPixelWidth];
if (debugging)
debug_img(chunk_x * 8 + pix_x, chunk_y * 8 + pix_y) = c[layout.m_selectors[x + y * tile_desc.m_width]];
}
}
color_quad_u8 l, h;
dxt_fast::find_representative_colors(tile_desc.m_width * tile_desc.m_height, tile_pixels, l, h);
// Weight = distance between the two representative colors / 5000, clamped to [1, 8].
//const uint dist = color::color_distance(m_params.m_perceptual, l, h, false);
const uint dist = color::elucidian_distance(l, h, false);
const uint cColorDistToWeight = 5000;
const uint cMaxWeight = 8;
uint weight = math::clamp<uint>(dist / cColorDistToWeight, 1, cMaxWeight);
// Training vector layout: low RGB in [0..2], high RGB in [3..5].
vec6F ev;
ev[0] = l[0];
ev[1] = l[1];
ev[2] = l[2];
ev[3] = h[0];
ev[4] = h[1];
ev[5] = h[2];
// Walk the blocks covered by the tile; blocks that fall outside the level
// (partial chunks at the right/bottom edge) are skipped.
for (uint y = 0; y < (tile_desc.m_height >> 2); y++) {
uint block_y = chunk_y * cChunkBlockHeight + y + (tile_desc.m_y_ofs >> 2);
if (block_y >= level_desc.m_block_height)
continue;
for (uint x = 0; x < (tile_desc.m_width >> 2); x++) {
uint block_x = chunk_x * cChunkBlockWidth + x + (tile_desc.m_x_ofs >> 2);
if (block_x >= level_desc.m_block_width)
break;
uint block_index = level_desc.m_first_block + block_x + block_y * level_desc.m_block_width;
training_vecs[block_index].first = ev;
training_vecs[block_index].second = weight;
total_processed_blocks++;
//if (debugging)
//{
// debug_img(block_x, block_y) = l;
// debug_img(block_x + level_desc.m_block_width, block_y) = h;
//}
} // x
} // y
} //t
// Report progress each time another 512 blocks have been processed.
if (total_processed_blocks >= next_progress_threshold) {
next_progress_threshold += 512;
if (!update_progress(total_processed_blocks, m_num_blocks - 1))
return false;
}
} // chunk_x
} // chunk_y
#if GENERATE_DEBUG_IMAGES
if (debugging)
image_utils::write_to_file(dynamic_string(cVarArg, "debug_%u.tga", level).get_ptr(), debug_img, image_utils::cWriteFlagIgnoreAlpha);
#endif
} // level
#if 0
trace("chunk encoding hist: ");
for (uint i = 0; i < cNumChunkEncodings; i++)
trace("%u ", encoding_hist[i]);
trace("\n");
#endif
} else {
// Flat path: one training vector per source block, appended in block order.
for (uint block_index = 0; block_index < m_num_blocks; block_index++) {
// Report progress every 512 blocks.
if ((block_index & 511) == 0) {
if (!update_progress(block_index, m_num_blocks - 1))
return false;
}
color_quad_u8 l, h;
dxt_fast::find_representative_colors(cDXTBlockSize * cDXTBlockSize, &m_pBlocks[block_index].m_pixels[0][0], l, h);
// Same weighting as the hierarchical path: distance / 5000, clamped to [1, 8].
//const uint dist = color::color_distance(m_params.m_perceptual, l, h, false);
const uint dist = color::elucidian_distance(l, h, false);
const uint cColorDistToWeight = 5000;
const uint cMaxWeight = 8;
uint weight = math::clamp<uint>(dist / cColorDistToWeight, 1, cMaxWeight);
vec6F ev;
ev[0] = l[0];
ev[1] = l[1];
ev[2] = l[2];
ev[3] = h[0];
ev[4] = h[1];
ev[5] = h[2];
m_endpoint_clusterizer.add_training_vec(ev, weight);
}
}
// Phase 2: build the endpoint codebook (75..95% of init's progress).
const uint cMaxEndpointClusters = 65535U;
m_progress_start = 75;
m_progress_range = 20;
if (!m_endpoint_clusterizer.generate_codebook(cMaxEndpointClusters, generate_codebook_progress_callback, this))
return false;
// Phase 3 (95..100%): fast-compress every block and count the distinct
// 32-bit selector patterns; the hash map is used as a set.
crnlib::hash_map<uint, empty_type> selector_hash;
m_progress_start = 95;
m_progress_range = 5;
for (uint block_index = 0; block_index < m_num_blocks; block_index++) {
if ((block_index & 511) == 0) {
if (!update_progress(block_index, m_num_blocks - 1))
return false;
}
dxt1_block dxt_blk;
dxt_fast::compress_color_block(&dxt_blk, &m_pBlocks[block_index].m_pixels[0][0]);
// Pack the four selector bytes into one key, byte 0 in the low bits.
uint selectors = dxt_blk.m_selectors[0] | (dxt_blk.m_selectors[1] << 8) | (dxt_blk.m_selectors[2] << 16) | (dxt_blk.m_selectors[3] << 24);
selector_hash.insert(selectors);
}
// Selector cluster budget: number of distinct patterns plus 128.
m_max_selector_clusters = selector_hash.size() + 128;
// trace("max endpoint clusters: %u\n", m_endpoint_clusterizer.get_codebook_size());
// trace("max selector clusters: %u\n", m_max_selector_clusters);
// With start 95 and range 5 this reports 100%; the result is ignored.
update_progress(1, 1);
return true;
}
// Converts a (value, max_value) position inside the current phase into an
// overall percentage and forwards it to the user's progress callback.
//
// The phase-local percentage is m_progress_start plus value scaled into
// m_progress_range (rounded to nearest); a max_value of 0 is reported as 100.
// The callback is only invoked when that percentage differs from the one
// reported last time, and receives it rescaled into the caller-supplied
// window (m_params.m_progress_start / m_params.m_progress_range).
//
// Returns true when there is no callback, nothing changed, or the callback
// asked to continue. When the callback returns false, m_canceled is set and
// false is returned.
bool qdxt1::update_progress(uint value, uint max_value) {
  if (m_params.m_pProgress_func) {
    uint phase_percent = 100;
    if (max_value) {
      phase_percent = m_progress_start + (value * m_progress_range + (max_value / 2)) / max_value;
    }

    if (static_cast<int>(phase_percent) != m_prev_percentage_complete) {
      m_prev_percentage_complete = phase_percent;

      if (!m_params.m_pProgress_func(m_params.m_progress_start + (phase_percent * m_params.m_progress_range) / 100U, m_params.m_pProgress_data)) {
        m_canceled = true;
        return false;
      }
    }
  }

  return true;
}
// Task-pool worker that assigns final DXT1 endpoints and selectors to every
// block of the endpoint clusters owned by this worker.
//
// data    - worker index. When the pool has worker threads, cluster i is
//           handled by the worker whose index equals i % (num_threads + 1);
//           with no worker threads a single call handles every cluster.
// (void*) - unused.
//
// For each cluster the endpoint pair is looked up in m_cluster_hash, keyed by
// the cluster's block set. On a hit only the selectors are recomputed against
// the cached endpoints; on a miss the endpoints are computed from all pixels
// of the cluster and the packed pair is stored in the hash. Hash accesses are
// guarded by m_cluster_hash_lock.
//
// Returns early once m_canceled is set. Progress is only reported from the
// thread recorded in m_main_thread_id.
void qdxt1::pack_endpoints_task(uint64 data, void*) {
  const uint thread_index = static_cast<uint>(data);

  // Scratch buffers reused across clusters.
  crnlib::vector<color_quad_u8> cluster_pixels;
  cluster_pixels.reserve(1024);

  crnlib::vector<uint8> selectors;
  selectors.reserve(1024);

  dxt1_endpoint_optimizer optimizer;
  dxt1_endpoint_optimizer::params p;
  dxt1_endpoint_optimizer::results r;

  p.m_quality = m_params.m_dxt_quality;
  p.m_use_alpha_blocks = m_params.m_use_alpha_blocks;
  p.m_dxt1a_alpha_threshold = m_params.m_dxt1a_alpha_threshold;
  p.m_perceptual = m_params.m_perceptual;

  // Progress is polled whenever (cluster_index & mask) == 0. The mask is half
  // of next_pow2(cluster count / 100), at least 8, minus one.
  uint cluster_index_progress_mask = math::next_pow2(m_endpoint_cluster_indices.size() / 100);
  cluster_index_progress_mask /= 2;
  cluster_index_progress_mask = math::maximum<uint>(cluster_index_progress_mask, 8);
  cluster_index_progress_mask -= 1;

  // cid is the hash key for the current cluster; `indices` aliases its cell
  // list and is what the per-block loops below iterate over.
  cluster_id cid;
  const crnlib::vector<uint32>& indices = cid.m_cells;

  for (uint cluster_index = 0; cluster_index < m_endpoint_cluster_indices.size(); cluster_index++) {
    if (m_canceled)
      return;

    if ((cluster_index & cluster_index_progress_mask) == 0) {
      if (crn_get_current_thread_id() == m_main_thread_id) {
        if (!update_progress(cluster_index, m_endpoint_cluster_indices.size() - 1))
          return;
      }
    }

    // Skip clusters that belong to another worker.
    if (m_pTask_pool->get_num_threads()) {
      if ((cluster_index % (m_pTask_pool->get_num_threads() + 1)) != thread_index)
        continue;
    }

    const crnlib::vector<uint>& cluster_indices = m_endpoint_cluster_indices[cluster_index];

    selectors.resize(cluster_indices.size() * cDXTBlockSize * cDXTBlockSize);

    bool found = false;
    uint32 found_endpoints = 0;

    cid.set(cluster_indices);

    {
      scoped_spinlock lock(m_cluster_hash_lock);

      cluster_hash::const_iterator it(m_cluster_hash.find(cid));
      if (it != m_cluster_hash.end()) {
        CRNLIB_ASSERT(cid == it->first);

        found = true;
        found_endpoints = it->second;
      }
    }

    if (found) {
      // Cache hit: endpoints are packed as low color in bits 0..15 and high
      // color in bits 16..31.
      const uint16 low_color = static_cast<uint16>(found_endpoints);
      const uint16 high_color = static_cast<uint16>((found_endpoints >> 16U));

      color_quad_u8 block_colors[4];
      dxt1_block::get_block_colors(block_colors, low_color, high_color);

      // With low <= high, selector 3 is reserved for pixels whose alpha is
      // below the threshold and block_colors[3] is never matched by color.
      const bool is_alpha_block = (low_color <= high_color);

      for (uint block_iter = 0; block_iter < indices.size(); block_iter++) {
        const uint block_index = indices[block_iter];

        const color_quad_u8* pSrc_pixels = &m_pBlocks[block_index].m_pixels[0][0];

        // Each block is encoded exactly once: the result depends only on the
        // source pixels and the cached endpoints.
        dxt1_block& dxt_block = get_block(block_index);

        dxt_block.set_low_color(low_color);
        dxt_block.set_high_color(high_color);

        // Build the 32-bit selector word, pixel 0 in the lowest two bits.
        uint mask = 0;
        for (int i = 15; i >= 0; i--) {
          mask <<= 2;

          const color_quad_u8& c = pSrc_pixels[i];

          uint dist0 = color::color_distance(m_params.m_perceptual, c, block_colors[0], false);
          uint dist1 = color::color_distance(m_params.m_perceptual, c, block_colors[1], false);
          uint dist2 = color::color_distance(m_params.m_perceptual, c, block_colors[2], false);

          uint selector = 0, best_dist = dist0;

          if (dist1 < best_dist) {
            selector = 1;
            best_dist = dist1;
          }
          if (dist2 < best_dist) {
            selector = 2;
            best_dist = dist2;
          }

          if (!is_alpha_block) {
            uint dist3 = color::color_distance(m_params.m_perceptual, c, block_colors[3], false);
            if (dist3 < best_dist) {
              selector = 3;
            }
          } else {
            if (c.a < m_params.m_dxt1a_alpha_threshold)
              selector = 3;
          }

          mask |= selector;
        }

        dxt_block.m_selectors[0] = static_cast<uint8>(mask & 0xFF);
        dxt_block.m_selectors[1] = static_cast<uint8>((mask >> 8) & 0xFF);
        dxt_block.m_selectors[2] = static_cast<uint8>((mask >> 16) & 0xFF);
        dxt_block.m_selectors[3] = static_cast<uint8>((mask >> 24) & 0xFF);
      }
    } else {
      // Cache miss: concatenate the pixels of every block in the cluster and
      // compute one endpoint pair (plus per-pixel selectors) for all of them.
      cluster_pixels.resize(indices.size() * cDXTBlockSize * cDXTBlockSize);

      color_quad_u8* pDst = &cluster_pixels[0];

      bool has_alpha_pixels = false;

      for (uint block_iter = 0; block_iter < indices.size(); block_iter++) {
        const uint block_index = indices[block_iter];

        //const color_quad_u8* pSrc_pixels = &m_pBlocks[block_index].m_pixels[0][0];
        const color_quad_u8* pSrc_pixels = (const color_quad_u8*)m_pBlocks[block_index].m_pixels;

        for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) {
          const color_quad_u8& src = pSrc_pixels[i];

          if (src.a < m_params.m_dxt1a_alpha_threshold)
            has_alpha_pixels = true;

          *pDst++ = src;
        }
      }

      p.m_block_index = cluster_index;
      p.m_num_pixels = cluster_pixels.size();
      p.m_pPixels = cluster_pixels.begin();

      r.m_pSelectors = selectors.begin();

      uint low_color, high_color;
      // The fast compressor is only used for the "super fast" quality setting
      // and only when no pixel falls below the alpha threshold.
      if ((m_params.m_dxt_quality != cCRNDXTQualitySuperFast) || (has_alpha_pixels)) {
        p.m_pixels_have_alpha = has_alpha_pixels;

        optimizer.compute(p, r);
        low_color = r.m_low_color;
        high_color = r.m_high_color;
      } else {
        dxt_fast::compress_color_block(cluster_pixels.size(), cluster_pixels.begin(), low_color, high_color, selectors.begin(), true);
      }

      // Scatter the shared endpoints and each block's 16 selectors back into
      // the destination blocks.
      const uint8* pSrc_selectors = selectors.begin();

      for (uint block_iter = 0; block_iter < indices.size(); block_iter++) {
        const uint block_index = indices[block_iter];

        dxt1_block& dxt_block = get_block(block_index);

        dxt_block.set_low_color(static_cast<uint16>(low_color));
        dxt_block.set_high_color(static_cast<uint16>(high_color));

        uint mask = 0;
        for (int i = 15; i >= 0; i--) {
          mask <<= 2;
          mask |= pSrc_selectors[i];
        }
        pSrc_selectors += (cDXTBlockSize * cDXTBlockSize);

        dxt_block.m_selectors[0] = static_cast<uint8>(mask & 0xFF);
        dxt_block.m_selectors[1] = static_cast<uint8>((mask >> 8) & 0xFF);
        dxt_block.m_selectors[2] = static_cast<uint8>((mask >> 16) & 0xFF);
        dxt_block.m_selectors[3] = static_cast<uint8>((mask >> 24) & 0xFF);
      }

      // Remember the endpoints for this block set.
      {
        scoped_spinlock lock(m_cluster_hash_lock);

        m_cluster_hash.insert(cid, low_color | (high_color << 16));
      }
    }
  }
}
// Argument bundle passed (by pointer) to every optimize_selectors_task()
// worker. It only references the selector cluster table; the table itself is
// owned by the caller and must outlive the tasks.
struct optimize_selectors_params {
  CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(optimize_selectors_params);

  // explicit: a cluster table must never convert silently into a params object.
  explicit optimize_selectors_params(
      crnlib::vector<crnlib::vector<uint> >& selector_cluster_indices)
      : m_selector_cluster_indices(selector_cluster_indices) {
  }

  // One entry per selector cluster; each entry lists the block indices in it.
  crnlib::vector<crnlib::vector<uint> >& m_selector_cluster_indices;
};
// Task-pool worker that gives all blocks of a selector cluster one shared set
// of selectors, chosen per pixel to minimize the summed color error over the
// cluster's blocks (each block keeps its own endpoints).
//
// data      - worker index. When the pool has worker threads, cluster i is
//             handled by the worker whose index equals i % (num_threads + 1).
// pData_ptr - points to the optimize_selectors_params holding the cluster table.
//
// Within a cluster the blocks are split into two categories that are solved
// independently: [0] blocks for which is_alpha_block() is false (selectors
// 0..3 allowed) and [1] alpha-mode blocks without any pixel below the alpha
// threshold (selectors 0..2 only). Alpha-mode blocks that do contain such
// pixels are left untouched, as are clusters/categories with fewer than two
// blocks.
//
// Returns early once m_canceled is set. Progress is only reported from the
// thread recorded in m_main_thread_id.
void qdxt1::optimize_selectors_task(uint64 data, void* pData_ptr) {
  const uint thread_index = static_cast<uint>(data);

  optimize_selectors_params& task_params = *static_cast<optimize_selectors_params*>(pData_ptr);

  crnlib::vector<uint> block_categories[2];
  block_categories[0].reserve(2048);
  block_categories[1].reserve(2048);

  // Decoded palettes (4 colors per block) of the category being solved.
  crnlib::vector<color_quad_u8> palettes;

  for (uint cluster_index = 0; cluster_index < task_params.m_selector_cluster_indices.size(); cluster_index++) {
    if (m_canceled)
      return;

    if ((cluster_index & 255) == 0) {
      if (crn_get_current_thread_id() == m_main_thread_id) {
        if (!update_progress(cluster_index, task_params.m_selector_cluster_indices.size() - 1))
          return;
      }
    }

    // Skip clusters that belong to another worker.
    if (m_pTask_pool->get_num_threads()) {
      if ((cluster_index % (m_pTask_pool->get_num_threads() + 1)) != thread_index)
        continue;
    }

    const crnlib::vector<uint>& selector_indices = task_params.m_selector_cluster_indices[cluster_index];

    if (selector_indices.size() <= 1)
      continue;

    block_categories[0].resize(0);
    block_categories[1].resize(0);

    for (uint block_iter = 0; block_iter < selector_indices.size(); block_iter++) {
      const uint block_index = selector_indices[block_iter];

      const dxt1_block& src_block = get_block(block_index);

      if (!src_block.is_alpha_block())
        block_categories[0].push_back(block_index);
      else {
        bool has_alpha_pixels = false;

        if (m_params.m_dxt1a_alpha_threshold > 0) {
          const color_quad_u8* pSrc_pixels = (const color_quad_u8*)m_pBlocks[block_index].m_pixels;

          for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) {
            const color_quad_u8& src = pSrc_pixels[i];
            if (src.a < m_params.m_dxt1a_alpha_threshold) {
              has_alpha_pixels = true;
              break;
            }
          }
        }

        // Blocks with pixels below the alpha threshold keep their selectors.
        if (has_alpha_pixels)
          continue;

        block_categories[1].push_back(block_index);
      }
    }

    dxt1_block blk;
    utils::zero_object(blk);

    for (uint block_type = 0; block_type <= 1; block_type++) {
      const crnlib::vector<uint>& block_indices = block_categories[block_type];
      if (block_indices.size() <= 1)
        continue;

      // This pass never modifies endpoints, so each block's palette is decoded
      // once here rather than once per pixel and candidate selector.
      palettes.resize(block_indices.size() * 4);
      for (uint block_iter = 0; block_iter < block_indices.size(); block_iter++) {
        const dxt1_block& dst_block = get_block(block_indices[block_iter]);
        dxt1_block::get_block_colors(&palettes[block_iter * 4], static_cast<uint16>(dst_block.get_low_color()), static_cast<uint16>(dst_block.get_high_color()));
      }

      // Selector 3 is not a candidate for the alpha-mode category.
      const uint max_s = (block_type == 1) ? 3 : 4;

      for (uint y = 0; y < 4; y++) {
        for (uint x = 0; x < 4; x++) {
          uint best_s = 0;
          // Largest 64-bit value, so any real total can replace it.
          uint64 best_error = 0xFFFFFFFFFFFFFFFFULL;

          for (uint s = 0; s < max_s; s++) {
            uint64 total_error = 0;

            for (uint block_iter = 0; block_iter < block_indices.size(); block_iter++) {
              const color_quad_u8& orig_color = m_pBlocks[block_indices[block_iter]].m_pixels[y][x];

              uint error = color::color_distance(m_params.m_perceptual, orig_color, palettes[block_iter * 4 + s], false);

              total_error += error;
            }

            if (total_error < best_error) {
              best_error = total_error;
              best_s = s;
            }
          }

          blk.set_selector(x, y, best_s);

        }  // x
      }    // y

      // Broadcast the winning selectors to every block of the category.
      for (uint block_iter = 0; block_iter < block_indices.size(); block_iter++) {
        const uint block_index = block_indices[block_iter];

        dxt1_block& dst_block = get_block(block_index);

        memcpy(dst_block.m_selectors, blk.m_selectors, sizeof(dst_block.m_selectors));
      }
    }

  }  // cluster_index
}
// Progress adapter handed to the clusterizers: pData is the qdxt1 instance,
// and the 0..100 percentage is forwarded to its update_progress(). Returns
// whatever update_progress() returns (false requests cancellation).
bool qdxt1::generate_codebook_progress_callback(uint percentage_completed, void* pData) {
  qdxt1* const pSelf = static_cast<qdxt1*>(pData);
  return pSelf->update_progress(percentage_completed, 100U);
}
// Clusters the selectors of all already-packed blocks into at most
// max_selector_clusters groups, written to selector_cluster_indices.
//
// Each block contributes a 16-component vector (its selectors mapped through
// g_dxt1_to_linear, row by row) weighted by the distance between its two
// unpacked endpoint colors divided by 2000, clamped to [1, 2048].
//
// The progress window is moved to start at the previous range and span 33.
// Returns the result of the selector clusterizer's create_clusters().
bool qdxt1::create_selector_clusters(uint max_selector_clusters, crnlib::vector<crnlib::vector<uint> >& selector_cluster_indices) {
  m_progress_start = m_progress_range;
  m_progress_range = 33;

  const uint cColorDistToWeight = 2000;
  const uint cMaxWeight = 2048;

  weighted_selector_vec_array selector_vecs(m_num_blocks);

  for (uint block_index = 0; block_index < m_num_blocks; ++block_index) {
    dxt1_block& blk = get_block(block_index);

    // Linearized selectors in row-major order.
    vec16F sv;
    uint component = 0;
    for (uint y = 0; y < 4; y++) {
      for (uint x = 0; x < 4; x++) {
        sv[component] = g_dxt1_to_linear[blk.get_selector(x, y)];
        ++component;
      }
    }

    const color_quad_u8 first_color(dxt1_block::unpack_color(static_cast<uint16>(blk.get_low_color()), true));
    const color_quad_u8 second_color(dxt1_block::unpack_color(static_cast<uint16>(blk.get_high_color()), true));
    const uint dist = color::color_distance(m_params.m_perceptual, first_color, second_color, false);

    selector_vecs[block_index].m_vec = sv;
    selector_vecs[block_index].m_weight = math::clamp<uint>(dist / cColorDistToWeight, 1, cMaxWeight);
  }

  return m_selector_clusterizer.create_clusters(
      selector_vecs, max_selector_clusters, selector_cluster_indices, generate_codebook_progress_callback, this);
}
// Quantizes the blocks registered by init() into pDst_elements.
//
// pDst_elements, elements_per_block - destination array and its per-block
//     element count; stored in m_pDst_elements / m_elements_per_block.
// params            - copied into m_params; the alpha threshold is forced to 0
//                     when alpha blocks are disabled.
// quality_power_mul - scales the exponents (1.8 for endpoints, 1.65 for
//                     selectors) applied to the normalized quality level.
//
// Steps:
//  1. Endpoint clusters: at full quality every block is its own cluster,
//     otherwise up to max_endpoint_clusters clusters are retrieved from the
//     endpoint clusterizer.
//  2. pack_endpoints_task runs on num_threads + 1 workers.
//  3. Below full quality only: selector clusters are created (or reused from
//     the per-quality-level cache) and optimize_selectors_task runs on
//     num_threads + 1 workers.
//
// Returns false if the operation was canceled, true otherwise.
bool qdxt1::pack(dxt1_block* pDst_elements, uint elements_per_block, const qdxt1_params& params, float quality_power_mul) {
  CRNLIB_ASSERT(m_num_blocks);

  m_main_thread_id = crn_get_current_thread_id();
  m_canceled = false;

  m_pDst_elements = pDst_elements;
  m_elements_per_block = elements_per_block;
  m_params = params;
  if (!m_params.m_use_alpha_blocks)
    m_params.m_dxt1a_alpha_threshold = 0;

  m_prev_percentage_complete = -1;

  CRNLIB_ASSERT(m_params.m_quality_level <= qdxt1_params::cMaxQuality);
  const float quality = m_params.m_quality_level / (float)qdxt1_params::cMaxQuality;
  const float endpoint_quality = powf(quality, 1.8f * quality_power_mul);
  const float selector_quality = powf(quality, 1.65f * quality_power_mul);

  //const uint max_endpoint_clusters = math::clamp<uint>(static_cast<uint>(m_endpoint_clusterizer.get_codebook_size() * endpoint_quality), 128U, m_endpoint_clusterizer.get_codebook_size());
  //const uint max_selector_clusters = math::clamp<uint>(static_cast<uint>(m_max_selector_clusters * selector_quality), 150U, m_max_selector_clusters);
  const uint max_endpoint_clusters = math::clamp<uint>(static_cast<uint>(m_endpoint_clusterizer.get_codebook_size() * endpoint_quality), 96U, m_endpoint_clusterizer.get_codebook_size());
  const uint max_selector_clusters = math::clamp<uint>(static_cast<uint>(m_max_selector_clusters * selector_quality), 128U, m_max_selector_clusters);

  if (quality >= 1.0f) {
    // Full quality: no endpoint sharing, one cluster per block.
    m_endpoint_cluster_indices.resize(m_num_blocks);
    for (uint i = 0; i < m_num_blocks; i++) {
      m_endpoint_cluster_indices[i].resize(1);
      m_endpoint_cluster_indices[i][0] = i;
    }
  } else
    m_endpoint_clusterizer.retrieve_clusters(max_endpoint_clusters, m_endpoint_cluster_indices);

  // trace("endpoint clusters: %u\n", m_endpoint_cluster_indices.size());

#if 0
  // Cluster statistics, gathered only for the diagnostic trace.
  uint total_blocks = 0;
  uint max_blocks = 0;
  for (uint i = 0; i < m_endpoint_cluster_indices.size(); i++) {
    uint num = m_endpoint_cluster_indices[i].size();
    total_blocks += num;
    max_blocks = math::maximum(max_blocks, num);
  }
  trace("Num clusters: %u, Average blocks per cluster: %u, Max blocks per cluster: %u\n",
    m_endpoint_cluster_indices.size(),
    total_blocks / m_endpoint_cluster_indices.size(),
    max_blocks);
#endif

  // Selector clusters are cached per quality level across pack() calls.
  crnlib::vector<crnlib::vector<uint> >& selector_cluster_indices = m_cached_selector_cluster_indices[params.m_quality_level];

  // Share of the progress bar given to endpoint packing: all of it at full
  // quality; otherwise 10 for the "super fast" DXT quality, else 33 when the
  // selector clusters still have to be built or 50 when they are cached.
  m_progress_start = 0;
  if (quality >= 1.0f)
    m_progress_range = 100;
  else if (selector_cluster_indices.empty())
    m_progress_range = (m_params.m_dxt_quality == cCRNDXTQualitySuperFast) ? 10 : 33;
  else
    m_progress_range = (m_params.m_dxt_quality == cCRNDXTQualitySuperFast) ? 10 : 50;

  // One task per worker thread plus one more (indices 0..num_threads).
  for (uint i = 0; i <= m_pTask_pool->get_num_threads(); i++)
    m_pTask_pool->queue_object_task(this, &qdxt1::pack_endpoints_task, i);
  m_pTask_pool->join();

  if (m_canceled)
    return false;

  // Full quality stops after endpoint packing.
  if (quality >= 1.0f)
    return true;

  if (selector_cluster_indices.empty()) {
    create_selector_clusters(max_selector_clusters, selector_cluster_indices);

    if (m_canceled) {
      // Do not leave a partially built table in the cache.
      selector_cluster_indices.clear();

      return false;
    }
  }

  // Selector optimization takes whatever is left of the progress bar.
  m_progress_start += m_progress_range;
  m_progress_range = 100 - m_progress_start;

  optimize_selectors_params optimize_selectors_task_params(selector_cluster_indices);

  for (uint i = 0; i <= m_pTask_pool->get_num_threads(); i++)
    m_pTask_pool->queue_object_task(this, &qdxt1::optimize_selectors_task, i, &optimize_selectors_task_params);

  m_pTask_pool->join();

  return !m_canceled;
}
} // namespace crnlib