diff --git a/bin/crunch_x64.exe b/bin/crunch_x64.exe index e068703..4c7b128 100644 Binary files a/bin/crunch_x64.exe and b/bin/crunch_x64.exe differ diff --git a/crnlib/crn_comp.cpp b/crnlib/crn_comp.cpp index a8c29d4..29a042c 100644 --- a/crnlib/crn_comp.cpp +++ b/crnlib/crn_comp.cpp @@ -10,7 +10,6 @@ #define CRNLIB_ENABLE_DEBUG_MESSAGES 0 namespace crnlib { -static const uint cEncodingMapNumChunksPerCode = 3; crn_comp::crn_comp() : m_pParams(NULL) { @@ -20,10 +19,10 @@ crn_comp::~crn_comp() { } float crn_comp::color_endpoint_similarity_func(uint index_a, uint index_b, void* pContext) { - dxt_hc& hvq = *static_cast(pContext); + crnlib::vector& color_endpoints = *static_cast*>(pContext); - uint endpoint_a = hvq.get_color_endpoint(index_a); - uint endpoint_b = hvq.get_color_endpoint(index_b); + uint endpoint_a = color_endpoints[index_a]; + uint endpoint_b = color_endpoints[index_b]; color_quad_u8 a[2]; a[0] = dxt1_block::unpack_color((uint16)(endpoint_a & 0xFFFF), true); @@ -40,13 +39,13 @@ float crn_comp::color_endpoint_similarity_func(uint index_a, uint index_b, void* } float crn_comp::alpha_endpoint_similarity_func(uint index_a, uint index_b, void* pContext) { - dxt_hc& hvq = *static_cast(pContext); + crnlib::vector& alpha_endpoints = *static_cast*>(pContext); - uint endpoint_a = hvq.get_alpha_endpoint(index_a); + uint endpoint_a = alpha_endpoints[index_a]; int endpoint_a_lo = dxt5_block::unpack_endpoint(endpoint_a, 0); int endpoint_a_hi = dxt5_block::unpack_endpoint(endpoint_a, 1); - uint endpoint_b = hvq.get_alpha_endpoint(index_b); + uint endpoint_b = alpha_endpoints[index_b]; int endpoint_b_lo = dxt5_block::unpack_endpoint(endpoint_b, 0); int endpoint_b_hi = dxt5_block::unpack_endpoint(endpoint_b, 1); @@ -180,10 +179,10 @@ bool crn_comp::pack_color_endpoints( console::debug("pack_color_endpoints: %u", trial_index); #endif - crnlib::vector remapped_endpoints(m_hvq.get_color_endpoint_codebook_size()); + crnlib::vector remapped_endpoints(m_color_endpoints.size()); - for (uint i = 0; i < m_hvq.get_color_endpoint_codebook_size(); i++) - remapped_endpoints[remapping[i]] = m_hvq.get_color_endpoint(i); + for (uint i = 0; i < m_color_endpoints.size(); i++) + remapped_endpoints[remapping[i]] = m_color_endpoints[i]; const uint component_limits[6] = {31, 63, 31, 31, 63, 31}; @@ -197,7 +196,7 @@ bool crn_comp::pack_color_endpoints( #endif crnlib::vector residual_syms; - residual_syms.reserve(m_hvq.get_color_endpoint_codebook_size() * 2 * 3); + residual_syms.reserve(m_color_endpoints.size() * 2 * 3); color_quad_u8 prev[2]; prev[0].clear(); @@ -205,7 +204,7 @@ bool crn_comp::pack_color_endpoints( int total_residuals = 0; - for (uint endpoint_index = 0; endpoint_index < m_hvq.get_color_endpoint_codebook_size(); endpoint_index++) { + for (uint endpoint_index = 0; endpoint_index < m_color_endpoints.size(); endpoint_index++) { const uint endpoint = remapped_endpoints[endpoint_index]; color_quad_u8 cur[2]; @@ -309,10 +308,10 @@ bool crn_comp::pack_alpha_endpoints( console::debug("pack_alpha_endpoints: %u", trial_index); #endif - crnlib::vector remapped_endpoints(m_hvq.get_alpha_endpoint_codebook_size()); + crnlib::vector remapped_endpoints(m_alpha_endpoints.size()); - for (uint i = 0; i < m_hvq.get_alpha_endpoint_codebook_size(); i++) - remapped_endpoints[remapping[i]] = m_hvq.get_alpha_endpoint(i); + for (uint i = 0; i < m_alpha_endpoints.size(); i++) + remapped_endpoints[remapping[i]] = m_alpha_endpoints[i]; symbol_histogram hist; hist.resize(256); @@ -323,14 +322,14 @@ bool crn_comp::pack_alpha_endpoints( #endif crnlib::vector residual_syms; - residual_syms.reserve(m_hvq.get_alpha_endpoint_codebook_size() * 2 * 3); + residual_syms.reserve(m_alpha_endpoints.size() * 2 * 3); uint prev[2]; utils::zero_object(prev); int total_residuals = 0; - for (uint endpoint_index = 0; endpoint_index < m_hvq.get_alpha_endpoint_codebook_size(); endpoint_index++) { + for (uint endpoint_index = 0; endpoint_index < m_alpha_endpoints.size(); endpoint_index++) { const uint endpoint = remapped_endpoints[endpoint_index]; uint cur[2]; @@ -416,264 +415,206 @@ bool crn_comp::pack_alpha_endpoints( return true; } -float crn_comp::color_selector_similarity_func(uint index_a, uint index_b, void* pContext) { - const crnlib::vector& selectors = *static_cast*>(pContext); - - const dxt_hc::selectors& selectors_a = selectors[index_a]; - const dxt_hc::selectors& selectors_b = selectors[index_b]; - - int total = 0; - for (uint i = 0; i < 16; i++) { - int a = g_dxt1_to_linear[selectors_a.get_by_index(i)]; - int b = g_dxt1_to_linear[selectors_b.get_by_index(i)]; - - int delta = a - b; - total += delta * delta; - } - - float weight = 1.0f - math::clamp(total * 1.0f / 20.0f, 0.0f, 1.0f); - return weight; -} - -float crn_comp::alpha_selector_similarity_func(uint index_a, uint index_b, void* pContext) { - const crnlib::vector& selectors = *static_cast*>(pContext); - - const dxt_hc::selectors& selectors_a = selectors[index_a]; - const dxt_hc::selectors& selectors_b = selectors[index_b]; - - int total = 0; - for (uint i = 0; i < 16; i++) { - int a = g_dxt5_to_linear[selectors_a.get_by_index(i)]; - int b = g_dxt5_to_linear[selectors_b.get_by_index(i)]; - - int delta = a - b; - total += delta * delta; - } - - float weight = 1.0f - math::clamp(total * 1.0f / 100.0f, 0.0f, 1.0f); - return weight; -} - -void crn_comp::sort_selector_codebook(crnlib::vector& remapping, const crnlib::vector& selectors, const uint8* pTo_linear) { - remapping.resize(selectors.size()); - +void crn_comp::sort_color_selectors(crnlib::vector& remapping) { + remapping.resize(m_color_selectors.size()); uint lowest_energy = UINT_MAX; uint lowest_energy_index = 0; - - for (uint i = 0; i < selectors.size(); i++) { + for (uint i = 0; i < m_color_selectors.size(); i++) { uint total = 0; - for (uint j = 0; j < 16; j++) { - int a = pTo_linear[selectors[i].get_by_index(j)]; - + for (uint32 selector = m_color_selectors[i], j = 0; j < 16; j++, selector >>= 2) { + int a = selector & 3; total += a * a; } - if (total < lowest_energy) { lowest_energy = total; lowest_energy_index = i; } } - uint cur_index = lowest_energy_index; - - crnlib::vector chosen_flags(selectors.size()); - + crnlib::vector chosen_flags(m_color_selectors.size()); uint n = 0; for (;;) { chosen_flags[cur_index] = true; - remapping[cur_index] = n; n++; - if (n == selectors.size()) + if (n == m_color_selectors.size()) break; - uint lowest_error = UINT_MAX; uint lowest_error_index = 0; - - for (uint i = 0; i < selectors.size(); i++) { + for (uint i = 0; i < m_color_selectors.size(); i++) { if (chosen_flags[i]) continue; - uint total = 0; - for (uint j = 0; j < 16; j++) { - int a = pTo_linear[selectors[cur_index].get_by_index(j)]; - int b = pTo_linear[selectors[i].get_by_index(j)]; - - int delta = a - b; + for (uint32 cur_selector = m_color_selectors[cur_index], selector = m_color_selectors[i], j = 0; j < 16; j++, cur_selector >>= 2, selector >>= 2) { + int delta = (cur_selector & 3) - (selector & 3); total += delta * delta; } - if (total < lowest_error) { lowest_error = total; lowest_error_index = i; } } - cur_index = lowest_error_index; } } -// The indices are only used for statistical purposes. -bool crn_comp::pack_selectors( - crnlib::vector& packed_data, - const crnlib::vector& selectors, - const crnlib::vector& remapping, - uint max_selector_value, - const uint8* pTo_linear, - uint trial_index) { - trial_index; - -#if CRNLIB_ENABLE_DEBUG_MESSAGES - if (m_pParams->m_flags & cCRNCompFlagDebugging) - console::debug("pack_selectors: %u", trial_index); -#endif - - crnlib::vector remapped_selectors(selectors.size()); - - for (uint i = 0; i < selectors.size(); i++) - remapped_selectors[remapping[i]] = selectors[i]; - -#if CRNLIB_CREATE_DEBUG_IMAGES - image_u8 residual_image(16, selectors.size()); - ; - image_u8 selector_image(16, selectors.size()); - ; -#endif +void crn_comp::sort_alpha_selectors(crnlib::vector& remapping) { + remapping.resize(m_alpha_selectors.size()); + uint lowest_energy = UINT_MAX; + uint lowest_energy_index = 0; + for (uint i = 0; i < m_alpha_selectors.size(); i++) { + uint total = 0; + for (uint64 selector = m_alpha_selectors[i], j = 0; j < 16; j++, selector >>= 3) { + int a = selector & 7; + total += a * a; + } + if (total < lowest_energy) { + lowest_energy = total; + lowest_energy_index = i; + } + } + uint cur_index = lowest_energy_index; + crnlib::vector chosen_flags(m_alpha_selectors.size()); + uint n = 0; + for (;;) { + chosen_flags[cur_index] = true; + remapping[cur_index] = n; + n++; + if (n == m_alpha_selectors.size()) + break; + uint lowest_error = UINT_MAX; + uint lowest_error_index = 0; + for (uint i = 0; i < m_alpha_selectors.size(); i++) { + if (chosen_flags[i]) + continue; + uint total = 0; + for (uint64 cur_selector = m_alpha_selectors[cur_index], selector = m_alpha_selectors[i], j = 0; j < 16; j++, cur_selector >>= 3, selector >>= 3) { + int delta = (cur_selector & 7) - (selector & 7); + total += delta * delta; + } + if (total < lowest_error) { + lowest_error = total; + lowest_error_index = i; + } + } + cur_index = lowest_error_index; + } +} +bool crn_comp::pack_color_selectors(crnlib::vector& packed_data, const crnlib::vector& remapping) { + crnlib::vector remapped_selectors(m_color_selectors.size()); + for (uint i = 0; i < m_color_selectors.size(); i++) + remapped_selectors[remapping[i]] = m_color_selectors[i]; crnlib::vector residual_syms; - residual_syms.reserve(selectors.size() * 8); - - const uint num_baised_selector_values = (max_selector_value * 2 + 1); - symbol_histogram hist(num_baised_selector_values * num_baised_selector_values); - - dxt_hc::selectors prev_selectors; - utils::zero_object(prev_selectors); - int total_residuals = 0; - for (uint selector_index = 0; selector_index < selectors.size(); selector_index++) { - const dxt_hc::selectors& s = remapped_selectors[selector_index]; - + residual_syms.reserve(m_color_selectors.size() * 8); + symbol_histogram hist(49); + uint32 prev_selector = 0; + for (uint selector_index = 0; selector_index < m_color_selectors.size(); selector_index++) { + uint32 cur_selector = remapped_selectors[selector_index]; uint prev_sym = 0; - for (uint i = 0; i < 16; i++) { - int p = pTo_linear[crnlib_assert_range_incl(prev_selectors.get_by_index(i), max_selector_value)]; - - int r = pTo_linear[crnlib_assert_range_incl(s.get_by_index(i), max_selector_value)] - p; - - total_residuals += r * r; - - uint sym = r + max_selector_value; - - CRNLIB_ASSERT(sym < num_baised_selector_values); + for (uint32 selector = cur_selector, i = 0; i < 16; i++, selector >>= 2, prev_selector >>= 2) { + int sym = 3 + (selector & 3) - (prev_selector & 3); if (i & 1) { - uint paired_sym = (sym * num_baised_selector_values) + prev_sym; + uint paired_sym = 7 * sym + prev_sym; residual_syms.push_back(paired_sym); hist.inc_freq(paired_sym); } else prev_sym = sym; - -#if CRNLIB_CREATE_DEBUG_IMAGES - selector_image(i, selector_index) = (pTo_linear[crnlib_assert_range_incl(s.get_by_index(i), max_selector_value)] * 255) / max_selector_value; - residual_image(i, selector_index) = sym; -#endif } - - prev_selectors = s; + prev_selector = cur_selector; } - -#if CRNLIB_ENABLE_DEBUG_MESSAGES - if (m_pParams->m_flags & cCRNCompFlagDebugging) - console::debug("Total selector endpoint residuals: %u", total_residuals); -#endif - -#if CRNLIB_CREATE_DEBUG_IMAGES - image_utils::write_to_file(dynamic_string(cVarArg, "selectors_%u_%u.tga", trial_index, max_selector_value).get_ptr(), selector_image); - image_utils::write_to_file(dynamic_string(cVarArg, "selector_residuals_%u_%u.tga", trial_index, max_selector_value).get_ptr(), residual_image); -#endif - static_huffman_data_model residual_dm; - symbol_codec codec; codec.start_encoding(1024 * 1024); - - // Transmit residuals if (!residual_dm.init(true, hist, 15)) return false; - if (!codec.encode_transmit_static_huffman_data_model(residual_dm, false)) return false; - -#if CRNLIB_ENABLE_DEBUG_MESSAGES - if (m_pParams->m_flags & cCRNCompFlagDebugging) - console::debug("Wrote %u bits for selector residual Huffman tables", codec.encode_get_total_bits_written()); -#endif - uint start_bits = codec.encode_get_total_bits_written(); start_bits; - for (uint i = 0; i < residual_syms.size(); i++) { const uint sym = residual_syms[i]; codec.encode(sym, residual_dm); } - -#if CRNLIB_ENABLE_DEBUG_MESSAGES - if (m_pParams->m_flags & cCRNCompFlagDebugging) - console::debug("Wrote %u bits for selector residuals", codec.encode_get_total_bits_written() - start_bits); -#endif - codec.stop_encoding(false); - packed_data.swap(codec.get_encoding_buf()); - -#if CRNLIB_ENABLE_DEBUG_MESSAGES - if (m_pParams->m_flags & cCRNCompFlagDebugging) { - console::debug("Wrote a total of %u bits for selector codebook", codec.encode_get_total_bits_written()); - - console::debug("Wrote %f bits per each selector codebook entry", packed_data.size() * 8.0f / selectors.size()); - } -#endif - return true; } -bool crn_comp::pack_chunks( +bool crn_comp::pack_alpha_selectors(crnlib::vector& packed_data, const crnlib::vector& remapping) { + crnlib::vector remapped_selectors(m_alpha_selectors.size()); + for (uint i = 0; i < m_alpha_selectors.size(); i++) + remapped_selectors[remapping[i]] = m_alpha_selectors[i]; + crnlib::vector residual_syms; + residual_syms.reserve(m_alpha_selectors.size() * 8); + symbol_histogram hist(225); + uint64 prev_selector = 0; + for (uint selector_index = 0; selector_index < m_alpha_selectors.size(); selector_index++) { + uint64 cur_selector = remapped_selectors[selector_index]; + uint prev_sym = 0; + for (uint64 selector = cur_selector, i = 0; i < 16; i++, selector >>= 3, prev_selector >>= 3) { + int sym = 7 + (selector & 7) - (prev_selector & 7); + if (i & 1) { + uint paired_sym = 15 * sym + prev_sym; + residual_syms.push_back(paired_sym); + hist.inc_freq(paired_sym); + } else + prev_sym = sym; + } + prev_selector = cur_selector; + } + + static_huffman_data_model residual_dm; + symbol_codec codec; + codec.start_encoding(1024 * 1024); + if (!residual_dm.init(true, hist, 15)) + return false; + if (!codec.encode_transmit_static_huffman_data_model(residual_dm, false)) + return false; + uint start_bits = codec.encode_get_total_bits_written(); + start_bits; + for (uint i = 0; i < residual_syms.size(); i++) { + const uint sym = residual_syms[i]; + codec.encode(sym, residual_dm); + } + codec.stop_encoding(false); + packed_data.swap(codec.get_encoding_buf()); + return true; +} + +bool crn_comp::pack_blocks( uint group, bool clear_histograms, symbol_codec* pCodec, const crnlib::vector* pColor_endpoint_remap, const crnlib::vector* pColor_selector_remap, const crnlib::vector* pAlpha_endpoint_remap, - const crnlib::vector* pAlpha_selector_remap) { - uint first_chunk = m_mip_groups[group].m_first_chunk; - uint num_chunks = m_mip_groups[group].m_num_chunks; - uint chunk_width = m_mip_groups[group].m_chunk_width; - + const crnlib::vector* pAlpha_selector_remap + ) { if (!pCodec) { - m_chunk_encoding_hist.resize(256); + m_reference_hist.resize(256); if (clear_histograms) - m_chunk_encoding_hist.set_all(0); + m_reference_hist.set_all(0); if (pColor_endpoint_remap) { - CRNLIB_ASSERT(pColor_endpoint_remap->size() == m_hvq.get_color_endpoint_codebook_size()); m_endpoint_index_hist[0].resize(pColor_endpoint_remap->size()); if (clear_histograms) m_endpoint_index_hist[0].set_all(0); } if (pColor_selector_remap) { - CRNLIB_ASSERT(pColor_selector_remap->size() == m_hvq.get_color_selector_codebook_size()); m_selector_index_hist[0].resize(pColor_selector_remap->size()); if (clear_histograms) m_selector_index_hist[0].set_all(0); } if (pAlpha_endpoint_remap) { - CRNLIB_ASSERT(pAlpha_endpoint_remap->size() == m_hvq.get_alpha_endpoint_codebook_size()); m_endpoint_index_hist[1].resize(pAlpha_endpoint_remap->size()); if (clear_histograms) m_endpoint_index_hist[1].set_all(0); } if (pAlpha_selector_remap) { - CRNLIB_ASSERT(pAlpha_selector_remap->size() == m_hvq.get_alpha_selector_codebook_size()); m_selector_index_hist[1].resize(pAlpha_selector_remap->size()); if (clear_histograms) m_selector_index_hist[1].set_all(0); @@ -690,15 +631,16 @@ bool crn_comp::pack_chunks( } } - for (uint by = 0, block_width = chunk_width << 1, b = first_chunk << 2, bEnd = b + (num_chunks << 2); b < bEnd; by++) { + uint block_width = m_levels[group].block_width; + for (uint by = 0, b = m_levels[group].first_block, bEnd = b + m_levels[group].num_blocks; b < bEnd; by++) { for (uint bx = 0; bx < block_width; bx++, b++) { if (!(by & 1) && !(bx & 1)) { uint8 reference_group = m_endpoint_indices[b].reference | m_endpoint_indices[b + block_width].reference << 2 | m_endpoint_indices[b + 1].reference << 4 | m_endpoint_indices[b + block_width + 1].reference << 6; if (pCodec) - pCodec->encode(reference_group, m_reference_encoding_dm); + pCodec->encode(reference_group, m_reference_dm); else - m_chunk_encoding_hist.inc_freq(reference_group); + m_reference_hist.inc_freq(reference_group); } for (uint c = 0; c < cNumComps; c++) { if (endpoint_remap[c]) { @@ -747,42 +689,13 @@ void crn_comp::append_vec(crnlib::vector& a, const crnlib::vector& } } -#if 0 - bool crn_comp::init_chunk_encoding_dm() - { - symbol_histogram hist(1 << (3 * cEncodingMapNumChunksPerCode)); - - for (uint chunk_index = 0; chunk_index < m_hvq.get_num_chunks(); chunk_index += cEncodingMapNumChunksPerCode) - { - uint index = 0; - for (uint i = 0; i < cEncodingMapNumChunksPerCode; i++) - { - if ((chunk_index + i) >= m_hvq.get_num_chunks()) - break; - const dxt_hc::chunk_encoding& encoding = m_hvq.get_chunk_encoding(chunk_index + i); - - index |= (encoding.m_encoding_index << (i * 3)); - } - - hist.inc_freq(index); - } - - if (!m_chunk_encoding_dm.init(true, hist, 16)) - return false; - - return true; - } -#endif - bool crn_comp::alias_images() { for (uint face_index = 0; face_index < m_pParams->m_faces; face_index++) { for (uint level_index = 0; level_index < m_pParams->m_levels; level_index++) { const uint width = math::maximum(1U, m_pParams->m_width >> level_index); const uint height = math::maximum(1U, m_pParams->m_height >> level_index); - if (!m_pParams->m_pImages[face_index][level_index]) return false; - m_images[face_index][level_index].alias((color_quad_u8*)m_pParams->m_pImages[face_index][level_index], width, height); } } @@ -792,52 +705,22 @@ bool crn_comp::alias_images() { for (uint face_index = 0; face_index < m_pParams->m_faces; face_index++) { for (uint level_index = 0; level_index < m_pParams->m_levels; level_index++) { image_u8 cooked_image(m_images[face_index][level_index]); - image_utils::convert_image(cooked_image, conv_type); - m_images[face_index][level_index].swap(cooked_image); } } } - m_mip_groups.clear(); - m_mip_groups.resize(m_pParams->m_levels); - - utils::zero_object(m_levels); - - uint mip_group = 0; - uint chunk_index = 0; - uint mip_group_chunk_index = 0; - (void)mip_group_chunk_index; - for (uint level_index = 0; level_index < m_pParams->m_levels; level_index++) { - const uint width = math::maximum(1U, m_pParams->m_width >> level_index); - const uint height = math::maximum(1U, m_pParams->m_height >> level_index); - const uint chunk_width = math::align_up_value(width, cChunkPixelWidth) / cChunkPixelWidth; - const uint chunk_height = math::align_up_value(height, cChunkPixelHeight) / cChunkPixelHeight; - const uint num_chunks = m_pParams->m_faces * chunk_width * chunk_height; - - m_mip_groups[mip_group].m_first_chunk = chunk_index; - mip_group_chunk_index = 0; - - m_mip_groups[mip_group].m_num_chunks += num_chunks; - m_mip_groups[mip_group].m_chunk_width = chunk_width; - - m_levels[level_index].m_width = width; - m_levels[level_index].m_height = height; - m_levels[level_index].m_chunk_width = chunk_width; - m_levels[level_index].m_chunk_height = chunk_height; - m_levels[level_index].m_first_chunk = chunk_index; - m_levels[level_index].m_num_chunks = num_chunks; - m_levels[level_index].m_group_index = mip_group; - m_levels[level_index].m_group_first_chunk = 0; - - chunk_index += num_chunks; - - mip_group++; + m_levels.resize(m_pParams->m_levels); + m_total_blocks = 0; + for (uint level = 0; level < m_pParams->m_levels; level++) { + uint blockHeight = (math::maximum(1U, m_pParams->m_height >> level) + 7 & ~7) >> 2; + m_levels[level].block_width = (math::maximum(1U, m_pParams->m_width >> level) + 7 & ~7) >> 2; + m_levels[level].first_block = m_total_blocks; + m_levels[level].num_blocks = m_pParams->m_faces * m_levels[level].block_width * blockHeight; + m_total_blocks += m_levels[level].num_blocks; } - m_total_chunks = chunk_index; - return true; } @@ -848,25 +731,26 @@ void crn_comp::clear() { for (uint l = 0; l < cCRNMaxLevels; l++) m_images[f][l].clear(); - utils::zero_object(m_levels); - - m_mip_groups.clear(); - utils::zero_object(m_has_comp); + m_levels.clear(); + + m_total_blocks = 0; + m_color_endpoints.clear(); + m_alpha_endpoints.clear(); + m_color_selectors.clear(); + m_alpha_selectors.clear(); m_endpoint_indices.clear(); m_selector_indices.clear(); - m_total_chunks = 0; - utils::zero_object(m_crn_header); m_comp_data.clear(); m_hvq.clear(); - m_chunk_encoding_hist.clear(); - m_reference_encoding_dm.clear(); + m_reference_hist.clear(); + m_reference_dm.clear(); for (uint i = 0; i < 2; i++) { m_endpoint_index_hist[i].clear(); m_endpoint_index_dm[i].clear(); @@ -875,7 +759,7 @@ void crn_comp::clear() { } for (uint i = 0; i < cCRNMaxLevels; i++) - m_packed_chunks[i].clear(); + m_packed_blocks[i].clear(); m_packed_data_models.clear(); @@ -999,7 +883,7 @@ bool crn_comp::quantize_images() { break; } case cCRNFmtETC1: { - console::warning("crn_comp::quantize_chunks: This class does not support ETC1"); + console::warning("crn_comp::quantize_images: This class does not support ETC1"); return false; } default: { @@ -1007,21 +891,18 @@ bool crn_comp::quantize_images() { } } params.m_debugging = (m_pParams->m_flags & cCRNCompFlagDebugging) != 0; + params.m_pTask_pool = &m_task_pool; params.m_num_levels = m_pParams->m_levels; for (uint i = 0; i < m_pParams->m_levels; i++) { - params.m_levels[i].m_first_block = m_levels[i].m_first_chunk << 2; - params.m_levels[i].m_num_blocks = m_levels[i].m_num_chunks << 2; - params.m_levels[i].m_block_width = m_levels[i].m_chunk_width << 1; + params.m_levels[i].m_first_block = m_levels[i].first_block; + params.m_levels[i].m_num_blocks = m_levels[i].num_blocks; + params.m_levels[i].m_block_width = m_levels[i].block_width; params.m_levels[i].m_weight = math::minimum(12.0f, powf(1.3f, (float)i)); } params.m_num_faces = m_pParams->m_faces; - - params.m_endpoint_indices = &m_endpoint_indices; - params.m_selector_indices = &m_selector_indices; - - params.m_num_blocks = m_total_chunks << 2; - params.m_blocks = (color_quad_u8(*)[16])crnlib_malloc(params.m_num_blocks * 16 * sizeof(color_quad_u8)); + params.m_num_blocks = m_total_blocks; + color_quad_u8 (*blocks)[16] = (color_quad_u8(*)[16])crnlib_malloc(params.m_num_blocks * 16 * sizeof(color_quad_u8)); for (uint b = 0, level = 0; level < m_pParams->m_levels; level++) { for (uint face = 0; face < m_pParams->m_faces; face++) { image_u8& image = m_images[face][level]; @@ -1033,14 +914,14 @@ bool crn_comp::quantize_images() { for (uint y0 = by << 2, bx = 0; bx < blockWidth; bx++, b++) { for (uint t = 0, x0 = bx << 2, dy = 0; dy < 4; dy++) { for (uint y = math::minimum(y0 + dy, height - 1), dx = 0; dx < 4; dx++, t++) - params.m_blocks[b][t] = image(math::minimum(x0 + dx, width - 1), y); + blocks[b][t] = image(math::minimum(x0 + dx, width - 1), y); } } } } } - bool result = m_hvq.compress(params, m_task_pool); - crnlib_free(params.m_blocks); + bool result = m_hvq.compress(blocks, m_endpoint_indices, m_selector_indices, m_color_endpoints, m_alpha_endpoints, m_color_selectors, m_alpha_selectors, params); + crnlib_free(blocks); return result; } @@ -1062,14 +943,14 @@ void crn_comp::optimize_color_endpoint_codebook_task(uint64 data, void* pData_pt crnlib::vector& remapping = pParams->m_trial->remapping; if (pParams->m_iter_index == pParams->m_max_iter_index) { - sort_color_endpoint_codebook(remapping, m_hvq.get_color_endpoint_vec()); + sort_color_endpoint_codebook(remapping, m_color_endpoints); } else { create_zeng_reorder_table( - m_hvq.get_color_endpoint_codebook_size(), + m_color_endpoints.size(), *pParams->m_xhist, remapping, pParams->m_iter_index ? color_endpoint_similarity_func : NULL, - &m_hvq, + &m_color_endpoints, pParams->m_iter_index / static_cast(pParams->m_max_iter_index - 1)); } @@ -1078,8 +959,8 @@ void crn_comp::optimize_color_endpoint_codebook_task(uint64 data, void* pData_pt uint codebook_size = remapping.size(); crnlib::vector hist(codebook_size); - for (uint group = 0; group < m_mip_groups.size(); group++) { - for (uint endpoint_index = 0, b = m_mip_groups[group].m_first_chunk << 2, bEnd = b + (m_mip_groups[group].m_num_chunks << 2); b < bEnd; b++) { + for (uint level = 0; level < m_levels.size(); level++) { + for (uint endpoint_index = 0, b = m_levels[level].first_block, bEnd = b + m_levels[level].num_blocks; b < bEnd; b++) { uint index = remapping[m_endpoint_indices[b].component[cColor]]; if (!m_endpoint_indices[b].reference) { int sym = index - endpoint_index; @@ -1109,8 +990,8 @@ void crn_comp::optimize_color_endpoint_codebook_task(uint64 data, void* pData_pt bool crn_comp::optimize_color_endpoint_codebook(crnlib::vector& remapping) { if (m_pParams->m_flags & cCRNCompFlagQuick) { - remapping.resize(m_hvq.get_color_endpoint_vec().size()); - for (uint i = 0; i < m_hvq.get_color_endpoint_vec().size(); i++) + remapping.resize(m_color_endpoints.size()); + for (uint i = 0; i < m_color_endpoints.size(); i++) remapping[i] = i; if (!pack_color_endpoints(m_packed_color_endpoints, remapping, 0)) @@ -1127,7 +1008,7 @@ bool crn_comp::optimize_color_endpoint_codebook(crnlib::vector& remapping) const uint cMaxEndpointRemapIters = 3; optimize_color_endpoint_codebook_params::trial remapping_trial[cMaxEndpointRemapIters + 1]; - uint n = m_hvq.get_color_endpoint_codebook_size(); + uint n = m_color_endpoints.size(); hist_type xhist(n * n); for (uint b = 1; b < m_endpoint_indices.size(); b++) { if (!m_endpoint_indices[b].reference) { @@ -1164,13 +1045,13 @@ bool crn_comp::optimize_color_endpoint_codebook(crnlib::vector& remapping) bool crn_comp::optimize_color_selector_codebook(crnlib::vector& remapping) { if (m_pParams->m_flags & cCRNCompFlagQuick) { - remapping.resize(m_hvq.get_color_selectors_vec().size()); - for (uint i = 0; i < m_hvq.get_color_selectors_vec().size(); i++) + remapping.resize(m_color_selectors.size()); + for (uint i = 0; i < m_color_selectors.size(); i++) remapping[i] = i; } else { - sort_selector_codebook(remapping, m_hvq.get_color_selectors_vec(), g_dxt1_to_linear); + sort_color_selectors(remapping); } - return pack_selectors(m_packed_color_selectors, m_hvq.get_color_selectors_vec(), remapping, 3, g_dxt1_to_linear, 0); + return pack_color_selectors(m_packed_color_selectors, remapping); } struct optimize_alpha_endpoint_codebook_params { @@ -1190,14 +1071,14 @@ void crn_comp::optimize_alpha_endpoint_codebook_task(uint64 data, void* pData_pt crnlib::vector& remapping = pParams->m_trial->remapping; if (pParams->m_iter_index == pParams->m_max_iter_index) { - sort_alpha_endpoint_codebook(remapping, m_hvq.get_alpha_endpoint_vec()); + sort_alpha_endpoint_codebook(remapping, m_alpha_endpoints); } else { create_zeng_reorder_table( - m_hvq.get_alpha_endpoint_codebook_size(), + m_alpha_endpoints.size(), *pParams->m_xhist, remapping, pParams->m_iter_index ? alpha_endpoint_similarity_func : NULL, - &m_hvq, + &m_alpha_endpoints, pParams->m_iter_index / static_cast(pParams->m_max_iter_index - 1)); } @@ -1207,8 +1088,8 @@ void crn_comp::optimize_alpha_endpoint_codebook_task(uint64 data, void* pData_pt crnlib::vector hist(codebook_size); bool hasAlpha0 = m_has_comp[cAlpha0], hasAlpha1 = m_has_comp[cAlpha1]; - for (uint group = 0; group < m_mip_groups.size(); group++) { - for (uint index0 = 0, index1 = 0, b = m_mip_groups[group].m_first_chunk << 2, bEnd = b + (m_mip_groups[group].m_num_chunks << 2); b < bEnd; b++) { + for (uint level = 0; level < m_levels.size(); level++) { + for (uint index0 = 0, index1 = 0, b = m_levels[level].first_block, bEnd = b + m_levels[level].num_blocks; b < bEnd; b++) { if (hasAlpha0) { uint index = remapping[m_endpoint_indices[b].component[cAlpha0]]; if (!m_endpoint_indices[b].reference) { @@ -1248,8 +1129,8 @@ void crn_comp::optimize_alpha_endpoint_codebook_task(uint64 data, void* pData_pt bool crn_comp::optimize_alpha_endpoint_codebook(crnlib::vector& remapping) { if (m_pParams->m_flags & cCRNCompFlagQuick) { - remapping.resize(m_hvq.get_alpha_endpoint_vec().size()); - for (uint i = 0; i < m_hvq.get_alpha_endpoint_vec().size(); i++) + remapping.resize(m_alpha_endpoints.size()); + for (uint i = 0; i < m_alpha_endpoints.size(); i++) remapping[i] = i; if (!pack_alpha_endpoints(m_packed_alpha_endpoints, remapping, 0)) @@ -1266,7 +1147,7 @@ bool crn_comp::optimize_alpha_endpoint_codebook(crnlib::vector& remapping) const uint cMaxEndpointRemapIters = 3; optimize_alpha_endpoint_codebook_params::trial remapping_trial[cMaxEndpointRemapIters + 1]; - uint n = m_hvq.get_alpha_endpoint_codebook_size(); + uint n = m_alpha_endpoints.size(); hist_type xhist(n * n); bool hasAlpha0 = m_has_comp[cAlpha0], hasAlpha1 = m_has_comp[cAlpha1]; for (uint b = 1; b < m_endpoint_indices.size(); b++) { @@ -1310,20 +1191,20 @@ bool crn_comp::optimize_alpha_endpoint_codebook(crnlib::vector& remapping) bool crn_comp::optimize_alpha_selector_codebook(crnlib::vector& remapping) { if (m_pParams->m_flags & cCRNCompFlagQuick) { - remapping.resize(m_hvq.get_alpha_selectors_vec().size()); - for (uint i = 0; i < m_hvq.get_alpha_selectors_vec().size(); i++) + remapping.resize(m_alpha_selectors.size()); + for (uint i = 0; i < m_alpha_selectors.size(); i++) remapping[i] = i; } else { - sort_selector_codebook(remapping, m_hvq.get_alpha_selectors_vec(), g_dxt5_to_linear); + sort_alpha_selectors(remapping); } - return pack_selectors(m_packed_alpha_selectors, m_hvq.get_alpha_selectors_vec(), remapping, 7, g_dxt5_to_linear, 0); + return pack_alpha_selectors(m_packed_alpha_selectors, remapping); } bool crn_comp::pack_data_models() { symbol_codec codec; codec.start_encoding(1024 * 1024); - if (!codec.encode_transmit_static_huffman_data_model(m_reference_encoding_dm, false)) + if (!codec.encode_transmit_static_huffman_data_model(m_reference_dm, false)) return false; for (uint i = 0; i < 2; i++) { @@ -1363,28 +1244,28 @@ bool crn_comp::create_comp_data() { m_comp_data.resize(m_comp_data.size() + sizeof(m_crn_header.m_level_ofs[0]) * (m_pParams->m_levels - 1)); if (m_packed_color_endpoints.size()) { - m_crn_header.m_color_endpoints.m_num = static_cast(m_hvq.get_color_endpoint_codebook_size()); + m_crn_header.m_color_endpoints.m_num = static_cast(m_color_endpoints.size()); m_crn_header.m_color_endpoints.m_size = m_packed_color_endpoints.size(); m_crn_header.m_color_endpoints.m_ofs = m_comp_data.size(); append_vec(m_comp_data, m_packed_color_endpoints); } if (m_packed_color_selectors.size()) { - m_crn_header.m_color_selectors.m_num = static_cast(m_hvq.get_color_selector_codebook_size()); + m_crn_header.m_color_selectors.m_num = static_cast(m_color_selectors.size()); m_crn_header.m_color_selectors.m_size = m_packed_color_selectors.size(); m_crn_header.m_color_selectors.m_ofs = m_comp_data.size(); append_vec(m_comp_data, m_packed_color_selectors); } if (m_packed_alpha_endpoints.size()) { - m_crn_header.m_alpha_endpoints.m_num = static_cast(m_hvq.get_alpha_endpoint_codebook_size()); + m_crn_header.m_alpha_endpoints.m_num = static_cast(m_alpha_endpoints.size()); m_crn_header.m_alpha_endpoints.m_size = m_packed_alpha_endpoints.size(); m_crn_header.m_alpha_endpoints.m_ofs = m_comp_data.size(); append_vec(m_comp_data, m_packed_alpha_endpoints); } if (m_packed_alpha_selectors.size()) { - m_crn_header.m_alpha_selectors.m_num = static_cast(m_hvq.get_alpha_selector_codebook_size()); + m_crn_header.m_alpha_selectors.m_num = static_cast(m_alpha_selectors.size()); m_crn_header.m_alpha_selectors.m_size = m_packed_alpha_selectors.size(); m_crn_header.m_alpha_selectors.m_ofs = m_comp_data.size(); append_vec(m_comp_data, m_packed_alpha_selectors); @@ -1395,9 +1276,9 @@ bool crn_comp::create_comp_data() { append_vec(m_comp_data, m_packed_data_models); uint level_ofs[cCRNMaxLevels]; - for (uint i = 0; i < m_mip_groups.size(); i++) { + for (uint i = 0; i < m_levels.size(); i++) { level_ofs[i] = m_comp_data.size(); - append_vec(m_comp_data, m_packed_chunks[i]); + append_vec(m_comp_data, m_packed_blocks[i]); } crnd::crn_header& dst_header = *(crnd::crn_header*)&m_comp_data[0]; @@ -1405,10 +1286,10 @@ bool crn_comp::create_comp_data() { memcpy(&dst_header, &m_crn_header, sizeof(dst_header)); - for (uint i = 0; i < m_mip_groups.size(); i++) + for (uint i = 0; i < m_levels.size(); i++) dst_header.m_level_ofs[i] = level_ofs[i]; - const uint actual_header_size = sizeof(crnd::crn_header) + sizeof(dst_header.m_level_ofs[0]) * (m_mip_groups.size() - 1); + const uint actual_header_size = sizeof(crnd::crn_header) + sizeof(dst_header.m_level_ofs[0]) * (m_levels.size() - 1); dst_header.m_sig = crnd::crn_header::cCRNSigValue; @@ -1456,7 +1337,7 @@ bool crn_comp::compress_internal() { return false; } - m_chunk_encoding_hist.clear(); + m_reference_hist.clear(); for (uint i = 0; i < 2; i++) { m_endpoint_index_hist[i].clear(); m_endpoint_index_dm[i].clear(); @@ -1465,13 +1346,13 @@ bool crn_comp::compress_internal() { } for (uint pass = 0; pass < 2; pass++) { - for (uint mip_group = 0; mip_group < m_mip_groups.size(); mip_group++) { + for (uint level = 0; level < m_levels.size(); level++) { symbol_codec codec; codec.start_encoding(2 * 1024 * 1024); - if (!pack_chunks( - mip_group, - !pass && !mip_group, pass ? &codec : NULL, + if (!pack_blocks( + level, + !pass && !level, pass ? &codec : NULL, m_has_comp[cColor] ? &endpoint_remap[0] : NULL, m_has_comp[cColor] ? &selector_remap[0] : NULL, m_has_comp[cAlpha0] ? &endpoint_remap[1] : NULL, m_has_comp[cAlpha0] ? &selector_remap[1] : NULL)) { return false; @@ -1480,11 +1361,11 @@ bool crn_comp::compress_internal() { codec.stop_encoding(false); if (pass) - m_packed_chunks[mip_group].swap(codec.get_encoding_buf()); + m_packed_blocks[level].swap(codec.get_encoding_buf()); } if (!pass) { - m_reference_encoding_dm.init(true, m_chunk_encoding_hist, 16); + m_reference_dm.init(true, m_reference_hist, 16); for (uint i = 0; i < 2; i++) { if (m_endpoint_index_hist[i].size()) diff --git a/crnlib/crn_comp.h b/crnlib/crn_comp.h index ca326ae..58350ec 100644 --- a/crnlib/crn_comp.h +++ b/crnlib/crn_comp.h @@ -37,25 +37,6 @@ class crn_comp : public itexture_comp { image_u8 m_images[cCRNMaxFaces][cCRNMaxLevels]; - struct level_tag { - uint m_width, m_height; - uint m_chunk_width, m_chunk_height; - uint m_group_index; - uint m_num_chunks; - uint m_first_chunk; - uint m_group_first_chunk; - } m_levels[cCRNMaxLevels]; - - struct mip_group { - mip_group() - : m_first_chunk(0), m_num_chunks(0) {} - - uint m_first_chunk; - uint m_num_chunks; - uint m_chunk_width; - }; - crnlib::vector m_mip_groups; - enum comp { cColor, cAlpha0, @@ -65,18 +46,28 @@ class crn_comp : public itexture_comp { bool m_has_comp[cNumComps]; + struct level_details { + uint first_block; + uint num_blocks; + uint block_width; + }; + crnlib::vector m_levels; + + uint m_total_blocks; + crnlib::vector m_color_endpoints; + crnlib::vector m_alpha_endpoints; + crnlib::vector m_color_selectors; + crnlib::vector m_alpha_selectors; crnlib::vector m_endpoint_indices; crnlib::vector m_selector_indices; - uint m_total_chunks; - crnd::crn_header m_crn_header; crnlib::vector m_comp_data; dxt_hc m_hvq; - symbol_histogram m_chunk_encoding_hist; - static_huffman_data_model m_reference_encoding_dm; + symbol_histogram m_reference_hist; + static_huffman_data_model m_reference_dm; symbol_histogram m_endpoint_index_hist[2]; static_huffman_data_model m_endpoint_index_dm[2]; // color, alpha @@ -84,7 +75,7 @@ class crn_comp : public itexture_comp { symbol_histogram m_selector_index_hist[2]; static_huffman_data_model m_selector_index_dm[2]; // color, alpha - crnlib::vector m_packed_chunks[cCRNMaxLevels]; + crnlib::vector m_packed_blocks[cCRNMaxLevels]; crnlib::vector m_packed_data_models; crnlib::vector m_packed_color_endpoints; crnlib::vector m_packed_color_selectors; @@ -101,22 +92,16 @@ class crn_comp : public itexture_comp { bool pack_color_endpoints(crnlib::vector& data, const crnlib::vector& remapping, uint trial_index); bool pack_alpha_endpoints(crnlib::vector& data, const crnlib::vector& remapping, uint trial_index); - static float color_selector_similarity_func(uint index_a, uint index_b, void* pContext); - static float alpha_selector_similarity_func(uint index_a, uint index_b, void* pContext); - void sort_selector_codebook(crnlib::vector& remapping, const crnlib::vector& selectors, const uint8* pTo_linear); + void sort_color_selectors(crnlib::vector& remapping); + void sort_alpha_selectors(crnlib::vector& remapping); - bool pack_selectors( - crnlib::vector& packed_data, - const crnlib::vector& selectors, - const crnlib::vector& remapping, - uint max_selector_value, - const uint8* pTo_linear, - uint trial_index); + bool pack_color_selectors(crnlib::vector& packed_data, const crnlib::vector& remapping); + bool pack_alpha_selectors(crnlib::vector& packed_data, const crnlib::vector& remapping); bool alias_images(); bool quantize_images(); - bool pack_chunks( + bool pack_blocks( uint group, bool clear_histograms, symbol_codec* pCodec, diff --git a/crnlib/crn_dxt_hc.cpp b/crnlib/crn_dxt_hc.cpp index 7b51eb8..f40acbe 100644 --- a/crnlib/crn_dxt_hc.cpp +++ b/crnlib/crn_dxt_hc.cpp @@ -6,10 +6,11 @@ #include "crn_console.h" #include "crn_dxt_fast.h" -#define CRNLIB_ENABLE_DEBUG_MESSAGES 0 - namespace crnlib { +typedef vec<6, float> vec6F; +typedef vec<16, float> vec16F; + static uint8 g_tile_map[8][2][2] = { {{ 0, 0 }, { 0, 0 }}, {{ 0, 0 }, { 1, 1 }}, @@ -23,10 +24,8 @@ static uint8 g_tile_map[8][2][2] = { dxt_hc::dxt_hc() : m_num_blocks(0), - m_num_alpha_blocks(0), m_has_color_blocks(false), - m_has_alpha0_blocks(false), - m_has_alpha1_blocks(false), + m_num_alpha_blocks(0), m_main_thread_id(crn_get_current_thread_id()), m_canceled(false), m_pTask_pool(NULL), @@ -42,16 +41,9 @@ void dxt_hc::clear() { m_num_blocks = 0; m_num_alpha_blocks = 0; m_has_color_blocks = false; - m_has_alpha0_blocks = false; - m_has_alpha1_blocks = false; m_color_clusters.clear(); m_alpha_clusters.clear(); - m_alpha_selectors_vec.clear(); - m_color_selectors_vec.clear(); - - m_color_endpoints.clear(); - m_alpha_endpoints.clear(); m_canceled = false; @@ -73,51 +65,39 @@ void dxt_hc::clear() { m_num_tiles = 0; } -bool dxt_hc::compress(const params& p, task_pool& task_pool) { +bool dxt_hc::compress( + color_quad_u8 (*blocks)[16], + crnlib::vector& endpoint_indices, + crnlib::vector& selector_indices, + crnlib::vector& color_endpoints, + crnlib::vector& alpha_endpoints, + crnlib::vector& color_selectors, + crnlib::vector& alpha_selectors, + const params& p + ) { clear(); - m_params = p; + m_has_color_blocks = p.m_format == cDXT1 || p.m_format == cDXT5; + m_num_alpha_blocks = p.m_format == cDXT5 || p.m_format == cDXT5A ? 1 : p.m_format == cDXN_XY || p.m_format == cDXN_YX ? 2 : 0; + if (!m_has_color_blocks && !m_num_alpha_blocks) + return false; + m_blocks = blocks; m_main_thread_id = crn_get_current_thread_id(); - m_pTask_pool = &task_pool; - - switch (m_params.m_format) { - case cDXT1: { - m_has_color_blocks = true; - break; - } - case cDXT5: { - m_has_color_blocks = true; - m_has_alpha0_blocks = true; - m_num_alpha_blocks = 1; - break; - } - case cDXT5A: { - m_has_alpha0_blocks = true; - m_num_alpha_blocks = 1; - break; - } - case cDXN_XY: - case cDXN_YX: { - m_has_alpha0_blocks = true; - m_has_alpha1_blocks = true; - m_num_alpha_blocks = 2; - break; - } - default: { - return false; - } - } + m_pTask_pool = p.m_pTask_pool; + m_params = p; + uint tile_derating[8] = {0, 1, 1, 2, 2, 2, 2, 3}; for (uint level = 0; level < p.m_num_levels; level++) { float adaptive_tile_color_psnr_derating = p.m_adaptive_tile_color_psnr_derating; if (level && adaptive_tile_color_psnr_derating > .25f) adaptive_tile_color_psnr_derating = math::maximum(.25f, adaptive_tile_color_psnr_derating / powf(3.0f, static_cast(level))); for (uint e = 0; e < 8; e++) - m_color_derating[level][e] = math::lerp(0.0f, adaptive_tile_color_psnr_derating, (g_chunk_encodings[e].m_num_tiles - 1) / 3.0f); + m_color_derating[level][e] = math::lerp(0.0f, adaptive_tile_color_psnr_derating, tile_derating[e] / 3.0f); } for (uint e = 0; e < 8; e++) - m_alpha_derating[e] = math::lerp(0.0f, m_params.m_adaptive_tile_alpha_psnr_derating, (g_chunk_encodings[e].m_num_tiles - 1) / 3.0f); + m_alpha_derating[e] = math::lerp(0.0f, m_params.m_adaptive_tile_alpha_psnr_derating, tile_derating[e] / 3.0f); + for (uint i = 0; i < 256; i++) + m_uint8_to_float[i] = i * 1.0f / 255.0f; - m_blocks = m_params.m_blocks; m_num_blocks = m_params.m_num_blocks; m_block_weights.resize(m_num_blocks); m_block_encodings.resize(m_num_blocks); @@ -156,76 +136,68 @@ bool dxt_hc::compress(const params& p, task_pool& task_pool) { if (m_num_alpha_blocks) create_alpha_selector_codebook(); - crnlib::vector color_endpoint_remap(m_color_clusters.size()); - m_color_endpoints.reserve(m_color_clusters.size()); - hash_map color_clusters_map; + color_endpoints.reserve(color_endpoints.size() + m_color_clusters.size()); + crnlib::vector color_endpoints_remap(m_color_clusters.size()); + hash_map color_endpoints_map; for (uint i = 0; i < m_color_clusters.size(); i++) { - if (m_color_clusters[i].m_pixels.size()) { - uint endpoint = dxt1_block::pack_endpoints(m_color_clusters[i].m_refined_first_endpoint, m_color_clusters[i].m_refined_second_endpoint); - hash_map::insert_result insert_result = color_clusters_map.insert(endpoint, m_color_endpoints.size()); + if (m_color_clusters[i].pixels.size()) { + uint32 endpoint = dxt1_block::pack_endpoints(m_color_clusters[i].first_endpoint, m_color_clusters[i].second_endpoint); + hash_map::insert_result insert_result = color_endpoints_map.insert(endpoint, color_endpoints.size()); if (insert_result.second) { - color_endpoint_remap[i] = m_color_endpoints.size(); - m_color_endpoints.push_back(endpoint); + color_endpoints_remap[i] = color_endpoints.size(); + color_endpoints.push_back(endpoint); } else { - color_endpoint_remap[i] = insert_result.first->second; + color_endpoints_remap[i] = insert_result.first->second; } } } - crnlib::vector color_selector_remap(m_color_selectors.size()); - m_color_selectors_vec.reserve(m_color_selectors.size()); - hash_map color_selector_map; + alpha_endpoints.reserve(alpha_endpoints.size() + m_alpha_clusters.size()); + crnlib::vector alpha_endpoints_remap(m_alpha_clusters.size()); + hash_map alpha_endpoints_map; + for (uint i = 0; i < m_alpha_clusters.size(); i++) { + if (m_alpha_clusters[i].pixels.size()) { + uint32 endpoint = dxt5_block::pack_endpoints(m_alpha_clusters[i].first_endpoint, m_alpha_clusters[i].second_endpoint); + hash_map::insert_result insert_result = alpha_endpoints_map.insert(endpoint, alpha_endpoints.size()); + if (insert_result.second) { + alpha_endpoints_remap[i] = alpha_endpoints.size(); + alpha_endpoints.push_back(endpoint); + } else { + alpha_endpoints_remap[i] = insert_result.first->second; + } + } + } + + color_selectors.reserve(color_selectors.size() + m_color_selectors.size()); + crnlib::vector color_selectors_remap(m_color_selectors.size()); + hash_map color_selectors_map; for (uint i = 0; i < m_color_selectors.size(); i++) { if (m_color_selectors_used[i]) { - hash_map::insert_result insert_result = color_selector_map.insert(m_color_selectors[i], m_color_selectors_vec.size()); + hash_map::insert_result insert_result = color_selectors_map.insert(m_color_selectors[i], color_selectors.size()); if (insert_result.second) { - color_selector_remap[i] = m_color_selectors_vec.size(); - selectors selector_vec; - for (uint32 selector = m_color_selectors[i], s = 0; s < 16; s++, selector >>= 2) - selector_vec.set_by_index(s, selector & 3); - m_color_selectors_vec.push_back(selector_vec); + color_selectors_remap[i] = color_selectors.size(); + color_selectors.push_back(m_color_selectors[i]); } else { - color_selector_remap[i] = insert_result.first->second; + color_selectors_remap[i] = insert_result.first->second; } } } - crnlib::vector alpha_endpoint_remap(m_alpha_clusters.size()); - m_alpha_endpoints.reserve(m_alpha_clusters.size()); - hash_map alpha_endpoints_map; - for (uint i = 0; i < m_alpha_clusters.size(); i++) { - if (m_alpha_clusters[i].m_pixels.size()) { - uint endpoint = dxt5_block::pack_endpoints(m_alpha_clusters[i].m_refined_first_endpoint, m_alpha_clusters[i].m_refined_second_endpoint); - hash_map::insert_result insert_result = alpha_endpoints_map.insert(endpoint, m_alpha_endpoints.size()); - if (insert_result.second) { - alpha_endpoint_remap[i] = m_alpha_endpoints.size(); - m_alpha_endpoints.push_back(endpoint); - } else { - alpha_endpoint_remap[i] = insert_result.first->second; - } - } - } - - crnlib::vector alpha_selector_remap(m_alpha_selectors.size()); - m_alpha_selectors_vec.reserve(m_alpha_selectors.size()); + alpha_selectors.reserve(alpha_selectors.size() + m_alpha_selectors.size()); + crnlib::vector alpha_selectors_remap(m_alpha_selectors.size()); hash_map alpha_selectors_map; for (uint i = 0; i < m_alpha_selectors.size(); i++) { if (m_alpha_selectors_used[i]) { - hash_map::insert_result insert_result = alpha_selectors_map.insert(m_alpha_selectors[i], m_alpha_selectors_vec.size()); + hash_map::insert_result insert_result = alpha_selectors_map.insert(m_alpha_selectors[i], alpha_selectors.size()); if (insert_result.second) { - alpha_selector_remap[i] = m_alpha_selectors_vec.size(); - selectors selector_vec; - for (uint64 selector = m_alpha_selectors[i], s = 0; s < 16; s++, selector >>= 3) - selector_vec.set_by_index(s, selector & 7); - m_alpha_selectors_vec.push_back(selector_vec); + alpha_selectors_remap[i] = alpha_selectors.size(); + alpha_selectors.push_back(m_alpha_selectors[i]); } else { - alpha_selector_remap[i] = insert_result.first->second; + alpha_selectors_remap[i] = insert_result.first->second; } } } - crnlib::vector& endpoint_indices = *m_params.m_endpoint_indices; - crnlib::vector& selector_indices = *m_params.m_selector_indices; endpoint_indices.resize(m_num_blocks); selector_indices.resize(m_num_blocks); for (uint level = 0; level < p.m_num_levels; level++) { @@ -236,12 +208,12 @@ bool dxt_hc::compress(const params& p, task_pool& task_pool) { for (uint bx = 0; bx < block_width; bx++, b++) { bool top_match = by != 0; bool left_match = top_match || bx; - for (uint c = m_has_color_blocks ? 0 : cAlpha0Blocks; c < cAlpha0Blocks + m_num_alpha_blocks; c++) { - uint16 endpoint_index = (c ? alpha_endpoint_remap : color_endpoint_remap)[m_endpoint_indices[b].component[c]]; + for (uint c = m_has_color_blocks ? 0 : cAlpha0; c < cAlpha0 + m_num_alpha_blocks; c++) { + uint16 endpoint_index = (c ? alpha_endpoints_remap : color_endpoints_remap)[m_endpoint_indices[b].component[c]]; left_match = left_match && endpoint_index == endpoint_indices[b - 1].component[c]; top_match = top_match && endpoint_index == endpoint_indices[b - block_width].component[c]; endpoint_indices[b].component[c] = endpoint_index; - uint16 selector_index = (c ? alpha_selector_remap : color_selector_remap)[m_selector_indices[b].component[c]]; + uint16 selector_index = (c ? alpha_selectors_remap : color_selectors_remap)[m_selector_indices[b].component[c]]; selector_indices[b].component[c] = selector_index; } endpoint_indices[b].reference = left_match ? 1 : top_match ? 2 : 0; @@ -257,12 +229,15 @@ void dxt_hc::determine_tiles_task(uint64 data, void* pData_ptr) { uint num_tasks = m_pTask_pool->get_num_threads() + 1; uint offsets[9] = {0, 16, 32, 48, 0, 32, 64, 96, 64}; uint8 tiles[8][4] = {{8}, {6, 7}, {4, 5}, {6, 1, 3}, {7, 0, 2}, {4, 2, 3}, {5, 0, 1}, {0, 2, 1, 3}}; - color_quad_u8 chunkPixels[128]; + color_quad_u8 tilePixels[128]; uint8 selectors[64]; uint tile_error[3][9]; uint total_error[3][8]; + tree_clusterizer color_palettizer; + tree_clusterizer alpha_palettizer; for (uint level = 0; level < m_params.m_num_levels; level++) { + float weight = m_params.m_levels[level].m_weight; uint width = m_params.m_levels[level].m_block_width; uint height = m_params.m_levels[level].m_num_blocks / width; uint faceHeight = height / m_params.m_num_faces; @@ -282,12 +257,12 @@ void dxt_hc::determine_tiles_task(uint64 data, void* pData_ptr) { } for (uint bNext = b + width; b < bNext; b += 2, tile_offset += tile_offset_delta) { for (int t = 0; t < 64; t += 16) - memcpy(chunkPixels + t, m_blocks[b + (t & 16 ? width : 0) + (t & 32 ? 1 : 0)], 64); + memcpy(tilePixels + t, m_blocks[b + (t & 16 ? width : 0) + (t & 32 ? 1 : 0)], 64); for (int t = 0; t < 64; t += 4) - memcpy(chunkPixels + 64 + t, m_blocks[b + (t & 32 ? width : 0) + (t & 4 ? 1 : 0)] + (t >> 1 & 12), 16); + memcpy(tilePixels + 64 + t, m_blocks[b + (t & 32 ? width : 0) + (t & 4 ? 1 : 0)] + (t >> 1 & 12), 16); for (uint t = 0; t < 9; t++) { - color_quad_u8* pixels = chunkPixels + offsets[t]; + color_quad_u8* pixels = tilePixels + offsets[t]; uint size = 16 << (t >> 2); if (m_has_color_blocks) { uint low16, high16; @@ -301,7 +276,7 @@ void dxt_hc::determine_tiles_task(uint64 data, void* pData_ptr) { error += delta * delta; } } - tile_error[cColorBlocks][t] = error; + tile_error[cColor][t] = error; } for (uint a = 0; a < m_num_alpha_blocks; a++) { uint8 component = m_params.m_alpha_component_indices[a]; @@ -317,11 +292,11 @@ void dxt_hc::determine_tiles_task(uint64 data, void* pData_ptr) { optimizer.compute(params, results); uint block_values[cDXT5SelectorValues]; dxt5_block::get_block_values8(block_values, results.m_first_endpoint, results.m_second_endpoint); - tile_error[cAlpha0Blocks + a][t] = results.m_error; + tile_error[cAlpha0 + a][t] = results.m_error; } } - for (uint8 c = m_has_color_blocks ? 0 : cAlpha0Blocks; c < cAlpha0Blocks + m_num_alpha_blocks; c++) { + for (uint8 c = m_has_color_blocks ? 0 : cAlpha0; c < cAlpha0 + m_num_alpha_blocks; c++) { for (uint8 e = 0; e < 8; e++) { total_error[c][e] = 0; for (uint8 t = 0, s = e + 1; s; s >>= 1, t++) @@ -334,13 +309,13 @@ void dxt_hc::determine_tiles_task(uint64 data, void* pData_ptr) { for (uint e = 0; e < 8; e++) { float quality = 0; if (m_has_color_blocks) { - double peakSNR = total_error[cColorBlocks][e] ? log10(255.0f / sqrt(total_error[cColorBlocks][e] / 192.0)) * 20.0f : 999999.0f; + double peakSNR = total_error[cColor][e] ? log10(255.0f / sqrt(total_error[cColor][e] / 192.0)) * 20.0f : 999999.0f; quality = (float)math::maximum(peakSNR - m_color_derating[level][e], 0.0f); if (m_num_alpha_blocks) quality *= m_params.m_adaptive_tile_color_alpha_weighting_ratio; } for (uint a = 0; a < m_num_alpha_blocks; a++) { - double peakSNR = total_error[cAlpha0Blocks + a][e] ? log10(255.0f / sqrt(total_error[cAlpha0Blocks + a][e] / 64.0)) * 20.0f : 999999.0f; + double peakSNR = total_error[cAlpha0 + a][e] ? log10(255.0f / sqrt(total_error[cAlpha0 + a][e] / 64.0)) * 20.0f : 999999.0f; quality += (float)math::maximum(peakSNR - m_alpha_derating[e], 0.0f); } if (quality > best_quality) { @@ -352,56 +327,34 @@ void dxt_hc::determine_tiles_task(uint64 data, void* pData_ptr) { for (uint tile_index = 0, s = best_encoding + 1; s; s >>= 1, tile_index++) { tile_details& tile = m_tiles[tile_offset | tile_index]; uint t = tiles[best_encoding][tile_index]; - tile.pixels.append(chunkPixels + offsets[t], 16 << (t >> 2)); - tile.weight = m_block_weights[b]; + tile.pixels.append(tilePixels + offsets[t], 16 << (t >> 2)); + tile.weight = weight; if (m_has_color_blocks) { - tree_clusterizer palettizer; + color_palettizer.clear(); for (uint p = 0; p < tile.pixels.size(); p++) { - const color_quad_u8& c = tile.pixels[p]; - vec3F v(c[0] * 1.0f / 255.0f, c[1] * 1.0f / 255.0f, c[2] * 1.0f / 255.0f); - if (m_params.m_perceptual) { - v[0] *= 0.5f; - v[2] *= 0.25f; - } - palettizer.add_training_vec(v, 1); + const color_quad_u8& pixel = tile.pixels[p]; + vec3F v(m_uint8_to_float[pixel[0]], m_uint8_to_float[pixel[1]], m_uint8_to_float[pixel[2]]); + color_palettizer.add_training_vec(m_params.m_perceptual ? vec3F(v[0] * 0.5f, v[1], v[2] * 0.25f): v, 1); } - palettizer.generate_codebook(2); - vec3F v[2]; - utils::zero_object(v); - for (uint i = 0; i < palettizer.get_codebook_size(); i++) - v[i] = palettizer.get_codebook_entry(i); - if (palettizer.get_codebook_size() == 1) - v[1] = v[0]; - if (v[0].length() > v[1].length()) - utils::swap(v[0], v[1]); - vec6F vv; - for (uint i = 0; i < 2; i++) { - vv[i * 3 + 0] = v[i][0]; - vv[i * 3 + 1] = v[i][1]; - vv[i * 3 + 2] = v[i][2]; + color_palettizer.generate_codebook(2); + bool single = color_palettizer.get_codebook_size() == 1; + bool reorder = !single && color_palettizer.get_codebook_entry(0).length() > color_palettizer.get_codebook_entry(1).length(); + for (uint t = 0, i = 0; i < 2; i++) { + vec3F v = color_palettizer.get_codebook_entry(single ? 0 : reorder ? 1 - i : i); + for (uint c = 0; c < 3; c++, t++) + tile.color_endpoint[t] = v[c]; } - tile.color_endpoint = vv; } for (uint a = 0; a < m_num_alpha_blocks; a++) { - uint component_index = m_params.m_alpha_component_indices[a]; - tree_clusterizer palettizer; - for (uint p = 0; p < tile.pixels.size(); p++) { - vec1F v(tile.pixels[p][component_index] * 1.0f / 255.0f); - palettizer.add_training_vec(v, 1); - } - palettizer.generate_codebook(2); - vec1F v[2]; - utils::zero_object(v); - for (uint i = 0; i < palettizer.get_codebook_size(); i++) - v[i] = palettizer.get_codebook_entry(i); - if (palettizer.get_codebook_size() == 1) - v[1] = v[0]; - if (v[0] > v[1]) - utils::swap(v[0], v[1]); - vec2F vv(v[0][0], v[1][0]); - tile.alpha_endpoints[a] = vv; + alpha_palettizer.clear(); + for (uint c = m_params.m_alpha_component_indices[a], p = 0; p < tile.pixels.size(); p++) + alpha_palettizer.add_training_vec(vec1F(m_uint8_to_float[tile.pixels[p][c]]), 1); + alpha_palettizer.generate_codebook(2); + float v[2] = {alpha_palettizer.get_codebook_entry(0)[0], alpha_palettizer.get_codebook_entry(alpha_palettizer.get_codebook_size() - 1)[0]}; + tile.alpha_endpoints[a][0] = math::minimum(v[0], v[1]); + tile.alpha_endpoints[a][1] = math::maximum(v[0], v[1]); } } @@ -439,16 +392,16 @@ void dxt_hc::determine_color_endpoint_codebook_task(uint64 data, void* pData_ptr continue; } - endpoint_cluster& cluster = m_color_clusters[cluster_index]; - if (cluster.m_pixels.empty()) + color_cluster& cluster = m_color_clusters[cluster_index]; + if (cluster.pixels.empty()) continue; - crnlib::vector selectors(cluster.m_pixels.size()); + crnlib::vector selectors(cluster.pixels.size()); dxt1_endpoint_optimizer::params params; params.m_block_index = cluster_index; - params.m_pPixels = cluster.m_pixels.get_ptr(); - params.m_num_pixels = cluster.m_pixels.size(); + params.m_pPixels = cluster.pixels.get_ptr(); + params.m_num_pixels = cluster.pixels.size(); params.m_pixels_have_alpha = false; params.m_use_alpha_blocks = false; params.m_perceptual = m_params.m_perceptual; @@ -460,28 +413,23 @@ void dxt_hc::determine_color_endpoint_codebook_task(uint64 data, void* pData_ptr dxt1_endpoint_optimizer optimizer; optimizer.compute(params, results); - cluster.m_first_endpoint = results.m_low_color; - cluster.m_second_endpoint = results.m_high_color; - dxt1_block::get_block_colors4(cluster.m_color_values, cluster.m_first_endpoint, cluster.m_second_endpoint); - - color_quad_u8 color_values[4]; - color_values[0] = dxt1_block::unpack_color(results.m_low_color, true); - color_values[3] = dxt1_block::unpack_color(results.m_high_color, true); - for (uint c = 0; c < 3; c++) { - color_values[1].c[c] = ((color_values[0].c[c] << 1) + color_values[3].c[c] + (results.m_alternate_rounding ? 1 : 0)) / 3; - color_values[2].c[c] = ((color_values[3].c[c] << 1) + color_values[0].c[c] + (results.m_alternate_rounding ? 1 : 0)) / 3; + cluster.first_endpoint = results.m_low_color; + cluster.second_endpoint = results.m_high_color; + color_quad_u8 block_values[4], color_values[4]; + dxt1_block::get_block_colors4(block_values, cluster.first_endpoint, cluster.second_endpoint); + for (uint i = 0; i < 4; i++) + color_values[i] = cluster.color_values[i] = block_values[g_dxt1_from_linear[i]]; + for (uint c = 0; results.m_alternate_rounding && c < 3; c++) { + color_values[1].c[c] = ((color_values[0].c[c] << 1) + color_values[3].c[c] + 1) / 3; + color_values[2].c[c] = ((color_values[3].c[c] << 1) + color_values[0].c[c] + 1) / 3; } - uint8 color_order[4]; - for (uint8 i = 0; i < 4; i++) - color_order[i] = results.m_reordered ? 3 - g_dxt1_to_linear[i] : g_dxt1_to_linear[i]; - uint endpoint_weight = color::color_distance(m_params.m_perceptual, color_values[0], color_values[3], false) / 2000; float encoding_weight[8]; for (uint i = 0; i < 8; i++) encoding_weight[i] = math::lerp(1.15f, 1.0f, i / 7.0f); - crnlib::vector& blocks = cluster.m_blocks[cColorBlocks]; + crnlib::vector& blocks = cluster.blocks[cColor]; for (uint i = 0; i < blocks.size(); i++) { uint b = blocks[i]; uint weight = (uint)(math::clamp(endpoint_weight * m_block_weights[b], 1, 2048) * encoding_weight[m_block_encodings[b]]); @@ -490,7 +438,7 @@ void dxt_hc::determine_color_endpoint_codebook_task(uint64 data, void* pData_ptr uint error_best = cUINT32_MAX; uint8 s_best = 0; for (uint8 t = 0; t < 4; t++) { - uint8 s = color_order[t]; + uint8 s = results.m_reordered ? 3 - g_dxt1_to_linear[t] : g_dxt1_to_linear[t]; uint error = color::color_distance(m_params.m_perceptual, (color_quad_u8&)m_blocks[b][p], color_values[s], false); if (error < error_best) { s_best = s; @@ -499,7 +447,7 @@ void dxt_hc::determine_color_endpoint_codebook_task(uint64 data, void* pData_ptr } selector |= s_best << sh; } - m_block_selectors[cColorBlocks][b] = selector | (uint64)weight << 32; + m_block_selectors[cColor][b] = selector | (uint64)weight << 32; } dxt_endpoint_refiner refiner; @@ -507,33 +455,29 @@ void dxt_hc::determine_color_endpoint_codebook_task(uint64 data, void* pData_ptr dxt_endpoint_refiner::results refinerResults; refinerParams.m_perceptual = m_params.m_perceptual; refinerParams.m_pSelectors = selectors.get_ptr(); - refinerParams.m_pPixels = cluster.m_pixels.get_ptr(); - refinerParams.m_num_pixels = cluster.m_pixels.size(); + refinerParams.m_pPixels = cluster.pixels.get_ptr(); + refinerParams.m_num_pixels = cluster.pixels.size(); refinerParams.m_dxt1_selectors = true; refinerParams.m_error_to_beat = results.m_error; refinerParams.m_block_index = cluster_index; - cluster.m_refined_result = refiner.refine(refinerParams, refinerResults); - if (cluster.m_refined_result) { - cluster.m_refined_first_endpoint = refinerResults.m_low_color; - cluster.m_refined_second_endpoint = refinerResults.m_high_color; - } else { - cluster.m_refined_first_endpoint = cluster.m_first_endpoint; - cluster.m_refined_second_endpoint = cluster.m_second_endpoint; + if (refiner.refine(refinerParams, refinerResults)) { + cluster.first_endpoint = refinerResults.m_low_color; + cluster.second_endpoint = refinerResults.m_high_color; } } } void dxt_hc::determine_color_endpoint_clusters_task(uint64 data, void* pData_ptr) { - vec6F_tree_vq* vq = (vec6F_tree_vq*)pData_ptr; + tree_clusterizer* vq = (tree_clusterizer*)pData_ptr; uint num_tasks = m_pTask_pool->get_num_threads() + 1; for (uint t = m_tiles.size() * data / num_tasks, tEnd = m_tiles.size() * (data + 1) / num_tasks; t < tEnd; t++) { if (m_tiles[t].pixels.size()) - m_tiles[t].cluster_indices[cColorBlocks] = vq->find_best_codebook_entry_fs(m_tiles[t].color_endpoint); + m_tiles[t].cluster_indices[cColor] = vq->find_best_codebook_entry_fs(m_tiles[t].color_endpoint); } } void dxt_hc::determine_color_endpoints() { - vec6F_tree_vq vq; + tree_clusterizer vq; for (uint t = 0; t < m_tiles.size(); t++) { if (m_tiles[t].pixels.size()) vq.add_training_vec(m_tiles[t].color_endpoint, (uint)(m_tiles[t].pixels.size() * m_tiles[t].weight)); @@ -548,13 +492,13 @@ void dxt_hc::determine_color_endpoints() { for (uint t = 0; t < m_num_blocks; t++) { if (m_tiles[t].pixels.size()) - m_color_clusters[m_tiles[t].cluster_indices[cColorBlocks]].m_pixels.append(m_tiles[t].pixels); + m_color_clusters[m_tiles[t].cluster_indices[cColor]].pixels.append(m_tiles[t].pixels); } for (uint b = 0; b < m_num_blocks; b++) { - uint cluster_index = m_tiles[m_tile_indices[b]].cluster_indices[cColorBlocks]; - m_endpoint_indices[b].component[cColorBlocks] = cluster_index; - m_color_clusters[cluster_index].m_blocks[cColorBlocks].push_back(b); + uint cluster_index = m_tiles[m_tile_indices[b]].cluster_indices[cColor]; + m_endpoint_indices[b].component[cColor] = cluster_index; + m_color_clusters[cluster_index].blocks[cColor].push_back(b); } for (uint i = 0; i <= m_pTask_pool->get_num_threads(); i++) @@ -580,15 +524,15 @@ void dxt_hc::determine_alpha_endpoint_codebook_task(uint64 data, void* pData_ptr continue; } - endpoint_cluster& cluster = m_alpha_clusters[cluster_index]; - if (cluster.m_pixels.empty()) + alpha_cluster& cluster = m_alpha_clusters[cluster_index]; + if (cluster.pixels.empty()) continue; - crnlib::vector selectors(cluster.m_pixels.size()); + crnlib::vector selectors(cluster.pixels.size()); dxt5_endpoint_optimizer::params params; - params.m_pPixels = cluster.m_pixels.get_ptr(); - params.m_num_pixels = cluster.m_pixels.size(); + params.m_pPixels = cluster.pixels.get_ptr(); + params.m_num_pixels = cluster.pixels.size(); params.m_comp_index = 0; params.m_quality = cCRNDXTQualityUber; params.m_use_both_block_types = false; @@ -598,25 +542,21 @@ void dxt_hc::determine_alpha_endpoint_codebook_task(uint64 data, void* pData_ptr dxt5_endpoint_optimizer optimizer; optimizer.compute(params, results); - cluster.m_first_endpoint = results.m_first_endpoint; - cluster.m_second_endpoint = results.m_second_endpoint; - dxt5_block::get_block_values(cluster.m_alpha_values, cluster.m_first_endpoint, cluster.m_second_endpoint); - - int delta = cluster.m_second_endpoint - cluster.m_first_endpoint; - uint8 alpha_values[8]; - uint8 alpha_order[8]; - for (uint sum = cluster.m_first_endpoint * 7, i = 0; i < 8; i++, sum += delta) { - alpha_values[i] = (uint8)(sum / 7); - alpha_order[i] = results.m_reordered ? 7 - g_dxt5_to_linear[i] : g_dxt5_to_linear[i]; - } + cluster.first_endpoint = results.m_first_endpoint; + cluster.second_endpoint = results.m_second_endpoint; + uint block_values[8], alpha_values[8]; + dxt5_block::get_block_values(block_values, cluster.first_endpoint, cluster.second_endpoint); + for (uint i = 0; i < 8; i++) + alpha_values[i] = cluster.alpha_values[i] = block_values[g_dxt5_from_linear[i]]; + int delta = cluster.second_endpoint - cluster.first_endpoint; uint encoding_weight[8]; for (uint endpoint_weight = math::clamp(delta * delta >> 3, 1, 2048), i = 0; i < 8; i++) encoding_weight[i] = (uint)(endpoint_weight * math::lerp(1.15f, 1.0f, i / 7.0f)); for (uint a = 0; a < m_num_alpha_blocks; a++) { uint component_index = m_params.m_alpha_component_indices[a]; - crnlib::vector& blocks = cluster.m_blocks[cAlpha0Blocks + a]; + crnlib::vector& blocks = cluster.blocks[cAlpha0 + a]; for (uint i = 0; i < blocks.size(); i++) { uint b = blocks[i]; uint weight = encoding_weight[m_block_encodings[b]]; @@ -625,7 +565,7 @@ void dxt_hc::determine_alpha_endpoint_codebook_task(uint64 data, void* pData_ptr uint error_best = cUINT32_MAX; uint8 s_best = 0; for (uint8 t = 0; t < 8; t++) { - uint8 s = alpha_order[t]; + uint8 s = results.m_reordered ? 7 - g_dxt5_to_linear[t] : g_dxt5_to_linear[t]; int delta = m_blocks[b][p][component_index] - alpha_values[s]; uint error = delta >= 0 ? delta : -delta; if (error < error_best) { @@ -635,7 +575,7 @@ void dxt_hc::determine_alpha_endpoint_codebook_task(uint64 data, void* pData_ptr } selector |= (uint64)s_best << sh; } - m_block_selectors[cAlpha0Blocks + a][b] = selector | (uint64)weight << 48; + m_block_selectors[cAlpha0 + a][b] = selector | (uint64)weight << 48; } } @@ -644,37 +584,37 @@ void dxt_hc::determine_alpha_endpoint_codebook_task(uint64 data, void* pData_ptr dxt_endpoint_refiner::results refinerResults; refinerParams.m_perceptual = m_params.m_perceptual; refinerParams.m_pSelectors = selectors.get_ptr(); - refinerParams.m_pPixels = cluster.m_pixels.get_ptr(); - refinerParams.m_num_pixels = cluster.m_pixels.size(); + refinerParams.m_pPixels = cluster.pixels.get_ptr(); + refinerParams.m_num_pixels = cluster.pixels.size(); refinerParams.m_dxt1_selectors = false; refinerParams.m_error_to_beat = results.m_error; refinerParams.m_block_index = cluster_index; - cluster.m_refined_result = refiner.refine(refinerParams, refinerResults); - if (cluster.m_refined_result) { - cluster.m_refined_first_endpoint = refinerResults.m_low_color; - cluster.m_refined_second_endpoint = refinerResults.m_high_color; - dxt5_block::get_block_values(cluster.m_refined_alpha_values, cluster.m_refined_first_endpoint, cluster.m_refined_second_endpoint); + cluster.refined_alpha = refiner.refine(refinerParams, refinerResults); + if (cluster.refined_alpha) { + cluster.first_endpoint = refinerResults.m_low_color; + cluster.second_endpoint = refinerResults.m_high_color; + dxt5_block::get_block_values(block_values, cluster.first_endpoint, cluster.second_endpoint); + for (uint i = 0; i < 8; i++) + cluster.refined_alpha_values[i] = block_values[g_dxt5_from_linear[i]]; } else { - cluster.m_refined_first_endpoint = cluster.m_first_endpoint; - cluster.m_refined_second_endpoint = cluster.m_second_endpoint; - memcpy(cluster.m_refined_alpha_values, cluster.m_alpha_values, sizeof(cluster.m_refined_alpha_values)); + memcpy(cluster.refined_alpha_values, cluster.alpha_values, sizeof(cluster.refined_alpha_values)); } } } void dxt_hc::determine_alpha_endpoint_clusters_task(uint64 data, void* pData_ptr) { - vec2F_tree_vq* vq = (vec2F_tree_vq*)pData_ptr; + tree_clusterizer* vq = (tree_clusterizer*)pData_ptr; uint num_tasks = m_pTask_pool->get_num_threads() + 1; for (uint t = m_tiles.size() * data / num_tasks, tEnd = m_tiles.size() * (data + 1) / num_tasks; t < tEnd; t++) { if (m_tiles[t].pixels.size()) { for (uint a = 0; a < m_num_alpha_blocks; a++) - m_tiles[t].cluster_indices[cAlpha0Blocks + a] = vq->find_best_codebook_entry_fs(m_tiles[t].alpha_endpoints[a]); + m_tiles[t].cluster_indices[cAlpha0 + a] = vq->find_best_codebook_entry_fs(m_tiles[t].alpha_endpoints[a]); } } } void dxt_hc::determine_alpha_endpoints() { - vec2F_tree_vq vq; + tree_clusterizer vq; for (uint a = 0; a < m_num_alpha_blocks; a++) { for (uint t = 0; t < m_tiles.size(); t++) { if (m_tiles[t].pixels.size()) @@ -694,7 +634,7 @@ void dxt_hc::determine_alpha_endpoints() { for (uint t = 0; t < m_num_blocks; t++) { crnlib::vector& source = m_tiles[t].pixels; if (source.size()) { - crnlib::vector& destination = m_alpha_clusters[m_tiles[t].cluster_indices[cAlpha0Blocks + a]].m_pixels; + crnlib::vector& destination = m_alpha_clusters[m_tiles[t].cluster_indices[cAlpha0 + a]].pixels; for (uint p = 0; p < source.size(); p++) destination.push_back(color_quad_u8(source[p][component_index])); } @@ -703,9 +643,9 @@ void dxt_hc::determine_alpha_endpoints() { for (uint b = 0; b < m_num_blocks; b++) { for (uint a = 0; a < m_num_alpha_blocks; a++) { - uint cluster_index = m_tiles[m_tile_indices[b]].cluster_indices[cAlpha0Blocks + a]; - m_endpoint_indices[b].component[cAlpha0Blocks + a] = cluster_index; - m_alpha_clusters[cluster_index].m_blocks[cAlpha0Blocks + a].push_back(b); + uint cluster_index = m_tiles[m_tile_indices[b]].cluster_indices[cAlpha0 + a]; + m_endpoint_indices[b].component[cAlpha0 + a] = cluster_index; + m_alpha_clusters[cluster_index].blocks[cAlpha0 + a].push_back(b); } } @@ -725,8 +665,8 @@ void dxt_hc::create_color_selector_codebook_task(uint64 data, void* pData_ptr) { uint num_tasks = m_pTask_pool->get_num_threads() + 1; uint errors[16][4]; for (uint b = m_num_blocks * data / num_tasks, bEnd = m_num_blocks * (data + 1) / num_tasks; b < bEnd; b++) { - endpoint_cluster& cluster = m_color_clusters[m_endpoint_indices[b].color]; - color_quad_u8* endpoint_colors = cluster.m_color_values; + color_cluster& cluster = m_color_clusters[m_endpoint_indices[b].color]; + color_quad_u8* endpoint_colors = cluster.color_values; for (uint p = 0; p < 16; p++) { for (uint s = 0; s < 4; s++) errors[p][s] = color::color_distance(m_params.m_perceptual, m_blocks[b][p], endpoint_colors[s], false); @@ -766,10 +706,10 @@ void dxt_hc::create_color_selector_codebook_task(uint64 data, void* pData_ptr) { } void dxt_hc::create_color_selector_codebook() { - vec16F_tree_vq selector_vq; + tree_clusterizer selector_vq; vec16F v; for (uint b = 0; b < m_num_blocks; b++) { - uint64 selector = m_block_selectors[cColorBlocks][b]; + uint64 selector = m_block_selectors[cColor][b]; for (uint8 p = 0; p < 16; p++, selector >>= 2) v[p] = ((selector & 3) + 0.5f) * 0.25f; selector_vq.add_training_vec(v, selector); @@ -780,10 +720,8 @@ void dxt_hc::create_color_selector_codebook() { for (uint i = 0; i < selector_vq.get_codebook_size(); i++) { const vec16F& v = selector_vq.get_codebook_entry(i); m_color_selectors[i] = 0; - for (uint sh = 0, j = 0; j < 16; j++, sh += 2) { - uint8 s = g_dxt1_from_linear[(int)(v[j] * 4.0f)]; - m_color_selectors[i] |= s << sh; - } + for (uint sh = 0, j = 0; j < 16; j++, sh += 2) + m_color_selectors[i] |= (uint)(v[j] * 4.0f) << sh; } uint num_tasks = m_pTask_pool->get_num_threads() + 1; @@ -809,16 +747,10 @@ void dxt_hc::create_color_selector_codebook() { uint (&errors)[16][4] = selector_details[0][i].error; m_color_selectors[i] = 0; for (uint sh = 0, p = 0; p < 16; p++, sh += 2) { - uint best_error = errors[p][0]; - uint8 best_s = 0; - for (uint8 s = 1; s < 4; s++) { - uint error = errors[p][s]; - if (error < best_error) { - best_s = s; - best_error = error; - } - } - m_color_selectors[i] |= best_s << sh; + uint* e = errors[p]; + uint8 s03 = e[3] < e[0] ? 3 : 0; + uint8 s12 = e[2] < e[1] ? 2 : 1; + m_color_selectors[i] |= (e[s12] < e[s03] ? s12 : s03) << sh; } } } @@ -834,10 +766,10 @@ void dxt_hc::create_alpha_selector_codebook_task(uint64 data, void* pData_ptr) { uint num_tasks = m_pTask_pool->get_num_threads() + 1; uint errors[16][8]; for (uint b = m_num_blocks * data / num_tasks, bEnd = m_num_blocks * (data + 1) / num_tasks; b < bEnd; b++) { - for (uint c = cAlpha0Blocks; c < cAlpha0Blocks + m_num_alpha_blocks; c++) { - const uint alpha_pixel_comp = m_params.m_alpha_component_indices[c - cAlpha0Blocks]; - endpoint_cluster& cluster = m_alpha_clusters[m_endpoint_indices[b].component[c]]; - uint* block_values = cluster.m_alpha_values; + for (uint c = cAlpha0; c < cAlpha0 + m_num_alpha_blocks; c++) { + const uint alpha_pixel_comp = m_params.m_alpha_component_indices[c - cAlpha0]; + alpha_cluster& cluster = m_alpha_clusters[m_endpoint_indices[b].component[c]]; + uint* block_values = cluster.alpha_values; for (uint p = 0; p < 16; p++) { for (uint s = 0; s < 8; s++) { int delta = m_blocks[b][p][alpha_pixel_comp] - block_values[s]; @@ -868,8 +800,8 @@ void dxt_hc::create_alpha_selector_codebook_task(uint64 data, void* pData_ptr) { best_index = s; } } - if (cluster.m_refined_result) { - block_values = cluster.m_refined_alpha_values; + if (cluster.refined_alpha) { + block_values = cluster.refined_alpha_values; for (uint p = 0; p < 16; p++) { for (uint s = 0; s < 8; s++) { int delta = m_blocks[b][p][alpha_pixel_comp] - block_values[s]; @@ -889,9 +821,9 @@ void dxt_hc::create_alpha_selector_codebook_task(uint64 data, void* pData_ptr) { } void dxt_hc::create_alpha_selector_codebook() { - vec16F_tree_vq selector_vq; + tree_clusterizer selector_vq; vec16F v; - for (uint c = cAlpha0Blocks; c < cAlpha0Blocks + m_num_alpha_blocks; c++) { + for (uint c = cAlpha0; c < cAlpha0 + m_num_alpha_blocks; c++) { for (uint b = 0; b < m_num_blocks; b++) { uint64 selector = m_block_selectors[c][b]; for (uint8 p = 0; p < 16; p++, selector >>= 3) @@ -905,10 +837,8 @@ void dxt_hc::create_alpha_selector_codebook() { for (uint i = 0; i < selector_vq.get_codebook_size(); i++) { const vec16F& v = selector_vq.get_codebook_entry(i); m_alpha_selectors[i] = 0; - for (uint sh = 0, j = 0; j < 16; j++, sh += 3) { - uint8 s = g_dxt5_from_linear[(int)(v[j] * 8.0f)]; - m_alpha_selectors[i] |= (uint64)s << sh; - } + for (uint sh = 0, j = 0; j < 16; j++, sh += 3) + m_alpha_selectors[i] |= (uint64)(v[j] * 8.0f) << sh; } uint num_tasks = m_pTask_pool->get_num_threads() + 1; @@ -934,16 +864,14 @@ void dxt_hc::create_alpha_selector_codebook() { uint (&errors)[16][8] = selector_details[0][i].error; m_alpha_selectors[i] = 0; for (uint sh = 0, p = 0; p < 16; p++, sh += 3) { - uint best_error = errors[p][0]; - uint8 best_s = 0; - for (uint8 s = 1; s < 8; s++) { - uint error = errors[p][s]; - if (error < best_error) { - best_s = s; - best_error = error; - } - } - m_alpha_selectors[i] |= (uint64)best_s << sh; + uint* e = errors[p]; + uint8 s07 = e[7] < e[0] ? 7 : 0; + uint8 s12 = e[2] < e[1] ? 2 : 1; + uint8 s34 = e[4] < e[3] ? 4 : 3; + uint8 s56 = e[6] < e[5] ? 6 : 5; + uint8 s02 = e[s12] < e[s07] ? s12 : s07; + uint8 s36 = e[s56] < e[s34] ? s56 : s34; + m_alpha_selectors[i] |= (uint64)(e[s36] < e[s02] ? s36 : s02) << sh; } } } diff --git a/crnlib/crn_dxt_hc.h b/crnlib/crn_dxt_hc.h index 679afd8..8661746 100644 --- a/crnlib/crn_dxt_hc.h +++ b/crnlib/crn_dxt_hc.h @@ -56,6 +56,7 @@ class dxt_hc { uint m_num_tiles; float m_color_derating[cCRNMaxLevels][8]; float m_alpha_derating[8]; + float m_uint8_to_float[256]; color_quad_u8 (*m_blocks)[16]; uint m_num_blocks; @@ -72,8 +73,7 @@ class dxt_hc { struct params { params() - : m_blocks(0), - m_num_blocks(0), + : m_num_blocks(0), m_num_levels(0), m_num_faces(0), m_format(cDXT1), @@ -88,9 +88,7 @@ class dxt_hc { m_adaptive_tile_color_alpha_weighting_ratio(3.0f), m_debugging(false), m_pProgress_func(0), - m_pProgress_func_data(0), - m_endpoint_indices(0), - m_selector_indices(0) { + m_pProgress_func_data(0) { m_alpha_component_indices[0] = 3; m_alpha_component_indices[1] = 0; for (uint i = 0; i < cCRNMaxLevels; i++) { @@ -100,7 +98,6 @@ class dxt_hc { } } - color_quad_u8 (*m_blocks)[16]; uint m_num_blocks; uint m_num_levels; uint m_num_faces; @@ -126,90 +123,58 @@ class dxt_hc { float m_adaptive_tile_color_alpha_weighting_ratio; uint m_alpha_component_indices[2]; + task_pool* m_pTask_pool; bool m_debugging; crn_progress_callback_func m_pProgress_func; void* m_pProgress_func_data; - - crnlib::vector *m_endpoint_indices; - crnlib::vector *m_selector_indices; }; - struct selectors { - selectors() { utils::zero_object(*this); } - - uint8 m_selectors[cBlockPixelHeight][cBlockPixelWidth]; - - uint8 get_by_index(uint i) const { - CRNLIB_ASSERT(i < (cBlockPixelWidth * cBlockPixelHeight)); - const uint8* p = (const uint8*)m_selectors; - return *(p + i); - } - void set_by_index(uint i, uint v) { - CRNLIB_ASSERT(i < (cBlockPixelWidth * cBlockPixelHeight)); - uint8* p = (uint8*)m_selectors; - *(p + i) = static_cast(v); - } - }; - typedef crnlib::vector selectors_vec; - void clear(); - bool compress(const params& p, task_pool& task_pool); - - // Color endpoints - inline uint get_color_endpoint_codebook_size() const { return m_color_endpoints.size(); } - inline uint get_color_endpoint(uint codebook_index) const { return m_color_endpoints[codebook_index]; } - const crnlib::vector& get_color_endpoint_vec() const { return m_color_endpoints; } - - // Color selectors - uint get_color_selector_codebook_size() const { return m_color_selectors_vec.size(); } - const selectors& get_color_selectors(uint codebook_index) const { return m_color_selectors_vec[codebook_index]; } - const crnlib::vector& get_color_selectors_vec() const { return m_color_selectors_vec; } - - // Alpha endpoints - inline uint get_alpha_endpoint_codebook_size() const { return m_alpha_endpoints.size(); } - inline uint get_alpha_endpoint(uint codebook_index) const { return m_alpha_endpoints[codebook_index]; } - const crnlib::vector& get_alpha_endpoint_vec() const { return m_alpha_endpoints; } - - // Alpha selectors - uint get_alpha_selector_codebook_size() const { return m_alpha_selectors_vec.size(); } - const selectors& get_alpha_selectors(uint codebook_index) const { return m_alpha_selectors_vec[codebook_index]; } - const crnlib::vector& get_alpha_selectors_vec() const { return m_alpha_selectors_vec; } + bool compress( + color_quad_u8 (*blocks)[16], + crnlib::vector& endpoint_indices, + crnlib::vector& selector_indices, + crnlib::vector& color_endpoints, + crnlib::vector& alpha_endpoints, + crnlib::vector& color_selectors, + crnlib::vector& alpha_selectors, + const params& p + ); private: params m_params; uint m_num_alpha_blocks; bool m_has_color_blocks; - bool m_has_alpha0_blocks; - bool m_has_alpha1_blocks; enum { - cColorBlocks = 0, - cAlpha0Blocks = 1, - cAlpha1Blocks = 2, - cNumCompressedComponents = 3 + cColor = 0, + cAlpha0 = 1, + cAlpha1 = 2, + cNumComps = 3 }; - struct endpoint_cluster { - endpoint_cluster() : m_first_endpoint(0), m_second_endpoint(0) {} - crnlib::vector m_blocks[3]; - crnlib::vector m_pixels; - uint m_first_endpoint; - uint m_second_endpoint; - color_quad_u8 m_color_values[4]; - uint m_alpha_values[8]; - bool m_refined_result; - uint m_refined_first_endpoint; - uint m_refined_second_endpoint; - uint m_refined_alpha_values[8]; + struct color_cluster { + color_cluster() : first_endpoint(0), second_endpoint(0) {} + crnlib::vector blocks[3]; + crnlib::vector pixels; + uint first_endpoint; + uint second_endpoint; + color_quad_u8 color_values[4]; }; - crnlib::vector m_color_clusters; - crnlib::vector m_alpha_clusters; + crnlib::vector m_color_clusters; - selectors_vec m_alpha_selectors_vec; - selectors_vec m_color_selectors_vec; - crnlib::vector m_color_endpoints; - crnlib::vector m_alpha_endpoints; + struct alpha_cluster { + alpha_cluster() : first_endpoint(0), second_endpoint(0) {} + crnlib::vector blocks[3]; + crnlib::vector pixels; + uint first_endpoint; + uint second_endpoint; + uint alpha_values[8]; + bool refined_alpha; + uint refined_alpha_values[8]; + }; + crnlib::vector m_alpha_clusters; crn_thread_id_t m_main_thread_id; bool m_canceled; @@ -218,12 +183,6 @@ class dxt_hc { int m_prev_phase_index; int m_prev_percentage_complete; - typedef vec<6, float> vec6F; - typedef vec<16, float> vec16F; - typedef tree_clusterizer vec2F_tree_vq; - typedef tree_clusterizer vec6F_tree_vq; - typedef tree_clusterizer vec16F_tree_vq; - void determine_tiles_task(uint64 data, void* pData_ptr); void determine_color_endpoint_codebook_task(uint64 data, void* pData_ptr); @@ -243,6 +202,4 @@ class dxt_hc { bool update_progress(uint phase_index, uint subphase_index, uint subphase_total); }; -CRNLIB_DEFINE_BITWISE_COPYABLE(dxt_hc::selectors); - } // namespace crnlib diff --git a/inc/crn_decomp.h b/inc/crn_decomp.h index 93efd43..9d17f6f 100644 --- a/inc/crn_decomp.h +++ b/inc/crn_decomp.h @@ -1431,44 +1431,6 @@ CRND_DEFINE_BITWISE_MOVABLE(dxt5_block); } // namespace crnd -// File: crnd_dxt_hc_common.h -namespace crnd { -struct chunk_tile_desc { - // These values are in pixels, and always a multiple of cBlockPixelWidth/cBlockPixelHeight. - uint32 m_x_ofs; - uint32 m_y_ofs; - uint32 m_width; - uint32 m_height; - uint32 m_layout_index; -}; - -struct chunk_encoding_desc { - uint32 m_num_tiles; - chunk_tile_desc m_tiles[4]; -}; - -const uint32 cChunkPixelWidth = 8; -const uint32 cChunkPixelHeight = 8; -const uint32 cChunkBlockWidth = 2; -const uint32 cChunkBlockHeight = 2; - -const uint32 cChunkMaxTiles = 4; - -const uint32 cBlockPixelWidthShift = 2; -const uint32 cBlockPixelHeightShift = 2; - -const uint32 cBlockPixelWidth = 4; -const uint32 cBlockPixelHeight = 4; - -const uint32 cNumChunkEncodings = 8; -extern chunk_encoding_desc g_chunk_encodings[cNumChunkEncodings]; - -const uint32 cNumChunkTileLayouts = 9; -const uint32 cFirst4x4ChunkTileLayout = 5; -extern chunk_tile_desc g_chunk_tile_layouts[cNumChunkTileLayouts]; - -} // namespace crnd - // File: crnd_prefix_coding.h #ifdef _XBOX #define CRND_PREFIX_CODING_USE_FIXED_TABLE_SIZE 1 @@ -2774,44 +2736,6 @@ uint64 symbol_codec::stop_decoding() { } // namespace crnd -// File: crnd_dxt_hc_common.cpp -namespace crnd { -chunk_encoding_desc g_chunk_encodings[cNumChunkEncodings] = - { - {1, {{0, 0, 8, 8, 0}}}, - - {2, {{0, 0, 8, 4, 1}, {0, 4, 8, 4, 2}}}, - {2, {{0, 0, 4, 8, 3}, {4, 0, 4, 8, 4}}}, - - {3, {{0, 0, 8, 4, 1}, {0, 4, 4, 4, 7}, {4, 4, 4, 4, 8}}}, - {3, {{0, 4, 8, 4, 2}, {0, 0, 4, 4, 5}, {4, 0, 4, 4, 6}}}, - - {3, {{0, 0, 4, 8, 3}, {4, 0, 4, 4, 6}, {4, 4, 4, 4, 8}}}, - {3, {{4, 0, 4, 8, 4}, {0, 0, 4, 4, 5}, {0, 4, 4, 4, 7}}}, - - {4, {{0, 0, 4, 4, 5}, {4, 0, 4, 4, 6}, {0, 4, 4, 4, 7}, {4, 4, 4, 4, 8}}}}; - -chunk_tile_desc g_chunk_tile_layouts[cNumChunkTileLayouts] = - { - // 2x2 - {0, 0, 8, 8, 0}, - - // 2x1 - {0, 0, 8, 4, 1}, - {0, 4, 8, 4, 2}, - - // 1x2 - {0, 0, 4, 8, 3}, - {4, 0, 4, 8, 4}, - - // 1x1 - {0, 0, 4, 4, 5}, - {4, 0, 4, 4, 6}, - {0, 4, 4, 4, 7}, - {4, 4, 4, 4, 8}}; - -} // namespace crnd - // File: crnd_dxt.cpp namespace crnd { const uint8 g_dxt1_to_linear[cDXT1SelectorValues] = {0U, 3U, 1U, 2U}; @@ -3096,30 +3020,27 @@ class crn_unpacker { if (dst_size_in_bytes < row_pitch_in_bytes * blocks_y) return false; - const uint32 chunks_x = (blocks_x + 1) >> 1; - const uint32 chunks_y = (blocks_y + 1) >> 1; - if (!m_codec.start_decoding(static_cast(pSrc), src_size_in_bytes)) return false; bool status = false; switch (m_pHeader->m_format) { case cCRNFmtDXT1: - status = unpack_dxt1((uint8**)pDst, dst_size_in_bytes, row_pitch_in_bytes, blocks_x, blocks_y, chunks_x, chunks_y); + status = unpack_dxt1((uint8**)pDst, row_pitch_in_bytes, blocks_x, blocks_y); break; case cCRNFmtDXT5: case cCRNFmtDXT5_CCxY: case cCRNFmtDXT5_xGBR: case cCRNFmtDXT5_AGBR: case cCRNFmtDXT5_xGxR: - status = unpack_dxt5((uint8**)pDst, dst_size_in_bytes, row_pitch_in_bytes, blocks_x, blocks_y, chunks_x, chunks_y); + status = unpack_dxt5((uint8**)pDst, row_pitch_in_bytes, blocks_x, blocks_y); break; case cCRNFmtDXT5A: - status = unpack_dxt5a((uint8**)pDst, dst_size_in_bytes, row_pitch_in_bytes, blocks_x, blocks_y, chunks_x, chunks_y); + status = unpack_dxt5a((uint8**)pDst, row_pitch_in_bytes, blocks_x, blocks_y); break; case cCRNFmtDXN_XY: case cCRNFmtDXN_YX: - status = unpack_dxn((uint8**)pDst, dst_size_in_bytes, row_pitch_in_bytes, blocks_x, blocks_y, chunks_x, chunks_y); + status = unpack_dxn((uint8**)pDst, row_pitch_in_bytes, blocks_x, blocks_y); break; default: return false; @@ -3410,22 +3331,24 @@ class crn_unpacker { x = (x & msk) | (v & ~msk); } - bool unpack_dxt1(uint8** pDst, uint32 dst_size_in_bytes, uint32 row_pitch_in_bytes, uint32 blocks_x, uint32 blocks_y, uint32 chunks_x, uint32 chunks_y) { + bool unpack_dxt1(uint8** pDst, uint32 output_pitch_in_bytes, uint32 output_width, uint32 output_height) { const uint32 num_color_endpoints = m_color_endpoints.size(); - const int32 delta_pitch_in_dwords = (row_pitch_in_bytes >> 2) - (chunks_x << 2); + const uint32 width = output_width + 1 & ~1; + const uint32 height = output_height + 1 & ~1; + const int32 delta_pitch_in_dwords = (output_pitch_in_bytes >> 2) - (width << 1); - if (m_block_buffer.size() < chunks_x << 1) - m_block_buffer.resize(chunks_x << 1); + if (m_block_buffer.size() < width) + m_block_buffer.resize(width); uint32 color_endpoint_index = 0; uint8 reference_group = 0; for (uint32 f = 0; f < m_pHeader->m_faces; f++) { uint32* pData = (uint32*)pDst[f]; - for (uint32 y = 0; y < chunks_y << 1; y++, pData += delta_pitch_in_dwords) { - bool visible = y < blocks_y; - for (uint32 x = 0; x < chunks_x << 1; x++, pData += 2) { - visible = visible && x < blocks_x; + for (uint32 y = 0; y < height; y++, pData += delta_pitch_in_dwords) { + bool visible = y < output_height; + for (uint32 x = 0; x < width; x++, pData += 2) { + visible = visible && x < output_width; if (!(y & 1) && !(x & 1)) reference_group = m_codec.decode(m_reference_encoding_dm); block_buffer_element &buffer = m_block_buffer[x]; @@ -3459,13 +3382,15 @@ class crn_unpacker { return true; } - bool unpack_dxt5(uint8** pDst, uint32 dst_size_in_bytes, uint32 row_pitch_in_bytes, uint32 blocks_x, uint32 blocks_y, uint32 chunks_x, uint32 chunks_y) { + bool unpack_dxt5(uint8** pDst, uint32 row_pitch_in_bytes, uint32 output_width, uint32 output_height) { const uint32 num_color_endpoints = m_color_endpoints.size(); const uint32 num_alpha_endpoints = m_alpha_endpoints.size(); - const int32 delta_pitch_in_dwords = (row_pitch_in_bytes >> 2) - (chunks_x << 3); + const uint32 width = output_width + 1 & ~1; + const uint32 height = output_height + 1 & ~1; + const int32 delta_pitch_in_dwords = (row_pitch_in_bytes >> 2) - (width << 2); - if (m_block_buffer.size() < chunks_x << 1) - m_block_buffer.resize(chunks_x << 1); + if (m_block_buffer.size() < width) + m_block_buffer.resize(width); uint32 color_endpoint_index = 0; uint32 alpha0_endpoint_index = 0; @@ -3473,10 +3398,10 @@ class crn_unpacker { for (uint32 f = 0; f < m_pHeader->m_faces; f++) { uint32* pData = (uint32*)pDst[f]; - for (uint32 y = 0; y < chunks_y << 1; y++, pData += delta_pitch_in_dwords) { - bool visible = y < blocks_y; - for (uint32 x = 0; x < chunks_x << 1; x++, pData += 4) { - visible = visible && x < blocks_x; + for (uint32 y = 0; y < height; y++, pData += delta_pitch_in_dwords) { + bool visible = y < output_height; + for (uint32 x = 0; x < width; x++, pData += 4) { + visible = visible && x < output_width; if (!(y & 1) && !(x & 1)) reference_group = m_codec.decode(m_reference_encoding_dm); block_buffer_element &buffer = m_block_buffer[x]; @@ -3520,12 +3445,14 @@ class crn_unpacker { return true; } - bool unpack_dxn(uint8** pDst, uint32 dst_size_in_bytes, uint32 row_pitch_in_bytes, uint32 blocks_x, uint32 blocks_y, uint32 chunks_x, uint32 chunks_y) { + bool unpack_dxn(uint8** pDst, uint32 row_pitch_in_bytes, uint32 output_width, uint32 output_height) { const uint32 num_alpha_endpoints = m_alpha_endpoints.size(); - const int32 delta_pitch_in_dwords = (row_pitch_in_bytes >> 2) - (chunks_x << 3); + const uint32 width = output_width + 1 & ~1; + const uint32 height = output_height + 1 & ~1; + const int32 delta_pitch_in_dwords = (row_pitch_in_bytes >> 2) - (width << 2); - if (m_block_buffer.size() < chunks_x << 1) - m_block_buffer.resize(chunks_x << 1); + if (m_block_buffer.size() < width) + m_block_buffer.resize(width); uint32 alpha0_endpoint_index = 0; uint32 alpha1_endpoint_index = 0; @@ -3533,10 +3460,10 @@ class crn_unpacker { for (uint32 f = 0; f < m_pHeader->m_faces; f++) { uint32* pData = (uint32*)pDst[f]; - for (uint32 y = 0; y < chunks_y << 1; y++, pData += delta_pitch_in_dwords) { - bool visible = y < blocks_y; - for (uint32 x = 0; x < chunks_x << 1; x++, pData += 4) { - visible = visible && x < blocks_x; + for (uint32 y = 0; y < height; y++, pData += delta_pitch_in_dwords) { + bool visible = y < output_height; + for (uint32 x = 0; x < width; x++, pData += 4) { + visible = visible && x < output_width; if (!(y & 1) && !(x & 1)) reference_group = m_codec.decode(m_reference_encoding_dm); block_buffer_element &buffer = m_block_buffer[x]; @@ -3581,22 +3508,24 @@ class crn_unpacker { return true; } - bool unpack_dxt5a(uint8** pDst, uint32 dst_size_in_bytes, uint32 row_pitch_in_bytes, uint32 blocks_x, uint32 blocks_y, uint32 chunks_x, uint32 chunks_y) { + bool unpack_dxt5a(uint8** pDst, uint32 row_pitch_in_bytes, uint32 output_width, uint32 output_height) { const uint32 num_alpha_endpoints = m_alpha_endpoints.size(); - const int32 delta_pitch_in_dwords = (row_pitch_in_bytes >> 2) - (chunks_x << 2); + const uint32 width = output_width + 1 & ~1; + const uint32 height = output_height + 1 & ~1; + const int32 delta_pitch_in_dwords = (row_pitch_in_bytes >> 2) - (width << 1); - if (m_block_buffer.size() < chunks_x << 1) - m_block_buffer.resize(chunks_x << 1); + if (m_block_buffer.size() < width) + m_block_buffer.resize(width); uint32 alpha0_endpoint_index = 0; uint8 reference_group = 0; for (uint32 f = 0; f < m_pHeader->m_faces; f++) { uint32* pData = (uint32*)pDst[f]; - for (uint32 y = 0; y < chunks_y << 1; y++, pData += delta_pitch_in_dwords) { - bool visible = y < blocks_y; - for (uint32 x = 0; x < chunks_x << 1; x++, pData += 2) { - visible = visible && x < blocks_x; + for (uint32 y = 0; y < height; y++, pData += delta_pitch_in_dwords) { + bool visible = y < output_height; + for (uint32 x = 0; x < width; x++, pData += 2) { + visible = visible && x < output_width; if (!(y & 1) && !(x & 1)) reference_group = m_codec.decode(m_reference_encoding_dm); block_buffer_element &buffer = m_block_buffer[x];