// File: crn_dxt1.cpp // See Copyright Notice and license at the end of inc/crnlib.h // // Notes: // This class is not optimized for performance on small blocks, unlike typical DXT1 compressors. It's optimized for scalability and quality: // - Very high quality in terms of avg. RMSE or Luma RMSE. Goal is to always match or beat every other known offline DXTc compressor: ATI_Compress, squish, NVidia texture tools, nvdxt.exe, etc. // - Reasonable scalability and stability with hundreds to many thousands of input colors (including inputs with many thousands of equal/nearly equal colors). // - Any quality optimization which results in even a tiny improvement is worth it -- as long as it's either a constant or linear slowdown. // Tiny quality improvements can be extremely valuable in large clusters. // - Quality should scale well vs. CPU time cost, i.e. the more time you spend the higher the quality. #include "crn_core.h" #include "crn_dxt1.h" #include "crn_ryg_dxt.hpp" #include "crn_dxt_fast.h" #include "crn_intersect.h" #include "crn_vec_interval.h" namespace crnlib { //----------------------------------------------------------------------------------------------------------------------------------------- static const int16 g_fast_probe_table[] = {0, 1, 2, 3}; static const uint cFastProbeTableSize = sizeof(g_fast_probe_table) / sizeof(g_fast_probe_table[0]); static const int16 g_normal_probe_table[] = {0, 1, 3, 5, 7}; static const uint cNormalProbeTableSize = sizeof(g_normal_probe_table) / sizeof(g_normal_probe_table[0]); static const int16 g_better_probe_table[] = {0, 1, 2, 3, 5, 9, 15, 19, 27, 43}; static const uint cBetterProbeTableSize = sizeof(g_better_probe_table) / sizeof(g_better_probe_table[0]); static const int16 g_uber_probe_table[] = {0, 1, 2, 3, 5, 7, 9, 10, 13, 15, 19, 27, 43, 59, 91}; static const uint cUberProbeTableSize = sizeof(g_uber_probe_table) / sizeof(g_uber_probe_table[0]); struct unique_color_projection { unique_color color; int64 projection; }; static struct { bool operator()(unique_color_projection a, unique_color_projection b) const { return a.projection < b.projection; } } g_unique_color_projection_sort; //----------------------------------------------------------------------------------------------------------------------------------------- dxt1_endpoint_optimizer::dxt1_endpoint_optimizer() : m_pParams(NULL), m_pResults(NULL), m_perceptual(false), m_num_prev_results(0) { m_low_coords.reserve(512); m_high_coords.reserve(512); m_unique_colors.reserve(512); m_temp_unique_colors.reserve(512); m_unique_packed_colors.reserve(512); m_norm_unique_colors.reserve(512); m_norm_unique_colors_weighted.reserve(512); m_lo_cells.reserve(128); m_hi_cells.reserve(128); } // All selectors are equal. Try compressing as if it was solid, using the block's average color, using ryg's optimal single color compression tables. bool dxt1_endpoint_optimizer::try_average_block_as_solid() { uint64 tot_r = 0; uint64 tot_g = 0; uint64 tot_b = 0; uint total_weight = 0; for (uint i = 0; i < m_unique_colors.size(); i++) { uint weight = m_unique_colors[i].m_weight; total_weight += weight; tot_r += m_unique_colors[i].m_color.r * static_cast(weight); tot_g += m_unique_colors[i].m_color.g * static_cast(weight); tot_b += m_unique_colors[i].m_color.b * static_cast(weight); } const uint half_total_weight = total_weight >> 1; uint ave_r = static_cast((tot_r + half_total_weight) / total_weight); uint ave_g = static_cast((tot_g + half_total_weight) / total_weight); uint ave_b = static_cast((tot_b + half_total_weight) / total_weight); uint low_color = (ryg_dxt::OMatch5[ave_r][0] << 11) | (ryg_dxt::OMatch6[ave_g][0] << 5) | ryg_dxt::OMatch5[ave_b][0]; uint high_color = (ryg_dxt::OMatch5[ave_r][1] << 11) | (ryg_dxt::OMatch6[ave_g][1] << 5) | ryg_dxt::OMatch5[ave_b][1]; bool improved = evaluate_solution(dxt1_solution_coordinates((uint16)low_color, (uint16)high_color)); if ((m_pParams->m_use_alpha_blocks) && (m_best_solution.m_error)) { low_color = (ryg_dxt::OMatch5_3[ave_r][0] << 11) | (ryg_dxt::OMatch6_3[ave_g][0] << 5) | ryg_dxt::OMatch5_3[ave_b][0]; high_color = (ryg_dxt::OMatch5_3[ave_r][1] << 11) | (ryg_dxt::OMatch6_3[ave_g][1] << 5) | ryg_dxt::OMatch5_3[ave_b][1]; improved |= evaluate_solution(dxt1_solution_coordinates((uint16)low_color, (uint16)high_color)); } if (m_pParams->m_quality == cCRNDXTQualityUber) { // Try compressing as all-solid using the other (non-average) colors in the block in uber. for (uint i = 0; i < m_unique_colors.size(); i++) { uint r = m_unique_colors[i].m_color[0]; uint g = m_unique_colors[i].m_color[1]; uint b = m_unique_colors[i].m_color[2]; if ((r == ave_r) && (g == ave_g) && (b == ave_b)) continue; uint low_color = (ryg_dxt::OMatch5[r][0] << 11) | (ryg_dxt::OMatch6[g][0] << 5) | ryg_dxt::OMatch5[b][0]; uint high_color = (ryg_dxt::OMatch5[r][1] << 11) | (ryg_dxt::OMatch6[g][1] << 5) | ryg_dxt::OMatch5[b][1]; improved |= evaluate_solution(dxt1_solution_coordinates((uint16)low_color, (uint16)high_color)); if ((m_pParams->m_use_alpha_blocks) && (m_best_solution.m_error)) { low_color = (ryg_dxt::OMatch5_3[r][0] << 11) | (ryg_dxt::OMatch6_3[g][0] << 5) | ryg_dxt::OMatch5_3[b][0]; high_color = (ryg_dxt::OMatch5_3[r][1] << 11) | (ryg_dxt::OMatch6_3[g][1] << 5) | ryg_dxt::OMatch5_3[b][1]; improved |= evaluate_solution(dxt1_solution_coordinates((uint16)low_color, (uint16)high_color)); } } } return improved; } void dxt1_endpoint_optimizer::compute_vectors(const vec3F& perceptual_weights) { m_norm_unique_colors.resize(0); m_norm_unique_colors_weighted.resize(0); m_mean_norm_color.clear(); m_mean_norm_color_weighted.clear(); for (uint i = 0; i < m_unique_colors.size(); i++) { const color_quad_u8& color = m_unique_colors[i].m_color; const uint weight = m_unique_colors[i].m_weight; vec3F norm_color(color.r * 1.0f / 255.0f, color.g * 1.0f / 255.0f, color.b * 1.0f / 255.0f); vec3F norm_color_weighted(vec3F::mul_components(perceptual_weights, norm_color)); m_norm_unique_colors.push_back(norm_color); m_norm_unique_colors_weighted.push_back(norm_color_weighted); m_mean_norm_color += norm_color * (float)weight; m_mean_norm_color_weighted += norm_color_weighted * (float)weight; } if (m_total_unique_color_weight) { m_mean_norm_color *= (1.0f / m_total_unique_color_weight); m_mean_norm_color_weighted *= (1.0f / m_total_unique_color_weight); } for (uint i = 0; i < m_unique_colors.size(); i++) { m_norm_unique_colors[i] -= m_mean_norm_color; m_norm_unique_colors_weighted[i] -= m_mean_norm_color_weighted; } } // Compute PCA (principle axis, i.e. direction of largest variance) of input vectors. void dxt1_endpoint_optimizer::compute_pca(vec3F& axis, const vec3F_array& norm_colors, const vec3F& def) { double cov[6] = {0, 0, 0, 0, 0, 0}; for (uint i = 0; i < norm_colors.size(); i++) { const vec3F& v = norm_colors[i]; float r = v[0]; float g = v[1]; float b = v[2]; if (m_unique_colors[i].m_weight > 1) { const double weight = m_unique_colors[i].m_weight; cov[0] += r * r * weight; cov[1] += r * g * weight; cov[2] += r * b * weight; cov[3] += g * g * weight; cov[4] += g * b * weight; cov[5] += b * b * weight; } else { cov[0] += r * r; cov[1] += r * g; cov[2] += r * b; cov[3] += g * g; cov[4] += g * b; cov[5] += b * b; } } double vfr = .9f; double vfg = 1.0f; double vfb = .7f; for (uint iter = 0; iter < 8; iter++) { double r = vfr * cov[0] + vfg * cov[1] + vfb * cov[2]; double g = vfr * cov[1] + vfg * cov[3] + vfb * cov[4]; double b = vfr * cov[2] + vfg * cov[4] + vfb * cov[5]; double m = math::maximum(fabs(r), fabs(g), fabs(b)); if (m > 1e-10) { m = 1.0f / m; r *= m; g *= m; b *= m; } double delta = math::square(vfr - r) + math::square(vfg - g) + math::square(vfb - b); vfr = r; vfg = g; vfb = b; if ((iter > 2) && (delta < 1e-8)) break; } double len = vfr * vfr + vfg * vfg + vfb * vfb; if (len < 1e-10) { axis = def; } else { len = 1.0f / sqrt(len); axis.set(static_cast(vfr * len), static_cast(vfg * len), static_cast(vfb * len)); } } static const uint8 g_invTableNull[4] = {0, 1, 2, 3}; static const uint8 g_invTableAlpha[4] = {1, 0, 2, 3}; static const uint8 g_invTableColor[4] = {1, 0, 3, 2}; // Computes a valid (encodable) DXT1 solution (low/high colors, swizzled selectors) from input. void dxt1_endpoint_optimizer::return_solution() { compute_selectors(); bool invert_selectors; if (m_best_solution.m_alpha_block) invert_selectors = (m_best_solution.m_coords.m_low_color > m_best_solution.m_coords.m_high_color); else { CRNLIB_ASSERT(m_best_solution.m_coords.m_low_color != m_best_solution.m_coords.m_high_color); invert_selectors = (m_best_solution.m_coords.m_low_color < m_best_solution.m_coords.m_high_color); } m_pResults->m_alternate_rounding = m_best_solution.m_alternate_rounding; m_pResults->m_enforce_selector = m_best_solution.m_enforce_selector; m_pResults->m_enforced_selector = m_best_solution.m_enforced_selector; m_pResults->m_reordered = invert_selectors; if (invert_selectors) { m_pResults->m_low_color = m_best_solution.m_coords.m_high_color; m_pResults->m_high_color = m_best_solution.m_coords.m_low_color; } else { m_pResults->m_low_color = m_best_solution.m_coords.m_low_color; m_pResults->m_high_color = m_best_solution.m_coords.m_high_color; } const uint8* pInvert_table = g_invTableNull; if (invert_selectors) pInvert_table = m_best_solution.m_alpha_block ? g_invTableAlpha : g_invTableColor; const uint alpha_thresh = m_pParams->m_pixels_have_alpha ? (m_pParams->m_dxt1a_alpha_threshold << 24U) : 0; const uint32* pSrc_pixels = reinterpret_cast(m_pParams->m_pPixels); uint8* pDst_selectors = m_pResults->m_pSelectors; if ((m_unique_colors.size() == 1) && (!m_pParams->m_pixels_have_alpha)) { uint32 c = utils::read_le32(pSrc_pixels); CRNLIB_ASSERT(c >= alpha_thresh); c |= 0xFF000000U; unique_color_hash_map::const_iterator it(m_unique_color_hash_map.find(c)); CRNLIB_ASSERT(it != m_unique_color_hash_map.end()); uint unique_color_index = it->second; uint selector = pInvert_table[m_best_solution.m_selectors[unique_color_index]]; memset(pDst_selectors, selector, m_pParams->m_num_pixels); } else { uint8* pDst_selectors_end = pDst_selectors + m_pParams->m_num_pixels; uint8 prev_selector = 0; uint32 prev_color = 0; do { uint32 c = utils::read_le32(pSrc_pixels); pSrc_pixels++; uint8 selector = 3; if (c >= alpha_thresh) { c |= 0xFF000000U; if (c == prev_color) selector = prev_selector; else { unique_color_hash_map::const_iterator it(m_unique_color_hash_map.find(c)); CRNLIB_ASSERT(it != m_unique_color_hash_map.end()); uint unique_color_index = it->second; selector = pInvert_table[m_best_solution.m_selectors[unique_color_index]]; prev_color = c; prev_selector = selector; } } *pDst_selectors++ = selector; } while (pDst_selectors != pDst_selectors_end); } m_pResults->m_alpha_block = m_best_solution.m_alpha_block; m_pResults->m_error = m_best_solution.m_error; } // Per-component 1D endpoint optimization. void dxt1_endpoint_optimizer::optimize_endpoint_comps() { compute_selectors(); if ((m_best_solution.m_alpha_block) || (!m_best_solution.m_error)) return; color_quad_u8 orig_l_scaled(dxt1_block::unpack_color(m_best_solution.m_coords.m_low_color, true)); color_quad_u8 orig_h_scaled(dxt1_block::unpack_color(m_best_solution.m_coords.m_high_color, true)); color_quad_u8 min_color(0xFF, 0xFF, 0xFF, 0xFF); color_quad_u8 max_color(0, 0, 0, 0); for (uint i = 0; i < m_unique_colors.size(); i++) { min_color = color_quad_u8::component_min(min_color, m_unique_colors[i].m_color); max_color = color_quad_u8::component_max(max_color, m_unique_colors[i].m_color); } // Try to separately optimize each component. This is a 1D problem so it's easy to compute accurate per-component error bounds. uint64 W[4] = {}, WD2[4] = {}, WDD[4] = {}; for (uint comp_index = 0; comp_index < 3; comp_index++) { uint min_color_weight = 0; uint max_color_weight = 0; for (uint s = 0; s < 4; s++) W[s] = WD2[s] = WDD[s] = 0; for (uint i = 0; i < m_unique_colors.size(); i++) { uint c = m_unique_colors[i].m_color[comp_index]; uint w = m_unique_colors[i].m_weight; uint8 s = m_best_solution.m_selectors[i]; W[s] += (int64)w; WD2[s] += (int64)w * c * 2; WDD[s] += (int64)w * c * c; if (c == min_color[comp_index]) min_color_weight += w; if (c == max_color[comp_index]) max_color_weight += w; } uint ll[4]; ll[0] = orig_l_scaled[comp_index]; ll[1] = orig_h_scaled[comp_index]; ll[2] = (ll[0] * 2 + ll[1]) / 3; ll[3] = (ll[0] + ll[1] * 2) / 3; uint64 error_to_beat = 0; for (int s = 0; s < 4; s++) error_to_beat += W[s] * ll[s] * ll[s] - WD2[s] * ll[s] + WDD[s]; if (!error_to_beat) continue; CRNLIB_ASSERT((min_color_weight > 0) && (max_color_weight > 0)); const uint error_to_beat_div_min_color_weight = min_color_weight ? ((error_to_beat + min_color_weight - 1) / min_color_weight) : 0; const uint error_to_beat_div_max_color_weight = max_color_weight ? ((error_to_beat + max_color_weight - 1) / max_color_weight) : 0; const uint m = (comp_index == 1) ? 63 : 31; const uint m_shift = (comp_index == 1) ? 3 : 2; for (uint o = 0; o <= m; o++) { uint tl[4]; tl[0] = (comp_index == 1) ? ((o << 2) | (o >> 4)) : ((o << 3) | (o >> 2)); for (uint h = 0; h < 8; h++) { const uint pl = h << m_shift; const uint ph = ((h + 1) << m_shift) - 1; uint tl_l = (comp_index == 1) ? ((pl << 2) | (pl >> 4)) : ((pl << 3) | (pl >> 2)); uint tl_h = (comp_index == 1) ? ((ph << 2) | (ph >> 4)) : ((ph << 3) | (ph >> 2)); tl_l = math::minimum(tl_l, tl[0]); tl_h = math::maximum(tl_h, tl[0]); uint c_l = min_color[comp_index]; uint c_h = max_color[comp_index]; if (c_h < tl_l) { uint min_possible_error = math::square(tl_l - c_l); if (min_possible_error > error_to_beat_div_min_color_weight) continue; } else if (c_l > tl_h) { uint min_possible_error = math::square(c_h - tl_h); if (min_possible_error > error_to_beat_div_max_color_weight) continue; } for (uint p = pl; p <= ph; p++) { tl[1] = (comp_index == 1) ? ((p << 2) | (p >> 4)) : ((p << 3) | (p >> 2)); tl[2] = (tl[0] * 2 + tl[1]) / 3; tl[3] = (tl[0] + tl[1] * 2) / 3; uint64 trial_error = 0; for (int s = 0; s < 4; s++) trial_error += W[s] * tl[s] * tl[s] - WD2[s] * tl[s] + WDD[s]; if (trial_error < error_to_beat) { color_quad_u8 l(dxt1_block::unpack_color(m_best_solution.m_coords.m_low_color, false)); color_quad_u8 h(dxt1_block::unpack_color(m_best_solution.m_coords.m_high_color, false)); l[comp_index] = static_cast(o); h[comp_index] = static_cast(p); if (evaluate_solution(dxt1_solution_coordinates(dxt1_block::pack_color(l, false), dxt1_block::pack_color(h, false)))) { if (!m_best_solution.m_error) return; compute_selectors(); for (uint s = 0; s < 4; s++) W[s] = WD2[s] = WDD[s] = 0; for (uint i = 0; i < m_unique_colors.size(); i++) { uint c = m_unique_colors[i].m_color[comp_index]; uint w = m_unique_colors[i].m_weight; uint8 s = m_best_solution.m_selectors[i]; W[s] += (int64)w; WD2[s] += (int64)w * c * 2; WDD[s] += (int64)w * c * c; } error_to_beat = 0; for (int s = 0; s < 4; s++) error_to_beat += W[s] * tl[s] * tl[s] - WD2[s] * tl[s] + WDD[s]; } } } } } } } // Voxel adjacency delta coordinations. static const struct adjacent_coords { int8 x, y, z; } g_adjacency[26] = { {-1, -1, -1}, {0, -1, -1}, {1, -1, -1}, {-1, 0, -1}, {0, 0, -1}, {1, 0, -1}, {-1, 1, -1}, {0, 1, -1}, {1, 1, -1}, {-1, -1, 0}, {0, -1, 0}, {1, -1, 0}, {-1, 0, 0}, {1, 0, 0}, {-1, 1, 0}, {0, 1, 0}, {1, 1, 0}, {-1, -1, 1}, {0, -1, 1}, {1, -1, 1}, {-1, 0, 1}, {0, 0, 1}, {1, 0, 1}, {-1, 1, 1}, {0, 1, 1}, {1, 1, 1}}; // Attempt to refine current solution's endpoints given the current selectors using least squares. bool dxt1_endpoint_optimizer::refine_solution(int refinement_level) { compute_selectors(); static const int w1Tab[4] = {3, 0, 2, 1}; static const int prods_0[4] = {0x00, 0x00, 0x02, 0x02}; static const int prods_1[4] = {0x00, 0x09, 0x01, 0x04}; static const int prods_2[4] = {0x09, 0x00, 0x04, 0x01}; double akku_0 = 0; double akku_1 = 0; double akku_2 = 0; double At1_r, At1_g, At1_b; double At2_r, At2_g, At2_b; At1_r = At1_g = At1_b = 0; At2_r = At2_g = At2_b = 0; for (uint i = 0; i < m_unique_colors.size(); i++) { const color_quad_u8& c = m_unique_colors[i].m_color; const double weight = m_unique_colors[i].m_weight; double r = c.r * weight; double g = c.g * weight; double b = c.b * weight; int step = m_best_solution.m_selectors[i] ^ 1; int w1 = w1Tab[step]; akku_0 += prods_0[step] * weight; akku_1 += prods_1[step] * weight; akku_2 += prods_2[step] * weight; At1_r += w1 * r; At1_g += w1 * g; At1_b += w1 * b; At2_r += r; At2_g += g; At2_b += b; } At2_r = 3 * At2_r - At1_r; At2_g = 3 * At2_g - At1_g; At2_b = 3 * At2_b - At1_b; double xx = akku_2; double yy = akku_1; double xy = akku_0; double t = xx * yy - xy * xy; if (!yy || !xx || (fabs(t) < .0000125f)) return false; double frb = (3.0f * 31.0f / 255.0f) / t; double fg = frb * (63.0f / 31.0f); bool improved = false; if (refinement_level == 0) { uint max16; max16 = math::clamp(static_cast((At1_r * yy - At2_r * xy) * frb + 0.5f), 0, 31) << 11; max16 |= math::clamp(static_cast((At1_g * yy - At2_g * xy) * fg + 0.5f), 0, 63) << 5; max16 |= math::clamp(static_cast((At1_b * yy - At2_b * xy) * frb + 0.5f), 0, 31) << 0; uint min16; min16 = math::clamp(static_cast((At2_r * xx - At1_r * xy) * frb + 0.5f), 0, 31) << 11; min16 |= math::clamp(static_cast((At2_g * xx - At1_g * xy) * fg + 0.5f), 0, 63) << 5; min16 |= math::clamp(static_cast((At2_b * xx - At1_b * xy) * frb + 0.5f), 0, 31) << 0; dxt1_solution_coordinates nc((uint16)min16, (uint16)max16); nc.canonicalize(); improved |= evaluate_solution(nc); } else if (refinement_level == 1) { // Try exploring the local lattice neighbors of the least squares optimized result. color_quad_u8 e[2]; e[0].clear(); e[0][0] = (uint8)math::clamp(static_cast((At1_r * yy - At2_r * xy) * frb + 0.5f), 0, 31); e[0][1] = (uint8)math::clamp(static_cast((At1_g * yy - At2_g * xy) * fg + 0.5f), 0, 63); e[0][2] = (uint8)math::clamp(static_cast((At1_b * yy - At2_b * xy) * frb + 0.5f), 0, 31); e[1].clear(); e[1][0] = (uint8)math::clamp(static_cast((At2_r * xx - At1_r * xy) * frb + 0.5f), 0, 31); e[1][1] = (uint8)math::clamp(static_cast((At2_g * xx - At1_g * xy) * fg + 0.5f), 0, 63); e[1][2] = (uint8)math::clamp(static_cast((At2_b * xx - At1_b * xy) * frb + 0.5f), 0, 31); for (uint i = 0; i < 2; i++) { for (int rr = -1; rr <= 1; rr++) { for (int gr = -1; gr <= 1; gr++) { for (int br = -1; br <= 1; br++) { dxt1_solution_coordinates nc; color_quad_u8 c[2]; c[0] = e[0]; c[1] = e[1]; c[i][0] = (uint8)math::clamp(c[i][0] + rr, 0, 31); c[i][1] = (uint8)math::clamp(c[i][1] + gr, 0, 63); c[i][2] = (uint8)math::clamp(c[i][2] + br, 0, 31); nc.m_low_color = dxt1_block::pack_color(c[0], false); nc.m_high_color = dxt1_block::pack_color(c[1], false); nc.canonicalize(); improved |= evaluate_solution(nc); } } } } } else { // Try even harder to explore the local lattice neighbors of the least squares optimized result. color_quad_u8 e[2]; e[0].clear(); e[0][0] = (uint8)math::clamp(static_cast((At1_r * yy - At2_r * xy) * frb + 0.5f), 0, 31); e[0][1] = (uint8)math::clamp(static_cast((At1_g * yy - At2_g * xy) * fg + 0.5f), 0, 63); e[0][2] = (uint8)math::clamp(static_cast((At1_b * yy - At2_b * xy) * frb + 0.5f), 0, 31); e[1].clear(); e[1][0] = (uint8)math::clamp(static_cast((At2_r * xx - At1_r * xy) * frb + 0.5f), 0, 31); e[1][1] = (uint8)math::clamp(static_cast((At2_g * xx - At1_g * xy) * fg + 0.5f), 0, 63); e[1][2] = (uint8)math::clamp(static_cast((At2_b * xx - At1_b * xy) * frb + 0.5f), 0, 31); for (int orr = -1; orr <= 1; orr++) { for (int ogr = -1; ogr <= 1; ogr++) { for (int obr = -1; obr <= 1; obr++) { dxt1_solution_coordinates nc; color_quad_u8 c[2]; c[0] = e[0]; c[1] = e[1]; c[0][0] = (uint8)math::clamp(c[0][0] + orr, 0, 31); c[0][1] = (uint8)math::clamp(c[0][1] + ogr, 0, 63); c[0][2] = (uint8)math::clamp(c[0][2] + obr, 0, 31); for (int rr = -1; rr <= 1; rr++) { for (int gr = -1; gr <= 1; gr++) { for (int br = -1; br <= 1; br++) { c[1][0] = (uint8)math::clamp(c[1][0] + rr, 0, 31); c[1][1] = (uint8)math::clamp(c[1][1] + gr, 0, 63); c[1][2] = (uint8)math::clamp(c[1][2] + br, 0, 31); nc.m_low_color = dxt1_block::pack_color(c[0], false); nc.m_high_color = dxt1_block::pack_color(c[1], false); nc.canonicalize(); improved |= evaluate_solution(nc); } } } } } } } return improved; } //----------------------------------------------------------------------------------------------------------------------------------------- // Primary endpoint optimization entrypoint. void dxt1_endpoint_optimizer::optimize_endpoints(vec3F& low_color, vec3F& high_color) { vec3F orig_low_color(low_color); vec3F orig_high_color(high_color); m_trial_solution.clear(); uint num_passes; const int16* pProbe_table = g_uber_probe_table; uint probe_range; float dist_per_trial = .015625f; // How many probes, and the distance between each probe depends on the quality level. switch (m_pParams->m_quality) { case cCRNDXTQualitySuperFast: pProbe_table = g_fast_probe_table; probe_range = cFastProbeTableSize; dist_per_trial = .027063293f; num_passes = 1; break; case cCRNDXTQualityFast: pProbe_table = g_fast_probe_table; probe_range = cFastProbeTableSize; dist_per_trial = .027063293f; num_passes = 2; break; case cCRNDXTQualityNormal: pProbe_table = g_normal_probe_table; probe_range = cNormalProbeTableSize; dist_per_trial = .027063293f; num_passes = 2; break; case cCRNDXTQualityBetter: pProbe_table = g_better_probe_table; probe_range = cBetterProbeTableSize; num_passes = 2; break; default: pProbe_table = g_uber_probe_table; probe_range = cUberProbeTableSize; num_passes = 4; break; } if (m_pParams->m_endpoint_caching) { // Try the previous X winning endpoints. This may not give us optimal results, but it may increase the probability of early outs while evaluating potential solutions. const uint num_prev_results = math::minimum(cMaxPrevResults, m_num_prev_results); for (uint i = 0; i < num_prev_results; i++) evaluate_solution(m_prev_results[i]); if (!m_best_solution.m_error) { // Got lucky - one of the previous endpoints is optimal. return_solution(); return; } } if (m_pParams->m_quality >= cCRNDXTQualityBetter) { //evaluate_solution(dxt1_solution_coordinates(low_color, high_color), true, &m_best_solution); //refine_solution(); try_median4(orig_low_color, orig_high_color); } uint probe_low[cUberProbeTableSize * 2 + 1]; uint probe_high[cUberProbeTableSize * 2 + 1]; vec3F scaled_principle_axis[2]; scaled_principle_axis[1] = m_principle_axis * dist_per_trial; scaled_principle_axis[1][0] *= 31.0f; scaled_principle_axis[1][1] *= 63.0f; scaled_principle_axis[1][2] *= 31.0f; scaled_principle_axis[0] = -scaled_principle_axis[1]; //vec3F initial_ofs(scaled_principle_axis * (float)-probe_range); //initial_ofs[0] += .5f; //initial_ofs[1] += .5f; //initial_ofs[2] += .5f; low_color[0] = math::clamp(low_color[0] * 31.0f, 0.0f, 31.0f); low_color[1] = math::clamp(low_color[1] * 63.0f, 0.0f, 63.0f); low_color[2] = math::clamp(low_color[2] * 31.0f, 0.0f, 31.0f); high_color[0] = math::clamp(high_color[0] * 31.0f, 0.0f, 31.0f); high_color[1] = math::clamp(high_color[1] * 63.0f, 0.0f, 63.0f); high_color[2] = math::clamp(high_color[2] * 31.0f, 0.0f, 31.0f); int d[3]; for (uint c = 0; c < 3; c++) d[c] = math::float_to_int_round((high_color[c] - low_color[c]) * (c == 0 ? m_perceptual ? 16 : 2 : c == 1 ? m_perceptual ? 25 : 1 : 2)); crnlib::vector evaluated_color_projections(m_evaluated_colors.size()); int64 average_projection = d[0] * (high_color[0] + low_color[0]) * 4 + d[1] * (high_color[1] + low_color[1]) * 2 + d[2] * (high_color[2] + low_color[2]) * 4; for (uint i = 0; i < m_evaluated_colors.size(); i++) { int64 delta = d[0] * m_evaluated_colors[i].m_color[0] + d[1] * m_evaluated_colors[i].m_color[1] + d[2] * m_evaluated_colors[i].m_color[2] - average_projection; evaluated_color_projections[i].projection = delta * m_evaluated_colors[i].m_weight; evaluated_color_projections[i].color = m_evaluated_colors[i]; } std::sort(evaluated_color_projections.begin(), evaluated_color_projections.end(), g_unique_color_projection_sort); for (uint i = 0, iEnd = m_evaluated_colors.size(); i < iEnd; i++) m_evaluated_colors[i] = evaluated_color_projections[i & 1 ? i >> 1 : iEnd - 1 - (i >> 1)].color; for (uint pass = 0; pass < num_passes; pass++) { // Now separately sweep or probe the low and high colors along the principle axis, both positively and negatively. // This results in two arrays of candidate low/high endpoints. Every unique combination of candidate endpoints is tried as a potential solution. // In higher quality modes, the various nearby lattice neighbors of each candidate endpoint are also explored, which allows the current solution to "wobble" or "migrate" // to areas with lower error. // This entire process can be repeated up to X times (depending on the quality level) until a local minimum is established. // This method is very stable and scalable. It could be implemented more elegantly, but I'm now very cautious of touching this code. if (pass) { color_quad_u8 low(dxt1_block::unpack_color(m_best_solution.m_coords.m_low_color, false)); low_color = vec3F(low.r, low.g, low.b); color_quad_u8 high(dxt1_block::unpack_color(m_best_solution.m_coords.m_high_color, false)); high_color = vec3F(high.r, high.g, high.b); } const uint64 prev_best_error = m_best_solution.m_error; if (!prev_best_error) break; // Sweep low endpoint along principle axis, record positions int prev_packed_color[2] = {-1, -1}; uint num_low_trials = 0; vec3F initial_probe_low_color(low_color + vec3F(.5f)); for (uint i = 0; i < probe_range; i++) { const int ls = i ? 0 : 1; int x = pProbe_table[i]; for (int s = ls; s < 2; s++) { vec3F probe_low_color(initial_probe_low_color + scaled_principle_axis[s] * (float)x); int r = math::clamp((int)floor(probe_low_color[0]), 0, 31); int g = math::clamp((int)floor(probe_low_color[1]), 0, 63); int b = math::clamp((int)floor(probe_low_color[2]), 0, 31); int packed_color = b | (g << 5U) | (r << 11U); if (packed_color != prev_packed_color[s]) { probe_low[num_low_trials++] = packed_color; prev_packed_color[s] = packed_color; } } } prev_packed_color[0] = -1; prev_packed_color[1] = -1; // Sweep high endpoint along principle axis, record positions uint num_high_trials = 0; vec3F initial_probe_high_color(high_color + vec3F(.5f)); for (uint i = 0; i < probe_range; i++) { const int ls = i ? 0 : 1; int x = pProbe_table[i]; for (int s = ls; s < 2; s++) { vec3F probe_high_color(initial_probe_high_color + scaled_principle_axis[s] * (float)x); int r = math::clamp((int)floor(probe_high_color[0]), 0, 31); int g = math::clamp((int)floor(probe_high_color[1]), 0, 63); int b = math::clamp((int)floor(probe_high_color[2]), 0, 31); int packed_color = b | (g << 5U) | (r << 11U); if (packed_color != prev_packed_color[s]) { probe_high[num_high_trials++] = packed_color; prev_packed_color[s] = packed_color; } } } // Now try all unique combinations. for (uint i = 0; i < num_low_trials; i++) { for (uint j = 0; j < num_high_trials; j++) { dxt1_solution_coordinates coords((uint16)probe_low[i], (uint16)probe_high[j]); coords.canonicalize(); evaluate_solution(coords); } } if (m_pParams->m_quality >= cCRNDXTQualityNormal) { // Generate new candidates by exploring the low color's direct lattice neighbors color_quad_u8 lc(dxt1_block::unpack_color(m_best_solution.m_coords.m_low_color, false)); for (int i = 0; i < 26; i++) { int r = lc.r + g_adjacency[i].x; if ((r < 0) || (r > 31)) continue; int g = lc.g + g_adjacency[i].y; if ((g < 0) || (g > 63)) continue; int b = lc.b + g_adjacency[i].z; if ((b < 0) || (b > 31)) continue; dxt1_solution_coordinates coords(dxt1_block::pack_color(r, g, b, false), m_best_solution.m_coords.m_high_color); coords.canonicalize(); evaluate_solution(coords); } if (m_pParams->m_quality == cCRNDXTQualityUber) { // Generate new candidates by exploring the low color's direct lattice neighbors - this time, explore much further separately on each axis. lc = dxt1_block::unpack_color(m_best_solution.m_coords.m_low_color, false); for (int a = 0; a < 3; a++) { int limit = (a == 1) ? 63 : 31; for (int s = -2; s <= 2; s += 4) { color_quad_u8 c(lc); int q = c[a] + s; if ((q < 0) || (q > limit)) continue; c[a] = (uint8)q; dxt1_solution_coordinates coords(dxt1_block::pack_color(c, false), m_best_solution.m_coords.m_high_color); coords.canonicalize(); evaluate_solution(coords); } } } // Generate new candidates by exploring the high color's direct lattice neighbors color_quad_u8 hc(dxt1_block::unpack_color(m_best_solution.m_coords.m_high_color, false)); for (int i = 0; i < 26; i++) { int r = hc.r + g_adjacency[i].x; if ((r < 0) || (r > 31)) continue; int g = hc.g + g_adjacency[i].y; if ((g < 0) || (g > 63)) continue; int b = hc.b + g_adjacency[i].z; if ((b < 0) || (b > 31)) continue; dxt1_solution_coordinates coords(m_best_solution.m_coords.m_low_color, dxt1_block::pack_color(r, g, b, false)); coords.canonicalize(); evaluate_solution(coords); } if (m_pParams->m_quality == cCRNDXTQualityUber) { // Generate new candidates by exploring the high color's direct lattice neighbors - this time, explore much further separately on each axis. hc = dxt1_block::unpack_color(m_best_solution.m_coords.m_high_color, false); for (int a = 0; a < 3; a++) { int limit = (a == 1) ? 63 : 31; for (int s = -2; s <= 2; s += 4) { color_quad_u8 c(hc); int q = c[a] + s; if ((q < 0) || (q > limit)) continue; c[a] = (uint8)q; dxt1_solution_coordinates coords(m_best_solution.m_coords.m_low_color, dxt1_block::pack_color(c, false)); coords.canonicalize(); evaluate_solution(coords); } } } } if ((!m_best_solution.m_error) || ((pass) && (m_best_solution.m_error == prev_best_error))) break; if (m_pParams->m_quality >= cCRNDXTQualityUber) { // Attempt to refine current solution's endpoints given the current selectors using least squares. refine_solution(1); } } if (m_pParams->m_quality >= cCRNDXTQualityNormal) { if ((m_best_solution.m_error) && (!m_pParams->m_pixels_have_alpha)) { bool choose_solid_block = false; if (m_best_solution.are_selectors_all_equal()) { // All selectors equal - try various solid-block optimizations choose_solid_block = try_average_block_as_solid(); } if ((!choose_solid_block) && (m_pParams->m_quality == cCRNDXTQualityUber)) { // Per-component 1D endpoint optimization. optimize_endpoint_comps(); } } if (m_pParams->m_quality == cCRNDXTQualityUber) { if (m_best_solution.m_error) { // The pixels may have already been DXTc compressed by another compressor. // It's usually possible to recover the endpoints used to previously pack the block. try_combinatorial_encoding(); } } } return_solution(); if (m_pParams->m_endpoint_caching) { // Remember result for later reruse. m_prev_results[m_num_prev_results & (cMaxPrevResults - 1)] = m_best_solution.m_coords; m_num_prev_results++; } } void dxt1_endpoint_optimizer::handle_multicolor_block() { uint num_passes = 1; vec3F perceptual_weights(1.0f); if (m_perceptual) { // Compute RGB weighting for use in perceptual mode. // The more saturated the block, the more the weights deviate from (1,1,1). float ave_redness = 0; float ave_blueness = 0; float ave_l = 0; for (uint i = 0; i < m_unique_colors.size(); i++) { const color_quad_u8& c = m_unique_colors[i].m_color; int l = (c.r + c.g + c.b + 1) / 3; float scale = (float)m_unique_colors[i].m_weight / math::maximum(1.0f, l); ave_redness += scale * c.r; ave_blueness += scale * c.b; ave_l += l; } ave_redness /= m_total_unique_color_weight; ave_blueness /= m_total_unique_color_weight; ave_l /= m_total_unique_color_weight; ave_l = math::minimum(1.0f, ave_l * 16.0f / 255.0f); float p = ave_l * powf(math::saturate(math::maximum(ave_redness, ave_blueness) * 1.0f / 3.0f), 2.75f); if (p >= 1.0f) num_passes = 1; else { num_passes = 2; perceptual_weights = vec3F::lerp(vec3F(.212f, .72f, .072f), perceptual_weights, p); } } for (uint pass_index = 0; pass_index < num_passes; pass_index++) { compute_vectors(perceptual_weights); compute_pca(m_principle_axis, m_norm_unique_colors_weighted, vec3F(.2837149f, 0.9540631f, 0.096277453f)); m_principle_axis[0] /= perceptual_weights[0]; m_principle_axis[1] /= perceptual_weights[1]; m_principle_axis[2] /= perceptual_weights[2]; m_principle_axis.normalize_in_place(); if (num_passes > 1) { // Check for obviously wild principle axes and try to compensate by backing off the component weightings. if (fabs(m_principle_axis[0]) >= .795f) perceptual_weights.set(.424f, .6f, .072f); else if (fabs(m_principle_axis[2]) >= .795f) perceptual_weights.set(.212f, .6f, .212f); else break; } } // Find bounds of projection onto (potentially skewed) principle axis. float l = 1e+9; float h = -1e+9; for (uint i = 0; i < m_norm_unique_colors.size(); i++) { float d = m_norm_unique_colors[i] * m_principle_axis; l = math::minimum(l, d); h = math::maximum(h, d); } vec3F low_color(m_mean_norm_color + l * m_principle_axis); vec3F high_color(m_mean_norm_color + h * m_principle_axis); if (!low_color.is_within_bounds(0.0f, 1.0f)) { // Low color is outside the lattice, so bring it back in by casting a ray. vec3F coord; float t; aabb3F bounds(vec3F(0.0f), vec3F(1.0f)); intersection::result res = intersection::ray_aabb(coord, t, ray3F(low_color, m_principle_axis), bounds); if (res == intersection::cSuccess) low_color = coord; } if (!high_color.is_within_bounds(0.0f, 1.0f)) { // High color is outside the lattice, so bring it back in by casting a ray. vec3F coord; float t; aabb3F bounds(vec3F(0.0f), vec3F(1.0f)); intersection::result res = intersection::ray_aabb(coord, t, ray3F(high_color, -m_principle_axis), bounds); if (res == intersection::cSuccess) high_color = coord; } // Now optimize the endpoints using the projection bounds on the (potentially skewed) principle axis as a starting point. optimize_endpoints(low_color, high_color); } // Tries quantizing the block to 4 colors using vanilla LBG. It tries all combinations of the quantized results as potential endpoints. bool dxt1_endpoint_optimizer::try_median4(const vec3F& low_color, const vec3F& high_color) { vec3F means[4]; if (m_unique_colors.size() <= 4) { for (uint i = 0; i < 4; i++) means[i] = m_norm_unique_colors[math::minimum(m_norm_unique_colors.size() - 1, i)]; } else { means[0] = low_color - m_mean_norm_color; means[3] = high_color - m_mean_norm_color; means[1] = vec3F::lerp(means[0], means[3], 1.0f / 3.0f); means[2] = vec3F::lerp(means[0], means[3], 2.0f / 3.0f); fast_random rm; const uint cMaxIters = 8; uint reassign_rover = 0; float prev_total_dist = math::cNearlyInfinite; for (uint iter = 0; iter < cMaxIters; iter++) { vec3F new_means[4]; float new_weights[4]; utils::zero_object(new_means); utils::zero_object(new_weights); float total_dist = 0; for (uint i = 0; i < m_unique_colors.size(); i++) { const vec3F& v = m_norm_unique_colors[i]; float best_dist = means[0].squared_distance(v); int best_index = 0; for (uint j = 1; j < 4; j++) { float dist = means[j].squared_distance(v); if (dist < best_dist) { best_dist = dist; best_index = j; } } total_dist += best_dist; new_means[best_index] += v * (float)m_unique_colors[i].m_weight; new_weights[best_index] += (float)m_unique_colors[i].m_weight; } uint highest_index = 0; float highest_weight = 0; bool empty_cell = false; for (uint j = 0; j < 4; j++) { if (new_weights[j] > 0.0f) { means[j] = new_means[j] / new_weights[j]; if (new_weights[j] > highest_weight) { highest_weight = new_weights[j]; highest_index = j; } } else empty_cell = true; } if (!empty_cell) { if (fabs(total_dist - prev_total_dist) < .00001f) break; prev_total_dist = total_dist; } else prev_total_dist = math::cNearlyInfinite; if ((empty_cell) && (iter != (cMaxIters - 1))) { const uint ri = (highest_index + reassign_rover) & 3; reassign_rover++; for (uint j = 0; j < 4; j++) { if (new_weights[j] == 0.0f) { means[j] = means[ri]; means[j] += vec3F::make_random(rm, -.00196f, .00196f); } } } } } bool improved = false; for (uint i = 0; i < 3; i++) { for (uint j = i + 1; j < 4; j++) { const vec3F v0(means[i] + m_mean_norm_color); const vec3F v1(means[j] + m_mean_norm_color); dxt1_solution_coordinates sc( color_quad_u8((int)floor(.5f + v0[0] * 31.0f), (int)floor(.5f + v0[1] * 63.0f), (int)floor(.5f + v0[2] * 31.0f), 255), color_quad_u8((int)floor(.5f + v1[0] * 31.0f), (int)floor(.5f + v1[1] * 63.0f), (int)floor(.5f + v1[2] * 31.0f), 255), false); sc.canonicalize(); improved |= evaluate_solution(sc); } } improved |= refine_solution((m_pParams->m_quality == cCRNDXTQualityUber) ? 1 : 0); return improved; } // Given candidate low/high endpoints, find the optimal selectors for 3 and 4 color blocks, compute the resulting error, // and use the candidate if it results in less error than the best found result so far. bool dxt1_endpoint_optimizer::evaluate_solution(const dxt1_solution_coordinates& coords, bool alternate_rounding) { color_quad_u8 c0 = dxt1_block::unpack_color(coords.m_low_color, false); color_quad_u8 c1 = dxt1_block::unpack_color(coords.m_high_color, false); uint64 rError = c0.r < c1.r ? m_rDist[c0.r].low + m_rDist[c1.r].high : m_rDist[c0.r].high + m_rDist[c1.r].low; uint64 gError = c0.g < c1.g ? m_gDist[c0.g].low + m_gDist[c1.g].high : m_gDist[c0.g].high + m_gDist[c1.g].low; uint64 bError = c0.b < c1.b ? m_bDist[c0.b].low + m_bDist[c1.b].high : m_bDist[c0.b].high + m_bDist[c1.b].low; if (rError + gError + bError >= m_best_solution.m_error) return false; if (!alternate_rounding) { solution_hash_map::insert_result solution_res(m_solutions_tried.insert(coords.m_low_color | coords.m_high_color << 16)); if (!solution_res.second) return false; } if (m_evaluate_hc) return m_perceptual ? evaluate_solution_hc_perceptual(coords, alternate_rounding) : evaluate_solution_hc_uniform(coords, alternate_rounding); if (m_pParams->m_quality >= cCRNDXTQualityBetter) return evaluate_solution_uber(coords, alternate_rounding); return evaluate_solution_fast(coords, alternate_rounding); } inline uint dxt1_endpoint_optimizer::color_distance(bool perceptual, const color_quad_u8& e1, const color_quad_u8& e2, bool alpha) { if (perceptual) { return color::color_distance(true, e1, e2, alpha); } else if (m_pParams->m_grayscale_sampling) { // Computes error assuming shader will be converting the result to grayscale. int y0 = color::RGB_to_Y(e1); int y1 = color::RGB_to_Y(e2); int yd = y0 - y1; if (alpha) { int da = (int)e1[3] - (int)e2[3]; return yd * yd + da * da; } else { return yd * yd; } } else { return color::color_distance(false, e1, e2, alpha); } } bool dxt1_endpoint_optimizer::evaluate_solution_uber(const dxt1_solution_coordinates& coords, bool alternate_rounding) { m_trial_solution.m_coords = coords; m_trial_solution.m_selectors.resize(m_unique_colors.size()); m_trial_solution.m_error = m_best_solution.m_error; m_trial_solution.m_alpha_block = false; uint first_block_type = 0; uint last_block_type = 1; if ((m_pParams->m_pixels_have_alpha) || (m_pParams->m_force_alpha_blocks)) first_block_type = 1; else if (!m_pParams->m_use_alpha_blocks) last_block_type = 0; m_trial_selectors.resize(m_unique_colors.size()); color_quad_u8 colors[cDXT1SelectorValues]; colors[0] = dxt1_block::unpack_color(coords.m_low_color, true); colors[1] = dxt1_block::unpack_color(coords.m_high_color, true); for (uint block_type = first_block_type; block_type <= last_block_type; block_type++) { uint64 trial_error = 0; if (!block_type) { colors[2].set_noclamp_rgba((colors[0].r * 2 + colors[1].r + alternate_rounding) / 3, (colors[0].g * 2 + colors[1].g + alternate_rounding) / 3, (colors[0].b * 2 + colors[1].b + alternate_rounding) / 3, 0); colors[3].set_noclamp_rgba((colors[1].r * 2 + colors[0].r + alternate_rounding) / 3, (colors[1].g * 2 + colors[0].g + alternate_rounding) / 3, (colors[1].b * 2 + colors[0].b + alternate_rounding) / 3, 0); if (m_perceptual) { for (int unique_color_index = (int)m_unique_colors.size() - 1; unique_color_index >= 0; unique_color_index--) { const color_quad_u8& c = m_unique_colors[unique_color_index].m_color; uint best_error = color_distance(true, c, colors[0], false); uint best_color_index = 0; uint err = color_distance(true, c, colors[1], false); if (err < best_error) { best_error = err; best_color_index = 1; } err = color_distance(true, c, colors[2], false); if (err < best_error) { best_error = err; best_color_index = 2; } err = color_distance(true, c, colors[3], false); if (err < best_error) { best_error = err; best_color_index = 3; } trial_error += best_error * static_cast(m_unique_colors[unique_color_index].m_weight); if (trial_error >= m_trial_solution.m_error) break; m_trial_selectors[unique_color_index] = static_cast(best_color_index); } } else { for (int unique_color_index = (int)m_unique_colors.size() - 1; unique_color_index >= 0; unique_color_index--) { const color_quad_u8& c = m_unique_colors[unique_color_index].m_color; uint best_error = color_distance(false, c, colors[0], false); uint best_color_index = 0; uint err = color_distance(false, c, colors[1], false); if (err < best_error) { best_error = err; best_color_index = 1; } err = color_distance(false, c, colors[2], false); if (err < best_error) { best_error = err; best_color_index = 2; } err = color_distance(false, c, colors[3], false); if (err < best_error) { best_error = err; best_color_index = 3; } trial_error += best_error * static_cast(m_unique_colors[unique_color_index].m_weight); if (trial_error >= m_trial_solution.m_error) break; m_trial_selectors[unique_color_index] = static_cast(best_color_index); } } } else { colors[2].set_noclamp_rgba((colors[0].r + colors[1].r + alternate_rounding) >> 1, (colors[0].g + colors[1].g + alternate_rounding) >> 1, (colors[0].b + colors[1].b + alternate_rounding) >> 1, 255U); if (m_perceptual) { for (int unique_color_index = (int)m_unique_colors.size() - 1; unique_color_index >= 0; unique_color_index--) { const color_quad_u8& c = m_unique_colors[unique_color_index].m_color; uint best_error = color_distance(true, c, colors[0], false); uint best_color_index = 0; uint err = color_distance(true, c, colors[1], false); if (err < best_error) { best_error = err; best_color_index = 1; } err = color_distance(true, c, colors[2], false); if (err < best_error) { best_error = err; best_color_index = 2; } trial_error += best_error * static_cast(m_unique_colors[unique_color_index].m_weight); if (trial_error >= m_trial_solution.m_error) break; m_trial_selectors[unique_color_index] = static_cast(best_color_index); } } else { for (int unique_color_index = (int)m_unique_colors.size() - 1; unique_color_index >= 0; unique_color_index--) { const color_quad_u8& c = m_unique_colors[unique_color_index].m_color; uint best_error = color_distance(false, c, colors[0], false); uint best_color_index = 0; uint err = color_distance(false, c, colors[1], false); if (err < best_error) { best_error = err; best_color_index = 1; } err = color_distance(false, c, colors[2], false); if (err < best_error) { best_error = err; best_color_index = 2; } trial_error += best_error * static_cast(m_unique_colors[unique_color_index].m_weight); if (trial_error >= m_trial_solution.m_error) break; m_trial_selectors[unique_color_index] = static_cast(best_color_index); } } } if (trial_error < m_trial_solution.m_error) { m_trial_solution.m_error = trial_error; m_trial_solution.m_alpha_block = (block_type != 0); m_trial_solution.m_selectors = m_trial_selectors; m_trial_solution.m_alternate_rounding = alternate_rounding; } } m_trial_solution.m_enforce_selector = !m_trial_solution.m_alpha_block && m_trial_solution.m_coords.m_low_color == m_trial_solution.m_coords.m_high_color; if (m_trial_solution.m_enforce_selector) { uint s; if ((m_trial_solution.m_coords.m_low_color & 31) != 31) { m_trial_solution.m_coords.m_low_color++; s = 1; } else { m_trial_solution.m_coords.m_high_color--; s = 0; } for (uint i = 0; i < m_unique_colors.size(); i++) m_trial_solution.m_selectors[i] = static_cast(s); m_trial_solution.m_enforced_selector = s; } if (m_trial_solution.m_error < m_best_solution.m_error) { m_best_solution = m_trial_solution; return true; } return false; } bool dxt1_endpoint_optimizer::evaluate_solution_fast(const dxt1_solution_coordinates& coords, bool alternate_rounding) { m_trial_solution.m_coords = coords; m_trial_solution.m_selectors.resize(m_unique_colors.size()); m_trial_solution.m_error = m_best_solution.m_error; m_trial_solution.m_alpha_block = false; uint first_block_type = 0; uint last_block_type = 1; if ((m_pParams->m_pixels_have_alpha) || (m_pParams->m_force_alpha_blocks)) first_block_type = 1; else if (!m_pParams->m_use_alpha_blocks) last_block_type = 0; m_trial_selectors.resize(m_unique_colors.size()); color_quad_u8 colors[cDXT1SelectorValues]; colors[0] = dxt1_block::unpack_color(coords.m_low_color, true); colors[1] = dxt1_block::unpack_color(coords.m_high_color, true); int vr = colors[1].r - colors[0].r; int vg = colors[1].g - colors[0].g; int vb = colors[1].b - colors[0].b; if (m_perceptual) { vr *= 8; vg *= 24; } int stops[4]; stops[0] = colors[0].r * vr + colors[0].g * vg + colors[0].b * vb; stops[1] = colors[1].r * vr + colors[1].g * vg + colors[1].b * vb; int dirr = vr * 2; int dirg = vg * 2; int dirb = vb * 2; for (uint block_type = first_block_type; block_type <= last_block_type; block_type++) { uint64 trial_error = 0; if (!block_type) { colors[2].set_noclamp_rgba((colors[0].r * 2 + colors[1].r + alternate_rounding) / 3, (colors[0].g * 2 + colors[1].g + alternate_rounding) / 3, (colors[0].b * 2 + colors[1].b + alternate_rounding) / 3, 255U); colors[3].set_noclamp_rgba((colors[1].r * 2 + colors[0].r + alternate_rounding) / 3, (colors[1].g * 2 + colors[0].g + alternate_rounding) / 3, (colors[1].b * 2 + colors[0].b + alternate_rounding) / 3, 255U); stops[2] = colors[2].r * vr + colors[2].g * vg + colors[2].b * vb; stops[3] = colors[3].r * vr + colors[3].g * vg + colors[3].b * vb; // 0 2 3 1 int c0Point = stops[1] + stops[3]; int halfPoint = stops[3] + stops[2]; int c3Point = stops[2] + stops[0]; for (int unique_color_index = (int)m_unique_colors.size() - 1; unique_color_index >= 0; unique_color_index--) { const color_quad_u8& c = m_unique_colors[unique_color_index].m_color; int dot = c.r * dirr + c.g * dirg + c.b * dirb; uint8 best_color_index; if (dot < halfPoint) best_color_index = (dot < c3Point) ? 0 : 2; else best_color_index = (dot < c0Point) ? 3 : 1; uint best_error = color_distance(m_perceptual, c, colors[best_color_index], false); trial_error += best_error * static_cast(m_unique_colors[unique_color_index].m_weight); if (trial_error >= m_trial_solution.m_error) break; m_trial_selectors[unique_color_index] = static_cast(best_color_index); } } else { colors[2].set_noclamp_rgba((colors[0].r + colors[1].r + alternate_rounding) >> 1, (colors[0].g + colors[1].g + alternate_rounding) >> 1, (colors[0].b + colors[1].b + alternate_rounding) >> 1, 255U); stops[2] = colors[2].r * vr + colors[2].g * vg + colors[2].b * vb; // 0 2 1 int c02Point = stops[0] + stops[2]; int c21Point = stops[2] + stops[1]; for (int unique_color_index = (int)m_unique_colors.size() - 1; unique_color_index >= 0; unique_color_index--) { const color_quad_u8& c = m_unique_colors[unique_color_index].m_color; int dot = c.r * dirr + c.g * dirg + c.b * dirb; uint8 best_color_index; if (dot < c02Point) best_color_index = 0; else if (dot < c21Point) best_color_index = 2; else best_color_index = 1; uint best_error = color_distance(m_perceptual, c, colors[best_color_index], false); trial_error += best_error * static_cast(m_unique_colors[unique_color_index].m_weight); if (trial_error >= m_trial_solution.m_error) break; m_trial_selectors[unique_color_index] = static_cast(best_color_index); } } if (trial_error < m_trial_solution.m_error) { m_trial_solution.m_error = trial_error; m_trial_solution.m_alpha_block = (block_type != 0); m_trial_solution.m_selectors = m_trial_selectors; } } if ((!m_trial_solution.m_alpha_block) && (m_trial_solution.m_coords.m_low_color == m_trial_solution.m_coords.m_high_color)) { uint s; if ((m_trial_solution.m_coords.m_low_color & 31) != 31) { m_trial_solution.m_coords.m_low_color++; s = 1; } else { m_trial_solution.m_coords.m_high_color--; s = 0; } for (uint i = 0; i < m_unique_colors.size(); i++) m_trial_solution.m_selectors[i] = static_cast(s); } if (m_trial_solution.m_error < m_best_solution.m_error) { m_best_solution = m_trial_solution; return true; } return false; } bool dxt1_endpoint_optimizer::evaluate_solution_hc_perceptual(const dxt1_solution_coordinates& coords, bool alternate_rounding) { color_quad_u8 c0 = dxt1_block::unpack_color(coords.m_low_color, true); color_quad_u8 c1 = dxt1_block::unpack_color(coords.m_high_color, true); color_quad_u8 c2((c0.r * 2 + c1.r + alternate_rounding) / 3, (c0.g * 2 + c1.g + alternate_rounding) / 3, (c0.b * 2 + c1.b + alternate_rounding) / 3, 0); color_quad_u8 c3((c1.r * 2 + c0.r + alternate_rounding) / 3, (c1.g * 2 + c0.g + alternate_rounding) / 3, (c1.b * 2 + c0.b + alternate_rounding) / 3, 0); uint64 error = 0; unique_color* color = m_evaluated_colors.get_ptr(); for (uint count = m_evaluated_colors.size(); count; color++, error < m_best_solution.m_error ? count-- : count = 0) { uint e01 = math::minimum(color::color_distance(true, color->m_color, c0, false), color::color_distance(true, color->m_color, c1, false)); uint e23 = math::minimum(color::color_distance(true, color->m_color, c2, false), color::color_distance(true, color->m_color, c3, false)); error += math::minimum(e01, e23) * (uint64)color->m_weight; } if (error >= m_best_solution.m_error) return false; m_best_solution.m_coords = coords; m_best_solution.m_error = error; m_best_solution.m_alpha_block = false; m_best_solution.m_alternate_rounding = alternate_rounding; m_best_solution.m_enforce_selector = m_best_solution.m_coords.m_low_color == m_best_solution.m_coords.m_high_color; if (m_best_solution.m_enforce_selector) { if ((m_best_solution.m_coords.m_low_color & 31) != 31) { m_best_solution.m_coords.m_low_color++; m_best_solution.m_enforced_selector = 1; } else { m_best_solution.m_coords.m_high_color--; m_best_solution.m_enforced_selector = 0; } } return true; } bool dxt1_endpoint_optimizer::evaluate_solution_hc_uniform(const dxt1_solution_coordinates& coords, bool alternate_rounding) { color_quad_u8 c0 = dxt1_block::unpack_color(coords.m_low_color, true); color_quad_u8 c1 = dxt1_block::unpack_color(coords.m_high_color, true); color_quad_u8 c2((c0.r * 2 + c1.r + alternate_rounding) / 3, (c0.g * 2 + c1.g + alternate_rounding) / 3, (c0.b * 2 + c1.b + alternate_rounding) / 3, 0); color_quad_u8 c3((c1.r * 2 + c0.r + alternate_rounding) / 3, (c1.g * 2 + c0.g + alternate_rounding) / 3, (c1.b * 2 + c0.b + alternate_rounding) / 3, 0); uint64 error = 0; unique_color* color = m_evaluated_colors.get_ptr(); for (uint count = m_evaluated_colors.size(); count; color++, error < m_best_solution.m_error ? count-- : count = 0) { uint e01 = math::minimum(color::color_distance(false, color->m_color, c0, false), color::color_distance(false, color->m_color, c1, false)); uint e23 = math::minimum(color::color_distance(false, color->m_color, c2, false), color::color_distance(false, color->m_color, c3, false)); error += math::minimum(e01, e23) * (uint64)color->m_weight; } if (error >= m_best_solution.m_error) return false; m_best_solution.m_coords = coords; m_best_solution.m_error = error; m_best_solution.m_alpha_block = false; m_best_solution.m_alternate_rounding = alternate_rounding; m_best_solution.m_enforce_selector = m_best_solution.m_coords.m_low_color == m_best_solution.m_coords.m_high_color; if (m_best_solution.m_enforce_selector) { if ((m_best_solution.m_coords.m_low_color & 31) != 31) { m_best_solution.m_coords.m_low_color++; m_best_solution.m_enforced_selector = 1; } else { m_best_solution.m_coords.m_high_color--; m_best_solution.m_enforced_selector = 0; } } return true; } void dxt1_endpoint_optimizer::compute_selectors() { if (m_evaluate_hc) compute_selectors_hc(); } void dxt1_endpoint_optimizer::compute_selectors_hc() { m_best_solution.m_selectors.resize(m_unique_colors.size()); if (m_best_solution.m_enforce_selector) { memset(m_best_solution.m_selectors.get_ptr(), m_best_solution.m_enforced_selector, m_best_solution.m_selectors.size()); return; } color_quad_u8 c0 = dxt1_block::unpack_color(m_best_solution.m_coords.m_low_color, true); color_quad_u8 c1 = dxt1_block::unpack_color(m_best_solution.m_coords.m_high_color, true); color_quad_u8 c2((c0.r * 2 + c1.r + m_best_solution.m_alternate_rounding) / 3, (c0.g * 2 + c1.g + m_best_solution.m_alternate_rounding) / 3, (c0.b * 2 + c1.b + m_best_solution.m_alternate_rounding) / 3, 0); color_quad_u8 c3((c1.r * 2 + c0.r + m_best_solution.m_alternate_rounding) / 3, (c1.g * 2 + c0.g + m_best_solution.m_alternate_rounding) / 3, (c1.b * 2 + c0.b + m_best_solution.m_alternate_rounding) / 3, 0); for (uint i = 0, iEnd = m_unique_colors.size(); i < iEnd; i++) { const color_quad_u8& c = m_unique_colors[i].m_color; uint e0 = color::color_distance(m_perceptual, c, c0, false); uint e1 = color::color_distance(m_perceptual, c, c1, false); uint e2 = color::color_distance(m_perceptual, c, c2, false); uint e3 = color::color_distance(m_perceptual, c, c3, false); uint e01 = math::minimum(e0, e1); uint e23 = math::minimum(e2, e3); m_best_solution.m_selectors[i] = e01 <= e23 ? e01 == e0 ? 0 : 1 : e23 == e2 ? 2 : 3; } } unique_color dxt1_endpoint_optimizer::lerp_color(const color_quad_u8& a, const color_quad_u8& b, float f, int rounding) { color_quad_u8 res; float r = rounding ? 1.0f : 0.0f; res[0] = static_cast(math::clamp(math::float_to_int(r + math::lerp(a[0], b[0], f)), 0, 255)); res[1] = static_cast(math::clamp(math::float_to_int(r + math::lerp(a[1], b[1], f)), 0, 255)); res[2] = static_cast(math::clamp(math::float_to_int(r + math::lerp(a[2], b[2], f)), 0, 255)); res[3] = 255; return unique_color(res, 1); } // The block may have been already compressed using another DXTc compressor, such as squish, ATI_Compress, ryg_dxt, etc. // Attempt to recover the endpoints used by that block compressor. void dxt1_endpoint_optimizer::try_combinatorial_encoding() { if ((m_unique_colors.size() < 2) || (m_unique_colors.size() > 4)) return; m_temp_unique_colors = m_unique_colors; if (m_temp_unique_colors.size() == 2) { // a b c d // 0.0 1/3 2/3 1.0 for (uint k = 0; k < 2; k++) { for (uint q = 0; q < 2; q++) { const uint r = q ^ 1; // a b m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[q].m_color, m_temp_unique_colors[r].m_color, 2.0f, k)); m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[q].m_color, m_temp_unique_colors[r].m_color, 3.0f, k)); // a c m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[q].m_color, m_temp_unique_colors[r].m_color, .5f, k)); m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[q].m_color, m_temp_unique_colors[r].m_color, 1.5f, k)); // a d // b c m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[q].m_color, m_temp_unique_colors[r].m_color, -1.0f, k)); m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[q].m_color, m_temp_unique_colors[r].m_color, 2.0f, k)); // b d m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[q].m_color, m_temp_unique_colors[r].m_color, -.5f, k)); m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[q].m_color, m_temp_unique_colors[r].m_color, .5f, k)); // c d m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[q].m_color, m_temp_unique_colors[r].m_color, -2.0f, k)); m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[q].m_color, m_temp_unique_colors[r].m_color, -1.0f, k)); } } } else if (m_temp_unique_colors.size() == 3) { // a b c d // 0.0 1/3 2/3 1.0 for (uint i = 0; i <= 2; i++) { for (uint j = 0; j <= 2; j++) { if (i == j) continue; // a b c m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[i].m_color, m_temp_unique_colors[j].m_color, 1.5f)); // a b d m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[i].m_color, m_temp_unique_colors[j].m_color, 2.0f / 3.0f)); // a c d m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[i].m_color, m_temp_unique_colors[j].m_color, 1.0f / 3.0f)); // b c d m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[i].m_color, m_temp_unique_colors[j].m_color, -.5f)); } } } m_unique_packed_colors.resize(0); for (uint i = 0; i < m_temp_unique_colors.size(); i++) { const color_quad_u8& unique_color = m_temp_unique_colors[i].m_color; const uint16 packed_color = dxt1_block::pack_color(unique_color, true); if (std::find(m_unique_packed_colors.begin(), m_unique_packed_colors.end(), packed_color) != m_unique_packed_colors.end()) continue; m_unique_packed_colors.push_back(packed_color); } for (uint i = 0; m_best_solution.m_error && i < m_unique_packed_colors.size() - 1; i++) { for (uint j = i + 1; m_best_solution.m_error && j < m_unique_packed_colors.size(); j++) evaluate_solution(dxt1_solution_coordinates(m_unique_packed_colors[i], m_unique_packed_colors[j])); } uint64 error = m_best_solution.m_error; if (error) m_best_solution.m_error = 1; for (uint i = 0; m_best_solution.m_error && i < m_unique_packed_colors.size() - 1; i++) { for (uint j = i + 1; m_best_solution.m_error && j < m_unique_packed_colors.size(); j++) evaluate_solution(dxt1_solution_coordinates(m_unique_packed_colors[i], m_unique_packed_colors[j]), true); } if (m_best_solution.m_error) m_best_solution.m_error = error; } // The fourth (transparent) color in 3 color "transparent" blocks is black, which can be optionally exploited for small gains in DXT1 mode if the caller // doesn't actually use alpha. (But not in DXT5 mode, because 3-color blocks aren't permitted by GPU's for DXT5.) bool dxt1_endpoint_optimizer::try_alpha_as_black_optimization() { results* pOrig_results = m_pResults; uint num_dark_colors = 0; for (uint i = 0; i < m_unique_colors.size(); i++) if ((m_unique_colors[i].m_color[0] <= 4) && (m_unique_colors[i].m_color[1] <= 4) && (m_unique_colors[i].m_color[2] <= 4)) num_dark_colors++; if ((!num_dark_colors) || (num_dark_colors == m_unique_colors.size())) return true; params trial_params(*m_pParams); crnlib::vector trial_colors; trial_colors.insert(0, m_pParams->m_pPixels, m_pParams->m_num_pixels); trial_params.m_pPixels = trial_colors.get_ptr(); trial_params.m_pixels_have_alpha = true; for (uint i = 0; i < trial_colors.size(); i++) if ((trial_colors[i][0] <= 4) && (trial_colors[i][1] <= 4) && (trial_colors[i][2] <= 4)) trial_colors[i][3] = 0; results trial_results; crnlib::vector trial_selectors(m_pParams->m_num_pixels); trial_results.m_pSelectors = trial_selectors.get_ptr(); compute_internal(trial_params, trial_results); CRNLIB_ASSERT(trial_results.m_alpha_block); color_quad_u8 c[4]; dxt1_block::get_block_colors3(c, trial_results.m_low_color, trial_results.m_high_color); uint64 trial_error = 0; for (uint i = 0; i < trial_colors.size(); i++) { if (trial_colors[i][3] == 0) { CRNLIB_ASSERT(trial_selectors[i] == 3); } else { CRNLIB_ASSERT(trial_selectors[i] != 3); } trial_error += color_distance(m_perceptual, trial_colors[i], c[trial_selectors[i]], false); } if (trial_error < pOrig_results->m_error) { pOrig_results->m_error = trial_error; pOrig_results->m_low_color = trial_results.m_low_color; pOrig_results->m_high_color = trial_results.m_high_color; if (pOrig_results->m_pSelectors) memcpy(pOrig_results->m_pSelectors, trial_results.m_pSelectors, m_pParams->m_num_pixels); pOrig_results->m_alpha_block = true; } return true; } void dxt1_endpoint_optimizer::compute_internal(const params& p, results& r) { m_pParams = &p; m_pResults = &r; m_evaluate_hc = m_pParams->m_quality == cCRNDXTQualityUber && !m_pParams->m_pixels_have_alpha && !m_pParams->m_force_alpha_blocks && !m_pParams->m_use_alpha_blocks && !m_pParams->m_grayscale_sampling; m_perceptual = m_pParams->m_perceptual && !m_pParams->m_grayscale_sampling; if (m_unique_color_hash_map.get_table_size() > 8192) m_unique_color_hash_map.clear(); else m_unique_color_hash_map.reset(); if (m_solutions_tried.get_table_size() > 8192) m_solutions_tried.clear(); else m_solutions_tried.reset(); m_unique_colors.clear(); m_norm_unique_colors.clear(); m_mean_norm_color.clear(); m_norm_unique_colors_weighted.clear(); m_mean_norm_color_weighted.clear(); m_principle_axis.clear(); m_best_solution.clear(); m_total_unique_color_weight = 0; m_unique_colors.reserve(m_pParams->m_num_pixels); unique_color color(color_quad_u8(0), 1); for (uint i = 0; i < m_pParams->m_num_pixels; i++) { if (!m_pParams->m_pixels_have_alpha || m_pParams->m_pPixels[i].a >= m_pParams->m_dxt1a_alpha_threshold) { color.m_color.m_u32 = m_pParams->m_pPixels[i].m_u32 | 0xFF000000; unique_color_hash_map::insert_result ins_result(m_unique_color_hash_map.insert(color.m_color.m_u32, m_unique_colors.size())); if (ins_result.second) { m_unique_colors.push_back(color); } else { m_unique_colors[ins_result.first->second].m_weight++; } m_total_unique_color_weight++; } } m_has_transparent_pixels = m_total_unique_color_weight != m_pParams->m_num_pixels; m_evaluated_colors = m_unique_colors; struct { uint64 weight, weightedColor, weightedSquaredColor; } rPlane[32] = {}, gPlane[64] = {}, bPlane[32] = {}; for (uint i = 0; i < m_unique_colors.size(); i++) { const unique_color& color = m_unique_colors[i]; uint8 R = color.m_color.r, r = (R >> 3) + ((R & 7) > (R >> 5) ? 1 : 0); rPlane[r].weight += color.m_weight; rPlane[r].weightedColor += (uint64)color.m_weight * R; rPlane[r].weightedSquaredColor += (uint64)color.m_weight * R * R; uint8 G = color.m_color.g, g = (G >> 2) + ((G & 3) > (G >> 6) ? 1 : 0); gPlane[g].weight += color.m_weight; gPlane[g].weightedColor += (uint64)color.m_weight * G; gPlane[g].weightedSquaredColor += (uint64)color.m_weight * G * G; uint8 B = color.m_color.b, b = (B >> 3) + ((B & 7) > (B >> 5) ? 1 : 0); bPlane[b].weight += color.m_weight; bPlane[b].weightedColor += (uint64)color.m_weight * B; bPlane[b].weightedSquaredColor += (uint64)color.m_weight * B * B; } if (m_perceptual) { for (uint c = 0; c < 32; c++) { rPlane[c].weight *= 8; rPlane[c].weightedColor *= 8; rPlane[c].weightedSquaredColor *= 8; } for (uint c = 0; c < 64; c++) { gPlane[c].weight *= 25; gPlane[c].weightedColor *= 25; gPlane[c].weightedSquaredColor *= 25; } } for (uint c = 1; c < 32; c++) { rPlane[c].weight += rPlane[c - 1].weight; rPlane[c].weightedColor += rPlane[c - 1].weightedColor; rPlane[c].weightedSquaredColor += rPlane[c - 1].weightedSquaredColor; bPlane[c].weight += bPlane[c - 1].weight; bPlane[c].weightedColor += bPlane[c - 1].weightedColor; bPlane[c].weightedSquaredColor += bPlane[c - 1].weightedSquaredColor; } for (uint c = 1; c < 64; c++) { gPlane[c].weight += gPlane[c - 1].weight; gPlane[c].weightedColor += gPlane[c - 1].weightedColor; gPlane[c].weightedSquaredColor += gPlane[c - 1].weightedSquaredColor; } for (uint c = 0; c < 32; c++) { uint8 C = c << 3 | c >> 2; m_rDist[c].low = rPlane[c].weightedSquaredColor + C * C * rPlane[c].weight - 2 * C * rPlane[c].weightedColor; m_rDist[c].high = rPlane[31].weightedSquaredColor + C * C * rPlane[31].weight - 2 * C * rPlane[31].weightedColor - m_rDist[c].low; m_bDist[c].low = bPlane[c].weightedSquaredColor + C * C * bPlane[c].weight - 2 * C * bPlane[c].weightedColor; m_bDist[c].high = bPlane[31].weightedSquaredColor + C * C * bPlane[31].weight - 2 * C * bPlane[31].weightedColor - m_bDist[c].low; } for (uint c = 0; c < 64; c++) { uint8 C = c << 2 | c >> 4; m_gDist[c].low = gPlane[c].weightedSquaredColor + C * C * gPlane[c].weight - 2 * C * gPlane[c].weightedColor; m_gDist[c].high = gPlane[63].weightedSquaredColor + C * C * gPlane[63].weight - 2 * C * gPlane[63].weightedColor - m_gDist[c].low; } if (!m_unique_colors.size()) { m_pResults->m_low_color = 0; m_pResults->m_high_color = 0; m_pResults->m_alpha_block = true; memset(m_pResults->m_pSelectors, 3, m_pParams->m_num_pixels); } else if (m_unique_colors.size() == 1 && !m_has_transparent_pixels) { int r = m_unique_colors[0].m_color.r; int g = m_unique_colors[0].m_color.g; int b = m_unique_colors[0].m_color.b; uint low_color = (ryg_dxt::OMatch5[r][0] << 11) | (ryg_dxt::OMatch6[g][0] << 5) | ryg_dxt::OMatch5[b][0]; uint high_color = (ryg_dxt::OMatch5[r][1] << 11) | (ryg_dxt::OMatch6[g][1] << 5) | ryg_dxt::OMatch5[b][1]; evaluate_solution(dxt1_solution_coordinates((uint16)low_color, (uint16)high_color)); if (m_pParams->m_use_alpha_blocks && m_best_solution.m_error) { low_color = (ryg_dxt::OMatch5_3[r][0] << 11) | (ryg_dxt::OMatch6_3[g][0] << 5) | ryg_dxt::OMatch5_3[b][0]; high_color = (ryg_dxt::OMatch5_3[r][1] << 11) | (ryg_dxt::OMatch6_3[g][1] << 5) | ryg_dxt::OMatch5_3[b][1]; evaluate_solution(dxt1_solution_coordinates((uint16)low_color, (uint16)high_color)); } return_solution(); } else { handle_multicolor_block(); } } bool dxt1_endpoint_optimizer::compute(const params& p, results& r) { if (!p.m_pPixels) return false; compute_internal(p, r); if (m_pParams->m_use_alpha_blocks && m_pParams->m_use_transparent_indices_for_black && !m_pParams->m_pixels_have_alpha) return try_alpha_as_black_optimization(); return true; } } // namespace crnlib