diff --git a/bin/crunch_x64.exe b/bin/crunch_x64.exe index 9d27e02..fc9f339 100644 Binary files a/bin/crunch_x64.exe and b/bin/crunch_x64.exe differ diff --git a/crnlib/crn_dxt_endpoint_refiner.cpp b/crnlib/crn_dxt_endpoint_refiner.cpp index 42c1beb..3190432 100644 --- a/crnlib/crn_dxt_endpoint_refiner.cpp +++ b/crnlib/crn_dxt_endpoint_refiner.cpp @@ -91,237 +91,118 @@ bool dxt_endpoint_refiner::refine(const params& p, results& r) { else optimize_dxt5(l, h); - //if (r.m_low_color < r.m_high_color) - // utils::swap(r.m_low_color, r.m_high_color); - return r.m_error < p.m_error_to_beat; } void dxt_endpoint_refiner::optimize_dxt5(vec3F low_color, vec3F high_color) { - float nl = low_color[0]; - float nh = high_color[0]; + uint8 L0 = math::clamp(low_color[0] * 256.0f, 0, 255); + uint8 H0 = math::clamp(high_color[0] * 256.0f, 0, 255); -#if CRNLIB_DXT_ALT_ROUNDING - nl = math::clamp(nl, 0.0f, .999f); - nh = math::clamp(nh, 0.0f, .999f); - uint il = (int)floor(nl * 256.0f); - uint ih = (int)floor(nh * 256.0f); -#else - uint il = (int)floor(.5f + math::clamp(nl, 0.0f, 1.0f) * 255.0f); - uint ih = (int)floor(.5f + math::clamp(nh, 0.0f, 1.0f) * 255.0f); -#endif + uint64 hist[8] = {}, D2[8] = {}, DD[8] = {}; + for (uint c = m_pParams->m_alpha_comp_index, i = 0; i < m_pParams->m_num_pixels; i++) { + uint8 a = m_pParams->m_pPixels[i][c]; + uint8 s = m_pParams->m_pSelectors[i]; + hist[s]++; + D2[s] += a * 2; + DD[s] += a * a; + } - crnlib::vector trial_solutions; - trial_solutions.reserve(256); - trial_solutions.push_back(il | (ih << 8)); - - sparse_bit_array flags; - flags.resize(256 * 256); - - flags.set_bit((il * 256) + ih); - - const int cProbeAmount = 11; - - for (int l_delta = -cProbeAmount; l_delta <= cProbeAmount; l_delta++) { - const int l = il + l_delta; - if (l < 0) - continue; - else if (l > 255) - break; - - const uint bit_index = l * 256; - - for (int h_delta = -cProbeAmount; h_delta <= cProbeAmount; h_delta++) { - const int h = ih + h_delta; - if (h < 0) - continue; - else if (h > 255) - break; - - if ((flags.get_bit(bit_index + h)) || (flags.get_bit(h * 256 + l))) - continue; - - flags.set_bit(bit_index + h); - - trial_solutions.push_back(l | (h << 8)); + uint16 solutions[529]; + uint solutions_count = 0; + solutions[solutions_count++] = L0 == H0 ? H0 ? H0 - 1 << 8 | L0 : 1 : L0 > H0 ? H0 << 8 | L0 : L0 << 8 | H0; + uint8 minL = L0 <= 11 ? 0 : L0 - 11, maxL = L0 >= 244 ? 255 : L0 + 11; + uint8 minH = H0 <= 11 ? 0 : H0 - 11, maxH = H0 >= 244 ? 255 : H0 + 11; + for (uint16 L = minL; L <= maxL; L++) { + for (uint16 H = minH; H <= maxH; H++) { + if ((maxH < L || L <= H || H < minL) && (L != L0 || H != H0) && (L != H0 || H != L0)) + solutions[solutions_count++] = L == H ? H ? H - 1 << 8 | L : 1 : L > H ? H << 8 | L : L << 8 | H; } } - for (uint trial = 0; trial < trial_solutions.size(); trial++) { - uint l = trial_solutions[trial] & 0xFF; - uint h = trial_solutions[trial] >> 8; - - if (l == h) { - if (h) - h--; - else - l++; - } else if (l < h) { - utils::swap(l, h); - } - - CRNLIB_ASSERT(l > h); - - uint values[cDXT5SelectorValues]; - dxt5_block::get_block_values8(values, l, h); - - uint total_error = 0; - - for (uint j = 0; j < m_pParams->m_num_pixels; j++) { - int p = m_pParams->m_pPixels[j][m_pParams->m_alpha_comp_index]; - int c = values[m_pParams->m_pSelectors[j]]; - - int error = p - c; - error *= error; - - total_error += error; - - if (total_error > m_pResults->m_error) - break; - } - - if (total_error < m_pResults->m_error) { - m_pResults->m_error = total_error; - m_pResults->m_low_color = static_cast(l); - m_pResults->m_high_color = static_cast(h); - - if (m_pResults->m_error == 0) + for (uint i = 0; i < solutions_count; i++) { + uint8 L = solutions[i] & 0xFF; + uint8 H = solutions[i] >> 8; + uint values[8]; + dxt5_block::get_block_values8(values, L, H); + uint64 error = 0; + for (uint64 s = 0; s < 8; s++) + error += hist[s] * values[s] * values[s] - D2[s] * values[s] + DD[s]; + if (error < m_pResults->m_error) { + m_pResults->m_low_color = L; + m_pResults->m_high_color = H; + m_pResults->m_error = error; + if (!m_pResults->m_error) return; } } } void dxt_endpoint_refiner::optimize_dxt1(vec3F low_color, vec3F high_color) { - uint selector_hist[4]; - utils::zero_object(selector_hist); - for (uint i = 0; i < m_pParams->m_num_pixels; i++) - selector_hist[m_pParams->m_pSelectors[i]]++; + uint16 L0 = math::clamp(low_color[0] * 32.0f, 0, 31) << 11 | math::clamp(low_color[1] * 64.0f, 0, 63) << 5 | math::clamp(low_color[2] * 32.0f, 0, 31); + uint16 H0 = math::clamp(high_color[0] * 32.0f, 0, 31) << 11 | math::clamp(high_color[1] * 64.0f, 0, 63) << 5 | math::clamp(high_color[2] * 32.0f, 0, 31); - dxt1_solution_coordinates c(low_color, high_color); - - for (uint pass = 0; pass < 8; pass++) { - const uint64 initial_error = m_pResults->m_error; - - dxt1_solution_coordinates_vec coords_to_try; - - coords_to_try.resize(0); - - color_quad_u8 lc(dxt1_block::unpack_color(c.m_low_color, false)); - color_quad_u8 hc(dxt1_block::unpack_color(c.m_high_color, false)); - - for (int i = 0; i < 27; i++) { - if (13 == i) - continue; - - const int ir = (i % 3) - 1; - const int ig = ((i / 3) % 3) - 1; - const int ib = ((i / 9) % 3) - 1; - - int r = lc.r + ir; - int g = lc.g + ig; - int b = lc.b + ib; - if ((r < 0) || (r > 31) || (g < 0) || (g > 63) || (b < 0) || (b > 31)) - continue; - - coords_to_try.push_back( - dxt1_solution_coordinates(dxt1_block::pack_color(r, g, b, false), c.m_high_color)); + uint64 hist[4] = {}, D2[4][3] = {}, DD[4][3] = {}; + for (uint i = 0; i < m_pParams->m_num_pixels; i++) { + const color_quad_u8& pixel = m_pParams->m_pPixels[i]; + uint8 s = m_pParams->m_pSelectors[i]; + hist[s]++; + for (uint c = 0; c < 3; c++) { + D2[s][c] += pixel[c] * 2; + DD[s][c] += pixel[c] * pixel[c]; } + } + crnlib::vector solutions(54); + bool preserveL = hist[0] + hist[2] > hist[1] + hist[3]; + bool improved = true; - for (int i = 0; i < 27; i++) { - if (13 == i) - continue; - - const int ir = (i % 3) - 1; - const int ig = ((i / 3) % 3) - 1; - const int ib = ((i / 9) % 3) - 1; - - int r = hc.r + ir; - int g = hc.g + ig; - int b = hc.b + ib; - if ((r < 0) || (r > 31) || (g < 0) || (g > 63) || (b < 0) || (b > 31)) - continue; - - coords_to_try.push_back(dxt1_solution_coordinates(c.m_low_color, dxt1_block::pack_color(r, g, b, false))); + for (uint iterations = 8; improved && iterations; iterations--) { + improved = false; + uint solutions_count = 0; + for (uint16 b0 = L0 & 31, g0 = L0 >> 5 & 63, r0 = L0 >> 11 & 31, b = b0 ? b0 - 1 : b0; b <= b0 + 1 && b <= 31; b++) { + for (uint16 g = g0 ? g0 - 1 : g0; g <= g0 + 1 && g <= 63; g++) { + for (uint16 r = r0 ? r0 - 1 : r0; r <= r0 + 1 && r <= 31; r++) { + uint16 L = r << 11 | g << 5 | b; + if (L != L0) + solutions[solutions_count++] = L > H0 ? L | H0 << 16 : H0 | L << 16; + } + } } - - std::sort(coords_to_try.begin(), coords_to_try.end()); - - dxt1_solution_coordinates_vec::const_iterator p_last = std::unique(coords_to_try.begin(), coords_to_try.end()); - uint num_coords_to_try = (uint)(p_last - coords_to_try.begin()); - - for (uint i = 0; i < num_coords_to_try; i++) { + for (uint16 b0 = H0 & 31, g0 = H0 >> 5 & 63, r0 = H0 >> 11 & 31, b = b0 ? b0 - 1 : b0; b <= b0 + 1 && b <= 31; b++) { + for (uint16 g = g0 ? g0 - 1 : g0; g <= g0 + 1 && g <= 63; g++) { + for (uint16 r = r0 ? r0 - 1 : r0; r <= r0 + 1 && r <= 31; r++) { + uint16 H = r << 11 | g << 5 | b; + if (H != H0) + solutions[solutions_count++] = H > L0 ? H | L0 << 16 : L0 | H << 16; + } + } + } + std::sort(solutions.begin(), solutions.begin() + solutions_count); + for (uint i = 0; i < solutions_count; i++) { + if (i && solutions[i] == solutions[i - 1]) + continue; + uint16 L = solutions[i] & 0xFFFF; + uint16 H = solutions[i] >> 16; + if (L == H) { + L += !preserveL ? ~L & 0x1F ? 0x1 : ~L & 0xF800 ? 0x800 : ~L & 0x7E0 ? 0x20 : 0 : !L ? 0x1 : 0; + H -= preserveL ? H & 0x1F ? 0x1 : H & 0xF800 ? 0x800 : H & 0x7E0 ? 0x20 : 0 : H == 0xFFFF ? 0x1 : 0; + } color_quad_u8 block_colors[4]; - uint16 l = coords_to_try[i].m_low_color; - uint16 h = coords_to_try[i].m_high_color; - if (l < h) - utils::swap(l, h); - else if (l == h) { - color_quad_u8 lc(dxt1_block::unpack_color(l, false)); - color_quad_u8 hc(dxt1_block::unpack_color(h, false)); - - bool retry = false; - if ((selector_hist[0] + selector_hist[2]) > (selector_hist[1] + selector_hist[3])) { - // l affects the output more than h, so muck with h - if (hc[2] != 0) - hc[2]--; - else if (hc[0] != 0) - hc[0]--; - else if (hc[1] != 0) - hc[1]--; - else - retry = true; - } else { - // h affects the output more than l, so muck with l - if (lc[2] != 31) - lc[2]++; - else if (lc[0] != 31) - lc[0]++; - else if (lc[1] != 63) - lc[1]++; - else - retry = true; - } - - if (retry) { - if (l == 0) - l++; - else - h--; - } else { - l = dxt1_block::pack_color(lc, false); - h = dxt1_block::pack_color(hc, false); - } - - CRNLIB_ASSERT(l > h); + dxt1_block::get_block_colors4(block_colors, L, H); + uint64 error = 0; + for (uint64 s = 0, d[3]; s < 4; s++) { + for (uint c = 0; c < 3; c++) + d[c] = hist[s] * block_colors[s][c] * block_colors[s][c] - D2[s][c] * block_colors[s][c] + DD[s][c]; + error += m_pParams->m_perceptual ? d[0] * 8 + d[1] * 25 + d[2] : d[0] + d[1] + d[2]; } - - dxt1_block::get_block_colors4(block_colors, l, h); - - uint total_error = 0; - - for (uint j = 0; j < m_pParams->m_num_pixels; j++) { - const color_quad_u8& c = block_colors[m_pParams->m_pSelectors[j]]; - total_error += color::color_distance(m_pParams->m_perceptual, c, m_pParams->m_pPixels[j], false); - - if (total_error > m_pResults->m_error) - break; - } - - if (total_error < m_pResults->m_error) { - m_pResults->m_error = total_error; - m_pResults->m_low_color = l; - m_pResults->m_high_color = h; - CRNLIB_ASSERT(l > h); - if (m_pResults->m_error == 0) + if (error < m_pResults->m_error) { + m_pResults->m_low_color = L0 = L; + m_pResults->m_high_color = H0 = H; + m_pResults->m_error = error; + if (!m_pResults->m_error) return; + improved = true; } } - - if (m_pResults->m_error == initial_error) - break; - - c.m_low_color = m_pResults->m_low_color; - c.m_high_color = m_pResults->m_high_color; } }