diff --git a/bin/crunch_x64.exe b/bin/crunch_x64.exe
index 81c127e..6a3e27d 100644
Binary files a/bin/crunch_x64.exe and b/bin/crunch_x64.exe differ
diff --git a/crnlib/crn_dxt_hc.cpp b/crnlib/crn_dxt_hc.cpp
index 12a7ab7..62a974d 100644
--- a/crnlib/crn_dxt_hc.cpp
+++ b/crnlib/crn_dxt_hc.cpp
@@ -231,6 +231,99 @@ bool dxt_hc::compress(
   return true;
 }
 
+// Computes the two color endpoints of a tile.
+//
+// Components 0..2 of each pixel are packed into one 24-bit key and sorted so
+// that equal colors become adjacent; every distinct color is then passed to
+// split_vectors() once, weighted by its number of occurrences. When
+// m_params.m_perceptual is set, component 0 is scaled by 0.5 and component 2
+// by 0.25 before clustering.
+//
+// The scratch arrays (and split_vectors()) hold at most 64 entries, so a
+// larger pixels_count is truncated to the first 64 pixels instead of
+// overrunning the stack. An empty input yields an all-zero result.
+//
+// Returns both cluster centers back to back (elements 0..2 and 3..5), the one
+// with the smaller length() first.
+vec6F dxt_hc::palettize_color(color_quad_u8* pixels, uint pixels_count) {
+  const uint cMaxPixels = 64;
+  if (pixels_count > cMaxPixels)
+    pixels_count = cMaxPixels;
+  vec6F endpoints(cClear);
+  if (!pixels_count)
+    return endpoints;
+  uint color[cMaxPixels];
+  for (uint i = 0; i < pixels_count; i++)
+    color[i] = pixels[i][0] << 16 | pixels[i][1] << 8 | pixels[i][2];
+  std::sort(color, color + pixels_count);
+  vec3F vectors[cMaxPixels];
+  uint weights[cMaxPixels];
+  uint size = 0;
+  for (uint i = 0; i < pixels_count; i++) {
+    if (!i || color[i] != color[i - 1]) {
+      vectors[size][0] = m_params.m_perceptual ? m_uint8_to_float[color[i] >> 16] * 0.5f : m_uint8_to_float[color[i] >> 16];
+      vectors[size][1] = m_uint8_to_float[color[i] >> 8 & 0xFF];
+      vectors[size][2] = m_params.m_perceptual ? m_uint8_to_float[color[i] & 0xFF] * 0.25f : m_uint8_to_float[color[i] & 0xFF];
+      weights[size] = 1;
+      size++;
+    } else {
+      weights[size - 1]++;
+    }
+  }
+  vec3F result[2];
+  split_vectors(vectors, weights, size, result);
+  if (result[0].length() > result[1].length())
+    utils::swap(result[0], result[1]);
+  // Copy component-wise; reinterpreting vec3F[2] as a vec6F breaks aliasing rules.
+  for (uint c = 0; c < 3; c++) {
+    endpoints[c] = result[0][c];
+    endpoints[3 + c] = result[1][c];
+  }
+  return endpoints;
+}
+
+// Computes the two alpha endpoints of a tile from component comp_index of
+// each pixel.
+//
+// The values are sorted so duplicates can be merged into weighted entries
+// before being clustered by split_vectors(). As in palettize_color(), at most
+// 64 pixels are consumed and an empty input yields an all-zero result.
+//
+// Returns the two cluster centers ordered so that element 0 does not compare
+// greater than element 1.
+vec2F dxt_hc::palettize_alpha(color_quad_u8* pixels, uint pixels_count, uint comp_index) {
+  const uint cMaxPixels = 64;
+  if (pixels_count > cMaxPixels)
+    pixels_count = cMaxPixels;
+  vec2F endpoints(cClear);
+  if (!pixels_count)
+    return endpoints;
+  uint8 alpha[cMaxPixels];
+  for (uint p = 0; p < pixels_count; p++)
+    alpha[p] = pixels[p][comp_index];
+  std::sort(alpha, alpha + pixels_count);
+  vec1F vectors[cMaxPixels];
+  uint weights[cMaxPixels];
+  uint size = 0;
+  for (uint i = 0; i < pixels_count; i++) {
+    if (!i || alpha[i] != alpha[i - 1]) {
+      vectors[size][0] = m_uint8_to_float[alpha[i]];
+      weights[size] = 1;
+      size++;
+    } else {
+      weights[size - 1]++;
+    }
+  }
+  vec1F result[2];
+  split_vectors(vectors, weights, size, result);
+  if (result[0] > result[1])
+    utils::swap(result[0], result[1]);
+  // Copy component-wise; reinterpreting vec1F[2] as a vec2F breaks aliasing rules.
+  endpoints[0] = result[0][0];
+  endpoints[1] = result[1][0];
+  return endpoints;
+}
+
 void
dxt_hc::determine_tiles_task(uint64 data, void*) { uint num_tasks = m_pTask_pool->get_num_threads() + 1; uint offsets[9] = {0, 16, 32, 48, 0, 32, 64, 96, 64}; @@ -239,8 +289,6 @@ void dxt_hc::determine_tiles_task(uint64 data, void*) { uint8 selectors[64]; uint tile_error[3][9]; uint total_error[3][8]; - tree_clusterizer color_palettizer; - tree_clusterizer alpha_palettizer; for (uint level = 0; level < m_params.m_num_levels; level++) { float weight = m_params.m_levels[level].m_weight; @@ -335,33 +383,10 @@ void dxt_hc::determine_tiles_task(uint64 data, void*) { uint t = tiles[best_encoding][tile_index]; tile.pixels.append(tilePixels + offsets[t], 16 << (t >> 2)); tile.weight = weight; - - if (m_has_color_blocks) { - color_palettizer.clear(); - for (uint p = 0; p < tile.pixels.size(); p++) { - const color_quad_u8& pixel = tile.pixels[p]; - vec3F v(m_uint8_to_float[pixel[0]], m_uint8_to_float[pixel[1]], m_uint8_to_float[pixel[2]]); - color_palettizer.add_training_vec(m_params.m_perceptual ? vec3F(v[0] * 0.5f, v[1], v[2] * 0.25f): v, 1); - } - color_palettizer.generate_codebook(2); - bool single = color_palettizer.get_codebook_size() == 1; - bool reorder = !single && color_palettizer.get_codebook_entry(0).length() > color_palettizer.get_codebook_entry(1).length(); - for (uint t = 0, i = 0; i < 2; i++) { - vec3F v = color_palettizer.get_codebook_entry(single ? 0 : reorder ? 
1 - i : i); - for (uint c = 0; c < 3; c++, t++) - tile.color_endpoint[t] = v[c]; - } - } - - for (uint a = 0; a < m_num_alpha_blocks; a++) { - alpha_palettizer.clear(); - for (uint c = m_params.m_alpha_component_indices[a], p = 0; p < tile.pixels.size(); p++) - alpha_palettizer.add_training_vec(vec1F(m_uint8_to_float[tile.pixels[p][c]]), 1); - alpha_palettizer.generate_codebook(2); - float v[2] = {alpha_palettizer.get_codebook_entry(0)[0], alpha_palettizer.get_codebook_entry(alpha_palettizer.get_codebook_size() - 1)[0]}; - tile.alpha_endpoints[a][0] = math::minimum(v[0], v[1]); - tile.alpha_endpoints[a][1] = math::maximum(v[0], v[1]); - } + if (m_has_color_blocks) + tile.color_endpoint = palettize_color(tile.pixels.get_ptr(), tile.pixels.size()); + for (uint a = 0; a < m_num_alpha_blocks; a++) + tile.alpha_endpoints[a] = palettize_alpha(tile.pixels.get_ptr(), tile.pixels.size(), m_params.m_alpha_component_indices[a]); } for (uint by = 0; by < 2; by++) { @@ -385,8 +410,6 @@ void dxt_hc::determine_tiles_task_etc(uint64 data, void*) { uint8 selectors[32]; uint tile_error[5]; uint total_error[3]; - tree_clusterizer color_palettizer; - tree_clusterizer alpha_palettizer; etc1_optimizer optimizer; etc1_optimizer::params params; @@ -438,36 +461,13 @@ void dxt_hc::determine_tiles_task_etc(uint64 data, void*) { } } - vec2F alpha_endpoints; - if (m_num_alpha_blocks) { - alpha_palettizer.clear(); - for (uint p = 0; p < 16; p++) - alpha_palettizer.add_training_vec(vec1F(m_uint8_to_float[tilePixels[p].a]), 1); - alpha_palettizer.generate_codebook(2); - float v[2] = {alpha_palettizer.get_codebook_entry(0)[0], alpha_palettizer.get_codebook_entry(alpha_palettizer.get_codebook_size() - 1)[0]}; - alpha_endpoints[0] = math::minimum(v[0], v[1]); - alpha_endpoints[1] = math::maximum(v[0], v[1]); - } - + vec2F alpha_endpoints = m_num_alpha_blocks ? 
palettize_alpha(tilePixels, 16, 3) : vec2F(cClear); for (uint tile_index = 0, s = best_encoding + 1; s; s >>= 1, tile_index++) { tile_details& tile = m_tiles[b | tile_index]; uint t = tiles[best_encoding][tile_index]; tile.pixels.append(tilePixels + offsets[t], 8 << (t >> 2)); tile.weight = weight; - color_palettizer.clear(); - for (uint p = 0; p < tile.pixels.size(); p++) { - const color_quad_u8& pixel = tile.pixels[p]; - vec3F v(m_uint8_to_float[pixel[0]], m_uint8_to_float[pixel[1]], m_uint8_to_float[pixel[2]]); - color_palettizer.add_training_vec(m_params.m_perceptual ? vec3F(v[0] * 0.5f, v[1], v[2] * 0.25f) : v, 1); - } - color_palettizer.generate_codebook(2); - bool single = color_palettizer.get_codebook_size() == 1; - bool reorder = !single && color_palettizer.get_codebook_entry(0).length() > color_palettizer.get_codebook_entry(1).length(); - for (uint t = 0, i = 0; i < 2; i++) { - vec3F v = color_palettizer.get_codebook_entry(single ? 0 : reorder ? 1 - i : i); - for (uint c = 0; c < 3; c++, t++) - tile.color_endpoint[t] = v[c]; - } + tile.color_endpoint = palettize_color(tile.pixels.get_ptr(), tile.pixels.size()); if (m_num_alpha_blocks) tile.alpha_endpoints[0] = alpha_endpoints; } diff --git a/crnlib/crn_dxt_hc.h b/crnlib/crn_dxt_hc.h index 022bab4..1a0ed28 100644 --- a/crnlib/crn_dxt_hc.h +++ b/crnlib/crn_dxt_hc.h @@ -186,6 +186,8 @@ class dxt_hc { int m_prev_phase_index; int m_prev_percentage_complete; + vec<6, float> palettize_color(color_quad_u8* pixels, uint pixels_count); + vec<2, float> palettize_alpha(color_quad_u8* pixels, uint pixels_count, uint comp_index); void determine_tiles_task(uint64 data, void* pData_ptr); void determine_tiles_task_etc(uint64 data, void* pData_ptr); diff --git a/crnlib/crn_tree_clusterizer.h b/crnlib/crn_tree_clusterizer.h index 4c0bc89..113b52c 100644 --- a/crnlib/crn_tree_clusterizer.h +++ b/crnlib/crn_tree_clusterizer.h @@ -25,15 +25,6 @@ class tree_clusterizer { } }; - void clear() { - m_hist.clear(); - 
m_vectors.clear();
-    m_vectorsInfo.clear();
-    m_codebook.clear();
-    m_nodes.clear();
-    m_node_index_map.clear();
-  }
-
   void add_training_vec(const VectorType& v, uint weight) {
     m_hist.push_back(std::make_pair(v, weight));
   }
@@ -498,4 +489,175 @@ class tree_clusterizer {
   }
 };
 
+// Splits a set of weighted vectors into two clusters and stores the two
+// cluster centers in result[0] and result[1].
+//
+// Only the first `size` entries of vectors/weights are read. Both results are
+// set to the weighted mean of the input when no split is produced (a single
+// entry, non-positive variance, or a refinement pass that leaves one side
+// empty). If the total weight is zero (which includes size == 0) both results
+// are the zero vector.
+//
+// The split is seeded from the entry furthest from the mean and the entry
+// furthest from that one. With more than two entries, the seed is replaced by
+// the means of the two sides of an axis estimated from the covariance matrix,
+// provided both sides are non-empty. The two centers are then refined by
+// reassigning every entry to its nearer center until the total variance stops
+// improving or cMaxLoops passes have run.
+template <typename VectorType>
+void split_vectors(VectorType (&vectors)[64], uint (&weights)[64], uint size, VectorType (&result)[2]) {
+  VectorType weightedVectors[64];
+  double weightedDotProducts[64];
+  VectorType centroid(cClear);
+  uint64 total_weight = 0;
+  double ttsum = 0.0f;
+  for (uint i = 0; i < size; i++) {
+    const VectorType& v = vectors[i];
+    const uint weight = weights[i];
+    weightedVectors[i] = v * (float)weight;
+    centroid += weightedVectors[i];
+    total_weight += weight;
+    weightedDotProducts[i] = v.dot(v) * weight;
+    ttsum += weightedDotProducts[i];
+  }
+  // Without any weight there is no mean to compute; bail out before dividing by zero.
+  if (!total_weight) {
+    result[0] = result[1] = centroid;
+    return;
+  }
+  float variance = (float)(ttsum - (centroid.dot(centroid) / total_weight));
+  centroid *= (1.0f / total_weight);
+  result[0] = result[1] = centroid;
+  if (variance <= 0.0f || size == 1)
+    return;
+  // Seed the split with the entry furthest from the mean and the entry furthest from that one.
+  VectorType furthest;
+  double furthest_dist = -1.0f;
+  for (uint i = 0; i < size; i++) {
+    const VectorType& v = vectors[i];
+    double dist = v.squared_distance(centroid);
+    if (dist > furthest_dist) {
+      furthest_dist = dist;
+      furthest = v;
+    }
+  }
+  VectorType opposite;
+  double opposite_dist = -1.0f;
+  for (uint i = 0; i < size; i++) {
+    const VectorType& v = vectors[i];
+    double dist = v.squared_distance(furthest);
+    if (dist > opposite_dist) {
+      opposite_dist = dist;
+      opposite = v;
+    }
+  }
+  VectorType left_child((furthest + centroid) * .5f);
+  VectorType right_child((opposite + centroid) * .5f);
+  if (size > 2) {
+    const uint N = VectorType::num_elements;
+    // Weighted covariance about the mean: upper triangle is accumulated, then mirrored.
+    matrix<N, N, float> covar;
+    covar.clear();
+    for (uint i = 0; i < size; i++) {
+      const VectorType& v = vectors[i] - centroid;
+      const VectorType w = v * (float)weights[i];
+      for (uint x = 0; x < N; x++) {
+        for (uint y = x; y < N; y++)
+          covar[x][y] = covar[x][y] + v[x] * w[y];
+      }
+    }
+    float divider = (float)total_weight;
+    for (uint x = 0; x < N; x++) {
+      for (uint y = x; y < N; y++) {
+        covar[x][y] /= divider;
+        covar[y][x] = covar[x][y];
+      }
+    }
+    // Repeatedly multiply the axis estimate by covar, rescaling by the largest resulting component.
+    VectorType axis(1.0f);
+    for (uint iter = 0; iter < 10; iter++) {
+      VectorType x;
+      double max_sum = 0;
+      for (uint i = 0; i < N; i++) {
+        double sum = 0;
+        for (uint j = 0; j < N; j++)
+          sum += axis[j] * covar[i][j];
+        x[i] = (float)sum;
+        max_sum = i ? math::maximum(max_sum, sum) : sum;
+      }
+      if (max_sum != 0.0f)
+        x *= (float)(1.0f / max_sum);
+      axis = x;
+    }
+    axis.normalize();
+    // Partition the entries by the sign of their offset from the mean along the axis.
+    VectorType new_left_child(0.0f);
+    VectorType new_right_child(0.0f);
+    double left_weight = 0.0f;
+    double right_weight = 0.0f;
+    for (uint i = 0; i < size; i++) {
+      const VectorType& v = vectors[i];
+      const float weight = (float)weights[i];
+      double t = (v - centroid) * axis;
+      if (t < 0.0f) {
+        new_left_child += weightedVectors[i];
+        left_weight += weight;
+      } else {
+        new_right_child += weightedVectors[i];
+        right_weight += weight;
+      }
+    }
+    if ((left_weight > 0.0f) && (right_weight > 0.0f)) {
+      left_child = new_left_child * (float)(1.0f / left_weight);
+      right_child = new_right_child * (float)(1.0f / right_weight);
+    }
+  }
+  // Refinement: assign each entry to its nearer center, then recompute both centers.
+  uint64 left_weight = 0;
+  uint64 right_weight = 0;
+  float prev_total_variance = 1e+10f;
+  float left_variance = 0.0f;
+  float right_variance = 0.0f;
+  const uint cMaxLoops = 1024;
+  for (uint total_loops = 0; total_loops < cMaxLoops; total_loops++) {
+    VectorType new_left_child(cClear);
+    VectorType new_right_child(cClear);
+    double left_ttsum = 0.0f;
+    double right_ttsum = 0.0f;
+    left_weight = 0;
+    right_weight = 0;
+    for (uint i = 0; i < size; i++) {
+      const VectorType& v = vectors[i];
+      double left_dist2 = left_child.squared_distance(v);
+      double right_dist2 = right_child.squared_distance(v);
+      if (left_dist2 < right_dist2) {
+        new_left_child += weightedVectors[i];
+        left_ttsum += weightedDotProducts[i];
+        left_weight += weights[i];
+      } else {
+        new_right_child += weightedVectors[i];
+        right_ttsum += weightedDotProducts[i];
+        right_weight += weights[i];
+      }
+    }
+    // One side ended up empty: keep the mean already stored in result.
+    if ((!left_weight) || (!right_weight))
+      return;
+    left_variance = (float)(left_ttsum - (new_left_child.dot(new_left_child) / left_weight));
+    right_variance = (float)(right_ttsum - (new_right_child.dot(new_right_child) / right_weight));
+    new_left_child *= (1.0f / left_weight);
+    new_right_child *= (1.0f / right_weight);
+    left_child = new_left_child;
+    right_child = new_right_child;
+    float total_variance = left_variance + right_variance;
+    if (total_variance < .00001f)
+      break;
+    if (((prev_total_variance - total_variance) / total_variance) < .00001f)
+      break;
+    prev_total_variance = total_variance;
+  }
+  result[0] = left_child;
+  result[1] = right_child;
+}
+
 } // namespace crnlib