// Patch summary (reviewer notes; this preamble precedes the first "diff --git" header).
//
// State of this text: the unified diff below has been collapsed onto a few very long
// lines, and template argument lists appear to have been stripped during extraction
// (e.g. "tree_clusterizer* vq", "const crnlib::vector& codebook", "helpers::rel_ops > {").
// The hunks are left exactly as found; the original line breaks, indentation and
// template arguments would have to be recovered before the patch could be applied.
//
// Files touched:
//   bin/crunch_x64.exe           - binary changed (no textual diff).
//   crnlib/crn_dxt_hc.cpp        - endpoint-to-cluster assignment tasks.
//   crnlib/crn_tree_clusterizer.h - codebook generation and lookup helpers.
//   crnlib/crn_vec.h             - vec gains a conversion to size_t.
//
// crn_dxt_hc.cpp
//   determine_color_endpoint_clusters_task: the call to
//   vq->find_best_codebook_entry_fs(color_endpoint) is replaced by an inline linear scan
//   over vq->get_codebook(). Before scanning, node_dist is computed as the squared
//   distance between the tile's color_endpoint and the codebook entry returned by
//   vq->get_node_index(v). The scan accumulates the squared distance two components at a
//   time (0-1, 2-3, 4-5) and skips a candidate as soon as the partial sum after
//   components 0-1 or 0-3 exceeds node_dist. The scan stops early on an exact 0.0f match.
//   The tile's cluster_indices[cColor] receives the best index found (0 if none is kept).
//
//   determine_color_endpoints: generate_codebook is now called with a second argument
//   of true, which requests the node index map described below.
//
//   determine_alpha_endpoint_clusters_task: the same call is replaced by an inline
//   two-component squared-distance scan per alpha block. No node_dist pruning and no
//   get_node_index lookup is used here.
//
// crn_tree_clusterizer.h
//   clear() additionally clears m_node_index_map.
//   generate_codebook(max_size, generate_node_index_map = false): when the flag is true,
//   every vector listed in a node's m_vectors is inserted into m_node_index_map, keyed by
//   m_vectors[node.m_vectors[j].index] and mapped to that node's m_codebook_index.
//   get_node_index(v) is added and returns m_node_index_map.find(v)->second.
//   find_best_codebook_entry_fs(v) is removed from the class.
//   A crnlib::hash_map member named m_node_index_map is added.
//
// crn_vec.h
//   vec gains "operator size_t() const", which returns fast_hash(this, sizeof(*this)).
//   Presumably this is what lets a vec act as a crnlib::hash_map key - TODO confirm
//   against crnlib's hasher.
//
// NOTE(review): get_node_index dereferences the result of find() without testing for a
//   miss. If generate_codebook ran without the flag, or v was never a training vector,
//   the lookup presumably yields end() and the dereference is invalid - confirm against
//   crnlib::hash_map::find and consider an assert or a checked fallback.
// NOTE(review): get_node_index is declared without const, unlike the neighbouring
//   get_codebook_size() const, although the body shown only performs a find().
// NOTE(review): operator size_t is not explicit, so a vec can convert implicitly to an
//   integer anywhere one is accepted. It also hashes the raw object bytes; whether
//   values that compare equal always share a byte pattern (e.g. +0.0f vs -0.0f) is not
//   visible here - verify.
// NOTE(review): the pruning assumes the manual component sum for the node's own codebook
//   entry never exceeds node_dist as computed by squared_distance(). If the two round
//   differently, every candidate could be skipped and best_index would stay 0 - confirm
//   that squared_distance() sums in the same order.
diff --git a/bin/crunch_x64.exe b/bin/crunch_x64.exe index 17aa1d9..0f1b437 100644 Binary files a/bin/crunch_x64.exe and b/bin/crunch_x64.exe differ diff --git a/crnlib/crn_dxt_hc.cpp b/crnlib/crn_dxt_hc.cpp index 11a4385..fdf3715 100644 --- a/crnlib/crn_dxt_hc.cpp +++ b/crnlib/crn_dxt_hc.cpp @@ -629,10 +629,36 @@ void dxt_hc::determine_color_endpoint_codebook_task_etc(uint64 data, void*) { void dxt_hc::determine_color_endpoint_clusters_task(uint64 data, void* pData_ptr) { tree_clusterizer* vq = (tree_clusterizer*)pData_ptr; + const crnlib::vector& codebook = vq->get_codebook(); uint num_tasks = m_pTask_pool->get_num_threads() + 1; for (uint t = m_tiles.size() * data / num_tasks, tEnd = m_tiles.size() * (data + 1) / num_tasks; t < tEnd; t++) { - if (m_tiles[t].pixels.size()) - m_tiles[t].cluster_indices[cColor] = vq->find_best_codebook_entry_fs(m_tiles[t].color_endpoint); + if (m_tiles[t].pixels.size()) { + const vec6F& v = m_tiles[t].color_endpoint; + float node_dist = codebook[vq->get_node_index(v)].squared_distance(v); + float best_dist = math::cNearlyInfinite; + uint best_index = 0; + for (uint i = 0; i < codebook.size(); i++) { + const vec6F& c = codebook[i]; + float dist = 0; + dist += (c[0] - v[0]) * (c[0] - v[0]); + dist += (c[1] - v[1]) * (c[1] - v[1]); + if (dist > node_dist) + continue; + dist += (c[2] - v[2]) * (c[2] - v[2]); + dist += (c[3] - v[3]) * (c[3] - v[3]); + if (dist > node_dist) + continue; + dist += (c[4] - v[4]) * (c[4] - v[4]); + dist += (c[5] - v[5]) * (c[5] - v[5]); + if (dist < best_dist) { + best_dist = dist; + best_index = i; + if (best_dist == 0.0f) + break; + } + } + m_tiles[t].cluster_indices[cColor] = best_index; + } } } @@ -643,7 +669,7 @@ void dxt_hc::determine_color_endpoints() { vq.add_training_vec(m_tiles[t].color_endpoint, (uint)(m_tiles[t].pixels.size() * m_tiles[t].weight)); } - vq.generate_codebook(math::minimum(m_num_tiles, m_params.m_color_endpoint_codebook_size)); + vq.generate_codebook(math::minimum(m_num_tiles, 
m_params.m_color_endpoint_codebook_size), true); m_color_clusters.resize(vq.get_codebook_size()); for (uint i = 0; i <= m_pTask_pool->get_num_threads(); i++) @@ -773,11 +799,25 @@ void dxt_hc::determine_alpha_endpoint_codebook_task(uint64 data, void*) { void dxt_hc::determine_alpha_endpoint_clusters_task(uint64 data, void* pData_ptr) { tree_clusterizer* vq = (tree_clusterizer*)pData_ptr; + const crnlib::vector& codebook = vq->get_codebook(); uint num_tasks = m_pTask_pool->get_num_threads() + 1; for (uint t = m_tiles.size() * data / num_tasks, tEnd = m_tiles.size() * (data + 1) / num_tasks; t < tEnd; t++) { if (m_tiles[t].pixels.size()) { - for (uint a = 0; a < m_num_alpha_blocks; a++) - m_tiles[t].cluster_indices[cAlpha0 + a] = vq->find_best_codebook_entry_fs(m_tiles[t].alpha_endpoints[a]); + for (uint a = 0; a < m_num_alpha_blocks; a++) { + const vec2F& v = m_tiles[t].alpha_endpoints[a]; + float best_dist = math::cNearlyInfinite; + uint best_index = 0; + for (uint i = 0; i < codebook.size(); i++) { + float dist = (codebook[i][0] - v[0]) * (codebook[i][0] - v[0]) + (codebook[i][1] - v[1]) * (codebook[i][1] - v[1]); + if (dist < best_dist) { + best_dist = dist; + best_index = i; + if (best_dist == 0.0f) + break; + } + } + m_tiles[t].cluster_indices[cAlpha0 + a] = best_index; + } } } } diff --git a/crnlib/crn_tree_clusterizer.h b/crnlib/crn_tree_clusterizer.h index 94b8580..e543424 100644 --- a/crnlib/crn_tree_clusterizer.h +++ b/crnlib/crn_tree_clusterizer.h @@ -20,13 +20,14 @@ class tree_clusterizer { m_vectors.clear(); m_codebook.clear(); m_nodes.clear(); + m_node_index_map.clear(); } void add_training_vec(const VectorType& v, uint weight) { m_hist.push_back(std::make_pair(v, weight)); } - bool generate_codebook(uint max_size) { + bool generate_codebook(uint max_size, bool generate_node_index_map = false) { if (m_hist.empty()) return false; @@ -115,11 +116,20 @@ class tree_clusterizer { node.m_codebook_index = m_codebook.size(); 
m_codebook.push_back(node.m_centroid); + + if (generate_node_index_map) { + for (uint j = 0; j < node.m_vectors.size(); j++) + m_node_index_map.insert(std::make_pair(m_vectors[node.m_vectors[j].index], node.m_codebook_index)); + } } return true; } + inline uint get_node_index(const VectorType& v) { + return m_node_index_map.find(v)->second; + } + inline uint get_codebook_size() const { return m_codebook.size(); } @@ -133,23 +143,6 @@ class tree_clusterizer { return m_codebook; } - uint find_best_codebook_entry_fs(const VectorType& v) const { - float best_dist = math::cNearlyInfinite; - uint best_index = 0; - - for (uint i = 0; i < m_codebook.size(); i++) { - float dist = m_codebook[i].squared_distance(v); - if (dist < best_dist) { - best_dist = dist; - best_index = i; - if (best_dist == 0.0f) - break; - } - } - - return best_index; - } - private: crnlib::vector > m_hist; @@ -157,6 +150,7 @@ class tree_clusterizer { crnlib::vector m_weightedVectors; crnlib::vector m_left_children_indices; crnlib::vector m_right_children_indices; + crnlib::hash_map m_node_index_map; struct vq_node { vq_node() diff --git a/crnlib/crn_vec.h b/crnlib/crn_vec.h index c4199bc..9dcbc54 100644 --- a/crnlib/crn_vec.h +++ b/crnlib/crn_vec.h @@ -206,6 +206,10 @@ class vec : public helpers::rel_ops > { return m_s[i]; } + inline operator size_t() const { + return (size_t)fast_hash(this, sizeof(*this)); + } + inline T get_x(void) const { return m_s[0]; } inline T get_y(void) const { CRNLIB_ASSUME(N >= 2);