3e12aff909
DXT Testing:
The modified algorithm has been tested on the Kodak test set using a 64-bit build with default settings (running on Windows 10, i7-4790, 3.6GHz). All the decompressed test images are identical to the images compressed and decompressed using the original version of Crunch (revision ea9b8d8).
[Compressing Kodak set without mipmaps using DXT1 encoding]
Original: 1582222 bytes / 28.866 sec
Modified: 1468204 bytes / 11.858 sec
Improvement: 7.21% (compression ratio) / 58.92% (compression time)
[Compressing Kodak set with mipmaps using DXT1 encoding]
Original: 2065243 bytes / 36.878 sec
Modified: 1914805 bytes / 15.625 sec
Improvement: 7.28% (compression ratio) / 57.63% (compression time)
ETC Testing:
The modified algorithm has been tested on the Kodak test set using a 64-bit build with default settings (running on Windows 10, i7-4790, 3.6GHz). The ETC1 quantization parameters have been selected so that ETC1 compression gives approximately the same average Luma PSNR as the corresponding DXT1 compression (34.044 dB for the Kodak test set compressed without mipmaps using DXT1 encoding and default quality settings).
[Compressing Kodak set without mipmaps using ETC1 encoding]
Total size: 1607858 bytes
Total time: 17.181 sec
Average bitrate: 1.363 bpp
Average Luma PSNR: 34.050 dB
331 lines
9.9 KiB
C++
331 lines
9.9 KiB
C++
// File: crn_threaded_clusterizer.h
|
|
// See Copyright Notice and license at the end of inc/crnlib.h
|
|
#pragma once
|
|
#include "crn_clusterizer.h"
|
|
#include "crn_threading.h"
|
|
|
|
namespace crnlib {
|
|
template <typename VectorType>
|
|
class threaded_clusterizer {
|
|
// NOTE(review): presumably makes the copy constructor and assignment operator
// inaccessible so instances cannot be copied -- confirm against the macro's definition.
CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(threaded_clusterizer);
|
|
|
|
public:
|
|
// Creates a clusterizer that queues its partition tasks on tp.
// Only the address of tp is stored, so tp must stay alive for as long as
// create_clusters() may be called on this object.
//
// Every scalar member is given a defined value here (in declaration order) so
// that no member is ever read in an indeterminate state, even before the first
// create_clusters() call overwrites them.
threaded_clusterizer(task_pool& tp)
    : m_pTask_pool(&tp),
      m_main_thread_id(),
      m_cluster_task_displayed_progress(false),
      m_pProgress_callback(NULL),
      m_pProgress_callback_data(NULL),
      m_canceled(false) {
}
|
|
|
|
void clear() {
|
|
for (uint i = 0; i < cMaxClusterizers; i++)
|
|
m_clusterizers[i].clear();
|
|
}
|
|
|
|
struct weighted_vec {
|
|
weighted_vec() {}
|
|
weighted_vec(const VectorType& v, uint w)
|
|
: m_vec(v), m_weight(w) {}
|
|
|
|
VectorType m_vec;
|
|
uint m_weight;
|
|
};
|
|
// Array of weighted training vectors; this is the input type taken by create_clusters().
typedef crnlib::vector<weighted_vec> weighted_vec_array;
|
|
|
|
// Progress callback: receives a completion percentage and the opaque pointer that was
// passed to create_clusters(). Returning false is treated as a cancel request
// (generate_codebook_progress_callback then sets m_canceled).
typedef bool (*progress_callback_func)(uint percentage_completed, void* pProgress_data);
|
|
|
|
bool create_clusters(
|
|
const weighted_vec_array& weighted_vecs,
|
|
uint max_clusters, crnlib::vector<crnlib::vector<uint> >& cluster_indices,
|
|
progress_callback_func pProgress_callback,
|
|
void* pProgress_callback_data) {
|
|
m_main_thread_id = crn_get_current_thread_id();
|
|
m_canceled = false;
|
|
m_pProgress_callback = pProgress_callback;
|
|
m_pProgress_callback_data = pProgress_callback_data;
|
|
|
|
if (max_clusters >= 128) {
|
|
crnlib::vector<uint> primary_indices(weighted_vecs.size());
|
|
for (uint i = 0; i < weighted_vecs.size(); i++)
|
|
primary_indices[i] = i;
|
|
|
|
CRNLIB_ASSUME(cMaxClusterizers == 4);
|
|
|
|
crnlib::vector<uint> indices[6];
|
|
|
|
compute_split(weighted_vecs, primary_indices, indices[0], indices[1]);
|
|
compute_split(weighted_vecs, indices[0], indices[2], indices[3]);
|
|
compute_split(weighted_vecs, indices[1], indices[4], indices[5]);
|
|
|
|
create_clusters_task_state task_state[4];
|
|
|
|
m_cluster_task_displayed_progress = false;
|
|
|
|
uint total_partitions = 0;
|
|
for (uint i = 0; i < 4; i++) {
|
|
const uint num_indices = indices[2 + i].size();
|
|
if (num_indices)
|
|
total_partitions++;
|
|
}
|
|
|
|
for (uint i = 0; i < 4; i++) {
|
|
const uint num_indices = indices[2 + i].size();
|
|
if (!num_indices)
|
|
continue;
|
|
|
|
task_state[i].m_pWeighted_vecs = &weighted_vecs;
|
|
task_state[i].m_pIndices = &indices[2 + i];
|
|
task_state[i].m_max_clusters = (max_clusters + (total_partitions / 2)) / total_partitions;
|
|
|
|
m_pTask_pool->queue_object_task(this, &threaded_clusterizer::create_clusters_task, i, &task_state[i]);
|
|
}
|
|
|
|
m_pTask_pool->join();
|
|
|
|
if (m_canceled)
|
|
return false;
|
|
|
|
uint total_clusters = 0;
|
|
for (uint i = 0; i < 4; i++)
|
|
total_clusters += task_state[i].m_cluster_indices.size();
|
|
|
|
cluster_indices.reserve(total_clusters);
|
|
cluster_indices.resize(0);
|
|
|
|
for (uint i = 0; i < 4; i++) {
|
|
const uint ofs = cluster_indices.size();
|
|
|
|
cluster_indices.resize(ofs + task_state[i].m_cluster_indices.size());
|
|
|
|
for (uint j = 0; j < task_state[i].m_cluster_indices.size(); j++) {
|
|
cluster_indices[ofs + j].swap(task_state[i].m_cluster_indices[j]);
|
|
}
|
|
}
|
|
} else {
|
|
m_clusterizers[0].clear();
|
|
m_clusterizers[0].get_training_vecs().reserve(weighted_vecs.size());
|
|
|
|
for (uint i = 0; i < weighted_vecs.size(); i++) {
|
|
const weighted_vec& v = weighted_vecs[i];
|
|
|
|
m_clusterizers[0].add_training_vec(v.m_vec, v.m_weight);
|
|
}
|
|
|
|
m_clusterizers[0].generate_codebook(max_clusters, generate_codebook_progress_callback, this, false); //m_params.m_dxt_quality <= cCRNDXTQualityFast);
|
|
|
|
const uint num_clusters = m_clusterizers[0].get_codebook_size();
|
|
|
|
m_clusterizers[0].retrieve_clusters(num_clusters, cluster_indices);
|
|
}
|
|
|
|
return !m_canceled;
|
|
}
|
|
|
|
private:
|
|
// Task pool used to queue and join the per-partition clustering tasks; holds the
// address of the task_pool reference passed to the constructor.
task_pool* m_pTask_pool;
|
|
|
|
// Id of the thread that called create_clusters(); create_clusters_task() compares the
// executing thread against it to decide whether it may forward progress to the user.
crn_thread_id_t m_main_thread_id;
|
|
|
|
struct create_clusters_task_state {
|
|
create_clusters_task_state()
|
|
: m_pWeighted_vecs(NULL), m_pIndices(NULL), m_max_clusters(0) {
|
|
}
|
|
|
|
const weighted_vec_array* m_pWeighted_vecs;
|
|
crnlib::vector<uint>* m_pIndices;
|
|
crnlib::vector<crnlib::vector<uint> > m_cluster_indices;
|
|
uint m_max_clusters;
|
|
};
|
|
|
|
// The clusterizer type that does the actual codebook generation for VectorType.
typedef clusterizer<VectorType> vector_clusterizer;
|
|
|
|
// Number of per-partition clusterizers; create_clusters() statically assumes this is 4.
enum { cMaxClusterizers = 4 };
|
|
// One clusterizer per partition; slot 0 is also the one used by the non-partitioned path.
vector_clusterizer m_clusterizers[cMaxClusterizers];
|
|
// Set once a task executing on the main thread has finished generate_codebook(); later
// main-thread tasks then use the dummy (non-reporting) progress callback.
bool m_cluster_task_displayed_progress;
|
|
|
|
// User progress callback given to create_clusters(); may be NULL (then nothing is reported).
progress_callback_func m_pProgress_callback;
|
|
// Opaque pointer passed back unchanged to m_pProgress_callback.
void* m_pProgress_callback_data;
|
|
// Set when the user progress callback returns false; polled by the clustering tasks and
// reported (negated) as the result of create_clusters().
// NOTE(review): plain bool that appears to be read/written from pool tasks without any
// visible synchronization -- confirm this is acceptable for the task_pool in use.
bool m_canceled;
|
|
|
|
// Progress adapter handed to clusterizer::generate_codebook(). pData is the owning
// threaded_clusterizer. Forwards the percentage to the user callback when one is set;
// if that callback returns false, records the cancellation and returns false.
// Returns true when no user callback is installed.
static bool generate_codebook_progress_callback(uint percentage_completed, void* pData) {
  threaded_clusterizer* const pSelf = static_cast<threaded_clusterizer*>(pData);

  const progress_callback_func pUser_callback = pSelf->m_pProgress_callback;
  const bool keep_going =
      !pUser_callback || pUser_callback(percentage_completed, pSelf->m_pProgress_callback_data);

  if (!keep_going)
    pSelf->m_canceled = true;

  return keep_going;
}
|
|
|
|
// Estimates the principal axis of the weighted vectors selected by indices.
//
//   axis_res     - output: the estimated dominant axis after normalize(); cleared when
//                  the selected vectors have zero total weight.
//   centroid_res - output: weighted centroid of the selected vectors (the un-normalized
//                  accumulator when the total weight is zero).
//   vecs         - all weighted vectors.
//   indices      - indices into vecs of the vectors to analyse.
//
// The axis is found by power iteration on the weighted covariance matrix (at most 10
// iterations, each rescaled by the largest absolute component).
void compute_pca(VectorType& axis_res, VectorType& centroid_res, const weighted_vec_array& vecs, const vector<uint>& indices) {
  const uint N = VectorType::num_elements;

  // Weighted sum of the vectors and the total weight.
  VectorType centroid(0.0f);
  double total_weight = 0.0f;
  for (uint i = 0; i < indices.size(); i++) {
    const weighted_vec& v = vecs[indices[i]];
    centroid += v.m_vec * static_cast<float>(v.m_weight);
    total_weight += v.m_weight;
  }

  // Nothing (or only zero-weight vectors) selected: there is no meaningful axis.
  if (total_weight == 0.0f) {
    axis_res.clear();
    centroid_res = centroid;
    return;
  }

  double one_over_total_weight = 1.0f / total_weight;
  for (uint i = 0; i < N; i++)
    centroid[i] = static_cast<float>(centroid[i] * one_over_total_weight);

  matrix<N, N, float> covar;
  covar.clear();

  // Accumulate the upper triangle of the weighted covariance matrix.
  for (uint i = 0; i < indices.size(); i++) {
    const weighted_vec& wv = vecs[indices[i]];

    const VectorType v(wv.m_vec - centroid);
    const VectorType w(v * static_cast<float>(wv.m_weight));

    for (uint x = 0; x < N; x++)
      for (uint y = x; y < N; y++)
        covar[x][y] = covar[x][y] + v[x] * w[y];
  }

  for (uint x = 0; x < N; x++)
    for (uint y = x; y < N; y++)
      covar[x][y] = static_cast<float>(covar[x][y] * one_over_total_weight);

  // Mirror the upper triangle into the lower one (the matrix is symmetric).
  for (uint x = 0; x < (N - 1); x++)
    for (uint y = x + 1; y < N; y++)
      covar[y][x] = covar[x][y];

  // Seed the iteration with components ramping from .75 to 1.25. For a one-element
  // vector type there is no ramp; guard the step so it is not 1/0 (which would make
  // the seed NaN). For N > 1 the step is exactly 1 / (N - 1), as before.
  const float seed_step = (N > 1) ? (1.0f / (N - 1)) : 0.0f;

  VectorType axis;
  for (uint i = 0; i < N; i++)
    axis[i] = math::lerp(.75f, 1.25f, i * seed_step);

  VectorType prev_axis(axis);

  const uint cMaxIterations = 10;
  for (uint iter = 0; iter < cMaxIterations; iter++) {
    VectorType x;

    double max_sum = 0;

    // x = covar * axis, tracking the largest absolute component for rescaling.
    for (uint i = 0; i < N; i++) {
      double sum = 0;

      for (uint j = 0; j < N; j++)
        sum += axis[j] * covar[i][j];

      x[i] = static_cast<float>(sum);

      max_sum = math::maximum(max_sum, fabs(sum));
    }

    if (max_sum != 0.0f)
      x *= static_cast<float>(1.0f / max_sum);

    // Note: prev_axis lags axis by one iteration, so from the second iteration on this
    // compares the new estimate with the one from two iterations earlier. Left as-is on
    // purpose: changing the stopping rule would change the resulting axis.
    VectorType delta_axis(prev_axis - x);

    prev_axis = axis;
    axis = x;

    if (delta_axis.norm() < .0025f)
      break;
  }

  axis.normalize();

  axis_res = axis;
  centroid_res = centroid;
}
|
|
|
|
void compute_division(
|
|
const VectorType& axis, const VectorType& centroid, const weighted_vec_array& vecs, const vector<uint>& indices,
|
|
vector<uint>& left_indices,
|
|
vector<uint>& right_indices) {
|
|
left_indices.resize(0);
|
|
right_indices.resize(0);
|
|
|
|
for (uint i = 0; i < indices.size(); i++) {
|
|
const uint vec_index = indices[i];
|
|
const VectorType v(vecs[vec_index].m_vec - centroid);
|
|
|
|
float t = v * axis;
|
|
if (t < 0.0f)
|
|
left_indices.push_back(vec_index);
|
|
else
|
|
right_indices.push_back(vec_index);
|
|
}
|
|
}
|
|
|
|
void compute_split(
|
|
const weighted_vec_array& vecs, const vector<uint>& indices,
|
|
vector<uint>& left_indices,
|
|
vector<uint>& right_indices) {
|
|
VectorType axis, centroid;
|
|
compute_pca(axis, centroid, vecs, indices);
|
|
|
|
compute_division(axis, centroid, vecs, indices, left_indices, right_indices);
|
|
}
|
|
|
|
// Non-reporting progress callback: ignores the percentage and only tells the caller
// whether to continue, i.e. returns false once the owning threaded_clusterizer
// (passed as pData) has been canceled.
static bool generate_codebook_dummy_progress_callback(uint, void* pData) {
  const threaded_clusterizer* const pSelf = static_cast<threaded_clusterizer*>(pData);
  return !pSelf->m_canceled;
}
|
|
|
|
// Task body that clusters one partition; queued by create_clusters().
//
//   data      - partition index; selects which entry of m_clusterizers to use.
//   pData_ptr - the create_clusters_task_state for this partition (inputs are read from
//               it and the resulting clusters are written to its m_cluster_indices).
//
// Returns early, leaving the state's output untouched, as soon as cancellation is seen.
void create_clusters_task(uint64 data, void* pData_ptr) {
  if (m_canceled)
    return;

  const uint partition_index = static_cast<uint>(data);
  create_clusters_task_state& state = *static_cast<create_clusters_task_state*>(pData_ptr);

  vector_clusterizer& clusterizer = m_clusterizers[partition_index];
  const crnlib::vector<uint>& partition = *state.m_pIndices;

  clusterizer.clear();
  // Size the training set up front, as the single-clusterizer path in create_clusters()
  // does, instead of letting it grow one vector at a time.
  clusterizer.get_training_vecs().reserve(partition.size());

  for (uint i = 0; i < partition.size(); i++) {
    const weighted_vec& v = (*state.m_pWeighted_vecs)[partition[i]];

    clusterizer.add_training_vec(v.m_vec, v.m_weight);
  }

  if (m_canceled)
    return;

  const bool is_main_thread = (crn_get_current_thread_id() == m_main_thread_id);

  // Only the first task that runs on the main thread reports progress to the user;
  // every other task uses the dummy callback, which merely polls for cancellation.
  const bool quick = false;
  clusterizer.generate_codebook(
      state.m_max_clusters,
      (is_main_thread && !m_cluster_task_displayed_progress) ? generate_codebook_progress_callback : generate_codebook_dummy_progress_callback,
      this,
      quick);

  if (is_main_thread)
    m_cluster_task_displayed_progress = true;

  if (m_canceled)
    return;

  const uint num_clusters = clusterizer.get_codebook_size();

  clusterizer.retrieve_clusters(num_clusters, state.m_cluster_indices);

  // The clusterizer reports positions within this partition's training set; translate
  // them back to indices into the full weighted vector array.
  for (uint i = 0; i < state.m_cluster_indices.size(); i++) {
    crnlib::vector<uint>& indices = state.m_cluster_indices[i];

    for (uint j = 0; j < indices.size(); j++)
      indices[j] = partition[indices[j]];
  }
}
|
|
};
|
|
|
|
} // namespace crnlib
|