Optimize DXT color endpoints computation

This change significantly improves the compression speed for DXT encoding.

Explanation:

The main ideas used for the DXT color endpoints computation optimization:
- When the DXT endpoint computation function is called from the qunatization algorithm, almost all of its input parameters (except the color metrics) are hardcoded in the quantization code. This allows to optimize the endpoint evaluation function (which is the bottleneck of the endpoint computation algorithm) for this specific set of parameters.
- In the original version of the evaluation function, selectors are computed each time when a new endpoint is evaluated. While in fact, this is not necessary, because some selector values are never used, so they can be computed lazily, based on the previously determined optimal endpoint values. This approach significantly reduces the amount of computations.

Other improvements:
- The original version of Crunch has a minor bug: the counter for the cached endpoint values does not get initialized. This results in nondeterministic DXT conversion of large textures, as the counter overflow can occur at a random moment. The issue is now fixed in the current branch.

DXT Testing:

The modified algorithm has been tested on the Kodak test set using 64-bit build with default settings (running on Windows 10, i7-4790, 3.6GHz). All the decompressed test images are identical to the images being compressed and decompressed using original version of Crunch (revision ea9b8d8).

[Compressing Kodak set without mipmaps using DXT1 encoding]
Original: 1582222 bytes / 28.893 sec
Modified: 1468204 bytes / 11.882 sec
Improvement: 7.21% (compression ratio) / 58.88% (compression time)

[Compressing Kodak set with mipmaps using DXT1 encoding]
Original: 2065243 bytes / 36.946 sec
Modified: 1914805 bytes / 15.628 sec
Improvement: 7.28% (compression ratio) / 57.70% (compression time)

ETC Testing:

The modified algorithm has been tested on the Kodak test set using 64-bit build with default settings (running on Windows 10, i7-4790, 3.6GHz). The ETC1 quantization parameters have been selected in such a way, so that ETC1 compression gives approximately the same average Luma PSNR as the corresponding DXT1 compression (which is equal to 34.044 dB for the Kodak test set compressed without mipmaps using DXT1 encoding and default quality settings).

[Compressing Kodak set without mipmaps using ETC1 encoding]
Total size: 1607858 bytes
Total time: 17.352 sec
Average bitrate: 1.363 bpp
Average Luma PSNR: 34.050 dB
This commit is contained in:
Alexander Suvorov
2017-08-11 13:12:44 +02:00
parent bec4114bea
commit 6b3172f793
5 changed files with 253 additions and 566 deletions
Binary file not shown.
+240 -477
View File
File diff suppressed because it is too large Load Diff
+13 -81
View File
@@ -122,9 +122,6 @@ class dxt1_endpoint_optimizer {
m_endpoint_caching(true),
m_use_transparent_indices_for_black(false),
m_force_alpha_blocks(false) {
m_color_weights[0] = 1;
m_color_weights[1] = 1;
m_color_weights[2] = 1;
}
uint m_block_index;
@@ -142,7 +139,6 @@ class dxt1_endpoint_optimizer {
bool m_endpoint_caching;
bool m_use_transparent_indices_for_black;
bool m_force_alpha_blocks;
int m_color_weights[3];
};
struct results {
@@ -162,47 +158,14 @@ class dxt1_endpoint_optimizer {
uint8 m_enforced_selector;
};
struct solution {
solution() {}
solution(const solution& other) {
m_results = other.m_results;
m_selectors = other.m_selectors;
m_results.m_pSelectors = m_selectors.begin();
}
solution& operator=(const solution& rhs) {
if (this == &rhs)
return *this;
m_results = rhs.m_results;
m_selectors = rhs.m_selectors;
m_results.m_pSelectors = m_selectors.begin();
return *this;
}
results m_results;
crnlib::vector<uint8> m_selectors;
inline bool operator<(const solution& other) const {
return m_results.m_error < other.m_results.m_error;
}
static inline bool coords_equal(const solution& lhs, const solution& rhs) {
return (lhs.m_results.m_low_color == rhs.m_results.m_low_color) && (lhs.m_results.m_high_color == rhs.m_results.m_high_color);
}
};
typedef crnlib::vector<solution> solution_vec;
bool compute(const params& p, results& r, solution_vec* pSolutions = NULL);
bool compute(const params& p, results& r);
private:
const params* m_pParams;
results* m_pResults;
solution_vec* m_pSolutions;
bool m_perceptual;
bool m_has_color_weighting;
bool m_evaluate_hc;
typedef crnlib::vector<unique_color> unique_color_vec;
@@ -225,8 +188,6 @@ class dxt1_endpoint_optimizer {
vec3F m_principle_axis;
bool m_all_pixels_grayscale;
crnlib::vector<uint16> m_unique_packed_colors;
crnlib::vector<uint8> m_trial_selectors;
@@ -240,18 +201,15 @@ class dxt1_endpoint_optimizer {
crnlib::vector<vec3I> m_lo_cells;
crnlib::vector<vec3I> m_hi_cells;
uint m_total_evals;
struct potential_solution {
potential_solution()
: m_coords(), m_error(cUINT64_MAX), m_alpha_block(false), m_valid(false) {
: m_coords(), m_error(cUINT64_MAX), m_alpha_block(false) {
}
dxt1_solution_coordinates m_coords;
crnlib::vector<uint8> m_selectors;
uint64 m_error;
bool m_alpha_block;
bool m_valid;
bool m_alternate_rounding;
bool m_enforce_selector;
uint8 m_enforced_selector;
@@ -261,7 +219,6 @@ class dxt1_endpoint_optimizer {
m_selectors.resize(0);
m_error = cUINT64_MAX;
m_alpha_block = false;
m_valid = false;
}
bool are_selectors_all_equal() const {
@@ -283,55 +240,30 @@ class dxt1_endpoint_optimizer {
bool refine_solution(int refinement_level = 0);
bool evaluate_solution(
const dxt1_solution_coordinates& coords,
bool early_out,
potential_solution* pBest_solution,
bool alternate_rounding = false);
bool evaluate_solution(const dxt1_solution_coordinates& coords, bool alternate_rounding = false);
bool evaluate_solution_uber(const dxt1_solution_coordinates& coords, bool alternate_rounding);
bool evaluate_solution_fast(const dxt1_solution_coordinates& coords, bool alternate_rounding);
bool evaluate_solution_hc(const dxt1_solution_coordinates& coords, bool alternate_rounding);
void compute_selectors();
void compute_selectors_hc();
bool evaluate_solution_uber(
potential_solution& solution,
const dxt1_solution_coordinates& coords,
bool early_out,
potential_solution* pBest_solution,
bool alternate_rounding = false);
bool evaluate_solution_fast(
potential_solution& solution,
const dxt1_solution_coordinates& coords,
bool early_out,
potential_solution* pBest_solution,
bool alternate_rounding = false);
void clear();
void find_unique_colors();
bool handle_all_transparent_block();
bool handle_solid_block();
bool handle_multicolor_block();
bool handle_grayscale_block();
void handle_multicolor_block();
void compute_pca(vec3F& axis, const vec3F_array& norm_colors, const vec3F& def);
void compute_vectors(const vec3F& perceptual_weights);
void return_solution(results& results, const potential_solution& solution);
void return_solution();
void try_combinatorial_encoding();
void optimize_endpoint_comps();
bool optimize_endpoints(vec3F& low_color, vec3F& high_color);
void optimize_endpoints(vec3F& low_color, vec3F& high_color);
bool try_alpha_as_black_optimization();
bool try_average_block_as_solid();
bool try_median4(const vec3F& low_color, const vec3F& high_color);
bool compute_internal(const params& p, results& r, solution_vec* pSolutions);
void compute_internal(const params& p, results& r);
unique_color lerp_color(const color_quad_u8& a, const color_quad_u8& b, float f, int rounding = 1);
inline uint color_distance(bool perceptual, const color_quad_u8& e1, const color_quad_u8& e2, bool alpha);
static inline vec3F unpack_to_vec3F_raw(uint16 packed_color);
static inline vec3F unpack_to_vec3F(uint16 packed_color);
};
inline void swap(dxt1_endpoint_optimizer::solution& a, dxt1_endpoint_optimizer::solution& b) {
std::swap(a.m_results, b.m_results);
a.m_selectors.swap(b.m_selectors);
}
} // namespace crnlib
-3
View File
@@ -1162,9 +1162,6 @@ void dxt_image::set_block_pixels(
params.m_pPixels = pPixels;
params.m_num_pixels = cDXTBlockSize * cDXTBlockSize;
params.m_endpoint_caching = p.m_endpoint_caching;
params.m_color_weights[0] = p.m_color_weights[0];
params.m_color_weights[1] = p.m_color_weights[1];
params.m_color_weights[2] = p.m_color_weights[2];
if ((m_format != cDXT1) && (m_format != cDXT1A))
params.m_use_alpha_blocks = false;
-5
View File
@@ -120,9 +120,6 @@ class dxt_image {
m_progress_range = 100;
m_use_transparent_indices_for_black = false;
m_pTask_pool = NULL;
m_color_weights[0] = 1;
m_color_weights[1] = 1;
m_color_weights[2] = 1;
}
void init(const crn_comp_params& params) {
@@ -160,8 +157,6 @@ class dxt_image {
uint m_progress_range;
task_pool* m_pTask_pool;
int m_color_weights[3];
};
bool init(dxt_format fmt, const image_u8& img, const pack_params& p = dxt_image::pack_params());