13b1faa48d
This change slightly improves compression ratio and compression time. Explanation: The efficiency of the Crunch encoding scheme depends on the similarity between neighbouring chunks. For this reason, in the original version of Crunch the order of chunks is reversed after each scanline, so that there is no jump from one side of the image to the other at the image borders. The problem here is that inside each chunk, the blocks are normally ordered in the usual top-to-bottom, left-to-right manner, regardless of the chunk scanning order. While on the forward scan we normally need to perform diagonal jumps (+1, +1) in order to get to the next chunk, on the reverse scan we normally need to perform much larger (-3, +1) jumps, which usually defeats the advantage of not having a discontinuity at the image borders. Note: This modification alters the output format and makes it incompatible with the previous revisions. Testing: The modified algorithm has been tested on the Kodak test set using a 64-bit build with default settings (running on Windows 10, i7-4790, 3.6GHz). All the decompressed test images are identical to the images compressed and decompressed using the original version of Crunch. [Compressing Kodak set without mipmaps] Original: 1582222 bytes / 28.882 sec Modified: 1579618 bytes / 28.743 sec Improvement: 0.16% (compression ratio) / 0.48% (compression time) [Compressing Kodak set with mipmaps] Original: 2065243 bytes / 36.920 sec Modified: 2061499 bytes / 36.833 sec Improvement: 0.18% (compression ratio) / 0.24% (compression time)
429 lines
16 KiB
C++
429 lines
16 KiB
C++
// File: crn_dxt_hc.h
|
|
// See Copyright Notice and license at the end of inc/crnlib.h
|
|
#pragma once
|
|
#include "crn_dxt1.h"
|
|
#include "crn_dxt5a.h"
|
|
#include "crn_dxt_endpoint_refiner.h"
|
|
#include "crn_image.h"
|
|
#include "crn_dxt.h"
|
|
#include "crn_image.h"
|
|
#include "crn_dxt_hc_common.h"
|
|
#include "crn_tree_clusterizer.h"
|
|
#include "crn_threading.h"
|
|
|
|
#define CRN_NO_FUNCTION_DEFINITIONS
|
|
#include "../inc/crnlib.h"
|
|
|
|
namespace crnlib {
|
|
// Total number of compression phases.
// NOTE(review): presumably the bound for the phase_index that dxt_hc::update_progress() receives --
// confirm against the implementation file.
const uint cTotalCompressionPhases = 25;
|
|
|
|
class dxt_hc {
|
|
public:
|
|
dxt_hc();
|
|
~dxt_hc();
|
|
|
|
struct pixel_chunk {
|
|
pixel_chunk() { clear(); }
|
|
|
|
dxt_pixel_block m_blocks[cChunkBlockHeight][cChunkBlockWidth];
|
|
|
|
const color_quad_u8& operator()(uint cx, uint cy) const {
|
|
CRNLIB_ASSERT((cx < cChunkPixelWidth) && (cy < cChunkPixelHeight));
|
|
|
|
return m_blocks[cy >> cBlockPixelHeightShift][cx >> cBlockPixelWidthShift].m_pixels
|
|
[cy & (cBlockPixelHeight - 1)][cx & (cBlockPixelWidth - 1)];
|
|
}
|
|
|
|
color_quad_u8& operator()(uint cx, uint cy) {
|
|
CRNLIB_ASSERT((cx < cChunkPixelWidth) && (cy < cChunkPixelHeight));
|
|
|
|
return m_blocks[cy >> cBlockPixelHeightShift][cx >> cBlockPixelWidthShift].m_pixels
|
|
[cy & (cBlockPixelHeight - 1)][cx & (cBlockPixelWidth - 1)];
|
|
}
|
|
|
|
inline void clear() {
|
|
utils::zero_object(*this);
|
|
m_weight = 1.0f;
|
|
}
|
|
|
|
float m_weight;
|
|
uint m_legacy_index;
|
|
};
|
|
|
|
typedef crnlib::vector<pixel_chunk> pixel_chunk_vec;
|
|
|
|
struct params {
|
|
params()
|
|
: m_color_endpoint_codebook_size(3072),
|
|
m_color_selector_codebook_size(3072),
|
|
m_alpha_endpoint_codebook_size(3072),
|
|
m_alpha_selector_codebook_size(3072),
|
|
m_adaptive_tile_color_psnr_derating(2.0f), // was 3.4f
|
|
m_adaptive_tile_alpha_psnr_derating(2.0f),
|
|
m_adaptive_tile_color_alpha_weighting_ratio(3.0f),
|
|
m_num_levels(0),
|
|
m_format(cDXT1),
|
|
m_hierarchical(true),
|
|
m_perceptual(true),
|
|
m_debugging(false),
|
|
m_pProgress_func(NULL),
|
|
m_pProgress_func_data(NULL) {
|
|
m_alpha_component_indices[0] = 3;
|
|
m_alpha_component_indices[1] = 0;
|
|
|
|
for (uint i = 0; i < cCRNMaxLevels; i++) {
|
|
m_levels[i].m_first_chunk = 0;
|
|
m_levels[i].m_num_chunks = 0;
|
|
}
|
|
}
|
|
|
|
// Valid range for codebook sizes: [32,8192] (non-power of two values are okay)
|
|
uint m_color_endpoint_codebook_size;
|
|
uint m_color_selector_codebook_size;
|
|
|
|
uint m_alpha_endpoint_codebook_size;
|
|
uint m_alpha_selector_codebook_size;
|
|
|
|
// Higher values cause fewer 8x4, 4x8, and 4x4 blocks to be utilized less often (lower quality/smaller files).
|
|
// Lower values cause the encoder to use large tiles less often (better quality/larger files).
|
|
// Valid range: [0.0,100.0].
|
|
// A value of 0 will cause the encoder to only use tiles larger than 4x4 if doing so would incur to quality loss.
|
|
float m_adaptive_tile_color_psnr_derating;
|
|
|
|
float m_adaptive_tile_alpha_psnr_derating;
|
|
|
|
float m_adaptive_tile_color_alpha_weighting_ratio;
|
|
|
|
uint m_alpha_component_indices[2];
|
|
|
|
struct miplevel_desc {
|
|
uint m_first_chunk;
|
|
uint m_num_chunks;
|
|
};
|
|
// The mip level data is optional!
|
|
miplevel_desc m_levels[cCRNMaxLevels];
|
|
uint m_num_levels;
|
|
|
|
dxt_format m_format;
|
|
|
|
// If m_hierarchical is false, only 4x4 blocks will be used by the encoder (leading to higher quality/larger files).
|
|
bool m_hierarchical;
|
|
|
|
// If m_perceptual is true, perceptual color metrics will be used by the encoder.
|
|
bool m_perceptual;
|
|
|
|
bool m_debugging;
|
|
|
|
crn_progress_callback_func m_pProgress_func;
|
|
void* m_pProgress_func_data;
|
|
};
|
|
|
|
void clear();
|
|
|
|
// Main compression function
|
|
bool compress(const params& p, uint num_chunks, const pixel_chunk* pChunks, task_pool& task_pool);
|
|
|
|
// Output accessors
|
|
inline uint get_num_chunks() const { return m_num_chunks; }
|
|
|
|
struct chunk_encoding {
|
|
chunk_encoding() { utils::zero_object(*this); };
|
|
|
|
// Index into g_chunk_encodings.
|
|
uint8 m_encoding_index;
|
|
|
|
// Number of tiles, endpoint indices.
|
|
uint8 m_num_tiles;
|
|
|
|
// Color, alpha0, alpha1
|
|
enum { cColorIndex = 0,
|
|
cAlpha0Index = 1,
|
|
cAlpha1Index = 2 };
|
|
uint16 m_endpoint_indices[3][cChunkMaxTiles];
|
|
uint16 m_selector_indices[3][cChunkBlockHeight][cChunkBlockWidth]; // [block_y][block_x]
|
|
};
|
|
|
|
typedef crnlib::vector<chunk_encoding> chunk_encoding_vec;
|
|
|
|
inline const chunk_encoding& get_chunk_encoding(uint chunk_index) const { return m_chunk_encoding[chunk_index]; }
|
|
inline const chunk_encoding_vec& get_chunk_encoding_vec() const { return m_chunk_encoding; }
|
|
|
|
struct selectors {
|
|
selectors() { utils::zero_object(*this); }
|
|
|
|
uint8 m_selectors[cBlockPixelHeight][cBlockPixelWidth];
|
|
|
|
uint8 get_by_index(uint i) const {
|
|
CRNLIB_ASSERT(i < (cBlockPixelWidth * cBlockPixelHeight));
|
|
const uint8* p = (const uint8*)m_selectors;
|
|
return *(p + i);
|
|
}
|
|
void set_by_index(uint i, uint v) {
|
|
CRNLIB_ASSERT(i < (cBlockPixelWidth * cBlockPixelHeight));
|
|
uint8* p = (uint8*)m_selectors;
|
|
*(p + i) = static_cast<uint8>(v);
|
|
}
|
|
};
|
|
typedef crnlib::vector<selectors> selectors_vec;
|
|
|
|
// Color endpoints
|
|
inline uint get_color_endpoint_codebook_size() const { return m_color_endpoints.size(); }
|
|
inline uint get_color_endpoint(uint codebook_index) const { return m_color_endpoints[codebook_index]; }
|
|
const crnlib::vector<uint>& get_color_endpoint_vec() const { return m_color_endpoints; }
|
|
|
|
// Color selectors
|
|
uint get_color_selector_codebook_size() const { return m_color_selectors.size(); }
|
|
const selectors& get_color_selectors(uint codebook_index) const { return m_color_selectors[codebook_index]; }
|
|
const crnlib::vector<selectors>& get_color_selectors_vec() const { return m_color_selectors; }
|
|
|
|
// Alpha endpoints
|
|
inline uint get_alpha_endpoint_codebook_size() const { return m_alpha_endpoints.size(); }
|
|
inline uint get_alpha_endpoint(uint codebook_index) const { return m_alpha_endpoints[codebook_index]; }
|
|
const crnlib::vector<uint>& get_alpha_endpoint_vec() const { return m_alpha_endpoints; }
|
|
|
|
// Alpha selectors
|
|
uint get_alpha_selector_codebook_size() const { return m_alpha_selectors.size(); }
|
|
const selectors& get_alpha_selectors(uint codebook_index) const { return m_alpha_selectors[codebook_index]; }
|
|
const crnlib::vector<selectors>& get_alpha_selectors_vec() const { return m_alpha_selectors; }
|
|
|
|
// Debug images
|
|
const pixel_chunk_vec& get_compressed_chunk_pixels() const { return m_dbg_chunk_pixels; }
|
|
const pixel_chunk_vec& get_compressed_chunk_pixels_tile_vis() const { return m_dbg_chunk_pixels_tile_vis; }
|
|
const pixel_chunk_vec& get_compressed_chunk_pixels_color_quantized() const { return m_dbg_chunk_pixels_color_quantized; }
|
|
const pixel_chunk_vec& get_compressed_chunk_pixels_alpha_quantized() const { return m_dbg_chunk_pixels_alpha_quantized; }
|
|
const pixel_chunk_vec& get_compressed_chunk_pixels_final() const { return m_dbg_chunk_pixels_final; }
|
|
|
|
const pixel_chunk_vec& get_compressed_chunk_pixels_orig_color_selectors() const { return m_dbg_chunk_pixels_orig_color_selectors; }
|
|
const pixel_chunk_vec& get_compressed_chunk_pixels_quantized_color_selectors() const { return m_dbg_chunk_pixels_quantized_color_selectors; }
|
|
const pixel_chunk_vec& get_compressed_chunk_pixels_final_color_selectors() const { return m_dbg_chunk_pixels_final_color_selectors; }
|
|
|
|
const pixel_chunk_vec& get_compressed_chunk_pixels_orig_alpha_selectors() const { return m_dbg_chunk_pixels_orig_alpha_selectors; }
|
|
const pixel_chunk_vec& get_compressed_chunk_pixels_quantized_alpha_selectors() const { return m_dbg_chunk_pixels_quantized_alpha_selectors; }
|
|
const pixel_chunk_vec& get_compressed_chunk_pixels_final_alpha_selectors() const { return m_dbg_chunk_pixels_final_alpha_selectors; }
|
|
|
|
static void create_debug_image_from_chunks(uint num_chunks_x, uint num_chunks_y, const pixel_chunk_vec& chunks, const chunk_encoding_vec* pChunk_encodings, image_u8& img, bool serpentine_scan, int comp_index = -1);
|
|
|
|
private:
|
|
params m_params;
|
|
|
|
uint m_num_chunks;
|
|
const pixel_chunk* m_pChunks;
|
|
|
|
chunk_encoding_vec m_chunk_encoding;
|
|
|
|
uint m_num_alpha_blocks; // 0, 1, or 2
|
|
bool m_has_color_blocks;
|
|
bool m_has_alpha0_blocks;
|
|
bool m_has_alpha1_blocks;
|
|
|
|
struct compressed_tile {
|
|
uint m_endpoint_cluster_index;
|
|
uint m_first_endpoint;
|
|
uint m_second_endpoint;
|
|
|
|
uint8 m_selectors[cChunkPixelWidth * cChunkPixelHeight];
|
|
|
|
void set_selector(uint x, uint y, uint s) {
|
|
CRNLIB_ASSERT((x < m_pixel_width) && (y < m_pixel_height));
|
|
m_selectors[x + y * m_pixel_width] = static_cast<uint8>(s);
|
|
}
|
|
|
|
uint get_selector(uint x, uint y) const {
|
|
CRNLIB_ASSERT((x < m_pixel_width) && (y < m_pixel_height));
|
|
return m_selectors[x + y * m_pixel_width];
|
|
}
|
|
|
|
uint8 m_pixel_width;
|
|
uint8 m_pixel_height;
|
|
|
|
uint8 m_layout_index;
|
|
|
|
bool m_alpha_encoding;
|
|
};
|
|
|
|
struct compressed_chunk {
|
|
compressed_chunk() { utils::zero_object(*this); }
|
|
|
|
uint8 m_encoding_index;
|
|
|
|
uint8 m_num_tiles;
|
|
|
|
compressed_tile m_tiles[cChunkMaxTiles];
|
|
compressed_tile m_quantized_tiles[cChunkMaxTiles];
|
|
|
|
uint16 m_endpoint_cluster_index[cChunkMaxTiles];
|
|
uint16 m_selector_cluster_index[cChunkBlockHeight][cChunkBlockWidth];
|
|
};
|
|
|
|
typedef crnlib::vector<compressed_chunk> compressed_chunk_vec;
|
|
enum {
|
|
cColorChunks = 0,
|
|
cAlpha0Chunks = 1,
|
|
cAlpha1Chunks = 2,
|
|
|
|
cNumCompressedChunkVecs = 3
|
|
};
|
|
compressed_chunk_vec m_compressed_chunks[cNumCompressedChunkVecs];
|
|
|
|
volatile atomic32_t m_encoding_hist[cNumChunkEncodings];
|
|
|
|
atomic32_t m_total_tiles;
|
|
|
|
void compress_dxt1_block(
|
|
dxt1_endpoint_optimizer::results& results,
|
|
uint chunk_index, const image_u8& chunk, uint x_ofs, uint y_ofs, uint width, uint height,
|
|
uint8* pSelectors);
|
|
|
|
void compress_dxt5_block(
|
|
dxt5_endpoint_optimizer::results& results,
|
|
uint chunk_index, const image_u8& chunk, uint x_ofs, uint y_ofs, uint width, uint height, uint component_index,
|
|
uint8* pAlpha_selectors);
|
|
|
|
void determine_compressed_chunks_task(uint64 data, void* pData_ptr);
|
|
bool determine_compressed_chunks();
|
|
|
|
struct tile_cluster {
|
|
tile_cluster()
|
|
: m_first_endpoint(0), m_second_endpoint(0), m_error(0), m_alpha_encoding(false) {}
|
|
|
|
// first = chunk, second = tile
|
|
// if an alpha tile, second's upper 16 bits contains the alpha index (0 or 1)
|
|
crnlib::vector<std::pair<uint, uint> > m_tiles;
|
|
|
|
uint m_first_endpoint;
|
|
uint m_second_endpoint;
|
|
uint64 m_error;
|
|
|
|
bool m_alpha_encoding;
|
|
};
|
|
|
|
typedef crnlib::vector<tile_cluster> tile_cluster_vec;
|
|
|
|
tile_cluster_vec m_color_clusters;
|
|
tile_cluster_vec m_alpha_clusters;
|
|
|
|
selectors_vec m_color_selectors;
|
|
selectors_vec m_alpha_selectors;
|
|
|
|
// For each selector, this array indicates every chunk/tile/tile block that use this color selector.
|
|
struct block_id {
|
|
block_id() { utils::zero_object(*this); }
|
|
|
|
block_id(uint chunk_index, uint alpha_index, uint tile_index, uint block_x, uint block_y)
|
|
: m_chunk_index(chunk_index), m_alpha_index((uint8)alpha_index), m_tile_index((uint8)tile_index), m_block_x((uint8)block_x), m_block_y((uint8)block_y) {}
|
|
|
|
uint m_chunk_index;
|
|
uint8 m_alpha_index;
|
|
uint8 m_tile_index;
|
|
uint8 m_block_x;
|
|
uint8 m_block_y;
|
|
};
|
|
|
|
typedef crnlib::vector<crnlib::vector<block_id> > chunk_blocks_using_selectors_vec;
|
|
chunk_blocks_using_selectors_vec m_chunk_blocks_using_color_selectors;
|
|
chunk_blocks_using_selectors_vec m_chunk_blocks_using_alpha_selectors; // second's upper 16 bits contain alpha index!
|
|
|
|
crnlib::vector<uint> m_color_endpoints; // not valid until end, only for user access
|
|
crnlib::vector<uint> m_alpha_endpoints; // not valid until end, only for user access
|
|
|
|
// Debugging
|
|
pixel_chunk_vec m_dbg_chunk_pixels;
|
|
pixel_chunk_vec m_dbg_chunk_pixels_tile_vis;
|
|
pixel_chunk_vec m_dbg_chunk_pixels_color_quantized;
|
|
pixel_chunk_vec m_dbg_chunk_pixels_alpha_quantized;
|
|
|
|
pixel_chunk_vec m_dbg_chunk_pixels_orig_color_selectors;
|
|
pixel_chunk_vec m_dbg_chunk_pixels_quantized_color_selectors;
|
|
pixel_chunk_vec m_dbg_chunk_pixels_final_color_selectors;
|
|
|
|
pixel_chunk_vec m_dbg_chunk_pixels_orig_alpha_selectors;
|
|
pixel_chunk_vec m_dbg_chunk_pixels_quantized_alpha_selectors;
|
|
pixel_chunk_vec m_dbg_chunk_pixels_final_alpha_selectors;
|
|
|
|
pixel_chunk_vec m_dbg_chunk_pixels_final;
|
|
|
|
crn_thread_id_t m_main_thread_id;
|
|
bool m_canceled;
|
|
task_pool* m_pTask_pool;
|
|
|
|
int m_prev_phase_index;
|
|
int m_prev_percentage_complete;
|
|
|
|
typedef vec<6, float> vec6F;
|
|
typedef vec<16, float> vec16F;
|
|
typedef tree_clusterizer<vec2F> vec2F_tree_vq;
|
|
typedef tree_clusterizer<vec6F> vec6F_tree_vq;
|
|
typedef tree_clusterizer<vec16F> vec16F_tree_vq;
|
|
|
|
struct assign_color_endpoint_clusters_state {
|
|
CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(assign_color_endpoint_clusters_state);
|
|
|
|
assign_color_endpoint_clusters_state(vec6F_tree_vq& vq, crnlib::vector<crnlib::vector<vec6F> >& training_vecs)
|
|
: m_vq(vq), m_training_vecs(training_vecs) {}
|
|
|
|
vec6F_tree_vq& m_vq;
|
|
crnlib::vector<crnlib::vector<vec6F> >& m_training_vecs;
|
|
};
|
|
|
|
struct create_selector_codebook_state {
|
|
CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(create_selector_codebook_state);
|
|
|
|
create_selector_codebook_state(dxt_hc& hc, bool alpha_blocks, uint comp_index_start, uint comp_index_end, vec16F_tree_vq& selector_vq, chunk_blocks_using_selectors_vec& chunk_blocks_using_selectors, selectors_vec& selectors_cb)
|
|
: m_hc(hc),
|
|
m_alpha_blocks(alpha_blocks),
|
|
m_comp_index_start(comp_index_start),
|
|
m_comp_index_end(comp_index_end),
|
|
m_selector_vq(selector_vq),
|
|
m_chunk_blocks_using_selectors(chunk_blocks_using_selectors),
|
|
m_selectors_cb(selectors_cb) {
|
|
}
|
|
|
|
dxt_hc& m_hc;
|
|
bool m_alpha_blocks;
|
|
uint m_comp_index_start;
|
|
uint m_comp_index_end;
|
|
vec16F_tree_vq& m_selector_vq;
|
|
chunk_blocks_using_selectors_vec& m_chunk_blocks_using_selectors;
|
|
selectors_vec& m_selectors_cb;
|
|
|
|
mutable spinlock m_chunk_blocks_using_selectors_lock;
|
|
};
|
|
|
|
void assign_color_endpoint_clusters_task(uint64 data, void* pData_ptr);
|
|
bool determine_color_endpoint_clusters();
|
|
|
|
struct determine_alpha_endpoint_clusters_state {
|
|
vec2F_tree_vq m_vq;
|
|
crnlib::vector<crnlib::vector<vec2F> > m_training_vecs[2];
|
|
};
|
|
|
|
void determine_alpha_endpoint_clusters_task(uint64 data, void* pData_ptr);
|
|
bool determine_alpha_endpoint_clusters();
|
|
|
|
void determine_color_endpoint_codebook_task(uint64 data, void* pData_ptr);
|
|
bool determine_color_endpoint_codebook();
|
|
|
|
void determine_alpha_endpoint_codebook_task(uint64 data, void* pData_ptr);
|
|
bool determine_alpha_endpoint_codebook();
|
|
|
|
void create_quantized_debug_images();
|
|
|
|
void create_selector_codebook_task(uint64 data, void* pData_ptr);
|
|
bool create_selector_codebook(bool alpha_blocks);
|
|
|
|
bool refine_quantized_color_endpoints();
|
|
bool refine_quantized_color_selectors();
|
|
bool refine_quantized_alpha_endpoints();
|
|
bool refine_quantized_alpha_selectors();
|
|
void create_final_debug_image();
|
|
bool create_chunk_encodings();
|
|
bool update_progress(uint phase_index, uint subphase_index, uint subphase_total);
|
|
bool compress_internal(const params& p, uint num_chunks, const pixel_chunk* pChunks);
|
|
};
|
|
|
|
// Tag the plain-data dxt_hc structs with CRNLIB_DEFINE_BITWISE_COPYABLE.
// NOTE(review): presumably this lets crnlib containers copy/move these types with raw memory
// copies -- confirm against the macro's definition.
CRNLIB_DEFINE_BITWISE_COPYABLE(dxt_hc::pixel_chunk);
|
|
CRNLIB_DEFINE_BITWISE_COPYABLE(dxt_hc::chunk_encoding);
|
|
CRNLIB_DEFINE_BITWISE_COPYABLE(dxt_hc::selectors);
|
|
|
|
} // namespace crnlib
|