Use XOR-deltas for selector codebook encoding
This change improves compression ratio for both DXT and ETC encodings.
Explanation:
When encoding the deltas between two pixel selectors, it is possible to use XOR-deltas instead of modulo-deltas. At first it might seem counterintuitive that an XOR-delta can perform better than a modulo-delta, as it does not reflect the continuity properties of the data as well. The actual trick is that the encoded selectors are first sorted according to the delta operation in use and its corresponding distance metric. The initial distance maps for the XOR-deltas were obtained experimentally, using bitrate optimization on the test set of images. Additionally, ETC1 decoding has been optimized for speed: all the normal and flipped ETC1 selectors are now computed in advance.
Note: This modification alters the output file format and makes it incompatible with the previous revisions.
DXT Testing:
The modified algorithm has been tested on the Kodak test set using a 64-bit build with default settings (running on Windows 10, i7-4790, 3.6GHz). All the decompressed test images are identical to the images compressed and decompressed using the original version of Crunch (revision ea9b8d8).
[Compressing Kodak set without mipmaps using DXT1 encoding]
Original: 1582222 bytes / 28.899 sec
Modified: 1468204 bytes / 13.353 sec
Improvement: 7.21% (compression ratio) / 53.79% (compression time)
[Compressing Kodak set with mipmaps using DXT1 encoding]
Original: 2065243 bytes / 36.985 sec
Modified: 1914805 bytes / 18.111 sec
Improvement: 7.28% (compression ratio) / 51.03% (compression time)
ETC Testing:
The modified algorithm has been tested on the Kodak test set using a 64-bit build with default settings (running on Windows 10, i7-4790, 3.6GHz). The ETC1 quantization parameters have been selected so that ETC1 compression gives approximately the same average Luma PSNR as the corresponding DXT1 compression (which is equal to 34.044 dB for the Kodak test set compressed without mipmaps using DXT1 encoding and default quality settings).
[Compressing Kodak set without mipmaps using ETC1 encoding]
Total size: 1607858 bytes
Total time: 17.356 sec
Average bitrate: 1.363 bpp
Average Luma PSNR: 34.050 dB
This commit is contained in:
Binary file not shown.
+24
-57
@@ -184,36 +184,19 @@ bool crn_comp::pack_color_selectors(crnlib::vector<uint8>& packed_data, const cr
|
||||
crnlib::vector<uint32> remapped_selectors(m_color_selectors.size());
|
||||
for (uint i = 0; i < m_color_selectors.size(); i++)
|
||||
remapped_selectors[remapping[i]] = m_color_selectors[i];
|
||||
crnlib::vector<uint> residual_syms;
|
||||
residual_syms.reserve(m_color_selectors.size() * 8);
|
||||
symbol_histogram hist(16);
|
||||
uint32 prev_selector = 0;
|
||||
for (uint selector_index = 0; selector_index < m_color_selectors.size(); selector_index++) {
|
||||
uint32 cur_selector = remapped_selectors[selector_index];
|
||||
uint prev_sym = 0;
|
||||
for (uint32 selector = cur_selector, i = 0; i < 16; i++, selector >>= 2, prev_selector >>= 2) {
|
||||
int sym = selector - prev_selector & 3;
|
||||
if (i & 1) {
|
||||
uint paired_sym = sym << 2 | prev_sym;
|
||||
residual_syms.push_back(paired_sym);
|
||||
hist.inc_freq(paired_sym);
|
||||
} else
|
||||
prev_sym = sym;
|
||||
}
|
||||
prev_selector = cur_selector;
|
||||
for (uint32 c, selector, prev_selector = 0, i = 0; i < remapped_selectors.size(); i++) {
|
||||
for (selector = prev_selector ^ remapped_selectors[i], prev_selector ^= selector, c = 8; c; c--, selector >>= 4)
|
||||
hist.inc_freq(selector & 0xF);
|
||||
}
|
||||
static_huffman_data_model residual_dm;
|
||||
static_huffman_data_model dm;
|
||||
dm.init(true, hist, 15);
|
||||
symbol_codec codec;
|
||||
codec.start_encoding(1024 * 1024);
|
||||
if (!residual_dm.init(true, hist, 15))
|
||||
return false;
|
||||
if (!codec.encode_transmit_static_huffman_data_model(residual_dm, false))
|
||||
return false;
|
||||
uint start_bits = codec.encode_get_total_bits_written();
|
||||
start_bits;
|
||||
for (uint i = 0; i < residual_syms.size(); i++) {
|
||||
const uint sym = residual_syms[i];
|
||||
codec.encode(sym, residual_dm);
|
||||
codec.encode_transmit_static_huffman_data_model(dm, false);
|
||||
for (uint32 c, selector, prev_selector = 0, i = 0; i < remapped_selectors.size(); i++) {
|
||||
for (selector = prev_selector ^ remapped_selectors[i], prev_selector ^= selector, c = 8; c; c--, selector >>= 4)
|
||||
codec.encode(selector & 0xF, dm);
|
||||
}
|
||||
codec.stop_encoding(false);
|
||||
packed_data.swap(codec.get_encoding_buf());
|
||||
@@ -224,37 +207,19 @@ bool crn_comp::pack_alpha_selectors(crnlib::vector<uint8>& packed_data, const cr
|
||||
crnlib::vector<uint64> remapped_selectors(m_alpha_selectors.size());
|
||||
for (uint i = 0; i < m_alpha_selectors.size(); i++)
|
||||
remapped_selectors[remapping[i]] = m_alpha_selectors[i];
|
||||
crnlib::vector<uint> residual_syms;
|
||||
residual_syms.reserve(m_alpha_selectors.size() * 8);
|
||||
symbol_histogram hist(64);
|
||||
uint64 prev_selector = 0;
|
||||
for (uint selector_index = 0; selector_index < m_alpha_selectors.size(); selector_index++) {
|
||||
uint64 cur_selector = remapped_selectors[selector_index];
|
||||
uint prev_sym = 0;
|
||||
for (uint64 selector = cur_selector, i = 0; i < 16; i++, selector >>= 3, prev_selector >>= 3) {
|
||||
int sym = selector - prev_selector & 7;
|
||||
if (i & 1) {
|
||||
uint paired_sym = sym << 3 | prev_sym;
|
||||
residual_syms.push_back(paired_sym);
|
||||
hist.inc_freq(paired_sym);
|
||||
} else
|
||||
prev_sym = sym;
|
||||
}
|
||||
prev_selector = cur_selector;
|
||||
for (uint64 c, selector, prev_selector = 0, i = 0; i < remapped_selectors.size(); i++) {
|
||||
for (selector = prev_selector ^ remapped_selectors[i], prev_selector ^= selector, c = 8; c; c--, selector >>= 6)
|
||||
hist.inc_freq(selector & 0x3F);
|
||||
}
|
||||
|
||||
static_huffman_data_model residual_dm;
|
||||
static_huffman_data_model dm;
|
||||
dm.init(true, hist, 15);
|
||||
symbol_codec codec;
|
||||
codec.start_encoding(1024 * 1024);
|
||||
if (!residual_dm.init(true, hist, 15))
|
||||
return false;
|
||||
if (!codec.encode_transmit_static_huffman_data_model(residual_dm, false))
|
||||
return false;
|
||||
uint start_bits = codec.encode_get_total_bits_written();
|
||||
start_bits;
|
||||
for (uint i = 0; i < residual_syms.size(); i++) {
|
||||
const uint sym = residual_syms[i];
|
||||
codec.encode(sym, residual_dm);
|
||||
codec.encode_transmit_static_huffman_data_model(dm, false);
|
||||
for (uint64 c, selector, prev_selector = 0, i = 0; i < remapped_selectors.size(); i++) {
|
||||
for (selector = prev_selector ^ remapped_selectors[i], prev_selector ^= selector, c = 8; c; c--, selector >>= 6)
|
||||
codec.encode(selector & 0x3F, dm);
|
||||
}
|
||||
codec.stop_encoding(false);
|
||||
packed_data.swap(codec.get_encoding_buf());
|
||||
@@ -741,10 +706,11 @@ void crn_comp::optimize_color_selectors() {
|
||||
uint16 n = m_color_selectors.size();
|
||||
remapping.resize(n);
|
||||
|
||||
uint8 d[] = {0, 1, 4, 1};
|
||||
uint8 d[] = {0, 5, 14, 10};
|
||||
|
||||
uint8 D4[0x100];
|
||||
for (uint16 i = 0; i < 0x100; i++)
|
||||
D4[i] = d[i - (i >> 4) & 3] + d[(i >> 2) - (i >> 6) & 3];
|
||||
D4[i] = d[(i ^ i >> 4) & 3] + d[(i >> 2 ^ i >> 6) & 3];
|
||||
uint8 D8[0x10000];
|
||||
for (uint32 i = 0; i < 0x10000; i++)
|
||||
D8[i] = D4[i >> 8 & 0xF0 | i >> 4 & 0xF] + D4[i >> 4 & 0xF0 | i & 0xF];
|
||||
@@ -989,10 +955,11 @@ void crn_comp::optimize_alpha_selectors() {
|
||||
uint16 n = m_alpha_selectors.size();
|
||||
remapping.resize(n);
|
||||
|
||||
uint8 d[] = {0, 1, 4, 9, 16, 9, 4, 1};
|
||||
uint8 d[] = {0, 2, 3, 3, 5, 5, 4, 4};
|
||||
|
||||
uint8 D6[0x1000];
|
||||
for (uint16 i = 0; i < 0x1000; i++)
|
||||
D6[i] = d[i - (i >> 6) & 7] + d[(i >> 3) - (i >> 9) & 7];
|
||||
D6[i] = d[(i ^ i >> 6) & 7] + d[(i >> 3 ^ i >> 9) & 7];
|
||||
|
||||
crnlib::vector<uint64> selectors(n);
|
||||
crnlib::vector<uint16> indices(n);
|
||||
|
||||
+35
-81
@@ -3176,44 +3176,27 @@ class crn_unpacker {
|
||||
}
|
||||
|
||||
bool decode_color_selectors() {
|
||||
const uint32 cMaxSelectorValue = 3U;
|
||||
const uint32 cMaxUniqueSelectorDeltas = cMaxSelectorValue * 2U + 1U;
|
||||
|
||||
const uint32 num_color_selectors = m_pHeader->m_color_selectors.m_num;
|
||||
|
||||
if (!m_codec.start_decoding(m_pData + m_pHeader->m_color_selectors.m_ofs, m_pHeader->m_color_selectors.m_size))
|
||||
return false;
|
||||
|
||||
m_codec.start_decoding(m_pData + m_pHeader->m_color_selectors.m_ofs, m_pHeader->m_color_selectors.m_size);
|
||||
static_huffman_data_model dm;
|
||||
if (!m_codec.decode_receive_static_data_model(dm))
|
||||
return false;
|
||||
|
||||
uint32 cur[16];
|
||||
utils::zero_object(cur);
|
||||
|
||||
if (!m_color_selectors.resize(num_color_selectors))
|
||||
return false;
|
||||
|
||||
uint32* CRND_RESTRICT pDst = &m_color_selectors[0];
|
||||
|
||||
const uint8* pFrom_linear = m_pHeader->m_format == cCRNFmtETC1 ? g_etc1_from_linear : g_dxt1_from_linear;
|
||||
|
||||
for (uint32 i = 0; i < num_color_selectors; i++) {
|
||||
for (uint32 j = 0; j < 8; j++) {
|
||||
int32 sym = m_codec.decode(dm);
|
||||
cur[j * 2 + 0] = cur[j * 2 + 0] + (sym & 3) & 3;
|
||||
cur[j * 2 + 1] = cur[j * 2 + 1] + (sym >> 2) & 3;
|
||||
m_codec.decode_receive_static_data_model(dm);
|
||||
m_color_selectors.resize(m_pHeader->m_color_selectors.m_num << (m_pHeader->m_format == cCRNFmtETC1 ? 1 : 0));
|
||||
for (uint32 s = 0, i = 0; i < m_pHeader->m_color_selectors.m_num; i++) {
|
||||
for (uint32 j = 0; j < 32; j += 4)
|
||||
s ^= m_codec.decode(dm) << j;
|
||||
if (m_pHeader->m_format == cCRNFmtETC1) {
|
||||
for (uint32 selector = ~s & 0xAAAAAAAA | ~(s ^ s >> 1) & 0x55555555, t = 8, h = 0; h < 4; h++, t -= 15) {
|
||||
for (uint32 w = 0; w < 4; w++, t += 4) {
|
||||
uint32 s0 = selector >> (w << 3 | h << 1);
|
||||
m_color_selectors[i << 1] |= (s0 >> 1 & 1 | (s0 & 1) << 16) << (t & 15);
|
||||
uint32 s1 = selector >> (h << 3 | w << 1);
|
||||
m_color_selectors[i << 1 | 1] |= (s1 >> 1 & 1 | (s1 & 1) << 16) << (t & 15);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
m_color_selectors[i] = (s ^ s << 1) & 0xAAAAAAAA | s >> 1 & 0x55555555;
|
||||
}
|
||||
|
||||
*pDst++ =
|
||||
(pFrom_linear[cur[0]]) | (pFrom_linear[cur[1]] << 2) | (pFrom_linear[cur[2]] << 4) | (pFrom_linear[cur[3]] << 6) |
|
||||
(pFrom_linear[cur[4]] << 8) | (pFrom_linear[cur[5]] << 10) | (pFrom_linear[cur[6]] << 12) | (pFrom_linear[cur[7]] << 14) |
|
||||
(pFrom_linear[cur[8]] << 16) | (pFrom_linear[cur[9]] << 18) | (pFrom_linear[cur[10]] << 20) | (pFrom_linear[cur[11]] << 22) |
|
||||
(pFrom_linear[cur[12]] << 24) | (pFrom_linear[cur[13]] << 26) | (pFrom_linear[cur[14]] << 28) | (pFrom_linear[cur[15]] << 30);
|
||||
}
|
||||
|
||||
m_codec.stop_decoding();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -3245,47 +3228,24 @@ class crn_unpacker {
|
||||
}
|
||||
|
||||
bool decode_alpha_selectors() {
|
||||
const uint32 cMaxSelectorValue = 7U;
|
||||
const uint32 cMaxUniqueSelectorDeltas = cMaxSelectorValue * 2U + 1U;
|
||||
|
||||
const uint32 num_alpha_selectors = m_pHeader->m_alpha_selectors.m_num;
|
||||
|
||||
if (!m_codec.start_decoding(m_pData + m_pHeader->m_alpha_selectors.m_ofs, m_pHeader->m_alpha_selectors.m_size))
|
||||
return false;
|
||||
|
||||
m_codec.start_decoding(m_pData + m_pHeader->m_alpha_selectors.m_ofs, m_pHeader->m_alpha_selectors.m_size);
|
||||
static_huffman_data_model dm;
|
||||
if (!m_codec.decode_receive_static_data_model(dm))
|
||||
return false;
|
||||
|
||||
uint32 cur[16];
|
||||
utils::zero_object(cur);
|
||||
|
||||
if (!m_alpha_selectors.resize(num_alpha_selectors * 3))
|
||||
return false;
|
||||
|
||||
uint16* CRND_RESTRICT pDst = &m_alpha_selectors[0];
|
||||
|
||||
const uint8* pFrom_linear = g_dxt5_from_linear;
|
||||
|
||||
for (uint32 i = 0; i < num_alpha_selectors; i++) {
|
||||
for (uint32 j = 0; j < 8; j++) {
|
||||
int32 sym = m_codec.decode(dm);
|
||||
cur[j * 2 + 0] = cur[j * 2 + 0] + (sym & 7) & 7;
|
||||
cur[j * 2 + 1] = cur[j * 2 + 1] + (sym >> 3) & 7;
|
||||
}
|
||||
|
||||
*pDst++ = (uint16)((pFrom_linear[cur[0]]) | (pFrom_linear[cur[1]] << 3) | (pFrom_linear[cur[2]] << 6) | (pFrom_linear[cur[3]] << 9) |
|
||||
(pFrom_linear[cur[4]] << 12) | (pFrom_linear[cur[5]] << 15));
|
||||
|
||||
*pDst++ = (uint16)((pFrom_linear[cur[5]] >> 1) | (pFrom_linear[cur[6]] << 2) | (pFrom_linear[cur[7]] << 5) |
|
||||
(pFrom_linear[cur[8]] << 8) | (pFrom_linear[cur[9]] << 11) | (pFrom_linear[cur[10]] << 14));
|
||||
|
||||
*pDst++ = (uint16)((pFrom_linear[cur[10]] >> 2) | (pFrom_linear[cur[11]] << 1) | (pFrom_linear[cur[12]] << 4) |
|
||||
(pFrom_linear[cur[13]] << 7) | (pFrom_linear[cur[14]] << 10) | (pFrom_linear[cur[15]] << 13));
|
||||
m_codec.decode_receive_static_data_model(dm);
|
||||
m_alpha_selectors.resize(m_pHeader->m_alpha_selectors.m_num * 3);
|
||||
uint8 dxt5_from_linear[64];
|
||||
for (uint32 i = 0; i < 64; i++)
|
||||
dxt5_from_linear[i] = g_dxt5_from_linear[i & 7] | g_dxt5_from_linear[i >> 3] << 3;
|
||||
for (uint32 s0_linear = 0, s1_linear = 0, i = 0; i < m_alpha_selectors.size();) {
|
||||
uint32 s0 = 0, s1 = 0;
|
||||
for (uint32 j = 0; j < 24; s0 |= dxt5_from_linear[s0_linear >> j & 0x3F] << j, j += 6)
|
||||
s0_linear ^= m_codec.decode(dm) << j;
|
||||
for (uint32 j = 0; j < 24; s1 |= dxt5_from_linear[s1_linear >> j & 0x3F] << j, j += 6)
|
||||
s1_linear ^= m_codec.decode(dm) << j;
|
||||
m_alpha_selectors[i++] = s0;
|
||||
m_alpha_selectors[i++] = s0 >> 16 | s1 << 8;
|
||||
m_alpha_selectors[i++] = s1 >> 8;
|
||||
}
|
||||
|
||||
m_codec.stop_decoding();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -3585,7 +3545,7 @@ class crn_unpacker {
|
||||
}
|
||||
endpoint_reference >>= 2;
|
||||
*(uint32*)&e0 = m_color_endpoints[color_endpoint_index];
|
||||
uint32 selector = m_color_selectors[m_codec.decode(m_selector_delta_dm[0])];
|
||||
uint32 selector_index = m_codec.decode(m_selector_delta_dm[0]);
|
||||
if (endpoint_reference) {
|
||||
color_endpoint_index += m_codec.decode(m_endpoint_delta_dm[0]);
|
||||
if (color_endpoint_index >= num_color_endpoints)
|
||||
@@ -3595,20 +3555,14 @@ class crn_unpacker {
|
||||
m_block_buffer[x << 1 | 1].color_endpoint_index = color_endpoint_index;
|
||||
*(uint32*)&e1 = m_color_endpoints[color_endpoint_index];
|
||||
if (visible) {
|
||||
uint32 block_selector = 0, flip = endpoint_reference >> 1 ^ 1, diff = 1;
|
||||
for (uint32 t = 8, i = 0; i < 4; i++, t -= 15) {
|
||||
for (uint32 j = 0; j < 4; j++, t += 4) {
|
||||
uint32 s = selector >> (flip ? i << 3 | j << 1 : j << 3 | i << 1);
|
||||
block_selector |= (s >> 1 & 1 | (s & 1) << 16) << (t & 15);
|
||||
}
|
||||
}
|
||||
uint32 flip = endpoint_reference >> 1 ^ 1, diff = 1;
|
||||
for (uint c = 0; diff && c < 3; c++)
|
||||
diff = e0[c] + 3 >= e1[c] && e1[c] + 4 >= e0[c] ? diff : 0;
|
||||
for (uint c = 0; c < 3; c++)
|
||||
block_endpoint[c] = diff ? e0[c] << 3 | e1[c] - e0[c] & 7 : e0[c] << 3 & 0xF0 | e1[c] >> 1;
|
||||
block_endpoint[3] = e0[3] << 5 | e1[3] << 2 | diff << 1 | flip;
|
||||
pData[0] = *(uint32*)&block_endpoint;
|
||||
pData[1] = block_selector;
|
||||
pData[1] = m_color_selectors[selector_index << 1 | flip];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user