Use modulo deltas for selector codebook encoding

This change improves the compression ratio for both DXT and ETC encodings. Explanation: In the original version of Crunch, the selector codebook is encoded with Huffman coding applied to the raw deltas between corresponding pixel selectors of neighbouring codebook elements. However, using Huffman coding for raw deltas has a downside. Specifically, for each individual pixel selector, only about half of all the possible raw deltas are valid. Indeed, once the value of the current selector is determined, the selector delta depends only on the next selector value, so only N out of 2 * N - 1 total raw delta values are possible at any specific point (where N is the number of distinct selector values: 4 for color selectors and 8 for alpha selectors). And yet, the impossible raw delta values are still encoded with a non-zero probability, because the probability table is calculated over the whole codebook. The situation can be improved by using modulo deltas instead of raw deltas (modulo 4 for color selectors and modulo 8 for alpha selectors). This eliminates the aforementioned implicit restriction on the value of the selector delta, and therefore improves the compression ratio. The distance maps are initialized using squared distances between the selector values (the distances are calculated on a wrapped interval, in accordance with modular arithmetic). DXT Testing: The modified algorithm has been tested on the Kodak test set using a 64-bit build with default settings (running on Windows 10, i7-4790, 3.6GHz). All the decompressed test images are identical to the images compressed and decompressed using the original version of Crunch (rev ea9b8d8).
[Compressing Kodak set without mipmaps using DXT1 encoding] Original: 1582222 bytes / 28.870 sec Modified: 1473711 bytes / 13.286 sec Improvement: 6.86% (compression ratio) / 53.98% (compression time) [Compressing Kodak set with mipmaps using DXT1 encoding] Original: 2065243 bytes / 36.991 sec Modified: 1920600 bytes / 18.035 sec Improvement: 7.00% (compression ratio) / 51.24% (compression time) ETC Testing: The modified algorithm has been tested on the Kodak test set using a 64-bit build with default settings (running on Windows 10, i7-4790, 3.6GHz). The ETC1 quantization parameters have been selected so that ETC1 compression gives approximately the same average Luma PSNR as the corresponding DXT1 compression (which is equal to 34.044 dB for the Kodak test set compressed without mipmaps using DXT1 encoding and default quality settings). [Compressing Kodak set without mipmaps using ETC1 encoding] Total size: 1681327 bytes Total time: 17.403 sec Average bitrate: 1.425 bpp Average Luma PSNR: 34.057 dB
2017-07-10 13:05:10 +02:00
parent 205f8a171d
commit e3c1c6baf6
3 changed files with 16 additions and 46 deletions
@@ -186,15 +186,15 @@ bool crn_comp::pack_color_selectors(crnlib::vector<uint8>& packed_data, const cr
    remapped_selectors[remapping[i]] = m_color_selectors[i];
  crnlib::vector<uint> residual_syms;
  residual_syms.reserve(m_color_selectors.size() * 8);
-  symbol_histogram hist(49);
+  symbol_histogram hist(16);
  uint32 prev_selector = 0;
  for (uint selector_index = 0; selector_index < m_color_selectors.size(); selector_index++) {
    uint32 cur_selector = remapped_selectors[selector_index];
    uint prev_sym = 0;
    for (uint32 selector = cur_selector, i = 0; i < 16; i++, selector >>= 2, prev_selector >>= 2) {
-      int sym = 3 + (selector & 3) - (prev_selector & 3);
+      int sym = selector - prev_selector & 3;
      if (i & 1) {
-        uint paired_sym = 7 * sym + prev_sym;
+        uint paired_sym = sym << 2 | prev_sym;
        residual_syms.push_back(paired_sym);
        hist.inc_freq(paired_sym);
      } else
@@ -226,15 +226,15 @@ bool crn_comp::pack_alpha_selectors(crnlib::vector<uint8>& packed_data, const cr
    remapped_selectors[remapping[i]] = m_alpha_selectors[i];
  crnlib::vector<uint> residual_syms;
  residual_syms.reserve(m_alpha_selectors.size() * 8);
-  symbol_histogram hist(225);
+  symbol_histogram hist(64);
  uint64 prev_selector = 0;
  for (uint selector_index = 0; selector_index < m_alpha_selectors.size(); selector_index++) {
    uint64 cur_selector = remapped_selectors[selector_index];
    uint prev_sym = 0;
    for (uint64 selector = cur_selector, i = 0; i < 16; i++, selector >>= 3, prev_selector >>= 3) {
-      int sym = 7 + (selector & 7) - (prev_selector & 7);
+      int sym = selector - prev_selector & 7;
      if (i & 1) {
-        uint paired_sym = 15 * sym + prev_sym;
+        uint paired_sym = sym << 3 | prev_sym;
        residual_syms.push_back(paired_sym);
        hist.inc_freq(paired_sym);
      } else
@@ -741,12 +741,10 @@ void crn_comp::optimize_color_selectors() {
  uint16 n = m_color_selectors.size();
  remapping.resize(n);

+  uint8 d[] = {0, 1, 4, 1};
  uint8 D4[0x100];
-  for (uint16 i = 0; i < 0x100; i++) {
-    int d0 = (i & 3) - (i >> 4 & 3);
-    int d1 = (i >> 2 & 3) - (i >> 6 & 3);
-    D4[i] = d0 * d0 + d1 * d1;
-  }
+  for (uint16 i = 0; i < 0x100; i++)
+    D4[i] = d[i - (i >> 4) & 3] + d[(i >> 2) - (i >> 6) & 3];
  uint8 D8[0x10000];
  for (uint32 i = 0; i < 0x10000; i++)
    D8[i] = D4[i >> 8 & 0xF0 | i >> 4 & 0xF] + D4[i >> 4 & 0xF0 | i & 0xF];
@@ -991,12 +989,10 @@ void crn_comp::optimize_alpha_selectors() {
  uint16 n = m_alpha_selectors.size();
  remapping.resize(n);

+  uint8 d[] = {0, 1, 4, 9, 16, 9, 4, 1};
  uint8 D6[0x1000];
-  for (uint16 i = 0; i < 0x1000; i++) {
-    int d0 = (i & 7) - (i >> 6 & 7);
-    int d1 = (i >> 3 & 7) - (i >> 9 & 7);
-    D6[i] = d0 * d0 + d1 * d1;
-  }
+  for (uint16 i = 0; i < 0x1000; i++)
+    D6[i] = d[i - (i >> 6) & 7] + d[(i >> 3) - (i >> 9) & 7];

  crnlib::vector<uint64> selectors(n);
  crnlib::vector<uint16> indices(n);
@@ -3188,19 +3188,6 @@ class crn_unpacker {
    if (!m_codec.decode_receive_static_data_model(dm))
      return false;

-    int32 delta0[cMaxUniqueSelectorDeltas * cMaxUniqueSelectorDeltas];
-    int32 delta1[cMaxUniqueSelectorDeltas * cMaxUniqueSelectorDeltas];
-    int32 l = -(int32)cMaxSelectorValue, m = -(int32)cMaxSelectorValue;
-    for (uint32 i = 0; i < (cMaxUniqueSelectorDeltas * cMaxUniqueSelectorDeltas); i++) {
-      delta0[i] = l;
-      delta1[i] = m;
-
-      if (++l > (int32)cMaxSelectorValue) {
-        l = -(int32)cMaxSelectorValue;
-        m++;
-      }
-    }
-
    uint32 cur[16];
    utils::zero_object(cur);

@@ -3214,8 +3201,8 @@ class crn_unpacker {
    for (uint32 i = 0; i < num_color_selectors; i++) {
      for (uint32 j = 0; j < 8; j++) {
        int32 sym = m_codec.decode(dm);
-        cur[j * 2 + 0] = (delta0[sym] + cur[j * 2 + 0]) & 3;
-        cur[j * 2 + 1] = (delta1[sym] + cur[j * 2 + 1]) & 3;
+        cur[j * 2 + 0] = cur[j * 2 + 0] + (sym & 3) & 3;
+        cur[j * 2 + 1] = cur[j * 2 + 1] + (sym >> 2) & 3;
      }

      *pDst++ =
@@ -3270,19 +3257,6 @@ class crn_unpacker {
    if (!m_codec.decode_receive_static_data_model(dm))
      return false;

-    int32 delta0[cMaxUniqueSelectorDeltas * cMaxUniqueSelectorDeltas];
-    int32 delta1[cMaxUniqueSelectorDeltas * cMaxUniqueSelectorDeltas];
-    int32 l = -(int32)cMaxSelectorValue, m = -(int32)cMaxSelectorValue;
-    for (uint32 i = 0; i < (cMaxUniqueSelectorDeltas * cMaxUniqueSelectorDeltas); i++) {
-      delta0[i] = l;
-      delta1[i] = m;
-
-      if (++l > (int32)cMaxSelectorValue) {
-        l = -(int32)cMaxSelectorValue;
-        m++;
-      }
-    }
-
    uint32 cur[16];
    utils::zero_object(cur);

@@ -3296,8 +3270,8 @@ class crn_unpacker {
    for (uint32 i = 0; i < num_alpha_selectors; i++) {
      for (uint32 j = 0; j < 8; j++) {
        int32 sym = m_codec.decode(dm);
-        cur[j * 2 + 0] = (delta0[sym] + cur[j * 2 + 0]) & 7;
-        cur[j * 2 + 1] = (delta1[sym] + cur[j * 2 + 1]) & 7;
+        cur[j * 2 + 0] = cur[j * 2 + 0] + (sym & 7) & 7;
+        cur[j * 2 + 1] = cur[j * 2 + 1] + (sym >> 3) & 7;
      }

      *pDst++ = (uint16)((pFrom_linear[cur[0]]) | (pFrom_linear[cur[1]] << 3) | (pFrom_linear[cur[2]] << 6) | (pFrom_linear[cur[3]] << 9) |