diff --git a/bin/crunch_x64.exe b/bin/crunch_x64.exe
index 20510cf..e068703 100644
Binary files a/bin/crunch_x64.exe and b/bin/crunch_x64.exe differ
diff --git a/crnlib/crn_comp.cpp b/crnlib/crn_comp.cpp
index 7e53f66..a8c29d4 100644
--- a/crnlib/crn_comp.cpp
+++ b/crnlib/crn_comp.cpp
@@ -841,50 +841,6 @@ bool crn_comp::alias_images() {
   return true;
 }
 
-void crn_comp::append_chunks(const image_u8& img, uint num_chunks_x, uint num_chunks_y, dxt_hc::pixel_chunk_vec& chunks, float weight) {
-  for (uint y = 0; y < num_chunks_y; y++) {
-    for (uint legacy_index = chunks.size(), x = 0; x < num_chunks_x; x++) {
-      chunks.resize(chunks.size() + 1);
-
-      dxt_hc::pixel_chunk& chunk = chunks.back();
-      chunk.m_weight = weight;
-      chunk.m_legacy_index = legacy_index + (y & 1 ? num_chunks_x - 1 - x : x);
-
-      for (uint cy = 0; cy < cChunkPixelHeight; cy++) {
-        uint py = y * cChunkPixelHeight + cy;
-        py = math::minimum(py, img.get_height() - 1);
-
-        for (uint cx = 0; cx < cChunkPixelWidth; cx++) {
-          uint px = x * cChunkPixelWidth + cx;
-          px = math::minimum(px, img.get_width() - 1);
-
-          chunk(cx, cy) = img(px, py);
-        }
-      }
-    }
-  }
-}
-
-void crn_comp::create_chunks() {
-  m_chunks.reserve(m_total_chunks);
-  m_chunks.resize(0);
-
-  for (uint level = 0; level < m_pParams->m_levels; level++) {
-    for (uint face = 0; face < m_pParams->m_faces; face++) {
-      if (!face) {
-        CRNLIB_ASSERT(m_levels[level].m_first_chunk == m_chunks.size());
-      }
-
-      float mip_weight = math::minimum(12.0f, powf(1.3f, static_cast<float>(level)));
-      //float mip_weight = 1.0f;
-
-      append_chunks(m_images[face][level], m_levels[level].m_chunk_width, m_levels[level].m_chunk_height, m_chunks, mip_weight);
-    }
-  }
-
-  CRNLIB_ASSERT(m_chunks.size() == m_total_chunks);
-}
-
 void crn_comp::clear() {
   m_pParams = NULL;
 
@@ -903,8 +859,6 @@ void crn_comp::clear() {
 
   m_total_chunks = 0;
 
-  m_chunks.clear();
-
   utils::zero_object(m_crn_header);
 
   m_comp_data.clear();
@@ -931,7 +885,7 @@ void crn_comp::clear() {
   m_packed_alpha_selectors.clear();
 }
 
-bool crn_comp::quantize_chunks() {
+bool crn_comp::quantize_images() {
   dxt_hc::params params;
 
   params.m_adaptive_tile_alpha_psnr_derating = m_pParams->m_crn_adaptive_tile_alpha_psnr_derating;
@@ -964,10 +918,8 @@ bool crn_comp::quantize_chunks() {
 
     float alpha_endpoint_quality = powf(quality, 2.1f * alpha_quality_power_mul);
     float alpha_selector_quality = powf(quality, 1.65f * alpha_quality_power_mul);
-    params.m_alpha_endpoint_codebook_size = math::clamp<uint>(math::float_to_uint(.5f + math::lerp<float>(math::maximum<float>(24, cCRNMinPaletteSize), (float)max_codebook_entries, alpha_endpoint_quality)), cCRNMinPaletteSize, cCRNMaxPaletteSize);
-    ;
+    params.m_alpha_endpoint_codebook_size = math::clamp<uint>(math::float_to_uint(.5f + math::lerp<float>(math::maximum<float>(24, cCRNMinPaletteSize), (float)max_codebook_entries, alpha_endpoint_quality)), cCRNMinPaletteSize, cCRNMaxPaletteSize);    
     params.m_alpha_selector_codebook_size = math::clamp<uint>(math::float_to_uint(.5f + math::lerp<float>(math::maximum<float>(48, cCRNMinPaletteSize), (float)max_codebook_entries, alpha_selector_quality)), cCRNMinPaletteSize, cCRNMaxPaletteSize);
-    ;
   }
 
   if (m_pParams->m_flags & cCRNCompFlagDebugging) {
@@ -1058,18 +1010,39 @@ bool crn_comp::quantize_chunks() {
 
   params.m_num_levels = m_pParams->m_levels;
   for (uint i = 0; i < m_pParams->m_levels; i++) {
-    params.m_levels[i].m_first_chunk = m_levels[i].m_first_chunk;
-    params.m_levels[i].m_num_chunks = m_levels[i].m_num_chunks;
-    params.m_levels[i].m_chunk_width = m_levels[i].m_chunk_width;
+    params.m_levels[i].m_first_block = m_levels[i].m_first_chunk << 2;
+    params.m_levels[i].m_num_blocks = m_levels[i].m_num_chunks << 2;
+    params.m_levels[i].m_block_width = m_levels[i].m_chunk_width << 1;
+    params.m_levels[i].m_weight = math::minimum(12.0f, powf(1.3f, (float)i));
   }
+  params.m_num_faces = m_pParams->m_faces;
 
   params.m_endpoint_indices = &m_endpoint_indices;
   params.m_selector_indices = &m_selector_indices;
 
-  if (!m_hvq.compress(params, m_total_chunks, &m_chunks[0], m_task_pool))
-    return false;
+  params.m_num_blocks = m_total_chunks << 2;
+  params.m_blocks = (color_quad_u8(*)[16])crnlib_malloc(params.m_num_blocks * 16 * sizeof(color_quad_u8));
+  for (uint b = 0, level = 0; level < m_pParams->m_levels; level++) {
+    for (uint face = 0; face < m_pParams->m_faces; face++) {
+      image_u8& image = m_images[face][level];
+      uint width = image.get_width();
+      uint height = image.get_height();
+      uint blockWidth = (width + 7 & ~7) >> 2;
+      uint blockHeight = (height + 7 & ~7) >> 2;
+      for (uint by = 0; by < blockHeight; by++) {
+        for (uint y0 = by << 2, bx = 0; bx < blockWidth; bx++, b++) {
+          for (uint t = 0, x0 = bx << 2, dy = 0; dy < 4; dy++) {
+            for (uint y = math::minimum<uint>(y0 + dy, height - 1), dx = 0; dx < 4; dx++, t++)
+              params.m_blocks[b][t] = image(math::minimum<uint>(x0 + dx, width - 1), y);
+          }
+        }
+      }
+    }
+  }
+  bool result = m_hvq.compress(params, m_task_pool);
+  crnlib_free(params.m_blocks);
 
-  return true;
+  return result;
 }
 
 struct optimize_color_endpoint_codebook_params {
@@ -1463,10 +1436,7 @@ bool crn_comp::update_progress(uint phase_index, uint subphase_index, uint subph
 bool crn_comp::compress_internal() {
   if (!alias_images())
     return false;
-
-  create_chunks();
-
-  if (!quantize_chunks())
+  if (!quantize_images())
     return false;
 
   crnlib::vector<uint> endpoint_remap[2];
diff --git a/crnlib/crn_comp.h b/crnlib/crn_comp.h
index 5c6148f..ca326ae 100644
--- a/crnlib/crn_comp.h
+++ b/crnlib/crn_comp.h
@@ -69,7 +69,6 @@ class crn_comp : public itexture_comp {
   crnlib::vector<dxt_hc::selector_indices_details> m_selector_indices;
 
   uint m_total_chunks;
-  dxt_hc::pixel_chunk_vec m_chunks;
 
   crnd::crn_header m_crn_header;
   crnlib::vector<uint8> m_comp_data;
@@ -94,8 +93,6 @@ class crn_comp : public itexture_comp {
 
   void clear();
 
-  void append_chunks(const image_u8& img, uint num_chunks_x, uint num_chunks_y, dxt_hc::pixel_chunk_vec& chunks, float weight);
-
   static float color_endpoint_similarity_func(uint index_a, uint index_b, void* pContext);
   static float alpha_endpoint_similarity_func(uint index_a, uint index_b, void* pContext);
   void sort_color_endpoint_codebook(crnlib::vector<uint>& remapping, const crnlib::vector<uint>& endpoints);
@@ -117,8 +114,7 @@ class crn_comp : public itexture_comp {
       uint trial_index);
 
   bool alias_images();
-  void create_chunks();
-  bool quantize_chunks();
+  bool quantize_images();
 
   bool pack_chunks(
       uint group,
diff --git a/crnlib/crn_dxt_hc.cpp b/crnlib/crn_dxt_hc.cpp
index 5aa9563..7b51eb8 100644
--- a/crnlib/crn_dxt_hc.cpp
+++ b/crnlib/crn_dxt_hc.cpp
@@ -6,7 +6,6 @@
 #include "crn_console.h"
 #include "crn_dxt_fast.h"
 
-#define CRNLIB_USE_FAST_DXT 1
 #define CRNLIB_ENABLE_DEBUG_MESSAGES 0
 
 namespace crnlib {
@@ -22,41 +21,25 @@ static uint8 g_tile_map[8][2][2] = {
   {{ 0, 1 }, { 2, 3 }},
 };
 
-static color_quad_u8 g_tile_layout_colors[cNumChunkTileLayouts] =
-    {
-        color_quad_u8(255, 90, 32, 255),
-        color_quad_u8(64, 210, 192, 255),
-        color_quad_u8(128, 16, 225, 255),
-        color_quad_u8(255, 192, 200, 255),
-
-        color_quad_u8(255, 128, 200, 255),
-
-        color_quad_u8(255, 0, 0, 255),
-        color_quad_u8(0, 255, 0, 255),
-        color_quad_u8(0, 0, 255, 255),
-        color_quad_u8(255, 0, 255, 255)};
-
 dxt_hc::dxt_hc()
-    : m_num_chunks(0),
-      m_pChunks(NULL),
-      m_num_alpha_blocks(0),
-      m_has_color_blocks(false),
-      m_has_alpha0_blocks(false),
-      m_has_alpha1_blocks(false),
-      m_main_thread_id(crn_get_current_thread_id()),
-      m_canceled(false),
-      m_pTask_pool(NULL),
-      m_prev_phase_index(-1),
-      m_prev_percentage_complete(-1) {
+  : m_num_blocks(0),
+    m_num_alpha_blocks(0),
+    m_has_color_blocks(false),
+    m_has_alpha0_blocks(false),
+    m_has_alpha1_blocks(false),
+    m_main_thread_id(crn_get_current_thread_id()),
+    m_canceled(false),
+    m_pTask_pool(NULL),
+    m_prev_phase_index(-1),
+    m_prev_percentage_complete(-1) {
 }
 
 dxt_hc::~dxt_hc() {
 }
 
 void dxt_hc::clear() {
-  m_num_chunks = 0;
-  m_pChunks = NULL;
-
+  m_blocks = 0;
+  m_num_blocks = 0;
   m_num_alpha_blocks = 0;
   m_has_color_blocks = false;
   m_has_alpha0_blocks = false;
@@ -75,8 +58,6 @@ void dxt_hc::clear() {
   m_prev_phase_index = -1;
   m_prev_percentage_complete = -1;
 
-  m_chunk_details.clear();
-  m_blocks.clear();
   m_block_weights.clear();
   m_block_encodings.clear();
   for (uint c = 0; c < 3; c++)
@@ -89,62 +70,14 @@ void dxt_hc::clear() {
   m_endpoint_indices.clear();
   m_selector_indices.clear();
   m_tiles.clear();
-  m_total_tiles = 0;
+  m_num_tiles = 0;
 }
 
-bool dxt_hc::initialize_blocks(const params& p) {
-  m_chunk_details.resize(m_num_chunks);
-  m_blocks.resize(m_num_chunks << 2);
-  m_block_weights.resize(m_blocks.size());
-  m_block_encodings.resize(m_blocks.size());
-  for (uint c = 0; c < 3; c++)
-    m_block_selectors[c].resize(m_blocks.size());
-  m_tile_indices.resize(m_blocks.size());
-  m_endpoint_indices.resize(m_blocks.size());
-  m_selector_indices.resize(m_blocks.size());
-  m_tiles.resize(m_blocks.size());
-
-  for (uint level = 0; level < p.m_num_levels; level++) {
-    uint first_chunk = p.m_levels[level].m_first_chunk;
-    uint end_chunk = p.m_levels[level].m_first_chunk + p.m_levels[level].m_num_chunks;
-    uint chunk_width = p.m_levels[level].m_chunk_width;
-    uint block_width = chunk_width << 1;
-    for (uint b = first_chunk << 2, cy = 0, chunk_base = first_chunk; chunk_base < end_chunk; chunk_base += chunk_width, cy++) {
-      for (uint by = 0; by < 2; by++) {
-        for (uint cx = 0; cx < chunk_width; cx++) {
-          for (uint bx = 0; bx < 2; bx++, b++) {
-            const pixel_chunk& chunk = m_pChunks[chunk_base + cx];
-            m_block_weights[b] = chunk.m_weight;
-            m_chunk_details[chunk_base + cx].block_index[by][bx] = b;
-            for (uint t = 0, y = 0; y < 4; y++)  {
-              for (uint x = 0; x < 4; x++, t++)
-                m_blocks[b].push_back(chunk(bx << 2 | x, by << 2 | y));
-            }
-          }
-        }
-      }
-    }
-  }
-  return true;
-}
-
-bool dxt_hc::compress(const params& p, uint num_chunks, const pixel_chunk* pChunks, task_pool& task_pool) {
-  m_pTask_pool = &task_pool;
-  m_main_thread_id = crn_get_current_thread_id();
-  crnlib::vector<endpoint_indices_details>& endpoint_indices = *p.m_endpoint_indices;
-  crnlib::vector<selector_indices_details>& selector_indices = *p.m_selector_indices;
-
-  if ((!num_chunks) || (!pChunks))
-    return false;
-  if ((m_params.m_format == cDXT1A) || (m_params.m_format == cDXT3))
-    return false;
-
+bool dxt_hc::compress(const params& p, task_pool& task_pool) {
   clear();
-
   m_params = p;
-
-  m_num_chunks = num_chunks;
-  m_pChunks = pChunks;
+  m_main_thread_id = crn_get_current_thread_id();
+  m_pTask_pool = &task_pool;
 
   switch (m_params.m_format) {
     case cDXT1: {
@@ -174,32 +107,54 @@ bool dxt_hc::compress(const params& p, uint num_chunks, const pixel_chunk* pChun
     }
   }
 
-  initialize_blocks(p);
-  determine_compressed_chunks();
+  for (uint level = 0; level < p.m_num_levels; level++) {
+    float adaptive_tile_color_psnr_derating = p.m_adaptive_tile_color_psnr_derating;
+    if (level && adaptive_tile_color_psnr_derating > .25f)
+      adaptive_tile_color_psnr_derating = math::maximum(.25f, adaptive_tile_color_psnr_derating / powf(3.0f, static_cast<float>(level)));
+    for (uint e = 0; e < 8; e++)
+      m_color_derating[level][e] = math::lerp(0.0f, adaptive_tile_color_psnr_derating, (g_chunk_encodings[e].m_num_tiles - 1) / 3.0f);
+  }
+  for (uint e = 0; e < 8; e++)
+    m_alpha_derating[e] = math::lerp(0.0f, m_params.m_adaptive_tile_alpha_psnr_derating, (g_chunk_encodings[e].m_num_tiles - 1) / 3.0f);
 
-  if (m_has_color_blocks) {
-    if (!determine_color_endpoint_clusters())
-      return false;
-    if (!determine_color_endpoint_codebook())
-      return false;
+  m_blocks = m_params.m_blocks;
+  m_num_blocks = m_params.m_num_blocks;
+  m_block_weights.resize(m_num_blocks);
+  m_block_encodings.resize(m_num_blocks);
+  for (uint c = 0; c < 3; c++)
+    m_block_selectors[c].resize(m_num_blocks);
+  m_tile_indices.resize(m_num_blocks);
+  m_endpoint_indices.resize(m_num_blocks);
+  m_selector_indices.resize(m_num_blocks);
+  m_tiles.resize(m_num_blocks);
+
+  for (uint level = 0; level < p.m_num_levels; level++) {
+    float weight = p.m_levels[level].m_weight;
+    for (uint b = p.m_levels[level].m_first_block, bEnd = b + p.m_levels[level].m_num_blocks; b < bEnd; b++)
+      m_block_weights[b] = weight;
   }
 
-  if (m_num_alpha_blocks) {
-    if (!determine_alpha_endpoint_clusters())
-      return false;
-    if (!determine_alpha_endpoint_codebook())
-      return false;
+  for (uint i = 0; i <= m_pTask_pool->get_num_threads(); i++)
+    m_pTask_pool->queue_object_task(this, &dxt_hc::determine_tiles_task, i);
+  m_pTask_pool->join();
+
+  m_num_tiles = 0;
+  for (uint t = 0; t < m_tiles.size(); t++) {
+    if (m_tiles[t].pixels.size())
+      m_num_tiles++;
   }
 
-  if (m_has_color_blocks) {
-    if (!create_color_selector_codebook())
-      return false;
-  }
+  if (m_has_color_blocks)
+    determine_color_endpoints();
 
-  if (m_num_alpha_blocks) {
-    if (!create_alpha_selector_codebook())
-      return false;
-  }
+  if (m_num_alpha_blocks)
+    determine_alpha_endpoints();
+
+  if (m_has_color_blocks)
+    create_color_selector_codebook();
+
+  if (m_num_alpha_blocks)
+    create_alpha_selector_codebook();
 
   crnlib::vector<uint16> color_endpoint_remap(m_color_clusters.size());
   m_color_endpoints.reserve(m_color_clusters.size());
@@ -269,17 +224,19 @@ bool dxt_hc::compress(const params& p, uint num_chunks, const pixel_chunk* pChun
     }
   }
 
-  endpoint_indices.resize(m_blocks.size());
-  selector_indices.resize(m_blocks.size());
+  crnlib::vector<endpoint_indices_details>& endpoint_indices = *m_params.m_endpoint_indices;
+  crnlib::vector<selector_indices_details>& selector_indices = *m_params.m_selector_indices;
+  endpoint_indices.resize(m_num_blocks);
+  selector_indices.resize(m_num_blocks);
   for (uint level = 0; level < p.m_num_levels; level++) {
-    uint first_block = p.m_levels[level].m_first_chunk << 2;
-    uint end_block = first_block + (p.m_levels[level].m_num_chunks << 2);
-    uint block_width = p.m_levels[level].m_chunk_width << 1;
+    uint first_block = p.m_levels[level].m_first_block;
+    uint end_block = first_block + p.m_levels[level].m_num_blocks;
+    uint block_width = p.m_levels[level].m_block_width;
     for (uint by = 0, b = first_block; b < end_block; by++) {
       for (uint bx = 0; bx < block_width; bx++, b++) {
         bool top_match = by != 0;
         bool left_match = top_match || bx;
-        for (uint c = m_has_color_blocks ? 0 : cAlpha0Chunks; c < cAlpha0Chunks + m_num_alpha_blocks; c++) {
+        for (uint c = m_has_color_blocks ? 0 : cAlpha0Blocks; c < cAlpha0Blocks + m_num_alpha_blocks; c++) {
           uint16 endpoint_index = (c ? alpha_endpoint_remap : color_endpoint_remap)[m_endpoint_indices[b].component[c]];
           left_match = left_match && endpoint_index == endpoint_indices[b - 1].component[c];
           top_match = top_match && endpoint_index == endpoint_indices[b - block_width].component[c];
@@ -296,467 +253,168 @@ bool dxt_hc::compress(const params& p, uint num_chunks, const pixel_chunk* pChun
   return true;
 }
 
-void dxt_hc::compress_dxt1_block(
-    dxt1_endpoint_optimizer::results& results,
-    uint chunk_index, const image_u8& chunk, uint x_ofs, uint y_ofs, uint width, uint height,
-    uint8* pColor_Selectors) {
-  chunk_index;
+void dxt_hc::determine_tiles_task(uint64 data, void* pData_ptr) {
+  uint num_tasks = m_pTask_pool->get_num_threads() + 1;
+  uint offsets[9] = {0, 16, 32, 48, 0, 32, 64, 96, 64};
+  uint8 tiles[8][4] = {{8}, {6, 7}, {4, 5}, {6, 1, 3}, {7, 0, 2}, {4, 2, 3}, {5, 0, 1}, {0, 2, 1, 3}};
+  color_quad_u8 chunkPixels[128];
+  uint8 selectors[64];
+  uint tile_error[3][9];
+  uint total_error[3][8];
 
-  color_quad_u8 pixels[cChunkPixelWidth * cChunkPixelHeight];
+  for (uint level = 0; level < m_params.m_num_levels; level++) {
+    uint width = m_params.m_levels[level].m_block_width;
+    uint height = m_params.m_levels[level].m_num_blocks / width;
+    uint faceHeight = height / m_params.m_num_faces;
+    uint h = height * data / num_tasks & ~1;
+    uint hEnd = height * (data + 1) / num_tasks & ~1;
+    uint hFace = h % faceHeight;
+    uint b = m_params.m_levels[level].m_first_block + h * width;
 
-  for (uint y = 0; y < height; y++)
-    for (uint x = 0; x < width; x++)
-      pixels[x + y * width] = chunk(x_ofs + x, y_ofs + y);
-
-//double s = image_utils::compute_std_dev(width * height, pixels, 0, 3);
-
-#if CRNLIB_USE_FAST_DXT
-  uint low16, high16;
-  dxt_fast::compress_color_block(width * height, pixels, low16, high16, pColor_Selectors);
-  results.m_low_color = static_cast<uint16>(low16);
-  results.m_high_color = static_cast<uint16>(high16);
-  results.m_alpha_block = false;
-  results.m_error = INT_MAX;
-  results.m_pSelectors = pColor_Selectors;
-#else
-  dxt1_endpoint_optimizer optimizer;
-
-  dxt1_endpoint_optimizer::params params;
-  params.m_block_index = chunk_index;
-  params.m_pPixels = pixels;
-  params.m_num_pixels = width * height;
-  params.m_pixels_have_alpha = false;
-  params.m_use_alpha_blocks = false;
-  params.m_perceptual = m_params.m_perceptual;
-  params.m_highest_quality = false;  //false;
-  params.m_endpoint_caching = false;
-
-  results.m_pSelectors = pColor_Selectors;
-
-  optimizer.compute(params, results);
-#endif
-}
-
-void dxt_hc::compress_dxt5_block(
-    dxt5_endpoint_optimizer::results& results,
-    uint chunk_index, const image_u8& chunk, uint x_ofs, uint y_ofs, uint width, uint height, uint component_index,
-    uint8* pAlpha_selectors) {
-  chunk_index;
-
-  color_quad_u8 pixels[cChunkPixelWidth * cChunkPixelHeight];
-
-  for (uint y = 0; y < height; y++)
-    for (uint x = 0; x < width; x++)
-      pixels[x + y * width] = chunk(x_ofs + x, y_ofs + y);
-
-#if 0  //CRNLIB_USE_FAST_DXT
-      uint low, high;
-      dxt_fast::compress_alpha_block(width * height, pixels, low, high, pAlpha_selectors, component_index);
-      results.m_pSelectors = pAlpha_selectors;
-      results.m_error = INT_MAX;
-      results.m_first_endpoint = static_cast<uint8>(low);
-      results.m_second_endpoint = static_cast<uint8>(high);
-      results.m_block_type = 0;
-#else
-  dxt5_endpoint_optimizer optimizer;
-  dxt5_endpoint_optimizer::params params;
-  params.m_block_index = chunk_index;
-  params.m_pPixels = pixels;
-  params.m_num_pixels = width * height;
-  params.m_comp_index = component_index;
-  params.m_use_both_block_types = false;
-  params.m_quality = cCRNDXTQualityNormal;
-
-  results.m_pSelectors = pAlpha_selectors;
-
-  optimizer.compute(params, results);
-#endif
-}
-
-void dxt_hc::determine_compressed_chunks_task(uint64 data, void* pData_ptr) {
-  pData_ptr;
-  const uint thread_index = static_cast<uint>(data);
-
-  image_u8 orig_chunk;
-  image_u8 decomp_chunk[cNumChunkEncodings];
-
-  orig_chunk.resize(cChunkPixelWidth, cChunkPixelHeight);
-  for (uint i = 0; i < cNumChunkEncodings; i++)
-    decomp_chunk[i].resize(cChunkPixelWidth, cChunkPixelHeight);
-
-  image_utils::error_metrics color_error_metrics[cNumChunkEncodings];
-  dxt1_endpoint_optimizer::results color_optimizer_results[cNumChunkTileLayouts];
-  uint8 layout_color_selectors[cNumChunkTileLayouts][cChunkPixelWidth * cChunkPixelHeight];
-
-  image_utils::error_metrics alpha_error_metrics[2][cNumChunkEncodings];
-  dxt5_endpoint_optimizer::results alpha_optimizer_results[2][cNumChunkTileLayouts];
-  uint8 layout_alpha_selectors[2][cNumChunkTileLayouts][cChunkPixelWidth * cChunkPixelHeight];
-
-  uint first_layout = 0;
-  uint last_layout = cNumChunkTileLayouts;
-
-  uint first_encoding = 0;
-  uint last_encoding = cNumChunkEncodings;
-
-  if (!m_params.m_hierarchical) {
-    first_layout = cFirst4x4ChunkTileLayout;
-    first_encoding = cNumChunkEncodings - 1;
-  }
-
-  float encoding_weight[8];
-  for (uint i = 0; i < 8; i++)
-    encoding_weight[i] = math::lerp(1.15f, 1.0f, i / 7.0f);
-
-  for (uint chunk_index = 0; chunk_index < m_num_chunks; chunk_index++) {
-    if (m_canceled)
-      return;
-
-    if ((crn_get_current_thread_id() == m_main_thread_id) && ((chunk_index & 511) == 0)) {
-      if (!update_progress(0, chunk_index, m_num_chunks))
-        return;
-    }
-
-    if (m_pTask_pool->get_num_threads()) {
-      if ((chunk_index % (m_pTask_pool->get_num_threads() + 1)) != thread_index)
-        continue;
-    }
-
-    uint level_index = 0;
-    for (uint i = 0; i < m_params.m_num_levels; i++) {
-      if ((chunk_index >= m_params.m_levels[i].m_first_chunk) && (chunk_index < m_params.m_levels[i].m_first_chunk + m_params.m_levels[i].m_num_chunks)) {
-        level_index = i;
-        break;
+    for (; h < hEnd; h += 2, hFace += 2, b += width) {
+      uint tile_offset = b;
+      uint tile_offset_delta = 4;
+      if (hFace == faceHeight) {
+        hFace = 0;
+      } else if (hFace & 2) {
+        tile_offset_delta = -4;
+        tile_offset += (width << 1) + tile_offset_delta;
       }
-    }
+      for (uint bNext = b + width; b < bNext; b += 2, tile_offset += tile_offset_delta) {
+        for (int t = 0; t < 64; t += 16)
+          memcpy(chunkPixels + t, m_blocks[b + (t & 16 ? width : 0) + (t & 32 ? 1 : 0)], 64);
+        for (int t = 0; t < 64; t += 4)
+          memcpy(chunkPixels + 64 + t, m_blocks[b + (t & 32 ? width : 0) + (t & 4 ? 1 : 0)] + (t >> 1 & 12), 16);
 
-    for (uint cy = 0; cy < cChunkPixelHeight; cy++)
-      for (uint cx = 0; cx < cChunkPixelWidth; cx++)
-        orig_chunk(cx, cy) = m_pChunks[chunk_index](cx, cy);
-
-    if (m_has_color_blocks) {
-      for (uint l = first_layout; l < last_layout; l++) {
-        utils::zero_object(layout_color_selectors[l]);
-
-        compress_dxt1_block(
-            color_optimizer_results[l], chunk_index,
-            orig_chunk,
-            g_chunk_tile_layouts[l].m_x_ofs, g_chunk_tile_layouts[l].m_y_ofs,
-            g_chunk_tile_layouts[l].m_width, g_chunk_tile_layouts[l].m_height,
-            layout_color_selectors[l]);
-      }
-    }
-
-    float alpha_layout_std_dev[2][cNumChunkTileLayouts];
-    utils::zero_object(alpha_layout_std_dev);
-
-    for (uint a = 0; a < m_num_alpha_blocks; a++) {
-      for (uint l = first_layout; l < last_layout; l++) {
-        utils::zero_object(layout_alpha_selectors[a][l]);
-
-        compress_dxt5_block(
-            alpha_optimizer_results[a][l], chunk_index,
-            orig_chunk,
-            g_chunk_tile_layouts[l].m_x_ofs, g_chunk_tile_layouts[l].m_y_ofs,
-            g_chunk_tile_layouts[l].m_width, g_chunk_tile_layouts[l].m_height,
-            m_params.m_alpha_component_indices[a],
-            layout_alpha_selectors[a][l]);
-
-        for (uint a = 0; a < m_num_alpha_blocks; a++) {
-          float mean = 0.0f;
-          float variance = 0.0f;
-
-          for (uint cy = 0; cy < g_chunk_tile_layouts[l].m_height; cy++) {
-            for (uint cx = 0; cx < g_chunk_tile_layouts[l].m_width; cx++) {
-              uint s = orig_chunk(cx + g_chunk_tile_layouts[l].m_x_ofs, cy + g_chunk_tile_layouts[l].m_y_ofs)[m_params.m_alpha_component_indices[a]];
-
-              mean += s;
-              variance += s * s;
-            }  // cx
-          }    //cy
-
-          float scale = 1.0f / (g_chunk_tile_layouts[l].m_width * g_chunk_tile_layouts[l].m_height);
-
-          mean *= scale;
-          variance *= scale;
-
-          variance -= mean * mean;
-
-          alpha_layout_std_dev[a][l] = sqrt(variance);
-
-        }  //a
-      }
-    }
-
-    for (uint e = first_encoding; e < last_encoding; e++) {
-      for (uint t = 0; t < g_chunk_encodings[e].m_num_tiles; t++) {
-        const uint layout_index = g_chunk_encodings[e].m_tiles[t].m_layout_index;
-        CRNLIB_ASSERT((layout_index >= first_layout) && (layout_index < last_layout));
-
-        if (m_has_color_blocks) {
-          const dxt1_endpoint_optimizer::results& color_results = color_optimizer_results[layout_index];
-          const uint8* pColor_selectors = layout_color_selectors[layout_index];
-
-          color_quad_u8 block_colors[cDXT1SelectorValues];
-          CRNLIB_ASSERT(color_results.m_low_color >= color_results.m_high_color);
-          // it's okay if color_results.m_low_color == color_results.m_high_color, because in this case only selector 0 should be used
-          dxt1_block::get_block_colors4(block_colors, color_results.m_low_color, color_results.m_high_color);
-
-          for (uint cy = 0; cy < g_chunk_encodings[e].m_tiles[t].m_height; cy++) {
-            for (uint cx = 0; cx < g_chunk_encodings[e].m_tiles[t].m_width; cx++) {
-              uint s = pColor_selectors[cx + cy * g_chunk_encodings[e].m_tiles[t].m_width];
-              CRNLIB_ASSERT(s < cDXT1SelectorValues);
-
-              decomp_chunk[e](cx + g_chunk_encodings[e].m_tiles[t].m_x_ofs, cy + g_chunk_encodings[e].m_tiles[t].m_y_ofs) = block_colors[s];
+        for (uint t = 0; t < 9; t++) {
+          color_quad_u8* pixels = chunkPixels + offsets[t];
+          uint size = 16 << (t >> 2);
+          if (m_has_color_blocks) {
+            uint low16, high16;
+            dxt_fast::compress_color_block(size, pixels, low16, high16, selectors);
+            color_quad_u8 block_colors[4];
+            dxt1_block::get_block_colors4(block_colors, low16, high16);
+            uint error = 0;
+            for (uint p = 0; p < size; p++) {
+              for (uint8 c = 0; c < 3; c++) {
+                uint delta = pixels[p][c] - block_colors[selectors[p]][c];
+                error += delta * delta;
+              }
             }
+            tile_error[cColorBlocks][t] = error;
+          }
+          for (uint a = 0; a < m_num_alpha_blocks; a++) {
+            uint8 component = m_params.m_alpha_component_indices[a];
+            dxt5_endpoint_optimizer optimizer;
+            dxt5_endpoint_optimizer::params params;
+            dxt5_endpoint_optimizer::results results;
+            params.m_pPixels = pixels;
+            params.m_num_pixels = size;
+            params.m_comp_index = component;
+            params.m_use_both_block_types = false;
+            params.m_quality = cCRNDXTQualityNormal;
+            results.m_pSelectors = selectors;
+            optimizer.compute(params, results);
+            uint block_values[cDXT5SelectorValues];
+            dxt5_block::get_block_values8(block_values, results.m_first_endpoint, results.m_second_endpoint);
+            tile_error[cAlpha0Blocks + a][t] = results.m_error;
           }
         }
 
-        for (uint a = 0; a < m_num_alpha_blocks; a++) {
-          const dxt5_endpoint_optimizer::results& alpha_results = alpha_optimizer_results[a][layout_index];
-          const uint8* pAlpha_selectors = layout_alpha_selectors[a][layout_index];
+        for (uint8 c = m_has_color_blocks ? 0 : cAlpha0Blocks; c < cAlpha0Blocks + m_num_alpha_blocks; c++) {
+          for (uint8 e = 0; e < 8; e++) {
+            total_error[c][e] = 0;
+            for (uint8 t = 0, s = e + 1; s; s >>= 1, t++)
+              total_error[c][e] += tile_error[c][tiles[e][t]];
+          }
+        }
 
-          uint block_values[cDXT5SelectorValues];
-          CRNLIB_ASSERT(alpha_results.m_first_endpoint >= alpha_results.m_second_endpoint);
-          dxt5_block::get_block_values8(block_values, alpha_results.m_first_endpoint, alpha_results.m_second_endpoint);
+        float best_quality = 0.0f;
+        uint best_encoding = 0;
+        for (uint e = 0; e < 8; e++) {
+          float quality = 0;
+          if (m_has_color_blocks) {
+            double peakSNR = total_error[cColorBlocks][e] ? log10(255.0f / sqrt(total_error[cColorBlocks][e] / 192.0)) * 20.0f : 999999.0f;
+            quality = (float)math::maximum<double>(peakSNR - m_color_derating[level][e], 0.0f);
+            if (m_num_alpha_blocks)
+              quality *= m_params.m_adaptive_tile_color_alpha_weighting_ratio;
+          }
+          for (uint a = 0; a < m_num_alpha_blocks; a++) {
+            double peakSNR = total_error[cAlpha0Blocks + a][e] ? log10(255.0f / sqrt(total_error[cAlpha0Blocks + a][e] / 64.0)) * 20.0f : 999999.0f;
+            quality += (float)math::maximum<double>(peakSNR - m_alpha_derating[e], 0.0f);
+          }
+          if (quality > best_quality) {
+            best_quality = quality;
+            best_encoding = e;
+          }
+        }
+    
+        for (uint tile_index = 0, s = best_encoding + 1; s; s >>= 1, tile_index++) {
+          tile_details& tile = m_tiles[tile_offset | tile_index];
+          uint t = tiles[best_encoding][tile_index];
+          tile.pixels.append(chunkPixels + offsets[t], 16 << (t >> 2));
+          tile.weight = m_block_weights[b];
 
-          for (uint cy = 0; cy < g_chunk_encodings[e].m_tiles[t].m_height; cy++) {
-            for (uint cx = 0; cx < g_chunk_encodings[e].m_tiles[t].m_width; cx++) {
-              uint s = pAlpha_selectors[cx + cy * g_chunk_encodings[e].m_tiles[t].m_width];
-              CRNLIB_ASSERT(s < cDXT5SelectorValues);
-
-              decomp_chunk[e](cx + g_chunk_encodings[e].m_tiles[t].m_x_ofs, cy + g_chunk_encodings[e].m_tiles[t].m_y_ofs)[m_params.m_alpha_component_indices[a]] =
-                  static_cast<uint8>(block_values[s]);
+          if (m_has_color_blocks) {
+            tree_clusterizer<vec3F> palettizer;
+            for (uint p = 0; p < tile.pixels.size(); p++) {
+              const color_quad_u8& c = tile.pixels[p];
+              vec3F v(c[0] * 1.0f / 255.0f, c[1] * 1.0f / 255.0f, c[2] * 1.0f / 255.0f);
+              if (m_params.m_perceptual) {
+                v[0] *= 0.5f;
+                v[2] *= 0.25f;
+              }
+              palettizer.add_training_vec(v, 1);
             }
+            palettizer.generate_codebook(2);
+            vec3F v[2];
+            utils::zero_object(v);
+            for (uint i = 0; i < palettizer.get_codebook_size(); i++)
+              v[i] = palettizer.get_codebook_entry(i);
+            if (palettizer.get_codebook_size() == 1)
+              v[1] = v[0];
+            if (v[0].length() > v[1].length())
+              utils::swap(v[0], v[1]);
+            vec6F vv;
+            for (uint i = 0; i < 2; i++) {
+              vv[i * 3 + 0] = v[i][0];
+              vv[i * 3 + 1] = v[i][1];
+              vv[i * 3 + 2] = v[i][2];
+            }
+            tile.color_endpoint = vv;
           }
-        }
-      }  // t
-
-      if (m_params.m_hierarchical) {
-        if (m_has_color_blocks)
-          color_error_metrics[e].compute(decomp_chunk[e], orig_chunk, 0, 3);
-
-        for (uint a = 0; a < m_num_alpha_blocks; a++)
-          alpha_error_metrics[a][e].compute(decomp_chunk[e], orig_chunk, m_params.m_alpha_component_indices[a], 1);
-      }
-    }  // e
-
-    uint best_encoding = cNumChunkEncodings - 1;
-
-    if (m_params.m_hierarchical) {
-      float quality[cNumChunkEncodings];
-      utils::zero_object(quality);
-
-      float best_quality = 0.0f;
-
-      best_encoding = 0;
-
-      for (uint e = 0; e < cNumChunkEncodings; e++) {
-        if (m_has_color_blocks) {
-          float adaptive_tile_color_psnr_derating = m_params.m_adaptive_tile_color_psnr_derating;
-          if ((level_index) && (adaptive_tile_color_psnr_derating > .25f)) {
-            //adaptive_tile_color_psnr_derating = math::lerp(adaptive_tile_color_psnr_derating * .5f, .3f, (level_index - 1) / math::maximum(1.0f, float(m_params.m_num_levels - 2)));
-            adaptive_tile_color_psnr_derating = math::maximum(.25f, adaptive_tile_color_psnr_derating / powf(3.0f, static_cast<float>(level_index)));
-          }
-
-          float color_derating = math::lerp(0.0f, adaptive_tile_color_psnr_derating, (g_chunk_encodings[e].m_num_tiles - 1) / 3.0f);
-          quality[e] = (float)math::maximum<double>(color_error_metrics[e].mPeakSNR - color_derating, 0.0f);
-        }
-
-        if (m_num_alpha_blocks) {
-          quality[e] *= m_params.m_adaptive_tile_color_alpha_weighting_ratio;
-          float alpha_derating = math::lerp(0.0f, m_params.m_adaptive_tile_alpha_psnr_derating, (g_chunk_encodings[e].m_num_tiles - 1) / 3.0f);
-
-          float max_std_dev = 0.0f;
 
           for (uint a = 0; a < m_num_alpha_blocks; a++) {
-            quality[e] += (float)math::maximum<double>(alpha_error_metrics[a][e].mPeakSNR - alpha_derating, 0.0f);
-
-            for (uint t = 0; t < g_chunk_encodings[e].m_num_tiles; t++) {
-              float std_dev = alpha_layout_std_dev[a][g_chunk_encodings[e].m_tiles[t].m_layout_index];
-              max_std_dev = math::maximum(max_std_dev, std_dev);
+            uint component_index = m_params.m_alpha_component_indices[a];
+            tree_clusterizer<vec1F> palettizer;
+            for (uint p = 0; p < tile.pixels.size(); p++) {
+              vec1F v(tile.pixels[p][component_index] * 1.0f / 255.0f);
+              palettizer.add_training_vec(v, 1);
             }
+            palettizer.generate_codebook(2);
+            vec1F v[2];
+            utils::zero_object(v);
+            for (uint i = 0; i < palettizer.get_codebook_size(); i++)
+              v[i] = palettizer.get_codebook_entry(i);
+            if (palettizer.get_codebook_size() == 1)
+              v[1] = v[0];
+            if (v[0] > v[1])
+              utils::swap(v[0], v[1]);
+            vec2F vv(v[0][0], v[1][0]);
+            tile.alpha_endpoints[a] = vv;
           }
         }
 
-        if (quality[e] > best_quality) {
-          best_quality = quality[e];
-          best_encoding = e;
-        }
-      }
-    }
-    
-    for (uint t = 0; t < g_chunk_encodings[best_encoding].m_num_tiles; t++) {
-      tile_details& tile = m_tiles[chunk_index << 2 | t];
-      const chunk_tile_desc& layout = g_chunk_tile_layouts[g_chunk_encodings[best_encoding].m_tiles[t].m_layout_index];
-      for (uint y = 0; y < layout.m_height; y++) {
-        for (uint x = 0; x < layout.m_width; x++)
-          tile.pixels.push_back(m_pChunks[chunk_index](layout.m_x_ofs + x, layout.m_y_ofs + y));
-      }
-      tile.weight = (uint)(tile.pixels.size() * m_pChunks[chunk_index].m_weight);
-
-      if (m_has_color_blocks) {
-        tree_clusterizer<vec3F> palettizer;
-        for (uint p = 0; p < tile.pixels.size(); p++) {
-          const color_quad_u8& c = tile.pixels[p];
-          vec3F v(c[0] * 1.0f / 255.0f, c[1] * 1.0f / 255.0f, c[2] * 1.0f / 255.0f);
-          if (m_params.m_perceptual) {
-            v[0] *= 0.5f;
-            v[2] *= 0.25f;
+        for (uint by = 0; by < 2; by++) {
+          for (uint bx = 0; bx < 2; bx++) {
+            m_block_encodings[b + (by ? width : 0) + bx] = best_encoding;
+            m_tile_indices[b + (by ? width : 0) + bx] = tile_offset | g_tile_map[best_encoding][by][bx];
           }
-          palettizer.add_training_vec(v, 1);
         }
-        palettizer.generate_codebook(2);
-        vec3F v[2];
-        utils::zero_object(v);
-        for (uint i = 0; i < palettizer.get_codebook_size(); i++)
-          v[i] = palettizer.get_codebook_entry(i);
-        if (palettizer.get_codebook_size() == 1)
-          v[1] = v[0];
-        if (v[0].length() > v[1].length())
-          utils::swap(v[0], v[1]);
-        vec6F vv;
-        for (uint i = 0; i < 2; i++) {
-          vv[i * 3 + 0] = v[i][0];
-          vv[i * 3 + 1] = v[i][1];
-          vv[i * 3 + 2] = v[i][2];
-        }
-        tile.color_endpoint = vv;
-      }
 
-      for (uint a = 0; a < m_num_alpha_blocks; a++) {
-        uint component_index = m_params.m_alpha_component_indices[a];
-        tree_clusterizer<vec1F> palettizer;
-        for (uint p = 0; p < tile.pixels.size(); p++) {
-          vec1F v(tile.pixels[p][component_index] * 1.0f / 255.0f);
-          palettizer.add_training_vec(v, 1);
-        }
-        palettizer.generate_codebook(2);
-        vec1F v[2];
-        utils::zero_object(v);
-        for (uint i = 0; i < palettizer.get_codebook_size(); i++)
-          v[i] = palettizer.get_codebook_entry(i);
-        if (palettizer.get_codebook_size() == 1)
-          v[1] = v[0];
-        if (v[0] > v[1])
-          utils::swap(v[0], v[1]);
-        vec2F vv(v[0][0], v[1][0]);
-        tile.alpha_endpoints[a] = vv;
-      }
-    }
-
-    for (uint by = 0; by < 2; by++) {
-      for (uint bx = 0; bx < 2; bx++) {
-        uint b = m_chunk_details[chunk_index].block_index[by][bx];
-        m_block_encodings[b] = best_encoding;
-        m_tile_indices[b] = chunk_index << 2 | g_tile_map[best_encoding][by][bx];
-      }
-    }
-
-  }  // chunk_index
-}
-
-bool dxt_hc::determine_compressed_chunks() {
-  for (uint i = 0; i <= m_pTask_pool->get_num_threads(); i++)
-    m_pTask_pool->queue_object_task(this, &dxt_hc::determine_compressed_chunks_task, i);
-  m_pTask_pool->join();
-
-  m_total_tiles = 0;
-  for (uint t = 0; t < m_tiles.size(); t++) {
-    if (m_tiles[t].pixels.size())
-      m_total_tiles++;
-  }
-
-  return true;
-}
-
-void dxt_hc::determine_color_endpoint_clusters_task(uint64 data, void* pData_ptr) {
-  vec6F_tree_vq* vq = (vec6F_tree_vq*)pData_ptr;
-  uint num_tasks = m_pTask_pool->get_num_threads() + 1;
-  for (uint t = m_tiles.size() * data / num_tasks, tEnd = m_tiles.size() * (data + 1) / num_tasks; t < tEnd; t++) {
-    if (m_tiles[t].pixels.size())
-      m_tiles[t].cluster_indices[cColorChunks] = vq->find_best_codebook_entry_fs(m_tiles[t].color_endpoint);
-  }
-}
-
-bool dxt_hc::determine_color_endpoint_clusters() {
-  vec6F_tree_vq vq;
-  for (uint t = 0; t < m_tiles.size(); t++) {
-    if (m_tiles[t].pixels.size())
-      vq.add_training_vec(m_tiles[t].color_endpoint, m_tiles[t].weight);
-  }
-
-  vq.generate_codebook(math::minimum<uint>(m_total_tiles, m_params.m_color_endpoint_codebook_size));
-  m_color_clusters.resize(vq.get_codebook_size());
-  for (uint i = 0; i <= m_pTask_pool->get_num_threads(); i++)
-    m_pTask_pool->queue_object_task(this, &dxt_hc::determine_color_endpoint_clusters_task, i, &vq);
-  m_pTask_pool->join();
-
-  for (uint i = 0; i < m_num_chunks; i++) {
-    for (uint t = m_pChunks[i].m_legacy_index << 2, tEnd = t + 4; t < tEnd; t++) {
-      if (m_tiles[t].pixels.size())
-        m_color_clusters[m_tiles[t].cluster_indices[cColorChunks]].m_pixels.append(m_tiles[t].pixels);
-    }
-  }
-
-  for (uint b = 0; b < m_blocks.size(); b++) {
-    uint cluster_index = m_tiles[m_tile_indices[b]].cluster_indices[cColorChunks];
-    m_endpoint_indices[b].component[cColorChunks] = cluster_index;
-    m_color_clusters[cluster_index].m_blocks[cColorChunks].push_back(b);
-  }
-
-  return true;
-}
-
-void dxt_hc::determine_alpha_endpoint_clusters_task(uint64 data, void* pData_ptr) {
-  vec2F_tree_vq* vq = (vec2F_tree_vq*)pData_ptr;
-  uint num_tasks = m_pTask_pool->get_num_threads() + 1;
-  for (uint t = m_tiles.size() * data / num_tasks, tEnd = m_tiles.size() * (data + 1) / num_tasks; t < tEnd; t++) {
-    if (m_tiles[t].pixels.size()) {
-      for (uint a = 0; a < m_num_alpha_blocks; a++)
-        m_tiles[t].cluster_indices[cAlpha0Chunks + a] = vq->find_best_codebook_entry_fs(m_tiles[t].alpha_endpoints[a]);
-    }
-  }
-}
-
-bool dxt_hc::determine_alpha_endpoint_clusters() {
-  vec2F_tree_vq vq;
-  for (uint a = 0; a < m_num_alpha_blocks; a++) {
-    uint component_index = m_params.m_alpha_component_indices[a];
-    for (uint t = 0; t < m_tiles.size(); t++) {
-      if (m_tiles[t].pixels.size())
-        vq.add_training_vec(m_tiles[t].alpha_endpoints[a], m_tiles[t].pixels.size());
-    }
-  }
-
-  vq.generate_codebook(math::minimum<uint>(m_total_tiles, m_params.m_alpha_endpoint_codebook_size));
-  m_alpha_clusters.resize(vq.get_codebook_size());
-  for (uint i = 0; i <= m_pTask_pool->get_num_threads(); i++)
-    m_pTask_pool->queue_object_task(this, &dxt_hc::determine_alpha_endpoint_clusters_task, i, &vq);
-  m_pTask_pool->join();
-
-  for (uint a = 0; a < m_num_alpha_blocks; a++) {
-    uint component_index = m_params.m_alpha_component_indices[a];
-    for (uint i = 0; i < m_num_chunks; i++) {
-      for (uint t = m_pChunks[i].m_legacy_index << 2, tEnd = t + 4; t < tEnd; t++) {
-        crnlib::vector<color_quad_u8>& source = m_tiles[t].pixels;
-        if (source.size()) {
-          crnlib::vector<color_quad_u8>& destination = m_alpha_clusters[m_tiles[t].cluster_indices[cAlpha0Chunks + a]].m_pixels;
-          for (uint p = 0; p < source.size(); p++)
-            destination.push_back(color_quad_u8(source[p][component_index]));
-        }
       }
     }
   }
-
-  for (uint b = 0; b < m_blocks.size(); b++) {
-    for (uint a = 0; a < m_num_alpha_blocks; a++) {
-      uint cluster_index = m_tiles[m_tile_indices[b]].cluster_indices[cAlpha0Chunks + a];
-      m_endpoint_indices[b].component[cAlpha0Chunks + a] = cluster_index;
-      m_alpha_clusters[cluster_index].m_blocks[cAlpha0Chunks + a].push_back(b);
-    }
-  }
-
-  return true;
 }
 
 void dxt_hc::determine_color_endpoint_codebook_task(uint64 data, void* pData_ptr) {
@@ -823,7 +481,7 @@ void dxt_hc::determine_color_endpoint_codebook_task(uint64 data, void* pData_ptr
     for (uint i = 0; i < 8; i++)
       encoding_weight[i] = math::lerp(1.15f, 1.0f, i / 7.0f);
 
-    crnlib::vector<uint>& blocks = cluster.m_blocks[cColorChunks];
+    crnlib::vector<uint>& blocks = cluster.m_blocks[cColorBlocks];
     for (uint i = 0; i < blocks.size(); i++) {
       uint b = blocks[i];
       uint weight = (uint)(math::clamp<uint>(endpoint_weight * m_block_weights[b], 1, 2048) * encoding_weight[m_block_encodings[b]]);
@@ -841,7 +499,7 @@ void dxt_hc::determine_color_endpoint_codebook_task(uint64 data, void* pData_ptr
         }
         selector |= s_best << sh;
       }
-      m_block_selectors[cColorChunks][b] = selector | (uint64)weight << 32;
+      m_block_selectors[cColorBlocks][b] = selector | (uint64)weight << 32;
     }
 
     dxt_endpoint_refiner refiner;
@@ -865,21 +523,43 @@ void dxt_hc::determine_color_endpoint_codebook_task(uint64 data, void* pData_ptr
   }
 }
 
-bool dxt_hc::determine_color_endpoint_codebook() {
-  if (!m_has_color_blocks)
-    return true;
+void dxt_hc::determine_color_endpoint_clusters_task(uint64 data, void* pData_ptr) {  // Worker task: data is the slice index, pData_ptr the vec6F_tree_vq passed in by determine_color_endpoints().
+  vec6F_tree_vq* vq = (vec6F_tree_vq*)pData_ptr;
+  uint num_tasks = m_pTask_pool->get_num_threads() + 1;  // matches the caller, which queues one task for each i in [0, get_num_threads()]
+  for (uint t = m_tiles.size() * data / num_tasks, tEnd = m_tiles.size() * (data + 1) / num_tasks; t < tEnd; t++) {  // this task's contiguous slice of m_tiles
+    if (m_tiles[t].pixels.size())  // tiles without pixels are left untouched
+      m_tiles[t].cluster_indices[cColorBlocks] = vq->find_best_codebook_entry_fs(m_tiles[t].color_endpoint);  // record the codebook entry chosen for this tile's color endpoint
+  }
+}
 
-#if CRNLIB_ENABLE_DEBUG_MESSAGES
-  if (m_params.m_debugging)
-    console::info("Computing optimal color cluster endpoints");
-#endif
+void dxt_hc::determine_color_endpoints() {  // Clusters tile color endpoints with a VQ, assigns tiles and blocks to clusters, then runs the per-cluster codebook tasks.
+  vec6F_tree_vq vq;
+  for (uint t = 0; t < m_tiles.size(); t++) {
+    if (m_tiles[t].pixels.size())  // only tiles that hold pixels take part in training
+      vq.add_training_vec(m_tiles[t].color_endpoint, (uint)(m_tiles[t].pixels.size() * m_tiles[t].weight));  // training weight = pixel count * tile weight, truncated to uint
+  }
+
+  vq.generate_codebook(math::minimum<uint>(m_num_tiles, m_params.m_color_endpoint_codebook_size));  // codebook size is capped by both m_num_tiles and the configured size
+  m_color_clusters.resize(vq.get_codebook_size());  // one cluster per codebook entry
+
+  for (uint i = 0; i <= m_pTask_pool->get_num_threads(); i++)  // get_num_threads() + 1 tasks, the same count the worker divides m_tiles by
+    m_pTask_pool->queue_object_task(this, &dxt_hc::determine_color_endpoint_clusters_task, i, &vq);
+  m_pTask_pool->join();  // every tile's cluster_indices[cColorBlocks] is set before it is read below
+
+  for (uint t = 0; t < m_tiles.size(); t++) {  // t indexes m_tiles, so bound it by m_tiles.size() like the other tile loops (was m_num_blocks)
+    if (m_tiles[t].pixels.size())
+      m_color_clusters[m_tiles[t].cluster_indices[cColorBlocks]].m_pixels.append(m_tiles[t].pixels);  // gather the tile's pixels into its cluster
+  }
+
+  for (uint b = 0; b < m_num_blocks; b++) {
+    uint cluster_index = m_tiles[m_tile_indices[b]].cluster_indices[cColorBlocks];  // a block takes the cluster of the tile it maps to
+    m_endpoint_indices[b].component[cColorBlocks] = cluster_index;
+    m_color_clusters[cluster_index].m_blocks[cColorBlocks].push_back(b);  // and is listed as a member of that cluster
+  }
 
   for (uint i = 0; i <= m_pTask_pool->get_num_threads(); i++)
     m_pTask_pool->queue_object_task(this, &dxt_hc::determine_color_endpoint_codebook_task, i, NULL);
-
   m_pTask_pool->join();
-
-  return !m_canceled;
 }
 
 void dxt_hc::determine_alpha_endpoint_codebook_task(uint64 data, void* pData_ptr) {
@@ -907,7 +587,6 @@ void dxt_hc::determine_alpha_endpoint_codebook_task(uint64 data, void* pData_ptr
     crnlib::vector<uint8> selectors(cluster.m_pixels.size());
 
     dxt5_endpoint_optimizer::params params;
-    params.m_block_index = cluster_index;
     params.m_pPixels = cluster.m_pixels.get_ptr();
     params.m_num_pixels = cluster.m_pixels.size();
     params.m_comp_index = 0;
@@ -937,7 +616,7 @@ void dxt_hc::determine_alpha_endpoint_codebook_task(uint64 data, void* pData_ptr
 
     for (uint a = 0; a < m_num_alpha_blocks; a++) {
       uint component_index = m_params.m_alpha_component_indices[a];
-      crnlib::vector<uint>& blocks = cluster.m_blocks[cAlpha0Chunks + a];
+      crnlib::vector<uint>& blocks = cluster.m_blocks[cAlpha0Blocks + a];
       for (uint i = 0; i < blocks.size(); i++) {
         uint b = blocks[i];
         uint weight = encoding_weight[m_block_encodings[b]];
@@ -956,7 +635,7 @@ void dxt_hc::determine_alpha_endpoint_codebook_task(uint64 data, void* pData_ptr
           }
           selector |= (uint64)s_best << sh;
         }
-        m_block_selectors[cAlpha0Chunks + a][b] = selector | (uint64)weight << 48;
+        m_block_selectors[cAlpha0Blocks + a][b] = selector | (uint64)weight << 48;
       }
     }
 
@@ -983,21 +662,56 @@ void dxt_hc::determine_alpha_endpoint_codebook_task(uint64 data, void* pData_ptr
   }
 }
 
-bool dxt_hc::determine_alpha_endpoint_codebook() {
-  if (!m_num_alpha_blocks)
-    return true;
+void dxt_hc::determine_alpha_endpoint_clusters_task(uint64 data, void* pData_ptr) {  // Worker task: data is the slice index, pData_ptr the vec2F_tree_vq passed in by determine_alpha_endpoints().
+  vec2F_tree_vq* vq = (vec2F_tree_vq*)pData_ptr;
+  uint num_tasks = m_pTask_pool->get_num_threads() + 1;  // matches the caller, which queues one task for each i in [0, get_num_threads()]
+  for (uint t = m_tiles.size() * data / num_tasks, tEnd = m_tiles.size() * (data + 1) / num_tasks; t < tEnd; t++) {  // this task's contiguous slice of m_tiles
+    if (m_tiles[t].pixels.size()) {  // tiles without pixels are left untouched
+      for (uint a = 0; a < m_num_alpha_blocks; a++)  // each alpha component is looked up in the same shared codebook
+        m_tiles[t].cluster_indices[cAlpha0Blocks + a] = vq->find_best_codebook_entry_fs(m_tiles[t].alpha_endpoints[a]);
+    }
+  }
+}
 
-#if CRNLIB_ENABLE_DEBUG_MESSAGES
-  if (m_params.m_debugging)
-    console::info("Computing optimal alpha cluster endpoints");
-#endif
+void dxt_hc::determine_alpha_endpoints() {  // Clusters tile alpha endpoints with a VQ, assigns tiles and blocks to clusters, then runs the per-cluster codebook tasks.
+  vec2F_tree_vq vq;
+  for (uint a = 0; a < m_num_alpha_blocks; a++) {  // all alpha components train one shared codebook
+    for (uint t = 0; t < m_tiles.size(); t++) {
+      if (m_tiles[t].pixels.size())  // only tiles that hold pixels take part in training
+        vq.add_training_vec(m_tiles[t].alpha_endpoints[a], m_tiles[t].pixels.size());  // weighted by pixel count only; tile.weight is not applied here
+    }
+  }
+
+  vq.generate_codebook(math::minimum<uint>(m_num_tiles, m_params.m_alpha_endpoint_codebook_size));  // codebook size is capped by both m_num_tiles and the configured size
+  m_alpha_clusters.resize(vq.get_codebook_size());  // one cluster per codebook entry
+
+  for (uint i = 0; i <= m_pTask_pool->get_num_threads(); i++)  // get_num_threads() + 1 tasks, the same count the worker divides m_tiles by
+    m_pTask_pool->queue_object_task(this, &dxt_hc::determine_alpha_endpoint_clusters_task, i, &vq);
+  m_pTask_pool->join();  // every tile's alpha cluster indices are set before they are read below
+
+  for (uint a = 0; a < m_num_alpha_blocks; a++) {
+    uint component_index = m_params.m_alpha_component_indices[a];  // pixel channel that feeds alpha component a
+    for (uint t = 0; t < m_tiles.size(); t++) {  // t indexes m_tiles, so bound it by m_tiles.size() like the other tile loops (was m_num_blocks)
+      crnlib::vector<color_quad_u8>& source = m_tiles[t].pixels;
+      if (source.size()) {
+        crnlib::vector<color_quad_u8>& destination = m_alpha_clusters[m_tiles[t].cluster_indices[cAlpha0Blocks + a]].m_pixels;
+        for (uint p = 0; p < source.size(); p++)
+          destination.push_back(color_quad_u8(source[p][component_index]));  // build a pixel from the selected channel value alone
+      }
+    }
+  }
+
+  for (uint b = 0; b < m_num_blocks; b++) {
+    for (uint a = 0; a < m_num_alpha_blocks; a++) {
+      uint cluster_index = m_tiles[m_tile_indices[b]].cluster_indices[cAlpha0Blocks + a];  // a block takes the cluster of the tile it maps to
+      m_endpoint_indices[b].component[cAlpha0Blocks + a] = cluster_index;
+      m_alpha_clusters[cluster_index].m_blocks[cAlpha0Blocks + a].push_back(b);  // and is listed as a member of that cluster, per component
+    }
+  }
 
   for (uint i = 0; i <= m_pTask_pool->get_num_threads(); i++)
     m_pTask_pool->queue_object_task(this, &dxt_hc::determine_alpha_endpoint_codebook_task, i, NULL);
-
   m_pTask_pool->join();
-
-  return !m_canceled;
 }
 
 struct color_selector_details {
@@ -1010,7 +724,7 @@ void dxt_hc::create_color_selector_codebook_task(uint64 data, void* pData_ptr) {
   crnlib::vector<color_selector_details>& selector_details = *static_cast<crnlib::vector<color_selector_details>*>(pData_ptr);
   uint num_tasks = m_pTask_pool->get_num_threads() + 1;
   uint errors[16][4];
-  for (uint b = m_blocks.size() * data / num_tasks, bEnd = m_blocks.size() * (data + 1) / num_tasks; b < bEnd; b++) {
+  for (uint b = m_num_blocks * data / num_tasks, bEnd = m_num_blocks * (data + 1) / num_tasks; b < bEnd; b++) {
     endpoint_cluster& cluster = m_color_clusters[m_endpoint_indices[b].color];
     color_quad_u8* endpoint_colors = cluster.m_color_values;
     for (uint p = 0; p < 16; p++) {
@@ -1051,11 +765,11 @@ void dxt_hc::create_color_selector_codebook_task(uint64 data, void* pData_ptr) {
   }
 }
 
-bool dxt_hc::create_color_selector_codebook() {
+void dxt_hc::create_color_selector_codebook() {
   vec16F_tree_vq selector_vq;
   vec16F v;
-  for (uint b = 0; b < m_blocks.size(); b++) {
-    uint64 selector = m_block_selectors[cColorChunks][b];
+  for (uint b = 0; b < m_num_blocks; b++) {
+    uint64 selector = m_block_selectors[cColorBlocks][b];
     for (uint8 p = 0; p < 16; p++, selector >>= 2)
       v[p] = ((selector & 3) + 0.5f) * 0.25f;
     selector_vq.add_training_vec(v, selector);
@@ -1107,8 +821,6 @@ bool dxt_hc::create_color_selector_codebook() {
       m_color_selectors[i] |= best_s << sh;
     }
   }
-
-  return !m_canceled;
 }
 
 struct alpha_selector_details {
@@ -1121,9 +833,9 @@ void dxt_hc::create_alpha_selector_codebook_task(uint64 data, void* pData_ptr) {
   crnlib::vector<alpha_selector_details>& selector_details = *static_cast<crnlib::vector<alpha_selector_details>*>(pData_ptr);
   uint num_tasks = m_pTask_pool->get_num_threads() + 1;
   uint errors[16][8];
-  for (uint b = m_blocks.size() * data / num_tasks, bEnd = m_blocks.size() * (data + 1) / num_tasks; b < bEnd; b++) {
-    for (uint c = cAlpha0Chunks; c < cAlpha0Chunks + m_num_alpha_blocks; c++) {
-      const uint alpha_pixel_comp = m_params.m_alpha_component_indices[c - cAlpha0Chunks];
+  for (uint b = m_num_blocks * data / num_tasks, bEnd = m_num_blocks * (data + 1) / num_tasks; b < bEnd; b++) {
+    for (uint c = cAlpha0Blocks; c < cAlpha0Blocks + m_num_alpha_blocks; c++) {
+      const uint alpha_pixel_comp = m_params.m_alpha_component_indices[c - cAlpha0Blocks];
       endpoint_cluster& cluster = m_alpha_clusters[m_endpoint_indices[b].component[c]];
       uint* block_values = cluster.m_alpha_values;
       for (uint p = 0; p < 16; p++) {
@@ -1176,11 +888,11 @@ void dxt_hc::create_alpha_selector_codebook_task(uint64 data, void* pData_ptr) {
   }
 }
 
-bool dxt_hc::create_alpha_selector_codebook() {
+void dxt_hc::create_alpha_selector_codebook() {
   vec16F_tree_vq selector_vq;
   vec16F v;
-  for (uint c = cAlpha0Chunks; c < cAlpha0Chunks + m_num_alpha_blocks; c++) {
-    for (uint b = 0; b < m_blocks.size(); b++) {
+  for (uint c = cAlpha0Blocks; c < cAlpha0Blocks + m_num_alpha_blocks; c++) {
+    for (uint b = 0; b < m_num_blocks; b++) {
       uint64 selector = m_block_selectors[c][b];
       for (uint8 p = 0; p < 16; p++, selector >>= 3)
         v[p] = ((selector & 7) + 0.5f) * 0.125f;
@@ -1234,8 +946,6 @@ bool dxt_hc::create_alpha_selector_codebook() {
       m_alpha_selectors[i] |= (uint64)best_s << sh;
     }
   }
-
-  return !m_canceled;
 }
 
 bool dxt_hc::update_progress(uint phase_index, uint subphase_index, uint subphase_total) {
@@ -1244,11 +954,6 @@ bool dxt_hc::update_progress(uint phase_index, uint subphase_index, uint subphas
   if (!m_params.m_pProgress_func)
     return true;
 
-#if CRNLIB_ENABLE_DEBUG_MESSAGES
-  if (m_params.m_debugging)
-    return true;
-#endif
-
   const int percentage_complete = (subphase_total > 1) ? ((100 * subphase_index) / (subphase_total - 1)) : 100;
   if (((int)phase_index == m_prev_phase_index) && (m_prev_percentage_complete == percentage_complete))
     return !m_canceled;
diff --git a/crnlib/crn_dxt_hc.h b/crnlib/crn_dxt_hc.h
index 204090c..679afd8 100644
--- a/crnlib/crn_dxt_hc.h
+++ b/crnlib/crn_dxt_hc.h
@@ -45,22 +45,20 @@ class dxt_hc {
     };
   };
 
-  struct chunk_details {
-    uint block_index[2][2];
-  };
-  crnlib::vector<chunk_details> m_chunk_details;
-
   struct tile_details {
     crnlib::vector<color_quad_u8> pixels;
-    uint weight;
+    float weight;  // per-tile weight, assigned from m_block_weights; multiplied by pixels.size() when training the color endpoint VQ
     vec<6, float> color_endpoint;
     vec<2, float> alpha_endpoints[2];
     uint16 cluster_indices[3];
   };
   crnlib::vector<tile_details> m_tiles;
-  uint m_total_tiles;
+  uint m_num_tiles;
+  float m_color_derating[cCRNMaxLevels][8];
+  float m_alpha_derating[8];
 
-  crnlib::vector<crnlib::vector<color_quad_u8>> m_blocks;
+  color_quad_u8 (*m_blocks)[16];
+  uint m_num_blocks;
   crnlib::vector<float> m_block_weights;
   crnlib::vector<uint8> m_block_encodings;
   crnlib::vector<uint64> m_block_selectors[3];
@@ -72,114 +70,70 @@ class dxt_hc {
   crnlib::vector<endpoint_indices_details> m_endpoint_indices;
   crnlib::vector<selector_indices_details> m_selector_indices;
 
-  struct pixel_chunk {
-    pixel_chunk() { clear(); }
-
-    dxt_pixel_block m_blocks[cChunkBlockHeight][cChunkBlockWidth];
-
-    const color_quad_u8& operator()(uint cx, uint cy) const {
-      CRNLIB_ASSERT((cx < cChunkPixelWidth) && (cy < cChunkPixelHeight));
-
-      return m_blocks[cy >> cBlockPixelHeightShift][cx >> cBlockPixelWidthShift].m_pixels
-          [cy & (cBlockPixelHeight - 1)][cx & (cBlockPixelWidth - 1)];
-    }
-
-    color_quad_u8& operator()(uint cx, uint cy) {
-      CRNLIB_ASSERT((cx < cChunkPixelWidth) && (cy < cChunkPixelHeight));
-
-      return m_blocks[cy >> cBlockPixelHeightShift][cx >> cBlockPixelWidthShift].m_pixels
-          [cy & (cBlockPixelHeight - 1)][cx & (cBlockPixelWidth - 1)];
-    }
-
-    inline void clear() {
-      utils::zero_object(*this);
-      m_weight = 1.0f;
-    }
-
-    float m_weight;
-    uint m_legacy_index;
-  };
-
-  typedef crnlib::vector<pixel_chunk> pixel_chunk_vec;
-
   struct params {
     params()
-        : m_color_endpoint_codebook_size(3072),
+        : m_blocks(0),  // initializers are listed in member declaration order
+          m_num_blocks(0),
+          m_num_levels(0),
+          m_num_faces(0),
+          m_format(cDXT1),
+          m_perceptual(true),
+          m_hierarchical(true),
+          m_color_endpoint_codebook_size(3072),
           m_color_selector_codebook_size(3072),
           m_alpha_endpoint_codebook_size(3072),
           m_alpha_selector_codebook_size(3072),
-          m_adaptive_tile_color_psnr_derating(2.0f),  // was 3.4f
+          m_adaptive_tile_color_psnr_derating(2.0f),
           m_adaptive_tile_alpha_psnr_derating(2.0f),
           m_adaptive_tile_color_alpha_weighting_ratio(3.0f),
-          m_num_levels(0),
-          m_format(cDXT1),
-          m_hierarchical(true),
-          m_perceptual(true),
           m_debugging(false),
-          m_pProgress_func(NULL),
-          m_pProgress_func_data(NULL) {
+          m_pProgress_func(0),
+          m_pProgress_func_data(0),
+          m_endpoint_indices(0),
+          m_selector_indices(0) {
       m_alpha_component_indices[0] = 3;
       m_alpha_component_indices[1] = 0;
-
       for (uint i = 0; i < cCRNMaxLevels; i++) {
-        m_levels[i].m_first_chunk = 0;
-        m_levels[i].m_num_chunks = 0;
+        m_levels[i].m_first_block = m_levels[i].m_num_blocks = 0;
+        m_levels[i].m_block_width = 0;
+        m_levels[i].m_weight = 1.0f;  // neutral default so the field is never left uninitialized
       }
     }
 
-    // Valid range for codebook sizes: [32,8192] (non-power of two values are okay)
+    color_quad_u8 (*m_blocks)[16];  // pointer to the input blocks; each block is an array of 16 pixels
+    uint m_num_blocks;  // number of entries reachable through m_blocks
+    uint m_num_levels;
+    uint m_num_faces;
+
+    struct {  // per-level description; the constructor resets every entry up to cCRNMaxLevels
+      uint m_first_block;
+      uint m_num_blocks;
+      uint m_block_width;
+      float m_weight;
+    } m_levels[cCRNMaxLevels];
+
+    dxt_format m_format;
+    bool m_perceptual;  // If true, perceptual color metrics will be used by the encoder.
+    bool m_hierarchical;  // If false, only 4x4 blocks are used (higher quality/larger files), per the earlier header docs - TODO confirm
+    // Codebook sizes. The earlier header documented the valid range as [32,8192] (non-power-of-two values okay) - TODO confirm.
     uint m_color_endpoint_codebook_size;
     uint m_color_selector_codebook_size;
-
     uint m_alpha_endpoint_codebook_size;
     uint m_alpha_selector_codebook_size;
 
-    // Higher values cause fewer 8x4, 4x8, and 4x4 blocks to be utilized less often (lower quality/smaller files).
-    // Lower values cause the encoder to use large tiles less often (better quality/larger files).
-    // Valid range: [0.0,100.0].
-    // A value of 0 will cause the encoder to only use tiles larger than 4x4 if doing so would incur to quality loss.
     float m_adaptive_tile_color_psnr_derating;
-
     float m_adaptive_tile_alpha_psnr_derating;
-
     float m_adaptive_tile_color_alpha_weighting_ratio;
-
     uint m_alpha_component_indices[2];
 
-    struct miplevel_desc {
-      uint m_first_chunk;
-      uint m_num_chunks;
-      uint m_chunk_width;
-    };
-    // The mip level data is optional!
-    miplevel_desc m_levels[cCRNMaxLevels];
-    uint m_num_levels;
+    bool m_debugging;
+    crn_progress_callback_func m_pProgress_func;  // optional progress callback; update_progress() returns early when it is null
+    void* m_pProgress_func_data;
 
     crnlib::vector<endpoint_indices_details> *m_endpoint_indices;
     crnlib::vector<selector_indices_details> *m_selector_indices;
-
-    dxt_format m_format;
-
-    // If m_hierarchical is false, only 4x4 blocks will be used by the encoder (leading to higher quality/larger files).
-    bool m_hierarchical;
-
-    // If m_perceptual is true, perceptual color metrics will be used by the encoder.
-    bool m_perceptual;
-
-    bool m_debugging;
-
-    crn_progress_callback_func m_pProgress_func;
-    void* m_pProgress_func_data;
   };
 
-  void clear();
-
-  // Main compression function
-  bool compress(const params& p, uint num_chunks, const pixel_chunk* pChunks, task_pool& task_pool);
-
-  // Output accessors
-  inline uint get_num_chunks() const { return m_num_chunks; }
-
   struct selectors {
     selectors() { utils::zero_object(*this); }
 
@@ -198,6 +152,9 @@ class dxt_hc {
   };
   typedef crnlib::vector<selectors> selectors_vec;
 
+  void clear();
+  bool compress(const params& p, task_pool& task_pool);
+
   // Color endpoints
   inline uint get_color_endpoint_codebook_size() const { return m_color_endpoints.size(); }
   inline uint get_color_endpoint(uint codebook_index) const { return m_color_endpoints[codebook_index]; }
@@ -221,34 +178,18 @@ class dxt_hc {
  private:
   params m_params;
 
-  uint m_num_chunks;
-  const pixel_chunk* m_pChunks;
-
   uint m_num_alpha_blocks;
   bool m_has_color_blocks;
   bool m_has_alpha0_blocks;
   bool m_has_alpha1_blocks;
 
   enum {
-    cColorChunks = 0,
-    cAlpha0Chunks = 1,
-    cAlpha1Chunks = 2,
-    cNumCompressedChunkVecs = 3
+    cColorBlocks = 0,  // color slot in the per-component arrays (m_block_selectors[], cluster_indices[], endpoint_cluster::m_blocks[])
+    cAlpha0Blocks = 1,  // first alpha slot; alpha component a is addressed as cAlpha0Blocks + a
+    cAlpha1Blocks = 2,  // second alpha slot
+    cNumCompressedComponents = 3  // total number of component slots
   };
 
-  void compress_dxt1_block(
-      dxt1_endpoint_optimizer::results& results,
-      uint chunk_index, const image_u8& chunk, uint x_ofs, uint y_ofs, uint width, uint height,
-      uint8* pSelectors);
-
-  void compress_dxt5_block(
-      dxt5_endpoint_optimizer::results& results,
-      uint chunk_index, const image_u8& chunk, uint x_ofs, uint y_ofs, uint width, uint height, uint component_index,
-      uint8* pAlpha_selectors);
-
-  void determine_compressed_chunks_task(uint64 data, void* pData_ptr);
-  bool determine_compressed_chunks();
-
   struct endpoint_cluster {
     endpoint_cluster() : m_first_endpoint(0), m_second_endpoint(0) {}
     crnlib::vector<uint> m_blocks[3];
@@ -283,30 +224,25 @@ class dxt_hc {
   typedef tree_clusterizer<vec6F> vec6F_tree_vq;
   typedef tree_clusterizer<vec16F> vec16F_tree_vq;
 
-  void determine_color_endpoint_clusters_task(uint64 data, void* pData_ptr);
-  bool determine_color_endpoint_clusters();
-
-  void determine_alpha_endpoint_clusters_task(uint64 data, void* pData_ptr);
-  bool determine_alpha_endpoint_clusters();
+  void determine_tiles_task(uint64 data, void* pData_ptr);
 
   void determine_color_endpoint_codebook_task(uint64 data, void* pData_ptr);
-  bool determine_color_endpoint_codebook();
+  void determine_color_endpoint_clusters_task(uint64 data, void* pData_ptr);
+  void determine_color_endpoints();
 
   void determine_alpha_endpoint_codebook_task(uint64 data, void* pData_ptr);
-  bool determine_alpha_endpoint_codebook();
+  void determine_alpha_endpoint_clusters_task(uint64 data, void* pData_ptr);
+  void determine_alpha_endpoints();
 
   void create_color_selector_codebook_task(uint64 data, void* pData_ptr);
-  bool create_color_selector_codebook();
+  void create_color_selector_codebook();
 
   void create_alpha_selector_codebook_task(uint64 data, void* pData_ptr);
-  bool create_alpha_selector_codebook();
+  void create_alpha_selector_codebook();
 
-  bool initialize_blocks(const params& p);
-  bool create_block_encodings(const params& p);
   bool update_progress(uint phase_index, uint subphase_index, uint subphase_total);
 };
 
-CRNLIB_DEFINE_BITWISE_COPYABLE(dxt_hc::pixel_chunk);
 CRNLIB_DEFINE_BITWISE_COPYABLE(dxt_hc::selectors);
 
 }  // namespace crnlib