diff --git a/bin/crunch.exe b/bin/crunch.exe new file mode 100644 index 0000000..00c1ac2 Binary files /dev/null and b/bin/crunch.exe differ diff --git a/bin/crunch.pdb b/bin/crunch.pdb new file mode 100644 index 0000000..9aa35c8 Binary files /dev/null and b/bin/crunch.pdb differ diff --git a/bin/crunch_x64.exe b/bin/crunch_x64.exe new file mode 100644 index 0000000..a9a8833 Binary files /dev/null and b/bin/crunch_x64.exe differ diff --git a/bin/crunch_x64.pdb b/bin/crunch_x64.pdb new file mode 100644 index 0000000..1b2c4fc Binary files /dev/null and b/bin/crunch_x64.pdb differ diff --git a/bin_linux/crunch b/bin_linux/crunch new file mode 100644 index 0000000..0bed60d Binary files /dev/null and b/bin_linux/crunch differ diff --git a/bin_mingw/crunch.exe b/bin_mingw/crunch.exe new file mode 100644 index 0000000..c17efe9 Binary files /dev/null and b/bin_mingw/crunch.exe differ diff --git a/crnlib/Makefile b/crnlib/Makefile new file mode 100644 index 0000000..9a9de18 --- /dev/null +++ b/crnlib/Makefile @@ -0,0 +1,96 @@ +COMPILE_OPTIONS = -O3 -fomit-frame-pointer -ffast-math -fno-math-errno -g -fno-strict-aliasing -Wall -Wno-unused-value -Wno-unused -march=core2 +LINKER_OPTIONS = -lpthread -g + +OBJECTS = \ + crn_arealist.o \ + crn_assert.o \ + crn_checksum.o \ + crn_colorized_console.o \ + crn_command_line_params.o \ + crn_comp.o \ + crn_console.o \ + crn_core.o \ + crn_data_stream.o \ + crn_mipmapped_texture.o \ + crn_decomp.o \ + crn_dxt1.o \ + crn_dxt5a.o \ + crn_dxt.o \ + crn_dxt_endpoint_refiner.o \ + crn_dxt_fast.o \ + crn_dxt_hc_common.o \ + crn_dxt_hc.o \ + crn_dxt_image.o \ + crn_dynamic_string.o \ + crn_file_utils.o \ + crn_find_files.o \ + crn_hash.o \ + crn_hash_map.o \ + crn_huffman_codes.o \ + crn_image_utils.o \ + crnlib.o \ + crn_math.o \ + crn_mem.o \ + crn_pixel_format.o \ + crn_platform.o \ + crn_prefix_coding.o \ + crn_qdxt1.o \ + crn_qdxt5.o \ + crn_rand.o \ + crn_resample_filters.o \ + crn_resampler.o \ + crn_ryg_dxt.o \ + crn_sparse_bit_array.o \ + crn_stb_image.o \ + crn_strutils.o \ + crn_symbol_codec.o \ + crn_texture_file_types.o \ + crn_threaded_resampler.o \ + crn_threading_pthreads.o \ + crn_timer.o \ + crn_utils.o \ + crn_value.o \ + crn_vector.o \ + crn_zeng.o \ + crn_texture_comp.o \ + crn_texture_conversion.o \ + crn_dds_comp.o \ + crn_lzma_codec.o \ + crn_ktx_texture.o \ + crn_etc.o \ + crn_rg_etc1.o \ + crn_miniz.o \ + crn_jpge.o \ + crn_jpgd.o \ + lzma_7zBuf2.o \ + lzma_7zBuf.o \ + lzma_7zCrc.o \ + lzma_7zFile.o \ + lzma_7zStream.o \ + lzma_Alloc.o \ + lzma_Bcj2.o \ + lzma_Bra86.o \ + lzma_Bra.o \ + lzma_BraIA64.o \ + lzma_LzFind.o \ + lzma_LzmaDec.o \ + lzma_LzmaEnc.o \ + lzma_LzmaLib.o + +all: crunch + +%.o: %.cpp + g++ $< -o $@ -c $(COMPILE_OPTIONS) + +crunch.o: ../crunch/crunch.cpp + g++ $< -o $@ -c -I../inc -I../crnlib $(COMPILE_OPTIONS) + +corpus_gen.o: ../crunch/corpus_gen.cpp + g++ $< -o $@ -c -I../inc -I../crnlib $(COMPILE_OPTIONS) + +corpus_test.o: ../crunch/corpus_test.cpp + g++ $< -o $@ -c -I../inc -I../crnlib $(COMPILE_OPTIONS) + +crunch: $(OBJECTS) crunch.o corpus_gen.o corpus_test.o + g++ $(OBJECTS) crunch.o corpus_gen.o corpus_test.o -o crunch $(LINKER_OPTIONS) + diff --git a/crnlib/crn_color.h b/crnlib/crn_color.h index 625ada7..127ca04 100644 --- a/crnlib/crn_color.h +++ b/crnlib/crn_color.h @@ -16,6 +16,17 @@ namespace crnlib }; }; + template<> struct color_quad_component_traits + { + enum + { + cSigned = true, + cFloat = false, + cMin = cINT8_MIN, + cMax = cINT8_MAX + }; + }; + template<> struct color_quad_component_traits { enum @@ -86,21 +97,22 @@ namespace crnlib class color_quad : public helpers::rel_ops > { template - static inline T clamp(T v) + static inline parameter_type clamp(T v) { + parameter_type result = static_cast(v); if (!component_traits::cFloat) { if (v < component_traits::cMin) - v = component_traits::cMin; + result = static_cast(component_traits::cMin); else if (v > component_traits::cMax) - v = component_traits::cMax; + result = static_cast(component_traits::cMax); } - return v; + return result; } #ifdef _MSC_VER template<> - static inline int clamp(int v) + static inline parameter_type clamp(int v) { if (!component_traits::cFloat) { @@ -117,7 +129,7 @@ namespace crnlib v = component_traits::cMax; } } - return v; + return static_cast(v); } #endif @@ -179,7 +191,7 @@ namespace crnlib template inline color_quad(const color_quad& other) : - r(clamp(other.r)), g(clamp(other.g)), b(clamp(other.b)), a(clamp(other.a)) + r(static_cast(clamp(other.r))), g(static_cast(clamp(other.g))), b(static_cast(clamp(other.b))), a(static_cast(clamp(other.a))) { } @@ -200,13 +212,21 @@ namespace crnlib return *this; } + inline color_quad& set_rgb(const color_quad& other) + { + r = other.r; + g = other.g; + b = other.b; + return *this; + } + template inline color_quad& operator=(const color_quad& other) { - r = clamp(other.r); - g = clamp(other.g); - b = clamp(other.b); - a = clamp(other.a); + r = static_cast(clamp(other.r)); + g = static_cast(clamp(other.g)); + b = static_cast(clamp(other.b)); + a = static_cast(clamp(other.a)); return *this; } @@ -519,6 +539,7 @@ namespace crnlib }; typedef color_quad color_quad_u8; + typedef color_quad color_quad_i8; typedef color_quad color_quad_i16; typedef color_quad color_quad_u16; typedef color_quad color_quad_i32; @@ -633,7 +654,7 @@ namespace crnlib const int YR = 19595, YG = 38470, YB = 7471, CB_R = -11059, CB_G = -21709, CB_B = 32768, CR_R = 32768, CR_G = -27439, CR_B = -5329; // YCbCr->RGB constants, scaled by 2^16 const int R_CR = 91881, B_CB = 116130, G_CR = -46802, G_CB = -22554; - + inline int RGB_to_Y(const color_quad_u8& rgb) { const int r = rgb[0], g = rgb[1], b = rgb[2]; @@ -663,7 +684,7 @@ namespace crnlib rgb.b = clamp_component(y + ((B_CB * cb + 32768) >> 16)); rgb.a = 255; } - + // Float RGB->YCbCr constants const float S = 1.0f/65536.0f; const float F_YR = S*YR, F_YG = S*YG, F_YB = S*YB, F_CB_R = S*CB_R, F_CB_G = S*CB_G, F_CB_B = S*CB_B, F_CR_R = S*CR_R, F_CR_G = S*CR_G, F_CR_B = S*CR_B; @@ -690,5 +711,284 @@ namespace crnlib } // namespace color + // This class purposely trades off speed for extremely flexibility. It can handle any component swizzle, any pixel type from 1-4 components and 1-32 bits/component, + // any pixel size between 1-16 bytes/pixel, any pixel stride, any color_quad data type (signed/unsigned/float 8/16/32 bits/component), and scaled/non-scaled components. + // On the downside, it's freaking slow. + class pixel_packer + { + public: + pixel_packer() + { + clear(); + } + + pixel_packer(uint num_comps, uint bits_per_comp, int pixel_stride = -1, bool reversed = false) + { + init(num_comps, bits_per_comp, pixel_stride, reversed); + } + + pixel_packer(const char* pComp_map, int pixel_stride = -1, int force_comp_size = -1) + { + init(pComp_map, pixel_stride, force_comp_size); + } + + void clear() + { + utils::zero_this(this); + } + + inline bool is_valid() const { return m_pixel_stride > 0; } + + inline uint get_pixel_stride() const { return m_pixel_stride; } + void set_pixel_stride(uint n) { m_pixel_stride = n; } + + uint get_num_comps() const { return m_num_comps; } + uint get_comp_size(uint index) const { CRNLIB_ASSERT(index < 4); return m_comp_size[index]; } + uint get_comp_ofs(uint index) const { CRNLIB_ASSERT(index < 4); return m_comp_ofs[index]; } + uint get_comp_max(uint index) const { CRNLIB_ASSERT(index < 4); return m_comp_max[index]; } + bool get_rgb_is_luma() const { return m_rgb_is_luma; } + + template + const void* unpack(const void* p, color_quad_type& color, bool rescale = true) const + { + const uint8* pSrc = static_cast(p); + + for (uint i = 0; i < 4; i++) + { + const uint comp_size = m_comp_size[i]; + if (!comp_size) + { + if (color_quad_type::component_traits::cFloat) + color[i] = static_cast< typename color_quad_type::parameter_t >((i == 3) ? 1 : 0); + else + color[i] = static_cast< typename color_quad_type::parameter_t >((i == 3) ? color_quad_type::component_traits::cMax : 0); + continue; + } + + uint n = 0, dst_bit_ofs = 0; + uint src_bit_ofs = m_comp_ofs[i]; + while (dst_bit_ofs < comp_size) + { + const uint byte_bit_ofs = src_bit_ofs & 7; + n |= ((pSrc[src_bit_ofs >> 3] >> byte_bit_ofs) << dst_bit_ofs); + + const uint bits_read = 8 - byte_bit_ofs; + src_bit_ofs += bits_read; + dst_bit_ofs += bits_read; + } + + const uint32 mx = m_comp_max[i]; + n &= mx; + + const uint32 h = static_cast(color_quad_type::component_traits::cMax); + + if (color_quad_type::component_traits::cFloat) + color.set_component(i, static_cast(n)); + else if (rescale) + color.set_component(i, static_cast( (static_cast(n) * h + (mx >> 1U)) / mx ) ); + else if (color_quad_type::component_traits::cSigned) + color.set_component(i, static_cast(math::minimum(n, h))); + else + color.set_component(i, static_cast(n)); + } + + if (m_rgb_is_luma) + { + color[0] = color[1]; + color[2] = color[1]; + } + + return pSrc + m_pixel_stride; + } + + template + void* pack(const color_quad_type& color, void* p, bool rescale = true) const + { + uint8* pDst = static_cast(p); + + for (uint i = 0; i < 4; i++) + { + const uint comp_size = m_comp_size[i]; + if (!comp_size) + continue; + + uint32 mx = m_comp_max[i]; + + uint32 n; + if (color_quad_type::component_traits::cFloat) + { + typename color_quad_type::parameter_t t = color[i]; + if (t < 0.0f) + n = 0; + else if (t > static_cast(mx)) + n = mx; + else + n = math::minimum(static_cast(floor(t + .5f)), mx); + } + else if (rescale) + { + if (color_quad_type::component_traits::cSigned) + n = math::maximum(static_cast(color[i]), 0); + else + n = static_cast(color[i]); + + const uint32 h = static_cast(color_quad_type::component_traits::cMax); + n = static_cast((static_cast(n) * mx + (h >> 1)) / h); + } + else + { + if (color_quad_type::component_traits::cSigned) + n = math::minimum(static_cast(math::maximum(static_cast(color[i]), 0)), mx); + else + n = math::minimum(static_cast(color[i]), mx); + } + + uint src_bit_ofs = 0; + uint dst_bit_ofs = m_comp_ofs[i]; + while (src_bit_ofs < comp_size) + { + const uint cur_byte_bit_ofs = (dst_bit_ofs & 7); + const uint cur_byte_bits = 8 - cur_byte_bit_ofs; + + uint byte_val = pDst[dst_bit_ofs >> 3]; + uint bit_mask = (mx << cur_byte_bit_ofs) & 0xFF; + byte_val &= ~bit_mask; + byte_val |= (n << cur_byte_bit_ofs); + pDst[dst_bit_ofs >> 3] = static_cast(byte_val); + + mx >>= cur_byte_bits; + n >>= cur_byte_bits; + + dst_bit_ofs += cur_byte_bits; + src_bit_ofs += cur_byte_bits; + } + } + + return pDst + m_pixel_stride; + } + + bool init(uint num_comps, uint bits_per_comp, int pixel_stride = -1, bool reversed = false) + { + clear(); + + if ((num_comps < 1) || (num_comps > 4) || (bits_per_comp < 1) || (bits_per_comp > 32)) + { + CRNLIB_ASSERT(0); + return false; + } + + for (uint i = 0; i < num_comps; i++) + { + m_comp_size[i] = bits_per_comp; + m_comp_ofs[i] = i * bits_per_comp; + if (reversed) + m_comp_ofs[i] = ((num_comps - 1) * bits_per_comp) - m_comp_ofs[i]; + } + + for (uint i = 0; i < 4; i++) + m_comp_max[i] = static_cast((1ULL << m_comp_size[i]) - 1ULL); + + m_pixel_stride = (pixel_stride >= 0) ? pixel_stride : (num_comps * bits_per_comp + 7) / 8; + + return true; + } + + // Format examples: + // R16G16B16 + // B5G6R5 + // B5G5R5x1 + // Y8A8 + // A8R8G8B8 + // First component is at LSB in memory. Assumes unsigned integer components, 1-32bits each. + bool init(const char* pComp_map, int pixel_stride = -1, int force_comp_size = -1) + { + clear(); + + uint cur_bit_ofs = 0; + + while (*pComp_map) + { + char c = *pComp_map++; + + int comp_index = -1; + if (c == 'R') + comp_index = 0; + else if (c == 'G') + comp_index = 1; + else if (c == 'B') + comp_index = 2; + else if (c == 'A') + comp_index = 3; + else if (c == 'Y') + comp_index = 4; + else if (c != 'x') + return false; + + uint comp_size = 0; + + uint n = *pComp_map; + if ((n >= '0') && (n <= '9')) + { + comp_size = n - '0'; + pComp_map++; + + n = *pComp_map; + if ((n >= '0') && (n <= '9')) + { + comp_size = (comp_size * 10) + (n - '0'); + pComp_map++; + } + } + + if (force_comp_size != -1) + comp_size = force_comp_size; + + if ((!comp_size) || (comp_size > 32)) + return false; + + if (comp_index == 4) + { + if (m_comp_size[0] || m_comp_size[1] || m_comp_size[2]) + return false; + + //m_comp_ofs[0] = m_comp_ofs[1] = m_comp_ofs[2] = cur_bit_ofs; + //m_comp_size[0] = m_comp_size[1] = m_comp_size[2] = comp_size; + m_comp_ofs[1] = cur_bit_ofs; + m_comp_size[1] = comp_size; + m_rgb_is_luma = true; + m_num_comps++; + } + else if (comp_index >= 0) + { + if (m_comp_size[comp_index]) + return false; + + m_comp_ofs[comp_index] = cur_bit_ofs; + m_comp_size[comp_index] = comp_size; + m_num_comps++; + } + + cur_bit_ofs += comp_size; + } + + for (uint i = 0; i < 4; i++) + m_comp_max[i] = static_cast((1ULL << m_comp_size[i]) - 1ULL); + + if (pixel_stride >= 0) + m_pixel_stride = pixel_stride; + else + m_pixel_stride = (cur_bit_ofs + 7) / 8; + return true; + } + + private: + uint m_pixel_stride; + uint m_num_comps; + uint m_comp_size[4]; + uint m_comp_ofs[4]; + uint m_comp_max[4]; + bool m_rgb_is_luma; + }; + } // namespace crnlib diff --git a/crnlib/crn_colorized_console.cpp b/crnlib/crn_colorized_console.cpp index 67a4891..a55c84e 100644 --- a/crnlib/crn_colorized_console.cpp +++ b/crnlib/crn_colorized_console.cpp @@ -47,7 +47,7 @@ namespace crnlib if (INVALID_HANDLE_VALUE != cons) SetConsoleTextAttribute(cons, (WORD)attr); - if (console::get_prefixes()) + if ((console::get_prefixes()) && (console::get_at_beginning_of_line())) { switch (type) { @@ -85,7 +85,7 @@ namespace crnlib if (console::get_output_disabled()) return true; - if (console::get_prefixes()) + if ((console::get_prefixes()) && (console::get_at_beginning_of_line())) { switch (type) { diff --git a/crnlib/crn_command_line_params.cpp b/crnlib/crn_command_line_params.cpp index bd48738..97e364b 100644 --- a/crnlib/crn_command_line_params.cpp +++ b/crnlib/crn_command_line_params.cpp @@ -14,7 +14,7 @@ #endif namespace crnlib { - void get_command_line(dynamic_string& cmd_line, int argc, char *argv[]) + void get_command_line_as_single_string(dynamic_string& cmd_line, int argc, char *argv[]) { argc, argv; #if CRNLIB_USE_WIN32_API diff --git a/crnlib/crn_command_line_params.h b/crnlib/crn_command_line_params.h index 42d3355..726b706 100644 --- a/crnlib/crn_command_line_params.h +++ b/crnlib/crn_command_line_params.h @@ -6,7 +6,9 @@ namespace crnlib { - void get_command_line(dynamic_string& cmd_line, int argc, char *argv[]); + // Returns the command line passed to the app as a string. + // On systems where this isn't trivial, this function combines together the separate arguments, quoting and adding spaces as needed. + void get_command_line_as_single_string(dynamic_string& cmd_line, int argc, char *argv[]); class command_line_params { diff --git a/crnlib/crn_comp.cpp b/crnlib/crn_comp.cpp index 3e2070a..1a51edf 100644 --- a/crnlib/crn_comp.cpp +++ b/crnlib/crn_comp.cpp @@ -285,8 +285,8 @@ namespace crnlib } #if CRNLIB_CREATE_DEBUG_IMAGES - image_utils::save_to_file(dynamic_string(cVarArg, "color_endpoint_residuals_%u.tga", trial_index).get_ptr(), endpoint_residual_image); - image_utils::save_to_file(dynamic_string(cVarArg, "color_endpoints_%u.tga", trial_index).get_ptr(), endpoint_image); + image_utils::write_to_file(dynamic_string(cVarArg, "color_endpoint_residuals_%u.tga", trial_index).get_ptr(), endpoint_residual_image); + image_utils::write_to_file(dynamic_string(cVarArg, "color_endpoints_%u.tga", trial_index).get_ptr(), endpoint_image); #endif static_huffman_data_model residual_dm[2]; @@ -432,8 +432,8 @@ namespace crnlib } #if CRNLIB_CREATE_DEBUG_IMAGES - image_utils::save_to_file(dynamic_string(cVarArg, "alpha_endpoint_residuals_%u.tga", trial_index).get_ptr(), endpoint_residual_image); - image_utils::save_to_file(dynamic_string(cVarArg, "alpha_endpoints_%u.tga", trial_index).get_ptr(), endpoint_image); + image_utils::write_to_file(dynamic_string(cVarArg, "alpha_endpoint_residuals_%u.tga", trial_index).get_ptr(), endpoint_residual_image); + image_utils::write_to_file(dynamic_string(cVarArg, "alpha_endpoints_%u.tga", trial_index).get_ptr(), endpoint_image); #endif static_huffman_data_model residual_dm; @@ -686,8 +686,8 @@ namespace crnlib } #if CRNLIB_CREATE_DEBUG_IMAGES - image_utils::save_to_file(dynamic_string(cVarArg, "selectors_%u_%u.tga", trial_index, max_selector_value).get_ptr(), selector_image); - image_utils::save_to_file(dynamic_string(cVarArg, "selector_residuals_%u_%u.tga", trial_index, max_selector_value).get_ptr(), residual_image); + image_utils::write_to_file(dynamic_string(cVarArg, "selectors_%u_%u.tga", trial_index, max_selector_value).get_ptr(), selector_image); + image_utils::write_to_file(dynamic_string(cVarArg, "selector_residuals_%u_%u.tga", trial_index, max_selector_value).get_ptr(), residual_image); #endif static_huffman_data_model residual_dm; @@ -1065,7 +1065,7 @@ namespace crnlib uint mip_group = 0; uint chunk_index = 0; - uint mip_group_chunk_index = 0; + uint mip_group_chunk_index = 0; (void)mip_group_chunk_index; for (uint level_index = 0; level_index < m_pParams->m_levels; level_index++) { const uint width = math::maximum(1U, m_pParams->m_width >> level_index); @@ -1342,6 +1342,11 @@ namespace crnlib params.m_perceptual = false; break; } + case cCRNFmtETC1: + { + console::warning("crn_comp::quantize_chunks: This class does not support ETC1"); + return false; + } default: { return false; @@ -1366,7 +1371,7 @@ namespace crnlib image_u8 img; dxt_hc::create_debug_image_from_chunks((m_pParams->m_width+7)>>3, (m_pParams->m_height+7)>>3, pixel_chunks, &m_hvq.get_chunk_encoding_vec(), img, true, -1); - image_utils::save_to_file("quantized_chunks.tga", img); + image_utils::write_to_file("quantized_chunks.tga", img); } #endif diff --git a/crnlib/crn_console.cpp b/crnlib/crn_console.cpp index 8d13622..da57125 100644 --- a/crnlib/crn_console.cpp +++ b/crnlib/crn_console.cpp @@ -15,6 +15,7 @@ namespace crnlib data_stream* console::m_pLog_stream; mutex* console::m_pMutex; uint console::m_num_messages[cCMTTotal]; + bool console::m_at_beginning_of_line = true; const uint cConsoleBufSize = 4096; @@ -70,7 +71,7 @@ namespace crnlib } const char* pPrefix = NULL; - if (m_prefixes) + if ((m_prefixes) && (m_at_beginning_of_line)) { switch (type) { @@ -88,6 +89,9 @@ namespace crnlib ::printf(m_crlf ? "%s\n" : "%s", buf); } + uint n = strlen(buf); + m_at_beginning_of_line = (m_crlf) || ((n) && (buf[n - 1] == '\n')); + if ((type != cProgressConsoleMessage) && (m_pLog_stream)) { // Yes this is bad. diff --git a/crnlib/crn_console.h b/crnlib/crn_console.h index 1039680..aa4cc8e 100644 --- a/crnlib/crn_console.h +++ b/crnlib/crn_console.h @@ -59,6 +59,7 @@ namespace crnlib static void disable_prefixes(); static void enable_prefixes(); static bool get_prefixes() { return m_prefixes; } + static bool get_at_beginning_of_line() { return m_at_beginning_of_line; } static void disable_crlf(); static void enable_crlf(); @@ -92,6 +93,8 @@ namespace crnlib static mutex* m_pMutex; static uint m_num_messages[cCMTTotal]; + + static bool m_at_beginning_of_line; }; #if defined(WIN32) diff --git a/crnlib/crn_core.h b/crnlib/crn_core.h index 91a23b9..6c8c7f0 100644 --- a/crnlib/crn_core.h +++ b/crnlib/crn_core.h @@ -12,7 +12,7 @@ #if defined(WIN32) && !defined(CRNLIB_ANSI_CPLUSPLUS) // MSVC or MinGW, x86 or x64, Win32 API's for threading and Win32 Interlocked API's or GCC built-ins for atomic ops. #ifdef NDEBUG - // Ensure checked iterators are disabled. + // Ensure checked iterators are disabled. Note: Be sure anything else that links against this lib also #define's this stuff, or remove this crap! #define _SECURE_SCL 0 #define _HAS_ITERATOR_DEBUGGING 0 #endif diff --git a/crnlib/crn_data_stream_serializer.h b/crnlib/crn_data_stream_serializer.h index 4bb17e2..c249f9a 100644 --- a/crnlib/crn_data_stream_serializer.h +++ b/crnlib/crn_data_stream_serializer.h @@ -19,6 +19,8 @@ namespace crnlib data_stream* get_stream() const { return m_pStream; } void set_stream(data_stream* pStream) { m_pStream = pStream; } + const dynamic_string& get_name() const { return m_pStream ? m_pStream->get_name() : g_empty_dynamic_string; } + bool get_error() { return m_pStream ? m_pStream->get_error() : false; } bool get_little_endian() const { return m_little_endian; } @@ -34,6 +36,30 @@ namespace crnlib return m_pStream->read(pBuf, len) == len; } + // size = size of each element, count = number of elements, returns actual count of elements written + uint write(const void* pBuf, uint size, uint count) + { + uint actual_size = size * count; + if (!actual_size) + return 0; + uint n = m_pStream->write(pBuf, actual_size); + if (n == actual_size) + return count; + return n / size; + } + + // size = size of each element, count = number of elements, returns actual count of elements read + uint read(void* pBuf, uint size, uint count) + { + uint actual_size = size * count; + if (!actual_size) + return 0; + uint n = m_pStream->read(pBuf, actual_size); + if (n == actual_size) + return count; + return n / size; + } + bool write_chars(const char* pBuf, uint len) { return write(pBuf, len); @@ -249,6 +275,16 @@ namespace crnlib return true; } + + bool read_entire_file(crnlib::vector& buf) + { + return m_pStream->read_array(buf); + } + + bool write_entire_file(const crnlib::vector& buf) + { + return m_pStream->write_array(buf); + } // Got this idea from the Molly Rocket forums. // fmt may contain the characters "1", "2", or "4". diff --git a/crnlib/crn_dds_comp.cpp b/crnlib/crn_dds_comp.cpp index f382cee..d988714 100644 --- a/crnlib/crn_dds_comp.cpp +++ b/crnlib/crn_dds_comp.cpp @@ -34,7 +34,7 @@ namespace crnlib } } - bool dds_comp::create_dds_tex(dds_texture &dds_tex) + bool dds_comp::create_dds_tex(mipmapped_texture &dds_tex) { image_u8 images[cCRNMaxFaces][cCRNMaxLevels]; @@ -137,7 +137,7 @@ namespace crnlib if (!m_pQDXT_state) { - m_pQDXT_state = crnlib_new(m_task_pool); + m_pQDXT_state = crnlib_new(m_task_pool); if (params.m_pProgress_func) { diff --git a/crnlib/crn_dds_comp.h b/crnlib/crn_dds_comp.h index 9a8993c..a98ce24 100644 --- a/crnlib/crn_dds_comp.h +++ b/crnlib/crn_dds_comp.h @@ -2,7 +2,7 @@ // See Copyright Notice and license at the end of inc/crnlib.h #pragma once #include "crn_comp.h" -#include "crn_dds_texture.h" +#include "crn_mipmapped_texture.h" #include "crn_texture_comp.h" namespace crnlib @@ -25,8 +25,8 @@ namespace crnlib virtual crnlib::vector& get_comp_data() { return m_comp_data; } private: - dds_texture m_src_tex; - dds_texture m_packed_tex; + mipmapped_texture m_src_tex; + mipmapped_texture m_packed_tex; crnlib::vector m_comp_data; @@ -38,10 +38,10 @@ namespace crnlib task_pool m_task_pool; qdxt1_params m_q1_params; qdxt5_params m_q5_params; - dds_texture::qdxt_state *m_pQDXT_state; + mipmapped_texture::qdxt_state *m_pQDXT_state; void clear(); - bool create_dds_tex(dds_texture &dds_tex); + bool create_dds_tex(mipmapped_texture &dds_tex); bool convert_to_dxt(const crn_comp_params& params); }; diff --git a/crnlib/crn_dxt.cpp b/crnlib/crn_dxt.cpp index 1cc37d4..bb8c616 100644 --- a/crnlib/crn_dxt.cpp +++ b/crnlib/crn_dxt.cpp @@ -31,6 +31,7 @@ namespace crnlib case cDXT5A: return "DXT5A"; case cDXN_XY: return "DXN_XY"; case cDXN_YX: return "DXN_YX"; + case cETC1: return "ETC1"; default: break; } CRNLIB_ASSERT(false); @@ -60,6 +61,7 @@ namespace crnlib case cDXT1: case cDXT1A: case cDXT5A: + case cETC1: return 4; case cDXT3: case cDXT5: diff --git a/crnlib/crn_dxt.h b/crnlib/crn_dxt.h index 18d2c19..f95349c 100644 --- a/crnlib/crn_dxt.h +++ b/crnlib/crn_dxt.h @@ -43,7 +43,9 @@ namespace crnlib cDXT5A, cDXN_XY, // inverted relative to standard ATI2, 360's DXN - cDXN_YX // standard ATI2 + cDXN_YX, // standard ATI2, + + cETC1 // Ericsson texture compression (color only, 4x4 blocks, 4bpp, 64-bits/block) }; const float cDXT1MaxLinearValue = 3.0f; @@ -131,6 +133,32 @@ namespace crnlib m_selectors[y] |= (val << (x * cDXT1SelectorBits)); } + inline void flip_x(uint w = 4, uint h = 4) + { + for (uint x = 0; x < (w / 2); x++) + { + for (uint y = 0; y < h; y++) + { + const uint c = get_selector(x, y); + set_selector(x, y, get_selector((w - 1) - x, y)); + set_selector((w - 1) - x, y, c); + } + } + } + + inline void flip_y(uint w = 4, uint h = 4) + { + for (uint y = 0; y < (h / 2); y++) + { + for (uint x = 0; x < w; x++) + { + const uint c = get_selector(x, y); + set_selector(x, y, get_selector(x, (h - 1) - y)); + set_selector(x, (h - 1) - y, c); + } + } + } + static uint16 pack_color(const color_quad_u8& color, bool scaled, uint bias = 127U); static uint16 pack_color(uint r, uint g, uint b, bool scaled, uint bias = 127U); @@ -163,6 +191,32 @@ namespace crnlib void set_alpha(uint x, uint y, uint value, bool scaled); uint get_alpha(uint x, uint y, bool scaled) const; + + inline void flip_x(uint w = 4, uint h = 4) + { + for (uint x = 0; x < (w / 2); x++) + { + for (uint y = 0; y < h; y++) + { + const uint c = get_alpha(x, y, false); + set_alpha(x, y, get_alpha((w - 1) - x, y, false), false); + set_alpha((w - 1) - x, y, c, false); + } + } + } + + inline void flip_y(uint w = 4, uint h = 4) + { + for (uint y = 0; y < (h / 2); y++) + { + for (uint x = 0; x < w; x++) + { + const uint c = get_alpha(x, y, false); + set_alpha(x, y, get_alpha(x, (h - 1) - y, false), false); + set_alpha(x, (h - 1) - y, c, false); + } + } + } }; CRNLIB_DEFINE_BITWISE_COPYABLE(dxt3_block); @@ -245,6 +299,32 @@ namespace crnlib m_selectors[byte_index + 1] = static_cast(v >> 8); } + inline void flip_x(uint w = 4, uint h = 4) + { + for (uint x = 0; x < (w / 2); x++) + { + for (uint y = 0; y < h; y++) + { + const uint c = get_selector(x, y); + set_selector(x, y, get_selector((w - 1) - x, y)); + set_selector((w - 1) - x, y, c); + } + } + } + + inline void flip_y(uint w = 4, uint h = 4) + { + for (uint y = 0; y < (h / 2); y++) + { + for (uint x = 0; x < w; x++) + { + const uint c = get_selector(x, y); + set_selector(x, y, get_selector(x, (h - 1) - y)); + set_selector(x, (h - 1) - y, c); + } + } + } + enum { cMaxSelectorValues = 8 }; // Results written to alpha channel. diff --git a/crnlib/crn_dxt1.cpp b/crnlib/crn_dxt1.cpp index 9a94f79..6ee75e6 100644 --- a/crnlib/crn_dxt1.cpp +++ b/crnlib/crn_dxt1.cpp @@ -1,5 +1,13 @@ // File: crn_dxt1.cpp // See Copyright Notice and license at the end of inc/crnlib.h +// +// Notes: +// This class is not optimized for performance on small blocks, unlike typical DXT1 compressors. It's optimized for scalability and quality: +// - Very high quality in terms of avg. RMSE or Luma RMSE. Goal is to always match or beat every other known offline DXTc compressor: ATI_Compress, squish, NVidia texture tools, nvdxt.exe, etc. +// - Reasonable scalability and stability with hundreds to many thousands of input colors (including inputs with many thousands of equal/nearly equal colors). +// - Any quality optimization which results in even a tiny improvement is worth it -- as long as it's either a constant or linear slowdown. +// Tiny quality improvements can be extremely valuable in large clusters. +// - Quality should scale well vs. CPU time cost, i.e. the more time you spend the higher the quality. #include "crn_core.h" #include "crn_dxt1.h" #include "crn_ryg_dxt.hpp" @@ -9,6 +17,22 @@ namespace crnlib { + //----------------------------------------------------------------------------------------------------------------------------------------- + + static const int16 g_fast_probe_table[] = { 0, 1, 2, 3 }; + static const uint cFastProbeTableSize = sizeof(g_fast_probe_table) / sizeof(g_fast_probe_table[0]); + + static const int16 g_normal_probe_table[] = { 0, 1, 3, 5, 7 }; + static const uint cNormalProbeTableSize = sizeof(g_normal_probe_table) / sizeof(g_normal_probe_table[0]); + + static const int16 g_better_probe_table[] = { 0, 1, 2, 3, 5, 9, 15, 19, 27, 43 }; + static const uint cBetterProbeTableSize = sizeof(g_better_probe_table) / sizeof(g_better_probe_table[0]); + + static const int16 g_uber_probe_table[] = { 0, 1, 2, 3, 5, 7, 9, 10, 13, 15, 19, 27, 43, 59, 91 }; + static const uint cUberProbeTableSize = sizeof(g_uber_probe_table) / sizeof(g_uber_probe_table[0]); + + //----------------------------------------------------------------------------------------------------------------------------------------- + dxt1_endpoint_optimizer::dxt1_endpoint_optimizer() : m_pParams(NULL), m_pResults(NULL), @@ -75,6 +99,7 @@ namespace crnlib return true; } + // All selectors are equal. Try compressing as if it was solid, using the block's average color, using ryg's optimal single color compression tables. bool dxt1_endpoint_optimizer::try_average_block_as_solid() { uint64 tot_r = 0; @@ -110,6 +135,7 @@ namespace crnlib if (m_pParams->m_quality == cCRNDXTQualityUber) { + // Try compressing as all-solid using the other (non-average) colors in the block in uber. for (uint i = 0; i < m_unique_colors.size(); i++) { uint r = m_unique_colors[i].m_color[0]; @@ -134,6 +160,7 @@ namespace crnlib return improved; } + // Block is solid, trying using ryg's optimal single color tables. bool dxt1_endpoint_optimizer::handle_solid_block() { int r = m_unique_colors[0].m_color.r; @@ -195,6 +222,7 @@ namespace crnlib } } + // Compute PCA (principle axis, i.e. direction of largest variance) of input vectors. void dxt1_endpoint_optimizer::compute_pca(vec3F& axis, const vec3F_array& norm_colors, const vec3F& def) { #if 0 @@ -202,6 +230,7 @@ namespace crnlib CRNLIB_ASSERT(m_unique_colors.size() == norm_colors.size()); + // Incremental PCA bool first = true; for (uint i = 0; i < norm_colors.size(); i++) { @@ -272,6 +301,7 @@ namespace crnlib //vfr = hi[0] - lo[0]; //vfg = hi[1] - lo[1]; //vfb = hi[2] - lo[2]; + // This is more stable. vfr = .9f; vfg = 1.0f; vfb = .7f; @@ -325,6 +355,7 @@ namespace crnlib static const uint8 g_invTableAlpha[4] = { 1, 0, 2, 3 }; static const uint8 g_invTableColor[4] = { 1, 0, 3, 2 }; + // Computes a valid (encodable) DXT1 solution (low/high colors, swizzled selectors) from input. void dxt1_endpoint_optimizer::return_solution(results& res, const potential_solution& solution) { bool invert_selectors; @@ -433,6 +464,7 @@ namespace crnlib return vec3F(c.r, c.g, c.b); } + // Per-component 1D endpoint optimization. void dxt1_endpoint_optimizer::optimize_endpoint_comps() { if ((m_best_solution.m_alpha_block) || (!m_best_solution.m_error)) @@ -583,6 +615,7 @@ namespace crnlib } // comp_index } + // Voxel adjacency delta coordinations. static const struct adjacent_coords { int8 x, y, z; @@ -618,6 +651,7 @@ namespace crnlib {1, 1, 1} }; + // Attempt to refine current solution's endpoints given the current selectors using least squares. bool dxt1_endpoint_optimizer::refine_solution(int refinement_level) { CRNLIB_ASSERT(m_best_solution.m_valid); @@ -694,11 +728,15 @@ namespace crnlib } else if (refinement_level == 1) { + // Try exploring the local lattice neighbors of the least squares optimized result. color_quad_u8 e[2]; + + e[0].clear(); e[0][0] = (uint8)math::clamp(static_cast((At1_r*yy - At2_r*xy)*frb+0.5f),0,31); e[0][1] = (uint8)math::clamp(static_cast((At1_g*yy - At2_g*xy)*fg +0.5f),0,63); e[0][2] = (uint8)math::clamp(static_cast((At1_b*yy - At2_b*xy)*frb+0.5f),0,31); + e[1].clear(); e[1][0] = (uint8)math::clamp(static_cast((At2_r*xx - At1_r*xy)*frb+0.5f),0,31); e[1][1] = (uint8)math::clamp(static_cast((At2_g*xx - At1_g*xy)*fg +0.5f),0,63); e[1][2] = (uint8)math::clamp(static_cast((At2_b*xx - At1_b*xy)*frb+0.5f),0,31); @@ -737,11 +775,14 @@ namespace crnlib } else { + // Try even harder to explore the local lattice neighbors of the least squares optimized result. color_quad_u8 e[2]; + e[0].clear(); e[0][0] = (uint8)math::clamp(static_cast((At1_r*yy - At2_r*xy)*frb+0.5f),0,31); e[0][1] = (uint8)math::clamp(static_cast((At1_g*yy - At2_g*xy)*fg +0.5f),0,63); e[0][2] = (uint8)math::clamp(static_cast((At1_b*yy - At2_b*xy)*frb+0.5f),0,31); + e[1].clear(); e[1][0] = (uint8)math::clamp(static_cast((At2_r*xx - At1_r*xy)*frb+0.5f),0,31); e[1][1] = (uint8)math::clamp(static_cast((At2_g*xx - At1_g*xy)*fg +0.5f),0,63); e[1][2] = (uint8)math::clamp(static_cast((At2_b*xx - At1_b*xy)*frb+0.5f),0,31); @@ -790,63 +831,7 @@ namespace crnlib //----------------------------------------------------------------------------------------------------------------------------------------- - static int16 g_fast_probe_table[] = - { - 0, - 1, - 2, - 3 - }; - const uint cFastProbeTableSize = sizeof(g_fast_probe_table) / sizeof(g_fast_probe_table[0]); - - static int16 g_normal_probe_table[] = - { - 0, - 1, - 3, - 5, - 7 - }; - const uint cNormalProbeTableSize = sizeof(g_normal_probe_table) / sizeof(g_normal_probe_table[0]); - - static int16 g_better_probe_table[] = - { - 0, - 1, - 2, - 3, - - 5, - 9, - 15, - 19, - - 27, - 43 - }; - const uint cBetterProbeTableSize = sizeof(g_better_probe_table) / sizeof(g_better_probe_table[0]); - - static int16 g_uber_probe_table[] = - { - 0, - 1, - 2, - 3, - 5, - 7, - 9, - 10, - 13, - 15, - 19, - 27, - 43, - 59, - 91 - }; - - const uint cUberProbeTableSize = sizeof(g_uber_probe_table) / sizeof(g_uber_probe_table[0]); - + // Primary endpoint optimization entrypoint. bool dxt1_endpoint_optimizer::optimize_endpoints(vec3F& low_color, vec3F& high_color) { vec3F orig_low_color(low_color); @@ -855,10 +840,11 @@ namespace crnlib m_trial_solution.clear(); uint num_passes; - int16* pProbe_table = g_uber_probe_table; + const int16* pProbe_table = g_uber_probe_table; uint probe_range; float dist_per_trial = .015625f; + // How many probes, and the distance between each probe depends on the quality level. switch (m_pParams->m_quality) { case cCRNDXTQualitySuperFast: @@ -895,6 +881,7 @@ namespace crnlib if (m_pParams->m_endpoint_caching) { + // Try the previous X winning endpoints. This may not give us optimal results, but it may increase the probability of early outs while evaluating potential solutions. const uint num_prev_results = math::minimum(cMaxPrevResults, m_num_prev_results); for (uint i = 0; i < num_prev_results; i++) { @@ -909,6 +896,7 @@ namespace crnlib if (!m_best_solution.m_error) { + // Got lucky - one of the previous endpoints is optimal. return_solution(*m_pResults, m_best_solution); return true; } @@ -949,6 +937,12 @@ namespace crnlib for (uint pass = 0; pass < num_passes; pass++) { + // Now separately sweep or probe the low and high colors along the principle axis, both positively and negatively. + // This results in two arrays of candidate low/high endpoints. Every unique combination of candidate endpoints is tried as a potential solution. + // In higher quality modes, the various nearby lattice neighbors of each candidate endpoint are also explored, which allows the current solution to "wobble" or "migrate" + // to areas with lower error. + // This entire process can be repeated up to X times (depending on the quality level) until a local minimum is established. + // This method is very stable and scalable. It could be implemented more elegantly, but I'm now very cautious of touching this code. if (pass) { low_color = unpack_to_vec3F_raw(m_best_solution.m_coords.m_low_color); @@ -959,6 +953,7 @@ namespace crnlib if (!prev_best_error) break; + // Sweep low endpoint along principle axis, record positions int prev_packed_color[2] = { -1, -1 }; uint num_low_trials = 0; vec3F initial_probe_low_color(low_color + vec3F(.5f)); @@ -987,6 +982,7 @@ namespace crnlib prev_packed_color[0] = -1; prev_packed_color[1] = -1; + // Sweep high endpoint along principle axis, record positions uint num_high_trials = 0; vec3F initial_probe_high_color(high_color + vec3F(.5f)); for (uint i = 0; i < probe_range; i++) @@ -1011,6 +1007,7 @@ namespace crnlib } } + // Now try all unique combinations. for (uint i = 0; i < num_low_trials; i++) { for (uint j = 0; j < num_high_trials; j++) @@ -1028,6 +1025,7 @@ namespace crnlib if (m_pParams->m_quality >= cCRNDXTQualityNormal) { + // Generate new candidates by exploring the low color's direct lattice neighbors color_quad_u8 lc(dxt1_block::unpack_color(m_best_solution.m_coords.m_low_color, false)); for (int i = 0; i < 26; i++) @@ -1051,6 +1049,7 @@ namespace crnlib if (m_pParams->m_quality == cCRNDXTQualityUber) { + // Generate new candidates by exploring the low color's direct lattice neighbors - this time, explore much further separately on each axis. lc = dxt1_block::unpack_color(m_best_solution.m_coords.m_low_color, false); for (int a = 0; a < 3; a++) @@ -1075,6 +1074,7 @@ namespace crnlib } } + // Generate new candidates by exploring the high color's direct lattice neighbors color_quad_u8 hc(dxt1_block::unpack_color(m_best_solution.m_coords.m_high_color, false)); for (int i = 0; i < 26; i++) @@ -1098,6 +1098,7 @@ namespace crnlib if (m_pParams->m_quality == cCRNDXTQualityUber) { + // Generate new candidates by exploring the high color's direct lattice neighbors - this time, explore much further separately on each axis. hc = dxt1_block::unpack_color(m_best_solution.m_coords.m_high_color, false); for (int a = 0; a < 3; a++) @@ -1127,7 +1128,10 @@ namespace crnlib break; if (m_pParams->m_quality >= cCRNDXTQualityUber) + { + // Attempt to refine current solution's endpoints given the current selectors using least squares. refine_solution(1); + } } if (m_pParams->m_quality >= cCRNDXTQualityNormal) @@ -1136,16 +1140,26 @@ namespace crnlib { bool choose_solid_block = false; if (m_best_solution.are_selectors_all_equal()) + { + // All selectors equal - try various solid-block optimizations choose_solid_block = try_average_block_as_solid(); + } if ((!choose_solid_block) && (m_pParams->m_quality == cCRNDXTQualityUber)) + { + // Per-component 1D endpoint optimization. optimize_endpoint_comps(); + } } if (m_pParams->m_quality == cCRNDXTQualityUber) { if (m_best_solution.m_error) + { + // The pixels may have already been DXTc compressed by another compressor. + // It's usually possible to recover the endpoints used to previously pack the block. try_combinatorial_encoding(); + } } } @@ -1153,6 +1167,7 @@ namespace crnlib if (m_pParams->m_endpoint_caching) { + // Remember result for later reruse. m_prev_results[m_num_prev_results & (cMaxPrevResults - 1)] = m_best_solution.m_coords; m_num_prev_results++; } @@ -1173,6 +1188,8 @@ namespace crnlib if (m_perceptual) { + // Compute RGB weighting for use in perceptual mode. + // The more saturated the block, the more the weights deviate from (1,1,1). float ave_redness = 0; float ave_blueness = 0; float ave_l = 0; @@ -1224,6 +1241,8 @@ namespace crnlib im.transpose_in_place(); m_principle_axis = m_principle_axis * im; #else + // Purposely scale the components of the principle axis by the perceptual weighting. + // There's probably a cleaner way to go about this, but it works (more competitive in perceptual mode against nvdxt.exe or ATI_Compress). m_principle_axis[0] /= perceptual_weights[0]; m_principle_axis[1] /= perceptual_weights[1]; m_principle_axis[2] /= perceptual_weights[2]; @@ -1232,6 +1251,7 @@ namespace crnlib if (num_passes > 1) { + // Check for obviously wild principle axes and try to compensate by backing off the component weightings. if (fabs(m_principle_axis[0]) >= .795f) perceptual_weights.set(.424f, .6f, .072f); else if (fabs(m_principle_axis[2]) >= .795f) @@ -1241,6 +1261,7 @@ namespace crnlib } } + // Find bounds of projection onto (potentially skewed) principle axis. float l = 1e+9; float h = -1e+9; @@ -1256,6 +1277,7 @@ namespace crnlib if (!low_color.is_within_bounds(0.0f, 1.0f)) { + // Low color is outside the lattice, so bring it back in by casting a ray. vec3F coord; float t; aabb3F bounds(vec3F(0.0f), vec3F(1.0f)); @@ -1266,6 +1288,7 @@ namespace crnlib if (!high_color.is_within_bounds(0.0f, 1.0f)) { + // High color is outside the lattice, so bring it back in by casting a ray. vec3F coord; float t; aabb3F bounds(vec3F(0.0f), vec3F(1.0f)); @@ -1274,6 +1297,7 @@ namespace crnlib high_color = coord; } + // Now optimize the endpoints using the projection bounds on the (potentially skewed) principle axis as a starting point. if (!optimize_endpoints(low_color, high_color)) return false; @@ -1286,6 +1310,7 @@ namespace crnlib return true; } + // Tries quantizing the block to 4 colors using vanilla LBG. It tries all combinations of the quantized results as potential endpoints. bool dxt1_endpoint_optimizer::try_median4(const vec3F& low_color, const vec3F& high_color) { vec3F means[4]; @@ -1408,6 +1433,8 @@ namespace crnlib return improved; } + // Given candidate low/high endpoints, find the optimal selectors for 3 and 4 color blocks, compute the resulting error, + // and use the candidate if it results in less error than the best found result so far. bool dxt1_endpoint_optimizer::evaluate_solution( const dxt1_solution_coordinates& coords, bool early_out, @@ -1428,6 +1455,7 @@ namespace crnlib CRNLIB_ASSERT(m_trial_solution.m_valid); + // Caller has requested all considered candidate solutions for later analysis. m_pSolutions->resize(m_pSolutions->size() + 1); solution& new_solution = m_pSolutions->back(); new_solution.m_selectors.resize(m_pParams->m_num_pixels); @@ -1843,6 +1871,8 @@ namespace crnlib return unique_color(res, 1); } + // The block may have been already compressed using another DXTc compressor, such as squish, ATI_Compress, ryg_dxt, etc. + // Attempt to recover the endpoints used by that block compressor. void dxt1_endpoint_optimizer::try_combinatorial_encoding() { if ((m_unique_colors.size() < 2) || (m_unique_colors.size() > 4)) @@ -1954,6 +1984,8 @@ namespace crnlib return; } + // The fourth (transparent) color in 3 color "transparent" blocks is black, which can be optionally exploited for small gains in DXT1 mode if the caller + // doesn't actually use alpha. (But not in DXT5 mode, because 3-color blocks aren't permitted by GPU's for DXT5.) bool dxt1_endpoint_optimizer::try_alpha_as_black_optimization() { const params* pOrig_params = m_pParams; @@ -2077,6 +2109,7 @@ namespace crnlib return true; } + // Build array of unique colors and their weights. void dxt1_endpoint_optimizer::find_unique_colors() { m_has_transparent_pixels = false; diff --git a/crnlib/crn_dxt_hc.cpp b/crnlib/crn_dxt_hc.cpp index 33b309f..0c1cd39 100644 --- a/crnlib/crn_dxt_hc.cpp +++ b/crnlib/crn_dxt_hc.cpp @@ -2504,7 +2504,7 @@ namespace crnlib { const chunk_tile_desc &tile_desc = encoding_desc.m_tiles[t]; - img.draw_box( + img.unclipped_fill_box( x*8 + tile_desc.m_x_ofs, y*8 + tile_desc.m_y_ofs, tile_desc.m_width + 1, tile_desc.m_height + 1, color_quad_u8(128, 128, 128, 255)); } diff --git a/crnlib/crn_dxt_image.cpp b/crnlib/crn_dxt_image.cpp index 7e3e225..e6bd8bb 100644 --- a/crnlib/crn_dxt_image.cpp +++ b/crnlib/crn_dxt_image.cpp @@ -19,6 +19,10 @@ #include "..\ext\ATI_Compress\ATI_Compress.h" #endif +#include "crn_rg_etc1.h" +#include "crn_etc.h" +#define CRNLIB_USE_RG_ETC1 1 + namespace crnlib { dxt_image::dxt_image() : @@ -103,12 +107,13 @@ namespace crnlib m_blocks_y = (m_height + 3) >> cDXTBlockShift; m_num_elements_per_block = 2; - if ((fmt == cDXT1) || (fmt == cDXT1A) || (fmt == cDXT5A)) + if ((fmt == cDXT1) || (fmt == cDXT1A) || (fmt == cDXT5A) || (fmt == cETC1)) m_num_elements_per_block = 1; m_total_blocks = m_blocks_x * m_blocks_y; m_total_elements = m_total_blocks * m_num_elements_per_block; + CRNLIB_ASSUME((uint)cDXT1BytesPerBlock == (uint)cETC1BytesPerBlock); m_bytes_per_block = cDXT1BytesPerBlock * m_num_elements_per_block; m_format = fmt; @@ -118,48 +123,54 @@ namespace crnlib case cDXT1: case cDXT1A: { - m_element_type[0] = cColor; + m_element_type[0] = cColorDXT1; m_element_component_index[0] = -1; break; } case cDXT3: { - m_element_type[0] = cAlpha3; - m_element_type[1] = cColor; + m_element_type[0] = cAlphaDXT3; + m_element_type[1] = cColorDXT1; m_element_component_index[0] = 3; m_element_component_index[1] = -1; break; } case cDXT5: { - m_element_type[0] = cAlpha5; - m_element_type[1] = cColor; + m_element_type[0] = cAlphaDXT5; + m_element_type[1] = cColorDXT1; m_element_component_index[0] = 3; m_element_component_index[1] = -1; break; } case cDXT5A: { - m_element_type[0] = cAlpha5; + m_element_type[0] = cAlphaDXT5; m_element_component_index[0] = 3; break; } case cDXN_XY: { - m_element_type[0] = cAlpha5; - m_element_type[1] = cAlpha5; + m_element_type[0] = cAlphaDXT5; + m_element_type[1] = cAlphaDXT5; m_element_component_index[0] = 0; m_element_component_index[1] = 1; break; } case cDXN_YX: { - m_element_type[0] = cAlpha5; - m_element_type[1] = cAlpha5; + m_element_type[0] = cAlphaDXT5; + m_element_type[1] = cAlphaDXT5; m_element_component_index[0] = 1; m_element_component_index[1] = 0; break; } + case cETC1: + { + m_element_type[0] = cColorETC1; + m_element_component_index[0] = -1; + break; + } default: { CRNLIB_ASSERT(0); @@ -231,8 +242,7 @@ namespace crnlib uint block_index = 0; - dxt1_endpoint_optimizer dxt1_optimizer; - dxt5_endpoint_optimizer dxt5_optimizer; + set_block_pixels_context optimizer_context; int prev_progress_percentage = -1; for (uint block_y = 0; block_y < m_blocks_y; block_y++) @@ -280,7 +290,7 @@ namespace crnlib } } - set_block_pixels(block_x, block_y, pixels, p, dxt1_optimizer, dxt5_optimizer); + set_block_pixels(block_x, block_y, pixels, p, optimizer_context); } } } @@ -430,6 +440,7 @@ namespace crnlib for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) pixels[i].set(0, 0, 0, 255); + bool all_blocks_valid = true; for (uint block_y = 0; block_y < m_blocks_y; block_y++) { const uint pixel_ofs_y = block_y * cDXTBlockSize; @@ -437,7 +448,8 @@ namespace crnlib for (uint block_x = 0; block_x < m_blocks_x; block_x++) { - get_block_pixels(block_x, block_y, pixels); + if (!get_block_pixels(block_x, block_y, pixels)) + all_blocks_valid = false; const uint pixel_ofs_x = block_x * cDXTBlockSize; @@ -457,6 +469,9 @@ namespace crnlib } } + if (!all_blocks_valid) + console::error("dxt_image::unpack: One or more invalid blocks encountered!"); + img.reset_comp_flags(); img.set_component_valid(0, false); img.set_component_valid(1, false); @@ -542,7 +557,53 @@ namespace crnlib { switch (m_element_type[element_index]) { - case cColor: + case cColorETC1: + { + const etc1_block& block = *reinterpret_cast(&get_element(block_x, block_y, element_index)); + + const bool diff_flag = block.get_diff_bit(); + const bool flip_flag = block.get_flip_bit(); + const uint table_index0 = block.get_inten_table(0); + const uint table_index1 = block.get_inten_table(1); + color_quad_u8 subblock_colors0[4], subblock_colors1[4]; + + if (diff_flag) + { + const uint16 base_color5 = block.get_base5_color(); + const uint16 delta_color3 = block.get_delta3_color(); + etc1_block::get_diff_subblock_colors(subblock_colors0, base_color5, table_index0); + etc1_block::get_diff_subblock_colors(subblock_colors1, base_color5, delta_color3, table_index1); + } + else + { + const uint16 base_color4_0 = block.get_base4_color(0); + etc1_block::get_abs_subblock_colors(subblock_colors0, base_color4_0, table_index0); + const uint16 base_color4_1 = block.get_base4_color(1); + etc1_block::get_abs_subblock_colors(subblock_colors1, base_color4_1, table_index1); + } + + const uint bx = x & 3; + const uint by = y & 3; + + const uint selector_index = block.get_selector(bx, by); + if (flip_flag) + { + if (by <= 2) + result = subblock_colors0[selector_index]; + else + result = subblock_colors1[selector_index]; + } + else + { + if (bx <= 2) + result = subblock_colors0[selector_index]; + else + result = subblock_colors1[selector_index]; + } + + break; + } + case cColorDXT1: { const dxt1_block* pBlock = reinterpret_cast(&get_element(block_x, block_y, element_index)); @@ -584,7 +645,7 @@ namespace crnlib break; } - case cAlpha5: + case cAlphaDXT5: { const int comp_index = m_element_component_index[element_index]; @@ -626,7 +687,7 @@ namespace crnlib break; } - case cAlpha3: + case cAlphaDXT3: { const int comp_index = m_element_component_index[element_index]; @@ -652,7 +713,7 @@ namespace crnlib switch (m_element_type[element_index]) { - case cColor: + case cColorDXT1: { if (m_format <= cDXT1A) { @@ -675,7 +736,7 @@ namespace crnlib break; } - case cAlpha5: + case cAlphaDXT5: { const dxt5_block* pBlock = reinterpret_cast(&get_element(block_x, block_y, element_index)); @@ -713,7 +774,7 @@ namespace crnlib } } } - case cAlpha3: + case cAlphaDXT3: { const dxt3_block* pBlock = reinterpret_cast(&get_element(block_x, block_y, element_index)); @@ -738,7 +799,63 @@ namespace crnlib { switch (m_element_type[element_index]) { - case cColor: + case cColorETC1: + { + etc1_block& block = *reinterpret_cast(&get_element(block_x, block_y, element_index)); + + const bool diff_flag = block.get_diff_bit(); + const bool flip_flag = block.get_flip_bit(); + const uint table_index0 = block.get_inten_table(0); + const uint table_index1 = block.get_inten_table(1); + color_quad_u8 subblock_colors0[4], subblock_colors1[4]; + + if (diff_flag) + { + const uint16 base_color5 = block.get_base5_color(); + const uint16 delta_color3 = block.get_delta3_color(); + etc1_block::get_diff_subblock_colors(subblock_colors0, base_color5, table_index0); + etc1_block::get_diff_subblock_colors(subblock_colors1, base_color5, delta_color3, table_index1); + } + else + { + const uint16 base_color4_0 = block.get_base4_color(0); + etc1_block::get_abs_subblock_colors(subblock_colors0, base_color4_0, table_index0); + const uint16 base_color4_1 = block.get_base4_color(1); + etc1_block::get_abs_subblock_colors(subblock_colors1, base_color4_1, table_index1); + } + + const uint bx = x & 3; + const uint by = y & 3; + + color_quad_u8* pColors = subblock_colors1; + if (flip_flag) + { + if (by <= 2) + pColors = subblock_colors0; + } + else + { + if (bx <= 2) + pColors = subblock_colors0; + } + + uint best_error = UINT_MAX; + uint best_selector = 0; + + for (uint i = 0; i < 4; i++) + { + uint error = color::color_distance(perceptual, pColors[i], c, false); + if (error < best_error) + { + best_error = error; + best_selector = i; + } + } + + block.set_selector(bx, by, best_selector); + break; + } + case cColorDXT1: { dxt1_block* pDXT1_block = reinterpret_cast(pElement); @@ -767,7 +884,7 @@ namespace crnlib break; } - case cAlpha5: + case cAlphaDXT5: { dxt5_block* pDXT5_block = reinterpret_cast(pElement); @@ -794,7 +911,7 @@ namespace crnlib break; } - case cAlpha3: + case cAlphaDXT3: { const int comp_index = m_element_component_index[element_index]; @@ -809,15 +926,30 @@ namespace crnlib } // element_index } - void dxt_image::get_block_pixels(uint block_x, uint block_y, color_quad_u8* pPixels) const + bool dxt_image::get_block_pixels(uint block_x, uint block_y, color_quad_u8* pPixels) const { + bool success = true; const element* pElement = &get_element(block_x, block_y, 0); for (uint element_index = 0; element_index < m_num_elements_per_block; element_index++, pElement++) { switch (m_element_type[element_index]) { - case cColor: + case cColorETC1: + { + const etc1_block& block = *reinterpret_cast(&get_element(block_x, block_y, element_index)); + // Preserve alpha if the format is something weird (like ETC1 for color and DXT5A for alpha) - which isn't currently supported. +#if CRNLIB_USE_RG_ETC1 + if (!rg_etc1::unpack_etc1_block(&block, (uint32*)pPixels, m_format != cETC1)) + success = false; +#else + if (!unpack_etc1(block, pPixels, m_format != cETC1)) + success = false; +#endif + + break; + } + case cColorDXT1: { const dxt1_block* pDXT1_block = reinterpret_cast(pElement); @@ -838,7 +970,7 @@ namespace crnlib break; } - case cAlpha5: + case cAlphaDXT5: { const dxt5_block* pDXT5_block = reinterpret_cast(pElement); @@ -856,7 +988,7 @@ namespace crnlib break; } - case cAlpha3: + case cAlphaDXT3: { const dxt3_block* pDXT3_block = reinterpret_cast(pElement); @@ -874,21 +1006,53 @@ namespace crnlib default: break; } } // element_index + return success; } void dxt_image::set_block_pixels(uint block_x, uint block_y, const color_quad_u8* pPixels, const pack_params& p) { - dxt1_endpoint_optimizer dxt1_optimizer; - dxt5_endpoint_optimizer dxt5_optimizer; - set_block_pixels(block_x, block_y, pPixels, p, dxt1_optimizer, dxt5_optimizer); + set_block_pixels_context context; + set_block_pixels(block_x, block_y, pPixels, p, context); } void dxt_image::set_block_pixels( uint block_x, uint block_y, const color_quad_u8* pPixels, const pack_params& p, - dxt1_endpoint_optimizer& dxt1_optimizer, dxt5_endpoint_optimizer& dxt5_optimizer) + set_block_pixels_context& context) { element* pElement = &get_element(block_x, block_y, 0); + if (m_format == cETC1) + { + etc1_block &dst_block = *reinterpret_cast(pElement); + +#if CRNLIB_USE_RG_ETC1 + rg_etc1::etc1_quality etc_quality = rg_etc1::cHighQuality; + if (p.m_quality <= cCRNDXTQualityFast) + etc_quality = rg_etc1::cLowQuality; + else if (p.m_quality <= cCRNDXTQualityNormal) + etc_quality = rg_etc1::cMediumQuality; + + rg_etc1::etc1_pack_params pack_params; + pack_params.m_dithering = p.m_dithering; + //pack_params.m_perceptual = p.m_perceptual; + pack_params.m_quality = etc_quality; + rg_etc1::pack_etc1_block(&dst_block, (uint32*)pPixels, pack_params); +#else + crn_etc_quality etc_quality = cCRNETCQualitySlow; + if (p.m_quality <= cCRNDXTQualityFast) + etc_quality = cCRNETCQualityFast; + else if (p.m_quality <= cCRNDXTQualityNormal) + etc_quality = cCRNETCQualityMedium; + + crn_etc1_pack_params pack_params; + pack_params.m_perceptual = p.m_perceptual; + pack_params.m_quality = etc_quality; + pack_params.m_dithering = p.m_dithering; + + pack_etc1_block(dst_block, pPixels, pack_params, context.m_etc1_optimizer); +#endif + } + else #if CRNLIB_SUPPORT_SQUISH if ((p.m_compressor == cCRNDXTCompressorSquish) && ((m_format == cDXT1) || (m_format == cDXT1A) || (m_format == cDXT3) || (m_format == cDXT5) || (m_format == cDXT5A))) { @@ -963,21 +1127,21 @@ namespace crnlib { switch (m_element_type[element_index]) { - case cColor: + case cColorDXT1: { dxt1_block* pDXT1_block = reinterpret_cast(pElement); dxt_fast::compress_color_block(pDXT1_block, pPixels, p.m_quality >= cCRNDXTQualityNormal); break; } - case cAlpha5: + case cAlphaDXT5: { dxt5_block* pDXT5_block = reinterpret_cast(pElement); dxt_fast::compress_alpha_block(pDXT5_block, pPixels, m_element_component_index[element_index]); break; } - case cAlpha3: + case cAlphaDXT3: { const int comp_index = m_element_component_index[element_index]; @@ -994,11 +1158,14 @@ namespace crnlib } else { + dxt1_endpoint_optimizer& dxt1_optimizer = context.m_dxt1_optimizer; + dxt5_endpoint_optimizer& dxt5_optimizer = context.m_dxt5_optimizer; + for (uint element_index = 0; element_index < m_num_elements_per_block; element_index++, pElement++) { switch (m_element_type[element_index]) { - case cColor: + case cColorDXT1: { dxt1_block* pDXT1_block = reinterpret_cast(pElement); @@ -1050,7 +1217,7 @@ namespace crnlib break; } - case cAlpha5: + case cAlphaDXT5: { dxt5_block* pDXT5_block = reinterpret_cast(pElement); @@ -1081,7 +1248,7 @@ namespace crnlib break; } - case cAlpha3: + case cAlphaDXT3: { const int comp_index = m_element_component_index[element_index]; @@ -1104,7 +1271,23 @@ namespace crnlib switch (m_element_type[element_index]) { - case cColor: + case cColorETC1: + { + const etc1_block& src_block = *reinterpret_cast(&block); + if (src_block.get_diff_bit()) + { + packed_low_endpoint = src_block.get_base5_color(); + packed_high_endpoint = src_block.get_delta3_color(); + } + else + { + packed_low_endpoint = src_block.get_base4_color(0); + packed_high_endpoint = src_block.get_base4_color(1); + } + + break; + } + case cColorDXT1: { const dxt1_block& block1 = *reinterpret_cast(&block); @@ -1113,7 +1296,7 @@ namespace crnlib break; } - case cAlpha5: + case cAlphaDXT5: { const dxt5_block& block5 = *reinterpret_cast(&block); @@ -1122,7 +1305,7 @@ namespace crnlib break; } - case cAlpha3: + case cAlphaDXT3: { packed_low_endpoint = 0; packed_high_endpoint = 255; @@ -1140,7 +1323,23 @@ namespace crnlib switch (m_element_type[element_index]) { - case cColor: + case cColorETC1: + { + const etc1_block& src_block = *reinterpret_cast(&get_element(block_x, block_y, element_index)); + if (src_block.get_diff_bit()) + { + low_endpoint = etc1_block::unpack_color5(static_cast(l), scaled); + etc1_block::unpack_color5(high_endpoint, static_cast(l), static_cast(h), scaled); + } + else + { + low_endpoint = etc1_block::unpack_color4(static_cast(l), scaled); + high_endpoint = etc1_block::unpack_color4(static_cast(h), scaled); + } + + return -1; + } + case cColorDXT1: { uint r, g, b; @@ -1156,7 +1355,7 @@ namespace crnlib return -1; } - case cAlpha5: + case cAlphaDXT5: { const int component = m_element_component_index[element_index]; @@ -1165,7 +1364,7 @@ namespace crnlib return component; } - case cAlpha3: + case cAlphaDXT3: { const int component = m_element_component_index[element_index]; @@ -1180,18 +1379,48 @@ namespace crnlib return 0; } - uint dxt_image::get_block_colors(uint block_x, uint block_y, uint element_index, color_quad_u8* pColors) + uint dxt_image::get_block_colors(uint block_x, uint block_y, uint element_index, color_quad_u8* pColors, uint subblock_index) { const element& block = get_element(block_x, block_y, element_index); switch (m_element_type[element_index]) { - case cColor: + case cColorETC1: + { + const etc1_block& src_block = *reinterpret_cast(&get_element(block_x, block_y, element_index)); + const uint table_index0 = src_block.get_inten_table(0); + const uint table_index1 = src_block.get_inten_table(1); + if (src_block.get_diff_bit()) + { + const uint16 base_color5 = src_block.get_base5_color(); + const uint16 delta_color3 = src_block.get_delta3_color(); + if (subblock_index) + etc1_block::get_diff_subblock_colors(pColors, base_color5, delta_color3, table_index1); + else + etc1_block::get_diff_subblock_colors(pColors, base_color5, table_index0); + } + else + { + if (subblock_index) + { + const uint16 base_color4_1 = src_block.get_base4_color(1); + etc1_block::get_abs_subblock_colors(pColors, base_color4_1, table_index1); + } + else + { + const uint16 base_color4_0 = src_block.get_base4_color(0); + etc1_block::get_abs_subblock_colors(pColors, base_color4_0, table_index0); + } + } + + break; + } + case cColorDXT1: { const dxt1_block& block1 = *reinterpret_cast(&block); return dxt1_block::get_block_colors(pColors, static_cast(block1.get_low_color()), static_cast(block1.get_high_color())); } - case cAlpha5: + case cAlphaDXT5: { const dxt5_block& block5 = *reinterpret_cast(&block); @@ -1205,7 +1434,7 @@ namespace crnlib return n; } - case cAlpha3: + case cAlphaDXT3: { const int comp_index = m_element_component_index[element_index]; for (uint i = 0; i < 16; i++) @@ -1219,6 +1448,32 @@ namespace crnlib return 0; } + uint dxt_image::get_subblock_index(uint x, uint y, uint element_index) const + { + if (m_element_type[element_index] != cColorETC1) + return 0; + + const uint block_x = x >> cDXTBlockShift; + const uint block_y = y >> cDXTBlockShift; + + const element& block = get_element(block_x, block_y, element_index); + + const etc1_block& src_block = *reinterpret_cast(&block); + if (src_block.get_flip_bit()) + { + return ((y & 3) >= 2) ? 1 : 0; + } + else + { + return ((x & 3) >= 2) ? 1 : 0; + } + } + + uint dxt_image::get_total_subblocks(uint element_index) const + { + return (m_element_type[element_index] == cColorETC1) ? 2 : 0; + } + uint dxt_image::get_selector(uint x, uint y, uint element_index) const { CRNLIB_ASSERT((x < m_width) && (y < m_height)); @@ -1230,17 +1485,22 @@ namespace crnlib switch (m_element_type[element_index]) { - case cColor: + case cColorETC1: + { + const etc1_block& src_block = *reinterpret_cast(&block); + return src_block.get_selector(x & 3, y & 3); + } + case cColorDXT1: { const dxt1_block& block1 = *reinterpret_cast(&block); return block1.get_selector(x & 3, y & 3); } - case cAlpha5: + case cAlphaDXT5: { const dxt5_block& block5 = *reinterpret_cast(&block); return block5.get_selector(x & 3, y & 3); } - case cAlpha3: + case cAlphaDXT3: { const dxt3_block& block3 = *reinterpret_cast(&block); return block3.get_alpha(x & 3, y & 3, false); @@ -1257,6 +1517,165 @@ namespace crnlib m_format = cDXT1A; } + void dxt_image::flip_col(uint x) + { + const uint other_x = (m_blocks_x - 1) - x; + for (uint y = 0; y < m_blocks_y; y++) + { + for (uint e = 0; e < get_elements_per_block(); e++) + { + element tmp[2] = { get_element(x, y, e), get_element(other_x, y, e) }; + + for (uint i = 0; i < 2; i++) + { + switch (get_element_type(e)) + { + case cColorDXT1: reinterpret_cast(&tmp[i])->flip_x(); break; + case cAlphaDXT3: reinterpret_cast(&tmp[i])->flip_x(); break; + case cAlphaDXT5: reinterpret_cast(&tmp[i])->flip_x(); break; + default: CRNLIB_ASSERT(0); break; + } + } + + get_element(x, y, e) = tmp[1]; + get_element(other_x, y, e) = tmp[0]; + } + } + } + + void dxt_image::flip_row(uint y) + { + const uint other_y = (m_blocks_y - 1) - y; + for (uint x = 0; x < m_blocks_x; x++) + { + for (uint e = 0; e < get_elements_per_block(); e++) + { + element tmp[2] = { get_element(x, y, e), get_element(x, other_y, e) }; + + for (uint i = 0; i < 2; i++) + { + switch (get_element_type(e)) + { + case cColorDXT1: reinterpret_cast(&tmp[i])->flip_y(); break; + case cAlphaDXT3: reinterpret_cast(&tmp[i])->flip_y(); break; + case cAlphaDXT5: reinterpret_cast(&tmp[i])->flip_y(); break; + default: CRNLIB_ASSERT(0); break; + } + } + + get_element(x, y, e) = tmp[1]; + get_element(x, other_y, e) = tmp[0]; + } + } + } + + bool dxt_image::can_flip(uint axis_index) + { + if (m_format == cETC1) + { + // Can't reliably flip ETC1 textures (because of asymmetry in the 555/333 differential coding of subblock colors). + return false; + } + + uint d; + if (axis_index) + d = m_height; + else + d = m_width; + + if (d & 3) + { + if (d > 4) + return false; + } + + return true; + } + + bool dxt_image::flip_x() + { + if (m_format == cETC1) + { + // Can't reliably flip ETC1 textures (because of asymmetry in the 555/333 differential coding of subblock colors). + return false; + } + + if ((m_width & 3) && (m_width > 4)) + return false; + + if (m_width == 1) + return true; + + const uint mid_x = m_blocks_x / 2; + + for (uint x = 0; x < mid_x; x++) + flip_col(x); + + if (m_blocks_x & 1) + { + const uint w = math::minimum(m_width, 4U); + for (uint y = 0; y < m_blocks_y; y++) + { + for (uint e = 0; e < get_elements_per_block(); e++) + { + element tmp(get_element(mid_x, y, e)); + switch (get_element_type(e)) + { + case cColorDXT1: reinterpret_cast(&tmp)->flip_x(w, 4); break; + case cAlphaDXT3: reinterpret_cast(&tmp)->flip_x(w, 4); break; + case cAlphaDXT5: reinterpret_cast(&tmp)->flip_x(w, 4); break; + default: CRNLIB_ASSERT(0); break; + } + get_element(mid_x, y, e) = tmp; + } + } + } + + return true; + } + + bool dxt_image::flip_y() + { + if (m_format == cETC1) + { + // Can't reliably flip ETC1 textures (because of asymmetry in the 555/333 differential coding of subblock colors). + return false; + } + + if ((m_height & 3) && (m_height > 4)) + return false; + + if (m_height == 1) + return true; + + const uint mid_y = m_blocks_y / 2; + + for (uint y = 0; y < mid_y; y++) + flip_row(y); + + if (m_blocks_y & 1) + { + const uint h = math::minimum(m_height, 4U); + for (uint x = 0; x < m_blocks_x; x++) + { + for (uint e = 0; e < get_elements_per_block(); e++) + { + element tmp(get_element(x, mid_y, e)); + switch (get_element_type(e)) + { + case cColorDXT1: reinterpret_cast(&tmp)->flip_y(4, h); break; + case cAlphaDXT3: reinterpret_cast(&tmp)->flip_y(4, h); break; + case cAlphaDXT5: reinterpret_cast(&tmp)->flip_y(4, h); break; + default: CRNLIB_ASSERT(0); break; + } + get_element(x, mid_y, e) = tmp; + } + } + } + + return true; + } + } // namespace crnlib diff --git a/crnlib/crn_dxt_image.h b/crnlib/crn_dxt_image.h index 6be139b..50b4838 100644 --- a/crnlib/crn_dxt_image.h +++ b/crnlib/crn_dxt_image.h @@ -3,6 +3,10 @@ #pragma once #include "crn_dxt1.h" #include "crn_dxt5a.h" +#include "crn_etc.h" +#if CRNLIB_SUPPORT_ETC_A1 +#include "crn_etc_a1.h" +#endif #include "crn_image.h" #define CRNLIB_SUPPORT_ATI_COMPRESS 0 @@ -34,7 +38,7 @@ namespace crnlib dxt_format get_format() const { return m_format; } - bool has_color() const { return (m_format == cDXT1) || (m_format == cDXT1A) || (m_format == cDXT3) || (m_format == cDXT5); } + bool has_color() const { return (m_format == cDXT1) || (m_format == cDXT1A) || (m_format == cDXT3) || (m_format == cDXT5) || (m_format == cETC1); } // Will be pretty slow if the image is DXT1, as this method scans for alpha blocks/selectors. bool has_alpha() const; @@ -43,10 +47,12 @@ namespace crnlib { cUnused = 0, - cColor, + cColorDXT1, // DXT1 color block - cAlpha3, - cAlpha5, + cAlphaDXT3, // DXT3 alpha block (only) + cAlphaDXT5, // DXT5 alpha block (only) + + cColorETC1, // ETC1 color block }; element_type get_element_type(uint element_index) const { CRNLIB_ASSERT(element_index < m_num_elements_per_block); return m_element_type[element_index]; } @@ -86,6 +92,7 @@ namespace crnlib { m_quality = cCRNDXTQualityUber; m_perceptual = true; + m_dithering = false; m_grayscale_sampling = false; m_use_both_block_types = true; m_endpoint_caching = true; @@ -125,6 +132,7 @@ namespace crnlib crn_dxt_compressor_type m_compressor; bool m_perceptual; + bool m_dithering; bool m_grayscale_sampling; bool m_use_both_block_types; bool m_endpoint_caching; @@ -148,7 +156,7 @@ namespace crnlib void endian_swap(); - uint get_num_elements() const { return m_elements.size(); } + uint get_total_elements() const { return m_elements.size(); } const element_vec& get_element_vec() const { return m_elements; } element_vec& get_element_vec() { return m_elements; } @@ -168,9 +176,19 @@ namespace crnlib void set_pixel(uint x, uint y, const color_quad_u8& c, bool perceptual = true); // get_block_pixels() only sets those components stored in the image! - void get_block_pixels(uint block_x, uint block_y, color_quad_u8* pPixels) const; + bool get_block_pixels(uint block_x, uint block_y, color_quad_u8* pPixels) const; + + struct set_block_pixels_context + { + dxt1_endpoint_optimizer m_dxt1_optimizer; + dxt5_endpoint_optimizer m_dxt5_optimizer; + pack_etc1_block_context m_etc1_optimizer; +#if CRNLIB_SUPPORT_ETC_A1 + etc_a1::pack_etc1_block_context m_etc1_a1_optimizer; +#endif + }; - void set_block_pixels(uint block_x, uint block_y, const color_quad_u8* pPixels, const pack_params& p, dxt1_endpoint_optimizer& dxt1_optimizer, dxt5_endpoint_optimizer& dxt5_optimizer); + void set_block_pixels(uint block_x, uint block_y, const color_quad_u8* pPixels, const pack_params& p, set_block_pixels_context& context); void set_block_pixels(uint block_x, uint block_y, const color_quad_u8* pPixels, const pack_params& p); void get_block_endpoints(uint block_x, uint block_y, uint element_index, uint& packed_low_endpoint, uint& packed_high_endpoint) const; @@ -181,11 +199,20 @@ namespace crnlib // pColors should point to a 16 entry array, to handle DXT3. // Returns the number of block colors: 3, 4, 6, 8, or 16. - uint get_block_colors(uint block_x, uint block_y, uint element_index, color_quad_u8* pColors); + uint get_block_colors(uint block_x, uint block_y, uint element_index, color_quad_u8* pColors, uint subblock_index = 0); + + uint get_subblock_index(uint x, uint y, uint element_index) const; + uint get_total_subblocks(uint element_index) const; uint get_selector(uint x, uint y, uint element_index) const; void change_dxt1_to_dxt1a(); + + bool can_flip(uint axis_index); + + // Returns true if the texture can actually be flipped. + bool flip_x(); + bool flip_y(); private: element_vec m_elements; @@ -205,7 +232,7 @@ namespace crnlib int8 m_element_component_index[2]; element_type m_element_type[2]; - dxt_format m_format; // DXT1, 1A, 3, 5, N/3DC, or A + dxt_format m_format; // DXT1, 1A, 3, 5, N/3DC, or 5A bool init_internal(dxt_format fmt, uint width, uint height); void init_task(uint64 data, void* pData_ptr); @@ -213,6 +240,9 @@ namespace crnlib #if CRNLIB_SUPPORT_ATI_COMPRESS bool init_ati_compress(dxt_format fmt, const image_u8& img, const pack_params& p); #endif + + void flip_col(uint x); + void flip_row(uint y); }; } // namespace crnlib diff --git a/crnlib/crn_etc.cpp b/crnlib/crn_etc.cpp new file mode 100644 index 0000000..86937da --- /dev/null +++ b/crnlib/crn_etc.cpp @@ -0,0 +1,1481 @@ +// File: crn_etc.cpp +// See Copyright Notice and license at the end of inc/crnlib.h +#include "crn_core.h" +#include "crn_etc.h" +#include "crn_radix_sort.h" +#include "crn_ryg_dxt.hpp" + +namespace crnlib +{ + const int g_etc1_inten_tables[cETC1IntenModifierValues][cETC1SelectorValues] = + { + { -8, -2, 2, 8 }, { -17, -5, 5, 17 }, { -29, -9, 9, 29 }, { -42, -13, 13, 42 }, + { -60, -18, 18, 60 }, { -80, -24, 24, 80 }, { -106, -33, 33, 106 }, { -183, -47, 47, 183 } + }; + + const uint8 g_etc1_to_selector_index[cETC1SelectorValues] = { 2, 3, 1, 0 }; + const uint8 g_selector_index_to_etc1[cETC1SelectorValues] = { 3, 2, 0, 1 }; + + // [flip][subblock][pixel_index] + const etc1_coord2 g_etc1_pixel_coords[2][2][8] = + { + { + { + { 0, 0 }, { 0, 1 }, { 0, 2 }, { 0, 3 }, + { 1, 0 }, { 1, 1 }, { 1, 2 }, { 1, 3 } + }, + { + { 2, 0 }, { 2, 1 }, { 2, 2 }, { 2, 3 }, + { 3, 0 }, { 3, 1 }, { 3, 2 }, { 3, 3 } + } + }, + { + { + { 0, 0 }, { 1, 0 }, { 2, 0 }, { 3, 0 }, + { 0, 1 }, { 1, 1 }, { 2, 1 }, { 3, 1 } + }, + { + { 0, 2 }, { 1, 2 }, { 2, 2 }, { 3, 2 }, + { 0, 3 }, { 1, 3 }, { 2, 3 }, { 3, 3 } + }, + } + }; + + // Given an ETC1 diff/inten_table/selector, and an 8-bit desired color, this table encodes the best packed_color in the low byte, and the abs error in the high byte. + static uint16 g_etc1_inverse_lookup[2*8*4][256]; // [diff/inten_table/selector][desired_color] + + // g_color8_to_etc_block_config[color][table_index] = Supplies for each 8-bit color value a list of packed ETC1 diff/intensity table/selectors/packed_colors that map to that color. + // To pack: diff | (inten << 1) | (selector << 4) | (packed_c << 8) + static const uint16 g_color8_to_etc_block_config_0_255[2][33] = + { + { 0x0000, 0x0010, 0x0002, 0x0012, 0x0004, 0x0014, 0x0006, 0x0016, 0x0008, 0x0018, 0x000A, 0x001A, 0x000C, 0x001C, 0x000E, 0x001E, + 0x0001, 0x0011, 0x0003, 0x0013, 0x0005, 0x0015, 0x0007, 0x0017, 0x0009, 0x0019, 0x000B, 0x001B, 0x000D, 0x001D, 0x000F, 0x001F, 0xFFFF }, + { 0x0F20, 0x0F30, 0x0E32, 0x0F22, 0x0E34, 0x0F24, 0x0D36, 0x0F26, 0x0C38, 0x0E28, 0x0B3A, 0x0E2A, 0x093C, 0x0E2C, 0x053E, 0x0D2E, + 0x1E31, 0x1F21, 0x1D33, 0x1F23, 0x1C35, 0x1E25, 0x1A37, 0x1E27, 0x1839, 0x1D29, 0x163B, 0x1C2B, 0x133D, 0x1B2D, 0x093F, 0x1A2F, 0xFFFF }, + }; + + // Really only [254][11]. + static const uint16 g_color8_to_etc_block_config_1_to_254[254][12] = + { + { 0x021C, 0x0D0D, 0xFFFF }, { 0x0020, 0x0021, 0x0A0B, 0x061F, 0xFFFF }, { 0x0113, 0x0217, 0xFFFF }, { 0x0116, 0x031E, + 0x0B0E, 0x0405, 0xFFFF }, { 0x0022, 0x0204, 0x050A, 0x0023, 0xFFFF }, { 0x0111, 0x0319, 0x0809, 0x170F, 0xFFFF }, { + 0x0303, 0x0215, 0x0607, 0xFFFF }, { 0x0030, 0x0114, 0x0408, 0x0031, 0x0201, 0x051D, 0xFFFF }, { 0x0100, 0x0024, 0x0306, + 0x0025, 0x041B, 0x0E0D, 0xFFFF }, { 0x021A, 0x0121, 0x0B0B, 0x071F, 0xFFFF }, { 0x0213, 0x0317, 0xFFFF }, { 0x0112, + 0x0505, 0xFFFF }, { 0x0026, 0x070C, 0x0123, 0x0027, 0xFFFF }, { 0x0211, 0x0909, 0xFFFF }, { 0x0110, 0x0315, 0x0707, + 0x0419, 0x180F, 0xFFFF }, { 0x0218, 0x0131, 0x0301, 0x0403, 0x061D, 0xFFFF }, { 0x0032, 0x0202, 0x0033, 0x0125, 0x051B, + 0x0F0D, 0xFFFF }, { 0x0028, 0x031C, 0x0221, 0x0029, 0xFFFF }, { 0x0120, 0x0313, 0x0C0B, 0x081F, 0xFFFF }, { 0x0605, + 0x0417, 0xFFFF }, { 0x0216, 0x041E, 0x0C0E, 0x0223, 0x0127, 0xFFFF }, { 0x0122, 0x0304, 0x060A, 0x0311, 0x0A09, 0xFFFF + }, { 0x0519, 0x190F, 0xFFFF }, { 0x002A, 0x0231, 0x0503, 0x0415, 0x0807, 0x002B, 0x071D, 0xFFFF }, { 0x0130, 0x0214, + 0x0508, 0x0401, 0x0133, 0x0225, 0x061B, 0xFFFF }, { 0x0200, 0x0124, 0x0406, 0x0321, 0x0129, 0x100D, 0xFFFF }, { 0x031A, + 0x0D0B, 0x091F, 0xFFFF }, { 0x0413, 0x0705, 0x0517, 0xFFFF }, { 0x0212, 0x0034, 0x0323, 0x0035, 0x0227, 0xFFFF }, { + 0x0126, 0x080C, 0x0B09, 0xFFFF }, { 0x0411, 0x0619, 0x1A0F, 0xFFFF }, { 0x0210, 0x0331, 0x0603, 0x0515, 0x0907, 0x012B, + 0xFFFF }, { 0x0318, 0x002C, 0x0501, 0x0233, 0x0325, 0x071B, 0x002D, 0x081D, 0xFFFF }, { 0x0132, 0x0302, 0x0229, 0x110D, + 0xFFFF }, { 0x0128, 0x041C, 0x0421, 0x0E0B, 0x0A1F, 0xFFFF }, { 0x0220, 0x0513, 0x0617, 0xFFFF }, { 0x0135, 0x0805, + 0x0327, 0xFFFF }, { 0x0316, 0x051E, 0x0D0E, 0x0423, 0xFFFF }, { 0x0222, 0x0404, 0x070A, 0x0511, 0x0719, 0x0C09, 0x1B0F, + 0xFFFF }, { 0x0703, 0x0615, 0x0A07, 0x022B, 0xFFFF }, { 0x012A, 0x0431, 0x0601, 0x0333, 0x012D, 0x091D, 0xFFFF }, { + 0x0230, 0x0314, 0x0036, 0x0608, 0x0425, 0x0037, 0x0329, 0x081B, 0x120D, 0xFFFF }, { 0x0300, 0x0224, 0x0506, 0x0521, + 0x0F0B, 0x0B1F, 0xFFFF }, { 0x041A, 0x0613, 0x0717, 0xFFFF }, { 0x0235, 0x0905, 0xFFFF }, { 0x0312, 0x0134, 0x0523, + 0x0427, 0xFFFF }, { 0x0226, 0x090C, 0x002E, 0x0611, 0x0D09, 0x002F, 0xFFFF }, { 0x0715, 0x0B07, 0x0819, 0x032B, 0x1C0F, + 0xFFFF }, { 0x0310, 0x0531, 0x0701, 0x0803, 0x022D, 0x0A1D, 0xFFFF }, { 0x0418, 0x012C, 0x0433, 0x0525, 0x0137, 0x091B, + 0x130D, 0xFFFF }, { 0x0232, 0x0402, 0x0621, 0x0429, 0xFFFF }, { 0x0228, 0x051C, 0x0713, 0x100B, 0x0C1F, 0xFFFF }, { + 0x0320, 0x0335, 0x0A05, 0x0817, 0xFFFF }, { 0x0623, 0x0527, 0xFFFF }, { 0x0416, 0x061E, 0x0E0E, 0x0711, 0x0E09, 0x012F, + 0xFFFF }, { 0x0322, 0x0504, 0x080A, 0x0919, 0x1D0F, 0xFFFF }, { 0x0631, 0x0903, 0x0815, 0x0C07, 0x042B, 0x032D, 0x0B1D, + 0xFFFF }, { 0x022A, 0x0801, 0x0533, 0x0625, 0x0237, 0x0A1B, 0xFFFF }, { 0x0330, 0x0414, 0x0136, 0x0708, 0x0721, 0x0529, + 0x140D, 0xFFFF }, { 0x0400, 0x0324, 0x0606, 0x0038, 0x0039, 0x110B, 0x0D1F, 0xFFFF }, { 0x051A, 0x0813, 0x0B05, 0x0917, + 0xFFFF }, { 0x0723, 0x0435, 0x0627, 0xFFFF }, { 0x0412, 0x0234, 0x0F09, 0x022F, 0xFFFF }, { 0x0326, 0x0A0C, 0x012E, + 0x0811, 0x0A19, 0x1E0F, 0xFFFF }, { 0x0731, 0x0A03, 0x0915, 0x0D07, 0x052B, 0xFFFF }, { 0x0410, 0x0901, 0x0633, 0x0725, + 0x0337, 0x0B1B, 0x042D, 0x0C1D, 0xFFFF }, { 0x0518, 0x022C, 0x0629, 0x150D, 0xFFFF }, { 0x0332, 0x0502, 0x0821, 0x0139, + 0x120B, 0x0E1F, 0xFFFF }, { 0x0328, 0x061C, 0x0913, 0x0A17, 0xFFFF }, { 0x0420, 0x0535, 0x0C05, 0x0727, 0xFFFF }, { + 0x0823, 0x032F, 0xFFFF }, { 0x0516, 0x071E, 0x0F0E, 0x0911, 0x0B19, 0x1009, 0x1F0F, 0xFFFF }, { 0x0422, 0x0604, 0x090A, + 0x0B03, 0x0A15, 0x0E07, 0x062B, 0xFFFF }, { 0x0831, 0x0A01, 0x0733, 0x052D, 0x0D1D, 0xFFFF }, { 0x032A, 0x0825, 0x0437, + 0x0729, 0x0C1B, 0x160D, 0xFFFF }, { 0x0430, 0x0514, 0x0236, 0x0808, 0x0921, 0x0239, 0x130B, 0x0F1F, 0xFFFF }, { 0x0500, + 0x0424, 0x0706, 0x0138, 0x0A13, 0x0B17, 0xFFFF }, { 0x061A, 0x0635, 0x0D05, 0xFFFF }, { 0x0923, 0x0827, 0xFFFF }, { + 0x0512, 0x0334, 0x003A, 0x0A11, 0x1109, 0x003B, 0x042F, 0xFFFF }, { 0x0426, 0x0B0C, 0x022E, 0x0B15, 0x0F07, 0x0C19, + 0x072B, 0xFFFF }, { 0x0931, 0x0B01, 0x0C03, 0x062D, 0x0E1D, 0xFFFF }, { 0x0510, 0x0833, 0x0925, 0x0537, 0x0D1B, 0x170D, + 0xFFFF }, { 0x0618, 0x032C, 0x0A21, 0x0339, 0x0829, 0xFFFF }, { 0x0432, 0x0602, 0x0B13, 0x140B, 0x101F, 0xFFFF }, { + 0x0428, 0x071C, 0x0735, 0x0E05, 0x0C17, 0xFFFF }, { 0x0520, 0x0A23, 0x0927, 0xFFFF }, { 0x0B11, 0x1209, 0x013B, 0x052F, + 0xFFFF }, { 0x0616, 0x081E, 0x0D19, 0xFFFF }, { 0x0522, 0x0704, 0x0A0A, 0x0A31, 0x0D03, 0x0C15, 0x1007, 0x082B, 0x072D, + 0x0F1D, 0xFFFF }, { 0x0C01, 0x0933, 0x0A25, 0x0637, 0x0E1B, 0xFFFF }, { 0x042A, 0x0B21, 0x0929, 0x180D, 0xFFFF }, { + 0x0530, 0x0614, 0x0336, 0x0908, 0x0439, 0x150B, 0x111F, 0xFFFF }, { 0x0600, 0x0524, 0x0806, 0x0238, 0x0C13, 0x0F05, + 0x0D17, 0xFFFF }, { 0x071A, 0x0B23, 0x0835, 0x0A27, 0xFFFF }, { 0x1309, 0x023B, 0x062F, 0xFFFF }, { 0x0612, 0x0434, + 0x013A, 0x0C11, 0x0E19, 0xFFFF }, { 0x0526, 0x0C0C, 0x032E, 0x0B31, 0x0E03, 0x0D15, 0x1107, 0x092B, 0xFFFF }, { 0x0D01, + 0x0A33, 0x0B25, 0x0737, 0x0F1B, 0x082D, 0x101D, 0xFFFF }, { 0x0610, 0x0A29, 0x190D, 0xFFFF }, { 0x0718, 0x042C, 0x0C21, + 0x0539, 0x160B, 0x121F, 0xFFFF }, { 0x0532, 0x0702, 0x0D13, 0x0E17, 0xFFFF }, { 0x0528, 0x081C, 0x0935, 0x1005, 0x0B27, + 0xFFFF }, { 0x0620, 0x0C23, 0x033B, 0x072F, 0xFFFF }, { 0x0D11, 0x0F19, 0x1409, 0xFFFF }, { 0x0716, 0x003C, 0x091E, + 0x0F03, 0x0E15, 0x1207, 0x0A2B, 0x003D, 0xFFFF }, { 0x0622, 0x0804, 0x0B0A, 0x0C31, 0x0E01, 0x0B33, 0x092D, 0x111D, + 0xFFFF }, { 0x0C25, 0x0837, 0x0B29, 0x101B, 0x1A0D, 0xFFFF }, { 0x052A, 0x0D21, 0x0639, 0x170B, 0x131F, 0xFFFF }, { + 0x0630, 0x0714, 0x0436, 0x0A08, 0x0E13, 0x0F17, 0xFFFF }, { 0x0700, 0x0624, 0x0906, 0x0338, 0x0A35, 0x1105, 0xFFFF }, { + 0x081A, 0x0D23, 0x0C27, 0xFFFF }, { 0x0E11, 0x1509, 0x043B, 0x082F, 0xFFFF }, { 0x0712, 0x0534, 0x023A, 0x0F15, 0x1307, + 0x1019, 0x0B2B, 0x013D, 0xFFFF }, { 0x0626, 0x0D0C, 0x042E, 0x0D31, 0x0F01, 0x1003, 0x0A2D, 0x121D, 0xFFFF }, { 0x0C33, + 0x0D25, 0x0937, 0x111B, 0x1B0D, 0xFFFF }, { 0x0710, 0x0E21, 0x0739, 0x0C29, 0xFFFF }, { 0x0818, 0x052C, 0x0F13, 0x180B, + 0x141F, 0xFFFF }, { 0x0632, 0x0802, 0x0B35, 0x1205, 0x1017, 0xFFFF }, { 0x0628, 0x091C, 0x0E23, 0x0D27, 0xFFFF }, { + 0x0720, 0x0F11, 0x1609, 0x053B, 0x092F, 0xFFFF }, { 0x1119, 0x023D, 0xFFFF }, { 0x0816, 0x013C, 0x0A1E, 0x0E31, 0x1103, + 0x1015, 0x1407, 0x0C2B, 0x0B2D, 0x131D, 0xFFFF }, { 0x0722, 0x0904, 0x0C0A, 0x1001, 0x0D33, 0x0E25, 0x0A37, 0x121B, + 0xFFFF }, { 0x0F21, 0x0D29, 0x1C0D, 0xFFFF }, { 0x062A, 0x0839, 0x190B, 0x151F, 0xFFFF }, { 0x0730, 0x0814, 0x0536, + 0x0B08, 0x1013, 0x1305, 0x1117, 0xFFFF }, { 0x0800, 0x0724, 0x0A06, 0x0438, 0x0F23, 0x0C35, 0x0E27, 0xFFFF }, { 0x091A, + 0x1709, 0x063B, 0x0A2F, 0xFFFF }, { 0x1011, 0x1219, 0x033D, 0xFFFF }, { 0x0812, 0x0634, 0x033A, 0x0F31, 0x1203, 0x1115, + 0x1507, 0x0D2B, 0xFFFF }, { 0x0726, 0x0E0C, 0x052E, 0x1101, 0x0E33, 0x0F25, 0x0B37, 0x131B, 0x0C2D, 0x141D, 0xFFFF }, { + 0x0E29, 0x1D0D, 0xFFFF }, { 0x0810, 0x1021, 0x0939, 0x1A0B, 0x161F, 0xFFFF }, { 0x0918, 0x062C, 0x1113, 0x1217, 0xFFFF + }, { 0x0732, 0x0902, 0x0D35, 0x1405, 0x0F27, 0xFFFF }, { 0x0728, 0x0A1C, 0x1023, 0x073B, 0x0B2F, 0xFFFF }, { 0x0820, + 0x1111, 0x1319, 0x1809, 0xFFFF }, { 0x1303, 0x1215, 0x1607, 0x0E2B, 0x043D, 0xFFFF }, { 0x0916, 0x023C, 0x0B1E, 0x1031, + 0x1201, 0x0F33, 0x0D2D, 0x151D, 0xFFFF }, { 0x0822, 0x0A04, 0x0D0A, 0x1025, 0x0C37, 0x0F29, 0x141B, 0x1E0D, 0xFFFF }, { + 0x1121, 0x0A39, 0x1B0B, 0x171F, 0xFFFF }, { 0x072A, 0x1213, 0x1317, 0xFFFF }, { 0x0830, 0x0914, 0x0636, 0x0C08, 0x0E35, + 0x1505, 0xFFFF }, { 0x0900, 0x0824, 0x0B06, 0x0538, 0x1123, 0x1027, 0xFFFF }, { 0x0A1A, 0x1211, 0x1909, 0x083B, 0x0C2F, + 0xFFFF }, { 0x1315, 0x1707, 0x1419, 0x0F2B, 0x053D, 0xFFFF }, { 0x0912, 0x0734, 0x043A, 0x1131, 0x1301, 0x1403, 0x0E2D, + 0x161D, 0xFFFF }, { 0x0826, 0x0F0C, 0x062E, 0x1033, 0x1125, 0x0D37, 0x151B, 0x1F0D, 0xFFFF }, { 0x1221, 0x0B39, 0x1029, + 0xFFFF }, { 0x0910, 0x1313, 0x1C0B, 0x181F, 0xFFFF }, { 0x0A18, 0x072C, 0x0F35, 0x1605, 0x1417, 0xFFFF }, { 0x0832, + 0x0A02, 0x1223, 0x1127, 0xFFFF }, { 0x0828, 0x0B1C, 0x1311, 0x1A09, 0x093B, 0x0D2F, 0xFFFF }, { 0x0920, 0x1519, 0x063D, + 0xFFFF }, { 0x1231, 0x1503, 0x1415, 0x1807, 0x102B, 0x0F2D, 0x171D, 0xFFFF }, { 0x0A16, 0x033C, 0x0C1E, 0x1401, 0x1133, + 0x1225, 0x0E37, 0x161B, 0xFFFF }, { 0x0922, 0x0B04, 0x0E0A, 0x1321, 0x1129, 0xFFFF }, { 0x0C39, 0x1D0B, 0x191F, 0xFFFF + }, { 0x082A, 0x1413, 0x1705, 0x1517, 0xFFFF }, { 0x0930, 0x0A14, 0x0736, 0x0D08, 0x1323, 0x1035, 0x1227, 0xFFFF }, { + 0x0A00, 0x0924, 0x0C06, 0x0638, 0x1B09, 0x0A3B, 0x0E2F, 0xFFFF }, { 0x0B1A, 0x1411, 0x1619, 0x073D, 0xFFFF }, { 0x1331, + 0x1603, 0x1515, 0x1907, 0x112B, 0xFFFF }, { 0x0A12, 0x0834, 0x053A, 0x1501, 0x1233, 0x1325, 0x0F37, 0x171B, 0x102D, + 0x181D, 0xFFFF }, { 0x0926, 0x072E, 0x1229, 0xFFFF }, { 0x1421, 0x0D39, 0x1E0B, 0x1A1F, 0xFFFF }, { 0x0A10, 0x1513, + 0x1617, 0xFFFF }, { 0x0B18, 0x082C, 0x1135, 0x1805, 0x1327, 0xFFFF }, { 0x0932, 0x0B02, 0x1423, 0x0B3B, 0x0F2F, 0xFFFF + }, { 0x0928, 0x0C1C, 0x1511, 0x1719, 0x1C09, 0xFFFF }, { 0x0A20, 0x1703, 0x1615, 0x1A07, 0x122B, 0x083D, 0xFFFF }, { + 0x1431, 0x1601, 0x1333, 0x112D, 0x191D, 0xFFFF }, { 0x0B16, 0x043C, 0x0D1E, 0x1425, 0x1037, 0x1329, 0x181B, 0xFFFF }, { + 0x0A22, 0x0C04, 0x0F0A, 0x1521, 0x0E39, 0x1F0B, 0x1B1F, 0xFFFF }, { 0x1613, 0x1717, 0xFFFF }, { 0x092A, 0x1235, 0x1905, + 0xFFFF }, { 0x0A30, 0x0B14, 0x0836, 0x0E08, 0x1523, 0x1427, 0xFFFF }, { 0x0B00, 0x0A24, 0x0D06, 0x0738, 0x1611, 0x1D09, + 0x0C3B, 0x102F, 0xFFFF }, { 0x0C1A, 0x1715, 0x1B07, 0x1819, 0x132B, 0x093D, 0xFFFF }, { 0x1531, 0x1701, 0x1803, 0x122D, + 0x1A1D, 0xFFFF }, { 0x0B12, 0x0934, 0x063A, 0x1433, 0x1525, 0x1137, 0x191B, 0xFFFF }, { 0x0A26, 0x003E, 0x082E, 0x1621, + 0x0F39, 0x1429, 0x003F, 0xFFFF }, { 0x1713, 0x1C1F, 0xFFFF }, { 0x0B10, 0x1335, 0x1A05, 0x1817, 0xFFFF }, { 0x0C18, + 0x092C, 0x1623, 0x1527, 0xFFFF }, { 0x0A32, 0x0C02, 0x1711, 0x1E09, 0x0D3B, 0x112F, 0xFFFF }, { 0x0A28, 0x0D1C, 0x1919, + 0x0A3D, 0xFFFF }, { 0x0B20, 0x1631, 0x1903, 0x1815, 0x1C07, 0x142B, 0x132D, 0x1B1D, 0xFFFF }, { 0x1801, 0x1533, 0x1625, + 0x1237, 0x1A1B, 0xFFFF }, { 0x0C16, 0x053C, 0x0E1E, 0x1721, 0x1529, 0x013F, 0xFFFF }, { 0x0B22, 0x0D04, 0x1039, 0x1D1F, + 0xFFFF }, { 0x1813, 0x1B05, 0x1917, 0xFFFF }, { 0x0A2A, 0x1723, 0x1435, 0x1627, 0xFFFF }, { 0x0B30, 0x0C14, 0x0936, + 0x0F08, 0x1F09, 0x0E3B, 0x122F, 0xFFFF }, { 0x0C00, 0x0B24, 0x0E06, 0x0838, 0x1811, 0x1A19, 0x0B3D, 0xFFFF }, { 0x0D1A, + 0x1731, 0x1A03, 0x1915, 0x1D07, 0x152B, 0xFFFF }, { 0x1901, 0x1633, 0x1725, 0x1337, 0x1B1B, 0x142D, 0x1C1D, 0xFFFF }, { + 0x0C12, 0x0A34, 0x073A, 0x1629, 0x023F, 0xFFFF }, { 0x0B26, 0x013E, 0x092E, 0x1821, 0x1139, 0x1E1F, 0xFFFF }, { 0x1913, + 0x1A17, 0xFFFF }, { 0x0C10, 0x1535, 0x1C05, 0x1727, 0xFFFF }, { 0x0D18, 0x0A2C, 0x1823, 0x0F3B, 0x132F, 0xFFFF }, { + 0x0B32, 0x0D02, 0x1911, 0x1B19, 0xFFFF }, { 0x0B28, 0x0E1C, 0x1B03, 0x1A15, 0x1E07, 0x162B, 0x0C3D, 0xFFFF }, { 0x0C20, + 0x1831, 0x1A01, 0x1733, 0x152D, 0x1D1D, 0xFFFF }, { 0x1825, 0x1437, 0x1729, 0x1C1B, 0x033F, 0xFFFF }, { 0x0D16, 0x063C, + 0x0F1E, 0x1921, 0x1239, 0x1F1F, 0xFFFF }, { 0x0C22, 0x0E04, 0x1A13, 0x1B17, 0xFFFF }, { 0x1635, 0x1D05, 0xFFFF }, { + 0x0B2A, 0x1923, 0x1827, 0xFFFF }, { 0x0C30, 0x0D14, 0x0A36, 0x1A11, 0x103B, 0x142F, 0xFFFF }, { 0x0D00, 0x0C24, 0x0F06, + 0x0938, 0x1B15, 0x1F07, 0x1C19, 0x172B, 0x0D3D, 0xFFFF }, { 0x0E1A, 0x1931, 0x1B01, 0x1C03, 0x162D, 0x1E1D, 0xFFFF }, { + 0x1833, 0x1925, 0x1537, 0x1D1B, 0xFFFF }, { 0x0D12, 0x0B34, 0x083A, 0x1A21, 0x1339, 0x1829, 0x043F, 0xFFFF }, { 0x0C26, + 0x023E, 0x0A2E, 0x1B13, 0xFFFF }, { 0x1735, 0x1E05, 0x1C17, 0xFFFF }, { 0x0D10, 0x1A23, 0x1927, 0xFFFF }, { 0x0E18, + 0x0B2C, 0x1B11, 0x113B, 0x152F, 0xFFFF }, { 0x0C32, 0x0E02, 0x1D19, 0x0E3D, 0xFFFF }, { 0x0C28, 0x0F1C, 0x1A31, 0x1D03, + 0x1C15, 0x182B, 0x172D, 0x1F1D, 0xFFFF }, { 0x0D20, 0x1C01, 0x1933, 0x1A25, 0x1637, 0x1E1B, 0xFFFF }, { 0x1B21, 0x1929, + 0x053F, 0xFFFF }, { 0x0E16, 0x073C, 0x1439, 0xFFFF }, { 0x0D22, 0x0F04, 0x1C13, 0x1F05, 0x1D17, 0xFFFF }, { 0x1B23, + 0x1835, 0x1A27, 0xFFFF }, { 0x0C2A, 0x123B, 0x162F, 0xFFFF }, { 0x0D30, 0x0E14, 0x0B36, 0x1C11, 0x1E19, 0x0F3D, 0xFFFF + }, { 0x0E00, 0x0D24, 0x0A38, 0x1B31, 0x1E03, 0x1D15, 0x192B, 0xFFFF }, { 0x0F1A, 0x1D01, 0x1A33, 0x1B25, 0x1737, 0x1F1B, + 0x182D, 0xFFFF }, { 0x1A29, 0x063F, 0xFFFF }, { 0x0E12, 0x0C34, 0x093A, 0x1C21, 0x1539, 0xFFFF }, { 0x0D26, 0x033E, + 0x0B2E, 0x1D13, 0x1E17, 0xFFFF }, { 0x1935, 0x1B27, 0xFFFF }, { 0x0E10, 0x1C23, 0x133B, 0x172F, 0xFFFF }, { 0x0F18, + 0x0C2C, 0x1D11, 0x1F19, 0xFFFF }, { 0x0D32, 0x0F02, 0x1F03, 0x1E15, 0x1A2B, 0x103D, 0xFFFF }, { 0x0D28, 0x1C31, 0x1E01, + 0x1B33, 0x192D, 0xFFFF }, { 0x0E20, 0x1C25, 0x1837, 0x1B29, 0x073F, 0xFFFF }, { 0x1D21, 0x1639, 0xFFFF }, { 0x0F16, + 0x083C, 0x1E13, 0x1F17, 0xFFFF }, { 0x0E22, 0x1A35, 0xFFFF }, { 0x1D23, 0x1C27, 0xFFFF }, { 0x0D2A, 0x1E11, 0x143B, + 0x182F, 0xFFFF }, { 0x0E30, 0x0F14, 0x0C36, 0x1F15, 0x1B2B, 0x113D, 0xFFFF }, { 0x0F00, 0x0E24, 0x0B38, 0x1D31, 0x1F01, + 0x1A2D, 0xFFFF }, { 0x1C33, 0x1D25, 0x1937, 0xFFFF }, { 0x1E21, 0x1739, 0x1C29, 0x083F, 0xFFFF }, { 0x0F12, 0x0D34, + 0x0A3A, 0x1F13, 0xFFFF }, { 0x0E26, 0x043E, 0x0C2E, 0x1B35, 0xFFFF }, { 0x1E23, 0x1D27, 0xFFFF }, { 0x0F10, 0x1F11, + 0x153B, 0x192F, 0xFFFF }, { 0x0D2C, 0x123D, 0xFFFF }, + }; + + uint16 etc1_block::pack_color5(const color_quad_u8& color, bool scaled, uint bias) + { + return pack_color5(color.r, color.g, color.b, scaled, bias); + } + + uint16 etc1_block::pack_color5(uint r, uint g, uint b, bool scaled, uint bias) + { + if (scaled) + { + r = (r * 31U + bias) / 255U; + g = (g * 31U + bias) / 255U; + b = (b * 31U + bias) / 255U; + } + + r = math::minimum(r, 31U); + g = math::minimum(g, 31U); + b = math::minimum(b, 31U); + + return static_cast(b | (g << 5U) | (r << 10U)); + } + + color_quad_u8 etc1_block::unpack_color5(uint16 packed_color5, bool scaled, uint alpha) + { + uint b = packed_color5 & 31U; + uint g = (packed_color5 >> 5U) & 31U; + uint r = (packed_color5 >> 10U) & 31U; + + if (scaled) + { + b = (b << 3U) | (b >> 2U); + g = (g << 3U) | (g >> 2U); + r = (r << 3U) | (r >> 2U); + } + + return color_quad_u8(cNoClamp, r, g, b, math::minimum(alpha, 255U)); + } + + void etc1_block::unpack_color5(uint& r, uint& g, uint& b, uint16 packed_color5, bool scaled) + { + color_quad_u8 c(unpack_color5(packed_color5, scaled, 0)); + r = c.r; + g = c.g; + b = c.b; + } + + bool etc1_block::unpack_color5(color_quad_u8& result, uint16 packed_color5, uint16 packed_delta3, bool scaled, uint alpha) + { + color_quad_i16 dc(unpack_delta3(packed_delta3)); + + int b = (packed_color5 & 31U) + dc.b; + int g = ((packed_color5 >> 5U) & 31U) + dc.g; + int r = ((packed_color5 >> 10U) & 31U) + dc.r; + + bool success = true; + if (static_cast(r | g | b) > 31U) + { + success = false; + r = math::clamp(r, 0, 31); + g = math::clamp(g, 0, 31); + b = math::clamp(b, 0, 31); + } + + if (scaled) + { + b = (b << 3U) | (b >> 2U); + g = (g << 3U) | (g >> 2U); + r = (r << 3U) | (r >> 2U); + } + + result.set_noclamp_rgba(r, g, b, math::minimum(alpha, 255U)); + return success; + } + + bool etc1_block::unpack_color5(uint& r, uint& g, uint& b, uint16 packed_color5, uint16 packed_delta3, bool scaled, uint alpha) + { + color_quad_u8 result; + const bool success = unpack_color5(result, packed_color5, packed_delta3, scaled, alpha); + r = result.r; + g = result.g; + b = result.b; + return success; + } + + uint16 etc1_block::pack_delta3(const color_quad_i16& color) + { + return pack_delta3(color.r, color.g, color.b); + } + + uint16 etc1_block::pack_delta3(int r, int g, int b) + { + CRNLIB_ASSERT((r >= cETC1ColorDeltaMin) && (r <= cETC1ColorDeltaMax)); + CRNLIB_ASSERT((g >= cETC1ColorDeltaMin) && (g <= cETC1ColorDeltaMax)); + CRNLIB_ASSERT((b >= cETC1ColorDeltaMin) && (b <= cETC1ColorDeltaMax)); + if (r < 0) r += 8; + if (g < 0) g += 8; + if (b < 0) b += 8; + return static_cast(b | (g << 3) | (r << 6)); + } + + color_quad_i16 etc1_block::unpack_delta3(uint16 packed_delta3) + { + int r = (packed_delta3 >> 6) & 7; + int g = (packed_delta3 >> 3) & 7; + int b = packed_delta3 & 7; + if (r >= 4) r -= 8; + if (g >= 4) g -= 8; + if (b >= 4) b -= 8; + return color_quad_i16(r, g, b, 0); + } + + void etc1_block::unpack_delta3(int& r, int& g, int& b, uint16 packed_delta3) + { + r = (packed_delta3 >> 6) & 7; + g = (packed_delta3 >> 3) & 7; + b = packed_delta3 & 7; + if (r >= 4) r -= 8; + if (g >= 4) g -= 8; + if (b >= 4) b -= 8; + } + + uint16 etc1_block::pack_color4(const color_quad_u8& color, bool scaled, uint bias) + { + return pack_color4(color.r, color.g, color.b, scaled, bias); + } + + uint16 etc1_block::pack_color4(uint r, uint g, uint b, bool scaled, uint bias) + { + if (scaled) + { + r = (r * 15U + bias) / 255U; + g = (g * 15U + bias) / 255U; + b = (b * 15U + bias) / 255U; + } + + r = math::minimum(r, 15U); + g = math::minimum(g, 15U); + b = math::minimum(b, 15U); + + return static_cast(b | (g << 4U) | (r << 8U)); + } + + color_quad_u8 etc1_block::unpack_color4(uint16 packed_color4, bool scaled, uint alpha) + { + uint b = packed_color4 & 15U; + uint g = (packed_color4 >> 4U) & 15U; + uint r = (packed_color4 >> 8U) & 15U; + + if (scaled) + { + b = (b << 4U) | b; + g = (g << 4U) | g; + r = (r << 4U) | r; + } + + return color_quad_u8(cNoClamp, r, g, b, math::minimum(alpha, 255U)); + } + + void etc1_block::unpack_color4(uint& r, uint& g, uint& b, uint16 packed_color4, bool scaled) + { + color_quad_u8 c(unpack_color4(packed_color4, scaled, 0)); + r = c.r; + g = c.g; + b = c.b; + } + + void etc1_block::get_diff_subblock_colors(color_quad_u8* pDst, uint16 packed_color5, uint table_idx) + { + CRNLIB_ASSERT(table_idx < cETC1IntenModifierValues); + const int *pInten_modifer_table = &g_etc1_inten_tables[table_idx][0]; + + uint r, g, b; + unpack_color5(r, g, b, packed_color5, true); + + const int ir = static_cast(r), ig = static_cast(g), ib = static_cast(b); + + const int y0 = pInten_modifer_table[0]; + pDst[0].set(ir + y0, ig + y0, ib + y0); + + const int y1 = pInten_modifer_table[1]; + pDst[1].set(ir + y1, ig + y1, ib + y1); + + const int y2 = pInten_modifer_table[2]; + pDst[2].set(ir + y2, ig + y2, ib + y2); + + const int y3 = pInten_modifer_table[3]; + pDst[3].set(ir + y3, ig + y3, ib + y3); + } + + bool etc1_block::get_diff_subblock_colors(color_quad_u8* pDst, uint16 packed_color5, uint16 packed_delta3, uint table_idx) + { + CRNLIB_ASSERT(table_idx < cETC1IntenModifierValues); + const int *pInten_modifer_table = &g_etc1_inten_tables[table_idx][0]; + + uint r, g, b; + bool success = unpack_color5(r, g, b, packed_color5, packed_delta3, true); + + const int ir = static_cast(r), ig = static_cast(g), ib = static_cast(b); + + const int y0 = pInten_modifer_table[0]; + pDst[0].set(ir + y0, ig + y0, ib + y0); + + const int y1 = pInten_modifer_table[1]; + pDst[1].set(ir + y1, ig + y1, ib + y1); + + const int y2 = pInten_modifer_table[2]; + pDst[2].set(ir + y2, ig + y2, ib + y2); + + const int y3 = pInten_modifer_table[3]; + pDst[3].set(ir + y3, ig + y3, ib + y3); + + return success; + } + + void etc1_block::get_abs_subblock_colors(color_quad_u8* pDst, uint16 packed_color4, uint table_idx) + { + CRNLIB_ASSERT(table_idx < cETC1IntenModifierValues); + const int *pInten_modifer_table = &g_etc1_inten_tables[table_idx][0]; + + uint r, g, b; + unpack_color4(r, g, b, packed_color4, true); + + const int ir = static_cast(r), ig = static_cast(g), ib = static_cast(b); + + const int y0 = pInten_modifer_table[0]; + pDst[0].set(ir + y0, ig + y0, ib + y0); + + const int y1 = pInten_modifer_table[1]; + pDst[1].set(ir + y1, ig + y1, ib + y1); + + const int y2 = pInten_modifer_table[2]; + pDst[2].set(ir + y2, ig + y2, ib + y2); + + const int y3 = pInten_modifer_table[3]; + pDst[3].set(ir + y3, ig + y3, ib + y3); + } + + bool unpack_etc1(const etc1_block& block, color_quad_u8 *pDst, bool preserve_alpha) + { + const bool diff_flag = block.get_diff_bit(); + const bool flip_flag = block.get_flip_bit(); + const uint table_index0 = block.get_inten_table(0); + const uint table_index1 = block.get_inten_table(1); + + color_quad_u8 subblock_colors0[4]; + color_quad_u8 subblock_colors1[4]; + bool success = true; + + if (diff_flag) + { + const uint16 base_color5 = block.get_base5_color(); + const uint16 delta_color3 = block.get_delta3_color(); + etc1_block::get_diff_subblock_colors(subblock_colors0, base_color5, table_index0); + + if (!etc1_block::get_diff_subblock_colors(subblock_colors1, base_color5, delta_color3, table_index1)) + success = false; + } + else + { + const uint16 base_color4_0 = block.get_base4_color(0); + etc1_block::get_abs_subblock_colors(subblock_colors0, base_color4_0, table_index0); + + const uint16 base_color4_1 = block.get_base4_color(1); + etc1_block::get_abs_subblock_colors(subblock_colors1, base_color4_1, table_index1); + } + + if (preserve_alpha) + { + if (flip_flag) + { + for (uint y = 0; y < 2; y++) + { + pDst[0].set_rgb(subblock_colors0[block.get_selector(0, y)]); + pDst[1].set_rgb(subblock_colors0[block.get_selector(1, y)]); + pDst[2].set_rgb(subblock_colors0[block.get_selector(2, y)]); + pDst[3].set_rgb(subblock_colors0[block.get_selector(3, y)]); + pDst += 4; + } + + for (uint y = 2; y < 4; y++) + { + pDst[0].set_rgb(subblock_colors1[block.get_selector(0, y)]); + pDst[1].set_rgb(subblock_colors1[block.get_selector(1, y)]); + pDst[2].set_rgb(subblock_colors1[block.get_selector(2, y)]); + pDst[3].set_rgb(subblock_colors1[block.get_selector(3, y)]); + pDst += 4; + } + } + else + { + for (uint y = 0; y < 4; y++) + { + pDst[0].set_rgb(subblock_colors0[block.get_selector(0, y)]); + pDst[1].set_rgb(subblock_colors0[block.get_selector(1, y)]); + pDst[2].set_rgb(subblock_colors1[block.get_selector(2, y)]); + pDst[3].set_rgb(subblock_colors1[block.get_selector(3, y)]); + pDst += 4; + } + } + } + else + { + if (flip_flag) + { + // 0000 + // 0000 + // 1111 + // 1111 + for (uint y = 0; y < 2; y++) + { + pDst[0] = subblock_colors0[block.get_selector(0, y)]; + pDst[1] = subblock_colors0[block.get_selector(1, y)]; + pDst[2] = subblock_colors0[block.get_selector(2, y)]; + pDst[3] = subblock_colors0[block.get_selector(3, y)]; + pDst += 4; + } + + for (uint y = 2; y < 4; y++) + { + pDst[0] = subblock_colors1[block.get_selector(0, y)]; + pDst[1] = subblock_colors1[block.get_selector(1, y)]; + pDst[2] = subblock_colors1[block.get_selector(2, y)]; + pDst[3] = subblock_colors1[block.get_selector(3, y)]; + pDst += 4; + } + } + else + { + // 0011 + // 0011 + // 0011 + // 0011 + for (uint y = 0; y < 4; y++) + { + pDst[0] = subblock_colors0[block.get_selector(0, y)]; + pDst[1] = subblock_colors0[block.get_selector(1, y)]; + pDst[2] = subblock_colors1[block.get_selector(2, y)]; + pDst[3] = subblock_colors1[block.get_selector(3, y)]; + pDst += 4; + } + } + } + + return success; + } + + bool etc1_optimizer::compute() + { + const uint n = m_pParams->m_num_src_pixels; + const int scan_delta_size = m_pParams->m_scan_delta_size; + + // Scan through a subset of the 3D lattice centered around the avg block color trying each 3D (555 or 444) lattice point as a potential block color. + // Each time a better solution is found try to refine the current solution's block color based of the current selectors and intensity table index. + for (int zdi = 0; zdi < scan_delta_size; zdi++) + { + const int zd = m_pParams->m_pScan_deltas[zdi]; + const int mbb = m_bb + zd; + if (mbb < 0) continue; else if (mbb > m_limit) break; + + for (int ydi = 0; ydi < scan_delta_size; ydi++) + { + const int yd = m_pParams->m_pScan_deltas[ydi]; + const int mbg = m_bg + yd; + if (mbg < 0) continue; else if (mbg > m_limit) break; + + for (int xdi = 0; xdi < scan_delta_size; xdi++) + { + const int xd = m_pParams->m_pScan_deltas[xdi]; + const int mbr = m_br + xd; + if (mbr < 0) continue; else if (mbr > m_limit) break; + + etc1_solution_coordinates coords(mbr, mbg, mbb, 0, m_pParams->m_use_color4); + if (m_pParams->m_quality == cCRNETCQualitySlow) + { + if (!evaluate_solution(coords, m_trial_solution, &m_best_solution)) + continue; + } + else + { + if (!evaluate_solution_fast(coords, m_trial_solution, &m_best_solution)) + continue; + } + + // Now we have the input block, the avg. color of the input pixels, a set of trial selector indices, and the block color+intensity index. + // Now, for each component, attempt to refine the current solution by solving a simple linear equation. For example, for 4 colors: + // The goal is: + // pixel0 - (block_color+inten_table[selector0]) + pixel1 - (block_color+inten_table[selector1]) + pixel2 - (block_color+inten_table[selector2]) + pixel3 - (block_color+inten_table[selector3]) = 0 + // Rearranging this: + // (pixel0 + pixel1 + pixel2 + pixel3) - (block_color+inten_table[selector0]) - (block_color+inten_table[selector1]) - (block_color+inten_table[selector2]) - (block_color+inten_table[selector3]) = 0 + // (pixel0 + pixel1 + pixel2 + pixel3) - block_color - inten_table[selector0] - block_color-inten_table[selector1] - block_color-inten_table[selector2] - block_color-inten_table[selector3] = 0 + // (pixel0 + pixel1 + pixel2 + pixel3) - 4*block_color - inten_table[selector0] - inten_table[selector1] - inten_table[selector2] - inten_table[selector3] = 0 + // (pixel0 + pixel1 + pixel2 + pixel3) - 4*block_color - (inten_table[selector0] + inten_table[selector1] + inten_table[selector2] + inten_table[selector3]) = 0 + // (pixel0 + pixel1 + pixel2 + pixel3)/4 - block_color - (inten_table[selector0] + inten_table[selector1] + inten_table[selector2] + inten_table[selector3])/4 = 0 + // block_color = (pixel0 + pixel1 + pixel2 + pixel3)/4 - (inten_table[selector0] + inten_table[selector1] + inten_table[selector2] + inten_table[selector3])/4 + // So what this means: + // optimal_block_color = avg_input - avg_inten_delta + // So the optimal block color can be computed by taking the average block color and subtracting the current average of the intensity delta. + // Unfortunately, optimal_block_color must then be quantized to 555 or 444 so it's not always possible to improve matters using this formula. + // Also, the above formula is for unclamped intensity deltas. The actual implementation takes into account clamping. + + const uint max_refinement_trials = (m_pParams->m_quality == cCRNETCQualityFast) ? 2 : (((xd | yd | zd) == 0) ? 4 : 2); + for (uint refinement_trial = 0; refinement_trial < max_refinement_trials; refinement_trial++) + { + const uint8* pSelectors = m_best_solution.m_selectors.get_ptr(); + const int* pInten_table = g_etc1_inten_tables[m_best_solution.m_coords.m_inten_table]; + + int delta_sum_r = 0, delta_sum_g = 0, delta_sum_b = 0; + const color_quad_u8 base_color(m_best_solution.m_coords.get_scaled_color()); + for (uint r = 0; r < n; r++) + { + const uint s = *pSelectors++; + const int yd = pInten_table[s]; + // Compute actual delta being applied to each pixel, taking into account clamping. + delta_sum_r += math::clamp(base_color.r + yd, 0, 255) - base_color.r; + delta_sum_g += math::clamp(base_color.g + yd, 0, 255) - base_color.g; + delta_sum_b += math::clamp(base_color.b + yd, 0, 255) - base_color.b; + } + if ((!delta_sum_r) && (!delta_sum_g) && (!delta_sum_b)) + break; + const float avg_delta_r_f = static_cast(delta_sum_r) / n; + const float avg_delta_g_f = static_cast(delta_sum_g) / n; + const float avg_delta_b_f = static_cast(delta_sum_b) / n; + const int br1 = math::clamp(static_cast((m_avg_color[0] - avg_delta_r_f) * m_limit / 255.0f + .5f), 0, m_limit); + const int bg1 = math::clamp(static_cast((m_avg_color[1] - avg_delta_g_f) * m_limit / 255.0f + .5f), 0, m_limit); + const int bb1 = math::clamp(static_cast((m_avg_color[2] - avg_delta_b_f) * m_limit / 255.0f + .5f), 0, m_limit); + + bool skip = false; + + if ((mbr == br1) && (mbg == bg1) && (mbb == bb1)) + skip = true; + else if ((br1 == m_best_solution.m_coords.m_unscaled_color.r) && (bg1 == m_best_solution.m_coords.m_unscaled_color.g) && (bb1 == m_best_solution.m_coords.m_unscaled_color.b)) + skip = true; + else if ((m_br == br1) && (m_bg == bg1) && (m_bb == bb1)) + skip = true; + + if (skip) + break; + + etc1_solution_coordinates coords1(br1, bg1, bb1, 0, m_pParams->m_use_color4); + if (m_pParams->m_quality == cCRNETCQualitySlow) + { + if (!evaluate_solution(coords1, m_trial_solution, &m_best_solution)) + break; + } + else + { + if (!evaluate_solution_fast(coords1, m_trial_solution, &m_best_solution)) + break; + } + + } // refinement_trial + + } // xdi + } // ydi + } // zdi + + if (!m_best_solution.m_valid) + { + m_pResult->m_error = cUINT32_MAX; + return false; + } + + const uint8* pSelectors = m_best_solution.m_selectors.get_ptr(); + +#ifdef CRNLIB_BUILD_DEBUG + { + color_quad_u8 block_colors[4]; + m_best_solution.m_coords.get_block_colors(block_colors); + + const color_quad_u8* pSrc_pixels = m_pParams->m_pSrc_pixels; + uint64 actual_error = 0; + for (uint i = 0; i < n; i++) + actual_error += color::elucidian_distance(pSrc_pixels[i], block_colors[pSelectors[i]], false); + + CRNLIB_ASSERT(actual_error == m_best_solution.m_error); + } +#endif + + m_pResult->m_error = m_best_solution.m_error; + + m_pResult->m_block_color_unscaled = m_best_solution.m_coords.m_unscaled_color; + m_pResult->m_block_color4 = m_best_solution.m_coords.m_color4; + + m_pResult->m_block_inten_table = m_best_solution.m_coords.m_inten_table; + memcpy(m_pResult->m_pSelectors, pSelectors, n); + m_pResult->m_n = n; + + return true; + } + + void etc1_optimizer::init(const params& params, results& result) + { + m_pParams = ¶ms; + m_pResult = &result; + + const uint n = m_pParams->m_num_src_pixels; + + m_selectors.resize(n); + m_best_selectors.resize(n); + m_temp_selectors.resize(n); + m_trial_solution.m_selectors.resize(n); + m_best_solution.m_selectors.resize(n); + + m_limit = m_pParams->m_use_color4 ? 15 : 31; + + vec3F avg_color(0.0f); + + m_luma.resize(n); + m_sorted_luma[0].resize(n); + m_sorted_luma[1].resize(n); + + for (uint i = 0; i < n; i++) + { + const color_quad_u8& c = m_pParams->m_pSrc_pixels[i]; + const vec3F fc(c.r, c.g, c.b); + + avg_color += fc; + + m_luma[i] = static_cast(c.r + c.g + c.b); + m_sorted_luma[0][i] = i; + } + avg_color /= static_cast(n); + m_avg_color = avg_color; + + m_br = math::clamp(static_cast(m_avg_color[0] * m_limit / 255.0f + .5f), 0, m_limit); + m_bg = math::clamp(static_cast(m_avg_color[1] * m_limit / 255.0f + .5f), 0, m_limit); + m_bb = math::clamp(static_cast(m_avg_color[2] * m_limit / 255.0f + .5f), 0, m_limit); + + if (m_pParams->m_quality <= cCRNETCQualityMedium) + { + m_pSorted_luma_indices = indirect_radix_sort(n, m_sorted_luma[0].get_ptr(), m_sorted_luma[1].get_ptr(), m_luma.get_ptr(), 0, sizeof(m_luma[0]), false); + m_pSorted_luma = m_sorted_luma[0].get_ptr(); + if (m_pSorted_luma_indices == m_sorted_luma[0].get_ptr()) + m_pSorted_luma = m_sorted_luma[1].get_ptr(); + + for (uint i = 0; i < n; i++) + m_pSorted_luma[i] = m_luma[m_pSorted_luma_indices[i]]; + } + + m_best_solution.m_coords.clear(); + m_best_solution.m_valid = false; + m_best_solution.m_error = cUINT64_MAX; + } + + bool etc1_optimizer::evaluate_solution(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution) + { + trial_solution.m_valid = false; + + if (m_pParams->m_constrain_against_base_color5) + { + const int dr = coords.m_unscaled_color.r - m_pParams->m_base_color5.r; + const int dg = coords.m_unscaled_color.g - m_pParams->m_base_color5.g; + const int db = coords.m_unscaled_color.b - m_pParams->m_base_color5.b; + + if ((math::minimum(dr, dg, db) < cETC1ColorDeltaMin) || (math::maximum(dr, dg, db) > cETC1ColorDeltaMax)) + return false; + } + + const color_quad_u8 base_color(coords.get_scaled_color()); + + const uint n = m_pParams->m_num_src_pixels; + CRNLIB_ASSERT(trial_solution.m_selectors.size() == n); + + trial_solution.m_error = cUINT64_MAX; + + for (uint inten_table = 0; inten_table < cETC1IntenModifierValues; inten_table++) + { + const int* pInten_table = g_etc1_inten_tables[inten_table]; + + color_quad_u8 block_colors[4]; + for (uint s = 0; s < 4; s++) + { + const int yd = pInten_table[s]; + block_colors[s].set(base_color.r + yd, base_color.g + yd, base_color.b + yd, 0); + } + + uint64 total_error = 0; + + const color_quad_u8* pSrc_pixels = m_pParams->m_pSrc_pixels; + for (uint c = 0; c < n; c++) + { + const color_quad_u8& src_pixel = *pSrc_pixels++; + + uint best_selector_index = 0; + uint best_error = math::square(src_pixel.r - block_colors[0].r) + math::square(src_pixel.g - block_colors[0].g) + math::square(src_pixel.b - block_colors[0].b); + + uint trial_error = math::square(src_pixel.r - block_colors[1].r) + math::square(src_pixel.g - block_colors[1].g) + math::square(src_pixel.b - block_colors[1].b); + if (trial_error < best_error) + { + best_error = trial_error; + best_selector_index = 1; + } + + trial_error = math::square(src_pixel.r - block_colors[2].r) + math::square(src_pixel.g - block_colors[2].g) + math::square(src_pixel.b - block_colors[2].b); + if (trial_error < best_error) + { + best_error = trial_error; + best_selector_index = 2; + } + + trial_error = math::square(src_pixel.r - block_colors[3].r) + math::square(src_pixel.g - block_colors[3].g) + math::square(src_pixel.b - block_colors[3].b); + if (trial_error < best_error) + { + best_error = trial_error; + best_selector_index = 3; + } + + m_temp_selectors[c] = static_cast(best_selector_index); + + total_error += best_error; + if (total_error >= trial_solution.m_error) + break; + } + + if (total_error < trial_solution.m_error) + { + trial_solution.m_error = total_error; + trial_solution.m_coords.m_inten_table = inten_table; + trial_solution.m_selectors.swap(m_temp_selectors); + trial_solution.m_valid = true; + } + } + trial_solution.m_coords.m_unscaled_color = coords.m_unscaled_color; + trial_solution.m_coords.m_color4 = m_pParams->m_use_color4; + + bool success = false; + if (pBest_solution) + { + if (trial_solution.m_error < pBest_solution->m_error) + { + *pBest_solution = trial_solution; + success = true; + } + } + + return success; + } + + bool etc1_optimizer::evaluate_solution_fast(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution) + { + if (m_pParams->m_constrain_against_base_color5) + { + const int dr = coords.m_unscaled_color.r - m_pParams->m_base_color5.r; + const int dg = coords.m_unscaled_color.g - m_pParams->m_base_color5.g; + const int db = coords.m_unscaled_color.b - m_pParams->m_base_color5.b; + + if ((math::minimum(dr, dg, db) < cETC1ColorDeltaMin) || (math::maximum(dr, dg, db) > cETC1ColorDeltaMax)) + { + trial_solution.m_valid = false; + return false; + } + } + + const color_quad_u8 base_color(coords.get_scaled_color()); + + const uint n = m_pParams->m_num_src_pixels; + CRNLIB_ASSERT(trial_solution.m_selectors.size() == n); + + trial_solution.m_error = cUINT64_MAX; + + for (int inten_table = cETC1IntenModifierValues - 1; inten_table >= 0; --inten_table) + { + const int* pInten_table = g_etc1_inten_tables[inten_table]; + + uint block_inten[4]; + color_quad_u8 block_colors[4]; + for (uint s = 0; s < 4; s++) + { + const int yd = pInten_table[s]; + color_quad_u8 block_color(base_color.r + yd, base_color.g + yd, base_color.b + yd, 0); + block_colors[s] = block_color; + block_inten[s] = block_color.r + block_color.g + block_color.b; + } + + // evaluate_solution_fast() enforces/assumesd a total ordering of the input colors along the intensity (1,1,1) axis to more quickly classify the inputs to selectors. + // The inputs colors have been presorted along the projection onto this axis, and ETC1 block colors are always ordered along the intensity axis, so this classification is fast. + // 0 1 2 3 + // 01 12 23 + const uint block_inten_midpoints[3] = { block_inten[0] + block_inten[1], block_inten[1] + block_inten[2], block_inten[2] + block_inten[3] }; + + uint64 total_error = 0; + const color_quad_u8* pSrc_pixels = m_pParams->m_pSrc_pixels; + if ((m_pSorted_luma[n - 1] * 2) < block_inten_midpoints[0]) + { + if (block_inten[0] > m_pSorted_luma[n - 1]) + { + const uint min_error = labs(block_inten[0] - m_pSorted_luma[n - 1]); + if (min_error >= trial_solution.m_error) + continue; + } + + memset(&m_temp_selectors[0], 0, n); + + for (uint c = 0; c < n; c++) + total_error += color::elucidian_distance(block_colors[0], pSrc_pixels[c], false); + } + else if ((m_pSorted_luma[0] * 2) >= block_inten_midpoints[2]) + { + if (m_pSorted_luma[0] > block_inten[3]) + { + const uint min_error = labs(m_pSorted_luma[0] - block_inten[3]); + if (min_error >= trial_solution.m_error) + continue; + } + + memset(&m_temp_selectors[0], 3, n); + + for (uint c = 0; c < n; c++) + total_error += color::elucidian_distance(block_colors[3], pSrc_pixels[c], false); + } + else + { + uint cur_selector = 0, c; + for (c = 0; c < n; c++) + { + const uint y = m_pSorted_luma[c]; + while ((y * 2) >= block_inten_midpoints[cur_selector]) + if (++cur_selector > 2) + goto done; + const uint sorted_pixel_index = m_pSorted_luma_indices[c]; + m_temp_selectors[sorted_pixel_index] = static_cast(cur_selector); + total_error += color::elucidian_distance(block_colors[cur_selector], pSrc_pixels[sorted_pixel_index], false); + } +done: + while (c < n) + { + const uint sorted_pixel_index = m_pSorted_luma_indices[c]; + m_temp_selectors[sorted_pixel_index] = 3; + total_error += color::elucidian_distance(block_colors[3], pSrc_pixels[sorted_pixel_index], false); + ++c; + } + } + + if (total_error < trial_solution.m_error) + { + trial_solution.m_error = total_error; + trial_solution.m_coords.m_inten_table = inten_table; + trial_solution.m_selectors.swap(m_temp_selectors); + trial_solution.m_valid = true; + if (!total_error) + break; + } + } + trial_solution.m_coords.m_unscaled_color = coords.m_unscaled_color; + trial_solution.m_coords.m_color4 = m_pParams->m_use_color4; + + bool success = false; + if (pBest_solution) + { + if (trial_solution.m_error < pBest_solution->m_error) + { + *pBest_solution = trial_solution; + success = true; + } + } + + return success; + } + + // Dither function from RYG's public domain real-time DXT1 compressor, modified for 555. + static void DitherBlock(color_quad_u8 *dest, const color_quad_u8 *block) + { + int err[8],*ep1 = err,*ep2 = err+4; + uint8 *quant = ryg_dxt::QuantRBTab+8; + + // process channels seperately + for(int ch=0;ch<3;ch++) + { + uint8 *bp = (uint8 *) block; + uint8 *dp = (uint8 *) dest; + + bp += ch; + dp += ch; + memset(err,0,sizeof(err)); + + for(int y=0;y<4;y++) + { + // pixel 0 + dp[ 0] = quant[bp[ 0] + ((3*ep2[1] + 5*ep2[0]) >> 4)]; + ep1[0] = bp[ 0] - dp[ 0]; + + // pixel 1 + dp[ 4] = quant[bp[ 4] + ((7*ep1[0] + 3*ep2[2] + 5*ep2[1] + ep2[0]) >> 4)]; + ep1[1] = bp[ 4] - dp[ 4]; + + // pixel 2 + dp[ 8] = quant[bp[ 8] + ((7*ep1[1] + 3*ep2[3] + 5*ep2[2] + ep2[1]) >> 4)]; + ep1[2] = bp[ 8] - dp[ 8]; + + // pixel 3 + dp[12] = quant[bp[12] + ((7*ep1[2] + 5*ep2[3] + ep2[2]) >> 4)]; + ep1[3] = bp[12] - dp[12]; + + // advance to next line + std::swap(ep1,ep2); + bp += 16; + dp += 16; + } + } + } + + static uint etc1_decode_value(uint diff, uint inten, uint selector, uint packed_c) + { + const uint limit = diff ? 32 : 16; limit; + CRNLIB_ASSERT((diff < 2) && (inten < 8) && (selector < 4) && (packed_c < limit)); + int c; + if (diff) + c = (packed_c >> 2) | (packed_c << 3); + else + c = packed_c | (packed_c << 4); + c += g_etc1_inten_tables[inten][selector]; + c = math::clamp(c, 0, 255); + return c; + } + + void pack_etc1_block_init() + { + for (uint diff = 0; diff < 2; diff++) + { + const uint limit = diff ? 32 : 16; + + for (uint inten = 0; inten < 8; inten++) + { + for (uint selector = 0; selector < 4; selector++) + { + const uint inverse_table_index = diff + (inten << 1) + (selector << 4); + for (uint color = 0; color < 256; color++) + { + uint best_error = cUINT32_MAX, best_packed_c = 0; + for (uint packed_c = 0; packed_c < limit; packed_c++) + { + int v = etc1_decode_value(diff, inten, selector, packed_c); + uint err = labs(v - color); + if (err < best_error) + { + best_error = err; + best_packed_c = packed_c; + if (!best_error) + break; + } + } + CRNLIB_ASSERT(best_error <= 255); + g_etc1_inverse_lookup[inverse_table_index][color] = static_cast(best_packed_c | (best_error << 8)); + } + } + } + } + } + + // Packs solid color blocks efficiently using a set of small precomputed tables. + // For random 888 inputs, MSE results are better than Erricson's ETC1 packer in "slow" mode ~9.5% of the time, is slightly worse only ~.01% of the time, and is equal the rest of the time. + static uint64 pack_etc1_block_solid_color(etc1_block& block, const uint8* pColor, crn_etc1_pack_params& pack_params, pack_etc1_block_context& context) + { + CRNLIB_ASSERT(g_etc1_inverse_lookup[0][255]); + + context, pack_params; + static uint s_next_comp[4] = { 1, 2, 0, 1 }; + + uint best_error = cUINT32_MAX, best_i = 0; + int best_x = 0, best_packed_c1 = 0, best_packed_c2 = 0; + + // For each possible 8-bit value, there is a precomputed list of diff/inten/selector configurations that allow that 8-bit value to be encoded with no error. + for (uint i = 0; i < 3; i++) + { + const uint c1 = pColor[s_next_comp[i]], c2 = pColor[s_next_comp[i + 1]]; + + const int delta_range = 1; + for (int delta = -delta_range; delta <= delta_range; delta++) + { + const int c_plus_delta = math::clamp(pColor[i] + delta, 0, 255); + + const uint16* pTable; + if (!c_plus_delta) + pTable = g_color8_to_etc_block_config_0_255[0]; + else if (c_plus_delta == 255) + pTable = g_color8_to_etc_block_config_0_255[1]; + else + pTable = g_color8_to_etc_block_config_1_to_254[c_plus_delta - 1]; + + do + { + const uint x = *pTable++; + +#ifdef CRNLIB_BUILD_DEBUG + const uint diff = x & 1; + const uint inten = (x >> 1) & 7; + const uint selector = (x >> 4) & 3; + const uint p0 = (x >> 8) & 255; + CRNLIB_ASSERT(etc1_decode_value(diff, inten, selector, p0) == (uint)c_plus_delta); +#endif + + const uint16* pInverse_table = g_etc1_inverse_lookup[x & 0xFF]; + uint16 p1 = pInverse_table[c1]; + uint16 p2 = pInverse_table[c2]; + const uint trial_error = math::square(c_plus_delta - pColor[i]) + math::square(p1 >> 8) + math::square(p2 >> 8); + if (trial_error < best_error) + { + best_error = trial_error; + best_x = x; + best_packed_c1 = p1 & 0xFF; + best_packed_c2 = p2 & 0xFF; + best_i = i; + if (!best_error) + goto found_perfect_match; + } + } while (*pTable != 0xFFFF); + } + } +found_perfect_match: + + const uint diff = best_x & 1; + const uint inten = (best_x >> 1) & 7; + + block.m_bytes[3] = static_cast(((inten | (inten << 3)) << 2) | (diff << 1)); + + const uint etc1_selector = g_selector_index_to_etc1[(best_x >> 4) & 3]; + *reinterpret_cast(&block.m_bytes[4]) = (etc1_selector & 2) ? 0xFFFF : 0; + *reinterpret_cast(&block.m_bytes[6]) = (etc1_selector & 1) ? 0xFFFF : 0; + + const uint best_packed_c0 = (best_x >> 8) & 255; + if (diff) + { + block.m_bytes[best_i] = static_cast(best_packed_c0 << 3); + block.m_bytes[s_next_comp[best_i]] = static_cast(best_packed_c1 << 3); + block.m_bytes[s_next_comp[best_i+1]] = static_cast(best_packed_c2 << 3); + } + else + { + block.m_bytes[best_i] = static_cast(best_packed_c0 | (best_packed_c0 << 4)); + block.m_bytes[s_next_comp[best_i]] = static_cast(best_packed_c1 | (best_packed_c1 << 4)); + block.m_bytes[s_next_comp[best_i+1]] = static_cast(best_packed_c2 | (best_packed_c2 << 4)); + } + + return best_error; + } + + static uint pack_etc1_block_solid_color_constrained( + etc1_optimizer::results& results, + uint num_colors, const uint8 *pColor, + crn_etc1_pack_params& pack_params, + pack_etc1_block_context& context, + bool use_diff, + const color_quad_u8* pBase_color5_unscaled) + { + CRNLIB_ASSERT(g_etc1_inverse_lookup[0][255]); + + context, pack_params; + static uint s_next_comp[4] = { 1, 2, 0, 1 }; + + uint best_error = cUINT32_MAX, best_i = 0; + int best_x = 0, best_packed_c1 = 0, best_packed_c2 = 0; + + // For each possible 8-bit value, there is a precomputed list of diff/inten/selector configurations that allow that 8-bit value to be encoded with no error. + for (uint i = 0; i < 3; i++) + { + const uint c1 = pColor[s_next_comp[i]], c2 = pColor[s_next_comp[i + 1]]; + + const int delta_range = 1; + for (int delta = -delta_range; delta <= delta_range; delta++) + { + const int c_plus_delta = math::clamp(pColor[i] + delta, 0, 255); + + const uint16* pTable; + if (!c_plus_delta) + pTable = g_color8_to_etc_block_config_0_255[0]; + else if (c_plus_delta == 255) + pTable = g_color8_to_etc_block_config_0_255[1]; + else + pTable = g_color8_to_etc_block_config_1_to_254[c_plus_delta - 1]; + + do + { + const uint x = *pTable++; + const uint diff = x & 1; + if (static_cast(use_diff) != diff) + { + if (*pTable == 0xFFFF) + break; + continue; + } + + if ((diff) && (pBase_color5_unscaled)) + { + const int p0 = (x >> 8) & 255; + int delta = p0 - static_cast(pBase_color5_unscaled->c[i]); + if ((delta < cETC1ColorDeltaMin) || (delta > cETC1ColorDeltaMax)) + { + if (*pTable == 0xFFFF) + break; + continue; + } + } + +#ifdef CRNLIB_BUILD_DEBUG + { + const uint inten = (x >> 1) & 7; + const uint selector = (x >> 4) & 3; + const uint p0 = (x >> 8) & 255; + CRNLIB_ASSERT(etc1_decode_value(diff, inten, selector, p0) == (uint)c_plus_delta); + } +#endif + + const uint16* pInverse_table = g_etc1_inverse_lookup[x & 0xFF]; + uint16 p1 = pInverse_table[c1]; + uint16 p2 = pInverse_table[c2]; + + if ((diff) && (pBase_color5_unscaled)) + { + int delta1 = (p1 & 0xFF) - static_cast(pBase_color5_unscaled->c[s_next_comp[i]]); + int delta2 = (p2 & 0xFF) - static_cast(pBase_color5_unscaled->c[s_next_comp[i + 1]]); + if ((delta1 < cETC1ColorDeltaMin) || (delta1 > cETC1ColorDeltaMax) || (delta2 < cETC1ColorDeltaMin) || (delta2 > cETC1ColorDeltaMax)) + { + if (*pTable == 0xFFFF) + break; + continue; + } + } + + const uint trial_error = math::square(c_plus_delta - pColor[i]) + math::square(p1 >> 8) + math::square(p2 >> 8); + if (trial_error < best_error) + { + best_error = trial_error; + best_x = x; + best_packed_c1 = p1 & 0xFF; + best_packed_c2 = p2 & 0xFF; + best_i = i; + if (!best_error) + goto found_perfect_match; + } + } while (*pTable != 0xFFFF); + } + } +found_perfect_match: + + if (best_error == cUINT32_MAX) + return best_error; + + best_error *= num_colors; + + results.m_n = num_colors; + results.m_block_color4 = !(best_x & 1); + results.m_block_inten_table = (best_x >> 1) & 7; + memset(results.m_pSelectors, (best_x >> 4) & 3, num_colors); + + const uint best_packed_c0 = (best_x >> 8) & 255; + results.m_block_color_unscaled[best_i] = static_cast(best_packed_c0); + results.m_block_color_unscaled[s_next_comp[best_i]] = static_cast(best_packed_c1); + results.m_block_color_unscaled[s_next_comp[best_i + 1]] = static_cast(best_packed_c2); + results.m_error = best_error; + + return best_error; + } + + uint64 pack_etc1_block(etc1_block& dst_block, const color_quad_u8* pSrc_pixels, crn_etc1_pack_params& pack_params, pack_etc1_block_context& context) + { + color_quad_u8 src_pixel0(pSrc_pixels[0]); + + int r; + for (r = 15; r >= 1; --r) + if ((pSrc_pixels[r].r != src_pixel0.r) || (pSrc_pixels[r].g != src_pixel0.g) || (pSrc_pixels[r].b != src_pixel0.b)) + break; + if (!r) + return 16 * pack_etc1_block_solid_color(dst_block, &pSrc_pixels[0].r, pack_params, context); + + color_quad_u8 dithered_pixels[16]; + if (pack_params.m_dithering) + { + DitherBlock(dithered_pixels, pSrc_pixels); + pSrc_pixels = dithered_pixels; + } + + uint64 best_error = cUINT64_MAX; + uint best_flip = false, best_use_color4 = false; + + uint8 best_selectors[2][8]; + etc1_optimizer::results best_results[2]; + for (uint i = 0; i < 2; i++) + { + best_results[i].m_n = 8; + best_results[i].m_pSelectors = best_selectors[i]; + } + + uint8 selectors[3][8]; + etc1_optimizer::results results[3]; + + for (uint i = 0; i < 3; i++) + { + results[i].m_n = 8; + results[i].m_pSelectors = selectors[i]; + } + + color_quad_u8 subblock_pixels[8]; + + etc1_optimizer::params params(pack_params); + params.m_num_src_pixels = 8; + params.m_pSrc_pixels = subblock_pixels; + + for (uint flip = 0; flip < 2; flip++) + { + for (uint use_color4 = 0; use_color4 < 2; use_color4++) + { + uint64 trial_error = 0; + + uint subblock; + for (subblock = 0; subblock < 2; subblock++) + { + if (flip) + memcpy(subblock_pixels, pSrc_pixels + subblock * 8, sizeof(color_quad_u8) * 8); + else + { + const color_quad_u8* pSrc_col = pSrc_pixels + subblock * 2; + subblock_pixels[0] = pSrc_col[0]; subblock_pixels[1] = pSrc_col[4]; subblock_pixels[2] = pSrc_col[8]; subblock_pixels[3] = pSrc_col[12]; + subblock_pixels[4] = pSrc_col[1]; subblock_pixels[5] = pSrc_col[5]; subblock_pixels[6] = pSrc_col[9]; subblock_pixels[7] = pSrc_col[13]; + } + + results[2].m_error = cUINT64_MAX; + if ((params.m_quality >= cCRNETCQualityMedium) && ((subblock) || (use_color4))) + { + color_quad_u8 subblock_pixel0(subblock_pixels[0]); + for (r = 7; r >= 1; --r) + if ((subblock_pixels[r].r != subblock_pixel0.r) || (subblock_pixels[r].g != subblock_pixel0.g) || (subblock_pixels[r].b != subblock_pixel0.b)) + break; + if (!r) + { + pack_etc1_block_solid_color_constrained(results[2], 8, &subblock_pixel0.r, pack_params, context, !use_color4, (subblock && !use_color4) ? &results[0].m_block_color_unscaled : NULL); + } + } + + params.m_use_color4 = (use_color4 != 0); + params.m_constrain_against_base_color5 = false; + + if ((!use_color4) && (subblock)) + { + params.m_constrain_against_base_color5 = true; + params.m_base_color5 = results[0].m_block_color_unscaled; + } + + if (params.m_quality == cCRNETCQualitySlow) + { + static const int s_scan_delta_0_to_4[] = { -4, -3, -2, -1, 0, 1, 2, 3, 4 }; + params.m_scan_delta_size = CRNLIB_ARRAY_SIZE(s_scan_delta_0_to_4); + params.m_pScan_deltas = s_scan_delta_0_to_4; + } + else if (params.m_quality == cCRNETCQualityMedium) + { + static const int s_scan_delta_0_to_1[] = { -1, 0, 1 }; + params.m_scan_delta_size = CRNLIB_ARRAY_SIZE(s_scan_delta_0_to_1); + params.m_pScan_deltas = s_scan_delta_0_to_1; + } + else + { + static const int s_scan_delta_0[] = { 0 }; + params.m_scan_delta_size = CRNLIB_ARRAY_SIZE(s_scan_delta_0); + params.m_pScan_deltas = s_scan_delta_0; + } + + context.m_optimizer.init(params, results[subblock]); + + if (!context.m_optimizer.compute()) + break; + + // Fairly arbitrary/unrefined thresholds that control how far away to scan for potentially better solutions. + const uint refinement_error_thresh0 = 3000; + const uint refinement_error_thresh1 = 6000; + if ((params.m_quality >= cCRNETCQualityMedium) && (results[subblock].m_error > refinement_error_thresh0)) + { + if (params.m_quality == cCRNETCQualityMedium) + { + static const int s_scan_delta_2_to_3[] = { -3, -2, 2, 3 }; + params.m_scan_delta_size = CRNLIB_ARRAY_SIZE(s_scan_delta_2_to_3); + params.m_pScan_deltas = s_scan_delta_2_to_3; + } + else + { + static const int s_scan_delta_5_to_5[] = { -5, 5 }; + static const int s_scan_delta_5_to_8[] = { -8, -7, -6, -5, 5, 6, 7, 8 }; + if (results[subblock].m_error > refinement_error_thresh1) + { + params.m_scan_delta_size = CRNLIB_ARRAY_SIZE(s_scan_delta_5_to_8); + params.m_pScan_deltas = s_scan_delta_5_to_8; + } + else + { + params.m_scan_delta_size = CRNLIB_ARRAY_SIZE(s_scan_delta_5_to_5); + params.m_pScan_deltas = s_scan_delta_5_to_5; + } + } + + if (!context.m_optimizer.compute()) + break; + } + + if (results[2].m_error < results[subblock].m_error) + results[subblock] = results[2]; + + trial_error += results[subblock].m_error; + if (trial_error >= best_error) + break; + } + + if (subblock < 2) + continue; + + best_error = trial_error; + best_results[0] = results[0]; + best_results[1] = results[1]; + best_flip = flip; + best_use_color4 = use_color4; + + } // use_color4 + + } // flip + + int dr = best_results[1].m_block_color_unscaled.r - best_results[0].m_block_color_unscaled.r; + int dg = best_results[1].m_block_color_unscaled.g - best_results[0].m_block_color_unscaled.g; + int db = best_results[1].m_block_color_unscaled.b - best_results[0].m_block_color_unscaled.b; + if (!best_use_color4) + { + if ((math::minimum(dr, dg, db) < cETC1ColorDeltaMin) || (math::maximum(dr, dg, db) > cETC1ColorDeltaMax)) + { + // Shouldn't ever happen + CRNLIB_VERIFY(0); + } + } + + if (best_use_color4) + { + dst_block.m_bytes[0] = static_cast(best_results[1].m_block_color_unscaled.r | (best_results[0].m_block_color_unscaled.r << 4)); + dst_block.m_bytes[1] = static_cast(best_results[1].m_block_color_unscaled.g | (best_results[0].m_block_color_unscaled.g << 4)); + dst_block.m_bytes[2] = static_cast(best_results[1].m_block_color_unscaled.b | (best_results[0].m_block_color_unscaled.b << 4)); + } + else + { + if (dr < 0) dr += 8; + if (dg < 0) dg += 8; + if (db < 0) db += 8; + dst_block.m_bytes[0] = static_cast((best_results[0].m_block_color_unscaled.r << 3) | dr); + dst_block.m_bytes[1] = static_cast((best_results[0].m_block_color_unscaled.g << 3) | dg); + dst_block.m_bytes[2] = static_cast((best_results[0].m_block_color_unscaled.b << 3) | db); + } + + dst_block.m_bytes[3] = static_cast( (best_results[1].m_block_inten_table << 2) | (best_results[0].m_block_inten_table << 5) | ((~best_use_color4 & 1) << 1) | best_flip ); + + uint selector0 = 0, selector1 = 0; + if (best_flip) + { + // flipped: + // { 0, 0 }, { 1, 0 }, { 2, 0 }, { 3, 0 }, + // { 0, 1 }, { 1, 1 }, { 2, 1 }, { 3, 1 } + // + // { 0, 2 }, { 1, 2 }, { 2, 2 }, { 3, 2 }, + // { 0, 3 }, { 1, 3 }, { 2, 3 }, { 3, 3 } + const uint8* pSelectors0 = best_results[0].m_pSelectors; + const uint8* pSelectors1 = best_results[1].m_pSelectors; + for (int x = 3; x >= 0; --x) + { + uint b; + b = g_selector_index_to_etc1[pSelectors1[4 + x]]; + selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1); + + b = g_selector_index_to_etc1[pSelectors1[x]]; + selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1); + + b = g_selector_index_to_etc1[pSelectors0[4 + x]]; + selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1); + + b = g_selector_index_to_etc1[pSelectors0[x]]; + selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1); + } + } + else + { + // non-flipped: + // { 0, 0 }, { 0, 1 }, { 0, 2 }, { 0, 3 }, + // { 1, 0 }, { 1, 1 }, { 1, 2 }, { 1, 3 } + // + // { 2, 0 }, { 2, 1 }, { 2, 2 }, { 2, 3 }, + // { 3, 0 }, { 3, 1 }, { 3, 2 }, { 3, 3 } + for (int subblock = 1; subblock >= 0; --subblock) + { + const uint8* pSelectors = best_results[subblock].m_pSelectors + 4; + for (uint i = 0; i < 2; i++) + { + uint b; + b = g_selector_index_to_etc1[pSelectors[3]]; + selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1); + + b = g_selector_index_to_etc1[pSelectors[2]]; + selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1); + + b = g_selector_index_to_etc1[pSelectors[1]]; + selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1); + + b = g_selector_index_to_etc1[pSelectors[0]]; + selector0 = (selector0 << 1) | (b & 1);selector1 = (selector1 << 1) | (b >> 1); + + pSelectors -= 4; + } + } + } + + dst_block.m_bytes[4] = static_cast(selector1 >> 8); + dst_block.m_bytes[5] = static_cast(selector1 & 0xFF); + dst_block.m_bytes[6] = static_cast(selector0 >> 8); + dst_block.m_bytes[7] = static_cast(selector0 & 0xFF); + + return best_error; + } + +} // namespace crnlib diff --git a/crnlib/crn_etc.h b/crnlib/crn_etc.h new file mode 100644 index 0000000..82b3a12 --- /dev/null +++ b/crnlib/crn_etc.h @@ -0,0 +1,609 @@ +// File: crn_etc.h +// See Copyright Notice and license at the end of inc/crnlib.h +#pragma once +#include "../inc/crnlib.h" +#include "crn_dxt.h" + +namespace crnlib +{ + enum etc_constants + { + cETC1BytesPerBlock = 8U, + + cETC1SelectorBits = 2U, + cETC1SelectorValues = 1U << cETC1SelectorBits, + cETC1SelectorMask = cETC1SelectorValues - 1U, + + cETC1BlockShift = 2U, + cETC1BlockSize = 1U << cETC1BlockShift, + + cETC1LSBSelectorIndicesBitOffset = 0, + cETC1MSBSelectorIndicesBitOffset = 16, + + cETC1FlipBitOffset = 32, + cETC1DiffBitOffset = 33, + + cETC1IntenModifierNumBits = 3, + cETC1IntenModifierValues = 1 << cETC1IntenModifierNumBits, + cETC1RightIntenModifierTableBitOffset = 34, + cETC1LeftIntenModifierTableBitOffset = 37, + + // Base+Delta encoding (5 bit bases, 3 bit delta) + cETC1BaseColorCompNumBits = 5, + cETC1BaseColorCompMax = 1 << cETC1BaseColorCompNumBits, + + cETC1DeltaColorCompNumBits = 3, + cETC1DeltaColorComp = 1 << cETC1DeltaColorCompNumBits, + cETC1DeltaColorCompMax = 1 << cETC1DeltaColorCompNumBits, + + cETC1BaseColor5RBitOffset = 59, + cETC1BaseColor5GBitOffset = 51, + cETC1BaseColor5BBitOffset = 43, + + cETC1DeltaColor3RBitOffset = 56, + cETC1DeltaColor3GBitOffset = 48, + cETC1DeltaColor3BBitOffset = 40, + + // Absolute (non-delta) encoding (two 4-bit per component bases) + cETC1AbsColorCompNumBits = 4, + cETC1AbsColorCompMax = 1 << cETC1AbsColorCompNumBits, + + cETC1AbsColor4R1BitOffset = 60, + cETC1AbsColor4G1BitOffset = 52, + cETC1AbsColor4B1BitOffset = 44, + + cETC1AbsColor4R2BitOffset = 56, + cETC1AbsColor4G2BitOffset = 48, + cETC1AbsColor4B2BitOffset = 40, + + cETC1ColorDeltaMin = -4, + cETC1ColorDeltaMax = 3, + + // Delta3: + // 0 1 2 3 4 5 6 7 + // 000 001 010 011 100 101 110 111 + // 0 1 2 3 -4 -3 -2 -1 + }; + + extern const int g_etc1_inten_tables[cETC1IntenModifierValues][cETC1SelectorValues]; + extern const uint8 g_etc1_to_selector_index[cETC1SelectorValues]; + extern const uint8 g_selector_index_to_etc1[cETC1SelectorValues]; + + struct etc1_coord2 + { + uint8 m_x, m_y; + }; + extern const etc1_coord2 g_etc1_pixel_coords[2][2][8]; // [flipped][subblock][subblock_pixel] + + struct etc1_block + { + // big endian uint64: + // bit ofs: 56 48 40 32 24 16 8 0 + // byte ofs: b0, b1, b2, b3, b4, b5, b6, b7 + union + { + uint64 m_uint64; + uint8 m_bytes[8]; + }; + + uint8 m_low_color[2]; + uint8 m_high_color[2]; + + enum { cNumSelectorBytes = 4 }; + uint8 m_selectors[cNumSelectorBytes]; + + inline void clear() + { + utils::zero_this(this); + } + + inline uint get_general_bits(uint ofs, uint num) const + { + CRNLIB_ASSERT((ofs + num) <= 64U); + CRNLIB_ASSERT(num && (num < 32U)); + return (utils::read_be64(&m_uint64) >> ofs) & ((1UL << num) - 1UL); + } + + inline void set_general_bits(uint ofs, uint num, uint bits) + { + CRNLIB_ASSERT((ofs + num) <= 64U); + CRNLIB_ASSERT(num && (num < 32U)); + + uint64 x = utils::read_be64(&m_uint64); + uint64 msk = ((1ULL << static_cast(num)) - 1ULL) << static_cast(ofs); + x &= ~msk; + x |= (static_cast(bits) << static_cast(ofs)); + utils::write_be64(&m_uint64, x); + } + + inline uint get_byte_bits(uint ofs, uint num) const + { + CRNLIB_ASSERT((ofs + num) <= 64U); + CRNLIB_ASSERT(num && (num <= 8U)); + CRNLIB_ASSERT((ofs >> 3) == ((ofs + num - 1) >> 3)); + const uint byte_ofs = 7 - (ofs >> 3); + const uint byte_bit_ofs = ofs & 7; + return (m_bytes[byte_ofs] >> byte_bit_ofs) & ((1 << num) - 1); + } + + inline void set_byte_bits(uint ofs, uint num, uint bits) + { + CRNLIB_ASSERT((ofs + num) <= 64U); + CRNLIB_ASSERT(num && (num < 32U)); + CRNLIB_ASSERT((ofs >> 3) == ((ofs + num - 1) >> 3)); + CRNLIB_ASSERT(bits < (1U << num)); + const uint byte_ofs = 7 - (ofs >> 3); + const uint byte_bit_ofs = ofs & 7; + const uint mask = (1 << num) - 1; + m_bytes[byte_ofs] &= ~(mask << byte_bit_ofs); + m_bytes[byte_ofs] |= (bits << byte_bit_ofs); + } + + // false = left/right subblocks + // true = upper/lower subblocks + inline bool get_flip_bit() const + { + return (m_bytes[3] & 1) != 0; + } + + inline void set_flip_bit(bool flip) + { + m_bytes[3] &= ~1; + m_bytes[3] |= static_cast(flip); + } + + inline bool get_diff_bit() const + { + return (m_bytes[3] & 2) != 0; + } + + inline void set_diff_bit(bool diff) + { + m_bytes[3] &= ~2; + m_bytes[3] |= (static_cast(diff) << 1); + } + + // Returns intensity modifier table (0-7) used by subblock subblock_id. + // subblock_id=0 left/top (CW 1), 1=right/bottom (CW 2) + inline uint get_inten_table(uint subblock_id) const + { + CRNLIB_ASSERT(subblock_id < 2); + const uint ofs = subblock_id ? 2 : 5; + return (m_bytes[3] >> ofs) & 7; + } + + // Sets intensity modifier table (0-7) used by subblock subblock_id (0 or 1) + inline void set_inten_table(uint subblock_id, uint t) + { + CRNLIB_ASSERT(subblock_id < 2); + CRNLIB_ASSERT(t < 8); + const uint ofs = subblock_id ? 2 : 5; + m_bytes[3] &= ~(7 << ofs); + m_bytes[3] |= (t << ofs); + } + + // Returned selector value ranges from 0-3 and is a direct index into g_etc1_inten_tables. + inline uint get_selector(uint x, uint y) const + { + CRNLIB_ASSERT((x | y) < 4); + + const uint bit_index = x * 4 + y; + const uint byte_bit_ofs = bit_index & 7; + const uint8 *p = &m_bytes[7 - (bit_index >> 3)]; + const uint lsb = (p[0] >> byte_bit_ofs) & 1; + const uint msb = (p[-2] >> byte_bit_ofs) & 1; + const uint val = lsb | (msb << 1); + + return g_etc1_to_selector_index[val]; + } + + // Selector "val" ranges from 0-3 and is a direct index into g_etc1_inten_tables. + inline void set_selector(uint x, uint y, uint val) + { + CRNLIB_ASSERT((x | y | val) < 4); + const uint bit_index = x * 4 + y; + + uint8 *p = &m_bytes[7 - (bit_index >> 3)]; + + const uint byte_bit_ofs = bit_index & 7; + const uint mask = 1 << byte_bit_ofs; + + const uint etc1_val = g_selector_index_to_etc1[val]; + + const uint lsb = etc1_val & 1; + const uint msb = etc1_val >> 1; + + p[0] &= ~mask; + p[0] |= (lsb << byte_bit_ofs); + + p[-2] &= ~mask; + p[-2] |= (msb << byte_bit_ofs); + } + + inline void set_base4_color(uint idx, uint16 c) + { + if (idx) + { + set_byte_bits(cETC1AbsColor4R2BitOffset, 4, (c >> 8) & 15); + set_byte_bits(cETC1AbsColor4G2BitOffset, 4, (c >> 4) & 15); + set_byte_bits(cETC1AbsColor4B2BitOffset, 4, c & 15); + } + else + { + set_byte_bits(cETC1AbsColor4R1BitOffset, 4, (c >> 8) & 15); + set_byte_bits(cETC1AbsColor4G1BitOffset, 4, (c >> 4) & 15); + set_byte_bits(cETC1AbsColor4B1BitOffset, 4, c & 15); + } + } + + inline uint16 get_base4_color(uint idx) const + { + uint r, g, b; + if (idx) + { + r = get_byte_bits(cETC1AbsColor4R2BitOffset, 4); + g = get_byte_bits(cETC1AbsColor4G2BitOffset, 4); + b = get_byte_bits(cETC1AbsColor4B2BitOffset, 4); + } + else + { + r = get_byte_bits(cETC1AbsColor4R1BitOffset, 4); + g = get_byte_bits(cETC1AbsColor4G1BitOffset, 4); + b = get_byte_bits(cETC1AbsColor4B1BitOffset, 4); + } + return static_cast(b | (g << 4U) | (r << 8U)); + } + + inline void set_base5_color(uint16 c) + { + set_byte_bits(cETC1BaseColor5RBitOffset, 5, (c >> 10) & 31); + set_byte_bits(cETC1BaseColor5GBitOffset, 5, (c >> 5) & 31); + set_byte_bits(cETC1BaseColor5BBitOffset, 5, c & 31); + } + + inline uint16 get_base5_color() const + { + const uint r = get_byte_bits(cETC1BaseColor5RBitOffset, 5); + const uint g = get_byte_bits(cETC1BaseColor5GBitOffset, 5); + const uint b = get_byte_bits(cETC1BaseColor5BBitOffset, 5); + return static_cast(b | (g << 5U) | (r << 10U)); + } + + void set_delta3_color(uint16 c) + { + set_byte_bits(cETC1DeltaColor3RBitOffset, 3, (c >> 6) & 7); + set_byte_bits(cETC1DeltaColor3GBitOffset, 3, (c >> 3) & 7); + set_byte_bits(cETC1DeltaColor3BBitOffset, 3, c & 7); + } + + inline uint16 get_delta3_color() const + { + const uint r = get_byte_bits(cETC1DeltaColor3RBitOffset, 3); + const uint g = get_byte_bits(cETC1DeltaColor3GBitOffset, 3); + const uint b = get_byte_bits(cETC1DeltaColor3BBitOffset, 3); + return static_cast(b | (g << 3U) | (r << 6U)); + } + + // Base color 5 + static uint16 pack_color5(const color_quad_u8& color, bool scaled, uint bias = 127U); + static uint16 pack_color5(uint r, uint g, uint b, bool scaled, uint bias = 127U); + + static color_quad_u8 unpack_color5(uint16 packed_color5, bool scaled, uint alpha = 255U); + static void unpack_color5(uint& r, uint& g, uint& b, uint16 packed_color, bool scaled); + + static bool unpack_color5(color_quad_u8& result, uint16 packed_color5, uint16 packed_delta3, bool scaled, uint alpha = 255U); + static bool unpack_color5(uint& r, uint& g, uint& b, uint16 packed_color5, uint16 packed_delta3, bool scaled, uint alpha = 255U); + + // Delta color 3 + // Inputs range from -4 to 3 (cETC1ColorDeltaMin to cETC1ColorDeltaMax) + static uint16 pack_delta3(const color_quad_i16& color); + static uint16 pack_delta3(int r, int g, int b); + + // Results range from -4 to 3 (cETC1ColorDeltaMin to cETC1ColorDeltaMax) + static color_quad_i16 unpack_delta3(uint16 packed_delta3); + static void unpack_delta3(int& r, int& g, int& b, uint16 packed_delta3); + + // Abs color 4 + static uint16 pack_color4(const color_quad_u8& color, bool scaled, uint bias = 127U); + static uint16 pack_color4(uint r, uint g, uint b, bool scaled, uint bias = 127U); + + static color_quad_u8 unpack_color4(uint16 packed_color4, bool scaled, uint alpha = 255U); + static void unpack_color4(uint& r, uint& g, uint& b, uint16 packed_color4, bool scaled); + + // subblock colors + static void get_diff_subblock_colors(color_quad_u8* pDst, uint16 packed_color5, uint table_idx); + static bool get_diff_subblock_colors(color_quad_u8* pDst, uint16 packed_color5, uint16 packed_delta3, uint table_idx); + static void get_abs_subblock_colors(color_quad_u8* pDst, uint16 packed_color4, uint table_idx); + + static inline void unscaled_to_scaled_color(color_quad_u8& dst, const color_quad_u8& src, bool color4) + { + if (color4) + { + dst.r = src.r | (src.r << 4); + dst.g = src.g | (src.g << 4); + dst.b = src.b | (src.b << 4); + } + else + { + dst.r = (src.r >> 2) | (src.r << 3); + dst.g = (src.g >> 2) | (src.g << 3); + dst.b = (src.b >> 2) | (src.b << 3); + } + dst.a = src.a; + } + }; + + CRNLIB_DEFINE_BITWISE_COPYABLE(etc1_block); + + // Returns false if the block is invalid (it will still be unpacked with clamping). + bool unpack_etc1(const etc1_block& block, color_quad_u8 *pDst, bool preserve_alpha = false); + + enum crn_etc_quality + { + cCRNETCQualityFast, + cCRNETCQualityMedium, + cCRNETCQualitySlow, + + cCRNETCQualityTotal, + + cCRNETCQualityForceDWORD = 0xFFFFFFFF + }; + + struct crn_etc1_pack_params + { + crn_etc_quality m_quality; + bool m_perceptual; + bool m_dithering; + + inline crn_etc1_pack_params() + { + clear(); + } + + void clear() + { + m_quality = cCRNETCQualitySlow; + m_perceptual = true; + m_dithering = false; + } + }; + + struct etc1_solution_coordinates + { + inline etc1_solution_coordinates() : + m_unscaled_color(0, 0, 0, 0), + m_inten_table(0), + m_color4(false) + { + } + + inline etc1_solution_coordinates(uint r, uint g, uint b, uint inten_table, bool color4) : + m_unscaled_color(r, g, b, 255), + m_inten_table(inten_table), + m_color4(color4) + { + } + + inline etc1_solution_coordinates(const color_quad_u8& c, uint inten_table, bool color4) : + m_unscaled_color(c), + m_inten_table(inten_table), + m_color4(color4) + { + } + + inline etc1_solution_coordinates(const etc1_solution_coordinates& other) + { + *this = other; + } + + inline etc1_solution_coordinates& operator= (const etc1_solution_coordinates& rhs) + { + m_unscaled_color = rhs.m_unscaled_color; + m_inten_table = rhs.m_inten_table; + m_color4 = rhs.m_color4; + return *this; + } + + inline void clear() + { + m_unscaled_color.clear(); + m_inten_table = 0; + m_color4 = false; + } + + inline color_quad_u8 get_scaled_color() const + { + int br, bg, bb; + if (m_color4) + { + br = m_unscaled_color.r | (m_unscaled_color.r << 4); + bg = m_unscaled_color.g | (m_unscaled_color.g << 4); + bb = m_unscaled_color.b | (m_unscaled_color.b << 4); + } + else + { + br = (m_unscaled_color.r >> 2) | (m_unscaled_color.r << 3); + bg = (m_unscaled_color.g >> 2) | (m_unscaled_color.g << 3); + bb = (m_unscaled_color.b >> 2) | (m_unscaled_color.b << 3); + } + return color_quad_u8(br, bg, bb); + } + + inline void get_block_colors(color_quad_u8* pBlock_colors) + { + int br, bg, bb; + if (m_color4) + { + br = m_unscaled_color.r | (m_unscaled_color.r << 4); + bg = m_unscaled_color.g | (m_unscaled_color.g << 4); + bb = m_unscaled_color.b | (m_unscaled_color.b << 4); + } + else + { + br = (m_unscaled_color.r >> 2) | (m_unscaled_color.r << 3); + bg = (m_unscaled_color.g >> 2) | (m_unscaled_color.g << 3); + bb = (m_unscaled_color.b >> 2) | (m_unscaled_color.b << 3); + } + const int* pInten_table = g_etc1_inten_tables[m_inten_table]; + pBlock_colors[0].set(br + pInten_table[0], bg + pInten_table[0], bb + pInten_table[0]); + pBlock_colors[1].set(br + pInten_table[1], bg + pInten_table[1], bb + pInten_table[1]); + pBlock_colors[2].set(br + pInten_table[2], bg + pInten_table[2], bb + pInten_table[2]); + pBlock_colors[3].set(br + pInten_table[3], bg + pInten_table[3], bb + pInten_table[3]); + } + + color_quad_u8 m_unscaled_color; + uint m_inten_table; + bool m_color4; + }; + + class etc1_optimizer + { + CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(etc1_optimizer); + + public: + etc1_optimizer() + { + clear(); + } + + void clear() + { + m_pParams = NULL; + m_pResult = NULL; + m_pSorted_luma = NULL; + m_pSorted_luma_indices = NULL; + } + + struct params : crn_etc1_pack_params + { + params() + { + clear(); + } + + params(const crn_etc1_pack_params& base_params) : + crn_etc1_pack_params(base_params) + { + clear_optimizer_params(); + } + + void clear() + { + crn_etc1_pack_params::clear(); + clear_optimizer_params(); + } + + void clear_optimizer_params() + { + m_num_src_pixels = 0; + m_pSrc_pixels = 0; + + m_use_color4 = false; + static const int s_default_scan_delta[] = { 0 }; + m_pScan_deltas = s_default_scan_delta; + m_scan_delta_size = 1; + + m_base_color5.clear(); + m_constrain_against_base_color5 = false; + } + + uint m_num_src_pixels; + const color_quad_u8* m_pSrc_pixels; + + bool m_use_color4; + const int* m_pScan_deltas; + uint m_scan_delta_size; + + color_quad_u8 m_base_color5; + bool m_constrain_against_base_color5; + }; + + struct results + { + uint64 m_error; + color_quad_u8 m_block_color_unscaled; + uint m_block_inten_table; + uint m_n; + uint8* m_pSelectors; + bool m_block_color4; + + inline results& operator= (const results& rhs) + { + m_block_color_unscaled = rhs.m_block_color_unscaled; + m_block_color4 = rhs.m_block_color4; + m_block_inten_table = rhs.m_block_inten_table; + m_error = rhs.m_error; + CRNLIB_ASSERT(m_n == rhs.m_n); + memcpy(m_pSelectors, rhs.m_pSelectors, rhs.m_n); + return *this; + } + }; + + void init(const params& params, results& result); + bool compute(); + + private: + struct potential_solution + { + potential_solution() : m_coords(), m_error(cUINT64_MAX), m_valid(false) + { + } + + etc1_solution_coordinates m_coords; + crnlib::vector m_selectors; + uint64 m_error; + bool m_valid; + + void clear() + { + m_coords.clear(); + m_selectors.resize(0); + m_error = cUINT64_MAX; + m_valid = false; + } + + bool are_selectors_all_equal() const + { + if (m_selectors.empty()) + return false; + const uint s = m_selectors[0]; + for (uint i = 1; i < m_selectors.size(); i++) + if (m_selectors[i] != s) + return false; + return true; + } + }; + + const params* m_pParams; + results* m_pResult; + + int m_limit; + + vec3F m_avg_color; + int m_br, m_bg, m_bb; + crnlib::vector m_luma; + crnlib::vector m_sorted_luma[2]; + const uint32* m_pSorted_luma_indices; + uint32* m_pSorted_luma; + + crnlib::vector m_selectors; + crnlib::vector m_best_selectors; + + potential_solution m_best_solution; + potential_solution m_trial_solution; + crnlib::vector m_temp_selectors; + + bool evaluate_solution(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution); + bool evaluate_solution_fast(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution); + }; + + struct pack_etc1_block_context + { + etc1_optimizer m_optimizer; + }; + + void pack_etc1_block_init(); + + uint64 pack_etc1_block(etc1_block& block, const color_quad_u8* pSrc_pixels, crn_etc1_pack_params& pack_params, pack_etc1_block_context& context); + +} // namespace crnlib diff --git a/crnlib/crn_file_utils.cpp b/crnlib/crn_file_utils.cpp index 1d7731d..f5d6430 100644 --- a/crnlib/crn_file_utils.cpp +++ b/crnlib/crn_file_utils.cpp @@ -554,4 +554,25 @@ namespace crnlib return !*pWild; } + bool file_utils::write_buf_to_file(const char* pPath, const void* pData, size_t data_size) + { + FILE *pFile = NULL; + +#ifdef _MSC_VER + // Compiling with MSVC + if (fopen_s(&pFile, pPath, "wb")) + return false; +#else + pFile = fopen(pPath, "wb"); +#endif + if (!pFile) + return false; + + bool success = fwrite(pData, 1, data_size, pFile) == data_size; + + fclose(pFile); + + return success; + } + } // namespace crnlib diff --git a/crnlib/crn_file_utils.h b/crnlib/crn_file_utils.h index e71f223..247f85a 100644 --- a/crnlib/crn_file_utils.h +++ b/crnlib/crn_file_utils.h @@ -20,29 +20,24 @@ namespace crnlib static bool is_drive_separator(char c); static bool split_path(const char* p, dynamic_string* pDrive, dynamic_string* pDir, dynamic_string* pFilename, dynamic_string* pExt); - static bool split_path(const char* p, dynamic_string& path, dynamic_string& filename); static bool get_pathname(const char* p, dynamic_string& path); - static bool get_filename(const char* p, dynamic_string& filename); static void combine_path(dynamic_string& dst, const char* pA, const char* pB); - static void combine_path(dynamic_string& dst, const char* pA, const char* pB, const char* pC); static bool full_path(dynamic_string& path); - static bool get_extension(dynamic_string& filename); - static bool remove_extension(dynamic_string& filename); - static bool create_path(const dynamic_string& path); - static void trim_trailing_seperator(dynamic_string& path); static int wildcmp(const char* pWild, const char* pString); + static bool write_buf_to_file(const char* pPath, const void* pData, size_t data_size); + }; // struct file_utils } // namespace crnlib diff --git a/crnlib/crn_hash_map.h b/crnlib/crn_hash_map.h index b736d0e..655796a 100644 --- a/crnlib/crn_hash_map.h +++ b/crnlib/crn_hash_map.h @@ -2,8 +2,9 @@ // See Copyright Notice and license at the end of inc/crnlib.h // // Notes: +// stl-like hash map/hash set, with predictable performance across platforms/compilers/C run times/etc. // Hash function ref: http://www.brpreiss.com/books/opus4/html/page215.html -// Compared for speed against VC9's std::hash_map. +// Compared for performance against VC9's std::hash_map. // Linear probing, auto resizes on ~50% load factor. // Uses Knuth's multiplicative method (Fibonacci hashing). #pragma once @@ -374,6 +375,8 @@ namespace crnlib return iterator(*this, m_values.size()); } + // insert_result.first will always point to inserted key/value (or the already existing key/value). + // insert_resutt.second will be true if a new key/value was inserted, or false if the key already existed (in which case first will point to the already existing value). typedef std::pair insert_result; inline insert_result insert(const Key& k, const Value& v = Value()) @@ -508,17 +511,19 @@ namespace crnlib scalar_type::destruct(&p->second); } + // Moves *pSrc to *pDst efficiently. + // pDst should NOT be constructed on entry. static inline void move_node(node* pDst, node* pSrc) { CRNLIB_ASSERT(!pDst->state); - if (CRNLIB_IS_BITWISE_MOVABLE(Key) && CRNLIB_IS_BITWISE_MOVABLE(Value)) + if (CRNLIB_IS_BITWISE_COPYABLE_OR_MOVABLE(Key) && CRNLIB_IS_BITWISE_COPYABLE_OR_MOVABLE(Value)) { memcpy(pDst, pSrc, sizeof(node)); } else { - if (CRNLIB_IS_BITWISE_MOVABLE(Key)) + if (CRNLIB_IS_BITWISE_COPYABLE_OR_MOVABLE(Key)) memcpy(&pDst->first, &pSrc->first, sizeof(Key)); else { @@ -526,7 +531,7 @@ namespace crnlib scalar_type::destruct(&pSrc->first); } - if (CRNLIB_IS_BITWISE_MOVABLE(Value)) + if (CRNLIB_IS_BITWISE_COPYABLE_OR_MOVABLE(Value)) memcpy(&pDst->second, &pSrc->second, sizeof(Value)); else { diff --git a/crnlib/crn_image.h b/crnlib/crn_image.h index 5aeecaf..1e14338 100644 --- a/crnlib/crn_image.h +++ b/crnlib/crn_image.h @@ -4,6 +4,7 @@ #include "crn_color.h" #include "crn_vec.h" #include "crn_pixel_format.h" +#include "crn_rect.h" namespace crnlib { @@ -25,6 +26,7 @@ namespace crnlib { } + // pitch is in PIXELS, not bytes. image(uint width, uint height, uint pitch = UINT_MAX, const color_type& background = color_type::make_black(), uint flags = pixel_format_helpers::cDefaultCompFlags) : m_comp_flags(flags) { @@ -44,6 +46,7 @@ namespace crnlib set_all(background); } + // pitch is in PIXELS, not bytes. image(color_type* pPixels, uint width, uint height, uint pitch = UINT_MAX, uint flags = pixel_format_helpers::cDefaultCompFlags) { alias(pPixels, width, height, pitch, flags); @@ -94,6 +97,7 @@ namespace crnlib *this = other; } + // pitch is in PIXELS, not bytes. void alias(color_type* pPixels, uint width, uint height, uint pitch = UINT_MAX, uint flags = pixel_format_helpers::cDefaultCompFlags) { m_pixel_buf.clear(); @@ -107,6 +111,40 @@ namespace crnlib m_comp_flags = flags; } + // pitch is in PIXELS, not bytes. + bool grant_ownership(color_type* pPixels, uint width, uint height, uint pitch = UINT_MAX, uint flags = pixel_format_helpers::cDefaultCompFlags) + { + if (pitch == UINT_MAX) + pitch = width; + + if ((!pPixels) || (!width) || (!height) || (pitch < width)) + { + CRNLIB_ASSERT(0); + return false; + } + + if (pPixels == get_ptr()) + { + CRNLIB_ASSERT(0); + return false; + } + + clear(); + + if (!m_pixel_buf.grant_ownership(pPixels, height * pitch, height * pitch)) + return false; + + m_pPixels = pPixels; + + m_width = width; + m_height = height; + m_pitch = pitch; + m_total = pitch * height; + m_comp_flags = flags; + + return true; + } + void clear() { m_pPixels = NULL; @@ -139,6 +177,34 @@ namespace crnlib m_pPixels[i] = c; } + void flip_x() + { + const uint half_width = m_width / 2; + for (uint y = 0; y < m_height; y++) + { + for (uint x = 0; x < half_width; x++) + { + color_type c((*this)(x, y)); + (*this)(x, y) = (*this)(m_width - 1 - x, y); + (*this)(m_width - 1 - x, y) = c; + } + } + } + + void flip_y() + { + const uint half_height = m_height / 2; + for (uint y = 0; y < half_height; y++) + { + for (uint x = 0; x < m_width; x++) + { + color_type c((*this)(x, y)); + (*this)(x, y) = (*this)(x, m_height - 1 - y); + (*this)(x, m_height - 1 - y) = c; + } + } + } + void convert_to_grayscale() { for (uint y = 0; y < m_height; y++) @@ -183,13 +249,16 @@ namespace crnlib bool extract_block(color_type* pDst, uint x, uint y, uint w, uint h, bool flip_xy = false) const { if ((x >= m_width) || (y >= m_height)) + { + CRNLIB_ASSERT(0); return false; + } if (flip_xy) { for (uint y_ofs = 0; y_ofs < h; y_ofs++) for (uint x_ofs = 0; x_ofs < w; x_ofs++) - pDst[x_ofs * 4 + y_ofs] = get_clamped(x_ofs + x, y_ofs + y); + pDst[x_ofs * h + y_ofs] = get_clamped(x_ofs + x, y_ofs + y); // 5/4/12 - this was incorrectly x_ofs * 4 } else if (((x + w) > m_width) || ((y + h) > m_height)) { @@ -213,10 +282,14 @@ namespace crnlib return true; } - void fill(uint x, uint y, uint w, uint h, const color_type& c) + // No clipping! + void unclipped_fill_box(uint x, uint y, uint w, uint h, const color_type& c) { - CRNLIB_ASSERT((x + w) <= m_width); - CRNLIB_ASSERT((y + h) <= m_height); + if (((x + w) > m_width) || ((y + h) > m_height)) + { + CRNLIB_ASSERT(0); + return; + } color_type* p = get_scanline(y) + x; @@ -229,7 +302,7 @@ namespace crnlib } } - void draw_box(int x, int y, uint width, uint height, const color_type& c) + void draw_rect(int x, int y, uint width, uint height, const color_type& c) { draw_line(x, y, x + width - 1, y, c); draw_line(x, y, x, y + height - 1, c); @@ -238,13 +311,25 @@ namespace crnlib } // No clipping! - bool copy(uint src_x, uint src_y, uint src_w, uint src_h, uint dst_x, uint dst_y, const image& src) + bool unclipped_blit(uint src_x, uint src_y, uint src_w, uint src_h, uint dst_x, uint dst_y, const image& src) { - if ( ((src_x + src_w) > src.get_width()) || ((src_y + src_h) > src.get_height()) ) + if ((!is_valid()) || (!src.is_valid())) + { + CRNLIB_ASSERT(0); return false; + } + + if ( ((src_x + src_w) > src.get_width()) || ((src_y + src_h) > src.get_height()) ) + { + CRNLIB_ASSERT(0); + return false; + } if ( ((dst_x + src_w) > get_width()) || ((dst_y + src_h) > get_height()) ) + { + CRNLIB_ASSERT(0); return false; + } const color_type* pS = &src(src_x, src_y); color_type* pD = &(*this)(dst_x, dst_y); @@ -262,38 +347,74 @@ namespace crnlib } // With clipping. - void blit(int dst_x, int dst_y, const image& src) + bool blit(int dst_x, int dst_y, const image& src) { - uint src_x = 0; - uint src_y = 0; + if ((!is_valid()) || (!src.is_valid())) + { + CRNLIB_ASSERT(0); + return false; + } + + int src_x = 0; + int src_y = 0; if (dst_x < 0) { src_x = -dst_x; - if (src_x >= src.get_width()) - return; + if (src_x >= static_cast(src.get_width())) + return false; dst_x = 0; } if (dst_y < 0) { src_y = -dst_y; - if (src_y >= src.get_height()) - return; + if (src_y >= static_cast(src.get_height())) + return false; dst_y = 0; } if ((dst_x >= (int)m_width) || (dst_y >= (int)m_height)) - return; + return false; uint width = math::minimum(m_width - dst_x, src.get_width() - src_x); uint height = math::minimum(m_height - dst_y, src.get_height() - src_y); - bool success = copy(src_x, src_y, width, height, dst_x, dst_y, src); + bool success = unclipped_blit(src_x, src_y, width, height, dst_x, dst_y, src); success; CRNLIB_ASSERT(success); + + return true; } + // With clipping. + bool blit(int src_x, int src_y, int src_w, int src_h, int dst_x, int dst_y, const image& src) + { + if ((!is_valid()) || (!src.is_valid())) + { + CRNLIB_ASSERT(0); + return false; + } + + rect src_rect(src_x, src_y, src_x + src_w, src_y + src_h); + if (!src_rect.intersect(src.get_bounds())) + return false; + + rect dst_rect(dst_x, dst_y, dst_x + src_rect.get_width(), dst_y + src_rect.get_height()); + if (!dst_rect.intersect(get_bounds())) + return false; + + bool success = unclipped_blit( + src_rect.get_left(), src_rect.get_top(), + math::minimum(src_rect.get_width(), dst_rect.get_width()), math::minimum(src_rect.get_height(), dst_rect.get_height()), + dst_rect.get_left(), dst_rect.get_top(), src); + success; + CRNLIB_ASSERT(success); + + return true; + } + + // In-place resize of image dimensions (cropping). bool resize(uint new_width, uint new_height, uint new_pitch = UINT_MAX, const color_type background = color_type::make_black()) { if (new_pitch == UINT_MAX) @@ -341,6 +462,8 @@ namespace crnlib inline uint get_height() const { return m_height; } inline uint get_total_pixels() const { return m_width * m_height; } + inline rect get_bounds() const { return rect(0, 0, m_width, m_height); } + inline uint get_pitch() const { return m_pitch; } inline uint get_pitch_in_bytes() const { return m_pitch * sizeof(color_type); } @@ -368,15 +491,21 @@ namespace crnlib return m_pPixels[x + y * m_pitch]; } - inline const color_type& get_clamped (int x, int y) const + inline const color_type& get_unclamped(uint x, uint y) const + { + CRNLIB_ASSERT((x < m_width) && (y < m_height)); + return m_pPixels[x + y * m_pitch]; + } + + inline const color_type& get_clamped(int x, int y) const { x = math::clamp(x, 0, m_width - 1); y = math::clamp(y, 0, m_height - 1); - return (*this)((uint)x, (uint)y); + return m_pPixels[x + y * m_pitch]; } // Sample image with bilinear filtering. - // (x,y) - Continuous coordinates, where pixel centers are at (.5,.5), valid image coords are (0,width] and (0,height]. + // (x,y) - Continuous coordinates, where pixel centers are at (.5,.5), valid image coords are [0,width] and [0,height]. void get_filtered(float x, float y, color_type& result) const { x -= .5f; @@ -430,7 +559,7 @@ namespace crnlib } } - inline void set_pixel(uint x, uint y, const color_type& c) + inline void set_pixel_unclipped(uint x, uint y, const color_type& c) { CRNLIB_ASSERT((x < m_width) && (y < m_height)); m_pPixels[x + y * m_pitch] = c; @@ -438,7 +567,7 @@ namespace crnlib inline void set_pixel_clipped(int x, int y, const color_type& c) { - if ((x < 0) || (x >= (int)m_width) || (y < 0) || (y >= (int)m_height)) + if ((static_cast(x) >= m_width) || (static_cast(y) >= m_height)) return; m_pPixels[x + y * m_pitch] = c; @@ -486,7 +615,6 @@ namespace crnlib } int dx = xe - xs, dy = ye - ys; - if (!dx) { if (ys > ye) @@ -503,35 +631,26 @@ namespace crnlib { if (dy <= dx) { - int e = 2 * dy - dx; - int e_no_inc = 2 * dy; - int e_inc = 2 * (dy - dx); + int e = 2 * dy - dx, e_no_inc = 2 * dy, e_inc = 2 * (dy - dx); rasterize_line(xs, ys, xe, ye, 0, 1, e, e_inc, e_no_inc, color); } else { - int e = 2 * dx - dy; - int e_no_inc = 2 * dx; - int e_inc = 2 * (dx - dy); + int e = 2 * dx - dy, e_no_inc = 2 * dx, e_inc = 2 * (dx - dy); rasterize_line(xs, ys, xe, ye, 1, 1, e, e_inc, e_no_inc, color); } } else { dy = -dy; - if (dy <= dx) { - int e = 2 * dy - dx; - int e_no_inc = 2 * dy; - int e_inc = 2 * (dy - dx); + int e = 2 * dy - dx, e_no_inc = 2 * dy, e_inc = 2 * (dy - dx); rasterize_line(xs, ys, xe, ye, 0, -1, e, e_inc, e_no_inc, color); } else { - int e = 2 * dx - dy; - int e_no_inc = (2 * dx); - int e_inc = 2 * (dx - dy); + int e = 2 * dx - dy, e_no_inc = (2 * dx), e_inc = 2 * (dx - dy); rasterize_line(xe, ye, xs, ys, 1, -1, e, e_inc, e_no_inc, color); } } @@ -557,14 +676,10 @@ namespace crnlib if (pred) { - start = ys; - end = ye; - var = xs; - + start = ys; end = ye; var = xs; for (int i = start; i <= end; i++) { set_pixel_clipped(var, i, color); - if (e < 0) e += e_no_inc; else @@ -576,14 +691,10 @@ namespace crnlib } else { - start = xs; - end = xe; - var = ys; - + start = xs; end = xe; var = ys; for (int i = start; i <= end; i++) { set_pixel_clipped(i, var, color); - if (e < 0) e += e_no_inc; else diff --git a/crnlib/crn_image_utils.cpp b/crnlib/crn_image_utils.cpp index 603f8c5..409e675 100644 --- a/crnlib/crn_image_utils.cpp +++ b/crnlib/crn_image_utils.cpp @@ -8,27 +8,39 @@ #include "crn_strutils.h" #include "crn_file_utils.h" #include "crn_threading.h" +#include "crn_miniz.h" +#include "crn_jpge.h" +#include "crn_cfile_stream.h" +#include "crn_mipmapped_texture.h" +#include "crn_buffer_stream.h" #define STBI_HEADER_FILE_ONLY #include "crn_stb_image.cpp" +#include "crn_jpgd.h" + #include "crn_pixel_format.h" namespace crnlib { const float cInfinitePSNR = 999999.0f; + const uint CRNLIB_LARGEST_SUPPORTED_IMAGE_DIMENSION = 16384; namespace image_utils { - bool load_from_file_stb(const char* pFilename, image_u8& img) + bool read_from_stream_stb(data_stream_serializer &serializer, image_u8& img) { + uint8_vec buf; + if (!serializer.read_entire_file(buf)) + return false; + int x = 0, y = 0, n = 0; - unsigned char* pData = stbi_load(pFilename, &x, &y, &n, 4); + unsigned char* pData = stbi_load_from_memory(buf.get_ptr(), buf.size_in_bytes(), &x, &y, &n, 4); if (!pData) return false; - if ((x > 8192) || (y > 8192)) + if ((x > (int)CRNLIB_LARGEST_SUPPORTED_IMAGE_DIMENSION) || (y > (int)CRNLIB_LARGEST_SUPPORTED_IMAGE_DIMENSION)) { stbi_image_free(pData); return false; @@ -68,30 +80,123 @@ namespace crnlib return true; } - bool save_to_file_stb(const char* pFilename, const image_u8& img, uint save_flags, int comp_index) + bool read_from_stream_jpgd(data_stream_serializer &serializer, image_u8& img) { - if (!img.get_width()) + uint8_vec buf; + if (!serializer.read_entire_file(buf)) return false; - bool bSaveBMP = false; + int width = 0, height = 0, actual_comps = 0; + unsigned char *pSrc_img = jpgd::decompress_jpeg_image_from_memory(buf.get_ptr(), buf.size_in_bytes(), &width, &height, &actual_comps, 4); + if (!pSrc_img) + return false; + + if (math::maximum(width, height) > (int)CRNLIB_LARGEST_SUPPORTED_IMAGE_DIMENSION) + { + crnlib_free(pSrc_img); + return false; + } + + if (!img.grant_ownership(reinterpret_cast(pSrc_img), width, height)) + { + crnlib_free(pSrc_img); + return false; + } + + img.reset_comp_flags(); + img.set_grayscale(actual_comps == 1); + img.set_component_valid(3, false); + + return true; + } + + bool read_from_stream(image_u8& dest, data_stream_serializer& serializer, uint read_flags) + { + if (read_flags > cReadFlagsAllFlags) + { + CRNLIB_ASSERT(0); + return false; + } + + if (!serializer.get_stream()) + { + CRNLIB_ASSERT(0); + return false; + } + + dynamic_string ext(serializer.get_name()); + file_utils::get_extension(ext); + + if ((ext == "jpg") || (ext == "jpeg")) + { + // Use my jpeg decoder by default because it supports progressive jpeg's. + if ((read_flags & cReadFlagForceSTB) == 0) + { + return image_utils::read_from_stream_jpgd(serializer, dest); + } + } + + return image_utils::read_from_stream_stb(serializer, dest); + } + + bool read_from_file(image_u8& dest, const char* pFilename, uint read_flags) + { + if (read_flags > cReadFlagsAllFlags) + { + CRNLIB_ASSERT(0); + return false; + } + + cfile_stream file_stream; + if (!file_stream.open(pFilename)) + return false; + + data_stream_serializer serializer(file_stream); + return read_from_stream(dest, serializer, read_flags); + } + + bool write_to_file(const char* pFilename, const image_u8& img, uint write_flags, int grayscale_comp_index) + { + if ((grayscale_comp_index < -1) || (grayscale_comp_index > 3)) + { + CRNLIB_ASSERT(0); + return false; + } + + if (!img.get_width()) + { + CRNLIB_ASSERT(0); + return false; + } + dynamic_string ext(pFilename); + bool is_jpeg = false; if (file_utils::get_extension(ext)) { - if (ext == "bmp") - bSaveBMP = true; - else if (ext != "tga") + is_jpeg = ((ext == "jpg") || (ext == "jpeg")); + + if ((ext != "png") && (ext != "bmp") && (ext != "tga") && (!is_jpeg)) { - console::error("crnlib::image_utils::save_to_file_stb: Can only write .BMP or .TGA files!\n"); + console::error("crnlib::image_utils::write_to_file: Can only write .BMP, .TGA, .PNG, or .JPG files!\n"); return false; } } - if ((img.get_comp_flags() & pixel_format_helpers::cCompFlagGrayscale) || (save_flags & image_utils::cSaveGrayscale)) - { - CRNLIB_ASSERT(comp_index < 4); - if (comp_index > 3) comp_index = 3; + crnlib::vector temp; + uint num_src_chans = 0; + const void *pSrc_img = NULL; - crnlib::vector temp(img.get_total_pixels()); + if (is_jpeg) + { + write_flags |= cWriteFlagIgnoreAlpha; + } + + if ((img.get_comp_flags() & pixel_format_helpers::cCompFlagGrayscale) || (write_flags & image_utils::cWriteFlagGrayscale)) + { + CRNLIB_ASSERT(grayscale_comp_index < 4); + if (grayscale_comp_index > 3) grayscale_comp_index = 3; + + temp.resize(img.get_total_pixels()); for (uint y = 0; y < img.get_height(); y++) { @@ -104,7 +209,7 @@ namespace crnlib while (pSrc != pSrc_end) *pDst++ = (*pSrc++)[1]; } - else if (comp_index < 0) + else if (grayscale_comp_index < 0) { while (pSrc != pSrc_end) *pDst++ = static_cast((*pSrc++).get_luma()); @@ -112,15 +217,16 @@ namespace crnlib else { while (pSrc != pSrc_end) - *pDst++ = (*pSrc++)[comp_index]; + *pDst++ = (*pSrc++)[grayscale_comp_index]; } } - return (bSaveBMP ? stbi_write_bmp : stbi_write_tga)(pFilename, img.get_width(), img.get_height(), 1, &temp[0]) == CRNLIB_TRUE; + pSrc_img = &temp[0]; + num_src_chans = 1; } - else if ((!img.is_component_valid(3)) || (save_flags & cSaveIgnoreAlpha)) + else if ((!img.is_component_valid(3)) || (write_flags & cWriteFlagIgnoreAlpha)) { - crnlib::vector temp(img.get_total_pixels() * 3); + temp.resize(img.get_total_pixels() * 3); for (uint y = 0; y < img.get_height(); y++) { @@ -140,37 +246,47 @@ namespace crnlib } } - return (bSaveBMP ? stbi_write_bmp : stbi_write_tga)(pFilename, img.get_width(), img.get_height(), 3, &temp[0]) == CRNLIB_TRUE; + num_src_chans = 3; + pSrc_img = &temp[0]; } else { - return (bSaveBMP ? stbi_write_bmp : stbi_write_tga)(pFilename, img.get_width(), img.get_height(), 4, img.get_ptr()) == CRNLIB_TRUE; + num_src_chans = 4; + pSrc_img = img.get_ptr(); } - } - bool load_from_file(image_u8& dest, const char* pFilename, int flags) - { - flags; - return image_utils::load_from_file_stb(pFilename, dest); - } + bool success = false; + if (ext == "png") + { + size_t png_image_size = 0; + void *pPNG_image_data = tdefl_write_image_to_png_file_in_memory(pSrc_img, img.get_width(), img.get_height(), num_src_chans, &png_image_size); + if (!pPNG_image_data) + return false; + success = file_utils::write_buf_to_file(pFilename, pPNG_image_data, png_image_size); + mz_free(pPNG_image_data); + } + else if (is_jpeg) + { + jpge::params params; + if (write_flags & cWriteFlagJPEGQualityLevelMask) + params.m_quality = math::clamp((write_flags & cWriteFlagJPEGQualityLevelMask) >> cWriteFlagJPEGQualityLevelShift, 1U, 100U); + params.m_two_pass_flag = (write_flags & cWriteFlagJPEGTwoPass) != 0; + params.m_no_chroma_discrim_flag = (write_flags & cWriteFlagJPEGNoChromaDiscrim) != 0; - bool save_to_grayscale_file(const char* pFilename, const image_u8& src, int component, int flags) - { - flags; - return image_utils::save_to_file_stb(pFilename, src, image_utils::cSaveGrayscale, component); - } + if (write_flags & cWriteFlagJPEGH1V1) + params.m_subsampling = jpge::H1V1; + else if (write_flags & cWriteFlagJPEGH2V1) + params.m_subsampling = jpge::H2V1; + else if (write_flags & cWriteFlagJPEGH2V2) + params.m_subsampling = jpge::H2V2; - bool save_to_file(const char* pFilename, const image_u8& src, int flags, bool ignore_alpha) - { - if (src.is_grayscale()) - return save_to_grayscale_file(pFilename, src, cSaveLuma, flags); + success = jpge::compress_image_to_jpeg_file(pFilename, img.get_width(), img.get_height(), num_src_chans, (const jpge::uint8*)pSrc_img, params); + } else { - uint save_flags = 0; - if (ignore_alpha) - save_flags |= image_utils::cSaveIgnoreAlpha; - return image_utils::save_to_file_stb(pFilename, src, save_flags); + success = ((ext == "bmp" ? stbi_write_bmp : stbi_write_tga)(pFilename, img.get_width(), img.get_height(), num_src_chans, pSrc_img) == CRNLIB_TRUE); } + return success; } bool has_alpha(const image_u8& img) @@ -714,7 +830,7 @@ namespace crnlib if (!total_blocks) return 0.0f; - //save_to_file_stb("ssim.tga", yimg, cSaveGrayscale); + //save_to_file_stb_or_miniz("ssim.tga", yimg, cWriteFlagGrayscale); return total_ssim / total_blocks; } @@ -739,9 +855,9 @@ namespace crnlib void error_metrics::print(const char* pName) const { if (mPeakSNR >= cInfinitePSNR) - console::printf("%s Error: Max: %3u, Mean: %3.3f, MSE: %3.3f, RMS: %3.3f, PSNR: Infinite", pName, mMax, mMean, mMeanSquared, mRootMeanSquared); + console::printf("%s Error: Max: %3u, Mean: %3.3f, MSE: %3.3f, RMSE: %3.3f, PSNR: Infinite", pName, mMax, mMean, mMeanSquared, mRootMeanSquared); else - console::printf("%s Error: Max: %3u, Mean: %3.3f, MSE: %3.3f, RMS: %3.3f, PSNR: %3.3f", pName, mMax, mMean, mMeanSquared, mRootMeanSquared, mPeakSNR); + console::printf("%s Error: Max: %3u, Mean: %3.3f, MSE: %3.3f, RMSE: %3.3f, PSNR: %3.3f", pName, mMax, mMean, mMeanSquared, mRootMeanSquared, mPeakSNR); } bool error_metrics::compute(const image_u8& a, const image_u8& b, uint first_channel, uint num_channels, bool average_component_error) @@ -926,6 +1042,11 @@ namespace crnlib img.set_comp_flags(static_cast(pixel_format_helpers::cCompFlagRValid | pixel_format_helpers::cCompFlagGValid | pixel_format_helpers::cCompFlagBValid | pixel_format_helpers::cCompFlagGrayscale | (img.has_alpha() ? pixel_format_helpers::cCompFlagAValid : 0))); break; } + case cConversion_To_Y: + { + img.set_comp_flags(static_cast(img.get_comp_flags() | pixel_format_helpers::cCompFlagGrayscale)); + break; + } default: { CRNLIB_ASSERT(false); @@ -1035,6 +1156,15 @@ namespace crnlib dst.a = src.a; break; } + case image_utils::cConversion_To_Y: + { + uint8 y = static_cast(src.get_luma()); + dst.r = y; + dst.g = y; + dst.b = y; + dst.a = src.a; + break; + } default: { CRNLIB_ASSERT(false); @@ -1168,6 +1298,70 @@ namespace crnlib return sqrt(var); } + uint8* read_image_from_memory(const uint8* pImage, int nSize, int* pWidth, int* pHeight, int* pActualComps, int req_comps, const char* pFilename) + { + *pWidth = 0; + *pHeight = 0; + *pActualComps = 0; + + if ((req_comps < 1) || (req_comps > 4)) + return false; + + mipmapped_texture tex; + + buffer_stream buf_stream(pImage, nSize); + buf_stream.set_name(pFilename); + data_stream_serializer serializer(buf_stream); + + if (!tex.read_from_stream(serializer)) + return NULL; + + if (tex.is_packed()) + { + if (!tex.unpack_from_dxt(true)) + return NULL; + } + + image_u8 img; + image_u8* pImg = tex.get_level_image(0, 0, img); + if (!pImg) + return NULL; + + *pWidth = tex.get_width(); + *pHeight = tex.get_height(); + + if (pImg->has_alpha()) + *pActualComps = 4; + else if (pImg->is_grayscale()) + *pActualComps = 1; + else + *pActualComps = 3; + + uint8 *pDst = NULL; + if (req_comps == 4) + { + pDst = (uint8*)malloc(tex.get_total_pixels() * sizeof(uint32)); + uint8 *pSrc = (uint8*)pImg->get_ptr(); + memcpy(pDst, pSrc, tex.get_total_pixels() * sizeof(uint32)); + } + else + { + image_u8 luma_img; + if (req_comps == 1) + { + luma_img = *pImg; + luma_img.convert_to_grayscale(); + pImg = &luma_img; + } + + pixel_packer packer(req_comps, 8); + uint32 n; + pDst = image_utils::pack_image(*pImg, packer, n); + } + + return pDst; + } + } // namespace image_utils } // namespace crnlib diff --git a/crnlib/crn_image_utils.h b/crnlib/crn_image_utils.h index 08ecc60..480b3e7 100644 --- a/crnlib/crn_image_utils.h +++ b/crnlib/crn_image_utils.h @@ -2,6 +2,7 @@ // See Copyright Notice and license at the end of inc/crnlib.h #pragma once #include "crn_image.h" +#include "crn_data_stream_serializer.h" namespace crnlib { @@ -9,23 +10,41 @@ namespace crnlib namespace image_utils { - bool load_from_file_stb(const char* pFilename, image_u8& img); + enum read_flags_t + { + cReadFlagForceSTB = 1, + + cReadFlagsAllFlags = 1 + }; + + bool read_from_stream_stb(data_stream_serializer& serializer, image_u8& img); + bool read_from_stream_jpgd(data_stream_serializer& serializer, image_u8& img); + bool read_from_stream(image_u8& dest, data_stream_serializer& serializer, uint read_flags = 0); + bool read_from_file(image_u8& dest, const char* pFilename, uint read_flags = 0); + + // Reads texture from memory, results returned stb_image.c style. + // *pActual_comps is set to 1, 3, or 4. req_comps must range from 1-4. + uint8* read_from_memory(const uint8* pImage, int nSize, int* pWidth, int* pHeight, int* pActualComps, int req_comps, const char* pFilename); enum { - cSaveIgnoreAlpha = 1, - cSaveGrayscale = 2 + cWriteFlagIgnoreAlpha = 0x00000001, + cWriteFlagGrayscale = 0x00000002, + + cWriteFlagJPEGH1V1 = 0x00010000, + cWriteFlagJPEGH2V1 = 0x00020000, + cWriteFlagJPEGH2V2 = 0x00040000, + cWriteFlagJPEGTwoPass = 0x00080000, + cWriteFlagJPEGNoChromaDiscrim = 0x00100000, + cWriteFlagJPEGQualityLevelMask = 0xFF000000, + cWriteFlagJPEGQualityLevelShift = 24, }; - const int cSaveLuma = -1; + const int cLumaComponentIndex = -1; - bool save_to_file_stb(const char* pFilename, const image_u8& img, uint save_flags = 0, int comp_index = cSaveLuma); + inline uint create_jpeg_write_flags(uint base_flags, uint quality_level) { CRNLIB_ASSERT(quality_level <= 100); return base_flags | ((quality_level << cWriteFlagJPEGQualityLevelShift) & cWriteFlagJPEGQualityLevelMask); } - bool load_from_file(image_u8& dest, const char* pFilename, int flags = 0); - - bool save_to_grayscale_file(const char* pFilename, const image_u8& src, int component, int flags = 0); - - bool save_to_file(const char* pFilename, const image_u8& src, int flags = 0, bool ignore_alpha = false); + bool write_to_file(const char* pFilename, const image_u8& img, uint write_flags = 0, int grayscale_comp_index = cLumaComponentIndex); bool has_alpha(const image_u8& img); bool is_normal_map(const image_u8& img, const char* pFilename = NULL); @@ -127,15 +146,48 @@ namespace crnlib cConversion_A_To_RGBA, cConversion_Y_To_RGB, + cConversion_To_Y, + cConversionTotal }; void convert_image(image_u8& img, conversion_type conv_type); + template + inline uint8* pack_image(const image_type& img, const pixel_packer& packer, uint& n) + { + n = 0; + + if (!packer.is_valid()) + return NULL; + + const uint width = img.get_width(), height = img.get_height(); + uint dst_pixel_stride = packer.get_pixel_stride(); + uint dst_pitch = width * dst_pixel_stride; + + n = dst_pitch * height; + + uint8* pImage = static_cast(crnlib_malloc(n)); + + uint8* pDst = pImage; + for (uint y = 0; y < height; y++) + { + const typename image_type::color_t* pSrc = img.get_scanline(y); + for (uint x = 0; x < width; x++) + pDst = (uint8*)packer.pack(*pSrc++, pDst); + } + + return pImage; + } + image_utils::conversion_type get_conversion_type(bool cooking, pixel_format fmt); image_utils::conversion_type get_image_conversion_type_from_crn_format(crn_format fmt); double compute_std_dev(uint n, const color_quad_u8* pPixels, uint first_channel, uint num_channels); - } -} + + uint8* read_image_from_memory(const uint8* pImage, int nSize, int* pWidth, int* pHeight, int* pActualComps, int req_comps, const char* pFilename); + + } // namespace image_utils + +} // namespace crnlib diff --git a/crnlib/crn_jpgd.cpp b/crnlib/crn_jpgd.cpp new file mode 100644 index 0000000..9ad1b4a --- /dev/null +++ b/crnlib/crn_jpgd.cpp @@ -0,0 +1,3172 @@ +// jpgd.cpp - C++ class for JPEG decompression. +// Public domain, Rich Geldreich +// Alex Evans: Linear memory allocator (taken from jpge.h). +// v1.04, May. 19, 2012: Code tweaks to fix VS2008 static code analysis warnings (all looked harmless) +// +// Supports progressive and baseline sequential JPEG image files, and the most common chroma subsampling factors: Y, H1V1, H2V1, H1V2, and H2V2. +// +// Chroma upsampling quality: H2V2 is upsampled in the frequency domain, H2V1 and H1V2 are upsampled using point sampling. +// Chroma upsampling reference: "Fast Scheme for Image Size Change in the Compressed Domain" +// http://vision.ai.uiuc.edu/~dugad/research/dct/index.html + +#include "crn_jpgd.h" +#include + +#include +#define JPGD_ASSERT(x) assert(x) + +#include "crn_core.h" + +#ifdef _MSC_VER +#pragma warning (disable : 4611) // warning C4611: interaction between '_setjmp' and C++ object destruction is non-portable +#endif + +// Set to 1 to enable freq. domain chroma upsampling on images using H2V2 subsampling (0=faster nearest neighbor sampling). +// This is slower, but results in higher quality on images with highly saturated colors. +#define JPGD_SUPPORT_FREQ_DOMAIN_UPSAMPLING 1 + +#define JPGD_TRUE (1) +#define JPGD_FALSE (0) + +#define JPGD_MAX(a,b) (((a)>(b)) ? (a) : (b)) +#define JPGD_MIN(a,b) (((a)<(b)) ? (a) : (b)) + +namespace jpgd { + +static inline void *jpgd_malloc(size_t nSize) { return crnlib::crnlib_malloc(nSize); } +static inline void jpgd_free(void *p) { crnlib::crnlib_free(p); } + +// DCT coefficients are stored in this sequence. +static int g_ZAG[64] = { 0,1,8,16,9,2,3,10,17,24,32,25,18,11,4,5,12,19,26,33,40,48,41,34,27,20,13,6,7,14,21,28,35,42,49,56,57,50,43,36,29,22,15,23,30,37,44,51,58,59,52,45,38,31,39,46,53,60,61,54,47,55,62,63 }; + +enum JPEG_MARKER +{ + M_SOF0 = 0xC0, M_SOF1 = 0xC1, M_SOF2 = 0xC2, M_SOF3 = 0xC3, M_SOF5 = 0xC5, M_SOF6 = 0xC6, M_SOF7 = 0xC7, M_JPG = 0xC8, + M_SOF9 = 0xC9, M_SOF10 = 0xCA, M_SOF11 = 0xCB, M_SOF13 = 0xCD, M_SOF14 = 0xCE, M_SOF15 = 0xCF, M_DHT = 0xC4, M_DAC = 0xCC, + M_RST0 = 0xD0, M_RST1 = 0xD1, M_RST2 = 0xD2, M_RST3 = 0xD3, M_RST4 = 0xD4, M_RST5 = 0xD5, M_RST6 = 0xD6, M_RST7 = 0xD7, + M_SOI = 0xD8, M_EOI = 0xD9, M_SOS = 0xDA, M_DQT = 0xDB, M_DNL = 0xDC, M_DRI = 0xDD, M_DHP = 0xDE, M_EXP = 0xDF, + M_APP0 = 0xE0, M_APP15 = 0xEF, M_JPG0 = 0xF0, M_JPG13 = 0xFD, M_COM = 0xFE, M_TEM = 0x01, M_ERROR = 0x100, RST0 = 0xD0 +}; + +enum JPEG_SUBSAMPLING { JPGD_GRAYSCALE = 0, JPGD_YH1V1, JPGD_YH2V1, JPGD_YH1V2, JPGD_YH2V2 }; + +#define CONST_BITS 13 +#define PASS1_BITS 2 +#define SCALEDONE ((int32)1) + +#define FIX_0_298631336 ((int32)2446) /* FIX(0.298631336) */ +#define FIX_0_390180644 ((int32)3196) /* FIX(0.390180644) */ +#define FIX_0_541196100 ((int32)4433) /* FIX(0.541196100) */ +#define FIX_0_765366865 ((int32)6270) /* FIX(0.765366865) */ +#define FIX_0_899976223 ((int32)7373) /* FIX(0.899976223) */ +#define FIX_1_175875602 ((int32)9633) /* FIX(1.175875602) */ +#define FIX_1_501321110 ((int32)12299) /* FIX(1.501321110) */ +#define FIX_1_847759065 ((int32)15137) /* FIX(1.847759065) */ +#define FIX_1_961570560 ((int32)16069) /* FIX(1.961570560) */ +#define FIX_2_053119869 ((int32)16819) /* FIX(2.053119869) */ +#define FIX_2_562915447 ((int32)20995) /* FIX(2.562915447) */ +#define FIX_3_072711026 ((int32)25172) /* FIX(3.072711026) */ + +#define DESCALE(x,n) (((x) + (SCALEDONE << ((n)-1))) >> (n)) +#define DESCALE_ZEROSHIFT(x,n) (((x) + (128 << (n)) + (SCALEDONE << ((n)-1))) >> (n)) + +#define MULTIPLY(var, cnst) ((var) * (cnst)) + +#define CLAMP(i) ((static_cast(i) > 255) ? (((~i) >> 31) & 0xFF) : (i)) + +// Compiler creates a fast path 1D IDCT for X non-zero columns +template +struct Row +{ + static void idct(int* pTemp, const jpgd_block_t* pSrc) + { + // ACCESS_COL() will be optimized at compile time to either an array access, or 0. + #define ACCESS_COL(x) (((x) < NONZERO_COLS) ? (int)pSrc[x] : 0) + + const int z2 = ACCESS_COL(2), z3 = ACCESS_COL(6); + + const int z1 = MULTIPLY(z2 + z3, FIX_0_541196100); + const int tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065); + const int tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865); + + const int tmp0 = (ACCESS_COL(0) + ACCESS_COL(4)) << CONST_BITS; + const int tmp1 = (ACCESS_COL(0) - ACCESS_COL(4)) << CONST_BITS; + + const int tmp10 = tmp0 + tmp3, tmp13 = tmp0 - tmp3, tmp11 = tmp1 + tmp2, tmp12 = tmp1 - tmp2; + + const int atmp0 = ACCESS_COL(7), atmp1 = ACCESS_COL(5), atmp2 = ACCESS_COL(3), atmp3 = ACCESS_COL(1); + + const int bz1 = atmp0 + atmp3, bz2 = atmp1 + atmp2, bz3 = atmp0 + atmp2, bz4 = atmp1 + atmp3; + const int bz5 = MULTIPLY(bz3 + bz4, FIX_1_175875602); + + const int az1 = MULTIPLY(bz1, - FIX_0_899976223); + const int az2 = MULTIPLY(bz2, - FIX_2_562915447); + const int az3 = MULTIPLY(bz3, - FIX_1_961570560) + bz5; + const int az4 = MULTIPLY(bz4, - FIX_0_390180644) + bz5; + + const int btmp0 = MULTIPLY(atmp0, FIX_0_298631336) + az1 + az3; + const int btmp1 = MULTIPLY(atmp1, FIX_2_053119869) + az2 + az4; + const int btmp2 = MULTIPLY(atmp2, FIX_3_072711026) + az2 + az3; + const int btmp3 = MULTIPLY(atmp3, FIX_1_501321110) + az1 + az4; + + pTemp[0] = DESCALE(tmp10 + btmp3, CONST_BITS-PASS1_BITS); + pTemp[7] = DESCALE(tmp10 - btmp3, CONST_BITS-PASS1_BITS); + pTemp[1] = DESCALE(tmp11 + btmp2, CONST_BITS-PASS1_BITS); + pTemp[6] = DESCALE(tmp11 - btmp2, CONST_BITS-PASS1_BITS); + pTemp[2] = DESCALE(tmp12 + btmp1, CONST_BITS-PASS1_BITS); + pTemp[5] = DESCALE(tmp12 - btmp1, CONST_BITS-PASS1_BITS); + pTemp[3] = DESCALE(tmp13 + btmp0, CONST_BITS-PASS1_BITS); + pTemp[4] = DESCALE(tmp13 - btmp0, CONST_BITS-PASS1_BITS); + } +}; + +template <> +struct Row<0> +{ + static void idct(int* pTemp, const jpgd_block_t* pSrc) + { + (void)pTemp; (void)pSrc; + } +}; + +template <> +struct Row<1> +{ + static void idct(int* pTemp, const jpgd_block_t* pSrc) + { + const int dcval = (pSrc[0] << PASS1_BITS); + + pTemp[0] = dcval; + pTemp[1] = dcval; + pTemp[2] = dcval; + pTemp[3] = dcval; + pTemp[4] = dcval; + pTemp[5] = dcval; + pTemp[6] = dcval; + pTemp[7] = dcval; + } +}; + +// Compiler creates a fast path 1D IDCT for X non-zero rows +template +struct Col +{ + static void idct(uint8* pDst_ptr, const int* pTemp) + { + // ACCESS_ROW() will be optimized at compile time to either an array access, or 0. + #define ACCESS_ROW(x) (((x) < NONZERO_ROWS) ? pTemp[x * 8] : 0) + + const int z2 = ACCESS_ROW(2); + const int z3 = ACCESS_ROW(6); + + const int z1 = MULTIPLY(z2 + z3, FIX_0_541196100); + const int tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065); + const int tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865); + + const int tmp0 = (ACCESS_ROW(0) + ACCESS_ROW(4)) << CONST_BITS; + const int tmp1 = (ACCESS_ROW(0) - ACCESS_ROW(4)) << CONST_BITS; + + const int tmp10 = tmp0 + tmp3, tmp13 = tmp0 - tmp3, tmp11 = tmp1 + tmp2, tmp12 = tmp1 - tmp2; + + const int atmp0 = ACCESS_ROW(7), atmp1 = ACCESS_ROW(5), atmp2 = ACCESS_ROW(3), atmp3 = ACCESS_ROW(1); + + const int bz1 = atmp0 + atmp3, bz2 = atmp1 + atmp2, bz3 = atmp0 + atmp2, bz4 = atmp1 + atmp3; + const int bz5 = MULTIPLY(bz3 + bz4, FIX_1_175875602); + + const int az1 = MULTIPLY(bz1, - FIX_0_899976223); + const int az2 = MULTIPLY(bz2, - FIX_2_562915447); + const int az3 = MULTIPLY(bz3, - FIX_1_961570560) + bz5; + const int az4 = MULTIPLY(bz4, - FIX_0_390180644) + bz5; + + const int btmp0 = MULTIPLY(atmp0, FIX_0_298631336) + az1 + az3; + const int btmp1 = MULTIPLY(atmp1, FIX_2_053119869) + az2 + az4; + const int btmp2 = MULTIPLY(atmp2, FIX_3_072711026) + az2 + az3; + const int btmp3 = MULTIPLY(atmp3, FIX_1_501321110) + az1 + az4; + + int i = DESCALE_ZEROSHIFT(tmp10 + btmp3, CONST_BITS+PASS1_BITS+3); + pDst_ptr[8*0] = (uint8)CLAMP(i); + + i = DESCALE_ZEROSHIFT(tmp10 - btmp3, CONST_BITS+PASS1_BITS+3); + pDst_ptr[8*7] = (uint8)CLAMP(i); + + i = DESCALE_ZEROSHIFT(tmp11 + btmp2, CONST_BITS+PASS1_BITS+3); + pDst_ptr[8*1] = (uint8)CLAMP(i); + + i = DESCALE_ZEROSHIFT(tmp11 - btmp2, CONST_BITS+PASS1_BITS+3); + pDst_ptr[8*6] = (uint8)CLAMP(i); + + i = DESCALE_ZEROSHIFT(tmp12 + btmp1, CONST_BITS+PASS1_BITS+3); + pDst_ptr[8*2] = (uint8)CLAMP(i); + + i = DESCALE_ZEROSHIFT(tmp12 - btmp1, CONST_BITS+PASS1_BITS+3); + pDst_ptr[8*5] = (uint8)CLAMP(i); + + i = DESCALE_ZEROSHIFT(tmp13 + btmp0, CONST_BITS+PASS1_BITS+3); + pDst_ptr[8*3] = (uint8)CLAMP(i); + + i = DESCALE_ZEROSHIFT(tmp13 - btmp0, CONST_BITS+PASS1_BITS+3); + pDst_ptr[8*4] = (uint8)CLAMP(i); + } +}; + +template <> +struct Col<1> +{ + static void idct(uint8* pDst_ptr, const int* pTemp) + { + int dcval = DESCALE_ZEROSHIFT(pTemp[0], PASS1_BITS+3); + const uint8 dcval_clamped = (uint8)CLAMP(dcval); + pDst_ptr[0*8] = dcval_clamped; + pDst_ptr[1*8] = dcval_clamped; + pDst_ptr[2*8] = dcval_clamped; + pDst_ptr[3*8] = dcval_clamped; + pDst_ptr[4*8] = dcval_clamped; + pDst_ptr[5*8] = dcval_clamped; + pDst_ptr[6*8] = dcval_clamped; + pDst_ptr[7*8] = dcval_clamped; + } +}; + +static const uint8 s_idct_row_table[] = +{ + 1,0,0,0,0,0,0,0, 2,0,0,0,0,0,0,0, 2,1,0,0,0,0,0,0, 2,1,1,0,0,0,0,0, 2,2,1,0,0,0,0,0, 3,2,1,0,0,0,0,0, 4,2,1,0,0,0,0,0, 4,3,1,0,0,0,0,0, + 4,3,2,0,0,0,0,0, 4,3,2,1,0,0,0,0, 4,3,2,1,1,0,0,0, 4,3,2,2,1,0,0,0, 4,3,3,2,1,0,0,0, 4,4,3,2,1,0,0,0, 5,4,3,2,1,0,0,0, 6,4,3,2,1,0,0,0, + 6,5,3,2,1,0,0,0, 6,5,4,2,1,0,0,0, 6,5,4,3,1,0,0,0, 6,5,4,3,2,0,0,0, 6,5,4,3,2,1,0,0, 6,5,4,3,2,1,1,0, 6,5,4,3,2,2,1,0, 6,5,4,3,3,2,1,0, + 6,5,4,4,3,2,1,0, 6,5,5,4,3,2,1,0, 6,6,5,4,3,2,1,0, 7,6,5,4,3,2,1,0, 8,6,5,4,3,2,1,0, 8,7,5,4,3,2,1,0, 8,7,6,4,3,2,1,0, 8,7,6,5,3,2,1,0, + 8,7,6,5,4,2,1,0, 8,7,6,5,4,3,1,0, 8,7,6,5,4,3,2,0, 8,7,6,5,4,3,2,1, 8,7,6,5,4,3,2,2, 8,7,6,5,4,3,3,2, 8,7,6,5,4,4,3,2, 8,7,6,5,5,4,3,2, + 8,7,6,6,5,4,3,2, 8,7,7,6,5,4,3,2, 8,8,7,6,5,4,3,2, 8,8,8,6,5,4,3,2, 8,8,8,7,5,4,3,2, 8,8,8,7,6,4,3,2, 8,8,8,7,6,5,3,2, 8,8,8,7,6,5,4,2, + 8,8,8,7,6,5,4,3, 8,8,8,7,6,5,4,4, 8,8,8,7,6,5,5,4, 8,8,8,7,6,6,5,4, 8,8,8,7,7,6,5,4, 8,8,8,8,7,6,5,4, 8,8,8,8,8,6,5,4, 8,8,8,8,8,7,5,4, + 8,8,8,8,8,7,6,4, 8,8,8,8,8,7,6,5, 8,8,8,8,8,7,6,6, 8,8,8,8,8,7,7,6, 8,8,8,8,8,8,7,6, 8,8,8,8,8,8,8,6, 8,8,8,8,8,8,8,7, 8,8,8,8,8,8,8,8, +}; + +static const uint8 s_idct_col_table[] = { 1, 1, 2, 3, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 }; + +void idct(const jpgd_block_t* pSrc_ptr, uint8* pDst_ptr, int block_max_zag) +{ + JPGD_ASSERT(block_max_zag >= 1); + JPGD_ASSERT(block_max_zag <= 64); + + if (block_max_zag <= 1) + { + int k = ((pSrc_ptr[0] + 4) >> 3) + 128; + k = CLAMP(k); + k = k | (k<<8); + k = k | (k<<16); + + for (int i = 8; i > 0; i--) + { + *(int*)&pDst_ptr[0] = k; + *(int*)&pDst_ptr[4] = k; + pDst_ptr += 8; + } + return; + } + + int temp[64]; + + const jpgd_block_t* pSrc = pSrc_ptr; + int* pTemp = temp; + + const uint8* pRow_tab = &s_idct_row_table[(block_max_zag - 1) * 8]; + int i; + for (i = 8; i > 0; i--, pRow_tab++) + { + switch (*pRow_tab) + { + case 0: Row<0>::idct(pTemp, pSrc); break; + case 1: Row<1>::idct(pTemp, pSrc); break; + case 2: Row<2>::idct(pTemp, pSrc); break; + case 3: Row<3>::idct(pTemp, pSrc); break; + case 4: Row<4>::idct(pTemp, pSrc); break; + case 5: Row<5>::idct(pTemp, pSrc); break; + case 6: Row<6>::idct(pTemp, pSrc); break; + case 7: Row<7>::idct(pTemp, pSrc); break; + case 8: Row<8>::idct(pTemp, pSrc); break; + } + + pSrc += 8; + pTemp += 8; + } + + pTemp = temp; + + const int nonzero_rows = s_idct_col_table[block_max_zag - 1]; + for (i = 8; i > 0; i--) + { + switch (nonzero_rows) + { + case 1: Col<1>::idct(pDst_ptr, pTemp); break; + case 2: Col<2>::idct(pDst_ptr, pTemp); break; + case 3: Col<3>::idct(pDst_ptr, pTemp); break; + case 4: Col<4>::idct(pDst_ptr, pTemp); break; + case 5: Col<5>::idct(pDst_ptr, pTemp); break; + case 6: Col<6>::idct(pDst_ptr, pTemp); break; + case 7: Col<7>::idct(pDst_ptr, pTemp); break; + case 8: Col<8>::idct(pDst_ptr, pTemp); break; + } + + pTemp++; + pDst_ptr++; + } +} + +void idct_4x4(const jpgd_block_t* pSrc_ptr, uint8* pDst_ptr) +{ + int temp[64]; + int* pTemp = temp; + const jpgd_block_t* pSrc = pSrc_ptr; + + for (int i = 4; i > 0; i--) + { + Row<4>::idct(pTemp, pSrc); + pSrc += 8; + pTemp += 8; + } + + pTemp = temp; + for (int i = 8; i > 0; i--) + { + Col<4>::idct(pDst_ptr, pTemp); + pTemp++; + pDst_ptr++; + } +} + +// Retrieve one character from the input stream. +inline uint jpeg_decoder::get_char() +{ + // Any bytes remaining in buffer? + if (!m_in_buf_left) + { + // Try to get more bytes. + prep_in_buffer(); + // Still nothing to get? + if (!m_in_buf_left) + { + // Pad the end of the stream with 0xFF 0xD9 (EOI marker) + int t = m_tem_flag; + m_tem_flag ^= 1; + if (t) + return 0xD9; + else + return 0xFF; + } + } + + uint c = *m_pIn_buf_ofs++; + m_in_buf_left--; + + return c; +} + +// Same as previous method, except can indicate if the character is a pad character or not. +inline uint jpeg_decoder::get_char(bool *pPadding_flag) +{ + if (!m_in_buf_left) + { + prep_in_buffer(); + if (!m_in_buf_left) + { + *pPadding_flag = true; + int t = m_tem_flag; + m_tem_flag ^= 1; + if (t) + return 0xD9; + else + return 0xFF; + } + } + + *pPadding_flag = false; + + uint c = *m_pIn_buf_ofs++; + m_in_buf_left--; + + return c; +} + +// Inserts a previously retrieved character back into the input buffer. +inline void jpeg_decoder::stuff_char(uint8 q) +{ + *(--m_pIn_buf_ofs) = q; + m_in_buf_left++; +} + +// Retrieves one character from the input stream, but does not read past markers. Will continue to return 0xFF when a marker is encountered. +inline uint8 jpeg_decoder::get_octet() +{ + bool padding_flag; + int c = get_char(&padding_flag); + + if (c == 0xFF) + { + if (padding_flag) + return 0xFF; + + c = get_char(&padding_flag); + if (padding_flag) + { + stuff_char(0xFF); + return 0xFF; + } + + if (c == 0x00) + return 0xFF; + else + { + stuff_char(static_cast(c)); + stuff_char(0xFF); + return 0xFF; + } + } + + return static_cast(c); +} + +// Retrieves a variable number of bits from the input stream. Does not recognize markers. +inline uint jpeg_decoder::get_bits(int num_bits) +{ + if (!num_bits) + return 0; + + uint i = m_bit_buf >> (32 - num_bits); + + if ((m_bits_left -= num_bits) <= 0) + { + m_bit_buf <<= (num_bits += m_bits_left); + + uint c1 = get_char(); + uint c2 = get_char(); + m_bit_buf = (m_bit_buf & 0xFFFF0000) | (c1 << 8) | c2; + + m_bit_buf <<= -m_bits_left; + + m_bits_left += 16; + + JPGD_ASSERT(m_bits_left >= 0); + } + else + m_bit_buf <<= num_bits; + + return i; +} + +// Retrieves a variable number of bits from the input stream. Markers will not be read into the input bit buffer. Instead, an infinite number of all 1's will be returned when a marker is encountered. +inline uint jpeg_decoder::get_bits_no_markers(int num_bits) +{ + if (!num_bits) + return 0; + + uint i = m_bit_buf >> (32 - num_bits); + + if ((m_bits_left -= num_bits) <= 0) + { + m_bit_buf <<= (num_bits += m_bits_left); + + if ((m_in_buf_left < 2) || (m_pIn_buf_ofs[0] == 0xFF) || (m_pIn_buf_ofs[1] == 0xFF)) + { + uint c1 = get_octet(); + uint c2 = get_octet(); + m_bit_buf |= (c1 << 8) | c2; + } + else + { + m_bit_buf |= ((uint)m_pIn_buf_ofs[0] << 8) | m_pIn_buf_ofs[1]; + m_in_buf_left -= 2; + m_pIn_buf_ofs += 2; + } + + m_bit_buf <<= -m_bits_left; + + m_bits_left += 16; + + JPGD_ASSERT(m_bits_left >= 0); + } + else + m_bit_buf <<= num_bits; + + return i; +} + +// Decodes a Huffman encoded symbol. +inline int jpeg_decoder::huff_decode(huff_tables *pH) +{ + int symbol; + + // Check first 8-bits: do we have a complete symbol? + if ((symbol = pH->look_up[m_bit_buf >> 24]) < 0) + { + // Decode more bits, use a tree traversal to find symbol. + int ofs = 23; + do + { + symbol = pH->tree[-(int)(symbol + ((m_bit_buf >> ofs) & 1))]; + ofs--; + } while (symbol < 0); + + get_bits_no_markers(8 + (23 - ofs)); + } + else + get_bits_no_markers(pH->code_size[symbol]); + + return symbol; +} + +// Decodes a Huffman encoded symbol. +inline int jpeg_decoder::huff_decode(huff_tables *pH, int& extra_bits) +{ + int symbol; + + // Check first 8-bits: do we have a complete symbol? + if ((symbol = pH->look_up2[m_bit_buf >> 24]) < 0) + { + // Use a tree traversal to find symbol. + int ofs = 23; + do + { + symbol = pH->tree[-(int)(symbol + ((m_bit_buf >> ofs) & 1))]; + ofs--; + } while (symbol < 0); + + get_bits_no_markers(8 + (23 - ofs)); + + extra_bits = get_bits_no_markers(symbol & 0xF); + } + else + { + JPGD_ASSERT(((symbol >> 8) & 31) == pH->code_size[symbol & 255] + ((symbol & 0x8000) ? (symbol & 15) : 0)); + + if (symbol & 0x8000) + { + get_bits_no_markers((symbol >> 8) & 31); + extra_bits = symbol >> 16; + } + else + { + int code_size = (symbol >> 8) & 31; + int num_extra_bits = symbol & 0xF; + int bits = code_size + num_extra_bits; + if (bits <= (m_bits_left + 16)) + extra_bits = get_bits_no_markers(bits) & ((1 << num_extra_bits) - 1); + else + { + get_bits_no_markers(code_size); + extra_bits = get_bits_no_markers(num_extra_bits); + } + } + + symbol &= 0xFF; + } + + return symbol; +} + +// Tables and macro used to fully decode the DPCM differences. +static const int s_extend_test[16] = { 0, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, 0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000 }; +static const int s_extend_offset[16] = { 0, ((-1)<<1) + 1, ((-1)<<2) + 1, ((-1)<<3) + 1, ((-1)<<4) + 1, ((-1)<<5) + 1, ((-1)<<6) + 1, ((-1)<<7) + 1, ((-1)<<8) + 1, ((-1)<<9) + 1, ((-1)<<10) + 1, ((-1)<<11) + 1, ((-1)<<12) + 1, ((-1)<<13) + 1, ((-1)<<14) + 1, ((-1)<<15) + 1 }; +static const int s_extend_mask[] = { 0, (1<<0), (1<<1), (1<<2), (1<<3), (1<<4), (1<<5), (1<<6), (1<<7), (1<<8), (1<<9), (1<<10), (1<<11), (1<<12), (1<<13), (1<<14), (1<<15), (1<<16) }; +// The logical AND's in this macro are to shut up static code analysis (aren't really necessary - couldn't find another way to do this) +#define JPGD_HUFF_EXTEND(x, s) (((x) < s_extend_test[s & 15]) ? ((x) + s_extend_offset[s & 15]) : (x)) + +// Clamps a value between 0-255. +inline uint8 jpeg_decoder::clamp(int i) +{ + if (static_cast(i) > 255) + i = (((~i) >> 31) & 0xFF); + + return static_cast(i); +} + +namespace DCT_Upsample +{ + struct Matrix44 + { + typedef int Element_Type; + enum { NUM_ROWS = 4, NUM_COLS = 4 }; + + Element_Type v[NUM_ROWS][NUM_COLS]; + + inline int rows() const { return NUM_ROWS; } + inline int cols() const { return NUM_COLS; } + + inline const Element_Type & at(int r, int c) const { return v[r][c]; } + inline Element_Type & at(int r, int c) { return v[r][c]; } + + inline Matrix44() { } + + inline Matrix44& operator += (const Matrix44& a) + { + for (int r = 0; r < NUM_ROWS; r++) + { + at(r, 0) += a.at(r, 0); + at(r, 1) += a.at(r, 1); + at(r, 2) += a.at(r, 2); + at(r, 3) += a.at(r, 3); + } + return *this; + } + + inline Matrix44& operator -= (const Matrix44& a) + { + for (int r = 0; r < NUM_ROWS; r++) + { + at(r, 0) -= a.at(r, 0); + at(r, 1) -= a.at(r, 1); + at(r, 2) -= a.at(r, 2); + at(r, 3) -= a.at(r, 3); + } + return *this; + } + + friend inline Matrix44 operator + (const Matrix44& a, const Matrix44& b) + { + Matrix44 ret; + for (int r = 0; r < NUM_ROWS; r++) + { + ret.at(r, 0) = a.at(r, 0) + b.at(r, 0); + ret.at(r, 1) = a.at(r, 1) + b.at(r, 1); + ret.at(r, 2) = a.at(r, 2) + b.at(r, 2); + ret.at(r, 3) = a.at(r, 3) + b.at(r, 3); + } + return ret; + } + + friend inline Matrix44 operator - (const Matrix44& a, const Matrix44& b) + { + Matrix44 ret; + for (int r = 0; r < NUM_ROWS; r++) + { + ret.at(r, 0) = a.at(r, 0) - b.at(r, 0); + ret.at(r, 1) = a.at(r, 1) - b.at(r, 1); + ret.at(r, 2) = a.at(r, 2) - b.at(r, 2); + ret.at(r, 3) = a.at(r, 3) - b.at(r, 3); + } + return ret; + } + + static inline void add_and_store(jpgd_block_t* pDst, const Matrix44& a, const Matrix44& b) + { + for (int r = 0; r < 4; r++) + { + pDst[0*8 + r] = static_cast(a.at(r, 0) + b.at(r, 0)); + pDst[1*8 + r] = static_cast(a.at(r, 1) + b.at(r, 1)); + pDst[2*8 + r] = static_cast(a.at(r, 2) + b.at(r, 2)); + pDst[3*8 + r] = static_cast(a.at(r, 3) + b.at(r, 3)); + } + } + + static inline void sub_and_store(jpgd_block_t* pDst, const Matrix44& a, const Matrix44& b) + { + for (int r = 0; r < 4; r++) + { + pDst[0*8 + r] = static_cast(a.at(r, 0) - b.at(r, 0)); + pDst[1*8 + r] = static_cast(a.at(r, 1) - b.at(r, 1)); + pDst[2*8 + r] = static_cast(a.at(r, 2) - b.at(r, 2)); + pDst[3*8 + r] = static_cast(a.at(r, 3) - b.at(r, 3)); + } + } + }; + + const int FRACT_BITS = 10; + const int SCALE = 1 << FRACT_BITS; + + typedef int Temp_Type; + #define D(i) (((i) + (SCALE >> 1)) >> FRACT_BITS) + #define F(i) ((int)((i) * SCALE + .5f)) + + // Any decent C++ compiler will optimize this at compile time to a 0, or an array access. + #define AT(c, r) ((((c)>=NUM_COLS)||((r)>=NUM_ROWS)) ? 0 : pSrc[(c)+(r)*8]) + + // NUM_ROWS/NUM_COLS = # of non-zero rows/cols in input matrix + template + struct P_Q + { + static void calc(Matrix44& P, Matrix44& Q, const jpgd_block_t* pSrc) + { + // 4x8 = 4x8 times 8x8, matrix 0 is constant + const Temp_Type X000 = AT(0, 0); + const Temp_Type X001 = AT(0, 1); + const Temp_Type X002 = AT(0, 2); + const Temp_Type X003 = AT(0, 3); + const Temp_Type X004 = AT(0, 4); + const Temp_Type X005 = AT(0, 5); + const Temp_Type X006 = AT(0, 6); + const Temp_Type X007 = AT(0, 7); + const Temp_Type X010 = D(F(0.415735f) * AT(1, 0) + F(0.791065f) * AT(3, 0) + F(-0.352443f) * AT(5, 0) + F(0.277785f) * AT(7, 0)); + const Temp_Type X011 = D(F(0.415735f) * AT(1, 1) + F(0.791065f) * AT(3, 1) + F(-0.352443f) * AT(5, 1) + F(0.277785f) * AT(7, 1)); + const Temp_Type X012 = D(F(0.415735f) * AT(1, 2) + F(0.791065f) * AT(3, 2) + F(-0.352443f) * AT(5, 2) + F(0.277785f) * AT(7, 2)); + const Temp_Type X013 = D(F(0.415735f) * AT(1, 3) + F(0.791065f) * AT(3, 3) + F(-0.352443f) * AT(5, 3) + F(0.277785f) * AT(7, 3)); + const Temp_Type X014 = D(F(0.415735f) * AT(1, 4) + F(0.791065f) * AT(3, 4) + F(-0.352443f) * AT(5, 4) + F(0.277785f) * AT(7, 4)); + const Temp_Type X015 = D(F(0.415735f) * AT(1, 5) + F(0.791065f) * AT(3, 5) + F(-0.352443f) * AT(5, 5) + F(0.277785f) * AT(7, 5)); + const Temp_Type X016 = D(F(0.415735f) * AT(1, 6) + F(0.791065f) * AT(3, 6) + F(-0.352443f) * AT(5, 6) + F(0.277785f) * AT(7, 6)); + const Temp_Type X017 = D(F(0.415735f) * AT(1, 7) + F(0.791065f) * AT(3, 7) + F(-0.352443f) * AT(5, 7) + F(0.277785f) * AT(7, 7)); + const Temp_Type X020 = AT(4, 0); + const Temp_Type X021 = AT(4, 1); + const Temp_Type X022 = AT(4, 2); + const Temp_Type X023 = AT(4, 3); + const Temp_Type X024 = AT(4, 4); + const Temp_Type X025 = AT(4, 5); + const Temp_Type X026 = AT(4, 6); + const Temp_Type X027 = AT(4, 7); + const Temp_Type X030 = D(F(0.022887f) * AT(1, 0) + F(-0.097545f) * AT(3, 0) + F(0.490393f) * AT(5, 0) + F(0.865723f) * AT(7, 0)); + const Temp_Type X031 = D(F(0.022887f) * AT(1, 1) + F(-0.097545f) * AT(3, 1) + F(0.490393f) * AT(5, 1) + F(0.865723f) * AT(7, 1)); + const Temp_Type X032 = D(F(0.022887f) * AT(1, 2) + F(-0.097545f) * AT(3, 2) + F(0.490393f) * AT(5, 2) + F(0.865723f) * AT(7, 2)); + const Temp_Type X033 = D(F(0.022887f) * AT(1, 3) + F(-0.097545f) * AT(3, 3) + F(0.490393f) * AT(5, 3) + F(0.865723f) * AT(7, 3)); + const Temp_Type X034 = D(F(0.022887f) * AT(1, 4) + F(-0.097545f) * AT(3, 4) + F(0.490393f) * AT(5, 4) + F(0.865723f) * AT(7, 4)); + const Temp_Type X035 = D(F(0.022887f) * AT(1, 5) + F(-0.097545f) * AT(3, 5) + F(0.490393f) * AT(5, 5) + F(0.865723f) * AT(7, 5)); + const Temp_Type X036 = D(F(0.022887f) * AT(1, 6) + F(-0.097545f) * AT(3, 6) + F(0.490393f) * AT(5, 6) + F(0.865723f) * AT(7, 6)); + const Temp_Type X037 = D(F(0.022887f) * AT(1, 7) + F(-0.097545f) * AT(3, 7) + F(0.490393f) * AT(5, 7) + F(0.865723f) * AT(7, 7)); + + // 4x4 = 4x8 times 8x4, matrix 1 is constant + P.at(0, 0) = X000; + P.at(0, 1) = D(X001 * F(0.415735f) + X003 * F(0.791065f) + X005 * F(-0.352443f) + X007 * F(0.277785f)); + P.at(0, 2) = X004; + P.at(0, 3) = D(X001 * F(0.022887f) + X003 * F(-0.097545f) + X005 * F(0.490393f) + X007 * F(0.865723f)); + P.at(1, 0) = X010; + P.at(1, 1) = D(X011 * F(0.415735f) + X013 * F(0.791065f) + X015 * F(-0.352443f) + X017 * F(0.277785f)); + P.at(1, 2) = X014; + P.at(1, 3) = D(X011 * F(0.022887f) + X013 * F(-0.097545f) + X015 * F(0.490393f) + X017 * F(0.865723f)); + P.at(2, 0) = X020; + P.at(2, 1) = D(X021 * F(0.415735f) + X023 * F(0.791065f) + X025 * F(-0.352443f) + X027 * F(0.277785f)); + P.at(2, 2) = X024; + P.at(2, 3) = D(X021 * F(0.022887f) + X023 * F(-0.097545f) + X025 * F(0.490393f) + X027 * F(0.865723f)); + P.at(3, 0) = X030; + P.at(3, 1) = D(X031 * F(0.415735f) + X033 * F(0.791065f) + X035 * F(-0.352443f) + X037 * F(0.277785f)); + P.at(3, 2) = X034; + P.at(3, 3) = D(X031 * F(0.022887f) + X033 * F(-0.097545f) + X035 * F(0.490393f) + X037 * F(0.865723f)); + // 40 muls 24 adds + + // 4x4 = 4x8 times 8x4, matrix 1 is constant + Q.at(0, 0) = D(X001 * F(0.906127f) + X003 * F(-0.318190f) + X005 * F(0.212608f) + X007 * F(-0.180240f)); + Q.at(0, 1) = X002; + Q.at(0, 2) = D(X001 * F(-0.074658f) + X003 * F(0.513280f) + X005 * F(0.768178f) + X007 * F(-0.375330f)); + Q.at(0, 3) = X006; + Q.at(1, 0) = D(X011 * F(0.906127f) + X013 * F(-0.318190f) + X015 * F(0.212608f) + X017 * F(-0.180240f)); + Q.at(1, 1) = X012; + Q.at(1, 2) = D(X011 * F(-0.074658f) + X013 * F(0.513280f) + X015 * F(0.768178f) + X017 * F(-0.375330f)); + Q.at(1, 3) = X016; + Q.at(2, 0) = D(X021 * F(0.906127f) + X023 * F(-0.318190f) + X025 * F(0.212608f) + X027 * F(-0.180240f)); + Q.at(2, 1) = X022; + Q.at(2, 2) = D(X021 * F(-0.074658f) + X023 * F(0.513280f) + X025 * F(0.768178f) + X027 * F(-0.375330f)); + Q.at(2, 3) = X026; + Q.at(3, 0) = D(X031 * F(0.906127f) + X033 * F(-0.318190f) + X035 * F(0.212608f) + X037 * F(-0.180240f)); + Q.at(3, 1) = X032; + Q.at(3, 2) = D(X031 * F(-0.074658f) + X033 * F(0.513280f) + X035 * F(0.768178f) + X037 * F(-0.375330f)); + Q.at(3, 3) = X036; + // 40 muls 24 adds + } + }; + + template + struct R_S + { + static void calc(Matrix44& R, Matrix44& S, const jpgd_block_t* pSrc) + { + // 4x8 = 4x8 times 8x8, matrix 0 is constant + const Temp_Type X100 = D(F(0.906127f) * AT(1, 0) + F(-0.318190f) * AT(3, 0) + F(0.212608f) * AT(5, 0) + F(-0.180240f) * AT(7, 0)); + const Temp_Type X101 = D(F(0.906127f) * AT(1, 1) + F(-0.318190f) * AT(3, 1) + F(0.212608f) * AT(5, 1) + F(-0.180240f) * AT(7, 1)); + const Temp_Type X102 = D(F(0.906127f) * AT(1, 2) + F(-0.318190f) * AT(3, 2) + F(0.212608f) * AT(5, 2) + F(-0.180240f) * AT(7, 2)); + const Temp_Type X103 = D(F(0.906127f) * AT(1, 3) + F(-0.318190f) * AT(3, 3) + F(0.212608f) * AT(5, 3) + F(-0.180240f) * AT(7, 3)); + const Temp_Type X104 = D(F(0.906127f) * AT(1, 4) + F(-0.318190f) * AT(3, 4) + F(0.212608f) * AT(5, 4) + F(-0.180240f) * AT(7, 4)); + const Temp_Type X105 = D(F(0.906127f) * AT(1, 5) + F(-0.318190f) * AT(3, 5) + F(0.212608f) * AT(5, 5) + F(-0.180240f) * AT(7, 5)); + const Temp_Type X106 = D(F(0.906127f) * AT(1, 6) + F(-0.318190f) * AT(3, 6) + F(0.212608f) * AT(5, 6) + F(-0.180240f) * AT(7, 6)); + const Temp_Type X107 = D(F(0.906127f) * AT(1, 7) + F(-0.318190f) * AT(3, 7) + F(0.212608f) * AT(5, 7) + F(-0.180240f) * AT(7, 7)); + const Temp_Type X110 = AT(2, 0); + const Temp_Type X111 = AT(2, 1); + const Temp_Type X112 = AT(2, 2); + const Temp_Type X113 = AT(2, 3); + const Temp_Type X114 = AT(2, 4); + const Temp_Type X115 = AT(2, 5); + const Temp_Type X116 = AT(2, 6); + const Temp_Type X117 = AT(2, 7); + const Temp_Type X120 = D(F(-0.074658f) * AT(1, 0) + F(0.513280f) * AT(3, 0) + F(0.768178f) * AT(5, 0) + F(-0.375330f) * AT(7, 0)); + const Temp_Type X121 = D(F(-0.074658f) * AT(1, 1) + F(0.513280f) * AT(3, 1) + F(0.768178f) * AT(5, 1) + F(-0.375330f) * AT(7, 1)); + const Temp_Type X122 = D(F(-0.074658f) * AT(1, 2) + F(0.513280f) * AT(3, 2) + F(0.768178f) * AT(5, 2) + F(-0.375330f) * AT(7, 2)); + const Temp_Type X123 = D(F(-0.074658f) * AT(1, 3) + F(0.513280f) * AT(3, 3) + F(0.768178f) * AT(5, 3) + F(-0.375330f) * AT(7, 3)); + const Temp_Type X124 = D(F(-0.074658f) * AT(1, 4) + F(0.513280f) * AT(3, 4) + F(0.768178f) * AT(5, 4) + F(-0.375330f) * AT(7, 4)); + const Temp_Type X125 = D(F(-0.074658f) * AT(1, 5) + F(0.513280f) * AT(3, 5) + F(0.768178f) * AT(5, 5) + F(-0.375330f) * AT(7, 5)); + const Temp_Type X126 = D(F(-0.074658f) * AT(1, 6) + F(0.513280f) * AT(3, 6) + F(0.768178f) * AT(5, 6) + F(-0.375330f) * AT(7, 6)); + const Temp_Type X127 = D(F(-0.074658f) * AT(1, 7) + F(0.513280f) * AT(3, 7) + F(0.768178f) * AT(5, 7) + F(-0.375330f) * AT(7, 7)); + const Temp_Type X130 = AT(6, 0); + const Temp_Type X131 = AT(6, 1); + const Temp_Type X132 = AT(6, 2); + const Temp_Type X133 = AT(6, 3); + const Temp_Type X134 = AT(6, 4); + const Temp_Type X135 = AT(6, 5); + const Temp_Type X136 = AT(6, 6); + const Temp_Type X137 = AT(6, 7); + // 80 muls 48 adds + + // 4x4 = 4x8 times 8x4, matrix 1 is constant + R.at(0, 0) = X100; + R.at(0, 1) = D(X101 * F(0.415735f) + X103 * F(0.791065f) + X105 * F(-0.352443f) + X107 * F(0.277785f)); + R.at(0, 2) = X104; + R.at(0, 3) = D(X101 * F(0.022887f) + X103 * F(-0.097545f) + X105 * F(0.490393f) + X107 * F(0.865723f)); + R.at(1, 0) = X110; + R.at(1, 1) = D(X111 * F(0.415735f) + X113 * F(0.791065f) + X115 * F(-0.352443f) + X117 * F(0.277785f)); + R.at(1, 2) = X114; + R.at(1, 3) = D(X111 * F(0.022887f) + X113 * F(-0.097545f) + X115 * F(0.490393f) + X117 * F(0.865723f)); + R.at(2, 0) = X120; + R.at(2, 1) = D(X121 * F(0.415735f) + X123 * F(0.791065f) + X125 * F(-0.352443f) + X127 * F(0.277785f)); + R.at(2, 2) = X124; + R.at(2, 3) = D(X121 * F(0.022887f) + X123 * F(-0.097545f) + X125 * F(0.490393f) + X127 * F(0.865723f)); + R.at(3, 0) = X130; + R.at(3, 1) = D(X131 * F(0.415735f) + X133 * F(0.791065f) + X135 * F(-0.352443f) + X137 * F(0.277785f)); + R.at(3, 2) = X134; + R.at(3, 3) = D(X131 * F(0.022887f) + X133 * F(-0.097545f) + X135 * F(0.490393f) + X137 * F(0.865723f)); + // 40 muls 24 adds + // 4x4 = 4x8 times 8x4, matrix 1 is constant + S.at(0, 0) = D(X101 * F(0.906127f) + X103 * F(-0.318190f) + X105 * F(0.212608f) + X107 * F(-0.180240f)); + S.at(0, 1) = X102; + S.at(0, 2) = D(X101 * F(-0.074658f) + X103 * F(0.513280f) + X105 * F(0.768178f) + X107 * F(-0.375330f)); + S.at(0, 3) = X106; + S.at(1, 0) = D(X111 * F(0.906127f) + X113 * F(-0.318190f) + X115 * F(0.212608f) + X117 * F(-0.180240f)); + S.at(1, 1) = X112; + S.at(1, 2) = D(X111 * F(-0.074658f) + X113 * F(0.513280f) + X115 * F(0.768178f) + X117 * F(-0.375330f)); + S.at(1, 3) = X116; + S.at(2, 0) = D(X121 * F(0.906127f) + X123 * F(-0.318190f) + X125 * F(0.212608f) + X127 * F(-0.180240f)); + S.at(2, 1) = X122; + S.at(2, 2) = D(X121 * F(-0.074658f) + X123 * F(0.513280f) + X125 * F(0.768178f) + X127 * F(-0.375330f)); + S.at(2, 3) = X126; + S.at(3, 0) = D(X131 * F(0.906127f) + X133 * F(-0.318190f) + X135 * F(0.212608f) + X137 * F(-0.180240f)); + S.at(3, 1) = X132; + S.at(3, 2) = D(X131 * F(-0.074658f) + X133 * F(0.513280f) + X135 * F(0.768178f) + X137 * F(-0.375330f)); + S.at(3, 3) = X136; + // 40 muls 24 adds + } + }; +} // end namespace DCT_Upsample + +// Unconditionally frees all allocated m_blocks. +void jpeg_decoder::free_all_blocks() +{ + m_pStream = NULL; + for (mem_block *b = m_pMem_blocks; b; ) + { + mem_block *n = b->m_pNext; + jpgd_free(b); + b = n; + } + m_pMem_blocks = NULL; +} + +// This method handles all errors. It will never return. +// It could easily be changed to use C++ exceptions. +JPGD_NORETURN void jpeg_decoder::stop_decoding(jpgd_status status) +{ + m_error_code = status; + free_all_blocks(); + longjmp(m_jmp_state, status); +} + +void *jpeg_decoder::alloc(size_t nSize, bool zero) +{ + nSize = (JPGD_MAX(nSize, 1) + 3) & ~3; + char *rv = NULL; + for (mem_block *b = m_pMem_blocks; b; b = b->m_pNext) + { + if ((b->m_used_count + nSize) <= b->m_size) + { + rv = b->m_data + b->m_used_count; + b->m_used_count += nSize; + break; + } + } + if (!rv) + { + int capacity = JPGD_MAX(32768 - 256, (nSize + 2047) & ~2047); + mem_block *b = (mem_block*)jpgd_malloc(sizeof(mem_block) + capacity); + if (!b) { stop_decoding(JPGD_NOTENOUGHMEM); } + b->m_pNext = m_pMem_blocks; m_pMem_blocks = b; + b->m_used_count = nSize; + b->m_size = capacity; + rv = b->m_data; + } + if (zero) memset(rv, 0, nSize); + return rv; +} + +void jpeg_decoder::word_clear(void *p, uint16 c, uint n) +{ + uint8 *pD = (uint8*)p; + const uint8 l = c & 0xFF, h = (c >> 8) & 0xFF; + while (n) + { + pD[0] = l; pD[1] = h; pD += 2; + n--; + } +} + +// Refill the input buffer. +// This method will sit in a loop until (A) the buffer is full or (B) +// the stream's read() method reports and end of file condition. +void jpeg_decoder::prep_in_buffer() +{ + m_in_buf_left = 0; + m_pIn_buf_ofs = m_in_buf; + + if (m_eof_flag) + return; + + do + { + int bytes_read = m_pStream->read(m_in_buf + m_in_buf_left, JPGD_IN_BUF_SIZE - m_in_buf_left, &m_eof_flag); + if (bytes_read == -1) + stop_decoding(JPGD_STREAM_READ); + + m_in_buf_left += bytes_read; + } while ((m_in_buf_left < JPGD_IN_BUF_SIZE) && (!m_eof_flag)); + + m_total_bytes_read += m_in_buf_left; + + // Pad the end of the block with M_EOI (prevents the decompressor from going off the rails if the stream is invalid). + // (This dates way back to when this decompressor was written in C/asm, and the all-asm Huffman decoder did some fancy things to increase perf.) + word_clear(m_pIn_buf_ofs + m_in_buf_left, 0xD9FF, 64); +} + +// Read a Huffman code table. +void jpeg_decoder::read_dht_marker() +{ + int i, index, count; + uint8 huff_num[17]; + uint8 huff_val[256]; + + uint num_left = get_bits(16); + + if (num_left < 2) + stop_decoding(JPGD_BAD_DHT_MARKER); + + num_left -= 2; + + while (num_left) + { + index = get_bits(8); + + huff_num[0] = 0; + + count = 0; + + for (i = 1; i <= 16; i++) + { + huff_num[i] = static_cast(get_bits(8)); + count += huff_num[i]; + } + + if (count > 255) + stop_decoding(JPGD_BAD_DHT_COUNTS); + + for (i = 0; i < count; i++) + huff_val[i] = static_cast(get_bits(8)); + + i = 1 + 16 + count; + + if (num_left < (uint)i) + stop_decoding(JPGD_BAD_DHT_MARKER); + + num_left -= i; + + if ((index & 0x10) > 0x10) + stop_decoding(JPGD_BAD_DHT_INDEX); + + index = (index & 0x0F) + ((index & 0x10) >> 4) * (JPGD_MAX_HUFF_TABLES >> 1); + + if (index >= JPGD_MAX_HUFF_TABLES) + stop_decoding(JPGD_BAD_DHT_INDEX); + + if (!m_huff_num[index]) + m_huff_num[index] = (uint8 *)alloc(17); + + if (!m_huff_val[index]) + m_huff_val[index] = (uint8 *)alloc(256); + + m_huff_ac[index] = (index & 0x10) != 0; + memcpy(m_huff_num[index], huff_num, 17); + memcpy(m_huff_val[index], huff_val, 256); + } +} + +// Read a quantization table. +void jpeg_decoder::read_dqt_marker() +{ + int n, i, prec; + uint num_left; + uint temp; + + num_left = get_bits(16); + + if (num_left < 2) + stop_decoding(JPGD_BAD_DQT_MARKER); + + num_left -= 2; + + while (num_left) + { + n = get_bits(8); + prec = n >> 4; + n &= 0x0F; + + if (n >= JPGD_MAX_QUANT_TABLES) + stop_decoding(JPGD_BAD_DQT_TABLE); + + if (!m_quant[n]) + m_quant[n] = (jpgd_quant_t *)alloc(64 * sizeof(jpgd_quant_t)); + + // read quantization entries, in zag order + for (i = 0; i < 64; i++) + { + temp = get_bits(8); + + if (prec) + temp = (temp << 8) + get_bits(8); + + m_quant[n][i] = static_cast(temp); + } + + i = 64 + 1; + + if (prec) + i += 64; + + if (num_left < (uint)i) + stop_decoding(JPGD_BAD_DQT_LENGTH); + + num_left -= i; + } +} + +// Read the start of frame (SOF) marker. +void jpeg_decoder::read_sof_marker() +{ + int i; + uint num_left; + + num_left = get_bits(16); + + if (get_bits(8) != 8) /* precision: sorry, only 8-bit precision is supported right now */ + stop_decoding(JPGD_BAD_PRECISION); + + m_image_y_size = get_bits(16); + + if ((m_image_y_size < 1) || (m_image_y_size > JPGD_MAX_HEIGHT)) + stop_decoding(JPGD_BAD_HEIGHT); + + m_image_x_size = get_bits(16); + + if ((m_image_x_size < 1) || (m_image_x_size > JPGD_MAX_WIDTH)) + stop_decoding(JPGD_BAD_WIDTH); + + m_comps_in_frame = get_bits(8); + + if (m_comps_in_frame > JPGD_MAX_COMPONENTS) + stop_decoding(JPGD_TOO_MANY_COMPONENTS); + + if (num_left != (uint)(m_comps_in_frame * 3 + 8)) + stop_decoding(JPGD_BAD_SOF_LENGTH); + + for (i = 0; i < m_comps_in_frame; i++) + { + m_comp_ident[i] = get_bits(8); + m_comp_h_samp[i] = get_bits(4); + m_comp_v_samp[i] = get_bits(4); + m_comp_quant[i] = get_bits(8); + } +} + +// Used to skip unrecognized markers. +void jpeg_decoder::skip_variable_marker() +{ + uint num_left; + + num_left = get_bits(16); + + if (num_left < 2) + stop_decoding(JPGD_BAD_VARIABLE_MARKER); + + num_left -= 2; + + while (num_left) + { + get_bits(8); + num_left--; + } +} + +// Read a define restart interval (DRI) marker. +void jpeg_decoder::read_dri_marker() +{ + if (get_bits(16) != 4) + stop_decoding(JPGD_BAD_DRI_LENGTH); + + m_restart_interval = get_bits(16); +} + +// Read a start of scan (SOS) marker. +void jpeg_decoder::read_sos_marker() +{ + uint num_left; + int i, ci, n, c, cc; + + num_left = get_bits(16); + + n = get_bits(8); + + m_comps_in_scan = n; + + num_left -= 3; + + if ( (num_left != (uint)(n * 2 + 3)) || (n < 1) || (n > JPGD_MAX_COMPS_IN_SCAN) ) + stop_decoding(JPGD_BAD_SOS_LENGTH); + + for (i = 0; i < n; i++) + { + cc = get_bits(8); + c = get_bits(8); + num_left -= 2; + + for (ci = 0; ci < m_comps_in_frame; ci++) + if (cc == m_comp_ident[ci]) + break; + + if (ci >= m_comps_in_frame) + stop_decoding(JPGD_BAD_SOS_COMP_ID); + + m_comp_list[i] = ci; + m_comp_dc_tab[ci] = (c >> 4) & 15; + m_comp_ac_tab[ci] = (c & 15) + (JPGD_MAX_HUFF_TABLES >> 1); + } + + m_spectral_start = get_bits(8); + m_spectral_end = get_bits(8); + m_successive_high = get_bits(4); + m_successive_low = get_bits(4); + + if (!m_progressive_flag) + { + m_spectral_start = 0; + m_spectral_end = 63; + } + + num_left -= 3; + + while (num_left) /* read past whatever is num_left */ + { + get_bits(8); + num_left--; + } +} + +// Finds the next marker. +int jpeg_decoder::next_marker() +{ + uint c, bytes; + + bytes = 0; + + do + { + do + { + bytes++; + c = get_bits(8); + } while (c != 0xFF); + + do + { + c = get_bits(8); + } while (c == 0xFF); + + } while (c == 0); + + // If bytes > 0 here, there where extra bytes before the marker (not good). + + return c; +} + +// Process markers. Returns when an SOFx, SOI, EOI, or SOS marker is +// encountered. +int jpeg_decoder::process_markers() +{ + int c; + + for ( ; ; ) + { + c = next_marker(); + + switch (c) + { + case M_SOF0: + case M_SOF1: + case M_SOF2: + case M_SOF3: + case M_SOF5: + case M_SOF6: + case M_SOF7: +// case M_JPG: + case M_SOF9: + case M_SOF10: + case M_SOF11: + case M_SOF13: + case M_SOF14: + case M_SOF15: + case M_SOI: + case M_EOI: + case M_SOS: + { + return c; + } + case M_DHT: + { + read_dht_marker(); + break; + } + // No arithmitic support - dumb patents! + case M_DAC: + { + stop_decoding(JPGD_NO_ARITHMITIC_SUPPORT); + break; + } + case M_DQT: + { + read_dqt_marker(); + break; + } + case M_DRI: + { + read_dri_marker(); + break; + } + //case M_APP0: /* no need to read the JFIF marker */ + + case M_JPG: + case M_RST0: /* no parameters */ + case M_RST1: + case M_RST2: + case M_RST3: + case M_RST4: + case M_RST5: + case M_RST6: + case M_RST7: + case M_TEM: + { + stop_decoding(JPGD_UNEXPECTED_MARKER); + break; + } + default: /* must be DNL, DHP, EXP, APPn, JPGn, COM, or RESn or APP0 */ + { + skip_variable_marker(); + break; + } + } + } +} + +// Finds the start of image (SOI) marker. +// This code is rather defensive: it only checks the first 512 bytes to avoid +// false positives. +void jpeg_decoder::locate_soi_marker() +{ + uint lastchar, thischar; + uint bytesleft; + + lastchar = get_bits(8); + + thischar = get_bits(8); + + /* ok if it's a normal JPEG file without a special header */ + + if ((lastchar == 0xFF) && (thischar == M_SOI)) + return; + + bytesleft = 4096; //512; + + for ( ; ; ) + { + if (--bytesleft == 0) + stop_decoding(JPGD_NOT_JPEG); + + lastchar = thischar; + + thischar = get_bits(8); + + if (lastchar == 0xFF) + { + if (thischar == M_SOI) + break; + else if (thischar == M_EOI) // get_bits will keep returning M_EOI if we read past the end + stop_decoding(JPGD_NOT_JPEG); + } + } + + // Check the next character after marker: if it's not 0xFF, it can't be the start of the next marker, so the file is bad. + thischar = (m_bit_buf >> 24) & 0xFF; + + if (thischar != 0xFF) + stop_decoding(JPGD_NOT_JPEG); +} + +// Find a start of frame (SOF) marker. +void jpeg_decoder::locate_sof_marker() +{ + locate_soi_marker(); + + int c = process_markers(); + + switch (c) + { + case M_SOF2: + m_progressive_flag = JPGD_TRUE; + case M_SOF0: /* baseline DCT */ + case M_SOF1: /* extended sequential DCT */ + { + read_sof_marker(); + break; + } + case M_SOF9: /* Arithmitic coding */ + { + stop_decoding(JPGD_NO_ARITHMITIC_SUPPORT); + break; + } + default: + { + stop_decoding(JPGD_UNSUPPORTED_MARKER); + break; + } + } +} + +// Find a start of scan (SOS) marker. +int jpeg_decoder::locate_sos_marker() +{ + int c; + + c = process_markers(); + + if (c == M_EOI) + return JPGD_FALSE; + else if (c != M_SOS) + stop_decoding(JPGD_UNEXPECTED_MARKER); + + read_sos_marker(); + + return JPGD_TRUE; +} + +// Reset everything to default/uninitialized state. +void jpeg_decoder::init(jpeg_decoder_stream *pStream) +{ + m_pMem_blocks = NULL; + m_error_code = JPGD_SUCCESS; + m_ready_flag = false; + m_image_x_size = m_image_y_size = 0; + m_pStream = pStream; + m_progressive_flag = JPGD_FALSE; + + memset(m_huff_ac, 0, sizeof(m_huff_ac)); + memset(m_huff_num, 0, sizeof(m_huff_num)); + memset(m_huff_val, 0, sizeof(m_huff_val)); + memset(m_quant, 0, sizeof(m_quant)); + + m_scan_type = 0; + m_comps_in_frame = 0; + + memset(m_comp_h_samp, 0, sizeof(m_comp_h_samp)); + memset(m_comp_v_samp, 0, sizeof(m_comp_v_samp)); + memset(m_comp_quant, 0, sizeof(m_comp_quant)); + memset(m_comp_ident, 0, sizeof(m_comp_ident)); + memset(m_comp_h_blocks, 0, sizeof(m_comp_h_blocks)); + memset(m_comp_v_blocks, 0, sizeof(m_comp_v_blocks)); + + m_comps_in_scan = 0; + memset(m_comp_list, 0, sizeof(m_comp_list)); + memset(m_comp_dc_tab, 0, sizeof(m_comp_dc_tab)); + memset(m_comp_ac_tab, 0, sizeof(m_comp_ac_tab)); + + m_spectral_start = 0; + m_spectral_end = 0; + m_successive_low = 0; + m_successive_high = 0; + m_max_mcu_x_size = 0; + m_max_mcu_y_size = 0; + m_blocks_per_mcu = 0; + m_max_blocks_per_row = 0; + m_mcus_per_row = 0; + m_mcus_per_col = 0; + m_expanded_blocks_per_component = 0; + m_expanded_blocks_per_mcu = 0; + m_expanded_blocks_per_row = 0; + m_freq_domain_chroma_upsample = false; + + memset(m_mcu_org, 0, sizeof(m_mcu_org)); + + m_total_lines_left = 0; + m_mcu_lines_left = 0; + m_real_dest_bytes_per_scan_line = 0; + m_dest_bytes_per_scan_line = 0; + m_dest_bytes_per_pixel = 0; + + memset(m_pHuff_tabs, 0, sizeof(m_pHuff_tabs)); + + memset(m_dc_coeffs, 0, sizeof(m_dc_coeffs)); + memset(m_ac_coeffs, 0, sizeof(m_ac_coeffs)); + memset(m_block_y_mcu, 0, sizeof(m_block_y_mcu)); + + m_eob_run = 0; + + memset(m_block_y_mcu, 0, sizeof(m_block_y_mcu)); + + m_pIn_buf_ofs = m_in_buf; + m_in_buf_left = 0; + m_eof_flag = false; + m_tem_flag = 0; + + memset(m_in_buf_pad_start, 0, sizeof(m_in_buf_pad_start)); + memset(m_in_buf, 0, sizeof(m_in_buf)); + memset(m_in_buf_pad_end, 0, sizeof(m_in_buf_pad_end)); + + m_restart_interval = 0; + m_restarts_left = 0; + m_next_restart_num = 0; + + m_max_mcus_per_row = 0; + m_max_blocks_per_mcu = 0; + m_max_mcus_per_col = 0; + + memset(m_last_dc_val, 0, sizeof(m_last_dc_val)); + m_pMCU_coefficients = NULL; + m_pSample_buf = NULL; + + m_total_bytes_read = 0; + + m_pScan_line_0 = NULL; + m_pScan_line_1 = NULL; + + // Ready the input buffer. + prep_in_buffer(); + + // Prime the bit buffer. + m_bits_left = 16; + m_bit_buf = 0; + + get_bits(16); + get_bits(16); + + for (int i = 0; i < JPGD_MAX_BLOCKS_PER_MCU; i++) + m_mcu_block_max_zag[i] = 64; +} + +#define SCALEBITS 16 +#define ONE_HALF ((int) 1 << (SCALEBITS-1)) +#define FIX(x) ((int) ((x) * (1L<> SCALEBITS; + m_cbb[i] = ( FIX(1.77200f) * k + ONE_HALF) >> SCALEBITS; + m_crg[i] = (-FIX(0.71414f)) * k; + m_cbg[i] = (-FIX(0.34414f)) * k + ONE_HALF; + } +} + +// This method throws back into the stream any bytes that where read +// into the bit buffer during initial marker scanning. +void jpeg_decoder::fix_in_buffer() +{ + // In case any 0xFF's where pulled into the buffer during marker scanning. + JPGD_ASSERT((m_bits_left & 7) == 0); + + if (m_bits_left == 16) + stuff_char( (uint8)(m_bit_buf & 0xFF)); + + if (m_bits_left >= 8) + stuff_char( (uint8)((m_bit_buf >> 8) & 0xFF)); + + stuff_char((uint8)((m_bit_buf >> 16) & 0xFF)); + stuff_char((uint8)((m_bit_buf >> 24) & 0xFF)); + + m_bits_left = 16; + get_bits_no_markers(16); + get_bits_no_markers(16); +} + +void jpeg_decoder::transform_mcu(int mcu_row) +{ + jpgd_block_t* pSrc_ptr = m_pMCU_coefficients; + uint8* pDst_ptr = m_pSample_buf + mcu_row * m_blocks_per_mcu * 64; + + for (int mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++) + { + idct(pSrc_ptr, pDst_ptr, m_mcu_block_max_zag[mcu_block]); + pSrc_ptr += 64; + pDst_ptr += 64; + } +} + +static const uint8 s_max_rc[64] = +{ + 17, 18, 34, 50, 50, 51, 52, 52, 52, 68, 84, 84, 84, 84, 85, 86, 86, 86, 86, 86, + 102, 118, 118, 118, 118, 118, 118, 119, 120, 120, 120, 120, 120, 120, 120, 136, + 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, + 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136 +}; + +void jpeg_decoder::transform_mcu_expand(int mcu_row) +{ + jpgd_block_t* pSrc_ptr = m_pMCU_coefficients; + uint8* pDst_ptr = m_pSample_buf + mcu_row * m_expanded_blocks_per_mcu * 64; + + // Y IDCT + int mcu_block; + for (mcu_block = 0; mcu_block < m_expanded_blocks_per_component; mcu_block++) + { + idct(pSrc_ptr, pDst_ptr, m_mcu_block_max_zag[mcu_block]); + pSrc_ptr += 64; + pDst_ptr += 64; + } + + // Chroma IDCT, with upsampling + jpgd_block_t temp_block[64]; + + for (int i = 0; i < 2; i++) + { + DCT_Upsample::Matrix44 P, Q, R, S; + + JPGD_ASSERT(m_mcu_block_max_zag[mcu_block] >= 1); + JPGD_ASSERT(m_mcu_block_max_zag[mcu_block] <= 64); + + int max_zag = m_mcu_block_max_zag[mcu_block++] - 1; + if (max_zag <= 0) max_zag = 0; // should never happen, only here to shut up static analysis + switch (s_max_rc[max_zag]) + { + case 1*16+1: + DCT_Upsample::P_Q<1, 1>::calc(P, Q, pSrc_ptr); + DCT_Upsample::R_S<1, 1>::calc(R, S, pSrc_ptr); + break; + case 1*16+2: + DCT_Upsample::P_Q<1, 2>::calc(P, Q, pSrc_ptr); + DCT_Upsample::R_S<1, 2>::calc(R, S, pSrc_ptr); + break; + case 2*16+2: + DCT_Upsample::P_Q<2, 2>::calc(P, Q, pSrc_ptr); + DCT_Upsample::R_S<2, 2>::calc(R, S, pSrc_ptr); + break; + case 3*16+2: + DCT_Upsample::P_Q<3, 2>::calc(P, Q, pSrc_ptr); + DCT_Upsample::R_S<3, 2>::calc(R, S, pSrc_ptr); + break; + case 3*16+3: + DCT_Upsample::P_Q<3, 3>::calc(P, Q, pSrc_ptr); + DCT_Upsample::R_S<3, 3>::calc(R, S, pSrc_ptr); + break; + case 3*16+4: + DCT_Upsample::P_Q<3, 4>::calc(P, Q, pSrc_ptr); + DCT_Upsample::R_S<3, 4>::calc(R, S, pSrc_ptr); + break; + case 4*16+4: + DCT_Upsample::P_Q<4, 4>::calc(P, Q, pSrc_ptr); + DCT_Upsample::R_S<4, 4>::calc(R, S, pSrc_ptr); + break; + case 5*16+4: + DCT_Upsample::P_Q<5, 4>::calc(P, Q, pSrc_ptr); + DCT_Upsample::R_S<5, 4>::calc(R, S, pSrc_ptr); + break; + case 5*16+5: + DCT_Upsample::P_Q<5, 5>::calc(P, Q, pSrc_ptr); + DCT_Upsample::R_S<5, 5>::calc(R, S, pSrc_ptr); + break; + case 5*16+6: + DCT_Upsample::P_Q<5, 6>::calc(P, Q, pSrc_ptr); + DCT_Upsample::R_S<5, 6>::calc(R, S, pSrc_ptr); + break; + case 6*16+6: + DCT_Upsample::P_Q<6, 6>::calc(P, Q, pSrc_ptr); + DCT_Upsample::R_S<6, 6>::calc(R, S, pSrc_ptr); + break; + case 7*16+6: + DCT_Upsample::P_Q<7, 6>::calc(P, Q, pSrc_ptr); + DCT_Upsample::R_S<7, 6>::calc(R, S, pSrc_ptr); + break; + case 7*16+7: + DCT_Upsample::P_Q<7, 7>::calc(P, Q, pSrc_ptr); + DCT_Upsample::R_S<7, 7>::calc(R, S, pSrc_ptr); + break; + case 7*16+8: + DCT_Upsample::P_Q<7, 8>::calc(P, Q, pSrc_ptr); + DCT_Upsample::R_S<7, 8>::calc(R, S, pSrc_ptr); + break; + case 8*16+8: + DCT_Upsample::P_Q<8, 8>::calc(P, Q, pSrc_ptr); + DCT_Upsample::R_S<8, 8>::calc(R, S, pSrc_ptr); + break; + default: + JPGD_ASSERT(false); + } + + DCT_Upsample::Matrix44 a(P + Q); P -= Q; + DCT_Upsample::Matrix44& b = P; + DCT_Upsample::Matrix44 c(R + S); R -= S; + DCT_Upsample::Matrix44& d = R; + + DCT_Upsample::Matrix44::add_and_store(temp_block, a, c); + idct_4x4(temp_block, pDst_ptr); + pDst_ptr += 64; + + DCT_Upsample::Matrix44::sub_and_store(temp_block, a, c); + idct_4x4(temp_block, pDst_ptr); + pDst_ptr += 64; + + DCT_Upsample::Matrix44::add_and_store(temp_block, b, d); + idct_4x4(temp_block, pDst_ptr); + pDst_ptr += 64; + + DCT_Upsample::Matrix44::sub_and_store(temp_block, b, d); + idct_4x4(temp_block, pDst_ptr); + pDst_ptr += 64; + + pSrc_ptr += 64; + } +} + +// Loads and dequantizes the next row of (already decoded) coefficients. +// Progressive images only. +void jpeg_decoder::load_next_row() +{ + int i; + jpgd_block_t *p; + jpgd_quant_t *q; + int mcu_row, mcu_block, row_block = 0; + int component_num, component_id; + int block_x_mcu[JPGD_MAX_COMPONENTS]; + + memset(block_x_mcu, 0, JPGD_MAX_COMPONENTS * sizeof(int)); + + for (mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++) + { + int block_x_mcu_ofs = 0, block_y_mcu_ofs = 0; + + for (mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++) + { + component_id = m_mcu_org[mcu_block]; + q = m_quant[m_comp_quant[component_id]]; + + p = m_pMCU_coefficients + 64 * mcu_block; + + jpgd_block_t* pAC = coeff_buf_getp(m_ac_coeffs[component_id], block_x_mcu[component_id] + block_x_mcu_ofs, m_block_y_mcu[component_id] + block_y_mcu_ofs); + jpgd_block_t* pDC = coeff_buf_getp(m_dc_coeffs[component_id], block_x_mcu[component_id] + block_x_mcu_ofs, m_block_y_mcu[component_id] + block_y_mcu_ofs); + p[0] = pDC[0]; + memcpy(&p[1], &pAC[1], 63 * sizeof(jpgd_block_t)); + + for (i = 63; i > 0; i--) + if (p[g_ZAG[i]]) + break; + + m_mcu_block_max_zag[mcu_block] = i + 1; + + for ( ; i >= 0; i--) + if (p[g_ZAG[i]]) + p[g_ZAG[i]] = static_cast(p[g_ZAG[i]] * q[i]); + + row_block++; + + if (m_comps_in_scan == 1) + block_x_mcu[component_id]++; + else + { + if (++block_x_mcu_ofs == m_comp_h_samp[component_id]) + { + block_x_mcu_ofs = 0; + + if (++block_y_mcu_ofs == m_comp_v_samp[component_id]) + { + block_y_mcu_ofs = 0; + + block_x_mcu[component_id] += m_comp_h_samp[component_id]; + } + } + } + } + + if (m_freq_domain_chroma_upsample) + transform_mcu_expand(mcu_row); + else + transform_mcu(mcu_row); + } + + if (m_comps_in_scan == 1) + m_block_y_mcu[m_comp_list[0]]++; + else + { + for (component_num = 0; component_num < m_comps_in_scan; component_num++) + { + component_id = m_comp_list[component_num]; + + m_block_y_mcu[component_id] += m_comp_v_samp[component_id]; + } + } +} + +// Restart interval processing. +void jpeg_decoder::process_restart() +{ + int i; + int c = 0; + + // Align to a byte boundry + // FIXME: Is this really necessary? get_bits_no_markers() never reads in markers! + //get_bits_no_markers(m_bits_left & 7); + + // Let's scan a little bit to find the marker, but not _too_ far. + // 1536 is a "fudge factor" that determines how much to scan. + for (i = 1536; i > 0; i--) + if (get_char() == 0xFF) + break; + + if (i == 0) + stop_decoding(JPGD_BAD_RESTART_MARKER); + + for ( ; i > 0; i--) + if ((c = get_char()) != 0xFF) + break; + + if (i == 0) + stop_decoding(JPGD_BAD_RESTART_MARKER); + + // Is it the expected marker? If not, something bad happened. + if (c != (m_next_restart_num + M_RST0)) + stop_decoding(JPGD_BAD_RESTART_MARKER); + + // Reset each component's DC prediction values. + memset(&m_last_dc_val, 0, m_comps_in_frame * sizeof(uint)); + + m_eob_run = 0; + + m_restarts_left = m_restart_interval; + + m_next_restart_num = (m_next_restart_num + 1) & 7; + + // Get the bit buffer going again... + + m_bits_left = 16; + get_bits_no_markers(16); + get_bits_no_markers(16); +} + +static inline int dequantize_ac(int c, int q) { c *= q; return c; } + +// Decodes and dequantizes the next row of coefficients. +void jpeg_decoder::decode_next_row() +{ + int row_block = 0; + + for (int mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++) + { + if ((m_restart_interval) && (m_restarts_left == 0)) + process_restart(); + + jpgd_block_t* p = m_pMCU_coefficients; + for (int mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++, p += 64) + { + int component_id = m_mcu_org[mcu_block]; + jpgd_quant_t* q = m_quant[m_comp_quant[component_id]]; + + int r, s; + s = huff_decode(m_pHuff_tabs[m_comp_dc_tab[component_id]], r); + s = JPGD_HUFF_EXTEND(r, s); + + m_last_dc_val[component_id] = (s += m_last_dc_val[component_id]); + + p[0] = static_cast(s * q[0]); + + int prev_num_set = m_mcu_block_max_zag[mcu_block]; + + huff_tables *pH = m_pHuff_tabs[m_comp_ac_tab[component_id]]; + + int k; + for (k = 1; k < 64; k++) + { + int extra_bits; + s = huff_decode(pH, extra_bits); + + r = s >> 4; + s &= 15; + + if (s) + { + if (r) + { + if ((k + r) > 63) + stop_decoding(JPGD_DECODE_ERROR); + + if (k < prev_num_set) + { + int n = JPGD_MIN(r, prev_num_set - k); + int kt = k; + while (n--) + p[g_ZAG[kt++]] = 0; + } + + k += r; + } + + s = JPGD_HUFF_EXTEND(extra_bits, s); + + JPGD_ASSERT(k < 64); + + p[g_ZAG[k]] = static_cast(dequantize_ac(s, q[k])); //s * q[k]; + } + else + { + if (r == 15) + { + if ((k + 16) > 64) + stop_decoding(JPGD_DECODE_ERROR); + + if (k < prev_num_set) + { + int n = JPGD_MIN(16, prev_num_set - k); + int kt = k; + while (n--) + { + JPGD_ASSERT(kt <= 63); + p[g_ZAG[kt++]] = 0; + } + } + + k += 16 - 1; // - 1 because the loop counter is k + JPGD_ASSERT(p[g_ZAG[k]] == 0); + } + else + break; + } + } + + if (k < prev_num_set) + { + int kt = k; + while (kt < prev_num_set) + p[g_ZAG[kt++]] = 0; + } + + m_mcu_block_max_zag[mcu_block] = k; + + row_block++; + } + + if (m_freq_domain_chroma_upsample) + transform_mcu_expand(mcu_row); + else + transform_mcu(mcu_row); + + m_restarts_left--; + } +} + +// YCbCr H1V1 (1x1:1:1, 3 m_blocks per MCU) to RGB +void jpeg_decoder::H1V1Convert() +{ + int row = m_max_mcu_y_size - m_mcu_lines_left; + uint8 *d = m_pScan_line_0; + uint8 *s = m_pSample_buf + row * 8; + + for (int i = m_max_mcus_per_row; i > 0; i--) + { + for (int j = 0; j < 8; j++) + { + int y = s[j]; + int cb = s[64+j]; + int cr = s[128+j]; + + d[0] = clamp(y + m_crr[cr]); + d[1] = clamp(y + ((m_crg[cr] + m_cbg[cb]) >> 16)); + d[2] = clamp(y + m_cbb[cb]); + d[3] = 255; + + d += 4; + } + + s += 64*3; + } +} + +// YCbCr H2V1 (2x1:1:1, 4 m_blocks per MCU) to RGB +void jpeg_decoder::H2V1Convert() +{ + int row = m_max_mcu_y_size - m_mcu_lines_left; + uint8 *d0 = m_pScan_line_0; + uint8 *y = m_pSample_buf + row * 8; + uint8 *c = m_pSample_buf + 2*64 + row * 8; + + for (int i = m_max_mcus_per_row; i > 0; i--) + { + for (int l = 0; l < 2; l++) + { + for (int j = 0; j < 4; j++) + { + int cb = c[0]; + int cr = c[64]; + + int rc = m_crr[cr]; + int gc = ((m_crg[cr] + m_cbg[cb]) >> 16); + int bc = m_cbb[cb]; + + int yy = y[j<<1]; + d0[0] = clamp(yy+rc); + d0[1] = clamp(yy+gc); + d0[2] = clamp(yy+bc); + d0[3] = 255; + + yy = y[(j<<1)+1]; + d0[4] = clamp(yy+rc); + d0[5] = clamp(yy+gc); + d0[6] = clamp(yy+bc); + d0[7] = 255; + + d0 += 8; + + c++; + } + y += 64; + } + + y += 64*4 - 64*2; + c += 64*4 - 8; + } +} + +// YCbCr H2V1 (1x2:1:1, 4 m_blocks per MCU) to RGB +void jpeg_decoder::H1V2Convert() +{ + int row = m_max_mcu_y_size - m_mcu_lines_left; + uint8 *d0 = m_pScan_line_0; + uint8 *d1 = m_pScan_line_1; + uint8 *y; + uint8 *c; + + if (row < 8) + y = m_pSample_buf + row * 8; + else + y = m_pSample_buf + 64*1 + (row & 7) * 8; + + c = m_pSample_buf + 64*2 + (row >> 1) * 8; + + for (int i = m_max_mcus_per_row; i > 0; i--) + { + for (int j = 0; j < 8; j++) + { + int cb = c[0+j]; + int cr = c[64+j]; + + int rc = m_crr[cr]; + int gc = ((m_crg[cr] + m_cbg[cb]) >> 16); + int bc = m_cbb[cb]; + + int yy = y[j]; + d0[0] = clamp(yy+rc); + d0[1] = clamp(yy+gc); + d0[2] = clamp(yy+bc); + d0[3] = 255; + + yy = y[8+j]; + d1[0] = clamp(yy+rc); + d1[1] = clamp(yy+gc); + d1[2] = clamp(yy+bc); + d1[3] = 255; + + d0 += 4; + d1 += 4; + } + + y += 64*4; + c += 64*4; + } +} + +// YCbCr H2V2 (2x2:1:1, 6 m_blocks per MCU) to RGB +void jpeg_decoder::H2V2Convert() +{ + int row = m_max_mcu_y_size - m_mcu_lines_left; + uint8 *d0 = m_pScan_line_0; + uint8 *d1 = m_pScan_line_1; + uint8 *y; + uint8 *c; + + if (row < 8) + y = m_pSample_buf + row * 8; + else + y = m_pSample_buf + 64*2 + (row & 7) * 8; + + c = m_pSample_buf + 64*4 + (row >> 1) * 8; + + for (int i = m_max_mcus_per_row; i > 0; i--) + { + for (int l = 0; l < 2; l++) + { + for (int j = 0; j < 8; j += 2) + { + int cb = c[0]; + int cr = c[64]; + + int rc = m_crr[cr]; + int gc = ((m_crg[cr] + m_cbg[cb]) >> 16); + int bc = m_cbb[cb]; + + int yy = y[j]; + d0[0] = clamp(yy+rc); + d0[1] = clamp(yy+gc); + d0[2] = clamp(yy+bc); + d0[3] = 255; + + yy = y[j+1]; + d0[4] = clamp(yy+rc); + d0[5] = clamp(yy+gc); + d0[6] = clamp(yy+bc); + d0[7] = 255; + + yy = y[j+8]; + d1[0] = clamp(yy+rc); + d1[1] = clamp(yy+gc); + d1[2] = clamp(yy+bc); + d1[3] = 255; + + yy = y[j+8+1]; + d1[4] = clamp(yy+rc); + d1[5] = clamp(yy+gc); + d1[6] = clamp(yy+bc); + d1[7] = 255; + + d0 += 8; + d1 += 8; + + c++; + } + y += 64; + } + + y += 64*6 - 64*2; + c += 64*6 - 8; + } +} + +// Y (1 block per MCU) to 8-bit grayscale +void jpeg_decoder::gray_convert() +{ + int row = m_max_mcu_y_size - m_mcu_lines_left; + uint8 *d = m_pScan_line_0; + uint8 *s = m_pSample_buf + row * 8; + + for (int i = m_max_mcus_per_row; i > 0; i--) + { + *(uint *)d = *(uint *)s; + *(uint *)(&d[4]) = *(uint *)(&s[4]); + + s += 64; + d += 8; + } +} + +void jpeg_decoder::expanded_convert() +{ + int row = m_max_mcu_y_size - m_mcu_lines_left; + + uint8* Py = m_pSample_buf + (row / 8) * 64 * m_comp_h_samp[0] + (row & 7) * 8; + + uint8* d = m_pScan_line_0; + + for (int i = m_max_mcus_per_row; i > 0; i--) + { + for (int k = 0; k < m_max_mcu_x_size; k += 8) + { + const int Y_ofs = k * 8; + const int Cb_ofs = Y_ofs + 64 * m_expanded_blocks_per_component; + const int Cr_ofs = Y_ofs + 64 * m_expanded_blocks_per_component * 2; + for (int j = 0; j < 8; j++) + { + int y = Py[Y_ofs + j]; + int cb = Py[Cb_ofs + j]; + int cr = Py[Cr_ofs + j]; + + d[0] = clamp(y + m_crr[cr]); + d[1] = clamp(y + ((m_crg[cr] + m_cbg[cb]) >> 16)); + d[2] = clamp(y + m_cbb[cb]); + d[3] = 255; + + d += 4; + } + } + + Py += 64 * m_expanded_blocks_per_mcu; + } +} + +// Find end of image (EOI) marker, so we can return to the user the exact size of the input stream. +void jpeg_decoder::find_eoi() +{ + if (!m_progressive_flag) + { + // Attempt to read the EOI marker. + //get_bits_no_markers(m_bits_left & 7); + + // Prime the bit buffer + m_bits_left = 16; + get_bits(16); + get_bits(16); + + // The next marker _should_ be EOI + process_markers(); + } + + m_total_bytes_read -= m_in_buf_left; +} + +int jpeg_decoder::decode(const void** pScan_line, uint* pScan_line_len) +{ + if ((m_error_code) || (!m_ready_flag)) + return JPGD_FAILED; + + if (m_total_lines_left == 0) + return JPGD_DONE; + + if (m_mcu_lines_left == 0) + { + if (setjmp(m_jmp_state)) + return JPGD_FAILED; + + if (m_progressive_flag) + load_next_row(); + else + decode_next_row(); + + // Find the EOI marker if that was the last row. + if (m_total_lines_left <= m_max_mcu_y_size) + find_eoi(); + + m_mcu_lines_left = m_max_mcu_y_size; + } + + if (m_freq_domain_chroma_upsample) + { + expanded_convert(); + *pScan_line = m_pScan_line_0; + } + else + { + switch (m_scan_type) + { + case JPGD_YH2V2: + { + if ((m_mcu_lines_left & 1) == 0) + { + H2V2Convert(); + *pScan_line = m_pScan_line_0; + } + else + *pScan_line = m_pScan_line_1; + + break; + } + case JPGD_YH2V1: + { + H2V1Convert(); + *pScan_line = m_pScan_line_0; + break; + } + case JPGD_YH1V2: + { + if ((m_mcu_lines_left & 1) == 0) + { + H1V2Convert(); + *pScan_line = m_pScan_line_0; + } + else + *pScan_line = m_pScan_line_1; + + break; + } + case JPGD_YH1V1: + { + H1V1Convert(); + *pScan_line = m_pScan_line_0; + break; + } + case JPGD_GRAYSCALE: + { + gray_convert(); + *pScan_line = m_pScan_line_0; + + break; + } + } + } + + *pScan_line_len = m_real_dest_bytes_per_scan_line; + + m_mcu_lines_left--; + m_total_lines_left--; + + return JPGD_SUCCESS; +} + +// Creates the tables needed for efficient Huffman decoding. +void jpeg_decoder::make_huff_table(int index, huff_tables *pH) +{ + int p, i, l, si; + uint8 huffsize[257]; + uint huffcode[257]; + uint code; + uint subtree; + int code_size; + int lastp; + int nextfreeentry; + int currententry; + + pH->ac_table = m_huff_ac[index] != 0; + + p = 0; + + for (l = 1; l <= 16; l++) + { + for (i = 1; i <= m_huff_num[index][l]; i++) + huffsize[p++] = static_cast(l); + } + + huffsize[p] = 0; + + lastp = p; + + code = 0; + si = huffsize[0]; + p = 0; + + while (huffsize[p]) + { + while (huffsize[p] == si) + { + huffcode[p++] = code; + code++; + } + + code <<= 1; + si++; + } + + memset(pH->look_up, 0, sizeof(pH->look_up)); + memset(pH->look_up2, 0, sizeof(pH->look_up2)); + memset(pH->tree, 0, sizeof(pH->tree)); + memset(pH->code_size, 0, sizeof(pH->code_size)); + + nextfreeentry = -1; + + p = 0; + + while (p < lastp) + { + i = m_huff_val[index][p]; + code = huffcode[p]; + code_size = huffsize[p]; + + pH->code_size[i] = static_cast(code_size); + + if (code_size <= 8) + { + code <<= (8 - code_size); + + for (l = 1 << (8 - code_size); l > 0; l--) + { + JPGD_ASSERT(i < 256); + + pH->look_up[code] = i; + + bool has_extrabits = false; + int extra_bits = 0; + int num_extra_bits = i & 15; + + int bits_to_fetch = code_size; + if (num_extra_bits) + { + int total_codesize = code_size + num_extra_bits; + if (total_codesize <= 8) + { + has_extrabits = true; + extra_bits = ((1 << num_extra_bits) - 1) & (code >> (8 - total_codesize)); + JPGD_ASSERT(extra_bits <= 0x7FFF); + bits_to_fetch += num_extra_bits; + } + } + + if (!has_extrabits) + pH->look_up2[code] = i | (bits_to_fetch << 8); + else + pH->look_up2[code] = i | 0x8000 | (extra_bits << 16) | (bits_to_fetch << 8); + + code++; + } + } + else + { + subtree = (code >> (code_size - 8)) & 0xFF; + + currententry = pH->look_up[subtree]; + + if (currententry == 0) + { + pH->look_up[subtree] = currententry = nextfreeentry; + pH->look_up2[subtree] = currententry = nextfreeentry; + + nextfreeentry -= 2; + } + + code <<= (16 - (code_size - 8)); + + for (l = code_size; l > 9; l--) + { + if ((code & 0x8000) == 0) + currententry--; + + if (pH->tree[-currententry - 1] == 0) + { + pH->tree[-currententry - 1] = nextfreeentry; + + currententry = nextfreeentry; + + nextfreeentry -= 2; + } + else + currententry = pH->tree[-currententry - 1]; + + code <<= 1; + } + + if ((code & 0x8000) == 0) + currententry--; + + pH->tree[-currententry - 1] = i; + } + + p++; + } +} + +// Verifies the quantization tables needed for this scan are available. +void jpeg_decoder::check_quant_tables() +{ + for (int i = 0; i < m_comps_in_scan; i++) + if (m_quant[m_comp_quant[m_comp_list[i]]] == NULL) + stop_decoding(JPGD_UNDEFINED_QUANT_TABLE); +} + +// Verifies that all the Huffman tables needed for this scan are available. +void jpeg_decoder::check_huff_tables() +{ + for (int i = 0; i < m_comps_in_scan; i++) + { + if ((m_spectral_start == 0) && (m_huff_num[m_comp_dc_tab[m_comp_list[i]]] == NULL)) + stop_decoding(JPGD_UNDEFINED_HUFF_TABLE); + + if ((m_spectral_end > 0) && (m_huff_num[m_comp_ac_tab[m_comp_list[i]]] == NULL)) + stop_decoding(JPGD_UNDEFINED_HUFF_TABLE); + } + + for (int i = 0; i < JPGD_MAX_HUFF_TABLES; i++) + if (m_huff_num[i]) + { + if (!m_pHuff_tabs[i]) + m_pHuff_tabs[i] = (huff_tables *)alloc(sizeof(huff_tables)); + + make_huff_table(i, m_pHuff_tabs[i]); + } +} + +// Determines the component order inside each MCU. +// Also calcs how many MCU's are on each row, etc. +void jpeg_decoder::calc_mcu_block_order() +{ + int component_num, component_id; + int max_h_samp = 0, max_v_samp = 0; + + for (component_id = 0; component_id < m_comps_in_frame; component_id++) + { + if (m_comp_h_samp[component_id] > max_h_samp) + max_h_samp = m_comp_h_samp[component_id]; + + if (m_comp_v_samp[component_id] > max_v_samp) + max_v_samp = m_comp_v_samp[component_id]; + } + + for (component_id = 0; component_id < m_comps_in_frame; component_id++) + { + m_comp_h_blocks[component_id] = ((((m_image_x_size * m_comp_h_samp[component_id]) + (max_h_samp - 1)) / max_h_samp) + 7) / 8; + m_comp_v_blocks[component_id] = ((((m_image_y_size * m_comp_v_samp[component_id]) + (max_v_samp - 1)) / max_v_samp) + 7) / 8; + } + + if (m_comps_in_scan == 1) + { + m_mcus_per_row = m_comp_h_blocks[m_comp_list[0]]; + m_mcus_per_col = m_comp_v_blocks[m_comp_list[0]]; + } + else + { + m_mcus_per_row = (((m_image_x_size + 7) / 8) + (max_h_samp - 1)) / max_h_samp; + m_mcus_per_col = (((m_image_y_size + 7) / 8) + (max_v_samp - 1)) / max_v_samp; + } + + if (m_comps_in_scan == 1) + { + m_mcu_org[0] = m_comp_list[0]; + + m_blocks_per_mcu = 1; + } + else + { + m_blocks_per_mcu = 0; + + for (component_num = 0; component_num < m_comps_in_scan; component_num++) + { + int num_blocks; + + component_id = m_comp_list[component_num]; + + num_blocks = m_comp_h_samp[component_id] * m_comp_v_samp[component_id]; + + while (num_blocks--) + m_mcu_org[m_blocks_per_mcu++] = component_id; + } + } +} + +// Starts a new scan. +int jpeg_decoder::init_scan() +{ + if (!locate_sos_marker()) + return JPGD_FALSE; + + calc_mcu_block_order(); + + check_huff_tables(); + + check_quant_tables(); + + memset(m_last_dc_val, 0, m_comps_in_frame * sizeof(uint)); + + m_eob_run = 0; + + if (m_restart_interval) + { + m_restarts_left = m_restart_interval; + m_next_restart_num = 0; + } + + fix_in_buffer(); + + return JPGD_TRUE; +} + +// Starts a frame. Determines if the number of components or sampling factors +// are supported. +void jpeg_decoder::init_frame() +{ + int i; + + if (m_comps_in_frame == 1) + { + if ((m_comp_h_samp[0] != 1) || (m_comp_v_samp[0] != 1)) + stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS); + + m_scan_type = JPGD_GRAYSCALE; + m_max_blocks_per_mcu = 1; + m_max_mcu_x_size = 8; + m_max_mcu_y_size = 8; + } + else if (m_comps_in_frame == 3) + { + if ( ((m_comp_h_samp[1] != 1) || (m_comp_v_samp[1] != 1)) || + ((m_comp_h_samp[2] != 1) || (m_comp_v_samp[2] != 1)) ) + stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS); + + if ((m_comp_h_samp[0] == 1) && (m_comp_v_samp[0] == 1)) + { + m_scan_type = JPGD_YH1V1; + + m_max_blocks_per_mcu = 3; + m_max_mcu_x_size = 8; + m_max_mcu_y_size = 8; + } + else if ((m_comp_h_samp[0] == 2) && (m_comp_v_samp[0] == 1)) + { + m_scan_type = JPGD_YH2V1; + m_max_blocks_per_mcu = 4; + m_max_mcu_x_size = 16; + m_max_mcu_y_size = 8; + } + else if ((m_comp_h_samp[0] == 1) && (m_comp_v_samp[0] == 2)) + { + m_scan_type = JPGD_YH1V2; + m_max_blocks_per_mcu = 4; + m_max_mcu_x_size = 8; + m_max_mcu_y_size = 16; + } + else if ((m_comp_h_samp[0] == 2) && (m_comp_v_samp[0] == 2)) + { + m_scan_type = JPGD_YH2V2; + m_max_blocks_per_mcu = 6; + m_max_mcu_x_size = 16; + m_max_mcu_y_size = 16; + } + else + stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS); + } + else + stop_decoding(JPGD_UNSUPPORTED_COLORSPACE); + + m_max_mcus_per_row = (m_image_x_size + (m_max_mcu_x_size - 1)) / m_max_mcu_x_size; + m_max_mcus_per_col = (m_image_y_size + (m_max_mcu_y_size - 1)) / m_max_mcu_y_size; + + // These values are for the *destination* pixels: after conversion. + if (m_scan_type == JPGD_GRAYSCALE) + m_dest_bytes_per_pixel = 1; + else + m_dest_bytes_per_pixel = 4; + + m_dest_bytes_per_scan_line = ((m_image_x_size + 15) & 0xFFF0) * m_dest_bytes_per_pixel; + + m_real_dest_bytes_per_scan_line = (m_image_x_size * m_dest_bytes_per_pixel); + + // Initialize two scan line buffers. + m_pScan_line_0 = (uint8 *)alloc(m_dest_bytes_per_scan_line, true); + if ((m_scan_type == JPGD_YH1V2) || (m_scan_type == JPGD_YH2V2)) + m_pScan_line_1 = (uint8 *)alloc(m_dest_bytes_per_scan_line, true); + + m_max_blocks_per_row = m_max_mcus_per_row * m_max_blocks_per_mcu; + + // Should never happen + if (m_max_blocks_per_row > JPGD_MAX_BLOCKS_PER_ROW) + stop_decoding(JPGD_ASSERTION_ERROR); + + // Allocate the coefficient buffer, enough for one MCU + m_pMCU_coefficients = (jpgd_block_t*)alloc(m_max_blocks_per_mcu * 64 * sizeof(jpgd_block_t)); + + for (i = 0; i < m_max_blocks_per_mcu; i++) + m_mcu_block_max_zag[i] = 64; + + m_expanded_blocks_per_component = m_comp_h_samp[0] * m_comp_v_samp[0]; + m_expanded_blocks_per_mcu = m_expanded_blocks_per_component * m_comps_in_frame; + m_expanded_blocks_per_row = m_max_mcus_per_row * m_expanded_blocks_per_mcu; + // Freq. domain chroma upsampling is only supported for H2V2 subsampling factor (the most common one I've seen). + m_freq_domain_chroma_upsample = false; +#if JPGD_SUPPORT_FREQ_DOMAIN_UPSAMPLING + m_freq_domain_chroma_upsample = (m_expanded_blocks_per_mcu == 4*3); +#endif + + if (m_freq_domain_chroma_upsample) + m_pSample_buf = (uint8 *)alloc(m_expanded_blocks_per_row * 64); + else + m_pSample_buf = (uint8 *)alloc(m_max_blocks_per_row * 64); + + m_total_lines_left = m_image_y_size; + + m_mcu_lines_left = 0; + + create_look_ups(); +} + +// The coeff_buf series of methods originally stored the coefficients +// into a "virtual" file which was located in EMS, XMS, or a disk file. A cache +// was used to make this process more efficient. Now, we can store the entire +// thing in RAM. +jpeg_decoder::coeff_buf* jpeg_decoder::coeff_buf_open(int block_num_x, int block_num_y, int block_len_x, int block_len_y) +{ + coeff_buf* cb = (coeff_buf*)alloc(sizeof(coeff_buf)); + + cb->block_num_x = block_num_x; + cb->block_num_y = block_num_y; + cb->block_len_x = block_len_x; + cb->block_len_y = block_len_y; + cb->block_size = (block_len_x * block_len_y) * sizeof(jpgd_block_t); + cb->pData = (uint8 *)alloc(cb->block_size * block_num_x * block_num_y, true); + return cb; +} + +inline jpgd_block_t *jpeg_decoder::coeff_buf_getp(coeff_buf *cb, int block_x, int block_y) +{ + JPGD_ASSERT((block_x < cb->block_num_x) && (block_y < cb->block_num_y)); + return (jpgd_block_t *)(cb->pData + block_x * cb->block_size + block_y * (cb->block_size * cb->block_num_x)); +} + +// The following methods decode the various types of m_blocks encountered +// in progressively encoded images. +void jpeg_decoder::decode_block_dc_first(jpeg_decoder *pD, int component_id, int block_x, int block_y) +{ + int s, r; + jpgd_block_t *p = pD->coeff_buf_getp(pD->m_dc_coeffs[component_id], block_x, block_y); + + if ((s = pD->huff_decode(pD->m_pHuff_tabs[pD->m_comp_dc_tab[component_id]])) != 0) + { + r = pD->get_bits_no_markers(s); + s = JPGD_HUFF_EXTEND(r, s); + } + + pD->m_last_dc_val[component_id] = (s += pD->m_last_dc_val[component_id]); + + p[0] = static_cast(s << pD->m_successive_low); +} + +void jpeg_decoder::decode_block_dc_refine(jpeg_decoder *pD, int component_id, int block_x, int block_y) +{ + if (pD->get_bits_no_markers(1)) + { + jpgd_block_t *p = pD->coeff_buf_getp(pD->m_dc_coeffs[component_id], block_x, block_y); + + p[0] |= (1 << pD->m_successive_low); + } +} + +void jpeg_decoder::decode_block_ac_first(jpeg_decoder *pD, int component_id, int block_x, int block_y) +{ + int k, s, r; + + if (pD->m_eob_run) + { + pD->m_eob_run--; + return; + } + + jpgd_block_t *p = pD->coeff_buf_getp(pD->m_ac_coeffs[component_id], block_x, block_y); + + for (k = pD->m_spectral_start; k <= pD->m_spectral_end; k++) + { + s = pD->huff_decode(pD->m_pHuff_tabs[pD->m_comp_ac_tab[component_id]]); + + r = s >> 4; + s &= 15; + + if (s) + { + if ((k += r) > 63) + pD->stop_decoding(JPGD_DECODE_ERROR); + + r = pD->get_bits_no_markers(s); + s = JPGD_HUFF_EXTEND(r, s); + + p[g_ZAG[k]] = static_cast(s << pD->m_successive_low); + } + else + { + if (r == 15) + { + if ((k += 15) > 63) + pD->stop_decoding(JPGD_DECODE_ERROR); + } + else + { + pD->m_eob_run = 1 << r; + + if (r) + pD->m_eob_run += pD->get_bits_no_markers(r); + + pD->m_eob_run--; + + break; + } + } + } +} + +void jpeg_decoder::decode_block_ac_refine(jpeg_decoder *pD, int component_id, int block_x, int block_y) +{ + int s, k, r; + int p1 = 1 << pD->m_successive_low; + int m1 = (-1) << pD->m_successive_low; + jpgd_block_t *p = pD->coeff_buf_getp(pD->m_ac_coeffs[component_id], block_x, block_y); + + JPGD_ASSERT(pD->m_spectral_end <= 63); + + k = pD->m_spectral_start; + + if (pD->m_eob_run == 0) + { + for ( ; k <= pD->m_spectral_end; k++) + { + s = pD->huff_decode(pD->m_pHuff_tabs[pD->m_comp_ac_tab[component_id]]); + + r = s >> 4; + s &= 15; + + if (s) + { + if (s != 1) + pD->stop_decoding(JPGD_DECODE_ERROR); + + if (pD->get_bits_no_markers(1)) + s = p1; + else + s = m1; + } + else + { + if (r != 15) + { + pD->m_eob_run = 1 << r; + + if (r) + pD->m_eob_run += pD->get_bits_no_markers(r); + + break; + } + } + + do + { + jpgd_block_t *this_coef = p + g_ZAG[k & 63]; + + if (*this_coef != 0) + { + if (pD->get_bits_no_markers(1)) + { + if ((*this_coef & p1) == 0) + { + if (*this_coef >= 0) + *this_coef = static_cast(*this_coef + p1); + else + *this_coef = static_cast(*this_coef + m1); + } + } + } + else + { + if (--r < 0) + break; + } + + k++; + + } while (k <= pD->m_spectral_end); + + if ((s) && (k < 64)) + { + p[g_ZAG[k]] = static_cast(s); + } + } + } + + if (pD->m_eob_run > 0) + { + for ( ; k <= pD->m_spectral_end; k++) + { + jpgd_block_t *this_coef = p + g_ZAG[k & 63]; // logical AND to shut up static code analysis + + if (*this_coef != 0) + { + if (pD->get_bits_no_markers(1)) + { + if ((*this_coef & p1) == 0) + { + if (*this_coef >= 0) + *this_coef = static_cast(*this_coef + p1); + else + *this_coef = static_cast(*this_coef + m1); + } + } + } + } + + pD->m_eob_run--; + } +} + +// Decode a scan in a progressively encoded image. +void jpeg_decoder::decode_scan(pDecode_block_func decode_block_func) +{ + int mcu_row, mcu_col, mcu_block; + int block_x_mcu[JPGD_MAX_COMPONENTS], m_block_y_mcu[JPGD_MAX_COMPONENTS]; + + memset(m_block_y_mcu, 0, sizeof(m_block_y_mcu)); + + for (mcu_col = 0; mcu_col < m_mcus_per_col; mcu_col++) + { + int component_num, component_id; + + memset(block_x_mcu, 0, sizeof(block_x_mcu)); + + for (mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++) + { + int block_x_mcu_ofs = 0, block_y_mcu_ofs = 0; + + if ((m_restart_interval) && (m_restarts_left == 0)) + process_restart(); + + for (mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++) + { + component_id = m_mcu_org[mcu_block]; + + decode_block_func(this, component_id, block_x_mcu[component_id] + block_x_mcu_ofs, m_block_y_mcu[component_id] + block_y_mcu_ofs); + + if (m_comps_in_scan == 1) + block_x_mcu[component_id]++; + else + { + if (++block_x_mcu_ofs == m_comp_h_samp[component_id]) + { + block_x_mcu_ofs = 0; + + if (++block_y_mcu_ofs == m_comp_v_samp[component_id]) + { + block_y_mcu_ofs = 0; + block_x_mcu[component_id] += m_comp_h_samp[component_id]; + } + } + } + } + + m_restarts_left--; + } + + if (m_comps_in_scan == 1) + m_block_y_mcu[m_comp_list[0]]++; + else + { + for (component_num = 0; component_num < m_comps_in_scan; component_num++) + { + component_id = m_comp_list[component_num]; + m_block_y_mcu[component_id] += m_comp_v_samp[component_id]; + } + } + } +} + +// Decode a progressively encoded image. +void jpeg_decoder::init_progressive() +{ + int i; + + if (m_comps_in_frame == 4) + stop_decoding(JPGD_UNSUPPORTED_COLORSPACE); + + // Allocate the coefficient buffers. + for (i = 0; i < m_comps_in_frame; i++) + { + m_dc_coeffs[i] = coeff_buf_open(m_max_mcus_per_row * m_comp_h_samp[i], m_max_mcus_per_col * m_comp_v_samp[i], 1, 1); + m_ac_coeffs[i] = coeff_buf_open(m_max_mcus_per_row * m_comp_h_samp[i], m_max_mcus_per_col * m_comp_v_samp[i], 8, 8); + } + + for ( ; ; ) + { + int dc_only_scan, refinement_scan; + pDecode_block_func decode_block_func; + + if (!init_scan()) + break; + + dc_only_scan = (m_spectral_start == 0); + refinement_scan = (m_successive_high != 0); + + if ((m_spectral_start > m_spectral_end) || (m_spectral_end > 63)) + stop_decoding(JPGD_BAD_SOS_SPECTRAL); + + if (dc_only_scan) + { + if (m_spectral_end) + stop_decoding(JPGD_BAD_SOS_SPECTRAL); + } + else if (m_comps_in_scan != 1) /* AC scans can only contain one component */ + stop_decoding(JPGD_BAD_SOS_SPECTRAL); + + if ((refinement_scan) && (m_successive_low != m_successive_high - 1)) + stop_decoding(JPGD_BAD_SOS_SUCCESSIVE); + + if (dc_only_scan) + { + if (refinement_scan) + decode_block_func = decode_block_dc_refine; + else + decode_block_func = decode_block_dc_first; + } + else + { + if (refinement_scan) + decode_block_func = decode_block_ac_refine; + else + decode_block_func = decode_block_ac_first; + } + + decode_scan(decode_block_func); + + m_bits_left = 16; + get_bits(16); + get_bits(16); + } + + m_comps_in_scan = m_comps_in_frame; + + for (i = 0; i < m_comps_in_frame; i++) + m_comp_list[i] = i; + + calc_mcu_block_order(); +} + +void jpeg_decoder::init_sequential() +{ + if (!init_scan()) + stop_decoding(JPGD_UNEXPECTED_MARKER); +} + +void jpeg_decoder::decode_start() +{ + init_frame(); + + if (m_progressive_flag) + init_progressive(); + else + init_sequential(); +} + +void jpeg_decoder::decode_init(jpeg_decoder_stream *pStream) +{ + init(pStream); + locate_sof_marker(); +} + +jpeg_decoder::jpeg_decoder(jpeg_decoder_stream *pStream) +{ + if (setjmp(m_jmp_state)) + return; + decode_init(pStream); +} + +int jpeg_decoder::begin_decoding() +{ + if (m_ready_flag) + return JPGD_SUCCESS; + + if (m_error_code) + return JPGD_FAILED; + + if (setjmp(m_jmp_state)) + return JPGD_FAILED; + + decode_start(); + + m_ready_flag = true; + + return JPGD_SUCCESS; +} + +jpeg_decoder::~jpeg_decoder() +{ + free_all_blocks(); +} + +jpeg_decoder_file_stream::jpeg_decoder_file_stream() +{ + m_pFile = NULL; + m_eof_flag = false; + m_error_flag = false; +} + +void jpeg_decoder_file_stream::close() +{ + if (m_pFile) + { + fclose(m_pFile); + m_pFile = NULL; + } + + m_eof_flag = false; + m_error_flag = false; +} + +jpeg_decoder_file_stream::~jpeg_decoder_file_stream() +{ + close(); +} + +bool jpeg_decoder_file_stream::open(const char *Pfilename) +{ + close(); + + m_eof_flag = false; + m_error_flag = false; + +#if defined(_MSC_VER) + m_pFile = NULL; + fopen_s(&m_pFile, Pfilename, "rb"); +#else + m_pFile = fopen(Pfilename, "rb"); +#endif + return m_pFile != NULL; +} + +int jpeg_decoder_file_stream::read(uint8 *pBuf, int max_bytes_to_read, bool *pEOF_flag) +{ + if (!m_pFile) + return -1; + + if (m_eof_flag) + { + *pEOF_flag = true; + return 0; + } + + if (m_error_flag) + return -1; + + int bytes_read = static_cast(fread(pBuf, 1, max_bytes_to_read, m_pFile)); + if (bytes_read < max_bytes_to_read) + { + if (ferror(m_pFile)) + { + m_error_flag = true; + return -1; + } + + m_eof_flag = true; + *pEOF_flag = true; + } + + return bytes_read; +} + +bool jpeg_decoder_mem_stream::open(const uint8 *pSrc_data, uint size) +{ + close(); + m_pSrc_data = pSrc_data; + m_ofs = 0; + m_size = size; + return true; +} + +int jpeg_decoder_mem_stream::read(uint8 *pBuf, int max_bytes_to_read, bool *pEOF_flag) +{ + *pEOF_flag = false; + + if (!m_pSrc_data) + return -1; + + uint bytes_remaining = m_size - m_ofs; + if ((uint)max_bytes_to_read > bytes_remaining) + { + max_bytes_to_read = bytes_remaining; + *pEOF_flag = true; + } + + memcpy(pBuf, m_pSrc_data + m_ofs, max_bytes_to_read); + m_ofs += max_bytes_to_read; + + return max_bytes_to_read; +} + +unsigned char *decompress_jpeg_image_from_stream(jpeg_decoder_stream *pStream, int *width, int *height, int *actual_comps, int req_comps) +{ + if (!actual_comps) + return NULL; + *actual_comps = 0; + + if ((!pStream) || (!width) || (!height) || (!req_comps)) + return NULL; + + if ((req_comps != 1) && (req_comps != 3) && (req_comps != 4)) + return NULL; + + jpeg_decoder decoder(pStream); + if (decoder.get_error_code() != JPGD_SUCCESS) + return NULL; + + const int image_width = decoder.get_width(), image_height = decoder.get_height(); + *width = image_width; + *height = image_height; + *actual_comps = decoder.get_num_components(); + + if (decoder.begin_decoding() != JPGD_SUCCESS) + return NULL; + + const int dst_bpl = image_width * req_comps; + + uint8 *pImage_data = (uint8*)jpgd_malloc(dst_bpl * image_height); + if (!pImage_data) + return NULL; + + for (int y = 0; y < image_height; y++) + { + const uint8* pScan_line; + uint scan_line_len; + if (decoder.decode((const void**)&pScan_line, &scan_line_len) != JPGD_SUCCESS) + { + jpgd_free(pImage_data); + return NULL; + } + + uint8 *pDst = pImage_data + y * dst_bpl; + + if (((req_comps == 1) && (decoder.get_num_components() == 1)) || ((req_comps == 4) && (decoder.get_num_components() == 3))) + memcpy(pDst, pScan_line, dst_bpl); + else if (decoder.get_num_components() == 1) + { + if (req_comps == 3) + { + for (int x = 0; x < image_width; x++) + { + uint8 luma = pScan_line[x]; + pDst[0] = luma; + pDst[1] = luma; + pDst[2] = luma; + pDst += 3; + } + } + else + { + for (int x = 0; x < image_width; x++) + { + uint8 luma = pScan_line[x]; + pDst[0] = luma; + pDst[1] = luma; + pDst[2] = luma; + pDst[3] = 255; + pDst += 4; + } + } + } + else if (decoder.get_num_components() == 3) + { + if (req_comps == 1) + { + const int YR = 19595, YG = 38470, YB = 7471; + for (int x = 0; x < image_width; x++) + { + int r = pScan_line[x*4+0]; + int g = pScan_line[x*4+1]; + int b = pScan_line[x*4+2]; + *pDst++ = static_cast((r * YR + g * YG + b * YB + 32768) >> 16); + } + } + else + { + for (int x = 0; x < image_width; x++) + { + pDst[0] = pScan_line[x*4+0]; + pDst[1] = pScan_line[x*4+1]; + pDst[2] = pScan_line[x*4+2]; + pDst += 3; + } + } + } + } + + return pImage_data; +} + +unsigned char *decompress_jpeg_image_from_memory(const unsigned char *pSrc_data, int src_data_size, int *width, int *height, int *actual_comps, int req_comps) +{ + jpgd::jpeg_decoder_mem_stream mem_stream(pSrc_data, src_data_size); + return decompress_jpeg_image_from_stream(&mem_stream, width, height, actual_comps, req_comps); +} + +unsigned char *decompress_jpeg_image_from_file(const char *pSrc_filename, int *width, int *height, int *actual_comps, int req_comps) +{ + jpgd::jpeg_decoder_file_stream file_stream; + if (!file_stream.open(pSrc_filename)) + return NULL; + return decompress_jpeg_image_from_stream(&file_stream, width, height, actual_comps, req_comps); +} + +} // namespace jpgd diff --git a/crnlib/crn_jpgd.h b/crnlib/crn_jpgd.h new file mode 100644 index 0000000..46069a1 --- /dev/null +++ b/crnlib/crn_jpgd.h @@ -0,0 +1,319 @@ +// jpgd.h - C++ class for JPEG decompression. +// Public domain, Rich Geldreich +#ifndef JPEG_DECODER_H +#define JPEG_DECODER_H + +#include +#include +#include + +#ifdef _MSC_VER + #define JPGD_NORETURN __declspec(noreturn) +#elif defined(__GNUC__) + #define JPGD_NORETURN __attribute__ ((noreturn)) +#else + #define JPGD_NORETURN +#endif + +namespace jpgd +{ + typedef unsigned char uint8; + typedef signed short int16; + typedef unsigned short uint16; + typedef unsigned int uint; + typedef signed int int32; + + // Loads a JPEG image from a memory buffer or a file. + // req_comps can be 1 (grayscale), 3 (RGB), or 4 (RGBA). + // On return, width/height will be set to the image's dimensions, and actual_comps will be set to the either 1 (grayscale) or 3 (RGB). + // Notes: For more control over where and how the source data is read, see the decompress_jpeg_image_from_stream() function below, or call the jpeg_decoder class directly. + // Requesting a 8 or 32bpp image is currently a little faster than 24bpp because the jpeg_decoder class itself currently always unpacks to either 8 or 32bpp. + unsigned char *decompress_jpeg_image_from_memory(const unsigned char *pSrc_data, int src_data_size, int *width, int *height, int *actual_comps, int req_comps); + unsigned char *decompress_jpeg_image_from_file(const char *pSrc_filename, int *width, int *height, int *actual_comps, int req_comps); + + // Success/failure error codes. + enum jpgd_status + { + JPGD_SUCCESS = 0, JPGD_FAILED = -1, JPGD_DONE = 1, + JPGD_BAD_DHT_COUNTS = -256, JPGD_BAD_DHT_INDEX, JPGD_BAD_DHT_MARKER, JPGD_BAD_DQT_MARKER, JPGD_BAD_DQT_TABLE, + JPGD_BAD_PRECISION, JPGD_BAD_HEIGHT, JPGD_BAD_WIDTH, JPGD_TOO_MANY_COMPONENTS, + JPGD_BAD_SOF_LENGTH, JPGD_BAD_VARIABLE_MARKER, JPGD_BAD_DRI_LENGTH, JPGD_BAD_SOS_LENGTH, + JPGD_BAD_SOS_COMP_ID, JPGD_W_EXTRA_BYTES_BEFORE_MARKER, JPGD_NO_ARITHMITIC_SUPPORT, JPGD_UNEXPECTED_MARKER, + JPGD_NOT_JPEG, JPGD_UNSUPPORTED_MARKER, JPGD_BAD_DQT_LENGTH, JPGD_TOO_MANY_BLOCKS, + JPGD_UNDEFINED_QUANT_TABLE, JPGD_UNDEFINED_HUFF_TABLE, JPGD_NOT_SINGLE_SCAN, JPGD_UNSUPPORTED_COLORSPACE, + JPGD_UNSUPPORTED_SAMP_FACTORS, JPGD_DECODE_ERROR, JPGD_BAD_RESTART_MARKER, JPGD_ASSERTION_ERROR, + JPGD_BAD_SOS_SPECTRAL, JPGD_BAD_SOS_SUCCESSIVE, JPGD_STREAM_READ, JPGD_NOTENOUGHMEM + }; + + // Input stream interface. + // Derive from this class to read input data from sources other than files or memory. Set m_eof_flag to true when no more data is available. + // The decoder is rather greedy: it will keep on calling this method until its internal input buffer is full, or until the EOF flag is set. + // It the input stream contains data after the JPEG stream's EOI (end of image) marker it will probably be pulled into the internal buffer. + // Call the get_total_bytes_read() method to determine the actual size of the JPEG stream after successful decoding. + class jpeg_decoder_stream + { + public: + jpeg_decoder_stream() { } + virtual ~jpeg_decoder_stream() { } + + // The read() method is called when the internal input buffer is empty. + // Parameters: + // pBuf - input buffer + // max_bytes_to_read - maximum bytes that can be written to pBuf + // pEOF_flag - set this to true if at end of stream (no more bytes remaining) + // Returns -1 on error, otherwise return the number of bytes actually written to the buffer (which may be 0). + // Notes: This method will be called in a loop until you set *pEOF_flag to true or the internal buffer is full. + virtual int read(uint8 *pBuf, int max_bytes_to_read, bool *pEOF_flag) = 0; + }; + + // stdio FILE stream class. + class jpeg_decoder_file_stream : public jpeg_decoder_stream + { + jpeg_decoder_file_stream(const jpeg_decoder_file_stream &); + jpeg_decoder_file_stream &operator =(const jpeg_decoder_file_stream &); + + FILE *m_pFile; + bool m_eof_flag, m_error_flag; + + public: + jpeg_decoder_file_stream(); + virtual ~jpeg_decoder_file_stream(); + + bool open(const char *Pfilename); + void close(); + + virtual int read(uint8 *pBuf, int max_bytes_to_read, bool *pEOF_flag); + }; + + // Memory stream class. + class jpeg_decoder_mem_stream : public jpeg_decoder_stream + { + const uint8 *m_pSrc_data; + uint m_ofs, m_size; + + public: + jpeg_decoder_mem_stream() : m_pSrc_data(NULL), m_ofs(0), m_size(0) { } + jpeg_decoder_mem_stream(const uint8 *pSrc_data, uint size) : m_pSrc_data(pSrc_data), m_ofs(0), m_size(size) { } + + virtual ~jpeg_decoder_mem_stream() { } + + bool open(const uint8 *pSrc_data, uint size); + void close() { m_pSrc_data = NULL; m_ofs = 0; m_size = 0; } + + virtual int read(uint8 *pBuf, int max_bytes_to_read, bool *pEOF_flag); + }; + + // Loads JPEG file from a jpeg_decoder_stream. + unsigned char *decompress_jpeg_image_from_stream(jpeg_decoder_stream *pStream, int *width, int *height, int *actual_comps, int req_comps); + + enum + { + JPGD_IN_BUF_SIZE = 8192, JPGD_MAX_BLOCKS_PER_MCU = 10, JPGD_MAX_HUFF_TABLES = 8, JPGD_MAX_QUANT_TABLES = 4, + JPGD_MAX_COMPONENTS = 4, JPGD_MAX_COMPS_IN_SCAN = 4, JPGD_MAX_BLOCKS_PER_ROW = 8192, JPGD_MAX_HEIGHT = 16384, JPGD_MAX_WIDTH = 16384 + }; + + typedef int16 jpgd_quant_t; + typedef int16 jpgd_block_t; + + class jpeg_decoder + { + public: + // Call get_error_code() after constructing to determine if the stream is valid or not. You may call the get_width(), get_height(), etc. + // methods after the constructor is called. You may then either destruct the object, or begin decoding the image by calling begin_decoding(), then decode() on each scanline. + jpeg_decoder(jpeg_decoder_stream *pStream); + + ~jpeg_decoder(); + + // Call this method after constructing the object to begin decompression. + // If JPGD_SUCCESS is returned you may then call decode() on each scanline. + int begin_decoding(); + + // Returns the next scan line. + // For grayscale images, pScan_line will point to a buffer containing 8-bit pixels (get_bytes_per_pixel() will return 1). + // Otherwise, it will always point to a buffer containing 32-bit RGBA pixels (A will always be 255, and get_bytes_per_pixel() will return 4). + // Returns JPGD_SUCCESS if a scan line has been returned. + // Returns JPGD_DONE if all scan lines have been returned. + // Returns JPGD_FAILED if an error occurred. Call get_error_code() for a more info. + int decode(const void** pScan_line, uint* pScan_line_len); + + inline jpgd_status get_error_code() const { return m_error_code; } + + inline int get_width() const { return m_image_x_size; } + inline int get_height() const { return m_image_y_size; } + + inline int get_num_components() const { return m_comps_in_frame; } + + inline int get_bytes_per_pixel() const { return m_dest_bytes_per_pixel; } + inline int get_bytes_per_scan_line() const { return m_image_x_size * get_bytes_per_pixel(); } + + // Returns the total number of bytes actually consumed by the decoder (which should equal the actual size of the JPEG file). + inline int get_total_bytes_read() const { return m_total_bytes_read; } + + private: + jpeg_decoder(const jpeg_decoder &); + jpeg_decoder &operator =(const jpeg_decoder &); + + typedef void (*pDecode_block_func)(jpeg_decoder *, int, int, int); + + struct huff_tables + { + bool ac_table; + uint look_up[256]; + uint look_up2[256]; + uint8 code_size[256]; + uint tree[512]; + }; + + struct coeff_buf + { + uint8 *pData; + int block_num_x, block_num_y; + int block_len_x, block_len_y; + int block_size; + }; + + struct mem_block + { + mem_block *m_pNext; + size_t m_used_count; + size_t m_size; + char m_data[1]; + }; + + jmp_buf m_jmp_state; + mem_block *m_pMem_blocks; + int m_image_x_size; + int m_image_y_size; + jpeg_decoder_stream *m_pStream; + int m_progressive_flag; + uint8 m_huff_ac[JPGD_MAX_HUFF_TABLES]; + uint8* m_huff_num[JPGD_MAX_HUFF_TABLES]; // pointer to number of Huffman codes per bit size + uint8* m_huff_val[JPGD_MAX_HUFF_TABLES]; // pointer to Huffman codes per bit size + jpgd_quant_t* m_quant[JPGD_MAX_QUANT_TABLES]; // pointer to quantization tables + int m_scan_type; // Gray, Yh1v1, Yh1v2, Yh2v1, Yh2v2 (CMYK111, CMYK4114 no longer supported) + int m_comps_in_frame; // # of components in frame + int m_comp_h_samp[JPGD_MAX_COMPONENTS]; // component's horizontal sampling factor + int m_comp_v_samp[JPGD_MAX_COMPONENTS]; // component's vertical sampling factor + int m_comp_quant[JPGD_MAX_COMPONENTS]; // component's quantization table selector + int m_comp_ident[JPGD_MAX_COMPONENTS]; // component's ID + int m_comp_h_blocks[JPGD_MAX_COMPONENTS]; + int m_comp_v_blocks[JPGD_MAX_COMPONENTS]; + int m_comps_in_scan; // # of components in scan + int m_comp_list[JPGD_MAX_COMPS_IN_SCAN]; // components in this scan + int m_comp_dc_tab[JPGD_MAX_COMPONENTS]; // component's DC Huffman coding table selector + int m_comp_ac_tab[JPGD_MAX_COMPONENTS]; // component's AC Huffman coding table selector + int m_spectral_start; // spectral selection start + int m_spectral_end; // spectral selection end + int m_successive_low; // successive approximation low + int m_successive_high; // successive approximation high + int m_max_mcu_x_size; // MCU's max. X size in pixels + int m_max_mcu_y_size; // MCU's max. Y size in pixels + int m_blocks_per_mcu; + int m_max_blocks_per_row; + int m_mcus_per_row, m_mcus_per_col; + int m_mcu_org[JPGD_MAX_BLOCKS_PER_MCU]; + int m_total_lines_left; // total # lines left in image + int m_mcu_lines_left; // total # lines left in this MCU + int m_real_dest_bytes_per_scan_line; + int m_dest_bytes_per_scan_line; // rounded up + int m_dest_bytes_per_pixel; // 4 (RGB) or 1 (Y) + huff_tables* m_pHuff_tabs[JPGD_MAX_HUFF_TABLES]; + coeff_buf* m_dc_coeffs[JPGD_MAX_COMPONENTS]; + coeff_buf* m_ac_coeffs[JPGD_MAX_COMPONENTS]; + int m_eob_run; + int m_block_y_mcu[JPGD_MAX_COMPONENTS]; + uint8* m_pIn_buf_ofs; + int m_in_buf_left; + int m_tem_flag; + bool m_eof_flag; + uint8 m_in_buf_pad_start[128]; + uint8 m_in_buf[JPGD_IN_BUF_SIZE + 128]; + uint8 m_in_buf_pad_end[128]; + int m_bits_left; + uint m_bit_buf; + int m_restart_interval; + int m_restarts_left; + int m_next_restart_num; + int m_max_mcus_per_row; + int m_max_blocks_per_mcu; + int m_expanded_blocks_per_mcu; + int m_expanded_blocks_per_row; + int m_expanded_blocks_per_component; + bool m_freq_domain_chroma_upsample; + int m_max_mcus_per_col; + uint m_last_dc_val[JPGD_MAX_COMPONENTS]; + jpgd_block_t* m_pMCU_coefficients; + int m_mcu_block_max_zag[JPGD_MAX_BLOCKS_PER_MCU]; + uint8* m_pSample_buf; + int m_crr[256]; + int m_cbb[256]; + int m_crg[256]; + int m_cbg[256]; + uint8* m_pScan_line_0; + uint8* m_pScan_line_1; + jpgd_status m_error_code; + bool m_ready_flag; + int m_total_bytes_read; + + void free_all_blocks(); + JPGD_NORETURN void stop_decoding(jpgd_status status); + void *alloc(size_t n, bool zero = false); + void word_clear(void *p, uint16 c, uint n); + void prep_in_buffer(); + void read_dht_marker(); + void read_dqt_marker(); + void read_sof_marker(); + void skip_variable_marker(); + void read_dri_marker(); + void read_sos_marker(); + int next_marker(); + int process_markers(); + void locate_soi_marker(); + void locate_sof_marker(); + int locate_sos_marker(); + void init(jpeg_decoder_stream * pStream); + void create_look_ups(); + void fix_in_buffer(); + void transform_mcu(int mcu_row); + void transform_mcu_expand(int mcu_row); + coeff_buf* coeff_buf_open(int block_num_x, int block_num_y, int block_len_x, int block_len_y); + inline jpgd_block_t *coeff_buf_getp(coeff_buf *cb, int block_x, int block_y); + void load_next_row(); + void decode_next_row(); + void make_huff_table(int index, huff_tables *pH); + void check_quant_tables(); + void check_huff_tables(); + void calc_mcu_block_order(); + int init_scan(); + void init_frame(); + void process_restart(); + void decode_scan(pDecode_block_func decode_block_func); + void init_progressive(); + void init_sequential(); + void decode_start(); + void decode_init(jpeg_decoder_stream * pStream); + void H2V2Convert(); + void H2V1Convert(); + void H1V2Convert(); + void H1V1Convert(); + void gray_convert(); + void expanded_convert(); + void find_eoi(); + inline uint get_char(); + inline uint get_char(bool *pPadding_flag); + inline void stuff_char(uint8 q); + inline uint8 get_octet(); + inline uint get_bits(int num_bits); + inline uint get_bits_no_markers(int numbits); + inline int huff_decode(huff_tables *pH); + inline int huff_decode(huff_tables *pH, int& extrabits); + static inline uint8 clamp(int i); + static void decode_block_dc_first(jpeg_decoder *pD, int component_id, int block_x, int block_y); + static void decode_block_dc_refine(jpeg_decoder *pD, int component_id, int block_x, int block_y); + static void decode_block_ac_first(jpeg_decoder *pD, int component_id, int block_x, int block_y); + static void decode_block_ac_refine(jpeg_decoder *pD, int component_id, int block_x, int block_y); + }; + +} // namespace jpgd + +#endif // JPEG_DECODER_H diff --git a/crnlib/crn_jpge.cpp b/crnlib/crn_jpge.cpp new file mode 100644 index 0000000..964f908 --- /dev/null +++ b/crnlib/crn_jpge.cpp @@ -0,0 +1,1044 @@ +// jpge.cpp - C++ class for JPEG compression. +// Public domain, Rich Geldreich +// v1.01, Dec. 18, 2010 - Initial release +// v1.02, Apr. 6, 2011 - Removed 2x2 ordered dither in H2V1 chroma subsampling method load_block_16_8_8(). (The rounding factor was 2, when it should have been 1. Either way, it wasn't helping.) +// v1.03, Apr. 16, 2011 - Added support for optimized Huffman code tables, optimized dynamic memory allocation down to only 1 alloc. +// Also from Alex Evans: Added RGBA support, linear memory allocator (no longer needed in v1.03). +// v1.04, May. 19, 2012: Forgot to set m_pFile ptr to NULL in cfile_stream::close(). Thanks to Owen Kaluza for reporting this bug. +// Code tweaks to fix VS2008 static code analysis warnings (all looked harmless). +// Code review revealed method load_block_16_8_8() (used for the non-default H2V1 sampling mode to downsample chroma) somehow didn't get the rounding factor fix from v1.02. + +#include "crn_jpge.h" + +#include +#include +#include + +#include "crn_core.h" + +#define JPGE_MAX(a,b) (((a)>(b))?(a):(b)) +#define JPGE_MIN(a,b) (((a)<(b))?(a):(b)) + +namespace jpge { + + static inline void *jpge_malloc(size_t nSize) { return crnlib::crnlib_malloc(nSize); } + static inline void jpge_free(void *p) { crnlib::crnlib_free(p); } + + // Various JPEG enums and tables. + enum { M_SOF0 = 0xC0, M_DHT = 0xC4, M_SOI = 0xD8, M_EOI = 0xD9, M_SOS = 0xDA, M_DQT = 0xDB, M_APP0 = 0xE0 }; + enum { DC_LUM_CODES = 12, AC_LUM_CODES = 256, DC_CHROMA_CODES = 12, AC_CHROMA_CODES = 256, MAX_HUFF_SYMBOLS = 257, MAX_HUFF_CODESIZE = 32 }; + + static uint8 s_zag[64] = { 0,1,8,16,9,2,3,10,17,24,32,25,18,11,4,5,12,19,26,33,40,48,41,34,27,20,13,6,7,14,21,28,35,42,49,56,57,50,43,36,29,22,15,23,30,37,44,51,58,59,52,45,38,31,39,46,53,60,61,54,47,55,62,63 }; + static int16 s_std_lum_quant[64] = { 16,11,12,14,12,10,16,14,13,14,18,17,16,19,24,40,26,24,22,22,24,49,35,37,29,40,58,51,61,60,57,51,56,55,64,72,92,78,64,68,87,69,55,56,80,109,81,87,95,98,103,104,103,62,77,113,121,112,100,120,92,101,103,99 }; + static int16 s_std_croma_quant[64] = { 17,18,18,24,21,24,47,26,26,47,99,66,56,66,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99 }; + static uint8 s_dc_lum_bits[17] = { 0,0,1,5,1,1,1,1,1,1,0,0,0,0,0,0,0 }; + static uint8 s_dc_lum_val[DC_LUM_CODES] = { 0,1,2,3,4,5,6,7,8,9,10,11 }; + static uint8 s_ac_lum_bits[17] = { 0,0,2,1,3,3,2,4,3,5,5,4,4,0,0,1,0x7d }; + static uint8 s_ac_lum_val[AC_LUM_CODES] = + { + 0x01,0x02,0x03,0x00,0x04,0x11,0x05,0x12,0x21,0x31,0x41,0x06,0x13,0x51,0x61,0x07,0x22,0x71,0x14,0x32,0x81,0x91,0xa1,0x08,0x23,0x42,0xb1,0xc1,0x15,0x52,0xd1,0xf0, + 0x24,0x33,0x62,0x72,0x82,0x09,0x0a,0x16,0x17,0x18,0x19,0x1a,0x25,0x26,0x27,0x28,0x29,0x2a,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x43,0x44,0x45,0x46,0x47,0x48,0x49, + 0x4a,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x83,0x84,0x85,0x86,0x87,0x88,0x89, + 0x8a,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xc2,0xc3,0xc4,0xc5, + 0xc6,0xc7,0xc8,0xc9,0xca,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8, + 0xf9,0xfa + }; + static uint8 s_dc_chroma_bits[17] = { 0,0,3,1,1,1,1,1,1,1,1,1,0,0,0,0,0 }; + static uint8 s_dc_chroma_val[DC_CHROMA_CODES] = { 0,1,2,3,4,5,6,7,8,9,10,11 }; + static uint8 s_ac_chroma_bits[17] = { 0,0,2,1,2,4,4,3,4,7,5,4,4,0,1,2,0x77 }; + static uint8 s_ac_chroma_val[AC_CHROMA_CODES] = + { + 0x00,0x01,0x02,0x03,0x11,0x04,0x05,0x21,0x31,0x06,0x12,0x41,0x51,0x07,0x61,0x71,0x13,0x22,0x32,0x81,0x08,0x14,0x42,0x91,0xa1,0xb1,0xc1,0x09,0x23,0x33,0x52,0xf0, + 0x15,0x62,0x72,0xd1,0x0a,0x16,0x24,0x34,0xe1,0x25,0xf1,0x17,0x18,0x19,0x1a,0x26,0x27,0x28,0x29,0x2a,0x35,0x36,0x37,0x38,0x39,0x3a,0x43,0x44,0x45,0x46,0x47,0x48, + 0x49,0x4a,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x82,0x83,0x84,0x85,0x86,0x87, + 0x88,0x89,0x8a,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xc2,0xc3, + 0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8, + 0xf9,0xfa + }; + + // Low-level helper functions. + template inline void clear_obj(T &obj) { memset(&obj, 0, sizeof(obj)); } + + const int YR = 19595, YG = 38470, YB = 7471, CB_R = -11059, CB_G = -21709, CB_B = 32768, CR_R = 32768, CR_G = -27439, CR_B = -5329; + static inline uint8 clamp(int i) { if (static_cast(i) > 255U) { if (i < 0) i = 0; else if (i > 255) i = 255; } return static_cast(i); } + + static void RGB_to_YCC(uint8* pDst, const uint8 *pSrc, int num_pixels) + { + for ( ; num_pixels; pDst += 3, pSrc += 3, num_pixels--) + { + const int r = pSrc[0], g = pSrc[1], b = pSrc[2]; + pDst[0] = static_cast((r * YR + g * YG + b * YB + 32768) >> 16); + pDst[1] = clamp(128 + ((r * CB_R + g * CB_G + b * CB_B + 32768) >> 16)); + pDst[2] = clamp(128 + ((r * CR_R + g * CR_G + b * CR_B + 32768) >> 16)); + } + } + + static void RGB_to_Y(uint8* pDst, const uint8 *pSrc, int num_pixels) + { + for ( ; num_pixels; pDst++, pSrc += 3, num_pixels--) + pDst[0] = static_cast((pSrc[0] * YR + pSrc[1] * YG + pSrc[2] * YB + 32768) >> 16); + } + + static void RGBA_to_YCC(uint8* pDst, const uint8 *pSrc, int num_pixels) + { + for ( ; num_pixels; pDst += 3, pSrc += 4, num_pixels--) + { + const int r = pSrc[0], g = pSrc[1], b = pSrc[2]; + pDst[0] = static_cast((r * YR + g * YG + b * YB + 32768) >> 16); + pDst[1] = clamp(128 + ((r * CB_R + g * CB_G + b * CB_B + 32768) >> 16)); + pDst[2] = clamp(128 + ((r * CR_R + g * CR_G + b * CR_B + 32768) >> 16)); + } + } + + static void RGBA_to_Y(uint8* pDst, const uint8 *pSrc, int num_pixels) + { + for ( ; num_pixels; pDst++, pSrc += 4, num_pixels--) + pDst[0] = static_cast((pSrc[0] * YR + pSrc[1] * YG + pSrc[2] * YB + 32768) >> 16); + } + + static void Y_to_YCC(uint8* pDst, const uint8* pSrc, int num_pixels) + { + for( ; num_pixels; pDst += 3, pSrc++, num_pixels--) { pDst[0] = pSrc[0]; pDst[1] = 128; pDst[2] = 128; } + } + + // Forward DCT - DCT derived from jfdctint. + enum { CONST_BITS = 13, ROW_BITS = 2 }; +#define DCT_DESCALE(x, n) (((x) + (((int32)1) << ((n) - 1))) >> (n)) +#define DCT_MUL(var, c) (static_cast(var) * static_cast(c)) +#define DCT1D(s0, s1, s2, s3, s4, s5, s6, s7) \ + int32 t0 = s0 + s7, t7 = s0 - s7, t1 = s1 + s6, t6 = s1 - s6, t2 = s2 + s5, t5 = s2 - s5, t3 = s3 + s4, t4 = s3 - s4; \ + int32 t10 = t0 + t3, t13 = t0 - t3, t11 = t1 + t2, t12 = t1 - t2; \ + int32 u1 = DCT_MUL(t12 + t13, 4433); \ + s2 = u1 + DCT_MUL(t13, 6270); \ + s6 = u1 + DCT_MUL(t12, -15137); \ + u1 = t4 + t7; \ + int32 u2 = t5 + t6, u3 = t4 + t6, u4 = t5 + t7; \ + int32 z5 = DCT_MUL(u3 + u4, 9633); \ + t4 = DCT_MUL(t4, 2446); t5 = DCT_MUL(t5, 16819); \ + t6 = DCT_MUL(t6, 25172); t7 = DCT_MUL(t7, 12299); \ + u1 = DCT_MUL(u1, -7373); u2 = DCT_MUL(u2, -20995); \ + u3 = DCT_MUL(u3, -16069); u4 = DCT_MUL(u4, -3196); \ + u3 += z5; u4 += z5; \ + s0 = t10 + t11; s1 = t7 + u1 + u4; s3 = t6 + u2 + u3; s4 = t10 - t11; s5 = t5 + u2 + u4; s7 = t4 + u1 + u3; + + static void DCT2D(int32 *p) + { + int32 c, *q = p; + for (c = 7; c >= 0; c--, q += 8) + { + int32 s0 = q[0], s1 = q[1], s2 = q[2], s3 = q[3], s4 = q[4], s5 = q[5], s6 = q[6], s7 = q[7]; + DCT1D(s0, s1, s2, s3, s4, s5, s6, s7); + q[0] = s0 << ROW_BITS; q[1] = DCT_DESCALE(s1, CONST_BITS-ROW_BITS); q[2] = DCT_DESCALE(s2, CONST_BITS-ROW_BITS); q[3] = DCT_DESCALE(s3, CONST_BITS-ROW_BITS); + q[4] = s4 << ROW_BITS; q[5] = DCT_DESCALE(s5, CONST_BITS-ROW_BITS); q[6] = DCT_DESCALE(s6, CONST_BITS-ROW_BITS); q[7] = DCT_DESCALE(s7, CONST_BITS-ROW_BITS); + } + for (q = p, c = 7; c >= 0; c--, q++) + { + int32 s0 = q[0*8], s1 = q[1*8], s2 = q[2*8], s3 = q[3*8], s4 = q[4*8], s5 = q[5*8], s6 = q[6*8], s7 = q[7*8]; + DCT1D(s0, s1, s2, s3, s4, s5, s6, s7); + q[0*8] = DCT_DESCALE(s0, ROW_BITS+3); q[1*8] = DCT_DESCALE(s1, CONST_BITS+ROW_BITS+3); q[2*8] = DCT_DESCALE(s2, CONST_BITS+ROW_BITS+3); q[3*8] = DCT_DESCALE(s3, CONST_BITS+ROW_BITS+3); + q[4*8] = DCT_DESCALE(s4, ROW_BITS+3); q[5*8] = DCT_DESCALE(s5, CONST_BITS+ROW_BITS+3); q[6*8] = DCT_DESCALE(s6, CONST_BITS+ROW_BITS+3); q[7*8] = DCT_DESCALE(s7, CONST_BITS+ROW_BITS+3); + } + } + + struct sym_freq { uint m_key, m_sym_index; }; + + // Radix sorts sym_freq[] array by 32-bit key m_key. Returns ptr to sorted values. + static inline sym_freq* radix_sort_syms(uint num_syms, sym_freq* pSyms0, sym_freq* pSyms1) + { + const uint cMaxPasses = 4; + uint32 hist[256 * cMaxPasses]; clear_obj(hist); + for (uint i = 0; i < num_syms; i++) { uint freq = pSyms0[i].m_key; hist[freq & 0xFF]++; hist[256 + ((freq >> 8) & 0xFF)]++; hist[256*2 + ((freq >> 16) & 0xFF)]++; hist[256*3 + ((freq >> 24) & 0xFF)]++; } + sym_freq* pCur_syms = pSyms0, *pNew_syms = pSyms1; + uint total_passes = cMaxPasses; while ((total_passes > 1) && (num_syms == hist[(total_passes - 1) * 256])) total_passes--; + for (uint pass_shift = 0, pass = 0; pass < total_passes; pass++, pass_shift += 8) + { + const uint32* pHist = &hist[pass << 8]; + uint offsets[256], cur_ofs = 0; + for (uint i = 0; i < 256; i++) { offsets[i] = cur_ofs; cur_ofs += pHist[i]; } + for (uint i = 0; i < num_syms; i++) + pNew_syms[offsets[(pCur_syms[i].m_key >> pass_shift) & 0xFF]++] = pCur_syms[i]; + sym_freq* t = pCur_syms; pCur_syms = pNew_syms; pNew_syms = t; + } + return pCur_syms; + } + + // calculate_minimum_redundancy() originally written by: Alistair Moffat, alistair@cs.mu.oz.au, Jyrki Katajainen, jyrki@diku.dk, November 1996. + static void calculate_minimum_redundancy(sym_freq *A, int n) + { + int root, leaf, next, avbl, used, dpth; + if (n==0) return; else if (n==1) { A[0].m_key = 1; return; } + A[0].m_key += A[1].m_key; root = 0; leaf = 2; + for (next=1; next < n-1; next++) + { + if (leaf>=n || A[root].m_key=n || (root=0; next--) A[next].m_key = A[A[next].m_key].m_key+1; + avbl = 1; used = dpth = 0; root = n-2; next = n-1; + while (avbl>0) + { + while (root>=0 && (int)A[root].m_key==dpth) { used++; root--; } + while (avbl>used) { A[next--].m_key = dpth; avbl--; } + avbl = 2*used; dpth++; used = 0; + } + } + + // Limits canonical Huffman code table's max code size to max_code_size. + static void huffman_enforce_max_code_size(int *pNum_codes, int code_list_len, int max_code_size) + { + if (code_list_len <= 1) return; + + for (int i = max_code_size + 1; i <= MAX_HUFF_CODESIZE; i++) pNum_codes[max_code_size] += pNum_codes[i]; + + uint32 total = 0; + for (int i = max_code_size; i > 0; i--) + total += (((uint32)pNum_codes[i]) << (max_code_size - i)); + + while (total != (1UL << max_code_size)) + { + pNum_codes[max_code_size]--; + for (int i = max_code_size - 1; i > 0; i--) + { + if (pNum_codes[i]) { pNum_codes[i]--; pNum_codes[i + 1] += 2; break; } + } + total--; + } + } + + // Generates an optimized offman table. + void jpeg_encoder::optimize_huffman_table(int table_num, int table_len) + { + sym_freq syms0[MAX_HUFF_SYMBOLS], syms1[MAX_HUFF_SYMBOLS]; + syms0[0].m_key = 1; syms0[0].m_sym_index = 0; // dummy symbol, assures that no valid code contains all 1's + int num_used_syms = 1; + const uint32 *pSym_count = &m_huff_count[table_num][0]; + for (int i = 0; i < table_len; i++) + if (pSym_count[i]) { syms0[num_used_syms].m_key = pSym_count[i]; syms0[num_used_syms++].m_sym_index = i + 1; } + sym_freq* pSyms = radix_sort_syms(num_used_syms, syms0, syms1); + calculate_minimum_redundancy(pSyms, num_used_syms); + + // Count the # of symbols of each code size. + int num_codes[1 + MAX_HUFF_CODESIZE]; clear_obj(num_codes); + for (int i = 0; i < num_used_syms; i++) + num_codes[pSyms[i].m_key]++; + + const uint JPGE_CODE_SIZE_LIMIT = 16; // the maximum possible size of a JPEG Huffman code (valid range is [9,16] - 9 vs. 8 because of the dummy symbol) + huffman_enforce_max_code_size(num_codes, num_used_syms, JPGE_CODE_SIZE_LIMIT); + + // Compute m_huff_bits array, which contains the # of symbols per code size. + clear_obj(m_huff_bits[table_num]); + for (int i = 1; i <= (int)JPGE_CODE_SIZE_LIMIT; i++) + m_huff_bits[table_num][i] = static_cast(num_codes[i]); + + // Remove the dummy symbol added above, which must be in largest bucket. + for (int i = JPGE_CODE_SIZE_LIMIT; i >= 1; i--) + { + if (m_huff_bits[table_num][i]) { m_huff_bits[table_num][i]--; break; } + } + + // Compute the m_huff_val array, which contains the symbol indices sorted by code size (smallest to largest). + for (int i = num_used_syms - 1; i >= 1; i--) + m_huff_val[table_num][num_used_syms - 1 - i] = static_cast(pSyms[i].m_sym_index - 1); + } + + // JPEG marker generation. + void jpeg_encoder::emit_byte(uint8 i) + { + m_all_stream_writes_succeeded = m_all_stream_writes_succeeded && m_pStream->put_obj(i); + } + + void jpeg_encoder::emit_word(uint i) + { + emit_byte(uint8(i >> 8)); emit_byte(uint8(i & 0xFF)); + } + + void jpeg_encoder::emit_marker(int marker) + { + emit_byte(uint8(0xFF)); emit_byte(uint8(marker)); + } + + // Emit JFIF marker + void jpeg_encoder::emit_jfif_app0() + { + emit_marker(M_APP0); + emit_word(2 + 4 + 1 + 2 + 1 + 2 + 2 + 1 + 1); + emit_byte(0x4A); emit_byte(0x46); emit_byte(0x49); emit_byte(0x46); /* Identifier: ASCII "JFIF" */ + emit_byte(0); + emit_byte(1); /* Major version */ + emit_byte(1); /* Minor version */ + emit_byte(0); /* Density unit */ + emit_word(1); + emit_word(1); + emit_byte(0); /* No thumbnail image */ + emit_byte(0); + } + + // Emit quantization tables + void jpeg_encoder::emit_dqt() + { + for (int i = 0; i < ((m_num_components == 3) ? 2 : 1); i++) + { + emit_marker(M_DQT); + emit_word(64 + 1 + 2); + emit_byte(static_cast(i)); + for (int j = 0; j < 64; j++) + emit_byte(static_cast(m_quantization_tables[i][j])); + } + } + + // Emit start of frame marker + void jpeg_encoder::emit_sof() + { + emit_marker(M_SOF0); /* baseline */ + emit_word(3 * m_num_components + 2 + 5 + 1); + emit_byte(8); /* precision */ + emit_word(m_image_y); + emit_word(m_image_x); + emit_byte(m_num_components); + for (int i = 0; i < m_num_components; i++) + { + emit_byte(static_cast(i + 1)); /* component ID */ + emit_byte((m_comp_h_samp[i] << 4) + m_comp_v_samp[i]); /* h and v sampling */ + emit_byte(i > 0); /* quant. table num */ + } + } + + // Emit Huffman table. + void jpeg_encoder::emit_dht(uint8 *bits, uint8 *val, int index, bool ac_flag) + { + emit_marker(M_DHT); + + int length = 0; + for (int i = 1; i <= 16; i++) + length += bits[i]; + + emit_word(length + 2 + 1 + 16); + emit_byte(static_cast(index + (ac_flag << 4))); + + for (int i = 1; i <= 16; i++) + emit_byte(bits[i]); + + for (int i = 0; i < length; i++) + emit_byte(val[i]); + } + + // Emit all Huffman tables. + void jpeg_encoder::emit_dhts() + { + emit_dht(m_huff_bits[0+0], m_huff_val[0+0], 0, false); + emit_dht(m_huff_bits[2+0], m_huff_val[2+0], 0, true); + if (m_num_components == 3) + { + emit_dht(m_huff_bits[0+1], m_huff_val[0+1], 1, false); + emit_dht(m_huff_bits[2+1], m_huff_val[2+1], 1, true); + } + } + + // emit start of scan + void jpeg_encoder::emit_sos() + { + emit_marker(M_SOS); + emit_word(2 * m_num_components + 2 + 1 + 3); + emit_byte(m_num_components); + for (int i = 0; i < m_num_components; i++) + { + emit_byte(static_cast(i + 1)); + if (i == 0) + emit_byte((0 << 4) + 0); + else + emit_byte((1 << 4) + 1); + } + emit_byte(0); /* spectral selection */ + emit_byte(63); + emit_byte(0); + } + + // Emit all markers at beginning of image file. + void jpeg_encoder::emit_markers() + { + emit_marker(M_SOI); + emit_jfif_app0(); + emit_dqt(); + emit_sof(); + emit_dhts(); + emit_sos(); + } + + // Compute the actual canonical Huffman codes/code sizes given the JPEG huff bits and val arrays. + void jpeg_encoder::compute_huffman_table(uint *codes, uint8 *code_sizes, uint8 *bits, uint8 *val) + { + int i, l, last_p, si; + uint8 huff_size[257]; + uint huff_code[257]; + uint code; + + int p = 0; + for (l = 1; l <= 16; l++) + for (i = 1; i <= bits[l]; i++) + huff_size[p++] = (char)l; + + huff_size[p] = 0; last_p = p; // write sentinel + + code = 0; si = huff_size[0]; p = 0; + + while (huff_size[p]) + { + while (huff_size[p] == si) + huff_code[p++] = code++; + code <<= 1; + si++; + } + + memset(codes, 0, sizeof(codes[0])*256); + memset(code_sizes, 0, sizeof(code_sizes[0])*256); + for (p = 0; p < last_p; p++) + { + codes[val[p]] = huff_code[p]; + code_sizes[val[p]] = huff_size[p]; + } + } + + // Quantization table generation. + void jpeg_encoder::compute_quant_table(int32 *pDst, int16 *pSrc) + { + int32 q; + if (m_params.m_quality < 50) + q = 5000 / m_params.m_quality; + else + q = 200 - m_params.m_quality * 2; + for (int i = 0; i < 64; i++) + { + int32 j = *pSrc++; j = (j * q + 50L) / 100L; + *pDst++ = JPGE_MIN(JPGE_MAX(j, 1), 255); + } + } + + // Higher-level methods. + void jpeg_encoder::first_pass_init() + { + m_bit_buffer = 0; m_bits_in = 0; + memset(m_last_dc_val, 0, 3 * sizeof(m_last_dc_val[0])); + m_mcu_y_ofs = 0; + m_pass_num = 1; + } + + bool jpeg_encoder::second_pass_init() + { + compute_huffman_table(&m_huff_codes[0+0][0], &m_huff_code_sizes[0+0][0], m_huff_bits[0+0], m_huff_val[0+0]); + compute_huffman_table(&m_huff_codes[2+0][0], &m_huff_code_sizes[2+0][0], m_huff_bits[2+0], m_huff_val[2+0]); + if (m_num_components > 1) + { + compute_huffman_table(&m_huff_codes[0+1][0], &m_huff_code_sizes[0+1][0], m_huff_bits[0+1], m_huff_val[0+1]); + compute_huffman_table(&m_huff_codes[2+1][0], &m_huff_code_sizes[2+1][0], m_huff_bits[2+1], m_huff_val[2+1]); + } + first_pass_init(); + emit_markers(); + m_pass_num = 2; + return true; + } + + bool jpeg_encoder::jpg_open(int p_x_res, int p_y_res, int src_channels) + { + m_num_components = 3; + switch (m_params.m_subsampling) + { + case Y_ONLY: + { + m_num_components = 1; + m_comp_h_samp[0] = 1; m_comp_v_samp[0] = 1; + m_mcu_x = 8; m_mcu_y = 8; + break; + } + case H1V1: + { + m_comp_h_samp[0] = 1; m_comp_v_samp[0] = 1; + m_comp_h_samp[1] = 1; m_comp_v_samp[1] = 1; + m_comp_h_samp[2] = 1; m_comp_v_samp[2] = 1; + m_mcu_x = 8; m_mcu_y = 8; + break; + } + case H2V1: + { + m_comp_h_samp[0] = 2; m_comp_v_samp[0] = 1; + m_comp_h_samp[1] = 1; m_comp_v_samp[1] = 1; + m_comp_h_samp[2] = 1; m_comp_v_samp[2] = 1; + m_mcu_x = 16; m_mcu_y = 8; + break; + } + case H2V2: + { + m_comp_h_samp[0] = 2; m_comp_v_samp[0] = 2; + m_comp_h_samp[1] = 1; m_comp_v_samp[1] = 1; + m_comp_h_samp[2] = 1; m_comp_v_samp[2] = 1; + m_mcu_x = 16; m_mcu_y = 16; + } + } + + m_image_x = p_x_res; m_image_y = p_y_res; + m_image_bpp = src_channels; + m_image_bpl = m_image_x * src_channels; + m_image_x_mcu = (m_image_x + m_mcu_x - 1) & (~(m_mcu_x - 1)); + m_image_y_mcu = (m_image_y + m_mcu_y - 1) & (~(m_mcu_y - 1)); + m_image_bpl_xlt = m_image_x * m_num_components; + m_image_bpl_mcu = m_image_x_mcu * m_num_components; + m_mcus_per_row = m_image_x_mcu / m_mcu_x; + + if ((m_mcu_lines[0] = static_cast(jpge_malloc(m_image_bpl_mcu * m_mcu_y))) == NULL) return false; + for (int i = 1; i < m_mcu_y; i++) + m_mcu_lines[i] = m_mcu_lines[i-1] + m_image_bpl_mcu; + + compute_quant_table(m_quantization_tables[0], s_std_lum_quant); + compute_quant_table(m_quantization_tables[1], m_params.m_no_chroma_discrim_flag ? s_std_lum_quant : s_std_croma_quant); + + m_out_buf_left = JPGE_OUT_BUF_SIZE; + m_pOut_buf = m_out_buf; + + if (m_params.m_two_pass_flag) + { + clear_obj(m_huff_count); + first_pass_init(); + } + else + { + memcpy(m_huff_bits[0+0], s_dc_lum_bits, 17); memcpy(m_huff_val [0+0], s_dc_lum_val, DC_LUM_CODES); + memcpy(m_huff_bits[2+0], s_ac_lum_bits, 17); memcpy(m_huff_val [2+0], s_ac_lum_val, AC_LUM_CODES); + memcpy(m_huff_bits[0+1], s_dc_chroma_bits, 17); memcpy(m_huff_val [0+1], s_dc_chroma_val, DC_CHROMA_CODES); + memcpy(m_huff_bits[2+1], s_ac_chroma_bits, 17); memcpy(m_huff_val [2+1], s_ac_chroma_val, AC_CHROMA_CODES); + if (!second_pass_init()) return false; // in effect, skip over the first pass + } + return m_all_stream_writes_succeeded; + } + + void jpeg_encoder::load_block_8_8_grey(int x) + { + uint8 *pSrc; + sample_array_t *pDst = m_sample_array; + x <<= 3; + for (int i = 0; i < 8; i++, pDst += 8) + { + pSrc = m_mcu_lines[i] + x; + pDst[0] = pSrc[0] - 128; pDst[1] = pSrc[1] - 128; pDst[2] = pSrc[2] - 128; pDst[3] = pSrc[3] - 128; + pDst[4] = pSrc[4] - 128; pDst[5] = pSrc[5] - 128; pDst[6] = pSrc[6] - 128; pDst[7] = pSrc[7] - 128; + } + } + + void jpeg_encoder::load_block_8_8(int x, int y, int c) + { + uint8 *pSrc; + sample_array_t *pDst = m_sample_array; + x = (x * (8 * 3)) + c; + y <<= 3; + for (int i = 0; i < 8; i++, pDst += 8) + { + pSrc = m_mcu_lines[y + i] + x; + pDst[0] = pSrc[0 * 3] - 128; pDst[1] = pSrc[1 * 3] - 128; pDst[2] = pSrc[2 * 3] - 128; pDst[3] = pSrc[3 * 3] - 128; + pDst[4] = pSrc[4 * 3] - 128; pDst[5] = pSrc[5 * 3] - 128; pDst[6] = pSrc[6 * 3] - 128; pDst[7] = pSrc[7 * 3] - 128; + } + } + + void jpeg_encoder::load_block_16_8(int x, int c) + { + uint8 *pSrc1, *pSrc2; + sample_array_t *pDst = m_sample_array; + x = (x * (16 * 3)) + c; + int a = 0, b = 2; + for (int i = 0; i < 16; i += 2, pDst += 8) + { + pSrc1 = m_mcu_lines[i + 0] + x; + pSrc2 = m_mcu_lines[i + 1] + x; + pDst[0] = ((pSrc1[ 0 * 3] + pSrc1[ 1 * 3] + pSrc2[ 0 * 3] + pSrc2[ 1 * 3] + a) >> 2) - 128; pDst[1] = ((pSrc1[ 2 * 3] + pSrc1[ 3 * 3] + pSrc2[ 2 * 3] + pSrc2[ 3 * 3] + b) >> 2) - 128; + pDst[2] = ((pSrc1[ 4 * 3] + pSrc1[ 5 * 3] + pSrc2[ 4 * 3] + pSrc2[ 5 * 3] + a) >> 2) - 128; pDst[3] = ((pSrc1[ 6 * 3] + pSrc1[ 7 * 3] + pSrc2[ 6 * 3] + pSrc2[ 7 * 3] + b) >> 2) - 128; + pDst[4] = ((pSrc1[ 8 * 3] + pSrc1[ 9 * 3] + pSrc2[ 8 * 3] + pSrc2[ 9 * 3] + a) >> 2) - 128; pDst[5] = ((pSrc1[10 * 3] + pSrc1[11 * 3] + pSrc2[10 * 3] + pSrc2[11 * 3] + b) >> 2) - 128; + pDst[6] = ((pSrc1[12 * 3] + pSrc1[13 * 3] + pSrc2[12 * 3] + pSrc2[13 * 3] + a) >> 2) - 128; pDst[7] = ((pSrc1[14 * 3] + pSrc1[15 * 3] + pSrc2[14 * 3] + pSrc2[15 * 3] + b) >> 2) - 128; + int temp = a; a = b; b = temp; + } + } + + void jpeg_encoder::load_block_16_8_8(int x, int c) + { + uint8 *pSrc1; + sample_array_t *pDst = m_sample_array; + x = (x * (16 * 3)) + c; + for (int i = 0; i < 8; i++, pDst += 8) + { + pSrc1 = m_mcu_lines[i + 0] + x; + pDst[0] = ((pSrc1[ 0 * 3] + pSrc1[ 1 * 3]) >> 1) - 128; pDst[1] = ((pSrc1[ 2 * 3] + pSrc1[ 3 * 3]) >> 1) - 128; + pDst[2] = ((pSrc1[ 4 * 3] + pSrc1[ 5 * 3]) >> 1) - 128; pDst[3] = ((pSrc1[ 6 * 3] + pSrc1[ 7 * 3]) >> 1) - 128; + pDst[4] = ((pSrc1[ 8 * 3] + pSrc1[ 9 * 3]) >> 1) - 128; pDst[5] = ((pSrc1[10 * 3] + pSrc1[11 * 3]) >> 1) - 128; + pDst[6] = ((pSrc1[12 * 3] + pSrc1[13 * 3]) >> 1) - 128; pDst[7] = ((pSrc1[14 * 3] + pSrc1[15 * 3]) >> 1) - 128; + } + } + + void jpeg_encoder::load_quantized_coefficients(int component_num) + { + int32 *q = m_quantization_tables[component_num > 0]; + int16 *pDst = m_coefficient_array; + for (int i = 0; i < 64; i++) + { + sample_array_t j = m_sample_array[s_zag[i]]; + if (j < 0) + { + if ((j = -j + (*q >> 1)) < *q) + *pDst++ = 0; + else + *pDst++ = static_cast(-(j / *q)); + } + else + { + if ((j = j + (*q >> 1)) < *q) + *pDst++ = 0; + else + *pDst++ = static_cast((j / *q)); + } + q++; + } + } + + void jpeg_encoder::flush_output_buffer() + { + if (m_out_buf_left != JPGE_OUT_BUF_SIZE) + m_all_stream_writes_succeeded = m_all_stream_writes_succeeded && m_pStream->put_buf(m_out_buf, JPGE_OUT_BUF_SIZE - m_out_buf_left); + m_pOut_buf = m_out_buf; + m_out_buf_left = JPGE_OUT_BUF_SIZE; + } + + void jpeg_encoder::put_bits(uint bits, uint len) + { + m_bit_buffer |= ((uint32)bits << (24 - (m_bits_in += len))); + while (m_bits_in >= 8) + { + uint8 c; +#define JPGE_PUT_BYTE(c) { *m_pOut_buf++ = (c); if (--m_out_buf_left == 0) flush_output_buffer(); } + JPGE_PUT_BYTE(c = (uint8)((m_bit_buffer >> 16) & 0xFF)); + if (c == 0xFF) JPGE_PUT_BYTE(0); + m_bit_buffer <<= 8; + m_bits_in -= 8; + } + } + + void jpeg_encoder::code_coefficients_pass_one(int component_num) + { + if (component_num >= 3) return; // just to shut up static analysis + int i, run_len, nbits, temp1; + int16 *src = m_coefficient_array; + uint32 *dc_count = component_num ? m_huff_count[0 + 1] : m_huff_count[0 + 0], *ac_count = component_num ? m_huff_count[2 + 1] : m_huff_count[2 + 0]; + + temp1 = src[0] - m_last_dc_val[component_num]; + m_last_dc_val[component_num] = src[0]; + if (temp1 < 0) temp1 = -temp1; + + nbits = 0; + while (temp1) + { + nbits++; temp1 >>= 1; + } + + dc_count[nbits]++; + for (run_len = 0, i = 1; i < 64; i++) + { + if ((temp1 = m_coefficient_array[i]) == 0) + run_len++; + else + { + while (run_len >= 16) + { + ac_count[0xF0]++; + run_len -= 16; + } + if (temp1 < 0) temp1 = -temp1; + nbits = 1; + while (temp1 >>= 1) nbits++; + ac_count[(run_len << 4) + nbits]++; + run_len = 0; + } + } + if (run_len) ac_count[0]++; + } + + void jpeg_encoder::code_coefficients_pass_two(int component_num) + { + int i, j, run_len, nbits, temp1, temp2; + int16 *pSrc = m_coefficient_array; + uint *codes[2]; + uint8 *code_sizes[2]; + + if (component_num == 0) + { + codes[0] = m_huff_codes[0 + 0]; codes[1] = m_huff_codes[2 + 0]; + code_sizes[0] = m_huff_code_sizes[0 + 0]; code_sizes[1] = m_huff_code_sizes[2 + 0]; + } + else + { + codes[0] = m_huff_codes[0 + 1]; codes[1] = m_huff_codes[2 + 1]; + code_sizes[0] = m_huff_code_sizes[0 + 1]; code_sizes[1] = m_huff_code_sizes[2 + 1]; + } + + temp1 = temp2 = pSrc[0] - m_last_dc_val[component_num]; + m_last_dc_val[component_num] = pSrc[0]; + + if (temp1 < 0) + { + temp1 = -temp1; temp2--; + } + + nbits = 0; + while (temp1) + { + nbits++; temp1 >>= 1; + } + + put_bits(codes[0][nbits], code_sizes[0][nbits]); + if (nbits) put_bits(temp2 & ((1 << nbits) - 1), nbits); + + for (run_len = 0, i = 1; i < 64; i++) + { + if ((temp1 = m_coefficient_array[i]) == 0) + run_len++; + else + { + while (run_len >= 16) + { + put_bits(codes[1][0xF0], code_sizes[1][0xF0]); + run_len -= 16; + } + if ((temp2 = temp1) < 0) + { + temp1 = -temp1; + temp2--; + } + nbits = 1; + while (temp1 >>= 1) + nbits++; + j = (run_len << 4) + nbits; + put_bits(codes[1][j], code_sizes[1][j]); + put_bits(temp2 & ((1 << nbits) - 1), nbits); + run_len = 0; + } + } + if (run_len) + put_bits(codes[1][0], code_sizes[1][0]); + } + + void jpeg_encoder::code_block(int component_num) + { + DCT2D(m_sample_array); + load_quantized_coefficients(component_num); + if (m_pass_num == 1) + code_coefficients_pass_one(component_num); + else + code_coefficients_pass_two(component_num); + } + + void jpeg_encoder::process_mcu_row() + { + if (m_num_components == 1) + { + for (int i = 0; i < m_mcus_per_row; i++) + { + load_block_8_8_grey(i); code_block(0); + } + } + else if ((m_comp_h_samp[0] == 1) && (m_comp_v_samp[0] == 1)) + { + for (int i = 0; i < m_mcus_per_row; i++) + { + load_block_8_8(i, 0, 0); code_block(0); load_block_8_8(i, 0, 1); code_block(1); load_block_8_8(i, 0, 2); code_block(2); + } + } + else if ((m_comp_h_samp[0] == 2) && (m_comp_v_samp[0] == 1)) + { + for (int i = 0; i < m_mcus_per_row; i++) + { + load_block_8_8(i * 2 + 0, 0, 0); code_block(0); load_block_8_8(i * 2 + 1, 0, 0); code_block(0); + load_block_16_8_8(i, 1); code_block(1); load_block_16_8_8(i, 2); code_block(2); + } + } + else if ((m_comp_h_samp[0] == 2) && (m_comp_v_samp[0] == 2)) + { + for (int i = 0; i < m_mcus_per_row; i++) + { + load_block_8_8(i * 2 + 0, 0, 0); code_block(0); load_block_8_8(i * 2 + 1, 0, 0); code_block(0); + load_block_8_8(i * 2 + 0, 1, 0); code_block(0); load_block_8_8(i * 2 + 1, 1, 0); code_block(0); + load_block_16_8(i, 1); code_block(1); load_block_16_8(i, 2); code_block(2); + } + } + } + + bool jpeg_encoder::terminate_pass_one() + { + optimize_huffman_table(0+0, DC_LUM_CODES); optimize_huffman_table(2+0, AC_LUM_CODES); + if (m_num_components > 1) + { + optimize_huffman_table(0+1, DC_CHROMA_CODES); optimize_huffman_table(2+1, AC_CHROMA_CODES); + } + return second_pass_init(); + } + + bool jpeg_encoder::terminate_pass_two() + { + put_bits(0x7F, 7); + flush_output_buffer(); + emit_marker(M_EOI); + m_pass_num++; // purposely bump up m_pass_num, for debugging + return true; + } + + bool jpeg_encoder::process_end_of_image() + { + if (m_mcu_y_ofs) + { + if (m_mcu_y_ofs < 16) // check here just to shut up static analysis + { + for (int i = m_mcu_y_ofs; i < m_mcu_y; i++) + memcpy(m_mcu_lines[i], m_mcu_lines[m_mcu_y_ofs - 1], m_image_bpl_mcu); + } + + process_mcu_row(); + } + + if (m_pass_num == 1) + return terminate_pass_one(); + else + return terminate_pass_two(); + } + + void jpeg_encoder::load_mcu(const void *pSrc) + { + const uint8* Psrc = reinterpret_cast(pSrc); + + uint8* pDst = m_mcu_lines[m_mcu_y_ofs]; // OK to write up to m_image_bpl_xlt bytes to pDst + + if (m_num_components == 1) + { + if (m_image_bpp == 4) + RGBA_to_Y(pDst, Psrc, m_image_x); + else if (m_image_bpp == 3) + RGB_to_Y(pDst, Psrc, m_image_x); + else + memcpy(pDst, Psrc, m_image_x); + } + else + { + if (m_image_bpp == 4) + RGBA_to_YCC(pDst, Psrc, m_image_x); + else if (m_image_bpp == 3) + RGB_to_YCC(pDst, Psrc, m_image_x); + else + Y_to_YCC(pDst, Psrc, m_image_x); + } + + // Possibly duplicate pixels at end of scanline if not a multiple of 8 or 16 + if (m_num_components == 1) + memset(m_mcu_lines[m_mcu_y_ofs] + m_image_bpl_xlt, pDst[m_image_bpl_xlt - 1], m_image_x_mcu - m_image_x); + else + { + const uint8 y = pDst[m_image_bpl_xlt - 3 + 0], cb = pDst[m_image_bpl_xlt - 3 + 1], cr = pDst[m_image_bpl_xlt - 3 + 2]; + uint8 *q = m_mcu_lines[m_mcu_y_ofs] + m_image_bpl_xlt; + for (int i = m_image_x; i < m_image_x_mcu; i++) + { + *q++ = y; *q++ = cb; *q++ = cr; + } + } + + if (++m_mcu_y_ofs == m_mcu_y) + { + process_mcu_row(); + m_mcu_y_ofs = 0; + } + } + + void jpeg_encoder::clear() + { + m_mcu_lines[0] = NULL; + m_pass_num = 0; + m_all_stream_writes_succeeded = true; + } + + jpeg_encoder::jpeg_encoder() + { + clear(); + } + + jpeg_encoder::~jpeg_encoder() + { + deinit(); + } + + bool jpeg_encoder::init(output_stream *pStream, int width, int height, int src_channels, const params &comp_params) + { + deinit(); + if (((!pStream) || (width < 1) || (height < 1)) || ((src_channels != 1) && (src_channels != 3) && (src_channels != 4)) || (!comp_params.check())) return false; + m_pStream = pStream; + m_params = comp_params; + return jpg_open(width, height, src_channels); + } + + void jpeg_encoder::deinit() + { + jpge_free(m_mcu_lines[0]); + clear(); + } + + bool jpeg_encoder::process_scanline(const void* pScanline) + { + if ((m_pass_num < 1) || (m_pass_num > 2)) return false; + if (m_all_stream_writes_succeeded) + { + if (!pScanline) + { + if (!process_end_of_image()) return false; + } + else + { + load_mcu(pScanline); + } + } + return m_all_stream_writes_succeeded; + } + + // Higher level wrappers/examples (optional). +#include + + class cfile_stream : public output_stream + { + cfile_stream(const cfile_stream &); + cfile_stream &operator= (const cfile_stream &); + + FILE* m_pFile; + bool m_bStatus; + + public: + cfile_stream() : m_pFile(NULL), m_bStatus(false) { } + + virtual ~cfile_stream() + { + close(); + } + + bool open(const char *pFilename) + { + close(); +#ifdef _MSC_VER + fopen_s(&m_pFile, pFilename, "wb"); +#else + m_pFile = fopen(pFilename, "wb"); +#endif + m_bStatus = (m_pFile != NULL); + return m_bStatus; + } + + bool close() + { + if (m_pFile) + { + if (fclose(m_pFile) == EOF) + { + m_bStatus = false; + } + m_pFile = NULL; + } + return m_bStatus; + } + + virtual bool put_buf(const void* pBuf, int len) + { + m_bStatus = m_bStatus && (fwrite(pBuf, len, 1, m_pFile) == 1); + return m_bStatus; + } + + uint get_size() const + { + return m_pFile ? ftell(m_pFile) : 0; + } + }; + + // Writes JPEG image to file. + bool compress_image_to_jpeg_file(const char *pFilename, int width, int height, int num_channels, const uint8 *pImage_data, const params &comp_params) + { + cfile_stream dst_stream; + if (!dst_stream.open(pFilename)) + return false; + + jpge::jpeg_encoder dst_image; + if (!dst_image.init(&dst_stream, width, height, num_channels, comp_params)) + return false; + + for (uint pass_index = 0; pass_index < dst_image.get_total_passes(); pass_index++) + { + for (int i = 0; i < height; i++) + { + const uint8* pBuf = pImage_data + i * width * num_channels; + if (!dst_image.process_scanline(pBuf)) + return false; + } + if (!dst_image.process_scanline(NULL)) + return false; + } + + dst_image.deinit(); + + return dst_stream.close(); + } + + class memory_stream : public output_stream + { + memory_stream(const memory_stream &); + memory_stream &operator= (const memory_stream &); + + uint8 *m_pBuf; + uint m_buf_size, m_buf_ofs; + + public: + memory_stream(void *pBuf, uint buf_size) : m_pBuf(static_cast(pBuf)), m_buf_size(buf_size), m_buf_ofs(0) { } + + virtual ~memory_stream() { } + + virtual bool put_buf(const void* pBuf, int len) + { + uint buf_remaining = m_buf_size - m_buf_ofs; + if ((uint)len > buf_remaining) + return false; + memcpy(m_pBuf + m_buf_ofs, pBuf, len); + m_buf_ofs += len; + return true; + } + + uint get_size() const + { + return m_buf_ofs; + } + }; + + bool compress_image_to_jpeg_file_in_memory(void *pDstBuf, int &buf_size, int width, int height, int num_channels, const uint8 *pImage_data, const params &comp_params) + { + if ((!pDstBuf) || (!buf_size)) + return false; + + memory_stream dst_stream(pDstBuf, buf_size); + + buf_size = 0; + + jpge::jpeg_encoder dst_image; + if (!dst_image.init(&dst_stream, width, height, num_channels, comp_params)) + return false; + + for (uint pass_index = 0; pass_index < dst_image.get_total_passes(); pass_index++) + { + for (int i = 0; i < height; i++) + { + const uint8* pScanline = pImage_data + i * width * num_channels; + if (!dst_image.process_scanline(pScanline)) + return false; + } + if (!dst_image.process_scanline(NULL)) + return false; + } + + dst_image.deinit(); + + buf_size = dst_stream.get_size(); + return true; + } + +} // namespace jpge diff --git a/crnlib/crn_jpge.h b/crnlib/crn_jpge.h new file mode 100644 index 0000000..5765cc9 --- /dev/null +++ b/crnlib/crn_jpge.h @@ -0,0 +1,169 @@ +// jpge.h - C++ class for JPEG compression. +// Public domain, Rich Geldreich +// Alex Evans: Added RGBA support, linear memory allocator. +#ifndef JPEG_ENCODER_H +#define JPEG_ENCODER_H + +namespace jpge +{ + typedef unsigned char uint8; + typedef signed short int16; + typedef signed int int32; + typedef unsigned short uint16; + typedef unsigned int uint32; + typedef unsigned int uint; + + // JPEG chroma subsampling factors. Y_ONLY (grayscale images) and H2V2 (color images) are the most common. + enum subsampling_t { Y_ONLY = 0, H1V1 = 1, H2V1 = 2, H2V2 = 3 }; + + // JPEG compression parameters structure. + struct params + { + inline params() : m_quality(85), m_subsampling(H2V2), m_no_chroma_discrim_flag(false), m_two_pass_flag(false) { } + + inline bool check() const + { + if ((m_quality < 1) || (m_quality > 100)) return false; + if ((uint)m_subsampling > (uint)H2V2) return false; + return true; + } + + // Quality: 1-100, higher is better. Typical values are around 50-95. + int m_quality; + + // m_subsampling: + // 0 = Y (grayscale) only + // 1 = YCbCr, no subsampling (H1V1, YCbCr 1x1x1, 3 blocks per MCU) + // 2 = YCbCr, H2V1 subsampling (YCbCr 2x1x1, 4 blocks per MCU) + // 3 = YCbCr, H2V2 subsampling (YCbCr 4x1x1, 6 blocks per MCU-- very common) + subsampling_t m_subsampling; + + // Disables CbCr discrimination - only intended for testing. + // If true, the Y quantization table is also used for the CbCr channels. + bool m_no_chroma_discrim_flag; + + bool m_two_pass_flag; + }; + + // Writes JPEG image to a file. + // num_channels must be 1 (Y) or 3 (RGB), image pitch must be width*num_channels. + bool compress_image_to_jpeg_file(const char *pFilename, int width, int height, int num_channels, const uint8 *pImage_data, const params &comp_params = params()); + + // Writes JPEG image to memory buffer. + // On entry, buf_size is the size of the output buffer pointed at by pBuf, which should be at least ~1024 bytes. + // If return value is true, buf_size will be set to the size of the compressed data. + bool compress_image_to_jpeg_file_in_memory(void *pBuf, int &buf_size, int width, int height, int num_channels, const uint8 *pImage_data, const params &comp_params = params()); + + // Output stream abstract class - used by the jpeg_encoder class to write to the output stream. + // put_buf() is generally called with len==JPGE_OUT_BUF_SIZE bytes, but for headers it'll be called with smaller amounts. + class output_stream + { + public: + virtual ~output_stream() { }; + virtual bool put_buf(const void* Pbuf, int len) = 0; + template inline bool put_obj(const T& obj) { return put_buf(&obj, sizeof(T)); } + }; + + // Lower level jpeg_encoder class - useful if more control is needed than the above helper functions. + class jpeg_encoder + { + public: + jpeg_encoder(); + ~jpeg_encoder(); + + // Initializes the compressor. + // pStream: The stream object to use for writing compressed data. + // params - Compression parameters structure, defined above. + // width, height - Image dimensions. + // channels - May be 1, or 3. 1 indicates grayscale, 3 indicates RGB source data. + // Returns false on out of memory or if a stream write fails. + bool init(output_stream *pStream, int width, int height, int src_channels, const params &comp_params = params()); + + const params &get_params() const { return m_params; } + + // Deinitializes the compressor, freeing any allocated memory. May be called at any time. + void deinit(); + + uint get_total_passes() const { return m_params.m_two_pass_flag ? 2 : 1; } + inline uint get_cur_pass() { return m_pass_num; } + + // Call this method with each source scanline. + // width * src_channels bytes per scanline is expected (RGB or Y format). + // You must call with NULL after all scanlines are processed to finish compression. + // Returns false on out of memory or if a stream write fails. + bool process_scanline(const void* pScanline); + + private: + jpeg_encoder(const jpeg_encoder &); + jpeg_encoder &operator =(const jpeg_encoder &); + + typedef int32 sample_array_t; + + output_stream *m_pStream; + params m_params; + uint8 m_num_components; + uint8 m_comp_h_samp[3], m_comp_v_samp[3]; + int m_image_x, m_image_y, m_image_bpp, m_image_bpl; + int m_image_x_mcu, m_image_y_mcu; + int m_image_bpl_xlt, m_image_bpl_mcu; + int m_mcus_per_row; + int m_mcu_x, m_mcu_y; + uint8 *m_mcu_lines[16]; + uint8 m_mcu_y_ofs; + sample_array_t m_sample_array[64]; + int16 m_coefficient_array[64]; + int32 m_quantization_tables[2][64]; + uint m_huff_codes[4][256]; + uint8 m_huff_code_sizes[4][256]; + uint8 m_huff_bits[4][17]; + uint8 m_huff_val[4][256]; + uint32 m_huff_count[4][256]; + int m_last_dc_val[3]; + enum { JPGE_OUT_BUF_SIZE = 2048 }; + uint8 m_out_buf[JPGE_OUT_BUF_SIZE]; + uint8 *m_pOut_buf; + uint m_out_buf_left; + uint32 m_bit_buffer; + uint m_bits_in; + uint8 m_pass_num; + bool m_all_stream_writes_succeeded; + + void optimize_huffman_table(int table_num, int table_len); + void emit_byte(uint8 i); + void emit_word(uint i); + void emit_marker(int marker); + void emit_jfif_app0(); + void emit_dqt(); + void emit_sof(); + void emit_dht(uint8 *bits, uint8 *val, int index, bool ac_flag); + void emit_dhts(); + void emit_sos(); + void emit_markers(); + void compute_huffman_table(uint *codes, uint8 *code_sizes, uint8 *bits, uint8 *val); + void compute_quant_table(int32 *dst, int16 *src); + void adjust_quant_table(int32 *dst, int32 *src); + void first_pass_init(); + bool second_pass_init(); + bool jpg_open(int p_x_res, int p_y_res, int src_channels); + void load_block_8_8_grey(int x); + void load_block_8_8(int x, int y, int c); + void load_block_16_8(int x, int c); + void load_block_16_8_8(int x, int c); + void load_quantized_coefficients(int component_num); + void flush_output_buffer(); + void put_bits(uint bits, uint len); + void code_coefficients_pass_one(int component_num); + void code_coefficients_pass_two(int component_num); + void code_block(int component_num); + void process_mcu_row(); + bool terminate_pass_one(); + bool terminate_pass_two(); + bool process_end_of_image(); + void load_mcu(const void* src); + void clear(); + void init(); + }; + +} // namespace jpge + +#endif // JPEG_ENCODER diff --git a/crnlib/crn_ktx_texture.cpp b/crnlib/crn_ktx_texture.cpp new file mode 100644 index 0000000..ddcef6f --- /dev/null +++ b/crnlib/crn_ktx_texture.cpp @@ -0,0 +1,920 @@ +// File: crn_ktx_texture.cpp +#include "crn_core.h" +#include "crn_ktx_texture.h" +#include "crn_console.h" + +// Set #if CRNLIB_KTX_PVRTEX_WORKAROUNDS to 1 to enable various workarounds for oddball KTX files written by PVRTexTool. +#define CRNLIB_KTX_PVRTEX_WORKAROUNDS 1 + +namespace crnlib +{ + const uint8 s_ktx_file_id[12] = { 0xAB, 0x4B, 0x54, 0x58, 0x20, 0x31, 0x31, 0xBB, 0x0D, 0x0A, 0x1A, 0x0A }; + + bool is_packed_pixel_ogl_type(uint32 ogl_type) + { + switch (ogl_type) + { + case KTX_UNSIGNED_BYTE_3_3_2: + case KTX_UNSIGNED_BYTE_2_3_3_REV: + case KTX_UNSIGNED_SHORT_5_6_5: + case KTX_UNSIGNED_SHORT_5_6_5_REV: + case KTX_UNSIGNED_SHORT_4_4_4_4: + case KTX_UNSIGNED_SHORT_4_4_4_4_REV: + case KTX_UNSIGNED_SHORT_5_5_5_1: + case KTX_UNSIGNED_SHORT_1_5_5_5_REV: + case KTX_UNSIGNED_INT_8_8_8_8: + case KTX_UNSIGNED_INT_8_8_8_8_REV: + case KTX_UNSIGNED_INT_10_10_10_2: + case KTX_UNSIGNED_INT_2_10_10_10_REV: + case KTX_UNSIGNED_INT_24_8: + case KTX_UNSIGNED_INT_10F_11F_11F_REV: + case KTX_UNSIGNED_INT_5_9_9_9_REV: + return true; + } + return false; + } + + uint get_ogl_type_size(uint32 ogl_type) + { + switch (ogl_type) + { + case KTX_UNSIGNED_BYTE: + case KTX_BYTE: + return 1; + case KTX_HALF_FLOAT: + case KTX_UNSIGNED_SHORT: + case KTX_SHORT: + return 2; + case KTX_FLOAT: + case KTX_UNSIGNED_INT: + case KTX_INT: + return 4; + case KTX_UNSIGNED_BYTE_3_3_2: + case KTX_UNSIGNED_BYTE_2_3_3_REV: + return 1; + case KTX_UNSIGNED_SHORT_5_6_5: + case KTX_UNSIGNED_SHORT_5_6_5_REV: + case KTX_UNSIGNED_SHORT_4_4_4_4: + case KTX_UNSIGNED_SHORT_4_4_4_4_REV: + case KTX_UNSIGNED_SHORT_5_5_5_1: + case KTX_UNSIGNED_SHORT_1_5_5_5_REV: + return 2; + case KTX_UNSIGNED_INT_8_8_8_8: + case KTX_UNSIGNED_INT_8_8_8_8_REV: + case KTX_UNSIGNED_INT_10_10_10_2: + case KTX_UNSIGNED_INT_2_10_10_10_REV: + case KTX_UNSIGNED_INT_24_8: + case KTX_UNSIGNED_INT_10F_11F_11F_REV: + case KTX_UNSIGNED_INT_5_9_9_9_REV: + return 4; + } + return 0; + } + + uint32 get_ogl_base_internal_fmt(uint32 ogl_fmt) + { + switch (ogl_fmt) + { + case KTX_ETC1_RGB8_OES: + case KTX_RGB_S3TC: + case KTX_RGB4_S3TC: + case KTX_COMPRESSED_RGB_S3TC_DXT1_EXT: + case KTX_COMPRESSED_SRGB_S3TC_DXT1_EXT: + return KTX_RGB; + case KTX_COMPRESSED_RGBA_S3TC_DXT1_EXT: + case KTX_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT: + case KTX_RGBA_S3TC: + case KTX_RGBA4_S3TC: + case KTX_COMPRESSED_RGBA_S3TC_DXT3_EXT: + case KTX_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT: + case KTX_COMPRESSED_RGBA_S3TC_DXT5_EXT: + case KTX_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT: + case KTX_RGBA_DXT5_S3TC: + case KTX_RGBA4_DXT5_S3TC: + return KTX_RGBA; + case 1: + case KTX_RED: + case KTX_RED_INTEGER: + case KTX_GREEN: + case KTX_GREEN_INTEGER: + case KTX_BLUE: + case KTX_BLUE_INTEGER: + case KTX_R8: + case KTX_R8UI: + case KTX_LUMINANCE8: + case KTX_ALPHA: + case KTX_LUMINANCE: + case KTX_COMPRESSED_RED_RGTC1_EXT: + case KTX_COMPRESSED_SIGNED_RED_RGTC1_EXT: + case KTX_COMPRESSED_LUMINANCE_LATC1_EXT: + case KTX_COMPRESSED_SIGNED_LUMINANCE_LATC1_EXT: + return KTX_RED; + case 2: + case KTX_RG: + case KTX_RG8: + case KTX_RG_INTEGER: + case KTX_LUMINANCE_ALPHA: + case KTX_COMPRESSED_RED_GREEN_RGTC2_EXT: + case KTX_COMPRESSED_SIGNED_RED_GREEN_RGTC2_EXT: + case KTX_COMPRESSED_LUMINANCE_ALPHA_LATC2_EXT: + case KTX_COMPRESSED_SIGNED_LUMINANCE_ALPHA_LATC2_EXT: + return KTX_RG; + case 3: + case KTX_SRGB: + case KTX_RGB: + case KTX_RGB_INTEGER: + case KTX_BGR: + case KTX_BGR_INTEGER: + case KTX_RGB8: + case KTX_SRGB8: + return KTX_RGB; + case 4: + case KTX_RGBA: + case KTX_BGRA: + case KTX_RGBA_INTEGER: + case KTX_BGRA_INTEGER: + case KTX_SRGB_ALPHA: + case KTX_SRGB8_ALPHA8: + case KTX_RGBA8: + return KTX_RGBA; + } + return 0; + } + + bool get_ogl_fmt_desc(uint32 ogl_fmt, uint32 ogl_type, uint& block_dim, uint& bytes_per_block) + { + uint ogl_type_size = get_ogl_type_size(ogl_type); + + block_dim = 1; + bytes_per_block = 0; + + switch (ogl_fmt) + { + case KTX_COMPRESSED_RED_RGTC1_EXT: + case KTX_COMPRESSED_SIGNED_RED_RGTC1_EXT: + case KTX_COMPRESSED_LUMINANCE_LATC1_EXT: + case KTX_COMPRESSED_SIGNED_LUMINANCE_LATC1_EXT: + case KTX_ETC1_RGB8_OES: + case KTX_RGB_S3TC: + case KTX_RGB4_S3TC: + case KTX_COMPRESSED_RGB_S3TC_DXT1_EXT: + case KTX_COMPRESSED_RGBA_S3TC_DXT1_EXT: + case KTX_COMPRESSED_SRGB_S3TC_DXT1_EXT: + case KTX_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT: + { + block_dim = 4; + bytes_per_block = 8; + break; + } + case KTX_COMPRESSED_LUMINANCE_ALPHA_LATC2_EXT: + case KTX_COMPRESSED_SIGNED_LUMINANCE_ALPHA_LATC2_EXT: + case KTX_COMPRESSED_RED_GREEN_RGTC2_EXT: + case KTX_COMPRESSED_SIGNED_RED_GREEN_RGTC2_EXT: + case KTX_RGBA_S3TC: + case KTX_RGBA4_S3TC: + case KTX_COMPRESSED_RGBA_S3TC_DXT3_EXT: + case KTX_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT: + case KTX_COMPRESSED_RGBA_S3TC_DXT5_EXT: + case KTX_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT: + case KTX_RGBA_DXT5_S3TC: + case KTX_RGBA4_DXT5_S3TC: + { + block_dim = 4; + bytes_per_block = 16; + break; + } + case 1: + case KTX_ALPHA: + case KTX_RED: + case KTX_GREEN: + case KTX_BLUE: + case KTX_RED_INTEGER: + case KTX_GREEN_INTEGER: + case KTX_BLUE_INTEGER: + case KTX_LUMINANCE: + { + bytes_per_block = ogl_type_size; + break; + } + case KTX_R8: + case KTX_R8UI: + case KTX_ALPHA8: + case KTX_LUMINANCE8: + { + bytes_per_block = 1; + break; + } + case 2: + case KTX_RG: + case KTX_RG_INTEGER: + case KTX_LUMINANCE_ALPHA: + { + bytes_per_block = 2 * ogl_type_size; + break; + } + case KTX_RG8: + case KTX_LUMINANCE8_ALPHA8: + { + bytes_per_block = 2; + break; + } + case 3: + case KTX_SRGB: + case KTX_RGB: + case KTX_BGR: + case KTX_RGB_INTEGER: + case KTX_BGR_INTEGER: + { + bytes_per_block = is_packed_pixel_ogl_type(ogl_type) ? ogl_type_size : (3 * ogl_type_size); + break; + } + case KTX_RGB8: + case KTX_SRGB8: + { + bytes_per_block = 3; + break; + } + case 4: + case KTX_RGBA: + case KTX_BGRA: + case KTX_RGBA_INTEGER: + case KTX_BGRA_INTEGER: + case KTX_SRGB_ALPHA: + { + bytes_per_block = is_packed_pixel_ogl_type(ogl_type) ? ogl_type_size : (4 * ogl_type_size); + break; + } + case KTX_SRGB8_ALPHA8: + case KTX_RGBA8: + { + bytes_per_block = 4; + break; + } + default: + return false; + } + return true; + } + + bool ktx_texture::compute_pixel_info() + { + if ((!m_header.m_glType) || (!m_header.m_glFormat)) + { + if ((m_header.m_glType) || (m_header.m_glFormat)) + return false; + + // Must be a compressed format. + if (!get_ogl_fmt_desc(m_header.m_glInternalFormat, m_header.m_glType, m_block_dim, m_bytes_per_block)) + { +#if CRNLIB_KTX_PVRTEX_WORKAROUNDS + if ((!m_header.m_glInternalFormat) && (!m_header.m_glType) && (!m_header.m_glTypeSize) && (!m_header.m_glBaseInternalFormat)) + { + // PVRTexTool writes bogus headers when outputting ETC1. + console::warning("ktx_texture::compute_pixel_info: Header doesn't specify any format, assuming ETC1 and hoping for the best"); + m_header.m_glBaseInternalFormat = KTX_RGB; + m_header.m_glInternalFormat = KTX_ETC1_RGB8_OES; + m_header.m_glTypeSize = 1; + m_block_dim = 4; + m_bytes_per_block = 8; + return true; + } +#endif + return false; + } + + if (m_block_dim == 1) + return false; + } + else + { + // Must be an uncompressed format. + if (!get_ogl_fmt_desc(m_header.m_glFormat, m_header.m_glType, m_block_dim, m_bytes_per_block)) + return false; + + if (m_block_dim > 1) + return false; + } + return true; + } + + bool ktx_texture::read_from_stream(data_stream_serializer& serializer) + { + clear(); + + // Read header + if (serializer.read(&m_header, 1, sizeof(m_header)) != sizeof(ktx_header)) + return false; + + // Check header + if (memcmp(s_ktx_file_id, m_header.m_identifier, sizeof(m_header.m_identifier))) + return false; + + if ((m_header.m_endianness != KTX_OPPOSITE_ENDIAN) && (m_header.m_endianness != KTX_ENDIAN)) + return false; + + m_opposite_endianness = (m_header.m_endianness == KTX_OPPOSITE_ENDIAN); + if (m_opposite_endianness) + { + m_header.endian_swap(); + + if ((m_header.m_glTypeSize != sizeof(uint8)) && (m_header.m_glTypeSize != sizeof(uint16)) && (m_header.m_glTypeSize != sizeof(uint32))) + return false; + } + + if (!check_header()) + return false; + + if (!compute_pixel_info()) + return false; + + uint8 pad_bytes[3]; + + // Read the key value entries + uint num_key_value_bytes_remaining = m_header.m_bytesOfKeyValueData; + while (num_key_value_bytes_remaining) + { + if (num_key_value_bytes_remaining < sizeof(uint32)) + return false; + + uint32 key_value_byte_size; + if (serializer.read(&key_value_byte_size, 1, sizeof(uint32)) != sizeof(uint32)) + return false; + + num_key_value_bytes_remaining -= sizeof(uint32); + + if (m_opposite_endianness) + key_value_byte_size = utils::swap32(key_value_byte_size); + + if (key_value_byte_size > num_key_value_bytes_remaining) + return false; + + uint8_vec key_value_data; + if (key_value_byte_size) + { + key_value_data.resize(key_value_byte_size); + if (serializer.read(&key_value_data[0], 1, key_value_byte_size) != key_value_byte_size) + return false; + } + + m_key_values.push_back(key_value_data); + + uint padding = 3 - ((key_value_byte_size + 3) % 4); + if (padding) + { + if (serializer.read(pad_bytes, 1, padding) != padding) + return false; + } + + num_key_value_bytes_remaining -= key_value_byte_size; + if (num_key_value_bytes_remaining < padding) + return false; + num_key_value_bytes_remaining -= padding; + } + + // Now read the mip levels + uint total_faces = get_num_mips() * get_array_size() * get_num_faces() * get_depth(); + if ((!total_faces) || (total_faces > 65535)) + return false; + + // See Section 2.8 of KTX file format: No rounding to block sizes should be applied for block compressed textures. + // OK, I'm going to break that rule otherwise KTX can only store a subset of textures that DDS can handle for no good reason. +#if 0 + const uint mip0_row_blocks = m_header.m_pixelWidth / m_block_dim; + const uint mip0_col_blocks = CRNLIB_MAX(1, m_header.m_pixelHeight) / m_block_dim; +#else + const uint mip0_row_blocks = (m_header.m_pixelWidth + m_block_dim - 1) / m_block_dim; + const uint mip0_col_blocks = (CRNLIB_MAX(1, m_header.m_pixelHeight) + m_block_dim - 1) / m_block_dim; +#endif + if ((!mip0_row_blocks) || (!mip0_col_blocks)) + return false; + + const uint mip0_depth = CRNLIB_MAX(1, m_header.m_pixelDepth); mip0_depth; + + bool has_valid_image_size_fields = true; + bool disable_mip_and_cubemap_padding = false; + +#if CRNLIB_KTX_PVRTEX_WORKAROUNDS + { + // PVRTexTool has a bogus KTX writer that doesn't write any imageSize fields. Nice. + size_t expected_bytes_remaining = 0; + for (uint mip_level = 0; mip_level < get_num_mips(); mip_level++) + { + uint mip_width, mip_height, mip_depth; + get_mip_dim(mip_level, mip_width, mip_height, mip_depth); + + const uint mip_row_blocks = (mip_width + m_block_dim - 1) / m_block_dim; + const uint mip_col_blocks = (mip_height + m_block_dim - 1) / m_block_dim; + if ((!mip_row_blocks) || (!mip_col_blocks)) + return false; + + expected_bytes_remaining += sizeof(uint32); + + if ((!m_header.m_numberOfArrayElements) && (get_num_faces() == 6)) + { + for (uint face = 0; face < get_num_faces(); face++) + { + uint slice_size = mip_row_blocks * mip_col_blocks * m_bytes_per_block; + expected_bytes_remaining += slice_size; + + uint num_cube_pad_bytes = 3 - ((slice_size + 3) % 4); + expected_bytes_remaining += num_cube_pad_bytes; + } + } + else + { + uint total_mip_size = 0; + for (uint array_element = 0; array_element < get_array_size(); array_element++) + { + for (uint face = 0; face < get_num_faces(); face++) + { + for (uint zslice = 0; zslice < mip_depth; zslice++) + { + uint slice_size = mip_row_blocks * mip_col_blocks * m_bytes_per_block; + total_mip_size += slice_size; + } + } + } + expected_bytes_remaining += total_mip_size; + + uint num_mip_pad_bytes = 3 - ((total_mip_size + 3) % 4); + expected_bytes_remaining += num_mip_pad_bytes; + } + } + + if (serializer.get_stream()->get_remaining() < expected_bytes_remaining) + { + has_valid_image_size_fields = false; + disable_mip_and_cubemap_padding = true; + console::warning("ktx_texture::read_from_stream: KTX file size is smaller than expected - trying to read anyway without imageSize fields"); + } + } +#endif + + for (uint mip_level = 0; mip_level < get_num_mips(); mip_level++) + { + uint mip_width, mip_height, mip_depth; + get_mip_dim(mip_level, mip_width, mip_height, mip_depth); + + const uint mip_row_blocks = (mip_width + m_block_dim - 1) / m_block_dim; + const uint mip_col_blocks = (mip_height + m_block_dim - 1) / m_block_dim; + if ((!mip_row_blocks) || (!mip_col_blocks)) + return false; + + uint32 image_size = 0; + if (!has_valid_image_size_fields) + image_size = mip_depth * mip_row_blocks * mip_col_blocks * m_bytes_per_block * get_array_size() * get_num_faces(); + else + { + if (serializer.read(&image_size, 1, sizeof(image_size)) != sizeof(image_size)) + return false; + + if (m_opposite_endianness) + image_size = utils::swap32(image_size); + } + + if (!image_size) + return false; + + uint total_mip_size = 0; + + if ((!m_header.m_numberOfArrayElements) && (get_num_faces() == 6)) + { + // plain non-array cubemap + for (uint face = 0; face < get_num_faces(); face++) + { + CRNLIB_ASSERT(m_image_data.size() == get_image_index(mip_level, 0, face, 0)); + + m_image_data.push_back(uint8_vec()); + uint8_vec& image_data = m_image_data.back(); + + image_data.resize(image_size); + if (serializer.read(&image_data[0], 1, image_size) != image_size) + return false; + + if (m_opposite_endianness) + utils::endian_swap_mem(&image_data[0], image_size, m_header.m_glTypeSize); + + uint num_cube_pad_bytes = disable_mip_and_cubemap_padding ? 0 : (3 - ((image_size + 3) % 4)); + if (serializer.read(pad_bytes, 1, num_cube_pad_bytes) != num_cube_pad_bytes) + return false; + + total_mip_size += image_size + num_cube_pad_bytes; + } + } + else + { + // 1D, 2D, 3D (normal or array texture), or array cubemap + uint num_image_bytes_remaining = image_size; + + for (uint array_element = 0; array_element < get_array_size(); array_element++) + { + for (uint face = 0; face < get_num_faces(); face++) + { + for (uint zslice = 0; zslice < mip_depth; zslice++) + { + CRNLIB_ASSERT(m_image_data.size() == get_image_index(mip_level, array_element, face, zslice)); + + uint slice_size = mip_row_blocks * mip_col_blocks * m_bytes_per_block; + if ((!slice_size) || (slice_size > num_image_bytes_remaining)) + return false; + + m_image_data.push_back(uint8_vec()); + uint8_vec& image_data = m_image_data.back(); + + image_data.resize(slice_size); + if (serializer.read(&image_data[0], 1, slice_size) != slice_size) + return false; + + if (m_opposite_endianness) + utils::endian_swap_mem(&image_data[0], slice_size, m_header.m_glTypeSize); + + num_image_bytes_remaining -= slice_size; + + total_mip_size += slice_size; + } + } + } + + if (num_image_bytes_remaining) + return false; + } + + uint num_mip_pad_bytes = disable_mip_and_cubemap_padding ? 0 : (3 - ((total_mip_size + 3) % 4)); + if (serializer.read(pad_bytes, 1, num_mip_pad_bytes) != num_mip_pad_bytes) + return false; + } + return true; + } + + bool ktx_texture::write_to_stream(data_stream_serializer& serializer, bool no_keyvalue_data) + { + if (!consistency_check()) + { + CRNLIB_ASSERT(0); + return false; + } + + memcpy(m_header.m_identifier, s_ktx_file_id, sizeof(m_header.m_identifier)); + m_header.m_endianness = m_opposite_endianness ? KTX_OPPOSITE_ENDIAN : KTX_ENDIAN; + + if (m_block_dim == 1) + { + m_header.m_glTypeSize = get_ogl_type_size(m_header.m_glType); + m_header.m_glBaseInternalFormat = m_header.m_glFormat; + } + else + { + m_header.m_glBaseInternalFormat = get_ogl_base_internal_fmt(m_header.m_glInternalFormat); + } + + m_header.m_bytesOfKeyValueData = 0; + if (!no_keyvalue_data) + { + for (uint i = 0; i < m_key_values.size(); i++) + m_header.m_bytesOfKeyValueData += sizeof(uint32) + ((m_key_values[i].size() + 3) & ~3); + } + + if (m_opposite_endianness) + m_header.endian_swap(); + + bool success = (serializer.write(&m_header, sizeof(m_header), 1) == 1); + + if (m_opposite_endianness) + m_header.endian_swap(); + + if (!success) + return success; + + uint total_key_value_bytes = 0; + const uint8 padding[3] = { 0, 0, 0 }; + + if (!no_keyvalue_data) + { + for (uint i = 0; i < m_key_values.size(); i++) + { + uint32 key_value_size = m_key_values[i].size(); + + if (m_opposite_endianness) + key_value_size = utils::swap32(key_value_size); + + success = (serializer.write(&key_value_size, sizeof(key_value_size), 1) == 1); + total_key_value_bytes += sizeof(key_value_size); + + if (m_opposite_endianness) + key_value_size = utils::swap32(key_value_size); + + if (!success) + return false; + + if (key_value_size) + { + if (serializer.write(&m_key_values[i][0], key_value_size, 1) != 1) + return false; + total_key_value_bytes += key_value_size; + + uint num_padding = 3 - ((key_value_size + 3) % 4); + if ((num_padding) && (serializer.write(padding, num_padding, 1) != 1)) + return false; + total_key_value_bytes += num_padding; + } + } + (void)total_key_value_bytes; + } + + CRNLIB_ASSERT(total_key_value_bytes == m_header.m_bytesOfKeyValueData); + + for (uint mip_level = 0; mip_level < get_num_mips(); mip_level++) + { + uint mip_width, mip_height, mip_depth; + get_mip_dim(mip_level, mip_width, mip_height, mip_depth); + + const uint mip_row_blocks = (mip_width + m_block_dim - 1) / m_block_dim; + const uint mip_col_blocks = (mip_height + m_block_dim - 1) / m_block_dim; + if ((!mip_row_blocks) || (!mip_col_blocks)) + return false; + + uint32 image_size = mip_row_blocks * mip_col_blocks * m_bytes_per_block; + if ((m_header.m_numberOfArrayElements) || (get_num_faces() == 1)) + image_size *= (get_array_size() * get_num_faces() * get_depth()); + + if (!image_size) + return false; + + if (m_opposite_endianness) + image_size = utils::swap32(image_size); + + success = (serializer.write(&image_size, sizeof(image_size), 1) == 1); + + if (m_opposite_endianness) + image_size = utils::swap32(image_size); + + if (!success) + return false; + + uint total_mip_size = 0; + + if ((!m_header.m_numberOfArrayElements) && (get_num_faces() == 6)) + { + // plain non-array cubemap + for (uint face = 0; face < get_num_faces(); face++) + { + const uint8_vec& image_data = get_image_data(get_image_index(mip_level, 0, face, 0)); + if ((!image_data.size()) || (image_data.size() != image_size)) + return false; + + if (m_opposite_endianness) + { + uint8_vec tmp_image_data(image_data); + utils::endian_swap_mem(&tmp_image_data[0], tmp_image_data.size(), m_header.m_glTypeSize); + if (serializer.write(&tmp_image_data[0], tmp_image_data.size(), 1) != 1) + return false; + } + else if (serializer.write(&image_data[0], image_data.size(), 1) != 1) + return false; + + uint num_cube_pad_bytes = 3 - ((image_data.size() + 3) % 4); + if ((num_cube_pad_bytes) && (serializer.write(padding, num_cube_pad_bytes, 1) != 1)) + return false; + + total_mip_size += image_size + num_cube_pad_bytes; + } + } + else + { + // 1D, 2D, 3D (normal or array texture), or array cubemap + for (uint array_element = 0; array_element < get_array_size(); array_element++) + { + for (uint face = 0; face < get_num_faces(); face++) + { + for (uint zslice = 0; zslice < mip_depth; zslice++) + { + const uint8_vec& image_data = get_image_data(get_image_index(mip_level, array_element, face, zslice)); + if (!image_data.size()) + return false; + + if (m_opposite_endianness) + { + uint8_vec tmp_image_data(image_data); + utils::endian_swap_mem(&tmp_image_data[0], tmp_image_data.size(), m_header.m_glTypeSize); + if (serializer.write(&tmp_image_data[0], tmp_image_data.size(), 1) != 1) + return false; + } + else if (serializer.write(&image_data[0], image_data.size(), 1) != 1) + return false; + + total_mip_size += image_data.size(); + } + } + } + + uint num_mip_pad_bytes = 3 - ((total_mip_size + 3) % 4); + if ((num_mip_pad_bytes) && (serializer.write(padding, num_mip_pad_bytes, 1) != 1)) + return false; + total_mip_size += num_mip_pad_bytes; + } + CRNLIB_ASSERT((total_mip_size & 3) == 0); + } + + return true; + } + + bool ktx_texture::init_2D(uint width, uint height, uint num_mips, uint32 ogl_internal_fmt, uint32 ogl_fmt, uint32 ogl_type) + { + clear(); + + m_header.m_pixelWidth = width; + m_header.m_pixelHeight = height; + m_header.m_numberOfMipmapLevels = num_mips; + m_header.m_glInternalFormat = ogl_internal_fmt; + m_header.m_glFormat = ogl_fmt; + m_header.m_glType = ogl_type; + m_header.m_numberOfFaces = 1; + + if (!compute_pixel_info()) + return false; + + return true; + } + + bool ktx_texture::init_2D_array(uint width, uint height, uint num_mips, uint array_size, uint32 ogl_internal_fmt, uint32 ogl_fmt, uint32 ogl_type) + { + clear(); + + m_header.m_pixelWidth = width; + m_header.m_pixelHeight = height; + m_header.m_numberOfMipmapLevels = num_mips; + m_header.m_numberOfArrayElements = array_size; + m_header.m_glInternalFormat = ogl_internal_fmt; + m_header.m_glFormat = ogl_fmt; + m_header.m_glType = ogl_type; + m_header.m_numberOfFaces = 1; + + if (!compute_pixel_info()) + return false; + + return true; + } + + bool ktx_texture::init_3D(uint width, uint height, uint depth, uint num_mips, uint32 ogl_internal_fmt, uint32 ogl_fmt, uint32 ogl_type) + { + clear(); + + m_header.m_pixelWidth = width; + m_header.m_pixelHeight = height; + m_header.m_pixelDepth = depth; + m_header.m_numberOfMipmapLevels = num_mips; + m_header.m_glInternalFormat = ogl_internal_fmt; + m_header.m_glFormat = ogl_fmt; + m_header.m_glType = ogl_type; + m_header.m_numberOfFaces = 1; + + if (!compute_pixel_info()) + return false; + + return true; + } + + bool ktx_texture::init_cubemap(uint dim, uint num_mips, uint32 ogl_internal_fmt, uint32 ogl_fmt, uint32 ogl_type) + { + clear(); + + m_header.m_pixelWidth = dim; + m_header.m_pixelHeight = dim; + m_header.m_numberOfMipmapLevels = num_mips; + m_header.m_glInternalFormat = ogl_internal_fmt; + m_header.m_glFormat = ogl_fmt; + m_header.m_glType = ogl_type; + m_header.m_numberOfFaces = 6; + + if (!compute_pixel_info()) + return false; + + return true; + } + + bool ktx_texture::check_header() const + { + if (((get_num_faces() != 1) && (get_num_faces() != 6)) || (!m_header.m_pixelWidth)) + return false; + + if ((!m_header.m_pixelHeight) && (m_header.m_pixelDepth)) + return false; + + if ((get_num_faces() == 6) && ((m_header.m_pixelDepth) || (!m_header.m_pixelHeight))) + return false; + + if (m_header.m_numberOfMipmapLevels) + { + const uint max_mipmap_dimension = 1U << (m_header.m_numberOfMipmapLevels - 1U); + if (max_mipmap_dimension > (CRNLIB_MAX(CRNLIB_MAX(m_header.m_pixelWidth, m_header.m_pixelHeight), m_header.m_pixelDepth))) + return false; + } + + return true; + } + + bool ktx_texture::consistency_check() const + { + if (!check_header()) + return false; + + uint block_dim = 0, bytes_per_block = 0; + if ((!m_header.m_glType) || (!m_header.m_glFormat)) + { + if ((m_header.m_glType) || (m_header.m_glFormat)) + return false; + if (!get_ogl_fmt_desc(m_header.m_glInternalFormat, m_header.m_glType, block_dim, bytes_per_block)) + return false; + if (block_dim == 1) + return false; + //if ((get_width() % block_dim) || (get_height() % block_dim)) + // return false; + } + else + { + if (!get_ogl_fmt_desc(m_header.m_glFormat, m_header.m_glType, block_dim, bytes_per_block)) + return false; + if (block_dim > 1) + return false; + } + if ((m_block_dim != block_dim) || (m_bytes_per_block != bytes_per_block)) + return false; + + if (m_image_data.size() != get_total_images()) + return false; + + for (uint mip_level = 0; mip_level < get_num_mips(); mip_level++) + { + uint mip_width, mip_height, mip_depth; + get_mip_dim(mip_level, mip_width, mip_height, mip_depth); + + const uint mip_row_blocks = (mip_width + m_block_dim - 1) / m_block_dim; + const uint mip_col_blocks = (mip_height + m_block_dim - 1) / m_block_dim; + if ((!mip_row_blocks) || (!mip_col_blocks)) + return false; + + for (uint array_element = 0; array_element < get_array_size(); array_element++) + { + for (uint face = 0; face < get_num_faces(); face++) + { + for (uint zslice = 0; zslice < mip_depth; zslice++) + { + const uint8_vec& image_data = get_image_data(get_image_index(mip_level, array_element, face, zslice)); + + uint expected_image_size = mip_row_blocks * mip_col_blocks * m_bytes_per_block; + if (image_data.size() != expected_image_size) + return false; + } + } + } + } + + return true; + } + + const uint8_vec* ktx_texture::find_key(const char* pKey) const + { + const size_t n = strlen(pKey) + 1; + for (uint i = 0; i < m_key_values.size(); i++) + { + const uint8_vec& v = m_key_values[i]; + if ((v.size() >= n) && (!memcmp(&v[0], pKey, n))) + return &v; + } + + return NULL; + } + + bool ktx_texture::get_key_value_as_string(const char* pKey, dynamic_string& str) const + { + const uint8_vec* p = find_key(pKey); + if (!p) + { + str.clear(); + return false; + } + + const uint ofs = (static_cast(strlen(pKey)) + 1); + const uint8* pValue = p->get_ptr() + ofs; + const uint n = p->size() - ofs; + + uint i; + for (i = 0; i < n; i++) + if (!pValue[i]) + break; + + str.set_from_buf(pValue, i); + return true; + } + + uint ktx_texture::add_key_value(const char* pKey, const void* pVal, uint val_size) + { + const uint idx = m_key_values.size(); + m_key_values.resize(idx + 1); + uint8_vec& v = m_key_values.back(); + v.append(reinterpret_cast(pKey), static_cast(strlen(pKey)) + 1); + v.append(static_cast(pVal), val_size); + return idx; + } + +} // namespace crnlib diff --git a/crnlib/crn_ktx_texture.h b/crnlib/crn_ktx_texture.h new file mode 100644 index 0000000..b694fa1 --- /dev/null +++ b/crnlib/crn_ktx_texture.h @@ -0,0 +1,244 @@ +// File: crn_ktx_texture.h +#ifndef _KTX_TEXTURE_H_ +#define _KTX_TEXTURE_H_ +#ifdef _MSC_VER +#pragma once +#endif + +#include "crn_data_stream_serializer.h" + +#define KTX_ENDIAN 0x04030201 +#define KTX_OPPOSITE_ENDIAN 0x01020304 + +namespace crnlib +{ + extern const uint8 s_ktx_file_id[12]; + + struct ktx_header + { + uint8 m_identifier[12]; + uint32 m_endianness; + uint32 m_glType; + uint32 m_glTypeSize; + uint32 m_glFormat; + uint32 m_glInternalFormat; + uint32 m_glBaseInternalFormat; + uint32 m_pixelWidth; + uint32 m_pixelHeight; + uint32 m_pixelDepth; + uint32 m_numberOfArrayElements; + uint32 m_numberOfFaces; + uint32 m_numberOfMipmapLevels; + uint32 m_bytesOfKeyValueData; + + void clear() + { + memset(this, 0, sizeof(*this)); + } + + void endian_swap() + { + utils::endian_swap_mem32(&m_endianness, (sizeof(*this) - sizeof(m_identifier)) / sizeof(uint32)); + } + }; + + typedef crnlib::vector ktx_key_value_vec; + typedef crnlib::vector ktx_image_data_vec; + + // Compressed pixel data formats: ETC1, DXT1, DXT3, DXT5 + enum + { + KTX_ETC1_RGB8_OES = 0x8D64, KTX_RGB_S3TC = 0x83A0, KTX_RGB4_S3TC = 0x83A1, KTX_COMPRESSED_RGB_S3TC_DXT1_EXT = 0x83F0, + KTX_COMPRESSED_RGBA_S3TC_DXT1_EXT = 0x83F1, KTX_COMPRESSED_SRGB_S3TC_DXT1_EXT = 0x8C4C, KTX_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT = 0x8C4D, + KTX_RGBA_S3TC = 0x83A2, KTX_RGBA4_S3TC = 0x83A3, KTX_COMPRESSED_RGBA_S3TC_DXT3_EXT = 0x83F2, KTX_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT = 0x8C4E, + KTX_COMPRESSED_RGBA_S3TC_DXT5_EXT = 0x83F3, KTX_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT = 0x8C4F, KTX_RGBA_DXT5_S3TC = 0x83A4, KTX_RGBA4_DXT5_S3TC = 0x83A5, + KTX_COMPRESSED_RED_RGTC1_EXT = 0x8DBB, KTX_COMPRESSED_SIGNED_RED_RGTC1_EXT = 0x8DBC, KTX_COMPRESSED_RED_GREEN_RGTC2_EXT = 0x8DBD, KTX_COMPRESSED_SIGNED_RED_GREEN_RGTC2_EXT = 0x8DBE, + KTX_COMPRESSED_LUMINANCE_LATC1_EXT = 0x8C70, KTX_COMPRESSED_SIGNED_LUMINANCE_LATC1_EXT = 0x8C71, KTX_COMPRESSED_LUMINANCE_ALPHA_LATC2_EXT = 0x8C72, KTX_COMPRESSED_SIGNED_LUMINANCE_ALPHA_LATC2_EXT = 0x8C73 + }; + + // Pixel formats (various internal, base, and base internal formats) + enum + { + KTX_R8 = 0x8229, KTX_R8UI = 0x8232, KTX_RGB8 = 0x8051, KTX_SRGB8 = 0x8C41, KTX_SRGB = 0x8C40, KTX_SRGB_ALPHA = 0x8C42, + KTX_SRGB8_ALPHA8 = 0x8C43, KTX_RGBA8 = 0x8058, KTX_STENCIL_INDEX = 0x1901, KTX_DEPTH_COMPONENT = 0x1902, KTX_DEPTH_STENCIL = 0x84F9, KTX_RED = 0x1903, + KTX_GREEN = 0x1904, KTX_BLUE = 0x1905, KTX_ALPHA = 0x1906, KTX_RG = 0x8227, KTX_RGB = 0x1907, KTX_RGBA = 0x1908, KTX_BGR = 0x80E0, KTX_BGRA = 0x80E1, + KTX_RED_INTEGER = 0x8D94, KTX_GREEN_INTEGER = 0x8D95, KTX_BLUE_INTEGER = 0x8D96, KTX_ALPHA_INTEGER = 0x8D97, KTX_RGB_INTEGER = 0x8D98, KTX_RGBA_INTEGER = 0x8D99, + KTX_BGR_INTEGER = 0x8D9A, KTX_BGRA_INTEGER = 0x8D9B, KTX_LUMINANCE = 0x1909, KTX_LUMINANCE_ALPHA = 0x190A, KTX_RG_INTEGER = 0x8228, KTX_RG8 = 0x822B, + KTX_ALPHA8 = 0x803C, KTX_LUMINANCE8 = 0x8040, KTX_LUMINANCE8_ALPHA8 = 0x8045 + }; + + // Pixel data types + enum + { + KTX_UNSIGNED_BYTE = 0x1401, KTX_BYTE = 0x1400, KTX_UNSIGNED_SHORT = 0x1403, KTX_SHORT = 0x1402, + KTX_UNSIGNED_INT = 0x1405, KTX_INT = 0x1404, KTX_HALF_FLOAT = 0x140B, KTX_FLOAT = 0x1406, + KTX_UNSIGNED_BYTE_3_3_2 = 0x8032, KTX_UNSIGNED_BYTE_2_3_3_REV = 0x8362, KTX_UNSIGNED_SHORT_5_6_5 = 0x8363, + KTX_UNSIGNED_SHORT_5_6_5_REV = 0x8364, KTX_UNSIGNED_SHORT_4_4_4_4 = 0x8033, KTX_UNSIGNED_SHORT_4_4_4_4_REV = 0x8365, + KTX_UNSIGNED_SHORT_5_5_5_1 = 0x8034, KTX_UNSIGNED_SHORT_1_5_5_5_REV = 0x8366, KTX_UNSIGNED_INT_8_8_8_8 = 0x8035, + KTX_UNSIGNED_INT_8_8_8_8_REV = 0x8367, KTX_UNSIGNED_INT_10_10_10_2 = 0x8036, KTX_UNSIGNED_INT_2_10_10_10_REV = 0x8368, + KTX_UNSIGNED_INT_24_8 = 0x84FA, KTX_UNSIGNED_INT_10F_11F_11F_REV = 0x8C3B, KTX_UNSIGNED_INT_5_9_9_9_REV = 0x8C3E, + KTX_FLOAT_32_UNSIGNED_INT_24_8_REV = 0x8DAD + }; + + bool is_packed_pixel_ogl_type(uint32 ogl_type); + uint get_ogl_type_size(uint32 ogl_type); + bool get_ogl_fmt_desc(uint32 ogl_fmt, uint32 ogl_type, uint& block_dim, uint& bytes_per_block); + uint get_ogl_type_size(uint32 ogl_type); + uint32 get_ogl_base_internal_fmt(uint32 ogl_fmt); + + class ktx_texture + { + public: + ktx_texture() + { + clear(); + } + + ktx_texture(const ktx_texture& other) + { + *this = other; + } + + ktx_texture& operator= (const ktx_texture& rhs) + { + if (this == &rhs) + return *this; + + clear(); + + m_header = rhs.m_header; + m_key_values = rhs.m_key_values; + m_image_data = rhs.m_image_data; + m_block_dim = rhs.m_block_dim; + m_bytes_per_block = rhs.m_bytes_per_block; + m_opposite_endianness = rhs.m_opposite_endianness; + + return *this; + } + + void clear() + { + m_header.clear(); + m_key_values.clear(); + m_image_data.clear(); + + m_block_dim = 0; + m_bytes_per_block = 0; + + m_opposite_endianness = false; + } + + // High level methods + bool read_from_stream(data_stream_serializer& serializer); + bool write_to_stream(data_stream_serializer& serializer, bool no_keyvalue_data = false); + + bool init_2D(uint width, uint height, uint num_mips, uint32 ogl_internal_fmt, uint32 ogl_fmt, uint32 ogl_type); + bool init_2D_array(uint width, uint height, uint num_mips, uint array_size, uint32 ogl_internal_fmt, uint32 ogl_fmt, uint32 ogl_type); + bool init_3D(uint width, uint height, uint depth, uint num_mips, uint32 ogl_internal_fmt, uint32 ogl_fmt, uint32 ogl_type); + bool init_cubemap(uint dim, uint num_mips, uint32 ogl_internal_fmt, uint32 ogl_fmt, uint32 ogl_type); + + bool check_header() const; + bool consistency_check() const; + + // General info + + bool is_valid() const { return (m_header.m_pixelWidth > 0) && (m_image_data.size() > 0); } + + uint get_width() const { return m_header.m_pixelWidth; } + uint get_height() const { return CRNLIB_MAX(m_header.m_pixelHeight, 1); } + uint get_depth() const { return CRNLIB_MAX(m_header.m_pixelDepth, 1); } + uint get_num_mips() const { return CRNLIB_MAX(m_header.m_numberOfMipmapLevels, 1); } + uint get_array_size() const { return CRNLIB_MAX(m_header.m_numberOfArrayElements, 1); } + uint get_num_faces() const { return m_header.m_numberOfFaces; } + + uint32 get_ogl_type() const { return m_header.m_glType; } + uint32 get_ogl_fmt() const { return m_header.m_glFormat; } + uint32 get_ogl_base_fmt() const { return m_header.m_glBaseInternalFormat; } + uint32 get_ogl_internal_fmt() const { return m_header.m_glInternalFormat; } + + uint get_total_images() const { return get_num_mips() * (get_depth() * get_num_faces() * get_array_size()); } + + bool is_compressed() const { return m_block_dim > 1; } + bool is_uncompressed() const { return !is_compressed(); } + + bool get_opposite_endianness() const { return m_opposite_endianness; } + void set_opposite_endianness(bool flag) { m_opposite_endianness = flag; } + + uint32 get_block_dim() const { return m_block_dim; } + uint32 get_bytes_per_block() const { return m_bytes_per_block; } + + const ktx_header& get_header() const { return m_header; } + + // Key values + const ktx_key_value_vec& get_key_value_vec() const { return m_key_values; } + ktx_key_value_vec& get_key_value_vec() { return m_key_values; } + + const uint8_vec* find_key(const char* pKey) const; + bool get_key_value_as_string(const char* pKey, dynamic_string& str) const; + + uint add_key_value(const char* pKey, const void* pVal, uint val_size); + uint add_key_value(const char* pKey, const char* pVal) { return add_key_value(pKey, pVal, static_cast(strlen(pVal)) + 1); } + + // Image data + uint get_num_images() const { return m_image_data.size(); } + + const uint8_vec& get_image_data(uint image_index) const { return m_image_data[image_index]; } + uint8_vec& get_image_data(uint image_index) { return m_image_data[image_index]; } + + const uint8_vec& get_image_data(uint mip_index, uint array_index, uint face_index, uint zslice_index) const { return get_image_data(get_image_index(mip_index, array_index, face_index, zslice_index)); } + uint8_vec& get_image_data(uint mip_index, uint array_index, uint face_index, uint zslice_index) { return get_image_data(get_image_index(mip_index, array_index, face_index, zslice_index)); } + + const ktx_image_data_vec& get_image_data_vec() const { return m_image_data; } + ktx_image_data_vec& get_image_data_vec() { return m_image_data; } + + void add_image(uint face_index, uint mip_index, const void* pImage, uint image_size) + { + const uint image_index = get_image_index(mip_index, 0, face_index, 0); + if (image_index >= m_image_data.size()) + m_image_data.resize(image_index + 1); + if (image_size) + { + uint8_vec& v = m_image_data[image_index]; + v.resize(image_size); + memcpy(&v[0], pImage, image_size); + } + } + + uint get_image_index(uint mip_index, uint array_index, uint face_index, uint zslice_index) const + { + CRNLIB_ASSERT((mip_index < get_num_mips()) && (array_index < get_array_size()) && (face_index < get_num_faces()) && (zslice_index < get_depth())); + return zslice_index + (face_index * get_depth()) + (array_index * (get_depth() * get_num_faces())) + (mip_index * (get_depth() * get_num_faces() * get_array_size())); + } + + void get_mip_dim(uint mip_index, uint& mip_width, uint& mip_height) const + { + CRNLIB_ASSERT(mip_index < get_num_mips()); + mip_width = CRNLIB_MAX(get_width() >> mip_index, 1); + mip_height = CRNLIB_MAX(get_height() >> mip_index, 1); + } + + void get_mip_dim(uint mip_index, uint& mip_width, uint& mip_height, uint& mip_depth) const + { + CRNLIB_ASSERT(mip_index < get_num_mips()); + mip_width = CRNLIB_MAX(get_width() >> mip_index, 1); + mip_height = CRNLIB_MAX(get_height() >> mip_index, 1); + mip_depth = CRNLIB_MAX(get_depth() >> mip_index, 1); + } + + private: + ktx_header m_header; + + ktx_key_value_vec m_key_values; + ktx_image_data_vec m_image_data; + + uint32 m_block_dim; + uint32 m_bytes_per_block; + + bool m_opposite_endianness; + + bool compute_pixel_info(); + }; + +} // namespace crnlib + +#endif // #ifndef _KTX_TEXTURE_H_ diff --git a/crnlib/crn_math.h b/crnlib/crn_math.h index 7d0e4c6..5b48f8a 100644 --- a/crnlib/crn_math.h +++ b/crnlib/crn_math.h @@ -216,10 +216,12 @@ namespace crnlib void compute_lower_pow2_dim(int& width, int& height); void compute_upper_pow2_dim(int& width, int& height); + inline bool equal_tol(float a, float b, float t) { return fabs(a - b) < ((maximum(fabs(a), fabs(b)) + 1.0f) * t); } + inline bool equal_tol(double a, double b, double t) { return fabs(a - b) < ((maximum(fabs(a), fabs(b)) + 1.0f) * t); @@ -228,3 +230,8 @@ namespace crnlib } // namespace crnlib + + + + + diff --git a/crnlib/crn_mem.cpp b/crnlib/crn_mem.cpp index b388ea2..b7abfb9 100644 --- a/crnlib/crn_mem.cpp +++ b/crnlib/crn_mem.cpp @@ -72,6 +72,11 @@ namespace crnlib p_new = ::malloc(size); CRNLIB_ASSERT( (reinterpret_cast(p_new) & (CRNLIB_MIN_ALLOC_ALIGNMENT - 1)) == 0 ); + if (!p_new) + { + printf("WARNING: ::malloc() of size %u failed!\n", (uint)size); + } + if (pActual_size) *pActual_size = p_new ? ::_msize(p_new) : 0; } @@ -106,6 +111,10 @@ namespace crnlib CRNLIB_ASSERT( (reinterpret_cast(p_new) & (CRNLIB_MIN_ALLOC_ALIGNMENT - 1)) == 0 ); p_final_block = p_new; } + else + { + printf("WARNING: ::realloc() of size %u failed!\n", (uint)size); + } } if (pActual_size) diff --git a/crnlib/crn_miniz.cpp b/crnlib/crn_miniz.cpp new file mode 100644 index 0000000..6fc8bdd --- /dev/null +++ b/crnlib/crn_miniz.cpp @@ -0,0 +1,3948 @@ +// File: crn_miniz.cpp +#include "crn_core.h" +#include "crn_miniz.h" + +// ------------------- End of Header: Implementation follows. (If you only want the header, define MINIZ_HEADER_FILE_ONLY.) + +#ifndef MINIZ_HEADER_FILE_ONLY + +typedef unsigned char mz_validate_uint16[sizeof(mz_uint16)==2 ? 1 : -1]; +typedef unsigned char mz_validate_uint32[sizeof(mz_uint32)==4 ? 1 : -1]; +typedef unsigned char mz_validate_uint64[sizeof(mz_uint64)==8 ? 1 : -1]; + +#include +#include + +#include "crn_core.h" + +#define MZ_ASSERT(x) assert(x) + +#ifdef MINIZ_NO_MALLOC + #define MZ_MALLOC(x) NULL + #define MZ_FREE(x) (void)x, ((void)0) + #define MZ_REALLOC(p, x) NULL +#else + #define MZ_MALLOC(x) crnlib::crnlib_malloc(x) + #define MZ_FREE(x) crnlib::crnlib_free(x) + #define MZ_REALLOC(p, x) crnlib::crnlib_realloc(p, x) +#endif + +#define MZ_MAX(a,b) (((a)>(b))?(a):(b)) +#define MZ_MIN(a,b) (((a)<(b))?(a):(b)) +#define MZ_CLEAR_OBJ(obj) memset(&(obj), 0, sizeof(obj)) + +#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN + #define MZ_READ_LE16(p) *((const mz_uint16 *)(p)) + #define MZ_READ_LE32(p) *((const mz_uint32 *)(p)) +#else + #define MZ_READ_LE16(p) ((mz_uint32)(((const mz_uint8 *)(p))[0]) | ((mz_uint32)(((const mz_uint8 *)(p))[1]) << 8U)) + #define MZ_READ_LE32(p) ((mz_uint32)(((const mz_uint8 *)(p))[0]) | ((mz_uint32)(((const mz_uint8 *)(p))[1]) << 8U) | ((mz_uint32)(((const mz_uint8 *)(p))[2]) << 16U) | ((mz_uint32)(((const mz_uint8 *)(p))[3]) << 24U)) +#endif + +#ifdef _MSC_VER + #define MZ_FORCEINLINE __forceinline +#elif defined(__GNUC__) + #define MZ_FORCEINLINE __attribute__((__always_inline__)) +#else + #define MZ_FORCEINLINE +#endif + +#ifdef __cplusplus + extern "C" { +#endif + +// ------------------- zlib-style API's + +mz_ulong mz_adler32(mz_ulong adler, const unsigned char *ptr, size_t buf_len) +{ + mz_uint32 i, s1 = (mz_uint32)(adler & 0xffff), s2 = (mz_uint32)(adler >> 16); size_t block_len = buf_len % 5552; + if (!ptr) return MZ_ADLER32_INIT; + while (buf_len) { + for (i = 0; i + 7 < block_len; i += 8, ptr += 8) { + s1 += ptr[0], s2 += s1; s1 += ptr[1], s2 += s1; s1 += ptr[2], s2 += s1; s1 += ptr[3], s2 += s1; + s1 += ptr[4], s2 += s1; s1 += ptr[5], s2 += s1; s1 += ptr[6], s2 += s1; s1 += ptr[7], s2 += s1; + } + for ( ; i < block_len; ++i) s1 += *ptr++, s2 += s1; + s1 %= 65521U, s2 %= 65521U; buf_len -= block_len; block_len = 5552; + } + return (s2 << 16) + s1; +} + +// Karl Malbrain's compact CRC-32. See "A compact CCITT crc16 and crc32 C implementation that balances processor cache usage against speed": http://www.geocities.com/malbrain/ +mz_ulong mz_crc32(mz_ulong crc, const mz_uint8 *ptr, size_t buf_len) +{ + static const mz_uint32 s_crc32[16] = { 0, 0x1db71064, 0x3b6e20c8, 0x26d930ac, 0x76dc4190, 0x6b6b51f4, 0x4db26158, 0x5005713c, + 0xedb88320, 0xf00f9344, 0xd6d6a3e8, 0xcb61b38c, 0x9b64c2b0, 0x86d3d2d4, 0xa00ae278, 0xbdbdf21c }; + mz_uint32 crcu32 = (mz_uint32)crc; + if (!ptr) return MZ_CRC32_INIT; + crcu32 = ~crcu32; while (buf_len--) { mz_uint8 b = *ptr++; crcu32 = (crcu32 >> 4) ^ s_crc32[(crcu32 & 0xF) ^ (b & 0xF)]; crcu32 = (crcu32 >> 4) ^ s_crc32[(crcu32 & 0xF) ^ (b >> 4)]; } + return ~crcu32; +} + +void mz_free(void *p) +{ + MZ_FREE(p); +} + +#ifndef MINIZ_NO_ZLIB_APIS + +static void *def_alloc_func(void *opaque, size_t items, size_t size) { (void)opaque, (void)items, (void)size; return MZ_MALLOC(items * size); } +static void def_free_func(void *opaque, void *address) { (void)opaque, (void)address; MZ_FREE(address); } +static void *def_realloc_func(void *opaque, void *address, size_t items, size_t size) { (void)opaque, (void)address, (void)items, (void)size; return MZ_REALLOC(address, items * size); } + +const char *mz_version(void) +{ + return MZ_VERSION; +} + +int mz_deflateInit(mz_streamp pStream, int level) +{ + return mz_deflateInit2(pStream, level, MZ_DEFLATED, MZ_DEFAULT_WINDOW_BITS, 9, MZ_DEFAULT_STRATEGY); +} + +int mz_deflateInit2(mz_streamp pStream, int level, int method, int window_bits, int mem_level, int strategy) +{ + tdefl_compressor *pComp; + mz_uint comp_flags = TDEFL_COMPUTE_ADLER32 | tdefl_create_comp_flags_from_zip_params(level, window_bits, strategy); + + if (!pStream) return MZ_STREAM_ERROR; + if ((method != MZ_DEFLATED) || ((mem_level < 1) || (mem_level > 9)) || ((window_bits != MZ_DEFAULT_WINDOW_BITS) && (-window_bits != MZ_DEFAULT_WINDOW_BITS))) return MZ_PARAM_ERROR; + + pStream->data_type = 0; + pStream->adler = MZ_ADLER32_INIT; + pStream->msg = NULL; + pStream->reserved = 0; + pStream->total_in = 0; + pStream->total_out = 0; + if (!pStream->zalloc) pStream->zalloc = def_alloc_func; + if (!pStream->zfree) pStream->zfree = def_free_func; + + pComp = (tdefl_compressor *)pStream->zalloc(pStream->opaque, 1, sizeof(tdefl_compressor)); + if (!pComp) + return MZ_MEM_ERROR; + + pStream->state = (struct mz_internal_state *)pComp; + + if (tdefl_init(pComp, NULL, NULL, comp_flags) != TDEFL_STATUS_OKAY) + { + mz_deflateEnd(pStream); + return MZ_PARAM_ERROR; + } + + return MZ_OK; +} + +int mz_deflateReset(mz_streamp pStream) +{ + if ((!pStream) || (!pStream->state) || (!pStream->zalloc) || (!pStream->zfree)) return MZ_STREAM_ERROR; + pStream->total_in = pStream->total_out = 0; + tdefl_init((tdefl_compressor*)pStream->state, NULL, NULL, ((tdefl_compressor*)pStream->state)->m_flags); + return MZ_OK; +} + +int mz_deflate(mz_streamp pStream, int flush) +{ + size_t in_bytes, out_bytes; + mz_ulong orig_total_in, orig_total_out; + int mz_status = MZ_OK; + + if ((!pStream) || (!pStream->state) || (flush < 0) || (flush > MZ_FINISH) || (!pStream->next_out)) return MZ_STREAM_ERROR; + if (!pStream->avail_out) return MZ_BUF_ERROR; + + if (flush == MZ_PARTIAL_FLUSH) flush = MZ_SYNC_FLUSH; + + if (((tdefl_compressor*)pStream->state)->m_prev_return_status == TDEFL_STATUS_DONE) + return (flush == MZ_FINISH) ? MZ_STREAM_END : MZ_BUF_ERROR; + + orig_total_in = pStream->total_in; orig_total_out = pStream->total_out; + for ( ; ; ) + { + tdefl_status defl_status; + in_bytes = pStream->avail_in; out_bytes = pStream->avail_out; + + defl_status = tdefl_compress((tdefl_compressor*)pStream->state, pStream->next_in, &in_bytes, pStream->next_out, &out_bytes, (tdefl_flush)flush); + pStream->next_in += (mz_uint)in_bytes; pStream->avail_in -= (mz_uint)in_bytes; + pStream->total_in += (mz_uint)in_bytes; pStream->adler = tdefl_get_adler32((tdefl_compressor*)pStream->state); + + pStream->next_out += (mz_uint)out_bytes; pStream->avail_out -= (mz_uint)out_bytes; + pStream->total_out += (mz_uint)out_bytes; + + if (defl_status < 0) + { + mz_status = MZ_STREAM_ERROR; + break; + } + else if (defl_status == TDEFL_STATUS_DONE) + { + mz_status = MZ_STREAM_END; + break; + } + else if (!pStream->avail_out) + break; + else if ((!pStream->avail_in) && (flush != MZ_FINISH)) + { + if ((flush) || (pStream->total_in != orig_total_in) || (pStream->total_out != orig_total_out)) + break; + return MZ_BUF_ERROR; // Can't make forward progress without some input. + } + } + return mz_status; +} + +int mz_deflateEnd(mz_streamp pStream) +{ + if (!pStream) return MZ_STREAM_ERROR; + if (pStream->state) + { + pStream->zfree(pStream->opaque, pStream->state); + pStream->state = NULL; + } + return MZ_OK; +} + +mz_ulong mz_deflateBound(mz_streamp pStream, mz_ulong source_len) +{ + (void)pStream; + // This is really over conservative. (And lame, but it's actually pretty tricky to compute a true upper bound given the way tdefl's blocking works.) + return MZ_MAX(128 + (source_len * 110) / 100, 128 + source_len + ((source_len / (31 * 1024)) + 1) * 5); +} + +int mz_compress2(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len, int level) +{ + int status; + mz_stream stream; + memset(&stream, 0, sizeof(stream)); + + // In case mz_ulong is 64-bits (argh I hate longs). + if ((source_len | *pDest_len) > 0xFFFFFFFFU) return MZ_PARAM_ERROR; + + stream.next_in = pSource; + stream.avail_in = (mz_uint32)source_len; + stream.next_out = pDest; + stream.avail_out = (mz_uint32)*pDest_len; + + status = mz_deflateInit(&stream, level); + if (status != MZ_OK) return status; + + status = mz_deflate(&stream, MZ_FINISH); + if (status != MZ_STREAM_END) + { + mz_deflateEnd(&stream); + return (status == MZ_OK) ? MZ_BUF_ERROR : status; + } + + *pDest_len = stream.total_out; + return mz_deflateEnd(&stream); +} + +int mz_compress(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len) +{ + return mz_compress2(pDest, pDest_len, pSource, source_len, MZ_DEFAULT_COMPRESSION); +} + +mz_ulong mz_compressBound(mz_ulong source_len) +{ + return mz_deflateBound(NULL, source_len); +} + +typedef struct +{ + tinfl_decompressor m_decomp; + mz_uint m_dict_ofs, m_dict_avail, m_first_call, m_has_flushed; int m_window_bits; + mz_uint8 m_dict[TINFL_LZ_DICT_SIZE]; + tinfl_status m_last_status; +} inflate_state; + +int mz_inflateInit2(mz_streamp pStream, int window_bits) +{ + inflate_state *pDecomp; + if (!pStream) return MZ_STREAM_ERROR; + if ((window_bits != MZ_DEFAULT_WINDOW_BITS) && (-window_bits != MZ_DEFAULT_WINDOW_BITS)) return MZ_PARAM_ERROR; + + pStream->data_type = 0; + pStream->adler = 0; + pStream->msg = NULL; + pStream->total_in = 0; + pStream->total_out = 0; + pStream->reserved = 0; + if (!pStream->zalloc) pStream->zalloc = def_alloc_func; + if (!pStream->zfree) pStream->zfree = def_free_func; + + pDecomp = (inflate_state*)pStream->zalloc(pStream->opaque, 1, sizeof(inflate_state)); + if (!pDecomp) return MZ_MEM_ERROR; + + pStream->state = (struct mz_internal_state *)pDecomp; + + tinfl_init(&pDecomp->m_decomp); + pDecomp->m_dict_ofs = 0; + pDecomp->m_dict_avail = 0; + pDecomp->m_last_status = TINFL_STATUS_NEEDS_MORE_INPUT; + pDecomp->m_first_call = 1; + pDecomp->m_has_flushed = 0; + pDecomp->m_window_bits = window_bits; + + return MZ_OK; +} + +int mz_inflateInit(mz_streamp pStream) +{ + return mz_inflateInit2(pStream, MZ_DEFAULT_WINDOW_BITS); +} + +int mz_inflate(mz_streamp pStream, int flush) +{ + inflate_state* pState; + mz_uint n, first_call, decomp_flags = TINFL_FLAG_COMPUTE_ADLER32; + size_t in_bytes, out_bytes, orig_avail_in; + tinfl_status status; + + if ((!pStream) || (!pStream->state)) return MZ_STREAM_ERROR; + if (flush == MZ_PARTIAL_FLUSH) flush = MZ_SYNC_FLUSH; + if ((flush) && (flush != MZ_SYNC_FLUSH) && (flush != MZ_FINISH)) return MZ_STREAM_ERROR; + + pState = (inflate_state*)pStream->state; + if (pState->m_window_bits > 0) decomp_flags |= TINFL_FLAG_PARSE_ZLIB_HEADER; + orig_avail_in = pStream->avail_in; + + first_call = pState->m_first_call; pState->m_first_call = 0; + if (pState->m_last_status < 0) return MZ_DATA_ERROR; + + if (pState->m_has_flushed && (flush != MZ_FINISH)) return MZ_STREAM_ERROR; + pState->m_has_flushed |= (flush == MZ_FINISH); + + if ((flush == MZ_FINISH) && (first_call)) + { + // MZ_FINISH on the first call implies that the input and output buffers are large enough to hold the entire compressed/decompressed file. + decomp_flags |= TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF; + in_bytes = pStream->avail_in; out_bytes = pStream->avail_out; + status = tinfl_decompress(&pState->m_decomp, pStream->next_in, &in_bytes, pStream->next_out, pStream->next_out, &out_bytes, decomp_flags); + pState->m_last_status = status; + pStream->next_in += (mz_uint)in_bytes; pStream->avail_in -= (mz_uint)in_bytes; pStream->total_in += (mz_uint)in_bytes; + pStream->adler = tinfl_get_adler32(&pState->m_decomp); + pStream->next_out += (mz_uint)out_bytes; pStream->avail_out -= (mz_uint)out_bytes; pStream->total_out += (mz_uint)out_bytes; + + if (status < 0) + return MZ_DATA_ERROR; + else if (status != TINFL_STATUS_DONE) + { + pState->m_last_status = TINFL_STATUS_FAILED; + return MZ_BUF_ERROR; + } + return MZ_STREAM_END; + } + // flush != MZ_FINISH then we must assume there's more input. + if (flush != MZ_FINISH) decomp_flags |= TINFL_FLAG_HAS_MORE_INPUT; + + if (pState->m_dict_avail) + { + n = MZ_MIN(pState->m_dict_avail, pStream->avail_out); + memcpy(pStream->next_out, pState->m_dict + pState->m_dict_ofs, n); + pStream->next_out += n; pStream->avail_out -= n; pStream->total_out += n; + pState->m_dict_avail -= n; pState->m_dict_ofs = (pState->m_dict_ofs + n) & (TINFL_LZ_DICT_SIZE - 1); + return ((pState->m_last_status == TINFL_STATUS_DONE) && (!pState->m_dict_avail)) ? MZ_STREAM_END : MZ_OK; + } + + for ( ; ; ) + { + in_bytes = pStream->avail_in; + out_bytes = TINFL_LZ_DICT_SIZE - pState->m_dict_ofs; + + status = tinfl_decompress(&pState->m_decomp, pStream->next_in, &in_bytes, pState->m_dict, pState->m_dict + pState->m_dict_ofs, &out_bytes, decomp_flags); + pState->m_last_status = status; + + pStream->next_in += (mz_uint)in_bytes; pStream->avail_in -= (mz_uint)in_bytes; + pStream->total_in += (mz_uint)in_bytes; pStream->adler = tinfl_get_adler32(&pState->m_decomp); + + pState->m_dict_avail = (mz_uint)out_bytes; + + n = MZ_MIN(pState->m_dict_avail, pStream->avail_out); + memcpy(pStream->next_out, pState->m_dict + pState->m_dict_ofs, n); + pStream->next_out += n; pStream->avail_out -= n; pStream->total_out += n; + pState->m_dict_avail -= n; pState->m_dict_ofs = (pState->m_dict_ofs + n) & (TINFL_LZ_DICT_SIZE - 1); + + if (status < 0) + return MZ_DATA_ERROR; // Stream is corrupted (there could be some uncompressed data left in the output dictionary - oh well). + else if ((status == TINFL_STATUS_NEEDS_MORE_INPUT) && (!orig_avail_in)) + return MZ_BUF_ERROR; // Signal caller that we can't make forward progress without supplying more input or by setting flush to MZ_FINISH. + else if (flush == MZ_FINISH) + { + // The output buffer MUST be large to hold the remaining uncompressed data when flush==MZ_FINISH. + if (status == TINFL_STATUS_DONE) + return pState->m_dict_avail ? MZ_BUF_ERROR : MZ_STREAM_END; + // status here must be TINFL_STATUS_HAS_MORE_OUTPUT, which means there's at least 1 more byte on the way. If there's no more room left in the output buffer then something is wrong. + else if (!pStream->avail_out) + return MZ_BUF_ERROR; + } + else if ((status == TINFL_STATUS_DONE) || (!pStream->avail_in) || (!pStream->avail_out) || (pState->m_dict_avail)) + break; + } + + return ((status == TINFL_STATUS_DONE) && (!pState->m_dict_avail)) ? MZ_STREAM_END : MZ_OK; +} + +int mz_inflateEnd(mz_streamp pStream) +{ + if (!pStream) + return MZ_STREAM_ERROR; + if (pStream->state) + { + pStream->zfree(pStream->opaque, pStream->state); + pStream->state = NULL; + } + return MZ_OK; +} + +int mz_uncompress(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len) +{ + mz_stream stream; + int status; + memset(&stream, 0, sizeof(stream)); + + // In case mz_ulong is 64-bits (argh I hate longs). + if ((source_len | *pDest_len) > 0xFFFFFFFFU) return MZ_PARAM_ERROR; + + stream.next_in = pSource; + stream.avail_in = (mz_uint32)source_len; + stream.next_out = pDest; + stream.avail_out = (mz_uint32)*pDest_len; + + status = mz_inflateInit(&stream); + if (status != MZ_OK) + return status; + + status = mz_inflate(&stream, MZ_FINISH); + if (status != MZ_STREAM_END) + { + mz_inflateEnd(&stream); + return ((status == MZ_BUF_ERROR) && (!stream.avail_in)) ? MZ_DATA_ERROR : status; + } + *pDest_len = stream.total_out; + + return mz_inflateEnd(&stream); +} + +const char *mz_error(int err) +{ + static struct { int m_err; const char *m_pDesc; } s_error_descs[] = + { + { MZ_OK, "" }, { MZ_STREAM_END, "stream end" }, { MZ_NEED_DICT, "need dictionary" }, { MZ_ERRNO, "file error" }, { MZ_STREAM_ERROR, "stream error" }, + { MZ_DATA_ERROR, "data error" }, { MZ_MEM_ERROR, "out of memory" }, { MZ_BUF_ERROR, "buf error" }, { MZ_VERSION_ERROR, "version error" }, { MZ_PARAM_ERROR, "parameter error" } + }; + mz_uint i; for (i = 0; i < sizeof(s_error_descs) / sizeof(s_error_descs[0]); ++i) if (s_error_descs[i].m_err == err) return s_error_descs[i].m_pDesc; + return NULL; +} + +#endif //MINIZ_NO_ZLIB_APIS + +// ------------------- Low-level Decompression (completely independent from all compression API's) + +#define TINFL_MEMCPY(d, s, l) memcpy(d, s, l) +#define TINFL_MEMSET(p, c, l) memset(p, c, l) + +#define TINFL_CR_BEGIN switch(r->m_state) { case 0: +#define TINFL_CR_RETURN(state_index, result) do { status = result; r->m_state = state_index; goto common_exit; case state_index:; } MZ_MACRO_END +#define TINFL_CR_RETURN_FOREVER(state_index, result) do { for ( ; ; ) { TINFL_CR_RETURN(state_index, result); } } MZ_MACRO_END +#define TINFL_CR_FINISH } + +// TODO: If the caller has indicated that there's no more input, and we attempt to read beyond the input buf, then something is wrong with the input because the inflator never +// reads ahead more than it needs to. Currently TINFL_GET_BYTE() pads the end of the stream with 0's in this scenario. +#define TINFL_GET_BYTE(state_index, c) do { \ + if (pIn_buf_cur >= pIn_buf_end) { \ + for ( ; ; ) { \ + if (decomp_flags & TINFL_FLAG_HAS_MORE_INPUT) { \ + TINFL_CR_RETURN(state_index, TINFL_STATUS_NEEDS_MORE_INPUT); \ + if (pIn_buf_cur < pIn_buf_end) { \ + c = *pIn_buf_cur++; \ + break; \ + } \ + } else { \ + c = 0; \ + break; \ + } \ + } \ + } else c = *pIn_buf_cur++; } MZ_MACRO_END + +#define TINFL_NEED_BITS(state_index, n) do { mz_uint c; TINFL_GET_BYTE(state_index, c); bit_buf |= (((tinfl_bit_buf_t)c) << num_bits); num_bits += 8; } while (num_bits < (mz_uint)(n)) +#define TINFL_SKIP_BITS(state_index, n) do { if (num_bits < (mz_uint)(n)) { TINFL_NEED_BITS(state_index, n); } bit_buf >>= (n); num_bits -= (n); } MZ_MACRO_END +#define TINFL_GET_BITS(state_index, b, n) do { if (num_bits < (mz_uint)(n)) { TINFL_NEED_BITS(state_index, n); } b = bit_buf & ((1 << (n)) - 1); bit_buf >>= (n); num_bits -= (n); } MZ_MACRO_END + +// TINFL_HUFF_BITBUF_FILL() is only used rarely, when the number of bytes remaining in the input buffer falls below 2. +// It reads just enough bytes from the input stream that are needed to decode the next Huffman code (and absolutely no more). It works by trying to fully decode a +// Huffman code by using whatever bits are currently present in the bit buffer. If this fails, it reads another byte, and tries again until it succeeds or until the +// bit buffer contains >=15 bits (deflate's max. Huffman code size). +#define TINFL_HUFF_BITBUF_FILL(state_index, pHuff) \ + do { \ + temp = (pHuff)->m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]; \ + if (temp >= 0) { \ + code_len = temp >> 9; \ + if ((code_len) && (num_bits >= code_len)) \ + break; \ + } else if (num_bits > TINFL_FAST_LOOKUP_BITS) { \ + code_len = TINFL_FAST_LOOKUP_BITS; \ + do { \ + temp = (pHuff)->m_tree[~temp + ((bit_buf >> code_len++) & 1)]; \ + } while ((temp < 0) && (num_bits >= (code_len + 1))); if (temp >= 0) break; \ + } TINFL_GET_BYTE(state_index, c); bit_buf |= (((tinfl_bit_buf_t)c) << num_bits); num_bits += 8; \ + } while (num_bits < 15); + +// TINFL_HUFF_DECODE() decodes the next Huffman coded symbol. It's more complex than you would initially expect because the zlib API expects the decompressor to never read +// beyond the final byte of the deflate stream. (In other words, when this macro wants to read another byte from the input, it REALLY needs another byte in order to fully +// decode the next Huffman code.) Handling this properly is particularly important on raw deflate (non-zlib) streams, which aren't followed by a byte aligned adler-32. +// The slow path is only executed at the very end of the input buffer. +#define TINFL_HUFF_DECODE(state_index, sym, pHuff) do { \ + int temp; mz_uint code_len, c; \ + if (num_bits < 15) { \ + if ((pIn_buf_end - pIn_buf_cur) < 2) { \ + TINFL_HUFF_BITBUF_FILL(state_index, pHuff); \ + } else { \ + bit_buf |= (((tinfl_bit_buf_t)pIn_buf_cur[0]) << num_bits) | (((tinfl_bit_buf_t)pIn_buf_cur[1]) << (num_bits + 8)); pIn_buf_cur += 2; num_bits += 16; \ + } \ + } \ + if ((temp = (pHuff)->m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]) >= 0) \ + code_len = temp >> 9, temp &= 511; \ + else { \ + code_len = TINFL_FAST_LOOKUP_BITS; do { temp = (pHuff)->m_tree[~temp + ((bit_buf >> code_len++) & 1)]; } while (temp < 0); \ + } sym = temp; bit_buf >>= code_len; num_bits -= code_len; } MZ_MACRO_END + +tinfl_status tinfl_decompress(tinfl_decompressor *r, const mz_uint8 *pIn_buf_next, size_t *pIn_buf_size, mz_uint8 *pOut_buf_start, mz_uint8 *pOut_buf_next, size_t *pOut_buf_size, const mz_uint32 decomp_flags) +{ + static const int s_length_base[31] = { 3,4,5,6,7,8,9,10,11,13, 15,17,19,23,27,31,35,43,51,59, 67,83,99,115,131,163,195,227,258,0,0 }; + static const int s_length_extra[31]= { 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0,0,0 }; + static const int s_dist_base[32] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193, 257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577,0,0}; + static const int s_dist_extra[32] = { 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13}; + static const mz_uint8 s_length_dezigzag[19] = { 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 }; + static const int s_min_table_sizes[3] = { 257, 1, 4 }; + + tinfl_status status = TINFL_STATUS_FAILED; mz_uint32 num_bits, dist, counter, num_extra; tinfl_bit_buf_t bit_buf; + const mz_uint8 *pIn_buf_cur = pIn_buf_next, *const pIn_buf_end = pIn_buf_next + *pIn_buf_size; + mz_uint8 *pOut_buf_cur = pOut_buf_next, *const pOut_buf_end = pOut_buf_next + *pOut_buf_size; + size_t out_buf_size_mask = (decomp_flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF) ? (size_t)-1 : ((pOut_buf_next - pOut_buf_start) + *pOut_buf_size) - 1, dist_from_out_buf_start; + + // Ensure the output buffer's size is a power of 2, unless the output buffer is large enough to hold the entire output file (in which case it doesn't matter). + if (((out_buf_size_mask + 1) & out_buf_size_mask) || (pOut_buf_next < pOut_buf_start)) { *pIn_buf_size = *pOut_buf_size = 0; return TINFL_STATUS_BAD_PARAM; } + + num_bits = r->m_num_bits; bit_buf = r->m_bit_buf; dist = r->m_dist; counter = r->m_counter; num_extra = r->m_num_extra; dist_from_out_buf_start = r->m_dist_from_out_buf_start; + TINFL_CR_BEGIN + + bit_buf = num_bits = dist = counter = num_extra = r->m_zhdr0 = r->m_zhdr1 = 0; r->m_z_adler32 = r->m_check_adler32 = 1; + if (decomp_flags & TINFL_FLAG_PARSE_ZLIB_HEADER) + { + TINFL_GET_BYTE(1, r->m_zhdr0); TINFL_GET_BYTE(2, r->m_zhdr1); + counter = (((r->m_zhdr0 * 256 + r->m_zhdr1) % 31 != 0) || (r->m_zhdr1 & 32) || ((r->m_zhdr0 & 15) != 8)); + if (!(decomp_flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF)) counter |= (((1U << (8U + (r->m_zhdr0 >> 4))) > 32768U) || ((out_buf_size_mask + 1) < (size_t)(1U << (8U + (r->m_zhdr0 >> 4))))); + if (counter) { TINFL_CR_RETURN_FOREVER(36, TINFL_STATUS_FAILED); } + } + + do + { + TINFL_GET_BITS(3, r->m_final, 3); r->m_type = r->m_final >> 1; + if (r->m_type == 0) + { + TINFL_SKIP_BITS(5, num_bits & 7); + for (counter = 0; counter < 4; ++counter) { if (num_bits) TINFL_GET_BITS(6, r->m_raw_header[counter], 8); else TINFL_GET_BYTE(7, r->m_raw_header[counter]); } + if ((counter = (r->m_raw_header[0] | (r->m_raw_header[1] << 8))) != (mz_uint)(0xFFFF ^ (r->m_raw_header[2] | (r->m_raw_header[3] << 8)))) { TINFL_CR_RETURN_FOREVER(39, TINFL_STATUS_FAILED); } + while ((counter) && (num_bits)) + { + TINFL_GET_BITS(51, dist, 8); + while (pOut_buf_cur >= pOut_buf_end) { TINFL_CR_RETURN(52, TINFL_STATUS_HAS_MORE_OUTPUT); } + *pOut_buf_cur++ = (mz_uint8)dist; + counter--; + } + while (counter) + { + size_t n; while (pOut_buf_cur >= pOut_buf_end) { TINFL_CR_RETURN(9, TINFL_STATUS_HAS_MORE_OUTPUT); } + while (pIn_buf_cur >= pIn_buf_end) + { + if (decomp_flags & TINFL_FLAG_HAS_MORE_INPUT) + { + TINFL_CR_RETURN(38, TINFL_STATUS_NEEDS_MORE_INPUT); + } + else + { + TINFL_CR_RETURN_FOREVER(40, TINFL_STATUS_FAILED); + } + } + n = MZ_MIN(MZ_MIN((size_t)(pOut_buf_end - pOut_buf_cur), (size_t)(pIn_buf_end - pIn_buf_cur)), counter); + TINFL_MEMCPY(pOut_buf_cur, pIn_buf_cur, n); pIn_buf_cur += n; pOut_buf_cur += n; counter -= (mz_uint)n; + } + } + else if (r->m_type == 3) + { + TINFL_CR_RETURN_FOREVER(10, TINFL_STATUS_FAILED); + } + else + { + if (r->m_type == 1) + { + mz_uint8 *p = r->m_tables[0].m_code_size; mz_uint i; + r->m_table_sizes[0] = 288; r->m_table_sizes[1] = 32; TINFL_MEMSET(r->m_tables[1].m_code_size, 5, 32); + for ( i = 0; i <= 143; ++i) *p++ = 8; for ( ; i <= 255; ++i) *p++ = 9; for ( ; i <= 279; ++i) *p++ = 7; for ( ; i <= 287; ++i) *p++ = 8; + } + else + { + for (counter = 0; counter < 3; counter++) { TINFL_GET_BITS(11, r->m_table_sizes[counter], "\05\05\04"[counter]); r->m_table_sizes[counter] += s_min_table_sizes[counter]; } + MZ_CLEAR_OBJ(r->m_tables[2].m_code_size); for (counter = 0; counter < r->m_table_sizes[2]; counter++) { mz_uint s; TINFL_GET_BITS(14, s, 3); r->m_tables[2].m_code_size[s_length_dezigzag[counter]] = (mz_uint8)s; } + r->m_table_sizes[2] = 19; + } + for ( ; (int)r->m_type >= 0; r->m_type--) + { + int tree_next, tree_cur; tinfl_huff_table *pTable; + mz_uint i, j, used_syms, total, sym_index, next_code[17], total_syms[16]; pTable = &r->m_tables[r->m_type]; MZ_CLEAR_OBJ(total_syms); MZ_CLEAR_OBJ(pTable->m_look_up); MZ_CLEAR_OBJ(pTable->m_tree); + for (i = 0; i < r->m_table_sizes[r->m_type]; ++i) total_syms[pTable->m_code_size[i]]++; + used_syms = 0, total = 0; next_code[0] = next_code[1] = 0; + for (i = 1; i <= 15; ++i) { used_syms += total_syms[i]; next_code[i + 1] = (total = ((total + total_syms[i]) << 1)); } + if ((65536 != total) && (used_syms > 1)) + { + TINFL_CR_RETURN_FOREVER(35, TINFL_STATUS_FAILED); + } + for (tree_next = -1, sym_index = 0; sym_index < r->m_table_sizes[r->m_type]; ++sym_index) + { + mz_uint rev_code = 0, l, cur_code, code_size = pTable->m_code_size[sym_index]; if (!code_size) continue; + cur_code = next_code[code_size]++; for (l = code_size; l > 0; l--, cur_code >>= 1) rev_code = (rev_code << 1) | (cur_code & 1); + if (code_size <= TINFL_FAST_LOOKUP_BITS) { mz_int16 k = (mz_int16)((code_size << 9) | sym_index); while (rev_code < TINFL_FAST_LOOKUP_SIZE) { pTable->m_look_up[rev_code] = k; rev_code += (1 << code_size); } continue; } + if (0 == (tree_cur = pTable->m_look_up[rev_code & (TINFL_FAST_LOOKUP_SIZE - 1)])) { pTable->m_look_up[rev_code & (TINFL_FAST_LOOKUP_SIZE - 1)] = (mz_int16)tree_next; tree_cur = tree_next; tree_next -= 2; } + rev_code >>= (TINFL_FAST_LOOKUP_BITS - 1); + for (j = code_size; j > (TINFL_FAST_LOOKUP_BITS + 1); j--) + { + tree_cur -= ((rev_code >>= 1) & 1); + if (!pTable->m_tree[-tree_cur - 1]) { pTable->m_tree[-tree_cur - 1] = (mz_int16)tree_next; tree_cur = tree_next; tree_next -= 2; } else tree_cur = pTable->m_tree[-tree_cur - 1]; + } + tree_cur -= ((rev_code >>= 1) & 1); pTable->m_tree[-tree_cur - 1] = (mz_int16)sym_index; + } + if (r->m_type == 2) + { + for (counter = 0; counter < (r->m_table_sizes[0] + r->m_table_sizes[1]); ) + { + mz_uint s; TINFL_HUFF_DECODE(16, dist, &r->m_tables[2]); if (dist < 16) { r->m_len_codes[counter++] = (mz_uint8)dist; continue; } + if ((dist == 16) && (!counter)) + { + TINFL_CR_RETURN_FOREVER(17, TINFL_STATUS_FAILED); + } + num_extra = "\02\03\07"[dist - 16]; TINFL_GET_BITS(18, s, num_extra); s += "\03\03\013"[dist - 16]; + TINFL_MEMSET(r->m_len_codes + counter, (dist == 16) ? r->m_len_codes[counter - 1] : 0, s); counter += s; + } + if ((r->m_table_sizes[0] + r->m_table_sizes[1]) != counter) + { + TINFL_CR_RETURN_FOREVER(21, TINFL_STATUS_FAILED); + } + TINFL_MEMCPY(r->m_tables[0].m_code_size, r->m_len_codes, r->m_table_sizes[0]); TINFL_MEMCPY(r->m_tables[1].m_code_size, r->m_len_codes + r->m_table_sizes[0], r->m_table_sizes[1]); + } + } + for ( ; ; ) + { + mz_uint8 *pSrc; + for ( ; ; ) + { + if (((pIn_buf_end - pIn_buf_cur) < 4) || ((pOut_buf_end - pOut_buf_cur) < 2)) + { + TINFL_HUFF_DECODE(23, counter, &r->m_tables[0]); + if (counter >= 256) + break; + while (pOut_buf_cur >= pOut_buf_end) { TINFL_CR_RETURN(24, TINFL_STATUS_HAS_MORE_OUTPUT); } + *pOut_buf_cur++ = (mz_uint8)counter; + } + else + { + int sym2; mz_uint code_len; +#if TINFL_USE_64BIT_BITBUF + if (num_bits < 30) { bit_buf |= (((tinfl_bit_buf_t)MZ_READ_LE32(pIn_buf_cur)) << num_bits); pIn_buf_cur += 4; num_bits += 32; } +#else + if (num_bits < 15) { bit_buf |= (((tinfl_bit_buf_t)MZ_READ_LE16(pIn_buf_cur)) << num_bits); pIn_buf_cur += 2; num_bits += 16; } +#endif + if ((sym2 = r->m_tables[0].m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]) >= 0) + code_len = sym2 >> 9; + else + { + code_len = TINFL_FAST_LOOKUP_BITS; do { sym2 = r->m_tables[0].m_tree[~sym2 + ((bit_buf >> code_len++) & 1)]; } while (sym2 < 0); + } + counter = sym2; bit_buf >>= code_len; num_bits -= code_len; + if (counter & 256) + break; + +#if !TINFL_USE_64BIT_BITBUF + if (num_bits < 15) { bit_buf |= (((tinfl_bit_buf_t)MZ_READ_LE16(pIn_buf_cur)) << num_bits); pIn_buf_cur += 2; num_bits += 16; } +#endif + if ((sym2 = r->m_tables[0].m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]) >= 0) + code_len = sym2 >> 9; + else + { + code_len = TINFL_FAST_LOOKUP_BITS; do { sym2 = r->m_tables[0].m_tree[~sym2 + ((bit_buf >> code_len++) & 1)]; } while (sym2 < 0); + } + bit_buf >>= code_len; num_bits -= code_len; + + pOut_buf_cur[0] = (mz_uint8)counter; + if (sym2 & 256) + { + pOut_buf_cur++; + counter = sym2; + break; + } + pOut_buf_cur[1] = (mz_uint8)sym2; + pOut_buf_cur += 2; + } + } + if ((counter &= 511) == 256) break; + + num_extra = s_length_extra[counter - 257]; counter = s_length_base[counter - 257]; + if (num_extra) { mz_uint extra_bits; TINFL_GET_BITS(25, extra_bits, num_extra); counter += extra_bits; } + + TINFL_HUFF_DECODE(26, dist, &r->m_tables[1]); + num_extra = s_dist_extra[dist]; dist = s_dist_base[dist]; + if (num_extra) { mz_uint extra_bits; TINFL_GET_BITS(27, extra_bits, num_extra); dist += extra_bits; } + + dist_from_out_buf_start = pOut_buf_cur - pOut_buf_start; + if ((dist > dist_from_out_buf_start) && (decomp_flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF)) + { + TINFL_CR_RETURN_FOREVER(37, TINFL_STATUS_FAILED); + } + + pSrc = pOut_buf_start + ((dist_from_out_buf_start - dist) & out_buf_size_mask); + + if ((MZ_MAX(pOut_buf_cur, pSrc) + counter) > pOut_buf_end) + { + while (counter--) + { + while (pOut_buf_cur >= pOut_buf_end) { TINFL_CR_RETURN(53, TINFL_STATUS_HAS_MORE_OUTPUT); } + *pOut_buf_cur++ = pOut_buf_start[(dist_from_out_buf_start++ - dist) & out_buf_size_mask]; + } + continue; + } +#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES + else if ((counter >= 9) && (counter <= dist)) + { + const mz_uint8 *pSrc_end = pSrc + (counter & ~7); + do + { + ((mz_uint32 *)pOut_buf_cur)[0] = ((const mz_uint32 *)pSrc)[0]; + ((mz_uint32 *)pOut_buf_cur)[1] = ((const mz_uint32 *)pSrc)[1]; + pOut_buf_cur += 8; + } while ((pSrc += 8) < pSrc_end); + if ((counter &= 7) < 3) + { + if (counter) + { + pOut_buf_cur[0] = pSrc[0]; + if (counter > 1) + pOut_buf_cur[1] = pSrc[1]; + pOut_buf_cur += counter; + } + continue; + } + } +#endif + do + { + pOut_buf_cur[0] = pSrc[0]; + pOut_buf_cur[1] = pSrc[1]; + pOut_buf_cur[2] = pSrc[2]; + pOut_buf_cur += 3; pSrc += 3; + } while ((int)(counter -= 3) > 2); + if ((int)counter > 0) + { + pOut_buf_cur[0] = pSrc[0]; + if ((int)counter > 1) + pOut_buf_cur[1] = pSrc[1]; + pOut_buf_cur += counter; + } + } + } + } while (!(r->m_final & 1)); + if (decomp_flags & TINFL_FLAG_PARSE_ZLIB_HEADER) + { + TINFL_SKIP_BITS(32, num_bits & 7); for (counter = 0; counter < 4; ++counter) { mz_uint s; if (num_bits) TINFL_GET_BITS(41, s, 8); else TINFL_GET_BYTE(42, s); r->m_z_adler32 = (r->m_z_adler32 << 8) | s; } + } + TINFL_CR_RETURN_FOREVER(34, TINFL_STATUS_DONE); + TINFL_CR_FINISH + +common_exit: + r->m_num_bits = num_bits; r->m_bit_buf = bit_buf; r->m_dist = dist; r->m_counter = counter; r->m_num_extra = num_extra; r->m_dist_from_out_buf_start = dist_from_out_buf_start; + *pIn_buf_size = pIn_buf_cur - pIn_buf_next; *pOut_buf_size = pOut_buf_cur - pOut_buf_next; + if ((decomp_flags & (TINFL_FLAG_PARSE_ZLIB_HEADER | TINFL_FLAG_COMPUTE_ADLER32)) && (status >= 0)) + { + const mz_uint8 *ptr = pOut_buf_next; size_t buf_len = *pOut_buf_size; + mz_uint32 i, s1 = r->m_check_adler32 & 0xffff, s2 = r->m_check_adler32 >> 16; size_t block_len = buf_len % 5552; + while (buf_len) + { + for (i = 0; i + 7 < block_len; i += 8, ptr += 8) + { + s1 += ptr[0], s2 += s1; s1 += ptr[1], s2 += s1; s1 += ptr[2], s2 += s1; s1 += ptr[3], s2 += s1; + s1 += ptr[4], s2 += s1; s1 += ptr[5], s2 += s1; s1 += ptr[6], s2 += s1; s1 += ptr[7], s2 += s1; + } + for ( ; i < block_len; ++i) s1 += *ptr++, s2 += s1; + s1 %= 65521U, s2 %= 65521U; buf_len -= block_len; block_len = 5552; + } + r->m_check_adler32 = (s2 << 16) + s1; if ((status == TINFL_STATUS_DONE) && (decomp_flags & TINFL_FLAG_PARSE_ZLIB_HEADER) && (r->m_check_adler32 != r->m_z_adler32)) status = TINFL_STATUS_ADLER32_MISMATCH; + } + return status; +} + +// Higher level helper functions. +void *tinfl_decompress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags) +{ + tinfl_decompressor decomp; void *pBuf = NULL, *pNew_buf; size_t src_buf_ofs = 0, out_buf_capacity = 0; + *pOut_len = 0; + tinfl_init(&decomp); + for ( ; ; ) + { + size_t src_buf_size = src_buf_len - src_buf_ofs, dst_buf_size = out_buf_capacity - *pOut_len, new_out_buf_capacity; + tinfl_status status = tinfl_decompress(&decomp, (const mz_uint8*)pSrc_buf + src_buf_ofs, &src_buf_size, (mz_uint8*)pBuf, pBuf ? (mz_uint8*)pBuf + *pOut_len : NULL, &dst_buf_size, + (flags & ~TINFL_FLAG_HAS_MORE_INPUT) | TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF); + if ((status < 0) || (status == TINFL_STATUS_NEEDS_MORE_INPUT)) + { + MZ_FREE(pBuf); *pOut_len = 0; return NULL; + } + src_buf_ofs += src_buf_size; + *pOut_len += dst_buf_size; + if (status == TINFL_STATUS_DONE) break; + new_out_buf_capacity = out_buf_capacity * 2; if (new_out_buf_capacity < 128) new_out_buf_capacity = 128; + pNew_buf = MZ_REALLOC(pBuf, new_out_buf_capacity); + if (!pNew_buf) + { + MZ_FREE(pBuf); *pOut_len = 0; return NULL; + } + pBuf = pNew_buf; out_buf_capacity = new_out_buf_capacity; + } + return pBuf; +} + +size_t tinfl_decompress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags) +{ + tinfl_decompressor decomp; tinfl_status status; tinfl_init(&decomp); + status = tinfl_decompress(&decomp, (const mz_uint8*)pSrc_buf, &src_buf_len, (mz_uint8*)pOut_buf, (mz_uint8*)pOut_buf, &out_buf_len, (flags & ~TINFL_FLAG_HAS_MORE_INPUT) | TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF); + return (status != TINFL_STATUS_DONE) ? TINFL_DECOMPRESS_MEM_TO_MEM_FAILED : out_buf_len; +} + +int tinfl_decompress_mem_to_callback(const void *pIn_buf, size_t *pIn_buf_size, tinfl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags) +{ + int result = 0; + tinfl_decompressor decomp; + mz_uint8 *pDict = (mz_uint8*)MZ_MALLOC(TINFL_LZ_DICT_SIZE); size_t in_buf_ofs = 0, dict_ofs = 0; + if (!pDict) + return TINFL_STATUS_FAILED; + tinfl_init(&decomp); + for ( ; ; ) + { + size_t in_buf_size = *pIn_buf_size - in_buf_ofs, dst_buf_size = TINFL_LZ_DICT_SIZE - dict_ofs; + tinfl_status status = tinfl_decompress(&decomp, (const mz_uint8*)pIn_buf + in_buf_ofs, &in_buf_size, pDict, pDict + dict_ofs, &dst_buf_size, + (flags & ~(TINFL_FLAG_HAS_MORE_INPUT | TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF))); + in_buf_ofs += in_buf_size; + if ((dst_buf_size) && (!(*pPut_buf_func)(pDict + dict_ofs, (int)dst_buf_size, pPut_buf_user))) + break; + if (status != TINFL_STATUS_HAS_MORE_OUTPUT) + { + result = (status == TINFL_STATUS_DONE); + break; + } + dict_ofs = (dict_ofs + dst_buf_size) & (TINFL_LZ_DICT_SIZE - 1); + } + MZ_FREE(pDict); + *pIn_buf_size = in_buf_ofs; + return result; +} + +// ------------------- Low-level Compression (independent from all decompression API's) + +// Purposely making these tables static for faster init and thread safety. +static const mz_uint16 s_tdefl_len_sym[256] = { + 257,258,259,260,261,262,263,264,265,265,266,266,267,267,268,268,269,269,269,269,270,270,270,270,271,271,271,271,272,272,272,272, + 273,273,273,273,273,273,273,273,274,274,274,274,274,274,274,274,275,275,275,275,275,275,275,275,276,276,276,276,276,276,276,276, + 277,277,277,277,277,277,277,277,277,277,277,277,277,277,277,277,278,278,278,278,278,278,278,278,278,278,278,278,278,278,278,278, + 279,279,279,279,279,279,279,279,279,279,279,279,279,279,279,279,280,280,280,280,280,280,280,280,280,280,280,280,280,280,280,280, + 281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281, + 282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282, + 283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283, + 284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,285 }; + +static const mz_uint8 s_tdefl_len_extra[256] = { + 0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, + 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, + 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,0 }; + +static const mz_uint8 s_tdefl_small_dist_sym[512] = { + 0,1,2,3,4,4,5,5,6,6,6,6,7,7,7,7,8,8,8,8,8,8,8,8,9,9,9,9,9,9,9,9,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,11,11,11,11,11,11, + 11,11,11,11,11,11,11,11,11,11,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,13, + 13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,14,14,14,14,14,14,14,14,14,14,14,14, + 14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14, + 14,14,14,14,14,14,14,14,14,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15, + 15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,16,16,16,16,16,16,16,16,16,16,16,16,16, + 16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16, + 16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16, + 16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,17,17,17,17,17,17,17,17,17,17,17,17,17,17, + 17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17, + 17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17, + 17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17 }; + +static const mz_uint8 s_tdefl_small_dist_extra[512] = { + 0,0,0,0,1,1,1,1,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5, + 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7 }; + +static const mz_uint8 s_tdefl_large_dist_sym[128] = { + 0,0,18,19,20,20,21,21,22,22,22,22,23,23,23,23,24,24,24,24,24,24,24,24,25,25,25,25,25,25,25,25,26,26,26,26,26,26,26,26,26,26,26,26, + 26,26,26,26,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28, + 28,28,28,28,28,28,28,28,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29 }; + +static const mz_uint8 s_tdefl_large_dist_extra[128] = { + 0,0,8,8,9,9,9,9,10,10,10,10,10,10,10,10,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12, + 12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13, + 13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13 }; + +// Radix sorts tdefl_sym_freq[] array by 16-bit key m_key. Returns ptr to sorted values. +typedef struct { mz_uint16 m_key, m_sym_index; } tdefl_sym_freq; +static tdefl_sym_freq* tdefl_radix_sort_syms(mz_uint num_syms, tdefl_sym_freq* pSyms0, tdefl_sym_freq* pSyms1) +{ + mz_uint32 total_passes = 2, pass_shift, pass, i, hist[256 * 2]; tdefl_sym_freq* pCur_syms = pSyms0, *pNew_syms = pSyms1; MZ_CLEAR_OBJ(hist); + for (i = 0; i < num_syms; i++) { mz_uint freq = pSyms0[i].m_key; hist[freq & 0xFF]++; hist[256 + ((freq >> 8) & 0xFF)]++; } + while ((total_passes > 1) && (num_syms == hist[(total_passes - 1) * 256])) total_passes--; + for (pass_shift = 0, pass = 0; pass < total_passes; pass++, pass_shift += 8) + { + const mz_uint32* pHist = &hist[pass << 8]; + mz_uint offsets[256], cur_ofs = 0; + for (i = 0; i < 256; i++) { offsets[i] = cur_ofs; cur_ofs += pHist[i]; } + for (i = 0; i < num_syms; i++) pNew_syms[offsets[(pCur_syms[i].m_key >> pass_shift) & 0xFF]++] = pCur_syms[i]; + { tdefl_sym_freq* t = pCur_syms; pCur_syms = pNew_syms; pNew_syms = t; } + } + return pCur_syms; +} + +// tdefl_calculate_minimum_redundancy() originally written by: Alistair Moffat, alistair@cs.mu.oz.au, Jyrki Katajainen, jyrki@diku.dk, November 1996. +static void tdefl_calculate_minimum_redundancy(tdefl_sym_freq *A, int n) +{ + int root, leaf, next, avbl, used, dpth; + if (n==0) return; else if (n==1) { A[0].m_key = 1; return; } + A[0].m_key += A[1].m_key; root = 0; leaf = 2; + for (next=1; next < n-1; next++) + { + if (leaf>=n || A[root].m_key=n || (root=0; next--) A[next].m_key = A[A[next].m_key].m_key+1; + avbl = 1; used = dpth = 0; root = n-2; next = n-1; + while (avbl>0) + { + while (root>=0 && (int)A[root].m_key==dpth) { used++; root--; } + while (avbl>used) { A[next--].m_key = (mz_uint16)(dpth); avbl--; } + avbl = 2*used; dpth++; used = 0; + } +} + +// Limits canonical Huffman code table's max code size. +enum { TDEFL_MAX_SUPPORTED_HUFF_CODESIZE = 32 }; +static void tdefl_huffman_enforce_max_code_size(int *pNum_codes, int code_list_len, int max_code_size) +{ + int i; mz_uint32 total = 0; if (code_list_len <= 1) return; + for (i = max_code_size + 1; i <= TDEFL_MAX_SUPPORTED_HUFF_CODESIZE; i++) pNum_codes[max_code_size] += pNum_codes[i]; + for (i = max_code_size; i > 0; i--) total += (((mz_uint32)pNum_codes[i]) << (max_code_size - i)); + while (total != (1UL << max_code_size)) + { + pNum_codes[max_code_size]--; + for (i = max_code_size - 1; i > 0; i--) if (pNum_codes[i]) { pNum_codes[i]--; pNum_codes[i + 1] += 2; break; } + total--; + } +} + +static void tdefl_optimize_huffman_table(tdefl_compressor *d, int table_num, int table_len, int code_size_limit, int static_table) +{ + int i, j, l, num_codes[1 + TDEFL_MAX_SUPPORTED_HUFF_CODESIZE]; mz_uint next_code[TDEFL_MAX_SUPPORTED_HUFF_CODESIZE + 1]; MZ_CLEAR_OBJ(num_codes); + if (static_table) + { + for (i = 0; i < table_len; i++) num_codes[d->m_huff_code_sizes[table_num][i]]++; + } + else + { + tdefl_sym_freq syms0[TDEFL_MAX_HUFF_SYMBOLS], syms1[TDEFL_MAX_HUFF_SYMBOLS], *pSyms; + int num_used_syms = 0; + const mz_uint16 *pSym_count = &d->m_huff_count[table_num][0]; + for (i = 0; i < table_len; i++) if (pSym_count[i]) { syms0[num_used_syms].m_key = (mz_uint16)pSym_count[i]; syms0[num_used_syms++].m_sym_index = (mz_uint16)i; } + + pSyms = tdefl_radix_sort_syms(num_used_syms, syms0, syms1); tdefl_calculate_minimum_redundancy(pSyms, num_used_syms); + + for (i = 0; i < num_used_syms; i++) num_codes[pSyms[i].m_key]++; + + tdefl_huffman_enforce_max_code_size(num_codes, num_used_syms, code_size_limit); + + MZ_CLEAR_OBJ(d->m_huff_code_sizes[table_num]); MZ_CLEAR_OBJ(d->m_huff_codes[table_num]); + for (i = 1, j = num_used_syms; i <= code_size_limit; i++) + for (l = num_codes[i]; l > 0; l--) d->m_huff_code_sizes[table_num][pSyms[--j].m_sym_index] = (mz_uint8)(i); + } + + next_code[1] = 0; for (j = 0, i = 2; i <= code_size_limit; i++) next_code[i] = j = ((j + num_codes[i - 1]) << 1); + + for (i = 0; i < table_len; i++) + { + mz_uint rev_code = 0, code, code_size; if ((code_size = d->m_huff_code_sizes[table_num][i]) == 0) continue; + code = next_code[code_size]++; for (l = code_size; l > 0; l--, code >>= 1) rev_code = (rev_code << 1) | (code & 1); + d->m_huff_codes[table_num][i] = (mz_uint16)rev_code; + } +} + +#define TDEFL_PUT_BITS(b, l) do { \ + mz_uint bits = b; mz_uint len = l; MZ_ASSERT(bits <= ((1U << len) - 1U)); \ + d->m_bit_buffer |= (bits << d->m_bits_in); d->m_bits_in += len; \ + while (d->m_bits_in >= 8) { \ + if (d->m_pOutput_buf < d->m_pOutput_buf_end) \ + *d->m_pOutput_buf++ = (mz_uint8)(d->m_bit_buffer); \ + d->m_bit_buffer >>= 8; \ + d->m_bits_in -= 8; \ + } \ +} MZ_MACRO_END + +#define TDEFL_RLE_PREV_CODE_SIZE() { if (rle_repeat_count) { \ + if (rle_repeat_count < 3) { \ + d->m_huff_count[2][prev_code_size] = (mz_uint16)(d->m_huff_count[2][prev_code_size] + rle_repeat_count); \ + while (rle_repeat_count--) packed_code_sizes[num_packed_code_sizes++] = prev_code_size; \ + } else { \ + d->m_huff_count[2][16] = (mz_uint16)(d->m_huff_count[2][16] + 1); packed_code_sizes[num_packed_code_sizes++] = 16; packed_code_sizes[num_packed_code_sizes++] = (mz_uint8)(rle_repeat_count - 3); \ +} rle_repeat_count = 0; } } + +#define TDEFL_RLE_ZERO_CODE_SIZE() { if (rle_z_count) { \ + if (rle_z_count < 3) { \ + d->m_huff_count[2][0] = (mz_uint16)(d->m_huff_count[2][0] + rle_z_count); while (rle_z_count--) packed_code_sizes[num_packed_code_sizes++] = 0; \ + } else if (rle_z_count <= 10) { \ + d->m_huff_count[2][17] = (mz_uint16)(d->m_huff_count[2][17] + 1); packed_code_sizes[num_packed_code_sizes++] = 17; packed_code_sizes[num_packed_code_sizes++] = (mz_uint8)(rle_z_count - 3); \ + } else { \ + d->m_huff_count[2][18] = (mz_uint16)(d->m_huff_count[2][18] + 1); packed_code_sizes[num_packed_code_sizes++] = 18; packed_code_sizes[num_packed_code_sizes++] = (mz_uint8)(rle_z_count - 11); \ +} rle_z_count = 0; } } + +static mz_uint8 s_tdefl_packed_code_size_syms_swizzle[] = { 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 }; + +static void tdefl_start_dynamic_block(tdefl_compressor *d) +{ + int num_lit_codes, num_dist_codes, num_bit_lengths; mz_uint i, total_code_sizes_to_pack, num_packed_code_sizes, rle_z_count, rle_repeat_count, packed_code_sizes_index; + mz_uint8 code_sizes_to_pack[TDEFL_MAX_HUFF_SYMBOLS_0 + TDEFL_MAX_HUFF_SYMBOLS_1], packed_code_sizes[TDEFL_MAX_HUFF_SYMBOLS_0 + TDEFL_MAX_HUFF_SYMBOLS_1], prev_code_size = 0xFF; + + d->m_huff_count[0][256] = 1; + + tdefl_optimize_huffman_table(d, 0, TDEFL_MAX_HUFF_SYMBOLS_0, 15, MZ_FALSE); + tdefl_optimize_huffman_table(d, 1, TDEFL_MAX_HUFF_SYMBOLS_1, 15, MZ_FALSE); + + for (num_lit_codes = 286; num_lit_codes > 257; num_lit_codes--) if (d->m_huff_code_sizes[0][num_lit_codes - 1]) break; + for (num_dist_codes = 30; num_dist_codes > 1; num_dist_codes--) if (d->m_huff_code_sizes[1][num_dist_codes - 1]) break; + + memcpy(code_sizes_to_pack, &d->m_huff_code_sizes[0][0], num_lit_codes); + memcpy(code_sizes_to_pack + num_lit_codes, &d->m_huff_code_sizes[1][0], num_dist_codes); + total_code_sizes_to_pack = num_lit_codes + num_dist_codes; num_packed_code_sizes = 0; rle_z_count = 0; rle_repeat_count = 0; + + memset(&d->m_huff_count[2][0], 0, sizeof(d->m_huff_count[2][0]) * TDEFL_MAX_HUFF_SYMBOLS_2); + for (i = 0; i < total_code_sizes_to_pack; i++) + { + mz_uint8 code_size = code_sizes_to_pack[i]; + if (!code_size) + { + TDEFL_RLE_PREV_CODE_SIZE(); + if (++rle_z_count == 138) { TDEFL_RLE_ZERO_CODE_SIZE(); } + } + else + { + TDEFL_RLE_ZERO_CODE_SIZE(); + if (code_size != prev_code_size) + { + TDEFL_RLE_PREV_CODE_SIZE(); + d->m_huff_count[2][code_size] = (mz_uint16)(d->m_huff_count[2][code_size] + 1); packed_code_sizes[num_packed_code_sizes++] = code_size; + } + else if (++rle_repeat_count == 6) + { + TDEFL_RLE_PREV_CODE_SIZE(); + } + } + prev_code_size = code_size; + } + if (rle_repeat_count) { TDEFL_RLE_PREV_CODE_SIZE(); } else { TDEFL_RLE_ZERO_CODE_SIZE(); } + + tdefl_optimize_huffman_table(d, 2, TDEFL_MAX_HUFF_SYMBOLS_2, 7, MZ_FALSE); + + TDEFL_PUT_BITS(2, 2); + + TDEFL_PUT_BITS(num_lit_codes - 257, 5); + TDEFL_PUT_BITS(num_dist_codes - 1, 5); + + for (num_bit_lengths = 18; num_bit_lengths >= 0; num_bit_lengths--) if (d->m_huff_code_sizes[2][s_tdefl_packed_code_size_syms_swizzle[num_bit_lengths]]) break; + num_bit_lengths = MZ_MAX(4, (num_bit_lengths + 1)); TDEFL_PUT_BITS(num_bit_lengths - 4, 4); + for (i = 0; (int)i < num_bit_lengths; i++) TDEFL_PUT_BITS(d->m_huff_code_sizes[2][s_tdefl_packed_code_size_syms_swizzle[i]], 3); + + for (packed_code_sizes_index = 0; packed_code_sizes_index < num_packed_code_sizes; ) + { + mz_uint code = packed_code_sizes[packed_code_sizes_index++]; MZ_ASSERT(code < TDEFL_MAX_HUFF_SYMBOLS_2); + TDEFL_PUT_BITS(d->m_huff_codes[2][code], d->m_huff_code_sizes[2][code]); + if (code >= 16) TDEFL_PUT_BITS(packed_code_sizes[packed_code_sizes_index++], "\02\03\07"[code - 16]); + } +} + +static void tdefl_start_static_block(tdefl_compressor *d) +{ + mz_uint i; + mz_uint8 *p = &d->m_huff_code_sizes[0][0]; + + for (i = 0; i <= 143; ++i) *p++ = 8; + for ( ; i <= 255; ++i) *p++ = 9; + for ( ; i <= 279; ++i) *p++ = 7; + for ( ; i <= 287; ++i) *p++ = 8; + + memset(d->m_huff_code_sizes[1], 5, 32); + + tdefl_optimize_huffman_table(d, 0, 288, 15, MZ_TRUE); + tdefl_optimize_huffman_table(d, 1, 32, 15, MZ_TRUE); + + TDEFL_PUT_BITS(1, 2); +} + +static const mz_uint mz_bitmasks[17] = { 0x0000, 0x0001, 0x0003, 0x0007, 0x000F, 0x001F, 0x003F, 0x007F, 0x00FF, 0x01FF, 0x03FF, 0x07FF, 0x0FFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF }; + +#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN && MINIZ_HAS_64BIT_REGISTERS +static mz_bool tdefl_compress_lz_codes(tdefl_compressor *d) +{ + mz_uint flags; + mz_uint8 *pLZ_codes; + mz_uint8 *pOutput_buf = d->m_pOutput_buf; + mz_uint8 *pLZ_code_buf_end = d->m_pLZ_code_buf; + mz_uint64 bit_buffer = d->m_bit_buffer; + mz_uint bits_in = d->m_bits_in; + +#define TDEFL_PUT_BITS_FAST(b, l) { bit_buffer |= (((mz_uint64)(b)) << bits_in); bits_in += (l); } + + flags = 1; + for (pLZ_codes = d->m_lz_code_buf; pLZ_codes < pLZ_code_buf_end; flags >>= 1) + { + if (flags == 1) + flags = *pLZ_codes++ | 0x100; + + if (flags & 1) + { + mz_uint s0, s1, n0, n1, sym, num_extra_bits; + mz_uint match_len = pLZ_codes[0], match_dist = *(const mz_uint16 *)(pLZ_codes + 1); pLZ_codes += 3; + + MZ_ASSERT(d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]); + TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][s_tdefl_len_sym[match_len]], d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]); + TDEFL_PUT_BITS_FAST(match_len & mz_bitmasks[s_tdefl_len_extra[match_len]], s_tdefl_len_extra[match_len]); + + // This sequence coaxes MSVC into using cmov's vs. jmp's. + s0 = s_tdefl_small_dist_sym[match_dist & 511]; + n0 = s_tdefl_small_dist_extra[match_dist & 511]; + s1 = s_tdefl_large_dist_sym[match_dist >> 8]; + n1 = s_tdefl_large_dist_extra[match_dist >> 8]; + sym = (match_dist < 512) ? s0 : s1; + num_extra_bits = (match_dist < 512) ? n0 : n1; + + MZ_ASSERT(d->m_huff_code_sizes[1][sym]); + TDEFL_PUT_BITS_FAST(d->m_huff_codes[1][sym], d->m_huff_code_sizes[1][sym]); + TDEFL_PUT_BITS_FAST(match_dist & mz_bitmasks[num_extra_bits], num_extra_bits); + } + else + { + mz_uint lit = *pLZ_codes++; + MZ_ASSERT(d->m_huff_code_sizes[0][lit]); + TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit]); + + if (((flags & 2) == 0) && (pLZ_codes < pLZ_code_buf_end)) + { + flags >>= 1; + lit = *pLZ_codes++; + MZ_ASSERT(d->m_huff_code_sizes[0][lit]); + TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit]); + + if (((flags & 2) == 0) && (pLZ_codes < pLZ_code_buf_end)) + { + flags >>= 1; + lit = *pLZ_codes++; + MZ_ASSERT(d->m_huff_code_sizes[0][lit]); + TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit]); + } + } + } + + if (pOutput_buf >= d->m_pOutput_buf_end) + return MZ_FALSE; + + *(mz_uint64*)pOutput_buf = bit_buffer; + pOutput_buf += (bits_in >> 3); + bit_buffer >>= (bits_in & ~7); + bits_in &= 7; + } + +#undef TDEFL_PUT_BITS_FAST + + d->m_pOutput_buf = pOutput_buf; + d->m_bits_in = 0; + d->m_bit_buffer = 0; + + while (bits_in) + { + mz_uint32 n = MZ_MIN(bits_in, 16); + TDEFL_PUT_BITS((mz_uint)bit_buffer & mz_bitmasks[n], n); + bit_buffer >>= n; + bits_in -= n; + } + + TDEFL_PUT_BITS(d->m_huff_codes[0][256], d->m_huff_code_sizes[0][256]); + + return (d->m_pOutput_buf < d->m_pOutput_buf_end); +} +#else +static mz_bool tdefl_compress_lz_codes(tdefl_compressor *d) +{ + mz_uint flags; + mz_uint8 *pLZ_codes; + + flags = 1; + for (pLZ_codes = d->m_lz_code_buf; pLZ_codes < d->m_pLZ_code_buf; flags >>= 1) + { + if (flags == 1) + flags = *pLZ_codes++ | 0x100; + if (flags & 1) + { + mz_uint sym, num_extra_bits; + mz_uint match_len = pLZ_codes[0], match_dist = (pLZ_codes[1] | (pLZ_codes[2] << 8)); pLZ_codes += 3; + + MZ_ASSERT(d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]); + TDEFL_PUT_BITS(d->m_huff_codes[0][s_tdefl_len_sym[match_len]], d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]); + TDEFL_PUT_BITS(match_len & mz_bitmasks[s_tdefl_len_extra[match_len]], s_tdefl_len_extra[match_len]); + + if (match_dist < 512) + { + sym = s_tdefl_small_dist_sym[match_dist]; num_extra_bits = s_tdefl_small_dist_extra[match_dist]; + } + else + { + sym = s_tdefl_large_dist_sym[match_dist >> 8]; num_extra_bits = s_tdefl_large_dist_extra[match_dist >> 8]; + } + MZ_ASSERT(d->m_huff_code_sizes[1][sym]); + TDEFL_PUT_BITS(d->m_huff_codes[1][sym], d->m_huff_code_sizes[1][sym]); + TDEFL_PUT_BITS(match_dist & mz_bitmasks[num_extra_bits], num_extra_bits); + } + else + { + mz_uint lit = *pLZ_codes++; + MZ_ASSERT(d->m_huff_code_sizes[0][lit]); + TDEFL_PUT_BITS(d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit]); + } + } + + TDEFL_PUT_BITS(d->m_huff_codes[0][256], d->m_huff_code_sizes[0][256]); + + return (d->m_pOutput_buf < d->m_pOutput_buf_end); +} +#endif // MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN && MINIZ_HAS_64BIT_REGISTERS + +static mz_bool tdefl_compress_block(tdefl_compressor *d, mz_bool static_block) +{ + if (static_block) + tdefl_start_static_block(d); + else + tdefl_start_dynamic_block(d); + return tdefl_compress_lz_codes(d); +} + +static int tdefl_flush_block(tdefl_compressor *d, int flush) +{ + mz_uint saved_bit_buf, saved_bits_in; + mz_uint8 *pSaved_output_buf; + mz_bool comp_block_succeeded = MZ_FALSE; + int n, use_raw_block = ((d->m_flags & TDEFL_FORCE_ALL_RAW_BLOCKS) != 0) && (d->m_lookahead_pos - d->m_lz_code_buf_dict_pos) <= d->m_dict_size; + mz_uint8 *pOutput_buf_start = ((d->m_pPut_buf_func == NULL) && ((*d->m_pOut_buf_size - d->m_out_buf_ofs) >= TDEFL_OUT_BUF_SIZE)) ? ((mz_uint8 *)d->m_pOut_buf + d->m_out_buf_ofs) : d->m_output_buf; + + d->m_pOutput_buf = pOutput_buf_start; + d->m_pOutput_buf_end = d->m_pOutput_buf + TDEFL_OUT_BUF_SIZE - 16; + + MZ_ASSERT(!d->m_output_flush_remaining); + d->m_output_flush_ofs = 0; + d->m_output_flush_remaining = 0; + + *d->m_pLZ_flags = (mz_uint8)(*d->m_pLZ_flags >> d->m_num_flags_left); + d->m_pLZ_code_buf -= (d->m_num_flags_left == 8); + + if ((d->m_flags & TDEFL_WRITE_ZLIB_HEADER) && (!d->m_block_index)) + { + TDEFL_PUT_BITS(0x78, 8); TDEFL_PUT_BITS(0x01, 8); + } + + TDEFL_PUT_BITS(flush == TDEFL_FINISH, 1); + + pSaved_output_buf = d->m_pOutput_buf; saved_bit_buf = d->m_bit_buffer; saved_bits_in = d->m_bits_in; + + if (!use_raw_block) + comp_block_succeeded = tdefl_compress_block(d, (d->m_flags & TDEFL_FORCE_ALL_STATIC_BLOCKS) || (d->m_total_lz_bytes < 48)); + + // If the block gets expanded, forget the current contents of the output buffer and send a raw block instead. + if ( ((use_raw_block) || ((d->m_total_lz_bytes) && ((d->m_pOutput_buf - pSaved_output_buf + 1U) >= d->m_total_lz_bytes))) && + ((d->m_lookahead_pos - d->m_lz_code_buf_dict_pos) <= d->m_dict_size) ) + { + mz_uint i; d->m_pOutput_buf = pSaved_output_buf; d->m_bit_buffer = saved_bit_buf, d->m_bits_in = saved_bits_in; + TDEFL_PUT_BITS(0, 2); + if (d->m_bits_in) { TDEFL_PUT_BITS(0, 8 - d->m_bits_in); } + for (i = 2; i; --i, d->m_total_lz_bytes ^= 0xFFFF) + { + TDEFL_PUT_BITS(d->m_total_lz_bytes & 0xFFFF, 16); + } + for (i = 0; i < d->m_total_lz_bytes; ++i) + { + TDEFL_PUT_BITS(d->m_dict[(d->m_lz_code_buf_dict_pos + i) & TDEFL_LZ_DICT_SIZE_MASK], 8); + } + } + // Check for the extremely unlikely (if not impossible) case of the compressed block not fitting into the output buffer when using dynamic codes. + else if (!comp_block_succeeded) + { + d->m_pOutput_buf = pSaved_output_buf; d->m_bit_buffer = saved_bit_buf, d->m_bits_in = saved_bits_in; + tdefl_compress_block(d, MZ_TRUE); + } + + if (flush) + { + if (flush == TDEFL_FINISH) + { + if (d->m_bits_in) { TDEFL_PUT_BITS(0, 8 - d->m_bits_in); } + if (d->m_flags & TDEFL_WRITE_ZLIB_HEADER) { mz_uint i, a = d->m_adler32; for (i = 0; i < 4; i++) { TDEFL_PUT_BITS((a >> 24) & 0xFF, 8); a <<= 8; } } + } + else + { + mz_uint i, z = 0; TDEFL_PUT_BITS(0, 3); if (d->m_bits_in) { TDEFL_PUT_BITS(0, 8 - d->m_bits_in); } for (i = 2; i; --i, z ^= 0xFFFF) { TDEFL_PUT_BITS(z & 0xFFFF, 16); } + } + } + + MZ_ASSERT(d->m_pOutput_buf < d->m_pOutput_buf_end); + + memset(&d->m_huff_count[0][0], 0, sizeof(d->m_huff_count[0][0]) * TDEFL_MAX_HUFF_SYMBOLS_0); + memset(&d->m_huff_count[1][0], 0, sizeof(d->m_huff_count[1][0]) * TDEFL_MAX_HUFF_SYMBOLS_1); + + d->m_pLZ_code_buf = d->m_lz_code_buf + 1; d->m_pLZ_flags = d->m_lz_code_buf; d->m_num_flags_left = 8; d->m_lz_code_buf_dict_pos += d->m_total_lz_bytes; d->m_total_lz_bytes = 0; d->m_block_index++; + + if ((n = (int)(d->m_pOutput_buf - pOutput_buf_start)) != 0) + { + if (d->m_pPut_buf_func) + { + *d->m_pIn_buf_size = d->m_pSrc - (const mz_uint8 *)d->m_pIn_buf; + if (!(*d->m_pPut_buf_func)(d->m_output_buf, n, d->m_pPut_buf_user)) + return (d->m_prev_return_status = TDEFL_STATUS_PUT_BUF_FAILED); + } + else if (pOutput_buf_start == d->m_output_buf) + { + int bytes_to_copy = (int)MZ_MIN((size_t)n, (size_t)(*d->m_pOut_buf_size - d->m_out_buf_ofs)); + memcpy((mz_uint8 *)d->m_pOut_buf + d->m_out_buf_ofs, d->m_output_buf, bytes_to_copy); + d->m_out_buf_ofs += bytes_to_copy; + if ((n -= bytes_to_copy) != 0) + { + d->m_output_flush_ofs = bytes_to_copy; + d->m_output_flush_remaining = n; + } + } + else + { + d->m_out_buf_ofs += n; + } + } + + return d->m_output_flush_remaining; +} + +#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES +#define TDEFL_READ_UNALIGNED_WORD(p) *(const mz_uint16*)(p) +static MZ_FORCEINLINE void tdefl_find_match(tdefl_compressor *d, mz_uint lookahead_pos, mz_uint max_dist, mz_uint max_match_len, mz_uint *pMatch_dist, mz_uint *pMatch_len) +{ + mz_uint dist, pos = lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK, match_len = *pMatch_len, probe_pos = pos, next_probe_pos, probe_len; + mz_uint num_probes_left = d->m_max_probes[match_len >= 32]; + const mz_uint16 *s = (const mz_uint16*)(d->m_dict + pos), *p, *q; + mz_uint16 c01 = TDEFL_READ_UNALIGNED_WORD(&d->m_dict[pos + match_len - 1]), s01 = TDEFL_READ_UNALIGNED_WORD(s); + MZ_ASSERT(max_match_len <= TDEFL_MAX_MATCH_LEN); if (max_match_len <= match_len) return; + for ( ; ; ) + { + for ( ; ; ) + { + if (--num_probes_left == 0) return; + #define TDEFL_PROBE \ + next_probe_pos = d->m_next[probe_pos]; \ + if ((!next_probe_pos) || ((dist = (mz_uint16)(lookahead_pos - next_probe_pos)) > max_dist)) return; \ + probe_pos = next_probe_pos & TDEFL_LZ_DICT_SIZE_MASK; \ + if (TDEFL_READ_UNALIGNED_WORD(&d->m_dict[probe_pos + match_len - 1]) == c01) break; + TDEFL_PROBE; TDEFL_PROBE; TDEFL_PROBE; + } + if (!dist) break; q = (const mz_uint16*)(d->m_dict + probe_pos); if (TDEFL_READ_UNALIGNED_WORD(q) != s01) continue; p = s; probe_len = 32; + do { } while ( (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && + (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (--probe_len > 0) ); + if (!probe_len) + { + *pMatch_dist = dist; *pMatch_len = MZ_MIN(max_match_len, TDEFL_MAX_MATCH_LEN); break; + } + else if ((probe_len = ((mz_uint)(p - s) * 2) + (mz_uint)(*(const mz_uint8*)p == *(const mz_uint8*)q)) > match_len) + { + *pMatch_dist = dist; if ((*pMatch_len = match_len = MZ_MIN(max_match_len, probe_len)) == max_match_len) break; + c01 = TDEFL_READ_UNALIGNED_WORD(&d->m_dict[pos + match_len - 1]); + } + } +} +#else +static MZ_FORCEINLINE void tdefl_find_match(tdefl_compressor *d, mz_uint lookahead_pos, mz_uint max_dist, mz_uint max_match_len, mz_uint *pMatch_dist, mz_uint *pMatch_len) +{ + mz_uint dist, pos = lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK, match_len = *pMatch_len, probe_pos = pos, next_probe_pos, probe_len; + mz_uint num_probes_left = d->m_max_probes[match_len >= 32]; + const mz_uint8 *s = d->m_dict + pos, *p, *q; + mz_uint8 c0 = d->m_dict[pos + match_len], c1 = d->m_dict[pos + match_len - 1]; + MZ_ASSERT(max_match_len <= TDEFL_MAX_MATCH_LEN); if (max_match_len <= match_len) return; + for ( ; ; ) + { + for ( ; ; ) + { + if (--num_probes_left == 0) return; + #define TDEFL_PROBE \ + next_probe_pos = d->m_next[probe_pos]; \ + if ((!next_probe_pos) || ((dist = (mz_uint16)(lookahead_pos - next_probe_pos)) > max_dist)) return; \ + probe_pos = next_probe_pos & TDEFL_LZ_DICT_SIZE_MASK; \ + if ((d->m_dict[probe_pos + match_len] == c0) && (d->m_dict[probe_pos + match_len - 1] == c1)) break; + TDEFL_PROBE; TDEFL_PROBE; TDEFL_PROBE; + } + if (!dist) break; p = s; q = d->m_dict + probe_pos; for (probe_len = 0; probe_len < max_match_len; probe_len++) if (*p++ != *q++) break; + if (probe_len > match_len) + { + *pMatch_dist = dist; if ((*pMatch_len = match_len = probe_len) == max_match_len) return; + c0 = d->m_dict[pos + match_len]; c1 = d->m_dict[pos + match_len - 1]; + } + } +} +#endif // #if MINIZ_USE_UNALIGNED_LOADS_AND_STORES + +#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN +static mz_bool tdefl_compress_fast(tdefl_compressor *d) +{ + // Faster, minimally featured LZRW1-style match+parse loop with better register utilization. Intended for applications where raw throughput is valued more highly than ratio. + mz_uint lookahead_pos = d->m_lookahead_pos, lookahead_size = d->m_lookahead_size, dict_size = d->m_dict_size, total_lz_bytes = d->m_total_lz_bytes, num_flags_left = d->m_num_flags_left; + mz_uint8 *pLZ_code_buf = d->m_pLZ_code_buf, *pLZ_flags = d->m_pLZ_flags; + mz_uint cur_pos = lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK; + + while ((d->m_src_buf_left) || ((d->m_flush) && (lookahead_size))) + { + const mz_uint TDEFL_COMP_FAST_LOOKAHEAD_SIZE = 4096; + mz_uint dst_pos = (lookahead_pos + lookahead_size) & TDEFL_LZ_DICT_SIZE_MASK; + mz_uint num_bytes_to_process = (mz_uint)MZ_MIN(d->m_src_buf_left, TDEFL_COMP_FAST_LOOKAHEAD_SIZE - lookahead_size); + d->m_src_buf_left -= num_bytes_to_process; + lookahead_size += num_bytes_to_process; + + while (num_bytes_to_process) + { + mz_uint32 n = MZ_MIN(TDEFL_LZ_DICT_SIZE - dst_pos, num_bytes_to_process); + memcpy(d->m_dict + dst_pos, d->m_pSrc, n); + if (dst_pos < (TDEFL_MAX_MATCH_LEN - 1)) + memcpy(d->m_dict + TDEFL_LZ_DICT_SIZE + dst_pos, d->m_pSrc, MZ_MIN(n, (TDEFL_MAX_MATCH_LEN - 1) - dst_pos)); + d->m_pSrc += n; + dst_pos = (dst_pos + n) & TDEFL_LZ_DICT_SIZE_MASK; + num_bytes_to_process -= n; + } + + dict_size = MZ_MIN(TDEFL_LZ_DICT_SIZE - lookahead_size, dict_size); + if ((!d->m_flush) && (lookahead_size < TDEFL_COMP_FAST_LOOKAHEAD_SIZE)) break; + + while (lookahead_size >= 4) + { + mz_uint cur_match_dist, cur_match_len = 1; + mz_uint8 *pCur_dict = d->m_dict + cur_pos; + mz_uint first_trigram = (*(const mz_uint32 *)pCur_dict) & 0xFFFFFF; + mz_uint hash = (first_trigram ^ (first_trigram >> (24 - (TDEFL_LZ_HASH_BITS - 8)))) & TDEFL_LEVEL1_HASH_SIZE_MASK; + mz_uint probe_pos = d->m_hash[hash]; + d->m_hash[hash] = (mz_uint16)lookahead_pos; + + if (((cur_match_dist = (mz_uint16)(lookahead_pos - probe_pos)) <= dict_size) && ((*(const mz_uint32 *)(d->m_dict + (probe_pos &= TDEFL_LZ_DICT_SIZE_MASK)) & 0xFFFFFF) == first_trigram)) + { + const mz_uint16 *p = (const mz_uint16 *)pCur_dict; + const mz_uint16 *q = (const mz_uint16 *)(d->m_dict + probe_pos); + mz_uint32 probe_len = 32; + do { } while ( (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && + (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (--probe_len > 0) ); + cur_match_len = ((mz_uint)(p - (const mz_uint16 *)pCur_dict) * 2) + (mz_uint)(*(const mz_uint8 *)p == *(const mz_uint8 *)q); + if (!probe_len) + cur_match_len = cur_match_dist ? TDEFL_MAX_MATCH_LEN : 0; + + if ((cur_match_len < TDEFL_MIN_MATCH_LEN) || ((cur_match_len == TDEFL_MIN_MATCH_LEN) && (cur_match_dist >= 8U*1024U))) + { + cur_match_len = 1; + *pLZ_code_buf++ = (mz_uint8)first_trigram; + *pLZ_flags = (mz_uint8)(*pLZ_flags >> 1); + d->m_huff_count[0][(mz_uint8)first_trigram]++; + } + else + { + mz_uint32 s0, s1; + cur_match_len = MZ_MIN(cur_match_len, lookahead_size); + + MZ_ASSERT((cur_match_len >= TDEFL_MIN_MATCH_LEN) && (cur_match_dist >= 1) && (cur_match_dist <= TDEFL_LZ_DICT_SIZE)); + + cur_match_dist--; + + pLZ_code_buf[0] = (mz_uint8)(cur_match_len - TDEFL_MIN_MATCH_LEN); + *(mz_uint16 *)(&pLZ_code_buf[1]) = (mz_uint16)cur_match_dist; + pLZ_code_buf += 3; + *pLZ_flags = (mz_uint8)((*pLZ_flags >> 1) | 0x80); + + s0 = s_tdefl_small_dist_sym[cur_match_dist & 511]; + s1 = s_tdefl_large_dist_sym[cur_match_dist >> 8]; + d->m_huff_count[1][(cur_match_dist < 512) ? s0 : s1]++; + + d->m_huff_count[0][s_tdefl_len_sym[cur_match_len - TDEFL_MIN_MATCH_LEN]]++; + } + } + else + { + *pLZ_code_buf++ = (mz_uint8)first_trigram; + *pLZ_flags = (mz_uint8)(*pLZ_flags >> 1); + d->m_huff_count[0][(mz_uint8)first_trigram]++; + } + + if (--num_flags_left == 0) { num_flags_left = 8; pLZ_flags = pLZ_code_buf++; } + + total_lz_bytes += cur_match_len; + lookahead_pos += cur_match_len; + dict_size = MZ_MIN(dict_size + cur_match_len, TDEFL_LZ_DICT_SIZE); + cur_pos = (cur_pos + cur_match_len) & TDEFL_LZ_DICT_SIZE_MASK; + MZ_ASSERT(lookahead_size >= cur_match_len); + lookahead_size -= cur_match_len; + + if (pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8]) + { + int n; + d->m_lookahead_pos = lookahead_pos; d->m_lookahead_size = lookahead_size; d->m_dict_size = dict_size; + d->m_total_lz_bytes = total_lz_bytes; d->m_pLZ_code_buf = pLZ_code_buf; d->m_pLZ_flags = pLZ_flags; d->m_num_flags_left = num_flags_left; + if ((n = tdefl_flush_block(d, 0)) != 0) + return (n < 0) ? MZ_FALSE : MZ_TRUE; + total_lz_bytes = d->m_total_lz_bytes; pLZ_code_buf = d->m_pLZ_code_buf; pLZ_flags = d->m_pLZ_flags; num_flags_left = d->m_num_flags_left; + } + } + + while (lookahead_size) + { + mz_uint8 lit = d->m_dict[cur_pos]; + + total_lz_bytes++; + *pLZ_code_buf++ = lit; + *pLZ_flags = (mz_uint8)(*pLZ_flags >> 1); + if (--num_flags_left == 0) { num_flags_left = 8; pLZ_flags = pLZ_code_buf++; } + + d->m_huff_count[0][lit]++; + + lookahead_pos++; + dict_size = MZ_MIN(dict_size + 1, TDEFL_LZ_DICT_SIZE); + cur_pos = (cur_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK; + lookahead_size--; + + if (pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8]) + { + int n; + d->m_lookahead_pos = lookahead_pos; d->m_lookahead_size = lookahead_size; d->m_dict_size = dict_size; + d->m_total_lz_bytes = total_lz_bytes; d->m_pLZ_code_buf = pLZ_code_buf; d->m_pLZ_flags = pLZ_flags; d->m_num_flags_left = num_flags_left; + if ((n = tdefl_flush_block(d, 0)) != 0) + return (n < 0) ? MZ_FALSE : MZ_TRUE; + total_lz_bytes = d->m_total_lz_bytes; pLZ_code_buf = d->m_pLZ_code_buf; pLZ_flags = d->m_pLZ_flags; num_flags_left = d->m_num_flags_left; + } + } + } + + d->m_lookahead_pos = lookahead_pos; d->m_lookahead_size = lookahead_size; d->m_dict_size = dict_size; + d->m_total_lz_bytes = total_lz_bytes; d->m_pLZ_code_buf = pLZ_code_buf; d->m_pLZ_flags = pLZ_flags; d->m_num_flags_left = num_flags_left; + return MZ_TRUE; +} +#endif // MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN + +static MZ_FORCEINLINE void tdefl_record_literal(tdefl_compressor *d, mz_uint8 lit) +{ + d->m_total_lz_bytes++; + *d->m_pLZ_code_buf++ = lit; + *d->m_pLZ_flags = (mz_uint8)(*d->m_pLZ_flags >> 1); if (--d->m_num_flags_left == 0) { d->m_num_flags_left = 8; d->m_pLZ_flags = d->m_pLZ_code_buf++; } + d->m_huff_count[0][lit]++; +} + +static MZ_FORCEINLINE void tdefl_record_match(tdefl_compressor *d, mz_uint match_len, mz_uint match_dist) +{ + mz_uint32 s0, s1; + + MZ_ASSERT((match_len >= TDEFL_MIN_MATCH_LEN) && (match_dist >= 1) && (match_dist <= TDEFL_LZ_DICT_SIZE)); + + d->m_total_lz_bytes += match_len; + + d->m_pLZ_code_buf[0] = (mz_uint8)(match_len - TDEFL_MIN_MATCH_LEN); + + match_dist -= 1; + d->m_pLZ_code_buf[1] = (mz_uint8)(match_dist & 0xFF); + d->m_pLZ_code_buf[2] = (mz_uint8)(match_dist >> 8); d->m_pLZ_code_buf += 3; + + *d->m_pLZ_flags = (mz_uint8)((*d->m_pLZ_flags >> 1) | 0x80); if (--d->m_num_flags_left == 0) { d->m_num_flags_left = 8; d->m_pLZ_flags = d->m_pLZ_code_buf++; } + + s0 = s_tdefl_small_dist_sym[match_dist & 511]; s1 = s_tdefl_large_dist_sym[(match_dist >> 8) & 127]; + d->m_huff_count[1][(match_dist < 512) ? s0 : s1]++; + + if (match_len >= TDEFL_MIN_MATCH_LEN) d->m_huff_count[0][s_tdefl_len_sym[match_len - TDEFL_MIN_MATCH_LEN]]++; +} + +static mz_bool tdefl_compress_normal(tdefl_compressor *d) +{ + const mz_uint8 *pSrc = d->m_pSrc; size_t src_buf_left = d->m_src_buf_left; + tdefl_flush flush = d->m_flush; + + while ((src_buf_left) || ((flush) && (d->m_lookahead_size))) + { + mz_uint len_to_move, cur_match_dist, cur_match_len, cur_pos; + // Update dictionary and hash chains. Keeps the lookahead size equal to TDEFL_MAX_MATCH_LEN. + if ((d->m_lookahead_size + d->m_dict_size) >= (TDEFL_MIN_MATCH_LEN - 1)) + { + mz_uint dst_pos = (d->m_lookahead_pos + d->m_lookahead_size) & TDEFL_LZ_DICT_SIZE_MASK, ins_pos = d->m_lookahead_pos + d->m_lookahead_size - 2; + mz_uint hash = (d->m_dict[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] << TDEFL_LZ_HASH_SHIFT) ^ d->m_dict[(ins_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK]; + mz_uint num_bytes_to_process = (mz_uint)MZ_MIN(src_buf_left, TDEFL_MAX_MATCH_LEN - d->m_lookahead_size); + const mz_uint8 *pSrc_end = pSrc + num_bytes_to_process; + src_buf_left -= num_bytes_to_process; + d->m_lookahead_size += num_bytes_to_process; + while (pSrc != pSrc_end) + { + mz_uint8 c = *pSrc++; d->m_dict[dst_pos] = c; if (dst_pos < (TDEFL_MAX_MATCH_LEN - 1)) d->m_dict[TDEFL_LZ_DICT_SIZE + dst_pos] = c; + hash = ((hash << TDEFL_LZ_HASH_SHIFT) ^ c) & (TDEFL_LZ_HASH_SIZE - 1); + d->m_next[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] = d->m_hash[hash]; d->m_hash[hash] = (mz_uint16)(ins_pos); + dst_pos = (dst_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK; ins_pos++; + } + } + else + { + while ((src_buf_left) && (d->m_lookahead_size < TDEFL_MAX_MATCH_LEN)) + { + mz_uint8 c = *pSrc++; + mz_uint dst_pos = (d->m_lookahead_pos + d->m_lookahead_size) & TDEFL_LZ_DICT_SIZE_MASK; + src_buf_left--; + d->m_dict[dst_pos] = c; + if (dst_pos < (TDEFL_MAX_MATCH_LEN - 1)) + d->m_dict[TDEFL_LZ_DICT_SIZE + dst_pos] = c; + if ((++d->m_lookahead_size + d->m_dict_size) >= TDEFL_MIN_MATCH_LEN) + { + mz_uint ins_pos = d->m_lookahead_pos + (d->m_lookahead_size - 1) - 2; + mz_uint hash = ((d->m_dict[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] << (TDEFL_LZ_HASH_SHIFT * 2)) ^ (d->m_dict[(ins_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK] << TDEFL_LZ_HASH_SHIFT) ^ c) & (TDEFL_LZ_HASH_SIZE - 1); + d->m_next[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] = d->m_hash[hash]; d->m_hash[hash] = (mz_uint16)(ins_pos); + } + } + } + d->m_dict_size = MZ_MIN(TDEFL_LZ_DICT_SIZE - d->m_lookahead_size, d->m_dict_size); + if ((!flush) && (d->m_lookahead_size < TDEFL_MAX_MATCH_LEN)) + break; + + // Simple lazy/greedy parsing state machine. + len_to_move = 1; cur_match_dist = 0; cur_match_len = d->m_saved_match_len ? d->m_saved_match_len : (TDEFL_MIN_MATCH_LEN - 1); cur_pos = d->m_lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK; + if (d->m_flags & (TDEFL_RLE_MATCHES | TDEFL_FORCE_ALL_RAW_BLOCKS)) + { + if ((d->m_dict_size) && (!(d->m_flags & TDEFL_FORCE_ALL_RAW_BLOCKS))) + { + mz_uint8 c = d->m_dict[(cur_pos - 1) & TDEFL_LZ_DICT_SIZE_MASK]; + cur_match_len = 0; while (cur_match_len < d->m_lookahead_size) { if (d->m_dict[cur_pos + cur_match_len] != c) break; cur_match_len++; } + if (cur_match_len < TDEFL_MIN_MATCH_LEN) cur_match_len = 0; else cur_match_dist = 1; + } + } + else + { + tdefl_find_match(d, d->m_lookahead_pos, d->m_dict_size, d->m_lookahead_size, &cur_match_dist, &cur_match_len); + } + if (((cur_match_len == TDEFL_MIN_MATCH_LEN) && (cur_match_dist >= 8U*1024U)) || (cur_pos == cur_match_dist) || ((d->m_flags & TDEFL_FILTER_MATCHES) && (cur_match_len <= 5))) + { + cur_match_dist = cur_match_len = 0; + } + if (d->m_saved_match_len) + { + if (cur_match_len > d->m_saved_match_len) + { + tdefl_record_literal(d, (mz_uint8)d->m_saved_lit); + if (cur_match_len >= 128) + { + tdefl_record_match(d, cur_match_len, cur_match_dist); + d->m_saved_match_len = 0; len_to_move = cur_match_len; + } + else + { + d->m_saved_lit = d->m_dict[cur_pos]; d->m_saved_match_dist = cur_match_dist; d->m_saved_match_len = cur_match_len; + } + } + else + { + tdefl_record_match(d, d->m_saved_match_len, d->m_saved_match_dist); + len_to_move = d->m_saved_match_len - 1; d->m_saved_match_len = 0; + } + } + else if (!cur_match_dist) + tdefl_record_literal(d, d->m_dict[MZ_MIN(cur_pos, sizeof(d->m_dict) - 1)]); + else if ((d->m_greedy_parsing) || (d->m_flags & TDEFL_RLE_MATCHES) || (cur_match_len >= 128)) + { + tdefl_record_match(d, cur_match_len, cur_match_dist); + len_to_move = cur_match_len; + } + else + { + d->m_saved_lit = d->m_dict[MZ_MIN(cur_pos, sizeof(d->m_dict) - 1)]; d->m_saved_match_dist = cur_match_dist; d->m_saved_match_len = cur_match_len; + } + // Move the lookahead forward by len_to_move bytes. + d->m_lookahead_pos += len_to_move; + MZ_ASSERT(d->m_lookahead_size >= len_to_move); + d->m_lookahead_size -= len_to_move; + d->m_dict_size = MZ_MIN(d->m_dict_size + len_to_move, TDEFL_LZ_DICT_SIZE); + // Check if it's time to flush the current LZ codes to the internal output buffer. + if ( (d->m_pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8]) || + ( (d->m_total_lz_bytes > 31*1024) && (((((mz_uint)(d->m_pLZ_code_buf - d->m_lz_code_buf) * 115) >> 7) >= d->m_total_lz_bytes) || (d->m_flags & TDEFL_FORCE_ALL_RAW_BLOCKS))) ) + { + int n; + d->m_pSrc = pSrc; d->m_src_buf_left = src_buf_left; + if ((n = tdefl_flush_block(d, 0)) != 0) + return (n < 0) ? MZ_FALSE : MZ_TRUE; + } + } + + d->m_pSrc = pSrc; d->m_src_buf_left = src_buf_left; + return MZ_TRUE; +} + +static tdefl_status tdefl_flush_output_buffer(tdefl_compressor *d) +{ + if (d->m_pIn_buf_size) + { + *d->m_pIn_buf_size = d->m_pSrc - (const mz_uint8 *)d->m_pIn_buf; + } + + if (d->m_pOut_buf_size) + { + size_t n = MZ_MIN(*d->m_pOut_buf_size - d->m_out_buf_ofs, d->m_output_flush_remaining); + memcpy((mz_uint8 *)d->m_pOut_buf + d->m_out_buf_ofs, d->m_output_buf + d->m_output_flush_ofs, n); + d->m_output_flush_ofs += (mz_uint)n; + d->m_output_flush_remaining -= (mz_uint)n; + d->m_out_buf_ofs += n; + + *d->m_pOut_buf_size = d->m_out_buf_ofs; + } + + return (d->m_finished && !d->m_output_flush_remaining) ? TDEFL_STATUS_DONE : TDEFL_STATUS_OKAY; +} + +tdefl_status tdefl_compress(tdefl_compressor *d, const void *pIn_buf, size_t *pIn_buf_size, void *pOut_buf, size_t *pOut_buf_size, tdefl_flush flush) +{ + if (!d) + { + if (pIn_buf_size) *pIn_buf_size = 0; + if (pOut_buf_size) *pOut_buf_size = 0; + return TDEFL_STATUS_BAD_PARAM; + } + + d->m_pIn_buf = pIn_buf; d->m_pIn_buf_size = pIn_buf_size; + d->m_pOut_buf = pOut_buf; d->m_pOut_buf_size = pOut_buf_size; + d->m_pSrc = (const mz_uint8 *)(pIn_buf); d->m_src_buf_left = pIn_buf_size ? *pIn_buf_size : 0; + d->m_out_buf_ofs = 0; + d->m_flush = flush; + + if ( ((d->m_pPut_buf_func != NULL) == ((pOut_buf != NULL) || (pOut_buf_size != NULL))) || (d->m_prev_return_status != TDEFL_STATUS_OKAY) || + (d->m_wants_to_finish && (flush != TDEFL_FINISH)) || (pIn_buf_size && *pIn_buf_size && !pIn_buf) || (pOut_buf_size && *pOut_buf_size && !pOut_buf) ) + { + if (pIn_buf_size) *pIn_buf_size = 0; + if (pOut_buf_size) *pOut_buf_size = 0; + return (d->m_prev_return_status = TDEFL_STATUS_BAD_PARAM); + } + d->m_wants_to_finish |= (flush == TDEFL_FINISH); + + if ((d->m_output_flush_remaining) || (d->m_finished)) + return (d->m_prev_return_status = tdefl_flush_output_buffer(d)); + +#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN + if (((d->m_flags & TDEFL_MAX_PROBES_MASK) == 1) && + ((d->m_flags & TDEFL_GREEDY_PARSING_FLAG) != 0) && + ((d->m_flags & (TDEFL_FILTER_MATCHES | TDEFL_FORCE_ALL_RAW_BLOCKS | TDEFL_RLE_MATCHES)) == 0)) + { + if (!tdefl_compress_fast(d)) + return d->m_prev_return_status; + } + else +#endif // #if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN + { + if (!tdefl_compress_normal(d)) + return d->m_prev_return_status; + } + + if ((d->m_flags & (TDEFL_WRITE_ZLIB_HEADER | TDEFL_COMPUTE_ADLER32)) && (pIn_buf)) + d->m_adler32 = (mz_uint32)mz_adler32(d->m_adler32, (const mz_uint8 *)pIn_buf, d->m_pSrc - (const mz_uint8 *)pIn_buf); + + if ((flush) && (!d->m_lookahead_size) && (!d->m_src_buf_left) && (!d->m_output_flush_remaining)) + { + if (tdefl_flush_block(d, flush) < 0) + return d->m_prev_return_status; + d->m_finished = (flush == TDEFL_FINISH); + if (flush == TDEFL_FULL_FLUSH) { MZ_CLEAR_OBJ(d->m_hash); MZ_CLEAR_OBJ(d->m_next); d->m_dict_size = 0; } + } + + return (d->m_prev_return_status = tdefl_flush_output_buffer(d)); +} + +tdefl_status tdefl_compress_buffer(tdefl_compressor *d, const void *pIn_buf, size_t in_buf_size, tdefl_flush flush) +{ + MZ_ASSERT(d->m_pPut_buf_func); return tdefl_compress(d, pIn_buf, &in_buf_size, NULL, NULL, flush); +} + +tdefl_status tdefl_init(tdefl_compressor *d, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags) +{ + d->m_pPut_buf_func = pPut_buf_func; d->m_pPut_buf_user = pPut_buf_user; + d->m_flags = (mz_uint)(flags); d->m_max_probes[0] = 1 + ((flags & 0xFFF) + 2) / 3; d->m_greedy_parsing = (flags & TDEFL_GREEDY_PARSING_FLAG) != 0; + d->m_max_probes[1] = 1 + (((flags & 0xFFF) >> 2) + 2) / 3; + if (!(flags & TDEFL_NONDETERMINISTIC_PARSING_FLAG)) MZ_CLEAR_OBJ(d->m_hash); + d->m_lookahead_pos = d->m_lookahead_size = d->m_dict_size = d->m_total_lz_bytes = d->m_lz_code_buf_dict_pos = d->m_bits_in = 0; + d->m_output_flush_ofs = d->m_output_flush_remaining = d->m_finished = d->m_block_index = d->m_bit_buffer = d->m_wants_to_finish = 0; + d->m_pLZ_code_buf = d->m_lz_code_buf + 1; d->m_pLZ_flags = d->m_lz_code_buf; d->m_num_flags_left = 8; + d->m_pOutput_buf = d->m_output_buf; d->m_pOutput_buf_end = d->m_output_buf; d->m_prev_return_status = TDEFL_STATUS_OKAY; + d->m_saved_match_dist = d->m_saved_match_len = d->m_saved_lit = 0; d->m_adler32 = 1; + d->m_pIn_buf = NULL; d->m_pOut_buf = NULL; + d->m_pIn_buf_size = NULL; d->m_pOut_buf_size = NULL; + d->m_flush = TDEFL_NO_FLUSH; d->m_pSrc = NULL; d->m_src_buf_left = 0; d->m_out_buf_ofs = 0; + memset(&d->m_huff_count[0][0], 0, sizeof(d->m_huff_count[0][0]) * TDEFL_MAX_HUFF_SYMBOLS_0); + memset(&d->m_huff_count[1][0], 0, sizeof(d->m_huff_count[1][0]) * TDEFL_MAX_HUFF_SYMBOLS_1); + return TDEFL_STATUS_OKAY; +} + +tdefl_status tdefl_get_prev_return_status(tdefl_compressor *d) +{ + return d->m_prev_return_status; +} + +mz_uint32 tdefl_get_adler32(tdefl_compressor *d) +{ + return d->m_adler32; +} + +mz_bool tdefl_compress_mem_to_output(const void *pBuf, size_t buf_len, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags) +{ + tdefl_compressor *pComp; mz_bool succeeded; if (((buf_len) && (!pBuf)) || (!pPut_buf_func)) return MZ_FALSE; + pComp = (tdefl_compressor*)MZ_MALLOC(sizeof(tdefl_compressor)); if (!pComp) return MZ_FALSE; + succeeded = (tdefl_init(pComp, pPut_buf_func, pPut_buf_user, flags) == TDEFL_STATUS_OKAY); + succeeded = succeeded && (tdefl_compress_buffer(pComp, pBuf, buf_len, TDEFL_FINISH) == TDEFL_STATUS_DONE); + MZ_FREE(pComp); return succeeded; +} + +typedef struct +{ + size_t m_size, m_capacity; + mz_uint8 *m_pBuf; + mz_bool m_expandable; +} tdefl_output_buffer; + +static mz_bool tdefl_output_buffer_putter(const void *pBuf, int len, void *pUser) +{ + tdefl_output_buffer *p = (tdefl_output_buffer *)pUser; + size_t new_size = p->m_size + len; + if (new_size > p->m_capacity) + { + size_t new_capacity = p->m_capacity; mz_uint8 *pNew_buf; if (!p->m_expandable) return MZ_FALSE; + do { new_capacity = MZ_MAX(128U, new_capacity << 1U); } while (new_size > new_capacity); + pNew_buf = (mz_uint8*)MZ_REALLOC(p->m_pBuf, new_capacity); if (!pNew_buf) return MZ_FALSE; + p->m_pBuf = pNew_buf; p->m_capacity = new_capacity; + } + memcpy((mz_uint8*)p->m_pBuf + p->m_size, pBuf, len); p->m_size = new_size; + return MZ_TRUE; +} + +void *tdefl_compress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags) +{ + tdefl_output_buffer out_buf; MZ_CLEAR_OBJ(out_buf); + if (!pOut_len) return MZ_FALSE; else *pOut_len = 0; + out_buf.m_expandable = MZ_TRUE; + if (!tdefl_compress_mem_to_output(pSrc_buf, src_buf_len, tdefl_output_buffer_putter, &out_buf, flags)) return NULL; + *pOut_len = out_buf.m_size; return out_buf.m_pBuf; +} + +size_t tdefl_compress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags) +{ + tdefl_output_buffer out_buf; MZ_CLEAR_OBJ(out_buf); + if (!pOut_buf) return 0; + out_buf.m_pBuf = (mz_uint8*)pOut_buf; out_buf.m_capacity = out_buf_len; + if (!tdefl_compress_mem_to_output(pSrc_buf, src_buf_len, tdefl_output_buffer_putter, &out_buf, flags)) return 0; + return out_buf.m_size; +} + +#ifndef MINIZ_NO_ZLIB_APIS +static const mz_uint s_tdefl_num_probes[11] = { 0, 1, 6, 32, 16, 32, 128, 256, 512, 768, 1500 }; + +// level may actually range from [0,10] (10 is a "hidden" max level, where we want a bit more compression and it's fine if throughput to fall off a cliff on some files). +mz_uint tdefl_create_comp_flags_from_zip_params(int level, int window_bits, int strategy) +{ + mz_uint comp_flags = s_tdefl_num_probes[(level >= 0) ? MZ_MIN(10, level) : MZ_DEFAULT_LEVEL] | ((level <= 3) ? TDEFL_GREEDY_PARSING_FLAG : 0); + if (window_bits > 0) comp_flags |= TDEFL_WRITE_ZLIB_HEADER; + + if (!level) comp_flags |= TDEFL_FORCE_ALL_RAW_BLOCKS; + else if (strategy == MZ_FILTERED) comp_flags |= TDEFL_FILTER_MATCHES; + else if (strategy == MZ_HUFFMAN_ONLY) comp_flags &= ~TDEFL_MAX_PROBES_MASK; + else if (strategy == MZ_FIXED) comp_flags |= TDEFL_FORCE_ALL_STATIC_BLOCKS; + else if (strategy == MZ_RLE) comp_flags |= TDEFL_RLE_MATCHES; + + return comp_flags; +} +#endif //MINIZ_NO_ZLIB_APIS + +#ifdef _MSC_VER +#pragma warning (push) +#pragma warning (disable:4204) // nonstandard extension used : non-constant aggregate initializer (also supported by GNU C and C99, so no big deal) +#endif + +// Simple PNG writer function by Alex Evans, 2011. Released into the public domain: https://gist.github.com/908299, more context at +// http://altdevblogaday.org/2011/04/06/a-smaller-jpg-encoder/. +void *tdefl_write_image_to_png_file_in_memory(const void *pImage, int w, int h, int num_chans, size_t *pLen_out) +{ + tdefl_compressor *pComp = (tdefl_compressor *)MZ_MALLOC(sizeof(tdefl_compressor)); tdefl_output_buffer out_buf; int i, bpl = w * num_chans, y, z; mz_uint32 c; *pLen_out = 0; + if (!pComp) return NULL; + MZ_CLEAR_OBJ(out_buf); out_buf.m_expandable = MZ_TRUE; out_buf.m_capacity = 57+MZ_MAX(64, (1+bpl)*h); if (NULL == (out_buf.m_pBuf = (mz_uint8*)MZ_MALLOC(out_buf.m_capacity))) { MZ_FREE(pComp); return NULL; } + // write dummy header + for (z = 41; z; --z) tdefl_output_buffer_putter(&z, 1, &out_buf); + // compress image data + tdefl_init(pComp, tdefl_output_buffer_putter, &out_buf, TDEFL_DEFAULT_MAX_PROBES | TDEFL_WRITE_ZLIB_HEADER); + for (y = 0; y < h; ++y) { tdefl_compress_buffer(pComp, &z, 1, TDEFL_NO_FLUSH); tdefl_compress_buffer(pComp, (mz_uint8*)pImage + y * bpl, bpl, TDEFL_NO_FLUSH); } + if (tdefl_compress_buffer(pComp, NULL, 0, TDEFL_FINISH) != TDEFL_STATUS_DONE) { MZ_FREE(pComp); MZ_FREE(out_buf.m_pBuf); return NULL; } + // write real header + *pLen_out = out_buf.m_size-41; + { + mz_uint8 pnghdr[41]={0x89,0x50,0x4e,0x47,0x0d,0x0a,0x1a,0x0a,0x00,0x00,0x00,0x0d,0x49,0x48,0x44,0x52, + 0,0,(mz_uint8)(w>>8),(mz_uint8)w,0,0,(mz_uint8)(h>>8),(mz_uint8)h,8,"\0\0\04\02\06"[num_chans],0,0,0,0,0,0,0, + (mz_uint8)(*pLen_out>>24),(mz_uint8)(*pLen_out>>16),(mz_uint8)(*pLen_out>>8),(mz_uint8)*pLen_out,0x49,0x44,0x41,0x54}; + c=(mz_uint32)mz_crc32(MZ_CRC32_INIT,pnghdr+12,17); for (i=0; i<4; ++i, c<<=8) ((mz_uint8*)(pnghdr+29))[i]=(mz_uint8)(c>>24); + memcpy(out_buf.m_pBuf, pnghdr, 41); + } + // write footer (IDAT CRC-32, followed by IEND chunk) + if (!tdefl_output_buffer_putter("\0\0\0\0\0\0\0\0\x49\x45\x4e\x44\xae\x42\x60\x82", 16, &out_buf)) { *pLen_out = 0; MZ_FREE(pComp); MZ_FREE(out_buf.m_pBuf); return NULL; } + c = (mz_uint32)mz_crc32(MZ_CRC32_INIT,out_buf.m_pBuf+41-4, *pLen_out+4); for (i=0; i<4; ++i, c<<=8) (out_buf.m_pBuf+out_buf.m_size-16)[i] = (mz_uint8)(c >> 24); + // compute final size of file, grab compressed data buffer and return + *pLen_out += 57; MZ_FREE(pComp); return out_buf.m_pBuf; +} + +#ifdef _MSC_VER +#pragma warning (pop) +#endif + +// ------------------- .ZIP archive reading + +#ifndef MINIZ_NO_ARCHIVE_APIS + +#ifdef MINIZ_NO_STDIO + #define MZ_FILE void * +#else + #include + #include + + #if defined(_MSC_VER) + static FILE *mz_fopen(const char *pFilename, const char *pMode) + { + FILE* pFile = NULL; + fopen_s(&pFile, pFilename, pMode); + return pFile; + } + static FILE *mz_freopen(const char *pPath, const char *pMode, FILE *pStream) + { + FILE* pFile = NULL; + if (freopen_s(&pFile, pPath, pMode, pStream)) + return NULL; + return pFile; + } + #else + static FILE *mz_fopen(const char *pFilename, const char *pMode) + { + return fopen(pFilename, pMode); + } + static FILE *mz_freopen(const char *pPath, const char *pMode, FILE *pStream) + { + return freopen(pPath, pMode, pStream); + } + #endif // #if defined(_MSC_VER) + + #if defined(_MSC_VER) || defined(__MINGW64__) + #ifndef MINIZ_NO_TIME + #include + #endif + #define MZ_FILE FILE + #define MZ_FOPEN mz_fopen + #define MZ_FCLOSE fclose + #define MZ_FREAD fread + #define MZ_FWRITE fwrite + #define MZ_FTELL64 _ftelli64 + #define MZ_FSEEK64 _fseeki64 + #define MZ_FILE_STAT_STRUCT _stat + #define MZ_FILE_STAT _stat + #define MZ_FFLUSH fflush + #define MZ_FREOPEN mz_freopen + #define MZ_DELETE_FILE remove + #elif defined(__MINGW32__) + #ifndef MINIZ_NO_TIME + #include + #endif + #define MZ_FILE FILE + #define MZ_FOPEN mz_fopen + #define MZ_FCLOSE fclose + #define MZ_FREAD fread + #define MZ_FWRITE fwrite + #define MZ_FTELL64 ftello64 + #define MZ_FSEEK64 fseeko64 + #define MZ_FILE_STAT_STRUCT _stat + #define MZ_FILE_STAT _stat + #define MZ_FFLUSH fflush + #define MZ_FREOPEN mz_freopen + #define MZ_DELETE_FILE remove + #elif defined(__TINYC__) + #ifndef MINIZ_NO_TIME + #include + #endif + #define MZ_FILE FILE + #define MZ_FOPEN mz_fopen + #define MZ_FCLOSE fclose + #define MZ_FREAD fread + #define MZ_FWRITE fwrite + #define MZ_FTELL64 ftell + #define MZ_FSEEK64 fseek + #define MZ_FILE_STAT_STRUCT stat + #define MZ_FILE_STAT stat + #define MZ_FFLUSH fflush + #define MZ_FREOPEN mz_freopen + #define MZ_DELETE_FILE remove + #else + #ifndef MINIZ_NO_TIME + #include + #endif + #define MZ_FILE FILE + #define MZ_FOPEN mz_fopen + #define MZ_FCLOSE fclose + #define MZ_FREAD fread + #define MZ_FWRITE fwrite + #define MZ_FTELL64 ftello + #define MZ_FSEEK64 fseeko + #define MZ_FILE_STAT_STRUCT stat + #define MZ_FILE_STAT stat + #define MZ_FFLUSH fflush + #define MZ_FREOPEN mz_freopen + #define MZ_DELETE_FILE remove + #endif // #ifdef _MSC_VER +#endif // #ifdef MINIZ_NO_STDIO + +#define MZ_TOLOWER(c) ((((c) >= 'A') && ((c) <= 'Z')) ? ((c) - 'A' + 'a') : (c)) + +// Various ZIP archive enums. To completely avoid cross platform compiler alignment and platform endian issues, miniz.c doesn't use structs for any of this stuff. +enum +{ + // ZIP archive identifiers and record sizes + MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIG = 0x06054b50, MZ_ZIP_CENTRAL_DIR_HEADER_SIG = 0x02014b50, MZ_ZIP_LOCAL_DIR_HEADER_SIG = 0x04034b50, + MZ_ZIP_LOCAL_DIR_HEADER_SIZE = 30, MZ_ZIP_CENTRAL_DIR_HEADER_SIZE = 46, MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE = 22, + // Central directory header record offsets + MZ_ZIP_CDH_SIG_OFS = 0, MZ_ZIP_CDH_VERSION_MADE_BY_OFS = 4, MZ_ZIP_CDH_VERSION_NEEDED_OFS = 6, MZ_ZIP_CDH_BIT_FLAG_OFS = 8, + MZ_ZIP_CDH_METHOD_OFS = 10, MZ_ZIP_CDH_FILE_TIME_OFS = 12, MZ_ZIP_CDH_FILE_DATE_OFS = 14, MZ_ZIP_CDH_CRC32_OFS = 16, + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS = 20, MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS = 24, MZ_ZIP_CDH_FILENAME_LEN_OFS = 28, MZ_ZIP_CDH_EXTRA_LEN_OFS = 30, + MZ_ZIP_CDH_COMMENT_LEN_OFS = 32, MZ_ZIP_CDH_DISK_START_OFS = 34, MZ_ZIP_CDH_INTERNAL_ATTR_OFS = 36, MZ_ZIP_CDH_EXTERNAL_ATTR_OFS = 38, MZ_ZIP_CDH_LOCAL_HEADER_OFS = 42, + // Local directory header offsets + MZ_ZIP_LDH_SIG_OFS = 0, MZ_ZIP_LDH_VERSION_NEEDED_OFS = 4, MZ_ZIP_LDH_BIT_FLAG_OFS = 6, MZ_ZIP_LDH_METHOD_OFS = 8, MZ_ZIP_LDH_FILE_TIME_OFS = 10, + MZ_ZIP_LDH_FILE_DATE_OFS = 12, MZ_ZIP_LDH_CRC32_OFS = 14, MZ_ZIP_LDH_COMPRESSED_SIZE_OFS = 18, MZ_ZIP_LDH_DECOMPRESSED_SIZE_OFS = 22, + MZ_ZIP_LDH_FILENAME_LEN_OFS = 26, MZ_ZIP_LDH_EXTRA_LEN_OFS = 28, + // End of central directory offsets + MZ_ZIP_ECDH_SIG_OFS = 0, MZ_ZIP_ECDH_NUM_THIS_DISK_OFS = 4, MZ_ZIP_ECDH_NUM_DISK_CDIR_OFS = 6, MZ_ZIP_ECDH_CDIR_NUM_ENTRIES_ON_DISK_OFS = 8, + MZ_ZIP_ECDH_CDIR_TOTAL_ENTRIES_OFS = 10, MZ_ZIP_ECDH_CDIR_SIZE_OFS = 12, MZ_ZIP_ECDH_CDIR_OFS_OFS = 16, MZ_ZIP_ECDH_COMMENT_SIZE_OFS = 20, +}; + +typedef struct +{ + void *m_p; + size_t m_size, m_capacity; + mz_uint m_element_size; +} mz_zip_array; + +struct mz_zip_internal_state_tag +{ + mz_zip_array m_central_dir; + mz_zip_array m_central_dir_offsets; + mz_zip_array m_sorted_central_dir_offsets; + MZ_FILE *m_pFile; + void *m_pMem; + size_t m_mem_size; + size_t m_mem_capacity; +}; + +#define MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(array_ptr, element_size) (array_ptr)->m_element_size = element_size +#define MZ_ZIP_ARRAY_ELEMENT(array_ptr, element_type, index) ((element_type *)((array_ptr)->m_p))[index] + +static MZ_FORCEINLINE void mz_zip_array_clear(mz_zip_archive *pZip, mz_zip_array *pArray) +{ + pZip->m_pFree(pZip->m_pAlloc_opaque, pArray->m_p); + memset(pArray, 0, sizeof(mz_zip_array)); +} + +static mz_bool mz_zip_array_ensure_capacity(mz_zip_archive *pZip, mz_zip_array *pArray, size_t min_new_capacity, mz_uint growing) +{ + void *pNew_p; size_t new_capacity = min_new_capacity; MZ_ASSERT(pArray->m_element_size); if (pArray->m_capacity >= min_new_capacity) return MZ_TRUE; + if (growing) { new_capacity = MZ_MAX(1, pArray->m_capacity); while (new_capacity < min_new_capacity) new_capacity *= 2; } + if (NULL == (pNew_p = pZip->m_pRealloc(pZip->m_pAlloc_opaque, pArray->m_p, pArray->m_element_size, new_capacity))) return MZ_FALSE; + pArray->m_p = pNew_p; pArray->m_capacity = new_capacity; + return MZ_TRUE; +} + +static MZ_FORCEINLINE mz_bool mz_zip_array_reserve(mz_zip_archive *pZip, mz_zip_array *pArray, size_t new_capacity, mz_uint growing) +{ + if (new_capacity > pArray->m_capacity) { if (!mz_zip_array_ensure_capacity(pZip, pArray, new_capacity, growing)) return MZ_FALSE; } + return MZ_TRUE; +} + +static MZ_FORCEINLINE mz_bool mz_zip_array_resize(mz_zip_archive *pZip, mz_zip_array *pArray, size_t new_size, mz_uint growing) +{ + if (new_size > pArray->m_capacity) { if (!mz_zip_array_ensure_capacity(pZip, pArray, new_size, growing)) return MZ_FALSE; } + pArray->m_size = new_size; + return MZ_TRUE; +} + +static MZ_FORCEINLINE mz_bool mz_zip_array_ensure_room(mz_zip_archive *pZip, mz_zip_array *pArray, size_t n) +{ + return mz_zip_array_reserve(pZip, pArray, pArray->m_size + n, MZ_TRUE); +} + +static MZ_FORCEINLINE mz_bool mz_zip_array_push_back(mz_zip_archive *pZip, mz_zip_array *pArray, const void *pElements, size_t n) +{ + size_t orig_size = pArray->m_size; if (!mz_zip_array_resize(pZip, pArray, orig_size + n, MZ_TRUE)) return MZ_FALSE; + memcpy((mz_uint8*)pArray->m_p + orig_size * pArray->m_element_size, pElements, n * pArray->m_element_size); + return MZ_TRUE; +} + +#ifndef MINIZ_NO_TIME +static time_t mz_zip_dos_to_time_t(int dos_time, int dos_date) +{ + struct tm tm; + memset(&tm, 0, sizeof(tm)); tm.tm_isdst = -1; + tm.tm_year = ((dos_date >> 9) & 127) + 1980 - 1900; tm.tm_mon = ((dos_date >> 5) & 15) - 1; tm.tm_mday = dos_date & 31; + tm.tm_hour = (dos_time >> 11) & 31; tm.tm_min = (dos_time >> 5) & 63; tm.tm_sec = (dos_time << 1) & 62; + return mktime(&tm); +} + +static void mz_zip_time_to_dos_time(time_t time, mz_uint16 *pDOS_time, mz_uint16 *pDOS_date) +{ +#ifdef _MSC_VER + struct tm tm_struct; + struct tm *tm = &tm_struct; + errno_t err = localtime_s(tm, &time); + if (err) + { + *pDOS_date = 0; *pDOS_time = 0; + return; + } +#else + struct tm *tm = localtime(&time); +#endif + *pDOS_time = (mz_uint16)(((tm->tm_hour) << 11) + ((tm->tm_min) << 5) + ((tm->tm_sec) >> 1)); + *pDOS_date = (mz_uint16)(((tm->tm_year + 1900 - 1980) << 9) + ((tm->tm_mon + 1) << 5) + tm->tm_mday); +} +#endif + +#ifndef MINIZ_NO_STDIO +static mz_bool mz_zip_get_file_modified_time(const char *pFilename, mz_uint16 *pDOS_time, mz_uint16 *pDOS_date) +{ +#ifdef MINIZ_NO_TIME + (void)pFilename; *pDOS_date = *pDOS_time = 0; +#else + struct MZ_FILE_STAT_STRUCT file_stat; if (MZ_FILE_STAT(pFilename, &file_stat) != 0) return MZ_FALSE; + mz_zip_time_to_dos_time(file_stat.st_mtime, pDOS_time, pDOS_date); +#endif // #ifdef MINIZ_NO_TIME + return MZ_TRUE; +} + +static mz_bool mz_zip_set_file_times(const char *pFilename, time_t access_time, time_t modified_time) +{ +#ifndef MINIZ_NO_TIME + struct utimbuf t; t.actime = access_time; t.modtime = modified_time; + return !utime(pFilename, &t); +#else + (void)pFilename, (void)access_time, (void)modified_time; + return MZ_TRUE; +#endif // #ifndef MINIZ_NO_TIME +} +#endif + +static mz_bool mz_zip_reader_init_internal(mz_zip_archive *pZip, mz_uint32 flags) +{ + (void)flags; + if ((!pZip) || (pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_INVALID)) + return MZ_FALSE; + + if (!pZip->m_pAlloc) pZip->m_pAlloc = def_alloc_func; + if (!pZip->m_pFree) pZip->m_pFree = def_free_func; + if (!pZip->m_pRealloc) pZip->m_pRealloc = def_realloc_func; + + pZip->m_zip_mode = MZ_ZIP_MODE_READING; + pZip->m_archive_size = 0; + pZip->m_central_directory_file_ofs = 0; + pZip->m_total_files = 0; + + if (NULL == (pZip->m_pState = (mz_zip_internal_state *)pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, sizeof(mz_zip_internal_state)))) + return MZ_FALSE; + memset(pZip->m_pState, 0, sizeof(mz_zip_internal_state)); + MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_central_dir, sizeof(mz_uint8)); + MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_central_dir_offsets, sizeof(mz_uint32)); + MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_sorted_central_dir_offsets, sizeof(mz_uint32)); + return MZ_TRUE; +} + +static MZ_FORCEINLINE mz_bool mz_zip_reader_filename_less(const mz_zip_array *pCentral_dir_array, const mz_zip_array *pCentral_dir_offsets, mz_uint l_index, mz_uint r_index) +{ + const mz_uint8 *pL = &MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_array, mz_uint8, MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_offsets, mz_uint32, l_index)), *pE; + const mz_uint8 *pR = &MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_array, mz_uint8, MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_offsets, mz_uint32, r_index)); + mz_uint l_len = MZ_READ_LE16(pL + MZ_ZIP_CDH_FILENAME_LEN_OFS), r_len = MZ_READ_LE16(pR + MZ_ZIP_CDH_FILENAME_LEN_OFS); + mz_uint8 l = 0, r = 0; + pL += MZ_ZIP_CENTRAL_DIR_HEADER_SIZE; pR += MZ_ZIP_CENTRAL_DIR_HEADER_SIZE; + pE = pL + MZ_MIN(l_len, r_len); + while (pL < pE) + { + if ((l = MZ_TOLOWER(*pL)) != (r = MZ_TOLOWER(*pR))) + break; + pL++; pR++; + } + return (pL == pE) ? (l_len < r_len) : (l < r); +} + +#define MZ_SWAP_UINT32(a, b) do { mz_uint32 t = a; a = b; b = t; } MZ_MACRO_END + +// Heap sort of lowercased filenames, used to help accelerate plain central directory searches by mz_zip_reader_locate_file(). (Could also use qsort(), but it could allocate memory.) +static void mz_zip_reader_sort_central_dir_offsets_by_filename(mz_zip_archive *pZip) +{ + mz_zip_internal_state *pState = pZip->m_pState; + const mz_zip_array *pCentral_dir_offsets = &pState->m_central_dir_offsets; + const mz_zip_array *pCentral_dir = &pState->m_central_dir; + mz_uint32 *pIndices = &MZ_ZIP_ARRAY_ELEMENT(&pState->m_sorted_central_dir_offsets, mz_uint32, 0); + const int size = pZip->m_total_files; + int start = (size - 2) >> 1, end; + while (start >= 0) + { + int child, root = start; + for ( ; ; ) + { + if ((child = (root << 1) + 1) >= size) + break; + child += (((child + 1) < size) && (mz_zip_reader_filename_less(pCentral_dir, pCentral_dir_offsets, pIndices[child], pIndices[child + 1]))); + if (!mz_zip_reader_filename_less(pCentral_dir, pCentral_dir_offsets, pIndices[root], pIndices[child])) + break; + MZ_SWAP_UINT32(pIndices[root], pIndices[child]); root = child; + } + start--; + } + + end = size - 1; + while (end > 0) + { + int child, root = 0; + MZ_SWAP_UINT32(pIndices[end], pIndices[0]); + for ( ; ; ) + { + if ((child = (root << 1) + 1) >= end) + break; + child += (((child + 1) < end) && mz_zip_reader_filename_less(pCentral_dir, pCentral_dir_offsets, pIndices[child], pIndices[child + 1])); + if (!mz_zip_reader_filename_less(pCentral_dir, pCentral_dir_offsets, pIndices[root], pIndices[child])) + break; + MZ_SWAP_UINT32(pIndices[root], pIndices[child]); root = child; + } + end--; + } +} + +static mz_bool mz_zip_reader_read_central_dir(mz_zip_archive *pZip, mz_uint32 flags) +{ + mz_uint cdir_size, num_this_disk, cdir_disk_index; + mz_uint64 cdir_ofs; + mz_int64 cur_file_ofs; + const mz_uint8 *p; + mz_uint32 buf_u32[4096 / sizeof(mz_uint32)]; mz_uint8 *pBuf = (mz_uint8 *)buf_u32; + // Basic sanity checks - reject files which are too small, and check the first 4 bytes of the file to make sure a local header is there. + if (pZip->m_archive_size < MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE) + return MZ_FALSE; + // Find the end of central directory record by scanning the file from the end towards the beginning. + cur_file_ofs = MZ_MAX((mz_int64)pZip->m_archive_size - (mz_int64)sizeof(buf_u32), 0); + for ( ; ; ) + { + int i, n = (int)MZ_MIN(sizeof(buf_u32), pZip->m_archive_size - cur_file_ofs); + if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pBuf, n) != (mz_uint)n) + return MZ_FALSE; + for (i = n - 4; i >= 0; --i) + if (MZ_READ_LE32(pBuf + i) == MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIG) + break; + if (i >= 0) + { + cur_file_ofs += i; + break; + } + if ((!cur_file_ofs) || ((pZip->m_archive_size - cur_file_ofs) >= (0xFFFF + MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE))) + return MZ_FALSE; + cur_file_ofs = MZ_MAX(cur_file_ofs - (sizeof(buf_u32) - 3), 0); + } + // Read and verify the end of central directory record. + if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pBuf, MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE) != MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE) + return MZ_FALSE; + if ((MZ_READ_LE32(pBuf + MZ_ZIP_ECDH_SIG_OFS) != MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIG) || + ((pZip->m_total_files = MZ_READ_LE16(pBuf + MZ_ZIP_ECDH_CDIR_TOTAL_ENTRIES_OFS)) != MZ_READ_LE16(pBuf + MZ_ZIP_ECDH_CDIR_NUM_ENTRIES_ON_DISK_OFS))) + return MZ_FALSE; + + num_this_disk = MZ_READ_LE16(pBuf + MZ_ZIP_ECDH_NUM_THIS_DISK_OFS); + cdir_disk_index = MZ_READ_LE16(pBuf + MZ_ZIP_ECDH_NUM_DISK_CDIR_OFS); + if (((num_this_disk | cdir_disk_index) != 0) && ((num_this_disk != 1) || (cdir_disk_index != 1))) + return MZ_FALSE; + + if ((cdir_size = MZ_READ_LE32(pBuf + MZ_ZIP_ECDH_CDIR_SIZE_OFS)) < pZip->m_total_files * MZ_ZIP_CENTRAL_DIR_HEADER_SIZE) + return MZ_FALSE; + + cdir_ofs = MZ_READ_LE32(pBuf + MZ_ZIP_ECDH_CDIR_OFS_OFS); + if ((cdir_ofs + (mz_uint64)cdir_size) > pZip->m_archive_size) + return MZ_FALSE; + + pZip->m_central_directory_file_ofs = cdir_ofs; + + if (pZip->m_total_files) + { + mz_uint i, n; + // Read the entire central directory into a heap block, and allocate another heap block to hold the unsorted central dir file record offsets, and another to hold the sorted indices. + if ((!mz_zip_array_resize(pZip, &pZip->m_pState->m_central_dir, cdir_size, MZ_FALSE)) || + (!mz_zip_array_resize(pZip, &pZip->m_pState->m_central_dir_offsets, pZip->m_total_files, MZ_FALSE)) || + (!mz_zip_array_resize(pZip, &pZip->m_pState->m_sorted_central_dir_offsets, pZip->m_total_files, MZ_FALSE))) + return MZ_FALSE; + if (pZip->m_pRead(pZip->m_pIO_opaque, cdir_ofs, pZip->m_pState->m_central_dir.m_p, cdir_size) != cdir_size) + return MZ_FALSE; + + // Now create an index into the central directory file records, do some basic sanity checking on each record, and check for zip64 entries (which are not yet supported). + p = (const mz_uint8 *)pZip->m_pState->m_central_dir.m_p; + for (n = cdir_size, i = 0; i < pZip->m_total_files; ++i) + { + mz_uint total_header_size, comp_size, decomp_size, disk_index; + if ((n < MZ_ZIP_CENTRAL_DIR_HEADER_SIZE) || (MZ_READ_LE32(p) != MZ_ZIP_CENTRAL_DIR_HEADER_SIG)) + return MZ_FALSE; + MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir_offsets, mz_uint32, i) = (mz_uint32)(p - (const mz_uint8 *)pZip->m_pState->m_central_dir.m_p); + MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_sorted_central_dir_offsets, mz_uint32, i) = i; + comp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS); + decomp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS); + if (((!MZ_READ_LE32(p + MZ_ZIP_CDH_METHOD_OFS)) && (decomp_size != comp_size)) || (decomp_size && !comp_size) || (decomp_size == 0xFFFFFFFF) || (comp_size == 0xFFFFFFFF)) + return MZ_FALSE; + disk_index = MZ_READ_LE16(p + MZ_ZIP_CDH_DISK_START_OFS); + if ((disk_index != num_this_disk) && (disk_index != 1)) + return MZ_FALSE; + if (((mz_uint64)MZ_READ_LE32(p + MZ_ZIP_CDH_LOCAL_HEADER_OFS) + MZ_ZIP_LOCAL_DIR_HEADER_SIZE + comp_size) > pZip->m_archive_size) + return MZ_FALSE; + if ((total_header_size = MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS) + MZ_READ_LE16(p + MZ_ZIP_CDH_EXTRA_LEN_OFS) + MZ_READ_LE16(p + MZ_ZIP_CDH_COMMENT_LEN_OFS)) > n) + return MZ_FALSE; + n -= total_header_size; p += total_header_size; + } + } + + if ((flags & MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY) == 0) + mz_zip_reader_sort_central_dir_offsets_by_filename(pZip); + + return MZ_TRUE; +} + +mz_bool mz_zip_reader_init(mz_zip_archive *pZip, mz_uint64 size, mz_uint32 flags) +{ + if ((!pZip) || (!pZip->m_pRead)) + return MZ_FALSE; + if (!mz_zip_reader_init_internal(pZip, flags)) + return MZ_FALSE; + pZip->m_archive_size = size; + if (!mz_zip_reader_read_central_dir(pZip, flags)) + { + mz_zip_reader_end(pZip); + return MZ_FALSE; + } + return MZ_TRUE; +} + +static size_t mz_zip_mem_read_func(void *pOpaque, mz_uint64 file_ofs, void *pBuf, size_t n) +{ + mz_zip_archive *pZip = (mz_zip_archive *)pOpaque; + size_t s = (file_ofs >= pZip->m_archive_size) ? 0 : (size_t)MZ_MIN(pZip->m_archive_size - file_ofs, n); + memcpy(pBuf, (const mz_uint8 *)pZip->m_pState->m_pMem + file_ofs, s); + return s; +} + +mz_bool mz_zip_reader_init_mem(mz_zip_archive *pZip, const void *pMem, size_t size, mz_uint32 flags) +{ + if (!mz_zip_reader_init_internal(pZip, flags)) + return MZ_FALSE; + pZip->m_archive_size = size; + pZip->m_pRead = mz_zip_mem_read_func; + pZip->m_pIO_opaque = pZip; + pZip->m_pState->m_pMem = (void *)pMem; + pZip->m_pState->m_mem_size = size; + if (!mz_zip_reader_read_central_dir(pZip, flags)) + { + mz_zip_reader_end(pZip); + return MZ_FALSE; + } + return MZ_TRUE; +} + +#ifndef MINIZ_NO_STDIO +static size_t mz_zip_file_read_func(void *pOpaque, mz_uint64 file_ofs, void *pBuf, size_t n) +{ + mz_zip_archive *pZip = (mz_zip_archive *)pOpaque; + mz_int64 cur_ofs = MZ_FTELL64(pZip->m_pState->m_pFile); + if (((mz_int64)file_ofs < 0) || (((cur_ofs != (mz_int64)file_ofs)) && (MZ_FSEEK64(pZip->m_pState->m_pFile, (mz_int64)file_ofs, SEEK_SET)))) + return 0; + return MZ_FREAD(pBuf, 1, n, pZip->m_pState->m_pFile); +} + +mz_bool mz_zip_reader_init_file(mz_zip_archive *pZip, const char *pFilename, mz_uint32 flags) +{ + mz_uint64 file_size; + MZ_FILE *pFile = MZ_FOPEN(pFilename, "rb"); + if (!pFile) + return MZ_FALSE; + if (MZ_FSEEK64(pFile, 0, SEEK_END)) + return MZ_FALSE; + file_size = MZ_FTELL64(pFile); + if (!mz_zip_reader_init_internal(pZip, flags)) + { + MZ_FCLOSE(pFile); + return MZ_FALSE; + } + pZip->m_pRead = mz_zip_file_read_func; + pZip->m_pIO_opaque = pZip; + pZip->m_pState->m_pFile = pFile; + pZip->m_archive_size = file_size; + if (!mz_zip_reader_read_central_dir(pZip, flags)) + { + mz_zip_reader_end(pZip); + return MZ_FALSE; + } + return MZ_TRUE; +} +#endif // #ifndef MINIZ_NO_STDIO + +mz_uint mz_zip_reader_get_num_files(mz_zip_archive *pZip) +{ + return pZip ? pZip->m_total_files : 0; +} + +static MZ_FORCEINLINE const mz_uint8 *mz_zip_reader_get_cdh(mz_zip_archive *pZip, mz_uint file_index) +{ + if ((!pZip) || (!pZip->m_pState) || (file_index >= pZip->m_total_files) || (pZip->m_zip_mode != MZ_ZIP_MODE_READING)) + return NULL; + return &MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir, mz_uint8, MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir_offsets, mz_uint32, file_index)); +} + +mz_bool mz_zip_reader_is_file_encrypted(mz_zip_archive *pZip, mz_uint file_index) +{ + mz_uint m_bit_flag; + const mz_uint8 *p = mz_zip_reader_get_cdh(pZip, file_index); + if (!p) + return MZ_FALSE; + m_bit_flag = MZ_READ_LE16(p + MZ_ZIP_CDH_BIT_FLAG_OFS); + return (m_bit_flag & 1); +} + +mz_bool mz_zip_reader_is_file_a_directory(mz_zip_archive *pZip, mz_uint file_index) +{ + mz_uint filename_len, internal_attr, external_attr; + const mz_uint8 *p = mz_zip_reader_get_cdh(pZip, file_index); + if (!p) + return MZ_FALSE; + + internal_attr = MZ_READ_LE16(p + MZ_ZIP_CDH_INTERNAL_ATTR_OFS); + external_attr = MZ_READ_LE32(p + MZ_ZIP_CDH_EXTERNAL_ATTR_OFS); + if ((!internal_attr) && ((external_attr & 0x10) != 0)) + return MZ_TRUE; + + filename_len = MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS); + if (filename_len) + { + if (*(p + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + filename_len - 1) == '/') + return MZ_TRUE; + } + + return MZ_FALSE; +} + +mz_bool mz_zip_reader_file_stat(mz_zip_archive *pZip, mz_uint file_index, mz_zip_archive_file_stat *pStat) +{ + mz_uint n; + const mz_uint8 *p = mz_zip_reader_get_cdh(pZip, file_index); + if ((!p) || (!pStat)) + return MZ_FALSE; + + // Unpack the central directory record. + pStat->m_file_index = file_index; + pStat->m_central_dir_ofs = MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir_offsets, mz_uint32, file_index); + pStat->m_version_made_by = MZ_READ_LE16(p + MZ_ZIP_CDH_VERSION_MADE_BY_OFS); + pStat->m_version_needed = MZ_READ_LE16(p + MZ_ZIP_CDH_VERSION_NEEDED_OFS); + pStat->m_bit_flag = MZ_READ_LE16(p + MZ_ZIP_CDH_BIT_FLAG_OFS); + pStat->m_method = MZ_READ_LE16(p + MZ_ZIP_CDH_METHOD_OFS); +#ifndef MINIZ_NO_TIME + pStat->m_time = mz_zip_dos_to_time_t(MZ_READ_LE16(p + MZ_ZIP_CDH_FILE_TIME_OFS), MZ_READ_LE16(p + MZ_ZIP_CDH_FILE_DATE_OFS)); +#endif + pStat->m_crc32 = MZ_READ_LE32(p + MZ_ZIP_CDH_CRC32_OFS); + pStat->m_comp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS); + pStat->m_uncomp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS); + pStat->m_internal_attr = MZ_READ_LE16(p + MZ_ZIP_CDH_INTERNAL_ATTR_OFS); + pStat->m_external_attr = MZ_READ_LE32(p + MZ_ZIP_CDH_EXTERNAL_ATTR_OFS); + pStat->m_local_header_ofs = MZ_READ_LE32(p + MZ_ZIP_CDH_LOCAL_HEADER_OFS); + + // Copy as much of the filename and comment as possible. + n = MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS); n = MZ_MIN(n, MZ_ZIP_MAX_ARCHIVE_FILENAME_SIZE - 1); + memcpy(pStat->m_filename, p + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE, n); pStat->m_filename[n] = '\0'; + + n = MZ_READ_LE16(p + MZ_ZIP_CDH_COMMENT_LEN_OFS); n = MZ_MIN(n, MZ_ZIP_MAX_ARCHIVE_FILE_COMMENT_SIZE - 1); + pStat->m_comment_size = n; + memcpy(pStat->m_comment, p + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS) + MZ_READ_LE16(p + MZ_ZIP_CDH_EXTRA_LEN_OFS), n); pStat->m_comment[n] = '\0'; + + return MZ_TRUE; +} + +mz_uint mz_zip_reader_get_filename(mz_zip_archive *pZip, mz_uint file_index, char *pFilename, mz_uint filename_buf_size) +{ + mz_uint n; + const mz_uint8 *p = mz_zip_reader_get_cdh(pZip, file_index); + if (!p) { if (filename_buf_size) pFilename[0] = '\0'; return 0; } + n = MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS); + if (filename_buf_size) + { + n = MZ_MIN(n, filename_buf_size - 1); + memcpy(pFilename, p + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE, n); + pFilename[n] = '\0'; + } + return n + 1; +} + +static MZ_FORCEINLINE mz_bool mz_zip_reader_string_equal(const char *pA, const char *pB, mz_uint len, mz_uint flags) +{ + mz_uint i; + if (flags & MZ_ZIP_FLAG_CASE_SENSITIVE) + return 0 == memcmp(pA, pB, len); + for (i = 0; i < len; ++i) + if (MZ_TOLOWER(pA[i]) != MZ_TOLOWER(pB[i])) + return MZ_FALSE; + return MZ_TRUE; +} + +static MZ_FORCEINLINE int mz_zip_reader_filename_compare(const mz_zip_array *pCentral_dir_array, const mz_zip_array *pCentral_dir_offsets, mz_uint l_index, const char *pR, mz_uint r_len) +{ + const mz_uint8 *pL = &MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_array, mz_uint8, MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_offsets, mz_uint32, l_index)), *pE; + mz_uint l_len = MZ_READ_LE16(pL + MZ_ZIP_CDH_FILENAME_LEN_OFS); + mz_uint8 l = 0, r = 0; + pL += MZ_ZIP_CENTRAL_DIR_HEADER_SIZE; + pE = pL + MZ_MIN(l_len, r_len); + while (pL < pE) + { + if ((l = MZ_TOLOWER(*pL)) != (r = MZ_TOLOWER(*pR))) + break; + pL++; pR++; + } + return (pL == pE) ? (int)(l_len - r_len) : (l - r); +} + +static int mz_zip_reader_locate_file_binary_search(mz_zip_archive *pZip, const char *pFilename) +{ + mz_zip_internal_state *pState = pZip->m_pState; + const mz_zip_array *pCentral_dir_offsets = &pState->m_central_dir_offsets; + const mz_zip_array *pCentral_dir = &pState->m_central_dir; + mz_uint32 *pIndices = &MZ_ZIP_ARRAY_ELEMENT(&pState->m_sorted_central_dir_offsets, mz_uint32, 0); + const int size = pZip->m_total_files; + const mz_uint filename_len = (mz_uint)strlen(pFilename); + int l = 0, h = size - 1; + while (l <= h) + { + int m = (l + h) >> 1, file_index = pIndices[m], comp = mz_zip_reader_filename_compare(pCentral_dir, pCentral_dir_offsets, file_index, pFilename, filename_len); + if (!comp) + return file_index; + else if (comp < 0) + l = m + 1; + else + h = m - 1; + } + return -1; +} + +int mz_zip_reader_locate_file(mz_zip_archive *pZip, const char *pName, const char *pComment, mz_uint flags) +{ + mz_uint file_index; size_t name_len, comment_len; + if ((!pZip) || (!pZip->m_pState) || (!pName) || (pZip->m_zip_mode != MZ_ZIP_MODE_READING)) + return -1; + if (((flags & (MZ_ZIP_FLAG_IGNORE_PATH | MZ_ZIP_FLAG_CASE_SENSITIVE)) == 0) && (!pComment) && (pZip->m_pState->m_sorted_central_dir_offsets.m_p)) + return mz_zip_reader_locate_file_binary_search(pZip, pName); + name_len = strlen(pName); if (name_len > 0xFFFF) return -1; + comment_len = pComment ? strlen(pComment) : 0; if (comment_len > 0xFFFF) return -1; + for (file_index = 0; file_index < pZip->m_total_files; file_index++) + { + const mz_uint8 *pHeader = &MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir, mz_uint8, MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir_offsets, mz_uint32, file_index)); + mz_uint filename_len = MZ_READ_LE16(pHeader + MZ_ZIP_CDH_FILENAME_LEN_OFS); + const char *pFilename = (const char *)pHeader + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE; + if (filename_len < name_len) + continue; + if (comment_len) + { + mz_uint file_extra_len = MZ_READ_LE16(pHeader + MZ_ZIP_CDH_EXTRA_LEN_OFS), file_comment_len = MZ_READ_LE16(pHeader + MZ_ZIP_CDH_COMMENT_LEN_OFS); + const char *pFile_comment = pFilename + filename_len + file_extra_len; + if ((file_comment_len != comment_len) || (!mz_zip_reader_string_equal(pComment, pFile_comment, file_comment_len, flags))) + continue; + } + if ((flags & MZ_ZIP_FLAG_IGNORE_PATH) && (filename_len)) + { + int ofs = filename_len - 1; + do + { + if ((pFilename[ofs] == '/') || (pFilename[ofs] == '\\') || (pFilename[ofs] == ':')) + break; + } while (--ofs >= 0); + ofs++; + pFilename += ofs; filename_len -= ofs; + } + if ((filename_len == name_len) && (mz_zip_reader_string_equal(pName, pFilename, filename_len, flags))) + return file_index; + } + return -1; +} + +mz_bool mz_zip_reader_extract_to_mem_no_alloc(mz_zip_archive *pZip, mz_uint file_index, void *pBuf, size_t buf_size, mz_uint flags, void *pUser_read_buf, size_t user_read_buf_size) +{ + int status = TINFL_STATUS_DONE; + mz_uint64 needed_size, cur_file_ofs, comp_remaining, out_buf_ofs = 0, read_buf_size, read_buf_ofs = 0, read_buf_avail; + mz_zip_archive_file_stat file_stat; + void *pRead_buf; + mz_uint32 local_header_u32[(MZ_ZIP_LOCAL_DIR_HEADER_SIZE + sizeof(mz_uint32) - 1) / sizeof(mz_uint32)]; mz_uint8 *pLocal_header = (mz_uint8 *)local_header_u32; + tinfl_decompressor inflator; + + if ((buf_size) && (!pBuf)) + return MZ_FALSE; + + if (!mz_zip_reader_file_stat(pZip, file_index, &file_stat)) + return MZ_FALSE; + + if (!file_stat.m_comp_size) + return MZ_TRUE; + + // Encryption and patch files are not supported. + if (file_stat.m_bit_flag & (1 | 32)) + return MZ_FALSE; + + // This function only supports stored and deflate. + if ((!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) && (file_stat.m_method != 0) && (file_stat.m_method != MZ_DEFLATED)) + return MZ_FALSE; + + // Ensure supplied output buffer is large enough. + needed_size = (flags & MZ_ZIP_FLAG_COMPRESSED_DATA) ? file_stat.m_comp_size : file_stat.m_uncomp_size; + if (buf_size < needed_size) + return MZ_FALSE; + + // Read and parse the local directory entry. + cur_file_ofs = file_stat.m_local_header_ofs; + if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pLocal_header, MZ_ZIP_LOCAL_DIR_HEADER_SIZE) != MZ_ZIP_LOCAL_DIR_HEADER_SIZE) + return MZ_FALSE; + if (MZ_READ_LE32(pLocal_header) != MZ_ZIP_LOCAL_DIR_HEADER_SIG) + return MZ_FALSE; + + cur_file_ofs += MZ_ZIP_LOCAL_DIR_HEADER_SIZE + MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_FILENAME_LEN_OFS) + MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_EXTRA_LEN_OFS); + if ((cur_file_ofs + file_stat.m_comp_size) > pZip->m_archive_size) + return MZ_FALSE; + + if ((flags & MZ_ZIP_FLAG_COMPRESSED_DATA) || (!file_stat.m_method)) + { + // The file is stored or the caller has requested the compressed data. + if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pBuf, (size_t)needed_size) != needed_size) + return MZ_FALSE; + return ((flags & MZ_ZIP_FLAG_COMPRESSED_DATA) != 0) || (mz_crc32(MZ_CRC32_INIT, (const mz_uint8 *)pBuf, (size_t)file_stat.m_uncomp_size) == file_stat.m_crc32); + } + + // Decompress the file either directly from memory or from a file input buffer. + tinfl_init(&inflator); + + if (pZip->m_pState->m_pMem) + { + // Read directly from the archive in memory. + pRead_buf = (mz_uint8 *)pZip->m_pState->m_pMem + cur_file_ofs; + read_buf_size = read_buf_avail = file_stat.m_comp_size; + comp_remaining = 0; + } + else if (pUser_read_buf) + { + // Use a user provided read buffer. + if (!user_read_buf_size) + return MZ_FALSE; + pRead_buf = (mz_uint8 *)pUser_read_buf; + read_buf_size = user_read_buf_size; + read_buf_avail = 0; + comp_remaining = file_stat.m_uncomp_size; + } + else + { + // Temporarily allocate a read buffer. + read_buf_size = MZ_MIN(file_stat.m_comp_size, MZ_ZIP_MAX_IO_BUF_SIZE); +#ifdef _MSC_VER + if (((0, sizeof(size_t) == sizeof(mz_uint32))) && (read_buf_size > 0x7FFFFFFF)) +#else + if (((sizeof(size_t) == sizeof(mz_uint32))) && (read_buf_size > 0x7FFFFFFF)) +#endif + return MZ_FALSE; + if (NULL == (pRead_buf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, (size_t)read_buf_size))) + return MZ_FALSE; + read_buf_avail = 0; + comp_remaining = file_stat.m_comp_size; + } + + do + { + size_t in_buf_size, out_buf_size = (size_t)(file_stat.m_uncomp_size - out_buf_ofs); + if ((!read_buf_avail) && (!pZip->m_pState->m_pMem)) + { + read_buf_avail = MZ_MIN(read_buf_size, comp_remaining); + if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pRead_buf, (size_t)read_buf_avail) != read_buf_avail) + { + status = TINFL_STATUS_FAILED; + break; + } + cur_file_ofs += read_buf_avail; + comp_remaining -= read_buf_avail; + read_buf_ofs = 0; + } + in_buf_size = (size_t)read_buf_avail; + status = tinfl_decompress(&inflator, (mz_uint8 *)pRead_buf + read_buf_ofs, &in_buf_size, (mz_uint8 *)pBuf, (mz_uint8 *)pBuf + out_buf_ofs, &out_buf_size, TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF | (comp_remaining ? TINFL_FLAG_HAS_MORE_INPUT : 0)); + read_buf_avail -= in_buf_size; + read_buf_ofs += in_buf_size; + out_buf_ofs += out_buf_size; + } while (status == TINFL_STATUS_NEEDS_MORE_INPUT); + + if (status == TINFL_STATUS_DONE) + { + // Make sure the entire file was decompressed, and check its CRC. + if ((out_buf_ofs != file_stat.m_uncomp_size) || (mz_crc32(MZ_CRC32_INIT, (const mz_uint8 *)pBuf, (size_t)file_stat.m_uncomp_size) != file_stat.m_crc32)) + status = TINFL_STATUS_FAILED; + } + + if ((!pZip->m_pState->m_pMem) && (!pUser_read_buf)) + pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); + + return status == TINFL_STATUS_DONE; +} + +mz_bool mz_zip_reader_extract_file_to_mem_no_alloc(mz_zip_archive *pZip, const char *pFilename, void *pBuf, size_t buf_size, mz_uint flags, void *pUser_read_buf, size_t user_read_buf_size) +{ + int file_index = mz_zip_reader_locate_file(pZip, pFilename, NULL, flags); + if (file_index < 0) + return MZ_FALSE; + return mz_zip_reader_extract_to_mem_no_alloc(pZip, file_index, pBuf, buf_size, flags, pUser_read_buf, user_read_buf_size); +} + +mz_bool mz_zip_reader_extract_to_mem(mz_zip_archive *pZip, mz_uint file_index, void *pBuf, size_t buf_size, mz_uint flags) +{ + return mz_zip_reader_extract_to_mem_no_alloc(pZip, file_index, pBuf, buf_size, flags, NULL, 0); +} + +mz_bool mz_zip_reader_extract_file_to_mem(mz_zip_archive *pZip, const char *pFilename, void *pBuf, size_t buf_size, mz_uint flags) +{ + return mz_zip_reader_extract_file_to_mem_no_alloc(pZip, pFilename, pBuf, buf_size, flags, NULL, 0); +} + +void *mz_zip_reader_extract_to_heap(mz_zip_archive *pZip, mz_uint file_index, size_t *pSize, mz_uint flags) +{ + mz_uint64 comp_size, uncomp_size, alloc_size; + const mz_uint8 *p = mz_zip_reader_get_cdh(pZip, file_index); + void *pBuf; + + if (pSize) + *pSize = 0; + if (!p) + return NULL; + + comp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS); + uncomp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS); + + alloc_size = (flags & MZ_ZIP_FLAG_COMPRESSED_DATA) ? comp_size : uncomp_size; +#ifdef _MSC_VER + if (((0, sizeof(size_t) == sizeof(mz_uint32))) && (alloc_size > 0x7FFFFFFF)) +#else + if (((sizeof(size_t) == sizeof(mz_uint32))) && (alloc_size > 0x7FFFFFFF)) +#endif + return NULL; + if (NULL == (pBuf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, (size_t)alloc_size))) + return NULL; + + if (!mz_zip_reader_extract_to_mem(pZip, file_index, pBuf, (size_t)alloc_size, flags)) + { + pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf); + return NULL; + } + + if (pSize) *pSize = (size_t)alloc_size; + return pBuf; +} + +void *mz_zip_reader_extract_file_to_heap(mz_zip_archive *pZip, const char *pFilename, size_t *pSize, mz_uint flags) +{ + int file_index = mz_zip_reader_locate_file(pZip, pFilename, NULL, flags); + if (file_index < 0) + { + if (pSize) *pSize = 0; + return MZ_FALSE; + } + return mz_zip_reader_extract_to_heap(pZip, file_index, pSize, flags); +} + +mz_bool mz_zip_reader_extract_to_callback(mz_zip_archive *pZip, mz_uint file_index, mz_file_write_func pCallback, void *pOpaque, mz_uint flags) +{ + int status = TINFL_STATUS_DONE; mz_uint file_crc32 = MZ_CRC32_INIT; + mz_uint64 read_buf_size, read_buf_ofs = 0, read_buf_avail, comp_remaining, out_buf_ofs = 0, cur_file_ofs; + mz_zip_archive_file_stat file_stat; + void *pRead_buf = NULL; void *pWrite_buf = NULL; + mz_uint32 local_header_u32[(MZ_ZIP_LOCAL_DIR_HEADER_SIZE + sizeof(mz_uint32) - 1) / sizeof(mz_uint32)]; mz_uint8 *pLocal_header = (mz_uint8 *)local_header_u32; + + if (!mz_zip_reader_file_stat(pZip, file_index, &file_stat)) + return MZ_FALSE; + + if (!file_stat.m_comp_size) + return MZ_TRUE; + + // Encryption and patch files are not supported. + if (file_stat.m_bit_flag & (1 | 32)) + return MZ_FALSE; + + // This function only supports stored and deflate. + if ((!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) && (file_stat.m_method != 0) && (file_stat.m_method != MZ_DEFLATED)) + return MZ_FALSE; + + // Read and parse the local directory entry. + cur_file_ofs = file_stat.m_local_header_ofs; + if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pLocal_header, MZ_ZIP_LOCAL_DIR_HEADER_SIZE) != MZ_ZIP_LOCAL_DIR_HEADER_SIZE) + return MZ_FALSE; + if (MZ_READ_LE32(pLocal_header) != MZ_ZIP_LOCAL_DIR_HEADER_SIG) + return MZ_FALSE; + + cur_file_ofs += MZ_ZIP_LOCAL_DIR_HEADER_SIZE + MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_FILENAME_LEN_OFS) + MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_EXTRA_LEN_OFS); + if ((cur_file_ofs + file_stat.m_comp_size) > pZip->m_archive_size) + return MZ_FALSE; + + // Decompress the file either directly from memory or from a file input buffer. + if (pZip->m_pState->m_pMem) + { + pRead_buf = (mz_uint8 *)pZip->m_pState->m_pMem + cur_file_ofs; + read_buf_size = read_buf_avail = file_stat.m_comp_size; + comp_remaining = 0; + } + else + { + read_buf_size = MZ_MIN(file_stat.m_comp_size, MZ_ZIP_MAX_IO_BUF_SIZE); + if (NULL == (pRead_buf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, (size_t)read_buf_size))) + return MZ_FALSE; + read_buf_avail = 0; + comp_remaining = file_stat.m_comp_size; + } + + if ((flags & MZ_ZIP_FLAG_COMPRESSED_DATA) || (!file_stat.m_method)) + { + // The file is stored or the caller has requested the compressed data. + if (pZip->m_pState->m_pMem) + { +#ifdef _MSC_VER + if (((0, sizeof(size_t) == sizeof(mz_uint32))) && (file_stat.m_comp_size > 0xFFFFFFFF)) +#else + if (((sizeof(size_t) == sizeof(mz_uint32))) && (file_stat.m_comp_size > 0xFFFFFFFF)) +#endif + return MZ_FALSE; + if (pCallback(pOpaque, out_buf_ofs, pRead_buf, (size_t)file_stat.m_comp_size) != file_stat.m_comp_size) + status = TINFL_STATUS_FAILED; + else if (!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) + file_crc32 = (mz_uint32)mz_crc32(file_crc32, (const mz_uint8 *)pRead_buf, (size_t)file_stat.m_comp_size); + cur_file_ofs += file_stat.m_comp_size; + out_buf_ofs += file_stat.m_comp_size; + comp_remaining = 0; + } + else + { + while (comp_remaining) + { + read_buf_avail = MZ_MIN(read_buf_size, comp_remaining); + if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pRead_buf, (size_t)read_buf_avail) != read_buf_avail) + { + status = TINFL_STATUS_FAILED; + break; + } + + if (!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) + file_crc32 = (mz_uint32)mz_crc32(file_crc32, (const mz_uint8 *)pRead_buf, (size_t)read_buf_avail); + + if (pCallback(pOpaque, out_buf_ofs, pRead_buf, (size_t)read_buf_avail) != read_buf_avail) + { + status = TINFL_STATUS_FAILED; + break; + } + cur_file_ofs += read_buf_avail; + out_buf_ofs += read_buf_avail; + comp_remaining -= read_buf_avail; + } + } + } + else + { + tinfl_decompressor inflator; + tinfl_init(&inflator); + + if (NULL == (pWrite_buf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, TINFL_LZ_DICT_SIZE))) + status = TINFL_STATUS_FAILED; + else + { + do + { + mz_uint8 *pWrite_buf_cur = (mz_uint8 *)pWrite_buf + (out_buf_ofs & (TINFL_LZ_DICT_SIZE - 1)); + size_t in_buf_size, out_buf_size = TINFL_LZ_DICT_SIZE - (out_buf_ofs & (TINFL_LZ_DICT_SIZE - 1)); + if ((!read_buf_avail) && (!pZip->m_pState->m_pMem)) + { + read_buf_avail = MZ_MIN(read_buf_size, comp_remaining); + if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pRead_buf, (size_t)read_buf_avail) != read_buf_avail) + { + status = TINFL_STATUS_FAILED; + break; + } + cur_file_ofs += read_buf_avail; + comp_remaining -= read_buf_avail; + read_buf_ofs = 0; + } + + in_buf_size = (size_t)read_buf_avail; + status = tinfl_decompress(&inflator, (const mz_uint8 *)pRead_buf + read_buf_ofs, &in_buf_size, (mz_uint8 *)pWrite_buf, pWrite_buf_cur, &out_buf_size, comp_remaining ? TINFL_FLAG_HAS_MORE_INPUT : 0); + read_buf_avail -= in_buf_size; + read_buf_ofs += in_buf_size; + + if (out_buf_size) + { + if (pCallback(pOpaque, out_buf_ofs, pWrite_buf_cur, out_buf_size) != out_buf_size) + { + status = TINFL_STATUS_FAILED; + break; + } + file_crc32 = (mz_uint32)mz_crc32(file_crc32, pWrite_buf_cur, out_buf_size); + if ((out_buf_ofs += out_buf_size) > file_stat.m_uncomp_size) + { + status = TINFL_STATUS_FAILED; + break; + } + } + } while ((status == TINFL_STATUS_NEEDS_MORE_INPUT) || (status == TINFL_STATUS_HAS_MORE_OUTPUT)); + } + } + + if ((status == TINFL_STATUS_DONE) && (!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA))) + { + // Make sure the entire file was decompressed, and check its CRC. + if ((out_buf_ofs != file_stat.m_uncomp_size) || (file_crc32 != file_stat.m_crc32)) + status = TINFL_STATUS_FAILED; + } + + if (!pZip->m_pState->m_pMem) + pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); + if (pWrite_buf) + pZip->m_pFree(pZip->m_pAlloc_opaque, pWrite_buf); + + return status == TINFL_STATUS_DONE; +} + +mz_bool mz_zip_reader_extract_file_to_callback(mz_zip_archive *pZip, const char *pFilename, mz_file_write_func pCallback, void *pOpaque, mz_uint flags) +{ + int file_index = mz_zip_reader_locate_file(pZip, pFilename, NULL, flags); + if (file_index < 0) + return MZ_FALSE; + return mz_zip_reader_extract_to_callback(pZip, file_index, pCallback, pOpaque, flags); +} + +#ifndef MINIZ_NO_STDIO +static size_t mz_zip_file_write_callback(void *pOpaque, mz_uint64 ofs, const void *pBuf, size_t n) +{ + (void)ofs; return MZ_FWRITE(pBuf, 1, n, (MZ_FILE*)pOpaque); +} + +mz_bool mz_zip_reader_extract_to_file(mz_zip_archive *pZip, mz_uint file_index, const char *pDst_filename, mz_uint flags) +{ + mz_bool status; + mz_zip_archive_file_stat file_stat; + MZ_FILE *pFile; + if (!mz_zip_reader_file_stat(pZip, file_index, &file_stat)) + return MZ_FALSE; + pFile = MZ_FOPEN(pDst_filename, "wb"); + if (!pFile) + return MZ_FALSE; + status = mz_zip_reader_extract_to_callback(pZip, file_index, mz_zip_file_write_callback, pFile, flags); + if (MZ_FCLOSE(pFile) == EOF) + return MZ_FALSE; +#ifndef MINIZ_NO_TIME + if (status) + mz_zip_set_file_times(pDst_filename, file_stat.m_time, file_stat.m_time); +#endif + return status; +} +#endif // #ifndef MINIZ_NO_STDIO + +mz_bool mz_zip_reader_end(mz_zip_archive *pZip) +{ + if ((!pZip) || (!pZip->m_pState) || (!pZip->m_pAlloc) || (!pZip->m_pFree) || (pZip->m_zip_mode != MZ_ZIP_MODE_READING)) + return MZ_FALSE; + + if (pZip->m_pState) + { + mz_zip_internal_state *pState = pZip->m_pState; pZip->m_pState = NULL; + mz_zip_array_clear(pZip, &pState->m_central_dir); + mz_zip_array_clear(pZip, &pState->m_central_dir_offsets); + mz_zip_array_clear(pZip, &pState->m_sorted_central_dir_offsets); + +#ifndef MINIZ_NO_STDIO + if (pState->m_pFile) + { + MZ_FCLOSE(pState->m_pFile); + pState->m_pFile = NULL; + } +#endif // #ifndef MINIZ_NO_STDIO + + pZip->m_pFree(pZip->m_pAlloc_opaque, pState); + } + pZip->m_zip_mode = MZ_ZIP_MODE_INVALID; + + return MZ_TRUE; +} + +#ifndef MINIZ_NO_STDIO +mz_bool mz_zip_reader_extract_file_to_file(mz_zip_archive *pZip, const char *pArchive_filename, const char *pDst_filename, mz_uint flags) +{ + int file_index = mz_zip_reader_locate_file(pZip, pArchive_filename, NULL, flags); + if (file_index < 0) + return MZ_FALSE; + return mz_zip_reader_extract_to_file(pZip, file_index, pDst_filename, flags); +} +#endif + +// ------------------- .ZIP archive writing + +#ifndef MINIZ_NO_ARCHIVE_WRITING_APIS + +static void mz_write_le16(mz_uint8 *p, mz_uint16 v) { p[0] = (mz_uint8)v; p[1] = (mz_uint8)(v >> 8); } +static void mz_write_le32(mz_uint8 *p, mz_uint32 v) { p[0] = (mz_uint8)v; p[1] = (mz_uint8)(v >> 8); p[2] = (mz_uint8)(v >> 16); p[3] = (mz_uint8)(v >> 24); } +#define MZ_WRITE_LE16(p, v) mz_write_le16((mz_uint8 *)(p), (mz_uint16)(v)) +#define MZ_WRITE_LE32(p, v) mz_write_le32((mz_uint8 *)(p), (mz_uint32)(v)) + +mz_bool mz_zip_writer_init(mz_zip_archive *pZip, mz_uint64 existing_size) +{ + if ((!pZip) || (pZip->m_pState) || (!pZip->m_pWrite) || (pZip->m_zip_mode != MZ_ZIP_MODE_INVALID)) + return MZ_FALSE; + + if (pZip->m_file_offset_alignment) + { + // Ensure user specified file offset alignment is a power of 2. + if (pZip->m_file_offset_alignment & (pZip->m_file_offset_alignment - 1)) + return MZ_FALSE; + } + + if (!pZip->m_pAlloc) pZip->m_pAlloc = def_alloc_func; + if (!pZip->m_pFree) pZip->m_pFree = def_free_func; + if (!pZip->m_pRealloc) pZip->m_pRealloc = def_realloc_func; + + pZip->m_zip_mode = MZ_ZIP_MODE_WRITING; + pZip->m_archive_size = existing_size; + pZip->m_central_directory_file_ofs = 0; + pZip->m_total_files = 0; + + if (NULL == (pZip->m_pState = (mz_zip_internal_state *)pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, sizeof(mz_zip_internal_state)))) + return MZ_FALSE; + memset(pZip->m_pState, 0, sizeof(mz_zip_internal_state)); + MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_central_dir, sizeof(mz_uint8)); + MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_central_dir_offsets, sizeof(mz_uint32)); + MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_sorted_central_dir_offsets, sizeof(mz_uint32)); + return MZ_TRUE; +} + +static size_t mz_zip_heap_write_func(void *pOpaque, mz_uint64 file_ofs, const void *pBuf, size_t n) +{ + mz_zip_archive *pZip = (mz_zip_archive *)pOpaque; + mz_zip_internal_state *pState = pZip->m_pState; + mz_uint64 new_size = MZ_MAX(file_ofs + n, pState->m_mem_size); +#ifdef _MSC_VER + if ((!n) || ((0, sizeof(size_t) == sizeof(mz_uint32)) && (new_size > 0x7FFFFFFF))) +#else + if ((!n) || ((sizeof(size_t) == sizeof(mz_uint32)) && (new_size > 0x7FFFFFFF))) +#endif + return 0; + if (new_size > pState->m_mem_capacity) + { + void *pNew_block; + size_t new_capacity = MZ_MAX(64, pState->m_mem_capacity); while (new_capacity < new_size) new_capacity *= 2; + if (NULL == (pNew_block = pZip->m_pRealloc(pZip->m_pAlloc_opaque, pState->m_pMem, 1, new_capacity))) + return 0; + pState->m_pMem = pNew_block; pState->m_mem_capacity = new_capacity; + } + memcpy((mz_uint8 *)pState->m_pMem + file_ofs, pBuf, n); + pState->m_mem_size = (size_t)new_size; + return n; +} + +mz_bool mz_zip_writer_init_heap(mz_zip_archive *pZip, size_t size_to_reserve_at_beginning, size_t initial_allocation_size) +{ + pZip->m_pWrite = mz_zip_heap_write_func; + pZip->m_pIO_opaque = pZip; + if (!mz_zip_writer_init(pZip, size_to_reserve_at_beginning)) + return MZ_FALSE; + if (0 != (initial_allocation_size = MZ_MAX(initial_allocation_size, size_to_reserve_at_beginning))) + { + if (NULL == (pZip->m_pState->m_pMem = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, initial_allocation_size))) + { + mz_zip_writer_end(pZip); + return MZ_FALSE; + } + pZip->m_pState->m_mem_capacity = initial_allocation_size; + } + return MZ_TRUE; +} + +#ifndef MINIZ_NO_STDIO +static size_t mz_zip_file_write_func(void *pOpaque, mz_uint64 file_ofs, const void *pBuf, size_t n) +{ + mz_zip_archive *pZip = (mz_zip_archive *)pOpaque; + mz_int64 cur_ofs = MZ_FTELL64(pZip->m_pState->m_pFile); + if (((mz_int64)file_ofs < 0) || (((cur_ofs != (mz_int64)file_ofs)) && (MZ_FSEEK64(pZip->m_pState->m_pFile, (mz_int64)file_ofs, SEEK_SET)))) + return 0; + return MZ_FWRITE(pBuf, 1, n, pZip->m_pState->m_pFile); +} + +mz_bool mz_zip_writer_init_file(mz_zip_archive *pZip, const char *pFilename, mz_uint64 size_to_reserve_at_beginning) +{ + MZ_FILE *pFile; + pZip->m_pWrite = mz_zip_file_write_func; + pZip->m_pIO_opaque = pZip; + if (!mz_zip_writer_init(pZip, size_to_reserve_at_beginning)) + return MZ_FALSE; + if (NULL == (pFile = MZ_FOPEN(pFilename, "wb"))) + { + mz_zip_writer_end(pZip); + return MZ_FALSE; + } + pZip->m_pState->m_pFile = pFile; + if (size_to_reserve_at_beginning) + { + mz_uint64 cur_ofs = 0; char buf[4096]; MZ_CLEAR_OBJ(buf); + do + { + size_t n = (size_t)MZ_MIN(sizeof(buf), size_to_reserve_at_beginning); + if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_ofs, buf, n) != n) + { + mz_zip_writer_end(pZip); + return MZ_FALSE; + } + cur_ofs += n; size_to_reserve_at_beginning -= n; + } while (size_to_reserve_at_beginning); + } + return MZ_TRUE; +} +#endif // #ifndef MINIZ_NO_STDIO + +mz_bool mz_zip_writer_init_from_reader(mz_zip_archive *pZip, const char *pFilename) +{ + mz_zip_internal_state *pState; + if ((!pZip) || (!pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_READING)) + return MZ_FALSE; + // No sense in trying to write to an archive that's already at the support max size + if ((pZip->m_total_files == 0xFFFF) || ((pZip->m_archive_size + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + MZ_ZIP_LOCAL_DIR_HEADER_SIZE) > 0xFFFFFFFF)) + return MZ_FALSE; + + pState = pZip->m_pState; + + if (pState->m_pFile) + { +#ifdef MINIZ_NO_STDIO + pFilename; return MZ_FALSE; +#else + // Archive is being read from stdio - try to reopen as writable. + if (pZip->m_pIO_opaque != pZip) + return MZ_FALSE; + if (!pFilename) + return MZ_FALSE; + pZip->m_pWrite = mz_zip_file_write_func; + if (NULL == (pState->m_pFile = MZ_FREOPEN(pFilename, "r+b", pState->m_pFile))) + { + // The mz_zip_archive is now in a bogus state because pState->m_pFile is NULL, so just close it. + mz_zip_reader_end(pZip); + return MZ_FALSE; + } +#endif // #ifdef MINIZ_NO_STDIO + } + else if (pState->m_pMem) + { + // Archive lives in a memory block. Assume it's from the heap that we can resize using the realloc callback. + if (pZip->m_pIO_opaque != pZip) + return MZ_FALSE; + pState->m_mem_capacity = pState->m_mem_size; + pZip->m_pWrite = mz_zip_heap_write_func; + } + // Archive is being read via a user provided read function - make sure the user has specified a write function too. + else if (!pZip->m_pWrite) + return MZ_FALSE; + + // Start writing new files at the archive's current central directory location. + pZip->m_archive_size = pZip->m_central_directory_file_ofs; + pZip->m_zip_mode = MZ_ZIP_MODE_WRITING; + pZip->m_central_directory_file_ofs = 0; + + return MZ_TRUE; +} + +mz_bool mz_zip_writer_add_mem(mz_zip_archive *pZip, const char *pArchive_name, const void *pBuf, size_t buf_size, mz_uint level_and_flags) +{ + return mz_zip_writer_add_mem_ex(pZip, pArchive_name, pBuf, buf_size, NULL, 0, level_and_flags, 0, 0); +} + +typedef struct +{ + mz_zip_archive *m_pZip; + mz_uint64 m_cur_archive_file_ofs; + mz_uint64 m_comp_size; +} mz_zip_writer_add_state; + +static mz_bool mz_zip_writer_add_put_buf_callback(const void* pBuf, int len, void *pUser) +{ + mz_zip_writer_add_state *pState = (mz_zip_writer_add_state *)pUser; + if ((int)pState->m_pZip->m_pWrite(pState->m_pZip->m_pIO_opaque, pState->m_cur_archive_file_ofs, pBuf, len) != len) + return MZ_FALSE; + pState->m_cur_archive_file_ofs += len; + pState->m_comp_size += len; + return MZ_TRUE; +} + +static mz_bool mz_zip_writer_create_local_dir_header(mz_zip_archive *pZip, mz_uint8 *pDst, mz_uint16 filename_size, mz_uint16 extra_size, mz_uint64 uncomp_size, mz_uint64 comp_size, mz_uint32 uncomp_crc32, mz_uint16 method, mz_uint16 bit_flags, mz_uint16 dos_time, mz_uint16 dos_date) +{ + (void)pZip; + memset(pDst, 0, MZ_ZIP_LOCAL_DIR_HEADER_SIZE); + MZ_WRITE_LE32(pDst + MZ_ZIP_LDH_SIG_OFS, MZ_ZIP_LOCAL_DIR_HEADER_SIG); + MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_VERSION_NEEDED_OFS, method ? 20 : 0); + MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_BIT_FLAG_OFS, bit_flags); + MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_METHOD_OFS, method); + MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_FILE_TIME_OFS, dos_time); + MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_FILE_DATE_OFS, dos_date); + MZ_WRITE_LE32(pDst + MZ_ZIP_LDH_CRC32_OFS, uncomp_crc32); + MZ_WRITE_LE32(pDst + MZ_ZIP_LDH_COMPRESSED_SIZE_OFS, comp_size); + MZ_WRITE_LE32(pDst + MZ_ZIP_LDH_DECOMPRESSED_SIZE_OFS, uncomp_size); + MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_FILENAME_LEN_OFS, filename_size); + MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_EXTRA_LEN_OFS, extra_size); + return MZ_TRUE; +} + +static mz_bool mz_zip_writer_create_central_dir_header(mz_zip_archive *pZip, mz_uint8 *pDst, mz_uint16 filename_size, mz_uint16 extra_size, mz_uint16 comment_size, mz_uint64 uncomp_size, mz_uint64 comp_size, mz_uint32 uncomp_crc32, mz_uint16 method, mz_uint16 bit_flags, mz_uint16 dos_time, mz_uint16 dos_date, mz_uint64 local_header_ofs, mz_uint32 ext_attributes) +{ + (void)pZip; + memset(pDst, 0, MZ_ZIP_CENTRAL_DIR_HEADER_SIZE); + MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_SIG_OFS, MZ_ZIP_CENTRAL_DIR_HEADER_SIG); + MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_VERSION_NEEDED_OFS, method ? 20 : 0); + MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_BIT_FLAG_OFS, bit_flags); + MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_METHOD_OFS, method); + MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_FILE_TIME_OFS, dos_time); + MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_FILE_DATE_OFS, dos_date); + MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_CRC32_OFS, uncomp_crc32); + MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS, comp_size); + MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS, uncomp_size); + MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_FILENAME_LEN_OFS, filename_size); + MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_EXTRA_LEN_OFS, extra_size); + MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_COMMENT_LEN_OFS, comment_size); + MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_EXTERNAL_ATTR_OFS, ext_attributes); + MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_LOCAL_HEADER_OFS, local_header_ofs); + return MZ_TRUE; +} + +static mz_bool mz_zip_writer_add_to_central_dir(mz_zip_archive *pZip, const char *pFilename, mz_uint16 filename_size, const void *pExtra, mz_uint16 extra_size, const void *pComment, mz_uint16 comment_size, mz_uint64 uncomp_size, mz_uint64 comp_size, mz_uint32 uncomp_crc32, mz_uint16 method, mz_uint16 bit_flags, mz_uint16 dos_time, mz_uint16 dos_date, mz_uint64 local_header_ofs, mz_uint32 ext_attributes) +{ + mz_zip_internal_state *pState = pZip->m_pState; + mz_uint32 central_dir_ofs = (mz_uint32)pState->m_central_dir.m_size; + size_t orig_central_dir_size = pState->m_central_dir.m_size; + mz_uint8 central_dir_header[MZ_ZIP_CENTRAL_DIR_HEADER_SIZE]; + + // No zip64 support yet + if ((local_header_ofs > 0xFFFFFFFF) || (((mz_uint64)pState->m_central_dir.m_size + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + filename_size + extra_size + comment_size) > 0xFFFFFFFF)) + return MZ_FALSE; + + if (!mz_zip_writer_create_central_dir_header(pZip, central_dir_header, filename_size, extra_size, comment_size, uncomp_size, comp_size, uncomp_crc32, method, bit_flags, dos_time, dos_date, local_header_ofs, ext_attributes)) + return MZ_FALSE; + + if ((!mz_zip_array_push_back(pZip, &pState->m_central_dir, central_dir_header, MZ_ZIP_CENTRAL_DIR_HEADER_SIZE)) || + (!mz_zip_array_push_back(pZip, &pState->m_central_dir, pFilename, filename_size)) || + (!mz_zip_array_push_back(pZip, &pState->m_central_dir, pExtra, extra_size)) || + (!mz_zip_array_push_back(pZip, &pState->m_central_dir, pComment, comment_size)) || + (!mz_zip_array_push_back(pZip, &pState->m_central_dir_offsets, ¢ral_dir_ofs, 1))) + { + // Try to push the central directory array back into its original state. + mz_zip_array_resize(pZip, &pState->m_central_dir, orig_central_dir_size, MZ_FALSE); + return MZ_FALSE; + } + + return MZ_TRUE; +} + +static mz_bool mz_zip_writer_validate_archive_name(const char *pArchive_name) +{ + // Basic ZIP archive filename validity checks: Valid filenames cannot start with a forward slash, cannot contain a drive letter, and cannot use DOS-style backward slashes. + if (*pArchive_name == '/') + return MZ_FALSE; + while (*pArchive_name) + { + if ((*pArchive_name == '\\') || (*pArchive_name == ':')) + return MZ_FALSE; + pArchive_name++; + } + return MZ_TRUE; +} + +static mz_uint mz_zip_writer_compute_padding_needed_for_file_alignment(mz_zip_archive *pZip) +{ + mz_uint32 n; + if (!pZip->m_file_offset_alignment) + return 0; + n = (mz_uint32)(pZip->m_archive_size & (pZip->m_file_offset_alignment - 1)); + return (pZip->m_file_offset_alignment - n) & (pZip->m_file_offset_alignment - 1); +} + +static mz_bool mz_zip_writer_write_zeros(mz_zip_archive *pZip, mz_uint64 cur_file_ofs, mz_uint32 n) +{ + char buf[4096]; + memset(buf, 0, MZ_MIN(sizeof(buf), n)); + while (n) + { + mz_uint32 s = MZ_MIN(sizeof(buf), n); + if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_file_ofs, buf, s) != s) + return MZ_FALSE; + cur_file_ofs += s; n -= s; + } + return MZ_TRUE; +} + +mz_bool mz_zip_writer_add_mem_ex(mz_zip_archive *pZip, const char *pArchive_name, const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags, mz_uint64 uncomp_size, mz_uint32 uncomp_crc32) +{ + mz_uint16 method = 0, dos_time = 0, dos_date = 0; + mz_uint level, ext_attributes = 0, num_alignment_padding_bytes; + mz_uint64 local_dir_header_ofs = pZip->m_archive_size, cur_archive_file_ofs = pZip->m_archive_size, comp_size = 0; + size_t archive_name_size; + mz_uint8 local_dir_header[MZ_ZIP_LOCAL_DIR_HEADER_SIZE]; + tdefl_compressor *pComp = NULL; + mz_bool store_data_uncompressed; + mz_zip_internal_state *pState; + + if ((int)level_and_flags < 0) + level_and_flags = MZ_DEFAULT_LEVEL; + level = level_and_flags & 0xF; + store_data_uncompressed = ((!level) || (level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA)); + + if ((!pZip) || (!pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING) || ((buf_size) && (!pBuf)) || (!pArchive_name) || ((comment_size) && (!pComment)) || (pZip->m_total_files == 0xFFFF) || (level > MZ_UBER_COMPRESSION)) + return MZ_FALSE; + + pState = pZip->m_pState; + + if ((!(level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) && (uncomp_size)) + return MZ_FALSE; + // No zip64 support yet + if ((buf_size > 0xFFFFFFFF) || (uncomp_size > 0xFFFFFFFF)) + return MZ_FALSE; + if (!mz_zip_writer_validate_archive_name(pArchive_name)) + return MZ_FALSE; + +#ifndef MINIZ_NO_TIME + { + time_t cur_time; time(&cur_time); + mz_zip_time_to_dos_time(cur_time, &dos_time, &dos_date); + } +#endif // #ifndef MINIZ_NO_TIME + + archive_name_size = strlen(pArchive_name); + if (archive_name_size > 0xFFFF) + return MZ_FALSE; + + num_alignment_padding_bytes = mz_zip_writer_compute_padding_needed_for_file_alignment(pZip); + + // no zip64 support yet + if ((pZip->m_total_files == 0xFFFF) || ((pZip->m_archive_size + num_alignment_padding_bytes + MZ_ZIP_LOCAL_DIR_HEADER_SIZE + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + comment_size + archive_name_size) > 0xFFFFFFFF)) + return MZ_FALSE; + + if ((archive_name_size) && (pArchive_name[archive_name_size - 1] == '/')) + { + // Set DOS Subdirectory attribute bit. + ext_attributes |= 0x10; + // Subdirectories cannot contain data. + if ((buf_size) || (uncomp_size)) + return MZ_FALSE; + } + + // Try to do any allocations before writing to the archive, so if an allocation fails the file remains unmodified. (A good idea if we're doing an in-place modification.) + if ((!mz_zip_array_ensure_room(pZip, &pState->m_central_dir, MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + archive_name_size + comment_size)) || (!mz_zip_array_ensure_room(pZip, &pState->m_central_dir_offsets, 1))) + return MZ_FALSE; + + if ((!store_data_uncompressed) && (buf_size)) + { + if (NULL == (pComp = (tdefl_compressor *)pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, sizeof(tdefl_compressor)))) + return MZ_FALSE; + } + + if (!mz_zip_writer_write_zeros(pZip, cur_archive_file_ofs, num_alignment_padding_bytes + sizeof(local_dir_header))) + { + pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); + return MZ_FALSE; + } + local_dir_header_ofs += num_alignment_padding_bytes; + if (pZip->m_file_offset_alignment) { MZ_ASSERT((local_dir_header_ofs & (pZip->m_file_offset_alignment - 1)) == 0); } + cur_archive_file_ofs += num_alignment_padding_bytes + sizeof(local_dir_header); + + MZ_CLEAR_OBJ(local_dir_header); + if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, pArchive_name, archive_name_size) != archive_name_size) + { + pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); + return MZ_FALSE; + } + cur_archive_file_ofs += archive_name_size; + + if (!(level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) + { + uncomp_crc32 = (mz_uint32)mz_crc32(MZ_CRC32_INIT, (const mz_uint8*)pBuf, buf_size); + uncomp_size = buf_size; + if (uncomp_size <= 3) + { + level = 0; + store_data_uncompressed = MZ_TRUE; + } + } + + if (store_data_uncompressed) + { + if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, pBuf, buf_size) != buf_size) + { + pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); + return MZ_FALSE; + } + + cur_archive_file_ofs += buf_size; + comp_size = buf_size; + + if (level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA) + method = MZ_DEFLATED; + } + else if (buf_size) + { + mz_zip_writer_add_state state; + + state.m_pZip = pZip; + state.m_cur_archive_file_ofs = cur_archive_file_ofs; + state.m_comp_size = 0; + + if ((tdefl_init(pComp, mz_zip_writer_add_put_buf_callback, &state, tdefl_create_comp_flags_from_zip_params(level, -15, MZ_DEFAULT_STRATEGY)) != TDEFL_STATUS_OKAY) || + (tdefl_compress_buffer(pComp, pBuf, buf_size, TDEFL_FINISH) != TDEFL_STATUS_DONE)) + { + pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); + return MZ_FALSE; + } + + comp_size = state.m_comp_size; + cur_archive_file_ofs = state.m_cur_archive_file_ofs; + + method = MZ_DEFLATED; + } + + pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); + pComp = NULL; + + // no zip64 support yet + if ((comp_size > 0xFFFFFFFF) || (cur_archive_file_ofs > 0xFFFFFFFF)) + return MZ_FALSE; + + if (!mz_zip_writer_create_local_dir_header(pZip, local_dir_header, (mz_uint16)archive_name_size, 0, uncomp_size, comp_size, uncomp_crc32, method, 0, dos_time, dos_date)) + return MZ_FALSE; + + if (pZip->m_pWrite(pZip->m_pIO_opaque, local_dir_header_ofs, local_dir_header, sizeof(local_dir_header)) != sizeof(local_dir_header)) + return MZ_FALSE; + + if (!mz_zip_writer_add_to_central_dir(pZip, pArchive_name, (mz_uint16)archive_name_size, NULL, 0, pComment, comment_size, uncomp_size, comp_size, uncomp_crc32, method, 0, dos_time, dos_date, local_dir_header_ofs, ext_attributes)) + return MZ_FALSE; + + pZip->m_total_files++; + pZip->m_archive_size = cur_archive_file_ofs; + + return MZ_TRUE; +} + +#ifndef MINIZ_NO_STDIO +mz_bool mz_zip_writer_add_file(mz_zip_archive *pZip, const char *pArchive_name, const char *pSrc_filename, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags) +{ + mz_uint uncomp_crc32 = MZ_CRC32_INIT, level, num_alignment_padding_bytes; + mz_uint16 method = 0, dos_time = 0, dos_date = 0, ext_attributes = 0; + mz_uint64 local_dir_header_ofs = pZip->m_archive_size, cur_archive_file_ofs = pZip->m_archive_size, uncomp_size = 0, comp_size = 0; + size_t archive_name_size; + mz_uint8 local_dir_header[MZ_ZIP_LOCAL_DIR_HEADER_SIZE]; + MZ_FILE *pSrc_file = NULL; + + if ((int)level_and_flags < 0) + level_and_flags = MZ_DEFAULT_LEVEL; + level = level_and_flags & 0xF; + + if ((!pZip) || (!pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING) || (!pArchive_name) || ((comment_size) && (!pComment)) || (level > MZ_UBER_COMPRESSION)) + return MZ_FALSE; + if (level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA) + return MZ_FALSE; + if (!mz_zip_writer_validate_archive_name(pArchive_name)) + return MZ_FALSE; + + archive_name_size = strlen(pArchive_name); + if (archive_name_size > 0xFFFF) + return MZ_FALSE; + + num_alignment_padding_bytes = mz_zip_writer_compute_padding_needed_for_file_alignment(pZip); + + // no zip64 support yet + if ((pZip->m_total_files == 0xFFFF) || ((pZip->m_archive_size + num_alignment_padding_bytes + MZ_ZIP_LOCAL_DIR_HEADER_SIZE + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + comment_size + archive_name_size) > 0xFFFFFFFF)) + return MZ_FALSE; + + if (!mz_zip_get_file_modified_time(pSrc_filename, &dos_time, &dos_date)) + return MZ_FALSE; + + pSrc_file = MZ_FOPEN(pSrc_filename, "rb"); + if (!pSrc_file) + return MZ_FALSE; + MZ_FSEEK64(pSrc_file, 0, SEEK_END); + uncomp_size = MZ_FTELL64(pSrc_file); + MZ_FSEEK64(pSrc_file, 0, SEEK_SET); + + if (uncomp_size > 0xFFFFFFFF) + { + // No zip64 support yet + MZ_FCLOSE(pSrc_file); + return MZ_FALSE; + } + if (uncomp_size <= 3) + level = 0; + + if (!mz_zip_writer_write_zeros(pZip, cur_archive_file_ofs, num_alignment_padding_bytes + sizeof(local_dir_header))) + return MZ_FALSE; + local_dir_header_ofs += num_alignment_padding_bytes; + if (pZip->m_file_offset_alignment) { MZ_ASSERT((local_dir_header_ofs & (pZip->m_file_offset_alignment - 1)) == 0); } + cur_archive_file_ofs += num_alignment_padding_bytes + sizeof(local_dir_header); + + MZ_CLEAR_OBJ(local_dir_header); + if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, pArchive_name, archive_name_size) != archive_name_size) + { + MZ_FCLOSE(pSrc_file); + return MZ_FALSE; + } + cur_archive_file_ofs += archive_name_size; + + if (uncomp_size) + { + mz_uint64 uncomp_remaining = uncomp_size; + void *pRead_buf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, MZ_ZIP_MAX_IO_BUF_SIZE); + if (!pRead_buf) + { + MZ_FCLOSE(pSrc_file); + return MZ_FALSE; + } + + if (!level) + { + while (uncomp_remaining) + { + mz_uint n = (mz_uint)MZ_MIN(MZ_ZIP_MAX_IO_BUF_SIZE, uncomp_remaining); + if ((MZ_FREAD(pRead_buf, 1, n, pSrc_file) != n) || (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, pRead_buf, n) != n)) + { + pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); + MZ_FCLOSE(pSrc_file); + return MZ_FALSE; + } + uncomp_crc32 = (mz_uint32)mz_crc32(uncomp_crc32, (const mz_uint8 *)pRead_buf, n); + uncomp_remaining -= n; + cur_archive_file_ofs += n; + } + comp_size = uncomp_size; + } + else + { + mz_bool result = MZ_FALSE; + mz_zip_writer_add_state state; + tdefl_compressor *pComp = (tdefl_compressor *)pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, sizeof(tdefl_compressor)); + if (!pComp) + { + pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); + MZ_FCLOSE(pSrc_file); + return MZ_FALSE; + } + + state.m_pZip = pZip; + state.m_cur_archive_file_ofs = cur_archive_file_ofs; + state.m_comp_size = 0; + + if (tdefl_init(pComp, mz_zip_writer_add_put_buf_callback, &state, tdefl_create_comp_flags_from_zip_params(level, -15, MZ_DEFAULT_STRATEGY)) != TDEFL_STATUS_OKAY) + { + pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); + pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); + MZ_FCLOSE(pSrc_file); + return MZ_FALSE; + } + + for ( ; ; ) + { + size_t in_buf_size = (mz_uint32)MZ_MIN(uncomp_remaining, MZ_ZIP_MAX_IO_BUF_SIZE); + tdefl_status status; + + if (MZ_FREAD(pRead_buf, 1, in_buf_size, pSrc_file) != in_buf_size) + break; + + uncomp_crc32 = (mz_uint32)mz_crc32(uncomp_crc32, (const mz_uint8 *)pRead_buf, in_buf_size); + uncomp_remaining -= in_buf_size; + + status = tdefl_compress_buffer(pComp, pRead_buf, in_buf_size, uncomp_remaining ? TDEFL_NO_FLUSH : TDEFL_FINISH); + if (status == TDEFL_STATUS_DONE) + { + result = MZ_TRUE; + break; + } + else if (status != TDEFL_STATUS_OKAY) + break; + } + + pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); + + if (!result) + { + pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); + MZ_FCLOSE(pSrc_file); + return MZ_FALSE; + } + + comp_size = state.m_comp_size; + cur_archive_file_ofs = state.m_cur_archive_file_ofs; + + method = MZ_DEFLATED; + } + + pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); + } + + MZ_FCLOSE(pSrc_file); pSrc_file = NULL; + + // no zip64 support yet + if ((comp_size > 0xFFFFFFFF) || (cur_archive_file_ofs > 0xFFFFFFFF)) + return MZ_FALSE; + + if (!mz_zip_writer_create_local_dir_header(pZip, local_dir_header, (mz_uint16)archive_name_size, 0, uncomp_size, comp_size, uncomp_crc32, method, 0, dos_time, dos_date)) + return MZ_FALSE; + + if (pZip->m_pWrite(pZip->m_pIO_opaque, local_dir_header_ofs, local_dir_header, sizeof(local_dir_header)) != sizeof(local_dir_header)) + return MZ_FALSE; + + if (!mz_zip_writer_add_to_central_dir(pZip, pArchive_name, (mz_uint16)archive_name_size, NULL, 0, pComment, comment_size, uncomp_size, comp_size, uncomp_crc32, method, 0, dos_time, dos_date, local_dir_header_ofs, ext_attributes)) + return MZ_FALSE; + + pZip->m_total_files++; + pZip->m_archive_size = cur_archive_file_ofs; + + return MZ_TRUE; +} +#endif // #ifndef MINIZ_NO_STDIO + +mz_bool mz_zip_writer_add_from_zip_reader(mz_zip_archive *pZip, mz_zip_archive *pSource_zip, mz_uint file_index) +{ + mz_uint n, bit_flags, num_alignment_padding_bytes; + mz_uint64 comp_bytes_remaining, local_dir_header_ofs; + mz_uint64 cur_src_file_ofs, cur_dst_file_ofs; + mz_uint32 local_header_u32[(MZ_ZIP_LOCAL_DIR_HEADER_SIZE + sizeof(mz_uint32) - 1) / sizeof(mz_uint32)]; mz_uint8 *pLocal_header = (mz_uint8 *)local_header_u32; + mz_uint8 central_header[MZ_ZIP_CENTRAL_DIR_HEADER_SIZE]; + size_t orig_central_dir_size; + mz_zip_internal_state *pState; + void *pBuf; const mz_uint8 *pSrc_central_header; + + if ((!pZip) || (!pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING)) + return MZ_FALSE; + if (NULL == (pSrc_central_header = mz_zip_reader_get_cdh(pSource_zip, file_index))) + return MZ_FALSE; + pState = pZip->m_pState; + + num_alignment_padding_bytes = mz_zip_writer_compute_padding_needed_for_file_alignment(pZip); + + // no zip64 support yet + if ((pZip->m_total_files == 0xFFFF) || ((pZip->m_archive_size + num_alignment_padding_bytes + MZ_ZIP_LOCAL_DIR_HEADER_SIZE + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE) > 0xFFFFFFFF)) + return MZ_FALSE; + + cur_src_file_ofs = MZ_READ_LE32(pSrc_central_header + MZ_ZIP_CDH_LOCAL_HEADER_OFS); + cur_dst_file_ofs = pZip->m_archive_size; + + if (pSource_zip->m_pRead(pSource_zip->m_pIO_opaque, cur_src_file_ofs, pLocal_header, MZ_ZIP_LOCAL_DIR_HEADER_SIZE) != MZ_ZIP_LOCAL_DIR_HEADER_SIZE) + return MZ_FALSE; + if (MZ_READ_LE32(pLocal_header) != MZ_ZIP_LOCAL_DIR_HEADER_SIG) + return MZ_FALSE; + cur_src_file_ofs += MZ_ZIP_LOCAL_DIR_HEADER_SIZE; + + if (!mz_zip_writer_write_zeros(pZip, cur_dst_file_ofs, num_alignment_padding_bytes)) + return MZ_FALSE; + cur_dst_file_ofs += num_alignment_padding_bytes; + local_dir_header_ofs = cur_dst_file_ofs; + if (pZip->m_file_offset_alignment) { MZ_ASSERT((local_dir_header_ofs & (pZip->m_file_offset_alignment - 1)) == 0); } + + if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_dst_file_ofs, pLocal_header, MZ_ZIP_LOCAL_DIR_HEADER_SIZE) != MZ_ZIP_LOCAL_DIR_HEADER_SIZE) + return MZ_FALSE; + cur_dst_file_ofs += MZ_ZIP_LOCAL_DIR_HEADER_SIZE; + + n = MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_FILENAME_LEN_OFS) + MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_EXTRA_LEN_OFS); + comp_bytes_remaining = n + MZ_READ_LE32(pSrc_central_header + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS); + + if (NULL == (pBuf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, (size_t)MZ_MAX(sizeof(mz_uint32) * 4, MZ_MIN(MZ_ZIP_MAX_IO_BUF_SIZE, comp_bytes_remaining))))) + return MZ_FALSE; + + while (comp_bytes_remaining) + { + n = (mz_uint)MZ_MIN(MZ_ZIP_MAX_IO_BUF_SIZE, comp_bytes_remaining); + if (pSource_zip->m_pRead(pSource_zip->m_pIO_opaque, cur_src_file_ofs, pBuf, n) != n) + { + pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf); + return MZ_FALSE; + } + cur_src_file_ofs += n; + + if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_dst_file_ofs, pBuf, n) != n) + { + pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf); + return MZ_FALSE; + } + cur_dst_file_ofs += n; + + comp_bytes_remaining -= n; + } + + bit_flags = MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_BIT_FLAG_OFS); + if (bit_flags & 8) + { + // Copy data descriptor + if (pSource_zip->m_pRead(pSource_zip->m_pIO_opaque, cur_src_file_ofs, pBuf, sizeof(mz_uint32) * 4) != sizeof(mz_uint32) * 4) + { + pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf); + return MZ_FALSE; + } + + n = sizeof(mz_uint32) * ((MZ_READ_LE32(pBuf) == 0x08074b50) ? 4 : 3); + if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_dst_file_ofs, pBuf, n) != n) + { + pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf); + return MZ_FALSE; + } + + cur_src_file_ofs += n; + cur_dst_file_ofs += n; + } + pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf); + + // no zip64 support yet + if (cur_dst_file_ofs > 0xFFFFFFFF) + return MZ_FALSE; + + orig_central_dir_size = pState->m_central_dir.m_size; + + memcpy(central_header, pSrc_central_header, MZ_ZIP_CENTRAL_DIR_HEADER_SIZE); + MZ_WRITE_LE32(central_header + MZ_ZIP_CDH_LOCAL_HEADER_OFS, local_dir_header_ofs); + if (!mz_zip_array_push_back(pZip, &pState->m_central_dir, central_header, MZ_ZIP_CENTRAL_DIR_HEADER_SIZE)) + return MZ_FALSE; + + n = MZ_READ_LE16(pSrc_central_header + MZ_ZIP_CDH_FILENAME_LEN_OFS) + MZ_READ_LE16(pSrc_central_header + MZ_ZIP_CDH_EXTRA_LEN_OFS) + MZ_READ_LE16(pSrc_central_header + MZ_ZIP_CDH_COMMENT_LEN_OFS); + if (!mz_zip_array_push_back(pZip, &pState->m_central_dir, pSrc_central_header + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE, n)) + { + mz_zip_array_resize(pZip, &pState->m_central_dir, orig_central_dir_size, MZ_FALSE); + return MZ_FALSE; + } + + if (pState->m_central_dir.m_size > 0xFFFFFFFF) + return MZ_FALSE; + n = (mz_uint32)pState->m_central_dir.m_size; + if (!mz_zip_array_push_back(pZip, &pState->m_central_dir_offsets, &n, 1)) + { + mz_zip_array_resize(pZip, &pState->m_central_dir, orig_central_dir_size, MZ_FALSE); + return MZ_FALSE; + } + + pZip->m_total_files++; + pZip->m_archive_size = cur_dst_file_ofs; + + return MZ_TRUE; +} + +mz_bool mz_zip_writer_finalize_archive(mz_zip_archive *pZip) +{ + mz_zip_internal_state *pState; + mz_uint64 central_dir_ofs, central_dir_size; + mz_uint8 hdr[MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE]; + + if ((!pZip) || (!pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING)) + return MZ_FALSE; + + pState = pZip->m_pState; + + // no zip64 support yet + if ((pZip->m_total_files > 0xFFFF) || ((pZip->m_archive_size + pState->m_central_dir.m_size + MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE) > 0xFFFFFFFF)) + return MZ_FALSE; + + central_dir_ofs = 0; + central_dir_size = 0; + if (pZip->m_total_files) + { + // Write central directory + central_dir_ofs = pZip->m_archive_size; + central_dir_size = pState->m_central_dir.m_size; + pZip->m_central_directory_file_ofs = central_dir_ofs; + if (pZip->m_pWrite(pZip->m_pIO_opaque, central_dir_ofs, pState->m_central_dir.m_p, (size_t)central_dir_size) != central_dir_size) + return MZ_FALSE; + pZip->m_archive_size += central_dir_size; + } + + // Write end of central directory record + MZ_CLEAR_OBJ(hdr); + MZ_WRITE_LE32(hdr + MZ_ZIP_ECDH_SIG_OFS, MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIG); + MZ_WRITE_LE16(hdr + MZ_ZIP_ECDH_CDIR_NUM_ENTRIES_ON_DISK_OFS, pZip->m_total_files); + MZ_WRITE_LE16(hdr + MZ_ZIP_ECDH_CDIR_TOTAL_ENTRIES_OFS, pZip->m_total_files); + MZ_WRITE_LE32(hdr + MZ_ZIP_ECDH_CDIR_SIZE_OFS, central_dir_size); + MZ_WRITE_LE32(hdr + MZ_ZIP_ECDH_CDIR_OFS_OFS, central_dir_ofs); + + if (pZip->m_pWrite(pZip->m_pIO_opaque, pZip->m_archive_size, hdr, sizeof(hdr)) != sizeof(hdr)) + return MZ_FALSE; +#ifndef MINIZ_NO_STDIO + if ((pState->m_pFile) && (MZ_FFLUSH(pState->m_pFile) == EOF)) + return MZ_FALSE; +#endif // #ifndef MINIZ_NO_STDIO + + pZip->m_archive_size += sizeof(hdr); + + pZip->m_zip_mode = MZ_ZIP_MODE_WRITING_HAS_BEEN_FINALIZED; + return MZ_TRUE; +} + +mz_bool mz_zip_writer_finalize_heap_archive(mz_zip_archive *pZip, void **pBuf, size_t *pSize) +{ + if ((!pZip) || (!pZip->m_pState) || (!pBuf) || (!pSize)) + return MZ_FALSE; + if (pZip->m_pWrite != mz_zip_heap_write_func) + return MZ_FALSE; + if (!mz_zip_writer_finalize_archive(pZip)) + return MZ_FALSE; + + *pBuf = pZip->m_pState->m_pMem; + *pSize = pZip->m_pState->m_mem_size; + pZip->m_pState->m_pMem = NULL; + pZip->m_pState->m_mem_size = pZip->m_pState->m_mem_capacity = 0; + return MZ_TRUE; +} + +mz_bool mz_zip_writer_end(mz_zip_archive *pZip) +{ + mz_zip_internal_state *pState; + mz_bool status = MZ_TRUE; + if ((!pZip) || (!pZip->m_pState) || (!pZip->m_pAlloc) || (!pZip->m_pFree) || ((pZip->m_zip_mode != MZ_ZIP_MODE_WRITING) && (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING_HAS_BEEN_FINALIZED))) + return MZ_FALSE; + + pState = pZip->m_pState; + pZip->m_pState = NULL; + mz_zip_array_clear(pZip, &pState->m_central_dir); + mz_zip_array_clear(pZip, &pState->m_central_dir_offsets); + mz_zip_array_clear(pZip, &pState->m_sorted_central_dir_offsets); + +#ifndef MINIZ_NO_STDIO + if (pState->m_pFile) + { + MZ_FCLOSE(pState->m_pFile); + pState->m_pFile = NULL; + } +#endif // #ifndef MINIZ_NO_STDIO + + if ((pZip->m_pWrite == mz_zip_heap_write_func) && (pState->m_pMem)) + { + pZip->m_pFree(pZip->m_pAlloc_opaque, pState->m_pMem); + pState->m_pMem = NULL; + } + + pZip->m_pFree(pZip->m_pAlloc_opaque, pState); + pZip->m_zip_mode = MZ_ZIP_MODE_INVALID; + return status; +} + +#ifndef MINIZ_NO_STDIO +mz_bool mz_zip_add_mem_to_archive_file_in_place(const char *pZip_filename, const char *pArchive_name, const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags) +{ + mz_bool status, created_new_archive = MZ_FALSE; + mz_zip_archive zip_archive; + struct MZ_FILE_STAT_STRUCT file_stat; + MZ_CLEAR_OBJ(zip_archive); + if ((int)level_and_flags < 0) + level_and_flags = MZ_DEFAULT_LEVEL; + if ((!pZip_filename) || (!pArchive_name) || ((buf_size) && (!pBuf)) || ((comment_size) && (!pComment)) || ((level_and_flags & 0xF) > MZ_UBER_COMPRESSION)) + return MZ_FALSE; + if (!mz_zip_writer_validate_archive_name(pArchive_name)) + return MZ_FALSE; + if (MZ_FILE_STAT(pZip_filename, &file_stat) != 0) + { + // Create a new archive. + if (!mz_zip_writer_init_file(&zip_archive, pZip_filename, 0)) + return MZ_FALSE; + created_new_archive = MZ_TRUE; + } + else + { + // Append to an existing archive. + if (!mz_zip_reader_init_file(&zip_archive, pZip_filename, level_and_flags | MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY)) + return MZ_FALSE; + if (!mz_zip_writer_init_from_reader(&zip_archive, pZip_filename)) + { + mz_zip_reader_end(&zip_archive); + return MZ_FALSE; + } + } + status = mz_zip_writer_add_mem_ex(&zip_archive, pArchive_name, pBuf, buf_size, pComment, comment_size, level_and_flags, 0, 0); + // Always finalize, even if adding failed for some reason, so we have a valid central directory. (This may not always succeed, but we can try.) + if (!mz_zip_writer_finalize_archive(&zip_archive)) + status = MZ_FALSE; + if (!mz_zip_writer_end(&zip_archive)) + status = MZ_FALSE; + if ((!status) && (created_new_archive)) + { + // It's a new archive and something went wrong, so just delete it. + int ignoredStatus = MZ_DELETE_FILE(pZip_filename); + (void)ignoredStatus; + } + return status; +} + +void *mz_zip_extract_archive_file_to_heap(const char *pZip_filename, const char *pArchive_name, const char *pComment, size_t *pSize, mz_uint flags) +{ + int file_index; + mz_zip_archive zip_archive; + void *p = NULL; + + if (pSize) + *pSize = 0; + + if ((!pZip_filename) || (!pArchive_name)) + return NULL; + + MZ_CLEAR_OBJ(zip_archive); + if (!mz_zip_reader_init_file(&zip_archive, pZip_filename, flags | MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY)) + return NULL; + + if ((file_index = mz_zip_reader_locate_file(&zip_archive, pArchive_name, pComment, flags)) >= 0) + p = mz_zip_reader_extract_to_heap(&zip_archive, file_index, pSize, flags); + + mz_zip_reader_end(&zip_archive); + return p; +} + +#endif // #ifndef MINIZ_NO_STDIO + +#endif // #ifndef MINIZ_NO_ARCHIVE_WRITING_APIS + +#endif // #ifndef MINIZ_NO_ARCHIVE_APIS + +#ifdef __cplusplus +} +#endif + +#endif // MINIZ_HEADER_FILE_ONLY + +/* + This is free and unencumbered software released into the public domain. + + Anyone is free to copy, modify, publish, use, compile, sell, or + distribute this software, either in source code form or as a compiled + binary, for any purpose, commercial or non-commercial, and by any + means. + + In jurisdictions that recognize copyright laws, the author or authors + of this software dedicate any and all copyright interest in the + software to the public domain. We make this dedication for the benefit + of the public at large and to the detriment of our heirs and + successors. We intend this dedication to be an overt act of + relinquishment in perpetuity of all present and future rights to this + software under copyright law. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + OTHER DEALINGS IN THE SOFTWARE. + + For more information, please refer to +*/ diff --git a/crnlib/crn_miniz.h b/crnlib/crn_miniz.h new file mode 100644 index 0000000..6d518c6 --- /dev/null +++ b/crnlib/crn_miniz.h @@ -0,0 +1,893 @@ +/* miniz.c v1.14 - public domain deflate/inflate, zlib-subset, ZIP reading/writing/appending, PNG writing + See "unlicense" statement at the end of this file. + Rich Geldreich , last updated May 20, 2012 + Implements RFC 1950: http://www.ietf.org/rfc/rfc1950.txt and RFC 1951: http://www.ietf.org/rfc/rfc1951.txt + + Most API's defined in miniz.c are optional. For example, to disable the archive related functions just define + MINIZ_NO_ARCHIVE_APIS, or to get rid of all stdio usage define MINIZ_NO_STDIO (see the list below for more macros). + + * Change History + 5/20/12 v1.14 - MinGW32/64 GCC 4.6.1 compiler fixes: added MZ_FORCEINLINE, #include (thanks fermtect). + 5/19/12 v1.13 - From jason@cornsyrup.org and kelwert@mtu.edu - Fix mz_crc32() so it doesn't compute the wrong CRC-32's when mz_ulong is 64-bit. + Temporarily/locally slammed in "typedef unsigned long mz_ulong" and re-ran a randomized regression test on ~500k files. + Eliminated a bunch of warnings when compiling with GCC 32-bit/64. + Ran all examples, miniz.c, and tinfl.c through MSVC 2008's /analyze (static analysis) option and fixed all warnings (except for the silly + "Use of the comma-operator in a tested expression.." analysis warning, which I purposely use to work around a MSVC compiler warning). + Created 32-bit and 64-bit Codeblocks projects/workspace. Built and tested Linux executables. The codeblocks workspace is compatible with Linux+Win32/x64. + Added miniz_tester solution/project, which is a useful little app derived from LZHAM's tester app that I use as part of the regression test. + Ran miniz.c and tinfl.c through another series of regression testing on ~500,000 files and archives. + Modified example5.c so it purposely disables a bunch of high-level functionality (MINIZ_NO_STDIO, etc.). (Thanks to corysama for the MINIZ_NO_STDIO bug report.) + Fix ftell() usage in examples so they exit with an error on files which are too large (a limitation of the examples, not miniz itself). + 4/12/12 v1.12 - More comments, added low-level example5.c, fixed a couple minor level_and_flags issues in the archive API's. + level_and_flags can now be set to MZ_DEFAULT_COMPRESSION. Thanks to Bruce Dawson for the feedback/bug report. + 5/28/11 v1.11 - Added statement from unlicense.org + 5/27/11 v1.10 - Substantial compressor optimizations: + Level 1 is now ~4x faster than before. The L1 compressor's throughput now varies between 70-110MB/sec. on a + Core i7 (actual throughput varies depending on the type of data, and x64 vs. x86). + Improved baseline L2-L9 compression perf. Also, greatly improved compression perf. issues on some file types. + Refactored the compression code for better readability and maintainability. + Added level 10 compression level (L10 has slightly better ratio than level 9, but could have a potentially large + drop in throughput on some files). + 5/15/11 v1.09 - Initial stable release. + + * Low-level Deflate/Inflate implementation notes: + + Compression: Use the "tdefl" API's. The compressor supports raw, static, and dynamic blocks, lazy or + greedy parsing, match length filtering, RLE-only, and Huffman-only streams. It performs and compresses + approximately as well as zlib. + + Decompression: Use the "tinfl" API's. The entire decompressor is implemented as a single function + coroutine: see tinfl_decompress(). It supports decompression into a 32KB (or larger power of 2) wrapping buffer, or into a memory + block large enough to hold the entire file. + + The low-level tdefl/tinfl API's do not make any use of dynamic memory allocation. + + * zlib-style API notes: + + miniz.c implements a fairly large subset of zlib. There's enough functionality present for it to be a drop-in + zlib replacement in many apps: + The z_stream struct, optional memory allocation callbacks + deflateInit/deflateInit2/deflate/deflateReset/deflateEnd/deflateBound + inflateInit/inflateInit2/inflate/inflateEnd + compress, compress2, compressBound, uncompress + CRC-32, Adler-32 - Using modern, minimal code size, CPU cache friendly routines. + Supports raw deflate streams or standard zlib streams with adler-32 checking. + + Limitations: + The callback API's are not implemented yet. No support for gzip headers or zlib static dictionaries. + I've tried to closely emulate zlib's various flavors of stream flushing and return status codes, but + there are no guarantees that miniz.c pulls this off perfectly. + + * PNG writing: See the tdefl_write_image_to_png_file_in_memory() function, originally written by + Alex Evans. Supports 1-4 bytes/pixel images. + + * ZIP archive API notes: + + The ZIP archive API's where designed with simplicity and efficiency in mind, with just enough abstraction to + get the job done with minimal fuss. There are simple API's to retrieve file information, read files from + existing archives, create new archives, append new files to existing archives, or clone archive data from + one archive to another. It supports archives located in memory or the heap, on disk (using stdio.h), + or you can specify custom file read/write callbacks. + + - Archive reading: Just call this function to read a single file from a disk archive: + + void *mz_zip_extract_archive_file_to_heap(const char *pZip_filename, const char *pArchive_name, + size_t *pSize, mz_uint zip_flags); + + For more complex cases, use the "mz_zip_reader" functions. Upon opening an archive, the entire central + directory is located and read as-is into memory, and subsequent file access only occurs when reading individual files. + + - Archives file scanning: The simple way is to use this function to scan a loaded archive for a specific file: + + int mz_zip_reader_locate_file(mz_zip_archive *pZip, const char *pName, const char *pComment, mz_uint flags); + + The locate operation can optionally check file comments too, which (as one example) can be used to identify + multiple versions of the same file in an archive. This function uses a simple linear search through the central + directory, so it's not very fast. + + Alternately, you can iterate through all the files in an archive (using mz_zip_reader_get_num_files()) and + retrieve detailed info on each file by calling mz_zip_reader_file_stat(). + + - Archive creation: Use the "mz_zip_writer" functions. The ZIP writer immediately writes compressed file data + to disk and builds an exact image of the central directory in memory. The central directory image is written + all at once at the end of the archive file when the archive is finalized. + + The archive writer can optionally align each file's local header and file data to any power of 2 alignment, + which can be useful when the archive will be read from optical media. Also, the writer supports placing + arbitrary data blobs at the very beginning of ZIP archives. Archives written using either feature are still + readable by any ZIP tool. + + - Archive appending: The simple way to add a single file to an archive is to call this function: + + mz_bool mz_zip_add_mem_to_archive_file_in_place(const char *pZip_filename, const char *pArchive_name, + const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags); + + The archive will be created if it doesn't already exist, otherwise it'll be appended to. + Note the appending is done in-place and is not an atomic operation, so if something goes wrong + during the operation it's possible the archive could be left without a central directory (although the local + file headers and file data will be fine, so the archive will be recoverable). + + For more complex archive modification scenarios: + 1. The safest way is to use a mz_zip_reader to read the existing archive, cloning only those bits you want to + preserve into a new archive using using the mz_zip_writer_add_from_zip_reader() function (which compiles the + compressed file data as-is). When you're done, delete the old archive and rename the newly written archive, and + you're done. This is safe but requires a bunch of temporary disk space or heap memory. + + 2. Or, you can convert an mz_zip_reader in-place to an mz_zip_writer using mz_zip_writer_init_from_reader(), + append new files as needed, then finalize the archive which will write an updated central directory to the + original archive. (This is basically what mz_zip_add_mem_to_archive_file_in_place() does.) There's a + possibility that the archive's central directory could be lost with this method if anything goes wrong, though. + + - ZIP archive support limitations: + No zip64 or spanning support. Extraction functions can only handle unencrypted, stored or deflated files. + Requires streams capable of seeking. + + * This is a header file library, like stb_image.c. To get only a header file, either cut and paste the + below header, or create miniz.h, #define MINIZ_HEADER_FILE_ONLY, and then include miniz.c from it. + + * Important: For best perf. be sure to customize the below macros for your target platform: + #define MINIZ_USE_UNALIGNED_LOADS_AND_STORES 1 + #define MINIZ_LITTLE_ENDIAN 1 + #define MINIZ_HAS_64BIT_REGISTERS 1 +*/ +#pragma once + +#ifndef MINIZ_HEADER_INCLUDED +#define MINIZ_HEADER_INCLUDED + +#include + +#if !defined(MINIZ_NO_TIME) && !defined(MINIZ_NO_ARCHIVE_APIS) +#include +#endif + +// Defines to completely disable specific portions of miniz.c: +// If all macros here are defined the only functionality remaining will be CRC-32, adler-32, tinfl, and tdefl. + +// Define MINIZ_NO_STDIO to disable all usage and any functions which rely on stdio for file I/O. +//#define MINIZ_NO_STDIO + +// If MINIZ_NO_TIME is specified then the ZIP archive functions will not be able to get the current time, or +// get/set file times. +//#define MINIZ_NO_TIME + +// Define MINIZ_NO_ARCHIVE_APIS to disable all ZIP archive API's. +//#define MINIZ_NO_ARCHIVE_APIS + +// Define MINIZ_NO_ARCHIVE_APIS to disable all writing related ZIP archive API's. +//#define MINIZ_NO_ARCHIVE_WRITING_APIS + +// Define MINIZ_NO_ZLIB_APIS to remove all ZLIB-style compression/decompression API's. +//#define MINIZ_NO_ZLIB_APIS + +// Define MINIZ_NO_ZLIB_COMPATIBLE_NAME to disable zlib names, to prevent conflicts against stock zlib. +//#define MINIZ_NO_ZLIB_COMPATIBLE_NAMES + +// Define MINIZ_NO_MALLOC to disable all calls to malloc, free, and realloc. +// Note if MINIZ_NO_MALLOC is defined then the user must always provide custom user alloc/free/realloc +// callbacks to the zlib and archive API's, and a few stand-alone helper API's which don't provide custom user +// functions (such as tdefl_compress_mem_to_heap() and tinfl_decompress_mem_to_heap()) won't work. +//#define MINIZ_NO_MALLOC + +#if defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || defined(__i386) || defined(__i486__) || defined(__i486) || defined(i386) || defined(__ia64__) || defined(__x86_64__) +// MINIZ_X86_OR_X64_CPU is only used to help set the below macros. +#define MINIZ_X86_OR_X64_CPU 1 +#endif + +#if (__BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__) || MINIZ_X86_OR_X64_CPU +// Set MINIZ_LITTLE_ENDIAN to 1 if the processor is little endian. +#define MINIZ_LITTLE_ENDIAN 1 +#endif + +#if MINIZ_X86_OR_X64_CPU +// Set MINIZ_USE_UNALIGNED_LOADS_AND_STORES to 1 on CPU's that permit efficient integer loads and stores from unaligned addresses. +#define MINIZ_USE_UNALIGNED_LOADS_AND_STORES 1 +#endif + +#if defined(_M_X64) || defined(_WIN64) || defined(__MINGW64__) || defined(_LP64) || defined(__LP64__) || defined(__ia64__) || defined(__x86_64__) +// Set MINIZ_HAS_64BIT_REGISTERS to 1 if operations on 64-bit integers are reasonably fast (and don't involve compiler generated calls to helper functions). +#define MINIZ_HAS_64BIT_REGISTERS 1 +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +// ------------------- zlib-style API Definitions. + +// For more compatibility with zlib, miniz.c uses unsigned long for some parameters/struct members. Beware: mz_ulong can be either 32 or 64-bits! +typedef unsigned long mz_ulong; + +// mz_free() internally uses the MZ_FREE() macro (which by default calls free() unless you've modified the MZ_MALLOC macro) to release a block allocated from the heap. +void mz_free(void *p); + +#define MZ_ADLER32_INIT (1) +// mz_adler32() returns the initial adler-32 value to use when called with ptr==NULL. +mz_ulong mz_adler32(mz_ulong adler, const unsigned char *ptr, size_t buf_len); + +#define MZ_CRC32_INIT (0) +// mz_crc32() returns the initial CRC-32 value to use when called with ptr==NULL. +mz_ulong mz_crc32(mz_ulong crc, const unsigned char *ptr, size_t buf_len); + +// Compression strategies. +enum { MZ_DEFAULT_STRATEGY = 0, MZ_FILTERED = 1, MZ_HUFFMAN_ONLY = 2, MZ_RLE = 3, MZ_FIXED = 4 }; + +// Method +#define MZ_DEFLATED 8 + +#ifndef MINIZ_NO_ZLIB_APIS + +// Heap allocation callbacks. +// Note that mz_alloc_func parameter types purpsosely differ from zlib's: items/size is size_t, not unsigned long. +typedef void *(*mz_alloc_func)(void *opaque, size_t items, size_t size); +typedef void (*mz_free_func)(void *opaque, void *address); +typedef void *(*mz_realloc_func)(void *opaque, void *address, size_t items, size_t size); + +#define MZ_VERSION "9.1.14" +#define MZ_VERNUM 0x91E0 +#define MZ_VER_MAJOR 9 +#define MZ_VER_MINOR 1 +#define MZ_VER_REVISION 14 +#define MZ_VER_SUBREVISION 0 + +// Flush values. For typical usage you only need MZ_NO_FLUSH and MZ_FINISH. The other values are for advanced use (refer to the zlib docs). +enum { MZ_NO_FLUSH = 0, MZ_PARTIAL_FLUSH = 1, MZ_SYNC_FLUSH = 2, MZ_FULL_FLUSH = 3, MZ_FINISH = 4, MZ_BLOCK = 5 }; + +// Return status codes. MZ_PARAM_ERROR is non-standard. +enum { MZ_OK = 0, MZ_STREAM_END = 1, MZ_NEED_DICT = 2, MZ_ERRNO = -1, MZ_STREAM_ERROR = -2, MZ_DATA_ERROR = -3, MZ_MEM_ERROR = -4, MZ_BUF_ERROR = -5, MZ_VERSION_ERROR = -6, MZ_PARAM_ERROR = -10000 }; + +// Compression levels: 0-9 are the standard zlib-style levels, 10 is best possible compression (not zlib compatible, and may be very slow), MZ_DEFAULT_COMPRESSION=MZ_DEFAULT_LEVEL. +enum { MZ_NO_COMPRESSION = 0, MZ_BEST_SPEED = 1, MZ_BEST_COMPRESSION = 9, MZ_UBER_COMPRESSION = 10, MZ_DEFAULT_LEVEL = 6, MZ_DEFAULT_COMPRESSION = -1 }; + +// Window bits +#define MZ_DEFAULT_WINDOW_BITS 15 + +struct mz_internal_state; + +// Compression/decompression stream struct. +typedef struct mz_stream_s +{ + const unsigned char *next_in; // pointer to next byte to read + unsigned int avail_in; // number of bytes available at next_in + mz_ulong total_in; // total number of bytes consumed so far + + unsigned char *next_out; // pointer to next byte to write + unsigned int avail_out; // number of bytes that can be written to next_out + mz_ulong total_out; // total number of bytes produced so far + + char *msg; // error msg (unused) + struct mz_internal_state *state; // internal state, allocated by zalloc/zfree + + mz_alloc_func zalloc; // optional heap allocation function (defaults to malloc) + mz_free_func zfree; // optional heap free function (defaults to free) + void *opaque; // heap alloc function user pointer + + int data_type; // data_type (unused) + mz_ulong adler; // adler32 of the source or uncompressed data + mz_ulong reserved; // not used +} mz_stream; + +typedef mz_stream *mz_streamp; + +// Returns the version string of miniz.c. +const char *mz_version(void); + +// mz_deflateInit() initializes a compressor with default options: +// Parameters: +// pStream must point to an initialized mz_stream struct. +// level must be between [MZ_NO_COMPRESSION, MZ_BEST_COMPRESSION]. +// level 1 enables a specially optimized compression function that's been optimized purely for performance, not ratio. +// (This special func. is currently only enabled when MINIZ_USE_UNALIGNED_LOADS_AND_STORES and MINIZ_LITTLE_ENDIAN are defined.) +// Return values: +// MZ_OK on success. +// MZ_STREAM_ERROR if the stream is bogus. +// MZ_PARAM_ERROR if the input parameters are bogus. +// MZ_MEM_ERROR on out of memory. +int mz_deflateInit(mz_streamp pStream, int level); + +// mz_deflateInit2() is like mz_deflate(), except with more control: +// Additional parameters: +// method must be MZ_DEFLATED +// window_bits must be MZ_DEFAULT_WINDOW_BITS (to wrap the deflate stream with zlib header/adler-32 footer) or -MZ_DEFAULT_WINDOW_BITS (raw deflate/no header or footer) +// mem_level must be between [1, 9] (it's checked but ignored by miniz.c) +int mz_deflateInit2(mz_streamp pStream, int level, int method, int window_bits, int mem_level, int strategy); + +// Quickly resets a compressor without having to reallocate anything. Same as calling mz_deflateEnd() followed by mz_deflateInit()/mz_deflateInit2(). +int mz_deflateReset(mz_streamp pStream); + +// mz_deflate() compresses the input to output, consuming as much of the input and producing as much output as possible. +// Parameters: +// pStream is the stream to read from and write to. You must initialize/update the next_in, avail_in, next_out, and avail_out members. +// flush may be MZ_NO_FLUSH, MZ_PARTIAL_FLUSH/MZ_SYNC_FLUSH, MZ_FULL_FLUSH, or MZ_FINISH. +// Return values: +// MZ_OK on success (when flushing, or if more input is needed but not available, and/or there's more output to be written but the output buffer is full). +// MZ_STREAM_END if all input has been consumed and all output bytes have been written. Don't call mz_deflate() on the stream anymore. +// MZ_STREAM_ERROR if the stream is bogus. +// MZ_PARAM_ERROR if one of the parameters is invalid. +// MZ_BUF_ERROR if no forward progress is possible because the input and/or output buffers are empty. (Fill up the input buffer or free up some output space and try again.) +int mz_deflate(mz_streamp pStream, int flush); + +// mz_deflateEnd() deinitializes a compressor: +// Return values: +// MZ_OK on success. +// MZ_STREAM_ERROR if the stream is bogus. +int mz_deflateEnd(mz_streamp pStream); + +// mz_deflateBound() returns a (very) conservative upper bound on the amount of data that could be generated by deflate(), assuming flush is set to only MZ_NO_FLUSH or MZ_FINISH. +mz_ulong mz_deflateBound(mz_streamp pStream, mz_ulong source_len); + +// Single-call compression functions mz_compress() and mz_compress2(): +// Returns MZ_OK on success, or one of the error codes from mz_deflate() on failure. +int mz_compress(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len); +int mz_compress2(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len, int level); + +// mz_compressBound() returns a (very) conservative upper bound on the amount of data that could be generated by calling mz_compress(). +mz_ulong mz_compressBound(mz_ulong source_len); + +// Initializes a decompressor. +int mz_inflateInit(mz_streamp pStream); + +// mz_inflateInit2() is like mz_inflateInit() with an additional option that controls the window size and whether or not the stream has been wrapped with a zlib header/footer: +// window_bits must be MZ_DEFAULT_WINDOW_BITS (to parse zlib header/footer) or -MZ_DEFAULT_WINDOW_BITS (raw deflate). +int mz_inflateInit2(mz_streamp pStream, int window_bits); + +// Decompresses the input stream to the output, consuming only as much of the input as needed, and writing as much to the output as possible. +// Parameters: +// pStream is the stream to read from and write to. You must initialize/update the next_in, avail_in, next_out, and avail_out members. +// flush may be MZ_NO_FLUSH, MZ_SYNC_FLUSH, or MZ_FINISH. +// On the first call, if flush is MZ_FINISH it's assumed the input and output buffers are both sized large enough to decompress the entire stream in a single call (this is slightly faster). +// MZ_FINISH implies that there are no more source bytes available beside what's already in the input buffer, and that the output buffer is large enough to hold the rest of the decompressed data. +// Return values: +// MZ_OK on success. Either more input is needed but not available, and/or there's more output to be written but the output buffer is full. +// MZ_STREAM_END if all needed input has been consumed and all output bytes have been written. For zlib streams, the adler-32 of the decompressed data has also been verified. +// MZ_STREAM_ERROR if the stream is bogus. +// MZ_DATA_ERROR if the deflate stream is invalid. +// MZ_PARAM_ERROR if one of the parameters is invalid. +// MZ_BUF_ERROR if no forward progress is possible because the input buffer is empty but the inflater needs more input to continue, or if the output buffer is not large enough. Call mz_inflate() again +// with more input data, or with more room in the output buffer (except when using single call decompression, described above). +int mz_inflate(mz_streamp pStream, int flush); + +// Deinitializes a decompressor. +int mz_inflateEnd(mz_streamp pStream); + +// Single-call decompression. +// Returns MZ_OK on success, or one of the error codes from mz_inflate() on failure. +int mz_uncompress(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len); + +// Returns a string description of the specified error code, or NULL if the error code is invalid. +const char *mz_error(int err); + +// Redefine zlib-compatible names to miniz equivalents, so miniz.c can be used as a drop-in replacement for the subset of zlib that miniz.c supports. +// Define MINIZ_NO_ZLIB_COMPATIBLE_NAMES to disable zlib-compatibility if you use zlib in the same project. +#ifndef MINIZ_NO_ZLIB_COMPATIBLE_NAMES + typedef unsigned char Byte; + typedef unsigned int uInt; + typedef mz_ulong uLong; + typedef Byte Bytef; + typedef uInt uIntf; + typedef char charf; + typedef int intf; + typedef void *voidpf; + typedef uLong uLongf; + typedef void *voidp; + typedef void *const voidpc; + #define Z_NULL 0 + #define Z_NO_FLUSH MZ_NO_FLUSH + #define Z_PARTIAL_FLUSH MZ_PARTIAL_FLUSH + #define Z_SYNC_FLUSH MZ_SYNC_FLUSH + #define Z_FULL_FLUSH MZ_FULL_FLUSH + #define Z_FINISH MZ_FINISH + #define Z_BLOCK MZ_BLOCK + #define Z_OK MZ_OK + #define Z_STREAM_END MZ_STREAM_END + #define Z_NEED_DICT MZ_NEED_DICT + #define Z_ERRNO MZ_ERRNO + #define Z_STREAM_ERROR MZ_STREAM_ERROR + #define Z_DATA_ERROR MZ_DATA_ERROR + #define Z_MEM_ERROR MZ_MEM_ERROR + #define Z_BUF_ERROR MZ_BUF_ERROR + #define Z_VERSION_ERROR MZ_VERSION_ERROR + #define Z_PARAM_ERROR MZ_PARAM_ERROR + #define Z_NO_COMPRESSION MZ_NO_COMPRESSION + #define Z_BEST_SPEED MZ_BEST_SPEED + #define Z_BEST_COMPRESSION MZ_BEST_COMPRESSION + #define Z_DEFAULT_COMPRESSION MZ_DEFAULT_COMPRESSION + #define Z_DEFAULT_STRATEGY MZ_DEFAULT_STRATEGY + #define Z_FILTERED MZ_FILTERED + #define Z_HUFFMAN_ONLY MZ_HUFFMAN_ONLY + #define Z_RLE MZ_RLE + #define Z_FIXED MZ_FIXED + #define Z_DEFLATED MZ_DEFLATED + #define Z_DEFAULT_WINDOW_BITS MZ_DEFAULT_WINDOW_BITS + #define alloc_func mz_alloc_func + #define free_func mz_free_func + #define internal_state mz_internal_state + #define z_stream mz_stream + #define deflateInit mz_deflateInit + #define deflateInit2 mz_deflateInit2 + #define deflateReset mz_deflateReset + #define deflate mz_deflate + #define deflateEnd mz_deflateEnd + #define deflateBound mz_deflateBound + #define compress mz_compress + #define compress2 mz_compress2 + #define compressBound mz_compressBound + #define inflateInit mz_inflateInit + #define inflateInit2 mz_inflateInit2 + #define inflate mz_inflate + #define inflateEnd mz_inflateEnd + #define uncompress mz_uncompress + #define crc32 mz_crc32 + #define adler32 mz_adler32 + #define MAX_WBITS 15 + #define MAX_MEM_LEVEL 9 + #define zError mz_error + #define ZLIB_VERSION MZ_VERSION + #define ZLIB_VERNUM MZ_VERNUM + #define ZLIB_VER_MAJOR MZ_VER_MAJOR + #define ZLIB_VER_MINOR MZ_VER_MINOR + #define ZLIB_VER_REVISION MZ_VER_REVISION + #define ZLIB_VER_SUBREVISION MZ_VER_SUBREVISION + #define zlibVersion mz_version + #define zlib_version mz_version() +#endif // #ifndef MINIZ_NO_ZLIB_COMPATIBLE_NAMES + +#endif // MINIZ_NO_ZLIB_APIS + +// ------------------- Types and macros + +typedef unsigned char mz_uint8; +typedef signed short mz_int16; +typedef unsigned short mz_uint16; +typedef unsigned int mz_uint32; +typedef unsigned int mz_uint; +typedef long long mz_int64; +typedef unsigned long long mz_uint64; +typedef int mz_bool; + +#define MZ_FALSE (0) +#define MZ_TRUE (1) + +// Works around MSVC's spammy "warning C4127: conditional expression is constant" message. +#ifdef _MSC_VER + #define MZ_MACRO_END while (0, 0) +#else + #define MZ_MACRO_END while (0) +#endif + +// ------------------- ZIP archive reading/writing + +#ifndef MINIZ_NO_ARCHIVE_APIS + +enum +{ + MZ_ZIP_MAX_IO_BUF_SIZE = 64*1024, + MZ_ZIP_MAX_ARCHIVE_FILENAME_SIZE = 260, + MZ_ZIP_MAX_ARCHIVE_FILE_COMMENT_SIZE = 256 +}; + +typedef struct +{ + mz_uint32 m_file_index; + mz_uint32 m_central_dir_ofs; + mz_uint16 m_version_made_by; + mz_uint16 m_version_needed; + mz_uint16 m_bit_flag; + mz_uint16 m_method; +#ifndef MINIZ_NO_TIME + time_t m_time; +#endif + mz_uint32 m_crc32; + mz_uint64 m_comp_size; + mz_uint64 m_uncomp_size; + mz_uint16 m_internal_attr; + mz_uint32 m_external_attr; + mz_uint64 m_local_header_ofs; + mz_uint32 m_comment_size; + char m_filename[MZ_ZIP_MAX_ARCHIVE_FILENAME_SIZE]; + char m_comment[MZ_ZIP_MAX_ARCHIVE_FILE_COMMENT_SIZE]; +} mz_zip_archive_file_stat; + +typedef size_t (*mz_file_read_func)(void *pOpaque, mz_uint64 file_ofs, void *pBuf, size_t n); +typedef size_t (*mz_file_write_func)(void *pOpaque, mz_uint64 file_ofs, const void *pBuf, size_t n); + +struct mz_zip_internal_state_tag; +typedef struct mz_zip_internal_state_tag mz_zip_internal_state; + +typedef enum +{ + MZ_ZIP_MODE_INVALID = 0, + MZ_ZIP_MODE_READING = 1, + MZ_ZIP_MODE_WRITING = 2, + MZ_ZIP_MODE_WRITING_HAS_BEEN_FINALIZED = 3 +} mz_zip_mode; + +typedef struct +{ + mz_uint64 m_archive_size; + mz_uint64 m_central_directory_file_ofs; + mz_uint m_total_files; + mz_zip_mode m_zip_mode; + + mz_uint m_file_offset_alignment; + + mz_alloc_func m_pAlloc; + mz_free_func m_pFree; + mz_realloc_func m_pRealloc; + void *m_pAlloc_opaque; + + mz_file_read_func m_pRead; + mz_file_write_func m_pWrite; + void *m_pIO_opaque; + + mz_zip_internal_state *m_pState; + +} mz_zip_archive; + +typedef enum +{ + MZ_ZIP_FLAG_CASE_SENSITIVE = 0x0100, + MZ_ZIP_FLAG_IGNORE_PATH = 0x0200, + MZ_ZIP_FLAG_COMPRESSED_DATA = 0x0400, + MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY = 0x0800 +} mz_zip_flags; + +// ZIP archive reading + +// Inits a ZIP archive reader. +// These functions read and validate the archive's central directory. +mz_bool mz_zip_reader_init(mz_zip_archive *pZip, mz_uint64 size, mz_uint32 flags); +mz_bool mz_zip_reader_init_mem(mz_zip_archive *pZip, const void *pMem, size_t size, mz_uint32 flags); + +#ifndef MINIZ_NO_STDIO +mz_bool mz_zip_reader_init_file(mz_zip_archive *pZip, const char *pFilename, mz_uint32 flags); +#endif + +// Returns the total number of files in the archive. +mz_uint mz_zip_reader_get_num_files(mz_zip_archive *pZip); + +// Returns detailed information about an archive file entry. +mz_bool mz_zip_reader_file_stat(mz_zip_archive *pZip, mz_uint file_index, mz_zip_archive_file_stat *pStat); + +// Determines if an archive file entry is a directory entry. +mz_bool mz_zip_reader_is_file_a_directory(mz_zip_archive *pZip, mz_uint file_index); +mz_bool mz_zip_reader_is_file_encrypted(mz_zip_archive *pZip, mz_uint file_index); + +// Retrieves the filename of an archive file entry. +// Returns the number of bytes written to pFilename, or if filename_buf_size is 0 this function returns the number of bytes needed to fully store the filename. +mz_uint mz_zip_reader_get_filename(mz_zip_archive *pZip, mz_uint file_index, char *pFilename, mz_uint filename_buf_size); + +// Attempts to locates a file in the archive's central directory. +// Valid flags: MZ_ZIP_FLAG_CASE_SENSITIVE, MZ_ZIP_FLAG_IGNORE_PATH +// Returns -1 if the file cannot be found. +int mz_zip_reader_locate_file(mz_zip_archive *pZip, const char *pName, const char *pComment, mz_uint flags); + +// Extracts a archive file to a memory buffer using no memory allocation. +mz_bool mz_zip_reader_extract_to_mem_no_alloc(mz_zip_archive *pZip, mz_uint file_index, void *pBuf, size_t buf_size, mz_uint flags, void *pUser_read_buf, size_t user_read_buf_size); +mz_bool mz_zip_reader_extract_file_to_mem_no_alloc(mz_zip_archive *pZip, const char *pFilename, void *pBuf, size_t buf_size, mz_uint flags, void *pUser_read_buf, size_t user_read_buf_size); + +// Extracts a archive file to a memory buffer. +mz_bool mz_zip_reader_extract_to_mem(mz_zip_archive *pZip, mz_uint file_index, void *pBuf, size_t buf_size, mz_uint flags); +mz_bool mz_zip_reader_extract_file_to_mem(mz_zip_archive *pZip, const char *pFilename, void *pBuf, size_t buf_size, mz_uint flags); + +// Extracts a archive file to a dynamically allocated heap buffer. +void *mz_zip_reader_extract_to_heap(mz_zip_archive *pZip, mz_uint file_index, size_t *pSize, mz_uint flags); +void *mz_zip_reader_extract_file_to_heap(mz_zip_archive *pZip, const char *pFilename, size_t *pSize, mz_uint flags); + +// Extracts a archive file using a callback function to output the file's data. +mz_bool mz_zip_reader_extract_to_callback(mz_zip_archive *pZip, mz_uint file_index, mz_file_write_func pCallback, void *pOpaque, mz_uint flags); +mz_bool mz_zip_reader_extract_file_to_callback(mz_zip_archive *pZip, const char *pFilename, mz_file_write_func pCallback, void *pOpaque, mz_uint flags); + +#ifndef MINIZ_NO_STDIO +// Extracts a archive file to a disk file and sets its last accessed and modified times. +// This function only extracts files, not archive directory records. +mz_bool mz_zip_reader_extract_to_file(mz_zip_archive *pZip, mz_uint file_index, const char *pDst_filename, mz_uint flags); +mz_bool mz_zip_reader_extract_file_to_file(mz_zip_archive *pZip, const char *pArchive_filename, const char *pDst_filename, mz_uint flags); +#endif + +// Ends archive reading, freeing all allocations, and closing the input archive file if mz_zip_reader_init_file() was used. +mz_bool mz_zip_reader_end(mz_zip_archive *pZip); + +// ZIP archive writing + +#ifndef MINIZ_NO_ARCHIVE_WRITING_APIS + +// Inits a ZIP archive writer. +mz_bool mz_zip_writer_init(mz_zip_archive *pZip, mz_uint64 existing_size); +mz_bool mz_zip_writer_init_heap(mz_zip_archive *pZip, size_t size_to_reserve_at_beginning, size_t initial_allocation_size); + +#ifndef MINIZ_NO_STDIO +mz_bool mz_zip_writer_init_file(mz_zip_archive *pZip, const char *pFilename, mz_uint64 size_to_reserve_at_beginning); +#endif + +// Converts a ZIP archive reader object into a writer object, to allow efficient in-place file appends to occur on an existing archive. +// For archives opened using mz_zip_reader_init_file, pFilename must be the archive's filename so it can be reopened for writing. If the file can't be reopened, mz_zip_reader_end() will be called. +// For archives opened using mz_zip_reader_init_mem, the memory block must be growable using the realloc callback (which defaults to realloc unless you've overridden it). +// Finally, for archives opened using mz_zip_reader_init, the mz_zip_archive's user provided m_pWrite function cannot be NULL. +// Note: In-place archive modification is not recommended unless you know what you're doing, because if execution stops or something goes wrong before +// the archive is finalized the file's central directory will be hosed. +mz_bool mz_zip_writer_init_from_reader(mz_zip_archive *pZip, const char *pFilename); + +// Adds the contents of a memory buffer to an archive. These functions record the current local time into the archive. +// To add a directory entry, call this method with an archive name ending in a forwardslash with empty buffer. +// level_and_flags - compression level (0-10, see MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or just set to MZ_DEFAULT_COMPRESSION. +mz_bool mz_zip_writer_add_mem(mz_zip_archive *pZip, const char *pArchive_name, const void *pBuf, size_t buf_size, mz_uint level_and_flags); +mz_bool mz_zip_writer_add_mem_ex(mz_zip_archive *pZip, const char *pArchive_name, const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags, mz_uint64 uncomp_size, mz_uint32 uncomp_crc32); + +#ifndef MINIZ_NO_STDIO +// Adds the contents of a disk file to an archive. This function also records the disk file's modified time into the archive. +// level_and_flags - compression level (0-10, see MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or just set to MZ_DEFAULT_COMPRESSION. +mz_bool mz_zip_writer_add_file(mz_zip_archive *pZip, const char *pArchive_name, const char *pSrc_filename, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags); +#endif + +// Adds a file to an archive by fully cloning the data from another archive. +// This function fully clones the source file's compressed data (no recompression), along with its full filename, extra data, and comment fields. +mz_bool mz_zip_writer_add_from_zip_reader(mz_zip_archive *pZip, mz_zip_archive *pSource_zip, mz_uint file_index); + +// Finalizes the archive by writing the central directory records followed by the end of central directory record. +// After an archive is finalized, the only valid call on the mz_zip_archive struct is mz_zip_writer_end(). +// An archive must be manually finalized by calling this function for it to be valid. +mz_bool mz_zip_writer_finalize_archive(mz_zip_archive *pZip); +mz_bool mz_zip_writer_finalize_heap_archive(mz_zip_archive *pZip, void **pBuf, size_t *pSize); + +// Ends archive writing, freeing all allocations, and closing the output file if mz_zip_writer_init_file() was used. +// Note for the archive to be valid, it must have been finalized before ending. +mz_bool mz_zip_writer_end(mz_zip_archive *pZip); + +// Misc. high-level helper functions: + +// mz_zip_add_mem_to_archive_file_in_place() efficiently (but not atomically) appends a memory blob to a ZIP archive. +// level_and_flags - compression level (0-10, see MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or just set to MZ_DEFAULT_COMPRESSION. +mz_bool mz_zip_add_mem_to_archive_file_in_place(const char *pZip_filename, const char *pArchive_name, const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags); + +// Reads a single file from an archive into a heap block. +// If pComment is not NULL, only the file with the specified comment will be extracted. +// Returns NULL on failure. +void *mz_zip_extract_archive_file_to_heap(const char *pZip_filename, const char *pArchive_name, const char *pComment, size_t *pSize, mz_uint flags); + +#endif // #ifndef MINIZ_NO_ARCHIVE_WRITING_APIS + +#endif // #ifndef MINIZ_NO_ARCHIVE_APIS + +// ------------------- Low-level Decompression API Definitions + +// Decompression flags used by tinfl_decompress(). +// TINFL_FLAG_PARSE_ZLIB_HEADER: If set, the input has a valid zlib header and ends with an adler32 checksum (it's a valid zlib stream). Otherwise, the input is a raw deflate stream. +// TINFL_FLAG_HAS_MORE_INPUT: If set, there are more input bytes available beyond the end of the supplied input buffer. If clear, the input buffer contains all remaining input. +// TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF: If set, the output buffer is large enough to hold the entire decompressed stream. If clear, the output buffer is at least the size of the dictionary (typically 32KB). +// TINFL_FLAG_COMPUTE_ADLER32: Force adler-32 checksum computation of the decompressed bytes. +enum +{ + TINFL_FLAG_PARSE_ZLIB_HEADER = 1, + TINFL_FLAG_HAS_MORE_INPUT = 2, + TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF = 4, + TINFL_FLAG_COMPUTE_ADLER32 = 8 +}; + +// High level decompression functions: +// tinfl_decompress_mem_to_heap() decompresses a block in memory to a heap block allocated via malloc(). +// On entry: +// pSrc_buf, src_buf_len: Pointer and size of the Deflate or zlib source data to decompress. +// On return: +// Function returns a pointer to the decompressed data, or NULL on failure. +// *pOut_len will be set to the decompressed data's size, which could be larger than src_buf_len on uncompressible data. +// The caller must call mz_free() on the returned block when it's no longer needed. +void *tinfl_decompress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags); + +// tinfl_decompress_mem_to_mem() decompresses a block in memory to another block in memory. +// Returns TINFL_DECOMPRESS_MEM_TO_MEM_FAILED on failure, or the number of bytes written on success. +#define TINFL_DECOMPRESS_MEM_TO_MEM_FAILED ((size_t)(-1)) +size_t tinfl_decompress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags); + +// tinfl_decompress_mem_to_callback() decompresses a block in memory to an internal 32KB buffer, and a user provided callback function will be called to flush the buffer. +// Returns 1 on success or 0 on failure. +typedef int (*tinfl_put_buf_func_ptr)(const void* pBuf, int len, void *pUser); +int tinfl_decompress_mem_to_callback(const void *pIn_buf, size_t *pIn_buf_size, tinfl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags); + +struct tinfl_decompressor_tag; typedef struct tinfl_decompressor_tag tinfl_decompressor; + +// Max size of LZ dictionary. +#define TINFL_LZ_DICT_SIZE 32768 + +// Return status. +typedef enum +{ + TINFL_STATUS_BAD_PARAM = -3, + TINFL_STATUS_ADLER32_MISMATCH = -2, + TINFL_STATUS_FAILED = -1, + TINFL_STATUS_DONE = 0, + TINFL_STATUS_NEEDS_MORE_INPUT = 1, + TINFL_STATUS_HAS_MORE_OUTPUT = 2 +} tinfl_status; + +// Initializes the decompressor to its initial state. +#define tinfl_init(r) do { (r)->m_state = 0; } MZ_MACRO_END +#define tinfl_get_adler32(r) (r)->m_check_adler32 + +// Main low-level decompressor coroutine function. This is the only function actually needed for decompression. All the other functions are just high-level helpers for improved usability. +// This is a universal API, i.e. it can be used as a building block to build any desired higher level decompression API. In the limit case, it can be called once per every byte input or output. +tinfl_status tinfl_decompress(tinfl_decompressor *r, const mz_uint8 *pIn_buf_next, size_t *pIn_buf_size, mz_uint8 *pOut_buf_start, mz_uint8 *pOut_buf_next, size_t *pOut_buf_size, const mz_uint32 decomp_flags); + +// Internal/private bits follow. +enum +{ + TINFL_MAX_HUFF_TABLES = 3, TINFL_MAX_HUFF_SYMBOLS_0 = 288, TINFL_MAX_HUFF_SYMBOLS_1 = 32, TINFL_MAX_HUFF_SYMBOLS_2 = 19, + TINFL_FAST_LOOKUP_BITS = 10, TINFL_FAST_LOOKUP_SIZE = 1 << TINFL_FAST_LOOKUP_BITS +}; + +typedef struct +{ + mz_uint8 m_code_size[TINFL_MAX_HUFF_SYMBOLS_0]; + mz_int16 m_look_up[TINFL_FAST_LOOKUP_SIZE], m_tree[TINFL_MAX_HUFF_SYMBOLS_0 * 2]; +} tinfl_huff_table; + +#if MINIZ_HAS_64BIT_REGISTERS + #define TINFL_USE_64BIT_BITBUF 1 +#endif + +#if TINFL_USE_64BIT_BITBUF + typedef mz_uint64 tinfl_bit_buf_t; + #define TINFL_BITBUF_SIZE (64) +#else + typedef mz_uint32 tinfl_bit_buf_t; + #define TINFL_BITBUF_SIZE (32) +#endif + +struct tinfl_decompressor_tag +{ + mz_uint32 m_state, m_num_bits, m_zhdr0, m_zhdr1, m_z_adler32, m_final, m_type, m_check_adler32, m_dist, m_counter, m_num_extra, m_table_sizes[TINFL_MAX_HUFF_TABLES]; + tinfl_bit_buf_t m_bit_buf; + size_t m_dist_from_out_buf_start; + tinfl_huff_table m_tables[TINFL_MAX_HUFF_TABLES]; + mz_uint8 m_raw_header[4], m_len_codes[TINFL_MAX_HUFF_SYMBOLS_0 + TINFL_MAX_HUFF_SYMBOLS_1 + 137]; +}; + +// ------------------- Low-level Compression API Definitions + +// Set TDEFL_LESS_MEMORY to 1 to use less memory (compression will be slightly slower, and raw/dynamic blocks will be output more frequently). +#define TDEFL_LESS_MEMORY 0 + +// tdefl_init() compression flags logically OR'd together (low 12 bits contain the max. number of probes per dictionary search): +// TDEFL_DEFAULT_MAX_PROBES: The compressor defaults to 128 dictionary probes per dictionary search. 0=Huffman only, 1=Huffman+LZ (fastest/crap compression), 4095=Huffman+LZ (slowest/best compression). +enum +{ + TDEFL_HUFFMAN_ONLY = 0, TDEFL_DEFAULT_MAX_PROBES = 128, TDEFL_MAX_PROBES_MASK = 0xFFF +}; + +// TDEFL_WRITE_ZLIB_HEADER: If set, the compressor outputs a zlib header before the deflate data, and the Adler-32 of the source data at the end. Otherwise, you'll get raw deflate data. +// TDEFL_COMPUTE_ADLER32: Always compute the adler-32 of the input data (even when not writing zlib headers). +// TDEFL_GREEDY_PARSING_FLAG: Set to use faster greedy parsing, instead of more efficient lazy parsing. +// TDEFL_NONDETERMINISTIC_PARSING_FLAG: Enable to decrease the compressor's initialization time to the minimum, but the output may vary from run to run given the same input (depending on the contents of memory). +// TDEFL_RLE_MATCHES: Only look for RLE matches (matches with a distance of 1) +// TDEFL_FILTER_MATCHES: Discards matches <= 5 chars if enabled. +// TDEFL_FORCE_ALL_STATIC_BLOCKS: Disable usage of optimized Huffman tables. +// TDEFL_FORCE_ALL_RAW_BLOCKS: Only use raw (uncompressed) deflate blocks. +enum +{ + TDEFL_WRITE_ZLIB_HEADER = 0x01000, + TDEFL_COMPUTE_ADLER32 = 0x02000, + TDEFL_GREEDY_PARSING_FLAG = 0x04000, + TDEFL_NONDETERMINISTIC_PARSING_FLAG = 0x08000, + TDEFL_RLE_MATCHES = 0x10000, + TDEFL_FILTER_MATCHES = 0x20000, + TDEFL_FORCE_ALL_STATIC_BLOCKS = 0x40000, + TDEFL_FORCE_ALL_RAW_BLOCKS = 0x80000 +}; + +// High level compression functions: +// tdefl_compress_mem_to_heap() compresses a block in memory to a heap block allocated via malloc(). +// On entry: +// pSrc_buf, src_buf_len: Pointer and size of source block to compress. +// flags: The max match finder probes (default is 128) logically OR'd against the above flags. Higher probes are slower but improve compression. +// On return: +// Function returns a pointer to the compressed data, or NULL on failure. +// *pOut_len will be set to the compressed data's size, which could be larger than src_buf_len on uncompressible data. +// The caller must free() the returned block when it's no longer needed. +void *tdefl_compress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags); + +// tdefl_compress_mem_to_mem() compresses a block in memory to another block in memory. +// Returns 0 on failure. +size_t tdefl_compress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags); + +// Compresses an image to a compressed PNG file in memory. +// On entry: +// pImage, w, h, and num_chans describe the image to compress. num_chans may be 1, 2, 3, or 4. +// The image pitch in bytes per scanline will be w*num_chans. The leftmost pixel on the top scanline is stored first in memory. +// On return: +// Function returns a pointer to the compressed data, or NULL on failure. +// *pLen_out will be set to the size of the PNG image file. +// The caller must mz_free() the returned heap block (which will typically be larger than *pLen_out) when it's no longer needed. +void *tdefl_write_image_to_png_file_in_memory(const void *pImage, int w, int h, int num_chans, size_t *pLen_out); + +// Output stream interface. The compressor uses this interface to write compressed data. It'll typically be called TDEFL_OUT_BUF_SIZE at a time. +typedef mz_bool (*tdefl_put_buf_func_ptr)(const void* pBuf, int len, void *pUser); + +// tdefl_compress_mem_to_output() compresses a block to an output stream. The above helpers use this function internally. +mz_bool tdefl_compress_mem_to_output(const void *pBuf, size_t buf_len, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags); + +enum { TDEFL_MAX_HUFF_TABLES = 3, TDEFL_MAX_HUFF_SYMBOLS_0 = 288, TDEFL_MAX_HUFF_SYMBOLS_1 = 32, TDEFL_MAX_HUFF_SYMBOLS_2 = 19, TDEFL_LZ_DICT_SIZE = 32768, TDEFL_LZ_DICT_SIZE_MASK = TDEFL_LZ_DICT_SIZE - 1, TDEFL_MIN_MATCH_LEN = 3, TDEFL_MAX_MATCH_LEN = 258 }; + +// TDEFL_OUT_BUF_SIZE MUST be large enough to hold a single entire compressed output block (using static/fixed Huffman codes). +#if TDEFL_LESS_MEMORY +enum { TDEFL_LZ_CODE_BUF_SIZE = 24 * 1024, TDEFL_OUT_BUF_SIZE = (TDEFL_LZ_CODE_BUF_SIZE * 13 ) / 10, TDEFL_MAX_HUFF_SYMBOLS = 288, TDEFL_LZ_HASH_BITS = 12, TDEFL_LEVEL1_HASH_SIZE_MASK = 4095, TDEFL_LZ_HASH_SHIFT = (TDEFL_LZ_HASH_BITS + 2) / 3, TDEFL_LZ_HASH_SIZE = 1 << TDEFL_LZ_HASH_BITS }; +#else +enum { TDEFL_LZ_CODE_BUF_SIZE = 64 * 1024, TDEFL_OUT_BUF_SIZE = (TDEFL_LZ_CODE_BUF_SIZE * 13 ) / 10, TDEFL_MAX_HUFF_SYMBOLS = 288, TDEFL_LZ_HASH_BITS = 15, TDEFL_LEVEL1_HASH_SIZE_MASK = 4095, TDEFL_LZ_HASH_SHIFT = (TDEFL_LZ_HASH_BITS + 2) / 3, TDEFL_LZ_HASH_SIZE = 1 << TDEFL_LZ_HASH_BITS }; +#endif + +// The low-level tdefl functions below may be used directly if the above helper functions aren't flexible enough. The low-level functions don't make any heap allocations, unlike the above helper functions. +typedef enum +{ + TDEFL_STATUS_BAD_PARAM = -2, + TDEFL_STATUS_PUT_BUF_FAILED = -1, + TDEFL_STATUS_OKAY = 0, + TDEFL_STATUS_DONE = 1, +} tdefl_status; + +// Must map to MZ_NO_FLUSH, MZ_SYNC_FLUSH, etc. enums +typedef enum +{ + TDEFL_NO_FLUSH = 0, + TDEFL_SYNC_FLUSH = 2, + TDEFL_FULL_FLUSH = 3, + TDEFL_FINISH = 4 +} tdefl_flush; + +// tdefl's compression state structure. +typedef struct +{ + tdefl_put_buf_func_ptr m_pPut_buf_func; + void *m_pPut_buf_user; + mz_uint m_flags, m_max_probes[2]; + int m_greedy_parsing; + mz_uint m_adler32, m_lookahead_pos, m_lookahead_size, m_dict_size; + mz_uint8 *m_pLZ_code_buf, *m_pLZ_flags, *m_pOutput_buf, *m_pOutput_buf_end; + mz_uint m_num_flags_left, m_total_lz_bytes, m_lz_code_buf_dict_pos, m_bits_in, m_bit_buffer; + mz_uint m_saved_match_dist, m_saved_match_len, m_saved_lit, m_output_flush_ofs, m_output_flush_remaining, m_finished, m_block_index, m_wants_to_finish; + tdefl_status m_prev_return_status; + const void *m_pIn_buf; + void *m_pOut_buf; + size_t *m_pIn_buf_size, *m_pOut_buf_size; + tdefl_flush m_flush; + const mz_uint8 *m_pSrc; + size_t m_src_buf_left, m_out_buf_ofs; + mz_uint8 m_dict[TDEFL_LZ_DICT_SIZE + TDEFL_MAX_MATCH_LEN - 1]; + mz_uint16 m_huff_count[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS]; + mz_uint16 m_huff_codes[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS]; + mz_uint8 m_huff_code_sizes[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS]; + mz_uint8 m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE]; + mz_uint16 m_next[TDEFL_LZ_DICT_SIZE]; + mz_uint16 m_hash[TDEFL_LZ_HASH_SIZE]; + mz_uint8 m_output_buf[TDEFL_OUT_BUF_SIZE]; +} tdefl_compressor; + +// Initializes the compressor. +// There is no corresponding deinit() function because the tdefl API's do not dynamically allocate memory. +// pBut_buf_func: If NULL, output data will be supplied to the specified callback. In this case, the user should call the tdefl_compress_buffer() API for compression. +// If pBut_buf_func is NULL the user should always call the tdefl_compress() API. +// flags: See the above enums (TDEFL_HUFFMAN_ONLY, TDEFL_WRITE_ZLIB_HEADER, etc.) +tdefl_status tdefl_init(tdefl_compressor *d, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags); + +// Compresses a block of data, consuming as much of the specified input buffer as possible, and writing as much compressed data to the specified output buffer as possible. +tdefl_status tdefl_compress(tdefl_compressor *d, const void *pIn_buf, size_t *pIn_buf_size, void *pOut_buf, size_t *pOut_buf_size, tdefl_flush flush); + +// tdefl_compress_buffer() is only usable when the tdefl_init() is called with a non-NULL tdefl_put_buf_func_ptr. +// tdefl_compress_buffer() always consumes the entire input buffer. +tdefl_status tdefl_compress_buffer(tdefl_compressor *d, const void *pIn_buf, size_t in_buf_size, tdefl_flush flush); + +tdefl_status tdefl_get_prev_return_status(tdefl_compressor *d); +mz_uint32 tdefl_get_adler32(tdefl_compressor *d); + +// Can't use tdefl_create_comp_flags_from_zip_params if MINIZ_NO_ZLIB_APIS isn't defined, because it uses some of its macros. +#ifndef MINIZ_NO_ZLIB_APIS +// Create tdefl_compress() flags given zlib-style compression parameters. +// level may range from [0,10] (where 10 is absolute max compression, but may be much slower on some files) +// window_bits may be -15 (raw deflate) or 15 (zlib) +// strategy may be either MZ_DEFAULT_STRATEGY, MZ_FILTERED, MZ_HUFFMAN_ONLY, MZ_RLE, or MZ_FIXED +mz_uint tdefl_create_comp_flags_from_zip_params(int level, int window_bits, int strategy); +#endif // #ifndef MINIZ_NO_ZLIB_APIS + +#ifdef __cplusplus +} +#endif + +#endif // MINIZ_HEADER_INCLUDED \ No newline at end of file diff --git a/crnlib/crn_dds_texture.cpp b/crnlib/crn_mipmapped_texture.cpp similarity index 62% rename from crnlib/crn_dds_texture.cpp rename to crnlib/crn_mipmapped_texture.cpp index f8e43c2..99bfe3b 100644 --- a/crnlib/crn_dds_texture.cpp +++ b/crnlib/crn_mipmapped_texture.cpp @@ -1,11 +1,12 @@ -// File: crn_dds_texture.cpp +// File: crn_dds_texture.cpp - Actually supports both .DDS and .KTX. Probably will rename this eventually. // See Copyright Notice and license at the end of inc/crnlib.h #include "crn_core.h" -#include "crn_dds_texture.h" +#include "crn_mipmapped_texture.h" #include "crn_cfile_stream.h" #include "crn_image_utils.h" #include "crn_console.h" #include "crn_texture_comp.h" +#include "crn_ktx_texture.h" #define CRND_HEADER_FILE_ONLY #include "../inc/crn_decomp.h" @@ -20,7 +21,8 @@ namespace crnlib m_comp_flags(pixel_format_helpers::cDefaultCompFlags), m_format(PIXEL_FMT_INVALID), m_pImage(NULL), - m_pDXTImage(NULL) + m_pDXTImage(NULL), + m_orient_flags(cDefaultOrientationFlags) { } @@ -30,7 +32,8 @@ namespace crnlib m_comp_flags(pixel_format_helpers::cDefaultCompFlags), m_format(PIXEL_FMT_INVALID), m_pImage(NULL), - m_pDXTImage(NULL) + m_pDXTImage(NULL), + m_orient_flags(cDefaultOrientationFlags) { *this = other; } @@ -43,6 +46,7 @@ namespace crnlib m_height = rhs.m_height; m_comp_flags = rhs.m_comp_flags; m_format = rhs.m_format; + m_orient_flags = rhs.m_orient_flags; if (rhs.m_pImage) m_pImage = crnlib_new(*rhs.m_pImage); @@ -65,6 +69,7 @@ namespace crnlib m_height = 0; m_comp_flags = pixel_format_helpers::cDefaultCompFlags; m_format = PIXEL_FMT_INVALID; + m_orient_flags = cDefaultOrientationFlags; if (m_pImage) { @@ -79,7 +84,7 @@ namespace crnlib } } - void mip_level::assign(image_u8* p, pixel_format fmt) + void mip_level::assign(image_u8* p, pixel_format fmt, orientation_flags_t orient_flags) { CRNLIB_ASSERT(p); @@ -89,6 +94,7 @@ namespace crnlib m_width = p->get_width(); m_height = p->get_height(); + m_orient_flags = orient_flags; if (fmt != PIXEL_FMT_INVALID) m_format = fmt; @@ -103,7 +109,7 @@ namespace crnlib m_comp_flags = p->get_comp_flags(); //pixel_format_helpers::get_component_flags(m_format); } - void mip_level::assign(dxt_image* p, pixel_format fmt) + void mip_level::assign(dxt_image* p, pixel_format fmt, orientation_flags_t orient_flags) { CRNLIB_ASSERT(p); @@ -113,6 +119,7 @@ namespace crnlib m_width = p->get_width(); m_height = p->get_height(); + m_orient_flags = orient_flags; if (fmt != PIXEL_FMT_INVALID) m_format = fmt; @@ -122,7 +129,7 @@ namespace crnlib m_comp_flags = pixel_format_helpers::get_component_flags(m_format); } - bool mip_level::pack_to_dxt(const image_u8& img, pixel_format fmt, bool cook, const dxt_image::pack_params& orig_params) + bool mip_level::pack_to_dxt(const image_u8& img, pixel_format fmt, bool cook, const dxt_image::pack_params& orig_params, orientation_flags_t orient_flags) { CRNLIB_ASSERT(pixel_format_helpers::is_dxt(fmt)); if (!pixel_format_helpers::is_dxt(fmt)) @@ -156,7 +163,7 @@ namespace crnlib return false; } - assign(pDXT_image, fmt); + assign(pDXT_image, fmt, orient_flags); return true; } @@ -168,9 +175,9 @@ namespace crnlib return false; image_u8 tmp_img; - image_u8* pImage = get_unpacked_image(tmp_img, true); + image_u8* pImage = get_unpacked_image(tmp_img, cUnpackFlagUncook); - return pack_to_dxt(*pImage, fmt, cook, p); + return pack_to_dxt(*pImage, fmt, cook, p, m_orient_flags); } bool mip_level::unpack_from_dxt(bool uncook) @@ -179,12 +186,98 @@ namespace crnlib return false; image_u8* pNew_img = crnlib_new(); - image_u8* pImg = get_unpacked_image(*pNew_img, uncook); + image_u8* pImg = get_unpacked_image(*pNew_img, uncook ? cUnpackFlagUncook : 0); pImg; CRNLIB_ASSERT(pImg == pNew_img); - assign(pNew_img); + assign(pNew_img, PIXEL_FMT_INVALID, m_orient_flags); + return true; + } + + bool mip_level::is_flipped() const + { + return ((m_orient_flags & (cOrientationFlagXFlipped | cOrientationFlagYFlipped)) != 0); + } + + bool mip_level::is_x_flipped() const + { + return ((m_orient_flags & cOrientationFlagXFlipped) != 0); + } + + bool mip_level::is_y_flipped() const + { + return ((m_orient_flags & cOrientationFlagYFlipped) != 0); + } + + bool mip_level::can_unflip_without_unpacking() const + { + if (!is_valid()) + return false; + + if (!is_packed()) + return true; + + bool can_unflip = true; + if (m_orient_flags & cOrientationFlagXFlipped) + { + if (!m_pDXTImage->can_flip(0)) + can_unflip = false; + } + if (m_orient_flags & cOrientationFlagYFlipped) + { + if (!m_pDXTImage->can_flip(1)) + can_unflip = false; + } + + return can_unflip; + } + + bool mip_level::unflip(bool allow_unpacking_to_flip, bool uncook_if_necessary_to_unpack) + { + if (!is_valid()) + return false; + + if (!is_flipped()) + return false; + + if (is_packed()) + { + if (can_unflip_without_unpacking()) + { + if (m_orient_flags & cOrientationFlagXFlipped) + { + m_pDXTImage->flip_x(); + m_orient_flags = static_cast(m_orient_flags & ~cOrientationFlagXFlipped); + } + + if (m_orient_flags & cOrientationFlagYFlipped) + { + m_pDXTImage->flip_y(); + m_orient_flags = static_cast(m_orient_flags & ~cOrientationFlagYFlipped); + } + + return true; + } + + if (!allow_unpacking_to_flip) + return false; + } + + unpack_from_dxt(uncook_if_necessary_to_unpack); + + if (m_orient_flags & cOrientationFlagXFlipped) + { + m_pImage->flip_x(); + m_orient_flags = static_cast(m_orient_flags & ~cOrientationFlagXFlipped); + } + + if (m_orient_flags & cOrientationFlagYFlipped) + { + m_pImage->flip_y(); + m_orient_flags = static_cast(m_orient_flags & ~cOrientationFlagYFlipped); + } + return true; } @@ -228,11 +321,11 @@ namespace crnlib return pack_to_dxt(fmt, cook, p); image_u8 tmp_img; - image_u8* pImg = get_unpacked_image(tmp_img, true); + image_u8* pImg = get_unpacked_image(tmp_img, cUnpackFlagUncook); image_u8* pImage = crnlib_new(); pImage->set_comp_flags(pixel_format_helpers::get_component_flags(fmt)); - + if (!pImage->resize(pImg->get_width(), pImg->get_height())) return false; @@ -264,7 +357,7 @@ namespace crnlib } } - assign(pImage, fmt); + assign(pImage, fmt, m_orient_flags); return true; } @@ -285,10 +378,10 @@ namespace crnlib image_utils::convert_image(img, conv_type); } - image_u8* mip_level::get_unpacked_image(image_u8& tmp, bool uncook) const + image_u8* mip_level::get_unpacked_image(image_u8& tmp, uint unpack_flags) const { - if (m_pImage) - return m_pImage; + if (!is_valid()) + return NULL; if (m_pDXTImage) { @@ -296,18 +389,58 @@ namespace crnlib tmp.set_comp_flags(m_comp_flags); - if (uncook) + if (unpack_flags & cUnpackFlagUncook) uncook_image(tmp); + } + else if ((unpack_flags & cUnpackFlagUnflip) && (m_orient_flags & (cOrientationFlagXFlipped | cOrientationFlagYFlipped))) + tmp = *m_pImage; + else + return m_pImage; - return &tmp; + if (unpack_flags & cUnpackFlagUnflip) + { + if (m_orient_flags & cOrientationFlagXFlipped) tmp.flip_x(); + if (m_orient_flags & cOrientationFlagYFlipped) tmp.flip_y(); } - return NULL; + return &tmp; + } + + bool mip_level::flip_x() + { + if (!is_valid()) + return false; + + if (m_pDXTImage) + return m_pDXTImage->flip_x(); + else if (m_pImage) + { + m_pImage->flip_x(); + return true; + } + + return false; + } + + bool mip_level::flip_y() + { + if (!is_valid()) + return false; + + if (m_pDXTImage) + return m_pDXTImage->flip_y(); + else if (m_pImage) + { + m_pImage->flip_y(); + return true; + } + + return false; } // ------------------------------------------------------------------------- - dds_texture::dds_texture() : + mipmapped_texture::mipmapped_texture() : m_width(0), m_height(0), m_comp_flags(pixel_format_helpers::cDefaultCompFlags), @@ -316,12 +449,12 @@ namespace crnlib { } - dds_texture::~dds_texture() + mipmapped_texture::~mipmapped_texture() { free_all_mips(); } - void dds_texture::clear() + void mipmapped_texture::clear() { free_all_mips(); @@ -334,7 +467,7 @@ namespace crnlib m_last_error.clear(); } - void dds_texture::free_all_mips() + void mipmapped_texture::free_all_mips() { for (uint i = 0; i < m_faces.size(); i++) for (uint j = 0; j < m_faces[i].size(); j++) @@ -343,7 +476,7 @@ namespace crnlib m_faces.clear(); } - dds_texture::dds_texture(const dds_texture& other) : + mipmapped_texture::mipmapped_texture(const mipmapped_texture& other) : m_width(0), m_height(0), m_comp_flags(pixel_format_helpers::cDefaultCompFlags), @@ -352,7 +485,7 @@ namespace crnlib *this = other; } - dds_texture& dds_texture::operator= (const dds_texture& rhs) + mipmapped_texture& mipmapped_texture::operator= (const mipmapped_texture& rhs) { if (this == &rhs) return *this; @@ -379,28 +512,8 @@ namespace crnlib return *this; } - - bool dds_texture::write_dds(const char* pFilename) const - { - cfile_stream out_stream(pFilename, cDataStreamWritable | cDataStreamSeekable); - if (!out_stream.is_opened()) - return false; - - data_stream_serializer out_serializer(out_stream); - return write_dds(out_serializer); - } - - bool dds_texture::read_dds(const char* pFilename) - { - cfile_stream in_stream(pFilename); - if (!in_stream.is_opened()) - return false; - - data_stream_serializer in_serializer(in_stream); - return read_dds(in_serializer); - } - - bool dds_texture::read_dds(data_stream_serializer& serializer) + + bool mipmapped_texture::read_dds(data_stream_serializer& serializer) { if (!read_dds_internal(serializer)) { @@ -411,7 +524,7 @@ namespace crnlib return true; } - bool dds_texture::read_dds_internal(data_stream_serializer& serializer) + bool mipmapped_texture::read_dds_internal(data_stream_serializer& serializer) { CRNLIB_ASSERT(serializer.get_little_endian()); @@ -553,6 +666,12 @@ namespace crnlib dxt_fmt = cDXT5A; break; } + case PIXEL_FMT_ETC1: + { + m_format = PIXEL_FMT_ETC1; + dxt_fmt = cETC1; + break; + } default: { dynamic_string err_msg(cVarArg, "Unsupported DDS FOURCC format: 0x%08X", desc.ddpfPixelFormat.dwFourCC); @@ -606,7 +725,6 @@ namespace crnlib uint bits_per_pixel = desc.ddpfPixelFormat.dwRGBBitCount; if (desc.ddpfPixelFormat.dwFlags & DDPF_FOURCC) - //bits_per_pixel = ((m_format == PIXEL_FMT_DXT1) || (m_format == PIXEL_FMT_DXT5A)) ? 4 : 8; bits_per_pixel = pixel_format_helpers::get_bpp(m_format); set_last_error("Load failed"); @@ -617,7 +735,12 @@ namespace crnlib else default_pitch = (desc.dwWidth * bits_per_pixel) >> 3; - uint pitch = desc.lPitch; + uint pitch = 0; + if ( ( desc.dwFlags & DDSD_PITCH ) && ( !( desc.dwFlags & DDSD_LINEARSIZE ) ) ) + { + pitch = desc.lPitch; + } + if (!pitch) pitch = default_pitch; #if 0 @@ -675,7 +798,7 @@ namespace crnlib if (desc.ddpfPixelFormat.dwFlags & DDPF_FOURCC) { - const uint bytes_per_block = ((m_format == PIXEL_FMT_DXT1) || (m_format == PIXEL_FMT_DXT1A) || (m_format == PIXEL_FMT_DXT5A)) ? 8 : 16; + const uint bytes_per_block = pixel_format_helpers::get_dxt_bytes_per_block(m_format); const uint num_blocks_x = (width + 3) >> 2; const uint num_blocks_y = (height + 3) >> 2; @@ -691,7 +814,7 @@ namespace crnlib CRNLIB_ASSERT(0); return false; } - + CRNLIB_ASSERT(pDXTImage->get_element_vec().size() * sizeof(dxt_image::element) == actual_level_pitch); if (!serializer.read(&pDXTImage->get_element_vec()[0], actual_level_pitch)) @@ -778,47 +901,56 @@ namespace crnlib } pMip->assign(pImage, m_format); - + CRNLIB_ASSERT(pMip->get_comp_flags() == m_comp_flags); } } } - + clear_last_error(); if (dxt1_alpha) - { - m_format = PIXEL_FMT_DXT1A; - - m_comp_flags = pixel_format_helpers::get_component_flags(m_format); - - for (uint f = 0; f < m_faces.size(); f++) - { - for (uint l = 0; l < m_faces[f].size(); l++) - { - if (m_faces[f][l]->get_dxt_image()) - { - m_faces[f][l]->set_format(m_format); - m_faces[f][l]->set_comp_flags(m_comp_flags); - - m_faces[f][l]->get_dxt_image()->change_dxt1_to_dxt1a(); - } - } - } - } - - CRNLIB_ASSERT(check()); - + change_dxt1_to_dxt1a(); + return true; } - bool dds_texture::check() const + void mipmapped_texture::change_dxt1_to_dxt1a() + { + if (m_format != PIXEL_FMT_DXT1) + return; + + m_format = PIXEL_FMT_DXT1A; + + m_comp_flags = pixel_format_helpers::get_component_flags(m_format); + + for (uint f = 0; f < m_faces.size(); f++) + { + for (uint l = 0; l < m_faces[f].size(); l++) + { + if (m_faces[f][l]->get_dxt_image()) + { + m_faces[f][l]->set_format(m_format); + m_faces[f][l]->set_comp_flags(m_comp_flags); + + m_faces[f][l]->get_dxt_image()->change_dxt1_to_dxt1a(); + } + } + } + } + + bool mipmapped_texture::check() const { uint levels = 0; + orientation_flags_t orient_flags = cDefaultOrientationFlags; for (uint f = 0; f < m_faces.size(); f++) { if (!f) + { levels = m_faces[f].size(); + if ((levels) && (m_faces[f][0])) + orient_flags = m_faces[f][0]->get_orientation_flags(); + } else if (m_faces[f].size() != levels) return false; @@ -831,6 +963,9 @@ namespace crnlib if (!p->is_valid()) return false; + if (p->get_orientation_flags() != orient_flags) + return false; + if (!l) { if (m_width != p->get_width()) @@ -865,7 +1000,7 @@ namespace crnlib return true; } - bool dds_texture::write_dds(data_stream_serializer& serializer) const + bool mipmapped_texture::write_dds(data_stream_serializer& serializer) const { if (!m_width) { @@ -873,7 +1008,7 @@ namespace crnlib return false; } - set_last_error("Write_dds() failed"); + set_last_error("write_dds() failed"); if (!serializer.write("DDS ", sizeof(uint32))) return false; @@ -883,7 +1018,7 @@ namespace crnlib desc.dwSize = sizeof(desc); desc.dwFlags = DDSD_WIDTH | DDSD_HEIGHT | DDSD_PIXELFORMAT | DDSD_CAPS; - + desc.dwWidth = m_width; desc.dwHeight = m_height; @@ -913,6 +1048,12 @@ namespace crnlib switch (m_format) { + case PIXEL_FMT_ETC1: + { + desc.ddpfPixelFormat.dwFourCC = (uint32)PIXEL_FMT_ETC1; + desc.ddpfPixelFormat.dwRGBBitCount = 0; + break; + } case PIXEL_FMT_DXN: { desc.ddpfPixelFormat.dwFourCC = (uint32)PIXEL_FMT_3DC; @@ -1029,6 +1170,12 @@ namespace crnlib crnlib::vector write_buf; + const bool can_unflip_packed_texture = can_unflip_without_unpacking(); + if ((is_packed()) && (is_flipped()) && (!can_unflip_without_unpacking())) + { + console::warning("mipmapped_texture::write_dds: One or more faces/miplevels cannot be unflipped without unpacking. Writing flipped .DDS texture."); + } + for (uint face = 0; face < get_num_faces(); face++) { for (uint level = 0; level < get_num_levels(); level++) @@ -1044,20 +1191,38 @@ namespace crnlib CRNLIB_ASSERT(height == math::maximum(1, m_height >> level)); const dxt_image* p = pLevel->get_dxt_image(); + dxt_image tmp; + if ((can_unflip_packed_texture) && (pLevel->get_orientation_flags() & (cOrientationFlagXFlipped | cOrientationFlagYFlipped))) + { + tmp = *p; + if (pLevel->get_orientation_flags() & cOrientationFlagXFlipped) + { + if (!tmp.flip_x()) + console::warning("mipmapped_texture::write_dds: Unable to unflip compressed texture on X axis"); + } + + if (pLevel->get_orientation_flags() & cOrientationFlagYFlipped) + { + if (!tmp.flip_y()) + console::warning("mipmapped_texture::write_dds: Unable to unflip compressed texture on Y axis"); + } + p = &tmp; + } const uint num_blocks_x = (width + 3) >> 2; const uint num_blocks_y = (height + 3) >> 2; - CRNLIB_ASSERT(num_blocks_x * num_blocks_y * p->get_elements_per_block() == p->get_num_elements()); + CRNLIB_ASSERT(num_blocks_x * num_blocks_y * p->get_elements_per_block() == p->get_total_elements()); width, height, num_blocks_x, num_blocks_y; - const uint size_in_bytes = p->get_num_elements() * sizeof(dxt_image::element); + const uint size_in_bytes = p->get_total_elements() * sizeof(dxt_image::element); if (size_in_bytes > write_buf.size()) write_buf.resize(size_in_bytes); memcpy(&write_buf[0], p->get_element_ptr(), size_in_bytes); // DXT data is always little endian in memory, just like the DDS format. + // (Except for ETC1, which contains big endian 64-bit QWORD's). //if (!c_crnlib_little_endian_platform) // utils::endian_switch_words(reinterpret_cast(&write_buf[0]), size_in_bytes / sizeof(WORD)); @@ -1070,6 +1235,11 @@ namespace crnlib const uint height = pLevel->get_height(); const image_u8* p = pLevel->get_image(); + image_u8 tmp; + if (pLevel->get_orientation_flags() & (cOrientationFlagXFlipped | cOrientationFlagYFlipped)) + { + p = pLevel->get_unpacked_image(tmp, cUnpackFlagUnflip); + } const uint bits_per_pixel = desc.ddpfPixelFormat.dwRGBBitCount; const uint bytes_per_pixel = bits_per_pixel >> 3; @@ -1151,7 +1321,503 @@ namespace crnlib return true; } - void dds_texture::assign(face_vec& faces) + bool mipmapped_texture::read_ktx(data_stream_serializer& serializer) + { + clear(); + + set_last_error("Unable to read KTX file"); + + ktx_texture kt; + if (!kt.read_from_stream(serializer)) + return false; + + if ((kt.get_depth() > 1) || (kt.get_array_size() > 1)) + { + set_last_error("read_ktx: Depth and array textures are not supported"); + return false; + } + + // Must be 1D, 2D, or a cubemap, with or without mipmaps. + m_width = kt.get_width(); + m_height = kt.get_height(); + + uint num_mip_levels = kt.get_num_mips(); + uint num_faces = kt.get_num_faces(); + + uint32 crnlib_fourcc = 0; + dynamic_string crnlib_fourcc_str; + if (kt.get_key_value_as_string("CRNLIB_FOURCC", crnlib_fourcc_str)) + { + if (crnlib_fourcc_str.get_len() == 4) + { + for (int i = 3; i >= 0; i--) + crnlib_fourcc = (crnlib_fourcc << 8) | crnlib_fourcc_str[i]; + } + } + + const bool is_compressed_texture = kt.is_compressed(); + dxt_format dxt_fmt = cDXTInvalid; + + pixel_packer unpacker; + if (is_compressed_texture) + { + switch (kt.get_ogl_internal_fmt()) + { + case KTX_ETC1_RGB8_OES: + dxt_fmt = cETC1; + break; + case KTX_RGB_S3TC: + case KTX_RGB4_S3TC: + case KTX_COMPRESSED_RGB_S3TC_DXT1_EXT: + case KTX_COMPRESSED_SRGB_S3TC_DXT1_EXT: + dxt_fmt = cDXT1; + break; + case KTX_COMPRESSED_RGBA_S3TC_DXT1_EXT: + case KTX_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT: + dxt_fmt = cDXT1A; + break; + case KTX_RGBA_S3TC: + case KTX_RGBA4_S3TC: + case KTX_COMPRESSED_RGBA_S3TC_DXT3_EXT: + case KTX_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT: + dxt_fmt = cDXT3; + break; + case KTX_COMPRESSED_RGBA_S3TC_DXT5_EXT: + case KTX_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT: + case KTX_RGBA_DXT5_S3TC: + case KTX_RGBA4_DXT5_S3TC: + dxt_fmt = cDXT5; + break; + case KTX_COMPRESSED_SIGNED_RED_GREEN_RGTC2_EXT: + dxt_fmt = cDXN_YX; + if (crnlib_fourcc == PIXEL_FMT_DXN) + { + dxt_fmt = cDXN_XY; + } + break; + case KTX_COMPRESSED_LUMINANCE_LATC1_EXT: + dxt_fmt = cDXT5A; + break; + default: + set_last_error("Unsupported KTX internal format"); + return false; + } + + m_format = pixel_format_helpers::from_dxt_format(dxt_fmt); + if (m_format == PIXEL_FMT_INVALID) + { + set_last_error("Unsupported KTX internal compressed format"); + return false; + } + + if (crnlib_fourcc != 0) + { + switch (crnlib_fourcc) + { + case PIXEL_FMT_DXT5_CCxY: + case PIXEL_FMT_DXT5_xGxR: + case PIXEL_FMT_DXT5_xGBR: + case PIXEL_FMT_DXT5_AGBR: + { + if (dxt_fmt == cDXT5) + { + m_format = static_cast(crnlib_fourcc); + } + break; + } + } + } + } + else + { + m_format = PIXEL_FMT_A8R8G8B8; + const uint type_size = get_ogl_type_size(kt.get_ogl_type()); + const uint type_bits = type_size * 8; + + // Normal component order: 1,2,3,4 (*last* component packed into LSB of output type) + // Reversed component order: 4,3,2,1 (*first* component packed into LSB of output type) + + if (is_packed_pixel_ogl_type(kt.get_ogl_type())) + { + switch (kt.get_ogl_type()) + { + // 24bpp packed formats + case KTX_UNSIGNED_BYTE_3_3_2: unpacker.init("B2G3R3"); m_format = PIXEL_FMT_R8G8B8; break; + case KTX_UNSIGNED_BYTE_2_3_3_REV: unpacker.init("R3G3B2"); m_format = PIXEL_FMT_R8G8B8; break; + case KTX_UNSIGNED_SHORT_5_6_5: unpacker.init("B5G6R5"); m_format = PIXEL_FMT_R8G8B8; break; + case KTX_UNSIGNED_SHORT_5_6_5_REV: unpacker.init("R5G6B5"); m_format = PIXEL_FMT_R8G8B8; break; + // 32bpp packed formats + case KTX_UNSIGNED_SHORT_4_4_4_4: unpacker.init("A4B4G4R4"); break; + case KTX_UNSIGNED_SHORT_4_4_4_4_REV: unpacker.init("R4G4B4A4"); break; + case KTX_UNSIGNED_SHORT_5_5_5_1: unpacker.init("A1B5G5R5"); break; + case KTX_UNSIGNED_SHORT_1_5_5_5_REV: unpacker.init("R5G5B5A1"); break; + case KTX_UNSIGNED_INT_8_8_8_8: unpacker.init("A8B8G8R8"); break; + case KTX_UNSIGNED_INT_8_8_8_8_REV: unpacker.init("R8G8B8A8"); break; + case KTX_UNSIGNED_INT_10_10_10_2: unpacker.init("A2B10G10R10"); break; + case KTX_UNSIGNED_INT_2_10_10_10_REV: unpacker.init("R10G10B10A2"); break; + case KTX_UNSIGNED_INT_5_9_9_9_REV: unpacker.init("R9G9B9A5"); break; + default: + set_last_error("Unsupported KTX packed pixel type"); + return false; + } + + unpacker.set_pixel_stride(get_ogl_type_size(kt.get_ogl_type())); + } + else + { + switch (kt.get_ogl_fmt()) + { + case 1: + case KTX_RED: + case KTX_RED_INTEGER: + case KTX_R8: + case KTX_R8UI: + { + unpacker.init("R", -1, type_bits); + m_format = PIXEL_FMT_R8G8B8; + break; + } + case KTX_GREEN: + case KTX_GREEN_INTEGER: + { + unpacker.init("G", -1, type_bits); + m_format = PIXEL_FMT_R8G8B8; + break; + } + case KTX_BLUE: + case KTX_BLUE_INTEGER: + { + unpacker.init("B", -1, type_bits); + m_format = PIXEL_FMT_R8G8B8; + break; + } + case KTX_ALPHA: + { + unpacker.init("A", -1, type_bits); + m_format = PIXEL_FMT_A8; + break; + } + case KTX_LUMINANCE: + { + unpacker.init("Y", -1, type_bits); + m_format = PIXEL_FMT_L8; + break; + } + case 2: + case KTX_RG: + case KTX_RG8: + case KTX_RG_INTEGER: + { + unpacker.init("RG", -1, type_bits); + m_format = PIXEL_FMT_A8L8; + break; + } + case KTX_LUMINANCE_ALPHA: + { + unpacker.init("YA", -1, type_bits); + m_format = PIXEL_FMT_A8L8; + break; + } + case 3: + case KTX_SRGB: + case KTX_RGB: + case KTX_RGB_INTEGER: + case KTX_RGB8: + case KTX_SRGB8: + { + unpacker.init("RGB", -1, type_bits); + m_format = PIXEL_FMT_R8G8B8; + break; + } + case KTX_BGR: + case KTX_BGR_INTEGER: + { + unpacker.init("BGR", -1, type_bits); + m_format = PIXEL_FMT_R8G8B8; + break; + } + case 4: + case KTX_RGBA_INTEGER: + case KTX_RGBA: + case KTX_SRGB_ALPHA: + case KTX_SRGB8_ALPHA8: + case KTX_RGBA8: + { + unpacker.init("RGBA", -1, type_bits); + break; + } + case KTX_BGRA: + case KTX_BGRA_INTEGER: + { + unpacker.init("BGRA", -1, type_bits); + break; + } + default: + set_last_error("Unsupported KTX pixel format"); + return false; + } + + unpacker.set_pixel_stride(unpacker.get_num_comps() * get_ogl_type_size(kt.get_ogl_type())); + } + + CRNLIB_ASSERT(unpacker.is_valid()); + } + + m_comp_flags = pixel_format_helpers::get_component_flags(m_format); + + m_faces.resize(num_faces); + + bool x_flipped = false; + bool y_flipped = true; + + dynamic_string orient; + if ((kt.get_key_value_as_string("KTXorientation", orient)) && (orient.get_len() >= 7)) + { + // 0123456 + // "S=r,T=d" + if ((orient[0] == 'S') && (orient[1] == '=') && (orient[3] == ',') && + (orient[4] == 'T') && (orient[5] == '=')) + { + if (tolower(orient[2]) == 'l') + x_flipped = true; + else if (tolower(orient[2]) == 'r') + x_flipped = false; + + if (tolower(orient[6]) == 'u') + y_flipped = true; + else if (tolower(orient[6]) == 'd') + y_flipped = false; + } + } + + orientation_flags_t orient_flags = cDefaultOrientationFlags; + if (x_flipped) orient_flags = static_cast(orient_flags | cOrientationFlagXFlipped); + if (y_flipped) orient_flags = static_cast(orient_flags | cOrientationFlagYFlipped); + + bool dxt1_alpha = false; + + for (uint face_index = 0; face_index < num_faces; face_index++) + { + m_faces[face_index].resize(num_mip_levels); + + for (uint level_index = 0; level_index < num_mip_levels; level_index++) + { + const uint width = math::maximum(m_width >> level_index, 1U); + const uint height = math::maximum(m_height >> level_index, 1U); + + mip_level* pMip = crnlib_new(); + m_faces[face_index][level_index] = pMip; + + const crnlib::vector& image_data = kt.get_image_data(level_index, 0, face_index, 0); + + if (is_compressed_texture) + { + const uint bytes_per_block = pixel_format_helpers::get_dxt_bytes_per_block(m_format); + + const uint num_blocks_x = (width + 3) >> 2; + const uint num_blocks_y = (height + 3) >> 2; + + const uint level_pitch = num_blocks_x * num_blocks_y * bytes_per_block; + if (image_data.size() != level_pitch) + return false; + + dxt_image* pDXTImage = crnlib_new(); + if (!pDXTImage->init(dxt_fmt, width, height, false)) + { + crnlib_delete(pDXTImage); + + CRNLIB_ASSERT(0); + return false; + } + + CRNLIB_ASSERT(pDXTImage->get_element_vec().size() * sizeof(dxt_image::element) == level_pitch); + + memcpy(&pDXTImage->get_element_vec()[0], image_data.get_ptr(), image_data.size()); + + if ((m_format == PIXEL_FMT_DXT1) && (!dxt1_alpha)) + dxt1_alpha = pDXTImage->has_alpha(); + + pMip->assign(pDXTImage, m_format, orient_flags); + } + else + { + if (image_data.size() != (width * height * unpacker.get_pixel_stride())) + return false; + + image_u8* pImage = crnlib_new(width, height); + + pImage->set_comp_flags(m_comp_flags); + + const uint8* pSrc = image_data.get_ptr(); + + color_quad_u8 q(0, 0, 0, 255); + + for (uint y = 0; y < height; y++) + { + for (uint x = 0; x < width; x++) + { + color_quad_u8 c; + pSrc = static_cast(unpacker.unpack(pSrc, c)); + pImage->set_pixel_unclipped(x, y, c); + } + } + + pMip->assign(pImage, m_format, orient_flags); + + CRNLIB_ASSERT(pMip->get_comp_flags() == m_comp_flags); + } + } + } + + clear_last_error(); + + if (dxt1_alpha) + change_dxt1_to_dxt1a(); + + return true; + } + + bool mipmapped_texture::write_ktx(data_stream_serializer& serializer) const + { + if (!m_width) + { + set_last_error("Nothing to write"); + return false; + } + + set_last_error("write_ktx() failed"); + + uint32 ogl_internal_fmt = 0, ogl_fmt = 0, ogl_type = 0; + + pixel_packer packer; + + if (is_packed()) + { + switch (get_format()) + { + case PIXEL_FMT_DXT1: + { + ogl_internal_fmt = KTX_COMPRESSED_RGB_S3TC_DXT1_EXT; + break; + } + case PIXEL_FMT_DXT1A: + { + ogl_internal_fmt = KTX_COMPRESSED_RGBA_S3TC_DXT1_EXT; + break; + } + case PIXEL_FMT_DXT2: + case PIXEL_FMT_DXT3: + { + ogl_internal_fmt = KTX_COMPRESSED_RGBA_S3TC_DXT3_EXT; + break; + } + case PIXEL_FMT_DXT4: + case PIXEL_FMT_DXT5: + case PIXEL_FMT_DXT5_CCxY: + case PIXEL_FMT_DXT5_xGxR: + case PIXEL_FMT_DXT5_xGBR: + case PIXEL_FMT_DXT5_AGBR: + { + ogl_internal_fmt = KTX_COMPRESSED_RGBA_S3TC_DXT5_EXT; + break; + } + case PIXEL_FMT_3DC: + case PIXEL_FMT_DXN: + { + ogl_internal_fmt = KTX_COMPRESSED_SIGNED_RED_GREEN_RGTC2_EXT; + break; + } + case PIXEL_FMT_DXT5A: + { + ogl_internal_fmt = KTX_COMPRESSED_LUMINANCE_LATC1_EXT; + break; + } + case PIXEL_FMT_ETC1: + { + ogl_internal_fmt = KTX_ETC1_RGB8_OES; + break; + } + default: + { + CRNLIB_ASSERT(0); + return false; + } + } + } + else + { + ogl_type = KTX_UNSIGNED_BYTE; + + switch (get_format()) + { + case PIXEL_FMT_R8G8B8: ogl_internal_fmt = KTX_RGB8; ogl_fmt = KTX_RGB; packer.init("R8G8B8"); break; + case PIXEL_FMT_L8: ogl_internal_fmt = KTX_LUMINANCE8; ogl_fmt = KTX_LUMINANCE; packer.init("G8"); break; + case PIXEL_FMT_A8: ogl_internal_fmt = KTX_ALPHA8; ogl_fmt = KTX_ALPHA; packer.init("A8"); break; + case PIXEL_FMT_A8L8: ogl_internal_fmt = KTX_LUMINANCE8_ALPHA8; ogl_fmt = KTX_LUMINANCE_ALPHA; packer.init("Y8A8"); break; + case PIXEL_FMT_A8R8G8B8: ogl_internal_fmt = KTX_RGBA8; ogl_fmt = KTX_RGBA; packer.init("R8G8B8A8"); break; + default: + { + CRNLIB_ASSERT(0); + return false; + } + } + } + + ktx_texture kt; + bool success; + if (determine_texture_type() == cTextureTypeCubemap) + success = kt.init_cubemap(get_width(), get_num_levels(), ogl_internal_fmt, ogl_fmt, ogl_type); + else + success = kt.init_2D(get_width(), get_height(), get_num_levels(), ogl_internal_fmt, ogl_fmt, ogl_type); + if (!success) + return false; + + dynamic_string fourcc_str(cVarArg, "%c%c%c%c", m_format & 0xFF, (m_format >> 8) & 0xFF, (m_format >> 16) & 0xFF, (m_format >> 24) & 0xFF); + kt.add_key_value("CRNLIB_FOURCC", fourcc_str.get_ptr()); + + const mip_level* pLevel0 = get_level(0, 0); + dynamic_string ktx_orient_str(cVarArg, "S=%c,T=%c", (pLevel0->get_orientation_flags() & cOrientationFlagXFlipped) ? 'l' : 'r', (pLevel0->get_orientation_flags() & cOrientationFlagYFlipped) ? 'u' : 'd'); + kt.add_key_value("KTXorientation", ktx_orient_str.get_ptr()); + + for (uint face_index = 0; face_index < get_num_faces(); face_index++) + { + for (uint level_index = 0; level_index < get_num_levels(); level_index++) + { + const mip_level* pLevel = get_level(face_index, level_index); + + const uint mip_width = pLevel->get_width(); + const uint mip_height = pLevel->get_height(); + + if (is_packed()) + { + const dxt_image* p = pLevel->get_dxt_image(); + + kt.add_image(face_index, level_index, p->get_element_ptr(), p->get_size_in_bytes()); + } + else + { + const image_u8* p = pLevel->get_image(); + + crnlib::vector tmp(mip_width * mip_height * packer.get_pixel_stride()); + + uint8* pDst = tmp.get_ptr(); + for (uint y = 0; y < mip_height; y++) + for (uint x = 0; x < mip_width; x++) + pDst = (uint8*)packer.pack(p->get_unclamped(x, y), pDst); + + kt.add_image(face_index, level_index, tmp.get_ptr(), tmp.size_in_bytes()); + } + } + } + + if (!kt.write_to_stream(serializer)) + return false; + + clear_last_error(); + return true; + } + + void mipmapped_texture::assign(face_vec& faces) { CRNLIB_ASSERT(!faces.empty()); if (faces.empty()) @@ -1175,45 +1841,45 @@ namespace crnlib CRNLIB_ASSERT(check()); } - void dds_texture::assign(mip_level* pLevel) + void mipmapped_texture::assign(mip_level* pLevel) { face_vec faces(1, mip_ptr_vec(1, pLevel)); assign(faces); } - void dds_texture::assign(image_u8* p, pixel_format fmt) + void mipmapped_texture::assign(image_u8* p, pixel_format fmt, orientation_flags_t orient_flags) { mip_level* pLevel = crnlib_new(); - pLevel->assign(p, fmt); + pLevel->assign(p, fmt, orient_flags); assign(pLevel); } - void dds_texture::assign(dxt_image* p, pixel_format fmt) + void mipmapped_texture::assign(dxt_image* p, pixel_format fmt, orientation_flags_t orient_flags) { mip_level* pLevel = crnlib_new(); - pLevel->assign(p, fmt); + pLevel->assign(p, fmt, orient_flags); assign(pLevel); } - void dds_texture::set(texture_file_types::format source_file_type, const dds_texture& dds_texture) + void mipmapped_texture::set(texture_file_types::format source_file_type, const mipmapped_texture& mipmapped_texture) { clear(); - *this = dds_texture; + *this = mipmapped_texture; m_source_file_type = source_file_type; } - image_u8* dds_texture::get_level_image(uint face, uint level, image_u8& img, bool uncook) const + image_u8* mipmapped_texture::get_level_image(uint face, uint level, image_u8& img, uint unpack_flags) const { if (!is_valid()) return NULL; const mip_level* pLevel = get_level(face, level); - return pLevel->get_unpacked_image(img, uncook); + return pLevel->get_unpacked_image(img, unpack_flags); } - void dds_texture::swap(dds_texture& img) + void mipmapped_texture::swap(mipmapped_texture& img) { utils::swap(m_width, img.m_width); utils::swap(m_height, img.m_height); @@ -1226,7 +1892,7 @@ namespace crnlib CRNLIB_ASSERT(check()); } - texture_type dds_texture::determine_texture_type() const + texture_type mipmapped_texture::determine_texture_type() const { if (!is_valid()) return cTextureTypeUnknown; @@ -1241,7 +1907,7 @@ namespace crnlib return cTextureTypeRegularMap; } - void dds_texture::discard_mips() + void mipmapped_texture::discard_mips() { for (uint f = 0; f < m_faces.size(); f++) { @@ -1257,7 +1923,7 @@ namespace crnlib CRNLIB_ASSERT(check()); } - void dds_texture::init(uint width, uint height, uint levels, uint faces, pixel_format fmt, const char* pName) + void mipmapped_texture::init(uint width, uint height, uint levels, uint faces, pixel_format fmt, const char* pName, orientation_flags_t orient_flags) { clear(); @@ -1270,7 +1936,7 @@ namespace crnlib m_format = fmt; if (pName) m_name.set(pName); - + m_faces.resize(faces); for (uint f = 0; f < faces; f++) { @@ -1285,13 +1951,13 @@ namespace crnlib { dxt_image* p = crnlib_new(); p->init(pixel_format_helpers::get_dxt_format(fmt), mip_width, mip_height, true); - m_faces[f][l]->assign(p, m_format); + m_faces[f][l]->assign(p, m_format, orient_flags); } else { image_u8* p = crnlib_new(mip_width, mip_height); p->set_comp_flags(m_comp_flags); - m_faces[f][l]->assign(p, m_format); + m_faces[f][l]->assign(p, m_format, orient_flags); } } } @@ -1299,7 +1965,7 @@ namespace crnlib CRNLIB_ASSERT(check()); } - void dds_texture::discard_mipmaps() + void mipmapped_texture::discard_mipmaps() { if (!is_valid()) return; @@ -1307,7 +1973,7 @@ namespace crnlib discard_mips(); } - bool dds_texture::convert(pixel_format fmt, bool cook, const dxt_image::pack_params& p) + bool mipmapped_texture::convert(pixel_format fmt, bool cook, const dxt_image::pack_params& p) { if (!is_valid()) return false; @@ -1362,27 +2028,27 @@ namespace crnlib return true; } - bool dds_texture::convert(pixel_format fmt, const dxt_image::pack_params& p) + bool mipmapped_texture::convert(pixel_format fmt, const dxt_image::pack_params& p) { return convert(fmt, true, p); } - bool dds_texture::convert(pixel_format fmt, bool cook, const dxt_image::pack_params& p, int qdxt_quality, bool hierarchical) + bool mipmapped_texture::convert(pixel_format fmt, bool cook, const dxt_image::pack_params& p, int qdxt_quality, bool hierarchical) { - if ((!pixel_format_helpers::is_dxt(fmt)) || (fmt == PIXEL_FMT_DXT3)) + if ((!pixel_format_helpers::is_dxt(fmt)) || (fmt == PIXEL_FMT_DXT3) || (fmt == PIXEL_FMT_ETC1)) { - // QDXT doesn't support DXT3. + // QDXT doesn't support DXT3 or ETC1 yet. return convert(fmt, cook, p); } - dds_texture src_tex(*this); + mipmapped_texture src_tex(*this); if (src_tex.is_packed()) src_tex.unpack_from_dxt(true); if (cook) { - dds_texture cooked_tex(src_tex); + mipmapped_texture cooked_tex(src_tex); for (uint f = 0; f < m_faces.size(); f++) for (uint l = 0; l < m_faces[f].size(); l++) @@ -1407,7 +2073,7 @@ namespace crnlib if (!tp.init(p.m_num_helper_threads)) return false; - dds_texture packed_tex; + mipmapped_texture packed_tex; qdxt_state state(tp); if (!src_tex.qdxt_pack_init(state, packed_tex, q1_params, q5_params, fmt, false)) @@ -1421,7 +2087,7 @@ namespace crnlib return true; } - bool dds_texture::is_packed() const + bool mipmapped_texture::is_packed() const { CRNLIB_ASSERT(is_valid()); if (!is_valid()) @@ -1430,7 +2096,7 @@ namespace crnlib return get_level(0, 0)->is_packed(); } - bool dds_texture::set_alpha_to_luma() + bool mipmapped_texture::set_alpha_to_luma() { CRNLIB_ASSERT(is_valid()); if (!is_valid()) @@ -1451,7 +2117,7 @@ namespace crnlib return true; } - bool dds_texture::convert(image_utils::conversion_type conv_type) + bool mipmapped_texture::convert(image_utils::conversion_type conv_type) { CRNLIB_ASSERT(is_valid()); if (!is_valid()) @@ -1472,7 +2138,7 @@ namespace crnlib return true; } - bool dds_texture::unpack_from_dxt(bool uncook) + bool mipmapped_texture::unpack_from_dxt(bool uncook) { CRNLIB_ASSERT(is_valid()); if (!is_valid()) @@ -1495,7 +2161,7 @@ namespace crnlib return true; } - bool dds_texture::has_alpha() const + bool mipmapped_texture::has_alpha() const { CRNLIB_ASSERT(is_valid()); if (!is_valid()) @@ -1515,7 +2181,7 @@ namespace crnlib return false; } - bool dds_texture::is_normal_map() const + bool mipmapped_texture::is_normal_map() const { CRNLIB_ASSERT(is_valid()); if (!is_valid()) @@ -1534,7 +2200,7 @@ namespace crnlib return image_utils::is_normal_map(tmp, m_name.get_ptr()); } - bool dds_texture::is_vertical_cross() const + bool mipmapped_texture::is_vertical_cross() const { CRNLIB_ASSERT(is_valid()); if (!is_valid()) @@ -1549,7 +2215,7 @@ namespace crnlib return true; } - bool dds_texture::resize(uint new_width, uint new_height, const resample_params& params) + bool mipmapped_texture::resize(uint new_width, uint new_height, const resample_params& params) { CRNLIB_ASSERT(is_valid()); if (!is_valid()) @@ -1567,7 +2233,7 @@ namespace crnlib for (uint f = 0; f < faces.size(); f++) { image_u8 tmp; - image_u8* pImg = get_level(f, 0)->get_unpacked_image(tmp, true); + image_u8* pImg = get_level(f, 0)->get_unpacked_image(tmp, cUnpackFlagUncook); image_u8* pMip = crnlib_new(); @@ -1598,7 +2264,7 @@ namespace crnlib pMip->set_comp_flags(pImg->get_comp_flags()); - faces[f][0]->assign(pMip); + faces[f][0]->assign(pMip, PIXEL_FMT_INVALID, get_level(f, 0)->get_orientation_flags()); } assign(faces); @@ -1608,7 +2274,7 @@ namespace crnlib return true; } - bool dds_texture::generate_mipmaps(const generate_mipmap_params& params, bool force) + bool mipmapped_texture::generate_mipmaps(const generate_mipmap_params& params, bool force) { CRNLIB_ASSERT(is_valid()); if (!is_valid()) @@ -1646,7 +2312,7 @@ namespace crnlib for (uint f = 0; f < faces.size(); f++) { image_u8 tmp; - image_u8* pImg = get_level(f, 0)->get_unpacked_image(tmp, true); + image_u8* pImg = get_level(f, 0)->get_unpacked_image(tmp, cUnpackFlagUncook); for (uint l = 0; l < num_levels; l++) { @@ -1687,7 +2353,7 @@ namespace crnlib pMip->set_comp_flags(pImg->get_comp_flags()); } - faces[f][l]->assign(pMip); + faces[f][l]->assign(pMip, PIXEL_FMT_INVALID, get_level(f, 0)->get_orientation_flags()); } } @@ -1698,7 +2364,7 @@ namespace crnlib return true; } - bool dds_texture::crop(uint x, uint y, uint width, uint height) + bool mipmapped_texture::crop(uint x, uint y, uint width, uint height) { CRNLIB_ASSERT(is_valid()); if (!is_valid()) @@ -1710,7 +2376,7 @@ namespace crnlib return false; image_u8 tmp; - image_u8* pImg = get_level(0, 0)->get_unpacked_image(tmp, true); + image_u8* pImg = get_level(0, 0)->get_unpacked_image(tmp, cUnpackFlagUncook | cUnpackFlagUnflip); image_u8* pMip = crnlib_new(width, height); @@ -1732,7 +2398,7 @@ namespace crnlib return true; } - bool dds_texture::vertical_cross_to_cubemap() + bool mipmapped_texture::vertical_cross_to_cubemap() { if (!is_vertical_cross()) return false; @@ -1741,11 +2407,11 @@ namespace crnlib bool alpha_is_valid = has_alpha(); - dds_texture cubemap; + mipmapped_texture cubemap; pixel_format fmt = alpha_is_valid ? PIXEL_FMT_A8R8G8B8 : PIXEL_FMT_R8G8B8; - cubemap.init(face_width, face_width, 1, 6, fmt, m_name.get_ptr()); + cubemap.init(face_width, face_width, 1, 6, fmt, m_name.get_ptr(), cDefaultOrientationFlags); // +x -x +y -y +z -z // 0 1 2 @@ -1759,7 +2425,7 @@ namespace crnlib const mip_level* pSrc = get_level(0, 0); image_u8 tmp_img; - image_u8* pSrc_image = pSrc->get_unpacked_image(tmp_img, true); + image_u8* pSrc_image = pSrc->get_unpacked_image(tmp_img, cUnpackFlagUncook | cUnpackFlagUnflip); const mip_level* pDst = get_level(face_index, 0); image_u8* pDst_image = pDst->get_image(); @@ -1785,10 +2451,12 @@ namespace crnlib swap(cubemap); + CRNLIB_ASSERT(check()); + return true; } - bool dds_texture::qdxt_pack_init(qdxt_state& state, dds_texture& dst_tex, const qdxt1_params& dxt1_params, const qdxt5_params& dxt5_params, pixel_format fmt, bool cook) + bool mipmapped_texture::qdxt_pack_init(qdxt_state& state, mipmapped_texture& dst_tex, const qdxt1_params& dxt1_params, const qdxt5_params& dxt5_params, pixel_format fmt, bool cook) { if (!is_valid()) return false; @@ -1854,8 +2522,14 @@ namespace crnlib state.m_qdxt5_params[0].m_comp_index = 3; break; } + case PIXEL_FMT_ETC1: + { + console::warning("mipmapped_texture::qdxt_pack_init: This method does not support ETC1"); + return false; + } default: { + CRNLIB_ASSERT(0); return false; } } @@ -1885,7 +2559,7 @@ namespace crnlib state.m_fmt = fmt; - dst_tex.init(get_width(), get_height(), get_num_levels(), get_num_faces(), fmt, get_name().get_ptr()); + dst_tex.init(get_width(), get_height(), get_num_levels(), get_num_faces(), fmt, get_name().get_ptr(), cDefaultOrientationFlags); state.m_pixel_blocks.resize(0); @@ -1907,8 +2581,10 @@ namespace crnlib { mip_level* pLevel = get_level(f, l); + dst_tex.get_level(f, l)->set_orientation_flags(pLevel->get_orientation_flags()); + image_u8 tmp_img; - image_u8 img(*pLevel->get_unpacked_image(tmp_img, true)); + image_u8 img(*pLevel->get_unpacked_image(tmp_img, cUnpackFlagUncook)); if (cook_conv_type != image_utils::cConversion_Invalid) image_utils::convert_image(img, cook_conv_type); @@ -1981,7 +2657,7 @@ namespace crnlib return true; } - bool dds_texture::qdxt_pack(qdxt_state& state, dds_texture& dst_tex, const qdxt1_params& dxt1_params, const qdxt5_params& dxt5_params) + bool mipmapped_texture::qdxt_pack(qdxt_state& state, mipmapped_texture& dst_tex, const qdxt1_params& dxt1_params, const qdxt5_params& dxt5_params) { if (!is_valid()) return false; @@ -2096,59 +2772,101 @@ namespace crnlib return false; } + CRNLIB_ASSERT(dst_tex.check()); + return true; } - - bool dds_texture::load_from_file(const char* pFilename, texture_file_types::format file_format) + + bool mipmapped_texture::read_from_file(const char* pFilename, texture_file_types::format file_format) { clear(); + + set_last_error("Can't open file"); + bool success = false; + + cfile_stream in_stream; + if (in_stream.open(pFilename)) + { + data_stream_serializer serializer(in_stream); + success = read_from_stream(serializer, file_format); + } + + return success; + } + + bool mipmapped_texture::read_from_stream(data_stream_serializer& serializer, texture_file_types::format file_format) + { + clear(); + + if (!serializer.get_stream()) + { + set_last_error("Invalid stream"); + return false; + } + if (file_format == texture_file_types::cFormatInvalid) - file_format = texture_file_types::determine_file_format(pFilename); + file_format = texture_file_types::determine_file_format(serializer.get_name().get_ptr()); if (file_format == texture_file_types::cFormatInvalid) { - set_last_error("Unrecognized image format extension"); + set_last_error("Unsupported file format"); return false; } - + set_last_error("Image file load failed"); bool success = false; - switch (file_format) + + if (!texture_file_types::supports_mipmaps(file_format)) { - case texture_file_types::cFormatDDS: + success = read_regular_image(serializer, file_format); + } + else + { + switch (file_format) { - success = load_dds(pFilename); - break; - } - case texture_file_types::cFormatCRN: - { - success = load_crn(pFilename); - break; - } - default: - { - success = load_regular(pFilename, file_format); - break; + case texture_file_types::cFormatDDS: + { + success = read_dds(serializer); + break; + } + case texture_file_types::cFormatCRN: + { + success = read_crn(serializer); + break; + } + case texture_file_types::cFormatKTX: + { + success = read_ktx(serializer); + break; + } + default: + { + CRNLIB_ASSERT(0); + break; + } } } if (success) { + CRNLIB_ASSERT(check()); + m_source_file_type = file_format; + set_name(serializer.get_name()); clear_last_error(); } return success; } - - bool dds_texture::load_regular(const char* pFilename, texture_file_types::format file_format) + + bool mipmapped_texture::read_regular_image(data_stream_serializer &serializer, texture_file_types::format file_format) { file_format; image_u8* pImg = crnlib_new(); - bool status = image_utils::load_from_file(*pImg, pFilename, 0); + bool status = image_utils::read_from_stream(*pImg, serializer, 0); if (!status) { crnlib_delete(pImg); @@ -2161,34 +2879,13 @@ namespace crnlib pLevel->assign(pImg); assign(pLevel); - set_name(pFilename); + set_name(serializer.get_name()); return true; + } - bool dds_texture::load_dds(const char* pFilename) - { - cfile_stream in_stream; - if (!in_stream.open(pFilename)) - { - set_last_error("Failed opening file"); - return false; - } - - data_stream_serializer serializer(in_stream); - - if (!read_dds(serializer)) - { - set_last_error(get_last_error().get_ptr()); - return false; - } - - set_name(pFilename); - - return true; - } - - bool dds_texture::load_crn_from_memory(const char* pFilename, const void *pData, uint data_size) + bool mipmapped_texture::read_crn_from_memory(const void *pData, uint data_size, const char* pFilename) { clear(); @@ -2231,11 +2928,17 @@ namespace crnlib set_last_error("CRN unpack failed"); +#if 0 timer t; double total_time = 0.0f; t.start(); +#endif + crnd::crnd_unpack_context pContext = crnd::crnd_unpack_begin(pData, data_size); + +#if 0 total_time += t.get_elapsed_secs(); +#endif if (!pContext) { @@ -2263,7 +2966,9 @@ namespace crnlib total_pixels += num_blocks_x * num_blocks_y * 4 * 4 * tex_info.m_faces; +#if 0 t.start(); +#endif for (uint f = 0; f < tex_info.m_faces; f++) pFaces[f] = &dxt_data[f * size_of_face]; @@ -2277,7 +2982,9 @@ namespace crnlib return false; } +#if 0 total_time += t.get_elapsed_secs(); +#endif for (uint f = 0; f < tex_info.m_faces; f++) { @@ -2305,7 +3012,7 @@ namespace crnlib #if 0 if (total_pixels) { - console::info("load_crn: Total pixels: %u, ms: %3.3fms, megapixels/sec: %3.3f", + console::info("read_crn_from_memory: Total pixels: %u, ms: %3.3fms, megapixels/sec: %3.3f", total_pixels, total_time * 1000.0f, total_pixels / total_time); } #endif @@ -2321,77 +3028,98 @@ namespace crnlib return true; } - bool dds_texture::load_crn(const char* pFilename) + bool mipmapped_texture::read_crn(data_stream_serializer& serializer) { - cfile_stream in_stream; - if (!in_stream.open(pFilename)) - { - set_last_error("Failed opening CRN file"); - return false; - } - crnlib::vector crn_data; - if (!in_stream.read_array(crn_data)) + if (!serializer.read_entire_file(crn_data)) { set_last_error("Failed reading CRN file"); return false; } - - in_stream.close(); - - return load_crn_from_memory(pFilename, crn_data.get_ptr(), crn_data.size()); + return read_crn_from_memory(crn_data.get_ptr(), crn_data.size(), serializer.get_name().get_ptr()); } - bool dds_texture::write_to_file( + bool mipmapped_texture::write_to_file( const char* pFilename, texture_file_types::format file_format, - crn_comp_params* pCRN_comp_params, - uint32 *pActual_quality_level, float *pActual_bitrate) + crn_comp_params* pComp_params, + uint32 *pActual_quality_level, float *pActual_bitrate, + uint32 image_write_flags) { if (pActual_quality_level) *pActual_quality_level = 0; if (pActual_bitrate) *pActual_bitrate = 0.0f; if (!is_valid()) { - set_last_error("Unable to save empty texture"); + set_last_error("Unable to write empty texture"); + return false; + } + + if (file_format == texture_file_types::cFormatInvalid) + file_format = texture_file_types::determine_file_format(pFilename); + + if (file_format == texture_file_types::cFormatInvalid) + { + set_last_error("Unknown file format"); return false; } bool success = false; - switch (file_format) + if ( ((pComp_params) && (file_format == texture_file_types::cFormatDDS)) || + (file_format == texture_file_types::cFormatCRN) ) { - case texture_file_types::cFormatDDS: + if (!pComp_params) + return false; + success = write_comp_texture(pFilename, *pComp_params, pActual_quality_level, pActual_bitrate); + } + else if (!texture_file_types::supports_mipmaps(file_format)) + { + success = write_regular_image(pFilename, image_write_flags); + } + else + { + if (pComp_params) { - if (!pCRN_comp_params) - success = save_dds(pFilename); - else - success = save_comp_texture(pFilename, *pCRN_comp_params, pActual_quality_level, pActual_bitrate); - break; + console::warning("mipmapped_texture::write_to_file: Ignoring CRN compression parameters (currently unsupported for this file type)."); } - case texture_file_types::cFormatCRN: + + cfile_stream write_stream; + if (!write_stream.open(pFilename, cDataStreamWritable | cDataStreamSeekable)) { - if (!pCRN_comp_params) - return false; - success = save_comp_texture(pFilename, *pCRN_comp_params, pActual_quality_level, pActual_bitrate); - break; + set_last_error(dynamic_string(cVarArg, "Failed creating output file \"%s\"", pFilename).get_ptr()); + return false; } - default: + data_stream_serializer serializer(write_stream); + + switch (file_format) { - success = save_regular(pFilename); - break; + case texture_file_types::cFormatDDS: + { + success = write_dds(serializer); + break; + } + case texture_file_types::cFormatKTX: + { + success = write_ktx(serializer); + break; + } + default: + { + break; + } } } return success; } - bool dds_texture::save_regular(const char* pFilename) + bool mipmapped_texture::write_regular_image(const char* pFilename, uint32 image_write_flags) { image_u8 tmp; image_u8* pLevel_image = get_level_image(0, 0, tmp); - if (!image_utils::save_to_file(pFilename, *pLevel_image, 0)) + if (!image_utils::write_to_file(pFilename, *pLevel_image, image_write_flags)) { set_last_error("File write failed"); return false; @@ -2399,28 +3127,8 @@ namespace crnlib return true; } - - bool dds_texture::save_dds(const char* pFilename) - { - cfile_stream out_stream; - if (!out_stream.open(pFilename, cDataStreamWritable | cDataStreamSeekable)) - { - set_last_error("Unable to open file"); - return false; - } - - data_stream_serializer serializer(out_stream); - - if (!write_dds(serializer)) - { - set_last_error("File write failed"); - return false; - } - - return true; - } - - void dds_texture::print_crn_comp_params(const crn_comp_params& p) + + void mipmapped_texture::print_crn_comp_params(const crn_comp_params& p) { console::debug("CRN compression params:"); console::debug(" File Type: %s", crn_get_file_type_ext(p.m_file_type)); @@ -2449,7 +3157,7 @@ namespace crnlib console::debug("NumHelperThreads: %u", p.m_num_helper_threads); } - bool dds_texture::save_comp_texture(const char* pFilename, const crn_comp_params &orig_comp_params, uint32 *pActual_quality_level, float *pActual_bitrate) + bool mipmapped_texture::write_comp_texture(const char* pFilename, const crn_comp_params &orig_comp_params, uint32 *pActual_quality_level, float *pActual_bitrate) { crn_comp_params comp_params(orig_comp_params); @@ -2519,7 +3227,7 @@ namespace crnlib return true; } - uint dds_texture::get_total_pixels_in_all_faces_and_mips() const + uint mipmapped_texture::get_total_pixels_in_all_faces_and_mips() const { uint total_pixels = 0; for (uint l = 0; l < m_faces.size(); l++) @@ -2529,5 +3237,138 @@ namespace crnlib return total_pixels; } + void mipmapped_texture::set_orientation_flags(orientation_flags_t flags) + { + for (uint l = 0; l < m_faces.size(); l++) + for (uint m = 0; m < m_faces[l].size(); m++) + m_faces[l][m]->set_orientation_flags(flags); + } + + bool mipmapped_texture::is_flipped() const + { + for (uint l = 0; l < m_faces.size(); l++) + for (uint m = 0; m < m_faces[l].size(); m++) + if (m_faces[l][m]->is_flipped()) + return true; + + return false; + } + + bool mipmapped_texture::is_x_flipped() const + { + for (uint l = 0; l < m_faces.size(); l++) + for (uint m = 0; m < m_faces[l].size(); m++) + if (m_faces[l][m]->is_x_flipped()) + return true; + + return false; + } + + bool mipmapped_texture::is_y_flipped() const + { + for (uint l = 0; l < m_faces.size(); l++) + for (uint m = 0; m < m_faces[l].size(); m++) + if (m_faces[l][m]->is_y_flipped()) + return true; + + return false; + } + + bool mipmapped_texture::can_unflip_without_unpacking() const + { + if (!is_valid()) + return false; + + if (!is_packed()) + return true; + + for (uint l = 0; l < m_faces.size(); l++) + for (uint m = 0; m < m_faces[l].size(); m++) + if (!m_faces[l][m]->can_unflip_without_unpacking()) + return false; + + return true; + } + + bool mipmapped_texture::unflip(bool allow_unpacking_to_flip, bool uncook_if_necessary_to_unpack) + { + if (!is_valid()) + return false; + + if (is_packed()) + { + // The texture is packed - make sure all faces/miplevels can be consistently unflipped. + bool can_do_packed_unflip = can_unflip_without_unpacking(); + + if ((!can_do_packed_unflip) && (!allow_unpacking_to_flip)) + return false; + + // If any face/miplevel can't unflip the packed bits, then just unpack the whole texture. + if (!can_do_packed_unflip) + unpack_from_dxt(uncook_if_necessary_to_unpack); + } + + for (uint l = 0; l < m_faces.size(); l++) + for (uint m = 0; m < m_faces[l].size(); m++) + if (!m_faces[l][m]->unflip(true, false)) + return false; + + CRNLIB_VERIFY(check()); + + return true; + } + +#if 0 + bool mipmapped_texture::flip_x() + { + for (uint l = 0; l < m_faces.size(); l++) + for (uint m = 0; m < m_faces[l].size(); m++) + if (!m_faces[l][m]->flip_x()) + return false; + + return true; + } +#endif + + bool mipmapped_texture::flip_y_helper() + { + for (uint l = 0; l < m_faces.size(); l++) + for (uint m = 0; m < m_faces[l].size(); m++) + if (!m_faces[l][m]->flip_y()) + return false; + + return true; + } + + bool mipmapped_texture::flip_y(bool update_orientation_flags) + { + mipmapped_texture temp_tex(*this); + if (!temp_tex.flip_y_helper()) + { + temp_tex = *this; + temp_tex.unpack_from_dxt(true); + if (!temp_tex.flip_y_helper()) + return false; + } + swap(temp_tex); + + if (update_orientation_flags) + { + for (uint f = 0; f < get_num_faces(); f++) + { + for (uint m = 0; m < get_face(f).size(); m++) + { + uint orient_flags = get_face(f)[m]->get_orientation_flags(); + orient_flags ^= cOrientationFlagYFlipped; + get_face(f)[m]->set_orientation_flags(static_cast(orient_flags)); + } + } + } + + CRNLIB_ASSERT(check()); + + return true; + } + } // namespace crnlib diff --git a/crnlib/crn_dds_texture.h b/crnlib/crn_mipmapped_texture.h similarity index 66% rename from crnlib/crn_dds_texture.h rename to crnlib/crn_mipmapped_texture.h index f372b63..7fed19b 100644 --- a/crnlib/crn_dds_texture.h +++ b/crnlib/crn_mipmapped_texture.h @@ -1,4 +1,4 @@ -// File: crn_dds_texture.h +// File: crn_mipmapped_texture.h // See Copyright Notice and license at the end of inc/crnlib.h #pragma once #include "crn_dxt_image.h" @@ -16,9 +16,23 @@ namespace crnlib { extern const vec2I g_vertical_cross_image_offsets[6]; + enum orientation_flags_t + { + cOrientationFlagXFlipped = 1, + cOrientationFlagYFlipped = 2, + + cDefaultOrientationFlags = 0 + }; + + enum unpack_flags_t + { + cUnpackFlagUncook = 1, + cUnpackFlagUnflip = 2 + }; + class mip_level { - friend class dds_texture; + friend class mipmapped_texture; public: mip_level(); @@ -28,19 +42,22 @@ namespace crnlib mip_level& operator= (const mip_level& rhs); // Assumes ownership. - void assign(image_u8* p, pixel_format fmt = PIXEL_FMT_INVALID); - void assign(dxt_image* p, pixel_format fmt = PIXEL_FMT_INVALID); + void assign(image_u8* p, pixel_format fmt = PIXEL_FMT_INVALID, orientation_flags_t orient_flags = cDefaultOrientationFlags); + void assign(dxt_image* p, pixel_format fmt = PIXEL_FMT_INVALID, orientation_flags_t orient_flags = cDefaultOrientationFlags); void clear(); inline uint get_width() const { return m_width; } inline uint get_height() const { return m_height; } inline uint get_total_pixels() const { return m_width * m_height; } + + orientation_flags_t get_orientation_flags() const { return m_orient_flags; } + void set_orientation_flags(orientation_flags_t flags) { m_orient_flags = flags; } inline image_u8* get_image() const { return m_pImage; } inline dxt_image* get_dxt_image() const { return m_pDXTImage; } - - image_u8* get_unpacked_image(image_u8& tmp, bool uncook) const; + + image_u8* get_unpacked_image(image_u8& tmp, uint unpack_flags) const; inline bool is_packed() const { return m_pDXTImage != NULL; } @@ -54,14 +71,29 @@ namespace crnlib bool convert(pixel_format fmt, bool cook, const dxt_image::pack_params& p); - bool pack_to_dxt(const image_u8& img, pixel_format fmt, bool cook, const dxt_image::pack_params& p); + bool pack_to_dxt(const image_u8& img, pixel_format fmt, bool cook, const dxt_image::pack_params& p, orientation_flags_t orient_flags = cDefaultOrientationFlags); bool pack_to_dxt(pixel_format fmt, bool cook, const dxt_image::pack_params& p); bool unpack_from_dxt(bool uncook = true); + // Returns true if flipped on either axis. + bool is_flipped() const; + + bool is_x_flipped() const; + bool is_y_flipped() const; + + bool can_unflip_without_unpacking() const; + + // Returns true if unflipped on either axis. + // Will try to flip packed (DXT/ETC) data in-place, if this isn't possible it'll unpack/uncook the mip level then unflip. + bool unflip(bool allow_unpacking_to_flip, bool uncook_during_unpack); + bool set_alpha_to_luma(); bool convert(image_utils::conversion_type conv_type); + bool flip_x(); + bool flip_y(); + private: uint m_width; uint m_height; @@ -72,6 +104,8 @@ namespace crnlib image_u8* m_pImage; dxt_image* m_pDXTImage; + orientation_flags_t m_orient_flags; + void cook_image(image_u8& img) const; void uncook_image(image_u8& img) const; }; @@ -82,30 +116,30 @@ namespace crnlib // And an array of one, six, or N faces make up a texture. typedef crnlib::vector face_vec; - class dds_texture + class mipmapped_texture { public: // Construction/destruction - dds_texture(); - ~dds_texture(); + mipmapped_texture(); + ~mipmapped_texture(); - dds_texture(const dds_texture& other); - dds_texture& operator= (const dds_texture& rhs); + mipmapped_texture(const mipmapped_texture& other); + mipmapped_texture& operator= (const mipmapped_texture& rhs); void clear(); - void init(uint width, uint height, uint levels, uint faces, pixel_format fmt, const char* pName); + void init(uint width, uint height, uint levels, uint faces, pixel_format fmt, const char* pName, orientation_flags_t orient_flags); // Assumes ownership. void assign(face_vec& faces); void assign(mip_level* pLevel); - void assign(image_u8* p, pixel_format fmt = PIXEL_FMT_INVALID); - void assign(dxt_image* p, pixel_format fmt = PIXEL_FMT_INVALID); + void assign(image_u8* p, pixel_format fmt = PIXEL_FMT_INVALID, orientation_flags_t orient_flags = cDefaultOrientationFlags); + void assign(dxt_image* p, pixel_format fmt = PIXEL_FMT_INVALID, orientation_flags_t orient_flags = cDefaultOrientationFlags); - void set(texture_file_types::format source_file_type, const dds_texture& dds_texture); + void set(texture_file_types::format source_file_type, const mipmapped_texture& mipmapped_texture); // Accessors - image_u8* get_level_image(uint face, uint level, image_u8& img, bool uncook = true) const; + image_u8* get_level_image(uint face, uint level, image_u8& img, uint unpack_flags = cUnpackFlagUncook | cUnpackFlagUnflip) const; inline bool is_valid() const { return m_faces.size() > 0; } @@ -143,23 +177,26 @@ namespace crnlib const dynamic_string& get_last_error() const { return m_last_error; } void clear_last_error() { m_last_error.clear(); } - // Loading/saving - bool read_dds(const char* pFilename); + // Reading/writing bool read_dds(data_stream_serializer& serializer); - - bool write_dds(const char* pFilename) const; bool write_dds(data_stream_serializer& serializer) const; - bool load_crn_from_memory(const char* pFilename, const void *pData, uint data_size); - + bool read_ktx(data_stream_serializer& serializer); + bool write_ktx(data_stream_serializer& serializer) const; + + bool read_crn(data_stream_serializer& serializer); + bool read_crn_from_memory(const void *pData, uint data_size, const char* pFilename); + // If file_format is texture_file_types::cFormatInvalid, the format will be determined from the filename's extension. - bool load_from_file(const char* pFilename, texture_file_types::format file_format); + bool read_from_file(const char* pFilename, texture_file_types::format file_format = texture_file_types::cFormatInvalid); + bool read_from_stream(data_stream_serializer& serializer, texture_file_types::format file_format = texture_file_types::cFormatInvalid); bool write_to_file( const char* pFilename, - texture_file_types::format file_format, - crn_comp_params* pCRN_comp_params, - uint32 *pActual_quality_level, float *pActual_bitrate); + texture_file_types::format file_format = texture_file_types::cFormatInvalid, + crn_comp_params* pComp_params = NULL, + uint32* pActual_quality_level = NULL, float* pActual_bitrate = NULL, + uint32 image_write_flags = 0); // Conversion bool convert(pixel_format fmt, bool cook, const dxt_image::pack_params& p); @@ -248,15 +285,26 @@ namespace crnlib utils::zero_object(m_has_blocks); } }; - bool qdxt_pack_init(qdxt_state& state, dds_texture& dst_tex, const qdxt1_params& dxt1_params, const qdxt5_params& dxt5_params, pixel_format fmt, bool cook); - bool qdxt_pack(qdxt_state& state, dds_texture& dst_tex, const qdxt1_params& dxt1_params, const qdxt5_params& dxt5_params); + bool qdxt_pack_init(qdxt_state& state, mipmapped_texture& dst_tex, const qdxt1_params& dxt1_params, const qdxt5_params& dxt5_params, pixel_format fmt, bool cook); + bool qdxt_pack(qdxt_state& state, mipmapped_texture& dst_tex, const qdxt1_params& dxt1_params, const qdxt5_params& dxt5_params); - void swap(dds_texture& img); + void swap(mipmapped_texture& img); bool check() const; + void set_orientation_flags(orientation_flags_t flags); + + // Returns true if any face/miplevel is flipped. + bool is_flipped() const; + bool is_x_flipped() const; + bool is_y_flipped() const; + bool can_unflip_without_unpacking() const; + bool unflip(bool allow_unpacking_to_flip, bool uncook_if_necessary_to_unpack); + + bool flip_y(bool update_orientation_flags); + private: - dynamic_string m_name; + dynamic_string m_name; uint m_width; uint m_height; @@ -268,23 +316,22 @@ namespace crnlib texture_file_types::format m_source_file_type; - mutable dynamic_string m_last_error; + mutable dynamic_string m_last_error; inline void clear_last_error() const { m_last_error.clear(); } inline void set_last_error(const char* p) const { m_last_error = p; } void free_all_mips(); + bool read_regular_image(data_stream_serializer &serializer, texture_file_types::format file_format); + bool write_regular_image(const char* pFilename, uint32 image_write_flags); bool read_dds_internal(data_stream_serializer& serializer); - bool load_regular(const char* pFilename, texture_file_types::format file_format); - bool load_dds(const char* pFilename); - bool load_crn(const char* pFilename); void print_crn_comp_params(const crn_comp_params& p); - bool save_regular(const char* pFilename); - bool save_dds(const char* pFilename); - bool save_comp_texture(const char* pFilename, const crn_comp_params &comp_params, uint32 *pActual_quality_level, float *pActual_bitrate); + bool write_comp_texture(const char* pFilename, const crn_comp_params &comp_params, uint32 *pActual_quality_level, float *pActual_bitrate); + void change_dxt1_to_dxt1a(); + bool flip_y_helper(); }; - inline void swap(dds_texture& a, dds_texture& b) + inline void swap(mipmapped_texture& a, mipmapped_texture& b) { a.swap(b); } diff --git a/crnlib/crn_pixel_format.cpp b/crnlib/crn_pixel_format.cpp index 88bda7f..b86345f 100644 --- a/crnlib/crn_pixel_format.cpp +++ b/crnlib/crn_pixel_format.cpp @@ -23,6 +23,7 @@ namespace crnlib PIXEL_FMT_DXT5_xGBR, PIXEL_FMT_DXT5_AGBR, PIXEL_FMT_DXT1A, + PIXEL_FMT_ETC1, PIXEL_FMT_R8G8B8, PIXEL_FMT_L8, PIXEL_FMT_A8, @@ -59,6 +60,7 @@ namespace crnlib case PIXEL_FMT_DXT5_xGxR: return "DXT5_xGxR"; case PIXEL_FMT_DXT5_xGBR: return "DXT5_xGBR"; case PIXEL_FMT_DXT5_AGBR: return "DXT5_AGBR"; + case PIXEL_FMT_ETC1: return "ETC1"; case PIXEL_FMT_R8G8B8: return "R8G8B8"; case PIXEL_FMT_A8R8G8B8: return "A8R8G8B8"; case PIXEL_FMT_A8: return "A8"; @@ -84,6 +86,7 @@ namespace crnlib case cCRNFmtDXN_XY: return "DXN_XY"; case cCRNFmtDXN_YX: return "DXN_YX"; case cCRNFmtDXT5A: return "DXT5A"; + case cCRNFmtETC1: return "ETC1"; default: break; } CRNLIB_ASSERT(false); @@ -98,6 +101,7 @@ namespace crnlib switch (fmt) { case PIXEL_FMT_DXT1: + case PIXEL_FMT_ETC1: { flags = cCompFlagRValid | cCompFlagGValid | cCompFlagBValid; break; @@ -233,6 +237,9 @@ namespace crnlib case PIXEL_FMT_DXT5_xGxR: fmt = cCRNFmtDXT5_xGxR; break; + case PIXEL_FMT_ETC1: + fmt = cCRNFmtETC1; + break; default: { CRNLIB_ASSERT(false); @@ -256,7 +263,12 @@ namespace crnlib case cCRNFmtDXN_XY: return PIXEL_FMT_DXN; case cCRNFmtDXN_YX: return PIXEL_FMT_3DC; case cCRNFmtDXT5A: return PIXEL_FMT_DXT5A; - default: break; + case cCRNFmtETC1: return PIXEL_FMT_ETC1; + default: + { + CRNLIB_ASSERT(false); + break; + } } return PIXEL_FMT_INVALID; diff --git a/crnlib/crn_pixel_format.h b/crnlib/crn_pixel_format.h index 527c9b2..0fcc9b1 100644 --- a/crnlib/crn_pixel_format.h +++ b/crnlib/crn_pixel_format.h @@ -99,6 +99,7 @@ namespace crnlib case PIXEL_FMT_DXT5_xGxR: case PIXEL_FMT_DXT5_xGBR: case PIXEL_FMT_DXT5_AGBR: + case PIXEL_FMT_ETC1: return true; default: break; } @@ -138,6 +139,7 @@ namespace crnlib case PIXEL_FMT_DXT5_xGxR: return cDXT5; case PIXEL_FMT_DXT5_xGBR: return cDXT5; case PIXEL_FMT_DXT5_AGBR: return cDXT5; + case PIXEL_FMT_ETC1: return cETC1; default: break; } return cDXTInvalid; @@ -161,6 +163,8 @@ namespace crnlib return PIXEL_FMT_3DC; case cDXT5A: return PIXEL_FMT_DXT5A; + case cETC1: + return PIXEL_FMT_ETC1; default: break; } CRNLIB_ASSERT(false); @@ -207,6 +211,7 @@ namespace crnlib { case PIXEL_FMT_DXT1: return 4; case PIXEL_FMT_DXT1A: return 4; + case PIXEL_FMT_ETC1: return 4; case PIXEL_FMT_DXT2: return 8; case PIXEL_FMT_DXT3: return 8; case PIXEL_FMT_DXT4: return 8; @@ -236,7 +241,7 @@ namespace crnlib case PIXEL_FMT_DXT1: return 8; case PIXEL_FMT_DXT1A: return 8; case PIXEL_FMT_DXT5A: return 8; - + case PIXEL_FMT_ETC1: return 8; case PIXEL_FMT_DXT2: return 16; case PIXEL_FMT_DXT3: return 16; case PIXEL_FMT_DXT4: return 16; diff --git a/crnlib/crn_platform.h b/crnlib/crn_platform.h index 5e4b312..3588e37 100644 --- a/crnlib/crn_platform.h +++ b/crnlib/crn_platform.h @@ -45,10 +45,13 @@ const bool c_crnlib_big_endian_platform = !c_crnlib_little_endian_platform; #if defined(__GNUC__) #define CRNLIB_ALIGNED(x) __attribute__((aligned(x))) + #define CRNLIB_NOINLINE __attribute__((noinline)) #elif defined(_MSC_VER) #define CRNLIB_ALIGNED(x) __declspec(align(x)) + #define CRNLIB_NOINLINE __declspec(noinline) #else #define CRNLIB_ALIGNED(x) + #define CRNLIB_NOINLINE #endif #define CRNLIB_GET_ALIGNMENT(v) ((!sizeof(v)) ? 1 : (__alignof(v) ? __alignof(v) : sizeof(uint32))) diff --git a/crnlib/crn_qdxt1.cpp b/crnlib/crn_qdxt1.cpp index e092d88..e978d5e 100644 --- a/crnlib/crn_qdxt1.cpp +++ b/crnlib/crn_qdxt1.cpp @@ -306,7 +306,7 @@ namespace crnlib #if GENERATE_DEBUG_IMAGES if (debugging) - image_utils::save_to_file_stb(dynamic_string(cVarArg, "debug_%u.tga", level).get_ptr(), debug_img, image_utils::cSaveIgnoreAlpha); + image_utils::write_to_file(dynamic_string(cVarArg, "debug_%u.tga", level).get_ptr(), debug_img, image_utils::cWriteFlagIgnoreAlpha); #endif } // level diff --git a/crnlib/crn_qdxt5.cpp b/crnlib/crn_qdxt5.cpp index fa27c8d..4e3136d 100644 --- a/crnlib/crn_qdxt5.cpp +++ b/crnlib/crn_qdxt5.cpp @@ -286,7 +286,7 @@ namespace crnlib #if QDXT5_DEBUGGING if (debugging) - image_utils::save_to_file_stb(dynamic_wstring(cVarArg, "debug_%u.tga", level).get_ptr(), debug_img, image_utils::cSaveIgnoreAlpha); + image_utils::write_to_file(dynamic_wstring(cVarArg, "debug_%u.tga", level).get_ptr(), debug_img, image_utils::cWriteFlagIgnoreAlpha); #endif } // level diff --git a/crnlib/crn_radix_sort.h b/crnlib/crn_radix_sort.h new file mode 100644 index 0000000..e71d242 --- /dev/null +++ b/crnlib/crn_radix_sort.h @@ -0,0 +1,345 @@ +// File: crn_radix_sort.h +// See Copyright Notice and license at the end of inc/crnlib.h +#pragma once + +namespace crnlib +{ + // Returns pointer to sorted array. + template + T* radix_sort(uint num_vals, T* pBuf0, T* pBuf1, uint key_ofs, uint key_size) + { + CRNLIB_ASSERT_OPEN_RANGE(key_ofs, 0, sizeof(T)); + CRNLIB_ASSERT_CLOSED_RANGE(key_size, 1, 4); + + uint hist[256 * 4]; + + memset(hist, 0, sizeof(hist[0]) * 256 * key_size); + +#define CRNLIB_GET_KEY(p) (*(uint*)((uint8*)(p) + key_ofs)) + + if (key_size == 4) + { + T* p = pBuf0; + T* q = pBuf0 + num_vals; + for ( ; p != q; p++) + { + const uint key = CRNLIB_GET_KEY(p); + + hist[ key & 0xFF]++; + hist[256 + ((key >> 8) & 0xFF)]++; + hist[512 + ((key >> 16) & 0xFF)]++; + hist[768 + ((key >> 24) & 0xFF)]++; + } + } + else if (key_size == 3) + { + T* p = pBuf0; + T* q = pBuf0 + num_vals; + for ( ; p != q; p++) + { + const uint key = CRNLIB_GET_KEY(p); + + hist[ key & 0xFF]++; + hist[256 + ((key >> 8) & 0xFF)]++; + hist[512 + ((key >> 16) & 0xFF)]++; + } + } + else if (key_size == 2) + { + T* p = pBuf0; + T* q = pBuf0 + (num_vals >> 1) * 2; + + for ( ; p != q; p += 2) + { + const uint key0 = CRNLIB_GET_KEY(p); + const uint key1 = CRNLIB_GET_KEY(p+1); + + hist[ key0 & 0xFF]++; + hist[256 + ((key0 >> 8) & 0xFF)]++; + + hist[ key1 & 0xFF]++; + hist[256 + ((key1 >> 8) & 0xFF)]++; + } + + if (num_vals & 1) + { + const uint key = CRNLIB_GET_KEY(p); + + hist[ key & 0xFF]++; + hist[256 + ((key >> 8) & 0xFF)]++; + } + } + else + { + CRNLIB_ASSERT(key_size == 1); + if (key_size != 1) + return NULL; + + T* p = pBuf0; + T* q = pBuf0 + (num_vals >> 1) * 2; + + for ( ; p != q; p += 2) + { + const uint key0 = CRNLIB_GET_KEY(p); + const uint key1 = CRNLIB_GET_KEY(p+1); + + hist[key0 & 0xFF]++; + hist[key1 & 0xFF]++; + } + + if (num_vals & 1) + { + const uint key = CRNLIB_GET_KEY(p); + hist[key & 0xFF]++; + } + } + + T* pCur = pBuf0; + T* pNew = pBuf1; + + for (uint pass = 0; pass < key_size; pass++) + { + const uint* pHist = &hist[pass << 8]; + + uint offsets[256]; + + uint cur_ofs = 0; + for (uint i = 0; i < 256; i += 2) + { + offsets[i] = cur_ofs; + cur_ofs += pHist[i]; + + offsets[i+1] = cur_ofs; + cur_ofs += pHist[i+1]; + } + + const uint pass_shift = pass << 3; + + T* p = pCur; + T* q = pCur + (num_vals >> 1) * 2; + + for ( ; p != q; p += 2) + { + uint c0 = (CRNLIB_GET_KEY(p) >> pass_shift) & 0xFF; + uint c1 = (CRNLIB_GET_KEY(p+1) >> pass_shift) & 0xFF; + + if (c0 == c1) + { + uint dst_offset0 = offsets[c0]; + + offsets[c0] = dst_offset0 + 2; + + pNew[dst_offset0] = p[0]; + pNew[dst_offset0 + 1] = p[1]; + } + else + { + uint dst_offset0 = offsets[c0]++; + uint dst_offset1 = offsets[c1]++; + + pNew[dst_offset0] = p[0]; + pNew[dst_offset1] = p[1]; + } + } + + if (num_vals & 1) + { + uint c = (CRNLIB_GET_KEY(p) >> pass_shift) & 0xFF; + + uint dst_offset = offsets[c]; + offsets[c] = dst_offset + 1; + + pNew[dst_offset] = *p; + } + + T* t = pCur; + pCur = pNew; + pNew = t; + } + + return pCur; + } + +#undef CRNLIB_GET_KEY + + // Returns pointer to sorted array. + template + T* indirect_radix_sort(uint num_indices, T* pIndices0, T* pIndices1, const Q* pKeys, uint key_ofs, uint key_size, bool init_indices) + { + CRNLIB_ASSERT_OPEN_RANGE(key_ofs, 0, sizeof(T)); + CRNLIB_ASSERT_CLOSED_RANGE(key_size, 1, 4); + + if (init_indices) + { + T* p = pIndices0; + T* q = pIndices0 + (num_indices >> 1) * 2; + uint i; + for (i = 0; p != q; p += 2, i += 2) + { + p[0] = static_cast(i); + p[1] = static_cast(i + 1); + } + + if (num_indices & 1) + *p = static_cast(i); + } + + uint hist[256 * 4]; + + memset(hist, 0, sizeof(hist[0]) * 256 * key_size); + +#define CRNLIB_GET_KEY(p) (*(const uint*)((const uint8*)(pKeys + *(p)) + key_ofs)) +#define CRNLIB_GET_KEY_FROM_INDEX(i) (*(const uint*)((const uint8*)(pKeys + (i)) + key_ofs)) + + if (key_size == 4) + { + T* p = pIndices0; + T* q = pIndices0 + num_indices; + for ( ; p != q; p++) + { + const uint key = CRNLIB_GET_KEY(p); + + hist[ key & 0xFF]++; + hist[256 + ((key >> 8) & 0xFF)]++; + hist[512 + ((key >> 16) & 0xFF)]++; + hist[768 + ((key >> 24) & 0xFF)]++; + } + } + else if (key_size == 3) + { + T* p = pIndices0; + T* q = pIndices0 + num_indices; + for ( ; p != q; p++) + { + const uint key = CRNLIB_GET_KEY(p); + + hist[ key & 0xFF]++; + hist[256 + ((key >> 8) & 0xFF)]++; + hist[512 + ((key >> 16) & 0xFF)]++; + } + } + else if (key_size == 2) + { + T* p = pIndices0; + T* q = pIndices0 + (num_indices >> 1) * 2; + + for ( ; p != q; p += 2) + { + const uint key0 = CRNLIB_GET_KEY(p); + const uint key1 = CRNLIB_GET_KEY(p+1); + + hist[ key0 & 0xFF]++; + hist[256 + ((key0 >> 8) & 0xFF)]++; + + hist[ key1 & 0xFF]++; + hist[256 + ((key1 >> 8) & 0xFF)]++; + } + + if (num_indices & 1) + { + const uint key = CRNLIB_GET_KEY(p); + + hist[ key & 0xFF]++; + hist[256 + ((key >> 8) & 0xFF)]++; + } + } + else + { + CRNLIB_ASSERT(key_size == 1); + if (key_size != 1) + return NULL; + + T* p = pIndices0; + T* q = pIndices0 + (num_indices >> 1) * 2; + + for ( ; p != q; p += 2) + { + const uint key0 = CRNLIB_GET_KEY(p); + const uint key1 = CRNLIB_GET_KEY(p+1); + + hist[key0 & 0xFF]++; + hist[key1 & 0xFF]++; + } + + if (num_indices & 1) + { + const uint key = CRNLIB_GET_KEY(p); + + hist[key & 0xFF]++; + } + } + + T* pCur = pIndices0; + T* pNew = pIndices1; + + for (uint pass = 0; pass < key_size; pass++) + { + const uint* pHist = &hist[pass << 8]; + + uint offsets[256]; + + uint cur_ofs = 0; + for (uint i = 0; i < 256; i += 2) + { + offsets[i] = cur_ofs; + cur_ofs += pHist[i]; + + offsets[i+1] = cur_ofs; + cur_ofs += pHist[i+1]; + } + + const uint pass_shift = pass << 3; + + T* p = pCur; + T* q = pCur + (num_indices >> 1) * 2; + + for ( ; p != q; p += 2) + { + uint index0 = p[0]; + uint index1 = p[1]; + + uint c0 = (CRNLIB_GET_KEY_FROM_INDEX(index0) >> pass_shift) & 0xFF; + uint c1 = (CRNLIB_GET_KEY_FROM_INDEX(index1) >> pass_shift) & 0xFF; + + if (c0 == c1) + { + uint dst_offset0 = offsets[c0]; + + offsets[c0] = dst_offset0 + 2; + + pNew[dst_offset0] = static_cast(index0); + pNew[dst_offset0 + 1] = static_cast(index1); + } + else + { + uint dst_offset0 = offsets[c0]++; + uint dst_offset1 = offsets[c1]++; + + pNew[dst_offset0] = static_cast(index0); + pNew[dst_offset1] = static_cast(index1); + } + } + + if (num_indices & 1) + { + uint index = *p; + uint c = (CRNLIB_GET_KEY_FROM_INDEX(index) >> pass_shift) & 0xFF; + + uint dst_offset = offsets[c]; + offsets[c] = dst_offset + 1; + + pNew[dst_offset] = static_cast(index); + } + + T* t = pCur; + pCur = pNew; + pNew = t; + } + + return pCur; + } + +#undef CRNLIB_GET_KEY +#undef CRNLIB_GET_KEY_FROM_INDEX + +} // namespace crnlib diff --git a/crnlib/crn_rand.cpp b/crnlib/crn_rand.cpp index c6159dd..507083f 100644 --- a/crnlib/crn_rand.cpp +++ b/crnlib/crn_rand.cpp @@ -211,7 +211,7 @@ namespace crnlib return math::clamp(r, l, h); } - + int random::irand(int l, int h) { CRNLIB_ASSERT(l < h); @@ -236,6 +236,12 @@ namespace crnlib return result; } + int random::irand_inclusive(int l, int h) + { + CRNLIB_ASSERT(h < cINT32_MAX); + return irand(l, h + 1); + } + /* ALGORITHM 712, COLLECTED ALGORITHMS FROM ACM. THIS WORK PUBLISHED IN TRANSACTIONS ON MATHEMATICAL SOFTWARE, diff --git a/crnlib/crn_rand.h b/crnlib/crn_rand.h index 64d823d..3179dce 100644 --- a/crnlib/crn_rand.h +++ b/crnlib/crn_rand.h @@ -77,6 +77,9 @@ namespace crnlib // Returns random between [l, h) int irand(int l, int h); + + // Returns random between [l, h] + int irand_inclusive(int l, int h); double gaussian(double mean, double stddev); diff --git a/crnlib/crn_rect.h b/crnlib/crn_rect.h index a7d4a0e..9364c31 100644 --- a/crnlib/crn_rect.h +++ b/crnlib/crn_rect.h @@ -17,7 +17,8 @@ namespace crnlib { clear(); } - + + // up to, but not including right/bottom inline rect(int left, int top, int right, int bottom) { set(left, top, right, bottom); diff --git a/crnlib/crn_resampler.cpp b/crnlib/crn_resampler.cpp index cbe2ab5..4fded5b 100644 --- a/crnlib/crn_resampler.cpp +++ b/crnlib/crn_resampler.cpp @@ -132,7 +132,7 @@ namespace crnlib if (xscale < 1.0f) { - int total; + int total; (void)total; /* Handle case when there are fewer destination * samples than source samples (downsampling/minification). diff --git a/crnlib/crn_rg_etc1.cpp b/crnlib/crn_rg_etc1.cpp new file mode 100644 index 0000000..664825e --- /dev/null +++ b/crnlib/crn_rg_etc1.cpp @@ -0,0 +1,2449 @@ +// File: rg_etc1.cpp - Fast, high quality ETC1 block packer/unpacker - Rich Geldreich +// Please see ZLIB license at the end of rg_etc1.h. +// +// For more information Ericsson Texture Compression (ETC/ETC1), see: +// http://www.khronos.org/registry/gles/extensions/OES/OES_compressed_ETC1_RGB8_texture.txt +// +// v1.03 - 5/12/13 - Initial public release +#include "crn_core.h" +#include "crn_rg_etc1.h" + +#include +#include +#include +//#include +#include + +#if defined(_MSC_VER) +#pragma warning (disable: 4201) // nonstandard extension used : nameless struct/union +#endif + +#if defined(_DEBUG) || defined(DEBUG) +#define RG_ETC1_BUILD_DEBUG +#endif + +#define RG_ETC1_ASSERT CRNLIB_ASSERT + +namespace crnlib { + +namespace rg_etc1 +{ + typedef unsigned char uint8; + typedef unsigned short uint16; + typedef unsigned int uint; + typedef unsigned int uint32; + typedef long long int64; + typedef unsigned long long uint64; + + const uint32 cUINT32_MAX = 0xFFFFFFFFU; + const uint64 cUINT64_MAX = 0xFFFFFFFFFFFFFFFFULL; //0xFFFFFFFFFFFFFFFFui64; + + template inline T minimum(T a, T b) { return (a < b) ? a : b; } + template inline T minimum(T a, T b, T c) { return minimum(minimum(a, b), c); } + template inline T maximum(T a, T b) { return (a > b) ? a : b; } + template inline T maximum(T a, T b, T c) { return maximum(maximum(a, b), c); } + template inline T clamp(T value, T low, T high) { return (value < low) ? low : ((value > high) ? high : value); } + template inline T square(T value) { return value * value; } + template inline void zero_object(T& obj) { memset((void*)&obj, 0, sizeof(obj)); } + template inline void zero_this(T* pObj) { memset((void*)pObj, 0, sizeof(*pObj)); } + + template T decay_array_to_subtype(T (&a)[N]); + +#define RG_ETC1_ARRAY_SIZE(X) (sizeof(X) / sizeof(decay_array_to_subtype(X))) + + enum eNoClamp { cNoClamp }; + + struct color_quad_u8 + { + static inline int clamp(int v) { if (v & 0xFFFFFF00U) v = (~(static_cast(v) >> 31)) & 0xFF; return v; } + + struct component_traits { enum { cSigned = false, cFloat = false, cMin = 0U, cMax = 255U }; }; + + public: + typedef unsigned char component_t; + typedef int parameter_t; + + enum { cNumComps = 4 }; + + union + { + struct + { + component_t r; + component_t g; + component_t b; + component_t a; + }; + + component_t c[cNumComps]; + + uint32 m_u32; + }; + + inline color_quad_u8() + { + } + + inline color_quad_u8(const color_quad_u8& other) : m_u32(other.m_u32) + { + } + + explicit inline color_quad_u8(parameter_t y, parameter_t alpha = component_traits::cMax) + { + set(y, alpha); + } + + inline color_quad_u8(parameter_t red, parameter_t green, parameter_t blue, parameter_t alpha = component_traits::cMax) + { + set(red, green, blue, alpha); + } + + explicit inline color_quad_u8(eNoClamp, parameter_t y, parameter_t alpha = component_traits::cMax) + { + set_noclamp_y_alpha(y, alpha); + } + + inline color_quad_u8(eNoClamp, parameter_t red, parameter_t green, parameter_t blue, parameter_t alpha = component_traits::cMax) + { + set_noclamp_rgba(red, green, blue, alpha); + } + + inline void clear() + { + m_u32 = 0; + } + + inline color_quad_u8& operator= (const color_quad_u8& other) + { + m_u32 = other.m_u32; + return *this; + } + + inline color_quad_u8& set_rgb(const color_quad_u8& other) + { + r = other.r; + g = other.g; + b = other.b; + return *this; + } + + inline color_quad_u8& operator= (parameter_t y) + { + set(y, component_traits::cMax); + return *this; + } + + inline color_quad_u8& set(parameter_t y, parameter_t alpha = component_traits::cMax) + { + y = clamp(y); + alpha = clamp(alpha); + r = static_cast(y); + g = static_cast(y); + b = static_cast(y); + a = static_cast(alpha); + return *this; + } + + inline color_quad_u8& set_noclamp_y_alpha(parameter_t y, parameter_t alpha = component_traits::cMax) + { + RG_ETC1_ASSERT( (y >= component_traits::cMin) && (y <= component_traits::cMax) ); + RG_ETC1_ASSERT( (alpha >= component_traits::cMin) && (alpha <= component_traits::cMax) ); + + r = static_cast(y); + g = static_cast(y); + b = static_cast(y); + a = static_cast(alpha); + return *this; + } + + inline color_quad_u8& set(parameter_t red, parameter_t green, parameter_t blue, parameter_t alpha = component_traits::cMax) + { + r = static_cast(clamp(red)); + g = static_cast(clamp(green)); + b = static_cast(clamp(blue)); + a = static_cast(clamp(alpha)); + return *this; + } + + inline color_quad_u8& set_noclamp_rgba(parameter_t red, parameter_t green, parameter_t blue, parameter_t alpha) + { + RG_ETC1_ASSERT( (red >= component_traits::cMin) && (red <= component_traits::cMax) ); + RG_ETC1_ASSERT( (green >= component_traits::cMin) && (green <= component_traits::cMax) ); + RG_ETC1_ASSERT( (blue >= component_traits::cMin) && (blue <= component_traits::cMax) ); + RG_ETC1_ASSERT( (alpha >= component_traits::cMin) && (alpha <= component_traits::cMax) ); + + r = static_cast(red); + g = static_cast(green); + b = static_cast(blue); + a = static_cast(alpha); + return *this; + } + + inline color_quad_u8& set_noclamp_rgb(parameter_t red, parameter_t green, parameter_t blue) + { + RG_ETC1_ASSERT( (red >= component_traits::cMin) && (red <= component_traits::cMax) ); + RG_ETC1_ASSERT( (green >= component_traits::cMin) && (green <= component_traits::cMax) ); + RG_ETC1_ASSERT( (blue >= component_traits::cMin) && (blue <= component_traits::cMax) ); + + r = static_cast(red); + g = static_cast(green); + b = static_cast(blue); + return *this; + } + + static inline parameter_t get_min_comp() { return component_traits::cMin; } + static inline parameter_t get_max_comp() { return component_traits::cMax; } + static inline bool get_comps_are_signed() { return component_traits::cSigned; } + + inline component_t operator[] (uint i) const { RG_ETC1_ASSERT(i < cNumComps); return c[i]; } + inline component_t& operator[] (uint i) { RG_ETC1_ASSERT(i < cNumComps); return c[i]; } + + inline color_quad_u8& set_component(uint i, parameter_t f) + { + RG_ETC1_ASSERT(i < cNumComps); + + c[i] = static_cast(clamp(f)); + + return *this; + } + + inline color_quad_u8& set_grayscale(parameter_t l) + { + component_t x = static_cast(clamp(l)); + c[0] = x; + c[1] = x; + c[2] = x; + return *this; + } + + inline color_quad_u8& clamp(const color_quad_u8& l, const color_quad_u8& h) + { + for (uint i = 0; i < cNumComps; i++) + c[i] = static_cast(rg_etc1::clamp(c[i], l[i], h[i])); + return *this; + } + + inline color_quad_u8& clamp(parameter_t l, parameter_t h) + { + for (uint i = 0; i < cNumComps; i++) + c[i] = static_cast(rg_etc1::clamp(c[i], l, h)); + return *this; + } + + // Returns CCIR 601 luma (consistent with color_utils::RGB_To_Y). + inline parameter_t get_luma() const + { + return static_cast((19595U * r + 38470U * g + 7471U * b + 32768U) >> 16U); + } + + // Returns REC 709 luma. + inline parameter_t get_luma_rec709() const + { + return static_cast((13938U * r + 46869U * g + 4729U * b + 32768U) >> 16U); + } + + inline uint squared_distance_rgb(const color_quad_u8& c) const + { + return rg_etc1::square(r - c.r) + rg_etc1::square(g - c.g) + rg_etc1::square(b - c.b); + } + + inline uint squared_distance_rgba(const color_quad_u8& c) const + { + return rg_etc1::square(r - c.r) + rg_etc1::square(g - c.g) + rg_etc1::square(b - c.b) + rg_etc1::square(a - c.a); + } + + inline bool rgb_equals(const color_quad_u8& rhs) const + { + return (r == rhs.r) && (g == rhs.g) && (b == rhs.b); + } + + inline bool operator== (const color_quad_u8& rhs) const + { + return m_u32 == rhs.m_u32; + } + + color_quad_u8& operator+= (const color_quad_u8& other) + { + for (uint i = 0; i < 4; i++) + c[i] = static_cast(clamp(c[i] + other.c[i])); + return *this; + } + + color_quad_u8& operator-= (const color_quad_u8& other) + { + for (uint i = 0; i < 4; i++) + c[i] = static_cast(clamp(c[i] - other.c[i])); + return *this; + } + + friend color_quad_u8 operator+ (const color_quad_u8& lhs, const color_quad_u8& rhs) + { + color_quad_u8 result(lhs); + result += rhs; + return result; + } + + friend color_quad_u8 operator- (const color_quad_u8& lhs, const color_quad_u8& rhs) + { + color_quad_u8 result(lhs); + result -= rhs; + return result; + } + }; // class color_quad_u8 + + struct vec3F + { + float m_s[3]; + + inline vec3F() { } + inline vec3F(float s) { m_s[0] = s; m_s[1] = s; m_s[2] = s; } + inline vec3F(float x, float y, float z) { m_s[0] = x; m_s[1] = y; m_s[2] = z; } + + inline float operator[] (uint i) const { RG_ETC1_ASSERT(i < 3); return m_s[i]; } + + inline vec3F& operator += (const vec3F& other) { for (uint i = 0; i < 3; i++) m_s[i] += other.m_s[i]; return *this; } + + inline vec3F& operator *= (float s) { for (uint i = 0; i < 3; i++) m_s[i] *= s; return *this; } + }; + + enum etc_constants + { + cETC1BytesPerBlock = 8U, + + cETC1SelectorBits = 2U, + cETC1SelectorValues = 1U << cETC1SelectorBits, + cETC1SelectorMask = cETC1SelectorValues - 1U, + + cETC1BlockShift = 2U, + cETC1BlockSize = 1U << cETC1BlockShift, + + cETC1LSBSelectorIndicesBitOffset = 0, + cETC1MSBSelectorIndicesBitOffset = 16, + + cETC1FlipBitOffset = 32, + cETC1DiffBitOffset = 33, + + cETC1IntenModifierNumBits = 3, + cETC1IntenModifierValues = 1 << cETC1IntenModifierNumBits, + cETC1RightIntenModifierTableBitOffset = 34, + cETC1LeftIntenModifierTableBitOffset = 37, + + // Base+Delta encoding (5 bit bases, 3 bit delta) + cETC1BaseColorCompNumBits = 5, + cETC1BaseColorCompMax = 1 << cETC1BaseColorCompNumBits, + + cETC1DeltaColorCompNumBits = 3, + cETC1DeltaColorComp = 1 << cETC1DeltaColorCompNumBits, + cETC1DeltaColorCompMax = 1 << cETC1DeltaColorCompNumBits, + + cETC1BaseColor5RBitOffset = 59, + cETC1BaseColor5GBitOffset = 51, + cETC1BaseColor5BBitOffset = 43, + + cETC1DeltaColor3RBitOffset = 56, + cETC1DeltaColor3GBitOffset = 48, + cETC1DeltaColor3BBitOffset = 40, + + // Absolute (non-delta) encoding (two 4-bit per component bases) + cETC1AbsColorCompNumBits = 4, + cETC1AbsColorCompMax = 1 << cETC1AbsColorCompNumBits, + + cETC1AbsColor4R1BitOffset = 60, + cETC1AbsColor4G1BitOffset = 52, + cETC1AbsColor4B1BitOffset = 44, + + cETC1AbsColor4R2BitOffset = 56, + cETC1AbsColor4G2BitOffset = 48, + cETC1AbsColor4B2BitOffset = 40, + + cETC1ColorDeltaMin = -4, + cETC1ColorDeltaMax = 3, + + // Delta3: + // 0 1 2 3 4 5 6 7 + // 000 001 010 011 100 101 110 111 + // 0 1 2 3 -4 -3 -2 -1 + }; + + static uint8 g_quant5_tab[256+16]; + + static const int g_etc1_inten_tables[cETC1IntenModifierValues][cETC1SelectorValues] = + { + { -8, -2, 2, 8 }, { -17, -5, 5, 17 }, { -29, -9, 9, 29 }, { -42, -13, 13, 42 }, + { -60, -18, 18, 60 }, { -80, -24, 24, 80 }, { -106, -33, 33, 106 }, { -183, -47, 47, 183 } + }; + + static const uint8 g_etc1_to_selector_index[cETC1SelectorValues] = { 2, 3, 1, 0 }; + static const uint8 g_selector_index_to_etc1[cETC1SelectorValues] = { 3, 2, 0, 1 }; + + // Given an ETC1 diff/inten_table/selector, and an 8-bit desired color, this table encodes the best packed_color in the low byte, and the abs error in the high byte. + static uint16 g_etc1_inverse_lookup[2*8*4][256]; // [diff/inten_table/selector][desired_color] + + // g_color8_to_etc_block_config[color][table_index] = Supplies for each 8-bit color value a list of packed ETC1 diff/intensity table/selectors/packed_colors that map to that color. + // To pack: diff | (inten << 1) | (selector << 4) | (packed_c << 8) + static const uint16 g_color8_to_etc_block_config_0_255[2][33] = + { + { 0x0000, 0x0010, 0x0002, 0x0012, 0x0004, 0x0014, 0x0006, 0x0016, 0x0008, 0x0018, 0x000A, 0x001A, 0x000C, 0x001C, 0x000E, 0x001E, + 0x0001, 0x0011, 0x0003, 0x0013, 0x0005, 0x0015, 0x0007, 0x0017, 0x0009, 0x0019, 0x000B, 0x001B, 0x000D, 0x001D, 0x000F, 0x001F, 0xFFFF }, + { 0x0F20, 0x0F30, 0x0E32, 0x0F22, 0x0E34, 0x0F24, 0x0D36, 0x0F26, 0x0C38, 0x0E28, 0x0B3A, 0x0E2A, 0x093C, 0x0E2C, 0x053E, 0x0D2E, + 0x1E31, 0x1F21, 0x1D33, 0x1F23, 0x1C35, 0x1E25, 0x1A37, 0x1E27, 0x1839, 0x1D29, 0x163B, 0x1C2B, 0x133D, 0x1B2D, 0x093F, 0x1A2F, 0xFFFF }, + }; + + // Really only [254][11]. + static const uint16 g_color8_to_etc_block_config_1_to_254[254][12] = + { + { 0x021C, 0x0D0D, 0xFFFF }, { 0x0020, 0x0021, 0x0A0B, 0x061F, 0xFFFF }, { 0x0113, 0x0217, 0xFFFF }, { 0x0116, 0x031E, + 0x0B0E, 0x0405, 0xFFFF }, { 0x0022, 0x0204, 0x050A, 0x0023, 0xFFFF }, { 0x0111, 0x0319, 0x0809, 0x170F, 0xFFFF }, { + 0x0303, 0x0215, 0x0607, 0xFFFF }, { 0x0030, 0x0114, 0x0408, 0x0031, 0x0201, 0x051D, 0xFFFF }, { 0x0100, 0x0024, 0x0306, + 0x0025, 0x041B, 0x0E0D, 0xFFFF }, { 0x021A, 0x0121, 0x0B0B, 0x071F, 0xFFFF }, { 0x0213, 0x0317, 0xFFFF }, { 0x0112, + 0x0505, 0xFFFF }, { 0x0026, 0x070C, 0x0123, 0x0027, 0xFFFF }, { 0x0211, 0x0909, 0xFFFF }, { 0x0110, 0x0315, 0x0707, + 0x0419, 0x180F, 0xFFFF }, { 0x0218, 0x0131, 0x0301, 0x0403, 0x061D, 0xFFFF }, { 0x0032, 0x0202, 0x0033, 0x0125, 0x051B, + 0x0F0D, 0xFFFF }, { 0x0028, 0x031C, 0x0221, 0x0029, 0xFFFF }, { 0x0120, 0x0313, 0x0C0B, 0x081F, 0xFFFF }, { 0x0605, + 0x0417, 0xFFFF }, { 0x0216, 0x041E, 0x0C0E, 0x0223, 0x0127, 0xFFFF }, { 0x0122, 0x0304, 0x060A, 0x0311, 0x0A09, 0xFFFF + }, { 0x0519, 0x190F, 0xFFFF }, { 0x002A, 0x0231, 0x0503, 0x0415, 0x0807, 0x002B, 0x071D, 0xFFFF }, { 0x0130, 0x0214, + 0x0508, 0x0401, 0x0133, 0x0225, 0x061B, 0xFFFF }, { 0x0200, 0x0124, 0x0406, 0x0321, 0x0129, 0x100D, 0xFFFF }, { 0x031A, + 0x0D0B, 0x091F, 0xFFFF }, { 0x0413, 0x0705, 0x0517, 0xFFFF }, { 0x0212, 0x0034, 0x0323, 0x0035, 0x0227, 0xFFFF }, { + 0x0126, 0x080C, 0x0B09, 0xFFFF }, { 0x0411, 0x0619, 0x1A0F, 0xFFFF }, { 0x0210, 0x0331, 0x0603, 0x0515, 0x0907, 0x012B, + 0xFFFF }, { 0x0318, 0x002C, 0x0501, 0x0233, 0x0325, 0x071B, 0x002D, 0x081D, 0xFFFF }, { 0x0132, 0x0302, 0x0229, 0x110D, + 0xFFFF }, { 0x0128, 0x041C, 0x0421, 0x0E0B, 0x0A1F, 0xFFFF }, { 0x0220, 0x0513, 0x0617, 0xFFFF }, { 0x0135, 0x0805, + 0x0327, 0xFFFF }, { 0x0316, 0x051E, 0x0D0E, 0x0423, 0xFFFF }, { 0x0222, 0x0404, 0x070A, 0x0511, 0x0719, 0x0C09, 0x1B0F, + 0xFFFF }, { 0x0703, 0x0615, 0x0A07, 0x022B, 0xFFFF }, { 0x012A, 0x0431, 0x0601, 0x0333, 0x012D, 0x091D, 0xFFFF }, { + 0x0230, 0x0314, 0x0036, 0x0608, 0x0425, 0x0037, 0x0329, 0x081B, 0x120D, 0xFFFF }, { 0x0300, 0x0224, 0x0506, 0x0521, + 0x0F0B, 0x0B1F, 0xFFFF }, { 0x041A, 0x0613, 0x0717, 0xFFFF }, { 0x0235, 0x0905, 0xFFFF }, { 0x0312, 0x0134, 0x0523, + 0x0427, 0xFFFF }, { 0x0226, 0x090C, 0x002E, 0x0611, 0x0D09, 0x002F, 0xFFFF }, { 0x0715, 0x0B07, 0x0819, 0x032B, 0x1C0F, + 0xFFFF }, { 0x0310, 0x0531, 0x0701, 0x0803, 0x022D, 0x0A1D, 0xFFFF }, { 0x0418, 0x012C, 0x0433, 0x0525, 0x0137, 0x091B, + 0x130D, 0xFFFF }, { 0x0232, 0x0402, 0x0621, 0x0429, 0xFFFF }, { 0x0228, 0x051C, 0x0713, 0x100B, 0x0C1F, 0xFFFF }, { + 0x0320, 0x0335, 0x0A05, 0x0817, 0xFFFF }, { 0x0623, 0x0527, 0xFFFF }, { 0x0416, 0x061E, 0x0E0E, 0x0711, 0x0E09, 0x012F, + 0xFFFF }, { 0x0322, 0x0504, 0x080A, 0x0919, 0x1D0F, 0xFFFF }, { 0x0631, 0x0903, 0x0815, 0x0C07, 0x042B, 0x032D, 0x0B1D, + 0xFFFF }, { 0x022A, 0x0801, 0x0533, 0x0625, 0x0237, 0x0A1B, 0xFFFF }, { 0x0330, 0x0414, 0x0136, 0x0708, 0x0721, 0x0529, + 0x140D, 0xFFFF }, { 0x0400, 0x0324, 0x0606, 0x0038, 0x0039, 0x110B, 0x0D1F, 0xFFFF }, { 0x051A, 0x0813, 0x0B05, 0x0917, + 0xFFFF }, { 0x0723, 0x0435, 0x0627, 0xFFFF }, { 0x0412, 0x0234, 0x0F09, 0x022F, 0xFFFF }, { 0x0326, 0x0A0C, 0x012E, + 0x0811, 0x0A19, 0x1E0F, 0xFFFF }, { 0x0731, 0x0A03, 0x0915, 0x0D07, 0x052B, 0xFFFF }, { 0x0410, 0x0901, 0x0633, 0x0725, + 0x0337, 0x0B1B, 0x042D, 0x0C1D, 0xFFFF }, { 0x0518, 0x022C, 0x0629, 0x150D, 0xFFFF }, { 0x0332, 0x0502, 0x0821, 0x0139, + 0x120B, 0x0E1F, 0xFFFF }, { 0x0328, 0x061C, 0x0913, 0x0A17, 0xFFFF }, { 0x0420, 0x0535, 0x0C05, 0x0727, 0xFFFF }, { + 0x0823, 0x032F, 0xFFFF }, { 0x0516, 0x071E, 0x0F0E, 0x0911, 0x0B19, 0x1009, 0x1F0F, 0xFFFF }, { 0x0422, 0x0604, 0x090A, + 0x0B03, 0x0A15, 0x0E07, 0x062B, 0xFFFF }, { 0x0831, 0x0A01, 0x0733, 0x052D, 0x0D1D, 0xFFFF }, { 0x032A, 0x0825, 0x0437, + 0x0729, 0x0C1B, 0x160D, 0xFFFF }, { 0x0430, 0x0514, 0x0236, 0x0808, 0x0921, 0x0239, 0x130B, 0x0F1F, 0xFFFF }, { 0x0500, + 0x0424, 0x0706, 0x0138, 0x0A13, 0x0B17, 0xFFFF }, { 0x061A, 0x0635, 0x0D05, 0xFFFF }, { 0x0923, 0x0827, 0xFFFF }, { + 0x0512, 0x0334, 0x003A, 0x0A11, 0x1109, 0x003B, 0x042F, 0xFFFF }, { 0x0426, 0x0B0C, 0x022E, 0x0B15, 0x0F07, 0x0C19, + 0x072B, 0xFFFF }, { 0x0931, 0x0B01, 0x0C03, 0x062D, 0x0E1D, 0xFFFF }, { 0x0510, 0x0833, 0x0925, 0x0537, 0x0D1B, 0x170D, + 0xFFFF }, { 0x0618, 0x032C, 0x0A21, 0x0339, 0x0829, 0xFFFF }, { 0x0432, 0x0602, 0x0B13, 0x140B, 0x101F, 0xFFFF }, { + 0x0428, 0x071C, 0x0735, 0x0E05, 0x0C17, 0xFFFF }, { 0x0520, 0x0A23, 0x0927, 0xFFFF }, { 0x0B11, 0x1209, 0x013B, 0x052F, + 0xFFFF }, { 0x0616, 0x081E, 0x0D19, 0xFFFF }, { 0x0522, 0x0704, 0x0A0A, 0x0A31, 0x0D03, 0x0C15, 0x1007, 0x082B, 0x072D, + 0x0F1D, 0xFFFF }, { 0x0C01, 0x0933, 0x0A25, 0x0637, 0x0E1B, 0xFFFF }, { 0x042A, 0x0B21, 0x0929, 0x180D, 0xFFFF }, { + 0x0530, 0x0614, 0x0336, 0x0908, 0x0439, 0x150B, 0x111F, 0xFFFF }, { 0x0600, 0x0524, 0x0806, 0x0238, 0x0C13, 0x0F05, + 0x0D17, 0xFFFF }, { 0x071A, 0x0B23, 0x0835, 0x0A27, 0xFFFF }, { 0x1309, 0x023B, 0x062F, 0xFFFF }, { 0x0612, 0x0434, + 0x013A, 0x0C11, 0x0E19, 0xFFFF }, { 0x0526, 0x0C0C, 0x032E, 0x0B31, 0x0E03, 0x0D15, 0x1107, 0x092B, 0xFFFF }, { 0x0D01, + 0x0A33, 0x0B25, 0x0737, 0x0F1B, 0x082D, 0x101D, 0xFFFF }, { 0x0610, 0x0A29, 0x190D, 0xFFFF }, { 0x0718, 0x042C, 0x0C21, + 0x0539, 0x160B, 0x121F, 0xFFFF }, { 0x0532, 0x0702, 0x0D13, 0x0E17, 0xFFFF }, { 0x0528, 0x081C, 0x0935, 0x1005, 0x0B27, + 0xFFFF }, { 0x0620, 0x0C23, 0x033B, 0x072F, 0xFFFF }, { 0x0D11, 0x0F19, 0x1409, 0xFFFF }, { 0x0716, 0x003C, 0x091E, + 0x0F03, 0x0E15, 0x1207, 0x0A2B, 0x003D, 0xFFFF }, { 0x0622, 0x0804, 0x0B0A, 0x0C31, 0x0E01, 0x0B33, 0x092D, 0x111D, + 0xFFFF }, { 0x0C25, 0x0837, 0x0B29, 0x101B, 0x1A0D, 0xFFFF }, { 0x052A, 0x0D21, 0x0639, 0x170B, 0x131F, 0xFFFF }, { + 0x0630, 0x0714, 0x0436, 0x0A08, 0x0E13, 0x0F17, 0xFFFF }, { 0x0700, 0x0624, 0x0906, 0x0338, 0x0A35, 0x1105, 0xFFFF }, { + 0x081A, 0x0D23, 0x0C27, 0xFFFF }, { 0x0E11, 0x1509, 0x043B, 0x082F, 0xFFFF }, { 0x0712, 0x0534, 0x023A, 0x0F15, 0x1307, + 0x1019, 0x0B2B, 0x013D, 0xFFFF }, { 0x0626, 0x0D0C, 0x042E, 0x0D31, 0x0F01, 0x1003, 0x0A2D, 0x121D, 0xFFFF }, { 0x0C33, + 0x0D25, 0x0937, 0x111B, 0x1B0D, 0xFFFF }, { 0x0710, 0x0E21, 0x0739, 0x0C29, 0xFFFF }, { 0x0818, 0x052C, 0x0F13, 0x180B, + 0x141F, 0xFFFF }, { 0x0632, 0x0802, 0x0B35, 0x1205, 0x1017, 0xFFFF }, { 0x0628, 0x091C, 0x0E23, 0x0D27, 0xFFFF }, { + 0x0720, 0x0F11, 0x1609, 0x053B, 0x092F, 0xFFFF }, { 0x1119, 0x023D, 0xFFFF }, { 0x0816, 0x013C, 0x0A1E, 0x0E31, 0x1103, + 0x1015, 0x1407, 0x0C2B, 0x0B2D, 0x131D, 0xFFFF }, { 0x0722, 0x0904, 0x0C0A, 0x1001, 0x0D33, 0x0E25, 0x0A37, 0x121B, + 0xFFFF }, { 0x0F21, 0x0D29, 0x1C0D, 0xFFFF }, { 0x062A, 0x0839, 0x190B, 0x151F, 0xFFFF }, { 0x0730, 0x0814, 0x0536, + 0x0B08, 0x1013, 0x1305, 0x1117, 0xFFFF }, { 0x0800, 0x0724, 0x0A06, 0x0438, 0x0F23, 0x0C35, 0x0E27, 0xFFFF }, { 0x091A, + 0x1709, 0x063B, 0x0A2F, 0xFFFF }, { 0x1011, 0x1219, 0x033D, 0xFFFF }, { 0x0812, 0x0634, 0x033A, 0x0F31, 0x1203, 0x1115, + 0x1507, 0x0D2B, 0xFFFF }, { 0x0726, 0x0E0C, 0x052E, 0x1101, 0x0E33, 0x0F25, 0x0B37, 0x131B, 0x0C2D, 0x141D, 0xFFFF }, { + 0x0E29, 0x1D0D, 0xFFFF }, { 0x0810, 0x1021, 0x0939, 0x1A0B, 0x161F, 0xFFFF }, { 0x0918, 0x062C, 0x1113, 0x1217, 0xFFFF + }, { 0x0732, 0x0902, 0x0D35, 0x1405, 0x0F27, 0xFFFF }, { 0x0728, 0x0A1C, 0x1023, 0x073B, 0x0B2F, 0xFFFF }, { 0x0820, + 0x1111, 0x1319, 0x1809, 0xFFFF }, { 0x1303, 0x1215, 0x1607, 0x0E2B, 0x043D, 0xFFFF }, { 0x0916, 0x023C, 0x0B1E, 0x1031, + 0x1201, 0x0F33, 0x0D2D, 0x151D, 0xFFFF }, { 0x0822, 0x0A04, 0x0D0A, 0x1025, 0x0C37, 0x0F29, 0x141B, 0x1E0D, 0xFFFF }, { + 0x1121, 0x0A39, 0x1B0B, 0x171F, 0xFFFF }, { 0x072A, 0x1213, 0x1317, 0xFFFF }, { 0x0830, 0x0914, 0x0636, 0x0C08, 0x0E35, + 0x1505, 0xFFFF }, { 0x0900, 0x0824, 0x0B06, 0x0538, 0x1123, 0x1027, 0xFFFF }, { 0x0A1A, 0x1211, 0x1909, 0x083B, 0x0C2F, + 0xFFFF }, { 0x1315, 0x1707, 0x1419, 0x0F2B, 0x053D, 0xFFFF }, { 0x0912, 0x0734, 0x043A, 0x1131, 0x1301, 0x1403, 0x0E2D, + 0x161D, 0xFFFF }, { 0x0826, 0x0F0C, 0x062E, 0x1033, 0x1125, 0x0D37, 0x151B, 0x1F0D, 0xFFFF }, { 0x1221, 0x0B39, 0x1029, + 0xFFFF }, { 0x0910, 0x1313, 0x1C0B, 0x181F, 0xFFFF }, { 0x0A18, 0x072C, 0x0F35, 0x1605, 0x1417, 0xFFFF }, { 0x0832, + 0x0A02, 0x1223, 0x1127, 0xFFFF }, { 0x0828, 0x0B1C, 0x1311, 0x1A09, 0x093B, 0x0D2F, 0xFFFF }, { 0x0920, 0x1519, 0x063D, + 0xFFFF }, { 0x1231, 0x1503, 0x1415, 0x1807, 0x102B, 0x0F2D, 0x171D, 0xFFFF }, { 0x0A16, 0x033C, 0x0C1E, 0x1401, 0x1133, + 0x1225, 0x0E37, 0x161B, 0xFFFF }, { 0x0922, 0x0B04, 0x0E0A, 0x1321, 0x1129, 0xFFFF }, { 0x0C39, 0x1D0B, 0x191F, 0xFFFF + }, { 0x082A, 0x1413, 0x1705, 0x1517, 0xFFFF }, { 0x0930, 0x0A14, 0x0736, 0x0D08, 0x1323, 0x1035, 0x1227, 0xFFFF }, { + 0x0A00, 0x0924, 0x0C06, 0x0638, 0x1B09, 0x0A3B, 0x0E2F, 0xFFFF }, { 0x0B1A, 0x1411, 0x1619, 0x073D, 0xFFFF }, { 0x1331, + 0x1603, 0x1515, 0x1907, 0x112B, 0xFFFF }, { 0x0A12, 0x0834, 0x053A, 0x1501, 0x1233, 0x1325, 0x0F37, 0x171B, 0x102D, + 0x181D, 0xFFFF }, { 0x0926, 0x072E, 0x1229, 0xFFFF }, { 0x1421, 0x0D39, 0x1E0B, 0x1A1F, 0xFFFF }, { 0x0A10, 0x1513, + 0x1617, 0xFFFF }, { 0x0B18, 0x082C, 0x1135, 0x1805, 0x1327, 0xFFFF }, { 0x0932, 0x0B02, 0x1423, 0x0B3B, 0x0F2F, 0xFFFF + }, { 0x0928, 0x0C1C, 0x1511, 0x1719, 0x1C09, 0xFFFF }, { 0x0A20, 0x1703, 0x1615, 0x1A07, 0x122B, 0x083D, 0xFFFF }, { + 0x1431, 0x1601, 0x1333, 0x112D, 0x191D, 0xFFFF }, { 0x0B16, 0x043C, 0x0D1E, 0x1425, 0x1037, 0x1329, 0x181B, 0xFFFF }, { + 0x0A22, 0x0C04, 0x0F0A, 0x1521, 0x0E39, 0x1F0B, 0x1B1F, 0xFFFF }, { 0x1613, 0x1717, 0xFFFF }, { 0x092A, 0x1235, 0x1905, + 0xFFFF }, { 0x0A30, 0x0B14, 0x0836, 0x0E08, 0x1523, 0x1427, 0xFFFF }, { 0x0B00, 0x0A24, 0x0D06, 0x0738, 0x1611, 0x1D09, + 0x0C3B, 0x102F, 0xFFFF }, { 0x0C1A, 0x1715, 0x1B07, 0x1819, 0x132B, 0x093D, 0xFFFF }, { 0x1531, 0x1701, 0x1803, 0x122D, + 0x1A1D, 0xFFFF }, { 0x0B12, 0x0934, 0x063A, 0x1433, 0x1525, 0x1137, 0x191B, 0xFFFF }, { 0x0A26, 0x003E, 0x082E, 0x1621, + 0x0F39, 0x1429, 0x003F, 0xFFFF }, { 0x1713, 0x1C1F, 0xFFFF }, { 0x0B10, 0x1335, 0x1A05, 0x1817, 0xFFFF }, { 0x0C18, + 0x092C, 0x1623, 0x1527, 0xFFFF }, { 0x0A32, 0x0C02, 0x1711, 0x1E09, 0x0D3B, 0x112F, 0xFFFF }, { 0x0A28, 0x0D1C, 0x1919, + 0x0A3D, 0xFFFF }, { 0x0B20, 0x1631, 0x1903, 0x1815, 0x1C07, 0x142B, 0x132D, 0x1B1D, 0xFFFF }, { 0x1801, 0x1533, 0x1625, + 0x1237, 0x1A1B, 0xFFFF }, { 0x0C16, 0x053C, 0x0E1E, 0x1721, 0x1529, 0x013F, 0xFFFF }, { 0x0B22, 0x0D04, 0x1039, 0x1D1F, + 0xFFFF }, { 0x1813, 0x1B05, 0x1917, 0xFFFF }, { 0x0A2A, 0x1723, 0x1435, 0x1627, 0xFFFF }, { 0x0B30, 0x0C14, 0x0936, + 0x0F08, 0x1F09, 0x0E3B, 0x122F, 0xFFFF }, { 0x0C00, 0x0B24, 0x0E06, 0x0838, 0x1811, 0x1A19, 0x0B3D, 0xFFFF }, { 0x0D1A, + 0x1731, 0x1A03, 0x1915, 0x1D07, 0x152B, 0xFFFF }, { 0x1901, 0x1633, 0x1725, 0x1337, 0x1B1B, 0x142D, 0x1C1D, 0xFFFF }, { + 0x0C12, 0x0A34, 0x073A, 0x1629, 0x023F, 0xFFFF }, { 0x0B26, 0x013E, 0x092E, 0x1821, 0x1139, 0x1E1F, 0xFFFF }, { 0x1913, + 0x1A17, 0xFFFF }, { 0x0C10, 0x1535, 0x1C05, 0x1727, 0xFFFF }, { 0x0D18, 0x0A2C, 0x1823, 0x0F3B, 0x132F, 0xFFFF }, { + 0x0B32, 0x0D02, 0x1911, 0x1B19, 0xFFFF }, { 0x0B28, 0x0E1C, 0x1B03, 0x1A15, 0x1E07, 0x162B, 0x0C3D, 0xFFFF }, { 0x0C20, + 0x1831, 0x1A01, 0x1733, 0x152D, 0x1D1D, 0xFFFF }, { 0x1825, 0x1437, 0x1729, 0x1C1B, 0x033F, 0xFFFF }, { 0x0D16, 0x063C, + 0x0F1E, 0x1921, 0x1239, 0x1F1F, 0xFFFF }, { 0x0C22, 0x0E04, 0x1A13, 0x1B17, 0xFFFF }, { 0x1635, 0x1D05, 0xFFFF }, { + 0x0B2A, 0x1923, 0x1827, 0xFFFF }, { 0x0C30, 0x0D14, 0x0A36, 0x1A11, 0x103B, 0x142F, 0xFFFF }, { 0x0D00, 0x0C24, 0x0F06, + 0x0938, 0x1B15, 0x1F07, 0x1C19, 0x172B, 0x0D3D, 0xFFFF }, { 0x0E1A, 0x1931, 0x1B01, 0x1C03, 0x162D, 0x1E1D, 0xFFFF }, { + 0x1833, 0x1925, 0x1537, 0x1D1B, 0xFFFF }, { 0x0D12, 0x0B34, 0x083A, 0x1A21, 0x1339, 0x1829, 0x043F, 0xFFFF }, { 0x0C26, + 0x023E, 0x0A2E, 0x1B13, 0xFFFF }, { 0x1735, 0x1E05, 0x1C17, 0xFFFF }, { 0x0D10, 0x1A23, 0x1927, 0xFFFF }, { 0x0E18, + 0x0B2C, 0x1B11, 0x113B, 0x152F, 0xFFFF }, { 0x0C32, 0x0E02, 0x1D19, 0x0E3D, 0xFFFF }, { 0x0C28, 0x0F1C, 0x1A31, 0x1D03, + 0x1C15, 0x182B, 0x172D, 0x1F1D, 0xFFFF }, { 0x0D20, 0x1C01, 0x1933, 0x1A25, 0x1637, 0x1E1B, 0xFFFF }, { 0x1B21, 0x1929, + 0x053F, 0xFFFF }, { 0x0E16, 0x073C, 0x1439, 0xFFFF }, { 0x0D22, 0x0F04, 0x1C13, 0x1F05, 0x1D17, 0xFFFF }, { 0x1B23, + 0x1835, 0x1A27, 0xFFFF }, { 0x0C2A, 0x123B, 0x162F, 0xFFFF }, { 0x0D30, 0x0E14, 0x0B36, 0x1C11, 0x1E19, 0x0F3D, 0xFFFF + }, { 0x0E00, 0x0D24, 0x0A38, 0x1B31, 0x1E03, 0x1D15, 0x192B, 0xFFFF }, { 0x0F1A, 0x1D01, 0x1A33, 0x1B25, 0x1737, 0x1F1B, + 0x182D, 0xFFFF }, { 0x1A29, 0x063F, 0xFFFF }, { 0x0E12, 0x0C34, 0x093A, 0x1C21, 0x1539, 0xFFFF }, { 0x0D26, 0x033E, + 0x0B2E, 0x1D13, 0x1E17, 0xFFFF }, { 0x1935, 0x1B27, 0xFFFF }, { 0x0E10, 0x1C23, 0x133B, 0x172F, 0xFFFF }, { 0x0F18, + 0x0C2C, 0x1D11, 0x1F19, 0xFFFF }, { 0x0D32, 0x0F02, 0x1F03, 0x1E15, 0x1A2B, 0x103D, 0xFFFF }, { 0x0D28, 0x1C31, 0x1E01, + 0x1B33, 0x192D, 0xFFFF }, { 0x0E20, 0x1C25, 0x1837, 0x1B29, 0x073F, 0xFFFF }, { 0x1D21, 0x1639, 0xFFFF }, { 0x0F16, + 0x083C, 0x1E13, 0x1F17, 0xFFFF }, { 0x0E22, 0x1A35, 0xFFFF }, { 0x1D23, 0x1C27, 0xFFFF }, { 0x0D2A, 0x1E11, 0x143B, + 0x182F, 0xFFFF }, { 0x0E30, 0x0F14, 0x0C36, 0x1F15, 0x1B2B, 0x113D, 0xFFFF }, { 0x0F00, 0x0E24, 0x0B38, 0x1D31, 0x1F01, + 0x1A2D, 0xFFFF }, { 0x1C33, 0x1D25, 0x1937, 0xFFFF }, { 0x1E21, 0x1739, 0x1C29, 0x083F, 0xFFFF }, { 0x0F12, 0x0D34, + 0x0A3A, 0x1F13, 0xFFFF }, { 0x0E26, 0x043E, 0x0C2E, 0x1B35, 0xFFFF }, { 0x1E23, 0x1D27, 0xFFFF }, { 0x0F10, 0x1F11, + 0x153B, 0x192F, 0xFFFF }, { 0x0D2C, 0x123D, 0xFFFF }, + }; + + struct etc1_block + { + // big endian uint64: + // bit ofs: 56 48 40 32 24 16 8 0 + // byte ofs: b0, b1, b2, b3, b4, b5, b6, b7 + union + { + uint64 m_uint64; + uint8 m_bytes[8]; + }; + + uint8 m_low_color[2]; + uint8 m_high_color[2]; + + enum { cNumSelectorBytes = 4 }; + uint8 m_selectors[cNumSelectorBytes]; + + inline void clear() + { + zero_this(this); + } + + inline uint get_byte_bits(uint ofs, uint num) const + { + RG_ETC1_ASSERT((ofs + num) <= 64U); + RG_ETC1_ASSERT(num && (num <= 8U)); + RG_ETC1_ASSERT((ofs >> 3) == ((ofs + num - 1) >> 3)); + const uint byte_ofs = 7 - (ofs >> 3); + const uint byte_bit_ofs = ofs & 7; + return (m_bytes[byte_ofs] >> byte_bit_ofs) & ((1 << num) - 1); + } + + inline void set_byte_bits(uint ofs, uint num, uint bits) + { + RG_ETC1_ASSERT((ofs + num) <= 64U); + RG_ETC1_ASSERT(num && (num < 32U)); + RG_ETC1_ASSERT((ofs >> 3) == ((ofs + num - 1) >> 3)); + RG_ETC1_ASSERT(bits < (1U << num)); + const uint byte_ofs = 7 - (ofs >> 3); + const uint byte_bit_ofs = ofs & 7; + const uint mask = (1 << num) - 1; + m_bytes[byte_ofs] &= ~(mask << byte_bit_ofs); + m_bytes[byte_ofs] |= (bits << byte_bit_ofs); + } + + // false = left/right subblocks + // true = upper/lower subblocks + inline bool get_flip_bit() const + { + return (m_bytes[3] & 1) != 0; + } + + inline void set_flip_bit(bool flip) + { + m_bytes[3] &= ~1; + m_bytes[3] |= static_cast(flip); + } + + inline bool get_diff_bit() const + { + return (m_bytes[3] & 2) != 0; + } + + inline void set_diff_bit(bool diff) + { + m_bytes[3] &= ~2; + m_bytes[3] |= (static_cast(diff) << 1); + } + + // Returns intensity modifier table (0-7) used by subblock subblock_id. + // subblock_id=0 left/top (CW 1), 1=right/bottom (CW 2) + inline uint get_inten_table(uint subblock_id) const + { + RG_ETC1_ASSERT(subblock_id < 2); + const uint ofs = subblock_id ? 2 : 5; + return (m_bytes[3] >> ofs) & 7; + } + + // Sets intensity modifier table (0-7) used by subblock subblock_id (0 or 1) + inline void set_inten_table(uint subblock_id, uint t) + { + RG_ETC1_ASSERT(subblock_id < 2); + RG_ETC1_ASSERT(t < 8); + const uint ofs = subblock_id ? 2 : 5; + m_bytes[3] &= ~(7 << ofs); + m_bytes[3] |= (t << ofs); + } + + // Returned selector value ranges from 0-3 and is a direct index into g_etc1_inten_tables. + inline uint get_selector(uint x, uint y) const + { + RG_ETC1_ASSERT((x | y) < 4); + + const uint bit_index = x * 4 + y; + const uint byte_bit_ofs = bit_index & 7; + const uint8 *p = &m_bytes[7 - (bit_index >> 3)]; + const uint lsb = (p[0] >> byte_bit_ofs) & 1; + const uint msb = (p[-2] >> byte_bit_ofs) & 1; + const uint val = lsb | (msb << 1); + + return g_etc1_to_selector_index[val]; + } + + // Selector "val" ranges from 0-3 and is a direct index into g_etc1_inten_tables. + inline void set_selector(uint x, uint y, uint val) + { + RG_ETC1_ASSERT((x | y | val) < 4); + const uint bit_index = x * 4 + y; + + uint8 *p = &m_bytes[7 - (bit_index >> 3)]; + + const uint byte_bit_ofs = bit_index & 7; + const uint mask = 1 << byte_bit_ofs; + + const uint etc1_val = g_selector_index_to_etc1[val]; + + const uint lsb = etc1_val & 1; + const uint msb = etc1_val >> 1; + + p[0] &= ~mask; + p[0] |= (lsb << byte_bit_ofs); + + p[-2] &= ~mask; + p[-2] |= (msb << byte_bit_ofs); + } + + inline void set_base4_color(uint idx, uint16 c) + { + if (idx) + { + set_byte_bits(cETC1AbsColor4R2BitOffset, 4, (c >> 8) & 15); + set_byte_bits(cETC1AbsColor4G2BitOffset, 4, (c >> 4) & 15); + set_byte_bits(cETC1AbsColor4B2BitOffset, 4, c & 15); + } + else + { + set_byte_bits(cETC1AbsColor4R1BitOffset, 4, (c >> 8) & 15); + set_byte_bits(cETC1AbsColor4G1BitOffset, 4, (c >> 4) & 15); + set_byte_bits(cETC1AbsColor4B1BitOffset, 4, c & 15); + } + } + + inline uint16 get_base4_color(uint idx) const + { + uint r, g, b; + if (idx) + { + r = get_byte_bits(cETC1AbsColor4R2BitOffset, 4); + g = get_byte_bits(cETC1AbsColor4G2BitOffset, 4); + b = get_byte_bits(cETC1AbsColor4B2BitOffset, 4); + } + else + { + r = get_byte_bits(cETC1AbsColor4R1BitOffset, 4); + g = get_byte_bits(cETC1AbsColor4G1BitOffset, 4); + b = get_byte_bits(cETC1AbsColor4B1BitOffset, 4); + } + return static_cast(b | (g << 4U) | (r << 8U)); + } + + inline void set_base5_color(uint16 c) + { + set_byte_bits(cETC1BaseColor5RBitOffset, 5, (c >> 10) & 31); + set_byte_bits(cETC1BaseColor5GBitOffset, 5, (c >> 5) & 31); + set_byte_bits(cETC1BaseColor5BBitOffset, 5, c & 31); + } + + inline uint16 get_base5_color() const + { + const uint r = get_byte_bits(cETC1BaseColor5RBitOffset, 5); + const uint g = get_byte_bits(cETC1BaseColor5GBitOffset, 5); + const uint b = get_byte_bits(cETC1BaseColor5BBitOffset, 5); + return static_cast(b | (g << 5U) | (r << 10U)); + } + + void set_delta3_color(uint16 c) + { + set_byte_bits(cETC1DeltaColor3RBitOffset, 3, (c >> 6) & 7); + set_byte_bits(cETC1DeltaColor3GBitOffset, 3, (c >> 3) & 7); + set_byte_bits(cETC1DeltaColor3BBitOffset, 3, c & 7); + } + + inline uint16 get_delta3_color() const + { + const uint r = get_byte_bits(cETC1DeltaColor3RBitOffset, 3); + const uint g = get_byte_bits(cETC1DeltaColor3GBitOffset, 3); + const uint b = get_byte_bits(cETC1DeltaColor3BBitOffset, 3); + return static_cast(b | (g << 3U) | (r << 6U)); + } + + // Base color 5 + static uint16 pack_color5(const color_quad_u8& color, bool scaled, uint bias = 127U); + static uint16 pack_color5(uint r, uint g, uint b, bool scaled, uint bias = 127U); + + static color_quad_u8 unpack_color5(uint16 packed_color5, bool scaled, uint alpha = 255U); + static void unpack_color5(uint& r, uint& g, uint& b, uint16 packed_color, bool scaled); + + static bool unpack_color5(color_quad_u8& result, uint16 packed_color5, uint16 packed_delta3, bool scaled, uint alpha = 255U); + static bool unpack_color5(uint& r, uint& g, uint& b, uint16 packed_color5, uint16 packed_delta3, bool scaled, uint alpha = 255U); + + // Delta color 3 + // Inputs range from -4 to 3 (cETC1ColorDeltaMin to cETC1ColorDeltaMax) + static uint16 pack_delta3(int r, int g, int b); + + // Results range from -4 to 3 (cETC1ColorDeltaMin to cETC1ColorDeltaMax) + static void unpack_delta3(int& r, int& g, int& b, uint16 packed_delta3); + + // Abs color 4 + static uint16 pack_color4(const color_quad_u8& color, bool scaled, uint bias = 127U); + static uint16 pack_color4(uint r, uint g, uint b, bool scaled, uint bias = 127U); + + static color_quad_u8 unpack_color4(uint16 packed_color4, bool scaled, uint alpha = 255U); + static void unpack_color4(uint& r, uint& g, uint& b, uint16 packed_color4, bool scaled); + + // subblock colors + static void get_diff_subblock_colors(color_quad_u8* pDst, uint16 packed_color5, uint table_idx); + static bool get_diff_subblock_colors(color_quad_u8* pDst, uint16 packed_color5, uint16 packed_delta3, uint table_idx); + static void get_abs_subblock_colors(color_quad_u8* pDst, uint16 packed_color4, uint table_idx); + + static inline void unscaled_to_scaled_color(color_quad_u8& dst, const color_quad_u8& src, bool color4) + { + if (color4) + { + dst.r = src.r | (src.r << 4); + dst.g = src.g | (src.g << 4); + dst.b = src.b | (src.b << 4); + } + else + { + dst.r = (src.r >> 2) | (src.r << 3); + dst.g = (src.g >> 2) | (src.g << 3); + dst.b = (src.b >> 2) | (src.b << 3); + } + dst.a = src.a; + } + }; + + // Returns pointer to sorted array. + template + T* indirect_radix_sort(uint num_indices, T* pIndices0, T* pIndices1, const Q* pKeys, uint key_ofs, uint key_size, bool init_indices) + { + RG_ETC1_ASSERT((key_ofs >= 0) && (key_ofs < sizeof(T))); + RG_ETC1_ASSERT((key_size >= 1) && (key_size <= 4)); + + if (init_indices) + { + T* p = pIndices0; + T* q = pIndices0 + (num_indices >> 1) * 2; + uint i; + for (i = 0; p != q; p += 2, i += 2) + { + p[0] = static_cast(i); + p[1] = static_cast(i + 1); + } + + if (num_indices & 1) + *p = static_cast(i); + } + + uint hist[256 * 4]; + + memset(hist, 0, sizeof(hist[0]) * 256 * key_size); + +#define RG_ETC1_GET_KEY(p) (*(const uint*)((const uint8*)(pKeys + *(p)) + key_ofs)) +#define RG_ETC1_GET_KEY_FROM_INDEX(i) (*(const uint*)((const uint8*)(pKeys + (i)) + key_ofs)) + + if (key_size == 4) + { + T* p = pIndices0; + T* q = pIndices0 + num_indices; + for ( ; p != q; p++) + { + const uint key = RG_ETC1_GET_KEY(p); + + hist[ key & 0xFF]++; + hist[256 + ((key >> 8) & 0xFF)]++; + hist[512 + ((key >> 16) & 0xFF)]++; + hist[768 + ((key >> 24) & 0xFF)]++; + } + } + else if (key_size == 3) + { + T* p = pIndices0; + T* q = pIndices0 + num_indices; + for ( ; p != q; p++) + { + const uint key = RG_ETC1_GET_KEY(p); + + hist[ key & 0xFF]++; + hist[256 + ((key >> 8) & 0xFF)]++; + hist[512 + ((key >> 16) & 0xFF)]++; + } + } + else if (key_size == 2) + { + T* p = pIndices0; + T* q = pIndices0 + (num_indices >> 1) * 2; + + for ( ; p != q; p += 2) + { + const uint key0 = RG_ETC1_GET_KEY(p); + const uint key1 = RG_ETC1_GET_KEY(p+1); + + hist[ key0 & 0xFF]++; + hist[256 + ((key0 >> 8) & 0xFF)]++; + + hist[ key1 & 0xFF]++; + hist[256 + ((key1 >> 8) & 0xFF)]++; + } + + if (num_indices & 1) + { + const uint key = RG_ETC1_GET_KEY(p); + + hist[ key & 0xFF]++; + hist[256 + ((key >> 8) & 0xFF)]++; + } + } + else + { + RG_ETC1_ASSERT(key_size == 1); + if (key_size != 1) + return NULL; + + T* p = pIndices0; + T* q = pIndices0 + (num_indices >> 1) * 2; + + for ( ; p != q; p += 2) + { + const uint key0 = RG_ETC1_GET_KEY(p); + const uint key1 = RG_ETC1_GET_KEY(p+1); + + hist[key0 & 0xFF]++; + hist[key1 & 0xFF]++; + } + + if (num_indices & 1) + { + const uint key = RG_ETC1_GET_KEY(p); + + hist[key & 0xFF]++; + } + } + + T* pCur = pIndices0; + T* pNew = pIndices1; + + for (uint pass = 0; pass < key_size; pass++) + { + const uint* pHist = &hist[pass << 8]; + + uint offsets[256]; + + uint cur_ofs = 0; + for (uint i = 0; i < 256; i += 2) + { + offsets[i] = cur_ofs; + cur_ofs += pHist[i]; + + offsets[i+1] = cur_ofs; + cur_ofs += pHist[i+1]; + } + + const uint pass_shift = pass << 3; + + T* p = pCur; + T* q = pCur + (num_indices >> 1) * 2; + + for ( ; p != q; p += 2) + { + uint index0 = p[0]; + uint index1 = p[1]; + + uint c0 = (RG_ETC1_GET_KEY_FROM_INDEX(index0) >> pass_shift) & 0xFF; + uint c1 = (RG_ETC1_GET_KEY_FROM_INDEX(index1) >> pass_shift) & 0xFF; + + if (c0 == c1) + { + uint dst_offset0 = offsets[c0]; + + offsets[c0] = dst_offset0 + 2; + + pNew[dst_offset0] = static_cast(index0); + pNew[dst_offset0 + 1] = static_cast(index1); + } + else + { + uint dst_offset0 = offsets[c0]++; + uint dst_offset1 = offsets[c1]++; + + pNew[dst_offset0] = static_cast(index0); + pNew[dst_offset1] = static_cast(index1); + } + } + + if (num_indices & 1) + { + uint index = *p; + uint c = (RG_ETC1_GET_KEY_FROM_INDEX(index) >> pass_shift) & 0xFF; + + uint dst_offset = offsets[c]; + offsets[c] = dst_offset + 1; + + pNew[dst_offset] = static_cast(index); + } + + T* t = pCur; + pCur = pNew; + pNew = t; + } + + return pCur; + } + +#undef RG_ETC1_GET_KEY +#undef RG_ETC1_GET_KEY_FROM_INDEX + + uint16 etc1_block::pack_color5(const color_quad_u8& color, bool scaled, uint bias) + { + return pack_color5(color.r, color.g, color.b, scaled, bias); + } + + uint16 etc1_block::pack_color5(uint r, uint g, uint b, bool scaled, uint bias) + { + if (scaled) + { + r = (r * 31U + bias) / 255U; + g = (g * 31U + bias) / 255U; + b = (b * 31U + bias) / 255U; + } + + r = rg_etc1::minimum(r, 31U); + g = rg_etc1::minimum(g, 31U); + b = rg_etc1::minimum(b, 31U); + + return static_cast(b | (g << 5U) | (r << 10U)); + } + + color_quad_u8 etc1_block::unpack_color5(uint16 packed_color5, bool scaled, uint alpha) + { + uint b = packed_color5 & 31U; + uint g = (packed_color5 >> 5U) & 31U; + uint r = (packed_color5 >> 10U) & 31U; + + if (scaled) + { + b = (b << 3U) | (b >> 2U); + g = (g << 3U) | (g >> 2U); + r = (r << 3U) | (r >> 2U); + } + + return color_quad_u8(cNoClamp, r, g, b, rg_etc1::minimum(alpha, 255U)); + } + + void etc1_block::unpack_color5(uint& r, uint& g, uint& b, uint16 packed_color5, bool scaled) + { + color_quad_u8 c(unpack_color5(packed_color5, scaled, 0)); + r = c.r; + g = c.g; + b = c.b; + } + + bool etc1_block::unpack_color5(color_quad_u8& result, uint16 packed_color5, uint16 packed_delta3, bool scaled, uint alpha) + { + int dc_r, dc_g, dc_b; + unpack_delta3(dc_r, dc_g, dc_b, packed_delta3); + + int b = (packed_color5 & 31U) + dc_b; + int g = ((packed_color5 >> 5U) & 31U) + dc_g; + int r = ((packed_color5 >> 10U) & 31U) + dc_r; + + bool success = true; + if (static_cast(r | g | b) > 31U) + { + success = false; + r = rg_etc1::clamp(r, 0, 31); + g = rg_etc1::clamp(g, 0, 31); + b = rg_etc1::clamp(b, 0, 31); + } + + if (scaled) + { + b = (b << 3U) | (b >> 2U); + g = (g << 3U) | (g >> 2U); + r = (r << 3U) | (r >> 2U); + } + + result.set_noclamp_rgba(r, g, b, rg_etc1::minimum(alpha, 255U)); + return success; + } + + bool etc1_block::unpack_color5(uint& r, uint& g, uint& b, uint16 packed_color5, uint16 packed_delta3, bool scaled, uint alpha) + { + color_quad_u8 result; + const bool success = unpack_color5(result, packed_color5, packed_delta3, scaled, alpha); + r = result.r; + g = result.g; + b = result.b; + return success; + } + + uint16 etc1_block::pack_delta3(int r, int g, int b) + { + RG_ETC1_ASSERT((r >= cETC1ColorDeltaMin) && (r <= cETC1ColorDeltaMax)); + RG_ETC1_ASSERT((g >= cETC1ColorDeltaMin) && (g <= cETC1ColorDeltaMax)); + RG_ETC1_ASSERT((b >= cETC1ColorDeltaMin) && (b <= cETC1ColorDeltaMax)); + if (r < 0) r += 8; + if (g < 0) g += 8; + if (b < 0) b += 8; + return static_cast(b | (g << 3) | (r << 6)); + } + + void etc1_block::unpack_delta3(int& r, int& g, int& b, uint16 packed_delta3) + { + r = (packed_delta3 >> 6) & 7; + g = (packed_delta3 >> 3) & 7; + b = packed_delta3 & 7; + if (r >= 4) r -= 8; + if (g >= 4) g -= 8; + if (b >= 4) b -= 8; + } + + uint16 etc1_block::pack_color4(const color_quad_u8& color, bool scaled, uint bias) + { + return pack_color4(color.r, color.g, color.b, scaled, bias); + } + + uint16 etc1_block::pack_color4(uint r, uint g, uint b, bool scaled, uint bias) + { + if (scaled) + { + r = (r * 15U + bias) / 255U; + g = (g * 15U + bias) / 255U; + b = (b * 15U + bias) / 255U; + } + + r = rg_etc1::minimum(r, 15U); + g = rg_etc1::minimum(g, 15U); + b = rg_etc1::minimum(b, 15U); + + return static_cast(b | (g << 4U) | (r << 8U)); + } + + color_quad_u8 etc1_block::unpack_color4(uint16 packed_color4, bool scaled, uint alpha) + { + uint b = packed_color4 & 15U; + uint g = (packed_color4 >> 4U) & 15U; + uint r = (packed_color4 >> 8U) & 15U; + + if (scaled) + { + b = (b << 4U) | b; + g = (g << 4U) | g; + r = (r << 4U) | r; + } + + return color_quad_u8(cNoClamp, r, g, b, rg_etc1::minimum(alpha, 255U)); + } + + void etc1_block::unpack_color4(uint& r, uint& g, uint& b, uint16 packed_color4, bool scaled) + { + color_quad_u8 c(unpack_color4(packed_color4, scaled, 0)); + r = c.r; + g = c.g; + b = c.b; + } + + void etc1_block::get_diff_subblock_colors(color_quad_u8* pDst, uint16 packed_color5, uint table_idx) + { + RG_ETC1_ASSERT(table_idx < cETC1IntenModifierValues); + const int *pInten_modifer_table = &g_etc1_inten_tables[table_idx][0]; + + uint r, g, b; + unpack_color5(r, g, b, packed_color5, true); + + const int ir = static_cast(r), ig = static_cast(g), ib = static_cast(b); + + const int y0 = pInten_modifer_table[0]; + pDst[0].set(ir + y0, ig + y0, ib + y0); + + const int y1 = pInten_modifer_table[1]; + pDst[1].set(ir + y1, ig + y1, ib + y1); + + const int y2 = pInten_modifer_table[2]; + pDst[2].set(ir + y2, ig + y2, ib + y2); + + const int y3 = pInten_modifer_table[3]; + pDst[3].set(ir + y3, ig + y3, ib + y3); + } + + bool etc1_block::get_diff_subblock_colors(color_quad_u8* pDst, uint16 packed_color5, uint16 packed_delta3, uint table_idx) + { + RG_ETC1_ASSERT(table_idx < cETC1IntenModifierValues); + const int *pInten_modifer_table = &g_etc1_inten_tables[table_idx][0]; + + uint r, g, b; + bool success = unpack_color5(r, g, b, packed_color5, packed_delta3, true); + + const int ir = static_cast(r), ig = static_cast(g), ib = static_cast(b); + + const int y0 = pInten_modifer_table[0]; + pDst[0].set(ir + y0, ig + y0, ib + y0); + + const int y1 = pInten_modifer_table[1]; + pDst[1].set(ir + y1, ig + y1, ib + y1); + + const int y2 = pInten_modifer_table[2]; + pDst[2].set(ir + y2, ig + y2, ib + y2); + + const int y3 = pInten_modifer_table[3]; + pDst[3].set(ir + y3, ig + y3, ib + y3); + + return success; + } + + void etc1_block::get_abs_subblock_colors(color_quad_u8* pDst, uint16 packed_color4, uint table_idx) + { + RG_ETC1_ASSERT(table_idx < cETC1IntenModifierValues); + const int *pInten_modifer_table = &g_etc1_inten_tables[table_idx][0]; + + uint r, g, b; + unpack_color4(r, g, b, packed_color4, true); + + const int ir = static_cast(r), ig = static_cast(g), ib = static_cast(b); + + const int y0 = pInten_modifer_table[0]; + pDst[0].set(ir + y0, ig + y0, ib + y0); + + const int y1 = pInten_modifer_table[1]; + pDst[1].set(ir + y1, ig + y1, ib + y1); + + const int y2 = pInten_modifer_table[2]; + pDst[2].set(ir + y2, ig + y2, ib + y2); + + const int y3 = pInten_modifer_table[3]; + pDst[3].set(ir + y3, ig + y3, ib + y3); + } + + bool unpack_etc1_block(const void* pETC1_block, unsigned int* pDst_pixels_rgba, bool preserve_alpha) + { + color_quad_u8* pDst = reinterpret_cast(pDst_pixels_rgba); + const etc1_block& block = *static_cast(pETC1_block); + + const bool diff_flag = block.get_diff_bit(); + const bool flip_flag = block.get_flip_bit(); + const uint table_index0 = block.get_inten_table(0); + const uint table_index1 = block.get_inten_table(1); + + color_quad_u8 subblock_colors0[4]; + color_quad_u8 subblock_colors1[4]; + bool success = true; + + if (diff_flag) + { + const uint16 base_color5 = block.get_base5_color(); + const uint16 delta_color3 = block.get_delta3_color(); + etc1_block::get_diff_subblock_colors(subblock_colors0, base_color5, table_index0); + + if (!etc1_block::get_diff_subblock_colors(subblock_colors1, base_color5, delta_color3, table_index1)) + success = false; + } + else + { + const uint16 base_color4_0 = block.get_base4_color(0); + etc1_block::get_abs_subblock_colors(subblock_colors0, base_color4_0, table_index0); + + const uint16 base_color4_1 = block.get_base4_color(1); + etc1_block::get_abs_subblock_colors(subblock_colors1, base_color4_1, table_index1); + } + + if (preserve_alpha) + { + if (flip_flag) + { + for (uint y = 0; y < 2; y++) + { + pDst[0].set_rgb(subblock_colors0[block.get_selector(0, y)]); + pDst[1].set_rgb(subblock_colors0[block.get_selector(1, y)]); + pDst[2].set_rgb(subblock_colors0[block.get_selector(2, y)]); + pDst[3].set_rgb(subblock_colors0[block.get_selector(3, y)]); + pDst += 4; + } + + for (uint y = 2; y < 4; y++) + { + pDst[0].set_rgb(subblock_colors1[block.get_selector(0, y)]); + pDst[1].set_rgb(subblock_colors1[block.get_selector(1, y)]); + pDst[2].set_rgb(subblock_colors1[block.get_selector(2, y)]); + pDst[3].set_rgb(subblock_colors1[block.get_selector(3, y)]); + pDst += 4; + } + } + else + { + for (uint y = 0; y < 4; y++) + { + pDst[0].set_rgb(subblock_colors0[block.get_selector(0, y)]); + pDst[1].set_rgb(subblock_colors0[block.get_selector(1, y)]); + pDst[2].set_rgb(subblock_colors1[block.get_selector(2, y)]); + pDst[3].set_rgb(subblock_colors1[block.get_selector(3, y)]); + pDst += 4; + } + } + } + else + { + if (flip_flag) + { + // 0000 + // 0000 + // 1111 + // 1111 + for (uint y = 0; y < 2; y++) + { + pDst[0] = subblock_colors0[block.get_selector(0, y)]; + pDst[1] = subblock_colors0[block.get_selector(1, y)]; + pDst[2] = subblock_colors0[block.get_selector(2, y)]; + pDst[3] = subblock_colors0[block.get_selector(3, y)]; + pDst += 4; + } + + for (uint y = 2; y < 4; y++) + { + pDst[0] = subblock_colors1[block.get_selector(0, y)]; + pDst[1] = subblock_colors1[block.get_selector(1, y)]; + pDst[2] = subblock_colors1[block.get_selector(2, y)]; + pDst[3] = subblock_colors1[block.get_selector(3, y)]; + pDst += 4; + } + } + else + { + // 0011 + // 0011 + // 0011 + // 0011 + for (uint y = 0; y < 4; y++) + { + pDst[0] = subblock_colors0[block.get_selector(0, y)]; + pDst[1] = subblock_colors0[block.get_selector(1, y)]; + pDst[2] = subblock_colors1[block.get_selector(2, y)]; + pDst[3] = subblock_colors1[block.get_selector(3, y)]; + pDst += 4; + } + } + } + + return success; + } + + struct etc1_solution_coordinates + { + inline etc1_solution_coordinates() : + m_unscaled_color(0, 0, 0, 0), + m_inten_table(0), + m_color4(false) + { + } + + inline etc1_solution_coordinates(uint r, uint g, uint b, uint inten_table, bool color4) : + m_unscaled_color(r, g, b, 255), + m_inten_table(inten_table), + m_color4(color4) + { + } + + inline etc1_solution_coordinates(const color_quad_u8& c, uint inten_table, bool color4) : + m_unscaled_color(c), + m_inten_table(inten_table), + m_color4(color4) + { + } + + inline etc1_solution_coordinates(const etc1_solution_coordinates& other) + { + *this = other; + } + + inline etc1_solution_coordinates& operator= (const etc1_solution_coordinates& rhs) + { + m_unscaled_color = rhs.m_unscaled_color; + m_inten_table = rhs.m_inten_table; + m_color4 = rhs.m_color4; + return *this; + } + + inline void clear() + { + m_unscaled_color.clear(); + m_inten_table = 0; + m_color4 = false; + } + + inline color_quad_u8 get_scaled_color() const + { + int br, bg, bb; + if (m_color4) + { + br = m_unscaled_color.r | (m_unscaled_color.r << 4); + bg = m_unscaled_color.g | (m_unscaled_color.g << 4); + bb = m_unscaled_color.b | (m_unscaled_color.b << 4); + } + else + { + br = (m_unscaled_color.r >> 2) | (m_unscaled_color.r << 3); + bg = (m_unscaled_color.g >> 2) | (m_unscaled_color.g << 3); + bb = (m_unscaled_color.b >> 2) | (m_unscaled_color.b << 3); + } + return color_quad_u8(br, bg, bb); + } + + inline void get_block_colors(color_quad_u8* pBlock_colors) + { + int br, bg, bb; + if (m_color4) + { + br = m_unscaled_color.r | (m_unscaled_color.r << 4); + bg = m_unscaled_color.g | (m_unscaled_color.g << 4); + bb = m_unscaled_color.b | (m_unscaled_color.b << 4); + } + else + { + br = (m_unscaled_color.r >> 2) | (m_unscaled_color.r << 3); + bg = (m_unscaled_color.g >> 2) | (m_unscaled_color.g << 3); + bb = (m_unscaled_color.b >> 2) | (m_unscaled_color.b << 3); + } + const int* pInten_table = g_etc1_inten_tables[m_inten_table]; + pBlock_colors[0].set(br + pInten_table[0], bg + pInten_table[0], bb + pInten_table[0]); + pBlock_colors[1].set(br + pInten_table[1], bg + pInten_table[1], bb + pInten_table[1]); + pBlock_colors[2].set(br + pInten_table[2], bg + pInten_table[2], bb + pInten_table[2]); + pBlock_colors[3].set(br + pInten_table[3], bg + pInten_table[3], bb + pInten_table[3]); + } + + color_quad_u8 m_unscaled_color; + uint m_inten_table; + bool m_color4; + }; + + class etc1_optimizer + { + etc1_optimizer(const etc1_optimizer&); + etc1_optimizer& operator= (const etc1_optimizer&); + + public: + etc1_optimizer() + { + clear(); + } + + void clear() + { + m_pParams = NULL; + m_pResult = NULL; + m_pSorted_luma = NULL; + m_pSorted_luma_indices = NULL; + } + + struct params : etc1_pack_params + { + params() + { + clear(); + } + + params(const etc1_pack_params& base_params) : + etc1_pack_params(base_params) + { + clear_optimizer_params(); + } + + void clear() + { + etc1_pack_params::clear(); + clear_optimizer_params(); + } + + void clear_optimizer_params() + { + m_num_src_pixels = 0; + m_pSrc_pixels = 0; + + m_use_color4 = false; + static const int s_default_scan_delta[] = { 0 }; + m_pScan_deltas = s_default_scan_delta; + m_scan_delta_size = 1; + + m_base_color5.clear(); + m_constrain_against_base_color5 = false; + } + + uint m_num_src_pixels; + const color_quad_u8* m_pSrc_pixels; + + bool m_use_color4; + const int* m_pScan_deltas; + uint m_scan_delta_size; + + color_quad_u8 m_base_color5; + bool m_constrain_against_base_color5; + }; + + struct results + { + uint64 m_error; + color_quad_u8 m_block_color_unscaled; + uint m_block_inten_table; + uint m_n; + uint8* m_pSelectors; + bool m_block_color4; + + inline results& operator= (const results& rhs) + { + m_block_color_unscaled = rhs.m_block_color_unscaled; + m_block_color4 = rhs.m_block_color4; + m_block_inten_table = rhs.m_block_inten_table; + m_error = rhs.m_error; + RG_ETC1_ASSERT(m_n == rhs.m_n); + memcpy(m_pSelectors, rhs.m_pSelectors, rhs.m_n); + return *this; + } + }; + + void init(const params& params, results& result); + bool compute(); + + private: + struct potential_solution + { + potential_solution() : m_coords(), m_error(cUINT64_MAX), m_valid(false) + { + } + + etc1_solution_coordinates m_coords; + uint8 m_selectors[8]; + uint64 m_error; + bool m_valid; + + void clear() + { + m_coords.clear(); + m_error = cUINT64_MAX; + m_valid = false; + } + }; + + const params* m_pParams; + results* m_pResult; + + int m_limit; + + vec3F m_avg_color; + int m_br, m_bg, m_bb; + uint16 m_luma[8]; + uint32 m_sorted_luma[2][8]; + const uint32* m_pSorted_luma_indices; + uint32* m_pSorted_luma; + + uint8 m_selectors[8]; + uint8 m_best_selectors[8]; + + potential_solution m_best_solution; + potential_solution m_trial_solution; + uint8 m_temp_selectors[8]; + + bool evaluate_solution(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution); + bool evaluate_solution_fast(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution); + }; + + bool etc1_optimizer::compute() + { + const uint n = m_pParams->m_num_src_pixels; + const int scan_delta_size = m_pParams->m_scan_delta_size; + + // Scan through a subset of the 3D lattice centered around the avg block color trying each 3D (555 or 444) lattice point as a potential block color. + // Each time a better solution is found try to refine the current solution's block color based of the current selectors and intensity table index. + for (int zdi = 0; zdi < scan_delta_size; zdi++) + { + const int zd = m_pParams->m_pScan_deltas[zdi]; + const int mbb = m_bb + zd; + if (mbb < 0) continue; else if (mbb > m_limit) break; + + for (int ydi = 0; ydi < scan_delta_size; ydi++) + { + const int yd = m_pParams->m_pScan_deltas[ydi]; + const int mbg = m_bg + yd; + if (mbg < 0) continue; else if (mbg > m_limit) break; + + for (int xdi = 0; xdi < scan_delta_size; xdi++) + { + const int xd = m_pParams->m_pScan_deltas[xdi]; + const int mbr = m_br + xd; + if (mbr < 0) continue; else if (mbr > m_limit) break; + + etc1_solution_coordinates coords(mbr, mbg, mbb, 0, m_pParams->m_use_color4); + if (m_pParams->m_quality == cHighQuality) + { + if (!evaluate_solution(coords, m_trial_solution, &m_best_solution)) + continue; + } + else + { + if (!evaluate_solution_fast(coords, m_trial_solution, &m_best_solution)) + continue; + } + + // Now we have the input block, the avg. color of the input pixels, a set of trial selector indices, and the block color+intensity index. + // Now, for each component, attempt to refine the current solution by solving a simple linear equation. For example, for 4 colors: + // The goal is: + // pixel0 - (block_color+inten_table[selector0]) + pixel1 - (block_color+inten_table[selector1]) + pixel2 - (block_color+inten_table[selector2]) + pixel3 - (block_color+inten_table[selector3]) = 0 + // Rearranging this: + // (pixel0 + pixel1 + pixel2 + pixel3) - (block_color+inten_table[selector0]) - (block_color+inten_table[selector1]) - (block_color+inten_table[selector2]) - (block_color+inten_table[selector3]) = 0 + // (pixel0 + pixel1 + pixel2 + pixel3) - block_color - inten_table[selector0] - block_color-inten_table[selector1] - block_color-inten_table[selector2] - block_color-inten_table[selector3] = 0 + // (pixel0 + pixel1 + pixel2 + pixel3) - 4*block_color - inten_table[selector0] - inten_table[selector1] - inten_table[selector2] - inten_table[selector3] = 0 + // (pixel0 + pixel1 + pixel2 + pixel3) - 4*block_color - (inten_table[selector0] + inten_table[selector1] + inten_table[selector2] + inten_table[selector3]) = 0 + // (pixel0 + pixel1 + pixel2 + pixel3)/4 - block_color - (inten_table[selector0] + inten_table[selector1] + inten_table[selector2] + inten_table[selector3])/4 = 0 + // block_color = (pixel0 + pixel1 + pixel2 + pixel3)/4 - (inten_table[selector0] + inten_table[selector1] + inten_table[selector2] + inten_table[selector3])/4 + // So what this means: + // optimal_block_color = avg_input - avg_inten_delta + // So the optimal block color can be computed by taking the average block color and subtracting the current average of the intensity delta. + // Unfortunately, optimal_block_color must then be quantized to 555 or 444 so it's not always possible to improve matters using this formula. + // Also, the above formula is for unclamped intensity deltas. The actual implementation takes into account clamping. + + const uint max_refinement_trials = (m_pParams->m_quality == cLowQuality) ? 2 : (((xd | yd | zd) == 0) ? 4 : 2); + for (uint refinement_trial = 0; refinement_trial < max_refinement_trials; refinement_trial++) + { + const uint8* pSelectors = m_best_solution.m_selectors; + const int* pInten_table = g_etc1_inten_tables[m_best_solution.m_coords.m_inten_table]; + + int delta_sum_r = 0, delta_sum_g = 0, delta_sum_b = 0; + const color_quad_u8 base_color(m_best_solution.m_coords.get_scaled_color()); + for (uint r = 0; r < n; r++) + { + const uint s = *pSelectors++; + const int yd = pInten_table[s]; + // Compute actual delta being applied to each pixel, taking into account clamping. + delta_sum_r += rg_etc1::clamp(base_color.r + yd, 0, 255) - base_color.r; + delta_sum_g += rg_etc1::clamp(base_color.g + yd, 0, 255) - base_color.g; + delta_sum_b += rg_etc1::clamp(base_color.b + yd, 0, 255) - base_color.b; + } + if ((!delta_sum_r) && (!delta_sum_g) && (!delta_sum_b)) + break; + const float avg_delta_r_f = static_cast(delta_sum_r) / n; + const float avg_delta_g_f = static_cast(delta_sum_g) / n; + const float avg_delta_b_f = static_cast(delta_sum_b) / n; + const int br1 = rg_etc1::clamp(static_cast((m_avg_color[0] - avg_delta_r_f) * m_limit / 255.0f + .5f), 0, m_limit); + const int bg1 = rg_etc1::clamp(static_cast((m_avg_color[1] - avg_delta_g_f) * m_limit / 255.0f + .5f), 0, m_limit); + const int bb1 = rg_etc1::clamp(static_cast((m_avg_color[2] - avg_delta_b_f) * m_limit / 255.0f + .5f), 0, m_limit); + + bool skip = false; + + if ((mbr == br1) && (mbg == bg1) && (mbb == bb1)) + skip = true; + else if ((br1 == m_best_solution.m_coords.m_unscaled_color.r) && (bg1 == m_best_solution.m_coords.m_unscaled_color.g) && (bb1 == m_best_solution.m_coords.m_unscaled_color.b)) + skip = true; + else if ((m_br == br1) && (m_bg == bg1) && (m_bb == bb1)) + skip = true; + + if (skip) + break; + + etc1_solution_coordinates coords1(br1, bg1, bb1, 0, m_pParams->m_use_color4); + if (m_pParams->m_quality == cHighQuality) + { + if (!evaluate_solution(coords1, m_trial_solution, &m_best_solution)) + break; + } + else + { + if (!evaluate_solution_fast(coords1, m_trial_solution, &m_best_solution)) + break; + } + + } // refinement_trial + + } // xdi + } // ydi + } // zdi + + if (!m_best_solution.m_valid) + { + m_pResult->m_error = cUINT32_MAX; + return false; + } + + const uint8* pSelectors = m_best_solution.m_selectors; + +#ifdef RG_ETC1_BUILD_DEBUG + { + color_quad_u8 block_colors[4]; + m_best_solution.m_coords.get_block_colors(block_colors); + + const color_quad_u8* pSrc_pixels = m_pParams->m_pSrc_pixels; + uint64 actual_error = 0; + for (uint i = 0; i < n; i++) + actual_error += pSrc_pixels[i].squared_distance_rgb(block_colors[pSelectors[i]]); + + RG_ETC1_ASSERT(actual_error == m_best_solution.m_error); + } +#endif + + m_pResult->m_error = m_best_solution.m_error; + + m_pResult->m_block_color_unscaled = m_best_solution.m_coords.m_unscaled_color; + m_pResult->m_block_color4 = m_best_solution.m_coords.m_color4; + + m_pResult->m_block_inten_table = m_best_solution.m_coords.m_inten_table; + memcpy(m_pResult->m_pSelectors, pSelectors, n); + m_pResult->m_n = n; + + return true; + } + + void etc1_optimizer::init(const params& p, results& r) + { + // This version is hardcoded for 8 pixel subblocks. + RG_ETC1_ASSERT(p.m_num_src_pixels == 8); + + m_pParams = &p; + m_pResult = &r; + + const uint n = 8; + + m_limit = m_pParams->m_use_color4 ? 15 : 31; + + vec3F avg_color(0.0f); + + for (uint i = 0; i < n; i++) + { + const color_quad_u8& c = m_pParams->m_pSrc_pixels[i]; + const vec3F fc(c.r, c.g, c.b); + + avg_color += fc; + + m_luma[i] = static_cast(c.r + c.g + c.b); + m_sorted_luma[0][i] = i; + } + avg_color *= (1.0f / static_cast(n)); + m_avg_color = avg_color; + + m_br = rg_etc1::clamp(static_cast(m_avg_color[0] * m_limit / 255.0f + .5f), 0, m_limit); + m_bg = rg_etc1::clamp(static_cast(m_avg_color[1] * m_limit / 255.0f + .5f), 0, m_limit); + m_bb = rg_etc1::clamp(static_cast(m_avg_color[2] * m_limit / 255.0f + .5f), 0, m_limit); + + if (m_pParams->m_quality <= cMediumQuality) + { + m_pSorted_luma_indices = indirect_radix_sort(n, m_sorted_luma[0], m_sorted_luma[1], m_luma, 0, sizeof(m_luma[0]), false); + m_pSorted_luma = m_sorted_luma[0]; + if (m_pSorted_luma_indices == m_sorted_luma[0]) + m_pSorted_luma = m_sorted_luma[1]; + + for (uint i = 0; i < n; i++) + m_pSorted_luma[i] = m_luma[m_pSorted_luma_indices[i]]; + } + + m_best_solution.m_coords.clear(); + m_best_solution.m_valid = false; + m_best_solution.m_error = cUINT64_MAX; + } + + bool etc1_optimizer::evaluate_solution(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution) + { + trial_solution.m_valid = false; + + if (m_pParams->m_constrain_against_base_color5) + { + const int dr = coords.m_unscaled_color.r - m_pParams->m_base_color5.r; + const int dg = coords.m_unscaled_color.g - m_pParams->m_base_color5.g; + const int db = coords.m_unscaled_color.b - m_pParams->m_base_color5.b; + + if ((rg_etc1::minimum(dr, dg, db) < cETC1ColorDeltaMin) || (rg_etc1::maximum(dr, dg, db) > cETC1ColorDeltaMax)) + return false; + } + + const color_quad_u8 base_color(coords.get_scaled_color()); + + const uint n = 8; + + trial_solution.m_error = cUINT64_MAX; + + for (uint inten_table = 0; inten_table < cETC1IntenModifierValues; inten_table++) + { + const int* pInten_table = g_etc1_inten_tables[inten_table]; + + color_quad_u8 block_colors[4]; + for (uint s = 0; s < 4; s++) + { + const int yd = pInten_table[s]; + block_colors[s].set(base_color.r + yd, base_color.g + yd, base_color.b + yd, 0); + } + + uint64 total_error = 0; + + const color_quad_u8* pSrc_pixels = m_pParams->m_pSrc_pixels; + for (uint c = 0; c < n; c++) + { + const color_quad_u8& src_pixel = *pSrc_pixels++; + + uint best_selector_index = 0; + uint best_error = rg_etc1::square(src_pixel.r - block_colors[0].r) + rg_etc1::square(src_pixel.g - block_colors[0].g) + rg_etc1::square(src_pixel.b - block_colors[0].b); + + uint trial_error = rg_etc1::square(src_pixel.r - block_colors[1].r) + rg_etc1::square(src_pixel.g - block_colors[1].g) + rg_etc1::square(src_pixel.b - block_colors[1].b); + if (trial_error < best_error) + { + best_error = trial_error; + best_selector_index = 1; + } + + trial_error = rg_etc1::square(src_pixel.r - block_colors[2].r) + rg_etc1::square(src_pixel.g - block_colors[2].g) + rg_etc1::square(src_pixel.b - block_colors[2].b); + if (trial_error < best_error) + { + best_error = trial_error; + best_selector_index = 2; + } + + trial_error = rg_etc1::square(src_pixel.r - block_colors[3].r) + rg_etc1::square(src_pixel.g - block_colors[3].g) + rg_etc1::square(src_pixel.b - block_colors[3].b); + if (trial_error < best_error) + { + best_error = trial_error; + best_selector_index = 3; + } + + m_temp_selectors[c] = static_cast(best_selector_index); + + total_error += best_error; + if (total_error >= trial_solution.m_error) + break; + } + + if (total_error < trial_solution.m_error) + { + trial_solution.m_error = total_error; + trial_solution.m_coords.m_inten_table = inten_table; + memcpy(trial_solution.m_selectors, m_temp_selectors, 8); + trial_solution.m_valid = true; + } + } + trial_solution.m_coords.m_unscaled_color = coords.m_unscaled_color; + trial_solution.m_coords.m_color4 = m_pParams->m_use_color4; + + bool success = false; + if (pBest_solution) + { + if (trial_solution.m_error < pBest_solution->m_error) + { + *pBest_solution = trial_solution; + success = true; + } + } + + return success; + } + + bool etc1_optimizer::evaluate_solution_fast(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution) + { + if (m_pParams->m_constrain_against_base_color5) + { + const int dr = coords.m_unscaled_color.r - m_pParams->m_base_color5.r; + const int dg = coords.m_unscaled_color.g - m_pParams->m_base_color5.g; + const int db = coords.m_unscaled_color.b - m_pParams->m_base_color5.b; + + if ((rg_etc1::minimum(dr, dg, db) < cETC1ColorDeltaMin) || (rg_etc1::maximum(dr, dg, db) > cETC1ColorDeltaMax)) + { + trial_solution.m_valid = false; + return false; + } + } + + const color_quad_u8 base_color(coords.get_scaled_color()); + + const uint n = 8; + + trial_solution.m_error = cUINT64_MAX; + + for (int inten_table = cETC1IntenModifierValues - 1; inten_table >= 0; --inten_table) + { + const int* pInten_table = g_etc1_inten_tables[inten_table]; + + uint block_inten[4]; + color_quad_u8 block_colors[4]; + for (uint s = 0; s < 4; s++) + { + const int yd = pInten_table[s]; + color_quad_u8 block_color(base_color.r + yd, base_color.g + yd, base_color.b + yd, 0); + block_colors[s] = block_color; + block_inten[s] = block_color.r + block_color.g + block_color.b; + } + + // evaluate_solution_fast() enforces/assumesd a total ordering of the input colors along the intensity (1,1,1) axis to more quickly classify the inputs to selectors. + // The inputs colors have been presorted along the projection onto this axis, and ETC1 block colors are always ordered along the intensity axis, so this classification is fast. + // 0 1 2 3 + // 01 12 23 + const uint block_inten_midpoints[3] = { block_inten[0] + block_inten[1], block_inten[1] + block_inten[2], block_inten[2] + block_inten[3] }; + + uint64 total_error = 0; + const color_quad_u8* pSrc_pixels = m_pParams->m_pSrc_pixels; + if ((m_pSorted_luma[n - 1] * 2) < block_inten_midpoints[0]) + { + if (block_inten[0] > m_pSorted_luma[n - 1]) + { + const uint min_error = labs(block_inten[0] - m_pSorted_luma[n - 1]); + if (min_error >= trial_solution.m_error) + continue; + } + + memset(&m_temp_selectors[0], 0, n); + + for (uint c = 0; c < n; c++) + total_error += block_colors[0].squared_distance_rgb(pSrc_pixels[c]); + } + else if ((m_pSorted_luma[0] * 2) >= block_inten_midpoints[2]) + { + if (m_pSorted_luma[0] > block_inten[3]) + { + const uint min_error = labs(m_pSorted_luma[0] - block_inten[3]); + if (min_error >= trial_solution.m_error) + continue; + } + + memset(&m_temp_selectors[0], 3, n); + + for (uint c = 0; c < n; c++) + total_error += block_colors[3].squared_distance_rgb(pSrc_pixels[c]); + } + else + { + uint cur_selector = 0, c; + for (c = 0; c < n; c++) + { + const uint y = m_pSorted_luma[c]; + while ((y * 2) >= block_inten_midpoints[cur_selector]) + if (++cur_selector > 2) + goto done; + const uint sorted_pixel_index = m_pSorted_luma_indices[c]; + m_temp_selectors[sorted_pixel_index] = static_cast(cur_selector); + total_error += block_colors[cur_selector].squared_distance_rgb(pSrc_pixels[sorted_pixel_index]); + } +done: + while (c < n) + { + const uint sorted_pixel_index = m_pSorted_luma_indices[c]; + m_temp_selectors[sorted_pixel_index] = 3; + total_error += block_colors[3].squared_distance_rgb(pSrc_pixels[sorted_pixel_index]); + ++c; + } + } + + if (total_error < trial_solution.m_error) + { + trial_solution.m_error = total_error; + trial_solution.m_coords.m_inten_table = inten_table; + memcpy(trial_solution.m_selectors, m_temp_selectors, n); + trial_solution.m_valid = true; + if (!total_error) + break; + } + } + trial_solution.m_coords.m_unscaled_color = coords.m_unscaled_color; + trial_solution.m_coords.m_color4 = m_pParams->m_use_color4; + + bool success = false; + if (pBest_solution) + { + if (trial_solution.m_error < pBest_solution->m_error) + { + *pBest_solution = trial_solution; + success = true; + } + } + + return success; + } + + static uint etc1_decode_value(uint diff, uint inten, uint selector, uint packed_c) + { + const uint limit = diff ? 32 : 16; limit; + RG_ETC1_ASSERT((diff < 2) && (inten < 8) && (selector < 4) && (packed_c < limit)); + int c; + if (diff) + c = (packed_c >> 2) | (packed_c << 3); + else + c = packed_c | (packed_c << 4); + c += g_etc1_inten_tables[inten][selector]; + c = rg_etc1::clamp(c, 0, 255); + return c; + } + + static inline int mul_8bit(int a, int b) { int t = a*b + 128; return (t + (t >> 8)) >> 8; } + + void pack_etc1_block_init() + { + for (uint diff = 0; diff < 2; diff++) + { + const uint limit = diff ? 32 : 16; + + for (uint inten = 0; inten < 8; inten++) + { + for (uint selector = 0; selector < 4; selector++) + { + const uint inverse_table_index = diff + (inten << 1) + (selector << 4); + for (uint color = 0; color < 256; color++) + { + uint best_error = cUINT32_MAX, best_packed_c = 0; + for (uint packed_c = 0; packed_c < limit; packed_c++) + { + int v = etc1_decode_value(diff, inten, selector, packed_c); + uint err = labs(v - color); + if (err < best_error) + { + best_error = err; + best_packed_c = packed_c; + if (!best_error) + break; + } + } + RG_ETC1_ASSERT(best_error <= 255); + g_etc1_inverse_lookup[inverse_table_index][color] = static_cast(best_packed_c | (best_error << 8)); + } + } + } + } + + uint expand5[32]; + for(int i = 0; i < 32; i++) + expand5[i] = (i << 3) | (i >> 2); + + for(int i = 0; i < 256 + 16; i++) + { + int v = clamp(i - 8, 0, 255); + g_quant5_tab[i] = static_cast(expand5[mul_8bit(v,31)]); + } + } + + // Packs solid color blocks efficiently using a set of small precomputed tables. + // For random 888 inputs, MSE results are better than Erricson's ETC1 packer in "slow" mode ~9.5% of the time, is slightly worse only ~.01% of the time, and is equal the rest of the time. + static uint64 pack_etc1_block_solid_color(etc1_block& block, const uint8* pColor, etc1_pack_params& pack_params) + { + pack_params; + RG_ETC1_ASSERT(g_etc1_inverse_lookup[0][255]); + + static uint s_next_comp[4] = { 1, 2, 0, 1 }; + + uint best_error = cUINT32_MAX, best_i = 0; + int best_x = 0, best_packed_c1 = 0, best_packed_c2 = 0; + + // For each possible 8-bit value, there is a precomputed list of diff/inten/selector configurations that allow that 8-bit value to be encoded with no error. + for (uint i = 0; i < 3; i++) + { + const uint c1 = pColor[s_next_comp[i]], c2 = pColor[s_next_comp[i + 1]]; + + const int delta_range = 1; + for (int delta = -delta_range; delta <= delta_range; delta++) + { + const int c_plus_delta = rg_etc1::clamp(pColor[i] + delta, 0, 255); + + const uint16* pTable; + if (!c_plus_delta) + pTable = g_color8_to_etc_block_config_0_255[0]; + else if (c_plus_delta == 255) + pTable = g_color8_to_etc_block_config_0_255[1]; + else + pTable = g_color8_to_etc_block_config_1_to_254[c_plus_delta - 1]; + + do + { + const uint x = *pTable++; + +#ifdef RG_ETC1_BUILD_DEBUG + const uint diff = x & 1; + const uint inten = (x >> 1) & 7; + const uint selector = (x >> 4) & 3; + const uint p0 = (x >> 8) & 255; + RG_ETC1_ASSERT(etc1_decode_value(diff, inten, selector, p0) == (uint)c_plus_delta); +#endif + + const uint16* pInverse_table = g_etc1_inverse_lookup[x & 0xFF]; + uint16 p1 = pInverse_table[c1]; + uint16 p2 = pInverse_table[c2]; + const uint trial_error = rg_etc1::square(c_plus_delta - pColor[i]) + rg_etc1::square(p1 >> 8) + rg_etc1::square(p2 >> 8); + if (trial_error < best_error) + { + best_error = trial_error; + best_x = x; + best_packed_c1 = p1 & 0xFF; + best_packed_c2 = p2 & 0xFF; + best_i = i; + if (!best_error) + goto found_perfect_match; + } + } while (*pTable != 0xFFFF); + } + } +found_perfect_match: + + const uint diff = best_x & 1; + const uint inten = (best_x >> 1) & 7; + + block.m_bytes[3] = static_cast(((inten | (inten << 3)) << 2) | (diff << 1)); + + const uint etc1_selector = g_selector_index_to_etc1[(best_x >> 4) & 3]; + *reinterpret_cast(&block.m_bytes[4]) = (etc1_selector & 2) ? 0xFFFF : 0; + *reinterpret_cast(&block.m_bytes[6]) = (etc1_selector & 1) ? 0xFFFF : 0; + + const uint best_packed_c0 = (best_x >> 8) & 255; + if (diff) + { + block.m_bytes[best_i] = static_cast(best_packed_c0 << 3); + block.m_bytes[s_next_comp[best_i]] = static_cast(best_packed_c1 << 3); + block.m_bytes[s_next_comp[best_i+1]] = static_cast(best_packed_c2 << 3); + } + else + { + block.m_bytes[best_i] = static_cast(best_packed_c0 | (best_packed_c0 << 4)); + block.m_bytes[s_next_comp[best_i]] = static_cast(best_packed_c1 | (best_packed_c1 << 4)); + block.m_bytes[s_next_comp[best_i+1]] = static_cast(best_packed_c2 | (best_packed_c2 << 4)); + } + + return best_error; + } + + static uint pack_etc1_block_solid_color_constrained( + etc1_optimizer::results& results, + uint num_colors, const uint8* pColor, + etc1_pack_params& pack_params, + bool use_diff, + const color_quad_u8* pBase_color5_unscaled) + { + RG_ETC1_ASSERT(g_etc1_inverse_lookup[0][255]); + + pack_params; + static uint s_next_comp[4] = { 1, 2, 0, 1 }; + + uint best_error = cUINT32_MAX, best_i = 0; + int best_x = 0, best_packed_c1 = 0, best_packed_c2 = 0; + + // For each possible 8-bit value, there is a precomputed list of diff/inten/selector configurations that allow that 8-bit value to be encoded with no error. + for (uint i = 0; i < 3; i++) + { + const uint c1 = pColor[s_next_comp[i]], c2 = pColor[s_next_comp[i + 1]]; + + const int delta_range = 1; + for (int delta = -delta_range; delta <= delta_range; delta++) + { + const int c_plus_delta = rg_etc1::clamp(pColor[i] + delta, 0, 255); + + const uint16* pTable; + if (!c_plus_delta) + pTable = g_color8_to_etc_block_config_0_255[0]; + else if (c_plus_delta == 255) + pTable = g_color8_to_etc_block_config_0_255[1]; + else + pTable = g_color8_to_etc_block_config_1_to_254[c_plus_delta - 1]; + + do + { + const uint x = *pTable++; + const uint diff = x & 1; + if (static_cast(use_diff) != diff) + { + if (*pTable == 0xFFFF) + break; + continue; + } + + if ((diff) && (pBase_color5_unscaled)) + { + const int p0 = (x >> 8) & 255; + int delta = p0 - static_cast(pBase_color5_unscaled->c[i]); + if ((delta < cETC1ColorDeltaMin) || (delta > cETC1ColorDeltaMax)) + { + if (*pTable == 0xFFFF) + break; + continue; + } + } + +#ifdef RG_ETC1_BUILD_DEBUG + { + const uint inten = (x >> 1) & 7; + const uint selector = (x >> 4) & 3; + const uint p0 = (x >> 8) & 255; + RG_ETC1_ASSERT(etc1_decode_value(diff, inten, selector, p0) == (uint)c_plus_delta); + } +#endif + + const uint16* pInverse_table = g_etc1_inverse_lookup[x & 0xFF]; + uint16 p1 = pInverse_table[c1]; + uint16 p2 = pInverse_table[c2]; + + if ((diff) && (pBase_color5_unscaled)) + { + int delta1 = (p1 & 0xFF) - static_cast(pBase_color5_unscaled->c[s_next_comp[i]]); + int delta2 = (p2 & 0xFF) - static_cast(pBase_color5_unscaled->c[s_next_comp[i + 1]]); + if ((delta1 < cETC1ColorDeltaMin) || (delta1 > cETC1ColorDeltaMax) || (delta2 < cETC1ColorDeltaMin) || (delta2 > cETC1ColorDeltaMax)) + { + if (*pTable == 0xFFFF) + break; + continue; + } + } + + const uint trial_error = rg_etc1::square(c_plus_delta - pColor[i]) + rg_etc1::square(p1 >> 8) + rg_etc1::square(p2 >> 8); + if (trial_error < best_error) + { + best_error = trial_error; + best_x = x; + best_packed_c1 = p1 & 0xFF; + best_packed_c2 = p2 & 0xFF; + best_i = i; + if (!best_error) + goto found_perfect_match; + } + } while (*pTable != 0xFFFF); + } + } +found_perfect_match: + + if (best_error == cUINT32_MAX) + return best_error; + + best_error *= num_colors; + + results.m_n = num_colors; + results.m_block_color4 = !(best_x & 1); + results.m_block_inten_table = (best_x >> 1) & 7; + memset(results.m_pSelectors, (best_x >> 4) & 3, num_colors); + + const uint best_packed_c0 = (best_x >> 8) & 255; + results.m_block_color_unscaled[best_i] = static_cast(best_packed_c0); + results.m_block_color_unscaled[s_next_comp[best_i]] = static_cast(best_packed_c1); + results.m_block_color_unscaled[s_next_comp[best_i + 1]] = static_cast(best_packed_c2); + results.m_error = best_error; + + return best_error; + } + + // Function originally from RYG's public domain real-time DXT1 compressor, modified for 555. + static void dither_block_555(color_quad_u8* dest, const color_quad_u8* block) + { + int err[8],*ep1 = err,*ep2 = err+4; + uint8 *quant = g_quant5_tab+8; + + memset(dest, 0xFF, sizeof(color_quad_u8)*16); + + // process channels seperately + for(int ch=0;ch<3;ch++) + { + uint8* bp = (uint8*)block; + uint8* dp = (uint8*)dest; + + bp += ch; dp += ch; + + memset(err,0, sizeof(err)); + for(int y = 0; y < 4; y++) + { + // pixel 0 + dp[ 0] = quant[bp[ 0] + ((3*ep2[1] + 5*ep2[0]) >> 4)]; + ep1[0] = bp[ 0] - dp[ 0]; + + // pixel 1 + dp[ 4] = quant[bp[ 4] + ((7*ep1[0] + 3*ep2[2] + 5*ep2[1] + ep2[0]) >> 4)]; + ep1[1] = bp[ 4] - dp[ 4]; + + // pixel 2 + dp[ 8] = quant[bp[ 8] + ((7*ep1[1] + 3*ep2[3] + 5*ep2[2] + ep2[1]) >> 4)]; + ep1[2] = bp[ 8] - dp[ 8]; + + // pixel 3 + dp[12] = quant[bp[12] + ((7*ep1[2] + 5*ep2[3] + ep2[2]) >> 4)]; + ep1[3] = bp[12] - dp[12]; + + // advance to next line + int* tmp = ep1; ep1 = ep2; ep2 = tmp; + bp += 16; + dp += 16; + } + } + } + + unsigned int pack_etc1_block(void* pETC1_block, const unsigned int* pSrc_pixels_rgba, etc1_pack_params& pack_params) + { + const color_quad_u8* pSrc_pixels = reinterpret_cast(pSrc_pixels_rgba); + etc1_block& dst_block = *static_cast(pETC1_block); + +#ifdef RG_ETC1_BUILD_DEBUG + // Ensure all alpha values are 0xFF. + for (uint i = 0; i < 16; i++) + { + RG_ETC1_ASSERT(pSrc_pixels[i].a == 255); + } +#endif + + color_quad_u8 src_pixel0(pSrc_pixels[0]); + + // Check for solid block. + const uint32 first_pixel_u32 = pSrc_pixels->m_u32; + int r; + for (r = 15; r >= 1; --r) + if (pSrc_pixels[r].m_u32 != first_pixel_u32) + break; + if (!r) + return static_cast(16 * pack_etc1_block_solid_color(dst_block, &pSrc_pixels[0].r, pack_params)); + + color_quad_u8 dithered_pixels[16]; + if (pack_params.m_dithering) + { + dither_block_555(dithered_pixels, pSrc_pixels); + pSrc_pixels = dithered_pixels; + } + + etc1_optimizer optimizer; + + uint64 best_error = cUINT64_MAX; + uint best_flip = false, best_use_color4 = false; + + uint8 best_selectors[2][8]; + etc1_optimizer::results best_results[2]; + for (uint i = 0; i < 2; i++) + { + best_results[i].m_n = 8; + best_results[i].m_pSelectors = best_selectors[i]; + } + + uint8 selectors[3][8]; + etc1_optimizer::results results[3]; + + for (uint i = 0; i < 3; i++) + { + results[i].m_n = 8; + results[i].m_pSelectors = selectors[i]; + } + + color_quad_u8 subblock_pixels[8]; + + etc1_optimizer::params params(pack_params); + params.m_num_src_pixels = 8; + params.m_pSrc_pixels = subblock_pixels; + + for (uint flip = 0; flip < 2; flip++) + { + for (uint use_color4 = 0; use_color4 < 2; use_color4++) + { + uint64 trial_error = 0; + + uint subblock; + for (subblock = 0; subblock < 2; subblock++) + { + if (flip) + memcpy(subblock_pixels, pSrc_pixels + subblock * 8, sizeof(color_quad_u8) * 8); + else + { + const color_quad_u8* pSrc_col = pSrc_pixels + subblock * 2; + subblock_pixels[0] = pSrc_col[0]; subblock_pixels[1] = pSrc_col[4]; subblock_pixels[2] = pSrc_col[8]; subblock_pixels[3] = pSrc_col[12]; + subblock_pixels[4] = pSrc_col[1]; subblock_pixels[5] = pSrc_col[5]; subblock_pixels[6] = pSrc_col[9]; subblock_pixels[7] = pSrc_col[13]; + } + + results[2].m_error = cUINT64_MAX; + if ((params.m_quality >= cMediumQuality) && ((subblock) || (use_color4))) + { + const uint32 subblock_pixel0_u32 = subblock_pixels[0].m_u32; + for (r = 7; r >= 1; --r) + if (subblock_pixels[r].m_u32 != subblock_pixel0_u32) + break; + if (!r) + { + pack_etc1_block_solid_color_constrained(results[2], 8, &subblock_pixels[0].r, pack_params, !use_color4, (subblock && !use_color4) ? &results[0].m_block_color_unscaled : NULL); + } + } + + params.m_use_color4 = (use_color4 != 0); + params.m_constrain_against_base_color5 = false; + + if ((!use_color4) && (subblock)) + { + params.m_constrain_against_base_color5 = true; + params.m_base_color5 = results[0].m_block_color_unscaled; + } + + if (params.m_quality == cHighQuality) + { + static const int s_scan_delta_0_to_4[] = { -4, -3, -2, -1, 0, 1, 2, 3, 4 }; + params.m_scan_delta_size = RG_ETC1_ARRAY_SIZE(s_scan_delta_0_to_4); + params.m_pScan_deltas = s_scan_delta_0_to_4; + } + else if (params.m_quality == cMediumQuality) + { + static const int s_scan_delta_0_to_1[] = { -1, 0, 1 }; + params.m_scan_delta_size = RG_ETC1_ARRAY_SIZE(s_scan_delta_0_to_1); + params.m_pScan_deltas = s_scan_delta_0_to_1; + } + else + { + static const int s_scan_delta_0[] = { 0 }; + params.m_scan_delta_size = RG_ETC1_ARRAY_SIZE(s_scan_delta_0); + params.m_pScan_deltas = s_scan_delta_0; + } + + optimizer.init(params, results[subblock]); + if (!optimizer.compute()) + break; + + if (params.m_quality >= cMediumQuality) + { + // TODO: Fix fairly arbitrary/unrefined thresholds that control how far away to scan for potentially better solutions. + const uint refinement_error_thresh0 = 3000; + const uint refinement_error_thresh1 = 6000; + if (results[subblock].m_error > refinement_error_thresh0) + { + if (params.m_quality == cMediumQuality) + { + static const int s_scan_delta_2_to_3[] = { -3, -2, 2, 3 }; + params.m_scan_delta_size = RG_ETC1_ARRAY_SIZE(s_scan_delta_2_to_3); + params.m_pScan_deltas = s_scan_delta_2_to_3; + } + else + { + static const int s_scan_delta_5_to_5[] = { -5, 5 }; + static const int s_scan_delta_5_to_8[] = { -8, -7, -6, -5, 5, 6, 7, 8 }; + if (results[subblock].m_error > refinement_error_thresh1) + { + params.m_scan_delta_size = RG_ETC1_ARRAY_SIZE(s_scan_delta_5_to_8); + params.m_pScan_deltas = s_scan_delta_5_to_8; + } + else + { + params.m_scan_delta_size = RG_ETC1_ARRAY_SIZE(s_scan_delta_5_to_5); + params.m_pScan_deltas = s_scan_delta_5_to_5; + } + } + + if (!optimizer.compute()) + break; + } + + if (results[2].m_error < results[subblock].m_error) + results[subblock] = results[2]; + } + + trial_error += results[subblock].m_error; + if (trial_error >= best_error) + break; + } + + if (subblock < 2) + continue; + + best_error = trial_error; + best_results[0] = results[0]; + best_results[1] = results[1]; + best_flip = flip; + best_use_color4 = use_color4; + + } // use_color4 + + } // flip + + int dr = best_results[1].m_block_color_unscaled.r - best_results[0].m_block_color_unscaled.r; + int dg = best_results[1].m_block_color_unscaled.g - best_results[0].m_block_color_unscaled.g; + int db = best_results[1].m_block_color_unscaled.b - best_results[0].m_block_color_unscaled.b; + RG_ETC1_ASSERT(best_use_color4 || (rg_etc1::minimum(dr, dg, db) >= cETC1ColorDeltaMin) && (rg_etc1::maximum(dr, dg, db) <= cETC1ColorDeltaMax)); + + if (best_use_color4) + { + dst_block.m_bytes[0] = static_cast(best_results[1].m_block_color_unscaled.r | (best_results[0].m_block_color_unscaled.r << 4)); + dst_block.m_bytes[1] = static_cast(best_results[1].m_block_color_unscaled.g | (best_results[0].m_block_color_unscaled.g << 4)); + dst_block.m_bytes[2] = static_cast(best_results[1].m_block_color_unscaled.b | (best_results[0].m_block_color_unscaled.b << 4)); + } + else + { + if (dr < 0) dr += 8; dst_block.m_bytes[0] = static_cast((best_results[0].m_block_color_unscaled.r << 3) | dr); + if (dg < 0) dg += 8; dst_block.m_bytes[1] = static_cast((best_results[0].m_block_color_unscaled.g << 3) | dg); + if (db < 0) db += 8; dst_block.m_bytes[2] = static_cast((best_results[0].m_block_color_unscaled.b << 3) | db); + } + + dst_block.m_bytes[3] = static_cast( (best_results[1].m_block_inten_table << 2) | (best_results[0].m_block_inten_table << 5) | ((~best_use_color4 & 1) << 1) | best_flip ); + + uint selector0 = 0, selector1 = 0; + if (best_flip) + { + // flipped: + // { 0, 0 }, { 1, 0 }, { 2, 0 }, { 3, 0 }, + // { 0, 1 }, { 1, 1 }, { 2, 1 }, { 3, 1 } + // + // { 0, 2 }, { 1, 2 }, { 2, 2 }, { 3, 2 }, + // { 0, 3 }, { 1, 3 }, { 2, 3 }, { 3, 3 } + const uint8* pSelectors0 = best_results[0].m_pSelectors; + const uint8* pSelectors1 = best_results[1].m_pSelectors; + for (int x = 3; x >= 0; --x) + { + uint b; + b = g_selector_index_to_etc1[pSelectors1[4 + x]]; + selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1); + + b = g_selector_index_to_etc1[pSelectors1[x]]; + selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1); + + b = g_selector_index_to_etc1[pSelectors0[4 + x]]; + selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1); + + b = g_selector_index_to_etc1[pSelectors0[x]]; + selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1); + } + } + else + { + // non-flipped: + // { 0, 0 }, { 0, 1 }, { 0, 2 }, { 0, 3 }, + // { 1, 0 }, { 1, 1 }, { 1, 2 }, { 1, 3 } + // + // { 2, 0 }, { 2, 1 }, { 2, 2 }, { 2, 3 }, + // { 3, 0 }, { 3, 1 }, { 3, 2 }, { 3, 3 } + for (int subblock = 1; subblock >= 0; --subblock) + { + const uint8* pSelectors = best_results[subblock].m_pSelectors + 4; + for (uint i = 0; i < 2; i++) + { + uint b; + b = g_selector_index_to_etc1[pSelectors[3]]; + selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1); + + b = g_selector_index_to_etc1[pSelectors[2]]; + selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1); + + b = g_selector_index_to_etc1[pSelectors[1]]; + selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1); + + b = g_selector_index_to_etc1[pSelectors[0]]; + selector0 = (selector0 << 1) | (b & 1);selector1 = (selector1 << 1) | (b >> 1); + + pSelectors -= 4; + } + } + } + + dst_block.m_bytes[4] = static_cast(selector1 >> 8); dst_block.m_bytes[5] = static_cast(selector1 & 0xFF); + dst_block.m_bytes[6] = static_cast(selector0 >> 8); dst_block.m_bytes[7] = static_cast(selector0 & 0xFF); + + return static_cast(best_error); + } + +} // namespace rg_etc1 + +} // namespace crnlib diff --git a/crnlib/crn_rg_etc1.h b/crnlib/crn_rg_etc1.h new file mode 100644 index 0000000..ba8cad0 --- /dev/null +++ b/crnlib/crn_rg_etc1.h @@ -0,0 +1,80 @@ +// File: rg_etc1.h - Fast, high quality ETC1 block packer/unpacker - Rich Geldreich +// Please see ZLIB license at the end of this file. +#pragma once + +namespace crnlib { + +namespace rg_etc1 +{ + // Unpacks an 8-byte ETC1 compressed block to a block of 4x4 32bpp RGBA pixels. + // Returns false if the block is invalid. Invalid blocks will still be unpacked with clamping. + // This function is thread safe, and does not dynamically allocate any memory. + // If preserve_alpha is true, the alpha channel of the destination pixels will not be overwritten. Otherwise, alpha will be set to 255. + bool unpack_etc1_block(const void *pETC1_block, unsigned int* pDst_pixels_rgba, bool preserve_alpha = false); + + // Quality setting = the higher the quality, the slower. + // To pack large textures, it is highly recommended to call pack_etc1_block() in parallel, on different blocks, from multiple threads (particularly when using cHighQuality). + enum etc1_quality + { + cLowQuality, + cMediumQuality, + cHighQuality, + }; + + struct etc1_pack_params + { + etc1_quality m_quality; + bool m_dithering; + + inline etc1_pack_params() + { + clear(); + } + + void clear() + { + m_quality = cHighQuality; + m_dithering = false; + } + }; + + // Important: pack_etc1_block_init() must be called before calling pack_etc1_block(). + void pack_etc1_block_init(); + + // Packs a 4x4 block of 32bpp RGBA pixels to an 8-byte ETC1 block. + // 32-bit RGBA pixels must always be arranged as (R,G,B,A) (R first, A last) in memory, independent of platform endianness. A should always be 255. + // Returns squared error of result. + // This function is thread safe, and does not dynamically allocate any memory. + // pack_etc1_block() does not currently support "perceptual" colorspace metrics - it primarily optimizes for RGB RMSE. + unsigned int pack_etc1_block(void* pETC1_block, const unsigned int* pSrc_pixels_rgba, etc1_pack_params& pack_params); + +} // namespace rg_etc1 + +} // namespace crnlib + +//------------------------------------------------------------------------------ +// +// rg_etc1 uses the ZLIB license: +// http://opensource.org/licenses/Zlib +// +// Copyright (c) 2012 Rich Geldreich +// +// This software is provided 'as-is', without any express or implied +// warranty. In no event will the authors be held liable for any damages +// arising from the use of this software. +// +// Permission is granted to anyone to use this software for any purpose, +// including commercial applications, and to alter it and redistribute it +// freely, subject to the following restrictions: +// +// 1. The origin of this software must not be misrepresented; you must not +// claim that you wrote the original software. If you use this software +// in a product, an acknowledgment in the product documentation would be +// appreciated but is not required. +// +// 2. Altered source versions must be plainly marked as such, and must not be +// misrepresented as being the original software. +// +// 3. This notice may not be removed or altered from any source distribution. +// +//------------------------------------------------------------------------------ diff --git a/crnlib/crn_stb_image.cpp b/crnlib/crn_stb_image.cpp index 0a37228..0778e39 100644 --- a/crnlib/crn_stb_image.cpp +++ b/crnlib/crn_stb_image.cpp @@ -2900,7 +2900,7 @@ static int shiftsigned(int v, int shift, int bits) static stbi_uc *bmp_load(stbi *s, int *x, int *y, int *comp, int req_comp) { uint8 *out; - unsigned int mr=0,mg=0,mb=0,ma=0, fake_a=0; + unsigned int mr=0,mg=0,mb=0,ma=0, fake_a=0; (void)fake_a; stbi_uc pal[256][4]; int psize=0,i,j,compress=0,width; int bpp, flip_vertically, pad, target, offset, hsz; diff --git a/crnlib/crn_texture_comp.cpp b/crnlib/crn_texture_comp.cpp index 862096f..8423541 100644 --- a/crnlib/crn_texture_comp.cpp +++ b/crnlib/crn_texture_comp.cpp @@ -231,7 +231,7 @@ namespace crnlib return true; } - static bool create_dds_tex(const crn_comp_params ¶ms, dds_texture &dds_tex) + static bool create_dds_tex(const crn_comp_params ¶ms, mipmapped_texture &dds_tex) { image_u8 images[cCRNMaxFaces][cCRNMaxLevels]; @@ -281,7 +281,7 @@ namespace crnlib return true; } - bool create_texture_mipmaps(dds_texture &work_tex, const crn_comp_params ¶ms, const crn_mipmap_params &mipmap_params, bool generate_mipmaps) + bool create_texture_mipmaps(mipmapped_texture &work_tex, const crn_comp_params ¶ms, const crn_mipmap_params &mipmap_params, bool generate_mipmaps) { crn_comp_params new_params(params); @@ -438,7 +438,7 @@ namespace crnlib bool srgb = mipmap_params.m_gamma_filtering != 0; - dds_texture::resample_params res_params; + mipmapped_texture::resample_params res_params; res_params.m_pFilter = pFilter; res_params.m_wrapping = mipmap_params.m_tiled != 0; if (work_tex.get_num_faces()) @@ -462,7 +462,7 @@ namespace crnlib const char* pFilter = crn_get_mip_filter_name(mipmap_params.m_filter); - dds_texture::generate_mipmap_params gen_params; + mipmapped_texture::generate_mipmap_params gen_params; gen_params.m_pFilter = pFilter; gen_params.m_wrapping = mipmap_params.m_tiled != 0; gen_params.m_renormalize = mipmap_params.m_renormalize != 0; @@ -496,7 +496,7 @@ namespace crnlib if (pActual_bitrate) *pActual_bitrate = 0.0f; if (pActual_quality_level) *pActual_quality_level = 0; - dds_texture work_tex; + mipmapped_texture work_tex; if (!create_dds_tex(params, work_tex)) { console::error("Failed creating DDS texture from crn_comp_params!"); diff --git a/crnlib/crn_texture_comp.h b/crnlib/crn_texture_comp.h index 12692e6..9319054 100644 --- a/crnlib/crn_texture_comp.h +++ b/crnlib/crn_texture_comp.h @@ -6,7 +6,7 @@ namespace crnlib { - class dds_texture; + class mipmapped_texture; class itexture_comp { @@ -27,7 +27,7 @@ namespace crnlib }; bool create_compressed_texture(const crn_comp_params ¶ms, crnlib::vector &comp_data, uint32 *pActual_quality_level, float *pActual_bitrate); - bool create_texture_mipmaps(dds_texture &work_tex, const crn_comp_params ¶ms, const crn_mipmap_params &mipmap_params, bool generate_mipmaps); + bool create_texture_mipmaps(mipmapped_texture &work_tex, const crn_comp_params ¶ms, const crn_mipmap_params &mipmap_params, bool generate_mipmaps); bool create_compressed_texture(const crn_comp_params ¶ms, const crn_mipmap_params &mipmap_params, crnlib::vector &comp_data, uint32 *pActual_quality_level, float *pActual_bitrate); } // namespace crnlib diff --git a/crnlib/crn_texture_conversion.cpp b/crnlib/crn_texture_conversion.cpp index b6334e9..9255b8a 100644 --- a/crnlib/crn_texture_conversion.cpp +++ b/crnlib/crn_texture_conversion.cpp @@ -28,7 +28,7 @@ namespace crnlib bool convert_stats::init( const char* pSrc_filename, const char* pDst_filename, - dds_texture& src_tex, + mipmapped_texture& src_tex, texture_file_types::format dst_file_type, bool lzma_stats) { @@ -74,7 +74,7 @@ namespace crnlib } } - if (!m_output_tex.load_from_file(pDst_filename, m_dst_file_type)) + if (!m_output_tex.read_from_file(pDst_filename, m_dst_file_type)) { console::error("Failed loading output file: %s", pDst_filename); return false; @@ -130,42 +130,46 @@ namespace crnlib } else { + uint num_faces = math::minimum(m_pInput_tex->get_num_faces(), m_output_tex.get_num_faces()); uint num_levels = math::minimum(m_pInput_tex->get_num_levels(), m_output_tex.get_num_levels()); if (!mip_stats) num_levels = 1; - for (uint level = 0; level < num_levels; level++) + for (uint face = 0; face < num_faces; face++) { - image_u8 a, b; - image_u8* pA = m_pInput_tex->get_level_image(0, level, a); - image_u8* pB = m_output_tex.get_level_image(0, level, b); - - if (pA && pB) + for (uint level = 0; level < num_levels; level++) { - image_u8 grayscale_a, grayscale_b; - if (grayscale_sampling) + image_u8 a, b; + image_u8* pA = m_pInput_tex->get_level_image(face, level, a); + image_u8* pB = m_output_tex.get_level_image(face, level, b); + + if (pA && pB) { - grayscale_a = *pA; - grayscale_a.convert_to_grayscale(); - pA = &grayscale_a; + image_u8 grayscale_a, grayscale_b; + if (grayscale_sampling) + { + grayscale_a = *pA; + grayscale_a.convert_to_grayscale(); + pA = &grayscale_a; - grayscale_b = *pB; - grayscale_b.convert_to_grayscale(); - pB = &grayscale_b; + grayscale_b = *pB; + grayscale_b.convert_to_grayscale(); + pB = &grayscale_b; + } + + console::info("Face %u Mipmap level %u statistics:", face, level); + image_utils::print_image_metrics(*pA, *pB); + + if ((pA->has_rgb()) || (pB->has_rgb())) + image_utils::print_ssim(*pA, *pB); } - - console::info("Mipmap level %u statistics:", level); - image_utils::print_image_metrics(*pA, *pB); - - if ((pA->has_rgb()) || (pB->has_rgb())) - image_utils::print_ssim(*pA, *pB); } } if (pCSVStatsFile) { - // FIXME: This is kind of a hack, and should be combine with the code above. + // FIXME: This is kind of a hack, and should be combined with the code above. image_u8 a, b; image_u8* pA = m_pInput_tex->get_level_image(0, 0, a); image_u8* pB = m_output_tex.get_level_image(0, 0, b); @@ -292,56 +296,63 @@ namespace crnlib return false; } - static pixel_format choose_pixel_format(convert_params& params, const crn_comp_params &comp_params, const dds_texture& src_tex, texture_type tex_type) + static pixel_format choose_pixel_format(convert_params& params, const crn_comp_params &comp_params, const mipmapped_texture& src_tex, texture_type tex_type) { - if (params.m_use_source_format) - return src_tex.get_format(); - + const pixel_format src_fmt = src_tex.get_format(); + const texture_file_types::format src_file_type = src_tex.get_source_file_type(); const bool is_normal_map = (tex_type == cTextureTypeNormalMap); + if (params.m_always_use_source_pixel_format) + return src_fmt; + + // Attempt to choose a reasonable/sane output pixel format. if (params.m_dst_file_type == texture_file_types::cFormatCRN) { if (is_normal_map) { - switch (src_tex.get_format()) + if (pixel_format_helpers::is_dxt(src_fmt)) + return src_fmt; + else + return PIXEL_FMT_DXT5_AGBR; + } + } + else if (params.m_dst_file_type == texture_file_types::cFormatKTX) + { + if ((src_file_type != texture_file_types::cFormatCRN) && (src_file_type != texture_file_types::cFormatKTX) && (src_file_type != texture_file_types::cFormatDDS)) + { + if (is_normal_map) { - case PIXEL_FMT_DXN: - case PIXEL_FMT_3DC: - case PIXEL_FMT_DXT5_xGBR: - case PIXEL_FMT_DXT5_AGBR: - case PIXEL_FMT_DXT5_xGxR: - return src_tex.get_format(); - default: - return PIXEL_FMT_DXT5_AGBR; + return pixel_format_helpers::has_alpha(src_fmt) ? PIXEL_FMT_A8R8G8B8 : PIXEL_FMT_R8G8B8; } + else if (pixel_format_helpers::is_grayscale(src_fmt)) + { + if (pixel_format_helpers::has_alpha(src_fmt)) + return PIXEL_FMT_A8L8; + else + return PIXEL_FMT_ETC1; + } + else if (pixel_format_helpers::has_alpha(src_fmt)) + return PIXEL_FMT_A8R8G8B8; + else + return PIXEL_FMT_ETC1; } } else if (params.m_dst_file_type == texture_file_types::cFormatDDS) { - if (src_tex.get_source_file_type() != texture_file_types::cFormatCRN) + if ((src_file_type != texture_file_types::cFormatCRN) && (src_file_type != texture_file_types::cFormatKTX) && (src_file_type != texture_file_types::cFormatDDS)) { if (is_normal_map) { - switch (src_tex.get_format()) - { - case PIXEL_FMT_DXN: - case PIXEL_FMT_3DC: - case PIXEL_FMT_DXT5_xGBR: - case PIXEL_FMT_DXT5_AGBR: - case PIXEL_FMT_DXT5_xGxR: - return src_tex.get_format(); - default: - return PIXEL_FMT_DXT5_AGBR; - } + return PIXEL_FMT_DXT5_AGBR; } - else if (pixel_format_helpers::is_grayscale(src_tex.get_format())) + else if (pixel_format_helpers::is_grayscale(src_fmt)) { - if (pixel_format_helpers::has_alpha(src_tex.get_format())) + if (pixel_format_helpers::has_alpha(src_fmt)) return comp_params.get_flag(cCRNCompFlagDXT1AForTransparency) ? PIXEL_FMT_DXT1A : PIXEL_FMT_DXT5; else return PIXEL_FMT_DXT1; } - else if (pixel_format_helpers::has_alpha(src_tex.get_format())) + else if (pixel_format_helpers::has_alpha(src_fmt)) return comp_params.get_flag(cCRNCompFlagDXT1AForTransparency) ? PIXEL_FMT_DXT1A : PIXEL_FMT_DXT5; else return PIXEL_FMT_DXT1; @@ -349,21 +360,21 @@ namespace crnlib } else { - // A regular image format. - if (pixel_format_helpers::is_grayscale(src_tex.get_format())) + // Destination is a regular image format. + if (pixel_format_helpers::is_grayscale(src_fmt)) { - if (pixel_format_helpers::has_alpha(src_tex.get_format())) + if (pixel_format_helpers::has_alpha(src_fmt)) return PIXEL_FMT_A8L8; else return PIXEL_FMT_L8; } - else if (pixel_format_helpers::has_alpha(src_tex.get_format())) + else if (pixel_format_helpers::has_alpha(src_fmt)) return PIXEL_FMT_A8R8G8B8; else return PIXEL_FMT_R8G8B8; } - return src_tex.get_format(); + return src_fmt; } static void print_comp_params(const crn_comp_params &comp_params) @@ -404,23 +415,27 @@ namespace crnlib void convert_params::print() { console::debug("\nTexture conversion parameters:"); - console::debug(" Resolution: %ux%u, Faces: %u, Levels: %u, Format: %s", + console::debug(" Resolution: %ux%u, Faces: %u, Levels: %u, Format: %s, X Flipped: %u, Y Flipped: %u", m_pInput_texture->get_width(), m_pInput_texture->get_height(), m_pInput_texture->get_num_faces(), m_pInput_texture->get_num_levels(), - pixel_format_helpers::get_pixel_format_string(m_pInput_texture->get_format())); + pixel_format_helpers::get_pixel_format_string(m_pInput_texture->get_format()), + m_pInput_texture->is_x_flipped(), + m_pInput_texture->is_y_flipped()); console::debug(" texture_type: %s", get_texture_type_desc(m_texture_type)); console::debug(" dst_filename: %s", m_dst_filename.get_ptr()); console::debug(" dst_file_type: %s", texture_file_types::get_extension(m_dst_file_type)); console::debug(" dst_format: %s", pixel_format_helpers::get_pixel_format_string(m_dst_format)); console::debug(" quick: %u", m_quick); - console::debug(" use_source_format: %u", m_use_source_format); + console::debug(" use_source_format: %u", m_always_use_source_pixel_format); + console::debug(" Y Flip: %u", m_y_flip); + console::debug(" Unflip: %u", m_unflip); } static bool write_compressed_texture( - dds_texture& work_tex, convert_params& params, crn_comp_params &comp_params, pixel_format dst_format, progress_params& progress_state, bool perceptual, convert_stats &stats) + mipmapped_texture& work_tex, convert_params& params, crn_comp_params &comp_params, pixel_format dst_format, progress_params& progress_state, bool perceptual, convert_stats &stats) { comp_params.m_file_type = (params.m_dst_file_type == texture_file_types::cFormatCRN) ? cCRNFileTypeCRN : cCRNFileTypeDDS; @@ -450,7 +465,7 @@ namespace crnlib return true; } - static bool convert_and_write_normal_texture(dds_texture& work_tex, convert_params& params, const crn_comp_params &comp_params, pixel_format dst_format, progress_params& progress_state, bool formats_differ, bool perceptual, convert_stats& stats) + static bool convert_and_write_normal_texture(mipmapped_texture& work_tex, convert_params& params, const crn_comp_params &comp_params, pixel_format dst_format, progress_params& progress_state, bool formats_differ, bool perceptual, convert_stats& stats) { if (formats_differ) { @@ -517,7 +532,7 @@ namespace crnlib face_vec face(1); face[0].push_back(crnlib_new(*pLevel)); - dds_texture new_tex; + mipmapped_texture new_tex; new_tex.assign(face); console::info("Writing texture face %u mip level %u to file %s", f, l, filename.get_ptr()); @@ -567,9 +582,29 @@ namespace crnlib params.m_pIntermediate_texture = NULL; } - params.m_pIntermediate_texture = crnlib_new(*params.m_pInput_texture); + params.m_pIntermediate_texture = crnlib_new(*params.m_pInput_texture); + + mipmapped_texture& work_tex = *params.m_pInput_texture; - dds_texture& work_tex = *params.m_pInput_texture; + if ((params.m_unflip) && (work_tex.is_flipped())) + { + console::info("Unflipping texture"); + work_tex.unflip(true, true); + } + + if (params.m_y_flip) + { + console::info("Flipping texture on Y axis"); + + // This is awkward - if we're writing to KTX, then go ahead and properly update the work texture's orientation flags. + // Otherwise, don't bother updating the orientation flags because the writer may then attempt to unflip the texture before writing to formats + // that don't support flipped textures (ugh). + const bool bOutputFormatSupportsFlippedTextures = params.m_dst_file_type == texture_file_types::cFormatKTX; + if (!work_tex.flip_y(bOutputFormatSupportsFlippedTextures)) + { + console::warning("Failed flipping texture on Y axis"); + } + } if ((params.m_dst_format != PIXEL_FMT_INVALID) && (pixel_format_helpers::is_alpha_only(params.m_dst_format))) { @@ -588,13 +623,14 @@ namespace crnlib if (pixel_format_helpers::is_dxt(dst_format)) { if ((params.m_dst_file_type != texture_file_types::cFormatCRN) && - (params.m_dst_file_type != texture_file_types::cFormatDDS)) + (params.m_dst_file_type != texture_file_types::cFormatDDS) && + (params.m_dst_file_type != texture_file_types::cFormatKTX)) { - console::warning("Output file format does not support DXT - automatically choosing pixel format."); + console::warning("Output file format does not support DXTc - automatically choosing a non-DXT pixel format."); dst_format = PIXEL_FMT_INVALID; } } - + if (dst_format == PIXEL_FMT_INVALID) { // Caller didn't specify a format to use, so try to pick something reasonable. @@ -606,6 +642,18 @@ namespace crnlib dst_format = PIXEL_FMT_DXT1A; else if (dst_format == PIXEL_FMT_DXT1A) comp_params.set_flag(cCRNCompFlagDXT1AForTransparency, true); + + if ((dst_format == PIXEL_FMT_ETC1) && (params.m_dst_file_type == texture_file_types::cFormatCRN)) + { + console::warning("CRN file format does not support ETC1 compressed textures - converting to DXT1 instead."); + dst_format = PIXEL_FMT_DXT1; + } + + if ((dst_format == PIXEL_FMT_DXT1A) && (params.m_dst_file_type == texture_file_types::cFormatCRN)) + { + console::warning("CRN file format does not support DXT1A compressed textures - converting to DXT5 instead."); + dst_format = PIXEL_FMT_DXT5; + } const bool is_normal_map = (tex_type == cTextureTypeNormalMap); bool perceptual = comp_params.get_flag(cCRNCompFlagPerceptual); @@ -633,7 +681,9 @@ namespace crnlib } bool generate_mipmaps = texture_file_types::supports_mipmaps(params.m_dst_file_type); - if ((params.m_write_mipmaps_to_multiple_files) && ((params.m_dst_file_type != texture_file_types::cFormatCRN) && (params.m_dst_file_type != texture_file_types::cFormatDDS))) + if ( (params.m_write_mipmaps_to_multiple_files) && + ((params.m_dst_file_type != texture_file_types::cFormatCRN) && (params.m_dst_file_type != texture_file_types::cFormatDDS) && (params.m_dst_file_type != texture_file_types::cFormatKTX)) + ) { generate_mipmaps = true; } @@ -657,13 +707,14 @@ namespace crnlib } bool status = false; - + timer t; t.start(); if ( (params.m_dst_file_type == texture_file_types::cFormatCRN) || ( (params.m_dst_file_type == texture_file_types::cFormatDDS) && (pixel_format_helpers::is_dxt(dst_format)) && - ((formats_differ) || (comp_params.m_target_bitrate > 0.0f) || (comp_params.m_quality_level < cCRNMaxQualityLevel)) + //((formats_differ) || (comp_params.m_target_bitrate > 0.0f) || (comp_params.m_quality_level < cCRNMaxQualityLevel)) + ((comp_params.m_target_bitrate > 0.0f) || (comp_params.m_quality_level < cCRNMaxQualityLevel)) ) ) { @@ -671,6 +722,10 @@ namespace crnlib } else { + if ((comp_params.m_target_bitrate > 0.0f) || (comp_params.m_quality_level < cCRNMaxQualityLevel)) + { + console::warning( "Target bitrate/quality level is not supported for this output file format.\n"); + } status = convert_and_write_normal_texture(work_tex, params, comp_params, dst_format, progress_state, formats_differ, perceptual, stats); } diff --git a/crnlib/crn_texture_conversion.h b/crnlib/crn_texture_conversion.h index 89245ad..3b21915 100644 --- a/crnlib/crn_texture_conversion.h +++ b/crnlib/crn_texture_conversion.h @@ -2,7 +2,7 @@ // See Copyright Notice and license at the end of inc/crnlib.h #pragma once #include "crn_dxt_image.h" -#include "crn_dds_texture.h" +#include "crn_mipmapped_texture.h" #include "crn_rect.h" #include "crn_lzma_codec.h" @@ -18,7 +18,7 @@ namespace crnlib bool init( const char* pSrc_filename, const char* pDst_filename, - dds_texture& src_tex, + mipmapped_texture& src_tex, texture_file_types::format dst_file_type, bool lzma_stats); @@ -26,12 +26,12 @@ namespace crnlib void clear(); - dynamic_string m_src_filename; - dynamic_string m_dst_filename; + dynamic_string m_src_filename; + dynamic_string m_dst_filename; texture_file_types::format m_dst_file_type; - dds_texture* m_pInput_tex; - dds_texture m_output_tex; + mipmapped_texture* m_pInput_tex; + mipmapped_texture m_output_tex; uint64 m_input_file_size; uint m_total_input_pixels; @@ -53,12 +53,14 @@ namespace crnlib m_pProgress_func(NULL), m_pProgress_user_data(NULL), m_pIntermediate_texture(NULL), + m_y_flip(false), + m_unflip(false), + m_always_use_source_pixel_format(false), m_write_mipmaps_to_multiple_files(false), m_quick(false), m_debugging(false), m_param_debugging(false), m_no_stats(false), - m_use_source_format(false), m_lzma_stats(false), m_status(false), m_canceled(false) @@ -73,11 +75,11 @@ namespace crnlib void print(); // Input parameters - dds_texture* m_pInput_texture; + mipmapped_texture* m_pInput_texture; texture_type m_texture_type; - dynamic_string m_dst_filename; + dynamic_string m_dst_filename; texture_file_types::format m_dst_file_type; pixel_format m_dst_format; @@ -89,15 +91,17 @@ namespace crnlib void* m_pProgress_user_data; // Return parameters - dds_texture* m_pIntermediate_texture; - mutable dynamic_string m_error_message; + mipmapped_texture* m_pIntermediate_texture; + mutable dynamic_string m_error_message; + bool m_y_flip; + bool m_unflip; + bool m_always_use_source_pixel_format; bool m_write_mipmaps_to_multiple_files; bool m_quick; bool m_debugging; bool m_param_debugging; bool m_no_stats; - bool m_use_source_format; bool m_lzma_stats; mutable bool m_status; diff --git a/crnlib/crn_texture_file_types.cpp b/crnlib/crn_texture_file_types.cpp index 75b933c..e737ce9 100644 --- a/crnlib/crn_texture_file_types.cpp +++ b/crnlib/crn_texture_file_types.cpp @@ -14,6 +14,10 @@ namespace crnlib static const char* extensions[cNumFileFormats] = { + "dds", + "crn", + "ktx", + "tga", "png", "jpg", @@ -24,10 +28,9 @@ namespace crnlib "tiff", "ppm", "pgm", - "dds", "psd", "jp2", - "crn", + "", "" }; @@ -59,6 +62,7 @@ namespace crnlib { case cFormatCRN: case cFormatDDS: + case cFormatKTX: return true; default: break; } diff --git a/crnlib/crn_texture_file_types.h b/crnlib/crn_texture_file_types.h index 08d4c76..dba4ed6 100644 --- a/crnlib/crn_texture_file_types.h +++ b/crnlib/crn_texture_file_types.h @@ -13,7 +13,13 @@ namespace crnlib { cFormatInvalid = -1, - cFormatTGA = 0, + cFormatDDS, + cFormatCRN, + cFormatKTX, + + cNumMipmappedFileFormats, + + cFormatTGA = cNumMipmappedFileFormats, cFormatPNG, cFormatJPG, cFormatJPEG, @@ -23,12 +29,12 @@ namespace crnlib cFormatTIFF, cFormatPPM, cFormatPGM, - cFormatDDS, cFormatPSD, cFormatJP2, - cFormatCRN, - + cNumRegularFileFormats, + + cNumImageFileFormats = cNumRegularFileFormats - cNumMipmappedFileFormats, // Not really a file format cFormatClipboard = cNumRegularFileFormats, diff --git a/crnlib/crn_threading_pthreads.cpp b/crnlib/crn_threading_pthreads.cpp index 52f019c..f57adb2 100644 --- a/crnlib/crn_threading_pthreads.cpp +++ b/crnlib/crn_threading_pthreads.cpp @@ -305,7 +305,6 @@ namespace crnlib bool task_pool::queue_task(task_callback_func pFunc, uint64 data, void* pData_ptr) { - CRNLIB_ASSERT(m_num_threads); CRNLIB_ASSERT(pFunc); task tsk; @@ -329,7 +328,6 @@ namespace crnlib // It's the object's responsibility to delete pObj within the execute_task() method, if needed! bool task_pool::queue_task(executable_task* pObj, uint64 data, void* pData_ptr) { - CRNLIB_ASSERT(m_num_threads); CRNLIB_ASSERT(pObj); task tsk; diff --git a/crnlib/crn_threading_pthreads.h b/crnlib/crn_threading_pthreads.h index 55caa88..547bb88 100644 --- a/crnlib/crn_threading_pthreads.h +++ b/crnlib/crn_threading_pthreads.h @@ -299,7 +299,6 @@ namespace crnlib template inline bool task_pool::queue_multiple_object_tasks(S* pObject, T pObject_method, uint64 first_data, uint num_tasks, void* pData_ptr) { - CRNLIB_ASSERT(m_num_threads); CRNLIB_ASSERT(pObject); CRNLIB_ASSERT(num_tasks); if (!num_tasks) diff --git a/crnlib/crn_threading_win32.cpp b/crnlib/crn_threading_win32.cpp index 824eedd..26eb3ea 100644 --- a/crnlib/crn_threading_win32.cpp +++ b/crnlib/crn_threading_win32.cpp @@ -325,7 +325,6 @@ namespace crnlib bool task_pool::queue_task(task_callback_func pFunc, uint64 data, void* pData_ptr) { - CRNLIB_ASSERT(m_num_threads); CRNLIB_ASSERT(pFunc); task tsk; @@ -350,7 +349,6 @@ namespace crnlib // It's the object's responsibility to delete pObj within the execute_task() method, if needed! bool task_pool::queue_task(executable_task* pObj, uint64 data, void* pData_ptr) { - CRNLIB_ASSERT(m_num_threads); CRNLIB_ASSERT(pObj); task tsk; @@ -426,4 +424,5 @@ namespace crnlib return 0; } -} +} // namespace crnlib + diff --git a/crnlib/crn_threading_win32.h b/crnlib/crn_threading_win32.h index e11d020..098f1c3 100644 --- a/crnlib/crn_threading_win32.h +++ b/crnlib/crn_threading_win32.h @@ -370,7 +370,6 @@ namespace crnlib template inline bool task_pool::queue_multiple_object_tasks(S* pObject, T pObject_method, uint64 first_data, uint num_tasks, void* pData_ptr) { - CRNLIB_ASSERT(m_num_threads); CRNLIB_ASSERT(pObject); CRNLIB_ASSERT(num_tasks); if (!num_tasks) diff --git a/crnlib/crn_timer.h b/crnlib/crn_timer.h index 4e895f5..f96e532 100644 --- a/crnlib/crn_timer.h +++ b/crnlib/crn_timer.h @@ -42,4 +42,22 @@ namespace crnlib bool m_stopped : 1; }; + // Prints object's lifetime to stdout + class timed_scope + { + const char* m_pName; + timer m_tm; + + public: + inline timed_scope(char* pName = "timed_scope") : m_pName(pName) { m_tm.start(); } + + inline double get_elapsed_secs() const { return m_tm.get_elapsed_secs(); } + inline double get_elapsed_ms() const { return m_tm.get_elapsed_ms(); } + + const timer &get_timer() const { return m_tm; } + timer &get_timer() { return m_tm; } + + inline ~timed_scope() { double secs = m_tm.get_elapsed_secs(); printf("%s: %f secs, %f ms\n", m_pName, secs, secs * 1000.0f); } + }; + } // namespace crnlib diff --git a/crnlib/crn_traits.h b/crnlib/crn_traits.h index b882750..2d1e2c1 100644 --- a/crnlib/crn_traits.h +++ b/crnlib/crn_traits.h @@ -71,12 +71,16 @@ namespace crnlib struct bitwise_movable { enum { cFlag = false }; }; // Defines type Q as bitwise movable. +// Bitwise movable: type T may be safely moved to a new location via memcpy, without requiring the old copy to be destructed. +// However, the final version of the object (wherever it winds up in memory) must be eventually destructed (a single time, of course). +// Bitwise movable is a superset of bitwise copyable (all bitwise copyable types are also bitwise movable). #define CRNLIB_DEFINE_BITWISE_MOVABLE(Q) template<> struct bitwise_movable { enum { cFlag = true }; }; template struct bitwise_copyable { enum { cFlag = false }; }; - // Defines type Q as bitwise copyable. +// Defines type Q as bitwise copyable. +// Bitwise copyable: type T may be safely and freely copied (duplicated) via memcpy, and *does not* require destruction. #define CRNLIB_DEFINE_BITWISE_COPYABLE(Q) template<> struct bitwise_copyable { enum { cFlag = true }; }; #define CRNLIB_IS_POD(T) __is_pod(T) @@ -85,7 +89,7 @@ namespace crnlib #define CRNLIB_IS_BITWISE_COPYABLE(T) (CRNLIB_IS_SCALAR_TYPE(T) || CRNLIB_IS_POD(T) || (bitwise_copyable::cFlag)) -#define CRNLIB_IS_BITWISE_MOVABLE(T) (CRNLIB_IS_BITWISE_COPYABLE(T) || (bitwise_movable::cFlag)) +#define CRNLIB_IS_BITWISE_COPYABLE_OR_MOVABLE(T) (CRNLIB_IS_BITWISE_COPYABLE(T) || (bitwise_movable::cFlag)) #define CRNLIB_HAS_DESTRUCTOR(T) ((!scalar_type::cFlag) && (!__is_pod(T))) diff --git a/crnlib/crn_tree_clusterizer.h b/crnlib/crn_tree_clusterizer.h index f6388be..8ace530 100644 --- a/crnlib/crn_tree_clusterizer.h +++ b/crnlib/crn_tree_clusterizer.h @@ -66,6 +66,8 @@ namespace crnlib m_nodes.push_back(root); + // Warning: if this code is NOT compiled with -fno-strict-aliasing, m_nodes.get_ptr() can be NULL here. (Argh!) + uint total_leaves = 1; while (total_leaves < max_size) diff --git a/crnlib/crn_utils.h b/crnlib/crn_utils.h index 1740dc1..3cb5332 100644 --- a/crnlib/crn_utils.h +++ b/crnlib/crn_utils.h @@ -155,16 +155,53 @@ namespace crnlib return true; } - static inline uint16 swap16(uint16 x) { return static_cast((x << 8U) | (x >> 8U)); } - static inline uint32 swap32(uint32 x) { return ((x << 24U) | ((x << 8U) & 0x00FF0000U) | ((x >> 8U) & 0x0000FF00U) | (x >> 24U)); } +#if defined(_MSC_VER) + static CRNLIB_FORCE_INLINE uint16 swap16(uint16 x) { return _byteswap_ushort(x); } + static CRNLIB_FORCE_INLINE uint32 swap32(uint32 x) { return _byteswap_ulong(x); } + static CRNLIB_FORCE_INLINE uint64 swap64(uint64 x) { return _byteswap_uint64(x); } +#elif defined(__GNUC__) + static CRNLIB_FORCE_INLINE uint16 swap16(uint16 x) { return static_cast((x << 8U) | (x >> 8U)); } + static CRNLIB_FORCE_INLINE uint32 swap32(uint32 x) { return __builtin_bswap32(x); } + static CRNLIB_FORCE_INLINE uint64 swap64(uint64 x) { return __builtin_bswap64(x); } +#else + static CRNLIB_FORCE_INLINE uint16 swap16(uint16 x) { return static_cast((x << 8U) | (x >> 8U)); } + static CRNLIB_FORCE_INLINE uint32 swap32(uint32 x) { return ((x << 24U) | ((x << 8U) & 0x00FF0000U) | ((x >> 8U) & 0x0000FF00U) | (x >> 24U)); } + static CRNLIB_FORCE_INLINE uint64 swap64(uint64 x) { return (static_cast(swap32(static_cast(x))) << 32ULL) | swap32(static_cast(x >> 32U)); } +#endif - inline uint16 swap_le16_to_native(uint16 x) { return c_crnlib_little_endian_platform ? x : swap16(x); } - inline uint32 swap_le32_to_native(uint32 x) { return c_crnlib_little_endian_platform ? x : swap32(x); } - inline uint16 swap_be16_to_native(uint16 x) { return c_crnlib_big_endian_platform ? x : swap16(x); } - inline uint32 swap_be32_to_native(uint32 x) { return c_crnlib_big_endian_platform ? x : swap32(x); } + // Assumes x has been read from memory as a little endian value, converts to native endianness for manipulation. + CRNLIB_FORCE_INLINE uint16 swap_le16_to_native(uint16 x) { return c_crnlib_little_endian_platform ? x : swap16(x); } + CRNLIB_FORCE_INLINE uint32 swap_le32_to_native(uint32 x) { return c_crnlib_little_endian_platform ? x : swap32(x); } + CRNLIB_FORCE_INLINE uint64 swap_le64_to_native(uint64 x) { return c_crnlib_little_endian_platform ? x : swap64(x); } - inline uint32 read_le32(const void* p) { return swap_le32_to_native(*static_cast(p)); } - inline void write_le32(void* p, uint32 x) { *static_cast(p) = swap_le32_to_native(x); } + // Assumes x has been read from memory as a big endian value, converts to native endianness for manipulation. + CRNLIB_FORCE_INLINE uint16 swap_be16_to_native(uint16 x) { return c_crnlib_big_endian_platform ? x : swap16(x); } + CRNLIB_FORCE_INLINE uint32 swap_be32_to_native(uint32 x) { return c_crnlib_big_endian_platform ? x : swap32(x); } + CRNLIB_FORCE_INLINE uint64 swap_be64_to_native(uint64 x) { return c_crnlib_big_endian_platform ? x : swap64(x); } + + CRNLIB_FORCE_INLINE uint32 read_le32(const void* p) { return swap_le32_to_native(*static_cast(p)); } + CRNLIB_FORCE_INLINE void write_le32(void* p, uint32 x) { *static_cast(p) = swap_le32_to_native(x); } + CRNLIB_FORCE_INLINE uint64 read_le64(const void* p) { return swap_le64_to_native(*static_cast(p)); } + CRNLIB_FORCE_INLINE void write_le64(void* p, uint64 x) { *static_cast(p) = swap_le64_to_native(x); } + + CRNLIB_FORCE_INLINE uint32 read_be32(const void* p) { return swap_be32_to_native(*static_cast(p)); } + CRNLIB_FORCE_INLINE void write_be32(void* p, uint32 x) { *static_cast(p) = swap_be32_to_native(x); } + CRNLIB_FORCE_INLINE uint64 read_be64(const void* p) { return swap_be64_to_native(*static_cast(p)); } + CRNLIB_FORCE_INLINE void write_be64(void* p, uint64 x) { *static_cast(p) = swap_be64_to_native(x); } + + inline void endian_swap_mem16(uint16* p, uint n) { while (n--) { *p = swap16(*p); ++p; } } + inline void endian_swap_mem32(uint32* p, uint n) { while (n--) { *p = swap32(*p); ++p; } } + inline void endian_swap_mem64(uint64* p, uint n) { while (n--) { *p = swap64(*p); ++p; } } + + inline void endian_swap_mem(void* p, uint size_in_bytes, uint type_size) + { + switch (type_size) + { + case sizeof(uint16): endian_swap_mem16(static_cast(p), size_in_bytes / type_size); break; + case sizeof(uint32): endian_swap_mem32(static_cast(p), size_in_bytes / type_size); break; + case sizeof(uint64): endian_swap_mem64(static_cast(p), size_in_bytes / type_size); break; + } + } inline void fast_memset(void* pDst, int val, size_t size) { diff --git a/crnlib/crn_vec_interval.h b/crnlib/crn_vec_interval.h index 4adec59..8dc452e 100644 --- a/crnlib/crn_vec_interval.h +++ b/crnlib/crn_vec_interval.h @@ -18,7 +18,7 @@ namespace crnlib inline void clear() { m_bounds[0].clear(); m_bounds[1].clear(); } inline const T& operator[] (uint i) const { CRNLIB_ASSERT(i < 2); return m_bounds[i]; } - inline T& operator[] (uint i) { CRNLIB_ASSERT(i < 2); return m_bounds[i]; } + inline T& operator[] (uint i) { CRNLIB_ASSERT(i < 2); return m_bounds[i]; } private: T m_bounds[2]; diff --git a/crnlib/crn_vector.h b/crnlib/crn_vector.h index 9deca99..5f56f94 100644 --- a/crnlib/crn_vector.h +++ b/crnlib/crn_vector.h @@ -138,6 +138,7 @@ namespace crnlib inline const T* get_ptr() const { return m_p; } inline T* get_ptr() { return m_p; } + // clear() sets the container to empty, then frees the allocated block. inline void clear() { if (m_p) @@ -149,7 +150,7 @@ namespace crnlib m_capacity = 0; } } - + inline void clear_no_destruction() { if (m_p) @@ -181,6 +182,7 @@ namespace crnlib return increase_capacity(new_capacity, true, true); } + // resize(0) sets the container to empty, but does not free the allocated block. inline void resize(uint new_size, bool grow_hint = false) { if (m_size != new_size) @@ -222,6 +224,16 @@ namespace crnlib return true; } + // If size >= capacity/2, reset() sets the container's size to 0 but doesn't free the allocated block (because the container may be similarly loaded in the future). + // Otherwise it blows away the allocated block. See http://www.codercorner.com/blog/?p=494 + inline void reset() + { + if (m_size >= (m_capacity >> 1)) + resize(0); + else + clear(); + } + inline T* enlarge(uint i) { uint cur_size = m_size; @@ -364,17 +376,15 @@ namespace crnlib const T* pSrc = m_p + start + n; - if ((CRNLIB_IS_BITWISE_COPYABLE(T)) && (!CRNLIB_IS_BITWISE_MOVABLE(T))) + if (CRNLIB_IS_BITWISE_COPYABLE_OR_MOVABLE(T)) { - // Type is bitwise copyable, so there's no need to destruct the overwritten objects. - // Copy "down" the objects to preserve, filling in the empty slots. - memmove(pDst, pSrc, num_to_move * sizeof(T)); - } - else if (CRNLIB_IS_BITWISE_MOVABLE(T)) - { - // Type is bitwise movable, which means we can move them around but they still may need to be destructed. - // First destroy the erased objects. - scalar_type::destruct_array(pDst, n); + // This test is overly cautious. + if ((!CRNLIB_IS_BITWISE_COPYABLE(T)) || (CRNLIB_HAS_DESTRUCTOR(T))) + { + // Type has been marked explictly as bitwise movable, which means we can move them around but they may need to be destructed. + // First destroy the erased objects. + scalar_type::destruct_array(pDst, n); + } // Copy "down" the objects to preserve, filling in the empty slots. memmove(pDst, pSrc, num_to_move * sizeof(T)); @@ -601,6 +611,7 @@ namespace crnlib } } + // Caller assumes ownership of the heap block associated with the container. Container is cleared. inline void *assume_ownership() { T* p = m_p; @@ -610,6 +621,45 @@ namespace crnlib return p; } + // Caller is granting ownership of the indicated heap block. + // Block must have size constructed elements, and have enough room for capacity elements. + inline bool grant_ownership(T* p, uint size, uint capacity) + { + // To to prevent the caller from obviously shooting themselves in the foot. + if (((p + capacity) > m_p) && (p < (m_p + m_capacity))) + { + // Can grant ownership of a block inside the container itself! + CRNLIB_ASSERT(0); + return false; + } + + if (size > capacity) + { + CRNLIB_ASSERT(0); + return false; + } + + if (!p) + { + if (capacity) + { + CRNLIB_ASSERT(0); + return false; + } + } + else if (!capacity) + { + CRNLIB_ASSERT(0); + return false; + } + + clear(); + m_p = p; + m_size = size; + m_capacity = capacity; + return true; + } + private: T* m_p; uint m_size; @@ -638,10 +688,12 @@ namespace crnlib { return reinterpret_cast(this)->increase_capacity( min_new_capacity, grow_hint, sizeof(T), - (CRNLIB_IS_BITWISE_MOVABLE(T) || (is_vector::cFlag)) ? NULL : object_mover, nofail); + (CRNLIB_IS_BITWISE_COPYABLE_OR_MOVABLE(T) || (is_vector::cFlag)) ? NULL : object_mover, nofail); } }; + typedef crnlib::vector uint8_vec; + template struct bitwise_movable< vector > { enum { cFlag = true }; }; extern void vector_test(); diff --git a/crnlib/crn_vector2d.h b/crnlib/crn_vector2d.h new file mode 100644 index 0000000..dcfcf2a --- /dev/null +++ b/crnlib/crn_vector2d.h @@ -0,0 +1,155 @@ +// File: crn_vector2d.h +#pragma once + +namespace crnlib +{ + template + class vector2D + { + public: + typedef crnlib::vector vector_type; + typedef T value_type; + typedef T& reference; + typedef const T& const_reference; + typedef T* pointer; + typedef const T* const_pointer; + + inline vector2D(uint width = 0, uint height = 0, const T& def = T()) : + m_width(width), + m_height(height), + m_vec(width * height), + m_def(def) + { + } + + inline vector2D(const vector2D& other) : + m_width(other.m_width), + m_height(other.m_height), + m_vec(other.m_vec), + m_def(other.m_def) + { + } + + inline vector2D& operator= (const vector2D& rhs) + { + if (this == &rhs) + return *this; + + m_width = rhs.m_width; + m_height = rhs.m_height; + m_vec = rhs.m_vec; + + return *this; + } + + bool try_resize(uint width, uint height, bool preserve = true) + { + if ((width == m_width) && (height == m_height)) + return true; + + vector_type new_vec; + if (!new_vec.try_resize(width * height)) + return false; + + if (preserve) + { + const uint nx = math::minimum(width, m_width); + const uint ny = math::minimum(height, m_height); + + for (uint y = 0; y < ny; y++) + { + const T* pSrc = &m_vec[y * m_width]; + T* pDst = &new_vec[y * width]; + if (CRNLIB_IS_BITWISE_COPYABLE_OR_MOVABLE(T)) + memcpy(pDst, pSrc, nx * sizeof(T)); + else + { + for (uint x = 0; x < nx; x++) + *pDst++ = *pSrc++; + } + } + } + + m_width = width; + m_height = height; + m_vec.swap(new_vec); + + return true; + } + + void resize(uint width, uint height, bool preserve = true) + { + if (!try_resize(width, height, preserve)) + { + CRNLIB_FAIL("vector2D::resize: Out of memory"); + } + } + + inline void clear() + { + m_vec.clear(); + m_width = 0; + m_height = 0; + } + + inline uint width() const { return m_width; } + inline uint height() const { return m_height; } + inline uint size() const { return m_vec.size(); } + + inline uint size_in_bytes() const { return m_vec.size() * sizeof(T); } + + const vector_type& get_vec() const { return m_vec; } + vector_type& get_vec() { return m_vec; } + + inline const T* get_ptr() const { return m_vec.get_ptr(); } + inline T* get_ptr() { return m_vec.get_ptr(); } + + inline const T& operator[] (uint i) const { return m_vec[i]; } + inline T& operator[] (uint i) { return m_vec[i]; } + + inline const T& operator() (uint x, uint y) const + { + CRNLIB_ASSERT((x < m_width) && (y < m_height)); + return m_vec[x + y * m_width]; + } + + inline T& operator() (uint x, uint y) + { + CRNLIB_ASSERT((x < m_width) && (y < m_height)); + return m_vec[x + y * m_width]; + } + + inline const T& at (uint x, uint y) const + { + if ((x >= m_width) || (y >= m_height)) + return m_def; + return m_vec[x + y * m_width]; + } + + inline T& at (uint x, uint y) + { + if ((x >= m_width) || (y >= m_height)) + return m_def; + return m_vec[x + y * m_width]; + } + + inline void swap(vector2D& other) + { + m_vec.swap(other.m_vec); + anvil::swap(m_width, other.m_width); + anvil::swap(m_height, other.m_height); + } + + inline void set_all(const T& x) + { + m_vec.set_all(x); + } + + private: + vector_type m_vec; + uint m_width; + uint m_height; + T m_def; + }; + +} // namespace anvil diff --git a/crnlib/crnlib.2008.vcproj b/crnlib/crnlib.2008.vcproj index 5464c1a..2c5291e 100644 --- a/crnlib/crnlib.2008.vcproj +++ b/crnlib/crnlib.2008.vcproj @@ -771,6 +771,10 @@ RelativePath=".\crn_vector.h" > + + - - - - - - - - @@ -807,6 +795,150 @@ RelativePath=".\crn_image_utils.h" > + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -971,6 +1103,14 @@ RelativePath=".\crn_dxt_hc_common.h" > + + + + @@ -987,6 +1127,14 @@ RelativePath=".\crn_lzma_codec.h" > + + + + @@ -1011,6 +1159,14 @@ RelativePath=".\crn_qdxt5.h" > + + + + @@ -1111,6 +1267,10 @@ RelativePath=".\crn_packed_uint.h" > + + @@ -1194,13 +1354,6 @@ - - - + + + + + + + + + + + + + + diff --git a/crnlib/crnlib.cbp b/crnlib/crnlib.cbp index cc62daf..b18ac9e 100644 --- a/crnlib/crnlib.cbp +++ b/crnlib/crnlib.cbp @@ -70,8 +70,6 @@ - - @@ -92,6 +90,8 @@ + + @@ -108,6 +108,12 @@ + + + + + + @@ -115,6 +121,10 @@ + + + + @@ -135,6 +145,8 @@ + + @@ -173,6 +185,7 @@ + diff --git a/crnlib/crnlib.cpp b/crnlib/crnlib.cpp index b478b79..ac485d0 100644 --- a/crnlib/crnlib.cpp +++ b/crnlib/crnlib.cpp @@ -7,10 +7,13 @@ #include "crn_dynamic_stream.h" #include "crn_buffer_stream.h" #include "crn_ryg_dxt.hpp" +#include "crn_etc.h" #define CRND_HEADER_FILE_ONLY #include "../inc/crn_decomp.h" +#include "crn_rg_etc1.h" + namespace crnlib { static void* realloc_func(void* p, size_t size, size_t* pActual_size, bool movable, void* pUser_data) @@ -31,13 +34,17 @@ namespace crnlib crnlib_global_initializer() { crn_threading_init(); - - ryg_dxt::sInitDXT(); - + crnlib_enable_fail_exceptions(true); // Redirect crn_decomp.h's memory allocations into crnlib, which may be further redirected by the outside caller. crnd::crnd_set_memory_callbacks(realloc_func, msize_func, NULL); + + ryg_dxt::sInitDXT(); + + pack_etc1_block_init(); + + rg_etc1::pack_etc1_block_init(); } }; @@ -194,8 +201,8 @@ void *crn_compress(const crn_comp_params &comp_params, const crn_mipmap_params & void *crn_decompress_crn_to_dds(const void *pCRN_file_data, crn_uint32 &file_size) { - dds_texture tex; - if (!tex.load_crn_from_memory("from_memory.crn", pCRN_file_data, file_size)) + mipmapped_texture tex; + if (!tex.read_crn_from_memory(pCRN_file_data, file_size, "from_memory.crn")) { file_size = 0; return NULL; @@ -218,7 +225,7 @@ bool crn_decompress_dds_to_images(const void *pDDS_file_data, crn_uint32 dds_fil { memset(&tex_desc, 0, sizeof(tex_desc)); - dds_texture tex; + mipmapped_texture tex; buffer_stream in_stream(pDDS_file_data, dds_file_size); data_stream_serializer in_serializer(in_stream); if (!tex.read_dds(in_serializer)) @@ -295,7 +302,7 @@ namespace crnlib { if (m_image.is_valid()) { - m_image.set_block_pixels(0, 0, reinterpret_cast(pPixels), m_pack_params, m_dxt1_optimizer, m_dxt5_optimizer); + m_image.set_block_pixels(0, 0, reinterpret_cast(pPixels), m_pack_params, m_set_block_pixels_context); memcpy(pDst_block, &m_image.get_element(0, 0, 0), m_image.get_bytes_per_block()); } } @@ -304,8 +311,7 @@ namespace crnlib dxt_image m_image; crn_comp_params m_comp_params; dxt_image::pack_params m_pack_params; - dxt1_endpoint_optimizer m_dxt1_optimizer; - dxt5_endpoint_optimizer m_dxt5_optimizer; + dxt_image::set_block_pixels_context m_set_block_pixels_context; }; } @@ -330,3 +336,124 @@ void crn_free_block_compressor(crn_block_compressor_context_t pContext) { crnlib_delete(static_cast(pContext)); } + +bool crn_decompress_block(const void *pSrc_block, crn_uint32 *pDst_pixels_u32, crn_format crn_fmt) +{ + color_quad_u8* pDst_pixels = reinterpret_cast(pDst_pixels_u32); + + switch (crn_get_fundamental_dxt_format(crn_fmt)) + { + case cCRNFmtETC1: + { + const etc1_block& block = *reinterpret_cast(pSrc_block); + unpack_etc1(block, pDst_pixels, false); + break; + } + case cCRNFmtDXT1: + { + const dxt1_block* pDXT1_block = reinterpret_cast(pSrc_block); + + color_quad_u8 colors[cDXT1SelectorValues]; + pDXT1_block->get_block_colors(colors, static_cast(pDXT1_block->get_low_color()), static_cast(pDXT1_block->get_high_color())); + + for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) + { + const uint s = pDXT1_block->get_selector(i & 3, i >> 2); + + pDst_pixels[i] = colors[s]; + } + + break; + } + + case cCRNFmtDXT3: + { + const dxt3_block* pDXT3_block = reinterpret_cast(pSrc_block); + + const dxt1_block* pDXT1_block = reinterpret_cast(pSrc_block) + 1; + color_quad_u8 colors[cDXT1SelectorValues]; + pDXT1_block->get_block_colors(colors, static_cast(pDXT1_block->get_low_color()), static_cast(pDXT1_block->get_high_color())); + + for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) + { + const uint s = pDXT1_block->get_selector(i & 3, i >> 2); + const uint a = pDXT3_block->get_alpha(i & 3, i >> 2, true); + + pDst_pixels[i] = colors[s]; + pDst_pixels[i].a = static_cast(a); + } + + break; + } + + case cCRNFmtDXT5: + { + const dxt5_block* pDXT5_block = reinterpret_cast(pSrc_block); + + const dxt1_block* pDXT1_block = reinterpret_cast(pSrc_block) + 1; + color_quad_u8 colors[cDXT1SelectorValues]; + pDXT1_block->get_block_colors(colors, static_cast(pDXT1_block->get_low_color()), static_cast(pDXT1_block->get_high_color())); + + uint values[cDXT5SelectorValues]; + dxt5_block::get_block_values(values, pDXT5_block->get_low_alpha(), pDXT5_block->get_high_alpha()); + + for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) + { + const uint s = pDXT1_block->get_selector(i & 3, i >> 2); + const uint a = pDXT5_block->get_selector(i & 3, i >> 2); + + pDst_pixels[i] = colors[s]; + pDst_pixels[i].a = static_cast(values[a]); + } + } + + case cCRNFmtDXN_XY: + case cCRNFmtDXN_YX: + { + const dxt5_block* pDXT5_block0 = reinterpret_cast(pSrc_block); + const dxt5_block* pDXT5_block1 = reinterpret_cast(pSrc_block) + 1; + + uint values0[cDXT5SelectorValues]; + dxt5_block::get_block_values(values0, pDXT5_block0->get_low_alpha(), pDXT5_block0->get_high_alpha()); + + uint values1[cDXT5SelectorValues]; + dxt5_block::get_block_values(values1, pDXT5_block1->get_low_alpha(), pDXT5_block1->get_high_alpha()); + + for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) + { + const uint s0 = pDXT5_block0->get_selector(i & 3, i >> 2); + const uint s1 = pDXT5_block1->get_selector(i & 3, i >> 2); + + if (crn_fmt == cCRNFmtDXN_XY) + pDst_pixels[i].set_noclamp_rgba(values0[s0], values1[s1], 255, 255); + else + pDst_pixels[i].set_noclamp_rgba(values1[s1], values0[s0], 255, 255); + } + + break; + } + + case cCRNFmtDXT5A: + { + const dxt5_block* pDXT5_block = reinterpret_cast(pSrc_block); + + uint values[cDXT5SelectorValues]; + dxt5_block::get_block_values(values, pDXT5_block->get_low_alpha(), pDXT5_block->get_high_alpha()); + + for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) + { + const uint s = pDXT5_block->get_selector(i & 3, i >> 2); + + pDst_pixels[i].set_noclamp_rgba(255, 255, 255, values[s]); + } + + break; + } + default: + { + return false; + } + } + + return true; +} \ No newline at end of file diff --git a/crnlib/crnlib_linux.cbp b/crnlib/crnlib_linux.cbp index 963a9b0..712dc1d 100644 --- a/crnlib/crnlib_linux.cbp +++ b/crnlib/crnlib_linux.cbp @@ -16,7 +16,6 @@ - @@ -27,22 +26,24 @@ + + + + @@ -71,8 +72,6 @@ - - @@ -93,6 +92,8 @@ + + @@ -109,6 +110,12 @@ + + + + + + @@ -116,6 +123,10 @@ + + + + @@ -136,6 +147,8 @@ + + @@ -174,6 +187,7 @@ + @@ -209,6 +223,9 @@ + + + diff --git a/crnlib/lzma_LzFindMt.cpp b/crnlib/lzma_LzFindMt.cpp index ce72e00..d1451a0 100644 --- a/crnlib/lzma_LzFindMt.cpp +++ b/crnlib/lzma_LzFindMt.cpp @@ -60,7 +60,7 @@ void MtSync_StopWriting(CMtSync *p) p->csWasEntered = False; } Semaphore_Release1(&p->freeSemaphore); - + Event_Wait(&p->wasStopped); while (myNumBlocks++ != p->numProcessedBlocks) @@ -110,12 +110,12 @@ static SRes MtSync_Create2(CMtSync *p, unsigned (MY_STD_CALL *startAddress)(void RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->canStart)); RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->wasStarted)); RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->wasStopped)); - + RINOK_THREAD(Semaphore_Create(&p->freeSemaphore, numBlocks, numBlocks)); RINOK_THREAD(Semaphore_Create(&p->filledSemaphore, 0, numBlocks)); p->needStart = True; - + RINOK_THREAD(Thread_Create(&p->thread, startAddress, obj)); p->wasCreated = True; return SZ_OK; @@ -387,7 +387,7 @@ void BtFillBlock(CMatchFinderMt *p, UInt32 globalBlockIndex) CriticalSection_Enter(&sync->cs); sync->csWasEntered = True; } - + BtGetMatches(p, p->btBuf + (globalBlockIndex & kMtBtNumBlocksMask) * kMtBtBlockSize); if (p->pos > kMtMaxValForNormalize - kMtBtBlockSize) @@ -456,7 +456,7 @@ void MatchFinderMt_Destruct(CMatchFinderMt *p, ISzAlloc *alloc) static unsigned MY_STD_CALL HashThreadFunc2(void *p) { HashThreadFunc((CMatchFinderMt *)p); return 0; } static unsigned MY_STD_CALL BtThreadFunc2(void *p) { - Byte allocaDummy[0x180]; + Byte allocaDummy[0x180]; (void)allocaDummy; int i = 0; for (i = 0; i < 16; i++) allocaDummy[i] = (Byte)i; @@ -563,7 +563,7 @@ UInt32 * MixMatches2(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *distances) const Byte *cur = p->pointerToCurPos; UInt32 lzPos = p->lzPos; MT_HASH2_CALC - + curMatch2 = hash[hash2Value]; hash[hash2Value] = lzPos; @@ -586,7 +586,7 @@ UInt32 * MixMatches3(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *distances) curMatch2 = hash[ hash2Value]; curMatch3 = hash[kFix3HashSize + hash3Value]; - + hash[ hash2Value] = hash[kFix3HashSize + hash3Value] = lzPos; @@ -618,11 +618,11 @@ UInt32 *MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *distances) const Byte *cur = p->pointerToCurPos; UInt32 lzPos = p->lzPos; MT_HASH4_CALC - + curMatch2 = hash[ hash2Value]; curMatch3 = hash[kFix3HashSize + hash3Value]; curMatch4 = hash[kFix4HashSize + hash4Value]; - + hash[ hash2Value] = hash[kFix3HashSize + hash3Value] = hash[kFix4HashSize + hash4Value] = diff --git a/crnlib/lzma_LzmaEnc.cpp b/crnlib/lzma_LzmaEnc.cpp index 2821a3b..81a0a35 100644 --- a/crnlib/lzma_LzmaEnc.cpp +++ b/crnlib/lzma_LzmaEnc.cpp @@ -2176,7 +2176,7 @@ SRes LzmaEnc_Encode(CLzmaEncHandle pp, ISeqOutStream *outStream, ISeqInStream *i SRes res = SZ_OK; #ifdef COMPRESS_MF_MT - Byte allocaDummy[0x300]; + Byte allocaDummy[0x300]; (void)allocaDummy; int i = 0; for (i = 0; i < 16; i++) allocaDummy[i] = (Byte)i; diff --git a/crunch/corpus_gen.cpp b/crunch/corpus_gen.cpp new file mode 100644 index 0000000..792e57d --- /dev/null +++ b/crunch/corpus_gen.cpp @@ -0,0 +1,357 @@ +// File: corpus_gen.cpp - Block compression corpus generator. +// See Copyright Notice and license at the end of inc/crnlib.h +// +// Example command line: +// -gentest [-deep] [-blockpercentage .035] [-width 4096] [-height 4096] -in c:\temp\*.jpg [-in c:\temp\*.jpeg] [-in @blah.txt] +#include "crn_core.h" +#include "corpus_gen.h" +#include "crn_console.h" + +#include "crn_find_files.h" +#include "crn_file_utils.h" +#include "crn_command_line_params.h" + +#include "crn_dxt.h" +#include "crn_cfile_stream.h" +#include "crn_texture_conversion.h" +#include "crn_radix_sort.h" + +#define CRND_HEADER_FILE_ONLY +#include "crn_decomp.h" + +namespace crnlib +{ + struct block + { + color_quad_u8 m_c[4*4]; + + inline operator size_t() const { return fast_hash(this, sizeof(*this)); } + + inline bool operator== (const block& rhs) const + { + return memcmp(this, &rhs, sizeof(*this)) == 0; + } + }; + + typedef crnlib::hash_map block_hash_map; + + corpus_gen::corpus_gen() + { + + + } + + void corpus_gen::sort_blocks(image_u8& img) + { + const uint num_blocks_x = img.get_width() / 4; + const uint num_blocks_y = img.get_height() / 4; + const uint total_blocks = num_blocks_x * num_blocks_y; + + console::printf("Sorting %u blocks...", total_blocks); + + crnlib::vector block_std_dev(total_blocks); + + for (uint by = 0; by < num_blocks_y; by++) + { + for (uint bx = 0; bx < num_blocks_x; bx++) + { + color_quad_u8 c[4 * 4]; + + for (uint y = 0; y < 4; y++) + for (uint x = 0; x < 4; x++) + c[x+y*4] = img(bx*4+x, by*4+y); + + double std_dev = 0.0f; + for (uint i = 0; i < 3; i++) + std_dev += image_utils::compute_std_dev(16, c, i, 1); + + block_std_dev[bx + by * num_blocks_x] = (float)std_dev; + } + } + + crnlib::vector block_indices0(total_blocks); + crnlib::vector block_indices1(total_blocks); + + const uint* pIndices = indirect_radix_sort(total_blocks, &block_indices0[0], &block_indices1[0], &block_std_dev[0], 0, sizeof(float), true); + + image_u8 new_img(img.get_width(), img.get_height()); + + uint dst_block_index = 0; + + //float prev_std_dev = -999; + for (uint i = 0; i < total_blocks; i++) + { + uint src_block_index = pIndices[i]; + + //float std_dev = block_std_dev[src_block_index]; + //crnlib_ASSERT(std_dev >= prev_std_dev); + //prev_std_dev = std_dev; + + uint src_block_x = src_block_index % num_blocks_x; + uint src_block_y = src_block_index / num_blocks_x; + + uint dst_block_x = dst_block_index % num_blocks_x; + uint dst_block_y = dst_block_index / num_blocks_x; + + new_img.unclipped_blit(src_block_x * 4, src_block_y * 4, 4, 4, dst_block_x * 4, dst_block_y * 4, img); + + dst_block_index++; + } + +#if 0 + //new_img.swap(img); +#else + crnlib::vector remaining_blocks(num_blocks_x); + + console::printf("Arranging %u blocks...", total_blocks); + + for (uint by = 0; by < num_blocks_y; by++) + { + console::printf("%u of %u", by, num_blocks_y); + + remaining_blocks.resize(num_blocks_x); + for (uint i = 0; i < num_blocks_x; i++) + remaining_blocks[i] = i; + + color_quad_u8 match_block[16]; + utils::zero_object(match_block); + for (uint bx = 0; bx < num_blocks_x; bx++) + { + uint best_index = 0; + + uint64 best_error = cUINT64_MAX; + + for (uint i = 0; i < remaining_blocks.size(); i++) + { + uint src_block_index = remaining_blocks[i]; + + uint64 error = 0; + for (uint y = 0; y < 4; y++) + { + for (uint x = 0; x < 4; x++) + { + const color_quad_u8& c = new_img(src_block_index*4+x, by*4+y); + error += color::elucidian_distance(c, match_block[x+y*4], false); + } + } + + if (error < best_error) + { + best_error = error; + best_index = i; + } + } + + uint src_block_index = remaining_blocks[best_index]; + + for (uint y = 0; y < 4; y++) + { + for (uint x = 0; x < 4; x++) + { + const color_quad_u8& c = new_img(src_block_index*4+x, by*4+y); + match_block[x+y*4] = c; + + img(bx * 4 + x, by * 4 + y) = c; + } + } + + remaining_blocks.erase_unordered(best_index); + } + } +#endif + } + + bool corpus_gen::generate(const char* pCmd_line) + { + static const command_line_params::param_desc param_desc_array[] = + { + { "corpus_gen", 0, false }, + { "in", 1, true }, + { "deep", 0, false }, + { "blockpercentage", 1, false }, + { "width", 1, false }, + { "height", 1, false }, + { "alpha", 0, false }, + }; + + command_line_params params; + if (!params.parse(pCmd_line, CRNLIB_ARRAY_SIZE(param_desc_array), param_desc_array, true)) + return false; + + if (!params.has_key("in")) + { + console::error("Must specify one or more input files using the /in option!"); + return false; + } + + uint num_dst_blocks_x = params.get_value_as_int("width", 0, 4096, 128, 4096); + num_dst_blocks_x = (num_dst_blocks_x + 3) / 4; + uint num_dst_blocks_y = params.get_value_as_int("height", 0, 4096, 128, 4096); + num_dst_blocks_y = (num_dst_blocks_y + 3) / 4; + + const uint total_dst_blocks = num_dst_blocks_x * num_dst_blocks_y; + image_u8 dst_img(num_dst_blocks_x * 4, num_dst_blocks_y * 4); + uint next_dst_block = 0; + uint total_dst_images = 0; + + random rm; + + block_hash_map block_hash; + block_hash.reserve(total_dst_blocks); + + uint total_images_loaded = 0; + uint total_blocks_written = 0; + + command_line_params::param_map_const_iterator it = params.begin(); + for ( ; it != params.end(); ++it) + { + if (it->first != "in") + continue; + if (it->second.m_values.empty()) + { + console::error("Must follow /in parameter with a filename!\n"); + return false; + } + + for (uint in_value_index = 0; in_value_index < it->second.m_values.size(); in_value_index++) + { + const dynamic_string& filespec = it->second.m_values[in_value_index]; + + find_files file_finder; + if (!file_finder.find(filespec.get_ptr(), find_files::cFlagAllowFiles | (params.has_key("deep") ? find_files::cFlagRecursive : 0))) + { + console::warning("Failed finding files: %s", filespec.get_ptr()); + continue; + } + + if (file_finder.get_files().empty()) + { + console::warning("No files found: %s", filespec.get_ptr()); + return false; + } + + const find_files::file_desc_vec& files = file_finder.get_files(); + + for (uint file_index = 0; file_index < files.size(); file_index++) + { + const find_files::file_desc& file_desc = files[file_index]; + + console::printf("Loading image: %s", file_desc.m_fullname.get_ptr()); + + image_u8 img; + if (!image_utils::read_from_file(img, file_desc.m_fullname.get_ptr(), 0)) + { + console::warning("Failed loading image file: %s", file_desc.m_fullname.get_ptr()); + continue; + } + + if (!params.has_key("alpha")) + { + for (uint y = 0; y < img.get_height(); y++) + for (uint x = 0; x < img.get_width(); x++) + img(x, y).a = 255; + } + + total_images_loaded++; + + uint width = img.get_width(); + uint height = img.get_height(); + + uint num_blocks_x = (width + 3) / 4; + uint num_blocks_y = (height + 3) / 4; + uint total_blocks = num_blocks_x * num_blocks_y; + + float percentage = params.get_value_as_float("blockpercentage", 0, .1f, .001f, 1.0f); + uint total_rand_blocks = math::maximum(1U, (uint)(total_blocks * percentage)); + + crnlib::vector remaining_blocks(total_blocks); + for (uint i = 0; i < total_blocks; i++) + remaining_blocks[i] = i; + + uint num_blocks_remaining = total_rand_blocks; + while (num_blocks_remaining) + { + if (remaining_blocks.empty()) + break; + + uint rand_block_index = rm.irand(0, remaining_blocks.size()); + uint block_index = remaining_blocks[rand_block_index]; + remaining_blocks.erase_unordered(rand_block_index); + + uint block_y = block_index / num_blocks_x; + uint block_x = block_index % num_blocks_x; + + block b; + + for (uint y = 0; y < 4; y++) + { + for (uint x = 0; x < 4; x++) + { + b.m_c[x+y*4] = img.get_clamped(block_x*4+x, block_y*4+y); + } + } + + if (!block_hash.insert(b).second) + continue; + if (block_hash.size() == total_dst_blocks) + { + block_hash.clear(); + block_hash.reserve(total_dst_blocks); + } + + uint dst_block_x = next_dst_block % num_dst_blocks_x; + uint dst_block_y = next_dst_block / num_dst_blocks_x; + for (uint y = 0; y < 4; y++) + { + for (uint x = 0; x < 4; x++) + { + dst_img(dst_block_x * 4 + x, dst_block_y * 4 + y) = b.m_c[x + y*4]; + } + } + + next_dst_block++; + if (total_dst_blocks == next_dst_block) + { + sort_blocks(dst_img); + + dynamic_string dst_filename(cVarArg, "test_%u.tga", total_dst_images); + console::printf("Writing image: %s", dst_filename.get_ptr()); + image_utils::write_to_file(dst_filename.get_ptr(), dst_img, 0); + + dst_img.set_all(color_quad_u8::make_black()); + + next_dst_block = 0; + + total_dst_images++; + } + + total_blocks_written++; + + num_blocks_remaining--; + } + + } // file_index + + } // in_value_index + + } + + if (next_dst_block) + { + sort_blocks(dst_img); + + dynamic_string dst_filename(cVarArg, "test_%u.tga", total_dst_images); + console::printf("Writing image: %s", dst_filename.get_ptr()); + image_utils::write_to_file(dst_filename.get_ptr(), dst_img, 0); + + next_dst_block = 0; + + total_dst_images++; + } + + console::printf("Found %u input images, %u output images, %u total blocks", total_images_loaded, total_dst_images, total_blocks_written); + + return true; + } + +} // namespace crnlib diff --git a/crunch/corpus_gen.h b/crunch/corpus_gen.h new file mode 100644 index 0000000..dbbec5d --- /dev/null +++ b/crunch/corpus_gen.h @@ -0,0 +1,20 @@ +// File: corpus_gen.h +// See Copyright Notice and license at the end of inc/crnlib.h +#pragma once +#include "crn_command_line_params.h" +#include "crn_image.h" + +namespace crnlib +{ + class corpus_gen + { + public: + corpus_gen(); + + bool generate(const char* pCmd_line); + + private: + void sort_blocks(image_u8& img); + }; + +} // namespace crnlib diff --git a/crunch/corpus_test.cpp b/crunch/corpus_test.cpp new file mode 100644 index 0000000..94bc50f --- /dev/null +++ b/crunch/corpus_test.cpp @@ -0,0 +1,404 @@ +// File: corpus_test.cpp +#include "crn_core.h" +#include "corpus_test.h" +#include "crn_find_files.h" +#include "crn_console.h" +#include "crn_image_utils.h" +#include "crn_hash.h" +#include "crn_hash_map.h" +#include "crn_radix_sort.h" +#include "crn_mipmapped_texture.h" + +namespace crnlib +{ + corpus_tester::corpus_tester() + { + m_bad_block_img.resize(256, 256); + m_next_bad_block_index = 0; + m_total_bad_block_files = 0; + } + + void corpus_tester::print_comparative_metric_stats(const command_line_params& cmd_line_params, const crnlib::vector& stats1, const crnlib::vector& stats2, uint num_blocks_x, uint num_blocks_y) + { + num_blocks_y; + + crnlib::vector better_blocks; + crnlib::vector equal_blocks; + crnlib::vector worse_blocks; + crnlib::vector delta_psnr; + + for (uint i = 0; i < stats1.size(); i++) + { + //uint bx = i % num_blocks_x; + //uint by = i / num_blocks_x; + + const image_utils::error_metrics& em1 = stats1[i]; + const image_utils::error_metrics& em2 = stats2[i]; + + if (em1.mPeakSNR < em2.mPeakSNR) + { + worse_blocks.push_back(i); + delta_psnr.push_back((float)(em2.mPeakSNR - em1.mPeakSNR)); + } + else if (fabs(em1.mPeakSNR - em2.mPeakSNR) < .001f) + equal_blocks.push_back(i); + else + better_blocks.push_back(i); + } + + console::printf("Num worse blocks: %u, %3.3f%%", worse_blocks.size(), worse_blocks.size() * 100.0f / stats1.size()); + console::printf("Num equal blocks: %u, %3.3f%%", equal_blocks.size(), equal_blocks.size() * 100.0f / stats1.size()); + console::printf("Num better blocks: %u, %3.3f%%", better_blocks.size(), better_blocks.size() * 100.0f / stats1.size()); + console::printf("Num equal+better blocks: %u, %3.3f%%", equal_blocks.size()+better_blocks.size(), (equal_blocks.size()+better_blocks.size()) * 100.0f / stats1.size()); + + if (!cmd_line_params.has_key("nobadblocks")) + { + crnlib::vector indices[2]; + indices[0].resize(worse_blocks.size()); + indices[1].resize(worse_blocks.size()); + + uint* pSorted_indices = NULL; + if (worse_blocks.size()) + { + pSorted_indices = indirect_radix_sort(worse_blocks.size(), &indices[0][0], &indices[1][0], &delta_psnr[0], 0, sizeof(float), true); + + console::printf("List of worse blocks sorted by delta PSNR:"); + for (uint i = 0; i < worse_blocks.size(); i++) + { + uint block_index = worse_blocks[pSorted_indices[i]]; + uint bx = block_index % num_blocks_x; + uint by = block_index / num_blocks_x; + + console::printf("%u. [%u,%u] %3.3f %3.3f %3.3f", + i, + bx, by, + stats1[block_index].mPeakSNR, + stats2[block_index].mPeakSNR, + stats2[block_index].mPeakSNR - stats1[block_index].mPeakSNR); + } + } + } + } + + void corpus_tester::print_metric_stats(const crnlib::vector& stats, uint num_blocks_x, uint num_blocks_y) + { + num_blocks_y; + + image_utils::error_metrics best_metrics; + image_utils::error_metrics worst_metrics; + worst_metrics.mPeakSNR = 1e+6f; + + vec2I best_loc; + vec2I worst_loc; + utils::zero_object(best_loc); + utils::zero_object(worst_loc); + + double psnr_total = 0.0f; + double psnr2_total = 0.0f; + uint num_non_inf = 0; + uint num_inf = 0; + + for (uint i = 0; i < stats.size(); i++) + { + uint bx = i % num_blocks_x; + uint by = i / num_blocks_x; + + const image_utils::error_metrics& em = stats[i]; + + if ((em.mPeakSNR < 200.0f) && (em > best_metrics)) { best_metrics = em; best_loc.set(bx, by); } + if (em < worst_metrics) { worst_metrics = em; worst_loc.set(bx, by); } + + if (em.mPeakSNR < 200.0f) + { + psnr_total += em.mPeakSNR; + psnr2_total += em.mPeakSNR*em.mPeakSNR; + num_non_inf++; + } + else + { + num_inf++; + } + } + + console::printf("Number of infinite PSNR blocks: %u", num_inf); + console::printf("Number of non-infinite PSNR blocks: %u", num_non_inf); + if (num_non_inf) + { + psnr_total /= num_non_inf; + psnr2_total /= num_non_inf; + + double psnr_std_dev = sqrt(psnr2_total - psnr_total * psnr_total); + + console::printf("Average Non-Inf PSNR: %3.3f, Std dev: %3.3f", psnr_total, psnr_std_dev); + console::printf("Worst PSNR: %3.3f, Block Location: %i,%i", worst_metrics.mPeakSNR, worst_loc[0], worst_loc[1]); + console::printf("Best Non-Inf PSNR: %3.3f, Block Location: %i,%i", best_metrics.mPeakSNR, best_loc[0], best_loc[1]); + } + } + + void corpus_tester::flush_bad_blocks() + { + if (!m_next_bad_block_index) + return; + + dynamic_string filename(cVarArg, "badblocks_%u.tga", m_total_bad_block_files); + console::printf("Writing bad block image: %s", filename.get_ptr()); + image_utils::write_to_file(filename.get_ptr(), m_bad_block_img, image_utils::cWriteFlagIgnoreAlpha); + + m_bad_block_img.set_all(color_quad_u8::make_black()); + + m_total_bad_block_files++; + + m_next_bad_block_index = 0; + } + + void corpus_tester::add_bad_block(image_u8& block) + { + uint num_blocks_x = m_bad_block_img.get_block_width(4); + uint num_blocks_y = m_bad_block_img.get_block_height(4); + uint total_blocks = num_blocks_x * num_blocks_y; + + m_bad_block_img.blit((m_next_bad_block_index % num_blocks_x) * 4, (m_next_bad_block_index / num_blocks_x) * 4, block); + m_next_bad_block_index++; + + if (m_next_bad_block_index == total_blocks) + flush_bad_blocks(); + } + + static bool progress_callback(uint percentage_complete, void* pUser_data_ptr) + { + static int s_prev_percentage_complete = -1; + pUser_data_ptr; + if (s_prev_percentage_complete != static_cast(percentage_complete)) + { + console::progress("%u%%", percentage_complete); + s_prev_percentage_complete = percentage_complete; + } + return true; + } + + bool corpus_tester::test(const char* pCmd_line) + { + console::printf("Command line:\n\"%s\"", pCmd_line); + + static const command_line_params::param_desc param_desc_array[] = + { + { "corpus_test", 0, false }, + { "in", 1, true }, + { "deep", 0, false }, + { "alpha", 0, false }, + { "nomips", 0, false }, + { "perceptual", 0, false }, + { "endpointcaching", 0, false }, + { "multithreaded", 0, false }, + { "writehybrid", 0, false }, + { "nobadblocks", 0, false }, + }; + + command_line_params cmd_line_params; + if (!cmd_line_params.parse(pCmd_line, CRNLIB_ARRAY_SIZE(param_desc_array), param_desc_array, true)) + return false; + + double total_time1 = 0, total_time2 = 0; + + command_line_params::param_map_const_iterator it = cmd_line_params.begin(); + for ( ; it != cmd_line_params.end(); ++it) + { + if (it->first != "in") + continue; + if (it->second.m_values.empty()) + { + console::error("Must follow /in parameter with a filename!\n"); + return false; + } + + for (uint in_value_index = 0; in_value_index < it->second.m_values.size(); in_value_index++) + { + const dynamic_string& filespec = it->second.m_values[in_value_index]; + + find_files file_finder; + if (!file_finder.find(filespec.get_ptr(), find_files::cFlagAllowFiles | (cmd_line_params.has_key("deep") ? find_files::cFlagRecursive : 0))) + { + console::warning("Failed finding files: %s", filespec.get_ptr()); + continue; + } + + if (file_finder.get_files().empty()) + { + console::warning("No files found: %s", filespec.get_ptr()); + return false; + } + + const find_files::file_desc_vec& files = file_finder.get_files(); + + image_u8 o(4, 4), a(4, 4), b(4, 4); + + uint first_channel = 0; + uint num_channels = 3; + bool perceptual = cmd_line_params.get_value_as_bool("perceptual", false); + if (perceptual) + { + first_channel = 0; + num_channels = 0; + } + console::printf("Perceptual mode: %u", perceptual); + + for (uint file_index = 0; file_index < files.size(); file_index++) + { + const find_files::file_desc& file_desc = files[file_index]; + + console::printf("-------- Loading image: %s", file_desc.m_fullname.get_ptr()); + + image_u8 img; + if (!image_utils::read_from_file(img, file_desc.m_fullname.get_ptr(), 0)) + { + console::warning("Failed loading image file: %s", file_desc.m_fullname.get_ptr()); + continue; + } + + if ((!cmd_line_params.has_key("alpha")) && img.is_component_valid(3)) + { + for (uint y = 0; y < img.get_height(); y++) + for (uint x = 0; x < img.get_width(); x++) + img(x, y).a = 255; + + img.set_component_valid(3, false); + } + + mipmapped_texture orig_tex; + orig_tex.assign(crnlib_new(img)); + + if (!cmd_line_params.has_key("nomips")) + { + mipmapped_texture::generate_mipmap_params genmip_params; + genmip_params.m_srgb = true; + + console::printf("Generating mipmaps"); + + if (!orig_tex.generate_mipmaps(genmip_params, false)) + { + console::error("Mipmap generation failed!"); + return false; + } + } + + console::printf("Compress 1"); + + mipmapped_texture tex1(orig_tex); + dxt_image::pack_params convert_params; + convert_params.m_endpoint_caching = cmd_line_params.get_value_as_bool("endpointcaching", 0, false); + convert_params.m_compressor = cCRNDXTCompressorCRN; + convert_params.m_quality = cCRNDXTQualityNormal; + convert_params.m_perceptual = perceptual; + convert_params.m_num_helper_threads = cmd_line_params.get_value_as_bool("multithreaded", 0, true) ? (g_number_of_processors - 1) : 0; + convert_params.m_pProgress_callback = progress_callback; + timer t; + t.start(); + if (!tex1.convert(PIXEL_FMT_ETC1, false, convert_params)) + { + console::error("Texture conversion failed!"); + return false; + } + double time1 = t.get_elapsed_secs(); + total_time1 += time1; + console::printf("Elapsed time: %3.3f", time1); + + console::printf("Compress 2"); + + mipmapped_texture tex2(orig_tex); + convert_params.m_endpoint_caching = false; + convert_params.m_compressor = cCRNDXTCompressorCRN; + convert_params.m_quality = cCRNDXTQualitySuperFast; + t.start(); + if (!tex2.convert(PIXEL_FMT_ETC1, false, convert_params)) + { + console::error("Texture conversion failed!"); + return false; + } + double time2 = t.get_elapsed_secs(); + total_time2 += time2; + console::printf("Elapsed time: %3.3f", time2); + + image_u8 hybrid_img(img.get_width(), img.get_height()); + + for (uint l = 0; l < orig_tex.get_num_levels(); l++) + { + image_u8 orig_img, img1, img2; + + image_u8* pOrig = orig_tex.get_level(0, l)->get_unpacked_image(orig_img, cUnpackFlagUncook | cUnpackFlagUnflip); + image_u8* pImg1 = tex1.get_level(0, l)->get_unpacked_image(img1, cUnpackFlagUncook | cUnpackFlagUnflip); + image_u8* pImg2 = tex2.get_level(0, l)->get_unpacked_image(img2, cUnpackFlagUncook | cUnpackFlagUnflip); + + const uint num_blocks_x = pOrig->get_block_width(4); + const uint num_blocks_y = pOrig->get_block_height(4); + + crnlib::vector metrics[2]; + + for (uint by = 0; by < num_blocks_y; by++) + { + for (uint bx = 0; bx < num_blocks_x; bx++) + { + pOrig->extract_block(o.get_ptr(), bx * 4, by * 4, 4, 4); + pImg1->extract_block(a.get_ptr(), bx * 4, by * 4, 4, 4); + pImg2->extract_block(b.get_ptr(), bx * 4, by * 4, 4, 4); + + image_utils::error_metrics em1; + em1.compute(o, a, first_channel, num_channels); + + image_utils::error_metrics em2; + em2.compute(o, b, first_channel, num_channels); + + metrics[0].push_back(em1); + metrics[1].push_back(em2); + + if (em1.mPeakSNR < em2.mPeakSNR) + { + add_bad_block(o); + + hybrid_img.blit(bx * 4, by * 4, b); + } + else + { + hybrid_img.blit(bx * 4, by * 4, a); + } + } + } + + if (cmd_line_params.has_key("writehybrid")) + image_utils::write_to_file("hybrid.tga", hybrid_img, image_utils::cWriteFlagIgnoreAlpha); + + console::printf("---- Mip level: %u, Total blocks: %ux%u, %u", l, num_blocks_x, num_blocks_y, num_blocks_x * num_blocks_y); + + console::printf("Compressor 1:"); + print_metric_stats(metrics[0], num_blocks_x, num_blocks_y); + + console::printf("Compressor 2:"); + print_metric_stats(metrics[1], num_blocks_x, num_blocks_y); + + console::printf("Compressor 1 vs. 2:"); + print_comparative_metric_stats(cmd_line_params, metrics[0], metrics[1], num_blocks_x, num_blocks_y); + + image_utils::error_metrics em; + + em.compute(*pOrig, *pImg1, 0, perceptual ? 0 : 3); + em.print("Compressor 1: "); + + em.compute(*pOrig, *pImg2, 0, perceptual ? 0 : 3); + em.print("Compressor 2: "); + + em.compute(*pOrig, hybrid_img, 0, perceptual ? 0 : 3); + em.print("Best of Both: "); + } + } + + } // file_index + } + + flush_bad_blocks(); + + console::printf("Total times: %4.3f vs. %4.3f", total_time1, total_time2); + + return true; + } + +} // namespace crnlib + diff --git a/crunch/corpus_test.h b/crunch/corpus_test.h new file mode 100644 index 0000000..8368761 --- /dev/null +++ b/crunch/corpus_test.h @@ -0,0 +1,28 @@ +// File: corpus_test.h +// See Copyright Notice and license at the end of inc/crnlib.h +#pragma once +#include "crn_command_line_params.h" +#include "crn_image_utils.h" + +namespace crnlib +{ + class corpus_tester + { + public: + corpus_tester(); + + bool test(const char* pCmd_line); + + private: + void print_comparative_metric_stats(const command_line_params& params, const crnlib::vector& stats1, const crnlib::vector& stats2, uint num_blocks_x, uint num_blocks_y); + void print_metric_stats(const crnlib::vector& stats, uint num_blocks_x, uint num_blocks_y); + + image_u8 m_bad_block_img; + uint m_next_bad_block_index; + uint m_total_bad_block_files; + + void flush_bad_blocks(); + void add_bad_block(image_u8& block); + }; + +} // namespace crnlib diff --git a/crunch/crunch.2008.vcproj b/crunch/crunch.2008.vcproj index 0d8d174..17e702e 100644 --- a/crunch/crunch.2008.vcproj +++ b/crunch/crunch.2008.vcproj @@ -350,6 +350,22 @@ Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx" UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}" > + + + + + + + + diff --git a/crunch/crunch.cbp b/crunch/crunch.cbp index d0c4f0e..631b65c 100644 --- a/crunch/crunch.cbp +++ b/crunch/crunch.cbp @@ -43,6 +43,10 @@ + + + + diff --git a/crunch/crunch.cpp b/crunch/crunch.cpp index b852434..8d9709a 100644 --- a/crunch/crunch.cpp +++ b/crunch/crunch.cpp @@ -5,6 +5,8 @@ // lifting is actually done by functions in the crnlib::texture_conversion namespace, // which are mostly wrappers over the public crnlib.h functions. // See Copyright Notice and license at the end of inc/crnlib.h +// +// Important: If compiling with gcc, be sure strict aliasing is disabled: -fno-strict-aliasing #include "crn_core.h" #include "crn_console.h" @@ -21,6 +23,9 @@ #define CRND_HEADER_FILE_ONLY #include "crn_decomp.h" +#include "corpus_gen.h" +#include "corpus_test.h" + using namespace crnlib; const int cDefaultCRNQualityLevel = 128; @@ -69,9 +74,10 @@ public: console::printf("crunch [options] -file filename"); console::printf("-file filename - Required input filename, wildcards, multiple -file params OK."); console::printf("-file @list.txt - List of files to convert."); - console::printf("Supported source file formats: dds,crn,tga,bmp,png,jpg/jpeg,psd"); - console::printf("Note: Some file format variants are unsupported, such as progressive JPEG's."); + console::printf("Supported source file formats: dds,ktx,crn,tga,bmp,png,jpg/jpeg,psd"); + console::printf("Note: Some file format variants are unsupported."); console::printf("See the docs for stb_image.c: http://www.nothings.org/stb_image.c"); + console::printf("Progressive JPEG files are supported, see: http://code.google.com/p/jpeg-compressor/"); console::message("\nPath/file related parameters:"); console::printf("-out filename - Output filename"); @@ -82,7 +88,7 @@ public: console::printf("-timestamp - Update only changed files"); console::printf("-forcewrite - Overwrite read-only files"); console::printf("-recreate - Recreate directory structure"); - console::printf("-fileformat [dds,crn,tga,bmp] - Output file format, default=crn or dds"); + console::printf("-fileformat [dds,ktx,crn,tga,bmp,png] - Output file format, default=crn or dds"); console::message("\nModes:"); console::printf("-compare - Compare input and output files (no output files are written)."); @@ -103,7 +109,9 @@ public: console::printf("-imagestats - Print various image qualilty statistics"); console::printf("-mipstats - Print statistics for each mipmap, not just the top mip"); console::printf("-lzmastats - Print size of output file compressed with LZMA codec"); - console::printf("-split - Write faces/mip levels to multiple separate output files"); + console::printf("-split - Write faces/mip levels to multiple separate output PNG files"); + console::printf("-yflip - Always flip texture on Y axis before processing"); + console::printf("-unflip - Unflip texture if read from source file as flipped"); console::message("\nImage rescaling (mutually exclusive options)"); console::printf("-rescale - Rescale image to specified resolution"); @@ -251,6 +259,9 @@ public: { "lzmastats", 0, false }, { "split", 0, false }, { "csvfile", 1, false }, + + { "yflip", 0, false }, + { "unflip", 0, false }, }; crnlib::vector params; @@ -398,6 +409,11 @@ private: { dynamic_string find_name(input_spec); + if ((find_name.get_len()) && (file_utils::does_dir_exist(find_name.get_ptr()))) + { + file_utils::combine_path(find_name, find_name.get_ptr(), "*"); + } + if ((find_name.is_empty()) || (!file_utils::full_path(find_name))) { console::error("Invalid input filename: %s", find_name.get_ptr()); @@ -421,6 +437,7 @@ private: console::error("Failed finding files: %s", find_name.get_ptr()); return false; } + if (file_finder.get_files().empty()) { console::warning("No files found: %s", find_name.get_ptr()); @@ -479,8 +496,12 @@ private: out_file_type = texture_file_types::cFormatBMP; else if (fmt == "dds") out_file_type = texture_file_types::cFormatDDS; + else if (fmt == "ktx") + out_file_type = texture_file_types::cFormatKTX; else if (fmt == "crn") out_file_type = texture_file_types::cFormatCRN; + else if (fmt == "png") + out_file_type = texture_file_types::cFormatPNG; else { console::error("Unsupported output file type: %s", fmt.get_ptr()); @@ -488,19 +509,26 @@ private: } } + // No explicit output format has been specified - try to determine something doable. if (!m_params.has_key("fileformat")) { if (m_params.has_key("split")) { - out_file_type = texture_file_types::cFormatTGA; + out_file_type = texture_file_types::cFormatPNG; } else { texture_file_types::format input_file_type = texture_file_types::determine_file_format(in_filename.get_ptr()); if (input_file_type == texture_file_types::cFormatCRN) { + // Automatically transcode CRN->DXTc and write to DDS files, unless the user specifies either the /fileformat or /split options. out_file_type = texture_file_types::cFormatDDS; } + else if (input_file_type == texture_file_types::cFormatKTX) + { + // Default to converting KTX files to PNG + out_file_type = texture_file_types::cFormatPNG; + } } } @@ -528,15 +556,22 @@ private: { dynamic_string out_dir(m_params.get_value_as_string_or_empty("outdir")); - if (m_params.get_value_as_bool("recreate")) + if (m_params.get_value_as_bool("recreate") && file_desc.m_rel.get_len()) { file_utils::combine_path(out_dir, out_dir.get_ptr(), file_desc.m_rel.get_ptr()); } if (out_dir.get_len()) - out_filename.format("%s\\%s.%s", out_dir.get_ptr(), in_fname.get_ptr(), texture_file_types::get_extension(out_file_type)); + { + if (file_utils::is_path_separator(out_dir.back())) + out_filename.format("%s%s.%s", out_dir.get_ptr(), in_fname.get_ptr(), texture_file_types::get_extension(out_file_type)); + else + out_filename.format("%s\\%s.%s", out_dir.get_ptr(), in_fname.get_ptr(), texture_file_types::get_extension(out_file_type)); + } else + { out_filename.format("%s.%s", in_fname.get_ptr(), texture_file_types::get_extension(out_file_type)); + } if (m_params.get_value_as_bool("recreate")) { @@ -621,7 +656,7 @@ private: return true; } - void print_texture_info(const char* pTex_desc, texture_conversion::convert_params& params, dds_texture& tex) + void print_texture_info(const char* pTex_desc, texture_conversion::convert_params& params, mipmapped_texture& tex) { console::info("%s: %ux%u, Levels: %u, Faces: %u, Format: %s", pTex_desc, @@ -642,6 +677,7 @@ private: if (tex.get_comp_flags() & pixel_format_helpers::cCompFlagGrayscale) console::info("Grayscale "); if (tex.get_comp_flags() & pixel_format_helpers::cCompFlagNormalMap) console::info("NormalMap "); if (tex.get_comp_flags() & pixel_format_helpers::cCompFlagLumaChroma) console::info("LumaChroma "); + if (tex.is_flipped()) console::info("Flipped "); else console::info("Non-Flipped "); console::info("\n"); console::enable_crlf(); } @@ -817,7 +853,7 @@ private: { const char *pKeyName = m_params.has_key("q") ? "q" : "quality"; - if ((dst_file_format == texture_file_types::cFormatDDS) || (dst_file_format == texture_file_types::cFormatCRN)) + if ((dst_file_format == texture_file_types::cFormatDDS) || (dst_file_format == texture_file_types::cFormatCRN) || (dst_file_format == texture_file_types::cFormatKTX)) { uint32 i = m_params.get_value_as_int(pKeyName, 0, cDefaultCRNQualityLevel, 0, cCRNMaxQualityLevel); @@ -825,7 +861,7 @@ private: } else { - console::error("/quality or /q option is only invalid when writing DDS or CRN files!"); + console::error("/quality or /q option is only invalid when writing DDS, KTX or CRN files!"); return false; } } @@ -937,8 +973,8 @@ private: return cCSFailed; } - dds_texture src_tex; - if (!src_tex.load_from_file(pSrc_filename, src_file_format)) + mipmapped_texture src_tex; + if (!src_tex.read_from_file(pSrc_filename, src_file_format)) { if (src_tex.get_last_error().is_empty()) console::error("Failed reading source file: \"%s\"", pSrc_filename); @@ -982,12 +1018,14 @@ private: compressed_size = cmp_tex_bytes.size(); } } - console::info("Source texture dimensions: %ux%u, Levels: %u, Faces: %u, Format: %s", + console::info("Source texture dimensions: %ux%u, Levels: %u, Faces: %u, Format: %s\nPacked Format: %u, Apparent Type: %s, Flipped: %u, Can Unflip Without Unpacking: %u", src_tex.get_width(), src_tex.get_height(), src_tex.get_num_levels(), src_tex.get_num_faces(), - pixel_format_helpers::get_pixel_format_string(src_tex.get_format())); + pixel_format_helpers::get_pixel_format_string(src_tex.get_format()), + src_tex.is_packed(), get_texture_type_desc(src_tex.determine_texture_type()), + src_tex.is_flipped(), src_tex.can_unflip_without_unpacking()); console::info("Total pixels: %u, Source file size: " CRNLIB_UINT64_FORMAT_SPECIFIER ", Source file bits/pixel: %1.3f", total_in_pixels, input_file_size, (input_file_size * 8.0f) / total_in_pixels); @@ -1054,9 +1092,9 @@ private: return cCSFailed; } - dds_texture src_tex; + mipmapped_texture src_tex; - if (!src_tex.load_from_file(pSrc_filename, src_file_format)) + if (!src_tex.read_from_file(pSrc_filename, src_file_format)) { if (src_tex.get_last_error().is_empty()) console::error("Failed reading source file: \"%s\"", pSrc_filename); @@ -1091,9 +1129,9 @@ private: return cCSFailed; } - dds_texture src_tex; + mipmapped_texture src_tex; tim.start(); - if (!src_tex.load_from_file(pSrc_filename, src_file_format)) + if (!src_tex.read_from_file(pSrc_filename, src_file_format)) { if (src_tex.get_last_error().is_empty()) console::error("Failed reading source file: \"%s\"", pSrc_filename); @@ -1118,7 +1156,9 @@ private: params.m_dst_file_type = out_file_type; params.m_lzma_stats = m_params.has_key("lzmastats"); params.m_write_mipmaps_to_multiple_files = m_params.has_key("split"); - params.m_use_source_format = m_params.has_key("usesourceformat"); + params.m_always_use_source_pixel_format = m_params.has_key("usesourceformat"); + params.m_y_flip = m_params.has_key("yflip"); + params.m_unflip = m_params.has_key("unflip"); if ((!m_params.get_value_as_bool("noprogress")) && (!m_params.get_value_as_bool("quiet"))) params.m_pProgress_func = progress_callback_func; @@ -1233,12 +1273,25 @@ static int main_internal(int argc, char *argv[]) print_title(); - crunch converter; - dynamic_string cmd_line; - get_command_line(cmd_line, argc, argv); + get_command_line_as_single_string(cmd_line, argc, argv); - bool status = converter.convert(cmd_line.get_ptr()); + bool status = false; + if (check_for_option(argc, argv, "corpus_gen")) + { + corpus_gen generator; + status = generator.generate(cmd_line.get_ptr()); + } + else if (check_for_option(argc, argv, "corpus_test")) + { + corpus_tester tester; + status = tester.test(cmd_line.get_ptr()); + } + else + { + crunch converter; + status = converter.convert(cmd_line.get_ptr()); + } colorized_console::deinit(); @@ -1247,7 +1300,6 @@ static int main_internal(int argc, char *argv[]) return status ? EXIT_SUCCESS : EXIT_FAILURE; } - static void pause_and_wait(void) { console::enable_output(); @@ -1265,21 +1317,6 @@ static void pause_and_wait(void) int main(int argc, char *argv[]) { -#if 0 - dynamic_string path, filename; - file_utils::split_path("/usr/local/blah/xxx.tga", path, filename); - - dynamic_string combined; - file_utils::combine_path(combined, path.get_ptr(), filename.get_ptr()); - - find_files ff; - bool b = ff.find("/home/richg", "*", find_files::cFlagAllowFiles|find_files::cFlagAllowDirs|find_files::cFlagRecursive); - for (uint32 i = 0; i < ff.get_files().size(); i++) - { - printf("%s dir:%i\n", ff.get_files()[i].m_fullname.get_ptr(), ff.get_files()[i].m_is_dir); - } -#endif - int status = EXIT_FAILURE; if (crnlib_is_debugger_present()) diff --git a/crunch/crunch_linux.cbp b/crunch/crunch_linux.cbp index ee84827..1fca90e 100644 --- a/crunch/crunch_linux.cbp +++ b/crunch/crunch_linux.cbp @@ -8,15 +8,15 @@