Review notes for this revision of the patch (text before the first "diff --git"
line is ignored by git apply):

  * dxtc.c: color() widens each channel before shifting; the original shifted
    a value promoted to int, which is undefined once alpha >= 128.
  * dxtc.c: block fields are read byte by byte (load_le) instead of through
    uint16_t* / uint64_t* casts of the Ruby string buffer.
  * dxtc.c: the DXT5 colour half is always decoded with the four-colour
    layout; the three-colour layout is DXT1 only.
  * dxtc.c: palette construction and the block copy are shared helpers, and
    image offsets are computed in size_t.
  * main.c: both wrappers go through one helper that type-checks the data,
    rejects negative or oversized dimensions and checks the calloc result.

NOTE(review): the header names of all "#include <...>" lines and all
indentation were missing from the text under review. <string.h> in dxtc.c and
<stdint.h> in dxtc.h are inferred from the names those files use. The three
system includes at the top of main.c are context lines and are a guess --
confirm them, and the indentation of every context line, against the real
files before applying.
NOTE(review): hunk lengths for dxtc.c and main.c were recomputed for the new
content; the "index" hashes are carried over from the original patch and no
longer describe the new blobs.

diff --git a/ext/decoders/native/dxtc.c b/ext/decoders/native/dxtc.c
new file mode 100644
index 0000000..458e72d
--- /dev/null
+++ b/ext/decoders/native/dxtc.c
@@ -0,0 +1,153 @@
+#include <string.h>
+#include "dxtc.h"
+
+/*
+ * Decoders for DXT1 and DXT5 block-compressed textures.
+ *
+ * Block fields are assembled byte by byte in little-endian order, so the input
+ * needs no particular alignment and is never read through a cast pointer.
+ * Decoded pixels are host-order 32-bit words (red in bits 0-7, alpha in bits
+ * 24-31). Rows are written bottom-up: block row 0 ends up on the last rows of
+ * the image.
+ */
+
+/* Reads n (1 to 8) bytes starting at p as a little-endian unsigned integer. */
+static inline uint_fast64_t load_le(const uint8_t *p, int n) {
+    uint_fast64_t v = 0;
+    while (n-- > 0)
+        v = v << 8 | p[n];
+    return v;
+}
+
+/* Packs four 8-bit channels into one pixel word: r | g << 8 | b << 16 | a << 24. */
+static inline uint_fast32_t color(uint_fast8_t r, uint_fast8_t g, uint_fast8_t b, uint_fast8_t a) {
+    /* Widen before shifting: as a promoted int, a << 24 overflows once a >= 128. */
+    return (uint_fast32_t)r | (uint_fast32_t)g << 8 | (uint_fast32_t)b << 16 | (uint_fast32_t)a << 24;
+}
+
+/* Expands an RGB565 value to 8 bits per channel, copying each channel's top bits into its low bits. */
+static inline void rgb565(const uint_fast16_t c, int *r, int *g, int *b) {
+    *r = (c & 0xf800) >> 8;
+    *g = (c & 0x07e0) >> 3;
+    *b = (c & 0x001f) << 3;
+    *r |= *r >> 5;
+    *g |= *g >> 6;
+    *b |= *b >> 5;
+}
+
+/*
+ * Builds the four-entry palette from the two RGB565 endpoints stored at p.
+ *
+ * alpha is the alpha channel given to every computed entry. When allow_3color
+ * is non-zero and the first endpoint is not greater than the second, entry 2
+ * is the midpoint and entry 3 is all zero bits; otherwise entries 2 and 3 are
+ * the 1/3 and 2/3 blends of the endpoints.
+ */
+static inline void color_palette(const uint8_t *p, uint_fast8_t alpha, int allow_3color, uint_fast32_t c[4]) {
+    int r0, g0, b0, r1, g1, b1;
+    uint_fast16_t q0 = (uint_fast16_t)load_le(p, 2);
+    uint_fast16_t q1 = (uint_fast16_t)load_le(p + 2, 2);
+    rgb565(q0, &r0, &g0, &b0);
+    rgb565(q1, &r1, &g1, &b1);
+    c[0] = color(r0, g0, b0, alpha);
+    c[1] = color(r1, g1, b1, alpha);
+    if (q0 > q1 || !allow_3color) {
+        c[2] = color((r0 * 2 + r1) / 3, (g0 * 2 + g1) / 3, (b0 * 2 + b1) / 3, alpha);
+        c[3] = color((r0 + r1 * 2) / 3, (g0 + g1 * 2) / 3, (b0 + b1 * 2) / 3, alpha);
+    } else {
+        c[2] = color((r0 + r1) / 2, (g0 + g1) / 2, (b0 + b1) / 2, alpha);
+        c[3] = 0;
+    }
+}
+
+/*
+ * Writes a decoded 4x4 block into the w-by-h image at block column s, block
+ * row t. Only the first clen bytes of each block row are copied, and rows that
+ * would fall outside the image are skipped.
+ */
+static inline void put_block(const uint32_t *buf, uint32_t *image, int w, int h, int s, int t, size_t clen) {
+    for (int i = 0, y = h - t * 4 - 1; i < 4 && y >= 0; i++, y--)
+        memcpy(image + (size_t)y * w + (size_t)s * 4, buf + i * 4, clen);
+}
+
+/* Decodes one 8-byte DXT1 block into 16 pixels, four per row. */
+static inline void decode_dxt1_block(const uint64_t *data, uint32_t *outbuf) {
+    const uint8_t *p = (const uint8_t*)data;
+    uint_fast32_t c[4];
+    color_palette(p, 255, 1, c);
+    /* Bytes 4-7 hold sixteen 2-bit palette indices, lowest bits first. */
+    uint_fast32_t d = (uint_fast32_t)load_le(p + 4, 4);
+    for (int i = 0; i < 16; i++, d >>= 2)
+        outbuf[i] = c[d & 3];
+}
+
+/*
+ * Decodes a w-by-h DXT1 image from data into image, which must hold w * h
+ * pixels. Reads 8 bytes for each of the ceil(w / 4) * ceil(h / 4) blocks.
+ */
+void decode_dxt1(const uint64_t *data, const int w, const int h, uint32_t *image) {
+    int bcw = (w + 3) / 4;
+    int bch = (h + 3) / 4;
+    /* Bytes kept per block row in the last column, where the block may overhang the image. */
+    size_t clen_last = ((w + 3) % 4 + 1) * sizeof(uint32_t);
+    uint32_t buf[16];
+    const uint64_t *d = data;
+    for (int t = 0; t < bch; t++) {
+        for (int s = 0; s < bcw; s++, d++) {
+            decode_dxt1_block(d, buf);
+            put_block(buf, image, w, h, s, t, s < bcw - 1 ? 4 * sizeof(uint32_t) : clen_last);
+        }
+    }
+}
+
+/* Decodes one 16-byte DXT5 block (alpha half, then colour half) into 16 pixels. */
+static inline void decode_dxt5_block(const uint64_t *data, uint32_t *outbuf) {
+    const uint8_t *p = (const uint8_t*)data;
+    uint_fast32_t a[8] = { p[0], p[1] };
+    if (a[0] > a[1]) {
+        a[2] = (a[0] * 6 + a[1]) / 7;
+        a[3] = (a[0] * 5 + a[1] * 2) / 7;
+        a[4] = (a[0] * 4 + a[1] * 3) / 7;
+        a[5] = (a[0] * 3 + a[1] * 4) / 7;
+        a[6] = (a[0] * 2 + a[1] * 5) / 7;
+        a[7] = (a[0] + a[1] * 6) / 7;
+    } else {
+        a[2] = (a[0] * 4 + a[1]) / 5;
+        a[3] = (a[0] * 3 + a[1] * 2) / 5;
+        a[4] = (a[0] * 2 + a[1] * 3) / 5;
+        a[5] = (a[0] + a[1] * 4) / 5;
+        a[6] = 0;
+        a[7] = 255;
+    }
+    for (int i = 0; i < 8; i++)
+        a[i] <<= 24;
+
+    uint_fast32_t c[4];
+    /* DXT5 colour always uses the four-colour layout, whatever the endpoint order. */
+    color_palette(p + 8, 0, 0, c);
+
+    /* Bytes 2-7: sixteen 3-bit alpha indices. Bytes 12-15: sixteen 2-bit colour indices. */
+    uint_fast64_t da = load_le(p + 2, 6);
+    uint_fast32_t dc = (uint_fast32_t)load_le(p + 12, 4);
+    for (int i = 0; i < 16; i++, da >>= 3, dc >>= 2)
+        outbuf[i] = a[da & 7] | c[dc & 3];
+}
+
+/*
+ * Decodes a w-by-h DXT5 image from data into image, which must hold w * h
+ * pixels. Reads 16 bytes for each of the ceil(w / 4) * ceil(h / 4) blocks.
+ */
+void decode_dxt5(const uint64_t *data, const int w, const int h, uint32_t *image) {
+    int bcw = (w + 3) / 4;
+    int bch = (h + 3) / 4;
+    /* Bytes kept per block row in the last column, where the block may overhang the image. */
+    size_t clen_last = ((w + 3) % 4 + 1) * sizeof(uint32_t);
+    uint32_t buf[16];
+    const uint64_t *d = data;
+    for (int t = 0; t < bch; t++) {
+        for (int s = 0; s < bcw; s++, d += 2) {
+            decode_dxt5_block(d, buf);
+            put_block(buf, image, w, h, s, t, s < bcw - 1 ? 4 * sizeof(uint32_t) : clen_last);
+        }
+    }
+}
diff --git a/ext/decoders/native/dxtc.h b/ext/decoders/native/dxtc.h
new file mode 100644
index 0000000..d58156b
--- /dev/null
+++ b/ext/decoders/native/dxtc.h
@@ -0,0 +1,9 @@
+#ifndef DXTC_H
+#define DXTC_H
+
+#include <stdint.h>
+
+void decode_dxt1(const uint64_t*, const int, const int, uint32_t*);
+void decode_dxt5(const uint64_t*, const int, const int, uint32_t*);
+
+#endif /* end of include guard: DXTC_H */
diff --git a/ext/decoders/native/main.c b/ext/decoders/native/main.c
index c12b4d0..4d16710 100644
--- a/ext/decoders/native/main.c
+++ b/ext/decoders/native/main.c
@@ -1,8 +1,9 @@
 #include <stdint.h>
 #include <stdlib.h>
 #include <ruby.h>
-#include "astc.h"
 #include "rgb.h"
+#include "astc.h"
+#include "dxtc.h"
 
 static VALUE rb_decode_rgb565(VALUE self, VALUE rb_data, VALUE size, VALUE big) {
     if (RSTRING_LEN(rb_data) < FIX2LONG(size) * 2)
@@ -25,9 +26,54 @@ static VALUE rb_decode_astc(VALUE self, VALUE rb_data, VALUE w, VALUE h, VALUE b
     return ret;
 }
 
+/* Signature shared by decode_dxt1 and decode_dxt5. */
+typedef void (*dxtc_decoder)(const uint64_t*, const int, const int, uint32_t*);
+
+/*
+ * Validates the arguments, runs decode over rb_data and returns the pixels as
+ * a new String of w * h * 4 bytes. block_size is the compressed size of one
+ * 4x4 block in bytes.
+ *
+ * Raises TypeError if rb_data is not a String, ArgumentError if a dimension is
+ * negative or the image is too large to size with a long, StandardError if
+ * rb_data is shorter than the blocks require, and NoMemoryError if the pixel
+ * buffer cannot be allocated.
+ */
+static VALUE decode_dxtc(VALUE rb_data, VALUE rb_w, VALUE rb_h, long block_size, dxtc_decoder decode) {
+    Check_Type(rb_data, T_STRING);
+    int w = FIX2INT(rb_w);
+    int h = FIX2INT(rb_h);
+    long bcw = ((long)w + 3) / 4;
+    long bch = ((long)h + 3) / 4;
+    /* Bounding bcw * bch * 64 keeps both the compressed size and w * h * 4 within a long. */
+    if (w < 0 || h < 0 || (bch > 0 && bcw > LONG_MAX / 64 / bch))
+        rb_raise(rb_eArgError, "Invalid image size.");
+    if (RSTRING_LEN(rb_data) < bcw * bch * block_size)
+        rb_raise(rb_eStandardError, "Data size is not enough.");
+    uint32_t *image = calloc((size_t)w * (size_t)h, sizeof *image);
+    if (!image && w > 0 && h > 0)
+        rb_memerror();
+    decode((const uint64_t*)RSTRING_PTR(rb_data), w, h, image);
+    VALUE ret = rb_str_new((const char*)image, (long)w * h * (long)sizeof *image);
+    free(image);
+    return ret;
+}
+
+/* DecodeHelper.decode_dxt1(data, width, height): decodes DXT1 data, 8 bytes per block. */
+static VALUE rb_decode_dxt1(VALUE self, VALUE rb_data, VALUE w, VALUE h) {
+    return decode_dxtc(rb_data, w, h, 8, decode_dxt1);
+}
+
+/* DecodeHelper.decode_dxt5(data, width, height): decodes DXT5 data, 16 bytes per block. */
+static VALUE rb_decode_dxt5(VALUE self, VALUE rb_data, VALUE w, VALUE h) {
+    return decode_dxtc(rb_data, w, h, 16, decode_dxt5);
+}
+
 void Init_native() {
     VALUE mMikunyan = rb_define_module("Mikunyan");
     VALUE mDecodeHelper = rb_define_module_under(mMikunyan, "DecodeHelper");
-    rb_define_module_function(mDecodeHelper, "decode_astc", rb_decode_astc, 5);
     rb_define_module_function(mDecodeHelper, "decode_rgb565", rb_decode_rgb565, 3);
+    rb_define_module_function(mDecodeHelper, "decode_astc", rb_decode_astc, 5);
+    rb_define_module_function(mDecodeHelper, "decode_dxt1", rb_decode_dxt1, 3);
+    rb_define_module_function(mDecodeHelper, "decode_dxt5", rb_decode_dxt5, 3);
 }
diff --git a/lib/mikunyan/decoders/dxtc_block_decoder.rb b/lib/mikunyan/decoders/dxtc_block_decoder.rb
deleted file mode 100644
index e80c77d..0000000
--- a/lib/mikunyan/decoders/dxtc_block_decoder.rb
+++ /dev/null
@@ -1,89 +0,0 @@
-require 'bin_utils'
-require 'fiddle'
-
-module Mikunyan
-  module DecodeHelper
-    # Module for decoding DXTC block
-    module DxtcBlockDecoder
-      def self.decode_dxt1_block(bin)
-        c0 = BinUtils.get_int16_le(bin, 0)
-        c1 = BinUtils.get_int16_le(bin, 2)
-        color = [get_rgb565a(c0), get_rgb565a(c1), nil, nil]
-        if c0 > c1
-          color[2] = [(color[0][0] * 2 + color[1][0]) / 3, (color[0][1] * 2 + color[1][1]) / 3, (color[0][2] * 2 + color[1][2]) / 3, 255]
-          color[3] = [(color[0][0] + color[1][0] * 2) / 3, (color[0][1] + color[1][1] * 2) / 3, (color[0][2] + color[1][2] * 2) / 3, 255]
-        else
-          color[2] = [(color[0][0] + color[1][0]) / 2, (color[0][1] + color[1][1]) / 2, (color[0][2] + color[1][2]) / 2, 255]
-          color[3] = [0, 0, 0, 0]
-        end
-        color.map!{|e| e.pack('C4')}
-        code = BinUtils.get_int32_le(bin, 4)
-        mem = String.new(capacity: 64)
-        16.times do
-          mem << color[code & 3]
-          code >>= 2
-        end
-        mem
-      end
-
-      def self.decode_dxt5_block(bin)
-        alpha_list, alpha_code = decode_dxt5_alpha(bin)
-        color_list, color_code = decode_dxtc_rgb(bin)
-        mem = String.new(capacity: 64)
-        16.times do
-          mem << color_list[color_code & 3]
-          mem << alpha_list[alpha_code & 7]
-          color_code >>= 2
-          alpha_code >>= 3
-        end
-        mem
-      end
-
-      def self.decode_dxtc_rgb(bin)
-        c0 = BinUtils.get_int16_le(bin, 8)
-        c1 = BinUtils.get_int16_le(bin, 10)
-        color = [get_rgb565(c0), get_rgb565(c1), nil, nil]
-        if c0 > c1
-          color[2] = (0...3).map{|i| (color[0][i] * 2 + color[1][i]) / 3}
-          color[3] = (0...3).map{|i| (color[0][i] + color[1][i] * 2) / 3}
-        else
-          color[2] = (0...3).map{|i| (color[0][i] + color[1][i]) / 2}
-          color[3] = [0, 0, 0]
-        end
-        color.map!{|e| e.pack('C3')}
-        code = BinUtils.get_int32_le(bin, 12)
-        [color, code]
-      end
-
-      def self.decode_dxt5_alpha(bin)
-        a0 = BinUtils.get_int8(bin, 0)
-        a1 = BinUtils.get_int8(bin, 1)
-        alpha = [a0, a1, nil, nil, nil, nil, nil, nil]
-        if a0 > a1
-          alpha[2, 6] = (1..6).map{|n| (a0 * (7-n) + a1 * n) / 7}
-        else
-          alpha[2, 4] = (1..4).map{|n| (a0 * (5-n) + a1 * n) / 5}
-          alpha[6] = 0
-          alpha[7] = 255
-        end
-        alpha.pack('C8')
-        code = BinUtils.get_int64_le(bin) >> 16
-        [alpha, code]
-      end
-
-      def self.get_rgb565(c)
-        r = (c & 0xf800) >> 8
-        g = (c & 0x07e0) >> 3
-        b = (c & 0x001f) << 3
-        [r | r >> 5, g | g >> 6, b | b >> 5]
-      end
-
-      def self.get_rgb565a(c)
-        r = (c & 0xf800) >> 8
-        g = (c & 0x07e0) >> 3
-        b = (c & 0x001f) << 3
-        [r | r >> 5, g | g >> 6, b | b >> 5, 255]
-      end
-    end
-  end
-end
diff --git a/lib/mikunyan/decoders/image_decoder.rb b/lib/mikunyan/decoders/image_decoder.rb
index eab79ec..fd6e858 100644
--- a/lib/mikunyan/decoders/image_decoder.rb
+++ b/lib/mikunyan/decoders/image_decoder.rb
@@ -1,7 +1,6 @@
 begin; require 'oily_png'; rescue LoadError; require 'chunky_png'; end
 require 'bin_utils'
 require 'mikunyan/decoders/native'
-require 'mikunyan/decoders/dxtc_block_decoder'
 
 module Mikunyan
   # Class for image decoding tools
@@ -353,16 +352,7 @@ module Mikunyan
     # @param [String] bin binary to decode
     # @return [ChunkyPNG::Image] decoded image
     def self.decode_dxt1(width, height, bin)
-      bw = (width + 3) / 4
-      bh = (height + 3) / 4
-      ret = ChunkyPNG::Image.new(bh * 4, bw * 4)
-      bh.times do |by|
-        bw.times do |bx|
-          block = DecodeHelper::DxtcBlockDecoder::decode_dxt1_block(bin.byteslice((bx + by * bw) * 8, 8))
-          ret.replace!(ChunkyPNG::Image.from_rgba_stream(4, 4, block), bx * 4, by * 4)
-        end
-      end
-      ret.crop(0, 0, height, width).flip
+      ChunkyPNG::Image.from_rgba_stream(width, height, DecodeHelper.decode_dxt1(bin, width, height))
     end
 
     # Decode image from DXT5 compressed binary
@@ -371,16 +361,7 @@ module Mikunyan
     # @param [String] bin binary to decode
     # @return [ChunkyPNG::Image] decoded image
     def self.decode_dxt5(width, height, bin)
-      bw = (width + 3) / 4
-      bh = (height + 3) / 4
-      ret = ChunkyPNG::Image.new(bh * 4, bw * 4)
-      bh.times do |by|
-        bw.times do |bx|
-          block = DecodeHelper::DxtcBlockDecoder::decode_dxt5_block(bin.byteslice((bx + by * bw) * 16, 16))
-          ret.replace!(ChunkyPNG::Image.from_rgba_stream(4, 4, block), bx * 4, by * 4)
-        end
-      end
-      ret.crop(0, 0, height, width).flip
+      ChunkyPNG::Image.from_rgba_stream(width, height, DecodeHelper.decode_dxt5(bin, width, height))
     end
 
     # Decode image from ETC1 compressed binary