diff --git a/ext/decoders/native/main.c b/ext/decoders/native/main.c index fc31fcb..a733f60 100644 --- a/ext/decoders/native/main.c +++ b/ext/decoders/native/main.c @@ -237,6 +237,30 @@ static VALUE rb_decode_pvrtc1_4bpp(VALUE self, VALUE rb_data, VALUE w, VALUE h) return ret; } +/* + * Decode image from PVRTC1 2bpp compressed binary + * + * @param [String] rb_data binary to decode + * @param [Integer] w image width + * @param [Integer] h image height + * @return [String] decoded rgba binary (4 bytes per pixel, w * h * 4 bytes) + */ +static VALUE rb_decode_pvrtc1_2bpp(VALUE self, VALUE rb_data, VALUE w, VALUE h) +{ + if (RSTRING_LEN(rb_data) < ((FIX2LONG(w) + 7) / 8) * ((FIX2LONG(h) + 3) / 4) * 8) { + rb_raise(rb_eStandardError, "Data size is not enough."); + return Qnil; + } + size_t buffer_length = FIX2LONG(w) * FIX2LONG(h) * 4; /* decoder writes one uint32_t (RGBA) per pixel */ + VALUE ret = rb_str_buf_new(buffer_length); + if (!decode_pvrtc_2bpp((uint8_t*)RSTRING_PTR(rb_data), FIX2INT(w), FIX2INT(h), (uint32_t*)RSTRING_PTR(ret))) { + rb_raise(rb_eStandardError, "internal error"); + return Qnil; + } + rb_str_set_len(ret, buffer_length); + return ret; +} + void Init_native() { VALUE mMikunyan = rb_define_module("Mikunyan"); @@ -253,4 +277,5 @@ void Init_native() rb_define_module_function(mDecodeHelper, "decode_dxt1", rb_decode_dxt1, 3); rb_define_module_function(mDecodeHelper, "decode_dxt5", rb_decode_dxt5, 3); rb_define_module_function(mDecodeHelper, "decode_pvrtc1_4bpp", rb_decode_pvrtc1_4bpp, 3); + rb_define_module_function(mDecodeHelper, "decode_pvrtc1_2bpp", rb_decode_pvrtc1_2bpp, 3); } diff --git a/ext/decoders/native/pvrtc.c b/ext/decoders/native/pvrtc.c index d0ec1a8..f298cb2 100644 --- a/ext/decoders/native/pvrtc.c +++ b/ext/decoders/native/pvrtc.c @@ -3,21 +3,9 @@ #include #include -#define MORTON_POS(x, y) (morton_table_buf[num_blocks_x * (y) + (x)]) +#define MORTON_POS(x, y) (morton_table[num_blocks_x * (y) + (x)]) -typedef struct { - uint8_t a_r; - uint8_t a_g; - uint8_t a_b; - uint8_t a_a; - uint8_t b_r; - uint8_t b_g; - uint8_t b_b; - 
uint8_t b_a; -} PVRTCTexelColor; - -static inline uint32_t color(uint8_t r, uint8_t g, uint8_t b, uint8_t a) -{ +static inline uint32_t color(uint8_t r, uint8_t g, uint8_t b, uint8_t a) { #if BYTE_ORDER == LITTLE_ENDIAN return r | g << 8 | b << 16 | a << 24; #else @@ -25,8 +13,7 @@ static inline uint32_t color(uint8_t r, uint8_t g, uint8_t b, uint8_t a) #endif } -static inline int morton_index(const int x, const int y, const int numblocks_x, const int numblocks_y) -{ +static inline int morton_index(const int x, const int y, const int numblocks_x, const int numblocks_y) { const int min_dim = numblocks_x <= numblocks_y ? numblocks_x : numblocks_y; int offset = 0, shift = 0; for (int mask = 1; mask < min_dim; mask <<= 1, shift++) { @@ -36,51 +23,44 @@ static inline int morton_index(const int x, const int y, const int numblocks_x, return offset; } -static void applicate_color_4bpp(const uint8_t* data, const PVRTCTexelColor colors[9], uint32_t buf[16]) -{ - typedef struct { - uint16_t a_r; - uint16_t a_g; - uint16_t a_b; - uint16_t a_a; - uint16_t b_r; - uint16_t b_g; - uint16_t b_b; - uint16_t b_a; - } PVRTCInterpColor; - - static const int INTERP_WEIGHT[4][3] = { { 2, 2, 0 }, { 1, 3, 0 }, { 0, 4, 0 }, { 0, 3, 1 } }; - PVRTCInterpColor interp_colors[16] = {}; - - for (int cy = 0, c = 0; cy < 4; cy++) { - for (int cx = 0; cx < 4; cx++, c++) { - for (int acy = 0, ac = 0; acy < 3; acy++) { - for (int acx = 0; acx < 3; acx++, ac++) { - int interp_weight = INTERP_WEIGHT[cx][acx] * INTERP_WEIGHT[cy][acy]; - interp_colors[c].a_r += colors[ac].a_r * interp_weight; - interp_colors[c].a_g += colors[ac].a_g * interp_weight; - interp_colors[c].a_b += colors[ac].a_b * interp_weight; - interp_colors[c].a_a += colors[ac].a_a * interp_weight; - interp_colors[c].b_r += colors[ac].b_r * interp_weight; - interp_colors[c].b_g += colors[ac].b_g * interp_weight; - interp_colors[c].b_b += colors[ac].b_b * interp_weight; - interp_colors[c].b_a += colors[ac].b_a * interp_weight; - } - } - 
interp_colors[c].a_r = (interp_colors[c].a_r >> 1) + (interp_colors[c].a_r >> 6); - interp_colors[c].a_g = (interp_colors[c].a_g >> 1) + (interp_colors[c].a_g >> 6); - interp_colors[c].a_b = (interp_colors[c].a_b >> 1) + (interp_colors[c].a_b >> 6); - interp_colors[c].a_a = (interp_colors[c].a_a) + (interp_colors[c].a_a >> 4); - interp_colors[c].b_r = (interp_colors[c].b_r >> 1) + (interp_colors[c].b_r >> 6); - interp_colors[c].b_g = (interp_colors[c].b_g >> 1) + (interp_colors[c].b_g >> 6); - interp_colors[c].b_b = (interp_colors[c].b_b >> 1) + (interp_colors[c].b_b >> 6); - interp_colors[c].b_a = (interp_colors[c].b_a) + (interp_colors[c].b_a >> 4); - } +static void get_texel_colors(const uint8_t *data, PVRTCTexelInfo *info) { +#if BYTE_ORDER == LITTLE_ENDIAN + uint16_t ca = *(uint16_t *)(data + 4); + uint16_t cb = *(uint16_t *)(data + 6); +#else + uint16_t ca = data[4] | data[5] << 8; + uint16_t cb = data[6] | data[7] << 8; +#endif + if (ca & 0x8000) { + info->a.r = ca >> 10 & 0x1f; + info->a.g = ca >> 5 & 0x1f; + info->a.b = (ca & 0x1e) | (ca >> 4 & 1); + info->a.a = 0xf; + } else { + info->a.r = (ca >> 7 & 0x1e) | (ca >> 11 & 1); + info->a.g = (ca >> 3 & 0x1e) | (ca >> 7 & 1); + info->a.b = (ca << 1 & 0x1c) | (ca >> 2 & 3); + info->a.a = ca >> 11 & 0xe; } + if (cb & 0x8000) { + info->b.r = cb >> 10 & 0x1f; + info->b.g = cb >> 5 & 0x1f; + info->b.b = cb & 0x1f; + info->b.a = 0xf; + } else { + info->b.r = (cb >> 7 & 0x1e) | (cb >> 11 & 1); + info->b.g = (cb >> 3 & 0x1e) | (cb >> 7 & 1); + info->b.b = (cb << 1 & 0x1e) | (cb >> 3 & 1); + info->b.a = cb >> 11 & 0xe; + } +} + +static void get_texel_weights_4bpp(const uint8_t *data, PVRTCTexelInfo *info) { + info->punch_through_flag = 0; int mod_mode = data[4] & 1; #if BYTE_ORDER == LITTLE_ENDIAN - uint32_t mod_bits = *(uint32_t*)data; + uint32_t mod_bits = *(uint32_t *)data; #else uint32_t mod_bits = data[0] | data[1] << 8 | data[2] << 16 | data[3] << 24; #endif @@ -88,143 +68,302 @@ static void 
applicate_color_4bpp(const uint8_t* data, const PVRTCTexelColor colo if (mod_mode) { // punch-through for (int i = 0; i < 16; i++, mod_bits >>= 2) { - int r, g, b, a; switch (mod_bits & 3) { case 0: - r = interp_colors[i].a_r; - g = interp_colors[i].a_g; - b = interp_colors[i].a_b; - a = interp_colors[i].a_a; + info->weight[i] = 0; break; case 3: - r = interp_colors[i].b_r; - g = interp_colors[i].b_g; - b = interp_colors[i].b_b; - a = interp_colors[i].b_a; + info->weight[i] = 8; break; + case 2: + info->punch_through_flag |= 1 << i; + // fall through default: - r = (interp_colors[i].a_r + interp_colors[i].b_r) / 2; - g = (interp_colors[i].a_g + interp_colors[i].b_g) / 2; - b = (interp_colors[i].a_b + interp_colors[i].b_b) / 2; - a = (mod_bits & 3) == 2 ? 0 : (interp_colors[i].a_a + interp_colors[i].b_a) / 2; + info->weight[i] = 4; } - buf[i] = color(r, g, b, a); } } else { // standard for (int i = 0; i < 16; i++, mod_bits >>= 2) { - int r, g, b, a; switch (mod_bits & 3) { case 0: - r = interp_colors[i].a_r; - g = interp_colors[i].a_g; - b = interp_colors[i].a_b; - a = interp_colors[i].a_a; + info->weight[i] = 0; break; case 1: - r = (interp_colors[i].a_r * 5 + interp_colors[i].b_r * 3) / 8; - g = (interp_colors[i].a_g * 5 + interp_colors[i].b_g * 3) / 8; - b = (interp_colors[i].a_b * 5 + interp_colors[i].b_b * 3) / 8; - a = (interp_colors[i].a_a * 5 + interp_colors[i].b_a * 3) / 8; + info->weight[i] = 3; break; case 2: - r = (interp_colors[i].a_r * 3 + interp_colors[i].b_r * 5) / 8; - g = (interp_colors[i].a_g * 3 + interp_colors[i].b_g * 5) / 8; - b = (interp_colors[i].a_b * 3 + interp_colors[i].b_b * 5) / 8; - a = (interp_colors[i].a_a * 3 + interp_colors[i].b_a * 5) / 8; + info->weight[i] = 5; break; case 3: - r = interp_colors[i].b_r; - g = interp_colors[i].b_g; - b = interp_colors[i].b_b; - a = interp_colors[i].b_a; + info->weight[i] = 8; break; } - buf[i] = color(r, g, b, a); } } } -static inline void expand_color(const uint8_t* data, PVRTCTexelColor* color) 
-{ +static void get_texel_weights_2bpp(const uint8_t *data, PVRTCTexelInfo *info) { + info->punch_through_flag = 0; + + int mod_mode = data[4] & 1; #if BYTE_ORDER == LITTLE_ENDIAN - uint16_t ca = *(uint16_t*)(data + 4); - uint16_t cb = *(uint16_t*)(data + 6); + uint32_t mod_bits = *(uint32_t *)data; #else - uint16_t ca = data[4] | data[5] << 8; - uint16_t cb = data[6] | data[7] << 8; + uint32_t mod_bits = data[0] | data[1] << 8 | data[2] << 16 | data[3] << 24; #endif - if (ca & 0x8000) { - color->a_r = ca >> 10 & 0x1f; - color->a_g = ca >> 5 & 0x1f; - color->a_b = (ca & 0x1e) | (ca >> 4 & 1); - color->a_a = 0xf; + + if (mod_mode) { + // interpolated modulation + // The specification is wrong here (only the standard bilinear table of 4bpp M=0 is used; + // 2bpp has no punch-through) + int fillflag = data[0] & 1 ? (data[2] & 0x10 ? -1 : -2) : -3; + // Weights that cannot be determined yet (they must be interpolated later) are filled with negative numbers + // -3: up/down/left/right / -2: left/right / -1: up/down + for (int y = 0, i = 1; y < 4; ++y & 1 ? --i : ++i) + for (int x = 0; x < 4; x++, i += 2) + info->weight[i] = fillflag; + for (int y = 0, i = 0; y < 4; ++y & 1 ? 
++i : --i) { + for (int x = 0; x < 4; x++, i += 2, mod_bits >>= 2) { + switch (mod_bits & 3) { + case 0: + info->weight[i] = 0; + break; + case 1: + info->weight[i] = 3; + break; + case 2: + info->weight[i] = 5; + break; + case 3: + info->weight[i] = 8; + break; + } + } + } + // index 0 is always 1bpp + info->weight[0] = (info->weight[0] + 3) & 8; + if (data[0] & 1) + // when bit0 is 1, (4, 2) is 1bpp + info->weight[20] = (info->weight[20] + 3) & 8; } else { - color->a_r = (ca >> 7 & 0x1e) | (ca >> 11 & 1); - color->a_g = (ca >> 3 & 0x1e) | (ca >> 7 & 1); - color->a_b = (ca << 1 & 0x1c) | (ca >> 2 & 3); - color->a_a = ca >> 11 & 0xe; - } - if (cb & 0x8000) { - color->b_r = cb >> 10 & 0x1f; - color->b_g = cb >> 5 & 0x1f; - color->b_b = cb & 0x1f; - color->b_a = 0xf; - } else { - color->b_r = (cb >> 7 & 0x1e) | (cb >> 11 & 1); - color->b_g = (cb >> 3 & 0x1e) | (cb >> 7 & 1); - color->b_b = (cb << 1 & 0x1e) | (cb >> 3 & 1); - color->b_a = cb >> 11 & 0xe; + // 1bpp + for (int i = 0; i < 32; i++, mod_bits >>= 1) + info->weight[i] = mod_bits & 1 ? 
8 : 0; } } -int decode_pvrtc_4bpp(const uint8_t* data, const int w, const int h, uint32_t* image) -{ +static void applicate_color_4bpp(const uint8_t *data, PVRTCTexelInfo *const info[9], uint32_t buf[16]) { + static const int INTERP_WEIGHT[4][3] = {{2, 2, 0}, {1, 3, 0}, {0, 4, 0}, {0, 3, 1}}; + PVRTCTexelColorInt clr_a[16] = {}, clr_b[16] = {}; + + for (int y = 0, i = 0; y < 4; y++) { + for (int x = 0; x < 4; x++, i++) { + for (int acy = 0, ac = 0; acy < 3; acy++) { + for (int acx = 0; acx < 3; acx++, ac++) { + int interp_weight = INTERP_WEIGHT[x][acx] * INTERP_WEIGHT[y][acy]; + clr_a[i].r += info[ac]->a.r * interp_weight; + clr_a[i].g += info[ac]->a.g * interp_weight; + clr_a[i].b += info[ac]->a.b * interp_weight; + clr_a[i].a += info[ac]->a.a * interp_weight; + clr_b[i].r += info[ac]->b.r * interp_weight; + clr_b[i].g += info[ac]->b.g * interp_weight; + clr_b[i].b += info[ac]->b.b * interp_weight; + clr_b[i].a += info[ac]->b.a * interp_weight; + } + } + clr_a[i].r = (clr_a[i].r >> 1) + (clr_a[i].r >> 6); + clr_a[i].g = (clr_a[i].g >> 1) + (clr_a[i].g >> 6); + clr_a[i].b = (clr_a[i].b >> 1) + (clr_a[i].b >> 6); + clr_a[i].a = (clr_a[i].a) + (clr_a[i].a >> 4); + clr_b[i].r = (clr_b[i].r >> 1) + (clr_b[i].r >> 6); + clr_b[i].g = (clr_b[i].g >> 1) + (clr_b[i].g >> 6); + clr_b[i].b = (clr_b[i].b >> 1) + (clr_b[i].b >> 6); + clr_b[i].a = (clr_b[i].a) + (clr_b[i].a >> 4); + } + } + + const PVRTCTexelInfo *self_info = info[4]; + uint32_t punch_through_flag = self_info->punch_through_flag; + for (int i = 0; i < 16; i++, punch_through_flag >>= 1) { + buf[i] = color((clr_a[i].r * (8 - self_info->weight[i]) + clr_b[i].r * self_info->weight[i]) / 8, + (clr_a[i].g * (8 - self_info->weight[i]) + clr_b[i].g * self_info->weight[i]) / 8, + (clr_a[i].b * (8 - self_info->weight[i]) + clr_b[i].b * self_info->weight[i]) / 8, + punch_through_flag & 1 + ? 
0 + : (clr_a[i].a * (8 - self_info->weight[i]) + clr_b[i].a * self_info->weight[i]) / 8); + } +} + +static void applicate_color_2bpp(const uint8_t *data, PVRTCTexelInfo *info[9], uint32_t buf[32]) { + static const int INTERP_WEIGHT_X[8][3] = {{4, 4, 0}, {3, 5, 0}, {2, 6, 0}, {1, 7, 0}, + {0, 8, 0}, {0, 7, 1}, {0, 6, 2}, {0, 5, 3}}; + static const int INTERP_WEIGHT_Y[4][3] = {{2, 2, 0}, {1, 3, 0}, {0, 4, 0}, {0, 3, 1}}; + PVRTCTexelColorInt clr_a[32] = {}, clr_b[32] = {}; + + for (int y = 0, i = 0; y < 4; y++) { + for (int x = 0; x < 8; x++, i++) { + for (int acy = 0, ac = 0; acy < 3; acy++) { + for (int acx = 0; acx < 3; acx++, ac++) { + int interp_weight = INTERP_WEIGHT_X[x][acx] * INTERP_WEIGHT_Y[y][acy]; + clr_a[i].r += info[ac]->a.r * interp_weight; + clr_a[i].g += info[ac]->a.g * interp_weight; + clr_a[i].b += info[ac]->a.b * interp_weight; + clr_a[i].a += info[ac]->a.a * interp_weight; + clr_b[i].r += info[ac]->b.r * interp_weight; + clr_b[i].g += info[ac]->b.g * interp_weight; + clr_b[i].b += info[ac]->b.b * interp_weight; + clr_b[i].a += info[ac]->b.a * interp_weight; + } + } + clr_a[i].r = (clr_a[i].r >> 2) + (clr_a[i].r >> 7); + clr_a[i].g = (clr_a[i].g >> 2) + (clr_a[i].g >> 7); + clr_a[i].b = (clr_a[i].b >> 2) + (clr_a[i].b >> 7); + clr_a[i].a = (clr_a[i].a >> 1) + (clr_a[i].a >> 5); + clr_b[i].r = (clr_b[i].r >> 2) + (clr_b[i].r >> 7); + clr_b[i].g = (clr_b[i].g >> 2) + (clr_b[i].g >> 7); + clr_b[i].b = (clr_b[i].b >> 2) + (clr_b[i].b >> 7); + clr_b[i].a = (clr_b[i].a >> 1) + (clr_b[i].a >> 5); + } + } + + static const int POSYA[4][2] = {{1, 24}, {4, -8}, {4, -8}, {4, -8}}; + static const int POSYB[4][2] = {{4, 8}, {4, 8}, {4, 8}, {7, -24}}; + static const int POSXL[8][2] = {{3, 7}, {4, -1}, {4, -1}, {4, -1}, {4, -1}, {4, -1}, {4, -1}, {4, -1}}; + static const int POSXR[8][2] = {{4, 1}, {4, 1}, {4, 1}, {4, 1}, {4, 1}, {4, 1}, {4, 1}, {5, -7}}; + + PVRTCTexelInfo *self_info = info[4]; + uint32_t punch_through_flag = self_info->punch_through_flag; + for 
(int y = 0, i = 0; y < 4; y++) { + for (int x = 0; x < 8; x++, i++, punch_through_flag >>= 1) { + switch (self_info->weight[i]) { + case -1: + self_info->weight[i] = + (info[POSYA[y][0]]->weight[i + POSYA[y][1]] + info[POSYB[y][0]]->weight[i + POSYB[y][1]] + 1) / 2; + break; + case -2: + self_info->weight[i] = + (info[POSXL[x][0]]->weight[i + POSXL[x][1]] + info[POSXR[x][0]]->weight[i + POSXR[x][1]] + 1) / 2; + break; + case -3: + self_info->weight[i] = + (info[POSYA[y][0]]->weight[i + POSYA[y][1]] + info[POSYB[y][0]]->weight[i + POSYB[y][1]] + + info[POSXL[x][0]]->weight[i + POSXL[x][1]] + info[POSXR[x][0]]->weight[i + POSXR[x][1]] + 2) / + 4; + break; + } + buf[i] = color((clr_a[i].r * (8 - self_info->weight[i]) + clr_b[i].r * self_info->weight[i]) / 8, + (clr_a[i].g * (8 - self_info->weight[i]) + clr_b[i].g * self_info->weight[i]) / 8, + (clr_a[i].b * (8 - self_info->weight[i]) + clr_b[i].b * self_info->weight[i]) / 8, + punch_through_flag & 1 + ? 0 + : (clr_a[i].a * (8 - self_info->weight[i]) + clr_b[i].a * self_info->weight[i]) / 8); + } + } +} + +int decode_pvrtc_4bpp(const uint8_t *data, const int w, const int h, uint32_t *image) { int num_blocks_x = (w + 3) / 4; int num_blocks_y = (h + 3) / 4; int num_blocks = num_blocks_x * num_blocks_y; int copy_length_last = (w + 3) % 4 + 1; - PVRTCTexelColor* texel_colors = (PVRTCTexelColor*)malloc(sizeof(PVRTCTexelColor) * num_blocks); - if (texel_colors == NULL) + int *morton_table = (int *)malloc(sizeof(int) * num_blocks); + if (morton_table == NULL) return 0; - const uint8_t* d = data; - for (int i = 0; i < num_blocks; i++, d += 8) - expand_color(d, texel_colors + i); - - int* morton_table_buf = (int*)malloc(sizeof(int) * num_blocks); - if (morton_table_buf == NULL) { - free(texel_colors); + PVRTCTexelInfo *texel_info = (PVRTCTexelInfo *)malloc(sizeof(PVRTCTexelInfo) * num_blocks); + if (texel_info == NULL) { + free(morton_table); return 0; } + for (int y = 0; y < num_blocks_y; y++) for (int x = 0; x < num_blocks_x; 
x++) MORTON_POS(x, y) = morton_index(x, y, num_blocks_x, num_blocks_y); + const uint8_t *d = data; + for (int i = 0; i < num_blocks; i++, d += 8) { + get_texel_colors(d, &texel_info[i]); + get_texel_weights_4bpp(d, &texel_info[i]); + } + uint32_t buffer[16]; - uint32_t* buffer_end = buffer + 16; - PVRTCTexelColor colors[9]; + uint32_t *buffer_end = buffer + 16; + PVRTCTexelInfo *local_info[9]; int pos_x[3], pos_y[3]; for (int by = 0; by < num_blocks_y; by++) { - pos_y[0] = by == 0 ? 0 : by - 1; + pos_y[0] = by == 0 ? num_blocks_y - 1 : by - 1; pos_y[1] = by; - pos_y[2] = by == num_blocks_y - 1 ? num_blocks_y - 1 : by + 1; - for (int bx = 0, x = 0; bx < num_blocks_x; bx++, d += 8, x += 4) { - pos_x[0] = bx == 0 ? 0 : bx - 1; + pos_y[2] = by == num_blocks_y - 1 ? 0 : by + 1; + for (int bx = 0, x = 0; bx < num_blocks_x; bx++, x += 4) { + pos_x[0] = bx == 0 ? num_blocks_x - 1 : bx - 1; pos_x[1] = bx; - pos_x[2] = bx == num_blocks_x - 1 ? num_blocks_x - 1 : bx + 1; + pos_x[2] = bx == num_blocks_x - 1 ? 0 : bx + 1; for (int cy = 0, c = 0; cy < 3; cy++) for (int cx = 0; cx < 3; cx++, c++) - colors[c] = texel_colors[MORTON_POS(pos_x[cx], pos_y[cy])]; - applicate_color_4bpp(data + MORTON_POS(bx, by) * 8, colors, buffer); + local_info[c] = &texel_info[MORTON_POS(pos_x[cx], pos_y[cy])]; + applicate_color_4bpp(data + MORTON_POS(bx, by) * 8, local_info, buffer); int copy_length = (bx < num_blocks_x - 1 ? 
4 : copy_length_last) * 4; - uint32_t* b = buffer; + uint32_t *b = buffer; for (int y = h - by * 4 - 1; b < buffer_end && y >= 0; y--, b += 4) memcpy(image + y * w + x, b, copy_length); } } - free(morton_table_buf); - free(texel_colors); + free(morton_table); + free(texel_info); + return 1; +} + +int decode_pvrtc_2bpp(const uint8_t *data, const int w, const int h, uint32_t *image) { + int num_blocks_x = (w + 7) / 8; + int num_blocks_y = (h + 3) / 4; + int num_blocks = num_blocks_x * num_blocks_y; + int copy_length_last = (w + 7) % 8 + 1; + + int *morton_table = (int *)malloc(sizeof(int) * num_blocks); + if (morton_table == NULL) + return 0; + PVRTCTexelInfo *texel_info = (PVRTCTexelInfo *)malloc(sizeof(PVRTCTexelInfo) * num_blocks); + if (texel_info == NULL) { + free(morton_table); + return 0; + } + + for (int y = 0; y < num_blocks_y; y++) + for (int x = 0; x < num_blocks_x; x++) + MORTON_POS(x, y) = morton_index(x, y, num_blocks_x, num_blocks_y); + + const uint8_t *d = data; + for (int i = 0; i < num_blocks; i++, d += 8) { + get_texel_colors(d, &texel_info[i]); + get_texel_weights_2bpp(d, &texel_info[i]); + } + + uint32_t buffer[32]; + uint32_t *buffer_end = buffer + 32; + PVRTCTexelInfo *local_info[9]; + int pos_x[3], pos_y[3]; + for (int by = 0; by < num_blocks_y; by++) { + pos_y[0] = by == 0 ? num_blocks_y - 1 : by - 1; + pos_y[1] = by; + pos_y[2] = by == num_blocks_y - 1 ? 0 : by + 1; + for (int bx = 0, x = 0; bx < num_blocks_x; bx++, x += 8) { + pos_x[0] = bx == 0 ? num_blocks_x - 1 : bx - 1; + pos_x[1] = bx; + pos_x[2] = bx == num_blocks_x - 1 ? 0 : bx + 1; + for (int cy = 0, c = 0; cy < 3; cy++) + for (int cx = 0; cx < 3; cx++, c++) + local_info[c] = &texel_info[MORTON_POS(pos_x[cx], pos_y[cy])]; + applicate_color_2bpp(data + MORTON_POS(bx, by) * 8, local_info, buffer); + int copy_length = (bx < num_blocks_x - 1 ? 
8 : copy_length_last) * 4; + uint32_t *b = buffer; + for (int y = h - by * 4 - 1; b < buffer_end && y >= 0; y--, b += 8) + memcpy(image + y * w + x, b, copy_length); + } + } + + free(morton_table); + free(texel_info); return 1; } diff --git a/ext/decoders/native/pvrtc.h b/ext/decoders/native/pvrtc.h index 398e840..3bfe57e 100644 --- a/ext/decoders/native/pvrtc.h +++ b/ext/decoders/native/pvrtc.h @@ -3,6 +3,28 @@ #include +typedef struct { + uint8_t r; + uint8_t g; + uint8_t b; + uint8_t a; +} PVRTCTexelColor; + +typedef struct { + int r; + int g; + int b; + int a; +} PVRTCTexelColorInt; + +typedef struct { + PVRTCTexelColor a; + PVRTCTexelColor b; + int8_t weight[32]; + uint32_t punch_through_flag; +} PVRTCTexelInfo; + int decode_pvrtc_4bpp(const uint8_t*, const int, const int, uint32_t*); +int decode_pvrtc_2bpp(const uint8_t*, const int, const int, uint32_t*); #endif /* end of include guard: PVRTC_H */ diff --git a/lib/mikunyan/decoders/image_decoder.rb b/lib/mikunyan/decoders/image_decoder.rb index fb8d1bf..f21f5f1 100644 --- a/lib/mikunyan/decoders/image_decoder.rb +++ b/lib/mikunyan/decoders/image_decoder.rb @@ -389,8 +389,7 @@ module Mikunyan # @param [String] bin binary to decode # @return [ChunkyPNG::Image] decoded image def self.decode_pvrtc1_2bpp(width, height, bin) - # ChunkyPNG::Image.from_rgba_stream(width, height, DecodeHelper.decode_pvrtc1_2bpp(bin, width, height)) - nil + ChunkyPNG::Image.from_rgba_stream(width, height, DecodeHelper.decode_pvrtc1_2bpp(bin, width, height)) end # Decode image from ETC1 compressed binary