diff --git a/ext/decoders/native/astc.c b/ext/decoders/native/astc.c index 5938d73..4083808 100644 --- a/ext/decoders/native/astc.c +++ b/ext/decoders/native/astc.c @@ -1,106 +1,81 @@ #include "astc.h" -#include "fp16.h" #include #include #include #include #include +#include "color.h" +#include "fp16.h" static const int BitReverseTable[] = { - 0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0, - 0x30, 0xB0, 0x70, 0xF0, 0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8, - 0x18, 0x98, 0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8, 0x04, 0x84, 0x44, 0xC4, - 0x24, 0xA4, 0x64, 0xE4, 0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4, - 0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC, 0x6C, 0xEC, 0x1C, 0x9C, 0x5C, 0xDC, - 0x3C, 0xBC, 0x7C, 0xFC, 0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2, - 0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, 0x72, 0xF2, 0x0A, 0x8A, 0x4A, 0xCA, - 0x2A, 0xAA, 0x6A, 0xEA, 0x1A, 0x9A, 0x5A, 0xDA, 0x3A, 0xBA, 0x7A, 0xFA, - 0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6, 0x16, 0x96, 0x56, 0xD6, - 0x36, 0xB6, 0x76, 0xF6, 0x0E, 0x8E, 0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE, - 0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE, 0x01, 0x81, 0x41, 0xC1, - 0x21, 0xA1, 0x61, 0xE1, 0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, 0xF1, - 0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9, 0x19, 0x99, 0x59, 0xD9, - 0x39, 0xB9, 0x79, 0xF9, 0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5, - 0x15, 0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5, 0x0D, 0x8D, 0x4D, 0xCD, - 0x2D, 0xAD, 0x6D, 0xED, 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD, - 0x03, 0x83, 0x43, 0xC3, 0x23, 0xA3, 0x63, 0xE3, 0x13, 0x93, 0x53, 0xD3, - 0x33, 0xB3, 0x73, 0xF3, 0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB, - 0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB, 0x07, 0x87, 0x47, 0xC7, - 0x27, 0xA7, 0x67, 0xE7, 0x17, 0x97, 0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7, - 0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, 0xEF, 0x1F, 0x9F, 0x5F, 0xDF, - 0x3F, 0xBF, 0x7F, 0xFF -}; + 0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0, 0x70, 0xF0, 0x08, 0x88, 0x48, + 0xC8, 0x28, 0xA8, 0x68, 0xE8, 0x18, 0x98, 0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8, 0x04, 0x84, 0x44, 0xC4, 0x24, 0xA4, + 0x64, 0xE4, 0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4, 0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC, 0x6C, 0xEC, 0x1C, + 0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC, 0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2, 0x12, 0x92, 0x52, 0xD2, + 0x32, 0xB2, 0x72, 0xF2, 0x0A, 0x8A, 0x4A, 0xCA, 0x2A, 0xAA, 0x6A, 0xEA, 0x1A, 0x9A, 0x5A, 0xDA, 0x3A, 0xBA, 0x7A, + 0xFA, 0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6, 0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6, 0x0E, 0x8E, + 0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE, 0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE, 0x01, 0x81, 0x41, 0xC1, 0x21, + 0xA1, 0x61, 0xE1, 0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, 0xF1, 0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9, + 0x19, 0x99, 0x59, 0xD9, 0x39, 0xB9, 0x79, 0xF9, 0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5, 0x15, 0x95, 0x55, + 0xD5, 0x35, 0xB5, 0x75, 0xF5, 0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD, 0x6D, 0xED, 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, + 0x7D, 0xFD, 0x03, 0x83, 0x43, 0xC3, 0x23, 0xA3, 0x63, 0xE3, 0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3, 0x0B, + 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB, 0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB, 0x07, 0x87, 0x47, 0xC7, + 0x27, 0xA7, 0x67, 0xE7, 0x17, 0x97, 0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7, 0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, + 0xEF, 0x1F, 0x9F, 0x5F, 0xDF, 0x3F, 0xBF, 0x7F, 0xFF}; -static const int WeightPrecTableA[] = { 0, 0, 0, 3, 0, 5, 3, 0, 0, 0, 5, 3, 0, 5, 3, 0 }; -static const int WeightPrecTableB[] = { 0, 0, 1, 0, 2, 0, 1, 3, 0, 0, 1, 2, 4, 2, 3, 5 }; +static const int WeightPrecTableA[] = {0, 0, 0, 3, 0, 5, 3, 0, 0, 0, 5, 3, 0, 5, 3, 0}; +static const int WeightPrecTableB[] = {0, 0, 1, 0, 2, 0, 1, 3, 0, 0, 1, 2, 4, 2, 3, 5}; -static const int CemTableA[] = { 0, 3, 5, 0, 3, 5, 0, 3, 5, 0, 3, 5, 0, 3, 5, 0, 3, 0, 0 }; -static const int CemTableB[] = { 8, 6, 5, 7, 5, 4, 6, 4, 3, 5, 3, 2, 4, 2, 1, 3, 1, 2, 1 }; +static const int CemTableA[] = {0, 3, 5, 0, 3, 5, 0, 3, 5, 0, 3, 5, 0, 3, 5, 0, 3, 0, 0}; +static const int CemTableB[] = {8, 6, 5, 7, 5, 4, 6, 4, 3, 5, 3, 2, 4, 2, 1, 3, 1, 2, 1}; -static inline uint_fast32_t color(uint_fast8_t r, uint_fast8_t g, uint_fast8_t b, uint_fast8_t a) -{ -#if BYTE_ORDER == LITTLE_ENDIAN - return r | g << 8 | b << 16 | a << 24; -#else - return a | b << 8 | g << 16 | r << 24; -#endif -} - -static inline uint_fast8_t bit_reverse_u8(const uint_fast8_t c, const int bits) -{ +static inline uint_fast8_t bit_reverse_u8(const uint_fast8_t c, const int bits) { return BitReverseTable[c] >> (8 - bits); } -static inline uint_fast64_t bit_reverse_u64(const uint_fast64_t d, const int bits) -{ - uint_fast64_t ret = (uint_fast64_t)BitReverseTable[d & 0xff] << 56 | (uint_fast64_t)BitReverseTable[d >> 8 & 0xff] << 48 | (uint_fast64_t)BitReverseTable[d >> 16 & 0xff] << 40 | (uint_fast64_t)BitReverseTable[d >> 24 & 0xff] << 32 | (uint_fast32_t)BitReverseTable[d >> 32 & 0xff] << 24 | (uint_fast32_t)BitReverseTable[d >> 40 & 0xff] << 16 | (uint_fast16_t)BitReverseTable[d >> 48 & 0xff] << 8 | BitReverseTable[d >> 56 & 0xff]; +static inline uint_fast64_t bit_reverse_u64(const uint_fast64_t d, const int bits) { + uint_fast64_t ret = (uint_fast64_t)BitReverseTable[d & 0xff] << 56 | + (uint_fast64_t)BitReverseTable[d >> 8 & 0xff] << 48 | (uint_fast64_t)BitReverseTable[d >> 16 & 0xff] << 40 | + (uint_fast64_t)BitReverseTable[d >> 24 & 0xff] << 32 | (uint_fast32_t)BitReverseTable[d >> 32 & 0xff] << 24 | + (uint_fast32_t)BitReverseTable[d >> 40 & 0xff] << 16 | (uint_fast16_t)BitReverseTable[d >> 48 & 0xff] << 8 | + BitReverseTable[d >> 56 & 0xff]; return ret >> (64 - bits); } -static inline int getbits(const uint8_t* buf, const int bit, const int len) -{ - return (*(int*)(buf + bit / 8) >> (bit % 8)) & ((1 << len) - 1); +static inline int getbits(const uint8_t *buf, const int bit, const int len) { + return (*(int *)(buf + bit / 8) >> (bit % 8)) & ((1 << len) - 1); } -static inline uint_fast64_t getbits64(const uint8_t* buf, const int bit, const int len) -{ +static inline uint_fast64_t getbits64(const uint8_t *buf, const int bit, const int len) { uint_fast64_t mask = len == 64 ? 0xffffffffffffffff : (1ull << len) - 1; if (len < 1) return 0; else if (bit >= 64) - return (*(uint_fast64_t*)(buf + 8)) >> (bit - 64) & mask; + return (*(uint_fast64_t *)(buf + 8)) >> (bit - 64) & mask; else if (bit <= 0) - return (*(uint_fast64_t*)buf) << -bit & mask; + return (*(uint_fast64_t *)buf) << -bit & mask; else if (bit + len <= 64) - return (*(uint_fast64_t*)buf) >> bit & mask; + return (*(uint_fast64_t *)buf) >> bit & mask; else - return ((*(uint_fast64_t*)buf) >> bit | *(uint_fast64_t*)(buf + 8) << (64 - bit)) & mask; + return ((*(uint_fast64_t *)buf) >> bit | *(uint_fast64_t *)(buf + 8) << (64 - bit)) & mask; } -static inline uint16_t u8ptr_to_u16(const uint8_t* ptr) -{ -#if BYTE_ORDER == LITTLE_ENDIAN - return *(uint16_t*)ptr; -#else - return ptr[0] | ptr[1] << 8; -#endif +static inline uint16_t u8ptr_to_u16(const uint8_t *ptr) { + return lton16(*(uint16_t *)ptr); } -static inline uint_fast8_t clamp(const int n) -{ +static inline uint_fast8_t clamp(const int n) { return n < 0 ? 0 : n > 255 ? 255 : n; } -static inline void bit_transfer_signed(int* a, int* b) -{ +static inline void bit_transfer_signed(int *a, int *b) { *b = (*b >> 1) | (*a & 0x80); *a = (*a >> 1) & 0x3f; if (*a & 0x20) *a -= 0x40; } -static inline void set_endpoint(int endpoint[8], int r1, int g1, int b1, int a1, int r2, int g2, int b2, int a2) -{ +static inline void set_endpoint(int endpoint[8], int r1, int g1, int b1, int a1, int r2, int g2, int b2, int a2) { endpoint[0] = r1; endpoint[1] = g1; endpoint[2] = b1; @@ -111,8 +86,7 @@ static inline void set_endpoint(int endpoint[8], int r1, int g1, int b1, int a1, endpoint[7] = a2; } -static inline void set_endpoint_clamp(int endpoint[8], int r1, int g1, int b1, int a1, int r2, int g2, int b2, int a2) -{ +static inline void set_endpoint_clamp(int endpoint[8], int r1, int g1, int b1, int a1, int r2, int g2, int b2, int a2) { endpoint[0] = clamp(r1); endpoint[1] = clamp(g1); endpoint[2] = clamp(b1); @@ -123,8 +97,7 @@ static inline void set_endpoint_clamp(int endpoint[8], int r1, int g1, int b1, i endpoint[7] = clamp(a2); } -static inline void set_endpoint_blue(int endpoint[8], int r1, int g1, int b1, int a1, int r2, int g2, int b2, int a2) -{ +static inline void set_endpoint_blue(int endpoint[8], int r1, int g1, int b1, int a1, int r2, int g2, int b2, int a2) { endpoint[0] = (r1 + b1) >> 1; endpoint[1] = (g1 + b1) >> 1; endpoint[2] = b1; @@ -135,8 +108,8 @@ static inline void set_endpoint_blue(int endpoint[8], int r1, int g1, int b1, in endpoint[7] = a2; } -static inline void set_endpoint_blue_clamp(int endpoint[8], int r1, int g1, int b1, int a1, int r2, int g2, int b2, int a2) -{ +static inline void set_endpoint_blue_clamp(int endpoint[8], int r1, int g1, int b1, int a1, int r2, int g2, int b2, + int a2) { endpoint[0] = clamp((r1 + b1) >> 1); endpoint[1] = clamp((g1 + b1) >> 1); endpoint[2] = clamp(b1); @@ -147,13 +120,11 @@ static inline void set_endpoint_blue_clamp(int endpoint[8], int r1, int g1, int endpoint[7] = clamp(a2); } -static inline uint_fast16_t clamp_hdr(const int n) -{ +static inline uint_fast16_t clamp_hdr(const int n) { return n < 0 ? 0 : n > 0xfff ? 0xfff : n; } -static inline void set_endpoint_hdr(int endpoint[8], int r1, int g1, int b1, int a1, int r2, int g2, int b2, int a2) -{ +static inline void set_endpoint_hdr(int endpoint[8], int r1, int g1, int b1, int a1, int r2, int g2, int b2, int a2) { endpoint[0] = r1; endpoint[1] = g1; endpoint[2] = b1; @@ -164,8 +135,8 @@ static inline void set_endpoint_hdr(int endpoint[8], int r1, int g1, int b1, int endpoint[7] = a2; } -static inline void set_endpoint_hdr_clamp(int endpoint[8], int r1, int g1, int b1, int a1, int r2, int g2, int b2, int a2) -{ +static inline void set_endpoint_hdr_clamp(int endpoint[8], int r1, int g1, int b1, int a1, int r2, int g2, int b2, + int a2) { endpoint[0] = clamp_hdr(r1); endpoint[1] = clamp_hdr(g1); endpoint[2] = clamp_hdr(b1); @@ -178,13 +149,11 @@ static inline void set_endpoint_hdr_clamp(int endpoint[8], int r1, int g1, int b typedef uint_fast8_t (*t_select_folor_func_ptr)(int, int, int); -static uint_fast8_t select_color(int v0, int v1, int weight) -{ +static uint_fast8_t select_color(int v0, int v1, int weight) { return ((((v0 << 8 | v0) * (64 - weight) + (v1 << 8 | v1) * weight + 32) >> 6) * 255 + 32768) / 65536; } -static uint_fast8_t select_color_hdr(int v0, int v1, int weight) -{ +static uint_fast8_t select_color_hdr(int v0, int v1, int weight) { uint16_t c = ((v0 << 4) * (64 - weight) + (v1 << 4) * weight + 32) >> 6; uint16_t m = c & 0x7ff; if (m < 512) @@ -197,8 +166,7 @@ static uint_fast8_t select_color_hdr(int v0, int v1, int weight) return isfinite(f) ? clamp(roundf(f * 255)) : 255; } -static inline uint8_t f32_to_u8(const float f) -{ +static inline uint8_t f32_to_u8(const float f) { float c = roundf(f * 255); if (c < 0) return 0; @@ -208,16 +176,8 @@ static inline uint8_t f32_to_u8(const float f) return c; } -static inline uint8_t f16ptr_to_u8(const uint8_t* ptr) -{ - const uint16_t c = -#if BYTE_ORDER == LITTLE_ENDIAN - *(uint16_t*)ptr -#else - ptr[0] | ptr[1] << 8 -#endif - ; - return f32_to_u8(fp16_ieee_to_fp32_value(c)); +static inline uint8_t f16ptr_to_u8(const uint8_t *ptr) { + return f32_to_u8(fp16_ieee_to_fp32_value(lton16(*(uint16_t *)ptr))); } typedef struct { @@ -229,10 +189,10 @@ typedef struct { int dual_plane; int plane_selector; int weight_range; - int weight_num; // max: 120 + int weight_num; // max: 120 int cem[4]; int cem_range; - int endpoint_value_num; // max: 32 + int endpoint_value_num; // max: 32 int endpoints[4][8]; int weights[144][2]; int partition[144]; @@ -243,87 +203,59 @@ typedef struct { int nonbits; } IntSeqData; -void decode_intseq(const uint8_t* buf, int offset, const int a, const int b, const int count, const int reverse, IntSeqData* out) -{ - static int mt[] = { 0, 2, 4, 5, 7 }; - static int mq[] = { 0, 3, 5 }; +void decode_intseq(const uint8_t *buf, int offset, const int a, const int b, const int count, const int reverse, + IntSeqData *out) { + static int mt[] = {0, 2, 4, 5, 7}; + static int mq[] = {0, 3, 5}; static int TritsTable[5][256] = { - { 0, 1, 2, 0, 0, 1, 2, 1, 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 0, 0, 1, 2, 1, - 0, 1, 2, 2, 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2, 1, 0, 1, 2, 2, 0, 1, 2, 2, - 0, 1, 2, 0, 0, 1, 2, 1, 0, 1, 2, 2, 0, 1, 2, 1, 0, 1, 2, 0, 0, 1, 2, 1, - 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 0, 0, 1, 2, 1, 0, 1, 2, 2, 0, 1, 2, 2, - 0, 1, 2, 0, 0, 1, 2, 1, 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 0, 0, 1, 2, 1, - 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 0, 0, 1, 2, 1, 0, 1, 2, 2, 0, 1, 2, 2, - 0, 1, 2, 0, 0, 1, 2, 1, 0, 1, 2, 2, 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2, 1, - 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 0, 0, 1, 2, 1, 0, 1, 2, 2, 0, 1, 2, 1, - 0, 1, 2, 0, 0, 1, 2, 1, 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 0, 0, 1, 2, 1, - 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 0, 0, 1, 2, 1, 0, 1, 2, 2, 0, 1, 2, 2, - 0, 1, 2, 0, 0, 1, 2, 1, 0, 1, 2, 2, 0, 1, 2, 2 }, - { 0, 0, 0, 0, 1, 1, 1, 0, 2, 2, 2, 0, 2, 2, 2, 0, 0, 0, 0, 1, 1, 1, 1, 1, - 2, 2, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 2, 2, 2, 0, 2, 2, 2, 0, - 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, - 2, 2, 2, 0, 2, 2, 2, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 1, 2, 2, 2, 0, - 0, 0, 0, 0, 1, 1, 1, 0, 2, 2, 2, 0, 2, 2, 2, 0, 0, 0, 0, 1, 1, 1, 1, 1, - 2, 2, 2, 1, 2, 2, 2, 0, 0, 0, 0, 0, 1, 1, 1, 0, 2, 2, 2, 0, 2, 2, 2, 0, - 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, - 2, 2, 2, 0, 2, 2, 2, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, - 0, 0, 0, 0, 1, 1, 1, 0, 2, 2, 2, 0, 2, 2, 2, 0, 0, 0, 0, 1, 1, 1, 1, 1, - 2, 2, 2, 1, 2, 2, 2, 1, 0, 0, 0, 0, 1, 1, 1, 0, 2, 2, 2, 0, 2, 2, 2, 0, - 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1 }, - { 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 2, 2, 2, 2, 2, 1, 1, 1, 2, 1, 1, 1, 2, - 1, 1, 1, 2, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 2, 2, 2, 2, 2, - 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 2, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 2, - 0, 0, 0, 2, 2, 2, 2, 2, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 2, 0, 0, 0, 2, - 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 2, 2, 2, 2, 2, 1, 1, 1, 2, 1, 1, 1, 2, - 1, 1, 1, 2, 2, 2, 2, 2, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 2, 2, 2, 2, 2, - 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 2, 0, 0, 0, 2, 0, 0, 0, 2, - 0, 0, 0, 2, 2, 2, 2, 2, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 2, - 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 2, 2, 2, 2, 2, 1, 1, 1, 2, 1, 1, 1, 2, - 1, 1, 1, 2, 1, 1, 1, 2, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 2, 2, 2, 2, 2, - 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 2, 2, 2, 2, 2 }, - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2 }, - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 } - }; + {0, 1, 2, 0, 0, 1, 2, 1, 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 0, 0, 1, 2, 1, 0, 1, 2, 2, 0, 1, 2, 0, 0, 1, 2, 0, 0, + 1, 2, 1, 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 0, 0, 1, 2, 1, 0, 1, 2, 2, 0, 1, 2, 1, 0, 1, 2, 0, 0, 1, 2, 1, 0, 1, + 2, 2, 0, 1, 2, 2, 0, 1, 2, 0, 0, 1, 2, 1, 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 0, 0, 1, 2, 1, 0, 1, 2, 2, 0, 1, 2, + 2, 0, 1, 2, 0, 0, 1, 2, 1, 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 0, 0, 1, 2, 1, 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 0, + 0, 1, 2, 1, 0, 1, 2, 2, 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2, 1, 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 0, 0, 1, 2, 1, 0, + 1, 2, 2, 0, 1, 2, 1, 0, 1, 2, 0, 0, 1, 2, 1, 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 0, 0, 1, 2, 1, 0, 1, 2, 2, 0, 1, + 2, 2, 0, 1, 2, 0, 0, 1, 2, 1, 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 0, 0, 1, 2, 1, 0, 1, 2, 2, 0, 1, 2, 2}, + {0, 0, 0, 0, 1, 1, 1, 0, 2, 2, 2, 0, 2, 2, 2, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, + 1, 1, 0, 2, 2, 2, 0, 2, 2, 2, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 2, 2, + 2, 0, 2, 2, 2, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 1, 2, 2, 2, 0, 0, 0, 0, 0, 1, 1, 1, 0, 2, 2, 2, 0, 2, 2, 2, + 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 1, 2, 2, 2, 0, 0, 0, 0, 0, 1, 1, 1, 0, 2, 2, 2, 0, 2, 2, 2, 0, 0, 0, 0, 1, + 1, 1, 1, 1, 2, 2, 2, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 2, 2, 2, 0, 2, 2, 2, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, + 2, 2, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 2, 2, 2, 0, 2, 2, 2, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 1, 2, 2, + 2, 1, 0, 0, 0, 0, 1, 1, 1, 0, 2, 2, 2, 0, 2, 2, 2, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1}, + {0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 2, 2, 2, 2, 2, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 2, 0, 0, 0, 2, 0, 0, 0, 2, 0, + 0, 0, 2, 0, 0, 0, 2, 2, 2, 2, 2, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 2, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, + 0, 2, 2, 2, 2, 2, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 2, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 2, 2, 2, 2, + 2, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 2, 2, 2, 2, 2, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 2, 2, 2, 2, 2, 1, 1, 1, 2, + 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 2, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 2, 2, 2, 2, 2, 1, 1, 1, 2, 1, 1, 1, 2, 1, + 1, 1, 2, 1, 1, 1, 2, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 2, 2, 2, 2, 2, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, + 1, 2, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 2, 2, 2, 2, 2, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 2, 2, 2, 2, 2}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}}; static int QuintsTable[3][128] = { - { 0, 1, 2, 3, 4, 0, 4, 4, 0, 1, 2, 3, 4, 1, 4, 4, 0, 1, 2, 3, 4, 2, - 4, 4, 0, 1, 2, 3, 4, 3, 4, 4, 0, 1, 2, 3, 4, 0, 4, 0, 0, 1, 2, 3, - 4, 1, 4, 1, 0, 1, 2, 3, 4, 2, 4, 2, 0, 1, 2, 3, 4, 3, 4, 3, 0, 1, - 2, 3, 4, 0, 2, 3, 0, 1, 2, 3, 4, 1, 2, 3, 0, 1, 2, 3, 4, 2, 2, 3, - 0, 1, 2, 3, 4, 3, 2, 3, 0, 1, 2, 3, 4, 0, 0, 1, 0, 1, 2, 3, 4, 1, - 0, 1, 0, 1, 2, 3, 4, 2, 0, 1, 0, 1, 2, 3, 4, 3, 0, 1 }, - { 0, 0, 0, 0, 0, 4, 4, 4, 1, 1, 1, 1, 1, 4, 4, 4, 2, 2, 2, 2, 2, 4, - 4, 4, 3, 3, 3, 3, 3, 4, 4, 4, 0, 0, 0, 0, 0, 4, 0, 4, 1, 1, 1, 1, - 1, 4, 1, 4, 2, 2, 2, 2, 2, 4, 2, 4, 3, 3, 3, 3, 3, 4, 3, 4, 0, 0, - 0, 0, 0, 4, 0, 0, 1, 1, 1, 1, 1, 4, 1, 1, 2, 2, 2, 2, 2, 4, 2, 2, - 3, 3, 3, 3, 3, 4, 3, 3, 0, 0, 0, 0, 0, 4, 0, 0, 1, 1, 1, 1, 1, 4, - 1, 1, 2, 2, 2, 2, 2, 4, 2, 2, 3, 3, 3, 3, 3, 4, 3, 3 }, - { 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 1, 4, 0, 0, 0, 0, 0, 0, - 2, 4, 0, 0, 0, 0, 0, 0, 3, 4, 1, 1, 1, 1, 1, 1, 4, 4, 1, 1, 1, 1, - 1, 1, 4, 4, 1, 1, 1, 1, 1, 1, 4, 4, 1, 1, 1, 1, 1, 1, 4, 4, 2, 2, - 2, 2, 2, 2, 4, 4, 2, 2, 2, 2, 2, 2, 4, 4, 2, 2, 2, 2, 2, 2, 4, 4, - 2, 2, 2, 2, 2, 2, 4, 4, 3, 3, 3, 3, 3, 3, 4, 4, 3, 3, 3, 3, 3, 3, - 4, 4, 3, 3, 3, 3, 3, 3, 4, 4, 3, 3, 3, 3, 3, 3, 4, 4 } - }; + {0, 1, 2, 3, 4, 0, 4, 4, 0, 1, 2, 3, 4, 1, 4, 4, 0, 1, 2, 3, 4, 2, 4, 4, 0, 1, 2, 3, 4, 3, 4, 4, + 0, 1, 2, 3, 4, 0, 4, 0, 0, 1, 2, 3, 4, 1, 4, 1, 0, 1, 2, 3, 4, 2, 4, 2, 0, 1, 2, 3, 4, 3, 4, 3, + 0, 1, 2, 3, 4, 0, 2, 3, 0, 1, 2, 3, 4, 1, 2, 3, 0, 1, 2, 3, 4, 2, 2, 3, 0, 1, 2, 3, 4, 3, 2, 3, + 0, 1, 2, 3, 4, 0, 0, 1, 0, 1, 2, 3, 4, 1, 0, 1, 0, 1, 2, 3, 4, 2, 0, 1, 0, 1, 2, 3, 4, 3, 0, 1}, + {0, 0, 0, 0, 0, 4, 4, 4, 1, 1, 1, 1, 1, 4, 4, 4, 2, 2, 2, 2, 2, 4, 4, 4, 3, 3, 3, 3, 3, 4, 4, 4, + 0, 0, 0, 0, 0, 4, 0, 4, 1, 1, 1, 1, 1, 4, 1, 4, 2, 2, 2, 2, 2, 4, 2, 4, 3, 3, 3, 3, 3, 4, 3, 4, + 0, 0, 0, 0, 0, 4, 0, 0, 1, 1, 1, 1, 1, 4, 1, 1, 2, 2, 2, 2, 2, 4, 2, 2, 3, 3, 3, 3, 3, 4, 3, 3, + 0, 0, 0, 0, 0, 4, 0, 0, 1, 1, 1, 1, 1, 4, 1, 1, 2, 2, 2, 2, 2, 4, 2, 2, 3, 3, 3, 3, 3, 4, 3, 3}, + {0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 1, 4, 0, 0, 0, 0, 0, 0, 2, 4, 0, 0, 0, 0, 0, 0, 3, 4, + 1, 1, 1, 1, 1, 1, 4, 4, 1, 1, 1, 1, 1, 1, 4, 4, 1, 1, 1, 1, 1, 1, 4, 4, 1, 1, 1, 1, 1, 1, 4, 4, + 2, 2, 2, 2, 2, 2, 4, 4, 2, 2, 2, 2, 2, 2, 4, 4, 2, 2, 2, 2, 2, 2, 4, 4, 2, 2, 2, 2, 2, 2, 4, 4, + 3, 3, 3, 3, 3, 3, 4, 4, 3, 3, 3, 3, 3, 3, 4, 4, 3, 3, 3, 3, 3, 3, 4, 4, 3, 3, 3, 3, 3, 3, 4, 4}}; if (count <= 0) return; @@ -341,17 +273,18 @@ void decode_intseq(const uint8_t* buf, int offset, const int a, const int b, con for (int i = 0, p = offset; i < block_count; i++, p -= block_size) { int now_size = (i < block_count - 1) ? block_size : last_block_size; uint_fast64_t d = bit_reverse_u64(getbits64(buf, p - now_size, now_size), now_size); - int x = (d >> b & 3) | (d >> b * 2 & 0xc) | (d >> b * 3 & 0x10) | (d >> b * 4 & 0x60) | (d >> b * 5 & 0x80); + int x = + (d >> b & 3) | (d >> b * 2 & 0xc) | (d >> b * 3 & 0x10) | (d >> b * 4 & 0x60) | (d >> b * 5 & 0x80); for (int j = 0; j < 5 && n < count; j++, n++) - out[n] = (IntSeqData){ d >> (mt[j] + b * j) & mask, TritsTable[j][x] }; + out[n] = (IntSeqData){d >> (mt[j] + b * j) & mask, TritsTable[j][x]}; } } else { for (int i = 0, p = offset; i < block_count; i++, p += block_size) { - uint_fast64_t d = getbits64( - buf, p, (i < block_count - 1) ? block_size : last_block_size); - int x = (d >> b & 3) | (d >> b * 2 & 0xc) | (d >> b * 3 & 0x10) | (d >> b * 4 & 0x60) | (d >> b * 5 & 0x80); + uint_fast64_t d = getbits64(buf, p, (i < block_count - 1) ? block_size : last_block_size); + int x = + (d >> b & 3) | (d >> b * 2 & 0xc) | (d >> b * 3 & 0x10) | (d >> b * 4 & 0x60) | (d >> b * 5 & 0x80); for (int j = 0; j < 5 && n < count; j++, n++) - out[n] = (IntSeqData){ d >> (mt[j] + b * j) & mask, TritsTable[j][x] }; + out[n] = (IntSeqData){d >> (mt[j] + b * j) & mask, TritsTable[j][x]}; } } } else if (a == 5) { @@ -367,29 +300,27 @@ void decode_intseq(const uint8_t* buf, int offset, const int a, const int b, con uint_fast64_t d = bit_reverse_u64(getbits64(buf, p - now_size, now_size), now_size); int x = (d >> b & 7) | (d >> b * 2 & 0x18) | (d >> b * 3 & 0x60); for (int j = 0; j < 3 && n < count; j++, n++) - out[n] = (IntSeqData){ d >> (mq[j] + b * j) & mask, QuintsTable[j][x] }; + out[n] = (IntSeqData){d >> (mq[j] + b * j) & mask, QuintsTable[j][x]}; } } else { for (int i = 0, p = offset; i < block_count; i++, p += block_size) { - uint_fast64_t d = getbits64( - buf, p, (i < block_count - 1) ? block_size : last_block_size); + uint_fast64_t d = getbits64(buf, p, (i < block_count - 1) ? block_size : last_block_size); int x = (d >> b & 7) | (d >> b * 2 & 0x18) | (d >> b * 3 & 0x60); for (int j = 0; j < 3 && n < count; j++, n++) - out[n] = (IntSeqData){ d >> (mq[j] + b * j) & mask, QuintsTable[j][x] }; + out[n] = (IntSeqData){d >> (mq[j] + b * j) & mask, QuintsTable[j][x]}; } } } else { if (reverse) for (int p = offset - b; n < count; n++, p -= b) - out[n] = (IntSeqData){ bit_reverse_u8(getbits(buf, p, b), b), 0 }; + out[n] = (IntSeqData){bit_reverse_u8(getbits(buf, p, b), b), 0}; else for (int p = offset; n < count; n++, p += b) - out[n] = (IntSeqData){ getbits(buf, p, b), 0 }; + out[n] = (IntSeqData){getbits(buf, p, b), 0}; } } -void decode_block_params(const uint8_t* buf, BlockData* block_data) -{ +void decode_block_params(const uint8_t *buf, BlockData *block_data) { block_data->dual_plane = !!(buf[1] & 4); block_data->weight_range = (buf[0] >> 4 & 1) | (buf[1] << 2 & 8); @@ -454,10 +385,12 @@ void decode_block_params(const uint8_t* buf, BlockData* block_data) switch (WeightPrecTableA[block_data->weight_range]) { case 3: - weight_bits = block_data->weight_num * WeightPrecTableB[block_data->weight_range] + (block_data->weight_num * 8 + 4) / 5; + weight_bits = + block_data->weight_num * WeightPrecTableB[block_data->weight_range] + (block_data->weight_num * 8 + 4) / 5; break; case 5: - weight_bits = block_data->weight_num * WeightPrecTableB[block_data->weight_range] + (block_data->weight_num * 7 + 2) / 3; + weight_bits = + block_data->weight_num * WeightPrecTableB[block_data->weight_range] + (block_data->weight_num * 7 + 2) / 3; break; default: weight_bits = block_data->weight_num * WeightPrecTableB[block_data->weight_range]; @@ -498,7 +431,8 @@ void decode_block_params(const uint8_t* buf, BlockData* block_data) if (block_data->dual_plane) { config_bits += 2; - block_data->plane_selector = getbits(buf, cem_base ? 130 - weight_bits - block_data->part_num * 3 : 126 - weight_bits, 2); + block_data->plane_selector = + getbits(buf, cem_base ? 130 - weight_bits - block_data->part_num * 3 : 126 - weight_bits, 2); } int remain_bits = 128 - config_bits - weight_bits; @@ -510,10 +444,12 @@ void decode_block_params(const uint8_t* buf, BlockData* block_data) for (int i = 0, endpoint_bits; i < (int)(sizeof(CemTableA) / sizeof(int)); i++) { switch (CemTableA[i]) { case 3: - endpoint_bits = block_data->endpoint_value_num * CemTableB[i] + (block_data->endpoint_value_num * 8 + 4) / 5; + endpoint_bits = + block_data->endpoint_value_num * CemTableB[i] + (block_data->endpoint_value_num * 8 + 4) / 5; break; case 5: - endpoint_bits = block_data->endpoint_value_num * CemTableB[i] + (block_data->endpoint_value_num * 7 + 2) / 3; + endpoint_bits = + block_data->endpoint_value_num * CemTableB[i] + (block_data->endpoint_value_num * 7 + 2) / 3; break; default: endpoint_bits = block_data->endpoint_value_num * CemTableB[i]; @@ -526,8 +462,7 @@ void decode_block_params(const uint8_t* buf, BlockData* block_data) } } -void decode_endpoints_hdr7(int* endpoints, int* v) -{ +void decode_endpoints_hdr7(int *endpoints, int *v) { int modeval = (v[2] >> 4 & 0x8) | (v[1] >> 5 & 0x4) | (v[0] >> 6); int major_component, mode; if ((modeval & 0xc) != 0xc) { @@ -540,7 +475,7 @@ void decode_endpoints_hdr7(int* endpoints, int* v) major_component = 0; mode = 5; } - int c[] = { v[0] & 0x3f, v[1] & 0x1f, v[2] & 0x1f, v[3] & 0x1f }; + int c[] = {v[0] & 0x3f, v[1] & 0x1f, v[2] & 0x1f, v[3] & 0x1f}; switch (mode) { case 0: @@ -621,11 +556,11 @@ void decode_endpoints_hdr7(int* endpoints, int* v) set_endpoint_hdr_clamp(endpoints, c[0] - c[3], c[1] - c[3], c[2] - c[3], 0x780, c[0], c[1], c[2], 0x780); } -void decode_endpoints_hdr11(int* endpoints, int* v, int alpha1, int alpha2) -{ +void decode_endpoints_hdr11(int *endpoints, int *v, int alpha1, int alpha2) { int major_component = (v[4] >> 7) | (v[5] >> 6 & 2); if (major_component == 3) { - set_endpoint_hdr(endpoints, v[0] << 4, v[2] << 4, v[4] << 5 & 0xfe0, alpha1, v[1] << 4, v[3] << 4, v[5] << 5 & 0xfe0, alpha2); + set_endpoint_hdr(endpoints, v[0] << 4, v[2] << 4, v[4] << 5 & 0xfe0, alpha1, v[1] << 4, v[3] << 4, + v[5] << 5 & 0xfe0, alpha2); return; } int mode = (v[1] >> 7) | (v[2] >> 6 & 2) | (v[3] >> 5 & 4); @@ -726,20 +661,23 @@ void decode_endpoints_hdr11(int* endpoints, int* v, int alpha1, int alpha2) vd1 *= mult; if (major_component == 1) - set_endpoint_hdr_clamp(endpoints, va - vb0 - vc - vd0, va - vc, va - vb1 - vc - vd1, alpha1, va - vb0, va, va - vb1, alpha2); + set_endpoint_hdr_clamp(endpoints, va - vb0 - vc - vd0, va - vc, va - vb1 - vc - vd1, alpha1, va - vb0, va, + va - vb1, alpha2); else if (major_component == 2) - set_endpoint_hdr_clamp(endpoints, va - vb1 - vc - vd1, va - vb0 - vc - vd0, va - vc, alpha1, va - vb1, va - vb0, va, alpha2); + set_endpoint_hdr_clamp(endpoints, va - vb1 - vc - vd1, va - vb0 - vc - vd0, va - vc, alpha1, va - vb1, va - vb0, + va, alpha2); else - set_endpoint_hdr_clamp(endpoints, va - vc, va - vb0 - vc - vd0, va - vb1 - vc - vd1, alpha1, va, va - vb0, va - vb1, alpha2); + set_endpoint_hdr_clamp(endpoints, va - vc, va - vb0 - vc - vd0, va - vb1 - vc - vd1, alpha1, va, va - vb0, + va - vb1, alpha2); } -void decode_endpoints(const uint8_t* buf, BlockData* data) -{ - static const int TritsTable[] = { 0, 204, 93, 44, 22, 11, 5 }; - static const int QuintsTable[] = { 0, 113, 54, 26, 13, 6 }; +void decode_endpoints(const uint8_t *buf, BlockData *data) { + static const int TritsTable[] = {0, 204, 93, 44, 22, 11, 5}; + static const int QuintsTable[] = {0, 113, 54, 26, 13, 6}; IntSeqData seq[32]; int ev[32]; - decode_intseq(buf, data->part_num == 1 ? 17 : 29, CemTableA[data->cem_range], CemTableB[data->cem_range], data->endpoint_value_num, 0, seq); + decode_intseq(buf, data->part_num == 1 ? 17 : 29, CemTableA[data->cem_range], CemTableB[data->cem_range], + data->endpoint_value_num, 0, seq); switch (CemTableA[data->cem_range]) { case 3: @@ -830,7 +768,7 @@ void decode_endpoints(const uint8_t* buf, BlockData* data) } } - int* v = ev; + int *v = ev; for (int cem = 0; cem < data->part_num; v += (data->cem[cem] / 4 + 1) * 2, cem++) { switch (data->cem[cem]) { case 0: @@ -874,7 +812,8 @@ void decode_endpoints(const uint8_t* buf, BlockData* data) set_endpoint_clamp(data->endpoints[cem], v[0], v[0], v[0], v[2], v[1], v[1], v[1], v[2] + v[3]); break; case 6: - set_endpoint(data->endpoints[cem], v[0] * v[3] >> 8, v[1] * v[3] >> 8, v[2] * v[3] >> 8, 255, v[0], v[1], v[2], 255); + set_endpoint(data->endpoints[cem], v[0] * v[3] >> 8, v[1] * v[3] >> 8, v[2] * v[3] >> 8, 255, v[0], v[1], + v[2], 255); break; case 7: decode_endpoints_hdr7(data->endpoints[cem], v); @@ -890,12 +829,15 @@ void decode_endpoints(const uint8_t* buf, BlockData* data) bit_transfer_signed(&v[3], &v[2]); bit_transfer_signed(&v[5], &v[4]); if (v[1] + v[3] + v[5] >= 0) - set_endpoint_clamp(data->endpoints[cem], v[0], v[2], v[4], 255, v[0] + v[1], v[2] + v[3], v[4] + v[5], 255); + set_endpoint_clamp(data->endpoints[cem], v[0], v[2], v[4], 255, v[0] + v[1], v[2] + v[3], v[4] + v[5], + 255); else - set_endpoint_blue_clamp(data->endpoints[cem], v[0] + v[1], v[2] + v[3], v[4] + v[5], 255, v[0], v[2], v[4], 255); + set_endpoint_blue_clamp(data->endpoints[cem], v[0] + v[1], v[2] + v[3], v[4] + v[5], 255, v[0], v[2], + v[4], 255); break; case 10: - set_endpoint(data->endpoints[cem], v[0] * v[3] >> 8, v[1] * v[3] >> 8, v[2] * v[3] >> 8, v[4], v[0], v[1], v[2], v[5]); + set_endpoint(data->endpoints[cem], v[0] * v[3] >> 8, v[1] * v[3] >> 8, v[2] * v[3] >> 8, v[4], v[0], v[1], + v[2], v[5]); break; case 11: decode_endpoints_hdr11(data->endpoints[cem], v, 0x780, 0x780); @@ -912,9 +854,11 @@ void decode_endpoints(const uint8_t* buf, BlockData* data) bit_transfer_signed(&v[5], &v[4]); bit_transfer_signed(&v[7], &v[6]); if (v[1] + v[3] + v[5] >= 0) - set_endpoint_clamp(data->endpoints[cem], v[0], v[2], v[4], v[6], v[0] + v[1], v[2] + v[3], v[4] + v[5], v[6] + v[7]); + set_endpoint_clamp(data->endpoints[cem], v[0], v[2], v[4], v[6], v[0] + v[1], v[2] + v[3], v[4] + v[5], + v[6] + v[7]); else - set_endpoint_blue_clamp(data->endpoints[cem], v[0] + v[1], v[2] + v[3], v[4] + v[5], v[6] + v[7], v[0], v[2], v[4], v[6]); + set_endpoint_blue_clamp(data->endpoints[cem], v[0] + v[1], v[2] + v[3], v[4] + v[5], v[6] + v[7], v[0], + v[2], v[4], v[6]); break; case 14: decode_endpoints_hdr11(data->endpoints[cem], v, v[6], v[7]); @@ -939,12 +883,11 @@ void decode_endpoints(const uint8_t* buf, BlockData* data) } } -void decode_weights(const uint8_t* buf, BlockData* data) -{ +void decode_weights(const uint8_t *buf, BlockData *data) { IntSeqData seq[128]; int wv[128] = {}; - decode_intseq(buf, 128, WeightPrecTableA[data->weight_range], - WeightPrecTableB[data->weight_range], data->weight_num, 1, seq); + decode_intseq(buf, 128, WeightPrecTableA[data->weight_range], WeightPrecTableB[data->weight_range], + data->weight_num, 1, seq); if (WeightPrecTableA[data->weight_range] == 0) { switch (WeightPrecTableB[data->weight_range]) { @@ -1045,10 +988,9 @@ void decode_weights(const uint8_t* buf, BlockData* data) } } -void select_partition(const uint8_t* buf, BlockData* data) -{ +void select_partition(const uint8_t *buf, BlockData *data) { int small_block = data->bw * data->bh < 31; - int seed = (*(int*)buf >> 13 & 0x3ff) | (data->part_num - 1) << 10; + int seed = (*(int *)buf >> 13 & 0x3ff) | (data->part_num - 1) << 10; uint32_t rnum = seed; rnum ^= rnum >> 15; @@ -1068,7 +1010,7 @@ void select_partition(const uint8_t* buf, BlockData* data) seeds[i] *= seeds[i]; } - int sh[2] = { seed & 2 ? 4 : 5, data->part_num == 3 ? 6 : 5 }; + int sh[2] = {seed & 2 ? 4 : 5, data->part_num == 3 ? 6 : 5}; if (seed & 1) for (int i = 0; i < 8; i++) @@ -1102,63 +1044,73 @@ void select_partition(const uint8_t* buf, BlockData* data) } } -void applicate_color(const BlockData* data, uint32_t* outbuf) -{ +void applicate_color(const BlockData *data, uint32_t *outbuf) { static const t_select_folor_func_ptr FuncTableC[] = { - select_color, select_color, select_color_hdr, select_color_hdr, - select_color, select_color, select_color, select_color_hdr, - select_color, select_color, select_color, select_color_hdr, - select_color, select_color, select_color_hdr, select_color_hdr - }; + select_color, select_color, select_color_hdr, select_color_hdr, select_color, select_color, + select_color, select_color_hdr, select_color, select_color, select_color, select_color_hdr, + select_color, select_color, select_color_hdr, select_color_hdr}; static const t_select_folor_func_ptr FuncTableA[] = { - select_color, select_color, select_color_hdr, select_color_hdr, - select_color, select_color, select_color, select_color_hdr, - select_color, select_color, select_color, select_color_hdr, - select_color, select_color, select_color, select_color_hdr - }; + select_color, select_color, select_color_hdr, select_color_hdr, select_color, select_color, + select_color, select_color_hdr, select_color, select_color, select_color, select_color_hdr, + select_color, select_color, select_color, select_color_hdr}; if (data->dual_plane) { - int ps[] = { 0, 0, 0, 0 }; + int ps[] = {0, 0, 0, 0}; ps[data->plane_selector] = 1; if (data->part_num > 1) { for (int i = 0; i < data->bw * data->bh; i++) { int p = data->partition[i]; - uint_fast8_t r = FuncTableC[data->cem[p]](data->endpoints[p][0], data->endpoints[p][4], data->weights[i][ps[0]]); - uint_fast8_t g = FuncTableC[data->cem[p]](data->endpoints[p][1], data->endpoints[p][5], data->weights[i][ps[1]]); - uint_fast8_t b = FuncTableC[data->cem[p]](data->endpoints[p][2], data->endpoints[p][6], data->weights[i][ps[2]]); - uint_fast8_t a = FuncTableA[data->cem[p]](data->endpoints[p][3], data->endpoints[p][7], data->weights[i][ps[3]]); + uint_fast8_t r = + FuncTableC[data->cem[p]](data->endpoints[p][0], data->endpoints[p][4], data->weights[i][ps[0]]); + uint_fast8_t g = + FuncTableC[data->cem[p]](data->endpoints[p][1], data->endpoints[p][5], data->weights[i][ps[1]]); + uint_fast8_t b = + FuncTableC[data->cem[p]](data->endpoints[p][2], data->endpoints[p][6], data->weights[i][ps[2]]); + uint_fast8_t a = + FuncTableA[data->cem[p]](data->endpoints[p][3], data->endpoints[p][7], data->weights[i][ps[3]]); outbuf[i] = color(r, g, b, a); } } else { for (int i = 0; i < data->bw * data->bh; i++) { - uint_fast8_t r = FuncTableC[data->cem[0]](data->endpoints[0][0], data->endpoints[0][4], data->weights[i][ps[0]]); - uint_fast8_t g = FuncTableC[data->cem[0]](data->endpoints[0][1], data->endpoints[0][5], data->weights[i][ps[1]]); - uint_fast8_t b = FuncTableC[data->cem[0]](data->endpoints[0][2], data->endpoints[0][6], data->weights[i][ps[2]]); - uint_fast8_t a = FuncTableA[data->cem[0]](data->endpoints[0][3], data->endpoints[0][7], data->weights[i][ps[3]]); + uint_fast8_t r = + FuncTableC[data->cem[0]](data->endpoints[0][0], data->endpoints[0][4], data->weights[i][ps[0]]); + uint_fast8_t g = + FuncTableC[data->cem[0]](data->endpoints[0][1], data->endpoints[0][5], data->weights[i][ps[1]]); + uint_fast8_t b = + FuncTableC[data->cem[0]](data->endpoints[0][2], data->endpoints[0][6], data->weights[i][ps[2]]); + uint_fast8_t a = + FuncTableA[data->cem[0]](data->endpoints[0][3], data->endpoints[0][7], data->weights[i][ps[3]]); outbuf[i] = color(r, g, b, a); } } } else if (data->part_num > 1) { for (int i = 0; i < data->bw * data->bh; i++) { int p = data->partition[i]; - uint_fast8_t r = FuncTableC[data->cem[p]](data->endpoints[p][0], data->endpoints[p][4], data->weights[i][0]); - uint_fast8_t g = FuncTableC[data->cem[p]](data->endpoints[p][1], data->endpoints[p][5], data->weights[i][0]); - uint_fast8_t b = FuncTableC[data->cem[p]](data->endpoints[p][2], data->endpoints[p][6], data->weights[i][0]); - uint_fast8_t a = FuncTableA[data->cem[p]](data->endpoints[p][3], data->endpoints[p][7], data->weights[i][0]); + uint_fast8_t r = + FuncTableC[data->cem[p]](data->endpoints[p][0], data->endpoints[p][4], data->weights[i][0]); + uint_fast8_t g = + FuncTableC[data->cem[p]](data->endpoints[p][1], data->endpoints[p][5], data->weights[i][0]); + uint_fast8_t b = + FuncTableC[data->cem[p]](data->endpoints[p][2], data->endpoints[p][6], data->weights[i][0]); + uint_fast8_t a = + FuncTableA[data->cem[p]](data->endpoints[p][3], data->endpoints[p][7], data->weights[i][0]); outbuf[i] = color(r, g, b, a); } } else { for (int i = 0; i < data->bw * data->bh; i++) { - uint_fast8_t r = FuncTableC[data->cem[0]](data->endpoints[0][0], data->endpoints[0][4], data->weights[i][0]); - uint_fast8_t g = FuncTableC[data->cem[0]](data->endpoints[0][1], data->endpoints[0][5], data->weights[i][0]); - uint_fast8_t b = FuncTableC[data->cem[0]](data->endpoints[0][2], data->endpoints[0][6], data->weights[i][0]); - uint_fast8_t a = FuncTableA[data->cem[0]](data->endpoints[0][3], data->endpoints[0][7], data->weights[i][0]); + uint_fast8_t r = + FuncTableC[data->cem[0]](data->endpoints[0][0], data->endpoints[0][4], data->weights[i][0]); + uint_fast8_t g = + FuncTableC[data->cem[0]](data->endpoints[0][1], data->endpoints[0][5], data->weights[i][0]); + uint_fast8_t b = + FuncTableC[data->cem[0]](data->endpoints[0][2], data->endpoints[0][6], data->weights[i][0]); + uint_fast8_t a = + FuncTableA[data->cem[0]](data->endpoints[0][3], data->endpoints[0][7], data->weights[i][0]); outbuf[i] = color(r, g, b, a); } } } -void decode_block(const uint8_t* buf, const int bw, const int bh, uint32_t* outbuf) -{ +void decode_block(const uint8_t *buf, const int bw, const int bh, uint32_t *outbuf) { if (buf[0] == 0xfc && (buf[1] & 1) == 1) { // void-extent uint_fast32_t c; @@ -1186,21 +1138,16 @@ void decode_block(const uint8_t* buf, const int bw, const int bh, uint32_t* outb } } -void decode_astc(const uint8_t* data, const int w, const int h, const int bw, const int bh, uint32_t* image) -{ - const int num_blocks_x = (w + bw - 1) / bw; - const int num_blocks_y = (h + bh - 1) / bh; - const int copy_length_last = (w + bw - 1) % bw + 1; - uint32_t buf[144]; - uint32_t* buf_end = buf + bw * bh; - const uint8_t* ptr = data; - for (int by = 0; by < num_blocks_y; by++) { - for (int bx = 0, x = 0; bx < num_blocks_x; bx++, ptr += 16, x += bw) { - decode_block(ptr, bw, bh, buf); - int copy_length = (bx < num_blocks_x - 1 ? bw : copy_length_last) * 4; - uint32_t* b = buf; - for (int y = h - by * bh - 1; b < buf_end && y >= 0; y--, b += bw) - memcpy(image + y * w + x, b, copy_length); +int decode_astc(const uint8_t *data, const long w, const long h, const int bw, const int bh, uint32_t *image) { + const long num_blocks_x = (w + bw - 1) / bw; + const long num_blocks_y = (h + bh - 1) / bh; + uint32_t buffer[144]; + const uint8_t *d = data; + for (long by = 0; by < num_blocks_y; by++) { + for (long bx = 0; bx < num_blocks_x; bx++, d += 16) { + decode_block(d, bw, bh, buffer); + copy_block_buffer(bx, by, w, h, bw, bh, buffer, image); } } + return 1; } diff --git a/ext/decoders/native/astc.h b/ext/decoders/native/astc.h index b5b2ee5..3fa2cfc 100644 --- a/ext/decoders/native/astc.h +++ b/ext/decoders/native/astc.h @@ -3,6 +3,6 @@ #include -void decode_astc(const uint8_t*, const int, const int, const int, const int, uint32_t*); +int decode_astc(const uint8_t *, const long, const long, const int, const int, uint32_t *); #endif /* end of include guard: ASTC_H */ diff --git a/ext/decoders/native/color.h b/ext/decoders/native/color.h new file mode 100644 index 0000000..a2283c7 --- /dev/null +++ b/ext/decoders/native/color.h @@ -0,0 +1,87 @@ +#ifndef COLOR_H +#define COLOR_H + +#include +#include +#include "endianness.h" + +#ifdef __LITTLE_ENDIAN__ +static const uint_fast32_t TRANSPARENT_MASK = 0x00ffffff; +#else +static const uint_fast32_t TRANSPARENT_MASK = 0xffffff00; +#endif + +static inline uint_fast32_t color(uint8_t r, uint8_t g, uint8_t b, uint8_t a) { +#ifdef __LITTLE_ENDIAN__ + return r | g << 8 | b << 16 | a << 24; +#else + return a | b << 8 | g << 16 | r << 24; +#endif +} + +static inline uint_fast32_t alpha_mask(uint8_t a) { +#ifdef __LITTLE_ENDIAN__ + return TRANSPARENT_MASK | a << 24; +#else + return TRANSPARENT_MASK | a; +#endif +} + +static inline void rgb565_le(const uint16_t d, uint8_t *r, uint8_t *g, uint8_t *b) { +#ifdef __LITTLE_ENDIAN__ + *r = (d >> 8 & 0xf8) | (d >> 13); + *g = (d >> 3 & 0xfc) | (d >> 9 & 3); + *b = (d << 3) | (d >> 2 & 7); +#else + *r = (d & 0xf8) | (d >> 5 & 7); + *g = (d << 5 & 0xe0) | (d >> 11 & 0x1c) | (d >> 1 & 3); + *b = (d >> 5 & 0xf8) | (d >> 10 & 0x7); +#endif +} + +static inline void rgb565_be(const uint16_t d, uint8_t *r, uint8_t *g, uint8_t *b) { +#ifdef __BIG_ENDIAN__ + *r = (d >> 8 & 0xf8) | (d >> 13); + *g = (d >> 3 & 0xfc) | (d >> 9 & 3); + *b = (d << 3) | (d >> 2 & 7); +#else + *r = (d & 0xf8) | (d >> 5 & 7); + *g = (d << 5 & 0xe0) | (d >> 11 & 0x1c) | (d >> 1 & 3); + *b = (d >> 5 & 0xf8) | (d >> 10 & 0x7); +#endif +} + +static inline void rgb565_lep(const uint16_t d, uint8_t *c) { +#ifdef __LITTLE_ENDIAN__ + *(c++) = (d >> 8 & 0xf8) | (d >> 13); + *(c++) = (d >> 3 & 0xfc) | (d >> 9 & 3); + *(c++) = (d << 3) | (d >> 2 & 7); +#else + *(c++) = (d & 0xf8) | (d >> 5 & 7); + *(c++) = (d << 5 & 0xe0) | (d >> 11 & 0x1c) | (d >> 1 & 3); + *(c++) = (d >> 5 & 0xf8) | (d >> 10 & 0x7); +#endif +} + +static inline void rgb565_bep(const uint16_t d, uint8_t *c) { +#ifdef __BIG_ENDIAN__ + *(c++) = (d >> 8 & 0xf8) | (d >> 13); + *(c++) = (d >> 3 & 0xfc) | (d >> 9 & 3); + *(c++) = (d << 3) | (d >> 2 & 7); +#else + *(c++) = (d & 0xf8) | (d >> 5 & 7); + *(c++) = (d << 5 & 0xe0) | (d >> 11 & 0x1c) | (d >> 1 & 3); + *(c++) = (d >> 5 & 0xf8) | (d >> 10 & 0x7); +#endif +} + +static inline void copy_block_buffer(const long bx, const long by, const long w, const long h, const long bw, + const long bh, const uint32_t *buffer, uint32_t *image) { + long x = bw * bx; + long xl = (bw * (bx + 1) > w ? w - bw * bx : bw) * 4; + const uint32_t *buffer_end = buffer + bw * bh; + for (long y = h - by * bh; buffer < buffer_end && y-- > 0; buffer += bw) + memcpy(image + y * w + x, buffer, xl); +} + +#endif /* end of include guard: COLOR_H */ diff --git a/ext/decoders/native/common.h b/ext/decoders/native/common.h deleted file mode 100644 index eafdd8e..0000000 --- a/ext/decoders/native/common.h +++ /dev/null @@ -1,44 +0,0 @@ -#include - -/* https://github.com/ruby/ruby/blob/master/siphash.c */ - -#ifdef _WIN32 -#define BYTE_ORDER __LITTLE_ENDIAN -#elif !defined BYTE_ORDER -#include -#endif - -#ifndef BYTE_ORDER -#if defined(__BYTE_ORDER__) -#define BYTE_ORDER __BYTE_ORDER__ -#elif defined(__BYTE_ORDER) -#define BYTE_ORDER __BYTE_ORDER -#else -#error "Neither BYTE_ORDER nor __BYTE_ORDER__ is defined." -#endif -#endif - -#ifndef LITTLE_ENDIAN -#if defined(__LITTLE_ENDIAN) -#define LITTLE_ENDIAN __LITTLE_ENDIAN -#define BIG_ENDIAN __BIG_ENDIAN -#elif defined(__LITTLE_ENDIAN__) -#define LITTLE_ENDIAN __LITTLE_ENDIAN__ -#define BIG_ENDIAN __BIG_ENDIAN__ -#elif defined(__ORDER_LITTLE_ENDIAN__) -#define LITTLE_ENDIAN __ORDER_LITTLE_ENDIAN__ -#define BIG_ENDIAN __ORDER_BIG_ENDIAN__ -#else -#error "Neither LITTLE_ENDIAN, __LITTLE_ENDIAN, nor __ORDER_LITTLE_ENDIAN__ is defined." -#endif -#endif - -#if BYTE_ORDER == LITTLE_ENDIAN -#define IS_LITTLE_ENDIAN 1 -#define IS_BIG_ENDIAN 0 -#elif BYTE_ORDER == BIG_ENDIAN -#define IS_LITTLE_ENDIAN 0 -#define IS_BIG_ENDIAN 1 -#else -#error "Only strictly little or big endian supported" -#endif diff --git a/ext/decoders/native/dxtc.c b/ext/decoders/native/dxtc.c index bed3894..ffce257 100644 --- a/ext/decoders/native/dxtc.c +++ b/ext/decoders/native/dxtc.c @@ -1,38 +1,16 @@ #include "dxtc.h" -#include "common.h" #include #include +#include "color.h" +#include "endianness.h" -static inline uint_fast32_t color(uint_fast8_t r, uint_fast8_t g, uint_fast8_t b, uint_fast8_t a) -{ -#if BYTE_ORDER == LITTLE_ENDIAN - return r | g << 8 | b << 16 | a << 24; -#else - return a | b << 8 | g << 16 | r << 24; -#endif -} - -static inline void rgb565(const uint16_t d, uint8_t* r, uint8_t* g, uint8_t* b) -{ -#if BYTE_ORDER == LITTLE_ENDIAN - *r = (d >> 8 & 0xf8) | (d >> 13); - *g = (d >> 3 & 0xfc) | (d >> 9 & 3); - *b = (d << 3) | (d >> 2 & 7); -#else - *r = (d & 0xf8) | (d >> 5 & 7); - *g = (d << 5 & 0xe0) | (d >> 11 & 0x1c) | (d >> 1 & 3); - *b = (d >> 5 & 0xf8) | (d >> 10 & 0x7); -#endif -} - -static inline void decode_dxt1_block(const uint8_t* data, uint32_t* outbuf) -{ +static inline void decode_dxt1_block(const uint8_t *data, uint32_t *outbuf) { uint8_t r0, g0, b0, r1, g1, b1; - int q0 = *(uint16_t*)(data); - int q1 = *(uint16_t*)(data + 2); - rgb565(q0, &r0, &g0, &b0); - rgb565(q1, &r1, &g1, &b1); - uint_fast32_t c[4] = { color(r0, g0, b0, 255), color(r1, g1, b1, 255) }; + int q0 = *(uint16_t *)(data); + int q1 = *(uint16_t *)(data + 2); + rgb565_le(q0, &r0, &g0, &b0); + rgb565_le(q1, &r1, &g1, &b1); + uint_fast32_t c[4] = {color(r0, g0, b0, 255), color(r1, g1, b1, 255)}; if (q0 > q1) { c[2] = color((r0 * 2 + r1) / 3, (g0 * 2 + g1) / 3, (b0 * 2 + b1) / 3, 255); c[3] = color((r0 + r1 * 2) / 3, (g0 + g1 * 2) / 3, (b0 + b1 * 2) / 3, 255); @@ -40,37 +18,27 @@ static inline void decode_dxt1_block(const uint8_t* data, uint32_t* outbuf) c[2] = color((r0 + r1) / 2, (g0 + g1) / 2, (b0 + b1) / 2, 255); c[3] = color(0, 0, 0, 255); } -#if BYTE_ORDER == LITTLE_ENDIAN - uint_fast32_t d = *(uint32_t*)(data + 4); -#else - uint_fast32_t d = data[4] | data[5] << 8 | data[6] << 16 | data[7] << 24; -#endif + uint_fast32_t d = lton32(*(uint32_t *)(data + 4)); for (int i = 0; i < 16; i++, d >>= 2) outbuf[i] = c[d & 3]; } -void decode_dxt1(const uint8_t* data, const int w, const int h, uint32_t* image) -{ - int num_blocks_x = (w + 3) / 4; - int num_blocks_y = (h + 3) / 4; - int copy_length_last = (w + 3) % 4 + 1; - uint32_t buf[16]; - uint32_t* buf_end = buf + 16; - const uint8_t* d = data; - for (int t = 0; t < num_blocks_y; t++) { - for (int s = 0; s < num_blocks_x; s++, d += 8) { - decode_dxt1_block(d, buf); - int copy_length = (s < num_blocks_x - 1 ? 4 : copy_length_last) * 4; - uint32_t* b = buf; - for (int y = h - t * 4 - 1; b < buf_end && y >= 0; b += 4, y--) - memcpy(image + y * w + s * 4, b, copy_length); +int decode_dxt1(const uint8_t *data, const long w, const long h, uint32_t *image) { + long num_blocks_x = (w + 3) / 4; + long num_blocks_y = (h + 3) / 4; + uint32_t buffer[16]; + const uint8_t *d = data; + for (long by = 0; by < num_blocks_y; by++) { + for (long bx = 0; bx < num_blocks_x; bx++, d += 8) { + decode_dxt1_block(d, buffer); + copy_block_buffer(bx, by, w, h, 4, 4, buffer, image); } } + return 1; } -static inline void decode_dxt5_block(const uint8_t* data, uint32_t* outbuf) -{ - uint_fast32_t a[8] = { data[0], data[1] }; +static inline void decode_dxt5_block(const uint8_t *data, uint32_t *outbuf) { + uint_fast32_t a[8] = {data[0], data[1]}; if (a[0] > a[1]) { a[2] = (a[0] * 6 + a[1]) / 7; a[3] = (a[0] * 5 + a[1] * 2) / 7; @@ -87,32 +55,23 @@ static inline void decode_dxt5_block(const uint8_t* data, uint32_t* outbuf) a[7] = 255; } for (int i = 0; i < 8; i++) - a[i] = color(255, 255, 255, a[i]); + a[i] = alpha_mask(a[i]); decode_dxt1_block(data + 8, outbuf); -#if BYTE_ORDER == LITTLE_ENDIAN - uint_fast64_t d = *(uint64_t*)data >> 16; -#else - uint_fast64_t d = data[2] | data[3] << 8 | data[4] << 16 | data[5] << 24 | data[6] << 32 | data[7] << 40; -#endif + uint_fast64_t d = lton64(*(uint64_t *)data) >> 16; for (int i = 0; i < 16; i++, d >>= 3) outbuf[i] &= a[d & 7]; } -void decode_dxt5(const uint8_t* data, const int w, const int h, uint32_t* image) -{ - int num_blocks_x = (w + 3) / 4; - int num_blocks_y = (h + 3) / 4; - int copy_length_last = (w + 3) % 4 + 1; - uint32_t buf[16]; - uint32_t *buf_end = buf + 16; - const uint8_t* d = data; - for (int t = 0; t < num_blocks_y; t++) { - for (int s = 0; s < num_blocks_x; s++, d += 16) { - decode_dxt5_block(d, buf); - int copy_length = (s < num_blocks_x - 1 ? 4 : copy_length_last) * 4; - uint32_t *b = buf; - for (int y = h - t * 4 - 1; b < buf_end && y >= 0; b += 4, y--) - memcpy(image + y * w + s * 4, b, copy_length); +int decode_dxt5(const uint8_t *data, const long w, const long h, uint32_t *image) { + long num_blocks_x = (w + 3) / 4; + long num_blocks_y = (h + 3) / 4; + uint32_t buffer[16]; + const uint8_t *d = data; + for (long by = 0; by < num_blocks_y; by++) { + for (long bx = 0; bx < num_blocks_x; bx++, d += 16) { + decode_dxt5_block(d, buffer); + copy_block_buffer(bx, by, w, h, 4, 4, buffer, image); } } + return 1; } diff --git a/ext/decoders/native/dxtc.h b/ext/decoders/native/dxtc.h index 429549e..112b867 100644 --- a/ext/decoders/native/dxtc.h +++ b/ext/decoders/native/dxtc.h @@ -3,7 +3,7 @@ #include -void decode_dxt1(const uint8_t*, const int, const int, uint32_t*); -void decode_dxt5(const uint8_t*, const int, const int, uint32_t*); +int decode_dxt1(const uint8_t *, const long, const long, uint32_t *); +int decode_dxt5(const uint8_t *, const long, const long, uint32_t *); #endif /* end of include guard: DXTC_H */ diff --git a/ext/decoders/native/endianness.h b/ext/decoders/native/endianness.h new file mode 100644 index 0000000..7866e3b --- /dev/null +++ b/ext/decoders/native/endianness.h @@ -0,0 +1,180 @@ +/* + * + * License Information + * + * endianness.h is derived from https://gist.github.com/jtbr/7a43e6281e6cca353b33ee501421860c + * The file is licensed under the MIT License shown below. + * + * + * The MIT License (MIT) + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE + * WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef _ENDIANNESS_H +#define _ENDIANNESS_H + +#include +#include + +/* Detect platform endianness at compile time */ + +// If boost were available on all platforms, could use this instead to detect endianness +// #include + +// When available, these headers can improve platform endianness detection +#ifdef __has_include // C++17, supported as extension to C++11 in clang, GCC 5+, vs2015 +#if __has_include() +#include // gnu libc normally provides, linux +#elif __has_include() +#include //open bsd, macos +#elif __has_include() +#include // mingw, some bsd (not open/macos) +#elif __has_include() +#include // solaris +#endif +#endif + +#if !defined(__LITTLE_ENDIAN__) && !defined(__BIG_ENDIAN__) +#if (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) || \ + (defined(__BYTE_ORDER) && __BYTE_ORDER == __BIG_ENDIAN) || (defined(_BYTE_ORDER) && _BYTE_ORDER == _BIG_ENDIAN) || \ + (defined(BYTE_ORDER) && BYTE_ORDER == BIG_ENDIAN) || (defined(__sun) && defined(__SVR4) && defined(_BIG_ENDIAN)) || \ + defined(__ARMEB__) || defined(__THUMBEB__) || defined(__AARCH64EB__) || defined(_MIBSEB) || defined(__MIBSEB) || \ + defined(__MIBSEB__) || defined(_M_PPC) +#define __BIG_ENDIAN__ +#elif (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) || /* gcc */ \ + (defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN) /* linux header */ || \ + (defined(_BYTE_ORDER) && _BYTE_ORDER == _LITTLE_ENDIAN) || \ + (defined(BYTE_ORDER) && BYTE_ORDER == LITTLE_ENDIAN) /* mingw header */ || \ + (defined(__sun) && defined(__SVR4) && defined(_LITTLE_ENDIAN)) || /* solaris */ \ + defined(__ARMEL__) || defined(__THUMBEL__) || defined(__AARCH64EL__) || defined(_MIPSEL) || defined(__MIPSEL) || \ + defined(__MIPSEL__) || defined(_M_IX86) || defined(_M_X64) || defined(_M_IA64) || /* msvc for intel processors */ \ + defined(_M_ARM) /* msvc code on arm executes in little endian mode */ +#define __LITTLE_ENDIAN__ +#endif +#endif + +#ifdef bswap16 +#undef bswap16 +#endif +#ifdef bswap32 +#undef bswap32 +#endif +#ifdef bswap64 +#undef bswap64 +#endif + +/* Define byte-swap functions, using fast processor-native built-ins where possible */ +// needs to be first because msvc doesn't short-circuit after failing defined(__has_builtin) +#if defined(_MSC_VER) +#define bswap16(x) _byteswap_ushort((x)) +#define bswap32(x) _byteswap_ulong((x)) +#define bswap64(x) _byteswap_uint64((x)) +#elif (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8) +#define bswap16(x) __builtin_bswap16((x)) +#define bswap32(x) __builtin_bswap32((x)) +#define bswap64(x) __builtin_bswap64((x)) +#elif defined(__has_builtin) && __has_builtin(__builtin_bswap64) +/* for clang; gcc 5 fails on this and && shortcircuit fails; must be after GCC check */ +#define bswap16(x) __builtin_bswap16((x)) +#define bswap32(x) __builtin_bswap32((x)) +#define bswap64(x) __builtin_bswap64((x)) +#else +/* even in this case, compilers often optimize by using native instructions */ +static inline uint16_t bswap16(uint16_t x) { + return (((x >> 8) & 0xffu) | ((x & 0xffu) << 8)); +} +static inline uint32_t bswap32(uint32_t x) { + return (((x & 0xff000000u) >> 24) | ((x & 0x00ff0000u) >> 8) | ((x & 0x0000ff00u) << 8) | + ((x & 0x000000ffu) << 24)); +} +static inline uint64_t bswap64(uint64_t x) { + return (((x & 0xff00000000000000ull) >> 56) | ((x & 0x00ff000000000000ull) >> 40) | + ((x & 0x0000ff0000000000ull) >> 24) | ((x & 0x000000ff00000000ull) >> 8) | + ((x & 0x00000000ff000000ull) << 8) | ((x & 0x0000000000ff0000ull) << 24) | + ((x & 0x000000000000ff00ull) << 40) | ((x & 0x00000000000000ffull) << 56)); +} +#endif + + +/* Defines network - host byte swaps as needed depending upon platform endianness */ +// note that network order is big endian) + +#if defined(__LITTLE_ENDIAN__) +#define ntoh16(x) bswap16((x)) +#define hton16(x) bswap16((x)) +#define ntoh32(x) bswap32((x)) +#define hton32(x) bswap32((x)) +#define ntoh64(x) bswap64((x)) +#define hton64(x) bswap64((x)) +#define lton16(x) (x) +#define lton32(x) (x) +#define lton64(x) (x) +#define ltonf(x) (x) +#define ltond(x) (x) +#define bton16(x) bswap16((x)) +#define bton32(x) bswap32((x)) +#define bton64(x) bswap64((x)) +#define btonf(x) htonf((x)) +#define btond(x) htond((x)) +#elif defined(__BIG_ENDIAN__) +#define ntoh16(x) (x) +#define hton16(x) (x) +#define ntoh32(x) (x) +#define hton32(x) (x) +#define ntoh64(x) (x) +#define hton64(x) (x) +#define bton16(x) (x) +#define bton32(x) (x) +#define bton64(x) (x) +#define btonf(x) (x) +#define btond(x) (x) +#define lton16(x) bswap16((x)) +#define lton32(x) bswap32((x)) +#define lton64(x) bswap64((x)) +#define ltonf(x) htonf((x)) +#define ltond(x) htond((x)) +#else +#warning "UNKNOWN Platform / endianness; network / host byte swaps not defined." +#endif + + +//! Convert 32-bit float from host to network byte order +static inline float htonf(float f) { +#ifdef __cplusplus + static_assert(sizeof(float) == sizeof(uint32_t), "Unexpected float format"); + uint32_t val = hton32(*(reinterpret_cast(&f))); + return *(reinterpret_cast(&val)); +#else + uint32_t val = hton32(*(const uint32_t *)(&f)); + return *((float *)(&val)); +#endif +} +#define ntohf(x) htonf((x)) + +//! Convert 64-bit double from host to network byte order +static inline double htond(double f) { +#ifdef __cplusplus + static_assert(sizeof(double) == sizeof(uint64_t), "Unexpected double format"); + uint64_t val = hton64(*(reinterpret_cast(&f))); + return *(reinterpret_cast(&val)); +#else + uint64_t val = hton64(*(const uint64_t *)(&f)); + return *((double *)(&val)); +#endif +} +#define ntohd(x) htond((x)) + +#endif //_ENDIANNESS_H diff --git a/ext/decoders/native/etc.c b/ext/decoders/native/etc.c index d349f89..4de0145 100644 --- a/ext/decoders/native/etc.c +++ b/ext/decoders/native/etc.c @@ -1,75 +1,45 @@ #include "etc.h" -#include "common.h" #include #include +#include "color.h" -const uint_fast8_t WriteOrderTable[16] = { 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 }; -const uint_fast8_t WriteOrderTableRev[16] = { 15, 11, 7, 3, 14, 10, 6, 2, 13, 9, 5, 1, 12, 8, 4, 0 }; -const uint_fast8_t Etc1ModifierTable[8][2] = { { 2, 8 }, { 5, 17 }, { 9, 29 }, { 13, 42 }, { 18, 60 }, { 24, 80 }, { 33, 106 }, { 47, 183 } }; +const uint_fast8_t WriteOrderTable[16] = {0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15}; +const uint_fast8_t WriteOrderTableRev[16] = {15, 11, 7, 3, 14, 10, 6, 2, 13, 9, 5, 1, 12, 8, 4, 0}; +const uint_fast8_t Etc1ModifierTable[8][2] = {{2, 8}, {5, 17}, {9, 29}, {13, 42}, + {18, 60}, {24, 80}, {33, 106}, {47, 183}}; const uint_fast8_t Etc2aModifierTable[2][8][2] = { - { { 0, 8 }, { 0, 17 }, { 0, 29 }, { 0, 42 }, { 0, 60 }, { 0, 80 }, { 0, 106 }, { 0, 183 } }, - { { 2, 8 }, { 5, 17 }, { 9, 29 }, { 13, 42 }, { 18, 60 }, { 24, 80 }, { 33, 106 }, { 47, 183 } } -}; -const uint_fast8_t Etc1SubblockTable[2][16] = { { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 }, { 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1 } }; -const uint_fast8_t Etc2DistanceTable[8] = { 3, 6, 11, 16, 23, 32, 41, 64 }; + {{0, 8}, {0, 17}, {0, 29}, {0, 42}, {0, 60}, {0, 80}, {0, 106}, {0, 183}}, + {{2, 8}, {5, 17}, {9, 29}, {13, 42}, {18, 60}, {24, 80}, {33, 106}, {47, 183}}}; +const uint_fast8_t Etc1SubblockTable[2][16] = {{0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1}, + {0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1}}; +const uint_fast8_t Etc2DistanceTable[8] = {3, 6, 11, 16, 23, 32, 41, 64}; const int_fast8_t Etc2AlphaModTable[16][8] = { - { -3, -6, -9, -15, 2, 5, 8, 14 }, - { -3, -7, -10, -13, 2, 6, 9, 12 }, - { -2, -5, -8, -13, 1, 4, 7, 12 }, - { -2, -4, -6, -13, 1, 3, 5, 12 }, - { -3, -6, -8, -12, 2, 5, 7, 11 }, - { -3, -7, -9, -11, 2, 6, 8, 10 }, - { -4, -7, -8, -11, 3, 6, 7, 10 }, - { -3, -5, -8, -11, 2, 4, 7, 10 }, - { -2, -6, -8, -10, 1, 5, 7, 9 }, - { -2, -5, -8, -10, 1, 4, 7, 9 }, - { -2, -4, -8, -10, 1, 3, 7, 9 }, - { -2, -5, -7, -10, 1, 4, 6, 9 }, - { -3, -4, -7, -10, 2, 3, 6, 9 }, - { -1, -2, -3, -10, 0, 1, 2, 9 }, - { -4, -6, -8, -9, 3, 5, 7, 8 }, - { -3, -5, -7, -9, 2, 4, 6, 8 } -}; + {-3, -6, -9, -15, 2, 5, 8, 14}, {-3, -7, -10, -13, 2, 6, 9, 12}, {-2, -5, -8, -13, 1, 4, 7, 12}, + {-2, -4, -6, -13, 1, 3, 5, 12}, {-3, -6, -8, -12, 2, 5, 7, 11}, {-3, -7, -9, -11, 2, 6, 8, 10}, + {-4, -7, -8, -11, 3, 6, 7, 10}, {-3, -5, -8, -11, 2, 4, 7, 10}, {-2, -6, -8, -10, 1, 5, 7, 9}, + {-2, -5, -8, -10, 1, 4, 7, 9}, {-2, -4, -8, -10, 1, 3, 7, 9}, {-2, -5, -7, -10, 1, 4, 6, 9}, + {-3, -4, -7, -10, 2, 3, 6, 9}, {-1, -2, -3, -10, 0, 1, 2, 9}, {-4, -6, -8, -9, 3, 5, 7, 8}, + {-3, -5, -7, -9, 2, 4, 6, 8}}; -#if BYTE_ORDER == LITTLE_ENDIAN -static const uint_fast32_t TRANSPARENT_MASK = 0x00ffffff; -#else -static const uint_fast32_t TRANSPARENT_MASK = 0xffffff00; -#endif - -static inline uint_fast32_t color(uint_fast8_t r, uint_fast8_t g, uint_fast8_t b, uint_fast8_t a) -{ -#if BYTE_ORDER == LITTLE_ENDIAN - return r | g << 8 | b << 16 | a << 24; -#else - return a | b << 8 | g << 16 | r << 24; -#endif -} - -static inline uint_fast8_t clamp(const int n) -{ +static inline uint_fast8_t clamp(const int n) { return n < 0 ? 0 : n > 255 ? 255 : n; } -static inline uint32_t applicate_color(uint_fast8_t c[3], int_fast16_t m) -{ +static inline uint32_t applicate_color(uint_fast8_t c[3], int_fast16_t m) { return color(clamp(c[0] + m), clamp(c[1] + m), clamp(c[2] + m), 255); } -static inline uint32_t applicate_color_alpha(uint_fast8_t c[3], int_fast16_t m, int transparent) -{ +static inline uint32_t applicate_color_alpha(uint_fast8_t c[3], int_fast16_t m, int transparent) { return color(clamp(c[0] + m), clamp(c[1] + m), clamp(c[2] + m), transparent ? 0 : 255); } -static inline uint32_t applicate_color_raw(uint_fast8_t c[3]) -{ +static inline uint32_t applicate_color_raw(uint_fast8_t c[3]) { return color(c[0], c[1], c[2], 255); } -static inline void decode_etc1_block(const uint8_t* data, uint32_t* outbuf) -{ - const uint_fast8_t code[2] = { data[3] >> 5, data[3] >> 2 & 7 }; // Table codewords - const uint_fast8_t* table = Etc1SubblockTable[data[3] & 1]; +static void decode_etc1_block(const uint8_t *data, uint32_t *outbuf) { + const uint_fast8_t code[2] = {data[3] >> 5, data[3] >> 2 & 7}; // Table codewords + const uint_fast8_t *table = Etc1SubblockTable[data[3] & 1]; uint_fast8_t c[2][3]; if (data[3] & 2) { // diff bit == 1 @@ -95,8 +65,8 @@ static inline void decode_etc1_block(const uint8_t* data, uint32_t* outbuf) c[1][2] = (data[2] & 0x0f) | data[2] << 4; } - uint_fast16_t j = data[6] << 8 | data[7]; // less significant pixel index bits - uint_fast16_t k = data[4] << 8 | data[5]; // more significant pixel index bits + uint_fast16_t j = data[6] << 8 | data[7]; // less significant pixel index bits + uint_fast16_t k = data[4] << 8 | data[5]; // more significant pixel index bits for (int i = 0; i < 16; i++, j >>= 1, k >>= 1) { uint_fast8_t s = table[i]; uint_fast8_t m = Etc1ModifierTable[code[s]][j & 1]; @@ -104,29 +74,9 @@ static inline void decode_etc1_block(const uint8_t* data, uint32_t* outbuf) } } -void decode_etc1(const void* data, const int w, const int h, uint32_t* image) -{ - int num_blocks_x = (w + 3) / 4; - int num_blocks_y = (h + 3) / 4; - int copy_length_last = (w + 3) % 4 + 1; - uint32_t buf[16]; - uint32_t* buf_end = buf + 16; - const uint8_t* d = (uint8_t*)data; - for (int by = 0; by < num_blocks_y; by++) { - for (int bx = 0, x = 0; bx < num_blocks_x; bx++, d += 8, x += 4) { - decode_etc1_block(d, buf); - int copy_length = (bx < num_blocks_x - 1 ? 4 : copy_length_last) * 4; - uint32_t* b = buf; - for (int y = h - 1 - by * 4; b < buf_end && y >= 0; y--, b += 4) - memcpy(image + y * w + x, b, copy_length); - } - } -} - -static inline void decode_etc2_block(const uint8_t* data, uint32_t* outbuf) -{ - uint_fast16_t j = data[6] << 8 | data[7]; // 15 -> 0 - uint_fast32_t k = data[4] << 8 | data[5]; // 31 -> 16 +static void decode_etc2_block(const uint8_t *data, uint32_t *outbuf) { + uint_fast16_t j = data[6] << 8 | data[7]; // 15 -> 0 + uint_fast32_t k = data[4] << 8 | data[5]; // 31 -> 16 uint_fast8_t c[3][3] = {}; if (data[3] & 2) { @@ -146,7 +96,8 @@ static inline void decode_etc2_block(const uint8_t* data, uint32_t* outbuf) c[1][1] = (data[2] & 0x0f) | data[2] << 4; c[1][2] = (data[3] & 0xf0) | data[3] >> 4; const uint_fast8_t d = Etc2DistanceTable[(data[3] >> 1 & 6) | (data[3] & 1)]; - uint_fast32_t color_set[4] = { applicate_color_raw(c[0]), applicate_color(c[1], d), applicate_color_raw(c[1]), applicate_color(c[1], -d) }; + uint_fast32_t color_set[4] = {applicate_color_raw(c[0]), applicate_color(c[1], d), + applicate_color_raw(c[1]), applicate_color(c[1], -d)}; k <<= 1; for (int i = 0; i < 16; i++, j >>= 1, k >>= 1) outbuf[WriteOrderTable[i]] = color_set[(k & 2) | (j & 1)]; @@ -162,10 +113,12 @@ static inline void decode_etc2_block(const uint8_t* data, uint32_t* outbuf) c[1][1] |= c[1][1] >> 4; c[1][2] = (data[3] << 1 & 0xf0) | (data[3] >> 3 & 0xf); uint_fast8_t d = (data[3] & 4) | (data[3] << 1 & 2); - if (c[0][0] > c[1][0] || (c[0][0] == c[1][0] && (c[0][1] > c[1][1] || (c[0][1] == c[1][1] && c[0][2] >= c[1][2])))) + if (c[0][0] > c[1][0] || + (c[0][0] == c[1][0] && (c[0][1] > c[1][1] || (c[0][1] == c[1][1] && c[0][2] >= c[1][2])))) ++d; d = Etc2DistanceTable[d]; - uint_fast32_t color_set[4] = { applicate_color(c[0], d), applicate_color(c[0], -d), applicate_color(c[1], d), applicate_color(c[1], -d) }; + uint_fast32_t color_set[4] = {applicate_color(c[0], d), applicate_color(c[0], -d), applicate_color(c[1], d), + applicate_color(c[1], -d)}; k <<= 1; for (int i = 0; i < 16; i++, j >>= 1, k >>= 1) outbuf[WriteOrderTable[i]] = color_set[(k & 2) | (j & 1)]; @@ -192,8 +145,8 @@ static inline void decode_etc2_block(const uint8_t* data, uint32_t* outbuf) } } else { // differential - const uint_fast8_t code[2] = { data[3] >> 5, data[3] >> 2 & 7 }; - const uint_fast8_t* table = Etc1SubblockTable[data[3] & 1]; + const uint_fast8_t code[2] = {data[3] >> 5, data[3] >> 2 & 7}; + const uint_fast8_t *table = Etc1SubblockTable[data[3] & 1]; c[0][0] = r | r >> 5; c[0][1] = g | g >> 5; c[0][2] = b | b >> 5; @@ -211,8 +164,8 @@ static inline void decode_etc2_block(const uint8_t* data, uint32_t* outbuf) } } else { // individual (diff bit == 0) - const uint_fast8_t code[2] = { data[3] >> 5, data[3] >> 2 & 7 }; - const uint_fast8_t* table = Etc1SubblockTable[data[3] & 1]; + const uint_fast8_t code[2] = {data[3] >> 5, data[3] >> 2 & 7}; + const uint_fast8_t *table = Etc1SubblockTable[data[3] & 1]; c[0][0] = (data[0] & 0xf0) | data[0] >> 4; c[1][0] = (data[0] & 0x0f) | data[0] << 4; c[0][1] = (data[1] & 0xf0) | data[1] >> 4; @@ -227,10 +180,9 @@ static inline void decode_etc2_block(const uint8_t* data, uint32_t* outbuf) } } -static inline void decode_etc2a1_block(const uint8_t* data, uint32_t* outbuf) -{ - uint_fast16_t j = data[6] << 8 | data[7]; // 15 -> 0 - uint_fast32_t k = data[4] << 8 | data[5]; // 31 -> 16 +static void decode_etc2a1_block(const uint8_t *data, uint32_t *outbuf) { + uint_fast16_t j = data[6] << 8 | data[7]; // 15 -> 0 + uint_fast32_t k = data[4] << 8 | data[5]; // 31 -> 16 uint_fast8_t c[3][3] = {}; int obaq = data[3] >> 1 & 1; @@ -251,7 +203,8 @@ static inline void decode_etc2a1_block(const uint8_t* data, uint32_t* outbuf) c[1][1] = (data[2] & 0x0f) | data[2] << 4; c[1][2] = (data[3] & 0xf0) | data[3] >> 4; const uint_fast8_t d = Etc2DistanceTable[(data[3] >> 1 & 6) | (data[3] & 1)]; - uint_fast32_t color_set[4] = { applicate_color_raw(c[0]), applicate_color(c[1], d), applicate_color_raw(c[1]), applicate_color(c[1], -d) }; + uint_fast32_t color_set[4] = {applicate_color_raw(c[0]), applicate_color(c[1], d), applicate_color_raw(c[1]), + applicate_color(c[1], -d)}; k <<= 1; for (int i = 0; i < 16; i++, j >>= 1, k >>= 1) { int index = (k & 2) | (j & 1); @@ -271,10 +224,12 @@ static inline void decode_etc2a1_block(const uint8_t* data, uint32_t* outbuf) c[1][1] |= c[1][1] >> 4; c[1][2] = (data[3] << 1 & 0xf0) | (data[3] >> 3 & 0xf); uint_fast8_t d = (data[3] & 4) | (data[3] << 1 & 2); - if (c[0][0] > c[1][0] || (c[0][0] == c[1][0] && (c[0][1] > c[1][1] || (c[0][1] == c[1][1] && c[0][2] >= c[1][2])))) + if (c[0][0] > c[1][0] || + (c[0][0] == c[1][0] && (c[0][1] > c[1][1] || (c[0][1] == c[1][1] && c[0][2] >= c[1][2])))) ++d; d = Etc2DistanceTable[d]; - uint_fast32_t color_set[4] = { applicate_color(c[0], d), applicate_color(c[0], -d), applicate_color(c[1], d), applicate_color(c[1], -d) }; + uint_fast32_t color_set[4] = {applicate_color(c[0], d), applicate_color(c[0], -d), applicate_color(c[1], d), + applicate_color(c[1], -d)}; k <<= 1; for (int i = 0; i < 16; i++, j >>= 1, k >>= 1) { int index = (k & 2) | (j & 1); @@ -305,8 +260,8 @@ static inline void decode_etc2a1_block(const uint8_t* data, uint32_t* outbuf) } } else { // differential - const uint_fast8_t code[2] = { data[3] >> 5, data[3] >> 2 & 7 }; - const uint_fast8_t* table = Etc1SubblockTable[data[3] & 1]; + const uint_fast8_t code[2] = {data[3] >> 5, data[3] >> 2 & 7}; + const uint_fast8_t *table = Etc1SubblockTable[data[3] & 1]; c[0][0] = r | r >> 5; c[0][1] = g | g >> 5; c[0][2] = b | b >> 5; @@ -324,76 +279,71 @@ static inline void decode_etc2a1_block(const uint8_t* data, uint32_t* outbuf) } } -static inline void decode_etc2a8_block(const uint8_t* data, uint32_t* outbuf) -{ +static void decode_etc2a8_block(const uint8_t *data, uint32_t *outbuf) { if (data[1] & 0xf0) { // multiplier != 0 const uint_fast8_t multiplier = data[1] >> 4; - const int_fast8_t* table = Etc2AlphaModTable[data[1] & 0xf]; - uint_fast64_t l = data[7] | (uint_fast16_t)data[6] << 8 | (uint_fast32_t)data[5] << 16 | (uint_fast32_t)data[4] << 24 | (uint_fast64_t)data[3] << 32 | (uint_fast64_t)data[2] << 40; + const int_fast8_t *table = Etc2AlphaModTable[data[1] & 0xf]; + uint_fast64_t l = data[7] | (uint_fast16_t)data[6] << 8 | (uint_fast32_t)data[5] << 16 | + (uint_fast32_t)data[4] << 24 | (uint_fast64_t)data[3] << 32 | (uint_fast64_t)data[2] << 40; for (int i = 0; i < 16; i++, l >>= 3) - ((uint8_t*)(outbuf + WriteOrderTableRev[i]))[3] = clamp(data[0] + multiplier * table[l & 7]); + ((uint8_t *)(outbuf + WriteOrderTableRev[i]))[3] = clamp(data[0] + multiplier * table[l & 7]); } else { // multiplier == 0 (always same as base codeword) for (int i = 0; i < 16; i++, outbuf++) - ((uint8_t*)outbuf)[3] = data[0]; + ((uint8_t *)outbuf)[3] = data[0]; } } -void decode_etc2(const void* data, const int w, const int h, uint32_t* image) -{ - int num_blocks_x = (w + 3) / 4; - int num_blocks_y = (h + 3) / 4; - int copy_length_last = (w + 3) % 4 + 1; - uint32_t buf[16]; - uint32_t* buf_end = buf + 16; - const uint8_t* d = (uint8_t*)data; - for (int by = 0; by < num_blocks_y; by++) { - for (int bx = 0, x = 0; bx < num_blocks_x; bx++, d += 8, x += 4) { - decode_etc2_block(d, buf); - int copy_length = (bx < num_blocks_x - 1 ? 4 : copy_length_last) * 4; - uint32_t* b = buf; - for (int y = h - by * 4 - 1; b < buf_end && y >= 0; y--, b += 4) - memcpy(image + y * w + x, b, copy_length); +int decode_etc1(const uint8_t *data, const long w, const long h, uint32_t *image) { + long num_blocks_x = (w + 3) / 4; + long num_blocks_y = (h + 3) / 4; + uint32_t buffer[16]; + for (long by = 0; by < num_blocks_y; by++) { + for (long bx = 0; bx < num_blocks_x; bx++, data += 8) { + decode_etc1_block(data, buffer); + copy_block_buffer(bx, by, w, h, 4, 4, buffer, image); } } + return 1; } -void decode_etc2a1(const void* data, const int w, const int h, uint32_t* image) -{ - int num_blocks_x = (w + 3) / 4; - int num_blocks_y = (h + 3) / 4; - int copy_length_last = (w + 3) % 4 + 1; - uint32_t buf[16]; - uint32_t* buf_end = buf + 16; - const uint8_t* d = (uint8_t*)data; - for (int by = 0; by < num_blocks_y; by++) { - for (int bx = 0, x = 0; bx < num_blocks_x; bx++, d += 8, x += 4) { - decode_etc2a1_block(d, buf); - int copy_length = (bx < num_blocks_x - 1 ? 4 : copy_length_last) * 4; - uint32_t* b = buf; - for (int y = h - by * 4 - 1; b < buf_end && y >= 0; y--, b += 4) - memcpy(image + y * w + x, b, copy_length); +int decode_etc2(const uint8_t *data, const long w, const long h, uint32_t *image) { + long num_blocks_x = (w + 3) / 4; + long num_blocks_y = (h + 3) / 4; + uint32_t buffer[16]; + for (long by = 0; by < num_blocks_y; by++) { + for (long bx = 0; bx < num_blocks_x; bx++, data += 8) { + decode_etc2_block(data, buffer); + copy_block_buffer(bx, by, w, h, 4, 4, buffer, image); } } + return 1; } -void decode_etc2a8(const void* data, const int w, const int h, uint32_t* image) -{ - int num_blocks_x = (w + 3) / 4; - int num_blocks_y = (h + 3) / 4; - int copy_length_last = (w + 3) % 4 + 1; - uint32_t buf[16]; - uint32_t* buf_end = buf + 16; - const uint8_t* d = (uint8_t*)data; - for (int by = 0; by < num_blocks_y; by++) { - for (int bx = 0, x = 0; bx < num_blocks_x; bx++, d += 16, x += 4) { - decode_etc2_block(d + 8, buf); - decode_etc2a8_block(d, buf); - int copy_length = (bx < num_blocks_x - 1 ? 4 : copy_length_last) * 4; - uint32_t* b = buf; - for (int y = h - by * 4 - 1; b < buf_end && y >= 0; y--, b += 4) - memcpy(image + y * w + x, b, copy_length); +int decode_etc2a1(const uint8_t *data, const long w, const long h, uint32_t *image) { + long num_blocks_x = (w + 3) / 4; + long num_blocks_y = (h + 3) / 4; + uint32_t buffer[16]; + for (long by = 0; by < num_blocks_y; by++) { + for (long bx = 0; bx < num_blocks_x; bx++, data += 8) { + decode_etc2a1_block(data, buffer); + copy_block_buffer(bx, by, w, h, 4, 4, buffer, image); } } + return 1; +} + +int decode_etc2a8(const uint8_t *data, const long w, const long h, uint32_t *image) { + long num_blocks_x = (w + 3) / 4; + long num_blocks_y = (h + 3) / 4; + uint32_t buffer[16]; + for (long by = 0; by < num_blocks_y; by++) { + for (long bx = 0; bx < num_blocks_x; bx++, data += 16) { + decode_etc2_block(data + 8, buffer); + decode_etc2a8_block(data, buffer); + copy_block_buffer(bx, by, w, h, 4, 4, buffer, image); + } + } + return 1; } diff --git a/ext/decoders/native/etc.h b/ext/decoders/native/etc.h index 9057114..f68f804 100644 --- a/ext/decoders/native/etc.h +++ b/ext/decoders/native/etc.h @@ -3,9 +3,9 @@ #include -void decode_etc1(const void*, const int, const int, uint32_t*); -void decode_etc2(const void*, const int, const int, uint32_t*); -void decode_etc2a1(const void*, const int, const int, uint32_t*); -void decode_etc2a8(const void*, const int, const int, uint32_t*); +int decode_etc1(const uint8_t *, const long, const long, uint32_t *); +int decode_etc2(const uint8_t *, const long, const long, uint32_t *); +int decode_etc2a1(const uint8_t *, const long, const long, uint32_t *); +int decode_etc2a8(const uint8_t *, const long, const long, uint32_t *); #endif /* end of include guard: ETC_H */ diff --git a/ext/decoders/native/fp16.h b/ext/decoders/native/fp16.h index 4721740..b7f0c1d 100644 --- a/ext/decoders/native/fp16.h +++ b/ext/decoders/native/fp16.h @@ -6,35 +6,31 @@ #endif /* FP16_H */ -/** +/* * * License Information * * FP16 library is derived from https://github.com/Maratyszcza/FP16. * The library is licensed under the MIT License shown below. - -The MIT License (MIT) - -Copyright (c) 2017 Facebook Inc. -Copyright (c) 2017 Georgia Institute of Technology -Copyright 2019 Google LLC - -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -of the Software, and to permit persons to whom the Software is furnished to do -so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. - - **/ + * + * + * The MIT License (MIT) + * + * Copyright (c) 2017 Facebook Inc. + * Copyright (c) 2017 Georgia Institute of Technology + * Copyright 2019 Google LLC + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE + * WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ diff --git a/ext/decoders/native/main.c b/ext/decoders/native/main.c index a733f60..74da224 100644 --- a/ext/decoders/native/main.c +++ b/ext/decoders/native/main.c @@ -1,79 +1,174 @@ +#include +#include +#include #include "astc.h" #include "dxtc.h" #include "etc.h" #include "pvrtc.h" #include "rgb.h" -#include -#include -#include + +const char *error_msg = NULL; + +#define DECODE_CHECK(call) \ + if (!call) { \ + rb_raise(rb_eRuntimeError, "%s", error_msg ? error_msg : "unknown internal error"); \ + error_msg = NULL; \ + return Qnil; \ + } + +static int check_str_len(VALUE data, long len, long unit) { + if (RSTRING_LEN(data) < len * unit) { + rb_raise(rb_eStandardError, "Data size is not enough."); + return 0; + } + return 1; +} + +static int check_str_len_block(VALUE data, long w, long h, long bw, long bh, long unit) { + long size = ((w + bw - 1) / bw) * ((h + bh - 1) / bh); + return check_str_len(data, size, unit); +} + +static VALUE rb_alloc_rgb(long n) { + VALUE ret = rb_str_buf_new(n * 3); + rb_str_set_len(ret, n * 3); + return ret; +} + +static VALUE rb_alloc_rgba(long n) { + VALUE ret = rb_str_buf_new(n * 4); + rb_str_set_len(ret, n * 4); + return ret; +} /* * Decode image from A8 binary + * Returned image is not flipped * * @param [String] rb_data binary to decode - * @param [Integer] size width * height + * @param [Integer] rb_size width * height * @return [String] decoded rgb binary */ -static VALUE rb_decode_a8(VALUE self, VALUE rb_data, VALUE size) -{ - if (RSTRING_LEN(rb_data) < FIX2LONG(size)) - rb_raise(rb_eStandardError, "Data size is not enough."); - VALUE ret = rb_str_buf_new(FIX2LONG(size) * 3); - decode_a8((uint8_t*)RSTRING_PTR(rb_data), FIX2INT(size), (uint8_t*)RSTRING_PTR(ret)); - rb_str_set_len(ret, FIX2LONG(size) * 3); +static VALUE rb_decode_a8(VALUE self, VALUE rb_data, VALUE rb_size) { + long size = FIX2LONG(rb_size); + if (!check_str_len(rb_data, size, 1)) + return Qnil; + VALUE ret = rb_alloc_rgb(size); + if (!decode_a8((uint8_t *)RSTRING_PTR(rb_data), size, (uint8_t *)RSTRING_PTR(ret))) + return Qnil; return ret; } /* * Decode image from R8 binary + * Returned image is not flipped * * @param [String] rb_data binary to decode - * @param [Integer] size width * height + * @param [Integer] rb_size width * height * @return [String] decoded rgb binary */ -static VALUE rb_decode_r8(VALUE self, VALUE rb_data, VALUE size) -{ - if (RSTRING_LEN(rb_data) < FIX2LONG(size)) - rb_raise(rb_eStandardError, "Data size is not enough."); - VALUE ret = rb_str_buf_new(FIX2LONG(size) * 3); - decode_r8((uint8_t*)RSTRING_PTR(rb_data), FIX2INT(size), (uint8_t*)RSTRING_PTR(ret)); - rb_str_set_len(ret, FIX2LONG(size) * 3); +static VALUE rb_decode_r8(VALUE self, VALUE rb_data, VALUE rb_size) { + long size = FIX2LONG(rb_size); + if (!check_str_len(rb_data, size, 1)) + return Qnil; + VALUE ret = rb_alloc_rgb(size); + if (!decode_r8((uint8_t *)RSTRING_PTR(rb_data), size, (uint8_t *)RSTRING_PTR(ret))) + return Qnil; return ret; } /* * Decode image from R16 binary + * Returned image is not flipped * * @param [String] rb_data binary to decode - * @param [Integer] size width * height - * @param [Boolean] big whether input data are big endian + * @param [Integer] rb_size width * height + * @param [Boolean] rb_big whether input data are big endian * @return [String] decoded rgb binary */ -static VALUE rb_decode_r16(VALUE self, VALUE rb_data, VALUE size, VALUE big) -{ - if (RSTRING_LEN(rb_data) < FIX2LONG(size) * 2) - rb_raise(rb_eStandardError, "Data size is not enough."); - VALUE ret = rb_str_buf_new(FIX2LONG(size) * 3); - decode_r16((uint16_t*)RSTRING_PTR(rb_data), FIX2INT(size), RTEST(big), (uint8_t*)RSTRING_PTR(ret)); - rb_str_set_len(ret, FIX2LONG(size) * 3); +static VALUE rb_decode_r16(VALUE self, VALUE rb_data, VALUE rb_size, VALUE rb_big) { + long size = FIX2LONG(rb_size); + if (!check_str_len(rb_data, size, 2)) + return Qnil; + VALUE ret = rb_alloc_rgb(size); + if (!decode_r16((uint8_t *)RSTRING_PTR(rb_data), size, RTEST(rb_big), (uint8_t *)RSTRING_PTR(ret))) + return Qnil; return ret; } /* * Decode image from RGB565 binary + * Returned image is not flipped * * @param [String] rb_data binary to decode - * @param [Integer] size width * height - * @param [Boolean] big whether input data are big endian + * @param [Integer] rb_size width * height + * @param [Boolean] rb_big whether input data are big endian * @return [String] decoded rgb binary */ -static VALUE rb_decode_rgb565(VALUE self, VALUE rb_data, VALUE size, VALUE big) -{ - if (RSTRING_LEN(rb_data) < FIX2LONG(size) * 2) - rb_raise(rb_eStandardError, "Data size is not enough."); - VALUE ret = rb_str_buf_new(FIX2LONG(size) * 3); - decode_rgb565((uint16_t*)RSTRING_PTR(rb_data), FIX2INT(size), RTEST(big), (uint8_t*)RSTRING_PTR(ret)); - rb_str_set_len(ret, FIX2LONG(size) * 3); +static VALUE rb_decode_rgb565(VALUE self, VALUE rb_data, VALUE rb_size, VALUE rb_big) { + long size = FIX2LONG(rb_size); + if (!check_str_len(rb_data, size, 2)) + return Qnil; + VALUE ret = rb_alloc_rgb(size); + if (!decode_rgb565((uint16_t *)RSTRING_PTR(rb_data), size, RTEST(rb_big), (uint8_t *)RSTRING_PTR(ret))) + return Qnil; + return ret; +} + +/* + * Decode image from RHalf binary + * Returned image is not flipped + * + * @param [String] rb_data binary to decode + * @param [Integer] rb_size width * height + * @param [Boolean] rb_big whether input data are big endian + * @return [String] decoded rgb binary + */ +static VALUE rb_decode_rhalf(VALUE self, VALUE rb_data, VALUE rb_size, VALUE rb_big) { + long size = FIX2LONG(rb_size); + if (!check_str_len(rb_data, size, 2)) + return Qnil; + VALUE ret = rb_alloc_rgb(size); + if (!decode_rhalf((uint16_t *)RSTRING_PTR(rb_data), size, RTEST(rb_big), (uint8_t *)RSTRING_PTR(ret))) + return Qnil; + return ret; +} + +/* + * Decode image from RGHalf binary + * Returned image is not flipped + * + * @param [String] rb_data binary to decode + * @param [Integer] rb_size width * height + * @param [Boolean] rb_big whether input data are big endian + * @return [String] decoded rgb binary + */ +static VALUE rb_decode_rghalf(VALUE self, VALUE rb_data, VALUE rb_size, VALUE rb_big) { + long size = FIX2LONG(rb_size); + if (!check_str_len(rb_data, size, 4)) + return Qnil; + VALUE ret = rb_alloc_rgb(size); + if (!decode_rghalf((uint16_t *)RSTRING_PTR(rb_data), size, RTEST(rb_big), (uint8_t *)RSTRING_PTR(ret))) + return Qnil; + return ret; +} + +/* + * Decode image from RGBAHalf binary + * Returned image is not flipped + * + * @param [String] rb_data binary to decode + * @param [Integer] rb_size width * height + * @param [Boolean] rb_big whether input data are big endian + * @return [String] decoded rgba binary + */ +static VALUE rb_decode_rgbahalf(VALUE self, VALUE rb_data, VALUE rb_size, VALUE rb_big) { + long size = FIX2LONG(rb_size); + if (!check_str_len(rb_data, size, 8)) + return Qnil; + VALUE ret = rb_alloc_rgba(size); + if (!decode_rgbahalf((uint16_t *)RSTRING_PTR(rb_data), size, RTEST(rb_big), (uint8_t *)RSTRING_PTR(ret))) + return Qnil; return ret; } @@ -81,18 +176,17 @@ static VALUE rb_decode_rgb565(VALUE self, VALUE rb_data, VALUE size, VALUE big) * Decode image from ETC1 compressed binary * * @param [String] rb_data binary to decode - * @param [Integer] w image width - * @param [Integer] h image height + * @param [Integer] rb_w image width + * @param [Integer] rb_h image height * @return [String] decoded rgba binary */ -static VALUE rb_decode_etc1(VALUE self, VALUE rb_data, VALUE w, VALUE h) -{ - if (RSTRING_LEN(rb_data) < ((FIX2LONG(w) + 3) / 4) * ((FIX2LONG(h) + 3) / 4) * 8) - rb_raise(rb_eStandardError, "Data size is not enough."); - uint32_t* image = (uint32_t*)calloc(FIX2LONG(w) * FIX2LONG(h), sizeof(uint32_t)); - decode_etc1((uint64_t*)RSTRING_PTR(rb_data), FIX2INT(w), FIX2INT(h), image); - VALUE ret = rb_str_new((char*)image, FIX2LONG(w) * FIX2LONG(h) * sizeof(uint32_t)); - free(image); +static VALUE rb_decode_etc1(VALUE self, VALUE rb_data, VALUE rb_w, VALUE rb_h) { + long w = FIX2LONG(rb_w), h = FIX2LONG(rb_h); + if (!check_str_len_block(rb_data, w, h, 4, 4, 8)) + return Qnil; + VALUE ret = rb_alloc_rgba(w * h); + if (!decode_etc1((uint8_t *)RSTRING_PTR(rb_data), w, h, (uint32_t *)RSTRING_PTR(ret))) + return Qnil; return ret; } @@ -104,14 +198,13 @@ static VALUE rb_decode_etc1(VALUE self, VALUE rb_data, VALUE w, VALUE h) * @param [Integer] h image height * @return [String] decoded rgba binary */ -static VALUE rb_decode_etc2(VALUE self, VALUE rb_data, VALUE w, VALUE h) -{ - if (RSTRING_LEN(rb_data) < ((FIX2LONG(w) + 3) / 4) * ((FIX2LONG(h) + 3) / 4) * 8) - rb_raise(rb_eStandardError, "Data size is not enough."); - uint32_t* image = (uint32_t*)calloc(FIX2LONG(w) * FIX2LONG(h), sizeof(uint32_t)); - decode_etc2((uint64_t*)RSTRING_PTR(rb_data), FIX2INT(w), FIX2INT(h), image); - VALUE ret = rb_str_new((char*)image, FIX2LONG(w) * FIX2LONG(h) * sizeof(uint32_t)); - free(image); +static VALUE rb_decode_etc2(VALUE self, VALUE rb_data, VALUE rb_w, VALUE rb_h) { + long w = FIX2LONG(rb_w), h = FIX2LONG(rb_h); + if (!check_str_len_block(rb_data, w, h, 4, 4, 8)) + return Qnil; + VALUE ret = rb_alloc_rgba(w * h); + if (!decode_etc2((uint8_t *)RSTRING_PTR(rb_data), w, h, (uint32_t *)RSTRING_PTR(ret))) + return Qnil; return ret; } @@ -123,14 +216,13 @@ static VALUE rb_decode_etc2(VALUE self, VALUE rb_data, VALUE w, VALUE h) * @param [Integer] h image height * @return [String] decoded rgba binary */ -static VALUE rb_decode_etc2a1(VALUE self, VALUE rb_data, VALUE w, VALUE h) -{ - if (RSTRING_LEN(rb_data) < ((FIX2LONG(w) + 3) / 4) * ((FIX2LONG(h) + 3) / 4) * 8) - rb_raise(rb_eStandardError, "Data size is not enough."); - uint32_t* image = (uint32_t*)calloc(FIX2LONG(w) * FIX2LONG(h), sizeof(uint32_t)); - decode_etc2a1((uint64_t*)RSTRING_PTR(rb_data), FIX2INT(w), FIX2INT(h), image); - VALUE ret = rb_str_new((char*)image, FIX2LONG(w) * FIX2LONG(h) * sizeof(uint32_t)); - free(image); +static VALUE rb_decode_etc2a1(VALUE self, VALUE rb_data, VALUE rb_w, VALUE rb_h) { + long w = FIX2LONG(rb_w), h = FIX2LONG(rb_h); + if (!check_str_len_block(rb_data, w, h, 4, 4, 8)) + return Qnil; + VALUE ret = rb_alloc_rgba(w * h); + if (!decode_etc2a1((uint8_t *)RSTRING_PTR(rb_data), w, h, (uint32_t *)RSTRING_PTR(ret))) + return Qnil; return ret; } @@ -142,14 +234,13 @@ static VALUE rb_decode_etc2a1(VALUE self, VALUE rb_data, VALUE w, VALUE h) * @param [Integer] h image height * @return [String] decoded rgba binary */ -static VALUE rb_decode_etc2a8(VALUE self, VALUE rb_data, VALUE w, VALUE h) -{ - if (RSTRING_LEN(rb_data) < ((FIX2LONG(w) + 3) / 4) * ((FIX2LONG(h) + 3) / 4) * 16) - rb_raise(rb_eStandardError, "Data size is not enough."); - uint32_t* image = (uint32_t*)calloc(FIX2LONG(w) * FIX2LONG(h), sizeof(uint32_t)); - decode_etc2a8((uint64_t*)RSTRING_PTR(rb_data), FIX2INT(w), FIX2INT(h), image); - VALUE ret = rb_str_new((char*)image, FIX2LONG(w) * FIX2LONG(h) * sizeof(uint32_t)); - free(image); +static VALUE rb_decode_etc2a8(VALUE self, VALUE rb_data, VALUE rb_w, VALUE rb_h) { + long w = FIX2LONG(rb_w), h = FIX2LONG(rb_h); + if (!check_str_len_block(rb_data, w, h, 4, 4, 16)) + return Qnil; + VALUE ret = rb_alloc_rgba(w * h); + if (!decode_etc2a8((uint8_t *)RSTRING_PTR(rb_data), w, h, (uint32_t *)RSTRING_PTR(ret))) + return Qnil; return ret; } @@ -157,21 +248,21 @@ static VALUE rb_decode_etc2a8(VALUE self, VALUE rb_data, VALUE w, VALUE h) * Decode image from ASTC compressed binary * * @param [String] rb_data binary to decode - * @param [Integer] w image width - * @param [Integer] h image height - * @param [Integer] bw block width - * @param [Integer] bh block height + * @param [Integer] rb_w image width + * @param [Integer] rb_h image height + * @param [Integer] rb_bw block width + * @param [Integer] rb_bh block height * @return [String] decoded rgba binary */ -static VALUE rb_decode_astc(VALUE self, VALUE rb_data, VALUE w, VALUE h, - VALUE bw, VALUE bh) -{ - if (RSTRING_LEN(rb_data) < ((FIX2LONG(w) + FIX2LONG(bw) - 1) / FIX2LONG(bw)) * ((FIX2LONG(h) + FIX2LONG(bh) - 1) / FIX2LONG(bh)) * 16) - rb_raise(rb_eStandardError, "Data size is not enough."); - uint32_t* image = (uint32_t*)calloc(FIX2LONG(w) * FIX2LONG(h), sizeof(uint32_t)); - decode_astc((uint8_t*)RSTRING_PTR(rb_data), FIX2INT(w), FIX2INT(h), FIX2INT(bw), FIX2INT(bh), image); - VALUE ret = rb_str_new((char*)image, FIX2LONG(w) * FIX2LONG(h) * sizeof(uint32_t)); - free(image); +static VALUE rb_decode_astc(VALUE self, VALUE rb_data, VALUE rb_w, VALUE rb_h, VALUE rb_bw, VALUE rb_bh) { + long w = FIX2LONG(rb_w); + long h = FIX2LONG(rb_h); + int bw = FIX2INT(rb_bw); + int bh = FIX2INT(rb_bh); + if (!check_str_len_block(rb_data, w, h, bw, bh, 16)) + return Qnil; + VALUE ret = rb_alloc_rgba(w * h); + DECODE_CHECK(decode_astc((uint8_t *)RSTRING_PTR(rb_data), w, h, bw, bh, (uint32_t *)RSTRING_PTR(ret))); return ret; } @@ -179,18 +270,17 @@ static VALUE rb_decode_astc(VALUE self, VALUE rb_data, VALUE w, VALUE h, * Decode image from DXT1 compressed binary * * @param [String] rb_data binary to decode - * @param [Integer] w image width - * @param [Integer] h image height + * @param [Integer] rb_w image width + * @param [Integer] rb_h image height * @return [String] decoded rgba binary */ -static VALUE rb_decode_dxt1(VALUE self, VALUE rb_data, VALUE w, VALUE h) -{ - if (RSTRING_LEN(rb_data) < ((FIX2LONG(w) + 3) / 4) * ((FIX2LONG(h) + 3) / 4) * 8) - rb_raise(rb_eStandardError, "Data size is not enough."); - uint32_t* image = (uint32_t*)calloc(FIX2LONG(w) * FIX2LONG(h), sizeof(uint32_t)); - decode_dxt1((uint8_t*)RSTRING_PTR(rb_data), FIX2INT(w), FIX2INT(h), image); - VALUE ret = rb_str_new((char*)image, FIX2LONG(w) * FIX2LONG(h) * sizeof(uint32_t)); - free(image); +static VALUE rb_decode_dxt1(VALUE self, VALUE rb_data, VALUE rb_w, VALUE rb_h) { + long w = FIX2LONG(rb_w); + long h = FIX2LONG(rb_h); + if (!check_str_len_block(rb_data, w, h, 4, 4, 8)) + return Qnil; + VALUE ret = rb_alloc_rgba(w * h); + DECODE_CHECK(decode_dxt1((uint8_t *)RSTRING_PTR(rb_data), w, h, (uint32_t *)RSTRING_PTR(ret))); return ret; } @@ -198,77 +288,50 @@ static VALUE rb_decode_dxt1(VALUE self, VALUE rb_data, VALUE w, VALUE h) * Decode image from DXT5 compressed binary * * @param [String] rb_data binary to decode - * @param [Integer] w image width - * @param [Integer] h image height + * @param [Integer] rb_w image width + * @param [Integer] rb_h image height * @return [String] decoded rgba binary */ -static VALUE rb_decode_dxt5(VALUE self, VALUE rb_data, VALUE w, VALUE h) -{ - if (RSTRING_LEN(rb_data) < ((FIX2LONG(w) + 3) / 4) * ((FIX2LONG(h) + 3) / 4) * 16) - rb_raise(rb_eStandardError, "Data size is not enough."); - uint32_t* image = (uint32_t*)calloc(FIX2LONG(w) * FIX2LONG(h), sizeof(uint32_t)); - decode_dxt5((uint8_t*)RSTRING_PTR(rb_data), FIX2INT(w), FIX2INT(h), image); - VALUE ret = rb_str_new((char*)image, FIX2LONG(w) * FIX2LONG(h) * sizeof(uint32_t)); - free(image); +static VALUE rb_decode_dxt5(VALUE self, VALUE rb_data, VALUE rb_w, VALUE rb_h) { + long w = FIX2LONG(rb_w); + long h = FIX2LONG(rb_h); + if (!check_str_len_block(rb_data, w, h, 4, 4, 16)) + return Qnil; + VALUE ret = rb_alloc_rgba(w * h); + DECODE_CHECK(decode_dxt5((uint8_t *)RSTRING_PTR(rb_data), w, h, (uint32_t *)RSTRING_PTR(ret))); return ret; } /* - * Decode image from PVRTC1 4bpp compressed binary + * Decode image from PVRTC1 compressed binary * * @param [String] rb_data binary to decode - * @param [Integer] w image width - * @param [Integer] h image height + * @param [Integer] rb_w image width + * @param [Integer] rb_h image height + * @param [Boolean] rb_is2bpp whether 2bpp or not (4bpp) * @return [String] decoded rgba binary */ -static VALUE rb_decode_pvrtc1_4bpp(VALUE self, VALUE rb_data, VALUE w, VALUE h) -{ - if (RSTRING_LEN(rb_data) < ((FIX2LONG(w) + 3) / 4) * ((FIX2LONG(h) + 3) / 4) * 8) { - rb_raise(rb_eStandardError, "Data size is not enough."); +static VALUE rb_decode_pvrtc1(VALUE self, VALUE rb_data, VALUE rb_w, VALUE rb_h, VALUE rb_is2bpp) { + int is2bpp = RTEST(rb_is2bpp); + long w = FIX2LONG(rb_w); + long h = FIX2LONG(rb_h); + if (!check_str_len_block(rb_data, w, h, is2bpp ? 8 : 4, 4, 8)) return Qnil; - } - size_t buffer_length = FIX2LONG(w) * FIX2LONG(h) * 8; - VALUE ret = rb_str_buf_new(buffer_length); - if (!decode_pvrtc_4bpp((uint8_t*)RSTRING_PTR(rb_data), FIX2INT(w), FIX2INT(h), (uint32_t*)RSTRING_PTR(ret))) { - rb_raise(rb_eStandardError, "internal error"); - return Qnil; - } - rb_str_set_len(ret, buffer_length); + VALUE ret = rb_alloc_rgba(w * h); + DECODE_CHECK(decode_pvrtc((uint8_t *)RSTRING_PTR(rb_data), w, h, (uint32_t *)RSTRING_PTR(ret), is2bpp)); return ret; } -/* - * Decode image from PVRTC1 2bpp compressed binary - * - * @param [String] rb_data binary to decode - * @param [Integer] w image width - * @param [Integer] h image height - * @return [String] decoded rgba binary - */ -static VALUE rb_decode_pvrtc1_2bpp(VALUE self, VALUE rb_data, VALUE w, VALUE h) -{ - if (RSTRING_LEN(rb_data) < ((FIX2LONG(w) + 7) / 8) * ((FIX2LONG(h) + 3) / 4) * 8) { - rb_raise(rb_eStandardError, "Data size is not enough."); - return Qnil; - } - size_t buffer_length = FIX2LONG(w) * FIX2LONG(h) * 8; - VALUE ret = rb_str_buf_new(buffer_length); - if (!decode_pvrtc_2bpp((uint8_t*)RSTRING_PTR(rb_data), FIX2INT(w), FIX2INT(h), (uint32_t*)RSTRING_PTR(ret))) { - rb_raise(rb_eStandardError, "internal error"); - return Qnil; - } - rb_str_set_len(ret, buffer_length); - return ret; -} - -void Init_native() -{ +void Init_native() { VALUE mMikunyan = rb_define_module("Mikunyan"); VALUE mDecodeHelper = rb_define_module_under(mMikunyan, "DecodeHelper"); rb_define_module_function(mDecodeHelper, "decode_a8", rb_decode_a8, 2); rb_define_module_function(mDecodeHelper, "decode_r8", rb_decode_r8, 2); rb_define_module_function(mDecodeHelper, "decode_r16", rb_decode_r16, 3); rb_define_module_function(mDecodeHelper, "decode_rgb565", rb_decode_rgb565, 3); + rb_define_module_function(mDecodeHelper, "decode_rhalf", rb_decode_rhalf, 3); + rb_define_module_function(mDecodeHelper, "decode_rghalf", rb_decode_rghalf, 3); + rb_define_module_function(mDecodeHelper, "decode_rgbahalf", rb_decode_rgbahalf, 3); rb_define_module_function(mDecodeHelper, "decode_etc1", rb_decode_etc1, 3); rb_define_module_function(mDecodeHelper, "decode_etc2", rb_decode_etc2, 3); rb_define_module_function(mDecodeHelper, "decode_etc2a1", rb_decode_etc2a1, 3); @@ -276,6 +339,5 @@ void Init_native() rb_define_module_function(mDecodeHelper, "decode_astc", rb_decode_astc, 5); rb_define_module_function(mDecodeHelper, "decode_dxt1", rb_decode_dxt1, 3); rb_define_module_function(mDecodeHelper, "decode_dxt5", rb_decode_dxt5, 3); - rb_define_module_function(mDecodeHelper, "decode_pvrtc1_4bpp", rb_decode_pvrtc1_4bpp, 3); - rb_define_module_function(mDecodeHelper, "decode_pvrtc1_2bpp", rb_decode_pvrtc1_2bpp, 3); + rb_define_module_function(mDecodeHelper, "decode_pvrtc1", rb_decode_pvrtc1, 4); } diff --git a/ext/decoders/native/pvrtc.c b/ext/decoders/native/pvrtc.c index f298cb2..81f77bb 100644 --- a/ext/decoders/native/pvrtc.c +++ b/ext/decoders/native/pvrtc.c @@ -1,36 +1,23 @@ #include "pvrtc.h" -#include "common.h" #include #include +#include "color.h" +#include "endianness.h" -#define MORTON_POS(x, y) (morton_table[num_blocks_x * (y) + (x)]) +static const int PVRTC1_STANDARD_WEIGHT[] = {0, 3, 5, 8}; +static const int PVRTC1_PUNCHTHROUGH_WEIGHT[] = {0, 4, 4, 8}; -static inline uint32_t color(uint8_t r, uint8_t g, uint8_t b, uint8_t a) { -#if BYTE_ORDER == LITTLE_ENDIAN - return r | g << 8 | b << 16 | a << 24; -#else - return a | b << 8 | g << 16 | r << 24; -#endif -} - -static inline int morton_index(const int x, const int y, const int numblocks_x, const int numblocks_y) { - const int min_dim = numblocks_x <= numblocks_y ? numblocks_x : numblocks_y; - int offset = 0, shift = 0; - for (int mask = 1; mask < min_dim; mask <<= 1, shift++) { +static inline long morton_index(const long x, const long y, const long min_dim) { + long offset = 0, shift = 0; + for (long mask = 1; mask < min_dim; mask <<= 1, shift++) offset |= (((y & mask) | ((x & mask) << 1))) << shift; - } offset |= ((x | y) >> shift) << (shift * 2); return offset; } static void get_texel_colors(const uint8_t *data, PVRTCTexelInfo *info) { -#if BYTE_ORDER == LITTLE_ENDIAN - uint16_t ca = *(uint16_t *)(data + 4); - uint16_t cb = *(uint16_t *)(data + 6); -#else - uint16_t ca = data[4] | data[5] << 8; - uint16_t cb = data[6] | data[7] << 8; -#endif + uint16_t ca = lton16(*(uint16_t *)(data + 4)); + uint16_t cb = lton16(*(uint16_t *)(data + 6)); if (ca & 0x8000) { info->a.r = ca >> 10 & 0x1f; info->a.g = ca >> 5 & 0x1f; @@ -59,47 +46,19 @@ static void get_texel_weights_4bpp(const uint8_t *data, PVRTCTexelInfo *info) { info->punch_through_flag = 0; int mod_mode = data[4] & 1; -#if BYTE_ORDER == LITTLE_ENDIAN - uint32_t mod_bits = *(uint32_t *)data; -#else - uint32_t mod_bits = data[0] | data[1] << 8 | data[2] << 16 | data[3] << 24; -#endif + uint32_t mod_bits = lton32(*(uint32_t *)data); if (mod_mode) { // punch-through for (int i = 0; i < 16; i++, mod_bits >>= 2) { - switch (mod_bits & 3) { - case 0: - info->weight[i] = 0; - break; - case 3: - info->weight[i] = 8; - break; - case 2: + info->weight[i] = PVRTC1_PUNCHTHROUGH_WEIGHT[mod_bits & 3]; + if ((mod_bits & 3) == 2) info->punch_through_flag |= 1 << i; - // fall through - default: - info->weight[i] = 4; - } } } else { // standard - for (int i = 0; i < 16; i++, mod_bits >>= 2) { - switch (mod_bits & 3) { - case 0: - info->weight[i] = 0; - break; - case 1: - info->weight[i] = 3; - break; - case 2: - info->weight[i] = 5; - break; - case 3: - info->weight[i] = 8; - break; - } - } + for (int i = 0; i < 16; i++, mod_bits >>= 2) + info->weight[i] = PVRTC1_STANDARD_WEIGHT[mod_bits & 3]; } } @@ -107,11 +66,7 @@ static void get_texel_weights_2bpp(const uint8_t *data, PVRTCTexelInfo *info) { info->punch_through_flag = 0; int mod_mode = data[4] & 1; -#if BYTE_ORDER == LITTLE_ENDIAN - uint32_t mod_bits = *(uint32_t *)data; -#else - uint32_t mod_bits = data[0] | data[1] << 8 | data[2] << 16 | data[3] << 24; -#endif + uint32_t mod_bits = lton32(*(uint32_t *)data); if (mod_mode) { // interporated modulation @@ -123,24 +78,9 @@ static void get_texel_weights_2bpp(const uint8_t *data, PVRTCTexelInfo *info) { for (int y = 0, i = 1; y < 4; ++y & 1 ? --i : ++i) for (int x = 0; x < 4; x++, i += 2) info->weight[i] = fillflag; - for (int y = 0, i = 0; y < 4; ++y & 1 ? ++i : --i) { - for (int x = 0; x < 4; x++, i += 2, mod_bits >>= 2) { - switch (mod_bits & 3) { - case 0: - info->weight[i] = 0; - break; - case 1: - info->weight[i] = 3; - break; - case 2: - info->weight[i] = 5; - break; - case 3: - info->weight[i] = 8; - break; - } - } - } + for (int y = 0, i = 0; y < 4; ++y & 1 ? ++i : --i) + for (int x = 0; x < 4; x++, i += 2, mod_bits >>= 2) + info->weight[i] = PVRTC1_STANDARD_WEIGHT[mod_bits & 3]; // 0 は常に 1bpp info->weight[0] = (info->weight[0] + 3) & 8; if (data[0] & 1) @@ -153,7 +93,7 @@ static void get_texel_weights_2bpp(const uint8_t *data, PVRTCTexelInfo *info) { } } -static void applicate_color_4bpp(const uint8_t *data, PVRTCTexelInfo *const info[9], uint32_t buf[16]) { +static void applicate_color_4bpp(const uint8_t *data, PVRTCTexelInfo *const info[9], uint32_t buf[32]) { static const int INTERP_WEIGHT[4][3] = {{2, 2, 0}, {1, 3, 0}, {0, 4, 0}, {0, 3, 1}}; PVRTCTexelColorInt clr_a[16] = {}, clr_b[16] = {}; @@ -195,7 +135,7 @@ static void applicate_color_4bpp(const uint8_t *data, PVRTCTexelInfo *const info } } -static void applicate_color_2bpp(const uint8_t *data, PVRTCTexelInfo *info[9], uint32_t buf[32]) { +static void applicate_color_2bpp(const uint8_t *data, PVRTCTexelInfo *const info[9], uint32_t buf[32]) { static const int INTERP_WEIGHT_X[8][3] = {{4, 4, 0}, {3, 5, 0}, {2, 6, 0}, {1, 7, 0}, {0, 8, 0}, {0, 7, 1}, {0, 6, 2}, {0, 5, 3}}; static const int INTERP_WEIGHT_Y[4][3] = {{2, 2, 0}, {1, 3, 0}, {0, 4, 0}, {0, 3, 1}}; @@ -262,108 +202,57 @@ static void applicate_color_2bpp(const uint8_t *data, PVRTCTexelInfo *info[9], u } } -int decode_pvrtc_4bpp(const uint8_t *data, const int w, const int h, uint32_t *image) { - int num_blocks_x = (w + 3) / 4; - int num_blocks_y = (h + 3) / 4; - int num_blocks = num_blocks_x * num_blocks_y; - int copy_length_last = (w + 3) % 4 + 1; +int decode_pvrtc(const uint8_t *data, const long w, const long h, uint32_t *image, const int is2bpp) { + long bw = is2bpp ? 8 : 4; + long num_blocks_x = is2bpp ? (w + 7) / 8 : (w + 3) / 4; + long num_blocks_y = (h + 3) / 4; + long num_blocks = num_blocks_x * num_blocks_y; + long min_num_blocks = num_blocks_x <= num_blocks_y ? num_blocks_x : num_blocks_y; - int *morton_table = (int *)malloc(sizeof(int) * num_blocks); - if (morton_table == NULL) + if ((num_blocks_x & (num_blocks_x - 1)) || (num_blocks_y & (num_blocks_y - 1))) { + extern const char *error_msg; + error_msg = "the number of blocks of each side must be a power of 2"; return 0; + } + PVRTCTexelInfo *texel_info = (PVRTCTexelInfo *)malloc(sizeof(PVRTCTexelInfo) * num_blocks); if (texel_info == NULL) { - free(morton_table); + extern const char *error_msg; + error_msg = "memory allocation failed"; return 0; } - for (int y = 0; y < num_blocks_y; y++) - for (int x = 0; x < num_blocks_x; x++) - MORTON_POS(x, y) = morton_index(x, y, num_blocks_x, num_blocks_y); + void (*get_texel_weights_func)(const uint8_t *, PVRTCTexelInfo *) = + is2bpp ? get_texel_weights_2bpp : get_texel_weights_4bpp; + void (*applicate_color_func)(const uint8_t *, PVRTCTexelInfo *const[9], uint32_t[32]) = + is2bpp ? applicate_color_2bpp : applicate_color_4bpp; const uint8_t *d = data; - for (int i = 0; i < num_blocks; i++, d += 8) { + for (long i = 0; i < num_blocks; i++, d += 8) { get_texel_colors(d, &texel_info[i]); - get_texel_weights_4bpp(d, &texel_info[i]); - } - - uint32_t buffer[16]; - uint32_t *buffer_end = buffer + 16; - PVRTCTexelInfo *local_info[9]; - int pos_x[3], pos_y[3]; - for (int by = 0; by < num_blocks_y; by++) { - pos_y[0] = by == 0 ? num_blocks_y - 1 : by - 1; - pos_y[1] = by; - pos_y[2] = by == num_blocks_y - 1 ? 0 : by + 1; - for (int bx = 0, x = 0; bx < num_blocks_x; bx++, x += 4) { - pos_x[0] = bx == 0 ? num_blocks_x - 1 : bx - 1; - pos_x[1] = bx; - pos_x[2] = bx == num_blocks_x - 1 ? 0 : bx + 1; - for (int cy = 0, c = 0; cy < 3; cy++) - for (int cx = 0; cx < 3; cx++, c++) - local_info[c] = &texel_info[MORTON_POS(pos_x[cx], pos_y[cy])]; - applicate_color_4bpp(data + MORTON_POS(bx, by) * 8, local_info, buffer); - int copy_length = (bx < num_blocks_x - 1 ? 4 : copy_length_last) * 4; - uint32_t *b = buffer; - for (int y = h - by * 4 - 1; b < buffer_end && y >= 0; y--, b += 4) - memcpy(image + y * w + x, b, copy_length); - } - } - - free(morton_table); - free(texel_info); - return 1; -} - -int decode_pvrtc_2bpp(const uint8_t *data, const int w, const int h, uint32_t *image) { - int num_blocks_x = (w + 7) / 8; - int num_blocks_y = (h + 3) / 4; - int num_blocks = num_blocks_x * num_blocks_y; - int copy_length_last = (w + 7) % 8 + 1; - - int *morton_table = (int *)malloc(sizeof(int) * num_blocks); - if (morton_table == NULL) - return 0; - PVRTCTexelInfo *texel_info = (PVRTCTexelInfo *)malloc(sizeof(PVRTCTexelInfo) * num_blocks); - if (texel_info == NULL) { - free(morton_table); - return 0; - } - - for (int y = 0; y < num_blocks_y; y++) - for (int x = 0; x < num_blocks_x; x++) - MORTON_POS(x, y) = morton_index(x, y, num_blocks_x, num_blocks_y); - - const uint8_t *d = data; - for (int i = 0; i < num_blocks; i++, d += 8) { - get_texel_colors(d, &texel_info[i]); - get_texel_weights_2bpp(d, &texel_info[i]); + get_texel_weights_func(d, &texel_info[i]); } uint32_t buffer[32]; - uint32_t *buffer_end = buffer + 32; PVRTCTexelInfo *local_info[9]; - int pos_x[3], pos_y[3]; - for (int by = 0; by < num_blocks_y; by++) { + long pos_x[3], pos_y[3]; + + for (long by = 0; by < num_blocks_y; by++) { pos_y[0] = by == 0 ? num_blocks_y - 1 : by - 1; pos_y[1] = by; pos_y[2] = by == num_blocks_y - 1 ? 0 : by + 1; - for (int bx = 0, x = 0; bx < num_blocks_x; bx++, x += 8) { + for (long bx = 0, x = 0; bx < num_blocks_x; bx++, x += 4) { pos_x[0] = bx == 0 ? num_blocks_x - 1 : bx - 1; pos_x[1] = bx; pos_x[2] = bx == num_blocks_x - 1 ? 0 : bx + 1; - for (int cy = 0, c = 0; cy < 3; cy++) - for (int cx = 0; cx < 3; cx++, c++) - local_info[c] = &texel_info[MORTON_POS(pos_x[cx], pos_y[cy])]; - applicate_color_2bpp(data + MORTON_POS(bx, by) * 8, local_info, buffer); - int copy_length = (bx < num_blocks_x - 1 ? 8 : copy_length_last) * 4; - uint32_t *b = buffer; - for (int y = h - by * 4 - 1; b < buffer_end && y >= 0; y--, b += 8) - memcpy(image + y * w + x, b, copy_length); + for (long cy = 0, c = 0; cy < 3; cy++) + for (long cx = 0; cx < 3; cx++, c++) + local_info[c] = &texel_info[morton_index(pos_x[cx], pos_y[cy], min_num_blocks)]; + applicate_color_func(data + morton_index(bx, by, min_num_blocks) * 8, local_info, buffer); + copy_block_buffer(bx, by, w, h, bw, 4, buffer, image); } } - free(morton_table); free(texel_info); return 1; } diff --git a/ext/decoders/native/pvrtc.h b/ext/decoders/native/pvrtc.h index 3bfe57e..f0a7e60 100644 --- a/ext/decoders/native/pvrtc.h +++ b/ext/decoders/native/pvrtc.h @@ -24,7 +24,6 @@ typedef struct { uint32_t punch_through_flag; } PVRTCTexelInfo; -int decode_pvrtc_4bpp(const uint8_t*, const int, const int, uint32_t*); -int decode_pvrtc_2bpp(const uint8_t*, const int, const int, uint32_t*); +int decode_pvrtc(const uint8_t *, const long, const long, uint32_t *, const int); #endif /* end of include guard: PVRTC_H */ diff --git a/ext/decoders/native/rgb.c b/ext/decoders/native/rgb.c index 0554805..95b5059 100644 --- a/ext/decoders/native/rgb.c +++ b/ext/decoders/native/rgb.c @@ -1,66 +1,102 @@ #include "rgb.h" -#include "common.h" +#include #include +#include "color.h" +#include "fp16.h" -void decode_a8(const uint8_t* data, const int size, uint8_t* image) -{ +int decode_a8(const uint8_t *const data, const long size, uint8_t *image) { const uint8_t *d = data, *d_end = data + size; for (int i = 0; d < d_end; d++) { image[i++] = *d; image[i++] = *d; image[i++] = *d; } + return 1; } -void decode_r8(const uint8_t* data, const int size, uint8_t* image) -{ +int decode_r8(const uint8_t *const data, const long size, uint8_t *image) { const uint8_t *d = data, *d_end = data + size; for (int i = 0; d < d_end; d++) { image[i++] = *d; image[i++] = 0; image[i++] = 0; } + return 1; } -void decode_r16(const uint16_t* data, const int size, const int endian_big, uint8_t* image) -{ - const uint16_t *d = data, *d_end = data + size; - if (IS_LITTLE_ENDIAN == !endian_big) { - // Same endian - for (int i = 0; d < d_end; d++) { - uint8_t c = *d >> 8; - image[i++] = c; - image[i++] = 0; - image[i++] = 0; - } - } else { - // Different endian - for (int i = 0; d < d_end; d++) { - image[i++] = *d; - image[i++] = 0; - image[i++] = 0; - } +int decode_r16(const uint8_t *const data, const long size, const int endian_big, uint8_t *image) { + const uint8_t *d = endian_big ? data : data + 1; + const uint8_t *d_end = data + size * 2; + for (int i = 0; d < d_end; d += 2) { + image[i++] = *d; + image[i++] = 0; + image[i++] = 0; } + return 1; } -void decode_rgb565(const uint16_t* data, const int size, const int endian_big, uint8_t* image) -{ +int decode_rgb565(const uint16_t *const data, const long size, const int endian_big, uint8_t *image) { const uint16_t *d = data, *d_end = data + size; - if (IS_LITTLE_ENDIAN == !endian_big) { - // Same endian - // RRRRR GGG | GGG BBBBB - for (int i = 0; d < d_end; d++) { - image[i++] = (*d >> 8 & 0xf8) | (*d >> 13); - image[i++] = (*d >> 3 & 0xfc) | (*d >> 9 & 3); - image[i++] = (*d << 3) | (*d >> 2 & 7); + if (endian_big) + for (; d < d_end; d++, image += 3) + rgb565_bep(*d, image); + else + for (; d < d_end; d++, image += 3) + rgb565_lep(*d, image); + return 1; +} + +static inline uint8_t u16_f16_u8(const uint16_t val) { + float f = fp16_ieee_to_fp32_value(val); + if (!isfinite(f) || f < 0) + return 0; + else if (f > 1) + return 255; + else + return roundf(f * 255); +} + +int decode_rhalf(const uint16_t *data, const long size, const int endian_big, uint8_t *image) { + if (endian_big) { + for (long i = 0; i < size; i++, data++) { + *image++ = u16_f16_u8(bton16(*data)); + *image++ = 0; + *image++ = 0; } } else { - // Different endian - // GGG BBBBB | RRRRR GGG - for (int i = 0; d < d_end; d++) { - image[i++] = (*d & 0xf8) | (*d >> 5 & 7); - image[i++] = (*d << 5 & 0xe0) | (*d >> 11 & 0x1c) | (*d >> 1 & 3); - image[i++] = (*d >> 5 & 0xf8) | (*d >> 10 & 0x7); + for (long i = 0; i < size; i++, data++) { + *image++ = u16_f16_u8(lton16(*data)); + *image++ = 0; + *image++ = 0; } } + return 1; +} + +int decode_rghalf(const uint16_t *data, const long size, const int endian_big, uint8_t *image) { + if (endian_big) { + for (long i = 0; i < size; i++, data++, image++) { + *image++ = u16_f16_u8(bton16(*data++)); + *image++ = u16_f16_u8(bton16(*data++)); + *image++ = 0; + } + } else { + for (long i = 0; i < size; i++, data++) { + *image++ = u16_f16_u8(lton16(*data++)); + *image++ = u16_f16_u8(lton16(*data++)); + *image++ = 0; + } + } + return 1; +} + +int decode_rgbahalf(const uint16_t *data, const long size, const int endian_big, uint8_t *image) { + long lsize = size * 4; + if (endian_big) + for (long i = 0; i < lsize; i++, data++, image++) + *image = u16_f16_u8(bton16(*data)); + else + for (long i = 0; i < lsize; i++, data++, image++) + *image = u16_f16_u8(lton16(*data)); + return 1; } diff --git a/ext/decoders/native/rgb.h b/ext/decoders/native/rgb.h index 72ad336..e5d9f87 100644 --- a/ext/decoders/native/rgb.h +++ b/ext/decoders/native/rgb.h @@ -3,9 +3,12 @@ #include -void decode_a8(const uint8_t*, const int, uint8_t*); -void decode_r8(const uint8_t*, const int, uint8_t*); -void decode_r16(const uint16_t*, const int, const int, uint8_t*); -void decode_rgb565(const uint16_t*, const int, const int, uint8_t*); +int decode_a8(const uint8_t *const, const long, uint8_t *); +int decode_r8(const uint8_t *const, const long, uint8_t *); +int decode_r16(const uint8_t *const, const long, const int, uint8_t *); +int decode_rgb565(const uint16_t *const, const long, const int, uint8_t *); +int decode_rhalf(const uint16_t *const, const long, const int, uint8_t *); +int decode_rghalf(const uint16_t *const, const long, const int, uint8_t *); +int decode_rgbahalf(const uint16_t *const, const long, const int, uint8_t *); #endif /* end of include guard: RGB_H */ diff --git a/lib/mikunyan/decoders/image_decoder.rb b/lib/mikunyan/decoders/image_decoder.rb index f21f5f1..0bf17f0 100644 --- a/lib/mikunyan/decoders/image_decoder.rb +++ b/lib/mikunyan/decoders/image_decoder.rb @@ -74,9 +74,9 @@ module Mikunyan when 28, 29, 64, 65 # DXT1Crunched, DXT5Crunched, ETC_RGB4Crunched, ETC2_RGBA8Crunched decode_crunched(width, height, bin) when 30, 31, -127 # PVRTC_RGB2, PVRTC_RGBA2, PVRTC_2BPP_RGBA - decode_pvrtc1_2bpp(width, height, bin) + decode_pvrtc1(width, height, bin, 2) when 32, 33 # PVRTC_RGB4, PVRTC_RGBA4 - decode_pvrtc1_4bpp(width, height, bin) + decode_pvrtc1(width, height, bin, 4) when 34 # ETC_RGB4 decode_etc1(width, height, bin) # when 41 # EAC_R @@ -266,12 +266,7 @@ module Mikunyan # @param [Symbol] endian endianness of binary # @return [ChunkyPNG::Image] decoded image def self.decode_rhalf(width, height, bin, endian = :big) - mem = String.new(capacity: width * height * 3) - (width * height).times do |i| - c = f2i(n2f(endian == :little ? BinUtils.get_int16_le(bin, i * 2) : BinUtils.get_int16_be(bin, i * 2))) - BinUtils.append_int8!(mem, c, c, c) - end - ChunkyPNG::Image.from_rgb_stream(width, height, mem).flip + ChunkyPNG::Image.from_rgb_stream(width, height, DecodeHelper.decode_rhalf(bin, width * height, endian == :big)).flip end # Decode image from RG Half-float binary @@ -281,13 +276,7 @@ module Mikunyan # @param [Symbol] endian endianness of binary # @return [ChunkyPNG::Image] decoded image def self.decode_rghalf(width, height, bin, endian = :big) - mem = String.new(capacity: width * height * 3) - (width * height).times do |i| - r = f2i(n2f(endian == :little ? BinUtils.get_int16_le(bin, i * 4) : BinUtils.get_int16_be(bin, i * 4))) - g = f2i(n2f(endian == :little ? BinUtils.get_int16_le(bin, i * 4 + 2) : BinUtils.get_int16_be(bin, i * 4 + 2))) - BinUtils.append_int8!(mem, r, g, 0) - end - ChunkyPNG::Image.from_rgb_stream(width, height, mem).flip + ChunkyPNG::Image.from_rgb_stream(width, height, DecodeHelper.decode_rghalf(bin, width * height, endian == :big)).flip end # Decode image from RGBA Half-float binary @@ -297,15 +286,7 @@ module Mikunyan # @param [Symbol] endian endianness of binary # @return [ChunkyPNG::Image] decoded image def self.decode_rgbahalf(width, height, bin, endian = :big) - mem = String.new(capacity: width * height * 4) - (width * height).times do |i| - r = f2i(n2f(endian == :little ? BinUtils.get_int16_le(bin, i * 8) : BinUtils.get_int16_be(bin, i * 8))) - g = f2i(n2f(endian == :little ? BinUtils.get_int16_le(bin, i * 8 + 2) : BinUtils.get_int16_be(bin, i * 8 + 2))) - b = f2i(n2f(endian == :little ? BinUtils.get_int16_le(bin, i * 8 + 4) : BinUtils.get_int16_be(bin, i * 8 + 4))) - a = f2i(n2f(endian == :little ? BinUtils.get_int16_le(bin, i * 8 + 6) : BinUtils.get_int16_be(bin, i * 8 + 6))) - BinUtils.append_int8!(mem, r, g, b, a) - end - ChunkyPNG::Image.from_rgba_stream(width, height, mem).flip + ChunkyPNG::Image.from_rgba_stream(width, height, DecodeHelper.decode_rgbahalf(bin, width * height, endian == :big)).flip end # Decode image from R float binary @@ -374,22 +355,15 @@ module Mikunyan ChunkyPNG::Image.from_rgba_stream(width, height, DecodeHelper.decode_dxt5(bin, width, height)) end - # Decode image from PVRTC1 4bpp compressed binary + # Decode image from PVRTC1 compressed binary # @param [Integer] width image width # @param [Integer] height image height # @param [String] bin binary to decode + # @param [Integer] bpp bit per pixel (2 or 4) # @return [ChunkyPNG::Image] decoded image - def self.decode_pvrtc1_4bpp(width, height, bin) - ChunkyPNG::Image.from_rgba_stream(width, height, DecodeHelper.decode_pvrtc1_4bpp(bin, width, height)) - end - - # Decode image from PVRTC1 2bpp compressed binary - # @param [Integer] width image width - # @param [Integer] height image height - # @param [String] bin binary to decode - # @return [ChunkyPNG::Image] decoded image - def self.decode_pvrtc1_2bpp(width, height, bin) - ChunkyPNG::Image.from_rgba_stream(width, height, DecodeHelper.decode_pvrtc1_2bpp(bin, width, height)) + def self.decode_pvrtc1(width, height, bin, bpp) + raise 'bpp of PVRTC1 must be 2 or 4' unless bpp == 2 || bpp == 4 + ChunkyPNG::Image.from_rgba_stream(width, height, DecodeHelper.decode_pvrtc1(bin, width, height, bpp == 2)) end # Decode image from ETC1 compressed binary @@ -483,35 +457,10 @@ module Mikunyan header + bin end - # convert 16bit float - def self.n2f(n) - case n - when 0x0000 - 0.0 - when 0x8000 - -0.0 - when 0x7c00 - Float::INFINITY - when 0xfc00 - -Float::INFINITY - else - s = n & 0x8000 != 0 - e = n & 0x7c00 - f = n & 0x03ff - case e - when 0x7c00 - Float::NAN - when 0 - (s ? -f : f) * 2.0**-24 - else - (s ? -1 : 1) * (f / 1024.0 + 1) * (2.0**((e >> 10) - 15)) - end - end - end - # [0.0,1.0] -> [0,255] - def self.f2i(d) - (d * 255).round.clamp(0, 255) + def self.f2i(val) + return 0 unless val.finite? + (val * 255).round.clamp(0, 255) end end end