diff --git a/ext/decoders/native/astc.c b/ext/decoders/native/astc.c index 2d45a76..9a56aaa 100644 --- a/ext/decoders/native/astc.c +++ b/ext/decoders/native/astc.c @@ -4,84 +4,7 @@ #include #include "astc.h" -static inline uint32_t color(uint8_t r, uint8_t g, uint8_t b, uint8_t a) { - return r | g << 8 | b << 16 | a << 24; -} - -static inline int getbits(const uint8_t *buf, const int bit, const int len) { - return (*(int*)(buf + bit / 8) >> (bit % 8)) & ((1 << len) - 1); -} - -static inline uint64_t getbits64(const uint8_t *buf, const int bit, const int len) { - uint64_t mask = len == 64 ? -1 : (1ull << len) - 1; - if (len < 1) - return 0; - else if (bit >= 64) - return ((uint64_t*)buf)[1] >> (bit - 64) & mask; - else if (bit <= 0) - return ((uint64_t*)buf)[0] << -bit & mask; - else if (bit + len <= 64) - return (*(uint64_t*)buf) >> bit & mask; - else - return ((*(uint64_t*)buf) >> bit | ((uint64_t*)buf)[1] << (64 - bit)) & mask; -} - -static inline uint8_t clamp(const int n) { - return n < 0 ? 0 : n > 255 ? 255 : n; -} - -static inline void bit_transfer_signed(int *a, int *b) { - *b = (*b >> 1) | (*a & 0x80); - *a = (*a >> 1) & 0x3f; - if (*a & 0x20) - *a -= 0x40; -} - -static inline void set_endpoint(uint8_t endpoint[8], uint8_t r1, uint8_t g1, uint8_t b1, uint8_t a1, uint8_t r2, uint8_t g2, uint8_t b2, uint8_t a2) { - endpoint[0] = r1; - endpoint[1] = g1; - endpoint[2] = b1; - endpoint[3] = a1; - endpoint[4] = r2; - endpoint[5] = g2; - endpoint[6] = b2; - endpoint[7] = a2; -} - -static inline void set_endpoint_clamp(uint8_t endpoint[8], int r1, int g1, int b1, int a1, int r2, int g2, int b2, int a2) { - endpoint[0] = clamp(r1); - endpoint[1] = clamp(g1); - endpoint[2] = clamp(b1); - endpoint[3] = clamp(a1); - endpoint[4] = clamp(r2); - endpoint[5] = clamp(g2); - endpoint[6] = clamp(b2); - endpoint[7] = clamp(a2); -} - -static inline void set_endpoint_blue(uint8_t endpoint[8], int r1, int g1, int b1, int a1, int r2, int g2, int b2, int a2) { - endpoint[0] = (r1 + b1) >> 1; - endpoint[1] = (g1 + b1) >> 1; - endpoint[2] = b1; - endpoint[3] = a1; - endpoint[4] = (r2 + b2) >> 1; - endpoint[5] = (g2 + b2) >> 1; - endpoint[6] = b2; - endpoint[7] = a2; -} - -static inline void set_endpoint_blue_clamp(uint8_t endpoint[8], int r1, int g1, int b1, int a1, int r2, int g2, int b2, int a2) { - endpoint[0] = clamp((r1 + b1) >> 1); - endpoint[1] = clamp((g1 + b1) >> 1); - endpoint[2] = clamp(b1); - endpoint[3] = clamp(a1); - endpoint[4] = clamp((r2 + b2) >> 1); - endpoint[5] = clamp((g2 + b2) >> 1); - endpoint[6] = clamp(b2); - endpoint[7] = clamp(a2); -} - -static const uint8_t BitReverseTable[] = { +static const int BitReverseTable[] = { 0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0, 0x70, 0xF0, 0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8, 0x18, 0x98, 0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8, 0x04, 0x84, 0x44, 0xC4, 0x24, 0xA4, 0x64, 0xE4, 0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4, @@ -100,28 +23,109 @@ static const uint8_t BitReverseTable[] = { 0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, 0xEF, 0x1F, 0x9F, 0x5F, 0xDF, 0x3F, 0xBF, 0x7F, 0xFF }; -static inline uint8_t bit_reverse_u8(const uint8_t c, const int bits) { - return BitReverseTable[c] >> (8 - bits); -} - -static inline uint64_t bit_reverse_u64(const uint64_t d, const int bits) { - uint64_t ret = - (uint64_t)BitReverseTable[d & 0xff] << 56 | - (uint64_t)BitReverseTable[d >> 8 & 0xff] << 48 | - (uint64_t)BitReverseTable[d >> 16 & 0xff] << 40 | - (uint64_t)BitReverseTable[d >> 24 & 0xff] << 32 | - (uint64_t)BitReverseTable[d >> 32 & 0xff] << 24 | - (uint64_t)BitReverseTable[d >> 40 & 0xff] << 16 | - (uint64_t)BitReverseTable[d >> 48 & 0xff] << 8 | BitReverseTable[d >> 56 & 0xff]; - return ret >> (64 - bits); -} - static int WeightPrecTableA[] = {0, 0, 0, 3, 0, 5, 3, 0, 0, 0, 5, 3, 0, 5, 3, 0}; static int WeightPrecTableB[] = {0, 0, 1, 0, 2, 0, 1, 3, 0, 0, 1, 2, 4, 2, 3, 5}; static int CemTableA[] = {0, 3, 5, 0, 3, 5, 0, 3, 5, 0, 3, 5, 0, 3, 5, 0, 3, 0, 0}; static int CemTableB[] = {8, 6, 5, 7, 5, 4, 6, 4, 3, 5, 3, 2, 4, 2, 1, 3, 1, 2, 1}; +static inline uint_fast32_t color(uint_fast8_t r, uint_fast8_t g, uint_fast8_t b, uint_fast8_t a) { + return r | g << 8 | b << 16 | a << 24; +} + +static inline uint_fast8_t bit_reverse_u8(const uint_fast8_t c, const int bits) { + return BitReverseTable[c] >> (8 - bits); +} + +static inline uint_fast64_t bit_reverse_u64(const uint_fast64_t d, const int bits) { + uint_fast64_t ret = + (uint_fast64_t)BitReverseTable[d & 0xff] << 56 | + (uint_fast64_t)BitReverseTable[d >> 8 & 0xff] << 48 | + (uint_fast64_t)BitReverseTable[d >> 16 & 0xff] << 40 | + (uint_fast64_t)BitReverseTable[d >> 24 & 0xff] << 32 | + (uint_fast32_t)BitReverseTable[d >> 32 & 0xff] << 24 | + (uint_fast32_t)BitReverseTable[d >> 40 & 0xff] << 16 | + (uint_fast16_t)BitReverseTable[d >> 48 & 0xff] << 8 | BitReverseTable[d >> 56 & 0xff]; + return ret >> (64 - bits); +} + +static inline int getbits(const uint8_t *buf, const int bit, const int len) { + return (*(int*)(buf + bit / 8) >> (bit % 8)) & ((1 << len) - 1); +} + +static inline uint_fast64_t getbits64(const uint8_t *buf, const int bit, const int len) { + uint_fast64_t mask = len == 64 ? -1 : (1ull << len) - 1; + if (len < 1) + return 0; + else if (bit >= 64) + return (*(uint_fast64_t*)(buf + 8)) >> (bit - 64) & mask; + else if (bit <= 0) + return (*(uint_fast64_t*)buf) << -bit & mask; + else if (bit + len <= 64) + return (*(uint_fast64_t*)buf) >> bit & mask; + else + return ((*(uint_fast64_t*)buf) >> bit | *(uint_fast64_t*)(buf + 8) << (64 - bit)) & mask; +} + +static inline uint_fast8_t clamp(const int n) { + return n < 0 ? 0 : n > 255 ? 255 : n; +} + +static inline void bit_transfer_signed(int *a, int *b) { + *b = (*b >> 1) | (*a & 0x80); + *a = (*a >> 1) & 0x3f; + if (*a & 0x20) + *a -= 0x40; +} + +static inline void set_endpoint(int endpoint[8], int r1, int g1, int b1, int a1, int r2, int g2, int b2, int a2) { + endpoint[0] = r1; + endpoint[1] = g1; + endpoint[2] = b1; + endpoint[3] = a1; + endpoint[4] = r2; + endpoint[5] = g2; + endpoint[6] = b2; + endpoint[7] = a2; +} + +static inline void set_endpoint_clamp(int endpoint[8], int r1, int g1, int b1, int a1, int r2, int g2, int b2, int a2) { + endpoint[0] = clamp(r1); + endpoint[1] = clamp(g1); + endpoint[2] = clamp(b1); + endpoint[3] = clamp(a1); + endpoint[4] = clamp(r2); + endpoint[5] = clamp(g2); + endpoint[6] = clamp(b2); + endpoint[7] = clamp(a2); +} + +static inline void set_endpoint_blue(int endpoint[8], int r1, int g1, int b1, int a1, int r2, int g2, int b2, int a2) { + endpoint[0] = (r1 + b1) >> 1; + endpoint[1] = (g1 + b1) >> 1; + endpoint[2] = b1; + endpoint[3] = a1; + endpoint[4] = (r2 + b2) >> 1; + endpoint[5] = (g2 + b2) >> 1; + endpoint[6] = b2; + endpoint[7] = a2; +} + +static inline void set_endpoint_blue_clamp(int endpoint[8], int r1, int g1, int b1, int a1, int r2, int g2, int b2, int a2) { + endpoint[0] = clamp((r1 + b1) >> 1); + endpoint[1] = clamp((g1 + b1) >> 1); + endpoint[2] = clamp(b1); + endpoint[3] = clamp(a1); + endpoint[4] = clamp((r2 + b2) >> 1); + endpoint[5] = clamp((g2 + b2) >> 1); + endpoint[6] = clamp(b2); + endpoint[7] = clamp(a2); +} + +static inline uint_fast8_t select_color(int v0, int v1, int weight) { + return ((((v0 << 8 | v0) * (64 - weight) + (v1 << 8 | v1) * weight + 32) >> 6) * 255 + 32768) / 65536; +} + typedef struct { int bw; int bh; @@ -135,9 +139,9 @@ typedef struct { int cem[4]; int cem_range; int endpoint_value_num; // max: 32 - uint8_t endpoints[4][8]; - uint8_t weights[144][2]; - uint8_t partition[144]; + int endpoints[4][8]; + int weights[144][2]; + int partition[144]; } BlockData; typedef struct { @@ -176,14 +180,14 @@ void decode_intseq(const uint8_t *buf, int offset, const int a, const int b, con if (reverse) { for (int i = 0, p = offset; i < block_count; i++, p -= block_size) { int now_size = (i < block_count - 1) ? block_size : last_block_size; - uint64_t d = bit_reverse_u64(getbits64(buf, p - now_size, now_size), now_size); + uint_fast64_t d = bit_reverse_u64(getbits64(buf, p - now_size, now_size), now_size); int x = (d >> b & 3) | (d >> b * 2 & 0xc) | (d >> b * 3 & 0x10) | (d >> b * 4 & 0x60) | (d >> b * 5 & 0x80); for (int j = 0; j < 5 && n < count; j++, n++) out[n] = (IntSeqData){ d >> (mt[j] + b * j) & mask, TritsTable[j][x] }; } } else { for (int i = 0, p = offset; i < block_count; i++, p += block_size) { - uint64_t d = getbits64(buf, p, (i < block_count - 1) ? block_size : last_block_size); + uint_fast64_t d = getbits64(buf, p, (i < block_count - 1) ? block_size : last_block_size); int x = (d >> b & 3) | (d >> b * 2 & 0xc) | (d >> b * 3 & 0x10) | (d >> b * 4 & 0x60) | (d >> b * 5 & 0x80); for (int j = 0; j < 5 && n < count; j++, n++) out[n] = (IntSeqData){ d >> (mt[j] + b * j) & mask, TritsTable[j][x] }; @@ -199,14 +203,14 @@ void decode_intseq(const uint8_t *buf, int offset, const int a, const int b, con if (reverse) { for (int i = 0, p = offset; i < block_count; i++, p -= block_size) { int now_size = (i < block_count - 1) ? block_size : last_block_size; - uint64_t d = bit_reverse_u64(getbits64(buf, p - now_size, now_size), now_size); + uint_fast64_t d = bit_reverse_u64(getbits64(buf, p - now_size, now_size), now_size); int x = (d >> b & 7) | (d >> b * 2 & 0x18) | (d >> b * 3 & 0x60); for (int j = 0; j < 3 && n < count; j++, n++) out[n] = (IntSeqData){ d >> (mq[j] + b * j) & mask, QuintsTable[j][x] }; } } else { for (int i = 0, p = offset; i < block_count; i++, p += block_size) { - uint64_t d = getbits64(buf, p, (i < block_count - 1) ? block_size : last_block_size); + uint_fast64_t d = getbits64(buf, p, (i < block_count - 1) ? block_size : last_block_size); int x = (d >> b & 7) | (d >> b * 2 & 0x18) | (d >> b * 3 & 0x60); for (int j = 0; j < 3 && n < count; j++, n++) out[n] = (IntSeqData){ d >> (mq[j] + b * j) & mask, QuintsTable[j][x] }; @@ -230,16 +234,16 @@ void decode_block_params(const uint8_t *buf, BlockData *block_data) { block_data->weight_range |= buf[0] << 1 & 6; switch (buf[0] & 0xc) { case 0: - block_data->width = (*(uint16_t*)buf >> 7 & 3) + 4; + block_data->width = (*(int*)buf >> 7 & 3) + 4; block_data->height = (buf[0] >> 5 & 3) + 2; break; case 4: - block_data->width = (*(uint16_t*)buf >> 7 & 3) + 8; + block_data->width = (*(int*)buf >> 7 & 3) + 8; block_data->height = (buf[0] >> 5 & 3) + 2; break; case 8: block_data->width = (buf[0] >> 5 & 3) + 2; - block_data->height = (*(uint16_t*)buf >> 7 & 3) + 8; + block_data->height = (*(int*)buf >> 7 & 3) + 8; break; case 12: if (buf[1] & 1) { @@ -461,8 +465,8 @@ void decode_endpoints(const uint8_t *buf, BlockData *data) { break; case 1: { - uint8_t l0 = (v[0] >> 2) | (v[1] & 0xc0); - uint8_t l1 = clamp(l0 + (v[1] & 0x3f)); + int l0 = (v[0] >> 2) | (v[1] & 0xc0); + int l1 = clamp(l0 + (v[1] & 0x3f)); set_endpoint(data->endpoints[cem], l0, l0, l0, 255, l1, l1, l1, 255); } break; @@ -638,7 +642,7 @@ void select_partition(const uint8_t *buf, BlockData *data) { rnum ^= rnum << 6; rnum ^= rnum >> 17; - uint8_t seeds[8]; + int seeds[8]; for (int i = 0; i < 8; i++) { seeds[i] = (rnum >> (i * 4)) & 0xF; seeds[i] *= seeds[i]; @@ -678,10 +682,6 @@ void select_partition(const uint8_t *buf, BlockData *data) { } } -static inline uint8_t select_color(int v0, int v1, int weight) { - return ((((v0 << 8 | v0) * (64 - weight) + (v1 << 8 | v1) * weight + 32) >> 6) * 255 + 32768) / 65536; -} - void applicate_color(const BlockData *data, uint32_t *outbuf) { if (data->dual_plane) { int ps[] = { 0, 0, 0, 0 }; @@ -689,36 +689,36 @@ void applicate_color(const BlockData *data, uint32_t *outbuf) { if (data->part_num > 1) { for (int i = 0; i < data->bw * data->bh; i++) { int p = data->partition[i]; - uint8_t r = select_color(data->endpoints[p][0], data->endpoints[p][4], data->weights[i][ps[0]]); - uint8_t g = select_color(data->endpoints[p][1], data->endpoints[p][5], data->weights[i][ps[1]]); - uint8_t b = select_color(data->endpoints[p][2], data->endpoints[p][6], data->weights[i][ps[2]]); - uint8_t a = select_color(data->endpoints[p][3], data->endpoints[p][7], data->weights[i][ps[3]]); + uint_fast8_t r = select_color(data->endpoints[p][0], data->endpoints[p][4], data->weights[i][ps[0]]); + uint_fast8_t g = select_color(data->endpoints[p][1], data->endpoints[p][5], data->weights[i][ps[1]]); + uint_fast8_t b = select_color(data->endpoints[p][2], data->endpoints[p][6], data->weights[i][ps[2]]); + uint_fast8_t a = select_color(data->endpoints[p][3], data->endpoints[p][7], data->weights[i][ps[3]]); outbuf[i] = color(r, g, b, a); } } else { for (int i = 0; i < data->bw * data->bh; i++) { - uint8_t r = select_color(data->endpoints[0][0], data->endpoints[0][4], data->weights[i][ps[0]]); - uint8_t g = select_color(data->endpoints[0][1], data->endpoints[0][5], data->weights[i][ps[1]]); - uint8_t b = select_color(data->endpoints[0][2], data->endpoints[0][6], data->weights[i][ps[2]]); - uint8_t a = select_color(data->endpoints[0][3], data->endpoints[0][7], data->weights[i][ps[3]]); + uint_fast8_t r = select_color(data->endpoints[0][0], data->endpoints[0][4], data->weights[i][ps[0]]); + uint_fast8_t g = select_color(data->endpoints[0][1], data->endpoints[0][5], data->weights[i][ps[1]]); + uint_fast8_t b = select_color(data->endpoints[0][2], data->endpoints[0][6], data->weights[i][ps[2]]); + uint_fast8_t a = select_color(data->endpoints[0][3], data->endpoints[0][7], data->weights[i][ps[3]]); outbuf[i] = color(r, g, b, a); } } } else if (data->part_num > 1) { for (int i = 0; i < data->bw * data->bh; i++) { int p = data->partition[i]; - uint8_t r = select_color(data->endpoints[p][0], data->endpoints[p][4], data->weights[i][0]); - uint8_t g = select_color(data->endpoints[p][1], data->endpoints[p][5], data->weights[i][0]); - uint8_t b = select_color(data->endpoints[p][2], data->endpoints[p][6], data->weights[i][0]); - uint8_t a = select_color(data->endpoints[p][3], data->endpoints[p][7], data->weights[i][0]); + uint_fast8_t r = select_color(data->endpoints[p][0], data->endpoints[p][4], data->weights[i][0]); + uint_fast8_t g = select_color(data->endpoints[p][1], data->endpoints[p][5], data->weights[i][0]); + uint_fast8_t b = select_color(data->endpoints[p][2], data->endpoints[p][6], data->weights[i][0]); + uint_fast8_t a = select_color(data->endpoints[p][3], data->endpoints[p][7], data->weights[i][0]); outbuf[i] = color(r, g, b, a); } } else { for (int i = 0; i < data->bw * data->bh; i++) { - uint8_t r = select_color(data->endpoints[0][0], data->endpoints[0][4], data->weights[i][0]); - uint8_t g = select_color(data->endpoints[0][1], data->endpoints[0][5], data->weights[i][0]); - uint8_t b = select_color(data->endpoints[0][2], data->endpoints[0][6], data->weights[i][0]); - uint8_t a = select_color(data->endpoints[0][3], data->endpoints[0][7], data->weights[i][0]); + uint_fast8_t r = select_color(data->endpoints[0][0], data->endpoints[0][4], data->weights[i][0]); + uint_fast8_t g = select_color(data->endpoints[0][1], data->endpoints[0][5], data->weights[i][0]); + uint_fast8_t b = select_color(data->endpoints[0][2], data->endpoints[0][6], data->weights[i][0]); + uint_fast8_t a = select_color(data->endpoints[0][3], data->endpoints[0][7], data->weights[i][0]); outbuf[i] = color(r, g, b, a); } } @@ -726,7 +726,7 @@ void applicate_color(const BlockData *data, uint32_t *outbuf) { void decode_block(const uint8_t *buf, const int bw, const int bh, uint32_t *outbuf) { if (buf[0] == 0xfc && (buf[1] & 1) == 1) { - uint32_t c = color(buf[9], buf[11], buf[13], buf[15]); + uint_fast32_t c = color(buf[9], buf[11], buf[13], buf[15]); for (int i = 0; i < bw * bh; i++) outbuf[i] = c; } else {