diff --git a/ext/decoders/native/main.c b/ext/decoders/native/main.c
index fc31fcb..a733f60 100644
--- a/ext/decoders/native/main.c
+++ b/ext/decoders/native/main.c
@@ -237,6 +237,30 @@ static VALUE rb_decode_pvrtc1_4bpp(VALUE self, VALUE rb_data, VALUE w, VALUE h)
     return ret;
 }
 
+/*
+ * Decode image from PVRTC1 2bpp compressed binary
+ *
+ * @param [String] rb_data binary to decode (8 bytes per 8x4-pixel block)
+ * @param [Integer] w image width
+ * @param [Integer] h image height
+ * @return [String] decoded rgba binary, 4 bytes per pixel (raises StandardError on short input or decoder failure)
+ */
+static VALUE rb_decode_pvrtc1_2bpp(VALUE self, VALUE rb_data, VALUE w, VALUE h)
+{
+    if (RSTRING_LEN(rb_data) < ((FIX2LONG(w) + 7) / 8) * ((FIX2LONG(h) + 3) / 4) * 8) {
+        rb_raise(rb_eStandardError, "Data size is not enough.");
+        return Qnil;
+    }
+    size_t buffer_length = FIX2LONG(w) * FIX2LONG(h) * 4; /* decoder writes one uint32_t per pixel */
+    VALUE ret = rb_str_buf_new(buffer_length);
+    if (!decode_pvrtc_2bpp((uint8_t*)RSTRING_PTR(rb_data), FIX2INT(w), FIX2INT(h), (uint32_t*)RSTRING_PTR(ret))) {
+        rb_raise(rb_eStandardError, "internal error");
+        return Qnil;
+    }
+    rb_str_set_len(ret, buffer_length);
+    return ret;
+}
+
 void Init_native()
 {
     VALUE mMikunyan = rb_define_module("Mikunyan");
@@ -253,4 +277,5 @@ void Init_native()
     rb_define_module_function(mDecodeHelper, "decode_dxt1", rb_decode_dxt1, 3);
     rb_define_module_function(mDecodeHelper, "decode_dxt5", rb_decode_dxt5, 3);
     rb_define_module_function(mDecodeHelper, "decode_pvrtc1_4bpp", rb_decode_pvrtc1_4bpp, 3);
+    rb_define_module_function(mDecodeHelper, "decode_pvrtc1_2bpp", rb_decode_pvrtc1_2bpp, 3);
 }
diff --git a/ext/decoders/native/pvrtc.c b/ext/decoders/native/pvrtc.c
index d0ec1a8..f298cb2 100644
--- a/ext/decoders/native/pvrtc.c
+++ b/ext/decoders/native/pvrtc.c
@@ -3,21 +3,9 @@
 #include <stdint.h>
 #include <string.h>
 
-#define MORTON_POS(x, y) (morton_table_buf[num_blocks_x * (y) + (x)])
+#define MORTON_POS(x, y) (morton_table[num_blocks_x * (y) + (x)])
 
-typedef struct {
-    uint8_t a_r;
-    uint8_t a_g;
-    uint8_t a_b;
-    uint8_t a_a;
-    uint8_t b_r;
-    uint8_t b_g;
-    uint8_t b_b;
-    uint8_t b_a;
-} PVRTCTexelColor;
-
-static inline uint32_t color(uint8_t r, uint8_t g, uint8_t b, uint8_t a)
-{
+static inline uint32_t color(uint8_t r, uint8_t g, uint8_t b, uint8_t a) {
 #if BYTE_ORDER == LITTLE_ENDIAN
     return r | g << 8 | b << 16 | a << 24;
 #else
@@ -25,8 +13,7 @@ static inline uint32_t color(uint8_t r, uint8_t g, uint8_t b, uint8_t a)
 #endif
 }
 
-static inline int morton_index(const int x, const int y, const int numblocks_x, const int numblocks_y)
-{
+static inline int morton_index(const int x, const int y, const int numblocks_x, const int numblocks_y) {
     const int min_dim = numblocks_x <= numblocks_y ? numblocks_x : numblocks_y;
     int offset = 0, shift = 0;
     for (int mask = 1; mask < min_dim; mask <<= 1, shift++) {
@@ -36,51 +23,44 @@ static inline int morton_index(const int x, const int y, const int numblocks_x,
     return offset;
 }
 
-static void applicate_color_4bpp(const uint8_t* data, const PVRTCTexelColor colors[9], uint32_t buf[16])
-{
-    typedef struct {
-        uint16_t a_r;
-        uint16_t a_g;
-        uint16_t a_b;
-        uint16_t a_a;
-        uint16_t b_r;
-        uint16_t b_g;
-        uint16_t b_b;
-        uint16_t b_a;
-    } PVRTCInterpColor;
-
-    static const int INTERP_WEIGHT[4][3] = { { 2, 2, 0 }, { 1, 3, 0 }, { 0, 4, 0 }, { 0, 3, 1 } };
-    PVRTCInterpColor interp_colors[16] = {};
-
-    for (int cy = 0, c = 0; cy < 4; cy++) {
-        for (int cx = 0; cx < 4; cx++, c++) {
-            for (int acy = 0, ac = 0; acy < 3; acy++) {
-                for (int acx = 0; acx < 3; acx++, ac++) {
-                    int interp_weight = INTERP_WEIGHT[cx][acx] * INTERP_WEIGHT[cy][acy];
-                    interp_colors[c].a_r += colors[ac].a_r * interp_weight;
-                    interp_colors[c].a_g += colors[ac].a_g * interp_weight;
-                    interp_colors[c].a_b += colors[ac].a_b * interp_weight;
-                    interp_colors[c].a_a += colors[ac].a_a * interp_weight;
-                    interp_colors[c].b_r += colors[ac].b_r * interp_weight;
-                    interp_colors[c].b_g += colors[ac].b_g * interp_weight;
-                    interp_colors[c].b_b += colors[ac].b_b * interp_weight;
-                    interp_colors[c].b_a += colors[ac].b_a * interp_weight;
-                }
-            }
-            interp_colors[c].a_r = (interp_colors[c].a_r >> 1) + (interp_colors[c].a_r >> 6);
-            interp_colors[c].a_g = (interp_colors[c].a_g >> 1) + (interp_colors[c].a_g >> 6);
-            interp_colors[c].a_b = (interp_colors[c].a_b >> 1) + (interp_colors[c].a_b >> 6);
-            interp_colors[c].a_a = (interp_colors[c].a_a) + (interp_colors[c].a_a >> 4);
-            interp_colors[c].b_r = (interp_colors[c].b_r >> 1) + (interp_colors[c].b_r >> 6);
-            interp_colors[c].b_g = (interp_colors[c].b_g >> 1) + (interp_colors[c].b_g >> 6);
-            interp_colors[c].b_b = (interp_colors[c].b_b >> 1) + (interp_colors[c].b_b >> 6);
-            interp_colors[c].b_a = (interp_colors[c].b_a) + (interp_colors[c].b_a >> 4);
-        }
+/* Unpack the two 16-bit colour words of an 8-byte block (bytes 4-5 and 6-7) into info->a and info->b. */
+static void get_texel_colors(const uint8_t *data, PVRTCTexelInfo *info) {
+    /* Build the little-endian words byte by byte: same value on any host, and no unaligned,
+     * const-discarding uint16_t load that depends on BYTE_ORDER being defined. */
+    const uint16_t ca = (uint16_t)(data[4] | data[5] << 8);
+    const uint16_t cb = (uint16_t)(data[6] | data[7] << 8);
+    /* Bit 15 set: alpha is fixed at 0xf; bit 15 clear: alpha comes from bits 12-14.
+     * Colour fields narrower than 5 bits are widened by repeating their top bits. */
+    if (ca & 0x8000) {
+        info->a.r = ca >> 10 & 0x1f;
+        info->a.g = ca >> 5 & 0x1f;
+        info->a.b = (ca & 0x1e) | (ca >> 4 & 1);
+        info->a.a = 0xf;
+    } else {
+        info->a.r = (ca >> 7 & 0x1e) | (ca >> 11 & 1);
+        info->a.g = (ca >> 3 & 0x1e) | (ca >> 7 & 1);
+        info->a.b = (ca << 1 & 0x1c) | (ca >> 2 & 3);
+        info->a.a = ca >> 11 & 0xe;
     }
+    if (cb & 0x8000) {
+        info->b.r = cb >> 10 & 0x1f;
+        info->b.g = cb >> 5 & 0x1f;
+        info->b.b = cb & 0x1f;
+        info->b.a = 0xf;
+    } else {
+        info->b.r = (cb >> 7 & 0x1e) | (cb >> 11 & 1);
+        info->b.g = (cb >> 3 & 0x1e) | (cb >> 7 & 1);
+        info->b.b = (cb << 1 & 0x1e) | (cb >> 3 & 1);
+        info->b.a = cb >> 11 & 0xe;
+    }
+}
+
+static void get_texel_weights_4bpp(const uint8_t *data, PVRTCTexelInfo *info) {
+    info->punch_through_flag = 0;
 
     int mod_mode = data[4] & 1;
 #if BYTE_ORDER == LITTLE_ENDIAN
-    uint32_t mod_bits = *(uint32_t*)data;
+    uint32_t mod_bits = *(uint32_t *)data;
 #else
     uint32_t mod_bits = data[0] | data[1] << 8 | data[2] << 16 | data[3] << 24;
 #endif
@@ -88,143 +68,302 @@ static void applicate_color_4bpp(const uint8_t* data, const PVRTCTexelColor colo
     if (mod_mode) {
         // punch-through
         for (int i = 0; i < 16; i++, mod_bits >>= 2) {
-            int r, g, b, a;
             switch (mod_bits & 3) {
             case 0:
-                r = interp_colors[i].a_r;
-                g = interp_colors[i].a_g;
-                b = interp_colors[i].a_b;
-                a = interp_colors[i].a_a;
+                info->weight[i] = 0;
                 break;
             case 3:
-                r = interp_colors[i].b_r;
-                g = interp_colors[i].b_g;
-                b = interp_colors[i].b_b;
-                a = interp_colors[i].b_a;
+                info->weight[i] = 8;
                 break;
+            case 2:
+                info->punch_through_flag |= 1 << i;
+                // fall through
             default:
-                r = (interp_colors[i].a_r + interp_colors[i].b_r) / 2;
-                g = (interp_colors[i].a_g + interp_colors[i].b_g) / 2;
-                b = (interp_colors[i].a_b + interp_colors[i].b_b) / 2;
-                a = (mod_bits & 3) == 2 ? 0 : (interp_colors[i].a_a + interp_colors[i].b_a) / 2;
+                info->weight[i] = 4;
             }
-            buf[i] = color(r, g, b, a);
         }
     } else {
         // standard
         for (int i = 0; i < 16; i++, mod_bits >>= 2) {
-            int r, g, b, a;
             switch (mod_bits & 3) {
             case 0:
-                r = interp_colors[i].a_r;
-                g = interp_colors[i].a_g;
-                b = interp_colors[i].a_b;
-                a = interp_colors[i].a_a;
+                info->weight[i] = 0;
                 break;
             case 1:
-                r = (interp_colors[i].a_r * 5 + interp_colors[i].b_r * 3) / 8;
-                g = (interp_colors[i].a_g * 5 + interp_colors[i].b_g * 3) / 8;
-                b = (interp_colors[i].a_b * 5 + interp_colors[i].b_b * 3) / 8;
-                a = (interp_colors[i].a_a * 5 + interp_colors[i].b_a * 3) / 8;
+                info->weight[i] = 3;
                 break;
             case 2:
-                r = (interp_colors[i].a_r * 3 + interp_colors[i].b_r * 5) / 8;
-                g = (interp_colors[i].a_g * 3 + interp_colors[i].b_g * 5) / 8;
-                b = (interp_colors[i].a_b * 3 + interp_colors[i].b_b * 5) / 8;
-                a = (interp_colors[i].a_a * 3 + interp_colors[i].b_a * 5) / 8;
+                info->weight[i] = 5;
                 break;
             case 3:
-                r = interp_colors[i].b_r;
-                g = interp_colors[i].b_g;
-                b = interp_colors[i].b_b;
-                a = interp_colors[i].b_a;
+                info->weight[i] = 8;
                 break;
             }
-            buf[i] = color(r, g, b, a);
         }
     }
 }
 
-static inline void expand_color(const uint8_t* data, PVRTCTexelColor* color)
-{
+static void get_texel_weights_2bpp(const uint8_t *data, PVRTCTexelInfo *info) {
+    info->punch_through_flag = 0; // no punch-through bit is ever set by this function
+    // Fills info->weight[0..31] from the 32 modulation bits in data[0..3]; entries left negative must be resolved later.
+    int mod_mode = data[4] & 1; // low bit of byte 4 selects interpolated (1) or 1bpp (0) decoding below
 #if BYTE_ORDER == LITTLE_ENDIAN
-    uint16_t ca = *(uint16_t*)(data + 4);
-    uint16_t cb = *(uint16_t*)(data + 6);
+    uint32_t mod_bits = *(uint32_t *)data;
 #else
-    uint16_t ca = data[4] | data[5] << 8;
-    uint16_t cb = data[6] | data[7] << 8;
+    uint32_t mod_bits = data[0] | data[1] << 8 | data[2] << 16 | data[3] << 24;
 #endif
-    if (ca & 0x8000) {
-        color->a_r = ca >> 10 & 0x1f;
-        color->a_g = ca >> 5 & 0x1f;
-        color->a_b = (ca & 0x1e) | (ca >> 4 & 1);
-        color->a_a = 0xf;
+
+    if (mod_mode) {
+        // interpolated modulation
+        // The specification is wrong here: only the 4bpp M=0 "standard bilinear" weight table is used,
+        // and punch-through does not exist in 2bpp.
+        int fillflag = data[0] & 1 ? (data[2] & 0x10 ? -1 : -2) : -3;
+        // Entries that cannot be decided yet (they have to be interpolated later) are filled with negative numbers
+        // -3: up/down/left/right / -2: left/right / -1: up/down
+        for (int y = 0, i = 1; y < 4; ++y & 1 ? --i : ++i) // i visits 1,3,5,7, 8,10,12,14, 17,...: cells without stored bits
+            for (int x = 0; x < 4; x++, i += 2)
+                info->weight[i] = fillflag;
+        for (int y = 0, i = 0; y < 4; ++y & 1 ? ++i : --i) { // i visits 0,2,4,6, 9,11,13,15, 16,...: cells with a stored 2-bit value
+            for (int x = 0; x < 4; x++, i += 2, mod_bits >>= 2) {
+                switch (mod_bits & 3) {
+                case 0:
+                    info->weight[i] = 0;
+                    break;
+                case 1:
+                    info->weight[i] = 3;
+                    break;
+                case 2:
+                    info->weight[i] = 5;
+                    break;
+                case 3:
+                    info->weight[i] = 8;
+                    break;
+                }
+            }
+        }
+        // texel 0 is always 1bpp (0 or 3 -> 0, 5 or 8 -> 8)
+        info->weight[0] = (info->weight[0] + 3) & 8;
+        if (data[0] & 1)
+            // when bit0 is 1, texel (4, 2) is 1bpp as well
+            info->weight[20] = (info->weight[20] + 3) & 8;
     } else {
-        color->a_r = (ca >> 7 & 0x1e) | (ca >> 11 & 1);
-        color->a_g = (ca >> 3 & 0x1e) | (ca >> 7 & 1);
-        color->a_b = (ca << 1 & 0x1c) | (ca >> 2 & 3);
-        color->a_a = ca >> 11 & 0xe;
-    }
-    if (cb & 0x8000) {
-        color->b_r = cb >> 10 & 0x1f;
-        color->b_g = cb >> 5 & 0x1f;
-        color->b_b = cb & 0x1f;
-        color->b_a = 0xf;
-    } else {
-        color->b_r = (cb >> 7 & 0x1e) | (cb >> 11 & 1);
-        color->b_g = (cb >> 3 & 0x1e) | (cb >> 7 & 1);
-        color->b_b = (cb << 1 & 0x1e) | (cb >> 3 & 1);
-        color->b_a = cb >> 11 & 0xe;
+        // 1bpp: one bit per texel, 0 -> weight 0, 1 -> weight 8
+        for (int i = 0; i < 32; i++, mod_bits >>= 1)
+            info->weight[i] = mod_bits & 1 ? 8 : 0;
     }
 }
 
-int decode_pvrtc_4bpp(const uint8_t* data, const int w, const int h, uint32_t* image)
-{
+/* Fill buf with the 4x4 texels (row-major) of the centre block info[4]; info[0..8] is its 3x3 block neighbourhood and data is unused. */
+static void applicate_color_4bpp(const uint8_t *data, PVRTCTexelInfo *const info[9], uint32_t buf[16]) {
+    static const int INTERP_WEIGHT[4][3] = {{2, 2, 0}, {1, 3, 0}, {0, 4, 0}, {0, 3, 1}};
+    PVRTCTexelColorInt clr_a[16] = {{0}}, clr_b[16] = {{0}}; // "= {}" is not valid C before C23
+    // Per texel, sum the neighbours' A and B colours with separable weights (each axis sums to 4, 16 overall).
+    for (int y = 0, i = 0; y < 4; y++) {
+        for (int x = 0; x < 4; x++, i++) {
+            for (int acy = 0, ac = 0; acy < 3; acy++) {
+                for (int acx = 0; acx < 3; acx++, ac++) {
+                    int interp_weight = INTERP_WEIGHT[x][acx] * INTERP_WEIGHT[y][acy];
+                    clr_a[i].r += info[ac]->a.r * interp_weight;
+                    clr_a[i].g += info[ac]->a.g * interp_weight;
+                    clr_a[i].b += info[ac]->a.b * interp_weight;
+                    clr_a[i].a += info[ac]->a.a * interp_weight;
+                    clr_b[i].r += info[ac]->b.r * interp_weight;
+                    clr_b[i].g += info[ac]->b.g * interp_weight;
+                    clr_b[i].b += info[ac]->b.b * interp_weight;
+                    clr_b[i].a += info[ac]->b.a * interp_weight;
+                }
+            }
+            clr_a[i].r = (clr_a[i].r >> 1) + (clr_a[i].r >> 6);
+            clr_a[i].g = (clr_a[i].g >> 1) + (clr_a[i].g >> 6);
+            clr_a[i].b = (clr_a[i].b >> 1) + (clr_a[i].b >> 6);
+            clr_a[i].a = (clr_a[i].a) + (clr_a[i].a >> 4);
+            clr_b[i].r = (clr_b[i].r >> 1) + (clr_b[i].r >> 6);
+            clr_b[i].g = (clr_b[i].g >> 1) + (clr_b[i].g >> 6);
+            clr_b[i].b = (clr_b[i].b >> 1) + (clr_b[i].b >> 6);
+            clr_b[i].a = (clr_b[i].a) + (clr_b[i].a >> 4);
+        }
+    }
+    // Blend A and B per texel: weight is the share of B in eighths; a set punch-through bit forces alpha to 0.
+    const PVRTCTexelInfo *self_info = info[4];
+    uint32_t punch_through_flag = self_info->punch_through_flag;
+    for (int i = 0; i < 16; i++, punch_through_flag >>= 1) {
+        const int wb = self_info->weight[i], wa = 8 - wb;
+        buf[i] = color((clr_a[i].r * wa + clr_b[i].r * wb) / 8,
+                       (clr_a[i].g * wa + clr_b[i].g * wb) / 8,
+                       (clr_a[i].b * wa + clr_b[i].b * wb) / 8,
+                       punch_through_flag & 1 ? 0 : (clr_a[i].a * wa + clr_b[i].a * wb) / 8);
+    }
+}
+
+/* Fill buf with the 8x4 texels (row-major) of the centre block info[4]; info[0..8] is its 3x3 block neighbourhood and data is unused. */
+static void applicate_color_2bpp(const uint8_t *data, PVRTCTexelInfo *info[9], uint32_t buf[32]) {
+    static const int INTERP_WEIGHT_X[8][3] = {{4, 4, 0}, {3, 5, 0}, {2, 6, 0}, {1, 7, 0},
+                                              {0, 8, 0}, {0, 7, 1}, {0, 6, 2}, {0, 5, 3}};
+    static const int INTERP_WEIGHT_Y[4][3] = {{2, 2, 0}, {1, 3, 0}, {0, 4, 0}, {0, 3, 1}};
+    PVRTCTexelColorInt clr_a[32] = {{0}}, clr_b[32] = {{0}}; // "= {}" is not valid C before C23
+    // Per texel, sum the neighbours' A and B colours; X weights sum to 8 and Y weights to 4 (32 overall).
+    for (int y = 0, i = 0; y < 4; y++) {
+        for (int x = 0; x < 8; x++, i++) {
+            for (int acy = 0, ac = 0; acy < 3; acy++) {
+                for (int acx = 0; acx < 3; acx++, ac++) {
+                    int interp_weight = INTERP_WEIGHT_X[x][acx] * INTERP_WEIGHT_Y[y][acy];
+                    clr_a[i].r += info[ac]->a.r * interp_weight;
+                    clr_a[i].g += info[ac]->a.g * interp_weight;
+                    clr_a[i].b += info[ac]->a.b * interp_weight;
+                    clr_a[i].a += info[ac]->a.a * interp_weight;
+                    clr_b[i].r += info[ac]->b.r * interp_weight;
+                    clr_b[i].g += info[ac]->b.g * interp_weight;
+                    clr_b[i].b += info[ac]->b.b * interp_weight;
+                    clr_b[i].a += info[ac]->b.a * interp_weight;
+                }
+            }
+            clr_a[i].r = (clr_a[i].r >> 2) + (clr_a[i].r >> 7);
+            clr_a[i].g = (clr_a[i].g >> 2) + (clr_a[i].g >> 7);
+            clr_a[i].b = (clr_a[i].b >> 2) + (clr_a[i].b >> 7);
+            clr_a[i].a = (clr_a[i].a >> 1) + (clr_a[i].a >> 5);
+            clr_b[i].r = (clr_b[i].r >> 2) + (clr_b[i].r >> 7);
+            clr_b[i].g = (clr_b[i].g >> 2) + (clr_b[i].g >> 7);
+            clr_b[i].b = (clr_b[i].b >> 2) + (clr_b[i].b >> 7);
+            clr_b[i].a = (clr_b[i].a >> 1) + (clr_b[i].a >> 5);
+        }
+    }
+    // {slot in info[], offset added to i} of the vertical (A/B, per row) and horizontal (L/R, per column) neighbour texel.
+    static const int POSYA[4][2] = {{1, 24}, {4, -8}, {4, -8}, {4, -8}};
+    static const int POSYB[4][2] = {{4, 8}, {4, 8}, {4, 8}, {7, -24}};
+    static const int POSXL[8][2] = {{3, 7}, {4, -1}, {4, -1}, {4, -1}, {4, -1}, {4, -1}, {4, -1}, {4, -1}};
+    static const int POSXR[8][2] = {{4, 1}, {4, 1}, {4, 1}, {4, 1}, {4, 1}, {4, 1}, {4, 1}, {5, -7}};
+    // Resolve negative placeholder weights in place (rounded neighbour averages), then blend A and B per texel.
+    PVRTCTexelInfo *self_info = info[4];
+    uint32_t punch_through_flag = self_info->punch_through_flag;
+    for (int y = 0, i = 0; y < 4; y++) {
+        for (int x = 0; x < 8; x++, i++, punch_through_flag >>= 1) {
+            switch (self_info->weight[i]) {
+            case -1:
+                self_info->weight[i] =
+                  (info[POSYA[y][0]]->weight[i + POSYA[y][1]] + info[POSYB[y][0]]->weight[i + POSYB[y][1]] + 1) / 2;
+                break;
+            case -2:
+                self_info->weight[i] =
+                  (info[POSXL[x][0]]->weight[i + POSXL[x][1]] + info[POSXR[x][0]]->weight[i + POSXR[x][1]] + 1) / 2;
+                break;
+            case -3:
+                self_info->weight[i] =
+                  (info[POSYA[y][0]]->weight[i + POSYA[y][1]] + info[POSYB[y][0]]->weight[i + POSYB[y][1]] +
+                   info[POSXL[x][0]]->weight[i + POSXL[x][1]] + info[POSXR[x][0]]->weight[i + POSXR[x][1]] + 2) /
+                  4;
+                break;
+            }
+            const int wb = self_info->weight[i], wa = 8 - wb; // wb: share of colour B in eighths
+            buf[i] = color((clr_a[i].r * wa + clr_b[i].r * wb) / 8,
+                           (clr_a[i].g * wa + clr_b[i].g * wb) / 8,
+                           (clr_a[i].b * wa + clr_b[i].b * wb) / 8,
+                           punch_through_flag & 1 ? 0 : (clr_a[i].a * wa + clr_b[i].a * wb) / 8);
+        }
+    }
+}
+
+int decode_pvrtc_4bpp(const uint8_t *data, const int w, const int h, uint32_t *image) { // 4x4-texel blocks, 8 bytes each
     int num_blocks_x = (w + 3) / 4;
     int num_blocks_y = (h + 3) / 4;
     int num_blocks = num_blocks_x * num_blocks_y;
     int copy_length_last = (w + 3) % 4 + 1;
 
-    PVRTCTexelColor* texel_colors = (PVRTCTexelColor*)malloc(sizeof(PVRTCTexelColor) * num_blocks);
-    if (texel_colors == NULL)
+    int *morton_table = (int *)malloc(sizeof(int) * num_blocks); // block (x, y) -> index of that block in data
+    if (morton_table == NULL)
         return 0;
-    const uint8_t* d = data;
-    for (int i = 0; i < num_blocks; i++, d += 8)
-        expand_color(d, texel_colors + i);
-
-    int* morton_table_buf = (int*)malloc(sizeof(int) * num_blocks);
-    if (morton_table_buf == NULL) {
-        free(texel_colors);
+    PVRTCTexelInfo *texel_info = (PVRTCTexelInfo *)malloc(sizeof(PVRTCTexelInfo) * num_blocks); // one entry per block, in data order
+    if (texel_info == NULL) {
+        free(morton_table);
         return 0;
     }
+
     for (int y = 0; y < num_blocks_y; y++)
         for (int x = 0; x < num_blocks_x; x++)
             MORTON_POS(x, y) = morton_index(x, y, num_blocks_x, num_blocks_y);
 
+    const uint8_t *d = data;
+    for (int i = 0; i < num_blocks; i++, d += 8) { // unpack colours and weights of every block up front
+        get_texel_colors(d, &texel_info[i]);
+        get_texel_weights_4bpp(d, &texel_info[i]);
+    }
+
     uint32_t buffer[16];
-    uint32_t* buffer_end = buffer + 16;
-    PVRTCTexelColor colors[9];
+    uint32_t *buffer_end = buffer + 16;
+    PVRTCTexelInfo *local_info[9]; // 3x3 neighbourhood of the current block, centre at index 4
     int pos_x[3], pos_y[3];
     for (int by = 0; by < num_blocks_y; by++) {
-        pos_y[0] = by == 0 ? 0 : by - 1;
+        pos_y[0] = by == 0 ? num_blocks_y - 1 : by - 1; // neighbours wrap around to the opposite edge
         pos_y[1] = by;
-        pos_y[2] = by == num_blocks_y - 1 ? num_blocks_y - 1 : by + 1;
-        for (int bx = 0, x = 0; bx < num_blocks_x; bx++, d += 8, x += 4) {
-            pos_x[0] = bx == 0 ? 0 : bx - 1;
+        pos_y[2] = by == num_blocks_y - 1 ? 0 : by + 1;
+        for (int bx = 0, x = 0; bx < num_blocks_x; bx++, x += 4) {
+            pos_x[0] = bx == 0 ? num_blocks_x - 1 : bx - 1;
             pos_x[1] = bx;
-            pos_x[2] = bx == num_blocks_x - 1 ? num_blocks_x - 1 : bx + 1;
+            pos_x[2] = bx == num_blocks_x - 1 ? 0 : bx + 1;
             for (int cy = 0, c = 0; cy < 3; cy++)
                 for (int cx = 0; cx < 3; cx++, c++)
-                    colors[c] = texel_colors[MORTON_POS(pos_x[cx], pos_y[cy])];
-            applicate_color_4bpp(data + MORTON_POS(bx, by) * 8, colors, buffer);
+                    local_info[c] = &texel_info[MORTON_POS(pos_x[cx], pos_y[cy])];
+            applicate_color_4bpp(data + MORTON_POS(bx, by) * 8, local_info, buffer);
             int copy_length = (bx < num_blocks_x - 1 ? 4 : copy_length_last) * 4;
-            uint32_t* b = buffer;
+            uint32_t *b = buffer; // rows of the decoded block, copied to image rows h - by * 4 - 1 downwards
             for (int y = h - by * 4 - 1; b < buffer_end && y >= 0; y--, b += 4)
                 memcpy(image + y * w + x, b, copy_length);
         }
     }
 
-    free(morton_table_buf);
-    free(texel_colors);
+    free(morton_table);
+    free(texel_info);
+    return 1; // 1 on success; 0 is returned above when an allocation fails
+}
+
+int decode_pvrtc_2bpp(const uint8_t *data, const int w, const int h, uint32_t *image) {
+    int num_blocks_x = (w + 7) / 8; // blocks are 8 texels wide ...
+    int num_blocks_y = (h + 3) / 4; // ... and 4 texels high
+    int num_blocks = num_blocks_x * num_blocks_y;
+    int copy_length_last = (w + 7) % 8 + 1; // texel columns of the last block column that fall inside the image
+    // Decodes data (8 bytes per block) into image (w * h uint32_t pixels); returns 1 on success, 0 if an allocation fails.
+    int *morton_table = (int *)malloc(sizeof(int) * num_blocks); // block (x, y) -> index of that block in data
+    if (morton_table == NULL)
+        return 0;
+    PVRTCTexelInfo *texel_info = (PVRTCTexelInfo *)malloc(sizeof(PVRTCTexelInfo) * num_blocks); // one entry per block, in data order
+    if (texel_info == NULL) {
+        free(morton_table);
+        return 0;
+    }
+
+    for (int y = 0; y < num_blocks_y; y++)
+        for (int x = 0; x < num_blocks_x; x++)
+            MORTON_POS(x, y) = morton_index(x, y, num_blocks_x, num_blocks_y);
+
+    const uint8_t *d = data;
+    for (int i = 0; i < num_blocks; i++, d += 8) { // unpack colours and weights of every block up front
+        get_texel_colors(d, &texel_info[i]);
+        get_texel_weights_2bpp(d, &texel_info[i]);
+    }
+
+    uint32_t buffer[32];
+    uint32_t *buffer_end = buffer + 32;
+    PVRTCTexelInfo *local_info[9]; // 3x3 neighbourhood of the current block, centre at index 4
+    int pos_x[3], pos_y[3];
+    for (int by = 0; by < num_blocks_y; by++) {
+        pos_y[0] = by == 0 ? num_blocks_y - 1 : by - 1; // neighbours wrap around to the opposite edge
+        pos_y[1] = by;
+        pos_y[2] = by == num_blocks_y - 1 ? 0 : by + 1;
+        for (int bx = 0, x = 0; bx < num_blocks_x; bx++, x += 8) {
+            pos_x[0] = bx == 0 ? num_blocks_x - 1 : bx - 1;
+            pos_x[1] = bx;
+            pos_x[2] = bx == num_blocks_x - 1 ? 0 : bx + 1;
+            for (int cy = 0, c = 0; cy < 3; cy++)
+                for (int cx = 0; cx < 3; cx++, c++)
+                    local_info[c] = &texel_info[MORTON_POS(pos_x[cx], pos_y[cy])];
+            applicate_color_2bpp(data + MORTON_POS(bx, by) * 8, local_info, buffer);
+            int copy_length = (bx < num_blocks_x - 1 ? 8 : copy_length_last) * 4; // bytes per copied row (4 per pixel)
+            uint32_t *b = buffer; // rows of the decoded block, copied to image rows h - by * 4 - 1 downwards
+            for (int y = h - by * 4 - 1; b < buffer_end && y >= 0; y--, b += 8)
+                memcpy(image + y * w + x, b, copy_length);
+        }
+    }
+
+    free(morton_table);
+    free(texel_info);
     return 1;
 }
diff --git a/ext/decoders/native/pvrtc.h b/ext/decoders/native/pvrtc.h
index 398e840..3bfe57e 100644
--- a/ext/decoders/native/pvrtc.h
+++ b/ext/decoders/native/pvrtc.h
@@ -3,6 +3,28 @@
 
 #include <stdint.h>
 
+typedef struct { // one block colour as unpacked by get_texel_colors
+    uint8_t r; // colour channels: values fit in 5 bits
+    uint8_t g;
+    uint8_t b;
+    uint8_t a; // alpha: value fits in 4 bits
+} PVRTCTexelColor;
+
+typedef struct { // int-wide colour, used by applicate_color_* to accumulate weighted sums
+    int r;
+    int g;
+    int b;
+    int a;
+} PVRTCTexelColorInt;
+
+typedef struct { // everything unpacked from one 8-byte block
+    PVRTCTexelColor a; // first colour of the block (weight 0 selects it)
+    PVRTCTexelColor b; // second colour of the block (weight 8 selects it)
+    int8_t weight[32]; // per-texel share of colour b in eighths; 4bpp fills 16 entries, 2bpp may hold negative placeholders
+    uint32_t punch_through_flag; // bit i set: texel i gets alpha 0
+} PVRTCTexelInfo;
+
 int decode_pvrtc_4bpp(const uint8_t*, const int, const int, uint32_t*);
+int decode_pvrtc_2bpp(const uint8_t*, const int, const int, uint32_t*);
 
 #endif /* end of include guard: PVRTC_H */
diff --git a/lib/mikunyan/decoders/image_decoder.rb b/lib/mikunyan/decoders/image_decoder.rb
index fb8d1bf..f21f5f1 100644
--- a/lib/mikunyan/decoders/image_decoder.rb
+++ b/lib/mikunyan/decoders/image_decoder.rb
@@ -389,8 +389,7 @@ module Mikunyan
       # @param [String] bin binary to decode
       # @return [ChunkyPNG::Image] decoded image
       def self.decode_pvrtc1_2bpp(width, height, bin)
-        # ChunkyPNG::Image.from_rgba_stream(width, height, DecodeHelper.decode_pvrtc1_2bpp(bin, width, height))
-        nil
+        ChunkyPNG::Image.from_rgba_stream(width, height, DecodeHelper.decode_pvrtc1_2bpp(bin, width, height)) # decoded by the native extension
       end
 
       # Decode image from ETC1 compressed binary