diff --git a/jadx-plugins/jadx-java-input/src/main/java/jadx/plugins/input/java/data/ConstPoolReader.java b/jadx-plugins/jadx-java-input/src/main/java/jadx/plugins/input/java/data/ConstPoolReader.java
index a6e9db9b3..ce991e293 100644
--- a/jadx-plugins/jadx-java-input/src/main/java/jadx/plugins/input/java/data/ConstPoolReader.java
+++ b/jadx-plugins/jadx-java-input/src/main/java/jadx/plugins/input/java/data/ConstPoolReader.java
@@ -1,6 +1,5 @@
 package jadx.plugins.input.java.data;
 
-import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.List;
 
@@ -23,6 +22,7 @@ import jadx.plugins.input.java.data.attributes.types.JavaBootstrapMethodsAttr;
 import jadx.plugins.input.java.data.attributes.types.data.RawBootstrapMethod;
 import jadx.plugins.input.java.utils.DescriptorParser;
 import jadx.plugins.input.java.utils.JavaClassParseException;
+import jadx.plugins.input.java.utils.ModifiedUTF8Decoder;
 
 public class ConstPoolReader {
 	private final JavaClassReader clsReader;
@@ -235,8 +235,7 @@ public class ConstPoolReader {
 
 	@NotNull
 	private String parseString(byte[] bytes) {
-		// TODO: parse modified UTF-8
-		return new String(bytes, StandardCharsets.UTF_8);
+		return ModifiedUTF8Decoder.decodeString(bytes);
 	}
 
 	private String fixType(String clsName) {
diff --git a/jadx-plugins/jadx-java-input/src/main/java/jadx/plugins/input/java/utils/ModifiedUTF8Decoder.java b/jadx-plugins/jadx-java-input/src/main/java/jadx/plugins/input/java/utils/ModifiedUTF8Decoder.java
new file mode 100644
index 000000000..c7a2db34f
--- /dev/null
+++ b/jadx-plugins/jadx-java-input/src/main/java/jadx/plugins/input/java/utils/ModifiedUTF8Decoder.java
@@ -0,0 +1,81 @@
+package jadx.plugins.input.java.utils;
+
+import java.nio.charset.StandardCharsets;
+
+/**
+ * Decoder for the "modified UTF-8" format of class file string constants (jvms-4.4.7).
+ * <p>
+ * Compared to standard UTF-8, the null char is stored as {@code 0xC0 0x80} and every
+ * supplementary character is stored as two 3-byte encoded surrogates (6 bytes).
+ */
+public final class ModifiedUTF8Decoder {
+
+	private ModifiedUTF8Decoder() {
+		// static methods only
+	}
+
+	/**
+	 * Decode modified UTF-8 bytes into a string.
+	 *
+	 * @param bytes encoded data
+	 * @return decoded string, surrogate pairs are emitted as two UTF-16 code units
+	 * @throws JavaClassParseException if data is truncated or is not valid modified UTF-8
+	 */
+	public static String decodeString(byte[] bytes) {
+		// quick check if all chars are 7-bit
+		if (isAscii(bytes)) {
+			return new String(bytes, StandardCharsets.US_ASCII);
+		}
+		int len = bytes.length;
+		StringBuilder sb = new StringBuilder(len);
+		int i = 0;
+		while (i < len) {
+			int x = bytes[i] & 0xff;
+			if ((x & 0x80) == 0) {
+				// 1 byte: 7-bit ascii
+				sb.append((char) x);
+				i++;
+			} else if ((x & 0xE0) == 0xC0) {
+				// 2 bytes: null char (0xC0 0x80) and chars in range 0x80-0x7FF
+				int y = continuationBits(bytes, i + 1);
+				sb.append((char) (((x & 0x1f) << 6) | y));
+				i += 2;
+			} else if ((x & 0xF0) == 0xE0) {
+				// 3 bytes: chars in range 0x800-0xFFFF, this includes both halves of
+				// a surrogate pair, so 6-byte sequences don't need a dedicated branch
+				int y = continuationBits(bytes, i + 1);
+				int z = continuationBits(bytes, i + 2);
+				sb.append((char) (((x & 0x0f) << 12) | (y << 6) | z));
+				i += 3;
+			} else {
+				// lone continuation byte or lead byte of a longer sequence: never valid here
+				throw new JavaClassParseException("Inconsistent byte array structure: unexpected char");
+			}
+		}
+		return sb.toString();
+	}
+
+	private static boolean isAscii(byte[] bytes) {
+		for (byte b : bytes) {
+			if (b < 0) {
+				return false;
+			}
+		}
+		return true;
+	}
+
+	/**
+	 * Read payload (low 6 bits) of continuation byte at {@code pos}; fail instead of skipping
+	 * if the byte is missing or malformed, so corrupted data can't be silently dropped.
+	 */
+	private static int continuationBits(byte[] bytes, int pos) {
+		if (pos >= bytes.length) {
+			throw new JavaClassParseException("Inconsistent byte array structure: too short");
+		}
+		int b = bytes[pos] & 0xff;
+		if ((b & 0xC0) != 0x80) {
+			throw new JavaClassParseException("Inconsistent byte array structure: unexpected char");
+		}
+		return b & 0x3f;
+	}
+}
diff --git a/jadx-plugins/jadx-java-input/src/test/java/jadx/plugins/input/java/utils/ModifiedUTF8DecoderTest.java b/jadx-plugins/jadx-java-input/src/test/java/jadx/plugins/input/java/utils/ModifiedUTF8DecoderTest.java
new file mode 100644
index 000000000..dd148a002
--- /dev/null
+++ b/jadx-plugins/jadx-java-input/src/test/java/jadx/plugins/input/java/utils/ModifiedUTF8DecoderTest.java
@@ -0,0 +1,46 @@
+package jadx.plugins.input.java.utils;
+
+import org.junit.jupiter.api.Test;
+
+import static jadx.plugins.input.java.utils.ModifiedUTF8Decoder.decodeString;
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.assertj.core.api.Assertions.assertThatThrownBy;
+
+/*
+ * Supplementary chars (6 bytes) are decoded as two 3-byte surrogates, see test().
+ */
+class ModifiedUTF8DecoderTest {
+
+	@Test
+	public void test() {
+		String str = "aÆřᛒቶ北𝄠😀🨄𐆙";
+		byte[] mUTF8Bytes = new byte[] { 97, -61, -122, -59, -103, -31, -101, -110, -31, -119, -74, -17,
+				-91, -93, -19, -96, -76, -19, -76, -96, -19, -96, -67, -19, -72,
+				-128, -19, -96, -66, -19, -72, -124, -19, -96, -128, -19, -74, -103 };
+		assertThat(decodeString(mUTF8Bytes)).isEqualTo(str);
+	}
+
+	@Test
+	public void testASCIIOnly() {
+		String str = "Hello, world!";
+		byte[] mUTF8Bytes = new byte[] { 72, 101, 108, 108, 111, 44, 32, 119, 111, 114, 108, 100, 33 };
+		assertThat(decodeString(mUTF8Bytes)).isEqualTo(str);
+	}
+
+	@Test
+	public void testEmbeddedNull() {
+		byte[] mUTF8Bytes = new byte[] { 97, -64, -128, 98 };
+		assertThat(decodeString(mUTF8Bytes)).isEqualTo("a\u0000b");
+	}
+
+	@Test
+	public void testMalformedInput() {
+		// lone continuation byte followed by a single ascii char
+		assertThatThrownBy(() -> decodeString(new byte[] { -128, 65 }))
+				.isInstanceOf(JavaClassParseException.class);
+		// 3-byte char cut after second byte
+		assertThatThrownBy(() -> decodeString(new byte[] { -28, -72 }))
+				.isInstanceOf(JavaClassParseException.class);
+	}
+
+}