From 53be92c616f6e6096ac934fb091a425c83cdd0fe Mon Sep 17 00:00:00 2001 From: Skylot Date: Sat, 3 Jan 2015 17:06:41 +0300 Subject: [PATCH] core: fix decoding UTF-8 strings in xml resources --- .../src/main/java/jadx/api/ResourceType.java | 2 +- .../jadx/core/xmlgen/BinaryXMLParser.java | 227 +++++++++--------- .../java/jadx/core/xmlgen/ParserStream.java | 82 +++++++ .../java/jadx/gui/treemodel/JResource.java | 2 +- 4 files changed, 204 insertions(+), 109 deletions(-) create mode 100644 jadx-core/src/main/java/jadx/core/xmlgen/ParserStream.java diff --git a/jadx-core/src/main/java/jadx/api/ResourceType.java b/jadx-core/src/main/java/jadx/api/ResourceType.java index e27a5974..a0d3b606 100644 --- a/jadx-core/src/main/java/jadx/api/ResourceType.java +++ b/jadx-core/src/main/java/jadx/api/ResourceType.java @@ -36,13 +36,13 @@ public enum ResourceType { case CODE: case ARSC: case LIB: - case XML: case FONT: case IMG: case UNKNOWN: return false; case MANIFEST: + case XML: return true; } return false; diff --git a/jadx-core/src/main/java/jadx/core/xmlgen/BinaryXMLParser.java b/jadx-core/src/main/java/jadx/core/xmlgen/BinaryXMLParser.java index fa88be0a..17f1e4df 100644 --- a/jadx-core/src/main/java/jadx/core/xmlgen/BinaryXMLParser.java +++ b/jadx-core/src/main/java/jadx/core/xmlgen/BinaryXMLParser.java @@ -33,10 +33,33 @@ public class BinaryXMLParser { private static final Logger LOG = LoggerFactory.getLogger(BinaryXMLParser.class); - private static final Charset STRING_CHARSET = Charset.forName("UTF-16LE"); + private static final Charset STRING_CHARSET_UTF16 = Charset.forName("UTF-16LE"); + private static final Charset STRING_CHARSET_UTF8 = Charset.forName("UTF-8"); + + private static final int RES_NULL_TYPE = 0x0000; + private static final int RES_STRING_POOL_TYPE = 0x0001; + private static final int RES_TABLE_TYPE = 0x0002; + + private static final int RES_XML_TYPE = 0x0003; + private static final int RES_XML_FIRST_CHUNK_TYPE = 0x0100; + private static final int RES_XML_START_NAMESPACE_TYPE = 0x0100; + private static final int RES_XML_END_NAMESPACE_TYPE = 0x0101; + private static final int RES_XML_START_ELEMENT_TYPE = 0x0102; + private static final int RES_XML_END_ELEMENT_TYPE = 0x0103; + private static final int RES_XML_CDATA_TYPE = 0x0104; + private static final int RES_XML_LAST_CHUNK_TYPE = 0x017f; + private static final int RES_XML_RESOURCE_MAP_TYPE = 0x0180; + + private static final int RES_TABLE_PACKAGE_TYPE = 0x0200; + private static final int RES_TABLE_TYPE_TYPE = 0x0201; + private static final int RES_TABLE_TYPE_SPEC_TYPE = 0x0202; + + // string pool flags + private static final int SORTED_FLAG = 1; + private static final int UTF8_FLAG = 1 << 8; private CodeWriter writer; - private InputStream input; + private ParserStream is; private String[] strings; private String nsPrefix = "ERROR"; @@ -76,7 +99,7 @@ public class BinaryXMLParser { public synchronized CodeWriter parse(InputStream inputStream) { writer = new CodeWriter(); writer.add(""); - input = inputStream; + is = new ParserStream(inputStream); firstElement = true; try { decode(); @@ -92,37 +115,37 @@ public class BinaryXMLParser { } void decode() throws IOException { - if (cInt16() != 0x0003) { + if (is.readInt16() != 0x0003) { die("Version is not 3"); } - if (cInt16() != 0x0008) { + if (is.readInt16() != 0x0008) { die("Size of header is not 8"); } - cInt32(); - while (input.available() != 0) { - int type = cInt16(); + int size = is.readInt32(); + while (is.getPos() < size) { + int type = is.readInt16(); switch (type) { - case 0x0001: + case RES_NULL_TYPE: + // NullType is just doing nothing + break; + case RES_STRING_POOL_TYPE: parseStringPool(); break; - case 0x0180: + case RES_XML_RESOURCE_MAP_TYPE: parseResourceMap(); break; - case 0x0100: + case RES_XML_START_NAMESPACE_TYPE: parseNameSpace(); break; - case 0x0101: + case RES_XML_END_NAMESPACE_TYPE: parseNameSpaceEnd(); break; - case 0x0102: + case RES_XML_START_ELEMENT_TYPE: parseElement(); break; - case 0x0103: + case RES_XML_END_ELEMENT_TYPE: parseElementEnd(); break; - case 0x0000: - // NullType is just doing nothing - break; default: die("Type: " + Integer.toHexString(type) + " not yet implemented"); @@ -132,68 +155,88 @@ public class BinaryXMLParser { } private void parseStringPool() throws IOException { - if (cInt16() != 0x001c) { + if (is.readInt16() != 0x001c) { die("Header header size not 28"); } - int hsize = cInt32(); - int stringCount = cInt32(); - int styleCount = cInt32(); - int flags = cInt32(); - int stringsStart = cInt32(); - int stylesStart = cInt32(); - int[] stringsOffsets = new int[stringCount]; - for (int i = 0; i < stringCount; i++) { - stringsOffsets[i] = cInt32(); - } + int hsize = is.readInt32(); + int stringCount = is.readInt32(); + int styleCount = is.readInt32(); + int flags = is.readInt32(); + int stringsStart = is.readInt32(); + int stylesStart = is.readInt32(); + // skip string offsets + is.skip(stringCount * 4); strings = new String[stringCount]; - for (int i = 0; i < stringCount; i++) { - int off = 8 + stringsStart + stringsOffsets[i]; - int strlen = cInt16(); - byte[] str = new byte[strlen * 2]; - readToArray(str); - strings[i] = new String(str, STRING_CHARSET); - cInt16(); + if ((flags & UTF8_FLAG) != 0) { + // UTF-8 + long start = is.getPos(); + for (int i = 0; i < stringCount; i++) { + int charsCount = is.decodeLength8(); + int len = is.decodeLength8(); + strings[i] = new String(is.readArray(len), STRING_CHARSET_UTF8); + int zero = is.readInt8(); + if (zero != 0) { + die("Not a trailing zero at string end: " + zero + ", " + strings[i]); + } + } + long shift = is.getPos() - start; + if (shift % 2 != 0) { + is.skip(1); + } + } else { + // UTF-16 + for (int i = 0; i < stringCount; i++) { + int len = is.decodeLength16(); + strings[i] = new String(is.readArray(len * 2), STRING_CHARSET_UTF16); + int zero = is.readInt16(); + if (zero != 0) { + die("Not a trailing zero at string end: " + zero + ", " + strings[i]); + } + } + } + if (styleCount != 0) { + die("Styles parsing in string pool not yet implemented"); } } private void parseResourceMap() throws IOException { - if (cInt16() != 0x8) { + if (is.readInt16() != 0x8) { die("Header size of resmap is not 8!"); } - int rhsize = cInt32(); + int rhsize = is.readInt32(); int[] ids = new int[(rhsize - 8) / 4]; for (int i = 0; i < ids.length; i++) { - ids[i] = cInt32(); + ids[i] = is.readInt32(); } } private void parseNameSpace() throws IOException { - if (cInt16() != 0x10) { + if (is.readInt16() != 0x10) { die("NAMESPACE header is not 0x0010"); } - if (cInt32() != 0x18) { + if (is.readInt32() != 0x18) { die("NAMESPACE header chunk is not 0x18 big"); } - int beginLineNumber = cInt32(); - int comment = cInt32(); - int beginPrefix = cInt32(); + int beginLineNumber = is.readInt32(); + int comment = is.readInt32(); + int beginPrefix = is.readInt32(); nsPrefix = strings[beginPrefix]; - int beginURI = cInt32(); + int beginURI = is.readInt32(); nsURI = strings[beginURI]; } private void parseNameSpaceEnd() throws IOException { - if (cInt16() != 0x10) { + if (is.readInt16() != 0x10) { die("NAMESPACE header is not 0x0010"); } - if (cInt32() != 0x18) { + if (is.readInt32() != 0x18) { die("NAMESPACE header chunk is not 0x18 big"); } - int endLineNumber = cInt32(); - int comment = cInt32(); - int endPrefix = cInt32(); + int endLineNumber = is.readInt32(); + int comment = is.readInt32(); + int endPrefix = is.readInt32(); nsPrefix = strings[endPrefix]; - int endURI = cInt32(); + int endURI = is.readInt32(); nsURI = strings[endURI]; } @@ -203,15 +246,15 @@ public class BinaryXMLParser { } else { writer.incIndent(); } - if (cInt16() != 0x10) { + if (is.readInt16() != 0x10) { die("ELEMENT HEADER SIZE is not 0x10"); } // TODO: Check element chunk size - cInt32(); - int elementBegLineNumber = cInt32(); - int comment = cInt32(); - int startNS = cInt32(); - int startNSName = cInt32(); // actually is elementName... + is.readInt32(); + int elementBegLineNumber = is.readInt32(); + int comment = is.readInt32(); + int startNS = is.readInt32(); + int startNSName = is.readInt32(); // actually is elementName... if (!wasOneLiner && !"ERROR".equals(currentTag) && !currentTag.equals(strings[startNSName])) { writer.add(">"); } @@ -219,18 +262,18 @@ public class BinaryXMLParser { currentTag = strings[startNSName]; writer.startLine("<").add(strings[startNSName]); writer.attachSourceLine(elementBegLineNumber); - int attributeStart = cInt16(); + int attributeStart = is.readInt16(); if (attributeStart != 0x14) { die("startNS's attributeStart is not 0x14"); } - int attributeSize = cInt16(); + int attributeSize = is.readInt16(); if (attributeSize != 0x14) { die("startNS's attributeSize is not 0x14"); } - int attributeCount = cInt16(); - int idIndex = cInt16(); - int classIndex = cInt16(); - int styleIndex = cInt16(); + int attributeCount = is.readInt16(); + int idIndex = is.readInt16(); + int classIndex = is.readInt16(); + int styleIndex = is.readInt16(); if ("manifest".equals(strings[startNSName])) { writer.add(" xmlns:\"").add(nsURI).add("\""); } @@ -247,18 +290,18 @@ public class BinaryXMLParser { } private void parseAttribute(int i) throws IOException { - int attributeNS = cInt32(); - int attributeName = cInt32(); - int attributeRawValue = cInt32(); - int attrValSize = cInt16(); + int attributeNS = is.readInt32(); + int attributeName = is.readInt32(); + int attributeRawValue = is.readInt32(); + int attrValSize = is.readInt16(); if (attrValSize != 0x08) { die("attrValSize != 0x08 not supported"); } - if (cInt8() != 0) { + if (is.readInt8() != 0) { die("res0 is not 0"); } - int attrValDataType = cInt8(); - int attrValData = cInt32(); + int attrValDataType = is.readInt8(); + int attrValData = is.readInt32(); if (attributeNS != -1) { writer.add(nsPrefix).add(':'); } @@ -319,16 +362,16 @@ public class BinaryXMLParser { } private void parseElementEnd() throws IOException { - if (cInt16() != 0x10) { + if (is.readInt16() != 0x10) { die("ELEMENT END header is not 0x10"); } - if (cInt32() != 0x18) { + if (is.readInt32() != 0x18) { die("ELEMENT END header chunk is not 0x18 big"); } - int endLineNumber = cInt32(); - int comment = cInt32(); - int elementNS = cInt32(); - int elementName = cInt32(); + int endLineNumber = is.readInt32(); + int comment = is.readInt32(); + int elementNS = is.readInt32(); + int elementName = is.readInt32(); if (currentTag.equals(strings[elementName])) { writer.add(" />"); wasOneLiner = true; @@ -345,38 +388,8 @@ public class BinaryXMLParser { } } - private int cInt8() throws IOException { - return input.read(); - } - - private int cInt16() throws IOException { - int b1 = input.read(); - int b2 = input.read(); - return (b2 & 0xFF) << 8 | (b1 & 0xFF); - } - - private int cInt32() throws IOException { - InputStream in = input; - int b1 = in.read(); - int b2 = in.read(); - int b3 = in.read(); - int b4 = in.read(); - return b4 << 24 | (b3 & 0xFF) << 16 | (b2 & 0xFF) << 8 | (b1 & 0xFF); - } - - private void readToArray(byte[] arr) throws IOException { - int count = arr.length; - int pos = input.read(arr, 0, count); - while (pos < count) { - int read = input.read(arr, pos, count - pos); - if (read == -1) { - throw new IOException("No data, can't read " + count + " bytes"); - } - pos += read; - } - } - private void die(String message) { - throw new JadxRuntimeException("Decode error: " + message); + throw new JadxRuntimeException("Decode error: " + message + + ", position: 0x" + Long.toHexString(is.getPos())); } } diff --git a/jadx-core/src/main/java/jadx/core/xmlgen/ParserStream.java b/jadx-core/src/main/java/jadx/core/xmlgen/ParserStream.java new file mode 100644 index 00000000..e1fd2d5e --- /dev/null +++ b/jadx-core/src/main/java/jadx/core/xmlgen/ParserStream.java @@ -0,0 +1,82 @@ +package jadx.core.xmlgen; + +import java.io.IOException; +import java.io.InputStream; + +public class ParserStream { + + private final InputStream input; + private long readPos = 0; + + public ParserStream(InputStream inputStream) { + this.input = inputStream; + } + + public long getPos() { + return readPos; + } + + public int readInt8() throws IOException { + readPos++; + return input.read(); + } + + public int readInt16() throws IOException { + readPos += 2; + int b1 = input.read(); + int b2 = input.read(); + return (b2 & 0xFF) << 8 | (b1 & 0xFF); + } + + public int readInt32() throws IOException { + readPos += 4; + InputStream in = input; + int b1 = in.read(); + int b2 = in.read(); + int b3 = in.read(); + int b4 = in.read(); + return b4 << 24 | (b3 & 0xFF) << 16 | (b2 & 0xFF) << 8 | (b1 & 0xFF); + } + + public byte[] readArray(int count) throws IOException { + readPos += count; + byte[] arr = new byte[count]; + int pos = input.read(arr, 0, count); + while (pos < count) { + int read = input.read(arr, pos, count - pos); + if (read == -1) { + throw new IOException("No data, can't read " + count + " bytes"); + } + pos += read; + } + return arr; + } + + public void skip(long count) throws IOException { + readPos += count; + long pos = input.skip(count); + while (pos < count) { + long skipped = input.skip(count - pos); + if (skipped == -1) { + throw new IOException("No data, can't skip " + count + " bytes"); + } + pos += skipped; + } + } + + public int decodeLength8() throws IOException { + int len = readInt8(); + if ((len & 0x80) != 0) { + len = ((len & 0x7F) << 8) | readInt8(); + } + return len; + } + + public int decodeLength16() throws IOException { + int len = readInt16(); + if ((len & 0x8000) != 0) { + len = ((len & 0x7FFF) << 16) | readInt16(); + } + return len; + } +} diff --git a/jadx-gui/src/main/java/jadx/gui/treemodel/JResource.java b/jadx-gui/src/main/java/jadx/gui/treemodel/JResource.java index da5fed73..188306eb 100644 --- a/jadx-gui/src/main/java/jadx/gui/treemodel/JResource.java +++ b/jadx-gui/src/main/java/jadx/gui/treemodel/JResource.java @@ -144,7 +144,6 @@ public class JResource extends JNode implements Comparable { private boolean isSupportedForView(ResourceType type) { switch (type) { case CODE: - case XML: case ARSC: case FONT: case IMG: @@ -152,6 +151,7 @@ public class JResource extends JNode implements Comparable { return false; case MANIFEST: + case XML: case UNKNOWN: return true; } -- GitLab