提交 cb29af1f 编写于 作者: S sherman

6831794: charset EUC_TW is 12.6% of the total size of charsets.jar

6229811: Several codepoints in EUC_TW failed in roundtrip conversion
Summary: Rewrite the EUC_TW charset to address the size and roundtrip issues.
Reviewed-by: alanb
上级 a7445907
...@@ -827,8 +827,7 @@ CHARSETMAPPING_JARFILE = $(BUILDTOOLJARDIR)/charsetmapping.jar ...@@ -827,8 +827,7 @@ CHARSETMAPPING_JARFILE = $(BUILDTOOLJARDIR)/charsetmapping.jar
$(FILES_gensbcs_out): $(GENCSSRC)/SingleByte-X.java $(GENCSSRC)/sbcs $(FILES_gensbcs_out): $(GENCSSRC)/SingleByte-X.java $(GENCSSRC)/sbcs
@$(prep-target) @$(prep-target)
$(BOOT_JAVA_CMD) -cp $(CHARSETMAPPING_JARFILE) build.tools.charsetmapping.GenerateSBCS \ $(BOOT_JAVA_CMD) -jar $(CHARSETMAPPING_JARFILE) $(GENCSSRC) $(SCS_GEN) sbcs
$(GENCSSRC) $(SCS_GEN) sbcs
# #
# Generated file system implementation classes (Unix only) # Generated file system implementation classes (Unix only)
......
...@@ -58,11 +58,12 @@ ifdef OPENJDK ...@@ -58,11 +58,12 @@ ifdef OPENJDK
else else
RENDER_SUBDIR = dcpr RENDER_SUBDIR = dcpr
endif endif
# nio needs to be compiled before awt so that all charsets are ready
SUBDIRS = jar security javazic misc net audio $(RENDER_SUBDIR) image \ SUBDIRS = jar security javazic misc net audio $(RENDER_SUBDIR) image \
awt splashscreen $(XAWT_SUBDIR) \ nio awt splashscreen $(XAWT_SUBDIR) \
$(HEADLESS_SUBDIR) $(DGA_SUBDIR) \ $(HEADLESS_SUBDIR) $(DGA_SUBDIR) \
font jpeg cmm applet rmi beans $(JDBC_SUBDIR) \ font jpeg cmm applet rmi beans $(JDBC_SUBDIR) \
jawt text nio launcher management $(ORG_SUBDIR) \ jawt text launcher management $(ORG_SUBDIR) \
native2ascii serialver tools jconsole tracing native2ascii serialver tools jconsole tracing
all build clean clobber:: all build clean clobber::
......
...@@ -304,7 +304,7 @@ FILES_src = \ ...@@ -304,7 +304,7 @@ FILES_src = \
sun/io/CharToByteMacUkraine.java \ sun/io/CharToByteMacUkraine.java \
sun/io/CharToByteTIS620.java sun/io/CharToByteTIS620.java
FILES_gen_extsbcs = \ FILES_gen_extcs = \
sun/nio/cs/ext/IBM037.java \ sun/nio/cs/ext/IBM037.java \
sun/nio/cs/ext/IBM1006.java \ sun/nio/cs/ext/IBM1006.java \
sun/nio/cs/ext/IBM1025.java \ sun/nio/cs/ext/IBM1025.java \
...@@ -374,6 +374,8 @@ FILES_gen_extsbcs = \ ...@@ -374,6 +374,8 @@ FILES_gen_extsbcs = \
sun/nio/cs/ext/MacThai.java \ sun/nio/cs/ext/MacThai.java \
sun/nio/cs/ext/MacTurkish.java \ sun/nio/cs/ext/MacTurkish.java \
sun/nio/cs/ext/MacUkraine.java \ sun/nio/cs/ext/MacUkraine.java \
sun/nio/cs/ext/TIS_620.java sun/nio/cs/ext/TIS_620.java \
sun/nio/cs/ext/EUC_TWMapping.java
FILES_java = $(FILES_src) $(FILES_gen_extcs)
FILES_java = $(FILES_src) $(FILES_gen_extsbcs)
\ No newline at end of file
...@@ -61,14 +61,14 @@ endif # PLATFORM ...@@ -61,14 +61,14 @@ endif # PLATFORM
CHARSETS_JAR = $(LIBDIR)/charsets.jar CHARSETS_JAR = $(LIBDIR)/charsets.jar
# extsbcs # extsbcs
FILES_genout_extsbcs = $(FILES_gen_extsbcs:%.java=$(GENSRCDIR)/%.java) FILES_genout_extcs = $(FILES_gen_extcs:%.java=$(GENSRCDIR)/%.java)
# #
# Rules # Rules
# #
include $(BUILDDIR)/common/Classes.gmk include $(BUILDDIR)/common/Classes.gmk
build: $(FILES_genout_extsbcs) $(CHARSETS_JAR) build: $(FILES_genout_extcs) $(CHARSETS_JAR)
# #
# Extra rules to build character converters. # Extra rules to build character converters.
...@@ -77,6 +77,7 @@ SERVICE_DESCRIPTION = java.nio.charset.spi.CharsetProvider ...@@ -77,6 +77,7 @@ SERVICE_DESCRIPTION = java.nio.charset.spi.CharsetProvider
SERVICE_DESCRIPTION_PATH = META-INF/services/$(SERVICE_DESCRIPTION) SERVICE_DESCRIPTION_PATH = META-INF/services/$(SERVICE_DESCRIPTION)
GENCSDATASRC = $(BUILDDIR)/tools/CharsetMapping GENCSDATASRC = $(BUILDDIR)/tools/CharsetMapping
GENCSSRCDIR = $(BUILDDIR)/tools/src/build/tools/charsetmapping
GENCSEXT = $(GENSRCDIR)/sun/nio/cs/ext GENCSEXT = $(GENSRCDIR)/sun/nio/cs/ext
FILES_MAP = $(GENCSDATASRC)/sjis0213.map FILES_MAP = $(GENCSDATASRC)/sjis0213.map
...@@ -86,16 +87,16 @@ CHARSETMAPPING_JARFILE = $(BUILDTOOLJARDIR)/charsetmapping.jar ...@@ -86,16 +87,16 @@ CHARSETMAPPING_JARFILE = $(BUILDTOOLJARDIR)/charsetmapping.jar
$(FILES_DAT): $(FILES_MAP) $(FILES_DAT): $(FILES_MAP)
@$(prep-target) @$(prep-target)
$(BOOT_JAVA_CMD) -jar $(CHARSETMAPPING_JARFILE) \ $(BOOT_JAVA_CMD) -jar $(CHARSETMAPPING_JARFILE) \
$(FILES_MAP) $(FILES_DAT) $(FILES_MAP) $(FILES_DAT) sjis0213
$(FILES_genout_extsbcs): $(GENCSDATASRC)/SingleByte-X.java $(GENCSDATASRC)/extsbcs $(FILES_genout_extcs): $(GENCSDATASRC)/SingleByte-X.java $(GENCSDATASRC)/extsbcs
@$(prep-target) @$(prep-target)
$(RM) -r $(GENCSEXT) $(RM) -r $(GENCSEXT)
$(MKDIR) -p $(GENCSEXT) $(MKDIR) -p $(GENCSEXT)
$(BOOT_JAVA_CMD) -cp $(CHARSETMAPPING_JARFILE) build.tools.charsetmapping.GenerateSBCS \ $(BOOT_JAVA_CMD) -jar $(CHARSETMAPPING_JARFILE) $(GENCSDATASRC) $(GENCSEXT) extsbcs
$(GENCSDATASRC) $(GENCSEXT) extsbcs $(BOOT_JAVA_CMD) -jar $(CHARSETMAPPING_JARFILE) $(GENCSDATASRC) $(GENCSEXT) euctw \
$(GENCSSRCDIR)/GenerateEUC_TW.java
$(CLASSDESTDIR)/$(SERVICE_DESCRIPTION_PATH): \ $(CLASSDESTDIR)/$(SERVICE_DESCRIPTION_PATH): \
$(SHARE_SRC)/classes/sun/nio/cs/ext/$(SERVICE_DESCRIPTION_PATH) $(SHARE_SRC)/classes/sun/nio/cs/ext/$(SERVICE_DESCRIPTION_PATH)
......
...@@ -34,7 +34,7 @@ PROGRAM = charsetmapping ...@@ -34,7 +34,7 @@ PROGRAM = charsetmapping
include $(BUILDDIR)/common/Defs.gmk include $(BUILDDIR)/common/Defs.gmk
BUILDTOOL_SOURCE_ROOT = $(BUILDDIR)/tools/src BUILDTOOL_SOURCE_ROOT = $(BUILDDIR)/tools/src
BUILDTOOL_MAIN = $(PKGDIR)/GenerateMapping.java BUILDTOOL_MAIN = $(PKGDIR)/Main.java
# #
# Build tool jar rules. # Build tool jar rules.
......
...@@ -36,7 +36,7 @@ import java.util.*; ...@@ -36,7 +36,7 @@ import java.util.*;
public class CharsetMapping { public class CharsetMapping {
public final static char UNMAPPABLE_DECODING = '\uFFFD'; public final static char UNMAPPABLE_DECODING = '\uFFFD';
public final static int UNMAPPABLE_ENCODING = -1; public final static int UNMAPPABLE_ENCODING = 0xFFFD;
public static class Entry { public static class Entry {
public int bs; //byte sequence reps public int bs; //byte sequence reps
......
...@@ -27,15 +27,11 @@ package build.tools.charsetmapping; ...@@ -27,15 +27,11 @@ package build.tools.charsetmapping;
import java.io.*; import java.io.*;
import java.util.regex.*; import java.util.regex.*;
import build.tools.charsetmapping.GenerateSBCS;
import static build.tools.charsetmapping.CharsetMapping.*; import static build.tools.charsetmapping.CharsetMapping.*;
public class GenerateMapping { public class GenerateMapping {
public static void main(String argv[]) throws IOException {
if (argv.length < 2) { public static void genMapping(String argv[]) throws IOException {
System.out.println("Usage: java GenerateMapping fMap fDat");
System.exit(1);
}
genDataJIS0213(new FileInputStream(argv[0]), genDataJIS0213(new FileInputStream(argv[0]),
new FileOutputStream(argv[1])); new FileOutputStream(argv[1]));
} }
......
...@@ -34,11 +34,8 @@ import java.nio.charset.*; ...@@ -34,11 +34,8 @@ import java.nio.charset.*;
import static build.tools.charsetmapping.CharsetMapping.*; import static build.tools.charsetmapping.CharsetMapping.*;
public class GenerateSBCS { public class GenerateSBCS {
public static void main(String args[]) throws Exception {
if (args.length < 3) { public static void genSBCS(String args[]) throws Exception {
System.err.println("Usage: java GenSBCS srcDir dstDir config");
System.exit(1);
}
Scanner s = new Scanner(new File(args[0], args[2])); Scanner s = new Scanner(new File(args[0], args[2]));
while (s.hasNextLine()) { while (s.hasNextLine()) {
......
...@@ -39,30 +39,11 @@ public class ByteToCharEUC_TW extends ByteToCharConverter ...@@ -39,30 +39,11 @@ public class ByteToCharEUC_TW extends ByteToCharConverter
private final byte G4 = 4; private final byte G4 = 4;
private final byte MSB = (byte) 0x80; private final byte MSB = (byte) 0x80;
private final byte SS2 = (byte) 0x8E; private final byte SS2 = (byte) 0x8E;
private final byte P2 = (byte) 0xA2;
private final byte P3 = (byte) 0xA3;
protected final char REPLACE_CHAR = '\uFFFD';
private byte firstByte = 0, state = G0; private byte firstByte = 0, state = G0;
public static String unicodeCNS2, unicodeCNS3;
private static String unicodeCNS4, unicodeCNS5, unicodeCNS6;
private static String unicodeCNS7, unicodeCNS15;
private int cnsPlane = 0; private int cnsPlane = 0;
private final static EUC_TW nioCoder = new EUC_TW();
public static String unicodeCNS1 = nioCoder.getUnicodeCNS1(); private EUC_TW.Decoder dec = (EUC_TW.Decoder)(new EUC_TW().newDecoder());
static String[] cnsChars = {
unicodeCNS2 = nioCoder.getUnicodeCNS2(),
unicodeCNS3 = nioCoder.getUnicodeCNS3(),
unicodeCNS4 = nioCoder.getUnicodeCNS4(),
unicodeCNS5 = nioCoder.getUnicodeCNS5(),
unicodeCNS6 = nioCoder.getUnicodeCNS6(),
unicodeCNS7 = nioCoder.getUnicodeCNS7(),
unicodeCNS15 = nioCoder.getUnicodeCNS15()
};
public ByteToCharEUC_TW() { public ByteToCharEUC_TW() {
} }
...@@ -81,6 +62,7 @@ public class ByteToCharEUC_TW extends ByteToCharConverter ...@@ -81,6 +62,7 @@ public class ByteToCharEUC_TW extends ByteToCharConverter
} }
public void reset() { public void reset() {
dec.reset();
state = G0; state = G0;
firstByte = 0; firstByte = 0;
byteOff = charOff = 0; byteOff = charOff = 0;
...@@ -95,7 +77,7 @@ public class ByteToCharEUC_TW extends ByteToCharConverter ...@@ -95,7 +77,7 @@ public class ByteToCharEUC_TW extends ByteToCharConverter
ConversionBufferFullException ConversionBufferFullException
{ {
int inputSize = 0; int inputSize = 0;
char outputChar = (char) 0; char[] c1 = new char[1];
byteOff = inOff; byteOff = inOff;
charOff = outOff; charOff = outOff;
...@@ -104,11 +86,12 @@ public class ByteToCharEUC_TW extends ByteToCharConverter ...@@ -104,11 +86,12 @@ public class ByteToCharEUC_TW extends ByteToCharConverter
while (byteOff < inEnd) { while (byteOff < inEnd) {
if (charOff >= outEnd) if (charOff >= outEnd)
throw new ConversionBufferFullException(); throw new ConversionBufferFullException();
char[] outputChar = null;
switch (state) { switch (state) {
case G0: case G0:
if ( (input[byteOff] & MSB) == 0) { // ASCII if ( (input[byteOff] & MSB) == 0) { // ASCII
outputChar = (char) input[byteOff]; outputChar = c1;
outputChar[0] = (char) input[byteOff];
} else if (input[byteOff] == SS2) { // Codeset 2 } else if (input[byteOff] == SS2) { // Codeset 2
state = G2; state = G2;
} else { // Codeset 1 } else { // Codeset 1
...@@ -119,9 +102,10 @@ public class ByteToCharEUC_TW extends ByteToCharConverter ...@@ -119,9 +102,10 @@ public class ByteToCharEUC_TW extends ByteToCharConverter
case G1: case G1:
inputSize = 2; inputSize = 2;
if ( (input[byteOff] & MSB) != 0) { // 2nd byte if ( (input[byteOff] & MSB) != 0) { // 2nd byte
cnsPlane = 1; cnsPlane = 0;
outputChar = convToUnicode(firstByte, outputChar = dec.toUnicode(firstByte & 0xff,
input[byteOff], unicodeCNS1); input[byteOff] & 0xff,
cnsPlane);
} else { // Error } else { // Error
badInputLength = 1; badInputLength = 1;
throw new MalformedInputException(); throw new MalformedInputException();
...@@ -154,9 +138,9 @@ public class ByteToCharEUC_TW extends ByteToCharConverter ...@@ -154,9 +138,9 @@ public class ByteToCharEUC_TW extends ByteToCharConverter
break; break;
case G4: case G4:
if ( (input[byteOff] & MSB) != 0) { // 2nd byte if ( (input[byteOff] & MSB) != 0) { // 2nd byte
outputChar = convToUnicode(firstByte, outputChar = dec.toUnicode(firstByte & 0xff,
input[byteOff], input[byteOff] & 0xff,
cnsChars[cnsPlane - 2]); cnsPlane - 1);
} else { // Error } else { // Error
badInputLength = 3; badInputLength = 3;
throw new MalformedInputException(); throw new MalformedInputException();
...@@ -166,21 +150,19 @@ public class ByteToCharEUC_TW extends ByteToCharConverter ...@@ -166,21 +150,19 @@ public class ByteToCharEUC_TW extends ByteToCharConverter
break; break;
} }
byteOff++; byteOff++;
if (state == G0) {
if (outputChar != (char) 0) { if (outputChar == null) {
if (outputChar == REPLACE_CHAR) { if (subMode) { // substitution enabled
if (subMode) // substitution enabled outputChar = c1;
outputChar = subChars[0]; outputChar[0] = subChars[0];
else { } else {
badInputLength = inputSize; badInputLength = inputSize;
throw new UnknownCharacterException(); throw new UnknownCharacterException();
} }
} }
output[charOff++] = outputChar; output[charOff++] = outputChar[0];
outputChar = 0;
} }
} }
return charOff - outOff; return charOff - outOff;
} }
...@@ -191,25 +173,4 @@ public class ByteToCharEUC_TW extends ByteToCharConverter ...@@ -191,25 +173,4 @@ public class ByteToCharEUC_TW extends ByteToCharConverter
public String getCharacterEncoding() { public String getCharacterEncoding() {
return "EUC_TW"; return "EUC_TW";
} }
protected char convToUnicode(byte byte1, byte byte2, String table)
{
int index;
if ((byte1 & 0xff) < 0xa1 || (byte2 & 0xff) < 0xa1 ||
(byte1 & 0xff) > 0xfe || (byte2 & 0xff) > 0xfe)
return REPLACE_CHAR;
index = (((byte1 & 0xff) - 0xa1) * 94) + (byte2 & 0xff) - 0xa1;
if (index < 0 || index >= table.length())
return REPLACE_CHAR;
// Planes 3 and above containing zero value lead byte
// to accommodate surrogates for mappings which decode to a surrogate
// pair
if (this.cnsPlane >= 3)
index = (index * 2) + 1;
return table.charAt(index);
}
} }
...@@ -33,19 +33,7 @@ import sun.nio.cs.ext.EUC_TW; ...@@ -33,19 +33,7 @@ import sun.nio.cs.ext.EUC_TW;
public class CharToByteEUC_TW extends CharToByteConverter public class CharToByteEUC_TW extends CharToByteConverter
{ {
private final byte MSB = (byte)0x80; private final EUC_TW.Encoder enc = (EUC_TW.Encoder)(new EUC_TW().newEncoder());
private final byte SS2 = (byte) 0x8E;
private final byte P2 = (byte) 0xA2;
private final byte P3 = (byte) 0xA3;
private final static EUC_TW nioCoder = new EUC_TW();
private static String uniTab1 = nioCoder.getUniTab1();
private static String uniTab2 = nioCoder.getUniTab2();
private static String uniTab3 = nioCoder.getUniTab3();
private static String cnsTab1 = nioCoder.getCNSTab1();
private static String cnsTab2 = nioCoder.getCNSTab2();
private static String cnsTab3 = nioCoder.getCNSTab3();
public int flush(byte[] output, int outStart, int outEnd) public int flush(byte[] output, int outStart, int outEnd)
throws MalformedInputException throws MalformedInputException
...@@ -59,10 +47,7 @@ public class CharToByteEUC_TW extends CharToByteConverter ...@@ -59,10 +47,7 @@ public class CharToByteEUC_TW extends CharToByteConverter
} }
public boolean canConvert(char ch){ public boolean canConvert(char ch){
if (((0xFF00 & ch) != 0) && (getNative(ch) != -1)){ return enc.canEncode(ch);
return true;
}
return false;
} }
/** /**
...@@ -74,9 +59,8 @@ public class CharToByteEUC_TW extends CharToByteConverter ...@@ -74,9 +59,8 @@ public class CharToByteEUC_TW extends CharToByteConverter
ConversionBufferFullException ConversionBufferFullException
{ {
int outputSize; int outputSize;
byte [] tmpbuf = new byte[4]; byte [] tmpbuf = new byte[4];;
byte [] outputByte; byte [] outputByte;
byteOff = outOff; byteOff = outOff;
//Fixed 4122961 by bringing the charOff++ out to this //Fixed 4122961 by bringing the charOff++ out to this
...@@ -88,7 +72,7 @@ public class CharToByteEUC_TW extends CharToByteConverter ...@@ -88,7 +72,7 @@ public class CharToByteEUC_TW extends CharToByteConverter
outputSize = 1; outputSize = 1;
outputByte[0] = (byte)(input[charOff] & 0x7f); outputByte[0] = (byte)(input[charOff] & 0x7f);
} else { } else {
outputSize = unicodeToEUC(input[charOff], outputByte); outputSize = enc.toEUC(input[charOff], outputByte);
} }
if (outputSize == -1) { if (outputSize == -1) {
...@@ -112,7 +96,6 @@ public class CharToByteEUC_TW extends CharToByteConverter ...@@ -112,7 +96,6 @@ public class CharToByteEUC_TW extends CharToByteConverter
} }
/** /**
* returns the maximum number of bytes needed to convert a char * returns the maximum number of bytes needed to convert a char
*/ */
...@@ -120,111 +103,10 @@ public class CharToByteEUC_TW extends CharToByteConverter ...@@ -120,111 +103,10 @@ public class CharToByteEUC_TW extends CharToByteConverter
return 4; return 4;
} }
/** /**
* Return the character set ID * Return the character set ID
*/ */
public String getCharacterEncoding() { public String getCharacterEncoding() {
return "EUC_TW"; return "EUC_TW";
} }
protected int getNative(char unicode) {
int i,
cns; // 2 chars in CNS table make 1 CNS code
if (unicode < UniTab2[0]) {
if ((i = searchTab(unicode, UniTab1)) == -1)
return -1;
cns = (CNSTab1[2*i] << 16) + CNSTab1[2*i+1];
return cns;
} else if (unicode < UniTab3[0]) {
if ((i = searchTab(unicode, UniTab2)) == -1)
return -1;
cns = (CNSTab2[2*i] << 16) + CNSTab2[2*i+1];
return cns;
} else {
if ((i = searchTab(unicode, UniTab3)) == -1)
return -1;
cns = (CNSTab3[2*i] << 16) + CNSTab3[2*i+1];
return cns;
}
}
protected int searchTab(char code, char [] table) {
int i = 0, l, h;
for (l = 0, h = table.length - 1; l < h; ) {
if (table[l] == code) {
i = l;
break;
}
if (table[h] == code) {
i = h;
break;
}
i = (l + h) / 2;
if (table[i] == code)
break;
if (table[i] < code)
l = i + 1;
else h = i - 1;
}
if (code == table[i]) {
return i;
} else {
return -1;
}
}
private int unicodeToEUC(char unicode, byte ebyte[]) {
int cns = getNative(unicode);
if ((cns >> 16) == 0x01) { // Plane 1
ebyte[0] = (byte) (((cns & 0xff00) >> 8) | MSB);
ebyte[1] = (byte) ((cns & 0xff) | MSB);
return 2;
}
byte cnsPlane = (byte)(cns >> 16);
if (cnsPlane >= (byte)0x02) { // Plane 2
ebyte[0] = SS2;
ebyte[1] = (byte) (cnsPlane | (byte)0xA0);
ebyte[2] = (byte) (((cns & 0xff00) >> 8) | MSB);
ebyte[3] = (byte) ((cns & 0xff) | MSB);
return 4;
}
return -1;
}
protected int unicodeToEUC(char unicode) {
if (unicode <= 0x7F) { // ASCII falls into EUC_TW CS0
return unicode;
}
int cns = getNative(unicode);
int plane = cns >> 16;
int euc = (cns & 0x0000FFFF) | 0x00008080;
if (plane == 1) {
return euc;
} else if (plane == 2) {
return ((SS2 << 24) & 0xFF000000) | ((P2 << 16) & 0x00FF0000) |
euc;
} else if (plane == 3) {
return ((SS2 << 24) & 0xFF000000) | ((P3 << 16) & 0x00FF0000) |
euc;
}
return -1;
}
private char [] UniTab1 = uniTab1.toCharArray();
private char [] UniTab2 = uniTab2.toCharArray();
private char [] UniTab3 = uniTab3.toCharArray();
private char [] CNSTab1 = cnsTab1.toCharArray();
private char [] CNSTab2 = cnsTab2.toCharArray();
private char [] CNSTab3 = cnsTab3.toCharArray();
} }
因为它太大了，无法显示 source diff。你可以改为查看 blob。
...@@ -389,8 +389,8 @@ abstract class ISO2022 ...@@ -389,8 +389,8 @@ abstract class ISO2022
protected static class Encoder extends CharsetEncoder { protected static class Encoder extends CharsetEncoder {
private final Surrogate.Parser sgp = new Surrogate.Parser(); private final Surrogate.Parser sgp = new Surrogate.Parser();
private final byte SS2 = (byte)0x8e; private final byte SS2 = (byte)0x8e;
private final byte P2 = (byte)0xA2; private final byte PLANE2 = (byte)0xA2;
private final byte P3 = (byte)0xA3; private final byte PLANE3 = (byte)0xA3;
private final byte MSB = (byte)0x80; private final byte MSB = (byte)0x80;
protected final byte maximumDesignatorLength = 4; protected final byte maximumDesignatorLength = 4;
...@@ -460,32 +460,32 @@ abstract class ISO2022 ...@@ -460,32 +460,32 @@ abstract class ISO2022
ebyte[index++] = (byte)(convByte[0] & 0x7f); ebyte[index++] = (byte)(convByte[0] & 0x7f);
ebyte[index++] = (byte)(convByte[1] & 0x7f); ebyte[index++] = (byte)(convByte[1] & 0x7f);
} else { } else {
if((convByte[0] == SS2) && (convByte[1] == P2)) { if(convByte[0] == SS2) {
if (!SS2DesDefined) { if (convByte[1] == PLANE2) {
newSS2DesDefined = true; if (!SS2DesDefined) {
ebyte[0] = ISO_ESC; newSS2DesDefined = true;
tmpByte = SS2Desig.getBytes(); ebyte[0] = ISO_ESC;
System.arraycopy(tmpByte, 0, ebyte, 1, tmpByte.length); tmpByte = SS2Desig.getBytes();
index = tmpByte.length+1; System.arraycopy(tmpByte, 0, ebyte, 1, tmpByte.length);
} index = tmpByte.length+1;
ebyte[index++] = ISO_ESC; }
ebyte[index++] = ISO_SS2_7; ebyte[index++] = ISO_ESC;
ebyte[index++] = (byte)(convByte[2] & 0x7f); ebyte[index++] = ISO_SS2_7;
ebyte[index++] = (byte)(convByte[3] & 0x7f); ebyte[index++] = (byte)(convByte[2] & 0x7f);
} ebyte[index++] = (byte)(convByte[3] & 0x7f);
if((convByte[0] == SS2)&&(convByte[1] == 0xA3)) } else if (convByte[1] == PLANE3) {
{ if(!SS3DesDefined){
if(!SS3DesDefined){ newSS3DesDefined = true;
newSS3DesDefined = true; ebyte[0] = ISO_ESC;
ebyte[0] = ISO_ESC; tmpByte = SS3Desig.getBytes();
tmpByte = SS3Desig.getBytes(); System.arraycopy(tmpByte, 0, ebyte, 1, tmpByte.length);
System.arraycopy(tmpByte, 0, ebyte, 1, tmpByte.length); index = tmpByte.length+1;
index = tmpByte.length+1; }
ebyte[index++] = ISO_ESC;
ebyte[index++] = ISO_SS3_7;
ebyte[index++] = (byte)(convByte[2] & 0x7f);
ebyte[index++] = (byte)(convByte[3] & 0x7f);
} }
ebyte[index++] = ISO_ESC;
ebyte[index++] = ISO_SS3_7;
ebyte[index++] = (byte)(convByte[2] & 0x7f);
ebyte[index++] = (byte)(convByte[3] & 0x7f);
} }
} }
return index; return index;
......
...@@ -105,17 +105,19 @@ public class ISO2022_CN ...@@ -105,17 +105,19 @@ public class ISO2022_CN
private char cnsDecode(byte byte1, byte byte2, byte SS) { private char cnsDecode(byte byte1, byte byte2, byte SS) {
byte1 |= MSB; byte1 |= MSB;
byte2 |= MSB; byte2 |= MSB;
if (SS == ISO_SS2_7) { int p = 0;
return cnsDecoder.convToUnicode(byte1, byte2, if (SS == ISO_SS2_7)
cnsDecoder.unicodeCNS2); p = 1; //plane 2, index -- 1
else if (SS == ISO_SS3_7)
} else { //SS == ISO_SS3_7 p = 2; //plane 3, index -- 2
char[] outSurr = cnsDecoder.convToSurrogate(byte1, byte2, else
cnsDecoder.unicodeCNS3); return REPLACE_CHAR; //never happen.
if (outSurr == null || outSurr[0] != '\u0000') char[] ret = cnsDecoder.toUnicode(byte1 & 0xff,
return REPLACE_CHAR; byte2 & 0xff,
return outSurr[1]; p);
} if (ret == null || ret.length == 2)
return REPLACE_CHAR;
return ret[0];
} }
private char SODecode(byte byte1, byte byte2, byte SOD) { private char SODecode(byte byte1, byte byte2, byte SOD) {
...@@ -125,9 +127,12 @@ public class ISO2022_CN ...@@ -125,9 +127,12 @@ public class ISO2022_CN
return gb2312Decoder.decodeDouble(byte1 & 0xff, return gb2312Decoder.decodeDouble(byte1 & 0xff,
byte2 & 0xff); byte2 & 0xff);
} else { // SOD == SODesigCNS } else { // SOD == SODesigCNS
return cnsDecoder.convToUnicode(byte1, char[] ret = cnsDecoder.toUnicode(byte1 & 0xff,
byte2, byte2 & 0xff,
cnsDecoder.unicodeCNS1); 0);
if (ret == null)
return REPLACE_CHAR;
return ret[0];
} }
} }
......
/* /*
* Copyright 2001-2005 Sun Microsystems, Inc. All Rights Reserved. * Copyright 2001-2008 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
...@@ -66,16 +66,19 @@ public abstract class X11CNS11643 extends Charset { ...@@ -66,16 +66,19 @@ public abstract class X11CNS11643 extends Charset {
super(cs); super(cs);
this.plane = plane; this.plane = plane;
} }
private byte[] bb = new byte[4];
public boolean canEncode(char c) { public boolean canEncode(char c) {
if (c <= 0x7F) { if (c <= 0x7F) {
return false; return false;
} }
int p = getNative(c) >> 16; int nb = toEUC(c, bb);
if (p == 1 && plane == 0 || if (nb == -1)
p == 2 && plane == 2 || return false;
p == 3 && plane == 3) int p = 0;
return true; if (nb == 4)
return false; p = (bb[1] & 0xff) - 0xa0;
return (p == plane);
} }
public boolean isLegalReplacement(byte[] repl) { public boolean isLegalReplacement(byte[] repl) {
...@@ -93,19 +96,26 @@ public abstract class X11CNS11643 extends Charset { ...@@ -93,19 +96,26 @@ public abstract class X11CNS11643 extends Charset {
try { try {
while (sp < sl) { while (sp < sl) {
char c = sa[sp]; char c = sa[sp];
if (c >= '\uFFFE' || c <= '\u007f') if ( c > '\u007f'&& c < '\uFFFE') {
return CoderResult.unmappableForLength(1); int nb = toEUC(c, bb);
int cns = getNative(c); if (nb != -1) {
int p = cns >> 16; int p = 0;
if (p == 1 && plane == 0 || if (nb == 4)
p == 2 && plane == 2 || p = (bb[1] & 0xff) - 0xa0;
p == 3 && plane == 3) { if (p == plane) {
if (dl - dp < 2) if (dl - dp < 2)
return CoderResult.OVERFLOW; return CoderResult.OVERFLOW;
da[dp++] = (byte) ((cns >> 8) & 0x7f); if (nb == 2) {
da[dp++] = (byte) (cns & 0x7f); da[dp++] = (byte)(bb[0] & 0x7f);
sp++; da[dp++] = (byte)(bb[1] & 0x7f);
continue; } else {
da[dp++] = (byte)(bb[2] & 0x7f);
da[dp++] = (byte)(bb[3] & 0x7f);
}
sp++;
continue;
}
}
} }
return CoderResult.unmappableForLength(1); return CoderResult.unmappableForLength(1);
} }
...@@ -118,23 +128,17 @@ public abstract class X11CNS11643 extends Charset { ...@@ -118,23 +128,17 @@ public abstract class X11CNS11643 extends Charset {
} }
private class Decoder extends EUC_TW.Decoder { private class Decoder extends EUC_TW.Decoder {
int plane;
private String table; private String table;
protected Decoder(Charset cs, int plane) { protected Decoder(Charset cs, int plane) {
super(cs); super(cs);
switch (plane) { if (plane == 0)
case 0: this.plane = plane;
table = unicodeCNS1; else if (plane == 2 || plane == 3)
break; this.plane = plane - 1;
case 2: else
table = unicodeCNS2;
break;
case 3:
table = unicodeCNS3;
break;
default:
throw new IllegalArgumentException throw new IllegalArgumentException
("Only planes 1, 2, and 3 supported"); ("Only planes 1, 2, and 3 supported");
}
} }
//we only work on array backed buffer. //we only work on array backed buffer.
...@@ -142,33 +146,26 @@ public abstract class X11CNS11643 extends Charset { ...@@ -142,33 +146,26 @@ public abstract class X11CNS11643 extends Charset {
byte[] sa = src.array(); byte[] sa = src.array();
int sp = src.arrayOffset() + src.position(); int sp = src.arrayOffset() + src.position();
int sl = src.arrayOffset() + src.limit(); int sl = src.arrayOffset() + src.limit();
assert (sp <= sl);
sp = (sp <= sl ? sp : sl);
char[] da = dst.array(); char[] da = dst.array();
int dp = dst.arrayOffset() + dst.position(); int dp = dst.arrayOffset() + dst.position();
int dl = dst.arrayOffset() + dst.limit(); int dl = dst.arrayOffset() + dst.limit();
assert (dp <= dl);
dp = (dp <= dl ? dp : dl);
try { try {
while (sp < sl) { while (sp < sl) {
if ( sl - sp < 2) { if ( sl - sp < 2) {
return CoderResult.UNDERFLOW; return CoderResult.UNDERFLOW;
} }
byte b1 = sa[sp]; int b1 = (sa[sp] & 0xff) | 0x80;
byte b2 = sa[sp + 1]; int b2 = (sa[sp + 1] & 0xff) | 0x80;
char c = convToUnicode((byte)(b1 | 0x80), char[] cc = toUnicode(b1, b2, plane);
(byte)(b2 | 0x80), // plane3 has non-bmp characters(added), x11cnsp3
table); // however does not support them
if (c == replacement().charAt(0) if (cc == null || cc.length == 2)
//to keep the compatibility with b2cX11CNS11643
/*|| c == '\u0000'*/) {
return CoderResult.unmappableForLength(2); return CoderResult.unmappableForLength(2);
}
if (dl - dp < 1) if (dl - dp < 1)
return CoderResult.OVERFLOW; return CoderResult.OVERFLOW;
da[dp++] = c; da[dp++] = cc[0];
sp +=2; sp +=2;
} }
return CoderResult.UNDERFLOW; return CoderResult.UNDERFLOW;
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
*/ */
/* @test /* @test
@bug 4779029 4924625 6392664 @bug 4779029 4924625 6392664 6730652
@summary Test decoding of various permutations of valid ISO-2022-CN byte sequences @summary Test decoding of various permutations of valid ISO-2022-CN byte sequences
*/ */
...@@ -387,6 +387,12 @@ public class TestISO2022CNDecoder ...@@ -387,6 +387,12 @@ public class TestISO2022CNDecoder
}; };
private static CoderResult test15_result = CoderResult.unmappableForLength(4); private static CoderResult test15_result = CoderResult.unmappableForLength(4);
private static boolean encodeTest6730652 () throws Exception {
//sample p3 codepoints
String strCNSP3 = "\u4e28\u4e36\u4e3f\u4e85\u4e05\u4e04\u5369\u53b6\u4e2a\u4e87\u4e49\u51e2\u56b8\u56b9\u56c4\u8053\u92b0";
return strCNSP3.equals(new String(strCNSP3.getBytes("x-ISO-2022-CN-CNS"), "x-ISO-2022-CN-CNS"));
}
/** /**
* Main program to test ISO2022CN conformance * Main program to test ISO2022CN conformance
* *
...@@ -430,6 +436,9 @@ public class TestISO2022CNDecoder ...@@ -430,6 +436,9 @@ public class TestISO2022CNDecoder
pass &= decodeTest(test13_bytes, test13_chars, "escapes13"); pass &= decodeTest(test13_bytes, test13_chars, "escapes13");
pass &= decodeResultTest(test14_bytes, test14_result, "escapes14"); pass &= decodeResultTest(test14_bytes, test14_result, "escapes14");
pass &= decodeResultTest(test15_bytes, test15_result, "escapes15"); pass &= decodeResultTest(test15_bytes, test15_result, "escapes15");
pass &= encodeTest6730652 ();
// PASS/FAIL status is what the whole thing is about. // PASS/FAIL status is what the whole thing is about.
// //
if (! pass) { if (! pass) {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册