提交 db580a7c 编写于 作者: S sherman

6831794: charset EUC_TW is 12.6% of the total size of charsets.jar

6229811: Several codepoints in EUC_TW failed in roundtrip conversion
Summary: Rewrite the EUC_TW charset to address the size and round-trip issues.
Reviewed-by: alanb
上级 ded7c2d7
......@@ -827,8 +827,7 @@ CHARSETMAPPING_JARFILE = $(BUILDTOOLJARDIR)/charsetmapping.jar
$(FILES_gensbcs_out): $(GENCSSRC)/SingleByte-X.java $(GENCSSRC)/sbcs
@$(prep-target)
$(BOOT_JAVA_CMD) -cp $(CHARSETMAPPING_JARFILE) build.tools.charsetmapping.GenerateSBCS \
$(GENCSSRC) $(SCS_GEN) sbcs
$(BOOT_JAVA_CMD) -jar $(CHARSETMAPPING_JARFILE) $(GENCSSRC) $(SCS_GEN) sbcs
#
# Generated file system implementation classes (Unix only)
......
......@@ -58,11 +58,12 @@ ifdef OPENJDK
else
RENDER_SUBDIR = dcpr
endif
# nio needs to be compiled before awt so that all charsets are ready
SUBDIRS = jar security javazic misc net audio $(RENDER_SUBDIR) image \
awt splashscreen $(XAWT_SUBDIR) \
nio awt splashscreen $(XAWT_SUBDIR) \
$(HEADLESS_SUBDIR) $(DGA_SUBDIR) \
font jpeg cmm applet rmi beans $(JDBC_SUBDIR) \
jawt text nio launcher management $(ORG_SUBDIR) \
jawt text launcher management $(ORG_SUBDIR) \
native2ascii serialver tools jconsole tracing
all build clean clobber::
......
......@@ -304,7 +304,7 @@ FILES_src = \
sun/io/CharToByteMacUkraine.java \
sun/io/CharToByteTIS620.java
FILES_gen_extsbcs = \
FILES_gen_extcs = \
sun/nio/cs/ext/IBM037.java \
sun/nio/cs/ext/IBM1006.java \
sun/nio/cs/ext/IBM1025.java \
......@@ -374,6 +374,8 @@ FILES_gen_extsbcs = \
sun/nio/cs/ext/MacThai.java \
sun/nio/cs/ext/MacTurkish.java \
sun/nio/cs/ext/MacUkraine.java \
sun/nio/cs/ext/TIS_620.java
sun/nio/cs/ext/TIS_620.java \
sun/nio/cs/ext/EUC_TWMapping.java
FILES_java = $(FILES_src) $(FILES_gen_extcs)
FILES_java = $(FILES_src) $(FILES_gen_extsbcs)
\ No newline at end of file
......@@ -61,14 +61,14 @@ endif # PLATFORM
CHARSETS_JAR = $(LIBDIR)/charsets.jar
# extsbcs
FILES_genout_extsbcs = $(FILES_gen_extsbcs:%.java=$(GENSRCDIR)/%.java)
FILES_genout_extcs = $(FILES_gen_extcs:%.java=$(GENSRCDIR)/%.java)
#
# Rules
#
include $(BUILDDIR)/common/Classes.gmk
build: $(FILES_genout_extsbcs) $(CHARSETS_JAR)
build: $(FILES_genout_extcs) $(CHARSETS_JAR)
#
# Extra rules to build character converters.
......@@ -77,6 +77,7 @@ SERVICE_DESCRIPTION = java.nio.charset.spi.CharsetProvider
SERVICE_DESCRIPTION_PATH = META-INF/services/$(SERVICE_DESCRIPTION)
GENCSDATASRC = $(BUILDDIR)/tools/CharsetMapping
GENCSSRCDIR = $(BUILDDIR)/tools/src/build/tools/charsetmapping
GENCSEXT = $(GENSRCDIR)/sun/nio/cs/ext
FILES_MAP = $(GENCSDATASRC)/sjis0213.map
......@@ -86,16 +87,16 @@ CHARSETMAPPING_JARFILE = $(BUILDTOOLJARDIR)/charsetmapping.jar
$(FILES_DAT): $(FILES_MAP)
@$(prep-target)
$(BOOT_JAVA_CMD) -jar $(CHARSETMAPPING_JARFILE) \
$(FILES_MAP) $(FILES_DAT)
$(FILES_MAP) $(FILES_DAT) sjis0213
$(FILES_genout_extsbcs): $(GENCSDATASRC)/SingleByte-X.java $(GENCSDATASRC)/extsbcs
$(FILES_genout_extcs): $(GENCSDATASRC)/SingleByte-X.java $(GENCSDATASRC)/extsbcs
@$(prep-target)
$(RM) -r $(GENCSEXT)
$(MKDIR) -p $(GENCSEXT)
$(BOOT_JAVA_CMD) -cp $(CHARSETMAPPING_JARFILE) build.tools.charsetmapping.GenerateSBCS \
$(GENCSDATASRC) $(GENCSEXT) extsbcs
$(BOOT_JAVA_CMD) -jar $(CHARSETMAPPING_JARFILE) $(GENCSDATASRC) $(GENCSEXT) extsbcs
$(BOOT_JAVA_CMD) -jar $(CHARSETMAPPING_JARFILE) $(GENCSDATASRC) $(GENCSEXT) euctw \
$(GENCSSRCDIR)/GenerateEUC_TW.java
$(CLASSDESTDIR)/$(SERVICE_DESCRIPTION_PATH): \
$(SHARE_SRC)/classes/sun/nio/cs/ext/$(SERVICE_DESCRIPTION_PATH)
......
......@@ -34,7 +34,7 @@ PROGRAM = charsetmapping
include $(BUILDDIR)/common/Defs.gmk
BUILDTOOL_SOURCE_ROOT = $(BUILDDIR)/tools/src
BUILDTOOL_MAIN = $(PKGDIR)/GenerateMapping.java
BUILDTOOL_MAIN = $(PKGDIR)/Main.java
#
# Build tool jar rules.
......
......@@ -36,7 +36,7 @@ import java.util.*;
public class CharsetMapping {
public final static char UNMAPPABLE_DECODING = '\uFFFD';
public final static int UNMAPPABLE_ENCODING = -1;
public final static int UNMAPPABLE_ENCODING = 0xFFFD;
public static class Entry {
public int bs; //byte sequence reps
......
......@@ -27,15 +27,11 @@ package build.tools.charsetmapping;
import java.io.*;
import java.util.regex.*;
import build.tools.charsetmapping.GenerateSBCS;
import static build.tools.charsetmapping.CharsetMapping.*;
public class GenerateMapping {
public static void main(String argv[]) throws IOException {
if (argv.length < 2) {
System.out.println("Usage: java GenerateMapping fMap fDat");
System.exit(1);
}
/**
 * Opens the file named by {@code argv[0]} for reading and the file named by
 * {@code argv[1]} for writing, and hands both streams to
 * {@code genDataJIS0213}.
 *
 * @param argv argv[0] is the input file path, argv[1] the output file path
 * @throws IOException if a file cannot be opened or the generation fails
 */
public static void genMapping(String argv[]) throws IOException {
    // The input is opened before the output, as in a single nested call.
    FileInputStream mapIn = new FileInputStream(argv[0]);
    FileOutputStream datOut = new FileOutputStream(argv[1]);
    // NOTE(review): neither stream is closed here; presumably
    // genDataJIS0213 does so -- confirm against its implementation.
    genDataJIS0213(mapIn, datOut);
}
......
......@@ -34,11 +34,8 @@ import java.nio.charset.*;
import static build.tools.charsetmapping.CharsetMapping.*;
public class GenerateSBCS {
public static void main(String args[]) throws Exception {
if (args.length < 3) {
System.err.println("Usage: java GenSBCS srcDir dstDir config");
System.exit(1);
}
public static void genSBCS(String args[]) throws Exception {
Scanner s = new Scanner(new File(args[0], args[2]));
while (s.hasNextLine()) {
......
......@@ -39,30 +39,11 @@ public class ByteToCharEUC_TW extends ByteToCharConverter
private final byte G4 = 4;
private final byte MSB = (byte) 0x80;
private final byte SS2 = (byte) 0x8E;
private final byte P2 = (byte) 0xA2;
private final byte P3 = (byte) 0xA3;
protected final char REPLACE_CHAR = '\uFFFD';
private byte firstByte = 0, state = G0;
public static String unicodeCNS2, unicodeCNS3;
private static String unicodeCNS4, unicodeCNS5, unicodeCNS6;
private static String unicodeCNS7, unicodeCNS15;
private int cnsPlane = 0;
private final static EUC_TW nioCoder = new EUC_TW();
public static String unicodeCNS1 = nioCoder.getUnicodeCNS1();
static String[] cnsChars = {
unicodeCNS2 = nioCoder.getUnicodeCNS2(),
unicodeCNS3 = nioCoder.getUnicodeCNS3(),
unicodeCNS4 = nioCoder.getUnicodeCNS4(),
unicodeCNS5 = nioCoder.getUnicodeCNS5(),
unicodeCNS6 = nioCoder.getUnicodeCNS6(),
unicodeCNS7 = nioCoder.getUnicodeCNS7(),
unicodeCNS15 = nioCoder.getUnicodeCNS15()
};
private EUC_TW.Decoder dec = (EUC_TW.Decoder)(new EUC_TW().newDecoder());
public ByteToCharEUC_TW() {
}
......@@ -81,6 +62,7 @@ public class ByteToCharEUC_TW extends ByteToCharConverter
}
public void reset() {
dec.reset();
state = G0;
firstByte = 0;
byteOff = charOff = 0;
......@@ -95,7 +77,7 @@ public class ByteToCharEUC_TW extends ByteToCharConverter
ConversionBufferFullException
{
int inputSize = 0;
char outputChar = (char) 0;
char[] c1 = new char[1];
byteOff = inOff;
charOff = outOff;
......@@ -104,11 +86,12 @@ public class ByteToCharEUC_TW extends ByteToCharConverter
while (byteOff < inEnd) {
if (charOff >= outEnd)
throw new ConversionBufferFullException();
char[] outputChar = null;
switch (state) {
case G0:
if ( (input[byteOff] & MSB) == 0) { // ASCII
outputChar = (char) input[byteOff];
outputChar = c1;
outputChar[0] = (char) input[byteOff];
} else if (input[byteOff] == SS2) { // Codeset 2
state = G2;
} else { // Codeset 1
......@@ -119,9 +102,10 @@ public class ByteToCharEUC_TW extends ByteToCharConverter
case G1:
inputSize = 2;
if ( (input[byteOff] & MSB) != 0) { // 2nd byte
cnsPlane = 1;
outputChar = convToUnicode(firstByte,
input[byteOff], unicodeCNS1);
cnsPlane = 0;
outputChar = dec.toUnicode(firstByte & 0xff,
input[byteOff] & 0xff,
cnsPlane);
} else { // Error
badInputLength = 1;
throw new MalformedInputException();
......@@ -154,9 +138,9 @@ public class ByteToCharEUC_TW extends ByteToCharConverter
break;
case G4:
if ( (input[byteOff] & MSB) != 0) { // 2nd byte
outputChar = convToUnicode(firstByte,
input[byteOff],
cnsChars[cnsPlane - 2]);
outputChar = dec.toUnicode(firstByte & 0xff,
input[byteOff] & 0xff,
cnsPlane - 1);
} else { // Error
badInputLength = 3;
throw new MalformedInputException();
......@@ -166,21 +150,19 @@ public class ByteToCharEUC_TW extends ByteToCharConverter
break;
}
byteOff++;
if (outputChar != (char) 0) {
if (outputChar == REPLACE_CHAR) {
if (subMode) // substitution enabled
outputChar = subChars[0];
else {
if (state == G0) {
if (outputChar == null) {
if (subMode) { // substitution enabled
outputChar = c1;
outputChar[0] = subChars[0];
} else {
badInputLength = inputSize;
throw new UnknownCharacterException();
}
}
output[charOff++] = outputChar;
outputChar = 0;
output[charOff++] = outputChar[0];
}
}
return charOff - outOff;
}
......@@ -191,25 +173,4 @@ public class ByteToCharEUC_TW extends ByteToCharConverter
/**
 * Returns the name of the character encoding handled by this converter.
 *
 * @return the string "EUC_TW"
 */
public String getCharacterEncoding() {
return "EUC_TW";
}
/**
 * Looks up the character stored in {@code table} for a two-byte code.
 *
 * Each byte must lie in the range 0xa1..0xfe; the pair is turned into a
 * 94x94 row/column index. For planes 3 and above (as given by the current
 * {@code cnsPlane}) the table holds two chars per entry and the second one
 * is returned.
 *
 * @param byte1 first byte of the code
 * @param byte2 second byte of the code
 * @param table mapping table to index into
 * @return the mapped character, or {@code REPLACE_CHAR} when either byte is
 *         out of range or the computed position lies outside the table
 */
protected char convToUnicode(byte byte1, byte byte2, String table)
{
    int b1 = byte1 & 0xff;
    int b2 = byte2 & 0xff;

    if (b1 < 0xa1 || b2 < 0xa1 || b1 > 0xfe || b2 > 0xfe)
        return REPLACE_CHAR;

    // Both bytes are >= 0xa1 here, so the index can never be negative.
    int index = ((b1 - 0xa1) * 94) + b2 - 0xa1;

    // Planes 3 and above containing zero value lead byte
    // to accommodate surrogates for mappings which decode to a surrogate
    // pair
    if (this.cnsPlane >= 3)
        index = (index * 2) + 1;

    // Check the bounds on the final (possibly doubled) position, so that a
    // short table yields REPLACE_CHAR instead of an exception from charAt.
    if (index >= table.length())
        return REPLACE_CHAR;

    return table.charAt(index);
}
}
......@@ -33,19 +33,7 @@ import sun.nio.cs.ext.EUC_TW;
public class CharToByteEUC_TW extends CharToByteConverter
{
private final byte MSB = (byte)0x80;
private final byte SS2 = (byte) 0x8E;
private final byte P2 = (byte) 0xA2;
private final byte P3 = (byte) 0xA3;
private final static EUC_TW nioCoder = new EUC_TW();
private static String uniTab1 = nioCoder.getUniTab1();
private static String uniTab2 = nioCoder.getUniTab2();
private static String uniTab3 = nioCoder.getUniTab3();
private static String cnsTab1 = nioCoder.getCNSTab1();
private static String cnsTab2 = nioCoder.getCNSTab2();
private static String cnsTab3 = nioCoder.getCNSTab3();
private final EUC_TW.Encoder enc = (EUC_TW.Encoder)(new EUC_TW().newEncoder());
public int flush(byte[] output, int outStart, int outEnd)
throws MalformedInputException
......@@ -59,10 +47,7 @@ public class CharToByteEUC_TW extends CharToByteConverter
}
public boolean canConvert(char ch){
if (((0xFF00 & ch) != 0) && (getNative(ch) != -1)){
return true;
}
return false;
return enc.canEncode(ch);
}
/**
......@@ -74,9 +59,8 @@ public class CharToByteEUC_TW extends CharToByteConverter
ConversionBufferFullException
{
int outputSize;
byte [] tmpbuf = new byte[4];
byte [] tmpbuf = new byte[4];;
byte [] outputByte;
byteOff = outOff;
//Fixed 4122961 by bringing the charOff++ out to this
......@@ -88,7 +72,7 @@ public class CharToByteEUC_TW extends CharToByteConverter
outputSize = 1;
outputByte[0] = (byte)(input[charOff] & 0x7f);
} else {
outputSize = unicodeToEUC(input[charOff], outputByte);
outputSize = enc.toEUC(input[charOff], outputByte);
}
if (outputSize == -1) {
......@@ -112,7 +96,6 @@ public class CharToByteEUC_TW extends CharToByteConverter
}
/**
* returns the maximum number of bytes needed to convert a char
*/
......@@ -120,111 +103,10 @@ public class CharToByteEUC_TW extends CharToByteConverter
return 4;
}
/**
 * Returns the character set ID of this converter.
 *
 * @return the string "EUC_TW"
 */
public String getCharacterEncoding() {
return "EUC_TW";
}
/**
 * Maps a Unicode character to its CNS code.
 *
 * The character is looked up in one of three key tables, chosen by comparing
 * it with the first key of the second and third tables. The matching value
 * table stores each CNS code as two consecutive chars.
 *
 * @param unicode the character to look up
 * @return the first value char shifted left by 16 plus the second value char,
 *         or -1 when the character is not found in the selected key table
 */
protected int getNative(char unicode) {
    final char[] keys;      // Unicode keys of the selected range
    final char[] values;    // CNS codes, 2 chars per key
    if (unicode < UniTab2[0]) {
        keys = UniTab1;
        values = CNSTab1;
    } else if (unicode < UniTab3[0]) {
        keys = UniTab2;
        values = CNSTab2;
    } else {
        keys = UniTab3;
        values = CNSTab3;
    }

    final int pos = searchTab(unicode, keys);
    if (pos == -1)
        return -1;
    return (values[2 * pos] << 16) + values[2 * pos + 1];
}
/**
 * Binary-searches {@code table} for {@code code}.
 *
 * @param code  the char to find
 * @param table the chars to search; must be sorted in ascending order
 * @return an index {@code i} with {@code table[i] == code}, or -1 when the
 *         table does not contain {@code code} (including when it is empty)
 */
protected int searchTab(char code, char [] table) {
    // The library search handles the empty-table case, which a manual
    // bisection that probes table[0] unconditionally does not.
    int pos = java.util.Arrays.binarySearch(table, code);
    return (pos >= 0) ? pos : -1;
}
/**
 * Encodes a Unicode character into {@code ebyte}.
 *
 * The value from {@code getNative} carries the plane in the bits above 16
 * and the two code bytes below. Plane 1 is written as two bytes; a plane of
 * 2 or more is written as four bytes: SS2, the plane byte OR-ed with 0xA0,
 * then the two code bytes. Both code bytes are OR-ed with MSB.
 *
 * @param unicode the character to encode
 * @param ebyte   destination for the encoded bytes
 * @return the number of bytes written (2 or 4), or -1 if neither form applies
 */
private int unicodeToEUC(char unicode, byte ebyte[]) {
    final int cns = getNative(unicode);
    final int planeBits = cns >> 16;
    final byte high = (byte) (((cns & 0xff00) >> 8) | MSB);
    final byte low = (byte) ((cns & 0xff) | MSB);

    if (planeBits == 0x01) {            // Plane 1
        ebyte[0] = high;
        ebyte[1] = low;
        return 2;
    }

    final byte cnsPlane = (byte) planeBits;
    if (cnsPlane < (byte) 0x02)
        return -1;

    // Plane 2 and above
    ebyte[0] = SS2;
    ebyte[1] = (byte) (cnsPlane | (byte) 0xA0);
    ebyte[2] = high;
    ebyte[3] = low;
    return 4;
}
/**
 * Encodes a Unicode character as a single int.
 *
 * Characters up to 0x7F are returned unchanged. Otherwise the low 16 bits of
 * the value from {@code getNative} are OR-ed with 0x8080; for planes 2 and 3
 * the SS2 byte and the plane byte (P2 or P3) are placed in the two upper
 * bytes.
 *
 * @param unicode the character to encode
 * @return the packed code, or -1 when the plane is not 1, 2 or 3
 */
protected int unicodeToEUC(char unicode) {
    if (unicode <= 0x7F)                // ASCII falls into EUC_TW CS0
        return unicode;

    final int cns = getNative(unicode);
    final int euc = (cns & 0x0000FFFF) | 0x00008080;
    final int ss2Prefix = (SS2 << 24) & 0xFF000000;

    switch (cns >> 16) {
    case 1:
        return euc;
    case 2:
        return ss2Prefix | ((P2 << 16) & 0x00FF0000) | euc;
    case 3:
        return ss2Prefix | ((P3 << 16) & 0x00FF0000) | euc;
    default:
        return -1;
    }
}
private char [] UniTab1 = uniTab1.toCharArray();
private char [] UniTab2 = uniTab2.toCharArray();
private char [] UniTab3 = uniTab3.toCharArray();
private char [] CNSTab1 = cnsTab1.toCharArray();
private char [] CNSTab2 = cnsTab2.toCharArray();
private char [] CNSTab3 = cnsTab3.toCharArray();
}
......@@ -389,8 +389,8 @@ abstract class ISO2022
protected static class Encoder extends CharsetEncoder {
private final Surrogate.Parser sgp = new Surrogate.Parser();
private final byte SS2 = (byte)0x8e;
private final byte P2 = (byte)0xA2;
private final byte P3 = (byte)0xA3;
private final byte PLANE2 = (byte)0xA2;
private final byte PLANE3 = (byte)0xA3;
private final byte MSB = (byte)0x80;
protected final byte maximumDesignatorLength = 4;
......@@ -460,32 +460,32 @@ abstract class ISO2022
ebyte[index++] = (byte)(convByte[0] & 0x7f);
ebyte[index++] = (byte)(convByte[1] & 0x7f);
} else {
if((convByte[0] == SS2) && (convByte[1] == P2)) {
if (!SS2DesDefined) {
newSS2DesDefined = true;
ebyte[0] = ISO_ESC;
tmpByte = SS2Desig.getBytes();
System.arraycopy(tmpByte, 0, ebyte, 1, tmpByte.length);
index = tmpByte.length+1;
}
ebyte[index++] = ISO_ESC;
ebyte[index++] = ISO_SS2_7;
ebyte[index++] = (byte)(convByte[2] & 0x7f);
ebyte[index++] = (byte)(convByte[3] & 0x7f);
}
if((convByte[0] == SS2)&&(convByte[1] == 0xA3))
{
if(!SS3DesDefined){
newSS3DesDefined = true;
ebyte[0] = ISO_ESC;
tmpByte = SS3Desig.getBytes();
System.arraycopy(tmpByte, 0, ebyte, 1, tmpByte.length);
index = tmpByte.length+1;
if(convByte[0] == SS2) {
if (convByte[1] == PLANE2) {
if (!SS2DesDefined) {
newSS2DesDefined = true;
ebyte[0] = ISO_ESC;
tmpByte = SS2Desig.getBytes();
System.arraycopy(tmpByte, 0, ebyte, 1, tmpByte.length);
index = tmpByte.length+1;
}
ebyte[index++] = ISO_ESC;
ebyte[index++] = ISO_SS2_7;
ebyte[index++] = (byte)(convByte[2] & 0x7f);
ebyte[index++] = (byte)(convByte[3] & 0x7f);
} else if (convByte[1] == PLANE3) {
if(!SS3DesDefined){
newSS3DesDefined = true;
ebyte[0] = ISO_ESC;
tmpByte = SS3Desig.getBytes();
System.arraycopy(tmpByte, 0, ebyte, 1, tmpByte.length);
index = tmpByte.length+1;
}
ebyte[index++] = ISO_ESC;
ebyte[index++] = ISO_SS3_7;
ebyte[index++] = (byte)(convByte[2] & 0x7f);
ebyte[index++] = (byte)(convByte[3] & 0x7f);
}
ebyte[index++] = ISO_ESC;
ebyte[index++] = ISO_SS3_7;
ebyte[index++] = (byte)(convByte[2] & 0x7f);
ebyte[index++] = (byte)(convByte[3] & 0x7f);
}
}
return index;
......
......@@ -105,17 +105,19 @@ public class ISO2022_CN
private char cnsDecode(byte byte1, byte byte2, byte SS) {
byte1 |= MSB;
byte2 |= MSB;
if (SS == ISO_SS2_7) {
return cnsDecoder.convToUnicode(byte1, byte2,
cnsDecoder.unicodeCNS2);
} else { //SS == ISO_SS3_7
char[] outSurr = cnsDecoder.convToSurrogate(byte1, byte2,
cnsDecoder.unicodeCNS3);
if (outSurr == null || outSurr[0] != '\u0000')
return REPLACE_CHAR;
return outSurr[1];
}
int p = 0;
if (SS == ISO_SS2_7)
p = 1; //plane 2, index -- 1
else if (SS == ISO_SS3_7)
p = 2; //plane 3, index -- 2
else
return REPLACE_CHAR; //never happen.
char[] ret = cnsDecoder.toUnicode(byte1 & 0xff,
byte2 & 0xff,
p);
if (ret == null || ret.length == 2)
return REPLACE_CHAR;
return ret[0];
}
private char SODecode(byte byte1, byte byte2, byte SOD) {
......@@ -125,9 +127,12 @@ public class ISO2022_CN
return gb2312Decoder.decodeDouble(byte1 & 0xff,
byte2 & 0xff);
} else { // SOD == SODesigCNS
return cnsDecoder.convToUnicode(byte1,
byte2,
cnsDecoder.unicodeCNS1);
char[] ret = cnsDecoder.toUnicode(byte1 & 0xff,
byte2 & 0xff,
0);
if (ret == null)
return REPLACE_CHAR;
return ret[0];
}
}
......
/*
* Copyright 2001-2005 Sun Microsystems, Inc. All Rights Reserved.
* Copyright 2001-2008 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
......@@ -66,16 +66,19 @@ public abstract class X11CNS11643 extends Charset {
super(cs);
this.plane = plane;
}
private byte[] bb = new byte[4];
public boolean canEncode(char c) {
if (c <= 0x7F) {
return false;
}
int p = getNative(c) >> 16;
if (p == 1 && plane == 0 ||
p == 2 && plane == 2 ||
p == 3 && plane == 3)
return true;
return false;
int nb = toEUC(c, bb);
if (nb == -1)
return false;
int p = 0;
if (nb == 4)
p = (bb[1] & 0xff) - 0xa0;
return (p == plane);
}
public boolean isLegalReplacement(byte[] repl) {
......@@ -93,19 +96,26 @@ public abstract class X11CNS11643 extends Charset {
try {
while (sp < sl) {
char c = sa[sp];
if (c >= '\uFFFE' || c <= '\u007f')
return CoderResult.unmappableForLength(1);
int cns = getNative(c);
int p = cns >> 16;
if (p == 1 && plane == 0 ||
p == 2 && plane == 2 ||
p == 3 && plane == 3) {
if (dl - dp < 2)
return CoderResult.OVERFLOW;
da[dp++] = (byte) ((cns >> 8) & 0x7f);
da[dp++] = (byte) (cns & 0x7f);
sp++;
continue;
if ( c > '\u007f'&& c < '\uFFFE') {
int nb = toEUC(c, bb);
if (nb != -1) {
int p = 0;
if (nb == 4)
p = (bb[1] & 0xff) - 0xa0;
if (p == plane) {
if (dl - dp < 2)
return CoderResult.OVERFLOW;
if (nb == 2) {
da[dp++] = (byte)(bb[0] & 0x7f);
da[dp++] = (byte)(bb[1] & 0x7f);
} else {
da[dp++] = (byte)(bb[2] & 0x7f);
da[dp++] = (byte)(bb[3] & 0x7f);
}
sp++;
continue;
}
}
}
return CoderResult.unmappableForLength(1);
}
......@@ -118,23 +128,17 @@ public abstract class X11CNS11643 extends Charset {
}
private class Decoder extends EUC_TW.Decoder {
int plane;
private String table;
protected Decoder(Charset cs, int plane) {
super(cs);
switch (plane) {
case 0:
table = unicodeCNS1;
break;
case 2:
table = unicodeCNS2;
break;
case 3:
table = unicodeCNS3;
break;
default:
if (plane == 0)
this.plane = plane;
else if (plane == 2 || plane == 3)
this.plane = plane - 1;
else
throw new IllegalArgumentException
("Only planes 1, 2, and 3 supported");
}
}
//we only work on array backed buffer.
......@@ -142,33 +146,26 @@ public abstract class X11CNS11643 extends Charset {
byte[] sa = src.array();
int sp = src.arrayOffset() + src.position();
int sl = src.arrayOffset() + src.limit();
assert (sp <= sl);
sp = (sp <= sl ? sp : sl);
char[] da = dst.array();
int dp = dst.arrayOffset() + dst.position();
int dl = dst.arrayOffset() + dst.limit();
assert (dp <= dl);
dp = (dp <= dl ? dp : dl);
try {
while (sp < sl) {
if ( sl - sp < 2) {
return CoderResult.UNDERFLOW;
}
byte b1 = sa[sp];
byte b2 = sa[sp + 1];
char c = convToUnicode((byte)(b1 | 0x80),
(byte)(b2 | 0x80),
table);
if (c == replacement().charAt(0)
//to keep the compatibility with b2cX11CNS11643
/*|| c == '\u0000'*/) {
int b1 = (sa[sp] & 0xff) | 0x80;
int b2 = (sa[sp + 1] & 0xff) | 0x80;
char[] cc = toUnicode(b1, b2, plane);
// plane3 has non-bmp characters(added), x11cnsp3
// however does not support them
if (cc == null || cc.length == 2)
return CoderResult.unmappableForLength(2);
}
if (dl - dp < 1)
return CoderResult.OVERFLOW;
da[dp++] = c;
da[dp++] = cc[0];
sp +=2;
}
return CoderResult.UNDERFLOW;
......
......@@ -22,7 +22,7 @@
*/
/* @test
@bug 4779029 4924625 6392664
@bug 4779029 4924625 6392664 6730652
@summary Test decoding of various permutations of valid ISO-2022-CN byte sequences
*/
......@@ -387,6 +387,12 @@ public class TestISO2022CNDecoder
};
private static CoderResult test15_result = CoderResult.unmappableForLength(4);
/**
 * Round-trips a sample string through the "x-ISO-2022-CN-CNS" charset.
 *
 * @return true when decoding the encoded bytes gives back the original string
 * @throws Exception if the charset name is not supported
 */
private static boolean encodeTest6730652 () throws Exception {
    final String csName = "x-ISO-2022-CN-CNS";
    //sample p3 codepoints
    final String strCNSP3 = "\u4e28\u4e36\u4e3f\u4e85\u4e05\u4e04\u5369\u53b6\u4e2a\u4e87\u4e49\u51e2\u56b8\u56b9\u56c4\u8053\u92b0";
    final byte[] encoded = strCNSP3.getBytes(csName);
    final String decoded = new String(encoded, csName);
    return strCNSP3.equals(decoded);
}
/**
* Main program to test ISO2022CN conformance
*
......@@ -430,6 +436,9 @@ public class TestISO2022CNDecoder
pass &= decodeTest(test13_bytes, test13_chars, "escapes13");
pass &= decodeResultTest(test14_bytes, test14_result, "escapes14");
pass &= decodeResultTest(test15_bytes, test15_result, "escapes15");
pass &= encodeTest6730652 ();
// PASS/FAIL status is what the whole thing is about.
//
if (! pass) {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册