提交 bacc4af8 编写于 作者: M mchung

Merge

......@@ -33,6 +33,7 @@ FILES_java = \
sun/nio/cs/AbstractCharsetProvider.java \
sun/nio/cs/HistoricallyNamedCharset.java \
sun/nio/cs/Surrogate.java \
sun/nio/cs/CharsetMapping.java \
sun/nio/cs/SingleByteEncoder.java \
sun/nio/cs/SingleByteDecoder.java \
sun/nio/cs/UnicodeEncoder.java \
......
#
# Copyright 1996-2006 Sun Microsystems, Inc. All Rights Reserved.
# Copyright 1996-2008 Sun Microsystems, Inc. All Rights Reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
......@@ -73,11 +73,21 @@ build: $(CHARSETS_JAR)
# Name of the provider-configuration file for the charset provider, and its
# location relative to the class output directory.
SERVICE_DESCRIPTION = java.nio.charset.spi.CharsetProvider
SERVICE_DESCRIPTION_PATH = META-INF/services/$(SERVICE_DESCRIPTION)
# Source directory holding the textual mapping table for the charsetmapping tool.
GENCSDATASRC = $(BUILDDIR)/tools/CharsetMapping
# Input: textual sjis0213 mapping table.
FILES_MAP = $(GENCSDATASRC)/sjis0213.map
# Output: generated binary table, placed next to the sun.nio.cs.ext classes.
FILES_DAT = $(CLASSDESTDIR)/sun/nio/cs/ext/sjis0213.dat
# Jar of the generator tool, run with the boot JDK below.
CHARSETMAPPING_JARFILE = $(BUILDTOOLJARDIR)/charsetmapping.jar
# Regenerate the .dat file whenever the .map file changes; the tool takes the
# input map and the output file as its two arguments.
$(FILES_DAT): $(FILES_MAP)
@$(prep-target)
$(BOOT_JAVA_CMD) -jar $(CHARSETMAPPING_JARFILE) \
$(FILES_MAP) $(FILES_DAT)
# Copy the provider-configuration file from the shared sources into the
# class output tree.
$(CLASSDESTDIR)/$(SERVICE_DESCRIPTION_PATH): \
$(SHARE_SRC)/classes/sun/nio/cs/ext/$(SERVICE_DESCRIPTION_PATH)
$(install-file)
$(CHARSETS_JAR): $(FILES_class) $(CLASSDESTDIR)/$(SERVICE_DESCRIPTION_PATH)
$(CHARSETS_JAR): $(FILES_class) $(CLASSDESTDIR)/$(SERVICE_DESCRIPTION_PATH) $(FILES_DAT)
$(BOOT_JAR_CMD) cf $(CHARSETS_JAR) \
-C $(CLASSDESTDIR) sun \
-C $(CLASSDESTDIR) $(SERVICE_DESCRIPTION_PATH) \
......
#
# Copyright 2008 Sun Microsystems, Inc. All Rights Reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License version 2 only, as
# published by the Free Software Foundation. Sun designates this
# particular file as subject to the "Classpath" exception as provided
# by Sun in the LICENSE file that accompanied this code.
#
# This code is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# version 2 for more details (a copy is included in the LICENSE file that
# accompanied this code).
#
# You should have received a copy of the GNU General Public License version
# 2 along with this work; if not, write to the Free Software Foundation,
# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
#
# Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
# CA 95054 USA or visit www.sun.com if you need additional information or
# have any questions.
#
#
# Makefile for building the charsetmapping tool
#
# Location of the build root relative to this Makefile.
BUILDDIR = ../..
# Java package of the tool, and the names used for the resulting artifact.
PACKAGE = build.tools.charsetmapping
PRODUCT = tools
PROGRAM = charsetmapping
include $(BUILDDIR)/common/Defs.gmk
# Root of the build-tool sources and the tool's main class source file.
# NOTE(review): PKGDIR is not defined here; presumably Defs.gmk derives it
# from PACKAGE -- confirm.
BUILDTOOL_SOURCE_ROOT = $(BUILDDIR)/tools/src
BUILDTOOL_MAIN = $(PKGDIR)/GenerateMapping.java
#
# Build tool jar rules.
#
include $(BUILDDIR)/common/BuildToolJar.gmk
此差异已折叠。
......@@ -50,7 +50,8 @@ SUBDIRS = \
jdwpgen \
makeclasslist \
strip_properties \
winver
winver \
CharsetMapping
all build clean clobber::
$(SUBDIRS-loop)
......
/*
* Copyright 2008 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Sun designates this
* particular file as subject to the "Classpath" exception as provided
* by Sun in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*/
package build.tools.charsetmapping;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.BufferedReader;
import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.*;
/**
 * Build-time support for the charset mapping generator: a line-oriented
 * parser for textual mapping tables and a set of writers that emit the
 * tagged binary tables of a ".dat" file.
 *
 * Every table starts with a 16-bit tag (one of the MAP_* constants) and
 * every quantity is written big-endian.
 */
public class CharsetMapping {
    public final static char UNMAPPABLE_DECODING = '\uFFFD';
    public final static int  UNMAPPABLE_ENCODING = -1;

    /** One mapping: a byte sequence and the code point(s) it maps to. */
    public static class Entry {
        public int bs;   //byte sequence reps
        public int cp;   //Unicode codepoint
        public int cp2;  //CC of composite

        public Entry () {}

        public Entry (int bytes, int cp, int cp2) {
            this.bs = bytes;
            this.cp = cp;
            this.cp2 = cp2;
        }
    }

    /** Orders entries by ascending {@code cp}. */
    static Comparator<Entry> comparatorCP =
        new Comparator<Entry>() {
            public int compare(Entry m1, Entry m2) {
                // explicit comparison instead of subtraction: cannot overflow
                return (m1.cp < m2.cp) ? -1 : ((m1.cp == m2.cp) ? 0 : 1);
            }
        };

    /**
     * Reads mapping entries, one per line, from a textual mapping table.
     * Lines for which {@link #isDirective} is true and lines the pattern
     * does not match at their start are skipped.
     */
    public static class Parser {
        static final Pattern basic = Pattern.compile("(?:0x)?(\\p{XDigit}++)\\s++(?:0x)?(\\p{XDigit}++)?\\s*+.*");
        static final int gBS = 1;
        static final int gCP = 2;
        static final int gCP2 = 3;

        BufferedReader reader;
        boolean closed;
        Matcher matcher;
        int gbs, gcp, gcp2;   // capture-group numbers of bs, cp and cp2

        /**
         * @param in   stream to read the table from
         * @param p    pattern applied to the start of every line
         * @param gbs  capture group holding the hex byte sequence
         * @param gcp  capture group holding the hex code point
         * @param gcp2 capture group holding the optional second code point
         */
        public Parser (InputStream in, Pattern p, int gbs, int gcp, int gcp2)
            throws IOException
        {
            // Only the ASCII part of a line is inspected by the patterns, so
            // decode with a fixed single-byte charset instead of whatever the
            // build machine's default happens to be.
            this.reader = new BufferedReader(new InputStreamReader(in, "ISO-8859-1"));
            this.closed = false;
            this.matcher = p.matcher("");
            this.gbs = gbs;
            this.gcp = gcp;
            this.gcp2 = gcp2;
        }

        public Parser (InputStream in, Pattern p) throws IOException {
            this(in, p, gBS, gCP, gCP2);
        }

        public Parser (InputStream in) throws IOException {
            this(in, basic, gBS, gCP, gCP2);
        }

        /** Returns true for lines that carry no mapping (start with '#'). */
        protected boolean isDirective(String line) {
            return line.startsWith("#");
        }

        /**
         * Fills {@code mapping} from the groups of a successful match.
         * {@code cp2} is 0 when the pattern has no such group or the group
         * did not participate in the match.
         */
        protected Entry parse(Matcher matcher, Entry mapping) {
            mapping.bs = Integer.parseInt(matcher.group(gbs), 16);
            mapping.cp = Integer.parseInt(matcher.group(gcp), 16);
            if (gcp2 <= matcher.groupCount() &&
                matcher.group(gcp2) != null)
                mapping.cp2 = Integer.parseInt(matcher.group(gcp2), 16);
            else
                mapping.cp2 = 0;
            return mapping;
        }

        /** Returns the next entry in a freshly allocated {@link Entry}. */
        public Entry next() throws Exception {
            return next(new Entry());
        }

        // returns null and closes the input stream if the eof has been reached.
        public Entry next(Entry mapping) throws Exception {
            if (closed)
                return null;
            String line;
            while ((line = reader.readLine()) != null) {
                if (isDirective(line))
                    continue;
                matcher.reset(line);
                if (!matcher.lookingAt()) {
                    //System.out.println("Missed: " + line);
                    continue;
                }
                return parse(matcher, mapping);
            }
            reader.close();
            closed = true;
            return null;
        }
    }

    // tags of different charset mapping tables
    private final static int MAP_SINGLEBYTE      = 0x1; // 0..256  : c
    private final static int MAP_DOUBLEBYTE1     = 0x2; // min..max: c
    private final static int MAP_DOUBLEBYTE2     = 0x3; // min..max: c [DB2]
    private final static int MAP_SUPPLEMENT      = 0x5; //           db,c
    private final static int MAP_SUPPLEMENT_C2B  = 0x6; //           c,db
    private final static int MAP_COMPOSITE       = 0x7; //           db,base,cc
    private final static int MAP_INDEXC2B        = 0x8; // index table of c->bb

    /** Writes the low 16 bits of {@code data}, high byte first. */
    private static final void writeShort(OutputStream out, int data)
        throws IOException
    {
        out.write((data >>> 8) & 0xFF);
        out.write((data      ) & 0xFF);
    }

    /**
     * Writes a table header (tag, element count) followed by {@code size}
     * elements of {@code array} starting at index {@code off}.
     */
    private static final void writeShortArray(OutputStream out,
                                              int type,
                                              int[] array,
                                              int off,
                                              int size)   // number of elements
        throws IOException
    {
        writeShort(out, type);
        writeShort(out, size);
        // i counts elements; the original started i at off while also adding
        // off to the index, which was only correct for off == 0.
        for (int i = 0; i < size; i++) {
            writeShort(out, array[off + i]);
        }
    }

    /** Writes all 32 bits of {@code data}, most significant byte first. */
    public static final void writeSIZE(OutputStream out, int data)
        throws IOException
    {
        out.write((data >>> 24) & 0xFF);
        out.write((data >>> 16) & 0xFF);
        out.write((data >>>  8) & 0xFF);
        out.write((data       ) & 0xFF);
    }

    /**
     * Writes the c2b index table. Every non-zero element of
     * {@code indexC2B} is assigned the next 256-entry segment offset
     * (0, 256, 512, ...); zero elements are written as 0xFFFF.
     */
    public static void writeINDEXC2B(OutputStream out, int[] indexC2B)
        throws IOException
    {
        writeShort(out, MAP_INDEXC2B);
        writeShort(out, indexC2B.length);
        int off = 0;
        for (int i = 0; i < indexC2B.length; i++) {
            if (indexC2B[i] != 0) {
                writeShort(out, off);
                off += 256;
            } else {
                writeShort(out, -1);
            }
        }
    }

    /** Writes the first 256 elements of {@code sb} as the single-byte table. */
    public static void writeSINGLEBYTE(OutputStream out, int[] sb)
        throws IOException
    {
        writeShortArray(out, MAP_SINGLEBYTE, sb, 0, 256);
    }

    /**
     * Writes one double-byte table: tag, the four inclusive range bounds,
     * the element count, and then {@code db[b1 * 256 + b2]} for every
     * (b1, b2) in the ranges, b2 varying fastest.
     */
    private static void writeDOUBLEBYTE(OutputStream out,
                                        int type,
                                        int[] db,
                                        int b1Min, int b1Max,
                                        int b2Min, int b2Max)
        throws IOException
    {
        writeShort(out, type);
        writeShort(out, b1Min);
        writeShort(out, b1Max);
        writeShort(out, b2Min);
        writeShort(out, b2Max);
        writeShort(out, (b1Max - b1Min + 1) * (b2Max - b2Min + 1));

        for (int b1 = b1Min; b1 <= b1Max; b1++) {
            for (int b2 = b2Min; b2 <= b2Max; b2++) {
                writeShort(out, db[b1 * 256 + b2]);
            }
        }
    }

    public static void writeDOUBLEBYTE1(OutputStream out,
                                        int[] db,
                                        int b1Min, int b1Max,
                                        int b2Min, int b2Max)
        throws IOException
    {
        writeDOUBLEBYTE(out, MAP_DOUBLEBYTE1, db, b1Min, b1Max, b2Min, b2Max);
    }

    public static void writeDOUBLEBYTE2(OutputStream out,
                                        int[] db,
                                        int b1Min, int b1Max,
                                        int b2Min, int b2Max)
        throws IOException
    {
        writeDOUBLEBYTE(out, MAP_DOUBLEBYTE2, db, b1Min, b1Max, b2Min, b2Max);
    }

    /**
     * Writes the supplementary b2c table followed by the matching c2b table.
     * Only the low 16 bits of each {@code bs} and {@code cp} are stored.
     * The first {@code size} elements of {@code supp} are re-sorted by
     * {@code cp} as a side effect.
     */
    // the c2b table is output as well
    public static void writeSUPPLEMENT(OutputStream out, Entry[] supp, int size)
        throws IOException
    {
        writeShort(out, MAP_SUPPLEMENT);
        writeShort(out, size * 2);
        // db in the first half, cp in the second half
        for (int i = 0; i < size; i++) {
            writeShort(out, supp[i].bs);
        }
        for (int i = 0; i < size; i++) {
            writeShort(out, supp[i].cp);
        }

        //c2b: cp in the first half, db in the second half, ordered by cp
        writeShort(out, MAP_SUPPLEMENT_C2B);
        writeShort(out, size * 2);
        Arrays.sort(supp, 0, size, comparatorCP);
        for (int i = 0; i < size; i++) {
            writeShort(out, supp[i].cp);
        }
        for (int i = 0; i < size; i++) {
            writeShort(out, supp[i].bs);
        }
    }

    /**
     * Writes the composite table as {@code size} (bs, cp, cp2) triples, in
     * the order given.
     */
    public static void writeCOMPOSITE(OutputStream out, Entry[] comp, int size)
        throws IOException
    {
        writeShort(out, MAP_COMPOSITE);
        writeShort(out, size * 3);
        // comp is sorted already
        for (int i = 0; i < size; i++) {
            writeShort(out, (char)comp[i].bs);
            writeShort(out, (char)comp[i].cp);
            writeShort(out, (char)comp[i].cp2);
        }
    }
}
/*
* Copyright 2008 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Sun designates this
* particular file as subject to the "Classpath" exception as provided
* by Sun in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*/
package build.tools.charsetmapping;
import java.io.*;
import java.util.regex.*;
import static build.tools.charsetmapping.CharsetMapping.*;
/**
 * Build tool that converts the textual sjis0213 mapping table into the
 * binary ".dat" form.
 *
 * Usage: {@code GenerateMapping <mapFile> <datFile>}
 */
public class GenerateMapping {
    public static void main(String argv[]) throws IOException {
        if (argv.length < 2) {
            System.out.println("Usage: java GenerateMapping fMap fDat");
            System.exit(1);
        }
        boolean done = false;
        InputStream in = new FileInputStream(argv[0]);
        try {
            OutputStream out = new FileOutputStream(argv[1]);
            try {
                genDataJIS0213(in, out);
                done = true;
            } finally {
                closeQuietly(out);
            }
        } finally {
            closeQuietly(in);
            // Do not leave an empty or truncated output behind: it would carry
            // a fresh timestamp and look up to date to the next make run.
            if (!done)
                new File(argv[1]).delete();
        }
    }

    // Closes c, ignoring failures; used only on paths where the stream was
    // already closed normally or an earlier error is being propagated.
    private static void closeQuietly(Closeable c) {
        try {
            c.close();
        } catch (IOException ignored) {
            // nothing useful can be done here
        }
    }

    // regex pattern to parse the "jis0213.map" file
    static Pattern sjis0213 = Pattern.compile("0x(\\p{XDigit}++)\\s++U\\+(\\p{XDigit}++)(?:\\+(\\p{XDigit}++))?\\s++#.*");

    /**
     * Parses the mapping table from {@code in} and writes the binary tables
     * to {@code out}: a 4-byte total size followed by the c2b index,
     * single-byte, two double-byte, supplementary and composite tables.
     * {@code out} is closed on success.
     *
     * @throws IOException if reading, parsing or writing fails; failures are
     *         propagated so that the build stops instead of continuing with a
     *         missing or incomplete data file
     */
    private static void genDataJIS0213(InputStream in, OutputStream out)
        throws IOException
    {
        int[] sb = new int[0x100];                         // singlebyte
        int[] db = new int[0x10000];                       // doublebyte
        int[] indexC2B = new int[256];
        Entry[] supp = new Entry[0x10000];
        Entry[] comp = new Entry[0x100];
        int suppTotal = 0;
        int compTotal = 0;

        // first-byte ranges of the two double-byte segments and the shared
        // second-byte range, all inclusive
        int b1Min1 = 0x81;
        int b1Max1 = 0x9f;
        int b1Min2 = 0xe0;
        int b1Max2 = 0xfc;
        int b2Min = 0x40;
        int b2Max = 0xfe;

        //init: 0x00..0x7f map to themselves, everything else is unmappable
        for (int i = 0; i < 0x80; i++) sb[i] = i;
        for (int i = 0x80; i < 0x100; i++) sb[i] = UNMAPPABLE_DECODING;
        for (int i = 0; i < 0x10000; i++) db[i] = UNMAPPABLE_DECODING;

        try {
            Parser p = new Parser(in, sjis0213);
            Entry e = null;
            while ((e = p.next()) != null) {
                if (e.cp2 != 0) {
                    // base + combining character pair
                    comp[compTotal++] = e;
                } else {
                    if (e.cp <= 0xffff) {
                        if (e.bs <= 0xff)
                            sb[e.bs] = e.cp;
                        else
                            db[e.bs] = e.cp;
                        // mark the 256-char page of cp as having a c2b entry
                        indexC2B[e.cp>>8] = 1;
                    } else {
                        supp[suppTotal++] = e;
                    }
                }
            }
        } catch (IOException x) {
            throw x;
        } catch (RuntimeException x) {
            throw x;
        } catch (Exception x) {
            // Parser.next() is declared to throw Exception
            IOException ioe = new IOException("Failed to parse mapping table");
            ioe.initCause(x);
            throw ioe;
        }

        // Build everything in memory first so the total size can be written
        // ahead of the tables.
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        // c2b Index Table, always the first one
        writeINDEXC2B(baos, indexC2B);
        writeSINGLEBYTE(baos, sb);
        writeDOUBLEBYTE1(baos, db, b1Min1, b1Max1, b2Min, b2Max);
        writeDOUBLEBYTE2(baos, db, b1Min2, b1Max2, b2Min, b2Max);
        writeSUPPLEMENT(baos, supp, suppTotal);
        writeCOMPOSITE(baos, comp, compTotal);
        writeSIZE(out, baos.size());
        baos.writeTo(out);
        out.close();
    }
}
/*
* Copyright 2008 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Sun designates this
* particular file as subject to the "Classpath" exception as provided
* by Sun in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*/
package sun.nio.cs;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.BufferedReader;
import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.*;
import java.security.*;
public class CharsetMapping {
public final static char UNMAPPABLE_DECODING = '\uFFFD';
public final static int UNMAPPABLE_ENCODING = -1;
char[] b2cSB; //singlebyte b->c
char[] b2cDB1; //dobulebyte b->c /db1
char[] b2cDB2; //dobulebyte b->c /db2
int b2Min, b2Max; //min/max(start/end) value of 2nd byte
int b1MinDB1, b1MaxDB1; //min/Max(start/end) value of 1st byte/db1
int b1MinDB2, b1MaxDB2; //min/Max(start/end) value of 1st byte/db2
int dbSegSize;
char[] c2b;
char[] c2bIndex;
// Supplementary
char[] b2cSupp;
char[] c2bSupp;
// Composite
Entry[] b2cComp;
Entry[] c2bComp;
public char decodeSingle(int b) {
return b2cSB[b];
}
public char decodeDouble(int b1, int b2) {
if (b2 >= b2Min && b2 < b2Max) {
b2 -= b2Min;
if (b1 >= b1MinDB1 && b1 <= b1MaxDB1) {
b1 -= b1MinDB1;
return b2cDB1[b1 * dbSegSize + b2];
}
if (b1 >= b1MinDB2 && b1 <= b1MaxDB2) {
b1 -= b1MinDB2;
return b2cDB2[b1 * dbSegSize + b2];
}
}
return UNMAPPABLE_DECODING;
}
// for jis0213 all supplementary characters are in 0x2xxxx range,
// so only the xxxx part is now stored, should actually store the
// codepoint value instead.
public char[] decodeSurrogate(int db, char[] cc) {
int end = b2cSupp.length / 2;
int i = Arrays.binarySearch(b2cSupp, 0, end, (char)db);
if (i >= 0) {
Character.toChars(b2cSupp[end + i] + 0x20000, cc, 0);
return cc;
}
return null;
}
public char[] decodeComposite(Entry comp, char[] cc) {
int i = findBytes(b2cComp, comp);
if (i >= 0) {
cc[0] = (char)b2cComp[i].cp;
cc[1] = (char)b2cComp[i].cp2;
return cc;
}
return null;
}
public int encodeChar(char ch) {
int index = c2bIndex[ch >> 8];
if (index == 0xffff)
return UNMAPPABLE_ENCODING;
return c2b[index + (ch & 0xff)];
}
public int encodeSurrogate(char hi, char lo) {
char c = (char)Character.toCodePoint(hi, lo);
int end = c2bSupp.length / 2;
int i = Arrays.binarySearch(c2bSupp, 0, end, c);
if (i >= 0)
return c2bSupp[end + i];
return UNMAPPABLE_ENCODING;
}
public boolean isCompositeBase(Entry comp) {
if (comp.cp <= 0x31f7 && comp.cp >= 0xe6) {
return (findCP(c2bComp, comp) >= 0);
}
return false;
}
public int encodeComposite(Entry comp) {
int i = findComp(c2bComp, comp);
if (i >= 0)
return c2bComp[i].bs;
return UNMAPPABLE_ENCODING;
}
// init the CharsetMapping object from the .dat binary file
public static CharsetMapping get(final InputStream is) {
return AccessController.doPrivileged(new PrivilegedAction<CharsetMapping>() {
public CharsetMapping run() {
return new CharsetMapping().load(is);
}
});
}
public static class Entry {
public int bs; //byte sequence reps
public int cp; //Unicode codepoint
public int cp2; //CC of composite
}
static Comparator<Entry> comparatorBytes =
new Comparator<Entry>() {
public int compare(Entry m1, Entry m2) {
return m1.bs - m2.bs;
}
public boolean equals(Object obj) {
return this == obj;
}
};
static Comparator<Entry> comparatorCP =
new Comparator<Entry>() {
public int compare(Entry m1, Entry m2) {
return m1.cp - m2.cp;
}
public boolean equals(Object obj) {
return this == obj;
}
};
static Comparator<Entry> comparatorComp =
new Comparator<Entry>() {
public int compare(Entry m1, Entry m2) {
int v = m1.cp - m2.cp;
if (v == 0)
v = m1.cp2 - m2.cp2;
return v;
}
public boolean equals(Object obj) {
return this == obj;
}
};
static int findBytes(Entry[] a, Entry k) {
return Arrays.binarySearch(a, 0, a.length, k, comparatorBytes);
}
static int findCP(Entry[] a, Entry k) {
return Arrays.binarySearch(a, 0, a.length, k, comparatorCP);
}
static int findComp(Entry[] a, Entry k) {
return Arrays.binarySearch(a, 0, a.length, k, comparatorComp);
}
/*****************************************************************************/
// tags of different charset mapping tables
private final static int MAP_SINGLEBYTE = 0x1; // 0..256 : c
private final static int MAP_DOUBLEBYTE1 = 0x2; // min..max: c
private final static int MAP_DOUBLEBYTE2 = 0x3; // min..max: c [DB2]
private final static int MAP_SUPPLEMENT = 0x5; // db,c
private final static int MAP_SUPPLEMENT_C2B = 0x6; // c,db
private final static int MAP_COMPOSITE = 0x7; // db,base,cc
private final static int MAP_INDEXC2B = 0x8; // index table of c->bb
private static final boolean readNBytes(InputStream in, byte[] bb, int N)
throws IOException
{
int off = 0;
while (N > 0) {
int n = in.read(bb, off, N);
if (n == -1)
return false;
N = N - n;
off += n;
}
return true;
}
int off = 0;
byte[] bb;
private char[] readCharArray() {
// first 2 bytes are the number of "chars" stored in this table
int size = ((bb[off++]&0xff)<<8) | (bb[off++]&0xff);
char [] cc = new char[size];
for (int i = 0; i < size; i++) {
cc[i] = (char)(((bb[off++]&0xff)<<8) | (bb[off++]&0xff));
}
return cc;
}
void readSINGLEBYTE() {
char[] map = readCharArray();
for (int i = 0; i < map.length; i++) {
char c = map[i];
if (c != UNMAPPABLE_DECODING) {
c2b[c2bIndex[c >> 8] + (c&0xff)] = (char)i;
}
}
b2cSB = map;
}
void readINDEXC2B() {
char[] map = readCharArray();
for (int i = map.length - 1; i >= 0; i--) {
if (c2b == null && map[i] != -1) {
c2b = new char[map[i] + 256];
Arrays.fill(c2b, (char)UNMAPPABLE_ENCODING);
break;
}
}
c2bIndex = map;
}
char[] readDB(int b1Min, int b2Min, int segSize) {
char[] map = readCharArray();
for (int i = 0; i < map.length; i++) {
char c = map[i];
if (c != UNMAPPABLE_DECODING) {
int b1 = i / segSize;
int b2 = i % segSize;
int b = (b1 + b1Min)* 256 + (b2 + b2Min);
//System.out.printf(" DB %x\t%x%n", b, c & 0xffff);
c2b[c2bIndex[c >> 8] + (c&0xff)] = (char)(b);
}
}
return map;
}
void readDOUBLEBYTE1() {
b1MinDB1 = ((bb[off++]&0xff)<<8) | (bb[off++]&0xff);
b1MaxDB1 = ((bb[off++]&0xff)<<8) | (bb[off++]&0xff);
b2Min = ((bb[off++]&0xff)<<8) | (bb[off++]&0xff);
b2Max = ((bb[off++]&0xff)<<8) | (bb[off++]&0xff);
dbSegSize = b2Max - b2Min + 1;
b2cDB1 = readDB(b1MinDB1, b2Min, dbSegSize);
}
void readDOUBLEBYTE2() {
b1MinDB2 = ((bb[off++]&0xff)<<8) | (bb[off++]&0xff);
b1MaxDB2 = ((bb[off++]&0xff)<<8) | (bb[off++]&0xff);
b2Min = ((bb[off++]&0xff)<<8) | (bb[off++]&0xff);
b2Max = ((bb[off++]&0xff)<<8) | (bb[off++]&0xff);
dbSegSize = b2Max - b2Min + 1;
b2cDB2 = readDB(b1MinDB2, b2Min, dbSegSize);
}
void readCOMPOSITE() {
char[] map = readCharArray();
int mLen = map.length/3;
b2cComp = new Entry[mLen];
c2bComp = new Entry[mLen];
for (int i = 0, j= 0; i < mLen; i++) {
Entry m = new Entry();
m.bs = map[j++];
m.cp = map[j++];
m.cp2 = map[j++];
b2cComp[i] = m;
c2bComp[i] = m;
}
Arrays.sort(c2bComp, 0, c2bComp.length, comparatorComp);
}
CharsetMapping load(InputStream in) {
try {
// The first 4 bytes are the size of the total data followed in
// this .dat file.
int len = ((in.read()&0xff) << 24) | ((in.read()&0xff) << 16) |
((in.read()&0xff) << 8) | (in.read()&0xff);
bb = new byte[len];
off = 0;
//System.out.printf("In : Total=%d%n", len);
// Read in all bytes
if (!readNBytes(in, bb, len))
throw new RuntimeException("Corrupted data file");
in.close();
while (off < len) {
int type = ((bb[off++]&0xff)<<8) | (bb[off++]&0xff);
switch(type) {
case MAP_INDEXC2B:
readINDEXC2B();
break;
case MAP_SINGLEBYTE:
readSINGLEBYTE();
break;
case MAP_DOUBLEBYTE1:
readDOUBLEBYTE1();
break;
case MAP_DOUBLEBYTE2:
readDOUBLEBYTE2();
break;
case MAP_SUPPLEMENT:
b2cSupp = readCharArray();
break;
case MAP_SUPPLEMENT_C2B:
c2bSupp = readCharArray();
break;
case MAP_COMPOSITE:
readCOMPOSITE();
break;
default:
throw new RuntimeException("Corrupted data file");
}
}
bb = null;
return this;
} catch (IOException x) {
x.printStackTrace();
return null;
}
}
}
......@@ -23,9 +23,6 @@
* have any questions.
*/
/*
*/
package sun.nio.cs.ext;
import java.lang.ref.SoftReference;
......@@ -183,6 +180,25 @@ public class ExtendedCharsets
"csISO159JISX02121990"
});
charset("x-SJIS_0213", "SJIS_0213",
new String[] {
"sjis-0213",
"sjis_0213",
"sjis:2004",
"sjis_0213:2004",
"shift_jis_0213:2004",
"shift_jis:2004"
});
charset("x-MS932_0213", "MS932_0213",
new String[] {
"MS932-0213",
"MS932_0213",
"MS932:2004",
"windows-932-0213",
"windows-932:2004"
});
charset("EUC-JP", "EUC_JP",
new String[] {
"euc_jp", // JDK historical
......
/*
* Copyright 2008 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Sun designates this
* particular file as subject to the "Classpath" exception as provided
* by Sun in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*/
package sun.nio.cs.ext;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CharsetDecoder;
/**
 * The "x-MS932_0213" charset: each double-byte lookup is tried against the
 * MS932 tables first and falls back to the SJIS_0213 mapping when MS932 has
 * no entry.
 */
public class MS932_0213 extends Charset {

    public MS932_0213() {
        super("x-MS932_0213", ExtendedCharsets.aliasesFor("MS932_0213"));
    }

    /** True for US-ASCII, MS932 and this charset itself. */
    public boolean contains(Charset cs) {
        if (cs.name().equals("US-ASCII")) {
            return true;
        }
        if (cs instanceof MS932) {
            return true;
        }
        return cs instanceof MS932_0213;
    }

    public CharsetDecoder newDecoder() {
        return new Decoder(this);
    }

    public CharsetEncoder newEncoder() {
        return new Encoder(this);
    }

    /** SJIS_0213 decoder whose double-byte lookup consults MS932 first. */
    protected static class Decoder extends SJIS_0213.Decoder {
        MS932DB.Decoder decMS932;

        protected Decoder(Charset cs) {
            super(cs);
            decMS932 = new MS932DB.Decoder(cs);
        }

        protected char decodeDouble(int b1, int b2) {
            char fromMS932 = decMS932.decodeDouble(b1, b2);
            // REPLACE_CHAR is the MS932 decoder's "no mapping" result
            return (fromMS932 != DoubleByteDecoder.REPLACE_CHAR)
                ? fromMS932
                : super.decodeDouble(b1, b2);
        }
    }

    /** SJIS_0213 encoder whose char lookup consults MS932 first. */
    protected static class Encoder extends SJIS_0213.Encoder {
        MS932DB.Encoder encMS932;

        protected Encoder(Charset cs) {
            super(cs);
            encMS932 = new MS932DB.Encoder(cs);
        }

        protected int encodeChar(char ch) {
            int fromMS932 = encMS932.encodeDouble(ch);
            // a result of 0 is handed over to the SJIS_0213 mapping
            return (fromMS932 != 0) ? fromMS932 : super.encodeChar(ch);
        }
    }
}
/*
* Copyright 2008 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Sun designates this
* particular file as subject to the "Classpath" exception as provided
* by Sun in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*/
package sun.nio.cs.ext;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CoderResult;
import java.util.Arrays;
import sun.nio.cs.CharsetMapping;
/*
* 5 types of entry in SJIS_X_0213/Unicode mapping table
*
* (1)Single-Byte
* JIS_X_0213 does not define single-byte character itself, the
* JIS_X_0201 entries are added in for sjis implementation.
*
* (2)Double-Byte SJIS <-> BMP Unicode
* ex: 0x8140 U+3000 # IDEOGRAPHIC SPACE
*
* (3)Double-Byte SJIS <-> Supplementary
* ex: 0xFCF0 U+2A61A # <cjk> [2000] [Unicode3.1]
*
* (4)Double-Byte SJIS <-> Composite
* ex: 0x83F6 U+31F7+309A # [2000]
*
* (5)"Windows-only" special mapping entries
* are handled by MS932_0213.
*/
// The "x-SJIS_0213" charset. All table lookups are delegated to the shared
// CharsetMapping instance loaded from the "sjis0213.dat" resource.
public class SJIS_0213 extends Charset {
public SJIS_0213() {
super("x-SJIS_0213", ExtendedCharsets.aliasesFor("SJIS_0213"));
}
// True for US-ASCII, SJIS and this charset itself.
public boolean contains(Charset cs) {
return ((cs.name().equals("US-ASCII"))
|| (cs instanceof SJIS)
|| (cs instanceof SJIS_0213));
}
public CharsetDecoder newDecoder() {
return new Decoder(this);
}
public CharsetEncoder newEncoder() {
return new Encoder(this);
}
// Loaded once, when this class is initialized, and shared by every
// decoder and encoder.
static CharsetMapping mapping =
CharsetMapping.get(SJIS_0213.class.getResourceAsStream("sjis0213.dat"));
// Decoder: one or two input bytes produce one char, or two chars for a
// supplementary (surrogate pair) or composite (base + combining) mapping.
protected static class Decoder extends CharsetDecoder {
protected static final char UNMAPPABLE = CharsetMapping.UNMAPPABLE_DECODING;
protected Decoder(Charset cs) {
// average 0.5 and at most 1.0 chars per input byte
super(cs, 0.5f, 1.0f);
}
// Decode loop for array-backed buffers; works on the backing arrays and
// writes both positions back in the finally clause.
private CoderResult decodeArrayLoop(ByteBuffer src, CharBuffer dst) {
byte[] sa = src.array();
int sp = src.arrayOffset() + src.position();
int sl = src.arrayOffset() + src.limit();
char[] da = dst.array();
int dp = dst.arrayOffset() + dst.position();
int dl = dst.arrayOffset() + dst.limit();
try {
while (sp < sl) {
int b1 = sa[sp] & 0xff;
// try the byte as a single-byte character first
char c = decodeSingle(b1);
int inSize = 1, outSize = 1;
char[] cc = null;
if (c == UNMAPPABLE) {
// not a single-byte character: a second byte is needed
if (sl - sp < 2)
return CoderResult.UNDERFLOW;
int b2 = sa[sp + 1] & 0xff;
c = decodeDouble(b1, b2);
inSize++;
if (c == UNMAPPABLE) {
// last resort: supplementary or composite mapping (two chars)
cc = decodeDoubleEx(b1, b2);
if (cc == null) {
// report both bytes only when the second one is not a valid
// single-byte character by itself
if (decodeSingle(b2) == UNMAPPABLE)
return CoderResult.unmappableForLength(2);
else
return CoderResult.unmappableForLength(1);
}
outSize++;
}
}
if (dl - dp < outSize)
return CoderResult.OVERFLOW;
if (outSize == 2) {
da[dp++] = cc[0];
da[dp++] = cc[1];
} else {
da[dp++] = c;
}
sp += inSize;
}
return CoderResult.UNDERFLOW;
} finally {
src.position(sp - src.arrayOffset());
dst.position(dp - dst.arrayOffset());
}
}
// Decode loop for buffers without accessible arrays. "mark" tracks the
// position just past the last fully decoded character; the source position
// is reset to it on exit so partially read input is not consumed.
private CoderResult decodeBufferLoop(ByteBuffer src, CharBuffer dst) {
int mark = src.position();
try {
while (src.hasRemaining()) {
char[] cc = null;
int b1 = src.get() & 0xff;
char c = decodeSingle(b1);
int inSize = 1, outSize = 1;
if (c == UNMAPPABLE) {
if (src.remaining() < 1)
return CoderResult.UNDERFLOW;
int b2 = src.get() & 0xff;
inSize++;
c = decodeDouble(b1, b2);
if (c == UNMAPPABLE) {
cc = decodeDoubleEx(b1, b2);
if (cc == null) {
if (decodeSingle(b2) == UNMAPPABLE)
return CoderResult.unmappableForLength(2);
else
return CoderResult.unmappableForLength(1);
}
outSize++;
}
}
if (dst.remaining() < outSize)
return CoderResult.OVERFLOW;
if (outSize == 2) {
dst.put(cc[0]);
dst.put(cc[1]);
} else {
dst.put(c);
}
mark += inSize;
}
return CoderResult.UNDERFLOW;
} finally {
src.position(mark);
}
}
// Picks the array-based loop when both buffers expose their arrays.
protected CoderResult decodeLoop(ByteBuffer src, CharBuffer dst) {
if (src.hasArray() && dst.hasArray())
return decodeArrayLoop(src, dst);
else
return decodeBufferLoop(src, dst);
}
protected char decodeSingle(int b) {
return mapping.decodeSingle(b);
}
protected char decodeDouble(int b1, int b2) {
return mapping.decodeDouble(b1, b2);
}
// Scratch objects reused by decodeDoubleEx on every call; the array it
// returns is this same "cc" instance.
private char[] cc = new char[2];
private CharsetMapping.Entry comp = new CharsetMapping.Entry();
// Decodes a byte pair that maps to two chars: the supplementary table is
// tried first, then the composite table. Returns null if neither matches.
protected char[] decodeDoubleEx(int b1, int b2) {
int db = (b1 << 8) | b2;
if (mapping.decodeSurrogate(db, cc) != null)
return cc;
comp.bs = db;
if (mapping.decodeComposite(comp, cc) != null)
return cc;
return null;
}
}
// Encoder: a char, surrogate pair or base + combining char pair is written
// as one or two bytes.
protected static class Encoder extends CharsetEncoder {
protected static final int UNMAPPABLE = CharsetMapping.UNMAPPABLE_ENCODING;
protected static final int MAX_SINGLEBYTE = 0xff;
protected Encoder(Charset cs) {
// average 2.0 and at most 2.0 bytes per input char
super(cs, 2.0f, 2.0f);
}
public boolean canEncode(char c) {
return (encodeChar(c) != UNMAPPABLE);
}
protected int encodeChar(char ch) {
return mapping.encodeChar(ch);
}
protected int encodeSurrogate(char hi, char lo) {
return mapping.encodeSurrogate(hi, lo);
}
// Scratch lookup key shared by encodeComposite and isCompositeBase.
private CharsetMapping.Entry comp = new CharsetMapping.Entry();
protected int encodeComposite(char base, char cc) {
comp.cp = base;
comp.cp2 = cc;
return mapping.encodeComposite(comp);
}
protected boolean isCompositeBase(char ch) {
comp.cp = ch;
return mapping.isCompositeBase(comp);
}
// Unlike surrogate pair, the base character of a base+cc composite
// itself is a legal codepoint in 0213, if we simply return UNDERFLOW
// when a base candidate is the last input char in the CharBuffer, like
// what we do for the surrogate pair, encoding will fail if this base
// character is indeed the last character of the input char sequence.
// Keep this base candidate in "leftoverBase" so we can flush it out
// at the end of the encoding cycle.
char leftoverBase = 0;
// Encode loop for array-backed buffers; works on the backing arrays and
// writes both positions back in the finally clause.
protected CoderResult encodeArrayLoop(CharBuffer src, ByteBuffer dst) {
char[] sa = src.array();
int sp = src.arrayOffset() + src.position();
int sl = src.arrayOffset() + src.limit();
byte[] da = dst.array();
int dp = dst.arrayOffset() + dst.position();
int dl = dst.arrayOffset() + dst.limit();
try {
while (sp < sl) {
int db;
char c = sa[sp];
if (leftoverBase != 0) {
// A base candidate was held back earlier: emit it together with c as a
// composite if that pair is mapped, otherwise emit the base on its own
// and continue below with c as an ordinary char. Two bytes are written
// in either case.
boolean isComp = false;
db = encodeComposite(leftoverBase, c);
if (db == UNMAPPABLE)
db = encodeChar(leftoverBase);
else
isComp = true;
if (dl - dp < 2)
return CoderResult.OVERFLOW;
da[dp++] = (byte)(db >> 8);
da[dp++] = (byte)db;
leftoverBase = 0;
if (isComp) {
sp++;
continue;
}
}
if (isCompositeBase(c)) {
// hold c back until the next char (or the flush) decides its encoding
leftoverBase = c;
} else {
db = encodeChar(c);
if (db > MAX_SINGLEBYTE) { // DoubleByte
if (dl - dp < 2)
return CoderResult.OVERFLOW;
da[dp++] = (byte)(db >> 8);
da[dp++] = (byte)db;
} else if (db != UNMAPPABLE) { // SingleByte
if (dl <= dp)
return CoderResult.OVERFLOW;
da[dp++] = (byte)db;
} else if (Character.isHighSurrogate(c)) {
if ((sp + 1) == sl)
return CoderResult.UNDERFLOW;
char c2 = sa[sp + 1];
if (!Character.isLowSurrogate(c2))
return CoderResult.malformedForLength(1);
db = encodeSurrogate(c, c2);
if (db == UNMAPPABLE)
return CoderResult.unmappableForLength(2);
if (dl - dp < 2)
return CoderResult.OVERFLOW;
da[dp++] = (byte)(db >> 8);
da[dp++] = (byte)db;
// extra increment for the low surrogate
sp++;
} else {
return CoderResult.unmappableForLength(1);
}
}
sp++;
}
return CoderResult.UNDERFLOW;
} finally {
src.position(sp - src.arrayOffset());
dst.position(dp - dst.arrayOffset());
}
}
// Encode loop for buffers without accessible arrays. "mark" tracks the
// position just past the last char that was consumed; the source position
// is reset to it on exit.
protected CoderResult encodeBufferLoop(CharBuffer src, ByteBuffer dst) {
int mark = src.position();
try {
while (src.hasRemaining()) {
int db;
char c = src.get();
if (leftoverBase != 0) {
// same held-back base handling as in encodeArrayLoop
boolean isComp = false;
db = encodeComposite(leftoverBase, c);
if (db == UNMAPPABLE)
db = encodeChar(leftoverBase);
else
isComp = true;
if (dst.remaining() < 2)
return CoderResult.OVERFLOW;
dst.put((byte)(db >> 8));
dst.put((byte)(db));
leftoverBase = 0;
if (isComp) {
mark++;
continue;
}
}
if (isCompositeBase(c)) {
leftoverBase = c;
} else {
db = encodeChar(c);
if (db > MAX_SINGLEBYTE) { // DoubleByte
if (dst.remaining() < 2)
return CoderResult.OVERFLOW;
dst.put((byte)(db >> 8));
dst.put((byte)(db));
} else if (db != UNMAPPABLE) { // Single-byte
if (dst.remaining() < 1)
return CoderResult.OVERFLOW;
dst.put((byte)db);
} else if (Character.isHighSurrogate(c)) {
if (!src.hasRemaining()) // Surrogates
return CoderResult.UNDERFLOW;
char c2 = src.get();
if (!Character.isLowSurrogate(c2))
return CoderResult.malformedForLength(1);
db = encodeSurrogate(c, c2);
if (db == UNMAPPABLE)
return CoderResult.unmappableForLength(2);
if (dst.remaining() < 2)
return CoderResult.OVERFLOW;
dst.put((byte)(db >> 8));
dst.put((byte)(db));
// extra increment for the low surrogate
mark++;
} else {
return CoderResult.unmappableForLength(1);
}
}
mark++;
}
return CoderResult.UNDERFLOW;
} finally {
src.position(mark);
}
}
// Picks the array-based loop when both buffers expose their arrays.
protected CoderResult encodeLoop(CharBuffer src, ByteBuffer dst) {
if (src.hasArray() && dst.hasArray())
return encodeArrayLoop(src, dst);
else
return encodeBufferLoop(src, dst);
}
// Writes a still pending base candidate as a plain two-byte character.
protected CoderResult implFlush(ByteBuffer dst) {
if (leftoverBase > 0) {
if (dst.remaining() < 2)
return CoderResult.OVERFLOW;
int db = encodeChar(leftoverBase);
dst.put((byte)(db >> 8));
dst.put((byte)(db));
leftoverBase = 0;
}
return CoderResult.UNDERFLOW;
}
// Drops any pending base candidate.
protected void implReset() {
leftoverBase = 0;
}
}
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册