提交 a66c31ee 编写于 作者: S sherman

6636323: Optimize handling of builtin charsets

6636319: Encoders should implement isLegalReplacement(byte[] repl)
Summary: optimized new String(byte[], cs/csn) and String.getBytes(cs/csn) for speed and memory consumption in singlebyte case.
Reviewed-by: alanb
上级 319ca7a7
...@@ -220,6 +220,8 @@ FILES_src = \ ...@@ -220,6 +220,8 @@ FILES_src = \
sun/nio/ch/Util.java \ sun/nio/ch/Util.java \
\ \
sun/nio/cs/AbstractCharsetProvider.java \ sun/nio/cs/AbstractCharsetProvider.java \
sun/nio/cs/ArrayDecoder.java \
sun/nio/cs/ArrayEncoder.java \
sun/nio/cs/FastCharsetProvider.java \ sun/nio/cs/FastCharsetProvider.java \
sun/nio/cs/HistoricallyNamedCharset.java \ sun/nio/cs/HistoricallyNamedCharset.java \
sun/nio/cs/ISO_8859_1.java \ sun/nio/cs/ISO_8859_1.java \
......
/* /*
* Copyright 2000-2008 Sun Microsystems, Inc. All Rights Reserved. * Copyright 2000-2009 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
...@@ -25,13 +25,10 @@ ...@@ -25,13 +25,10 @@
package java.lang; package java.lang;
import java.io.CharConversionException;
import java.io.UnsupportedEncodingException; import java.io.UnsupportedEncodingException;
import java.lang.ref.SoftReference; import java.lang.ref.SoftReference;
import java.nio.ByteBuffer; import java.nio.ByteBuffer;
import java.nio.CharBuffer; import java.nio.CharBuffer;
import java.nio.BufferOverflowException;
import java.nio.BufferUnderflowException;
import java.nio.charset.Charset; import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder; import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder; import java.nio.charset.CharsetEncoder;
...@@ -39,11 +36,12 @@ import java.nio.charset.CharacterCodingException; ...@@ -39,11 +36,12 @@ import java.nio.charset.CharacterCodingException;
import java.nio.charset.CoderResult; import java.nio.charset.CoderResult;
import java.nio.charset.CodingErrorAction; import java.nio.charset.CodingErrorAction;
import java.nio.charset.IllegalCharsetNameException; import java.nio.charset.IllegalCharsetNameException;
import java.nio.charset.MalformedInputException;
import java.nio.charset.UnsupportedCharsetException; import java.nio.charset.UnsupportedCharsetException;
import java.util.Arrays; import java.util.Arrays;
import sun.misc.MessageUtils; import sun.misc.MessageUtils;
import sun.nio.cs.HistoricallyNamedCharset; import sun.nio.cs.HistoricallyNamedCharset;
import sun.nio.cs.ArrayDecoder;
import sun.nio.cs.ArrayEncoder;
/** /**
* Utility class for string encoding and decoding. * Utility class for string encoding and decoding.
...@@ -74,10 +72,8 @@ class StringCoding { ...@@ -74,10 +72,8 @@ class StringCoding {
// Trim the given byte array to the given length // Trim the given byte array to the given length
// //
private static byte[] safeTrim(byte[] ba, int len, Charset cs) { private static byte[] safeTrim(byte[] ba, int len, Charset cs, boolean isTrusted) {
if (len == ba.length if (len == ba.length && (isTrusted || System.getSecurityManager() == null))
&& (System.getSecurityManager() == null
|| cs.getClass().getClassLoader0() == null))
return ba; return ba;
else else
return Arrays.copyOf(ba, len); return Arrays.copyOf(ba, len);
...@@ -85,10 +81,9 @@ class StringCoding { ...@@ -85,10 +81,9 @@ class StringCoding {
// Trim the given char array to the given length // Trim the given char array to the given length
// //
private static char[] safeTrim(char[] ca, int len, Charset cs) { private static char[] safeTrim(char[] ca, int len,
if (len == ca.length Charset cs, boolean isTrusted) {
&& (System.getSecurityManager() == null if (len == ca.length && (isTrusted || System.getSecurityManager() == null))
|| cs.getClass().getClassLoader0() == null))
return ca; return ca;
else else
return Arrays.copyOf(ca, len); return Arrays.copyOf(ca, len);
...@@ -128,6 +123,7 @@ class StringCoding { ...@@ -128,6 +123,7 @@ class StringCoding {
private final String requestedCharsetName; private final String requestedCharsetName;
private final Charset cs; private final Charset cs;
private final CharsetDecoder cd; private final CharsetDecoder cd;
private final boolean isTrusted;
private StringDecoder(Charset cs, String rcn) { private StringDecoder(Charset cs, String rcn) {
this.requestedCharsetName = rcn; this.requestedCharsetName = rcn;
...@@ -135,6 +131,7 @@ class StringCoding { ...@@ -135,6 +131,7 @@ class StringCoding {
this.cd = cs.newDecoder() this.cd = cs.newDecoder()
.onMalformedInput(CodingErrorAction.REPLACE) .onMalformedInput(CodingErrorAction.REPLACE)
.onUnmappableCharacter(CodingErrorAction.REPLACE); .onUnmappableCharacter(CodingErrorAction.REPLACE);
this.isTrusted = (cs.getClass().getClassLoader0() == null);
} }
String charsetName() { String charsetName() {
...@@ -152,24 +149,28 @@ class StringCoding { ...@@ -152,24 +149,28 @@ class StringCoding {
char[] ca = new char[en]; char[] ca = new char[en];
if (len == 0) if (len == 0)
return ca; return ca;
cd.reset(); if (cd instanceof ArrayDecoder) {
ByteBuffer bb = ByteBuffer.wrap(ba, off, len); int clen = ((ArrayDecoder)cd).decode(ba, off, len, ca);
CharBuffer cb = CharBuffer.wrap(ca); return safeTrim(ca, clen, cs, isTrusted);
try { } else {
CoderResult cr = cd.decode(bb, cb, true); cd.reset();
if (!cr.isUnderflow()) ByteBuffer bb = ByteBuffer.wrap(ba, off, len);
cr.throwException(); CharBuffer cb = CharBuffer.wrap(ca);
cr = cd.flush(cb); try {
if (!cr.isUnderflow()) CoderResult cr = cd.decode(bb, cb, true);
cr.throwException(); if (!cr.isUnderflow())
} catch (CharacterCodingException x) { cr.throwException();
// Substitution is always enabled, cr = cd.flush(cb);
// so this shouldn't happen if (!cr.isUnderflow())
throw new Error(x); cr.throwException();
} catch (CharacterCodingException x) {
// Substitution is always enabled,
// so this shouldn't happen
throw new Error(x);
}
return safeTrim(ca, cb.position(), cs, isTrusted);
} }
return safeTrim(ca, cb.position(), cs);
} }
} }
static char[] decode(String charsetName, byte[] ba, int off, int len) static char[] decode(String charsetName, byte[] ba, int off, int len)
...@@ -193,8 +194,57 @@ class StringCoding { ...@@ -193,8 +194,57 @@ class StringCoding {
} }
static char[] decode(Charset cs, byte[] ba, int off, int len) { static char[] decode(Charset cs, byte[] ba, int off, int len) {
StringDecoder sd = new StringDecoder(cs, cs.name()); // (1)We never cache the "external" cs, the only benefit of creating
return sd.decode(Arrays.copyOfRange(ba, off, off + len), 0, len); // an additional StringDe/Encoder object to wrap it is to share the
// de/encode() method. These SD/E objects are short-lived, the young-gen
// gc should be able to take care of them well. But the best approach
// is still not to generate them if not really necessary.
// (2)The defensive copy of the input byte/char[] has a big performance
// impact, as well as the outgoing result byte/char[]. Need to do the
// optimization check of (sm==null && classLoader0==null) for both.
// (3)getClass().getClassLoader0() is expensive
// (4)There might be a timing gap in isTrusted setting. getClassLoader0()
// is only checked (and then isTrusted gets set) when (SM!=null). It is
// possible that the SM==null for now but then SM is NOT null later
// when safeTrim() is invoked...the "safe" way to do is to redundantly
// check (... && (isTrusted || SM == null || getClassLoader0())) in trim
// but it then can be argued that the SM is null when the operation
// is started...
CharsetDecoder cd = cs.newDecoder();
int en = scale(len, cd.maxCharsPerByte());
char[] ca = new char[en];
if (len == 0)
return ca;
boolean isTrusted = false;
if (System.getSecurityManager() != null) {
if (!(isTrusted = (cs.getClass().getClassLoader0() == null))) {
ba = Arrays.copyOfRange(ba, off, off + len);
off = 0;
}
}
if (cd instanceof ArrayDecoder) {
int clen = ((ArrayDecoder)cd).decode(ba, off, len, ca);
return safeTrim(ca, clen, cs, isTrusted);
} else {
cd.onMalformedInput(CodingErrorAction.REPLACE)
.onUnmappableCharacter(CodingErrorAction.REPLACE)
.reset();
ByteBuffer bb = ByteBuffer.wrap(ba, off, len);
CharBuffer cb = CharBuffer.wrap(ca);
try {
CoderResult cr = cd.decode(bb, cb, true);
if (!cr.isUnderflow())
cr.throwException();
cr = cd.flush(cb);
if (!cr.isUnderflow())
cr.throwException();
} catch (CharacterCodingException x) {
// Substitution is always enabled,
// so this shouldn't happen
throw new Error(x);
}
return safeTrim(ca, cb.position(), cs, isTrusted);
}
} }
static char[] decode(byte[] ba, int off, int len) { static char[] decode(byte[] ba, int off, int len) {
...@@ -218,14 +268,12 @@ class StringCoding { ...@@ -218,14 +268,12 @@ class StringCoding {
} }
} }
// -- Encoding -- // -- Encoding --
private static class StringEncoder { private static class StringEncoder {
private Charset cs; private Charset cs;
private CharsetEncoder ce; private CharsetEncoder ce;
private final String requestedCharsetName; private final String requestedCharsetName;
private final boolean isTrusted;
private StringEncoder(Charset cs, String rcn) { private StringEncoder(Charset cs, String rcn) {
this.requestedCharsetName = rcn; this.requestedCharsetName = rcn;
...@@ -233,6 +281,7 @@ class StringCoding { ...@@ -233,6 +281,7 @@ class StringCoding {
this.ce = cs.newEncoder() this.ce = cs.newEncoder()
.onMalformedInput(CodingErrorAction.REPLACE) .onMalformedInput(CodingErrorAction.REPLACE)
.onUnmappableCharacter(CodingErrorAction.REPLACE); .onUnmappableCharacter(CodingErrorAction.REPLACE);
this.isTrusted = (cs.getClass().getClassLoader0() == null);
} }
String charsetName() { String charsetName() {
...@@ -250,23 +299,27 @@ class StringCoding { ...@@ -250,23 +299,27 @@ class StringCoding {
byte[] ba = new byte[en]; byte[] ba = new byte[en];
if (len == 0) if (len == 0)
return ba; return ba;
if (ce instanceof ArrayEncoder) {
ce.reset(); int blen = ((ArrayEncoder)ce).encode(ca, off, len, ba);
ByteBuffer bb = ByteBuffer.wrap(ba); return safeTrim(ba, blen, cs, isTrusted);
CharBuffer cb = CharBuffer.wrap(ca, off, len); } else {
try { ce.reset();
CoderResult cr = ce.encode(cb, bb, true); ByteBuffer bb = ByteBuffer.wrap(ba);
if (!cr.isUnderflow()) CharBuffer cb = CharBuffer.wrap(ca, off, len);
cr.throwException(); try {
cr = ce.flush(bb); CoderResult cr = ce.encode(cb, bb, true);
if (!cr.isUnderflow()) if (!cr.isUnderflow())
cr.throwException(); cr.throwException();
} catch (CharacterCodingException x) { cr = ce.flush(bb);
// Substitution is always enabled, if (!cr.isUnderflow())
// so this shouldn't happen cr.throwException();
throw new Error(x); } catch (CharacterCodingException x) {
// Substitution is always enabled,
// so this shouldn't happen
throw new Error(x);
}
return safeTrim(ba, bb.position(), cs, isTrusted);
} }
return safeTrim(ba, bb.position(), cs);
} }
} }
...@@ -291,8 +344,39 @@ class StringCoding { ...@@ -291,8 +344,39 @@ class StringCoding {
} }
static byte[] encode(Charset cs, char[] ca, int off, int len) { static byte[] encode(Charset cs, char[] ca, int off, int len) {
StringEncoder se = new StringEncoder(cs, cs.name()); CharsetEncoder ce = cs.newEncoder();
return se.encode(Arrays.copyOfRange(ca, off, off + len), 0, len); int en = scale(len, ce.maxBytesPerChar());
byte[] ba = new byte[en];
if (len == 0)
return ba;
boolean isTrusted = false;
if (System.getSecurityManager() != null) {
if (!(isTrusted = (cs.getClass().getClassLoader0() == null))) {
ca = Arrays.copyOfRange(ca, off, off + len);
off = 0;
}
}
if (ce instanceof ArrayEncoder) {
int blen = ((ArrayEncoder)ce).encode(ca, off, len, ba);
return safeTrim(ba, blen, cs, isTrusted);
} else {
ce.onMalformedInput(CodingErrorAction.REPLACE)
.onUnmappableCharacter(CodingErrorAction.REPLACE)
.reset();
ByteBuffer bb = ByteBuffer.wrap(ba);
CharBuffer cb = CharBuffer.wrap(ca, off, len);
try {
CoderResult cr = ce.encode(cb, bb, true);
if (!cr.isUnderflow())
cr.throwException();
cr = ce.flush(bb);
if (!cr.isUnderflow())
cr.throwException();
} catch (CharacterCodingException x) {
throw new Error(x);
}
return safeTrim(ba, bb.position(), cs, isTrusted);
}
} }
static byte[] encode(char[] ca, int off, int len) { static byte[] encode(char[] ca, int off, int len) {
......
/*
* Copyright 2009 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Sun designates this
* particular file as subject to the "Classpath" exception as provided
* by Sun in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*/
package sun.nio.cs;
/*
* FastPath byte[]->char[] decoder, REPLACE on malformed or
* unmappable input.
*/
public interface ArrayDecoder {
/**
 * Decodes {@code len} bytes of {@code src}, starting at index {@code off},
 * into {@code dst}, writing from index 0 of {@code dst}.
 *
 * The implementations added alongside this interface never write past
 * {@code dst.length} and substitute the decoder's replacement character
 * for input that cannot be decoded.
 *
 * @param src source bytes
 * @param off index of the first byte to decode
 * @param len number of bytes to decode
 * @param dst destination array
 * @return the number of chars written to {@code dst}; StringCoding uses
 *         this value as the length to trim the result to
 */
int decode(byte[] src, int off, int len, char[] dst);
}
/*
* Copyright 2009 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Sun designates this
* particular file as subject to the "Classpath" exception as provided
* by Sun in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*/
package sun.nio.cs;
/*
* FastPath char[]->byte[] encoder, REPLACE on malformed input or
* unmappable input.
*/
public interface ArrayEncoder {
/**
 * Encodes {@code len} chars of {@code src}, starting at index {@code off},
 * into {@code dst}, writing from index 0 of {@code dst}.
 *
 * The implementations added alongside this interface emit the encoder's
 * replacement byte for chars that cannot be encoded, and emit a single
 * replacement byte for a high/low surrogate pair.
 *
 * @param src source chars
 * @param off index of the first char to encode
 * @param len number of chars to encode
 * @param dst destination array
 * @return the number of bytes written to {@code dst}; StringCoding uses
 *         this value as the length to trim the result to
 */
int encode(char[] src, int off, int len, byte[] dst);
}
...@@ -23,9 +23,6 @@ ...@@ -23,9 +23,6 @@
* have any questions. * have any questions.
*/ */
/*
*/
package sun.nio.cs; package sun.nio.cs;
import java.nio.ByteBuffer; import java.nio.ByteBuffer;
...@@ -34,10 +31,7 @@ import java.nio.charset.Charset; ...@@ -34,10 +31,7 @@ import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder; import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder; import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult; import java.nio.charset.CoderResult;
import java.nio.charset.CharacterCodingException; import java.util.Arrays;
import java.nio.charset.MalformedInputException;
import java.nio.charset.UnmappableCharacterException;
class ISO_8859_1 class ISO_8859_1
extends Charset extends Charset
...@@ -65,8 +59,8 @@ class ISO_8859_1 ...@@ -65,8 +59,8 @@ class ISO_8859_1
return new Encoder(this); return new Encoder(this);
} }
private static class Decoder extends CharsetDecoder { private static class Decoder extends CharsetDecoder
implements ArrayDecoder {
private Decoder(Charset cs) { private Decoder(Charset cs) {
super(cs, 1.0f, 1.0f); super(cs, 1.0f, 1.0f);
} }
...@@ -127,10 +121,18 @@ class ISO_8859_1 ...@@ -127,10 +121,18 @@ class ISO_8859_1
return decodeBufferLoop(src, dst); return decodeBufferLoop(src, dst);
} }
/**
 * ArrayDecoder fast path for ISO-8859-1: every byte maps to the char with
 * the same unsigned value, so no replacement is ever needed.
 *
 * @param src source bytes
 * @param sp  index of the first byte to decode
 * @param len number of bytes to decode; capped at {@code dst.length}
 * @param dst destination array, filled from index 0
 * @return the number of chars written to {@code dst}
 */
public int decode(byte[] src, int sp, int len, char[] dst) {
    int written = 0;
    // Never produce more chars than the destination can hold.
    for (int limit = Math.min(len, dst.length); written < limit; written++) {
        dst[written] = (char) (src[sp + written] & 0xff);
    }
    return written;
}
} }
private static class Encoder extends CharsetEncoder { private static class Encoder extends CharsetEncoder
implements ArrayEncoder {
private Encoder(Charset cs) { private Encoder(Charset cs) {
super(cs, 1.0f, 1.0f); super(cs, 1.0f, 1.0f);
} }
...@@ -139,6 +141,10 @@ class ISO_8859_1 ...@@ -139,6 +141,10 @@ class ISO_8859_1
return c <= '\u00FF'; return c <= '\u00FF';
} }
// A replacement is legal for this encoder exactly when it is one byte long;
// the value of that byte is not inspected.
public boolean isLegalReplacement(byte[] repl) {
return (repl.length == 1); // we accept any byte value
}
private final Surrogate.Parser sgp = new Surrogate.Parser(); private final Surrogate.Parser sgp = new Surrogate.Parser();
private CoderResult encodeArrayLoop(CharBuffer src, private CoderResult encodeArrayLoop(CharBuffer src,
...@@ -208,5 +214,31 @@ class ISO_8859_1 ...@@ -208,5 +214,31 @@ class ISO_8859_1
return encodeBufferLoop(src, dst); return encodeBufferLoop(src, dst);
} }
// Replacement byte emitted by the array-based encode(char[], int, int, byte[])
// fast path; defaults to '?'.
private byte repl = (byte)'?';
// Keeps the fast-path replacement byte in step with a newly installed
// replacement; only the first byte of the new replacement is recorded.
protected void implReplaceWith(byte[] newReplacement) {
repl = newReplacement[0];
}
/*
 * ArrayEncoder fast path for ISO-8859-1.
 *
 * Encodes chars from src, starting at sp, into dst (written from index 0)
 * and returns the number of bytes written. Chars up to U+00FF are stored
 * as their low byte; anything else is replaced by repl. A high surrogate
 * immediately followed by a low surrogate is consumed as one unit and
 * produces a single replacement byte.
 */
public int encode(char[] src, int sp, int len, byte[] dst) {
int dp = 0;
// Source limit: read no more chars than dst has room for bytes.
int sl = sp + Math.min(len, dst.length);
while (sp < sl) {
char c = src[sp++];
if (c <= '\u00FF') {
// Directly representable: the byte is the char's low 8 bits.
dst[dp++] = (byte)c;
continue;
}
if (Surrogate.isHigh(c) && sp < sl &&
Surrogate.isLow(src[sp])) {
// The limit was capped by dst.length. The pair uses two source chars
// but only one output byte, so one more source char can be admitted.
if (len > dst.length) {
sl++;
len--;
}
// Skip the low surrogate; the pair yields one replacement byte below.
sp++;
}
dst[dp++] = repl;
}
return dp;
}
} }
} }
...@@ -32,6 +32,7 @@ import java.nio.charset.Charset; ...@@ -32,6 +32,7 @@ import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder; import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder; import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult; import java.nio.charset.CoderResult;
import java.util.Arrays;
import static sun.nio.cs.CharsetMapping.*; import static sun.nio.cs.CharsetMapping.*;
public class SingleByte public class SingleByte
...@@ -45,7 +46,8 @@ public class SingleByte ...@@ -45,7 +46,8 @@ public class SingleByte
return cr; return cr;
} }
public static class Decoder extends CharsetDecoder { final public static class Decoder extends CharsetDecoder
implements ArrayDecoder {
private final char[] b2c; private final char[] b2c;
public Decoder(Charset cs, char[] b2c) { public Decoder(Charset cs, char[] b2c) {
...@@ -108,9 +110,29 @@ public class SingleByte ...@@ -108,9 +110,29 @@ public class SingleByte
private final char decode(int b) { private final char decode(int b) {
return b2c[b + 128]; return b2c[b + 128];
} }
// Replacement char written by the array-based decode(byte[], int, int, char[])
// fast path for unmappable bytes; defaults to U+FFFD.
private char repl = '\uFFFD';
// Keeps the fast-path replacement char in step with a newly installed
// replacement; only the first char of the new replacement is recorded.
protected void implReplaceWith(String newReplacement) {
repl = newReplacement.charAt(0);
}
/**
 * ArrayDecoder fast path: maps each source byte through the b2c table and
 * substitutes the replacement char for bytes the table marks unmappable.
 *
 * @param src source bytes
 * @param sp  index of the first byte to decode
 * @param len number of bytes to decode; capped at {@code dst.length}
 * @param dst destination array, filled from index 0
 * @return the number of chars written to {@code dst}
 */
public int decode(byte[] src, int sp, int len, char[] dst) {
    final int limit = Math.min(len, dst.length);
    int count = 0;
    for (; count < limit; count++) {
        final char mapped = decode(src[sp + count]);
        dst[count] = (mapped == UNMAPPABLE_DECODING) ? repl : mapped;
    }
    return count;
}
} }
public static class Encoder extends CharsetEncoder { final public static class Encoder extends CharsetEncoder
implements ArrayEncoder {
private Surrogate.Parser sgp; private Surrogate.Parser sgp;
private final char[] c2b; private final char[] c2b;
private final char[] c2bIndex; private final char[] c2bIndex;
...@@ -125,6 +147,11 @@ public class SingleByte ...@@ -125,6 +147,11 @@ public class SingleByte
return encode(c) != UNMAPPABLE_ENCODING; return encode(c) != UNMAPPABLE_ENCODING;
} }
// A single '?' byte is accepted outright, without consulting the superclass;
// any other replacement is legal only if the superclass check accepts it.
public boolean isLegalReplacement(byte[] repl) {
return ((repl.length == 1 && repl[0] == (byte)'?') ||
super.isLegalReplacement(repl));
}
private CoderResult encodeArrayLoop(CharBuffer src, ByteBuffer dst) { private CoderResult encodeArrayLoop(CharBuffer src, ByteBuffer dst) {
char[] sa = src.array(); char[] sa = src.array();
int sp = src.arrayOffset() + src.position(); int sp = src.arrayOffset() + src.position();
...@@ -200,6 +227,34 @@ public class SingleByte ...@@ -200,6 +227,34 @@ public class SingleByte
return UNMAPPABLE_ENCODING; return UNMAPPABLE_ENCODING;
return c2b[index + (ch & 0xff)]; return c2b[index + (ch & 0xff)];
} }
// Replacement byte emitted by the array-based encode(char[], int, int, byte[])
// fast path; defaults to '?'.
private byte repl = (byte)'?';
// Keeps the fast-path replacement byte in step with a newly installed
// replacement; only the first byte of the new replacement is recorded.
protected void implReplaceWith(byte[] newReplacement) {
repl = newReplacement[0];
}
/*
 * ArrayEncoder fast path for table-driven single-byte charsets.
 *
 * Encodes chars from src, starting at sp, into dst (written from index 0)
 * and returns the number of bytes written. Each char is looked up with
 * encode(char); chars reported as UNMAPPABLE_ENCODING are replaced by repl.
 * A high surrogate immediately followed by a low surrogate is consumed as
 * one unit and produces a single replacement byte.
 */
public int encode(char[] src, int sp, int len, byte[] dst) {
int dp = 0;
// Source limit: read no more chars than dst has room for bytes.
int sl = sp + Math.min(len, dst.length);
while (sp < sl) {
char c = src[sp++];
int b = encode(c);
if (b != UNMAPPABLE_ENCODING) {
dst[dp++] = (byte)b;
continue;
}
if (Surrogate.isHigh(c) && sp < sl &&
Surrogate.isLow(src[sp])) {
// The limit was capped by dst.length. The pair uses two source chars
// but only one output byte, so one more source char can be admitted.
if (len > dst.length) {
sl++;
len--;
}
// Skip the low surrogate; the pair yields one replacement byte below.
sp++;
}
dst[dp++] = repl;
}
return dp;
}
} }
// init the c2b and c2bIndex tables from b2c. // init the c2b and c2bIndex tables from b2c.
......
...@@ -31,10 +31,7 @@ import java.nio.charset.Charset; ...@@ -31,10 +31,7 @@ import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder; import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder; import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult; import java.nio.charset.CoderResult;
import java.nio.charset.CharacterCodingException; import java.util.Arrays;
import java.nio.charset.MalformedInputException;
import java.nio.charset.UnmappableCharacterException;
public class US_ASCII public class US_ASCII
extends Charset extends Charset
...@@ -61,7 +58,8 @@ public class US_ASCII ...@@ -61,7 +58,8 @@ public class US_ASCII
return new Encoder(this); return new Encoder(this);
} }
private static class Decoder extends CharsetDecoder { private static class Decoder extends CharsetDecoder
implements ArrayDecoder {
private Decoder(Charset cs) { private Decoder(Charset cs) {
super(cs, 1.0f, 1.0f); super(cs, 1.0f, 1.0f);
...@@ -131,9 +129,27 @@ public class US_ASCII ...@@ -131,9 +129,27 @@ public class US_ASCII
return decodeBufferLoop(src, dst); return decodeBufferLoop(src, dst);
} }
// Replacement char written by the array fast path for non-ASCII bytes;
// defaults to U+FFFD and follows the most recently installed replacement.
private char repl = '\uFFFD';

// Records the first char of a newly installed replacement string.
protected void implReplaceWith(String newReplacement) {
    repl = newReplacement.charAt(0);
}

/**
 * ArrayDecoder fast path for US-ASCII: bytes 0x00-0x7F map to the char of
 * the same value, bytes with the high bit set become the replacement char.
 *
 * @param src source bytes
 * @param sp  index of the first byte to decode
 * @param len number of bytes to decode; capped at {@code dst.length}
 * @param dst destination array, filled from index 0
 * @return the number of chars written to {@code dst}
 */
public int decode(byte[] src, int sp, int len, char[] dst) {
    final int limit = Math.min(len, dst.length);
    int count = 0;
    for (; count < limit; count++) {
        final byte b = src[sp + count];
        dst[count] = (b < 0) ? repl : (char) b;
    }
    return count;
}
} }
private static class Encoder extends CharsetEncoder { private static class Encoder extends CharsetEncoder
implements ArrayEncoder {
private Encoder(Charset cs) { private Encoder(Charset cs) {
super(cs, 1.0f, 1.0f); super(cs, 1.0f, 1.0f);
...@@ -143,8 +159,11 @@ public class US_ASCII ...@@ -143,8 +159,11 @@ public class US_ASCII
return c < 0x80; return c < 0x80;
} }
private final Surrogate.Parser sgp = new Surrogate.Parser(); public boolean isLegalReplacement(byte[] repl) {
return (repl.length == 1 && repl[0] >= 0);
}
private final Surrogate.Parser sgp = new Surrogate.Parser();
private CoderResult encodeArrayLoop(CharBuffer src, private CoderResult encodeArrayLoop(CharBuffer src,
ByteBuffer dst) ByteBuffer dst)
{ {
...@@ -213,6 +232,32 @@ public class US_ASCII ...@@ -213,6 +232,32 @@ public class US_ASCII
return encodeBufferLoop(src, dst); return encodeBufferLoop(src, dst);
} }
// Replacement byte emitted by the array-based encode(char[], int, int, byte[])
// fast path; defaults to '?'.
private byte repl = (byte)'?';
// Keeps the fast-path replacement byte in step with a newly installed
// replacement; only the first byte of the new replacement is recorded.
protected void implReplaceWith(byte[] newReplacement) {
repl = newReplacement[0];
}
/*
 * ArrayEncoder fast path for US-ASCII.
 *
 * Encodes chars from src, starting at sp, into dst (written from index 0)
 * and returns the number of bytes written. Chars below 0x80 are stored as
 * a single byte of the same value; anything else is replaced by repl. A
 * high surrogate immediately followed by a low surrogate is consumed as one
 * unit and produces a single replacement byte.
 */
public int encode(char[] src, int sp, int len, byte[] dst) {
int dp = 0;
// Source limit: read no more chars than dst has room for bytes.
int sl = sp + Math.min(len, dst.length);
while (sp < sl) {
char c = src[sp++];
if (c < 0x80) {
dst[dp++] = (byte)c;
continue;
}
if (Surrogate.isHigh(c) && sp < sl &&
Surrogate.isLow(src[sp])) {
// The limit was capped by dst.length. The pair uses two source chars
// but only one output byte, so one more source char can be admitted.
if (len > dst.length) {
sl++;
len--;
}
// Skip the low surrogate; the pair yields one replacement byte below.
sp++;
}
dst[dp++] = repl;
}
return dp;
}
} }
} }
...@@ -526,4 +526,3 @@ public class FindEncoderBugs { ...@@ -526,4 +526,3 @@ public class FindEncoderBugs {
System.out.printf("%nPassed = %d, failed = %d%n%n", passed, failed); System.out.printf("%nPassed = %d, failed = %d%n%n", passed, failed);
if (failed > 0) throw new AssertionError("Some tests failed");} if (failed > 0) throw new AssertionError("Some tests failed");}
} }
/*
* Copyright (c) 2009 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*/
import java.util.*;
import java.nio.*;
import java.nio.charset.*;
import java.util.concurrent.*;
import java.util.regex.Pattern;
/**
 * Micro-benchmark comparing {@code new String(byte[], ...)} and
 * {@code String.getBytes(...)} by charset name and by {@code Charset}
 * object, for ISO-8859-1, US-ASCII and the table-driven single-byte
 * charsets.
 *
 * Usage: java StrCodingBenchmark
 *   [-Diterations=N] [-Dsize=N] [-Dmaxchar=N]
 *   [-Dfilter=REGEXP] [-DSecurityManager=true]
 */
public class StrCodingBenchmark {

    /**
     * Binary name of the JDK's table-driven single-byte decoder. Compared by
     * name so that the benchmark has no compile-time dependency on a
     * JDK-internal package; the class is final, so a name match is
     * equivalent to an {@code instanceof} test.
     */
    private static final String SINGLE_BYTE_DECODER =
        "sun.nio.cs.SingleByte$Decoder";

    /** A named unit of work to be timed. */
    abstract static class Job {
        private final String name;
        public Job(String name) { this.name = name; }
        public String name() { return name; }
        public abstract void work() throws Throwable;
    }

    /**
     * Forces a full collection and waits until the finalizer queue has
     * drained, so that garbage from one job does not distort the next.
     */
    private static void collectAllGarbage() {
        final java.util.concurrent.CountDownLatch drained
            = new java.util.concurrent.CountDownLatch(1);
        try {
            System.gc();        // enqueue finalizable objects
            new Object() { protected void finalize() {
                drained.countDown(); }};
            System.gc();        // enqueue detector
            drained.await();    // wait for finalizer queue to drain
            System.gc();        // cleanup finalized objects
        } catch (InterruptedException e) { throw new Error(e); }
    }

    /**
     * Runs each job repeatedly until a minimum amount of wall-clock time
     * has elapsed and returns the average time per run.
     *
     * @param jobs the jobs to time
     * @return average nanoseconds per {@code work()} call, one entry per job
     * @throws Throwable whatever a job throws
     */
    public static long[] time0(Job ... jobs) throws Throwable {
        // Minimum time spent on each job (10 microseconds). A far larger
        // value, e.g. 10L * 1000L * 1000L * 1000L, is needed if the runtime
        // compilers are to warm up fully inside a single pass.
        final long warmupNanos = 100L * 100L;
        long[] nanoss = new long[jobs.length];
        for (int i = 0; i < jobs.length; i++) {
            collectAllGarbage();
            long t0 = System.nanoTime();
            long t;
            int j = 0;
            do { jobs[i].work(); j++; }
            while ((t = System.nanoTime() - t0) < warmupNanos);
            nanoss[i] = t/j;
        }
        return nanoss;
    }

    /**
     * Times the jobs (after one discarded warm-up pass) and prints a table
     * of absolute times and of times relative to the first job.
     *
     * @param jobs the jobs to time
     * @throws Throwable whatever a job throws
     */
    public static void time(Job ... jobs) throws Throwable {
        time0(jobs);                  // Warm up run, results discarded
        long[] nanoss = time0(jobs);  // Real timing run
        long[] milliss = new long[jobs.length];
        double[] ratios = new double[jobs.length];

        final String nameHeader   = "Method";
        final String millisHeader = "Millis";
        final String ratioHeader  = "Ratio";

        int nameWidth   = nameHeader.length();
        int millisWidth = millisHeader.length();
        int ratioWidth  = ratioHeader.length();

        for (int i = 0; i < jobs.length; i++) {
            nameWidth = Math.max(nameWidth, jobs[i].name().length());

            milliss[i] = nanoss[i]/(1000L * 1000L);
            millisWidth = Math.max(millisWidth,
                                   String.format("%d", milliss[i]).length());

            ratios[i] = (double) nanoss[i] / (double) nanoss[0];
            ratioWidth = Math.max(ratioWidth,
                                  String.format("%.3f", ratios[i]).length());
        }

        // The ratio column is part of both formats so that the computed
        // relative times are actually printed.
        String format = String.format("%%-%ds %%%dd %%%d.3f%%n",
                                      nameWidth, millisWidth, ratioWidth);
        String headerFormat = String.format("%%-%ds %%%ds %%%ds%%n",
                                            nameWidth, millisWidth, ratioWidth);
        System.out.printf(headerFormat, nameHeader, millisHeader, ratioHeader);

        // Print out absolute and relative times, calibrated against first job
        for (int i = 0; i < jobs.length; i++)
            System.out.printf(format, jobs[i].name(), milliss[i], ratios[i]);
    }

    /**
     * Selects the jobs whose name contains a match for the given pattern.
     *
     * @param filter pattern to search for in each job name, or {@code null}
     *               to keep every job
     * @param jobs   candidate jobs
     * @return {@code jobs} itself when {@code filter} is {@code null},
     *         otherwise a new array holding the matching jobs in order
     */
    public static Job[] filter(Pattern filter, Job[] jobs) {
        if (filter == null) return jobs;
        Job[] newJobs = new Job[jobs.length];
        int n = 0;
        for (Job job : jobs)
            if (filter.matcher(job.name()).find())
                newJobs[n++] = job;
        return Arrays.copyOf(newJobs, n);
    }

    /** Security manager that grants every permission. */
    static class PermissiveSecurityManger extends SecurityManager {
        @Override public void checkPermission(java.security.Permission p) {
        }
    }

    public static void main(String[] args) throws Throwable {
        final int itrs = Integer.getInteger("iterations", 100000);
        final int size       = Integer.getInteger("size", 2048);
        final int maxchar    = Integer.getInteger("maxchar", 128);
        final String regex = System.getProperty("filter");
        final Pattern filter = (regex == null) ? null : Pattern.compile(regex);
        final boolean useSecurityManager = Boolean.getBoolean("SecurityManager");
        if (useSecurityManager)
            System.setSecurityManager(new PermissiveSecurityManger());
        final Random rnd = new Random();

        for (Charset charset: Charset.availableCharsets().values()) {
            // Only the charsets that have an array fast path are measured.
            if (!("ISO-8859-1".equals(charset.name()) ||
                  "US-ASCII".equals(charset.name()) ||
                  SINGLE_BYTE_DECODER.equals(
                      charset.newDecoder().getClass().getName())))
                continue;
            final String csn = charset.name();
            final Charset cs = charset;
            final StringBuilder sb = new StringBuilder();
            {
                // Build a random string of chars the charset can encode.
                final CharsetEncoder enc = cs.newEncoder();
                for (int i = 0; i < size; ) {
                    char c = (char) rnd.nextInt(maxchar);
                    if (enc.canEncode(c)) {
                        sb.append(c);
                        i++;
                    }
                }
            }
            final String string = sb.toString();
            final byte[] bytes  = string.getBytes(cs);

            System.out.printf("%n--------%s---------%n", csn);
            for (int sz = 4; sz <= 2048; sz *= 2) {
                System.out.printf("   [len=%d]%n", sz);
                final byte[] bs  = Arrays.copyOf(bytes, sz);
                final String str = new String(bs, csn);
                Job[] jobs = {
                    new Job("String decode: csn") {
                    public void work() throws Throwable {
                        for (int i = 0; i < itrs; i++)
                            new String(bs, csn);
                    }},

                    new Job("String decode: cs") {
                    public void work() throws Throwable {
                        for (int i = 0; i < itrs; i++)
                            new String(bs, cs);
                    }},

                    new Job("String encode: csn") {
                    public void work() throws Throwable {
                        for (int i = 0; i < itrs; i++)
                                str.getBytes(csn);
                    }},

                    new Job("String encode: cs") {
                    public void work() throws Throwable {
                         for (int i = 0; i < itrs; i++)
                          str.getBytes(cs);
                    }},
                };
                time(filter(filter, jobs));
            }
        }
    }
}
/*
* Copyright 2000-2009 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Sun designates this
* particular file as subject to the "Classpath" exception as provided
* by Sun in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*/
/* @test
@bug 6636323 6636319
@summary Test if StringCoding and NIO result have the same de/encoding result
* @run main/timeout=2000 TestStringCoding
*/
import java.util.*;
import java.nio.*;
import java.nio.charset.*;
/**
 * Checks that the String/StringCoding conversions ({@code getBytes} and
 * {@code new String}, by charset name and by {@code Charset}) produce the
 * same result as the NIO coders configured to REPLACE, with and without a
 * security manager installed.
 */
public class TestStringCoding {
    public static void main(String[] args) throws Throwable {

        for (boolean hasSM: new boolean[] { false, true }) {
            if (hasSM)
                System.setSecurityManager(new PermissiveSecurityManger());
            for (Charset cs:  Charset.availableCharsets().values()) {
                if ("ISO-2022-CN".equals(cs.name()) ||
                    "x-COMPOUND_TEXT".equals(cs.name()) ||
                    "x-JISAutoDetect".equals(cs.name()))
                    continue;
                System.out.printf("Testing(sm=%b) %s....", hasSM, cs.name());
                // full bmp first
                char[] bmpCA = new char[0x10000];
                for (int i = 0; i < 0x10000; i++) {
                     bmpCA[i] = (char)i;
                }
                byte[] sbBA = new byte[0x100];
                for (int i = 0; i < 0x100; i++) {
                    sbBA[i] = (byte)i;
                }
                test(cs, bmpCA, sbBA);
                // "randomed" sizes
                Random rnd = new Random();
                for (int i = 0; i < 10; i++) {
                    int clen = rnd.nextInt(0x10000);
                    int blen = rnd.nextInt(0x100);
                    //System.out.printf("    blen=%d, clen=%d%n", blen, clen);
                    test(cs, Arrays.copyOf(bmpCA, clen), Arrays.copyOf(sbBA, blen));
                    //add a pair of surrogates
                    int pos = clen / 2;
                    // The pair is written into the char array, so it must fit
                    // inside the clen-long prefix that is tested next.
                    if ((pos + 1) < clen) {
                        bmpCA[pos] = '\uD800';
                        bmpCA[pos+1] = '\uDC00';
                    }
                    test(cs, Arrays.copyOf(bmpCA, clen), Arrays.copyOf(sbBA, blen));
                }
                System.out.println("done!");
            }
        }
    }

    /**
     * Compares String-based and NIO-based conversions for one charset.
     *
     * @param cs    charset under test
     * @param bmpCA chars to encode
     * @param sbBA  bytes to decode
     * @throws RuntimeException if any String-based result differs from the
     *         NIO result, or an ArrayEncoder mishandles surrogates
     */
    static void test(Charset cs, char[] bmpCA, byte[] sbBA) throws Throwable {
        String bmpStr = new String(bmpCA);
        CharsetDecoder dec = cs.newDecoder()
            .onMalformedInput(CodingErrorAction.REPLACE)
            .onUnmappableCharacter(CodingErrorAction.REPLACE);
        CharsetEncoder enc = cs.newEncoder()
            .onMalformedInput(CodingErrorAction.REPLACE)
            .onUnmappableCharacter(CodingErrorAction.REPLACE);

        //getBytes(csn);
        byte[] baSC = bmpStr.getBytes(cs.name());
        ByteBuffer bf = enc.reset().encode(CharBuffer.wrap(bmpCA));
        byte[] baNIO = new byte[bf.limit()];
        bf.get(baNIO, 0, baNIO.length);
        if (!Arrays.equals(baSC, baNIO))
            throw new RuntimeException("getBytes(csn) failed  -> " + cs.name());

        //getBytes(cs);
        baSC = bmpStr.getBytes(cs);
        if (!Arrays.equals(baSC, baNIO))
            throw new RuntimeException("getBytes(cs) failed  -> " + cs.name());

        //new String(csn);
        String strSC = new String(sbBA, cs.name());
        String strNIO = dec.reset().decode(ByteBuffer.wrap(sbBA)).toString();
        if(!strNIO.equals(strSC))
            throw new RuntimeException("new String(csn) failed  -> " + cs.name());

        //new String(cs);
        strSC = new String(sbBA, cs);
        if (!strNIO.equals(strSC))
            throw new RuntimeException("new String(cs) failed  -> " + cs.name());

        //encode unmappable surrogates
        if (enc instanceof sun.nio.cs.ArrayEncoder &&
            cs.contains(Charset.forName("ASCII"))) {
            enc.replaceWith(new byte[] { (byte)'A'});
            sun.nio.cs.ArrayEncoder cae = (sun.nio.cs.ArrayEncoder)enc;

            // Two surrogate pairs, destination exactly as large as the output.
            String str = "ab\uD800\uDC00\uD800\uDC00cd";
            byte[] ba = new byte[str.length() - 2];
            int n = cae.encode(str.toCharArray(), 0, str.length(), ba);
            if (n != 6 || !"abAAcd".equals(new String(ba, cs.name())))
                throw new RuntimeException("encode1(surrogates) failed  -> "
                                           + cs.name());

            // Same input, destination larger than the output.
            ba = new byte[str.length()];
            n = cae.encode(str.toCharArray(), 0, str.length(), ba);
            if (n != 6 || !"abAAcd".equals(new String(ba, 0, n,
                                                     cs.name())))
                throw new RuntimeException("encode2(surrogates) failed  -> "
                                           + cs.name());
            // Unpaired surrogates: each one is replaced on its own.
            str = "ab\uD800B\uDC00Bcd";
            ba = new byte[str.length()];
            n = cae.encode(str.toCharArray(), 0, str.length(), ba);
            if (n != 8 || !"abABABcd".equals(new String(ba, 0, n,
                                                       cs.name())))
                throw new RuntimeException("encode3(surrogates) failed  -> "
                                           + cs.name());

            // Destination one byte short: output is truncated to fit.
            ba = new byte[str.length() - 1];
            n = cae.encode(str.toCharArray(), 0, str.length(), ba);
            if (n != 7 || !"abABABc".equals(new String(ba, 0, n,
                                                      cs.name())))
                throw new RuntimeException("encode4(surrogates) failed  -> "
                                           + cs.name());
        }
    }

    /** Security manager that grants every permission. */
    static class PermissiveSecurityManger extends SecurityManager {
        @Override public void checkPermission(java.security.Permission p) {}
    }
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册