提交 e926f38f 编写于 作者: M martin

6639458: Improvements to Surrogate.java

Summary: Optimize Surrogate.java
Reviewed-by: sherman
上级 621f22a7
...@@ -30,7 +30,6 @@ import java.nio.charset.CoderResult; ...@@ -30,7 +30,6 @@ import java.nio.charset.CoderResult;
import java.nio.charset.MalformedInputException; import java.nio.charset.MalformedInputException;
import java.nio.charset.UnmappableCharacterException; import java.nio.charset.UnmappableCharacterException;
/** /**
* Utility class for dealing with surrogates. * Utility class for dealing with surrogates.
* *
...@@ -41,19 +40,15 @@ public class Surrogate { ...@@ -41,19 +40,15 @@ public class Surrogate {
private Surrogate() { } private Surrogate() { }
// UTF-16 surrogate-character ranges // TODO: Deprecate/remove the following redundant definitions
// public static final char MIN_HIGH = Character.MIN_HIGH_SURROGATE;
public static final char MIN_HIGH = '\uD800'; public static final char MAX_HIGH = Character.MAX_HIGH_SURROGATE;
public static final char MAX_HIGH = '\uDBFF'; public static final char MIN_LOW = Character.MIN_LOW_SURROGATE;
public static final char MIN_LOW = '\uDC00'; public static final char MAX_LOW = Character.MAX_LOW_SURROGATE;
public static final char MAX_LOW = '\uDFFF'; public static final char MIN = Character.MIN_SURROGATE;
public static final char MIN = MIN_HIGH; public static final char MAX = Character.MAX_SURROGATE;
public static final char MAX = MAX_LOW; public static final int UCS4_MIN = Character.MIN_SUPPLEMENTARY_CODE_POINT;
public static final int UCS4_MAX = Character.MAX_CODE_POINT;
// Range of UCS-4 values that need surrogates in UTF-16
//
public static final int UCS4_MIN = 0x10000;
public static final int UCS4_MAX = (1 << 20) + UCS4_MIN - 1;
/** /**
* Tells whether or not the given UTF-16 value is a high surrogate. * Tells whether or not the given UTF-16 value is a high surrogate.
...@@ -76,36 +71,46 @@ public class Surrogate { ...@@ -76,36 +71,46 @@ public class Surrogate {
return (MIN <= c) && (c <= MAX); return (MIN <= c) && (c <= MAX);
} }
/**
 * Reports whether the given UCS-4 value lies in the Basic Multilingual
 * Plane, meaning it fits in a single UTF-16 {@code char}.
 *
 * @param uc the UCS-4 value to test
 * @return {@code true} if {@code uc} is in the range 0x0000..0xFFFF;
 *         {@code false} for any larger or negative value
 */
public static boolean isBMP(int uc) {
    // A value fits in one char exactly when no bit above the low 16 is set;
    // the unsigned shift makes negative inputs fail the check as well.
    return (uc >>> 16) == 0;
}
/** /**
* Tells whether or not the given UCS-4 character must be represented as a * Tells whether or not the given UCS-4 character must be represented as a
* surrogate pair in UTF-16. * surrogate pair in UTF-16.
*/ */
public static boolean neededFor(int uc) { public static boolean neededFor(int uc) {
return (uc >= UCS4_MIN) && (uc <= UCS4_MAX); return Character.isSupplementaryCodePoint(uc);
} }
/** /**
* Returns the high UTF-16 surrogate for the given UCS-4 character. * Returns the high UTF-16 surrogate for the given UCS-4 character.
*/ */
public static char high(int uc) { public static char high(int uc) {
assert neededFor(uc); assert Character.isSupplementaryCodePoint(uc);
return (char)(0xd800 | (((uc - UCS4_MIN) >> 10) & 0x3ff)); return (char)((uc >> 10)
+ (Character.MIN_HIGH_SURROGATE
- (Character.MIN_SUPPLEMENTARY_CODE_POINT >> 10)));
} }
/** /**
* Returns the low UTF-16 surrogate for the given UCS-4 character. * Returns the low UTF-16 surrogate for the given UCS-4 character.
*/ */
public static char low(int uc) { public static char low(int uc) {
assert neededFor(uc); assert Character.isSupplementaryCodePoint(uc);
return (char)(0xdc00 | ((uc - UCS4_MIN) & 0x3ff)); return (char)((uc & 0x3ff) + Character.MIN_LOW_SURROGATE);
} }
/** /**
* Converts the given surrogate pair into a 32-bit UCS-4 character. * Converts the given surrogate pair into a 32-bit UCS-4 character.
*/ */
public static int toUCS4(char c, char d) { public static int toUCS4(char c, char d) {
assert isHigh(c) && isLow(d); assert Character.isHighSurrogate(c) && Character.isLowSurrogate(d);
return (((c & 0x3ff) << 10) | (d & 0x3ff)) + 0x10000; return Character.toCodePoint(c, d);
} }
/** /**
...@@ -178,14 +183,14 @@ public class Surrogate { ...@@ -178,14 +183,14 @@ public class Surrogate {
* object * object
*/ */
public int parse(char c, CharBuffer in) { public int parse(char c, CharBuffer in) {
if (Surrogate.isHigh(c)) { if (Character.isHighSurrogate(c)) {
if (!in.hasRemaining()) { if (!in.hasRemaining()) {
error = CoderResult.UNDERFLOW; error = CoderResult.UNDERFLOW;
return -1; return -1;
} }
char d = in.get(); char d = in.get();
if (Surrogate.isLow(d)) { if (Character.isLowSurrogate(d)) {
character = toUCS4(c, d); character = Character.toCodePoint(c, d);
isPair = true; isPair = true;
error = null; error = null;
return character; return character;
...@@ -193,7 +198,7 @@ public class Surrogate { ...@@ -193,7 +198,7 @@ public class Surrogate {
error = CoderResult.malformedForLength(1); error = CoderResult.malformedForLength(1);
return -1; return -1;
} }
if (Surrogate.isLow(c)) { if (Character.isLowSurrogate(c)) {
error = CoderResult.malformedForLength(1); error = CoderResult.malformedForLength(1);
return -1; return -1;
} }
...@@ -220,14 +225,14 @@ public class Surrogate { ...@@ -220,14 +225,14 @@ public class Surrogate {
*/ */
public int parse(char c, char[] ia, int ip, int il) { public int parse(char c, char[] ia, int ip, int il) {
assert (ia[ip] == c); assert (ia[ip] == c);
if (Surrogate.isHigh(c)) { if (Character.isHighSurrogate(c)) {
if (il - ip < 2) { if (il - ip < 2) {
error = CoderResult.UNDERFLOW; error = CoderResult.UNDERFLOW;
return -1; return -1;
} }
char d = ia[ip + 1]; char d = ia[ip + 1];
if (Surrogate.isLow(d)) { if (Character.isLowSurrogate(d)) {
character = toUCS4(c, d); character = Character.toCodePoint(c, d);
isPair = true; isPair = true;
error = null; error = null;
return character; return character;
...@@ -235,7 +240,7 @@ public class Surrogate { ...@@ -235,7 +240,7 @@ public class Surrogate {
error = CoderResult.malformedForLength(1); error = CoderResult.malformedForLength(1);
return -1; return -1;
} }
if (Surrogate.isLow(c)) { if (Character.isLowSurrogate(c)) {
error = CoderResult.malformedForLength(1); error = CoderResult.malformedForLength(1);
return -1; return -1;
} }
...@@ -282,7 +287,7 @@ public class Surrogate { ...@@ -282,7 +287,7 @@ public class Surrogate {
* error() will return a descriptive result object * error() will return a descriptive result object
*/ */
public int generate(int uc, int len, CharBuffer dst) { public int generate(int uc, int len, CharBuffer dst) {
if (uc <= 0xffff) { if (Surrogate.isBMP(uc)) {
if (Surrogate.is(uc)) { if (Surrogate.is(uc)) {
error = CoderResult.malformedForLength(len); error = CoderResult.malformedForLength(len);
return -1; return -1;
...@@ -294,12 +299,7 @@ public class Surrogate { ...@@ -294,12 +299,7 @@ public class Surrogate {
dst.put((char)uc); dst.put((char)uc);
error = null; error = null;
return 1; return 1;
} } else if (Character.isSupplementaryCodePoint(uc)) {
if (uc < Surrogate.UCS4_MIN) {
error = CoderResult.malformedForLength(len);
return -1;
}
if (uc <= Surrogate.UCS4_MAX) {
if (dst.remaining() < 2) { if (dst.remaining() < 2) {
error = CoderResult.OVERFLOW; error = CoderResult.OVERFLOW;
return -1; return -1;
...@@ -308,9 +308,10 @@ public class Surrogate { ...@@ -308,9 +308,10 @@ public class Surrogate {
dst.put(Surrogate.low(uc)); dst.put(Surrogate.low(uc));
error = null; error = null;
return 2; return 2;
} else {
error = CoderResult.unmappableForLength(len);
return -1;
} }
error = CoderResult.unmappableForLength(len);
return -1;
} }
/** /**
...@@ -330,7 +331,7 @@ public class Surrogate { ...@@ -330,7 +331,7 @@ public class Surrogate {
* error() will return a descriptive result object * error() will return a descriptive result object
*/ */
public int generate(int uc, int len, char[] da, int dp, int dl) { public int generate(int uc, int len, char[] da, int dp, int dl) {
if (uc <= 0xffff) { if (Surrogate.isBMP(uc)) {
if (Surrogate.is(uc)) { if (Surrogate.is(uc)) {
error = CoderResult.malformedForLength(len); error = CoderResult.malformedForLength(len);
return -1; return -1;
...@@ -342,12 +343,7 @@ public class Surrogate { ...@@ -342,12 +343,7 @@ public class Surrogate {
da[dp] = (char)uc; da[dp] = (char)uc;
error = null; error = null;
return 1; return 1;
} } else if (Character.isSupplementaryCodePoint(uc)) {
if (uc < Surrogate.UCS4_MIN) {
error = CoderResult.malformedForLength(len);
return -1;
}
if (uc <= Surrogate.UCS4_MAX) {
if (dl - dp < 2) { if (dl - dp < 2) {
error = CoderResult.OVERFLOW; error = CoderResult.OVERFLOW;
return -1; return -1;
...@@ -356,11 +352,11 @@ public class Surrogate { ...@@ -356,11 +352,11 @@ public class Surrogate {
da[dp + 1] = Surrogate.low(uc); da[dp + 1] = Surrogate.low(uc);
error = null; error = null;
return 2; return 2;
} else {
error = CoderResult.unmappableForLength(len);
return -1;
} }
error = CoderResult.unmappableForLength(len);
return -1;
} }
} }
} }
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册