提交 66be5ca3 编写于 作者: M martin

8010316: Improve handling of char sequences containing surrogates

Summary: Fix and optimize codePointAt, codePointBefore and similar methods
Reviewed-by: sherman, okutsu, ulfzibis, kizune
上级 639cbf93
...@@ -236,7 +236,7 @@ abstract class AbstractStringBuilder implements Appendable, CharSequence { ...@@ -236,7 +236,7 @@ abstract class AbstractStringBuilder implements Appendable, CharSequence {
if ((index < 0) || (index >= count)) { if ((index < 0) || (index >= count)) {
throw new StringIndexOutOfBoundsException(index); throw new StringIndexOutOfBoundsException(index);
} }
return Character.codePointAt(value, index); return Character.codePointAtImpl(value, index, count);
} }
/** /**
...@@ -265,7 +265,7 @@ abstract class AbstractStringBuilder implements Appendable, CharSequence { ...@@ -265,7 +265,7 @@ abstract class AbstractStringBuilder implements Appendable, CharSequence {
if ((i < 0) || (i >= count)) { if ((i < 0) || (i >= count)) {
throw new StringIndexOutOfBoundsException(index); throw new StringIndexOutOfBoundsException(index);
} }
return Character.codePointBefore(value, index); return Character.codePointBeforeImpl(value, index, 0);
} }
/** /**
...@@ -1370,32 +1370,37 @@ abstract class AbstractStringBuilder implements Appendable, CharSequence { ...@@ -1370,32 +1370,37 @@ abstract class AbstractStringBuilder implements Appendable, CharSequence {
* @return a reference to this object. * @return a reference to this object.
*/ */
// Reverses the characters held in value[0..count) in place and returns this object.
// NOTE(review): every row below is a side-by-side diff row; the left half is the
// pre-change code and the right half is the post-change code.
public AbstractStringBuilder reverse() { public AbstractStringBuilder reverse() {
boolean hasSurrogate = false; boolean hasSurrogates = false;
int n = count - 1; int n = count - 1;
// First pass: walk j from the middle down to 0, swapping value[j] with its mirror
// value[n - j], and remember whether any surrogate char was seen along the way.
for (int j = (n-1) >> 1; j >= 0; --j) { for (int j = (n-1) >> 1; j >= 0; j--) {
char temp = value[j]; int k = n - j;
char temp2 = value[n - j]; char cj = value[j];
if (!hasSurrogate) { char ck = value[k];
hasSurrogate = (temp >= Character.MIN_SURROGATE && temp <= Character.MAX_SURROGATE) value[j] = ck;
|| (temp2 >= Character.MIN_SURROGATE && temp2 <= Character.MAX_SURROGATE); value[k] = cj;
if (Character.isSurrogate(cj) ||
Character.isSurrogate(ck)) {
hasSurrogates = true;
} }
value[j] = temp2;
value[n - j] = temp;
} }
// Second pass, only when a surrogate was seen: the plain reversal leaves each valid
// pair in low-then-high order, so adjacent low/high chars are swapped back.
// The left column does this inline; the right column outlines it into a helper.
if (hasSurrogate) { if (hasSurrogates) {
// Reverse back all valid surrogate pairs reverseAllValidSurrogatePairs();
for (int i = 0; i < count - 1; i++) { }
char c2 = value[i]; return this;
if (Character.isLowSurrogate(c2)) { }
char c1 = value[i + 1];
if (Character.isHighSurrogate(c1)) { /** Outlined helper method for reverse() */
value[i++] = c1; private void reverseAllValidSurrogatePairs() {
value[i] = c2; for (int i = 0; i < count - 1; i++) {
} char c2 = value[i];
} if (Character.isLowSurrogate(c2)) {
} char c1 = value[i + 1];
} if (Character.isHighSurrogate(c1)) {
return this; value[i++] = c1;
} value[i] = c2;
}
}
}
}
/** /**
......
...@@ -4862,13 +4862,11 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -4862,13 +4862,11 @@ class Character implements java.io.Serializable, Comparable<Character> {
* @since 1.5 * @since 1.5
*/ */
public static int codePointAt(CharSequence seq, int index) { public static int codePointAt(CharSequence seq, int index) {
char c1 = seq.charAt(index++); char c1 = seq.charAt(index);
if (isHighSurrogate(c1)) { if (isHighSurrogate(c1) && ++index < seq.length()) {
if (index < seq.length()) { char c2 = seq.charAt(index);
char c2 = seq.charAt(index); if (isLowSurrogate(c2)) {
if (isLowSurrogate(c2)) { return toCodePoint(c1, c2);
return toCodePoint(c1, c2);
}
} }
} }
return c1; return c1;
...@@ -4931,15 +4929,13 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -4931,15 +4929,13 @@ class Character implements java.io.Serializable, Comparable<Character> {
return codePointAtImpl(a, index, limit); return codePointAtImpl(a, index, limit);
} }
// throws ArrayIndexOutofBoundsException if index out of bounds // throws ArrayIndexOutOfBoundsException if index out of bounds
static int codePointAtImpl(char[] a, int index, int limit) { static int codePointAtImpl(char[] a, int index, int limit) {
char c1 = a[index++]; char c1 = a[index];
if (isHighSurrogate(c1)) { if (isHighSurrogate(c1) && ++index < limit) {
if (index < limit) { char c2 = a[index];
char c2 = a[index]; if (isLowSurrogate(c2)) {
if (isLowSurrogate(c2)) { return toCodePoint(c1, c2);
return toCodePoint(c1, c2);
}
} }
} }
return c1; return c1;
...@@ -4968,12 +4964,10 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -4968,12 +4964,10 @@ class Character implements java.io.Serializable, Comparable<Character> {
*/ */
public static int codePointBefore(CharSequence seq, int index) { public static int codePointBefore(CharSequence seq, int index) {
char c2 = seq.charAt(--index); char c2 = seq.charAt(--index);
if (isLowSurrogate(c2)) { if (isLowSurrogate(c2) && index > 0) {
if (index > 0) { char c1 = seq.charAt(--index);
char c1 = seq.charAt(--index); if (isHighSurrogate(c1)) {
if (isHighSurrogate(c1)) { return toCodePoint(c1, c2);
return toCodePoint(c1, c2);
}
} }
} }
return c2; return c2;
...@@ -5038,15 +5032,13 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -5038,15 +5032,13 @@ class Character implements java.io.Serializable, Comparable<Character> {
return codePointBeforeImpl(a, index, start); return codePointBeforeImpl(a, index, start);
} }
// throws ArrayIndexOutofBoundsException if index-1 out of bounds // throws ArrayIndexOutOfBoundsException if index-1 out of bounds
static int codePointBeforeImpl(char[] a, int index, int start) { static int codePointBeforeImpl(char[] a, int index, int start) {
char c2 = a[--index]; char c2 = a[--index];
if (isLowSurrogate(c2)) { if (isLowSurrogate(c2) && index > start) {
if (index > start) { char c1 = a[--index];
char c1 = a[--index]; if (isHighSurrogate(c1)) {
if (isHighSurrogate(c1)) { return toCodePoint(c1, c2);
return toCodePoint(c1, c2);
}
} }
} }
return c2; return c2;
......
...@@ -37,6 +37,7 @@ public class Supplementary { ...@@ -37,6 +37,7 @@ public class Supplementary {
test4(); // Test for appendCodePoint(int codePoint) test4(); // Test for appendCodePoint(int codePoint)
test5(); // Test for codePointCount(int beginIndex, int endIndex) test5(); // Test for codePointCount(int beginIndex, int endIndex)
test6(); // Test for offsetByCodePoints(int index, int offset) test6(); // Test for offsetByCodePoints(int index, int offset)
testDontReadOutOfBoundsTrailingSurrogate();
} }
/* Text strings which are used as input data. /* Text strings which are used as input data.
...@@ -305,6 +306,19 @@ public class Supplementary { ...@@ -305,6 +306,19 @@ public class Supplementary {
} }
} }
/**
 * Checks that {@code codePointAt(0)} stops at the builder's logical length.
 *
 * A supplementary code point is appended (two chars), then the builder is
 * truncated to length 1 so that only the high surrogate remains in bounds.
 * After truncation {@code codePointAt(0)} must report the lone high surrogate
 * rather than the full supplementary code point.
 * NOTE(review): presumably the low surrogate is still present in the backing
 * array after {@code setLength(1)} -- confirm against AbstractStringBuilder.
 */
static void testDontReadOutOfBoundsTrailingSurrogate() {
    final int supplementary = Character.MIN_SUPPLEMENTARY_CODE_POINT;
    final StringBuilder builder = new StringBuilder();
    builder.appendCodePoint(supplementary);

    // Both halves of the pair are in bounds: the full code point is expected.
    final int decodedPair = builder.codePointAt(0);
    check(decodedPair != supplementary,
          "codePointAt(0)", decodedPair, supplementary);
    check(builder.length() != 2, "sb.length()");

    // Drop the trailing surrogate from the logical contents.
    builder.setLength(1);
    check(builder.length() != 1, "sb.length()");

    // Only the high surrogate is in bounds now, so it must be returned as-is.
    final int decodedLead = builder.codePointAt(0);
    final char lead = Character.highSurrogate(supplementary);
    check(decodedLead != lead,
          "codePointAt(0)",
          decodedLead, lead);
}
static final boolean At = true, Before = false; static final boolean At = true, Before = false;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册