提交 beae1310 编写于 作者: S sherman

8007395: StringIndexOutOfBoundsException in Matcher.find() when input String...

8007395: StringIndexOutOfBoundsException in Matcher.find() when input String contains surrogate UTF-16 characters
Summary: updated GroupCurly.match0() to backtrack correctly
Reviewed-by: mchung
上级 0c4a5dce
...@@ -4334,7 +4334,6 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) { ...@@ -4334,7 +4334,6 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
info.deterministic = detm; info.deterministic = detm;
else else
info.deterministic = false; info.deterministic = false;
return next.study(info); return next.study(info);
} }
} }
...@@ -4415,6 +4414,8 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) { ...@@ -4415,6 +4414,8 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
} }
// Aggressive group match // Aggressive group match
boolean match0(Matcher matcher, int i, int j, CharSequence seq) { boolean match0(Matcher matcher, int i, int j, CharSequence seq) {
// don't back off past the starting "j"
int min = j;
int[] groups = matcher.groups; int[] groups = matcher.groups;
int save0 = 0; int save0 = 0;
int save1 = 0; int save1 = 0;
...@@ -4452,7 +4453,7 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) { ...@@ -4452,7 +4453,7 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
break; break;
} }
} }
while (j > cmin) { while (j > min) {
if (next.match(matcher, i, seq)) { if (next.match(matcher, i, seq)) {
if (capture) { if (capture) {
groups[groupIndex+1] = i; groups[groupIndex+1] = i;
...@@ -4544,7 +4545,6 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) { ...@@ -4544,7 +4545,6 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
} else { } else {
info.deterministic = false; info.deterministic = false;
} }
return next.study(info); return next.study(info);
} }
} }
......
...@@ -33,7 +33,7 @@ ...@@ -33,7 +33,7 @@
* 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940 * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940
* 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133 * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133
* 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066 * 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066
* 7067045 7014640 7189363 * 7067045 7014640 7189363 8007395
*/ */
import java.util.regex.*; import java.util.regex.*;
...@@ -144,6 +144,7 @@ public class RegExTest { ...@@ -144,6 +144,7 @@ public class RegExTest {
horizontalAndVerticalWSTest(); horizontalAndVerticalWSTest();
linebreakTest(); linebreakTest();
branchTest(); branchTest();
groupCurlyNotFoundSuppTest();
if (failure) { if (failure) {
throw new throw new
RuntimeException("RegExTest failed, 1st failure: " + RuntimeException("RegExTest failed, 1st failure: " +
...@@ -3947,4 +3948,27 @@ public class RegExTest { ...@@ -3947,4 +3948,27 @@ public class RegExTest {
report("branchTest"); report("branchTest");
} }
// Regression test for 8007395.
// The input ends with a surrogate pair and contains no '@', while every
// pattern below requires a literal '@' after a repeated capturing group.
// A successful find() or any exception thrown by find() counts as a failure.
private static void groupCurlyNotFoundSuppTest() throws Exception {
    final String[] patterns = {
        "test(.)+(@[a-zA-Z.]+)",
        "test(.)*(@[a-zA-Z.]+)",
        "test([^B])+(@[a-zA-Z.]+)",
        "test([^B])*(@[a-zA-Z.]+)",
        "test(\\P{IsControl})+(@[a-zA-Z.]+)",
        "test(\\P{IsControl})*(@[a-zA-Z.]+)",
    };
    final String input = "test this as \ud83d\ude0d";

    for (int k = 0; k < patterns.length; k++) {
        // Compilation stays outside the try block: only find() is guarded.
        Pattern compiled = Pattern.compile(patterns[k], Pattern.CASE_INSENSITIVE);
        Matcher m = compiled.matcher(input);

        boolean failed;
        try {
            failed = m.find();
        } catch (Exception x) {
            failed = true;
        }
        if (failed) {
            failCount++;
        }
    }
    report("GroupCurly NotFoundSupp");
}
} }
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册