提交 96c93c69 编写于 作者: S sherman

6990617: Regular expression doesn't match if unicode character next to a digit.

Summary: Updated RemoveQEQuoting() to deal with this case correctly
Reviewed-by: sherman
Contributed-by: stephen.flores@oracle.com
上级 057a6303
...@@ -1583,13 +1583,26 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) { ...@@ -1583,13 +1583,26 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
return; return;
int j = i; int j = i;
i += 2; i += 2;
int[] newtemp = new int[j + 2*(pLen-i) + 2]; int[] newtemp = new int[j + 3*(pLen-i) + 2];
System.arraycopy(temp, 0, newtemp, 0, j); System.arraycopy(temp, 0, newtemp, 0, j);
boolean inQuote = true; boolean inQuote = true;
boolean beginQuote = true;
while (i < pLen) { while (i < pLen) {
int c = temp[i++]; int c = temp[i++];
if (! ASCII.isAscii(c) || ASCII.isAlnum(c)) { if (!ASCII.isAscii(c) || ASCII.isAlpha(c)) {
newtemp[j++] = c;
} else if (ASCII.isDigit(c)) {
if (beginQuote) {
/*
* A numeric escape (\0, \x or \u) could immediately precede
* this quote, and we don't want this digit to be processed as
* part of that escape, so emit it as the hex escape \x3N.
*/
newtemp[j++] = '\\';
newtemp[j++] = 'x';
newtemp[j++] = '3';
}
newtemp[j++] = c; newtemp[j++] = c;
} else if (c != '\\') { } else if (c != '\\') {
if (inQuote) newtemp[j++] = '\\'; if (inQuote) newtemp[j++] = '\\';
...@@ -1606,12 +1619,16 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) { ...@@ -1606,12 +1619,16 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
if (temp[i] == 'Q') { if (temp[i] == 'Q') {
i++; i++;
inQuote = true; inQuote = true;
beginQuote = true;
continue;
} else { } else {
newtemp[j++] = c; newtemp[j++] = c;
if (i != pLen) if (i != pLen)
newtemp[j++] = temp[i++]; newtemp[j++] = temp[i++];
} }
} }
beginQuote = false;
} }
patternLength = j; patternLength = j;
......
...@@ -32,7 +32,7 @@ ...@@ -32,7 +32,7 @@
* 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476 * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476
* 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940 * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940
* 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133 * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133
* 6350801 6676425 6878475 6919132 6931676 6948903 7014645 7039066 * 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066
*/ */
import java.util.regex.*; import java.util.regex.*;
...@@ -50,6 +50,7 @@ public class RegExTest { ...@@ -50,6 +50,7 @@ public class RegExTest {
private static Random generator = new Random(); private static Random generator = new Random();
private static boolean failure = false; private static boolean failure = false;
private static int failCount = 0; private static int failCount = 0;
private static String firstFailure = null;
/** /**
* Main to interpret arguments and run several tests. * Main to interpret arguments and run several tests.
...@@ -133,15 +134,19 @@ public class RegExTest { ...@@ -133,15 +134,19 @@ public class RegExTest {
hitEndTest(); hitEndTest();
toMatchResultTest(); toMatchResultTest();
surrogatesInClassTest(); surrogatesInClassTest();
removeQEQuotingTest();
namedGroupCaptureTest(); namedGroupCaptureTest();
nonBmpClassComplementTest(); nonBmpClassComplementTest();
unicodePropertiesTest(); unicodePropertiesTest();
unicodeHexNotationTest(); unicodeHexNotationTest();
unicodeClassesTest(); unicodeClassesTest();
if (failure) if (failure) {
throw new RuntimeException("Failure in the RE handling."); throw new
else RuntimeException("RegExTest failed, 1st failure: " +
firstFailure);
} else {
System.err.println("OKAY: All tests passed."); System.err.println("OKAY: All tests passed.");
}
} }
// Utility functions // Utility functions
...@@ -215,8 +220,14 @@ public class RegExTest { ...@@ -215,8 +220,14 @@ public class RegExTest {
String paddedName = paddedNameBuffer.toString(); String paddedName = paddedNameBuffer.toString();
System.err.println(paddedName + ": " + System.err.println(paddedName + ": " +
(failCount==0 ? "Passed":"Failed("+failCount+")")); (failCount==0 ? "Passed":"Failed("+failCount+")"));
if (failCount > 0) if (failCount > 0) {
failure = true; failure = true;
if (firstFailure == null) {
firstFailure = testName;
}
}
failCount = 0; failCount = 0;
} }
...@@ -295,6 +306,22 @@ public class RegExTest { ...@@ -295,6 +306,22 @@ public class RegExTest {
Matcher matcher = pattern.matcher("\ud834\udd22"); Matcher matcher = pattern.matcher("\ud834\udd22");
if (!matcher.find()) if (!matcher.find())
failCount++; failCount++;
report("Surrogate pair in Unicode escape");
}
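// This is for bug 6990617
// Test that Pattern.RemoveQEQuoting works correctly when an octal
// character escape has only 2 or 3 digits instead of 4 and the first
// quoted char is an octal digit, e.g. \011\Q1...\E must still match a
// tab followed by '1' rather than being read as the escape \0111.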
private static void removeQEQuotingTest() throws Exception {
Pattern pattern =
Pattern.compile("\\011\\Q1sometext\\E\\011\\Q2sometext\\E");
Matcher matcher = pattern.matcher("\t1sometext\t2sometext");
if (!matcher.find())
failCount++;
report("Remove Q/E Quoting");
} }
// This is for bug 4988891 // This is for bug 4988891
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册