提交 fa768406 编写于 作者: S sherman

6328855: String: Matches hangs at short and easy Strings containing \r \n

6192895: java.util.regex.Matcher: Performance issue
6345469: java.util.regex.Matcher utilizes 100% of the CPU
6988218: RegEx matcher loops
6693451: RegEx matcher goes into infinite delay
7006761: Matcher.matches() has infinite loop
8140212: Slow performance of Matcher.find
8151481: j.u.regex.Pattern cleanup
6609854: Regex does not match correctly for negative nested character classes
4916384: CANON_EQ supports only combining character sequences with non-spacing marks
4867170: Pattern doesn't work with composite character in CANON_EQ mode
6995635: CANON_EQ pattern flag is buggy
6728861: ExceptionInInitializerError is caught when the pattern has precomposed character
6736245: A character in Composition Exclusion Table does not match itself
7080302: the normalization in java regex pattern may have flaw
Reviewed-by: rriggs, okutsu, alanb
上级 2c8a26b2
......@@ -139,8 +139,6 @@ public class ProtectionDomain {
*/
final Key key = new Key();
private static final Debug debug = Debug.getInstance("domain");
/**
* Creates a new ProtectionDomain with the given CodeSource and
* Permissions. If the permissions object is not null, then
......@@ -338,6 +336,13 @@ public class ProtectionDomain {
" "+pc+"\n";
}
/*
* Holder class for the static field "debug". Keeping the field in a nested
* class delays the Debug.getInstance("domain") call until DebugHolder itself
* is initialized (i.e. on first access to DebugHolder.debug), instead of
* running it as part of the enclosing class's static initialization.
*/
private static class DebugHolder {
// Callers null-check this field before using it.
private static final Debug debug = Debug.getInstance("domain");
}
/**
* Return true (merge policy permissions) in the following cases:
*
......@@ -359,7 +364,7 @@ public class ProtectionDomain {
if (sm == null) {
return true;
} else {
if (debug != null) {
if (DebugHolder.debug != null) {
if (sm.getClass().getClassLoader() == null &&
Policy.getPolicyNoCheck().getClass().getClassLoader()
== null) {
......
......@@ -62,8 +62,6 @@ public class SecureClassLoader extends ClassLoader {
private final Map<CodeSourceKey, ProtectionDomain> pdcache
= new ConcurrentHashMap<>(11);
private static final Debug debug = Debug.getInstance("scl");
static {
ClassLoader.registerAsParallelCapable();
}
......@@ -202,6 +200,13 @@ public class SecureClassLoader extends ClassLoader {
return new Permissions(); // ProtectionDomain defers the binding
}
/*
* Holder class for the static field "debug". Keeping the field in a nested
* class delays the Debug.getInstance("scl") call until DebugHolder itself
* is initialized (i.e. on first access to DebugHolder.debug), instead of
* running it as part of the enclosing class's static initialization.
*/
private static class DebugHolder {
// Callers null-check this field before printing through it.
private static final Debug debug = Debug.getInstance("scl");
}
/*
* Returned cached ProtectionDomain for the specified CodeSource.
*/
......@@ -222,9 +227,9 @@ public class SecureClassLoader extends ClassLoader {
= SecureClassLoader.this.getPermissions(cs);
ProtectionDomain pd = new ProtectionDomain(
cs, perms, SecureClassLoader.this, null);
if (debug != null) {
debug.println(" getPermissions " + pd);
debug.println("");
if (DebugHolder.debug != null) {
DebugHolder.debug.println(" getPermissions " + pd);
DebugHolder.debug.println("");
}
return pd;
}
......
/*
* Copyright (c) 2011, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package java.util.regex;
import java.util.HashMap;
import java.util.Locale;
import java.util.regex.Pattern.CharPredicate;
import java.util.regex.Pattern.BmpCharPredicate;
/**
 * Factory and registry of the {@code CharPredicate}s used by the regex
 * compiler for Unicode properties, POSIX classes, general categories,
 * scripts, blocks and the {@code java*} character methods.
 *
 * <p>Three lookup tables are maintained:
 * <ul>
 *   <li>{@code posix}  - POSIX class names in their Unicode-aware form,
 *       keyed by upper-case name;</li>
 *   <li>{@code uprops} - Unicode binary property names, keyed by
 *       upper-case name;</li>
 *   <li>{@code props}  - general categories, ASCII-only POSIX classes and
 *       {@code java*} properties, keyed by their exact (case-sensitive)
 *       name.</li>
 * </ul>
 * All lookup methods return {@code null} when the name is unknown.
 */
class CharPredicates {

    static final CharPredicate ALPHABETIC = Character::isAlphabetic;

    // \p{gc=Decimal_Number}
    static final CharPredicate DIGIT = Character::isDigit;

    static final CharPredicate LETTER = Character::isLetter;

    static final CharPredicate IDEOGRAPHIC = Character::isIdeographic;

    static final CharPredicate LOWERCASE = Character::isLowerCase;

    static final CharPredicate UPPERCASE = Character::isUpperCase;

    static final CharPredicate TITLECASE = Character::isTitleCase;

    // \p{Whitespace}: the three separator categories plus 0x9..0xd and 0x85
    static final CharPredicate WHITE_SPACE = ch ->
        ((((1 << Character.SPACE_SEPARATOR) |
           (1 << Character.LINE_SEPARATOR) |
           (1 << Character.PARAGRAPH_SEPARATOR)) >> Character.getType(ch)) & 1)
        != 0 || (ch >= 0x9 && ch <= 0xd) || (ch == 0x85);

    // \p{gc=Control}
    static final CharPredicate CONTROL = ch ->
        Character.getType(ch) == Character.CONTROL;

    // \p{gc=Punctuation}
    static final CharPredicate PUNCTUATION = ch ->
        ((((1 << Character.CONNECTOR_PUNCTUATION) |
           (1 << Character.DASH_PUNCTUATION) |
           (1 << Character.START_PUNCTUATION) |
           (1 << Character.END_PUNCTUATION) |
           (1 << Character.OTHER_PUNCTUATION) |
           (1 << Character.INITIAL_QUOTE_PUNCTUATION) |
           (1 << Character.FINAL_QUOTE_PUNCTUATION)) >> Character.getType(ch)) & 1)
        != 0;

    // \p{gc=Decimal_Number}
    // \p{Hex_Digit}    -> PropList.txt: Hex_Digit
    static final CharPredicate HEX_DIGIT = DIGIT.union(
        ch -> (ch >= 0x0030 && ch <= 0x0039) ||
              (ch >= 0x0041 && ch <= 0x0046) ||
              (ch >= 0x0061 && ch <= 0x0066) ||
              (ch >= 0xFF10 && ch <= 0xFF19) ||
              (ch >= 0xFF21 && ch <= 0xFF26) ||
              (ch >= 0xFF41 && ch <= 0xFF46));

    // Every code point whose general category is not Unassigned
    static final CharPredicate ASSIGNED = ch ->
        Character.getType(ch) != Character.UNASSIGNED;

    // PropList.txt:Noncharacter_Code_Point
    static final CharPredicate NONCHARACTER_CODE_POINT = ch ->
        (ch & 0xfffe) == 0xfffe || (ch >= 0xfdd0 && ch <= 0xfdef);

    // \p{alpha}
    // \p{digit}
    static final CharPredicate ALNUM = ALPHABETIC.union(DIGIT);

    // \p{Whitespace} --
    // [\N{LF} \N{VT} \N{FF} \N{CR} \N{NEL}  -> 0xa, 0xb, 0xc, 0xd, 0x85
    //  \p{gc=Line_Separator}
    //  \p{gc=Paragraph_Separator}]
    static final CharPredicate BLANK = ch ->
        Character.getType(ch) == Character.SPACE_SEPARATOR ||
        ch == 0x9; // \N{HT}

    // [^
    //  \p{space}
    //  \p{gc=Control}
    //  \p{gc=Surrogate}
    //  \p{gc=Unassigned}]
    static final CharPredicate GRAPH = ch ->
        ((((1 << Character.SPACE_SEPARATOR) |
           (1 << Character.LINE_SEPARATOR) |
           (1 << Character.PARAGRAPH_SEPARATOR) |
           (1 << Character.CONTROL) |
           (1 << Character.SURROGATE) |
           (1 << Character.UNASSIGNED)) >> Character.getType(ch)) & 1)
        == 0;

    // \p{graph}
    // \p{blank}
    // -- \p{cntrl}
    static final CharPredicate PRINT = GRAPH.union(BLANK).and(CONTROL.negate());

    //  200C..200D    PropList.txt:Join_Control
    static final CharPredicate JOIN_CONTROL = ch -> ch == 0x200C || ch == 0x200D;

    //  \p{alpha}
    //  \p{gc=Mark}
    //  \p{digit}
    //  \p{gc=Connector_Punctuation}
    //  \p{Join_Control}    200C..200D
    static final CharPredicate WORD =
        ALPHABETIC.union(ch -> ((((1 << Character.NON_SPACING_MARK) |
                                  (1 << Character.ENCLOSING_MARK) |
                                  (1 << Character.COMBINING_SPACING_MARK) |
                                  (1 << Character.DECIMAL_DIGIT_NUMBER) |
                                  (1 << Character.CONNECTOR_PUNCTUATION))
                                 >> Character.getType(ch)) & 1) != 0,
                         JOIN_CONTROL);

    /////////////////////////////////////////////////////////////////////////////

    // Unicode-aware POSIX classes, keyed by upper-case POSIX name.
    private static final HashMap<String, CharPredicate> posix = new HashMap<>(12);

    // Unicode binary properties, keyed by upper-case property name or alias.
    private static final HashMap<String, CharPredicate> uprops = new HashMap<>(18);

    private static void defPosix(String name, CharPredicate p) {
        posix.put(name, p);
    }

    private static void defUProp(String name, CharPredicate p) {
        uprops.put(name, p);
    }

    static {
        defPosix("ALPHA", ALPHABETIC);
        defPosix("LOWER", LOWERCASE);
        defPosix("UPPER", UPPERCASE);
        defPosix("SPACE", WHITE_SPACE);
        defPosix("PUNCT", PUNCTUATION);
        defPosix("XDIGIT",HEX_DIGIT);
        defPosix("ALNUM", ALNUM);
        defPosix("CNTRL", CONTROL);
        defPosix("DIGIT", DIGIT);
        defPosix("BLANK", BLANK);
        defPosix("GRAPH", GRAPH);
        defPosix("PRINT", PRINT);

        defUProp("ALPHABETIC", ALPHABETIC);
        defUProp("ASSIGNED", ASSIGNED);
        defUProp("CONTROL", CONTROL);
        defUProp("HEXDIGIT", HEX_DIGIT);
        defUProp("IDEOGRAPHIC", IDEOGRAPHIC);
        defUProp("JOINCONTROL", JOIN_CONTROL);
        defUProp("LETTER", LETTER);
        defUProp("LOWERCASE", LOWERCASE);
        defUProp("NONCHARACTERCODEPOINT", NONCHARACTER_CODE_POINT);
        defUProp("TITLECASE", TITLECASE);
        defUProp("PUNCTUATION", PUNCTUATION);
        defUProp("UPPERCASE", UPPERCASE);
        defUProp("WHITESPACE", WHITE_SPACE);
        defUProp("WORD", WORD);
        defUProp("WHITE_SPACE", WHITE_SPACE);
        defUProp("HEX_DIGIT", HEX_DIGIT);
        defUProp("NONCHARACTER_CODE_POINT", NONCHARACTER_CODE_POINT);
        defUProp("JOIN_CONTROL", JOIN_CONTROL);
    }

    /**
     * Looks up a Unicode binary property by case-insensitive name, falling
     * back to the POSIX table when the name is not a registered property.
     *
     * @param propName property name, matched after upper-casing
     * @return the matching predicate, or {@code null} if the name is unknown
     */
    public static CharPredicate forUnicodeProperty(String propName) {
        propName = propName.toUpperCase(Locale.ROOT);
        CharPredicate p = uprops.get(propName);
        if (p != null)
            return p;
        return posix.get(propName);
    }

    /**
     * Looks up the Unicode-aware variant of a POSIX class by
     * case-insensitive name.
     *
     * @param propName POSIX class name, matched after upper-casing
     * @return the matching predicate, or {@code null} if the name is unknown
     */
    public static CharPredicate forPOSIXName(String propName) {
        // Locale.ROOT keeps the casing locale-neutral and consistent with
        // forUnicodeProperty.
        return posix.get(propName.toUpperCase(Locale.ROOT));
    }

    /////////////////////////////////////////////////////////////////////////////

    /**
     * Returns a predicate matching all characters that belong to the named
     * UnicodeScript.
     *
     * @param name script name accepted by {@code Character.UnicodeScript.forName}
     * @return the predicate, or {@code null} if the script name is not valid
     */
    static CharPredicate forUnicodeScript(String name) {
        final Character.UnicodeScript script;
        try {
            script = Character.UnicodeScript.forName(name);
        } catch (IllegalArgumentException ignored) {
            // Unknown script name: report "not found" to the caller.
            return null;
        }
        return ch -> script == Character.UnicodeScript.of(ch);
    }

    /**
     * Returns a predicate matching all characters in the named UnicodeBlock.
     *
     * @param name block name accepted by {@code Character.UnicodeBlock.forName}
     * @return the predicate, or {@code null} if the block name is not valid
     */
    static CharPredicate forUnicodeBlock(String name) {
        final Character.UnicodeBlock block;
        try {
            block = Character.UnicodeBlock.forName(name);
        } catch (IllegalArgumentException ignored) {
            // Unknown block name: report "not found" to the caller.
            return null;
        }
        return ch -> block == Character.UnicodeBlock.of(ch);
    }

    /////////////////////////////////////////////////////////////////////////////

    // unicode categories, aliases, properties, java methods ...

    private static final HashMap<String, CharPredicate> props = new HashMap<>(128);

    /**
     * Returns a predicate matching all characters in a named property.
     * The lookup is case-sensitive.
     *
     * @param name exact property name as registered in the table
     * @return the predicate, or {@code null} if the name is unknown
     */
    static CharPredicate forProperty(String name) {
        return props.get(name);
    }

    private static void defProp(String name, CharPredicate p) {
        props.put(name, p);
    }

    // Registers a predicate that accepts every code point whose
    // Character.getType() bit is set in typeMask.
    private static void defCategory(String name, final int typeMask) {
        CharPredicate p = ch -> (typeMask & (1 << Character.getType(ch))) != 0;
        props.put(name, p);
    }

    // Registers a predicate for the inclusive code point range [lower, upper].
    private static void defRange(String name, final int lower, final int upper) {
        BmpCharPredicate p = ch -> lower <= ch && ch <= upper;
        props.put(name, p);
    }

    // Registers an ASCII-only predicate backed by the ASCII ctype table.
    private static void defCtype(String name, final int ctype) {
        BmpCharPredicate p = ch -> ch < 128 && ASCII.isType(ch, ctype);
        // PrintPattern.pmap.put(p, name);
        props.put(name, p);
    }

    static {
        // Unicode character property aliases, defined in
        // http://www.unicode.org/Public/UNIDATA/PropertyValueAliases.txt
        defCategory("Cn", 1<<Character.UNASSIGNED);
        defCategory("Lu", 1<<Character.UPPERCASE_LETTER);
        defCategory("Ll", 1<<Character.LOWERCASE_LETTER);
        defCategory("Lt", 1<<Character.TITLECASE_LETTER);
        defCategory("Lm", 1<<Character.MODIFIER_LETTER);
        defCategory("Lo", 1<<Character.OTHER_LETTER);
        defCategory("Mn", 1<<Character.NON_SPACING_MARK);
        defCategory("Me", 1<<Character.ENCLOSING_MARK);
        defCategory("Mc", 1<<Character.COMBINING_SPACING_MARK);
        defCategory("Nd", 1<<Character.DECIMAL_DIGIT_NUMBER);
        defCategory("Nl", 1<<Character.LETTER_NUMBER);
        defCategory("No", 1<<Character.OTHER_NUMBER);
        defCategory("Zs", 1<<Character.SPACE_SEPARATOR);
        defCategory("Zl", 1<<Character.LINE_SEPARATOR);
        defCategory("Zp", 1<<Character.PARAGRAPH_SEPARATOR);
        defCategory("Cc", 1<<Character.CONTROL);
        defCategory("Cf", 1<<Character.FORMAT);
        defCategory("Co", 1<<Character.PRIVATE_USE);
        defCategory("Cs", 1<<Character.SURROGATE);
        defCategory("Pd", 1<<Character.DASH_PUNCTUATION);
        defCategory("Ps", 1<<Character.START_PUNCTUATION);
        defCategory("Pe", 1<<Character.END_PUNCTUATION);
        defCategory("Pc", 1<<Character.CONNECTOR_PUNCTUATION);
        defCategory("Po", 1<<Character.OTHER_PUNCTUATION);
        defCategory("Sm", 1<<Character.MATH_SYMBOL);
        defCategory("Sc", 1<<Character.CURRENCY_SYMBOL);
        defCategory("Sk", 1<<Character.MODIFIER_SYMBOL);
        defCategory("So", 1<<Character.OTHER_SYMBOL);
        defCategory("Pi", 1<<Character.INITIAL_QUOTE_PUNCTUATION);
        defCategory("Pf", 1<<Character.FINAL_QUOTE_PUNCTUATION);
        defCategory("L", ((1<<Character.UPPERCASE_LETTER) |
                          (1<<Character.LOWERCASE_LETTER) |
                          (1<<Character.TITLECASE_LETTER) |
                          (1<<Character.MODIFIER_LETTER)  |
                          (1<<Character.OTHER_LETTER)));
        defCategory("M", ((1<<Character.NON_SPACING_MARK) |
                          (1<<Character.ENCLOSING_MARK)   |
                          (1<<Character.COMBINING_SPACING_MARK)));
        defCategory("N", ((1<<Character.DECIMAL_DIGIT_NUMBER) |
                          (1<<Character.LETTER_NUMBER)        |
                          (1<<Character.OTHER_NUMBER)));
        defCategory("Z", ((1<<Character.SPACE_SEPARATOR) |
                          (1<<Character.LINE_SEPARATOR)  |
                          (1<<Character.PARAGRAPH_SEPARATOR)));
        // General_Category "Other" is Cc | Cf | Cs | Co | Cn, so unassigned
        // code points belong to it as well.
        defCategory("C", ((1<<Character.CONTROL)     |
                          (1<<Character.FORMAT)      |
                          (1<<Character.PRIVATE_USE) |
                          (1<<Character.SURROGATE)   |
                          (1<<Character.UNASSIGNED))); // Other
        defCategory("P", ((1<<Character.DASH_PUNCTUATION)      |
                          (1<<Character.START_PUNCTUATION)     |
                          (1<<Character.END_PUNCTUATION)       |
                          (1<<Character.CONNECTOR_PUNCTUATION) |
                          (1<<Character.OTHER_PUNCTUATION)     |
                          (1<<Character.INITIAL_QUOTE_PUNCTUATION) |
                          (1<<Character.FINAL_QUOTE_PUNCTUATION)));
        defCategory("S", ((1<<Character.MATH_SYMBOL)     |
                          (1<<Character.CURRENCY_SYMBOL) |
                          (1<<Character.MODIFIER_SYMBOL) |
                          (1<<Character.OTHER_SYMBOL)));
        defCategory("LC", ((1<<Character.UPPERCASE_LETTER) |
                           (1<<Character.LOWERCASE_LETTER) |
                           (1<<Character.TITLECASE_LETTER)));
        defCategory("LD", ((1<<Character.UPPERCASE_LETTER) |
                           (1<<Character.LOWERCASE_LETTER) |
                           (1<<Character.TITLECASE_LETTER) |
                           (1<<Character.MODIFIER_LETTER)  |
                           (1<<Character.OTHER_LETTER)     |
                           (1<<Character.DECIMAL_DIGIT_NUMBER)));
        defRange("L1", 0x00, 0xFF);  // Latin-1
        props.put("all", ch -> true);

        // Posix regular expression character classes, defined in
        // http://www.unix.org/onlinepubs/009695399/basedefs/xbd_chap09.html
        defRange("ASCII", 0x00, 0x7F);   // ASCII
        defCtype("Alnum", ASCII.ALNUM);  // Alphanumeric characters
        defCtype("Alpha", ASCII.ALPHA);  // Alphabetic characters
        defCtype("Blank", ASCII.BLANK);  // Space and tab characters
        defCtype("Cntrl", ASCII.CNTRL);  // Control characters
        defRange("Digit", '0', '9');     // Numeric characters
        defCtype("Graph", ASCII.GRAPH);  // printable and visible
        defRange("Lower", 'a', 'z');     // Lower-case alphabetic
        defRange("Print", 0x20, 0x7E);   // Printable characters
        defCtype("Punct", ASCII.PUNCT);  // Punctuation characters
        defCtype("Space", ASCII.SPACE);  // Space characters
        defRange("Upper", 'A', 'Z');     // Upper-case alphabetic
        defCtype("XDigit",ASCII.XDIGIT); // hexadecimal digits

        // Java character properties, defined by methods in Character.java
        defProp("javaLowerCase", java.lang.Character::isLowerCase);
        defProp("javaUpperCase", Character::isUpperCase);
        defProp("javaAlphabetic", java.lang.Character::isAlphabetic);
        defProp("javaIdeographic", java.lang.Character::isIdeographic);
        defProp("javaTitleCase", java.lang.Character::isTitleCase);
        defProp("javaDigit", java.lang.Character::isDigit);
        defProp("javaDefined", java.lang.Character::isDefined);
        defProp("javaLetter", java.lang.Character::isLetter);
        defProp("javaLetterOrDigit", java.lang.Character::isLetterOrDigit);
        defProp("javaJavaIdentifierStart", java.lang.Character::isJavaIdentifierStart);
        defProp("javaJavaIdentifierPart", java.lang.Character::isJavaIdentifierPart);
        defProp("javaUnicodeIdentifierStart", java.lang.Character::isUnicodeIdentifierStart);
        defProp("javaUnicodeIdentifierPart", java.lang.Character::isUnicodeIdentifierPart);
        defProp("javaIdentifierIgnorable", java.lang.Character::isIdentifierIgnorable);
        defProp("javaSpaceChar", java.lang.Character::isSpaceChar);
        defProp("javaWhitespace", java.lang.Character::isWhitespace);
        defProp("javaISOControl", java.lang.Character::isISOControl);
        defProp("javaMirrored", java.lang.Character::isMirrored);
    }

    /////////////////////////////////////////////////////////////////////////////

    /**
     * Posix ASCII variants, not in the lookup map
     */
    static final BmpCharPredicate ASCII_DIGIT = ch -> ch < 128 && ASCII.isDigit(ch);
    static final BmpCharPredicate ASCII_WORD  = ch -> ch < 128 && ASCII.isWord(ch);
    static final BmpCharPredicate ASCII_SPACE = ch -> ch < 128 && ASCII.isSpace(ch);
}
/*
* Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package java.util.regex;
import java.util.Arrays;
/**
 * A lightweight hash set of {@code int} values. Not safe for concurrent
 * access.
 *
 * <p>Storage layout:
 * <ul>
 *   <li>{@code entries} holds consecutive {@code (value, next)} pairs, where
 *       {@code next} is the index in {@code entries} of the following pair
 *       in the same bucket chain, or {@code -1} at the end of the chain;</li>
 *   <li>{@code hashes} maps a bucket number to the index in {@code entries}
 *       of the chain head, or {@code -1} for an empty bucket;</li>
 *   <li>{@code pos} is the index of the next free slot in {@code entries}.</li>
 * </ul>
 *
 * <p>Any {@code int} may be stored. Non-negative values hash to
 * {@code value % buckets}; for negative values the sign bit is masked off
 * first so the bucket number is never negative.
 */
class IntHashSet {
    private int[] entries;
    private int[] hashes;
    private int pos = 0;

    /** Creates an empty set with room for 16 values before the first growth. */
    public IntHashSet() {
        this.entries = new int[16 << 1];      // initCapacity = 16;
        this.hashes = new int[(16 / 2) | 1];  // odd -> fewer collisions
        Arrays.fill(this.entries, -1);
        Arrays.fill(this.hashes, -1);
    }

    /**
     * Maps a value to a bucket number in {@code [0, buckets)}.
     * Masking the sign bit keeps the result non-negative for negative input
     * while leaving the mapping of non-negative input unchanged.
     */
    private static int bucketOf(int i, int buckets) {
        return (i & Integer.MAX_VALUE) % buckets;
    }

    /**
     * Tells whether the given value has been added to this set.
     *
     * @param i the value to look for
     * @return {@code true} if {@code i} is present
     */
    public boolean contains(int i) {
        int h = hashes[bucketOf(i, hashes.length)];
        while (h != -1) {
            if (entries[h] == i)
                return true;
            h = entries[h + 1];   // follow the chain
        }
        return false;
    }

    /**
     * Adds the given value to this set; a value already present is ignored.
     *
     * @param i the value to add
     */
    public void add(int i) {
        int h0 = bucketOf(i, hashes.length);
        int next = hashes[h0];
        //  if invoker guarantees contains(i) checked before add(i)
        //  the following check is not needed.
        int next0 = next;
        while (next0 != -1) {
            if (entries[next0] == i)
                return;
            next0 = entries[next0 + 1];
        }
        // Prepend the new pair to the bucket chain.
        hashes[h0] = pos;
        entries[pos++] = i;
        entries[pos++] = next;
        if (pos == entries.length)
            expand();
    }

    /** Removes all values; the backing arrays keep their current size. */
    public void clear() {
        Arrays.fill(this.entries, -1);
        Arrays.fill(this.hashes, -1);
        pos = 0;
    }

    /**
     * Doubles the entry storage, resizes the bucket table and rebuilds
     * every chain. Values keep their slot index in {@code entries}.
     */
    private void expand() {
        int[] old = entries;
        int[] es = new int[old.length << 1];
        int hlen = (old.length / 2) | 1;
        int[] hs = new int[hlen];
        Arrays.fill(es, -1);
        Arrays.fill(hs, -1);
        for (int n = 0; n < pos;) {  // re-hashing
            int i = old[n];
            int hsh = bucketOf(i, hlen);
            int next = hs[hsh];
            hs[hsh] = n;
            es[n++] = i;
            es[n++] = next;
        }
        this.entries = es;
        this.hashes = hs;
    }
}
......@@ -177,6 +177,14 @@ public final class Matcher implements MatchResult {
*/
int[] locals;
/**
* Storage used by top greedy Loop node to store a specific hash set to
* keep the beginning index of the failed repetition match. The nodes
* themselves are stateless, so they rely on this field to hold state
* during a match.
*/
IntHashSet[] localsPos;
/**
* Boolean indicating whether or not more input could change
* the results of the last match.
......@@ -239,6 +247,7 @@ public final class Matcher implements MatchResult {
int parentGroupCount = Math.max(parent.capturingGroupCount, 10);
groups = new int[parentGroupCount * 2];
locals = new int[parent.localCount];
localsPos = new IntHashSet[parent.localTCNCount];
// Put fields into initial states
reset();
......@@ -375,6 +384,7 @@ public final class Matcher implements MatchResult {
groups[i] = -1;
for (int i = 0; i < locals.length; i++)
locals[i] = -1;
localsPos = new IntHashSet[parentPattern.localTCNCount];
modCount++;
return this;
}
......@@ -397,6 +407,10 @@ public final class Matcher implements MatchResult {
groups[i] = -1;
for(int i=0; i<locals.length; i++)
locals[i] = -1;
for (int i = 0; i < localsPos.length; i++) {
if (localsPos[i] != null)
localsPos[i].clear();
}
lastAppendPosition = 0;
from = 0;
to = getTextLength();
......@@ -1706,6 +1720,10 @@ public final class Matcher implements MatchResult {
this.oldLast = oldLast < 0 ? from : oldLast;
for (int i = 0; i < groups.length; i++)
groups[i] = -1;
for (int i = 0; i < localsPos.length; i++) {
if (localsPos[i] != null)
localsPos[i].clear();
}
acceptMode = NOANCHOR;
boolean result = parentPattern.root.match(this, from, text);
if (!result)
......@@ -1729,6 +1747,10 @@ public final class Matcher implements MatchResult {
this.oldLast = oldLast < 0 ? from : oldLast;
for (int i = 0; i < groups.length; i++)
groups[i] = -1;
for (int i = 0; i < localsPos.length; i++) {
if (localsPos[i] != null)
localsPos[i].clear();
}
acceptMode = anchor;
boolean result = parentPattern.matchRoot.match(this, from, text);
if (!result)
......
/*
* Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package java.util.regex;
import java.util.HashMap;
import java.util.regex.Pattern.CharPredicate;
import java.util.regex.CharPredicates;
import static java.util.regex.ASCII.*;
/**
 * A utility class to print out the pattern node tree.
 *
 * <p>Output goes to {@code System.err}. Each node is assigned a sequential
 * id the first time it is printed; when a node's {@code next} has already
 * been printed, a {@code (=>id)} link to it is appended to the line.
 */
class PrintPattern {

    // Ids handed out to nodes in the order they are first printed.
    private static final HashMap<Pattern.Node, Integer> ids = new HashMap<>();

    /**
     * Prints one id-prefixed line for {@code node}, indented by
     * {@code depth}, followed by a link to its successor when that
     * successor already has an id.
     */
    private static void print(Pattern.Node node, String text, int depth) {
        if (!ids.containsKey(node))
            ids.put(node, ids.size());
        printf("%6d:%" + (depth==0? "": depth<<1) + "s<%s>", ids.get(node), "", text);
        if (ids.containsKey(node.next))
            // Must go through printf: with an Integer argument an overload
            // named print(String, Object...) would lose to print(String, int)
            // by unboxing, and the link would never be printed.
            printf(" (=>%d)", ids.get(node.next));
        printf("%n");
    }

    /** Prints one line without a node id, indented by {@code depth}. */
    private static void print(String s, int depth) {
        printf(" %" + (depth==0?"":depth<<1) + "s<%s>%n", "", s);
    }

    /** Formats to {@code System.err}. */
    private static void printf(String fmt, Object ... args) {
        System.err.printf(fmt, args);
    }

    private static String toStringCPS(int[] cps) {
        StringBuilder sb = new StringBuilder(cps.length);
        for (int cp : cps)
            sb.append(toStringCP(cp));
        return sb.toString();
    }

    // Printable (per ASCII.isPrint) code points are shown as themselves,
    // everything else as a backslash-u escape with a hex value.
    private static String toStringCP(int cp) {
        return (isPrint(cp) ? "" + (char)cp
                            : "\\u" + Integer.toString(cp, 16));
    }

    // Renders a repetition range; an upper bound of Pattern.MAX_REPS is
    // shown as "*", "+" or "{min, max}".
    private static String toStringRange(int min, int max) {
        if (max == Pattern.MAX_REPS) {
            if (min == 0)
                return " * ";
            else if (min == 1)
                return " + ";
            return "{" + min + ", max}";
        }
        return "{" + min + ", " + max + "}";
    }

    private static String toStringCtype(int type) {
        switch(type) {
        case UPPER:  return "ASCII.UPPER";
        case LOWER:  return "ASCII.LOWER";
        case DIGIT:  return "ASCII.DIGIT";
        case SPACE:  return "ASCII.SPACE";
        case PUNCT:  return "ASCII.PUNCT";
        case CNTRL:  return "ASCII.CNTRL";
        case BLANK:  return "ASCII.BLANK";
        case UNDER:  return "ASCII.UNDER";
        case ASCII:  return "ASCII.ASCII";
        case ALPHA:  return "ASCII.ALPHA";
        case ALNUM:  return "ASCII.ALNUM";
        case GRAPH:  return "ASCII.GRAPH";
        case WORD:   return "ASCII.WORD";
        case XDIGIT: return "ASCII.XDIGIT";
        default: return "ASCII ?";
        }
    }

    // Simple class name of the node: the text after the last '$'.
    private static String toString(Pattern.Node node) {
        String name = node.getClass().getName();
        return name.substring(name.lastIndexOf('$') + 1);
    }

    // Display names for well-known predicates.
    static HashMap<CharPredicate, String> pmap;
    static {
        pmap = new HashMap<>();
        pmap.put(Pattern.ALL, "All");
        pmap.put(Pattern.DOT, "Dot");
        pmap.put(Pattern.UNIXDOT, "UnixDot");
        pmap.put(Pattern.VertWS, "VertWS");
        pmap.put(Pattern.HorizWS, "HorizWS");

        pmap.put(CharPredicates.ASCII_DIGIT, "ASCII.DIGIT");
        pmap.put(CharPredicates.ASCII_WORD,  "ASCII.WORD");
        pmap.put(CharPredicates.ASCII_SPACE, "ASCII.SPACE");
    }

    /**
     * Prints the node chain starting at {@code node}, recursing into
     * nested bodies/atoms one indentation level deeper.
     *
     * @param node  first node of the chain; {@code null} prints nothing
     * @param depth indentation level of the caller
     */
    static void walk(Pattern.Node node, int depth) {
        depth++;
        while(node != null) {
            String name = toString(node);
            String str;
            if (node instanceof Pattern.Prolog) {
                print(node, name, depth);
                // print the loop here
                Pattern.Loop loop = ((Pattern.Prolog)node).loop;
                name = toString(loop);
                str = name + " " + toStringRange(loop.cmin, loop.cmax);
                print(loop, str, depth);
                walk(loop.body, depth);
                print("/" + name, depth);
                node = loop;
            } else if (node instanceof Pattern.Loop) {
                return;  // stop here, body.next -> loop
            } else if (node instanceof Pattern.Curly) {
                Pattern.Curly c = (Pattern.Curly)node;
                str = "Curly " + c.type + " " + toStringRange(c.cmin, c.cmax);
                print(node, str, depth);
                walk(c.atom, depth);
                print("/Curly", depth);
            } else if (node instanceof Pattern.GroupCurly) {
                Pattern.GroupCurly gc = (Pattern.GroupCurly)node;
                str = "GroupCurly " + gc.groupIndex / 2 +
                      ", " + gc.type + " " + toStringRange(gc.cmin, gc.cmax);
                print(node, str, depth);
                walk(gc.atom, depth);
                print("/GroupCurly", depth);
            } else if (node instanceof Pattern.GroupHead) {
                Pattern.GroupHead head = (Pattern.GroupHead)node;
                Pattern.GroupTail tail = head.tail;
                print(head, "Group.head " + (tail.groupIndex / 2), depth);
                walk(head.next, depth);
                print(tail, "/Group.tail " + (tail.groupIndex / 2), depth);
                node = tail;
            } else if (node instanceof Pattern.GroupTail) {
                return;  // stopper
            } else if (node instanceof Pattern.Ques) {
                print(node, "Ques " + ((Pattern.Ques)node).type, depth);
                walk(((Pattern.Ques)node).atom, depth);
                print("/Ques", depth);
            } else if (node instanceof Pattern.Branch) {
                Pattern.Branch b = (Pattern.Branch)node;
                print(b, name, depth);
                int i = 0;
                while (true) {
                    if (b.atoms[i] != null) {
                        walk(b.atoms[i], depth);
                    } else {
                        print(" (accepted)", depth);
                    }
                    if (++i == b.size)
                        break;
                    print("-branch.separator-", depth);
                }
                node = b.conn;
                print(node, "/Branch", depth);
            } else if (node instanceof Pattern.BranchConn) {
                return;
            } else if (node instanceof Pattern.CharProperty) {
                str = pmap.get(((Pattern.CharProperty)node).predicate);
                if (str == null)
                    str = toString(node);
                else
                    str = "Single \"" + str + "\"";
                print(node, str, depth);
            } else if (node instanceof Pattern.SliceNode) {
                str = name + " \"" +
                      toStringCPS(((Pattern.SliceNode)node).buffer) + "\"";
                print(node, str, depth);
            } else if (node instanceof Pattern.CharPropertyGreedy) {
                Pattern.CharPropertyGreedy gcp = (Pattern.CharPropertyGreedy)node;
                String pstr = pmap.get(gcp.predicate);
                if (pstr == null)
                    pstr = gcp.predicate.toString();
                else
                    pstr = "Single \"" + pstr + "\"";
                str = name + " " + pstr + ((gcp.cmin == 0) ? "*" : "+");
                print(node, str, depth);
            } else if (node instanceof Pattern.BackRef) {
                str = "GroupBackRef " + ((Pattern.BackRef)node).groupIndex / 2;
                print(node, str, depth);
            } else if (node instanceof Pattern.LastNode) {
                print(node, "END", depth);
            } else if (node == Pattern.accept) {
                return;
            } else {
                print(node, name, depth);
            }
            node = node.next;
        }
    }

    /**
     * Compiles the regex given as the first argument and prints its node
     * tree.
     *
     * @param args {@code args[0]} is the regular expression to compile
     */
    public static void main(String[] args) {
        if (args.length == 0) {
            // Without this guard a missing argument surfaces as a bare
            // ArrayIndexOutOfBoundsException.
            System.err.println("Usage: PrintPattern <regex>");
            return;
        }
        Pattern p = Pattern.compile(args[0]);
        System.out.println(" Pattern: " + p);
        walk(p.root, 0);
    }
}
/*
* Copyright (c) 2011, 2013, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package java.util.regex;
import java.util.HashMap;
import java.util.Locale;
enum UnicodeProp {
ALPHABETIC {
public boolean is(int ch) {
return Character.isAlphabetic(ch);
}
},
LETTER {
public boolean is(int ch) {
return Character.isLetter(ch);
}
},
IDEOGRAPHIC {
public boolean is(int ch) {
return Character.isIdeographic(ch);
}
},
LOWERCASE {
public boolean is(int ch) {
return Character.isLowerCase(ch);
}
},
UPPERCASE {
public boolean is(int ch) {
return Character.isUpperCase(ch);
}
},
TITLECASE {
public boolean is(int ch) {
return Character.isTitleCase(ch);
}
},
WHITE_SPACE {
// \p{Whitespace}
public boolean is(int ch) {
return ((((1 << Character.SPACE_SEPARATOR) |
(1 << Character.LINE_SEPARATOR) |
(1 << Character.PARAGRAPH_SEPARATOR)) >> Character.getType(ch)) & 1)
!= 0 || (ch >= 0x9 && ch <= 0xd) || (ch == 0x85);
}
},
CONTROL {
// \p{gc=Control}
public boolean is(int ch) {
return Character.getType(ch) == Character.CONTROL;
}
},
PUNCTUATION {
// \p{gc=Punctuation}
public boolean is(int ch) {
return ((((1 << Character.CONNECTOR_PUNCTUATION) |
(1 << Character.DASH_PUNCTUATION) |
(1 << Character.START_PUNCTUATION) |
(1 << Character.END_PUNCTUATION) |
(1 << Character.OTHER_PUNCTUATION) |
(1 << Character.INITIAL_QUOTE_PUNCTUATION) |
(1 << Character.FINAL_QUOTE_PUNCTUATION)) >> Character.getType(ch)) & 1)
!= 0;
}
},
HEX_DIGIT {
// \p{gc=Decimal_Number}
// \p{Hex_Digit} -> PropList.txt: Hex_Digit
public boolean is(int ch) {
return DIGIT.is(ch) ||
(ch >= 0x0030 && ch <= 0x0039) ||
(ch >= 0x0041 && ch <= 0x0046) ||
(ch >= 0x0061 && ch <= 0x0066) ||
(ch >= 0xFF10 && ch <= 0xFF19) ||
(ch >= 0xFF21 && ch <= 0xFF26) ||
(ch >= 0xFF41 && ch <= 0xFF46);
}
},
ASSIGNED {
public boolean is(int ch) {
return Character.getType(ch) != Character.UNASSIGNED;
}
},
NONCHARACTER_CODE_POINT {
// PropList.txt:Noncharacter_Code_Point
public boolean is(int ch) {
return (ch & 0xfffe) == 0xfffe || (ch >= 0xfdd0 && ch <= 0xfdef);
}
},
DIGIT {
// \p{gc=Decimal_Number}
public boolean is(int ch) {
return Character.isDigit(ch);
}
},
ALNUM {
// \p{alpha}
// \p{digit}
public boolean is(int ch) {
return ALPHABETIC.is(ch) || DIGIT.is(ch);
}
},
BLANK {
// \p{Whitespace} --
// [\N{LF} \N{VT} \N{FF} \N{CR} \N{NEL} -> 0xa, 0xb, 0xc, 0xd, 0x85
// \p{gc=Line_Separator}
// \p{gc=Paragraph_Separator}]
public boolean is(int ch) {
return Character.getType(ch) == Character.SPACE_SEPARATOR ||
ch == 0x9; // \N{HT}
}
},
GRAPH {
// [^
// \p{space}
// \p{gc=Control}
// \p{gc=Surrogate}
// \p{gc=Unassigned}]
public boolean is(int ch) {
return ((((1 << Character.SPACE_SEPARATOR) |
(1 << Character.LINE_SEPARATOR) |
(1 << Character.PARAGRAPH_SEPARATOR) |
(1 << Character.CONTROL) |
(1 << Character.SURROGATE) |
(1 << Character.UNASSIGNED)) >> Character.getType(ch)) & 1)
== 0;
}
},
PRINT {
// \p{graph}
// \p{blank}
// -- \p{cntrl}
public boolean is(int ch) {
return (GRAPH.is(ch) || BLANK.is(ch)) && !CONTROL.is(ch);
}
},
WORD {
/**
 * Word-character test, the union of:
 *   \p{alpha}
 *   \p{gc=Mark}
 *   \p{digit}
 *   \p{gc=Connector_Punctuation}
 *   \p{Join_Control}   200C..200D
 */
public boolean is(int ch) {
    if (ALPHABETIC.is(ch)) {
        return true;
    }
    switch (Character.getType(ch)) {
        case Character.NON_SPACING_MARK:
        case Character.ENCLOSING_MARK:
        case Character.COMBINING_SPACING_MARK:
        case Character.DECIMAL_DIGIT_NUMBER:
        case Character.CONNECTOR_PUNCTUATION:
            return true;
        default:
            return JOIN_CONTROL.is(ch);
    }
}
},
JOIN_CONTROL {
/** Join_Control (PropList.txt): the two code points 200C..200D. */
public boolean is(int ch) {
    return 0x200C <= ch && ch <= 0x200D;
}
};
// Upper-case POSIX class name -> name of the UnicodeProp constant.
private static final HashMap<String, String> posix = new HashMap<>();
// Upper-case spelling without underscores -> name of the UnicodeProp constant.
private static final HashMap<String, String> aliases = new HashMap<>();
static {
    final String[][] posixNames = {
        { "ALPHA",  "ALPHABETIC"  },
        { "LOWER",  "LOWERCASE"   },
        { "UPPER",  "UPPERCASE"   },
        { "SPACE",  "WHITE_SPACE" },
        { "PUNCT",  "PUNCTUATION" },
        { "XDIGIT", "HEX_DIGIT"   },
        { "ALNUM",  "ALNUM"       },
        { "CNTRL",  "CONTROL"     },
        { "DIGIT",  "DIGIT"       },
        { "BLANK",  "BLANK"       },
        { "GRAPH",  "GRAPH"       },
        { "PRINT",  "PRINT"       },
    };
    for (String[] entry : posixNames) {
        posix.put(entry[0], entry[1]);
    }

    final String[][] aliasNames = {
        { "WHITESPACE",            "WHITE_SPACE"             },
        { "HEXDIGIT",              "HEX_DIGIT"               },
        { "NONCHARACTERCODEPOINT", "NONCHARACTER_CODE_POINT" },
        { "JOINCONTROL",           "JOIN_CONTROL"            },
    };
    for (String[] entry : aliasNames) {
        aliases.put(entry[0], entry[1]);
    }
}
/**
 * Looks up a property by name. The name is upper-cased with
 * Locale.ENGLISH and, if it is a key of the aliases table, replaced by
 * the mapped constant name before the lookup.
 *
 * @param propName the property name
 * @return the matching constant, or null if no constant has that name
 */
public static UnicodeProp forName(String propName) {
    final String upper = propName.toUpperCase(Locale.ENGLISH);
    final String alias = aliases.get(upper);
    final String constantName = (alias == null) ? upper : alias;
    try {
        return valueOf(constantName);
    } catch (IllegalArgumentException ignored) {
        // Unknown names are reported to the caller as null, not as an exception.
        return null;
    }
}
/**
 * Looks up a property by its POSIX class name. The name is upper-cased
 * with Locale.ENGLISH and translated through the posix table.
 *
 * @param propName the POSIX class name
 * @return the mapped constant, or null if the name is not in the table
 */
public static UnicodeProp forPOSIXName(String propName) {
    final String constantName = posix.get(propName.toUpperCase(Locale.ENGLISH));
    return (constantName == null) ? null : valueOf(constantName);
}
/**
 * Tests whether the given code point has this property. Each constant
 * of this enum supplies its own implementation.
 *
 * @param ch the code point to test
 * @return true if ch has the property, false otherwise
 */
public abstract boolean is(int ch);
}
......@@ -33,6 +33,9 @@
* 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066
* 7067045 7014640 7189363 8007395 8013252 8013254 8012646 8023647 6559590
* 8027645 8035076 8039124 8035975 8074678 6854417 8143854 8147531 7071819
* 8151481 4867170 7080302 6728861 6995635 6736245 4916384
* 6328855 6192895 6345469 6988218 6693451 7006761 8140212
*
* @library /lib/testlibrary
* @build jdk.testlibrary.*
* @run main RegExTest
......@@ -162,6 +165,7 @@ public class RegExTest {
patternAsPredicate();
invalidFlags();
grapheme();
expoBacktracking();
if (failure) {
throw new
......@@ -2659,51 +2663,101 @@ public class RegExTest {
check(p, "test\u00e4\u0323\u0300", true);
check(p, "test\u00e4\u0300\u0323", true);
/*
* The following canonical equivalence tests don't work. Bug id: 4916384.
*
// Decomposed hangul (jamos)
p = Pattern.compile("\u1100\u1161", Pattern.CANON_EQ);
m = p.matcher("\u1100\u1161");
if (!m.matches())
failCount++;
Object[][] data = new Object[][] {
m.reset("\uac00");
if (!m.matches())
failCount++;
// JDK-4867170
{ "[\u1f80-\u1f82]", "ab\u1f80cd", "f", true },
{ "[\u1f80-\u1f82]", "ab\u1f81cd", "f", true },
{ "[\u1f80-\u1f82]", "ab\u1f82cd", "f", true },
{ "[\u1f80-\u1f82]", "ab\u03b1\u0314\u0345cd", "f", true },
{ "[\u1f80-\u1f82]", "ab\u03b1\u0345\u0314cd", "f", true },
{ "[\u1f80-\u1f82]", "ab\u1f01\u0345cd", "f", true },
{ "[\u1f80-\u1f82]", "ab\u1f00\u0345cd", "f", true },
// Composed hangul
p = Pattern.compile("\uac00", Pattern.CANON_EQ);
m = p.matcher("\u1100\u1161");
if (!m.matches())
failCount++;
{ "\\p{IsGreek}", "ab\u1f80cd", "f", true },
{ "\\p{IsGreek}", "ab\u1f81cd", "f", true },
{ "\\p{IsGreek}", "ab\u1f82cd", "f", true },
{ "\\p{IsGreek}", "ab\u03b1\u0314\u0345cd", "f", true },
{ "\\p{IsGreek}", "ab\u1f01\u0345cd", "f", true },
m.reset("\uac00");
if (!m.matches())
failCount++;
// backtracking, force to match "\u1f80", instead of \u1f82"
{ "ab\\p{IsGreek}\u0300cd", "ab\u03b1\u0313\u0345\u0300cd", "m", true },
// Decomposed supplementary outside char classes
p = Pattern.compile("test\ud834\uddbc\ud834\udd6f", Pattern.CANON_EQ);
m = p.matcher("test\ud834\uddc0");
if (!m.matches())
failCount++;
{ "[\\p{IsGreek}]", "\u03b1\u0314\u0345", "m", true },
{ "\\p{IsGreek}", "\u03b1\u0314\u0345", "m", true },
m.reset("test\ud834\uddbc\ud834\udd6f");
if (!m.matches())
failCount++;
{ "[^\u1f80-\u1f82]","\u1f81", "m", false },
{ "[^\u1f80-\u1f82]","\u03b1\u0314\u0345", "m", false },
{ "[^\u1f01\u0345]", "\u1f81", "f", false },
// Composed supplementary outside char classes
p = Pattern.compile("test\ud834\uddc0", Pattern.CANON_EQ);
m.reset("test\ud834\uddbc\ud834\udd6f");
if (!m.matches())
failCount++;
{ "[^\u1f81]+", "\u1f80\u1f82", "f", true },
{ "[\u1f80]", "ab\u1f80cd", "f", true },
{ "\u1f80", "ab\u1f80cd", "f", true },
{ "\u1f00\u0345\u0300", "\u1f82", "m", true },
{ "\u1f80", "-\u1f00\u0345\u0300-", "f", true },
{ "\u1f82", "\u1f00\u0345\u0300", "m", true },
{ "\u1f82", "\u1f80\u0300", "m", true },
m = p.matcher("test\ud834\uddc0");
if (!m.matches())
failCount++;
// JDK-7080302 # compile failed
{ "a(\u0041\u0301\u0328)", "a\u0041\u0301\u0328", "m", true},
// JDK-6728861, same cause as above one
{ "\u00e9\u00e9n", "e\u0301e\u0301n", "m", true},
// JDK-6995635
{ "(\u00e9)", "e\u0301", "m", true },
// JDK-6736245
// interesting special case: nfc(u2add+u0338) -> (u2add+u0338), NOT u2adc
{ "\u2ADC", "\u2ADC", "m", true}, // NFC
{ "\u2ADC", "\u2ADD\u0338", "m", true}, // NFD
// 4916384.
// Decomposed hangul (jamos) works inside clazz
{ "[\u1100\u1161]", "\u1100\u1161", "m", true},
{ "[\u1100\u1161]", "\uac00", "m", true},
{ "[\uac00]", "\u1100\u1161", "m", true},
{ "[\uac00]", "\uac00", "m", true},
// Decomposed hangul (jamos)
{ "\u1100\u1161", "\u1100\u1161", "m", true},
{ "\u1100\u1161", "\uac00", "m", true},
// Composed hangul
{ "\uac00", "\u1100\u1161", "m", true },
{ "\uac00", "\uac00", "m", true },
/* Need a NFDSlice to nfd the source to solve this issue
u+1d1c0 -> nfd: <u+1d1ba><u+1d165><u+1d16f> -> nfc: <u+1d1ba><u+1d165><u+1d16f>
u+1d1bc -> nfd: <u+1d1ba><u+1d165> -> nfc: <u+1d1ba><u+1d165>
<u+1d1bc><u+1d16f> -> nfd: <u+1d1ba><u+1d165><u+1d16f> -> nfc: <u+1d1ba><u+1d165><u+1d16f>
// Decomposed supplementary outside char classes
// { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddc0", "m", true },
// Composed supplementary outside char classes
// { "test\ud834\uddc0", "test\ud834\uddbc\ud834\udd6f", "m", true },
*/
{ "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddbc\ud834\udd6f", "m", true },
{ "test\ud834\uddc0", "test\ud834\uddbc\ud834\udd6f", "m", true },
{ "test\ud834\uddc0", "test\ud834\uddc0", "m", true },
{ "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddc0", "m", true },
};
int failCount = 0;
for (Object[] d : data) {
String pn = (String)d[0];
String tt = (String)d[1];
boolean isFind = "f".equals(((String)d[2]));
boolean expected = (boolean)d[3];
boolean ret = isFind ? Pattern.compile(pn, Pattern.CANON_EQ).matcher(tt).find()
: Pattern.compile(pn, Pattern.CANON_EQ).matcher(tt).matches();
if (ret != expected) {
failCount++;
continue;
}
}
report("Canonical Equivalence");
}
......@@ -3846,7 +3900,6 @@ public class RegExTest {
if (!patternString.startsWith("'")) {
return Pattern.compile(patternString);
}
int break1 = patternString.lastIndexOf("'");
String flagString = patternString.substring(
break1+1, patternString.length());
......@@ -4092,10 +4145,11 @@ public class RegExTest {
report("NamedGroupCapture");
}
// This is for bug 6969132
// This is for bug 6919132
private static void nonBmpClassComplementTest() throws Exception {
Pattern p = Pattern.compile("\\P{Lu}");
Matcher m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
if (m.find() && m.start() == 1)
failCount++;
......@@ -4113,6 +4167,11 @@ public class RegExTest {
if (m.find() && m.start() == 1)
failCount++;
p = Pattern.compile("\\P{sc=GRANTHA}");
m = p.matcher(new String(new int[] {0x11350}, 0, 1));
if (m.find() && m.start() == 1)
failCount++;
report("NonBmpClassComplement");
}
......@@ -4662,4 +4721,92 @@ public class RegExTest {
failCount++;
report("Unicode extended grapheme cluster");
}
/**
 * Regression test for patterns that hang or time out when the matcher
 * goes into exponential backtracking.
 *
 * Each row of the table is { pattern, input, expected }, where expected
 * is the required result of Pattern.compile(pattern).matcher(input).matches().
 * A mismatch increments failCount; the result is then published through
 * report(...), as the other tests in this file do.
 */
private static void expoBacktracking() throws Exception {
    Object[][] patternMatchers = {
        // 6328855
        { "(.*\n*)*",
          "this little fine string lets\r\njava.lang.String.matches\r\ncrash\r\n(We don't know why but adding \r* to the regex makes it work again)",
          false },
        // 6192895
        { " *([a-zA-Z0-9/\\-\\?:\\(\\)\\.,'\\+\\{\\}]+ *)+",
          "Hello World this is a test this is a test this is a test A",
          true },
        { " *([a-zA-Z0-9/\\-\\?:\\(\\)\\.,'\\+\\{\\}]+ *)+",
          "Hello World this is a test this is a test this is a test \u4e00 ",
          false },
        { " *([a-z0-9]+ *)+",
          "hello world this is a test this is a test this is a test A",
          false },
        // 4771934 [FIXED] #5013651?
        { "^(\\w+([\\.-]?\\w+)*@\\w+([\\.-]?\\w+)*(\\.\\w{2,4})+[,;]?)+$",
          "abc@efg.abc,efg@abc.abc,abc@xyz.mno;abc@sdfsd.com",
          true },
        // 4866249 [FIXED]
        { "<\\s*" + "(meta|META)" + "(\\s|[^>])+" + "(CHARSET|charset)=" + "(\\s|[^>])+>",
          "<META http-equiv=\"Content-Type\" content=\"text/html; charset=ISO-8859-5\">",
          true },
        { "^(\\w+([\\.-]?\\w+)*@\\w+([\\.-]?\\w+)*(\\.\\w{2,4})+[,;]?)+$",
          "abc@efg.abc,efg@abc.abc,abc@xyz.mno;sdfsd.com",
          false },
        // 6345469
        { "((<[^>]+>)?(((\\s)?)*(\\&nbsp;)?)*((\\s)?)*)+",
          "&nbsp;&nbsp; < br/> &nbsp; < / p> <p> <html> <adfasfdasdf>&nbsp; </p>",
          true }, // --> matched
        { "((<[^>]+>)?(((\\s)?)*(\\&nbsp;)?)*((\\s)?)*)+",
          "&nbsp;&nbsp; < br/> &nbsp; < / p> <p> <html> <adfasfdasdf>&nbsp; p </p>",
          false },
        // 5026912
        { "^\\s*" + "(\\w|\\d|[\\xC0-\\xFF]|/)+" + "\\s+|$",
          "156580451111112225588087755221111111566969655555555",
          false},
        // 6988218
        { "^([+-]?((0[xX](\\p{XDigit}+))|(((\\p{Digit}+)(\\.)?((\\p{Digit}+)?)([eE][+-]?(\\p{Digit}+))?)|(\\.((\\p{Digit}+))([eE][+-]?(\\p{Digit}+))?)))|[n|N]?'([^']*(?:'')*[^']*)*')",
          "'%)) order by ANGEBOT.ID",
          false}, // find
        // 6693451
        { "^(\\s*foo\\s*)*$",
          "foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo",
          true },
        { "^(\\s*foo\\s*)*$",
          "foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo fo",
          false
        },
        // 7006761
        { "(([0-9A-Z]+)([_]?+)*)*", "FOOOOO_BAAAR_FOOOOOOOOO_BA_", true},
        { "(([0-9A-Z]+)([_]?+)*)*", "FOOOOO_BAAAR_FOOOOOOOOO_BA_ ", false},
        // 8140212
        { "(?<before>.*)\\{(?<reflection>\\w+):(?<innerMethod>\\w+(\\.?\\w+(\\(((?<args>(('[^']*')|((/|\\w)+))(,(('[^']*')|((/|\\w)+)))*))?\\))?)*)\\}(?<after>.*)",
          "{CeGlobal:getSodCutoff.getGui.getAmqp.getSimpleModeEnabled()",
          false
        },
        { "^(a+)+$", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", true},
        { "^(a+)+$", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa!", false},
        { "(x+)*y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy", true },
        { "(x+)*y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz", false},
        { "(x+x+)+y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy", true},
        { "(x+x+)+y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz", false},
        { "(([0-9A-Z]+)([_]?+)*)*", "--------------------------------------", false},
        /* not fixed
        //8132141 ---> second level exponential backtracking
        { "(h|h|ih(((i|a|c|c|a|i|i|j|b|a|i|b|a|a|j))+h)ahbfhba|c|i)*",
        "hchcchicihcchciiicichhcichcihcchiihichiciiiihhcchicchhcihchcihiihciichhccciccichcichiihcchcihhicchcciicchcccihiiihhihihihichicihhcciccchihhhcchichchciihiicihciihcccciciccicciiiiiiiiicihhhiiiihchccchchhhhiiihchihcccchhhiiiiiiiicicichicihcciciihichhhhchihciiihhiccccccciciihhichiccchhicchicihihccichicciihcichccihhiciccccccccichhhhihihhcchchihihiihhihihihicichihiiiihhhhihhhchhichiicihhiiiiihchccccchichci" },
        */
    };
    for (Object[] pm : patternMatchers) {
        String p = (String)pm[0];
        String s = (String)pm[1];
        boolean r = (Boolean)pm[2];
        if (r != Pattern.compile(p).matcher(s).matches()) {
            failCount++;
        }
    }
    // This is the last test run before the final failure check, so without
    // a report(...) call the mismatches counted above would never be
    // surfaced. NOTE(review): report() is defined outside this excerpt;
    // usage mirrors the other tests in this file.
    report("Exponential backtracking");
}
}
......@@ -139,6 +139,71 @@ false 0
aaabbbcccdefg
true defg 0
// Negation with nested char class and intersection
[^[c]]
c
false 0
[^[a-z]]
e
false 0
[^[a-z][A-Z]]
E
false 0
[^a-d[0-9][m-p]]
e
true e 0
[^a-d[0-9][m-p]]
8
false 0
[^[a-c]&&[d-f]]
z
true z 0
[^a-c&&d-f]
a
true a 0
[^a-m&&m-z]
m
false 0
[^a-m&&m-z&&a-c]
m
true m 0
[^a-cd-f&&[d-f]]
c
true c 0
[^[a-c][d-f]&&abc]
a
false 0
[^[a-c][d-f]&&abc]
d
true d 0
[^[a-c][d-f]&&abc[def]]
a
false 0
[^[a-c][d-f]&&abc[def]]
e
false 0
[^[a-c]&&[b-d]&&[c-e]]
a
true a 0
[^[a-c]&&[b-d]&&[c-e]]
c
false 0
// Making sure a ^ not in first position matches literal ^
[abc^b]
b
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册