提交 a0e177a1 编写于 作者: S sherman

6350801: Add support for named (instead of numbered) capture groups in regular expression

6676425: Opensource unit/regression tests for java.util.regex
Summary: Added "named capturing group" into regex. Moved most of reg/unit tests to openjdk.
Reviewed-by: alanb, okutsu
上级 ca0148c2
......@@ -490,6 +490,45 @@ public final class Matcher implements MatchResult {
return getSubSequence(groups[group * 2], groups[group * 2 + 1]).toString();
}
/**
 * Returns the input subsequence that the given
 * <a href="Pattern.html#groupname">named-capturing group</a> captured
 * during the previous match operation.
 *
 * <p> A group that took no part in an otherwise successful match yields
 * <tt>null</tt>. A group that matched the empty string, as <tt>(a*)</tt>
 * can, yields the empty string instead. </p>
 *
 * @param  name
 *         The name of a named-capturing group in this matcher's pattern
 *
 * @return  The (possibly empty) subsequence captured by the named group
 *          during the previous match, or <tt>null</tt> if the group
 *          failed to match part of the input
 *
 * @throws  NullPointerException
 *          If <tt>name</tt> is <tt>null</tt>
 *
 * @throws  IllegalStateException
 *          If no match has yet been attempted,
 *          or if the previous match operation failed
 *
 * @throws  IllegalArgumentException
 *          If there is no capturing group in the pattern
 *          with the given name
 */
public String group(String name) {
    // Argument and state checks, in the same order callers observe them.
    if (name == null) {
        throw new NullPointerException("Null group name");
    }
    if (first < 0) {
        throw new IllegalStateException("No match found");
    }

    // A missing key means the pattern never declared a group with this name.
    Integer index = parentPattern.namedGroups().get(name);
    if (index == null) {
        throw new IllegalArgumentException("No group with name <" + name + ">");
    }

    // groups[] holds a start/end offset pair per group; -1 marks "unset".
    int slot = index * 2;
    int start = groups[slot];
    if (start == -1) {
        return null;
    }
    int end = groups[slot + 1];
    if (end == -1) {
        return null;
    }
    return getSubSequence(start, end).toString();
}
/**
* Returns the number of capturing groups in this matcher's pattern.
*
......@@ -649,9 +688,11 @@ public final class Matcher implements MatchResult {
*
* <p> The replacement string may contain references to subsequences
* captured during the previous match: Each occurrence of
* <tt>$</tt><i>g</i><tt></tt> will be replaced by the result of
* evaluating {@link #group(int) group}<tt>(</tt><i>g</i><tt>)</tt>.
* The first number after the <tt>$</tt> is always treated as part of
* <tt>$</tt>&lt;<i>name</i>&gt; or <tt>$</tt><i>g</i>
* will be replaced by the result of evaluating the corresponding
* {@link #group(String) group(name)} or {@link #group(int) group(g)}
* respectively. For <tt>$</tt><i>g</i><tt></tt>,
* the first number after the <tt>$</tt> is always treated as part of
* the group reference. Subsequent numbers are incorporated into g if
* they would form a legal group reference. Only the numerals '0'
* through '9' are considered as potential components of the group
......@@ -695,6 +736,10 @@ public final class Matcher implements MatchResult {
* If no match has yet been attempted,
* or if the previous match operation failed
*
* @throws IllegalArgumentException
* If the replacement string refers to a named-capturing
* group that does not exist in the pattern
*
* @throws IndexOutOfBoundsException
* If the replacement string refers to a capturing group
* that does not exist in the pattern
......@@ -719,29 +764,62 @@ public final class Matcher implements MatchResult {
} else if (nextChar == '$') {
// Skip past $
cursor++;
// The first number is always a group
int refNum = (int)replacement.charAt(cursor) - '0';
if ((refNum < 0)||(refNum > 9))
throw new IllegalArgumentException(
"Illegal group reference");
cursor++;
// Capture the largest legal group string
boolean done = false;
while (!done) {
if (cursor >= replacement.length()) {
break;
}
int nextDigit = replacement.charAt(cursor) - '0';
if ((nextDigit < 0)||(nextDigit > 9)) { // not a number
break;
// A StringIndexOutOfBoundsException is thrown if
// this "$" is the last character in replacement
// string in current implementation, a IAE might be
// more appropriate.
nextChar = replacement.charAt(cursor);
int refNum = -1;
if (nextChar == '<') {
cursor++;
StringBuilder gsb = new StringBuilder();
while (cursor < replacement.length()) {
nextChar = replacement.charAt(cursor);
if (ASCII.isLower(nextChar) ||
ASCII.isUpper(nextChar) ||
ASCII.isDigit(nextChar)) {
gsb.append(nextChar);
cursor++;
} else {
break;
}
}
int newRefNum = (refNum * 10) + nextDigit;
if (groupCount() < newRefNum) {
done = true;
} else {
refNum = newRefNum;
cursor++;
if (gsb.length() == 0)
throw new IllegalArgumentException(
"named capturing group has 0 length name");
if (nextChar != '>')
throw new IllegalArgumentException(
"named capturing group is missing trailing '>'");
String gname = gsb.toString();
if (!parentPattern.namedGroups().containsKey(gname))
throw new IllegalArgumentException(
"No group with name <" + gname + ">");
refNum = parentPattern.namedGroups().get(gname);
cursor++;
} else {
// The first number is always a group
refNum = (int)nextChar - '0';
if ((refNum < 0)||(refNum > 9))
throw new IllegalArgumentException(
"Illegal group reference");
cursor++;
// Capture the largest legal group string
boolean done = false;
while (!done) {
if (cursor >= replacement.length()) {
break;
}
int nextDigit = replacement.charAt(cursor) - '0';
if ((nextDigit < 0)||(nextDigit > 9)) { // not a number
break;
}
int newRefNum = (refNum * 10) + nextDigit;
if (groupCount() < newRefNum) {
done = true;
} else {
refNum = newRefNum;
cursor++;
}
}
}
// Append group
......
......@@ -29,6 +29,7 @@ import java.security.AccessController;
import java.security.PrivilegedAction;
import java.text.CharacterIterator;
import java.text.Normalizer;
import java.util.Map;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Arrays;
......@@ -298,6 +299,10 @@ import java.util.Arrays;
* <td valign="bottom" headers="matches">Whatever the <i>n</i><sup>th</sup>
* <a href="#cg">capturing group</a> matched</td></tr>
*
* <tr><td valign="bottom" headers="construct backref"><tt>\</tt><i>k</i>&lt;<i>name</i>&gt;</td>
* <td valign="bottom" headers="matches">Whatever the
* <a href="#groupname">named-capturing group</a> "name" matched</td></tr>
*
* <tr><th>&nbsp;</th></tr>
* <tr align="left"><th colspan="2" id="quot">Quotation</th></tr>
*
......@@ -310,8 +315,10 @@ import java.util.Arrays;
* <!-- Metachars: !$()*+.<>?[\]^{|} -->
*
* <tr><th>&nbsp;</th></tr>
* <tr align="left"><th colspan="2" id="special">Special constructs (non-capturing)</th></tr>
* <tr align="left"><th colspan="2" id="special">Special constructs (named-capturing and non-capturing)</th></tr>
*
* <tr><td valign="top" headers="construct special"><tt>(?&lt;<a href="#groupname">name</a>&gt;</tt><i>X</i><tt>)</tt></td>
* <td headers="matches"><i>X</i>, as a named-capturing group</td></tr>
* <tr><td valign="top" headers="construct special"><tt>(?:</tt><i>X</i><tt>)</tt></td>
* <td headers="matches"><i>X</i>, as a non-capturing group</td></tr>
* <tr><td valign="top" headers="construct special"><tt>(?idmsux-idmsux)&nbsp;</tt></td>
......@@ -449,6 +456,8 @@ import java.util.Arrays;
* <a name="cg">
* <h4> Groups and capturing </h4>
*
* <a name="gnumber">
* <h5> Group number </h5>
* <p> Capturing groups are numbered by counting their opening parentheses from
* left to right. In the expression <tt>((A)(B(C)))</tt>, for example, there
* are four such groups: </p>
......@@ -471,6 +480,24 @@ import java.util.Arrays;
* subsequence may be used later in the expression, via a back reference, and
* may also be retrieved from the matcher once the match operation is complete.
*
* <a name="groupname">
* <h5> Group name </h5>
* <p>A capturing group can also be assigned a "name", a <tt>named-capturing group</tt>,
* and then be back-referenced later by the "name". Group names are composed of
* the following characters:
*
* <ul>
* <li> The uppercase letters <tt>'A'</tt> through <tt>'Z'</tt>
* (<tt>'&#92;u0041'</tt>&nbsp;through&nbsp;<tt>'&#92;u005a'</tt>),
* <li> The lowercase letters <tt>'a'</tt> through <tt>'z'</tt>
* (<tt>'&#92;u0061'</tt>&nbsp;through&nbsp;<tt>'&#92;u007a'</tt>),
* <li> The digits <tt>'0'</tt> through <tt>'9'</tt>
* (<tt>'&#92;u0030'</tt>&nbsp;through&nbsp;<tt>'&#92;u0039'</tt>),
* </ul>
*
* <p> A <tt>named-capturing group</tt> is still numbered as described in
* <a href="#gnumber">Group number</a>.
*
* <p> The captured input associated with a group is always the subsequence
* that the group most recently matched. If a group is evaluated a second time
* because of quantification then its previously-captured value, if any, will
......@@ -479,9 +506,9 @@ import java.util.Arrays;
* group two set to <tt>"b"</tt>. All captured input is discarded at the
* beginning of each match.
*
* <p> Groups beginning with <tt>(?</tt> are pure, <i>non-capturing</i> groups
* that do not capture text and do not count towards the group total.
*
* <p> Groups beginning with <tt>(?</tt> are either pure, <i>non-capturing</i> groups
* that do not capture text and do not count towards the group total, or
* <i>named-capturing</i> groups.
*
* <h4> Unicode support </h4>
*
......@@ -794,6 +821,12 @@ public final class Pattern
*/
transient int[] buffer;
/**
 * Maps the name of each named-capturing group to the index of that group
 * (the value stored is capturingGroupCount-1 at the point where the group
 * is parsed). Stays null until namedGroups() creates the map on first use.
 */
transient volatile Map<String, Integer> namedGroups;
/**
* Temporary storage used while parsing group references.
*/
......@@ -1467,6 +1500,7 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
// Allocate all temporary objects here.
buffer = new int[32];
groupNodes = new GroupHead[10];
namedGroups = null;
if (has(LITERAL)) {
// Literal pattern handling
......@@ -1505,6 +1539,12 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
compiled = true;
}
/**
 * Returns the name-to-group-index map for this pattern, creating an empty
 * map the first time it is requested.
 */
Map<String, Integer> namedGroups() {
    Map<String, Integer> map = namedGroups;
    if (map == null) {
        map = new HashMap<String, Integer>(2);
        namedGroups = map;
    }
    return map;
}
/**
* Used to print out a subtree of the Pattern to help with debugging.
*/
......@@ -2156,7 +2196,22 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
case 'h':
case 'i':
case 'j':
break;
case 'k':
    // \k<name> is a back reference to a named-capturing group.
    // NOTE(review): inclass presumably means "inside a character class",
    // where \k is not treated as a back reference - confirm.
    if (inclass)
        break;
    if (read() != '<')
        throw error("\\k is not followed by '<' for named capturing group");
    // groupname() also consumes the closing '>'.
    String name = groupname(read());
    // Only groups already registered in namedGroups() can be referenced.
    if (!namedGroups().containsKey(name))
        throw error("named capturing group <" + name + "> does not exist");
    if (create) {
        // Same node types as a numbered back reference, keyed by the
        // index registered for the name.
        if (has(CASE_INSENSITIVE))
            root = new CIBackRef(namedGroups().get(name), has(UNICODE_CASE));
        else
            root = new BackRef(namedGroups().get(name));
    }
    // NOTE(review): -1 presumably tells the caller that a node was stored
    // in root instead of a literal character being returned - confirm.
    return -1;
case 'l':
case 'm':
break;
......@@ -2455,6 +2510,24 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
return p;
}
/**
 * Parses and returns the name of a "named capturing group"; the trailing
 * ">" is consumed after parsing.
 *
 * <p> A name consists only of the ASCII letters 'A'-'Z' and 'a'-'z' and
 * the digits '0'-'9'. The first character is supplied by the caller and
 * is validated here just like every following character, so an empty
 * name (for example <tt>\k&lt;&gt;</tt>) or a name that starts with any
 * other character is rejected with the "0 length name" error instead of
 * being accepted as part of the name.
 *
 * @param ch
 *        The first character of the candidate name, already read by the
 *        caller
 *
 * @return The parsed group name, without the enclosing angle brackets
 */
private String groupname(int ch) {
    StringBuilder sb = new StringBuilder();
    // Only start accumulating when the caller-supplied first character is
    // itself a legal name character; otherwise leave the name empty and
    // do not read any further input.
    if (ASCII.isLower(ch) || ASCII.isUpper(ch) || ASCII.isDigit(ch)) {
        sb.append(Character.toChars(ch));
        // Consume name characters; the loop leaves ch holding the first
        // character that is not part of the name.
        while (ASCII.isLower(ch = read()) || ASCII.isUpper(ch) ||
               ASCII.isDigit(ch)) {
            sb.append(Character.toChars(ch));
        }
    }
    if (sb.length() == 0)
        throw error("named capturing group has 0 length name");
    // The terminating character has already been read, so a '>' here is
    // consumed as part of the name construct.
    if (ch != '>')
        throw error("named capturing group is missing trailing '>'");
    return sb.toString();
}
/**
* Parses a group and returns the head node of a set of nodes that process
* the group. Sometimes a double return system is used where the tail is
......@@ -2494,6 +2567,18 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
break;
case '<': // (?<xxx) look behind
ch = read();
if (Character.isLetter(ch)) { // named captured group
String name = groupname(ch);
if (namedGroups().containsKey(name))
throw error("Named capturing group <" + name
+ "> is already defined");
capturingGroup = true;
head = createGroup(false);
tail = root;
namedGroups().put(name, capturingGroupCount-1);
head.next = expr(tail);
break;
}
int start = cursor;
head = createGroup(true);
tail = root;
......
//
// Copyright 1999-2009 Sun Microsystems, Inc. All Rights Reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
// CA 95054 USA or visit www.sun.com if you need additional information or
// have any questions.
//
//
// This file contains test cases with BMP characters for regular expressions.
// A test case consists of three lines:
// The first line is a pattern used in the test
// The second line is the input to search for the pattern in
// The third line is a concatenation of the match, the number of groups,
// and the contents of the first four subexpressions.
// Empty lines and lines beginning with comment slashes are ignored.
// Test unsetting of backed off groups
^(\u3042)?\u3042
\u3042
true \u3042 1
^(\u3042\u3042(\u3043\u3043)?)+$
\u3042\u3042\u3043\u3043\u3042\u3042
true \u3042\u3042\u3043\u3043\u3042\u3042 2 \u3042\u3042 \u3043\u3043
((\u3042|\u3043)?\u3043)+
\u3043
true \u3043 2 \u3043
(\u3042\u3042\u3042)?\u3042\u3042\u3042
\u3042\u3042\u3042
true \u3042\u3042\u3042 1
^(\u3042(\u3043)?)+$
\u3042\u3043\u3042
true \u3042\u3043\u3042 2 \u3042 \u3043
^(\u3042(\u3043(\u3044)?)?)?\u3042\u3043\u3044
\u3042\u3043\u3044
true \u3042\u3043\u3044 3
^(\u3042(\u3043(\u3044))).*
\u3042\u3043\u3044
true \u3042\u3043\u3044 3 \u3042\u3043\u3044 \u3043\u3044 \u3044
// use of x modifier
\u3042\u3043\u3044(?x)\u3043la\u3049
\u3042\u3043\u3044\u3043la\u3049
true \u3042\u3043\u3044\u3043la\u3049 0
\u3042\u3043\u3044(?x) bla\u3049
\u3042\u3043\u3044bla\u3049
true \u3042\u3043\u3044bla\u3049 0
\u3042\u3043\u3044(?x) bla\u3049 ble\u3044\u3049
\u3042\u3043\u3044bla\u3049ble\u3044\u3049
true \u3042\u3043\u3044bla\u3049ble\u3044\u3049 0
\u3042\u3043\u3044(?x) bla\u3049 # ignore comment
\u3042\u3043\u3044bla\u3049
true \u3042\u3043\u3044bla\u3049 0
// Simple alternation
\u3042|\u3043
\u3042
true \u3042 0
\u3042|\u3043
\u305B
false 0
\u3042|\u3043
\u3043
true \u3043 0
\u3042|\u3043|\u3044\u3045
\u3044\u3045
true \u3044\u3045 0
\u3042|\u3042\u3045
\u3042\u3045
true \u3042 0
\u305B(\u3042|\u3042\u3044)\u3043
\u305B\u3042\u3044\u3043
true \u305B\u3042\u3044\u3043 1 \u3042\u3044
// Simple char class
[\u3042\u3043\u3044]+
\u3042\u3043\u3042\u3043\u3042\u3043
true \u3042\u3043\u3042\u3043\u3042\u3043 0
[\u3042\u3043\u3044]+
\u3045\u3046\u3047\u3048
false 0
[\u3042\u3043\u3044]+[\u3045\u3046\u3047]+[\u3048\u3049\u304A]+
\u305B\u305B\u305B\u3042\u3042\u3045\u3045\u3048\u3048\u305B\u305B\u305B
true \u3042\u3042\u3045\u3045\u3048\u3048 0
// Range char class
[\u3042-\u3048]+
\u305B\u305B\u305B\u3048\u3048\u3048
true \u3048\u3048\u3048 0
[\u3042-\u3048]+
mmm
false 0
[\u3042-]+
\u305B\u3042-9\u305B
true \u3042- 0
[\u3042-\\u4444]+
\u305B\u3042-9\u305B
true \u305B\u3042 0
// Negated char class
[^\u3042\u3043\u3044]+
\u3042\u3043\u3042\u3043\u3042\u3043
false 0
[^\u3042\u3043\u3044]+
\u3042\u3042\u3042\u3043\u3043\u3043\u3044\u3044\u3044\u3045\u3046\u3047\u3048
true \u3045\u3046\u3047\u3048 0
// Making sure a ^ not in first position matches literal ^
[\u3042\u3043\u3044^\u3043]
\u3043
true \u3043 0
[\u3042\u3043\u3044^\u3043]
^
true ^ 0
// Class union and intersection
[\u3042\u3043\u3044[\u3045\u3046\u3047]]
\u3043
true \u3043 0
[\u3042\u3043\u3044[\u3045\u3046\u3047]]
\u3046
true \u3046 0
[\u3042-\u3045[0-9][\u304e-\u3051]]
\u3042
true \u3042 0
[\u3042-\u3045[0-9][\u304e-\u3051]]
\u3050
true \u3050 0
[\u3042-\u3045[0-9][\u304e-\u3051]]
4
true 4 0
[\u3042-\u3045[0-9][\u304e-\u3051]]
\u3046
false 0
[\u3042-\u3045[0-9][\u304e-\u3051]]
\u3056
false 0
[[\u3042-\u3045][0-9][\u304e-\u3051]]
\u3043
true \u3043 0
[[\u3042-\u3045][0-9][\u304e-\u3051]]
\u305B
false 0
[\u3042-\u3044[\u3045-\u3047[\u3048-\u304A]]]
\u3042
true \u3042 0
[\u3042-\u3044[\u3045-\u3047[\u3048-\u304A]]]
\u3046
true \u3046 0
[\u3042-\u3044[\u3045-\u3047[\u3048-\u304A]]]
\u3049
true \u3049 0
[\u3042-\u3044[\u3045-\u3047[\u3048-\u304A]]]
m
false 0
[\u3042-\u3044[\u3045-\u3047[\u3048-\u304A]]m]
m
true m 0
[\u3042\u3043\u3044[\u3045\u3046\u3047]\u3048\u3049\u304A]
\u3042
true \u3042 0
[\u3042\u3043\u3044[\u3045\u3046\u3047]\u3048\u3049\u304A]
\u3045
true \u3045 0
[\u3042\u3043\u3044[\u3045\u3046\u3047]\u3048\u3049\u304A]
\u3049
true \u3049 0
[\u3042\u3043\u3044[\u3045\u3046\u3047]\u3048\u3049\u304A]
w
false 0
[\u3042-\u3044&&[\u3045-\u3047]]
\u3042
false 0
[\u3042-\u3044&&[\u3045-\u3047]]
\u3046
false 0
[\u3042-\u3044&&[\u3045-\u3047]]
\u305B
false 0
[[\u3042-\u3044]&&[\u3045-\u3047]]
\u3042
false 0
[[\u3042-\u3044]&&[\u3045-\u3047]]
\u3046
false 0
[[\u3042-\u3044]&&[\u3045-\u3047]]
\u305B
false 0
[\u3042-\u3044&&\u3045-\u3047]
\u3042
false 0
[\u3042-\u304e&&\u304e-\u305B]
\u304e
true \u304e 0
[\u3042-\u304e&&\u304e-\u305B&&\u3042-\u3044]
\u304e
false 0
[\u3042-\u304e&&\u304e-\u305B&&\u3042-\u305B]
\u304e
true \u304e 0
[[\u3042-\u304e]&&[\u304e-\u305B]]
\u3042
false 0
[[\u3042-\u304e]&&[\u304e-\u305B]]
\u304e
true \u304e 0
[[\u3042-\u304e]&&[\u304e-\u305B]]
\u305B
false 0
[[\u3042-\u304e]&&[^\u3042-\u3044]]
\u3042
false 0
[[\u3042-\u304e]&&[^\u3042-\u3044]]
\u3045
true \u3045 0
[\u3042-\u304e&&[^\u3042-\u3044]]
\u3042
false 0
[\u3042-\u304e&&[^\u3042-\u3044]]
\u3045
true \u3045 0
[\u3042-\u3044\u3045-\u3047&&[\u3045-\u3047]]
\u3042
false 0
[\u3042-\u3044\u3045-\u3047&&[\u3045-\u3047]]
\u3046
true \u3046 0
[[\u3042-\u3044]&&\u3045-\u3047\u3042-\u3044]
\u3042
true \u3042 0
[[\u3042-\u3044]&&[\u3045-\u3047][\u3042-\u3044]]
\u3042
true \u3042 0
[[\u3042-\u3044][\u3045-\u3047]&&\u3042\u3043\u3044]
\u3042
true \u3042 0
[[\u3042-\u3044][\u3045-\u3047]&&\u3042\u3043\u3044[\u3045\u3046\u3047]]
\u3046
true \u3046 0
[[\u3042-\u3044]&&[\u3043-\u3045]&&[\u3044-\u3046]]
\u3042
false 0
[[\u3042-\u3044]&&[\u3043-\u3045]&&[\u3044-\u3046]]
\u3044
true \u3044 0
[[\u3042-\u3044]&&[\u3043-\u3045][\u3044-\u3046]&&[\u3056-\u305B]]
\u3044
false 0
[\u3042\u3043\u3044[^\u3043\u3044\u3045]]
\u3042
true \u3042 0
[\u3042\u3043\u3044[^\u3043\u3044\u3045]]
\u3045
false 0
[\u3042-\u3044&&\u3042-\u3045&&\u3042-\u3046\u3048\u3049\u304A]
\u3043
true \u3043 0
[\u3042-\u3044&&\u3042-\u3045&&\u3042-\u3046\u3048\u3049\u304A]
\u3048
false 0
[[\u3042[\u3043]]&&[\u3043[\u3042]]]
\u3042
true \u3042 0
[[\u3042]&&[\u3043][\u3044][\u3042]&&[^\u3045]]
\u3042
true \u3042 0
[[\u3042]&&[b][c][\u3042]&&[^d]]
\u3042
true \u3042 0
[[\u3042]&&[\u3043][\u3044][\u3042]&&[^\u3045]]
\u3045
false 0
[[[\u3042-\u3045]&&[\u3044-\u3047]]]
\u3042
false 0
[[[\u3042-\u3045]&&[\u3044-\u3047]]]
\u3044
true \u3044 0
[[[\u3042-\u3045]&&[\u3044-\u3047]]&&[\u3044]]
\u3044
true \u3044 0
[[[\u3042-\u3045]&&[\u3044-\u3047]]&&[\u3044]&&\u3044]
\u3044
true \u3044 0
[[[\u3042-\u3045]&&[\u3044-\u3047]]&&[\u3044]&&\u3044&&\u3044]
\u3044
true \u3044 0
[[[\u3042-\u3045]&&[\u3044-\u3047]]&&[\u3044]&&\u3044&&[\u3044\u3045\u3046]]
\u3044
true \u3044 0
[\u305B[\u3042\u3043\u3044&&\u3043\u3044\u3045]]
\u3044
true \u3044 0
[\u305B[\u3042\u3043\u3044&&\u3043\u3044\u3045]&&[\u3056-\u305B]]
\u305B
true \u305B 0
[\u3059[\u3042\u3043\u3044&&\u3043\u3044\u3045[\u305B]]&&[\u3056-\u305B]]
\u305B
false 0
[\u3059[[w\u305B]\u3042\u3043\u3044&&\u3043\u3044\u3045[\u305B]]&&[\u3056-\u305B]]
\u305B
true \u305B 0
[[\u3042\u3043\u3044]&&[\u3045\u3046\u3047]\u3042\u3043\u3044]
\u3042
true \u3042 0
[[\u3042\u3043\u3044]&&[\u3045\u3046\u3047]\u3059\u305A\u305B[\u3042\u3043\u3044]]
\u3042
true \u3042 0
\pL
\u3042
true \u3042 0
\pL
7
false 0
\p{L}
\u3042
true \u3042 0
\p{IsL}
\u3042
true \u3042 0
\p{InHiragana}
\u3042
true \u3042 0
\p{InHiragana}
\u0370
false 0
\pL\u3043\u3044
\u3042\u3043\u3044
true \u3042\u3043\u3044 0
\u3042[r\p{InGreek}]\u3044
\u3042\u0370\u3044
true \u3042\u0370\u3044 0
\u3042\p{InGreek}
\u3042\u0370
true \u3042\u0370 0
\u3042\P{InGreek}
\u3042\u0370
false 0
\u3042\P{InGreek}
\u3042\u3043
true \u3042\u3043 0
\u3042{^InGreek}
-
error
\u3042\p{^InGreek}
-
error
\u3042\P{^InGreek}
-
error
\u3042\p{InGreek}
\u3042\u0370
true \u3042\u0370 0
\u3042[\p{InGreek}]\u3044
\u3042\u0370\u3044
true \u3042\u0370\u3044 0
\u3042[\P{InGreek}]\u3044
\u3042\u0370\u3044
false 0
\u3042[\P{InGreek}]\u3044
\u3042\u3043\u3044
true \u3042\u3043\u3044 0
\u3042[{^InGreek}]\u3044
\u3042n\u3044
true \u3042n\u3044 0
\u3042[{^InGreek}]\u3044
\u3042\u305B\u3044
false 0
\u3042[\p{^InGreek}]\u3044
-
error
\u3042[\P{^InGreek}]\u3044
-
error
\u3042[\p{InGreek}]
\u3042\u0370
true \u3042\u0370 0
\u3042[r\p{InGreek}]\u3044
\u3042r\u3044
true \u3042r\u3044 0
\u3042[\p{InGreek}r]\u3044
\u3042r\u3044
true \u3042r\u3044 0
\u3042[r\p{InGreek}]\u3044
\u3042r\u3044
true \u3042r\u3044 0
\u3042[^\p{InGreek}]\u3044
\u3042\u0370\u3044
false 0
\u3042[^\P{InGreek}]\u3044
\u3042\u0370\u3044
true \u3042\u0370\u3044 0
\u3042[\p{InGreek}&&[^\u0370]]\u3044
\u3042\u0370\u3044
false 0
// Test the dot metacharacter
\u3042.\u3044.+
\u3042#\u3044%&
true \u3042#\u3044%& 0
\u3042\u3043.
\u3042\u3043\n
false 0
(?s)\u3042\u3043.
\u3042\u3043\n
true \u3042\u3043\n 0
\u3042[\p{L}&&[\P{InGreek}]]\u3044
\u3042\u6000\u3044
true \u3042\u6000\u3044 0
\u3042[\p{L}&&[\P{InGreek}]]\u3044
\u3042r\u3044
true \u3042r\u3044 0
\u3042[\p{L}&&[\P{InGreek}]]\u3044
\u3042\u0370\u3044
false 0
\u3042\p{InGreek}\u3044
\u3042\u0370\u3044
true \u3042\u0370\u3044 0
\u3042\p{Sc}
\u3042$
true \u3042$ 0
\W\w\W
rrrr#\u3048\u3048\u3048
false 0
\u3042\u3043\u3044[\s\u3045\u3046\u3047]*
\u3042\u3043\u3044 \u3045\u3046\u3047
true \u3042\u3043\u3044 \u3045\u3046\u3047 0
\u3042\u3043\u3044[\s\u305A-\u305B]*
\u3042\u3043\u3044 \u305A \u305B
true \u3042\u3043\u3044 \u305A \u305B 0
\u3042\u3043\u3044[\u3042-\u3045\s\u304e-\u3051]*
\u3042\u3043\u3044\u3042\u3042 \u304e\u304f \u3051
true \u3042\u3043\u3044\u3042\u3042 \u304e\u304f \u3051 0
// Test the whitespace escape sequence
\u3042\u3043\s\u3044
\u3042\u3043 \u3044
true \u3042\u3043 \u3044 0
\s\s\s
\u3043l\u3042\u3049 \u3046rr
false 0
\S\S\s
\u3043l\u3042\u3049 \u3046rr
true \u3042\u3049 0
// Test the digit escape sequence
\u3042\u3043\d\u3044
\u3042\u30439\u3044
true \u3042\u30439\u3044 0
\d\d\d
\u3043l\u3042\u304945
false 0
// Test the caret metacharacter
^\u3042\u3043\u3044
\u3042\u3043\u3044\u3045\u3046\u3047
true \u3042\u3043\u3044 0
^\u3042\u3043\u3044
\u3043\u3044\u3045\u3042\u3043\u3044
false 0
// Greedy ? metacharacter
\u3042?\u3043
\u3042\u3042\u3042\u3042\u3043
true \u3042\u3043 0
\u3042?\u3043
\u3043
true \u3043 0
\u3042?\u3043
\u3042\u3042\u3042\u3044\u3044\u3044
false 0
.?\u3043
\u3042\u3042\u3042\u3042\u3043
true \u3042\u3043 0
// Reluctant ? metacharacter
\u3042??\u3043
\u3042\u3042\u3042\u3042\u3043
true \u3042\u3043 0
\u3042??\u3043
\u3043
true \u3043 0
\u3042??\u3043
\u3042\u3042\u3042\u3044\u3044\u3044
false 0
.??\u3043
\u3042\u3042\u3042\u3042\u3043
true \u3042\u3043 0
// Possessive ? metacharacter
\u3042?+\u3043
\u3042\u3042\u3042\u3042\u3043
true \u3042\u3043 0
\u3042?+\u3043
\u3043
true \u3043 0
\u3042?+\u3043
\u3042\u3042\u3042\u3044\u3044\u3044
false 0
.?+\u3043
\u3042\u3042\u3042\u3042\u3043
true \u3042\u3043 0
// Greedy + metacharacter
\u3042+\u3043
\u3042\u3042\u3042\u3042\u3043
true \u3042\u3042\u3042\u3042\u3043 0
\u3042+\u3043
\u3043
false 0
\u3042+\u3043
\u3042\u3042\u3042\u3044\u3044\u3044
false 0
.+\u3043
\u3042\u3042\u3042\u3042\u3043
true \u3042\u3042\u3042\u3042\u3043 0
// Reluctant + metacharacter
\u3042+?\u3043
\u3042\u3042\u3042\u3042\u3043
true \u3042\u3042\u3042\u3042\u3043 0
\u3042+?\u3043
\u3043
false 0
\u3042+?\u3043
\u3042\u3042\u3042\u3044\u3044\u3044
false 0
.+?\u3043
\u3042\u3042\u3042\u3042\u3043
true \u3042\u3042\u3042\u3042\u3043 0
// Possessive + metacharacter
\u3042++\u3043
\u3042\u3042\u3042\u3042\u3043
true \u3042\u3042\u3042\u3042\u3043 0
\u3042++\u3043
\u3043
false 0
\u3042++\u3043
\u3042\u3042\u3042\u3044\u3044\u3044
false 0
.++\u3043
\u3042\u3042\u3042\u3042\u3043
false 0
// Greedy Repetition
\u3042{2,3}
\u3042
false 0
\u3042{2,3}
\u3042\u3042
true \u3042\u3042 0
\u3042{2,3}
\u3042\u3042\u3042
true \u3042\u3042\u3042 0
\u3042{2,3}
\u3042\u3042\u3042\u3042
true \u3042\u3042\u3042 0
\u3042{3,}
\u305B\u305B\u305B\u3042\u3042\u3042\u3042\u305B\u305B\u305B
true \u3042\u3042\u3042\u3042 0
\u3042{3,}
\u305B\u305B\u305B\u3042\u3042\u305B\u305B\u305B
false 0
// Reluctant Repetition
\u3042{2,3}?
\u3042
false 0
\u3042{2,3}?
\u3042\u3042
true \u3042\u3042 0
\u3042{2,3}?
\u3042\u3042\u3042
true \u3042\u3042 0
\u3042{2,3}?
\u3042\u3042\u3042\u3042
true \u3042\u3042 0
// Zero width Positive lookahead
\u3042\u3043\u3044(?=\u3045)
\u305B\u305B\u305B\u3042\u3043\u3044\u3045
true \u3042\u3043\u3044 0
\u3042\u3043\u3044(?=\u3045)
\u305B\u305B\u305B\u3042\u3043\u3044\u3046\u3045
false 0
// Zero width Negative lookahead
\u3042\u3043\u3044(?!\u3045)
\u305B\u305B\u3042\u3043\u3044\u3045
false 0
\u3042\u3043\u3044(?!\u3045)
\u305B\u305B\u3042\u3043\u3044\u3046\u3045
true \u3042\u3043\u3044 0
// Zero width Positive lookbehind
\u3042(?<=\u3042)
###\u3042\u3043\u3044
true \u3042 0
\u3042(?<=\u3042)
###\u3043\u3044###
false 0
// Zero width Negative lookbehind
(?<!\u3042)\w
###\u3042\u3043\u3044a###
true a 0
(?<!\u3042)\u3044
\u3043\u3044
true \u3044 0
(?<!\u3042)\u3044
\u3042\u3044
false 0
// Nondeterministic group
(\u3042+\u3043)+
\u3042\u3043\u3042\u3043\u3042\u3043
true \u3042\u3043\u3042\u3043\u3042\u3043 1 \u3042\u3043
(\u3042|\u3043)+
\u3044\u3044\u3044\u3044\u3045
false 1
// Deterministic group
(\u3042\u3043)+
\u3042\u3043\u3042\u3043\u3042\u3043
true \u3042\u3043\u3042\u3043\u3042\u3043 1 \u3042\u3043
(\u3042\u3043)+
\u3042\u3044\u3044\u3044\u3044\u3045
false 1
(\u3042\u3043)*
\u3042\u3043\u3042\u3043\u3042\u3043
true \u3042\u3043\u3042\u3043\u3042\u3043 1 \u3042\u3043
(\u3042\u3043)(\u3044\u3045*)
\u305B\u305B\u305B\u3042\u3043\u3044\u305B\u305B\u305B
true \u3042\u3043\u3044 2 \u3042\u3043 \u3044
\u3042\u3043\u3044(\u3045)*\u3042\u3043\u3044
\u3042\u3043\u3044\u3045\u3045\u3045\u3045\u3045\u3042\u3043\u3044
true \u3042\u3043\u3044\u3045\u3045\u3045\u3045\u3045\u3042\u3043\u3044 1 \u3045
// Back references
(\u3042*)\u3043\u3044\1
\u305B\u305B\u305B\u3042\u3042\u3043\u3044\u3042\u3042\u305B\u305B\u305B
true \u3042\u3042\u3043\u3044\u3042\u3042 1 \u3042\u3042
(\u3042*)\u3043\u3044\1
\u305B\u305B\u305B\u3042\u3042\u3043\u3044\u3042\u305B\u305B\u305B
true \u3042\u3043\u3044\u3042 1 \u3042
(\u3048t*)(\u3045\u3045\u3046)*(\u305A\u3056)\1\3(\u3057\u3057)
\u305B\u305B\u305B\u3048tt\u3045\u3045\u3046\u3045\u3045\u3046\u305A\u3056\u3048tt\u305A\u3056\u3057\u3057\u305B\u305B\u305B
true \u3048tt\u3045\u3045\u3046\u3045\u3045\u3046\u305A\u3056\u3048tt\u305A\u3056\u3057\u3057 4 \u3048tt \u3045\u3045\u3046 \u305A\u3056 \u3057\u3057
// Greedy * metacharacter
\u3042*\u3043
\u3042\u3042\u3042\u3042\u3043
true \u3042\u3042\u3042\u3042\u3043 0
\u3042*\u3043
\u3043
true \u3043 0
\u3042*\u3043
\u3042\u3042\u3042\u3044\u3044\u3044
false 0
.*\u3043
\u3042\u3042\u3042\u3042\u3043
true \u3042\u3042\u3042\u3042\u3043 0
// Reluctant * metacharacter
\u3042*?\u3043
\u3042\u3042\u3042\u3042\u3043
true \u3042\u3042\u3042\u3042\u3043 0
\u3042*?\u3043
\u3043
true \u3043 0
\u3042*?\u3043
\u3042\u3042\u3042\u3044\u3044\u3044
false 0
.*?\u3043
\u3042\u3042\u3042\u3042\u3043
true \u3042\u3042\u3042\u3042\u3043 0
// Possessive * metacharacter
\u3042*+\u3043
\u3042\u3042\u3042\u3042\u3043
true \u3042\u3042\u3042\u3042\u3043 0
\u3042*+\u3043
\u3043
true \u3043 0
\u3042*+\u3043
\u3042\u3042\u3042\u3044\u3044\u3044
false 0
.*+\u3043
\u3042\u3042\u3042\u3042\u3043
false 0
// Case insensitivity
(?iu)\uFF46\uFF4F\uFF4F\uFF42\uFF41\uFF52
\uFF46\uFF2F\uFF4F\uFF42\uFF21\uFF52
true \uFF46\uFF2F\uFF4F\uFF42\uFF21\uFF52 0
\uFF46(?iu)\uFF4F\uFF4F\uFF42\uFF41\uFF52
\uFF46\uFF2F\uFF4F\uFF42\uFF21\uFF52
true \uFF46\uFF2F\uFF4F\uFF42\uFF21\uFF52 0
\uFF46\uFF4F\uFF4F(?iu)\uFF42\uFF41\uFF52
\uFF46\uFF2F\uFF4F\uFF42\uFF21\uFF52
false 0
(?iu)\uFF46\uFF4F\uFF4F[\uFF42\uFF41\uFF52]+
\uFF46\uFF4F\uFF2F\uFF42\uFF21\uFF52
true \uFF46\uFF4F\uFF2F\uFF42\uFF21\uFF52 0
(?iu)\uFF46\uFF4F\uFF4F[\uFF41-\uFF52]+
\uFF46\uFF4F\uFF2F\uFF42\uFF21\uFF52
true \uFF46\uFF4F\uFF2F\uFF42\uFF21\uFF52 0
// Disable metacharacters- test both length <=3 and >3
// So that the BM optimization is part of test
\Q***\E\u3042\u3043\u3044
***\u3042\u3043\u3044
true ***\u3042\u3043\u3044 0
\u3043l\Q***\E\u3042\u3043\u3044
\u3043l***\u3042\u3043\u3044
true \u3043l***\u3042\u3043\u3044 0
\Q***\u3042\u3043\u3044
***\u3042\u3043\u3044
true ***\u3042\u3043\u3044 0
\u3043l\u3042\u3049\Q***\E\u3042\u3043\u3044
\u3043l\u3042\u3049***\u3042\u3043\u3044
true \u3043l\u3042\u3049***\u3042\u3043\u3044 0
\Q***\u3042\u3043\u3044
***\u3042\u3043\u3044
true ***\u3042\u3043\u3044 0
\Q*\u3042\u3043
*\u3042\u3043
true *\u3042\u3043 0
\u3043l\u3042\u3049\Q***\u3042\u3043\u3044
\u3043l\u3042\u3049***\u3042\u3043\u3044
true \u3043l\u3042\u3049***\u3042\u3043\u3044 0
\u3043l\u3042\Q***\u3042\u3043\u3044
\u3043l\u3042***\u3042\u3043\u3044
true \u3043l\u3042***\u3042\u3043\u3044 0
[\043]+
\u3043l\u3042\u3049\u3043l\u3042\u3049#\u3043le\u3044\u3049
true # 0
[\042-\044]+
\u3043l\u3042\u3049\u3043l\u3042\u3049#\u3043le\u3044\u3049
true # 0
[\u1234-\u1236]
\u3043l\u3042\u3049\u3043l\u3042\u3049\u1235\u3043le\u3044\u3049
true \u1235 0
[^\043]*
\u3043l\u3042\u3049\u3043l\u3042\u3049#\u3043le\u3044\u3049
true \u3043l\u3042\u3049\u3043l\u3042\u3049 0
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册