Merge

fa9bfbe3 · alanb · edf7b3aa · a0e177a1 · fa9bfbe3 · fa9bfbe3
6 changed file
--- a/src/share/classes/java/util/regex/Matcher.java
+++ b/src/share/classes/java/util/regex/Matcher.java
@@ -490,6 +490,45 @@ public final class Matcher implements MatchResult {
        return getSubSequence(groups[group * 2], groups[group * 2 + 1]).toString();
    }

+    /**
+     * Returns the input subsequence captured by the given
+     * <a href="Pattern.html#groupname">named-capturing group</a> during the previous
+     * match operation.
+     *
+     * <p> If the match was successful but the group specified failed to match
+     * any part of the input sequence, then <tt>null</tt> is returned. Note
+     * that some groups, for example <tt>(a*)</tt>, match the empty string.
+     * This method will return the empty string when such a group successfully
+     * matches the empty string in the input.  </p>
+     *
+     * @param  name
+     *         The name of a named-capturing group in this matcher's pattern
+     *
+     * @return  The (possibly empty) subsequence captured by the named group
+     *          during the previous match, or <tt>null</tt> if the group
+     *          failed to match part of the input
+     *
+     * @throws  IllegalStateException
+     *          If no match has yet been attempted,
+     *          or if the previous match operation failed
+     *
+     * @throws  IllegalArgumentException
+     *          If there is no capturing group in the pattern
+     *          with the given name
+     */
+    public String group(String name) {
+        if (name == null)
+            throw new NullPointerException("Null group name");
+        if (first < 0)
+            throw new IllegalStateException("No match found");
+        if (!parentPattern.namedGroups().containsKey(name))
+            throw new IllegalArgumentException("No group with name <" + name + ">");
+        int group = parentPattern.namedGroups().get(name);
+        if ((groups[group*2] == -1) || (groups[group*2+1] == -1))
+            return null;
+        return getSubSequence(groups[group * 2], groups[group * 2 + 1]).toString();
+    }
+
    /**
     * Returns the number of capturing groups in this matcher's pattern.
     *
@@ -649,9 +688,11 @@ public final class Matcher implements MatchResult {
     *
     * <p> The replacement string may contain references to subsequences
     * captured during the previous match: Each occurrence of
-     * <tt>$</tt><i>g</i><tt></tt> will be replaced by the result of
-     * evaluating {@link #group(int) group}<tt>(</tt><i>g</i><tt>)</tt>.
-     * The first number after the <tt>$</tt> is always treated as part of
+     * <tt>$</tt>&lt;<i>name</i>&gt; or <tt>$</tt><i>g</i>
+     * will be replaced by the result of evaluating the corresponding
+     * {@link #group(String) group(name)} or {@link #group(int) group(g)}
+     * respectively. For <tt>$</tt><i>g</i>,
+     * the first number after the <tt>$</tt> is always treated as part of
     * the group reference. Subsequent numbers are incorporated into g if
     * they would form a legal group reference. Only the numerals '0'
     * through '9' are considered as potential components of the group
@@ -695,6 +736,10 @@ public final class Matcher implements MatchResult {
     *          If no match has yet been attempted,
     *          or if the previous match operation failed
     *
+     * @throws  IllegalArgumentException
+     *          If the replacement string refers to a named-capturing
+     *          group that does not exist in the pattern
+     *
     * @throws  IndexOutOfBoundsException
     *          If the replacement string refers to a capturing group
     *          that does not exist in the pattern
@@ -719,29 +764,62 @@ public final class Matcher implements MatchResult {
            } else if (nextChar == '$') {
                // Skip past $
                cursor++;
-                // The first number is always a group
-                int refNum = (int)replacement.charAt(cursor) - '0';
-                if ((refNum < 0)||(refNum > 9))
-                    throw new IllegalArgumentException(
-                        "Illegal group reference");
-                cursor++;
-
-                // Capture the largest legal group string
-                boolean done = false;
-                while (!done) {
-                    if (cursor >= replacement.length()) {
-                        break;
-                    }
-                    int nextDigit = replacement.charAt(cursor) - '0';
-                    if ((nextDigit < 0)||(nextDigit > 9)) { // not a number
-                        break;
+                // In the current implementation a
+                // StringIndexOutOfBoundsException is thrown if this "$"
+                // is the last character of the replacement string; an
+                // IllegalArgumentException might be more appropriate.
+                nextChar = replacement.charAt(cursor);
+                int refNum = -1;
+                if (nextChar == '<') {
+                    cursor++;
+                    StringBuilder gsb = new StringBuilder();
+                    while (cursor < replacement.length()) {
+                        nextChar = replacement.charAt(cursor);
+                        if (ASCII.isLower(nextChar) ||
+                            ASCII.isUpper(nextChar) ||
+                            ASCII.isDigit(nextChar)) {
+                            gsb.append(nextChar);
+                            cursor++;
+                        } else {
+                            break;
+                        }
                    }
-                    int newRefNum = (refNum * 10) + nextDigit;
-                    if (groupCount() < newRefNum) {
-                        done = true;
-                    } else {
-                        refNum = newRefNum;
-                        cursor++;
+                    if (gsb.length() == 0)
+                        throw new IllegalArgumentException(
+                            "named capturing group has 0 length name");
+                    if (nextChar != '>')
+                        throw new IllegalArgumentException(
+                            "named capturing group is missing trailing '>'");
+                    String gname = gsb.toString();
+                    if (!parentPattern.namedGroups().containsKey(gname))
+                        throw new IllegalArgumentException(
+                            "No group with name <" + gname + ">");
+                    refNum = parentPattern.namedGroups().get(gname);
+                    cursor++;
+                } else {
+                    // The first number is always a group
+                    refNum = (int)nextChar - '0';
+                    if ((refNum < 0)||(refNum > 9))
+                        throw new IllegalArgumentException(
+                            "Illegal group reference");
+                    cursor++;
+                    // Capture the largest legal group string
+                    boolean done = false;
+                    while (!done) {
+                        if (cursor >= replacement.length()) {
+                            break;
+                        }
+                        int nextDigit = replacement.charAt(cursor) - '0';
+                        if ((nextDigit < 0)||(nextDigit > 9)) { // not a number
+                            break;
+                        }
+                        int newRefNum = (refNum * 10) + nextDigit;
+                        if (groupCount() < newRefNum) {
+                            done = true;
+                        } else {
+                            refNum = newRefNum;
+                            cursor++;
+                        }
                    }
                }
                // Append group

--- a/src/share/classes/java/util/regex/Pattern.java
+++ b/src/share/classes/java/util/regex/Pattern.java
@@ -29,6 +29,7 @@ import java.security.AccessController;
 import java.security.PrivilegedAction;
 import java.text.CharacterIterator;
 import java.text.Normalizer;
+import java.util.Map;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.Arrays;
@@ -298,6 +299,10 @@ import java.util.Arrays;
 *     <td valign="bottom" headers="matches">Whatever the <i>n</i><sup>th</sup>
 *     <a href="#cg">capturing group</a> matched</td></tr>
 *
+ * <tr><td valign="bottom" headers="construct backref"><tt>\</tt><i>k</i>&lt;<i>name</i>&gt;</td>
+ *     <td valign="bottom" headers="matches">Whatever the
+ *     <a href="#groupname">named-capturing group</a> "name" matched</td></tr>
+ *
 * <tr><th>&nbsp;</th></tr>
 * <tr align="left"><th colspan="2" id="quot">Quotation</th></tr>
 *
@@ -310,8 +315,10 @@ import java.util.Arrays;
 *     <!-- Metachars: !$()*+.<>?[\]^{|} -->
 *
 * <tr><th>&nbsp;</th></tr>
- * <tr align="left"><th colspan="2" id="special">Special constructs (non-capturing)</th></tr>
+ * <tr align="left"><th colspan="2" id="special">Special constructs (named-capturing and non-capturing)</th></tr>
 *
+ * <tr><td valign="top" headers="construct special"><tt>(?&lt;<a href="#groupname">name</a>&gt;</tt><i>X</i><tt>)</tt></td>
+ *     <td headers="matches"><i>X</i>, as a named-capturing group</td></tr>
 * <tr><td valign="top" headers="construct special"><tt>(?:</tt><i>X</i><tt>)</tt></td>
 *     <td headers="matches"><i>X</i>, as a non-capturing group</td></tr>
 * <tr><td valign="top" headers="construct special"><tt>(?idmsux-idmsux)&nbsp;</tt></td>
@@ -449,6 +456,8 @@ import java.util.Arrays;
 * <a name="cg">
 * <h4> Groups and capturing </h4>
 *
+ * <a name="gnumber"></a>
+ * <h5> Group number </h5>
 * <p> Capturing groups are numbered by counting their opening parentheses from
 * left to right.  In the expression <tt>((A)(B(C)))</tt>, for example, there
 * are four such groups: </p>
@@ -471,6 +480,24 @@ import java.util.Arrays;
 * subsequence may be used later in the expression, via a back reference, and
 * may also be retrieved from the matcher once the match operation is complete.
 *
+ * <a name="groupname"></a>
+ * <h5> Group name </h5>
+ * <p>A capturing group can also be assigned a "name", a <tt>named-capturing group</tt>,
+ * and then be back-referenced later by the "name". Group names are composed of
+ * the following characters:
+ *
+ * <ul>
+ *   <li> The uppercase letters <tt>'A'</tt> through <tt>'Z'</tt>
+ *        (<tt>'&#92;u0041'</tt>&nbsp;through&nbsp;<tt>'&#92;u005a'</tt>),
+ *   <li> The lowercase letters <tt>'a'</tt> through <tt>'z'</tt>
+ *        (<tt>'&#92;u0061'</tt>&nbsp;through&nbsp;<tt>'&#92;u007a'</tt>),
+ *   <li> The digits <tt>'0'</tt> through <tt>'9'</tt>
+ *        (<tt>'&#92;u0030'</tt>&nbsp;through&nbsp;<tt>'&#92;u0039'</tt>),
+ * </ul>
+ *
+ * <p> A <tt>named-capturing group</tt> is still numbered as described in
+ * <a href="#gnumber">Group number</a>.
+ *
 * <p> The captured input associated with a group is always the subsequence
 * that the group most recently matched.  If a group is evaluated a second time
 * because of quantification then its previously-captured value, if any, will
@@ -479,9 +506,9 @@ import java.util.Arrays;
 * group two set to <tt>"b"</tt>.  All captured input is discarded at the
 * beginning of each match.
 *
- * <p> Groups beginning with <tt>(?</tt> are pure, <i>non-capturing</i> groups
- * that do not capture text and do not count towards the group total.
- *
+ * <p> Groups beginning with <tt>(?</tt> are either pure, <i>non-capturing</i> groups
+ * that do not capture text and do not count towards the group total, or
+ * <i>named-capturing</i> groups.
 *
 * <h4> Unicode support </h4>
 *
@@ -794,6 +821,12 @@ public final class Pattern
     */
    transient int[] buffer;

+    /**
+     * Maps the name of each named-capturing group to the number of
+     * that group.
+     */
+    transient volatile Map<String, Integer> namedGroups;
+
    /**
     * Temporary storage used while parsing group references.
     */
@@ -1467,6 +1500,7 @@ loop:   for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
        // Allocate all temporary objects here.
        buffer = new int[32];
        groupNodes = new GroupHead[10];
+        namedGroups = null;

        if (has(LITERAL)) {
            // Literal pattern handling
@@ -1505,6 +1539,12 @@ loop:   for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
        compiled = true;
    }

+    /**
+     * Returns the map from group name to group number, creating an empty
+     * map the first time it is requested.
+     */
+    Map<String, Integer> namedGroups() {
+        if (namedGroups != null)
+            return namedGroups;
+        namedGroups = new HashMap<String, Integer>(2);
+        return namedGroups;
+    }
+
    /**
     * Used to print out a subtree of the Pattern to help with debugging.
     */
@@ -2156,7 +2196,22 @@ loop:   for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
        case 'h':
        case 'i':
        case 'j':
+            break;
        case 'k':
+            if (inclass)
+                break;
+            if (read() != '<')
+                throw error("\\k is not followed by '<' for named capturing group");
+            String name = groupname(read());
+            if (!namedGroups().containsKey(name))
+                throw error("(named capturing group <"+ name+"> does not exit");
+            if (create) {
+                if (has(CASE_INSENSITIVE))
+                    root = new CIBackRef(namedGroups().get(name), has(UNICODE_CASE));
+                else
+                    root = new BackRef(namedGroups().get(name));
+            }
+            return -1;
        case 'l':
        case 'm':
            break;
@@ -2455,6 +2510,24 @@ loop:   for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
        return p;
    }

+    /**
+     * Parses and returns the name of a "named capturing group", starting
+     * with the already-read character <tt>ch</tt>. The trailing ">" is
+     * consumed after parsing.
+     *
+     * <p> Every character of the name, the first one included, must be an
+     * ASCII letter or digit. Because the first character is checked like
+     * the others, an empty name (for example <tt>\k&lt;&gt;</tt>) is
+     * reported as a zero-length name instead of being silently accepted
+     * as part of the name.
+     *
+     * <p> An error built by <tt>error()</tt> is thrown if the name is
+     * empty or is not terminated by ">".
+     *
+     * @param  ch
+     *         The character that follows the opening "&lt;"
+     *
+     * @return  The non-empty group name
+     */
+    private String groupname(int ch) {
+        StringBuilder sb = new StringBuilder();
+        // Validate ch before appending it; previously the first character
+        // was appended unconditionally, which made the length check below
+        // unreachable.
+        while (ASCII.isLower(ch) || ASCII.isUpper(ch) || ASCII.isDigit(ch)) {
+            sb.append(Character.toChars(ch));
+            ch = read();
+        }
+        if (sb.length() == 0)
+            throw error("named capturing group has 0 length name");
+        // ch now holds the first character that is not part of the name.
+        if (ch != '>')
+            throw error("named capturing group is missing trailing '>'");
+        return sb.toString();
+    }
+
    /**
     * Parses a group and returns the head node of a set of nodes that process
     * the group. Sometimes a double return system is used where the tail is
@@ -2494,6 +2567,18 @@ loop:   for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
                break;
            case '<':   // (?<xxx)  look behind
                ch = read();
+                if (Character.isLetter(ch)) {     // named captured group
+                    String name = groupname(ch);
+                    if (namedGroups().containsKey(name))
+                        throw error("Named capturing group <" + name
+                                    + "> is already defined");
+                    capturingGroup = true;
+                    head = createGroup(false);
+                    tail = root;
+                    namedGroups().put(name, capturingGroupCount-1);
+                    head.next = expr(tail);
+                    break;
+                }
                int start = cursor;
                head = createGroup(true);
                tail = root;

--- a/test/java/util/regex/BMPTestCases.txt
+++ b/test/java/util/regex/BMPTestCases.txt
+//
+// Copyright 1999-2009 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+//
+// This code is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License version 2 only, as
+// published by the Free Software Foundation.
+//
+// This code is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+// version 2 for more details (a copy is included in the LICENSE file that
+// accompanied this code).
+//
+// You should have received a copy of the GNU General Public License version
+// 2 along with this work; if not, write to the Free Software Foundation,
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+// CA 95054 USA or visit www.sun.com if you need additional information or
+// have any questions.
+//
+//
+// This file contains test cases with BMP characters for regular expressions.
+// A test case consists of three lines:
+// The first line is a pattern used in the test
+// The second line is the input to search for the pattern in
+// The third line is a concatenation of the match, the number of groups,
+//     and the contents of the first four subexpressions.
+// Empty lines and lines beginning with comment slashes are ignored.
+
+// Test unsetting of backed off groups
+^(\u3042)?\u3042
+\u3042
+true \u3042 1
+
+^(\u3042\u3042(\u3043\u3043)?)+$
+\u3042\u3042\u3043\u3043\u3042\u3042
+true \u3042\u3042\u3043\u3043\u3042\u3042 2 \u3042\u3042 \u3043\u3043
+
+((\u3042|\u3043)?\u3043)+
+\u3043
+true \u3043 2 \u3043
+
+(\u3042\u3042\u3042)?\u3042\u3042\u3042
+\u3042\u3042\u3042
+true \u3042\u3042\u3042 1
+
+^(\u3042(\u3043)?)+$
+\u3042\u3043\u3042
+true \u3042\u3043\u3042 2 \u3042 \u3043
+
+^(\u3042(\u3043(\u3044)?)?)?\u3042\u3043\u3044
+\u3042\u3043\u3044
+true \u3042\u3043\u3044 3
+
+^(\u3042(\u3043(\u3044))).*
+\u3042\u3043\u3044
+true \u3042\u3043\u3044 3 \u3042\u3043\u3044 \u3043\u3044 \u3044
+
+// use of x modifier
+\u3042\u3043\u3044(?x)\u3043la\u3049
+\u3042\u3043\u3044\u3043la\u3049
+true \u3042\u3043\u3044\u3043la\u3049 0
+
+\u3042\u3043\u3044(?x)  bla\u3049
+\u3042\u3043\u3044bla\u3049
+true \u3042\u3043\u3044bla\u3049 0
+
+\u3042\u3043\u3044(?x)  bla\u3049  ble\u3044\u3049
+\u3042\u3043\u3044bla\u3049ble\u3044\u3049
+true \u3042\u3043\u3044bla\u3049ble\u3044\u3049 0
+
+\u3042\u3043\u3044(?x)  bla\u3049 # ignore comment
+\u3042\u3043\u3044bla\u3049
+true \u3042\u3043\u3044bla\u3049 0
+
+// Simple alternation
+\u3042|\u3043
+\u3042
+true \u3042 0
+
+\u3042|\u3043
+\u305B
+false 0
+
+\u3042|\u3043
+\u3043
+true \u3043 0
+
+\u3042|\u3043|\u3044\u3045
+\u3044\u3045
+true \u3044\u3045 0
+
+\u3042|\u3042\u3045
+\u3042\u3045
+true \u3042 0
+
+\u305B(\u3042|\u3042\u3044)\u3043
+\u305B\u3042\u3044\u3043
+true \u305B\u3042\u3044\u3043 1 \u3042\u3044
+
+// Simple char class
+[\u3042\u3043\u3044]+
+\u3042\u3043\u3042\u3043\u3042\u3043
+true \u3042\u3043\u3042\u3043\u3042\u3043 0
+
+[\u3042\u3043\u3044]+
+\u3045\u3046\u3047\u3048
+false 0
+
+[\u3042\u3043\u3044]+[\u3045\u3046\u3047]+[\u3048\u3049\u304A]+
+\u305B\u305B\u305B\u3042\u3042\u3045\u3045\u3048\u3048\u305B\u305B\u305B
+true \u3042\u3042\u3045\u3045\u3048\u3048 0
+
+// Range char class
+[\u3042-\u3048]+
+\u305B\u305B\u305B\u3048\u3048\u3048
+true \u3048\u3048\u3048 0
+
+[\u3042-\u3048]+
+mmm
+false 0
+
+[\u3042-]+
+\u305B\u3042-9\u305B
+true \u3042- 0
+
+[\u3042-\\u4444]+
+\u305B\u3042-9\u305B
+true \u305B\u3042 0
+
+// Negated char class
+[^\u3042\u3043\u3044]+
+\u3042\u3043\u3042\u3043\u3042\u3043
+false 0
+
+[^\u3042\u3043\u3044]+
+\u3042\u3042\u3042\u3043\u3043\u3043\u3044\u3044\u3044\u3045\u3046\u3047\u3048
+true \u3045\u3046\u3047\u3048 0
+
+// Making sure a ^ not in first position matches literal ^
+[\u3042\u3043\u3044^\u3043]
+\u3043
+true \u3043 0
+
+[\u3042\u3043\u3044^\u3043]
+^
+true ^ 0
+
+// Class union and intersection
+[\u3042\u3043\u3044[\u3045\u3046\u3047]]
+\u3043
+true \u3043 0
+
+[\u3042\u3043\u3044[\u3045\u3046\u3047]]
+\u3046
+true \u3046 0
+
+[\u3042-\u3045[0-9][\u304e-\u3051]]
+\u3042
+true \u3042 0
+
+[\u3042-\u3045[0-9][\u304e-\u3051]]
+\u3050
+true \u3050 0
+
+[\u3042-\u3045[0-9][\u304e-\u3051]]
+4
+true 4 0
+
+[\u3042-\u3045[0-9][\u304e-\u3051]]
+\u3046
+false 0
+
+[\u3042-\u3045[0-9][\u304e-\u3051]]
+\u3056
+false 0
+
+[[\u3042-\u3045][0-9][\u304e-\u3051]]
+\u3043
+true \u3043 0
+
+[[\u3042-\u3045][0-9][\u304e-\u3051]]
+\u305B
+false 0
+
+[\u3042-\u3044[\u3045-\u3047[\u3048-\u304A]]]
+\u3042
+true \u3042 0
+
+[\u3042-\u3044[\u3045-\u3047[\u3048-\u304A]]]
+\u3046
+true \u3046 0
+
+[\u3042-\u3044[\u3045-\u3047[\u3048-\u304A]]]
+\u3049
+true \u3049 0
+
+[\u3042-\u3044[\u3045-\u3047[\u3048-\u304A]]]
+m
+false 0
+
+[\u3042-\u3044[\u3045-\u3047[\u3048-\u304A]]m]
+m
+true m 0
+
+[\u3042\u3043\u3044[\u3045\u3046\u3047]\u3048\u3049\u304A]
+\u3042
+true \u3042 0
+
+[\u3042\u3043\u3044[\u3045\u3046\u3047]\u3048\u3049\u304A]
+\u3045
+true \u3045 0
+
+[\u3042\u3043\u3044[\u3045\u3046\u3047]\u3048\u3049\u304A]
+\u3049
+true \u3049 0
+
+[\u3042\u3043\u3044[\u3045\u3046\u3047]\u3048\u3049\u304A]
+w
+false 0
+
+[\u3042-\u3044&&[\u3045-\u3047]]
+\u3042
+false 0
+
+[\u3042-\u3044&&[\u3045-\u3047]]
+\u3046
+false 0
+
+[\u3042-\u3044&&[\u3045-\u3047]]
+\u305B
+false 0
+
+[[\u3042-\u3044]&&[\u3045-\u3047]]
+\u3042
+false 0
+
+[[\u3042-\u3044]&&[\u3045-\u3047]]
+\u3046
+false 0
+
+[[\u3042-\u3044]&&[\u3045-\u3047]]
+\u305B
+false 0
+
+[\u3042-\u3044&&\u3045-\u3047]
+\u3042
+false 0
+
+[\u3042-\u304e&&\u304e-\u305B]
+\u304e
+true \u304e 0
+
+[\u3042-\u304e&&\u304e-\u305B&&\u3042-\u3044]
+\u304e
+false 0
+
+[\u3042-\u304e&&\u304e-\u305B&&\u3042-\u305B]
+\u304e
+true \u304e 0
+
+[[\u3042-\u304e]&&[\u304e-\u305B]]
+\u3042
+false 0
+
+[[\u3042-\u304e]&&[\u304e-\u305B]]
+\u304e
+true \u304e 0
+
+[[\u3042-\u304e]&&[\u304e-\u305B]]
+\u305B
+false 0
+
+[[\u3042-\u304e]&&[^\u3042-\u3044]]
+\u3042
+false 0
+
+[[\u3042-\u304e]&&[^\u3042-\u3044]]
+\u3045
+true \u3045 0
+
+[\u3042-\u304e&&[^\u3042-\u3044]]
+\u3042
+false 0
+
+[\u3042-\u304e&&[^\u3042-\u3044]]
+\u3045
+true \u3045 0
+
+[\u3042-\u3044\u3045-\u3047&&[\u3045-\u3047]]
+\u3042
+false 0
+
+[\u3042-\u3044\u3045-\u3047&&[\u3045-\u3047]]
+\u3046
+true \u3046 0
+
+[[\u3042-\u3044]&&\u3045-\u3047\u3042-\u3044]
+\u3042
+true \u3042 0
+
+[[\u3042-\u3044]&&[\u3045-\u3047][\u3042-\u3044]]
+\u3042
+true \u3042 0
+
+[[\u3042-\u3044][\u3045-\u3047]&&\u3042\u3043\u3044]
+\u3042
+true \u3042 0
+
+[[\u3042-\u3044][\u3045-\u3047]&&\u3042\u3043\u3044[\u3045\u3046\u3047]]
+\u3046
+true \u3046 0
+
+[[\u3042-\u3044]&&[\u3043-\u3045]&&[\u3044-\u3046]]
+\u3042
+false 0
+
+[[\u3042-\u3044]&&[\u3043-\u3045]&&[\u3044-\u3046]]
+\u3044
+true \u3044 0
+
+[[\u3042-\u3044]&&[\u3043-\u3045][\u3044-\u3046]&&[\u3056-\u305B]]
+\u3044
+false 0
+
+[\u3042\u3043\u3044[^\u3043\u3044\u3045]]
+\u3042
+true \u3042 0
+
+[\u3042\u3043\u3044[^\u3043\u3044\u3045]]
+\u3045
+false 0
+
+[\u3042-\u3044&&\u3042-\u3045&&\u3042-\u3046\u3048\u3049\u304A]
+\u3043
+true \u3043 0
+
+[\u3042-\u3044&&\u3042-\u3045&&\u3042-\u3046\u3048\u3049\u304A]
+\u3048
+false 0
+
+[[\u3042[\u3043]]&&[\u3043[\u3042]]]
+\u3042
+true \u3042 0
+
+[[\u3042]&&[\u3043][\u3044][\u3042]&&[^\u3045]]
+\u3042
+true \u3042 0
+
+[[\u3042]&&[b][c][\u3042]&&[^d]]
+\u3042
+true \u3042 0
+
+[[\u3042]&&[\u3043][\u3044][\u3042]&&[^\u3045]]
+\u3045
+false 0
+
+[[[\u3042-\u3045]&&[\u3044-\u3047]]]
+\u3042
+false 0
+
+[[[\u3042-\u3045]&&[\u3044-\u3047]]]
+\u3044
+true \u3044 0
+
+[[[\u3042-\u3045]&&[\u3044-\u3047]]&&[\u3044]]
+\u3044
+true \u3044 0
+
+[[[\u3042-\u3045]&&[\u3044-\u3047]]&&[\u3044]&&\u3044]
+\u3044
+true \u3044 0
+
+[[[\u3042-\u3045]&&[\u3044-\u3047]]&&[\u3044]&&\u3044&&\u3044]
+\u3044
+true \u3044 0
+
+[[[\u3042-\u3045]&&[\u3044-\u3047]]&&[\u3044]&&\u3044&&[\u3044\u3045\u3046]]
+\u3044
+true \u3044 0
+
+[\u305B[\u3042\u3043\u3044&&\u3043\u3044\u3045]]
+\u3044
+true \u3044 0
+
+[\u305B[\u3042\u3043\u3044&&\u3043\u3044\u3045]&&[\u3056-\u305B]]
+\u305B
+true \u305B 0
+
+[\u3059[\u3042\u3043\u3044&&\u3043\u3044\u3045[\u305B]]&&[\u3056-\u305B]]
+\u305B
+false 0
+
+[\u3059[[w\u305B]\u3042\u3043\u3044&&\u3043\u3044\u3045[\u305B]]&&[\u3056-\u305B]]
+\u305B
+true \u305B 0
+
+[[\u3042\u3043\u3044]&&[\u3045\u3046\u3047]\u3042\u3043\u3044]
+\u3042
+true \u3042 0
+
+[[\u3042\u3043\u3044]&&[\u3045\u3046\u3047]\u3059\u305A\u305B[\u3042\u3043\u3044]]
+\u3042
+true \u3042 0
+
+\pL
+\u3042
+true \u3042 0
+
+\pL
+7
+false 0
+
+\p{L}
+\u3042
+true \u3042 0
+
+\p{IsL}
+\u3042
+true \u3042 0
+
+\p{InHiragana}
+\u3042
+true \u3042 0
+
+\p{InHiragana}
+\u0370
+false 0
+
+\pL\u3043\u3044
+\u3042\u3043\u3044
+true \u3042\u3043\u3044 0
+
+\u3042[r\p{InGreek}]\u3044
+\u3042\u0370\u3044
+true \u3042\u0370\u3044 0
+
+\u3042\p{InGreek}
+\u3042\u0370
+true \u3042\u0370 0
+
+\u3042\P{InGreek}
+\u3042\u0370
+false 0
+
+\u3042\P{InGreek}
+\u3042\u3043
+true \u3042\u3043 0
+
+\u3042{^InGreek}
+-
+error
+
+\u3042\p{^InGreek}
+-
+error
+
+\u3042\P{^InGreek}
+-
+error
+
+\u3042\p{InGreek}
+\u3042\u0370
+true \u3042\u0370 0
+
+\u3042[\p{InGreek}]\u3044
+\u3042\u0370\u3044
+true \u3042\u0370\u3044 0
+
+\u3042[\P{InGreek}]\u3044
+\u3042\u0370\u3044
+false 0
+
+\u3042[\P{InGreek}]\u3044
+\u3042\u3043\u3044
+true \u3042\u3043\u3044 0
+
+\u3042[{^InGreek}]\u3044
+\u3042n\u3044
+true \u3042n\u3044 0
+
+\u3042[{^InGreek}]\u3044
+\u3042\u305B\u3044
+false 0
+
+\u3042[\p{^InGreek}]\u3044
+-
+error
+
+\u3042[\P{^InGreek}]\u3044
+-
+error
+
+\u3042[\p{InGreek}]
+\u3042\u0370
+true \u3042\u0370 0
+
+\u3042[r\p{InGreek}]\u3044
+\u3042r\u3044
+true \u3042r\u3044 0
+
+\u3042[\p{InGreek}r]\u3044
+\u3042r\u3044
+true \u3042r\u3044 0
+
+\u3042[r\p{InGreek}]\u3044
+\u3042r\u3044
+true \u3042r\u3044 0
+
+\u3042[^\p{InGreek}]\u3044
+\u3042\u0370\u3044
+false 0
+
+\u3042[^\P{InGreek}]\u3044
+\u3042\u0370\u3044
+true \u3042\u0370\u3044 0
+
+\u3042[\p{InGreek}&&[^\u0370]]\u3044
+\u3042\u0370\u3044
+false 0
+
+// Test the dot metacharacter
+\u3042.\u3044.+
+\u3042#\u3044%&
+true \u3042#\u3044%& 0
+
+\u3042\u3043.
+\u3042\u3043\n
+false 0
+
+(?s)\u3042\u3043.
+\u3042\u3043\n
+true \u3042\u3043\n 0
+
+\u3042[\p{L}&&[\P{InGreek}]]\u3044
+\u3042\u6000\u3044
+true \u3042\u6000\u3044 0
+
+\u3042[\p{L}&&[\P{InGreek}]]\u3044
+\u3042r\u3044
+true \u3042r\u3044 0
+
+\u3042[\p{L}&&[\P{InGreek}]]\u3044
+\u3042\u0370\u3044
+false 0
+
+\u3042\p{InGreek}\u3044
+\u3042\u0370\u3044
+true \u3042\u0370\u3044 0
+
+\u3042\p{Sc}
+\u3042$
+true \u3042$ 0
+
+\W\w\W
+rrrr#\u3048\u3048\u3048
+false 0
+
+\u3042\u3043\u3044[\s\u3045\u3046\u3047]*
+\u3042\u3043\u3044  \u3045\u3046\u3047
+true \u3042\u3043\u3044  \u3045\u3046\u3047 0
+
+\u3042\u3043\u3044[\s\u305A-\u305B]*
+\u3042\u3043\u3044 \u305A \u305B
+true \u3042\u3043\u3044 \u305A \u305B 0
+
+\u3042\u3043\u3044[\u3042-\u3045\s\u304e-\u3051]*
+\u3042\u3043\u3044\u3042\u3042 \u304e\u304f  \u3051
+true \u3042\u3043\u3044\u3042\u3042 \u304e\u304f  \u3051 0
+
+// Test the whitespace escape sequence
+\u3042\u3043\s\u3044
+\u3042\u3043 \u3044
+true \u3042\u3043 \u3044 0
+
+\s\s\s
+\u3043l\u3042\u3049  \u3046rr
+false 0
+
+\S\S\s
+\u3043l\u3042\u3049  \u3046rr
+true \u3042\u3049  0
+
+// Test the digit escape sequence
+\u3042\u3043\d\u3044
+\u3042\u30439\u3044
+true \u3042\u30439\u3044 0
+
+\d\d\d
+\u3043l\u3042\u304945
+false 0
+
+// Test the caret metacharacter
+^\u3042\u3043\u3044
+\u3042\u3043\u3044\u3045\u3046\u3047
+true \u3042\u3043\u3044 0
+
+^\u3042\u3043\u3044
+\u3043\u3044\u3045\u3042\u3043\u3044
+false 0
+
+// Greedy ? metacharacter
+\u3042?\u3043
+\u3042\u3042\u3042\u3042\u3043
+true \u3042\u3043 0
+
+\u3042?\u3043
+\u3043
+true \u3043 0
+
+\u3042?\u3043
+\u3042\u3042\u3042\u3044\u3044\u3044
+false 0
+
+.?\u3043
+\u3042\u3042\u3042\u3042\u3043
+true \u3042\u3043 0
+
+// Reluctant ? metacharacter
+\u3042??\u3043
+\u3042\u3042\u3042\u3042\u3043
+true \u3042\u3043 0
+
+\u3042??\u3043
+\u3043
+true \u3043 0
+
+\u3042??\u3043
+\u3042\u3042\u3042\u3044\u3044\u3044
+false 0
+
+.??\u3043
+\u3042\u3042\u3042\u3042\u3043
+true \u3042\u3043 0
+
+// Possessive ? metacharacter
+\u3042?+\u3043
+\u3042\u3042\u3042\u3042\u3043
+true \u3042\u3043 0
+
+\u3042?+\u3043
+\u3043
+true \u3043 0
+
+\u3042?+\u3043
+\u3042\u3042\u3042\u3044\u3044\u3044
+false 0
+
+.?+\u3043
+\u3042\u3042\u3042\u3042\u3043
+true \u3042\u3043 0
+
+// Greedy + metacharacter
+\u3042+\u3043
+\u3042\u3042\u3042\u3042\u3043
+true \u3042\u3042\u3042\u3042\u3043 0
+
+\u3042+\u3043
+\u3043
+false 0
+
+\u3042+\u3043
+\u3042\u3042\u3042\u3044\u3044\u3044
+false 0
+
+.+\u3043
+\u3042\u3042\u3042\u3042\u3043
+true \u3042\u3042\u3042\u3042\u3043 0
+
+// Reluctant + metacharacter
+\u3042+?\u3043
+\u3042\u3042\u3042\u3042\u3043
+true \u3042\u3042\u3042\u3042\u3043 0
+
+\u3042+?\u3043
+\u3043
+false 0
+
+\u3042+?\u3043
+\u3042\u3042\u3042\u3044\u3044\u3044
+false 0
+
+.+?\u3043
+\u3042\u3042\u3042\u3042\u3043
+true \u3042\u3042\u3042\u3042\u3043 0
+
+// Possessive + metacharacter
+\u3042++\u3043
+\u3042\u3042\u3042\u3042\u3043
+true \u3042\u3042\u3042\u3042\u3043 0
+
+\u3042++\u3043
+\u3043
+false 0
+
+\u3042++\u3043
+\u3042\u3042\u3042\u3044\u3044\u3044
+false 0
+
+.++\u3043
+\u3042\u3042\u3042\u3042\u3043
+false 0
+
+// Greedy Repetition
+\u3042{2,3}
+\u3042
+false 0
+
+\u3042{2,3}
+\u3042\u3042
+true \u3042\u3042 0
+
+\u3042{2,3}
+\u3042\u3042\u3042
+true \u3042\u3042\u3042 0
+
+\u3042{2,3}
+\u3042\u3042\u3042\u3042
+true \u3042\u3042\u3042 0
+
+\u3042{3,}
+\u305B\u305B\u305B\u3042\u3042\u3042\u3042\u305B\u305B\u305B
+true \u3042\u3042\u3042\u3042 0
+
+\u3042{3,}
+\u305B\u305B\u305B\u3042\u3042\u305B\u305B\u305B
+false 0
+
+// Reluctant Repetition
+\u3042{2,3}?
+\u3042
+false 0
+
+\u3042{2,3}?
+\u3042\u3042
+true \u3042\u3042 0
+
+\u3042{2,3}?
+\u3042\u3042\u3042
+true \u3042\u3042 0
+
+\u3042{2,3}?
+\u3042\u3042\u3042\u3042
+true \u3042\u3042 0
+
+// Zero width Positive lookahead
+\u3042\u3043\u3044(?=\u3045)
+\u305B\u305B\u305B\u3042\u3043\u3044\u3045
+true \u3042\u3043\u3044 0
+
+\u3042\u3043\u3044(?=\u3045)
+\u305B\u305B\u305B\u3042\u3043\u3044\u3046\u3045
+false 0
+
+// Zero width Negative lookahead
+\u3042\u3043\u3044(?!\u3045)
+\u305B\u305B\u3042\u3043\u3044\u3045
+false 0
+
+\u3042\u3043\u3044(?!\u3045)
+\u305B\u305B\u3042\u3043\u3044\u3046\u3045
+true \u3042\u3043\u3044 0
+
+// Zero width Positive lookbehind
+\u3042(?<=\u3042)
+###\u3042\u3043\u3044
+true \u3042 0
+
+\u3042(?<=\u3042)
+###\u3043\u3044###
+false 0
+
+// Zero width Negative lookbehind
+(?<!\u3042)\w
+###\u3042\u3043\u3044a###
+true a 0
+
+(?<!\u3042)\u3044
+\u3043\u3044
+true \u3044 0
+
+(?<!\u3042)\u3044
+\u3042\u3044
+false 0
+
+// Nondeterministic group
+(\u3042+\u3043)+
+\u3042\u3043\u3042\u3043\u3042\u3043
+true \u3042\u3043\u3042\u3043\u3042\u3043 1 \u3042\u3043
+
+(\u3042|\u3043)+
+\u3044\u3044\u3044\u3044\u3045
+false 1
+
+// Deterministic group
+(\u3042\u3043)+
+\u3042\u3043\u3042\u3043\u3042\u3043
+true \u3042\u3043\u3042\u3043\u3042\u3043 1 \u3042\u3043
+
+(\u3042\u3043)+
+\u3042\u3044\u3044\u3044\u3044\u3045
+false 1
+
+(\u3042\u3043)*
+\u3042\u3043\u3042\u3043\u3042\u3043
+true \u3042\u3043\u3042\u3043\u3042\u3043 1 \u3042\u3043
+
+(\u3042\u3043)(\u3044\u3045*)
+\u305B\u305B\u305B\u3042\u3043\u3044\u305B\u305B\u305B
+true \u3042\u3043\u3044 2 \u3042\u3043 \u3044
+
+\u3042\u3043\u3044(\u3045)*\u3042\u3043\u3044
+\u3042\u3043\u3044\u3045\u3045\u3045\u3045\u3045\u3042\u3043\u3044
+true \u3042\u3043\u3044\u3045\u3045\u3045\u3045\u3045\u3042\u3043\u3044 1 \u3045
+
+// Back references
+(\u3042*)\u3043\u3044\1
+\u305B\u305B\u305B\u3042\u3042\u3043\u3044\u3042\u3042\u305B\u305B\u305B
+true \u3042\u3042\u3043\u3044\u3042\u3042 1 \u3042\u3042
+
+(\u3042*)\u3043\u3044\1
+\u305B\u305B\u305B\u3042\u3042\u3043\u3044\u3042\u305B\u305B\u305B
+true \u3042\u3043\u3044\u3042 1 \u3042
+
+(\u3048t*)(\u3045\u3045\u3046)*(\u305A\u3056)\1\3(\u3057\u3057)
+\u305B\u305B\u305B\u3048tt\u3045\u3045\u3046\u3045\u3045\u3046\u305A\u3056\u3048tt\u305A\u3056\u3057\u3057\u305B\u305B\u305B
+true \u3048tt\u3045\u3045\u3046\u3045\u3045\u3046\u305A\u3056\u3048tt\u305A\u3056\u3057\u3057 4 \u3048tt \u3045\u3045\u3046 \u305A\u3056 \u3057\u3057
+
+// Greedy * metacharacter
+\u3042*\u3043
+\u3042\u3042\u3042\u3042\u3043
+true \u3042\u3042\u3042\u3042\u3043 0
+
+\u3042*\u3043
+\u3043
+true \u3043 0
+
+\u3042*\u3043
+\u3042\u3042\u3042\u3044\u3044\u3044
+false 0
+
+.*\u3043
+\u3042\u3042\u3042\u3042\u3043
+true \u3042\u3042\u3042\u3042\u3043 0
+
+// Reluctant * metacharacter
+\u3042*?\u3043
+\u3042\u3042\u3042\u3042\u3043
+true \u3042\u3042\u3042\u3042\u3043 0
+
+\u3042*?\u3043
+\u3043
+true \u3043 0
+
+\u3042*?\u3043
+\u3042\u3042\u3042\u3044\u3044\u3044
+false 0
+
+.*?\u3043
+\u3042\u3042\u3042\u3042\u3043
+true \u3042\u3042\u3042\u3042\u3043 0
+
+// Possessive * metacharacter
+\u3042*+\u3043
+\u3042\u3042\u3042\u3042\u3043
+true \u3042\u3042\u3042\u3042\u3043 0
+
+\u3042*+\u3043
+\u3043
+true \u3043 0
+
+\u3042*+\u3043
+\u3042\u3042\u3042\u3044\u3044\u3044
+false 0
+
+.*+\u3043
+\u3042\u3042\u3042\u3042\u3043
+false 0
+
+// Case insensitivity
+(?iu)\uFF46\uFF4F\uFF4F\uFF42\uFF41\uFF52
+\uFF46\uFF2F\uFF4F\uFF42\uFF21\uFF52
+true \uFF46\uFF2F\uFF4F\uFF42\uFF21\uFF52 0
+
+\uFF46(?iu)\uFF4F\uFF4F\uFF42\uFF41\uFF52
+\uFF46\uFF2F\uFF4F\uFF42\uFF21\uFF52
+true \uFF46\uFF2F\uFF4F\uFF42\uFF21\uFF52 0
+
+\uFF46\uFF4F\uFF4F(?iu)\uFF42\uFF41\uFF52
+\uFF46\uFF2F\uFF4F\uFF42\uFF21\uFF52
+false 0
+
+(?iu)\uFF46\uFF4F\uFF4F[\uFF42\uFF41\uFF52]+
+\uFF46\uFF4F\uFF2F\uFF42\uFF21\uFF52
+true \uFF46\uFF4F\uFF2F\uFF42\uFF21\uFF52 0
+
+(?iu)\uFF46\uFF4F\uFF4F[\uFF41-\uFF52]+
+\uFF46\uFF4F\uFF2F\uFF42\uFF21\uFF52
+true \uFF46\uFF4F\uFF2F\uFF42\uFF21\uFF52 0
+
+// Disable metacharacters- test both length <=3 and >3
+// So that the BM optimization is part of test
+\Q***\E\u3042\u3043\u3044
+***\u3042\u3043\u3044
+true ***\u3042\u3043\u3044 0
+
+\u3043l\Q***\E\u3042\u3043\u3044
+\u3043l***\u3042\u3043\u3044
+true \u3043l***\u3042\u3043\u3044 0
+
+\Q***\u3042\u3043\u3044
+***\u3042\u3043\u3044
+true ***\u3042\u3043\u3044 0
+
+\u3043l\u3042\u3049\Q***\E\u3042\u3043\u3044
+\u3043l\u3042\u3049***\u3042\u3043\u3044
+true \u3043l\u3042\u3049***\u3042\u3043\u3044 0
+
+\Q***\u3042\u3043\u3044
+***\u3042\u3043\u3044
+true ***\u3042\u3043\u3044 0
+
+\Q*\u3042\u3043
+*\u3042\u3043
+true *\u3042\u3043 0
+
+\u3043l\u3042\u3049\Q***\u3042\u3043\u3044
+\u3043l\u3042\u3049***\u3042\u3043\u3044
+true \u3043l\u3042\u3049***\u3042\u3043\u3044 0
+
+\u3043l\u3042\Q***\u3042\u3043\u3044
+\u3043l\u3042***\u3042\u3043\u3044
+true \u3043l\u3042***\u3042\u3043\u3044 0
+
+[\043]+
+\u3043l\u3042\u3049\u3043l\u3042\u3049#\u3043le\u3044\u3049
+true # 0
+
+[\042-\044]+
+\u3043l\u3042\u3049\u3043l\u3042\u3049#\u3043le\u3044\u3049
+true # 0
+
+[\u1234-\u1236]
+\u3043l\u3042\u3049\u3043l\u3042\u3049\u1235\u3043le\u3044\u3049
+true \u1235 0
+
+[^\043]*
+\u3043l\u3042\u3049\u3043l\u3042\u3049#\u3043le\u3044\u3049
+true \u3043l\u3042\u3049\u3043l\u3042\u3049 0
--- a/test/java/util/regex/RegExTest.java
+++ b/test/java/util/regex/RegExTest.java
--- a/test/java/util/regex/SupplementaryTestCases.txt
+++ b/test/java/util/regex/SupplementaryTestCases.txt
--- a/test/java/util/regex/TestCases.txt
+++ b/test/java/util/regex/TestCases.txt