6878475: Better syntax for the named capture group in regex

Summary: Updated the syntax of the newly added named capture group. Reviewed-by: martin, alanb

6878475: Better syntax for the named capture group in regex
Summary: Updated the syntax of the newly added named capture group. Reviewed-by: martin, alanb
612f2586 · sherman · 9e1d9e6d · 612f2586 · 612f2586 · 612f2586
3 changed files
--- a/src/share/classes/java/util/regex/Matcher.java
+++ b/src/share/classes/java/util/regex/Matcher.java
@@ -688,7 +688,7 @@ public final class Matcher implements MatchResult {
     *
     * <p> The replacement string may contain references to subsequences
     * captured during the previous match: Each occurrence of
-     * <tt>$</tt>&lt;<i>name</i>&gt; or <tt>$</tt><i>g</i>
+     * <tt>${</tt><i>name</i><tt>}</tt> or <tt>$</tt><i>g</i>
     * will be replaced by the result of evaluating the corresponding
     * {@link #group(String) group(name)} or {@link #group(int) group(g)</tt>}
     * respectively. For  <tt>$</tt><i>g</i><tt></tt>,
@@ -770,7 +770,7 @@ public final class Matcher implements MatchResult {
                // more appropriate.
                nextChar = replacement.charAt(cursor);
                int refNum = -1;
-                if (nextChar == '<') {
+                if (nextChar == '{') {
                    cursor++;
                    StringBuilder gsb = new StringBuilder();
                    while (cursor < replacement.length()) {
@@ -787,13 +787,17 @@ public final class Matcher implements MatchResult {
                    if (gsb.length() == 0)
                        throw new IllegalArgumentException(
                            "named capturing group has 0 length name");
-                    if (nextChar != '>')
+                    if (nextChar != '}')
                        throw new IllegalArgumentException(
-                            "named capturing group is missing trailing '>'");
+                            "named capturing group is missing trailing '}'");
                    String gname = gsb.toString();
+                    if (ASCII.isDigit(gname.charAt(0)))
+                        throw new IllegalArgumentException(
+                            "capturing group name {" + gname +
+                            "} starts with digit character");
                    if (!parentPattern.namedGroups().containsKey(gname))
                        throw new IllegalArgumentException(
-                            "No group with name <" + gname + ">");
+                            "No group with name {" + gname + "}");
                    refNum = parentPattern.namedGroups().get(gname);
                    cursor++;
                } else {

--- a/src/share/classes/java/util/regex/Pattern.java
+++ b/src/share/classes/java/util/regex/Pattern.java
@@ -484,7 +484,7 @@ import java.util.Arrays;
 * <h5> Group name </h5>
 * <p>A capturing group can also be assigned a "name", a <tt>named-capturing group</tt>,
 * and then be back-referenced later by the "name". Group names are composed of
- * the following characters:
+ * the following characters. The first character must be a <tt>letter</tt>.
 *
 * <ul>
 *   <li> The uppercase letters <tt>'A'</tt> through <tt>'Z'</tt>
@@ -2567,7 +2567,7 @@ loop:   for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
                break;
            case '<':   // (?<xxx)  look behind
                ch = read();
-                if (ASCII.isLower(ch) || ASCII.isUpper(ch) || ASCII.isDigit(ch)) {
+                if (ASCII.isLower(ch) || ASCII.isUpper(ch)) {
                    // named captured group
                    String name = groupname(ch);
                    if (namedGroups().containsKey(name))

--- a/test/java/util/regex/RegExTest.java
+++ b/test/java/util/regex/RegExTest.java
@@ -32,7 +32,7 @@
 * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476
 * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940
 * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133
- * 6350801 6676425
+ * 6350801 6676425 6878475
 */

 import java.util.regex.*;
@@ -3389,9 +3389,9 @@ public class RegExTest {
              "gname",
              "yyy");

-        check(Pattern.compile("x+(?<8gname>y+)z+"),
+        check(Pattern.compile("x+(?<gname8>y+)z+"),
              "xxxyyyzzz",
-              "8gname",
+              "gname8",
              "yyy");

        //backref
@@ -3430,81 +3430,82 @@ public class RegExTest {
        //replaceFirst/All
        checkReplaceFirst("(?<gn>ab)(c*)",
                          "abccczzzabcczzzabccc",
-                          "$<gn>",
+                          "${gn}",
                          "abzzzabcczzzabccc");

        checkReplaceAll("(?<gn>ab)(c*)",
                        "abccczzzabcczzzabccc",
-                        "$<gn>",
+                        "${gn}",
                        "abzzzabzzzab");


        checkReplaceFirst("(?<gn>ab)(c*)",
                          "zzzabccczzzabcczzzabccczzz",
-                          "$<gn>",
+                          "${gn}",
                          "zzzabzzzabcczzzabccczzz");

        checkReplaceAll("(?<gn>ab)(c*)",
                        "zzzabccczzzabcczzzabccczzz",
-                        "$<gn>",
+                        "${gn}",
                        "zzzabzzzabzzzabzzz");

        checkReplaceFirst("(?<gn1>ab)(?<gn2>c*)",
                          "zzzabccczzzabcczzzabccczzz",
-                          "$<gn2>",
+                          "${gn2}",
                          "zzzccczzzabcczzzabccczzz");

        checkReplaceAll("(?<gn1>ab)(?<gn2>c*)",
                        "zzzabccczzzabcczzzabccczzz",
-                        "$<gn2>",
+                        "${gn2}",
                        "zzzccczzzcczzzccczzz");

        //toSupplementaries("(ab)(c*)"));
        checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") +
                           ")(?<gn2>" + toSupplementaries("c") + "*)",
                          toSupplementaries("abccczzzabcczzzabccc"),
-                          "$<gn1>",
+                          "${gn1}",
                          toSupplementaries("abzzzabcczzzabccc"));


        checkReplaceAll("(?<gn1>" + toSupplementaries("ab") +
                        ")(?<gn2>" + toSupplementaries("c") + "*)",
                        toSupplementaries("abccczzzabcczzzabccc"),
-                        "$<gn1>",
+                        "${gn1}",
                        toSupplementaries("abzzzabzzzab"));

        checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") +
                           ")(?<gn2>" + toSupplementaries("c") + "*)",
                          toSupplementaries("abccczzzabcczzzabccc"),
-                          "$<gn2>",
+                          "${gn2}",
                          toSupplementaries("ccczzzabcczzzabccc"));


        checkReplaceAll("(?<gn1>" + toSupplementaries("ab") +
                        ")(?<gn2>" + toSupplementaries("c") + "*)",
                        toSupplementaries("abccczzzabcczzzabccc"),
-                        "$<gn2>",
+                        "${gn2}",
                        toSupplementaries("ccczzzcczzzccc"));

        checkReplaceFirst("(?<dog>Dog)AndCat",
                          "zzzDogAndCatzzzDogAndCatzzz",
-                          "$<dog>",
+                          "${dog}",
                          "zzzDogzzzDogAndCatzzz");


        checkReplaceAll("(?<dog>Dog)AndCat",
                          "zzzDogAndCatzzzDogAndCatzzz",
-                          "$<dog>",
+                          "${dog}",
                          "zzzDogzzzDogzzz");

        // backref in Matcher & String
-        if (!"abcdefghij".replaceFirst("cd(?<gn>ef)gh", "$<gn>").equals("abefij") ||
-            !"abbbcbdbefgh".replaceAll("(?<gn>[a-e])b", "$<gn>").equals("abcdefgh"))
+        if (!"abcdefghij".replaceFirst("cd(?<gn>ef)gh", "${gn}").equals("abefij") ||
+            !"abbbcbdbefgh".replaceAll("(?<gn>[a-e])b", "${gn}").equals("abcdefgh"))
            failCount++;

        // negative
        checkExpectedFail("(?<groupnamehasnoascii.in>abc)(def)");
        checkExpectedFail("(?<groupnamehasnoascii_in>abc)(def)");
+        checkExpectedFail("(?<6groupnamestartswithdigit>abc)(def)");
        checkExpectedFail("(?<gname>abc)(def)\\k<gnameX>");
        checkExpectedFail("(?<gname>abc)(?<gname>def)\\k<gnameX>");
        checkExpectedFail(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"),