提交 612f2586 编写于 作者: S sherman

6878475: Better syntax for the named capture group in regex

Summary: Updated the syntax of the newly added named capture group
Reviewed-by: martin, alanb
上级 9e1d9e6d
...@@ -688,7 +688,7 @@ public final class Matcher implements MatchResult { ...@@ -688,7 +688,7 @@ public final class Matcher implements MatchResult {
* *
* <p> The replacement string may contain references to subsequences * <p> The replacement string may contain references to subsequences
* captured during the previous match: Each occurrence of * captured during the previous match: Each occurrence of
* <tt>$</tt>&lt;<i>name</i>&gt; or <tt>$</tt><i>g</i> * <tt>${</tt><i>name</i><tt>}</tt> or <tt>$</tt><i>g</i>
* will be replaced by the result of evaluating the corresponding * will be replaced by the result of evaluating the corresponding
* {@link #group(String) group(name)} or {@link #group(int) group(g)} * {@link #group(String) group(name)} or {@link #group(int) group(g)}
* respectively. For <tt>$</tt><i>g</i>, * respectively. For <tt>$</tt><i>g</i>,
...@@ -770,7 +770,7 @@ public final class Matcher implements MatchResult { ...@@ -770,7 +770,7 @@ public final class Matcher implements MatchResult {
// more appropriate. // more appropriate.
nextChar = replacement.charAt(cursor); nextChar = replacement.charAt(cursor);
int refNum = -1; int refNum = -1;
if (nextChar == '<') { if (nextChar == '{') {
cursor++; cursor++;
StringBuilder gsb = new StringBuilder(); StringBuilder gsb = new StringBuilder();
while (cursor < replacement.length()) { while (cursor < replacement.length()) {
...@@ -787,13 +787,17 @@ public final class Matcher implements MatchResult { ...@@ -787,13 +787,17 @@ public final class Matcher implements MatchResult {
if (gsb.length() == 0) if (gsb.length() == 0)
throw new IllegalArgumentException( throw new IllegalArgumentException(
"named capturing group has 0 length name"); "named capturing group has 0 length name");
if (nextChar != '>') if (nextChar != '}')
throw new IllegalArgumentException( throw new IllegalArgumentException(
"named capturing group is missing trailing '>'"); "named capturing group is missing trailing '}'");
String gname = gsb.toString(); String gname = gsb.toString();
if (ASCII.isDigit(gname.charAt(0)))
throw new IllegalArgumentException(
"capturing group name {" + gname +
"} starts with digit character");
if (!parentPattern.namedGroups().containsKey(gname)) if (!parentPattern.namedGroups().containsKey(gname))
throw new IllegalArgumentException( throw new IllegalArgumentException(
"No group with name <" + gname + ">"); "No group with name {" + gname + "}");
refNum = parentPattern.namedGroups().get(gname); refNum = parentPattern.namedGroups().get(gname);
cursor++; cursor++;
} else { } else {
......
...@@ -484,7 +484,7 @@ import java.util.Arrays; ...@@ -484,7 +484,7 @@ import java.util.Arrays;
* <h5> Group name </h5> * <h5> Group name </h5>
* <p>A capturing group can also be assigned a "name", a <tt>named-capturing group</tt>, * <p>A capturing group can also be assigned a "name", a <tt>named-capturing group</tt>,
* and then be back-referenced later by the "name". Group names are composed of * and then be back-referenced later by the "name". Group names are composed of
* the following characters: * the following characters. The first character must be a <tt>letter</tt>.
* *
* <ul> * <ul>
* <li> The uppercase letters <tt>'A'</tt> through <tt>'Z'</tt> * <li> The uppercase letters <tt>'A'</tt> through <tt>'Z'</tt>
...@@ -2567,7 +2567,7 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) { ...@@ -2567,7 +2567,7 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
break; break;
case '<': // (?<xxx) look behind case '<': // (?<xxx) look behind
ch = read(); ch = read();
if (ASCII.isLower(ch) || ASCII.isUpper(ch) || ASCII.isDigit(ch)) { if (ASCII.isLower(ch) || ASCII.isUpper(ch)) {
// named captured group // named captured group
String name = groupname(ch); String name = groupname(ch);
if (namedGroups().containsKey(name)) if (namedGroups().containsKey(name))
......
...@@ -32,7 +32,7 @@ ...@@ -32,7 +32,7 @@
* 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476 * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476
* 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940 * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940
* 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133 * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133
* 6350801 6676425 * 6350801 6676425 6878475
*/ */
import java.util.regex.*; import java.util.regex.*;
...@@ -3389,9 +3389,9 @@ public class RegExTest { ...@@ -3389,9 +3389,9 @@ public class RegExTest {
"gname", "gname",
"yyy"); "yyy");
check(Pattern.compile("x+(?<8gname>y+)z+"), check(Pattern.compile("x+(?<gname8>y+)z+"),
"xxxyyyzzz", "xxxyyyzzz",
"8gname", "gname8",
"yyy"); "yyy");
//backref //backref
...@@ -3430,81 +3430,82 @@ public class RegExTest { ...@@ -3430,81 +3430,82 @@ public class RegExTest {
//replaceFirst/All //replaceFirst/All
checkReplaceFirst("(?<gn>ab)(c*)", checkReplaceFirst("(?<gn>ab)(c*)",
"abccczzzabcczzzabccc", "abccczzzabcczzzabccc",
"$<gn>", "${gn}",
"abzzzabcczzzabccc"); "abzzzabcczzzabccc");
checkReplaceAll("(?<gn>ab)(c*)", checkReplaceAll("(?<gn>ab)(c*)",
"abccczzzabcczzzabccc", "abccczzzabcczzzabccc",
"$<gn>", "${gn}",
"abzzzabzzzab"); "abzzzabzzzab");
checkReplaceFirst("(?<gn>ab)(c*)", checkReplaceFirst("(?<gn>ab)(c*)",
"zzzabccczzzabcczzzabccczzz", "zzzabccczzzabcczzzabccczzz",
"$<gn>", "${gn}",
"zzzabzzzabcczzzabccczzz"); "zzzabzzzabcczzzabccczzz");
checkReplaceAll("(?<gn>ab)(c*)", checkReplaceAll("(?<gn>ab)(c*)",
"zzzabccczzzabcczzzabccczzz", "zzzabccczzzabcczzzabccczzz",
"$<gn>", "${gn}",
"zzzabzzzabzzzabzzz"); "zzzabzzzabzzzabzzz");
checkReplaceFirst("(?<gn1>ab)(?<gn2>c*)", checkReplaceFirst("(?<gn1>ab)(?<gn2>c*)",
"zzzabccczzzabcczzzabccczzz", "zzzabccczzzabcczzzabccczzz",
"$<gn2>", "${gn2}",
"zzzccczzzabcczzzabccczzz"); "zzzccczzzabcczzzabccczzz");
checkReplaceAll("(?<gn1>ab)(?<gn2>c*)", checkReplaceAll("(?<gn1>ab)(?<gn2>c*)",
"zzzabccczzzabcczzzabccczzz", "zzzabccczzzabcczzzabccczzz",
"$<gn2>", "${gn2}",
"zzzccczzzcczzzccczzz"); "zzzccczzzcczzzccczzz");
//toSupplementaries("(ab)(c*)")); //toSupplementaries("(ab)(c*)"));
checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") + checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") +
")(?<gn2>" + toSupplementaries("c") + "*)", ")(?<gn2>" + toSupplementaries("c") + "*)",
toSupplementaries("abccczzzabcczzzabccc"), toSupplementaries("abccczzzabcczzzabccc"),
"$<gn1>", "${gn1}",
toSupplementaries("abzzzabcczzzabccc")); toSupplementaries("abzzzabcczzzabccc"));
checkReplaceAll("(?<gn1>" + toSupplementaries("ab") + checkReplaceAll("(?<gn1>" + toSupplementaries("ab") +
")(?<gn2>" + toSupplementaries("c") + "*)", ")(?<gn2>" + toSupplementaries("c") + "*)",
toSupplementaries("abccczzzabcczzzabccc"), toSupplementaries("abccczzzabcczzzabccc"),
"$<gn1>", "${gn1}",
toSupplementaries("abzzzabzzzab")); toSupplementaries("abzzzabzzzab"));
checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") + checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") +
")(?<gn2>" + toSupplementaries("c") + "*)", ")(?<gn2>" + toSupplementaries("c") + "*)",
toSupplementaries("abccczzzabcczzzabccc"), toSupplementaries("abccczzzabcczzzabccc"),
"$<gn2>", "${gn2}",
toSupplementaries("ccczzzabcczzzabccc")); toSupplementaries("ccczzzabcczzzabccc"));
checkReplaceAll("(?<gn1>" + toSupplementaries("ab") + checkReplaceAll("(?<gn1>" + toSupplementaries("ab") +
")(?<gn2>" + toSupplementaries("c") + "*)", ")(?<gn2>" + toSupplementaries("c") + "*)",
toSupplementaries("abccczzzabcczzzabccc"), toSupplementaries("abccczzzabcczzzabccc"),
"$<gn2>", "${gn2}",
toSupplementaries("ccczzzcczzzccc")); toSupplementaries("ccczzzcczzzccc"));
checkReplaceFirst("(?<dog>Dog)AndCat", checkReplaceFirst("(?<dog>Dog)AndCat",
"zzzDogAndCatzzzDogAndCatzzz", "zzzDogAndCatzzzDogAndCatzzz",
"$<dog>", "${dog}",
"zzzDogzzzDogAndCatzzz"); "zzzDogzzzDogAndCatzzz");
checkReplaceAll("(?<dog>Dog)AndCat", checkReplaceAll("(?<dog>Dog)AndCat",
"zzzDogAndCatzzzDogAndCatzzz", "zzzDogAndCatzzzDogAndCatzzz",
"$<dog>", "${dog}",
"zzzDogzzzDogzzz"); "zzzDogzzzDogzzz");
// backref in Matcher & String // backref in Matcher & String
if (!"abcdefghij".replaceFirst("cd(?<gn>ef)gh", "$<gn>").equals("abefij") || if (!"abcdefghij".replaceFirst("cd(?<gn>ef)gh", "${gn}").equals("abefij") ||
!"abbbcbdbefgh".replaceAll("(?<gn>[a-e])b", "$<gn>").equals("abcdefgh")) !"abbbcbdbefgh".replaceAll("(?<gn>[a-e])b", "${gn}").equals("abcdefgh"))
failCount++; failCount++;
// negative // negative
checkExpectedFail("(?<groupnamehasnoascii.in>abc)(def)"); checkExpectedFail("(?<groupnamehasnoascii.in>abc)(def)");
checkExpectedFail("(?<groupnamehasnoascii_in>abc)(def)"); checkExpectedFail("(?<groupnamehasnoascii_in>abc)(def)");
checkExpectedFail("(?<6groupnamestartswithdigit>abc)(def)");
checkExpectedFail("(?<gname>abc)(def)\\k<gnameX>"); checkExpectedFail("(?<gname>abc)(def)\\k<gnameX>");
checkExpectedFail("(?<gname>abc)(?<gname>def)\\k<gnameX>"); checkExpectedFail("(?<gname>abc)(?<gname>def)\\k<gnameX>");
checkExpectedFail(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"), checkExpectedFail(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"),
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册