提交 612f2586 编写于 作者: S sherman

6878475: Better syntax for the named capture group in regex

Summary: Updated the syntax of the newly added named capture group
Reviewed-by: martin, alanb
上级 9e1d9e6d
......@@ -688,7 +688,7 @@ public final class Matcher implements MatchResult {
*
* <p> The replacement string may contain references to subsequences
* captured during the previous match: Each occurrence of
* <tt>$</tt>&lt;<i>name</i>&gt; or <tt>$</tt><i>g</i>
* <tt>${</tt><i>name</i><tt>}</tt> or <tt>$</tt><i>g</i>
* will be replaced by the result of evaluating the corresponding
* {@link #group(String) group(name)} or {@link #group(int) group(g)}
* respectively. For <tt>$</tt><i>g</i>,
......@@ -770,7 +770,7 @@ public final class Matcher implements MatchResult {
// more appropriate.
nextChar = replacement.charAt(cursor);
int refNum = -1;
if (nextChar == '<') {
if (nextChar == '{') {
cursor++;
StringBuilder gsb = new StringBuilder();
while (cursor < replacement.length()) {
......@@ -787,13 +787,17 @@ public final class Matcher implements MatchResult {
if (gsb.length() == 0)
throw new IllegalArgumentException(
"named capturing group has 0 length name");
if (nextChar != '>')
if (nextChar != '}')
throw new IllegalArgumentException(
"named capturing group is missing trailing '>'");
"named capturing group is missing trailing '}'");
String gname = gsb.toString();
if (ASCII.isDigit(gname.charAt(0)))
throw new IllegalArgumentException(
"capturing group name {" + gname +
"} starts with digit character");
if (!parentPattern.namedGroups().containsKey(gname))
throw new IllegalArgumentException(
"No group with name <" + gname + ">");
"No group with name {" + gname + "}");
refNum = parentPattern.namedGroups().get(gname);
cursor++;
} else {
......
......@@ -484,7 +484,7 @@ import java.util.Arrays;
* <h5> Group name </h5>
* <p>A capturing group can also be assigned a "name", a <tt>named-capturing group</tt>,
* and then be back-referenced later by the "name". Group names are composed of
* the following characters:
* the following characters. The first character must be a <tt>letter</tt>.
*
* <ul>
* <li> The uppercase letters <tt>'A'</tt> through <tt>'Z'</tt>
......@@ -2567,7 +2567,7 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
break;
case '<': // (?<xxx) look behind
ch = read();
if (ASCII.isLower(ch) || ASCII.isUpper(ch) || ASCII.isDigit(ch)) {
if (ASCII.isLower(ch) || ASCII.isUpper(ch)) {
// named captured group
String name = groupname(ch);
if (namedGroups().containsKey(name))
......
......@@ -32,7 +32,7 @@
* 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476
* 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940
* 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133
* 6350801 6676425
* 6350801 6676425 6878475
*/
import java.util.regex.*;
......@@ -3389,9 +3389,9 @@ public class RegExTest {
"gname",
"yyy");
check(Pattern.compile("x+(?<8gname>y+)z+"),
check(Pattern.compile("x+(?<gname8>y+)z+"),
"xxxyyyzzz",
"8gname",
"gname8",
"yyy");
//backref
......@@ -3430,81 +3430,82 @@ public class RegExTest {
//replaceFirst/All
checkReplaceFirst("(?<gn>ab)(c*)",
"abccczzzabcczzzabccc",
"$<gn>",
"${gn}",
"abzzzabcczzzabccc");
checkReplaceAll("(?<gn>ab)(c*)",
"abccczzzabcczzzabccc",
"$<gn>",
"${gn}",
"abzzzabzzzab");
checkReplaceFirst("(?<gn>ab)(c*)",
"zzzabccczzzabcczzzabccczzz",
"$<gn>",
"${gn}",
"zzzabzzzabcczzzabccczzz");
checkReplaceAll("(?<gn>ab)(c*)",
"zzzabccczzzabcczzzabccczzz",
"$<gn>",
"${gn}",
"zzzabzzzabzzzabzzz");
checkReplaceFirst("(?<gn1>ab)(?<gn2>c*)",
"zzzabccczzzabcczzzabccczzz",
"$<gn2>",
"${gn2}",
"zzzccczzzabcczzzabccczzz");
checkReplaceAll("(?<gn1>ab)(?<gn2>c*)",
"zzzabccczzzabcczzzabccczzz",
"$<gn2>",
"${gn2}",
"zzzccczzzcczzzccczzz");
//toSupplementaries("(ab)(c*)"));
checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") +
")(?<gn2>" + toSupplementaries("c") + "*)",
toSupplementaries("abccczzzabcczzzabccc"),
"$<gn1>",
"${gn1}",
toSupplementaries("abzzzabcczzzabccc"));
checkReplaceAll("(?<gn1>" + toSupplementaries("ab") +
")(?<gn2>" + toSupplementaries("c") + "*)",
toSupplementaries("abccczzzabcczzzabccc"),
"$<gn1>",
"${gn1}",
toSupplementaries("abzzzabzzzab"));
checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") +
")(?<gn2>" + toSupplementaries("c") + "*)",
toSupplementaries("abccczzzabcczzzabccc"),
"$<gn2>",
"${gn2}",
toSupplementaries("ccczzzabcczzzabccc"));
checkReplaceAll("(?<gn1>" + toSupplementaries("ab") +
")(?<gn2>" + toSupplementaries("c") + "*)",
toSupplementaries("abccczzzabcczzzabccc"),
"$<gn2>",
"${gn2}",
toSupplementaries("ccczzzcczzzccc"));
checkReplaceFirst("(?<dog>Dog)AndCat",
"zzzDogAndCatzzzDogAndCatzzz",
"$<dog>",
"${dog}",
"zzzDogzzzDogAndCatzzz");
checkReplaceAll("(?<dog>Dog)AndCat",
"zzzDogAndCatzzzDogAndCatzzz",
"$<dog>",
"${dog}",
"zzzDogzzzDogzzz");
// backref in Matcher & String
if (!"abcdefghij".replaceFirst("cd(?<gn>ef)gh", "$<gn>").equals("abefij") ||
!"abbbcbdbefgh".replaceAll("(?<gn>[a-e])b", "$<gn>").equals("abcdefgh"))
if (!"abcdefghij".replaceFirst("cd(?<gn>ef)gh", "${gn}").equals("abefij") ||
!"abbbcbdbefgh".replaceAll("(?<gn>[a-e])b", "${gn}").equals("abcdefgh"))
failCount++;
// negative
checkExpectedFail("(?<groupnamehasnoascii.in>abc)(def)");
checkExpectedFail("(?<groupnamehasnoascii_in>abc)(def)");
checkExpectedFail("(?<6groupnamestartswithdigit>abc)(def)");
checkExpectedFail("(?<gname>abc)(def)\\k<gnameX>");
checkExpectedFail("(?<gname>abc)(?<gname>def)\\k<gnameX>");
checkExpectedFail(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"),
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册