未验证 提交 dc173458 编写于 作者: S Stephen Toub 提交者: GitHub

Specialize Regex codegen for ? (#1898)

We're currently generating the code for a one / not one / set ? quantifier (zero-or-one) as a loop, when it can actually just be a conditional check.  This adds a special path for that case.

(I've also added another reduction test for alternation, and tweaked the codegen for a successful match to touch fields less.)
上级 c26a6534
......@@ -1591,7 +1591,9 @@ private bool TryGenerateNonBacktrackingGo(RegexNode node)
EmitNode(node);
// Success:
// this.runtextpos = runtextpos + textSpanPos;
// runtextpos += textSpanPos;
// this.runtextpos = runtextpos;
// Capture(0, originalruntextpos, runtextpos);
MarkLabel(stopSuccessLabel);
Ldthis();
Ldloc(runtextposLocal);
......@@ -1599,14 +1601,14 @@ private bool TryGenerateNonBacktrackingGo(RegexNode node)
{
Ldc(textSpanPos);
Add();
Stloc(runtextposLocal);
Ldloc(runtextposLocal);
}
Stfld(s_runtextposField);
// Capture(0, originalruntextposLocal, this.runtextpos);
Ldthis();
Ldc(0);
Ldloc(originalruntextposLocal);
Ldthisfld(s_runtextposField);
Ldloc(runtextposLocal);
Callvirt(s_captureMethod);
// If the graph contained captures, undo any remaining to handle failed matches.
......@@ -2658,13 +2660,20 @@ void EmitAtomicSingleCharLoop(RegexNode node)
node.Type == RegexNode.Setloopatomic);
Debug.Assert(node.M < int.MaxValue);
// First generate the code to handle the required number of iterations.
// If this is actually a repeater, emit that instead.
if (node.M == node.N)
{
EmitSingleCharRepeater(node);
return;
}
// If this is actually an optional single char, emit that instead.
if (node.M == 0 && node.N == 1)
{
EmitAtomicSingleCharZeroOrOne(node);
return;
}
Debug.Assert(node.N > node.M);
int minIterations = node.M;
int maxIterations = node.N;
......@@ -2812,6 +2821,63 @@ void EmitAtomicSingleCharLoop(RegexNode node)
ReturnInt32Local(iterationLocal);
}
// Emits the code for an atomic (non-backtracking) optional single character, i.e. a
// one / notone / set loop whose bounds are exactly M == 0 and N == 1.  Because at most
// one character can be consumed, a single conditional check is emitted rather than a loop:
//
//     if ((uint)textSpanPos < (uint)textSpan.Length && <textSpan[textSpanPos] matches node>)
//     {
//         textSpan = textSpan.Slice(1);
//         runtextpos++;
//     }
//
// textSpanPos itself is not modified by this method.
void EmitAtomicSingleCharZeroOrOne(RegexNode node)
{
    Debug.Assert(
        node.Type == RegexNode.Oneloopatomic ||
        node.Type == RegexNode.Notoneloopatomic ||
        node.Type == RegexNode.Setloopatomic);
    Debug.Assert(node.M == 0 && node.N == 1);

    // Marked at the very end: reached by falling through after a match, or by branching
    // here when there is no character to examine or the character does not match.
    Label afterOptionalCharLabel = DefineLabel();

    // if ((uint)textSpanPos >= (uint)textSpan.Length) goto afterOptionalCharLabel;
    Ldc(textSpanPos);
    Ldloca(textSpanLocal);
    Call(s_spanGetLengthMethod);
    BgeUnFar(afterOptionalCharLabel);

    // Load textSpan[textSpanPos] so the node-specific comparison below can consume it.
    Ldloca(textSpanLocal);
    Ldc(textSpanPos);
    Call(s_spanGetItemMethod);
    LdindU2();

    switch (node.Type)
    {
        case RegexNode.Oneloopatomic:
        case RegexNode.Notoneloopatomic:
            // Both single-character forms compare against node.Ch (after CallToLower()
            // when the node is case-insensitive); only the branch condition differs.
            if (IsCaseInsensitive(node))
            {
                CallToLower();
            }
            Ldc(node.Ch);
            if (node.Type == RegexNode.Oneloopatomic)
            {
                // if (textSpan[textSpanPos] != ch) goto afterOptionalCharLabel;
                BneFar(afterOptionalCharLabel);
            }
            else
            {
                // if (textSpan[textSpanPos] == ch) goto afterOptionalCharLabel;
                BeqFar(afterOptionalCharLabel);
            }
            break;

        case RegexNode.Setloopatomic:
            // if (!<character class check for node.Str>) goto afterOptionalCharLabel;
            // The rented scratch local is handed back as soon as the check has been emitted.
            LocalBuilder charClassScratchLocal = RentInt32Local();
            EmitMatchCharacterClass(node.Str!, IsCaseInsensitive(node), charClassScratchLocal);
            ReturnInt32Local(charClassScratchLocal);
            BrfalseFar(afterOptionalCharLabel);
            break;
    }

    // The character matched, so consume it.

    // textSpan = textSpan.Slice(1);
    Ldloca(textSpanLocal);
    Ldc(1);
    Call(s_spanSliceIntMethod);
    Stloc(textSpanLocal);

    // runtextpos++;
    Ldloc(runtextposLocal);
    Ldc(1);
    Add();
    Stloc(runtextposLocal);

    MarkLabel(afterOptionalCharLabel);
}
// Emits the code to handle a non-backtracking, variable-length loop around another node.
void EmitAtomicNodeLoop(RegexNode node)
{
......@@ -2819,6 +2885,7 @@ void EmitAtomicNodeLoop(RegexNode node)
Debug.Assert(node.M == node.N || (node.Next != null && node.Next.Type == RegexNode.Atomic));
Debug.Assert(node.M < int.MaxValue);
// If this is actually a repeater, emit that instead.
if (node.M == node.N)
{
EmitNodeRepeater(node);
......
......@@ -674,6 +674,8 @@ public static IEnumerable<object[]> Groups_Basic_TestData()
yield return new object[] { null, @"(?:abcd|efghj{2,}|j[klm]o+)i", "efghjjjjji", RegexOptions.None, new string[] { "efghjjjjji" } };
yield return new object[] { null, @"(?:abcd|efghi{2,}|j[klm]o+)i", "efghiii", RegexOptions.None, new string[] { "efghiii" } };
yield return new object[] { null, @"(?:abcd|efghi{2,}|j[klm]o+)i", "efghiiiiiiii", RegexOptions.None, new string[] { "efghiiiiiiii" } };
yield return new object[] { null, @"a?ba?ba?ba?b", "abbabab", RegexOptions.None, new string[] { "abbabab" } };
yield return new object[] { null, @"a?ba?ba?ba?b", "abBAbab", RegexOptions.IgnoreCase, new string[] { "abBAbab" } };
// Implicitly upgrading (or not) notoneloop to be atomic
yield return new object[] { null, @"[^b]*b", "aaab", RegexOptions.None, new string[] { "aaab" } };
yield return new object[] { null, @"[^b]*b+", "aaab", RegexOptions.None, new string[] { "aaab" } };
......@@ -684,6 +686,8 @@ public static IEnumerable<object[]> Groups_Basic_TestData()
yield return new object[] { null, @"(?:abc[^b]*|efgh)i", "efghi", RegexOptions.None, new string[] { "efghi" } }; // can't upgrade
yield return new object[] { null, @"(?:abcd|efg[^b]*)b", "efgb", RegexOptions.None, new string[] { "efgb" } };
yield return new object[] { null, @"(?:abcd|efg[^b]*)i", "efgi", RegexOptions.None, new string[] { "efgi" } }; // can't upgrade
yield return new object[] { null, @"[^a]?a[^a]?a[^a]?a[^a]?a", "baababa", RegexOptions.None, new string[] { "baababa" } };
yield return new object[] { null, @"[^a]?a[^a]?a[^a]?a[^a]?a", "BAababa", RegexOptions.IgnoreCase, new string[] { "BAababa" } };
// Implicitly upgrading (or not) setloop to be atomic
yield return new object[] { null, @"[ac]*", "aaa", RegexOptions.None, new string[] { "aaa" } };
yield return new object[] { null, @"[ac]*b", "aaab", RegexOptions.None, new string[] { "aaab" } };
......@@ -710,6 +714,8 @@ public static IEnumerable<object[]> Groups_Basic_TestData()
yield return new object[] { null, @"(?:abcd|efg[hij]*)h", "efgh", RegexOptions.None, new string[] { "efgh" } }; // can't upgrade
yield return new object[] { null, @"(?:abcd|efg[hij]*)ih", "efgjih", RegexOptions.None, new string[] { "efgjih" } }; // can't upgrade
yield return new object[] { null, @"(?:abcd|efg[hij]*)k", "efgjk", RegexOptions.None, new string[] { "efgjk" } };
yield return new object[] { null, @"[ace]?b[ace]?b[ace]?b[ace]?b", "cbbabeb", RegexOptions.None, new string[] { "cbbabeb" } };
yield return new object[] { null, @"[ace]?b[ace]?b[ace]?b[ace]?b", "cBbAbEb", RegexOptions.IgnoreCase, new string[] { "cBbAbEb" } };
// Implicitly upgrading (or not) concat loops to be atomic
yield return new object[] { null, @"(?:[ab]c[de]f)*", "", RegexOptions.None, new string[] { "" } };
yield return new object[] { null, @"(?:[ab]c[de]f)*", "acdf", RegexOptions.None, new string[] { "acdf" } };
......
......@@ -273,12 +273,14 @@ private static int GetMinRequiredLength(Regex r)
[InlineData("a*b+", "(?>a*)b+")]
[InlineData("a*b{3,4}", "(?>a*)b{3,4}")]
[InlineData("a+b", "(?>a+)b")]
[InlineData("a?b", "(?>a?)b")]
[InlineData("[^\n]*\n", "(?>[^\n]*)\n")]
[InlineData("[^\n]*\n+", "(?>[^\n]*)\n+")]
[InlineData("(a+)b", "((?>a+))b")]
[InlineData("a*(?:bcd|efg)", "(?>a*)(?:bcd|efg)")]
[InlineData("\\w*\\b", "(?>\\w*)\\b")]
[InlineData("\\d*\\b", "(?>\\d*)\\b")]
[InlineData("(?:abc*|def*)g", "(?:ab(?>c*)|de(?>f*))g")]
[InlineData("(?:a[ce]*|b*)g", "(?:a(?>[ce]*)|(?>b*))g")]
[InlineData("(?:a[ce]*|b*)c", "(?:a[ce]*|(?>b*))c")]
public void PatternsReduceIdentically(string pattern1, string pattern2)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册