未验证 提交 5a948f14 编写于 作者: S Stephen Toub 提交者: GitHub

Tweak codegen for Regex's FindFirstChar (#1996)

* Avoid extra field loads / comparisons in FindFirstChar

If we've already compared the current pos to the end pos as part of a minimum length check, we don't need to repeat that comparison later. We can also read from locals rather than fields in more places than we currently do, which saves redundant field reads.

* Reduce branches in Boyer-Moore negative switch table

It's often the case that lots of negative branches have the same value.  We can coalesce them.

* Address PR feedback
上级 0f51290d
......@@ -125,10 +125,18 @@ private void GenerateRegexDefaultCtor(string pattern, RegexOptions options, Type
Stfld(RegexField(nameof(Regex.factory)));
// Store the timeout (no need to validate as it should have happened in RegexCompilationInfo)
// base.internalMatchTimeout = TimeSpan.FromTick(matchTimeout.Ticks);
Ldthis();
LdcI8(matchTimeout.Ticks);
Call(typeof(TimeSpan).GetMethod(nameof(TimeSpan.FromTicks), BindingFlags.Public | BindingFlags.Static)!);
if (matchTimeout == Regex.InfiniteMatchTimeout)
{
// base.internalMatchTimeout = Regex.InfiniteMatchTimeout;
_ilg.Emit(OpCodes.Ldsfld, RegexField(nameof(Regex.InfiniteMatchTimeout)));
}
else
{
// base.internalMatchTimeout = TimeSpan.FromTicks(matchTimeout.Ticks);
Ldthis();
LdcI8(matchTimeout.Ticks);
Call(typeof(TimeSpan).GetMethod(nameof(TimeSpan.FromTicks), BindingFlags.Public | BindingFlags.Static)!);
}
Stfld(RegexField(nameof(Regex.internalMatchTimeout)));
// Set capsize, caps, capnames, capslist.
......@@ -221,7 +229,7 @@ internal void GenerateCreateHashtable(FieldInfo field, Hashtable ht)
/// <summary>Gets the named instance field from the Regex type.</summary>
private static FieldInfo RegexField(string fieldname) =>
typeof(Regex).GetField(fieldname, BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Instance)!;
typeof(Regex).GetField(fieldname, BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Instance | BindingFlags.Static)!;
/// <summary>Saves the assembly to a file in the current directory based on the assembly's name.</summary>
internal void Save()
......
......@@ -922,8 +922,14 @@ private void GenerateBacktrackSection()
/// </summary>
protected void GenerateFindFirstChar()
{
Debug.Assert(_code != null);
_runtextposLocal = DeclareInt32();
_runtextendLocal = DeclareInt32();
if (_code.RightToLeft)
{
_runtextbegLocal = DeclareInt32();
}
_runtextLocal = DeclareString();
_temp1Local = DeclareInt32();
_temp2Local = DeclareInt32();
......@@ -939,43 +945,54 @@ protected void GenerateFindFirstChar()
}
}
// Load necessary locals
// int runtextpos = this.runtextpos;
// int runtextend = this.runtextend;
Mvfldloc(s_runtextposField, _runtextposLocal);
Mvfldloc(s_runtextendField, _runtextendLocal);
if (_code.RightToLeft)
{
Mvfldloc(s_runtextbegField, _runtextbegLocal!);
}
// Generate length check. If the input isn't long enough to possibly match, fail quickly.
int minRequiredLength = _code!.Tree.MinRequiredLength;
int minRequiredLength = _code.Tree.MinRequiredLength;
Debug.Assert(minRequiredLength >= 0);
if (minRequiredLength > 0)
{
Label finishedLengthCheck = DefineLabel();
if (!_code.RightToLeft)
{
// if (this.runtextpos > this.runtextend - _code.Tree.MinRequiredLength)
// if (runtextpos > runtextend - _code.Tree.MinRequiredLength)
// {
// this.runtextpos = this.runtextend;
// this.runtextpos = runtextend;
// return false;
// }
Ldthisfld(s_runtextposField);
Ldthisfld(s_runtextendField);
Ldloc(_runtextposLocal);
Ldloc(_runtextendLocal);
Ldc(minRequiredLength);
Sub();
Ble(finishedLengthCheck);
Ldthis();
Ldthisfld(s_runtextendField);
Ldloc(_runtextendLocal);
Stfld(s_runtextposField);
Ldc(0);
Ret();
}
else
{
// if (this.runtextpos - _code.Tree.MinRequiredLength < this.runtextbeg)
// if (runtextpos - _code.Tree.MinRequiredLength < runtextbeg)
// {
// this.runtextpos = this.runtextbeg;
// this.runtextpos = runtextbeg;
// return false;
// }
Ldthisfld(s_runtextposField);
Ldloc(_runtextposLocal);
Ldc(minRequiredLength);
Sub();
Ldthisfld(s_runtextbegField);
Ldloc(_runtextbegLocal!);
Bge(finishedLengthCheck);
Ldthis();
Ldthisfld(s_runtextbegField);
Ldloc(_runtextbegLocal!);
Stfld(s_runtextposField);
Ldc(0);
Ret();
......@@ -986,16 +1003,16 @@ protected void GenerateFindFirstChar()
// Generate anchor checks.
if ((_anchors & (RegexFCD.Beginning | RegexFCD.Start | RegexFCD.EndZ | RegexFCD.End)) != 0)
{
if (!_code!.RightToLeft)
if (!_code.RightToLeft)
{
if ((_anchors & RegexFCD.Beginning) != 0)
{
Label l1 = DefineLabel();
Ldthisfld(s_runtextposField);
Ldloc(_runtextposLocal);
Ldthisfld(s_runtextbegField);
Ble(l1);
Ldthis();
Ldthisfld(s_runtextendField);
Ldloc(_runtextendLocal);
Stfld(s_runtextposField);
Ldc(0);
Ret();
......@@ -1005,11 +1022,11 @@ protected void GenerateFindFirstChar()
if ((_anchors & RegexFCD.Start) != 0)
{
Label l1 = DefineLabel();
Ldthisfld(s_runtextposField);
Ldloc(_runtextposLocal);
Ldthisfld(s_runtextstartField);
Ble(l1);
Ldthis();
Ldthisfld(s_runtextendField);
Ldloc(_runtextendLocal);
Stfld(s_runtextposField);
Ldc(0);
Ret();
......@@ -1019,13 +1036,13 @@ protected void GenerateFindFirstChar()
if ((_anchors & RegexFCD.EndZ) != 0)
{
Label l1 = DefineLabel();
Ldthisfld(s_runtextposField);
Ldthisfld(s_runtextendField);
Ldloc(_runtextposLocal);
Ldloc(_runtextendLocal);
Ldc(1);
Sub();
Bge(l1);
Ldthis();
Ldthisfld(s_runtextendField);
Ldloc(_runtextendLocal);
Ldc(1);
Sub();
Stfld(s_runtextposField);
......@@ -1034,14 +1051,17 @@ protected void GenerateFindFirstChar()
if ((_anchors & RegexFCD.End) != 0)
{
Label l1 = DefineLabel();
Ldthisfld(s_runtextposField);
Ldthisfld(s_runtextendField);
Bge(l1);
Ldthis();
Ldthisfld(s_runtextendField);
Stfld(s_runtextposField);
MarkLabel(l1);
if (minRequiredLength == 0) // if it's > 0, we already output a more stringent check
{
Label l1 = DefineLabel();
Ldloc(_runtextposLocal);
Ldloc(_runtextendLocal);
Bge(l1);
Ldthis();
Ldloc(_runtextendLocal);
Stfld(s_runtextposField);
MarkLabel(l1);
}
}
}
else
......@@ -1049,11 +1069,11 @@ protected void GenerateFindFirstChar()
if ((_anchors & RegexFCD.End) != 0)
{
Label l1 = DefineLabel();
Ldthisfld(s_runtextposField);
Ldthisfld(s_runtextendField);
Ldloc(_runtextposLocal);
Ldloc(_runtextendLocal);
Bge(l1);
Ldthis();
Ldthisfld(s_runtextbegField);
Ldloc(_runtextbegLocal!);
Stfld(s_runtextposField);
Ldc(0);
Ret();
......@@ -1064,22 +1084,22 @@ protected void GenerateFindFirstChar()
{
Label l1 = DefineLabel();
Label l2 = DefineLabel();
Ldthisfld(s_runtextposField);
Ldthisfld(s_runtextendField);
Ldloc(_runtextposLocal);
Ldloc(_runtextendLocal);
Ldc(1);
Sub();
Blt(l1);
Ldthisfld(s_runtextposField);
Ldthisfld(s_runtextendField);
Ldloc(_runtextposLocal);
Ldloc(_runtextendLocal);
Beq(l2);
Ldthisfld(s_runtextField);
Ldthisfld(s_runtextposField);
Ldloc(_runtextposLocal);
Callvirt(s_stringGetCharsMethod);
Ldc('\n');
Beq(l2);
MarkLabel(l1);
Ldthis();
Ldthisfld(s_runtextbegField);
Ldloc(_runtextbegLocal!);
Stfld(s_runtextposField);
Ldc(0);
Ret();
......@@ -1089,11 +1109,11 @@ protected void GenerateFindFirstChar()
if ((_anchors & RegexFCD.Start) != 0)
{
Label l1 = DefineLabel();
Ldthisfld(s_runtextposField);
Ldloc(_runtextposLocal);
Ldthisfld(s_runtextstartField);
Bge(l1);
Ldthis();
Ldthisfld(s_runtextbegField);
Ldloc(_runtextbegLocal!);
Stfld(s_runtextposField);
Ldc(0);
Ret();
......@@ -1103,11 +1123,11 @@ protected void GenerateFindFirstChar()
if ((_anchors & RegexFCD.Beginning) != 0)
{
Label l1 = DefineLabel();
Ldthisfld(s_runtextposField);
Ldthisfld(s_runtextbegField);
Ldloc(_runtextposLocal);
Ldloc(_runtextbegLocal!);
Ble(l1);
Ldthis();
Ldthisfld(s_runtextbegField);
Ldloc(_runtextbegLocal!);
Stfld(s_runtextposField);
MarkLabel(l1);
}
......@@ -1131,7 +1151,7 @@ protected void GenerateFindFirstChar()
int beforefirst;
int last;
if (!_code!.RightToLeft)
if (!_code.RightToLeft)
{
beforefirst = -1;
last = _bmPrefix.Pattern.Length - 1;
......@@ -1145,10 +1165,10 @@ protected void GenerateFindFirstChar()
int chLast = _bmPrefix.Pattern[last];
Mvfldloc(s_runtextField, _runtextLocal);
Ldthisfld(_code.RightToLeft ? s_runtextbegField : s_runtextendField);
Ldloc(_code.RightToLeft ? _runtextbegLocal! : _runtextendLocal);
Stloc(limitLocal);
Ldthisfld(s_runtextposField);
Ldloc(_runtextposLocal);
if (!_code.RightToLeft)
{
Ldc(_bmPrefix.Pattern.Length - 1);
......@@ -1206,11 +1226,27 @@ protected void GenerateFindFirstChar()
var table = new Label[_bmPrefix.HighASCII - _bmPrefix.LowASCII + 1];
// Mapping from negative ASCII value to the label that loads it.
// As we create labels, we check to see if the table already has
// the value, and only create the label if it doesn't. Then when
// spitting out the code for each label, we try to remove the entry
// from the dictionary, and only if we're successful (because it
// wasn't already removed) do we spit the code.
var labelMap = new Dictionary<int, Label>();
for (int i = _bmPrefix.LowASCII; i <= _bmPrefix.HighASCII; i++)
{
table[i - _bmPrefix.LowASCII] = (_bmPrefix.NegativeASCII[i] == beforefirst) ?
lDefaultAdvance :
DefineLabel();
Label label;
if (_bmPrefix.NegativeASCII[i] == beforefirst)
{
label = lDefaultAdvance;
}
else if (!labelMap.TryGetValue(_bmPrefix.NegativeASCII[i], out label))
{
label = DefineLabel();
labelMap.Add(_bmPrefix.NegativeASCII[i], label);
}
table[i - _bmPrefix.LowASCII] = label;
}
Ldloc(chLocal);
......@@ -1218,7 +1254,8 @@ protected void GenerateFindFirstChar()
for (int i = _bmPrefix.LowASCII; i <= _bmPrefix.HighASCII; i++)
{
if (_bmPrefix.NegativeASCII[i] == beforefirst)
if (_bmPrefix.NegativeASCII[i] == beforefirst ||
!labelMap.Remove(_bmPrefix.NegativeASCII[i]))
{
continue;
}
......@@ -1275,7 +1312,7 @@ protected void GenerateFindFirstChar()
MarkLabel(lFail);
Ldthis();
Ldthisfld(_code.RightToLeft ? s_runtextbegField : s_runtextendField);
Ldloc(_code.RightToLeft ? _runtextbegLocal! : _runtextendLocal);
Stfld(s_runtextposField);
Ldc(0);
Ret();
......@@ -1285,7 +1322,7 @@ protected void GenerateFindFirstChar()
Ldc(1);
Ret();
}
else if (_code!.RightToLeft)
else if (_code.RightToLeft)
{
LocalBuilder charInClassLocal = _temp1Local;
LocalBuilder cLocal = _temp2Local;
......@@ -1296,20 +1333,21 @@ protected void GenerateFindFirstChar()
Label l4 = DefineLabel();
Label l5 = DefineLabel();
Mvfldloc(s_runtextposField, _runtextposLocal);
Mvfldloc(s_runtextField, _runtextLocal);
Ldloc(_runtextposLocal);
Ldthisfld(s_runtextbegField);
Ldloc(_runtextbegLocal!);
Sub();
Stloc(cLocal);
Ldloc(cLocal);
Ldc(0);
BleFar(l4);
if (minRequiredLength == 0) // if minRequiredLength > 0, we already output a more stringent check
{
Ldloc(cLocal);
Ldc(0);
BleFar(l4);
}
MarkLabel(l1);
Ldloc(cLocal);
Ldc(1);
Sub();
......@@ -1367,13 +1405,13 @@ protected void GenerateFindFirstChar()
Label returnFalseLabel = DefineLabel();
Label updatePosAndReturnFalse = DefineLabel();
Mvfldloc(s_runtextposField, _runtextposLocal);
Mvfldloc(s_runtextendField, _runtextendLocal);
// if (runtextend > runtextpos)
Ldloc(_runtextendLocal);
Ldloc(_runtextposLocal);
BleFar(returnFalseLabel);
if (minRequiredLength == 0) // if minRequiredLength > 0, we already output a more stringent check
{
// if (runtextend > runtextpos)
Ldloc(_runtextendLocal);
Ldloc(_runtextposLocal);
BleFar(returnFalseLabel);
}
Span<char> setChars = stackalloc char[3]; // up to 3 characters handled by IndexOf{Any} below
int setCharsCount;
......@@ -1555,6 +1593,7 @@ private bool TryGenerateNonBacktrackingGo(RegexNode node)
LocalBuilder originalruntextposLocal = DeclareInt32();
LocalBuilder runtextposLocal = DeclareInt32();
LocalBuilder textSpanLocal = DeclareReadOnlySpanChar();
LocalBuilder runtextendLocal = DeclareInt32();
Stack<LocalBuilder>? iterationLocals = null;
Stack<LocalBuilder>? spanLocals = null;
Label stopSuccessLabel = DefineLabel();
......@@ -1568,8 +1607,9 @@ private bool TryGenerateNonBacktrackingGo(RegexNode node)
InitializeCultureForGoIfNecessary();
// string runtext = this.runtext;
Ldthisfld(s_runtextField);
Stloc(runtextLocal);
// int runtextend = this.runtextend;
Mvfldloc(s_runtextField, runtextLocal);
Mvfldloc(s_runtextendField, runtextendLocal);
// int runtextpos;
// int originalruntextpos = runtextpos = this.runtextpos;
......@@ -1812,7 +1852,7 @@ void LoadTextSpanLocal()
// textSpan = runtext.AsSpan(runtextpos, this.runtextend - runtextpos);
Ldloc(runtextLocal);
Ldloc(runtextposLocal);
Ldthisfld(s_runtextendField);
Ldloc(runtextendLocal);
Ldloc(runtextposLocal);
Sub();
Call(s_stringAsSpanIntIntMethod);
......@@ -2306,8 +2346,8 @@ void EmitBoundary(RegexNode node)
Ldc(textSpanPos);
Add();
}
Ldthisfld(s_runtextbegField!);
Ldthisfld(s_runtextendField!);
Ldthisfld(s_runtextbegField);
Ldloc(runtextendLocal);
switch (node.Type)
{
case RegexNode.Boundary:
......@@ -2989,8 +3029,10 @@ void EmitAtomicNodeLoop(RegexNode node)
/// <summary>Generates the code for "RegexRunner.Go".</summary>
protected void GenerateGo()
{
Debug.Assert(_code != null);
// Generate backtrack-free code when we're dealing with simpler regexes.
if (TryGenerateNonBacktrackingGo(_code!.Tree.Root))
if (TryGenerateNonBacktrackingGo(_code.Tree.Root))
{
return;
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册