未验证 提交 26b91db7 编写于 作者: J Jose Perez Rodriguez 提交者: GitHub

Adding Regex.IsMatch(Span) and RegexRunner.Scan() methods (#65473)

* Adding Regex.IsMatch(Span) and RegexRunner.Scan() methods

* Addressing some PR Feedback

* Fixing case where span-based APIs are called from a precompiled regex and adding a test for it.

* Adding Tests for IsMatch span overload

* Refactor Run<TState> and Run to share centralized logic

* Emit IsBoundary and IsECMABoundary instead of exposing them.

* Address Emitter changes feedback.

* Addressing PR Feedback.

* Addressing most of the PR comments

* Fix issue where runtextbeg and length do not match the size of the input

* Add Precompiled tests for all overloads of methods.

* Fix precompiled test failing in non-Windows due to new lines.

* Apply suggestions from code review
Co-authored-by: Stephen Toub <stoub@microsoft.com>

* Addressing more PR Feedback

* Fix protected Scan method

* Addressing feedback and adding more tests

* Fix few failing tests in Mono runtime
Co-authored-by: Stephen Toub <stoub@microsoft.com>
上级 c173105b
......@@ -215,17 +215,22 @@ private static ImmutableArray<Diagnostic> EmitRegexMethod(IndentedTextWriter wri
writer.WriteLine($" {{");
// Main implementation methods
writer.WriteLine($" protected override void InitTrackCount() => base.runtrackcount = {rm.Code.TrackCount};");
writer.WriteLine();
writer.WriteLine(" // Description:");
DescribeExpression(writer, rm.Code.Tree.Root.Child(0), " // ", analysis); // skip implicit root capture
writer.WriteLine();
writer.WriteLine($" protected override bool FindFirstChar()");
writer.WriteLine($" protected override void Scan(global::System.ReadOnlySpan<char> text)");
writer.WriteLine($" {{");
writer.Indent += 4;
EmitScan(writer, rm, id);
writer.Indent -= 4;
writer.WriteLine($" }}");
writer.WriteLine();
writer.WriteLine($" private bool TryFindNextPossibleStartingPosition(global::System.ReadOnlySpan<char> inputSpan)");
writer.WriteLine($" {{");
writer.Indent += 4;
RequiredHelperFunctions requiredHelpers = EmitFindFirstChar(writer, rm, id);
RequiredHelperFunctions requiredHelpers = EmitTryFindNextPossibleStartingPosition(writer, rm, id);
writer.Indent -= 4;
writer.WriteLine($" }}");
writer.WriteLine();
......@@ -233,10 +238,10 @@ private static ImmutableArray<Diagnostic> EmitRegexMethod(IndentedTextWriter wri
{
writer.WriteLine($" [global::System.Runtime.CompilerServices.SkipLocalsInit]");
}
writer.WriteLine($" protected override void Go()");
writer.WriteLine($" private bool TryMatchAtCurrentPosition(global::System.ReadOnlySpan<char> inputSpan)");
writer.WriteLine($" {{");
writer.Indent += 4;
requiredHelpers |= EmitGo(writer, rm, id, analysis);
requiredHelpers |= EmitTryMatchAtCurrentPosition(writer, rm, id, analysis);
writer.Indent -= 4;
writer.WriteLine($" }}");
......@@ -271,6 +276,41 @@ private static ImmutableArray<Diagnostic> EmitRegexMethod(IndentedTextWriter wri
writer.WriteLine($" }}");
}
if ((requiredHelpers & RequiredHelperFunctions.IsBoundary) != 0)
{
writer.WriteLine();
writer.WriteLine($" /// <summary>Determines whether the character at the specified index is a boundary.</summary>");
writer.WriteLine($" [global::System.Runtime.CompilerServices.MethodImpl(global::System.Runtime.CompilerServices.MethodImplOptions.AggressiveInlining)]");
writer.WriteLine($" private static bool IsBoundary(global::System.ReadOnlySpan<char> inputSpan, int index)");
writer.WriteLine($" {{");
writer.WriteLine($" int indexM1 = index - 1;");
writer.WriteLine($" return ((uint)indexM1 < (uint)inputSpan.Length && IsBoundaryWordChar(inputSpan[indexM1])) !=");
writer.WriteLine($" ((uint)index < (uint)inputSpan.Length && IsBoundaryWordChar(inputSpan[index]));");
writer.WriteLine();
writer.WriteLine($" static bool IsBoundaryWordChar(char ch) =>");
writer.WriteLine($" IsWordChar(ch) || (ch == '\\u200C' | ch == '\\u200D');");
writer.WriteLine($" }}");
}
if ((requiredHelpers & RequiredHelperFunctions.IsECMABoundary) != 0)
{
writer.WriteLine();
writer.WriteLine($" /// <summary>Determines whether the character at the specified index is a boundary.</summary>");
writer.WriteLine($" [global::System.Runtime.CompilerServices.MethodImpl(global::System.Runtime.CompilerServices.MethodImplOptions.AggressiveInlining)]");
writer.WriteLine($" private static bool IsECMABoundary(global::System.ReadOnlySpan<char> inputSpan, int index)");
writer.WriteLine($" {{");
writer.WriteLine($" int indexM1 = index - 1;");
writer.WriteLine($" return ((uint)indexM1 < (uint)inputSpan.Length && IsECMAWordChar(inputSpan[indexM1])) !=");
writer.WriteLine($" ((uint)index < (uint)inputSpan.Length && IsECMAWordChar(inputSpan[index]));");
writer.WriteLine();
writer.WriteLine($" static bool IsECMAWordChar(char ch) =>");
writer.WriteLine($" ((((uint)ch - 'A') & ~0x20) < 26) || // ASCII letter");
writer.WriteLine($" (((uint)ch - '0') < 10) || // digit");
writer.WriteLine($" ch == '_' || // underscore");
writer.WriteLine($" ch == '\\u0130'; // latin capital letter I with dot above");
writer.WriteLine($" }}");
}
writer.WriteLine($" }}");
writer.WriteLine($" }}");
writer.WriteLine("}");
......@@ -299,8 +339,30 @@ static void AppendHashtableContents(IndentedTextWriter writer, Hashtable ht)
}
}
/// <summary>Emits the body of the FindFirstChar override.</summary>
private static RequiredHelperFunctions EmitFindFirstChar(IndentedTextWriter writer, RegexMethod rm, string id)
/// <summary>
/// Emits the body of the generated Scan override: a loop that keeps asking
/// TryFindNextPossibleStartingPosition for a candidate position and then attempts a match there
/// with TryMatchAtCurrentPosition.
/// </summary>
/// <param name="writer">The writer that receives the generated source.</param>
/// <param name="rm">The regex method being generated; only its match timeout is consulted here.</param>
/// <param name="id">Not used by this method.</param>
private static void EmitScan(IndentedTextWriter writer, RegexMethod rm, string id)
{
    // A timeout check is only emitted into the loop when the regex has a finite timeout.
    bool emitTimeoutCheck = rm.MatchTimeout != Timeout.Infinite;

    using (EmitBlock(writer, "while (TryFindNextPossibleStartingPosition(text))"))
    {
        if (emitTimeoutCheck)
        {
            writer.WriteLine("base.CheckTimeout();");
            writer.WriteLine();
        }

        writer.WriteLine("// If we find a match on the current position, or we have reached the end of the input, we are done.");
        using (EmitBlock(writer, "if (TryMatchAtCurrentPosition(text) || base.runtextpos == text.Length)"))
        {
            writer.WriteLine("return;");
        }

        // Otherwise the generated loop bumps the position by one and goes around again.
        writer.WriteLine();
        writer.WriteLine("base.runtextpos++;");
    }
}
/// <summary>Emits the body of the TryFindNextPossibleStartingPosition.</summary>
private static RequiredHelperFunctions EmitTryFindNextPossibleStartingPosition(IndentedTextWriter writer, RegexMethod rm, string id)
{
RegexOptions options = (RegexOptions)rm.Options;
RegexCode code = rm.Code;
......@@ -347,7 +409,6 @@ private static RequiredHelperFunctions EmitFindFirstChar(IndentedTextWriter writ
{
case FindNextStartingPositionMode.LeadingPrefix_LeftToRight_CaseSensitive:
Debug.Assert(!string.IsNullOrEmpty(code.FindOptimizations.LeadingCaseSensitivePrefix));
additionalDeclarations.Add("global::System.ReadOnlySpan<char> inputSpan = base.runtext;");
EmitIndexOf(code.FindOptimizations.LeadingCaseSensitivePrefix);
break;
......@@ -356,13 +417,11 @@ private static RequiredHelperFunctions EmitFindFirstChar(IndentedTextWriter writ
case FindNextStartingPositionMode.LeadingSet_LeftToRight_CaseSensitive:
case FindNextStartingPositionMode.LeadingSet_LeftToRight_CaseInsensitive:
Debug.Assert(code.FindOptimizations.FixedDistanceSets is { Count: > 0 });
additionalDeclarations.Add("global::System.ReadOnlySpan<char> inputSpan = base.runtext;");
EmitFixedSet();
break;
case FindNextStartingPositionMode.LiteralAfterLoop_LeftToRight_CaseSensitive:
Debug.Assert(code.FindOptimizations.LiteralAfterLoop is not null);
additionalDeclarations.Add("global::System.ReadOnlySpan<char> inputSpan = base.runtext;");
EmitLiteralAfterAtomicLoop();
break;
......@@ -392,7 +451,7 @@ private static RequiredHelperFunctions EmitFindFirstChar(IndentedTextWriter writ
// searching is required; otherwise, false.
bool EmitAnchors()
{
// Anchors that fully implement FindFirstChar, with a check that leads to immediate success or failure determination.
// Anchors that fully implement TryFindNextPossibleStartingPosition, with a check that leads to immediate success or failure determination.
switch (code.FindOptimizations.FindMode)
{
case FindNextStartingPositionMode.LeadingAnchor_LeftToRight_Beginning:
......@@ -463,7 +522,6 @@ bool EmitAnchors()
// the other anchors, which all skip all subsequent processing if found, with BOL we just use it
// to boost our position to the next line, and then continue normally with any searches.
writer.WriteLine("// Beginning-of-line anchor");
additionalDeclarations.Add("global::System.ReadOnlySpan<char> inputSpan = base.runtext;");
additionalDeclarations.Add("int beginning = base.runtextbeg;");
using (EmitBlock(writer, "if (pos > beginning && inputSpan[pos - 1] != '\\n')"))
{
......@@ -710,8 +768,8 @@ FindNextStartingPositionMode.FixedSets_LeftToRight_CaseInsensitive or
}
}
/// <summary>Emits the body of the Go override.</summary>
private static RequiredHelperFunctions EmitGo(IndentedTextWriter writer, RegexMethod rm, string id, AnalysisResults analysis)
/// <summary>Emits the body of the TryMatchAtCurrentPosition.</summary>
private static RequiredHelperFunctions EmitTryMatchAtCurrentPosition(IndentedTextWriter writer, RegexMethod rm, string id, AnalysisResults analysis)
{
// In .NET Framework and up through .NET Core 3.1, the code generated for RegexOptions.Compiled was effectively an unrolled
// version of what RegexInterpreter would process. The RegexNode tree would be turned into a series of opcodes via
......@@ -730,7 +788,7 @@ private static RequiredHelperFunctions EmitGo(IndentedTextWriter writer, RegexMe
// label that code should jump back to when backtracking. That way, a subsequent EmitXx function doesn't need to know exactly
// where to jump: it simply always jumps to "doneLabel" on match failure, and "doneLabel" is always configured to point to
// the right location. In an expression without backtracking, or before any backtracking constructs have been encountered,
// "doneLabel" is simply the final return location from the Go method that will undo any captures and exit, signaling to
// "doneLabel" is simply the final return location from the TryMatchAtCurrentPosition method that will undo any captures and exit, signaling to
// the calling scan loop that nothing was matched.
// Arbitrary limit for unrolling vs creating a loop. We want to balance size in the generated
......@@ -752,17 +810,18 @@ private static RequiredHelperFunctions EmitGo(IndentedTextWriter writer, RegexMe
Debug.Assert(node.ChildCount() == 1, "Capture nodes should have one child");
node = node.Child(0);
// In some limited cases, FindFirstChar will only return true if it successfully matched the whole expression.
// We can special case these to do essentially nothing in Go other than emit the capture.
// In some limited cases, TryFindNextPossibleStartingPosition will only return true if it successfully matched the whole expression.
// We can special case these to do essentially nothing in TryMatchAtCurrentPosition other than emit the capture.
switch (node.Kind)
{
case RegexNodeKind.Multi or RegexNodeKind.Notone or RegexNodeKind.One or RegexNodeKind.Set when !IsCaseInsensitive(node):
// This is the case for single and multiple characters, though the whole thing is only guaranteed
// to have been validated in FindFirstChar when doing case-sensitive comparison.
// to have been validated in TryFindNextPossibleStartingPosition when doing case-sensitive comparison.
writer.WriteLine($"int start = base.runtextpos;");
writer.WriteLine($"int end = start + {(node.Kind == RegexNodeKind.Multi ? node.Str!.Length : 1)};");
writer.WriteLine("base.Capture(0, start, end);");
writer.WriteLine("base.runtextpos = end;");
writer.WriteLine("return true;");
return requiredHelpers;
case RegexNodeKind.Empty:
......@@ -770,6 +829,7 @@ private static RequiredHelperFunctions EmitGo(IndentedTextWriter writer, RegexMe
// source generator and seeing what happens as you add more to expressions. When approaching
// it from a learning perspective, this is very common, as it's the empty string you start with.
writer.WriteLine("base.Capture(0, base.runtextpos, base.runtextpos);");
writer.WriteLine("return true;");
return requiredHelpers;
}
......@@ -781,7 +841,6 @@ private static RequiredHelperFunctions EmitGo(IndentedTextWriter writer, RegexMe
// Declare some locals.
string sliceSpan = "slice";
writer.WriteLine("global::System.ReadOnlySpan<char> inputSpan = base.runtext;");
writer.WriteLine("int pos = base.runtextpos, end = base.runtextend;");
writer.WriteLine($"int original_pos = pos;");
bool hasTimeout = EmitLoopTimeoutCounterIfNeeded(writer, rm);
......@@ -825,7 +884,7 @@ private static RequiredHelperFunctions EmitGo(IndentedTextWriter writer, RegexMe
}
writer.WriteLine("base.runtextpos = pos;");
writer.WriteLine("base.Capture(0, original_pos, pos);");
writer.WriteLine("return;");
writer.WriteLine("return true;");
writer.WriteLine();
// We only get here in the code if the whole expression fails to match and jumps to
......@@ -836,6 +895,7 @@ private static RequiredHelperFunctions EmitGo(IndentedTextWriter writer, RegexMe
{
EmitUncaptureUntil("0");
}
writer.WriteLine("return false;");
// We're done with the match.
......@@ -845,8 +905,6 @@ private static RequiredHelperFunctions EmitGo(IndentedTextWriter writer, RegexMe
// And emit any required helpers.
if (additionalLocalFunctions.Count != 0)
{
writer.WriteLine("return;"); // not strictly necessary, just for readability
foreach (KeyValuePair<string, string[]> localFunctions in additionalLocalFunctions.OrderBy(k => k.Key))
{
writer.WriteLine();
......@@ -2138,13 +2196,22 @@ void EmitBoundary(RegexNode node)
string call = node.Kind switch
{
RegexNodeKind.Boundary => "!base.IsBoundary",
RegexNodeKind.NonBoundary => "base.IsBoundary",
RegexNodeKind.ECMABoundary => "!base.IsECMABoundary",
_ => "base.IsECMABoundary",
RegexNodeKind.Boundary => "!IsBoundary",
RegexNodeKind.NonBoundary => "IsBoundary",
RegexNodeKind.ECMABoundary => "!IsECMABoundary",
_ => "IsECMABoundary",
};
using (EmitBlock(writer, $"if ({call}(pos{(sliceStaticPos > 0 ? $" + {sliceStaticPos}" : "")}, base.runtextbeg, end))"))
RequiredHelperFunctions boundaryFunctionRequired = node.Kind switch
{
RegexNodeKind.Boundary or
RegexNodeKind.NonBoundary => RequiredHelperFunctions.IsBoundary | RequiredHelperFunctions.IsWordChar, // IsBoundary internally uses IsWordChar
_ => RequiredHelperFunctions.IsECMABoundary
};
requiredHelpers |= boundaryFunctionRequired;
using (EmitBlock(writer, $"if ({call}(inputSpan, pos{(sliceStaticPos > 0 ? $" + {sliceStaticPos}" : "")}))"))
{
writer.WriteLine($"goto {doneLabel};");
}
......@@ -3827,9 +3894,13 @@ public void Dispose()
private enum RequiredHelperFunctions
{
/// <summary>No additional functions are required.</summary>
None,
None = 0b0,
/// <summary>The IsWordChar helper is required.</summary>
IsWordChar
IsWordChar = 0b1,
/// <summary>The IsBoundary helper is required.</summary>
IsBoundary = 0b10,
/// <summary>The IsECMABoundary helper is required.</summary>
IsECMABoundary = 0b100
}
}
}
......@@ -171,6 +171,10 @@ public partial class Regex : System.Runtime.Serialization.ISerializable
public string GroupNameFromNumber(int i) { throw null; }
public int GroupNumberFromName(string name) { throw null; }
protected void InitializeReferences() { }
public bool IsMatch(System.ReadOnlySpan<char> input) { throw null; }
public static bool IsMatch(System.ReadOnlySpan<char> input, [System.Diagnostics.CodeAnalysis.StringSyntaxAttribute(System.Diagnostics.CodeAnalysis.StringSyntaxAttribute.Regex)] string pattern) { throw null; }
public static bool IsMatch(System.ReadOnlySpan<char> input, [System.Diagnostics.CodeAnalysis.StringSyntaxAttribute(System.Diagnostics.CodeAnalysis.StringSyntaxAttribute.Regex, "options")] string pattern, System.Text.RegularExpressions.RegexOptions options) { throw null; }
public static bool IsMatch(System.ReadOnlySpan<char> input, [System.Diagnostics.CodeAnalysis.StringSyntaxAttribute(System.Diagnostics.CodeAnalysis.StringSyntaxAttribute.Regex, "options")] string pattern, System.Text.RegularExpressions.RegexOptions options, System.TimeSpan matchTimeout) { throw null; }
public bool IsMatch(string input) { throw null; }
public bool IsMatch(string input, int startat) { throw null; }
public static bool IsMatch(string input, [System.Diagnostics.CodeAnalysis.StringSyntaxAttribute(System.Diagnostics.CodeAnalysis.StringSyntaxAttribute.Regex)] string pattern) { throw null; }
......@@ -330,9 +334,9 @@ public abstract partial class RegexRunner
protected void DoubleStack() { }
protected void DoubleTrack() { }
protected void EnsureStorage() { }
protected abstract bool FindFirstChar();
protected abstract void Go();
protected abstract void InitTrackCount();
protected virtual bool FindFirstChar() { throw null; }
protected virtual void Go() { throw null; }
protected virtual void InitTrackCount() { throw null; }
protected bool IsBoundary(int index, int startpos, int endpos) { throw null; }
protected bool IsECMABoundary(int index, int startpos, int endpos) { throw null; }
protected bool IsMatched(int cap) { throw null; }
......@@ -341,6 +345,7 @@ public abstract partial class RegexRunner
protected int Popcrawl() { throw null; }
protected internal System.Text.RegularExpressions.Match? Scan(System.Text.RegularExpressions.Regex regex, string text, int textbeg, int textend, int textstart, int prevlen, bool quick) { throw null; }
protected internal System.Text.RegularExpressions.Match? Scan(System.Text.RegularExpressions.Regex regex, string text, int textbeg, int textend, int textstart, int prevlen, bool quick, System.TimeSpan timeout) { throw null; }
protected internal virtual void Scan(System.ReadOnlySpan<char> text) { throw null; }
protected void TransferCapture(int capnum, int uncapnum, int start, int end) { }
protected void Uncapture() { }
}
......
<?xml version="1.0" encoding="utf-8"?>
<root>
<!--
Microsoft ResX Schema
Version 2.0
The primary goals of this format is to allow a simple XML format
that is mostly human readable. The generation and parsing of the
various data types are done through the TypeConverter classes
associated with the data types.
Example:
... ado.net/XML headers & schema ...
<resheader name="resmimetype">text/microsoft-resx</resheader>
<resheader name="version">2.0</resheader>
<resheader name="reader">System.Resources.ResXResourceReader, System.Windows.Forms, ...</resheader>
<resheader name="writer">System.Resources.ResXResourceWriter, System.Windows.Forms, ...</resheader>
<data name="Name1"><value>this is my long string</value><comment>this is a comment</comment></data>
<data name="Color1" type="System.Drawing.Color, System.Drawing">Blue</data>
<data name="Bitmap1" mimetype="application/x-microsoft.net.object.binary.base64">
<value>[base64 mime encoded serialized .NET Framework object]</value>
</data>
<data name="Icon1" type="System.Drawing.Icon, System.Drawing" mimetype="application/x-microsoft.net.object.bytearray.base64">
<value>[base64 mime encoded string representing a byte array form of the .NET Framework object]</value>
<comment>This is a comment</comment>
</data>
There are any number of "resheader" rows that contain simple
name/value pairs.
Each data row contains a name, and value. The row also contains a
type or mimetype. Type corresponds to a .NET class that support
text/value conversion through the TypeConverter architecture.
Classes that don't support this are serialized and stored with the
mimetype set.
The mimetype is used for serialized objects, and tells the
ResXResourceReader how to depersist the object. This is currently not
extensible. For a given mimetype the value must be set accordingly:
Note - application/x-microsoft.net.object.binary.base64 is the format
that the ResXResourceWriter will generate, however the reader can
read any of the formats listed below.
mimetype: application/x-microsoft.net.object.binary.base64
value : The object must be serialized with
: System.Runtime.Serialization.Formatters.Binary.BinaryFormatter
: and then encoded with base64 encoding.
mimetype: application/x-microsoft.net.object.soap.base64
value : The object must be serialized with
: System.Runtime.Serialization.Formatters.Soap.SoapFormatter
: and then encoded with base64 encoding.
mimetype: application/x-microsoft.net.object.bytearray.base64
value : The object must be serialized into a byte array
: using a System.ComponentModel.TypeConverter
: and then encoded with base64 encoding.
-->
<xsd:schema id="root" xmlns="" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:msdata="urn:schemas-microsoft-com:xml-msdata">
<xsd:import namespace="http://www.w3.org/XML/1998/namespace" />
<xsd:element name="root" msdata:IsDataSet="true">
......@@ -254,4 +313,7 @@
<data name="ExpressionDescription_BalancingGroup" xml:space="preserve">
<value>balancing group (?&lt;name1-name2&gt;subexpression) or (?'name1-name2' subexpression)</value>
</data>
</root>
<data name="UsingSpanAPIsWithCompiledToAssembly" xml:space="preserve">
<value>Searching an input span using a pre-compiled Regex assembly is not supported. Please use the string overloads or use a newer Regex implementation.</value>
</data>
</root>
\ No newline at end of file
......@@ -9,7 +9,7 @@ namespace System.Text.RegularExpressions
/// </summary>
public class Capture
{
internal Capture(string text, int index, int length)
internal Capture(string? text, int index, int length)
{
Text = text;
Index = index;
......@@ -19,27 +19,38 @@ internal Capture(string text, int index, int length)
/// <summary>Returns the position in the original string where the first character of captured substring was found.</summary>
public int Index { get; private protected set; }
/// <summary>
/// Shifts <see cref="Index"/> forward by <paramref name="beginning"/>.
/// </summary>
/// <remarks>
/// Intended to be called only when the text being matched was sliced with a different beginning, so that the
/// index of the match is relative to the start of the slice rather than the start of the original text.
/// Adding the offset back makes the index relative to the original text again.
/// </remarks>
/// <param name="beginning">The original text's beginning offset.</param>
internal void AddBeginningToIndex(int beginning) => Index += beginning;
/// <summary>Returns the length of the captured substring.</summary>
public int Length { get; private protected set; }
/// <summary>The original string</summary>
internal string Text { get; set; }
internal string? Text { get; set; }
/// <summary>Gets the captured substring from the input string.</summary>
/// <value>The substring that is captured by the match.</value>
public string Value => Text.Substring(Index, Length);
public string Value => Text is string text ? text.Substring(Index, Length) : string.Empty;
/// <summary>Gets the captured span from the input string.</summary>
/// <value>The span that is captured by the match.</value>
public ReadOnlySpan<char> ValueSpan => Text.AsSpan(Index, Length);
public ReadOnlySpan<char> ValueSpan => Text is string text ? text.AsSpan(Index, Length) : ReadOnlySpan<char>.Empty;
/// <summary>Returns the substring that was matched.</summary>
public override string ToString() => Value;
/// <summary>The substring to the left of the capture</summary>
internal ReadOnlyMemory<char> GetLeftSubstring() => Text.AsMemory(0, Index);
internal ReadOnlyMemory<char> GetLeftSubstring() => Text is string text ? text.AsMemory(0, Index) : ReadOnlyMemory<char>.Empty;
/// <summary>The substring to the right of the capture</summary>
internal ReadOnlyMemory<char> GetRightSubstring() => Text.AsMemory(Index + Length, Text.Length - Index - Length);
/// <summary>The substring to the right of the capture.</summary>
/// <returns>
/// The portion of the text that follows the capture, or an empty memory when there is no backing string.
/// </returns>
internal ReadOnlyMemory<char> GetRightSubstring() =>
    // Use the pattern-bound local for both the slice and its length instead of re-reading the settable
    // Text property, mirroring how GetLeftSubstring uses its local.
    Text is string text ? text.AsMemory(Index + Length, text.Length - Index - Length) : ReadOnlyMemory<char>.Empty;
}
}
......@@ -5,20 +5,16 @@ namespace System.Text.RegularExpressions
{
internal sealed class CompiledRegexRunner : RegexRunner
{
private readonly Action<RegexRunner> _goMethod;
private readonly Func<RegexRunner, bool> _findFirstCharMethod;
private readonly ScanDelegate _scanMethod;
public CompiledRegexRunner(Action<RegexRunner> go, Func<RegexRunner, bool> findFirstChar, int trackCount)
internal delegate void ScanDelegate(RegexRunner runner, ReadOnlySpan<char> text);
public CompiledRegexRunner(ScanDelegate scan)
{
_goMethod = go;
_findFirstCharMethod = findFirstChar;
runtrackcount = trackCount;
_scanMethod = scan;
}
protected override void Go() => _goMethod(this);
protected override bool FindFirstChar() => _findFirstCharMethod(this);
protected override void InitTrackCount() { }
protected internal override void Scan(ReadOnlySpan<char> text)
=> _scanMethod(this, text);
}
}
......@@ -7,25 +7,18 @@ namespace System.Text.RegularExpressions
{
internal sealed class CompiledRegexRunnerFactory : RegexRunnerFactory
{
private readonly DynamicMethod _goMethod;
private readonly DynamicMethod _findFirstCharMethod;
private readonly int _trackcount;
private readonly DynamicMethod _scanMethod;
// Delegates are lazily created to avoid forcing JIT'ing until the regex is actually executed.
private Action<RegexRunner>? _go;
private Func<RegexRunner, bool>? _findFirstChar;
// Delegate is lazily created to avoid forcing JIT'ing until the regex is actually executed.
private CompiledRegexRunner.ScanDelegate? _scan;
public CompiledRegexRunnerFactory(DynamicMethod goMethod, DynamicMethod findFirstCharMethod, int trackcount)
public CompiledRegexRunnerFactory(DynamicMethod scanMethod)
{
_goMethod = goMethod;
_findFirstCharMethod = findFirstCharMethod;
_trackcount = trackcount;
_scanMethod = scanMethod;
}
protected internal override RegexRunner CreateInstance() =>
new CompiledRegexRunner(
_go ??= _goMethod.CreateDelegate<Action<RegexRunner>>(),
_findFirstChar ??= _findFirstCharMethod.CreateDelegate<Func<RegexRunner, bool>>(),
_trackcount);
_scan ??= _scanMethod.CreateDelegate<CompiledRegexRunner.ScanDelegate>());
}
}
......@@ -16,7 +16,7 @@ public class Group : Capture
internal int _capcount;
internal CaptureCollection? _capcoll;
internal Group(string text, int[] caps, int capcount, string name)
internal Group(string? text, int[] caps, int capcount, string name)
: base(text, capcount == 0 ? 0 : caps[(capcount - 1) * 2], capcount == 0 ? 0 : caps[(capcount * 2) - 1])
{
_caps = caps;
......
......@@ -50,7 +50,7 @@ public class Match : Group
internal bool _balancing; // whether we've done any balancing with this match. If we
// have done balancing, we'll need to do extra work in Tidy().
internal Match(Regex? regex, int capcount, string text, int begpos, int len, int startpos) :
internal Match(Regex? regex, int capcount, string? text, int begpos, int len, int startpos) :
base(text, new int[2], 0, "0")
{
_regex = regex;
......@@ -66,7 +66,7 @@ public class Match : Group
/// <summary>Returns an empty Match object.</summary>
public static Match Empty { get; } = new Match(null, 1, string.Empty, 0, 0, 0);
internal void Reset(Regex regex, string text, int textbeg, int textend, int textstart)
internal void Reset(Regex regex, string? text, int textbeg, int textend, int textstart)
{
_regex = regex;
Text = text;
......@@ -84,6 +84,16 @@ internal void Reset(Regex regex, string text, int textbeg, int textend, int text
_groupcoll?.Reset();
}
/// <summary>
/// Gets a value that is <see langword="true"/> when this object represents a successful match and
/// <see langword="false"/> otherwise.
/// </summary>
/// <remarks>
/// The public <see cref="Group.Success"/> property requires a <see cref="Match"/> to call <see cref="Match.Tidy(int)"/>
/// first in order to report the correct value. This property, in contrast, returns the correct value right after a
/// match gets calculated, meaning that it returns <see langword="true"/> right after
/// <see cref="RegexRunner.Capture(int, int, int)"/> has been called.
/// </remarks>
internal bool FoundMatch
{
    get { return _matchcount[0] > 0; }
}
public virtual GroupCollection Groups => _groupcoll ??= new GroupCollection(this, null);
/// <summary>
......@@ -94,6 +104,7 @@ internal void Reset(Regex regex, string text, int textbeg, int textend, int text
public Match NextMatch()
{
Regex? r = _regex;
Debug.Assert(Text != null);
return r != null ?
r.Run(false, Length, Text, _textbeg, _textend - _textbeg, _textpos)! :
this;
......@@ -338,7 +349,7 @@ internal sealed class MatchSparse : Match
{
private new readonly Hashtable _caps;
internal MatchSparse(Regex regex, Hashtable caps, int capcount, string text, int begpos, int len, int startpos) :
internal MatchSparse(Regex regex, Hashtable caps, int capcount, string? text, int begpos, int len, int startpos) :
base(regex, capcount, text, begpos, len, startpos)
{
_caps = caps;
......
......@@ -13,6 +13,18 @@ public partial class Regex
public static bool IsMatch(string input, [StringSyntax(StringSyntaxAttribute.Regex)] string pattern) =>
RegexCache.GetOrAdd(pattern).IsMatch(input);
/// <summary>
/// Indicates whether the specified regular expression finds a match in the specified input span.
/// </summary>
/// <param name="input">The span to search for a match.</param>
/// <param name="pattern">The regular expression pattern to match.</param>
/// <returns><see langword="true"/> if the regular expression finds a match; otherwise, <see langword="false"/>.</returns>
/// <exception cref="ArgumentException">A regular expression parsing error occurred.</exception>
/// <exception cref="ArgumentNullException"><paramref name="pattern"/> is <see langword="null"/>.</exception>
/// <exception cref="RegexMatchTimeoutException">A time-out occurred.</exception>
public static bool IsMatch(ReadOnlySpan<char> input, [StringSyntax(StringSyntaxAttribute.Regex)] string pattern)
{
    // Resolve the Regex for this pattern through the cache, then defer to its span-based instance overload.
    return RegexCache.GetOrAdd(pattern).IsMatch(input);
}
/// <summary>
/// Searches the input string for one or more occurrences of the text
/// supplied in the pattern parameter with matching options supplied in the options
......@@ -21,9 +33,39 @@ public partial class Regex
public static bool IsMatch(string input, [StringSyntax(StringSyntaxAttribute.Regex, "options")] string pattern, RegexOptions options) =>
RegexCache.GetOrAdd(pattern, options, s_defaultMatchTimeout).IsMatch(input);
/// <summary>
/// Indicates whether the specified regular expression finds a match in the specified input span, using the
/// specified matching options.
/// </summary>
/// <param name="input">The span to search for a match.</param>
/// <param name="pattern">The regular expression pattern to match.</param>
/// <param name="options">A bitwise combination of the enumeration values that provide options for matching.</param>
/// <returns><see langword="true"/> if the regular expression finds a match; otherwise, <see langword="false"/>.</returns>
/// <exception cref="ArgumentException">A regular expression parsing error occurred.</exception>
/// <exception cref="ArgumentNullException"><paramref name="pattern"/> is <see langword="null"/>.</exception>
/// <exception cref="RegexMatchTimeoutException">A time-out occurred.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="options"/> is not a valid <see cref="RegexOptions"/> value.</exception>
public static bool IsMatch(ReadOnlySpan<char> input, [StringSyntax(StringSyntaxAttribute.Regex, "options")] string pattern, RegexOptions options)
{
    // The default match timeout is used because this overload does not accept one.
    return RegexCache.GetOrAdd(pattern, options, s_defaultMatchTimeout).IsMatch(input);
}
/// <summary>
/// Indicates whether the specified regular expression finds a match in the specified input string, using the
/// specified matching options and time-out interval.
/// </summary>
/// <param name="input">The string to search for a match.</param>
/// <param name="pattern">The regular expression pattern to match.</param>
/// <param name="options">A bitwise combination of the enumeration values that provide options for matching.</param>
/// <param name="matchTimeout">The time-out interval passed to the cache lookup together with the pattern and options.</param>
/// <returns>The result of the instance <c>IsMatch(string)</c> call on the cached <see cref="Regex"/>.</returns>
public static bool IsMatch(string input, [StringSyntax(StringSyntaxAttribute.Regex, "options")] string pattern, RegexOptions options, TimeSpan matchTimeout) =>
RegexCache.GetOrAdd(pattern, options, matchTimeout).IsMatch(input);
/// <summary>
/// Indicates whether the specified regular expression finds a match in the specified input span, using the
/// specified matching options and time-out interval.
/// </summary>
/// <param name="input">The span to search for a match.</param>
/// <param name="pattern">The regular expression pattern to match.</param>
/// <param name="options">A bitwise combination of the enumeration values that provide options for matching.</param>
/// <param name="matchTimeout">A time-out interval, or <see cref="Regex.InfiniteMatchTimeout"/> to indicate that the method should not time out.</param>
/// <returns><see langword="true"/> if the regular expression finds a match; otherwise, <see langword="false"/>.</returns>
/// <exception cref="ArgumentException">A regular expression parsing error occurred.</exception>
/// <exception cref="ArgumentNullException"><paramref name="pattern"/> is <see langword="null"/>.</exception>
/// <exception cref="RegexMatchTimeoutException">A time-out occurred.</exception>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="options"/> is not a valid <see cref="RegexOptions"/> value, or <paramref name="matchTimeout"/> is
/// negative, zero, or greater than approximately 24 days.
/// </exception>
public static bool IsMatch(ReadOnlySpan<char> input, [StringSyntax(StringSyntaxAttribute.Regex, "options")] string pattern, RegexOptions options, TimeSpan matchTimeout)
{
    // Resolve the Regex for this pattern/options/timeout through the cache, then defer to its span-based overload.
    return RegexCache.GetOrAdd(pattern, options, matchTimeout).IsMatch(input);
}
/// <summary>
/// Searches the input string for one or more matches using the previous pattern,
/// options, and starting position.
......@@ -38,6 +80,15 @@ public bool IsMatch(string input)
return Run(quick: true, -1, input, 0, input.Length, UseOptionR() ? input.Length : 0) is null;
}
/// <summary>
/// Determines whether the regular expression specified in the Regex constructor matches anywhere in the given input span.
/// </summary>
/// <param name="input">The span to search for a match.</param>
/// <returns><see langword="true"/> if the regular expression finds a match; otherwise, <see langword="false"/>.</returns>
/// <exception cref="RegexMatchTimeoutException">A time-out occurred.</exception>
public bool IsMatch(ReadOnlySpan<char> input)
{
    // When UseOptionR() is true the scan starts from the end of the input; otherwise from index 0.
    int startat = UseOptionR() ? input.Length : 0;

    // The span-based Run returns null when a match was found.
    return Run(input, startat) is null;
}
/// <summary>
/// Searches the input string for one or more matches using the previous pattern and options,
/// with a new starting position.
......
......@@ -372,7 +372,90 @@ protected void InitializeReferences()
RegexRunner runner = Interlocked.Exchange(ref _runner, null) ?? CreateRunner();
try
{
return runner.Scan(this, input, beginning, beginning + length, startat, prevlen, quick, internalMatchTimeout);
runner.InitializeTimeout(internalMatchTimeout);
runner.runtext = input;
ReadOnlySpan<char> span = input.AsSpan(beginning, length);
runner.InitializeForScan(this, span, startat - beginning, quick);
int stoppos = RightToLeft ? 0 : span.Length;
// If previous match was empty or failed, advance by one before matching.
if (prevlen == 0)
{
if (runner.runtextstart == stoppos)
{
return RegularExpressions.Match.Empty;
}
runner.runtextpos += RightToLeft ? -1 : 1;
}
return InternalPerformScan(quick, input, beginning, runner, span, returnNullIfQuick: true);
}
finally
{
runner.runtext = null; // drop reference to text to avoid keeping it alive in a cache.
_runner = runner;
}
}
/// <summary>
/// Runs <paramref name="runner"/>'s scan over <paramref name="span"/> and turns the runner's match state
/// into the value handed back to the caller.
/// </summary>
/// <param name="quick">When <see langword="false"/>, the match object is detached from the runner and given <paramref name="input"/> as its text.</param>
/// <param name="input">The original input string stored into a returned match when <paramref name="quick"/> is <see langword="false"/>.</param>
/// <param name="beginning">Offset added to the match's indices when it is non-zero.</param>
/// <param name="runner">The runner that performs the scan and owns the match object.</param>
/// <param name="span">The portion of the input to scan.</param>
/// <param name="returnNullIfQuick">When both this and <paramref name="quick"/> are <see langword="true"/>, a successful match is reported as <see langword="null"/>.</param>
/// <returns>
/// <c>Match.Empty</c> when nothing matched; <see langword="null"/> when a match was found and both
/// <paramref name="quick"/> and <paramref name="returnNullIfQuick"/> are set; otherwise the tidied match object.
/// </returns>
private static Match? InternalPerformScan(bool quick, string input, int beginning, RegexRunner runner, ReadOnlySpan<char> span, bool returnNullIfQuick)
{
    runner.Scan(span);

    Match? result = runner.runmatch;
    Debug.Assert(result is not null);

    if (!result.FoundMatch)
    {
        // Nothing matched. Match.Empty is returned, so the runner keeps its match object for reuse;
        // clear the text it may be holding so the input is not kept alive by a cached runner.
        runner.runmatch!.Text = null;
        return RegularExpressions.Match.Empty;
    }

    if (quick)
    {
        if (returnNullIfQuick)
        {
            // The caller only wants to know that a match exists.
            result.Text = null;
            return null;
        }
    }
    else
    {
        // The match object is about to escape to the caller: attach the input and detach it from the runner.
        result.Text = input;
        runner.runmatch = null;
    }

    result.Tidy(runner.runtextpos);

    // Offsets were computed relative to the scanned span; shift them when the span did not start at index 0.
    if (beginning != 0)
    {
        result.AddBeginningToIndex(beginning);
    }

    return result;
}
internal Match? Run(ReadOnlySpan<char> input, int startat)
{
// startat parameter is always either 0 or input.Length since public API for IsMatch doesn't have an overload
// that takes in startat.
Debug.Assert(startat <= input.Length);
RegexRunner runner = Interlocked.Exchange(ref _runner, null) ?? CreateRunner();
try
{
runner.InitializeTimeout(internalMatchTimeout);
runner.InitializeForScan(this, input, startat, quick: true);
runner.Scan(input);
// If runmatch is null it means that an override of Scan didn't implement it correctly, so we
// let the resulting null reference exception propagate, since there are many ways to end up in an erroneous state.
return runner.runmatch!.FoundMatch ? null : RegularExpressions.Match.Empty;
}
finally
{
......@@ -387,10 +470,84 @@ internal void Run<TState>(string input, int startat, ref TState state, MatchCall
RegexRunner runner = Interlocked.Exchange(ref _runner, null) ?? CreateRunner();
try
{
runner.ScanInternal(this, input, startat, ref state, callback, reuseMatchObject, internalMatchTimeout);
runner.InitializeTimeout(internalMatchTimeout);
runner.runtext = input;
int runtextpos = startat;
while (true)
{
runner.InitializeForScan(this, input, startat, false);
runner.runtextpos = runtextpos;
int stoppos = RightToLeft ? 0 : input.Length;
Match? match = InternalPerformScan(reuseMatchObject, input, 0, runner, input, returnNullIfQuick: false);
Debug.Assert(match is not null);
// if we got a match, then call the callback function with the match and prepare for next iteration.
if (match.Success)
{
if (!reuseMatchObject)
{
// We're not reusing match objects, so null out our field reference to the instance.
// It'll be recreated the next time one is needed.
runner.runmatch = null;
}
if (!callback(ref state, match))
{
// If the callback returns false, we're done.
if (reuseMatchObject)
{
// We're reusing the single match instance, so clear out its text as well.
// We don't do this if we're not reusing instances, as in that case we're
// dropping the whole reference to the match, and we no longer own the instance
// having handed it out to the callback.
match.Text = null;
}
return;
}
// Now that we've matched successfully, update the starting position to reflect
// the current position, just as Match.NextMatch() would pass in _textpos as textstart.
runtextpos = startat = runner.runtextpos;
// Reset state for another iteration.
runner.runtrackpos = runner.runtrack!.Length;
runner.runstackpos = runner.runstack!.Length;
runner.runcrawlpos = runner.runcrawl!.Length;
if (match.Length == 0)
{
if (runner.runtextpos == stoppos)
{
if (reuseMatchObject)
{
// See above comment.
match.Text = null;
}
return;
}
runtextpos += RightToLeft ? -1 : 1;
}
// Loop around to perform next match from where we left off.
continue;
}
else
{
// We failed to match at this position. If we're at the stopping point, we're done.
if (runner.runtextpos == stoppos)
{
return;
}
}
}
}
finally
{
runner.runtext = null; // drop reference to text to avoid keeping it alive in a cache.
_runner = runner;
}
}
......
......@@ -3,6 +3,7 @@
using System.Collections.Generic;
using System.Diagnostics;
using System.Diagnostics.CodeAnalysis;
using System.Globalization;
using System.Reflection;
using System.Reflection.Emit;
......@@ -20,7 +21,6 @@ internal abstract class RegexCompiler
private static readonly FieldInfo s_runtextendField = RegexRunnerField("runtextend");
private static readonly FieldInfo s_runtextstartField = RegexRunnerField("runtextstart");
private static readonly FieldInfo s_runtextposField = RegexRunnerField("runtextpos");
private static readonly FieldInfo s_runtextField = RegexRunnerField("runtext");
private static readonly FieldInfo s_runstackField = RegexRunnerField("runstack");
private static readonly MethodInfo s_captureMethod = RegexRunnerMethod("Capture");
......@@ -29,9 +29,9 @@ internal abstract class RegexCompiler
private static readonly MethodInfo s_isMatchedMethod = RegexRunnerMethod("IsMatched");
private static readonly MethodInfo s_matchLengthMethod = RegexRunnerMethod("MatchLength");
private static readonly MethodInfo s_matchIndexMethod = RegexRunnerMethod("MatchIndex");
private static readonly MethodInfo s_isBoundaryMethod = RegexRunnerMethod("IsBoundary");
private static readonly MethodInfo s_isBoundaryMethod = typeof(RegexRunner).GetMethod("IsBoundary", BindingFlags.NonPublic | BindingFlags.Instance, new[] { typeof(ReadOnlySpan<char>), typeof(int) })!;
private static readonly MethodInfo s_isWordCharMethod = RegexRunnerMethod("IsWordChar");
private static readonly MethodInfo s_isECMABoundaryMethod = RegexRunnerMethod("IsECMABoundary");
private static readonly MethodInfo s_isECMABoundaryMethod = typeof(RegexRunner).GetMethod("IsECMABoundary", BindingFlags.NonPublic | BindingFlags.Instance, new[] { typeof(ReadOnlySpan<char>), typeof(int) })!;
private static readonly MethodInfo s_crawlposMethod = RegexRunnerMethod("Crawlpos");
private static readonly MethodInfo s_charInClassMethod = RegexRunnerMethod("CharInClass");
private static readonly MethodInfo s_checkTimeoutMethod = RegexRunnerMethod("CheckTimeout");
......@@ -180,6 +180,9 @@ internal abstract class RegexCompiler
/// <summary>Emits <c>ldarg.0</c>, loading argument 0 onto the evaluation stack.</summary>
protected void Ldthis()
{
    _ilg!.Emit(OpCodes.Ldarg_0);
}
/// <summary>Emits <c>ldarg.1</c>, loading argument 1 onto the evaluation stack.</summary>
private void Ldarg_1()
{
    _ilg!.Emit(OpCodes.Ldarg_1);
}
/// <summary>A macro for Ldthis(); Ldfld();</summary>
protected void Ldthisfld(FieldInfo ft)
{
......@@ -187,6 +190,10 @@ protected void Ldthisfld(FieldInfo ft)
_ilg!.Emit(OpCodes.Ldfld, ft);
}
/// <summary>Loads the address of the argument at <paramref name="position"/> onto the evaluation stack.</summary>
/// <param name="position">The index of the argument whose address needs to be fetched.</param>
/// <remarks>
/// <c>ldarga.s</c> encodes its operand as a single byte, so the operand must be emitted through the
/// <see cref="byte"/> overload of <c>Emit</c>. The <see cref="int"/> overload writes four operand bytes; that
/// only happens to work for small indices because the extra zero bytes decode as <c>nop</c>, and it would
/// produce a corrupt instruction stream for an index above 255.
/// </remarks>
private void Ldarga_s(int position)
{
    Debug.Assert(position >= 0, "Argument index must be non-negative.");

    if (position <= byte.MaxValue)
    {
        // Short form: one-byte argument index.
        _ilg!.Emit(OpCodes.Ldarga_S, (byte)position);
    }
    else
    {
        // Long form: two-byte argument index, for indices that do not fit the short encoding.
        _ilg!.Emit(OpCodes.Ldarga, (short)position);
    }
}
/// <summary>A macro for Ldthis(); Ldfld(); Stloc();</summary>
private void Mvfldloc(FieldInfo ft, LocalBuilder lt)
{
......@@ -271,6 +278,9 @@ private void Mvfldloc(FieldInfo ft, LocalBuilder lt)
private void Switch(Label[] table)
{
    // Emits a switch instruction that dispatches through the supplied jump table.
    _ilg!.Emit(OpCodes.Switch, table);
}
/// <summary>Declares a new local of type <see cref="bool"/> in the method being generated.</summary>
private LocalBuilder DeclareBool()
{
    return _ilg!.DeclareLocal(typeof(bool));
}
/// <summary>Declares a new local of type <see cref="int"/> in the method being generated.</summary>
private LocalBuilder DeclareInt32()
{
    return _ilg!.DeclareLocal(typeof(int));
}
......@@ -353,8 +363,8 @@ private void CallToLower()
}
}
/// <summary>Generates the implementation for FindFirstChar.</summary>
protected void EmitFindFirstChar()
/// <summary>Generates the implementation for TryFindNextPossibleStartingPosition.</summary>
protected void EmitTryFindNextPossibleStartingPosition()
{
Debug.Assert(_code != null);
_int32LocalsPool?.Clear();
......@@ -388,11 +398,10 @@ FindNextStartingPositionMode.FixedSets_LeftToRight_CaseInsensitive or
// Load necessary locals
// int pos = base.runtextpos;
// int end = base.runtextend;
// ReadOnlySpan<char> inputSpan = base.runtext.AsSpan();
// ReadOnlySpan<char> inputSpan = input;
Mvfldloc(s_runtextposField, pos);
Mvfldloc(s_runtextendField, end);
Ldthisfld(s_runtextField);
Call(s_stringAsSpanMethod);
Ldarg_1();
Stloc(inputSpan);
// Generate length check. If the input isn't long enough to possibly match, fail quickly.
......@@ -470,7 +479,7 @@ bool GenerateAnchors()
{
Label label;
// Anchors that fully implement FindFirstChar, with a check that leads to immediate success or failure determination.
// Anchors that fully implement TryFindNextPossibleStartingPosition, with a check that leads to immediate success or failure determination.
switch (_code.FindOptimizations.FindMode)
{
case FindNextStartingPositionMode.LeadingAnchor_LeftToRight_Beginning:
......@@ -1016,8 +1025,8 @@ void EmitLiteralAfterAtomicLoop()
}
}
/// <summary>Generates the implementation for Go.</summary>
protected void EmitGo()
/// <summary>Generates the implementation for TryMatchAtCurrentPosition.</summary>
protected void EmitTryMatchAtCurrentPosition()
{
// In .NET Framework and up through .NET Core 3.1, the code generated for RegexOptions.Compiled was effectively an unrolled
// version of what RegexInterpreter would process. The RegexNode tree would be turned into a series of opcodes via
......@@ -1036,7 +1045,7 @@ protected void EmitGo()
// label that code should jump back to when backtracking. That way, a subsequent EmitXx function doesn't need to know exactly
// where to jump: it simply always jumps to "doneLabel" on match failure, and "doneLabel" is always configured to point to
// the right location. In an expression without backtracking, or before any backtracking constructs have been encountered,
// "doneLabel" is simply the final return location from the Go method that will undo any captures and exit, signaling to
// "doneLabel" is simply the final return location from the TryMatchAtCurrentPosition method that will undo any captures and exit, signaling to
// the calling scan loop that nothing was matched.
Debug.Assert(_code != null);
......@@ -1051,16 +1060,16 @@ protected void EmitGo()
// Skip the Capture node. We handle the implicit root capture specially.
node = node.Child(0);
// In some limited cases, FindFirstChar will only return true if it successfully matched the whole expression.
// We can special case these to do essentially nothing in Go other than emit the capture.
// In some limited cases, TryFindNextPossibleStartingPosition will only return true if it successfully matched the whole expression.
// We can special case these to do essentially nothing in TryMatchAtCurrentPosition other than emit the capture.
switch (node.Kind)
{
case RegexNodeKind.Multi or RegexNodeKind.Notone or RegexNodeKind.One or RegexNodeKind.Set when !IsCaseInsensitive(node):
// This is the case for single and multiple characters, though the whole thing is only guaranteed
// to have been validated in FindFirstChar when doing case-sensitive comparison.
// to have been validated in TryFindNextPossibleStartingPosition when doing case-sensitive comparison.
// base.Capture(0, base.runtextpos, base.runtextpos + node.Str.Length);
// base.runtextpos = base.runtextpos + node.Str.Length;
// return;
// return true;
Ldthis();
Dup();
Ldc(0);
......@@ -1073,6 +1082,7 @@ protected void EmitGo()
Ldc(node.Kind == RegexNodeKind.Multi ? node.Str!.Length : 1);
Add();
Stfld(s_runtextposField);
Ldc(1);
Ret();
return;
......@@ -1097,10 +1107,9 @@ protected void EmitGo()
// CultureInfo culture = CultureInfo.CurrentCulture; // only if the whole expression or any subportion is ignoring case, and we're not using invariant
InitializeCultureForGoIfNecessary();
// ReadOnlySpan<char> inputSpan = base.runtext.AsSpan();
// ReadOnlySpan<char> inputSpan = input;
// int end = base.runtextend;
Ldthisfld(s_runtextField);
Call(s_stringAsSpanMethod);
Ldarg_1();
Stloc(inputSpan);
Mvfldloc(s_runtextendField, end);
......@@ -1154,6 +1163,9 @@ protected void EmitGo()
Ldloc(originalPos);
Ldloc(pos);
Call(s_captureMethod);
// return true;
Ldc(1);
Ret();
// If the graph contained captures, undo any remaining to handle failed matches.
if (expressionHasCaptures)
......@@ -1184,7 +1196,8 @@ protected void EmitGo()
MarkLabel(originalDoneLabel);
}
// return;
// return false;
Ldc(0);
Ret();
// Generated code successfully.
......@@ -2311,16 +2324,15 @@ void EmitBoundary(RegexNode node)
{
Debug.Assert(node.Kind is RegexNodeKind.Boundary or RegexNodeKind.NonBoundary or RegexNodeKind.ECMABoundary or RegexNodeKind.NonECMABoundary, $"Unexpected type: {node.Kind}");
// if (!IsBoundary(pos + sliceStaticPos, base.runtextbeg, end)) goto doneLabel;
// if (!IsBoundary(inputSpan, pos + sliceStaticPos)) goto doneLabel;
Ldthis();
Ldloc(inputSpan);
Ldloc(pos);
if (sliceStaticPos > 0)
{
Ldc(sliceStaticPos);
Add();
}
Ldthisfld(s_runtextbegField);
Ldloc(end);
switch (node.Kind)
{
case RegexNodeKind.Boundary:
......@@ -3953,6 +3965,52 @@ void EmitStackPop()
}
}
/// <summary>
/// Generates the IL for the Scan method: a loop that repeatedly asks
/// <paramref name="tryFindNextStartingPositionMethod"/> for a candidate position and then calls
/// <paramref name="tryMatchAtCurrentPositionMethod"/> there, advancing <c>runtextpos</c> by one after each failed attempt.
/// </summary>
/// <param name="tryFindNextStartingPositionMethod">The generated method invoked as the loop condition; the loop exits when it returns false.</param>
/// <param name="tryMatchAtCurrentPositionMethod">The generated method invoked in the loop body; the loop exits when it returns true.</param>
/// <remarks>
/// Both generated methods are called with argument 0 (the runner) and argument 1 (the input span).
/// NOTE(review): the emitted loop only ever advances forward and stops at the span's length; it contains no
/// right-to-left handling, so it presumably relies on right-to-left patterns not reaching this path - confirm.
/// </remarks>
protected void EmitScan(DynamicMethod tryFindNextStartingPositionMethod, DynamicMethod tryMatchAtCurrentPositionMethod)
{
    // Single exit point that every "we're done" branch below jumps to.
    Label returnLabel = DefineLabel();

    // while (TryFindNextPossibleStartingPosition(text))
    // The label is marked before the condition so the back-edge at the bottom re-evaluates it.
    Label whileLoopBody = DefineLabel();
    MarkLabel(whileLoopBody);
    Ldthis();
    Ldarg_1();
    Call(tryFindNextStartingPositionMethod);
    BrfalseFar(returnLabel);

    // The timeout check is only emitted when a timeout is in effect.
    if (_hasTimeout)
    {
        // CheckTimeout();
        Ldthis();
        Call(s_checkTimeoutMethod);
    }

    // if (TryMatchAtCurrentPosition(text) || runtextpos == text.Length)
    //   return;
    Ldthis();
    Ldarg_1();
    Call(tryMatchAtCurrentPositionMethod);
    BrtrueFar(returnLabel);
    Ldthisfld(s_runtextposField);
    // The span's Length getter is an instance call on a struct, so it needs the argument's address rather than its value.
    Ldarga_s(1);
    Call(s_spanGetLengthMethod);
    Ceq();
    BrtrueFar(returnLabel);

    // runtextpos += 1
    Ldthis();
    Ldthisfld(s_runtextposField);
    Ldc(1);
    Add();
    Stfld(s_runtextposField);

    // End loop body.
    BrFar(whileLoopBody);

    // return;
    MarkLabel(returnLabel);
    Ret();
}
private void InitializeCultureForGoIfNecessary()
{
_textInfo = null;
......
......@@ -324,15 +324,46 @@ private bool MatchRef(int index, int length, ReadOnlySpan<char> inputSpan)
private void Backwardnext()
{
    // Moves runtextpos one position: +1 when matching right-to-left, -1 otherwise.
    if (_rightToLeft)
    {
        runtextpos += 1;
    }
    else
    {
        runtextpos += -1;
    }
}
protected override bool FindFirstChar() =>
_code.FindOptimizations.TryFindNextStartingPosition(runtext!, ref runtextpos, runtextbeg, runtextstart, runtextend);
/// <summary>
/// Searches <paramref name="text"/> for a match: repeatedly finds the next candidate starting position
/// and attempts a match there, until a match succeeds, no further candidate exists, or the stopping
/// position for the scan direction is reached.
/// </summary>
/// <param name="text">The input span to scan.</param>
protected internal override void Scan(ReadOnlySpan<char> text)
{
    Debug.Assert(runregex is not null);
    Debug.Assert(runtrack is not null);
    Debug.Assert(runstack is not null);
    Debug.Assert(runcrawl is not null);

    // Left-to-right scans step forward by 1 and stop at the end of the text;
    // right-to-left scans step by -1 and stop at index 0.
    bool rightToLeft = runregex.RightToLeft;
    int step = rightToLeft ? -1 : 1;
    int limit = rightToLeft ? 0 : text.Length;

    while (true)
    {
        if (!_code.FindOptimizations.TryFindNextStartingPosition(text, ref runtextpos, runtextbeg, runtextstart, runtextend))
        {
            // No further candidate starting position.
            return;
        }

        CheckTimeout();

        if (TryMatchAtCurrentPosition(text))
        {
            return;
        }

        if (runtextpos == limit)
        {
            // The attempt failed at the stopping position; there is nowhere left to try.
            return;
        }

        // Reset the backtracking state before the next attempt.
        runtrackpos = runtrack.Length;
        runstackpos = runstack.Length;
        runcrawlpos = runcrawl.Length;

        runtextpos += step;
    }
}
protected override void Go()
private bool TryMatchAtCurrentPosition(ReadOnlySpan<char> inputSpan)
{
SetOperator((RegexOpcode)_code.Codes[0]);
_codepos = 0;
int advance = -1;
ReadOnlySpan<char> inputSpan = runtext;
while (true)
{
......@@ -354,7 +385,7 @@ protected override void Go()
switch (_operator)
{
case RegexOpcode.Stop:
return;
return runmatch!.FoundMatch;
case RegexOpcode.Nothing:
break;
......@@ -711,7 +742,7 @@ protected override void Go()
continue;
case RegexOpcode.Boundary:
if (!IsBoundary(runtextpos, runtextbeg, runtextend))
if (!IsBoundary(inputSpan, runtextpos))
{
break;
}
......@@ -719,7 +750,7 @@ protected override void Go()
continue;
case RegexOpcode.NonBoundary:
if (IsBoundary(runtextpos, runtextbeg, runtextend))
if (IsBoundary(inputSpan, runtextpos))
{
break;
}
......@@ -727,7 +758,7 @@ protected override void Go()
continue;
case RegexOpcode.ECMABoundary:
if (!IsECMABoundary(runtextpos, runtextbeg, runtextend))
if (!IsECMABoundary(inputSpan, runtextpos))
{
break;
}
......@@ -735,7 +766,7 @@ protected override void Go()
continue;
case RegexOpcode.NonECMABoundary:
if (IsECMABoundary(runtextpos, runtextbeg, runtextend))
if (IsECMABoundary(inputSpan, runtextpos))
{
break;
}
......
......@@ -24,7 +24,7 @@ internal sealed class RegexLWCGCompiler : RegexCompiler
private static readonly bool s_includePatternInName = Environment.GetEnvironmentVariable(IncludePatternInNamesEnvVar) == "1";
/// <summary>Parameter types for the generated TryFindNextPossibleStartingPosition and TryMatchAtCurrentPosition methods.</summary>
private static readonly Type[] s_paramTypes = new Type[] { typeof(RegexRunner) };
private static readonly Type[] s_paramTypes = new Type[] { typeof(RegexRunner), typeof(ReadOnlySpan<char>) };
/// <summary>Id number to use for the next compiled regex.</summary>
private static int s_regexCount;
......@@ -52,17 +52,20 @@ internal sealed class RegexLWCGCompiler : RegexCompiler
description = string.Concat("_", pattern.Length > DescriptionLimit ? pattern.AsSpan(0, DescriptionLimit) : pattern);
}
DynamicMethod findFirstCharMethod = DefineDynamicMethod($"Regex{regexNum}_FindFirstChar{description}", typeof(bool), typeof(CompiledRegexRunner));
EmitFindFirstChar();
DynamicMethod tryfindNextPossibleStartPositionMethod = DefineDynamicMethod($"Regex{regexNum}_TryFindNextPossibleStartingPosition{description}", typeof(bool), typeof(CompiledRegexRunner), s_paramTypes);
EmitTryFindNextPossibleStartingPosition();
DynamicMethod goMethod = DefineDynamicMethod($"Regex{regexNum}_Go{description}", null, typeof(CompiledRegexRunner));
EmitGo();
DynamicMethod tryMatchAtCurrentPositionMethod = DefineDynamicMethod($"Regex{regexNum}_TryMatchAtCurrentPosition{description}", typeof(bool), typeof(CompiledRegexRunner), s_paramTypes);
EmitTryMatchAtCurrentPosition();
return new CompiledRegexRunnerFactory(goMethod, findFirstCharMethod, code.TrackCount);
DynamicMethod scanMethod = DefineDynamicMethod($"Regex{regexNum}_Scan{description}", null, typeof(CompiledRegexRunner), new[] { typeof(RegexRunner), typeof(ReadOnlySpan<char>) });
EmitScan(tryfindNextPossibleStartPositionMethod, tryMatchAtCurrentPositionMethod);
return new CompiledRegexRunnerFactory(scanMethod);
}
/// <summary>Begins the definition of a new method (no args) with a specified return value.</summary>
private DynamicMethod DefineDynamicMethod(string methname, Type? returntype, Type hostType)
private DynamicMethod DefineDynamicMethod(string methname, Type? returntype, Type hostType, Type[] paramTypes)
{
// We're claiming that these are static methods, but really they are instance methods.
// By giving them a parameter which represents "this", we're tricking them into
......@@ -71,7 +74,7 @@ private DynamicMethod DefineDynamicMethod(string methname, Type? returntype, Typ
const MethodAttributes Attribs = MethodAttributes.Public | MethodAttributes.Static;
const CallingConventions Conventions = CallingConventions.Standard;
var dm = new DynamicMethod(methname, Attribs, Conventions, returntype, s_paramTypes, hostType, skipVisibility: false);
var dm = new DynamicMethod(methname, Attribs, Conventions, returntype, paramTypes, hostType, skipVisibility: false);
_ilg = dm.GetILGenerator();
return dm;
}
......
......@@ -88,23 +88,16 @@ internal Runner(SymbolicRegexMatcher<TSetType> matcher)
_perThreadData = matcher.CreatePerThreadData();
}
protected override void InitTrackCount() { } // nop, no backtracking
protected override bool FindFirstChar() => true; // The logic is all in Go.
protected override void Go()
protected internal override void Scan(ReadOnlySpan<char> text)
{
int beginning = runtextbeg;
ReadOnlySpan<char> inputSpan = runtext.AsSpan(beginning, runtextend - beginning);
// Perform the match.
SymbolicMatch pos = _matcher.FindMatch(quick, inputSpan, runtextpos - beginning, _perThreadData);
SymbolicMatch pos = _matcher.FindMatch(quick, text, runtextpos, _perThreadData);
// Transfer the result back to the RegexRunner state.
if (pos.Success)
{
// If we successfully matched, capture the match, and then jump the current position to the end of the match.
int start = pos.Index + beginning;
int start = pos.Index;
int end = start + pos.Length;
if (!quick && pos.CaptureStarts != null)
{
......@@ -115,7 +108,7 @@ protected override void Go()
if (pos.CaptureStarts[cap] >= 0)
{
Debug.Assert(pos.CaptureEnds[cap] >= pos.CaptureStarts[cap]);
Capture(cap, pos.CaptureStarts[cap] + beginning, pos.CaptureEnds[cap] + beginning);
Capture(cap, pos.CaptureStarts[cap], pos.CaptureEnds[cap]);
}
}
}
......
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
using System.Reflection;
using System.Runtime.CompilerServices;
using System.Threading;
using Xunit;
namespace System.Text.RegularExpressions.Tests
{
public class CustomDerivedRegexScenarioTest
{
    /// <summary>
    /// Drives the runner's Scan method through <see cref="CustomDerivedRegex"/> and checks the results for
    /// matching input, non-matching input, a non-zero start position, and quick mode.
    /// </summary>
    [Fact]
    public void CallProtectedScanMethodFromCustomDerivedRegex()
    {
        CustomDerivedRegex regex = new();

        // Every case scans the whole input (textbeg 0, textend text.Length) with prevlen -1.
        Match? Scan(string text, int textstart, bool quick) =>
            regex.CallScanDirectly(regex, text, 0, text.Length, textstart, -1, quick);

        Assert.True(Scan("3456", 0, quick: false).Success);
        Assert.False(Scan("456", 0, quick: false).Success);
        Assert.Equal("45", Scan("45456", 0, quick: false).Value);
        Assert.Equal("896", Scan("45896456", 2, quick: false).Value);
        Assert.Equal(Match.Empty, Scan("I dont match", 0, quick: false));

        // In quick mode a successful match is reported as null.
        Assert.Null(Scan("3456", 0, quick: true));
    }
}
/// <summary>
/// This type was generated using an earlier version of the Regex Source Generator which still overrides Go and FindFirstChar.
/// The purpose of this class is to validate that if a derived RegexRunner is invoking the protected Scan methods, they should call
/// the overridden Go and FindFirstChar methods and return the expected results.
/// </summary>
internal class CustomDerivedRegex : Regex
{
// The runner obtained from this regex; assigned once in the constructor.
private readonly CustomRegexRunnerFactory.CustomRegexRunner runner;

/// <summary>
/// Sets the protected Regex state (pattern, options, timeout, runner factory, capture count) by hand and
/// then obtains a runner by invoking Regex's non-public CreateRunner method through reflection.
/// </summary>
/// <exception cref="MissingMethodException">Regex no longer exposes a non-public instance method named CreateRunner.</exception>
/// <exception cref="InvalidCastException">CreateRunner returned a runner that is not a CustomRegexRunner.</exception>
public CustomDerivedRegex()
{
    pattern = /*lang=regex*/@"\G(\d{1,3})(?=(?:\d{3})+\b)";
    roptions = RegexOptions.Compiled;
    internalMatchTimeout = Timeout.InfiniteTimeSpan;
    factory = new CustomRegexRunnerFactory();
    capsize = 2;

    // Fail at construction with a descriptive exception instead of a NullReferenceException later on.
    MethodInfo createRunnerMethod =
        typeof(Regex).GetMethod("CreateRunner", BindingFlags.Instance | BindingFlags.NonPublic)
        ?? throw new MissingMethodException(nameof(Regex), "CreateRunner");

    // A direct cast (rather than 'as') surfaces an unexpected runner type here, not at the first Scan call.
    runner = (CustomRegexRunnerFactory.CustomRegexRunner)createRunnerMethod.Invoke(this, Array.Empty<object>());
}
/// <summary>Forwards all arguments unchanged to the runner's <c>CallScanDirectly</c> and returns its result.</summary>
public Match? CallScanDirectly(Regex regex, string text, int textbeg, int textend, int textstart, int prevlen, bool quick)
{
    return runner.CallScanDirectly(regex, text, textbeg, textend, textstart, prevlen, quick);
}
internal class CustomRegexRunnerFactory : RegexRunnerFactory
{
/// <summary>Creates a new <see cref="CustomRegexRunner"/>.</summary>
protected override RegexRunner CreateInstance()
{
    return new CustomRegexRunner();
}
internal class CustomRegexRunner : RegexRunner
{
/// <summary>Public entry point that forwards all arguments unchanged to the seven-argument <c>Scan</c> method.</summary>
public Match? CallScanDirectly(Regex regex, string text, int textbeg, int textend, int textstart, int prevlen, bool quick)
{
    return Scan(regex, text, textbeg, textend, textstart, prevlen, quick);
}
/// <summary>Sets the runner's track count to the fixed value emitted for this pattern.</summary>
protected override void InitTrackCount()
{
    base.runtrackcount = 12;
}
// Description:
// ○ Match if at the start position.
// ○ 1st capture group.
// ○ Match a Unicode digit greedily at least 1 and at most 3 times.
// ○ Zero-width positive lookahead assertion.
// ○ Loop greedily at least once.
// ○ Match a Unicode digit exactly 3 times.
// ○ Match if at a word boundary.
/// <summary>
/// Reports whether the current position is a viable place to attempt a match.
/// </summary>
/// <returns>
/// <see langword="true"/> when the current position is before the end of the text and not past the
/// start position; otherwise <see langword="false"/>, after moving <c>runtextpos</c> to the end of the text.
/// </returns>
protected override bool FindFirstChar()
{
    int current = runtextpos;
    int limit = runtextend;

    // The pattern begins with a \G anchor, so only a position at (or before) runtextstart can match.
    if (current < limit && current <= runtextstart)
    {
        return true;
    }

    // No starting position found: park the position at the end of the text.
    runtextpos = limit;
    return false;
}
// Attempts to match the pattern at base.runtextpos using goto-based backtracking.
// On success it stores the end of the match in base.runtextpos and records capture group 0 (and group 1);
// on failure it undoes every capture recorded so far and returns without capturing.
// Backtracking state is kept on base.runstack, indexed by the local 'stackpos'.
protected override void Go()
{
    ReadOnlySpan<char> inputSpan = runtext.AsSpan();
    int pos = base.runtextpos, end = base.runtextend;
    int original_pos = pos;
    int charloop_starting_pos = 0, charloop_ending_pos = 0;
    int loop_iteration = 0, loop_starting_pos = 0;
    int stackpos = 0;
    int start = base.runtextstart;
    // 'slice' always covers the not-yet-consumed input [pos, end); it is re-sliced whenever pos is rewound.
    ReadOnlySpan<char> slice = inputSpan.Slice(pos, end - pos);

    // Match if at the start position.
    {
        if (pos != start)
        {
            goto NoMatch;
        }
    }

    // 1st capture group.
    //{
        int capture_starting_pos = pos;

        // Match a Unicode digit greedily at least 1 and at most 3 times.
        //{
            charloop_starting_pos = pos;

            int iteration = 0;
            while (iteration < 3 && (uint)iteration < (uint)slice.Length && char.IsDigit(slice[iteration]))
            {
                iteration++;
            }

            if (iteration == 0)
            {
                goto NoMatch;
            }

            slice = slice.Slice(iteration);
            pos += iteration;

            charloop_ending_pos = pos;
            // The loop must keep at least one digit, so backtracking may not go below start + 1.
            charloop_starting_pos++;
            goto CharLoopEnd;

            // Backtracking entry: restore the saved loop bounds and give back one digit, or fail if none can be given back.
            CharLoopBacktrack:
            UncaptureUntil(base.runstack![--stackpos]);
            StackPop2(base.runstack, ref stackpos, out charloop_ending_pos, out charloop_starting_pos);

            if (charloop_starting_pos >= charloop_ending_pos)
            {
                goto NoMatch;
            }
            pos = --charloop_ending_pos;
            slice = inputSpan.Slice(pos, end - pos);

            CharLoopEnd:
            StackPush3(ref base.runstack!, ref stackpos, charloop_starting_pos, charloop_ending_pos, base.Crawlpos());
        //}

        base.Capture(1, capture_starting_pos, pos);

        StackPush1(ref base.runstack!, ref stackpos, capture_starting_pos);
        goto SkipBacktrack;

        // Backtracking entry for the capture group: restore its start and backtrack into the digit loop.
        CaptureBacktrack:
        capture_starting_pos = base.runstack![--stackpos];
        goto CharLoopBacktrack;

        SkipBacktrack:;
    //}

    // Zero-width positive lookahead assertion.
    {
        // Remembered so the position can be restored once the lookahead succeeds (it consumes no input).
        int positivelookahead_starting_pos = pos;

        // Loop greedily at least once.
        //{
            loop_iteration = 0;
            loop_starting_pos = pos;

            LoopBody:
            StackPush3(ref base.runstack!, ref stackpos, base.Crawlpos(), loop_starting_pos, pos);

            loop_starting_pos = pos;
            loop_iteration++;

            // Match a Unicode digit exactly 3 times.
            {
                if ((uint)slice.Length < 3 ||
                    !char.IsDigit(slice[0]) ||
                    !char.IsDigit(slice[1]) ||
                    !char.IsDigit(slice[2]))
                {
                    goto LoopIterationNoMatch;
                }
            }

            pos += 3;
            slice = slice.Slice(3);

            if (pos != loop_starting_pos || loop_iteration == 0)
            {
                goto LoopBody;
            }
            goto LoopEnd;

            // An iteration failed: drop it and restore the state saved at its start.
            LoopIterationNoMatch:
            loop_iteration--;
            if (loop_iteration < 0)
            {
                goto CaptureBacktrack;
            }
            StackPop2(base.runstack, ref stackpos, out pos, out loop_starting_pos);
            UncaptureUntil(base.runstack![--stackpos]);
            slice = inputSpan.Slice(pos, end - pos);
            // Zero remaining iterations violates the "at least once" minimum, so backtrack further.
            if (loop_iteration == 0)
            {
                goto CaptureBacktrack;
            }
            // NOTE(review): this repeats the check directly above with nothing changed in between, so it can
            // never fire; it is kept exactly as the generator emitted it.
            if (loop_iteration == 0)
            {
                goto CaptureBacktrack;
            }

            LoopEnd:;
        //}

        // Match if at a word boundary.
        {
            if (!base.IsBoundary(pos, base.runtextbeg, end))
            {
                goto LoopIterationNoMatch;
            }
        }

        pos = positivelookahead_starting_pos;
        slice = inputSpan.Slice(pos, end - pos);
    }

    // The input matched.
    base.runtextpos = pos;
    base.Capture(0, original_pos, pos);
    return;

    // The input didn't match.
    NoMatch:
    UncaptureUntil(0);
    return;

    // <summary>Pop 2 values from the backtracking stack.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    static void StackPop2(int[] stack, ref int pos, out int arg0, out int arg1)
    {
        arg0 = stack[--pos];
        arg1 = stack[--pos];
    }

    // <summary>Push 1 value onto the backtracking stack.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    static void StackPush1(ref int[] stack, ref int pos, int arg0)
    {
        // If there's space available for the value, store it.
        int[] s = stack;
        int p = pos;
        if ((uint)p < (uint)s.Length)
        {
            s[p] = arg0;
            pos++;
            return;
        }

        // Otherwise, resize the stack to make room and try again.
        WithResize(ref stack, ref pos, arg0);

        // <summary>Resize the backtracking stack array and push 1 value onto the stack.</summary>
        [MethodImpl(MethodImplOptions.NoInlining)]
        static void WithResize(ref int[] stack, ref int pos, int arg0)
        {
            Array.Resize(ref stack, (pos + 0) * 2);
            StackPush1(ref stack, ref pos, arg0);
        }
    }

    // <summary>Push 3 values onto the backtracking stack.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    static void StackPush3(ref int[] stack, ref int pos, int arg0, int arg1, int arg2)
    {
        // If there's space available for all 3 values, store them.
        int[] s = stack;
        int p = pos;
        if ((uint)(p + 2) < (uint)s.Length)
        {
            s[p] = arg0;
            s[p + 1] = arg1;
            s[p + 2] = arg2;
            pos += 3;
            return;
        }

        // Otherwise, resize the stack to make room and try again.
        WithResize(ref stack, ref pos, arg0, arg1, arg2);

        // <summary>Resize the backtracking stack array and push 3 values onto the stack.</summary>
        [MethodImpl(MethodImplOptions.NoInlining)]
        static void WithResize(ref int[] stack, ref int pos, int arg0, int arg1, int arg2)
        {
            Array.Resize(ref stack, (pos + 2) * 2);
            StackPush3(ref stack, ref pos, arg0, arg1, arg2);
        }
    }

    // <summary>Undo captures until we reach the specified capture position.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    void UncaptureUntil(int capturepos)
    {
        while (base.Crawlpos() > capturepos)
        {
            base.Uncapture();
        }
    }
}
}
}
}
}
......@@ -14,16 +14,89 @@ namespace System.Text.RegularExpressions.Tests
{
/// <summary>
/// Exercises the public <see cref="Regex"/> API surface (Matches, Match, Replace, Split, Count,
/// IsMatch, group metadata) through <c>RegexTestClass</c>, the hand-written <see cref="Regex"/>
/// subclass used by this scenario.
/// </summary>
public class PrecompiledRegexScenarioTest
{
    // Input that the tests below expect to produce exactly one match.
    private const string text = "asdf134success1245something";

    // Four lines; the tests below expect one match per line. The line breaks inside this verbatim
    // literal are whatever line endings the source file is saved with.
    private const string textWithMultipleMatches = @"asdf134success1245something
bsdf135success1245somethingelse
csdf136success2245somethingnew
dsdf137success3245somethingold";

    /// <summary>Covers the <c>Matches</c> overloads for single-match and multi-match input.</summary>
    [Fact]
    public void PrecompiledRegex_MatchesTest()
    {
        string[] expectedMatches = textWithMultipleMatches.Split(Environment.NewLine);
        RegexTestClass testClass = new RegexTestClass();

        // Test Matches overloads
        Assert.Equal(1, testClass.Matches(text).Count);
        Assert.Equal(0, testClass.Matches(text, startat: 7).Count);

        MatchCollection multipleMatches = testClass.Matches(textWithMultipleMatches);
        Assert.Equal(4, multipleMatches.Count);
        for (int i = 0; i < expectedMatches.Length; i++)
        {
            Assert.Equal(expectedMatches[i], multipleMatches[i].Value.Trim()); // Calling Trim since the match will contain the new line as part of the match.
        }
    }

    /// <summary>Covers the <c>Match</c> overloads taking a start position or a (beginning, length) window.</summary>
    [Fact]
    public void PrecompiledRegex_MatchTest()
    {
        RegexTestClass testClass = new RegexTestClass();

        Assert.Equal(1, testClass.Match(text).Groups[0].Captures.Count);

        Assert.Equal(Match.Empty, testClass.Match(text, beginning: 7, length: text.Length - 7));
        Assert.Equal(5, testClass.Match(text, beginning: 5, length: text.Length - 5).Index);
        Assert.False(testClass.Match("asdf134succes1245somethingasdf134success1245something", 0, 27).Success); // The first 27 characters shouldn't match.
        Assert.True(testClass.Match("asdf134succes1245somethingasdf134success1245something", 26, 27).Success); // The last 27 characters should match.

        Assert.Equal(Match.Empty, testClass.Match(text, startat: 7));
        Assert.Equal(6, testClass.Match(text, startat: 6).Index);
    }

    /// <summary>Covers <c>Replace</c> with a replacement pattern and with a match evaluator.</summary>
    [Fact]
    public void PrecompiledRegex_ReplaceTest()
    {
        RegexTestClass testClass = new RegexTestClass();

        Assert.Equal("4success", testClass.Replace(text, "$1${output}"));
        Assert.Equal("4success", testClass.Replace(text, match => $"{match.Groups[1]}{match.Groups["output"]}"));

        // NOTE(review): the expected separators are a literal "\n" rather than Environment.NewLine;
        // presumably the pattern consumes everything on each line except the line feed - confirm.
        Assert.Equal("4success\n5success\n6success\n7success", testClass.Replace(textWithMultipleMatches, "$1${output}"));
    }

    /// <summary>Covers <c>Split</c> with and without a maximum count.</summary>
    [Fact]
    public void PrecompiledRegex_SplitTest()
    {
        RegexTestClass testClass = new RegexTestClass();

        Assert.Equal(new[] { "", "4", "success", "\n", "5", "success", "\n", "6", "success", "\n", "7", "success", "" }, testClass.Split(textWithMultipleMatches));
        Assert.Equal(new[] { "", "4", "success", $"\nbsdf135success1245somethingelse{Environment.NewLine}csdf136success2245somethingnew{Environment.NewLine}dsdf137success3245somethingold" }, testClass.Split(textWithMultipleMatches, 2));
    }

    /// <summary>Covers <c>Count</c> on multi-match input.</summary>
    [Fact]
    public void PrecompiledRegex_CountTest()
    {
        RegexTestClass testClass = new RegexTestClass();

        // NOTE(review): the same assertion is made twice; presumably the second call is meant to
        // exercise the instance after it has already been used once - confirm or drop the repeat.
        Assert.Equal(4, testClass.Count(textWithMultipleMatches));
        Assert.Equal(4, testClass.Count(textWithMultipleMatches));
    }

    /// <summary>The span-based <c>IsMatch</c> overload is expected to throw <see cref="NotSupportedException"/> for this regex.</summary>
    [Fact]
    public void PrecompiledRegex_ThrowsWhenSpanIsMatchIsCalled()
    {
        RegexTestClass testClass = new RegexTestClass();

        Assert.Throws<NotSupportedException>(() => testClass.IsMatch(text.AsSpan()));
    }

    /// <summary>Covers the group value, group numbers and group names exposed by the regex.</summary>
    [Fact]
    public void PrecompiledRegex_Groups()
    {
        RegexTestClass testClass = new RegexTestClass();

        Assert.Equal(text, testClass.Match(text).Groups[0].Value);
        Assert.Equal(new int[] { 0, 1, 2 }, testClass.GetGroupNumbers());
        Assert.Equal(new string[] { "0", "1", "output" }, testClass.GetGroupNames());
    }
}
......@@ -39,8 +112,8 @@ public RegexTestClass()
roptions = RegexOptions.IgnoreCase;
internalMatchTimeout = TimeSpan.FromTicks(-10000L);
factory = new RegexFactoryTestClass();
Caps = new Hashtable {{0, 0}, {1, 1}, {2, 2}};
CapNames = new Hashtable {{"0", 0}, {"1", 1}, {"output", 2}};
Caps = new Hashtable { { 0, 0 }, { 1, 1 }, { 2, 2 } };
CapNames = new Hashtable { { "0", 0 }, { "1", 1 }, { "output", 2 } };
capslist = new string[3];
capslist[0] = "0";
capslist[1] = "1";
......@@ -139,7 +212,7 @@ protected override void Go()
}
}
}
IL_441:
IL_441:
while (true)
{
this.runtrackpos = num2;
......@@ -169,7 +242,7 @@ protected override void Go()
}
goto IL_49E;
}
IL_4C7:
IL_4C7:
this.CheckTimeout();
num = runtrack[num2++];
num4 = runtrack[num2++];
......@@ -181,7 +254,7 @@ protected override void Go()
continue;
}
continue;
IL_51D:
IL_51D:
this.CheckTimeout();
num = runtrack[num2++];
num4 = runtrack[num2++];
......@@ -191,7 +264,7 @@ protected override void Go()
runtrack[--num2] = num - 1;
runtrack[--num2] = 3;
}
IL_204:
IL_204:
this.CheckTimeout();
num4 = runstack[num3++];
this.Capture(1, num4, num);
......@@ -234,21 +307,21 @@ protected override void Go()
runtrack[--num2] = num - 1;
runtrack[--num2] = 5;
}
IL_3FC:
IL_3FC:
this.CheckTimeout();
num4 = runstack[num3++];
this.Capture(0, num4, num);
runtrack[--num2] = num4;
runtrack[num2 - 1] = 4;
IL_432:
IL_432:
this.CheckTimeout();
this.runtextpos = num;
return;
IL_49E:
IL_49E:
this.CheckTimeout();
num = runtrack[num2++];
goto IL_432;
IL_598:
IL_598:
this.CheckTimeout();
num = runtrack[num2++];
num4 = runtrack[num2++];
......@@ -280,10 +353,10 @@ protected override bool FindFirstChar()
while (num2 > 0);
bool arg_74_0 = false;
goto IL_6C;
IL_63:
IL_63:
num--;
arg_74_0 = true;
IL_6C:
IL_6C:
this.runtextpos = num;
return arg_74_0;
}
......
......@@ -840,7 +840,7 @@ public void Match(RegexEngine engine, string pattern, string input, RegexOptions
if (isDefaultStart && isDefaultCount)
{
VerifyMatch(r.Match(input));
Assert.Equal(expectedSuccess, r.IsMatch(input));
VerifyIsMatch(r, input, expectedSuccess, Regex.InfiniteMatchTimeout);
}
if (beginning + length == input.Length && (options & RegexOptions.RightToLeft) == 0)
{
......@@ -857,7 +857,7 @@ public void Match(RegexEngine engine, string pattern, string input, RegexOptions
case RegexEngine.Compiled:
case RegexEngine.NonBacktracking:
VerifyMatch(Regex.Match(input, pattern, options | RegexHelpers.OptionsFromEngine(engine)));
Assert.Equal(expectedSuccess, Regex.IsMatch(input, pattern, options | RegexHelpers.OptionsFromEngine(engine)));
VerifyIsMatch(null, input, expectedSuccess, Regex.InfiniteMatchTimeout, pattern, options | RegexHelpers.OptionsFromEngine(engine));
break;
}
}
......@@ -1036,9 +1036,9 @@ public void Match_DefaultTimeout_Throws(RegexOptions options)
public void Match_CachedPattern_NewTimeoutApplies(RegexOptions options)
{
    // Runs the same pattern through the static IsMatch entry points twice: first with a very long
    // timeout, then with a 1ms timeout that must produce a RegexMatchTimeoutException.
    // VerifyIsMatch / VerifyIsMatchThrows cover both the string and (where available) span
    // overloads, so no separate direct Regex.IsMatch assertions are needed here.
    const string PatternLeadingToLotsOfBacktracking = @"^(\w+\s?)*$";

    VerifyIsMatch(null, "", true, TimeSpan.FromDays(1), PatternLeadingToLotsOfBacktracking, options);

    // Only the short-timeout call is timed, to check that the timeout actually cuts the match short.
    var sw = Stopwatch.StartNew();
    VerifyIsMatchThrows<RegexMatchTimeoutException>(null, "An input string that takes a very very very very very very very very very very very long time!", TimeSpan.FromMilliseconds(1), PatternLeadingToLotsOfBacktracking, options);
    Assert.InRange(sw.Elapsed.TotalSeconds, 0, 10); // arbitrary upper bound that should be well above what's needed with a 1ms timeout
}
......@@ -1408,7 +1408,7 @@ public async Task Match_Advanced(RegexEngine engine, string pattern, string inpu
VerifyMatch(r.Match(input));
VerifyMatch(Regex.Match(input, pattern, options));
Assert.True(Regex.IsMatch(input, pattern, options));
VerifyIsMatch(null, input, true, Regex.InfiniteMatchTimeout, pattern, options);
}
if (beginning + length == input.Length)
......@@ -1561,9 +1561,9 @@ public void Match_ExcessPrefix(RegexEngine engine)
// Should not throw out of memory
// Repeaters
Assert.False((await RegexHelpers.GetRegexAsync(engine, @"a{2147483647,}")).IsMatch("a"));
Assert.False((await RegexHelpers.GetRegexAsync(engine, @"a{50,}")).IsMatch("a"));
Assert.False((await RegexHelpers.GetRegexAsync(engine, @"a{50_000,}")).IsMatch("a")); // cutoff for Boyer-Moore prefix in release
VerifyIsMatch((await RegexHelpers.GetRegexAsync(engine, @"a{2147483647,}")), "a", false, Regex.InfiniteMatchTimeout);
VerifyIsMatch((await RegexHelpers.GetRegexAsync(engine, @"a{50,}")), "a", false, Regex.InfiniteMatchTimeout);
VerifyIsMatch((await RegexHelpers.GetRegexAsync(engine, @"a{50_000,}")), "a", false, Regex.InfiniteMatchTimeout); // cutoff for Boyer-Moore prefix in release
// Multis
foreach (int length in new[] { 50, 50_000, char.MaxValue + 1 })
......@@ -1575,7 +1575,7 @@ public void Match_ExcessPrefix(RegexEngine engine)
if (!RegexHelpers.IsNonBacktracking(engine) || length < 50_000)
{
string s = "bcd" + new string('a', length) + "efg";
Assert.True((await RegexHelpers.GetRegexAsync(engine, @$"a{{{length}}}")).IsMatch(s));
VerifyIsMatch((await RegexHelpers.GetRegexAsync(engine, @$"a{{{length}}}")), s, true, Regex.InfiniteMatchTimeout);
}
}
}, engine.ToString()).Dispose();
......@@ -1625,9 +1625,9 @@ public void IsMatch_Invalid()
AssertExtensions.Throws<ArgumentNullException>("input", () => r.IsMatch(null, 0));
// Pattern is null
AssertExtensions.Throws<ArgumentNullException>("pattern", () => Regex.IsMatch("input", null));
AssertExtensions.Throws<ArgumentNullException>("pattern", () => Regex.IsMatch("input", null, RegexOptions.None));
AssertExtensions.Throws<ArgumentNullException>("pattern", () => Regex.IsMatch("input", null, RegexOptions.None, TimeSpan.FromSeconds(1)));
VerifyIsMatchThrows<ArgumentNullException>(null, "input", Regex.InfiniteMatchTimeout, pattern: null);
VerifyIsMatchThrows<ArgumentNullException>(null, "input", Regex.InfiniteMatchTimeout, pattern: null, RegexOptions.None);
VerifyIsMatchThrows<ArgumentNullException>(null, "input", TimeSpan.FromSeconds(1), pattern: null, RegexOptions.None);
// Start is invalid
Assert.Throws<ArgumentOutOfRangeException>(() => r.IsMatch("input", -1));
......@@ -1650,7 +1650,7 @@ public static IEnumerable<object[]> IsMatch_SucceedQuicklyDueToLoopReduction_Mem
public async Task IsMatch_SucceedQuicklyDueToLoopReduction(RegexEngine engine, string pattern, string input, bool expected)
{
    // Builds the regex for the requested engine and checks the IsMatch result. VerifyIsMatch
    // asserts the string overload and, on NET7_0_OR_GREATER, the span overload as well.
    Regex r = await RegexHelpers.GetRegexAsync(engine, pattern);
    VerifyIsMatch(r, input, expected, Regex.InfiniteMatchTimeout);
}
[Theory]
......@@ -1660,6 +1660,10 @@ public async Task TestCharIsLowerCultureEdgeCasesAroundTurkishCharacters(RegexEn
Regex r1 = await RegexHelpers.GetRegexAsync(engine, "[\u012F-\u0130]", RegexOptions.IgnoreCase);
Regex r2 = await RegexHelpers.GetRegexAsync(engine, "[\u012F\u0130]", RegexOptions.IgnoreCase);
Assert.Equal(r1.IsMatch("\u0130"), r2.IsMatch("\u0130"));
#if NET7_0_OR_GREATER
Assert.Equal(r1.IsMatch("\u0130".AsSpan()), r2.IsMatch("\u0130".AsSpan()));
#endif
}
[Fact]
......@@ -1688,8 +1692,8 @@ public void Synchronized()
public async Task Match_Boundary(RegexEngine engine)
{
    // The inputs embed U+200C and U+200D between "AB" and "CD"; the test expects \b\w+\b to find
    // no match in either. VerifyIsMatch asserts the string overload and, on NET7_0_OR_GREATER,
    // the span overload as well.
    Regex r = await RegexHelpers.GetRegexAsync(engine, @"\b\w+\b");
    VerifyIsMatch(r, " AB\u200cCD ", false, Regex.InfiniteMatchTimeout);
    VerifyIsMatch(r, " AB\u200dCD ", false, Regex.InfiniteMatchTimeout);
}
public static IEnumerable<object[]> Match_Count_TestData()
......@@ -2003,11 +2007,56 @@ public async Task StandardCharSets_SameMeaningAcrossAllEngines(string singleChar
bool baseline = regexes[0].IsMatch(s);
for (int i = 1; i < regexes.Count; i++)
{
Assert.Equal(baseline, regexes[i].IsMatch(s));
VerifyIsMatch(regexes[i], s, baseline, Regex.InfiniteMatchTimeout);
}
}
}
// Asserts that IsMatch throws T for the given input, through both the string overload and
// (on NET7_0_OR_GREATER) the span overload.
//   r non-null: the instance IsMatch overloads are used; timeout, pattern and options are ignored.
//   r null:     the static Regex.IsMatch overloads are used with pattern and options; the timeout
//               overload is chosen only when timeout differs from Regex.InfiniteMatchTimeout.
private static void VerifyIsMatchThrows<T>(Regex? r, string input, TimeSpan timeout, string? pattern = null, RegexOptions options = RegexOptions.None)
    where T : Exception
{
    if (r != null)
    {
        Assert.Throws<T>(() => r.IsMatch(input));
#if NET7_0_OR_GREATER
        Assert.Throws<T>(() => r.IsMatch(input.AsSpan()));
#endif
        return;
    }

    bool useDefaultTimeout = timeout == Regex.InfiniteMatchTimeout;

    Assert.Throws<T>(() => useDefaultTimeout ? Regex.IsMatch(input, pattern, options) : Regex.IsMatch(input, pattern, options, timeout));
#if NET7_0_OR_GREATER
    Assert.Throws<T>(() => useDefaultTimeout ? Regex.IsMatch(input.AsSpan(), pattern, options) : Regex.IsMatch(input.AsSpan(), pattern, options, timeout));
#endif
}
// Asserts that IsMatch returns `expected` for the given input, through both the string overload
// and (on NET7_0_OR_GREATER) the span overload.
//   r non-null: the instance IsMatch overloads are used; timeout, pattern and options are ignored.
//   r null:     the static Regex.IsMatch overloads are used with pattern and options; the timeout
//               overload is chosen only when timeout differs from Regex.InfiniteMatchTimeout, and
//               the options-less overload is additionally checked when options is None.
private static void VerifyIsMatch(Regex? r, string input, bool expected, TimeSpan timeout, string? pattern = null, RegexOptions options = RegexOptions.None)
{
    if (r != null)
    {
        Assert.Equal(expected, r.IsMatch(input));
#if NET7_0_OR_GREATER
        Assert.Equal(expected, r.IsMatch(input.AsSpan()));
#endif
        return;
    }

    bool useDefaultTimeout = timeout == Regex.InfiniteMatchTimeout;
    bool hasNoOptions = options == RegexOptions.None;

    bool stringResult = useDefaultTimeout
        ? Regex.IsMatch(input, pattern, options)
        : Regex.IsMatch(input, pattern, options, timeout);
    Assert.Equal(expected, stringResult);
    if (hasNoOptions)
    {
        Assert.Equal(expected, Regex.IsMatch(input, pattern));
    }

#if NET7_0_OR_GREATER
    bool spanResult = useDefaultTimeout
        ? Regex.IsMatch(input.AsSpan(), pattern, options)
        : Regex.IsMatch(input.AsSpan(), pattern, options, timeout);
    Assert.Equal(expected, spanResult);
    if (hasNoOptions)
    {
        Assert.Equal(expected, Regex.IsMatch(input.AsSpan(), pattern));
    }
#endif
}
public static IEnumerable<object[]> Match_DisjunctionOverCounting_TestData()
{
foreach (RegexEngine engine in RegexHelpers.AvailableEngines)
......
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
using System.Reflection;
using System.Threading.Tasks;
using Xunit;
namespace System.Text.RegularExpressions.Tests
{
/// <summary>
/// Reflection-based tests over non-public <see cref="Regex"/> / <see cref="RegexRunner"/> members.
/// Every reflection lookup is asserted non-null so that a renamed or removed member fails the test
/// with a clear assertion message rather than a NullReferenceException.
/// </summary>
public class RegexRunnerTests
{
    /// <summary>
    /// Verifies that invoking <c>Go</c> and <c>FindFirstChar</c> on the runner created for each
    /// engine throws <see cref="NotImplementedException"/>.
    /// </summary>
    [Theory]
    [MemberData(nameof(RegexHelpers.AvailableEngines_MemberData), MemberType = typeof(RegexHelpers))]
    public async Task EnginesThrowNotImplementedForGoAndFFC(RegexEngine engine)
    {
        Regex re = await RegexHelpers.GetRegexAsync(engine, /*lang=regex*/@"abc");

        // Use reflection to ensure the runner is created so it can be fetched.
        MethodInfo createRunnerMethod = typeof(Regex).GetMethod("CreateRunner", BindingFlags.Instance | BindingFlags.NonPublic);
        Assert.NotNull(createRunnerMethod);
        RegexRunner runner = Assert.IsAssignableFrom<RegexRunner>(createRunnerMethod.Invoke(re, Array.Empty<object>()));

        // FindFirstChar and Go methods should not be implemented since built-in engines should be overriding and using Scan instead.
        AssertThrowsNotImplemented("Go");
        AssertThrowsNotImplemented("FindFirstChar");

        // Invokes the named non-public RegexRunner method via reflection and checks that the
        // exception wrapped by the reflection call is exactly NotImplementedException.
        void AssertThrowsNotImplemented(string methodName)
        {
            MethodInfo method = typeof(RegexRunner).GetMethod(methodName, BindingFlags.Instance | BindingFlags.NonPublic);
            Assert.NotNull(method);

            TargetInvocationException invocationException = Assert.Throws<TargetInvocationException>(() => method.Invoke(runner, Array.Empty<object>()));
            Assert.IsType<NotImplementedException>(invocationException.InnerException);
        }
    }

    /// <summary>
    /// Verifies that after <c>IsMatch</c> the runner still holds its <c>runmatch</c> instance, but
    /// that instance no longer references the input text.
    /// </summary>
    [Theory]
    [MemberData(nameof(RegexHelpers.AvailableEngines_MemberData), MemberType = typeof(RegexHelpers))]
    public async Task EnsureRunmatchValueIsNulledAfterIsMatch(RegexEngine engine)
    {
        Regex re = await RegexHelpers.GetRegexAsync(engine, /*lang=regex*/@"abc");

        // First call IsMatch which should initialize runmatch on the runner.
        Assert.True(re.IsMatch("abcabcabc"));

        // Ensure runmatch wasn't nulled out, since after calling IsMatch it should be reused.
        FieldInfo runnerField = typeof(Regex).GetField("_runner", BindingFlags.Instance | BindingFlags.NonPublic);
        Assert.NotNull(runnerField);
        RegexRunner runner = Assert.IsAssignableFrom<RegexRunner>(runnerField.GetValue(re));

        FieldInfo runmatchField = typeof(RegexRunner).GetField("runmatch", BindingFlags.Instance | BindingFlags.NonPublic);
        Assert.NotNull(runmatchField);
        Match runmatch = Assert.IsAssignableFrom<Match>(runmatchField.GetValue(runner));

        // Ensure that the Value of runmatch was nulled out, so as to not keep a reference to it in a cache.
        MethodInfo getTextMethod = typeof(Match).GetMethod("get_Text", BindingFlags.Instance | BindingFlags.NonPublic);
        Assert.NotNull(getTextMethod);
        Assert.Null(getTextMethod.Invoke(runmatch, Array.Empty<object>()));
        Assert.Equal(string.Empty, runmatch.Value);
#if NET7_0_OR_GREATER
        Assert.True(runmatch.ValueSpan == ReadOnlySpan<char>.Empty);
#endif
    }
}
}
......@@ -41,6 +41,8 @@
<PackageReference Include="System.Text.Json" Version="$(SystemTextJsonVersion)" />
</ItemGroup>
<ItemGroup Condition="'$(TargetFramework)' == '$(NetCoreAppCurrent)'">
<Compile Include="CustomDerivedRegexScenarioTest.cs" />
<Compile Include="RegexRunnerTests.cs" />
<Compile Include="Regex.Count.Tests.cs" />
<Compile Include="RegexAssert.netcoreapp.cs" />
<Compile Include="RegexParserTests.netcoreapp.cs" />
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册