未验证 提交 6ebcb3d6 编写于 作者: O Olli Saarikivi 提交者: GitHub

NonBacktracking locking fixes and cleanup (#71234)

* Concurrency fixes and refactoring for clarity

Removed builder reference from SymbolicRegexNode instances; builder now
has to be passed in. Since the builder is not thread safe this clarifies
the locking required in the matcher when using it.
Moved matching specific state from the builder to the matcher. This
includes state and transition arrays.
Simplify character kind code by eliminating duplication of logic.

* Changes from review and cleanup

DfaMatchingState is now just MatchingState

* Comment on NFA mode IDs
上级 8cd9d636
...@@ -63,7 +63,7 @@ ...@@ -63,7 +63,7 @@
<Compile Include="System\Text\RegularExpressions\Symbolic\CharKind.cs" /> <Compile Include="System\Text\RegularExpressions\Symbolic\CharKind.cs" />
<Compile Include="System\Text\RegularExpressions\Symbolic\CharSetSolver.cs" /> <Compile Include="System\Text\RegularExpressions\Symbolic\CharSetSolver.cs" />
<Compile Include="System\Text\RegularExpressions\Symbolic\DerivativeEffect.cs" /> <Compile Include="System\Text\RegularExpressions\Symbolic\DerivativeEffect.cs" />
<Compile Include="System\Text\RegularExpressions\Symbolic\DfaMatchingState.cs" /> <Compile Include="System\Text\RegularExpressions\Symbolic\MatchingState.cs" />
<Compile Include="System\Text\RegularExpressions\Symbolic\DoublyLinkedList.cs" /> <Compile Include="System\Text\RegularExpressions\Symbolic\DoublyLinkedList.cs" />
<Compile Include="System\Text\RegularExpressions\Symbolic\ISolver.cs" /> <Compile Include="System\Text\RegularExpressions\Symbolic\ISolver.cs" />
<Compile Include="System\Text\RegularExpressions\Symbolic\MintermClassifier.cs" /> <Compile Include="System\Text\RegularExpressions\Symbolic\MintermClassifier.cs" />
...@@ -75,6 +75,7 @@ ...@@ -75,6 +75,7 @@
<Compile Include="System\Text\RegularExpressions\Symbolic\SymbolicRegexNode.cs" /> <Compile Include="System\Text\RegularExpressions\Symbolic\SymbolicRegexNode.cs" />
<Compile Include="System\Text\RegularExpressions\Symbolic\SymbolicRegexKind.cs" /> <Compile Include="System\Text\RegularExpressions\Symbolic\SymbolicRegexKind.cs" />
<Compile Include="System\Text\RegularExpressions\Symbolic\SymbolicRegexInfo.cs" /> <Compile Include="System\Text\RegularExpressions\Symbolic\SymbolicRegexInfo.cs" />
<Compile Include="System\Text\RegularExpressions\Symbolic\SymbolicRegexMatcher.Automata.cs" />
<Compile Include="System\Text\RegularExpressions\Symbolic\SymbolicRegexMatcher.cs" /> <Compile Include="System\Text\RegularExpressions\Symbolic\SymbolicRegexMatcher.cs" />
<Compile Include="System\Text\RegularExpressions\Symbolic\SymbolicRegexMatcher.Dgml.cs" /> <Compile Include="System\Text\RegularExpressions\Symbolic\SymbolicRegexMatcher.Dgml.cs" />
<Compile Include="System\Text\RegularExpressions\Symbolic\SymbolicRegexMatcher.Explore.cs" /> <Compile Include="System\Text\RegularExpressions\Symbolic\SymbolicRegexMatcher.Explore.cs" />
......
...@@ -43,5 +43,8 @@ internal static class CharKind ...@@ -43,5 +43,8 @@ internal static class CharKind
WordLetter => @"\w", WordLetter => @"\w",
_ => string.Empty, _ => string.Empty,
}; };
/// <summary>Returns whether the given value is in the range of valid character kinds.</summary>
internal static bool IsValidCharKind(uint charKind) => charKind < CharKindCount;
} }
} }
...@@ -8,135 +8,104 @@ ...@@ -8,135 +8,104 @@
namespace System.Text.RegularExpressions.Symbolic namespace System.Text.RegularExpressions.Symbolic
{ {
/// <summary>Captures a state of a DFA explored during matching.</summary> /// <summary>Captures a state explored during matching.</summary>
internal sealed class DfaMatchingState<TSet> where TSet : IComparable<TSet>, IEquatable<TSet> internal sealed class MatchingState<TSet> where TSet : IComparable<TSet>, IEquatable<TSet>
{ {
internal DfaMatchingState(SymbolicRegexNode<TSet> node, uint prevCharKind) internal MatchingState(SymbolicRegexNode<TSet> node, uint prevCharKind)
{ {
Node = node; Node = node;
PrevCharKind = prevCharKind; PrevCharKind = prevCharKind;
} }
/// <summary>The regular expression that labels this state and gives it its semantics.</summary>
internal SymbolicRegexNode<TSet> Node { get; } internal SymbolicRegexNode<TSet> Node { get; }
/// <summary>
/// The kind of the previous character in the input. The <see cref="SymbolicRegexMatcher{TSet}"/> is responsible
/// for ensuring that in all uses of this state this invariant holds by both selecting initial states accordingly
/// and transitioning on each character to states that match that character's kind.
/// </summary>
/// <remarks>
/// Tracking this information is an optimization that allows each transition taken in the matcher to only depend
/// on the next character (and its kind). In general, the transitions from a state with anchors in its pattern
/// depend on both the previous and the next character. Creating distinct states for each kind of the previous
/// character embeds the necessary information about the previous character into the state space of the automaton.
/// However, this does incur a memory overhead due to the duplication of states. For patterns with no anchors
/// this will always be set to <see cref="CharKind.General"/>, which can reduce the number of states created.
///
/// The performance effect of this optimization has not been investigated. If this optimization were removed, the
/// transition logic would in turn have to become more complicated for derivatives that depend on the nullability
/// of anchors. Care should be taken to not slow down transitions without anchors involved.
/// </remarks>
internal uint PrevCharKind { get; } internal uint PrevCharKind { get; }
/// <summary>
/// A unique identifier for this state, which is used in <see cref="SymbolicRegexMatcher{TSet}"/> to index into
/// state information and transition arrays. Valid IDs are always >= 1.
/// </summary>
internal int Id { get; set; } internal int Id { get; set; }
/// <summary>This is a deadend state</summary> /// <summary>Whether this state is known to be a dead end, i.e. no nullable states are reachable from here.</summary>
internal bool IsDeadend => Node.IsNothing; internal bool IsDeadend(ISolver<TSet> solver) => Node.IsNothing(solver);
/// <summary>The node must be nullable here</summary> /// <summary>
/// Returns the fixed length that any match ending with this state must have, or -1 if there is no such
/// fixed length, <see cref="SymbolicRegexNode{TSet}.ResolveFixedLength(uint)"/>. The context is defined
/// by <see cref="PrevCharKind"/> of this state and the given nextCharKind. The node must be nullable here.
/// </summary>
internal int FixedLength(uint nextCharKind) internal int FixedLength(uint nextCharKind)
{ {
Debug.Assert(nextCharKind is 0 or CharKind.BeginningEnd or CharKind.Newline or CharKind.WordLetter or CharKind.NewLineS); Debug.Assert(IsNullableFor(nextCharKind));
Debug.Assert(CharKind.IsValidCharKind(nextCharKind));
uint context = CharKind.Context(PrevCharKind, nextCharKind); uint context = CharKind.Context(PrevCharKind, nextCharKind);
return Node.ResolveFixedLength(context); return Node.ResolveFixedLength(context);
} }
/// <summary>If true then the state is a dead-end, rejects all inputs.</summary>
internal bool IsNothing => Node.IsNothing;
/// <summary>If true then state starts with a ^ or $ or \Z</summary> /// <summary>If true then state starts with a ^ or $ or \Z</summary>
internal bool StartsWithLineAnchor => Node._info.StartsWithLineAnchor; internal bool StartsWithLineAnchor => Node._info.StartsWithLineAnchor;
/// <summary>
/// Translates a minterm set to a character kind, which is a general categorization of characters used
/// for cheaply deciding the nullability of anchors.
/// </summary>
/// <remarks>
/// An empty set is handled as a special case to indicate the very last \n.
/// </remarks>
/// <param name="minterm">the minterm to translate</param>
/// <returns>the character kind of the minterm</returns>
private uint GetNextCharKind(ref TSet minterm)
{
ISolver<TSet> solver = Node._builder._solver;
TSet wordLetterPredicate = Node._builder._wordLetterForBoundariesSet;
TSet newLinePredicate = Node._builder._newLineSet;
// minterm == solver.False is used to represent the very last \n
uint nextCharKind = CharKind.General;
if (solver.Empty.Equals(minterm))
{
nextCharKind = CharKind.NewLineS;
minterm = newLinePredicate;
}
else if (newLinePredicate.Equals(minterm))
{
// If the previous state was the start state, mark this as the very FIRST \n.
// Essentially, this looks the same as the very last \n and is used to nullify
// rev(\Z) in the conext of a reversed automaton.
nextCharKind = PrevCharKind == CharKind.BeginningEnd ?
CharKind.NewLineS :
CharKind.Newline;
}
else if (!solver.IsEmpty(solver.And(wordLetterPredicate, minterm)))
{
nextCharKind = CharKind.WordLetter;
}
return nextCharKind;
}
/// <summary> /// <summary>
/// Compute the target state for the given input minterm. /// Compute the target state for the given input minterm.
/// If <paramref name="minterm"/> is False this means that this is \n and it is the last character of the input. /// If <paramref name="minterm"/> is False this means that this is \n and it is the last character of the input.
/// </summary> /// </summary>
/// <param name="builder">the builder that owns <see cref="Node"/></param>
/// <param name="minterm">minterm corresponding to some input character or False corresponding to last \n</param> /// <param name="minterm">minterm corresponding to some input character or False corresponding to last \n</param>
internal DfaMatchingState<TSet> Next(TSet minterm) /// <param name="nextCharKind"></param>
internal SymbolicRegexNode<TSet> Next(SymbolicRegexBuilder<TSet> builder, TSet minterm, uint nextCharKind)
{ {
uint nextCharKind = GetNextCharKind(ref minterm);
// Combined character context // Combined character context
uint context = CharKind.Context(PrevCharKind, nextCharKind); uint context = CharKind.Context(PrevCharKind, nextCharKind);
// Compute the derivative of the node for the given context // Compute the derivative of the node for the given context
SymbolicRegexNode<TSet> derivative = Node.CreateDerivativeWithoutEffects(minterm, context); return Node.CreateDerivativeWithoutEffects(builder, minterm, context);
// nextCharKind will be the PrevCharKind of the target state
// use an existing state instead if one exists already
// otherwise create a new new id for it
return Node._builder.CreateState(derivative, nextCharKind, capturing: false);
} }
/// <summary> /// <summary>
/// Compute a set of transitions for the given minterm. /// Compute a set of transitions for the given minterm.
/// </summary> /// </summary>
/// <param name="builder">the builder that owns <see cref="Node"/></param>
/// <param name="minterm">minterm corresponding to some input character or False corresponding to last \n</param> /// <param name="minterm">minterm corresponding to some input character or False corresponding to last \n</param>
/// <param name="nextCharKind"></param>
/// <returns>an enumeration of the transitions as pairs of the target state and a list of effects to be applied</returns> /// <returns>an enumeration of the transitions as pairs of the target state and a list of effects to be applied</returns>
internal List<(DfaMatchingState<TSet> State, DerivativeEffect[] Effects)> NfaNextWithEffects(TSet minterm) internal List<(SymbolicRegexNode<TSet> Node, DerivativeEffect[] Effects)> NfaNextWithEffects(SymbolicRegexBuilder<TSet> builder, TSet minterm, uint nextCharKind)
{ {
uint nextCharKind = GetNextCharKind(ref minterm);
// Combined character context // Combined character context
uint context = CharKind.Context(PrevCharKind, nextCharKind); uint context = CharKind.Context(PrevCharKind, nextCharKind);
// Compute the transitions for the given context // Compute the transitions for the given context
List<(SymbolicRegexNode<TSet>, DerivativeEffect[])> nodesAndEffects = Node.CreateNfaDerivativeWithEffects(minterm, context); return Node.CreateNfaDerivativeWithEffects(builder, minterm, context);
var list = new List<(DfaMatchingState<TSet> State, DerivativeEffect[] Effects)>();
foreach ((SymbolicRegexNode<TSet> node, DerivativeEffect[]? effects) in nodesAndEffects)
{
// nextCharKind will be the PrevCharKind of the target state
// use an existing state instead if one exists already
// otherwise create a new new id for it
DfaMatchingState<TSet> state = Node._builder.CreateState(node, nextCharKind, capturing: true);
if (!state.IsDeadend)
list.Add((state, effects));
}
return list;
} }
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
internal bool IsNullableFor(uint nextCharKind) internal bool IsNullableFor(uint nextCharKind)
{ {
Debug.Assert(nextCharKind is 0 or CharKind.BeginningEnd or CharKind.Newline or CharKind.WordLetter or CharKind.NewLineS); Debug.Assert(CharKind.IsValidCharKind(nextCharKind));
uint context = CharKind.Context(PrevCharKind, nextCharKind); uint context = CharKind.Context(PrevCharKind, nextCharKind);
return Node.IsNullableFor(context); return Node.IsNullableFor(context);
} }
public override bool Equals(object? obj) => public override bool Equals(object? obj) =>
obj is DfaMatchingState<TSet> s && PrevCharKind == s.PrevCharKind && Node.Equals(s.Node); obj is MatchingState<TSet> s && PrevCharKind == s.PrevCharKind && Node.Equals(s.Node);
public override int GetHashCode() => (PrevCharKind, Node).GetHashCode(); public override int GetHashCode() => (PrevCharKind, Node).GetHashCode();
......
...@@ -240,12 +240,12 @@ static string UnexpectedNodeType(RegexNode node) ...@@ -240,12 +240,12 @@ static string UnexpectedNodeType(RegexNode node)
SymbolicRegexNode<BDD> elem = childResult.Count == 1 ? SymbolicRegexNode<BDD> elem = childResult.Count == 1 ?
childResult.FirstElement : childResult.FirstElement :
_builder.CreateConcatAlreadyReversed(childResult); _builder.CreateConcatAlreadyReversed(childResult);
if (elem.IsNothing) if (elem.IsNothing(_builder._solver))
{ {
continue; continue;
} }
or = elem.IsAnyStar ? or = elem.IsAnyStar(_builder._solver) ?
elem : // .* is the absorbing element elem : // .* is the absorbing element
SymbolicRegexNode<BDD>.CreateAlternate(_builder, elem, or); SymbolicRegexNode<BDD>.CreateAlternate(_builder, elem, or);
} }
......
// Licensed to the .NET Foundation under one or more agreements. // Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license. // The .NET Foundation licenses this file to you under the MIT license.
using System.Diagnostics;
namespace System.Text.RegularExpressions.Symbolic namespace System.Text.RegularExpressions.Symbolic
{ {
/// <summary>Misc information of structural properties of a <see cref="SymbolicRegexNode{S}"/> that is computed bottom up.</summary> /// <summary>Misc information of structural properties of a <see cref="SymbolicRegexNode{S}"/> that is computed bottom up.</summary>
...@@ -14,54 +16,34 @@ namespace System.Text.RegularExpressions.Symbolic ...@@ -14,54 +16,34 @@ namespace System.Text.RegularExpressions.Symbolic
private const uint StartsWithSomeAnchorMask = 32; private const uint StartsWithSomeAnchorMask = 32;
private const uint IsHighPriorityNullableMask = 64; private const uint IsHighPriorityNullableMask = 64;
private const uint ContainsEffectMask = 128; private const uint ContainsEffectMask = 128;
private const uint ContainsLineAnchorMask = 256;
private readonly uint _info; private readonly uint _info;
private SymbolicRegexInfo(uint i) => _info = i; private SymbolicRegexInfo(uint i) => _info = i;
internal static SymbolicRegexInfo Create( private static SymbolicRegexInfo Create(
bool isAlwaysNullable = false, bool canBeNullable = false, bool isAlwaysNullable = false, bool canBeNullable = false,
bool startsWithLineAnchor = false, bool startsWithSomeAnchor = false, bool containsSomeAnchor = false, bool startsWithLineAnchor = false, bool containsLineAnchor = false,
bool startsWithSomeAnchor = false, bool containsSomeAnchor = false,
bool isHighPriorityNullable = false, bool containsEffect = false) bool isHighPriorityNullable = false, bool containsEffect = false)
{ {
uint i = 0; // Assert that the expected implications hold. For example, every node that contains a line anchor
// must also be marked as containing some anchor.
if (canBeNullable || isAlwaysNullable) Debug.Assert(!isAlwaysNullable || canBeNullable);
{ Debug.Assert(!startsWithLineAnchor || containsLineAnchor);
i |= CanBeNullableMask; Debug.Assert(!startsWithLineAnchor || startsWithSomeAnchor);
Debug.Assert(!containsLineAnchor || containsSomeAnchor);
if (isAlwaysNullable) Debug.Assert(!startsWithSomeAnchor || containsSomeAnchor);
{ return new SymbolicRegexInfo(
i |= IsAlwaysNullableMask; (isAlwaysNullable ? IsAlwaysNullableMask : 0) |
} (canBeNullable ? CanBeNullableMask : 0) |
} (startsWithLineAnchor ? StartsWithLineAnchorMask : 0) |
(containsLineAnchor ? ContainsLineAnchorMask : 0) |
if (containsSomeAnchor || startsWithLineAnchor || startsWithSomeAnchor) (startsWithSomeAnchor ? StartsWithSomeAnchorMask : 0) |
{ (containsSomeAnchor ? ContainsSomeAnchorMask : 0) |
i |= ContainsSomeAnchorMask; (isHighPriorityNullable ? IsHighPriorityNullableMask : 0) |
(containsEffect ? ContainsEffectMask : 0));
if (startsWithLineAnchor)
{
i |= StartsWithLineAnchorMask;
}
if (startsWithLineAnchor || startsWithSomeAnchor)
{
i |= StartsWithSomeAnchorMask;
}
}
if (isHighPriorityNullable)
{
i |= IsHighPriorityNullableMask;
}
if (containsEffect)
{
i |= ContainsEffectMask;
}
return new SymbolicRegexInfo(i);
} }
public bool IsNullable => (_info & IsAlwaysNullableMask) != 0; public bool IsNullable => (_info & IsAlwaysNullableMask) != 0;
...@@ -70,6 +52,8 @@ namespace System.Text.RegularExpressions.Symbolic ...@@ -70,6 +52,8 @@ namespace System.Text.RegularExpressions.Symbolic
public bool StartsWithLineAnchor => (_info & StartsWithLineAnchorMask) != 0; public bool StartsWithLineAnchor => (_info & StartsWithLineAnchorMask) != 0;
public bool ContainsLineAnchor => (_info & ContainsLineAnchorMask) != 0;
public bool StartsWithSomeAnchor => (_info & StartsWithSomeAnchorMask) != 0; public bool StartsWithSomeAnchor => (_info & StartsWithSomeAnchorMask) != 0;
public bool ContainsSomeAnchor => (_info & ContainsSomeAnchorMask) != 0; public bool ContainsSomeAnchor => (_info & ContainsSomeAnchorMask) != 0;
...@@ -80,6 +64,27 @@ namespace System.Text.RegularExpressions.Symbolic ...@@ -80,6 +64,27 @@ namespace System.Text.RegularExpressions.Symbolic
public bool ContainsEffect => (_info & ContainsEffectMask) != 0; public bool ContainsEffect => (_info & ContainsEffectMask) != 0;
/// <summary>
/// Used for any node that acts as an epsilon, i.e., something that always matches the empty string.
/// </summary>
public static SymbolicRegexInfo Epsilon() =>
Create(
isAlwaysNullable: true,
canBeNullable: true,
isHighPriorityNullable: true);
/// <summary>
/// Used for all anchors.
/// </summary>
/// <param name="isLineAnchor">whether this anchor is a line anchor</param>
public static SymbolicRegexInfo Anchor(bool isLineAnchor) =>
Create(
canBeNullable: true,
startsWithLineAnchor: isLineAnchor,
containsLineAnchor: isLineAnchor,
startsWithSomeAnchor: true,
containsSomeAnchor: true);
/// <summary> /// <summary>
/// The alternation remains high priority nullable if the left alternative is so. /// The alternation remains high priority nullable if the left alternative is so.
/// All other info properties are the logical disjunction of the resepctive info properties /// All other info properties are the logical disjunction of the resepctive info properties
...@@ -90,6 +95,7 @@ namespace System.Text.RegularExpressions.Symbolic ...@@ -90,6 +95,7 @@ namespace System.Text.RegularExpressions.Symbolic
isAlwaysNullable: left_info.IsNullable || right_info.IsNullable, isAlwaysNullable: left_info.IsNullable || right_info.IsNullable,
canBeNullable: left_info.CanBeNullable || right_info.CanBeNullable, canBeNullable: left_info.CanBeNullable || right_info.CanBeNullable,
startsWithLineAnchor: left_info.StartsWithLineAnchor || right_info.StartsWithLineAnchor, startsWithLineAnchor: left_info.StartsWithLineAnchor || right_info.StartsWithLineAnchor,
containsLineAnchor: left_info.ContainsLineAnchor || right_info.ContainsLineAnchor,
startsWithSomeAnchor: left_info.StartsWithSomeAnchor || right_info.StartsWithSomeAnchor, startsWithSomeAnchor: left_info.StartsWithSomeAnchor || right_info.StartsWithSomeAnchor,
containsSomeAnchor: left_info.ContainsSomeAnchor || right_info.ContainsSomeAnchor, containsSomeAnchor: left_info.ContainsSomeAnchor || right_info.ContainsSomeAnchor,
isHighPriorityNullable: left_info.IsHighPriorityNullable, isHighPriorityNullable: left_info.IsHighPriorityNullable,
...@@ -105,6 +111,7 @@ namespace System.Text.RegularExpressions.Symbolic ...@@ -105,6 +111,7 @@ namespace System.Text.RegularExpressions.Symbolic
isAlwaysNullable: left_info.IsNullable && right_info.IsNullable, isAlwaysNullable: left_info.IsNullable && right_info.IsNullable,
canBeNullable: left_info.CanBeNullable && right_info.CanBeNullable, canBeNullable: left_info.CanBeNullable && right_info.CanBeNullable,
startsWithLineAnchor: left_info.StartsWithLineAnchor || (left_info.CanBeNullable && right_info.StartsWithLineAnchor), startsWithLineAnchor: left_info.StartsWithLineAnchor || (left_info.CanBeNullable && right_info.StartsWithLineAnchor),
containsLineAnchor: left_info.ContainsLineAnchor || right_info.ContainsLineAnchor,
startsWithSomeAnchor: left_info.StartsWithSomeAnchor || (left_info.CanBeNullable && right_info.StartsWithSomeAnchor), startsWithSomeAnchor: left_info.StartsWithSomeAnchor || (left_info.CanBeNullable && right_info.StartsWithSomeAnchor),
containsSomeAnchor: left_info.ContainsSomeAnchor || right_info.ContainsSomeAnchor, containsSomeAnchor: left_info.ContainsSomeAnchor || right_info.ContainsSomeAnchor,
isHighPriorityNullable: left_info.IsHighPriorityNullable && right_info.IsHighPriorityNullable, isHighPriorityNullable: left_info.IsHighPriorityNullable && right_info.IsHighPriorityNullable,
......
...@@ -48,7 +48,7 @@ internal enum SymbolicRegexNodeKind ...@@ -48,7 +48,7 @@ internal enum SymbolicRegexNodeKind
/// <summary>Effects to be applied when taking a transition.</summary> /// <summary>Effects to be applied when taking a transition.</summary>
/// <remarks> /// <remarks>
/// Left child is the pattern itself and the right child is a concatenation of nodes whose effects should be applied. /// Left child is the pattern itself and the right child is a concatenation of nodes whose effects should be applied.
/// Effect nodes are created in the rule for concatenation in <see cref="SymbolicRegexNode{TSet}.CreateDerivative(TSet, uint)"/>, /// Effect nodes are created in the rule for concatenation in <see cref="SymbolicRegexNode{TSet}.CreateDerivative(SymbolicRegexBuilder{TSet}, TSet, uint)"/>,
/// where they are used to represent additional operations that should be performed in the current position if /// where they are used to represent additional operations that should be performed in the current position if
/// the pattern in the left child is used to match the input. Since these Effect nodes are relative to the current /// the pattern in the left child is used to match the input. Since these Effect nodes are relative to the current
/// position in the input, the effects from the right child must be applied in the transition that the derivative is /// position in the input, the effects from the right child must be applied in the transition that the derivative is
......
...@@ -16,89 +16,91 @@ internal sealed partial class SymbolicRegexMatcher<TSet> ...@@ -16,89 +16,91 @@ internal sealed partial class SymbolicRegexMatcher<TSet>
[ExcludeFromCodeCoverage(Justification = "Currently only used for testing")] [ExcludeFromCodeCoverage(Justification = "Currently only used for testing")]
public override void Explore(bool includeDotStarred, bool includeReverse, bool includeOriginal, bool exploreDfa, bool exploreNfa) public override void Explore(bool includeDotStarred, bool includeReverse, bool includeOriginal, bool exploreDfa, bool exploreNfa)
{ {
Debug.Assert(_builder._minterms is not null); lock (this)
{
// Track seen states to avoid exploring twice // Track seen states to avoid exploring twice
HashSet<DfaMatchingState<TSet>> seen = new(); HashSet<MatchingState<TSet>> seen = new();
// Use a queue for unexplored states // Use a queue for unexplored states
// This results in a breadth-first exploration // This results in a breadth-first exploration
Queue<DfaMatchingState<TSet>> toExplore = new(); Queue<MatchingState<TSet>> toExplore = new();
// Explore all initial states as requested // Explore all initial states as requested
if (includeDotStarred) if (includeDotStarred)
EnqueueAll(_dotstarredInitialStates, seen, toExplore); EnqueueAll(_dotstarredInitialStates, seen, toExplore);
if (includeReverse) if (includeReverse)
EnqueueAll(_reverseInitialStates, seen, toExplore); EnqueueAll(_reverseInitialStates, seen, toExplore);
if (includeOriginal) if (includeOriginal)
EnqueueAll(_initialStates, seen, toExplore); EnqueueAll(_initialStates, seen, toExplore);
if (exploreDfa) if (exploreDfa)
{
while (toExplore.Count > 0)
{ {
// Don't dequeue yet, because a transition might fail while (toExplore.Count > 0)
DfaMatchingState<TSet> state = toExplore.Peek();
// Include the special minterm for the last end-of-line if the state is sensitive to it
int maxMinterm = state.StartsWithLineAnchor ? _builder._minterms.Length : _builder._minterms.Length - 1;
// Explore successor states for each minterm
for (int mintermId = 0; mintermId <= maxMinterm; ++mintermId)
{ {
int offset = (state.Id << _builder._mintermsLog) | mintermId; // Don't dequeue yet, because a transition might fail
if (!_builder.TryCreateNewTransition(state, mintermId, offset, true, out DfaMatchingState<TSet>? nextState)) MatchingState<TSet> state = toExplore.Peek();
goto DfaLimitReached; // Include the special minterm for the last end-of-line if the state is sensitive to it
EnqueueIfUnseen(nextState, seen, toExplore); int maxMinterm = state.StartsWithLineAnchor ? _minterms!.Length : _minterms!.Length - 1;
// Explore successor states for each minterm
for (int mintermId = 0; mintermId <= maxMinterm; ++mintermId)
{
int offset = DeltaOffset(state.Id, mintermId);
if (!TryCreateNewTransition(state, mintermId, offset, true, out MatchingState<TSet>? nextState))
goto DfaLimitReached;
EnqueueIfUnseen(nextState, seen, toExplore);
}
// Safe to dequeue now that the state has been completely handled
toExplore.Dequeue();
} }
// Safe to dequeue now that the state has been completely handled
toExplore.Dequeue();
} }
}
DfaLimitReached: DfaLimitReached:
if (exploreNfa && toExplore.Count > 0) if (exploreNfa && toExplore.Count > 0)
{
// DFA states are broken up into NFA states when they are alternations
DfaMatchingState<TSet>[] toBreakUp = toExplore.ToArray();
toExplore.Clear();
foreach (DfaMatchingState<TSet> dfaState in toBreakUp)
{ {
// Remove state from seen so that it can be added back in if necessary // DFA states are broken up into NFA states when they are alternations
seen.Remove(dfaState); MatchingState<TSet>[] toBreakUp = toExplore.ToArray();
// Enqueue all elements of a top level alternation or the state itself toExplore.Clear();
foreach (var element in dfaState.Node.EnumerateAlternationBranches()) foreach (MatchingState<TSet> dfaState in toBreakUp)
{ {
int nfaState = _builder.CreateNfaState(element, dfaState.PrevCharKind); // Remove state from seen so that it can be added back in if necessary
EnqueueIfUnseen(_builder.GetCoreState(nfaState), seen, toExplore); seen.Remove(dfaState);
// Enqueue all elements of a top level alternation or the state itself
ForEachNfaState(dfaState.Node, dfaState.PrevCharKind, (this, seen, toExplore),
static (int nfaId, (SymbolicRegexMatcher<TSet> Matcher, HashSet<MatchingState<TSet>> Seen, Queue<MatchingState<TSet>> ToExplore) args) =>
{
MatchingState<TSet>? coreState = args.Matcher.GetState(args.Matcher.GetCoreStateId(nfaId));
EnqueueIfUnseen(coreState, args.Seen, args.ToExplore);
});
} }
}
while (toExplore.Count > 0) while (toExplore.Count > 0)
{
// NFA transitions can't fail, so its safe to dequeue here
DfaMatchingState<TSet> state = toExplore.Dequeue();
// Include the special minterm for the last end-of-line if the state is sensitive to it
int maxMinterm = state.StartsWithLineAnchor ? _builder._minterms.Length : _builder._minterms.Length - 1;
// Explore successor states for each minterm
for (int mintermId = 0; mintermId <= maxMinterm; ++mintermId)
{ {
int nfaOffset = (_builder._nfaStateArrayInverse[state.Id] << _builder._mintermsLog) | mintermId; // NFA transitions can't fail, so its safe to dequeue here
int[] nextNfaStates = _builder.CreateNewNfaTransition(_builder._nfaStateArrayInverse[state.Id], mintermId, nfaOffset); MatchingState<TSet> state = toExplore.Dequeue();
foreach (int nextNfaState in nextNfaStates) // Include the special minterm for the last end-of-line if the state is sensitive to it
int maxMinterm = state.StartsWithLineAnchor ? _minterms.Length : _minterms.Length - 1;
// Explore successor states for each minterm
for (int mintermId = 0; mintermId <= maxMinterm; ++mintermId)
{ {
EnqueueIfUnseen(_builder.GetCoreState(nextNfaState), seen, toExplore); int nfaOffset = DeltaOffset(_nfaIdByCoreId[state.Id], mintermId);
int[] nextNfaStates = CreateNewNfaTransition(_nfaIdByCoreId[state.Id], mintermId, nfaOffset);
foreach (int nextNfaState in nextNfaStates)
{
EnqueueIfUnseen(GetState(GetCoreStateId(nextNfaState)), seen, toExplore);
}
} }
} }
} }
} }
static void EnqueueAll(DfaMatchingState<TSet>[] states, HashSet<DfaMatchingState<TSet>> seen, Queue<DfaMatchingState<TSet>> toExplore) static void EnqueueAll(MatchingState<TSet>[] states, HashSet<MatchingState<TSet>> seen, Queue<MatchingState<TSet>> toExplore)
{ {
foreach (DfaMatchingState<TSet> state in states) foreach (MatchingState<TSet> state in states)
{ {
EnqueueIfUnseen(state, seen, toExplore); EnqueueIfUnseen(state, seen, toExplore);
} }
} }
static void EnqueueIfUnseen(DfaMatchingState<TSet> state, HashSet<DfaMatchingState<TSet>> seen, Queue<DfaMatchingState<TSet>> queue) static void EnqueueIfUnseen(MatchingState<TSet> state, HashSet<MatchingState<TSet>> seen, Queue<MatchingState<TSet>> queue)
{ {
if (seen.Add(state)) if (seen.Add(state))
{ {
......
...@@ -30,133 +30,134 @@ internal sealed partial class SymbolicRegexMatcher<TSet> ...@@ -30,133 +30,134 @@ internal sealed partial class SymbolicRegexMatcher<TSet>
[ExcludeFromCodeCoverage(Justification = "Currently only used for testing")] [ExcludeFromCodeCoverage(Justification = "Currently only used for testing")]
public override IEnumerable<string> SampleMatches(int k, int randomseed) public override IEnumerable<string> SampleMatches(int k, int randomseed)
{ {
// Zero is treated as no seed, instead using a system provided one lock (this)
Random random = randomseed != 0 ? new Random(randomseed) : new Random(); {
// Zero is treated as no seed, instead using a system provided one
ISolver<TSet> solver = _builder._solver; Random random = randomseed != 0 ? new Random(randomseed) : new Random();
CharSetSolver charSetSolver = _builder._charSetSolver; CharSetSolver charSetSolver = _builder._charSetSolver;
// Create helper BDDs for handling anchors and preferentially generating ASCII inputs // Create helper BDDs for handling anchors and preferentially generating ASCII inputs
BDD asciiWordCharacters = charSetSolver.Or(new BDD[] { BDD asciiWordCharacters = charSetSolver.Or(new BDD[] {
charSetSolver.CreateBDDFromRange('A', 'Z'), charSetSolver.CreateBDDFromRange('A', 'Z'),
charSetSolver.CreateBDDFromRange('a', 'z'), charSetSolver.CreateBDDFromRange('a', 'z'),
charSetSolver.CreateBDDFromChar('_'), charSetSolver.CreateBDDFromChar('_'),
charSetSolver.CreateBDDFromRange('0', '9')}); charSetSolver.CreateBDDFromRange('0', '9')});
// Visible ASCII range for input character generation // Visible ASCII range for input character generation
BDD ascii = charSetSolver.CreateBDDFromRange('\x20', '\x7E'); BDD ascii = charSetSolver.CreateBDDFromRange('\x20', '\x7E');
BDD asciiNonWordCharacters = charSetSolver.And(ascii, charSetSolver.Not(asciiWordCharacters)); BDD asciiNonWordCharacters = charSetSolver.And(ascii, charSetSolver.Not(asciiWordCharacters));
// Set up two sets of minterms, one with the additional special minterm for the last end-of-line // Set up two sets of minterms, one with the additional special minterm for the last end-of-line
Debug.Assert(_builder._minterms is not null); Debug.Assert(_minterms is not null);
int[] mintermIdsWithoutZ = new int[_builder._minterms.Length]; int[] mintermIdsWithoutZ = new int[_minterms.Length];
int[] mintermIdsWithZ = new int[_builder._minterms.Length + 1]; int[] mintermIdsWithZ = new int[_minterms.Length + 1];
for (int i = 0; i < _builder._minterms.Length; ++i) for (int i = 0; i < _minterms.Length; ++i)
{ {
mintermIdsWithoutZ[i] = i; mintermIdsWithoutZ[i] = i;
mintermIdsWithZ[i] = i; mintermIdsWithZ[i] = i;
} }
mintermIdsWithZ[_builder._minterms.Length] = _builder._minterms.Length; mintermIdsWithZ[_minterms.Length] = _minterms.Length;
for (int i = 0; i < k; i++)
{
// Holds the generated input so far
StringBuilder inputSoFar = new();
StringBuilder? latestCandidate = null;
// Current set of states reached initially contains just the root for (int i = 0; i < k; i++)
NfaMatchingState states = new(_builder); {
// Here one could also consider previous characters for example for \b, \B, and ^ anchors // Holds the generated input so far
// and initialize inputSoFar accordingly StringBuilder inputSoFar = new();
states.InitializeFrom(_initialStates[GetCharKind(ReadOnlySpan<char>.Empty, -1)]); StringBuilder? latestCandidate = null;
CurrentState statesWrapper = new(states);
// Used for end suffixes // Current set of states reached initially contains just the root
List<string> possibleEndings = new(); NfaMatchingState states = new();
// Here one could also consider previous characters for example for \b, \B, and ^ anchors
// and initialize inputSoFar accordingly
states.InitializeFrom(this, _initialStates[GetCharKind<FullInputReader>(ReadOnlySpan<char>.Empty, -1)]);
CurrentState statesWrapper = new(states);
while (true) // Used for end suffixes
{ List<string> possibleEndings = new();
Debug.Assert(states.NfaStateSet.Count > 0);
// Gather the possible endings for satisfying nullability while (true)
possibleEndings.Clear();
if (NfaStateHandler.CanBeNullable(ref statesWrapper))
{ {
// Unconditionally final state or end of the input due to \Z anchor for example Debug.Assert(states.NfaStateSet.Count > 0);
if (NfaStateHandler.IsNullable(ref statesWrapper) ||
NfaStateHandler.IsNullableFor(_builder, ref statesWrapper, CharKind.BeginningEnd))
{
possibleEndings.Add("");
}
// End of line due to end-of-line anchor // Gather the possible endings for satisfying nullability
if (NfaStateHandler.IsNullableFor(_builder, ref statesWrapper, CharKind.Newline)) possibleEndings.Clear();
if (SymbolicRegexMatcher<TSet>.NfaStateHandler.CanBeNullable(this, in statesWrapper))
{ {
possibleEndings.Add("\n"); // Unconditionally final state or end of the input due to \Z anchor for example
if (SymbolicRegexMatcher<TSet>.NfaStateHandler.IsNullable(this, in statesWrapper) ||
SymbolicRegexMatcher<TSet>.NfaStateHandler.IsNullableFor(this, in statesWrapper, CharKind.BeginningEnd))
{
possibleEndings.Add("");
}
// End of line due to end-of-line anchor
if (SymbolicRegexMatcher<TSet>.NfaStateHandler.IsNullableFor(this, in statesWrapper, CharKind.Newline))
{
possibleEndings.Add("\n");
}
// Related to wordborder due to \b or \B
if (SymbolicRegexMatcher<TSet>.NfaStateHandler.IsNullableFor(this, in statesWrapper, CharKind.WordLetter))
{
possibleEndings.Add(ChooseChar(random, asciiWordCharacters, ascii, charSetSolver).ToString());
}
// Related to wordborder due to \b or \B
if (SymbolicRegexMatcher<TSet>.NfaStateHandler.IsNullableFor(this, in statesWrapper, CharKind.General))
{
possibleEndings.Add(ChooseChar(random, asciiNonWordCharacters, ascii, charSetSolver).ToString());
}
} }
// Related to wordborder due to \b or \B // If we have a possible ending, then store a candidate input
if (NfaStateHandler.IsNullableFor(_builder, ref statesWrapper, CharKind.WordLetter)) if (possibleEndings.Count > 0)
{ {
possibleEndings.Add(ChooseChar(random, asciiWordCharacters, ascii, charSetSolver).ToString()); latestCandidate ??= new();
latestCandidate.Clear();
latestCandidate.Append(inputSoFar);
//Choose some suffix that allows some anchor (if any) to be nullable
latestCandidate.Append(Choose(random, possibleEndings));
// Choose to stop here based on a coin-toss
if (FlipBiasedCoin(random, SampleMatchesStoppingProbability))
{
yield return latestCandidate.ToString();
break;
}
} }
// Related to wordborder due to \b or \B // Shuffle the minterms, including the last end-of-line marker if appropriate
if (NfaStateHandler.IsNullableFor(_builder, ref statesWrapper, CharKind.General)) int[] mintermIds = SymbolicRegexMatcher<TSet>.NfaStateHandler.StartsWithLineAnchor(this, in statesWrapper) ?
Shuffle(random, mintermIdsWithZ) :
Shuffle(random, mintermIdsWithoutZ);
foreach (int mintermId in mintermIds)
{ {
possibleEndings.Add(ChooseChar(random, asciiNonWordCharacters, ascii, charSetSolver).ToString()); bool success = SymbolicRegexMatcher<TSet>.NfaStateHandler.TryTakeTransition(this, ref statesWrapper, mintermId);
Debug.Assert(success);
if (states.NfaStateSet.Count > 0)
{
TSet minterm = GetMintermFromId(mintermId);
// Append a random member of the minterm
inputSoFar.Append(ChooseChar(random, ToBDD(minterm, Solver, charSetSolver), ascii, charSetSolver));
break;
}
else
{
// The transition was a dead end, undo and continue to try another minterm
NfaStateHandler.UndoTransition(ref statesWrapper);
}
} }
}
// If we have a possible ending, then store a candidate input // In the case that there are no next states or input has become too large: stop here
if (possibleEndings.Count > 0) if (states.NfaStateSet.Count == 0 || inputSoFar.Length > SampleMatchesMaxInputLength)
{
latestCandidate ??= new();
latestCandidate.Clear();
latestCandidate.Append(inputSoFar);
//Choose some suffix that allows some anchor (if any) to be nullable
latestCandidate.Append(Choose(random, possibleEndings));
// Choose to stop here based on a coin-toss
if (FlipBiasedCoin(random, SampleMatchesStoppingProbability))
{ {
yield return latestCandidate.ToString(); // Ending up here without an ending is unlikely but possible for example for infeasible patterns
// such as @"no\bway" or due to poor choice of c -- no anchor is enabled -- so this is a deadend.
if (latestCandidate != null)
{
yield return latestCandidate.ToString();
}
break; break;
} }
} }
// Shuffle the minterms, including the last end-of-line marker if appropriate
int[] mintermIds = NfaStateHandler.StartsWithLineAnchor(_builder, ref statesWrapper) ?
Shuffle(random, mintermIdsWithZ) :
Shuffle(random, mintermIdsWithoutZ);
foreach (int mintermId in mintermIds)
{
bool success = NfaStateHandler.TakeTransition(_builder, ref statesWrapper, mintermId);
Debug.Assert(success);
if (states.NfaStateSet.Count > 0)
{
TSet minterm = _builder.GetMinterm(mintermId);
// Append a random member of the minterm
inputSoFar.Append(ChooseChar(random, ToBDD(minterm, solver, charSetSolver), ascii, charSetSolver));
break;
}
else
{
// The transition was a dead end, undo and continue to try another minterm
NfaStateHandler.UndoTransition(ref statesWrapper);
}
}
// In the case that there are no next states or input has become too large: stop here
if (states.NfaStateSet.Count == 0 || inputSoFar.Length > SampleMatchesMaxInputLength)
{
// Ending up here without an ending is unlikely but possible for example for infeasible patterns
// such as @"no\bway" or due to poor choice of c -- no anchor is enabled -- so this is a deadend.
if (latestCandidate != null)
{
yield return latestCandidate.ToString();
}
break;
}
} }
} }
......
...@@ -37,8 +37,8 @@ public SymbolicRegexRunnerFactory(RegexTree regexTree, RegexOptions options, Tim ...@@ -37,8 +37,8 @@ public SymbolicRegexRunnerFactory(RegexTree regexTree, RegexOptions options, Tim
} }
} }
rootNode = rootNode.AddFixedLengthMarkers(); rootNode = rootNode.AddFixedLengthMarkers(bddBuilder);
BDD[] minterms = rootNode.ComputeMinterms(); BDD[] minterms = rootNode.ComputeMinterms(bddBuilder);
_matcher = minterms.Length > 64 ? _matcher = minterms.Length > 64 ?
SymbolicRegexMatcher<BitVector>.Create(regexTree.CaptureCount, regexTree.FindOptimizations, bddBuilder, rootNode, new BitVectorSolver(minterms, charSetSolver), matchTimeout) : SymbolicRegexMatcher<BitVector>.Create(regexTree.CaptureCount, regexTree.FindOptimizations, bddBuilder, rootNode, new BitVectorSolver(minterms, charSetSolver), matchTimeout) :
......
...@@ -64,6 +64,38 @@ public static bool TryEnsureSufficientExecutionStack() ...@@ -64,6 +64,38 @@ public static bool TryEnsureSufficientExecutionStack()
.ContinueWith(t => t.GetAwaiter().GetResult(), CancellationToken.None, TaskContinuationOptions.ExecuteSynchronously, TaskScheduler.Default) .ContinueWith(t => t.GetAwaiter().GetResult(), CancellationToken.None, TaskContinuationOptions.ExecuteSynchronously, TaskScheduler.Default)
.GetAwaiter().GetResult(); .GetAwaiter().GetResult();
/// <summary>Calls the provided function on the stack of a different thread pool thread.</summary>
/// <typeparam name="TArg1">The type of the first argument to pass to the function.</typeparam>
/// <typeparam name="TArg2">The type of the second argument to pass to the function.</typeparam>
/// <typeparam name="TArg3">The type of the third argument to pass to the function.</typeparam>
/// <typeparam name="TArg4">The type of the fourth argument to pass to the function.</typeparam>
/// <param name="action">The action to invoke.</param>
/// <param name="arg1">The first argument to pass to the action.</param>
/// <param name="arg2">The second argument to pass to the action.</param>
/// <param name="arg3">The third argument to pass to the action.</param>
/// <param name="arg4">The fourth argument to pass to the action.</param>
public static void CallOnEmptyStack<TArg1, TArg2, TArg3, TArg4>(Action<TArg1, TArg2, TArg3, TArg4> action, TArg1 arg1, TArg2 arg2, TArg3 arg3, TArg4 arg4) =>
Task.Run(() => action(arg1, arg2, arg3, arg4))
.ContinueWith(t => t.GetAwaiter().GetResult(), CancellationToken.None, TaskContinuationOptions.ExecuteSynchronously, TaskScheduler.Default)
.GetAwaiter().GetResult();
/// <summary>Calls the provided function on the stack of a different thread pool thread.</summary>
/// <typeparam name="TArg1">The type of the first argument to pass to the function.</typeparam>
/// <typeparam name="TArg2">The type of the second argument to pass to the function.</typeparam>
/// <typeparam name="TArg3">The type of the third argument to pass to the function.</typeparam>
/// <typeparam name="TArg4">The type of the fourth argument to pass to the function.</typeparam>
/// <typeparam name="TArg5">The type of the fifth argument to pass to the function.</typeparam>
/// <param name="action">The action to invoke.</param>
/// <param name="arg1">The first argument to pass to the action.</param>
/// <param name="arg2">The second argument to pass to the action.</param>
/// <param name="arg3">The third argument to pass to the action.</param>
/// <param name="arg4">The fourth argument to pass to the action.</param>
/// <param name="arg5">The fifth argument to pass to the action.</param>
public static void CallOnEmptyStack<TArg1, TArg2, TArg3, TArg4, TArg5>(Action<TArg1, TArg2, TArg3, TArg4, TArg5> action, TArg1 arg1, TArg2 arg2, TArg3 arg3, TArg4 arg4, TArg5 arg5) =>
Task.Run(() => action(arg1, arg2, arg3, arg4, arg5))
.ContinueWith(t => t.GetAwaiter().GetResult(), CancellationToken.None, TaskContinuationOptions.ExecuteSynchronously, TaskScheduler.Default)
.GetAwaiter().GetResult();
/// <summary>Calls the provided function on the stack of a different thread pool thread.</summary> /// <summary>Calls the provided function on the stack of a different thread pool thread.</summary>
/// <typeparam name="TArg1">The type of the first argument to pass to the function.</typeparam> /// <typeparam name="TArg1">The type of the first argument to pass to the function.</typeparam>
/// <typeparam name="TArg2">The type of the second argument to pass to the function.</typeparam> /// <typeparam name="TArg2">The type of the second argument to pass to the function.</typeparam>
...@@ -126,5 +158,21 @@ public static bool TryEnsureSufficientExecutionStack() ...@@ -126,5 +158,21 @@ public static bool TryEnsureSufficientExecutionStack()
Task.Run(() => func(arg1, arg2, arg3)) Task.Run(() => func(arg1, arg2, arg3))
.ContinueWith(t => t.GetAwaiter().GetResult(), CancellationToken.None, TaskContinuationOptions.ExecuteSynchronously, TaskScheduler.Default) .ContinueWith(t => t.GetAwaiter().GetResult(), CancellationToken.None, TaskContinuationOptions.ExecuteSynchronously, TaskScheduler.Default)
.GetAwaiter().GetResult(); .GetAwaiter().GetResult();
/// <summary>Calls the provided function on the stack of a different thread pool thread.</summary>
/// <typeparam name="TArg1">The type of the first argument to pass to the function.</typeparam>
/// <typeparam name="TArg2">The type of the second argument to pass to the function.</typeparam>
/// <typeparam name="TArg3">The type of the third argument to pass to the function.</typeparam>
/// <typeparam name="TArg4">The type of the fourth argument to pass to the function.</typeparam>
/// <typeparam name="TResult">The return type of the function.</typeparam>
/// <param name="func">The function to invoke.</param>
/// <param name="arg1">The first argument to pass to the function.</param>
/// <param name="arg2">The second argument to pass to the function.</param>
/// <param name="arg3">The third argument to pass to the function.</param>
/// <param name="arg4">The fourth argument to pass to the function.</param>
public static TResult CallOnEmptyStack<TArg1, TArg2, TArg3, TArg4, TResult>(Func<TArg1, TArg2, TArg3, TArg4, TResult> func, TArg1 arg1, TArg2 arg2, TArg3 arg3, TArg4 arg4) =>
Task.Run(() => func(arg1, arg2, arg3, arg4))
.ContinueWith(t => t.GetAwaiter().GetResult(), CancellationToken.None, TaskContinuationOptions.ExecuteSynchronously, TaskScheduler.Default)
.GetAwaiter().GetResult();
} }
} }
...@@ -86,7 +86,7 @@ public static IEnumerable<object[]> SafeThresholdTests_MemberData() ...@@ -86,7 +86,7 @@ public static IEnumerable<object[]> SafeThresholdTests_MemberData()
{ {
RegexNode tree = RegexParser.Parse(Pattern, options | RegexOptions.ExplicitCapture, CultureInfo.CurrentCulture).Root; RegexNode tree = RegexParser.Parse(Pattern, options | RegexOptions.ExplicitCapture, CultureInfo.CurrentCulture).Root;
SymbolicRegexNode<BDD> rootNode = converter.ConvertToSymbolicRegexNode(tree); SymbolicRegexNode<BDD> rootNode = converter.ConvertToSymbolicRegexNode(tree);
yield return new object[] { rootNode, ExpectedSafeSize }; yield return new object[] { bddBuilder, rootNode, ExpectedSafeSize };
} }
// add .*? in front of the pattern, this adds 1 more NFA state // add .*? in front of the pattern, this adds 1 more NFA state
...@@ -94,7 +94,7 @@ public static IEnumerable<object[]> SafeThresholdTests_MemberData() ...@@ -94,7 +94,7 @@ public static IEnumerable<object[]> SafeThresholdTests_MemberData()
{ {
RegexNode tree = RegexParser.Parse(".*?" + Pattern, options | RegexOptions.ExplicitCapture, CultureInfo.CurrentCulture).Root; RegexNode tree = RegexParser.Parse(".*?" + Pattern, options | RegexOptions.ExplicitCapture, CultureInfo.CurrentCulture).Root;
SymbolicRegexNode<BDD> rootNode = converter.ConvertToSymbolicRegexNode(tree); SymbolicRegexNode<BDD> rootNode = converter.ConvertToSymbolicRegexNode(tree);
yield return new object[] { rootNode, 1 + ExpectedSafeSize}; yield return new object[] { bddBuilder, rootNode, 1 + ExpectedSafeSize};
} }
// use of anchors increases the estimate by 5x in general but in reality much less, at most 3x // use of anchors increases the estimate by 5x in general but in reality much less, at most 3x
...@@ -102,7 +102,7 @@ public static IEnumerable<object[]> SafeThresholdTests_MemberData() ...@@ -102,7 +102,7 @@ public static IEnumerable<object[]> SafeThresholdTests_MemberData()
{ {
RegexNode tree = RegexParser.Parse(Pattern + "$", options | RegexOptions.ExplicitCapture, CultureInfo.CurrentCulture).Root; RegexNode tree = RegexParser.Parse(Pattern + "$", options | RegexOptions.ExplicitCapture, CultureInfo.CurrentCulture).Root;
SymbolicRegexNode<BDD> rootNode = converter.ConvertToSymbolicRegexNode(tree); SymbolicRegexNode<BDD> rootNode = converter.ConvertToSymbolicRegexNode(tree);
yield return new object[] { rootNode, 5 * ExpectedSafeSize }; yield return new object[] { bddBuilder, rootNode, 5 * ExpectedSafeSize };
} }
// use of captures has no effect on the estimations // use of captures has no effect on the estimations
...@@ -110,31 +110,32 @@ public static IEnumerable<object[]> SafeThresholdTests_MemberData() ...@@ -110,31 +110,32 @@ public static IEnumerable<object[]> SafeThresholdTests_MemberData()
{ {
RegexNode tree = RegexParser.Parse(Pattern, options, CultureInfo.CurrentCulture).Root; RegexNode tree = RegexParser.Parse(Pattern, options, CultureInfo.CurrentCulture).Root;
SymbolicRegexNode<BDD> rootNode = converter.ConvertToSymbolicRegexNode(tree); SymbolicRegexNode<BDD> rootNode = converter.ConvertToSymbolicRegexNode(tree);
yield return new object[] { rootNode, ExpectedSafeSize }; yield return new object[] { bddBuilder, rootNode, ExpectedSafeSize };
} }
} }
[Theory] [Theory]
[MemberData(nameof(SafeThresholdTests_MemberData))] [MemberData(nameof(SafeThresholdTests_MemberData))]
public void SafeThresholdTests(object obj, int expectedSafeSize) public void SafeThresholdTests(object builderObj, object nodeObj, int expectedSafeSize)
{ {
SymbolicRegexNode<BDD> node = (SymbolicRegexNode<BDD>)obj; SymbolicRegexBuilder<BDD> builder = (SymbolicRegexBuilder<BDD>)builderObj;
SymbolicRegexNode<BDD> node = (SymbolicRegexNode<BDD>)nodeObj;
int safeSize = node.EstimateNfaSize(); int safeSize = node.EstimateNfaSize();
Assert.Equal(expectedSafeSize, safeSize); Assert.Equal(expectedSafeSize, safeSize);
int nfaStateCount = CalculateNfaStateCount(node); int nfaStateCount = CalculateNfaStateCount(builder, node);
Assert.True(nfaStateCount <= expectedSafeSize); Assert.True(nfaStateCount <= expectedSafeSize);
} }
/// <summary> /// <summary>
/// Compute the closure of all NFA states from root and return the size of the resulting state space. /// Compute the closure of all NFA states from root and return the size of the resulting state space.
/// </summary> /// </summary>
private static int CalculateNfaStateCount(SymbolicRegexNode<BDD> root) private static int CalculateNfaStateCount(SymbolicRegexBuilder<BDD> builder, SymbolicRegexNode<BDD> root)
{ {
// Here we are actually using the original BDD algebra (not converting to the BV or Uint64 algebra) // Here we are actually using the original BDD algebra (not converting to the BV or Uint64 algebra)
// because it does not matter which algebra we use here (this matters only for performance) // because it does not matter which algebra we use here (this matters only for performance)
HashSet<(uint, SymbolicRegexNode<BDD>)> states = new(); HashSet<(uint, SymbolicRegexNode<BDD>)> states = new();
Stack<(uint, SymbolicRegexNode<BDD>)> frontier = new(); Stack<(uint, SymbolicRegexNode<BDD>)> frontier = new();
List<BDD> minterms = MintermGenerator<BDD>.GenerateMinterms(root._builder._solver, root.GetSets()); List<BDD> minterms = MintermGenerator<BDD>.GenerateMinterms(builder._solver, root.GetSets(builder));
// Start from the initial state that has kind 'General' when no anchors are being used, else kind 'BeginningEnd' // Start from the initial state that has kind 'General' when no anchors are being used, else kind 'BeginningEnd'
(uint, SymbolicRegexNode<BDD>) initialState = (root._info.ContainsSomeAnchor ? CharKind.BeginningEnd : CharKind.General, root); (uint, SymbolicRegexNode<BDD>) initialState = (root._info.ContainsSomeAnchor ? CharKind.BeginningEnd : CharKind.General, root);
...@@ -150,7 +151,7 @@ private static int CalculateNfaStateCount(SymbolicRegexNode<BDD> root) ...@@ -150,7 +151,7 @@ private static int CalculateNfaStateCount(SymbolicRegexNode<BDD> root)
foreach (BDD minterm in minterms) foreach (BDD minterm in minterms)
{ {
uint kind = GetCharKind(minterm); uint kind = GetCharKind(minterm);
SymbolicRegexNode<BDD> target = source.Node.CreateDerivativeWithoutEffects(minterm, source.Kind); SymbolicRegexNode<BDD> target = source.Node.CreateDerivativeWithoutEffects(builder, minterm, source.Kind);
//In the case of an NFA all the different alternatives in the DFA state become individual states themselves //In the case of an NFA all the different alternatives in the DFA state become individual states themselves
foreach (SymbolicRegexNode<BDD> node in GetAlternatives(target)) foreach (SymbolicRegexNode<BDD> node in GetAlternatives(target))
...@@ -169,7 +170,7 @@ private static int CalculateNfaStateCount(SymbolicRegexNode<BDD> root) ...@@ -169,7 +170,7 @@ private static int CalculateNfaStateCount(SymbolicRegexNode<BDD> root)
return states.Count; return states.Count;
// Enumerates the alternatives from a node, for eaxmple (ab|(bc|cd)) has three alternatives // Enumerates the alternatives from a node, for eaxmple (ab|(bc|cd)) has three alternatives
static IEnumerable<SymbolicRegexNode<BDD>> GetAlternatives(SymbolicRegexNode<BDD> node) IEnumerable<SymbolicRegexNode<BDD>> GetAlternatives(SymbolicRegexNode<BDD> node)
{ {
if (node._kind == SymbolicRegexNodeKind.Alternate) if (node._kind == SymbolicRegexNodeKind.Alternate)
{ {
...@@ -178,7 +179,7 @@ static IEnumerable<SymbolicRegexNode<BDD>> GetAlternatives(SymbolicRegexNode<BDD ...@@ -178,7 +179,7 @@ static IEnumerable<SymbolicRegexNode<BDD>> GetAlternatives(SymbolicRegexNode<BDD
foreach (SymbolicRegexNode<BDD> elem in GetAlternatives(node._right!)) foreach (SymbolicRegexNode<BDD> elem in GetAlternatives(node._right!))
yield return elem; yield return elem;
} }
else if (!node.IsNothing) // omit deadend states else if (!node.IsNothing(builder._solver)) // omit deadend states
{ {
yield return node; yield return node;
} }
...@@ -187,8 +188,8 @@ static IEnumerable<SymbolicRegexNode<BDD>> GetAlternatives(SymbolicRegexNode<BDD ...@@ -187,8 +188,8 @@ static IEnumerable<SymbolicRegexNode<BDD>> GetAlternatives(SymbolicRegexNode<BDD
// Simplified character kind calculation that omits the special case that minterm can be the very last \n // Simplified character kind calculation that omits the special case that minterm can be the very last \n
// This omission has practically no effect of the size of the state space, but would complicate the logic // This omission has practically no effect of the size of the state space, but would complicate the logic
uint GetCharKind(BDD minterm) => uint GetCharKind(BDD minterm) =>
minterm.Equals(root._builder._newLineSet) ? CharKind.Newline : // is \n minterm.Equals(builder._newLineSet) ? CharKind.Newline : // is \n
(!root._builder._solver.IsEmpty(root._builder._solver.And(root._builder._wordLetterForBoundariesSet, minterm)) ? (!builder._solver.IsEmpty(builder._solver.And(builder._wordLetterForBoundariesSet, minterm)) ?
CharKind.WordLetter : // in \w CharKind.WordLetter : // in \w
CharKind.General); // anything else, thus in particular in \W CharKind.General); // anything else, thus in particular in \W
} }
......
...@@ -51,7 +51,7 @@ ...@@ -51,7 +51,7 @@
<Compile Include="..\..\src\System\Text\RegularExpressions\Symbolic\CharKind.cs" Link="Production\CharKind.cs" /> <Compile Include="..\..\src\System\Text\RegularExpressions\Symbolic\CharKind.cs" Link="Production\CharKind.cs" />
<Compile Include="..\..\src\System\Text\RegularExpressions\Symbolic\CharSetSolver.cs" Link="Production\CharSetSolver.cs" /> <Compile Include="..\..\src\System\Text\RegularExpressions\Symbolic\CharSetSolver.cs" Link="Production\CharSetSolver.cs" />
<Compile Include="..\..\src\System\Text\RegularExpressions\Symbolic\DerivativeEffect.cs" Link="Production\DerivativeEffect.cs" /> <Compile Include="..\..\src\System\Text\RegularExpressions\Symbolic\DerivativeEffect.cs" Link="Production\DerivativeEffect.cs" />
<Compile Include="..\..\src\System\Text\RegularExpressions\Symbolic\DfaMatchingState.cs" Link="Production\DfaMatchingState.cs" /> <Compile Include="..\..\src\System\Text\RegularExpressions\Symbolic\MatchingState.cs" Link="Production\MatchingState.cs" />
<Compile Include="..\..\src\System\Text\RegularExpressions\Symbolic\DoublyLinkedList.cs" Link="Production\DoublyLinkedList.cs" /> <Compile Include="..\..\src\System\Text\RegularExpressions\Symbolic\DoublyLinkedList.cs" Link="Production\DoublyLinkedList.cs" />
<Compile Include="..\..\src\System\Text\RegularExpressions\Symbolic\ISolver.cs" Link="Production\ISolver.cs" /> <Compile Include="..\..\src\System\Text\RegularExpressions\Symbolic\ISolver.cs" Link="Production\ISolver.cs" />
<Compile Include="..\..\src\System\Text\RegularExpressions\Symbolic\MintermGenerator.cs" Link="Production\MintermGenerator.cs" /> <Compile Include="..\..\src\System\Text\RegularExpressions\Symbolic\MintermGenerator.cs" Link="Production\MintermGenerator.cs" />
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册