未验证 提交 6ebcb3d6 编写于 作者: O Olli Saarikivi 提交者: GitHub

NonBacktracking locking fixes and cleanup (#71234)

* Concurrency fixes and refactoring for clarity

Removed builder reference from SymbolicRegexNode instances; builder now
has to be passed in. Since the builder is not thread safe this clarifies
the locking required in the matcher when using it.
Moved matching specific state from the builder to the matcher. This
includes state and transition arrays.
Simplify character kind code by eliminating duplication of logic.

* Changes from review and cleanup

DfaMatchingState is now just MatchingState

* Comment on NFA mode IDs
上级 8cd9d636
......@@ -63,7 +63,7 @@
<Compile Include="System\Text\RegularExpressions\Symbolic\CharKind.cs" />
<Compile Include="System\Text\RegularExpressions\Symbolic\CharSetSolver.cs" />
<Compile Include="System\Text\RegularExpressions\Symbolic\DerivativeEffect.cs" />
<Compile Include="System\Text\RegularExpressions\Symbolic\DfaMatchingState.cs" />
<Compile Include="System\Text\RegularExpressions\Symbolic\MatchingState.cs" />
<Compile Include="System\Text\RegularExpressions\Symbolic\DoublyLinkedList.cs" />
<Compile Include="System\Text\RegularExpressions\Symbolic\ISolver.cs" />
<Compile Include="System\Text\RegularExpressions\Symbolic\MintermClassifier.cs" />
......@@ -75,6 +75,7 @@
<Compile Include="System\Text\RegularExpressions\Symbolic\SymbolicRegexNode.cs" />
<Compile Include="System\Text\RegularExpressions\Symbolic\SymbolicRegexKind.cs" />
<Compile Include="System\Text\RegularExpressions\Symbolic\SymbolicRegexInfo.cs" />
<Compile Include="System\Text\RegularExpressions\Symbolic\SymbolicRegexMatcher.Automata.cs" />
<Compile Include="System\Text\RegularExpressions\Symbolic\SymbolicRegexMatcher.cs" />
<Compile Include="System\Text\RegularExpressions\Symbolic\SymbolicRegexMatcher.Dgml.cs" />
<Compile Include="System\Text\RegularExpressions\Symbolic\SymbolicRegexMatcher.Explore.cs" />
......
......@@ -43,5 +43,8 @@ internal static class CharKind
WordLetter => @"\w",
_ => string.Empty,
};
/// <summary>Returns whether the given value is in the range of valid character kinds.</summary>
internal static bool IsValidCharKind(uint charKind) => charKind < CharKindCount;
}
}
......@@ -8,135 +8,104 @@
namespace System.Text.RegularExpressions.Symbolic
{
/// <summary>Captures a state of a DFA explored during matching.</summary>
internal sealed class DfaMatchingState<TSet> where TSet : IComparable<TSet>, IEquatable<TSet>
/// <summary>Captures a state explored during matching.</summary>
internal sealed class MatchingState<TSet> where TSet : IComparable<TSet>, IEquatable<TSet>
{
internal DfaMatchingState(SymbolicRegexNode<TSet> node, uint prevCharKind)
internal MatchingState(SymbolicRegexNode<TSet> node, uint prevCharKind)
{
Node = node;
PrevCharKind = prevCharKind;
}
/// <summary>The regular expression that labels this state and gives it its semantics.</summary>
internal SymbolicRegexNode<TSet> Node { get; }
/// <summary>
/// The kind of the previous character in the input. The <see cref="SymbolicRegexMatcher{TSet}"/> is responsible
/// for ensuring that in all uses of this state this invariant holds by both selecting initial states accordingly
/// and transitioning on each character to states that match that character's kind.
/// </summary>
/// <remarks>
/// Tracking this information is an optimization that allows each transition taken in the matcher to only depend
/// on the next character (and its kind). In general, the transitions from a state with anchors in its pattern
/// depend on both the previous and the next character. Creating distinct states for each kind of the previous
/// character embeds the necessary information about the previous character into the state space of the automaton.
/// However, this does incur a memory overhead due to the duplication of states. For patterns with no anchors
/// this will always be set to <see cref="CharKind.General"/>, which can reduce the number of states created.
///
/// The performance effect of this optimization has not been investigated. If this optimization were removed, the
/// transition logic would in turn have to become more complicated for derivatives that depend on the nullability
/// of anchors. Care should be taken to not slow down transitions without anchors involved.
/// </remarks>
internal uint PrevCharKind { get; }
/// <summary>
/// A unique identifier for this state, which is used in <see cref="SymbolicRegexMatcher{TSet}"/> to index into
/// state information and transition arrays. Valid IDs are always >= 1.
/// </summary>
internal int Id { get; set; }
/// <summary>This is a deadend state</summary>
internal bool IsDeadend => Node.IsNothing;
/// <summary>Whether this state is known to be a dead end, i.e. no nullable states are reachable from here.</summary>
internal bool IsDeadend(ISolver<TSet> solver) => Node.IsNothing(solver);
/// <summary>The node must be nullable here</summary>
/// <summary>
/// Returns the fixed length that any match ending with this state must have, or -1 if there is no such
/// fixed length, <see cref="SymbolicRegexNode{TSet}.ResolveFixedLength(uint)"/>. The context is defined
/// by <see cref="PrevCharKind"/> of this state and the given nextCharKind. The node must be nullable here.
/// </summary>
internal int FixedLength(uint nextCharKind)
{
Debug.Assert(nextCharKind is 0 or CharKind.BeginningEnd or CharKind.Newline or CharKind.WordLetter or CharKind.NewLineS);
Debug.Assert(IsNullableFor(nextCharKind));
Debug.Assert(CharKind.IsValidCharKind(nextCharKind));
uint context = CharKind.Context(PrevCharKind, nextCharKind);
return Node.ResolveFixedLength(context);
}
/// <summary>If true then the state is a dead-end, rejects all inputs.</summary>
internal bool IsNothing => Node.IsNothing;
/// <summary>If true then state starts with a ^ or $ or \Z</summary>
internal bool StartsWithLineAnchor => Node._info.StartsWithLineAnchor;
/// <summary>
/// Translates a minterm set to a character kind, which is a general categorization of characters used
/// for cheaply deciding the nullability of anchors.
/// </summary>
/// <remarks>
/// An empty set is handled as a special case to indicate the very last \n.
/// </remarks>
/// <param name="minterm">the minterm to translate</param>
/// <returns>the character kind of the minterm</returns>
private uint GetNextCharKind(ref TSet minterm)
{
ISolver<TSet> solver = Node._builder._solver;
TSet wordLetterPredicate = Node._builder._wordLetterForBoundariesSet;
TSet newLinePredicate = Node._builder._newLineSet;
// minterm == solver.False is used to represent the very last \n
uint nextCharKind = CharKind.General;
if (solver.Empty.Equals(minterm))
{
nextCharKind = CharKind.NewLineS;
minterm = newLinePredicate;
}
else if (newLinePredicate.Equals(minterm))
{
// If the previous state was the start state, mark this as the very FIRST \n.
// Essentially, this looks the same as the very last \n and is used to nullify
// rev(\Z) in the conext of a reversed automaton.
nextCharKind = PrevCharKind == CharKind.BeginningEnd ?
CharKind.NewLineS :
CharKind.Newline;
}
else if (!solver.IsEmpty(solver.And(wordLetterPredicate, minterm)))
{
nextCharKind = CharKind.WordLetter;
}
return nextCharKind;
}
/// <summary>
/// Compute the target state for the given input minterm.
/// If <paramref name="minterm"/> is False this means that this is \n and it is the last character of the input.
/// </summary>
/// <param name="builder">the builder that owns <see cref="Node"/></param>
/// <param name="minterm">minterm corresponding to some input character or False corresponding to last \n</param>
internal DfaMatchingState<TSet> Next(TSet minterm)
/// <param name="nextCharKind"></param>
internal SymbolicRegexNode<TSet> Next(SymbolicRegexBuilder<TSet> builder, TSet minterm, uint nextCharKind)
{
uint nextCharKind = GetNextCharKind(ref minterm);
// Combined character context
uint context = CharKind.Context(PrevCharKind, nextCharKind);
// Compute the derivative of the node for the given context
SymbolicRegexNode<TSet> derivative = Node.CreateDerivativeWithoutEffects(minterm, context);
// nextCharKind will be the PrevCharKind of the target state
// use an existing state instead if one exists already
// otherwise create a new new id for it
return Node._builder.CreateState(derivative, nextCharKind, capturing: false);
return Node.CreateDerivativeWithoutEffects(builder, minterm, context);
}
/// <summary>
/// Compute a set of transitions for the given minterm.
/// </summary>
/// <param name="builder">the builder that owns <see cref="Node"/></param>
/// <param name="minterm">minterm corresponding to some input character or False corresponding to last \n</param>
/// <param name="nextCharKind"></param>
/// <returns>an enumeration of the transitions as pairs of the target state and a list of effects to be applied</returns>
internal List<(DfaMatchingState<TSet> State, DerivativeEffect[] Effects)> NfaNextWithEffects(TSet minterm)
internal List<(SymbolicRegexNode<TSet> Node, DerivativeEffect[] Effects)> NfaNextWithEffects(SymbolicRegexBuilder<TSet> builder, TSet minterm, uint nextCharKind)
{
uint nextCharKind = GetNextCharKind(ref minterm);
// Combined character context
uint context = CharKind.Context(PrevCharKind, nextCharKind);
// Compute the transitions for the given context
List<(SymbolicRegexNode<TSet>, DerivativeEffect[])> nodesAndEffects = Node.CreateNfaDerivativeWithEffects(minterm, context);
var list = new List<(DfaMatchingState<TSet> State, DerivativeEffect[] Effects)>();
foreach ((SymbolicRegexNode<TSet> node, DerivativeEffect[]? effects) in nodesAndEffects)
{
// nextCharKind will be the PrevCharKind of the target state
// use an existing state instead if one exists already
// otherwise create a new new id for it
DfaMatchingState<TSet> state = Node._builder.CreateState(node, nextCharKind, capturing: true);
if (!state.IsDeadend)
list.Add((state, effects));
}
return list;
return Node.CreateNfaDerivativeWithEffects(builder, minterm, context);
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal bool IsNullableFor(uint nextCharKind)
{
Debug.Assert(nextCharKind is 0 or CharKind.BeginningEnd or CharKind.Newline or CharKind.WordLetter or CharKind.NewLineS);
Debug.Assert(CharKind.IsValidCharKind(nextCharKind));
uint context = CharKind.Context(PrevCharKind, nextCharKind);
return Node.IsNullableFor(context);
}
public override bool Equals(object? obj) =>
obj is DfaMatchingState<TSet> s && PrevCharKind == s.PrevCharKind && Node.Equals(s.Node);
obj is MatchingState<TSet> s && PrevCharKind == s.PrevCharKind && Node.Equals(s.Node);
public override int GetHashCode() => (PrevCharKind, Node).GetHashCode();
......
......@@ -240,12 +240,12 @@ static string UnexpectedNodeType(RegexNode node)
SymbolicRegexNode<BDD> elem = childResult.Count == 1 ?
childResult.FirstElement :
_builder.CreateConcatAlreadyReversed(childResult);
if (elem.IsNothing)
if (elem.IsNothing(_builder._solver))
{
continue;
}
or = elem.IsAnyStar ?
or = elem.IsAnyStar(_builder._solver) ?
elem : // .* is the absorbing element
SymbolicRegexNode<BDD>.CreateAlternate(_builder, elem, or);
}
......
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
using System.Diagnostics;
namespace System.Text.RegularExpressions.Symbolic
{
/// <summary>Misc information of structural properties of a <see cref="SymbolicRegexNode{S}"/> that is computed bottom up.</summary>
......@@ -14,54 +16,34 @@ namespace System.Text.RegularExpressions.Symbolic
private const uint StartsWithSomeAnchorMask = 32;
private const uint IsHighPriorityNullableMask = 64;
private const uint ContainsEffectMask = 128;
private const uint ContainsLineAnchorMask = 256;
private readonly uint _info;
private SymbolicRegexInfo(uint i) => _info = i;
internal static SymbolicRegexInfo Create(
private static SymbolicRegexInfo Create(
bool isAlwaysNullable = false, bool canBeNullable = false,
bool startsWithLineAnchor = false, bool startsWithSomeAnchor = false, bool containsSomeAnchor = false,
bool startsWithLineAnchor = false, bool containsLineAnchor = false,
bool startsWithSomeAnchor = false, bool containsSomeAnchor = false,
bool isHighPriorityNullable = false, bool containsEffect = false)
{
uint i = 0;
if (canBeNullable || isAlwaysNullable)
{
i |= CanBeNullableMask;
if (isAlwaysNullable)
{
i |= IsAlwaysNullableMask;
}
}
if (containsSomeAnchor || startsWithLineAnchor || startsWithSomeAnchor)
{
i |= ContainsSomeAnchorMask;
if (startsWithLineAnchor)
{
i |= StartsWithLineAnchorMask;
}
if (startsWithLineAnchor || startsWithSomeAnchor)
{
i |= StartsWithSomeAnchorMask;
}
}
if (isHighPriorityNullable)
{
i |= IsHighPriorityNullableMask;
}
if (containsEffect)
{
i |= ContainsEffectMask;
}
return new SymbolicRegexInfo(i);
// Assert that the expected implications hold. For example, every node that contains a line anchor
// must also be marked as containing some anchor.
Debug.Assert(!isAlwaysNullable || canBeNullable);
Debug.Assert(!startsWithLineAnchor || containsLineAnchor);
Debug.Assert(!startsWithLineAnchor || startsWithSomeAnchor);
Debug.Assert(!containsLineAnchor || containsSomeAnchor);
Debug.Assert(!startsWithSomeAnchor || containsSomeAnchor);
return new SymbolicRegexInfo(
(isAlwaysNullable ? IsAlwaysNullableMask : 0) |
(canBeNullable ? CanBeNullableMask : 0) |
(startsWithLineAnchor ? StartsWithLineAnchorMask : 0) |
(containsLineAnchor ? ContainsLineAnchorMask : 0) |
(startsWithSomeAnchor ? StartsWithSomeAnchorMask : 0) |
(containsSomeAnchor ? ContainsSomeAnchorMask : 0) |
(isHighPriorityNullable ? IsHighPriorityNullableMask : 0) |
(containsEffect ? ContainsEffectMask : 0));
}
public bool IsNullable => (_info & IsAlwaysNullableMask) != 0;
......@@ -70,6 +52,8 @@ namespace System.Text.RegularExpressions.Symbolic
public bool StartsWithLineAnchor => (_info & StartsWithLineAnchorMask) != 0;
public bool ContainsLineAnchor => (_info & ContainsLineAnchorMask) != 0;
public bool StartsWithSomeAnchor => (_info & StartsWithSomeAnchorMask) != 0;
public bool ContainsSomeAnchor => (_info & ContainsSomeAnchorMask) != 0;
......@@ -80,6 +64,27 @@ namespace System.Text.RegularExpressions.Symbolic
public bool ContainsEffect => (_info & ContainsEffectMask) != 0;
/// <summary>
/// Used for any node that acts as an epsilon, i.e., something that always matches the empty string.
/// </summary>
public static SymbolicRegexInfo Epsilon() =>
Create(
isAlwaysNullable: true,
canBeNullable: true,
isHighPriorityNullable: true);
/// <summary>
/// Used for all anchors.
/// </summary>
/// <param name="isLineAnchor">whether this anchor is a line anchor</param>
public static SymbolicRegexInfo Anchor(bool isLineAnchor) =>
Create(
canBeNullable: true,
startsWithLineAnchor: isLineAnchor,
containsLineAnchor: isLineAnchor,
startsWithSomeAnchor: true,
containsSomeAnchor: true);
/// <summary>
/// The alternation remains high priority nullable if the left alternative is so.
/// All other info properties are the logical disjunction of the resepctive info properties
......@@ -90,6 +95,7 @@ namespace System.Text.RegularExpressions.Symbolic
isAlwaysNullable: left_info.IsNullable || right_info.IsNullable,
canBeNullable: left_info.CanBeNullable || right_info.CanBeNullable,
startsWithLineAnchor: left_info.StartsWithLineAnchor || right_info.StartsWithLineAnchor,
containsLineAnchor: left_info.ContainsLineAnchor || right_info.ContainsLineAnchor,
startsWithSomeAnchor: left_info.StartsWithSomeAnchor || right_info.StartsWithSomeAnchor,
containsSomeAnchor: left_info.ContainsSomeAnchor || right_info.ContainsSomeAnchor,
isHighPriorityNullable: left_info.IsHighPriorityNullable,
......@@ -105,6 +111,7 @@ namespace System.Text.RegularExpressions.Symbolic
isAlwaysNullable: left_info.IsNullable && right_info.IsNullable,
canBeNullable: left_info.CanBeNullable && right_info.CanBeNullable,
startsWithLineAnchor: left_info.StartsWithLineAnchor || (left_info.CanBeNullable && right_info.StartsWithLineAnchor),
containsLineAnchor: left_info.ContainsLineAnchor || right_info.ContainsLineAnchor,
startsWithSomeAnchor: left_info.StartsWithSomeAnchor || (left_info.CanBeNullable && right_info.StartsWithSomeAnchor),
containsSomeAnchor: left_info.ContainsSomeAnchor || right_info.ContainsSomeAnchor,
isHighPriorityNullable: left_info.IsHighPriorityNullable && right_info.IsHighPriorityNullable,
......
......@@ -48,7 +48,7 @@ internal enum SymbolicRegexNodeKind
/// <summary>Effects to be applied when taking a transition.</summary>
/// <remarks>
/// Left child is the pattern itself and the right child is a concatenation of nodes whose effects should be applied.
/// Effect nodes are created in the rule for concatenation in <see cref="SymbolicRegexNode{TSet}.CreateDerivative(TSet, uint)"/>,
/// Effect nodes are created in the rule for concatenation in <see cref="SymbolicRegexNode{TSet}.CreateDerivative(SymbolicRegexBuilder{TSet}, TSet, uint)"/>,
/// where they are used to represent additional operations that should be performed in the current position if
/// the pattern in the left child is used to match the input. Since these Effect nodes are relative to the current
/// position in the input, the effects from the right child must be applied in the transition that the derivative is
......
......@@ -15,25 +15,27 @@ internal sealed partial class SymbolicRegexMatcher<TSet>
/// <inheritdoc cref="Regex.SaveDGML(TextWriter, int)"/>
[ExcludeFromCodeCoverage(Justification = "Currently only used for testing")]
public override void SaveDGML(TextWriter writer, int maxLabelLength)
{
lock (this)
{
if (maxLabelLength < 0)
maxLabelLength = int.MaxValue;
Dictionary<(int Source, int Target), (TSet Rule, List<int> NfaTargets)> transitions = GatherTransitions(_builder);
Dictionary<(int Source, int Target), (TSet Rule, List<int> NfaTargets)> transitions = GatherTransitions(this);
writer.WriteLine("<?xml version=\"1.0\" encoding=\"utf-8\"?>");
writer.WriteLine("<DirectedGraph xmlns=\"http://schemas.microsoft.com/vs/2009/dgml\" ZoomLevel=\"1.5\" GraphDirection=\"TopToBottom\" >");
writer.WriteLine(" <Nodes>");
writer.WriteLine(" <Node Id=\"dfa\" Label=\" \" Group=\"Collapsed\" Category=\"DFA\" DFAInfo=\"{0}\" />", FormatInfo(_builder, transitions.Count));
writer.WriteLine(" <Node Id=\"dfainfo\" Category=\"DFAInfo\" Label=\"{0}\"/>", FormatInfo(_builder, transitions.Count));
foreach (DfaMatchingState<TSet> state in _builder._stateCache)
writer.WriteLine(" <Node Id=\"dfa\" Label=\" \" Group=\"Collapsed\" Category=\"DFA\" DFAInfo=\"{0}\" />", FormatInfo(this, transitions.Count));
writer.WriteLine(" <Node Id=\"dfainfo\" Category=\"DFAInfo\" Label=\"{0}\"/>", FormatInfo(this, transitions.Count));
foreach (MatchingState<TSet> state in _stateCache.Values)
{
string info = CharKind.DescribePrev(state.PrevCharKind);
string deriv = WebUtility.HtmlEncode(state.Node.ToString());
string nodeDgmlView = $"{(info == string.Empty ? info : $"Previous: {info}&#13;")}{(deriv == string.Empty ? "()" : deriv)}";
writer.WriteLine(" <Node Id=\"{0}\" Label=\"{0}\" Category=\"State\" Group=\"Collapsed\" StateInfo=\"{1}\">", state.Id, nodeDgmlView);
if (_builder.GetStateInfo(state.Id).IsInitial)
if (GetStateInfo(state.Id).IsInitial)
{
writer.WriteLine(" <Category Ref=\"InitialState\" />");
}
......@@ -46,9 +48,8 @@ public override void SaveDGML(TextWriter writer, int maxLabelLength)
}
writer.WriteLine(" </Nodes>");
writer.WriteLine(" <Links>");
foreach (DfaMatchingState<TSet> initialState in GetInitialStates(this))
foreach (MatchingState<TSet> initialState in GetInitialStates(this))
{
Debug.Assert(_builder._stateCache.Contains(initialState));
writer.WriteLine(" <Link Source=\"dfa\" Target=\"{0}\" Label=\"\" Category=\"StartTransition\" />", initialState.Id);
}
writer.WriteLine(" <Link Source=\"dfa\" Target=\"dfainfo\" Label=\"\" Category=\"Contains\" />");
......@@ -72,7 +73,7 @@ public override void SaveDGML(TextWriter writer, int maxLabelLength)
}
}
foreach (DfaMatchingState<TSet> state in _builder._stateCache)
foreach (MatchingState<TSet> state in _stateCache.Values)
{
writer.WriteLine(" <Link Source=\"{0}\" Target=\"{0}info\" Category=\"Contains\" />", state.Id);
}
......@@ -137,19 +138,18 @@ public override void SaveDGML(TextWriter writer, int maxLabelLength)
writer.WriteLine(" </Style>");
writer.WriteLine(" </Styles>");
writer.WriteLine("</DirectedGraph>");
}
// This function gathers all transitions in the given builder and groups them by (source,destination) state ID
static Dictionary<(int Source, int Target), (TSet Rule, List<int> NfaTargets)> GatherTransitions(SymbolicRegexBuilder<TSet> builder)
static Dictionary<(int Source, int Target), (TSet Rule, List<int> NfaTargets)> GatherTransitions(SymbolicRegexMatcher<TSet> matcher)
{
Debug.Assert(builder._delta is not null);
Debug.Assert(builder._minterms is not null);
Dictionary<(int Source, int Target), (TSet Rule, List<int> NfaTargets)> result = new();
foreach (DfaMatchingState<TSet> source in builder._stateCache)
foreach (MatchingState<TSet> source in matcher._stateCache.Values)
{
// Get the span of entries in delta that gives the transitions for the different minterms
Span<int> deltas = builder.GetDeltasFor(source);
Span<int[]?> nfaDeltas = builder.GetNfaDeltasFor(source);
Debug.Assert(deltas.Length == builder._minterms.Length);
Span<int> deltas = matcher.GetDeltasFor(source);
Span<int[]?> nfaDeltas = matcher.GetNfaDeltasFor(source);
Debug.Assert(deltas.Length == matcher._minterms.Length);
for (int i = 0; i < deltas.Length; ++i)
{
// negative entries are transitions not explored yet, so skip them
......@@ -160,7 +160,7 @@ static Dictionary<(int Source, int Target), (TSet Rule, List<int> NfaTargets)> G
(int Source, int Target) key = (source.Id, targetId);
if (!result.TryGetValue(key, out (TSet Rule, List<int> NfaTargets) entry))
{
entry = (builder._solver.Empty, new List<int>());
entry = (matcher.Solver.Empty, new List<int>());
}
// If this state has an NFA transition for the same minterm, then associate
// those with the transition.
......@@ -168,24 +168,24 @@ static Dictionary<(int Source, int Target), (TSet Rule, List<int> NfaTargets)> G
{
foreach (int nfaTarget in nfaTargets)
{
entry.NfaTargets.Add(builder._nfaStateArray[nfaTarget]);
entry.NfaTargets.Add(matcher._nfaCoreIdArray[nfaTarget]);
}
}
// Expand the rule for this minterm
result[key] = (builder._solver.Or(entry.Rule, builder._minterms[i]), entry.NfaTargets);
result[key] = (matcher.Solver.Or(entry.Rule, matcher._minterms[i]), entry.NfaTargets);
}
}
}
return result;
}
static string FormatInfo(SymbolicRegexBuilder<TSet> builder, int transitionCount)
static string FormatInfo(SymbolicRegexMatcher<TSet> matcher, int transitionCount)
{
StringBuilder sb = new();
sb.Append($"States = {builder._stateCache.Count}&#13;");
sb.Append($"States = {matcher._stateCache.Count}&#13;");
sb.Append($"Transitions = {transitionCount}&#13;");
sb.Append($"Min Terms ({builder._solver.GetMinterms()!.Length}) = ").AppendJoin(',',
DescribeLabels(builder._solver.GetMinterms()!, builder));
sb.Append($"Min Terms ({matcher.Solver.GetMinterms()!.Length}) = ").AppendJoin(',',
DescribeLabels(matcher.Solver.GetMinterms()!, matcher._builder));
return sb.ToString();
}
......@@ -200,13 +200,13 @@ static IEnumerable<string> DescribeLabels(IEnumerable<TSet> labels, SymbolicRege
static string DescribeLabel(TSet label, SymbolicRegexBuilder<TSet> builder) =>
WebUtility.HtmlEncode(builder._solver.PrettyPrint(label, builder._charSetSolver));
static IEnumerable<DfaMatchingState<TSet>> GetInitialStates(SymbolicRegexMatcher<TSet> matcher)
static IEnumerable<MatchingState<TSet>> GetInitialStates(SymbolicRegexMatcher<TSet> matcher)
{
foreach (DfaMatchingState<TSet> state in matcher._dotstarredInitialStates)
foreach (MatchingState<TSet> state in matcher._dotstarredInitialStates)
yield return state;
foreach (DfaMatchingState<TSet> state in matcher._initialStates)
foreach (MatchingState<TSet> state in matcher._initialStates)
yield return state;
foreach (DfaMatchingState<TSet> state in matcher._reverseInitialStates)
foreach (MatchingState<TSet> state in matcher._reverseInitialStates)
yield return state;
}
}
......
......@@ -16,13 +16,13 @@ internal sealed partial class SymbolicRegexMatcher<TSet>
[ExcludeFromCodeCoverage(Justification = "Currently only used for testing")]
public override void Explore(bool includeDotStarred, bool includeReverse, bool includeOriginal, bool exploreDfa, bool exploreNfa)
{
Debug.Assert(_builder._minterms is not null);
lock (this)
{
// Track seen states to avoid exploring twice
HashSet<DfaMatchingState<TSet>> seen = new();
HashSet<MatchingState<TSet>> seen = new();
// Use a queue for unexplored states
// This results in a breadth-first exploration
Queue<DfaMatchingState<TSet>> toExplore = new();
Queue<MatchingState<TSet>> toExplore = new();
// Explore all initial states as requested
if (includeDotStarred)
......@@ -37,14 +37,14 @@ public override void Explore(bool includeDotStarred, bool includeReverse, bool i
while (toExplore.Count > 0)
{
// Don't dequeue yet, because a transition might fail
DfaMatchingState<TSet> state = toExplore.Peek();
MatchingState<TSet> state = toExplore.Peek();
// Include the special minterm for the last end-of-line if the state is sensitive to it
int maxMinterm = state.StartsWithLineAnchor ? _builder._minterms.Length : _builder._minterms.Length - 1;
int maxMinterm = state.StartsWithLineAnchor ? _minterms!.Length : _minterms!.Length - 1;
// Explore successor states for each minterm
for (int mintermId = 0; mintermId <= maxMinterm; ++mintermId)
{
int offset = (state.Id << _builder._mintermsLog) | mintermId;
if (!_builder.TryCreateNewTransition(state, mintermId, offset, true, out DfaMatchingState<TSet>? nextState))
int offset = DeltaOffset(state.Id, mintermId);
if (!TryCreateNewTransition(state, mintermId, offset, true, out MatchingState<TSet>? nextState))
goto DfaLimitReached;
EnqueueIfUnseen(nextState, seen, toExplore);
}
......@@ -57,48 +57,50 @@ public override void Explore(bool includeDotStarred, bool includeReverse, bool i
if (exploreNfa && toExplore.Count > 0)
{
// DFA states are broken up into NFA states when they are alternations
DfaMatchingState<TSet>[] toBreakUp = toExplore.ToArray();
MatchingState<TSet>[] toBreakUp = toExplore.ToArray();
toExplore.Clear();
foreach (DfaMatchingState<TSet> dfaState in toBreakUp)
foreach (MatchingState<TSet> dfaState in toBreakUp)
{
// Remove state from seen so that it can be added back in if necessary
seen.Remove(dfaState);
// Enqueue all elements of a top level alternation or the state itself
foreach (var element in dfaState.Node.EnumerateAlternationBranches())
ForEachNfaState(dfaState.Node, dfaState.PrevCharKind, (this, seen, toExplore),
static (int nfaId, (SymbolicRegexMatcher<TSet> Matcher, HashSet<MatchingState<TSet>> Seen, Queue<MatchingState<TSet>> ToExplore) args) =>
{
int nfaState = _builder.CreateNfaState(element, dfaState.PrevCharKind);
EnqueueIfUnseen(_builder.GetCoreState(nfaState), seen, toExplore);
}
MatchingState<TSet>? coreState = args.Matcher.GetState(args.Matcher.GetCoreStateId(nfaId));
EnqueueIfUnseen(coreState, args.Seen, args.ToExplore);
});
}
while (toExplore.Count > 0)
{
// NFA transitions can't fail, so its safe to dequeue here
DfaMatchingState<TSet> state = toExplore.Dequeue();
MatchingState<TSet> state = toExplore.Dequeue();
// Include the special minterm for the last end-of-line if the state is sensitive to it
int maxMinterm = state.StartsWithLineAnchor ? _builder._minterms.Length : _builder._minterms.Length - 1;
int maxMinterm = state.StartsWithLineAnchor ? _minterms.Length : _minterms.Length - 1;
// Explore successor states for each minterm
for (int mintermId = 0; mintermId <= maxMinterm; ++mintermId)
{
int nfaOffset = (_builder._nfaStateArrayInverse[state.Id] << _builder._mintermsLog) | mintermId;
int[] nextNfaStates = _builder.CreateNewNfaTransition(_builder._nfaStateArrayInverse[state.Id], mintermId, nfaOffset);
int nfaOffset = DeltaOffset(_nfaIdByCoreId[state.Id], mintermId);
int[] nextNfaStates = CreateNewNfaTransition(_nfaIdByCoreId[state.Id], mintermId, nfaOffset);
foreach (int nextNfaState in nextNfaStates)
{
EnqueueIfUnseen(_builder.GetCoreState(nextNfaState), seen, toExplore);
EnqueueIfUnseen(GetState(GetCoreStateId(nextNfaState)), seen, toExplore);
}
}
}
}
}
static void EnqueueAll(DfaMatchingState<TSet>[] states, HashSet<DfaMatchingState<TSet>> seen, Queue<DfaMatchingState<TSet>> toExplore)
static void EnqueueAll(MatchingState<TSet>[] states, HashSet<MatchingState<TSet>> seen, Queue<MatchingState<TSet>> toExplore)
{
foreach (DfaMatchingState<TSet> state in states)
foreach (MatchingState<TSet> state in states)
{
EnqueueIfUnseen(state, seen, toExplore);
}
}
static void EnqueueIfUnseen(DfaMatchingState<TSet> state, HashSet<DfaMatchingState<TSet>> seen, Queue<DfaMatchingState<TSet>> queue)
static void EnqueueIfUnseen(MatchingState<TSet> state, HashSet<MatchingState<TSet>> seen, Queue<MatchingState<TSet>> queue)
{
if (seen.Add(state))
{
......
......@@ -29,11 +29,11 @@ internal sealed partial class SymbolicRegexMatcher<TSet>
/// <inheritdoc cref="Regex.SampleMatches(int, int)"/>
[ExcludeFromCodeCoverage(Justification = "Currently only used for testing")]
public override IEnumerable<string> SampleMatches(int k, int randomseed)
{
lock (this)
{
// Zero is treated as no seed, instead using a system provided one
Random random = randomseed != 0 ? new Random(randomseed) : new Random();
ISolver<TSet> solver = _builder._solver;
CharSetSolver charSetSolver = _builder._charSetSolver;
// Create helper BDDs for handling anchors and preferentially generating ASCII inputs
......@@ -47,15 +47,15 @@ public override IEnumerable<string> SampleMatches(int k, int randomseed)
BDD asciiNonWordCharacters = charSetSolver.And(ascii, charSetSolver.Not(asciiWordCharacters));
// Set up two sets of minterms, one with the additional special minterm for the last end-of-line
Debug.Assert(_builder._minterms is not null);
int[] mintermIdsWithoutZ = new int[_builder._minterms.Length];
int[] mintermIdsWithZ = new int[_builder._minterms.Length + 1];
for (int i = 0; i < _builder._minterms.Length; ++i)
Debug.Assert(_minterms is not null);
int[] mintermIdsWithoutZ = new int[_minterms.Length];
int[] mintermIdsWithZ = new int[_minterms.Length + 1];
for (int i = 0; i < _minterms.Length; ++i)
{
mintermIdsWithoutZ[i] = i;
mintermIdsWithZ[i] = i;
}
mintermIdsWithZ[_builder._minterms.Length] = _builder._minterms.Length;
mintermIdsWithZ[_minterms.Length] = _minterms.Length;
for (int i = 0; i < k; i++)
{
......@@ -64,10 +64,10 @@ public override IEnumerable<string> SampleMatches(int k, int randomseed)
StringBuilder? latestCandidate = null;
// Current set of states reached initially contains just the root
NfaMatchingState states = new(_builder);
NfaMatchingState states = new();
// Here one could also consider previous characters for example for \b, \B, and ^ anchors
// and initialize inputSoFar accordingly
states.InitializeFrom(_initialStates[GetCharKind(ReadOnlySpan<char>.Empty, -1)]);
states.InitializeFrom(this, _initialStates[GetCharKind<FullInputReader>(ReadOnlySpan<char>.Empty, -1)]);
CurrentState statesWrapper = new(states);
// Used for end suffixes
......@@ -79,29 +79,29 @@ public override IEnumerable<string> SampleMatches(int k, int randomseed)
// Gather the possible endings for satisfying nullability
possibleEndings.Clear();
if (NfaStateHandler.CanBeNullable(ref statesWrapper))
if (SymbolicRegexMatcher<TSet>.NfaStateHandler.CanBeNullable(this, in statesWrapper))
{
// Unconditionally final state or end of the input due to \Z anchor for example
if (NfaStateHandler.IsNullable(ref statesWrapper) ||
NfaStateHandler.IsNullableFor(_builder, ref statesWrapper, CharKind.BeginningEnd))
if (SymbolicRegexMatcher<TSet>.NfaStateHandler.IsNullable(this, in statesWrapper) ||
SymbolicRegexMatcher<TSet>.NfaStateHandler.IsNullableFor(this, in statesWrapper, CharKind.BeginningEnd))
{
possibleEndings.Add("");
}
// End of line due to end-of-line anchor
if (NfaStateHandler.IsNullableFor(_builder, ref statesWrapper, CharKind.Newline))
if (SymbolicRegexMatcher<TSet>.NfaStateHandler.IsNullableFor(this, in statesWrapper, CharKind.Newline))
{
possibleEndings.Add("\n");
}
// Related to wordborder due to \b or \B
if (NfaStateHandler.IsNullableFor(_builder, ref statesWrapper, CharKind.WordLetter))
if (SymbolicRegexMatcher<TSet>.NfaStateHandler.IsNullableFor(this, in statesWrapper, CharKind.WordLetter))
{
possibleEndings.Add(ChooseChar(random, asciiWordCharacters, ascii, charSetSolver).ToString());
}
// Related to wordborder due to \b or \B
if (NfaStateHandler.IsNullableFor(_builder, ref statesWrapper, CharKind.General))
if (SymbolicRegexMatcher<TSet>.NfaStateHandler.IsNullableFor(this, in statesWrapper, CharKind.General))
{
possibleEndings.Add(ChooseChar(random, asciiNonWordCharacters, ascii, charSetSolver).ToString());
}
......@@ -125,18 +125,18 @@ public override IEnumerable<string> SampleMatches(int k, int randomseed)
}
// Shuffle the minterms, including the last end-of-line marker if appropriate
int[] mintermIds = NfaStateHandler.StartsWithLineAnchor(_builder, ref statesWrapper) ?
int[] mintermIds = SymbolicRegexMatcher<TSet>.NfaStateHandler.StartsWithLineAnchor(this, in statesWrapper) ?
Shuffle(random, mintermIdsWithZ) :
Shuffle(random, mintermIdsWithoutZ);
foreach (int mintermId in mintermIds)
{
bool success = NfaStateHandler.TakeTransition(_builder, ref statesWrapper, mintermId);
bool success = SymbolicRegexMatcher<TSet>.NfaStateHandler.TryTakeTransition(this, ref statesWrapper, mintermId);
Debug.Assert(success);
if (states.NfaStateSet.Count > 0)
{
TSet minterm = _builder.GetMinterm(mintermId);
TSet minterm = GetMintermFromId(mintermId);
// Append a random member of the minterm
inputSoFar.Append(ChooseChar(random, ToBDD(minterm, solver, charSetSolver), ascii, charSetSolver));
inputSoFar.Append(ChooseChar(random, ToBDD(minterm, Solver, charSetSolver), ascii, charSetSolver));
break;
}
else
......@@ -159,6 +159,7 @@ public override IEnumerable<string> SampleMatches(int k, int randomseed)
}
}
}
}
static BDD ToBDD(TSet set, ISolver<TSet> solver, CharSetSolver charSetSolver) => solver.ConvertToBDD(set, charSetSolver);
......
......@@ -37,8 +37,8 @@ public SymbolicRegexRunnerFactory(RegexTree regexTree, RegexOptions options, Tim
}
}
rootNode = rootNode.AddFixedLengthMarkers();
BDD[] minterms = rootNode.ComputeMinterms();
rootNode = rootNode.AddFixedLengthMarkers(bddBuilder);
BDD[] minterms = rootNode.ComputeMinterms(bddBuilder);
_matcher = minterms.Length > 64 ?
SymbolicRegexMatcher<BitVector>.Create(regexTree.CaptureCount, regexTree.FindOptimizations, bddBuilder, rootNode, new BitVectorSolver(minterms, charSetSolver), matchTimeout) :
......
......@@ -64,6 +64,38 @@ public static bool TryEnsureSufficientExecutionStack()
.ContinueWith(t => t.GetAwaiter().GetResult(), CancellationToken.None, TaskContinuationOptions.ExecuteSynchronously, TaskScheduler.Default)
.GetAwaiter().GetResult();
/// <summary>Calls the provided function on the stack of a different thread pool thread.</summary>
/// <typeparam name="TArg1">The type of the first argument to pass to the function.</typeparam>
/// <typeparam name="TArg2">The type of the second argument to pass to the function.</typeparam>
/// <typeparam name="TArg3">The type of the third argument to pass to the function.</typeparam>
/// <typeparam name="TArg4">The type of the fourth argument to pass to the function.</typeparam>
/// <param name="action">The action to invoke.</param>
/// <param name="arg1">The first argument to pass to the action.</param>
/// <param name="arg2">The second argument to pass to the action.</param>
/// <param name="arg3">The third argument to pass to the action.</param>
/// <param name="arg4">The fourth argument to pass to the action.</param>
public static void CallOnEmptyStack<TArg1, TArg2, TArg3, TArg4>(Action<TArg1, TArg2, TArg3, TArg4> action, TArg1 arg1, TArg2 arg2, TArg3 arg3, TArg4 arg4) =>
Task.Run(() => action(arg1, arg2, arg3, arg4))
.ContinueWith(t => t.GetAwaiter().GetResult(), CancellationToken.None, TaskContinuationOptions.ExecuteSynchronously, TaskScheduler.Default)
.GetAwaiter().GetResult();
/// <summary>Calls the provided function on the stack of a different thread pool thread.</summary>
/// <typeparam name="TArg1">The type of the first argument to pass to the function.</typeparam>
/// <typeparam name="TArg2">The type of the second argument to pass to the function.</typeparam>
/// <typeparam name="TArg3">The type of the third argument to pass to the function.</typeparam>
/// <typeparam name="TArg4">The type of the fourth argument to pass to the function.</typeparam>
/// <typeparam name="TArg5">The type of the fifth argument to pass to the function.</typeparam>
/// <param name="action">The action to invoke.</param>
/// <param name="arg1">The first argument to pass to the action.</param>
/// <param name="arg2">The second argument to pass to the action.</param>
/// <param name="arg3">The third argument to pass to the action.</param>
/// <param name="arg4">The fourth argument to pass to the action.</param>
/// <param name="arg5">The fifth argument to pass to the action.</param>
public static void CallOnEmptyStack<TArg1, TArg2, TArg3, TArg4, TArg5>(Action<TArg1, TArg2, TArg3, TArg4, TArg5> action, TArg1 arg1, TArg2 arg2, TArg3 arg3, TArg4 arg4, TArg5 arg5) =>
Task.Run(() => action(arg1, arg2, arg3, arg4, arg5))
.ContinueWith(t => t.GetAwaiter().GetResult(), CancellationToken.None, TaskContinuationOptions.ExecuteSynchronously, TaskScheduler.Default)
.GetAwaiter().GetResult();
/// <summary>Calls the provided function on the stack of a different thread pool thread.</summary>
/// <typeparam name="TArg1">The type of the first argument to pass to the function.</typeparam>
/// <typeparam name="TArg2">The type of the second argument to pass to the function.</typeparam>
......@@ -126,5 +158,21 @@ public static bool TryEnsureSufficientExecutionStack()
Task.Run(() => func(arg1, arg2, arg3))
.ContinueWith(t => t.GetAwaiter().GetResult(), CancellationToken.None, TaskContinuationOptions.ExecuteSynchronously, TaskScheduler.Default)
.GetAwaiter().GetResult();
/// <summary>Calls the provided function on the stack of a different thread pool thread.</summary>
/// <typeparam name="TArg1">The type of the first argument to pass to the function.</typeparam>
/// <typeparam name="TArg2">The type of the second argument to pass to the function.</typeparam>
/// <typeparam name="TArg3">The type of the third argument to pass to the function.</typeparam>
/// <typeparam name="TArg4">The type of the fourth argument to pass to the function.</typeparam>
/// <typeparam name="TResult">The return type of the function.</typeparam>
/// <param name="func">The function to invoke.</param>
/// <param name="arg1">The first argument to pass to the function.</param>
/// <param name="arg2">The second argument to pass to the function.</param>
/// <param name="arg3">The third argument to pass to the function.</param>
/// <param name="arg4">The fourth argument to pass to the function.</param>
public static TResult CallOnEmptyStack<TArg1, TArg2, TArg3, TArg4, TResult>(Func<TArg1, TArg2, TArg3, TArg4, TResult> func, TArg1 arg1, TArg2 arg2, TArg3 arg3, TArg4 arg4) =>
Task.Run(() => func(arg1, arg2, arg3, arg4))
.ContinueWith(t => t.GetAwaiter().GetResult(), CancellationToken.None, TaskContinuationOptions.ExecuteSynchronously, TaskScheduler.Default)
.GetAwaiter().GetResult();
}
}
......@@ -86,7 +86,7 @@ public static IEnumerable<object[]> SafeThresholdTests_MemberData()
{
RegexNode tree = RegexParser.Parse(Pattern, options | RegexOptions.ExplicitCapture, CultureInfo.CurrentCulture).Root;
SymbolicRegexNode<BDD> rootNode = converter.ConvertToSymbolicRegexNode(tree);
yield return new object[] { rootNode, ExpectedSafeSize };
yield return new object[] { bddBuilder, rootNode, ExpectedSafeSize };
}
// add .*? in front of the pattern, this adds 1 more NFA state
......@@ -94,7 +94,7 @@ public static IEnumerable<object[]> SafeThresholdTests_MemberData()
{
RegexNode tree = RegexParser.Parse(".*?" + Pattern, options | RegexOptions.ExplicitCapture, CultureInfo.CurrentCulture).Root;
SymbolicRegexNode<BDD> rootNode = converter.ConvertToSymbolicRegexNode(tree);
yield return new object[] { rootNode, 1 + ExpectedSafeSize};
yield return new object[] { bddBuilder, rootNode, 1 + ExpectedSafeSize};
}
// use of anchors increases the estimate by 5x in general but in reality much less, at most 3x
......@@ -102,7 +102,7 @@ public static IEnumerable<object[]> SafeThresholdTests_MemberData()
{
RegexNode tree = RegexParser.Parse(Pattern + "$", options | RegexOptions.ExplicitCapture, CultureInfo.CurrentCulture).Root;
SymbolicRegexNode<BDD> rootNode = converter.ConvertToSymbolicRegexNode(tree);
yield return new object[] { rootNode, 5 * ExpectedSafeSize };
yield return new object[] { bddBuilder, rootNode, 5 * ExpectedSafeSize };
}
// use of captures has no effect on the estimations
......@@ -110,31 +110,32 @@ public static IEnumerable<object[]> SafeThresholdTests_MemberData()
{
RegexNode tree = RegexParser.Parse(Pattern, options, CultureInfo.CurrentCulture).Root;
SymbolicRegexNode<BDD> rootNode = converter.ConvertToSymbolicRegexNode(tree);
yield return new object[] { rootNode, ExpectedSafeSize };
yield return new object[] { bddBuilder, rootNode, ExpectedSafeSize };
}
}
[Theory]
[MemberData(nameof(SafeThresholdTests_MemberData))]
public void SafeThresholdTests(object obj, int expectedSafeSize)
public void SafeThresholdTests(object builderObj, object nodeObj, int expectedSafeSize)
{
SymbolicRegexNode<BDD> node = (SymbolicRegexNode<BDD>)obj;
SymbolicRegexBuilder<BDD> builder = (SymbolicRegexBuilder<BDD>)builderObj;
SymbolicRegexNode<BDD> node = (SymbolicRegexNode<BDD>)nodeObj;
int safeSize = node.EstimateNfaSize();
Assert.Equal(expectedSafeSize, safeSize);
int nfaStateCount = CalculateNfaStateCount(node);
int nfaStateCount = CalculateNfaStateCount(builder, node);
Assert.True(nfaStateCount <= expectedSafeSize);
}
/// <summary>
/// Compute the closure of all NFA states from root and return the size of the resulting state space.
/// </summary>
private static int CalculateNfaStateCount(SymbolicRegexNode<BDD> root)
private static int CalculateNfaStateCount(SymbolicRegexBuilder<BDD> builder, SymbolicRegexNode<BDD> root)
{
// Here we are actually using the original BDD algebra (not converting to the BV or Uint64 algebra)
// because it does not matter which algebra we use here (this matters only for performance)
HashSet<(uint, SymbolicRegexNode<BDD>)> states = new();
Stack<(uint, SymbolicRegexNode<BDD>)> frontier = new();
List<BDD> minterms = MintermGenerator<BDD>.GenerateMinterms(root._builder._solver, root.GetSets());
List<BDD> minterms = MintermGenerator<BDD>.GenerateMinterms(builder._solver, root.GetSets(builder));
// Start from the initial state that has kind 'General' when no anchors are being used, else kind 'BeginningEnd'
(uint, SymbolicRegexNode<BDD>) initialState = (root._info.ContainsSomeAnchor ? CharKind.BeginningEnd : CharKind.General, root);
......@@ -150,7 +151,7 @@ private static int CalculateNfaStateCount(SymbolicRegexNode<BDD> root)
foreach (BDD minterm in minterms)
{
uint kind = GetCharKind(minterm);
SymbolicRegexNode<BDD> target = source.Node.CreateDerivativeWithoutEffects(minterm, source.Kind);
SymbolicRegexNode<BDD> target = source.Node.CreateDerivativeWithoutEffects(builder, minterm, source.Kind);
//In the case of an NFA all the different alternatives in the DFA state become individual states themselves
foreach (SymbolicRegexNode<BDD> node in GetAlternatives(target))
......@@ -169,7 +170,7 @@ private static int CalculateNfaStateCount(SymbolicRegexNode<BDD> root)
return states.Count;
// Enumerates the alternatives from a node, for eaxmple (ab|(bc|cd)) has three alternatives
static IEnumerable<SymbolicRegexNode<BDD>> GetAlternatives(SymbolicRegexNode<BDD> node)
IEnumerable<SymbolicRegexNode<BDD>> GetAlternatives(SymbolicRegexNode<BDD> node)
{
if (node._kind == SymbolicRegexNodeKind.Alternate)
{
......@@ -178,7 +179,7 @@ static IEnumerable<SymbolicRegexNode<BDD>> GetAlternatives(SymbolicRegexNode<BDD
foreach (SymbolicRegexNode<BDD> elem in GetAlternatives(node._right!))
yield return elem;
}
else if (!node.IsNothing) // omit deadend states
else if (!node.IsNothing(builder._solver)) // omit deadend states
{
yield return node;
}
......@@ -187,8 +188,8 @@ static IEnumerable<SymbolicRegexNode<BDD>> GetAlternatives(SymbolicRegexNode<BDD
// Simplified character kind calculation that omits the special case that minterm can be the very last \n
// This omission has practically no effect of the size of the state space, but would complicate the logic
uint GetCharKind(BDD minterm) =>
minterm.Equals(root._builder._newLineSet) ? CharKind.Newline : // is \n
(!root._builder._solver.IsEmpty(root._builder._solver.And(root._builder._wordLetterForBoundariesSet, minterm)) ?
minterm.Equals(builder._newLineSet) ? CharKind.Newline : // is \n
(!builder._solver.IsEmpty(builder._solver.And(builder._wordLetterForBoundariesSet, minterm)) ?
CharKind.WordLetter : // in \w
CharKind.General); // anything else, thus in particular in \W
}
......
......@@ -51,7 +51,7 @@
<Compile Include="..\..\src\System\Text\RegularExpressions\Symbolic\CharKind.cs" Link="Production\CharKind.cs" />
<Compile Include="..\..\src\System\Text\RegularExpressions\Symbolic\CharSetSolver.cs" Link="Production\CharSetSolver.cs" />
<Compile Include="..\..\src\System\Text\RegularExpressions\Symbolic\DerivativeEffect.cs" Link="Production\DerivativeEffect.cs" />
<Compile Include="..\..\src\System\Text\RegularExpressions\Symbolic\DfaMatchingState.cs" Link="Production\DfaMatchingState.cs" />
<Compile Include="..\..\src\System\Text\RegularExpressions\Symbolic\MatchingState.cs" Link="Production\MatchingState.cs" />
<Compile Include="..\..\src\System\Text\RegularExpressions\Symbolic\DoublyLinkedList.cs" Link="Production\DoublyLinkedList.cs" />
<Compile Include="..\..\src\System\Text\RegularExpressions\Symbolic\ISolver.cs" Link="Production\ISolver.cs" />
<Compile Include="..\..\src\System\Text\RegularExpressions\Symbolic\MintermGenerator.cs" Link="Production\MintermGenerator.cs" />
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册