diff --git a/src/libraries/System.Text.RegularExpressions/src/System.Text.RegularExpressions.csproj b/src/libraries/System.Text.RegularExpressions/src/System.Text.RegularExpressions.csproj
index 56a71e7e8126f69861a8a518ebe8083fc1318209..94490e85c60c9bfbea8ca83952265271356f42e9 100644
--- a/src/libraries/System.Text.RegularExpressions/src/System.Text.RegularExpressions.csproj
+++ b/src/libraries/System.Text.RegularExpressions/src/System.Text.RegularExpressions.csproj
@@ -63,7 +63,7 @@
-
+
@@ -75,6 +75,7 @@
+
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/CharKind.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/CharKind.cs
index aacdf06702290b58804a88c413e8902afd7bd32a..9f21787baae2ed2a0af6ecb8c061f7045b013334 100644
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/CharKind.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/CharKind.cs
@@ -43,5 +43,8 @@ internal static class CharKind
WordLetter => @"\w",
_ => string.Empty,
};
+
+ /// Returns whether the given value is in the range of valid character kinds, i.e. strictly less than CharKindCount.
+ internal static bool IsValidCharKind(uint charKind) => charKind < CharKindCount;
}
}
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/DfaMatchingState.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/DfaMatchingState.cs
deleted file mode 100644
index ecbb44415eea6a761be6094230c6d696c1135292..0000000000000000000000000000000000000000
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/DfaMatchingState.cs
+++ /dev/null
@@ -1,149 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-using System.Collections.Generic;
-using System.Diagnostics;
-using System.Runtime.CompilerServices;
-using System.Net;
-
-namespace System.Text.RegularExpressions.Symbolic
-{
- /// Captures a state of a DFA explored during matching.
- internal sealed class DfaMatchingState where TSet : IComparable, IEquatable
- {
- internal DfaMatchingState(SymbolicRegexNode node, uint prevCharKind)
- {
- Node = node;
- PrevCharKind = prevCharKind;
- }
-
- internal SymbolicRegexNode Node { get; }
-
- internal uint PrevCharKind { get; }
-
- internal int Id { get; set; }
-
- /// This is a deadend state
- internal bool IsDeadend => Node.IsNothing;
-
- /// The node must be nullable here
- internal int FixedLength(uint nextCharKind)
- {
- Debug.Assert(nextCharKind is 0 or CharKind.BeginningEnd or CharKind.Newline or CharKind.WordLetter or CharKind.NewLineS);
- uint context = CharKind.Context(PrevCharKind, nextCharKind);
- return Node.ResolveFixedLength(context);
- }
-
- /// If true then the state is a dead-end, rejects all inputs.
- internal bool IsNothing => Node.IsNothing;
-
- /// If true then state starts with a ^ or $ or \Z
- internal bool StartsWithLineAnchor => Node._info.StartsWithLineAnchor;
-
- ///
- /// Translates a minterm set to a character kind, which is a general categorization of characters used
- /// for cheaply deciding the nullability of anchors.
- ///
- ///
- /// An empty set is handled as a special case to indicate the very last \n.
- ///
- /// the minterm to translate
- /// the character kind of the minterm
- private uint GetNextCharKind(ref TSet minterm)
- {
- ISolver solver = Node._builder._solver;
- TSet wordLetterPredicate = Node._builder._wordLetterForBoundariesSet;
- TSet newLinePredicate = Node._builder._newLineSet;
-
- // minterm == solver.False is used to represent the very last \n
- uint nextCharKind = CharKind.General;
- if (solver.Empty.Equals(minterm))
- {
- nextCharKind = CharKind.NewLineS;
- minterm = newLinePredicate;
- }
- else if (newLinePredicate.Equals(minterm))
- {
- // If the previous state was the start state, mark this as the very FIRST \n.
- // Essentially, this looks the same as the very last \n and is used to nullify
- // rev(\Z) in the conext of a reversed automaton.
- nextCharKind = PrevCharKind == CharKind.BeginningEnd ?
- CharKind.NewLineS :
- CharKind.Newline;
- }
- else if (!solver.IsEmpty(solver.And(wordLetterPredicate, minterm)))
- {
- nextCharKind = CharKind.WordLetter;
- }
- return nextCharKind;
- }
-
- ///
- /// Compute the target state for the given input minterm.
- /// If is False this means that this is \n and it is the last character of the input.
- ///
- /// minterm corresponding to some input character or False corresponding to last \n
- internal DfaMatchingState Next(TSet minterm)
- {
- uint nextCharKind = GetNextCharKind(ref minterm);
-
- // Combined character context
- uint context = CharKind.Context(PrevCharKind, nextCharKind);
-
- // Compute the derivative of the node for the given context
- SymbolicRegexNode derivative = Node.CreateDerivativeWithoutEffects(minterm, context);
-
- // nextCharKind will be the PrevCharKind of the target state
- // use an existing state instead if one exists already
- // otherwise create a new new id for it
- return Node._builder.CreateState(derivative, nextCharKind, capturing: false);
- }
-
- ///
- /// Compute a set of transitions for the given minterm.
- ///
- /// minterm corresponding to some input character or False corresponding to last \n
- /// an enumeration of the transitions as pairs of the target state and a list of effects to be applied
- internal List<(DfaMatchingState State, DerivativeEffect[] Effects)> NfaNextWithEffects(TSet minterm)
- {
- uint nextCharKind = GetNextCharKind(ref minterm);
-
- // Combined character context
- uint context = CharKind.Context(PrevCharKind, nextCharKind);
-
- // Compute the transitions for the given context
- List<(SymbolicRegexNode, DerivativeEffect[])> nodesAndEffects = Node.CreateNfaDerivativeWithEffects(minterm, context);
-
- var list = new List<(DfaMatchingState State, DerivativeEffect[] Effects)>();
- foreach ((SymbolicRegexNode node, DerivativeEffect[]? effects) in nodesAndEffects)
- {
- // nextCharKind will be the PrevCharKind of the target state
- // use an existing state instead if one exists already
- // otherwise create a new new id for it
- DfaMatchingState state = Node._builder.CreateState(node, nextCharKind, capturing: true);
- if (!state.IsDeadend)
- list.Add((state, effects));
- }
- return list;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- internal bool IsNullableFor(uint nextCharKind)
- {
- Debug.Assert(nextCharKind is 0 or CharKind.BeginningEnd or CharKind.Newline or CharKind.WordLetter or CharKind.NewLineS);
- uint context = CharKind.Context(PrevCharKind, nextCharKind);
- return Node.IsNullableFor(context);
- }
-
- public override bool Equals(object? obj) =>
- obj is DfaMatchingState s && PrevCharKind == s.PrevCharKind && Node.Equals(s.Node);
-
- public override int GetHashCode() => (PrevCharKind, Node).GetHashCode();
-
-#if DEBUG
- public override string ToString() =>
- PrevCharKind == 0 ? Node.ToString() :
- $"({CharKind.DescribePrev(PrevCharKind)},{Node})";
-#endif
- }
-}
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/MatchingState.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/MatchingState.cs
new file mode 100644
index 0000000000000000000000000000000000000000..38226258df4a2a7fe326c623d2340951aec36344
--- /dev/null
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/MatchingState.cs
@@ -0,0 +1,118 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Runtime.CompilerServices;
+using System.Net;
+
+namespace System.Text.RegularExpressions.Symbolic
+{
+ /// Captures a state explored during matching: a regex node paired with the kind of the previous input character.
+ internal sealed class MatchingState where TSet : IComparable, IEquatable
+ {
+ internal MatchingState(SymbolicRegexNode node, uint prevCharKind) // stores both arguments as-is; Id is not assigned here
+ {
+ Node = node;
+ PrevCharKind = prevCharKind;
+ }
+
+ /// The regular expression that labels this state and gives it its semantics.
+ internal SymbolicRegexNode Node { get; }
+
+ ///
+ /// The kind of the previous character in the input. The matcher using this state is responsible
+ /// for ensuring that in all uses of this state this invariant holds by both selecting initial states accordingly
+ /// and transitioning on each character to states that match that character's kind.
+ ///
+ ///
+ /// Tracking this information is an optimization that allows each transition taken in the matcher to only depend
+ /// on the next character (and its kind). In general, the transitions from a state with anchors in its pattern
+ /// depend on both the previous and the next character. Creating distinct states for each kind of the previous
+ /// character embeds the necessary information about the previous character into the state space of the automaton.
+ /// However, this does incur a memory overhead due to the duplication of states. For patterns with no anchors
+ /// this will always be set to a single kind (presumably CharKind.General; TODO confirm), which can reduce the number of states created.
+ ///
+ /// The performance effect of this optimization has not been investigated. If this optimization were removed, the
+ /// transition logic would in turn have to become more complicated for derivatives that depend on the nullability
+ /// of anchors. Care should be taken to not slow down transitions without anchors involved.
+ ///
+ internal uint PrevCharKind { get; }
+
+ ///
+ /// A unique identifier for this state, which is used (presumably by the matcher; TODO confirm) to index into
+ /// state information and transition arrays. Valid IDs are always >= 1.
+ ///
+ internal int Id { get; set; }
+
+ /// Whether this state is known to be a dead end, i.e. no nullable states are reachable from here.
+ internal bool IsDeadend(ISolver solver) => Node.IsNothing(solver);
+
+ ///
+ /// Returns the fixed length that any match ending with this state must have, or -1 if there is no such
+ /// fixed length, as resolved by Node.ResolveFixedLength for the context. The context is defined
+ /// by PrevCharKind of this state and the given nextCharKind. The node must be nullable here.
+ ///
+ internal int FixedLength(uint nextCharKind)
+ {
+ Debug.Assert(IsNullableFor(nextCharKind)); // precondition: the node is nullable in this context
+ Debug.Assert(CharKind.IsValidCharKind(nextCharKind));
+ uint context = CharKind.Context(PrevCharKind, nextCharKind);
+ return Node.ResolveFixedLength(context);
+ }
+
+ /// If true then the pattern of this state starts with a ^ or $ or \Z
+ internal bool StartsWithLineAnchor => Node._info.StartsWithLineAnchor;
+
+ ///
+ /// Compute the derivative of this state's node for the given input minterm. Only the node is returned; no target state is created here.
+ /// If minterm is False this means that this is \n and it is the last character of the input.
+ ///
+ /// builder: the builder that owns Node (forwarded to Node.CreateDerivativeWithoutEffects)
+ /// minterm corresponding to some input character or False corresponding to last \n
+ /// nextCharKind: kind of the next input character; combined with PrevCharKind into the context for the derivative
+ internal SymbolicRegexNode Next(SymbolicRegexBuilder builder, TSet minterm, uint nextCharKind)
+ {
+ // Combined character context
+ uint context = CharKind.Context(PrevCharKind, nextCharKind);
+
+ // Compute the derivative of the node for the given context
+ return Node.CreateDerivativeWithoutEffects(builder, minterm, context);
+ }
+
+ ///
+ /// Compute a set of transitions for the given minterm.
+ ///
+ /// builder: the builder that owns Node (forwarded to Node.CreateNfaDerivativeWithEffects)
+ /// minterm corresponding to some input character or False corresponding to last \n
+ /// nextCharKind: kind of the next input character; combined with PrevCharKind into the context for the derivative
+ /// a list of the transitions as pairs of the target node and an array of effects to be applied
+ internal List<(SymbolicRegexNode Node, DerivativeEffect[] Effects)> NfaNextWithEffects(SymbolicRegexBuilder builder, TSet minterm, uint nextCharKind)
+ {
+ // Combined character context
+ uint context = CharKind.Context(PrevCharKind, nextCharKind);
+
+ // Compute the transitions for the given context
+ return Node.CreateNfaDerivativeWithEffects(builder, minterm, context);
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ internal bool IsNullableFor(uint nextCharKind) // whether Node is nullable in the context of PrevCharKind and nextCharKind
+ {
+ Debug.Assert(CharKind.IsValidCharKind(nextCharKind));
+ uint context = CharKind.Context(PrevCharKind, nextCharKind);
+ return Node.IsNullableFor(context);
+ }
+
+ public override bool Equals(object? obj) => // equal when both the previous character kind and the node are equal
+ obj is MatchingState s && PrevCharKind == s.PrevCharKind && Node.Equals(s.Node);
+
+ public override int GetHashCode() => (PrevCharKind, Node).GetHashCode(); // hashes the same pair that Equals compares
+
+#if DEBUG
+ public override string ToString() => // debug builds only; the kind prefix is omitted when PrevCharKind is 0
+ PrevCharKind == 0 ? Node.ToString() :
+ $"({CharKind.DescribePrev(PrevCharKind)},{Node})";
+#endif
+ }
+}
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/RegexNodeConverter.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/RegexNodeConverter.cs
index 8c4fd992f9352de10da7ce3d3580042981fd59b6..857b8d51972645782a2fc39dbf7995c48ac26906 100644
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/RegexNodeConverter.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/RegexNodeConverter.cs
@@ -240,12 +240,12 @@ static string UnexpectedNodeType(RegexNode node)
SymbolicRegexNode elem = childResult.Count == 1 ?
childResult.FirstElement :
_builder.CreateConcatAlreadyReversed(childResult);
- if (elem.IsNothing)
+ if (elem.IsNothing(_builder._solver))
{
continue;
}
- or = elem.IsAnyStar ?
+ or = elem.IsAnyStar(_builder._solver) ?
elem : // .* is the absorbing element
SymbolicRegexNode.CreateAlternate(_builder, elem, or);
}
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexBuilder.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexBuilder.cs
index 3ba759e2b04fd7dcb72b50acd89954ec4ab16563..eceaadd247eaab80098dd61f27700459c5839e1f 100644
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexBuilder.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexBuilder.cs
@@ -30,43 +30,34 @@ internal sealed class SymbolicRegexBuilder where TSet : IComparable,
internal SymbolicRegexNode Epsilon => _epsilon ??= SymbolicRegexNode.CreateEpsilon(this);
private SymbolicRegexNode? _beginningAnchor;
- internal SymbolicRegexNode BeginningAnchor => _beginningAnchor ??= SymbolicRegexNode.CreateBeginEndAnchor(this, SymbolicRegexNodeKind.BeginningAnchor);
+ internal SymbolicRegexNode BeginningAnchor => _beginningAnchor ??= SymbolicRegexNode.CreateAnchor(this, SymbolicRegexNodeKind.BeginningAnchor);
private SymbolicRegexNode? _endAnchor;
- internal SymbolicRegexNode EndAnchor => _endAnchor ??= SymbolicRegexNode.CreateBeginEndAnchor(this, SymbolicRegexNodeKind.EndAnchor);
+ internal SymbolicRegexNode EndAnchor => _endAnchor ??= SymbolicRegexNode.CreateAnchor(this, SymbolicRegexNodeKind.EndAnchor);
private SymbolicRegexNode? _endAnchorZ;
- internal SymbolicRegexNode EndAnchorZ => _endAnchorZ ??= SymbolicRegexNode.CreateBeginEndAnchor(this, SymbolicRegexNodeKind.EndAnchorZ);
+ internal SymbolicRegexNode EndAnchorZ => _endAnchorZ ??= SymbolicRegexNode.CreateAnchor(this, SymbolicRegexNodeKind.EndAnchorZ);
private SymbolicRegexNode? _endAnchorZReverse;
- internal SymbolicRegexNode EndAnchorZReverse => _endAnchorZReverse ??= SymbolicRegexNode.CreateBeginEndAnchor(this, SymbolicRegexNodeKind.EndAnchorZReverse);
+ internal SymbolicRegexNode EndAnchorZReverse => _endAnchorZReverse ??= SymbolicRegexNode.CreateAnchor(this, SymbolicRegexNodeKind.EndAnchorZReverse);
private SymbolicRegexNode? _bolAnchor;
- internal SymbolicRegexNode BolAnchor => _bolAnchor ??= SymbolicRegexNode.CreateBeginEndAnchor(this, SymbolicRegexNodeKind.BOLAnchor);
+ internal SymbolicRegexNode BolAnchor => _bolAnchor ??= SymbolicRegexNode.CreateAnchor(this, SymbolicRegexNodeKind.BOLAnchor);
private SymbolicRegexNode? _eolAnchor;
- internal SymbolicRegexNode EolAnchor => _eolAnchor ??= SymbolicRegexNode.CreateBeginEndAnchor(this, SymbolicRegexNodeKind.EOLAnchor);
+ internal SymbolicRegexNode EolAnchor => _eolAnchor ??= SymbolicRegexNode.CreateAnchor(this, SymbolicRegexNodeKind.EOLAnchor);
private SymbolicRegexNode? _wbAnchor;
- internal SymbolicRegexNode BoundaryAnchor => _wbAnchor ??= SymbolicRegexNode.CreateBoundaryAnchor(this, SymbolicRegexNodeKind.BoundaryAnchor);
+ internal SymbolicRegexNode BoundaryAnchor => _wbAnchor ??= SymbolicRegexNode.CreateAnchor(this, SymbolicRegexNodeKind.BoundaryAnchor);
private SymbolicRegexNode? _nwbAnchor;
- internal SymbolicRegexNode NonBoundaryAnchor => _nwbAnchor ??= SymbolicRegexNode.CreateBoundaryAnchor(this, SymbolicRegexNodeKind.NonBoundaryAnchor);
+ internal SymbolicRegexNode NonBoundaryAnchor => _nwbAnchor ??= SymbolicRegexNode.CreateAnchor(this, SymbolicRegexNodeKind.NonBoundaryAnchor);
internal TSet _wordLetterForBoundariesSet;
internal TSet _newLineSet;
- /// Partition of the input space of sets.
- internal TSet[]? _minterms;
-
private readonly Dictionary> _singletonCache = new();
- // states that have been created
- internal HashSet> _stateCache = new();
-
- // capturing states that have been created
- internal HashSet> _capturingStateCache = new();
-
///
/// This cache is used in to keep all nodes associated with this builder
/// unique. This ensures that reference equality can be used for syntactic equality and that all shared subexpressions
@@ -84,7 +75,7 @@ internal sealed class SymbolicRegexBuilder where TSet : IComparable,
// matching when simplification rules fail to eliminate the portions being walked over.
///
- /// Cache for keyed by:
+ /// Cache for derivative computations keyed by:
/// -The node to derivate
/// -The character or minterm to take the derivative with
/// -The surrounding character context
@@ -93,7 +84,7 @@ internal sealed class SymbolicRegexBuilder where TSet : IComparable,
internal readonly Dictionary<(SymbolicRegexNode, TSet elem, uint context), SymbolicRegexNode> _derivativeCache = new();
///
- /// Cache for keyed by:
+ /// Cache for the lower-priority-than-nullability pruning keyed by:
/// -The node to prune
/// -The surrounding character context
/// The value is the pruned node.
@@ -101,74 +92,13 @@ internal sealed class SymbolicRegexBuilder where TSet : IComparable,
internal readonly Dictionary<(SymbolicRegexNode, uint), SymbolicRegexNode> _pruneLowerPriorityThanNullabilityCache = new();
///
- /// Cache for keyed by:
+ /// Cache for subsumption checks keyed by:
/// -The node R potentially subsuming S
/// -The node S potentially being subsumed by R
/// The value indicates if subsumption is known to hold.
///
internal readonly Dictionary<(SymbolicRegexNode, SymbolicRegexNode), bool> _subsumptionCache = new();
- ///
- /// Maps state ids to states, initial capacity is 1024 states.
- /// Each time more states are needed the length is increased by 1024.
- ///
- internal DfaMatchingState[]? _stateArray;
- internal DfaMatchingState[]? _capturingStateArray;
-
- ///
- /// Maps state IDs to context-independent information for all states in .
- ///
- private ContextIndependentState[] _stateInfo = Array.Empty();
-
- /// Context-independent information available for every state.
- [Flags]
- private enum ContextIndependentState : byte
- {
- IsInitial = 1,
- IsDeadend = 2,
- IsNullable = 4,
- CanBeNullable = 8,
- }
-
- ///
- /// For these "delta" arrays, technically Volatile.Read should be used to read out an element,
- /// but in practice that's not needed on the runtimes in use (though that needs to be documented
- /// via https://github.com/dotnet/runtime/issues/63474), and use of Volatile.Read is
- /// contributing non-trivial overhead (https://github.com/dotnet/runtime/issues/65789).
- ///
- internal int[]? _delta;
- internal List<(DfaMatchingState, DerivativeEffect[])>?[]? _capturingDelta;
- private const int InitialStateLimit = 1024;
-
- /// 1 + Log2(_minterms.Length), the smallest k s.t. 2^k >= minterms.Length + 1
- internal int _mintermsLog;
-
- ///
- /// Maps each NFA state id to the state id of the DfaMatchingState stored in _stateArray.
- /// This map is used to compactly represent NFA state ids in NFA mode in order to utilize
- /// the property that all NFA states are small integers in one interval.
- /// The valid entries are 0 to -1.
- ///
- internal int[] _nfaStateArray = Array.Empty();
-
- ///
- /// Maps the id of a DfaMatchingState to the NFA state id that it is being identifed with in the NFA.
- /// It is the inverse of used entries in _nfaStateArray.
- /// The range of this map is 0 to -1.
- ///
- internal readonly Dictionary _nfaStateArrayInverse = new();
-
- /// Gets .Count
- internal int NfaStateCount => _nfaStateArrayInverse.Count;
-
- ///
- /// Transition function for NFA transitions in NFA mode.
- /// Each NFA entry maps to a list of NFA target states.
- /// Each list of target states is without repetitions.
- /// If the entry is null then the targets states have not been computed yet.
- ///
- internal int[]?[] _nfaDelta = Array.Empty();
-
/// Create a new symbolic regex builder.
internal SymbolicRegexBuilder(ISolver solver, CharSetSolver charSetSolver)
{
@@ -176,24 +106,6 @@ internal SymbolicRegexBuilder(ISolver solver, CharSetSolver charSetSolver)
_charSetSolver = charSetSolver;
_solver = solver;
- // minterms = null if partition of the solver is undefined and returned as null
- _minterms = solver.GetMinterms();
- if (_minterms == null)
- {
- _mintermsLog = -1;
- }
- else
- {
- _stateArray = new DfaMatchingState[InitialStateLimit];
- _capturingStateArray = new DfaMatchingState[InitialStateLimit];
- _stateInfo = new ContextIndependentState[InitialStateLimit];
-
- // the extra +1 slot with id minterms.Length is reserved for \Z (last occurrence of \n)
- _mintermsLog = BitOperations.Log2((uint)_minterms.Length) + 1;
- _delta = new int[InitialStateLimit << _mintermsLog];
- _capturingDelta = new List<(DfaMatchingState, DerivativeEffect[])>[InitialStateLimit << _mintermsLog];
- }
-
// initialized to False but updated later to the actual condition ony if \b or \B occurs anywhere in the regex
// this implies that if a regex never uses \b or \B then the character context will never
// update the previous character context to distinguish word and nonword letters
@@ -213,94 +125,6 @@ internal SymbolicRegexBuilder(ISolver solver, CharSetSolver charSetSolver)
_singletonCache[_solver.Full] = _anyChar;
}
- /// Assign the context-independent information for the given state.
- internal void SetStateInfo(int stateId, bool isInitial, bool isDeadend, bool isNullable, bool canBeNullable)
- {
- Debug.Assert(stateId > 0);
- Debug.Assert(!isNullable || canBeNullable);
-
- ContextIndependentState info = 0;
-
- if (isInitial)
- {
- info |= ContextIndependentState.IsInitial;
- }
-
- if (isDeadend)
- {
- info |= ContextIndependentState.IsDeadend;
- }
-
- if (canBeNullable)
- {
- info |= ContextIndependentState.CanBeNullable;
- if (isNullable)
- {
- info |= ContextIndependentState.IsNullable;
- }
- }
-
- _stateInfo[stateId] = info;
- }
-
- /// Get context-independent information for the given state.
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- internal (bool IsInitial, bool IsDeadend, bool IsNullable, bool CanBeNullable) GetStateInfo(int stateId)
- {
- Debug.Assert(stateId > 0);
-
- ContextIndependentState info = _stateInfo[stateId];
- return ((info & ContextIndependentState.IsInitial) != 0,
- (info & ContextIndependentState.IsDeadend) != 0,
- (info & ContextIndependentState.IsNullable) != 0,
- (info & ContextIndependentState.CanBeNullable) != 0);
- }
-
- /// Lookup the actual minterm based on its ID.
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- internal TSet GetMinterm(int mintermId)
- {
- TSet[]? minterms = _minterms;
- Debug.Assert(minterms is not null);
- return (uint)mintermId < (uint)minterms.Length ?
- minterms[mintermId] :
- _solver.Empty; // minterm=False represents \Z
- }
-
- /// Returns the span from that may contain transitions for the given state
- internal Span GetDeltasFor(DfaMatchingState state)
- {
- if (_delta is null || _minterms is null)
- {
- return default;
- }
-
- int numMinterms = _minterms.Length;
- if (state.StartsWithLineAnchor)
- {
- numMinterms++;
- }
-
- return _delta.AsSpan(state.Id << _mintermsLog, numMinterms);
- }
-
- /// Returns the span from that may contain transitions for the given state
- internal Span GetNfaDeltasFor(DfaMatchingState state)
- {
- if (_nfaDelta is null || _minterms is null || !_nfaStateArrayInverse.TryGetValue(state.Id, out int nfaState))
- {
- return default;
- }
-
- int numMinterms = _minterms.Length;
- if (state.StartsWithLineAnchor)
- {
- numMinterms++;
- }
-
- return _nfaDelta.AsSpan(nfaState << _mintermsLog, numMinterms);
- }
-
///
/// Make an alternation of given nodes, simplify by eliminating any regex that accepts no inputs
///
@@ -509,224 +333,5 @@ internal SymbolicRegexNode Transform(SymbolicRegexNode n
return null;
}
}
-
- ///
- /// Create a state with given node and previous character context.
- ///
- /// the pattern that this state will represent
- /// the kind of the character that led to this state
- /// whether to use the separate space of states with capturing transitions or not
- /// whether to mark the state as an initial state or not
- ///
- public DfaMatchingState CreateState(SymbolicRegexNode node, uint prevCharKind, bool capturing = false, bool isInitialState = false)
- {
- //first prune the anchors in the node
- TSet wlbSet = _wordLetterForBoundariesSet;
- TSet startSet = node.GetStartSet();
-
- //true if the startset of the node overlaps with some wordletter or the node can be nullable
- bool contWithWL = node.CanBeNullable || !_solver.IsEmpty(_solver.And(wlbSet, startSet));
-
- //true if the startset of the node overlaps with some nonwordletter or the node can be nullable
- bool contWithNWL = node.CanBeNullable || !_solver.IsEmpty(_solver.And(_solver.Not(wlbSet), startSet));
- SymbolicRegexNode pruned_node = node.PruneAnchors(prevCharKind, contWithWL, contWithNWL);
- var s = new DfaMatchingState(pruned_node, prevCharKind);
- if (!(capturing ? _capturingStateCache : _stateCache).TryGetValue(s, out DfaMatchingState? state))
- {
- state = MakeNewState(s, capturing, isInitialState);
- }
-
- return state;
- }
-
- private DfaMatchingState MakeNewState(DfaMatchingState state, bool capturing, bool isInitialState)
- {
- lock (this)
- {
- HashSet> cache = capturing ? _capturingStateCache : _stateCache;
- cache.Add(state); // Add to cache first to make 1 the first state ID
- state.Id = cache.Count;
-
- Debug.Assert(_stateArray is not null && _capturingStateArray is not null);
-
- const int GrowthSize = 1024;
- if (capturing)
- {
- if (state.Id == _capturingStateArray.Length)
- {
- int newsize = _capturingStateArray.Length + GrowthSize;
- Array.Resize(ref _capturingStateArray, newsize);
- Array.Resize(ref _capturingDelta, newsize << _mintermsLog);
- }
- _capturingStateArray[state.Id] = state;
- }
- else
- {
- if (state.Id == _stateArray.Length)
- {
- int newsize = _stateArray.Length + GrowthSize;
- Array.Resize(ref _stateArray, newsize);
- Array.Resize(ref _delta, newsize << _mintermsLog);
- Array.Resize(ref _stateInfo, newsize);
- }
- _stateArray[state.Id] = state;
- SetStateInfo(state.Id, isInitialState, state.IsDeadend, state.Node.IsNullable, state.Node.CanBeNullable);
- }
- return state;
- }
- }
-
- ///
- /// Make an NFA state for the given node and previous character kind.
- ///
- public int CreateNfaState(SymbolicRegexNode node, uint prevCharKind)
- {
- Debug.Assert(node.Kind != SymbolicRegexNodeKind.Alternate);
-
- // First make the underlying core state
- DfaMatchingState coreState = CreateState(node, prevCharKind);
-
- if (!_nfaStateArrayInverse.TryGetValue(coreState.Id, out int nfaStateId))
- {
- nfaStateId = MakeNewNfaState(coreState.Id);
- }
-
- return nfaStateId;
- }
-
- /// Critical region that creates a new NFA state for the underlying core state
- private int MakeNewNfaState(int coreStateId)
- {
- lock (this)
- {
- if (NfaStateCount == _nfaStateArray.Length)
- {
- // TBD: is 1024 reasonable?
- int newsize = _nfaStateArray.Length + 1024;
- Array.Resize(ref _nfaStateArray, newsize);
- Array.Resize(ref _nfaDelta, newsize << _mintermsLog);
- // TBD: capturing
- }
-
- int nfaStateId = NfaStateCount;
- _nfaStateArray[nfaStateId] = coreStateId;
- _nfaStateArrayInverse[coreStateId] = nfaStateId;
- return nfaStateId;
- }
- }
-
- /// Gets the core state Id corresponding to the NFA state
- public int GetCoreStateId(int nfaStateId)
- {
- Debug.Assert(_stateArray is not null);
- Debug.Assert(nfaStateId < _nfaStateArray.Length);
- Debug.Assert(_nfaStateArray[nfaStateId] < _stateArray.Length);
- return _nfaStateArray[nfaStateId];
- }
-
- /// Gets the core state corresponding to the NFA state
- public DfaMatchingState GetCoreState(int nfaStateId)
- {
- Debug.Assert(_stateArray is not null);
- return _stateArray[GetCoreStateId(nfaStateId)];
- }
-
- /// Critical region for defining a new core transition
- public DfaMatchingState CreateNewTransition(DfaMatchingState sourceState, int mintermId, int offset)
- {
- TryCreateNewTransition(sourceState, mintermId, offset, checkThreshold: false, out DfaMatchingState? nextState);
- Debug.Assert(nextState is not null);
- return nextState;
- }
-
- /// Gets or creates a new DFA transition.
- public bool TryCreateNewTransition(
- DfaMatchingState sourceState, int mintermId, int offset, bool checkThreshold, [NotNullWhen(true)] out DfaMatchingState? nextState)
- {
- Debug.Assert(_delta is not null && _stateArray is not null);
- lock (this)
- {
- Debug.Assert(offset < _delta.Length);
-
- // check if meanwhile delta[offset] has become defined possibly by another thread
- DfaMatchingState? targetState = _stateArray[_delta[offset]];
- if (targetState is null)
- {
- if (checkThreshold && _stateCache.Count >= SymbolicRegexThresholds.NfaThreshold)
- {
- nextState = null;
- return false;
- }
-
- targetState = sourceState.Next(GetMinterm(mintermId));
- Volatile.Write(ref _delta[offset], targetState.Id);
- }
-
- nextState = targetState;
- return true;
- }
- }
-
- /// Gets or creates a new NFA transition.
- public int[] CreateNewNfaTransition(int nfaStateId, int mintermId, int nfaOffset)
- {
- Debug.Assert(_delta is not null && _stateArray is not null);
- lock (this)
- {
- Debug.Assert(nfaOffset < _nfaDelta.Length);
-
- // check if meanwhile the nfaoffset has become defined possibly by another thread
- int[]? targets = _nfaDelta[nfaOffset];
- if (targets is null)
- {
- // Create the underlying transition from the core state corresponding to the nfa state
- DfaMatchingState coreState = GetCoreState(nfaStateId);
- int coreOffset = (coreState.Id << _mintermsLog) | mintermId;
- int coreTargetId = _delta[coreOffset];
- DfaMatchingState? coreTarget = coreTargetId > 0 ?
- _stateArray[coreTargetId] : CreateNewTransition(coreState, mintermId, coreOffset);
-
- SymbolicRegexNode node = coreTarget.Node.Kind == SymbolicRegexNodeKind.DisableBacktrackingSimulation ?
- coreTarget.Node._left! : coreTarget.Node;
- if (node.Kind == SymbolicRegexNodeKind.Alternate)
- {
- // Create separate NFA states for all members of a disjunction
- // Here duplicate NFA states cannot arise because there are no duplicate nodes in the disjunction
- List> alts = node.ToList(listKind: SymbolicRegexNodeKind.Alternate);
- targets = new int[alts.Count];
- int targetIndex = 0;
- foreach (SymbolicRegexNode q in alts)
- {
- Debug.Assert(!q.IsNothing);
- // Re-wrap the element nodes in DisableBacktrackingSimulation if the top level node was too
- SymbolicRegexNode targetNode = coreTarget.Node.Kind == SymbolicRegexNodeKind.DisableBacktrackingSimulation ?
- CreateDisableBacktrackingSimulation(q) : q;
- targets[targetIndex++] = CreateNfaState(targetNode, coreTarget.PrevCharKind);
- }
- Debug.Assert(targetIndex == targets.Length);
- }
- else if (coreTarget.IsDeadend)
- {
- // Omit deadend states from the target list of states
- // target list being empty means that the NFA state itself is a deadend
- targets = Array.Empty();
- }
- else
- {
- // Add the single NFA target state correponding to the core target state
- if (!_nfaStateArrayInverse.TryGetValue(coreTarget.Id, out int nfaTargetId))
- {
- nfaTargetId = MakeNewNfaState(coreTarget.Id);
- }
-
- targets = new[] { nfaTargetId };
- }
-
- Volatile.Write(ref _nfaDelta[nfaOffset], targets);
- }
-
- return targets;
- }
- }
}
}
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexInfo.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexInfo.cs
index cd333942b491a4d3259c57c7008a1e259b1bd2cb..ff95195292bfa476c695058df51074017f8f5255 100644
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexInfo.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexInfo.cs
@@ -1,6 +1,8 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
+using System.Diagnostics;
+
namespace System.Text.RegularExpressions.Symbolic
{
/// Misc information of structural properties of a that is computed bottom up.
@@ -14,54 +16,34 @@ namespace System.Text.RegularExpressions.Symbolic
private const uint StartsWithSomeAnchorMask = 32;
private const uint IsHighPriorityNullableMask = 64;
private const uint ContainsEffectMask = 128;
+ private const uint ContainsLineAnchorMask = 256;
private readonly uint _info;
private SymbolicRegexInfo(uint i) => _info = i;
- internal static SymbolicRegexInfo Create(
+ private static SymbolicRegexInfo Create(
bool isAlwaysNullable = false, bool canBeNullable = false,
- bool startsWithLineAnchor = false, bool startsWithSomeAnchor = false, bool containsSomeAnchor = false,
+ bool startsWithLineAnchor = false, bool containsLineAnchor = false,
+ bool startsWithSomeAnchor = false, bool containsSomeAnchor = false,
bool isHighPriorityNullable = false, bool containsEffect = false)
{
- uint i = 0;
-
- if (canBeNullable || isAlwaysNullable)
- {
- i |= CanBeNullableMask;
-
- if (isAlwaysNullable)
- {
- i |= IsAlwaysNullableMask;
- }
- }
-
- if (containsSomeAnchor || startsWithLineAnchor || startsWithSomeAnchor)
- {
- i |= ContainsSomeAnchorMask;
-
- if (startsWithLineAnchor)
- {
- i |= StartsWithLineAnchorMask;
- }
-
- if (startsWithLineAnchor || startsWithSomeAnchor)
- {
- i |= StartsWithSomeAnchorMask;
- }
- }
-
- if (isHighPriorityNullable)
- {
- i |= IsHighPriorityNullableMask;
- }
-
- if (containsEffect)
- {
- i |= ContainsEffectMask;
- }
-
- return new SymbolicRegexInfo(i);
+ // Assert that the expected implications hold. For example, every node that contains a line anchor
+ // must also be marked as containing some anchor.
+ Debug.Assert(!isAlwaysNullable || canBeNullable);
+ Debug.Assert(!startsWithLineAnchor || containsLineAnchor);
+ Debug.Assert(!startsWithLineAnchor || startsWithSomeAnchor);
+ Debug.Assert(!containsLineAnchor || containsSomeAnchor);
+ Debug.Assert(!startsWithSomeAnchor || containsSomeAnchor);
+ return new SymbolicRegexInfo(
+ (isAlwaysNullable ? IsAlwaysNullableMask : 0) |
+ (canBeNullable ? CanBeNullableMask : 0) |
+ (startsWithLineAnchor ? StartsWithLineAnchorMask : 0) |
+ (containsLineAnchor ? ContainsLineAnchorMask : 0) |
+ (startsWithSomeAnchor ? StartsWithSomeAnchorMask : 0) |
+ (containsSomeAnchor ? ContainsSomeAnchorMask : 0) |
+ (isHighPriorityNullable ? IsHighPriorityNullableMask : 0) |
+ (containsEffect ? ContainsEffectMask : 0));
}
public bool IsNullable => (_info & IsAlwaysNullableMask) != 0;
@@ -70,6 +52,8 @@ namespace System.Text.RegularExpressions.Symbolic
public bool StartsWithLineAnchor => (_info & StartsWithLineAnchorMask) != 0;
+ public bool ContainsLineAnchor => (_info & ContainsLineAnchorMask) != 0;
+
public bool StartsWithSomeAnchor => (_info & StartsWithSomeAnchorMask) != 0;
public bool ContainsSomeAnchor => (_info & ContainsSomeAnchorMask) != 0;
@@ -80,6 +64,27 @@ namespace System.Text.RegularExpressions.Symbolic
public bool ContainsEffect => (_info & ContainsEffectMask) != 0;
+ ///
+ /// Used for any node that acts as an epsilon, i.e., something that always matches the empty string.
+ ///
+ public static SymbolicRegexInfo Epsilon() =>
+ Create(
+ isAlwaysNullable: true,
+ canBeNullable: true,
+ isHighPriorityNullable: true);
+
+ ///
+ /// Used for all anchors.
+ ///
+ /// whether this anchor is a line anchor
+ public static SymbolicRegexInfo Anchor(bool isLineAnchor) =>
+ Create(
+ canBeNullable: true,
+ startsWithLineAnchor: isLineAnchor,
+ containsLineAnchor: isLineAnchor,
+ startsWithSomeAnchor: true,
+ containsSomeAnchor: true);
+
///
/// The alternation remains high priority nullable if the left alternative is so.
/// All other info properties are the logical disjunction of the resepctive info properties
@@ -90,6 +95,7 @@ namespace System.Text.RegularExpressions.Symbolic
isAlwaysNullable: left_info.IsNullable || right_info.IsNullable,
canBeNullable: left_info.CanBeNullable || right_info.CanBeNullable,
startsWithLineAnchor: left_info.StartsWithLineAnchor || right_info.StartsWithLineAnchor,
+ containsLineAnchor: left_info.ContainsLineAnchor || right_info.ContainsLineAnchor,
startsWithSomeAnchor: left_info.StartsWithSomeAnchor || right_info.StartsWithSomeAnchor,
containsSomeAnchor: left_info.ContainsSomeAnchor || right_info.ContainsSomeAnchor,
isHighPriorityNullable: left_info.IsHighPriorityNullable,
@@ -105,6 +111,7 @@ namespace System.Text.RegularExpressions.Symbolic
isAlwaysNullable: left_info.IsNullable && right_info.IsNullable,
canBeNullable: left_info.CanBeNullable && right_info.CanBeNullable,
startsWithLineAnchor: left_info.StartsWithLineAnchor || (left_info.CanBeNullable && right_info.StartsWithLineAnchor),
+ containsLineAnchor: left_info.ContainsLineAnchor || right_info.ContainsLineAnchor,
startsWithSomeAnchor: left_info.StartsWithSomeAnchor || (left_info.CanBeNullable && right_info.StartsWithSomeAnchor),
containsSomeAnchor: left_info.ContainsSomeAnchor || right_info.ContainsSomeAnchor,
isHighPriorityNullable: left_info.IsHighPriorityNullable && right_info.IsHighPriorityNullable,
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexKind.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexKind.cs
index ea1f68075000b64c44dd496b705560d3b269be82..bc01b913f7ce63045fed4ff9f2fb6316fcf0c56d 100644
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexKind.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexKind.cs
@@ -48,7 +48,7 @@ internal enum SymbolicRegexNodeKind
/// Effects to be applied when taking a transition.
///
/// Left child is the pattern itself and the right child is a concatenation of nodes whose effects should be applied.
- /// Effect nodes are created in the rule for concatenation in ,
+ /// Effect nodes are created in the rule for concatenation in ,
/// where they are used to represent additional operations that should be performed in the current position if
/// the pattern in the left child is used to match the input. Since these Effect nodes are relative to the current
/// position in the input, the effects from the right child must be applied in the transition that the derivative is
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.Automata.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.Automata.cs
new file mode 100644
index 0000000000000000000000000000000000000000..9912da4da8ef39c801cf288710b839072ccc65a0
--- /dev/null
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.Automata.cs
@@ -0,0 +1,441 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Diagnostics.CodeAnalysis;
+using System.Numerics;
+using System.Runtime.CompilerServices;
+using System.Threading;
+
+namespace System.Text.RegularExpressions.Symbolic
+{
+ internal sealed partial class SymbolicRegexMatcher
+ {
+ ///
+ /// Initial capacity for DFA related arrays.
+ ///
+ private const int InitialDfaStateCapacity = 1024;
+
+ ///
+ /// Minimum capacity for NFA related arrays when the matcher first enters NFA mode. The arrays start out empty,
+ /// but are resized to this capacity upon first use.
+ ///
+ private const int InitialNfaStateCapacity = 64;
+
+ ///
+ /// Cache for the states that have been created. Each state is uniquely identified by its associated
+ /// SymbolicRegexNode and the kind of the previous character.
+ ///
+ private readonly Dictionary<(SymbolicRegexNode Node, uint PrevCharKind), MatchingState> _stateCache = new();
+
+ ///
+ /// Maps state ids to states, initial capacity is given by .
+ /// Each time more states are needed the length is doubled.
+ /// The first valid state is at index 1.
+ ///
+ private MatchingState?[] _stateArray;
+
+ ///
+ /// Maps state IDs to context-independent information for all states in _stateArray.
+ /// The first valid entry is at index 1.
+ ///
+ private ContextIndependentState[] _stateInfo;
+
+ /// Context-independent information available for every state.
+ [Flags]
+ private enum ContextIndependentState : byte
+ {
+ IsInitial = 1,
+ IsDeadend = 2,
+ IsNullable = 4,
+ CanBeNullable = 8,
+ }
+
+ ///
+ /// The transition function for DFA mode.
+ /// Each state has a range of consecutive entries for each minterm ID. A range of size 2^L, where L is
+ /// the number of bits required to represent the largest minterm ID (_mintermsLog), is reserved
+ /// for each state. This makes indexing into this array not require a multiplication
+ /// (see DeltaOffset), but does mean some unused space may be present.
+ /// The first valid state ID is 1.
+ ///
+ ///
+ /// For these "delta" arrays, technically Volatile.Read should be used to read out an element,
+ /// but in practice that's not needed on the runtimes in use (though that needs to be documented
+ /// via https://github.com/dotnet/runtime/issues/63474), and use of Volatile.Read is
+ /// contributing non-trivial overhead (https://github.com/dotnet/runtime/issues/65789).
+ ///
+ private int[] _dfaDelta;
+
+ ///
+ /// Maps each NFA state id to the state id of the MatchingState stored in _stateArray.
+ /// This map is used to compactly represent NFA state ids in NFA mode in order to utilize
+ /// the property that all NFA states are small integers in one interval.
+ /// The valid entries are 0 to the size of _nfaIdByCoreId - 1.
+ ///
+ private int[] _nfaCoreIdArray = Array.Empty();
+
+ ///
+ /// Maps the id of a MatchingState to the NFA state id that it is being identified with in the NFA.
+ /// It is the inverse of used entries in _nfaCoreIdArray.
+ /// The range of this map is 0 to its size - 1.
+ ///
+ private readonly Dictionary _nfaIdByCoreId = new();
+
+ ///
+ /// Transition function for NFA transitions in NFA mode.
+ /// Each NFA entry maps to a list of NFA target states.
+ /// Each list of target states is without repetitions.
+ /// If the entry is null then the targets states have not been computed yet.
+ ///
+ private int[]?[] _nfaDelta = Array.Empty();
+
+ ///
+ /// The transition function for capturing NFA mode,
+ /// which is an NFA mode with additional state to track capture start and end positions.
+ /// Each entry is an array of pairs of target state and effects to be applied when taking the transition.
+ /// If the entry is null then the transition has not been computed yet.
+ ///
+ private (int, DerivativeEffect[])[]?[] _capturingNfaDelta = Array.Empty<(int, DerivativeEffect[])[]?>();
+
+ ///
+ /// Implements a version of Array.Resize that is guaranteed to not publish an array before values
+ /// have been copied over.
+ ///
+ ///
+ /// This may not be strictly necessary for arrays of primitive or reference types (which have atomic
+ /// reads/writes), as when, e.g., _dfaDelta is found to not have an entry the array is checked again
+ /// after a lock on the matcher has been acquired. However, in a highly threaded use case it still seems better
+ /// to avoid unnecessarily causing other threads to acquire the lock.
+ ///
+ private static void ArrayResizeAndVolatilePublish(ref T[] array, int newSize)
+ {
+ Debug.Assert(newSize >= array.Length);
+ T[] newArray = new T[newSize];
+ Array.Copy(array, newArray, array.Length);
+ Volatile.Write(ref array, newArray);
+ }
+
+ private int DeltaOffset(int stateId, int mintermId) => (stateId << _mintermsLog) | mintermId;
+
+ /// Returns the span from _dfaDelta that may contain transitions for the given state
+ private Span GetDeltasFor(MatchingState state)
+ {
+ Debug.Assert(Monitor.IsEntered(this));
+
+ int numMinterms = _minterms.Length;
+ if (state.StartsWithLineAnchor)
+ {
+ numMinterms++;
+ }
+
+ return _dfaDelta.AsSpan(state.Id << _mintermsLog, numMinterms);
+ }
+
+ /// Returns the span from _nfaDelta that may contain transitions for the given state
+ private Span GetNfaDeltasFor(MatchingState state)
+ {
+ Debug.Assert(Monitor.IsEntered(this));
+
+ if (!_nfaIdByCoreId.TryGetValue(state.Id, out int nfaState))
+ {
+ return default;
+ }
+
+ int numMinterms = _minterms.Length;
+ if (state.StartsWithLineAnchor)
+ {
+ numMinterms++;
+ }
+
+ return _nfaDelta.AsSpan(nfaState << _mintermsLog, numMinterms);
+ }
+
+ /// Get context-independent information for the given state.
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private (bool IsInitial, bool IsDeadend, bool IsNullable, bool CanBeNullable) GetStateInfo(int stateId)
+ {
+ Debug.Assert(stateId > 0);
+
+ ContextIndependentState info = _stateInfo[stateId];
+ return ((info & ContextIndependentState.IsInitial) != 0,
+ (info & ContextIndependentState.IsDeadend) != 0,
+ (info & ContextIndependentState.IsNullable) != 0,
+ (info & ContextIndependentState.CanBeNullable) != 0);
+ }
+
+ ///
+ /// Create a state with given node and previous character context.
+ ///
+ /// the pattern that this state will represent
+ /// the kind of the character that led to this state
+ ///
+ private MatchingState GetOrCreateState(SymbolicRegexNode node, uint prevCharKind)
+ {
+ Debug.Assert(Monitor.IsEntered(this));
+ return GetOrCreateState_NoLock(node, prevCharKind);
+ }
+
+ ///
+ /// Create a state with given node and previous character context.
+ ///
+ /// the pattern that this state will represent
+ /// the kind of the character that led to this state
+ /// whether to mark the state as an initial state or not
+ ///
+ private MatchingState GetOrCreateState_NoLock(SymbolicRegexNode node, uint prevCharKind, bool isInitialState = false)
+ {
+ SymbolicRegexNode prunedNode = node.PruneAnchors(_builder, prevCharKind);
+ (SymbolicRegexNode Node, uint PrevCharKind) key = (prunedNode, prevCharKind);
+ if (!_stateCache.TryGetValue(key, out MatchingState? state))
+ {
+ state = new MatchingState(key.Node, key.PrevCharKind);
+ _stateCache.Add(key, state); // Add to cache first to make 1 the first state ID
+ state.Id = _stateCache.Count;
+
+ Debug.Assert(_stateArray is not null);
+
+ if (state.Id == _stateArray.Length)
+ {
+ // The growth factor 2 matches that of List<T>
+ int newsize = _stateArray.Length * 2;
+ ArrayResizeAndVolatilePublish(ref _stateArray, newsize);
+ ArrayResizeAndVolatilePublish(ref _dfaDelta, newsize << _mintermsLog);
+ ArrayResizeAndVolatilePublish(ref _stateInfo, newsize);
+ }
+ _stateArray[state.Id] = state;
+ _stateInfo[state.Id] = BuildStateInfo(state.Id, isInitialState, state.IsDeadend(Solver), state.Node.IsNullable, state.Node.CanBeNullable);
+ }
+
+ return state;
+
+ // Assign the context-independent information for the given state
+ static ContextIndependentState BuildStateInfo(int stateId, bool isInitial, bool isDeadend, bool isNullable, bool canBeNullable)
+ {
+ Debug.Assert(stateId > 0);
+ Debug.Assert(!isNullable || canBeNullable);
+
+ ContextIndependentState info = 0;
+
+ if (isInitial)
+ {
+ info |= ContextIndependentState.IsInitial;
+ }
+
+ if (isDeadend)
+ {
+ info |= ContextIndependentState.IsDeadend;
+ }
+
+ if (canBeNullable)
+ {
+ info |= ContextIndependentState.CanBeNullable;
+ if (isNullable)
+ {
+ info |= ContextIndependentState.IsNullable;
+ }
+ }
+
+ return info;
+ }
+ }
+
+ ///
+ /// Make an NFA state for the given node and previous character kind. NFA states include a "core state" of a
+ /// MatchingState allocated with GetOrCreateState,
+ /// which stores the pattern and previous character kind and can be used for creating further NFA transitions.
+ /// In addition to the ID of the core state, NFA states are allocated a new NFA mode specific ID, which is
+ /// used to index into NFA mode transition arrays (e.g. _nfaDelta).
+ ///
+ ///
+ /// Using an ID numbering for NFA mode that is separate from DFA mode allows the IDs to be smaller, which saves
+ /// space both in the NFA mode arrays and in the SparseIntMap instances used during matching for
+ /// sets of NFA states.
+ /// The core state ID can be looked up by the NFA ID with GetCoreStateId.
+ ///
+ /// the NFA ID of the new state, or null if the state is a dead end
+ private int? CreateNfaState(SymbolicRegexNode node, uint prevCharKind)
+ {
+ Debug.Assert(Monitor.IsEntered(this));
+ Debug.Assert(node.Kind != SymbolicRegexNodeKind.Alternate);
+
+ // First make the core state for the node, which is used for creating further transitions out of this state
+ MatchingState coreState = GetOrCreateState(node, prevCharKind);
+
+ // If the state is a dead end then don't create an NFA state, as dead ends in NFA mode are represented
+ // as empty lists of states.
+ if (coreState.IsDeadend(Solver))
+ {
+ return null;
+ }
+
+ // The NFA state itself is an ID that can be mapped back to the ID of the MatchingState. These NFA states are
+ // allocated separately from the IDs used in DFA mode to avoid large values, which helps save memory in the
+ // SparseIntMap data structures used in NFA matching modes.
+ if (!_nfaIdByCoreId.TryGetValue(coreState.Id, out int nfaStateId))
+ {
+ // No NFA state already exists, so make a new one. NFA state IDs are allocated sequentially from zero by
+ // giving each new state an ID equal to the number of existing NFA states.
+ nfaStateId = _nfaIdByCoreId.Count;
+
+ // If the next ID is past the end of the NFA state array, increase the sizes of the NFA arrays
+ if (nfaStateId == _nfaCoreIdArray.Length)
+ {
+ // The growth factor 2 matches that of List<T>
+ int newsize = Math.Max(_nfaCoreIdArray.Length * 2, InitialNfaStateCapacity);
+ ArrayResizeAndVolatilePublish(ref _nfaCoreIdArray, newsize);
+ ArrayResizeAndVolatilePublish(ref _nfaDelta, newsize << _mintermsLog);
+ ArrayResizeAndVolatilePublish(ref _capturingNfaDelta, newsize << _mintermsLog);
+ }
+
+ // Store the mapping from NFA state ID to core state ID
+ Debug.Assert(nfaStateId < _nfaCoreIdArray.Length);
+ _nfaCoreIdArray[nfaStateId] = coreState.Id;
+
+ // Store the mapping from core state ID to NFA state ID
+ // Adding an entry here increments the ID that will be given to the next NFA state
+ _nfaIdByCoreId.Add(coreState.Id, nfaStateId);
+ }
+
+ return nfaStateId;
+ }
+
+ /// Gets the MatchingState corresponding to the given state ID.
+ private MatchingState GetState(int stateId)
+ {
+ Debug.Assert(stateId > 0);
+ MatchingState? state = _stateArray[stateId];
+ Debug.Assert(state is not null);
+ return state;
+ }
+
+ /// Gets the core state Id corresponding to the NFA state
+ private int GetCoreStateId(int nfaStateId)
+ {
+ Debug.Assert(nfaStateId < _nfaCoreIdArray.Length);
+ Debug.Assert(_nfaCoreIdArray[nfaStateId] < _stateArray.Length);
+ return _nfaCoreIdArray[nfaStateId];
+ }
+
+ /// Gets or creates a new DFA transition.
+ /// This function locks the matcher for safe concurrent use of the _builder
+ private bool TryCreateNewTransition(
+ MatchingState sourceState, int mintermId, int offset, bool checkThreshold, [NotNullWhen(true)] out MatchingState? nextState)
+ {
+ Debug.Assert(offset < _dfaDelta.Length);
+
+ lock (this)
+ {
+ // check if meanwhile delta[offset] has become defined possibly by another thread
+ MatchingState? targetState = _stateArray[_dfaDelta[offset]];
+ if (targetState is null)
+ {
+ if (checkThreshold && _stateCache.Count >= SymbolicRegexThresholds.NfaThreshold)
+ {
+ nextState = null;
+ return false;
+ }
+
+ TSet minterm = GetMintermFromId(mintermId);
+ uint nextCharKind = GetPositionKind(mintermId);
+ targetState = GetOrCreateState(sourceState.Next(_builder, minterm, nextCharKind), nextCharKind);
+ Volatile.Write(ref _dfaDelta[offset], targetState.Id);
+ }
+
+ nextState = targetState;
+ return true;
+ }
+ }
+
+ /// Gets or creates a new NFA transition.
+ /// This function locks the matcher for safe concurrent use of the _builder
+ private int[] CreateNewNfaTransition(int nfaStateId, int mintermId, int nfaOffset)
+ {
+ Debug.Assert(nfaOffset < _nfaDelta.Length);
+
+ lock (this)
+ {
+ // check if meanwhile the nfaoffset has become defined possibly by another thread
+ int[]? targets = _nfaDelta[nfaOffset];
+ if (targets is null)
+ {
+ // Create the underlying transition from the core state corresponding to the nfa state
+ int coreId = GetCoreStateId(nfaStateId);
+ int coreOffset = (coreId << _mintermsLog) | mintermId;
+ int coreTargetId = _dfaDelta[coreOffset];
+ MatchingState coreState = GetState(coreId);
+ TSet minterm = GetMintermFromId(mintermId);
+ uint nextCharKind = GetPositionKind(mintermId);
+ SymbolicRegexNode? targetNode = coreTargetId > 0 ?
+ GetState(coreTargetId).Node : coreState.Next(_builder, minterm, nextCharKind);
+
+ List targetsList = new();
+ ForEachNfaState(targetNode, nextCharKind, targetsList, static (int nfaId, List targetsList) =>
+ targetsList.Add(nfaId));
+
+ targets = targetsList.ToArray();
+ Volatile.Write(ref _nfaDelta[nfaOffset], targets);
+ }
+
+ return targets;
+ }
+ }
+
+ /// Gets or creates a new capturing NFA transition.
+ /// This function locks the matcher for safe concurrent use of the _builder
+ private (int, DerivativeEffect[])[] CreateNewCapturingTransition(int nfaStateId, int mintermId, int offset)
+ {
+ lock (this)
+ {
+ // Get the next state if it exists. The caller should have already tried and found it null (not yet created),
+ // but in the interim another thread could have created it.
+ (int, DerivativeEffect[])[]? targets = _capturingNfaDelta[offset];
+ if (targets is null)
+ {
+ MatchingState coreState = GetState(GetCoreStateId(nfaStateId));
+ TSet minterm = GetMintermFromId(mintermId);
+ uint nextCharKind = GetPositionKind(mintermId);
+ List<(SymbolicRegexNode Node, DerivativeEffect[] Effects)>? transition = coreState.NfaNextWithEffects(_builder, minterm, nextCharKind);
+ // Build the new state and store it into the array.
+ List<(int, DerivativeEffect[])> targetsList = new();
+ foreach ((SymbolicRegexNode Node, DerivativeEffect[] Effects) entry in transition)
+ {
+ ForEachNfaState(entry.Node, nextCharKind, (targetsList, entry.Effects),
+ static (int nfaId, (List<(int, DerivativeEffect[])> Targets, DerivativeEffect[] Effects) args) =>
+ args.Targets.Add((nfaId, args.Effects)));
+ }
+ targets = targetsList.ToArray();
+ Volatile.Write(ref _capturingNfaDelta[offset], targets);
+ }
+
+ return targets;
+ }
+ }
+
+ ///
+ /// Iterates through the alternation branches of the node (via EnumerateAlternationBranches)
+ /// and tries to create NFA states for each. The supplied action is called for each created NFA state. These never
+ /// include dead ends as CreateNfaState will filter those out.
+ ///
+ /// This function locks the matcher for safe concurrent use of the _builder
+ /// the type of the additional argument passed through to the action
+ /// the node to break up into NFA states
+ /// the previous character kind for each created NFA state
+ /// an additional argument passed through to each call to the action
+ /// action to call for each NFA state
+ private void ForEachNfaState(SymbolicRegexNode node, uint prevCharKind, T arg, Action action)
+ {
+ lock (this)
+ {
+ foreach (SymbolicRegexNode nfaNode in node.EnumerateAlternationBranches(_builder))
+ {
+ if (CreateNfaState(nfaNode, prevCharKind) is int nfaId)
+ {
+ action(nfaId, arg);
+ }
+ }
+ }
+ }
+ }
+}
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.Dgml.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.Dgml.cs
index 1225c4748e4c57bd4f6bb3ca0f91550da2389efb..157fd7d332db92d877894096d82dfb9d7cf642cd 100644
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.Dgml.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.Dgml.cs
@@ -16,140 +16,140 @@ internal sealed partial class SymbolicRegexMatcher
[ExcludeFromCodeCoverage(Justification = "Currently only used for testing")]
public override void SaveDGML(TextWriter writer, int maxLabelLength)
{
- if (maxLabelLength < 0)
- maxLabelLength = int.MaxValue;
-
- Dictionary<(int Source, int Target), (TSet Rule, List NfaTargets)> transitions = GatherTransitions(_builder);
-
- writer.WriteLine("");
- writer.WriteLine("");
- writer.WriteLine(" ");
- writer.WriteLine(" ", FormatInfo(_builder, transitions.Count));
- writer.WriteLine(" ", FormatInfo(_builder, transitions.Count));
- foreach (DfaMatchingState state in _builder._stateCache)
+ lock (this)
{
- string info = CharKind.DescribePrev(state.PrevCharKind);
- string deriv = WebUtility.HtmlEncode(state.Node.ToString());
- string nodeDgmlView = $"{(info == string.Empty ? info : $"Previous: {info}
")}{(deriv == string.Empty ? "()" : deriv)}";
+ if (maxLabelLength < 0)
+ maxLabelLength = int.MaxValue;
+
+ Dictionary<(int Source, int Target), (TSet Rule, List NfaTargets)> transitions = GatherTransitions(this);
- writer.WriteLine(" ", state.Id, nodeDgmlView);
- if (_builder.GetStateInfo(state.Id).IsInitial)
+ writer.WriteLine("");
+ writer.WriteLine("");
+ writer.WriteLine(" ");
+ writer.WriteLine(" ", FormatInfo(this, transitions.Count));
+ writer.WriteLine(" ", FormatInfo(this, transitions.Count));
+ foreach (MatchingState state in _stateCache.Values)
{
- writer.WriteLine(" ");
+ string info = CharKind.DescribePrev(state.PrevCharKind);
+ string deriv = WebUtility.HtmlEncode(state.Node.ToString());
+ string nodeDgmlView = $"{(info == string.Empty ? info : $"Previous: {info}
")}{(deriv == string.Empty ? "()" : deriv)}";
+
+ writer.WriteLine(" ", state.Id, nodeDgmlView);
+ if (GetStateInfo(state.Id).IsInitial)
+ {
+ writer.WriteLine(" ");
+ }
+ if (state.Node.CanBeNullable)
+ {
+ writer.WriteLine(" ");
+ }
+ writer.WriteLine(" ");
+ writer.WriteLine(" ", state.Id, nodeDgmlView);
}
- if (state.Node.CanBeNullable)
+ writer.WriteLine(" ");
+ writer.WriteLine(" ");
+ foreach (MatchingState initialState in GetInitialStates(this))
{
- writer.WriteLine(" ");
+ writer.WriteLine(" ", initialState.Id);
}
- writer.WriteLine(" ");
- writer.WriteLine(" ", state.Id, nodeDgmlView);
- }
- writer.WriteLine(" ");
- writer.WriteLine(" ");
- foreach (DfaMatchingState initialState in GetInitialStates(this))
- {
- Debug.Assert(_builder._stateCache.Contains(initialState));
- writer.WriteLine(" ", initialState.Id);
- }
- writer.WriteLine(" ");
+ writer.WriteLine(" ");
- foreach (KeyValuePair<(int Source, int Target), (TSet Rule, List NfaTargets)> transition in transitions)
- {
- string label = DescribeLabel(transition.Value.Rule, _builder);
- string info = "";
- if (label.Length > maxLabelLength)
+ foreach (KeyValuePair<(int Source, int Target), (TSet Rule, List NfaTargets)> transition in transitions)
{
- info = $"FullLabel = \"{label}\" ";
- label = string.Concat(label.AsSpan(0, maxLabelLength), "..");
+ string label = DescribeLabel(transition.Value.Rule, _builder);
+ string info = "";
+ if (label.Length > maxLabelLength)
+ {
+ info = $"FullLabel = \"{label}\" ";
+ label = string.Concat(label.AsSpan(0, maxLabelLength), "..");
+ }
+
+ writer.WriteLine($" ");
+ // Render NFA transitions as labelless "epsilon" transitions (i.e. ones that don't consume a character)
+ // from the target of the DFA transition.
+ foreach (int nfaTarget in transition.Value.NfaTargets)
+ {
+ writer.WriteLine($" ");
+ }
}
- writer.WriteLine($" ");
- // Render NFA transitions as labelless "epsilon" transitions (i.e. ones that don't consume a character)
- // from the target of the DFA transition.
- foreach (int nfaTarget in transition.Value.NfaTargets)
+ foreach (MatchingState state in _stateCache.Values)
{
- writer.WriteLine($" ");
+ writer.WriteLine(" ", state.Id);
}
- }
- foreach (DfaMatchingState state in _builder._stateCache)
- {
- writer.WriteLine(" ", state.Id);
+ writer.WriteLine(" ");
+ writer.WriteLine(" ");
+ writer.WriteLine(" ");
+ writer.WriteLine(" ");
+ writer.WriteLine(" ");
+ writer.WriteLine(" ");
+ writer.WriteLine(" ");
+ writer.WriteLine(" ");
+ writer.WriteLine(" ");
+ writer.WriteLine(" ");
+ writer.WriteLine(" ");
+ writer.WriteLine(" ");
+ writer.WriteLine(" ");
+ writer.WriteLine(" ");
+ writer.WriteLine(" ");
+ writer.WriteLine(" ");
+ writer.WriteLine(" ");
+ writer.WriteLine(" ");
+ writer.WriteLine(" ");
+ writer.WriteLine(" ");
+ writer.WriteLine(" ");
+ writer.WriteLine(" ");
+ writer.WriteLine(" ");
+ writer.WriteLine(" ");
+ writer.WriteLine("");
}
- writer.WriteLine(" ");
- writer.WriteLine(" ");
- writer.WriteLine(" ");
- writer.WriteLine(" ");
- writer.WriteLine(" ");
- writer.WriteLine(" ");
- writer.WriteLine(" ");
- writer.WriteLine(" ");
- writer.WriteLine(" ");
- writer.WriteLine(" ");
- writer.WriteLine(" ");
- writer.WriteLine(" ");
- writer.WriteLine(" ");
- writer.WriteLine(" ");
- writer.WriteLine(" ");
- writer.WriteLine(" ");
- writer.WriteLine(" ");
- writer.WriteLine(" ");
- writer.WriteLine(" ");
- writer.WriteLine(" ");
- writer.WriteLine(" ");
- writer.WriteLine(" ");
- writer.WriteLine(" ");
- writer.WriteLine(" ");
- writer.WriteLine("");
-
// This function gathers all transitions in the given builder and groups them by (source,destination) state ID
- static Dictionary<(int Source, int Target), (TSet Rule, List NfaTargets)> GatherTransitions(SymbolicRegexBuilder builder)
+ static Dictionary<(int Source, int Target), (TSet Rule, List NfaTargets)> GatherTransitions(SymbolicRegexMatcher matcher)
{
- Debug.Assert(builder._delta is not null);
- Debug.Assert(builder._minterms is not null);
Dictionary<(int Source, int Target), (TSet Rule, List NfaTargets)> result = new();
- foreach (DfaMatchingState source in builder._stateCache)
+ foreach (MatchingState source in matcher._stateCache.Values)
{
// Get the span of entries in delta that gives the transitions for the different minterms
- Span deltas = builder.GetDeltasFor(source);
- Span nfaDeltas = builder.GetNfaDeltasFor(source);
- Debug.Assert(deltas.Length == builder._minterms.Length);
+ Span deltas = matcher.GetDeltasFor(source);
+ Span nfaDeltas = matcher.GetNfaDeltasFor(source);
+ Debug.Assert(deltas.Length == matcher._minterms.Length);
for (int i = 0; i < deltas.Length; ++i)
{
// negative entries are transitions not explored yet, so skip them
@@ -160,7 +160,7 @@ static Dictionary<(int Source, int Target), (TSet Rule, List NfaTargets)> G
(int Source, int Target) key = (source.Id, targetId);
if (!result.TryGetValue(key, out (TSet Rule, List NfaTargets) entry))
{
- entry = (builder._solver.Empty, new List());
+ entry = (matcher.Solver.Empty, new List());
}
// If this state has an NFA transition for the same minterm, then associate
// those with the transition.
@@ -168,24 +168,24 @@ static Dictionary<(int Source, int Target), (TSet Rule, List NfaTargets)> G
{
foreach (int nfaTarget in nfaTargets)
{
- entry.NfaTargets.Add(builder._nfaStateArray[nfaTarget]);
+ entry.NfaTargets.Add(matcher._nfaCoreIdArray[nfaTarget]);
}
}
// Expand the rule for this minterm
- result[key] = (builder._solver.Or(entry.Rule, builder._minterms[i]), entry.NfaTargets);
+ result[key] = (matcher.Solver.Or(entry.Rule, matcher._minterms[i]), entry.NfaTargets);
}
}
}
return result;
}
- static string FormatInfo(SymbolicRegexBuilder builder, int transitionCount)
+ static string FormatInfo(SymbolicRegexMatcher matcher, int transitionCount)
{
StringBuilder sb = new();
- sb.Append($"States = {builder._stateCache.Count}
");
+ sb.Append($"States = {matcher._stateCache.Count}
");
sb.Append($"Transitions = {transitionCount}
");
- sb.Append($"Min Terms ({builder._solver.GetMinterms()!.Length}) = ").AppendJoin(',',
- DescribeLabels(builder._solver.GetMinterms()!, builder));
+ sb.Append($"Min Terms ({matcher.Solver.GetMinterms()!.Length}) = ").AppendJoin(',',
+ DescribeLabels(matcher.Solver.GetMinterms()!, matcher._builder));
return sb.ToString();
}
@@ -200,13 +200,13 @@ static IEnumerable DescribeLabels(IEnumerable labels, SymbolicRege
static string DescribeLabel(TSet label, SymbolicRegexBuilder builder) =>
WebUtility.HtmlEncode(builder._solver.PrettyPrint(label, builder._charSetSolver));
- static IEnumerable> GetInitialStates(SymbolicRegexMatcher matcher)
+ static IEnumerable> GetInitialStates(SymbolicRegexMatcher matcher)
{
- foreach (DfaMatchingState state in matcher._dotstarredInitialStates)
+ foreach (MatchingState state in matcher._dotstarredInitialStates)
yield return state;
- foreach (DfaMatchingState state in matcher._initialStates)
+ foreach (MatchingState state in matcher._initialStates)
yield return state;
- foreach (DfaMatchingState state in matcher._reverseInitialStates)
+ foreach (MatchingState state in matcher._reverseInitialStates)
yield return state;
}
}
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.Explore.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.Explore.cs
index 6808434ef984197a841d8290a5f86974d00afb6b..09880c1ad448afcc8d4839b73f106d4f6a14926d 100644
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.Explore.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.Explore.cs
@@ -16,89 +16,91 @@ internal sealed partial class SymbolicRegexMatcher
[ExcludeFromCodeCoverage(Justification = "Currently only used for testing")]
public override void Explore(bool includeDotStarred, bool includeReverse, bool includeOriginal, bool exploreDfa, bool exploreNfa)
{
- Debug.Assert(_builder._minterms is not null);
-
- // Track seen states to avoid exploring twice
- HashSet> seen = new();
- // Use a queue for unexplored states
- // This results in a breadth-first exploration
- Queue> toExplore = new();
+ lock (this)
+ {
+ // Track seen states to avoid exploring twice
+ HashSet> seen = new();
+ // Use a queue for unexplored states
+ // This results in a breadth-first exploration
+ Queue> toExplore = new();
- // Explore all initial states as requested
- if (includeDotStarred)
- EnqueueAll(_dotstarredInitialStates, seen, toExplore);
- if (includeReverse)
- EnqueueAll(_reverseInitialStates, seen, toExplore);
- if (includeOriginal)
- EnqueueAll(_initialStates, seen, toExplore);
+ // Explore all initial states as requested
+ if (includeDotStarred)
+ EnqueueAll(_dotstarredInitialStates, seen, toExplore);
+ if (includeReverse)
+ EnqueueAll(_reverseInitialStates, seen, toExplore);
+ if (includeOriginal)
+ EnqueueAll(_initialStates, seen, toExplore);
- if (exploreDfa)
- {
- while (toExplore.Count > 0)
+ if (exploreDfa)
{
- // Don't dequeue yet, because a transition might fail
- DfaMatchingState state = toExplore.Peek();
- // Include the special minterm for the last end-of-line if the state is sensitive to it
- int maxMinterm = state.StartsWithLineAnchor ? _builder._minterms.Length : _builder._minterms.Length - 1;
- // Explore successor states for each minterm
- for (int mintermId = 0; mintermId <= maxMinterm; ++mintermId)
+ while (toExplore.Count > 0)
{
- int offset = (state.Id << _builder._mintermsLog) | mintermId;
- if (!_builder.TryCreateNewTransition(state, mintermId, offset, true, out DfaMatchingState? nextState))
- goto DfaLimitReached;
- EnqueueIfUnseen(nextState, seen, toExplore);
+ // Don't dequeue yet, because a transition might fail
+ MatchingState state = toExplore.Peek();
+ // Include the special minterm for the last end-of-line if the state is sensitive to it
+ int maxMinterm = state.StartsWithLineAnchor ? _minterms!.Length : _minterms!.Length - 1;
+ // Explore successor states for each minterm
+ for (int mintermId = 0; mintermId <= maxMinterm; ++mintermId)
+ {
+ int offset = DeltaOffset(state.Id, mintermId);
+ if (!TryCreateNewTransition(state, mintermId, offset, true, out MatchingState? nextState))
+ goto DfaLimitReached;
+ EnqueueIfUnseen(nextState, seen, toExplore);
+ }
+ // Safe to dequeue now that the state has been completely handled
+ toExplore.Dequeue();
}
- // Safe to dequeue now that the state has been completely handled
- toExplore.Dequeue();
}
- }
- DfaLimitReached:
- if (exploreNfa && toExplore.Count > 0)
- {
- // DFA states are broken up into NFA states when they are alternations
- DfaMatchingState[] toBreakUp = toExplore.ToArray();
- toExplore.Clear();
- foreach (DfaMatchingState dfaState in toBreakUp)
+ DfaLimitReached:
+ if (exploreNfa && toExplore.Count > 0)
{
- // Remove state from seen so that it can be added back in if necessary
- seen.Remove(dfaState);
- // Enqueue all elements of a top level alternation or the state itself
- foreach (var element in dfaState.Node.EnumerateAlternationBranches())
+ // DFA states are broken up into NFA states when they are alternations
+ MatchingState[] toBreakUp = toExplore.ToArray();
+ toExplore.Clear();
+ foreach (MatchingState dfaState in toBreakUp)
{
- int nfaState = _builder.CreateNfaState(element, dfaState.PrevCharKind);
- EnqueueIfUnseen(_builder.GetCoreState(nfaState), seen, toExplore);
+ // Remove state from seen so that it can be added back in if necessary
+ seen.Remove(dfaState);
+ // Enqueue all elements of a top level alternation or the state itself
+ ForEachNfaState(dfaState.Node, dfaState.PrevCharKind, (this, seen, toExplore),
+ static (int nfaId, (SymbolicRegexMatcher Matcher, HashSet> Seen, Queue> ToExplore) args) =>
+ {
+ MatchingState? coreState = args.Matcher.GetState(args.Matcher.GetCoreStateId(nfaId));
+ EnqueueIfUnseen(coreState, args.Seen, args.ToExplore);
+ });
}
- }
- while (toExplore.Count > 0)
- {
- // NFA transitions can't fail, so its safe to dequeue here
- DfaMatchingState state = toExplore.Dequeue();
- // Include the special minterm for the last end-of-line if the state is sensitive to it
- int maxMinterm = state.StartsWithLineAnchor ? _builder._minterms.Length : _builder._minterms.Length - 1;
- // Explore successor states for each minterm
- for (int mintermId = 0; mintermId <= maxMinterm; ++mintermId)
+ while (toExplore.Count > 0)
{
- int nfaOffset = (_builder._nfaStateArrayInverse[state.Id] << _builder._mintermsLog) | mintermId;
- int[] nextNfaStates = _builder.CreateNewNfaTransition(_builder._nfaStateArrayInverse[state.Id], mintermId, nfaOffset);
- foreach (int nextNfaState in nextNfaStates)
+ // NFA transitions can't fail, so it's safe to dequeue here
+ MatchingState state = toExplore.Dequeue();
+ // Include the special minterm for the last end-of-line if the state is sensitive to it
+ int maxMinterm = state.StartsWithLineAnchor ? _minterms.Length : _minterms.Length - 1;
+ // Explore successor states for each minterm
+ for (int mintermId = 0; mintermId <= maxMinterm; ++mintermId)
{
- EnqueueIfUnseen(_builder.GetCoreState(nextNfaState), seen, toExplore);
+ int nfaOffset = DeltaOffset(_nfaIdByCoreId[state.Id], mintermId);
+ int[] nextNfaStates = CreateNewNfaTransition(_nfaIdByCoreId[state.Id], mintermId, nfaOffset);
+ foreach (int nextNfaState in nextNfaStates)
+ {
+ EnqueueIfUnseen(GetState(GetCoreStateId(nextNfaState)), seen, toExplore);
+ }
}
}
}
}
- static void EnqueueAll(DfaMatchingState[] states, HashSet> seen, Queue> toExplore)
+ static void EnqueueAll(MatchingState[] states, HashSet> seen, Queue> toExplore)
{
- foreach (DfaMatchingState state in states)
+ foreach (MatchingState state in states)
{
EnqueueIfUnseen(state, seen, toExplore);
}
}
- static void EnqueueIfUnseen(DfaMatchingState state, HashSet> seen, Queue> queue)
+ static void EnqueueIfUnseen(MatchingState state, HashSet> seen, Queue> queue)
{
if (seen.Add(state))
{
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.Sample.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.Sample.cs
index e5040d7b121894ac2f662077c7e6aeeebd8cc953..dc62647080b0e9f997186af7fba3495bfc5ca458 100644
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.Sample.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.Sample.cs
@@ -30,133 +30,134 @@ internal sealed partial class SymbolicRegexMatcher
[ExcludeFromCodeCoverage(Justification = "Currently only used for testing")]
public override IEnumerable SampleMatches(int k, int randomseed)
{
- // Zero is treated as no seed, instead using a system provided one
- Random random = randomseed != 0 ? new Random(randomseed) : new Random();
-
- ISolver solver = _builder._solver;
- CharSetSolver charSetSolver = _builder._charSetSolver;
+ lock (this)
+ {
+ // Zero is treated as no seed, instead using a system provided one
+ Random random = randomseed != 0 ? new Random(randomseed) : new Random();
+ CharSetSolver charSetSolver = _builder._charSetSolver;
- // Create helper BDDs for handling anchors and preferentially generating ASCII inputs
- BDD asciiWordCharacters = charSetSolver.Or(new BDD[] {
+ // Create helper BDDs for handling anchors and preferentially generating ASCII inputs
+ BDD asciiWordCharacters = charSetSolver.Or(new BDD[] {
charSetSolver.CreateBDDFromRange('A', 'Z'),
charSetSolver.CreateBDDFromRange('a', 'z'),
charSetSolver.CreateBDDFromChar('_'),
charSetSolver.CreateBDDFromRange('0', '9')});
- // Visible ASCII range for input character generation
- BDD ascii = charSetSolver.CreateBDDFromRange('\x20', '\x7E');
- BDD asciiNonWordCharacters = charSetSolver.And(ascii, charSetSolver.Not(asciiWordCharacters));
-
- // Set up two sets of minterms, one with the additional special minterm for the last end-of-line
- Debug.Assert(_builder._minterms is not null);
- int[] mintermIdsWithoutZ = new int[_builder._minterms.Length];
- int[] mintermIdsWithZ = new int[_builder._minterms.Length + 1];
- for (int i = 0; i < _builder._minterms.Length; ++i)
- {
- mintermIdsWithoutZ[i] = i;
- mintermIdsWithZ[i] = i;
- }
- mintermIdsWithZ[_builder._minterms.Length] = _builder._minterms.Length;
-
- for (int i = 0; i < k; i++)
- {
- // Holds the generated input so far
- StringBuilder inputSoFar = new();
- StringBuilder? latestCandidate = null;
+ // Visible ASCII range for input character generation
+ BDD ascii = charSetSolver.CreateBDDFromRange('\x20', '\x7E');
+ BDD asciiNonWordCharacters = charSetSolver.And(ascii, charSetSolver.Not(asciiWordCharacters));
+
+ // Set up two sets of minterms, one with the additional special minterm for the last end-of-line
+ Debug.Assert(_minterms is not null);
+ int[] mintermIdsWithoutZ = new int[_minterms.Length];
+ int[] mintermIdsWithZ = new int[_minterms.Length + 1];
+ for (int i = 0; i < _minterms.Length; ++i)
+ {
+ mintermIdsWithoutZ[i] = i;
+ mintermIdsWithZ[i] = i;
+ }
+ mintermIdsWithZ[_minterms.Length] = _minterms.Length;
- // Current set of states reached initially contains just the root
- NfaMatchingState states = new(_builder);
- // Here one could also consider previous characters for example for \b, \B, and ^ anchors
- // and initialize inputSoFar accordingly
- states.InitializeFrom(_initialStates[GetCharKind(ReadOnlySpan.Empty, -1)]);
- CurrentState statesWrapper = new(states);
+ for (int i = 0; i < k; i++)
+ {
+ // Holds the generated input so far
+ StringBuilder inputSoFar = new();
+ StringBuilder? latestCandidate = null;
- // Used for end suffixes
- List possibleEndings = new();
+ // Current set of states reached initially contains just the root
+ NfaMatchingState states = new();
+ // Here one could also consider previous characters for example for \b, \B, and ^ anchors
+ // and initialize inputSoFar accordingly
+ states.InitializeFrom(this, _initialStates[GetCharKind(ReadOnlySpan.Empty, -1)]);
+ CurrentState statesWrapper = new(states);
- while (true)
- {
- Debug.Assert(states.NfaStateSet.Count > 0);
+ // Used for end suffixes
+ List possibleEndings = new();
- // Gather the possible endings for satisfying nullability
- possibleEndings.Clear();
- if (NfaStateHandler.CanBeNullable(ref statesWrapper))
+ while (true)
{
- // Unconditionally final state or end of the input due to \Z anchor for example
- if (NfaStateHandler.IsNullable(ref statesWrapper) ||
- NfaStateHandler.IsNullableFor(_builder, ref statesWrapper, CharKind.BeginningEnd))
- {
- possibleEndings.Add("");
- }
+ Debug.Assert(states.NfaStateSet.Count > 0);
- // End of line due to end-of-line anchor
- if (NfaStateHandler.IsNullableFor(_builder, ref statesWrapper, CharKind.Newline))
+ // Gather the possible endings for satisfying nullability
+ possibleEndings.Clear();
+ if (SymbolicRegexMatcher.NfaStateHandler.CanBeNullable(this, in statesWrapper))
{
- possibleEndings.Add("\n");
+ // Unconditionally final state or end of the input due to \Z anchor for example
+ if (SymbolicRegexMatcher.NfaStateHandler.IsNullable(this, in statesWrapper) ||
+ SymbolicRegexMatcher.NfaStateHandler.IsNullableFor(this, in statesWrapper, CharKind.BeginningEnd))
+ {
+ possibleEndings.Add("");
+ }
+
+ // End of line due to end-of-line anchor
+ if (SymbolicRegexMatcher.NfaStateHandler.IsNullableFor(this, in statesWrapper, CharKind.Newline))
+ {
+ possibleEndings.Add("\n");
+ }
+
+ // Related to word border due to \b or \B
+ if (SymbolicRegexMatcher.NfaStateHandler.IsNullableFor(this, in statesWrapper, CharKind.WordLetter))
+ {
+ possibleEndings.Add(ChooseChar(random, asciiWordCharacters, ascii, charSetSolver).ToString());
+ }
+
+ // Related to word border due to \b or \B
+ if (SymbolicRegexMatcher