diff --git a/src/libraries/System.Text.RegularExpressions/src/System.Text.RegularExpressions.csproj b/src/libraries/System.Text.RegularExpressions/src/System.Text.RegularExpressions.csproj index 94490e85c60c9bfbea8ca83952265271356f42e9..8bab1fced79b63f72a70cda8470c3a106b3cc187 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System.Text.RegularExpressions.csproj +++ b/src/libraries/System.Text.RegularExpressions/src/System.Text.RegularExpressions.csproj @@ -70,6 +70,7 @@ + diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/MatchingState.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/MatchingState.cs index 27a4223eeccf538b6ad6b52aecc582c7517bd663..7ece9a91265b81cbf45e4e6e9feb6cca17f44ab5 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/MatchingState.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/MatchingState.cs @@ -104,6 +104,43 @@ internal bool IsNullableFor(uint nextCharKind) return Node.IsNullableFor(context); } + /// + /// Builds a with the relevant flags set. + /// + /// a solver for + /// whether this state is an initial state + /// the flags for this matching state + internal StateFlags BuildStateFlags(ISolver solver, bool isInitial) + { + StateFlags info = 0; + + if (isInitial) + { + info |= StateFlags.IsInitialFlag; + } + + if (IsDeadend(solver)) + { + info |= StateFlags.IsDeadendFlag; + } + + if (Node.CanBeNullable) + { + info |= StateFlags.CanBeNullableFlag; + if (Node.IsNullable) + { + info |= StateFlags.IsNullableFlag; + } + } + + if (Node.Kind != SymbolicRegexNodeKind.DisableBacktrackingSimulation) + { + info |= StateFlags.SimulatesBacktrackingFlag; + } + + return info; + } + public override bool Equals(object? obj) => obj is MatchingState s && PrevCharKind == s.PrevCharKind && Node.Equals(s.Node); diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/StateFlags.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/StateFlags.cs new file mode 100644 index 0000000000000000000000000000000000000000..cd859350352fff1931ae59eb965be8cf8807fae3 --- /dev/null +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/StateFlags.cs @@ -0,0 +1,36 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Runtime.CompilerServices; + +namespace System.Text.RegularExpressions.Symbolic +{ + /// + /// These flags provide context-independent information available for every state. They provide a fast way to evaluate + /// conditions in the inner matching loops of . The matcher caches one of these + /// for every state, for which they are created by . + /// In DFA mode the cached flags are used directly, while in NFA mode the + /// handles aggregating the flags in the state set. + /// + [Flags] + internal enum StateFlags : byte + { + IsInitialFlag = 1, + IsDeadendFlag = 2, + IsNullableFlag = 4, + CanBeNullableFlag = 8, + SimulatesBacktrackingFlag = 16, + } + + /// + /// These extension methods for make checking for the presence of flags more concise. + /// + internal static class StateFlagsExtensions + { + internal static bool IsInitial(this StateFlags info) => (info & StateFlags.IsInitialFlag) != 0; + internal static bool IsDeadend(this StateFlags info) => (info & StateFlags.IsDeadendFlag) != 0; + internal static bool IsNullable(this StateFlags info) => (info & StateFlags.IsNullableFlag) != 0; + internal static bool CanBeNullable(this StateFlags info) => (info & StateFlags.CanBeNullableFlag) != 0; + internal static bool SimulatesBacktracking(this StateFlags info) => (info & StateFlags.SimulatesBacktrackingFlag) != 0; + } +} diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.Automata.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.Automata.cs index 9912da4da8ef39c801cf288710b839072ccc65a0..b1092ad3c0e39d305f0cdab16c5cb8b7c95d8bb9 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.Automata.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.Automata.cs @@ -40,17 +40,7 @@ internal sealed partial class SymbolicRegexMatcher /// Maps state IDs to context-independent information for all states in . /// The first valid entry is at index 1. /// - private ContextIndependentState[] _stateInfo; - - /// Context-independent information available for every state. - [Flags] - private enum ContextIndependentState : byte - { - IsInitial = 1, - IsDeadend = 2, - IsNullable = 4, - CanBeNullable = 8, - } + private StateFlags[] _stateFlagsArray; /// /// The transition function for DFA mode. @@ -152,19 +142,6 @@ private Span GetDeltasFor(MatchingState state) return _nfaDelta.AsSpan(nfaState << _mintermsLog, numMinterms); } - /// Get context-independent information for the given state. - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private (bool IsInitial, bool IsDeadend, bool IsNullable, bool CanBeNullable) GetStateInfo(int stateId) - { - Debug.Assert(stateId > 0); - - ContextIndependentState info = _stateInfo[stateId]; - return ((info & ContextIndependentState.IsInitial) != 0, - (info & ContextIndependentState.IsDeadend) != 0, - (info & ContextIndependentState.IsNullable) != 0, - (info & ContextIndependentState.CanBeNullable) != 0); - } - /// /// Create a state with given node and previous character context. /// @@ -202,43 +179,13 @@ private MatchingState GetOrCreateState_NoLock(SymbolicRegexNode node int newsize = _stateArray.Length * 2; ArrayResizeAndVolatilePublish(ref _stateArray, newsize); ArrayResizeAndVolatilePublish(ref _dfaDelta, newsize << _mintermsLog); - ArrayResizeAndVolatilePublish(ref _stateInfo, newsize); + ArrayResizeAndVolatilePublish(ref _stateFlagsArray, newsize); } _stateArray[state.Id] = state; - _stateInfo[state.Id] = BuildStateInfo(state.Id, isInitialState, state.IsDeadend(Solver), state.Node.IsNullable, state.Node.CanBeNullable); + _stateFlagsArray[state.Id] = state.BuildStateFlags(Solver, isInitialState); } return state; - - // Assign the context-independent information for the given state - static ContextIndependentState BuildStateInfo(int stateId, bool isInitial, bool isDeadend, bool isNullable, bool canBeNullable) - { - Debug.Assert(stateId > 0); - Debug.Assert(!isNullable || canBeNullable); - - ContextIndependentState info = 0; - - if (isInitial) - { - info |= ContextIndependentState.IsInitial; - } - - if (isDeadend) - { - info |= ContextIndependentState.IsDeadend; - } - - if (canBeNullable) - { - info |= ContextIndependentState.CanBeNullable; - if (isNullable) - { - info |= ContextIndependentState.IsNullable; - } - } - - return info; - } } /// diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.Dgml.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.Dgml.cs index 157fd7d332db92d877894096d82dfb9d7cf642cd..6c4dee6866d98cf502e0638f9d30f9e142a7d381 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.Dgml.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.Dgml.cs @@ -35,7 +35,7 @@ public override void SaveDGML(TextWriter writer, int maxLabelLength) string nodeDgmlView = $"{(info == string.Empty ? info : $"Previous: {info} ")}{(deriv == string.Empty ? "()" : deriv)}"; writer.WriteLine(" ", state.Id, nodeDgmlView); - if (GetStateInfo(state.Id).IsInitial) + if (_stateFlagsArray[state.Id].IsInitial()) { writer.WriteLine(" "); } diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.Sample.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.Sample.cs index dc62647080b0e9f997186af7fba3495bfc5ca458..3bc9ce768592423ab4125ce5d92dfb960d319380 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.Sample.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.Sample.cs @@ -79,11 +79,11 @@ public override IEnumerable SampleMatches(int k, int randomseed) // Gather the possible endings for satisfying nullability possibleEndings.Clear(); - if (SymbolicRegexMatcher.NfaStateHandler.CanBeNullable(this, in statesWrapper)) + StateFlags flags = SymbolicRegexMatcher.NfaStateHandler.GetStateFlags(this, in statesWrapper); + if (flags.CanBeNullable()) { // Unconditionally final state or end of the input due to \Z anchor for example - if (SymbolicRegexMatcher.NfaStateHandler.IsNullable(this, in statesWrapper) || - SymbolicRegexMatcher.NfaStateHandler.IsNullableFor(this, in statesWrapper, CharKind.BeginningEnd)) + if (flags.IsNullable() || SymbolicRegexMatcher.NfaStateHandler.IsNullableFor(this, in statesWrapper, CharKind.BeginningEnd)) { possibleEndings.Add(""); } diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.cs index b84df67463f6807d8a7289d276eacbde6c678cbe..70390343c3405c7e4c73161bc20ae39280724e8a 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.cs @@ -1,4 +1,4 @@ -// Licensed to the .NET Foundation under one or more agreements. +// Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. using System.Collections.Generic; @@ -173,7 +173,7 @@ private SymbolicRegexMatcher(SymbolicRegexBuilder builder, SymbolicRegexNo // Initialization for fields in SymbolicRegexMatcher.Automata.cs _stateArray = new MatchingState[InitialDfaStateCapacity]; - _stateInfo = new ContextIndependentState[InitialDfaStateCapacity]; + _stateFlagsArray = new StateFlags[InitialDfaStateCapacity]; _dfaDelta = new int[InitialDfaStateCapacity << _mintermsLog]; // Initialize a lookup array for the character kinds of each minterm ID. This includes one "special" minterm @@ -453,8 +453,8 @@ public SymbolicMatch FindMatch(RegexRunnerMode mode, ReadOnlySpan input, i input; bool done = currentState.NfaState is not null ? - FindEndPositionDeltas(input, mode, ref pos, ref currentState, ref endPos, ref endStateId, ref initialStatePos, ref initialStatePosCandidate) : - FindEndPositionDeltas(input, mode, ref pos, ref currentState, ref endPos, ref endStateId, ref initialStatePos, ref initialStatePosCandidate); + FindEndPositionDeltas(inputForInnerLoop, mode, ref pos, ref currentState, ref endPos, ref endStateId, ref initialStatePos, ref initialStatePosCandidate) : + FindEndPositionDeltas(inputForInnerLoop, mode, ref pos, ref currentState, ref endPos, ref endStateId, ref initialStatePos, ref initialStatePosCandidate); // If the inner loop indicates that the search finished (for example due to reaching a deadend state) or // there is no more input available, then the whole search is done. @@ -523,11 +523,11 @@ public SymbolicMatch FindMatch(RegexRunnerMode mode, ReadOnlySpan input, i // Loop through each character in the input, transitioning from state to state for each. while (true) { - (bool isInitial, bool isDeadend, bool isNullable, bool canBeNullable) = TStateHandler.GetStateInfo(this, in state); + StateFlags flags = TStateHandler.GetStateFlags(this, in state); // Check if currentState represents an initial state. If it does, call into any possible find optimizations // to hopefully more quickly find the next possible starting location. - if (isInitial) + if (flags.IsInitial()) { if (!TFindOptimizationsHandler.TryFindNextStartingPosition(this, input, ref state, ref pos)) { @@ -538,7 +538,7 @@ public SymbolicMatch FindMatch(RegexRunnerMode mode, ReadOnlySpan input, i } // If the state is a dead end, such that we can't transition anywhere else, end the search. - if (isDeadend) + if (flags.IsDeadend()) { return true; } @@ -547,7 +547,7 @@ public SymbolicMatch FindMatch(RegexRunnerMode mode, ReadOnlySpan input, i // If the state is nullable for the next character, meaning it accepts the empty string, // we found a potential end state. - if (TNullabilityHandler.IsNullableAt(this, in state, positionId, isNullable, canBeNullable)) + if (TNullabilityHandler.IsNullableAt(this, in state, positionId, flags)) { endPos = pos; endStateId = TStateHandler.ExtractNullableCoreStateId(this, in state, input, pos); @@ -652,20 +652,20 @@ public SymbolicMatch FindMatch(RegexRunnerMode mode, ReadOnlySpan input, i // Loop backwards through each character in the input, transitioning from state to state for each. while (true) { - (bool isInitial, bool isDeadend, bool isNullable, bool canBeNullable) = TStateHandler.GetStateInfo(this, in state); + StateFlags flags = TStateHandler.GetStateFlags(this, in state); int positionId = TInputReader.GetPositionId(this, input, pos - 1); // If the state accepts the empty string, we found a valid starting position. Record it and keep going, // since we're looking for the earliest one to occur within bounds. - if (TNullabilityHandler.IsNullableAt(this, in state, positionId, isNullable, canBeNullable)) + if (TNullabilityHandler.IsNullableAt(this, in state, positionId, flags)) { lastStart = pos; } // If we are past the start threshold or if the state is a dead end, bail; we should have already // found a valid starting location. - if (pos <= startThreshold || isDeadend) + if (pos <= startThreshold || flags.IsDeadend()) { Debug.Assert(lastStart != -1); return true; @@ -750,10 +750,10 @@ private Registers FindSubcaptures(ReadOnlySpan input, int i, next.Update(index, targetStateId, newRegisters); int coreStateId = GetCoreStateId(targetStateId); - (bool isInitial, bool isDeadend, bool isNullable, bool canBeNullable) = GetStateInfo(coreStateId); - Debug.Assert(!isDeadend); + StateFlags flags = _stateFlagsArray[coreStateId]; + Debug.Assert(!flags.IsDeadend()); - if (isNullable || (canBeNullable && GetState(coreStateId).IsNullableFor(GetCharKind(input, i + 1)))) + if (flags.IsNullable() || (flags.CanBeNullable() && GetState(coreStateId).IsNullableFor(GetCharKind(input, i + 1)))) { // No lower priority transitions from this or other source states are taken because the // backtracking engines would return the match ending here. @@ -950,7 +950,7 @@ private interface IStateHandler public static abstract int ExtractNullableCoreStateId(SymbolicRegexMatcher matcher, in CurrentState state, ReadOnlySpan input, int pos); public static abstract int FixedLength(SymbolicRegexMatcher matcher, in CurrentState state, uint nextCharKind); public static abstract bool TryTakeTransition(SymbolicRegexMatcher matcher, ref CurrentState state, int mintermId); - public static abstract (bool IsInitial, bool IsDeadend, bool IsNullable, bool CanBeNullable) GetStateInfo(SymbolicRegexMatcher matcher, in CurrentState state); + public static abstract StateFlags GetStateFlags(SymbolicRegexMatcher matcher, in CurrentState state); } /// An for operating over instances configured as DFA states. @@ -1009,8 +1009,8 @@ public static bool TryTakeTransition(SymbolicRegexMatcher matcher, ref Cur /// - whether this state may be contextually nullable /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static (bool IsInitial, bool IsDeadend, bool IsNullable, bool CanBeNullable) GetStateInfo(SymbolicRegexMatcher matcher, in CurrentState state) - => matcher.GetStateInfo(state.DfaStateId); + public static StateFlags GetStateFlags(SymbolicRegexMatcher matcher, in CurrentState state) + => matcher._stateFlagsArray[state.DfaStateId]; } /// An for operating over instances configured as NFA states. @@ -1100,6 +1100,8 @@ public static bool TryTakeTransition(SymbolicRegexMatcher matcher, ref Cur foreach (int nextState in GetNextStates(sourceStates.Values[0].Key, mintermId, matcher)) { nextStates.Add(nextState, out _); + // Nothing is required for backtracking simulation here, since there's just one state so the + // transition itself already handles it. } } else @@ -1108,12 +1110,23 @@ public static bool TryTakeTransition(SymbolicRegexMatcher matcher, ref Cur // their next states. For each source state, get its next states, adding each into // our set (which exists purely for deduping purposes), and if we successfully added // to the set, then add the known-unique state to the destination list. + uint nextCharKind = matcher.GetPositionKind(mintermId); foreach (ref KeyValuePair sourceState in CollectionsMarshal.AsSpan(sourceStates.Values)) { foreach (int nextState in GetNextStates(sourceState.Key, mintermId, matcher)) { nextStates.Add(nextState, out _); } + + // To simulate backtracking, if a source state is nullable then no further transitions are taken + // as the backtracking engines would prefer the match ending here. + int coreStateId = matcher.GetCoreStateId(sourceState.Key); + StateFlags flags = matcher._stateFlagsArray[coreStateId]; + if (flags.SimulatesBacktracking() && + (flags.IsNullable() || (flags.CanBeNullable() && matcher.GetState(coreStateId).IsNullableFor(nextCharKind)))) + { + break; + } } } @@ -1145,35 +1158,27 @@ static int[] GetNextStates(int sourceState, int mintermId, SymbolicRegexMatcher< /// can transition back to a DFA state. /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static (bool IsInitial, bool IsDeadend, bool IsNullable, bool CanBeNullable) GetStateInfo(SymbolicRegexMatcher matcher, in CurrentState state) => - (false, state.NfaState!.NfaStateSet.Count == 0, IsNullable(matcher, in state), CanBeNullable(matcher, in state)); - - /// Check if any underlying core state is unconditionally nullable. - public static bool IsNullable(SymbolicRegexMatcher matcher, in CurrentState state) + public static StateFlags GetStateFlags(SymbolicRegexMatcher matcher, in CurrentState state) { - foreach (ref KeyValuePair nfaState in CollectionsMarshal.AsSpan(state.NfaState!.NfaStateSet.Values)) + SparseIntMap stateSet = state.NfaState!.NfaStateSet; + if (stateSet.Count == 0) { - if (matcher.GetStateInfo(matcher.GetCoreStateId(nfaState.Key)).IsNullable) - { - return true; - } + // In NFA state sets dead ends are never included. Instead an empty set of states represents a dead end. + return StateFlags.IsDeadendFlag; } - - return false; - } - - /// Check if any underlying core state can be nullable in some context. - public static bool CanBeNullable(SymbolicRegexMatcher matcher, in CurrentState state) - { - foreach (ref KeyValuePair nfaState in CollectionsMarshal.AsSpan(state.NfaState!.NfaStateSet.Values)) + else { - if (matcher.GetStateInfo(matcher.GetCoreStateId(nfaState.Key)).CanBeNullable) + // Build the flags for the set of states by taking a bitwise Or of all the per-state flags and then + // masking out the irrelevant ones. This works because IsNullable and CanBeNullable should be true if + // they are true for any state in the set; SimulatesBacktracking is true for all the states if + // it is true for any state (since it is a phase-wide property); and all other flags are masked out. + StateFlags flags = 0; + foreach (ref KeyValuePair nfaState in CollectionsMarshal.AsSpan(stateSet.Values)) { - return true; + flags |= matcher._stateFlagsArray[matcher.GetCoreStateId(nfaState.Key)]; } + return flags & (StateFlags.IsNullableFlag | StateFlags.CanBeNullableFlag | StateFlags.SimulatesBacktrackingFlag); } - - return false; } #if DEBUG @@ -1284,7 +1289,7 @@ public static bool TryFindNextStartingPosition(SymbolicRegexMatche /// private interface INullabilityHandler { - public static abstract bool IsNullableAt(SymbolicRegexMatcher matcher, in CurrentState state, int positionId, bool isNullable, bool canBeNullable) + public static abstract bool IsNullableAt(SymbolicRegexMatcher matcher, in CurrentState state, int positionId, StateFlags flags) where TStateHandler : struct, IStateHandler; } @@ -1294,11 +1299,11 @@ private interface INullabilityHandler private readonly struct NoAnchorsNullabilityHandler : INullabilityHandler { [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static bool IsNullableAt(SymbolicRegexMatcher matcher, in CurrentState state, int positionId, bool isNullable, bool canBeNullable) + public static bool IsNullableAt(SymbolicRegexMatcher matcher, in CurrentState state, int positionId, StateFlags flags) where TStateHandler : struct, IStateHandler { Debug.Assert(!matcher._pattern._info.ContainsSomeAnchor); - return isNullable; + return flags.IsNullable(); } } @@ -1308,10 +1313,10 @@ public static bool IsNullableAt(SymbolicRegexMatcher matche private readonly struct FullNullabilityHandler : INullabilityHandler { [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static bool IsNullableAt(SymbolicRegexMatcher matcher, in CurrentState state, int positionId, bool isNullable, bool canBeNullable) + public static bool IsNullableAt(SymbolicRegexMatcher matcher, in CurrentState state, int positionId, StateFlags flags) where TStateHandler : struct, IStateHandler { - return isNullable || (canBeNullable && TStateHandler.IsNullableFor(matcher, in state, matcher.GetPositionKind(positionId))); + return flags.IsNullable() || (flags.CanBeNullable() && TStateHandler.IsNullableFor(matcher, in state, matcher.GetPositionKind(positionId))); } } } diff --git a/src/libraries/System.Text.RegularExpressions/tests/UnitTests/System.Text.RegularExpressions.Unit.Tests.csproj b/src/libraries/System.Text.RegularExpressions/tests/UnitTests/System.Text.RegularExpressions.Unit.Tests.csproj index 8c45a0c5adbc7e63ce578c4ce6840e5952f972ef..684b36c6d3d4c0ada169bdfadad6e64d35bbf9b5 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/UnitTests/System.Text.RegularExpressions.Unit.Tests.csproj +++ b/src/libraries/System.Text.RegularExpressions/tests/UnitTests/System.Text.RegularExpressions.Unit.Tests.csproj @@ -56,6 +56,7 @@ +