未验证 提交 c5759fae 编写于 作者: O Olli Saarikivi 提交者: GitHub

Fixes for NonBacktracking NFA mode (#72199)

* Fix NFA mode backtracking simulation

* Refactor to StateFlags

* Fix bug in timeout check

* Changes from review
上级 0fe98fc4
......@@ -70,6 +70,7 @@
<Compile Include="System\Text\RegularExpressions\Symbolic\MintermGenerator.cs" />
<Compile Include="System\Text\RegularExpressions\Symbolic\RegexNodeConverter.cs" />
<Compile Include="System\Text\RegularExpressions\Symbolic\SparseIntMap.cs" />
<Compile Include="System\Text\RegularExpressions\Symbolic\StateFlags.cs" />
<Compile Include="System\Text\RegularExpressions\Symbolic\SymbolicMatch.cs" />
<Compile Include="System\Text\RegularExpressions\Symbolic\SymbolicRegexBuilder.cs" />
<Compile Include="System\Text\RegularExpressions\Symbolic\SymbolicRegexNode.cs" />
......
......@@ -104,6 +104,43 @@ internal bool IsNullableFor(uint nextCharKind)
return Node.IsNullableFor(context);
}
/// <summary>
/// Computes the <see cref="StateFlags"/> value that describes this matching state.
/// </summary>
/// <param name="solver">a solver for <typeparamref name="TSet"/>, passed on to the dead-end check</param>
/// <param name="isInitial">whether this state is an initial state</param>
/// <returns>the combination of flags that apply to this matching state</returns>
internal StateFlags BuildStateFlags(ISolver<TSet> solver, bool isInitial)
{
    StateFlags flags = isInitial ? StateFlags.IsInitialFlag : (StateFlags)0;

    if (IsDeadend(solver))
    {
        flags |= StateFlags.IsDeadendFlag;
    }

    // IsNullableFlag is only ever reported together with CanBeNullableFlag.
    if (Node.CanBeNullable)
    {
        flags |= Node.IsNullable
            ? StateFlags.CanBeNullableFlag | StateFlags.IsNullableFlag
            : StateFlags.CanBeNullableFlag;
    }

    // Every node kind other than DisableBacktrackingSimulation is marked as simulating backtracking.
    if (Node.Kind != SymbolicRegexNodeKind.DisableBacktrackingSimulation)
    {
        flags |= StateFlags.SimulatesBacktrackingFlag;
    }

    return flags;
}
public override bool Equals(object? obj) =>
obj is MatchingState<TSet> s && PrevCharKind == s.PrevCharKind && Node.Equals(s.Node);
......
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
using System.Runtime.CompilerServices;
namespace System.Text.RegularExpressions.Symbolic
{
    /// <summary>
    /// Per-state, context-independent facts packed into a single byte so that the inner matching loops of
    /// <see cref="SymbolicRegexMatcher{TSet}"/> can test them cheaply. The matcher caches one value for each state; the
    /// value is produced by <see cref="MatchingState{TSet}.BuildStateFlags(ISolver{TSet}, bool)"/>. DFA mode reads the
    /// cached value as-is, whereas in NFA mode <see cref="SymbolicRegexMatcher{TSet}.NfaStateHandler"/> aggregates the
    /// values of the states in the current state set.
    /// </summary>
    [Flags]
    internal enum StateFlags : byte
    {
        /// <summary>The state is an initial state.</summary>
        IsInitialFlag = 1 << 0,

        /// <summary>The state is a dead end.</summary>
        IsDeadendFlag = 1 << 1,

        /// <summary>The state is unconditionally nullable.</summary>
        IsNullableFlag = 1 << 2,

        /// <summary>The state can be nullable in some context.</summary>
        CanBeNullableFlag = 1 << 3,

        /// <summary>The state's node is of a kind other than DisableBacktrackingSimulation.</summary>
        SimulatesBacktrackingFlag = 1 << 4,
    }

    /// <summary>
    /// Predicate-style extension methods that keep tests for individual <see cref="StateFlags"/> bits short at call sites.
    /// </summary>
    internal static class StateFlagsExtensions
    {
        /// <summary>Tests whether <see cref="StateFlags.IsInitialFlag"/> is set.</summary>
        internal static bool IsInitial(this StateFlags info) =>
            (info & StateFlags.IsInitialFlag) == StateFlags.IsInitialFlag;

        /// <summary>Tests whether <see cref="StateFlags.IsDeadendFlag"/> is set.</summary>
        internal static bool IsDeadend(this StateFlags info) =>
            (info & StateFlags.IsDeadendFlag) == StateFlags.IsDeadendFlag;

        /// <summary>Tests whether <see cref="StateFlags.IsNullableFlag"/> is set.</summary>
        internal static bool IsNullable(this StateFlags info) =>
            (info & StateFlags.IsNullableFlag) == StateFlags.IsNullableFlag;

        /// <summary>Tests whether <see cref="StateFlags.CanBeNullableFlag"/> is set.</summary>
        internal static bool CanBeNullable(this StateFlags info) =>
            (info & StateFlags.CanBeNullableFlag) == StateFlags.CanBeNullableFlag;

        /// <summary>Tests whether <see cref="StateFlags.SimulatesBacktrackingFlag"/> is set.</summary>
        internal static bool SimulatesBacktracking(this StateFlags info) =>
            (info & StateFlags.SimulatesBacktrackingFlag) == StateFlags.SimulatesBacktrackingFlag;
    }
}
......@@ -40,17 +40,7 @@ internal sealed partial class SymbolicRegexMatcher<TSet>
/// Maps state IDs to context-independent information for all states in <see cref="_stateArray"/>.
/// The first valid entry is at index 1.
/// </summary>
private ContextIndependentState[] _stateInfo;
/// <summary>Bit set of the context-independent facts recorded for every state.</summary>
[Flags]
private enum ContextIndependentState : byte
{
    /// <summary>The state is an initial state.</summary>
    IsInitial = 0x01,

    /// <summary>The state is a dead end.</summary>
    IsDeadend = 0x02,

    /// <summary>The state is nullable.</summary>
    IsNullable = 0x04,

    /// <summary>The state can be nullable.</summary>
    CanBeNullable = 0x08,
}
private StateFlags[] _stateFlagsArray;
/// <summary>
/// The transition function for DFA mode.
......@@ -152,19 +142,6 @@ private Span<int> GetDeltasFor(MatchingState<TSet> state)
return _nfaDelta.AsSpan(nfaState << _mintermsLog, numMinterms);
}
/// <summary>Looks up the cached context-independent flags of a state and unpacks them into named booleans.</summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private (bool IsInitial, bool IsDeadend, bool IsNullable, bool CanBeNullable) GetStateInfo(int stateId)
{
    // Only positive state ids are expected here.
    Debug.Assert(stateId > 0);

    ContextIndependentState cached = _stateInfo[stateId];

    bool isInitial = (cached & ContextIndependentState.IsInitial) != 0;
    bool isDeadend = (cached & ContextIndependentState.IsDeadend) != 0;
    bool isNullable = (cached & ContextIndependentState.IsNullable) != 0;
    bool canBeNullable = (cached & ContextIndependentState.CanBeNullable) != 0;

    return (isInitial, isDeadend, isNullable, canBeNullable);
}
/// <summary>
/// Create a state with given node and previous character context.
/// </summary>
......@@ -202,43 +179,13 @@ private MatchingState<TSet> GetOrCreateState_NoLock(SymbolicRegexNode<TSet> node
int newsize = _stateArray.Length * 2;
ArrayResizeAndVolatilePublish(ref _stateArray, newsize);
ArrayResizeAndVolatilePublish(ref _dfaDelta, newsize << _mintermsLog);
ArrayResizeAndVolatilePublish(ref _stateInfo, newsize);
ArrayResizeAndVolatilePublish(ref _stateFlagsArray, newsize);
}
_stateArray[state.Id] = state;
_stateInfo[state.Id] = BuildStateInfo(state.Id, isInitialState, state.IsDeadend(Solver), state.Node.IsNullable, state.Node.CanBeNullable);
_stateFlagsArray[state.Id] = state.BuildStateFlags(Solver, isInitialState);
}
return state;
// Packs the context-independent facts about the given state into a single ContextIndependentState value
static ContextIndependentState BuildStateInfo(int stateId, bool isInitial, bool isDeadend, bool isNullable, bool canBeNullable)
{
    Debug.Assert(stateId > 0);
    // A state that is nullable must also be one that can be nullable.
    Debug.Assert(canBeNullable || !isNullable);

    ContextIndependentState packed = isInitial ? ContextIndependentState.IsInitial : (ContextIndependentState)0;

    if (isDeadend)
    {
        packed |= ContextIndependentState.IsDeadend;
    }

    // IsNullable is recorded only when CanBeNullable is recorded as well.
    if (canBeNullable)
    {
        packed |= isNullable
            ? ContextIndependentState.CanBeNullable | ContextIndependentState.IsNullable
            : ContextIndependentState.CanBeNullable;
    }

    return packed;
}
}
/// <summary>
......
......@@ -35,7 +35,7 @@ public override void SaveDGML(TextWriter writer, int maxLabelLength)
string nodeDgmlView = $"{(info == string.Empty ? info : $"Previous: {info}&#13;")}{(deriv == string.Empty ? "()" : deriv)}";
writer.WriteLine(" <Node Id=\"{0}\" Label=\"{0}\" Category=\"State\" Group=\"Collapsed\" StateInfo=\"{1}\">", state.Id, nodeDgmlView);
if (GetStateInfo(state.Id).IsInitial)
if (_stateFlagsArray[state.Id].IsInitial())
{
writer.WriteLine(" <Category Ref=\"InitialState\" />");
}
......
......@@ -79,11 +79,11 @@ public override IEnumerable<string> SampleMatches(int k, int randomseed)
// Gather the possible endings for satisfying nullability
possibleEndings.Clear();
if (SymbolicRegexMatcher<TSet>.NfaStateHandler.CanBeNullable(this, in statesWrapper))
StateFlags flags = SymbolicRegexMatcher<TSet>.NfaStateHandler.GetStateFlags(this, in statesWrapper);
if (flags.CanBeNullable())
{
// Unconditionally final state or end of the input due to \Z anchor for example
if (SymbolicRegexMatcher<TSet>.NfaStateHandler.IsNullable(this, in statesWrapper) ||
SymbolicRegexMatcher<TSet>.NfaStateHandler.IsNullableFor(this, in statesWrapper, CharKind.BeginningEnd))
if (flags.IsNullable() || SymbolicRegexMatcher<TSet>.NfaStateHandler.IsNullableFor(this, in statesWrapper, CharKind.BeginningEnd))
{
possibleEndings.Add("");
}
......
// Licensed to the .NET Foundation under one or more agreements.
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
using System.Collections.Generic;
......@@ -173,7 +173,7 @@ private SymbolicRegexMatcher(SymbolicRegexBuilder<TSet> builder, SymbolicRegexNo
// Initialization for fields in SymbolicRegexMatcher.Automata.cs
_stateArray = new MatchingState<TSet>[InitialDfaStateCapacity];
_stateInfo = new ContextIndependentState[InitialDfaStateCapacity];
_stateFlagsArray = new StateFlags[InitialDfaStateCapacity];
_dfaDelta = new int[InitialDfaStateCapacity << _mintermsLog];
// Initialize a lookup array for the character kinds of each minterm ID. This includes one "special" minterm
......@@ -453,8 +453,8 @@ public SymbolicMatch FindMatch(RegexRunnerMode mode, ReadOnlySpan<char> input, i
input;
bool done = currentState.NfaState is not null ?
FindEndPositionDeltas<NfaStateHandler, TInputReader, TFindOptimizationsHandler, TNullabilityHandler>(input, mode, ref pos, ref currentState, ref endPos, ref endStateId, ref initialStatePos, ref initialStatePosCandidate) :
FindEndPositionDeltas<DfaStateHandler, TInputReader, TFindOptimizationsHandler, TNullabilityHandler>(input, mode, ref pos, ref currentState, ref endPos, ref endStateId, ref initialStatePos, ref initialStatePosCandidate);
FindEndPositionDeltas<NfaStateHandler, TInputReader, TFindOptimizationsHandler, TNullabilityHandler>(inputForInnerLoop, mode, ref pos, ref currentState, ref endPos, ref endStateId, ref initialStatePos, ref initialStatePosCandidate) :
FindEndPositionDeltas<DfaStateHandler, TInputReader, TFindOptimizationsHandler, TNullabilityHandler>(inputForInnerLoop, mode, ref pos, ref currentState, ref endPos, ref endStateId, ref initialStatePos, ref initialStatePosCandidate);
// If the inner loop indicates that the search finished (for example due to reaching a deadend state) or
// there is no more input available, then the whole search is done.
......@@ -523,11 +523,11 @@ public SymbolicMatch FindMatch(RegexRunnerMode mode, ReadOnlySpan<char> input, i
// Loop through each character in the input, transitioning from state to state for each.
while (true)
{
(bool isInitial, bool isDeadend, bool isNullable, bool canBeNullable) = TStateHandler.GetStateInfo(this, in state);
StateFlags flags = TStateHandler.GetStateFlags(this, in state);
// Check if currentState represents an initial state. If it does, call into any possible find optimizations
// to hopefully more quickly find the next possible starting location.
if (isInitial)
if (flags.IsInitial())
{
if (!TFindOptimizationsHandler.TryFindNextStartingPosition<TInputReader>(this, input, ref state, ref pos))
{
......@@ -538,7 +538,7 @@ public SymbolicMatch FindMatch(RegexRunnerMode mode, ReadOnlySpan<char> input, i
}
// If the state is a dead end, such that we can't transition anywhere else, end the search.
if (isDeadend)
if (flags.IsDeadend())
{
return true;
}
......@@ -547,7 +547,7 @@ public SymbolicMatch FindMatch(RegexRunnerMode mode, ReadOnlySpan<char> input, i
// If the state is nullable for the next character, meaning it accepts the empty string,
// we found a potential end state.
if (TNullabilityHandler.IsNullableAt<TStateHandler>(this, in state, positionId, isNullable, canBeNullable))
if (TNullabilityHandler.IsNullableAt<TStateHandler>(this, in state, positionId, flags))
{
endPos = pos;
endStateId = TStateHandler.ExtractNullableCoreStateId(this, in state, input, pos);
......@@ -652,20 +652,20 @@ public SymbolicMatch FindMatch(RegexRunnerMode mode, ReadOnlySpan<char> input, i
// Loop backwards through each character in the input, transitioning from state to state for each.
while (true)
{
(bool isInitial, bool isDeadend, bool isNullable, bool canBeNullable) = TStateHandler.GetStateInfo(this, in state);
StateFlags flags = TStateHandler.GetStateFlags(this, in state);
int positionId = TInputReader.GetPositionId(this, input, pos - 1);
// If the state accepts the empty string, we found a valid starting position. Record it and keep going,
// since we're looking for the earliest one to occur within bounds.
if (TNullabilityHandler.IsNullableAt<TStateHandler>(this, in state, positionId, isNullable, canBeNullable))
if (TNullabilityHandler.IsNullableAt<TStateHandler>(this, in state, positionId, flags))
{
lastStart = pos;
}
// If we are past the start threshold or if the state is a dead end, bail; we should have already
// found a valid starting location.
if (pos <= startThreshold || isDeadend)
if (pos <= startThreshold || flags.IsDeadend())
{
Debug.Assert(lastStart != -1);
return true;
......@@ -750,10 +750,10 @@ private Registers FindSubcaptures<TInputReader>(ReadOnlySpan<char> input, int i,
next.Update(index, targetStateId, newRegisters);
int coreStateId = GetCoreStateId(targetStateId);
(bool isInitial, bool isDeadend, bool isNullable, bool canBeNullable) = GetStateInfo(coreStateId);
Debug.Assert(!isDeadend);
StateFlags flags = _stateFlagsArray[coreStateId];
Debug.Assert(!flags.IsDeadend());
if (isNullable || (canBeNullable && GetState(coreStateId).IsNullableFor(GetCharKind<TInputReader>(input, i + 1))))
if (flags.IsNullable() || (flags.CanBeNullable() && GetState(coreStateId).IsNullableFor(GetCharKind<TInputReader>(input, i + 1))))
{
// No lower priority transitions from this or other source states are taken because the
// backtracking engines would return the match ending here.
......@@ -950,7 +950,7 @@ private interface IStateHandler
public static abstract int ExtractNullableCoreStateId(SymbolicRegexMatcher<TSet> matcher, in CurrentState state, ReadOnlySpan<char> input, int pos);
public static abstract int FixedLength(SymbolicRegexMatcher<TSet> matcher, in CurrentState state, uint nextCharKind);
public static abstract bool TryTakeTransition(SymbolicRegexMatcher<TSet> matcher, ref CurrentState state, int mintermId);
public static abstract (bool IsInitial, bool IsDeadend, bool IsNullable, bool CanBeNullable) GetStateInfo(SymbolicRegexMatcher<TSet> matcher, in CurrentState state);
public static abstract StateFlags GetStateFlags(SymbolicRegexMatcher<TSet> matcher, in CurrentState state);
}
/// <summary>An <see cref="IStateHandler"/> for operating over <see cref="CurrentState"/> instances configured as DFA states.</summary>
......@@ -1009,8 +1009,8 @@ public static bool TryTakeTransition(SymbolicRegexMatcher<TSet> matcher, ref Cur
/// - whether this state may be contextually nullable
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static (bool IsInitial, bool IsDeadend, bool IsNullable, bool CanBeNullable) GetStateInfo(SymbolicRegexMatcher<TSet> matcher, in CurrentState state)
=> matcher.GetStateInfo(state.DfaStateId);
public static StateFlags GetStateFlags(SymbolicRegexMatcher<TSet> matcher, in CurrentState state)
=> matcher._stateFlagsArray[state.DfaStateId];
}
/// <summary>An <see cref="IStateHandler"/> for operating over <see cref="CurrentState"/> instances configured as NFA states.</summary>
......@@ -1100,6 +1100,8 @@ public static bool TryTakeTransition(SymbolicRegexMatcher<TSet> matcher, ref Cur
foreach (int nextState in GetNextStates(sourceStates.Values[0].Key, mintermId, matcher))
{
nextStates.Add(nextState, out _);
// Nothing is required for backtracking simulation here, since there's just one state so the
// transition itself already handles it.
}
}
else
......@@ -1108,12 +1110,23 @@ public static bool TryTakeTransition(SymbolicRegexMatcher<TSet> matcher, ref Cur
// their next states. For each source state, get its next states, adding each into
// our set (which exists purely for deduping purposes), and if we successfully added
// to the set, then add the known-unique state to the destination list.
uint nextCharKind = matcher.GetPositionKind(mintermId);
foreach (ref KeyValuePair<int, int> sourceState in CollectionsMarshal.AsSpan(sourceStates.Values))
{
foreach (int nextState in GetNextStates(sourceState.Key, mintermId, matcher))
{
nextStates.Add(nextState, out _);
}
// To simulate backtracking, if a source state is nullable then no further transitions are taken
// as the backtracking engines would prefer the match ending here.
int coreStateId = matcher.GetCoreStateId(sourceState.Key);
StateFlags flags = matcher._stateFlagsArray[coreStateId];
if (flags.SimulatesBacktracking() &&
(flags.IsNullable() || (flags.CanBeNullable() && matcher.GetState(coreStateId).IsNullableFor(nextCharKind))))
{
break;
}
}
}
......@@ -1145,35 +1158,27 @@ static int[] GetNextStates(int sourceState, int mintermId, SymbolicRegexMatcher<
/// can transition back to a DFA state.
/// </remarks>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
// NOTE(review): this span appears to interleave the removed GetStateInfo/IsNullable/CanBeNullable helpers with the
// added GetStateFlags method (a diff shown without +/- markers), so it does not parse as written. Read the lines
// that mention StateFlags/stateSet as the post-change code — confirm against the repository.
public static (bool IsInitial, bool IsDeadend, bool IsNullable, bool CanBeNullable) GetStateInfo(SymbolicRegexMatcher<TSet> matcher, in CurrentState state) =>
(false, state.NfaState!.NfaStateSet.Count == 0, IsNullable(matcher, in state), CanBeNullable(matcher, in state));
/// <summary>Check if any underlying core state is unconditionally nullable.</summary>
public static bool IsNullable(SymbolicRegexMatcher<TSet> matcher, in CurrentState state)
public static StateFlags GetStateFlags(SymbolicRegexMatcher<TSet> matcher, in CurrentState state)
{
foreach (ref KeyValuePair<int, int> nfaState in CollectionsMarshal.AsSpan(state.NfaState!.NfaStateSet.Values))
SparseIntMap<int> stateSet = state.NfaState!.NfaStateSet;
if (stateSet.Count == 0)
{
if (matcher.GetStateInfo(matcher.GetCoreStateId(nfaState.Key)).IsNullable)
{
return true;
}
// In NFA state sets dead ends are never included. Instead an empty set of states represents a dead end.
return StateFlags.IsDeadendFlag;
}
return false;
}
/// <summary>Check if any underlying core state can be nullable in some context.</summary>
public static bool CanBeNullable(SymbolicRegexMatcher<TSet> matcher, in CurrentState state)
{
foreach (ref KeyValuePair<int, int> nfaState in CollectionsMarshal.AsSpan(state.NfaState!.NfaStateSet.Values))
else
{
if (matcher.GetStateInfo(matcher.GetCoreStateId(nfaState.Key)).CanBeNullable)
// Build the flags for the set of states by taking a bitwise Or of all the per-state flags and then
// masking out the irrelevant ones. This works because IsNullable and CanBeNullable should be true if
// they are true for any state in the set; SimulatesBacktracking is true for all the states if
// it is true for any state (since it is a phase-wide property); and all other flags are masked out.
StateFlags flags = 0;
foreach (ref KeyValuePair<int, int> nfaState in CollectionsMarshal.AsSpan(stateSet.Values))
{
return true;
// The entry's key is mapped to its core state id, and that state's cached flags are OR-ed into the union.
flags |= matcher._stateFlagsArray[matcher.GetCoreStateId(nfaState.Key)];
}
// IsInitialFlag and IsDeadendFlag are not in the mask, so a non-empty set never reports either of them.
return flags & (StateFlags.IsNullableFlag | StateFlags.CanBeNullableFlag | StateFlags.SimulatesBacktrackingFlag);
}
return false;
}
#if DEBUG
......@@ -1284,7 +1289,7 @@ public static bool TryFindNextStartingPosition<TInputReader>(SymbolicRegexMatche
/// </summary>
private interface INullabilityHandler
{
// Reports whether the given state is to be treated as nullable at the position identified by positionId.
// NOTE(review): the two IsNullableAt lines below appear to be the before/after forms of one declaration from a
// diff shown without +/- markers (bool isNullable, bool canBeNullable vs. StateFlags flags); the `where` clause
// that follows belongs to whichever form is kept — confirm against the repository.
public static abstract bool IsNullableAt<TStateHandler>(SymbolicRegexMatcher<TSet> matcher, in CurrentState state, int positionId, bool isNullable, bool canBeNullable)
public static abstract bool IsNullableAt<TStateHandler>(SymbolicRegexMatcher<TSet> matcher, in CurrentState state, int positionId, StateFlags flags)
where TStateHandler : struct, IStateHandler;
}
......@@ -1294,11 +1299,11 @@ private interface INullabilityHandler
private readonly struct NoAnchorsNullabilityHandler : INullabilityHandler
{
// Nullability check that consults only the unconditional nullability bit; positionId and state are not used.
// NOTE(review): the paired signature lines and paired return lines below appear to be the before/after forms from
// a diff shown without +/- markers (isNullable/canBeNullable vs. StateFlags flags) — confirm against the repository.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static bool IsNullableAt<TStateHandler>(SymbolicRegexMatcher<TSet> matcher, in CurrentState state, int positionId, bool isNullable, bool canBeNullable)
public static bool IsNullableAt<TStateHandler>(SymbolicRegexMatcher<TSet> matcher, in CurrentState state, int positionId, StateFlags flags)
where TStateHandler : struct, IStateHandler
{
// This handler is only expected to be used with patterns that contain no anchors; presumably that is why the
// context-dependent check can be skipped here — confirm.
Debug.Assert(!matcher._pattern._info.ContainsSomeAnchor);
return isNullable;
return flags.IsNullable();
}
}
......@@ -1308,10 +1313,10 @@ public static bool IsNullableAt<TStateHandler>(SymbolicRegexMatcher<TSet> matche
private readonly struct FullNullabilityHandler : INullabilityHandler
{
// Nullability check that also covers context-dependent nullability.
// NOTE(review): the paired signature lines and paired return lines below appear to be the before/after forms from
// a diff shown without +/- markers (isNullable/canBeNullable vs. StateFlags flags) — confirm against the repository.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static bool IsNullableAt<TStateHandler>(SymbolicRegexMatcher<TSet> matcher, in CurrentState state, int positionId, bool isNullable, bool canBeNullable)
public static bool IsNullableAt<TStateHandler>(SymbolicRegexMatcher<TSet> matcher, in CurrentState state, int positionId, StateFlags flags)
where TStateHandler : struct, IStateHandler
{
// Unconditional nullability short-circuits. Otherwise, only when the state can be nullable at all, the state
// handler is asked whether it is nullable for the character kind of the given position.
return isNullable || (canBeNullable && TStateHandler.IsNullableFor(matcher, in state, matcher.GetPositionKind(positionId)));
return flags.IsNullable() || (flags.CanBeNullable() && TStateHandler.IsNullableFor(matcher, in state, matcher.GetPositionKind(positionId)));
}
}
}
......
......@@ -56,6 +56,7 @@
<Compile Include="..\..\src\System\Text\RegularExpressions\Symbolic\ISolver.cs" Link="Production\ISolver.cs" />
<Compile Include="..\..\src\System\Text\RegularExpressions\Symbolic\MintermGenerator.cs" Link="Production\MintermGenerator.cs" />
<Compile Include="..\..\src\System\Text\RegularExpressions\Symbolic\RegexNodeConverter.cs" Link="Production\RegexNodeConverter.cs" />
<Compile Include="..\..\src\System\Text\RegularExpressions\Symbolic\StateFlags.cs" Link="Production\StateFlags.cs" />
<Compile Include="..\..\src\System\Text\RegularExpressions\Symbolic\SymbolicRegexBuilder.cs" Link="Production\SymbolicRegexBuilder.cs" />
<Compile Include="..\..\src\System\Text\RegularExpressions\Symbolic\SymbolicRegexInfo.cs" Link="Production\SymbolicRegexInfo.cs" />
<Compile Include="..\..\src\System\Text\RegularExpressions\Symbolic\SymbolicRegexKind.cs" Link="Production\SymbolicRegexKind.cs" />
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册