diff --git a/src/Workspaces/Core/Portable/EmbeddedLanguages/RegularExpressions/RegexParser.cs b/src/Workspaces/Core/Portable/EmbeddedLanguages/RegularExpressions/RegexParser.cs index 735b9fd0e179c6975e7deec566de803c3ff9c6e8..c10e018ca586388b207c0632638f0f1d32b0a2de 100644 --- a/src/Workspaces/Core/Portable/EmbeddedLanguages/RegularExpressions/RegexParser.cs +++ b/src/Workspaces/Core/Portable/EmbeddedLanguages/RegularExpressions/RegexParser.cs @@ -1,6 +1,7 @@ // Copyright (c) Microsoft. All Rights Reserved. Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. using System; +using System.Collections.Generic; using System.Collections.Immutable; using System.Diagnostics; using System.Linq; @@ -12,7 +13,6 @@ namespace Microsoft.CodeAnalysis.EmbeddedLanguages.RegularExpressions { - using System.Collections.Generic; using static EmbeddedSyntaxHelpers; using static RegexHelpers; @@ -20,6 +20,46 @@ namespace Microsoft.CodeAnalysis.EmbeddedLanguages.RegularExpressions using RegexToken = EmbeddedSyntaxToken; using RegexTrivia = EmbeddedSyntaxTrivia; + /// + /// Produces a from a sequence of characters. + /// + /// Importantly, this parser attempts to replicate diagnostics with almost the exact same text + /// as the native .Net regex parser. This is important so that users get an understandable + /// experience where it appears to them that this is all one cohesive system and that the IDE + /// will let them discover and fix the same issues they would encounter when previously trying + /// to just compile and execute these regexes. + /// + /// + /// Invariants we try to maintain (and should consider a bug if we do not): l 1. If the .net + /// regex parser does not report an error for a given pattern, we should not either. it would be + /// very bad if we told the user there was something wrong with there pattern when there really + /// wasn't. + /// + /// 2. If the .net regex parser does report an error for a given pattern, we should either not + /// report an error (not recommended) or report the same error at an appropriate location in the + /// pattern. Not reporting the error can be confusing as the user will think their pattern is + /// ok, when it really is not. However, it can be acceptable to do this as it's not telling + /// them that something is actually wrong, and it may be too difficult to find and report the + /// same error. Note: there is only one time we do this in this parser (see the deviation + /// documented in ). + /// + /// Note1: the above invariants make life difficult at times. This happens due to the fact that + /// the .net parser is multi-pass. Meaning it does a first scan (which may report errors), then + /// does the full parse. This means that it might report an error in a later location during + /// the initial scan than it would during the parse. We replicate that behavior to follow the + /// second invariant. + /// + /// Note2: It would be nice if we could check these invariants at runtime, so we could control + /// our behavior by the behavior of the real .net regex engine. For example, if the .net regex + /// engine did not report any issues, we could suppress any diagnostics we generated and we + /// could log an NFW to record which pattern we deviated on so we could fix the issue for a + /// future release. However, we cannot do this as the .net regex engine has no guarantees about + /// its performance characteristics. For example, certain regex patterns might end up causing + /// that engine to consume unbounded amounts of CPU and memory. This is because the .net regex + /// engine is not just a parser, but something that builds an actual recognizer using techniques + /// that are not necessarily bounded. As such, while we test ourselves around it during our + /// tests, we cannot do the same at runtime as part of the IDE. + /// internal partial struct RegexParser { private readonly ImmutableDictionary _captureNamesToSpan;