diff --git a/src/EditorFeatures/CSharp/BraceMatching/CSharpRegexBraceMatcher.cs b/src/EditorFeatures/CSharp/BraceMatching/CSharpRegexBraceMatcher.cs deleted file mode 100644 index 08de2e2560374d73ae32d7efaa171fd6135aa731..0000000000000000000000000000000000000000 --- a/src/EditorFeatures/CSharp/BraceMatching/CSharpRegexBraceMatcher.cs +++ /dev/null @@ -1,16 +0,0 @@ -// Copyright (c) Microsoft. All Rights Reserved. Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. - -using System.Threading; -using System.Threading.Tasks; -using Microsoft.CodeAnalysis.CSharp; -using Microsoft.CodeAnalysis.Editor.Implementation.BraceMatching; - -namespace Microsoft.CodeAnalysis.Editor.CSharp.BraceMatching -{ - [ExportBraceMatcher(LanguageNames.CSharp)] - internal class CSharpRegexBraceMatcher : IBraceMatcher - { - public Task FindBracesAsync(Document document, int position, CancellationToken cancellationToken) - => CommonRegexBraceMatcher.FindBracesAsync(document, position, cancellationToken); - } -} diff --git a/src/EditorFeatures/CSharpTest/Classification/SemanticClassifierTests.cs b/src/EditorFeatures/CSharpTest/Classification/SemanticClassifierTests.cs index 9e50e38afd2085775b77b267da143997a13e699b..f7b7143db2b1bc393ff4de3973e5a47768cf6942 100644 --- a/src/EditorFeatures/CSharpTest/Classification/SemanticClassifierTests.cs +++ b/src/EditorFeatures/CSharpTest/Classification/SemanticClassifierTests.cs @@ -2505,242 +2505,5 @@ interface unmanaged {} TypeParameter("T"), Keyword("unmanaged")); } - - [WpfFact, Trait(Traits.Feature, Traits.Features.Classification)] - public async Task TestRegex1() - { - await TestAsync( -@" -using System.Text.RegularExpressions; - -class Program -{ - - void Goo() - { - var r = new Regex(@""$(\a\t\u0020)|[^\p{Lu}-a\w\sa-z-[m-p]]+?(?#comment)|(\b\G\z)|(?sub){0,5}?^""); - } -}", -Keyword("var"), -Class("Regex"), -Regex.Anchor("$"), -Regex.Grouping("("), -Regex.OtherEscape("\\"), -Regex.OtherEscape("a"), -Regex.OtherEscape("\\"), -Regex.OtherEscape("t"), -Regex.OtherEscape("\\"), -Regex.OtherEscape("u"), -Regex.OtherEscape("0020"), -Regex.Grouping(")"), -Regex.Alternation("|"), -Regex.CharacterClass("["), -Regex.CharacterClass("^"), -Regex.CharacterClass("\\"), -Regex.CharacterClass("p"), -Regex.CharacterClass("{"), -Regex.CharacterClass("Lu"), -Regex.CharacterClass("}"), -Regex.Text("-a"), -Regex.CharacterClass("\\"), -Regex.CharacterClass("w"), -Regex.CharacterClass("\\"), -Regex.CharacterClass("s"), -Regex.Text("a"), -Regex.CharacterClass("-"), -Regex.Text("z"), -Regex.CharacterClass("-"), -Regex.CharacterClass("["), -Regex.Text("m"), -Regex.CharacterClass("-"), -Regex.Text("p"), -Regex.CharacterClass("]"), -Regex.CharacterClass("]"), -Regex.Quantifier("+"), -Regex.Quantifier("?"), -Regex.Comment("(?#comment)"), -Regex.Alternation("|"), -Regex.Grouping("("), -Regex.Anchor("\\"), -Regex.Anchor("b"), -Regex.Anchor("\\"), -Regex.Anchor("G"), -Regex.Anchor("\\"), -Regex.Anchor("z"), -Regex.Grouping(")"), -Regex.Alternation("|"), -Regex.Grouping("("), -Regex.Grouping("?"), -Regex.Grouping("<"), -Regex.Grouping("name"), -Regex.Grouping(">"), -Regex.Text("sub"), -Regex.Grouping(")"), -Regex.Quantifier("{"), -Regex.Quantifier("0"), -Regex.Quantifier(","), -Regex.Quantifier("5"), -Regex.Quantifier("}"), -Regex.Quantifier("?"), -Regex.Anchor("^")); - } - - [WpfFact, Trait(Traits.Feature, Traits.Features.Classification)] - public async Task TestRegex2() - { - await TestAsync( -@" -using System.Text.RegularExpressions; - -class Program -{ - - void Goo() - { - // language=regex - var r = @""$(\a\t\u0020)|[^\p{Lu}-a\w\sa-z-[m-p]]+?(?#comment)|(\b\G\z)|(?sub){0,5}?^""; - } -}", -Keyword("var"), -Regex.Anchor("$"), -Regex.Grouping("("), -Regex.OtherEscape("\\"), -Regex.OtherEscape("a"), -Regex.OtherEscape("\\"), -Regex.OtherEscape("t"), -Regex.OtherEscape("\\"), -Regex.OtherEscape("u"), -Regex.OtherEscape("0020"), -Regex.Grouping(")"), -Regex.Alternation("|"), -Regex.CharacterClass("["), -Regex.CharacterClass("^"), -Regex.CharacterClass("\\"), -Regex.CharacterClass("p"), -Regex.CharacterClass("{"), -Regex.CharacterClass("Lu"), -Regex.CharacterClass("}"), -Regex.Text("-a"), -Regex.CharacterClass("\\"), -Regex.CharacterClass("w"), -Regex.CharacterClass("\\"), -Regex.CharacterClass("s"), -Regex.Text("a"), -Regex.CharacterClass("-"), -Regex.Text("z"), -Regex.CharacterClass("-"), -Regex.CharacterClass("["), -Regex.Text("m"), -Regex.CharacterClass("-"), -Regex.Text("p"), -Regex.CharacterClass("]"), -Regex.CharacterClass("]"), -Regex.Quantifier("+"), -Regex.Quantifier("?"), -Regex.Comment("(?#comment)"), -Regex.Alternation("|"), -Regex.Grouping("("), -Regex.Anchor("\\"), -Regex.Anchor("b"), -Regex.Anchor("\\"), -Regex.Anchor("G"), -Regex.Anchor("\\"), -Regex.Anchor("z"), -Regex.Grouping(")"), -Regex.Alternation("|"), -Regex.Grouping("("), -Regex.Grouping("?"), -Regex.Grouping("<"), -Regex.Grouping("name"), -Regex.Grouping(">"), -Regex.Text("sub"), -Regex.Grouping(")"), -Regex.Quantifier("{"), -Regex.Quantifier("0"), -Regex.Quantifier(","), -Regex.Quantifier("5"), -Regex.Quantifier("}"), -Regex.Quantifier("?"), -Regex.Anchor("^")); - } - - [WpfFact, Trait(Traits.Feature, Traits.Features.Classification)] - public async Task TestRegex3() - { - await TestAsync( -@" -using System.Text.RegularExpressions; - -class Program -{ - void Goo() - { - var r = /* language=regex */@""$(\a\t\u0020\\)|[^\p{Lu}-a\w\sa-z-[m-p]]+?(?#comment)|(\b\G\z)|(?sub){0,5}?^""; - } -}", -Keyword("var"), -Regex.Anchor("$"), -Regex.Grouping("("), -Regex.OtherEscape("\\"), -Regex.OtherEscape("a"), -Regex.OtherEscape("\\"), -Regex.OtherEscape("t"), -Regex.OtherEscape("\\"), -Regex.OtherEscape("u"), -Regex.OtherEscape("0020"), -Regex.SelfEscapedCharacter("\\"), -Regex.SelfEscapedCharacter("\\"), -Regex.Grouping(")"), -Regex.Alternation("|"), -Regex.CharacterClass("["), -Regex.CharacterClass("^"), -Regex.CharacterClass("\\"), -Regex.CharacterClass("p"), -Regex.CharacterClass("{"), -Regex.CharacterClass("Lu"), -Regex.CharacterClass("}"), -Regex.Text("-a"), -Regex.CharacterClass("\\"), -Regex.CharacterClass("w"), -Regex.CharacterClass("\\"), -Regex.CharacterClass("s"), -Regex.Text("a"), -Regex.CharacterClass("-"), -Regex.Text("z"), -Regex.CharacterClass("-"), -Regex.CharacterClass("["), -Regex.Text("m"), -Regex.CharacterClass("-"), -Regex.Text("p"), -Regex.CharacterClass("]"), -Regex.CharacterClass("]"), -Regex.Quantifier("+"), -Regex.Quantifier("?"), -Regex.Comment("(?#comment)"), -Regex.Alternation("|"), -Regex.Grouping("("), -Regex.Anchor("\\"), -Regex.Anchor("b"), -Regex.Anchor("\\"), -Regex.Anchor("G"), -Regex.Anchor("\\"), -Regex.Anchor("z"), -Regex.Grouping(")"), -Regex.Alternation("|"), -Regex.Grouping("("), -Regex.Grouping("?"), -Regex.Grouping("<"), -Regex.Grouping("name"), -Regex.Grouping(">"), -Regex.Text("sub"), -Regex.Grouping(")"), -Regex.Quantifier("{"), -Regex.Quantifier("0"), -Regex.Quantifier(","), -Regex.Quantifier("5"), -Regex.Quantifier("}"), -Regex.Quantifier("?"), -Regex.Anchor("^")); - } } } diff --git a/src/EditorFeatures/CSharpTest/ValidateRegexString/ValidateRegexStringTests.cs b/src/EditorFeatures/CSharpTest/ValidateRegexString/ValidateRegexStringTests.cs deleted file mode 100644 index 2652c2d7109ce946e9fd2897d2a9f07e0db04c67..0000000000000000000000000000000000000000 --- a/src/EditorFeatures/CSharpTest/ValidateRegexString/ValidateRegexStringTests.cs +++ /dev/null @@ -1,91 +0,0 @@ -// Copyright (c) Microsoft. All Rights Reserved. Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. - -using System.Collections.Generic; -using System.Threading.Tasks; -using Microsoft.CodeAnalysis.CodeFixes; -using Microsoft.CodeAnalysis.CSharp.ValidateRegexString; -using Microsoft.CodeAnalysis.Diagnostics; -using Microsoft.CodeAnalysis.Editor.CSharp.UnitTests.Diagnostics; -using Microsoft.CodeAnalysis.EmbeddedLanguages.RegularExpressions; -using Microsoft.CodeAnalysis.Options; -using Microsoft.CodeAnalysis.Test.Utilities; -using Roslyn.Test.Utilities; -using Xunit; - -namespace Microsoft.CodeAnalysis.Editor.CSharp.UnitTests.ValidateRegexString -{ - public class ValidateRegexStringTests : AbstractCSharpDiagnosticProviderBasedUserDiagnosticTest - { - internal override (DiagnosticAnalyzer, CodeFixProvider) CreateDiagnosticProviderAndFixer(Workspace workspace) - => (new CSharpValidateRegexStringDiagnosticAnalyzer(), null); - - private IDictionary OptionOn() - { - var optionsSet = new Dictionary(); - optionsSet.Add(new OptionKey(RegularExpressionsOptions.ReportInvalidRegexPatterns, LanguageNames.CSharp), true); - optionsSet.Add(new OptionKey(RegularExpressionsOptions.ReportInvalidRegexPatterns, LanguageNames.VisualBasic), true); - return optionsSet; - } - - private IDictionary OptionOff() - { - var optionsSet = new Dictionary(); - optionsSet.Add(new OptionKey(RegularExpressionsOptions.ReportInvalidRegexPatterns, LanguageNames.CSharp), false); - optionsSet.Add(new OptionKey(RegularExpressionsOptions.ReportInvalidRegexPatterns, LanguageNames.VisualBasic), false); - return optionsSet; - } - - [Fact, Trait(Traits.Feature, Traits.Features.ValidateRegexString)] - public async Task TestWarning1() - { - await TestDiagnosticInfoAsync(@" -using System.Text.RegularExpressions; - -class Program -{ - void Main() - { - var r = new Regex(@""[|)|]""); - } -}", - options: OptionOn(), - diagnosticId: IDEDiagnosticIds.RegexPatternDiagnosticId, - diagnosticSeverity: DiagnosticSeverity.Warning, - diagnosticMessage: string.Format(FeaturesResources.Regex_issue_0, WorkspacesResources.Too_many_close_parens)); - } - - [Fact, Trait(Traits.Feature, Traits.Features.ValidateRegexString)] - public async Task TestWarning2() - { - await TestDiagnosticInfoAsync(@" -using System.Text.RegularExpressions; - -class Program -{ - void Main() - { - var r = new Regex(""[|\u0029|]""); - } -}", - options: OptionOn(), - diagnosticId: IDEDiagnosticIds.RegexPatternDiagnosticId, - diagnosticSeverity: DiagnosticSeverity.Warning, - diagnosticMessage: string.Format(FeaturesResources.Regex_issue_0, WorkspacesResources.Too_many_close_parens)); - } - - [Fact, Trait(Traits.Feature, Traits.Features.ValidateRegexString)] - public async Task TestWarningMissing1() - { - await TestDiagnosticMissingAsync(@" -using System.Text.RegularExpressions; - -class Program -{ - void Main() - { - var r = new Regex(@""[|\u0029|]""); - } -}"); - } - } -} diff --git a/src/EditorFeatures/Core.Wpf/Classification/ClassificationTypeFormatDefinitions.cs b/src/EditorFeatures/Core.Wpf/Classification/ClassificationTypeFormatDefinitions.cs index 15795ec9700a260ed5599c0847da07ea3c4b91ac..65513c8a7c8f848d0c6fbf63f29a2e2a14ea0b59 100644 --- a/src/EditorFeatures/Core.Wpf/Classification/ClassificationTypeFormatDefinitions.cs +++ b/src/EditorFeatures/Core.Wpf/Classification/ClassificationTypeFormatDefinitions.cs @@ -345,171 +345,6 @@ private XmlDocCommentTextFormatDefinition() } #endregion - #region Regex - Comment - - -#if dark_theme - private static readonly Color s_regexTextColor = Color.FromRgb(0xd6, 0x9d, 0x85); - private static readonly Color s_regexOtherEscapeColor = Color.FromRgb(0xff, 0xd6, 0x8f); - private static readonly Color s_regexGroupingAndAlternationColor = Color.FromRgb(0x05, 0xc3, 0xba); - private static readonly Color s_characterClassColor = Color.FromRgb(0x00, 0x8a, 0xff); - private static readonly Color s_regexAnchorAndQuantifierColor = Color.FromRgb(0xd7, 0x45, 0x8c); - private static readonly Color s_regexCommentColor = Color.FromRgb(87, 166, 74); -#else - private static readonly Color s_regexTextColor = Color.FromRgb(0x80, 0x00, 0x00); - private static readonly Color s_regexOtherEscapeColor = Color.FromRgb(0x9e, 0x5b, 0x71); - private static readonly Color s_regexGroupingAndAlternationColor = Color.FromRgb(0x05, 0xc3, 0xba); - private static readonly Color s_characterClassColor = Color.FromRgb(0x00, 0x73, 0xff); - private static readonly Color s_regexAnchorAndQuantifierColor = Color.FromRgb(0xff, 0x00, 0xc1); - private static readonly Color s_regexCommentColor = Color.FromRgb(0, 128, 0); -#endif - - [Export(typeof(EditorFormatDefinition))] - [ClassificationType(ClassificationTypeNames = ClassificationTypeNames.RegexComment)] - [Name(ClassificationTypeNames.RegexComment)] - [Order(After = ClassificationTypeNames.StringLiteral)] - [Order(After = ClassificationTypeNames.VerbatimStringLiteral)] - [UserVisible(true)] - [ExcludeFromCodeCoverage] - private class RegexCommentFormatDefinition : ClassificationFormatDefinition - { - private RegexCommentFormatDefinition() - { - this.DisplayName = EditorFeaturesWpfResources.Regex_Comment; - this.ForegroundColor = s_regexCommentColor; - } - } - - [Export(typeof(EditorFormatDefinition))] - [ClassificationType(ClassificationTypeNames = ClassificationTypeNames.RegexCharacterClass)] - [Name(ClassificationTypeNames.RegexCharacterClass)] - [Order(After = ClassificationTypeNames.StringLiteral)] - [Order(After = ClassificationTypeNames.VerbatimStringLiteral)] - [UserVisible(true)] - [ExcludeFromCodeCoverage] - private class RegexCharacterClassFormatDefinition : ClassificationFormatDefinition - { - private RegexCharacterClassFormatDefinition() - { - this.DisplayName = EditorFeaturesWpfResources.Regex_Character_class; - this.ForegroundColor = s_characterClassColor; - } - } - - [Export(typeof(EditorFormatDefinition))] - [ClassificationType(ClassificationTypeNames = ClassificationTypeNames.RegexAnchor)] - [Name(ClassificationTypeNames.RegexAnchor)] - [Order(After = ClassificationTypeNames.StringLiteral)] - [Order(After = ClassificationTypeNames.VerbatimStringLiteral)] - [UserVisible(true)] - [ExcludeFromCodeCoverage] - private class RegexAnchorFormatDefinition : ClassificationFormatDefinition - { - private RegexAnchorFormatDefinition() - { - this.DisplayName = EditorFeaturesWpfResources.Regex_Anchor; - this.ForegroundColor = s_regexAnchorAndQuantifierColor; - } - } - - [Export(typeof(EditorFormatDefinition))] - [ClassificationType(ClassificationTypeNames = ClassificationTypeNames.RegexQuantifier)] - [Name(ClassificationTypeNames.RegexQuantifier)] - [Order(After = ClassificationTypeNames.StringLiteral)] - [Order(After = ClassificationTypeNames.VerbatimStringLiteral)] - [UserVisible(true)] - [ExcludeFromCodeCoverage] - private class RegexQuantifierFormatDefinition : ClassificationFormatDefinition - { - private RegexQuantifierFormatDefinition() - { - this.DisplayName = EditorFeaturesWpfResources.Regex_Quantifier; - this.ForegroundColor = s_regexAnchorAndQuantifierColor; - } - } - - [Export(typeof(EditorFormatDefinition))] - [ClassificationType(ClassificationTypeNames = ClassificationTypeNames.RegexGrouping)] - [Name(ClassificationTypeNames.RegexGrouping)] - [Order(After = ClassificationTypeNames.StringLiteral)] - [Order(After = ClassificationTypeNames.VerbatimStringLiteral)] - [UserVisible(true)] - [ExcludeFromCodeCoverage] - private class RegexGroupingFormatDefinition : ClassificationFormatDefinition - { - private RegexGroupingFormatDefinition() - { - this.DisplayName = EditorFeaturesWpfResources.Regex_Grouping; - this.ForegroundColor = s_regexGroupingAndAlternationColor; - } - } - - [Export(typeof(EditorFormatDefinition))] - [ClassificationType(ClassificationTypeNames = ClassificationTypeNames.RegexAlternation)] - [Name(ClassificationTypeNames.RegexAlternation)] - [Order(After = ClassificationTypeNames.StringLiteral)] - [Order(After = ClassificationTypeNames.VerbatimStringLiteral)] - [UserVisible(true)] - [ExcludeFromCodeCoverage] - private class RegexAlternationFormatDefinition : ClassificationFormatDefinition - { - private RegexAlternationFormatDefinition() - { - this.DisplayName = EditorFeaturesWpfResources.Regex_Alternation; - this.ForegroundColor = s_regexGroupingAndAlternationColor; - } - } - - [Export(typeof(EditorFormatDefinition))] - [ClassificationType(ClassificationTypeNames = ClassificationTypeNames.RegexText)] - [Name(ClassificationTypeNames.RegexText)] - [Order(After = ClassificationTypeNames.StringLiteral)] - [Order(After = ClassificationTypeNames.VerbatimStringLiteral)] - [UserVisible(true)] - [ExcludeFromCodeCoverage] - private class RegexTextFormatDefinition : ClassificationFormatDefinition - { - private RegexTextFormatDefinition() - { - this.DisplayName = EditorFeaturesWpfResources.Regex_Text; - this.ForegroundColor = s_regexTextColor; - } - } - - [Export(typeof(EditorFormatDefinition))] - [ClassificationType(ClassificationTypeNames = ClassificationTypeNames.RegexSelfEscapedCharacter)] - [Name(ClassificationTypeNames.RegexSelfEscapedCharacter)] - [Order(After = ClassificationTypeNames.StringLiteral)] - [Order(After = ClassificationTypeNames.VerbatimStringLiteral)] - [UserVisible(true)] - [ExcludeFromCodeCoverage] - private class RegexSelfEscapedCharacterFormatDefinition : ClassificationFormatDefinition - { - private RegexSelfEscapedCharacterFormatDefinition() - { - this.DisplayName = EditorFeaturesWpfResources.Regex_SelfEscapedCharacter; - this.ForegroundColor = s_regexTextColor; - this.IsBold = true; - } - } - - [Export(typeof(EditorFormatDefinition))] - [ClassificationType(ClassificationTypeNames = ClassificationTypeNames.RegexOtherEscape)] - [Name(ClassificationTypeNames.RegexOtherEscape)] - [Order(After = ClassificationTypeNames.StringLiteral)] - [Order(After = ClassificationTypeNames.VerbatimStringLiteral)] - [UserVisible(true)] - [ExcludeFromCodeCoverage] - private class RegexOtherEscapeFormatDefinition : ClassificationFormatDefinition - { - private RegexOtherEscapeFormatDefinition() - { - this.DisplayName = EditorFeaturesWpfResources.Regex_OtherEscape; - this.ForegroundColor = s_regexOtherEscapeColor; - } - } -#endregion - #region JSON [Export(typeof(EditorFormatDefinition))] diff --git a/src/EditorFeatures/Core.Wpf/EditorFeaturesWpfResources.Designer.cs b/src/EditorFeatures/Core.Wpf/EditorFeaturesWpfResources.Designer.cs index d1a24f7ebd3bc7a77019fa14dd9ad5d5a9841f57..6b422ba5a57d3accbbc5185ef1dbf4cb9b001b7e 100644 --- a/src/EditorFeatures/Core.Wpf/EditorFeaturesWpfResources.Designer.cs +++ b/src/EditorFeatures/Core.Wpf/EditorFeaturesWpfResources.Designer.cs @@ -185,86 +185,5 @@ internal class EditorFeaturesWpfResources { return ResourceManager.GetString("JSON_Text", resourceCulture); } } - - /// - /// Looks up a localized string similar to Regex - Alternation. - /// - internal static string Regex_Alternation { - get { - return ResourceManager.GetString("Regex_Alternation", resourceCulture); - } - } - - /// - /// Looks up a localized string similar to Regex - Anchor. - /// - internal static string Regex_Anchor { - get { - return ResourceManager.GetString("Regex_Anchor", resourceCulture); - } - } - - /// - /// Looks up a localized string similar to Regex - Character class. - /// - internal static string Regex_Character_class { - get { - return ResourceManager.GetString("Regex_Character_class", resourceCulture); - } - } - - /// - /// Looks up a localized string similar to Regex - Comment. - /// - internal static string Regex_Comment { - get { - return ResourceManager.GetString("Regex_Comment", resourceCulture); - } - } - - /// - /// Looks up a localized string similar to Regex - Grouping. - /// - internal static string Regex_Grouping { - get { - return ResourceManager.GetString("Regex_Grouping", resourceCulture); - } - } - - /// - /// Looks up a localized string similar to Regex - Other Escape. - /// - internal static string Regex_OtherEscape { - get { - return ResourceManager.GetString("Regex_OtherEscape", resourceCulture); - } - } - - /// - /// Looks up a localized string similar to Regex - Quantifier. - /// - internal static string Regex_Quantifier { - get { - return ResourceManager.GetString("Regex_Quantifier", resourceCulture); - } - } - - /// - /// Looks up a localized string similar to Regex - Self Escaped Character. - /// - internal static string Regex_SelfEscapedCharacter { - get { - return ResourceManager.GetString("Regex_SelfEscapedCharacter", resourceCulture); - } - } - - /// - /// Looks up a localized string similar to Regex - Text. - /// - internal static string Regex_Text { - get { - return ResourceManager.GetString("Regex_Text", resourceCulture); - } - } } } diff --git a/src/EditorFeatures/Core.Wpf/EditorFeaturesWpfResources.resx b/src/EditorFeatures/Core.Wpf/EditorFeaturesWpfResources.resx index 875ccdcd92b60bc66254ef0543758089ef46b67c..f42536704bc963fbcb8ed63221da24eb449e6c9a 100644 --- a/src/EditorFeatures/Core.Wpf/EditorFeaturesWpfResources.resx +++ b/src/EditorFeatures/Core.Wpf/EditorFeaturesWpfResources.resx @@ -126,33 +126,6 @@ Downloading IntelliSense index for {0} - - Regex - Comment - - - Regex - Character class - - - Regex - Alternation - - - Regex - Anchor - - - Regex - Quantifier - - - Regex - Self Escaped Character - - - Regex - Grouping - - - Regex - Text - - - Regex - Other Escape - JSON Property Name diff --git a/src/EditorFeatures/Core.Wpf/xlf/EditorFeaturesWpfResources.cs.xlf b/src/EditorFeatures/Core.Wpf/xlf/EditorFeaturesWpfResources.cs.xlf index 8482360b3afff63958c361bd0b55683bcb25a38e..df8319a9341a08717cff33fa893b730d095fcd97 100644 --- a/src/EditorFeatures/Core.Wpf/xlf/EditorFeaturesWpfResources.cs.xlf +++ b/src/EditorFeatures/Core.Wpf/xlf/EditorFeaturesWpfResources.cs.xlf @@ -17,51 +17,6 @@ Stahuje se index IntelliSense pro {0}. - - Regex - Comment - Regex - Comment - - - - Regex - Character class - Regex - Character class - - - - Regex - Quantifier - Regex - Quantifier - - - - Regex - Anchor - Regex - Anchor - - - - Regex - Alternation - Regex - Alternation - - - - Regex - Text - Regex - Text - - - - Regex - Grouping - Regex - Grouping - - - - Regex - Self Escaped Character - Regex - Self Escaped Character - - - - Regex - Other Escape - Regex - Other Escape - - JSON Property Name JSON Property Name diff --git a/src/EditorFeatures/Core.Wpf/xlf/EditorFeaturesWpfResources.de.xlf b/src/EditorFeatures/Core.Wpf/xlf/EditorFeaturesWpfResources.de.xlf index e73f2b4354c3b0243246a26db73d57eb2cab8828..a7f3d853bc4e4569e8c1c044e1e2a2743142a977 100644 --- a/src/EditorFeatures/Core.Wpf/xlf/EditorFeaturesWpfResources.de.xlf +++ b/src/EditorFeatures/Core.Wpf/xlf/EditorFeaturesWpfResources.de.xlf @@ -17,51 +17,6 @@ Der IntelliSense-Index für "{0}" wird heruntergeladen. - - Regex - Comment - Regex - Comment - - - - Regex - Character class - Regex - Character class - - - - Regex - Quantifier - Regex - Quantifier - - - - Regex - Anchor - Regex - Anchor - - - - Regex - Alternation - Regex - Alternation - - - - Regex - Text - Regex - Text - - - - Regex - Grouping - Regex - Grouping - - - - Regex - Self Escaped Character - Regex - Self Escaped Character - - - - Regex - Other Escape - Regex - Other Escape - - JSON Property Name JSON Property Name diff --git a/src/EditorFeatures/Core.Wpf/xlf/EditorFeaturesWpfResources.es.xlf b/src/EditorFeatures/Core.Wpf/xlf/EditorFeaturesWpfResources.es.xlf index 976704b78d913843bd5ba1c4c57cb485120df922..d743ba6f0d55150e4cc4b3bb67da1c5cfd77a0c0 100644 --- a/src/EditorFeatures/Core.Wpf/xlf/EditorFeaturesWpfResources.es.xlf +++ b/src/EditorFeatures/Core.Wpf/xlf/EditorFeaturesWpfResources.es.xlf @@ -17,51 +17,6 @@ Descargando el índice de IntelliSense para {0} - - Regex - Comment - Regex - Comment - - - - Regex - Character class - Regex - Character class - - - - Regex - Quantifier - Regex - Quantifier - - - - Regex - Anchor - Regex - Anchor - - - - Regex - Alternation - Regex - Alternation - - - - Regex - Text - Regex - Text - - - - Regex - Grouping - Regex - Grouping - - - - Regex - Self Escaped Character - Regex - Self Escaped Character - - - - Regex - Other Escape - Regex - Other Escape - - JSON Property Name JSON Property Name diff --git a/src/EditorFeatures/Core.Wpf/xlf/EditorFeaturesWpfResources.fr.xlf b/src/EditorFeatures/Core.Wpf/xlf/EditorFeaturesWpfResources.fr.xlf index 3f30b72a2db704c94d2b72d29b8e334beecfe07e..8f6782d434be512838627df76f2f2d9028cc5656 100644 --- a/src/EditorFeatures/Core.Wpf/xlf/EditorFeaturesWpfResources.fr.xlf +++ b/src/EditorFeatures/Core.Wpf/xlf/EditorFeaturesWpfResources.fr.xlf @@ -17,51 +17,6 @@ Téléchargement de l'index IntelliSense pour {0} - - Regex - Comment - Regex - Comment - - - - Regex - Character class - Regex - Character class - - - - Regex - Quantifier - Regex - Quantifier - - - - Regex - Anchor - Regex - Anchor - - - - Regex - Alternation - Regex - Alternation - - - - Regex - Text - Regex - Text - - - - Regex - Grouping - Regex - Grouping - - - - Regex - Self Escaped Character - Regex - Self Escaped Character - - - - Regex - Other Escape - Regex - Other Escape - - JSON Property Name JSON Property Name diff --git a/src/EditorFeatures/Core.Wpf/xlf/EditorFeaturesWpfResources.it.xlf b/src/EditorFeatures/Core.Wpf/xlf/EditorFeaturesWpfResources.it.xlf index e753fde8d01aa3439fa8cf86c0f4844c9e500497..efc9d95f5548e23f6a1ec358f98ac300dd7ea9f6 100644 --- a/src/EditorFeatures/Core.Wpf/xlf/EditorFeaturesWpfResources.it.xlf +++ b/src/EditorFeatures/Core.Wpf/xlf/EditorFeaturesWpfResources.it.xlf @@ -17,51 +17,6 @@ Download dell'indice IntelliSense per {0} - - Regex - Comment - Regex - Comment - - - - Regex - Character class - Regex - Character class - - - - Regex - Quantifier - Regex - Quantifier - - - - Regex - Anchor - Regex - Anchor - - - - Regex - Alternation - Regex - Alternation - - - - Regex - Text - Regex - Text - - - - Regex - Grouping - Regex - Grouping - - - - Regex - Self Escaped Character - Regex - Self Escaped Character - - - - Regex - Other Escape - Regex - Other Escape - - JSON Property Name JSON Property Name diff --git a/src/EditorFeatures/Core.Wpf/xlf/EditorFeaturesWpfResources.ja.xlf b/src/EditorFeatures/Core.Wpf/xlf/EditorFeaturesWpfResources.ja.xlf index bc571e3a8c19d21810fce9b11efcd45dac9c41b1..930234cc0c6eaea3e9c36e8839498282f79c76dd 100644 --- a/src/EditorFeatures/Core.Wpf/xlf/EditorFeaturesWpfResources.ja.xlf +++ b/src/EditorFeatures/Core.Wpf/xlf/EditorFeaturesWpfResources.ja.xlf @@ -17,51 +17,6 @@ {0} の IntelliSense インデックスをダウンロードしています - - Regex - Comment - Regex - Comment - - - - Regex - Character class - Regex - Character class - - - - Regex - Quantifier - Regex - Quantifier - - - - Regex - Anchor - Regex - Anchor - - - - Regex - Alternation - Regex - Alternation - - - - Regex - Text - Regex - Text - - - - Regex - Grouping - Regex - Grouping - - - - Regex - Self Escaped Character - Regex - Self Escaped Character - - - - Regex - Other Escape - Regex - Other Escape - - JSON Property Name JSON Property Name diff --git a/src/EditorFeatures/Core.Wpf/xlf/EditorFeaturesWpfResources.ko.xlf b/src/EditorFeatures/Core.Wpf/xlf/EditorFeaturesWpfResources.ko.xlf index 902f2064ee1a72ee504ce82f455eeef9bcf475b0..f37506a13d7be3f3ab4829c44f10a80a1aee2911 100644 --- a/src/EditorFeatures/Core.Wpf/xlf/EditorFeaturesWpfResources.ko.xlf +++ b/src/EditorFeatures/Core.Wpf/xlf/EditorFeaturesWpfResources.ko.xlf @@ -17,51 +17,6 @@ {0}의 IntelliSense 인덱스 다운로드 중 - - Regex - Comment - Regex - Comment - - - - Regex - Character class - Regex - Character class - - - - Regex - Quantifier - Regex - Quantifier - - - - Regex - Anchor - Regex - Anchor - - - - Regex - Alternation - Regex - Alternation - - - - Regex - Text - Regex - Text - - - - Regex - Grouping - Regex - Grouping - - - - Regex - Self Escaped Character - Regex - Self Escaped Character - - - - Regex - Other Escape - Regex - Other Escape - - JSON Property Name JSON Property Name diff --git a/src/EditorFeatures/Core.Wpf/xlf/EditorFeaturesWpfResources.pl.xlf b/src/EditorFeatures/Core.Wpf/xlf/EditorFeaturesWpfResources.pl.xlf index 7ee280f8afc7418c14ed2909fcda104c01673c51..f5fe20622efc440f48f8a55600f558b011daef8d 100644 --- a/src/EditorFeatures/Core.Wpf/xlf/EditorFeaturesWpfResources.pl.xlf +++ b/src/EditorFeatures/Core.Wpf/xlf/EditorFeaturesWpfResources.pl.xlf @@ -17,51 +17,6 @@ Pobieranie indeksu funkcji IntelliSense dla {0} - - Regex - Comment - Regex - Comment - - - - Regex - Character class - Regex - Character class - - - - Regex - Quantifier - Regex - Quantifier - - - - Regex - Anchor - Regex - Anchor - - - - Regex - Alternation - Regex - Alternation - - - - Regex - Text - Regex - Text - - - - Regex - Grouping - Regex - Grouping - - - - Regex - Self Escaped Character - Regex - Self Escaped Character - - - - Regex - Other Escape - Regex - Other Escape - - JSON Property Name JSON Property Name diff --git a/src/EditorFeatures/Core.Wpf/xlf/EditorFeaturesWpfResources.pt-BR.xlf b/src/EditorFeatures/Core.Wpf/xlf/EditorFeaturesWpfResources.pt-BR.xlf index 77a1e47c0f9a7f018e58081f1e4233d2a531582e..759394bb4de0feec8ff103eb4b71bc4ec3ceebcf 100644 --- a/src/EditorFeatures/Core.Wpf/xlf/EditorFeaturesWpfResources.pt-BR.xlf +++ b/src/EditorFeatures/Core.Wpf/xlf/EditorFeaturesWpfResources.pt-BR.xlf @@ -17,51 +17,6 @@ Baixando o índice do IntelliSense para {0} - - Regex - Comment - Regex - Comment - - - - Regex - Character class - Regex - Character class - - - - Regex - Quantifier - Regex - Quantifier - - - - Regex - Anchor - Regex - Anchor - - - - Regex - Alternation - Regex - Alternation - - - - Regex - Text - Regex - Text - - - - Regex - Grouping - Regex - Grouping - - - - Regex - Self Escaped Character - Regex - Self Escaped Character - - - - Regex - Other Escape - Regex - Other Escape - - JSON Property Name JSON Property Name diff --git a/src/EditorFeatures/Core.Wpf/xlf/EditorFeaturesWpfResources.ru.xlf b/src/EditorFeatures/Core.Wpf/xlf/EditorFeaturesWpfResources.ru.xlf index 8af786b1e1d5045ec0e659d131caef28b08f6312..0bc169826862fb7ed7fa592a2866e51361a55b12 100644 --- a/src/EditorFeatures/Core.Wpf/xlf/EditorFeaturesWpfResources.ru.xlf +++ b/src/EditorFeatures/Core.Wpf/xlf/EditorFeaturesWpfResources.ru.xlf @@ -17,51 +17,6 @@ Загрузка индекса IntelliSense для {0} - - Regex - Comment - Regex - Comment - - - - Regex - Character class - Regex - Character class - - - - Regex - Quantifier - Regex - Quantifier - - - - Regex - Anchor - Regex - Anchor - - - - Regex - Alternation - Regex - Alternation - - - - Regex - Text - Regex - Text - - - - Regex - Grouping - Regex - Grouping - - - - Regex - Self Escaped Character - Regex - Self Escaped Character - - - - Regex - Other Escape - Regex - Other Escape - - JSON Property Name JSON Property Name diff --git a/src/EditorFeatures/Core.Wpf/xlf/EditorFeaturesWpfResources.tr.xlf b/src/EditorFeatures/Core.Wpf/xlf/EditorFeaturesWpfResources.tr.xlf index 3ced7d69f03f12122e53838ab6cf9e93f1307ca9..99ea30ed5f160245985441e21a112f83e4828dbb 100644 --- a/src/EditorFeatures/Core.Wpf/xlf/EditorFeaturesWpfResources.tr.xlf +++ b/src/EditorFeatures/Core.Wpf/xlf/EditorFeaturesWpfResources.tr.xlf @@ -17,51 +17,6 @@ {0} için IntelliSense dizini indiriliyor - - Regex - Comment - Regex - Comment - - - - Regex - Character class - Regex - Character class - - - - Regex - Quantifier - Regex - Quantifier - - - - Regex - Anchor - Regex - Anchor - - - - Regex - Alternation - Regex - Alternation - - - - Regex - Text - Regex - Text - - - - Regex - Grouping - Regex - Grouping - - - - Regex - Self Escaped Character - Regex - Self Escaped Character - - - - Regex - Other Escape - Regex - Other Escape - - JSON Property Name JSON Property Name diff --git a/src/EditorFeatures/Core.Wpf/xlf/EditorFeaturesWpfResources.zh-Hans.xlf b/src/EditorFeatures/Core.Wpf/xlf/EditorFeaturesWpfResources.zh-Hans.xlf index 172695bf35b8759b874950d4fc469beff551b770..3769ca7dfd1ff2398d7ca88d37b804eaf5e1279d 100644 --- a/src/EditorFeatures/Core.Wpf/xlf/EditorFeaturesWpfResources.zh-Hans.xlf +++ b/src/EditorFeatures/Core.Wpf/xlf/EditorFeaturesWpfResources.zh-Hans.xlf @@ -17,51 +17,6 @@ 正在下载用于 {0} 的 IntelliSense 索引 - - Regex - Comment - Regex - Comment - - - - Regex - Character class - Regex - Character class - - - - Regex - Quantifier - Regex - Quantifier - - - - Regex - Anchor - Regex - Anchor - - - - Regex - Alternation - Regex - Alternation - - - - Regex - Text - Regex - Text - - - - Regex - Grouping - Regex - Grouping - - - - Regex - Self Escaped Character - Regex - Self Escaped Character - - - - Regex - Other Escape - Regex - Other Escape - - JSON Property Name JSON Property Name diff --git a/src/EditorFeatures/Core.Wpf/xlf/EditorFeaturesWpfResources.zh-Hant.xlf b/src/EditorFeatures/Core.Wpf/xlf/EditorFeaturesWpfResources.zh-Hant.xlf index 9af24347725c261b6dc42bb3c51f3ee1e0d1f95d..43d0eefaf25f4a19c28ee5a797a0df9461c9ca7a 100644 --- a/src/EditorFeatures/Core.Wpf/xlf/EditorFeaturesWpfResources.zh-Hant.xlf +++ b/src/EditorFeatures/Core.Wpf/xlf/EditorFeaturesWpfResources.zh-Hant.xlf @@ -17,51 +17,6 @@ 正在為 {0} 下載 IntelliSense 索引 - - Regex - Comment - Regex - Comment - - - - Regex - Character class - Regex - Character class - - - - Regex - Quantifier - Regex - Quantifier - - - - Regex - Anchor - Regex - Anchor - - - - Regex - Alternation - Regex - Alternation - - - - Regex - Text - Regex - Text - - - - Regex - Grouping - Regex - Grouping - - - - Regex - Self Escaped Character - Regex - Self Escaped Character - - - - Regex - Other Escape - Regex - Other Escape - - JSON Property Name JSON Property Name diff --git a/src/EditorFeatures/Core/Implementation/BraceMatching/CommonRegexBraceMatcher.cs b/src/EditorFeatures/Core/Implementation/BraceMatching/CommonRegexBraceMatcher.cs deleted file mode 100644 index d0e25c33858081fb0511d6b09bc8184867cb951d..0000000000000000000000000000000000000000 --- a/src/EditorFeatures/Core/Implementation/BraceMatching/CommonRegexBraceMatcher.cs +++ /dev/null @@ -1,104 +0,0 @@ -// Copyright (c) Microsoft. All Rights Reserved. Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. - -using System.Threading; -using System.Threading.Tasks; -using Microsoft.CodeAnalysis.EmbeddedLanguages.RegularExpressions; -using Microsoft.CodeAnalysis.EmbeddedLanguages.VirtualChars; -using Microsoft.CodeAnalysis.LanguageServices; -using Microsoft.CodeAnalysis.Shared.Extensions; -using Roslyn.Utilities; - -namespace Microsoft.CodeAnalysis.Editor.Implementation.BraceMatching -{ - internal static class CommonRegexBraceMatcher - { - internal static async Task FindBracesAsync( - Document document, int position, CancellationToken cancellationToken) - { - var option = document.Project.Solution.Workspace.Options.GetOption( - RegularExpressionsOptions.HighlightRelatedRegexComponentsUnderCursor, document.Project.Language); - if (!option) - { - return default; - } - - var root = await document.GetSyntaxRootAsync(cancellationToken).ConfigureAwait(false); - var token = root.FindToken(position); - - var syntaxFacts = document.GetLanguageService(); - if (RegexPatternDetector.IsDefinitelyNotPattern(token, syntaxFacts)) - { - return null; - } - - var semanticModel = await document.GetSemanticModelAsync(cancellationToken).ConfigureAwait(false); - var detector = RegexPatternDetector.TryGetOrCreate(semanticModel, syntaxFacts, document.GetLanguageService()); - var tree = detector?.TryParseRegexPattern(token, document.GetLanguageService(), cancellationToken); - - if (tree == null) - { - return null; - } - - return GetMatchingBraces(tree, position); - } - - private static BraceMatchingResult? GetMatchingBraces(RegexTree tree, int position) - { - var virtualChar = tree.Text.FirstOrNullable(vc => vc.Span.Contains(position)); - if (virtualChar == null) - { - return null; - } - - var ch = virtualChar.Value; - if (ch != '(' && ch != ')') - { - return null; - } - - return FindBraceHighlights(tree, ch); - } - - private static BraceMatchingResult? FindBraceHighlights(RegexTree tree, VirtualChar ch) - { - var node = FindGroupingNode(tree.Root, ch); - if (node == null) - { - return null; - } - - if (node.OpenParenToken.IsMissing || node.CloseParenToken.IsMissing) - { - return null; - } - - return new BraceMatchingResult( - node.OpenParenToken.VirtualChars[0].Span, - node.CloseParenToken.VirtualChars[0].Span); - } - - private static RegexGroupingNode FindGroupingNode(RegexNode node, VirtualChar ch) - { - if (node is RegexGroupingNode grouping && - (grouping.OpenParenToken.VirtualChars.Contains(ch) || grouping.CloseParenToken.VirtualChars.Contains(ch))) - { - return grouping; - } - - foreach (var child in node) - { - if (child.IsNode) - { - var result = FindGroupingNode(child.Node, ch); - if (result != null) - { - return result; - } - } - } - - return null; - } - } -} diff --git a/src/EditorFeatures/Core/Implementation/Classification/ClassificationTypeDefinitions.cs b/src/EditorFeatures/Core/Implementation/Classification/ClassificationTypeDefinitions.cs index 5dbe52055512e356f11924527ebc5af1362971f1..5cd52f0db53e8ed39f1c305ba118e2dbefa168cc 100644 --- a/src/EditorFeatures/Core/Implementation/Classification/ClassificationTypeDefinitions.cs +++ b/src/EditorFeatures/Core/Implementation/Classification/ClassificationTypeDefinitions.cs @@ -188,53 +188,6 @@ internal sealed class ClassificationTypeDefinitions internal readonly ClassificationTypeDefinition XmlDocCommentTextTypeDefinition; #endregion - #region Regex - [Export] - [Name(ClassificationTypeNames.RegexComment)] - [BaseDefinition(PredefinedClassificationTypeNames.FormalLanguage)] - internal readonly ClassificationTypeDefinition RegexCommentTypeDefinition; - - [Export] - [Name(ClassificationTypeNames.RegexText)] - [BaseDefinition(PredefinedClassificationTypeNames.FormalLanguage)] - internal readonly ClassificationTypeDefinition RegexTextTypeDefinition; - - [Export] - [Name(ClassificationTypeNames.RegexCharacterClass)] - [BaseDefinition(PredefinedClassificationTypeNames.FormalLanguage)] - internal readonly ClassificationTypeDefinition RegexCharacterClassTypeDefinition; - - [Export] - [Name(ClassificationTypeNames.RegexQuantifier)] - [BaseDefinition(PredefinedClassificationTypeNames.FormalLanguage)] - internal readonly ClassificationTypeDefinition RegexQuantifierTypeDefinition; - - [Export] - [Name(ClassificationTypeNames.RegexAnchor)] - [BaseDefinition(PredefinedClassificationTypeNames.FormalLanguage)] - internal readonly ClassificationTypeDefinition RegexAnchorTypeDefinition; - - [Export] - [Name(ClassificationTypeNames.RegexAlternation)] - [BaseDefinition(PredefinedClassificationTypeNames.FormalLanguage)] - internal readonly ClassificationTypeDefinition RegexAlternationTypeDefinition; - - [Export] - [Name(ClassificationTypeNames.RegexOtherEscape)] - [BaseDefinition(PredefinedClassificationTypeNames.FormalLanguage)] - internal readonly ClassificationTypeDefinition RegexOtherEscapeTypeDefinition; - - [Export] - [Name(ClassificationTypeNames.RegexSelfEscapedCharacter)] - [BaseDefinition(PredefinedClassificationTypeNames.FormalLanguage)] - internal readonly ClassificationTypeDefinition RegexSelfEscapedCharacterTypeDefinition; - - [Export] - [Name(ClassificationTypeNames.RegexGrouping)] - [BaseDefinition(PredefinedClassificationTypeNames.FormalLanguage)] - internal readonly ClassificationTypeDefinition RegexGroupingTypeDefinition; - #endregion - #region JSON [Export] [Name(ClassificationTypeNames.JsonComment)] diff --git a/src/EditorFeatures/TestUtilities/Classification/FormattedClassifications.RegexTypes.cs b/src/EditorFeatures/TestUtilities/Classification/FormattedClassifications.RegexTypes.cs deleted file mode 100644 index 654ee74bb162d572a660aca8b84c912c5cf7f61c..0000000000000000000000000000000000000000 --- a/src/EditorFeatures/TestUtilities/Classification/FormattedClassifications.RegexTypes.cs +++ /dev/null @@ -1,40 +0,0 @@ -// Copyright (c) Microsoft. All Rights Reserved. Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. - -using System.Diagnostics; -using Microsoft.CodeAnalysis.Classification; - -namespace Microsoft.CodeAnalysis.Editor.UnitTests.Classification -{ - public static partial class FormattedClassifications - { - public static class Regex - { - [DebuggerStepThrough] - public static FormattedClassification Anchor(string value) => New(value, ClassificationTypeNames.RegexAnchor); - - [DebuggerStepThrough] - public static FormattedClassification Grouping(string value) => New(value, ClassificationTypeNames.RegexGrouping); - - [DebuggerStepThrough] - public static FormattedClassification OtherEscape(string value) => New(value, ClassificationTypeNames.RegexOtherEscape); - - [DebuggerStepThrough] - public static FormattedClassification SelfEscapedCharacter(string value) => New(value, ClassificationTypeNames.RegexSelfEscapedCharacter); - - [DebuggerStepThrough] - public static FormattedClassification Alternation(string value) => New(value, ClassificationTypeNames.RegexAlternation); - - [DebuggerStepThrough] - public static FormattedClassification CharacterClass(string value) => New(value, ClassificationTypeNames.RegexCharacterClass); - - [DebuggerStepThrough] - public static FormattedClassification Text(string value) => New(value, ClassificationTypeNames.RegexText); - - [DebuggerStepThrough] - public static FormattedClassification Quantifier(string value) => New(value, ClassificationTypeNames.RegexQuantifier); - - [DebuggerStepThrough] - public static FormattedClassification Comment(string value) => New(value, ClassificationTypeNames.RegexComment); - } - } -} diff --git a/src/EditorFeatures/VisualBasic/BraceMatching/VisualBasicRegexBraceMatcher.vb b/src/EditorFeatures/VisualBasic/BraceMatching/VisualBasicRegexBraceMatcher.vb deleted file mode 100644 index feecae3052abdd685f590adf1e19961c63cfe6c0..0000000000000000000000000000000000000000 --- a/src/EditorFeatures/VisualBasic/BraceMatching/VisualBasicRegexBraceMatcher.vb +++ /dev/null @@ -1,16 +0,0 @@ -' Copyright (c) Microsoft. All Rights Reserved. Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. - -Imports System.Threading -Imports System.Threading.Tasks -Imports Microsoft.CodeAnalysis.Editor.Implementation.BraceMatching - -Namespace Microsoft.CodeAnalysis.Editor.VisualBasic.BraceMatching - - Friend Class VisualBasicRegexBraceMatcher - Implements IBraceMatcher - - Public Function FindBracesAsync(document As Document, position As Integer, Optional cancellationToken As CancellationToken = Nothing) As Task(Of BraceMatchingResult?) Implements IBraceMatcher.FindBracesAsync - Return CommonRegexBraceMatcher.FindBracesAsync(document, position, cancellationToken) - End Function - End Class -End Namespace diff --git a/src/EditorFeatures/VisualBasicTest/Classification/SemanticClassifierTests.vb b/src/EditorFeatures/VisualBasicTest/Classification/SemanticClassifierTests.vb index 67ffa598487f254a5ed2ae4c306ad864139c1678..370690c3ae7ba44001b40090a5ffd999059e51ef 100644 --- a/src/EditorFeatures/VisualBasicTest/Classification/SemanticClassifierTests.vb +++ b/src/EditorFeatures/VisualBasicTest/Classification/SemanticClassifierTests.vb @@ -531,44 +531,6 @@ End Class" [Class]("AttributeUsage")) End Function - - Public Async Function TestRegex1() As Task - Await TestAsync( -" -imports System.Text.RegularExpressions - -class Program - sub Goo() - ' language=regex - var r = ""$(\b\G\z)|(?sub){0,5}?^"" - end sub -end class", -Regex.Anchor("$"), -Regex.Grouping("("), -Regex.Anchor("\"), -Regex.Anchor("b"), -Regex.Anchor("\"), -Regex.Anchor("G"), -Regex.Anchor("\"), -Regex.Anchor("z"), -Regex.Grouping(")"), -Regex.Alternation("|"), -Regex.Grouping("("), -Regex.Grouping("?"), -Regex.Grouping("<"), -Regex.Grouping("name"), -Regex.Grouping(">"), -Regex.Text("sub"), -Regex.Grouping(")"), -Regex.Quantifier("{"), -Regex.Quantifier("0"), -Regex.Quantifier(","), -Regex.Quantifier("5"), -Regex.Quantifier("}"), -Regex.Quantifier("?"), -Regex.Anchor("^")) - End Function - Public Async Function TestConstField() As Task Dim code = diff --git a/src/EditorFeatures/VisualBasicTest/ValidateRegexString/ValidateRegexStringTests.vb b/src/EditorFeatures/VisualBasicTest/ValidateRegexString/ValidateRegexStringTests.vb deleted file mode 100644 index 534b1e30216ed8dd3a22d35f2326ec9ac01a1f32..0000000000000000000000000000000000000000 --- a/src/EditorFeatures/VisualBasicTest/ValidateRegexString/ValidateRegexStringTests.vb +++ /dev/null @@ -1,64 +0,0 @@ -' Copyright (c) Microsoft. All Rights Reserved. Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. - -Imports Microsoft.CodeAnalysis.CodeFixes -Imports Microsoft.CodeAnalysis.Diagnostics -Imports Microsoft.CodeAnalysis.Editor.VisualBasic.UnitTests.Diagnostics -Imports Microsoft.CodeAnalysis.EmbeddedLanguages.RegularExpressions -Imports Microsoft.CodeAnalysis.Options -Imports Microsoft.CodeAnalysis.VisualBasic.ValidateRegexString - -Namespace Microsoft.CodeAnalysis.Editor.VisualBasic.UnitTests.ValidateRegexString - Public Class ValidateRegexStringTests - Inherits AbstractVisualBasicDiagnosticProviderBasedUserDiagnosticTest - - Friend Overrides Function CreateDiagnosticProviderAndFixer(workspace As Workspace) As (DiagnosticAnalyzer, CodeFixProvider) - Return (New VisualBasicValidateRegexStringDiagnosticAnalyzer(), Nothing) - End Function - - Private Function OptionOn() As IDictionary(Of OptionKey, Object) - Dim values = New Dictionary(Of OptionKey, Object) - values.Add(New OptionKey(RegularExpressionsOptions.ReportInvalidRegexPatterns, LanguageNames.CSharp), True) - values.Add(New OptionKey(RegularExpressionsOptions.ReportInvalidRegexPatterns, LanguageNames.VisualBasic), True) - Return values - End Function - - Private Function OptionOff() As IDictionary(Of OptionKey, Object) - Dim values = New Dictionary(Of OptionKey, Object) - values.Add(New OptionKey(RegularExpressionsOptions.ReportInvalidRegexPatterns, LanguageNames.CSharp), False) - values.Add(New OptionKey(RegularExpressionsOptions.ReportInvalidRegexPatterns, LanguageNames.VisualBasic), False) - Return values - End Function - - - Public Async Function TestWarning1() As Task - Await TestDiagnosticInfoAsync(" - imports System.Text.RegularExpressions - - class Program - sub Main() - var r = new Regex(""[|)|]"") - end sub - end class", - options:=OptionOn(), - diagnosticId:=IDEDiagnosticIds.RegexPatternDiagnosticId, - diagnosticSeverity:=DiagnosticSeverity.Warning, - diagnosticMessage:=String.Format(FeaturesResources.Regex_issue_0, WorkspacesResources.Too_many_close_parens)) - End Function - - - Public Async Function TestWarning2() As Task - Await TestDiagnosticInfoAsync(" - imports System.Text.RegularExpressions - - class Program - sub Main() - var r = new Regex(""""""[|)|]"") - end sub - end class", - options:=OptionOn(), - diagnosticId:=IDEDiagnosticIds.RegexPatternDiagnosticId, - diagnosticSeverity:=DiagnosticSeverity.Warning, - diagnosticMessage:=String.Format(FeaturesResources.Regex_issue_0, WorkspacesResources.Too_many_close_parens)) - End Function - End Class -End Namespace diff --git a/src/Features/CSharp/Portable/ValidateRegexString/CSharpValidateRegexStringDiagnosticAnalyzer.cs b/src/Features/CSharp/Portable/ValidateRegexString/CSharpValidateRegexStringDiagnosticAnalyzer.cs deleted file mode 100644 index 297c61a0660026ae9a078536efac1da8329dcfae..0000000000000000000000000000000000000000 --- a/src/Features/CSharp/Portable/ValidateRegexString/CSharpValidateRegexStringDiagnosticAnalyzer.cs +++ /dev/null @@ -1,20 +0,0 @@ -// Copyright (c) Microsoft. All Rights Reserved. Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. - -using Microsoft.CodeAnalysis.CSharp.EmbeddedLanguages.VirtualChars; -using Microsoft.CodeAnalysis.Diagnostics; -using Microsoft.CodeAnalysis.ValidateRegexString; - -namespace Microsoft.CodeAnalysis.CSharp.ValidateRegexString -{ - [DiagnosticAnalyzer(LanguageNames.CSharp)] - internal class CSharpValidateRegexStringDiagnosticAnalyzer : AbstractValidateRegexStringDiagnosticAnalyzer - { - public CSharpValidateRegexStringDiagnosticAnalyzer() - : base((int)SyntaxKind.StringLiteralToken, - CSharpSyntaxFactsService.Instance, - CSharpSemanticFactsService.Instance, - CSharpVirtualCharService.Instance) - { - } - } -} diff --git a/src/Features/Core/Portable/Diagnostics/Analyzers/IDEDiagnosticIds.cs b/src/Features/Core/Portable/Diagnostics/Analyzers/IDEDiagnosticIds.cs index 11630dbe4ab14bb2f6484ba9595d98ab00baab28..0cae55442320d8cecd1973ce0425e780c8ff1636 100644 --- a/src/Features/Core/Portable/Diagnostics/Analyzers/IDEDiagnosticIds.cs +++ b/src/Features/Core/Portable/Diagnostics/Analyzers/IDEDiagnosticIds.cs @@ -66,8 +66,6 @@ internal static class IDEDiagnosticIds public const string MakeFieldReadonlyDiagnosticId = "IDE0044"; - public const string RegexPatternDiagnosticId = "IDE0045"; - // diagnostic for when we detect a string that should have /*language=json*/ added to it. public const string JsonDetectionDiagnosticId = "IDE0046"; // diagnostic for when we have a known json string and we detect something wrong with it. diff --git a/src/Features/Core/Portable/DocumentHighlighting/AbstractDocumentHighlightsService.cs b/src/Features/Core/Portable/DocumentHighlighting/AbstractDocumentHighlightsService.cs index 50b6477fcb55f9c370fd514b93feb63c17420576..865239c277169544162e265c9a7af98bda90396d 100644 --- a/src/Features/Core/Portable/DocumentHighlighting/AbstractDocumentHighlightsService.cs +++ b/src/Features/Core/Portable/DocumentHighlighting/AbstractDocumentHighlightsService.cs @@ -60,12 +60,6 @@ internal abstract partial class AbstractDocumentHighlightsService : IDocumentHig private async Task> GetDocumentHighlightsInCurrentProcessAsync( Document document, int position, IImmutableSet documentsToSearch, CancellationToken cancellationToken) { - var result = await TryGetRegexPatternHighlightsAsync(document, position, cancellationToken).ConfigureAwait(false); - if (!result.IsDefaultOrEmpty) - { - return result; - } - // use speculative semantic model to see whether we are on a symbol we can do HR var span = new TextSpan(position, 0); var solution = document.Project.Solution; diff --git a/src/Features/Core/Portable/DocumentHighlighting/AbstractDocumentHighlightsService_Regex.cs b/src/Features/Core/Portable/DocumentHighlighting/AbstractDocumentHighlightsService_Regex.cs deleted file mode 100644 index b86393a67313ff6154fa8afa7300f48054cf6a99..0000000000000000000000000000000000000000 --- a/src/Features/Core/Portable/DocumentHighlighting/AbstractDocumentHighlightsService_Regex.cs +++ /dev/null @@ -1,161 +0,0 @@ -// Copyright (c) Microsoft. All Rights Reserved. Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. - -using System; -using System.Collections.Immutable; -using System.Threading; -using System.Threading.Tasks; -using Microsoft.CodeAnalysis.EmbeddedLanguages.Common; -using Microsoft.CodeAnalysis.EmbeddedLanguages.RegularExpressions; -using Microsoft.CodeAnalysis.EmbeddedLanguages.VirtualChars; -using Microsoft.CodeAnalysis.LanguageServices; -using Microsoft.CodeAnalysis.Shared.Extensions; -using Microsoft.CodeAnalysis.Text; -using Roslyn.Utilities; - -namespace Microsoft.CodeAnalysis.DocumentHighlighting -{ - using static EmbeddedSyntaxHelpers; - using RegexToken = EmbeddedSyntaxToken; - - internal abstract partial class AbstractDocumentHighlightsService : IDocumentHighlightsService - { - private async Task> TryGetRegexPatternHighlightsAsync( - Document document, int position, CancellationToken cancellationToken) - { - var option = document.Project.Solution.Workspace.Options.GetOption(RegularExpressionsOptions.HighlightRelatedRegexComponentsUnderCursor, document.Project.Language); - if (!option) - { - return default; - } - - var root = await document.GetSyntaxRootAsync(cancellationToken).ConfigureAwait(false); - var token = root.FindToken(position); - - var syntaxFacts = document.GetLanguageService(); - if (RegexPatternDetector.IsDefinitelyNotPattern(token, syntaxFacts)) - { - return default; - } - - var semanticModel = await document.GetSemanticModelAsync(cancellationToken).ConfigureAwait(false); - var detector = RegexPatternDetector.TryGetOrCreate(semanticModel, syntaxFacts, document.GetLanguageService()); - var tree = detector?.TryParseRegexPattern(token, document.GetLanguageService(), cancellationToken); - - if (tree == null) - { - return default; - } - - return GetHighlights(document, tree, position); - } - - private ImmutableArray GetHighlights( - Document document, RegexTree tree, int position) - { - var referencesOnTheRight = GetReferences(document, tree, position, caretOnLeft: true); - if (!referencesOnTheRight.IsEmpty) - { - return referencesOnTheRight; - } - - // Nothing was on the right of the caret. Return anything we were able to find on - // the left of the caret - var referencesOnTheLeft = GetReferences(document, tree, position - 1, caretOnLeft: false); - return referencesOnTheLeft; - } - - private ImmutableArray GetReferences( - Document document, RegexTree tree, int position, bool caretOnLeft) - { - var virtualChar = tree.Text.FirstOrNullable(vc => vc.Span.Contains(position)); - if (virtualChar == null) - { - return ImmutableArray.Empty; - } - - var ch = virtualChar.Value; - return FindReferenceHighlights(document, tree, ch); - } - - private ImmutableArray FindReferenceHighlights( - Document document, RegexTree tree, VirtualChar ch) - { - var node = FindReferenceNode(tree.Root, ch); - if (node == null) - { - return ImmutableArray.Empty; - } - - var captureToken = GetCaptureToken(node); - if (captureToken.Kind == RegexKind.NumberToken) - { - var val = (int)captureToken.Value; - if (tree.CaptureNumbersToSpan.TryGetValue(val, out var captureSpan)) - { - return CreateHighlights(document, node, captureSpan); - } - } - else - { - var val = (string)captureToken.Value; - if (tree.CaptureNamesToSpan.TryGetValue(val, out var captureSpan)) - { - return CreateHighlights(document, node, captureSpan); - } - } - - return ImmutableArray.Empty; - } - - private ImmutableArray CreateHighlights( - Document document, RegexEscapeNode node, TextSpan captureSpan) - { - return ImmutableArray.Create(new DocumentHighlights(document, - ImmutableArray.Create( - new HighlightSpan(node.GetSpan(), HighlightSpanKind.None), - new HighlightSpan(captureSpan, HighlightSpanKind.None)))); - } - - private RegexToken GetCaptureToken(RegexEscapeNode node) - { - switch (node) - { - case RegexBackreferenceEscapeNode backReference: - return backReference.NumberToken; - case RegexCaptureEscapeNode captureEscape: - return captureEscape.CaptureToken; - case RegexKCaptureEscapeNode kCaptureEscape: - return kCaptureEscape.CaptureToken; - } - - throw new InvalidOperationException(); - } - - private RegexEscapeNode FindReferenceNode(RegexNode node, VirtualChar virtualChar) - { - if (node.Kind == RegexKind.BackreferenceEscape || - node.Kind == RegexKind.CaptureEscape || - node.Kind == RegexKind.KCaptureEscape) - { - if (node.Contains(virtualChar)) - { - return (RegexEscapeNode)node; - } - } - - foreach (var child in node) - { - if (child.IsNode) - { - var result = FindReferenceNode(child.Node, virtualChar); - if (result != null) - { - return result; - } - } - } - - return null; - } - } -} diff --git a/src/Features/Core/Portable/FeaturesResources.Designer.cs b/src/Features/Core/Portable/FeaturesResources.Designer.cs index ffa3e62b19a580b5571660f2342266bf611fe99c..cac5272896e5460124e05c0e5667c1cbebf2f390 100644 --- a/src/Features/Core/Portable/FeaturesResources.Designer.cs +++ b/src/Features/Core/Portable/FeaturesResources.Designer.cs @@ -2589,15 +2589,6 @@ internal class FeaturesResources { } } - /// - /// Looks up a localized string similar to Regex issue: {0}. - /// - internal static string Regex_issue_0 { - get { - return ResourceManager.GetString("Regex_issue_0", resourceCulture); - } - } - /// /// Looks up a localized string similar to Remarks:. /// diff --git a/src/Features/Core/Portable/FeaturesResources.resx b/src/Features/Core/Portable/FeaturesResources.resx index 5420f9e11f7697764b4a458ff40652b9779354d8..6e85d9e977458dd8c10f950b4f9ab1b8620db415 100644 --- a/src/Features/Core/Portable/FeaturesResources.resx +++ b/src/Features/Core/Portable/FeaturesResources.resx @@ -1334,9 +1334,6 @@ This version used in: {2} indexer - - Regex issue: {0} - Enable JSON editor features diff --git a/src/Features/Core/Portable/ValidateRegexString/AbstractValidateRegexStringDiagnosticAnalyzer.cs b/src/Features/Core/Portable/ValidateRegexString/AbstractValidateRegexStringDiagnosticAnalyzer.cs deleted file mode 100644 index 9ba91dca48917fd02c52089c2a877ca2356a654d..0000000000000000000000000000000000000000 --- a/src/Features/Core/Portable/ValidateRegexString/AbstractValidateRegexStringDiagnosticAnalyzer.cs +++ /dev/null @@ -1,103 +0,0 @@ -// Copyright (c) Microsoft. All Rights Reserved. Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. - -using System.Threading; -using Microsoft.CodeAnalysis.CodeStyle; -using Microsoft.CodeAnalysis.Diagnostics; -using Microsoft.CodeAnalysis.LanguageServices; -using Microsoft.CodeAnalysis.EmbeddedLanguages.RegularExpressions; -using Microsoft.CodeAnalysis.EmbeddedLanguages.VirtualChars; - -namespace Microsoft.CodeAnalysis.ValidateRegexString -{ - internal abstract class AbstractValidateRegexStringDiagnosticAnalyzer : AbstractCodeStyleDiagnosticAnalyzer - { - private readonly int _stringLiteralKind; - private readonly ISyntaxFactsService _syntaxFacts; - private readonly ISemanticFactsService _semanticFacts; - private readonly IVirtualCharService _virtualCharService; - - protected AbstractValidateRegexStringDiagnosticAnalyzer( - int stringLiteralKind, - ISyntaxFactsService syntaxFacts, - ISemanticFactsService semanticFacts, - IVirtualCharService virtualCharService) - : base(IDEDiagnosticIds.RegexPatternDiagnosticId, - new LocalizableResourceString(nameof(FeaturesResources.Regex_issue_0), FeaturesResources.ResourceManager, typeof(FeaturesResources))) - { - _stringLiteralKind = stringLiteralKind; - _syntaxFacts = syntaxFacts; - _semanticFacts = semanticFacts; - _virtualCharService = virtualCharService; - } - - public override DiagnosticAnalyzerCategory GetAnalyzerCategory() - => DiagnosticAnalyzerCategory.SemanticSpanAnalysis; - - public override bool OpenFileOnly(Workspace workspace) - => false; - - protected override void InitializeWorker(AnalysisContext context) - => context.RegisterSemanticModelAction(AnalyzeSemanticModel); - - private void AnalyzeSemanticModel(SemanticModelAnalysisContext context) - { - var semanticModel = context.SemanticModel; - var syntaxTree = semanticModel.SyntaxTree; - var cancellationToken = context.CancellationToken; - var options = context.Options; - var optionSet = options.GetDocumentOptionSetAsync(syntaxTree, cancellationToken).GetAwaiter().GetResult(); - if (optionSet == null) - { - return; - } - - var option = optionSet.GetOption(RegularExpressionsOptions.ReportInvalidRegexPatterns, syntaxTree.Options.Language); - if (!option) - { - return; - } - - var detector = RegexPatternDetector.TryGetOrCreate(semanticModel, _syntaxFacts, _semanticFacts); - if (detector == null) - { - return; - } - - var root = syntaxTree.GetRoot(cancellationToken); - Analyze(context, detector, _virtualCharService, root, cancellationToken); - } - - private void Analyze( - SemanticModelAnalysisContext context, RegexPatternDetector detector, - IVirtualCharService virtualCharService, SyntaxNode node, CancellationToken cancellationToken) - { - cancellationToken.ThrowIfCancellationRequested(); - - foreach (var child in node.ChildNodesAndTokens()) - { - if (child.IsNode) - { - Analyze(context, detector, virtualCharService, child.AsNode(), cancellationToken); - } - else - { - var token = child.AsToken(); - if (token.RawKind == _stringLiteralKind) - { - var tree = detector.TryParseRegexPattern(token, virtualCharService, cancellationToken); - if (tree != null) - { - foreach (var diag in tree.Diagnostics) - { - context.ReportDiagnostic(Diagnostic.Create( - this.GetDescriptorWithSeverity(DiagnosticSeverity.Warning), - Location.Create(context.SemanticModel.SyntaxTree, diag.Span), - diag.Message)); - } - } - } - } - } - } - } -} diff --git a/src/Features/Core/Portable/xlf/FeaturesResources.cs.xlf b/src/Features/Core/Portable/xlf/FeaturesResources.cs.xlf index a4a8d32c3c8f46d1435051f4e5a748ec26871910..410998f843562cfac3e5f423f67f6436b422573b 100644 --- a/src/Features/Core/Portable/xlf/FeaturesResources.cs.xlf +++ b/src/Features/Core/Portable/xlf/FeaturesResources.cs.xlf @@ -1985,11 +1985,6 @@ Tato verze se používá zde: {2}. indexer - - Regex issue: {0} - Regex issue: {0} - - Probable JSON string detected Probable JSON string detected diff --git a/src/Features/Core/Portable/xlf/FeaturesResources.de.xlf b/src/Features/Core/Portable/xlf/FeaturesResources.de.xlf index 34f0baef409839a9954f9cfd287de253ddc2537c..cf4cfba8618611f73efaaaa69f3992299c7ea28c 100644 --- a/src/Features/Core/Portable/xlf/FeaturesResources.de.xlf +++ b/src/Features/Core/Portable/xlf/FeaturesResources.de.xlf @@ -1985,11 +1985,6 @@ Diese Version wird verwendet in: {2} Indexer - - Regex issue: {0} - Regex issue: {0} - - Probable JSON string detected Probable JSON string detected diff --git a/src/Features/Core/Portable/xlf/FeaturesResources.es.xlf b/src/Features/Core/Portable/xlf/FeaturesResources.es.xlf index c146e264073ec0dbc2abb05a44da6bf2cb9ee4e6..ec6446a6260663bc4ee1bf4cb04f57a81148e2ad 100644 --- a/src/Features/Core/Portable/xlf/FeaturesResources.es.xlf +++ b/src/Features/Core/Portable/xlf/FeaturesResources.es.xlf @@ -1985,11 +1985,6 @@ Esta versión se utiliza en: {2} indizador - - Regex issue: {0} - Regex issue: {0} - - Probable JSON string detected Probable JSON string detected diff --git a/src/Features/Core/Portable/xlf/FeaturesResources.fr.xlf b/src/Features/Core/Portable/xlf/FeaturesResources.fr.xlf index d5ffdeef28d46c244e85695024537b8d6979bec4..7a4555390b7a8b778fccc7382ad735f36ad1ef2c 100644 --- a/src/Features/Core/Portable/xlf/FeaturesResources.fr.xlf +++ b/src/Features/Core/Portable/xlf/FeaturesResources.fr.xlf @@ -1985,11 +1985,6 @@ Version utilisée dans : {2} indexeur - - Regex issue: {0} - Regex issue: {0} - - Probable JSON string detected Probable JSON string detected diff --git a/src/Features/Core/Portable/xlf/FeaturesResources.it.xlf b/src/Features/Core/Portable/xlf/FeaturesResources.it.xlf index 596afb51981676c524e9bf007bb77b14992c6e70..26cb3da69431377bb0bd492d09f42d6607701833 100644 --- a/src/Features/Core/Portable/xlf/FeaturesResources.it.xlf +++ b/src/Features/Core/Portable/xlf/FeaturesResources.it.xlf @@ -1985,11 +1985,6 @@ Questa versione è usata {2} indicizzatore - - Regex issue: {0} - Regex issue: {0} - - Probable JSON string detected Probable JSON string detected diff --git a/src/Features/Core/Portable/xlf/FeaturesResources.ja.xlf b/src/Features/Core/Portable/xlf/FeaturesResources.ja.xlf index 2d12a5ef520c8f0c2044180725f510622861ce40..b1e5151379362e1f43c3cd7c348d29219e56c70c 100644 --- a/src/Features/Core/Portable/xlf/FeaturesResources.ja.xlf +++ b/src/Features/Core/Portable/xlf/FeaturesResources.ja.xlf @@ -1985,11 +1985,6 @@ This version used in: {2} インデクサー - - Regex issue: {0} - Regex issue: {0} - - Probable JSON string detected Probable JSON string detected diff --git a/src/Features/Core/Portable/xlf/FeaturesResources.ko.xlf b/src/Features/Core/Portable/xlf/FeaturesResources.ko.xlf index a4c2ebeaac081af966b986fcbbde2772fb6eaff5..04c888d18fe808c70cdb74c20c154744889943a4 100644 --- a/src/Features/Core/Portable/xlf/FeaturesResources.ko.xlf +++ b/src/Features/Core/Portable/xlf/FeaturesResources.ko.xlf @@ -1985,11 +1985,6 @@ This version used in: {2} 인덱서 - - Regex issue: {0} - Regex issue: {0} - - Probable JSON string detected Probable JSON string detected diff --git a/src/Features/Core/Portable/xlf/FeaturesResources.pl.xlf b/src/Features/Core/Portable/xlf/FeaturesResources.pl.xlf index 054fb1cdf2604c0f1d83b4bfb0f105cc2bb705af..9ba5a7289a84c5c25a0a50d06b101c2d9a5d9d42 100644 --- a/src/Features/Core/Portable/xlf/FeaturesResources.pl.xlf +++ b/src/Features/Core/Portable/xlf/FeaturesResources.pl.xlf @@ -1985,11 +1985,6 @@ Ta wersja jest używana wersja: {2} indeksator - - Regex issue: {0} - Regex issue: {0} - - Probable JSON string detected Probable JSON string detected diff --git a/src/Features/Core/Portable/xlf/FeaturesResources.pt-BR.xlf b/src/Features/Core/Portable/xlf/FeaturesResources.pt-BR.xlf index 17cc844904a597d4d50f46239602fde33b2785b4..03cb92f22f8e1cc87450a9477bb4809025ae052d 100644 --- a/src/Features/Core/Portable/xlf/FeaturesResources.pt-BR.xlf +++ b/src/Features/Core/Portable/xlf/FeaturesResources.pt-BR.xlf @@ -1985,11 +1985,6 @@ Essa versão é usada no: {2} indexador - - Regex issue: {0} - Regex issue: {0} - - Probable JSON string detected Probable JSON string detected diff --git a/src/Features/Core/Portable/xlf/FeaturesResources.ru.xlf b/src/Features/Core/Portable/xlf/FeaturesResources.ru.xlf index e1714b78e3f57ad040e92bc73b7d2e38e1a6d495..8399a1c54674b2391763b7c04f9bc0306409a41e 100644 --- a/src/Features/Core/Portable/xlf/FeaturesResources.ru.xlf +++ b/src/Features/Core/Portable/xlf/FeaturesResources.ru.xlf @@ -1985,11 +1985,6 @@ This version used in: {2} индексатор - - Regex issue: {0} - Regex issue: {0} - - Probable JSON string detected Probable JSON string detected diff --git a/src/Features/Core/Portable/xlf/FeaturesResources.tr.xlf b/src/Features/Core/Portable/xlf/FeaturesResources.tr.xlf index 644440beb4bd381254c8a4dbe3d18d8dd54a0b67..a3fe863dbc087a543ede1315b68fe60ed1c75b70 100644 --- a/src/Features/Core/Portable/xlf/FeaturesResources.tr.xlf +++ b/src/Features/Core/Portable/xlf/FeaturesResources.tr.xlf @@ -1985,11 +1985,6 @@ Bu sürüm şurada kullanılır: {2} dizin oluşturucu - - Regex issue: {0} - Regex issue: {0} - - Probable JSON string detected Probable JSON string detected diff --git a/src/Features/Core/Portable/xlf/FeaturesResources.zh-Hans.xlf b/src/Features/Core/Portable/xlf/FeaturesResources.zh-Hans.xlf index 9461e986a58f329a1b35ec287b02f888f9d4fe2d..4d9a6a095a8bb5fd50d7680b72e7ca248d3031de 100644 --- a/src/Features/Core/Portable/xlf/FeaturesResources.zh-Hans.xlf +++ b/src/Features/Core/Portable/xlf/FeaturesResources.zh-Hans.xlf @@ -1985,11 +1985,6 @@ This version used in: {2} 索引器 - - Regex issue: {0} - Regex issue: {0} - - Probable JSON string detected Probable JSON string detected diff --git a/src/Features/Core/Portable/xlf/FeaturesResources.zh-Hant.xlf b/src/Features/Core/Portable/xlf/FeaturesResources.zh-Hant.xlf index dd4a14520444363f20f876167932cb8e2156b935..c20063c9f4dde8d7ecd5418845aa46f310a11c23 100644 --- a/src/Features/Core/Portable/xlf/FeaturesResources.zh-Hant.xlf +++ b/src/Features/Core/Portable/xlf/FeaturesResources.zh-Hant.xlf @@ -1985,11 +1985,6 @@ This version used in: {2} 索引子 - - Regex issue: {0} - Regex issue: {0} - - Probable JSON string detected Probable JSON string detected diff --git a/src/Features/VisualBasic/Portable/ValidateRegexString/VisualBasicValidateRegexStringDiagnosticAnalyzer.vb b/src/Features/VisualBasic/Portable/ValidateRegexString/VisualBasicValidateRegexStringDiagnosticAnalyzer.vb deleted file mode 100644 index 934c41cc39ff6eafc42e332a65465776ec25d96d..0000000000000000000000000000000000000000 --- a/src/Features/VisualBasic/Portable/ValidateRegexString/VisualBasicValidateRegexStringDiagnosticAnalyzer.vb +++ /dev/null @@ -1,19 +0,0 @@ -' Copyright (c) Microsoft. All Rights Reserved. Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. - -Imports Microsoft.CodeAnalysis.Diagnostics -Imports Microsoft.CodeAnalysis.ValidateRegexString -Imports Microsoft.CodeAnalysis.VisualBasic.EmbeddedLanguages.VirtualChars - -Namespace Microsoft.CodeAnalysis.VisualBasic.ValidateRegexString - - Friend Class VisualBasicValidateRegexStringDiagnosticAnalyzer - Inherits AbstractValidateRegexStringDiagnosticAnalyzer - - Public Sub New() - MyBase.New(SyntaxKind.StringLiteralToken, - VisualBasicSyntaxFactsService.Instance, - VisualBasicSemanticFactsService.Instance, - VisualBasicVirtualCharService.Instance) - End Sub - End Class -End Namespace diff --git a/src/VisualStudio/CSharp/Impl/Options/AdvancedOptionPageControl.xaml.cs b/src/VisualStudio/CSharp/Impl/Options/AdvancedOptionPageControl.xaml.cs index a5fa3d8f595943774bf466bae44bd0e07c71d605..11417b64b51c3c9aaa546179830b2d56d9060745 100644 --- a/src/VisualStudio/CSharp/Impl/Options/AdvancedOptionPageControl.xaml.cs +++ b/src/VisualStudio/CSharp/Impl/Options/AdvancedOptionPageControl.xaml.cs @@ -6,7 +6,6 @@ using Microsoft.CodeAnalysis.Editor.CSharp.SplitStringLiteral; using Microsoft.CodeAnalysis.Editor.Shared.Options; using Microsoft.CodeAnalysis.EmbeddedLanguages.Json; -using Microsoft.CodeAnalysis.EmbeddedLanguages.RegularExpressions; using Microsoft.CodeAnalysis.ExtractMethod; using Microsoft.CodeAnalysis.Fading; using Microsoft.CodeAnalysis.ImplementType; @@ -64,10 +63,6 @@ public AdvancedOptionPageControl(IServiceProvider serviceProvider) : base(servic BindToOption(Report_invalid_placeholders_in_string_dot_format_calls, ValidateFormatStringOption.ReportInvalidPlaceholdersInStringDotFormatCalls, LanguageNames.CSharp); - BindToOption(Colorize_regular_expressions, RegularExpressionsOptions.ColorizeRegexPatterns, LanguageNames.CSharp); - BindToOption(Report_invalid_regular_expressions, RegularExpressionsOptions.ReportInvalidRegexPatterns, LanguageNames.CSharp); - BindToOption(Highlight_related_regex_components_under_cursor, RegularExpressionsOptions.HighlightRelatedRegexComponentsUnderCursor, LanguageNames.CSharp); - BindToOption(Detect_and_offer_editor_features_for_likely_JSON_strings, JsonOptions.DetectAndOfferEditorFeaturesForProbableJsonStrings, LanguageNames.CSharp); BindToOption(Colorize_JSON_strings, JsonOptions.ColorizeJsonPatterns, LanguageNames.CSharp); BindToOption(Report_invalid_JSON_strings, JsonOptions.ReportInvalidJsonPatterns, LanguageNames.CSharp); diff --git a/src/VisualStudio/Core/Def/HACK_ThemeColorFixer.cs b/src/VisualStudio/Core/Def/HACK_ThemeColorFixer.cs index 69c8a4930be347c1ad2ad400bd2b17690fc9e332..d7c3246331e0a95cf32375e80f8df5b22bc6381f 100644 --- a/src/VisualStudio/Core/Def/HACK_ThemeColorFixer.cs +++ b/src/VisualStudio/Core/Def/HACK_ThemeColorFixer.cs @@ -75,16 +75,6 @@ public void RefreshThemeColors() UpdateForegroundColor(ClassificationTypeNames.XmlDocCommentComment, sourceFormatMap, targetFormatMap); UpdateForegroundColor(ClassificationTypeNames.XmlDocCommentCDataSection, sourceFormatMap, targetFormatMap); - UpdateForegroundColor(ClassificationTypeNames.RegexComment, sourceFormatMap, targetFormatMap); - UpdateForegroundColor(ClassificationTypeNames.RegexText, sourceFormatMap, targetFormatMap); - UpdateForegroundColor(ClassificationTypeNames.RegexCharacterClass, sourceFormatMap, targetFormatMap); - UpdateForegroundColor(ClassificationTypeNames.RegexQuantifier, sourceFormatMap, targetFormatMap); - UpdateForegroundColor(ClassificationTypeNames.RegexAnchor, sourceFormatMap, targetFormatMap); - UpdateForegroundColor(ClassificationTypeNames.RegexAlternation, sourceFormatMap, targetFormatMap); - UpdateForegroundColor(ClassificationTypeNames.RegexGrouping, sourceFormatMap, targetFormatMap); - UpdateForegroundColor(ClassificationTypeNames.RegexOtherEscape, sourceFormatMap, targetFormatMap); - UpdateForegroundColor(ClassificationTypeNames.RegexSelfEscapedCharacter, sourceFormatMap, targetFormatMap); - UpdateForegroundColor(ClassificationTypeNames.JsonComment, sourceFormatMap, targetFormatMap); UpdateForegroundColor(ClassificationTypeNames.JsonNumber, sourceFormatMap, targetFormatMap); UpdateForegroundColor(ClassificationTypeNames.JsonString, sourceFormatMap, targetFormatMap); diff --git a/src/VisualStudio/VisualBasic/Impl/Options/AdvancedOptionPageControl.xaml.vb b/src/VisualStudio/VisualBasic/Impl/Options/AdvancedOptionPageControl.xaml.vb index ff540090f93df2e8eaf5779cc0635b2d85685da2..fc8a8df527a54d628b9c911a8bc24bbf0443eaac 100644 --- a/src/VisualStudio/VisualBasic/Impl/Options/AdvancedOptionPageControl.xaml.vb +++ b/src/VisualStudio/VisualBasic/Impl/Options/AdvancedOptionPageControl.xaml.vb @@ -4,7 +4,6 @@ Imports Microsoft.CodeAnalysis Imports Microsoft.CodeAnalysis.Editing Imports Microsoft.CodeAnalysis.Editor.Shared.Options Imports Microsoft.CodeAnalysis.EmbeddedLanguages.Json -Imports Microsoft.CodeAnalysis.EmbeddedLanguages.RegularExpressions Imports Microsoft.CodeAnalysis.ExtractMethod Imports Microsoft.CodeAnalysis.Fading Imports Microsoft.CodeAnalysis.ImplementType @@ -61,10 +60,6 @@ Namespace Microsoft.VisualStudio.LanguageServices.VisualBasic.Options BindToOption(Report_invalid_placeholders_in_string_dot_format_calls, ValidateFormatStringOption.ReportInvalidPlaceholdersInStringDotFormatCalls, LanguageNames.VisualBasic) - BindToOption(Colorize_regular_expressions, RegularExpressionsOptions.ColorizeRegexPatterns, LanguageNames.VisualBasic) - BindToOption(Report_invalid_regular_expressions, RegularExpressionsOptions.ReportInvalidRegexPatterns, LanguageNames.VisualBasic) - BindToOption(Highlight_related_regex_components_under_cursor, RegularExpressionsOptions.HighlightRelatedRegexComponentsUnderCursor, LanguageNames.VisualBasic) - BindToOption(Detect_and_offer_editor_features_for_likely_JSON_strings, JsonOptions.DetectAndOfferEditorFeaturesForProbableJsonStrings, LanguageNames.VisualBasic) BindToOption(Colorize_JSON_strings, JsonOptions.ColorizeJsonPatterns, LanguageNames.VisualBasic) BindToOption(Report_invalid_JSON_strings, JsonOptions.ReportInvalidJsonPatterns, LanguageNames.VisualBasic) diff --git a/src/Workspaces/CSharp/Portable/Classification/SyntaxClassification/CSharpSyntaxClassificationService.cs b/src/Workspaces/CSharp/Portable/Classification/SyntaxClassification/CSharpSyntaxClassificationService.cs index 3b89d98477b6c2ee280924608e3f94c9981f8877..39b88fc93c2a58c0781bf249646ab701f0c6240b 100644 --- a/src/Workspaces/CSharp/Portable/Classification/SyntaxClassification/CSharpSyntaxClassificationService.cs +++ b/src/Workspaces/CSharp/Portable/Classification/SyntaxClassification/CSharpSyntaxClassificationService.cs @@ -20,7 +20,6 @@ internal class CSharpSyntaxClassificationService : AbstractSyntaxClassificationS new JsonPatternTokenClassifier(), new NameSyntaxClassifier(), new SyntaxTokenClassifier(), - new RegexPatternTokenClassifier(), new UsingDirectiveSyntaxClassifier()); public override ImmutableArray GetDefaultSyntaxClassifiers() diff --git a/src/Workspaces/CSharp/Portable/Classification/SyntaxClassification/RegexPatternTokenClassifier.cs b/src/Workspaces/CSharp/Portable/Classification/SyntaxClassification/RegexPatternTokenClassifier.cs deleted file mode 100644 index dbabcad3204db76e32da451018500fc60036fc05..0000000000000000000000000000000000000000 --- a/src/Workspaces/CSharp/Portable/Classification/SyntaxClassification/RegexPatternTokenClassifier.cs +++ /dev/null @@ -1,27 +0,0 @@ -// Copyright (c) Microsoft. All Rights Reserved. Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. - -using System.Collections.Immutable; -using System.Diagnostics; -using System.Threading; -using Microsoft.CodeAnalysis.Classification; -using Microsoft.CodeAnalysis.CSharp.EmbeddedLanguages.VirtualChars; -using Microsoft.CodeAnalysis.PooledObjects; - -namespace Microsoft.CodeAnalysis.CSharp.Classification.Classifiers -{ - internal class RegexPatternTokenClassifier : AbstractSyntaxClassifier - { - public override ImmutableArray SyntaxTokenKinds { get; } = ImmutableArray.Create((int)SyntaxKind.StringLiteralToken); - - public override void AddClassifications(Workspace workspace, SyntaxToken token, SemanticModel semanticModel, ArrayBuilder result, CancellationToken cancellationToken) - { - Debug.Assert(token.Kind() == SyntaxKind.StringLiteralToken); - CommonRegexPatternTokenClassifier.AddClassifications( - workspace, token, semanticModel, result, - CSharpSyntaxFactsService.Instance, - CSharpSemanticFactsService.Instance, - CSharpVirtualCharService.Instance, - cancellationToken); - } - } -} diff --git a/src/Workspaces/Core/Portable/Classification/ClassificationTypeNames.cs b/src/Workspaces/Core/Portable/Classification/ClassificationTypeNames.cs index 30d7401f7d90b9a69ee619dbc667d9a0b127d032..3150ffa2230717757ff5f5a5ee264dbeb8025f5e 100644 --- a/src/Workspaces/Core/Portable/Classification/ClassificationTypeNames.cs +++ b/src/Workspaces/Core/Portable/Classification/ClassificationTypeNames.cs @@ -60,16 +60,6 @@ public static class ClassificationTypeNames public const string XmlLiteralProcessingInstruction = "xml literal - processing instruction"; public const string XmlLiteralText = "xml literal - text"; - internal const string RegexComment = "regex - comment"; - internal const string RegexCharacterClass = "regex - character class"; - internal const string RegexAnchor = "regex - anchor"; - internal const string RegexQuantifier = "regex - quantifier"; - internal const string RegexGrouping = "regex - grouping"; - internal const string RegexAlternation = "regex - alternation"; - internal const string RegexText = "regex - text"; - internal const string RegexSelfEscapedCharacter = "regex - self escaped character"; - internal const string RegexOtherEscape = "regex - other escape"; - internal const string JsonComment = "json - comment"; internal const string JsonNumber = "json - number"; internal const string JsonString = "json - string"; diff --git a/src/Workspaces/Core/Portable/Classification/SyntaxClassification/CommonRegexPatternTokenClassifier.cs b/src/Workspaces/Core/Portable/Classification/SyntaxClassification/CommonRegexPatternTokenClassifier.cs deleted file mode 100644 index b5b4507b579a987325e136fe0c5aaa86cf9c430f..0000000000000000000000000000000000000000 --- a/src/Workspaces/Core/Portable/Classification/SyntaxClassification/CommonRegexPatternTokenClassifier.cs +++ /dev/null @@ -1,317 +0,0 @@ -// Copyright (c) Microsoft. All Rights Reserved. Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. - -using System.Linq; -using System.Threading; -using Microsoft.CodeAnalysis.EmbeddedLanguages.Common; -using Microsoft.CodeAnalysis.EmbeddedLanguages.RegularExpressions; -using Microsoft.CodeAnalysis.EmbeddedLanguages.VirtualChars; -using Microsoft.CodeAnalysis.LanguageServices; -using Microsoft.CodeAnalysis.PooledObjects; - -namespace Microsoft.CodeAnalysis.Classification -{ - using static EmbeddedSyntaxHelpers; - - using RegexToken = EmbeddedSyntaxToken; - using RegexTrivia = EmbeddedSyntaxTrivia; - - internal static class CommonRegexPatternTokenClassifier - { - private static ObjectPool _visitorPool = new ObjectPool(() => new Visitor()); - - public static void AddClassifications( - Workspace workspace, SyntaxToken token, SemanticModel semanticModel, ArrayBuilder result, - ISyntaxFactsService syntaxFacts, ISemanticFactsService semanticFacts, IVirtualCharService virtualCharService, - CancellationToken cancellationToken) - { - if (!workspace.Options.GetOption(RegularExpressionsOptions.ColorizeRegexPatterns, LanguageNames.CSharp)) - { - return; - } - - // Do some quick syntactic checks before doing any complex work. - if (RegexPatternDetector.IsDefinitelyNotPattern(token, syntaxFacts)) - { - return; - } - - var detector = RegexPatternDetector.TryGetOrCreate(semanticModel, syntaxFacts, semanticFacts); - var tree = detector?.TryParseRegexPattern(token, virtualCharService, cancellationToken); - if (tree == null) - { - return; - } - - var visitor = _visitorPool.Allocate(); - try - { - visitor.Result = result; - AddClassifications(tree.Root, visitor, result); - } - finally - { - visitor.Result = null; - _visitorPool.Free(visitor); - } - } - - private static void AddClassifications(RegexNode node, Visitor visitor, ArrayBuilder result) - { - node.Accept(visitor); - - foreach (var child in node) - { - if (child.IsNode) - { - AddClassifications(child.Node, visitor, result); - } - else - { - AddTriviaClassifications(child.Token, result); - } - } - } - - private static void AddTriviaClassifications(RegexToken token, ArrayBuilder result) - { - foreach (var trivia in token.LeadingTrivia) - { - AddTriviaClassifications(trivia, result); - } - } - - private static void AddTriviaClassifications(RegexTrivia trivia, ArrayBuilder result) - { - if (trivia.Kind == RegexKind.CommentTrivia && - trivia.VirtualChars.Length > 0) - { - result.Add(new ClassifiedSpan( - ClassificationTypeNames.RegexComment, GetSpan(trivia.VirtualChars))); - } - } - - private class Visitor : IRegexNodeVisitor - { - public ArrayBuilder Result; - - private void AddClassification(RegexToken token, string typeName) - { - if (!token.IsMissing) - { - Result.Add(new ClassifiedSpan(typeName, token.GetSpan())); - } - } - - private void ClassifyWholeNode(RegexNode node, string typeName) - { - foreach (var child in node) - { - if (child.IsNode) - { - ClassifyWholeNode(child.Node, typeName); - } - else - { - AddClassification(child.Token, typeName); - } - } - } - - public void Visit(RegexCompilationUnit node) - { - // Nothing to highlight. - } - - public void Visit(RegexSequenceNode node) - { - // Nothing to highlight. - } - - #region Character classes - - public void Visit(RegexWildcardNode node) - => AddClassification(node.DotToken, ClassificationTypeNames.RegexCharacterClass); - - public void Visit(RegexCharacterClassNode node) - { - AddClassification(node.OpenBracketToken, ClassificationTypeNames.RegexCharacterClass); - AddClassification(node.CloseBracketToken, ClassificationTypeNames.RegexCharacterClass); - } - - public void Visit(RegexNegatedCharacterClassNode node) - { - AddClassification(node.OpenBracketToken, ClassificationTypeNames.RegexCharacterClass); - AddClassification(node.CaretToken, ClassificationTypeNames.RegexCharacterClass); - AddClassification(node.CloseBracketToken, ClassificationTypeNames.RegexCharacterClass); - } - - public void Visit(RegexCharacterClassRangeNode node) - => AddClassification(node.MinusToken, ClassificationTypeNames.RegexCharacterClass); - - public void Visit(RegexCharacterClassSubtractionNode node) - => AddClassification(node.MinusToken, ClassificationTypeNames.RegexCharacterClass); - - public void Visit(RegexCharacterClassEscapeNode node) - => ClassifyWholeNode(node, ClassificationTypeNames.RegexCharacterClass); - - public void Visit(RegexCategoryEscapeNode node) - => ClassifyWholeNode(node, ClassificationTypeNames.RegexCharacterClass); - - #endregion - - #region Quantifiers - - public void Visit(RegexZeroOrMoreQuantifierNode node) - => AddClassification(node.AsteriskToken, ClassificationTypeNames.RegexQuantifier); - - public void Visit(RegexOneOrMoreQuantifierNode node) - => AddClassification(node.PlusToken, ClassificationTypeNames.RegexQuantifier); - - public void Visit(RegexZeroOrOneQuantifierNode node) - => AddClassification(node.QuestionToken, ClassificationTypeNames.RegexQuantifier); - - public void Visit(RegexLazyQuantifierNode node) - => AddClassification(node.QuestionToken, ClassificationTypeNames.RegexQuantifier); - - public void Visit(RegexExactNumericQuantifierNode node) - { - AddClassification(node.OpenBraceToken, ClassificationTypeNames.RegexQuantifier); - AddClassification(node.FirstNumberToken, ClassificationTypeNames.RegexQuantifier); - AddClassification(node.CloseBraceToken, ClassificationTypeNames.RegexQuantifier); - } - - public void Visit(RegexOpenNumericRangeQuantifierNode node) - { - AddClassification(node.OpenBraceToken, ClassificationTypeNames.RegexQuantifier); - AddClassification(node.FirstNumberToken, ClassificationTypeNames.RegexQuantifier); - AddClassification(node.CommaToken, ClassificationTypeNames.RegexQuantifier); - AddClassification(node.CloseBraceToken, ClassificationTypeNames.RegexQuantifier); - } - - public void Visit(RegexClosedNumericRangeQuantifierNode node) - { - AddClassification(node.OpenBraceToken, ClassificationTypeNames.RegexQuantifier); - AddClassification(node.FirstNumberToken, ClassificationTypeNames.RegexQuantifier); - AddClassification(node.CommaToken, ClassificationTypeNames.RegexQuantifier); - AddClassification(node.SecondNumberToken, ClassificationTypeNames.RegexQuantifier); - AddClassification(node.CloseBraceToken, ClassificationTypeNames.RegexQuantifier); - } - - #endregion - - #region Groupings - - public void Visit(RegexSimpleGroupingNode node) - => ClassifyGrouping(node); - - public void Visit(RegexSimpleOptionsGroupingNode node) - => ClassifyGrouping(node); - - public void Visit(RegexNestedOptionsGroupingNode node) - => ClassifyGrouping(node); - - public void Visit(RegexNonCapturingGroupingNode node) - => ClassifyGrouping(node); - - public void Visit(RegexPositiveLookaheadGroupingNode node) - => ClassifyGrouping(node); - - public void Visit(RegexNegativeLookaheadGroupingNode node) - => ClassifyGrouping(node); - - public void Visit(RegexPositiveLookbehindGroupingNode node) - => ClassifyGrouping(node); - - public void Visit(RegexNegativeLookbehindGroupingNode node) - => ClassifyGrouping(node); - - public void Visit(RegexNonBacktrackingGroupingNode node) - => ClassifyGrouping(node); - - public void Visit(RegexCaptureGroupingNode node) - => ClassifyGrouping(node); - - public void Visit(RegexBalancingGroupingNode node) - => ClassifyGrouping(node); - - public void Visit(RegexConditionalCaptureGroupingNode node) - => ClassifyGrouping(node); - - public void Visit(RegexConditionalExpressionGroupingNode node) - => ClassifyGrouping(node); - - // Captures and backreferences refer to groups. So we classify them the same way as groups. - public void Visit(RegexCaptureEscapeNode node) - => ClassifyWholeNode(node, ClassificationTypeNames.RegexGrouping); - - public void Visit(RegexKCaptureEscapeNode node) - => ClassifyWholeNode(node, ClassificationTypeNames.RegexGrouping); - - public void Visit(RegexBackreferenceEscapeNode node) - => ClassifyWholeNode(node, ClassificationTypeNames.RegexGrouping); - - private void ClassifyGrouping(RegexGroupingNode node) - { - foreach (var child in node) - { - if (!child.IsNode) - { - AddClassification(child.Token, ClassificationTypeNames.RegexGrouping); - } - } - } - - #endregion - - #region Other Escapes - - public void Visit(RegexControlEscapeNode node) - => ClassifyOtherEscape(node); - - public void Visit(RegexHexEscapeNode node) - => ClassifyOtherEscape(node); - - public void Visit(RegexUnicodeEscapeNode node) - => ClassifyOtherEscape(node); - - public void Visit(RegexOctalEscapeNode node) - => ClassifyOtherEscape(node); - - public void ClassifyOtherEscape(RegexNode node) - => ClassifyWholeNode(node, ClassificationTypeNames.RegexOtherEscape); - - #endregion - - #region Anchors - - public void Visit(RegexAnchorNode node) - => AddClassification(node.AnchorToken, ClassificationTypeNames.RegexAnchor); - - public void Visit(RegexAnchorEscapeNode node) - => ClassifyWholeNode(node, ClassificationTypeNames.RegexAnchor); - - #endregion - - public void Visit(RegexTextNode node) - => AddClassification(node.TextToken, ClassificationTypeNames.RegexText); - - public void Visit(RegexPosixPropertyNode node) - { - // The .net parser just interprets the [ of the node, and skips the rest. So - // classify the end part as a comment. - Result.Add(new ClassifiedSpan(node.TextToken.VirtualChars[0].Span, ClassificationTypeNames.RegexText)); - Result.Add(new ClassifiedSpan( - GetSpan(node.TextToken.VirtualChars[1], node.TextToken.VirtualChars.Last()), - ClassificationTypeNames.RegexComment)); - } - - public void Visit(RegexAlternationNode node) - => AddClassification(node.BarToken, ClassificationTypeNames.RegexAlternation); - - public void Visit(RegexSimpleEscapeNode node) - => ClassifyWholeNode(node, node.IsSelfEscape() - ? ClassificationTypeNames.RegexSelfEscapedCharacter - : ClassificationTypeNames.RegexOtherEscape); - } - } -} diff --git a/src/Workspaces/Core/Portable/EmbeddedLanguages/RegularExpressions/IRegexNodeVisitor.cs b/src/Workspaces/Core/Portable/EmbeddedLanguages/RegularExpressions/IRegexNodeVisitor.cs deleted file mode 100644 index 324d8d9c6321ca31ccb29a5f1b8c224d860ce6ae..0000000000000000000000000000000000000000 --- a/src/Workspaces/Core/Portable/EmbeddedLanguages/RegularExpressions/IRegexNodeVisitor.cs +++ /dev/null @@ -1,50 +0,0 @@ -// Copyright (c) Microsoft. All Rights Reserved. Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. - -namespace Microsoft.CodeAnalysis.EmbeddedLanguages.RegularExpressions -{ - internal interface IRegexNodeVisitor - { - void Visit(RegexCompilationUnit node); - void Visit(RegexSequenceNode node); - void Visit(RegexTextNode node); - void Visit(RegexCharacterClassNode node); - void Visit(RegexNegatedCharacterClassNode node); - void Visit(RegexCharacterClassRangeNode node); - void Visit(RegexCharacterClassSubtractionNode node); - void Visit(RegexPosixPropertyNode node); - void Visit(RegexWildcardNode node); - void Visit(RegexZeroOrMoreQuantifierNode node); - void Visit(RegexOneOrMoreQuantifierNode node); - void Visit(RegexZeroOrOneQuantifierNode node); - void Visit(RegexLazyQuantifierNode node); - void Visit(RegexExactNumericQuantifierNode node); - void Visit(RegexOpenNumericRangeQuantifierNode node); - void Visit(RegexClosedNumericRangeQuantifierNode node); - void Visit(RegexAnchorNode node); - void Visit(RegexAlternationNode node); - void Visit(RegexSimpleGroupingNode node); - void Visit(RegexSimpleOptionsGroupingNode node); - void Visit(RegexNestedOptionsGroupingNode node); - void Visit(RegexNonCapturingGroupingNode node); - void Visit(RegexPositiveLookaheadGroupingNode node); - void Visit(RegexNegativeLookaheadGroupingNode node); - void Visit(RegexPositiveLookbehindGroupingNode node); - void Visit(RegexNegativeLookbehindGroupingNode node); - void Visit(RegexNonBacktrackingGroupingNode node); - void Visit(RegexCaptureGroupingNode node); - void Visit(RegexBalancingGroupingNode node); - void Visit(RegexConditionalCaptureGroupingNode node); - void Visit(RegexConditionalExpressionGroupingNode node); - void Visit(RegexSimpleEscapeNode node); - void Visit(RegexAnchorEscapeNode node); - void Visit(RegexCharacterClassEscapeNode node); - void Visit(RegexControlEscapeNode node); - void Visit(RegexHexEscapeNode node); - void Visit(RegexUnicodeEscapeNode node); - void Visit(RegexCaptureEscapeNode node); - void Visit(RegexKCaptureEscapeNode node); - void Visit(RegexOctalEscapeNode node); - void Visit(RegexBackreferenceEscapeNode node); - void Visit(RegexCategoryEscapeNode node); - } -} diff --git a/src/Workspaces/Core/Portable/EmbeddedLanguages/RegularExpressions/RegexCharClass.cs b/src/Workspaces/Core/Portable/EmbeddedLanguages/RegularExpressions/RegexCharClass.cs deleted file mode 100644 index f5565ee0b397fb41703d29a76f1a061f0fbced2d..0000000000000000000000000000000000000000 --- a/src/Workspaces/Core/Portable/EmbeddedLanguages/RegularExpressions/RegexCharClass.cs +++ /dev/null @@ -1,350 +0,0 @@ -using System; -using System.Collections.Generic; -using System.Diagnostics; -using System.Globalization; -using System.Text; - -namespace Microsoft.CodeAnalysis.EmbeddedLanguages.RegularExpressions -{ - /// - /// Minimal copy of https://github.com/dotnet/corefx/blob/master/src/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs - /// Used to accurately determine if something is a WordChar according to the .Net regex engine. - /// - internal static class RegexCharClass - { - private const int FLAGS = 0; - private const int SETLENGTH = 1; - private const int CATEGORYLENGTH = 2; - private const int SETSTART = 3; - - private const short SpaceConst = 100; - private const short NotSpaceConst = -100; - - private const char ZeroWidthJoiner = '\u200D'; - private const char ZeroWidthNonJoiner = '\u200C'; - - private const string WordClass = "\u0000\u0000\u000A\u0000\u0002\u0004\u0005\u0003\u0001\u0006\u0009\u0013\u0000"; - - private static readonly HashSet s_escapeCategories = new HashSet - { - // Others - "Cc", "Cf", "Cn", "Co", "Cs", "C", - // Letters - "Ll", "Lm", "Lo", "Lt", "Lu", "L", - // Marks - "Mc", "Me", "Mn", "M", - // Numbers - "Nd", "Nl", "No", "N", - // Punctuation - "Pc", "Pd", "Pe", "Po", "Ps", "Pf", "Pi", "P", - // Symbols - "Sc", "Sk", "Sm", "So", "S", - // Separators - "Zl", "Zp", "Zs", "Z", - - "IsAlphabeticPresentationForms", - "IsArabic", - "IsArabicPresentationForms-A", - "IsArabicPresentationForms-B", - "IsArmenian", - "IsArrows", - "IsBasicLatin", - "IsBengali", - "IsBlockElements", - "IsBopomofo", - "IsBopomofoExtended", - "IsBoxDrawing", - "IsBraillePatterns", - "IsBuhid", - "IsCJKCompatibility", - "IsCJKCompatibilityForms", - "IsCJKCompatibilityIdeographs", - "IsCJKRadicalsSupplement", - "IsCJKSymbolsandPunctuation", - "IsCJKUnifiedIdeographs", - "IsCJKUnifiedIdeographsExtensionA", - "IsCherokee", - "IsCombiningDiacriticalMarks", - "IsCombiningDiacriticalMarksforSymbols", - "IsCombiningHalfMarks", - "IsCombiningMarksforSymbols", - "IsControlPictures", - "IsCurrencySymbols", - "IsCyrillic", - "IsCyrillicSupplement", - "IsDevanagari", - "IsDingbats", - "IsEnclosedAlphanumerics", - "IsEnclosedCJKLettersandMonths", - "IsEthiopic", - "IsGeneralPunctuation", - "IsGeometricShapes", - "IsGeorgian", - "IsGreek", - "IsGreekExtended", - "IsGreekandCoptic", - "IsGujarati", - "IsGurmukhi", - "IsHalfwidthandFullwidthForms", - "IsHangulCompatibilityJamo", - "IsHangulJamo", - "IsHangulSyllables", - "IsHanunoo", - "IsHebrew", - "IsHighPrivateUseSurrogates", - "IsHighSurrogates", - "IsHiragana", - "IsIPAExtensions", - "IsIdeographicDescriptionCharacters", - "IsKanbun", - "IsKangxiRadicals", - "IsKannada", - "IsKatakana", - "IsKatakanaPhoneticExtensions", - "IsKhmer", - "IsKhmerSymbols", - "IsLao", - "IsLatin-1Supplement", - "IsLatinExtended-A", - "IsLatinExtended-B", - "IsLatinExtendedAdditional", - "IsLetterlikeSymbols", - "IsLimbu", - "IsLowSurrogates", - "IsMalayalam", - "IsMathematicalOperators", - "IsMiscellaneousMathematicalSymbols-A", - "IsMiscellaneousMathematicalSymbols-B", - "IsMiscellaneousSymbols", - "IsMiscellaneousSymbolsandArrows", - "IsMiscellaneousTechnical", - "IsMongolian", - "IsMyanmar", - "IsNumberForms", - "IsOgham", - "IsOpticalCharacterRecognition", - "IsOriya", - "IsPhoneticExtensions", - "IsPrivateUse", - "IsPrivateUseArea", - "IsRunic", - "IsSinhala", - "IsSmallFormVariants", - "IsSpacingModifierLetters", - "IsSpecials", - "IsSuperscriptsandSubscripts", - "IsSupplementalArrows-A", - "IsSupplementalArrows-B", - "IsSupplementalMathematicalOperators", - "IsSyriac", - "IsTagalog", - "IsTagbanwa", - "IsTaiLe", - "IsTamil", - "IsTelugu", - "IsThaana", - "IsThai", - "IsTibetan", - "IsUnifiedCanadianAboriginalSyllabics", - "IsVariationSelectors", - "IsYiRadicals", - "IsYiSyllables", - "IsYijingHexagramSymbols", - "_xmlC", - "_xmlD", - "_xmlI", - "_xmlW", - }; - - public static bool IsEscapeCategory(string value) - { - return s_escapeCategories.Contains(value); - } - - public static bool IsWordChar(char ch) - { - // According to UTS#18 Unicode Regular Expressions (http://www.unicode.org/reports/tr18/) - // RL 1.4 Simple Word Boundaries The class of includes all Alphabetic - // values from the Unicode character database, from UnicodeData.txt [UData], plus the U+200C - // ZERO WIDTH NON-JOINER and U+200D ZERO WIDTH JOINER. - return CharInClass(ch, WordClass) || ch == ZeroWidthJoiner || ch == ZeroWidthNonJoiner; - } - - internal static bool CharInClass(char ch, string set) - { - return CharInClassRecursive(ch, set, 0); - } - - internal static bool CharInClassRecursive(char ch, string set, int start) - { - int mySetLength = set[start + SETLENGTH]; - int myCategoryLength = set[start + CATEGORYLENGTH]; - int myEndPosition = start + SETSTART + mySetLength + myCategoryLength; - - bool subtracted = false; - - if (set.Length > myEndPosition) - { - subtracted = CharInClassRecursive(ch, set, myEndPosition); - } - - bool b = CharInClassInternal(ch, set, start, mySetLength, myCategoryLength); - - // Note that we apply the negation *before* performing the subtraction. This is because - // the negation only applies to the first char class, not the entire subtraction. - if (set[start + FLAGS] == 1) - b = !b; - - return b && !subtracted; - } - - /// - /// Determines a character's membership in a character class (via the - /// string representation of the class). - /// - private static bool CharInClassInternal(char ch, string set, int start, int mySetLength, int myCategoryLength) - { - int min; - int max; - int mid; - min = start + SETSTART; - max = min + mySetLength; - - while (min != max) - { - mid = (min + max) / 2; - if (ch < set[mid]) - max = mid; - else - min = mid + 1; - } - - // The starting position of the set within the character class determines - // whether what an odd or even ending position means. If the start is odd, - // an *even* ending position means the character was in the set. With recursive - // subtractions in the mix, the starting position = start+SETSTART. Since we know that - // SETSTART is odd, we can simplify it out of the equation. But if it changes we need to - // reverse this check. - Debug.Assert((SETSTART & 0x1) == 1, "If SETSTART is not odd, the calculation below this will be reversed"); - if ((min & 0x1) == (start & 0x1)) - return true; - else - { - if (myCategoryLength == 0) - return false; - - return CharInCategory(ch, set, start, mySetLength, myCategoryLength); - } - } - - private static bool CharInCategory(char ch, string set, int start, int mySetLength, int myCategoryLength) - { - UnicodeCategory chcategory = CharUnicodeInfo.GetUnicodeCategory(ch); - - int i = start + SETSTART + mySetLength; - int end = i + myCategoryLength; - while (i < end) - { - int curcat = unchecked((short)set[i]); - - if (curcat == 0) - { - // zero is our marker for a group of categories - treated as a unit - if (CharInCategoryGroup(ch, chcategory, set, ref i)) - return true; - } - else if (curcat > 0) - { - // greater than zero is a positive case - - if (curcat == SpaceConst) - { - if (char.IsWhiteSpace(ch)) - return true; - else - { - i++; - continue; - } - } - --curcat; - - if (chcategory == (UnicodeCategory)curcat) - return true; - } - else - { - // less than zero is a negative case - if (curcat == NotSpaceConst) - { - if (!char.IsWhiteSpace(ch)) - return true; - else - { - i++; - continue; - } - } - - //curcat = -curcat; - //--curcat; - curcat = -1 - curcat; - - if (chcategory != (UnicodeCategory)curcat) - return true; - } - i++; - } - return false; - } - - /// - /// This is used for categories which are composed of other categories - L, N, Z, W... - /// These groups need special treatment when they are negated - /// - private static bool CharInCategoryGroup(char ch, UnicodeCategory chcategory, string category, ref int i) - { - i++; - - int curcat = unchecked((short)category[i]); - if (curcat > 0) - { - // positive case - the character must be in ANY of the categories in the group - bool answer = false; - - while (curcat != 0) - { - if (!answer) - { - --curcat; - if (chcategory == (UnicodeCategory)curcat) - answer = true; - } - i++; - curcat = (short)category[i]; - } - return answer; - } - else - { - // negative case - the character must be in NONE of the categories in the group - bool answer = true; - - while (curcat != 0) - { - if (answer) - { - //curcat = -curcat; - //--curcat; - curcat = -1 - curcat; - if (chcategory == (UnicodeCategory)curcat) - answer = false; - } - i++; - curcat = unchecked((short)category[i]); - } - return answer; - } - } - } -} diff --git a/src/Workspaces/Core/Portable/EmbeddedLanguages/RegularExpressions/RegexHelpers.cs b/src/Workspaces/Core/Portable/EmbeddedLanguages/RegularExpressions/RegexHelpers.cs deleted file mode 100644 index 615697dff3c7a7e485cc408b5da6bd2cc0c7f525..0000000000000000000000000000000000000000 --- a/src/Workspaces/Core/Portable/EmbeddedLanguages/RegularExpressions/RegexHelpers.cs +++ /dev/null @@ -1,51 +0,0 @@ -// Copyright (c) Microsoft. All Rights Reserved. Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. - -using System.Collections.Immutable; -using System.Text.RegularExpressions; -using Microsoft.CodeAnalysis.EmbeddedLanguages.Common; -using Microsoft.CodeAnalysis.EmbeddedLanguages.VirtualChars; - -namespace Microsoft.CodeAnalysis.EmbeddedLanguages.RegularExpressions -{ - using RegexToken = EmbeddedSyntaxToken; - using RegexTrivia = EmbeddedSyntaxTrivia; - - internal static class RegexHelpers - { - public static bool HasOption(RegexOptions options, RegexOptions val) - => (options & val) != 0; - - public static RegexToken CreateToken(RegexKind kind, ImmutableArray leadingTrivia, ImmutableArray virtualChars) - => new RegexToken(kind, leadingTrivia, virtualChars, ImmutableArray.Empty, ImmutableArray.Empty, value: null); - - public static RegexToken CreateMissingToken(RegexKind kind) - => CreateToken(kind, ImmutableArray.Empty, ImmutableArray.Empty); - - public static RegexTrivia CreateTrivia(RegexKind kind, ImmutableArray virtualChars) - => CreateTrivia(kind, virtualChars, ImmutableArray.Empty); - - public static RegexTrivia CreateTrivia(RegexKind kind, ImmutableArray virtualChars, ImmutableArray diagnostics) - => new RegexTrivia(kind, virtualChars, diagnostics); - - public static bool IsSelfEscape(this RegexSimpleEscapeNode node) - { - if (node.TypeToken.VirtualChars.Length > 0) - { - switch (node.TypeToken.VirtualChars[0].Char) - { - case 'a': - case 'b': - case 'e': - case 'f': - case 'n': - case 'r': - case 't': - case 'v': - return false; - } - } - - return true; - } - } -} diff --git a/src/Workspaces/Core/Portable/EmbeddedLanguages/RegularExpressions/RegexKind.cs b/src/Workspaces/Core/Portable/EmbeddedLanguages/RegularExpressions/RegexKind.cs deleted file mode 100644 index a1970720d54979ee86ebab53fe5e95c7d4089604..0000000000000000000000000000000000000000 --- a/src/Workspaces/Core/Portable/EmbeddedLanguages/RegularExpressions/RegexKind.cs +++ /dev/null @@ -1,91 +0,0 @@ -// Copyright (c) Microsoft. All Rights Reserved. Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. - -namespace Microsoft.CodeAnalysis.EmbeddedLanguages.RegularExpressions -{ - internal enum RegexKind - { - None, - EndOfFile, - Sequence, - CompilationUnit, - Text, - StartAnchor, - EndAnchor, - Alternation, - Wildcard, - CharacterClass, - NegatedCharacterClass, - CharacterClassRange, - CharacterClassSubtraction, - PosixProperty, - - ZeroOrMoreQuantifier, - OneOrMoreQuantifier, - ZeroOrOneQuantifier, - ExactNumericQuantifier, - OpenRangeNumericQuantifier, - ClosedRangeNumericQuantifier, - LazyQuantifier, - - SimpleGrouping, - SimpleOptionsGrouping, - NestedOptionsGrouping, - NonCapturingGrouping, - PositiveLookaheadGrouping, - NegativeLookaheadGrouping, - PositiveLookbehindGrouping, - NegativeLookbehindGrouping, - NonBacktrackingGrouping, - CaptureGrouping, - BalancingGrouping, - ConditionalCaptureGrouping, - ConditionalExpressionGrouping, - - SimpleEscape, - AnchorEscape, - CharacterClassEscape, - CategoryEscape, - ControlEscape, - HexEscape, - UnicodeEscape, - OctalEscape, - CaptureEscape, - KCaptureEscape, - BackreferenceEscape, - - // Tokens - DollarToken, - OpenBraceToken, - CloseBraceToken, - OpenBracketToken, - CloseBracketToken, - OpenParenToken, - CloseParenToken, - BarToken, - DotToken, - CaretToken, - TextToken, - QuestionToken, - AsteriskToken, - PlusToken, - CommaToken, - BackslashToken, - ColonToken, - EqualsToken, - ExclamationToken, - GreaterThanToken, - LessThanToken, - MinusToken, - SingleQuoteToken, - - // Special multi-character tokens that have to be explicitly requested. - OptionsToken, - NumberToken, - CaptureNameToken, - EscapeCategoryToken, - - // Trivia - CommentTrivia, - WhitespaceTrivia, - } -} diff --git a/src/Workspaces/Core/Portable/EmbeddedLanguages/RegularExpressions/RegexLexer.cs b/src/Workspaces/Core/Portable/EmbeddedLanguages/RegularExpressions/RegexLexer.cs deleted file mode 100644 index cb28ef24fc7e5480d878e8e1a9c060ed3dcb7c78..0000000000000000000000000000000000000000 --- a/src/Workspaces/Core/Portable/EmbeddedLanguages/RegularExpressions/RegexLexer.cs +++ /dev/null @@ -1,463 +0,0 @@ -// Copyright (c) Microsoft. All Rights Reserved. Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. - -using System.Collections.Immutable; -using System.Diagnostics; -using System.Text.RegularExpressions; -using Microsoft.CodeAnalysis.EmbeddedLanguages.Common; -using Microsoft.CodeAnalysis.EmbeddedLanguages.VirtualChars; -using Microsoft.CodeAnalysis.PooledObjects; -using Microsoft.CodeAnalysis.Text; - -namespace Microsoft.CodeAnalysis.EmbeddedLanguages.RegularExpressions -{ - using static EmbeddedSyntaxHelpers; - using static RegexHelpers; - - using RegexToken = EmbeddedSyntaxToken; - using RegexTrivia = EmbeddedSyntaxTrivia; - - /// - /// Produces tokens from the sequence of characters. Unlike the - /// native C# and VB lexer, this lexer is much more tightly controlled by the parser. For - /// example, while C# can have trivia on virtual every token, the same is not true for - /// RegexTokens. As such, instead of automatically lexing out tokens to make them available for - /// the parser, the parser asks for each token as necessary passing the right information to - /// indicate which types and shapes of tokens are allowed. - /// - /// The tight coupling means that the parser is allowed direct control of the position of the - /// lexer. - /// - /// Note: most of the time, tokens returned are just a single character long, including for long - /// sequences of text characters (like ```"goo"```). This is just three s in a row (each containing a a - /// single character long). - /// - /// There are multi-character tokens though. For example ```10``` in ```a{10,}``` or ```name``` - /// in ```\k'name'``` - /// - internal struct RegexLexer - { - public readonly ImmutableArray Text; - public int Position; - - public RegexLexer(ImmutableArray text) : this() - { - Text = text; - } - - public VirtualChar CurrentChar => Position < Text.Length ? Text[Position] : new VirtualChar((char)0, default); - - public ImmutableArray GetSubPatternToCurrentPos(int start) - => GetSubPattern(start, Position); - - public ImmutableArray GetSubPattern(int start, int end) - { - var result = ArrayBuilder.GetInstance(end - start); - for (var i = start; i < end; i++) - { - result.Add(Text[i]); - } - - return result.ToImmutableAndFree(); - } - - public RegexToken ScanNextToken(bool allowTrivia, RegexOptions options) - { - var trivia = ScanLeadingTrivia(allowTrivia, options); - if (Position == Text.Length) - { - return CreateToken(RegexKind.EndOfFile, trivia, ImmutableArray.Empty); - } - - var ch = this.CurrentChar; - Position++; - - return CreateToken(GetKind(ch), trivia, ImmutableArray.Create(ch)); - } - - private static RegexKind GetKind(char ch) - { - switch (ch) - { - case '|': return RegexKind.BarToken; - case '*': return RegexKind.AsteriskToken; - case '+': return RegexKind.PlusToken; - case '?': return RegexKind.QuestionToken; - case '{': return RegexKind.OpenBraceToken; - case '}': return RegexKind.CloseBraceToken; - case '\\': return RegexKind.BackslashToken; - case '[': return RegexKind.OpenBracketToken; - case ']': return RegexKind.CloseBracketToken; - case '.': return RegexKind.DotToken; - case '^': return RegexKind.CaretToken; - case '$': return RegexKind.DollarToken; - case '(': return RegexKind.OpenParenToken; - case ')': return RegexKind.CloseParenToken; - case ',': return RegexKind.CommaToken; - case ':': return RegexKind.ColonToken; - case '=': return RegexKind.EqualsToken; - case '!': return RegexKind.ExclamationToken; - case '<': return RegexKind.LessThanToken; - case '>': return RegexKind.GreaterThanToken; - case '-': return RegexKind.MinusToken; - case '\'': return RegexKind.SingleQuoteToken; - default: return RegexKind.TextToken; - } - } - - private ImmutableArray ScanLeadingTrivia(bool allowTrivia, RegexOptions options) - { - if (!allowTrivia) - { - return ImmutableArray.Empty; - } - - var result = ArrayBuilder.GetInstance(); - - var start = Position; - - while (Position < Text.Length) - { - var comment = ScanComment(options); - if (comment != null) - { - result.Add(comment.Value); - continue; - } - - var whitespace = ScanWhitespace(options); - if (whitespace != null) - { - result.Add(whitespace.Value); - continue; - } - - break; - } - - return result.ToImmutableAndFree(); - } - - public RegexTrivia? ScanComment(RegexOptions options) - { - if (Position >= Text.Length) - { - return null; - } - - if (HasOption(options, RegexOptions.IgnorePatternWhitespace)) - { - if (Text[Position] == '#') - { - var start = Position; - - // Note: \n is the only newline the native regex parser looks for. - while (Position < Text.Length && - Text[Position] != '\n') - { - Position++; - } - - return CreateTrivia(RegexKind.CommentTrivia, GetSubPatternToCurrentPos(start)); - } - } - - if (IsAt("(?#")) - { - var start = Position; - while (Position < Text.Length && - Text[Position] != ')') - { - Position++; - } - - if (Position == Text.Length) - { - var diagnostics = ImmutableArray.Create(new EmbeddedDiagnostic( - WorkspacesResources.Unterminated_regex_comment, - GetTextSpan(start, Position))); - return CreateTrivia(RegexKind.CommentTrivia, GetSubPatternToCurrentPos(start), diagnostics); - } - - Position++; - return CreateTrivia(RegexKind.CommentTrivia, GetSubPatternToCurrentPos(start)); - } - - return null; - } - - public TextSpan GetTextSpan(int startInclusive, int endExclusive) - => TextSpan.FromBounds(Text[startInclusive].Span.Start, Text[endExclusive - 1].Span.End); - - public bool IsAt(string val) - => TextAt(this.Position, val); - - private bool TextAt(int position, string val) - { - for (var i = 0; i < val.Length; i++) - { - if (position + i >= Text.Length || - Text[position + i] != val[i]) - { - return false; - } - } - - return true; - } - - private RegexTrivia? ScanWhitespace(RegexOptions options) - { - if (HasOption(options, RegexOptions.IgnorePatternWhitespace)) - { - var start = Position; - while (Position < Text.Length && IsBlank(Text[Position])) - { - Position++; - } - - if (Position > start) - { - return CreateTrivia(RegexKind.WhitespaceTrivia, GetSubPatternToCurrentPos(start)); - } - } - - return null; - } - - private bool IsBlank(char ch) - { - // List taken from the native regex parser. - switch (ch) - { - case '\u0009': - case '\u000A': - case '\u000C': - case '\u000D': - case ' ': - return true; - default: - return false; - } - } - - public RegexToken? TryScanEscapeCategory() - { - var start = Position; - while (Position < Text.Length && - IsEscapeCategoryChar(this.CurrentChar)) - { - Position++; - } - - if (Position == start) - { - return null; - } - - var token = CreateToken(RegexKind.EscapeCategoryToken, ImmutableArray.Empty, GetSubPatternToCurrentPos(start)); - var category = token.VirtualChars.CreateString(); - - if (!RegexCharClass.IsEscapeCategory(category)) - { - token = token.AddDiagnosticIfNone(new EmbeddedDiagnostic( - string.Format(WorkspacesResources.Unknown_property_0, category), - token.GetSpan())); - } - - return token; - } - - private static bool IsEscapeCategoryChar(VirtualChar ch) - => ch == '-' || - (ch >= 'a' && ch <= 'z') || - (ch >= 'A' && ch <= 'Z'); - - public RegexToken? TryScanNumber() - { - if (Position == Text.Length) - { - return null; - } - - const int MaxValueDiv10 = int.MaxValue / 10; - const int MaxValueMod10 = int.MaxValue % 10; - - var value = 0; - var start = Position; - var error = false; - while (Position < Text.Length && this.CurrentChar is var ch && IsDecimalDigit(ch)) - { - Position++; - - unchecked - { - var charVal = ch - '0'; - if (value > MaxValueDiv10 || (value == MaxValueDiv10 && charVal > MaxValueMod10)) - { - error = true; - } - - value *= 10; - value += charVal; - } - } - - if (Position == start) - { - return null; - } - - var token = CreateToken(RegexKind.NumberToken, ImmutableArray.Empty, GetSubPatternToCurrentPos(start)); - token = token.With(value: value); - - if (error) - { - token = token.AddDiagnosticIfNone(new EmbeddedDiagnostic( - WorkspacesResources.Capture_group_numbers_must_be_less_than_or_equal_to_Int32_MaxValue, - token.GetSpan())); - } - - return token; - } - - public RegexToken? TryScanCaptureName() - { - if (Position == Text.Length) - { - return null; - } - - var start = Position; - while (Position < Text.Length && RegexCharClass.IsWordChar(this.CurrentChar)) - { - Position++; - } - - if (Position == start) - { - return null; - } - - var token = CreateToken(RegexKind.CaptureNameToken, ImmutableArray.Empty, GetSubPatternToCurrentPos(start)); - token = token.With(value: token.VirtualChars.CreateString()); - return token; - } - - public RegexToken? TryScanNumberOrCaptureName() - => TryScanNumber() ?? TryScanCaptureName(); - - public RegexToken? TryScanOptions() - { - var start = Position; - while (Position < Text.Length && IsOptionChar(this.CurrentChar)) - { - Position++; - } - - return start == Position - ? default(RegexToken?) - : CreateToken(RegexKind.OptionsToken, ImmutableArray.Empty, GetSubPatternToCurrentPos(start)); - } - - private bool IsOptionChar(char ch) - { - switch (ch) - { - case '+': case '-': - case 'i': case 'I': - case 'm': case 'M': - case 'n': case 'N': - case 's': case 'S': - case 'x': case 'X': - return true; - default: - return false; - } - } - - public RegexToken ScanHexCharacters(int count) - { - var start = Position; - var beforeSlash = start - 2; - - // Make sure we're right after the \x or \u. - Debug.Assert(Text[beforeSlash].Char == '\\'); - Debug.Assert(Text[beforeSlash + 1].Char == 'x' || Text[beforeSlash + 1].Char == 'u'); - - for (int i = 0; i < count; i++) - { - if (Position < Text.Length && IsHexChar(this.CurrentChar)) - { - Position++; - } - } - - var result = CreateToken( - RegexKind.TextToken, ImmutableArray.Empty, GetSubPatternToCurrentPos(start)); - - var length = Position - start; - if (length != count) - { - result = result.AddDiagnosticIfNone(new EmbeddedDiagnostic( - WorkspacesResources.Insufficient_hexadecimal_digits, - TextSpan.FromBounds(Text[beforeSlash].Span.Start, Text[Position - 1].Span.End))); - } - - return result; - } - - public static bool IsHexChar(char ch) - => IsDecimalDigit(ch) || - (ch >= 'a' && ch <= 'f') || - (ch >= 'A' && ch <= 'F'); - - private static bool IsDecimalDigit(char ch) - => ch >= '0' && ch <= '9'; - - private static bool IsOctalDigit(char ch) - => ch >= '0' && ch <= '7'; - - public RegexToken ScanOctalCharacters(RegexOptions options) - { - var start = Position; - var beforeSlash = start - 1; - - // Make sure we're right after the \ - // And we only should have been called if we were \octal-char - Debug.Assert(Text[beforeSlash].Char == '\\'); - Debug.Assert(IsOctalDigit(Text[start].Char)); - - const int maxChars = 3; - int currentVal = 0; - - for (int i = 0; i < maxChars; i++) - { - if (Position < Text.Length && IsOctalDigit(this.CurrentChar)) - { - var octalVal = this.CurrentChar - '0'; - Debug.Assert(octalVal >= 0 && octalVal <= 7); - currentVal *= 8; - currentVal += octalVal; - - Position++; - - // Ecmascript doesn't allow octal values above 32 (0x20 in hex). Note: we do - // *not* add a diagnostic. This is not an error situation. The .net lexer - // simply stops once it hits a value greater than a legal octal value. - if (HasOption(options, RegexOptions.ECMAScript) && currentVal >= 0x20) - { - break; - } - } - } - - Debug.Assert(Position - start > 0); - - var result = CreateToken( - RegexKind.TextToken, ImmutableArray.Empty, GetSubPatternToCurrentPos(start)); - - return result; - } - } -} diff --git a/src/Workspaces/Core/Portable/EmbeddedLanguages/RegularExpressions/RegexNode.cs b/src/Workspaces/Core/Portable/EmbeddedLanguages/RegularExpressions/RegexNode.cs deleted file mode 100644 index 0bcb4013d80f785504e5a84aa474419d6e15f8e4..0000000000000000000000000000000000000000 --- a/src/Workspaces/Core/Portable/EmbeddedLanguages/RegularExpressions/RegexNode.cs +++ /dev/null @@ -1,15 +0,0 @@ -// Copyright (c) Microsoft. All Rights Reserved. Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. - -using Microsoft.CodeAnalysis.EmbeddedLanguages.Common; - -namespace Microsoft.CodeAnalysis.EmbeddedLanguages.RegularExpressions -{ - internal abstract class RegexNode : EmbeddedSyntaxNode - { - protected RegexNode(RegexKind kind) : base(kind) - { - } - - public abstract void Accept(IRegexNodeVisitor visitor); - } -} diff --git a/src/Workspaces/Core/Portable/EmbeddedLanguages/RegularExpressions/RegexNodes.cs b/src/Workspaces/Core/Portable/EmbeddedLanguages/RegularExpressions/RegexNodes.cs deleted file mode 100644 index 1a688a8f022d3c256aa3604de87f209b97ab1685..0000000000000000000000000000000000000000 --- a/src/Workspaces/Core/Portable/EmbeddedLanguages/RegularExpressions/RegexNodes.cs +++ /dev/null @@ -1,1677 +0,0 @@ -// Copyright (c) Microsoft. All Rights Reserved. Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. - -using System; -using System.Collections.Immutable; -using System.Diagnostics; -using Microsoft.CodeAnalysis.EmbeddedLanguages.Common; - -namespace Microsoft.CodeAnalysis.EmbeddedLanguages.RegularExpressions -{ - using RegexNodeOrToken = EmbeddedSyntaxNodeOrToken; - using RegexToken = EmbeddedSyntaxToken; - - internal sealed class RegexCompilationUnit : RegexNode - { - public RegexCompilationUnit(RegexExpressionNode expression, RegexToken endOfFileToken) - : base(RegexKind.CompilationUnit) - { - Debug.Assert(expression != null); - Debug.Assert(endOfFileToken.Kind == RegexKind.EndOfFile); - Expression = expression; - EndOfFileToken = endOfFileToken; - } - - public RegexExpressionNode Expression { get; } - public RegexToken EndOfFileToken { get; } - - internal override int ChildCount => 2; - - internal override RegexNodeOrToken ChildAt(int index) - { - switch (index) - { - case 0: return Expression; - case 1: return EndOfFileToken; - } - - throw new InvalidOperationException(); - } - - public override void Accept(IRegexNodeVisitor visitor) - => visitor.Visit(this); - } - - /// - /// Represents a possibly-empty sequence of regex expressions. For example, the regex "" - /// will produce an empty RegexSequence nodes, and "a|" will produce an alternation with an - /// empty sequence on the right side. Having a node represent the empty sequence is actually - /// appropriate as these are legal regexes and the empty sequence represents 'a pattern - /// that will match any position'. Not having a node for this would actually end up - /// complicating things in terms of dealing with nulls in the tree. - /// - /// This does not deviate from Roslyn principles. While nodes for empty text are rare, they - /// are allowed (for example, OmittedTypeArgument in C#). - /// - internal sealed class RegexSequenceNode : RegexExpressionNode - { - public ImmutableArray Children { get; } - - internal override int ChildCount => Children.Length; - - public RegexSequenceNode(ImmutableArray children) - : base(RegexKind.Sequence) - { - this.Children = children; - } - - internal override RegexNodeOrToken ChildAt(int index) - => Children[index]; - - public override void Accept(IRegexNodeVisitor visitor) - => visitor.Visit(this); - } - - /// - /// Represents a chunk of text (usually just a single char) from the original pattern. - /// - internal sealed class RegexTextNode : RegexPrimaryExpressionNode - { - public RegexTextNode(RegexToken textToken) - : base(RegexKind.Text) - { - Debug.Assert(textToken.Kind == RegexKind.TextToken); - TextToken = textToken; - } - - public RegexToken TextToken { get; } - - internal override int ChildCount => 1; - - internal override RegexNodeOrToken ChildAt(int index) - { - switch (index) - { - case 0: return TextToken; - } - - throw new InvalidOperationException(); - } - - public override void Accept(IRegexNodeVisitor visitor) - => visitor.Visit(this); - } - - /// - /// Base type for [...] and [^...] character classes. - /// - internal abstract class RegexBaseCharacterClassNode : RegexPrimaryExpressionNode - { - protected RegexBaseCharacterClassNode( - RegexKind kind, RegexToken openBracketToken, RegexSequenceNode components, RegexToken closeBracketToken) - : base(kind) - { - Debug.Assert(openBracketToken.Kind == RegexKind.OpenBracketToken); - Debug.Assert(components != null); - Debug.Assert(closeBracketToken.Kind == RegexKind.CloseBracketToken); - OpenBracketToken = openBracketToken; - Components = components; - CloseBracketToken = closeBracketToken; - } - - public RegexToken OpenBracketToken { get; } - public RegexSequenceNode Components { get; } - public RegexToken CloseBracketToken { get; } - } - - /// - /// [...] node. - /// - internal sealed class RegexCharacterClassNode : RegexBaseCharacterClassNode - { - public RegexCharacterClassNode( - RegexToken openBracketToken, RegexSequenceNode components, RegexToken closeBracketToken) - : base(RegexKind.CharacterClass, openBracketToken, components, closeBracketToken) - { - } - - internal override int ChildCount => 3; - - internal override RegexNodeOrToken ChildAt(int index) - { - switch (index) - { - case 0: return OpenBracketToken; - case 1: return Components; - case 2: return CloseBracketToken; - } - - throw new InvalidOperationException(); - } - - public override void Accept(IRegexNodeVisitor visitor) - => visitor.Visit(this); - } - - /// - /// [^...] node - /// - internal sealed class RegexNegatedCharacterClassNode : RegexBaseCharacterClassNode - { - public RegexNegatedCharacterClassNode( - RegexToken openBracketToken, RegexToken caretToken, RegexSequenceNode components, RegexToken closeBracketToken) - : base(RegexKind.NegatedCharacterClass, openBracketToken, components, closeBracketToken) - { - Debug.Assert(caretToken.Kind == RegexKind.CaretToken); - CaretToken = caretToken; - } - - public RegexToken CaretToken { get; } - - internal override int ChildCount => 4; - - internal override RegexNodeOrToken ChildAt(int index) - { - switch (index) - { - case 0: return OpenBracketToken; - case 1: return CaretToken; - case 2: return Components; - case 3: return CloseBracketToken; - } - - throw new InvalidOperationException(); - } - - public override void Accept(IRegexNodeVisitor visitor) - => visitor.Visit(this); - } - - /// - /// ```a-z``` node in a character class. - /// - internal sealed class RegexCharacterClassRangeNode : RegexPrimaryExpressionNode - { - public RegexCharacterClassRangeNode( - RegexExpressionNode left, RegexToken minusToken, RegexExpressionNode right) - : base(RegexKind.CharacterClassRange) - { - Debug.Assert(left != null); - Debug.Assert(minusToken.Kind == RegexKind.MinusToken); - Debug.Assert(right != null); - Left = left; - MinusToken = minusToken; - Right = right; - } - - public RegexExpressionNode Left { get; } - public RegexToken MinusToken { get; } - public RegexExpressionNode Right { get; } - - internal override int ChildCount => 3; - - internal override RegexNodeOrToken ChildAt(int index) - { - switch (index) - { - case 0: return Left; - case 1: return MinusToken; - case 2: return Right; - } - - throw new InvalidOperationException(); - } - - public override void Accept(IRegexNodeVisitor visitor) - => visitor.Visit(this); - } - - /// - /// ```-[f-m]``` in a pattern like ```[a-z-[f-m]]```. A subtraction must come last in a - /// character class, and removes some range of chars from the claracter class built up - /// so far. - /// - internal sealed class RegexCharacterClassSubtractionNode : RegexPrimaryExpressionNode - { - public RegexCharacterClassSubtractionNode( - RegexToken minusToken, RegexBaseCharacterClassNode characterClass) - : base(RegexKind.CharacterClassSubtraction) - { - Debug.Assert(minusToken.Kind == RegexKind.MinusToken); - Debug.Assert(characterClass != null); - MinusToken = minusToken; - CharacterClass = characterClass; - } - - public RegexToken MinusToken { get; } - public RegexBaseCharacterClassNode CharacterClass { get; } - - internal override int ChildCount => 2; - - internal override RegexNodeOrToken ChildAt(int index) - { - switch (index) - { - case 0: return MinusToken; - case 1: return CharacterClass; - } - - throw new InvalidOperationException(); - } - - public override void Accept(IRegexNodeVisitor visitor) - => visitor.Visit(this); - } - - /// - /// Represents a ```[:...:]``` node in a character class. Note: the .net regex parser - /// simply treats this as the character ```[``` and ignores the rest of the ```:...:]```. - /// They latter part has no impact on the actual match engine that is produced. - /// - internal sealed class RegexPosixPropertyNode : RegexPrimaryExpressionNode - { - public RegexPosixPropertyNode(RegexToken textToken) - : base(RegexKind.PosixProperty) - { - Debug.Assert(textToken.Kind == RegexKind.TextToken); - TextToken = textToken; - } - - public RegexToken TextToken { get; } - - internal override int ChildCount => 1; - - internal override RegexNodeOrToken ChildAt(int index) - { - switch (index) - { - case 0: return TextToken; - } - - throw new InvalidOperationException(); - } - - public override void Accept(IRegexNodeVisitor visitor) - => visitor.Visit(this); - } - - /// - /// Root of all expression nodes. - /// - internal abstract class RegexExpressionNode : RegexNode - { - protected RegexExpressionNode(RegexKind kind) - : base(kind) - { - } - } - - /// - /// Root of all the primary nodes (similar to unary nodes in C#). - /// - internal abstract class RegexPrimaryExpressionNode : RegexExpressionNode - { - protected RegexPrimaryExpressionNode(RegexKind kind) - : base(kind) - { - } - } - - /// - /// A ```.``` expression. - /// - internal sealed class RegexWildcardNode : RegexPrimaryExpressionNode - { - public RegexWildcardNode(RegexToken dotToken) - : base(RegexKind.Wildcard) - { - Debug.Assert(dotToken.Kind == RegexKind.DotToken); - DotToken = dotToken; - } - - public RegexToken DotToken { get; } - - internal override int ChildCount => 1; - - internal override RegexNodeOrToken ChildAt(int index) - { - switch (index) - { - case 0: return DotToken; - } - - throw new InvalidOperationException(); - } - - public override void Accept(IRegexNodeVisitor visitor) - => visitor.Visit(this); - } - - /// - /// Root of all quantifier nodes: ```?```, ```*``` etc. - /// - internal abstract class RegexQuantifierNode : RegexExpressionNode - { - protected RegexQuantifierNode(RegexKind kind) - : base(kind) - { - } - } - - /// - /// ```expr*``` - /// - internal sealed class RegexZeroOrMoreQuantifierNode : RegexQuantifierNode - { - public RegexZeroOrMoreQuantifierNode( - RegexExpressionNode expression, RegexToken asteriskToken) - : base(RegexKind.ZeroOrMoreQuantifier) - { - Debug.Assert(expression != null); - Debug.Assert(asteriskToken.Kind == RegexKind.AsteriskToken); - Expression = expression; - AsteriskToken = asteriskToken; - } - - public RegexExpressionNode Expression { get; } - public RegexToken AsteriskToken { get; } - - internal override int ChildCount => 2; - - internal override RegexNodeOrToken ChildAt(int index) - { - switch (index) - { - case 0: return this.Expression; - case 1: return this.AsteriskToken; - } - - throw new InvalidOperationException(); - } - - public override void Accept(IRegexNodeVisitor visitor) - => visitor.Visit(this); - } - - /// - /// ```expr+``` - /// - internal sealed class RegexOneOrMoreQuantifierNode : RegexQuantifierNode - { - public RegexOneOrMoreQuantifierNode( - RegexExpressionNode expression, RegexToken plusToken) - : base(RegexKind.OneOrMoreQuantifier) - { - Debug.Assert(expression != null); - Debug.Assert(plusToken.Kind == RegexKind.PlusToken); - Expression = expression; - PlusToken = plusToken; - } - - public RegexExpressionNode Expression { get; } - public RegexToken PlusToken { get; } - - internal override int ChildCount => 2; - - internal override RegexNodeOrToken ChildAt(int index) - { - switch (index) - { - case 0: return this.Expression; - case 1: return this.PlusToken; - } - - throw new InvalidOperationException(); - } - - public override void Accept(IRegexNodeVisitor visitor) - => visitor.Visit(this); - } - - /// - /// ```expr?``` - /// - internal sealed class RegexZeroOrOneQuantifierNode : RegexQuantifierNode - { - public RegexZeroOrOneQuantifierNode( - RegexExpressionNode expression, RegexToken questionToken) - : base(RegexKind.ZeroOrOneQuantifier) - { - Debug.Assert(expression != null); - Debug.Assert(questionToken.Kind == RegexKind.QuestionToken); - Expression = expression; - QuestionToken = questionToken; - } - - public RegexExpressionNode Expression { get; } - public RegexToken QuestionToken { get; } - - internal override int ChildCount => 2; - - internal override RegexNodeOrToken ChildAt(int index) - { - switch (index) - { - case 0: return this.Expression; - case 1: return this.QuestionToken; - } - - throw new InvalidOperationException(); - } - - public override void Accept(IRegexNodeVisitor visitor) - => visitor.Visit(this); - } - - /// - /// Quantifiers can be optionally followed by a ? to make them lazy. i.e. ```a*?``` or ```a+?```. - /// You can even have ```a??``` (zero or one 'a', lazy). However, only one lazy modifier is allowed - /// ```a*??``` or ```a???``` is not allowed. - /// - internal sealed class RegexLazyQuantifierNode : RegexExpressionNode - { - public RegexLazyQuantifierNode( - RegexQuantifierNode quantifier, RegexToken questionToken) - : base(RegexKind.LazyQuantifier) - { - Debug.Assert(quantifier != null); - Debug.Assert(quantifier.Kind != RegexKind.LazyQuantifier); - Debug.Assert(questionToken.Kind == RegexKind.QuestionToken); - Quantifier = quantifier; - QuestionToken = questionToken; - } - - public RegexQuantifierNode Quantifier { get; } - public RegexToken QuestionToken { get; } - - internal override int ChildCount => 2; - - internal override RegexNodeOrToken ChildAt(int index) - { - switch (index) - { - case 0: return this.Quantifier; - case 1: return this.QuestionToken; - } - - throw new InvalidOperationException(); - } - - public override void Accept(IRegexNodeVisitor visitor) - => visitor.Visit(this); - } - - /// - /// ```a{...}``` - /// - internal abstract class RegexNumericQuantifierNode : RegexQuantifierNode - { - protected RegexNumericQuantifierNode( - RegexKind kind, RegexPrimaryExpressionNode expression, RegexToken openBraceToken, RegexToken firstNumberToken, RegexToken closeBraceToken) - : base(kind) - { - Debug.Assert(expression != null); - Debug.Assert(openBraceToken.Kind == RegexKind.OpenBraceToken); - Debug.Assert(firstNumberToken.Kind == RegexKind.NumberToken); - Debug.Assert(closeBraceToken.Kind == RegexKind.CloseBraceToken); - Expression = expression; - OpenBraceToken = openBraceToken; - FirstNumberToken = firstNumberToken; - CloseBraceToken = closeBraceToken; - } - - public RegexExpressionNode Expression { get; } - public RegexToken OpenBraceToken { get; } - public RegexToken FirstNumberToken { get; } - public RegexToken CloseBraceToken { get; } - } - - /// - /// ```a{5}``` - /// - internal sealed class RegexExactNumericQuantifierNode : RegexNumericQuantifierNode - { - public RegexExactNumericQuantifierNode( - RegexPrimaryExpressionNode expression, RegexToken openBraceToken, RegexToken numberToken, RegexToken closeBraceToken) - : base(RegexKind.ExactNumericQuantifier, expression, openBraceToken, numberToken, closeBraceToken) - { - } - - internal override int ChildCount => 4; - - internal override RegexNodeOrToken ChildAt(int index) - { - switch (index) - { - case 0: return Expression; - case 1: return OpenBraceToken; - case 2: return FirstNumberToken; - case 3: return CloseBraceToken; - } - - throw new InvalidOperationException(); - } - - public override void Accept(IRegexNodeVisitor visitor) - => visitor.Visit(this); - } - - /// - /// ```a{5,}``` - /// - internal sealed class RegexOpenNumericRangeQuantifierNode : RegexNumericQuantifierNode - { - public RegexOpenNumericRangeQuantifierNode( - RegexPrimaryExpressionNode expression, - RegexToken openBraceToken, RegexToken firstNumberToken, - RegexToken commaToken, RegexToken closeBraceToken) - : base(RegexKind.OpenRangeNumericQuantifier, expression, openBraceToken, firstNumberToken, closeBraceToken) - { - Debug.Assert(commaToken.Kind == RegexKind.CommaToken); - CommaToken = commaToken; - } - - public RegexToken CommaToken { get; } - - internal override int ChildCount => 5; - - internal override RegexNodeOrToken ChildAt(int index) - { - switch (index) - { - case 0: return Expression; - case 1: return OpenBraceToken; - case 2: return FirstNumberToken; - case 3: return CommaToken; - case 4: return CloseBraceToken; - } - - throw new InvalidOperationException(); - } - - public override void Accept(IRegexNodeVisitor visitor) - => visitor.Visit(this); - } - - /// - /// ```a{5,10}``` - /// - internal sealed class RegexClosedNumericRangeQuantifierNode : RegexNumericQuantifierNode - { - public RegexClosedNumericRangeQuantifierNode( - RegexPrimaryExpressionNode expression, - RegexToken openBraceToken, RegexToken firstNumberToken, - RegexToken commaToken, RegexToken secondNumberToken, RegexToken closeBraceToken) - : base(RegexKind.ClosedRangeNumericQuantifier, expression, openBraceToken, firstNumberToken, closeBraceToken) - { - Debug.Assert(commaToken.Kind == RegexKind.CommaToken); - Debug.Assert(secondNumberToken.Kind == RegexKind.NumberToken); - CommaToken = commaToken; - SecondNumberToken = secondNumberToken; - } - - public RegexToken CommaToken { get; } - public RegexToken SecondNumberToken { get; } - - internal override int ChildCount => 6; - - internal override RegexNodeOrToken ChildAt(int index) - { - switch (index) - { - case 0: return Expression; - case 1: return OpenBraceToken; - case 2: return FirstNumberToken; - case 3: return CommaToken; - case 4: return SecondNumberToken; - case 5: return CloseBraceToken; - } - - throw new InvalidOperationException(); - } - - public override void Accept(IRegexNodeVisitor visitor) - => visitor.Visit(this); - } - - /// - /// ```$``` or ```^```. - /// - internal sealed class RegexAnchorNode : RegexPrimaryExpressionNode - { - public RegexAnchorNode(RegexKind kind, RegexToken anchorToken) - : base(kind) - { - Debug.Assert(anchorToken.Kind == RegexKind.DollarToken || anchorToken.Kind == RegexKind.CaretToken); - AnchorToken = anchorToken; - } - - public RegexToken AnchorToken { get; } - - internal override int ChildCount => 1; - - internal override RegexNodeOrToken ChildAt(int index) - { - switch (index) - { - case 0: return AnchorToken; - } - - throw new InvalidOperationException(); - } - - public override void Accept(IRegexNodeVisitor visitor) - => visitor.Visit(this); - } - - /// - /// ```expr1|expr2``` node. - /// - internal sealed class RegexAlternationNode : RegexExpressionNode - { - public RegexAlternationNode( - RegexExpressionNode left, RegexToken barToken, RegexSequenceNode right) - : base(RegexKind.Alternation) - { - Debug.Assert(left != null); - Debug.Assert(barToken.Kind == RegexKind.BarToken); - Debug.Assert(right != null); - Left = left; - BarToken = barToken; - Right = right; - } - - public RegexExpressionNode Left { get; } - public RegexToken BarToken { get; } - public RegexSequenceNode Right { get; } - - internal override int ChildCount => 3; - - internal override RegexNodeOrToken ChildAt(int index) - { - switch (index) - { - case 0: return Left; - case 1: return BarToken; - case 2: return Right; - } - - throw new InvalidOperationException(); - } - - public override void Accept(IRegexNodeVisitor visitor) - => visitor.Visit(this); - } - - /// - /// Base type of all non-trivia ```(...)``` nodes - /// - internal abstract class RegexGroupingNode : RegexPrimaryExpressionNode - { - protected RegexGroupingNode(RegexKind kind, RegexToken openParenToken, RegexToken closeParenToken) - : base(kind) - { - Debug.Assert(openParenToken.Kind == RegexKind.OpenParenToken); - Debug.Assert(closeParenToken.Kind == RegexKind.CloseParenToken); - OpenParenToken = openParenToken; - CloseParenToken = closeParenToken; - } - - public RegexToken OpenParenToken { get; } - public RegexToken CloseParenToken { get; } - } - - /// - /// The ```(...)``` node you get when the group does not start with ```(?``` - /// - internal sealed class RegexSimpleGroupingNode : RegexGroupingNode - { - public RegexSimpleGroupingNode(RegexToken openParenToken, RegexExpressionNode expression, RegexToken closeParenToken) - : base(RegexKind.SimpleGrouping, openParenToken, closeParenToken) - { - Debug.Assert(expression != null); - Expression = expression; - } - - public RegexExpressionNode Expression { get; } - - internal override int ChildCount => 3; - - internal override RegexNodeOrToken ChildAt(int index) - { - switch (index) - { - case 0: return OpenParenToken; - case 1: return Expression; - case 2: return CloseParenToken; - } - - throw new InvalidOperationException(); - } - - public override void Accept(IRegexNodeVisitor visitor) - => visitor.Visit(this); - } - - /// - /// Base type of all ```(?...)``` groupings. - /// - internal abstract class RegexQuestionGroupingNode : RegexGroupingNode - { - protected RegexQuestionGroupingNode(RegexKind kind, RegexToken openParenToken, RegexToken questionToken, RegexToken closeParenToken) - : base(kind, openParenToken, closeParenToken) - { - Debug.Assert(questionToken.Kind == RegexKind.QuestionToken); - QuestionToken = questionToken; - } - - public RegexToken QuestionToken { get; } - } - - /// - /// Base type of ```(?inmsx)``` or ```(?inmsx:...)``` nodes. - /// - internal abstract class RegexOptionsGroupingNode : RegexQuestionGroupingNode - { - protected RegexOptionsGroupingNode(RegexKind kind, RegexToken openParenToken, RegexToken questionToken, RegexToken optionsToken, RegexToken closeParenToken) - : base(kind, openParenToken, questionToken, closeParenToken) - { - OptionsToken = optionsToken; - } - - public RegexToken OptionsToken { get; } - } - - /// - /// ```(?inmsx)``` node. Changes options in a sequence for all subsequence nodes. - /// - internal sealed class RegexSimpleOptionsGroupingNode : RegexOptionsGroupingNode - { - public RegexSimpleOptionsGroupingNode( - RegexToken openParenToken, RegexToken questionToken, RegexToken optionsToken, RegexToken closeParenToken) - : base(RegexKind.SimpleOptionsGrouping, openParenToken, questionToken, optionsToken, closeParenToken) - { - } - - internal override int ChildCount => 4; - - internal override RegexNodeOrToken ChildAt(int index) - { - switch (index) - { - case 0: return OpenParenToken; - case 1: return QuestionToken; - case 2: return OptionsToken; - case 3: return CloseParenToken; - } - - throw new InvalidOperationException(); - } - - public override void Accept(IRegexNodeVisitor visitor) - => visitor.Visit(this); - } - - /// - /// ```(?inmsx:expr)``` node. Changes options for the parsing of 'expr'. - /// - internal sealed class RegexNestedOptionsGroupingNode : RegexOptionsGroupingNode - { - public RegexNestedOptionsGroupingNode( - RegexToken openParenToken, RegexToken questionToken, RegexToken optionsToken, - RegexToken colonToken, RegexExpressionNode expression, RegexToken closeParenToken) - : base(RegexKind.NestedOptionsGrouping, openParenToken, questionToken, optionsToken, closeParenToken) - { - Debug.Assert(colonToken.Kind == RegexKind.ColonToken); - Debug.Assert(expression != null); - ColonToken = colonToken; - Expression = expression; - } - - public RegexToken ColonToken { get; } - public RegexExpressionNode Expression { get; } - - internal override int ChildCount => 6; - - internal override RegexNodeOrToken ChildAt(int index) - { - switch (index) - { - case 0: return OpenParenToken; - case 1: return QuestionToken; - case 2: return OptionsToken; - case 3: return ColonToken; - case 4: return Expression; - case 5: return CloseParenToken; - } - - throw new InvalidOperationException(); - } - - public override void Accept(IRegexNodeVisitor visitor) - => visitor.Visit(this); - } - - /// - /// ```(?:expr)``` node. - /// - internal sealed class RegexNonCapturingGroupingNode : RegexQuestionGroupingNode - { - public RegexNonCapturingGroupingNode( - RegexToken openParenToken, RegexToken questionToken, RegexToken colonToken, - RegexExpressionNode expression, RegexToken closeParenToken) - : base(RegexKind.NonCapturingGrouping, openParenToken, questionToken, closeParenToken) - { - Debug.Assert(colonToken.Kind == RegexKind.ColonToken); - Debug.Assert(expression != null); - ColonToken = colonToken; - Expression = expression; - } - - public RegexToken ColonToken { get; } - public RegexExpressionNode Expression { get; } - - internal override int ChildCount => 5; - - internal override RegexNodeOrToken ChildAt(int index) - { - switch (index) - { - case 0: return OpenParenToken; - case 1: return QuestionToken; - case 2: return ColonToken; - case 3: return Expression; - case 4: return CloseParenToken; - } - - throw new InvalidOperationException(); - } - - public override void Accept(IRegexNodeVisitor visitor) - => visitor.Visit(this); - } - - /// - /// ```(?=expr)``` node. - /// - internal sealed class RegexPositiveLookaheadGroupingNode : RegexQuestionGroupingNode - { - public RegexPositiveLookaheadGroupingNode( - RegexToken openParenToken, RegexToken questionToken, RegexToken equalsToken, - RegexExpressionNode expression, RegexToken closeParenToken) - : base(RegexKind.PositiveLookaheadGrouping, openParenToken, questionToken, closeParenToken) - { - Debug.Assert(equalsToken.Kind == RegexKind.EqualsToken); - Debug.Assert(expression != null); - EqualsToken = equalsToken; - Expression = expression; - } - - public RegexToken EqualsToken { get; } - public RegexExpressionNode Expression { get; } - - internal override int ChildCount => 5; - - internal override RegexNodeOrToken ChildAt(int index) - { - switch (index) - { - case 0: return OpenParenToken; - case 1: return QuestionToken; - case 2: return EqualsToken; - case 3: return Expression; - case 4: return CloseParenToken; - } - - throw new InvalidOperationException(); - } - - public override void Accept(IRegexNodeVisitor visitor) - => visitor.Visit(this); - } - - /// - /// ```(?!expr)``` node. - /// - internal sealed class RegexNegativeLookaheadGroupingNode : RegexQuestionGroupingNode - { - public RegexNegativeLookaheadGroupingNode( - RegexToken openParenToken, RegexToken questionToken, RegexToken exclamationToken, - RegexExpressionNode expression, RegexToken closeParenToken) - : base(RegexKind.NegativeLookaheadGrouping, openParenToken, questionToken, closeParenToken) - { - Debug.Assert(exclamationToken.Kind == RegexKind.ExclamationToken); - Debug.Assert(expression != null); - ExclamationToken = exclamationToken; - Expression = expression; - } - - public RegexToken ExclamationToken { get; } - public RegexExpressionNode Expression { get; } - - internal override int ChildCount => 5; - - internal override RegexNodeOrToken ChildAt(int index) - { - switch (index) - { - case 0: return OpenParenToken; - case 1: return QuestionToken; - case 2: return ExclamationToken; - case 3: return Expression; - case 4: return CloseParenToken; - } - - throw new InvalidOperationException(); - } - - public override void Accept(IRegexNodeVisitor visitor) - => visitor.Visit(this); - } - - internal abstract class RegexLookbehindGroupingNode : RegexQuestionGroupingNode - { - protected RegexLookbehindGroupingNode( - RegexKind kind, RegexToken openParenToken, RegexToken questionToken, - RegexToken lessThanToken, RegexToken closeParenToken) - : base(kind, openParenToken, questionToken, closeParenToken) - { - Debug.Assert(lessThanToken.Kind == RegexKind.LessThanToken); - LessThanToken = lessThanToken; - } - - public RegexToken LessThanToken { get; } - } - - /// - /// ```(?<=expr)``` node. - /// - internal sealed class RegexPositiveLookbehindGroupingNode : RegexLookbehindGroupingNode - { - public RegexPositiveLookbehindGroupingNode( - RegexToken openParenToken, RegexToken questionToken,RegexToken lessThanToken, - RegexToken equalsToken, RegexExpressionNode expression, RegexToken closeParenToken) - : base(RegexKind.PositiveLookbehindGrouping, openParenToken, questionToken, lessThanToken, closeParenToken) - { - Debug.Assert(equalsToken.Kind == RegexKind.EqualsToken); - Debug.Assert(expression != null); - EqualsToken = equalsToken; - Expression = expression; - } - - public RegexToken EqualsToken { get; } - public RegexExpressionNode Expression { get; } - - internal override int ChildCount => 6; - - internal override RegexNodeOrToken ChildAt(int index) - { - switch (index) - { - case 0: return OpenParenToken; - case 1: return QuestionToken; - case 2: return LessThanToken; - case 3: return EqualsToken; - case 4: return Expression; - case 5: return CloseParenToken; - } - - throw new InvalidOperationException(); - } - - public override void Accept(IRegexNodeVisitor visitor) - => visitor.Visit(this); - } - - /// - /// ```(?<!expr)``` node. - /// - internal sealed class RegexNegativeLookbehindGroupingNode : RegexLookbehindGroupingNode - { - public RegexNegativeLookbehindGroupingNode( - RegexToken openParenToken, RegexToken questionToken, RegexToken lessThanToken, - RegexToken exclamationToken, RegexExpressionNode expression, RegexToken closeParenToken) - : base(RegexKind.NegativeLookbehindGrouping, openParenToken, questionToken, lessThanToken, closeParenToken) - { - Debug.Assert(exclamationToken.Kind == RegexKind.ExclamationToken); - Debug.Assert(expression != null); - ExclamationToken = exclamationToken; - Expression = expression; - } - - public RegexToken ExclamationToken { get; } - public RegexExpressionNode Expression { get; } - - internal override int ChildCount => 6; - - internal override RegexNodeOrToken ChildAt(int index) - { - switch (index) - { - case 0: return OpenParenToken; - case 1: return QuestionToken; - case 2: return LessThanToken; - case 3: return ExclamationToken; - case 4: return Expression; - case 5: return CloseParenToken; - } - - throw new InvalidOperationException(); - } - - public override void Accept(IRegexNodeVisitor visitor) - => visitor.Visit(this); - } - - /// - /// ```(?>expr)``` node. - /// - internal sealed class RegexNonBacktrackingGroupingNode : RegexQuestionGroupingNode - { - public RegexNonBacktrackingGroupingNode( - RegexToken openParenToken, RegexToken questionToken, RegexToken greaterThanToken, - RegexExpressionNode expression, RegexToken closeParenToken) - : base(RegexKind.NonBacktrackingGrouping, openParenToken, questionToken, closeParenToken) - { - Debug.Assert(greaterThanToken.Kind == RegexKind.GreaterThanToken); - Debug.Assert(expression != null); - GreaterThanToken = greaterThanToken; - Expression = expression; - } - - public RegexToken GreaterThanToken { get; } - public RegexExpressionNode Expression { get; } - - internal override int ChildCount => 5; - - internal override RegexNodeOrToken ChildAt(int index) - { - switch (index) - { - case 0: return OpenParenToken; - case 1: return QuestionToken; - case 2: return GreaterThanToken; - case 3: return Expression; - case 4: return CloseParenToken; - } - - throw new InvalidOperationException(); - } - - public override void Accept(IRegexNodeVisitor visitor) - => visitor.Visit(this); - } - - /// - /// ```(?'name'expr)``` or ```(?<name>expr)``` node. - /// - internal sealed class RegexCaptureGroupingNode : RegexQuestionGroupingNode - { - public RegexCaptureGroupingNode( - RegexToken openParenToken, RegexToken questionToken, RegexToken openToken, - RegexToken captureToken, RegexToken closeToken, - RegexExpressionNode expression, RegexToken closeParenToken) - : base(RegexKind.CaptureGrouping, openParenToken, questionToken, closeParenToken) - { - Debug.Assert(expression != null); - OpenToken = openToken; - CaptureToken = captureToken; - CloseToken = closeToken; - Expression = expression; - } - - public RegexToken OpenToken { get; } - public RegexToken CaptureToken { get; } - public RegexToken CloseToken { get; } - public RegexExpressionNode Expression { get; } - - internal override int ChildCount => 7; - - internal override RegexNodeOrToken ChildAt(int index) - { - switch (index) - { - case 0: return OpenParenToken; - case 1: return QuestionToken; - case 2: return OpenToken; - case 3: return CaptureToken; - case 4: return CloseToken; - case 5: return Expression; - case 6: return CloseParenToken; - } - - throw new InvalidOperationException(); - } - - public override void Accept(IRegexNodeVisitor visitor) - => visitor.Visit(this); - } - - /// - /// ```(?'name1-name2'expr)``` or ```(?<name1-name2>expr)``` node. - /// - internal sealed class RegexBalancingGroupingNode : RegexQuestionGroupingNode - { - public RegexBalancingGroupingNode( - RegexToken openParenToken, RegexToken questionToken, RegexToken openToken, - RegexToken firstCaptureToken, RegexToken minusToken, RegexToken secondCaptureToken, - RegexToken closeToken, RegexExpressionNode expression, RegexToken closeParenToken) - : base(RegexKind.BalancingGrouping, openParenToken, questionToken, closeParenToken) - { - Debug.Assert(minusToken.Kind == RegexKind.MinusToken); - Debug.Assert(expression != null); - OpenToken = openToken; - FirstCaptureToken = firstCaptureToken; - MinusToken = minusToken; - SecondCaptureToken = secondCaptureToken; - CloseToken = closeToken; - Expression = expression; - } - - public RegexToken OpenToken { get; } - public RegexToken FirstCaptureToken { get; } - public RegexToken MinusToken { get; } - public RegexToken SecondCaptureToken { get; } - public RegexToken CloseToken { get; } - public RegexExpressionNode Expression { get; } - - internal override int ChildCount => 9; - - internal override RegexNodeOrToken ChildAt(int index) - { - switch (index) - { - case 0: return OpenParenToken; - case 1: return QuestionToken; - case 2: return OpenToken; - case 3: return FirstCaptureToken; - case 4: return MinusToken; - case 5: return SecondCaptureToken; - case 6: return CloseToken; - case 7: return Expression; - case 8: return CloseParenToken; - } - - throw new InvalidOperationException(); - } - - public override void Accept(IRegexNodeVisitor visitor) - => visitor.Visit(this); - } - - internal abstract class RegexConditionalGroupingNode : RegexQuestionGroupingNode - { - protected RegexConditionalGroupingNode( - RegexKind kind, RegexToken openParenToken, RegexToken questionToken, - RegexExpressionNode result, RegexToken closeParenToken) - : base(kind, openParenToken, questionToken, closeParenToken) - { - Debug.Assert(result != null); - Result = result; - } - - public RegexExpressionNode Result { get; } - } - - /// - /// ```(?(capture_name)result)``` - /// - internal sealed class RegexConditionalCaptureGroupingNode : RegexConditionalGroupingNode - { - public RegexConditionalCaptureGroupingNode( - RegexToken openParenToken, RegexToken questionToken, - RegexToken innerOpenParenToken, RegexToken captureToken, RegexToken innerCloseParenToken, - RegexExpressionNode result, RegexToken closeParenToken) - : base(RegexKind.ConditionalCaptureGrouping, openParenToken, questionToken, result, closeParenToken) - { - Debug.Assert(innerOpenParenToken.Kind == RegexKind.OpenParenToken); - Debug.Assert(innerCloseParenToken.Kind == RegexKind.CloseParenToken); - InnerOpenParenToken = innerOpenParenToken; - CaptureToken = captureToken; - InnerCloseParenToken = innerCloseParenToken; - } - - public RegexToken InnerOpenParenToken { get; } - public RegexToken CaptureToken { get; } - public RegexToken InnerCloseParenToken { get; } - - internal override int ChildCount => 7; - - internal override RegexNodeOrToken ChildAt(int index) - { - switch (index) - { - case 0: return OpenParenToken; - case 1: return QuestionToken; - case 2: return InnerOpenParenToken; - case 3: return CaptureToken; - case 4: return InnerCloseParenToken; - case 5: return Result; - case 6: return CloseParenToken; - } - - throw new InvalidOperationException(); - } - - public override void Accept(IRegexNodeVisitor visitor) - => visitor.Visit(this); - } - - /// - /// ```(?(group)result)``` - /// - internal sealed class RegexConditionalExpressionGroupingNode : RegexConditionalGroupingNode - { - public RegexConditionalExpressionGroupingNode( - RegexToken openParenToken, RegexToken questionToken, - RegexGroupingNode grouping, - RegexExpressionNode result, RegexToken closeParenToken) - : base(RegexKind.ConditionalExpressionGrouping, openParenToken, questionToken, result, closeParenToken) - { - Debug.Assert(grouping != null); - Grouping = grouping; - } - - internal override int ChildCount => 5; - - public RegexGroupingNode Grouping { get; } - - internal override RegexNodeOrToken ChildAt(int index) - { - switch (index) - { - case 0: return OpenParenToken; - case 1: return QuestionToken; - case 2: return Grouping; - case 3: return Result; - case 4: return CloseParenToken; - } - - throw new InvalidOperationException(); - } - - public override void Accept(IRegexNodeVisitor visitor) - => visitor.Visit(this); - } - - /// - /// Base type of all regex primitives that start with \ - /// - internal abstract class RegexEscapeNode : RegexPrimaryExpressionNode - { - protected RegexEscapeNode(RegexKind kind, RegexToken backslashToken) : base(kind) - { - Debug.Assert(backslashToken.Kind == RegexKind.BackslashToken); - BackslashToken = backslashToken; - } - - public RegexToken BackslashToken { get; } - } - - /// - /// Base type of all regex escapes that start with \ and some informative character (like \v \t \c etc.). - /// - internal abstract class RegexTypeEscapeNode : RegexEscapeNode - { - protected RegexTypeEscapeNode(RegexKind kind, RegexToken backslashToken, RegexToken typeToken) - : base(kind, backslashToken) - { - TypeToken = typeToken; - } - - public RegexToken TypeToken { get; } - } - - /// - /// A basic escape that just has \ and one additional character and needs no further information. - /// - internal sealed class RegexSimpleEscapeNode : RegexTypeEscapeNode - { - public RegexSimpleEscapeNode(RegexToken backslashToken, RegexToken typeToken) - : base(RegexKind.SimpleEscape, backslashToken, typeToken) - { - Debug.Assert(typeToken.Kind == RegexKind.TextToken); - } - - internal override int ChildCount => 2; - - internal override RegexNodeOrToken ChildAt(int index) - { - switch (index) - { - case 0: return BackslashToken; - case 1: return TypeToken; - } - - throw new InvalidOperationException(); - } - - public override void Accept(IRegexNodeVisitor visitor) - => visitor.Visit(this); - } - - - /// - /// One of \b \B \A \G \z \Z - /// - internal sealed class RegexAnchorEscapeNode : RegexTypeEscapeNode - { - public RegexAnchorEscapeNode(RegexToken backslashToken, RegexToken typeToken) - : base(RegexKind.AnchorEscape, backslashToken, typeToken) - { - } - - internal override int ChildCount => 2; - - internal override RegexNodeOrToken ChildAt(int index) - { - switch (index) - { - case 0: return BackslashToken; - case 1: return TypeToken; - } - - throw new InvalidOperationException(); - } - - public override void Accept(IRegexNodeVisitor visitor) - => visitor.Visit(this); - } - - /// - /// One of \s \S \d \D \w \W - /// - internal sealed class RegexCharacterClassEscapeNode : RegexTypeEscapeNode - { - public RegexCharacterClassEscapeNode(RegexToken backslashToken, RegexToken typeToken) - : base(RegexKind.CharacterClassEscape, backslashToken, typeToken) - { - } - - internal override int ChildCount => 2; - - internal override RegexNodeOrToken ChildAt(int index) - { - switch (index) - { - case 0: return BackslashToken; - case 1: return TypeToken; - } - - throw new InvalidOperationException(); - } - - public override void Accept(IRegexNodeVisitor visitor) - => visitor.Visit(this); - } - - /// - /// ```\cX``` escape - /// - internal sealed class RegexControlEscapeNode : RegexTypeEscapeNode - { - public RegexControlEscapeNode(RegexToken backslashToken, RegexToken typeToken, RegexToken controlToken) - : base(RegexKind.ControlEscape, backslashToken, typeToken) - { - ControlToken = controlToken; - } - - internal override int ChildCount => 3; - - public RegexToken ControlToken { get; } - - internal override RegexNodeOrToken ChildAt(int index) - { - switch (index) - { - case 0: return BackslashToken; - case 1: return TypeToken; - case 2: return ControlToken; - } - - throw new InvalidOperationException(); - } - - public override void Accept(IRegexNodeVisitor visitor) - => visitor.Visit(this); - } - - /// - /// ```\xFF``` escape. - /// - internal sealed class RegexHexEscapeNode : RegexTypeEscapeNode - { - public RegexHexEscapeNode(RegexToken backslashToken, RegexToken typeToken, RegexToken hexText) - : base(RegexKind.HexEscape, backslashToken, typeToken) - { - HexText = hexText; - } - - internal override int ChildCount => 3; - - public RegexToken HexText { get; } - - internal override RegexNodeOrToken ChildAt(int index) - { - switch (index) - { - case 0: return BackslashToken; - case 1: return TypeToken; - case 2: return HexText; - } - - throw new InvalidOperationException(); - } - - public override void Accept(IRegexNodeVisitor visitor) - => visitor.Visit(this); - } - - /// - /// ```\uFFFF``` escape. - /// - internal sealed class RegexUnicodeEscapeNode : RegexTypeEscapeNode - { - public RegexUnicodeEscapeNode(RegexToken backslashToken, RegexToken typeToken, RegexToken hexText) - : base(RegexKind.UnicodeEscape, backslashToken, typeToken) - { - HexText = hexText; - } - - internal override int ChildCount => 3; - - public RegexToken HexText { get; } - - internal override RegexNodeOrToken ChildAt(int index) - { - switch (index) - { - case 0: return BackslashToken; - case 1: return TypeToken; - case 2: return HexText; - } - - throw new InvalidOperationException(); - } - - public override void Accept(IRegexNodeVisitor visitor) - => visitor.Visit(this); - } - - /// - /// ```\'name'``` or ```\<name>``` escape. - /// - internal sealed class RegexCaptureEscapeNode : RegexEscapeNode - { - public RegexCaptureEscapeNode( - RegexToken backslashToken, RegexToken openToken, RegexToken captureToken, RegexToken closeToken) - : base(RegexKind.CaptureEscape, backslashToken) - { - OpenToken = openToken; - CaptureToken = captureToken; - CloseToken = closeToken; - } - - internal override int ChildCount => 4; - - public RegexToken OpenToken { get; } - public RegexToken CaptureToken { get; } - public RegexToken CloseToken { get; } - - internal override RegexNodeOrToken ChildAt(int index) - { - switch (index) - { - case 0: return BackslashToken; - case 1: return OpenToken; - case 2: return CaptureToken; - case 3: return CloseToken; - } - - throw new InvalidOperationException(); - } - - public override void Accept(IRegexNodeVisitor visitor) - => visitor.Visit(this); - } - - /// - /// ```\k'name'``` or ```\k<name>``` escape. - /// - internal sealed class RegexKCaptureEscapeNode : RegexTypeEscapeNode - { - public RegexKCaptureEscapeNode( - RegexToken backslashToken, RegexToken typeToken, - RegexToken openToken, RegexToken captureToken, RegexToken closeToken) - : base(RegexKind.KCaptureEscape, backslashToken, typeToken) - { - OpenToken = openToken; - CaptureToken = captureToken; - CloseToken = closeToken; - } - - internal override int ChildCount => 5; - - public RegexToken OpenToken { get; } - public RegexToken CaptureToken { get; } - public RegexToken CloseToken { get; } - - internal override RegexNodeOrToken ChildAt(int index) - { - switch (index) - { - case 0: return BackslashToken; - case 1: return TypeToken; - case 2: return OpenToken; - case 3: return CaptureToken; - case 4: return CloseToken; - } - - throw new InvalidOperationException(); - } - - public override void Accept(IRegexNodeVisitor visitor) - => visitor.Visit(this); - } - - /// - /// ```\1``` escape. In contexts where back-references are not allowed. - /// - internal sealed class RegexOctalEscapeNode : RegexEscapeNode - { - public RegexOctalEscapeNode(RegexToken backslashToken, RegexToken octalText) - : base(RegexKind.OctalEscape, backslashToken) - { - OctalText = octalText; - } - - internal override int ChildCount => 2; - - public RegexToken OctalText { get; } - - internal override RegexNodeOrToken ChildAt(int index) - { - switch (index) - { - case 0: return BackslashToken; - case 1: return OctalText; - } - - throw new InvalidOperationException(); - } - - public override void Accept(IRegexNodeVisitor visitor) - => visitor.Visit(this); - } - - /// - /// ```\1``` - /// - internal sealed class RegexBackreferenceEscapeNode : RegexEscapeNode - { - public RegexBackreferenceEscapeNode(RegexToken backslashToken, RegexToken numberToken) - : base(RegexKind.BackreferenceEscape, backslashToken) - { - NumberToken = numberToken; - } - - internal override int ChildCount => 2; - - public RegexToken NumberToken { get; } - - internal override RegexNodeOrToken ChildAt(int index) - { - switch (index) - { - case 0: return BackslashToken; - case 1: return NumberToken; - } - - throw new InvalidOperationException(); - } - - public override void Accept(IRegexNodeVisitor visitor) - => visitor.Visit(this); - } - - /// - /// ```\p{...}``` - /// - internal sealed class RegexCategoryEscapeNode : RegexEscapeNode - { - public RegexCategoryEscapeNode( - RegexToken backslashToken, RegexToken typeToken, RegexToken openBraceToken, RegexToken categoryToken, RegexToken closeBraceToken) - : base(RegexKind.CategoryEscape, backslashToken) - { - Debug.Assert(openBraceToken.Kind == RegexKind.OpenBraceToken); - Debug.Assert(closeBraceToken.Kind == RegexKind.CloseBraceToken); - TypeToken = typeToken; - OpenBraceToken = openBraceToken; - CategoryToken = categoryToken; - CloseBraceToken = closeBraceToken; - } - - public RegexToken TypeToken { get; } - public RegexToken OpenBraceToken { get; } - public RegexToken CategoryToken { get; } - public RegexToken CloseBraceToken { get; } - - internal override int ChildCount => 5; - - internal override RegexNodeOrToken ChildAt(int index) - { - switch (index) - { - case 0: return BackslashToken; - case 1: return TypeToken; - case 2: return OpenBraceToken; - case 3: return CategoryToken; - case 4: return CloseBraceToken; - } - - throw new InvalidOperationException(); - } - - public override void Accept(IRegexNodeVisitor visitor) - => visitor.Visit(this); - } -} diff --git a/src/Workspaces/Core/Portable/EmbeddedLanguages/RegularExpressions/RegexParser.CaptureInfoAnalyzer.cs b/src/Workspaces/Core/Portable/EmbeddedLanguages/RegularExpressions/RegexParser.CaptureInfoAnalyzer.cs deleted file mode 100644 index 4e03160fcc08d4022297a86f798bd0a5a58ecc23..0000000000000000000000000000000000000000 --- a/src/Workspaces/Core/Portable/EmbeddedLanguages/RegularExpressions/RegexParser.CaptureInfoAnalyzer.cs +++ /dev/null @@ -1,204 +0,0 @@ -// Copyright (c) Microsoft. All Rights Reserved. Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. - -using System.Collections.Immutable; -using System.Diagnostics; -using System.Linq; -using System.Text.RegularExpressions; -using Microsoft.CodeAnalysis.EmbeddedLanguages.Common; -using Microsoft.CodeAnalysis.EmbeddedLanguages.VirtualChars; -using Microsoft.CodeAnalysis.PooledObjects; -using Microsoft.CodeAnalysis.Text; - -namespace Microsoft.CodeAnalysis.EmbeddedLanguages.RegularExpressions -{ - using static EmbeddedSyntaxHelpers; - using static RegexHelpers; - - using RegexToken = EmbeddedSyntaxToken; - - internal partial struct RegexParser - { - /// - /// Analyzes the first parsed tree to determine the set of capture numbers and names. These are - /// then used to do the second parsing pass as they can change how the regex engine interprets - /// some parts of the pattern (though not the groups themselves). - /// - private struct CaptureInfoAnalyzer - { - private readonly ImmutableArray _text; - private readonly ImmutableDictionary.Builder _captureNumberToSpan; - private readonly ImmutableDictionary.Builder _captureNameToSpan; - private readonly ArrayBuilder _captureNames; - private int _autoNumber; - - public CaptureInfoAnalyzer(ImmutableArray text) - { - _text = text; - _captureNumberToSpan = ImmutableDictionary.CreateBuilder(); - _captureNameToSpan = ImmutableDictionary.CreateBuilder(); - _captureNames = ArrayBuilder.GetInstance(); - _autoNumber = 1; - - _captureNumberToSpan.Add(0, text.IsEmpty ? default : GetSpan(text)); - } - - public (ImmutableDictionary, ImmutableDictionary) Analyze( - RegexCompilationUnit root, RegexOptions options) - { - CollectCaptures(root, options); - AssignNumbersToCaptureNames(); - - _captureNames.Free(); - return (_captureNameToSpan.ToImmutable(), _captureNumberToSpan.ToImmutable()); - } - - private void CollectCaptures(RegexNode node, RegexOptions options) - { - switch (node.Kind) - { - case RegexKind.CaptureGrouping: - var captureGrouping = (RegexCaptureGroupingNode)node; - RecordCapture(captureGrouping.CaptureToken, GetGroupingSpan(captureGrouping)); - break; - - case RegexKind.BalancingGrouping: - var balancingGroup = (RegexBalancingGroupingNode)node; - RecordCapture(balancingGroup.FirstCaptureToken, GetGroupingSpan(balancingGroup)); - break; - - case RegexKind.ConditionalExpressionGrouping: - // Explicitly recurse into conditionalGrouping.Grouping. That grouping - // itself does not create a capture group, but nested groupings inside of it - // will. - var conditionalGrouping = (RegexConditionalExpressionGroupingNode)node; - RecurseIntoChildren(conditionalGrouping.Grouping, options); - CollectCaptures(conditionalGrouping.Result, options); - return; - - case RegexKind.SimpleGrouping: - RecordSimpleGroupingCapture((RegexSimpleGroupingNode)node, options); - break; - - case RegexKind.NestedOptionsGrouping: - // When we see (?opts:...) - // Recurse explicitly, setting the new options as we process the inner expression. - // When this pops out we'll be back to these options we're currently at now. - var nestedOptions = (RegexNestedOptionsGroupingNode)node; - CollectCaptures(nestedOptions.Expression, GetNewOptionsFromToken(options, nestedOptions.OptionsToken)); - return; - } - - RecurseIntoChildren(node, options); - } - - private void RecurseIntoChildren(RegexNode node, RegexOptions options) - { - foreach (var child in node) - { - if (child.IsNode) - { - // When we see a SimpleOptionsGroup ```(?opts)``` then determine what the options will - // be for successive nodes in the sequence. - var childNode = child.Node; - if (childNode is RegexSimpleOptionsGroupingNode simpleOptions) - { - options = GetNewOptionsFromToken(options, simpleOptions.OptionsToken); - } - - CollectCaptures(child.Node, options); - } - } - } - - private TextSpan GetGroupingSpan(RegexGroupingNode grouping) - { - Debug.Assert(!grouping.OpenParenToken.IsMissing); - var lastChar = grouping.CloseParenToken.IsMissing - ? _text.Last() - : grouping.CloseParenToken.VirtualChars.Last(); - - return GetSpan(grouping.OpenParenToken.VirtualChars[0], lastChar); - } - - private void RecordSimpleGroupingCapture(RegexSimpleGroupingNode node, RegexOptions options) - { - if (HasOption(options, RegexOptions.ExplicitCapture)) - { - // Don't automatically add simple groups if the explicit capture option is on. - // Only add captures for 'CaptureGrouping' and 'BalancingGrouping' nodes. - return; - } - - // Don't count a bogus (? node as a capture node. We only have this to keep our error - // messages in line with the native parser. i.e. even though the bogus (? code would - // cause an exception, we might get an earlier exception if there's a reference to - // this grouping. So if we note this grouping we'll end up not causing that error - // to happen, bringing out behavior out of sync with the native system. - var expr = node.Expression; - while (expr is RegexAlternationNode alternation) - { - expr = alternation.Left; - } - - if (expr is RegexSequenceNode sequence && - sequence.ChildCount > 0) - { - var leftMost = sequence.ChildAt(0); - if (leftMost.Node is RegexTextNode textNode && - IsTextChar(textNode.TextToken, '?')) - { - return; - } - } - - AddIfMissing(_captureNumberToSpan, list: null, _autoNumber++, GetGroupingSpan(node)); - } - - private void RecordCapture(RegexToken token, TextSpan span) - { - if (!token.IsMissing) - { - if (token.Kind == RegexKind.NumberToken) - { - AddIfMissing(_captureNumberToSpan, list: null, (int)token.Value, span); - } - else - { - AddIfMissing(_captureNameToSpan, list: _captureNames, (string)token.Value, span); - } - } - } - - private static void AddIfMissing( - ImmutableDictionary.Builder mapping, - ArrayBuilder list, - T val, TextSpan span) - { - if (!mapping.ContainsKey(val)) - { - mapping.Add(val, span); - list?.Add(val); - } - } - - /// - /// Give numbers to all named captures. They will get successive values that have not already been handed out to existing - /// numbered capture groups. - /// - private void AssignNumbersToCaptureNames() - { - foreach (var name in _captureNames) - { - while (_captureNumberToSpan.ContainsKey(_autoNumber)) - { - _autoNumber++; - } - - _captureNumberToSpan.Add(_autoNumber, _captureNameToSpan[name]); - _autoNumber++; - } - } - } - } -} diff --git a/src/Workspaces/Core/Portable/EmbeddedLanguages/RegularExpressions/RegexParser.cs b/src/Workspaces/Core/Portable/EmbeddedLanguages/RegularExpressions/RegexParser.cs deleted file mode 100644 index e38967792b67d6e783d6d322c32d1658deef30a8..0000000000000000000000000000000000000000 --- a/src/Workspaces/Core/Portable/EmbeddedLanguages/RegularExpressions/RegexParser.cs +++ /dev/null @@ -1,2015 +0,0 @@ -// Copyright (c) Microsoft. All Rights Reserved. Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. - -using System; -using System.Collections.Generic; -using System.Collections.Immutable; -using System.Diagnostics; -using System.Linq; -using System.Text.RegularExpressions; -using Microsoft.CodeAnalysis.EmbeddedLanguages.Common; -using Microsoft.CodeAnalysis.EmbeddedLanguages.VirtualChars; -using Microsoft.CodeAnalysis.PooledObjects; -using Microsoft.CodeAnalysis.Text; - -namespace Microsoft.CodeAnalysis.EmbeddedLanguages.RegularExpressions -{ - using static EmbeddedSyntaxHelpers; - using static RegexHelpers; - - using RegexNodeOrToken = EmbeddedSyntaxNodeOrToken; - using RegexToken = EmbeddedSyntaxToken; - using RegexTrivia = EmbeddedSyntaxTrivia; - - /// - /// Produces a from a sequence of characters. - /// - /// Importantly, this parser attempts to replicate diagnostics with almost the exact same text - /// as the native .Net regex parser. This is important so that users get an understandable - /// experience where it appears to them that this is all one cohesive system and that the IDE - /// will let them discover and fix the same issues they would encounter when previously trying - /// to just compile and execute these regexes. - /// - /// - /// Invariants we try to maintain (and should consider a bug if we do not): l 1. If the .net - /// regex parser does not report an error for a given pattern, we should not either. it would be - /// very bad if we told the user there was something wrong with there pattern when there really - /// wasn't. - /// - /// 2. If the .net regex parser does report an error for a given pattern, we should either not - /// report an error (not recommended) or report the same error at an appropriate location in the - /// pattern. Not reporting the error can be confusing as the user will think their pattern is - /// ok, when it really is not. However, it can be acceptable to do this as it's not telling - /// them that something is actually wrong, and it may be too difficult to find and report the - /// same error. Note: there is only one time we do this in this parser (see the deviation - /// documented in ). - /// - /// Note1: the above invariants make life difficult at times. This happens due to the fact that - /// the .net parser is multi-pass. Meaning it does a first scan (which may report errors), then - /// does the full parse. This means that it might report an error in a later location during - /// the initial scan than it would during the parse. We replicate that behavior to follow the - /// second invariant. - /// - /// Note2: It would be nice if we could check these invariants at runtime, so we could control - /// our behavior by the behavior of the real .net regex engine. For example, if the .net regex - /// engine did not report any issues, we could suppress any diagnostics we generated and we - /// could log an NFW to record which pattern we deviated on so we could fix the issue for a - /// future release. However, we cannot do this as the .net regex engine has no guarantees about - /// its performance characteristics. For example, certain regex patterns might end up causing - /// that engine to consume unbounded amounts of CPU and memory. This is because the .net regex - /// engine is not just a parser, but something that builds an actual recognizer using techniques - /// that are not necessarily bounded. As such, while we test ourselves around it during our - /// tests, we cannot do the same at runtime as part of the IDE. - /// - /// This parser was based off the corefx RegexParser based at: - /// https://github.com/dotnet/corefx/blob/f759243d724f462da0bcef54e86588f8a55352c6/src/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParser.cs#L1 - /// - internal partial struct RegexParser - { - private readonly ImmutableDictionary _captureNamesToSpan; - private readonly ImmutableDictionary _captureNumbersToSpan; - - private RegexLexer _lexer; - private RegexOptions _options; - private RegexToken _currentToken; - private int _recursionDepth; - - private RegexParser( - ImmutableArray text, RegexOptions options, - ImmutableDictionary captureNamesToSpan, - ImmutableDictionary captureNumbersToSpan) : this() - { - _lexer = new RegexLexer(text); - _options = options; - - _captureNamesToSpan = captureNamesToSpan; - _captureNumbersToSpan = captureNumbersToSpan; - - // Get the first token. It is allowed to have trivia on it. - ConsumeCurrentToken(allowTrivia: true); - } - - /// - /// Returns the latest token the lexer has produced, and then asks the lexer to - /// produce the next token after that. - /// - /// Whether or not trivia is allowed on the next token - /// produced. In the .net parser trivia is only allowed on a few constructs, - /// and our parser mimics that behavior. Note that even if trivia is allowed, - /// the type of trivia that can be scanned depends on the current RegexOptions. - /// For example, if is currently - /// enabled, then '#...' comments are allowed. Otherwise, only '(?#...)' comments - /// are allowed. - private RegexToken ConsumeCurrentToken(bool allowTrivia) - { - var previous = _currentToken; - _currentToken = _lexer.ScanNextToken(allowTrivia, _options); - return previous; - } - - /// - /// Given an input text, and set of options, parses out a fully representative syntax tree - /// and list of diagnostics. Parsing should always succeed, except in the case of the stack - /// overflowing. - /// - public static RegexTree TryParse(ImmutableArray text, RegexOptions options) - { - try - { - // Parse the tree once, to figure out the capture groups. These are needed - // to then parse the tree again, as the captures will affect how we interpret - // certain things (i.e. escape references) and what errors will be reported. - // - // This is necessary as .net regexes allow references to *future* captures. - // As such, we don't know when we're seeing a reference if it's to something - // that exists or not. - var tree1 = new RegexParser(text, options, - ImmutableDictionary.Empty, - ImmutableDictionary.Empty).ParseTree(); - - var analyzer = new CaptureInfoAnalyzer(text); - var (captureNames, captureNumbers) = analyzer.Analyze(tree1.Root, options); - - var tree2 = new RegexParser( - text, options, captureNames, captureNumbers).ParseTree(); - return tree2; - } - catch (Exception e) when (StackGuard.IsInsufficientExecutionStackException(e)) - { - return null; - } - } - - private RegexTree ParseTree() - { - // Most callers to ParseAlternatingSequences are from group constructs. As those - // constructs will have already consumed the open paren, they don't want this sub-call - // to consume through close-paren tokens as they want that token for themselves. - // However, we're the topmost call and have not consumed an open paren. And, we want - // this call to consume all the way to the end, eating up excess close-paren tokens that - // are encountered. - var expression = this.ParseAlternatingSequences(consumeCloseParen: true); - Debug.Assert(_lexer.Position == _lexer.Text.Length); - Debug.Assert(_currentToken.Kind == RegexKind.EndOfFile); - - var root = new RegexCompilationUnit(expression, _currentToken); - - var seenDiagnostics = new HashSet(); - var diagnostics = ArrayBuilder.GetInstance(); - CollectDiagnostics(root, seenDiagnostics, diagnostics); - - return new RegexTree( - _lexer.Text, root, diagnostics.ToImmutableAndFree(), - _captureNamesToSpan, _captureNumbersToSpan); - } - - private static void CollectDiagnostics( - RegexNode node, HashSet seenDiagnostics, ArrayBuilder diagnostics) - { - foreach (var child in node) - { - if (child.IsNode) - { - CollectDiagnostics(child.Node, seenDiagnostics, diagnostics); - } - else - { - var token = child.Token; - foreach (var trivia in token.LeadingTrivia) - { - AddUniqueDiagnostics(seenDiagnostics, trivia.Diagnostics, diagnostics); - } - - // We never place trailing trivia on regex tokens. - Debug.Assert(token.TrailingTrivia.IsEmpty); - AddUniqueDiagnostics(seenDiagnostics, token.Diagnostics, diagnostics); - } - } - } - - /// - /// It's very common to have duplicated diagnostics. For example, consider "((". This will - /// have two 'missing )' diagnostics, both at the end. Reporting both isn't helpful, so we - /// filter duplicates out here. - /// - private static void AddUniqueDiagnostics( - HashSet seenDiagnostics, ImmutableArray from, ArrayBuilder to) - { - foreach (var diagnostic in from) - { - if (seenDiagnostics.Add(diagnostic)) - { - to.Add(diagnostic); - } - } - } - - private RegexExpressionNode ParseAlternatingSequences(bool consumeCloseParen) - { - try - { - _recursionDepth++; - StackGuard.EnsureSufficientExecutionStack(_recursionDepth); - return ParseAlternatingSequencesWorker(consumeCloseParen); - } - finally - { - _recursionDepth--; - } - } - - /// - /// Parses out code of the form: ...|...|... - /// This is the type of code you have at the top level of a regex, or inside any grouping - /// contruct. Note that sequences can be empty in .net regex. i.e. the following is legal: - /// - /// ...||... - /// - /// An empty sequence just means "match at every position in the test string". - /// - private RegexExpressionNode ParseAlternatingSequencesWorker(bool consumeCloseParen) - { - RegexExpressionNode current = ParseSequence(consumeCloseParen); - - while (_currentToken.Kind == RegexKind.BarToken) - { - // Trivia allowed between the | and the next token. - current = new RegexAlternationNode( - current, ConsumeCurrentToken(allowTrivia: true), ParseSequence(consumeCloseParen)); - } - - return current; - } - - private RegexSequenceNode ParseSequence(bool consumeCloseParen) - { - var list = ArrayBuilder.GetInstance(); - - if (ShouldConsumeSequenceElement(consumeCloseParen)) - { - do - { - var last = list.Count == 0 ? null : list.Last(); - list.Add(ParsePrimaryExpressionAndQuantifiers(last)); - - TryMergeLastTwoNodes(list); - } - while (ShouldConsumeSequenceElement(consumeCloseParen)); - } - - return new RegexSequenceNode(list.ToImmutableAndFree()); - } - - private void TryMergeLastTwoNodes(ArrayBuilder list) - { - if (list.Count >= 2) - { - var last = list[list.Count - 2]; - var next = list[list.Count - 1]; - - if (last?.Kind == RegexKind.Text && next?.Kind == RegexKind.Text) - { - var lastTextToken = ((RegexTextNode)last).TextToken; - var nextTextToken = ((RegexTextNode)next).TextToken; - - if (lastTextToken.Diagnostics.Length == 0 && - nextTextToken.Diagnostics.Length == 0 && - lastTextToken.Value == null && - nextTextToken.Value == null && - nextTextToken.LeadingTrivia.Length == 0) - { - // Merge two text tokens token if there is no intermediary trivia. - var merged = new RegexTextNode(CreateToken( - RegexKind.TextToken, lastTextToken.LeadingTrivia, - lastTextToken.VirtualChars.Concat(nextTextToken.VirtualChars))); - - list.RemoveLast(); - list.RemoveLast(); - list.Add(merged); - } - } - } - } - - private bool ShouldConsumeSequenceElement(bool consumeCloseParen) - { - if (_currentToken.Kind == RegexKind.EndOfFile) - { - return false; - } - - if (_currentToken.Kind == RegexKind.BarToken) - { - return false; - } - - if (_currentToken.Kind == RegexKind.CloseParenToken) - { - return consumeCloseParen; - } - - return true; - } - - private RegexExpressionNode ParsePrimaryExpressionAndQuantifiers(RegexExpressionNode lastExpression) - { - var current = ParsePrimaryExpression(lastExpression); - if (current.Kind == RegexKind.SimpleOptionsGrouping) - { - // Simple options (i.e. "(?i-x)" can't have quantifiers attached to them). - return current; - } - - switch (_currentToken.Kind) - { - case RegexKind.AsteriskToken: return ParseZeroOrMoreQuantifier(current); - case RegexKind.PlusToken: return ParseOneOrMoreQuantifier(current); - case RegexKind.QuestionToken: return ParseZeroOrOneQuantifier(current); - case RegexKind.OpenBraceToken: return TryParseNumericQuantifier(current, _currentToken); - default: return current; - } - } - - private RegexExpressionNode TryParseLazyQuantifier(RegexQuantifierNode quantifier) - { - if (_currentToken.Kind != RegexKind.QuestionToken) - { - return quantifier; - } - - // Whitespace allowed after the question and the next sequence element. - return new RegexLazyQuantifierNode(quantifier, - ConsumeCurrentToken(allowTrivia: true)); - } - - private RegexExpressionNode ParseZeroOrMoreQuantifier(RegexPrimaryExpressionNode current) - { - // Whitespace allowed between the quantifier and the possible following ? or next sequence item. - return TryParseLazyQuantifier(new RegexZeroOrMoreQuantifierNode(current, ConsumeCurrentToken(allowTrivia: true))); - } - - private RegexExpressionNode ParseOneOrMoreQuantifier(RegexPrimaryExpressionNode current) - { - // Whitespace allowed between the quantifier and the possible following ? or next sequence item. - return TryParseLazyQuantifier(new RegexOneOrMoreQuantifierNode(current, ConsumeCurrentToken(allowTrivia: true))); - } - - private RegexExpressionNode ParseZeroOrOneQuantifier(RegexPrimaryExpressionNode current) - { - // Whitespace allowed between the quantifier and the possible following ? or next sequence item. - return TryParseLazyQuantifier(new RegexZeroOrOneQuantifierNode(current, ConsumeCurrentToken(allowTrivia: true))); - } - - private RegexExpressionNode TryParseNumericQuantifier( - RegexPrimaryExpressionNode expression, RegexToken openBraceToken) - { - var start = _lexer.Position; - - if (!TryParseNumericQuantifierParts( - out var firstNumberToken, - out var commaToken, - out var secondNumberToken, - out var closeBraceToken)) - { - _currentToken = openBraceToken; - _lexer.Position = start; - return expression; - } - - var quantifier = CreateQuantifier( - expression, openBraceToken, firstNumberToken, commaToken, - secondNumberToken, closeBraceToken); - - return TryParseLazyQuantifier(quantifier); - } - - private RegexQuantifierNode CreateQuantifier( - RegexPrimaryExpressionNode expression, - RegexToken openBraceToken, RegexToken firstNumberToken, RegexToken? commaToken, - RegexToken? secondNumberToken, RegexToken closeBraceToken) - { - if (commaToken != null) - { - return secondNumberToken != null - ? new RegexClosedNumericRangeQuantifierNode(expression, openBraceToken, firstNumberToken, commaToken.Value, secondNumberToken.Value, closeBraceToken) - : (RegexQuantifierNode)new RegexOpenNumericRangeQuantifierNode(expression, openBraceToken, firstNumberToken, commaToken.Value, closeBraceToken); - } - - return new RegexExactNumericQuantifierNode(expression, openBraceToken, firstNumberToken, closeBraceToken); - } - - private bool TryParseNumericQuantifierParts( - out RegexToken firstNumberToken, out RegexToken? commaToken, - out RegexToken? secondNumberToken, out RegexToken closeBraceToken) - { - firstNumberToken = default; - commaToken = default; - secondNumberToken = default; - closeBraceToken = default; - - var firstNumber = _lexer.TryScanNumber(); - if (firstNumber == null) - { - return false; - } - - firstNumberToken = firstNumber.Value; - - // Nothing allowed between {x,n} - ConsumeCurrentToken(allowTrivia: false); - - if (_currentToken.Kind == RegexKind.CommaToken) - { - commaToken = _currentToken; - - var start = _lexer.Position; - secondNumberToken = _lexer.TryScanNumber(); - - if (secondNumberToken == null) - { - // Nothing allowed between {x,n} - ResetToPositionAndConsumeCurrentToken(start, allowTrivia: false); - } - else - { - var secondNumberTokenLocal = secondNumberToken.Value; - - // Nothing allowed between {x,n} - ConsumeCurrentToken(allowTrivia: false); - - var val1 = (int)firstNumberToken.Value; - var val2 = (int)secondNumberTokenLocal.Value; - - if (val2 < val1) - { - secondNumberTokenLocal = secondNumberTokenLocal.AddDiagnosticIfNone(new EmbeddedDiagnostic( - WorkspacesResources.Illegal_x_y_with_x_less_than_y, - secondNumberTokenLocal.GetSpan())); - secondNumberToken = secondNumberTokenLocal; - } - } - } - - if (_currentToken.Kind != RegexKind.CloseBraceToken) - { - return false; - } - - // Whitespace allowed between the quantifier and the possible following ? or next sequence item. - closeBraceToken = ConsumeCurrentToken(allowTrivia: true); - return true; - } - - private void ResetToPositionAndConsumeCurrentToken(int position, bool allowTrivia) - { - _lexer.Position = position; - ConsumeCurrentToken(allowTrivia); - } - - private RegexPrimaryExpressionNode ParsePrimaryExpression(RegexExpressionNode lastExpression) - { - switch (_currentToken.Kind) - { - case RegexKind.DotToken: - return ParseWildcard(); - case RegexKind.CaretToken: - return ParseStartAnchor(); - case RegexKind.DollarToken: - return ParseEndAnchor(); - case RegexKind.BackslashToken: - return ParseEscape(_currentToken, allowTriviaAfterEnd: true); - case RegexKind.OpenBracketToken: - return ParseCharacterClass(); - case RegexKind.OpenParenToken: - return ParseGrouping(); - case RegexKind.CloseParenToken: - return ParseUnexpectedCloseParenToken(); - case RegexKind.OpenBraceToken: - return ParsePossibleUnexpectedNumericQuantifier(lastExpression); - case RegexKind.AsteriskToken: - case RegexKind.PlusToken: - case RegexKind.QuestionToken: - return ParseUnexpectedQuantifier(lastExpression); - default: - return ParseText(); - } - } - - private RegexPrimaryExpressionNode ParsePossibleUnexpectedNumericQuantifier(RegexExpressionNode lastExpression) - { - // Native parser looks for something like {0,1} in a top level sequence and reports - // an explicit error that that's not allowed. However, something like {0, 1} is fine - // and is treated as six textual tokens. - var openBraceToken = _currentToken.With(kind: RegexKind.TextToken); - var start = _lexer.Position; - - if (TryParseNumericQuantifierParts( - out _, out _, out _, out _)) - { - // Report that a numeric quantifier isn't allowed here. - CheckQuantifierExpression(lastExpression, ref openBraceToken); - } - - // Started with { but wasn't a numeric quantifier. This is totally legal and is just - // a textual sequence. Restart, scanning this token as a normal sequence element. - ResetToPositionAndConsumeCurrentToken(start, allowTrivia: true); - return new RegexTextNode(openBraceToken); - } - - private RegexPrimaryExpressionNode ParseUnexpectedCloseParenToken() - { - var token = _currentToken.With(kind: RegexKind.TextToken).AddDiagnosticIfNone( - new EmbeddedDiagnostic(WorkspacesResources.Too_many_close_parens, _currentToken.GetSpan())); - - // Technically, since an error occurred, we can do whatever we want here. However, - // the spirit of the native parser is that top level sequence elements are allowed - // to have trivia. So that's the behavior we mimic. - ConsumeCurrentToken(allowTrivia: true); - return new RegexTextNode(token); - } - - private RegexPrimaryExpressionNode ParseText() - { - var token = ConsumeCurrentToken(allowTrivia: true); - Debug.Assert(token.Value == null); - - // Allow trivia between this piece of text and the next sequence element - return new RegexTextNode(token.With(kind: RegexKind.TextToken)); - } - - private RegexPrimaryExpressionNode ParseEndAnchor() - { - // Allow trivia between this anchor and the next sequence element - return new RegexAnchorNode(RegexKind.EndAnchor, ConsumeCurrentToken(allowTrivia: true)); - } - - private RegexPrimaryExpressionNode ParseStartAnchor() - { - // Allow trivia between this anchor and the next sequence element - return new RegexAnchorNode(RegexKind.StartAnchor, ConsumeCurrentToken(allowTrivia: true)); - } - - private RegexPrimaryExpressionNode ParseWildcard() - { - // Allow trivia between the . and the next sequence element - return new RegexWildcardNode(ConsumeCurrentToken(allowTrivia: true)); - } - - private RegexGroupingNode ParseGrouping() - { - var start = _lexer.Position; - - // Check what immediately follows the (. If we have (? it is processed specially. - // However, we do not treat (? the same as ( ? - var openParenToken = ConsumeCurrentToken(allowTrivia: false); - - switch (_currentToken.Kind) - { - case RegexKind.QuestionToken: - return ParseGroupQuestion(openParenToken, _currentToken); - - default: - // Wasn't (? just parse this as a normal group. - _lexer.Position = start; - return ParseSimpleGroup(openParenToken); - } - } - - private RegexToken ParseGroupingCloseParen() - { - switch (_currentToken.Kind) - { - case RegexKind.CloseParenToken: - // Grouping completed normally. Allow trivia between it and the next sequence element. - return ConsumeCurrentToken(allowTrivia: true); - - default: - return CreateMissingToken(RegexKind.CloseParenToken).AddDiagnosticIfNone( - new EmbeddedDiagnostic(WorkspacesResources.Not_enough_close_parens, GetTokenStartPositionSpan(_currentToken))); - } - } - - private RegexSimpleGroupingNode ParseSimpleGroup(RegexToken openParenToken) - => new RegexSimpleGroupingNode( - openParenToken, ParseGroupingEmbeddedExpression(_options), ParseGroupingCloseParen()); - - private RegexExpressionNode ParseGroupingEmbeddedExpression(RegexOptions embeddedOptions) - { - // Save and restore options when we go into, and pop out of a group node. - var currentOptions = _options; - _options = embeddedOptions; - - // We're parsing the embedded sequence inside the current group. As this is a sequence - // we want to allow trivia between the current token we're on, and the first token - // of the embedded sequence. - ConsumeCurrentToken(allowTrivia: true); - - // When parsing out the sequence don't grab the close paren, that will be for our caller - // to get. - var expression = this.ParseAlternatingSequences(consumeCloseParen: false); - _options = currentOptions; - return expression; - } - - private TextSpan GetTokenSpanIncludingEOF(RegexToken token) - => token.Kind == RegexKind.EndOfFile - ? GetTokenStartPositionSpan(token) - : token.GetSpan(); - - private TextSpan GetTokenStartPositionSpan(RegexToken token) - { - return token.Kind == RegexKind.EndOfFile - ? new TextSpan(_lexer.Text.Last().Span.End, 0) - : new TextSpan(token.VirtualChars[0].Span.Start, 0); - } - - private RegexGroupingNode ParseGroupQuestion(RegexToken openParenToken, RegexToken questionToken) - { - var optionsToken = _lexer.TryScanOptions(); - if (optionsToken != null) - { - return ParseOptionsGroupingNode(openParenToken, questionToken, optionsToken.Value); - } - - var afterQuestionPos = _lexer.Position; - - // Lots of possible options when we see (?. Look at the immediately following character - // (without any allowed spaces) to decide what to parse out next. - ConsumeCurrentToken(allowTrivia: false); - switch (_currentToken.Kind) - { - case RegexKind.LessThanToken: - // (?<=...) or (?...) or (?<...-...>...) - return ParseLookbehindOrNamedCaptureOrBalancingGrouping(openParenToken, questionToken); - - case RegexKind.SingleQuoteToken: - // (?'...'...) or (?'...-...'...) - return ParseNamedCaptureOrBalancingGrouping( - openParenToken, questionToken, _currentToken); - - case RegexKind.OpenParenToken: - // alternation construct (?(...) | ) - return ParseConditionalGrouping(openParenToken, questionToken); - - case RegexKind.ColonToken: - return ParseNonCapturingGroupingNode(openParenToken, questionToken); - - case RegexKind.EqualsToken: - return ParsePositiveLookaheadGrouping(openParenToken, questionToken); - - case RegexKind.ExclamationToken: - return ParseNegativeLookaheadGrouping(openParenToken, questionToken); - - case RegexKind.GreaterThanToken: - return ParseNonBacktrackingGrouping(openParenToken, questionToken); - - default: - if (_currentToken.Kind != RegexKind.CloseParenToken) - { - // Native parser reports "Unrecognized grouping construct", *except* for (?) - openParenToken = openParenToken.AddDiagnosticIfNone(new EmbeddedDiagnostic( - WorkspacesResources.Unrecognized_grouping_construct, - openParenToken.GetSpan())); - } - - break; - } - - // (?) - // Parse this as a normal group. The question will immediately error as it's a - // quantifier not following anything. - _lexer.Position = afterQuestionPos - 1; - return ParseSimpleGroup(openParenToken); - } - - private RegexConditionalGroupingNode ParseConditionalGrouping(RegexToken openParenToken, RegexToken questionToken) - { - var innerOpenParenToken = _currentToken; - var afterInnerOpenParen = _lexer.Position; - - var captureToken = _lexer.TryScanNumberOrCaptureName(); - if (captureToken == null) - { - return ParseConditionalExpressionGrouping(openParenToken, questionToken, innerOpenParenToken); - } - - var capture = captureToken.Value; - - RegexToken innerCloseParenToken; - if (capture.Kind == RegexKind.NumberToken) - { - // If it's a numeric group, it has to be immediately followed by a ) and the - // numeric reference has to exist. - // - // That means that (?(4 ) is not treated as an embedded expression but as an - // error. This is different from (?(a ) which will be treated as an embedded - // expression, and different from (?(a) will be treated as an embedded - // expression or capture group depending on if 'a' is a existing capture name. - - ConsumeCurrentToken(allowTrivia: false); - if (_currentToken.Kind == RegexKind.CloseParenToken) - { - innerCloseParenToken = _currentToken; - if (!HasCapture((int)capture.Value)) - { - capture = capture.AddDiagnosticIfNone(new EmbeddedDiagnostic( - WorkspacesResources.Reference_to_undefined_group, - capture.GetSpan())); - } - } - else - { - innerCloseParenToken = CreateMissingToken(RegexKind.CloseParenToken); - capture = capture.AddDiagnosticIfNone(new EmbeddedDiagnostic( - WorkspacesResources.Malformed, - capture.GetSpan())); - MoveBackBeforePreviousScan(); - } - } - else - { - // If its a capture name, it's ok if it that capture doesn't exist. In that - // case we will just treat this as an conditional expression. - if (!HasCapture((string)capture.Value)) - { - _lexer.Position = afterInnerOpenParen; - return ParseConditionalExpressionGrouping(openParenToken, questionToken, innerOpenParenToken); - } - - // Capture name existed. For this to be a capture grouping it exactly has to - // match (?(a) anything other than a close paren after the ) will make this - // into a conditional expression. - ConsumeCurrentToken(allowTrivia: false); - if (_currentToken.Kind != RegexKind.CloseParenToken) - { - _lexer.Position = afterInnerOpenParen; - return ParseConditionalExpressionGrouping(openParenToken, questionToken, innerOpenParenToken); - } - - innerCloseParenToken = _currentToken; - } - - // Was (?(name) or (?(num) and name/num was a legal capture name. Parse - // this out as a conditional grouping. Because we're going to be parsing out - // an embedded sequence, allow trivia before the first element. - ConsumeCurrentToken(allowTrivia: true); - var result = ParseConditionalGroupingResult(); - - return new RegexConditionalCaptureGroupingNode( - openParenToken, questionToken, - innerOpenParenToken, capture, innerCloseParenToken, - result, ParseGroupingCloseParen()); - } - - private bool HasCapture(int value) - => _captureNumbersToSpan.ContainsKey(value); - - private bool HasCapture(string value) - => _captureNamesToSpan.ContainsKey(value); - - private void MoveBackBeforePreviousScan() - { - if (_currentToken.Kind != RegexKind.EndOfFile) - { - // Move back to un-consume whatever we just consumed. - _lexer.Position--; - } - } - - private RegexConditionalGroupingNode ParseConditionalExpressionGrouping( - RegexToken openParenToken, RegexToken questionToken, RegexToken innerOpenParenToken) - { - // Reproduce very specific errors the .net regex parser looks for. Technically, - // we would error out in these cases no matter what. However, it means we can - // stringently enforce that our parser produces the same errors as the native one. - // - // Move back before the ( - _lexer.Position--; - if (_lexer.IsAt("(?#")) - { - var pos = _lexer.Position; - var comment = _lexer.ScanComment(options: default); - _lexer.Position = pos; - - if (comment.Value.Diagnostics.Length > 0) - { - openParenToken = openParenToken.AddDiagnosticIfNone(comment.Value.Diagnostics[0]); - } - else - { - openParenToken = openParenToken.AddDiagnosticIfNone(new EmbeddedDiagnostic( - WorkspacesResources.Alternation_conditions_cannot_be_comments, - openParenToken.GetSpan())); - } - } - else if (_lexer.IsAt("(?'")) - { - openParenToken = openParenToken.AddDiagnosticIfNone(new EmbeddedDiagnostic( - WorkspacesResources.Alternation_conditions_do_not_capture_and_cannot_be_named, - openParenToken.GetSpan())); - } - else if (_lexer.IsAt("(?<")) - { - if (!_lexer.IsAt("(? or (?<...-...> - _lexer.Position = start; - return ParseNamedCaptureOrBalancingGrouping(openParenToken, questionToken, lessThanToken); - } - } - - private RegexGroupingNode ParseNamedCaptureOrBalancingGrouping( - RegexToken openParenToken, RegexToken questionToken, RegexToken openToken) - { - if (_lexer.Position == _lexer.Text.Length) - { - openParenToken = openParenToken.AddDiagnosticIfNone(new EmbeddedDiagnostic( - WorkspacesResources.Unrecognized_grouping_construct, - GetSpan(openParenToken, openToken))); - } - - // (?<...>...) or (?<...-...>...) - // (?'...'...) or (?'...-...'...) - var captureToken = _lexer.TryScanNumberOrCaptureName(); - if (captureToken == null) - { - // Can't have any trivia between the elements in this grouping header. - ConsumeCurrentToken(allowTrivia: false); - captureToken = CreateMissingToken(RegexKind.CaptureNameToken); - - if (_currentToken.Kind == RegexKind.MinusToken) - { - return ParseBalancingGrouping( - openParenToken, questionToken, openToken, captureToken.Value); - } - else - { - openParenToken = openParenToken.AddDiagnosticIfNone(new EmbeddedDiagnostic( - WorkspacesResources.Invalid_group_name_Group_names_must_begin_with_a_word_character, - GetTokenSpanIncludingEOF(_currentToken))); - - // If we weren't at the end of the text, go back to before whatever character - // we just consumed. - MoveBackBeforePreviousScan(); - } - } - - var capture = captureToken.Value; - if (capture.Kind == RegexKind.NumberToken && (int)capture.Value == 0) - { - capture = capture.AddDiagnosticIfNone(new EmbeddedDiagnostic( - WorkspacesResources.Capture_number_cannot_be_zero, - capture.GetSpan())); - } - - // Can't have any trivia between the elements in this grouping header. - ConsumeCurrentToken(allowTrivia: false); - - if (_currentToken.Kind == RegexKind.MinusToken) - { - // Have (?<...- parse out the balancing group form. - return ParseBalancingGrouping( - openParenToken, questionToken, - openToken, capture); - } - - var closeToken = ParseCaptureGroupingCloseToken(ref openParenToken, openToken); - - return new RegexCaptureGroupingNode( - openParenToken, questionToken, - openToken, capture, closeToken, - ParseGroupingEmbeddedExpression(_options), ParseGroupingCloseParen()); - } - - private RegexToken ParseCaptureGroupingCloseToken(ref RegexToken openParenToken, RegexToken openToken) - { - if ((openToken.Kind == RegexKind.LessThanToken && _currentToken.Kind == RegexKind.GreaterThanToken) || - (openToken.Kind == RegexKind.SingleQuoteToken && _currentToken.Kind == RegexKind.SingleQuoteToken)) - { - return _currentToken; - } - - if (_currentToken.Kind == RegexKind.EndOfFile) - { - openParenToken = openParenToken.AddDiagnosticIfNone(new EmbeddedDiagnostic( - WorkspacesResources.Unrecognized_grouping_construct, - GetSpan(openParenToken, openToken))); - } - else - { - openParenToken = openParenToken.AddDiagnosticIfNone(new EmbeddedDiagnostic( - WorkspacesResources.Invalid_group_name_Group_names_must_begin_with_a_word_character, - _currentToken.GetSpan())); - - // Rewind to where we were before seeing this bogus character. - _lexer.Position--; - } - - return CreateMissingToken( - openToken.Kind == RegexKind.LessThanToken - ? RegexKind.GreaterThanToken : RegexKind.SingleQuoteToken); - } - - private RegexBalancingGroupingNode ParseBalancingGrouping( - RegexToken openParenToken, RegexToken questionToken, - RegexToken openToken, RegexToken firstCapture) - { - var minusToken = _currentToken; - var secondCapture = _lexer.TryScanNumberOrCaptureName(); - if (secondCapture == null) - { - // Invalid group name: Group names must begin with a word character - ConsumeCurrentToken(allowTrivia: false); - - openParenToken = openParenToken.AddDiagnosticIfNone(new EmbeddedDiagnostic( - WorkspacesResources.Invalid_group_name_Group_names_must_begin_with_a_word_character, - GetTokenSpanIncludingEOF(_currentToken))); - - // If we weren't at the end of the text, go back to before whatever character - // we just consumed. - MoveBackBeforePreviousScan(); - secondCapture = CreateMissingToken(RegexKind.CaptureNameToken); - } - - var second = secondCapture.Value; - CheckCapture(ref second); - - // Can't have any trivia between the elements in this grouping header. - ConsumeCurrentToken(allowTrivia: false); - var closeToken = ParseCaptureGroupingCloseToken(ref openParenToken, openToken); - - return new RegexBalancingGroupingNode( - openParenToken, questionToken, - openToken, firstCapture, minusToken, second, closeToken, - ParseGroupingEmbeddedExpression(_options), ParseGroupingCloseParen()); - } - - private void CheckCapture(ref RegexToken captureToken) - { - if (captureToken.IsMissing) - { - // Don't need to check for a synthesized error capture token. - return; - } - - if (captureToken.Kind == RegexKind.NumberToken) - { - var val = (int)captureToken.Value; - if (!HasCapture(val)) - { - captureToken = captureToken.AddDiagnosticIfNone(new EmbeddedDiagnostic( - string.Format(WorkspacesResources.Reference_to_undefined_group_number_0, val), - captureToken.GetSpan())); - } - } - else - { - var val = (string)captureToken.Value; - if (!HasCapture(val)) - { - captureToken = captureToken.AddDiagnosticIfNone(new EmbeddedDiagnostic( - string.Format(WorkspacesResources.Reference_to_undefined_group_name_0, val), - captureToken.GetSpan())); - } - } - } - - private RegexNonCapturingGroupingNode ParseNonCapturingGroupingNode(RegexToken openParenToken, RegexToken questionToken) - => new RegexNonCapturingGroupingNode( - openParenToken, questionToken, _currentToken, - ParseGroupingEmbeddedExpression(_options), ParseGroupingCloseParen()); - - private RegexPositiveLookaheadGroupingNode ParsePositiveLookaheadGrouping(RegexToken openParenToken, RegexToken questionToken) - => new RegexPositiveLookaheadGroupingNode( - openParenToken, questionToken, _currentToken, - ParseGroupingEmbeddedExpression(_options & ~RegexOptions.RightToLeft), ParseGroupingCloseParen()); - - private RegexNegativeLookaheadGroupingNode ParseNegativeLookaheadGrouping(RegexToken openParenToken, RegexToken questionToken) - => new RegexNegativeLookaheadGroupingNode( - openParenToken, questionToken, _currentToken, - ParseGroupingEmbeddedExpression(_options & ~RegexOptions.RightToLeft), ParseGroupingCloseParen()); - - private RegexNonBacktrackingGroupingNode ParseNonBacktrackingGrouping(RegexToken openParenToken, RegexToken questionToken) - => new RegexNonBacktrackingGroupingNode( - openParenToken, questionToken, _currentToken, - ParseGroupingEmbeddedExpression(_options), ParseGroupingCloseParen()); - - private RegexGroupingNode ParseOptionsGroupingNode( - RegexToken openParenToken, RegexToken questionToken, RegexToken optionsToken) - { - // Only (?opts:...) or (?opts) are allowed. After the opts must be a : or ) - ConsumeCurrentToken(allowTrivia: false); - switch (_currentToken.Kind) - { - case RegexKind.CloseParenToken: - // Allow trivia after the options and the next element in the sequence. - _options = GetNewOptionsFromToken(_options, optionsToken); - return new RegexSimpleOptionsGroupingNode( - openParenToken, questionToken, optionsToken, - ConsumeCurrentToken(allowTrivia: true)); - - case RegexKind.ColonToken: - return ParseNestedOptionsGroupingNode(openParenToken, questionToken, optionsToken); - - default: - return new RegexSimpleOptionsGroupingNode( - openParenToken, questionToken, optionsToken, - CreateMissingToken(RegexKind.CloseParenToken).AddDiagnosticIfNone( - new EmbeddedDiagnostic(WorkspacesResources.Unrecognized_grouping_construct, openParenToken.GetSpan()))); - } - } - - private RegexNestedOptionsGroupingNode ParseNestedOptionsGroupingNode( - RegexToken openParenToken, RegexToken questionToken, RegexToken optionsToken) - => new RegexNestedOptionsGroupingNode( - openParenToken, questionToken, optionsToken, _currentToken, - ParseGroupingEmbeddedExpression(GetNewOptionsFromToken(_options, optionsToken)), ParseGroupingCloseParen()); - - private static bool IsTextChar(RegexToken currentToken, char ch) - => currentToken.Kind == RegexKind.TextToken && currentToken.VirtualChars.Length == 1 && currentToken.VirtualChars[0].Char == ch; - - private static RegexOptions GetNewOptionsFromToken(RegexOptions currentOptions, RegexToken optionsToken) - { - var copy = currentOptions; - var on = true; - foreach (var ch in optionsToken.VirtualChars) - { - switch (ch.Char) - { - case '-': on = false; break; - case '+': on = true; break; - default: - var newOption = OptionFromCode(ch); - if (on) - { - copy |= newOption; - } - else - { - copy &= ~newOption; - } - break; - } - } - - return copy; - } - - private static RegexOptions OptionFromCode(VirtualChar ch) - { - switch (ch) - { - case 'i': case 'I': return RegexOptions.IgnoreCase; - case 'm': case 'M': return RegexOptions.Multiline; - case 'n': case 'N': return RegexOptions.ExplicitCapture; - case 's': case 'S': return RegexOptions.Singleline; - case 'x': case 'X': return RegexOptions.IgnorePatternWhitespace; - default: - throw new InvalidOperationException(); - } - } - - private RegexBaseCharacterClassNode ParseCharacterClass() - { - // Note: ScanCharClass is one of the strangest function in the .net regex parser. Code - // for it is here: - // https://github.com/dotnet/corefx/blob/6ae0da1563e6e701bac61012c62ede8f8737f065/src/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParser.cs#L498 - // - // It has certain behaviors that were probably not intentional, but which we try to - // replicate. Specifically, it looks like it was *intended* to just read components - // like simple characters ('a'), char-class-escape ('\s' and the like), ranges - // ('component-component'), and subtractions ('-[charclass]'). - // - // And, it *looks* like it intended that if it ran into a range, it would check that the - // components on the left and right of the '-' made sense (i.e. you could have 'a-b' but - // not 'b-a'). - // - // *However*, the way it is actually written, it does not have that behavior. Instead, - // what it ends up doing is subtly different. Specifically, in this switch: - // https://github.com/dotnet/corefx/blob/6ae0da1563e6e701bac61012c62ede8f8737f065/src/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParser.cs#L531 - // - // In this switch, if it encounters a '\-' it immediately 'continues', effectively - // ignoring that character on the right side of a character range. So, if you had - // ```[#-\-b]```, then this *should* be treated as the character class containing - // the range of character from '#' to '-', unioned with the character 'b'. However, - // .net will interpret this as the character class containing the range of characters - // from '#' to 'b'. We follow .Net here to keep our errors in sync with them. - // - // See the comment about this in ParseRightSideOfCharacterClassRange - - var openBracketToken = _currentToken; - Debug.Assert(openBracketToken.Kind == RegexKind.OpenBracketToken); - var caretToken = CreateMissingToken(RegexKind.CaretToken); - var closeBracketToken = CreateMissingToken(RegexKind.CloseBracketToken); - - // trivia is not allowed anywhere in a character class - ConsumeCurrentToken(allowTrivia: false); - if (_currentToken.Kind == RegexKind.CaretToken) - { - caretToken = _currentToken; - } - else - { - MoveBackBeforePreviousScan(); - } - - // trivia is not allowed anywhere in a character class - ConsumeCurrentToken(allowTrivia: false); - - var contents = ArrayBuilder.GetInstance(); - while (_currentToken.Kind != RegexKind.EndOfFile) - { - Debug.Assert(_currentToken.VirtualChars.Length == 1); - - if (_currentToken.Kind == RegexKind.CloseBracketToken && contents.Count > 0) - { - // Allow trivia after the character class, and whatever is next in the sequence. - closeBracketToken = ConsumeCurrentToken(allowTrivia: true); - break; - } - - ParseCharacterClassComponents(contents); - TryMergeLastTwoNodes(contents); - } - - if (closeBracketToken.IsMissing) - { - closeBracketToken = closeBracketToken.AddDiagnosticIfNone(new EmbeddedDiagnostic( - WorkspacesResources.Unterminated_character_class_set, - GetTokenStartPositionSpan(_currentToken))); - } - - var components = new RegexSequenceNode(contents.ToImmutableAndFree()); - return caretToken.IsMissing - ? (RegexBaseCharacterClassNode)new RegexCharacterClassNode(openBracketToken, components, closeBracketToken) - : new RegexNegatedCharacterClassNode(openBracketToken, caretToken, components, closeBracketToken); - } - - private void ParseCharacterClassComponents(ArrayBuilder components) - { - var left = ParseSingleCharacterClassComponent(isFirst: components.Count == 0, afterRangeMinus: false); - if (left.Kind == RegexKind.CharacterClassEscape || - left.Kind == RegexKind.CategoryEscape || - IsEscapedMinus(left)) - { - // \s or \p{Lu} or \- on the left of a minus doesn't start a range. If there is a following - // minus, it's just treated textually. - components.Add(left); - return; - } - - if (_currentToken.Kind == RegexKind.MinusToken && !_lexer.IsAt("]")) - { - // trivia is not allowed anywhere in a character class - var minusToken = ConsumeCurrentToken(allowTrivia: false); - - if (_currentToken.Kind == RegexKind.OpenBracketToken) - { - components.Add(left); - components.Add(ParseCharacterClassSubtractionNode(minusToken)); - } - else - { - var right = ParseRightSideOfCharacterClassRange(); - - if (TryGetRangeComponentValue(left, isRight: false, out var leftCh) && - TryGetRangeComponentValue(right, isRight: true, out var rightCh) && - leftCh > rightCh) - { - minusToken = minusToken.AddDiagnosticIfNone(new EmbeddedDiagnostic( - WorkspacesResources.x_y_range_in_reverse_order, - minusToken.GetSpan())); - } - - components.Add(new RegexCharacterClassRangeNode(left, minusToken, right)); - } - } - else - { - components.Add(left); - } - } - - private bool IsEscapedMinus(RegexNode node) - => node is RegexSimpleEscapeNode simple && IsTextChar(simple.TypeToken, '-'); - - private bool TryGetRangeComponentValue(RegexExpressionNode component, bool isRight, out char ch) - { - // Don't bother examining the component if it has any errors already. This also means - // we don't have to worry about running into invalid escape sequences and the like. - if (!HasProblem(component)) - { - return TryGetRangeComponentValueWorker(component, out ch); - } - - ch = default; - return false; - } - - private bool TryGetRangeComponentValueWorker(RegexNode component, out char ch) - { - switch (component.Kind) - { - case RegexKind.SimpleEscape: - ch = ((RegexSimpleEscapeNode)component).TypeToken.VirtualChars[0]; - switch (ch) - { - case 'a': ch = '\u0007'; break; - case 'b': ch = '\b'; break; - case 'e': ch = '\u001B'; break; - case 'f': ch = '\f'; break; - case 'n': ch = '\n'; break; - case 'r': ch = '\r'; break; - case 't': ch = '\t'; break; - case 'v': ch = '\u000B'; break; - } - return true; - - case RegexKind.ControlEscape: - var controlEscape = (RegexControlEscapeNode)component; - var controlCh = controlEscape.ControlToken.VirtualChars[0].Char; - - // \ca interpreted as \cA - if (controlCh >= 'a' && controlCh <= 'z') - { - controlCh -= (char)('a' - 'A'); - } - - // The control characters have values mapping from the A-Z range to numeric - // values 1-26. So, to map that, we subtract 'A' from the value (which would - // give us 0-25) and then add '1' back to it. - ch = (char)(controlCh - 'A' + 1); - return true; - - case RegexKind.OctalEscape: - ch = GetCharValue(((RegexOctalEscapeNode)component).OctalText, withBase: 8); - return true; - - case RegexKind.HexEscape: - ch = GetCharValue(((RegexHexEscapeNode)component).HexText, withBase: 16); - return true; - - case RegexKind.UnicodeEscape: - ch = GetCharValue(((RegexUnicodeEscapeNode)component).HexText, withBase: 16); - return true; - - case RegexKind.PosixProperty: - // When the native parser sees [:...:] it treats this as if it just saw '[' and skipped the - // rest. - ch = '['; - return true; - - case RegexKind.Text: - ch = ((RegexTextNode)component).TextToken.VirtualChars[0]; - return true; - - case RegexKind.Sequence: - var sequence = (RegexSequenceNode)component; -#if DEBUG - Debug.Assert(sequence.ChildCount > 0); - for (int i = 0, n = sequence.ChildCount - 1; i < n; i++) - { - Debug.Assert(IsEscapedMinus(sequence.ChildAt(i).Node)); - } -#endif - - var last = sequence.ChildAt(sequence.ChildCount - 1).Node; - if (IsEscapedMinus(last)) - { - break; - } - - return TryGetRangeComponentValueWorker(last, out ch); - } - - ch = default; - return false; - } - - private char GetCharValue(RegexToken hexText, int withBase) - { - unchecked - { - var total = 0; - foreach (var vc in hexText.VirtualChars) - { - total *= withBase; - total += HexValue(vc.Char); - } - - return (char)total; - } - } - - private int HexValue(char ch) - { - Debug.Assert(RegexLexer.IsHexChar(ch)); - unchecked - { - var temp = (uint)(ch - '0'); - if (temp <= 9) - { - return (int)temp; - } - - temp = (uint)(ch - 'a'); - if (temp <= 5) - { - return (int)(temp + 10); - } - - temp = (uint)(ch - 'A'); - if (temp <= 5) - { - return (int)(temp + 10); - } - } - - throw new InvalidOperationException(); - } - - private bool HasProblem(RegexNodeOrToken component) - { - if (component.IsNode) - { - foreach (var child in component.Node) - { - if (HasProblem(child)) - { - return true; - } - } - } - else - { - var token = component.Token; - if (token.IsMissing || - token.Diagnostics.Length > 0) - { - return true; - } - - foreach (var trivia in token.LeadingTrivia) - { - if (trivia.Diagnostics.Length > 0) - { - return true; - } - } - } - - return false; - } - - private RegexExpressionNode ParseRightSideOfCharacterClassRange() - { - // Parsing the right hand side of a - is extremely strange (and most likely buggy) in - // the .net parser. Specifically, the .net parser will still consider itself on the - // right side no matter how many escaped dashes it sees. So, for example, the following - // is legal [a-\-] (even though \- is less than 'a'). Similarly, the following are - // *illegal* [b-\-a] and [b-\-\-a]. That's because the range that is checked is - // actually "b-a", even though it has all the \- escapes in the middle. - - var first = ParseSingleCharacterClassComponent(isFirst: false, afterRangeMinus: true); - if (!IsEscapedMinus(first)) - { - return first; - } - - var builder = ArrayBuilder.GetInstance(); - builder.Add(first); - - while (IsEscapedMinus(builder.Last()) && _currentToken.Kind != RegexKind.CloseBracketToken) - { - builder.Add(ParseSingleCharacterClassComponent(isFirst: false, afterRangeMinus: true)); - } - - return new RegexSequenceNode(builder.ToImmutableAndFree()); - } - - private RegexPrimaryExpressionNode ParseSingleCharacterClassComponent(bool isFirst, bool afterRangeMinus) - { - if (_currentToken.Kind == RegexKind.BackslashToken && _lexer.Position < _lexer.Text.Length) - { - var backslashToken = _currentToken; - var afterSlash = _lexer.Position; - - // trivia is not allowed anywhere in a character class, and definitely not between - // a \ and the following character. - ConsumeCurrentToken(allowTrivia: false); - Debug.Assert(_currentToken.VirtualChars.Length == 1); - - var nextChar = _currentToken.VirtualChars[0].Char; - switch (nextChar) - { - case 'D': case 'd': - case 'S': case 's': - case 'W': case 'w': - case 'p': case 'P': - if (afterRangeMinus) - { - backslashToken = backslashToken.AddDiagnosticIfNone(new EmbeddedDiagnostic( - string.Format(WorkspacesResources.Cannot_include_class_0_in_character_range, nextChar), - GetSpan(backslashToken, _currentToken))); - } - - // move back before the character we just scanned. - // trivia is not allowed anywhere in a character class. - - // The above list are character class and category escapes. ParseEscape can - // handle both of those, so we just defer to it. - _lexer.Position--; - return ParseEscape(backslashToken, allowTriviaAfterEnd: false); - - case '-': - // trivia is not allowed anywhere in a character class. - - // We just let the basic consumption code pull out a token for us, we then - // convert that to text since we treat all characters after the - as text no - // matter what. - return new RegexSimpleEscapeNode( - backslashToken, ConsumeCurrentToken(allowTrivia: false).With(kind: RegexKind.TextToken)); - - default: - // trivia is not allowed anywhere in a character class. - - // Note: it is very intentional that we're calling ParseCharEscape and not - // ParseEscape. Normal escapes are not interpreted the same way inside a - // character class. For example \b is not an anchor in a character class. - // And things like \k'...' are not k-captures, etc. etc. - _lexer.Position--; - return ParseCharEscape(backslashToken, allowTriviaAfterEnd: false); - } - } - - if (!afterRangeMinus && - !isFirst && - _currentToken.Kind == RegexKind.MinusToken && - _lexer.IsAt("[")) - { - // have a trailing subtraction. - // trivia is not allowed anywhere in a character class - return ParseCharacterClassSubtractionNode( - ConsumeCurrentToken(allowTrivia: false)); - } - - // From the .net regex code: - // This is code for Posix style properties - [:Ll:] or [:IsTibetan:]. - // It currently doesn't do anything other than skip the whole thing! - if (!afterRangeMinus && _currentToken.Kind == RegexKind.OpenBracketToken && _lexer.IsAt(":")) - { - var beforeBracketPos = _lexer.Position - 1; - // trivia is not allowed anywhere in a character class - ConsumeCurrentToken(allowTrivia: false); - - var captureName = _lexer.TryScanCaptureName(); - if (captureName.HasValue && _lexer.IsAt(":]")) - { - _lexer.Position += 2; - var textChars = _lexer.GetSubPattern(beforeBracketPos, _lexer.Position); - var token = CreateToken(RegexKind.TextToken, ImmutableArray.Empty, textChars); - - // trivia is not allowed anywhere in a character class - ConsumeCurrentToken(allowTrivia: false); - return new RegexPosixPropertyNode(token); - } - else - { - // Reset to back where we were. - // trivia is not allowed anywhere in a character class - _lexer.Position = beforeBracketPos; - ConsumeCurrentToken(allowTrivia: false); - Debug.Assert(_currentToken.Kind == RegexKind.OpenBracketToken); - } - } - - // trivia is not allowed anywhere in a character class - return new RegexTextNode( - ConsumeCurrentToken(allowTrivia: false).With(kind: RegexKind.TextToken)); - } - - private RegexPrimaryExpressionNode ParseCharacterClassSubtractionNode(RegexToken minusToken) - { - var charClass = ParseCharacterClass(); - - if (_currentToken.Kind != RegexKind.CloseBracketToken && _currentToken.Kind != RegexKind.EndOfFile) - { - minusToken = minusToken.AddDiagnosticIfNone(new EmbeddedDiagnostic( - WorkspacesResources.A_subtraction_must_be_the_last_element_in_a_character_class, - GetTokenStartPositionSpan(minusToken))); - } - - return new RegexCharacterClassSubtractionNode(minusToken, charClass); - } - - /// - /// Parses out an escape sequence. Escape sequences are allowed in top level sequences - /// and in character classes. In a top level sequence trivia will be allowed afterwards, - /// but in a character class trivia is not allowed afterwards. - /// - private RegexEscapeNode ParseEscape(RegexToken backslashToken, bool allowTriviaAfterEnd) - { - Debug.Assert(_lexer.Text[_lexer.Position - 1].Char == '\\'); - - // No spaces between \ and next char. - ConsumeCurrentToken(allowTrivia: false); - - if (_currentToken.Kind == RegexKind.EndOfFile) - { - backslashToken = backslashToken.AddDiagnosticIfNone(new EmbeddedDiagnostic( - WorkspacesResources.Illegal_backslash_at_end_of_pattern, - backslashToken.GetSpan())); - return new RegexSimpleEscapeNode(backslashToken, CreateMissingToken(RegexKind.TextToken)); - } - - Debug.Assert(_currentToken.VirtualChars.Length == 1); - var ch = _currentToken.VirtualChars[0].Char; - switch (_currentToken.VirtualChars[0].Char) - { - case 'b': case 'B': case 'A': case 'G': case 'Z': case 'z': - return new RegexAnchorEscapeNode( - backslashToken, ConsumeCurrentToken(allowTrivia: allowTriviaAfterEnd)); - - case 'w': case 'W': case 's': case 'S': case 'd': case 'D': - return new RegexCharacterClassEscapeNode( - backslashToken, ConsumeCurrentToken(allowTrivia: allowTriviaAfterEnd)); - - case 'p': case 'P': - return ParseCategoryEscape(backslashToken, allowTriviaAfterEnd); - } - - // Move back to after the backslash - _lexer.Position--; - return ParseBasicBackslash(backslashToken, allowTriviaAfterEnd); - } - - private RegexEscapeNode ParseBasicBackslash(RegexToken backslashToken, bool allowTriviaAfterEnd) - { - Debug.Assert(_lexer.Text[_lexer.Position - 1].Char == '\\'); - - // No spaces between \ and next char. - ConsumeCurrentToken(allowTrivia: false); - - if (_currentToken.Kind == RegexKind.EndOfFile) - { - backslashToken = backslashToken.AddDiagnosticIfNone(new EmbeddedDiagnostic( - WorkspacesResources.Illegal_backslash_at_end_of_pattern, - backslashToken.GetSpan())); - return new RegexSimpleEscapeNode(backslashToken, CreateMissingToken(RegexKind.TextToken)); - } - - Debug.Assert(_currentToken.VirtualChars.Length == 1); - var ch = _currentToken.VirtualChars[0].Char; - if (ch == 'k') - { - return ParsePossibleKCaptureEscape(backslashToken, allowTriviaAfterEnd); - } - - if (ch == '<' || ch == '\'') - { - _lexer.Position--; - return ParsePossibleCaptureEscape(backslashToken, allowTriviaAfterEnd); - } - - if (ch >= '1' && ch <= '9') - { - _lexer.Position--; - return ParsePossibleBackreferenceEscape(backslashToken, allowTriviaAfterEnd); - } - - _lexer.Position--; - return ParseCharEscape(backslashToken, allowTriviaAfterEnd); - } - - private RegexEscapeNode ParsePossibleBackreferenceEscape(RegexToken backslashToken, bool allowTriviaAfterEnd) - { - Debug.Assert(_lexer.Text[_lexer.Position - 1] == '\\'); - return HasOption(_options, RegexOptions.ECMAScript) - ? ParsePossibleEcmascriptBackreferenceEscape(backslashToken, allowTriviaAfterEnd) - : ParsePossibleRegularBackreferenceEscape(backslashToken, allowTriviaAfterEnd); - } - - private RegexEscapeNode ParsePossibleEcmascriptBackreferenceEscape( - RegexToken backslashToken, bool allowTriviaAfterEnd) - { - // Small deviation: Ecmascript allows references only to captures that precede - // this position (unlike .net which allows references in any direction). However, - // because we don't track position, we just consume the entire back-reference. - // - // This is addressable if we add position tracking when we locate all the captures. - - Debug.Assert(_lexer.Text[_lexer.Position - 1].Char == '\\'); - var start = _lexer.Position; - - var bestPosition = -1; - var capVal = 0; - while (_lexer.Position < _lexer.Text.Length && - _lexer.Text[_lexer.Position] is var ch && - (ch >= '0' && ch <= '9')) - { - unchecked - { - capVal *= 10; - capVal += (ch - '0'); - } - - _lexer.Position++; - - if (HasCapture(capVal)) - { - bestPosition = _lexer.Position; - } - } - - if (bestPosition != -1) - { - var numberToken = CreateToken( - RegexKind.NumberToken, ImmutableArray.Empty, - _lexer.GetSubPattern(start, bestPosition)).With(value: capVal); - ResetToPositionAndConsumeCurrentToken(bestPosition, allowTrivia: allowTriviaAfterEnd); - return new RegexBackreferenceEscapeNode(backslashToken, numberToken); - } - - _lexer.Position = start; - return ParseCharEscape(backslashToken, allowTriviaAfterEnd); - } - - private RegexEscapeNode ParsePossibleRegularBackreferenceEscape( - RegexToken backslashToken, bool allowTriviaAfterEnd) - { - Debug.Assert(_lexer.Text[_lexer.Position - 1].Char == '\\'); - var start = _lexer.Position; - - var numberToken = _lexer.TryScanNumber().Value; - var capVal = (int)numberToken.Value; - if (HasCapture(capVal) || - capVal <= 9) - { - CheckCapture(ref numberToken); - - ConsumeCurrentToken(allowTrivia: allowTriviaAfterEnd); - return new RegexBackreferenceEscapeNode(backslashToken, numberToken); - } - - _lexer.Position = start; - return ParseCharEscape(backslashToken, allowTriviaAfterEnd); - } - - private RegexEscapeNode ParsePossibleCaptureEscape(RegexToken backslashToken, bool allowTriviaAfterEnd) - { - Debug.Assert(_lexer.Text[_lexer.Position - 1].Char == '\\'); - Debug.Assert(_lexer.Text[_lexer.Position].Char == '<' || - _lexer.Text[_lexer.Position].Char == '\''); - - var afterBackslashPosition = _lexer.Position; - ScanCaptureParts(allowTriviaAfterEnd, out var openToken, out var capture, out var closeToken); - - if (openToken.IsMissing || capture.IsMissing || closeToken.IsMissing) - { - _lexer.Position = afterBackslashPosition; - return ParseCharEscape(backslashToken, allowTriviaAfterEnd); - } - - return new RegexCaptureEscapeNode( - backslashToken, openToken, capture, closeToken); - } - - private RegexEscapeNode ParsePossibleKCaptureEscape(RegexToken backslashToken, bool allowTriviaAfterEnd) - { - var typeToken = _currentToken; - var afterBackslashPosition = _lexer.Position - @"k".Length; - - ScanCaptureParts(allowTriviaAfterEnd, out var openToken, out var capture, out var closeToken); - if (openToken.IsMissing) - { - backslashToken = backslashToken.AddDiagnosticIfNone(new EmbeddedDiagnostic( - WorkspacesResources.Malformed_named_back_reference, - GetSpan(backslashToken, typeToken))); - return new RegexSimpleEscapeNode(backslashToken, typeToken.With(kind: RegexKind.TextToken)); - } - - if (capture.IsMissing || closeToken.IsMissing) - { - // Native parser falls back to normal escape scanning, if it doesn't see a capture, - // or close brace. For normal .net regexes, this will then fail later (as \k is not - // a legal escape), but will succeed for ecmascript regexes. - _lexer.Position = afterBackslashPosition; - return ParseCharEscape(backslashToken, allowTriviaAfterEnd); - } - - return new RegexKCaptureEscapeNode( - backslashToken, typeToken, openToken, capture, closeToken); - } - - private void ScanCaptureParts( - bool allowTriviaAfterEnd, out RegexToken openToken, out RegexToken capture, out RegexToken closeToken) - { - openToken = CreateMissingToken(RegexKind.LessThanToken); - capture = CreateMissingToken(RegexKind.CaptureNameToken); - closeToken = CreateMissingToken(RegexKind.GreaterThanToken); - - // No trivia allowed in or 'cap' - ConsumeCurrentToken(allowTrivia: false); - - if (_lexer.Position < _lexer.Text.Length && - (_currentToken.Kind == RegexKind.LessThanToken || _currentToken.Kind == RegexKind.SingleQuoteToken)) - { - openToken = _currentToken; - } - else - { - return; - } - - var captureToken = _lexer.TryScanNumberOrCaptureName(); - capture = captureToken == null - ? CreateMissingToken(RegexKind.CaptureNameToken) - : captureToken.Value; - - // No trivia allowed in or 'cap' - ConsumeCurrentToken(allowTrivia: false); - closeToken = CreateMissingToken(RegexKind.GreaterThanToken); - - if (!capture.IsMissing && - ((openToken.Kind == RegexKind.LessThanToken && _currentToken.Kind == RegexKind.GreaterThanToken) || - (openToken.Kind == RegexKind.SingleQuoteToken && _currentToken.Kind == RegexKind.SingleQuoteToken))) - { - CheckCapture(ref capture); - closeToken = ConsumeCurrentToken(allowTrivia: allowTriviaAfterEnd); - } - } - - private RegexEscapeNode ParseCharEscape(RegexToken backslashToken, bool allowTriviaAfterEnd) - { - Debug.Assert(_lexer.Text[_lexer.Position - 1].Char == '\\'); - - // no trivia between \ and the next char - ConsumeCurrentToken(allowTrivia: false); - Debug.Assert(_currentToken.VirtualChars.Length == 1); - - var ch = _currentToken.VirtualChars[0]; - if (ch >= '0' && ch <= '7') - { - _lexer.Position--; - var octalDigits = _lexer.ScanOctalCharacters(_options); - Debug.Assert(octalDigits.VirtualChars.Length > 0); - - ConsumeCurrentToken(allowTrivia: allowTriviaAfterEnd); - return new RegexOctalEscapeNode(backslashToken, octalDigits); - } - - switch (ch) - { - case 'a': case 'b': case 'e': case 'f': - case 'n': case 'r': case 't': case 'v': - return new RegexSimpleEscapeNode( - backslashToken, ConsumeCurrentToken(allowTrivia: allowTriviaAfterEnd)); - case 'x': - return ParseHexEscape(backslashToken, allowTriviaAfterEnd); - case 'u': - return ParseUnicodeEscape(backslashToken, allowTriviaAfterEnd); - case 'c': - return ParseControlEscape(backslashToken, allowTriviaAfterEnd); - default: - var typeToken = ConsumeCurrentToken(allowTrivia: allowTriviaAfterEnd).With(kind: RegexKind.TextToken); - - if (!HasOption(_options, RegexOptions.ECMAScript) && RegexCharClass.IsWordChar(ch)) - { - typeToken = typeToken.AddDiagnosticIfNone(new EmbeddedDiagnostic( - string.Format(WorkspacesResources.Unrecognized_escape_sequence_0, ch.Char), - typeToken.GetSpan())); - } - - return new RegexSimpleEscapeNode(backslashToken, typeToken); - } - } - - private RegexEscapeNode ParseUnicodeEscape(RegexToken backslashToken, bool allowTriviaAfterEnd) - { - var typeToken = _currentToken; - var hexChars = _lexer.ScanHexCharacters(4); - ConsumeCurrentToken(allowTrivia: allowTriviaAfterEnd); - return new RegexUnicodeEscapeNode(backslashToken, typeToken, hexChars); - } - - private RegexEscapeNode ParseHexEscape(RegexToken backslashToken, bool allowTriviaAfterEnd) - { - var typeToken = _currentToken; - var hexChars = _lexer.ScanHexCharacters(2); - ConsumeCurrentToken(allowTrivia: allowTriviaAfterEnd); - return new RegexHexEscapeNode(backslashToken, typeToken, hexChars); - } - - private RegexControlEscapeNode ParseControlEscape(RegexToken backslashToken, bool allowTriviaAfterEnd) - { - // Nothing allowed between \c and the next char - var typeToken = ConsumeCurrentToken(allowTrivia: false); - - if (_currentToken.Kind == RegexKind.EndOfFile) - { - typeToken = typeToken.AddDiagnosticIfNone(new EmbeddedDiagnostic( - WorkspacesResources.Missing_control_character, - typeToken.GetSpan())); - return new RegexControlEscapeNode(backslashToken, typeToken, CreateMissingToken(RegexKind.TextToken)); - } - - Debug.Assert(_currentToken.VirtualChars.Length == 1); - - var ch = _currentToken.VirtualChars[0].Char; - - unchecked - { - // \ca interpreted as \cA - if (ch >= 'a' && ch <= 'z') - { - ch -= (char)('a' - 'A'); - } - - ch -= '@'; - - if (ch < ' ') - { - var controlToken = ConsumeCurrentToken(allowTrivia: allowTriviaAfterEnd).With(kind: RegexKind.TextToken); - return new RegexControlEscapeNode(backslashToken, typeToken, controlToken); - } - else - { - typeToken = typeToken.AddDiagnosticIfNone(new EmbeddedDiagnostic( - WorkspacesResources.Unrecognized_control_character, - _currentToken.GetSpan())); - - // Don't consume the bogus control character. - return new RegexControlEscapeNode(backslashToken, typeToken, CreateMissingToken(RegexKind.TextToken)); - } - } - } - - private RegexEscapeNode ParseCategoryEscape(RegexToken backslash, bool allowTriviaAfterEnd) - { - Debug.Assert(_lexer.Text[_lexer.Position - 1] is var ch && (ch == 'P' || ch == 'p')); - var typeToken = _currentToken; - - var start = _lexer.Position; - - if (!TryGetCategoryEscapeParts( - allowTriviaAfterEnd, - out var openBraceToken, - out var categoryToken, - out var closeBraceToken, - out var message)) - { - ResetToPositionAndConsumeCurrentToken(start, allowTrivia: allowTriviaAfterEnd); - typeToken = typeToken.With(kind: RegexKind.TextToken).AddDiagnosticIfNone(new EmbeddedDiagnostic( - message, GetSpan(backslash, typeToken))); - return new RegexSimpleEscapeNode(backslash, typeToken); - } - - return new RegexCategoryEscapeNode(backslash, typeToken, openBraceToken, categoryToken, closeBraceToken); - } - - private bool TryGetCategoryEscapeParts( - bool allowTriviaAfterEnd, - out RegexToken openBraceToken, - out RegexToken categoryToken, - out RegexToken closeBraceToken, - out string message) - { - openBraceToken = default; - categoryToken = default; - closeBraceToken = default; - message = default; - - if (_lexer.Text.Length - _lexer.Position < "{x}".Length) - { - message = WorkspacesResources.Incomplete_character_escape; - return false; - } - - // no whitespace in \p{x} - ConsumeCurrentToken(allowTrivia: false); - - if (_currentToken.Kind != RegexKind.OpenBraceToken) - { - message = WorkspacesResources.Malformed_character_escape; - return false; - } - - openBraceToken = _currentToken; - var category = _lexer.TryScanEscapeCategory(); - - // no whitespace in \p{x} - ConsumeCurrentToken(allowTrivia: false); - if (_currentToken.Kind != RegexKind.CloseBraceToken) - { - message = WorkspacesResources.Incomplete_character_escape; - return false; - } - - if (category == null) - { - message = WorkspacesResources.Unknown_property; - return false; - } - - categoryToken = category.Value; - closeBraceToken = ConsumeCurrentToken(allowTrivia: allowTriviaAfterEnd); - return true; - } - - private RegexTextNode ParseUnexpectedQuantifier(RegexExpressionNode lastExpression) - { - // This is just a bogus element in the higher level sequence. Allow trivia - // after this to abide by the spirit of the native parser. - var token = ConsumeCurrentToken(allowTrivia: true); - CheckQuantifierExpression(lastExpression, ref token); - return new RegexTextNode(token.With(kind: RegexKind.TextToken)); - } - - private void CheckQuantifierExpression(RegexExpressionNode current, ref RegexToken token) - { - if (current == null || - current.Kind == RegexKind.SimpleOptionsGrouping) - { - token = token.AddDiagnosticIfNone(new EmbeddedDiagnostic( - WorkspacesResources.Quantifier_x_y_following_nothing, token.GetSpan())); - } - else if (current is RegexQuantifierNode || - current is RegexLazyQuantifierNode) - { - token = token.AddDiagnosticIfNone(new EmbeddedDiagnostic( - string.Format(WorkspacesResources.Nested_quantifier_0, token.VirtualChars.First().Char), token.GetSpan())); - } - } - } -} diff --git a/src/Workspaces/Core/Portable/EmbeddedLanguages/RegularExpressions/RegexPatternDetector.cs b/src/Workspaces/Core/Portable/EmbeddedLanguages/RegularExpressions/RegexPatternDetector.cs deleted file mode 100644 index 0e6405b0c26300f4464ae002f3bd952d5a22d3f1..0000000000000000000000000000000000000000 --- a/src/Workspaces/Core/Portable/EmbeddedLanguages/RegularExpressions/RegexPatternDetector.cs +++ /dev/null @@ -1,354 +0,0 @@ -// Copyright (c) Microsoft. All Rights Reserved. Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. - -using System; -using System.Collections.Generic; -using System.Diagnostics; -using System.Linq; -using System.Reflection; -using System.Runtime.CompilerServices; -using System.Text.RegularExpressions; -using System.Threading; -using Microsoft.CodeAnalysis.EmbeddedLanguages.VirtualChars; -using Microsoft.CodeAnalysis.LanguageServices; -using Microsoft.CodeAnalysis.Shared.Extensions; -using Roslyn.Utilities; - -namespace Microsoft.CodeAnalysis.EmbeddedLanguages.RegularExpressions -{ - /// - /// Helper class to detect regex pattern tokens in a document efficiently. - /// - internal class RegexPatternDetector - { - private const string _patternName = "pattern"; - - private static readonly ConditionalWeakTable _modelToDetector = - new ConditionalWeakTable(); - - private readonly SemanticModel _semanticModel; - private readonly ISyntaxFactsService _syntaxFacts; - private readonly ISemanticFactsService _semanticFacts; - private readonly INamedTypeSymbol _regexType; - private readonly HashSet _methodNamesOfInterest; - - /// - /// Helps match patterns of the form: language=regex,option1,option2,option3 - /// - /// All matching is case insensitive, with spaces allowed between the punctuation. - /// 'regex' or 'regexp' are both allowed. Option values will be or'ed together - /// to produce final options value. If an unknown option is encountered, processing - /// will stop with whatever value has accumulated so far. - /// - /// Option names are the values from the enum. - /// - private static readonly Regex s_languageCommentDetector = - new Regex(@"language\s*=\s*regex(p)?((\s*,\s*)(?