提交 87076321 编写于 作者: C Cyrus Najmabadi

Merge branch 'regexParsing4' into regexFeatures

......@@ -3,12 +3,12 @@
using System.Collections.Immutable;
using System.Composition;
using System.Diagnostics;
using Microsoft.CodeAnalysis.EmbeddedLanguages.VirtualChars;
using Microsoft.CodeAnalysis.Host.Mef;
using Microsoft.CodeAnalysis.PooledObjects;
using Microsoft.CodeAnalysis.Text;
using Microsoft.CodeAnalysis.VirtualChars;
namespace Microsoft.CodeAnalysis.CSharp.VirtualChars
namespace Microsoft.CodeAnalysis.CSharp.EmbeddedLanguages.VirtualChars
{
[ExportLanguageService(typeof(IVirtualCharService), LanguageNames.CSharp), Shared]
internal class CSharpVirtualCharService : AbstractVirtualCharService
......
......@@ -2,19 +2,19 @@
using System.Collections.Immutable;
using System.Linq;
using Microsoft.CodeAnalysis.CSharp.VirtualChars;
using Microsoft.CodeAnalysis.VirtualChars;
using Microsoft.CodeAnalysis.CSharp.EmbeddedLanguages.VirtualChars;
using Microsoft.CodeAnalysis.EmbeddedLanguages.VirtualChars;
using Xunit;
namespace Microsoft.CodeAnalysis.CSharp.UnitTests.VirtualChars
namespace Microsoft.CodeAnalysis.CSharp.UnitTests.EmbeddedLanguages.VirtualChars
{
public class CSharpVirtualCharServiceTests
{
private const string _statmentPrefix = "var v = ";
private const string _statementPrefix = "var v = ";
private SyntaxToken GetStringToken(string text)
{
var statement = _statmentPrefix + text;
var statement = _statementPrefix + text;
var parsedStatement = SyntaxFactory.ParseStatement(statement);
var token = parsedStatement.DescendantTokens().ToArray()[3];
Assert.True(token.Kind() == SyntaxKind.StringLiteralToken);
......@@ -199,7 +199,7 @@ private string ConvertToString(ImmutableArray<VirtualChar> virtualChars)
=> string.Join("", virtualChars.Select(ConvertToString));
private string ConvertToString(VirtualChar vc)
=> $"[{ConvertToString(vc.Char)},[{vc.Span.Start - _statmentPrefix.Length},{vc.Span.End - _statmentPrefix.Length}]]";
=> $"[{ConvertToString(vc.Char)},[{vc.Span.Start - _statementPrefix.Length},{vc.Span.End - _statementPrefix.Length}]]";
private string ConvertToString(char c)
=> char.IsLetterOrDigit(c) && c < 127 ? $"'{c}'" : $"'\\u{((int)c).ToString("X4")}'";
......
......@@ -5,22 +5,21 @@
using System.Diagnostics;
using Microsoft.CodeAnalysis.Text;
namespace Microsoft.CodeAnalysis.RegularExpressions
namespace Microsoft.CodeAnalysis.EmbeddedLanguages.Common
{
/// <summary>
/// Represents an error in a regular expression. The error contains the message to show a user
/// (which should be as close as possible to the error that the .Net regex library would produce),
/// as well as the span of the error. This span is in actual user character coordinates. For
/// example, if the user has the string "...\\p{0}..." then the span of the error would be for
/// the range of characters for '\\p{0}' (even though the regex engine would only see the \\ translated
/// as a virtual char to the single \ character.
/// Represents an error in an embedded language snippet. The error contains the message to show
/// a user as well as the span of the error. This span is in actual user character coordinates.
/// For example, if the user has the string "...\\p{0}..." then the span of the error would be
/// for the range of characters for '\\p{0}' (even though the regex engine would only see the \\
/// translated as a virtual char to the single \ character).
/// </summary>
internal struct RegexDiagnostic : IEquatable<RegexDiagnostic>
internal struct EmbeddedDiagnostic : IEquatable<EmbeddedDiagnostic>
{
public readonly string Message;
public readonly TextSpan Span;
public RegexDiagnostic(string message, TextSpan span)
public EmbeddedDiagnostic(string message, TextSpan span)
{
Debug.Assert(message != null);
Message = message;
......@@ -28,9 +27,9 @@ public RegexDiagnostic(string message, TextSpan span)
}
public override bool Equals(object obj)
=> obj is RegexDiagnostic rd && Equals(rd);
=> obj is EmbeddedDiagnostic rd && Equals(rd);
public bool Equals(RegexDiagnostic other)
public bool Equals(EmbeddedDiagnostic other)
=> Message == other.Message &&
Span.Equals(other.Span);
......@@ -46,10 +45,10 @@ public override int GetHashCode()
}
}
public static bool operator ==(RegexDiagnostic diagnostic1, RegexDiagnostic diagnostic2)
public static bool operator ==(EmbeddedDiagnostic diagnostic1, EmbeddedDiagnostic diagnostic2)
=> diagnostic1.Equals(diagnostic2);
public static bool operator !=(RegexDiagnostic diagnostic1, RegexDiagnostic diagnostic2)
public static bool operator !=(EmbeddedDiagnostic diagnostic1, EmbeddedDiagnostic diagnostic2)
=> !(diagnostic1 == diagnostic2);
}
}
......@@ -3,18 +3,17 @@
using System;
using System.Collections.Immutable;
using System.Linq;
using System.Text.RegularExpressions;
using Microsoft.CodeAnalysis.Text;
using Microsoft.CodeAnalysis.VirtualChars;
using Microsoft.CodeAnalysis.EmbeddedLanguages.VirtualChars;
namespace Microsoft.CodeAnalysis.RegularExpressions
namespace Microsoft.CodeAnalysis.EmbeddedLanguages.Common
{
internal static class RegexHelpers
internal static class EmbeddedSyntaxHelpers
{
public static TextSpan GetSpan(RegexToken token)
public static TextSpan GetSpan<TSyntaxKind>(EmbeddedSyntaxToken<TSyntaxKind> token) where TSyntaxKind : struct
=> GetSpan(token.VirtualChars);
public static TextSpan GetSpan(RegexToken token1, RegexToken token2)
public static TextSpan GetSpan<TSyntaxKind>(EmbeddedSyntaxToken<TSyntaxKind> token1, EmbeddedSyntaxToken<TSyntaxKind> token2) where TSyntaxKind : struct
=> GetSpan(token1.VirtualChars[0], token2.VirtualChars.Last());
public static TextSpan GetSpan(ImmutableArray<VirtualChar> virtualChars)
......@@ -23,26 +22,27 @@ public static TextSpan GetSpan(ImmutableArray<VirtualChar> virtualChars)
public static TextSpan GetSpan(VirtualChar firstChar, VirtualChar lastChar)
=> TextSpan.FromBounds(firstChar.Span.Start, lastChar.Span.End);
public static bool HasOption(RegexOptions options, RegexOptions val)
=> (options & val) != 0;
public static TextSpan GetSpan(RegexEscapeNode node)
public static TextSpan GetSpan<TSyntaxKind, TNode>(TNode node)
where TSyntaxKind : struct
where TNode : EmbeddedSyntaxNode<TSyntaxKind, TNode>
{
var start = int.MaxValue;
var end = 0;
GetSpan(node, ref start, ref end);
GetSpan<TSyntaxKind, TNode>(node, ref start, ref end);
return TextSpan.FromBounds(start, end);
}
private static void GetSpan(RegexNode node, ref int start, ref int end)
private static void GetSpan<TSyntaxKind, TNode>(TNode node, ref int start, ref int end)
where TSyntaxKind : struct
where TNode : EmbeddedSyntaxNode<TSyntaxKind, TNode>
{
foreach (var child in node)
{
if (child.IsNode)
{
GetSpan(child.Node, ref start, ref end);
GetSpan<TSyntaxKind, TNode>(child.Node, ref start, ref end);
}
else
{
......@@ -56,13 +56,15 @@ private static void GetSpan(RegexNode node, ref int start, ref int end)
}
}
public static bool Contains(RegexNode node, VirtualChar virtualChar)
public static bool Contains<TSyntaxKind, TNode>(TNode node, VirtualChar virtualChar)
where TSyntaxKind : struct
where TNode : EmbeddedSyntaxNode<TSyntaxKind, TNode>
{
foreach (var child in node)
{
if (child.IsNode)
{
if (Contains(child.Node, virtualChar))
if (Contains<TSyntaxKind, TNode>(child.Node, virtualChar))
{
return true;
}
......
......@@ -2,16 +2,16 @@
using System.Diagnostics;
namespace Microsoft.CodeAnalysis.RegularExpressions
namespace Microsoft.CodeAnalysis.EmbeddedLanguages.Common
{
/// <summary>
/// Root of the Regex syntax hierarchy. RegexNodes are very similar to Roslyn Red-Nodes in concept,
/// though there are differences for ease of implementation.
/// Root of the embedded language syntax hierarchy. EmbeddedSyntaxNodes are very similar to
/// Roslyn Red-Nodes in concept, though there are differences for ease of implementation.
///
/// Similarities:
/// 1. Fully representative of the original source. All source VirtualChars are contained
/// in the Regex nodes.
/// 2. Specific types for Nodes, Tokens and Trivia (though RegexTokens only have leading trivia).
/// 2. Specific types for Nodes, Tokens and Trivia.
/// 3. Uniform ways of deconstructing Nodes (i.e. ChildCount + ChildAt).
///
/// Differences:
......@@ -23,45 +23,45 @@ namespace Microsoft.CodeAnalysis.RegularExpressions
/// an empty node (for example, an empty RegexSequenceNode) effect has no way to simply ascertain
/// its location. So far that hasn't been a problem.
/// 4. No null nodes. Haven't been needed so far, and it keeps things extremely simple. For
/// Situations where Roslyn might have chosen an optional null child, the Regex hierarchy just
/// situations where Roslyn might have chosen an optional null child, the Regex hierarchy just
/// has multiple nodes. For example there are distinct nodes to represent the very similar
/// {a} {a,} {a,b} constructs.
/// </summary>
internal abstract class RegexNode
internal abstract class EmbeddedSyntaxNode<TSyntaxKind, TNode>
where TSyntaxKind : struct
where TNode : EmbeddedSyntaxNode<TSyntaxKind, TNode>
{
public readonly RegexKind Kind;
public readonly TSyntaxKind Kind;
protected RegexNode(RegexKind kind)
protected EmbeddedSyntaxNode(TSyntaxKind kind)
{
Debug.Assert(kind != RegexKind.None);
Debug.Assert((int)(object)kind != 0);
Kind = kind;
}
public abstract int ChildCount { get; }
public abstract RegexNodeOrToken ChildAt(int index);
public abstract EmbeddedSyntaxNodeOrToken<TSyntaxKind, TNode> ChildAt(int index);
public abstract void Accept(IRegexNodeVisitor visitor);
// public abstract void Accept(IRegexNodeVisitor visitor);
public Enumerator GetEnumerator()
{
return new Enumerator(this);
}
=> new Enumerator(this);
public struct Enumerator
{
private readonly RegexNode _regexNode;
private readonly EmbeddedSyntaxNode<TSyntaxKind, TNode> _node;
private readonly int _childCount;
private int _currentIndex;
public Enumerator(RegexNode regexNode)
public Enumerator(EmbeddedSyntaxNode<TSyntaxKind, TNode> node)
{
_regexNode = regexNode;
_childCount = regexNode.ChildCount;
_node = node;
_childCount = _node.ChildCount;
_currentIndex = -1;
Current = default;
}
public RegexNodeOrToken Current { get; private set; }
public EmbeddedSyntaxNodeOrToken<TSyntaxKind, TNode> Current { get; private set; }
public bool MoveNext()
{
......@@ -72,7 +72,7 @@ public bool MoveNext()
return false;
}
Current = _regexNode.ChildAt(_currentIndex);
Current = _node.ChildAt(_currentIndex);
return true;
}
}
......
......@@ -2,31 +2,33 @@
using System.Diagnostics;
namespace Microsoft.CodeAnalysis.RegularExpressions
namespace Microsoft.CodeAnalysis.EmbeddedLanguages.Common
{
internal struct RegexNodeOrToken
internal struct EmbeddedSyntaxNodeOrToken<TSyntaxKind, TNode>
where TSyntaxKind : struct
where TNode : EmbeddedSyntaxNode<TSyntaxKind, TNode>
{
public readonly RegexNode Node;
public readonly RegexToken Token;
public readonly TNode Node;
public readonly EmbeddedSyntaxToken<TSyntaxKind> Token;
private RegexNodeOrToken(RegexNode node) : this()
private EmbeddedSyntaxNodeOrToken(TNode node) : this()
{
Debug.Assert(node != null);
Node = node;
}
private RegexNodeOrToken(RegexToken token) : this()
private EmbeddedSyntaxNodeOrToken(EmbeddedSyntaxToken<TSyntaxKind> token) : this()
{
Debug.Assert(token.Kind != RegexKind.None);
Debug.Assert((int)(object)token.Kind != 0);
Token = token;
}
public bool IsNode => Node != null;
public static implicit operator RegexNodeOrToken(RegexNode node)
=> new RegexNodeOrToken(node);
public static implicit operator EmbeddedSyntaxNodeOrToken<TSyntaxKind, TNode>(TNode node)
=> new EmbeddedSyntaxNodeOrToken<TSyntaxKind, TNode>(node);
public static implicit operator RegexNodeOrToken(RegexToken token)
=> new RegexNodeOrToken(token);
public static implicit operator EmbeddedSyntaxNodeOrToken<TSyntaxKind, TNode>(EmbeddedSyntaxToken<TSyntaxKind> token)
=> new EmbeddedSyntaxNodeOrToken<TSyntaxKind, TNode>(token);
}
}
// Copyright (c) Microsoft. All Rights Reserved. Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
using System.Collections.Immutable;
using Microsoft.CodeAnalysis.VirtualChars;
using Microsoft.CodeAnalysis.EmbeddedLanguages.VirtualChars;
namespace Microsoft.CodeAnalysis.RegularExpressions
namespace Microsoft.CodeAnalysis.EmbeddedLanguages.Common
{
internal struct RegexToken
internal struct EmbeddedSyntaxToken<TSyntaxKind> where TSyntaxKind : struct
{
public readonly RegexKind Kind;
public readonly ImmutableArray<RegexTrivia> LeadingTrivia;
public readonly TSyntaxKind Kind;
public readonly ImmutableArray<EmbeddedSyntaxTrivia<TSyntaxKind>> LeadingTrivia;
public readonly ImmutableArray<VirtualChar> VirtualChars;
internal readonly ImmutableArray<RegexDiagnostic> Diagnostics;
public readonly ImmutableArray<EmbeddedSyntaxTrivia<TSyntaxKind>> TrailingTrivia;
internal readonly ImmutableArray<EmbeddedDiagnostic> Diagnostics;
public readonly object Value;
public RegexToken(RegexKind kind, ImmutableArray<RegexTrivia> leadingTrivia, ImmutableArray<VirtualChar> virtualChars)
: this(kind, leadingTrivia, virtualChars, ImmutableArray<RegexDiagnostic>.Empty)
{
}
public RegexToken(
RegexKind kind, ImmutableArray<RegexTrivia> leadingTrivia,
ImmutableArray<VirtualChar> virtualChars, ImmutableArray<RegexDiagnostic> diagnostics)
: this(kind, leadingTrivia, virtualChars, diagnostics, value: null)
{
}
public RegexToken(
RegexKind kind, ImmutableArray<RegexTrivia> leadingTrivia, ImmutableArray<VirtualChar> virtualChars,
ImmutableArray<RegexDiagnostic> diagnostics, object value)
public EmbeddedSyntaxToken(
TSyntaxKind kind,
ImmutableArray<EmbeddedSyntaxTrivia<TSyntaxKind>> leadingTrivia,
ImmutableArray<VirtualChar> virtualChars,
ImmutableArray<EmbeddedSyntaxTrivia<TSyntaxKind>> trailingTrivia,
ImmutableArray<EmbeddedDiagnostic> diagnostics, object value)
{
Kind = kind;
LeadingTrivia = leadingTrivia;
VirtualChars = virtualChars;
TrailingTrivia = trailingTrivia;
Diagnostics = diagnostics;
Value = value;
}
public static RegexToken CreateMissing(RegexKind kind)
=> new RegexToken(kind, ImmutableArray<RegexTrivia>.Empty, ImmutableArray<VirtualChar>.Empty);
public bool IsMissing => VirtualChars.IsEmpty;
public RegexToken AddDiagnosticIfNone(RegexDiagnostic diagnostic)
public EmbeddedSyntaxToken<TSyntaxKind> AddDiagnosticIfNone(EmbeddedDiagnostic diagnostic)
=> Diagnostics.Length > 0 ? this : WithDiagnostics(ImmutableArray.Create(diagnostic));
public RegexToken WithDiagnostics(ImmutableArray<RegexDiagnostic> diagnostics)
public EmbeddedSyntaxToken<TSyntaxKind> WithDiagnostics(ImmutableArray<EmbeddedDiagnostic> diagnostics)
=> With(diagnostics: diagnostics);
public RegexToken With(
Optional<RegexKind> kind = default,
Optional<ImmutableArray<RegexTrivia>> leadingTrivia = default,
public EmbeddedSyntaxToken<TSyntaxKind> With(
Optional<TSyntaxKind> kind = default,
Optional<ImmutableArray<EmbeddedSyntaxTrivia<TSyntaxKind>>> leadingTrivia = default,
Optional<ImmutableArray<VirtualChar>> virtualChars = default,
Optional<ImmutableArray<RegexDiagnostic>> diagnostics = default,
Optional<ImmutableArray<EmbeddedSyntaxTrivia<TSyntaxKind>>> trailingTrivia = default,
Optional<ImmutableArray<EmbeddedDiagnostic>> diagnostics = default,
Optional<object> value = default)
{
return new RegexToken(
return new EmbeddedSyntaxToken<TSyntaxKind>(
kind.HasValue ? kind.Value : Kind,
leadingTrivia.HasValue ? leadingTrivia.Value : LeadingTrivia,
virtualChars.HasValue ? virtualChars.Value : VirtualChars,
trailingTrivia.HasValue ? trailingTrivia.Value : TrailingTrivia,
diagnostics.HasValue ? diagnostics.Value : Diagnostics,
value.HasValue ? value.Value : Value);
}
......
// Copyright (c) Microsoft. All Rights Reserved. Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
using System.Collections.Immutable;
using Microsoft.CodeAnalysis.EmbeddedLanguages.VirtualChars;
namespace Microsoft.CodeAnalysis.EmbeddedLanguages.Common
{
/// <summary>
/// Base class for the tree produced for an embedded language snippet. It simply stores the
/// three values handed to its constructor: the <see cref="VirtualChar"/>s, the root node and
/// the diagnostics.
/// </summary>
/// <typeparam name="TSyntaxKind">Value type used as the kind of nodes and tokens.</typeparam>
/// <typeparam name="TNode">Node type of the embedded language's syntax hierarchy.</typeparam>
/// <typeparam name="TRoot">Type of the root node; must derive from <typeparamref name="TNode"/>.</typeparam>
internal abstract class EmbeddedSyntaxTree<TSyntaxKind, TNode, TRoot>
    where TSyntaxKind : struct
    where TNode : EmbeddedSyntaxNode<TSyntaxKind, TNode>
    where TRoot : TNode
{
    /// <summary>The virtual characters supplied when the tree was created.</summary>
    public readonly ImmutableArray<VirtualChar> Text;

    /// <summary>The root node supplied when the tree was created.</summary>
    public readonly TRoot Root;

    /// <summary>The diagnostics supplied when the tree was created.</summary>
    public readonly ImmutableArray<EmbeddedDiagnostic> Diagnostics;

    /// <summary>
    /// Stores the given values in the corresponding fields. No validation is performed.
    /// </summary>
    /// <param name="text">Value stored in <see cref="Text"/>.</param>
    /// <param name="root">Value stored in <see cref="Root"/>.</param>
    /// <param name="diagnostics">Value stored in <see cref="Diagnostics"/>.</param>
    protected EmbeddedSyntaxTree(
        ImmutableArray<VirtualChar> text,
        TRoot root,
        ImmutableArray<EmbeddedDiagnostic> diagnostics)
    {
        // The three assignments are independent of one another.
        Diagnostics = diagnostics;
        Root = root;
        Text = text;
    }
}
}
......@@ -2,34 +2,25 @@
using System.Collections.Immutable;
using System.Diagnostics;
using Microsoft.CodeAnalysis.VirtualChars;
using Microsoft.CodeAnalysis.EmbeddedLanguages.VirtualChars;
namespace Microsoft.CodeAnalysis.RegularExpressions
namespace Microsoft.CodeAnalysis.EmbeddedLanguages.Common
{
/// <summary>
/// Trivia on a <see cref="RegexToken"/>. There are only two types of trivia
/// <see cref="RegexKind.WhitespaceTrivia"/> and <see cref="RegexKind.CommentTrivia"/>.
///
/// For simplicity, all trivia is leading trivia.
/// Trivia on an <see cref="EmbeddedSyntaxToken{TSyntaxKind}"/>.
/// </summary>
internal struct RegexTrivia
internal struct EmbeddedSyntaxTrivia<TSyntaxKind> where TSyntaxKind : struct
{
public readonly RegexKind Kind;
public readonly TSyntaxKind Kind;
public readonly ImmutableArray<VirtualChar> VirtualChars;
/// <summary>
/// A place for diagnostics to be stored during parsing. Not intended to be accessed
/// directly. These will be collected and aggregated into <see cref="RegexTree.Diagnostics"/>
/// directly. These will be collected and aggregated into <see cref="EmbeddedSyntaxTree{TSyntaxKind, TNode, TRoot}.Diagnostics"/>
/// </summary>
internal readonly ImmutableArray<RegexDiagnostic> Diagnostics;
internal readonly ImmutableArray<EmbeddedDiagnostic> Diagnostics;
public RegexTrivia(RegexKind kind, ImmutableArray<VirtualChar> virtualChars)
: this(kind, virtualChars, ImmutableArray<RegexDiagnostic>.Empty)
{
}
public RegexTrivia(RegexKind kind, ImmutableArray<VirtualChar> virtualChars, ImmutableArray<RegexDiagnostic> diagnostics)
public EmbeddedSyntaxTrivia(TSyntaxKind kind, ImmutableArray<VirtualChar> virtualChars, ImmutableArray<EmbeddedDiagnostic> diagnostics)
{
Debug.Assert(virtualChars.Length > 0);
Kind = kind;
......
// Copyright (c) Microsoft. All Rights Reserved. Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
namespace Microsoft.CodeAnalysis.RegularExpressions
namespace Microsoft.CodeAnalysis.EmbeddedLanguages.RegularExpressions
{
internal interface IRegexNodeVisitor
{
......
......@@ -4,7 +4,7 @@
using System.Globalization;
using System.Text;
namespace Microsoft.CodeAnalysis.RegularExpressions
namespace Microsoft.CodeAnalysis.EmbeddedLanguages.RegularExpressions
{
/// <summary>
/// Minimal copy of https://github.com/dotnet/corefx/blob/master/src/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs
......
// Copyright (c) Microsoft. All Rights Reserved. Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
using System.Collections.Immutable;
using System.Text.RegularExpressions;
using Microsoft.CodeAnalysis.EmbeddedLanguages.Common;
using Microsoft.CodeAnalysis.EmbeddedLanguages.VirtualChars;
namespace Microsoft.CodeAnalysis.EmbeddedLanguages.RegularExpressions
{
using RegexToken = EmbeddedSyntaxToken<RegexKind>;
using RegexTrivia = EmbeddedSyntaxTrivia<RegexKind>;
/// <summary>
/// Regex-specific helpers: a flag test for <see cref="RegexOptions"/> plus factory methods that
/// build <c>RegexToken</c> and <c>RegexTrivia</c> values with default arguments filled in.
/// </summary>
internal static class RegexHelpers
{
    /// <summary>
    /// Returns true when <paramref name="options"/> and <paramref name="val"/> share at least
    /// one set bit.
    /// </summary>
    public static bool HasOption(RegexOptions options, RegexOptions val)
    {
        var overlap = options & val;
        return overlap != RegexOptions.None;
    }

    /// <summary>
    /// Creates a token with no diagnostics and a null value.
    /// </summary>
    public static RegexToken CreateToken(RegexKind kind, ImmutableArray<RegexTrivia> leadingTrivia, ImmutableArray<VirtualChar> virtualChars)
    {
        return CreateToken(kind, leadingTrivia, virtualChars, ImmutableArray<EmbeddedDiagnostic>.Empty, value: null);
    }

    /// <summary>
    /// Creates a token carrying the given diagnostics and a null value.
    /// </summary>
    public static RegexToken CreateToken(
        RegexKind kind, ImmutableArray<RegexTrivia> leadingTrivia,
        ImmutableArray<VirtualChar> virtualChars, ImmutableArray<EmbeddedDiagnostic> diagnostics)
    {
        return CreateToken(kind, leadingTrivia, virtualChars, diagnostics, value: null);
    }

    /// <summary>
    /// Creates a token from all supplied parts. Trailing trivia is always passed as empty.
    /// </summary>
    public static RegexToken CreateToken(
        RegexKind kind, ImmutableArray<RegexTrivia> leadingTrivia, ImmutableArray<VirtualChar> virtualChars,
        ImmutableArray<EmbeddedDiagnostic> diagnostics, object value)
    {
        var noTrailingTrivia = ImmutableArray<RegexTrivia>.Empty;
        return new RegexToken(kind, leadingTrivia, virtualChars, noTrailingTrivia, diagnostics, value);
    }

    /// <summary>
    /// Creates a token of the given kind with no trivia, no virtual chars, no diagnostics and a
    /// null value.
    /// </summary>
    public static RegexToken CreateMissingToken(RegexKind kind)
    {
        return CreateToken(
            kind,
            ImmutableArray<RegexTrivia>.Empty,
            ImmutableArray<VirtualChar>.Empty,
            ImmutableArray<EmbeddedDiagnostic>.Empty,
            value: null);
    }

    /// <summary>
    /// Creates trivia with no diagnostics.
    /// </summary>
    public static RegexTrivia CreateTrivia(RegexKind kind, ImmutableArray<VirtualChar> virtualChars)
    {
        return new RegexTrivia(kind, virtualChars, ImmutableArray<EmbeddedDiagnostic>.Empty);
    }

    /// <summary>
    /// Creates trivia carrying the given diagnostics.
    /// </summary>
    public static RegexTrivia CreateTrivia(RegexKind kind, ImmutableArray<VirtualChar> virtualChars, ImmutableArray<EmbeddedDiagnostic> diagnostics)
    {
        return new RegexTrivia(kind, virtualChars, diagnostics);
    }
}
}
// Copyright (c) Microsoft. All Rights Reserved. Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
namespace Microsoft.CodeAnalysis.RegularExpressions
namespace Microsoft.CodeAnalysis.EmbeddedLanguages.RegularExpressions
{
internal enum RegexKind
{
......
......@@ -2,16 +2,20 @@
using System.Collections.Immutable;
using System.Diagnostics;
using System.Linq;
using System.Text.RegularExpressions;
using Microsoft.CodeAnalysis.EmbeddedLanguages.Common;
using Microsoft.CodeAnalysis.EmbeddedLanguages.VirtualChars;
using Microsoft.CodeAnalysis.PooledObjects;
using Microsoft.CodeAnalysis.Text;
using Microsoft.CodeAnalysis.VirtualChars;
namespace Microsoft.CodeAnalysis.RegularExpressions
namespace Microsoft.CodeAnalysis.EmbeddedLanguages.RegularExpressions
{
using static EmbeddedSyntaxHelpers;
using static RegexHelpers;
using RegexToken = EmbeddedSyntaxToken<RegexKind>;
using RegexTrivia = EmbeddedSyntaxTrivia<RegexKind>;
/// <summary>
/// Produces tokens from the sequence of <see cref="VirtualChar"/> characters. Unlike the native C# and VB
/// lexer, this lexer is much more tightly controlled by the parser. For example, while C# can have trivia
......@@ -53,13 +57,13 @@ public RegexToken ScanNextToken(bool allowTrivia, RegexOptions options)
var trivia = ScanLeadingTrivia(allowTrivia, options);
if (Position == Text.Length)
{
return new RegexToken(RegexKind.EndOfFile, trivia, ImmutableArray<VirtualChar>.Empty);
return CreateToken(RegexKind.EndOfFile, trivia, ImmutableArray<VirtualChar>.Empty);
}
var ch = this.CurrentChar;
Position++;
return new RegexToken(GetKind(ch), trivia, ImmutableArray.Create(ch));
return CreateToken(GetKind(ch), trivia, ImmutableArray.Create(ch));
}
private static RegexKind GetKind(char ch)
......@@ -142,7 +146,7 @@ private ImmutableArray<RegexTrivia> ScanLeadingTrivia(bool allowTrivia, RegexOpt
Position++;
}
return new RegexTrivia(RegexKind.CommentTrivia, GetSubPattern(start, Position));
return CreateTrivia(RegexKind.CommentTrivia, GetSubPattern(start, Position));
}
}
......@@ -157,14 +161,14 @@ private ImmutableArray<RegexTrivia> ScanLeadingTrivia(bool allowTrivia, RegexOpt
if (Position == Text.Length)
{
var diagnostics = ImmutableArray.Create(new RegexDiagnostic(
var diagnostics = ImmutableArray.Create(new EmbeddedDiagnostic(
WorkspacesResources.Unterminated_regex_comment,
GetTextSpan(start, Position)));
return new RegexTrivia(RegexKind.CommentTrivia, GetSubPattern(start, Position), diagnostics);
return CreateTrivia(RegexKind.CommentTrivia, GetSubPattern(start, Position), diagnostics);
}
Position++;
return new RegexTrivia(RegexKind.CommentTrivia, GetSubPattern(start, Position));
return CreateTrivia(RegexKind.CommentTrivia, GetSubPattern(start, Position));
}
}
......@@ -203,7 +207,7 @@ private bool TextAt(int position, string val)
if (Position > start)
{
return new RegexTrivia(RegexKind.WhitespaceTrivia, GetSubPattern(start, Position));
return CreateTrivia(RegexKind.WhitespaceTrivia, GetSubPattern(start, Position));
}
}
......@@ -240,12 +244,12 @@ private bool IsBlank(char ch)
return null;
}
var token = new RegexToken(RegexKind.EscapeCategoryToken, ImmutableArray<RegexTrivia>.Empty, GetSubPattern(start, Position));
var token = CreateToken(RegexKind.EscapeCategoryToken, ImmutableArray<RegexTrivia>.Empty, GetSubPattern(start, Position));
var category = token.VirtualChars.CreateString();
if (!RegexCharClass.IsEscapeCategory(category))
{
token = token.AddDiagnosticIfNone(new RegexDiagnostic(
token = token.AddDiagnosticIfNone(new EmbeddedDiagnostic(
string.Format(WorkspacesResources.Unknown_property_0, category),
GetSpan(token)));
}
......@@ -293,12 +297,12 @@ private static bool IsEscapeCategoryChar(VirtualChar ch)
}
}
var token = new RegexToken(RegexKind.NumberToken, ImmutableArray<RegexTrivia>.Empty, GetSubPattern(start, Position));
var token = CreateToken(RegexKind.NumberToken, ImmutableArray<RegexTrivia>.Empty, GetSubPattern(start, Position));
token = token.With(value: value);
if (error)
{
token = token.AddDiagnosticIfNone(new RegexDiagnostic(
token = token.AddDiagnosticIfNone(new EmbeddedDiagnostic(
WorkspacesResources.Capture_group_numbers_must_be_less_than_or_equal_to_Int32_MaxValue,
GetSpan(token)));
}
......@@ -324,7 +328,7 @@ private static bool IsEscapeCategoryChar(VirtualChar ch)
return null;
}
var token = new RegexToken(RegexKind.CaptureNameToken, ImmutableArray<RegexTrivia>.Empty, GetSubPattern(start, Position));
var token = CreateToken(RegexKind.CaptureNameToken, ImmutableArray<RegexTrivia>.Empty, GetSubPattern(start, Position));
token = token.With(value: token.VirtualChars.CreateString());
return token;
}
......@@ -342,7 +346,7 @@ private static bool IsEscapeCategoryChar(VirtualChar ch)
return start == Position
? default(RegexToken?)
: new RegexToken(RegexKind.OptionsToken, ImmutableArray<RegexTrivia>.Empty, GetSubPattern(start, Position));
: CreateToken(RegexKind.OptionsToken, ImmutableArray<RegexTrivia>.Empty, GetSubPattern(start, Position));
}
private bool IsOptionChar(char ch)
......@@ -378,13 +382,13 @@ public RegexToken ScanHexCharacters(int count)
}
}
var result = new RegexToken(
var result = CreateToken(
RegexKind.TextToken, ImmutableArray<RegexTrivia>.Empty, GetSubPattern(start, Position));
var length = Position - start;
if (length != count)
{
result = result.AddDiagnosticIfNone(new RegexDiagnostic(
result = result.AddDiagnosticIfNone(new EmbeddedDiagnostic(
WorkspacesResources.Insufficient_hexadecimal_digits,
TextSpan.FromBounds(Text[beforeSlash].Span.Start, Text[Position - 1].Span.End)));
}
......@@ -435,7 +439,7 @@ public RegexToken ScanOctalCharacters(RegexOptions options)
Debug.Assert(Position - start > 0);
var result = new RegexToken(
var result = CreateToken(
RegexKind.TextToken, ImmutableArray<RegexTrivia>.Empty, GetSubPattern(start, Position));
return result;
......
// Copyright (c) Microsoft. All Rights Reserved. Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
using Microsoft.CodeAnalysis.EmbeddedLanguages.Common;
namespace Microsoft.CodeAnalysis.EmbeddedLanguages.RegularExpressions
{
/// <summary>
/// Abstract base for regex syntax nodes. Closes the generic
/// <see cref="EmbeddedSyntaxNode{TSyntaxKind, TNode}"/> over <see cref="RegexKind"/> and
/// <see cref="RegexNode"/>, and adds a visitor entry point.
/// </summary>
internal abstract class RegexNode : EmbeddedSyntaxNode<RegexKind, RegexNode>
{
    /// <summary>
    /// Forwards <paramref name="kind"/> to the base constructor.
    /// </summary>
    protected RegexNode(RegexKind kind)
        : base(kind)
    {
    }

    /// <summary>
    /// Accepts an <see cref="IRegexNodeVisitor"/>. Each concrete node type supplies its own
    /// implementation.
    /// </summary>
    public abstract void Accept(IRegexNodeVisitor visitor);
}
}
......@@ -3,9 +3,13 @@
using System;
using System.Collections.Immutable;
using System.Diagnostics;
using Microsoft.CodeAnalysis.EmbeddedLanguages.Common;
namespace Microsoft.CodeAnalysis.RegularExpressions
namespace Microsoft.CodeAnalysis.EmbeddedLanguages.RegularExpressions
{
using RegexNodeOrToken = EmbeddedSyntaxNodeOrToken<RegexKind, RegexNode>;
using RegexToken = EmbeddedSyntaxToken<RegexKind>;
internal sealed class RegexCompilationUnit : RegexNode
{
public RegexCompilationUnit(RegexExpressionNode expression, RegexToken endOfFileToken)
......
......@@ -4,14 +4,18 @@
using System.Diagnostics;
using System.Linq;
using System.Text.RegularExpressions;
using Microsoft.CodeAnalysis.EmbeddedLanguages.Common;
using Microsoft.CodeAnalysis.EmbeddedLanguages.VirtualChars;
using Microsoft.CodeAnalysis.PooledObjects;
using Microsoft.CodeAnalysis.Text;
using Microsoft.CodeAnalysis.VirtualChars;
namespace Microsoft.CodeAnalysis.RegularExpressions
namespace Microsoft.CodeAnalysis.EmbeddedLanguages.RegularExpressions
{
using static EmbeddedSyntaxHelpers;
using static RegexHelpers;
using RegexToken = EmbeddedSyntaxToken<RegexKind>;
internal partial struct RegexParser
{
/// <summary>
......
// Copyright (c) Microsoft. All Rights Reserved. Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
using System.Collections.Immutable;
using Microsoft.CodeAnalysis.EmbeddedLanguages.Common;
using Microsoft.CodeAnalysis.EmbeddedLanguages.VirtualChars;
using Microsoft.CodeAnalysis.Text;
using Microsoft.CodeAnalysis.VirtualChars;
namespace Microsoft.CodeAnalysis.RegularExpressions
namespace Microsoft.CodeAnalysis.EmbeddedLanguages.RegularExpressions
{
internal sealed class RegexTree
internal sealed class RegexTree : EmbeddedSyntaxTree<RegexKind, RegexNode, RegexCompilationUnit>
{
public readonly ImmutableArray<VirtualChar> Text;
public readonly RegexCompilationUnit Root;
public readonly ImmutableArray<RegexDiagnostic> Diagnostics;
public readonly ImmutableDictionary<string, TextSpan> CaptureNamesToSpan;
public readonly ImmutableDictionary<int, TextSpan> CaptureNumbersToSpan;
public RegexTree(
ImmutableArray<VirtualChar> text,
RegexCompilationUnit root,
ImmutableArray<RegexDiagnostic> diagnostics,
ImmutableArray<EmbeddedDiagnostic> diagnostics,
ImmutableDictionary<string, TextSpan> captureNamesToSpan,
ImmutableDictionary<int, TextSpan> captureNumbersToSpan)
: base(text, root, diagnostics)
{
Text = text;
Root = root;
Diagnostics = diagnostics;
CaptureNamesToSpan = captureNamesToSpan;
CaptureNumbersToSpan = captureNumbersToSpan;
}
......
......@@ -5,7 +5,7 @@
using Microsoft.CodeAnalysis.Options;
using Microsoft.CodeAnalysis.Options.Providers;
namespace Microsoft.CodeAnalysis.RegularExpressions
namespace Microsoft.CodeAnalysis.EmbeddedLanguages.RegularExpressions
{
internal class RegularExpressionsOptions
{
......
......@@ -6,7 +6,7 @@
using Microsoft.CodeAnalysis.PooledObjects;
using Microsoft.CodeAnalysis.Text;
namespace Microsoft.CodeAnalysis.VirtualChars
namespace Microsoft.CodeAnalysis.EmbeddedLanguages.VirtualChars
{
internal abstract class AbstractVirtualCharService : IVirtualCharService
{
......
......@@ -4,7 +4,7 @@
using Microsoft.CodeAnalysis.Host;
using Microsoft.CodeAnalysis.Text;
namespace Microsoft.CodeAnalysis.VirtualChars
namespace Microsoft.CodeAnalysis.EmbeddedLanguages.VirtualChars
{
/// <summary>
/// Helper service that takes the raw text of a string token and produces the individual
......
......@@ -3,7 +3,7 @@
using System;
using Microsoft.CodeAnalysis.Text;
namespace Microsoft.CodeAnalysis.VirtualChars
namespace Microsoft.CodeAnalysis.EmbeddedLanguages.VirtualChars
{
/// <summary>
/// The Regex and Json parsers want to work over an array of characters, however this array of
......
......@@ -3,7 +3,7 @@
using System.Collections.Immutable;
using Microsoft.CodeAnalysis.PooledObjects;
namespace Microsoft.CodeAnalysis.VirtualChars
namespace Microsoft.CodeAnalysis.EmbeddedLanguages.VirtualChars
{
internal static class VirtualCharExtensions
{
......
......@@ -3,9 +3,9 @@
Imports System.Collections.Immutable
Imports System.Composition
Imports Microsoft.CodeAnalysis.Host.Mef
Imports Microsoft.CodeAnalysis.VirtualChars
Imports Microsoft.CodeAnalysis.EmbeddedLanguages.VirtualChars
Namespace Microsoft.CodeAnalysis.VisualBasic.VirtualChars
Namespace Microsoft.CodeAnalysis.VisualBasic.EmbeddedLanguages.VirtualChars
<ExportLanguageService(GetType(IVirtualCharService), LanguageNames.VisualBasic), [Shared]>
Friend Class VisualBasicVirtualCharService
Inherits AbstractVirtualCharService
......
' Copyright (c) Microsoft. All Rights Reserved. Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
Imports System.Collections.Immutable
Imports Microsoft.CodeAnalysis.VirtualChars
Imports Microsoft.CodeAnalysis.VisualBasic.VirtualChars
Imports Microsoft.CodeAnalysis.EmbeddedLanguages.VirtualChars
Imports Microsoft.CodeAnalysis.VisualBasic.EmbeddedLanguages.VirtualChars
Imports Xunit
Namespace Microsoft.CodeAnalysis.VisualBasic.UnitTests.VirtualChars
Namespace Microsoft.CodeAnalysis.VisualBasic.UnitTests.EmbeddedLanguages.VirtualChars
Public Class VisualBasicVirtualCharServiceTests
Private Const _statmentPrefix As String = "dim v = "
Private Const _statementPrefix As String = "dim v = "
Private Function GetStringToken(text As String) As SyntaxToken
Dim statement = _statmentPrefix + text
Dim statement = _statementPrefix + text
Dim parsedStatement = SyntaxFactory.ParseExecutableStatement(statement)
Dim token = parsedStatement.DescendantTokens().ToArray()(3)
Assert.True(token.Kind() = SyntaxKind.StringLiteralToken)
......@@ -51,7 +51,7 @@ Namespace Microsoft.CodeAnalysis.VisualBasic.UnitTests.VirtualChars
End Function
Private Function ConvertToString(vc As VirtualChar) As String
Return $"[{ConvertToString(vc.Char)},[{vc.Span.Start - _statmentPrefix.Length},{vc.Span.End - _statmentPrefix.Length}]]"
Return $"[{ConvertToString(vc.Char)},[{vc.Span.Start - _statementPrefix.Length},{vc.Span.End - _statementPrefix.Length}]]"
End Function
Private Function ConvertToString(c As Char) As String
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册