diff --git a/src/Workspaces/CSharp/Portable/VirtualChars/CSharpVirtualCharService.cs b/src/Workspaces/CSharp/Portable/VirtualChars/CSharpVirtualCharService.cs new file mode 100644 index 0000000000000000000000000000000000000000..85e7e4d790b9bedc477f47674d6655952f1884da --- /dev/null +++ b/src/Workspaces/CSharp/Portable/VirtualChars/CSharpVirtualCharService.cs @@ -0,0 +1,263 @@ +// Copyright (c) Microsoft. All Rights Reserved. Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. + +using System.Collections.Immutable; +using System.Composition; +using System.Diagnostics; +using Microsoft.CodeAnalysis.Host.Mef; +using Microsoft.CodeAnalysis.PooledObjects; +using Microsoft.CodeAnalysis.Text; +using Microsoft.CodeAnalysis.VirtualChars; + +namespace Microsoft.CodeAnalysis.CSharp.VirtualChars +{ + [ExportLanguageService(typeof(IVirtualCharService), LanguageNames.CSharp), Shared] + internal class CSharpVirtualCharService : AbstractVirtualCharService + { + public static readonly IVirtualCharService Instance = new CSharpVirtualCharService(); + + protected override ImmutableArray TryConvertToVirtualCharsWorker(SyntaxToken token) + { + Debug.Assert(!token.ContainsDiagnostics); + if (token.Kind() != SyntaxKind.StringLiteralToken) + { + return default; + } + + return token.IsVerbatimStringLiteral() + ? 
TryConvertVerbatimStringToVirtualChars(token) + : TryConvertStringToVirtualChars(token); + } + + private ImmutableArray TryConvertVerbatimStringToVirtualChars(SyntaxToken token) + => TryConvertSimpleDoubleQuoteString(token, "@\""); + + private ImmutableArray TryConvertStringToVirtualChars(SyntaxToken token) + { + const string StartDelimeter = "\""; + const string EndDelimeter = "\""; + + var tokenText = token.Text; + if (!tokenText.StartsWith(StartDelimeter) || + !tokenText.EndsWith(EndDelimeter)) + { + Debug.Fail("This should not be reachable as long as the compiler added no diagnostics."); + return default; + } + + var startIndexInclusive = StartDelimeter.Length; + var endIndexExclusive = tokenText.Length - EndDelimeter.Length; + + var result = ArrayBuilder.GetInstance(); + try + { + var offset = token.SpanStart; + for (var index = startIndexInclusive; index < endIndexExclusive;) + { + if (tokenText[index] == '\\') + { + if (!TryAddEscape(result, tokenText, offset, index)) + { + return default; + } + + index += result.Last().Span.Length; + } + else + { + result.Add(new VirtualChar(tokenText[index], new TextSpan(offset + index, 1))); + index++; + } + } + + return result.ToImmutable(); + } + finally + { + result.Free(); + } + } + + private bool TryAddEscape( + ArrayBuilder result, string tokenText, int offset, int index) + { + // Copied from Lexer.ScanEscapeSequence. + Debug.Assert(tokenText[index] == '\\'); + + return TryAddSingleCharacterEscape(result, tokenText, offset, index) || + TryAddMultiCharacterEscape(result, tokenText, offset, index); + } + + private bool TryAddSingleCharacterEscape( + ArrayBuilder result, string tokenText, int offset, int index) + { + // Copied from Lexer.ScanEscapeSequence. 
+ Debug.Assert(tokenText[index] == '\\'); + + var ch = tokenText[index + 1]; + switch (ch) + { + // escaped characters that translate to themselves + case '\'': + case '"': + case '\\': + break; + // translate escapes as per C# spec 2.4.4.4 + case '0': ch = '\0'; break; + case 'a': ch = '\a'; break; + case 'b': ch = '\b'; break; + case 'f': ch = '\f'; break; + case 'n': ch = '\n'; break; + case 'r': ch = '\r'; break; + case 't': ch = '\t'; break; + case 'v': ch = '\v'; break; + default: + return false; + } + + result.Add(new VirtualChar(ch, new TextSpan(offset + index, 2))); + return true; + } + + private bool TryAddMultiCharacterEscape( + ArrayBuilder result, string tokenText, int offset, int index) + { + // Copied from Lexer.ScanEscapeSequence. + Debug.Assert(tokenText[index] == '\\'); + + var ch = tokenText[index + 1]; + switch (ch) + { + case 'x': + case 'u': + case 'U': + return TryAddMultiCharacterEscape(result, tokenText, offset, index, ch); + default: + Debug.Fail("This should not be reachable as long as the compiler added no diagnostics."); + return false; + } + } + + private bool TryAddMultiCharacterEscape( + ArrayBuilder result, string tokenText, int offset, int index, char character) + { + var startIndex = index; + Debug.Assert(tokenText[index] == '\\'); + + // skip past the / and the escape type. + index += 2; + if (character == 'U') + { + // 8 character escape. May represent 1 or 2 actual chars. In the case of + // 2 chars, we will fail out as that isn't supported in this system (currently). 
+ uint uintChar = 0; + + if (!IsHexDigit(tokenText[index])) + { + Debug.Fail("This should not be reachable as long as the compiler added no diagnostics."); + return false; + } + + for (var i = 0; i < 8; i++) + { + character = tokenText[index + i]; + if (!IsHexDigit(character)) + { + Debug.Fail("This should not be reachable as long as the compiler added no diagnostics."); + return false; + } + + uintChar = (uint)((uintChar << 4) + HexValue(character)); + } + + if (uintChar > 0x0010FFFF) + { + Debug.Fail("This should not be reachable as long as the compiler added no diagnostics."); + return false; + } + + // Code points at or above U+10000 need a surrogate pair, which a single VirtualChar can't hold. + if (uintChar >= 0x00010000) + { + // This is possible. It's a legal C# escape, but we don't support it here because it + // would need two chars to encode. + return false; + } + + // The span covers the backslash, the 'U' and the 8 hex digits. + result.Add(new VirtualChar((char)uintChar, new TextSpan(startIndex + offset, 2 + 8))); + return true; + } + else if (character == 'u') + { + // 4 character escape representing one char. + + var intChar = 0; + if (!IsHexDigit(tokenText[index])) + { + Debug.Fail("This should not be reachable as long as the compiler added no diagnostics."); + return false; + } + + for (var i = 0; i < 4; i++) + { + var ch2 = tokenText[index + i]; + if (!IsHexDigit(ch2)) + { + Debug.Fail("This should not be reachable as long as the compiler added no diagnostics."); + return false; + } + + intChar = (intChar << 4) + HexValue(ch2); + } + + // The span covers the backslash, the 'u' and the 4 hex digits. + character = (char)intChar; + result.Add(new VirtualChar(character, new TextSpan(startIndex + offset, 2 + 4))); + return true; + } + else + { + Debug.Assert(character == 'x'); + // Variable length (up to 4 chars) hexadecimal escape. 
+ + var intChar = 0; + if (!IsHexDigit(tokenText[index])) + { + Debug.Fail("This should not be reachable as long as the compiler added no diagnostics."); + return false; + } + + var endIndex = index; + for (var i = 0; i < 4; i++) + { + var ch2 = tokenText[index + i]; + if (!IsHexDigit(ch2)) + { + // This is possible. These escape sequences are variable length. + break; + } + + intChar = (intChar << 4) + HexValue(ch2); + endIndex++; + } + + character = (char)intChar; + result.Add(new VirtualChar(character, TextSpan.FromBounds(startIndex + offset, endIndex + offset))); + return true; + } + } + + private static int HexValue(char c) + { + Debug.Assert(IsHexDigit(c)); + return (c >= '0' && c <= '9') ? c - '0' : (c & 0xdf) - 'A' + 10; + } + + private static bool IsHexDigit(char c) + { + return (c >= '0' && c <= '9') || + (c >= 'A' && c <= 'F') || + (c >= 'a' && c <= 'f'); + } + } +} diff --git a/src/Workspaces/CSharpTest/VirtualChars/CSharpVirtualCharServiceTests.cs b/src/Workspaces/CSharpTest/VirtualChars/CSharpVirtualCharServiceTests.cs new file mode 100644 index 0000000000000000000000000000000000000000..0cbb67b527fe5312d8802e79491c0e8905a68555 --- /dev/null +++ b/src/Workspaces/CSharpTest/VirtualChars/CSharpVirtualCharServiceTests.cs @@ -0,0 +1,207 @@ +// Copyright (c) Microsoft. All Rights Reserved. Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. 
+ +using System.Collections.Immutable; +using System.Linq; +using Microsoft.CodeAnalysis.CSharp.VirtualChars; +using Microsoft.CodeAnalysis.VirtualChars; +using Xunit; + +namespace Microsoft.CodeAnalysis.CSharp.UnitTests.VirtualChars +{ + public class CSharpVirtualCharServiceTests + { + private const string _statementPrefix = "var v = "; + + private SyntaxToken GetStringToken(string text) + { + var statement = _statementPrefix + text; + var parsedStatement = SyntaxFactory.ParseStatement(statement); + var token = parsedStatement.DescendantTokens().ToArray()[3]; + Assert.Equal(token.Kind(), SyntaxKind.StringLiteralToken); + + return token; + } + + private void Test(string stringText, string expected) + { + var token = GetStringToken(stringText); + var virtualChars = CSharpVirtualCharService.Instance.TryConvertToVirtualChars(token); + var actual = ConvertToString(virtualChars); + Assert.Equal(expected, actual); + } + + private void TestFailure(string stringText) + { + var token = GetStringToken(stringText); + var virtualChars = CSharpVirtualCharService.Instance.TryConvertToVirtualChars(token); + Assert.True(virtualChars.IsDefault); + } + + [Fact] + public void TestEmptyString() + { + Test("\"\"", ""); + } + + [Fact] + public void TestEmptyVerbatimString() + { + Test("@\"\"", ""); + } + + [Fact] + public void TestSimpleString() + { + Test("\"a\"", "['a',[1,2]]"); + } + + [Fact] + public void TestSimpleVerbatimString() + { + Test("@\"a\"", "['a',[2,3]]"); + } + + [Fact] + public void TestUnterminatedString() + { + TestFailure("\""); + } + + [Fact] + public void TestUnterminatedVerbatimString() + { + TestFailure("@\""); + } + + [Fact] + public void TestSimpleEscape() + { + Test(@"""a\ta""", "['a',[1,2]]['\\u0009',[2,4]]['a',[4,5]]"); + } + + [Fact] + public void TestMultipleSimpleEscape() + { + Test(@"""a\t\ta""", "['a',[1,2]]['\\u0009',[2,4]]['\\u0009',[4,6]]['a',[6,7]]"); + } + + [Fact] + public void TestNonEscapeInVerbatim() + { + Test(@"@""a\ta""", 
"['a',[2,3]]['\\u005C',[3,4]]['t',[4,5]]['a',[5,6]]"); + } + + [Fact] + public void TestInvalidHexEscape() + { + TestFailure(@"""\xZ"""); + } + + [Fact] + public void TestValidHex1Escape() + { + Test(@"""\xa""", @"['\u000A',[1,4]]"); + } + + [Fact] + public void TestValidHex2Escape() + { + Test(@"""\xaa""", @"['\u00AA',[1,5]]"); + } + + [Fact] + public void TestValidHex3Escape() + { + Test(@"""\xaaa""", @"['\u0AAA',[1,6]]"); + } + + [Fact] + public void TestValidHex4Escape() + { + Test(@"""\xaaaa""", @"['\uAAAA',[1,7]]"); + } + + [Fact] + public void TestValidHex5Escape() + { + Test(@"""\xaaaaa""", @"['\uAAAA',[1,7]]['a',[7,8]]"); + } + + [Fact] + public void TestValidHex6Escape() + { + Test(@"""a\xaaaaa""", @"['a',[1,2]]['\uAAAA',[2,8]]['a',[8,9]]"); + } + + [Fact] + public void TestInvalidUnicodeEscape() + { + TestFailure(@"""\u000"""); + } + + [Fact] + public void TestValidUnicodeEscape1() + { + Test(@"""\u0000""", @"['\u0000',[1,7]]"); + } + + [Fact] + public void TestValidUnicodeEscape2() + { + Test(@"""a\u0000a""", @"['a',[1,2]]['\u0000',[2,8]]['a',[8,9]]"); + } + + [Fact] + public void TestInvalidLongUnicodeEscape1() + { + TestFailure(@"""\U0000"""); + } + + [Fact] + public void TestInvalidLongUnicodeEscape2() + { + TestFailure(@"""\U10000000"""); + } + + [Fact] + public void TestValidLongEscape1() + { + Test(@"""\U00000000""", @"['\u0000',[1,11]]"); + } + + [Fact] + public void TestValidLongEscape2() + { + Test(@"""\U0000ffff""", @"['\uFFFF',[1,11]]"); + } + + [Fact] + public void TestValidLongEscape3() + { + Test(@"""a\U00000000a""", @"['a',[1,2]]['\u0000',[2,12]]['a',[12,13]]"); + } + + [Fact] + public void TestValidButUnsupportedLongEscape1() + { + var token = GetStringToken(@"""\U00010000"""); + Assert.False(token.ContainsDiagnostics); + TestFailure(@"""\U00010000"""); + } + + [Fact] + public void TestEscapedQuoteInVerbatimString() + { + Test("@\"a\"\"a\"", @"['a',[2,3]]['\u0022',[3,5]]['a',[5,6]]"); + } + + private string ConvertToString(ImmutableArray 
virtualChars) + => string.Join("", virtualChars.Select(ConvertToString)); + + private string ConvertToString(VirtualChar vc) + => $"[{ConvertToString(vc.Char)},[{vc.Span.Start - _statementPrefix.Length},{vc.Span.End - _statementPrefix.Length}]]"; + + private string ConvertToString(char c) + => char.IsLetterOrDigit(c) && c < 127 ? $"'{c}'" : $"'\\u{((int)c).ToString("X4")}'"; + } +} diff --git a/src/Workspaces/Core/Portable/VirtualChars/AbstractVirtualCharService.cs b/src/Workspaces/Core/Portable/VirtualChars/AbstractVirtualCharService.cs new file mode 100644 index 0000000000000000000000000000000000000000..004b26a5d890a21a628a53bd78387685b6c9edfe --- /dev/null +++ b/src/Workspaces/Core/Portable/VirtualChars/AbstractVirtualCharService.cs @@ -0,0 +1,102 @@ +// Copyright (c) Microsoft. All Rights Reserved. Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. + +using System.Collections.Immutable; +using System.Diagnostics; +using System.Linq; +using Microsoft.CodeAnalysis.PooledObjects; +using Microsoft.CodeAnalysis.Text; + +namespace Microsoft.CodeAnalysis.VirtualChars +{ + internal abstract class AbstractVirtualCharService : IVirtualCharService + { + protected abstract ImmutableArray TryConvertToVirtualCharsWorker(SyntaxToken token); + + public ImmutableArray TryConvertToVirtualChars(SyntaxToken token) + { + // We don't process any strings that contain diagnostics in it. That means that we can + // trust that all the string's contents (most importantly, the escape sequences) are well + // formed. + if (token.ContainsDiagnostics) + { + return default; + } + + var result = TryConvertToVirtualCharsWorker(token); + +#if DEBUG + // Do some invariant checking to make sure we processed the string token the same + // way the C# and VB compilers did. + if (!result.IsDefault) + { + // Ensure that we properly broke up the token into a sequence of characters that + // matches what the compiler did. 
+ var expectedValueText = token.ValueText; + var actualValueText = result.CreateString(); + Debug.Assert(expectedValueText == actualValueText); + + if (result.Length > 0) + { + var currentVC = result[0]; + Debug.Assert(currentVC.Span.Start > token.SpanStart, "First span has to start after the start of the string token (including its delimeter)"); + Debug.Assert(currentVC.Span.Start == token.SpanStart + 1 || currentVC.Span.Start == token.SpanStart + 2, "First span should start on the second or third char of the string."); + for (var i = 1; i < result.Length; i++) + { + var nextVC = result[i]; + Debug.Assert(currentVC.Span.End == nextVC.Span.Start, "Virtual character spans have to be touching."); + currentVC = nextVC; + } + + var lastVC = result.Last(); + Debug.Assert(lastVC.Span.End == token.Span.End - 1, "Last span has to end right before the end of the string token (including its trailing delimeter)."); + } + } +#endif + + return result; + } + + /// + /// Helper to convert simple string literals that escape quotes by doubling them. This is + /// how normal VB literals and c# verbatim string literals work. + /// + /// The start characters string. 
" in VB and @" in C# + protected static ImmutableArray TryConvertSimpleDoubleQuoteString( + SyntaxToken token, string startDelimiter) + { + Debug.Assert(!token.ContainsDiagnostics); + const string endDelimiter = "\""; + + var tokenText = token.Text; + if (!tokenText.StartsWith(startDelimiter) || + !tokenText.EndsWith(endDelimiter)) + { + Debug.Assert(false, "This should not be reachable as long as the compiler added no diagnostics."); + return default; + } + + var startIndexInclusive = startDelimiter.Length; + var endIndexExclusive = tokenText.Length - endDelimiter.Length; + + var result = ArrayBuilder.GetInstance(); + + var offset = token.SpanStart; + for (var index = startIndexInclusive; index < endIndexExclusive;) + { + if (tokenText[index] == '"' && + tokenText[index + 1] == '"') + { + result.Add(new VirtualChar('"', new TextSpan(offset + index, 2))); + index += 2; + } + else + { + result.Add(new VirtualChar(tokenText[index], new TextSpan(offset + index, 1))); + index++; + } + } + + return result.ToImmutableAndFree(); + } + } +} diff --git a/src/Workspaces/Core/Portable/VirtualChars/IVirtualCharService.cs b/src/Workspaces/Core/Portable/VirtualChars/IVirtualCharService.cs new file mode 100644 index 0000000000000000000000000000000000000000..5b33787c8b4bd2a2c1f1aeec7122f3d003f2994c --- /dev/null +++ b/src/Workspaces/Core/Portable/VirtualChars/IVirtualCharService.cs @@ -0,0 +1,58 @@ +// Copyright (c) Microsoft. All Rights Reserved. Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. + +using System.Collections.Immutable; +using Microsoft.CodeAnalysis.Host; +using Microsoft.CodeAnalysis.Text; + +namespace Microsoft.CodeAnalysis.VirtualChars +{ + /// + /// Helper service that takes the raw text of a string token and produces the individual + /// characters that raw string token represents (i.e. with escapes collapsed). 
The difference + /// between this and the result from token.ValueText is that for each collapsed character + /// returned the original span of text in the original token can be found. i.e. if you had the + /// following in C#: + /// + /// "G\u006fo" + /// + /// Then you'd get back: + /// + /// 'G' -> [0, 1) 'o' -> [1, 7) 'o' -> [7, 8) + /// + /// This allows for embedded language processing that can refer back to the users' original code + /// instead of the escaped value we're processing. + /// + internal interface IVirtualCharService : ILanguageService + { + /// + /// Takes in a string token and returns the <see cref="VirtualChar"/>s corresponding to each + /// char of the token's <see cref="SyntaxToken.ValueText"/>. In other words, for each char + /// in ValueText there will be a VirtualChar in the resultant array. Each VirtualChar will + /// specify what char the language considers them to represent, as well as the span of text + /// in the original <see cref="SourceText"/> that the language created that char from. + /// + /// For most chars this will be a single character span. i.e. 'c' -> 'c'. However, for + /// escapes this may be a multi character span. i.e. 'c' -> '\u0063' + /// + /// If the token is not a string literal token, or the string literal has any diagnostics on + /// it, then <c>default</c> will be returned. Additionally, because a + /// VirtualChar can only represent a single char, while some escape sequences represent + /// multiple chars, <c>default</c> will also be returned in those cases. All + /// these cases could be relaxed in the future. But they greatly simplify the + /// implementation. + /// + /// If this function succeeds, certain invariants will hold. First, each character in the + /// sequence of characters in <paramref name="token"/>.ValueText will become a single + /// VirtualChar in the result array with a matching <see cref="VirtualChar.Char"/> property. + /// Similarly, each VirtualChar's <see cref="VirtualChar.Span"/> will abut each other, and + /// the union of all of them will cover the span of the token's <see cref="SyntaxToken.Text"/> + /// *not* including the start and end quotes. 
+ /// + /// In essence the VirtualChar array acts as the information explaining how the <see cref="SyntaxToken.Text"/> of the token between the quotes maps to each character in the + /// token's <see cref="SyntaxToken.ValueText"/>. + /// + ImmutableArray<VirtualChar> TryConvertToVirtualChars(SyntaxToken token); + } +} diff --git a/src/Workspaces/Core/Portable/VirtualChars/VirtualChar.cs b/src/Workspaces/Core/Portable/VirtualChars/VirtualChar.cs new file mode 100644 index 0000000000000000000000000000000000000000..46e9a8245369d9ac5781328dd50eebf60f172327 --- /dev/null +++ b/src/Workspaces/Core/Portable/VirtualChars/VirtualChar.cs @@ -0,0 +1,64 @@ +// Copyright (c) Microsoft. All Rights Reserved. Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. + +using System; +using Microsoft.CodeAnalysis.Text; + +namespace Microsoft.CodeAnalysis.VirtualChars +{ + /// + /// The Regex and Json parsers want to work over an array of characters, however this array of + /// characters is not the same as the array of characters a user types into a string in C# or + /// VB. For example, in C# someone may write: @"\z". This should appear to the parser the same as + /// if they wrote "\\z" and the same as "\\\u007a". However, as these all have wildly different + /// presentations for the user, there needs to be a way to map the characters the parser sees ( '\' + /// and 'z' ) back to the ranges of characters the user wrote. + /// + /// VirtualChar serves this purpose. It contains the interpreted value of any language + /// character/character-escape-sequence, as well as the original SourceText span where that + /// interpreted character was created from. This allows the regex and json parsers to both + /// process input from any language uniformly, but then also produce trees and diagnostics that + /// map back properly to the original source text locations that make sense to the user. 
+ /// + internal struct VirtualChar : IEquatable<VirtualChar> + { + /// <summary>The character the language interprets the text in <see cref="Span"/> as.</summary> + public readonly char Char; + /// <summary>The span of original text that <see cref="Char"/> was produced from. The constructor rejects empty spans.</summary> + public readonly TextSpan Span; + + /// <summary>Creates a VirtualChar for the given char and span.</summary> + /// <exception cref="ArgumentException">The span is empty.</exception> + public VirtualChar(char @char, TextSpan span) + { + if (span.IsEmpty) + { + throw new ArgumentException("Span should not be empty.", nameof(span)); + } + + Char = @char; + Span = span; + } + + public override bool Equals(object obj) + => obj is VirtualChar vc && Equals(vc); + + /// <summary>Two VirtualChars are equal when both their char and their span match.</summary> + public bool Equals(VirtualChar other) + => Char == other.Char && + Span == other.Span; + + public override int GetHashCode() + { + // Overflow is expected while mixing the hashes, so wrap instead of throwing. + unchecked + { + var hashCode = 244102310; + hashCode = hashCode * -1521134295 + Char.GetHashCode(); + hashCode = hashCode * -1521134295 + Span.GetHashCode(); + return hashCode; + } + } + + public static bool operator ==(VirtualChar char1, VirtualChar char2) + => char1.Equals(char2); + + public static bool operator !=(VirtualChar char1, VirtualChar char2) + => !(char1 == char2); + + /// <summary>Lets a VirtualChar be used directly where its interpreted char is needed.</summary> + public static implicit operator char(VirtualChar vc) => vc.Char; + } +} diff --git a/src/Workspaces/Core/Portable/VirtualChars/VirtualCharExtensions.cs b/src/Workspaces/Core/Portable/VirtualChars/VirtualCharExtensions.cs new file mode 100644 index 0000000000000000000000000000000000000000..51c9f8f64dc398d28cbd87ab490c9ba69882e520 --- /dev/null +++ b/src/Workspaces/Core/Portable/VirtualChars/VirtualCharExtensions.cs @@ -0,0 +1,22 @@ +// Copyright (c) Microsoft. All Rights Reserved. Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. 
+ +using System.Collections.Immutable; +using Microsoft.CodeAnalysis.PooledObjects; + +namespace Microsoft.CodeAnalysis.VirtualChars +{ + internal static class VirtualCharExtensions + { + /// <summary>Concatenates the Char of every VirtualChar, in order, into a single string.</summary> + public static string CreateString(this ImmutableArray<VirtualChar> chars) + { + var builder = PooledStringBuilder.GetInstance(); + + foreach (var vc in chars) + { + builder.Builder.Append(vc.Char); + } + + return builder.ToStringAndFree(); + } + } +} diff --git a/src/Workspaces/VisualBasic/Portable/VirtualChars/VisualBasicVirtualCharService.vb b/src/Workspaces/VisualBasic/Portable/VirtualChars/VisualBasicVirtualCharService.vb new file mode 100644 index 0000000000000000000000000000000000000000..7e18308359cfbf8b6806d077adabf6bc292f83ce --- /dev/null +++ b/src/Workspaces/VisualBasic/Portable/VirtualChars/VisualBasicVirtualCharService.vb @@ -0,0 +1,20 @@ +' Copyright (c) Microsoft. All Rights Reserved. Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. + +Imports System.Collections.Immutable +Imports System.Composition +Imports Microsoft.CodeAnalysis.Host.Mef +Imports Microsoft.CodeAnalysis.VirtualChars + +Namespace Microsoft.CodeAnalysis.VisualBasic.VirtualChars + <ExportLanguageService(GetType(IVirtualCharService), LanguageNames.VisualBasic), [Shared]> + Friend Class VisualBasicVirtualCharService + Inherits AbstractVirtualCharService + + Public Shared ReadOnly Instance As IVirtualCharService = New VisualBasicVirtualCharService() + + ''' <summary>Converts a VB string literal token, where the only escape is a doubled quote, into virtual chars.</summary> + Protected Overrides Function TryConvertToVirtualCharsWorker(token As SyntaxToken) As ImmutableArray(Of VirtualChar) + Debug.Assert(Not token.ContainsDiagnostics) + + ' Mirror the C# service: only string literal tokens are supported; anything else yields the default array. + If token.Kind() <> SyntaxKind.StringLiteralToken Then + Return Nothing + End If + + Return TryConvertSimpleDoubleQuoteString(token, """") + End Function + End Class +End Namespace diff --git a/src/Workspaces/VisualBasicTest/VirtualChars/VisualBasicVirtualCharServiceTests.vb b/src/Workspaces/VisualBasicTest/VirtualChars/VisualBasicVirtualCharServiceTests.vb new file mode 100644 index 0000000000000000000000000000000000000000..3b86d2323d342b0c6067bf6f8664687dd3effa0d --- /dev/null +++ 
b/src/Workspaces/VisualBasicTest/VirtualChars/VisualBasicVirtualCharServiceTests.vb @@ -0,0 +1,61 @@ +' Copyright (c) Microsoft. All Rights Reserved. Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. + +Imports System.Collections.Immutable +Imports Microsoft.CodeAnalysis.VirtualChars +Imports Microsoft.CodeAnalysis.VisualBasic.VirtualChars +Imports Xunit + +Namespace Microsoft.CodeAnalysis.VisualBasic.UnitTests.VirtualChars + Public Class VisualBasicVirtualCharServiceTests + Private Const _statementPrefix As String = "dim v = " + + Private Function GetStringToken(text As String) As SyntaxToken + Dim statement = _statementPrefix + text + Dim parsedStatement = SyntaxFactory.ParseExecutableStatement(statement) + Dim token = parsedStatement.DescendantTokens().ToArray()(3) + Assert.True(token.Kind() = SyntaxKind.StringLiteralToken) + + Return token + End Function + + Private Sub Test(stringText As String, expected As String) + Dim token = GetStringToken(stringText) + Dim virtualChars = VisualBasicVirtualCharService.Instance.TryConvertToVirtualChars(token) + Dim actual = ConvertToString(virtualChars) + Assert.Equal(expected, actual) + End Sub + + Private Sub TestFailure(stringText As String) + Dim token = GetStringToken(stringText) + Dim virtualChars = VisualBasicVirtualCharService.Instance.TryConvertToVirtualChars(token) + Assert.True(virtualChars.IsDefault) + End Sub + + + Public Sub TestEmptyString() + Test("""""", "") + End Sub + + + Public Sub TestSimpleString() + Test("""a""", "['a',[1,2]]") + End Sub + + + Public Sub TestStringWithDoubleQuoteInIt() + Test("""a""""b""", "['a',[1,2]]['""',[2,4]]['b',[4,5]]") + End Sub + + Private Function ConvertToString(virtualChars As ImmutableArray(Of VirtualChar)) As String + Return String.Join("", virtualChars.Select(AddressOf ConvertToString)) + End Function + + Private Function ConvertToString(vc As VirtualChar) As String + Return 
$"[{ConvertToString(vc.Char)},[{vc.Span.Start - _statementPrefix.Length},{vc.Span.End - _statementPrefix.Length}]]" + End Function + + Private Function ConvertToString(c As Char) As String + Return "'" + c + "'" + End Function + End Class +End Namespace