提交 908ea747 编写于 作者: J Julien Couvreur 提交者: GitHub

Lexer should not lose characters when too many bad tokens encountered (#15919)

上级 24b42d0d
......@@ -415,6 +415,7 @@ private void ScanSyntaxToken(ref TokenInfo info)
char character;
char surrogateCharacter = SlidingTextWindow.InvalidCharacter;
bool isEscaped = false;
int startingPosition = TextWindow.Position;
// Start scanning the token
character = TextWindow.PeekChar();
......@@ -877,10 +878,9 @@ private void ScanSyntaxToken(ref TokenInfo info)
if (_badTokenCount++ > 200)
{
// If we get too many characters that we cannot make sense of, absorb the rest of the input.
int position = TextWindow.Position - 1;
int end = TextWindow.Text.Length;
int width = end - position;
info.Text = TextWindow.Text.ToString(new TextSpan(position, width));
int width = end - startingPosition;
info.Text = TextWindow.Text.ToString(new TextSpan(startingPosition, width));
TextWindow.Reset(end);
}
else
......
......@@ -4,6 +4,9 @@
using Microsoft.CodeAnalysis.Test.Utilities;
using Roslyn.Test.Utilities;
using Roslyn.Test.Utilities.Syntax;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using Xunit;
namespace Microsoft.CodeAnalysis.CSharp.UnitTests.Parsing
......@@ -128,6 +131,42 @@ public int
);
}
[Fact]
[WorkItem(217398, "https://devdiv.visualstudio.com/DefaultCollection/DevDiv/_workitems?id=217398")]
public void LexerTooManyBadTokens()
{
    // Build 201 consecutive "\u003C" escapes (6 characters each, 1206 characters total),
    // followed by four "\u003E" escapes.
    var source = new StringBuilder();
    for (int i = 0; i <= 200; i++)
    {
        source.Append(@"\u003C");
    }
    source.Append(@"\u003E\u003E\u003E\u003E");

    var parsedTree = ParseWithRoundTripCheck(source.ToString());

    // Materialize once: the diagnostics are inspected several times below, and
    // re-enumerating the sequence for every assertion is wasted work.
    Diagnostic[] actualErrors = parsedTree.GetDiagnostics().ToArray();

    // 201 individual "unexpected character" errors plus one error covering the trailing text.
    Assert.Equal(202, actualErrors.Length);

    // The last individually reported escape starts at column 200 * 6 + 1 = 1201.
    Assert.Equal("(1,1201): error CS1056: Unexpected character '\\u003C'", actualErrors[200].ToString());

    // The final error starts at column 1207 and must name all four trailing escapes,
    // i.e. no characters are dropped from the reported text.
    Assert.Equal("(1,1207): error CS1056: Unexpected character '\\u003E\\u003E\\u003E\\u003E'", actualErrors[201].ToString());
}
[Fact]
[WorkItem(217398, "https://devdiv.visualstudio.com/DefaultCollection/DevDiv/_workitems?id=217398")]
public void LexerTooManyBadTokens_LongUnicode()
{
    // Build 201 consecutive long-form "\U0000003C" escapes (10 characters each,
    // 2010 characters total), followed by four short-form "\u003E" escapes.
    var source = new StringBuilder();
    for (int i = 0; i <= 200; i++)
    {
        source.Append(@"\U0000003C");
    }
    source.Append(@"\u003E\u003E\u003E\u003E");

    var parsedTree = ParseWithRoundTripCheck(source.ToString());

    // Materialize once: the diagnostics are inspected several times below, and
    // re-enumerating the sequence for every assertion is wasted work.
    Diagnostic[] actualErrors = parsedTree.GetDiagnostics().ToArray();

    // 201 individual "unexpected character" errors plus one error covering the trailing text.
    Assert.Equal(202, actualErrors.Length);

    // The last individually reported escape starts at column 200 * 10 + 1 = 2001.
    Assert.Equal("(1,2001): error CS1056: Unexpected character '\\U0000003C'", actualErrors[200].ToString());

    // The final error starts at column 2011 and must name all four trailing escapes,
    // i.e. no characters are dropped from the reported text.
    Assert.Equal("(1,2011): error CS1056: Unexpected character '\\u003E\\u003E\\u003E\\u003E'", actualErrors[201].ToString());
}
#region "Helpers"
public static void ParseAndValidate(string text, params DiagnosticDescription[] expectedErrors)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册