未验证 提交 a9969f2a 编写于 作者: A Andy Gocke 提交者: GitHub

Implement "character class" for analyzerconfig globs (#34799)

Implements the bracket syntax from Unix globs
(https://en.wikipedia.org/wiki/Glob_(programming)), commonly referred to
as "character classes" in regular expressions.
上级 e14e5f55
......@@ -339,13 +339,6 @@ public void EscapeDot()
Assert.False(matcher.IsMatch("/abc"));
}
[Fact]
public void BadEscapeMatch()
{
SectionNameMatcher? matcher = TryCreateSectionNameMatcher("abc\\d.cs");
Assert.Null(matcher);
}
[Fact]
public void EndBackslashMatch()
{
......@@ -407,7 +400,7 @@ public void LiteralQuestions()
public void LiteralBraces()
{
SectionNameMatcher matcher = TryCreateSectionNameMatcher("abc\\{\\}def").Value;
Assert.Equal("^.*/abc\\{\\}def$", matcher.Regex.ToString());
Assert.Equal(@"^.*/abc\{}def$", matcher.Regex.ToString());
Assert.True(matcher.IsMatch("/abc{}def"));
Assert.True(matcher.IsMatch("/subdir/abc{}def"));
......@@ -664,6 +657,194 @@ public void BadNumberRanges()
Assert.Null(matcherOpt);
}
[Fact]
public void CharacterClassSimple()
{
    // "[cf]" must accept exactly one of the listed characters.
    SectionNameMatcher sectionMatcher = TryCreateSectionNameMatcher("*.[cf]s").Value;
    Assert.Equal(@"^.*/[^/]*\.[cf]s$", sectionMatcher.Regex.ToString());

    foreach (string accepted in new[] { "/abc.cs", "/abc.fs" })
    {
        Assert.True(sectionMatcher.IsMatch(accepted));
    }

    // A character outside the class is rejected.
    Assert.False(sectionMatcher.IsMatch("/abc.vs"));
}
[Fact]
public void CharacterClassNegative()
{
    // A leading '!' negates the class; it is translated to the regex '^'.
    SectionNameMatcher sectionMatcher = TryCreateSectionNameMatcher("*.[!cf]s").Value;
    Assert.Equal(@"^.*/[^/]*\.[^cf]s$", sectionMatcher.Regex.ToString());

    foreach (string excluded in new[] { "/abc.cs", "/abc.fs" })
    {
        Assert.False(sectionMatcher.IsMatch(excluded));
    }

    foreach (string accepted in new[] { "/abc.vs", "/abc.xs" })
    {
        Assert.True(sectionMatcher.IsMatch(accepted));
    }

    // The class still consumes exactly one character.
    Assert.False(sectionMatcher.IsMatch("/abc.vxs"));
}
[Fact]
public void CharacterClassCaret()
{
    // '^' is not a negation marker in the glob syntax: it is escaped in the
    // generated regex and matched as an ordinary member of the class.
    SectionNameMatcher sectionMatcher = TryCreateSectionNameMatcher("*.[^cf]s").Value;
    Assert.Equal(@"^.*/[^/]*\.[\^cf]s$", sectionMatcher.Regex.ToString());

    foreach (string accepted in new[] { "/abc.cs", "/abc.fs", "/abc.^s" })
    {
        Assert.True(sectionMatcher.IsMatch(accepted));
    }

    foreach (string rejected in new[] { "/abc.vs", "/abc.xs", "/abc.vxs" })
    {
        Assert.False(sectionMatcher.IsMatch(rejected));
    }
}
[Fact]
public void CharacterClassRange()
{
    // A '-' between two characters is carried over to the regex as a range.
    SectionNameMatcher sectionMatcher = TryCreateSectionNameMatcher("[0-9]x").Value;
    Assert.Equal("^.*/[0-9]x$", sectionMatcher.Regex.ToString());

    foreach (string accepted in new[] { "/0x", "/1x", "/9x" })
    {
        Assert.True(sectionMatcher.IsMatch(accepted));
    }

    foreach (string rejected in new[] { "/yx", "/00x" })
    {
        Assert.False(sectionMatcher.IsMatch(rejected));
    }
}
[Fact]
public void CharacterClassNegativeRange()
{
    // '!' combined with a range excludes every character in that range.
    SectionNameMatcher sectionMatcher = TryCreateSectionNameMatcher("[!0-9]x").Value;
    Assert.Equal("^.*/[^0-9]x$", sectionMatcher.Regex.ToString());

    foreach (string excluded in new[] { "/0x", "/1x", "/9x" })
    {
        Assert.False(sectionMatcher.IsMatch(excluded));
    }

    Assert.True(sectionMatcher.IsMatch("/yx"));

    // Two leading characters cannot be matched by a single class.
    Assert.False(sectionMatcher.IsMatch("/00x"));
}
[Fact]
public void CharacterClassRangeAndChoice()
{
    // Individual characters and a range may be mixed in the same class.
    SectionNameMatcher sectionMatcher = TryCreateSectionNameMatcher("[ab0-9]x").Value;
    Assert.Equal("^.*/[ab0-9]x$", sectionMatcher.Regex.ToString());

    foreach (string accepted in new[] { "/ax", "/bx", "/0x", "/1x", "/9x" })
    {
        Assert.True(sectionMatcher.IsMatch(accepted));
    }

    foreach (string rejected in new[] { "/yx", "/0ax" })
    {
        Assert.False(sectionMatcher.IsMatch(rejected));
    }
}
[Fact]
public void CharacterClassOpenEnded()
{
    // A '[' that is never closed is a syntax error, so no matcher is produced.
    Assert.Null(TryCreateSectionNameMatcher("["));
}
[Fact]
public void CharacterClassEscapedOpenEnded()
{
    // The backslash escapes the ']', so the class is never terminated and
    // no matcher is produced.
    Assert.Null(TryCreateSectionNameMatcher(@"[\]"));
}
[Fact]
public void CharacterClassEscapeAtEnd()
{
    // A backslash with nothing after it inside a class is a syntax error.
    Assert.Null(TryCreateSectionNameMatcher(@"[\"));
}
[Fact]
public void CharacterClassOpenBracketInside()
{
    // A '[' inside a class is an ordinary member; it is escaped in the regex.
    SectionNameMatcher sectionMatcher = TryCreateSectionNameMatcher(@"[[a]bc").Value;

    foreach (string accepted in new[] { "/abc", "/[bc" })
    {
        Assert.True(sectionMatcher.IsMatch(accepted));
    }

    foreach (string rejected in new[] { "/ab", "/[b", "/bc", "/ac", "/[c" })
    {
        Assert.False(sectionMatcher.IsMatch(rejected));
    }

    Assert.Equal(@"^.*/[\[a]bc$", sectionMatcher.Regex.ToString());
}
[Fact]
public void CharacterClassStartingDash()
{
    // A '-' at the very start of the class is copied verbatim and matches itself.
    SectionNameMatcher sectionMatcher = TryCreateSectionNameMatcher(@"[-ac]bd").Value;

    foreach (string accepted in new[] { "/abd", "/cbd", "/-bd" })
    {
        Assert.True(sectionMatcher.IsMatch(accepted));
    }

    foreach (string rejected in new[] { "/bbd", "/-cd", "/bcd" })
    {
        Assert.False(sectionMatcher.IsMatch(rejected));
    }

    Assert.Equal(@"^.*/[-ac]bd$", sectionMatcher.Regex.ToString());
}
[Fact]
public void CharacterClassEndingDash()
{
    // A '-' at the very end of the class is copied verbatim and matches itself.
    SectionNameMatcher sectionMatcher = TryCreateSectionNameMatcher(@"[ac-]bd").Value;

    foreach (string accepted in new[] { "/abd", "/cbd", "/-bd" })
    {
        Assert.True(sectionMatcher.IsMatch(accepted));
    }

    foreach (string rejected in new[] { "/bbd", "/-cd", "/bcd" })
    {
        Assert.False(sectionMatcher.IsMatch(rejected));
    }

    Assert.Equal(@"^.*/[ac-]bd$", sectionMatcher.Regex.ToString());
}
[Fact]
public void CharacterClassEndBracketAfter()
{
    // The first ']' closes the class, so the second ']' is a literal that
    // must follow the single class character.
    var matcher = TryCreateSectionNameMatcher(@"[ab]]cd").Value;
    Assert.True(matcher.IsMatch("/a]cd"));
    Assert.True(matcher.IsMatch("/b]cd"));

    // Missing the literal ']'.
    Assert.False(matcher.IsMatch("/acd"));
    Assert.False(matcher.IsMatch("/bcd"));

    // Literal ']' present but no class character in front of it (this
    // replaces an assertion that duplicated the "/acd" case above).
    Assert.False(matcher.IsMatch("/]cd"));

    Assert.Equal(@"^.*/[ab]]cd$", matcher.Regex.ToString());
}
[Fact]
public void CharacterClassEscapeBackslash()
{
    // An escaped backslash inside a class matches one literal backslash.
    SectionNameMatcher sectionMatcher = TryCreateSectionNameMatcher(@"[ab\\]cd").Value;

    foreach (string accepted in new[] { "/acd", "/bcd", "/\\cd" })
    {
        Assert.True(sectionMatcher.IsMatch(accepted));
    }

    foreach (string rejected in new[] { "/dcd", "/\\\\cd", "/cd" })
    {
        Assert.False(sectionMatcher.IsMatch(rejected));
    }

    Assert.Equal(@"^.*/[ab\\]cd$", sectionMatcher.Regex.ToString());
}
[Fact]
public void EscapeOpenBracket()
{
    // Outside a class, an escaped '[' is a literal bracket rather than the
    // start of a character class.
    SectionNameMatcher sectionMatcher = TryCreateSectionNameMatcher(@"ab\[cd").Value;

    Assert.True(sectionMatcher.IsMatch("/ab[cd"));

    foreach (string rejected in new[] { "/ab[[cd", "/abc", "/abd" })
    {
        Assert.False(sectionMatcher.IsMatch(rejected));
    }

    Assert.Equal(@"^.*/ab\[cd$", sectionMatcher.Regex.ToString());
}
#endregion
#region Processing of dotnet_diagnostic rules
......
......@@ -183,25 +183,12 @@ public bool IsMatch(string s)
case TokenKind.Comma:
// The end of a choice section, or a failed parse
return parsingChoice;
case TokenKind.LiteralStar:
// Match a literal '*'
sb.Append("\\*");
break;
case TokenKind.LiteralQuestion:
sb.Append("\\?");
break;
case TokenKind.LiteralOpenBrace:
sb.Append("\\{");
break;
case TokenKind.LiteralCloseBrace:
sb.Append("\\}");
break;
case TokenKind.LiteralComma:
sb.Append(",");
break;
case TokenKind.Backslash:
// Literal backslash
sb.Append("\\\\");
case TokenKind.OpenBracket:
sb.Append('[');
if (!TryCompileCharacterClass(ref lexer, sb))
{
return false;
}
break;
default:
throw ExceptionUtilities.UnexpectedValue(tokenKind);
......@@ -211,6 +198,57 @@ public bool IsMatch(string s)
return !parsingChoice;
}
/// <summary>
/// Compile a globbing character class of the form [...]. Returns true if
/// the character class was successfully compiled. False if there was a syntax
/// error. The starting character is expected to be directly after the '['.
/// </summary>
/// <param name="lexer">
/// Lexer positioned on the first character after the opening '['. It is
/// advanced past every character consumed, including the closing ']'.
/// </param>
/// <param name="sb">
/// Receives the regex text for the class contents and the closing ']'. The
/// opening '[' is not written here. On failure it may hold partial output.
/// </param>
/// <returns>
/// True once an unescaped ']' has been consumed; false if the input ends
/// before the class is closed or directly after a backslash.
/// </returns>
private static bool TryCompileCharacterClass(ref SectionNameLexer lexer, StringBuilder sb)
{
    // [...] should match any of the characters in the brackets, with special
    // behavior for four characters: '!' immediately after the opening bracket
    // implies the negation of the character class, '-' implies matching
    // the range between the previous and next characters, '\' escapes the
    // following character, and ']' ends the class
    if (!lexer.IsDone && lexer.CurrentCharacter == '!')
    {
        sb.Append('^');
        lexer.Position++;
    }

    while (!lexer.IsDone)
    {
        var currentChar = lexer.EatCurrentCharacter();
        switch (currentChar)
        {
            case '-':
                // '-' means the same thing in regex as it does in the glob, so
                // put it in verbatim
                sb.Append(currentChar);
                break;

            case '\\':
                // A backslash makes the next glob character a literal.
                if (lexer.IsDone)
                {
                    return false;
                }

                var escapedChar = lexer.EatCurrentCharacter();
                if (escapedChar == ']' || escapedChar == '-')
                {
                    // Regex.Escape leaves these two untouched, but unescaped
                    // they would close the regex class or form a range.
                    sb.Append('\\');
                    sb.Append(escapedChar);
                }
                else
                {
                    // Do not blindly emit '\' + char: for letters and digits
                    // that would create a regex escape (e.g. "\d" = any digit)
                    // instead of the literal character. Regex.Escape only
                    // adds a backslash where the regex syntax needs one.
                    sb.Append(Regex.Escape(escapedChar.ToString()));
                }
                break;

            case ']':
                sb.Append(currentChar);
                return true;

            default:
                sb.Append(Regex.Escape(currentChar.ToString()));
                break;
        }
    }

    // Stream ended without a closing bracket
    return false;
}
/// <summary>
/// Parses choice defined by the following grammar:
/// <![CDATA[
......@@ -356,45 +394,20 @@ public TokenKind Lex()
Position++;
return TokenKind.CloseCurly;
case '[':
Position++;
return TokenKind.OpenBracket;
case '\\':
{
// Backslash escapes the next character
Position++;
if (Position >= _sectionName.Length)
if (IsDone)
{
return TokenKind.BadToken;
}
// Check for all of the possible escapes
switch (_sectionName[Position++])
{
case '\\':
// "\\" -> "\"
return TokenKind.Backslash;
case '*':
// "\*" -> "\*"
return TokenKind.LiteralStar;
case '?':
// "\?" -> "\?"
return TokenKind.LiteralQuestion;
case '{':
// "\{" -> "{"
return TokenKind.LiteralOpenBrace;
case ',':
// "\," -> ","
return TokenKind.LiteralComma;
case '}':
// "\}" -> "}"
return TokenKind.LiteralCloseBrace;
default:
return TokenKind.BadToken;
}
return TokenKind.SimpleCharacter;
}
default:
......@@ -477,12 +490,7 @@ private enum TokenKind
CloseCurly,
Comma,
DoubleDot,
Backslash,
LiteralStar,
LiteralQuestion,
LiteralOpenBrace,
LiteralCloseBrace,
LiteralComma
OpenBracket,
}
}
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册