提交 0701f105 编写于 作者: C CyrusNajmabadi 提交者: GitHub

Merge pull request #18138 from CyrusNajmabadi/camelCaseMatching

Support camel cased matching, even if the pattern was entirely lowercase.
......@@ -715,13 +715,103 @@ public void MatchAllLowerPattern3()
[Fact]
public void MatchAllLowerPattern4()
{
    // The all-lowercase pattern "efghij" is expected to match the candidate at the
    // bracketed "EfgHij" portion.
    //
    // A previous assertion in this test required the same candidate/pattern pair to
    // produce no match. Assuming the [| |] markers only annotate the expected span
    // (TODO confirm against TryMatchSingleWordPattern), the two assertions contradicted
    // each other and the test could never pass, so only the current expectation is kept.
    Assert.NotNull(TryMatchSingleWordPattern("Abcdefghij[|EfgHij|]", "efghij"));
}
[Fact]
public void MatchAllLowerPattern5()
{
    // Here the pattern text only occurs inside an all-lowercase run of the candidate;
    // the test pins that no match is reported.
    var match = TryMatchSingleWordPattern("Abcdefghijefghij", "efghij");

    Assert.Null(match);
}
[Fact]
public void MatchAllLowerCamelCasePattern1()
{
    // "co", "fi" and "pro" are expected to land on the three marked pieces of the
    // candidate, and the match must carry the maximum camel-case weight.
    var match = TryMatchSingleWordPattern("[|Co|]de[|Fi|]x[|Pro|]vider", "cofipro");

    Assert.NotNull(match);
    Assert.Equal(PatternMatcher.CamelCaseMaxWeight, match.Value.CamelCaseWeight);
}
[Fact]
public void MatchAllLowerCamelCasePattern2()
{
    // Expected split of the pattern, per the markers: "c" / "ofi" / "pro".
    var match = TryMatchSingleWordPattern("[|C|]lear[|Ofi|]lac[|Pro|]fessional", "cofipro");

    Assert.NotNull(match);
}
[Fact]
public void MatchAllLowerCamelCasePattern3()
{
    // The candidate also begins with "Co", yet the markers expect only "c" to be taken
    // from the first piece, followed by "ofi" and "pro".
    var match = TryMatchSingleWordPattern("[|C|]ore[|Ofi|]lac[|Pro|]fessional", "cofipro");

    Assert.NotNull(match);
}
[Fact]
public void MatchAllLowerCamelCasePattern4()
{
    // "Fix" is not marked as matched; the expected weight is only the from-start bonus.
    var match = TryMatchSingleWordPattern("[|Co|]deFix[|Pro|]vider", "copro");

    Assert.NotNull(match);
    Assert.Equal(PatternMatcher.CamelCaseMatchesFromStartBonus, match.Value.CamelCaseWeight);
}
[Fact]
public void MatchAllLowerCamelCasePattern5()
{
    // The marked pieces are adjacent but do not include the leading "Code"; the
    // expected weight is only the contiguous bonus.
    var match = TryMatchSingleWordPattern("Code[|Fi|]x[|Pro|]vider", "fipro");

    Assert.NotNull(match);
    Assert.Equal(PatternMatcher.CamelCaseContiguousBonus, match.Value.CamelCaseWeight);
}
[Fact]
public void MatchAllLowerCamelCasePattern6()
{
    // Neither the leading "Code" nor "Object" (between the marked pieces) is matched;
    // the expected weight is 0.
    var match = TryMatchSingleWordPattern("Code[|Fi|]xObject[|Pro|]vider", "fipro");

    Assert.NotNull(match);
    Assert.Equal(0, match.Value.CamelCaseWeight);
}
[Fact]
public void MatchAllLowerCamelCasePattern7()
{
    // Same shape as the "CodeFixProvider"/"copro" case, but the candidate starts with a
    // lowercase letter; the from-start bonus is still expected.
    var match = TryMatchSingleWordPattern("[|co|]deFix[|Pro|]vider", "copro");

    Assert.NotNull(match);
    Assert.Equal(PatternMatcher.CamelCaseMatchesFromStartBonus, match.Value.CamelCaseWeight);
}
[Fact]
public void MatchAllLowerCamelCasePattern8()
{
    // The candidate starts with an underscore before the first marked piece; the test
    // pins a weight of 0 for this case.
    var match = TryMatchSingleWordPattern("_[|co|]deFix[|Pro|]vider", "copro");

    Assert.NotNull(match);
    Assert.Equal(0, match.Value.CamelCaseWeight);
}
[Fact]
public void MatchAllLowerCamelCasePattern9()
{
    // An underscore sits between "Fix" and "Provider"; the from-start bonus is expected.
    var match = TryMatchSingleWordPattern("[|Co|]deFix_[|Pro|]vider", "copro");

    Assert.NotNull(match);
    Assert.Equal(PatternMatcher.CamelCaseMatchesFromStartBonus, match.Value.CamelCaseWeight);
}
[Fact]
public void MatchAllLowerCamelCasePattern10()
{
    // All-caps candidate whose words are separated by underscores; the lowercase pattern
    // is expected to match the marked pieces with the from-start bonus.
    var match = TryMatchSingleWordPattern("[|CO|]DE_FIX_[|PRO|]VIDER", "copro");

    Assert.NotNull(match);
    Assert.Equal(PatternMatcher.CamelCaseMatchesFromStartBonus, match.Value.CamelCaseWeight);
}
[Fact]
public void DoNotMatchAllLowerCamelCasePatternReordered()
{
    // The pattern names the pieces out of candidate order ("fi" before "co"). This is
    // currently expected not to match, though it could be supported in the future.
    var match = TryMatchSingleWordPattern("CodeFixObjectProvider", "ficopro");

    Assert.Null(match);
}
private static IList<string> PartListToSubstrings(string identifier, StringBreaks parts)
{
List<string> result = new List<string>();
for (int i = 0; i < parts.Count; i++)
var result = new List<string>();
for (var i = 0; i < parts.Count; i++)
{
var span = parts[i];
result.Add(identifier.Substring(span.Start, span.Length));
......@@ -731,14 +821,10 @@ private static IList<string> PartListToSubstrings(string identifier, StringBreak
}
// Breaks the identifier with StringBreaker.BreakIntoCharacterParts and returns the text
// of each resulting part. Only the expression-bodied form is kept: the member previously
// carried both a block body and an expression body, which is not valid C#.
private static IList<string> BreakIntoCharacterParts(string identifier)
    => PartListToSubstrings(identifier, StringBreaker.BreakIntoCharacterParts(identifier));
// Breaks the identifier with StringBreaker.BreakIntoWordParts and returns the text of
// each resulting part. Only the expression-bodied form is kept: the member previously
// carried both a block body and an expression body, which is not valid C#.
private static IList<string> BreakIntoWordParts(string identifier)
    => PartListToSubstrings(identifier, StringBreaker.BreakIntoWordParts(identifier));
private static PatternMatch? TryMatchSingleWordPattern(string candidate, string pattern)
{
......
......@@ -157,7 +157,7 @@ End Sub
this.SendKeys(' ');
Assert.Equal(true, Editor.IsCompletionActive());
this.SendKeys("foo");
this.SendKeys("fooo");
Assert.Equal(false, Editor.IsCompletionActive());
this.SendKeys(' ');
......
// Copyright (c) Microsoft. All Rights Reserved. Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
using System;
using System.Collections.Generic;
using System.Diagnostics;
using Microsoft.CodeAnalysis.Shared.Utilities;
using Microsoft.CodeAnalysis.Text;
namespace Microsoft.CodeAnalysis.PatternMatching
{
internal sealed partial class PatternMatcher : IDisposable
{
/// <summary>
/// Encapsulates the logic responsible for matching an all lowercase pattern against
/// a candidate using CamelCase matching. i.e. this code is responsible for finding the
/// match between "cofipro" and "CodeFixProvider".
/// </summary>
private struct AllLowerCamelCaseMatcher
{
private readonly string _candidate;
private readonly bool _includeMatchedSpans;
private readonly StringBreaks _candidateHumps;
private readonly TextChunk _patternChunk;
private readonly string _patternText;
/// <summary>
/// Captures everything needed for one matching attempt.
/// </summary>
/// <param name="candidate">The candidate text being matched against.</param>
/// <param name="includeMatchedSpans">Whether matched spans should be collected.</param>
/// <param name="candidateHumps">The humps (parts) the candidate was broken into.</param>
/// <param name="patternChunk">The pattern chunk; its text is cached for quick access.</param>
public AllLowerCamelCaseMatcher(string candidate, bool includeMatchedSpans, StringBreaks candidateHumps, TextChunk patternChunk)
{
    // Pattern side.
    _patternChunk = patternChunk;
    _patternText = patternChunk.Text;

    // Candidate side.
    _candidate = candidate;
    _candidateHumps = candidateHumps;

    _includeMatchedSpans = includeMatchedSpans;
}
/// <summary>
/// Tries to match the whole all-lowercase pattern against the humps of the candidate.
/// </summary>
/// <param name="matchedSpans">
/// Receives the spans of the candidate that were matched. This is null when no match
/// was found or when matched spans were not requested.
/// </param>
/// <returns>
/// null if no match was found. Otherwise the weight of the best match found: the sum of
/// <see cref="CamelCaseContiguousBonus"/> (when the match is contiguous) and
/// <see cref="CamelCaseMatchesFromStartBonus"/> (when the match starts at the first hump
/// of the candidate). A match that earns neither bonus has weight 0, and a match that
/// earns both has weight <see cref="CamelCaseMaxWeight"/>.
/// </returns>
public int? TryMatch(out List<TextSpan> matchedSpans)
{
    // Matching something like "cofipro" against "CodeFixProvider" is highly ambiguous:
    // the same pattern should also match "CorporateOfficePartsRoom". If "co" were greedily
    // consumed by "Corporate", the following "f" would match no hump at all. So the search
    // has to branch and try every option at every character of the pattern.
    //
    // Start with the first pattern character and the first hump; 'contiguous: null' means
    // nothing has been matched yet.
    var outcome = TryMatch(patternIndex: 0, candidateHumpIndex: 0, contiguous: null);

    matchedSpans = outcome.matchedSpans;
    return outcome.bestWeight;
}
/// <summary>
/// Tries to match the pattern, from <paramref name="patternIndex"/> onwards, against the
/// candidate humps from <paramref name="candidateHumpIndex"/> onwards, trying every hump
/// whose first character matches the current pattern character.
/// </summary>
/// <param name="patternIndex">Index of the first pattern character still to be matched.</param>
/// <param name="candidateHumpIndex">Index of the first candidate hump that may be used.</param>
/// <param name="contiguous">
/// null when nothing has been matched yet; true when no hump has been skipped between the
/// humps matched so far; false once a hump has been skipped.
/// </param>
/// <returns>
/// The best weight found together with its matched spans (null spans when they were not
/// requested), or (null, null) when the remainder of the pattern cannot be matched.
/// </returns>
private (int? bestWeight, List<TextSpan> matchedSpans) TryMatch(
    int patternIndex, int candidateHumpIndex, bool? contiguous)
{
    if (patternIndex == _patternText.Length)
    {
        // We hit the end. So we were able to match against this candidate.
        return (bestWeight: contiguous == false ? 0 : CamelCaseContiguousBonus,
                matchedSpans: _includeMatchedSpans ? new List<TextSpan>() : null);
    }

    var bestWeight = default(int?);
    var bestMatchedSpans = default(List<TextSpan>);

    // Look for a hump in the candidate that matches the current letter we're on.
    var patternCharacter = _patternText[patternIndex];
    for (var humpIndex = candidateHumpIndex; humpIndex < _candidateHumps.Count; humpIndex++)
    {
        var candidateHump = _candidateHumps[humpIndex];
        if (char.ToLower(_candidate[candidateHump.Start]) != patternCharacter)
        {
            continue;
        }

        // Found a hump in the candidate string that matches the current pattern
        // character we're on. i.e. we matched the c in cofipro against the C in
        // CodeFixProvider.
        //
        // Now, for each subsequent character, we need to both try to consume it
        // as part of the current hump, or see if it should match the next hump.
        //
        // Note, if the candidate is something like CodeFixProvider and our pattern
        // is cofipro, and we've matched the 'f' against the 'F', then the max of
        // the pattern we'll want to consume is "fip" against "Fix". We don't want to
        // consume parts of the pattern once we reach the next hump.

        // Work out contiguity for the path that goes through *this* hump, without
        // touching the incoming 'contiguous' value:
        //  - nothing matched yet (null): this hump is the first match, so the path
        //    starts out contiguous;
        //  - otherwise the path stays contiguous only if it already was and we did not
        //    jump past a hump to get here.
        //
        // This must be a per-iteration value. Updating 'contiguous' itself would leak
        // the state of an earlier attempt into later ones: after a first-letter match on
        // an earlier hump, any match starting on a later hump would wrongly be treated as
        // non-contiguous even though that later hump is the first matched hump of its path.
        var contiguousThroughThisHump = contiguous == null
            || (contiguous.Value && humpIndex == candidateHumpIndex);

        var (weight, matchedSpans) = TryConsumePatternOrMatchNextHump(
            patternIndex, humpIndex, contiguousThroughThisHump);

        if (weight == null)
        {
            // Even though we matched this current candidate hump we failed to match
            // the remainder of the pattern. Continue to the next candidate hump
            // to see if our pattern character will match it and potentially succeed.
            continue;
        }

        Debug.Assert(weight >= 0);
        if (weight == CamelCaseMaxWeight)
        {
            // We found a path that allowed us to match everything contiguously
            // from the beginning. This is the best match possible. So we can
            // just stop now and return this result.
            return (weight, matchedSpans);
        }

        // This is a decent match. But something else could beat it, store
        // it if it's the best match we have so far, but keep searching.
        if (bestWeight == null || weight > bestWeight)
        {
            bestWeight = weight;
            bestMatchedSpans = matchedSpans;
        }
    }

    return (bestWeight, bestMatchedSpans);
}
/// <summary>
/// The pattern character at <paramref name="patternIndex"/> has matched the start of the
/// hump at <paramref name="humpIndex"/>. Tries every prefix length of that hump that the
/// pattern can consume, and for each one lets the rest of the pattern match the humps
/// that follow.
/// </summary>
/// <param name="patternIndex">Index of the pattern character that matched the hump start.</param>
/// <param name="humpIndex">Index of the candidate hump being consumed.</param>
/// <param name="contiguous">Whether the match is still contiguous; passed on to the recursion.</param>
/// <returns>
/// The best weight found together with its matched spans, or (null, null) when no prefix
/// length leads to a full match of the pattern.
/// </returns>
private (int? bestWeight, List<TextSpan> matchedSpans) TryConsumePatternOrMatchNextHump(
    int patternIndex, int humpIndex, bool contiguous)
{
    int? bestWeight = null;
    List<TextSpan> bestMatchedSpans = null;

    var hump = _candidateHumps[humpIndex];

    // Never consume more than what is left of the pattern, nor more than the hump holds.
    var remainingPatternLength = _patternText.Length - patternIndex;
    var longestPrefix = Math.Min(remainingPatternLength, hump.Length);

    for (var prefixLength = 1; prefixLength <= longestPrefix; prefixLength++)
    {
        var prefixMatches = LowercaseSubstringsMatch(
            _candidate, hump.Start,
            _patternText, patternIndex, prefixLength);
        if (!prefixMatches)
        {
            // The pattern stopped agreeing with this hump; longer prefixes cannot match.
            break;
        }

        // The part of the candidate hump covered by this prefix.
        var humpSpan = new TextSpan(hump.Start, prefixLength);

        // 'f' has matched 'F', or 'fi' has matched 'Fi'. Recurse so the remaining pattern
        // text is matched against the humps after this one.
        var (restWeight, spans) = TryMatch(
            patternIndex + prefixLength, humpIndex + 1, contiguous);
        if (restWeight == null)
        {
            // The remainder did not match. Consuming more of this hump may still work.
            continue;
        }

        Debug.Assert(restWeight <= CamelCaseContiguousBonus);

        // Matches that begin on the very first hump earn the from-start bonus.
        var weight = humpIndex == 0
            ? restWeight.Value + CamelCaseMatchesFromStartBonus
            : restWeight.Value;

        var isBestPossible = weight == CamelCaseMaxWeight;
        if (isBestPossible || bestWeight == null || weight > bestWeight)
        {
            // Our span precedes everything the recursion matched.
            spans?.Insert(0, humpSpan);

            if (isBestPossible)
            {
                // Contiguous and from the start: nothing can beat this, so stop searching.
                return (weight, spans);
            }

            // Good so far, but keep looking in case another prefix length does better.
            bestWeight = weight;
            bestMatchedSpans = spans;
        }
    }

    return (bestWeight, bestMatchedSpans);
}
/// <summary>
/// Compares <paramref name="length"/> characters of <paramref name="s1"/> starting at
/// <paramref name="start1"/> with the same number of characters of <paramref name="s2"/>
/// starting at <paramref name="start2"/>, lowercasing each character before comparing.
/// </summary>
/// <returns>True when every compared pair is equal (trivially true for a length of 0).</returns>
private bool LowercaseSubstringsMatch(
    string s1, int start1, string s2, int start2, int length)
{
    var offset = 0;
    while (offset < length)
    {
        var left = char.ToLower(s1[start1 + offset]);
        var right = char.ToLower(s2[start2 + offset]);
        if (left != right)
        {
            return false;
        }

        offset++;
    }

    return true;
}
}
}
}
\ No newline at end of file
......@@ -21,6 +21,10 @@ namespace Microsoft.CodeAnalysis.PatternMatching
/// </summary>
internal sealed partial class PatternMatcher : IDisposable
{
/// <summary>
/// Weight added to a camel-case match when the match is contiguous.
/// </summary>
public const int CamelCaseContiguousBonus = 1;
/// <summary>
/// Weight added to a camel-case match when the match starts at the beginning of the candidate.
/// </summary>
public const int CamelCaseMatchesFromStartBonus = 2;
/// <summary>
/// The highest camel-case weight: a match that earns both the contiguous bonus and the
/// from-start bonus.
/// </summary>
public const int CamelCaseMaxWeight = CamelCaseContiguousBonus + CamelCaseMatchesFromStartBonus;
private static readonly char[] s_dotCharacterArray = { '.' };
private readonly object _gate = new object();
......@@ -42,8 +46,8 @@ internal sealed partial class PatternMatcher : IDisposable
/// <summary>
/// Creates a matcher for <paramref name="pattern"/> using <see cref="CultureInfo.CurrentCulture"/>.
/// All arguments are forwarded unchanged to the overload that also takes a culture.
/// </summary>
/// <param name="pattern">The pattern to match candidates against.</param>
/// <param name="verbatimIdentifierPrefixIsWordCharacter">Forwarded to the culture-taking overload.</param>
/// <param name="allowFuzzyMatching">Forwarded to the culture-taking overload.</param>
public PatternMatcher(
    string pattern,
    bool verbatimIdentifierPrefixIsWordCharacter = false,
    bool allowFuzzyMatching = false)
    : this(pattern, CultureInfo.CurrentCulture, verbatimIdentifierPrefixIsWordCharacter, allowFuzzyMatching)
{
    // The last parameter and the constructor initializer were each listed twice, which
    // does not compile; a single copy of each is kept.
}
......@@ -356,39 +360,25 @@ private static bool ContainsUpperCaseLetter(string pattern)
}
}
if (!isLowercase)
var match = TryCamelCaseMatch(
candidate, includeMatchSpans, patternChunk,
punctuationStripped, isLowercase);
if (match.HasValue)
{
// e) If the part was not entirely lowercase, then attempt a camel cased match as well.
if (patternChunk.CharacterSpans.Count > 0)
{
var candidateParts = GetWordSpans(candidate);
var camelCaseWeight = TryCamelCaseMatch(candidate, includeMatchSpans, candidateParts, patternChunk, CompareOptions.None, out var matchedSpans);
if (camelCaseWeight.HasValue)
{
return new PatternMatch(
PatternMatchKind.CamelCase, punctuationStripped, isCaseSensitive: true, camelCaseWeight: camelCaseWeight,
matchedSpans: GetMatchedSpans(includeMatchSpans, matchedSpans));
}
camelCaseWeight = TryCamelCaseMatch(candidate, includeMatchSpans, candidateParts, patternChunk, CompareOptions.IgnoreCase, out matchedSpans);
if (camelCaseWeight.HasValue)
{
return new PatternMatch(
PatternMatchKind.CamelCase, punctuationStripped, isCaseSensitive: false, camelCaseWeight: camelCaseWeight,
matchedSpans: GetMatchedSpans(includeMatchSpans, matchedSpans));
}
}
return match.Value;
}
if (isLowercase)
{
// f) Is the pattern a substring of the candidate starting on one of the candidate's word boundaries?
// g) The word is all lower case. Is it a case insensitive substring of the candidate
// starting on a part boundary of the candidate?
// We could check every character boundary start of the candidate for the pattern. However, that's
// an m * n operation in the worst case. Instead, find the first instance of the pattern
// substring, and see if it starts on a capital letter. It seems unlikely that the user will try to
// filter the list based on a substring that starts on a capital letter and also with a lowercase one.
// (Pattern: fogbar, Candidate: quuxfogbarFogBar).
// We could check every character boundary start of the candidate for the pattern.
// However, that's an m * n operation in the worst case. Instead, find the first
// instance of the pattern substring, and see if it starts on a capital letter.
// It seems unlikely that the user will try to filter the list based on a substring
// that starts on a capital letter and also with a lowercase one. (Pattern: fogbar,
// Candidate: quuxfogbarFogBar).
if (patternChunk.Text.Length < candidate.Length)
{
if (caseInsensitiveIndex != -1 && char.IsUpper(candidate[caseInsensitiveIndex]))
......@@ -414,7 +404,7 @@ private static bool ContainsUpperCaseLetter(string pattern)
/// <summary>
/// Converts the collected spans into the array stored on a match.
/// </summary>
/// <param name="includeMatchSpans">Whether matched spans were requested.</param>
/// <param name="matchedSpans">The spans collected while matching.</param>
/// <returns>
/// The spans passed through <see cref="NormalizedTextSpanCollection"/> when
/// <paramref name="includeMatchSpans"/> is true; otherwise an empty array.
/// </returns>
private ImmutableArray<TextSpan> GetMatchedSpans(bool includeMatchSpans, List<TextSpan> matchedSpans)
{
    // The 'return' line was present twice, which does not compile; one copy is kept.
    return includeMatchSpans
        ? new NormalizedTextSpanCollection(matchedSpans).ToImmutableArray()
        : ImmutableArray<TextSpan>.Empty;
}
......@@ -446,7 +436,7 @@ private static bool ContainsSpaceOrAsterisk(string text)
return ImmutableArray<PatternMatch>.Empty;
}
var singleMatch = MatchPatternSegment(candidate, includeMatchSpans, patternSegment,
var singleMatch = MatchPatternSegment(candidate, includeMatchSpans, patternSegment,
wantAllMatches: true, fuzzyMatch: fuzzyMatch, allMatches: out var matches);
if (singleMatch.HasValue)
{
......@@ -496,7 +486,7 @@ private static bool ContainsSpaceOrAsterisk(string text)
// multi-word segment.
if (!ContainsSpaceOrAsterisk(segment.TotalTextChunk.Text))
{
var match = MatchPatternChunk(candidate, includeMatchSpans,
var match = MatchPatternChunk(candidate, includeMatchSpans,
segment.TotalTextChunk, punctuationStripped: false, fuzzyMatch: fuzzyMatch);
if (match != null)
{
......@@ -533,10 +523,13 @@ private static bool ContainsSpaceOrAsterisk(string text)
// candidate in a case *sensitive* manner. If so, return that there was a substring
// match.
//
// e) If the word was not entirely lowercase, then attempt a camel cased match as
// well.
// e) If the word was entirely lowercase, then attempt a special lower cased camel cased
// match. i.e. cofipro would match CodeFixProvider.
//
// f) If the word was not entirely lowercase, then attempt a normal camel cased match.
// i.e. CoFiPro would match CodeFixProvider, but CofiPro would not.
//
// f) The word is all lower case. Is it a case insensitive substring of the candidate starting
// g) The word is all lower case. Is it a case insensitive substring of the candidate starting
// on a part boundary of the candidate?
//
// Only if all words have some sort of match is the pattern considered matched.
......@@ -609,15 +602,70 @@ private bool PartStartsWith(string candidate, TextSpan candidatePart, string pat
/// <param name="compareOptions">Options for doing the comparison (case sensitive or not)</param>
/// <returns>True if the span identified by <paramref name="candidatePart"/> within <paramref name="candidate"/> starts with <paramref name="pattern"/></returns>
private bool PartStartsWith(string candidate, TextSpan candidatePart, string pattern, CompareOptions compareOptions)
{
    // Compare against the entire pattern by handing the span-taking overload a span that
    // covers all of it.
    var wholePattern = new TextSpan(0, pattern.Length);
    return PartStartsWith(candidate, candidatePart, pattern, wholePattern, compareOptions);
}
/// <summary>
/// Attempts a camel-case match of <paramref name="patternChunk"/> against
/// <paramref name="candidate"/>. An all-lowercase pattern goes through the dedicated
/// all-lowercase matcher; any other pattern goes through the regular camel-case matcher,
/// first case-sensitively and then case-insensitively.
/// </summary>
/// <param name="candidate">The candidate text.</param>
/// <param name="includeMatchSpans">Whether the resulting match should carry matched spans.</param>
/// <param name="patternChunk">The pattern chunk to match.</param>
/// <param name="punctuationStripped">Passed through to the resulting <see cref="PatternMatch"/>.</param>
/// <param name="isLowercase">Whether the pattern is entirely lowercase.</param>
/// <returns>A camel-case <see cref="PatternMatch"/>, or null when no camel-case match exists.</returns>
private PatternMatch? TryCamelCaseMatch(
    string candidate, bool includeMatchSpans, TextChunk patternChunk,
    bool punctuationStripped, bool isLowercase)
{
    // A leftover 'return PartStartsWith(...)' statement used to open this body. It
    // returned a bool from a PatternMatch?-returning method and made everything below
    // unreachable, so it has been dropped.
    if (isLowercase)
    {
        // e) If the word was entirely lowercase, then attempt a special lower cased camel cased
        //    match.  i.e. cofipro would match CodeFixProvider.
        var candidateParts = GetWordSpans(candidate);
        var camelCaseWeight = TryAllLowerCamelCaseMatch(
            candidate, includeMatchSpans, candidateParts, patternChunk, out var matchedSpans);
        if (camelCaseWeight.HasValue)
        {
            return new PatternMatch(
                PatternMatchKind.CamelCase, punctuationStripped, isCaseSensitive: false, camelCaseWeight: camelCaseWeight,
                matchedSpans: GetMatchedSpans(includeMatchSpans, matchedSpans));
        }
    }
    else
    {
        // f) If the word was not entirely lowercase, then attempt a normal camel cased match.
        //    i.e. CoFiPro would match CodeFixProvider, but CofiPro would not.
        if (patternChunk.CharacterSpans.Count > 0)
        {
            var candidateParts = GetWordSpans(candidate);

            // Prefer a case-sensitive camel-case match...
            var camelCaseWeight = TryUpperCaseCamelCaseMatch(candidate, includeMatchSpans, candidateParts, patternChunk, CompareOptions.None, out var matchedSpans);
            if (camelCaseWeight.HasValue)
            {
                return new PatternMatch(
                    PatternMatchKind.CamelCase, punctuationStripped, isCaseSensitive: true, camelCaseWeight: camelCaseWeight,
                    matchedSpans: GetMatchedSpans(includeMatchSpans, matchedSpans));
            }

            // ...and fall back to a case-insensitive one.
            camelCaseWeight = TryUpperCaseCamelCaseMatch(candidate, includeMatchSpans, candidateParts, patternChunk, CompareOptions.IgnoreCase, out matchedSpans);
            if (camelCaseWeight.HasValue)
            {
                return new PatternMatch(
                    PatternMatchKind.CamelCase, punctuationStripped, isCaseSensitive: false, camelCaseWeight: camelCaseWeight,
                    matchedSpans: GetMatchedSpans(includeMatchSpans, matchedSpans));
            }
        }
    }

    return null;
}
// Runs the all-lowercase camel-case matcher over the candidate and its parts, returning
// the weight it reports (null when there is no match) and handing back its matched spans.
private int? TryAllLowerCamelCaseMatch(
    string candidate,
    bool includeMatchedSpans,
    StringBreaks candidateParts,
    TextChunk patternChunk,
    out List<TextSpan> matchedSpans)
    => new AllLowerCamelCaseMatcher(candidate, includeMatchedSpans, candidateParts, patternChunk)
        .TryMatch(out matchedSpans);
private int? TryCamelCaseMatch(
string candidate,
private int? TryUpperCaseCamelCaseMatch(
string candidate,
bool includeMatchedSpans,
StringBreaks candidateParts,
TextChunk patternChunk,
StringBreaks candidateParts,
TextChunk patternChunk,
CompareOptions compareOption,
out List<TextSpan> matchedSpans)
{
......@@ -643,18 +691,18 @@ private bool PartStartsWith(string candidate, TextSpan candidatePart, string pat
Contract.Requires(contiguous.HasValue);
// We did match! We shall assign a weight to this
int weight = 0;
var weight = 0;
// Was this contiguous?
if (contiguous.Value)
{
weight += 1;
weight += CamelCaseContiguousBonus;
}
// Did we start at the beginning of the candidate?
if (firstMatch.Value == 0)
{
weight += 2;
weight += CamelCaseMatchesFromStartBonus;
}
return weight;
......
......@@ -389,6 +389,7 @@
<Compile Include="FindSymbols\SyntaxTree\SyntaxTreeIndex.IdentifierInfo.cs" />
<Compile Include="NamingStyles\Serialization\NamingStylePreferencesExtensions.cs" />
<Compile Include="PatternMatching\PatternMatch.cs" />
<Compile Include="PatternMatching\PatternMatcher.AllLowerCamelCaseMatcher.cs" />
<Compile Include="PatternMatching\PatternMatcher.cs" />
<Compile Include="PatternMatching\PatternMatcher.PatternSegment.cs" />
<Compile Include="PatternMatching\PatternMatcher.TextChunk.cs" />
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册