提交 a6a581e0 编写于 作者: C CyrusNajmabadi

Add support for all lower-case camel case searches.

上级 19090ddf
......@@ -718,10 +718,16 @@ public void MatchAllLowerPattern4()
Assert.Null(TryMatchSingleWordPattern("AbcdefghijEfgHij", "efghij"));
}
// Runs the all-lowercase pattern "cofipro" against a camel-case candidate through
// TryMatchSingleWordPattern and asserts on the result.
// NOTE(review): the test name and the [|...|] markup in the candidate suggest that a
// camel-case match is expected here, yet the assertion requires a null result.
// Confirm which of the two is intended.
[Fact]
public void MatchAllLowerCamelCasePattern1()
{
Assert.Null(TryMatchSingleWordPattern("[|Co|]de[|Fi|]x[|Pro|]vider", "cofipro"));
}
private static IList<string> PartListToSubstrings(string identifier, StringBreaks parts)
{
List<string> result = new List<string>();
for (int i = 0; i < parts.Count; i++)
var result = new List<string>();
for (var i = 0; i < parts.Count; i++)
{
var span = parts[i];
result.Add(identifier.Substring(span.Start, span.Length));
......@@ -731,14 +737,10 @@ private static IList<string> PartListToSubstrings(string identifier, StringBreak
}
private static IList<string> BreakIntoCharacterParts(string identifier)
{
return PartListToSubstrings(identifier, StringBreaker.BreakIntoCharacterParts(identifier));
}
=> PartListToSubstrings(identifier, StringBreaker.BreakIntoCharacterParts(identifier));
private static IList<string> BreakIntoWordParts(string identifier)
{
return PartListToSubstrings(identifier, StringBreaker.BreakIntoWordParts(identifier));
}
=> PartListToSubstrings(identifier, StringBreaker.BreakIntoWordParts(identifier));
private static PatternMatch? TryMatchSingleWordPattern(string candidate, string pattern)
{
......
// Copyright (c) Microsoft. All Rights Reserved. Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
using System;
using System.Collections.Generic;
using System.Diagnostics;
using Microsoft.CodeAnalysis.Shared.Utilities;
using Microsoft.CodeAnalysis.Text;
namespace Microsoft.CodeAnalysis.PatternMatching
{
internal sealed partial class PatternMatcher : IDisposable
{
/// <summary>
/// Matches an all-lowercase pattern (e.g. "cofipro") against the word parts ("humps")
/// of a candidate (e.g. "CodeFixProvider"). Every way of splitting the pattern across
/// the candidate's humps is explored and the highest-weighted result is kept.
/// </summary>
private struct AllLowerCamelCaseMatcher
{
    private readonly string _candidate;
    private readonly bool _includeMatchedSpans;
    private readonly StringBreaks _candidateParts;
    private readonly TextChunk _patternChunk;
    private readonly string _patternText;

    public AllLowerCamelCaseMatcher(string candidate, bool includeMatchedSpans, StringBreaks candidateParts, TextChunk patternChunk)
    {
        _candidate = candidate;
        _includeMatchedSpans = includeMatchedSpans;
        _candidateParts = candidateParts;
        _patternChunk = patternChunk;
        _patternText = _patternChunk.Text;
    }

    /// <summary>
    /// Returns null if no match was found, 0 if a non-contiguous match was found that
    /// does not start at the beginning of the candidate, 1 if a contiguous match was
    /// found, 2 if a match was found that starts at the beginning of the candidate,
    /// and 3 if a contiguous match was found that starts at the beginning of the
    /// candidate.
    /// </summary>
    /// <param name="matchedSpans">
    /// The candidate spans that were matched, in order, when a match was found and
    /// matched spans were requested; otherwise null.
    /// </param>
    public int? TryMatch(out List<TextSpan> matchedSpans)
    {
        // We have something like cofipro and we want to match CodeFixProvider.
        //
        // Note that this is incredibly ambiguous.  We'd also want this to match
        // CorporateOfficePartsRoom.  So, for example, if we were to consume the "co"
        // as matching "Corporate", then "f" wouldn't match any camel hump.  So we
        // basically have to branch out and try all options at every character
        // in the pattern chunk.
        var (bestWeight, localMatchedSpans) = TryMatch(
            patternIndex: 0, candidatePartIndex: 0,
            contiguous: true, firstMatch: true);

        matchedSpans = localMatchedSpans;
        return bestWeight;
    }

    /// <summary>
    /// Tries to match the pattern, starting at <paramref name="patternIndex"/>, against
    /// the candidate humps starting at <paramref name="candidatePartIndex"/>.
    /// </summary>
    /// <param name="patternIndex">Index of the next unmatched pattern character.</param>
    /// <param name="candidatePartIndex">Index of the first hump that may be matched.</param>
    /// <param name="contiguous">Whether every hump matched so far directly followed the previous one.</param>
    /// <param name="firstMatch">Whether no hump has been matched yet.</param>
    /// <returns>The best weight and its spans, or a null weight if nothing matched.</returns>
    private (int? bestWeight, List<TextSpan> matchedSpans) TryMatch(
        int patternIndex, int candidatePartIndex,
        bool contiguous, bool firstMatch)
    {
        if (patternIndex == _patternText.Length)
        {
            // We hit the end.  So we were able to match against this candidate.
            return (bestWeight: contiguous ? CamelCaseContiguousBonus : 0,
                    matchedSpans: _includeMatchedSpans ? new List<TextSpan>() : null);
        }

        var bestWeight = default(int?);
        var bestMatchedSpans = default(List<TextSpan>);

        // Look for a part of the candidate that matches the current letter we're on.
        var patternCharacter = _patternText[patternIndex];
        for (var partIndex = candidatePartIndex; partIndex < _candidateParts.Count; partIndex++)
        {
            // Skipping humps that precede the very first matched hump does not break
            // contiguity; skipping a hump between two matched humps does.
            var stillContiguous = contiguous && (firstMatch || partIndex == candidatePartIndex);

            // Inspect the hump we are currently iterating over (not the hump the
            // search started from).
            var candidatePart = _candidateParts[partIndex];
            if (char.ToLower(_candidate[candidatePart.Start]) != patternCharacter)
            {
                continue;
            }

            // Found a part of the candidate string that matches the current pattern
            // character we're on.  i.e. we matched the c in cofipro against the C in
            // CodeFixProvider.
            //
            // Now, for each subsequent character, we need to both try to consume it
            // as part of the candidate text, or see if it should match the next hump
            // in the candidate.
            //
            // Note, if the candidate is something like CodeFixProvider and our pattern
            // is cofipro, and we've matched the 'f' against the 'F', then the max of
            // the pattern we'll want to consume is "fix" against "Fix".  We don't want
            // to consume parts of the pattern once we reach the next hump.

            // The "starts at the beginning" bonus only applies when the first hump we
            // match is the first hump of the candidate.
            var matchesFromStart = firstMatch && partIndex == 0;

            var (weight, matchedSpans) = TryConsumePatternOrMatchNextPart(
                patternIndex, partIndex, matchesFromStart, stillContiguous);
            if (weight == null)
            {
                // Even though we matched this current candidate part we failed to match
                // the remainder of the pattern.  Continue to the next candidate hump
                // to see if our pattern character will match it and potentially succeed.
                continue;
            }

            Debug.Assert(weight >= 0);
            if (weight == CamelCaseMaxWeight)
            {
                // We found a path that allowed us to match everything contiguously
                // from the beginning.  This is the best match possible.  So we can
                // just stop now and return this result.
                return (weight, matchedSpans);
            }

            // This is a decent match.  But something else could beat it, store
            // it if it's the best match we have so far, but keep searching.
            if (bestWeight == null || weight > bestWeight)
            {
                bestWeight = weight;
                bestMatchedSpans = matchedSpans;
            }
        }

        return (bestWeight, bestMatchedSpans);
    }

    /// <summary>
    /// Given that the pattern character at <paramref name="patternIndex"/> matches the
    /// start of the hump at <paramref name="partIndex"/>, tries consuming 1, 2, ...
    /// pattern characters against that hump and, for each length, recursively matches
    /// the rest of the pattern against the following humps.
    /// </summary>
    /// <param name="patternIndex">Index of the pattern character matched against the hump start.</param>
    /// <param name="partIndex">Index of the hump being consumed.</param>
    /// <param name="matchesFromStart">Whether this hump is the first matched hump and the first hump of the candidate.</param>
    /// <param name="contiguous">Whether the match has been contiguous up to and including this hump.</param>
    /// <returns>The best weight and its spans, or a null weight if nothing matched.</returns>
    private (int? bestWeight, List<TextSpan> matchedSpans) TryConsumePatternOrMatchNextPart(
        int patternIndex, int partIndex,
        bool matchesFromStart, bool contiguous)
    {
        var bestWeight = default(int?);
        var bestMatchedSpans = default(List<TextSpan>);

        var candidatePart = _candidateParts[partIndex];

        var maxPatternHumpLength = _patternText.Length - patternIndex;
        var maxCandidateHumpLength = candidatePart.Length;
        var maxHumpMatchLength = Math.Min(maxPatternHumpLength, maxCandidateHumpLength);

        // Inclusive upper bound: the whole hump (or the whole remaining pattern) is a
        // valid amount to consume.
        for (var possibleHumpMatchLength = 1; possibleHumpMatchLength <= maxHumpMatchLength; possibleHumpMatchLength++)
        {
            // Only the prefix of the length currently being tried has to match.
            if (!LowercaseSubstringsMatch(
                    _candidate, candidatePart.Start,
                    _patternText, patternIndex, possibleHumpMatchLength))
            {
                // Stop trying to consume once the pattern contents no longer matches
                // against the current candidate hump.
                break;
            }

            // This is the span of the part of the candidate we matched.
            var candidateMatchSpan = new TextSpan(candidatePart.Start, possibleHumpMatchLength);

            // The pattern substring 'f' has matched against 'F', or 'fi' has matched
            // against 'Fi'.  Recurse and let the rest of the pattern match the remainder
            // of the candidate.
            var (weight, matchedSpans) = TryMatch(
                patternIndex + possibleHumpMatchLength, partIndex + 1,
                contiguous, firstMatch: false);

            if (weight == null)
            {
                // Didn't match when we recursed.  Try to consume more and see if that
                // gets us somewhere.
                continue;
            }

            Debug.Assert(weight <= CamelCaseContiguousBonus);

            if (matchesFromStart)
            {
                weight += CamelCaseMatchesFromStartBonus;
            }

            if (weight == CamelCaseMaxWeight)
            {
                // We found a path that allowed us to match everything contiguously
                // from the beginning.  This is the best match possible.  So we can
                // just stop now and return this result.
                matchedSpans?.Insert(0, candidateMatchSpan);
                return (weight, matchedSpans);
            }

            // This is a decent match.  But something else could beat it, store
            // it if it's the best match we have so far, but keep searching.
            if (bestWeight == null || weight > bestWeight)
            {
                matchedSpans?.Insert(0, candidateMatchSpan);
                bestWeight = weight;
                bestMatchedSpans = matchedSpans;
            }
        }

        return (bestWeight, bestMatchedSpans);
    }

    /// <summary>
    /// Returns true if the <paramref name="length"/> characters of <paramref name="s1"/>
    /// starting at <paramref name="start1"/> equal, after lower-casing each character,
    /// the <paramref name="length"/> characters of <paramref name="s2"/> starting at
    /// <paramref name="start2"/>.
    /// </summary>
    private bool LowercaseSubstringsMatch(
        string s1, int start1, string s2, int start2, int length)
    {
        for (var i = 0; i < length; i++)
        {
            if (char.ToLower(s1[start1 + i]) != char.ToLower(s2[start2 + i]))
            {
                return false;
            }
        }

        return true;
    }
}
}
}
\ No newline at end of file
......@@ -21,6 +21,10 @@ namespace Microsoft.CodeAnalysis.PatternMatching
/// </summary>
internal sealed partial class PatternMatcher : IDisposable
{
private const int CamelCaseContiguousBonus = 1;
private const int CamelCaseMatchesFromStartBonus = 2;
private const int CamelCaseMaxWeight = CamelCaseContiguousBonus + CamelCaseMatchesFromStartBonus;
private static readonly char[] s_dotCharacterArray = { '.' };
private readonly object _gate = new object();
......@@ -356,39 +360,25 @@ private static bool ContainsUpperCaseLetter(string pattern)
}
}
if (!isLowercase)
{
// e) If the part was not entirely lowercase, then attempt a camel cased match as well.
if (patternChunk.CharacterSpans.Count > 0)
{
var candidateParts = GetWordSpans(candidate);
var camelCaseWeight = TryCamelCaseMatch(candidate, includeMatchSpans, candidateParts, patternChunk, CompareOptions.None, out var matchedSpans);
if (camelCaseWeight.HasValue)
var match = TryCamelCaseMatch(
candidate, includeMatchSpans, patternChunk,
punctuationStripped, isLowercase);
if (match.HasValue)
{
return new PatternMatch(
PatternMatchKind.CamelCase, punctuationStripped, isCaseSensitive: true, camelCaseWeight: camelCaseWeight,
matchedSpans: GetMatchedSpans(includeMatchSpans, matchedSpans));
}
camelCaseWeight = TryCamelCaseMatch(candidate, includeMatchSpans, candidateParts, patternChunk, CompareOptions.IgnoreCase, out matchedSpans);
if (camelCaseWeight.HasValue)
{
return new PatternMatch(
PatternMatchKind.CamelCase, punctuationStripped, isCaseSensitive: false, camelCaseWeight: camelCaseWeight,
matchedSpans: GetMatchedSpans(includeMatchSpans, matchedSpans));
}
}
return match.Value;
}
if (isLowercase)
{
// f) Is the pattern a substring of the candidate starting on one of the candidate's word boundaries?
// g) The word is all lower case. Is it a case insensitive substring of the candidate
// starting on a part boundary of the candidate?
// We could check every character boundary start of the candidate for the pattern. However, that's
// an m * n operation in the worst case. Instead, find the first instance of the pattern
// substring, and see if it starts on a capital letter. It seems unlikely that the user will try to
// filter the list based on a substring that starts on a capital letter and also with a lowercase one.
// (Pattern: fogbar, Candidate: quuxfogbarFogBar).
// We could check every character boundary start of the candidate for the pattern.
// However, that's an m * n operation in the worst case. Instead, find the first
// instance of the pattern substring, and see if it starts on a capital letter.
// It seems unlikely that the user will try to filter the list based on a substring
// that starts on a capital letter and also with a lowercase one. (Pattern: fogbar,
// Candidate: quuxfogbarFogBar).
if (patternChunk.Text.Length < candidate.Length)
{
if (caseInsensitiveIndex != -1 && char.IsUpper(candidate[caseInsensitiveIndex]))
......@@ -533,10 +523,13 @@ private static bool ContainsSpaceOrAsterisk(string text)
// candidate in a case *sensitive* manner. If so, return that there was a substring
// match.
//
// e) If the word was not entirely lowercase, then attempt a camel cased match as
// well.
// e) If the word was entirely lowercase, then attempt a special lower cased camel cased
// match. i.e. cofipro would match CodeFixProvider.
//
// f) If the word was not entirely lowercase, then attempt a normal camel cased match.
// i.e. CoFiPro would match CodeFixProvider, but CofiPro would not.
//
// f) The word is all lower case. Is it a case insensitive substring of the candidate starting
// g) The word is all lower case. Is it a case insensitive substring of the candidate starting
// on a part boundary of the candidate?
//
// Only if all words have some sort of match is the pattern considered matched.
......@@ -609,11 +602,66 @@ private bool PartStartsWith(string candidate, TextSpan candidatePart, string pat
/// <param name="compareOptions">Options for doing the comparison (case sensitive or not)</param>
/// <returns>True if the span identified by <paramref name="candidatePart"/> within <paramref name="candidate"/> starts with <paramref name="pattern"/></returns>
private bool PartStartsWith(string candidate, TextSpan candidatePart, string pattern, CompareOptions compareOptions)
=> PartStartsWith(candidate, candidatePart, pattern, new TextSpan(0, pattern.Length), compareOptions);
/// <summary>
/// Attempts a camel-case match of <paramref name="patternChunk"/> against
/// <paramref name="candidate"/>. All-lowercase patterns go through the all-lower
/// camel-case matcher; other patterns go through the upper-case camel-case matcher,
/// first case-sensitively and then ignoring case.
/// </summary>
/// <returns>The camel-case <see cref="PatternMatch"/>, or null if nothing matched.</returns>
private PatternMatch? TryCamelCaseMatch(
    string candidate, bool includeMatchSpans, TextChunk patternChunk,
    bool punctuationStripped, bool isLowercase)
{
    // f) A pattern that is not entirely lowercase is only attempted when the chunk
    //    has at least one character span.
    if (!isLowercase && patternChunk.CharacterSpans.Count <= 0)
    {
        return null;
    }

    var wordSpans = GetWordSpans(candidate);

    if (isLowercase)
    {
        // e) If the word was entirely lowercase, then attempt a special lower cased camel cased
        //    match.  i.e. cofipro would match CodeFixProvider.
        var lowerWeight = TryAllLowerCamelCaseMatch(
            candidate, includeMatchSpans, wordSpans, patternChunk, out var lowerSpans);
        if (!lowerWeight.HasValue)
        {
            return null;
        }

        return new PatternMatch(
            PatternMatchKind.CamelCase, punctuationStripped, isCaseSensitive: false, camelCaseWeight: lowerWeight,
            matchedSpans: GetMatchedSpans(includeMatchSpans, lowerSpans));
    }

    // f) If the word was not entirely lowercase, then attempt a normal camel cased match.
    //    i.e. CoFiPro would match CodeFixProvider, but CofiPro would not.
    //    The case-sensitive attempt runs first; the case-insensitive one is the fallback.
    return AttemptUpperCase(CompareOptions.None, caseSensitive: true)
        ?? AttemptUpperCase(CompareOptions.IgnoreCase, caseSensitive: false);

    // Runs one upper-case camel-case attempt with the given comparison options.
    PatternMatch? AttemptUpperCase(CompareOptions options, bool caseSensitive)
    {
        var upperWeight = TryUpperCaseCamelCaseMatch(
            candidate, includeMatchSpans, wordSpans, patternChunk, options, out var upperSpans);
        if (!upperWeight.HasValue)
        {
            return null;
        }

        return new PatternMatch(
            PatternMatchKind.CamelCase, punctuationStripped, isCaseSensitive: caseSensitive, camelCaseWeight: upperWeight,
            matchedSpans: GetMatchedSpans(includeMatchSpans, upperSpans));
    }
}
private int? TryAllLowerCamelCaseMatch(
string candidate,
bool includeMatchedSpans,
StringBreaks candidateParts,
TextChunk patternChunk,
out List<TextSpan> matchedSpans)
{
return PartStartsWith(candidate, candidatePart, pattern, new TextSpan(0, pattern.Length), compareOptions);
var matcher = new AllLowerCamelCaseMatcher(candidate, includeMatchedSpans, candidateParts, patternChunk);
return matcher.TryMatch(out matchedSpans);
}
private int? TryCamelCaseMatch(
private int? TryUpperCaseCamelCaseMatch(
string candidate,
bool includeMatchedSpans,
StringBreaks candidateParts,
......@@ -643,18 +691,18 @@ private bool PartStartsWith(string candidate, TextSpan candidatePart, string pat
Contract.Requires(contiguous.HasValue);
// We did match! We shall assign a weight to this
int weight = 0;
var weight = 0;
// Was this contiguous?
if (contiguous.Value)
{
weight += 1;
weight += CamelCaseContiguousBonus;
}
// Did we start at the beginning of the candidate?
if (firstMatch.Value == 0)
{
weight += 2;
weight += CamelCaseMatchesFromStartBonus;
}
return weight;
......
......@@ -388,6 +388,7 @@
<Compile Include="FindSymbols\SyntaxTree\SyntaxTreeIndex.IdentifierInfo.cs" />
<Compile Include="NamingStyles\Serialization\NamingStylePreferencesExtensions.cs" />
<Compile Include="PatternMatching\PatternMatch.cs" />
<Compile Include="PatternMatching\PatternMatcher.AllLowerCamelCaseMatcher.cs" />
<Compile Include="PatternMatching\PatternMatcher.cs" />
<Compile Include="PatternMatching\PatternMatcher.PatternSegment.cs" />
<Compile Include="PatternMatching\PatternMatcher.TextChunk.cs" />
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册