PatternMatcher.cs 36.6 KB
Newer Older
1 2 3 4
// Copyright (c) Microsoft.  All Rights Reserved.  Licensed under the Apache License, Version 2.0.  See License.txt in the project root for license information.

using System;
using System.Collections.Generic;
5
using System.Collections.Immutable;
6
using System.Globalization;
7
using System.Linq;
8 9
using Microsoft.CodeAnalysis.Shared;
using Microsoft.CodeAnalysis.Shared.Utilities;
10 11 12
using Microsoft.CodeAnalysis.Text;
using Roslyn.Utilities;

13
namespace Microsoft.CodeAnalysis.PatternMatching
14 15 16 17 18 19 20 21
{
    /// <summary>
    /// The pattern matcher is thread-safe.  However, it maintains an internal cache of
    /// information as it is used.  Therefore, you should not keep it around forever and should get
    /// and release the matcher appropriately once you no longer need it.
    /// Also, while the pattern matcher is culture aware, it uses the culture specified in the
    /// constructor.
    /// </summary>
22
    internal sealed partial class PatternMatcher : IDisposable
23
    {
24
        public const int NoBonus = 0;
25 26 27
        public const int CamelCaseContiguousBonus = 1;
        public const int CamelCaseMatchesFromStartBonus = 2;
        public const int CamelCaseMaxWeight = CamelCaseContiguousBonus + CamelCaseMatchesFromStartBonus;
28

29
        private static readonly char[] s_dotCharacterArray = { '.' };
30

31
        private readonly object _gate = new object();
32

33
        private readonly bool _allowFuzzyMatching;
34
        private readonly bool _invalidPattern;
35 36
        private readonly PatternSegment _fullPatternSegment;
        private readonly PatternSegment[] _dotSeparatedPatternSegments;
37

38
        private readonly Dictionary<string, StringBreaks> _stringToWordSpans = new Dictionary<string, StringBreaks>();
C
CyrusNajmabadi 已提交
39
        private static readonly Func<string, StringBreaks> _breakIntoWordSpans = StringBreaker.BreakIntoWordParts;
40 41

        // PERF: Cache the culture's compareInfo to avoid the overhead of asking for them repeatedly in inner loops
42
        private readonly CompareInfo _compareInfo;
43 44 45 46

        /// <summary>
        /// Construct a new PatternMatcher using the specified culture.
        /// </summary>
47
        /// <param name="pattern">The pattern to make the pattern matcher for.</param>
48
        /// <param name="culture">The culture to use for string searching and comparison.</param>
49 50 51
        /// <param name="allowFuzzyMatching">Whether or not close matches should count as matches.</param>
        public PatternMatcher(
            string pattern,
52 53
            CultureInfo culture = null,
            bool allowFuzzyMatching = false)
54
        {
55
            culture = culture ?? CultureInfo.CurrentCulture;
56
            pattern = pattern.Trim();
57
            _compareInfo = culture.CompareInfo;
58
            _allowFuzzyMatching = allowFuzzyMatching;
59

60
            _fullPatternSegment = new PatternSegment(pattern, allowFuzzyMatching);
61 62 63 64

            if (pattern.IndexOf('.') < 0)
            {
                // PERF: Avoid string.Split allocations when the pattern doesn't contain a dot.
65
                _dotSeparatedPatternSegments = pattern.Length > 0
66
                    ? new PatternSegment[1] { new PatternSegment(pattern.Trim(), allowFuzzyMatching) }
67
                    : Array.Empty<PatternSegment>();
68 69 70
            }
            else
            {
71
                _dotSeparatedPatternSegments = pattern.Split(s_dotCharacterArray, StringSplitOptions.RemoveEmptyEntries)
72
                                                .Select(text => new PatternSegment(text.Trim(), allowFuzzyMatching))
73 74 75
                                                .ToArray();
            }

76
            _invalidPattern = _dotSeparatedPatternSegments.Length == 0 || _dotSeparatedPatternSegments.Any(s => s.IsInvalid);
77 78
        }

79 80 81
        public void Dispose()
        {
            _fullPatternSegment.Dispose();
82

83
            foreach (var segment in _dotSeparatedPatternSegments)
84 85 86
            {
                segment.Dispose();
            }
87 88 89 90 91 92

            foreach (var kvp in _stringToWordSpans)
            {
                kvp.Value.Dispose();
            }
            _stringToWordSpans.Clear();
93 94
        }

95
        public bool IsDottedPattern => _dotSeparatedPatternSegments.Length > 1;
96

97 98 99
        private bool SkipMatch(string candidate)
        {
            return _invalidPattern || string.IsNullOrWhiteSpace(candidate);
100 101
        }

102
        public ImmutableArray<PatternMatch> GetMatches(string candidate)
103
            => GetMatches(candidate, includeMatchSpans: false);
104

105 106 107 108 109
        /// <summary>
        /// Determines if a given candidate string matches under a multiple word query text, as you
        /// would find in features like Navigate To.
        /// </summary>
        /// <param name="candidate">The word being tested.</param>
110
        /// <param name="includeMatchSpans">Whether or not the matched spans should be included with results</param>
111 112
        /// <returns>If this was a match, a set of match types that occurred while matching the
        /// patterns. If it was not a match, it returns null.</returns>
113
        public ImmutableArray<PatternMatch> GetMatches(string candidate, bool includeMatchSpans)
114
        {
115
            if (SkipMatch(candidate))
116
            {
117
                return ImmutableArray<PatternMatch>.Empty;
118
            }
119

C
CyrusNajmabadi 已提交
120
            var result = MatchPatternSegment(candidate, includeMatchSpans, _fullPatternSegment, fuzzyMatch: true);
121 122 123 124 125
            if (!result.IsEmpty)
            {
                return result;
            }

C
CyrusNajmabadi 已提交
126
            return MatchPatternSegment(candidate, includeMatchSpans, _fullPatternSegment, fuzzyMatch: false);
127 128
        }

129
        public ImmutableArray<PatternMatch> GetMatchesForLastSegmentOfPattern(string candidate)
130
        {
131
            if (SkipMatch(candidate))
132
            {
133 134 135
                return ImmutableArray<PatternMatch>.Empty;
            }

C
CyrusNajmabadi 已提交
136
            var result = MatchPatternSegment(candidate, includeMatchSpans: false, patternSegment: _dotSeparatedPatternSegments.Last(), fuzzyMatch: false);
137 138 139
            if (!result.IsEmpty)
            {
                return result;
140
            }
141

C
CyrusNajmabadi 已提交
142
            return MatchPatternSegment(candidate, includeMatchSpans: false, patternSegment: _dotSeparatedPatternSegments.Last(), fuzzyMatch: true);
143 144
        }

145
        public PatternMatches GetMatches(string candidate, string dottedContainer)
146
            => GetMatches(candidate, dottedContainer, includeMatchSpans: false);
147

148 149 150 151 152 153 154 155 156 157 158 159
        /// <summary>
        /// Matches a pattern against a candidate, and an optional dotted container for the 
        /// candidate. If the container is provided, and the pattern itself contains dots, then
        /// the pattern will be tested against the candidate and container.  Specifically,
        /// the part of the pattern after the last dot will be tested against the candidate. If
        /// a match occurs there, then the remaining dot-separated portions of the pattern will
        /// be tested against every successive portion of the container from right to left.
        /// 
        /// i.e. if you have a pattern of "Con.WL" and the candidate is "WriteLine" with a 
        /// dotted container of "System.Console", then "WL" will be tested against "WriteLine".
        /// With a match found there, "Con" will then be tested against "Console".
        /// </summary>
160
        public PatternMatches GetMatches(
161
            string candidate, string dottedContainer, bool includeMatchSpans)
162
        {
163 164 165 166 167 168 169
            var result = GetMatches(candidate, dottedContainer, includeMatchSpans, fuzzyMatch: false);
            if (!result.IsEmpty)
            {
                return result;
            }

            return GetMatches(candidate, dottedContainer, includeMatchSpans, fuzzyMatch: true);
170 171
        }

172
        private PatternMatches GetMatches(
173 174 175 176
            string candidate, string dottedContainer, bool includeMatchSpans, bool fuzzyMatch)
        {
            if (fuzzyMatch && !_allowFuzzyMatching)
            {
177
                return PatternMatches.Empty;
178 179
            }

180 181
            if (SkipMatch(candidate))
            {
182
                return PatternMatches.Empty;
183 184 185 186 187
            }

            // First, check that the last part of the dot separated pattern matches the name of the
            // candidate.  If not, then there's no point in proceeding and doing the more
            // expensive work.
C
CyrusNajmabadi 已提交
188
            var candidateMatch = MatchPatternSegment(candidate, includeMatchSpans, _dotSeparatedPatternSegments.Last(), fuzzyMatch);
C
CyrusNajmabadi 已提交
189
            if (candidateMatch.IsDefaultOrEmpty)
190
            {
191
                return PatternMatches.Empty;
192 193 194
            }

            dottedContainer = dottedContainer ?? string.Empty;
B
beep boop 已提交
195
            var containerParts = dottedContainer.Split(s_dotCharacterArray, StringSplitOptions.RemoveEmptyEntries);
196 197 198

            // -1 because the last part was checked against the name, and only the rest
            // of the parts are checked against the container.
199
            var relevantDotSeparatedSegmentLength = _dotSeparatedPatternSegments.Length - 1;
200
            if (relevantDotSeparatedSegmentLength > containerParts.Length)
201 202 203
            {
                // There weren't enough container parts to match against the pattern parts.
                // So this definitely doesn't match.
204
                return PatternMatches.Empty;
205 206 207 208
            }

            // So far so good.  Now break up the container for the candidate and check if all
            // the dotted parts match up correctly.
209
            var containerMatches = ArrayBuilder<PatternMatch>.GetInstance();
210

211
            try
212
            {
213 214 215 216
                // Don't need to check the last segment.  We did that as the very first bail out step.
                for (int i = 0, j = containerParts.Length - relevantDotSeparatedSegmentLength;
                     i < relevantDotSeparatedSegmentLength;
                     i++, j++)
217
                {
218
                    var segment = _dotSeparatedPatternSegments[i];
219
                    var containerName = containerParts[j];
C
CyrusNajmabadi 已提交
220
                    var containerMatch = MatchPatternSegment(containerName, includeMatchSpans, segment, fuzzyMatch);
C
CyrusNajmabadi 已提交
221
                    if (containerMatch.IsDefaultOrEmpty)
222 223
                    {
                        // This container didn't match the pattern piece.  So there's no match at all.
224
                        return PatternMatches.Empty;
225
                    }
226

227
                    containerMatches.AddRange(containerMatch);
228
                }
229

230 231
                // Success, this symbol's full name matched against the dotted name the user was asking
                // about.
232
                return new PatternMatches(candidateMatch, containerMatches.ToImmutable());
233 234 235
            }
            finally
            {
236
                containerMatches.Free();
237
            }
238 239
        }

240 241 242 243 244
        /// <summary>
        /// Determines if a given candidate string matches under a multiple word query text, as you
        /// would find in features like Navigate To.
        /// </summary>
        /// <remarks>
245
        /// PERF: This is slightly faster and uses less memory than <see cref="GetMatches(string, bool)"/>
246 247 248
        /// so, unless you need to know the full set of matches, use this version.
        /// </remarks>
        /// <param name="candidate">The word being tested.</param>
249
        /// <param name="includeMatchSpans">Whether or not the matched spans should be included with results</param>
250 251
        /// <returns>If this was a match, the first element of the set of match types that occurred while matching the
        /// patterns. If it was not a match, it returns null.</returns>
252 253
        public PatternMatch? GetFirstMatch(
            string candidate, bool includeMatchSpans)
254
        {
255
            if (SkipMatch(candidate))
256 257 258 259
            {
                return null;
            }

260 261
            return GetFirstMatchWorker(candidate, includeMatchSpans, fuzzyMatch: false) ??
                   GetFirstMatchWorker(candidate, includeMatchSpans, fuzzyMatch: true);
262 263
        }

264 265
        private PatternMatch? GetFirstMatchWorker(
            string candidate, bool includeMatchSpans, bool fuzzyMatch)
266
        {
267 268 269 270 271 272 273 274 275 276 277
            return MatchPatternSegment(
                candidate, includeMatchSpans, _fullPatternSegment, 
                wantAllMatches: false, allMatches: out _, fuzzyMatch: fuzzyMatch);
        }

        private StringBreaks GetWordSpans(string word)
        {
            lock (_gate)
            {
                return _stringToWordSpans.GetOrAdd(word, _breakIntoWordSpans);
            }
278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293
        }

        private static bool ContainsUpperCaseLetter(string pattern)
        {
            // Expansion of "foreach(char ch in pattern)" to avoid a CharEnumerator allocation
            for (int i = 0; i < pattern.Length; i++)
            {
                if (char.IsUpper(pattern[i]))
                {
                    return true;
                }
            }

            return false;
        }

C
CyrusNajmabadi 已提交
294
        private PatternMatch? MatchPatternChunk(
295 296
            string candidate,
            bool includeMatchSpans,
C
CyrusNajmabadi 已提交
297
            TextChunk patternChunk,
298 299
            bool punctuationStripped,
            bool fuzzyMatch)
300
        {
C
CyrusNajmabadi 已提交
301
            int caseInsensitiveIndex = _compareInfo.IndexOf(candidate, patternChunk.Text, CompareOptions.IgnoreCase);
302
            if (caseInsensitiveIndex == 0)
303
            {
C
CyrusNajmabadi 已提交
304
                if (patternChunk.Text.Length == candidate.Length)
305 306 307
                {
                    // a) Check if the part matches the candidate entirely, in an case insensitive or
                    //    sensitive manner.  If it does, return that there was an exact match.
308
                    return new PatternMatch(
C
CyrusNajmabadi 已提交
309
                        PatternMatchKind.Exact, punctuationStripped, isCaseSensitive: candidate == patternChunk.Text,
310
                        matchedSpan: GetMatchedSpan(includeMatchSpans, 0, candidate.Length));
311 312 313 314 315
                }
                else
                {
                    // b) Check if the part is a prefix of the candidate, in a case insensitive or sensitive
                    //    manner.  If it does, return that there was a prefix match.
316
                    return new PatternMatch(
C
CyrusNajmabadi 已提交
317 318
                        PatternMatchKind.Prefix, punctuationStripped, isCaseSensitive: _compareInfo.IsPrefix(candidate, patternChunk.Text),
                        matchedSpan: GetMatchedSpan(includeMatchSpans, 0, patternChunk.Text.Length));
319 320 321
                }
            }

C
CyrusNajmabadi 已提交
322
            var isLowercase = !ContainsUpperCaseLetter(patternChunk.Text);
323 324
            if (isLowercase)
            {
325
                if (caseInsensitiveIndex > 0)
326 327 328 329 330 331 332 333
                {
                    // c) If the part is entirely lowercase, then check if it is contained anywhere in the
                    //    candidate in a case insensitive manner.  If so, return that there was a substring
                    //    match. 
                    //
                    //    Note: We only have a substring match if the lowercase part is prefix match of some
                    //    word part. That way we don't match something like 'Class' when the user types 'a'.
                    //    But we would match 'FooAttribute' (since 'Attribute' starts with 'a').
334 335 336 337 338
                    //
                    //    Also, if we matched at location right after punctuation, then this is a good
                    //    substring match.  i.e. if the user is testing mybutton against _myButton
                    //    then this should hit. As we really are finding the match at the beginning of 
                    //    a word.
C
CyrusNajmabadi 已提交
339 340
                    if (char.IsPunctuation(candidate[caseInsensitiveIndex - 1]) ||
                        char.IsPunctuation(patternChunk.Text[0]))
341 342 343 344 345 346 347 348 349
                    {
                        return new PatternMatch(
                            PatternMatchKind.Substring, punctuationStripped,
                            isCaseSensitive: PartStartsWith(
                                candidate, new TextSpan(caseInsensitiveIndex, patternChunk.Text.Length),
                                patternChunk.Text, CompareOptions.None),
                            matchedSpan: GetMatchedSpan(includeMatchSpans, caseInsensitiveIndex, patternChunk.Text.Length));
                    }

C
Cyrus Najmabadi 已提交
350
                    var wordSpans = GetWordSpans(candidate);
C
CyrusNajmabadi 已提交
351
                    for (int i = 0, n = wordSpans.GetCount(); i < n; i++)
352
                    {
353
                        var span = wordSpans[i];
C
CyrusNajmabadi 已提交
354
                        if (PartStartsWith(candidate, span, patternChunk.Text, CompareOptions.IgnoreCase))
355
                        {
B
beep boop 已提交
356
                            return new PatternMatch(PatternMatchKind.Substring, punctuationStripped,
C
CyrusNajmabadi 已提交
357 358
                                isCaseSensitive: PartStartsWith(candidate, span, patternChunk.Text, CompareOptions.None),
                                matchedSpan: GetMatchedSpan(includeMatchSpans, span.Start, patternChunk.Text.Length));
359 360 361 362 363 364 365 366 367
                        }
                    }
                }
            }
            else
            {
                // d) If the part was not entirely lowercase, then check if it is contained in the
                //    candidate in a case *sensitive* manner. If so, return that there was a substring
                //    match.
C
CyrusNajmabadi 已提交
368
                var caseSensitiveIndex = _compareInfo.IndexOf(candidate, patternChunk.Text);
369
                if (caseSensitiveIndex > 0)
370
                {
371 372
                    return new PatternMatch(
                        PatternMatchKind.Substring, punctuationStripped, isCaseSensitive: true,
C
CyrusNajmabadi 已提交
373
                        matchedSpan: GetMatchedSpan(includeMatchSpans, caseSensitiveIndex, patternChunk.Text.Length));
374 375 376
                }
            }

377 378 379 380
            var match = TryCamelCaseMatch(
                candidate, includeMatchSpans, patternChunk,
                punctuationStripped, isLowercase);
            if (match.HasValue)
381
            {
382
                return match.Value;
383 384 385 386
            }

            if (isLowercase)
            {
387 388
                //   g) The word is all lower case. Is it a case insensitive substring of the candidate
                //      starting on a part boundary of the candidate?
389

390 391 392 393 394 395
                // We could check every character boundary start of the candidate for the pattern. 
                // However, that's an m * n operation in the worst case. Instead, find the first 
                // instance of the pattern  substring, and see if it starts on a capital letter. 
                // It seems unlikely that the user will try to filter the list based on a substring
                // that starts on a capital letter and also with a lowercase one. (Pattern: fogbar, 
                // Candidate: quuxfogbarFogBar).
C
CyrusNajmabadi 已提交
396
                if (patternChunk.Text.Length < candidate.Length)
397
                {
398
                    if (caseInsensitiveIndex != -1 && char.IsUpper(candidate[caseInsensitiveIndex]))
399
                    {
400 401
                        return new PatternMatch(
                            PatternMatchKind.Substring, punctuationStripped, isCaseSensitive: false,
C
CyrusNajmabadi 已提交
402
                            matchedSpan: GetMatchedSpan(includeMatchSpans, caseInsensitiveIndex, patternChunk.Text.Length));
403 404 405 406
                    }
                }
            }

407
            if (fuzzyMatch)
408
            {
C
CyrusNajmabadi 已提交
409
                if (patternChunk.SimilarityChecker.AreSimilar(candidate))
410 411 412 413 414 415
                {
                    return new PatternMatch(
                        PatternMatchKind.Fuzzy, punctuationStripped, isCaseSensitive: false, matchedSpan: null);
                }
            }

416 417 418
            return null;
        }

419 420 421 422 423
        private static TextSpan? GetMatchedSpan(bool includeMatchSpans, int start, int length)
        {
            return includeMatchSpans ? new TextSpan(start, length) : (TextSpan?)null;
        }

424 425 426 427 428 429 430 431 432 433 434 435 436 437
        private static bool ContainsSpaceOrAsterisk(string text)
        {
            for (int i = 0; i < text.Length; i++)
            {
                char ch = text[i];
                if (ch == ' ' || ch == '*')
                {
                    return true;
                }
            }

            return false;
        }

C
CyrusNajmabadi 已提交
438 439
        private ImmutableArray<PatternMatch> MatchPatternSegment(
            string candidate, bool includeMatchSpans, PatternSegment patternSegment, bool fuzzyMatch)
440
        {
441 442
            if (fuzzyMatch && !_allowFuzzyMatching)
            {
443
                return ImmutableArray<PatternMatch>.Empty;
444 445
            }

446
            var singleMatch = MatchPatternSegment(candidate, includeMatchSpans, patternSegment,
C
CyrusNajmabadi 已提交
447
                wantAllMatches: true, fuzzyMatch: fuzzyMatch, allMatches: out var matches);
448

449 450 451
            return singleMatch.HasValue
                ? ImmutableArray.Create(singleMatch.Value)
                : matches;
452 453 454 455 456 457 458 459 460 461 462 463
        }

        /// <summary>
        /// Internal helper for MatchPatternInternal
        /// </summary>
        /// <remarks>
        /// PERF: Designed to minimize allocations in common cases.
        /// If there's no match, then null is returned.
        /// If there's a single match, or the caller only wants the first match, then it is returned (as a Nullable)
        /// If there are multiple matches, and the caller wants them all, then a List is allocated.
        /// </remarks>
        /// <param name="candidate">The word being tested.</param>
464
        /// <param name="segment">The segment of the pattern to check against the candidate.</param>
465
        /// <param name="wantAllMatches">Does the caller want all matches or just the first?</param>
466
        /// <param name="fuzzyMatch">If a fuzzy match should be performed</param>
467
        /// <param name="allMatches">If <paramref name="wantAllMatches"/> is true, and there's more than one match, then the list of all matches.</param>
468
        /// <param name="includeMatchSpans">Whether or not the matched spans should be included with results</param>
469
        /// <returns>If there's only one match, then the return value is that match. Otherwise it is null.</returns>
470
        private PatternMatch? MatchPatternSegment(
471 472
            string candidate,
            bool includeMatchSpans,
473
            PatternSegment segment,
474
            bool wantAllMatches,
475
            bool fuzzyMatch,
476
            out ImmutableArray<PatternMatch> allMatches)
477
        {
478
            allMatches = ImmutableArray<PatternMatch>.Empty;
479

480 481 482 483 484
            if (fuzzyMatch && !_allowFuzzyMatching)
            {
                return null;
            }

485
            // First check if the segment matches as is.  This is also useful if the segment contains
486
            // characters we would normally strip when splitting into parts that we also may want to
487
            // match in the candidate.  For example if the segment is "@int" and the candidate is
488 489
            // "@int", then that will show up as an exact match here.
            //
490 491 492
            // Note: if the segment contains a space or an asterisk then we must assume that it's a
            // multi-word segment.
            if (!ContainsSpaceOrAsterisk(segment.TotalTextChunk.Text))
493
            {
494
                var match = MatchPatternChunk(candidate, includeMatchSpans,
495
                    segment.TotalTextChunk, punctuationStripped: false, fuzzyMatch: fuzzyMatch);
496 497 498 499 500 501
                if (match != null)
                {
                    return match;
                }
            }

502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530
            // The logic for pattern matching is now as follows:
            //
            // 1) Break the segment passed in into words.  Breaking is rather simple and a
            //    good way to think about it that if gives you all the individual alphanumeric words
            //    of the pattern.
            //
            // 2) For each word try to match the word against the candidate value.
            //
            // 3) Matching is as follows:
            //
            //   a) Check if the word matches the candidate entirely, in an case insensitive or
            //    sensitive manner.  If it does, return that there was an exact match.
            //
            //   b) Check if the word is a prefix of the candidate, in a case insensitive or
            //      sensitive manner.  If it does, return that there was a prefix match.
            //
            //   c) If the word is entirely lowercase, then check if it is contained anywhere in the
            //      candidate in a case insensitive manner.  If so, return that there was a substring
            //      match. 
            //
            //      Note: We only have a substring match if the lowercase part is prefix match of
            //      some word part. That way we don't match something like 'Class' when the user
            //      types 'a'. But we would match 'FooAttribute' (since 'Attribute' starts with
            //      'a').
            //
            //   d) If the word was not entirely lowercase, then check if it is contained in the
            //      candidate in a case *sensitive* manner. If so, return that there was a substring
            //      match.
            //
531 532 533 534 535
            //   e) If the word was entirely lowercase, then attempt a special lower cased camel cased 
            //      match.  i.e. cofipro would match CodeFixProvider.
            //
            //   f) If the word was not entirely lowercase, then attempt a normal camel cased match.
            //      i.e. CoFiPro would match CodeFixProvider, but CofiPro would not.  
536
            //
537
            //   g) The word is all lower case. Is it a case insensitive substring of the candidate starting 
538 539 540 541
            //      on a part boundary of the candidate?
            //
            // Only if all words have some sort of match is the pattern considered matched.

542
            var matches = ArrayBuilder<PatternMatch>.GetInstance();
543

544
            try
545
            {
C
CyrusNajmabadi 已提交
546
                var subWordTextChunks = segment.SubWordTextChunks;
547
                foreach (var subWordTextChunk in subWordTextChunks)
548
                {
549
                    // Try to match the candidate with this word
C
CyrusNajmabadi 已提交
550
                    var result = MatchPatternChunk(candidate, includeMatchSpans,
551 552 553 554 555
                        subWordTextChunk, punctuationStripped: true, fuzzyMatch: fuzzyMatch);
                    if (result == null)
                    {
                        return null;
                    }
556

557
                    matches.Add(result.Value);
558 559
                }

560
                if (wantAllMatches && matches.Count >= 2)
561 562 563 564 565 566 567 568
                {
                    allMatches = matches.ToImmutable();
                    return null;
                }
                else
                {
                    return matches.FirstOrNullable();
                }
569 570 571 572
            }
            finally
            {
                matches.Free();
573 574 575
            }
        }

576 577
        private static bool IsWordChar(char ch)
            => char.IsLetterOrDigit(ch) || ch == '_';
578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596

        /// <summary>
        /// Do the two 'parts' match? i.e. Does the candidate part start with the pattern part?
        /// </summary>
        /// <param name="candidate">The candidate text</param>
        /// <param name="candidatePart">The span within the <paramref name="candidate"/> text</param>
        /// <param name="pattern">The pattern text</param>
        /// <param name="patternPart">The span within the <paramref name="pattern"/> text</param>
        /// <param name="compareOptions">Options for doing the comparison (case sensitive or not)</param>
        /// <returns>True if the span identified by <paramref name="candidatePart"/> within <paramref name="candidate"/> starts with
        /// the span identified by <paramref name="patternPart"/> within <paramref name="pattern"/>.</returns>
        private bool PartStartsWith(string candidate, TextSpan candidatePart, string pattern, TextSpan patternPart, CompareOptions compareOptions)
        {
            if (patternPart.Length > candidatePart.Length)
            {
                // Pattern part is longer than the candidate part. There can never be a match.
                return false;
            }

597
            return _compareInfo.Compare(candidate, candidatePart.Start, patternPart.Length, pattern, patternPart.Start, patternPart.Length, compareOptions) == 0;
598 599 600 601 602 603 604 605 606 607 608
        }

        /// <summary>
        /// Does the given part start with the given pattern?
        /// </summary>
        /// <param name="candidate">The candidate text</param>
        /// <param name="candidatePart">The span within the <paramref name="candidate"/> text</param>
        /// <param name="pattern">The pattern text</param>
        /// <param name="compareOptions">Options for doing the comparison (case sensitive or not)</param>
        /// <returns>True if the span identified by <paramref name="candidatePart"/> within <paramref name="candidate"/> starts with <paramref name="pattern"/></returns>
        private bool PartStartsWith(string candidate, TextSpan candidatePart, string pattern, CompareOptions compareOptions)
609 610 611 612 613
            => PartStartsWith(candidate, candidatePart, pattern, new TextSpan(0, pattern.Length), compareOptions);

        private PatternMatch? TryCamelCaseMatch(
            string candidate, bool includeMatchSpans, TextChunk patternChunk,
            bool punctuationStripped, bool isLowercase)
614
        {
615 616 617 618 619 620 621 622 623 624 625
            if (isLowercase)
            {
                //   e) If the word was entirely lowercase, then attempt a special lower cased camel cased 
                //      match.  i.e. cofipro would match CodeFixProvider.
                var candidateParts = GetWordSpans(candidate);
                var camelCaseWeight = TryAllLowerCamelCaseMatch(
                    candidate, includeMatchSpans, candidateParts, patternChunk, out var matchedSpans);
                if (camelCaseWeight.HasValue)
                {
                    return new PatternMatch(
                        PatternMatchKind.CamelCase, punctuationStripped, isCaseSensitive: false, camelCaseWeight: camelCaseWeight,
626
                        matchedSpans: matchedSpans);
627 628 629 630 631 632
                }
            }
            else
            {
                //   f) If the word was not entirely lowercase, then attempt a normal camel cased match.
                //      i.e. CoFiPro would match CodeFixProvider, but CofiPro would not.  
C
CyrusNajmabadi 已提交
633
                if (patternChunk.CharacterSpans.GetCount() > 0)
634 635 636 637 638 639
                {
                    var candidateParts = GetWordSpans(candidate);
                    var camelCaseWeight = TryUpperCaseCamelCaseMatch(candidate, includeMatchSpans, candidateParts, patternChunk, CompareOptions.None, out var matchedSpans);
                    if (camelCaseWeight.HasValue)
                    {
                        return new PatternMatch(
640 641
                            PatternMatchKind.CamelCase, punctuationStripped, isCaseSensitive: true,
                            camelCaseWeight: camelCaseWeight, matchedSpans: matchedSpans);
642 643 644 645 646 647
                    }

                    camelCaseWeight = TryUpperCaseCamelCaseMatch(candidate, includeMatchSpans, candidateParts, patternChunk, CompareOptions.IgnoreCase, out matchedSpans);
                    if (camelCaseWeight.HasValue)
                    {
                        return new PatternMatch(
648 649
                            PatternMatchKind.CamelCase, punctuationStripped, isCaseSensitive: false, 
                            camelCaseWeight: camelCaseWeight, matchedSpans: matchedSpans);
650 651 652 653 654 655 656 657 658 659 660 661
                    }
                }
            }

            return null;
        }

        private int? TryAllLowerCamelCaseMatch(
            string candidate,
            bool includeMatchedSpans,
            StringBreaks candidateParts,
            TextChunk patternChunk,
662
            out ImmutableArray<TextSpan> matchedSpans)
663 664 665
        {
            var matcher = new AllLowerCamelCaseMatcher(candidate, includeMatchedSpans, candidateParts, patternChunk);
            return matcher.TryMatch(out matchedSpans);
666 667
        }

668 669
        private int? TryUpperCaseCamelCaseMatch(
            string candidate,
670
            bool includeMatchedSpans,
671 672
            StringBreaks candidateParts,
            TextChunk patternChunk,
673
            CompareOptions compareOption,
674
            out ImmutableArray<TextSpan> matchedSpans)
675
        {
C
CyrusNajmabadi 已提交
676
            var patternChunkCharacterSpans = patternChunk.CharacterSpans;
677

678 679 680 681 682
            // Note: we may have more pattern parts than candidate parts.  This is because multiple
            // pattern parts may match a candidate part.  For example "SiUI" against "SimpleUI".
            // We'll have 3 pattern parts Si/U/I against two candidate parts Simple/UI.  However, U
            // and I will both match in UI. 

C
Cyrus Najmabadi 已提交
683 684
            int currentCandidate = 0;
            int currentChunkSpan = 0;
685 686 687
            int? firstMatch = null;
            bool? contiguous = null;

C
CyrusNajmabadi 已提交
688 689
            var patternChunkCharacterSpansCount = patternChunkCharacterSpans.GetCount();
            var candidatePartsCount = candidateParts.GetCount();
690

691
            var result = ArrayBuilder<TextSpan>.GetInstance();
692 693 694
            while (true)
            {
                // Let's consider our termination cases
C
CyrusNajmabadi 已提交
695
                if (currentChunkSpan == patternChunkCharacterSpansCount)
696 697 698 699 700
                {
                    Contract.Requires(firstMatch.HasValue);
                    Contract.Requires(contiguous.HasValue);

                    // We did match! We shall assign a weight to this
701
                    var weight = 0;
702 703 704 705

                    // Was this contiguous?
                    if (contiguous.Value)
                    {
706
                        weight += CamelCaseContiguousBonus;
707 708 709 710 711
                    }

                    // Did we start at the beginning of the candidate?
                    if (firstMatch.Value == 0)
                    {
712
                        weight += CamelCaseMatchesFromStartBonus;
713 714
                    }

715 716 717 718
                    matchedSpans = includeMatchedSpans
                        ? new NormalizedTextSpanCollection(result).ToImmutableArray()
                        : ImmutableArray<TextSpan>.Empty;
                    result.Free();
719 720
                    return weight;
                }
C
CyrusNajmabadi 已提交
721
                else if (currentCandidate == candidatePartsCount)
722 723
                {
                    // No match, since we still have more of the pattern to hit
724 725
                    matchedSpans = ImmutableArray<TextSpan>.Empty;
                    result.Free();
726 727 728
                    return null;
                }

C
Cyrus Najmabadi 已提交
729
                var candidatePart = candidateParts[currentCandidate];
730 731 732 733 734 735
                bool gotOneMatchThisCandidate = false;

                // Consider the case of matching SiUI against SimpleUIElement. The candidate parts
                // will be Simple/UI/Element, and the pattern parts will be Si/U/I.  We'll match 'Si'
                // against 'Simple' first.  Then we'll match 'U' against 'UI'. However, we want to
                // still keep matching pattern parts against that candidate part. 
C
CyrusNajmabadi 已提交
736
                for (; currentChunkSpan < patternChunkCharacterSpansCount; currentChunkSpan++)
737
                {
C
CyrusNajmabadi 已提交
738
                    var patternChunkCharacterSpan = patternChunkCharacterSpans[currentChunkSpan];
739 740 741 742

                    if (gotOneMatchThisCandidate)
                    {
                        // We've already gotten one pattern part match in this candidate.  We will
743
                        // only continue trying to consume pattern parts if the last part and this
744
                        // part are both upper case.  
C
CyrusNajmabadi 已提交
745 746
                        if (!char.IsUpper(patternChunk.Text[patternChunkCharacterSpans[currentChunkSpan - 1].Start]) ||
                            !char.IsUpper(patternChunk.Text[patternChunkCharacterSpans[currentChunkSpan].Start]))
747 748 749 750 751
                        {
                            break;
                        }
                    }

C
CyrusNajmabadi 已提交
752
                    if (!PartStartsWith(candidate, candidatePart, patternChunk.Text, patternChunkCharacterSpan, compareOption))
753 754 755 756
                    {
                        break;
                    }

757 758
                    if (includeMatchedSpans)
                    {
759
                        result.Add(new TextSpan(candidatePart.Start, patternChunkCharacterSpan.Length));
760 761
                    }

762 763
                    gotOneMatchThisCandidate = true;

C
Cyrus Najmabadi 已提交
764
                    firstMatch = firstMatch ?? currentCandidate;
765 766 767 768 769 770

                    // If we were contiguous, then keep that value.  If we weren't, then keep that
                    // value.  If we don't know, then set the value to 'true' as an initial match is
                    // obviously contiguous.
                    contiguous = contiguous ?? true;

C
CyrusNajmabadi 已提交
771
                    candidatePart = new TextSpan(candidatePart.Start + patternChunkCharacterSpan.Length, candidatePart.Length - patternChunkCharacterSpan.Length);
772 773 774 775 776 777 778 779 780 781 782 783
                }

                // Check if we matched anything at all.  If we didn't, then we need to unset the
                // contiguous bit if we currently had it set.
                // If we haven't set the bit yet, then that means we haven't matched anything so
                // far, and we don't want to change that.
                if (!gotOneMatchThisCandidate && contiguous.HasValue)
                {
                    contiguous = false;
                }

                // Move onto the next candidate.
C
Cyrus Najmabadi 已提交
784
                currentCandidate++;
785 786 787
            }
        }
    }
788
}