提交 4b443653 编写于 作者: P Paul Harrington

Use a custom encoding for TextSpans coming from the StringBreaker used in PatternMatcher.

上级 013ddf9c
...@@ -692,9 +692,16 @@ public void MatchAllLowerPattern4() ...@@ -692,9 +692,16 @@ public void MatchAllLowerPattern4()
Assert.Null(TryMatchSingleWordPattern("AbcdefghijEfgHij", "efghij")); Assert.Null(TryMatchSingleWordPattern("AbcdefghijEfgHij", "efghij"));
} }
private static IList<string> PartListToSubstrings(string identifier, List<TextSpan> parts) private static IList<string> PartListToSubstrings(string identifier, StringBreaks parts)
{ {
return parts.Select(span => identifier.Substring(span.Start, span.Length)).ToList(); List<string> result = new List<string>();
for(int i = 0; i < parts.Count; i++)
{
var span = parts[i];
result.Add(identifier.Substring(span.Start, span.Length));
}
return result;
} }
private static IList<string> BreakIntoCharacterParts(string identifier) private static IList<string> BreakIntoCharacterParts(string identifier)
......
...@@ -123,7 +123,7 @@ private static TextChunk[] BreakPatternIntoTextChunks(string pattern, bool verba ...@@ -123,7 +123,7 @@ private static TextChunk[] BreakPatternIntoTextChunks(string pattern, bool verba
private struct TextChunk private struct TextChunk
{ {
public readonly string Text; public readonly string Text;
public readonly List<TextSpan> CharacterSpans; public readonly StringBreaks CharacterSpans;
public TextChunk(string text) public TextChunk(string text)
{ {
...@@ -140,8 +140,8 @@ public TextChunk(string text) ...@@ -140,8 +140,8 @@ public TextChunk(string text)
private readonly Segment _fullPatternSegment; private readonly Segment _fullPatternSegment;
private readonly Segment[] _dotSeparatedSegments; private readonly Segment[] _dotSeparatedSegments;
private readonly Dictionary<string, List<TextSpan>> _stringToWordSpans = new Dictionary<string, List<TextSpan>>(); private readonly Dictionary<string, StringBreaks> _stringToWordSpans = new Dictionary<string, StringBreaks>();
private readonly Func<string, List<TextSpan>> _breakIntoWordSpans = StringBreaker.BreakIntoWordParts; private readonly Func<string, StringBreaks> _breakIntoWordSpans = StringBreaker.BreakIntoWordParts;
// PERF: Cache the culture's compareInfo to avoid the overhead of asking for them repeatedly in inner loops // PERF: Cache the culture's compareInfo to avoid the overhead of asking for them repeatedly in inner loops
private readonly CompareInfo _compareInfo; private readonly CompareInfo _compareInfo;
...@@ -292,7 +292,7 @@ public IEnumerable<PatternMatch> GetMatches(string candidate, string dottedConta ...@@ -292,7 +292,7 @@ public IEnumerable<PatternMatch> GetMatches(string candidate, string dottedConta
return MatchSegment(candidate, _fullPatternSegment, wantAllMatches: false, allMatches: out ignored); return MatchSegment(candidate, _fullPatternSegment, wantAllMatches: false, allMatches: out ignored);
} }
private List<TextSpan> GetWordSpans(string word) private StringBreaks GetWordSpans(string word)
{ {
lock (_gate) lock (_gate)
{ {
...@@ -351,8 +351,9 @@ private static bool ContainsUpperCaseLetter(string pattern) ...@@ -351,8 +351,9 @@ private static bool ContainsUpperCaseLetter(string pattern)
// word part. That way we don't match something like 'Class' when the user types 'a'. // word part. That way we don't match something like 'Class' when the user types 'a'.
// But we would match 'FooAttribute' (since 'Attribute' starts with 'a'). // But we would match 'FooAttribute' (since 'Attribute' starts with 'a').
var wordSpans = GetWordSpans(candidate); var wordSpans = GetWordSpans(candidate);
foreach (var span in wordSpans) for(int i = 0; i < wordSpans.Count; i++)
{ {
var span = wordSpans[i];
if (PartStartsWith(candidate, span, chunk.Text, CompareOptions.IgnoreCase)) if (PartStartsWith(candidate, span, chunk.Text, CompareOptions.IgnoreCase))
{ {
return new PatternMatch(PatternMatchKind.Substring, punctuationStripped, return new PatternMatch(PatternMatchKind.Substring, punctuationStripped,
...@@ -577,7 +578,7 @@ private bool PartStartsWith(string candidate, TextSpan candidatePart, string pat ...@@ -577,7 +578,7 @@ private bool PartStartsWith(string candidate, TextSpan candidatePart, string pat
return PartStartsWith(candidate, candidatePart, pattern, new TextSpan(0, pattern.Length), compareOptions); return PartStartsWith(candidate, candidatePart, pattern, new TextSpan(0, pattern.Length), compareOptions);
} }
private int? TryCamelCaseMatch(string candidate, List<TextSpan> candidateParts, TextChunk chunk, CompareOptions compareOption) private int? TryCamelCaseMatch(string candidate, StringBreaks candidateParts, TextChunk chunk, CompareOptions compareOption)
{ {
var chunkCharacterSpans = chunk.CharacterSpans; var chunkCharacterSpans = chunk.CharacterSpans;
......
// Copyright (c) Microsoft. All Rights Reserved. Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. // Copyright (c) Microsoft. All Rights Reserved. Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
using System;
using System.Collections.Generic; using System.Collections.Generic;
using System.Diagnostics;
using Microsoft.CodeAnalysis.Text; using Microsoft.CodeAnalysis.Text;
namespace Microsoft.CodeAnalysis.Shared.Utilities namespace Microsoft.CodeAnalysis.Shared.Utilities
{ {
/// <summary>
/// Values returned from StringBreaker routines.
/// Optimized for short strings with up to 4 spans.
/// Each span is encoded in a byte using 6 bits for a length and 2 bits as the gap.
/// Falls back to a <see cref="List{T}"/> if the encoding won't work.
/// </summary>
/// <remarks>
/// Encoded layout of one byte: the top 2 bits hold the gap (number of characters skipped
/// since the end of the previous span, 0..3) and the low 6 bits hold the span length (1..63).
/// A byte equal to zero terminates the sequence, which is why zero-length spans are never
/// encoded and why <c>default(StringBreaks)</c> reports a <see cref="Count"/> of zero.
/// </remarks>
internal struct StringBreaks
{
    // Non-null only when the compact encoding could not represent the spans.
    private readonly List<TextSpan> _spans;

    // Compact representation; only meaningful when _spans is null.
    private readonly FourBytes _encodedSpans;

    // Number of spans the compact representation can hold.
    private const int MaxEncodedSpans = 4;

    // Largest gap that fits in 2 bits.
    private const int MaxGap = 3;

    // Largest length that fits in 6 bits.
    private const int MaxLength = 63;

    /// <summary>
    /// Four inline bytes stored in a fixed-size buffer so the struct needs no heap allocation.
    /// </summary>
    private unsafe struct FourBytes
    {
        private fixed byte _bytes[4];

        /// <summary>
        /// Reads or writes the byte at <paramref name="index"/>. The index is only validated
        /// by a debug assertion, so callers must keep it within 0..3.
        /// </summary>
        public byte this[int index]
        {
            get
            {
                Debug.Assert(index >= 0 && index < 4);
                fixed (byte* b = _bytes)
                {
                    return b[index];
                }
            }

            set
            {
                Debug.Assert(index >= 0 && index < 4);
                fixed (byte* b = _bytes)
                {
                    b[index] = value;
                }
            }
        }
    }

    /// <summary>
    /// Builds the breaks for <paramref name="text"/> by repeatedly asking
    /// <paramref name="spanGenerator"/> for the next span at or after the current position.
    /// </summary>
    /// <param name="text">The string being broken up. Must not be null.</param>
    /// <param name="spanGenerator">
    /// Called with the text and a start position; returns the next span, or a zero-length
    /// span when there are no more. Must not be null.
    /// </param>
    /// <returns>
    /// A compact encoding when there are at most four spans, each no longer than 63 characters
    /// and preceded by a gap of at most 3 characters; otherwise a list-backed instance.
    /// </returns>
    public static StringBreaks Create(string text, Func<string, int, TextSpan> spanGenerator)
    {
        Debug.Assert(text != null);
        Debug.Assert(spanGenerator != null);

        if (text.Length == 0)
        {
            return default(StringBreaks);
        }

        int b = 0;

        // Explicitly zeroed: unused trailing bytes must be 0 because 0 is the terminator.
        FourBytes encodedBytes = default(FourBytes);

        for (int i = 0; i < text.Length;)
        {
            var span = spanGenerator(text, i);
            if (span.Length == 0)
            {
                // All done
                break;
            }

            Debug.Assert(span.Start >= i, "Bad generator.");

            if (b < MaxEncodedSpans)
            {
                int gap = span.Start - i;

                // A negative gap (misbehaving generator) cannot be encoded in 2 bits either,
                // so it takes the fallback path instead of producing a corrupt byte.
                if (gap >= 0 && gap <= MaxGap && span.Length <= MaxLength)
                {
                    encodedBytes[b++] = Encode(gap, span.Length);
                    i = span.End;
                    continue;
                }
            }

            // Too many spans, or one that does not fit: start over with a list.
            return CreateFallback(text, spanGenerator);
        }

        return new StringBreaks(encodedBytes);
    }

    /// <summary>
    /// Collects every span produced by <paramref name="spanGenerator"/> into a list-backed instance.
    /// </summary>
    private static StringBreaks CreateFallback(string text, Func<string, int, TextSpan> spanGenerator)
    {
        List<TextSpan> list = new List<TextSpan>();
        for (int i = 0; i < text.Length;)
        {
            var span = spanGenerator(text, i);
            if (span.Length == 0)
            {
                // All done
                break;
            }

            Debug.Assert(span.Start >= i, "Bad generator.");
            list.Add(span);
            i = span.End;
        }

        return new StringBreaks(list);
    }

    private StringBreaks(FourBytes encodedSpans)
    {
        this._encodedSpans = encodedSpans;
        this._spans = null;
    }

    private StringBreaks(List<TextSpan> spans)
    {
        this._encodedSpans = default(FourBytes);
        this._spans = spans;
    }

    /// <summary>
    /// Number of spans. For the compact form this is the number of leading non-zero bytes.
    /// </summary>
    public int Count
    {
        get
        {
            if (_spans != null)
            {
                return _spans.Count;
            }

            int i;
            for (i = 0; i < MaxEncodedSpans; i++)
            {
                if (_encodedSpans[i] == 0)
                {
                    break;
                }
            }

            return i;
        }
    }

    /// <summary>
    /// Gets the span at <paramref name="index"/>.
    /// </summary>
    /// <exception cref="IndexOutOfRangeException">
    /// <paramref name="index"/> is negative, or (compact form) is not less than <see cref="Count"/>.
    /// The list-backed form delegates range checking to the list's own indexer.
    /// </exception>
    public TextSpan this[int index]
    {
        get
        {
            if (index < 0)
            {
                throw new IndexOutOfRangeException(nameof(index));
            }

            if (_spans != null)
            {
                return _spans[index];
            }

            // Decode sequentially: each span's start depends on all the spans before it.
            // The loop is bounded so that a full buffer (four spans) with index >= 4
            // throws instead of reading past the end of the fixed buffer.
            int start = 0;
            for (int i = 0; i < MaxEncodedSpans; i++)
            {
                byte b = _encodedSpans[i];
                if (b == 0)
                {
                    break;
                }

                start += DecodeGap(b);
                int length = DecodeLength(b);
                if (i == index)
                {
                    return new TextSpan(start, length);
                }

                start += length;
            }

            throw new IndexOutOfRangeException(nameof(index));
        }
    }

    /// <summary>
    /// Packs a gap (0..<see cref="MaxGap"/>) and a length (1..<see cref="MaxLength"/>) into one byte.
    /// </summary>
    private static byte Encode(int gap, int length)
    {
        // Bounds are inclusive: Create admits gap == MaxGap and length == MaxLength.
        Debug.Assert(gap >= 0 && gap <= MaxGap);
        Debug.Assert(length > 0 && length <= MaxLength);
        return unchecked((byte)((gap << 6) | length));
    }

    // Low 6 bits.
    private static int DecodeLength(byte b) => b & 0x3F;

    // High 2 bits.
    private static int DecodeGap(byte b) => b >> 6;
}
internal static class StringBreaker internal static class StringBreaker
{ {
/// <summary> /// <summary>
/// Breaks an identifier string into constituent parts. /// Breaks an identifier string into constituent parts.
/// </summary> /// </summary>
public static List<TextSpan> BreakIntoCharacterParts(string identifier) public static StringBreaks BreakIntoCharacterParts(string identifier) => StringBreaks.Create(identifier, CharacterPartsGenerator);
{
return BreakIntoParts(identifier, word: false);
}
/// <summary> /// <summary>
/// Breaks an identifier string into constituent parts. /// Breaks an identifier string into constituent parts.
/// </summary> /// </summary>
public static List<TextSpan> BreakIntoWordParts(string identifier) public static StringBreaks BreakIntoWordParts(string identifier) => StringBreaks.Create(identifier, WordPartsGenerator);
{
return BreakIntoParts(identifier, word: true);
}
public static List<TextSpan> BreakIntoParts(string identifier, bool word) private static TextSpan CharacterPartsGenerator(string identifier, int start) => GenerateSpan(identifier, start, word: false);
{
var result = new List<TextSpan>();
int wordStart = 0; private static TextSpan WordPartsGenerator(string identifier, int start) => GenerateSpan(identifier, start, word: true);
for (int i = 1; i < identifier.Length; i++)
public static TextSpan GenerateSpan(string identifier, int wordStart, bool word)
{
for (int i = wordStart + 1; i < identifier.Length; i++)
{ {
var lastIsDigit = char.IsDigit(identifier[i - 1]); var lastIsDigit = char.IsDigit(identifier[i - 1]);
var currentIsDigit = char.IsDigit(identifier[i]); var currentIsDigit = char.IsDigit(identifier[i]);
...@@ -44,7 +212,7 @@ public static List<TextSpan> BreakIntoParts(string identifier, bool word) ...@@ -44,7 +212,7 @@ public static List<TextSpan> BreakIntoParts(string identifier, bool word)
{ {
if (!IsAllPunctuation(identifier, wordStart, i)) if (!IsAllPunctuation(identifier, wordStart, i))
{ {
result.Add(new TextSpan(wordStart, i - wordStart)); return new TextSpan(wordStart, i - wordStart);
} }
wordStart = i; wordStart = i;
...@@ -53,10 +221,10 @@ public static List<TextSpan> BreakIntoParts(string identifier, bool word) ...@@ -53,10 +221,10 @@ public static List<TextSpan> BreakIntoParts(string identifier, bool word)
if (!IsAllPunctuation(identifier, wordStart, identifier.Length)) if (!IsAllPunctuation(identifier, wordStart, identifier.Length))
{ {
result.Add(new TextSpan(wordStart, identifier.Length - wordStart)); return new TextSpan(wordStart, identifier.Length - wordStart);
} }
return result; return default(TextSpan);
} }
private static bool IsAllPunctuation(string identifier, int start, int end) private static bool IsAllPunctuation(string identifier, int start, int end)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册