diff --git a/src/Workspaces/Core/Portable/FindSymbols/SymbolTree/SymbolTreeInfo_Serialization.cs b/src/Workspaces/Core/Portable/FindSymbols/SymbolTree/SymbolTreeInfo_Serialization.cs
index e55ae36555a401579742aee3f9f3cd9975d19205..0e3e8dd11e511c351b8c9cdbafd15fd172761359 100644
--- a/src/Workspaces/Core/Portable/FindSymbols/SymbolTree/SymbolTreeInfo_Serialization.cs
+++ b/src/Workspaces/Core/Portable/FindSymbols/SymbolTree/SymbolTreeInfo_Serialization.cs
@@ -13,7 +13,7 @@ namespace Microsoft.CodeAnalysis.FindSymbols
     internal partial class SymbolTreeInfo : IObjectWritable
     {
         private const string PrefixMetadataSymbolTreeInfo = "_";
-        private const string SerializationFormat = "5";
+        private const string SerializationFormat = "6";
 
         /// <summary>
         /// this is for a metadata reference in a solution
diff --git a/src/Workspaces/Core/Portable/Utilities/ArraySlice.cs b/src/Workspaces/Core/Portable/Utilities/ArraySlice.cs
index 5a2ca8743ae8dfdd60e1b85f8bb4d14d401ecaca..c87fa75fb5ddfc23777686c5c9a0c96abb2a1d7d 100644
--- a/src/Workspaces/Core/Portable/Utilities/ArraySlice.cs
+++ b/src/Workspaces/Core/Portable/Utilities/ArraySlice.cs
@@ -4,6 +4,7 @@
 using System.Linq;
 using System.Text;
 using System.Threading.Tasks;
+using Microsoft.CodeAnalysis.Text;
 
 namespace Roslyn.Utilities
 {
@@ -19,6 +20,10 @@ public ArraySlice(T[] array) : this(array, 0, array.Length)
         {
         }
 
+        public ArraySlice(T[] array, TextSpan span) : this(array, span.Start, span.Length)
+        {
+        }
+
         public ArraySlice(T[] array, int start, int length) : this()
         {
             _array = array;
diff --git a/src/Workspaces/Core/Portable/Utilities/BKTree.Builder.cs b/src/Workspaces/Core/Portable/Utilities/BKTree.Builder.cs
index 7fec528356524b32b951e36640dd2a33dbafb61e..ba7a05d0a7da246d08677ef21d4ff3b4756a94a9 100644
--- a/src/Workspaces/Core/Portable/Utilities/BKTree.Builder.cs
+++ b/src/Workspaces/Core/Portable/Utilities/BKTree.Builder.cs
@@ -1,9 +1,12 @@
 using System;
+using System.Collections;
 using System.Collections.Generic;
 using System.Diagnostics;
 using System.Linq;
 using System.Text;
 using System.Threading.Tasks;
+using Microsoft.CodeAnalysis;
+using Microsoft.CodeAnalysis.Text;
 
 namespace Roslyn.Utilities
 {
@@ -13,7 +16,8 @@ private class Builder
         {
             private const int CompactEdgeAllocationSize = 4;
 
-            private readonly char[][] _values;
+            private readonly char[] _allLowerCaseCharacters;
+            private readonly TextSpan[] _characterSpans;
 
             // Note: while building a BKTree we have to store children with parents, keyed by the
             // edit distance between the two. Naive implementations might store a list or dictionary
@@ -73,21 +77,38 @@ private class Builder
 
             public Builder(IEnumerable<string> values)
             {
-                _values = values.Select(v => v.ToLower())
-                                .Distinct()
-                                .Select(v => v.ToCharArray())
-                                .Where(a => a.Length > 0).ToArray();
+                // Concatenate the lower-cased form of every distinct, non-empty value into a
+                // single char[] and remember each value's range in it as a TextSpan. The buffer
+                // is sized from the distinct values only, so duplicates add no unused padding.
+                var distinctValues = values.Where(v => v.Length > 0).Distinct(CaseInsensitiveComparison.Comparer).ToArray();
+                var charCount = distinctValues.Sum(v => v.Length);
+
+                _allLowerCaseCharacters = new char[charCount];
+                _characterSpans = new TextSpan[distinctValues.Length];
+
+                var characterIndex = 0;
+                for (int i = 0; i < distinctValues.Length; i++)
+                {
+                    var value = distinctValues[i];
+                    _characterSpans[i] = new TextSpan(characterIndex, value.Length);
+
+                    foreach (var ch in value)
+                    {
+                        _allLowerCaseCharacters[characterIndex] = char.ToLower(ch);
+                        characterIndex++;
+                    }
+                }
 
                 // We will have one node for each string value that we are adding.
-                _builderNodes = new BuilderNode[_values.Length];
-                _compactEdges = new Edge[_values.Length * CompactEdgeAllocationSize];
+                _builderNodes = new BuilderNode[distinctValues.Length];
+                _compactEdges = new Edge[distinctValues.Length * CompactEdgeAllocationSize];
             }
 
             internal BKTree Create()
             {
-                for (var i = 0; i < _values.Length; i++)
+                for (var i = 0; i < _characterSpans.Length; i++)
                 {
-                    Add(_values[i], insertionIndex: i);
+                    Add(_characterSpans[i], insertionIndex: i);
                 }
 
                 var nodes = new Node[_builderNodes.Length];
@@ -98,7 +119,7 @@ internal BKTree Create()
 
                 BuildArrays(nodes, edges);
 
-                return new BKTree(nodes, edges);
+                return new BKTree(_allLowerCaseCharacters, nodes, edges);
             }
 
             private void BuildArrays(Node[] nodes, Edge[] edges)
@@ -109,8 +130,7 @@ private void BuildArrays(Node[] nodes, Edge[] edges)
                     var builderNode = _builderNodes[i];
                     var edgeCount = builderNode.EdgeCount;
 
-                    nodes[i] = new Node(
-                        builderNode.LowerCaseCharacters, edgeCount, currentEdgeIndex);
+                    nodes[i] = new Node(builderNode.CharacterSpan, edgeCount, currentEdgeIndex);
 
                     if (edgeCount > 0)
                     {
@@ -140,11 +160,11 @@ private void BuildArrays(Node[] nodes, Edge[] edges)
                 Debug.Assert(currentEdgeIndex == edges.Length);
             }
 
-            private void Add(char[] lowerCaseCharacters, int insertionIndex)
+            private void Add(TextSpan characterSpan, int insertionIndex)
             {
                 if (insertionIndex == 0)
                 {
-                    _builderNodes[insertionIndex] = new BuilderNode(lowerCaseCharacters);
+                    _builderNodes[insertionIndex] = new BuilderNode(characterSpan);
                     return;
                 }
 
@@ -156,7 +176,9 @@ private void Add(char[] lowerCaseCharacters, int insertionIndex)
                     // Determine the edit distance between these two words. Note: we do not use
                     // a threshold here as we need the actual edit distance so we can actually
                     // determine what edge to make or walk.
-                    var editDistance = EditDistance.GetEditDistance(currentNode.LowerCaseCharacters, lowerCaseCharacters);
+                    var editDistance = EditDistance.GetEditDistance(
+                        new ArraySlice<char>(_allLowerCaseCharacters, currentNode.CharacterSpan),
+                        new ArraySlice<char>(_allLowerCaseCharacters, characterSpan));
 
                     if (editDistance == 0)
                     {
@@ -174,13 +196,13 @@ private void Add(char[] lowerCaseCharacters, int insertionIndex)
                     }
 
                     // found the node we want to add the child node to.
-                    AddChildNode(lowerCaseCharacters, insertionIndex, currentNode.EdgeCount, currentNodeIndex, editDistance);
+                    AddChildNode(characterSpan, insertionIndex, currentNode.EdgeCount, currentNodeIndex, editDistance);
                     return;
                 }
             }
 
             private void AddChildNode(
-                char[] lowerCaseCharacters, int insertionIndex, int currentNodeEdgeCount, int currentNodeIndex, int editDistance)
+                TextSpan characterSpan, int insertionIndex, int currentNodeEdgeCount, int currentNodeIndex, int editDistance)
             {
                 // Node doesn't have an edge with this edit distance. Three cases to handle:
                 // 1) there are less than 4 edges. We simply place the edge into the correct
@@ -215,7 +237,7 @@ private void Add(char[] lowerCaseCharacters, int insertionIndex)
                 }
 
                 _builderNodes[currentNodeIndex].EdgeCount++;
-                _builderNodes[insertionIndex] = new BuilderNode(lowerCaseCharacters);
+                _builderNodes[insertionIndex] = new BuilderNode(characterSpan);
                 return;
             }
 
@@ -247,13 +269,13 @@ private bool TryGetChildIndex(BuilderNode currentNode, int currentNodeIndex, int
 
             private struct BuilderNode
             {
-                public readonly char[] LowerCaseCharacters;
+                public readonly TextSpan CharacterSpan;
                 public int EdgeCount;
                 public Dictionary<int, int> SpilloverEdges;
 
-                public BuilderNode(char[] lowerCaseCharacters) : this()
+                public BuilderNode(TextSpan characterSpan) : this()
                 {
-                    this.LowerCaseCharacters = lowerCaseCharacters;
+                    this.CharacterSpan = characterSpan;
                 }
             }
         }
diff --git a/src/Workspaces/Core/Portable/Utilities/BKTree.Node.cs b/src/Workspaces/Core/Portable/Utilities/BKTree.Node.cs
index c5447a097cdf67889110c861278a85e168c9bb1e..d2bbec8f31b4716b90fa1605c77a628c5ba86cfb 100644
--- a/src/Workspaces/Core/Portable/Utilities/BKTree.Node.cs
+++ b/src/Workspaces/Core/Portable/Utilities/BKTree.Node.cs
@@ -3,6 +3,7 @@
 using System.Linq;
 using System.Text;
 using System.Threading.Tasks;
+using Microsoft.CodeAnalysis.Text;
 
 namespace Roslyn.Utilities
 {
@@ -12,7 +13,7 @@ private struct Node
         {
-            // The string this node corresponds to. Stored in char[] format so we can easily compute
-            // edit distances on it.
-            public readonly char[] LowerCaseCharacters;
+            // The string this node corresponds to. Specifically, this span is the range of the
+            // tree's shared _allLowerCaseCharacters array that holds this node's characters.
+            public readonly TextSpan CharacterSpan;
 
             // How many children/edges this node has.
             public readonly int EdgeCount;
@@ -21,23 +22,24 @@ private struct Node
             // _edges[FirstEdgeIndex, FirstEdgeIndex + EdgeCount)
             public readonly int FirstEdgeIndex;
 
-            public Node(char[] lowerCaseCharacters, int edgeCount, int firstEdgeIndex)
+            public Node(TextSpan characterSpan, int edgeCount, int firstEdgeIndex)
             {
-                LowerCaseCharacters = lowerCaseCharacters;
+                CharacterSpan = characterSpan;
                 EdgeCount = edgeCount;
                 FirstEdgeIndex = firstEdgeIndex;
             }
 
             internal void WriteTo(ObjectWriter writer)
             {
-                writer.WriteValue(LowerCaseCharacters);
+                writer.WriteInt32(CharacterSpan.Start);
+                writer.WriteInt32(CharacterSpan.Length);
                 writer.WriteInt32(EdgeCount);
                 writer.WriteInt32(FirstEdgeIndex);
             }
 
             internal static Node ReadFrom(ObjectReader reader)
             {
-                return new Node((char[])reader.ReadValue(), reader.ReadInt32(), reader.ReadInt32());
+                return new Node(new TextSpan(reader.ReadInt32(), reader.ReadInt32()), reader.ReadInt32(), reader.ReadInt32());
             }
         }
     }
diff --git a/src/Workspaces/Core/Portable/Utilities/BKTree.Serialization.cs b/src/Workspaces/Core/Portable/Utilities/BKTree.Serialization.cs
index c45f3f74c610b2239c653bdaa48ba6867038bc56..6a218965b333a316479d8234cb68b0a848865012 100644
--- a/src/Workspaces/Core/Portable/Utilities/BKTree.Serialization.cs
+++ b/src/Workspaces/Core/Portable/Utilities/BKTree.Serialization.cs
@@ -10,6 +10,8 @@ internal partial class BKTree
     {
         internal void WriteTo(ObjectWriter writer)
         {
+            writer.WriteValue(_allLowerCaseCharacters);
+
             writer.WriteInt32(this._nodes.Length);
             foreach (var node in _nodes)
             {
@@ -25,6 +27,7 @@ internal void WriteTo(ObjectWriter writer)
 
         internal static BKTree ReadFrom(ObjectReader reader)
         {
+            var allLowerCaseCharacters = (char[])reader.ReadValue();
             var nodes = new Node[reader.ReadInt32()];
             for (var i = 0; i < nodes.Length; i++)
             {
@@ -37,7 +40,7 @@ internal static BKTree ReadFrom(ObjectReader reader)
                 edges[i] = Edge.ReadFrom(reader);
             }
 
-            return new BKTree(nodes, edges);
+            return new BKTree(allLowerCaseCharacters, nodes, edges);
         }
     }
 }
diff --git a/src/Workspaces/Core/Portable/Utilities/BKTree.cs b/src/Workspaces/Core/Portable/Utilities/BKTree.cs
index ae3ceeca0a3710d92b213245626996783162712a..5623d0b8ffa45ad0c775488cba211cc31196142e 100644
--- a/src/Workspaces/Core/Portable/Utilities/BKTree.cs
+++ b/src/Workspaces/Core/Portable/Utilities/BKTree.cs
@@ -13,6 +13,7 @@ namespace Roslyn.Utilities
     internal partial class BKTree
     {
         public static readonly BKTree Empty = new BKTree(
+            SpecializedCollections.EmptyArray<char>(),
             SpecializedCollections.EmptyArray<Node>(),
             SpecializedCollections.EmptyArray<Edge>());
 
@@ -28,12 +29,16 @@ internal partial class BKTree
         // * of course '0' is only for the root case. All nodes state where in _edges
         // their child edges range starts. So the children for any node are in _edges from
         // [node.FirstEdgeIndex, node.FirstEdgeIndex + node.EdgeCount)
 
+        // The lower-cased characters of every word stored in the tree, packed end to end.
+        // Each node's CharacterSpan identifies the range of this array holding its word.
+        private readonly char[] _allLowerCaseCharacters;
         private readonly Node[] _nodes;
         private readonly Edge[] _edges;
 
-        private BKTree(Node[] nodes, Edge[] edges)
+        private BKTree(char[] allLowerCaseCharacters, Node[] nodes, Edge[] edges)
         {
+            _allLowerCaseCharacters = allLowerCaseCharacters;
             _nodes = nodes;
             _edges = edges;
         }
@@ -79,13 +84,15 @@ private void Lookup(Node currentNode, char[] queryCharacters, int queryLength, i
             // We always want to compute the real edit distance (ignoring any thresholds). This is
             // because we need that edit distance to appropriately determine which edges to walk
             // in the tree.
+            var characterSpan = currentNode.CharacterSpan;
             var editDistance = EditDistance.GetEditDistance(
-                new ArraySlice<char>(currentNode.LowerCaseCharacters), new ArraySlice<char>(queryCharacters, 0, queryLength));
+                new ArraySlice<char>(_allLowerCaseCharacters, characterSpan),
+                new ArraySlice<char>(queryCharacters, 0, queryLength));
 
             if (editDistance <= threshold)
             {
                 // Found a match.
-                result.Add(new string(currentNode.LowerCaseCharacters));
+                result.Add(new string(_allLowerCaseCharacters, characterSpan.Start, characterSpan.Length));
             }
 
             var min = editDistance - threshold;