提交 a4d77cea 编写于 作者: P Pharring

Use a binary search to find child nodes by position in large lists

In a benchmark with 200,000 methods in a type, ChildThatContainsPosition was the clear bottleneck. For such cases, the syntax list is represented using a special type called WithLotsOfChildren (both VB and C# have this optimization) that has a constant-time implementation of GetSlotOffset. Given that, we can find the correct slot via binary search instead of a linear search.
With this change, the benchmark compilation went from 45 seconds to 25 seconds. (changeset 1412034)
上级 a5bcdd4c
......@@ -2,6 +2,7 @@
using System;
using Roslyn.Utilities;
using System.Diagnostics;
namespace Microsoft.CodeAnalysis.CSharp.Syntax.InternalSyntax
{
......@@ -39,6 +40,22 @@ public override int GetSlotOffset(int index)
return _childOffsets[index];
}
/// <summary>
/// Find the slot that contains the given offset.
/// </summary>
/// <param name="offset">The target offset. Must be at least 0 and strictly less than <see cref="GreenNode.FullWidth"/>.</param>
/// <returns>The slot index of the slot containing the given offset.</returns>
/// <remarks>
/// This implementation binary-searches the precomputed child offsets for the last slot
/// that starts at or before <paramref name="offset"/>. It deliberately does not use
/// <see cref="Array.BinarySearch(Array, object)"/>: when several consecutive slots share
/// the same start offset (which happens when a child has zero width), that method may
/// return any of the equal entries, including a zero-width slot that cannot contain the
/// offset. Picking the last matching slot agrees with the linear search in the base class.
/// </remarks>
public override int FindSlotIndexContainingOffset(int offset)
{
    Debug.Assert(offset >= 0 && offset < FullWidth);

    // Upper-bound search: after the loop, 'low' is the index of the first slot whose
    // start offset is strictly greater than the target (or the array length if none is).
    // NOTE(review): relies on _childOffsets being sorted in non-decreasing order, as the
    // previous Array.BinarySearch call also did.
    int low = 0;
    int high = _childOffsets.Length;
    while (low < high)
    {
        int middle = low + ((high - low) >> 1);
        if (_childOffsets[middle] > offset)
        {
            high = middle;
        }
        else
        {
            low = middle + 1;
        }
    }

    // The slot just before the upper bound is the last one starting at or before the offset.
    return low - 1;
}
private static int[] CalculateOffsets(ArrayElement<CSharpSyntaxNode>[] children)
{
int n = children.Length;
......
......@@ -246,5 +246,37 @@ public void Extensions()
Assert.Equal(-1, list.IndexOf(SyntaxKind.WhereClause));
Assert.False(list.Any(SyntaxKind.WhereClause));
}
[Fact]
public void WithLotsOfChildrenTest()
{
    // Build "a,b,c,...,z": every argument and every comma is exactly one character wide,
    // so each position in the parsed list maps to exactly one child.
    const string letters = "abcdefghijklmnopqrstuvwxyz";
    var sourceText = string.Join(",", (IEnumerable<char>)letters);
    var argumentList = SyntaxFactory.ParseArgumentList(sourceText);
    Assert.Equal(letters.Length, argumentList.Arguments.Count);

    for (int position = 0; position < argumentList.FullWidth; position++)
    {
        var child = ChildSyntaxList.ChildThatContainsPosition(argumentList, position);
        Assert.Equal(position, child.Position);
        Assert.Equal(1, child.Width);

        bool expectComma = position % 2 != 0;
        if (expectComma)
        {
            // Odd positions hold the separators.
            Assert.True(child.IsToken);
            Assert.True(child.IsKind(SyntaxKind.CommaToken));

            // + 1 because there is a (missing) OpenParen at slot 0
            int expectedIndex = position + 1;
            Assert.Equal(expectedIndex, child.AsToken().Index);
            continue;
        }

        // Even positions hold the arguments, in alphabetical order.
        Assert.True(child.IsNode);
        Assert.True(child.IsKind(SyntaxKind.Argument));
        string expectedName = letters[position / 2].ToString();
        Assert.Equal(expectedName, ((ArgumentSyntax)child).Expression.ToString());
    }
}
}
}
......@@ -173,53 +173,76 @@ internal static SyntaxNodeOrToken ChildThatContainsPosition(SyntaxNode node, int
// The targetPosition must already be within this node
Debug.Assert(node.FullSpan.Contains(targetPosition));
var red = node;
var green = node.Green;
var position = node.Position;
var idx = 0;
var index = 0;
#if DEBUG
int dbgLoopCount = 0;
#endif
do
Debug.Assert(!green.IsList);
// Find the green node that spans the target position.
// We will be skipping whole slots here so we will not loop for long
// The max possible number of slots is 11 (TypeDeclarationSyntax)
// and typically much less than that
int slot;
for (slot = 0; ; slot++)
{
GreenNode greenChild = green.GetSlot(slot);
if (greenChild != null)
{
var endPosition = position + greenChild.FullWidth;
if (targetPosition < endPosition)
{
// Descend into the child element
green = greenChild;
break;
}
position = endPosition;
index += Occupancy(greenChild);
}
}
// Realize the red node (if any)
var red = node.GetNodeSlot(slot);
if (!green.IsList)
{
// This is a single node or token.
// If it is a node, we are done.
if (red != null)
{
return red;
}
// Otherwise will have to make a token with current green and position
}
else
{
#if DEBUG
// Since we never have "lists of lists" this should never loop more than once.
Debug.Assert(dbgLoopCount < 2, "A list of lists. Impossible!");
dbgLoopCount++;
#endif
// Find the green node that spans the target position.
// We will be skipping whole slots here so we will not loop for long
// The max possible number of slots is 11 (TypeDeclarationSyntax)
// and typically much less than that
for (int slotIndex = 0; ; slotIndex++)
slot = green.FindSlotIndexContainingOffset(targetPosition - position);
// Realize the red node (if any)
if (red != null)
{
GreenNode greenChild = green.GetSlot(slotIndex);
if (greenChild != null)
// It is a red list of nodes (separated or not)
red = red.GetNodeSlot(slot);
if (red != null)
{
var endPosition = position + greenChild.FullWidth;
if (targetPosition < endPosition)
{
// Realize the red node (if any)
if (red != null)
{
red = red.GetNodeSlot(slotIndex);
}
// Descend into the child element
green = greenChild;
break;
}
position = endPosition;
idx += Occupancy(greenChild);
return red;
}
// Must be a separator
}
} while (green.IsList);
// Reached a single node or token.
// If it is a node, we are done. Otherwise, make a token with current child and position.
return red ?? new SyntaxNodeOrToken(node, green, position, idx);
// Otherwise we have a token.
position += green.GetSlotOffset(slot);
green = green.GetSlot(slot);
// Since we can't have "lists of lists", the Occupancy calculation for
// child elements in a list is simple.
index += slot;
}
// Make a token with current child and position.
return new SyntaxNodeOrToken(node, green, position, index);
}
/// <summary>
......
......@@ -155,6 +155,39 @@ protected virtual int GetSlotCount()
}
public abstract int GetSlotOffset(int index);
/// <summary>
/// Locate the slot whose span covers the given offset.
/// </summary>
/// <param name="offset">The target offset, measured from the start of this node. Must be at least 0 and less than <see cref="FullWidth"/>.</param>
/// <returns>The index of the slot that contains <paramref name="offset"/>.</returns>
/// <remarks>
/// This default implementation walks the slots in order, summing their full widths,
/// and stops at the first slot whose end lies beyond the offset. Null slots are skipped.
/// Derived classes that can answer the question faster should override it.
/// </remarks>
public virtual int FindSlotIndexContainingOffset(int offset)
{
    Debug.Assert(0 <= offset && offset < FullWidth);

    int slot = 0;
    int endOfSlot = 0;
    while (true)
    {
        Debug.Assert(slot < SlotCount);

        var current = GetSlot(slot);
        if (current != null)
        {
            // endOfSlot is the offset just past the current child.
            endOfSlot += current.FullWidth;
            if (offset < endOfSlot)
            {
                return slot;
            }
        }

        slot++;
    }
}
#endregion
#region Flags
......
......@@ -455,6 +455,21 @@ Namespace Microsoft.CodeAnalysis.VisualBasic.Syntax.InternalSyntax
Return _childOffsets(index)
End Function
''' <summary>
''' Find the slot that contains the given offset.
''' </summary>
''' <param name="offset">The target offset. Must be at least 0 and strictly less than <see cref="GreenNode.FullWidth"/>.</param>
''' <returns>The slot index of the slot containing the given offset.</returns>
''' <remarks>
''' This implementation binary-searches the precomputed child offsets for the last slot
''' that starts at or before <paramref name="offset"/>. It deliberately does not use
''' Array.BinarySearch: when several consecutive slots share the same start offset
''' (which happens when a child has zero width), that method may return any of the equal
''' entries, including a zero-width slot that cannot contain the offset.
''' </remarks>
Public Overrides Function FindSlotIndexContainingOffset(offset As Integer) As Integer
    Debug.Assert(offset >= 0 AndAlso offset < FullWidth)

    ' Upper-bound search: after the loop, low is the index of the first slot whose
    ' start offset is strictly greater than the target (or the array length if none is).
    ' NOTE(review): relies on _childOffsets being sorted in non-decreasing order, as the
    ' previous Array.BinarySearch call also did.
    Dim low As Integer = 0
    Dim high As Integer = _childOffsets.Length
    While low < high
        Dim middle As Integer = low + ((high - low) >> 1)
        If _childOffsets(middle) > offset Then
            high = middle
        Else
            low = middle + 1
        End If
    End While

    ' The slot just before the upper bound is the last one starting at or before the offset.
    Return low - 1
End Function
Friend Overrides Function SetDiagnostics(errors() As DiagnosticInfo) As GreenNode
Return New WithLotsOfChildren(errors, Me.GetAnnotations(), Me._children, Me._childOffsets)
End Function
......
' Copyright (c) Microsoft. All Rights Reserved. Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
Imports Microsoft.CodeAnalysis.VisualBasic.Syntax
Namespace Microsoft.CodeAnalysis.VisualBasic.UnitTests
Public Class SyntaxListTests
......@@ -204,5 +206,38 @@ Namespace Microsoft.CodeAnalysis.VisualBasic.UnitTests
Assert.False(list.Any(SyntaxKind.WhereClause))
End Sub
<Fact>
Public Sub WithLotsOfChildrenTest()
    ' Build "(a,b,c,...,z)": every paren, argument and comma is exactly one character wide,
    ' so each position in the parsed list maps to exactly one child.
    Dim letters = "abcdefghijklmnopqrstuvwxyz"
    Dim sourceText = "(" & String.Join(",", DirectCast(letters, IEnumerable(Of Char))) & ")"
    Dim argumentList = SyntaxFactory.ParseArgumentList(sourceText)
    Assert.Equal(letters.Length, argumentList.Arguments.Count)

    ' Position 0 is the opening parenthesis.
    Dim firstChild = ChildSyntaxList.ChildThatContainsPosition(argumentList, 0)
    Assert.True(firstChild.IsKind(SyntaxKind.OpenParenToken))
    Assert.Equal(1, firstChild.FullWidth)

    ' Start at 1 and stop one short to skip the open/close paren tokens
    For position = 1 To argumentList.FullWidth - 2
        Dim child = ChildSyntaxList.ChildThatContainsPosition(argumentList, position)
        Assert.Equal(position, child.Position)
        Assert.Equal(1, child.FullWidth)

        Dim expectComma As Boolean = (position Mod 2 = 0)
        If expectComma Then
            ' Even positions hold the separators.
            Assert.True(child.IsToken)
            Assert.True(child.IsKind(SyntaxKind.CommaToken))
            Assert.Equal(position, child.AsToken.Index)
        Else
            ' Odd positions hold the arguments, in alphabetical order.
            Assert.True(child.IsNode)
            Assert.True(child.IsKind(SyntaxKind.SimpleArgument))
            Dim expectedName As String = letters(position \ 2).ToString()
            Assert.Equal(expectedName, CType(child, SimpleArgumentSyntax).Expression.ToString())
        End If
    Next
End Sub
End Class
End Namespace
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册