未验证 提交 0a265bb5 编写于 作者: T Tomáš Matoušek 提交者: GitHub

Optimize memory allocation of LCS algorithm (#37658)

* Cleanup

* Optimize memory allocation of LCS algorithm
上级 aba67f39
......@@ -3,6 +3,7 @@
using System;
using System.Collections.Generic;
using System.Diagnostics;
using Microsoft.CodeAnalysis.PooledObjects;
namespace Microsoft.CodeAnalysis.Differencing
{
......@@ -11,48 +12,182 @@ namespace Microsoft.CodeAnalysis.Differencing
/// </summary>
internal abstract class LongestCommonSubsequence<TSequence>
{
// VArray struct enables array indexing in range [-d...d].
private struct VArray
/// <summary>
/// Underlying storage for <see cref="VArray"/>s allocated on <see cref="VStack"/>.
/// </summary>
/// <remarks>
/// The LCS algorithm allocates <see cref="VArray"/>s of sizes (3, 2*1 + 1, ..., 2*D + 1), always in this order,
/// where D is at most the sum of lengths of the compared sequences.
/// The arrays get pushed on a stack as they are built up, then all consumed in the reverse order (stack pop).
///
/// Since the exact length of each array in the above sequence is known we avoid allocating each individual array.
/// Instead we allocate a large buffer serving as a a backing storage of a contiguous sequence of arrays
/// corresponding to stack depths <see cref="MinDepth"/> to <see cref="MaxDepth"/>.
/// If more storage is needed we chain next large buffer to the previous one in a linked list.
///
/// We pool a few of these linked buffers on <see cref="VStack"/> to conserve allocations.
/// </remarks>
private sealed class VBuffer
{
/// <summary>
/// The max stack depth backed by the fist buffer.
/// Size of the buffer for 100 is ~10K.
/// For 150 it'd be 91KB, which would be allocated on LOH.
/// The buffers grow by factor of <see cref="GrowFactor"/>, so the next buffer will be allocated on LOH.
/// </summary>
private const int FirstBufferMaxDepth = 100;
// 3 + Sum { d = 1..maxDepth : 2*d+1 } = (maxDepth + 1)^2 + 2
private const int FirstBufferLength = (FirstBufferMaxDepth + 1) * (FirstBufferMaxDepth + 1) + 2;
internal const int GrowFactor = 2;
public VBuffer Previous { get; }
public VBuffer Next { get; private set; }
public readonly int MinDepth;
public readonly int MaxDepth;
private readonly int[] _array;
public VArray(int d)
public VBuffer()
{
_array = new int[2 * d + 1];
_array = new int[FirstBufferLength];
MaxDepth = FirstBufferMaxDepth;
}
public void CopyFrom(VArray otherVArray)
public VBuffer(VBuffer previous)
{
var copyDelta = Offset - otherVArray.Offset;
if (copyDelta >= 0)
Debug.Assert(previous != null);
int minDepth = previous.MaxDepth + 1;
int maxDepth = previous.MaxDepth * GrowFactor;
Debug.Assert(minDepth > 0);
Debug.Assert(minDepth <= maxDepth);
Previous = previous;
_array = new int[GetNextBufferLength(minDepth - 1, maxDepth)];
MinDepth = minDepth;
MaxDepth = maxDepth;
previous.Next = this;
}
public VArray GetVArray(int depth)
{
var length = GetVArrayLength(depth);
var start = GetVArrayStart(depth) - GetVArrayStart(MinDepth);
Debug.Assert(start >= 0);
Debug.Assert(start + length <= _array.Length);
return new VArray(_array, start, length);
}
private static int GetVArrayLength(int depth)
=> 2 * Math.Max(depth, 1) + 1;
// 3 + Sum { d = 1..depth-1 : 2*d+1 } = depth^2 + 2
private static int GetVArrayStart(int depth)
=> (depth == 0) ? 0 : depth * depth + 2;
// Sum { d = previousChunkDepth..maxDepth : 2*d+1 } = (maxDepth + 1)^2 - precedingBufferMaxDepth^2
private static int GetNextBufferLength(int precedingBufferMaxDepth, int maxDepth)
=> (maxDepth + 1) * (maxDepth + 1) - precedingBufferMaxDepth * precedingBufferMaxDepth;
}
private struct VStack
{
private static readonly ObjectPool<VBuffer> s_bufferPool = new ObjectPool<VBuffer>(() => new VBuffer());
private VBuffer _currentBuffer;
private int _depth;
public VStack(bool _)
{
_currentBuffer = s_bufferPool.Allocate();
_depth = 0;
}
public VArray Push()
{
var depth = _depth++;
if (depth > _currentBuffer.MaxDepth)
{
Array.Copy(otherVArray._array, 0, _array, copyDelta, otherVArray._array.Length);
_currentBuffer = _currentBuffer.Next ?? new VBuffer(_currentBuffer);
}
else
return _currentBuffer.GetVArray(depth);
}
public IEnumerable<(VArray Array, int Depth)> ConsumeArrays()
{
var buffer = _currentBuffer;
for (int depth = _depth - 1; depth >= 0; depth--)
{
Array.Copy(otherVArray._array, -copyDelta, _array, 0, _array.Length);
if (depth < buffer.MinDepth)
{
buffer = buffer.Previous;
}
yield return (buffer.GetVArray(depth), depth);
}
s_bufferPool.Free(_currentBuffer);
_currentBuffer = null;
}
}
public int this[int index]
// VArray struct enables array indexing in range [-d...d].
private readonly struct VArray
{
private readonly int[] _buffer;
private readonly int _start;
private readonly int _length;
public VArray(int[] buffer, int start, int length)
{
get
_buffer = buffer;
_start = start;
_length = length;
}
public void InitializeFrom(VArray other)
{
int dstCopyStart, srcCopyStart, copyLength;
var copyDelta = Offset - other.Offset;
if (copyDelta >= 0)
{
return _array[index + Offset];
srcCopyStart = 0;
dstCopyStart = copyDelta;
copyLength = other._length;
}
set
else
{
_array[index + Offset] = value;
srcCopyStart = -copyDelta;
dstCopyStart = 0;
copyLength = _length;
}
// since we might be reusing previously used arrays, we need to clear slots that are not copied over from the other array:
Array.Clear(_buffer, _start, dstCopyStart);
Array.Copy(other._buffer, other._start + srcCopyStart, _buffer, _start + dstCopyStart, copyLength);
Array.Clear(_buffer, _start + dstCopyStart + copyLength, _length - dstCopyStart - copyLength);
}
private int Offset
internal void Initialize()
{
get
{
return _array.Length / 2;
}
Array.Clear(_buffer, _start, _length);
}
public int this[int index]
{
get => _buffer[_start + index + Offset];
set => _buffer[_start + index + Offset] = value;
}
private int Offset => _length / 2;
}
protected abstract bool ItemsEqual(TSequence oldSequence, int oldIndex, TSequence newSequence, int newIndex);
......@@ -60,15 +195,19 @@ private int Offset
// TODO: Consolidate return types between GetMatchingPairs and GetEdit to avoid duplicated code (https://github.com/dotnet/roslyn/issues/16864)
protected IEnumerable<KeyValuePair<int, int>> GetMatchingPairs(TSequence oldSequence, int oldLength, TSequence newSequence, int newLength)
{
var stackOfVs = ComputeEditPaths(oldSequence, oldLength, newSequence, newLength);
VStack stack = ComputeEditPaths(oldSequence, oldLength, newSequence, newLength);
var x = oldLength;
var y = newLength;
var varrays = stack.ConsumeArrays().GetEnumerator();
while (x > 0 || y > 0)
{
var currentV = stackOfVs.Pop();
var d = stackOfVs.Count;
bool hasNext = varrays.MoveNext();
Debug.Assert(hasNext);
var (currentV, d) = varrays.Current;
var k = x - y;
// "snake" == single delete or insert followed by 0 or more diagonals
......@@ -77,7 +216,7 @@ private int Offset
var xEnd = yEnd + k;
// does the snake first go down (insert) or right(delete)?
var right = (k == d || (k != -d && currentV[k - 1] > currentV[k + 1]));
var right = k == d || (k != -d && currentV[k - 1] > currentV[k + 1]);
var kPrev = right ? k - 1 : k + 1;
// snake start point
......@@ -100,20 +239,23 @@ private int Offset
x = xStart;
y = yStart;
}
}
protected IEnumerable<SequenceEdit> GetEdits(TSequence oldSequence, int oldLength, TSequence newSequence, int newLength)
{
var stackOfVs = ComputeEditPaths(oldSequence, oldLength, newSequence, newLength);
var stack = ComputeEditPaths(oldSequence, oldLength, newSequence, newLength);
var x = oldLength;
var y = newLength;
var varrays = stack.ConsumeArrays().GetEnumerator();
while (x > 0 || y > 0)
{
var currentV = stackOfVs.Pop();
var d = stackOfVs.Count;
bool hasNext = varrays.MoveNext();
Debug.Assert(hasNext);
var (currentV, d) = varrays.Current;
var k = x - y;
// "snake" == single delete or insert followed by 0 or more diagonals
......@@ -122,7 +264,7 @@ protected IEnumerable<SequenceEdit> GetEdits(TSequence oldSequence, int oldLengt
var xEnd = yEnd + k;
// does the snake first go down (insert) or right(delete)?
var right = (k == d || (k != -d && currentV[k - 1] > currentV[k + 1]));
var right = k == d || (k != -d && currentV[k - 1] > currentV[k + 1]);
var kPrev = right ? k - 1 : k + 1;
// snake start point
......@@ -233,33 +375,33 @@ protected double ComputeDistance(TSequence oldSequence, int oldLength, TSequence
///
/// VArrays store just the y index because x can be calculated: x = y + k.
/// </remarks>
private Stack<VArray> ComputeEditPaths(TSequence oldSequence, int oldLength, TSequence newSequence, int newLength)
private VStack ComputeEditPaths(TSequence oldSequence, int oldLength, TSequence newSequence, int newLength)
{
var stackOfVs = new Stack<VArray>();
// special-case: the first "snake" to start at (-1,0 )
var previousV = new VArray(1);
VArray currentV;
var reachedEnd = false;
VArray currentV = default;
var stack = new VStack(default);
for (var d = 0; d <= oldLength + newLength && !reachedEnd; d++)
{
// V is in range [-d...d] => use d to offset the k-based array indices to non-negative values
if (d == 0)
{
currentV = previousV;
// the first "snake" to start at (-1, 0)
currentV = stack.Push();
currentV.Initialize();
}
else
{
currentV = new VArray(d);
currentV.CopyFrom(previousV);
// V is in range [-d...d] => use d to offset the k-based array indices to non-negative values
var previousV = currentV;
currentV = stack.Push();
currentV.InitializeFrom(previousV);
}
for (var k = -d; k <= d; k += 2)
{
// down or right?
var right = (k == d || (k != -d && currentV[k - 1] > currentV[k + 1]));
var right = k == d || (k != -d && currentV[k - 1] > currentV[k + 1]);
var kPrev = right ? k - 1 : k + 1;
// start point
......@@ -291,10 +433,9 @@ private Stack<VArray> ComputeEditPaths(TSequence oldSequence, int oldLength, TSe
reachedEnd = true;
}
}
stackOfVs.Push(currentV);
previousV = currentV;
}
return stackOfVs;
return stack;
}
}
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册