Commit 0382e3e3 authored by Heejae Chang

Merge pull request #3247 from heejaechang/formatperf

Improve formatting performance

...

This performance improvement specifically targets DevDiv bug # 1089540.

With this change, the file from that bug is formatted in a few seconds on my machine, compared to several minutes before.

There were several issues; each was fixed as follows:

#1. Use concurrency when gathering operations (a rough sketch follows this list).
#2. Don't spend more time splitting the work into chunks than the actual formatting would take.
#3. Don't blindly treat the beginning of the file as the inseparable start point for certain formatting options.
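
As a rough sketch of what #1 means in isolation: collect per-node results into thread-local lists and merge them once at the end, instead of serializing every append on a shared list. The names below (Gather, TNode, TOperation) are placeholders for illustration, not the formatter's real APIs.

using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;

static class ConcurrentGatherSketch
{
    // Gathers operations for each node in parallel. Each thread appends to its own
    // list (no locking), and the per-thread lists are merged once at the end.
    // Note: the merged order is not deterministic across runs.
    public static List<TOperation> Gather<TNode, TOperation>(
        IReadOnlyList<TNode> nodes,
        Func<TNode, IEnumerable<TOperation>> getOperations,
        CancellationToken cancellationToken)
    {
        using (var local = new ThreadLocal<List<TOperation>>(() => new List<TOperation>(), trackAllValues: true))
        {
            Parallel.ForEach(
                nodes,
                new ParallelOptions { CancellationToken = cancellationToken },
                n => local.Value.AddRange(getOperations(n)));

            // single-threaded merge after the parallel loop has completed
            return local.Values.SelectMany(v => v).ToList();
        }
    }
}

The actual change below goes a step further and only picks the concurrent executor when there are enough nodes to make the parallel overhead worthwhile.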

...

However, these changes don't address the most impactful root cause of this perf issue, which is the performance of the GetPreviousToken/GetNextToken APIs in the compiler (#3244).

The formatter internally uses GetDescendantTokens to get all tokens at once and caches them, which takes less than 1 second for the entire file in the bug (about 2 MB in size); it then works off that cache.

But certain parts of the formatter (the rule providers) can't use that internal cache, so they have to use GetPreviousToken/GetNextToken to move between tokens, which in this particular bug takes more than 40 seconds on my machine, and that is not even for the entire file (less than 1/12 of the tokens).
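
To see why the cached path is so much cheaper: once all tokens are materialized in order, "next token" becomes an array/dictionary lookup instead of repeated tree navigation. The class below is only an illustration of that idea built on the public DescendantTokens() API; it is not the formatter's internal token stream.

using System.Collections.Generic;
using System.Linq;
using Microsoft.CodeAnalysis;

internal sealed class CachedTokenStream
{
    private readonly SyntaxToken[] _tokens;
    private readonly Dictionary<SyntaxToken, int> _indexOf;

    public CachedTokenStream(SyntaxNode root)
    {
        // one traversal up front; roughly the "< 1 second" cost mentioned above
        _tokens = root.DescendantTokens().ToArray();

        _indexOf = new Dictionary<SyntaxToken, int>(_tokens.Length);
        for (var i = 0; i < _tokens.Length; i++)
        {
            _indexOf[_tokens[i]] = i;
        }
    }

    // O(1) neighbor lookups; compare with calling GetNextToken/GetPreviousToken
    // repeatedly, which is the expensive path described above (#3244).
    public SyntaxToken GetNext(SyntaxToken token)
        => _indexOf.TryGetValue(token, out var i) && i + 1 < _tokens.Length ? _tokens[i + 1] : default(SyntaxToken);

    public SyntaxToken GetPrevious(SyntaxToken token)
        => _indexOf.TryGetValue(token, out var i) && i > 0 ? _tokens[i - 1] : default(SyntaxToken);
}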

I opened a bug with the compiler team, hopefully so that we can get better performance for those APIs.

In this PR, I mitigated the issue either by making more work run concurrently or by changing the logic that requires those APIs.
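
The logic change shows up below as a bounded search: chase the expensive token walk only up to a fixed depth (10 in the new code), and report failure when the budget runs out so the caller can fall back to a bigger partition. A generic sketch of that shape, with illustrative names rather than the real APIs:

using System;
using System.Threading;

internal static class BoundedWalk
{
    // Tries at most maxChainDepth steps of an expensive walk. Returns false when the
    // budget is exhausted so the caller can fall back to a cheaper, coarser answer
    // (in the formatter's case: a larger partition).
    public static bool TryWalk<T>(
        T start,
        Func<T, (bool done, T next)> step,
        int maxChainDepth,
        out T result,
        CancellationToken cancellationToken)
    {
        result = start;
        for (var depth = 0; depth <= maxChainDepth; depth++)
        {
            cancellationToken.ThrowIfCancellationRequested();

            var (done, next) = step(result);
            if (done)
            {
                return true;
            }

            result = next;
        }

        return false;
    }
}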
@@ -198,7 +198,7 @@ public void AddIndentBlockOperation(IndentBlockOperation operation)
             // relative indentation case where indentation depends on other token
             if (operation.IsRelativeIndentation)
             {
-                var inseparableRegionStartingPosition = operation.Option.IsOn(IndentBlockOption.RelativeToFirstTokenOnBaseTokenLine) ? 0 : operation.BaseToken.FullSpan.Start;
+                var inseparableRegionStartingPosition = operation.Option.IsOn(IndentBlockOption.RelativeToFirstTokenOnBaseTokenLine) ? _tokenStream.FirstTokenOfBaseTokenLine(operation.BaseToken).FullSpan.Start : operation.BaseToken.FullSpan.Start;
                 var relativeIndentationGetter = new Lazy<int>(() =>
                 {
                     var indentationDelta = operation.IndentationDeltaOrPosition * this.OptionSet.GetOption(FormattingOptions.IndentationSize, _language);
@@ -409,25 +409,37 @@ public IEnumerable<IndentBlockOperation> GetAllRelativeIndentBlockOperations()
             return _relativeIndentationTree.GetIntersectingInOrderIntervals(this.TreeData.StartPosition, this.TreeData.EndPosition, this).Select(i => i.Operation);
         }
 
-        public SyntaxToken GetEndTokenForRelativeIndentationSpan(SyntaxToken token)
-        {
-            var span = token.Span;
-            var indentationData = _relativeIndentationTree.GetSmallestContainingInterval(span.Start, 0);
-            if (indentationData == null)
-            {
-                // this means the given token is not inside of inseparable regions
-                return token;
-            }
-
-            // recursively find the end token outside of inseparable regions
-            var nextToken = indentationData.EndToken.GetNextToken(includeZeroWidth: true);
-            if (nextToken.RawKind == 0)
-            {
-                // reached end of tree
-                return default(SyntaxToken);
-            }
-
-            return GetEndTokenForRelativeIndentationSpan(nextToken);
-        }
+        public bool TryGetEndTokenForRelativeIndentationSpan(SyntaxToken token, int maxChainDepth, out SyntaxToken endToken, CancellationToken cancellationToken)
+        {
+            endToken = default(SyntaxToken);
+
+            var depth = 0;
+            while (true)
+            {
+                cancellationToken.ThrowIfCancellationRequested();
+
+                if (depth++ > maxChainDepth)
+                {
+                    return false;
+                }
+
+                var span = token.Span;
+                var indentationData = _relativeIndentationTree.GetSmallestContainingInterval(span.Start, 0);
+                if (indentationData == null)
+                {
+                    // this means the given token is not inside of inseparable regions
+                    endToken = token;
+                    return true;
+                }
+
+                // recursively find the end token outside of inseparable regions
+                token = indentationData.EndToken.GetNextToken(includeZeroWidth: true);
+                if (token.RawKind == 0)
+                {
+                    // reached end of tree
+                    return true;
+                }
+            }
+        }
 
         private AnchorData GetAnchorData(SyntaxToken token)
@@ -2,7 +2,8 @@
 using System;
 using System.Collections.Generic;
-using Microsoft.CodeAnalysis.Text;
+using System.Threading;
+using Microsoft.CodeAnalysis.Internal.Log;
 using Roslyn.Utilities;
 
 namespace Microsoft.CodeAnalysis.Formatting
@@ -28,60 +29,88 @@ public Partitioner(FormattingContext context, TokenStream tokenStream, TokenPair
             _operationPairs = operationPairs;
         }
 
-        public List<IEnumerable<TokenPairWithOperations>> GetPartitions(int partitionCount)
-        {
-            Contract.ThrowIfFalse(partitionCount > 0);
-
-            var list = new List<IEnumerable<TokenPairWithOperations>>();
-
-            // too small items in the list. give out one list
-            int perPartition = _operationPairs.Length / partitionCount;
-            if (perPartition < 10 || partitionCount <= 1 || _operationPairs.Length < MinimumItemsPerPartition)
-            {
-                list.Add(GetOperationPairsFromTo(0, _operationPairs.Length));
-                return list;
-            }
-
-            // split items up to the partition count with about same number of items if possible
-            // this algorithm has one problem. if there is an operation that marked whole tree
-            // as inseparable region, then it wouldn't go into the inseparable regions to find
-            // local parts that could run concurrently; which means everything will run
-            // synchronously.
-            var currentOperationIndex = 0;
-            while (currentOperationIndex < _operationPairs.Length)
-            {
-                var nextPartitionStartOperationIndex = Math.Min(currentOperationIndex + perPartition, _operationPairs.Length);
-                if (nextPartitionStartOperationIndex >= _operationPairs.Length)
-                {
-                    // reached end of operation pairs
-                    list.Add(GetOperationPairsFromTo(currentOperationIndex, _operationPairs.Length));
-                    break;
-                }
-
-                var nextToken = _context.GetEndTokenForRelativeIndentationSpan(_operationPairs[nextPartitionStartOperationIndex].Token1);
-                if (nextToken.RawKind == 0)
-                {
-                    // reached the last token in the tree
-                    list.Add(GetOperationPairsFromTo(currentOperationIndex, _operationPairs.Length));
-                    break;
-                }
-
-                var nextTokenWithIndex = _tokenStream.GetTokenData(nextToken);
-                if (nextTokenWithIndex.IndexInStream < 0)
-                {
-                    // first token for next partition is out side of valid token stream
-                    list.Add(GetOperationPairsFromTo(currentOperationIndex, _operationPairs.Length));
-                    break;
-                }
-
-                Contract.ThrowIfFalse(currentOperationIndex < nextTokenWithIndex.IndexInStream);
-                Contract.ThrowIfFalse(nextTokenWithIndex.IndexInStream <= _operationPairs.Length);
-
-                list.Add(GetOperationPairsFromTo(currentOperationIndex, nextTokenWithIndex.IndexInStream));
-                currentOperationIndex = nextTokenWithIndex.IndexInStream;
-            }
-
-            return list;
-        }
+        public List<IEnumerable<TokenPairWithOperations>> GetPartitions(int partitionCount, CancellationToken cancellationToken)
+        {
+            using (Logger.LogBlock(FunctionId.Formatting_Partitions, cancellationToken))
+            {
+                Contract.ThrowIfFalse(partitionCount > 0);
+
+                var list = new List<IEnumerable<TokenPairWithOperations>>();
+
+                // too small items in the list. give out one list
+                int perPartition = _operationPairs.Length / partitionCount;
+                if (perPartition < 10 || partitionCount <= 1 || _operationPairs.Length < MinimumItemsPerPartition)
+                {
+                    list.Add(GetOperationPairsFromTo(0, _operationPairs.Length));
+                    return list;
+                }
+
+                // split items up to the partition count with about same number of items if possible
+                // this algorithm has one problem. if there is an operation that marked whole tree
+                // as inseparable region, then it wouldn't go into the inseparable regions to find
+                // local parts that could run concurrently; which means everything will run
+                // synchronously.
+                var currentOperationIndex = 0;
+                while (currentOperationIndex < _operationPairs.Length)
+                {
+                    int nextPartitionStartOperationIndex;
+                    if (!TryGetNextPartitionIndex(currentOperationIndex, perPartition, out nextPartitionStartOperationIndex))
+                    {
+                        // reached end of operation pairs
+                        list.Add(GetOperationPairsFromTo(currentOperationIndex, _operationPairs.Length));
+                        break;
+                    }
+
+                    var nextToken = GetNextPartitionToken(nextPartitionStartOperationIndex, perPartition, cancellationToken);
+                    if (nextToken.RawKind == 0)
+                    {
+                        // reached the last token in the tree
+                        list.Add(GetOperationPairsFromTo(currentOperationIndex, _operationPairs.Length));
+                        break;
+                    }
+
+                    var nextTokenWithIndex = _tokenStream.GetTokenData(nextToken);
+                    if (nextTokenWithIndex.IndexInStream < 0)
+                    {
+                        // first token for next partition is out side of valid token stream
+                        list.Add(GetOperationPairsFromTo(currentOperationIndex, _operationPairs.Length));
+                        break;
+                    }
+
+                    Contract.ThrowIfFalse(currentOperationIndex < nextTokenWithIndex.IndexInStream);
+                    Contract.ThrowIfFalse(nextTokenWithIndex.IndexInStream <= _operationPairs.Length);
+
+                    list.Add(GetOperationPairsFromTo(currentOperationIndex, nextTokenWithIndex.IndexInStream));
+                    currentOperationIndex = nextTokenWithIndex.IndexInStream;
+                }
+
+                return list;
+            }
+        }
+
+        private SyntaxToken GetNextPartitionToken(int index, int perPartition, CancellationToken cancellationToken)
+        {
+            while (true)
+            {
+                SyntaxToken nextToken;
+                if (_context.TryGetEndTokenForRelativeIndentationSpan(_operationPairs[index].Token1, 10, out nextToken, cancellationToken))
+                {
+                    return nextToken;
+                }
+
+                // we couldn't determine how to split chunks in short time period. make partition bigger
+                if (!TryGetNextPartitionIndex(index, perPartition, out index))
+                {
+                    // reached end of operation pairs
+                    return default(SyntaxToken);
+                }
+            }
+        }
+
+        private bool TryGetNextPartitionIndex(int index, int perPartition, out int nextIndex)
+        {
+            nextIndex = Math.Min(index + perPartition, _operationPairs.Length);
+            return nextIndex < _operationPairs.Length;
+        }
 
         private IEnumerable<TokenPairWithOperations> GetOperationPairsFromTo(int from, int to)
@@ -211,26 +211,34 @@ protected virtual NodeOperations CreateNodeOperationTasks(CancellationToken canc
         private List<T> AddOperations<T>(List<SyntaxNode> nodes, Action<List<T>, SyntaxNode> addOperations, CancellationToken cancellationToken)
         {
-            var operations = new List<T>();
-            var list = new List<T>();
-
-            for (int i = 0; i < nodes.Count; i++)
-            {
-                cancellationToken.ThrowIfCancellationRequested();
-                addOperations(list, nodes[i]);
-
-                foreach (var element in list)
-                {
-                    if (element != null)
-                    {
-                        operations.Add(element);
-                    }
-                }
-
-                list.Clear();
-            }
-
-            return operations;
+            using (var localOperations = new ThreadLocal<List<T>>(() => new List<T>(), trackAllValues: true))
+            using (var localList = new ThreadLocal<List<T>>(() => new List<T>(), trackAllValues: false))
+            {
+                // find out which executor we want to use.
+                var taskExecutor = nodes.Count > (1000 * Environment.ProcessorCount) ? TaskExecutor.Concurrent : TaskExecutor.Synchronous;
+
+                taskExecutor.ForEach(nodes, n =>
+                {
+                    cancellationToken.ThrowIfCancellationRequested();
+
+                    var list = localList.Value;
+                    addOperations(list, n);
+
+                    foreach (var element in list)
+                    {
+                        if (element != null)
+                        {
+                            localOperations.Value.Add(element);
+                        }
+                    }
+
+                    list.Clear();
+                }, cancellationToken);
+
+                var operations = new List<T>(localOperations.Values.Sum(v => v.Count));
+                operations.AddRange(localOperations.Values.SelectMany(v => v));
+
+                return operations;
+            }
         }
 
         private Task<TokenPairWithOperations[]> CreateTokenOperationTask(
@@ -441,7 +449,7 @@ private SyntaxToken FindCorrectBaseTokenOfRelativeIndentBlockOperation(IndentBlo
             var partitioner = new Partitioner(context, tokenStream, tokenOperations);
 
             // always create task 1 more than current processor count
-            var partitions = partitioner.GetPartitions(this.TaskExecutor == TaskExecutor.Synchronous ? 1 : Environment.ProcessorCount + 1);
+            var partitions = partitioner.GetPartitions(this.TaskExecutor == TaskExecutor.Synchronous ? 1 : Environment.ProcessorCount + 1, cancellationToken);
 
             var tasks = new Task[partitions.Count];
             for (int i = 0; i < partitions.Count; i++)
@@ -118,6 +118,7 @@ internal enum FunctionId
         Formatting_AggregateCreateFormattedRoot,
         Formatting_CreateTextChanges,
         Formatting_CreateFormattedRoot,
+        Formatting_Partitions,
 
         SmartIndentation_Start,
         SmartIndentation_OpenCurly,
@@ -303,6 +304,6 @@ internal enum FunctionId
         VirtualMemory_MemoryLow,
         Extension_Exception,
 
-        WorkCoordinator_WaitForHigherPriorityOperationsAsync,
+        WorkCoordinator_WaitForHigherPriorityOperationsAsync
     }
 }