未验证 提交 8ed8517d 编写于 作者: K keymoon 提交者: GitHub

[resubmit] BigInteger parsing optimization for large decimal string (#55121)

* implement divide-and-conquer method for parsing digits

* fix argument order in Assert when x equals 0

* Apply format fix
Co-authored-by: NStephen Toub <stoub@microsoft.com>

* add test for non-naive algorithm

* add description for naiveThreshold

* fix trivial part

* add check for boundary condition

* add assertions and descriptions

* change variable name

* remove inappropriate use of var

* use ArrayPool<int>.Shared.Rent for newBuffer allocation

* move both algorithms to separate methods

* add and fix comments

* trivial fix
Co-authored-by: NStephen Toub <stoub@microsoft.com>
上级 de5582b3
......@@ -489,23 +489,57 @@ private static bool HexNumberToBigInteger(ref BigNumberBuffer number, out BigInt
}
}
//
// This threshold is for choosing the algorithm to use based on the number of digits.
//
// Let N be the number of digits. If N is less than or equal to the threshold, use a naive
// algorithm with a running time of O(N^2). If N is greater than the threshold, use
// a divide-and-conquer algorithm with a running time of O(NlogN).
//
private static int s_naiveThreshold = 20000;
private static bool NumberToBigInteger(ref BigNumberBuffer number, out BigInteger result)
{
Span<uint> stackBuffer = stackalloc uint[BigIntegerCalculator.StackAllocThreshold];
Span<uint> currentBuffer = stackBuffer;
int currentBufferSize = 0;
int[]? arrayFromPool = null;
uint partialValue = 0;
int partialDigitCount = 0;
int totalDigitCount = 0;
int numberScale = number.scale;
const int MaxPartialDigits = 9;
const uint TenPowMaxPartial = 1000000000;
int[]? arrayFromPoolForResultBuffer = null;
if (numberScale < 0)
{
result = default;
return false;
}
try
{
if (number.digits.Length <= s_naiveThreshold)
{
return Naive(ref number, out result);
}
else
{
return DivideAndConquer(ref number, out result);
}
}
finally
{
if (arrayFromPoolForResultBuffer != null)
{
ArrayPool<int>.Shared.Return(arrayFromPoolForResultBuffer);
}
}
bool Naive(ref BigNumberBuffer number, out BigInteger result)
{
Span<uint> stackBuffer = stackalloc uint[BigIntegerCalculator.StackAllocThreshold];
Span<uint> currentBuffer = stackBuffer;
uint partialValue = 0;
int partialDigitCount = 0;
foreach (ReadOnlyMemory<char> digitsChunk in number.digits.GetChunks())
{
if (!ProcessChunk(digitsChunk.Span, ref currentBuffer))
......@@ -520,6 +554,231 @@ private static bool NumberToBigInteger(ref BigNumberBuffer number, out BigIntege
MultiplyAdd(ref currentBuffer, s_uint32PowersOfTen[partialDigitCount], partialValue);
}
result = NumberBufferToBigInteger(currentBuffer, number.sign);
return true;
// Consumes one chunk of digit characters. Integer digits are accumulated into the
// captured partial value and flushed into currentBuffer every MaxPartialDigits digits;
// any characters after the integer part must all be '0'.
// Returns false if a nonzero digit is found after the integer part, true otherwise.
// The captured counters are only written back when the chunk is accepted.
bool ProcessChunk(ReadOnlySpan<char> chunkDigits, ref Span<uint> currentBuffer)
{
    // How many characters of this chunk still belong to the integer part.
    int integerDigitsLeft = Math.Max(numberScale - totalDigitCount, 0);
    int integerLength = Math.Min(integerDigitsLeft, chunkDigits.Length);

    // Work on local copies of the captured state; they are cheaper to access in the loop.
    uint pendingValue = partialValue;
    int pendingDigits = partialDigitCount;
    int consumedDigits = totalDigitCount;

    bool terminatorSeen = false;
    foreach (char c in chunkDigits.Slice(0, integerLength))
    {
        if (c == '\0')
        {
            terminatorSeen = true;
            break;
        }

        pendingValue = pendingValue * 10 + (uint)(c - '0');
        pendingDigits++;
        consumedDigits++;

        // Flush into the buffer once a full group of digits has been gathered.
        if (pendingDigits == MaxPartialDigits)
        {
            MultiplyAdd(ref currentBuffer, TenPowMaxPartial, pendingValue);
            pendingValue = 0;
            pendingDigits = 0;
        }
    }

    // Unless the terminator was already hit, verify that everything after the
    // integer part is zero (a '\0' ends the check).
    if (!terminatorSeen)
    {
        foreach (char c in chunkDigits.Slice(integerLength))
        {
            if (c == '\0')
            {
                break;
            }

            if (c != '0')
            {
                return false;
            }
        }
    }

    partialValue = pendingValue;
    partialDigitCount = pendingDigits;
    totalDigitCount = consumedDigits;
    return true;
}
}
// Converts the decimal digits of number into result with a divide-and-conquer scheme:
// the integer digits are first packed into blocks of MaxPartialDigits decimal digits
// (one uint each), then adjacent blocks are merged pairwise, doubling the block size
// on every pass, until the whole buffer is a single value.
// Returns false (with result set to default) when a nonzero digit is found after the
// integer part; returns true otherwise.
bool DivideAndConquer(ref BigNumberBuffer number, out BigInteger result)
{
Span<uint> currentBuffer;
int[]? arrayFromPoolForMultiplier = null;
try
{
// The integer part is limited by both the scale and the digits actually stored.
// NOTE(review): the "- 1" presumably discounts a trailing '\0' terminator in number.digits - confirm.
totalDigitCount = Math.Min(number.digits.Length - 1, numberScale);
// One uint block per MaxPartialDigits decimal digits, rounded up.
int bufferSize = (totalDigitCount + MaxPartialDigits - 1) / MaxPartialDigits;
// buffer and newBuffer are two equally sized work areas that are swapped after every
// merge pass. buffer is a fresh array; newBuffer is backed by a pooled array that is
// tracked in the captured arrayFromPoolForResultBuffer and cleared before use.
Span<uint> buffer = new uint[bufferSize];
arrayFromPoolForResultBuffer = ArrayPool<int>.Shared.Rent(bufferSize);
Span<uint> newBuffer = MemoryMarshal.Cast<int, uint>(arrayFromPoolForResultBuffer).Slice(0, bufferSize);
newBuffer.Clear();
// Separate every MaxPartialDigits digits and store them in the buffer.
// Buffers are treated as little-endian. That means, the array { 234567890, 1 }
// represents the number 1234567890.
// Digits arrive most significant first, so blocks are filled from the last index down.
int bufferIndex = bufferSize - 1;
uint currentBlock = 0;
// Countdown to the end of the current block. It is initialized so that the first
// (most significant) block receives the leftover digits when totalDigitCount is not
// a multiple of MaxPartialDigits.
int shiftUntil = (totalDigitCount - 1) % MaxPartialDigits;
int remainingIntDigitCount = totalDigitCount;
foreach (ReadOnlyMemory<char> digitsChunk in number.digits.GetChunks())
{
ReadOnlySpan<char> digitsChunkSpan = digitsChunk.Span;
// Portion of this chunk that still belongs to the integer part.
ReadOnlySpan<char> intDigitsSpan = digitsChunkSpan.Slice(0, Math.Min(remainingIntDigitCount, digitsChunkSpan.Length));
for (int i = 0; i < intDigitsSpan.Length; i++)
{
char digitChar = intDigitsSpan[i];
Debug.Assert(char.IsDigit(digitChar));
currentBlock *= 10;
currentBlock += unchecked((uint)(digitChar - '0'));
if (shiftUntil == 0)
{
// The block is complete: store it and start the next (less significant) one.
buffer[bufferIndex] = currentBlock;
currentBlock = 0;
bufferIndex--;
shiftUntil = MaxPartialDigits;
}
shiftUntil--;
}
remainingIntDigitCount -= intDigitsSpan.Length;
Debug.Assert(0 <= remainingIntDigitCount);
// Everything after the integer part must be '0'; a '\0' ends the scan of this chunk.
ReadOnlySpan<char> fracDigitsSpan = digitsChunkSpan.Slice(intDigitsSpan.Length);
for (int i = 0; i < fracDigitsSpan.Length; i++)
{
char digitChar = fracDigitsSpan[i];
if (digitChar == '\0')
{
break;
}
if (digitChar != '0')
{
result = default;
return false;
}
}
}
// Every digit must have been flushed into a block, and every block must have been written.
Debug.Assert(currentBlock == 0);
Debug.Assert(bufferIndex == -1);
// blockSize is the number of uints per already-merged block. The multiplier is the
// weight of the upper block of a pair: it starts as TenPowMaxPartial for blockSize 1
// and is squared each time blockSize doubles.
int blockSize = 1;
arrayFromPoolForMultiplier = ArrayPool<int>.Shared.Rent(blockSize);
Span<uint> multiplier = MemoryMarshal.Cast<int, uint>(arrayFromPoolForMultiplier).Slice(0, blockSize);
multiplier[0] = TenPowMaxPartial;
// This loop is executed ceil(log_2(bufferSize)) times.
while (true)
{
// Merge each pair of adjacent blocks.
// When buffer represents:
// | A | B | C | D |
// Make newBuffer like:
// | A + B * multiplier | C + D * multiplier |
for (int i = 0; i < bufferSize; i += blockSize * 2)
{
Span<uint> curBufffer = buffer.Slice(i);
Span<uint> curNewBuffer = newBuffer.Slice(i);
// len: uints covered by this pair (the last pair may be shorter).
// lowerLen / upperLen: sizes of the lower and upper block; upperLen is 0 when
// the lower block has no partner.
int len = Math.Min(bufferSize - i, blockSize * 2);
int lowerLen = Math.Min(len, blockSize);
int upperLen = len - lowerLen;
if (upperLen != 0)
{
Debug.Assert(blockSize == lowerLen);
Debug.Assert(blockSize == multiplier.Length);
Debug.Assert(multiplier.Length == lowerLen);
// Upper block times multiplier goes into curNewBuffer (the "B * multiplier" term above).
BigIntegerCalculator.Multiply(multiplier, curBufffer.Slice(blockSize, upperLen), curNewBuffer.Slice(0, len));
}
// Add the lower block onto curNewBuffer, one uint at a time, carrying in 64-bit arithmetic.
long carry = 0;
int j = 0;
for (; j < lowerLen; j++)
{
long digit = (curBufffer[j] + carry) + curNewBuffer[j];
curNewBuffer[j] = unchecked((uint)digit);
carry = digit >> 32;
}
if (carry != 0)
{
// Propagate the leftover carry upward until an element does not wrap around to zero.
while (true)
{
curNewBuffer[j]++;
if (curNewBuffer[j] != 0)
{
break;
}
j++;
}
}
}
// Swap the work areas so that buffer always refers to the most recently merged data.
Span<uint> tmp = buffer;
buffer = newBuffer;
newBuffer = tmp;
blockSize *= 2;
// Once a single block spans the whole buffer, the merge is complete.
if (bufferSize <= blockSize)
{
break;
}
// Prepare the next pass: clear the work area and square the multiplier into a newly
// rented array. The old array is returned to the pool only after Square has run,
// because multiplier still refers to it until then.
newBuffer.Clear();
int[]? arrayToReturn = arrayFromPoolForMultiplier;
arrayFromPoolForMultiplier = ArrayPool<int>.Shared.Rent(blockSize);
Span<uint> newMultiplier = MemoryMarshal.Cast<int, uint>(arrayFromPoolForMultiplier).Slice(0, blockSize);
newMultiplier.Clear();
BigIntegerCalculator.Square(multiplier, newMultiplier);
multiplier = newMultiplier;
if (arrayToReturn is not null)
{
ArrayPool<int>.Shared.Return(arrayToReturn);
}
}
// shrink buffer to the currently used portion.
// First, calculate the rough size of the buffer from the ratio that the number
// of digits follows. Then, shrink the size until there is no more space left.
// The Ratio is calculated as: log_{2^32}(10^9)
const double digitRatio = 0.934292276687070661;
currentBufferSize = Math.Min((int)(bufferSize * digitRatio) + 1, bufferSize);
Debug.Assert(buffer.Length == currentBufferSize || buffer[currentBufferSize] == 0);
// Drop any remaining zero elements at the most significant end.
while (0 < currentBufferSize && buffer[currentBufferSize - 1] == 0)
{
currentBufferSize--;
}
currentBuffer = buffer.Slice(0, currentBufferSize);
result = NumberBufferToBigInteger(currentBuffer, number.sign);
}
finally
{
// Only the multiplier array is released here; arrayFromPoolForResultBuffer is a captured
// variable that is returned to the pool by the enclosing method's finally block.
if (arrayFromPoolForMultiplier != null)
{
ArrayPool<int>.Shared.Return(arrayFromPoolForMultiplier);
}
}
return true;
}
BigInteger NumberBufferToBigInteger(Span<uint> currentBuffer, bool signa)
{
int trailingZeroCount = numberScale - totalDigitCount;
while (trailingZeroCount >= MaxPartialDigits)
......@@ -543,85 +802,19 @@ private static bool NumberToBigInteger(ref BigNumberBuffer number, out BigIntege
}
else if (currentBufferSize == 1 && currentBuffer[0] <= int.MaxValue)
{
sign = (int)(number.sign ? -currentBuffer[0] : currentBuffer[0]);
sign = (int)(signa ? -currentBuffer[0] : currentBuffer[0]);
bits = null;
}
else
{
sign = number.sign ? -1 : 1;
sign = signa ? -1 : 1;
bits = currentBuffer.Slice(0, currentBufferSize).ToArray();
}
result = new BigInteger(sign, bits);
return true;
}
finally
{
if (arrayFromPool != null)
{
ArrayPool<int>.Shared.Return(arrayFromPool);
}
}
bool ProcessChunk(ReadOnlySpan<char> chunkDigits, ref Span<uint> currentBuffer)
{
int remainingIntDigitCount = Math.Max(numberScale - totalDigitCount, 0);
ReadOnlySpan<char> intDigitsSpan = chunkDigits.Slice(0, Math.Min(remainingIntDigitCount, chunkDigits.Length));
bool endReached = false;
// Storing these captured variables in locals for faster access in the loop.
uint _partialValue = partialValue;
int _partialDigitCount = partialDigitCount;
int _totalDigitCount = totalDigitCount;
for (int i = 0; i < intDigitsSpan.Length; i++)
{
char digitChar = chunkDigits[i];
if (digitChar == '\0')
{
endReached = true;
break;
}
_partialValue = _partialValue * 10 + (uint)(digitChar - '0');
_partialDigitCount++;
_totalDigitCount++;
// Update the buffer when enough partial digits have been accumulated.
if (_partialDigitCount == MaxPartialDigits)
{
MultiplyAdd(ref currentBuffer, TenPowMaxPartial, _partialValue);
_partialValue = 0;
_partialDigitCount = 0;
}
}
// Check for nonzero digits after the decimal point.
if (!endReached)
{
ReadOnlySpan<char> fracDigitsSpan = chunkDigits.Slice(intDigitsSpan.Length);
for (int i = 0; i < fracDigitsSpan.Length; i++)
{
char digitChar = fracDigitsSpan[i];
if (digitChar == '\0')
{
break;
}
if (digitChar != '0')
{
return false;
}
}
}
partialValue = _partialValue;
partialDigitCount = _partialDigitCount;
totalDigitCount = _totalDigitCount;
return true;
return new BigInteger(sign, bits);
}
// This function should only be used for result buffer.
void MultiplyAdd(ref Span<uint> currentBuffer, uint multiplier, uint addValue)
{
Span<uint> curBits = currentBuffer.Slice(0, currentBufferSize);
......@@ -641,10 +834,10 @@ void MultiplyAdd(ref Span<uint> currentBuffer, uint multiplier, uint addValue)
if (currentBufferSize == currentBuffer.Length)
{
int[]? arrayToReturn = arrayFromPool;
int[]? arrayToReturn = arrayFromPoolForResultBuffer;
arrayFromPool = ArrayPool<int>.Shared.Rent(checked(currentBufferSize * 2));
Span<uint> newBuffer = MemoryMarshal.Cast<int, uint>(arrayFromPool);
arrayFromPoolForResultBuffer = ArrayPool<int>.Shared.Rent(checked(currentBufferSize * 2));
Span<uint> newBuffer = MemoryMarshal.Cast<int, uint>(arrayFromPoolForResultBuffer);
currentBuffer.CopyTo(newBuffer);
currentBuffer = newBuffer;
......
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
using System;
using System.Numerics;
using System.Reflection;
namespace BigNumberTools
{
    /// <summary>
    /// Test helpers that reach into the internal <c>System.Numerics.BigNumber</c> type
    /// through reflection.
    /// </summary>
    public class Utils
    {
        // Cached TypeInfo of the internal type; remains null when the type cannot be found.
        private static volatile TypeInfo s_lazyInternalNumber;

        /// <summary>
        /// Gets the <see cref="TypeInfo"/> of <c>System.Numerics.BigNumber</c>, looked up in the
        /// assembly that defines <see cref="BigInteger"/>, or null when that type is not available.
        /// </summary>
        private static TypeInfo InternalNumber
        {
            get
            {
                if (s_lazyInternalNumber == null)
                {
                    Type t = typeof(BigInteger).Assembly.GetType("System.Numerics.BigNumber");
                    if (t != null)
                    {
                        s_lazyInternalNumber = t.GetTypeInfo();
                    }
                }
                return s_lazyInternalNumber;
            }
        }

        /// <summary>
        /// Temporarily overwrites a static <see cref="int"/> field of the internal number type,
        /// runs <paramref name="action"/>, and restores the previous value afterwards
        /// (even when <paramref name="action"/> throws).
        /// </summary>
        /// <param name="name">Name of the static field declared on the internal type.</param>
        /// <param name="value">Value the field holds while <paramref name="action"/> runs.</param>
        /// <param name="action">Code to execute with the overridden value.</param>
        /// <exception cref="MissingFieldException">
        /// The internal type is available but declares no field called <paramref name="name"/>.
        /// </exception>
        /// <remarks>
        /// When the internal type cannot be resolved the method returns without invoking
        /// <paramref name="action"/>.
        /// </remarks>
        public static void RunWithFakeThreshold(string name, int value, Action action)
        {
            TypeInfo internalNumber = InternalNumber;
            if (internalNumber == null)
                return; // Internal frame types are not reflectable on AoT platforms. Skip the test.

            FieldInfo field = internalNumber.GetDeclaredField(name);
            if (field == null)
            {
                // Fail with the offending type and field name instead of a bare
                // NullReferenceException; nothing has been modified at this point.
                throw new MissingFieldException(internalNumber.FullName, name);
            }

            int lastValue = (int)field.GetValue(null);
            field.SetValue(null, value);
            try
            {
                action();
            }
            finally
            {
                // Always put the original value back so later tests see the real threshold.
                field.SetValue(null, lastValue);
            }
        }
    }
}
......@@ -36,49 +36,54 @@ public static IEnumerable<object[]> Cultures
[OuterLoop]
public static void RunParseToStringTests(CultureInfo culture)
{
byte[] tempByteArray1 = new byte[0];
using (new ThreadCultureChange(culture))
{
//default style
VerifyDefaultParse(s_random);
//single NumberStyles
VerifyNumberStyles(NumberStyles.None, s_random);
VerifyNumberStyles(NumberStyles.AllowLeadingWhite, s_random);
VerifyNumberStyles(NumberStyles.AllowTrailingWhite, s_random);
VerifyNumberStyles(NumberStyles.AllowLeadingSign, s_random);
VerifyNumberStyles(NumberStyles.AllowTrailingSign, s_random);
VerifyNumberStyles(NumberStyles.AllowParentheses, s_random);
VerifyNumberStyles(NumberStyles.AllowDecimalPoint, s_random);
VerifyNumberStyles(NumberStyles.AllowThousands, s_random);
VerifyNumberStyles(NumberStyles.AllowExponent, s_random);
VerifyNumberStyles(NumberStyles.AllowCurrencySymbol, s_random);
VerifyNumberStyles(NumberStyles.AllowHexSpecifier, s_random);
//composite NumberStyles
VerifyNumberStyles(NumberStyles.Integer, s_random);
VerifyNumberStyles(NumberStyles.HexNumber, s_random);
VerifyNumberStyles(NumberStyles.Number, s_random);
VerifyNumberStyles(NumberStyles.Float, s_random);
VerifyNumberStyles(NumberStyles.Currency, s_random);
VerifyNumberStyles(NumberStyles.Any, s_random);
//invalid number style
// ******InvalidNumberStyles
NumberStyles invalid = (NumberStyles)0x7c00;
AssertExtensions.Throws<ArgumentException>("style", () =>
{
BigInteger.Parse("1", invalid).ToString("d");
});
AssertExtensions.Throws<ArgumentException>("style", () =>
Test();
BigNumberTools.Utils.RunWithFakeThreshold("s_naiveThreshold", 0, Test);
void Test()
{
byte[] tempByteArray1 = new byte[0];
using (new ThreadCultureChange(culture))
{
BigInteger junk;
BigInteger.TryParse("1", invalid, null, out junk);
Assert.Equal("1", junk.ToString("d"));
});
//default style
VerifyDefaultParse(s_random);
//single NumberStyles
VerifyNumberStyles(NumberStyles.None, s_random);
VerifyNumberStyles(NumberStyles.AllowLeadingWhite, s_random);
VerifyNumberStyles(NumberStyles.AllowTrailingWhite, s_random);
VerifyNumberStyles(NumberStyles.AllowLeadingSign, s_random);
VerifyNumberStyles(NumberStyles.AllowTrailingSign, s_random);
VerifyNumberStyles(NumberStyles.AllowParentheses, s_random);
VerifyNumberStyles(NumberStyles.AllowDecimalPoint, s_random);
VerifyNumberStyles(NumberStyles.AllowThousands, s_random);
VerifyNumberStyles(NumberStyles.AllowExponent, s_random);
VerifyNumberStyles(NumberStyles.AllowCurrencySymbol, s_random);
VerifyNumberStyles(NumberStyles.AllowHexSpecifier, s_random);
//composite NumberStyles
VerifyNumberStyles(NumberStyles.Integer, s_random);
VerifyNumberStyles(NumberStyles.HexNumber, s_random);
VerifyNumberStyles(NumberStyles.Number, s_random);
VerifyNumberStyles(NumberStyles.Float, s_random);
VerifyNumberStyles(NumberStyles.Currency, s_random);
VerifyNumberStyles(NumberStyles.Any, s_random);
//invalid number style
// ******InvalidNumberStyles
NumberStyles invalid = (NumberStyles)0x7c00;
AssertExtensions.Throws<ArgumentException>("style", () =>
{
BigInteger.Parse("1", invalid).ToString("d");
});
AssertExtensions.Throws<ArgumentException>("style", () =>
{
BigInteger junk;
BigInteger.TryParse("1", invalid, null, out junk);
Assert.Equal("1", junk.ToString("d"));
});
//FormatProvider tests
RunFormatProviderParseStrings();
//FormatProvider tests
RunFormatProviderParseStrings();
}
}
}
......@@ -93,16 +98,26 @@ public static void RunParseToStringTests(CultureInfo culture)
[InlineData("123456789\0", 0, 10, "123456789")]
public void Parse_Subspan_Success(string input, int offset, int length, string expected)
{
Eval(BigInteger.Parse(input.AsSpan(offset, length)), expected);
Assert.True(BigInteger.TryParse(input.AsSpan(offset, length), out BigInteger test));
Eval(test, expected);
Test();
BigNumberTools.Utils.RunWithFakeThreshold("s_naiveThreshold", 0, Test);
void Test()
{
Eval(BigInteger.Parse(input.AsSpan(offset, length)), expected);
Assert.True(BigInteger.TryParse(input.AsSpan(offset, length), out BigInteger test));
Eval(test, expected);
}
}
[Fact]
public void Parse_EmptySubspan_Fails()
{
Assert.False(BigInteger.TryParse("12345".AsSpan(0, 0), out BigInteger result));
Assert.Equal(0, result);
Test();
BigNumberTools.Utils.RunWithFakeThreshold("s_naiveThreshold", 0, Test);
void Test()
{
Assert.False(BigInteger.TryParse("12345".AsSpan(0, 0), out BigInteger result));
Assert.Equal(0, result);
}
}
[Fact]
......@@ -879,9 +894,10 @@ private static void Eval(BigInteger x, string expected)
x = -x;
}
string actual;
if (x == 0)
{
Assert.Equal("0", expected);
actual = "0";
}
else
{
......@@ -892,10 +908,9 @@ private static void Eval(BigInteger x, string expected)
x = x / 10;
}
number.Reverse();
string actual = new string(number.ToArray());
Assert.Equal(expected, actual);
actual = new string(number.ToArray());
}
Assert.Equal(expected, actual);
}
}
}
......@@ -10,6 +10,7 @@
<Compile Include="BigInteger\BigIntegerToStringTests.cs" />
<Compile Include="BigInteger\BigInteger.AddTests.cs" />
<Compile Include="BigInteger\BigInteger.SubtractTests.cs" />
<Compile Include="BigInteger\BigNumberTools.cs" />
<Compile Include="BigInteger\BigIntTools.cs" />
<Compile Include="BigInteger\cast_from.cs" />
<Compile Include="BigInteger\cast_to.cs" />
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册