未验证 提交 b4e258a3 编写于 作者: J Jesper Meyer 提交者: GitHub

Improve vectorization of String.Split (#64899)

上级 7508080e
......@@ -44,9 +44,24 @@ public int Length
// Appends one item at the current position. When the position lies inside the
// span, the item is stored and the position is advanced here; otherwise the
// call is forwarded to AddWithResize.
// NOTE(review): this listing looks like a rendered diff with removed and added
// lines interleaved; the `pos >= _span.Length` / `Grow()` pair below appears to
// be the pre-change form of the capacity check - confirm against the real file.
public void Append(T item)
{
int pos = _pos;
if (pos >= _span.Length)
Grow();
// Unsigned comparison: a negative pos would wrap to a large value and fail the check.
if ((uint)pos < (uint)_span.Length)
{
_span[pos] = item;
_pos = pos + 1;
}
else
{
AddWithResize(item);
}
}
// Out-of-line path for appending when the buffer is full: grows the buffer,
// stores the item at the old position, then advances the position by one.
// Marked NoInlining so this path is never inlined into its caller.
[MethodImpl(MethodImplOptions.NoInlining)]
private void AddWithResize(T item)
{
    // Only expected to be called when the position sits exactly at the end of the span.
    Debug.Assert(_pos == _span.Length);

    // Capture the insertion slot before growing, then write into the grown span.
    int insertAt = _pos;
    Grow();
    _span[insertAt] = item;
    _pos = insertAt + 1;
}
......
......@@ -1615,8 +1615,7 @@ private void MakeSeparatorList(ReadOnlySpan<char> separators, ref ValueListBuild
sep0 = separators[0];
sep1 = separators.Length > 1 ? separators[1] : sep0;
sep2 = separators.Length > 2 ? separators[2] : sep1;
if (Length >= 16 && Sse41.IsSupported)
if (Vector128.IsHardwareAccelerated && Length >= Vector128<ushort>.Count * 2)
{
MakeSeparatorListVectorized(ref sepListBuilder, sep0, sep1, sep2);
return;
......@@ -1659,75 +1658,54 @@ private void MakeSeparatorList(ReadOnlySpan<char> separators, ref ValueListBuild
// Scans this string's characters and appends to sepListBuilder the index of
// every character equal to c, c2 or c3. Whole vectors are compared first;
// the characters left over are checked one at a time.
// Throws PlatformNotSupportedException when the hardware check below fails.
// NOTE(review): this listing looks like a rendered diff with removed and added
// lines interleaved (Sse2/Sse41/Ssse3 calls next to Vector128 calls, two loop
// headers in a row, two declarations of cmp/mask/curr). It is not compilable as
// shown; the comments below describe only the Vector128-based lines - confirm
// against the real file.
private void MakeSeparatorListVectorized(ref ValueListBuilder<int> sepListBuilder, char c, char c2, char c3)
{
// Redundant test so we won't prejit remainder of this method
// on platforms without SSE.
if (!Sse41.IsSupported)
// on platforms where it is not supported
if (!Vector128.IsHardwareAccelerated)
{
throw new PlatformNotSupportedException();
}
// Constant that allows for the truncation of 16-bit (FFFF/0000) values within a register to 4-bit (F/0)
Vector128<byte> shuffleConstant = Vector128.Create(0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
// The do/while below loads a full vector before testing its condition, so at
// least one vector's worth of characters must be present.
Debug.Assert(Length >= Vector128<ushort>.Count);
nuint offset = 0;
nuint lengthToExamine = (nuint)(uint)Length;
// View the string's characters as 16-bit unsigned values for the vector loads.
ref ushort source = ref Unsafe.As<char, ushort>(ref _firstChar);
// One vector per separator, with the separator value repeated in every lane.
Vector128<ushort> v1 = Vector128.Create((ushort)c);
Vector128<ushort> v2 = Vector128.Create((ushort)c2);
Vector128<ushort> v3 = Vector128.Create((ushort)c3);
ref char c0 = ref MemoryMarshal.GetReference(this.AsSpan());
int cond = Length & -Vector128<ushort>.Count;
int i = 0;
for (; i < cond; i += Vector128<ushort>.Count)
do
{
Vector128<ushort> charVector = ReadVector(ref c0, i);
Vector128<ushort> cmp = Sse2.CompareEqual(charVector, v1);
cmp = Sse2.Or(Sse2.CompareEqual(charVector, v2), cmp);
cmp = Sse2.Or(Sse2.CompareEqual(charVector, v3), cmp);
// Load the vector starting at offset and compare it against each separator;
// the three results are OR-ed together and reinterpreted as bytes.
Vector128<ushort> vector = Vector128.LoadUnsafe(ref source, offset);
Vector128<ushort> v1Eq = Vector128.Equals(vector, v1);
Vector128<ushort> v2Eq = Vector128.Equals(vector, v2);
Vector128<ushort> v3Eq = Vector128.Equals(vector, v3);
Vector128<byte> cmp = (v1Eq | v2Eq | v3Eq).AsByte();
if (Sse41.TestZ(cmp, cmp)) { continue; }
Vector128<byte> mask = Sse2.ShiftRightLogical(cmp.AsUInt64(), 4).AsByte();
mask = Ssse3.Shuffle(mask, shuffleConstant);
uint lowBits = Sse2.ConvertToUInt32(mask.AsUInt32());
mask = Sse2.ShiftRightLogical(mask.AsUInt64(), 32).AsByte();
uint highBits = Sse2.ConvertToUInt32(mask.AsUInt32());
for (int idx = i; lowBits != 0; idx++)
// Only walk the mask when at least one lane matched.
if (cmp != Vector128<byte>.Zero)
{
if ((lowBits & 0xF) != 0)
// Skip every other bit
// cmp is viewed as bytes, so each matching 16-bit lane contributes two
// adjacent mask bits; 0x5555 keeps the even-numbered bit of each pair.
uint mask = cmp.ExtractMostSignificantBits() & 0x5555;
do
{
sepListBuilder.Append(idx);
}
lowBits >>= 8;
// The lowest set bit's index divided by sizeof(char) is the lane index
// within this vector; adding offset gives the index in the string.
uint bitPos = (uint)BitOperations.TrailingZeroCount(mask) / sizeof(char);
sepListBuilder.Append((int)(offset + bitPos));
// Clear the bit just reported so the next iteration finds the next match.
mask = BitOperations.ResetLowestSetBit(mask);
} while (mask != 0);
}
for (int idx = i + 4; highBits != 0; idx++)
{
if ((highBits & 0xF) != 0)
{
sepListBuilder.Append(idx);
}
highBits >>= 8;
}
}
offset += (nuint)Vector128<ushort>.Count;
// Continue while a whole vector still fits before lengthToExamine.
} while (offset <= lengthToExamine - (nuint)Vector128<ushort>.Count);
for (; i < Length; i++)
// Scalar pass over the characters the vector loop did not cover.
while (offset < lengthToExamine)
{
char curr = Unsafe.Add(ref c0, (IntPtr)(uint)i);
char curr = (char)Unsafe.Add(ref source, offset);
if (curr == c || curr == c2 || curr == c3)
{
sepListBuilder.Append(i);
sepListBuilder.Append((int)offset);
}
}
static Vector128<ushort> ReadVector(ref char c0, int offset)
{
ref char ci = ref Unsafe.Add(ref c0, (IntPtr)(uint)offset);
ref byte b = ref Unsafe.As<char, byte>(ref ci);
return Unsafe.ReadUnaligned<Vector128<ushort>>(ref b);
offset++;
}
}
......
......@@ -530,6 +530,7 @@ public static void SplitNullCharArraySeparator_BindsToCharArrayOverload()
[InlineData("this, is, a, string, with some spaces", new[] { ',', 's', 'a' }, M, StringSplitOptions.RemoveEmptyEntries, new[] { "thi", " i", " ", " ", "tring", " with ", "ome ", "p", "ce" })]
[InlineData("this, is, a, string, with some spaces", new[] { ',', 's', 'a' }, M, StringSplitOptions.TrimEntries, new[] { "thi", "", "i", "", "", "", "", "tring", "with", "ome", "p", "ce", "" })]
[InlineData("this, is, a, string, with some spaces", new[] { ',', 's', 'a' }, M, StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries, new[] { "thi", "i", "tring", "with", "ome", "p", "ce" })]
[InlineData("this, is, a, very long string, with some spaces, commas and more spaces", new[] { ',', 's' }, M, StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries, new[] { "thi", "i", "a", "very long", "tring", "with", "ome", "pace", "comma", "and more", "pace" })]
public static void SplitCharArraySeparator(string value, char[] separators, int count, StringSplitOptions options, string[] expected)
{
Assert.Equal(expected, value.Split(separators, count, options));
......@@ -561,6 +562,7 @@ public static void SplitCharArraySeparator(string value, char[] separators, int
[InlineData("this, is, a, string, with some spaces, ", new[] { ",", " s" }, M, StringSplitOptions.RemoveEmptyEntries, new[] { "this", " is", " a", "tring", " with", "ome", "paces", " " })]
[InlineData("this, is, a, string, with some spaces, ", new[] { ",", " s" }, M, StringSplitOptions.TrimEntries, new[] { "this", "is", "a", "", "tring", "with", "ome", "paces", "" })]
[InlineData("this, is, a, string, with some spaces, ", new[] { ",", " s" }, M, StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries, new[] { "this", "is", "a", "tring", "with", "ome", "paces" })]
[InlineData("this, is, a, very long string, with some spaces, commas and more spaces", new[] { ",", " s" }, M, StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries, new[] { "this", "is", "a", "very long", "tring", "with", "ome", "paces", "commas and more", "paces" })]
public static void SplitStringArraySeparator(string value, string[] separators, int count, StringSplitOptions options, string[] expected)
{
Assert.Equal(expected, value.Split(separators, count, options));
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册