Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
dotNET Platform
runtime
提交
c3c0223e
R
runtime
项目概览
dotNET Platform
/
runtime
11 个月 前同步成功
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
R
runtime
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
未验证
提交
c3c0223e
编写于
3月 28, 2022
作者:
E
Egor Bogatov
提交者:
GitHub
3月 28, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Port SequenceEqual to crossplat Vectors, optimize vector compare on x64 (#67202)
上级
56539095
变更
2
隐藏空白更改
内联
并排
Showing
2 changed files
with
52 additions
and
33 deletions
+52
-33
src/coreclr/jit/lowerxarch.cpp
src/coreclr/jit/lowerxarch.cpp
+41
-11
src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs
...ies/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs
+11
-22
未找到文件。
src/coreclr/jit/lowerxarch.cpp
浏览文件 @
c3c0223e
...
...
@@ -1298,17 +1298,28 @@ void Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp)
if
(
simdSize
==
32
)
{
cmpIntrinsic
=
NI_AVX2_CompareEqual
;
mskIntrinsic
=
NI_AVX2_MoveMask
;
// With AVX2 we use testz(xor(v1, v2))
cmpIntrinsic
=
NI_AVX2_Xor
;
mskIntrinsic
=
NI_AVX_TestZ
;
cmpJitType
=
simdBaseJitType
;
mskConstant
=
-
1
;
}
else
{
assert
(
simdSize
==
16
);
cmpIntrinsic
=
NI_SSE2_CompareEqual
;
mskIntrinsic
=
NI_SSE2_MoveMask
;
mskConstant
=
0xFFFF
;
mskConstant
=
0xFFFF
;
if
(
comp
->
compOpportunisticallyDependsOn
(
InstructionSet_SSE41
))
{
// With SSE41 we use testz(xor(v1, v2))
cmpIntrinsic
=
NI_SSE2_Xor
;
mskIntrinsic
=
NI_SSE41_TestZ
;
}
else
{
cmpIntrinsic
=
NI_SSE2_CompareEqual
;
mskIntrinsic
=
NI_SSE2_MoveMask
;
}
}
break
;
}
...
...
@@ -1320,28 +1331,30 @@ void Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp)
if
(
simdSize
==
32
)
{
cmpIntrinsic
=
NI_AVX2_CompareEqual
;
// With AVX2 we use testz(xor(v1, v2))
cmpIntrinsic
=
NI_AVX2_Xor
;
cmpJitType
=
simdBaseJitType
;
mskIntrinsic
=
NI_AVX2_MoveMask
;
mskIntrinsic
=
NI_AVX_TestZ
;
mskConstant
=
-
1
;
}
else
{
assert
(
simdSize
==
16
);
mskConstant
=
0xFFFF
;
if
(
comp
->
compOpportunisticallyDependsOn
(
InstructionSet_SSE41
))
{
cmpIntrinsic
=
NI_SSE41_CompareEqual
;
// With SSE41 we use testz(xor(v1, v2))
mskIntrinsic
=
NI_SSE41_TestZ
;
cmpIntrinsic
=
NI_SSE2_Xor
;
cmpJitType
=
simdBaseJitType
;
}
else
{
mskIntrinsic
=
NI_SSE2_MoveMask
;
cmpIntrinsic
=
NI_SSE2_CompareEqual
;
cmpJitType
=
CORINFO_TYPE_UINT
;
}
mskIntrinsic
=
NI_SSE2_MoveMask
;
mskConstant
=
0xFFFF
;
}
break
;
}
...
...
@@ -1411,6 +1424,23 @@ void Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp)
BlockRange
().
InsertBefore
(
node
,
cmp
);
LowerNode
(
cmp
);
// TestZ(Xor(v1, v2)) is smaller
if
((
mskIntrinsic
==
NI_SSE41_TestZ
)
||
(
mskIntrinsic
==
NI_AVX_TestZ
))
{
// Save cmp's result into a temp
node
->
Op
(
1
)
=
cmp
;
LIR
::
Use
cmpUse
(
BlockRange
(),
&
node
->
Op
(
1
),
node
);
ReplaceWithLclVar
(
cmpUse
);
GenTree
*
cmpClone
=
comp
->
gtClone
(
node
->
Op
(
1
));
BlockRange
().
InsertAfter
(
node
->
Op
(
1
),
cmpClone
);
// Emit vptest(cmp, cmpClone)
node
->
Op
(
2
)
=
cmpClone
;
node
->
ChangeHWIntrinsicId
(
mskIntrinsic
);
LowerHWIntrinsicCC
(
node
,
mskIntrinsic
==
NI_SSE41_TestZ
?
NI_SSE41_PTEST
:
NI_AVX_PTEST
,
cmpCnd
);
return
;
}
GenTree
*
msk
=
comp
->
gtNewSimdHWIntrinsicNode
(
TYP_INT
,
cmp
,
mskIntrinsic
,
mskJitType
,
simdSize
);
BlockRange
().
InsertAfter
(
cmp
,
msk
);
LowerNode
(
msk
);
...
...
src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs
浏览文件 @
c3c0223e
...
...
@@ -1779,11 +1779,10 @@ public static unsafe bool SequenceEqual(ref byte first, ref byte second, nuint l
return
true
;
Vector
:
if
(
Sse2
.
IsSupported
)
if
(
Vector128
.
IsHardwareAccelerated
)
{
if
(
Avx2
.
IsSupported
&&
length
>=
(
nuint
)
Vector256
<
byte
>.
Count
)
if
(
Vector256
.
IsHardwareAccelerated
&&
length
>=
(
nuint
)
Vector256
<
byte
>.
Count
)
{
Vector256
<
byte
>
vecResult
;
nuint
offset
=
0
;
nuint
lengthToExamine
=
length
-
(
nuint
)
Vector256
<
byte
>.
Count
;
// Unsigned, so it shouldn't have overflowed larger than length (rather than negative)
...
...
@@ -1792,8 +1791,8 @@ public static unsafe bool SequenceEqual(ref byte first, ref byte second, nuint l
{
do
{
vecResult
=
Avx2
.
CompareEqual
(
LoadVector256
(
ref
first
,
offset
),
LoadVector256
(
ref
second
,
offset
));
if
(
Avx2
.
MoveMask
(
vecResult
)
!=
-
1
)
if
(
Vector256
.
LoadUnsafe
(
ref
first
,
offset
)
!=
Vector256
.
LoadUnsafe
(
ref
second
,
offset
)
)
{
goto
NotEqual
;
}
...
...
@@ -1802,8 +1801,8 @@ public static unsafe bool SequenceEqual(ref byte first, ref byte second, nuint l
}
// Do final compare as Vector256<byte>.Count from end rather than start
vecResult
=
Avx2
.
CompareEqual
(
LoadVector256
(
ref
first
,
lengthToExamine
),
LoadVector256
(
ref
second
,
lengthToExamine
));
if
(
Avx2
.
MoveMask
(
vecResult
)
==
-
1
)
if
(
Vector256
.
LoadUnsafe
(
ref
first
,
lengthToExamine
)
==
Vector256
.
LoadUnsafe
(
ref
second
,
lengthToExamine
)
)
{
// C# compiler inverts this test, making the outer goto the conditional jmp.
goto
Equal
;
...
...
@@ -1814,7 +1813,6 @@ public static unsafe bool SequenceEqual(ref byte first, ref byte second, nuint l
}
else
if
(
length
>=
(
nuint
)
Vector128
<
byte
>.
Count
)
{
Vector128
<
byte
>
vecResult
;
nuint
offset
=
0
;
nuint
lengthToExamine
=
length
-
(
nuint
)
Vector128
<
byte
>.
Count
;
// Unsigned, so it shouldn't have overflowed larger than length (rather than negative)
...
...
@@ -1823,10 +1821,8 @@ public static unsafe bool SequenceEqual(ref byte first, ref byte second, nuint l
{
do
{
// We use instrincs directly as .Equals calls .AsByte() which doesn't inline at R2R time
// https://github.com/dotnet/runtime/issues/32714
vecResult
=
Sse2
.
CompareEqual
(
LoadVector128
(
ref
first
,
offset
),
LoadVector128
(
ref
second
,
offset
));
if
(
Sse2
.
MoveMask
(
vecResult
)
!=
0xFFFF
)
if
(
Vector128
.
LoadUnsafe
(
ref
first
,
offset
)
!=
Vector128
.
LoadUnsafe
(
ref
second
,
offset
))
{
goto
NotEqual
;
}
...
...
@@ -1835,8 +1831,8 @@ public static unsafe bool SequenceEqual(ref byte first, ref byte second, nuint l
}
// Do final compare as Vector128<byte>.Count from end rather than start
vecResult
=
Sse2
.
CompareEqual
(
LoadVector128
(
ref
first
,
lengthToExamine
),
LoadVector128
(
ref
second
,
lengthToExamine
));
if
(
Sse2
.
MoveMask
(
vecResult
)
==
0xFFFF
)
if
(
Vector128
.
LoadUnsafe
(
ref
first
,
lengthToExamine
)
==
Vector128
.
LoadUnsafe
(
ref
second
,
lengthToExamine
)
)
{
// C# compiler inverts this test, making the outer goto the conditional jmp.
goto
Equal
;
...
...
@@ -1846,13 +1842,6 @@ public static unsafe bool SequenceEqual(ref byte first, ref byte second, nuint l
goto
NotEqual
;
}
}
//else if (AdvSimd.Arm64.IsSupported)
//{
// // This API is not optimized with ARM64 intrinsics because there is not much performance win seen
// // when compared to the vectorized implementation below. In addition to comparing the bytes in chunks of
// // 16-bytes, the only check that is done is if there is a mismatch and if yes, return false. This check
// // done with Vector<T> will generate same code by JIT as that if used ARM64 intrinsic instead.
//}
else
if
(
Vector
.
IsHardwareAccelerated
&&
length
>=
(
nuint
)
Vector
<
byte
>.
Count
)
{
nuint
offset
=
0
;
...
...
@@ -1883,7 +1872,7 @@ public static unsafe bool SequenceEqual(ref byte first, ref byte second, nuint l
}
#if TARGET_64BIT
if
(
Sse2
.
IsSupported
)
if
(
Vector128
.
IsHardwareAccelerated
)
{
Debug
.
Assert
(
length
<=
(
nuint
)
sizeof
(
nuint
)
*
2
);
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录