提交 3968fc42 编写于 作者: M mikedn 提交者: Carol Eidt

Lower SSE compare scalar and test nodes (dotnet/coreclr#22043)

* Lower SSE compare scalar and test nodes

* Remove bogus instructions from intrinsic table

* Cleanup genHWIntrinsic_R_RM

* Add tests

* Adjust comments


Commit migrated from https://github.com/dotnet/coreclr/commit/013e941c9ce45a57503c1ac3c43c081889e60ffa
上级 ab26fd10
......@@ -984,7 +984,7 @@ protected:
#ifdef FEATURE_HW_INTRINSICS
void genHWIntrinsic(GenTreeHWIntrinsic* node);
#if defined(_TARGET_XARCH_)
void genHWIntrinsic_R_RM(GenTreeHWIntrinsic* node, instruction ins, emitAttr attr);
void genHWIntrinsic_R_RM(GenTreeHWIntrinsic* node, instruction ins, emitAttr attr, regNumber reg, GenTree* rmOp);
void genHWIntrinsic_R_RM_I(GenTreeHWIntrinsic* node, instruction ins, int8_t ival);
void genHWIntrinsic_R_R_RM(GenTreeHWIntrinsic* node, instruction ins, emitAttr attr);
void genHWIntrinsic_R_R_RM(
......
......@@ -5922,8 +5922,8 @@ struct GenCondition
C = Unsigned | S, // = 14
NC = Unsigned | NS, // = 15
FEQ = Float | EQ, // = 16
FNE = Float | NE, // = 17
FEQ = Float | 0, // = 16
FNE = Float | 1, // = 17
FLT = Float | SLT, // = 18
FLE = Float | SLE, // = 19
FGE = Float | SGE, // = 20
......
......@@ -2974,6 +2974,109 @@ GenTree* Lowering::LowerJTrue(GenTreeOp* jtrue)
return nullptr;
}
//----------------------------------------------------------------------------------------------
// LowerNodeCC: Lowers a node that produces a boolean value by setting the condition flags.
//
// Arguments:
// node - The node to lower
// condition - The condition code of the generated SETCC/JCC node
//
// Return Value:
// A SETCC/JCC node or nullptr if `node` is not used. nullptr is also returned
// when the node that follows is a JTRUE that does not consume `node` (or the
// last relop of the skipped chain), and when no use can be found for it.
//
// Notes:
// This simply replaces `node`'s use with an appropriate SETCC/JCC node,
// `node` is not actually changed, except by having its GTF_SET_FLAGS set.
// It's the caller's responsibility to change `node` such that it only
// sets the condition flags, without producing a boolean value.
// GTF_SET_FLAGS is only set when a SETCC/JCC node was actually produced.
//
GenTreeCC* Lowering::LowerNodeCC(GenTree* node, GenCondition condition)
{
// Skip over a chain of EQ/NE(x, 0) relops. This may be present either
// because `node` is not a relop and so it cannot be used directly by a
// JTRUE, or because the frontend failed to remove a EQ/NE(x, 0) that's
// used as logical negation.
//
// Usually there's only one such relop but there's little difference
// between removing one or all so we may as well remove them all.
//
// We can't allow any other nodes between `node` and its user because we
// have no way of knowing if those nodes change flags or not. So we're looking
// to skip over a sequence of appropriately connected zero and EQ/NE nodes.
// The x in EQ/NE(x, 0)
GenTree* relop = node;
// The first node of the relop sequence
GenTree* first = node->gtNext;
// The node following the relop sequence
GenTree* next = first;
// Each iteration matches the exact pair "0, EQ/NE(relop, 0)" in execution order:
// `next` must be the zero constant and `next->gtNext` the relop that consumes
// both the previous value and that constant.
while ((next != nullptr) && next->IsIntegralConst(0) && (next->gtNext != nullptr) &&
next->gtNext->OperIs(GT_EQ, GT_NE) && (next->gtNext->AsOp()->gtGetOp1() == relop) &&
(next->gtNext->AsOp()->gtGetOp2() == next))
{
relop = next->gtNext;
next = relop->gtNext;
// EQ(x, 0) negates x, so the condition is reversed; NE(x, 0) leaves it as is.
if (relop->OperIs(GT_EQ))
{
condition = GenCondition::Reverse(condition);
}
}
GenTreeCC* cc = nullptr;
// Next may be null if `node` is not used. In that case we don't need to generate a SETCC node.
if (next != nullptr)
{
if (next->OperIs(GT_JTRUE))
{
// If the instruction immediately following 'relop', i.e. 'next' is a conditional branch,
// it should always have 'relop' as its 'op1'. If it doesn't, then we have improperly
// constructed IL (the setting of a condition code should always immediately precede its
// use, since the JIT doesn't track dataflow for condition codes). Still, if it happens
// it's not our problem, it simply means that `node` is not used and can be removed.
if (next->AsUnOp()->gtGetOp1() == relop)
{
assert(relop->OperIsCompare());
// Reuse the JTRUE node itself as the JCC instead of allocating a new node.
next->ChangeOper(GT_JCC);
cc = next->AsCC();
cc->gtCondition = condition;
}
}
else
{
// If the node is used by something other than a JTRUE then we need to insert a
// SETCC node to materialize the boolean value.
LIR::Use use;
if (BlockRange().TryGetUse(relop, &use))
{
cc = new (comp, GT_SETCC) GenTreeCC(GT_SETCC, condition, TYP_INT);
// The SETCC goes right after `node`, ahead of any EQ/NE(x, 0) chain,
// which is removed further below.
BlockRange().InsertAfter(node, cc);
use.ReplaceWith(comp, cc);
}
}
}
// Only mark the producer/consumer pair when a consumer was actually created.
if (cc != nullptr)
{
node->gtFlags |= GTF_SET_FLAGS;
cc->gtFlags |= GTF_USE_FLAGS;
}
// Remove the chain of EQ/NE(x, 0) relop nodes, if any. Note that if a SETCC was
// inserted after `node`, `first` still points to the node that was initially
// after `node`.
if (relop != node)
{
BlockRange().Remove(first, relop);
}
return cc;
}
// Lower "jmp <method>" tail call to insert PInvoke method epilog if required.
void Lowering::LowerJmpMethod(GenTree* jmp)
{
......
......@@ -130,6 +130,7 @@ private:
GenTree* OptimizeConstCompare(GenTree* cmp);
GenTree* LowerCompare(GenTree* cmp);
GenTree* LowerJTrue(GenTreeOp* jtrue);
GenTreeCC* LowerNodeCC(GenTree* node, GenCondition condition);
void LowerJmpMethod(GenTree* jmp);
void LowerRet(GenTree* ret);
GenTree* LowerDelegateInvoke(GenTreeCall* call);
......@@ -309,6 +310,7 @@ private:
#endif // FEATURE_SIMD
#ifdef FEATURE_HW_INTRINSICS
void LowerHWIntrinsic(GenTreeHWIntrinsic* node);
void LowerHWIntrinsicCC(GenTreeHWIntrinsic* node, NamedIntrinsic newIntrinsicId, GenCondition condition);
#endif // FEATURE_HW_INTRINSICS
// Utility functions
......
......@@ -837,77 +837,101 @@ void Lowering::LowerSIMD(GenTreeSIMD* simdNode)
}
else if (simdNode->IsSIMDEqualityOrInequality())
{
LIR::Use simdUse;
LowerNodeCC(simdNode,
simdNode->gtSIMDIntrinsicID == SIMDIntrinsicOpEquality ? GenCondition::EQ : GenCondition::NE);
if (BlockRange().TryGetUse(simdNode, &simdUse))
{
//
// Try to transform JTRUE(EQ|NE(SIMD<OpEquality|OpInEquality>(x, y), 0|1)) into
// JCC(SIMD<OpEquality|OpInEquality>(x, y)). SIMD<OpEquality|OpInEquality>(x, y)
// is expected to set the Zero flag appropriately.
// All the involved nodes must form a continuous range, there's no other way to
// guarantee that condition flags aren't changed between the SIMD node and the JCC
// node.
//
simdNode->gtType = TYP_VOID;
simdNode->ClearUnusedValue();
}
#endif
ContainCheckSIMD(simdNode);
}
#endif // FEATURE_SIMD
bool transformed = false;
GenTree* simdUser = simdUse.User();
#ifdef FEATURE_HW_INTRINSICS
if (simdUser->OperIs(GT_EQ, GT_NE) && simdUser->gtGetOp2()->IsCnsIntOrI() &&
(simdNode->gtNext == simdUser->gtGetOp2()) && (simdUser->gtGetOp2()->gtNext == simdUser))
//----------------------------------------------------------------------------------------------
// LowerHWIntrinsicCC: Lowers a hardware intrinsic node that produces a boolean value by
// setting the condition flags.
//
// Arguments:
// node - The hardware intrinsic node
// newIntrinsicId - The intrinsic id of the lowered intrinsic node
// condition - The condition code of the generated SETCC/JCC node
//
void Lowering::LowerHWIntrinsicCC(GenTreeHWIntrinsic* node, NamedIntrinsic newIntrinsicId, GenCondition condition)
{
GenTreeCC* cc = LowerNodeCC(node, condition);
node->gtHWIntrinsicId = newIntrinsicId;
node->gtType = TYP_VOID;
node->ClearUnusedValue();
bool swapOperands = false;
bool canSwapOperands = false;
switch (newIntrinsicId)
{
case NI_SSE_COMISS:
case NI_SSE_UCOMISS:
case NI_SSE2_COMISD:
case NI_SSE2_UCOMISD:
// In some cases we can generate better code if we swap the operands:
// - If the condition is not one of the "preferred" floating point conditions we can swap
// the operands and change the condition to avoid generating an extra JP/JNP branch.
// - If the first operand can be contained but the second cannot, we can swap operands in
// order to be able to contain the first operand and avoid the need for a temp reg.
// We can't handle both situations at the same time and since an extra branch is likely to
// be worse than an extra temp reg (x64 has a reasonable number of XMM registers) we'll favor
// the branch case:
// - If the condition is not preferred then swap, even if doing this will later prevent
// containment.
// - Allow swapping for containment purposes only if this doesn't result in a non-"preferred"
// condition being generated.
if ((cc != nullptr) && cc->gtCondition.PreferSwap())
{
ssize_t relopOp2Value = simdUser->gtGetOp2()->AsIntCon()->IconValue();
if ((relopOp2Value == 0) || (relopOp2Value == 1))
{
GenTree* jtrue = simdUser->gtNext;
swapOperands = true;
}
else
{
canSwapOperands = (cc == nullptr) || !GenCondition::Swap(cc->gtCondition).PreferSwap();
}
break;
if ((jtrue != nullptr) && jtrue->OperIs(GT_JTRUE) && (jtrue->gtGetOp1() == simdUser))
{
if ((simdNode->gtSIMDIntrinsicID == SIMDIntrinsicOpEquality) != simdUser->OperIs(GT_EQ))
{
relopOp2Value ^= 1;
}
case NI_SSE41_PTEST:
case NI_AVX_PTEST:
// If we need the Carry flag then we can't swap operands.
canSwapOperands = (cc == nullptr) || cc->gtCondition.Is(GenCondition::EQ, GenCondition::NE);
break;
jtrue->ChangeOper(GT_JCC);
GenTreeCC* jcc = jtrue->AsCC();
jcc->gtFlags |= GTF_USE_FLAGS;
jcc->gtCondition = (relopOp2Value == 0) ? GenCondition::NE : GenCondition::EQ;
default:
unreached();
}
BlockRange().Remove(simdUser->gtGetOp2());
BlockRange().Remove(simdUser);
transformed = true;
}
}
}
if (canSwapOperands)
{
bool op1SupportsRegOptional = false;
bool op2SupportsRegOptional = false;
if (!transformed)
{
//
// The code generated for SIMD SIMD<OpEquality|OpInEquality>(x, y) nodes sets
// the Zero flag like integer compares do so we can simply use SETCC<EQ|NE>
// to produce the desired result. This avoids the need for subsequent phases
// to have to handle 2 cases (set flags/set destination register).
//
GenCondition condition =
(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicOpEquality) ? GenCondition::EQ : GenCondition::NE;
GenTreeCC* setcc = new (comp, GT_SETCC) GenTreeCC(GT_SETCC, condition, simdNode->TypeGet());
setcc->gtFlags |= GTF_USE_FLAGS;
BlockRange().InsertAfter(simdNode, setcc);
simdUse.ReplaceWith(comp, setcc);
}
if (!IsContainableHWIntrinsicOp(node, node->gtGetOp2(), &op2SupportsRegOptional) &&
IsContainableHWIntrinsicOp(node, node->gtGetOp1(), &op1SupportsRegOptional))
{
// Swap operands if op2 cannot be contained but op1 can.
swapOperands = true;
}
}
simdNode->gtFlags |= GTF_SET_FLAGS;
simdNode->gtType = TYP_VOID;
if (swapOperands)
{
std::swap(node->gtOp1, node->gtOp2);
if (cc != nullptr)
{
cc->gtCondition = GenCondition::Swap(cc->gtCondition);
}
}
#endif
ContainCheckSIMD(simdNode);
}
#endif // FEATURE_SIMD
#ifdef FEATURE_HW_INTRINSICS
//----------------------------------------------------------------------------------------------
// Lowering::LowerHWIntrinsic: Perform containment analysis for a hardware intrinsic node.
//
......@@ -916,6 +940,108 @@ void Lowering::LowerSIMD(GenTreeSIMD* simdNode)
//
void Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node)
{
// Intrinsics that produce a boolean value are rewritten by LowerHWIntrinsicCC:
// the second argument is the intrinsic id the node is changed to and the third
// is the condition given to the generated SETCC/JCC node. Every other intrinsic
// falls through to the default case and only gets the containment check below.
switch (node->gtHWIntrinsicId)
{
// SSE scalar compares: Ordered variants map to NI_SSE_COMISS.
// NotEqual uses FNEU - presumably "not equal or unordered" so that NaN
// operands compare as not equal; confirm against GenCondition.
case NI_SSE_CompareScalarOrderedEqual:
LowerHWIntrinsicCC(node, NI_SSE_COMISS, GenCondition::FEQ);
break;
case NI_SSE_CompareScalarOrderedNotEqual:
LowerHWIntrinsicCC(node, NI_SSE_COMISS, GenCondition::FNEU);
break;
case NI_SSE_CompareScalarOrderedLessThan:
LowerHWIntrinsicCC(node, NI_SSE_COMISS, GenCondition::FLT);
break;
case NI_SSE_CompareScalarOrderedLessThanOrEqual:
LowerHWIntrinsicCC(node, NI_SSE_COMISS, GenCondition::FLE);
break;
case NI_SSE_CompareScalarOrderedGreaterThan:
LowerHWIntrinsicCC(node, NI_SSE_COMISS, GenCondition::FGT);
break;
case NI_SSE_CompareScalarOrderedGreaterThanOrEqual:
LowerHWIntrinsicCC(node, NI_SSE_COMISS, GenCondition::FGE);
break;
// SSE scalar compares: Unordered variants map to NI_SSE_UCOMISS with the
// same conditions as their Ordered counterparts.
case NI_SSE_CompareScalarUnorderedEqual:
LowerHWIntrinsicCC(node, NI_SSE_UCOMISS, GenCondition::FEQ);
break;
case NI_SSE_CompareScalarUnorderedNotEqual:
LowerHWIntrinsicCC(node, NI_SSE_UCOMISS, GenCondition::FNEU);
break;
case NI_SSE_CompareScalarUnorderedLessThanOrEqual:
LowerHWIntrinsicCC(node, NI_SSE_UCOMISS, GenCondition::FLE);
break;
case NI_SSE_CompareScalarUnorderedLessThan:
LowerHWIntrinsicCC(node, NI_SSE_UCOMISS, GenCondition::FLT);
break;
case NI_SSE_CompareScalarUnorderedGreaterThanOrEqual:
LowerHWIntrinsicCC(node, NI_SSE_UCOMISS, GenCondition::FGE);
break;
case NI_SSE_CompareScalarUnorderedGreaterThan:
LowerHWIntrinsicCC(node, NI_SSE_UCOMISS, GenCondition::FGT);
break;
// SSE2 scalar compares: Ordered variants map to NI_SSE2_COMISD.
case NI_SSE2_CompareScalarOrderedEqual:
LowerHWIntrinsicCC(node, NI_SSE2_COMISD, GenCondition::FEQ);
break;
case NI_SSE2_CompareScalarOrderedNotEqual:
LowerHWIntrinsicCC(node, NI_SSE2_COMISD, GenCondition::FNEU);
break;
case NI_SSE2_CompareScalarOrderedLessThan:
LowerHWIntrinsicCC(node, NI_SSE2_COMISD, GenCondition::FLT);
break;
case NI_SSE2_CompareScalarOrderedLessThanOrEqual:
LowerHWIntrinsicCC(node, NI_SSE2_COMISD, GenCondition::FLE);
break;
case NI_SSE2_CompareScalarOrderedGreaterThan:
LowerHWIntrinsicCC(node, NI_SSE2_COMISD, GenCondition::FGT);
break;
case NI_SSE2_CompareScalarOrderedGreaterThanOrEqual:
LowerHWIntrinsicCC(node, NI_SSE2_COMISD, GenCondition::FGE);
break;
// SSE2 scalar compares: Unordered variants map to NI_SSE2_UCOMISD.
case NI_SSE2_CompareScalarUnorderedEqual:
LowerHWIntrinsicCC(node, NI_SSE2_UCOMISD, GenCondition::FEQ);
break;
case NI_SSE2_CompareScalarUnorderedNotEqual:
LowerHWIntrinsicCC(node, NI_SSE2_UCOMISD, GenCondition::FNEU);
break;
case NI_SSE2_CompareScalarUnorderedLessThanOrEqual:
LowerHWIntrinsicCC(node, NI_SSE2_UCOMISD, GenCondition::FLE);
break;
case NI_SSE2_CompareScalarUnorderedLessThan:
LowerHWIntrinsicCC(node, NI_SSE2_UCOMISD, GenCondition::FLT);
break;
case NI_SSE2_CompareScalarUnorderedGreaterThanOrEqual:
LowerHWIntrinsicCC(node, NI_SSE2_UCOMISD, GenCondition::FGE);
break;
case NI_SSE2_CompareScalarUnorderedGreaterThan:
LowerHWIntrinsicCC(node, NI_SSE2_UCOMISD, GenCondition::FGT);
break;
// SSE4.1 Test* intrinsics map to NI_SSE41_PTEST: TestC uses condition C,
// TestZ uses EQ and TestNotZAndNotC uses UGT.
case NI_SSE41_TestC:
LowerHWIntrinsicCC(node, NI_SSE41_PTEST, GenCondition::C);
break;
case NI_SSE41_TestZ:
LowerHWIntrinsicCC(node, NI_SSE41_PTEST, GenCondition::EQ);
break;
case NI_SSE41_TestNotZAndNotC:
LowerHWIntrinsicCC(node, NI_SSE41_PTEST, GenCondition::UGT);
break;
// AVX Test* intrinsics map to NI_AVX_PTEST with the same conditions as SSE4.1.
case NI_AVX_TestC:
LowerHWIntrinsicCC(node, NI_AVX_PTEST, GenCondition::C);
break;
case NI_AVX_TestZ:
LowerHWIntrinsicCC(node, NI_AVX_PTEST, GenCondition::EQ);
break;
case NI_AVX_TestNotZAndNotC:
LowerHWIntrinsicCC(node, NI_AVX_PTEST, GenCondition::UGT);
break;
default:
break;
}
// Containment analysis runs for every node, whether or not it was rewritten above.
ContainCheckHWIntrinsic(node);
}
#endif // FEATURE_HW_INTRINSICS
......@@ -2999,35 +3125,6 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
case HW_Category_SIMDScalar:
case HW_Category_Scalar:
{
if (HWIntrinsicInfo::GeneratesMultipleIns(intrinsicId))
{
switch (intrinsicId)
{
case NI_SSE_CompareScalarOrderedLessThan:
case NI_SSE_CompareScalarUnorderedLessThan:
case NI_SSE_CompareScalarOrderedLessThanOrEqual:
case NI_SSE_CompareScalarUnorderedLessThanOrEqual:
case NI_SSE2_CompareScalarOrderedLessThan:
case NI_SSE2_CompareScalarUnorderedLessThan:
case NI_SSE2_CompareScalarOrderedLessThanOrEqual:
case NI_SSE2_CompareScalarUnorderedLessThanOrEqual:
{
// We need to swap the operands for CompareLessThanOrEqual
node->gtOp1 = op2;
node->gtOp2 = op1;
op2 = op1;
break;
}
default:
{
// TODO-XArch-CQ: The CompareScalarOrdered* and CompareScalarUnordered* methods
// are commutative if you also inverse the intrinsic.
break;
}
}
}
bool supportsRegOptional = false;
if (IsContainableHWIntrinsicOp(node, op2, &supportsRegOptional))
......
......@@ -2436,20 +2436,6 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
break;
}
case NI_SSE_CompareScalarOrderedEqual:
case NI_SSE_CompareScalarUnorderedEqual:
case NI_SSE_CompareScalarOrderedNotEqual:
case NI_SSE_CompareScalarUnorderedNotEqual:
case NI_SSE2_CompareScalarOrderedEqual:
case NI_SSE2_CompareScalarUnorderedEqual:
case NI_SSE2_CompareScalarOrderedNotEqual:
case NI_SSE2_CompareScalarUnorderedNotEqual:
{
buildInternalIntRegisterDefForNode(intrinsicTree, allByteRegs());
setInternalRegsDelayFree = true;
break;
}
case NI_SSE2_MaskMove:
{
assert(numArgs == 3);
......
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<OutputType>Exe</OutputType>
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
</PropertyGroup>
<PropertyGroup>
<DebugType>Embedded</DebugType>
<Optimize>True</Optimize>
</PropertyGroup>
<ItemGroup>
<Compile Include="$(MSBuildProjectName).cs" />
</ItemGroup>
</Project>
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
// This C# script can be executed using the csi
// tool found in Tools\net46\roslyn\tools.
//
// It produces a C# file (on stdout) containing tests for various
// COMISS/UCOMISS/PTEST/VTESTPS/VTESTPD based intrinsics.
using System;
using System.Collections.Generic;
using System.IO;
// Describes how a generated test method consumes the boolean result of the
// intrinsic under test. The values are bit flags and can be combined.
[Flags]
enum TestKind
{
    // The intrinsic result is returned unchanged.
    Normal = 0,

    // The intrinsic result is logically negated before being returned.
    LogicalNot = 1 << 0,

    // The intrinsic result is consumed by a conditional branch.
    Branch = 1 << 1,

    // The first operand is left in memory while the second one is forced
    // into a register, in an attempt to make the operands get swapped.
    Swap = 1 << 2
}
// Appends to `tests` one BinaryOpTest per combination of CompareScalar*
// intrinsic, ISA class (Sse with Single elements, Sse2 with Double elements)
// and test kind. Tests are added intrinsic first, then ISA, then kind.
void GenerateCompareTests(List<Test> tests)
{
    // Operand pairs shared by every test: equal, less than, greater than,
    // one NaN operand and two NaN operands.
    (double x, double y)[] operands =
    {
        (42.0, 42.0),
        (41.0, 42.0),
        (42.0, 41.0),
        (42.0, double.NaN),
        (double.NaN, double.NaN)
    };

    // Managed comparisons that provide the expected result of each intrinsic.
    Func<double, double, bool> equal = (x, y) => x == y;
    Func<double, double, bool> notEqual = (x, y) => x != y;
    Func<double, double, bool> lessThan = (x, y) => x < y;
    Func<double, double, bool> lessThanOrEqual = (x, y) => x <= y;
    Func<double, double, bool> greaterThan = (x, y) => x > y;
    Func<double, double, bool> greaterThanOrEqual = (x, y) => x >= y;

    var intrinsics = new (string name, Func<double, double, bool> op)[]
    {
        ("CompareScalarOrderedEqual", equal),
        ("CompareScalarOrderedNotEqual", notEqual),
        ("CompareScalarOrderedLessThan", lessThan),
        ("CompareScalarOrderedLessThanOrEqual", lessThanOrEqual),
        ("CompareScalarOrderedGreaterThan", greaterThan),
        ("CompareScalarOrderedGreaterThanOrEqual", greaterThanOrEqual),
        ("CompareScalarUnorderedEqual", equal),
        ("CompareScalarUnorderedNotEqual", notEqual),
        ("CompareScalarUnorderedLessThan", lessThan),
        ("CompareScalarUnorderedLessThanOrEqual", lessThanOrEqual),
        ("CompareScalarUnorderedGreaterThan", greaterThan),
        ("CompareScalarUnorderedGreaterThanOrEqual", greaterThanOrEqual)
    };

    var targets = new (string isa, int vectorSize, string vectorElementType)[]
    {
        ("Sse", 128, "Single"),
        ("Sse2", 128, "Double")
    };

    TestKind[] kinds =
    {
        TestKind.Normal,
        TestKind.LogicalNot,
        TestKind.Branch,
        TestKind.Swap,
        TestKind.Swap | TestKind.Branch
    };

    foreach (var (name, op) in intrinsics)
    {
        foreach (var (isa, vectorSize, vectorElementType) in targets)
        {
            foreach (TestKind kind in kinds)
            {
                tests.Add(new BinaryOpTest<double>(isa, name, vectorSize, vectorElementType, kind, op, operands));
            }
        }
    }
}
// Appends to `tests` one BinaryOpTest per combination of TestZ/TestC/
// TestNotZAndNotC intrinsic, ISA class/vector size (all with Int32 elements)
// and test kind. Tests are added intrinsic first, then ISA, then kind.
void GeneratePackedIntTestTests(List<Test> tests)
{
    (int x, int y)[] operands =
    {
        (0, 0),
        (1, 2),
        (2, 3),
        (3, 2)
    };

    // Expected results computed on the scalar value that fills every vector element.
    Func<int, int, bool> testZ = (x, y) => (x & y) == 0;
    Func<int, int, bool> testC = (x, y) => (~x & y) == 0;
    Func<int, int, bool> testNotZAndNotC = (x, y) => !testZ(x, y) & !testC(x, y);

    var intrinsics = new (string name, Func<int, int, bool> op)[]
    {
        ("TestZ", testZ),
        ("TestC", testC),
        ("TestNotZAndNotC", testNotZAndNotC)
    };

    var targets = new (string isa, int vectorSize, string vectorElementType)[]
    {
        ("Sse41", 128, "Int32"),
        ("Avx", 128, "Int32"),
        ("Avx", 256, "Int32")
    };

    TestKind[] kinds =
    {
        TestKind.Normal,
        TestKind.LogicalNot,
        TestKind.Branch,
        TestKind.Swap,
        TestKind.Swap | TestKind.LogicalNot
    };

    foreach (var (name, op) in intrinsics)
    {
        foreach (var (isa, vectorSize, vectorElementType) in targets)
        {
            foreach (TestKind kind in kinds)
            {
                tests.Add(new BinaryOpTest<int>(isa, name, vectorSize, vectorElementType, kind, op, operands));
            }
        }
    }
}
// Appends to `tests` one BinaryOpTest per combination of TestZ/TestC/
// TestNotZAndNotC intrinsic, Avx vector size and test kind, using floating
// point vectors. Expected results are derived from the sign of the operands.
// Tests are added intrinsic first, then vector size, then kind.
//
// NOTE(review): operands are declared as double but only "Single" element
// vectors are generated here; no "Double" variants are produced - confirm
// that this is intended.
void GeneratePackedDoubleTestTests(List<Test> tests)
{
    (double x, double y)[] operands =
    {
        (1.0, 1.0),
        (1.0, -1.0),
        (-1.0, -1.0)
    };

    Func<double, bool> isNegative = d => d < 0.0;
    Func<double, double, bool> testZ = (x, y) => !(isNegative(x) & isNegative(y));
    Func<double, double, bool> testC = (x, y) => !(!isNegative(x) & isNegative(y));
    Func<double, double, bool> testNotZAndNotC = (x, y) => !testZ(x, y) && !testC(x, y);

    var intrinsics = new (string name, Func<double, double, bool> op)[]
    {
        ("TestZ", testZ),
        ("TestC", testC),
        ("TestNotZAndNotC", testNotZAndNotC)
    };

    var targets = new (string isa, int vectorSize, string vectorElementType)[]
    {
        ("Avx", 128, "Single"),
        ("Avx", 256, "Single")
    };

    TestKind[] kinds =
    {
        TestKind.Normal,
        TestKind.LogicalNot,
        TestKind.Branch,
        TestKind.Swap,
        TestKind.Swap | TestKind.Branch | TestKind.LogicalNot
    };

    foreach (var (name, op) in intrinsics)
    {
        foreach (var (isa, vectorSize, vectorElementType) in targets)
        {
            foreach (TestKind kind in kinds)
            {
                tests.Add(new BinaryOpTest<double>(isa, name, vectorSize, vectorElementType, kind, op, operands));
            }
        }
    }
}
// Returns the C# source text of an expression that creates a floating point
// vector with all elements set to `value`.
//
// vectorSize        - vector width in bits, used to form the "Vector{N}" type name
// vectorElementType - "Single" or "Double"; selects a float or double literal
// value             - element value; NaN is emitted as float.NaN/double.NaN
//
// Throws NotSupportedException for any other element type.
static string CreateVector(int vectorSize, string vectorElementType, double value)
{
    // The result is C# source code, so the number must be formatted with the
    // invariant culture. With the current culture a comma decimal separator
    // would produce "42,0f", which is not a valid literal.
    string number = value.ToString("F1", System.Globalization.CultureInfo.InvariantCulture);

    if (vectorElementType == "Single")
    {
        return double.IsNaN(value) ? $"Vector{vectorSize}.Create(float.NaN)" : $"Vector{vectorSize}.Create({number}f)";
    }

    if (vectorElementType == "Double")
    {
        return double.IsNaN(value) ? $"Vector{vectorSize}.Create(double.NaN)" : $"Vector{vectorSize}.Create({number})";
    }

    throw new NotSupportedException($"Unsupported vector element type '{vectorElementType}'.");
}
// Returns the C# source text of an expression that creates an integer vector
// with all elements set to `value`.
//
// vectorSize - vector width in bits, used to form the "Vector{N}" type name
// value      - element value
static string CreateVector(int vectorSize, int value)
{
    // The result is C# source code, so format with the invariant culture: the
    // current culture may use a negative sign that is not valid in a literal.
    string literal = value.ToString(System.Globalization.CultureInfo.InvariantCulture);
    return $"Vector{vectorSize}.Create({literal})";
}
// Dispatches on the runtime type of `value` to the matching CreateVector
// overload: double values go to the floating point overload and int values
// to the integer overload. Any other type (or null) is not supported.
static string CreateVector<T>(int vectorSize, string vectorElementType, T value)
{
    switch (value)
    {
        case double floatingPoint:
            return CreateVector(vectorSize, vectorElementType, floatingPoint);

        case int integer:
            return CreateVector(vectorSize, integer);

        default:
            throw new NotSupportedException();
    }
}
// Base class of the generated tests. It holds what identifies a test (ISA
// class, intrinsic name, vector shape and test kind) and declares the two
// pieces every test emits: the test method and the calls that exercise it.
abstract class Test
{
    // Name of the ISA class that exposes the intrinsic (e.g. "Sse").
    public readonly string Isa;
    // Name of the intrinsic method.
    public readonly string Intrinsic;
    // Vector width in bits.
    public readonly int VectorSize;
    // Name of the vector element type (e.g. "Single").
    public readonly string VectorElementType;
    // Full vector type name, "Vector{VectorSize}<{VectorElementType}>".
    public readonly string VectorType;
    // How the test consumes the intrinsic result.
    public readonly TestKind Kind;

    public Test(string isa, string intrinsic, int vectorSize, string vectorElementType, TestKind kind)
    {
        Isa = isa;
        Intrinsic = intrinsic;
        Kind = kind;
        VectorSize = vectorSize;
        VectorElementType = vectorElementType;
        VectorType = $"Vector{vectorSize}<{vectorElementType}>";
    }

    // Name of the generated test method. Combined kinds are formatted as
    // "A, B" by ToString, so commas become underscores and spaces are dropped.
    public string Name
    {
        get
        {
            string kindText = Kind.ToString().Replace(',', '_').Replace(" ", "");
            return $"Test_{Isa}_{Intrinsic}_{kindText}";
        }
    }

    // Writes the source of the test method.
    public abstract void WriteTestMethod(TextWriter w);

    // Writes one statement per input that calls the test method and checks its result.
    public abstract void WriteTestCases(TextWriter w);
}
// Test for an intrinsic that takes a single vector operand and returns a bool.
class UnaryOpTest<T> : Test
{
    // Computes the expected intrinsic result for a given element value.
    readonly Func<T, bool> _op;
    // Element values the test is run with.
    readonly T[] _inputs;

    public UnaryOpTest(string isa, string intrinsic, int vectorSize, string vectorElementType, TestKind kind, Func<T, bool> op, T[] inputs)
        : base(isa, intrinsic, vectorSize, vectorElementType, kind)
    {
        _op = op;
        _inputs = inputs;
    }

    // Writes a non-inlined method that calls the intrinsic on its operand,
    // optionally negating the result and/or consuming it through a branch.
    public override void WriteTestMethod(TextWriter w)
    {
        string negation = Kind.HasFlag(TestKind.LogicalNot) ? "!" : "";
        string call = $"{Isa}.{Intrinsic}(x)";
        string branch = Kind.HasFlag(TestKind.Branch) ? " ? True() : False()" : "";

        w.WriteLine();
        w.WriteLine(" [MethodImpl(MethodImplOptions.NoInlining)]");
        w.WriteLine($" static bool {Name}(in {VectorType} x)");
        w.WriteLine(" {");
        w.Write(" return ");
        w.Write(negation);
        w.Write(call);
        w.Write(branch);
        w.WriteLine(";");
        w.WriteLine(" }");
    }

    // Returns the expected result for `x` as a C# bool literal, taking the
    // LogicalNot flag into account.
    string Check(T x)
    {
        bool expected = _op(x);
        if (Kind.HasFlag(TestKind.LogicalNot))
        {
            expected = !expected;
        }
        return expected ? "true" : "false";
    }

    // Writes one check statement per input; a check passes trivially when
    // the ISA is not supported.
    public override void WriteTestCases(TextWriter w)
    {
        foreach (T input in _inputs)
        {
            string expected = Check(input);
            string operand = CreateVector(VectorSize, VectorElementType, input);
            w.WriteLine($" r &= !{Isa}.IsSupported || Check({expected}, {Name}({operand}));");
        }
    }
}
// Test for an intrinsic that takes two vector operands and returns a bool.
class BinaryOpTest<T> : Test
{
    // Computes the expected intrinsic result for a pair of element values.
    readonly Func<T, T, bool> _op;
    // Pairs of element values the test is run with.
    readonly (T x, T y)[] _inputs;

    public BinaryOpTest(string isa, string intrinsic, int vectorSize, string vectorElementType, TestKind kind, Func<T, T, bool> op, (T x, T y)[] inputs)
        : base(isa, intrinsic, vectorSize, vectorElementType, kind)
    {
        _op = op;
        _inputs = inputs;
    }

    // Writes a non-inlined method that calls the intrinsic on its operands,
    // optionally negating the result and/or consuming it through a branch.
    public override void WriteTestMethod(TextWriter w)
    {
        // For Swap tests the second operand goes through an extra "nop" Or
        // intrinsic, which forces it into a register while the first operand
        // stays in memory.
        string call = Kind.HasFlag(TestKind.Swap)
            ? $"{Isa}.{Intrinsic}(x, {Isa}.Or(y.AsSingle(), default).As{VectorElementType}())"
            : $"{Isa}.{Intrinsic}(x, y)";
        string negation = Kind.HasFlag(TestKind.LogicalNot) ? "!" : "";
        string branch = Kind.HasFlag(TestKind.Branch) ? " ? True() : False()" : "";

        w.WriteLine();
        w.WriteLine(" [MethodImpl(MethodImplOptions.NoInlining)]");
        // Operands are passed by reference so that the behavior is consistent
        // across ABIs: both operands start out in memory, which is enough to
        // catch some cases of containment.
        w.WriteLine($" static bool {Name}(in {VectorType} x, in {VectorType} y)");
        w.WriteLine(" {");
        w.Write(" return ");
        w.Write(negation);
        w.Write(call);
        w.Write(branch);
        w.WriteLine(";");
        w.WriteLine(" }");
    }

    // Returns the expected result for `input` as a C# bool literal, taking
    // the LogicalNot flag into account.
    string Check((T x, T y) input)
    {
        bool expected = _op(input.x, input.y);
        if (Kind.HasFlag(TestKind.LogicalNot))
        {
            expected = !expected;
        }
        return expected ? "true" : "false";
    }

    // Writes one check statement per input pair; a check passes trivially
    // when the ISA is not supported.
    public override void WriteTestCases(TextWriter w)
    {
        foreach ((T x, T y) input in _inputs)
        {
            string expected = Check(input);
            string left = CreateVector(VectorSize, VectorElementType, input.x);
            string right = CreateVector(VectorSize, VectorElementType, input.y);
            w.WriteLine($" r &= !{Isa}.IsSupported || Check({expected}, {Name}({left}, {right}));");
        }
    }
}
// Script body: collect every test, then print the generated C# test program
// to standard output.
var generatedTests = new List<Test>();
GenerateCompareTests(generatedTests);
GeneratePackedIntTestTests(generatedTests);
GeneratePackedDoubleTestTests(generatedTests);

TextWriter output = Console.Out;

// Fixed prologue of the generated file: license header, usings, the opening of
// the Program class and the True/False/Check helpers used by the tests.
output.WriteLine(@"// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
using System;
using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
class Program
{
[MethodImpl(MethodImplOptions.NoInlining)] static bool True() => true;
[MethodImpl(MethodImplOptions.NoInlining)] static bool False() => false;
[MethodImpl(MethodImplOptions.NoInlining)]
static bool Check(bool expected, bool actual, [CallerLineNumber] int line = 0)
{
if (expected != actual) Console.WriteLine(""Failed at line {0}"", line);
return expected == actual;
}
");

// Main accumulates the result of every test case and returns 100 when all of
// them passed, 42 otherwise.
output.WriteLine(" static int Main()");
output.WriteLine(" {");
output.WriteLine(" bool r = true;");
foreach (Test test in generatedTests)
{
    test.WriteTestCases(output);
}
output.WriteLine(" return r ? 100 : 42;");
output.WriteLine(" }");

// The test methods follow Main, then the Program class is closed.
foreach (Test test in generatedTests)
{
    test.WriteTestMethod(output);
}
output.WriteLine("}");
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册