Unverified commit 1227d3bf, authored by W Wraith, committed by GitHub

Add xarch `andn` (#64350)

* basic functionality implemented

* add instruction format to list checked in AreFlagsSetToZeroCmp
comment and tidy

* review feedback and clarify instruction flags

* change op local check to result containment

* add memory op formats and update comments
Parent 21f80782
@@ -423,6 +423,10 @@ bool emitter::AreFlagsSetToZeroCmp(regNumber reg, emitAttr opSize, genTreeOps tr
         case IF_RWR:
         case IF_RRD:
         case IF_RRW:
+        case IF_RWR_RRD_RRD:
+        case IF_RWR_RRD_MRD:
+        case IF_RWR_RRD_ARD:
+        case IF_RWR_RRD_SRD:
             break;
         default:
             return false;
......
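The four new formats are the three-operand encodings used by instructions such as `andn` (`IF_RWR_RRD_RRD` for the all-register form, `MRD`/`ARD`/`SRD` for the memory-operand forms). Listing them in `AreFlagsSetToZeroCmp` lets the emitter drop a redundant `test reg, reg` when the preceding instruction already set ZF from its result. A minimal sketch of the idea, using hypothetical types rather than the JIT's actual data structures:

```cpp
#include <cstdint>

// Hypothetical model of the peephole: if the last instruction wrote ZF from
// the value now being compared against zero, the explicit test is redundant.
struct LastInstruction
{
    int  dstReg; // destination register of the last instruction
    bool setsZF; // true for andn, blsr, etc., which write ZF from the result
};

bool CanElideZeroCompare(const LastInstruction& last, int cmpReg)
{
    // Mirrors the intent of AreFlagsSetToZeroCmp: same register, and the
    // instruction's flag behavior is known and covers ZF.
    return last.setsZF && (last.dstReg == cmpReg);
}
```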
@@ -592,7 +592,7 @@ INST3(LAST_AVXVNNI_INSTRUCTION, "LAST_AVXVNNI_INSTRUCTION", IUM_WR, BAD_CODE, BA
 // BMI1
 INST3(FIRST_BMI_INSTRUCTION, "FIRST_BMI_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None)
-INST3(andn, "andn", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF2), INS_Flags_IsDstDstSrcAVXInstruction) // Logical AND NOT
+INST3(andn, "andn", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF2), Resets_OF | Writes_SF | Writes_ZF | Undefined_AF | Undefined_PF | Resets_CF | INS_Flags_IsDstDstSrcAVXInstruction) // Logical AND NOT
 INST3(blsi, "blsi", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF3), INS_Flags_IsDstDstSrcAVXInstruction) // Extract Lowest Set Isolated Bit
 INST3(blsmsk, "blsmsk", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF3), INS_Flags_IsDstDstSrcAVXInstruction) // Get Mask Up to Lowest Set Bit
 INST3(blsr, "blsr", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF3), Resets_OF | Writes_SF | Writes_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_Flags_IsDstDstSrcAVXInstruction) // Reset Lowest Set Bit
......
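This change annotates the `andn` entry with the flag information the peephole above relies on: per the Intel SDM, `andn` clears OF and CF, sets SF and ZF from the result, and leaves AF and PF undefined, matching the `Resets_*`/`Writes_*`/`Undefined_*` bits now attached to the row. A small C++ model of that behavior, for illustration only:

```cpp
#include <cstdint>

struct Flags
{
    bool of, sf, zf, cf; // AF/PF omitted: andn leaves them undefined
};

// BMI1 andn computes dest = ~src1 & src2 and updates the flags the way the
// Resets_*/Writes_* bits in the instruction table describe.
uint64_t Andn(uint64_t src1, uint64_t src2, Flags& f)
{
    uint64_t result = ~src1 & src2;

    f.of = false;               // Resets_OF
    f.cf = false;               // Resets_CF
    f.sf = (int64_t)result < 0; // Writes_SF
    f.zf = (result == 0);       // Writes_ZF
    return result;
}
```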
@@ -139,7 +139,7 @@ GenTree* Lowering::LowerNode(GenTree* node)
         case GT_AND:
         case GT_OR:
         case GT_XOR:
-            return LowerBinaryArithmeticCommon(node->AsOp());
+            return LowerBinaryArithmetic(node->AsOp());
         case GT_MUL:
         case GT_MULHI:
@@ -5133,53 +5133,6 @@ GenTree* Lowering::LowerAdd(GenTreeOp* node)
     return nullptr;
 }
 
-//------------------------------------------------------------------------
-// LowerBinaryArithmeticCommon: lowers the given binary arithmetic node.
-//
-// Recognizes opportunities for using target-independent "combined" nodes
-// (currently AND_NOT on ARMArch). Performs containment checks.
-//
-// Arguments:
-//    node - the arithmetic node to lower
-//
-// Returns:
-//    The next node to lower.
-//
-GenTree* Lowering::LowerBinaryArithmeticCommon(GenTreeOp* binOp)
-{
-    // TODO-CQ-XArch: support BMI2 "andn" in codegen and condition
-    // this logic on the support for the instruction set on XArch.
-    CLANG_FORMAT_COMMENT_ANCHOR;
-
-#ifdef TARGET_ARMARCH
-    if (comp->opts.OptimizationEnabled() && binOp->OperIs(GT_AND))
-    {
-        GenTree* opNode  = nullptr;
-        GenTree* notNode = nullptr;
-        if (binOp->gtGetOp1()->OperIs(GT_NOT))
-        {
-            notNode = binOp->gtGetOp1();
-            opNode  = binOp->gtGetOp2();
-        }
-        else if (binOp->gtGetOp2()->OperIs(GT_NOT))
-        {
-            notNode = binOp->gtGetOp2();
-            opNode  = binOp->gtGetOp1();
-        }
-
-        if (notNode != nullptr)
-        {
-            binOp->gtOp1 = opNode;
-            binOp->gtOp2 = notNode->AsUnOp()->gtGetOp1();
-            binOp->ChangeOper(GT_AND_NOT);
-            BlockRange().Remove(notNode);
-        }
-    }
-#endif
-
-    return LowerBinaryArithmetic(binOp);
-}
-
 //------------------------------------------------------------------------
 // LowerUnsignedDivOrMod: Lowers a GT_UDIV/GT_UMOD node.
 //
......
@@ -297,7 +297,6 @@ private:
     void LowerStoreIndir(GenTreeStoreInd* node);
     GenTree* LowerAdd(GenTreeOp* node);
    GenTree* LowerMul(GenTreeOp* mul);
-    GenTree* LowerBinaryArithmeticCommon(GenTreeOp* binOp);
     GenTree* LowerBinaryArithmetic(GenTreeOp* binOp);
     bool LowerUnsignedDivOrMod(GenTreeOp* divMod);
     GenTree* LowerConstIntDivOrMod(GenTree* node);
@@ -344,7 +343,8 @@ private:
     void LowerHWIntrinsicToScalar(GenTreeHWIntrinsic* node);
     void LowerHWIntrinsicGetElement(GenTreeHWIntrinsic* node);
     void LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node);
-    GenTree* TryLowerAndOpToResetLowestSetBit(GenTreeOp* binOp);
+    GenTree* TryLowerAndOpToResetLowestSetBit(GenTreeOp* andNode);
+    GenTree* TryLowerAndOpToAndNot(GenTreeOp* andNode);
 #elif defined(TARGET_ARM64)
     bool IsValidConstForMovImm(GenTreeHWIntrinsic* node);
     void LowerHWIntrinsicFusedMultiplyAddScalar(GenTreeHWIntrinsic* node);
......
@@ -292,6 +292,30 @@ GenTree* Lowering::LowerMul(GenTreeOp* mul)
 //
 GenTree* Lowering::LowerBinaryArithmetic(GenTreeOp* binOp)
 {
+    if (comp->opts.OptimizationEnabled() && binOp->OperIs(GT_AND))
+    {
+        GenTree* opNode  = nullptr;
+        GenTree* notNode = nullptr;
+        if (binOp->gtGetOp1()->OperIs(GT_NOT))
+        {
+            notNode = binOp->gtGetOp1();
+            opNode  = binOp->gtGetOp2();
+        }
+        else if (binOp->gtGetOp2()->OperIs(GT_NOT))
+        {
+            notNode = binOp->gtGetOp2();
+            opNode  = binOp->gtGetOp1();
+        }
+
+        if (notNode != nullptr)
+        {
+            binOp->gtOp1 = opNode;
+            binOp->gtOp2 = notNode->AsUnOp()->gtGetOp1();
+            binOp->ChangeOper(GT_AND_NOT);
+            BlockRange().Remove(notNode);
+        }
+    }
+
     ContainCheckBinary(binOp);
 
     return binOp->gtNext;
......
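This is the pattern match relocated from the removed `LowerBinaryArithmeticCommon`: on ARM targets, `AND(x, NOT(y))` in either operand order is rewritten in place to the combined `GT_AND_NOT` node and the now-dead `NOT` is unlinked. A toy version of the same rewrite over a simplified tree, with hypothetical types standing in for `GenTree`:

```cpp
#include <string>

// Simplified stand-in for a binary expression tree node.
struct Node
{
    std::string oper; // "AND", "NOT", "AND_NOT", or a leaf name
    Node*       op1 = nullptr;
    Node*       op2 = nullptr;
};

// Rewrites AND(x, NOT(y)) or AND(NOT(y), x) into AND_NOT(x, y) in place,
// mirroring the GT_AND_NOT transformation in the hunk above.
void FoldAndNot(Node* andNode)
{
    Node* opNode  = nullptr;
    Node* notNode = nullptr;

    if (andNode->op1->oper == "NOT")
    {
        notNode = andNode->op1;
        opNode  = andNode->op2;
    }
    else if (andNode->op2->oper == "NOT")
    {
        notNode = andNode->op2;
        opNode  = andNode->op1;
    }

    if (notNode != nullptr)
    {
        andNode->op1  = opNode;       // x
        andNode->op2  = notNode->op1; // y, skipping the NOT
        andNode->oper = "AND_NOT";    // combined node
        // the real code also removes notNode from the LIR block range
    }
}
```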
@@ -162,6 +162,9 @@ GenTree* Lowering::LowerMul(GenTreeOp* mul)
 //------------------------------------------------------------------------
 // LowerBinaryArithmetic: lowers the given binary arithmetic node.
 //
+// Recognizes opportunities for using target-independent "combined" nodes
+// Performs containment checks.
+//
 // Arguments:
 //    node - the arithmetic node to lower
 //
@@ -173,10 +176,16 @@ GenTree* Lowering::LowerBinaryArithmetic(GenTreeOp* binOp)
 #ifdef FEATURE_HW_INTRINSICS
     if (comp->opts.OptimizationEnabled() && binOp->OperIs(GT_AND) && varTypeIsIntegral(binOp))
     {
-        GenTree* blsrNode = TryLowerAndOpToResetLowestSetBit(binOp);
-        if (blsrNode != nullptr)
+        GenTree* replacementNode = TryLowerAndOpToAndNot(binOp);
+        if (replacementNode != nullptr)
+        {
+            return replacementNode->gtNext;
+        }
+
+        replacementNode = TryLowerAndOpToResetLowestSetBit(binOp);
+        if (replacementNode != nullptr)
         {
-            return blsrNode->gtNext;
+            return replacementNode->gtNext;
         }
     }
 #endif
@@ -3726,7 +3735,7 @@ void Lowering::LowerHWIntrinsicToScalar(GenTreeHWIntrinsic* node)
 }
 
 //----------------------------------------------------------------------------------------------
-// Lowering::TryLowerAndOpToResetLowestSetBit: Lowers a tree AND(X, ADD(X, -1) to HWIntrinsic::ResetLowestSetBit
+// Lowering::TryLowerAndOpToResetLowestSetBit: Lowers a tree AND(X, ADD(X, -1)) to HWIntrinsic::ResetLowestSetBit
 //
 // Arguments:
 //    andNode - GT_AND node of integral type
@@ -3734,6 +3743,8 @@ void Lowering::LowerHWIntrinsicToScalar(GenTreeHWIntrinsic* node)
 // Return Value:
 //    Returns the replacement node if one is created else nullptr indicating no replacement
 //
+// Notes:
+//    Performs containment checks on the replacement node if one is created
 GenTree* Lowering::TryLowerAndOpToResetLowestSetBit(GenTreeOp* andNode)
 {
     assert(andNode->OperIs(GT_AND) && varTypeIsIntegral(andNode));
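For reference, the `ResetLowestSetBit` pattern this existing function targets is the BMI1 `blsr` identity: `x & (x - 1)` clears the lowest set bit of `x`. A one-line illustration:

```cpp
#include <cstdint>

// x & (x - 1) clears the lowest set bit; on BMI1 hardware this is one blsr.
uint32_t ResetLowestSetBit(uint32_t x)
{
    return x & (x - 1); // e.g. 0b1011000 -> 0b1010000
}
```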
@@ -3802,6 +3813,86 @@ GenTree* Lowering::TryLowerAndOpToResetLowestSetBit(GenTreeOp* andNode)
     return blsrNode;
 }
 
+//----------------------------------------------------------------------------------------------
+// Lowering::TryLowerAndOpToAndNot: Lowers a tree AND(X, NOT(Y)) to HWIntrinsic::AndNot
+//
+// Arguments:
+//    andNode - GT_AND node of integral type
+//
+// Return Value:
+//    Returns the replacement node if one is created else nullptr indicating no replacement
+//
+// Notes:
+//    Performs containment checks on the replacement node if one is created
+GenTree* Lowering::TryLowerAndOpToAndNot(GenTreeOp* andNode)
+{
+    assert(andNode->OperIs(GT_AND) && varTypeIsIntegral(andNode));
+
+    GenTree* opNode  = nullptr;
+    GenTree* notNode = nullptr;
+    if (andNode->gtGetOp1()->OperIs(GT_NOT))
+    {
+        notNode = andNode->gtGetOp1();
+        opNode  = andNode->gtGetOp2();
+    }
+    else if (andNode->gtGetOp2()->OperIs(GT_NOT))
+    {
+        notNode = andNode->gtGetOp2();
+        opNode  = andNode->gtGetOp1();
+    }
+
+    if (opNode == nullptr)
+    {
+        return nullptr;
+    }
+
+    // We want to avoid using "andn" when one of the operands is both a source and the destination and is also coming
+    // from memory. In this scenario, we will get smaller and likely faster code by using the RMW encoding of `and`
+    if (IsBinOpInRMWStoreInd(andNode))
+    {
+        return nullptr;
+    }
+
+    NamedIntrinsic intrinsic;
+    if (andNode->TypeIs(TYP_LONG) && comp->compOpportunisticallyDependsOn(InstructionSet_BMI1_X64))
+    {
+        intrinsic = NamedIntrinsic::NI_BMI1_X64_AndNot;
+    }
+    else if (comp->compOpportunisticallyDependsOn(InstructionSet_BMI1))
+    {
+        intrinsic = NamedIntrinsic::NI_BMI1_AndNot;
+    }
+    else
+    {
+        return nullptr;
+    }
+
+    LIR::Use use;
+    if (!BlockRange().TryGetUse(andNode, &use))
+    {
+        return nullptr;
+    }
+
+    // note that parameter order for andn is ~y, x so these are purposefully reversed when creating the node
+    GenTreeHWIntrinsic* andnNode =
+        comp->gtNewScalarHWIntrinsicNode(andNode->TypeGet(), notNode->AsUnOp()->gtGetOp1(), opNode, intrinsic);
+
+    JITDUMP("Lower: optimize AND(X, NOT(Y)))\n");
+    DISPNODE(andNode);
+    JITDUMP("to:\n");
+    DISPNODE(andnNode);
+
+    use.ReplaceWith(andnNode);
+    BlockRange().InsertBefore(andNode, andnNode);
+    BlockRange().Remove(andNode);
+    BlockRange().Remove(notNode);
+
+    ContainCheckHWIntrinsic(andnNode);
+
+    return andnNode;
+}
+
 #endif // FEATURE_HW_INTRINSICS
 
 //----------------------------------------------------------------------------------------------
......
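The net effect: source patterns equivalent to `x & ~y` now lower to a single `andn` when BMI1 is available, instead of a `not` followed by an `and`. Note the operand swap when building the intrinsic node: `andn dst, src1, src2` computes `~src1 & src2`, so the tree's `y` (the value under the `NOT`) becomes the first operand. An illustration of the pattern in C++ (the JIT sees the equivalent IL trees, not this source):

```cpp
#include <cstdint>

// With this change, the JIT can emit a single `andn` for this pattern on
// BMI1-capable hardware, rather than a `not` followed by an `and`.
uint64_t ClearMaskedBits(uint64_t value, uint64_t mask)
{
    return value & ~mask; // AND(X, NOT(Y)) -> andn: ~mask & value
}
```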