diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index d90c5771ce1a96b6761d2b565f9c4f43512b4f80..c203a1729dd401b8259796f1c500d33643df0b94 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -423,6 +423,10 @@ bool emitter::AreFlagsSetToZeroCmp(regNumber reg, emitAttr opSize, genTreeOps tr case IF_RWR: case IF_RRD: case IF_RRW: + case IF_RWR_RRD_RRD: + case IF_RWR_RRD_MRD: + case IF_RWR_RRD_ARD: + case IF_RWR_RRD_SRD: break; default: return false; diff --git a/src/coreclr/jit/instrsxarch.h b/src/coreclr/jit/instrsxarch.h index 08d4c4a8c8675d9b2d10ceeecdc4c572b4a2a330..abb2ac86ef99cb912b8dadc3eda7e06594cfe0a8 100644 --- a/src/coreclr/jit/instrsxarch.h +++ b/src/coreclr/jit/instrsxarch.h @@ -592,7 +592,7 @@ INST3(LAST_AVXVNNI_INSTRUCTION, "LAST_AVXVNNI_INSTRUCTION", IUM_WR, BAD_CODE, BA // BMI1 INST3(FIRST_BMI_INSTRUCTION, "FIRST_BMI_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None) -INST3(andn, "andn", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF2), INS_Flags_IsDstDstSrcAVXInstruction) // Logical AND NOT +INST3(andn, "andn", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF2), Resets_OF | Writes_SF | Writes_ZF | Undefined_AF | Undefined_PF | Resets_CF | INS_Flags_IsDstDstSrcAVXInstruction) // Logical AND NOT INST3(blsi, "blsi", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF3), INS_Flags_IsDstDstSrcAVXInstruction) // Extract Lowest Set Isolated Bit INST3(blsmsk, "blsmsk", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF3), INS_Flags_IsDstDstSrcAVXInstruction) // Get Mask Up to Lowest Set Bit INST3(blsr, "blsr", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF3), Resets_OF | Writes_SF | Writes_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_Flags_IsDstDstSrcAVXInstruction) // Reset Lowest Set Bit diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index b111d73db6a995dd589ff1ea43e65e193d9ac56d..2697a9892dca3b91563bd7beb560b2b0ebb27b27 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -139,7 +139,7 @@ GenTree* Lowering::LowerNode(GenTree* node) case GT_AND: case GT_OR: case GT_XOR: - return LowerBinaryArithmeticCommon(node->AsOp()); + return LowerBinaryArithmetic(node->AsOp()); case GT_MUL: case GT_MULHI: @@ -5133,53 +5133,6 @@ GenTree* Lowering::LowerAdd(GenTreeOp* node) return nullptr; } -//------------------------------------------------------------------------ -// LowerBinaryArithmeticCommon: lowers the given binary arithmetic node. -// -// Recognizes opportunities for using target-independent "combined" nodes -// (currently AND_NOT on ARMArch). Performs containment checks. -// -// Arguments: -// node - the arithmetic node to lower -// -// Returns: -// The next node to lower. -// -GenTree* Lowering::LowerBinaryArithmeticCommon(GenTreeOp* binOp) -{ - // TODO-CQ-XArch: support BMI2 "andn" in codegen and condition - // this logic on the support for the instruction set on XArch. 
- CLANG_FORMAT_COMMENT_ANCHOR; - -#ifdef TARGET_ARMARCH - if (comp->opts.OptimizationEnabled() && binOp->OperIs(GT_AND)) - { - GenTree* opNode = nullptr; - GenTree* notNode = nullptr; - if (binOp->gtGetOp1()->OperIs(GT_NOT)) - { - notNode = binOp->gtGetOp1(); - opNode = binOp->gtGetOp2(); - } - else if (binOp->gtGetOp2()->OperIs(GT_NOT)) - { - notNode = binOp->gtGetOp2(); - opNode = binOp->gtGetOp1(); - } - - if (notNode != nullptr) - { - binOp->gtOp1 = opNode; - binOp->gtOp2 = notNode->AsUnOp()->gtGetOp1(); - binOp->ChangeOper(GT_AND_NOT); - BlockRange().Remove(notNode); - } - } -#endif - - return LowerBinaryArithmetic(binOp); -} - //------------------------------------------------------------------------ // LowerUnsignedDivOrMod: Lowers a GT_UDIV/GT_UMOD node. // diff --git a/src/coreclr/jit/lower.h b/src/coreclr/jit/lower.h index 95843b6bded47e920f98091370154cac6562a5d4..b6a6c178f7e5441fc804671c2de55240b3cf405b 100644 --- a/src/coreclr/jit/lower.h +++ b/src/coreclr/jit/lower.h @@ -297,7 +297,6 @@ private: void LowerStoreIndir(GenTreeStoreInd* node); GenTree* LowerAdd(GenTreeOp* node); GenTree* LowerMul(GenTreeOp* mul); - GenTree* LowerBinaryArithmeticCommon(GenTreeOp* binOp); GenTree* LowerBinaryArithmetic(GenTreeOp* binOp); bool LowerUnsignedDivOrMod(GenTreeOp* divMod); GenTree* LowerConstIntDivOrMod(GenTree* node); @@ -344,7 +343,8 @@ private: void LowerHWIntrinsicToScalar(GenTreeHWIntrinsic* node); void LowerHWIntrinsicGetElement(GenTreeHWIntrinsic* node); void LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node); - GenTree* TryLowerAndOpToResetLowestSetBit(GenTreeOp* binOp); + GenTree* TryLowerAndOpToResetLowestSetBit(GenTreeOp* andNode); + GenTree* TryLowerAndOpToAndNot(GenTreeOp* andNode); #elif defined(TARGET_ARM64) bool IsValidConstForMovImm(GenTreeHWIntrinsic* node); void LowerHWIntrinsicFusedMultiplyAddScalar(GenTreeHWIntrinsic* node); diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index 67e1269dfd429ad73e442ef4ea9c2bcba652b610..41b80fbee67233410f660c9e412e6ee8a88545bd 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -292,6 +292,30 @@ GenTree* Lowering::LowerMul(GenTreeOp* mul) // GenTree* Lowering::LowerBinaryArithmetic(GenTreeOp* binOp) { + if (comp->opts.OptimizationEnabled() && binOp->OperIs(GT_AND)) + { + GenTree* opNode = nullptr; + GenTree* notNode = nullptr; + if (binOp->gtGetOp1()->OperIs(GT_NOT)) + { + notNode = binOp->gtGetOp1(); + opNode = binOp->gtGetOp2(); + } + else if (binOp->gtGetOp2()->OperIs(GT_NOT)) + { + notNode = binOp->gtGetOp2(); + opNode = binOp->gtGetOp1(); + } + + if (notNode != nullptr) + { + binOp->gtOp1 = opNode; + binOp->gtOp2 = notNode->AsUnOp()->gtGetOp1(); + binOp->ChangeOper(GT_AND_NOT); + BlockRange().Remove(notNode); + } + } + ContainCheckBinary(binOp); return binOp->gtNext; diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 37459907558a0b6d13550a6f9c97d11cccfcbc15..266d642014fd02c27691f84f616e9a8097e2064d 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -162,6 +162,9 @@ GenTree* Lowering::LowerMul(GenTreeOp* mul) //------------------------------------------------------------------------ // LowerBinaryArithmetic: lowers the given binary arithmetic node. // +// Recognizes opportunities for using target-independent "combined" nodes +// Performs containment checks. 
+// // Arguments: // node - the arithmetic node to lower // @@ -173,10 +176,16 @@ GenTree* Lowering::LowerBinaryArithmetic(GenTreeOp* binOp) #ifdef FEATURE_HW_INTRINSICS if (comp->opts.OptimizationEnabled() && binOp->OperIs(GT_AND) && varTypeIsIntegral(binOp)) { - GenTree* blsrNode = TryLowerAndOpToResetLowestSetBit(binOp); - if (blsrNode != nullptr) + GenTree* replacementNode = TryLowerAndOpToAndNot(binOp); + if (replacementNode != nullptr) + { + return replacementNode->gtNext; + } + + replacementNode = TryLowerAndOpToResetLowestSetBit(binOp); + if (replacementNode != nullptr) { - return blsrNode->gtNext; + return replacementNode->gtNext; } } #endif @@ -3726,7 +3735,7 @@ void Lowering::LowerHWIntrinsicToScalar(GenTreeHWIntrinsic* node) } //---------------------------------------------------------------------------------------------- -// Lowering::TryLowerAndOpToResetLowestSetBit: Lowers a tree AND(X, ADD(X, -1) to HWIntrinsic::ResetLowestSetBit +// Lowering::TryLowerAndOpToResetLowestSetBit: Lowers a tree AND(X, ADD(X, -1)) to HWIntrinsic::ResetLowestSetBit // // Arguments: // andNode - GT_AND node of integral type @@ -3734,6 +3743,8 @@ void Lowering::LowerHWIntrinsicToScalar(GenTreeHWIntrinsic* node) // Return Value: // Returns the replacement node if one is created else nullptr indicating no replacement // +// Notes: +// Performs containment checks on the replacement node if one is created GenTree* Lowering::TryLowerAndOpToResetLowestSetBit(GenTreeOp* andNode) { assert(andNode->OperIs(GT_AND) && varTypeIsIntegral(andNode)); @@ -3802,6 +3813,86 @@ GenTree* Lowering::TryLowerAndOpToResetLowestSetBit(GenTreeOp* andNode) return blsrNode; } +//---------------------------------------------------------------------------------------------- +// Lowering::TryLowerAndOpToAndNot: Lowers a tree AND(X, NOT(Y)) to HWIntrinsic::AndNot +// +// Arguments: +// andNode - GT_AND node of integral type +// +// Return Value: +// Returns the replacement node if one is created else nullptr indicating no replacement +// +// Notes: +// Performs containment checks on the replacement node if one is created +GenTree* Lowering::TryLowerAndOpToAndNot(GenTreeOp* andNode) +{ + assert(andNode->OperIs(GT_AND) && varTypeIsIntegral(andNode)); + + GenTree* opNode = nullptr; + GenTree* notNode = nullptr; + if (andNode->gtGetOp1()->OperIs(GT_NOT)) + { + notNode = andNode->gtGetOp1(); + opNode = andNode->gtGetOp2(); + } + else if (andNode->gtGetOp2()->OperIs(GT_NOT)) + { + notNode = andNode->gtGetOp2(); + opNode = andNode->gtGetOp1(); + } + + if (opNode == nullptr) + { + return nullptr; + } + + // We want to avoid using "andn" when one of the operands is both a source and the destination and is also coming + // from memory. 
In this scenario, we will get smaller and likely faster code by using the RMW encoding of `and`.
+    if (IsBinOpInRMWStoreInd(andNode))
+    {
+        return nullptr;
+    }
+
+    NamedIntrinsic intrinsic;
+    if (andNode->TypeIs(TYP_LONG) && comp->compOpportunisticallyDependsOn(InstructionSet_BMI1_X64))
+    {
+        intrinsic = NamedIntrinsic::NI_BMI1_X64_AndNot;
+    }
+    else if (comp->compOpportunisticallyDependsOn(InstructionSet_BMI1))
+    {
+        intrinsic = NamedIntrinsic::NI_BMI1_AndNot;
+    }
+    else
+    {
+        return nullptr;
+    }
+
+    LIR::Use use;
+    if (!BlockRange().TryGetUse(andNode, &use))
+    {
+        return nullptr;
+    }
+
+    // Note that the parameter order for andn is ~y, x, so the operands are purposefully reversed when creating the node.
+    GenTreeHWIntrinsic* andnNode =
+        comp->gtNewScalarHWIntrinsicNode(andNode->TypeGet(), notNode->AsUnOp()->gtGetOp1(), opNode, intrinsic);
+
+    JITDUMP("Lower: optimize AND(X, NOT(Y))\n");
+    DISPNODE(andNode);
+    JITDUMP("to:\n");
+    DISPNODE(andnNode);
+
+    use.ReplaceWith(andnNode);
+
+    BlockRange().InsertBefore(andNode, andnNode);
+    BlockRange().Remove(andNode);
+    BlockRange().Remove(notNode);
+
+    ContainCheckHWIntrinsic(andnNode);
+
+    return andnNode;
+}
+
 #endif // FEATURE_HW_INTRINSICS
 
 //-----------------------------------------------------------------------------------------------
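
For reference, a minimal standalone sketch (not part of this diff) of the semantics the new lowering relies on: BMI1 "andn" computes ~a & b, which is why TryLowerAndOpToAndNot reverses the operands when it builds the AndNot intrinsic node for AND(X, NOT(Y)). The sketch assumes the standard BMI1 compiler intrinsic _andn_u64 from <immintrin.h>, a BMI1-capable CPU, and -mbmi when building with GCC/Clang.

// Sketch only: demonstrates the ~a & b operand order of "andn" that the
// lowering accounts for; not part of the JIT change itself.
#include <cstdint>
#include <cstdio>
#include <immintrin.h>

int main()
{
    uint64_t x = 0xF0F0F0F0F0F0F0F0ull;
    uint64_t y = 0xFF00FF00FF00FF00ull;

    uint64_t viaAndn = _andn_u64(y, x); // first operand is the one inverted: ~y & x
    uint64_t viaOps  = x & ~y;          // the source-level pattern the lowering recognizes

    std::printf("andn = 0x%016llx, x & ~y = 0x%016llx\n",
                (unsigned long long)viaAndn, (unsigned long long)viaOps);
    return (viaAndn == viaOps) ? 0 : 1;
}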