From 3b2883b097a773715ca84056885e0ca1488da36e Mon Sep 17 00:00:00 2001
From: Will Smith
Date: Thu, 16 Jun 2022 20:57:37 -0700
Subject: [PATCH] ARM64 - Optimize `i % 2` (#70599)

---
Note: this patch was recovered from a whitespace-collapsed copy. Line breaks
and blank lines were re-derived from the hunk headers and the diffstat (all
counts match). Indentation and column alignment inside lines are a best-effort
reconstruction -- TODO confirm against the upstream commit, or apply with
`git apply --ignore-whitespace`. The author e-mail address was not present in
the recovered text.

 src/coreclr/jit/codegenarm64.cpp   | 31 +++++++----
 src/coreclr/jit/codegenarmarch.cpp |  1 +
 src/coreclr/jit/gentree.cpp        |  5 +-
 src/coreclr/jit/gtlist.h           |  1 +
 src/coreclr/jit/lowerarmarch.cpp   | 84 +++++++++++++++++++-----------
 5 files changed, 81 insertions(+), 41 deletions(-)

diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp
index 9bf45ab0ffa..5c34b845b09 100644
--- a/src/coreclr/jit/codegenarm64.cpp
+++ b/src/coreclr/jit/codegenarm64.cpp
@@ -10282,18 +10282,34 @@ void CodeGen::genCodeForAddEx(GenTreeOp* tree)
 //
 void CodeGen::genCodeForCond(GenTreeOp* tree)
 {
-    assert(tree->OperIs(GT_CSNEG_MI));
+    assert(tree->OperIs(GT_CSNEG_MI, GT_CNEG_LT));
     assert(!(tree->gtFlags & GTF_SET_FLAGS));
     genConsumeOperands(tree);
 
-    instruction ins;
-    insCond     cond;
     switch (tree->OperGet())
     {
         case GT_CSNEG_MI:
         {
-            ins  = INS_csneg;
-            cond = INS_COND_MI;
+            instruction ins  = INS_csneg;
+            insCond     cond = INS_COND_MI;
+
+            regNumber dstReg = tree->GetRegNum();
+            regNumber op1Reg = tree->gtGetOp1()->GetRegNum();
+            regNumber op2Reg = tree->gtGetOp2()->GetRegNum();
+
+            GetEmitter()->emitIns_R_R_R_COND(ins, emitActualTypeSize(tree), dstReg, op1Reg, op2Reg, cond);
+            break;
+        }
+
+        case GT_CNEG_LT:
+        {
+            instruction ins  = INS_cneg;
+            insCond     cond = INS_COND_LT;
+
+            regNumber dstReg = tree->GetRegNum();
+            regNumber op1Reg = tree->gtGetOp1()->GetRegNum();
+
+            GetEmitter()->emitIns_R_R_COND(ins, emitActualTypeSize(tree), dstReg, op1Reg, cond);
             break;
         }
 
@@ -10301,11 +10317,6 @@ void CodeGen::genCodeForCond(GenTreeOp* tree)
             unreached();
     }
 
-    regNumber dstReg = tree->GetRegNum();
-    regNumber op1Reg = tree->gtGetOp1()->GetRegNum();
-    regNumber op2Reg = tree->gtGetOp2()->GetRegNum();
-
-    GetEmitter()->emitIns_R_R_R_COND(ins, emitActualTypeSize(tree), dstReg, op1Reg, op2Reg, cond);
     genProduceReg(tree);
 }
 
diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp
index 0f87d506b20..92b34427816 100644
--- a/src/coreclr/jit/codegenarmarch.cpp
+++ b/src/coreclr/jit/codegenarmarch.cpp
@@ -324,6 +324,7 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode)
             break;
 
         case GT_CSNEG_MI:
+        case GT_CNEG_LT:
             genCodeForCond(treeNode->AsOp());
             break;
 #endif // TARGET_ARM64
diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp
index 9e56b90be61..0f749b1a710 100644
--- a/src/coreclr/jit/gentree.cpp
+++ b/src/coreclr/jit/gentree.cpp
@@ -9161,7 +9161,10 @@ GenTreeUseEdgeIterator::GenTreeUseEdgeIterator(GenTree* node)
             m_state = -1;
             return;
 
-        // Standard unary operators
+// Standard unary operators
+#ifdef TARGET_ARM64
+        case GT_CNEG_LT:
+#endif // TARGET_ARM64
         case GT_STORE_LCL_VAR:
         case GT_STORE_LCL_FLD:
         case GT_NOT:
diff --git a/src/coreclr/jit/gtlist.h b/src/coreclr/jit/gtlist.h
index a089124825f..53a3618100b 100644
--- a/src/coreclr/jit/gtlist.h
+++ b/src/coreclr/jit/gtlist.h
@@ -219,6 +219,7 @@ GTNODE(AND_NOT          , GenTreeOp          ,0,GTK_BINOP|DBK_NOTHIR)
 GTNODE(ADDEX,             GenTreeOp          ,0,GTK_BINOP|DBK_NOTHIR) // Add with sign/zero extension.
 GTNODE(BFIZ             , GenTreeOp          ,0,GTK_BINOP|DBK_NOTHIR) // Bitfield Insert in Zero.
 GTNODE(CSNEG_MI         , GenTreeOp          ,0,GTK_BINOP|DBK_NOTHIR) // Conditional select, negate, minus result
+GTNODE(CNEG_LT          , GenTreeOp          ,0,GTK_UNOP|DBK_NOTHIR)  // Conditional, negate, signed less than result
 #endif
 
 //-----------------------------------------------------------------------------
diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp
index 8598efb8adb..ed77d2a954f 100644
--- a/src/coreclr/jit/lowerarmarch.cpp
+++ b/src/coreclr/jit/lowerarmarch.cpp
@@ -104,6 +104,7 @@ bool Lowering::IsContainableImmed(GenTree* parentNode, GenTree* childNode) const
             case GT_LE:
             case GT_GE:
             case GT_GT:
+            case GT_CMP:
             case GT_BOUNDS_CHECK:
                 return emitter::emitIns_valid_imm_for_cmp(immVal, size);
             case GT_AND:
@@ -699,19 +700,7 @@ void Lowering::LowerRotate(GenTree* tree)
 // Arguments:
 //    tree - the node to lower
 //
-// Return Value:
-//    A new tree node if it changed.
-//
 // Notes:
-//    {expr} % {cns}
-//    Logically turns into:
-//        let a = {expr}
-//        if a > 0 then (a & ({cns} - 1)) else -(-a & ({cns} - 1))
-//    which then turns into:
-//        and   reg1, reg0, #({cns} - 1)
-//        negs  reg0, reg0
-//        and   reg0, reg0, #({cns} - 1)
-//        csneg reg0, reg1, reg0, mi
 // TODO: We could do this optimization in morph but we do not have
 //       a conditional select op in HIR. At some point, we may
 //       introduce such an op.
@@ -722,12 +711,15 @@ void Lowering::LowerModPow2(GenTree* node)
     GenTree*   dividend = mod->gtGetOp1();
     GenTree*   divisor  = mod->gtGetOp2();
 
+    JITDUMP("Lower: optimize X MOD POW2");
+
     assert(divisor->IsIntegralConstPow2());
 
     const var_types type = mod->TypeGet();
     assert((type == TYP_INT) || (type == TYP_LONG));
 
-    ssize_t cnsValue = static_cast<ssize_t>(divisor->AsIntConCommon()->IntegralValue()) - 1;
+    ssize_t divisorCnsValue         = static_cast<ssize_t>(divisor->AsIntConCommon()->IntegralValue());
+    ssize_t divisorCnsValueMinusOne = divisorCnsValue - 1;
 
     BlockRange().Remove(divisor);
 
@@ -739,30 +731,62 @@ void Lowering::LowerModPow2(GenTree* node)
     GenTree* dividend2 = comp->gtClone(dividend);
     BlockRange().InsertAfter(dividend, dividend2);
 
-    GenTreeIntCon* cns = comp->gtNewIconNode(cnsValue, type);
+    GenTreeIntCon* cns = comp->gtNewIconNode(divisorCnsValueMinusOne, type);
     BlockRange().InsertAfter(dividend2, cns);
 
     GenTree* const trueExpr = comp->gtNewOperNode(GT_AND, type, dividend, cns);
     BlockRange().InsertAfter(cns, trueExpr);
     LowerNode(trueExpr);
 
-    GenTree* const neg = comp->gtNewOperNode(GT_NEG, type, dividend2);
-    neg->gtFlags |= GTF_SET_FLAGS;
-    BlockRange().InsertAfter(trueExpr, neg);
-
-    GenTreeIntCon* cns2 = comp->gtNewIconNode(cnsValue, type);
-    BlockRange().InsertAfter(neg, cns2);
-
-    GenTree* const falseExpr = comp->gtNewOperNode(GT_AND, type, neg, cns2);
-    BlockRange().InsertAfter(cns2, falseExpr);
-    LowerNode(falseExpr);
-
-    mod->ChangeOper(GT_CSNEG_MI);
-    mod->gtOp1 = trueExpr;
-    mod->gtOp2 = falseExpr;
+    if (divisorCnsValue == 2)
+    {
+        // {expr} % 2
+        // Logically turns into:
+        //     let a = {expr}
+        //     if a < 0 then -(a & 1) else (a & 1)
+        // which then turns into:
+        //     and   reg1, reg0, #1
+        //     cmp   reg0, #0
+        //     cneg  reg0, reg1, lt
+
+        GenTreeIntCon* cnsZero = comp->gtNewIconNode(0, type);
+        BlockRange().InsertAfter(trueExpr, cnsZero);
+
+        GenTree* const cmp = comp->gtNewOperNode(GT_CMP, type, dividend2, cnsZero);
+        cmp->gtFlags |= GTF_SET_FLAGS;
+        BlockRange().InsertAfter(cnsZero, cmp);
+        LowerNode(cmp);
 
-    JITDUMP("Lower: optimize X MOD POW2");
-    DISPNODE(mod);
+        mod->ChangeOper(GT_CNEG_LT);
+        mod->gtOp1 = trueExpr;
+    }
+    else
+    {
+        // {expr} % {cns}
+        // Logically turns into:
+        //     let a = {expr}
+        //     if a > 0 then (a & ({cns} - 1)) else -(-a & ({cns} - 1))
+        // which then turns into:
+        //     and   reg1, reg0, #({cns} - 1)
+        //     negs  reg0, reg0
+        //     and   reg0, reg0, #({cns} - 1)
+        //     csneg reg0, reg1, reg0, mi
+
+        GenTree* const neg = comp->gtNewOperNode(GT_NEG, type, dividend2);
+        neg->gtFlags |= GTF_SET_FLAGS;
+        BlockRange().InsertAfter(trueExpr, neg);
+
+        GenTreeIntCon* cns2 = comp->gtNewIconNode(divisorCnsValueMinusOne, type);
+        BlockRange().InsertAfter(neg, cns2);
+
+        GenTree* const falseExpr = comp->gtNewOperNode(GT_AND, type, neg, cns2);
+        BlockRange().InsertAfter(cns2, falseExpr);
+        LowerNode(falseExpr);
+
+        mod->ChangeOper(GT_CSNEG_MI);
+        mod->gtOp1 = trueExpr;
+        mod->gtOp2 = falseExpr;
+    }
 
     ContainCheckNode(mod);
 }
-- 
GitLab