未验证 提交 cc9fdadb 编写于 作者: K Kunal Pathak 提交者: GitHub

Do not mark op2 as delayRegFree if op1==op2 (#53964)

* Do not mark op2 as delayRegFree if op1==op2

* Revert NodesAreEquivalentLeaves change

* Pass rmwNode to `BuildDelayFreeUses()` which does the right thing

* Make similar change in arm64

* remove TODO comment

* review feedback
上级 d95bfea5
......@@ -6210,17 +6210,22 @@ bool Lowering::IndirsAreEquivalent(GenTree* candidate, GenTree* storeInd)
}
}
/** Test whether the two given nodes are the same leaves.
* Right now, only constant integers and local variables are supported
*/
//------------------------------------------------------------------------
// NodesAreEquivalentLeaves: Check whether the two given nodes are the same leaves.
//
// Arguments:
// tree1 and tree2 are nodes to be checked.
// Return Value:
// Returns true if they are same leaves, false otherwise.
//
// static
bool Lowering::NodesAreEquivalentLeaves(GenTree* tree1, GenTree* tree2)
{
if (tree1 == nullptr && tree2 == nullptr)
if (tree1 == tree2)
{
return true;
}
// both null, they are equivalent, otherwise if either is null not equivalent
if (tree1 == nullptr || tree2 == nullptr)
{
return false;
......@@ -6247,7 +6252,7 @@ bool Lowering::NodesAreEquivalentLeaves(GenTree* tree1, GenTree* tree2)
switch (tree1->OperGet())
{
case GT_CNS_INT:
return tree1->AsIntCon()->gtIconVal == tree2->AsIntCon()->gtIconVal &&
return tree1->AsIntCon()->IconValue() == tree2->AsIntCon()->IconValue() &&
tree1->IsIconHandle() == tree2->IsIconHandle();
case GT_LCL_VAR:
case GT_LCL_VAR_ADDR:
......
......@@ -1112,54 +1112,11 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
// RMW intrinsic operands doesn't have to be delayFree when they can be assigned the same register as op1Reg
// (i.e. a register that corresponds to read-modify-write operand) and one of them is the last use.
bool op2DelayFree = isRMW;
bool op3DelayFree = isRMW;
bool op4DelayFree = isRMW;
assert(intrin.op1 != nullptr);
if (isRMW && intrin.op1->OperIs(GT_LCL_VAR))
{
unsigned int varNum1 = intrin.op1->AsLclVar()->GetLclNum();
bool op1LastUse = false;
unsigned int varNum2 = BAD_VAR_NUM;
unsigned int varNum3 = BAD_VAR_NUM;
unsigned int varNum4 = BAD_VAR_NUM;
if (intrin.op2->OperIs(GT_LCL_VAR))
{
varNum2 = intrin.op2->AsLclVar()->GetLclNum();
op1LastUse |= ((varNum1 == varNum2) && intrin.op2->HasLastUse());
}
if (intrin.op3 != nullptr)
{
if (intrin.op3->OperIs(GT_LCL_VAR))
{
varNum3 = intrin.op3->AsLclVar()->GetLclNum();
op1LastUse |= ((varNum1 == varNum3) && intrin.op3->HasLastUse());
}
if ((intrin.op4 != nullptr) && intrin.op4->OperIs(GT_LCL_VAR))
{
varNum4 = intrin.op4->AsLclVar()->GetLclNum();
op1LastUse |= ((varNum1 == varNum4) && intrin.op4->HasLastUse());
}
}
if (op1LastUse)
{
op2DelayFree = (varNum1 != varNum2);
op3DelayFree = (varNum1 != varNum3);
op4DelayFree = (varNum1 != varNum4);
}
}
bool forceOp2DelayFree = false;
if ((intrin.id == NI_Vector64_GetElement) || (intrin.id == NI_Vector128_GetElement))
{
assert(!op2DelayFree);
if (!intrin.op2->IsCnsIntOrI() && (!intrin.op1->isContained() || intrin.op1->OperIsLocal()))
{
// If the index is not a constant and the object is not contained or is a local
......@@ -1168,7 +1125,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
// TODO-Cleanup: An internal register will never clobber a source; this code actually
// ensures that the index (op2) doesn't interfere with the target.
buildInternalIntRegisterDefForNode(intrinsicTree);
op2DelayFree = true;
forceOp2DelayFree = true;
}
if (!intrin.op2->IsCnsIntOrI() && !intrin.op1->isContained())
......@@ -1179,15 +1136,22 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
}
}
srcCount += op2DelayFree ? BuildDelayFreeUses(intrin.op2) : BuildOperandUses(intrin.op2);
if (forceOp2DelayFree)
{
srcCount += BuildDelayFreeUses(intrin.op2);
}
else
{
srcCount += isRMW ? BuildDelayFreeUses(intrin.op2, intrin.op1) : BuildOperandUses(intrin.op2);
}
if (intrin.op3 != nullptr)
{
srcCount += op3DelayFree ? BuildDelayFreeUses(intrin.op3) : BuildOperandUses(intrin.op3);
srcCount += isRMW ? BuildDelayFreeUses(intrin.op3, intrin.op1) : BuildOperandUses(intrin.op3);
if (intrin.op4 != nullptr)
{
srcCount += op4DelayFree ? BuildDelayFreeUses(intrin.op4) : BuildOperandUses(intrin.op4);
srcCount += isRMW ? BuildDelayFreeUses(intrin.op4, intrin.op1) : BuildOperandUses(intrin.op4);
}
}
}
......
......@@ -3097,6 +3097,10 @@ int LinearScan::BuildDelayFreeUses(GenTree* node, GenTree* rmwNode, regMaskTP ca
}
if (use != nullptr)
{
// If node != rmwNode, then definitely node should be marked as "delayFree".
// However, if node == rmwNode, then we can mark node as "delayFree" only
// none of the node/rmwNode are the last uses. If either of them are last use,
// we can safely reuse the rmwNode as destination.
if ((use->getInterval() != rmwInterval) || (!rmwIsLastUse && !use->lastUse))
{
setDelayFree(use);
......
......@@ -2420,9 +2420,9 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
// Any pair of the index, mask, or destination registers should be different
srcCount += BuildOperandUses(op1);
srcCount += BuildDelayFreeUses(op2);
srcCount += BuildDelayFreeUses(op3);
srcCount += BuildDelayFreeUses(op4);
srcCount += BuildDelayFreeUses(op2, op1);
srcCount += BuildDelayFreeUses(op3, op1);
srcCount += BuildDelayFreeUses(op4, op1);
// op5 should always be contained
assert(argList->Rest()->Current()->isContained());
......@@ -2481,8 +2481,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
// When op2 is not contained or if we are producing a scalar value
// we need to mark it as delay free because the operand and target
// exist in the same register set.
srcCount += BuildDelayFreeUses(op2);
srcCount += BuildDelayFreeUses(op2, op1);
}
else
{
......@@ -2500,7 +2499,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
if (op3 != nullptr)
{
srcCount += isRMW ? BuildDelayFreeUses(op3) : BuildOperandUses(op3);
srcCount += isRMW ? BuildDelayFreeUses(op3, op1) : BuildOperandUses(op3);
}
}
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册