Unverified commit e5eafc9b authored by Egor Bogatov, committed by GitHub

[arm64] JIT: Enable CSE/hoisting for "arrayBase + elementOffset" (#61293)

Parent a8cc1d03
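For context, here is a minimal standalone sketch (plain C++, not JIT IR) of the two address shapes this commit is about: computing a managed array element address as `arrRef + (index*scale + elemOffs)` versus `(arrRef + elemOffs) + index*scale`. Both yield the same address, but in the second shape the `arrRef + elemOffs` part does not depend on the index, so it can be CSE'd or hoisted out of a loop. The 16-byte first-element offset and all names below are illustrative assumptions, not values from the commit.

```cpp
// Minimal sketch, assuming a 16-byte object header before the first element
// (the real offset comes from the VM, not from this constant).
#include <cassert>
#include <cstdint>

int main()
{
    alignas(8) unsigned char fakeObject[16 + 8 * sizeof(int)] = {};
    uintptr_t arrRef   = reinterpret_cast<uintptr_t>(fakeObject);
    uintptr_t elemOffs = 16;          // first element's offset
    uintptr_t scale    = sizeof(int); // element size

    for (uintptr_t index = 0; index < 8; index++)
    {
        // Shape 1: arrRef + (index*scale + elemOffs) - the whole right operand varies per iteration.
        uintptr_t addr1 = arrRef + (index * scale + elemOffs);

        // Shape 2: (arrRef + elemOffs) + index*scale - the left operand is loop-invariant
        // and can be computed once (hoisted/CSE'd) across accesses.
        uintptr_t addr2 = (arrRef + elemOffs) + index * scale;

        assert(addr1 == addr2);
    }
    return 0;
}
```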
......@@ -1574,6 +1574,15 @@ void CodeGen::genConsumeRegs(GenTree* tree)
{
genConsumeAddress(tree);
}
#ifdef TARGET_ARM64
else if (tree->OperIs(GT_BFIZ))
{
// Can be contained as part of LEA on ARM64
GenTreeCast* cast = tree->gtGetOp1()->AsCast();
assert(cast->isContained());
genConsumeAddress(cast->CastOp());
}
#endif
else if (tree->OperIsLocalRead())
{
// A contained lcl var must be living on stack and marked as reg optional, or not be a
......
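Aside (not part of the diff): GT_BFIZ corresponds to ARM64's "bitfield insert in zero" (sbfiz/ubfiz), which is how a 32-to-64-bit sign/zero extension followed by a left shift appears in the IR once the cast and shift are fused. A hedged sketch of the value such an index node computes, with illustrative names:

```cpp
#include <cassert>
#include <cstdint>

// sbfiz: sign-extend the 32-bit value to 64 bits, then shift left (written as a
// multiply to stay well-defined for negative inputs in pre-C++20 dialects).
int64_t sbfiz(int32_t x, unsigned shift) { return (int64_t)x * ((int64_t)1 << shift); }

// ubfiz: zero-extend the 32-bit value to 64 bits, then shift left.
uint64_t ubfiz(uint32_t x, unsigned shift) { return (uint64_t)x << shift; }

int main()
{
    assert(sbfiz(-1, 3) == -8);                      // (-1 widened to 64 bits) * 8
    assert(ubfiz(0xFFFFFFFFu, 3) == 0x7FFFFFFF8ull); // zero-extended first, then * 8
    return 0;
}
```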
......@@ -6985,6 +6985,14 @@ void emitter::emitIns_R_R_R_Ext(instruction ins,
{
shiftAmount = insOptsLSL(opt) ? scale : 0;
}
// If target reg is ZR - it means we're doing an implicit nullcheck
// where target type was ignored and set to TYP_INT.
if ((reg1 == REG_ZR) && (shiftAmount > 0))
{
shiftAmount = scale;
}
assert((shiftAmount == scale) || (shiftAmount == 0));
reg2 = encodingSPtoZR(reg2);
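A hedged standalone sketch (not the emitter API) of the invariant being enforced here: the extended-register form only encodes a shift of 0 or log2(access size). For an implicit null check the JIT loads into the zero register and the target type was forced to TYP_INT, so a shift derived from the element size may no longer match the 4-byte access scale and is snapped back to it before the assert. All names below are illustrative.

```cpp
#include <cassert>

// 'requestedShift' is what the address mode asked for (log2 of the element size),
// 'accessScale' is log2 of the access size implied by the instruction.
int normalizeShift(bool destIsZeroReg, int requestedShift, int accessScale)
{
    int shiftAmount = requestedShift;
    if (destIsZeroReg && (shiftAmount > 0))
    {
        shiftAmount = accessScale; // implicit null check: snap back to the access scale
    }
    assert((shiftAmount == accessScale) || (shiftAmount == 0));
    return shiftAmount;
}

int main()
{
    assert(normalizeShift(false, 0, 2) == 0); // unscaled index
    assert(normalizeShift(false, 3, 3) == 3); // 8-byte element, 8-byte access
    assert(normalizeShift(true, 3, 2) == 2);  // load to the zero register over a long[] element
    return 0;
}
```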
......@@ -13481,8 +13489,23 @@ void emitter::emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataR
}
else // no scale
{
// Then load/store dataReg from/to [memBase + index]
emitIns_R_R_R(ins, attr, dataReg, memBase->GetRegNum(), index->GetRegNum());
if (index->OperIs(GT_BFIZ) && index->isContained())
{
// Then load/store dataReg from/to [memBase + index*scale with sign/zero extension]
GenTreeCast* cast = index->gtGetOp1()->AsCast();
// For now, this code only supports extensions from i32/u32
assert(cast->isContained() && varTypeIsInt(cast->CastFromType()));
emitIns_R_R_R_Ext(ins, attr, dataReg, memBase->GetRegNum(), cast->CastOp()->GetRegNum(),
cast->IsUnsigned() ? INS_OPTS_UXTW : INS_OPTS_SXTW,
(int)index->gtGetOp2()->AsIntCon()->IconValue());
}
else
{
// Then load/store dataReg from/to [memBase + index]
emitIns_R_R_R(ins, attr, dataReg, memBase->GetRegNum(), index->GetRegNum());
}
}
}
}
......
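On the hardware, the emitIns_R_R_R_Ext call with INS_OPTS_SXTW/UXTW corresponds to a load such as `ldr x0, [x1, w2, sxtw #3]`: the 32-bit index register is sign- or zero-extended and shifted by log2(element size) as part of the load itself. A standalone model of that address computation, with illustrative names only:

```cpp
// Model of the [base, Wm, SXTW #shift] / [base, Wm, UXTW #shift] address calculation.
#include <cassert>
#include <cstdint>

uintptr_t addrSxtw(uintptr_t base, int32_t index32, unsigned shift)
{
    return base + (uintptr_t)((int64_t)index32 * ((int64_t)1 << shift)); // sign-extend, then scale
}

uintptr_t addrUxtw(uintptr_t base, uint32_t index32, unsigned shift)
{
    return base + ((uintptr_t)index32 << shift); // zero-extend, then scale
}

int main()
{
    int64_t data[4] = {10, 20, 30, 40};
    uintptr_t base  = reinterpret_cast<uintptr_t>(data);

    // ldr x0, [base, w2, sxtw #3] with w2 = 2 loads data[2]
    assert(*reinterpret_cast<int64_t*>(addrSxtw(base, 2, 3)) == 30);
    assert(addrUxtw(base, 2u, 3) == addrSxtw(base, 2, 3));
    return 0;
}
```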
......@@ -5033,6 +5033,28 @@ bool Lowering::TryCreateAddrMode(GenTree* addr, bool isContainable, var_types ta
}
}
#ifdef TARGET_ARM64
// Check if we can "contain" LEA(BFIZ) in order to extend 32bit index to 64bit as part of load/store.
if ((index != nullptr) && index->OperIs(GT_BFIZ) && index->gtGetOp1()->OperIs(GT_CAST) &&
index->gtGetOp2()->IsCnsIntOrI() && varTypeIsIntegral(targetType))
{
// BFIZ node is a binary op where op1 is GT_CAST and op2 is GT_CNS_INT
GenTreeCast* cast = index->gtGetOp1()->AsCast();
assert(cast->isContained());
const unsigned shiftBy = (unsigned)index->gtGetOp2()->AsIntCon()->IconValue();
// 'scale' and 'offset' have to be unset since we're going to use [base + index * SXTW/UXTW scale] form
// where there is no room for additional offsets/scales on ARM64. 'shiftBy' has to match target's width.
if (cast->CastOp()->TypeIs(TYP_INT) && cast->TypeIs(TYP_LONG) && (genTypeSize(targetType) == (1U << shiftBy)) &&
(scale == 1) && (offset == 0))
{
// TODO: Make sure that genCreateAddrMode marks such BFIZ candidates as GTF_DONT_CSE for better CQ.
MakeSrcContained(addrMode, index);
}
}
#endif
JITDUMP("New addressing mode node:\n ");
DISPNODE(addrMode);
JITDUMP("\n");
......
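A small sketch of the containment guard above, written as a standalone predicate with illustrative names: the BFIZ shift must equal log2 of the access size, and no extra scale or offset may be left in the address mode, because ARM64's `[base, Wm, SXTW #n]` form has no room for them.

```cpp
#include <cassert>

bool canContainBfizIndex(unsigned targetSize, unsigned shiftBy, unsigned scale, int offset,
                         bool castFromInt32, bool castToInt64)
{
    return castFromInt32 && castToInt64 &&    // only i32/u32 -> i64 extensions are handled
           (targetSize == (1u << shiftBy)) && // shift must equal log2(access size)
           (scale == 1) && (offset == 0);     // no extra scale/offset fits in [base, Wm, SXTW #n]
}

int main()
{
    assert(canContainBfizIndex(8, 3, 1, 0, true, true));   // long[] element access
    assert(!canContainBfizIndex(8, 2, 1, 0, true, true));  // shift doesn't match the access size
    assert(!canContainBfizIndex(4, 2, 1, 16, true, true)); // leftover offset can't be encoded
    return 0;
}
```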
......@@ -689,7 +689,16 @@ int LinearScan::BuildNode(GenTree* tree)
if (index != nullptr)
{
srcCount++;
BuildUse(index);
if (index->OperIs(GT_BFIZ) && index->isContained())
{
GenTreeCast* cast = index->gtGetOp1()->AsCast();
assert(cast->isContained() && (cns == 0));
BuildUse(cast->CastOp());
}
else
{
BuildUse(index);
}
}
assert(dstCount == 1);
......
......@@ -3021,10 +3021,22 @@ int LinearScan::BuildAddrUses(GenTree* addr, regMaskTP candidates)
BuildUse(addrMode->Base(), candidates);
srcCount++;
}
if ((addrMode->Index() != nullptr) && !addrMode->Index()->isContained())
if (addrMode->Index() != nullptr)
{
BuildUse(addrMode->Index(), candidates);
srcCount++;
if (!addrMode->Index()->isContained())
{
BuildUse(addrMode->Index(), candidates);
srcCount++;
}
#ifdef TARGET_ARM64
else if (addrMode->Index()->OperIs(GT_BFIZ))
{
GenTreeCast* cast = addrMode->Index()->gtGetOp1()->AsCast();
assert(cast->isContained());
BuildUse(cast->CastOp(), candidates);
srcCount++;
}
#endif
}
return srcCount;
}
......
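The LSRA changes all follow one rule: a contained node consumes no register itself, so register uses are built for the non-contained operands underneath it (here, the cast's source). A toy sketch of that idea with a made-up node type, not the real LSRA API:

```cpp
#include <cassert>
#include <vector>

struct ToyNode
{
    bool contained = false;
    std::vector<ToyNode*> operands;
};

// Walk past contained nodes down to the operands that actually occupy registers.
int buildAddrUses(const ToyNode* index)
{
    if (index == nullptr)
        return 0;
    if (!index->contained)
        return 1; // a non-contained node is itself the register use

    int uses = 0;
    for (const ToyNode* op : index->operands)
        uses += buildAddrUses(op);
    return uses;
}

int main()
{
    ToyNode source;                        // e.g. the 32-bit loop variable, lives in a register
    ToyNode shiftBy{true, {}};             // contained constant shift amount, no register needed
    ToyNode cast{true, {&source}};         // contained CAST(int -> long)
    ToyNode bfiz{true, {&cast, &shiftBy}}; // contained BFIZ(cast, shift)

    assert(buildAddrUses(&bfiz) == 1);     // exactly one register use: the cast's source
    assert(buildAddrUses(&source) == 1);   // a plain, non-contained index is used directly
    return 0;
}
```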
......@@ -5461,15 +5461,43 @@ GenTree* Compiler::fgMorphArrayIndex(GenTree* tree)
// the partial byref will not point within the object, and thus not get updated correctly during a GC.
// This is mostly a risk in fully-interruptible code regions.
/* Add the first element's offset */
GenTree* cns = gtNewIconNode(elemOffs, TYP_I_IMPL);
// We can generate two types of trees for "addr":
//
// 1) "arrRef + (index + elemOffset)"
// 2) "(arrRef + elemOffset) + index"
//
// XArch has powerful addressing modes such as [base + index*scale + offset] so it's fine with 1),
// while for Arm we better try to make an invariant sub-tree as large as possible, which is usually
// "(arrRef + elemOffset)" and is CSE/LoopHoisting friendly => produces better codegen.
// 2) should still be safe from GC's point of view since both ADD operations are byref and point to
// within the object so GC will be able to correctly track and update them.
addr = gtNewOperNode(GT_ADD, TYP_I_IMPL, addr, cns);
bool groupArrayRefWithElemOffset = false;
#ifdef TARGET_ARMARCH
groupArrayRefWithElemOffset = true;
// TODO: in some cases even on ARM we better use 1) shape because if "index" is invariant and "arrRef" is not
// we at least will be able to hoist/CSE "index + elemOffset" in some cases.
// See https://github.com/dotnet/runtime/pull/61293#issuecomment-964146497
/* Add the object ref to the element's offset */
// Use 2) form only for primitive types for now - it significantly reduced number of size regressions
if (!varTypeIsIntegral(elemTyp))
{
groupArrayRefWithElemOffset = false;
}
#endif
addr = gtNewOperNode(GT_ADD, TYP_BYREF, arrRef, addr);
// First element's offset
GenTree* elemOffset = gtNewIconNode(elemOffs, TYP_I_IMPL);
if (groupArrayRefWithElemOffset)
{
GenTree* basePlusOffset = gtNewOperNode(GT_ADD, TYP_BYREF, arrRef, elemOffset);
addr = gtNewOperNode(GT_ADD, TYP_BYREF, basePlusOffset, addr);
}
else
{
addr = gtNewOperNode(GT_ADD, TYP_I_IMPL, addr, elemOffset);
addr = gtNewOperNode(GT_ADD, TYP_BYREF, arrRef, addr);
}
assert(((tree->gtDebugFlags & GTF_DEBUG_NODE_LARGE) != 0) ||
(GenTree::s_gtNodeSizes[GT_IND] == TREE_NODE_SZ_SMALL));
......@@ -5539,16 +5567,16 @@ GenTree* Compiler::fgMorphArrayIndex(GenTree* tree)
tree = gtNewOperNode(GT_COMMA, tree->TypeGet(), arrRefDefn, tree);
}
JITDUMP("fgMorphArrayIndex (before remorph):\n");
DISPTREE(tree);
JITDUMP("fgMorphArrayIndex (before remorph):\n")
DISPTREE(tree)
// Currently we morph the tree to perform some folding operations prior
// to attaching fieldSeq info and labeling constant array index contributions
//
tree = fgMorphTree(tree);
JITDUMP("fgMorphArrayIndex (after remorph):\n");
DISPTREE(tree);
JITDUMP("fgMorphArrayIndex (after remorph):\n")
DISPTREE(tree)
// Ideally we just want to proceed to attaching fieldSeq info and labeling the
// constant array index contributions, but the morphing operation may have changed
......@@ -5562,48 +5590,66 @@ GenTree* Compiler::fgMorphArrayIndex(GenTree* tree)
if (fgIsCommaThrow(tree))
{
if ((arrElem != indTree) || // A new tree node may have been created
(indTree->OperGet() != GT_IND)) // The GT_IND may have been changed to a GT_CNS_INT
if ((arrElem != indTree) || // A new tree node may have been created
(!indTree->OperIs(GT_IND))) // The GT_IND may have been changed to a GT_CNS_INT
{
return tree; // Just return the Comma-Throw, don't try to attach the fieldSeq info, etc..
}
}
assert(!fgGlobalMorph || (arrElem->gtDebugFlags & GTF_DEBUG_NODE_MORPHED));
DBEXEC(fgGlobalMorph && (arrElem == tree), tree->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED);
addr = arrElem->AsOp()->gtOp1;
DBEXEC(fgGlobalMorph && (arrElem == tree), tree->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED)
assert(addr->TypeGet() == TYP_BYREF);
addr = arrElem->gtGetOp1();
GenTree* cnsOff = nullptr;
if (addr->OperGet() == GT_ADD)
if (addr->OperIs(GT_ADD))
{
assert(addr->TypeGet() == TYP_BYREF);
assert(addr->AsOp()->gtOp1->TypeGet() == TYP_REF);
addr = addr->AsOp()->gtOp2;
// Look for the constant [#FirstElem] node here, or as the RHS of an ADD.
if (addr->gtOper == GT_CNS_INT)
GenTree* addrOp1 = addr->gtGetOp1();
if (groupArrayRefWithElemOffset)
{
cnsOff = addr;
addr = nullptr;
if (addrOp1->OperIs(GT_ADD) && addrOp1->gtGetOp2()->IsCnsIntOrI())
{
assert(addrOp1->gtGetOp1()->TypeIs(TYP_REF));
cnsOff = addrOp1->gtGetOp2();
addr = addr->gtGetOp2();
// Label any constant array index contributions with #ConstantIndex and any LclVars with
// GTF_VAR_ARR_INDEX
addr->LabelIndex(this);
}
else
{
assert(addr->gtGetOp2()->IsCnsIntOrI());
cnsOff = addr->gtGetOp2();
addr = nullptr;
}
}
else
{
if ((addr->OperGet() == GT_ADD) && (addr->AsOp()->gtOp2->gtOper == GT_CNS_INT))
assert(addr->TypeIs(TYP_BYREF));
assert(addr->gtGetOp1()->TypeIs(TYP_REF));
addr = addr->gtGetOp2();
// Look for the constant [#FirstElem] node here, or as the RHS of an ADD.
if (addr->IsCnsIntOrI())
{
cnsOff = addr->AsOp()->gtOp2;
addr = addr->AsOp()->gtOp1;
cnsOff = addr;
addr = nullptr;
}
else
{
if ((addr->OperIs(GT_ADD)) && addr->gtGetOp2()->IsCnsIntOrI())
{
cnsOff = addr->gtGetOp2();
addr = addr->gtGetOp1();
}
// Label any constant array index contributions with #ConstantIndex and any LclVars with
// GTF_VAR_ARR_INDEX
addr->LabelIndex(this);
}
// Label any constant array index contributions with #ConstantIndex and any LclVars with GTF_VAR_ARR_INDEX
addr->LabelIndex(this);
}
}
else if (addr->OperGet() == GT_CNS_INT)
else if (addr->IsCnsIntOrI())
{
cnsOff = addr;
}
......
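To make the reworked cnsOff extraction above easier to follow, here is a toy model (not the real GenTree API) of the two "addr" shapes fgMorphArrayIndex can now produce and where the first-element offset constant is found in each:

```cpp
#include <cassert>

// Toy expression node; only what's needed to show where the constant lives.
struct Node
{
    enum Kind { Add, Const, ArrRef, Index } kind;
    long  value = 0;       // used by Const
    Node* op1   = nullptr;
    Node* op2   = nullptr;
};

// Shape 1: arrRef + (index + elemOffset)   -> constant is the RHS of the inner ADD
// Shape 2: (arrRef + elemOffset) + index   -> constant is the RHS of addr's op1
long findElemOffset(const Node* addr, bool groupArrayRefWithElemOffset)
{
    assert(addr->kind == Node::Add);
    if (groupArrayRefWithElemOffset)
    {
        const Node* basePlusOffset = addr->op1;
        assert((basePlusOffset->kind == Node::Add) && (basePlusOffset->op2->kind == Node::Const));
        return basePlusOffset->op2->value;
    }

    const Node* inner = addr->op2; // "index + elemOffset", or just the constant
    if (inner->kind == Node::Const)
        return inner->value;
    assert((inner->kind == Node::Add) && (inner->op2->kind == Node::Const));
    return inner->op2->value;
}

int main()
{
    Node arrRef{Node::ArrRef}, index{Node::Index}, cns{Node::Const, 16};

    Node inner1{Node::Add, 0, &index, &cns};
    Node shape1{Node::Add, 0, &arrRef, &inner1};        // arrRef + (index + 16)

    Node basePlusOffset{Node::Add, 0, &arrRef, &cns};
    Node shape2{Node::Add, 0, &basePlusOffset, &index}; // (arrRef + 16) + index

    assert(findElemOffset(&shape1, /*grouped*/ false) == 16);
    assert(findElemOffset(&shape2, /*grouped*/ true) == 16);
    return 0;
}
```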
......@@ -247,6 +247,12 @@ inline bool varTypeIsLong(T vt)
return (TypeGet(vt) >= TYP_LONG) && (TypeGet(vt) <= TYP_ULONG);
}
template <class T>
inline bool varTypeIsInt(T vt)
{
return (TypeGet(vt) >= TYP_INT) && (TypeGet(vt) <= TYP_UINT);
}
template <class T>
inline bool varTypeIsMultiReg(T vt)
{
......
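The new varTypeIsInt helper uses the same range-comparison trick as the varTypeIsLong helper above it, which relies on TYP_INT and TYP_UINT being adjacent in the var_types enum. A toy mirror of that idea (illustrative enum, not the real var_types layout):

```cpp
#include <cassert>

enum ToyVarType
{
    TOY_BYTE,
    TOY_SHORT,
    TOY_INT,
    TOY_UINT,
    TOY_LONG,
    TOY_ULONG,
};

// Works only because TOY_INT and TOY_UINT are adjacent, as in the real enum.
bool toyVarTypeIsInt(ToyVarType vt)
{
    return (vt >= TOY_INT) && (vt <= TOY_UINT);
}

int main()
{
    assert(toyVarTypeIsInt(TOY_INT) && toyVarTypeIsInt(TOY_UINT));
    assert(!toyVarTypeIsInt(TOY_SHORT) && !toyVarTypeIsInt(TOY_LONG));
    return 0;
}
```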