Unverified commit 344c7922, authored by SingleAccretion, committed by GitHub

Allow `TYP_STRUCT` `LCL_FLD` on the RHS of block copies (#70633)

* Delete the unused "GTF_USE_FLAGS"

It used to indicate that a branch operation didn't
need to materialize its operand and could just "use
flags" instead, but that purpose has long been lost
now that we have explicit SETCC nodes in lowering.
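
For reference, the shape that makes the flag redundant (a minimal sketch
distilled from the `DecomposeLongCompare` hunk below; `condition` and `cmp`
are locals of that function):

    // Flag consumption is now implied by the opcode itself: a relop that
    // feeds a branch becomes GT_JCC, and a value-producing one becomes
    // GT_SETCC. Neither needs GTF_USE_FLAGS for codegen to know that it
    // reads the flags register.
    cmp->AsOp()->gtOp1 = nullptr;
    cmp->AsOp()->gtOp2 = nullptr;
    cmp->ChangeOper(GT_SETCC);
    cmp->AsCC()->gtCondition = GenCondition::FromIntegralRelop(condition, cmp->IsUnsigned());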

* Make GTF_DONT_EXTEND a shared flag

So that it can be used for `LCL_FLD` as well as `GT_IND`.

No diffs.
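
Both opers now go through the same accessors on `GenTree` (a sketch
mirroring the gentree.h hunk below; the assert is what makes reusing one
bit value for several opers safe):

    #if defined(TARGET_XARCH)
    void GenTree::SetDontExtend()
    {
        // Only legal on the small-typed opers whose codegen checks it.
        assert(varTypeIsSmall(TypeGet()) && OperIs(GT_IND, GT_LCL_FLD));
        gtFlags |= GTF_DONT_EXTEND;
    }
    #endif // TARGET_XARCH

On the consuming side, genCodeForIndir and genCodeForLclFld then share the
same selection: "tree->DontExtend() ? INS_mov : ins_Load(targetType)".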

* Enable TYP_STRUCT on the RHS

* Accept TYP_STRUCT LCL_FLD in fgMorphBlockOperand (see the sketch below)
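
A sketch of the new acceptance path (mirroring the fgMorphBlockOperand hunk
below; `effectiveVal`, `needsIndirection` and `blockLayout` are locals of
that function):

    else if (effectiveVal->OperIs(GT_LCL_FLD))
    {
        // A TYP_STRUCT LCL_FLD can act as a block operand directly,
        // without an OBJ(ADDR(...)) wrapper, provided its layout is
        // compatible with the block's layout.
        needsIndirection = false;
        assert(ClassLayout::AreCompatible(effectiveVal->AsLclFld()->GetLayout(), blockLayout));
    }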

* Tweak TYP_STRUCT LCL_FLD costs

Model it as two loads, like `OBJ`.

Note that we could be more precise here by using the
register type of the layout. For now, we defer.
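
Concretely (see the gtSetEvalOrder hunk below; IND_COST_EX is the JIT's
standing execution cost for a memory load):

    else if (tree->TypeIs(TYP_STRUCT))
    {
        // A TYP_STRUCT LCL_FLD is a load from the local's home on the
        // stack frame, so charge it like an indirection rather than a
        // register move.
        costEx += IND_COST_EX;
        costSz += 2;
    }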

* Block CSE of TYP_STRUCT LCL_FLDs

Preserve previous behavior to avoid diffs.
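
The gating lives in optIsCSEcandidate (last hunk below); since
varTypeIsEnregisterable returns false for TYP_STRUCT, struct-typed
LCL_FLDs remain non-candidates, exactly as before this change:

    case GT_LCL_FLD:
        // TODO-1stClassStructs: support CSE for enregisterable
        // TYP_STRUCTs.
        return varTypeIsEnregisterable(type);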
Parent b9d9b0e5
......@@ -10283,7 +10283,7 @@ void CodeGen::genCodeForAddEx(GenTreeOp* tree)
 void CodeGen::genCodeForCond(GenTreeOp* tree)
 {
     assert(tree->OperIs(GT_CSNEG_MI));
-    assert(!(tree->gtFlags & GTF_SET_FLAGS) && (tree->gtFlags & GTF_USE_FLAGS));
+    assert(!(tree->gtFlags & GTF_SET_FLAGS));
     genConsumeOperands(tree);

     instruction ins;
......
......@@ -4755,7 +4755,8 @@ void CodeGen::genCodeForLclFld(GenTreeLclFld* tree)
     unsigned varNum = tree->GetLclNum();
     assert(varNum < compiler->lvaCount);
-    GetEmitter()->emitIns_R_S(ins_Load(targetType), size, targetReg, varNum, offs);
+    instruction loadIns = tree->DontExtend() ? INS_mov : ins_Load(targetType);
+    GetEmitter()->emitIns_R_S(loadIns, size, targetReg, varNum, offs);

     genProduceReg(tree);
 }
......@@ -5106,16 +5107,7 @@ void CodeGen::genCodeForIndir(GenTreeIndir* tree)
     else
     {
         genConsumeAddress(addr);
-        instruction loadIns = ins_Load(targetType);
-        if (tree->DontExtend())
-        {
-            assert(varTypeIsSmall(tree));
-            // The user of this IND does not need
-            // the upper bits to be set, so we don't need to use longer
-            // INS_movzx/INS_movsx and can use INS_mov instead.
-            // It usually happens when the real type is a small struct.
-            loadIns = INS_mov;
-        }
+        instruction loadIns = tree->DontExtend() ? INS_mov : ins_Load(targetType);
         emit->emitInsLoadInd(loadIns, emitTypeSize(tree), tree->GetRegNum(), tree);
     }
......
......@@ -931,7 +931,6 @@ GenTree* DecomposeLongs::DecomposeNeg(LIR::Use& use)
     Range().InsertAfter(loResult, zero, hiAdjust, hiResult);
     loResult->gtFlags |= GTF_SET_FLAGS;
-    hiAdjust->gtFlags |= GTF_USE_FLAGS;

 #elif defined(TARGET_ARM)
......@@ -942,7 +941,6 @@ GenTree* DecomposeLongs::DecomposeNeg(LIR::Use& use)
     Range().InsertAfter(loResult, hiResult);
     loResult->gtFlags |= GTF_SET_FLAGS;
-    hiResult->gtFlags |= GTF_USE_FLAGS;

 #endif
......@@ -997,7 +995,6 @@ GenTree* DecomposeLongs::DecomposeArith(LIR::Use& use)
     if ((oper == GT_ADD) || (oper == GT_SUB))
     {
         loResult->gtFlags |= GTF_SET_FLAGS;
-        hiResult->gtFlags |= GTF_USE_FLAGS;

         if ((loResult->gtFlags & GTF_OVERFLOW) != 0)
         {
......
......@@ -4680,6 +4680,11 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree)
                 costEx += 1;
                 costSz += 1;
             }
+            else if (tree->TypeIs(TYP_STRUCT))
+            {
+                costEx += IND_COST_EX;
+                costSz += 2;
+            }
             break;

         case GT_LCL_FLD_ADDR:
......
......@@ -408,7 +408,12 @@ enum GenTreeFlags : unsigned int
     GTF_NOREG_AT_USE = 0x00000100, // tree node is in memory at the point of use

     GTF_SET_FLAGS    = 0x00000200, // Requires that codegen for this node set the flags. Use gtSetFlags() to check this flag.
-    GTF_USE_FLAGS    = 0x00000400, // Indicates that this node uses the flags bits.
+#ifdef TARGET_XARCH
+    GTF_DONT_EXTEND  = 0x00000400, // This small-typed tree produces a value with undefined upper bits. Used on x86/x64 as a
+                                   // lowering optimization and tells the codegen to use instructions like "mov al, [addr]"
+                                   // instead of "movzx/movsx", when the user node doesn't need the upper bits.
+#endif // TARGET_XARCH

     GTF_MAKE_CSE     = 0x00000800, // Hoisted expression: try hard to make this into CSE (see optPerformHoistExpr)
     GTF_DONT_CSE     = 0x00001000, // Don't bother CSE'ing this expr
......@@ -526,16 +531,9 @@ enum GenTreeFlags : unsigned int
                                     //          alignment of 1 byte)
     GTF_IND_INVARIANT = 0x01000000, // GT_IND -- the target is invariant (a prejit indirection)
     GTF_IND_NONNULL   = 0x00400000, // GT_IND -- the indirection never returns null (zero)
-#if defined(TARGET_XARCH)
-    GTF_IND_DONT_EXTEND = 0x00200000, // GT_IND -- the indirection does not need to extend for small types
-#endif // TARGET_XARCH

     GTF_IND_FLAGS = GTF_IND_VOLATILE | GTF_IND_NONFAULTING | GTF_IND_TLS_REF | GTF_IND_UNALIGNED | GTF_IND_INVARIANT |
-                    GTF_IND_NONNULL | GTF_IND_TGT_NOT_HEAP | GTF_IND_TGT_HEAP
-#if defined(TARGET_XARCH)
-                    | GTF_IND_DONT_EXTEND
-#endif // TARGET_XARCH
-    ,
+                    GTF_IND_NONNULL | GTF_IND_TGT_NOT_HEAP | GTF_IND_TGT_HEAP,

     GTF_ADDRMODE_NO_CSE = 0x80000000, // GT_ADD/GT_MUL/GT_LSH -- Do not CSE this node only, forms complex
                                       // addressing mode
......@@ -2007,6 +2005,25 @@ public:
         gtFlags &= ~GTF_REVERSE_OPS;
     }

+#if defined(TARGET_XARCH)
+    void SetDontExtend()
+    {
+        assert(varTypeIsSmall(TypeGet()) && OperIs(GT_IND, GT_LCL_FLD));
+        gtFlags |= GTF_DONT_EXTEND;
+    }
+
+    void ClearDontExtend()
+    {
+        gtFlags &= ~GTF_DONT_EXTEND;
+    }
+
+    bool DontExtend() const
+    {
+        assert(varTypeIsSmall(TypeGet()) || ((gtFlags & GTF_DONT_EXTEND) == 0));
+        return (gtFlags & GTF_DONT_EXTEND) != 0;
+    }
+#endif // TARGET_XARCH
+
     bool IsUnsigned() const
     {
         return ((gtFlags & GTF_UNSIGNED) != 0);
......@@ -6762,23 +6779,6 @@ struct GenTreeIndir : public GenTreeOp
         return (gtFlags & GTF_IND_UNALIGNED) != 0;
     }

-#if defined(TARGET_XARCH)
-    void SetDontExtend()
-    {
-        gtFlags |= GTF_IND_DONT_EXTEND;
-    }
-
-    void ClearDontExtend()
-    {
-        gtFlags &= ~GTF_IND_DONT_EXTEND;
-    }
-
-    bool DontExtend() const
-    {
-        return (gtFlags & GTF_IND_DONT_EXTEND) != 0;
-    }
-#endif // TARGET_XARCH
-
 #if DEBUGGABLE_GENTREE
     // Used only for GenTree::GetVtableForOper()
     GenTreeIndir() : GenTreeOp()
......
......@@ -1098,21 +1098,21 @@ private:
     // Current matrix of matches/users/types:
     //
-    // |------------|------|---------|---------|
-    // | STRUCT     | CALL | ASG     | RETURN  |
-    // |------------|------|---------|---------|
-    // | Exact      | None | LCL_VAR | LCL_VAR |
-    // | Compatible | None | LCL_VAR | LCL_VAR |
-    // | Partial    | None | OBJ     | LCL_FLD |
-    // |------------|------|---------|---------|
+    // |------------|------|-------------|---------|
+    // | STRUCT     | CALL | ASG         | RETURN  |
+    // |------------|------|-------------|---------|
+    // | Exact      | None | LCL_VAR     | LCL_VAR |
+    // | Compatible | None | LCL_VAR     | LCL_VAR |
+    // | Partial    | None | OBJ/LCL_FLD | LCL_FLD |
+    // |------------|------|-------------|---------|
     //
-    // |------------|------|---------|---------|----------|
-    // | SIMD       | CALL | ASG     | RETURN  | HWI/SIMD |
-    // |------------|------|---------|---------|----------|
-    // | Exact      | None | None    | None    | None     |
-    // | Compatible | None | None    | None    | None     |
-    // | Partial    | None | None    | None    | None     |
-    // |------------|------|---------|---------|----------|
+    // |------------|------|------|--------|----------|
+    // | SIMD       | CALL | ASG  | RETURN | HWI/SIMD |
+    // |------------|------|------|--------|----------|
+    // | Exact      | None | None | None   | None     |
+    // | Compatible | None | None | None   | None     |
+    // | Partial    | None | None | None   | None     |
+    // |------------|------|------|--------|----------|
     //
     // TODO-ADDR: delete all the "None" entries and always
     // transform local nodes into LCL_VAR or LCL_FLD.
......@@ -1126,7 +1126,7 @@ private:
             return IndirTransform::LclVar;
         }

-        if (user->OperIs(GT_ASG))
+        if (user->OperIs(GT_ASG) && (indir == user->AsOp()->gtGetOp1()))
         {
             return IndirTransform::ObjAddrLclFld;
         }
......
......@@ -1013,7 +1013,6 @@ bool Lowering::TryLowerSwitchToBitTest(
     GenTree* bitTest = comp->gtNewOperNode(GT_BT, TYP_VOID, bitTableIcon, switchValue);
     bitTest->gtFlags |= GTF_SET_FLAGS;
     GenTreeCC* jcc = new (comp, GT_JCC) GenTreeCC(GT_JCC, bbSwitchCondition);
-    jcc->gtFlags |= GTF_USE_FLAGS;

     LIR::AsRange(bbSwitch).InsertAfter(switchValue, bitTableIcon, bitTest, jcc);
......@@ -2703,7 +2702,6 @@ GenTree* Lowering::DecomposeLongCompare(GenTree* cmp)
         GenTree* jcc = cmpUse.User();
         jcc->AsOp()->gtOp1 = nullptr;
         jcc->ChangeOper(GT_JCC);
-        jcc->gtFlags |= GTF_USE_FLAGS;
         jcc->AsCC()->gtCondition = GenCondition::FromIntegralRelop(condition, cmp->IsUnsigned());
     }
     else
......@@ -2711,7 +2709,6 @@ GenTree* Lowering::DecomposeLongCompare(GenTree* cmp)
         cmp->AsOp()->gtOp1 = nullptr;
         cmp->AsOp()->gtOp2 = nullptr;
         cmp->ChangeOper(GT_SETCC);
-        cmp->gtFlags |= GTF_USE_FLAGS;
         cmp->AsCC()->gtCondition = GenCondition::FromIntegralRelop(condition, cmp->IsUnsigned());
     }
......@@ -2959,8 +2956,6 @@ GenTree* Lowering::OptimizeConstCompare(GenTree* cmp)
         cmpUse.ReplaceWith(cc);
     }
-    cc->gtFlags |= GTF_USE_FLAGS;
-
     return cmp->gtNext;
 }
 #endif // TARGET_XARCH
......@@ -3028,7 +3023,6 @@ GenTree* Lowering::OptimizeConstCompare(GenTree* cmp)
         GenCondition condition = GenCondition::FromIntegralRelop(cmp);
         cc->ChangeOper(ccOp);
         cc->AsCC()->gtCondition = condition;
-        cc->gtFlags |= GTF_USE_FLAGS;

         return next;
     }
......@@ -3246,7 +3240,6 @@ GenTreeCC* Lowering::LowerNodeCC(GenTree* node, GenCondition condition)
     if (cc != nullptr)
     {
         node->gtFlags |= GTF_SET_FLAGS;
-        cc->gtFlags |= GTF_USE_FLAGS;
     }

     // Remove the chain of EQ/NE(x, 0) relop nodes, if any. Note that if a SETCC was
......@@ -3515,13 +3508,13 @@ void Lowering::LowerStoreLocCommon(GenTreeLclVarCommon* lclStore)
                 // Do it now.
                 GenTreeIndir* indir = src->AsIndir();
                 LowerIndir(indir);
+            }
 #if defined(TARGET_XARCH)
-                if (varTypeIsSmall(lclRegType))
-                {
-                    indir->SetDontExtend();
-                }
-#endif // TARGET_XARCH
+            if (varTypeIsSmall(lclRegType))
+            {
+                src->SetDontExtend();
+            }
+#endif // TARGET_XARCH
-            }

             convertToStoreObj = false;
 #else // TARGET_ARM64
......@@ -7351,9 +7344,9 @@ bool Lowering::TryTransformStoreObjAsStoreInd(GenTreeBlk* blkNode)
     }

 #if defined(TARGET_XARCH)
-    if (varTypeIsSmall(regType) && src->OperIs(GT_IND))
+    if (varTypeIsSmall(regType) && src->OperIs(GT_IND, GT_LCL_FLD))
     {
-        src->AsIndir()->SetDontExtend();
+        src->SetDontExtend();
     }
 #endif // TARGET_XARCH
......
......@@ -760,7 +760,6 @@ void Lowering::LowerModPow2(GenTree* node)
     mod->ChangeOper(GT_CSNEG_MI);
     mod->gtOp1 = trueExpr;
     mod->gtOp2 = falseExpr;
-    mod->gtFlags |= GTF_USE_FLAGS;

     JITDUMP("Lower: optimize X MOD POW2");
     DISPNODE(mod);
......
......@@ -9681,6 +9681,11 @@ GenTree* Compiler::fgMorphBlockOperand(GenTree* tree, var_types asgType, ClassLa
     {
         lclNode = effectiveVal->AsLclVarCommon();
     }
+    else if (effectiveVal->OperIs(GT_LCL_FLD))
+    {
+        needsIndirection = false;
+        assert(ClassLayout::AreCompatible(effectiveVal->AsLclFld()->GetLayout(), blockLayout));
+    }
     else if (effectiveVal->IsCall())
     {
         needsIndirection = false;
......
......@@ -3584,7 +3584,6 @@ bool Compiler::optIsCSEcandidate(GenTree* tree)
         case GT_ARR_ELEM:
         case GT_ARR_LENGTH:
-        case GT_LCL_FLD:
             return true;

         case GT_LCL_VAR:
......@@ -3687,7 +3686,9 @@ bool Compiler::optIsCSEcandidate(GenTree* tree)
             return true; // allow Intrinsics to be CSE-ed

         case GT_OBJ:
             return varTypeIsEnregisterable(type); // Allow enregisterable GT_OBJ's to be CSE-ed. (i.e. SIMD types)
+        case GT_LCL_FLD:
+            // TODO-1stClassStructs: support CSE for enregisterable TYP_STRUCTs.
+            return varTypeIsEnregisterable(type);

         case GT_COMMA:
             return true; // Allow GT_COMMA nodes to be CSE-ed.
......