Unverified commit 04690205 authored by SingleAccretion, committed by GitHub

Contain memory operands under casts (#72719)

* Add GenTreeCast::IsZeroExtending

* Cast descriptor support

* XARCH support

* ARM/ARM64 support

TODO: consider using a dedicated IND_EXT oper for ARM/ARM64 instead of containment.
This would allow us to cleanly handle all indirections. It would not mean giving up
on cast containment, as we'd still need to handle the "reg optional" case.

IND_EXT would be much like an ordinary IND, but with "source" and "target" types.
The "target" type would always be int/long, while the "source" could be of any
integral type.

This design would be a bit more natural, and nicely separable from casts. However,
the main problem with the current state of things, apart from the fact that codegen
of indirections is tied strongly to "GenTreeIndir", is that changing the type of the
load can invalidate LEA containment. One would think this is solvable with some
tricks, like re-running containment analysis on an indirection after processing the
cast, but ARM64 codegen doesn't support uncontained LEAs in some cases.

A possible solution to that problem is uncontaining the whole address tree. That
would be messy, but it ought to work. An additional complication is that these trees
can carry a lot of contained operands as part of ADDEX and BFIZ, so those would
first have to be made into proper EXOPs.

In any case, this is all future work.
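
To make the change itself concrete, here is a small standalone C++ sketch (not JIT code; the function names are made up for illustration) of the load-plus-widening-cast pattern that containing memory operands under casts lets codegen fold into a single extending load. The assembly in the comments indicates the general before/after shape rather than the exact sequences the JIT emits.

#include <cstdint>

// A byte load feeding a sign-extending cast to int. With the cast's memory
// operand contained, codegen can emit one sign-extending load (movsx on
// xarch, ldrsb on ARM64) instead of a plain load plus a separate extend.
int32_t SignExtendByteLoad(const int8_t* p)
{
    return static_cast<int32_t>(*p);
}

// A 32-bit load feeding a zero-extending cast to long corresponds to the
// LOAD_ZERO_EXTEND_INT kind added below: a single 32-bit load suffices,
// since 32-bit register writes clear the upper bits on both x64 and ARM64.
uint64_t ZeroExtendIntLoad(const uint32_t* p)
{
    return static_cast<uint64_t>(*p);
}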
Parent 6c56c43e
......@@ -993,6 +993,13 @@ protected:
ZERO_EXTEND_INT,
SIGN_EXTEND_INT,
#endif
LOAD_ZERO_EXTEND_SMALL_INT,
LOAD_SIGN_EXTEND_SMALL_INT,
#ifdef TARGET_64BIT
LOAD_ZERO_EXTEND_INT,
LOAD_SIGN_EXTEND_INT,
#endif
LOAD_SOURCE
};
private:
......
......@@ -3898,25 +3898,24 @@ void CodeGen::genIntCastOverflowCheck(GenTreeCast* cast, const GenIntCastDesc& d
// cast - The GT_CAST node
//
// Assumptions:
// The cast node is not a contained node and must have an assigned register.
// Neither the source nor target type can be a floating point type.
//
// TODO-ARM64-CQ: Allow castOp to be a contained node without an assigned register.
//
void CodeGen::genIntToIntCast(GenTreeCast* cast)
{
genConsumeRegs(cast->gtGetOp1());
genConsumeRegs(cast->CastOp());
const regNumber srcReg = cast->gtGetOp1()->GetRegNum();
GenTree* const src = cast->CastOp();
const regNumber srcReg = src->isUsedFromReg() ? src->GetRegNum() : REG_NA;
const regNumber dstReg = cast->GetRegNum();
emitter* const emit = GetEmitter();
assert(genIsValidIntReg(srcReg));
assert(genIsValidIntReg(dstReg));
GenIntCastDesc desc(cast);
if (desc.CheckKind() != GenIntCastDesc::CHECK_NONE)
{
assert(genIsValidIntReg(srcReg));
genIntCastOverflowCheck(cast, desc, srcReg);
}
......@@ -3944,15 +3943,70 @@ void CodeGen::genIntToIntCast(GenTreeCast* cast)
ins = INS_sxtw;
insSize = 8;
break;
#endif
default:
assert(desc.ExtendKind() == GenIntCastDesc::COPY);
#endif // TARGET_64BIT
case GenIntCastDesc::COPY:
ins = INS_mov;
insSize = desc.ExtendSrcSize();
break;
case GenIntCastDesc::LOAD_ZERO_EXTEND_SMALL_INT:
ins = (desc.ExtendSrcSize() == 1) ? INS_ldrb : INS_ldrh;
insSize = TARGET_POINTER_SIZE;
break;
case GenIntCastDesc::LOAD_SIGN_EXTEND_SMALL_INT:
ins = (desc.ExtendSrcSize() == 1) ? INS_ldrsb : INS_ldrsh;
insSize = TARGET_POINTER_SIZE;
break;
#ifdef TARGET_64BIT
case GenIntCastDesc::LOAD_ZERO_EXTEND_INT:
ins = INS_ldr;
insSize = 4;
break;
case GenIntCastDesc::LOAD_SIGN_EXTEND_INT:
ins = INS_ldrsw;
insSize = 8;
break;
#endif // TARGET_64BIT
case GenIntCastDesc::LOAD_SOURCE:
ins = ins_Load(src->TypeGet());
insSize = genTypeSize(genActualType(src));
break;
default:
unreached();
}
GetEmitter()->emitIns_Mov(ins, EA_ATTR(insSize), dstReg, srcReg, /* canSkip */ false);
if (srcReg != REG_NA)
{
emit->emitIns_Mov(ins, EA_ATTR(insSize), dstReg, srcReg, /* canSkip */ false);
}
else
{
// The "used from memory" case. On ArmArch casts are the only nodes which can have
// contained memory operands, so we have to handle all possible sources "manually".
assert(src->isUsedFromMemory());
if (src->isUsedFromSpillTemp())
{
assert(src->IsRegOptional());
TempDsc* tmpDsc = getSpillTempDsc(src);
unsigned tmpNum = tmpDsc->tdTempNum();
regSet.tmpRlsTemp(tmpDsc);
emit->emitIns_R_S(ins, EA_ATTR(insSize), dstReg, tmpNum, 0);
}
else if (src->OperIsLocal())
{
emit->emitIns_R_S(ins, EA_ATTR(insSize), dstReg, src->AsLclVarCommon()->GetLclNum(),
src->AsLclVarCommon()->GetLclOffs());
}
else
{
assert(src->OperIs(GT_IND) && !src->AsIndir()->IsVolatile() && !src->AsIndir()->IsUnaligned());
emit->emitIns_R_R_I(ins, EA_ATTR(insSize), dstReg, src->AsIndir()->Base()->GetRegNum(),
static_cast<int>(src->AsIndir()->Offset()));
}
}
}
genProduceReg(cast);
......
......@@ -2448,7 +2448,8 @@ void CodeGen::genCodeForCast(GenTreeOp* tree)
CodeGen::GenIntCastDesc::GenIntCastDesc(GenTreeCast* cast)
{
const var_types srcType = genActualType(cast->gtGetOp1()->TypeGet());
GenTree* const src = cast->CastOp();
const var_types srcType = genActualType(src);
const bool srcUnsigned = cast->IsUnsigned();
const unsigned srcSize = genTypeSize(srcType);
const var_types castType = cast->gtCastType;
......@@ -2457,7 +2458,9 @@ CodeGen::GenIntCastDesc::GenIntCastDesc(GenTreeCast* cast)
const var_types dstType = genActualType(cast->TypeGet());
const unsigned dstSize = genTypeSize(dstType);
const bool overflow = cast->gtOverflow();
const bool castIsLoad = !src->isUsedFromReg();
assert(castIsLoad == src->isUsedFromMemory());
assert((srcSize == 4) || (srcSize == genTypeSize(TYP_I_IMPL)));
assert((dstSize == 4) || (dstSize == genTypeSize(TYP_I_IMPL)));
......@@ -2473,7 +2476,7 @@ CodeGen::GenIntCastDesc::GenIntCastDesc(GenTreeCast* cast)
// values of the castType without risk of integer overflow.
const int castNumBits = (castSize * 8) - (castUnsigned ? 0 : 1);
m_checkSmallIntMax = (1 << castNumBits) - 1;
m_checkSmallIntMin = (castUnsigned | srcUnsigned) ? 0 : (-m_checkSmallIntMax - 1);
m_checkSmallIntMin = (castUnsigned || srcUnsigned) ? 0 : (-m_checkSmallIntMax - 1);
m_extendKind = COPY;
m_extendSrcSize = dstSize;
......@@ -2568,6 +2571,48 @@ CodeGen::GenIntCastDesc::GenIntCastDesc(GenTreeCast* cast)
m_extendKind = COPY;
m_extendSrcSize = srcSize;
}
if (castIsLoad)
{
const var_types srcLoadType = src->TypeGet();
switch (m_extendKind)
{
case ZERO_EXTEND_SMALL_INT: // small type/int/long -> ubyte/ushort
assert(varTypeIsUnsigned(srcLoadType) || (genTypeSize(srcLoadType) >= genTypeSize(castType)));
m_extendKind = LOAD_ZERO_EXTEND_SMALL_INT;
m_extendSrcSize = min(genTypeSize(srcLoadType), genTypeSize(castType));
break;
case SIGN_EXTEND_SMALL_INT: // small type/int/long -> byte/short
assert(varTypeIsSigned(srcLoadType) || (genTypeSize(srcLoadType) >= genTypeSize(castType)));
m_extendKind = LOAD_SIGN_EXTEND_SMALL_INT;
m_extendSrcSize = min(genTypeSize(srcLoadType), genTypeSize(castType));
break;
#ifdef TARGET_64BIT
case ZERO_EXTEND_INT: // ubyte/ushort/int -> long.
assert(varTypeIsUnsigned(srcLoadType) || (srcLoadType == TYP_INT));
m_extendKind = varTypeIsSmall(srcLoadType) ? LOAD_ZERO_EXTEND_SMALL_INT : LOAD_ZERO_EXTEND_INT;
m_extendSrcSize = genTypeSize(srcLoadType);
break;
case SIGN_EXTEND_INT: // byte/short/int -> long.
assert(varTypeIsSigned(srcLoadType) || (srcLoadType == TYP_INT));
m_extendKind = varTypeIsSmall(srcLoadType) ? LOAD_SIGN_EXTEND_SMALL_INT : LOAD_SIGN_EXTEND_INT;
m_extendSrcSize = genTypeSize(srcLoadType);
break;
#endif // TARGET_64BIT
case COPY: // long -> long, small type/int/long -> int.
m_extendKind = LOAD_SOURCE;
m_extendSrcSize = 0;
break;
default:
unreached();
}
}
}
#if !defined(TARGET_64BIT)
......
......@@ -6752,27 +6752,25 @@ void CodeGen::genIntCastOverflowCheck(GenTreeCast* cast, const GenIntCastDesc& d
// cast - The GT_CAST node
//
// Assumptions:
// The cast node is not a contained node and must have an assigned register.
// Neither the source nor target type can be a floating point type.
// On x86 casts to (U)BYTE require that the source be in a byte register.
//
// TODO-XArch-CQ: Allow castOp to be a contained node without an assigned register.
//
void CodeGen::genIntToIntCast(GenTreeCast* cast)
{
genConsumeRegs(cast->gtGetOp1());
genConsumeRegs(cast->CastOp());
const regNumber srcReg = cast->gtGetOp1()->GetRegNum();
GenTree* const src = cast->CastOp();
const regNumber srcReg = src->isUsedFromReg() ? src->GetRegNum() : REG_NA;
const regNumber dstReg = cast->GetRegNum();
emitter* emit = GetEmitter();
assert(genIsValidIntReg(srcReg));
assert(genIsValidIntReg(dstReg));
GenIntCastDesc desc(cast);
if (desc.CheckKind() != GenIntCastDesc::CHECK_NONE)
{
assert(genIsValidIntReg(srcReg));
genIntCastOverflowCheck(cast, desc, srcReg);
}
......@@ -6783,33 +6781,51 @@ void CodeGen::genIntToIntCast(GenTreeCast* cast)
switch (desc.ExtendKind())
{
case GenIntCastDesc::ZERO_EXTEND_SMALL_INT:
case GenIntCastDesc::LOAD_ZERO_EXTEND_SMALL_INT:
ins = INS_movzx;
insSize = desc.ExtendSrcSize();
break;
case GenIntCastDesc::SIGN_EXTEND_SMALL_INT:
case GenIntCastDesc::LOAD_SIGN_EXTEND_SMALL_INT:
ins = INS_movsx;
insSize = desc.ExtendSrcSize();
break;
#ifdef TARGET_64BIT
case GenIntCastDesc::ZERO_EXTEND_INT:
case GenIntCastDesc::LOAD_ZERO_EXTEND_INT:
ins = INS_mov;
insSize = 4;
canSkip = compiler->opts.OptimizationEnabled() && emit->AreUpper32BitsZero(srcReg);
break;
case GenIntCastDesc::SIGN_EXTEND_INT:
case GenIntCastDesc::LOAD_SIGN_EXTEND_INT:
ins = INS_movsxd;
insSize = 4;
break;
#endif
default:
assert(desc.ExtendKind() == GenIntCastDesc::COPY);
case GenIntCastDesc::COPY:
ins = INS_mov;
insSize = desc.ExtendSrcSize();
canSkip = true;
break;
case GenIntCastDesc::LOAD_SOURCE:
ins = ins_Load(src->TypeGet());
insSize = genTypeSize(src);
break;
default:
unreached();
}
emit->emitIns_Mov(ins, EA_ATTR(insSize), dstReg, srcReg, canSkip);
if (srcReg != REG_NA)
{
emit->emitIns_Mov(ins, EA_ATTR(insSize), dstReg, srcReg, canSkip);
}
else
{
assert(src->isUsedFromMemory());
inst_RV_TT(ins, EA_ATTR(insSize), dstReg, src);
}
genProduceReg(cast);
}
......
......@@ -3858,6 +3858,22 @@ struct GenTreeCast : public GenTreeOp
{
}
#endif
bool IsZeroExtending()
{
assert(varTypeIsIntegral(CastOp()) && varTypeIsIntegral(CastToType()));
if (varTypeIsSmall(CastToType()))
{
return varTypeIsUnsigned(CastToType());
}
if (TypeIs(TYP_LONG) && genActualTypeIsInt(CastOp()))
{
return IsUnsigned();
}
return false;
}
};
// GT_BOX nodes are place markers for boxed values. The "real" tree
......
......@@ -288,11 +288,7 @@ CONFIG_INTEGER(JitNoRangeChks, W("JitNoRngChks"), 0) // If 1, don't generate ran
// AltJitAssertOnNYI should be 0 on targets where JIT is under development or bring up stage, so as to facilitate
// fallback to main JIT on hitting a NYI.
#if defined(TARGET_ARM64) || defined(TARGET_X86)
CONFIG_INTEGER(AltJitAssertOnNYI, W("AltJitAssertOnNYI"), 0) // Controls the AltJit behavior of NYI stuff
#else // !defined(TARGET_ARM64) && !defined(TARGET_X86)
CONFIG_INTEGER(AltJitAssertOnNYI, W("AltJitAssertOnNYI"), 1) // Controls the AltJit behavior of NYI stuff
#endif // defined(TARGET_ARM64) || defined(TARGET_X86)
CONFIG_INTEGER(EnableEHWriteThru, W("EnableEHWriteThru"), 1) // Enable the register allocator to support EH-write thru:
// partial enregistration of vars exposed on EH boundaries
......
......@@ -2828,6 +2828,11 @@ GenTree* Lowering::OptimizeConstCompare(GenTree* cmp)
op2->ClearContained();
}
}
else
{
castOp->ClearContained();
}
cmp->AsOp()->gtOp1 = castOp;
BlockRange().Remove(cast);
......@@ -5352,6 +5357,7 @@ bool Lowering::TryCreateAddrMode(GenTree* addr, bool isContainable, GenTree* par
#ifdef TARGET_ARM64
if ((index != nullptr) && index->OperIs(GT_CAST) && (scale == 1) && (offset == 0) && varTypeIsByte(targetType))
{
index->AsCast()->CastOp()->ClearContained(); // Uncontain any memory operands.
MakeSrcContained(addrMode, index);
}
......@@ -6111,6 +6117,7 @@ void Lowering::LowerShift(GenTreeOp* shift)
// The parent was replaced, clear contain and regOpt flag.
shift->gtOp2->ClearContained();
}
ContainCheckShiftRotate(shift);
#ifdef TARGET_ARM64
......@@ -6129,13 +6136,13 @@ void Lowering::LowerShift(GenTreeOp* shift)
unsigned dstBits = genTypeSize(cast) * BITS_PER_BYTE;
unsigned srcBits = varTypeIsSmall(cast->CastToType()) ? genTypeSize(cast->CastToType()) * BITS_PER_BYTE
: genTypeSize(cast->CastOp()) * BITS_PER_BYTE;
assert(!cast->CastOp()->isContained());
// It has to be an upcast and CNS must be in [1..srcBits) range
if ((srcBits < dstBits) && (cns->IconValue() > 0) && (cns->IconValue() < srcBits))
{
JITDUMP("Recognized ubfix/sbfix pattern in LSH(CAST, CNS). Changing op to GT_BFIZ");
shift->ChangeOper(GT_BFIZ);
cast->CastOp()->ClearContained(); // Uncontain any memory operands.
MakeSrcContained(shift, cast);
}
}
......
......@@ -388,10 +388,11 @@ private:
return op;
}
GenTreeCast* cast = op->AsCast();
GenTreeCast* cast = op->AsCast();
GenTree* castOp = cast->CastOp();
// FP <-> INT casts should be kept
if (varTypeIsFloating(cast->CastFromType()) ^ varTypeIsFloating(expectedType))
if (varTypeIsFloating(castOp) ^ varTypeIsFloating(expectedType))
{
return op;
}
......@@ -402,17 +403,23 @@ private:
return op;
}
// Keep casts with operands usable from memory.
if (castOp->isContained() || castOp->IsRegOptional())
{
return op;
}
if (genTypeSize(cast->CastToType()) >= genTypeSize(expectedType))
{
#ifndef TARGET_64BIT
// Don't expose TYP_LONG on 32bit
if (varTypeIsLong(cast->CastFromType()))
if (castOp->TypeIs(TYP_LONG))
{
return op;
}
#endif
BlockRange().Remove(op);
return cast->CastOp();
return castOp;
}
return op;
......
......@@ -307,11 +307,13 @@ GenTree* Lowering::LowerMul(GenTreeOp* mul)
mul->SetUnsigned();
}
op1->CastOp()->ClearContained(); // Uncontain any memory operands.
mul->gtOp1 = op1->CastOp();
BlockRange().Remove(op1);
if (op2->OperIs(GT_CAST))
{
op2->AsCast()->CastOp()->ClearContained(); // Uncontain any memory operands.
mul->gtOp2 = op2->AsCast()->CastOp();
BlockRange().Remove(op2);
}
......@@ -1881,6 +1883,7 @@ void Lowering::ContainCheckBinary(GenTreeOp* node)
if (cast->gtGetOp1()->TypeIs(TYP_INT) && cast->TypeIs(TYP_LONG) && !cast->gtOverflow())
{
node->ChangeOper(GT_ADDEX);
cast->AsCast()->CastOp()->ClearContained(); // Uncontain any memory operands.
MakeSrcContained(node, cast);
}
}
......@@ -2024,11 +2027,63 @@ void Lowering::ContainCheckStoreLoc(GenTreeLclVarCommon* storeLoc) const
//
void Lowering::ContainCheckCast(GenTreeCast* node)
{
#ifdef TARGET_ARM
GenTree* castOp = node->CastOp();
var_types castToType = node->CastToType();
var_types srcType = castOp->TypeGet();
if (comp->opts.OptimizationEnabled() && !node->gtOverflow() && varTypeIsIntegral(castOp) &&
varTypeIsIntegral(castToType))
{
// Most integral casts can be re-expressed as loads, except those that would be changing the sign.
if (!varTypeIsSmall(castOp) || (varTypeIsUnsigned(castOp) == node->IsZeroExtending()))
{
bool srcIsContainable = false;
// Make sure to only contain indirections codegen can handle.
if (castOp->OperIs(GT_IND))
{
GenTreeIndir* indir = castOp->AsIndir();
if (!indir->IsVolatile() && !indir->IsUnaligned())
{
GenTree* addr = indir->Addr();
if (!addr->isContained())
{
srcIsContainable = true;
}
else if (addr->OperIs(GT_LEA) && !addr->AsAddrMode()->HasIndex())
{
var_types loadType = varTypeIsSmall(castToType) ? castToType : castOp->TypeGet();
if (emitter::emitIns_valid_imm_for_ldst_offset(addr->AsAddrMode()->Offset(),
emitTypeSize(loadType)))
{
srcIsContainable = true;
}
}
}
}
else
{
assert(castOp->OperIsLocalRead() || !IsContainableMemoryOp(castOp));
srcIsContainable = true;
}
if (srcIsContainable && IsSafeToContainMem(node, castOp))
{
if (IsContainableMemoryOp(castOp))
{
MakeSrcContained(node, castOp);
}
else
{
castOp->SetRegOptional();
}
}
}
}
#ifdef TARGET_ARM
if (varTypeIsLong(castOp))
{
assert(castOp->OperGet() == GT_LONG);
......
......@@ -5135,28 +5135,52 @@ void Lowering::ContainCheckCast(GenTreeCast* node)
srcType = varTypeToUnsigned(srcType);
}
if (!node->gtOverflow() && (varTypeIsFloating(castToType) || varTypeIsFloating(srcType)))
if (!node->gtOverflow())
{
// Some casts will be able to use the source from memory.
bool srcIsContainable = false;
if (varTypeIsFloating(castToType) || varTypeIsFloating(srcType))
{
#ifdef DEBUG
// If converting to float/double, the operand must be 4 or 8 byte in size.
if (varTypeIsFloating(castToType))
// If converting to float/double, the operand must be 4 or 8 byte in size.
if (varTypeIsFloating(castToType))
{
unsigned opSize = genTypeSize(srcType);
assert(opSize == 4 || opSize == 8);
}
#endif // DEBUG
// U8 -> R8 conversion requires that the operand be in a register.
if (srcType != TYP_ULONG)
{
if (castOp->IsCnsNonZeroFltOrDbl())
{
MakeSrcContained(node, castOp);
}
else
{
srcIsContainable = true;
}
}
}
else if (comp->opts.OptimizationEnabled() && varTypeIsIntegral(castOp) && varTypeIsIntegral(castToType))
{
unsigned opSize = genTypeSize(srcType);
assert(opSize == 4 || opSize == 8);
// Most integral casts can be re-expressed as loads, except those that would be changing the sign.
if (!varTypeIsSmall(castOp) || (varTypeIsUnsigned(castOp) == node->IsZeroExtending()))
{
srcIsContainable = true;
}
}
#endif // DEBUG
// U8 -> R8 conversion requires that the operand be in a register.
if (srcType != TYP_ULONG)
if (srcIsContainable && IsSafeToContainMem(node, castOp))
{
if ((IsContainableMemoryOp(castOp) && IsSafeToContainMem(node, castOp)) || castOp->IsCnsNonZeroFltOrDbl())
if (IsContainableMemoryOp(castOp))
{
MakeSrcContained(node, castOp);
}
else
{
// Mark castOp as reg optional to indicate codegen
// can still generate code if it is on stack.
castOp->SetRegOptional();
}
}
......