未验证 提交 2fb0a8c9 编写于 作者: W Will Smith 提交者: GitHub

[JIT] x86/x64 - improved localloc codegen in some cases (#76851)

上级 5ec2e87b
......@@ -1430,13 +1430,16 @@ protected:
void genReturn(GenTree* treeNode);
#ifdef TARGET_XARCH
void genStackPointerConstantAdjustment(ssize_t spDelta, bool trackSpAdjustments);
void genStackPointerConstantAdjustmentWithProbe(ssize_t spDelta, bool trackSpAdjustments);
target_ssize_t genStackPointerConstantAdjustmentLoopWithProbe(ssize_t spDelta, bool trackSpAdjustments);
void genStackPointerDynamicAdjustmentWithProbe(regNumber regSpDelta);
#else // !TARGET_XARCH
void genStackPointerConstantAdjustment(ssize_t spDelta, regNumber regTmp);
void genStackPointerConstantAdjustmentWithProbe(ssize_t spDelta, regNumber regTmp);
target_ssize_t genStackPointerConstantAdjustmentLoopWithProbe(ssize_t spDelta, regNumber regTmp);
#if defined(TARGET_XARCH)
void genStackPointerDynamicAdjustmentWithProbe(regNumber regSpDelta, regNumber regTmp);
#endif // defined(TARGET_XARCH)
#endif // !TARGET_XARCH
void genLclHeap(GenTree* tree);
......
......@@ -2216,13 +2216,12 @@ void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pIni
//
// Arguments:
// spDelta - the value to add to SP. Must be negative or zero.
// regTmp - x86 only: an available temporary register. If not REG_NA, hide the SP
// adjustment from the emitter, using this register.
// trackSpAdjustments - x86 only: whether or not to track the SP adjustment
//
// Return Value:
// None.
//
void CodeGen::genStackPointerConstantAdjustment(ssize_t spDelta, regNumber regTmp)
void CodeGen::genStackPointerConstantAdjustment(ssize_t spDelta, bool trackSpAdjustments)
{
assert(spDelta < 0);
......@@ -2230,22 +2229,19 @@ void CodeGen::genStackPointerConstantAdjustment(ssize_t spDelta, regNumber regTm
// function that does a probe, which will in turn call this function.
assert((target_size_t)(-spDelta) <= compiler->eeGetPageSize());
#ifdef TARGET_X86
if (regTmp != REG_NA)
#ifdef TARGET_AMD64
// We always track the SP adjustment on X64.
trackSpAdjustments = true;
#endif // TARGET_AMD64
if (trackSpAdjustments)
{
// For x86, some cases don't want to use "sub ESP" because we don't want the emitter to track the adjustment
// to ESP. So do the work in the count register.
// TODO-CQ: manipulate ESP directly, to share code, reduce #ifdefs, and improve CQ. This would require
// creating a way to temporarily turn off the emitter's tracking of ESP, maybe marking instrDescs as "don't
// track".
inst_Mov(TYP_I_IMPL, regTmp, REG_SPBASE, /* canSkip */ false);
inst_RV_IV(INS_sub, regTmp, (target_ssize_t)-spDelta, EA_PTRSIZE);
inst_Mov(TYP_I_IMPL, REG_SPBASE, regTmp, /* canSkip */ false);
inst_RV_IV(INS_sub, REG_SPBASE, (target_ssize_t)-spDelta, EA_PTRSIZE);
}
else
#endif // TARGET_X86
{
inst_RV_IV(INS_sub, REG_SPBASE, (target_ssize_t)-spDelta, EA_PTRSIZE);
// For x86, some cases don't want to track the adjustment to SP.
inst_RV_IV(INS_sub_hide, REG_SPBASE, (target_ssize_t)-spDelta, EA_PTRSIZE);
}
}
......@@ -2257,16 +2253,15 @@ void CodeGen::genStackPointerConstantAdjustment(ssize_t spDelta, regNumber regTm
// Arguments:
// spDelta - the value to add to SP. Must be negative or zero. If zero, the probe happens,
// but the stack pointer doesn't move.
// regTmp - x86 only: an available temporary register. If not REG_NA, hide the SP
// adjustment from the emitter, using this register.
// trackSpAdjustments - x86 only: whether or not to track the SP adjustment
//
// Return Value:
// None.
//
void CodeGen::genStackPointerConstantAdjustmentWithProbe(ssize_t spDelta, regNumber regTmp)
void CodeGen::genStackPointerConstantAdjustmentWithProbe(ssize_t spDelta, bool trackSpAdjustments)
{
GetEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0);
genStackPointerConstantAdjustment(spDelta, regTmp);
genStackPointerConstantAdjustment(spDelta, trackSpAdjustments);
}
//------------------------------------------------------------------------
......@@ -2280,13 +2275,12 @@ void CodeGen::genStackPointerConstantAdjustmentWithProbe(ssize_t spDelta, regNum
//
// Arguments:
// spDelta - the value to add to SP. Must be negative.
// regTmp - x86 only: an available temporary register. If not REG_NA, hide the SP
// adjustment from the emitter, using this register.
// trackSpAdjustments - x86 only: whether or not to track the SP adjustment
//
// Return Value:
// Offset in bytes from SP to last probed address.
//
target_ssize_t CodeGen::genStackPointerConstantAdjustmentLoopWithProbe(ssize_t spDelta, regNumber regTmp)
target_ssize_t CodeGen::genStackPointerConstantAdjustmentLoopWithProbe(ssize_t spDelta, bool trackSpAdjustments)
{
assert(spDelta < 0);
......@@ -2296,7 +2290,7 @@ target_ssize_t CodeGen::genStackPointerConstantAdjustmentLoopWithProbe(ssize_t s
do
{
ssize_t spOneDelta = -(ssize_t)min((target_size_t)-spRemainingDelta, pageSize);
genStackPointerConstantAdjustmentWithProbe(spOneDelta, regTmp);
genStackPointerConstantAdjustmentWithProbe(spOneDelta, trackSpAdjustments);
spRemainingDelta -= spOneDelta;
} while (spRemainingDelta < 0);
......@@ -2323,21 +2317,18 @@ target_ssize_t CodeGen::genStackPointerConstantAdjustmentLoopWithProbe(ssize_t s
// genStackPointerDynamicAdjustmentWithProbe: add a register value to the stack pointer,
// and probe the stack as appropriate.
//
// Note that for x86, we hide the ESP adjustment from the emitter. To do that, currently,
// requires a temporary register and extra code.
// We hide the ESP adjustment from the emitter.
//
// Arguments:
// regSpDelta - the register value to add to SP. The value in this register must be negative.
// This register might be trashed.
// regTmp - an available temporary register. Will be trashed.
//
// Return Value:
// None.
//
void CodeGen::genStackPointerDynamicAdjustmentWithProbe(regNumber regSpDelta, regNumber regTmp)
void CodeGen::genStackPointerDynamicAdjustmentWithProbe(regNumber regSpDelta)
{
assert(regSpDelta != REG_NA);
assert(regTmp != REG_NA);
// Tickle the pages to ensure that ESP is always valid and is
// in sync with the "stack guard page". Note that in the worst
......@@ -2356,9 +2347,7 @@ void CodeGen::genStackPointerDynamicAdjustmentWithProbe(regNumber regSpDelta, re
// xor regSpDelta, regSpDelta // Overflow, pick lowest possible number
// loop:
// test ESP, [ESP+0] // tickle the page
// mov regTmp, ESP
// sub regTmp, eeGetPageSize()
// mov ESP, regTmp
// sub ESP, eeGetPageSize()
// cmp ESP, regSpDelta
// jae loop
// mov ESP, regSpDelta
......@@ -2376,11 +2365,8 @@ void CodeGen::genStackPointerDynamicAdjustmentWithProbe(regNumber regSpDelta, re
// be on the guard page. It is OK to leave the final value of ESP on the guard page.
GetEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0);
// Subtract a page from ESP. This is a trick to avoid the emitter trying to track the
// decrement of the ESP - we do the subtraction in another reg instead of adjusting ESP directly.
inst_Mov(TYP_I_IMPL, regTmp, REG_SPBASE, /* canSkip */ false);
inst_RV_IV(INS_sub, regTmp, compiler->eeGetPageSize(), EA_PTRSIZE);
inst_Mov(TYP_I_IMPL, REG_SPBASE, regTmp, /* canSkip */ false);
// Subtract a page from ESP and hide the adjustment.
inst_RV_IV(INS_sub_hide, REG_SPBASE, compiler->eeGetPageSize(), EA_PTRSIZE);
inst_RV_RV(INS_cmp, REG_SPBASE, regSpDelta, TYP_I_IMPL);
inst_JMP(EJ_jae, loop);
......@@ -2470,7 +2456,7 @@ void CodeGen::genLclHeap(GenTree* tree)
}
else
{
regCnt = tree->ExtractTempReg();
regCnt = tree->GetSingleTempReg();
// Above, we put the size in targetReg. Now, copy it to our new temp register if necessary.
inst_Mov(size->TypeGet(), regCnt, targetReg, /* canSkip */ true);
......@@ -2533,7 +2519,8 @@ void CodeGen::genLclHeap(GenTree* tree)
if ((amount > 0) && !initMemOrLargeAlloc)
{
lastTouchDelta = genStackPointerConstantAdjustmentLoopWithProbe(-(ssize_t)amount, REG_NA);
lastTouchDelta =
genStackPointerConstantAdjustmentLoopWithProbe(-(ssize_t)amount, /* trackSpAdjustments */ true);
stackAdjustment = 0;
locAllocStackOffset = (target_size_t)compiler->lvaOutgoingArgSpaceSize;
goto ALLOC_DONE;
......@@ -2584,7 +2571,7 @@ void CodeGen::genLclHeap(GenTree* tree)
}
else
{
regCnt = tree->ExtractTempReg();
regCnt = tree->GetSingleTempReg();
}
}
......@@ -2595,7 +2582,8 @@ void CodeGen::genLclHeap(GenTree* tree)
// the alloc, not after.
assert(amount < compiler->eeGetPageSize()); // must be < not <=
lastTouchDelta = genStackPointerConstantAdjustmentLoopWithProbe(-(ssize_t)amount, regCnt);
lastTouchDelta = genStackPointerConstantAdjustmentLoopWithProbe(-(ssize_t)amount,
/* trackSpAdjustments */ regCnt == REG_NA);
goto ALLOC_DONE;
}
......@@ -2611,6 +2599,9 @@ void CodeGen::genLclHeap(GenTree* tree)
instGen_Set_Reg_To_Imm(((size_t)(int)amount == amount) ? EA_4BYTE : EA_8BYTE, regCnt, amount);
}
// We should not have any temp registers at this point.
assert(tree->AvailableTempRegCount() == 0);
if (compiler->info.compInitMem)
{
// At this point 'regCnt' is set to the number of loop iterations for this loop, if each
......@@ -2646,8 +2637,7 @@ void CodeGen::genLclHeap(GenTree* tree)
// adds to ESP).
inst_RV(INS_NEG, regCnt, TYP_I_IMPL);
regNumber regTmp = tree->GetSingleTempReg();
genStackPointerDynamicAdjustmentWithProbe(regCnt, regTmp);
genStackPointerDynamicAdjustmentWithProbe(regCnt);
// lastTouchDelta is dynamic, and can be up to a page. So if we have outgoing arg space,
// we're going to assume the worst and probe.
......@@ -2667,11 +2657,11 @@ ALLOC_DONE:
(stackAdjustment + (target_size_t)lastTouchDelta + STACK_PROBE_BOUNDARY_THRESHOLD_BYTES >
compiler->eeGetPageSize()))
{
genStackPointerConstantAdjustmentLoopWithProbe(-(ssize_t)stackAdjustment, REG_NA);
genStackPointerConstantAdjustmentLoopWithProbe(-(ssize_t)stackAdjustment, /* trackSpAdjustments */ true);
}
else
{
genStackPointerConstantAdjustment(-(ssize_t)stackAdjustment, REG_NA);
genStackPointerConstantAdjustment(-(ssize_t)stackAdjustment, /* trackSpAdjustments */ true);
}
}
......@@ -7862,7 +7852,7 @@ bool CodeGen::genAdjustStackForPutArgStk(GenTreePutArgStk* putArgStk)
if ((argSize >= ARG_STACK_PROBE_THRESHOLD_BYTES) ||
compiler->compStressCompile(Compiler::STRESS_GENERIC_VARN, 5))
{
genStackPointerConstantAdjustmentLoopWithProbe(-(ssize_t)argSize, REG_NA);
genStackPointerConstantAdjustmentLoopWithProbe(-(ssize_t)argSize, /* trackSpAdjustments */ true);
}
else
{
......
......@@ -12066,7 +12066,7 @@ DONE:
case IF_RRW_ARD:
// Mark the destination register as holding a GCT_BYREF
assert(id->idGCref() == GCT_BYREF && (ins == INS_add || ins == INS_sub));
assert(id->idGCref() == GCT_BYREF && (ins == INS_add || ins == INS_sub || ins == INS_sub_hide));
emitGCregLiveUpd(GCT_BYREF, id->idReg1(), dst);
break;
......@@ -12083,7 +12083,7 @@ DONE:
case IF_ARW_RRD:
case IF_ARW_CNS:
assert(id->idGCref() == GCT_BYREF && (ins == INS_add || ins == INS_sub));
assert(id->idGCref() == GCT_BYREF && (ins == INS_add || ins == INS_sub || ins == INS_sub_hide));
break;
default:
......@@ -12513,7 +12513,7 @@ BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)
// reg could have been a GCREF as GCREF + int=BYREF
// or BYREF+/-int=BYREF
assert(id->idGCref() == GCT_BYREF && (ins == INS_add || ins == INS_sub));
assert(id->idGCref() == GCT_BYREF && (ins == INS_add || ins == INS_sub || ins == INS_sub_hide));
emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
break;
......@@ -12973,7 +12973,7 @@ BYTE* emitter::emitOutputCV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)
case IF_RRW_MRD:
assert(id->idGCref() == GCT_BYREF);
assert(ins == INS_add || ins == INS_sub);
assert(ins == INS_add || ins == INS_sub || ins == INS_sub_hide);
// Mark it as holding a GCT_BYREF
emitGCregLiveUpd(GCT_BYREF, id->idReg1(), dst);
......@@ -13539,6 +13539,7 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id)
case INS_add:
case INS_sub:
case INS_sub_hide:
assert(id->idGCref() == GCT_BYREF);
#if 0
......@@ -13561,7 +13562,7 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id)
// r1/r2 could have been a GCREF as GCREF + int=BYREF
// or BYREF+/-int=BYREF
assert(((regMask & emitThisGCrefRegs) && (ins == INS_add)) ||
((regMask & emitThisByrefRegs) && (ins == INS_add || ins == INS_sub)));
((regMask & emitThisByrefRegs) && (ins == INS_add || ins == INS_sub || ins == INS_sub_hide)));
#endif // DEBUG
#endif // 0
......@@ -14042,7 +14043,7 @@ DONE:
}
if (emitThisByrefRegs & regMask)
{
assert(ins == INS_add || ins == INS_sub);
assert(ins == INS_add || ins == INS_sub || ins == INS_sub_hide);
}
#endif
// Mark it as holding a GCT_BYREF
......@@ -15666,7 +15667,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
case INS_sub:
// Check for "sub ESP, icon"
if (ins == INS_sub && id->idInsFmt() == IF_RRW_CNS && id->idReg1() == REG_ESP)
if (id->idInsFmt() == IF_RRW_CNS && id->idReg1() == REG_ESP)
{
assert((size_t)emitGetInsSC(id) < 0x00000000FFFFFFFFLL);
emitStackPushN(dst, (unsigned)(emitGetInsSC(id) / TARGET_POINTER_SIZE));
......@@ -15675,7 +15676,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
case INS_add:
// Check for "add ESP, icon"
if (ins == INS_add && id->idInsFmt() == IF_RRW_CNS && id->idReg1() == REG_ESP)
if (id->idInsFmt() == IF_RRW_CNS && id->idReg1() == REG_ESP)
{
assert((size_t)emitGetInsSC(id) < 0x00000000FFFFFFFFLL);
emitStackPop(dst, /*isCall*/ false, /*callInstrSize*/ 0,
......@@ -16155,6 +16156,7 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
case INS_add:
case INS_sub:
case INS_sub_hide:
case INS_and:
case INS_or:
case INS_xor:
......
......@@ -73,6 +73,9 @@ INST4(adc, "adc", IUM_RW, 0x000010, 0x001080,
INST4(sbb, "sbb", IUM_RW, 0x000018, 0x001880, 0x00001A, 0x00001C, Writes_OF | Writes_SF | Writes_ZF | Writes_AF | Writes_PF | Writes_CF | Reads_CF | INS_FLAGS_Has_Sbit | INS_FLAGS_Has_Wbit )
INST4(and, "and", IUM_RW, 0x000020, 0x002080, 0x000022, 0x000024, Resets_OF | Writes_SF | Writes_ZF | Undefined_AF | Writes_PF | Resets_CF | INS_FLAGS_Has_Sbit | INS_FLAGS_Has_Wbit )
INST4(sub, "sub", IUM_RW, 0x000028, 0x002880, 0x00002A, 0x00002C, Writes_OF | Writes_SF | Writes_ZF | Writes_AF | Writes_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_FLAGS_Has_Wbit )
// Does not affect the stack tracking in the emitter
INST4(sub_hide, "sub", IUM_RW, 0x000028, 0x002880, 0x00002A, 0x00002C, Writes_OF | Writes_SF | Writes_ZF | Writes_AF | Writes_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_FLAGS_Has_Wbit )
INST4(xor, "xor", IUM_RW, 0x000030, 0x003080, 0x000032, 0x000034, Resets_OF | Writes_SF | Writes_ZF | Undefined_AF | Writes_PF | Resets_CF | INS_FLAGS_Has_Sbit | INS_FLAGS_Has_Wbit )
INST4(cmp, "cmp", IUM_RD, 0x000038, 0x003880, 0x00003A, 0x00003C, Writes_OF | Writes_SF | Writes_ZF | Writes_AF | Writes_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_FLAGS_Has_Wbit )
INST4(test, "test", IUM_RD, 0x000084, 0x0000F6, 0x000084, 0x0000A8, Resets_OF | Writes_SF | Writes_ZF | Undefined_AF | Writes_PF | Resets_CF | INS_FLAGS_Has_Wbit )
......
......@@ -1650,10 +1650,10 @@ int LinearScan::BuildLclHeap(GenTree* tree)
// const and <=6 reg words - 0 (pushes '0')
// const and >6 reg words Yes 0 (pushes '0')
// const and <PageSize No 0 (amd64) 1 (x86)
// (x86:tmpReg for sutracting from esp)
// const and >=PageSize No 2 (regCnt and tmpReg for subtracing from sp)
//
// const and >=PageSize No 1 (regCnt)
// Non-const Yes 0 (regCnt=targetReg and pushes '0')
// Non-const No 2 (regCnt and tmpReg for subtracting from sp)
// Non-const No 1 (regCnt)
//
// Note: Here we don't need internal register to be different from targetReg.
// Rather, require it to be different from operand's reg.
......@@ -1667,6 +1667,7 @@ int LinearScan::BuildLclHeap(GenTree* tree)
if (sizeVal == 0)
{
// For regCnt
buildInternalIntRegisterDefForNode(tree);
}
else
......@@ -1679,22 +1680,20 @@ int LinearScan::BuildLclHeap(GenTree* tree)
// For small allocations up to 6 pointer sized words (i.e. 48 bytes of localloc)
// we will generate 'push 0'.
assert((sizeVal % REGSIZE_BYTES) == 0);
if (!compiler->info.compInitMem)
{
// No need to initialize allocated stack space.
if (sizeVal < compiler->eeGetPageSize())
{
#ifdef TARGET_X86
// x86 needs a register here to avoid generating "sub" on ESP.
buildInternalIntRegisterDefForNode(tree);
#endif
}
else
// x86 always needs regCnt.
// For regCnt
buildInternalIntRegisterDefForNode(tree);
#else // !TARGET_X86
if (sizeVal >= compiler->eeGetPageSize())
{
// We need two registers: regCnt and RegTmp
buildInternalIntRegisterDefForNode(tree);
// For regCnt
buildInternalIntRegisterDefForNode(tree);
}
#endif // !TARGET_X86
}
}
}
......@@ -1702,7 +1701,7 @@ int LinearScan::BuildLclHeap(GenTree* tree)
{
if (!compiler->info.compInitMem)
{
buildInternalIntRegisterDefForNode(tree);
// For regCnt
buildInternalIntRegisterDefForNode(tree);
}
BuildUse(size);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册