未验证 提交 2fb0a8c9 编写于 作者: W Will Smith 提交者: GitHub

[JIT] x86/x64 - improved localloc codegen in some cases (#76851)

上级 5ec2e87b
...@@ -1430,13 +1430,16 @@ protected: ...@@ -1430,13 +1430,16 @@ protected:
void genReturn(GenTree* treeNode); void genReturn(GenTree* treeNode);
#ifdef TARGET_XARCH
void genStackPointerConstantAdjustment(ssize_t spDelta, bool trackSpAdjustments);
void genStackPointerConstantAdjustmentWithProbe(ssize_t spDelta, bool trackSpAdjustments);
target_ssize_t genStackPointerConstantAdjustmentLoopWithProbe(ssize_t spDelta, bool trackSpAdjustments);
void genStackPointerDynamicAdjustmentWithProbe(regNumber regSpDelta);
#else // !TARGET_XARCH
void genStackPointerConstantAdjustment(ssize_t spDelta, regNumber regTmp); void genStackPointerConstantAdjustment(ssize_t spDelta, regNumber regTmp);
void genStackPointerConstantAdjustmentWithProbe(ssize_t spDelta, regNumber regTmp); void genStackPointerConstantAdjustmentWithProbe(ssize_t spDelta, regNumber regTmp);
target_ssize_t genStackPointerConstantAdjustmentLoopWithProbe(ssize_t spDelta, regNumber regTmp); target_ssize_t genStackPointerConstantAdjustmentLoopWithProbe(ssize_t spDelta, regNumber regTmp);
#endif // !TARGET_XARCH
#if defined(TARGET_XARCH)
void genStackPointerDynamicAdjustmentWithProbe(regNumber regSpDelta, regNumber regTmp);
#endif // defined(TARGET_XARCH)
void genLclHeap(GenTree* tree); void genLclHeap(GenTree* tree);
......
...@@ -2216,13 +2216,12 @@ void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pIni ...@@ -2216,13 +2216,12 @@ void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pIni
// //
// Arguments: // Arguments:
// spDelta - the value to add to SP. Must be negative. // spDelta - the value to add to SP. Must be negative.
// regTmp - x86 only: an available temporary register. If not REG_NA, hide the SP // trackSpAdjustments - x86 only: whether or not to track the SP adjustment
// adjustment from the emitter, using this register.
// //
// Return Value: // Return Value:
// None. // None.
// //
void CodeGen::genStackPointerConstantAdjustment(ssize_t spDelta, regNumber regTmp) void CodeGen::genStackPointerConstantAdjustment(ssize_t spDelta, bool trackSpAdjustments)
{ {
assert(spDelta < 0); assert(spDelta < 0);
...@@ -2230,22 +2229,19 @@ void CodeGen::genStackPointerConstantAdjustment(ssize_t spDelta, regNumber regTm ...@@ -2230,22 +2229,19 @@ void CodeGen::genStackPointerConstantAdjustment(ssize_t spDelta, regNumber regTm
// function that does a probe, which will in turn call this function. // function that does a probe, which will in turn call this function.
assert((target_size_t)(-spDelta) <= compiler->eeGetPageSize()); assert((target_size_t)(-spDelta) <= compiler->eeGetPageSize());
#ifdef TARGET_X86 #ifdef TARGET_AMD64
if (regTmp != REG_NA) // We always track the SP adjustment on X64.
trackSpAdjustments = true;
#endif // TARGET_AMD64
if (trackSpAdjustments)
{ {
// For x86, some cases don't want to use "sub ESP" because we don't want the emitter to track the adjustment inst_RV_IV(INS_sub, REG_SPBASE, (target_ssize_t)-spDelta, EA_PTRSIZE);
// to ESP. So do the work in the count register.
// TODO-CQ: manipulate ESP directly, to share code, reduce #ifdefs, and improve CQ. This would require
// creating a way to temporarily turn off the emitter's tracking of ESP, maybe marking instrDescs as "don't
// track".
inst_Mov(TYP_I_IMPL, regTmp, REG_SPBASE, /* canSkip */ false);
inst_RV_IV(INS_sub, regTmp, (target_ssize_t)-spDelta, EA_PTRSIZE);
inst_Mov(TYP_I_IMPL, REG_SPBASE, regTmp, /* canSkip */ false);
} }
else else
#endif // TARGET_X86
{ {
inst_RV_IV(INS_sub, REG_SPBASE, (target_ssize_t)-spDelta, EA_PTRSIZE); // For x86, some cases don't want to track the adjustment to SP.
inst_RV_IV(INS_sub_hide, REG_SPBASE, (target_ssize_t)-spDelta, EA_PTRSIZE);
} }
} }
...@@ -2257,16 +2253,15 @@ void CodeGen::genStackPointerConstantAdjustment(ssize_t spDelta, regNumber regTm ...@@ -2257,16 +2253,15 @@ void CodeGen::genStackPointerConstantAdjustment(ssize_t spDelta, regNumber regTm
// Arguments: // Arguments:
// spDelta - the value to add to SP. Must be negative or zero. If zero, the probe happens, // spDelta - the value to add to SP. Must be negative or zero. If zero, the probe happens,
// but the stack pointer doesn't move. // but the stack pointer doesn't move.
// regTmp - x86 only: an available temporary register. If not REG_NA, hide the SP // trackSpAdjustments - x86 only: whether or not to track the SP adjustment
// adjustment from the emitter, using this register.
// //
// Return Value: // Return Value:
// None. // None.
// //
void CodeGen::genStackPointerConstantAdjustmentWithProbe(ssize_t spDelta, regNumber regTmp) void CodeGen::genStackPointerConstantAdjustmentWithProbe(ssize_t spDelta, bool trackSpAdjustments)
{ {
GetEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0); GetEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0);
genStackPointerConstantAdjustment(spDelta, regTmp); genStackPointerConstantAdjustment(spDelta, trackSpAdjustments);
} }
//------------------------------------------------------------------------ //------------------------------------------------------------------------
...@@ -2280,13 +2275,12 @@ void CodeGen::genStackPointerConstantAdjustmentWithProbe(ssize_t spDelta, regNum ...@@ -2280,13 +2275,12 @@ void CodeGen::genStackPointerConstantAdjustmentWithProbe(ssize_t spDelta, regNum
// //
// Arguments: // Arguments:
// spDelta - the value to add to SP. Must be negative. // spDelta - the value to add to SP. Must be negative.
// regTmp - x86 only: an available temporary register. If not REG_NA, hide the SP // trackSpAdjustments - x86 only: whether or not to track the SP adjustment
// adjustment from the emitter, using this register.
// //
// Return Value: // Return Value:
// Offset in bytes from SP to last probed address. // Offset in bytes from SP to last probed address.
// //
target_ssize_t CodeGen::genStackPointerConstantAdjustmentLoopWithProbe(ssize_t spDelta, regNumber regTmp) target_ssize_t CodeGen::genStackPointerConstantAdjustmentLoopWithProbe(ssize_t spDelta, bool trackSpAdjustments)
{ {
assert(spDelta < 0); assert(spDelta < 0);
...@@ -2296,7 +2290,7 @@ target_ssize_t CodeGen::genStackPointerConstantAdjustmentLoopWithProbe(ssize_t s ...@@ -2296,7 +2290,7 @@ target_ssize_t CodeGen::genStackPointerConstantAdjustmentLoopWithProbe(ssize_t s
do do
{ {
ssize_t spOneDelta = -(ssize_t)min((target_size_t)-spRemainingDelta, pageSize); ssize_t spOneDelta = -(ssize_t)min((target_size_t)-spRemainingDelta, pageSize);
genStackPointerConstantAdjustmentWithProbe(spOneDelta, regTmp); genStackPointerConstantAdjustmentWithProbe(spOneDelta, trackSpAdjustments);
spRemainingDelta -= spOneDelta; spRemainingDelta -= spOneDelta;
} while (spRemainingDelta < 0); } while (spRemainingDelta < 0);
...@@ -2323,21 +2317,18 @@ target_ssize_t CodeGen::genStackPointerConstantAdjustmentLoopWithProbe(ssize_t s ...@@ -2323,21 +2317,18 @@ target_ssize_t CodeGen::genStackPointerConstantAdjustmentLoopWithProbe(ssize_t s
// genStackPointerDynamicAdjustmentWithProbe: add a register value to the stack pointer, // genStackPointerDynamicAdjustmentWithProbe: add a register value to the stack pointer,
// and probe the stack as appropriate. // and probe the stack as appropriate.
// //
// Note that for x86, we hide the ESP adjustment from the emitter. To do that, currently, // We hide the ESP adjustment from the emitter.
// requires a temporary register and extra code.
// //
// Arguments: // Arguments:
// regSpDelta - the register value to add to SP. The value in this register must be negative. // regSpDelta - the register value to add to SP. The value in this register must be negative.
// This register might be trashed. // This register might be trashed.
// regTmp - an available temporary register. Will be trashed.
// //
// Return Value: // Return Value:
// None. // None.
// //
void CodeGen::genStackPointerDynamicAdjustmentWithProbe(regNumber regSpDelta, regNumber regTmp) void CodeGen::genStackPointerDynamicAdjustmentWithProbe(regNumber regSpDelta)
{ {
assert(regSpDelta != REG_NA); assert(regSpDelta != REG_NA);
assert(regTmp != REG_NA);
// Tickle the pages to ensure that ESP is always valid and is // Tickle the pages to ensure that ESP is always valid and is
// in sync with the "stack guard page". Note that in the worst // in sync with the "stack guard page". Note that in the worst
...@@ -2356,9 +2347,7 @@ void CodeGen::genStackPointerDynamicAdjustmentWithProbe(regNumber regSpDelta, re ...@@ -2356,9 +2347,7 @@ void CodeGen::genStackPointerDynamicAdjustmentWithProbe(regNumber regSpDelta, re
// xor regSpDelta, regSpDelta // Overflow, pick lowest possible number // xor regSpDelta, regSpDelta // Overflow, pick lowest possible number
// loop: // loop:
// test ESP, [ESP+0] // tickle the page // test ESP, [ESP+0] // tickle the page
// mov regTmp, ESP // sub ESP, eeGetPageSize()
// sub regTmp, eeGetPageSize()
// mov ESP, regTmp
// cmp ESP, regSpDelta // cmp ESP, regSpDelta
// jae loop // jae loop
// mov ESP, regSpDelta // mov ESP, regSpDelta
...@@ -2376,11 +2365,8 @@ void CodeGen::genStackPointerDynamicAdjustmentWithProbe(regNumber regSpDelta, re ...@@ -2376,11 +2365,8 @@ void CodeGen::genStackPointerDynamicAdjustmentWithProbe(regNumber regSpDelta, re
// be on the guard page. It is OK to leave the final value of ESP on the guard page. // be on the guard page. It is OK to leave the final value of ESP on the guard page.
GetEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0); GetEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0);
// Subtract a page from ESP. This is a trick to avoid the emitter trying to track the // Subtract a page from ESP and hide the adjustment.
// decrement of the ESP - we do the subtraction in another reg instead of adjusting ESP directly. inst_RV_IV(INS_sub_hide, REG_SPBASE, compiler->eeGetPageSize(), EA_PTRSIZE);
inst_Mov(TYP_I_IMPL, regTmp, REG_SPBASE, /* canSkip */ false);
inst_RV_IV(INS_sub, regTmp, compiler->eeGetPageSize(), EA_PTRSIZE);
inst_Mov(TYP_I_IMPL, REG_SPBASE, regTmp, /* canSkip */ false);
inst_RV_RV(INS_cmp, REG_SPBASE, regSpDelta, TYP_I_IMPL); inst_RV_RV(INS_cmp, REG_SPBASE, regSpDelta, TYP_I_IMPL);
inst_JMP(EJ_jae, loop); inst_JMP(EJ_jae, loop);
...@@ -2470,7 +2456,7 @@ void CodeGen::genLclHeap(GenTree* tree) ...@@ -2470,7 +2456,7 @@ void CodeGen::genLclHeap(GenTree* tree)
} }
else else
{ {
regCnt = tree->ExtractTempReg(); regCnt = tree->GetSingleTempReg();
// Above, we put the size in targetReg. Now, copy it to our new temp register if necessary. // Above, we put the size in targetReg. Now, copy it to our new temp register if necessary.
inst_Mov(size->TypeGet(), regCnt, targetReg, /* canSkip */ true); inst_Mov(size->TypeGet(), regCnt, targetReg, /* canSkip */ true);
...@@ -2533,7 +2519,8 @@ void CodeGen::genLclHeap(GenTree* tree) ...@@ -2533,7 +2519,8 @@ void CodeGen::genLclHeap(GenTree* tree)
if ((amount > 0) && !initMemOrLargeAlloc) if ((amount > 0) && !initMemOrLargeAlloc)
{ {
lastTouchDelta = genStackPointerConstantAdjustmentLoopWithProbe(-(ssize_t)amount, REG_NA); lastTouchDelta =
genStackPointerConstantAdjustmentLoopWithProbe(-(ssize_t)amount, /* trackSpAdjustments */ true);
stackAdjustment = 0; stackAdjustment = 0;
locAllocStackOffset = (target_size_t)compiler->lvaOutgoingArgSpaceSize; locAllocStackOffset = (target_size_t)compiler->lvaOutgoingArgSpaceSize;
goto ALLOC_DONE; goto ALLOC_DONE;
...@@ -2584,7 +2571,7 @@ void CodeGen::genLclHeap(GenTree* tree) ...@@ -2584,7 +2571,7 @@ void CodeGen::genLclHeap(GenTree* tree)
} }
else else
{ {
regCnt = tree->ExtractTempReg(); regCnt = tree->GetSingleTempReg();
} }
} }
...@@ -2595,7 +2582,8 @@ void CodeGen::genLclHeap(GenTree* tree) ...@@ -2595,7 +2582,8 @@ void CodeGen::genLclHeap(GenTree* tree)
// the alloc, not after. // the alloc, not after.
assert(amount < compiler->eeGetPageSize()); // must be < not <= assert(amount < compiler->eeGetPageSize()); // must be < not <=
lastTouchDelta = genStackPointerConstantAdjustmentLoopWithProbe(-(ssize_t)amount, regCnt); lastTouchDelta = genStackPointerConstantAdjustmentLoopWithProbe(-(ssize_t)amount,
/* trackSpAdjustments */ regCnt == REG_NA);
goto ALLOC_DONE; goto ALLOC_DONE;
} }
...@@ -2611,6 +2599,9 @@ void CodeGen::genLclHeap(GenTree* tree) ...@@ -2611,6 +2599,9 @@ void CodeGen::genLclHeap(GenTree* tree)
instGen_Set_Reg_To_Imm(((size_t)(int)amount == amount) ? EA_4BYTE : EA_8BYTE, regCnt, amount); instGen_Set_Reg_To_Imm(((size_t)(int)amount == amount) ? EA_4BYTE : EA_8BYTE, regCnt, amount);
} }
// We should not have any temp registers at this point.
assert(tree->AvailableTempRegCount() == 0);
if (compiler->info.compInitMem) if (compiler->info.compInitMem)
{ {
// At this point 'regCnt' is set to the number of loop iterations for this loop, if each // At this point 'regCnt' is set to the number of loop iterations for this loop, if each
...@@ -2646,8 +2637,7 @@ void CodeGen::genLclHeap(GenTree* tree) ...@@ -2646,8 +2637,7 @@ void CodeGen::genLclHeap(GenTree* tree)
// adds to ESP). // adds to ESP).
inst_RV(INS_NEG, regCnt, TYP_I_IMPL); inst_RV(INS_NEG, regCnt, TYP_I_IMPL);
regNumber regTmp = tree->GetSingleTempReg(); genStackPointerDynamicAdjustmentWithProbe(regCnt);
genStackPointerDynamicAdjustmentWithProbe(regCnt, regTmp);
// lastTouchDelta is dynamic, and can be up to a page. So if we have outgoing arg space, // lastTouchDelta is dynamic, and can be up to a page. So if we have outgoing arg space,
// we're going to assume the worst and probe. // we're going to assume the worst and probe.
...@@ -2667,11 +2657,11 @@ ALLOC_DONE: ...@@ -2667,11 +2657,11 @@ ALLOC_DONE:
(stackAdjustment + (target_size_t)lastTouchDelta + STACK_PROBE_BOUNDARY_THRESHOLD_BYTES > (stackAdjustment + (target_size_t)lastTouchDelta + STACK_PROBE_BOUNDARY_THRESHOLD_BYTES >
compiler->eeGetPageSize())) compiler->eeGetPageSize()))
{ {
genStackPointerConstantAdjustmentLoopWithProbe(-(ssize_t)stackAdjustment, REG_NA); genStackPointerConstantAdjustmentLoopWithProbe(-(ssize_t)stackAdjustment, /* trackSpAdjustments */ true);
} }
else else
{ {
genStackPointerConstantAdjustment(-(ssize_t)stackAdjustment, REG_NA); genStackPointerConstantAdjustment(-(ssize_t)stackAdjustment, /* trackSpAdjustments */ true);
} }
} }
...@@ -7862,7 +7852,7 @@ bool CodeGen::genAdjustStackForPutArgStk(GenTreePutArgStk* putArgStk) ...@@ -7862,7 +7852,7 @@ bool CodeGen::genAdjustStackForPutArgStk(GenTreePutArgStk* putArgStk)
if ((argSize >= ARG_STACK_PROBE_THRESHOLD_BYTES) || if ((argSize >= ARG_STACK_PROBE_THRESHOLD_BYTES) ||
compiler->compStressCompile(Compiler::STRESS_GENERIC_VARN, 5)) compiler->compStressCompile(Compiler::STRESS_GENERIC_VARN, 5))
{ {
genStackPointerConstantAdjustmentLoopWithProbe(-(ssize_t)argSize, REG_NA); genStackPointerConstantAdjustmentLoopWithProbe(-(ssize_t)argSize, /* trackSpAdjustments */ true);
} }
else else
{ {
......
...@@ -12066,7 +12066,7 @@ DONE: ...@@ -12066,7 +12066,7 @@ DONE:
case IF_RRW_ARD: case IF_RRW_ARD:
// Mark the destination register as holding a GCT_BYREF // Mark the destination register as holding a GCT_BYREF
assert(id->idGCref() == GCT_BYREF && (ins == INS_add || ins == INS_sub)); assert(id->idGCref() == GCT_BYREF && (ins == INS_add || ins == INS_sub || ins == INS_sub_hide));
emitGCregLiveUpd(GCT_BYREF, id->idReg1(), dst); emitGCregLiveUpd(GCT_BYREF, id->idReg1(), dst);
break; break;
...@@ -12083,7 +12083,7 @@ DONE: ...@@ -12083,7 +12083,7 @@ DONE:
case IF_ARW_RRD: case IF_ARW_RRD:
case IF_ARW_CNS: case IF_ARW_CNS:
assert(id->idGCref() == GCT_BYREF && (ins == INS_add || ins == INS_sub)); assert(id->idGCref() == GCT_BYREF && (ins == INS_add || ins == INS_sub || ins == INS_sub_hide));
break; break;
default: default:
...@@ -12513,7 +12513,7 @@ BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) ...@@ -12513,7 +12513,7 @@ BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)
// reg could have been a GCREF as GCREF + int=BYREF // reg could have been a GCREF as GCREF + int=BYREF
// or BYREF+/-int=BYREF // or BYREF+/-int=BYREF
assert(id->idGCref() == GCT_BYREF && (ins == INS_add || ins == INS_sub)); assert(id->idGCref() == GCT_BYREF && (ins == INS_add || ins == INS_sub || ins == INS_sub_hide));
emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst); emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
break; break;
...@@ -12973,7 +12973,7 @@ BYTE* emitter::emitOutputCV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) ...@@ -12973,7 +12973,7 @@ BYTE* emitter::emitOutputCV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)
case IF_RRW_MRD: case IF_RRW_MRD:
assert(id->idGCref() == GCT_BYREF); assert(id->idGCref() == GCT_BYREF);
assert(ins == INS_add || ins == INS_sub); assert(ins == INS_add || ins == INS_sub || ins == INS_sub_hide);
// Mark it as holding a GCT_BYREF // Mark it as holding a GCT_BYREF
emitGCregLiveUpd(GCT_BYREF, id->idReg1(), dst); emitGCregLiveUpd(GCT_BYREF, id->idReg1(), dst);
...@@ -13539,6 +13539,7 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id) ...@@ -13539,6 +13539,7 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id)
case INS_add: case INS_add:
case INS_sub: case INS_sub:
case INS_sub_hide:
assert(id->idGCref() == GCT_BYREF); assert(id->idGCref() == GCT_BYREF);
#if 0 #if 0
...@@ -13561,7 +13562,7 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id) ...@@ -13561,7 +13562,7 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id)
// r1/r2 could have been a GCREF as GCREF + int=BYREF // r1/r2 could have been a GCREF as GCREF + int=BYREF
// or BYREF+/-int=BYREF // or BYREF+/-int=BYREF
assert(((regMask & emitThisGCrefRegs) && (ins == INS_add)) || assert(((regMask & emitThisGCrefRegs) && (ins == INS_add)) ||
((regMask & emitThisByrefRegs) && (ins == INS_add || ins == INS_sub))); ((regMask & emitThisByrefRegs) && (ins == INS_add || ins == INS_sub || ins == INS_sub_hide)));
#endif // DEBUG #endif // DEBUG
#endif // 0 #endif // 0
...@@ -14042,7 +14043,7 @@ DONE: ...@@ -14042,7 +14043,7 @@ DONE:
} }
if (emitThisByrefRegs & regMask) if (emitThisByrefRegs & regMask)
{ {
assert(ins == INS_add || ins == INS_sub); assert(ins == INS_add || ins == INS_sub || ins == INS_sub_hide);
} }
#endif #endif
// Mark it as holding a GCT_BYREF // Mark it as holding a GCT_BYREF
...@@ -15666,7 +15667,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) ...@@ -15666,7 +15667,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
case INS_sub: case INS_sub:
// Check for "sub ESP, icon" // Check for "sub ESP, icon"
if (ins == INS_sub && id->idInsFmt() == IF_RRW_CNS && id->idReg1() == REG_ESP) if (id->idInsFmt() == IF_RRW_CNS && id->idReg1() == REG_ESP)
{ {
assert((size_t)emitGetInsSC(id) < 0x00000000FFFFFFFFLL); assert((size_t)emitGetInsSC(id) < 0x00000000FFFFFFFFLL);
emitStackPushN(dst, (unsigned)(emitGetInsSC(id) / TARGET_POINTER_SIZE)); emitStackPushN(dst, (unsigned)(emitGetInsSC(id) / TARGET_POINTER_SIZE));
...@@ -15675,7 +15676,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) ...@@ -15675,7 +15676,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
case INS_add: case INS_add:
// Check for "add ESP, icon" // Check for "add ESP, icon"
if (ins == INS_add && id->idInsFmt() == IF_RRW_CNS && id->idReg1() == REG_ESP) if (id->idInsFmt() == IF_RRW_CNS && id->idReg1() == REG_ESP)
{ {
assert((size_t)emitGetInsSC(id) < 0x00000000FFFFFFFFLL); assert((size_t)emitGetInsSC(id) < 0x00000000FFFFFFFFLL);
emitStackPop(dst, /*isCall*/ false, /*callInstrSize*/ 0, emitStackPop(dst, /*isCall*/ false, /*callInstrSize*/ 0,
...@@ -16155,6 +16156,7 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins ...@@ -16155,6 +16156,7 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
case INS_add: case INS_add:
case INS_sub: case INS_sub:
case INS_sub_hide:
case INS_and: case INS_and:
case INS_or: case INS_or:
case INS_xor: case INS_xor:
......
...@@ -73,6 +73,9 @@ INST4(adc, "adc", IUM_RW, 0x000010, 0x001080, ...@@ -73,6 +73,9 @@ INST4(adc, "adc", IUM_RW, 0x000010, 0x001080,
INST4(sbb, "sbb", IUM_RW, 0x000018, 0x001880, 0x00001A, 0x00001C, Writes_OF | Writes_SF | Writes_ZF | Writes_AF | Writes_PF | Writes_CF | Reads_CF | INS_FLAGS_Has_Sbit | INS_FLAGS_Has_Wbit ) INST4(sbb, "sbb", IUM_RW, 0x000018, 0x001880, 0x00001A, 0x00001C, Writes_OF | Writes_SF | Writes_ZF | Writes_AF | Writes_PF | Writes_CF | Reads_CF | INS_FLAGS_Has_Sbit | INS_FLAGS_Has_Wbit )
INST4(and, "and", IUM_RW, 0x000020, 0x002080, 0x000022, 0x000024, Resets_OF | Writes_SF | Writes_ZF | Undefined_AF | Writes_PF | Resets_CF | INS_FLAGS_Has_Sbit | INS_FLAGS_Has_Wbit ) INST4(and, "and", IUM_RW, 0x000020, 0x002080, 0x000022, 0x000024, Resets_OF | Writes_SF | Writes_ZF | Undefined_AF | Writes_PF | Resets_CF | INS_FLAGS_Has_Sbit | INS_FLAGS_Has_Wbit )
INST4(sub, "sub", IUM_RW, 0x000028, 0x002880, 0x00002A, 0x00002C, Writes_OF | Writes_SF | Writes_ZF | Writes_AF | Writes_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_FLAGS_Has_Wbit ) INST4(sub, "sub", IUM_RW, 0x000028, 0x002880, 0x00002A, 0x00002C, Writes_OF | Writes_SF | Writes_ZF | Writes_AF | Writes_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_FLAGS_Has_Wbit )
// Does not affect the stack tracking in the emitter
INST4(sub_hide, "sub", IUM_RW, 0x000028, 0x002880, 0x00002A, 0x00002C, Writes_OF | Writes_SF | Writes_ZF | Writes_AF | Writes_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_FLAGS_Has_Wbit )
INST4(xor, "xor", IUM_RW, 0x000030, 0x003080, 0x000032, 0x000034, Resets_OF | Writes_SF | Writes_ZF | Undefined_AF | Writes_PF | Resets_CF | INS_FLAGS_Has_Sbit | INS_FLAGS_Has_Wbit ) INST4(xor, "xor", IUM_RW, 0x000030, 0x003080, 0x000032, 0x000034, Resets_OF | Writes_SF | Writes_ZF | Undefined_AF | Writes_PF | Resets_CF | INS_FLAGS_Has_Sbit | INS_FLAGS_Has_Wbit )
INST4(cmp, "cmp", IUM_RD, 0x000038, 0x003880, 0x00003A, 0x00003C, Writes_OF | Writes_SF | Writes_ZF | Writes_AF | Writes_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_FLAGS_Has_Wbit ) INST4(cmp, "cmp", IUM_RD, 0x000038, 0x003880, 0x00003A, 0x00003C, Writes_OF | Writes_SF | Writes_ZF | Writes_AF | Writes_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_FLAGS_Has_Wbit )
INST4(test, "test", IUM_RD, 0x000084, 0x0000F6, 0x000084, 0x0000A8, Resets_OF | Writes_SF | Writes_ZF | Undefined_AF | Writes_PF | Resets_CF | INS_FLAGS_Has_Wbit ) INST4(test, "test", IUM_RD, 0x000084, 0x0000F6, 0x000084, 0x0000A8, Resets_OF | Writes_SF | Writes_ZF | Undefined_AF | Writes_PF | Resets_CF | INS_FLAGS_Has_Wbit )
......
...@@ -1650,10 +1650,10 @@ int LinearScan::BuildLclHeap(GenTree* tree) ...@@ -1650,10 +1650,10 @@ int LinearScan::BuildLclHeap(GenTree* tree)
// const and <=6 reg words - 0 (pushes '0') // const and <=6 reg words - 0 (pushes '0')
// const and >6 reg words Yes 0 (pushes '0') // const and >6 reg words Yes 0 (pushes '0')
// const and <PageSize No 0 (amd64) 1 (x86) // const and <PageSize No 0 (amd64) 1 (x86)
// (x86:tmpReg for subtracting from esp) //
// const and >=PageSize No 2 (regCnt and tmpReg for subtracting from sp) // const and >=PageSize No 1 (regCnt)
// Non-const Yes 0 (regCnt=targetReg and pushes '0') // Non-const Yes 0 (regCnt=targetReg and pushes '0')
// Non-const No 2 (regCnt and tmpReg for subtracting from sp) // Non-const No 1 (regCnt)
// //
// Note: Here we don't need internal register to be different from targetReg. // Note: Here we don't need internal register to be different from targetReg.
// Rather, require it to be different from operand's reg. // Rather, require it to be different from operand's reg.
...@@ -1667,6 +1667,7 @@ int LinearScan::BuildLclHeap(GenTree* tree) ...@@ -1667,6 +1667,7 @@ int LinearScan::BuildLclHeap(GenTree* tree)
if (sizeVal == 0) if (sizeVal == 0)
{ {
// For regCnt
buildInternalIntRegisterDefForNode(tree); buildInternalIntRegisterDefForNode(tree);
} }
else else
...@@ -1679,22 +1680,20 @@ int LinearScan::BuildLclHeap(GenTree* tree) ...@@ -1679,22 +1680,20 @@ int LinearScan::BuildLclHeap(GenTree* tree)
// For small allocations up to 6 pointer sized words (i.e. 48 bytes of localloc) // For small allocations up to 6 pointer sized words (i.e. 48 bytes of localloc)
// we will generate 'push 0'. // we will generate 'push 0'.
assert((sizeVal % REGSIZE_BYTES) == 0); assert((sizeVal % REGSIZE_BYTES) == 0);
if (!compiler->info.compInitMem) if (!compiler->info.compInitMem)
{ {
// No need to initialize allocated stack space.
if (sizeVal < compiler->eeGetPageSize())
{
#ifdef TARGET_X86 #ifdef TARGET_X86
// x86 needs a register here to avoid generating "sub" on ESP. // x86 always needs regCnt.
buildInternalIntRegisterDefForNode(tree); // For regCnt
#endif buildInternalIntRegisterDefForNode(tree);
} #else // !TARGET_X86
else if (sizeVal >= compiler->eeGetPageSize())
{ {
// We need two registers: regCnt and RegTmp // For regCnt
buildInternalIntRegisterDefForNode(tree);
buildInternalIntRegisterDefForNode(tree); buildInternalIntRegisterDefForNode(tree);
} }
#endif // !TARGET_X86
} }
} }
} }
...@@ -1702,7 +1701,7 @@ int LinearScan::BuildLclHeap(GenTree* tree) ...@@ -1702,7 +1701,7 @@ int LinearScan::BuildLclHeap(GenTree* tree)
{ {
if (!compiler->info.compInitMem) if (!compiler->info.compInitMem)
{ {
buildInternalIntRegisterDefForNode(tree); // For regCnt
buildInternalIntRegisterDefForNode(tree); buildInternalIntRegisterDefForNode(tree);
} }
BuildUse(size); BuildUse(size);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册