Unverified commit e13f0dc1 authored by Egor Bogatov, committed by GitHub

Optimize stackalloc zeroing via BLK (#83255)

Co-authored-by: SingleAccretion <62474226+SingleAccretion@users.noreply.github.com>
Parent 3e6ad475
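For context, a minimal C# sketch (illustrative only, not part of this commit) of the pattern being optimized: a constant-size stackalloc in a method compiled with InitLocals, which the JIT must zero-initialize. Before this change the zeroing was produced in codegen ("push 0" sequences or a counted loop); after it, lowering rewrites the zeroing into an explicit STORE_BLK that the backend can unroll.

public static unsafe class StackallocZeroingSketch
{
    // Assumes InitLocals (no [SkipLocalsInit]), so all 64 bytes must be zeroed.
    public static int Sum()
    {
        byte* buffer = stackalloc byte[64]; // constant size: the optimized path
        int sum = 0;
        for (int i = 0; i < 64; i++)
        {
            sum += buffer[i]; // every byte reads as zero
        }
        return sum;
    }
}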
@@ -2742,18 +2742,10 @@ void CodeGen::genLclHeap(GenTree* tree)
// compute the amount of memory to allocate to properly STACK_ALIGN.
size_t amount = 0;
if (size->IsCnsIntOrI())
if (size->IsCnsIntOrI() && size->isContained())
{
// If size is a constant, then it must be contained.
assert(size->isContained());
// If amount is zero then return null in targetReg
amount = size->AsIntCon()->gtIconVal;
if (amount == 0)
{
instGen_Set_Reg_To_Zero(EA_PTRSIZE, targetReg);
goto BAILOUT;
}
assert((amount > 0) && (amount <= UINT_MAX));
// 'amount' is the total number of bytes to localloc to properly STACK_ALIGN
amount = AlignUp(amount, STACK_ALIGN);
@@ -2848,77 +2840,44 @@ void CodeGen::genLclHeap(GenTree* tree)
goto ALLOC_DONE;
}
inst_RV_IV(INS_add, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize, EA_PTRSIZE);
stackAdjustment += (target_size_t)compiler->lvaOutgoingArgSpaceSize;
locAllocStackOffset = stackAdjustment;
if (size->IsCnsIntOrI() && size->isContained())
{
stackAdjustment = 0;
locAllocStackOffset = (target_size_t)compiler->lvaOutgoingArgSpaceSize;
}
else
{
inst_RV_IV(INS_add, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize, EA_PTRSIZE);
stackAdjustment += (target_size_t)compiler->lvaOutgoingArgSpaceSize;
locAllocStackOffset = stackAdjustment;
}
}
#endif
if (size->IsCnsIntOrI())
if (size->IsCnsIntOrI() && size->isContained())
{
// We should reach here only for non-zero, constant size allocations.
assert(amount > 0);
assert((amount % STACK_ALIGN) == 0);
assert((amount % REGSIZE_BYTES) == 0);
// For small allocations we will generate up to six push 0 inline
size_t cntRegSizedWords = amount / REGSIZE_BYTES;
if (compiler->info.compInitMem && (cntRegSizedWords <= 6))
// We should reach here only for non-zero, constant size allocations which we zero
// via BLK explicitly, so just bump the stack pointer.
if ((amount >= compiler->eeGetPageSize()) || (TARGET_POINTER_SIZE == 4))
{
for (; cntRegSizedWords != 0; cntRegSizedWords--)
{
inst_IV(INS_push_hide, 0); // push_hide means don't track the stack
}
lastTouchDelta = 0;
goto ALLOC_DONE;
}
#ifdef TARGET_X86
bool needRegCntRegister = true;
#else // !TARGET_X86
bool needRegCntRegister = initMemOrLargeAlloc;
#endif // !TARGET_X86
if (needRegCntRegister)
{
// If compInitMem=true, we can reuse targetReg as regcnt.
// Since size is a constant, regCnt is not yet initialized.
assert(regCnt == REG_NA);
if (compiler->info.compInitMem)
{
assert(tree->AvailableTempRegCount() == 0);
regCnt = targetReg;
}
else
{
regCnt = tree->GetSingleTempReg();
}
regCnt = tree->GetSingleTempReg();
instGen_Set_Reg_To_Imm(EA_PTRSIZE, regCnt, -(ssize_t)amount);
genStackPointerDynamicAdjustmentWithProbe(regCnt);
// lastTouchDelta is dynamic, and can be up to a page. So if we have outgoing arg space,
// we're going to assume the worst and probe.
}
if (!initMemOrLargeAlloc)
else
{
// Since the size is less than a page, and we don't need to zero init memory, simply adjust ESP.
// ESP might already be in the guard page, so we must touch it BEFORE
// the alloc, not after.
assert(amount < compiler->eeGetPageSize()); // must be < not <=
// ESP might already be in the guard page, so we must touch it BEFORE the alloc, not after.
lastTouchDelta = genStackPointerConstantAdjustmentLoopWithProbe(-(ssize_t)amount,
/* trackSpAdjustments */ regCnt == REG_NA);
goto ALLOC_DONE;
/* trackSpAdjustments */ true);
}
// else, "mov regCnt, amount"
if (compiler->info.compInitMem)
{
// When initializing memory, we want 'amount' to be the loop count.
assert((amount % STACK_ALIGN) == 0);
amount /= STACK_ALIGN;
}
instGen_Set_Reg_To_Imm(((size_t)(int)amount == amount) ? EA_4BYTE : EA_8BYTE, regCnt, amount);
goto ALLOC_DONE;
}
// We should not have any temp registers at this point.
@@ -2996,8 +2955,6 @@ ALLOC_DONE:
genDefineTempLabel(endLabel);
}
BAILOUT:
#ifdef JIT32_GCENCODER
if (compiler->lvaLocAllocSPvar != BAD_VAR_NUM)
{
......
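A hedged illustration of the two constant-size codegen paths above, assuming the usual 4 KB page size on x64 (on x86, where TARGET_POINTER_SIZE == 4, the probed path is always taken). Method names are illustrative:

public static unsafe class ConstantSizePathsSketch
{
    public static void SmallAlloc()
    {
        // Below a page on x64: genLclHeap just bumps the stack pointer with a
        // probe-before-touch adjustment; the zeroing itself was already
        // emitted as a STORE_BLK during lowering.
        byte* p = stackalloc byte[512];
        p[0] = 1;
    }

    public static void LargeAlloc()
    {
        // At or above a page: a temp register (regCnt) drives the dynamic,
        // probing stack adjustment instead.
        byte* p = stackalloc byte[20000];
        p[0] = 1;
    }
}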
@@ -580,7 +580,7 @@ GenTree* Lowering::LowerNode(GenTree* node)
break;
case GT_LCLHEAP:
ContainCheckLclHeap(node->AsOp());
LowerLclHeap(node);
break;
#ifdef TARGET_XARCH
@@ -7992,6 +7992,70 @@ void Lowering::TransformUnusedIndirection(GenTreeIndir* ind, Compiler* comp, Bas
}
}
//------------------------------------------------------------------------
// LowerLclHeap: a common logic to lower LCLHEAP.
//
// Arguments:
//    node - the LCLHEAP node we are lowering.
//
void Lowering::LowerLclHeap(GenTree* node)
{
assert(node->OperIs(GT_LCLHEAP));
#if defined(TARGET_XARCH)
if (node->gtGetOp1()->IsCnsIntOrI())
{
GenTreeIntCon* sizeNode = node->gtGetOp1()->AsIntCon();
ssize_t size = sizeNode->IconValue();
if (size == 0)
{
// Replace with null for LCLHEAP(0)
node->BashToZeroConst(TYP_I_IMPL);
BlockRange().Remove(sizeNode);
return;
}
if (comp->info.compInitMem)
{
ssize_t alignedSize = ALIGN_UP(size, STACK_ALIGN);
if ((size > UINT_MAX) || (alignedSize > UINT_MAX))
{
// Size is too big - don't mark sizeNode as contained
return;
}
LIR::Use use;
if (BlockRange().TryGetUse(node, &use))
{
// Align LCLHEAP size for more efficient zeroing via BLK
sizeNode->SetIconValue(alignedSize);
// Emit STORE_BLK to zero it
//
// * STORE_BLK struct<alignedSize> (init) (Unroll)
// +--* LCL_VAR long V01
// \--* CNS_INT int 0
//
GenTree* heapLcl = comp->gtNewLclvNode(use.ReplaceWithLclVar(comp), TYP_I_IMPL);
GenTree* zero = comp->gtNewIconNode(0);
GenTreeBlk* storeBlk = new (comp, GT_STORE_BLK)
GenTreeBlk(GT_STORE_BLK, TYP_STRUCT, heapLcl, zero, comp->typGetBlkLayout((unsigned)alignedSize));
storeBlk->gtFlags |= (GTF_IND_UNALIGNED | GTF_ASG | GTF_EXCEPT | GTF_GLOB_REF);
BlockRange().InsertAfter(use.Def(), heapLcl, zero, storeBlk);
LowerNode(storeBlk);
}
else
{
// Value is unused, so we don't mark the size node as contained
return;
}
}
}
#endif
ContainCheckLclHeap(node->AsOp());
}
//------------------------------------------------------------------------
// LowerBlockStoreCommon: a common logic to lower STORE_OBJ/BLK/DYN_BLK.
//
......
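Two consequences of LowerLclHeap, sketched in C# (a sketch under the assumptions stated in the comments; STACK_ALIGN is 16 on x64):

public static unsafe class LowerLclHeapSketch
{
    public static void ZeroSize()
    {
        // LCLHEAP(0) is bashed to a zero constant during lowering, so no
        // stack adjustment is emitted and the resulting pointer is null.
        byte* p = stackalloc byte[0];
    }

    public static void UnalignedSize()
    {
        // With InitLocals, LCLHEAP(100) has its size rounded up to the next
        // STACK_ALIGN multiple (112 bytes, assuming STACK_ALIGN == 16) so the
        // whole allocation can be zeroed with a single unrolled STORE_BLK.
        byte* p = stackalloc byte[100];
    }
}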
@@ -315,6 +315,7 @@ private:
GenTree* LowerSignedDivOrMod(GenTree* node);
void LowerBlockStore(GenTreeBlk* blkNode);
void LowerBlockStoreCommon(GenTreeBlk* blkNode);
void LowerLclHeap(GenTree* node);
void ContainBlockStoreAddress(GenTreeBlk* blkNode, unsigned size, GenTree* addr, GenTree* addrParent);
void LowerPutArgStkOrSplit(GenTreePutArgStk* putArgNode);
#ifdef TARGET_XARCH
......
@@ -1843,60 +1843,17 @@ int LinearScan::BuildLclHeap(GenTree* tree)
{
int srcCount = 1;
// Need a variable number of temp regs (see genLclHeap() in codegenamd64.cpp):
// Here '-' means don't care.
//
// Size? Init Memory? # temp regs
// 0 - 0 (returns 0)
// const and <=6 reg words - 0 (pushes '0')
// const and >6 reg words Yes 0 (pushes '0')
// const and <PageSize No 0 (amd64) 1 (x86)
//
// const and >=PageSize No 1 (regCnt)
// Non-const Yes 0 (regCnt=targetReg and pushes '0')
// Non-const No 1 (regCnt)
//
// Note: Here we don't need internal register to be different from targetReg.
// Rather, require it to be different from operand's reg.
GenTree* size = tree->gtGetOp1();
if (size->IsCnsIntOrI())
if (size->IsCnsIntOrI() && size->isContained())
{
assert(size->isContained());
srcCount = 0;
size_t sizeVal = size->AsIntCon()->gtIconVal;
size_t sizeVal = AlignUp((size_t)size->AsIntCon()->gtIconVal, STACK_ALIGN);
if (sizeVal == 0)
// An explicitly zeroed LCLHEAP also needs a regCnt on x86, or when the allocation is page-sized or larger
if ((TARGET_POINTER_SIZE == 4) || (sizeVal >= compiler->eeGetPageSize()))
{
// For regCnt
buildInternalIntRegisterDefForNode(tree);
}
else
{
// Compute the amount of memory to properly STACK_ALIGN.
// Note: The Gentree node is not updated here as it is cheap to recompute stack aligned size.
// This should also help in debugging as we can examine the original size specified with localloc.
sizeVal = AlignUp(sizeVal, STACK_ALIGN);
// For small allocations up to 6 pointer sized words (i.e. 48 bytes of localloc)
// we will generate 'push 0'.
assert((sizeVal % REGSIZE_BYTES) == 0);
if (!compiler->info.compInitMem)
{
#ifdef TARGET_X86
// x86 always needs regCnt.
// For regCnt
buildInternalIntRegisterDefForNode(tree);
#else // !TARGET_X86
if (sizeVal >= compiler->eeGetPageSize())
{
// For regCnt
buildInternalIntRegisterDefForNode(tree);
}
#endif // !TARGET_X86
}
}
}
else
{
@@ -1905,7 +1862,7 @@ int LinearScan::BuildLclHeap(GenTree* tree)
// For regCnt
buildInternalIntRegisterDefForNode(tree);
}
BuildUse(size);
BuildUse(size); // could be a non-contained constant
}
buildInternalRegisterUses();
BuildDef(tree);
......
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
using System;
using System.Linq;
using System.Reflection;
using System.Runtime.CompilerServices;
public unsafe class StackallocTests
{
public static int Main()
{
int numberOftests = 0;
foreach (var method in typeof(StackallocTests)
.GetMethods(BindingFlags.Public | BindingFlags.Static)
.Where(t => t.Name.StartsWith("Test")))
{
// Invoke the test and make sure both the return value and the out
// parameter are empty Guids
var args = new object[1];
args[0] = Guid.NewGuid();
var value = (Guid)method.Invoke(null, args);
if ((Guid)args[0] != Guid.Empty || value != Guid.Empty)
throw new InvalidOperationException();
numberOftests++;
}
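// 30 Test* methods are expected here (at the time of writing), so this
// returns 30 + 70 == 100, the exit code the runtime test harness treats
// as success.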
return numberOftests + 70;
}
[MethodImpl(MethodImplOptions.NoInlining)]
static void PoisonStack()
{
var b = stackalloc byte[20000];
Unsafe.InitBlockUnaligned(b, 0xFF, 20000);
Consume(b);
}
[MethodImpl(MethodImplOptions.NoInlining)]
static void EnsureZeroed(byte* ptr, int size)
{
for (int i = 0; i < size; i++)
{
if (ptr[i] != 0)
throw new InvalidOperationException();
}
}
[MethodImpl(MethodImplOptions.NoInlining)]
static void Consume(byte* ptr) { } // to avoid dead-code elimination
[MethodImpl(MethodImplOptions.NoInlining)]
static T ToVar<T>(T o) => o; // convert a constant to a variable
// Tests: Constant-sized
[MethodImpl(MethodImplOptions.NoInlining)]
public static Guid Test0(out Guid g)
{
const int size = 0;
PoisonStack();
byte* p = stackalloc byte[size];
EnsureZeroed(p, size);
g = default;
return default;
}
[MethodImpl(MethodImplOptions.NoInlining)]
public static Guid Test1(out Guid g)
{
const int size = 1;
PoisonStack();
byte* p = stackalloc byte[size];
EnsureZeroed(p, size);
g = default;
return default;
}
[MethodImpl(MethodImplOptions.NoInlining)]
public static Guid Test8(out Guid g)
{
const int size = 8;
PoisonStack();
byte* p = stackalloc byte[size];
EnsureZeroed(p, size);
g = default;
return default;
}
[MethodImpl(MethodImplOptions.NoInlining)]
public static Guid Test16(out Guid g)
{
const int size = 16;
PoisonStack();
byte* p = stackalloc byte[size];
EnsureZeroed(p, size);
g = default;
return default;
}
[MethodImpl(MethodImplOptions.NoInlining)]
public static Guid Test32(out Guid g)
{
const int size = 32;
PoisonStack();
byte* p = stackalloc byte[size];
EnsureZeroed(p, size);
g = default;
return default;
}
[MethodImpl(MethodImplOptions.NoInlining)]
public static Guid Test40(out Guid g)
{
const int size = 40;
PoisonStack();
byte* p = stackalloc byte[size];
EnsureZeroed(p, size);
g = default;
return default;
}
[MethodImpl(MethodImplOptions.NoInlining)]
public static Guid Test64(out Guid g)
{
const int size = 64;
PoisonStack();
byte* p = stackalloc byte[size];
EnsureZeroed(p, size);
g = default;
return default;
}
[MethodImpl(MethodImplOptions.NoInlining)]
public static Guid Test100(out Guid g)
{
const int size = 100;
PoisonStack();
byte* p = stackalloc byte[size];
EnsureZeroed(p, size);
g = default;
return default;
}
[MethodImpl(MethodImplOptions.NoInlining)]
public static Guid Test128(out Guid g)
{
const int size = 128;
PoisonStack();
byte* p = stackalloc byte[size];
EnsureZeroed(p, size);
g = default;
return default;
}
[MethodImpl(MethodImplOptions.NoInlining)]
public static Guid Test200(out Guid g)
{
const int size = 200;
PoisonStack();
byte* p = stackalloc byte[size];
EnsureZeroed(p, size);
g = default;
return default;
}
[MethodImpl(MethodImplOptions.NoInlining)]
public static Guid Test256(out Guid g)
{
const int size = 256;
PoisonStack();
byte* p = stackalloc byte[size];
EnsureZeroed(p, size);
g = default;
return default;
}
[MethodImpl(MethodImplOptions.NoInlining)]
public static Guid Test512(out Guid g)
{
const int size = 512;
PoisonStack();
byte* p = stackalloc byte[size];
EnsureZeroed(p, size);
g = default;
return default;
}
[MethodImpl(MethodImplOptions.NoInlining)]
public static Guid Test4096(out Guid g)
{
const int size = 4096;
PoisonStack();
byte* p = stackalloc byte[size];
EnsureZeroed(p, size);
g = default;
return default;
}
[MethodImpl(MethodImplOptions.NoInlining)]
public static Guid Test20000(out Guid g)
{
const int size = 20000; // larger than a typical page (but still constant)
PoisonStack();
byte* p = stackalloc byte[size];
EnsureZeroed(p, size);
g = default;
return default;
}
// Variable-sized tests
[MethodImpl(MethodImplOptions.NoInlining)]
public static Guid Test0_var(out Guid g)
{
int size = ToVar(0);
PoisonStack();
byte* p = stackalloc byte[size];
EnsureZeroed(p, size);
g = default;
return default;
}
[MethodImpl(MethodImplOptions.NoInlining)]
public static Guid Test1_var(out Guid g)
{
int size = ToVar(1);
PoisonStack();
byte* p = stackalloc byte[size];
EnsureZeroed(p, size);
g = default;
return default;
}
[MethodImpl(MethodImplOptions.NoInlining)]
public static Guid Test8_var(out Guid g)
{
int size = ToVar(8);
PoisonStack();
byte* p = stackalloc byte[size];
EnsureZeroed(p, size);
g = default;
return default;
}
[MethodImpl(MethodImplOptions.NoInlining)]
public static Guid Test16_var(out Guid g)
{
int size = ToVar(16);
PoisonStack();
byte* p = stackalloc byte[size];
EnsureZeroed(p, size);
g = default;
return default;
}
[MethodImpl(MethodImplOptions.NoInlining)]
public static Guid Test32_var(out Guid g)
{
int size = ToVar(32);
PoisonStack();
byte* p = stackalloc byte[size];
EnsureZeroed(p, size);
g = default;
return default;
}
[MethodImpl(MethodImplOptions.NoInlining)]
public static Guid Test40_var(out Guid g)
{
int size = ToVar(40);
PoisonStack();
byte* p = stackalloc byte[size];
EnsureZeroed(p, size);
g = default;
return default;
}
[MethodImpl(MethodImplOptions.NoInlining)]
public static Guid Test64_var(out Guid g)
{
int size = ToVar(64);
PoisonStack();
byte* p = stackalloc byte[size];
EnsureZeroed(p, size);
g = default;
return default;
}
[MethodImpl(MethodImplOptions.NoInlining)]
public static Guid Test100_var(out Guid g)
{
int size = ToVar(100);
PoisonStack();
byte* p = stackalloc byte[size];
EnsureZeroed(p, size);
g = default;
return default;
}
[MethodImpl(MethodImplOptions.NoInlining)]
public static Guid Test128_var(out Guid g)
{
int size = ToVar(128);
PoisonStack();
byte* p = stackalloc byte[size];
EnsureZeroed(p, size);
g = default;
return default;
}
[MethodImpl(MethodImplOptions.NoInlining)]
public static Guid Test200_var(out Guid g)
{
int size = ToVar(200);
PoisonStack();
byte* p = stackalloc byte[size];
EnsureZeroed(p, size);
g = default;
return default;
}
[MethodImpl(MethodImplOptions.NoInlining)]
public static Guid Test256_var(out Guid g)
{
int size = ToVar(256);
PoisonStack();
byte* p = stackalloc byte[size];
EnsureZeroed(p, size);
g = default;
return default;
}
[MethodImpl(MethodImplOptions.NoInlining)]
public static Guid Test512_var(out Guid g)
{
int size = ToVar(512);
PoisonStack();
byte* p = stackalloc byte[size];
EnsureZeroed(p, size);
g = default;
return default;
}
[MethodImpl(MethodImplOptions.NoInlining)]
public static Guid Test20000_var(out Guid g)
{
int size = ToVar(20000);
PoisonStack();
byte* p = stackalloc byte[size];
EnsureZeroed(p, size);
g = default;
return default;
}
// A few SkipLocalsInit tests, just to make sure there are no asserts or unexpected garbage in the Guids
[MethodImpl(MethodImplOptions.NoInlining)]
[SkipLocalsInit]
public static Guid Test32_SkipLocalsInit(out Guid g)
{
const int size = 32;
PoisonStack();
byte* p = stackalloc byte[size];
Consume(p);
g = default;
return default;
}
[MethodImpl(MethodImplOptions.NoInlining)]
[SkipLocalsInit]
public static Guid Test256_SkipLocalsInit(out Guid g)
{
const int size = 256;
PoisonStack();
byte* p = stackalloc byte[size];
Consume(p);
g = default;
return default;
}
[MethodImpl(MethodImplOptions.NoInlining)]
[SkipLocalsInit]
public static Guid Test20000_SkipLocalsInit(out Guid g)
{
const int size = 20000;
PoisonStack();
byte* p = stackalloc byte[size];
Consume(p);
g = default;
return default;
}
}
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<OutputType>Exe</OutputType>
<Optimize>True</Optimize>
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
<SkipLocalsInit>false</SkipLocalsInit>
</PropertyGroup>
<ItemGroup>
<Compile Include="$(MSBuildProjectName).cs" />
</ItemGroup>
</Project>
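Note that the project file explicitly sets SkipLocalsInit to false, so every test method is compiled with InitLocals and the JIT is required to zero each stackalloc, which is exactly the path this commit optimizes. The three [SkipLocalsInit] methods opt back out to cover the uninitialized path as well.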