Unverified commit 87249338, authored by Mark Plesko and committed by GitHub

unroll byref struct copies (#86820)

If a struct contains a byref, then it is known to be on the stack/regs (not in the heap), so GC write barriers are not required. This adds that case to the lower*.cpp variants and attempts to make their code more similar, though I didn't actually factor them into shared code (especially given a few subtle differences, such as the call to `getUnrollThreshold`).
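
For context, a struct "contains a byref" when it has a managed-pointer (`ref`) field, as `Span<T>` does internally; the language and runtime restrict such structs to the stack or registers. A minimal hedged sketch of such a type (the `ByRefHolder` name is hypothetical and not part of this change):

``` C#
// Hypothetical example of a byref-containing struct (not from this change).
// Because it holds a ref field, instances can only live on the stack or in
// registers, never on the GC heap, so copying one never stores a GC pointer
// into the heap and therefore needs no write barrier. Span<T> is the
// canonical built-in example of such a struct.
ref struct ByRefHolder
{
    public ref int Value; // managed pointer (byref) field, requires C# 11+
    public int Length;
}
```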

This partially handles #80086: it improves the code for common cases, but since this strategy is not always used, the correctness issue there is not completely addressed. The next step is to apply the fix for that issue and see how bad the regressions are; this change will reduce their impact.

Example:

``` C#
static Span<int> Copy1(Span<int> s) => s;
```
``` asm
G_M44162_IG01:  ;; offset=0000H
       vzeroupper 
						;; size=3 bbWeight=1 PerfScore 1.00
G_M44162_IG02:  ;; offset=0003H
       vmovdqu  xmm0, xmmword ptr [rdx]
       vmovdqu  xmmword ptr [rcx], xmm0
						;; size=8 bbWeight=1 PerfScore 6.00
G_M44162_IG03:  ;; offset=000BH
       mov      rax, rcx
						;; size=3 bbWeight=1 PerfScore 0.25
G_M44162_IG04:  ;; offset=000EH
       ret      
						;; size=1 bbWeight=1 PerfScore 1.00

; Total bytes of code 15, prolog size 3, PerfScore 9.75, instruction count 5, allocated bytes for code 15 (MethodHash=4d5b537d) for method
```

Code size diffs (bytes; negative is an improvement):

Platform      | Overall | MinOpts | FullOpts
--------------|---------|---------|---------
linux arm64   |  -5,232 |  -3,260 |   -1,972
linux x64     |  -1,142 |    -750 |     -392
osx arm64     |  -5,732 |  -3,276 |   -2,456
windows arm64 |  -4,416 |  -2,580 |   -1,836
windows x64   |  -8,993 |  -5,772 |   -3,221
linux arm     | -13,518 |  -9,530 |   -3,988
windows x86   |       0 |       0 |        0
Parent 7358e1b4
@@ -416,6 +416,25 @@ void ClassLayout::InitializeGCPtrs(Compiler* compiler)
     INDEBUG(m_gcPtrsInitialized = true;)
 }

+//------------------------------------------------------------------------
+// HasGCByRef: does the layout contain at least one GC ByRef
+//
+// Return value:
+//    true if at least one GC ByRef, false otherwise.
+bool ClassLayout::HasGCByRef() const
+{
+    unsigned slots = GetSlotCount();
+    for (unsigned i = 0; i < slots; i++)
+    {
+        if (IsGCByRef(i))
+        {
+            return true;
+        }
+    }
+
+    return false;
+}
+
 //------------------------------------------------------------------------
 // AreCompatible: check if 2 layouts are the same for copying.
 //
@@ -184,11 +184,23 @@ public:
         return m_gcPtrCount != 0;
     }

+    bool HasGCByRef() const;
+
     bool IsGCPtr(unsigned slot) const
     {
         return GetGCPtr(slot) != TYPE_GC_NONE;
     }

+    bool IsGCRef(unsigned slot) const
+    {
+        return GetGCPtr(slot) == TYPE_GC_REF;
+    }
+
+    bool IsGCByRef(unsigned slot) const
+    {
+        return GetGCPtr(slot) == TYPE_GC_BYREF;
+    }
+
     var_types GetGCPtrType(unsigned slot) const
     {
         switch (GetGCPtr(slot))
@@ -552,8 +552,6 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
     GenTree* src     = blkNode->Data();
     unsigned size    = blkNode->Size();

-    const bool isDstAddrLocal = dstAddr->OperIs(GT_LCL_ADDR);
-
     if (blkNode->OperIsInitBlkOp())
     {
         if (src->OperIs(GT_INIT_VAL))
@@ -617,12 +615,22 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
             comp->lvaSetVarDoNotEnregister(srcLclNum DEBUGARG(DoNotEnregisterReason::BlockOp));
         }

-        bool     doCpObj              = !blkNode->OperIs(GT_STORE_DYN_BLK) && blkNode->GetLayout()->HasGCPtr();
-        unsigned copyBlockUnrollLimit = comp->getUnrollThreshold(Compiler::UnrollKind::Memcpy);
-        if (doCpObj && isDstAddrLocal && (size <= copyBlockUnrollLimit))
+        ClassLayout* layout               = blkNode->GetLayout();
+        bool         doCpObj              = !blkNode->OperIs(GT_STORE_DYN_BLK) && layout->HasGCPtr();
+        unsigned     copyBlockUnrollLimit = comp->getUnrollThreshold(Compiler::UnrollKind::Memcpy);
+
+        if (doCpObj && (size <= copyBlockUnrollLimit))
         {
-            doCpObj                  = false;
-            blkNode->gtBlkOpGcUnsafe = true;
+            // No write barriers are needed on the stack.
+            // If the layout contains a byref, then we know it must live on the stack.
+            if (dstAddr->OperIs(GT_LCL_ADDR) || layout->HasGCByRef())
+            {
+                // If the size is small enough to unroll then we need to mark the block as non-interruptible
+                // to actually allow unrolling. The generated code does not report GC references loaded in the
+                // temporary register(s) used for copying.
+                doCpObj                  = false;
+                blkNode->gtBlkOpGcUnsafe = true;
+            }
         }

         if (doCpObj)
@@ -343,15 +343,22 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
             comp->lvaSetVarDoNotEnregister(srcLclNum DEBUGARG(DoNotEnregisterReason::BlockOp));
         }

-        bool     doCpObj              = !blkNode->OperIs(GT_STORE_DYN_BLK) && blkNode->GetLayout()->HasGCPtr();
-        unsigned copyBlockUnrollLimit = comp->getUnrollThreshold(Compiler::UnrollKind::Memcpy);
-        if (doCpObj && dstAddr->OperIs(GT_LCL_ADDR) && (size <= copyBlockUnrollLimit))
+        ClassLayout* layout               = blkNode->GetLayout();
+        bool         doCpObj              = !blkNode->OperIs(GT_STORE_DYN_BLK) && layout->HasGCPtr();
+        unsigned     copyBlockUnrollLimit = comp->getUnrollThreshold(Compiler::UnrollKind::Memcpy);
+
+        if (doCpObj && (size <= copyBlockUnrollLimit))
         {
-            // If the size is small enough to unroll then we need to mark the block as non-interruptible
-            // to actually allow unrolling. The generated code does not report GC references loaded in the
-            // temporary register(s) used for copying.
-            doCpObj                  = false;
-            blkNode->gtBlkOpGcUnsafe = true;
+            // No write barriers are needed on the stack.
+            // If the layout contains a byref, then we know it must live on the stack.
+            if (dstAddr->OperIs(GT_LCL_ADDR) || layout->HasGCByRef())
+            {
+                // If the size is small enough to unroll then we need to mark the block as non-interruptible
+                // to actually allow unrolling. The generated code does not report GC references loaded in the
+                // temporary register(s) used for copying.
+                doCpObj                  = false;
+                blkNode->gtBlkOpGcUnsafe = true;
+            }
         }

         // CopyObj or CopyBlk
@@ -297,14 +297,21 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
             comp->lvaSetVarDoNotEnregister(srcLclNum DEBUGARG(DoNotEnregisterReason::BlockOp));
         }

-        bool doCpObj = !blkNode->OperIs(GT_STORE_DYN_BLK) && blkNode->GetLayout()->HasGCPtr();
-        if (doCpObj && dstAddr->OperIs(GT_LCL_ADDR) && (size <= CPBLK_UNROLL_LIMIT))
+        ClassLayout* layout  = blkNode->GetLayout();
+        bool         doCpObj = !blkNode->OperIs(GT_STORE_DYN_BLK) && layout->HasGCPtr();
+
+        if (doCpObj && (size <= CPBLK_UNROLL_LIMIT))
         {
-            // If the size is small enough to unroll then we need to mark the block as non-interruptible
-            // to actually allow unrolling. The generated code does not report GC references loaded in the
-            // temporary register(s) used for copying.
-            doCpObj                  = false;
-            blkNode->gtBlkOpGcUnsafe = true;
+            // No write barriers are needed on the stack.
+            // If the layout contains a byref, then we know it must live on the stack.
+            if (dstAddr->OperIs(GT_LCL_ADDR) || layout->HasGCByRef())
+            {
+                // If the size is small enough to unroll then we need to mark the block as non-interruptible
+                // to actually allow unrolling. The generated code does not report GC references loaded in the
+                // temporary register(s) used for copying.
+                doCpObj                  = false;
+                blkNode->gtBlkOpGcUnsafe = true;
+            }
         }

         // CopyObj or CopyBlk
@@ -394,18 +394,23 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
             comp->lvaSetVarDoNotEnregister(srcLclNum DEBUGARG(DoNotEnregisterReason::StoreBlkSrc));
         }

-        ClassLayout* layout  = blkNode->GetLayout();
-        bool         doCpObj = !blkNode->OperIs(GT_STORE_DYN_BLK) && layout->HasGCPtr();
+        ClassLayout* layout               = blkNode->GetLayout();
+        bool         doCpObj              = !blkNode->OperIs(GT_STORE_DYN_BLK) && layout->HasGCPtr();
+        unsigned     copyBlockUnrollLimit = comp->getUnrollThreshold(Compiler::UnrollKind::Memcpy, false);

 #ifndef JIT32_GCENCODER
-        if (doCpObj && dstAddr->OperIs(GT_LCL_ADDR) &&
-            (size <= comp->getUnrollThreshold(Compiler::UnrollKind::Memcpy, false)))
-        {
-            // If the size is small enough to unroll then we need to mark the block as non-interruptible
-            // to actually allow unrolling. The generated code does not report GC references loaded in the
-            // temporary register(s) used for copying. This is not supported for the JIT32_GCENCODER.
-            doCpObj                  = false;
-            blkNode->gtBlkOpGcUnsafe = true;
+        if (doCpObj && (size <= copyBlockUnrollLimit))
+        {
+            // No write barriers are needed on the stack.
+            // If the layout contains a byref, then we know it must live on the stack.
+            if (dstAddr->OperIs(GT_LCL_ADDR) || layout->HasGCByRef())
+            {
+                // If the size is small enough to unroll then we need to mark the block as non-interruptible
+                // to actually allow unrolling. The generated code does not report GC references loaded in the
+                // temporary register(s) used for copying. This is not supported for the JIT32_GCENCODER.
+                doCpObj                  = false;
+                blkNode->gtBlkOpGcUnsafe = true;
+            }
         }
 #endif
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using Xunit;

#pragma warning disable CS8500

namespace Runtime_80086
{
    public static unsafe class Test
    {
        [MethodImpl(MethodImplOptions.NoInlining)]
        public static Span<int> Marshal1(Span<int> a)
        {
            return MemoryMarshal.CreateSpan(ref MemoryMarshal.GetReference(a), a.Length);
        }

        [MethodImpl(MethodImplOptions.NoInlining)]
        public static Span<int> Marshal2(Span<int>* a)
        {
            return MemoryMarshal.CreateSpan(ref MemoryMarshal.GetReference(*a), a->Length);
        }

        [MethodImpl(MethodImplOptions.NoInlining)]
        static Span<int> Copy1(Span<int> s) => s;

        [MethodImpl(MethodImplOptions.NoInlining)]
        static Span<int> Copy2(Span<int> a)
        {
            ref Span<int> ra = ref a;
            return ra;
        }

        [MethodImpl(MethodImplOptions.NoInlining)]
        static Span<int> Copy3(Span<int>* a)
        {
            return *a;
        }

        [MethodImpl(MethodImplOptions.NoInlining)]
        static Span<int> Copy4(scoped ref Span<int> a)
        {
            return a;
        }

        [MethodImpl(MethodImplOptions.NoInlining)]
        static Span<int> Copy5(ReadOnlySpan<int> a)
        {
            // Example is used to check code generation but shouldn't be used elsewhere
            return *(Span<int>*)&a;
        }

        [Fact]
        public static void TestEntryPoint()
        {
            Span<int> s = new int[1] { 13 };
            s = Marshal1(s);
            s = Marshal2(&s);
            s = Copy1(s);
            s = Copy2(s);
            s = Copy3(&s);
            s = Copy4(ref s);
            s = Copy5(s);
            Assert.Equal(13, s[0]);
        }
    }
}
<Project Sdk="Microsoft.NET.Sdk">
  <PropertyGroup>
    <AllowUnsafeBlocks>True</AllowUnsafeBlocks>
    <Optimize>True</Optimize>
  </PropertyGroup>
  <ItemGroup>
    <Compile Include="$(MSBuildProjectName).cs" />
  </ItemGroup>
</Project>