From 9321801ec622dffe962e7108b4de5cf8202abb83 Mon Sep 17 00:00:00 2001
From: Jakob Botsch Nielsen
Date: Tue, 4 Apr 2023 10:04:31 +0200
Subject: [PATCH] JIT: Properly allocate shadow space for block op helpers on
 win-x64 (#84124)

Move the accounting of the outgoing arg space size from simple lowering
to lowering and start accounting for the fact that some block ops may
turn into helper calls.

Fix #80140
---
 src/coreclr/jit/flowgraph.cpp  | 79 -------------------------------
 src/coreclr/jit/lower.cpp      | 85 ++++++++++++++++++++++++++++++++++
 src/coreclr/jit/lower.h        |  7 +++
 src/coreclr/jit/lowerxarch.cpp |  7 +++
 4 files changed, 99 insertions(+), 79 deletions(-)

diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp
index ea47ddb5869..50c31d5cb6b 100644
--- a/src/coreclr/jit/flowgraph.cpp
+++ b/src/coreclr/jit/flowgraph.cpp
@@ -2935,38 +2935,6 @@ PhaseStatus Compiler::fgSimpleLowering()
                 break;
             }
 
-#if FEATURE_FIXED_OUT_ARGS
-            case GT_CALL:
-            {
-                GenTreeCall* call = tree->AsCall();
-                // Fast tail calls use the caller-supplied scratch
-                // space so have no impact on this method's outgoing arg size.
-                if (!call->IsFastTailCall() && !call->IsHelperCall(this, CORINFO_HELP_VALIDATE_INDIRECT_CALL))
-                {
-                    // Update outgoing arg size to handle this call
-                    const unsigned thisCallOutAreaSize = call->gtArgs.OutgoingArgsStackSize();
-                    assert(thisCallOutAreaSize >= MIN_ARG_AREA_FOR_CALL);
-
-                    if (thisCallOutAreaSize > outgoingArgSpaceSize)
-                    {
-                        JITDUMP("Bumping outgoingArgSpaceSize from %u to %u for call [%06d]\n",
-                                outgoingArgSpaceSize, thisCallOutAreaSize, dspTreeID(tree));
-                        outgoingArgSpaceSize = thisCallOutAreaSize;
-                    }
-                    else
-                    {
-                        JITDUMP("outgoingArgSpaceSize %u sufficient for call [%06d], which needs %u\n",
-                                outgoingArgSpaceSize, dspTreeID(tree), thisCallOutAreaSize);
-                    }
-                }
-                else
-                {
-                    JITDUMP("outgoingArgSpaceSize not impacted by fast tail call [%06d]\n", dspTreeID(tree));
-                }
-                break;
-            }
-#endif // FEATURE_FIXED_OUT_ARGS
-
             case GT_CAST:
             {
                 if (tree->AsCast()->CastOp()->OperIsSimple() && fgSimpleLowerCastOfSmpOp(range, tree->AsCast()))
                 {
                     madeChanges = true;
                 }
                 break;
             }
@@ -2985,53 +2953,6 @@ PhaseStatus Compiler::fgSimpleLowering()
         } // foreach tree
     }     // foreach BB
 
-#if FEATURE_FIXED_OUT_ARGS
-    // Finish computing the outgoing args area size
-    //
-    // Need to make sure the MIN_ARG_AREA_FOR_CALL space is added to the frame if:
-    // 1. there are calls to THROW_HELPER methods.
-    // 2. we are generating profiling Enter/Leave/TailCall hooks. This will ensure
-    //    that even methods without any calls will have outgoing arg area space allocated.
-    // 3. We will be generating calls to PInvoke helpers. TODO: This shouldn't be required because
-    //    if there are any calls to PInvoke methods, there should be a call that we processed
-    //    above. However, we still generate calls to PInvoke prolog helpers even if we have dead code
-    //    eliminated all the calls.
-    // 4. We will be generating a stack cookie check. In this case we can call a helper to fail fast.
-    //
-    // An example for these two cases is Windows Amd64, where the ABI requires to have 4 slots for
-    // the outgoing arg space if the method makes any calls.
-    if (outgoingArgSpaceSize < MIN_ARG_AREA_FOR_CALL)
-    {
-        if (compUsesThrowHelper || compIsProfilerHookNeeded() ||
-            (compMethodRequiresPInvokeFrame() && !opts.ShouldUsePInvokeHelpers()) || getNeedsGSSecurityCookie())
-        {
-            outgoingArgSpaceSize = MIN_ARG_AREA_FOR_CALL;
-            JITDUMP("Bumping outgoingArgSpaceSize to %u for possible helper or profile hook call",
-                    outgoingArgSpaceSize);
-        }
-    }
-
-    // If a function has localloc, we will need to move the outgoing arg space when the
-    // localloc happens. When we do this, we need to maintain stack alignment. To avoid
-    // leaving alignment-related holes when doing this move, make sure the outgoing
-    // argument space size is a multiple of the stack alignment by aligning up to the next
-    // stack alignment boundary.
-    if (compLocallocUsed)
-    {
-        outgoingArgSpaceSize = roundUp(outgoingArgSpaceSize, STACK_ALIGN);
-        JITDUMP("Bumping outgoingArgSpaceSize to %u for localloc", outgoingArgSpaceSize);
-    }
-
-    assert((outgoingArgSpaceSize % TARGET_POINTER_SIZE) == 0);
-
-    // Publish the final value and mark it as read only so any update
-    // attempt later will cause an assert.
-    lvaOutgoingArgSpaceSize = outgoingArgSpaceSize;
-    lvaOutgoingArgSpaceSize.MarkAsReadOnly();
-    lvaGetDesc(lvaOutgoingArgSpaceVar)->GrowBlockLayout(typGetBlkLayout(outgoingArgSpaceSize));
-
-#endif // FEATURE_FIXED_OUT_ARGS
-
     return madeChanges ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING;
 }

diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp
index 8b00440e919..b5cac0dd978 100644
--- a/src/coreclr/jit/lower.cpp
+++ b/src/coreclr/jit/lower.cpp
@@ -2193,6 +2193,13 @@ GenTree* Lowering::LowerCall(GenTree* node)
         // since LowerFastTailCall calls InsertPInvokeMethodEpilog.
         LowerFastTailCall(call);
     }
+    else
+    {
+        if (!call->IsHelperCall(comp, CORINFO_HELP_VALIDATE_INDIRECT_CALL))
+        {
+            RequireOutgoingArgSpace(call, call->gtArgs.OutgoingArgsStackSize());
+        }
+    }
 
     if (varTypeIsStruct(call))
     {
@@ -7160,6 +7167,8 @@ PhaseStatus Lowering::DoPhase()
     }
 #endif
 
+    FinalizeOutgoingArgSpace();
+
     // Recompute local var ref counts before potentially sorting for liveness.
     // Note this does minimal work in cases where we are not going to sort.
     const bool isRecompute = true;
@@ -8269,3 +8278,79 @@ GenTree* Lowering::InsertNewSimdCreateScalarUnsafeNode(var_types simdType,
     return result;
 }
 #endif // FEATURE_HW_INTRINSICS
+
+//----------------------------------------------------------------------------------------------
+// Lowering::RequireOutgoingArgSpace: Record that the compilation will require
+// outgoing arg space of at least the specified size.
+//
+// Arguments:
+//   node - The node that is the reason for the requirement.
+//   size - The minimal required size of the outgoing arg space.
+//
+void Lowering::RequireOutgoingArgSpace(GenTree* node, unsigned size)
+{
+#if FEATURE_FIXED_OUT_ARGS
+    if (size <= m_outgoingArgSpaceSize)
+    {
+        return;
+    }
+
+    JITDUMP("Bumping outgoing arg space size from %u to %u for [%06u]\n", m_outgoingArgSpaceSize, size,
+            Compiler::dspTreeID(node));
+    m_outgoingArgSpaceSize = size;
+#endif
+}
+
+//----------------------------------------------------------------------------------------------
+// Lowering::FinalizeOutgoingArgSpace: Finalize and allocate the outgoing arg
+// space area.
+//
+void Lowering::FinalizeOutgoingArgSpace()
+{
+#if FEATURE_FIXED_OUT_ARGS
+    // Finish computing the outgoing args area size
+    //
+    // Need to make sure the MIN_ARG_AREA_FOR_CALL space is added to the frame if:
+    // 1. there are calls to THROW_HELPER methods.
+    // 2. we are generating profiling Enter/Leave/TailCall hooks. This will ensure
+    //    that even methods without any calls will have outgoing arg area space allocated.
+    // 3. We will be generating calls to PInvoke helpers. TODO: This shouldn't be required because
+    //    if there are any calls to PInvoke methods, there should be a call that we processed
+    //    above. However, we still generate calls to PInvoke prolog helpers even if we have dead code
+    //    eliminated all the calls.
+    // 4. We will be generating a stack cookie check. In this case we can call a helper to fail fast.
+    //
+    // An example of where this matters is Windows Amd64, where the ABI requires 4 slots for
+    // the outgoing arg space if the method makes any calls.
+    if (m_outgoingArgSpaceSize < MIN_ARG_AREA_FOR_CALL)
+    {
+        if (comp->compUsesThrowHelper || comp->compIsProfilerHookNeeded() ||
+            (comp->compMethodRequiresPInvokeFrame() && !comp->opts.ShouldUsePInvokeHelpers()) ||
+            comp->getNeedsGSSecurityCookie())
+        {
+            m_outgoingArgSpaceSize = MIN_ARG_AREA_FOR_CALL;
+            JITDUMP("Bumping outgoing arg space size to %u for possible helper or profile hook call",
+                    m_outgoingArgSpaceSize);
+        }
+    }
+
+    // If a function has localloc, we will need to move the outgoing arg space when the
+    // localloc happens. When we do this, we need to maintain stack alignment. To avoid
+    // leaving alignment-related holes when doing this move, make sure the outgoing
+    // argument space size is a multiple of the stack alignment by aligning up to the next
+    // stack alignment boundary.
+    if (comp->compLocallocUsed)
+    {
+        m_outgoingArgSpaceSize = roundUp(m_outgoingArgSpaceSize, STACK_ALIGN);
+        JITDUMP("Bumping outgoing arg space size to %u for localloc", m_outgoingArgSpaceSize);
+    }
+
+    assert((m_outgoingArgSpaceSize % TARGET_POINTER_SIZE) == 0);
+
+    // Publish the final value and mark it as read only so any update
+    // attempt later will cause an assert.
+    comp->lvaOutgoingArgSpaceSize = m_outgoingArgSpaceSize;
+    comp->lvaOutgoingArgSpaceSize.MarkAsReadOnly();
+    comp->lvaGetDesc(comp->lvaOutgoingArgSpaceVar)->GrowBlockLayout(comp->typGetBlkLayout(m_outgoingArgSpaceSize));
+#endif
+}

diff --git a/src/coreclr/jit/lower.h b/src/coreclr/jit/lower.h
index 34b5f76f6b6..15a89990758 100644
--- a/src/coreclr/jit/lower.h
+++ b/src/coreclr/jit/lower.h
@@ -542,10 +542,17 @@ private:
         }
     }
 
+    void RequireOutgoingArgSpace(GenTree* node, unsigned numBytes);
+    void FinalizeOutgoingArgSpace();
+
     LinearScan*           m_lsra;
     unsigned              vtableCallTemp;       // local variable we use as a temp for vtable calls
     mutable SideEffectSet m_scratchSideEffects; // SideEffectSet used for IsSafeToContainMem and isRMWIndirCandidate
     BasicBlock*           m_block;
+
+#ifdef FEATURE_FIXED_OUT_ARGS
+    unsigned m_outgoingArgSpaceSize = 0;
+#endif
 };
 
 #endif // _LOWER_H_

diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp
index 619eb4b75b4..d8ccb43a76a 100644
--- a/src/coreclr/jit/lowerxarch.cpp
+++ b/src/coreclr/jit/lowerxarch.cpp
@@ -515,6 +515,13 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
 #endif
         }
     }
+
+#ifndef TARGET_X86
+    if ((MIN_ARG_AREA_FOR_CALL > 0) && (blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindHelper))
+    {
+        RequireOutgoingArgSpace(blkNode, MIN_ARG_AREA_FOR_CALL);
+    }
+#endif
 }
 
 //------------------------------------------------------------------------
--
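Aside (not part of the patch): the accounting this change centralizes in Lowering is small enough to model outside the JIT. The sketch below is a standalone C++ illustration; all names and constants are hypothetical stand-ins for the JIT's MIN_ARG_AREA_FOR_CALL, STACK_ALIGN, and TARGET_POINTER_SIZE on Windows x64, where the 32-byte minimum is the shadow area (4 register-home slots) that every callee, including the memset/memcpy-style helpers a block op can lower into, is allowed to write.

// sketch.cpp - standalone model of the outgoing arg space accounting.
// Not CoreCLR code: constants and names are hypothetical stand-ins.
#include <algorithm>
#include <cassert>
#include <cstdio>

constexpr unsigned kMinArgAreaForCall = 32; // win-x64 shadow space: 4 slots of 8 bytes
constexpr unsigned kStackAlign        = 16; // must be a power of two
constexpr unsigned kPointerSize       = 8;

// Round size up to the next multiple of a power-of-two alignment.
static unsigned RoundUp(unsigned size, unsigned align)
{
    return (size + align - 1) & ~(align - 1);
}

class OutgoingArgSpace
{
    unsigned m_size = 0;

public:
    // Analogue of RequireOutgoingArgSpace: keep the running maximum over
    // every call site, explicit or implicit.
    void Require(unsigned size)
    {
        m_size = std::max(m_size, size);
    }

    // Analogue of FinalizeOutgoingArgSpace: enforce the ABI minimum when any
    // call (including a helper call) can happen, then realign if localloc
    // will move the area at run time.
    unsigned Finalize(bool mayCall, bool usesLocalloc)
    {
        if (mayCall && (m_size < kMinArgAreaForCall))
        {
            m_size = kMinArgAreaForCall;
        }
        if (usesLocalloc)
        {
            m_size = RoundUp(m_size, kStackAlign);
        }
        assert((m_size % kPointerSize) == 0);
        return m_size;
    }
};

int main()
{
    OutgoingArgSpace space;
    space.Require(40);                 // an explicit call passing 5 stack slots
    space.Require(kMinArgAreaForCall); // a block op that lowers to a helper call
    std::printf("%u\n", space.Finalize(true, true)); // prints 48
}

The bug being fixed is visible in this model: before the patch, a method whose only "call" was a block op lowered into a helper never executed the second Require, so on win-x64 the helper could be invoked with no shadow space reserved for it.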