From e2d192ac4ecc8595bb6934aa1217baf77344328c Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Tue, 17 Jan 2023 16:31:19 -0800 Subject: [PATCH] JIT: maintain pred lists during loop unrolling (#80625) We now update pred lists during loop unrolling, rather than recomputing them from scratch. There are several parts to the fix: first, `optRedirectBlock' now has a new ability to add pred references for the flow from a newly cloned block, be it either to a remapped successor or a non-remapped successor. Along with this we no longer copy over the block ref count in `CloneBlockState`. These changes allow us to create the right pred links and ref counts in the interior of a cloned subgraph. Second, we now scrub block references from the original loop body blocks instead of just setting their ref counts to zero. Finally, we fix up references for exterior flow into and out of the unroll complex. Addresses one of the cases mentioned in #49030. --- src/coreclr/jit/block.cpp | 7 ++- src/coreclr/jit/compiler.h | 11 +++- src/coreclr/jit/optimizer.cpp | 100 +++++++++++++++++++++++++++++---- src/coreclr/jit/ssabuilder.cpp | 2 +- 4 files changed, 103 insertions(+), 17 deletions(-) diff --git a/src/coreclr/jit/block.cpp b/src/coreclr/jit/block.cpp index 041a0f6ca3e..720f4492ae6 100644 --- a/src/coreclr/jit/block.cpp +++ b/src/coreclr/jit/block.cpp @@ -788,18 +788,19 @@ void* BasicBlock::MemoryPhiArg::operator new(size_t sz, Compiler* comp) // IR nodes. If cloning of any statement fails, `false` will be returned and block `to` may be // partially populated. If cloning of all statements succeeds, `true` will be returned and // block `to` will be fully populated. - +// +// Note: +// Leaves block ref count at zero, and pred edge list empty. +// bool BasicBlock::CloneBlockState( Compiler* compiler, BasicBlock* to, const BasicBlock* from, unsigned varNum, int varVal) { assert(to->bbStmtList == nullptr); - to->bbFlags = from->bbFlags; to->bbWeight = from->bbWeight; BlockSetOps::AssignAllowUninitRhs(compiler, to->bbReach, from->bbReach); to->copyEHRegion(from); to->bbCatchTyp = from->bbCatchTyp; - to->bbRefs = from->bbRefs; to->bbStkTempsIn = from->bbStkTempsIn; to->bbStkTempsOut = from->bbStkTempsOut; to->bbStkDepth = from->bbStkDepth; diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 11fb3c3986d..7ed0ae94c48 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -6474,7 +6474,16 @@ protected: // loop nested in "loopInd" that shares the same head as "loopInd". void optUpdateLoopHead(unsigned loopInd, BasicBlock* from, BasicBlock* to); - void optRedirectBlock(BasicBlock* blk, BlockToBlockMap* redirectMap, const bool updatePreds = false); + enum class RedirectBlockOption + { + DoNotChangePredLists, // do not modify pred lists + UpdatePredLists, // add/remove to pred lists + AddToPredLists, // only add to pred lists + }; + + void optRedirectBlock(BasicBlock* blk, + BlockToBlockMap* redirectMap, + const RedirectBlockOption = RedirectBlockOption::DoNotChangePredLists); // Marks the containsCall information to "lnum" and any parent loops. void AddContainsCallAllContainingLoops(unsigned lnum); diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 50555a7c1a2..95db592e4ac 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -2763,16 +2763,27 @@ void Compiler::optIdentifyLoopsForAlignment() // Updates the successors of `blk`: if `blk2` is a branch successor of `blk`, and there is a mapping // for `blk2->blk3` in `redirectMap`, change `blk` so that `blk3` is this branch successor. // -// Note that fall-through successors are not modified, including predecessor lists. -// // Arguments: // blk - block to redirect // redirectMap - block->block map specifying how the `blk` target will be redirected. -// updatePreds - if `true`, update the predecessor lists to match. +// predOption - specifies how to update the pred lists +// +// Notes: +// Fall-through successors are assumed correct and are not modified. +// Pred lists for successors of `blk` may be changed, depending on `predOption`. // -void Compiler::optRedirectBlock(BasicBlock* blk, BlockToBlockMap* redirectMap, const bool updatePreds) +void Compiler::optRedirectBlock(BasicBlock* blk, BlockToBlockMap* redirectMap, RedirectBlockOption predOption) { + const bool updatePreds = (predOption == RedirectBlockOption::UpdatePredLists); + const bool addPreds = (predOption == RedirectBlockOption::AddToPredLists); + + if (addPreds && blk->bbFallsThrough()) + { + fgAddRefPred(blk->bbNext, blk); + } + BasicBlock* newJumpDest = nullptr; + switch (blk->bbJumpKind) { case BBJ_NONE: @@ -2794,10 +2805,17 @@ void Compiler::optRedirectBlock(BasicBlock* blk, BlockToBlockMap* redirectMap, c if (updatePreds) { fgRemoveRefPred(blk->bbJumpDest, blk); + } + if (updatePreds || addPreds) + { fgAddRefPred(newJumpDest, blk); } blk->bbJumpDest = newJumpDest; } + else if (addPreds) + { + fgAddRefPred(blk->bbJumpDest, blk); + } break; case BBJ_SWITCH: @@ -2811,11 +2829,18 @@ void Compiler::optRedirectBlock(BasicBlock* blk, BlockToBlockMap* redirectMap, c if (updatePreds) { fgRemoveRefPred(switchDest, blk); + } + if (updatePreds || addPreds) + { fgAddRefPred(newJumpDest, blk); } blk->bbJumpSwt->bbsDstTab[i] = newJumpDest; redirected = true; } + else if (addPreds) + { + fgAddRefPred(switchDest, blk); + } } // If any redirections happened, invalidate the switch table map for the switch. if (redirected) @@ -4357,6 +4382,7 @@ PhaseStatus Compiler::optUnrollLoops() BlockToBlockMap blockMap(getAllocator(CMK_LoopOpt)); BasicBlock* insertAfter = bottom; + BasicBlock* const tail = bottom->bbNext; BasicBlock::loopNumber newLoopNum = loop.lpParent; bool anyNestedLoopsUnrolledThisLoop = false; int lval; @@ -4379,9 +4405,8 @@ PhaseStatus Compiler::optUnrollLoops() // to clone a block in the loop, splice out and forget all the blocks we cloned so far: // put the loop blocks back to how they were before we started cloning blocks, // and abort unrolling the loop. - BasicBlock* oldBottomNext = insertAfter->bbNext; - bottom->bbNext = oldBottomNext; - oldBottomNext->bbPrev = bottom; + bottom->bbNext = tail; + tail->bbPrev = bottom; loop.lpFlags |= LPFLG_DONT_UNROLL; // Mark it so we don't try to unroll it again. INDEBUG(++unrollFailures); JITDUMP("Failed to unroll loop " FMT_LP ": block cloning failed on " FMT_BB "\n", lnum, @@ -4436,9 +4461,16 @@ PhaseStatus Compiler::optUnrollLoops() { BasicBlock* newBlock = blockMap[block]; optCopyBlkDest(block, newBlock); - optRedirectBlock(newBlock, &blockMap); + optRedirectBlock(newBlock, &blockMap, RedirectBlockOption::AddToPredLists); } + // We fall into to this unroll iteration from the bottom block (first iteration) + // or from the previous unroll clone of the bottom block (subsequent iterations). + // After doing this, all the newly cloned blocks now have proper flow and pred lists. + // + BasicBlock* const clonedTop = blockMap[loop.lpTop]; + fgAddRefPred(clonedTop, clonedTop->bbPrev); + /* update the new value for the unrolled iterator */ switch (iterOper) @@ -4463,8 +4495,10 @@ PhaseStatus Compiler::optUnrollLoops() } // If we get here, we successfully cloned all the blocks in the unrolled loop. + // Note we may not have done any cloning at all, if the loop iteration count was zero. - // Gut the old loop body + // Gut the old loop body. + // for (BasicBlock* const block : loop.LoopBlocks()) { // Check if the old loop body had any nested loops that got cloned. Note that we need to do this @@ -4475,6 +4509,18 @@ PhaseStatus Compiler::optUnrollLoops() anyNestedLoopsUnrolledThisLoop = true; } + // Scrub all pred list references to block, except for bottom-> bottom->bbNext. + // + for (BasicBlock* succ : block->Succs()) + { + if ((block == bottom) && (succ == bottom->bbNext)) + { + continue; + } + + fgRemoveAllRefPreds(succ, block); + } + block->bbStmtList = nullptr; block->bbJumpKind = BBJ_NONE; block->bbFlags &= ~BBF_LOOP_HEAD; @@ -4482,26 +4528,56 @@ PhaseStatus Compiler::optUnrollLoops() block->bbNatLoopNum = newLoopNum; } + // The old loop blocks will form an emtpy linear chain. + // Add back a suitable pred list links. + // + BasicBlock* oldLoopPred = head; + for (BasicBlock* const block : loop.LoopBlocks()) + { + if (block != top) + { + fgAddRefPred(block, oldLoopPred); + } + oldLoopPred = block; + } + if (anyNestedLoopsUnrolledThisLoop) { anyNestedLoopsUnrolled = true; } + // Now fix up the exterior flow and pred list entries. + // + // Control will fall through from HEAD to its successor, which is either + // the now empty TOP (if totalIter == 0) or the first cloned top. + // // If the HEAD is a BBJ_COND drop the condition (and make HEAD a BBJ_NONE block). - + // if (head->bbJumpKind == BBJ_COND) { testStmt = head->lastStmt(); noway_assert(testStmt->GetRootNode()->gtOper == GT_JTRUE); fgRemoveStmt(head, testStmt); + fgRemoveRefPred(head->bbJumpDest, head); head->bbJumpKind = BBJ_NONE; } else { /* the loop must execute */ + assert(totalIter > 0); noway_assert(head->bbJumpKind == BBJ_NONE); } + // If we actually unrolled, tail is now reached + // by the last cloned bottom, and no longer + // reached by bottom. + // + if (totalIter > 0) + { + fgAddRefPred(tail, blockMap[bottom]); + fgRemoveRefPred(tail, bottom); + } + #ifdef DEBUG if (verbose) { @@ -4564,7 +4640,7 @@ PhaseStatus Compiler::optUnrollLoops() // If we unrolled any nested loops, we rebuild the loop table (including recomputing the // return blocks list). - constexpr bool computePreds = true; + constexpr bool computePreds = false; constexpr bool computeDoms = true; const bool computeReturnBlocks = anyNestedLoopsUnrolled; const bool computeLoops = anyNestedLoopsUnrolled; @@ -5084,7 +5160,7 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block) // Redirect the predecessor to the new block. JITDUMP("Redirecting non-loop " FMT_BB " -> " FMT_BB " to " FMT_BB " -> " FMT_BB "\n", predBlock->bbNum, bTest->bbNum, predBlock->bbNum, bNewCond->bbNum); - optRedirectBlock(predBlock, &blockMap, /*updatePreds*/ true); + optRedirectBlock(predBlock, &blockMap, RedirectBlockOption::UpdatePredLists); } // If we have profile data for all blocks and we know that we are cloning the diff --git a/src/coreclr/jit/ssabuilder.cpp b/src/coreclr/jit/ssabuilder.cpp index 3e8c0559f7f..a6490d1ab07 100644 --- a/src/coreclr/jit/ssabuilder.cpp +++ b/src/coreclr/jit/ssabuilder.cpp @@ -1662,7 +1662,7 @@ void Compiler::DumpSsaSummary() if (numDefs == 0) { - printf("V%02u: in SSA but no defs\n"); + printf("V%02u: in SSA but no defs\n", lclNum); } else { -- GitLab