diff --git a/src/coreclr/jit/block.cpp b/src/coreclr/jit/block.cpp index 041a0f6ca3e91b4f5d1b0dd56337eec50c730e5a..720f4492ae68eef9a05da12720b11de5a4c28444 100644 --- a/src/coreclr/jit/block.cpp +++ b/src/coreclr/jit/block.cpp @@ -788,18 +788,19 @@ void* BasicBlock::MemoryPhiArg::operator new(size_t sz, Compiler* comp) // IR nodes. If cloning of any statement fails, `false` will be returned and block `to` may be // partially populated. If cloning of all statements succeeds, `true` will be returned and // block `to` will be fully populated. - +// +// Note: +// Leaves block ref count at zero, and pred edge list empty. +// bool BasicBlock::CloneBlockState( Compiler* compiler, BasicBlock* to, const BasicBlock* from, unsigned varNum, int varVal) { assert(to->bbStmtList == nullptr); - to->bbFlags = from->bbFlags; to->bbWeight = from->bbWeight; BlockSetOps::AssignAllowUninitRhs(compiler, to->bbReach, from->bbReach); to->copyEHRegion(from); to->bbCatchTyp = from->bbCatchTyp; - to->bbRefs = from->bbRefs; to->bbStkTempsIn = from->bbStkTempsIn; to->bbStkTempsOut = from->bbStkTempsOut; to->bbStkDepth = from->bbStkDepth; diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 11fb3c3986d7a7a8ccd660e0f5155f0a999afa13..7ed0ae94c48fbcd96a8caeca7ffaa59db80f2ef4 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -6474,7 +6474,16 @@ protected: // loop nested in "loopInd" that shares the same head as "loopInd". void optUpdateLoopHead(unsigned loopInd, BasicBlock* from, BasicBlock* to); - void optRedirectBlock(BasicBlock* blk, BlockToBlockMap* redirectMap, const bool updatePreds = false); + enum class RedirectBlockOption + { + DoNotChangePredLists, // do not modify pred lists + UpdatePredLists, // add/remove to pred lists + AddToPredLists, // only add to pred lists + }; + + void optRedirectBlock(BasicBlock* blk, + BlockToBlockMap* redirectMap, + const RedirectBlockOption = RedirectBlockOption::DoNotChangePredLists); // Marks the containsCall information to "lnum" and any parent loops. void AddContainsCallAllContainingLoops(unsigned lnum); diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 50555a7c1a256a6525abad67e3e1265ab83eadb2..95db592e4ac1de9e7a652e604920d954c2ad2901 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -2763,16 +2763,27 @@ void Compiler::optIdentifyLoopsForAlignment() // Updates the successors of `blk`: if `blk2` is a branch successor of `blk`, and there is a mapping // for `blk2->blk3` in `redirectMap`, change `blk` so that `blk3` is this branch successor. // -// Note that fall-through successors are not modified, including predecessor lists. -// // Arguments: // blk - block to redirect // redirectMap - block->block map specifying how the `blk` target will be redirected. -// updatePreds - if `true`, update the predecessor lists to match. +// predOption - specifies how to update the pred lists +// +// Notes: +// Fall-through successors are assumed correct and are not modified. +// Pred lists for successors of `blk` may be changed, depending on `predOption`. // -void Compiler::optRedirectBlock(BasicBlock* blk, BlockToBlockMap* redirectMap, const bool updatePreds) +void Compiler::optRedirectBlock(BasicBlock* blk, BlockToBlockMap* redirectMap, RedirectBlockOption predOption) { + const bool updatePreds = (predOption == RedirectBlockOption::UpdatePredLists); + const bool addPreds = (predOption == RedirectBlockOption::AddToPredLists); + + if (addPreds && blk->bbFallsThrough()) + { + fgAddRefPred(blk->bbNext, blk); + } + BasicBlock* newJumpDest = nullptr; + switch (blk->bbJumpKind) { case BBJ_NONE: @@ -2794,10 +2805,17 @@ void Compiler::optRedirectBlock(BasicBlock* blk, BlockToBlockMap* redirectMap, c if (updatePreds) { fgRemoveRefPred(blk->bbJumpDest, blk); + } + if (updatePreds || addPreds) + { fgAddRefPred(newJumpDest, blk); } blk->bbJumpDest = newJumpDest; } + else if (addPreds) + { + fgAddRefPred(blk->bbJumpDest, blk); + } break; case BBJ_SWITCH: @@ -2811,11 +2829,18 @@ void Compiler::optRedirectBlock(BasicBlock* blk, BlockToBlockMap* redirectMap, c if (updatePreds) { fgRemoveRefPred(switchDest, blk); + } + if (updatePreds || addPreds) + { fgAddRefPred(newJumpDest, blk); } blk->bbJumpSwt->bbsDstTab[i] = newJumpDest; redirected = true; } + else if (addPreds) + { + fgAddRefPred(switchDest, blk); + } } // If any redirections happened, invalidate the switch table map for the switch. if (redirected) @@ -4357,6 +4382,7 @@ PhaseStatus Compiler::optUnrollLoops() BlockToBlockMap blockMap(getAllocator(CMK_LoopOpt)); BasicBlock* insertAfter = bottom; + BasicBlock* const tail = bottom->bbNext; BasicBlock::loopNumber newLoopNum = loop.lpParent; bool anyNestedLoopsUnrolledThisLoop = false; int lval; @@ -4379,9 +4405,8 @@ PhaseStatus Compiler::optUnrollLoops() // to clone a block in the loop, splice out and forget all the blocks we cloned so far: // put the loop blocks back to how they were before we started cloning blocks, // and abort unrolling the loop. - BasicBlock* oldBottomNext = insertAfter->bbNext; - bottom->bbNext = oldBottomNext; - oldBottomNext->bbPrev = bottom; + bottom->bbNext = tail; + tail->bbPrev = bottom; loop.lpFlags |= LPFLG_DONT_UNROLL; // Mark it so we don't try to unroll it again. INDEBUG(++unrollFailures); JITDUMP("Failed to unroll loop " FMT_LP ": block cloning failed on " FMT_BB "\n", lnum, @@ -4436,9 +4461,16 @@ PhaseStatus Compiler::optUnrollLoops() { BasicBlock* newBlock = blockMap[block]; optCopyBlkDest(block, newBlock); - optRedirectBlock(newBlock, &blockMap); + optRedirectBlock(newBlock, &blockMap, RedirectBlockOption::AddToPredLists); } + // We fall into to this unroll iteration from the bottom block (first iteration) + // or from the previous unroll clone of the bottom block (subsequent iterations). + // After doing this, all the newly cloned blocks now have proper flow and pred lists. + // + BasicBlock* const clonedTop = blockMap[loop.lpTop]; + fgAddRefPred(clonedTop, clonedTop->bbPrev); + /* update the new value for the unrolled iterator */ switch (iterOper) @@ -4463,8 +4495,10 @@ PhaseStatus Compiler::optUnrollLoops() } // If we get here, we successfully cloned all the blocks in the unrolled loop. + // Note we may not have done any cloning at all, if the loop iteration count was zero. - // Gut the old loop body + // Gut the old loop body. + // for (BasicBlock* const block : loop.LoopBlocks()) { // Check if the old loop body had any nested loops that got cloned. Note that we need to do this @@ -4475,6 +4509,18 @@ PhaseStatus Compiler::optUnrollLoops() anyNestedLoopsUnrolledThisLoop = true; } + // Scrub all pred list references to block, except for bottom-> bottom->bbNext. + // + for (BasicBlock* succ : block->Succs()) + { + if ((block == bottom) && (succ == bottom->bbNext)) + { + continue; + } + + fgRemoveAllRefPreds(succ, block); + } + block->bbStmtList = nullptr; block->bbJumpKind = BBJ_NONE; block->bbFlags &= ~BBF_LOOP_HEAD; @@ -4482,26 +4528,56 @@ PhaseStatus Compiler::optUnrollLoops() block->bbNatLoopNum = newLoopNum; } + // The old loop blocks will form an emtpy linear chain. + // Add back a suitable pred list links. + // + BasicBlock* oldLoopPred = head; + for (BasicBlock* const block : loop.LoopBlocks()) + { + if (block != top) + { + fgAddRefPred(block, oldLoopPred); + } + oldLoopPred = block; + } + if (anyNestedLoopsUnrolledThisLoop) { anyNestedLoopsUnrolled = true; } + // Now fix up the exterior flow and pred list entries. + // + // Control will fall through from HEAD to its successor, which is either + // the now empty TOP (if totalIter == 0) or the first cloned top. + // // If the HEAD is a BBJ_COND drop the condition (and make HEAD a BBJ_NONE block). - + // if (head->bbJumpKind == BBJ_COND) { testStmt = head->lastStmt(); noway_assert(testStmt->GetRootNode()->gtOper == GT_JTRUE); fgRemoveStmt(head, testStmt); + fgRemoveRefPred(head->bbJumpDest, head); head->bbJumpKind = BBJ_NONE; } else { /* the loop must execute */ + assert(totalIter > 0); noway_assert(head->bbJumpKind == BBJ_NONE); } + // If we actually unrolled, tail is now reached + // by the last cloned bottom, and no longer + // reached by bottom. + // + if (totalIter > 0) + { + fgAddRefPred(tail, blockMap[bottom]); + fgRemoveRefPred(tail, bottom); + } + #ifdef DEBUG if (verbose) { @@ -4564,7 +4640,7 @@ PhaseStatus Compiler::optUnrollLoops() // If we unrolled any nested loops, we rebuild the loop table (including recomputing the // return blocks list). - constexpr bool computePreds = true; + constexpr bool computePreds = false; constexpr bool computeDoms = true; const bool computeReturnBlocks = anyNestedLoopsUnrolled; const bool computeLoops = anyNestedLoopsUnrolled; @@ -5084,7 +5160,7 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block) // Redirect the predecessor to the new block. JITDUMP("Redirecting non-loop " FMT_BB " -> " FMT_BB " to " FMT_BB " -> " FMT_BB "\n", predBlock->bbNum, bTest->bbNum, predBlock->bbNum, bNewCond->bbNum); - optRedirectBlock(predBlock, &blockMap, /*updatePreds*/ true); + optRedirectBlock(predBlock, &blockMap, RedirectBlockOption::UpdatePredLists); } // If we have profile data for all blocks and we know that we are cloning the diff --git a/src/coreclr/jit/ssabuilder.cpp b/src/coreclr/jit/ssabuilder.cpp index 3e8c0559f7f28521f6f31d5379f88e8ae55c4764..a6490d1ab073ec6584ebc1e60efb0840df7db7eb 100644 --- a/src/coreclr/jit/ssabuilder.cpp +++ b/src/coreclr/jit/ssabuilder.cpp @@ -1662,7 +1662,7 @@ void Compiler::DumpSsaSummary() if (numDefs == 0) { - printf("V%02u: in SSA but no defs\n"); + printf("V%02u: in SSA but no defs\n", lclNum); } else {