未验证 提交 e2d192ac 编写于 作者: A Andy Ayers 提交者: GitHub

JIT: maintain pred lists during loop unrolling (#80625)

We now update pred lists during loop unrolling, rather than recomputing
them from scratch.

There are several parts to the fix: first, `optRedirectBlock' now has
a new ability to add pred references for the flow from a newly cloned
block, be it either to a remapped successor or a non-remapped successor.
Along with this we no longer copy over the block ref count in `CloneBlockState`.
These changes allow us to create the right pred links and ref counts in the
interior of a cloned subgraph.

Second, we now scrub block references from the original loop body blocks
instead of just setting their ref counts to zero.

Finally, we fix up references for exterior flow into and out of the unroll
complex.

Addresses one of the cases mentioned in #49030.
上级 11d4c4c6
......@@ -788,18 +788,19 @@ void* BasicBlock::MemoryPhiArg::operator new(size_t sz, Compiler* comp)
// IR nodes. If cloning of any statement fails, `false` will be returned and block `to` may be
// partially populated. If cloning of all statements succeeds, `true` will be returned and
// block `to` will be fully populated.
//
// Note:
// Leaves block ref count at zero, and pred edge list empty.
//
bool BasicBlock::CloneBlockState(
Compiler* compiler, BasicBlock* to, const BasicBlock* from, unsigned varNum, int varVal)
{
assert(to->bbStmtList == nullptr);
to->bbFlags = from->bbFlags;
to->bbWeight = from->bbWeight;
BlockSetOps::AssignAllowUninitRhs(compiler, to->bbReach, from->bbReach);
to->copyEHRegion(from);
to->bbCatchTyp = from->bbCatchTyp;
to->bbRefs = from->bbRefs;
to->bbStkTempsIn = from->bbStkTempsIn;
to->bbStkTempsOut = from->bbStkTempsOut;
to->bbStkDepth = from->bbStkDepth;
......
......@@ -6474,7 +6474,16 @@ protected:
// loop nested in "loopInd" that shares the same head as "loopInd".
void optUpdateLoopHead(unsigned loopInd, BasicBlock* from, BasicBlock* to);
void optRedirectBlock(BasicBlock* blk, BlockToBlockMap* redirectMap, const bool updatePreds = false);
enum class RedirectBlockOption
{
DoNotChangePredLists, // do not modify pred lists
UpdatePredLists, // add/remove to pred lists
AddToPredLists, // only add to pred lists
};
void optRedirectBlock(BasicBlock* blk,
BlockToBlockMap* redirectMap,
const RedirectBlockOption = RedirectBlockOption::DoNotChangePredLists);
// Marks the containsCall information to "lnum" and any parent loops.
void AddContainsCallAllContainingLoops(unsigned lnum);
......
......@@ -2763,16 +2763,27 @@ void Compiler::optIdentifyLoopsForAlignment()
// Updates the successors of `blk`: if `blk2` is a branch successor of `blk`, and there is a mapping
// for `blk2->blk3` in `redirectMap`, change `blk` so that `blk3` is this branch successor.
//
// Note that fall-through successors are not modified, including predecessor lists.
//
// Arguments:
// blk - block to redirect
// redirectMap - block->block map specifying how the `blk` target will be redirected.
// updatePreds - if `true`, update the predecessor lists to match.
// predOption - specifies how to update the pred lists
//
// Notes:
// Fall-through successors are assumed correct and are not modified.
// Pred lists for successors of `blk` may be changed, depending on `predOption`.
//
void Compiler::optRedirectBlock(BasicBlock* blk, BlockToBlockMap* redirectMap, const bool updatePreds)
void Compiler::optRedirectBlock(BasicBlock* blk, BlockToBlockMap* redirectMap, RedirectBlockOption predOption)
{
const bool updatePreds = (predOption == RedirectBlockOption::UpdatePredLists);
const bool addPreds = (predOption == RedirectBlockOption::AddToPredLists);
if (addPreds && blk->bbFallsThrough())
{
fgAddRefPred(blk->bbNext, blk);
}
BasicBlock* newJumpDest = nullptr;
switch (blk->bbJumpKind)
{
case BBJ_NONE:
......@@ -2794,10 +2805,17 @@ void Compiler::optRedirectBlock(BasicBlock* blk, BlockToBlockMap* redirectMap, c
if (updatePreds)
{
fgRemoveRefPred(blk->bbJumpDest, blk);
}
if (updatePreds || addPreds)
{
fgAddRefPred(newJumpDest, blk);
}
blk->bbJumpDest = newJumpDest;
}
else if (addPreds)
{
fgAddRefPred(blk->bbJumpDest, blk);
}
break;
case BBJ_SWITCH:
......@@ -2811,11 +2829,18 @@ void Compiler::optRedirectBlock(BasicBlock* blk, BlockToBlockMap* redirectMap, c
if (updatePreds)
{
fgRemoveRefPred(switchDest, blk);
}
if (updatePreds || addPreds)
{
fgAddRefPred(newJumpDest, blk);
}
blk->bbJumpSwt->bbsDstTab[i] = newJumpDest;
redirected = true;
}
else if (addPreds)
{
fgAddRefPred(switchDest, blk);
}
}
// If any redirections happened, invalidate the switch table map for the switch.
if (redirected)
......@@ -4357,6 +4382,7 @@ PhaseStatus Compiler::optUnrollLoops()
BlockToBlockMap blockMap(getAllocator(CMK_LoopOpt));
BasicBlock* insertAfter = bottom;
BasicBlock* const tail = bottom->bbNext;
BasicBlock::loopNumber newLoopNum = loop.lpParent;
bool anyNestedLoopsUnrolledThisLoop = false;
int lval;
......@@ -4379,9 +4405,8 @@ PhaseStatus Compiler::optUnrollLoops()
// to clone a block in the loop, splice out and forget all the blocks we cloned so far:
// put the loop blocks back to how they were before we started cloning blocks,
// and abort unrolling the loop.
BasicBlock* oldBottomNext = insertAfter->bbNext;
bottom->bbNext = oldBottomNext;
oldBottomNext->bbPrev = bottom;
bottom->bbNext = tail;
tail->bbPrev = bottom;
loop.lpFlags |= LPFLG_DONT_UNROLL; // Mark it so we don't try to unroll it again.
INDEBUG(++unrollFailures);
JITDUMP("Failed to unroll loop " FMT_LP ": block cloning failed on " FMT_BB "\n", lnum,
......@@ -4436,9 +4461,16 @@ PhaseStatus Compiler::optUnrollLoops()
{
BasicBlock* newBlock = blockMap[block];
optCopyBlkDest(block, newBlock);
optRedirectBlock(newBlock, &blockMap);
optRedirectBlock(newBlock, &blockMap, RedirectBlockOption::AddToPredLists);
}
// We fall into to this unroll iteration from the bottom block (first iteration)
// or from the previous unroll clone of the bottom block (subsequent iterations).
// After doing this, all the newly cloned blocks now have proper flow and pred lists.
//
BasicBlock* const clonedTop = blockMap[loop.lpTop];
fgAddRefPred(clonedTop, clonedTop->bbPrev);
/* update the new value for the unrolled iterator */
switch (iterOper)
......@@ -4463,8 +4495,10 @@ PhaseStatus Compiler::optUnrollLoops()
}
// If we get here, we successfully cloned all the blocks in the unrolled loop.
// Note we may not have done any cloning at all, if the loop iteration count was zero.
// Gut the old loop body
// Gut the old loop body.
//
for (BasicBlock* const block : loop.LoopBlocks())
{
// Check if the old loop body had any nested loops that got cloned. Note that we need to do this
......@@ -4475,6 +4509,18 @@ PhaseStatus Compiler::optUnrollLoops()
anyNestedLoopsUnrolledThisLoop = true;
}
// Scrub all pred list references to block, except for bottom-> bottom->bbNext.
//
for (BasicBlock* succ : block->Succs())
{
if ((block == bottom) && (succ == bottom->bbNext))
{
continue;
}
fgRemoveAllRefPreds(succ, block);
}
block->bbStmtList = nullptr;
block->bbJumpKind = BBJ_NONE;
block->bbFlags &= ~BBF_LOOP_HEAD;
......@@ -4482,26 +4528,56 @@ PhaseStatus Compiler::optUnrollLoops()
block->bbNatLoopNum = newLoopNum;
}
// The old loop blocks will form an emtpy linear chain.
// Add back a suitable pred list links.
//
BasicBlock* oldLoopPred = head;
for (BasicBlock* const block : loop.LoopBlocks())
{
if (block != top)
{
fgAddRefPred(block, oldLoopPred);
}
oldLoopPred = block;
}
if (anyNestedLoopsUnrolledThisLoop)
{
anyNestedLoopsUnrolled = true;
}
// Now fix up the exterior flow and pred list entries.
//
// Control will fall through from HEAD to its successor, which is either
// the now empty TOP (if totalIter == 0) or the first cloned top.
//
// If the HEAD is a BBJ_COND drop the condition (and make HEAD a BBJ_NONE block).
//
if (head->bbJumpKind == BBJ_COND)
{
testStmt = head->lastStmt();
noway_assert(testStmt->GetRootNode()->gtOper == GT_JTRUE);
fgRemoveStmt(head, testStmt);
fgRemoveRefPred(head->bbJumpDest, head);
head->bbJumpKind = BBJ_NONE;
}
else
{
/* the loop must execute */
assert(totalIter > 0);
noway_assert(head->bbJumpKind == BBJ_NONE);
}
// If we actually unrolled, tail is now reached
// by the last cloned bottom, and no longer
// reached by bottom.
//
if (totalIter > 0)
{
fgAddRefPred(tail, blockMap[bottom]);
fgRemoveRefPred(tail, bottom);
}
#ifdef DEBUG
if (verbose)
{
......@@ -4564,7 +4640,7 @@ PhaseStatus Compiler::optUnrollLoops()
// If we unrolled any nested loops, we rebuild the loop table (including recomputing the
// return blocks list).
constexpr bool computePreds = true;
constexpr bool computePreds = false;
constexpr bool computeDoms = true;
const bool computeReturnBlocks = anyNestedLoopsUnrolled;
const bool computeLoops = anyNestedLoopsUnrolled;
......@@ -5084,7 +5160,7 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block)
// Redirect the predecessor to the new block.
JITDUMP("Redirecting non-loop " FMT_BB " -> " FMT_BB " to " FMT_BB " -> " FMT_BB "\n", predBlock->bbNum,
bTest->bbNum, predBlock->bbNum, bNewCond->bbNum);
optRedirectBlock(predBlock, &blockMap, /*updatePreds*/ true);
optRedirectBlock(predBlock, &blockMap, RedirectBlockOption::UpdatePredLists);
}
// If we have profile data for all blocks and we know that we are cloning the
......
......@@ -1662,7 +1662,7 @@ void Compiler::DumpSsaSummary()
if (numDefs == 0)
{
printf("V%02u: in SSA but no defs\n");
printf("V%02u: in SSA but no defs\n", lclNum);
}
else
{
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册