未验证 提交 a4d5889a 编写于 作者: A Andy Ayers 提交者: GitHub

JIT: chained guarded devirtualization (#51890)

When expanding a guarded devirtualization call site, scout ahead to see if
there's another site just after this one. If so, expand the second site
so that the frequently-taken paths from the first and second tests form a
join-free "hot path". Continue scouting and chaining candidates along this
path until we run out of candidates, find a big enough stretch without a
candidate, or find a candidate with low likelihood of success.

Chaining enhances the abilities of the redundant branch optimizer to clean
up redundant type tests, so if there are multiple virtual or interface
calls in a short span, all dispatching off the same object, one test will
suffice to cover all the calls.
上级 201772fe
......@@ -5416,7 +5416,8 @@ public:
void fgRemoveEmptyBlocks();
void fgRemoveStmt(BasicBlock* block, Statement* stmt);
void fgRemoveStmt(BasicBlock* block, Statement* stmt DEBUGARG(bool isUnlink = false));
void fgUnlinkStmt(BasicBlock* block, Statement* stmt);
bool fgCheckRemoveStmt(BasicBlock* block, Statement* stmt);
......
......@@ -414,23 +414,44 @@ Statement* Compiler::fgNewStmtFromTree(GenTree* tree, IL_OFFSETX offs)
return fgNewStmtFromTree(tree, nullptr, offs);
}
/*****************************************************************************
*
* Remove a useless statement from a basic block.
*
*/
//------------------------------------------------------------------------
// fgUnlinkStmt: unlink a statement from a block's statement list
//
// Arguments:
// block - the block from which 'stmt' will be unlinked
// stmt - the statement to be unlinked
//
// Notes:
// next and previous links are nulled out, in anticipation
// of this statement being re-inserted somewhere else.
//
void Compiler::fgUnlinkStmt(BasicBlock* block, Statement* stmt)
{
    // Take the statement out of the block's list; in DEBUG builds the extra
    // argument tells fgRemoveStmt to report this as an unlink, not a removal.
    fgRemoveStmt(block, stmt DEBUGARG(/* isUnlink */ true));

    // Null out both links, since the caller is expected to re-insert
    // the statement somewhere else.
    stmt->SetPrevStmt(nullptr);
    stmt->SetNextStmt(nullptr);
}
void Compiler::fgRemoveStmt(BasicBlock* block, Statement* stmt)
//------------------------------------------------------------------------
// fgRemoveStmt: remove a statement from a block's statement list
//
// Arguments:
// block - the block from which 'stmt' will be removed
// stmt - the statement to be removed
// isUnlink - ultimate plan is to move the statement, not delete it
//
void Compiler::fgRemoveStmt(BasicBlock* block, Statement* stmt DEBUGARG(bool isUnlink))
{
assert(fgOrder == FGOrderTree);
#ifdef DEBUG
if (verbose &&
stmt->GetRootNode()->gtOper != GT_NOP) // Don't print if it is a GT_NOP. Too much noise from the inliner.
// Don't print if it is a GT_NOP. Too much noise from the inliner.
if (verbose && (stmt->GetRootNode()->gtOper != GT_NOP))
{
printf("\nRemoving statement ");
printf("\n%s ", isUnlink ? "unlinking" : "removing useless");
gtDispStmt(stmt);
printf(" in " FMT_BB " as useless:\n", block->bbNum);
printf(" from " FMT_BB "\n", block->bbNum);
}
#endif // DEBUG
......@@ -480,9 +501,8 @@ void Compiler::fgRemoveStmt(BasicBlock* block, Statement* stmt)
{
if (block->bbStmtList == nullptr)
{
printf("\n" FMT_BB " becomes empty", block->bbNum);
printf("\n" FMT_BB " becomes empty\n", block->bbNum);
}
printf("\n");
}
#endif // DEBUG
}
......
......@@ -11460,10 +11460,13 @@ void Compiler::gtDispLeaf(GenTree* tree, IndentStack* indentStack)
break;
case GT_RET_EXPR:
printf("(inl return from call ");
printTreeID(tree->AsRetExpr()->gtInlineCandidate);
{
    // 'tree' is the GT_RET_EXPR node itself, so it can never be a call.
    // Classify the tree it refers to instead, so the dump distinguishes
    // "from call" and "expr" correctly.
    GenTree* const associatedTree = tree->AsRetExpr()->gtInlineCandidate;
    printf("(inl return %s ", associatedTree->IsCall() ? "from call" : "expr");
    printTreeID(associatedTree);
    printf(")");
    break;
}
break;
case GT_PHYSREG:
printf(" %s", getRegName(tree->AsPhysReg()->gtSrcReg, varTypeUsesFloatReg(tree)));
......
......@@ -4229,12 +4229,13 @@ struct GenTreeCall final : public GenTree
#define GTF_CALL_M_DEVIRTUALIZED 0x00040000 // GT_CALL -- this call was devirtualized
#define GTF_CALL_M_UNBOXED 0x00080000 // GT_CALL -- this call was optimized to use the unboxed entry point
#define GTF_CALL_M_GUARDED_DEVIRT 0x00100000 // GT_CALL -- this call is a candidate for guarded devirtualization
#define GTF_CALL_M_GUARDED 0x00200000 // GT_CALL -- this call was transformed by guarded devirtualization
#define GTF_CALL_M_ALLOC_SIDE_EFFECTS 0x00400000 // GT_CALL -- this is a call to an allocator with side effects
#define GTF_CALL_M_SUPPRESS_GC_TRANSITION 0x00800000 // GT_CALL -- suppress the GC transition (i.e. during a pinvoke) but a separate GC safe point is required.
#define GTF_CALL_M_EXP_RUNTIME_LOOKUP 0x01000000 // GT_CALL -- this call needs to be transformed into CFG for the dynamic dictionary expansion feature.
#define GTF_CALL_M_STRESS_TAILCALL 0x02000000 // GT_CALL -- the call is NOT "tail" prefixed but GTF_CALL_M_EXPLICIT_TAILCALL was added because of tail call stress mode
#define GTF_CALL_M_EXPANDED_EARLY 0x04000000 // GT_CALL -- the Virtual Call target address is expanded and placed in gtControlExpr in Morph rather than in Lower
#define GTF_CALL_M_GUARDED_DEVIRT_CHAIN 0x00200000 // GT_CALL -- this call is a candidate for chained guarded devirtualization
#define GTF_CALL_M_GUARDED 0x00400000 // GT_CALL -- this call was transformed by guarded devirtualization
#define GTF_CALL_M_ALLOC_SIDE_EFFECTS 0x00800000 // GT_CALL -- this is a call to an allocator with side effects
#define GTF_CALL_M_SUPPRESS_GC_TRANSITION 0x01000000 // GT_CALL -- suppress the GC transition (i.e. during a pinvoke) but a separate GC safe point is required.
#define GTF_CALL_M_EXP_RUNTIME_LOOKUP 0x02000000 // GT_CALL -- this call needs to be transformed into CFG for the dynamic dictionary expansion feature.
#define GTF_CALL_M_STRESS_TAILCALL 0x04000000 // GT_CALL -- the call is NOT "tail" prefixed but GTF_CALL_M_EXPLICIT_TAILCALL was added because of tail call stress mode
#define GTF_CALL_M_EXPANDED_EARLY 0x08000000 // GT_CALL -- the Virtual Call target address is expanded and placed in gtControlExpr in Morph rather than in Lower
// clang-format on
......
......@@ -513,7 +513,8 @@ private:
{
origCall = GetCall(stmt);
JITDUMP("*** %s contemplating [%06u]\n", Name(), compiler->dspTreeID(origCall));
JITDUMP("\n----------------\n\n*** %s contemplating [%06u] in " FMT_BB " \n", Name(),
compiler->dspTreeID(origCall), currBlock->bbNum);
// We currently need inline candidate info to guarded devirt.
if (!origCall->IsInlineCandidate())
......@@ -527,7 +528,23 @@ private:
assert((likelihood >= 0) && (likelihood <= 100));
JITDUMP("Likelihood of correct guess is %u\n", likelihood);
const bool isChainedGdv = (origCall->gtCallMoreFlags & GTF_CALL_M_GUARDED_DEVIRT_CHAIN) != 0;
if (isChainedGdv)
{
JITDUMP("Expansion will chain to the previous GDV\n");
}
Transform();
if (isChainedGdv)
{
TransformForChainedGdv();
}
// Look ahead and see if there's another Gdv we might chain to this one.
//
ScoutForChainedGdv();
}
protected:
......@@ -589,14 +606,23 @@ private:
origCall->gtCallThisArg = compiler->gtNewCallArgs(compiler->gtNewLclvNode(thisTempNum, TYP_REF));
}
GenTree* methodTable = compiler->gtNewMethodTableLookup(thisTree);
// Remember the current last statement. If we're doing a chained GDV, we'll clone/copy
// all the code in the check block up to and including this statement.
//
// Note it's important that we clone/copy the temp assign above, if we created one,
// because flow along the "cold path" is going to bypass the check block.
//
lastStmt = checkBlock->lastStmt();
// Find target method table
//
GenTree* methodTable = compiler->gtNewMethodTableLookup(thisTree);
GuardedDevirtualizationCandidateInfo* guardedInfo = origCall->gtGuardedDevirtualizationCandidateInfo;
CORINFO_CLASS_HANDLE clsHnd = guardedInfo->guardedClassHandle;
GenTree* targetMethodTable = compiler->gtNewIconEmbClsHndNode(clsHnd);
// Compare and jump to else (which does the indirect call) if NOT equal
//
GenTree* methodTableCompare = compiler->gtNewOperNode(GT_NE, TYP_INT, targetMethodTable, methodTable);
GenTree* jmpTree = compiler->gtNewOperNode(GT_JTRUE, TYP_VOID, methodTableCompare);
Statement* jmpStmt = compiler->fgNewStmtFromTree(jmpTree, stmt->GetILOffsetX());
......@@ -806,11 +832,212 @@ private:
compiler->fgInsertStmtAtEnd(elseBlock, newStmt);
// Set the original statement to a nop.
//
stmt->SetRootNode(compiler->gtNewNothingNode());
}
// For chained gdv, we modify the expansion as follows:
//
// We verify the check block has two BBJ_NONE/ALWAYS predecessors, one of
// which (the "cold path") ends in a normal call, the other in an
// inline candidate call.
//
// All the statements in the check block before the type test are copied to the
// predecessor blocks (one via cloning, the other via direct copy).
//
// The cold path block is then modified to bypass the type test and jump
// directly to the else block.
//
void TransformForChainedGdv()
{
// Find the hot/cold predecessors. (Consider: just record these when
// we did the scouting).
//
BasicBlock* const coldBlock = checkBlock->bbPrev;
if (coldBlock->bbJumpKind != BBJ_NONE)
{
JITDUMP("Unexpected flow from cold path " FMT_BB "\n", coldBlock->bbNum);
return;
}
BasicBlock* const hotBlock = coldBlock->bbPrev;
if ((hotBlock->bbJumpKind != BBJ_ALWAYS) || (hotBlock->bbJumpDest != checkBlock))
{
JITDUMP("Unexpected flow from hot path " FMT_BB "\n", hotBlock->bbNum);
return;
}
JITDUMP("Hot pred block is " FMT_BB " and cold pred block is " FMT_BB "\n", hotBlock->bbNum,
coldBlock->bbNum);
// Clone and and copy the statements in the check block up to
// and including lastStmt over to the hot block.
//
// This will be the "hot" copy of the code.
//
Statement* const afterLastStmt = lastStmt->GetNextStmt();
for (Statement* checkStmt = checkBlock->firstStmt(); checkStmt != afterLastStmt;)
{
Statement* const nextStmt = checkStmt->GetNextStmt();
// We should have ensured during scouting that all the statements
// here can safely be cloned.
//
// Consider: allow inline candidates here, and keep them viable
// in the hot copy, and demote them in the cold copy.
//
Statement* const clonedStmt = compiler->gtCloneStmt(checkStmt);
compiler->fgInsertStmtAtEnd(hotBlock, clonedStmt);
checkStmt = nextStmt;
}
// Now move the same span of statements to the cold block.
//
for (Statement* checkStmt = checkBlock->firstStmt(); checkStmt != afterLastStmt;)
{
Statement* const nextStmt = checkStmt->GetNextStmt();
compiler->fgUnlinkStmt(checkBlock, checkStmt);
compiler->fgInsertStmtAtEnd(coldBlock, checkStmt);
checkStmt = nextStmt;
}
// Finally, rewire the cold block to jump to the else block,
// not fall through to the the check block.
//
coldBlock->bbJumpKind = BBJ_ALWAYS;
coldBlock->bbJumpDest = elseBlock;
}
// When the current candidate has sufficiently high likelihood, scan
// the remainder block looking for another GDV candidate.
//
// (also consider: if currBlock has sufficiently high execution frequency)
//
// We want to see if it makes sense to mark the subsequent GDV site as a "chained"
// GDV, where we duplicate the code in between to stitch together the high-likelihood
// outcomes without a join.
//
void ScoutForChainedGdv()
{
    // If the current call isn't sufficiently likely, don't try to form a chain.
    //
    const unsigned gdvChainLikelihood = JitConfig.JitGuardedDevirtualizationChainLikelihood();

    if (likelihood < gdvChainLikelihood)
    {
        return;
    }

    JITDUMP("Scouting for possible GDV chain as likelihood %u >= %u\n", likelihood, gdvChainLikelihood);

    // Running totals of what chaining would have to duplicate. Only the
    // statement count is used as a limit; the node count is reported in
    // the dump.
    //
    const unsigned maxStatementDup   = JitConfig.JitGuardedDevirtualizationChainStatements();
    unsigned       chainStatementDup = 0;
    unsigned       chainNodeDup      = 0;

    // Helper class to check a statement for unclonable nodes and count
    // the total number of nodes visited.
    //
    // A node is treated as unclonable if it is a GT_RET_EXPR, or a call that
    // is an inline candidate but not a guarded devirtualization candidate.
    // The walk stops at the first such node, which is left in m_unclonableNode.
    //
    class UnclonableVisitor final : public GenTreeVisitor<UnclonableVisitor>
    {
    public:
        enum
        {
            DoPreOrder = true
        };

        GenTree* m_unclonableNode; // first unclonable node found, or nullptr
        unsigned m_nodeCount;      // nodes visited before any abort

        UnclonableVisitor(Compiler* compiler)
            : GenTreeVisitor<UnclonableVisitor>(compiler), m_unclonableNode(nullptr), m_nodeCount(0)
        {
        }

        fgWalkResult PreOrderVisit(GenTree** use, GenTree* user)
        {
            GenTree* const node = *use;

            if (node->IsCall())
            {
                GenTreeCall* const call = node->AsCall();

                if (call->IsInlineCandidate() && !call->IsGuardedDevirtualizationCandidate())
                {
                    m_unclonableNode = node;
                    return fgWalkResult::WALK_ABORT;
                }
            }
            else if (node->OperIs(GT_RET_EXPR))
            {
                m_unclonableNode = node;
                return fgWalkResult::WALK_ABORT;
            }

            m_nodeCount++;
            return fgWalkResult::WALK_CONTINUE;
        }
    };

    for (Statement* nextStmt : remainderBlock->Statements())
    {
        JITDUMP(" Scouting " FMT_STMT "\n", nextStmt->GetID());

        // See if this is a guarded devirt candidate.
        // These will be top-level trees.
        //
        GenTree* const root = nextStmt->GetRootNode();

        if (root->IsCall())
        {
            GenTreeCall* const call = root->AsCall();

            if (call->IsGuardedDevirtualizationCandidate() &&
                (call->gtGuardedDevirtualizationCandidateInfo->likelihood >= gdvChainLikelihood))
            {
                JITDUMP("GDV call at [%06u] has likelihood %u >= %u; chaining (%u stmts, %u nodes to dup).\n",
                        compiler->dspTreeID(call), call->gtGuardedDevirtualizationCandidateInfo->likelihood,
                        gdvChainLikelihood, chainStatementDup, chainNodeDup);

                // Mark the call so that its own expansion chains to this one.
                //
                call->gtCallMoreFlags |= GTF_CALL_M_GUARDED_DEVIRT_CHAIN;
                break;
            }
        }

        // Stop searching if we've accumulated too much dup cost.
        // Consider: use node count instead.
        //
        if (chainStatementDup >= maxStatementDup)
        {
            JITDUMP(" reached max statement dup limit of %u, bailing out\n", maxStatementDup);
            break;
        }

        // See if this statement's tree is one that we can clone.
        //
        UnclonableVisitor unclonableVisitor(compiler);
        unclonableVisitor.WalkTree(nextStmt->GetRootNodePointer(), nullptr);

        if (unclonableVisitor.m_unclonableNode != nullptr)
        {
            JITDUMP(" node [%06u] can't be cloned\n", compiler->dspTreeID(unclonableVisitor.m_unclonableNode));
            break;
        }

        // Looks like we can clone this, so keep scouting.
        //
        chainStatementDup++;
        chainNodeDup += unclonableVisitor.m_nodeCount;
    }
}
private:
unsigned returnTemp;
unsigned returnTemp;
Statement* lastStmt;
};
// Runtime lookup with dynamic dictionary expansion transformer,
......
......@@ -458,8 +458,10 @@ CONFIG_INTEGER(JitEnableRemoveEmptyTry, W("JitEnableRemoveEmptyTry"), 1)
// Overall master enable for Guarded Devirtualization.
CONFIG_INTEGER(JitEnableGuardedDevirtualization, W("JitEnableGuardedDevirtualization"), 1)
#if defined(DEBUG)
// Various policies for GuardedDevirtualization
CONFIG_INTEGER(JitGuardedDevirtualizationChainLikelihood, W("JitGuardedDevirtualizationChainLikelihood"), 0x4B) // 75
CONFIG_INTEGER(JitGuardedDevirtualizationChainStatements, W("JitGuardedDevirtualizationChainStatements"), 4)
#if defined(DEBUG)
CONFIG_STRING(JitGuardedDevirtualizationRange, W("JitGuardedDevirtualizationRange"))
#endif // DEBUG
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册