Unverified commit e0e80780 authored by Andy Ayers, committed by GitHub

JIT: treat synthesized profile data as pgo data, fix finally weights (#83185)

Once synthesis arrives on the scene, we're not going to want phases in the JIT
arbitrarily modifying block weights. There is already a guard of this sort
for regular profile data, so it makes sense to extend that guard to synthesized
data as well.

When synthesizing counts, propagate counts to finallies from the associated
trys. This needs to be done carefully, as we have to make sure not to visit a
finally until the count in its try is set. We rely on properties of DFS pre
and post number bracketing to do this efficiently, without needing to track
extra state.

Contributes to #82964.
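
To see the bracketing property in isolation: below is a minimal, self-contained C++ sketch, not JIT code; the toy flow graph, the Block struct, and all names are hypothetical. When a DFS runs root by root over a forest with shared pre/post counters, each root r has post(r) - pre(r) + 1 equal to the size of its tree, so the tree occupies one contiguous slice of the reverse postorder. Handler entries become separate roots (the DFS does not follow EH flow), which is what lets a try's tree be fully counted before its finally's tree is visited.

#include <cassert>
#include <cstdio>
#include <vector>

struct Block
{
    std::vector<int> succs;
    int pre  = 0; // 1-based preorder number; 0 means not yet visited
    int post = 0; // 1-based postorder number
};

static void Dfs(std::vector<Block>& g, int n, int& preNum, int& postNum, std::vector<int>& rpo)
{
    g[n].pre = ++preNum;
    for (int s : g[n].succs)
    {
        if (g[s].pre == 0)
        {
            Dfs(g, s, preNum, postNum, rpo);
        }
    }
    g[n].post = ++postNum;
    rpo[g.size() - g[n].post] = n; // postorder p lands in 0-based RPO slot N - p
}

int main()
{
    // Blocks 0..2 are the "main body" (0 is the method entry, 1 a try entry).
    // Block 3 stands in for a finally entry: reachable only through EH flow,
    // which the DFS does not model, so it becomes its own root and tree.
    std::vector<Block> g(4);
    g[0].succs = {1};
    g[1].succs = {2};

    int preNum  = 0;
    int postNum = 0;
    std::vector<int> rpo(g.size());
    for (int root : {0, 3}) // main body first, then handlers
    {
        if (g[root].pre == 0)
        {
            Dfs(g, root, preNum, postNum, rpo);
        }
    }

    const int n = (int)g.size();
    for (int root : {0, 3})
    {
        // Bracketing holds for DFS roots (not arbitrary interior nodes):
        // the root's tree is exactly one contiguous reverse postorder slice.
        const int first = n - g[root].post;                   // 0-based slice start
        const int last  = first + g[root].post - g[root].pre; // slice size is post - pre + 1
        assert(rpo[first] == root);
        printf("root %d owns RPO slice [%d, %d]\n", root, first, last);
    }
    return 0;
}

The JIT's 1-based equivalent of the slice computation, firstIndex = fgBBNumMax - bbPostorderNum + 1, appears in the profilesynthesis.cpp hunks below.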
Parent 434d6647
@@ -419,7 +419,8 @@ public:
Blend = 3, // PGO data comes from blend of prior runs and current run
Text = 4, // PGO data comes from text file
IBC = 5, // PGO data from classic IBC
Sampling= 6, // PGO data derived from sampling
Sampling = 6, // PGO data derived from sampling
Synthesis = 7, // PGO data derived from synthesis
};
#define DEFAULT_UNKNOWN_HANDLE 1
......
@@ -1835,9 +1835,9 @@ void CodeGen::genGenerateMachineCode()
{
printf("; instrumented for collecting profile data\n");
}
else if (compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_BBOPT) && compiler->fgHaveProfileData())
else if (compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_BBOPT) && compiler->fgHaveProfileWeights())
{
printf("; optimized using profile data\n");
printf("; optimized using %s\n", compiler->compGetPgoSourceName());
}
#if DOUBLE_ALIGN
@@ -1856,7 +1856,7 @@ void CodeGen::genGenerateMachineCode()
printf("; partially interruptible\n");
}
if (compiler->fgHaveProfileData())
if (compiler->fgHaveProfileWeights())
{
printf("; with %s: edge weights are %s, and fgCalledCount is " FMT_WT "\n",
compiler->compGetPgoSourceName(), compiler->fgHaveValidEdgeWeights ? "valid" : "invalid",
......
@@ -3343,14 +3343,9 @@ void Compiler::compInitOptions(JitFlags* jitFlags)
printf("OPTIONS: compProcedureSplitting = %s\n", dspBool(opts.compProcedureSplitting));
printf("OPTIONS: compProcedureSplittingEH = %s\n", dspBool(opts.compProcedureSplittingEH));
if (jitFlags->IsSet(JitFlags::JIT_FLAG_BBOPT) && fgHaveProfileData())
if (jitFlags->IsSet(JitFlags::JIT_FLAG_BBOPT))
{
printf("OPTIONS: optimized using %s profile data\n", pgoSourceToString(fgPgoSource));
}
if (fgPgoFailReason != nullptr)
{
printf("OPTIONS: %s\n", fgPgoFailReason);
printf("OPTIONS: optimizer should use profile data\n");
}
if (jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT))
@@ -4266,13 +4261,17 @@ const char* Compiler::compGetPgoSourceName() const
case ICorJitInfo::PgoSource::Dynamic:
return "Dynamic PGO";
case ICorJitInfo::PgoSource::Blend:
return "Blend PGO";
return "Blended PGO";
case ICorJitInfo::PgoSource::Text:
return "Textual PGO";
case ICorJitInfo::PgoSource::Sampling:
return "Sample-based PGO";
case ICorJitInfo::PgoSource::IBC:
return "Classic IBC";
case ICorJitInfo::PgoSource::Synthesis:
return "Synthesized PGO";
default:
return "";
return "Unknown PGO";
}
}
......
@@ -9745,7 +9745,6 @@ public:
}
}
const char* pgoSourceToString(ICorJitInfo::PgoSource p);
const char* devirtualizationDetailToString(CORINFO_DEVIRTUALIZATION_DETAIL detail);
#endif // DEBUG
......
@@ -3925,7 +3925,7 @@ void Compiler::fgCheckForLoopsInHandlers()
return;
}
// Walk blocks in handlers and filters, looing for a backedge target.
// Walk blocks in handlers and filters, looking for a backedge target.
//
assert(!compHasBackwardJumpInHandler);
for (BasicBlock* const blk : Blocks())
......
@@ -54,11 +54,6 @@ bool Compiler::fgHaveProfileData()
//
bool Compiler::fgHaveProfileWeights()
{
if (!fgHaveProfileData())
{
return false;
}
return fgPgoHaveWeights;
}
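Note the semantic shift in this hunk: fgHaveProfileWeights no longer implies fgHaveProfileData. A synthesized profile sets fgPgoHaveWeights without supplying a schema (see the ProfileSynthesis::Run hunk below), so it has usable weights but no schema-based profile data; that asymmetry is why the dump sites in genGenerateMachineCode above now key on fgHaveProfileWeights instead of fgHaveProfileData.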
@@ -79,12 +74,29 @@ bool Compiler::fgHaveSufficientProfileWeights()
return false;
}
if ((fgFirstBB != nullptr) && (fgPgoSource == ICorJitInfo::PgoSource::Static))
switch (fgPgoSource)
{
const weight_t sufficientSamples = 1000;
return fgFirstBB->bbWeight > sufficientSamples;
case ICorJitInfo::PgoSource::Dynamic:
case ICorJitInfo::PgoSource::Text:
return true;
case ICorJitInfo::PgoSource::Static:
case ICorJitInfo::PgoSource::Blend:
{
// We sometimes call this very early, eg evaluating the prejit root.
//
if (fgFirstBB != nullptr)
{
const weight_t sufficientSamples = 1000;
return fgFirstBB->bbWeight > sufficientSamples;
}
return true;
}
default:
return false;
}
return true;
}
//------------------------------------------------------------------------
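To make the new sufficiency policy concrete with the numbers in the switch above: Dynamic and Text data are always considered sufficient; under Static or Blend data, a method whose fgFirstBB->bbWeight is 250 falls below the sufficientSamples threshold of 1000 and reports insufficient weights, while a weight of 5000 passes; every other source, including IBC, Sampling, and the new Synthesis, reports insufficient.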
@@ -2493,7 +2505,6 @@ PhaseStatus Compiler::fgIncorporateProfileData()
{
JITDUMP("Synthesizing profile data and writing it out as the actual profile data\n");
ProfileSynthesis::Run(this, ProfileSynthesisOption::AssignLikelihoods);
fgPgoHaveWeights = false;
return PhaseStatus::MODIFIED_EVERYTHING;
}
#endif
@@ -2522,8 +2533,8 @@ PhaseStatus Compiler::fgIncorporateProfileData()
// Summarize profile data
//
JITDUMP("Have %s profile data: %d schema records (schema at %p, data at %p)\n", pgoSourceToString(fgPgoSource),
fgPgoSchemaCount, dspPtr(fgPgoSchema), dspPtr(fgPgoData));
JITDUMP("Have %s: %d schema records (schema at %p, data at %p)\n", compGetPgoSourceName(), fgPgoSchemaCount,
dspPtr(fgPgoSchema), dspPtr(fgPgoData));
fgNumProfileRuns = 0;
unsigned otherRecords = 0;
@@ -3431,15 +3442,29 @@ void EfficientEdgeCountReconstructor::Propagate()
//
if (m_badcode || m_mismatch || m_failedToConverge || m_allWeightsZero)
{
JITDUMP("... discarding profile data because of %s\n",
m_badcode ? "badcode" : m_mismatch ? "mismatch" : m_allWeightsZero ? "zero counts"
: "failed to converge");
// Make sure nothing else in the jit looks at the profile data.
//
m_comp->fgPgoSchema = nullptr;
m_comp->fgPgoFailReason = "PGO data available, but there was a reconstruction problem";
m_comp->fgPgoHaveWeights = false;
m_comp->fgPgoSchema = nullptr;
if (m_badcode)
{
m_comp->fgPgoFailReason = "PGO data available, but IL was malformed";
}
else if (m_mismatch)
{
m_comp->fgPgoFailReason = "PGO data available, but IL did not match";
}
else if (m_failedToConverge)
{
m_comp->fgPgoFailReason = "PGO data available, but solver did not converge";
}
else
{
m_comp->fgPgoFailReason = "PGO data available, profile data was all zero";
}
JITDUMP("... discarding profile data: %s\n", m_comp->fgPgoFailReason);
return;
}
@@ -5333,43 +5358,4 @@ bool Compiler::fgDebugCheckOutgoingProfileData(BasicBlock* block, ProfileChecks
return classicWeightsValid && likelyWeightsValid;
}
//------------------------------------------------------------------------------
// pgoSourceToString: describe source of pgo data
//
// Arguments:
// r - source enum to describe
//
// Returns:
// descriptive string
//
const char* Compiler::pgoSourceToString(ICorJitInfo::PgoSource p)
{
const char* pgoSource = "unknown";
switch (fgPgoSource)
{
case ICorJitInfo::PgoSource::Dynamic:
pgoSource = "dynamic";
break;
case ICorJitInfo::PgoSource::Static:
pgoSource = "static";
break;
case ICorJitInfo::PgoSource::Text:
pgoSource = "text";
break;
case ICorJitInfo::PgoSource::Blend:
pgoSource = "static+dynamic";
break;
case ICorJitInfo::PgoSource::IBC:
pgoSource = "IBC";
break;
case ICorJitInfo::PgoSource::Sampling:
pgoSource = "Sampling";
break;
default:
break;
}
return pgoSource;
}
#endif // DEBUG
@@ -96,7 +96,8 @@ void ProfileSynthesis::Run(ProfileSynthesisOption option)
// For now, since we have installed synthesized profile data,
// act as if we don't have "real" profile data.
//
m_comp->fgPgoHaveWeights = false;
m_comp->fgPgoHaveWeights = true;
m_comp->fgPgoSource = ICorJitInfo::PgoSource::Synthesis;
#ifdef DEBUG
if (JitConfig.JitCheckSynthesizedCounts() > 0)
@@ -847,11 +848,9 @@ void ProfileSynthesis::ComputeCyclicProbabilities(SimpleLoop* loop)
//------------------------------------------------------------------------
// AssignInputWeights: provide initial profile weights for all blocks
//
//
// Notes:
// For finallys we may want to come back and rescale once we know the
// weights of all the callfinallies, or perhaps just use the weight
// of the first block in the associated try.
// For finallys we will pick up new entry weights when we process
// the subtree that can invoke them normally.
//
void ProfileSynthesis::AssignInputWeights()
{
@@ -860,19 +859,19 @@ void ProfileSynthesis::AssignInputWeights()
for (BasicBlock* block : m_comp->Blocks())
{
block->bbWeight = 0.0;
block->setBBProfileWeight(0.0);
}
m_comp->fgFirstBB->bbWeight = entryWeight;
m_comp->fgFirstBB->setBBProfileWeight(entryWeight);
for (EHblkDsc* const HBtab : EHClauses(m_comp))
{
if (HBtab->HasFilter())
{
HBtab->ebdFilter->bbWeight = ehWeight;
HBtab->ebdFilter->setBBProfileWeight(ehWeight);
}
HBtab->ebdHndBeg->bbWeight = ehWeight;
HBtab->ebdHndBeg->setBBProfileWeight(ehWeight);
}
}
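The switch from raw bbWeight stores to setBBProfileWeight in this hunk is not cosmetic: the helper also maintains profile-related block state. Here is a hedged sketch of its shape, an assumption based on the surrounding codebase rather than part of this diff:

// Sketch only; the real helper is a BasicBlock member in block.h. Assumed
// behavior: record the weight, mark it as profile-derived, and keep the
// run-rarely flag in sync, which is what makes the explicit BBF_RUN_RARELY
// manipulation in the next hunk deletable.
void BasicBlock::setBBProfileWeight(weight_t weight)
{
    bbFlags |= BBF_PROF_WEIGHT; // weight is (or stands in for) measured profile data
    bbWeight = weight;
    if (weight == BB_ZERO_WEIGHT)
    {
        bbFlags |= BBF_RUN_RARELY;
    }
    else
    {
        bbFlags &= ~BBF_RUN_RARELY;
    }
}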
@@ -880,62 +879,129 @@
// ComputeBlockWeights: compute weights for all blocks
// based on input weights, edge likelihoods, and cyclic probabilities
//
// Notes:
// We want to first walk the main method body, then any finally
// handlers from outermost to innermost.
//
// The depth first walk we did to kick off synthesis has split the
// graph into a forest of depth first spanning trees. We leverage
// this and the EH table structure to accomplish the visiting order above.
//
// We might be able to avoid all this if during the DFS walk we
// walked from try entries to filter or handlers, so that a
// single DFST encompassed all the reachable blocks in the right order.
//
void ProfileSynthesis::ComputeBlockWeights()
{
JITDUMP("Computing block weights\n");
for (unsigned int i = 1; i <= m_comp->fgBBNumMax; i++)
{
BasicBlock* const block = m_comp->fgBBReversePostorder[i];
SimpleLoop* const loop = GetLoopFromHeader(block);
// Main method body
//
ComputeBlockWeightsSubgraph(m_comp->fgFirstBB);
if (loop != nullptr)
// All finally and fault handlers from outer->inner
// (walk EH table backwards)
//
for (unsigned i = 0; i < m_comp->compHndBBtabCount; i++)
{
unsigned const XTnum = m_comp->compHndBBtabCount - i - 1;
EHblkDsc* const HBtab = &m_comp->compHndBBtab[XTnum];
if (HBtab->HasFilter())
{
// Start with initial weight, sum entry edges, multiply by Cp
// Filter subtree includes handler
//
weight_t newWeight = block->bbWeight;
for (FlowEdge* const edge : loop->m_entryEdges)
{
if (BasicBlock::sameHndRegion(block, edge->getSourceBlock()))
{
newWeight += edge->getLikelyWeight();
}
}
newWeight *= loop->m_cyclicProbability;
block->bbWeight = newWeight;
JITDUMP("cbw (header): " FMT_BB " :: " FMT_WT "\n", block->bbNum, block->bbWeight);
ComputeBlockWeightsSubgraph(HBtab->ebdFilter);
}
else
{
// start with initial weight, sum all incoming edges
//
weight_t newWeight = block->bbWeight;
ComputeBlockWeightsSubgraph(HBtab->ebdHndBeg);
}
}
for (FlowEdge* const edge : block->PredEdges())
{
if (BasicBlock::sameHndRegion(block, edge->getSourceBlock()))
{
newWeight += edge->getLikelyWeight();
}
}
// Anything else is unreachable and will have zero count
}
//------------------------------------------------------------------------
// ComputeBlockWeightsSubgraph: compute weights for all blocks in a particular DFST
//
// Arguments:
// entry - root node of a DFST
//
void ProfileSynthesis::ComputeBlockWeightsSubgraph(BasicBlock* entry)
{
// Determine the range of indices for this DFST in the overall RPO.
//
const unsigned firstIndex = m_comp->fgBBNumMax - entry->bbPostorderNum + 1;
assert(m_comp->fgBBReversePostorder[firstIndex] == entry);
assert(entry->bbPostorderNum >= entry->bbPreorderNum);
const unsigned lastIndex = firstIndex + entry->bbPostorderNum - entry->bbPreorderNum;
for (unsigned int i = firstIndex; i <= lastIndex; i++)
{
BasicBlock* const block = m_comp->fgBBReversePostorder[i];
ComputeBlockWeight(block);
}
}
block->bbWeight = newWeight;
//------------------------------------------------------------------------
// ComputeBlockWeight: compute weight for a given block
//
// Arguments:
// block - block in question
//
void ProfileSynthesis::ComputeBlockWeight(BasicBlock* block)
{
SimpleLoop* const loop = GetLoopFromHeader(block);
weight_t newWeight = block->bbWeight;
const char* kind = "";
JITDUMP("cbw: " FMT_BB " :: " FMT_WT "\n", block->bbNum, block->bbWeight);
if (loop != nullptr)
{
// Sum all entry edges that aren't EH flow
//
for (FlowEdge* const edge : loop->m_entryEdges)
{
if (BasicBlock::sameHndRegion(block, edge->getSourceBlock()))
{
newWeight += edge->getLikelyWeight();
}
}
// Todo: just use weight to determine run rarely, not flag
// Scale by cyclic probability
//
newWeight *= loop->m_cyclicProbability;
kind = " (loop head)";
}
else
{
// Sum all incoming edges that aren't EH flow
//
if (block->bbWeight == 0.0)
for (FlowEdge* const edge : block->PredEdges())
{
block->bbSetRunRarely();
if (BasicBlock::sameHndRegion(block, edge->getSourceBlock()))
{
newWeight += edge->getLikelyWeight();
}
}
else
}
block->setBBProfileWeight(newWeight);
JITDUMP("cbw%s: " FMT_BB " :: " FMT_WT "\n", kind, block->bbNum, block->bbWeight);
// If we're at the start of a try in a try/finally, update the finally
// entry to reflect the proper weight.
//
if (m_comp->bbIsTryBeg(block))
{
EHblkDsc* const HBtab = m_comp->ehGetBlockTryDsc(block);
if (HBtab->HasFinallyHandler())
{
block->bbFlags &= ~BBF_RUN_RARELY;
BasicBlock* const finallyEntry = HBtab->ebdHndBeg;
finallyEntry->setBBProfileWeight(newWeight);
kind = " (finally)";
JITDUMP("cbw%s: " FMT_BB " :: " FMT_WT "\n", kind, finallyEntry->bbNum, finallyEntry->bbWeight);
}
}
}
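A worked example of the index arithmetic in ComputeBlockWeightsSubgraph, with hypothetical numbers: assume a compact numbering of ten blocks (fgBBNumMax == 10), where the DFS numbered the seven-block main body first (fgFirstBB gets bbPreorderNum 1, bbPostorderNum 7) and a three-block finally tree last (its entry gets bbPreorderNum 8, bbPostorderNum 10). For the finally entry, firstIndex = 10 - 10 + 1 = 1 and lastIndex = 1 + (10 - 8) = 3; for fgFirstBB, firstIndex = 10 - 7 + 1 = 4 and lastIndex = 4 + (7 - 1) = 10. The slices [1, 3] and [4, 10] partition fgBBReversePostorder, so each spanning tree can be processed independently, in whatever order ComputeBlockWeights chooses. It chooses the main body first, then the EH table walked backwards; since inner clauses precede enclosing ones in the table, backwards means outer to inner. By the time a finally's slice is processed, ComputeBlockWeight has already visited the matching try entry in an earlier slice and pushed that settled weight into the finally entry, which is exactly the "don't visit the finally until the try's count is set" invariant from the commit message.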
@@ -96,7 +96,10 @@ private:
void ComputeCyclicProbabilities(SimpleLoop* loop);
void AssignInputWeights();
void ComputeBlockWeights();
void ComputeBlockWeightsSubgraph(BasicBlock* block);
void ComputeBlockWeight(BasicBlock* block);
private:
Compiler* const m_comp;
......