From ccefbf9eb36cb3d8adb36f4b141914c2c6cc489b Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 15 Mar 2023 11:57:55 +0100 Subject: [PATCH] JIT: Remove "add copies" phase (#83310) This phase seems to try to do some ad-hoc live range splitting to improve things in assertion prop, but it almost barely kicks in. When it does kick in it seems to overall be a regression, both in ASM diffs and in TP diffs. Furthermore, we pay 16 bytes in every LclVarDsc (out of 88) for bookkeeping purposes for this pass, even in MinOpts. --- src/coreclr/jit/assertionprop.cpp | 468 ------------------------------ src/coreclr/jit/compiler.cpp | 8 - src/coreclr/jit/compiler.h | 22 +- src/coreclr/jit/compphases.h | 1 - src/coreclr/jit/lclvars.cpp | 51 ---- 5 files changed, 4 insertions(+), 546 deletions(-) diff --git a/src/coreclr/jit/assertionprop.cpp b/src/coreclr/jit/assertionprop.cpp index 498b00a4f94..7c53a70f292 100644 --- a/src/coreclr/jit/assertionprop.cpp +++ b/src/coreclr/jit/assertionprop.cpp @@ -474,474 +474,6 @@ bool IntegralRange::Contains(int64_t value) const } #endif // DEBUG -/***************************************************************************** - * - * Helper passed to Compiler::fgWalkTreePre() to find the Asgn node for optAddCopies() - */ - -/* static */ -Compiler::fgWalkResult Compiler::optAddCopiesCallback(GenTree** pTree, fgWalkData* data) -{ - GenTree* tree = *pTree; - - if (tree->OperIs(GT_ASG)) - { - GenTree* op1 = tree->AsOp()->gtOp1; - Compiler* comp = data->compiler; - - if ((op1->gtOper == GT_LCL_VAR) && (op1->AsLclVarCommon()->GetLclNum() == comp->optAddCopyLclNum)) - { - comp->optAddCopyAsgnNode = tree; - return WALK_ABORT; - } - } - return WALK_CONTINUE; -} - -//------------------------------------------------------------------------------ -// optAddCopies: Add new copies before Assertion Prop. -// -// Returns: -// suitable phase satus -// -PhaseStatus Compiler::optAddCopies() -{ - unsigned lclNum; - LclVarDsc* varDsc; - -#ifdef DEBUG - if (verbose) - { - printf("\n*************** In optAddCopies()\n\n"); - } -#endif - - // Don't add any copies if we have reached the tracking limit. - if (lvaHaveManyLocals()) - { - return PhaseStatus::MODIFIED_NOTHING; - } - - bool modified = false; - - for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++) - { - var_types typ = varDsc->TypeGet(); - - // We only add copies for non temp local variables - // that have a single def and that can possibly be enregistered - - if (varDsc->lvIsTemp || !varDsc->lvSingleDef || !varTypeIsEnregisterable(typ)) - { - continue; - } - - /* For lvNormalizeOnLoad(), we need to add a cast to the copy-assignment - like "copyLclNum = int(varDsc)" and optAssertionGen() only - tracks simple assignments. The same goes for lvNormalizedOnStore as - the cast is generated in fgMorphSmpOpAsg. This boils down to not having - a copy until optAssertionGen handles this*/ - if (varDsc->lvNormalizeOnLoad() || varDsc->lvNormalizeOnStore()) - { - continue; - } - - if (varTypeIsSmall(varDsc->TypeGet()) || typ == TYP_BOOL) - { - continue; - } - - // If locals must be initialized to zero, that initialization counts as a second definition. - // VB in particular allows usage of variables not explicitly initialized. - // Note that this effectively disables this optimization for all local variables - // as C# sets InitLocals all the time starting in Whidbey. - - if (!varDsc->lvIsParam && info.compInitMem) - { - continue; - } - - // On x86 we may want to add a copy for an incoming double parameter - // because we can ensure that the copy we make is double aligned - // where as we can never ensure the alignment of an incoming double parameter - // - // On all other platforms we will never need to make a copy - // for an incoming double parameter - - bool isFloatParam = false; - -#ifdef TARGET_X86 - isFloatParam = varDsc->lvIsParam && varTypeIsFloating(typ); -#endif - - if (!isFloatParam && !varDsc->lvVolatileHint) - { - continue; - } - - // We don't want to add a copy for a variable that is part of a struct - if (varDsc->lvIsStructField) - { - continue; - } - - // We require that the weighted ref count be significant. - if (varDsc->lvRefCntWtd() <= (BB_LOOP_WEIGHT_SCALE * BB_UNITY_WEIGHT / 2)) - { - continue; - } - - // For parameters, we only want to add a copy for the heavier-than-average - // uses instead of adding a copy to cover every single use. - // 'paramImportantUseDom' is the set of blocks that dominate the - // heavier-than-average uses of a parameter. - // Initial value is all blocks. - - BlockSet paramImportantUseDom(BlockSetOps::MakeFull(this)); - - // This will be threshold for determining heavier-than-average uses - weight_t paramAvgWtdRefDiv2 = (varDsc->lvRefCntWtd() + varDsc->lvRefCnt() / 2) / (varDsc->lvRefCnt() * 2); - - bool paramFoundImportantUse = false; - -#ifdef DEBUG - if (verbose) - { - printf("Trying to add a copy for V%02u %s, avg_wtd = %s\n", lclNum, - varDsc->lvIsParam ? "an arg" : "a local", refCntWtd2str(paramAvgWtdRefDiv2)); - } -#endif - - // - // We must have a ref in a block that is dominated only by the entry block - // - - if (BlockSetOps::MayBeUninit(varDsc->lvRefBlks)) - { - // No references - continue; - } - - bool isDominatedByFirstBB = false; - - BlockSetOps::Iter iter(this, varDsc->lvRefBlks); - unsigned bbNum = 0; - while (iter.NextElem(&bbNum)) - { - /* Find the block 'bbNum' */ - BasicBlock* block = fgFirstBB; - while (block && (block->bbNum != bbNum)) - { - block = block->bbNext; - } - noway_assert(block && (block->bbNum == bbNum)); - - bool importantUseInBlock = (varDsc->lvIsParam) && (block->getBBWeight(this) > paramAvgWtdRefDiv2); - bool isPreHeaderBlock = ((block->bbFlags & BBF_LOOP_PREHEADER) != 0); - BlockSet blockDom(BlockSetOps::UninitVal()); - BlockSet blockDomSub0(BlockSetOps::UninitVal()); - - if (block->bbIDom == nullptr && isPreHeaderBlock) - { - // Loop Preheader blocks that we insert will have a bbDom set that is nullptr - // but we can instead use the bNext successor block's dominator information - noway_assert(block->bbNext != nullptr); - BlockSetOps::AssignNoCopy(this, blockDom, fgGetDominatorSet(block->bbNext)); - } - else - { - BlockSetOps::AssignNoCopy(this, blockDom, fgGetDominatorSet(block)); - } - - if (!BlockSetOps::IsEmpty(this, blockDom)) - { - BlockSetOps::Assign(this, blockDomSub0, blockDom); - if (isPreHeaderBlock) - { - // We must clear bbNext block number from the dominator set - BlockSetOps::RemoveElemD(this, blockDomSub0, block->bbNext->bbNum); - } - /* Is this block dominated by fgFirstBB? */ - if (BlockSetOps::IsMember(this, blockDomSub0, fgFirstBB->bbNum)) - { - isDominatedByFirstBB = true; - } - } - -#ifdef DEBUG - if (verbose) - { - printf(" Referenced in " FMT_BB ", bbWeight is %s", bbNum, - refCntWtd2str(block->getBBWeight(this))); - - if (isDominatedByFirstBB) - { - printf(", which is dominated by BB01"); - } - - if (importantUseInBlock) - { - printf(", ImportantUse"); - } - - printf("\n"); - } -#endif - - /* If this is a heavier-than-average block, then track which - blocks dominate this use of the parameter. */ - if (importantUseInBlock) - { - paramFoundImportantUse = true; - BlockSetOps::IntersectionD(this, paramImportantUseDom, - blockDomSub0); // Clear blocks that do not dominate - } - } - - // We should have found at least one heavier-than-averageDiv2 block. - if (varDsc->lvIsParam) - { - if (!paramFoundImportantUse) - { - continue; - } - } - - // For us to add a new copy: - // we require that we have a floating point parameter - // or a lvVolatile variable that is always reached from the first BB - // and we have at least one block available in paramImportantUseDom - // - bool doCopy = (isFloatParam || (isDominatedByFirstBB && varDsc->lvVolatileHint)) && - !BlockSetOps::IsEmpty(this, paramImportantUseDom); - - // Under stress mode we expand the number of candidates - // to include parameters of any type - // or any variable that is always reached from the first BB - // - if (compStressCompile(STRESS_GENERIC_VARN, 30)) - { - // Ensure that we preserve the invariants required by the subsequent code. - if (varDsc->lvIsParam || isDominatedByFirstBB) - { - doCopy = true; - } - } - - if (!doCopy) - { - continue; - } - - Statement* stmt; - unsigned copyLclNum = lvaGrabTemp(false DEBUGARG("optAddCopies")); - - // Because lvaGrabTemp may have reallocated the lvaTable, ensure varDsc is still in sync. - varDsc = lvaGetDesc(lclNum); - - // Set lvType on the new Temp Lcl Var - lvaGetDesc(copyLclNum)->lvType = typ; - -#ifdef DEBUG - if (verbose) - { - printf("\n Finding the best place to insert the assignment V%02i=V%02i\n", copyLclNum, lclNum); - } -#endif - - if (varDsc->lvIsParam) - { - noway_assert(varDsc->lvDefStmt == nullptr || varDsc->lvIsStructField); - - // Create a new copy assignment tree - GenTree* copyAsgn = gtNewTempAssign(copyLclNum, gtNewLclvNode(lclNum, typ)); - - /* Find the best block to insert the new assignment */ - /* We will choose the lowest weighted block, and within */ - /* those block, the highest numbered block which */ - /* dominates all the uses of the local variable */ - - /* Our default is to use the first block */ - BasicBlock* bestBlock = fgFirstBB; - weight_t bestWeight = bestBlock->getBBWeight(this); - BasicBlock* block = bestBlock; - -#ifdef DEBUG - if (verbose) - { - printf(" Starting at " FMT_BB ", bbWeight is %s", block->bbNum, - refCntWtd2str(block->getBBWeight(this))); - - printf(", bestWeight is %s\n", refCntWtd2str(bestWeight)); - } -#endif - - /* We have already calculated paramImportantUseDom above. */ - BlockSetOps::Iter iter(this, paramImportantUseDom); - unsigned bbNum = 0; - while (iter.NextElem(&bbNum)) - { - /* Advance block to point to 'bbNum' */ - /* This assumes that the iterator returns block number is increasing lexical order. */ - while (block && (block->bbNum != bbNum)) - { - block = block->bbNext; - } - noway_assert(block && (block->bbNum == bbNum)); - -#ifdef DEBUG - if (verbose) - { - printf(" Considering " FMT_BB ", bbWeight is %s", block->bbNum, - refCntWtd2str(block->getBBWeight(this))); - - printf(", bestWeight is %s\n", refCntWtd2str(bestWeight)); - } -#endif - - // Does this block have a smaller bbWeight value? - if (block->getBBWeight(this) > bestWeight) - { -#ifdef DEBUG - if (verbose) - { - printf("bbWeight too high\n"); - } -#endif - continue; - } - - // Don't use blocks that are exception handlers because - // inserting a new first statement will interface with - // the CATCHARG - - if (handlerGetsXcptnObj(block->bbCatchTyp)) - { -#ifdef DEBUG - if (verbose) - { - printf("Catch block\n"); - } -#endif - continue; - } - - // Don't use the BBJ_ALWAYS block marked with BBF_KEEP_BBJ_ALWAYS. These - // are used by EH code. The JIT can not generate code for such a block. - - if (block->bbFlags & BBF_KEEP_BBJ_ALWAYS) - { -#if defined(FEATURE_EH_FUNCLETS) - // With funclets, this is only used for BBJ_CALLFINALLY/BBJ_ALWAYS pairs. For x86, it is also used - // as the "final step" block for leaving finallys. - assert(block->isBBCallAlwaysPairTail()); -#endif // FEATURE_EH_FUNCLETS -#ifdef DEBUG - if (verbose) - { - printf("Internal EH BBJ_ALWAYS block\n"); - } -#endif - continue; - } - - // This block will be the new candidate for the insert point - // for the new assignment - CLANG_FORMAT_COMMENT_ANCHOR; - -#ifdef DEBUG - if (verbose) - { - printf("new bestBlock\n"); - } -#endif - - bestBlock = block; - bestWeight = block->getBBWeight(this); - } - - // If there is a use of the variable in this block - // then we insert the assignment at the beginning - // otherwise we insert the statement at the end - CLANG_FORMAT_COMMENT_ANCHOR; - -#ifdef DEBUG - if (verbose) - { - printf(" Insert copy at the %s of " FMT_BB "\n", - (BlockSetOps::IsEmpty(this, paramImportantUseDom) || - BlockSetOps::IsMember(this, varDsc->lvRefBlks, bestBlock->bbNum)) - ? "start" - : "end", - bestBlock->bbNum); - } -#endif - - if (BlockSetOps::IsEmpty(this, paramImportantUseDom) || - BlockSetOps::IsMember(this, varDsc->lvRefBlks, bestBlock->bbNum)) - { - stmt = fgNewStmtAtBeg(bestBlock, copyAsgn); - } - else - { - stmt = fgNewStmtNearEnd(bestBlock, copyAsgn); - } - } - else - { - noway_assert(varDsc->lvDefStmt != nullptr); - - /* Locate the assignment to varDsc in the lvDefStmt */ - stmt = varDsc->lvDefStmt; - - optAddCopyLclNum = lclNum; // in - optAddCopyAsgnNode = nullptr; // out - - fgWalkTreePre(stmt->GetRootNodePointer(), Compiler::optAddCopiesCallback, (void*)this, false); - - noway_assert(optAddCopyAsgnNode); - - GenTree* tree = optAddCopyAsgnNode; - GenTree* op1 = tree->AsOp()->gtOp1; - - noway_assert(tree && op1 && tree->OperIs(GT_ASG) && (op1->gtOper == GT_LCL_VAR) && - (op1->AsLclVarCommon()->GetLclNum() == lclNum)); - - /* Assign the old expression into the new temp */ - - GenTree* newAsgn = gtNewTempAssign(copyLclNum, tree->AsOp()->gtOp2); - - /* Copy the new temp to op1 */ - - GenTree* copyAsgn = gtNewAssignNode(op1, gtNewLclvNode(copyLclNum, typ)); - - /* Change the tree to a GT_COMMA with the two assignments as child nodes */ - - tree->gtBashToNOP(); - tree->ChangeOper(GT_COMMA); - - tree->AsOp()->gtOp1 = newAsgn; - tree->AsOp()->gtOp2 = copyAsgn; - - tree->gtFlags |= (newAsgn->gtFlags & GTF_ALL_EFFECT); - tree->gtFlags |= (copyAsgn->gtFlags & GTF_ALL_EFFECT); - } - - modified = true; - -#ifdef DEBUG - if (verbose) - { - printf("\nIntroducing a new copy for V%02u\n", lclNum); - gtDispTree(stmt->GetRootNode()); - printf("\n"); - } -#endif - } - - return modified ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING; -} - //------------------------------------------------------------------------------ // GetAssertionDep: Retrieve the assertions on this local variable // diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index fe8a524ce66..581addf4123 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -4828,14 +4828,6 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl // However, ref counts are not kept incrementally up to date. assert(lvaLocalVarRefCounted()); - if (opts.OptimizationEnabled()) - { - // Introduce copies for some single-def locals to make them more - // amenable to optimization - // - DoPhase(this, PHASE_OPTIMIZE_ADD_COPIES, &Compiler::optAddCopies); - } - // Figure out the order in which operators are to be evaluated // DoPhase(this, PHASE_FIND_OPER_ORDER, &Compiler::fgFindOperOrder); diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 17a5a1d0894..9a255c6f975 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -492,13 +492,10 @@ public: // Morph will update if this local is passed in a register. LclVarDsc() : _lvArgReg(REG_STK) - , #if FEATURE_MULTIREG_ARGS - _lvOtherArgReg(REG_STK) - , + , _lvOtherArgReg(REG_STK) #endif // FEATURE_MULTIREG_ARGS - lvClassHnd(NO_CLASS_HANDLE) - , lvRefBlks(BlockSetOps::UninitVal()) + , lvClassHnd(NO_CLASS_HANDLE) , lvPerSsaData() { } @@ -553,9 +550,8 @@ public: #endif // defined(TARGET_LOONGARCH64) unsigned char lvIsBoolean : 1; // set if variable is boolean - unsigned char lvSingleDef : 1; // variable has a single def - // before lvaMarkLocalVars: identifies ref type locals that can get type updates - // after lvaMarkLocalVars: identifies locals that are suitable for optAddCopies + unsigned char lvSingleDef : 1; // variable has a single def. Used to identify ref type locals that can get type + // updates unsigned char lvSingleDefRegCandidate : 1; // variable has a single def and hence is a register candidate // Currently, this is only used to decide if an EH variable can be @@ -571,7 +567,6 @@ public: // in earlier phase and the information might not be appropriate // in LSRA. - unsigned char lvDisqualify : 1; // variable is no longer OK for add copy optimization unsigned char lvVolatileHint : 1; // hint for AssertionProp #ifndef TARGET_64BIT @@ -1090,10 +1085,6 @@ private: ClassLayout* m_layout; // layout info for structs public: - BlockSet lvRefBlks; // Set of blocks that contain refs - Statement* lvDefStmt; // Pointer to the statement with the single definition - void lvaDisqualifyVar(); // Call to disqualify a local variable from use in optAddCopies - var_types TypeGet() const { return (var_types)lvType; @@ -7438,10 +7429,7 @@ public: }; protected: - static fgWalkPreFn optAddCopiesCallback; static fgWalkPreFn optVNAssertionPropCurStmtVisitor; - unsigned optAddCopyLclNum; - GenTree* optAddCopyAsgnNode; bool optLocalAssertionProp; // indicates that we are performing local assertion prop bool optAssertionPropagated; // set to true if we modified the trees @@ -7589,8 +7577,6 @@ public: static void optDumpAssertionIndices(const char* header, ASSERT_TP assertions, const char* footer = nullptr); static void optDumpAssertionIndices(ASSERT_TP assertions, const char* footer = nullptr); - PhaseStatus optAddCopies(); - /************************************************************************** * Range checks *************************************************************************/ diff --git a/src/coreclr/jit/compphases.h b/src/coreclr/jit/compphases.h index dfcd9da0ee9..1949a9d26a3 100644 --- a/src/coreclr/jit/compphases.h +++ b/src/coreclr/jit/compphases.h @@ -69,7 +69,6 @@ CompPhaseNameMacro(PHASE_CLEAR_LOOP_INFO, "Clear loop info", CompPhaseNameMacro(PHASE_MORPH_MDARR, "Morph array ops", false, -1, false) CompPhaseNameMacro(PHASE_HOIST_LOOP_CODE, "Hoist loop code", false, -1, false) CompPhaseNameMacro(PHASE_MARK_LOCAL_VARS, "Mark local vars", false, -1, false) -CompPhaseNameMacro(PHASE_OPTIMIZE_ADD_COPIES, "Opt add copies", false, -1, false) CompPhaseNameMacro(PHASE_OPTIMIZE_BOOLS, "Optimize bools", false, -1, false) CompPhaseNameMacro(PHASE_FIND_OPER_ORDER, "Find oper order", false, -1, false) CompPhaseNameMacro(PHASE_SET_BLOCK_ORDER, "Set block order", false, -1, true) diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index bbb559c9f8b..0690520eb75 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -3840,18 +3840,6 @@ void Compiler::lvaSortByRefCount() #endif } -/***************************************************************************** - * - * This is called by lvaMarkLclRefs to disqualify a variable from being - * considered by optAddCopies() - */ -void LclVarDsc::lvaDisqualifyVar() -{ - this->lvDisqualify = true; - this->lvSingleDef = false; - this->lvDefStmt = nullptr; -} - #ifdef FEATURE_SIMD var_types LclVarDsc::GetSimdBaseType() const { @@ -4291,9 +4279,6 @@ void Compiler::lvaMarkLclRefs(GenTree* tree, BasicBlock* block, Statement* stmt, if (tree->gtOper == GT_LCL_FLD) { - // variables that have uses inside a GT_LCL_FLD - // cause problems, so we will disqualify them here - varDsc->lvaDisqualifyVar(); return; } @@ -4304,42 +4289,6 @@ void Compiler::lvaMarkLclRefs(GenTree* tree, BasicBlock* block, Statement* stmt, /* Record if the variable has a single def or not */ - if (!varDsc->lvDisqualify) // If this variable is already disqualified, we can skip this - { - if (tree->gtFlags & GTF_VAR_DEF) // Is this is a def of our variable - { - /* - If we have one of these cases: - 1. We have already seen a definition (i.e lvSingleDef is true) - 2. or info.CompInitMem is true (thus this would be the second definition) - 3. or we have an assignment inside QMARK-COLON trees - 4. or we have an update form of assignment (i.e. +=, -=, *=) - Then we must disqualify this variable for use in optAddCopies() - - Note that all parameters start out with lvSingleDef set to true - */ - if ((varDsc->lvSingleDef == true) || (info.compInitMem == true) || (tree->gtFlags & GTF_COLON_COND) || - (tree->gtFlags & GTF_VAR_USEASG)) - { - varDsc->lvaDisqualifyVar(); - } - else - { - varDsc->lvSingleDef = true; - varDsc->lvDefStmt = stmt; - } - } - else // otherwise this is a ref of our variable - { - if (BlockSetOps::MayBeUninit(varDsc->lvRefBlks)) - { - // Lazy initialization - BlockSetOps::AssignNoCopy(this, varDsc->lvRefBlks, BlockSetOps::MakeEmpty(this)); - } - BlockSetOps::AddElemD(this, varDsc->lvRefBlks, block->bbNum); - } - } - if (!varDsc->lvDisqualifySingleDefRegCandidate) // If this var is already disqualified, we can skip this { if (tree->gtFlags & GTF_VAR_DEF) // Is this is a def of our variable -- GitLab