diff --git a/src/coreclr/src/jit/emitarm64.cpp b/src/coreclr/src/jit/emitarm64.cpp
index abde69c46abbdfdd92f0b963dc20c50bfd559c6a..cd5d0fefd404029ce9f7f6a5dd3b9f1502d647e4 100644
--- a/src/coreclr/src/jit/emitarm64.cpp
+++ b/src/coreclr/src/jit/emitarm64.cpp
@@ -10070,11 +10070,13 @@ void emitter::emitDispImm(ssize_t imm, bool addComma, bool alwaysHex /* =false *
         printf("#");
     }
 
-    // Munge any pointers if we want diff-able disassembly
+    // Munge any pointers if we want diff-able disassembly.
+    // Since some may be emitted as partial words, print as diffable anything that has
+    // significant bits beyond the lowest 8-bits.
     if (emitComp->opts.disDiffable)
     {
-        ssize_t top44bits = (imm >> 20);
-        if ((top44bits != 0) && (top44bits != -1))
+        ssize_t top56bits = (imm >> 8);
+        if ((top56bits != 0) && (top56bits != -1))
             imm = 0xD1FFAB1E;
     }
 
diff --git a/src/coreclr/src/jit/gentree.cpp b/src/coreclr/src/jit/gentree.cpp
index 196e3c99bd1204804d19ea8016e22c4dfc27e278..5c4d9169ff938b5bebf76786b360e61ce14f9eb7 100644
--- a/src/coreclr/src/jit/gentree.cpp
+++ b/src/coreclr/src/jit/gentree.cpp
@@ -764,6 +764,61 @@ bool GenTree::gtHasReg() const
     return hasReg;
 }
 
+//-----------------------------------------------------------------------------
+// GetRegisterDstCount: Get the number of registers defined by the node.
+//
+// Arguments:
+//    None
+//
+// Return Value:
+//    The number of registers that this node defines.
+//
+// Notes:
+//    This should not be called on a contained node.
+//    This does not look at the actual register assignments, if any, and so
+//    is valid after Lowering.
+//
+int GenTree::GetRegisterDstCount() const
+{
+    assert(!isContained());
+    if (!IsMultiRegNode())
+    {
+        return (IsValue()) ? 1 : 0;
+    }
+    else if (IsMultiRegCall())
+    {
+        // temporarily cast away const-ness as AsCall() method is not declared const
+        GenTree* temp = const_cast<GenTree*>(this);
+        return temp->AsCall()->GetReturnTypeDesc()->GetReturnRegCount();
+    }
+    else if (IsCopyOrReloadOfMultiRegCall())
+    {
+        // A multi-reg copy or reload, will have valid regs for only those
+        // positions that need to be copied or reloaded. Hence we need
+        // to consider only those registers for computing reg mask.
+
+        GenTree*             tree         = const_cast<GenTree*>(this);
+        GenTreeCopyOrReload* copyOrReload = tree->AsCopyOrReload();
+        GenTreeCall*         call         = copyOrReload->gtGetOp1()->AsCall();
+        return call->GetReturnTypeDesc()->GetReturnRegCount();
+    }
+#if !defined(LEGACY_BACKEND) && defined(_TARGET_ARM_)
+    else if (OperIsPutArgSplit())
+    {
+        return (const_cast<GenTree*>(this))->AsPutArgSplit()->gtNumRegs;
+    }
+    // A PUTARG_REG could be a MultiRegOp on ARM since we could move a double register to two int registers
+    // (either for all double parameters w/SoftFP or for varargs).
+    else
+    {
+        assert(OperIsMultiRegOp());
+        return (TypeGet() == TYP_LONG) ? 2 : 1;
+    }
+#endif // !defined(LEGACY_BACKEND) && defined(_TARGET_ARM_)
+    assert(!"Unexpected multi-reg node");
+    return 0;
+}
+
 //---------------------------------------------------------------
 // gtGetRegMask: Get the reg mask of the node.
 //
@@ -16065,19 +16120,6 @@ bool Compiler::gtComplexityExceeds(GenTreePtr* tree, unsigned limit)
     }
 }
 
-// -------------------------------------------------------------------------
-// IsRegOptional: Returns true if this gentree node is marked by lowering to
-// indicate that codegen can still generate code even if it wasn't allocated
-// a register.
-bool GenTree::IsRegOptional() const
-{
-#ifdef LEGACY_BACKEND
-    return false;
-#else
-    return gtLsraInfo.regOptional;
-#endif
-}
-
 bool GenTree::IsPhiNode()
 {
     return (OperGet() == GT_PHI_ARG) || (OperGet() == GT_PHI) || IsPhiDefn();
diff --git a/src/coreclr/src/jit/gentree.h b/src/coreclr/src/jit/gentree.h
index f30713bed415d3893c322774493e33af9aea6cbe..7096b581bab6598cd2e701e97d27c6957fb4cc93 100644
--- a/src/coreclr/src/jit/gentree.h
+++ b/src/coreclr/src/jit/gentree.h
@@ -680,6 +680,8 @@ public:
     void CopyReg(GenTreePtr from);
     bool gtHasReg() const;
 
+    int GetRegisterDstCount() const;
+
     regMaskTP gtGetRegMask() const;
 
     unsigned gtFlags; // see GTF_xxxx below
@@ -702,10 +704,6 @@ public:
     regMaskSmall gtUsedRegs; // set of used (trashed) registers
 #endif // LEGACY_BACKEND
 
-#ifndef LEGACY_BACKEND
-    TreeNodeInfo gtLsraInfo;
-#endif // !LEGACY_BACKEND
-
     void SetVNsFromNode(GenTreePtr tree)
     {
         gtVNPair = tree->gtVNPair;
@@ -1022,8 +1020,9 @@ public:
 #define GTF_DEBUG_NODE_LARGE 0x00000004
 #define GTF_DEBUG_NODE_CG_PRODUCED 0x00000008 // genProduceReg has been called on this node
 #define GTF_DEBUG_NODE_CG_CONSUMED 0x00000010 // genConsumeReg has been called on this node
+#define GTF_DEBUG_NODE_LSRA_ADDED 0x00000020  // This node was added by LSRA
 
-#define GTF_DEBUG_NODE_MASK 0x0000001F // These flags are all node (rather than operation) properties.
+#define GTF_DEBUG_NODE_MASK 0x0000003F // These flags are all node (rather than operation) properties.
 
 #define GTF_DEBUG_VAR_CSE_REF 0x00800000 // GT_LCL_VAR -- This is a CSE LCL_VAR node
 #endif // defined(DEBUG)
@@ -1133,10 +1132,20 @@ public:
         }
     }
 
-    // NOTE: the three UnusedValue helpers immediately below are defined in lir.h.
+    // LIR flags
+    // These helper methods, along with the flag values they manipulate, are defined in lir.h
+    //
+    // UnusedValue indicates that, although this node produces a value, it is unused.
     inline void SetUnusedValue();
     inline void ClearUnusedValue();
     inline bool IsUnusedValue() const;
+    // RegOptional indicates that codegen can still generate code even if it isn't allocated a register.
+    inline bool IsRegOptional() const;
+    inline void SetRegOptional();
+    inline void ClearRegOptional();
+#ifdef DEBUG
+    void dumpLIRFlags();
+#endif
 
     bool OperIs(genTreeOps oper) const
     {
@@ -2135,17 +2144,6 @@ public:
     inline var_types CastFromType();
     inline var_types& CastToType();
 
-    // Returns true if this gentree node is marked by lowering to indicate
-    // that codegen can still generate code even if it wasn't allocated a
-    // register.
-    bool IsRegOptional() const;
-#ifndef LEGACY_BACKEND
-    void ClearRegOptional()
-    {
-        gtLsraInfo.regOptional = false;
-    }
-#endif
-
     // Returns "true" iff "this" is a phi-related node (i.e. a GT_PHI_ARG, GT_PHI, or a PhiDefn).
     bool IsPhiNode();
diff --git a/src/coreclr/src/jit/lir.cpp b/src/coreclr/src/jit/lir.cpp
index 3f716cf435f5978434b213e63247e8831c4eadb5..ae6149381c21f2f3f8a72700aa2e48deb5f9609f 100644
--- a/src/coreclr/src/jit/lir.cpp
+++ b/src/coreclr/src/jit/lir.cpp
@@ -1763,3 +1763,10 @@ void LIR::InsertBeforeTerminator(BasicBlock* block, LIR::Range&& range)
 
     blockRange.InsertBefore(insertionPoint, std::move(range));
 }
+
+#ifdef DEBUG
+void GenTree::dumpLIRFlags()
+{
+    JITDUMP("[%c%c]", IsUnusedValue() ? 'U' : '-', IsRegOptional() ? 'O' : '-');
+}
+#endif
diff --git a/src/coreclr/src/jit/lir.h b/src/coreclr/src/jit/lir.h
index 4a71947be79b1ff75f43b7aa87dca5262c9185bf..9d465af8f917643998e604b9c416366fc0867249 100644
--- a/src/coreclr/src/jit/lir.h
+++ b/src/coreclr/src/jit/lir.h
@@ -38,6 +38,9 @@ public:
                                 // that this bit should not be assumed to be valid
                                 // at all points during compilation: it is currently
                                 // only computed during target-dependent lowering.
+
+        RegOptional = 0x04,     // Set on a node if it produces a value, but does not
+                                // require a register (i.e. it can be used from memory).
     };
 };
 
@@ -327,4 +330,19 @@ inline bool GenTree::IsUnusedValue() const
     return (gtLIRFlags & LIR::Flags::UnusedValue) != 0;
 }
 
+inline void GenTree::SetRegOptional()
+{
+    gtLIRFlags |= LIR::Flags::RegOptional;
+}
+
+inline void GenTree::ClearRegOptional()
+{
+    gtLIRFlags &= ~LIR::Flags::RegOptional;
+}
+
+inline bool GenTree::IsRegOptional() const
+{
+    return (gtLIRFlags & LIR::Flags::RegOptional) != 0;
+}
+
 #endif // _LIR_H_
diff --git a/src/coreclr/src/jit/lower.cpp b/src/coreclr/src/jit/lower.cpp
index 1080807285652c06c2f999369da122df30ca77c6..11a751c80b0b407c8a9243ffe95dcfeeeca41a59 100644
--- a/src/coreclr/src/jit/lower.cpp
+++ b/src/coreclr/src/jit/lower.cpp
@@ -4747,19 +4747,19 @@ bool Lowering::LowerUnsignedDivOrMod(GenTreeOp* divMod)
 //    node - pointer to the DIV or MOD node
 //
 // Returns:
-//    The next node to lower.
+//    nullptr if no transformation is done, or the next node in the transformed node sequence that
+//    needs to be lowered.
 //
 GenTree* Lowering::LowerConstIntDivOrMod(GenTree* node)
 {
     assert((node->OperGet() == GT_DIV) || (node->OperGet() == GT_MOD));
-    GenTree* next     = node->gtNext;
     GenTree* divMod   = node;
     GenTree* dividend = divMod->gtGetOp1();
     GenTree* divisor  = divMod->gtGetOp2();
 
     if (!divisor->IsCnsIntOrI())
     {
-        return next; // no transformations to make
+        return nullptr; // no transformations to make
     }
 
     const var_types type = divMod->TypeGet();
@@ -4770,7 +4770,7 @@ GenTree* Lowering::LowerConstIntDivOrMod(GenTree* node)
         // We shouldn't see a divmod with constant operands here but if we do then it's likely
         // because optimizations are disabled or it's a case that's supposed to throw an exception.
         // Don't optimize this.
-        return next;
+        return nullptr;
     }
 
     ssize_t divisorValue = divisor->gtIntCon.IconValue();
@@ -4786,7 +4786,7 @@ GenTree* Lowering::LowerConstIntDivOrMod(GenTree* node)
         // case so optimizing this case would break C# code.
 
         // A runtime check could be used to handle this case but it's probably too rare to matter.
-        return next;
+        return nullptr;
     }
 
     bool isDiv = divMod->OperGet() == GT_DIV;
@@ -4798,8 +4798,7 @@ GenTree* Lowering::LowerConstIntDivOrMod(GenTree* node)
             // If the divisor is the minimum representable integer value then we can use a compare,
             // the result is 1 iff the dividend equals divisor.
             divMod->SetOper(GT_EQ);
-            ContainCheckCompare(divMod->AsOp());
-            return next;
+            return node;
         }
     }
 
@@ -4810,7 +4809,7 @@ GenTree* Lowering::LowerConstIntDivOrMod(GenTree* node)
     {
         if (comp->opts.MinOpts())
         {
-            return next;
+            return nullptr;
        }
 
 #if defined(_TARGET_XARCH_) || defined(_TARGET_ARM64_)
@@ -4921,7 +4920,7 @@ GenTree* Lowering::LowerConstIntDivOrMod(GenTree* node)
         return mulhi;
 #else
         // Currently there's no GT_MULHI for ARM32
-        return next;
+        return nullptr;
 #endif
     }
 
@@ -4929,7 +4928,7 @@ GenTree* Lowering::LowerConstIntDivOrMod(GenTree* node)
     LIR::Use use;
     if (!BlockRange().TryGetUse(node, &use))
     {
-        return next;
+        return nullptr;
     }
 
     // We need to use the dividend node multiple times so its value needs to be
@@ -5030,13 +5029,14 @@ GenTree* Lowering::LowerSignedDivOrMod(GenTreePtr node)
     if (!varTypeIsFloating(node->TypeGet()))
 #endif // _TARGET_XARCH_
     {
-        next = LowerConstIntDivOrMod(node);
-    }
-
-    if ((node->OperGet() == GT_DIV) || (node->OperGet() == GT_MOD))
-    {
-        ContainCheckDivOrMod(node->AsOp());
+        // LowerConstIntDivOrMod will return nullptr if it doesn't transform the node.
+        GenTree* newNode = LowerConstIntDivOrMod(node);
+        if (newNode != nullptr)
+        {
+            return newNode;
+        }
     }
+    ContainCheckDivOrMod(node->AsOp());
 
     return next;
 }
@@ -5890,7 +5890,7 @@ void Lowering::ContainCheckDivOrMod(GenTreeOp* node)
         {
             // If there are no containable operands, we can make an operand reg optional.
             // SSE2 allows only divisor to be a memory-op.
-            SetRegOptional(divisor);
+            divisor->SetRegOptional();
         }
         return;
     }
@@ -5912,7 +5912,7 @@ void Lowering::ContainCheckDivOrMod(GenTreeOp* node)
     {
         // If there are no containable operands, we can make an operand reg optional.
         // Div instruction allows only divisor to be a memory op.
-        SetRegOptional(divisor);
+        divisor->SetRegOptional();
     }
 #endif // _TARGET_XARCH_
 }
diff --git a/src/coreclr/src/jit/lower.h b/src/coreclr/src/jit/lower.h
index 60957435beeeb51e16c3527e823fc7a3bd19cee5..22c9a9d800ebefd4b92caf7feaf254a6b2c2400f 100644
--- a/src/coreclr/src/jit/lower.h
+++ b/src/coreclr/src/jit/lower.h
@@ -224,22 +224,6 @@ private:
 
     bool IsCallTargetInRange(void* addr);
 
 #if defined(_TARGET_XARCH_)
-    //----------------------------------------------------------------------
-    // SetRegOptional - sets a bit to indicate to LSRA that register
-    // for a given tree node is optional for codegen purpose. If no
-    // register is allocated to such a tree node, its parent node treats
-    // it as a contained memory operand during codegen.
-    //
-    // Arguments:
-    //    tree - GenTree node
-    //
-    // Returns
-    //    None
-    void SetRegOptional(GenTree* tree)
-    {
-        tree->gtLsraInfo.regOptional = true;
-    }
-
     GenTree* PreferredRegOptionalOperand(GenTree* tree);
 
     // ------------------------------------------------------------------
@@ -273,13 +257,18 @@ private:
         const bool op1Legal = tree->OperIsCommutative() && (operatorSize == genTypeSize(op1->TypeGet()));
         const bool op2Legal = operatorSize == genTypeSize(op2->TypeGet());
 
+        GenTree* regOptionalOperand = nullptr;
         if (op1Legal)
         {
-            SetRegOptional(op2Legal ? PreferredRegOptionalOperand(tree) : op1);
+            regOptionalOperand = op2Legal ? PreferredRegOptionalOperand(tree) : op1;
         }
         else if (op2Legal)
         {
-            SetRegOptional(op2);
+            regOptionalOperand = op2;
+        }
+        if (regOptionalOperand != nullptr)
+        {
+            regOptionalOperand->SetRegOptional();
         }
     }
 #endif // defined(_TARGET_XARCH_)
diff --git a/src/coreclr/src/jit/lowerxarch.cpp b/src/coreclr/src/jit/lowerxarch.cpp
index 37388d48f567cdd429af7151fc6eaa43d3693b14..08b3d00fdf2b1ca95cd4417f0e86519843d01e7f 100644
--- a/src/coreclr/src/jit/lowerxarch.cpp
+++ b/src/coreclr/src/jit/lowerxarch.cpp
@@ -472,8 +472,6 @@ void Lowering::LowerPutArgStk(GenTreePutArgStk* putArgStk)
         head->gtSeqNum = fieldList->gtSeqNum;
 #endif // DEBUG
 
-        head->gtLsraInfo = fieldList->gtLsraInfo;
-
         BlockRange().InsertAfter(fieldList, head);
         BlockRange().Remove(fieldList);
 
@@ -515,7 +513,7 @@ void Lowering::LowerPutArgStk(GenTreePutArgStk* putArgStk)
                     LclVarDsc* varDsc = &(comp->lvaTable[fieldNode->AsLclVarCommon()->gtLclNum]);
                     if (!varDsc->lvDoNotEnregister)
                     {
-                        SetRegOptional(fieldNode);
+                        fieldNode->SetRegOptional();
                     }
                     else
                     {
@@ -533,7 +531,7 @@ void Lowering::LowerPutArgStk(GenTreePutArgStk* putArgStk)
                 // than spilling, but this situation is not all that common, as most cases of promoted
                 // structs do not have a large number of fields, and of those most are lclVars or
                 // copy-propagated constants.
-                SetRegOptional(fieldNode);
+                fieldNode->SetRegOptional();
             }
         }
 
@@ -1655,12 +1653,12 @@ void Lowering::ContainCheckMul(GenTreeOp* node)
         // Has a contained immediate operand.
         // Only 'other' operand can be marked as reg optional.
         assert(other != nullptr);
-        SetRegOptional(other);
+        other->SetRegOptional();
     }
     else if (hasImpliedFirstOperand)
     {
         // Only op2 can be marked as reg optional.
-        SetRegOptional(op2);
+        op2->SetRegOptional();
     }
     else
     {
@@ -1779,7 +1777,7 @@ void Lowering::ContainCheckCast(GenTreeCast* node)
         {
             // Mark castOp as reg optional to indicate codegen
             // can still generate code if it is on stack.
-            SetRegOptional(castOp);
+            castOp->SetRegOptional();
         }
     }
 }
@@ -1854,7 +1852,7 @@ void Lowering::ContainCheckCompare(GenTreeOp* cmp)
         {
             // SSE2 allows only otherOp to be a memory-op. Since otherOp is not
             // contained, we can mark it reg-optional.
-            SetRegOptional(otherOp);
+            otherOp->SetRegOptional();
         }
 
         return;
@@ -1875,7 +1873,7 @@ void Lowering::ContainCheckCompare(GenTreeOp* cmp)
             }
             else
             {
-                SetRegOptional(op1);
+                op1->SetRegOptional();
             }
         }
     }
@@ -1894,14 +1892,14 @@ void Lowering::ContainCheckCompare(GenTreeOp* cmp)
         }
         else if (op1->IsCnsIntOrI())
         {
-            SetRegOptional(op2);
+            op2->SetRegOptional();
        }
         else
         {
             // One of op1 or op2 could be marked as reg optional
             // to indicate that codegen can still generate code
             // if one of them is on stack.
-            SetRegOptional(PreferredRegOptionalOperand(cmp));
+            PreferredRegOptionalOperand(cmp)->SetRegOptional();
         }
     }
 }
@@ -2142,7 +2140,7 @@ void Lowering::ContainCheckBoundsChk(GenTreeBoundsChk* node)
         else
         {
             // We can mark 'other' as reg optional, since it is not contained.
-            SetRegOptional(other);
+            other->SetRegOptional();
         }
     }
 }
@@ -2167,7 +2165,7 @@ void Lowering::ContainCheckIntrinsic(GenTreeOp* node)
         {
             // Mark the operand as reg optional since codegen can still
             // generate code if op1 is on stack.
-            SetRegOptional(op1);
+            op1->SetRegOptional();
         }
     }
 }
diff --git a/src/coreclr/src/jit/lsra.cpp b/src/coreclr/src/jit/lsra.cpp
index 0dc5c439b8425f0b27f2a8e17ca950fa614c27c2..58ea6ae390b8ef1dbfe6ddec53df554fd48a7631 100644
--- a/src/coreclr/src/jit/lsra.cpp
+++ b/src/coreclr/src/jit/lsra.cpp
@@ -13,7 +13,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 Preconditions
   - All register requirements are expressed in the code stream, either as destination
     registers of tree nodes, or as internal registers.  These requirements are
-    expressed in the TreeNodeInfo (gtLsraInfo) on each node, which includes:
+    expressed in the TreeNodeInfo computed for each node, which includes:
     - The number of register sources and destinations.
     - The register restrictions (candidates) of the target register, both from itself,
       as producer of the value (dstCandidates), and from its consuming node (srcCandidates).
@@ -787,9 +787,7 @@ void LinearScan::associateRefPosWithInterval(RefPosition* rp)
             regMaskTP newAssignment = (prevAssignment & rp->registerAssignment);
             if (newAssignment != RBM_NONE)
             {
-                if (!isSingleRegister(newAssignment) ||
-                    (!theInterval->hasNonCommutativeRMWDef && (prevRefPosition->treeNode != nullptr) &&
-                     !prevRefPosition->treeNode->gtLsraInfo.isInternalRegDelayFree))
+                if (!isSingleRegister(newAssignment) || !theInterval->hasInterferingUses)
                 {
                     prevRefPosition->registerAssignment = newAssignment;
                 }
@@ -1762,9 +1760,6 @@ BasicBlock* LinearScan::getNextBlock()
 // Return Value:
 //    None.
 //
-// Assumptions:
-//    Lowering must have set the NodeInfo (gtLsraInfo) on each node to communicate
-//    the register requirements.
 
 void LinearScan::doLinearScan()
 {
@@ -2842,7 +2837,6 @@ regMaskTP LinearScan::getKillSetForNode(GenTree* tree)
                 // usual kill location which is the same as the defs at tree loc+1.
                 // Note that we don't have to add interference for the live vars, because that
                 // will be done below, and is not sensitive to the precise location.
-                LsraLocation currentLoc = tree->gtLsraInfo.loc;
                 assert(currentLoc != 0);
                 addRefsForPhysRegMask(RBM_RDX, currentLoc, RefTypeKill, true);
                 // Both RAX and RDX are killed by the operation
@@ -3101,7 +3095,6 @@ bool LinearScan::buildKillPositionsForNode(GenTree* tree, LsraLocation currentLo
 //    DEBUG only arg.
 RefPosition* LinearScan::defineNewInternalTemp(GenTree*     tree,
                                                RegisterType regType,
-                                               LsraLocation currentLoc,
                                                regMaskTP    regMask DEBUGARG(unsigned minRegCandidateCount))
 {
     Interval* current = newInterval(regType);
@@ -3115,7 +3108,6 @@ RefPosition* LinearScan::defineNewInternalTemp(GenTree* tree,
 //
 // Arguments:
 //   tree                  - Gentree node that needs internal registers
-//   currentLoc            - Location at which Def positions need to be defined
 //   temps                 - in-out array which is populated with ref positions
 //                           created for Def of internal registers
 //   minRegCandidateCount  - Minimum registers to be ensured in candidate
@@ -3124,14 +3116,14 @@ RefPosition* LinearScan::defineNewInternalTemp(GenTree* tree,
 //
 // Returns:
 //   The total number of Def positions created for internal registers of tree node.
-int LinearScan::buildInternalRegisterDefsForNode(GenTree*     tree,
-                                                 LsraLocation currentLoc,
-                                                 RefPosition* temps[] // populates
+int LinearScan::buildInternalRegisterDefsForNode(GenTree*      tree,
+                                                 TreeNodeInfo* info,
+                                                 RefPosition*  temps[] // populates
                                                  DEBUGARG(unsigned minRegCandidateCount))
 {
     int count;
-    int internalIntCount = tree->gtLsraInfo.internalIntCount;
-    regMaskTP internalCands = tree->gtLsraInfo.getInternalCandidates(this);
+    int       internalIntCount = info->internalIntCount;
+    regMaskTP internalCands    = info->getInternalCandidates(this);
 
     // If the number of internal integer registers required is the same as the number of candidate integer registers in
     // the candidate set, then they must be handled as fixed registers.
@@ -3151,16 +3143,15 @@ int LinearScan::buildInternalRegisterDefsForNode(GenTree* tree,
             internalIntCands = genFindLowestBit(internalIntCands);
             internalCands &= ~internalIntCands;
         }
-        temps[count] =
-            defineNewInternalTemp(tree, IntRegisterType, currentLoc, internalIntCands DEBUG_ARG(minRegCandidateCount));
+        temps[count] = defineNewInternalTemp(tree, IntRegisterType, internalIntCands DEBUG_ARG(minRegCandidateCount));
     }
 
-    int internalFloatCount = tree->gtLsraInfo.internalFloatCount;
+    int internalFloatCount = info->internalFloatCount;
     for (int i = 0; i < internalFloatCount; i++)
     {
         regMaskTP internalFPCands = (internalCands & internalFloatRegCandidates());
         temps[count++] =
-            defineNewInternalTemp(tree, FloatRegisterType, currentLoc, internalFPCands DEBUG_ARG(minRegCandidateCount));
+            defineNewInternalTemp(tree, FloatRegisterType, internalFPCands DEBUG_ARG(minRegCandidateCount));
     }
 
     assert(count < MaxInternalRegisters);
@@ -3174,7 +3165,6 @@ int LinearScan::buildInternalRegisterDefsForNode(GenTree* tree,
 //
 // Arguments:
 //   tree                  - Gentree node that needs internal registers
-//   currentLoc            - Location at which Use positions need to be defined
 //   defs                  - int array containing Def positions of internal
 //                           registers.
 //   total                 - Total number of Def positions in 'defs' array.
@@ -3184,9 +3174,9 @@ int LinearScan::buildInternalRegisterDefsForNode(GenTree* tree,
 //
 // Returns:
 //   Void.
-void LinearScan::buildInternalRegisterUsesForNode(GenTree*     tree,
-                                                  LsraLocation currentLoc,
-                                                  RefPosition* defs[],
+void LinearScan::buildInternalRegisterUsesForNode(GenTree*      tree,
+                                                  TreeNodeInfo* info,
+                                                  RefPosition*  defs[],
                                                   int total DEBUGARG(unsigned minRegCandidateCount))
 {
     assert(total < MaxInternalRegisters);
@@ -3207,7 +3197,7 @@ void LinearScan::buildInternalRegisterUsesForNode(GenTree* tree,
             RefPosition* newest = newRefPosition(defs[i]->getInterval(), currentLoc, RefTypeUse, tree, mask,
                                                  0 DEBUG_ARG(minRegCandidateCount));
 
-            if (tree->gtLsraInfo.isInternalRegDelayFree)
+            if (info->isInternalRegDelayFree)
             {
                 newest->delayRegFree = true;
             }
@@ -3215,146 +3205,11 @@ void LinearScan::buildInternalRegisterUsesForNode(GenTree* tree,
     }
 }
 
-regMaskTP LinearScan::getUseCandidates(GenTree* useNode)
-{
-    TreeNodeInfo info = useNode->gtLsraInfo;
-    return info.getSrcCandidates(this);
-}
-
-regMaskTP LinearScan::getDefCandidates(GenTree* tree)
-{
-    TreeNodeInfo info = tree->gtLsraInfo;
-    return info.getDstCandidates(this);
-}
-
 RegisterType LinearScan::getDefType(GenTree* tree)
 {
     return tree->TypeGet();
 }
 
-//------------------------------------------------------------------------
-// LocationInfoListNode: used to store a single `LocationInfo` value for a
-//                       node during `buildIntervals`.
-//
-// This is the node type for `LocationInfoList` below.
-//
-class LocationInfoListNode final : public LocationInfo
-{
-    friend class LocationInfoList;
-    friend class LocationInfoListNodePool;
-
-    LocationInfoListNode* m_next; // The next node in the list
-
-public:
-    LocationInfoListNode(LsraLocation l, Interval* i, GenTree* t, unsigned regIdx = 0) : LocationInfo(l, i, t, regIdx)
-    {
-    }
-
-    //------------------------------------------------------------------------
-    // LocationInfoListNode::Next: Returns the next node in the list.
-    LocationInfoListNode* Next() const
-    {
-        return m_next;
-    }
-};
-
-//------------------------------------------------------------------------
-// LocationInfoList: used to store a list of `LocationInfo` values for a
-//                   node during `buildIntervals`.
-//
-// Given an IR node that either directly defines N registers or that is a
-// contained node with uses that define a total of N registers, that node
-// will map to N `LocationInfo` values. These values are stored as a
-// linked list of `LocationInfoListNode` values.
-//
-class LocationInfoList final
-{
-    friend class LocationInfoListNodePool;
-
-    LocationInfoListNode* m_head; // The head of the list
-    LocationInfoListNode* m_tail; // The tail of the list
-
-public:
-    LocationInfoList() : m_head(nullptr), m_tail(nullptr)
-    {
-    }
-
-    LocationInfoList(LocationInfoListNode* node) : m_head(node), m_tail(node)
-    {
-        assert(m_head->m_next == nullptr);
-    }
-
-    //------------------------------------------------------------------------
-    // LocationInfoList::IsEmpty: Returns true if the list is empty.
-    //
-    bool IsEmpty() const
-    {
-        return m_head == nullptr;
-    }
-
-    //------------------------------------------------------------------------
-    // LocationInfoList::Begin: Returns the first node in the list.
-    //
-    LocationInfoListNode* Begin() const
-    {
-        return m_head;
-    }
-
-    //------------------------------------------------------------------------
-    // LocationInfoList::End: Returns the position after the last node in the
-    //                        list. The returned value is suitable for use as
-    //                        a sentinel for iteration.
-    //
-    LocationInfoListNode* End() const
-    {
-        return nullptr;
-    }
-
-    //------------------------------------------------------------------------
-    // LocationInfoList::Append: Appends a node to the list.
-    //
-    // Arguments:
-    //    node - The node to append. Must not be part of an existing list.
-    //
-    void Append(LocationInfoListNode* node)
-    {
-        assert(node->m_next == nullptr);
-
-        if (m_tail == nullptr)
-        {
-            assert(m_head == nullptr);
-            m_head = node;
-        }
-        else
-        {
-            m_tail->m_next = node;
-        }
-
-        m_tail = node;
-    }
-
-    //------------------------------------------------------------------------
-    // LocationInfoList::Append: Appends another list to this list.
-    //
-    // Arguments:
-    //    other - The list to append.
-    //
-    void Append(LocationInfoList other)
-    {
-        if (m_tail == nullptr)
-        {
-            assert(m_head == nullptr);
-            m_head = other.m_head;
-        }
-        else
-        {
-            m_tail->m_next = other.m_head;
-        }
-
-        m_tail = other.m_tail;
-    }
-};
-
 //------------------------------------------------------------------------
 // LocationInfoListNodePool: manages a pool of `LocationInfoListNode`
 //                           values to decrease overall memory usage
@@ -3428,11 +3283,10 @@ public:
             m_freeList = head->m_next;
         }
 
-        head->loc         = l;
-        head->interval    = i;
-        head->treeNode    = t;
-        head->multiRegIdx = regIdx;
-        head->m_next      = nullptr;
+        head->loc      = l;
+        head->interval = i;
+        head->treeNode = t;
+        head->m_next   = nullptr;
 
         return head;
     }
@@ -3452,6 +3306,9 @@ public:
         LocationInfoListNode* head = m_freeList;
         list.m_tail->m_next        = head;
         m_freeList                 = list.m_head;
+
+        list.m_head = nullptr;
+        list.m_tail = nullptr;
     }
 };
 
@@ -3541,51 +3398,43 @@ void LinearScan::buildUpperVectorRestoreRefPositions(GenTree* tree,
 //
 static int ComputeOperandDstCount(GenTree* operand)
 {
-    TreeNodeInfo& operandInfo = operand->gtLsraInfo;
-
-    if (operandInfo.isLocalDefUse)
+    // GT_ARGPLACE is the only non-LIR node that is currently in the trees at this stage, though
+    // note that it is not in the linear order. It seems best to check for !IsLIR() rather than
+    // GT_ARGPLACE directly, since it's that characteristic that makes it irrelevant for this method.
+    if (!operand->IsLIR())
     {
-        // Operands that define an unused value do not produce any registers.
         return 0;
     }
-    else if (operandInfo.dstCount != 0)
+    if (operand->isContained())
     {
-        // Operands that have a specified number of destination registers consume all of their operands
-        // and therefore produce exactly that number of registers.
-        return operandInfo.dstCount;
-    }
-    else if (operandInfo.srcCount != 0)
-    {
-        // If an operand has no destination registers but does have source registers, it must be a store
-        // or a compare.
-        assert(operand->OperIsStore() || operand->OperIsBlkOp() || operand->OperIsPutArgStk() ||
-               operand->OperIsCompare() || operand->OperIs(GT_CMP, GT_JCMP) || operand->IsSIMDEqualityOrInequality());
-        return 0;
+        int dstCount = 0;
+        for (GenTree* op : operand->Operands())
+        {
+            dstCount += ComputeOperandDstCount(op);
+        }
+
+        return dstCount;
     }
-    else if (!operand->OperIsFieldListHead() && (operand->OperIsStore() || operand->TypeGet() == TYP_VOID))
+    if (operand->IsUnusedValue())
     {
-        // Stores and void-typed operands may be encountered when processing call nodes, which contain
-        // pointers to argument setup stores.
+        // Operands that define an unused value do not produce any registers.
         return 0;
     }
-    else if (operand->OperIsPutArgStk())
+    if (operand->IsValue())
    {
-        // A PUTARG_STK argument is an operand of a call, but is neither contained, nor does it produce
-        // a result.
-        assert(!operand->isContained());
-        return 0;
+        // Operands that are values and are not contained consume all of their operands
+        // and produce one or more registers.
+        return operand->GetRegisterDstCount();
     }
     else
     {
-        // If a field list or non-void-typed operand is not an unused value and does not have source registers,
-        // that argument is contained within its parent and produces `sum(operand_dst_count)` registers.
-        int dstCount = 0;
-        for (GenTree* op : operand->Operands())
-        {
-            dstCount += ComputeOperandDstCount(op);
-        }
-
-        return dstCount;
+        // This must be one of the operand types that are neither contained nor produce a value.
+        // Stores and void-typed operands may be encountered when processing call nodes, which contain
+        // pointers to argument setup stores.
+        assert(operand->OperIsStore() || operand->OperIsBlkOp() || operand->OperIsPutArgStk() ||
+               operand->OperIsCompare() || operand->OperIs(GT_CMP) || operand->IsSIMDEqualityOrInequality() ||
+               operand->TypeGet() == TYP_VOID);
+        return 0;
     }
 }
 
@@ -3614,21 +3463,10 @@ static int ComputeAvailableSrcCount(GenTree* node)
 }
 #endif // DEBUG
 
-static GenTree* GetFirstOperand(GenTree* node)
-{
-    GenTree* firstOperand = nullptr;
-    node->VisitOperands([&firstOperand](GenTree* operand) -> GenTree::VisitResult {
-        firstOperand = operand;
-        return GenTree::VisitResult::Abort;
-    });
-    return firstOperand;
-}
-
 void LinearScan::buildRefPositionsForNode(GenTree*                  tree,
                                           BasicBlock*               block,
                                           LocationInfoListNodePool& listNodePool,
-                                          HashTableBase<GenTree*, LocationInfoList>& operandToLocationInfoMap,
-                                          LsraLocation currentLoc)
+                                          LsraLocation              currentLoc)
 {
 #ifdef _TARGET_ARM_
     assert(!isRegPairType(tree->TypeGet()));
@@ -3647,21 +3485,54 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree,
     // gtRsvdRegs register mask. Clear it out.
     tree->gtRsvdRegs = RBM_NONE;
 
-    TreeNodeInfo info = tree->gtLsraInfo;
-    assert(info.IsValid(this));
-    int consume = info.srcCount;
-    int produce = info.dstCount;
+#ifdef DEBUG
+    if (VERBOSE)
+    {
+        dumpOperandToLocationInfoMap();
+        compiler->gtDispTree(tree, nullptr, nullptr, true);
+    }
+#endif // DEBUG
+
+    // If the node produces a value that will be consumed by a parent node, its TreeNodeInfo will
+    // be allocated in the LocationInfoListNode. Otherwise, we'll just use a local value that will
+    // be thrown away when we're done.
+    LocationInfoListNode* locationInfo = nullptr;
+    TreeNodeInfo          tempInfo;
+    TreeNodeInfo*         info = nullptr;
+    if (!tree->isContained() && tree->IsValue())
+    {
+        locationInfo = listNodePool.GetNode(currentLoc, nullptr, tree);
+        info         = &locationInfo->info;
+    }
+    else
+    {
+        info = &tempInfo;
+    }
+    info->Initialize(this, tree);
+    TreeNodeInfoInit(tree, info);
+
+#ifdef DEBUG
+    if (VERBOSE)
+    {
+        printf(" +");
+        info->dump(this);
+        tree->dumpLIRFlags();
+        printf("\n");
+    }
+#endif // DEBUG
+
+    assert(info->IsValid(this));
+    int consume = info->srcCount;
+    int produce = info->dstCount;
 
 #ifdef DEBUG
     if (VERBOSE)
     {
-        lsraDispNode(tree, LSRA_DUMP_REFPOS, (produce != 0));
-        JITDUMP("\n");
         if (tree->isContained())
         {
             JITDUMP("Contained\n");
         }
-        else if (tree->OperIs(GT_LCL_VAR, GT_LCL_FLD) && info.isLocalDefUse)
+        else if (tree->OperIs(GT_LCL_VAR, GT_LCL_FLD) && info->isLocalDefUse)
         {
             JITDUMP("Unused\n");
         }
@@ -3669,27 +3540,6 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree,
         {
             JITDUMP(" consume=%d produce=%d\n", consume, produce);
         }
-
-        if (consume != 0)
-        {
-            JITDUMP("at start of tree, map contains: { ");
-            bool first = true;
-            for (auto kvp : operandToLocationInfoMap)
-            {
-                GenTree*         node    = kvp.Key();
-                LocationInfoList defList = kvp.Value();
-
-                JITDUMP("%sN%03u. %s -> (", first ? "" : "; ", node->gtSeqNum, GenTree::OpName(node->OperGet()));
-                for (LocationInfoListNode *def = defList.Begin(), *end = defList.End(); def != end; def = def->Next())
-                {
-                    JITDUMP("%s%d.N%03u", def == defList.Begin() ? "" : ", ", def->loc, def->treeNode->gtSeqNum);
"" : ", ", def->loc, def->treeNode->gtSeqNum); - } - JITDUMP(")"); - - first = false; - } - JITDUMP(" }\n"); - } } #endif // DEBUG @@ -3728,55 +3578,18 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree, VarSetOps::RemoveElemD(compiler, currentLiveVars, varIndex); } - if (!info.isLocalDefUse && !tree->isContained()) + if (!info->isLocalDefUse && !tree->isContained()) { assert(produce != 0); - LocationInfoList list(listNodePool.GetNode(currentLoc, getIntervalForLocalVar(varIndex), tree)); - bool added = operandToLocationInfoMap.AddOrUpdate(tree, list); + locationInfo->interval = getIntervalForLocalVar(varIndex); + bool added = operandToLocationInfoMap->AddOrUpdate(tree, locationInfo); assert(added); - - tree->gtLsraInfo.definesAnyRegisters = true; } return; } } - if (tree->isContained()) - { - assert(!info.isLocalDefUse); - assert(consume == 0); - assert(produce == 0); - assert(info.internalIntCount == 0); - assert(info.internalFloatCount == 0); - - // Contained nodes map to the concatenated lists of their operands. - LocationInfoList locationInfoList; - tree->VisitOperands([&](GenTree* op) -> GenTree::VisitResult { - if (!op->gtLsraInfo.definesAnyRegisters) - { - assert(ComputeOperandDstCount(op) == 0); - return GenTree::VisitResult::Continue; - } - - LocationInfoList operandList; - bool removed = operandToLocationInfoMap.TryRemove(op, &operandList); - assert(removed); - - locationInfoList.Append(operandList); - return GenTree::VisitResult::Continue; - }); - - if (!locationInfoList.IsEmpty()) - { - bool added = operandToLocationInfoMap.AddOrUpdate(tree, locationInfoList); - assert(added); - tree->gtLsraInfo.definesAnyRegisters = true; - } - JITDUMP("\n"); - return; - } - // Handle the case of local variable assignment Interval* varDefInterval = nullptr; RefType defRefType = RefTypeDef; @@ -3785,9 +3598,9 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree, // noAdd means the node creates a def but for purposes of map // management do not add it because data is not flowing up the - // tree but over (as in ASG nodes) + // tree - bool noAdd = info.isLocalDefUse; + bool noAdd = info->isLocalDefUse; RefPosition* prevPos = nullptr; bool isSpecialPutArg = false; @@ -3817,15 +3630,8 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree, if (consume == 1) { // Get the location info for the register defined by the first operand. - LocationInfoList operandDefs; - bool found = operandToLocationInfoMap.TryGetValue(GetFirstOperand(tree), &operandDefs); - assert(found); - - // Since we only expect to consume one register, we should only have a single register to - // consume. - assert(operandDefs.Begin()->Next() == operandDefs.End()); - - LocationInfo& operandInfo = *static_cast(operandDefs.Begin()); + LocationInfoListNode& operandInfo = *(useList.Begin()); + assert(operandInfo.treeNode == tree->gtGetOp1()); Interval* srcInterval = operandInfo.interval; if (srcInterval->relatedInterval == nullptr) @@ -3855,14 +3661,8 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree, store->gtType = store->gtOp1->gtType = store->gtOp1->AsUnOp()->gtOp1->TypeGet(); // Get the location info for the register defined by the first operand. - LocationInfoList operandDefs; - bool found = operandToLocationInfoMap.TryGetValue(GetFirstOperand(store), &operandDefs); - assert(found); - - // Since we only expect to consume one register, we should only have a single register to consume. 
-            assert(operandDefs.Begin()->Next() == operandDefs.End());
-
-            LocationInfo& operandInfo = *static_cast<LocationInfo*>(operandDefs.Begin());
+            LocationInfoListNode& operandInfo = *(useList.Begin());
+            assert(operandInfo.treeNode == tree->gtGetOp1());
 
             Interval* srcInterval       = operandInfo.interval;
             srcInterval->registerType   = regType(store->TypeGet());
@@ -3873,23 +3673,15 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree,
                 assert(srcDefPosition->treeNode == store->gtOp1);
 
                 srcDefPosition->registerAssignment = allRegs(store->TypeGet());
-                store->gtOp1->gtLsraInfo.setSrcCandidates(this, allRegs(store->TypeGet()));
+                operandInfo.info.setSrcCandidates(this, allRegs(store->TypeGet()));
             }
         }
     }
     else if (noAdd && produce == 0)
     {
-        // This is the case for dead nodes that occur after
-        // tree rationalization
+        // Dead nodes may remain after tree rationalization, decomposition or lowering.
+        // They should be marked as UnusedValue.
         // TODO-Cleanup: Identify and remove these dead nodes prior to register allocation.
-        if (tree->IsMultiRegCall())
-        {
-            // In case of multi-reg call node, produce = number of return registers
-            produce = tree->AsCall()->GetReturnTypeDesc()->GetReturnRegCount();
-        }
-        else
-        {
-            produce = 1;
-        }
+        assert(!noAdd || (produce != 0));
     }
 
     Interval* prefSrcInterval = nullptr;
@@ -3897,7 +3689,7 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree,
 
     // If this is a binary operator that will be encoded with 2 operand fields
     // (i.e. the target is read-modify-write), preference the dst to op1.
 
-    bool hasDelayFreeSrc = tree->gtLsraInfo.hasDelayFreeSrc;
+    bool hasDelayFreeSrc = info->hasDelayFreeSrc;
 
 #if defined(DEBUG) && defined(_TARGET_X86_)
     // On x86, `LSRA_LIMIT_CALLER` is too restrictive to allow the use of special put args: this stress mode
@@ -3951,12 +3743,11 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree,
             JITDUMP("Setting putarg_reg as a pass-through of a non-last use lclVar\n");
 
             // Get the register information for the first operand of the node.
-            LocationInfoList operandDefs;
-            bool             found = operandToLocationInfoMap.TryGetValue(GetFirstOperand(tree), &operandDefs);
-            assert(found);
+            LocationInfoListNode* operandDef = useList.Begin();
+            assert(operandDef->treeNode == tree->gtGetOp1());
 
             // Preference the destination to the interval of the first register defined by the first operand.
-            Interval* srcInterval = operandDefs.Begin()->interval;
+            Interval* srcInterval = operandDef->interval;
             assert(srcInterval->isLocalVar);
             prefSrcInterval = srcInterval;
             isSpecialPutArg = true;
@@ -3969,121 +3760,126 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree,
     // consume + produce + internalCount.  This is the minimum
     // set of registers that needs to be ensured in candidate
     // set of ref positions created.
-    unsigned minRegCount = consume + produce + info.internalIntCount + info.internalFloatCount;
+    unsigned minRegCount = consume + produce + info->internalIntCount + info->internalFloatCount;
 #endif // DEBUG
 
-    // make intervals for all the 'internal' register requirements for this node
-    // where internal means additional registers required temporarily
-    int internalCount = buildInternalRegisterDefsForNode(tree, currentLoc, internalRefs DEBUG_ARG(minRegCount));
+    // Make intervals for all the 'internal' register requirements for this node,
+    // where internal means additional registers required temporarily.
+    // Create a RefTypeDef RefPosition for each such interval.
+    int internalCount = buildInternalRegisterDefsForNode(tree, info, internalRefs DEBUG_ARG(minRegCount));
 
-    // pop all ref'd tree temps
-    tree->VisitOperands([&](GenTree* operand) -> GenTree::VisitResult {
-        // Skip operands that do not define any registers, whether directly or indirectly.
-        if (!operand->gtLsraInfo.definesAnyRegisters)
-        {
-            return GenTree::VisitResult::Continue;
-        }
-
-        // Remove the list of registers defined by the current operand from the map. Note that this
-        // is only correct because tree nodes are singly-used: if this property ever changes (e.g.
-        // if tree nodes are eventually allowed to be multiply-used), then the removal is only
-        // correct at the last use.
-        LocationInfoList operandDefs;
-        bool             removed = operandToLocationInfoMap.TryRemove(operand, &operandDefs);
-        assert(removed);
-        assert(!operandDefs.IsEmpty());
+    // Make use RefPositions for all used values.
+    int consumed = 0;
+    for (LocationInfoListNode *listNode = useList.Begin(), *end = useList.End(); listNode != end;
+         listNode = listNode->Next())
+    {
+        LocationInfo& locInfo = *static_cast<LocationInfo*>(listNode);
 
-#ifdef _TARGET_ARM_
-        regMaskTP currCandidates = RBM_NONE;
-#endif // _TARGET_ARM_
+        // For tree temps, a use is always a last use and the end of the range;
+        // this is set by default in newRefPosition
+        GenTree* const useNode = locInfo.treeNode;
+        assert(useNode != nullptr);
 
-        LocationInfoListNode* const operandDefsEnd = operandDefs.End();
-        for (LocationInfoListNode* operandDefsIterator = operandDefs.Begin(); operandDefsIterator != operandDefsEnd;
-             operandDefsIterator = operandDefsIterator->Next())
+        Interval*     srcInterval = locInfo.interval;
+        TreeNodeInfo& useNodeInfo = locInfo.info;
+        if (useNodeInfo.isTgtPref)
         {
-            LocationInfo& locInfo = *static_cast<LocationInfo*>(operandDefsIterator);
-
-            // for interstitial tree temps, a use is always last and end; this is set by default in newRefPosition
-            GenTree* const useNode = locInfo.treeNode;
-            assert(useNode != nullptr);
-
-            Interval* const i = locInfo.interval;
-            if (useNode->gtLsraInfo.isTgtPref)
-            {
-                prefSrcInterval = i;
-            }
+            prefSrcInterval = srcInterval;
+        }
 
-            const bool delayRegFree = (hasDelayFreeSrc && useNode->gtLsraInfo.isDelayFree);
+        const bool delayRegFree = (hasDelayFreeSrc && useNodeInfo.isDelayFree);
 
 #ifdef DEBUG
-            // If delayRegFree, then Use will interfere with the destination of
-            // the consuming node. Therefore, we also need add the kill set of
-            // consuming node to minRegCount.
+        //
+        // For example consider the following IR on x86, where v01 and v02
+        // are method args coming in ecx and edx respectively.
+        //   GT_DIV(v01, v02)
+        //
+        // For GT_DIV minRegCount will be 3 without adding kill set
+        // of GT_DIV node.
+        //
+        // Assume further JitStressRegs=2, which would constrain
+        // candidates to callee trashable regs { eax, ecx, edx } on
+        // use positions of v01 and v02. LSRA allocates ecx for v01.
+        // Use position of v02 cannot be allocated a reg since it
+        // is marked delay-reg free and {eax,edx} are getting killed
+        // before the def of GT_DIV. For this reason, minRegCount
+        // for Use position of v02 also needs to take into account
+        // the kill set of its consuming node.
+        unsigned minRegCountForUsePos = minRegCount;
+        if (delayRegFree && (lsraStressMask != 0))
+        {
+            regMaskTP killMask = getKillSetForNode(tree);
+            if (killMask != RBM_NONE)
             {
-                regMaskTP killMask = getKillSetForNode(tree);
-                if (killMask != RBM_NONE)
-                {
-                    minRegCountForUsePos += genCountBits(killMask);
-                }
+                minRegCountForUsePos += genCountBits(killMask);
             }
+        }
 #endif // DEBUG
 
-            regMaskTP candidates = getUseCandidates(useNode);
+        regMaskTP candidates = useNodeInfo.getSrcCandidates(this);
 #ifdef _TARGET_ARM_
-            if (useNode->OperIsPutArgSplit() || useNode->OperIsMultiRegOp())
-            {
-                // get i-th candidate, set bits in useCandidates must be in sequential order.
-                candidates = genFindLowestReg(candidates & ~currCandidates);
-                currCandidates |= candidates;
-            }
+        regMaskTP allCandidates = candidates;
+
+        if (useNode->OperIsPutArgSplit() || useNode->OperIsMultiRegOp())
+        {
+            // get i-th candidate, set bits in useCandidates must be in sequential order.
+            candidates = genFindLowestReg(allCandidates);
+            allCandidates &= ~candidates;
+        }
 #endif // _TARGET_ARM_
 
-            assert((candidates & allRegs(i->registerType)) != 0);
+        assert((candidates & allRegs(srcInterval->registerType)) != 0);
 
-            // For non-localVar uses we record nothing, as nothing needs to be written back to the tree.
-            GenTree* const refPosNode = i->isLocalVar ? useNode : nullptr;
-            RefPosition*   pos        = newRefPosition(i, currentLoc, RefTypeUse, refPosNode, candidates,
-                                                locInfo.multiRegIdx DEBUG_ARG(minRegCountForUsePos));
+        // For non-localVar uses we record nothing, as nothing needs to be written back to the tree.
+        GenTree* const refPosNode = srcInterval->isLocalVar ? useNode : nullptr;
+        RefPosition*   pos        = newRefPosition(srcInterval, currentLoc, RefTypeUse, refPosNode, candidates,
+                                            0 DEBUG_ARG(minRegCountForUsePos));
+        if (delayRegFree)
+        {
+            pos->delayRegFree = true;
+        }
 
-            if (delayRegFree)
-            {
-                pos->delayRegFree = true;
-            }
+        if (useNode->IsRegOptional())
+        {
+            pos->setAllocateIfProfitable(true);
+        }
+        consumed++;
 
-            if (useNode->IsRegOptional())
+        // Create additional use RefPositions for multi-reg nodes.
+        for (int idx = 1; idx < locInfo.info.dstCount; idx++)
+        {
+            noway_assert(srcInterval->relatedInterval != nullptr);
+            srcInterval = srcInterval->relatedInterval;
+#ifdef _TARGET_ARM_
+            if (useNode->OperIsPutArgSplit() ||
+                (compiler->opts.compUseSoftFP && (useNode->OperIsPutArgReg() || useNode->OperGet() == GT_BITCAST)))
             {
-                pos->setAllocateIfProfitable(true);
+                // get first candidate, set bits in useCandidates must be in sequential order.
+                candidates = genFindLowestReg(allCandidates);
+                allCandidates &= ~candidates;
             }
+#endif // _TARGET_ARM_
+            RefPosition* pos = newRefPosition(srcInterval, currentLoc, RefTypeUse, refPosNode, candidates,
+                                              idx DEBUG_ARG(minRegCountForUsePos));
+            consumed++;
+        }
+    }
 
-        listNodePool.ReturnNodes(operandDefs);
-
-        return GenTree::VisitResult::Continue;
-    });
+    assert(consumed == consume);
+    if (consume != 0)
+    {
+        listNodePool.ReturnNodes(useList);
+    }
 
-    buildInternalRegisterUsesForNode(tree, currentLoc, internalRefs, internalCount DEBUG_ARG(minRegCount));
+    buildInternalRegisterUsesForNode(tree, info, internalRefs, internalCount DEBUG_ARG(minRegCount));
 
     RegisterType registerType  = getDefType(tree);
-    regMaskTP    candidates    = getDefCandidates(tree);
-    regMaskTP    useCandidates = getUseCandidates(tree);
+    regMaskTP    candidates    = info->getDstCandidates(this);
+    regMaskTP    useCandidates = info->getSrcCandidates(this);
 
 #ifdef DEBUG
     if (VERBOSE && produce)
@@ -4126,10 +3922,13 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree,
     // push defs
     LocationInfoList locationInfoList;
     LsraLocation     defLocation = currentLoc + 1;
+    Interval*        interval    = varDefInterval;
+    // For nodes that define multiple registers, subsequent intervals will be linked using the 'relatedInterval' field.
+    // Keep track of the previous interval allocated, for that purpose.
+    Interval* prevInterval = nullptr;
     for (int i = 0; i < produce; i++)
     {
         regMaskTP currCandidates = candidates;
-        Interval* interval       = varDefInterval;
 
         // In case of multi-reg call node, registerType is given by
         // the type of ith position return register.
@@ -4154,9 +3953,9 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree,
         {
             // Make a new interval
             interval = newInterval(registerType);
-            if (hasDelayFreeSrc)
+            if (hasDelayFreeSrc || info->isInternalRegDelayFree)
            {
-                interval->hasNonCommutativeRMWDef = true;
+                interval->hasInterferingUses = true;
             }
             else if (tree->OperIsConst())
             {
@@ -4188,12 +3987,28 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree,
         // but not push it
         if (!noAdd)
         {
-            locationInfoList.Append(listNodePool.GetNode(defLocation, interval, tree, (unsigned)i));
+            if (i == 0)
+            {
+                locationInfo->interval = interval;
+                prevInterval           = interval;
+                bool added             = operandToLocationInfoMap->AddOrUpdate(tree, locationInfo);
+                assert(added);
+            }
+            else
+            {
+                // This is the 2nd or subsequent register defined by a multi-reg node.
+                // Connect them using 'relatedInterval'.
+                noway_assert((prevInterval != nullptr) && (prevInterval->relatedInterval == nullptr));
+                prevInterval->relatedInterval = interval;
+                prevInterval                  = interval;
+                prevInterval->isMultiReg      = true;
+                interval->isMultiReg          = true;
+            }
         }
 
         RefPosition* pos = newRefPosition(interval, defLocation, defRefType, defNode, currCandidates,
                                           (unsigned)i DEBUG_ARG(minRegCount));
-        if (info.isLocalDefUse)
+        if (info->isLocalDefUse)
        {
             // This must be an unused value, OR it is a special node for which we allocate
             // a target register even though it produces no value.
@@ -4203,6 +4018,7 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree,
         }
         interval->updateRegisterPreferences(currCandidates);
         interval->updateRegisterPreferences(useCandidates);
+        interval = nullptr;
     }
 
 #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
@@ -4213,12 +4029,6 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree,
     }
 #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
 
-    if (!locationInfoList.IsEmpty())
-    {
-        bool added = operandToLocationInfoMap.AddOrUpdate(tree, locationInfoList);
-        assert(added);
-        tree->gtLsraInfo.definesAnyRegisters = true;
-    }
     JITDUMP("\n");
 }
 
@@ -4578,7 +4388,7 @@ void LinearScan::buildIntervals()
     // second part:
     JITDUMP("\nbuildIntervals second part ========\n");
-    LsraLocation currentLoc = 0;
+    currentLoc = 0;
     // TODO-Cleanup: This duplicates prior behavior where entry (ParamDef) RefPositions were
     // being assigned the bbNum of the last block traversed in the 2nd phase of Lowering.
     // Previously, the block sequencing was done for the (formerly separate) TreeNodeInfoInit pass,
@@ -4695,7 +4505,8 @@ void LinearScan::buildIntervals()
     }
 
     LocationInfoListNodePool listNodePool(compiler, 8);
-    SmallHashTable<GenTree*, LocationInfoList> operandToLocationInfoMap(compiler);
+    OperandToLocationInfoMap theOperandToLocationInfoMap(compiler);
+    operandToLocationInfoMap = &theOperandToLocationInfoMap;
 
     BasicBlock* predBlock = nullptr;
     BasicBlock* prevBlock = nullptr;
@@ -4803,30 +4614,7 @@ void LinearScan::buildIntervals()
             node->gtRegNum = node->gtRegNum;
 #endif
 
-            node->gtLsraInfo.Initialize(this, node, currentLoc);
-
-            TreeNodeInfoInit(node);
-
-            // If the node produces an unused value, mark it as a local def-use
-            if (node->IsValue() && node->IsUnusedValue())
-            {
-                node->gtLsraInfo.isLocalDefUse = true;
-                node->gtLsraInfo.dstCount      = 0;
-            }
-
-#ifdef DEBUG
-            if (VERBOSE)
-            {
-                compiler->gtDispTree(node, nullptr, nullptr, true);
-                printf(" +");
-                node->gtLsraInfo.dump(this);
-            }
-#endif // DEBUG
-
-            // Only nodes that produce values should have a non-zero dstCount.
-            assert((node->gtLsraInfo.dstCount == 0) || node->IsValue());
-
-            buildRefPositionsForNode(node, block, listNodePool, operandToLocationInfoMap, currentLoc);
+            buildRefPositionsForNode(node, block, listNodePool, currentLoc);
 
 #ifdef DEBUG
             if (currentLoc > maxNodeLocation)
@@ -4839,6 +4627,7 @@ void LinearScan::buildIntervals()
 
         // Note: the visited set is cleared in LinearScan::doLinearScan()
         markBlockVisited(block);
+        assert(operandToLocationInfoMap->Count() == 0);
 
         if (enregisterLocalVars)
         {
@@ -5483,8 +5272,10 @@ regNumber LinearScan::tryAllocateFreeReg(Interval* currentInterval, RefPosition*
         RefPosition* nextRelatedRefPosition = relatedInterval->getNextRefPosition();
         if (nextRelatedRefPosition != nullptr)
         {
-            // Don't use the relatedInterval for preferencing if its next reference is not a new definition.
-            if (!RefTypeIsDef(nextRelatedRefPosition->refType))
+            // Don't use the relatedInterval for preferencing if its next reference is not a new definition,
+            // or if it is only related because they are multi-reg targets of the same node.
+            if (!RefTypeIsDef(nextRelatedRefPosition->refType) ||
+                isMultiRegRelated(nextRelatedRefPosition, refPosition->nodeLocation))
             {
                 relatedInterval = nullptr;
             }
@@ -7694,6 +7485,18 @@ bool LinearScan::registerIsFree(regNumber regNum, RegisterType regType)
     return isFree;
 }
 
+// isMultiRegRelated: is this RefPosition defining part of a multi-reg value
+//                    at the given location?
+//
+bool LinearScan::isMultiRegRelated(RefPosition* refPosition, LsraLocation location)
+{
+#ifdef FEATURE_MULTIREG_ARGS_OR_RET
+    return ((refPosition->nodeLocation == location) && refPosition->getInterval()->isMultiReg);
+#else
+    return false;
+#endif
+}
+
 //------------------------------------------------------------------------
 // LinearScan::freeRegister: Make a register available for use
 //
@@ -8981,9 +8784,8 @@ void LinearScan::insertCopyOrReload(BasicBlock* block, GenTreePtr tree, unsigned
         GenTreeCopyOrReload* newNode = new (compiler, oper) GenTreeCopyOrReload(oper, treeType, tree);
         assert(refPosition->registerAssignment != RBM_NONE);
+        SetLsraAdded(newNode);
         newNode->SetRegNumByIdx(refPosition->assignedReg(), multiRegIdx);
-        newNode->gtLsraInfo.isLsraAdded   = true;
-        newNode->gtLsraInfo.isLocalDefUse = false;
 
         if (refPosition->copyReg)
         {
             // This is a TEMPORARY copy
@@ -9030,16 +8832,15 @@ void LinearScan::insertUpperVectorSaveAndReload(GenTreePtr tree, RefPosition* re
 
     // First, insert the save before the call.
 
-    GenTreePtr saveLcl                = compiler->gtNewLclvNode(lclVarInterval->varNum, varDsc->lvType);
-    saveLcl->gtLsraInfo.isLsraAdded   = true;
-    saveLcl->gtRegNum                 = lclVarReg;
-    saveLcl->gtLsraInfo.isLocalDefUse = false;
+    GenTreePtr saveLcl = compiler->gtNewLclvNode(lclVarInterval->varNum, varDsc->lvType);
+    saveLcl->gtRegNum  = lclVarReg;
+    SetLsraAdded(saveLcl);
 
     GenTreeSIMD* simdNode =
         new (compiler, GT_SIMD) GenTreeSIMD(LargeVectorSaveType, saveLcl, nullptr, SIMDIntrinsicUpperSave,
                                             varDsc->lvBaseType, genTypeSize(varDsc->lvType));
-    simdNode->gtLsraInfo.isLsraAdded = true;
-    simdNode->gtRegNum               = spillReg;
+    SetLsraAdded(simdNode);
+    simdNode->gtRegNum = spillReg;
     if (spillToMem)
     {
         simdNode->gtFlags |= GTF_SPILL;
@@ -9049,15 +8850,14 @@ void LinearScan::insertUpperVectorSaveAndReload(GenTreePtr tree, RefPosition* re
 
     // Now insert the restore after the call.
 
-    GenTreePtr restoreLcl                = compiler->gtNewLclvNode(lclVarInterval->varNum, varDsc->lvType);
-    restoreLcl->gtLsraInfo.isLsraAdded   = true;
-    restoreLcl->gtRegNum                 = lclVarReg;
-    restoreLcl->gtLsraInfo.isLocalDefUse = false;
+    GenTreePtr restoreLcl = compiler->gtNewLclvNode(lclVarInterval->varNum, varDsc->lvType);
+    restoreLcl->gtRegNum  = lclVarReg;
+    SetLsraAdded(restoreLcl);
 
     simdNode = new (compiler, GT_SIMD) GenTreeSIMD(varDsc->lvType, restoreLcl, nullptr, SIMDIntrinsicUpperRestore,
                                                    varDsc->lvBaseType, genTypeSize(varDsc->lvType));
-    simdNode->gtLsraInfo.isLsraAdded = true;
-    simdNode->gtRegNum               = spillReg;
+    simdNode->gtRegNum = spillReg;
+    SetLsraAdded(simdNode);
     if (spillToMem)
     {
         simdNode->gtFlags |= GTF_SPILLED;
@@ -9487,9 +9287,6 @@ void LinearScan::resolveRegisters()
             continue;
         }
 
-        LsraLocation loc = treeNode->gtLsraInfo.loc;
-        assert(treeNode->IsLocal() || currentLocation == loc || currentLocation == loc + 1);
-
         if (currentRefPosition->isIntervalRef() && currentRefPosition->getInterval()->isInternal)
         {
             treeNode->gtRsvdRegs |= currentRefPosition->registerAssignment;
@@ -9813,8 +9610,8 @@ void LinearScan::insertMove(
         // This var can't be marked lvRegister now
         varDsc->lvRegNum = REG_STK;
 
-    GenTreePtr src              = compiler->gtNewLclvNode(lclNum, varDsc->TypeGet());
-    src->gtLsraInfo.isLsraAdded = true;
+    GenTreePtr src = compiler->gtNewLclvNode(lclNum, varDsc->TypeGet());
+    SetLsraAdded(src);
 
     // There are three cases we need to handle:
     // - We are loading a lclVar from the stack.
@@ -9848,12 +9645,10 @@ void LinearScan::insertMove(
         // This is the new home of the lclVar - indicate that by clearing the GTF_VAR_DEATH flag.
         // Note that if src is itself a lastUse, this will have no effect.
         dst->gtFlags &= ~(GTF_VAR_DEATH);
-        src->gtRegNum                 = fromReg;
-        dst->gtRegNum                 = toReg;
-        src->gtLsraInfo.isLocalDefUse = false;
-        dst->gtLsraInfo.isLsraAdded   = true;
+        src->gtRegNum = fromReg;
+        dst->gtRegNum = toReg;
+        SetLsraAdded(dst);
     }
-    dst->gtLsraInfo.isLocalDefUse = true;
     dst->SetUnusedValue();
 
     LIR::Range treeRange = LIR::SeqTree(compiler, dst);
@@ -9905,20 +9700,17 @@ void LinearScan::insertSwap(
     LclVarDsc* varDsc2 = compiler->lvaTable + lclNum2;
     assert(reg1 != REG_STK && reg1 != REG_NA && reg2 != REG_STK && reg2 != REG_NA);
 
-    GenTreePtr lcl1                = compiler->gtNewLclvNode(lclNum1, varDsc1->TypeGet());
-    lcl1->gtLsraInfo.isLsraAdded   = true;
-    lcl1->gtLsraInfo.isLocalDefUse = false;
-    lcl1->gtRegNum                 = reg1;
+    GenTreePtr lcl1 = compiler->gtNewLclvNode(lclNum1, varDsc1->TypeGet());
+    lcl1->gtRegNum  = reg1;
+    SetLsraAdded(lcl1);
 
-    GenTreePtr lcl2                = compiler->gtNewLclvNode(lclNum2, varDsc2->TypeGet());
-    lcl2->gtLsraInfo.isLsraAdded   = true;
-    lcl2->gtLsraInfo.isLocalDefUse = false;
-    lcl2->gtRegNum                 = reg2;
+    GenTreePtr lcl2 = compiler->gtNewLclvNode(lclNum2, varDsc2->TypeGet());
+    lcl2->gtRegNum  = reg2;
+    SetLsraAdded(lcl2);
 
-    GenTreePtr swap                = compiler->gtNewOperNode(GT_SWAP, TYP_VOID, lcl1, lcl2);
-    swap->gtLsraInfo.isLsraAdded   = true;
-    swap->gtLsraInfo.isLocalDefUse = false;
-    swap->gtRegNum                 = REG_NA;
+    GenTreePtr swap = compiler->gtNewOperNode(GT_SWAP, TYP_VOID, lcl1, lcl2);
+    swap->gtRegNum  = REG_NA;
+    SetLsraAdded(swap);
 
     lcl1->gtNext = lcl2;
     lcl2->gtPrev = lcl1;
@@ -11025,7 +10817,7 @@ void LinearScan::resolveEdge(BasicBlock* fromBlock,
 }
 
 //------------------------------------------------------------------------
-// GetIndirSourceCount: Get the source registers for an indirection that might be contained.
+// GetIndirInfo: Get the source registers for an indirection that might be contained.
 //
 // Arguments:
 //    node - The node of interest
 //
 // Return Value:
 //    The number of source registers used by the *parent* of this node.
 //
-int LinearScan::GetIndirSourceCount(GenTreeIndir* indirTree)
+// Notes:
+//    Adds the defining node for each register to the useList.
+//
+int LinearScan::GetIndirInfo(GenTreeIndir* indirTree)
 {
     GenTree* const addr = indirTree->gtOp1;
     if (!addr->isContained())
     {
+        appendLocationInfoToList(addr);
         return 1;
     }
     if (!addr->OperIs(GT_LEA))
     {
@@ -11050,19 +10846,94 @@ int LinearScan::GetIndirInfo(GenTreeIndir* indirTree)
     unsigned srcCount = 0;
     if ((addrMode->Base() != nullptr) && !addrMode->Base()->isContained())
     {
+        appendLocationInfoToList(addrMode->Base());
         srcCount++;
     }
-    if (addrMode->Index() != nullptr)
+    if ((addrMode->Index() != nullptr) && !addrMode->Index()->isContained())
     {
-        // We never have a contained index.
-        assert(!addrMode->Index()->isContained());
+        appendLocationInfoToList(addrMode->Index());
         srcCount++;
     }
 
     return srcCount;
 }
 
-void TreeNodeInfo::Initialize(LinearScan* lsra, GenTree* node, LsraLocation location)
+//------------------------------------------------------------------------
+// GetOperandInfo: Get the source registers for an operand that might be contained.
+//
+// Arguments:
+//    node    - The node of interest
+//    useList - The list of uses for the node that we're currently processing
+//
+// Return Value:
+//    The number of source registers used by the *parent* of this node.
+//
+// Notes:
+//    Adds the defining node for each register to the given useList.
-void TreeNodeInfo::Initialize(LinearScan* lsra, GenTree* node, LsraLocation location)
+//------------------------------------------------------------------------
+// GetOperandInfo: Get the source registers for an operand that might be contained.
+//
+// Arguments:
+//    node      - The node of interest
+//
+// Return Value:
+//    The number of source registers used by the *parent* of this node.
+//
+// Notes:
+//    Adds the defining node for each register to the useList.
+//
+int LinearScan::GetOperandInfo(GenTree* node)
 {
+    if (!node->isContained())
+    {
+        appendLocationInfoToList(node);
+        return 1;
+    }
+
+#if !defined(_TARGET_64BIT_)
+    if (node->OperIs(GT_LONG))
+    {
+        return appendBinaryLocationInfoToList(node->AsOp());
+    }
+#endif // !defined(_TARGET_64BIT_)
+    if (node->OperIsIndir())
+    {
+        const unsigned srcCount = GetIndirInfo(node->AsIndir());
+        return srcCount;
+    }
+
+    return 0;
+}
+
+//------------------------------------------------------------------------
+// GetOperandInfo: Get the source registers for an operand that might be contained,
+// and report where its uses begin in the useList.
+//
+// Arguments:
+//    node        - The node of interest
+//    pFirstInfo  - Out parameter; set to the first LocationInfoListNode that this
+//                  call appended to the useList
+//
+// Return Value:
+//    The number of source registers used by the *parent* of this node.
+//
+// Notes:
+//    Adds the defining node for each register to the useList.
+//
+int LinearScan::GetOperandInfo(GenTree* node, LocationInfoListNode** pFirstInfo)
+{
+    LocationInfoListNode* prevLast = useList.Last();
+    int                   srcCount = GetOperandInfo(node);
+    if (prevLast == nullptr)
+    {
+        *pFirstInfo = useList.Begin();
+    }
+    else
+    {
+        *pFirstInfo = prevLast->Next();
+    }
+    return srcCount;
+}
+
+void TreeNodeInfo::Initialize(LinearScan* lsra, GenTree* node)
+{
+    _dstCount           = 0;
+    _srcCount           = 0;
+    _internalIntCount   = 0;
+    _internalFloatCount = 0;
+
+    isLocalDefUse          = false;
+    isDelayFree            = false;
+    hasDelayFreeSrc        = false;
+    isTgtPref              = false;
+    isInternalRegDelayFree = false;
+
     regMaskTP dstCandidates;
 
     // if there is a reg indicated on the tree node, use that for dstCandidates
@@ -11090,18 +10961,11 @@ void TreeNodeInfo::Initialize(LinearScan* lsra, GenTree* node, LsraLocation loca
         dstCandidates = genRegMask(node->gtRegNum);
     }
 
-    internalIntCount    = 0;
-    internalFloatCount  = 0;
-    isLocalDefUse       = false;
-    isLsraAdded         = false;
-    definesAnyRegisters = false;
-
     setDstCandidates(lsra, dstCandidates);
     srcCandsIndex = dstCandsIndex;
 
     setInternalCandidates(lsra, lsra->allRegs(TYP_INT));
-    loc = location;
 #ifdef DEBUG
     isInitialized = true;
 #endif
@@ -11431,6 +11295,18 @@ void Interval::dump()
     {
         printf(" (struct)");
     }
+    if (isPromotedStruct)
+    {
+        printf(" (promoted struct)");
+    }
+    if (hasConflictingDefUse)
+    {
+        printf(" (def-use conflict)");
+    }
+    if (hasInterferingUses)
+    {
+        printf(" (interfering uses)");
+    }
     if (isSpecialPutArg)
     {
         printf(" (specialPutArg)");
@@ -11439,6 +11315,10 @@ void Interval::dump()
     {
         printf(" (constant)");
     }
+    if (isMultiReg)
+    {
+        printf(" (multireg)");
+    }
 
     printf(" RefPositions {");
     for (RefPosition* refPosition = this->firstRefPosition; refPosition != nullptr;
@@ -11508,7 +11388,7 @@ void RegRecord::tinyDump()
 
 void TreeNodeInfo::dump(LinearScan* lsra)
 {
-    printf("\n");
+    printf(">");
+}
+
+void LinearScan::dumpOperandToLocationInfoMap()
+{
+    JITDUMP("OperandToLocationInfoMap: { ");
+    bool first = true;
+    for (auto kvp : *operandToLocationInfoMap)
+    {
+        GenTree* node = kvp.Key();
+
+        JITDUMP("%sN%03u.t%d. %s", first ? 
"" : "; ", node->gtSeqNum, node->gtTreeID, GenTree::OpName(node->OperGet())); + + first = false; + } + JITDUMP(" }\n"); } void LinearScan::lsraDumpIntervals(const char* msg) @@ -11721,16 +11609,17 @@ void LinearScan::DumpOperandDefs( { assert(operand != nullptr); assert(operandString != nullptr); - - if (ComputeOperandDstCount(operand) == 0) + if (!operand->IsLIR()) { return; } - if (operand->gtLsraInfo.dstCount != 0) + int dstCount = ComputeOperandDstCount(operand); + + if (dstCount != 0) { // This operand directly produces registers; print it. - for (int i = 0; i < operand->gtLsraInfo.dstCount; i++) + for (int i = 0; i < dstCount; i++) { if (!first) { @@ -11743,7 +11632,7 @@ void LinearScan::DumpOperandDefs( first = false; } } - else + else if (operand->isContained()) { // This is a contained node. Dump the defs produced by its operands. for (GenTree* op : operand->Operands()) @@ -11884,52 +11773,11 @@ void LinearScan::TupleStyleDump(LsraTupleDumpMode mode) { GenTree* tree = node; - genTreeOps oper = tree->OperGet(); - TreeNodeInfo& info = tree->gtLsraInfo; - if (tree->gtLsraInfo.isLsraAdded) - { - // This must be one of the nodes that we add during LSRA - - if (oper == GT_LCL_VAR) - { - info.srcCount = 0; - info.dstCount = 1; - } - else if (oper == GT_RELOAD || oper == GT_COPY) - { - info.srcCount = 1; - info.dstCount = 1; - } -#ifdef FEATURE_SIMD - else if (oper == GT_SIMD) - { - if (tree->gtSIMD.gtSIMDIntrinsicID == SIMDIntrinsicUpperSave) - { - info.srcCount = 1; - info.dstCount = 1; - } - else - { - assert(tree->gtSIMD.gtSIMDIntrinsicID == SIMDIntrinsicUpperRestore); - info.srcCount = 2; - info.dstCount = 0; - } - } -#endif // FEATURE_SIMD - else - { - assert(oper == GT_SWAP); - info.srcCount = 2; - info.dstCount = 0; - } - info.internalIntCount = 0; - info.internalFloatCount = 0; - } - - int consume = info.srcCount; - int produce = info.dstCount; - regMaskTP killMask = RBM_NONE; - regMaskTP fixedMask = RBM_NONE; + genTreeOps oper = tree->OperGet(); + int produce = tree->IsValue() ? 
ComputeOperandDstCount(tree) : 0; + int consume = ComputeAvailableSrcCount(tree); + regMaskTP killMask = RBM_NONE; + regMaskTP fixedMask = RBM_NONE; lsraDispNode(tree, mode, produce != 0 && mode != LSRA_DUMP_REFPOS); @@ -12046,32 +11894,6 @@ void LinearScan::TupleStyleDump(LsraTupleDumpMode mode) } } printf("\n"); - if (info.internalIntCount != 0 && mode != LSRA_DUMP_REFPOS) - { - printf("\tinternal (%d):\t", info.internalIntCount); - if (mode == LSRA_DUMP_POST) - { - dumpRegMask(tree->gtRsvdRegs); - } - else if ((info.getInternalCandidates(this) & allRegs(TYP_INT)) != allRegs(TYP_INT)) - { - dumpRegMask(info.getInternalCandidates(this) & allRegs(TYP_INT)); - } - printf("\n"); - } - if (info.internalFloatCount != 0 && mode != LSRA_DUMP_REFPOS) - { - printf("\tinternal (%d):\t", info.internalFloatCount); - if (mode == LSRA_DUMP_POST) - { - dumpRegMask(tree->gtRsvdRegs); - } - else if ((info.getInternalCandidates(this) & allRegs(TYP_INT)) != allRegs(TYP_INT)) - { - dumpRegMask(info.getInternalCandidates(this) & allRegs(TYP_INT)); - } - printf("\n"); - } } if (enregisterLocalVars && mode == LSRA_DUMP_POST) { @@ -12575,7 +12397,7 @@ void LinearScan::dumpRefPositionShort(RefPosition* refPosition, BasicBlock* curr // bool LinearScan::IsResolutionMove(GenTree* node) { - if (!node->gtLsraInfo.isLsraAdded) + if (!IsLsraAdded(node)) { return false; } @@ -12584,7 +12406,7 @@ bool LinearScan::IsResolutionMove(GenTree* node) { case GT_LCL_VAR: case GT_COPY: - return node->gtLsraInfo.isLocalDefUse; + return node->IsUnusedValue(); case GT_SWAP: return true; @@ -12612,7 +12434,7 @@ bool LinearScan::IsResolutionNode(LIR::Range& containingRange, GenTree* node) return true; } - if (!node->gtLsraInfo.isLsraAdded || (node->OperGet() != GT_LCL_VAR)) + if (!IsLsraAdded(node) || (node->OperGet() != GT_LCL_VAR)) { return false; } diff --git a/src/coreclr/src/jit/lsra.h b/src/coreclr/src/jit/lsra.h index fb58364a1e4e19ba037163b32a184f393153a216..43bc05458d1cbbd301c8e95c8b368da692d8886d 100644 --- a/src/coreclr/src/jit/lsra.h +++ b/src/coreclr/src/jit/lsra.h @@ -1,1784 +1,2056 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. -/*****************************************************************************/ - -#ifndef _LSRA_H_ -#define _LSRA_H_ - -#include "arraylist.h" -#include "smallhash.h" -#include "nodeinfo.h" - -// Minor and forward-reference types -class Interval; -class RefPosition; -class LinearScan; -class RegRecord; - -template -class ArrayStack; - -// LsraLocation tracks the linearized order of the nodes. -// Each node is assigned two LsraLocations - one for all the uses and all but the last -// def, and a second location for the last def (if any) - -typedef unsigned int LsraLocation; -const unsigned int MinLocation = 0; -const unsigned int MaxLocation = UINT_MAX; -// max number of registers an operation could require internally (in addition to uses and defs) -const unsigned int MaxInternalRegisters = 8; -const unsigned int RegisterTypeCount = 2; - -typedef var_types RegisterType; -#define IntRegisterType TYP_INT -#define FloatRegisterType TYP_FLOAT - -inline regMaskTP calleeSaveRegs(RegisterType rt) -{ - return varTypeIsIntegralOrI(rt) ? RBM_INT_CALLEE_SAVED : RBM_FLT_CALLEE_SAVED; -} - -struct LocationInfo -{ - LsraLocation loc; - - // Reg Index in case of multi-reg result producing call node. 
- // Indicates the position of the register that this location refers to. - // The max bits needed is based on max value of MAX_RET_REG_COUNT value - // across all targets and that happens 4 on on Arm. Hence index value - // would be 0..MAX_RET_REG_COUNT-1. - unsigned multiRegIdx : 2; - - Interval* interval; - GenTree* treeNode; - - LocationInfo(LsraLocation l, Interval* i, GenTree* t, unsigned regIdx = 0) - : loc(l), multiRegIdx(regIdx), interval(i), treeNode(t) - { - assert(multiRegIdx == regIdx); - } - - // default constructor for data structures - LocationInfo() - { - } -}; - -struct LsraBlockInfo -{ - // bbNum of the predecessor to use for the register location of live-in variables. - // 0 for fgFirstBB. - unsigned int predBBNum; - BasicBlock::weight_t weight; - bool hasCriticalInEdge; - bool hasCriticalOutEdge; - -#if TRACK_LSRA_STATS - // Per block maintained LSRA statistics. - - // Number of spills of local vars or tree temps in this basic block. - unsigned spillCount; - - // Number of GT_COPY nodes inserted in this basic block while allocating regs. - // Note that GT_COPY nodes are also inserted as part of basic block boundary - // resolution, which are accounted against resolutionMovCount but not - // against copyRegCount. - unsigned copyRegCount; - - // Number of resolution moves inserted in this basic block. - unsigned resolutionMovCount; - - // Number of critical edges from this block that are split. - unsigned splitEdgeCount; -#endif // TRACK_LSRA_STATS -}; - -// This is sort of a bit mask -// The low order 2 bits will be 1 for defs, and 2 for uses -enum RefType : unsigned char -{ -#define DEF_REFTYPE(memberName, memberValue, shortName) memberName = memberValue, -#include "lsra_reftypes.h" -#undef DEF_REFTYPE -}; - -// position in a block (for resolution) -enum BlockStartOrEnd -{ - BlockPositionStart = 0, - BlockPositionEnd = 1, - PositionCount = 2 -}; - -inline bool RefTypeIsUse(RefType refType) -{ - return ((refType & RefTypeUse) == RefTypeUse); -} - -inline bool RefTypeIsDef(RefType refType) -{ - return ((refType & RefTypeDef) == RefTypeDef); -} - -typedef regNumberSmall* VarToRegMap; - -template -class ListElementAllocator -{ -private: - template - friend class ListElementAllocator; - - Compiler* m_compiler; - -public: - ListElementAllocator(Compiler* compiler) : m_compiler(compiler) - { - } - - template - ListElementAllocator(const ListElementAllocator& other) : m_compiler(other.m_compiler) - { - } - - ElementType* allocate(size_t count) - { - return reinterpret_cast(m_compiler->compGetMem(sizeof(ElementType) * count, MemKind)); - } - - void deallocate(ElementType* pointer, size_t count) - { - } - - template - struct rebind - { - typedef ListElementAllocator allocator; - }; -}; - -typedef ListElementAllocator LinearScanMemoryAllocatorInterval; -typedef ListElementAllocator LinearScanMemoryAllocatorRefPosition; - -typedef jitstd::list IntervalList; -typedef jitstd::list RefPositionList; - -class Referenceable -{ -public: - Referenceable() - { - firstRefPosition = nullptr; - recentRefPosition = nullptr; - lastRefPosition = nullptr; - isActive = false; - } - - // A linked list of RefPositions. These are only traversed in the forward - // direction, and are not moved, so they don't need to be doubly linked - // (see RefPosition). 
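The forward-only discipline described above is what lets these lists stay singly linked; here is a hypothetical sketch of the getNextRefPosition idea (simplified types, not the real implementation):

    // Starting from the most recently visited reference, advance to the
    // first one at or after 'loc'. This works because traversal never
    // moves backwards, so 'recent' only ever advances.
    struct RefPos
    {
        RefPos*  next;
        unsigned location;
    };

    RefPos* getNextAtOrAfter(RefPos* recent, unsigned loc)
    {
        RefPos* p = recent;
        while ((p != nullptr) && (p->location < loc))
        {
            p = p->next;
        }
        return p;
    }
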
-
-    RefPosition* firstRefPosition;
-    RefPosition* recentRefPosition;
-    RefPosition* lastRefPosition;
-
-    bool isActive;
-
-    // Get the position of the next reference which is at or greater than
-    // the current location (relies upon recentRefPosition being updated
-    // during traversal).
-    RefPosition* getNextRefPosition();
-    LsraLocation getNextRefLocation();
-};
-
-class RegRecord : public Referenceable
-{
-public:
-    RegRecord()
-    {
-        assignedInterval    = nullptr;
-        previousInterval    = nullptr;
-        regNum              = REG_NA;
-        isCalleeSave        = false;
-        registerType        = IntRegisterType;
-        isBusyUntilNextKill = false;
-    }
-
-    void init(regNumber reg)
-    {
-#ifdef _TARGET_ARM64_
-        // The Zero register, or the SP
-        if ((reg == REG_ZR) || (reg == REG_SP))
-        {
-            // IsGeneralRegister returns false for REG_ZR and REG_SP
-            regNum       = reg;
-            registerType = IntRegisterType;
-        }
-        else
-#endif
-            if (emitter::isFloatReg(reg))
-        {
-            registerType = FloatRegisterType;
-        }
-        else
-        {
-            // The constructor defaults to IntRegisterType
-            assert(emitter::isGeneralRegister(reg) && registerType == IntRegisterType);
-        }
-        regNum       = reg;
-        isCalleeSave = ((RBM_CALLEE_SAVED & genRegMask(reg)) != 0);
-    }
-
-#ifdef DEBUG
-    // print out representation
-    void dump();
-    // concise representation for embedding
-    void tinyDump();
-#endif // DEBUG
-
-    bool isFree();
-
-    // RefPosition   * getNextRefPosition();
-    // LsraLocation     getNextRefLocation();
-
-    // DATA
-
-    // interval to which this register is currently allocated.
-    // If the interval is inactive (isActive == false) then it is not currently live,
-    // and the register can be unassigned (i.e. setting assignedInterval to nullptr)
-    // without spilling the register.
-    Interval* assignedInterval;
-    // Interval to which this register was previously allocated, and which was unassigned
-    // because it was inactive. This register will be reassigned to this Interval when
-    // assignedInterval becomes inactive.
-    Interval* previousInterval;
-
-    regNumber    regNum;
-    bool         isCalleeSave;
-    RegisterType registerType;
-    // This register must be considered busy until the next time it is explicitly killed.
-    // This is used so that putarg_reg can avoid killing its lclVar source, while avoiding
-    // the problem with the reg becoming free if the last-use is encountered before the call.
-    bool isBusyUntilNextKill;
-
-    bool conflictingFixedRegReference(RefPosition* refPosition);
-};
-
-inline bool leafInRange(GenTree* leaf, int lower, int upper)
-{
-    if (!leaf->IsIntCnsFitsInI32())
-    {
-        return false;
-    }
-    if (leaf->gtIntCon.gtIconVal < lower)
-    {
-        return false;
-    }
-    if (leaf->gtIntCon.gtIconVal > upper)
-    {
-        return false;
-    }
-
-    return true;
-}
-
-inline bool leafInRange(GenTree* leaf, int lower, int upper, int multiple)
-{
-    if (!leafInRange(leaf, lower, upper))
-    {
-        return false;
-    }
-    if (leaf->gtIntCon.gtIconVal % multiple)
-    {
-        return false;
-    }
-
-    return true;
-}
-
-inline bool leafAddInRange(GenTree* leaf, int lower, int upper, int multiple = 1)
-{
-    if (leaf->OperGet() != GT_ADD)
-    {
-        return false;
-    }
-    return leafInRange(leaf->gtOp.gtOp2, lower, upper, multiple);
-}
-
-inline bool isCandidateVar(LclVarDsc* varDsc)
-{
-    return varDsc->lvLRACandidate;
-}
-
-/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
-XX                                                                           XX
-XX                           LinearScan                                      XX
-XX                                                                           XX
-XX This is the container for the Linear Scan data structures and methods.
XX -XX XX -XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX -XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX -*/ -// OPTION 1: The algorithm as described in "Optimized Interval Splitting in a -// Linear Scan Register Allocator". It is driven by iterating over the Interval -// lists. In this case, we need multiple IntervalLists, and Intervals will be -// moved between them so they must be easily updated. - -// OPTION 2: The algorithm is driven by iterating over the RefPositions. In this -// case, we only need a single IntervalList, and it won't be updated. -// The RefPosition must refer to its Interval, and we need to be able to traverse -// to the next RefPosition in code order -// THIS IS THE OPTION CURRENTLY BEING PURSUED - -class LocationInfoList; -class LocationInfoListNodePool; - -class LinearScan : public LinearScanInterface -{ - friend class RefPosition; - friend class Interval; - friend class Lowering; - friend class TreeNodeInfo; - -public: - // This could use further abstraction. From Compiler we need the tree, - // the flowgraph and the allocator. - LinearScan(Compiler* theCompiler); - - // This is the main driver - virtual void doLinearScan(); - - // TreeNodeInfo contains three register masks: src candidates, dst candidates, and internal condidates. - // Instead of storing actual register masks, however, which are large, we store a small index into a table - // of register masks, stored in this class. We create only as many distinct register masks as are needed. - // All identical register masks get the same index. The register mask table contains: - // 1. A mask containing all eligible integer registers. - // 2. A mask containing all elibible floating-point registers. - // 3. A mask for each of single register. - // 4. A mask for each combination of registers, created dynamically as required. - // - // Currently, the maximum number of masks allowed is a constant defined by 'numMasks'. The register mask - // table is never resized. It is also limited by the size of the index, currently an unsigned char. - CLANG_FORMAT_COMMENT_ANCHOR; - -#if defined(_TARGET_ARM64_) - static const int numMasks = 128; -#else - static const int numMasks = 64; -#endif - - regMaskTP* regMaskTable; - int nextFreeMask; - - typedef int RegMaskIndex; - - // allint is 0, allfloat is 1, all the single-bit masks start at 2 - enum KnownRegIndex - { - ALLINT_IDX = 0, - ALLFLOAT_IDX = 1, - FIRST_SINGLE_REG_IDX = 2 - }; - - RegMaskIndex GetIndexForRegMask(regMaskTP mask); - regMaskTP GetRegMaskForIndex(RegMaskIndex index); - void RemoveRegisterFromMasks(regNumber reg); - -#ifdef DEBUG - void dspRegisterMaskTable(); -#endif // DEBUG - - // Initialize the block traversal for LSRA. - // This resets the bbVisitedSet, and on the first invocation sets the blockSequence array, - // which determines the order in which blocks will be allocated (currently called during Lowering). - BasicBlock* startBlockSequence(); - // Move to the next block in sequence, updating the current block information. - BasicBlock* moveToNextBlock(); - // Get the next block to be scheduled without changing the current block, - // but updating the blockSequence during the first iteration if it is not fully computed. 
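Stepping back to the register-mask table described at the top of this class: the scheme is essentially an append-only intern table with linear lookup. A standalone sketch of that idea follows (sizes and helper bodies are illustrative, not the JIT's definitions):

    #include <cassert>
    #include <cstdint>

    using regMaskTP    = uint64_t;
    using RegMaskIndex = int;

    constexpr int numMasks = 64;
    regMaskTP     regMaskTable[numMasks];
    int           nextFreeMask = 0;

    RegMaskIndex GetIndexForRegMask(regMaskTP mask)
    {
        // Identical masks share one slot; a new mask is appended.
        for (int i = 0; i < nextFreeMask; i++)
        {
            if (regMaskTable[i] == mask)
            {
                return i;
            }
        }
        assert(nextFreeMask < numMasks); // the table is never resized
        regMaskTable[nextFreeMask] = mask;
        return nextFreeMask++;
    }

    regMaskTP GetRegMaskForIndex(RegMaskIndex index)
    {
        assert((index >= 0) && (index < nextFreeMask));
        return regMaskTable[index];
    }

    int main()
    {
        RegMaskIndex a = GetIndexForRegMask(0x3);
        RegMaskIndex b = GetIndexForRegMask(0x3); // same mask, same index
        assert(a == b);
        assert(GetRegMaskForIndex(a) == 0x3);
        return 0;
    }
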
- BasicBlock* getNextBlock(); - - // This is called during code generation to update the location of variables - virtual void recordVarLocationsAtStartOfBB(BasicBlock* bb); - - // This does the dataflow analysis and builds the intervals - void buildIntervals(); - - // This is where the actual assignment is done - void allocateRegisters(); - - // This is the resolution phase, where cross-block mismatches are fixed up - void resolveRegisters(); - - void writeRegisters(RefPosition* currentRefPosition, GenTree* tree); - - // Insert a copy in the case where a tree node value must be moved to a different - // register at the point of use, or it is reloaded to a different register - // than the one it was spilled from - void insertCopyOrReload(BasicBlock* block, GenTreePtr tree, unsigned multiRegIdx, RefPosition* refPosition); - -#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE - // Insert code to save and restore the upper half of a vector that lives - // in a callee-save register at the point of a call (the upper half is - // not preserved). - void insertUpperVectorSaveAndReload(GenTreePtr tree, RefPosition* refPosition, BasicBlock* block); -#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE - - // resolve along one block-block edge - enum ResolveType - { - ResolveSplit, - ResolveJoin, - ResolveCritical, - ResolveSharedCritical, - ResolveTypeCount - }; -#ifdef DEBUG - static const char* resolveTypeName[ResolveTypeCount]; -#endif - - enum WhereToInsert - { - InsertAtTop, - InsertAtBottom - }; - -#ifdef _TARGET_ARM_ - void addResolutionForDouble(BasicBlock* block, - GenTreePtr insertionPoint, - Interval** sourceIntervals, - regNumberSmall* location, - regNumber toReg, - regNumber fromReg, - ResolveType resolveType); -#endif - void addResolution( - BasicBlock* block, GenTreePtr insertionPoint, Interval* interval, regNumber outReg, regNumber inReg); - - void handleOutgoingCriticalEdges(BasicBlock* block); - - void resolveEdge(BasicBlock* fromBlock, BasicBlock* toBlock, ResolveType resolveType, VARSET_VALARG_TP liveSet); - - void resolveEdges(); - - // Finally, the register assignments are written back to the tree nodes. - void recordRegisterAssignments(); - - // Keep track of how many temp locations we'll need for spill - void initMaxSpill(); - void updateMaxSpill(RefPosition* refPosition); - void recordMaxSpill(); - - // max simultaneous spill locations used of every type - unsigned int maxSpill[TYP_COUNT]; - unsigned int currentSpill[TYP_COUNT]; - bool needFloatTmpForFPCall; - bool needDoubleTmpForFPCall; - -#ifdef DEBUG -private: - //------------------------------------------------------------------------ - // Should we stress lsra? - // This uses the same COMPLUS variable as rsStressRegs (COMPlus_JitStressRegs) - // However, the possible values and their interpretation are entirely different. - // - // The mask bits are currently divided into fields in which each non-zero value - // is a distinct stress option (e.g. 0x3 is not a combination of 0x1 and 0x2). - // However, subject to possible constraints (to be determined), the different - // fields can be combined (e.g. 0x7 is a combination of 0x3 and 0x4). - // Note that the field values are declared in a public enum, but the actual bits are - // only accessed via accessors. 
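To make the field layout concrete: the mask values below match the enums that follow (LSRA_LIMIT_MASK, LSRA_SELECT_MASK, LSRA_TRAVERSE_MASK), while the decoding itself is purely illustrative:

    #include <cassert>

    constexpr unsigned LSRA_LIMIT_MASK    = 0x3;  // register-limiting field
    constexpr unsigned LSRA_SELECT_MASK   = 0x1c; // selection-heuristics field
    constexpr unsigned LSRA_TRAVERSE_MASK = 0x60; // traversal-order field

    int main()
    {
        // 0x23 combines LSRA_LIMIT_SMALL_SET (0x3) with LSRA_TRAVERSE_LAYOUT (0x20):
        // within a field, 0x3 is a third distinct option, not 0x1 | 0x2.
        unsigned stressMask = 0x23;
        assert((stressMask & LSRA_LIMIT_MASK) == 0x3);
        assert((stressMask & LSRA_TRAVERSE_MASK) == 0x20);
        assert((stressMask & LSRA_SELECT_MASK) == 0);
        return 0;
    }
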
- - unsigned lsraStressMask; - - // This controls the registers available for allocation - enum LsraStressLimitRegs{LSRA_LIMIT_NONE = 0, LSRA_LIMIT_CALLEE = 0x1, LSRA_LIMIT_CALLER = 0x2, - LSRA_LIMIT_SMALL_SET = 0x3, LSRA_LIMIT_MASK = 0x3}; - - // When LSRA_LIMIT_SMALL_SET is specified, it is desirable to select a "mixed" set of caller- and callee-save - // registers, so as to get different coverage than limiting to callee or caller. - // At least for x86 and AMD64, and potentially other architecture that will support SIMD, - // we need a minimum of 5 fp regs in order to support the InitN intrinsic for Vector4. - // Hence the "SmallFPSet" has 5 elements. - CLANG_FORMAT_COMMENT_ANCHOR; - -#if defined(_TARGET_AMD64_) -#ifdef UNIX_AMD64_ABI - // On System V the RDI and RSI are not callee saved. Use R12 ans R13 as callee saved registers. - static const regMaskTP LsraLimitSmallIntSet = - (RBM_EAX | RBM_ECX | RBM_EBX | RBM_ETW_FRAMED_EBP | RBM_R12 | RBM_R13); -#else // !UNIX_AMD64_ABI - // On Windows Amd64 use the RDI and RSI as callee saved registers. - static const regMaskTP LsraLimitSmallIntSet = - (RBM_EAX | RBM_ECX | RBM_EBX | RBM_ETW_FRAMED_EBP | RBM_ESI | RBM_EDI); -#endif // !UNIX_AMD64_ABI - static const regMaskTP LsraLimitSmallFPSet = (RBM_XMM0 | RBM_XMM1 | RBM_XMM2 | RBM_XMM6 | RBM_XMM7); -#elif defined(_TARGET_ARM_) - static const regMaskTP LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R3 | RBM_R4); - static const regMaskTP LsraLimitSmallFPSet = (RBM_F0 | RBM_F1 | RBM_F2 | RBM_F16 | RBM_F17); -#elif defined(_TARGET_ARM64_) - static const regMaskTP LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R19 | RBM_R20); - static const regMaskTP LsraLimitSmallFPSet = (RBM_V0 | RBM_V1 | RBM_V2 | RBM_V8 | RBM_V9); -#elif defined(_TARGET_X86_) - static const regMaskTP LsraLimitSmallIntSet = (RBM_EAX | RBM_ECX | RBM_EDI); - static const regMaskTP LsraLimitSmallFPSet = (RBM_XMM0 | RBM_XMM1 | RBM_XMM2 | RBM_XMM6 | RBM_XMM7); -#else -#error Unsupported or unset target architecture -#endif // target - - LsraStressLimitRegs getStressLimitRegs() - { - return (LsraStressLimitRegs)(lsraStressMask & LSRA_LIMIT_MASK); - } - - regMaskTP getConstrainedRegMask(regMaskTP regMaskActual, regMaskTP regMaskConstrain, unsigned minRegCount); - regMaskTP stressLimitRegs(RefPosition* refPosition, regMaskTP mask); - - // This controls the heuristics used to select registers - // These can be combined. 
- enum LsraSelect{LSRA_SELECT_DEFAULT = 0, LSRA_SELECT_REVERSE_HEURISTICS = 0x04, - LSRA_SELECT_REVERSE_CALLER_CALLEE = 0x08, LSRA_SELECT_NEAREST = 0x10, LSRA_SELECT_MASK = 0x1c}; - LsraSelect getSelectionHeuristics() - { - return (LsraSelect)(lsraStressMask & LSRA_SELECT_MASK); - } - bool doReverseSelect() - { - return ((lsraStressMask & LSRA_SELECT_REVERSE_HEURISTICS) != 0); - } - bool doReverseCallerCallee() - { - return ((lsraStressMask & LSRA_SELECT_REVERSE_CALLER_CALLEE) != 0); - } - bool doSelectNearest() - { - return ((lsraStressMask & LSRA_SELECT_NEAREST) != 0); - } - - // This controls the order in which basic blocks are visited during allocation - enum LsraTraversalOrder{LSRA_TRAVERSE_LAYOUT = 0x20, LSRA_TRAVERSE_PRED_FIRST = 0x40, - LSRA_TRAVERSE_RANDOM = 0x60, // NYI - LSRA_TRAVERSE_DEFAULT = LSRA_TRAVERSE_PRED_FIRST, LSRA_TRAVERSE_MASK = 0x60}; - LsraTraversalOrder getLsraTraversalOrder() - { - if ((lsraStressMask & LSRA_TRAVERSE_MASK) == 0) - { - return LSRA_TRAVERSE_DEFAULT; - } - return (LsraTraversalOrder)(lsraStressMask & LSRA_TRAVERSE_MASK); - } - bool isTraversalLayoutOrder() - { - return getLsraTraversalOrder() == LSRA_TRAVERSE_LAYOUT; - } - bool isTraversalPredFirstOrder() - { - return getLsraTraversalOrder() == LSRA_TRAVERSE_PRED_FIRST; - } - - // This controls whether lifetimes should be extended to the entire method. - // Note that this has no effect under MinOpts - enum LsraExtendLifetimes{LSRA_DONT_EXTEND = 0, LSRA_EXTEND_LIFETIMES = 0x80, LSRA_EXTEND_LIFETIMES_MASK = 0x80}; - LsraExtendLifetimes getLsraExtendLifeTimes() - { - return (LsraExtendLifetimes)(lsraStressMask & LSRA_EXTEND_LIFETIMES_MASK); - } - bool extendLifetimes() - { - return getLsraExtendLifeTimes() == LSRA_EXTEND_LIFETIMES; - } - - // This controls whether variables locations should be set to the previous block in layout order - // (LSRA_BLOCK_BOUNDARY_LAYOUT), or to that of the highest-weight predecessor (LSRA_BLOCK_BOUNDARY_PRED - - // the default), or rotated (LSRA_BLOCK_BOUNDARY_ROTATE). - enum LsraBlockBoundaryLocations{LSRA_BLOCK_BOUNDARY_PRED = 0, LSRA_BLOCK_BOUNDARY_LAYOUT = 0x100, - LSRA_BLOCK_BOUNDARY_ROTATE = 0x200, LSRA_BLOCK_BOUNDARY_MASK = 0x300}; - LsraBlockBoundaryLocations getLsraBlockBoundaryLocations() - { - return (LsraBlockBoundaryLocations)(lsraStressMask & LSRA_BLOCK_BOUNDARY_MASK); - } - regNumber rotateBlockStartLocation(Interval* interval, regNumber targetReg, regMaskTP availableRegs); - - // This controls whether we always insert a GT_RELOAD instruction after a spill - // Note that this can be combined with LSRA_SPILL_ALWAYS (or not) - enum LsraReload{LSRA_NO_RELOAD_IF_SAME = 0, LSRA_ALWAYS_INSERT_RELOAD = 0x400, LSRA_RELOAD_MASK = 0x400}; - LsraReload getLsraReload() - { - return (LsraReload)(lsraStressMask & LSRA_RELOAD_MASK); - } - bool alwaysInsertReload() - { - return getLsraReload() == LSRA_ALWAYS_INSERT_RELOAD; - } - - // This controls whether we spill everywhere - enum LsraSpill{LSRA_DONT_SPILL_ALWAYS = 0, LSRA_SPILL_ALWAYS = 0x800, LSRA_SPILL_MASK = 0x800}; - LsraSpill getLsraSpill() - { - return (LsraSpill)(lsraStressMask & LSRA_SPILL_MASK); - } - bool spillAlways() - { - return getLsraSpill() == LSRA_SPILL_ALWAYS; - } - - // This controls whether RefPositions that lower/codegen indicated as reg optional be - // allocated a reg at all. 
- enum LsraRegOptionalControl{LSRA_REG_OPTIONAL_DEFAULT = 0, LSRA_REG_OPTIONAL_NO_ALLOC = 0x1000, - LSRA_REG_OPTIONAL_MASK = 0x1000}; - - LsraRegOptionalControl getLsraRegOptionalControl() - { - return (LsraRegOptionalControl)(lsraStressMask & LSRA_REG_OPTIONAL_MASK); - } - - bool regOptionalNoAlloc() - { - return getLsraRegOptionalControl() == LSRA_REG_OPTIONAL_NO_ALLOC; - } - - bool candidatesAreStressLimited() - { - return ((lsraStressMask & (LSRA_LIMIT_MASK | LSRA_SELECT_MASK)) != 0); - } - - // Dump support - void lsraDumpIntervals(const char* msg); - void dumpRefPositions(const char* msg); - void dumpVarRefPositions(const char* msg); - - static bool IsResolutionMove(GenTree* node); - static bool IsResolutionNode(LIR::Range& containingRange, GenTree* node); - - void verifyFinalAllocation(); - void verifyResolutionMove(GenTree* resolutionNode, LsraLocation currentLocation); -#else // !DEBUG - bool doSelectNearest() - { - return false; - } - bool extendLifetimes() - { - return false; - } - bool spillAlways() - { - return false; - } - // In a retail build we support only the default traversal order - bool isTraversalLayoutOrder() - { - return false; - } - bool isTraversalPredFirstOrder() - { - return true; - } - bool getLsraExtendLifeTimes() - { - return false; - } - bool candidatesAreStressLimited() - { - return false; - } -#endif // !DEBUG - -public: - // Used by Lowering when considering whether to split Longs, as well as by identifyCandidates(). - bool isRegCandidate(LclVarDsc* varDsc); - - bool isContainableMemoryOp(GenTree* node); - -private: - // Determine which locals are candidates for allocation - void identifyCandidates(); - - // determine which locals are used in EH constructs we don't want to deal with - void identifyCandidatesExceptionDataflow(); - - void buildPhysRegRecords(); - -#ifdef DEBUG - void checkLastUses(BasicBlock* block); -#endif // DEBUG - - void setFrameType(); - - // Update allocations at start/end of block - void unassignIntervalBlockStart(RegRecord* regRecord, VarToRegMap inVarToRegMap); - void processBlockEndAllocation(BasicBlock* current); - - // Record variable locations at start/end of block - void processBlockStartLocations(BasicBlock* current, bool allocationPass); - void processBlockEndLocations(BasicBlock* current); - -#ifdef _TARGET_ARM_ - bool isSecondHalfReg(RegRecord* regRec, Interval* interval); - RegRecord* getSecondHalfRegRec(RegRecord* regRec); - RegRecord* findAnotherHalfRegRec(RegRecord* regRec); - bool canSpillDoubleReg(RegRecord* physRegRecord, LsraLocation refLocation, unsigned* recentAssignedRefWeight); - void unassignDoublePhysReg(RegRecord* doubleRegRecord); -#endif - void updateAssignedInterval(RegRecord* reg, Interval* interval, RegisterType regType); - void updatePreviousInterval(RegRecord* reg, Interval* interval, RegisterType regType); - bool canRestorePreviousInterval(RegRecord* regRec, Interval* assignedInterval); - bool isAssignedToInterval(Interval* interval, RegRecord* regRec); - bool isRefPositionActive(RefPosition* refPosition, LsraLocation refLocation); - bool canSpillReg(RegRecord* physRegRecord, LsraLocation refLocation, unsigned* recentAssignedRefWeight); - bool isRegInUse(RegRecord* regRec, RefPosition* refPosition); - - RefType CheckBlockType(BasicBlock* block, BasicBlock* prevBlock); - - // insert refpositions representing prolog zero-inits which will be added later - void insertZeroInitRefPositions(); - - void AddMapping(GenTree* node, LsraLocation loc); - - // add physreg refpositions for a tree node, based on 
calling convention and instruction selection predictions - void addRefsForPhysRegMask(regMaskTP mask, LsraLocation currentLoc, RefType refType, bool isLastUse); - - void resolveConflictingDefAndUse(Interval* interval, RefPosition* defRefPosition); - - void buildRefPositionsForNode(GenTree* tree, - BasicBlock* block, - LocationInfoListNodePool& listNodePool, - HashTableBase& operandToLocationInfoMap, - LsraLocation loc); - -#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE - VARSET_VALRET_TP buildUpperVectorSaveRefPositions(GenTree* tree, LsraLocation currentLoc); - void buildUpperVectorRestoreRefPositions(GenTree* tree, LsraLocation currentLoc, VARSET_VALARG_TP liveLargeVectors); -#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE - -#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) - // For AMD64 on SystemV machines. This method - // is called as replacement for raUpdateRegStateForArg - // that is used on Windows. On System V systems a struct can be passed - // partially using registers from the 2 register files. - void unixAmd64UpdateRegStateForArg(LclVarDsc* argDsc); -#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) - - // Update reg state for an incoming register argument - void updateRegStateForArg(LclVarDsc* argDsc); - - inline bool isLocalDefUse(GenTree* tree) - { - return tree->gtLsraInfo.isLocalDefUse; - } - - inline bool isCandidateLocalRef(GenTree* tree) - { - if (tree->IsLocal()) - { - unsigned int lclNum = tree->gtLclVarCommon.gtLclNum; - assert(lclNum < compiler->lvaCount); - LclVarDsc* varDsc = compiler->lvaTable + tree->gtLclVarCommon.gtLclNum; - - return isCandidateVar(varDsc); - } - return false; - } - - static Compiler::fgWalkResult markAddrModeOperandsHelperMD(GenTreePtr tree, void* p); - - // Return the registers killed by the given tree node. - regMaskTP getKillSetForNode(GenTree* tree); - - // Given some tree node add refpositions for all the registers this node kills - bool buildKillPositionsForNode(GenTree* tree, LsraLocation currentLoc); - - regMaskTP allRegs(RegisterType rt); - regMaskTP allRegs(GenTree* tree); - regMaskTP allMultiRegCallNodeRegs(GenTreeCall* tree); - regMaskTP allSIMDRegs(); - regMaskTP internalFloatRegCandidates(); - - bool registerIsFree(regNumber regNum, RegisterType regType); - bool registerIsAvailable(RegRecord* physRegRecord, - LsraLocation currentLoc, - LsraLocation* nextRefLocationPtr, - RegisterType regType); - void freeRegister(RegRecord* physRegRecord); - void freeRegisters(regMaskTP regsToFree); - - regMaskTP getUseCandidates(GenTree* useNode); - regMaskTP getDefCandidates(GenTree* tree); - var_types getDefType(GenTree* tree); - - RefPosition* defineNewInternalTemp(GenTree* tree, - RegisterType regType, - LsraLocation currentLoc, - regMaskTP regMask DEBUGARG(unsigned minRegCandidateCount)); - - int buildInternalRegisterDefsForNode(GenTree* tree, - LsraLocation currentLoc, - RefPosition* defs[] DEBUGARG(unsigned minRegCandidateCount)); - - void buildInternalRegisterUsesForNode(GenTree* tree, - LsraLocation currentLoc, - RefPosition* defs[], - int total DEBUGARG(unsigned minRegCandidateCount)); - - void resolveLocalRef(BasicBlock* block, GenTreePtr treeNode, RefPosition* currentRefPosition); - - void insertMove(BasicBlock* block, GenTreePtr insertionPoint, unsigned lclNum, regNumber inReg, regNumber outReg); - - void insertSwap(BasicBlock* block, - GenTreePtr insertionPoint, - unsigned lclNum1, - regNumber reg1, - unsigned lclNum2, - regNumber reg2); - -public: - // TODO-Cleanup: unused? 
- class PhysRegIntervalIterator - { - public: - PhysRegIntervalIterator(LinearScan* theLinearScan) - { - nextRegNumber = (regNumber)0; - linearScan = theLinearScan; - } - RegRecord* GetNext() - { - return &linearScan->physRegs[nextRegNumber]; - } - - private: - // This assumes that the physical registers are contiguous, starting - // with a register number of 0 - regNumber nextRegNumber; - LinearScan* linearScan; - }; - -private: - Interval* newInterval(RegisterType regType); - - Interval* getIntervalForLocalVar(unsigned varIndex) - { - assert(varIndex < compiler->lvaTrackedCount); - assert(localVarIntervals[varIndex] != nullptr); - return localVarIntervals[varIndex]; - } - - Interval* getIntervalForLocalVarNode(GenTreeLclVarCommon* tree) - { - LclVarDsc* varDsc = &compiler->lvaTable[tree->gtLclNum]; - assert(varDsc->lvTracked); - return getIntervalForLocalVar(varDsc->lvVarIndex); - } - - RegRecord* getRegisterRecord(regNumber regNum); - - RefPosition* newRefPositionRaw(LsraLocation nodeLocation, GenTree* treeNode, RefType refType); - - RefPosition* newRefPosition(Interval* theInterval, - LsraLocation theLocation, - RefType theRefType, - GenTree* theTreeNode, - regMaskTP mask, - unsigned multiRegIdx = 0 DEBUGARG(unsigned minRegCandidateCount = 1)); - - RefPosition* newRefPosition( - regNumber reg, LsraLocation theLocation, RefType theRefType, GenTree* theTreeNode, regMaskTP mask); - - void applyCalleeSaveHeuristics(RefPosition* rp); - - void associateRefPosWithInterval(RefPosition* rp); - - void associateRefPosWithRegister(RefPosition* rp); - - unsigned getWeight(RefPosition* refPos); - - /***************************************************************************** - * Register management - ****************************************************************************/ - RegisterType getRegisterType(Interval* currentInterval, RefPosition* refPosition); - regNumber tryAllocateFreeReg(Interval* current, RefPosition* refPosition); - regNumber allocateBusyReg(Interval* current, RefPosition* refPosition, bool allocateIfProfitable); - regNumber assignCopyReg(RefPosition* refPosition); - - bool isMatchingConstant(RegRecord* physRegRecord, RefPosition* refPosition); - bool isSpillCandidate(Interval* current, - RefPosition* refPosition, - RegRecord* physRegRecord, - LsraLocation& nextLocation); - void checkAndAssignInterval(RegRecord* regRec, Interval* interval); - void assignPhysReg(RegRecord* regRec, Interval* interval); - void assignPhysReg(regNumber reg, Interval* interval) - { - assignPhysReg(getRegisterRecord(reg), interval); - } - - bool isAssigned(RegRecord* regRec ARM_ARG(RegisterType newRegType)); - bool isAssigned(RegRecord* regRec, LsraLocation lastLocation ARM_ARG(RegisterType newRegType)); - void checkAndClearInterval(RegRecord* regRec, RefPosition* spillRefPosition); - void unassignPhysReg(RegRecord* regRec ARM_ARG(RegisterType newRegType)); - void unassignPhysReg(RegRecord* regRec, RefPosition* spillRefPosition); - void unassignPhysRegNoSpill(RegRecord* reg); - void unassignPhysReg(regNumber reg) - { - unassignPhysReg(getRegisterRecord(reg), nullptr); - } - - void setIntervalAsSpilled(Interval* interval); - void setIntervalAsSplit(Interval* interval); - void spillInterval(Interval* interval, RefPosition* fromRefPosition, RefPosition* toRefPosition); - - void spillGCRefs(RefPosition* killRefPosition); - - /***************************************************************************** - * For Resolution phase - 
****************************************************************************/ - // TODO-Throughput: Consider refactoring this so that we keep a map from regs to vars for better scaling - unsigned int regMapCount; - - // When we split edges, we create new blocks, and instead of expanding the VarToRegMaps, we - // rely on the property that the "in" map is the same as the "from" block of the edge, and the - // "out" map is the same as the "to" block of the edge (by construction). - // So, for any block whose bbNum is greater than bbNumMaxBeforeResolution, we use the - // splitBBNumToTargetBBNumMap. - // TODO-Throughput: We may want to look into the cost/benefit tradeoff of doing this vs. expanding - // the arrays. - - unsigned bbNumMaxBeforeResolution; - struct SplitEdgeInfo - { - unsigned fromBBNum; - unsigned toBBNum; - }; - typedef JitHashTable, SplitEdgeInfo> SplitBBNumToTargetBBNumMap; - SplitBBNumToTargetBBNumMap* splitBBNumToTargetBBNumMap; - SplitBBNumToTargetBBNumMap* getSplitBBNumToTargetBBNumMap() - { - if (splitBBNumToTargetBBNumMap == nullptr) - { - splitBBNumToTargetBBNumMap = - new (getAllocator(compiler)) SplitBBNumToTargetBBNumMap(getAllocator(compiler)); - } - return splitBBNumToTargetBBNumMap; - } - SplitEdgeInfo getSplitEdgeInfo(unsigned int bbNum); - - void initVarRegMaps(); - void setInVarRegForBB(unsigned int bbNum, unsigned int varNum, regNumber reg); - void setOutVarRegForBB(unsigned int bbNum, unsigned int varNum, regNumber reg); - VarToRegMap getInVarToRegMap(unsigned int bbNum); - VarToRegMap getOutVarToRegMap(unsigned int bbNum); - void setVarReg(VarToRegMap map, unsigned int trackedVarIndex, regNumber reg); - regNumber getVarReg(VarToRegMap map, unsigned int trackedVarIndex); - // Initialize the incoming VarToRegMap to the given map values (generally a predecessor of - // the block) - VarToRegMap setInVarToRegMap(unsigned int bbNum, VarToRegMap srcVarToRegMap); - - regNumber getTempRegForResolution(BasicBlock* fromBlock, BasicBlock* toBlock, var_types type); - -#ifdef DEBUG - void dumpVarToRegMap(VarToRegMap map); - void dumpInVarToRegMap(BasicBlock* block); - void dumpOutVarToRegMap(BasicBlock* block); - - // There are three points at which a tuple-style dump is produced, and each - // differs slightly: - // - In LSRA_DUMP_PRE, it does a simple dump of each node, with indications of what - // tree nodes are consumed. - // - In LSRA_DUMP_REFPOS, which is after the intervals are built, but before - // register allocation, each node is dumped, along with all of the RefPositions, - // The Intervals are identifed as Lnnn for lclVar intervals, Innn for for other - // intervals, and Tnnn for internal temps. - // - In LSRA_DUMP_POST, which is after register allocation, the registers are - // shown. - - enum LsraTupleDumpMode{LSRA_DUMP_PRE, LSRA_DUMP_REFPOS, LSRA_DUMP_POST}; - void lsraGetOperandString(GenTreePtr tree, - LsraTupleDumpMode mode, - char* operandString, - unsigned operandStringLength); - void lsraDispNode(GenTreePtr tree, LsraTupleDumpMode mode, bool hasDest); - void DumpOperandDefs( - GenTree* operand, bool& first, LsraTupleDumpMode mode, char* operandString, const unsigned operandStringLength); - void TupleStyleDump(LsraTupleDumpMode mode); - - LsraLocation maxNodeLocation; - - // Width of various fields - used to create a streamlined dump during allocation that shows the - // state of all the registers in columns. 
- int regColumnWidth; - int regTableIndent; - - const char* columnSeparator; - const char* line; - const char* leftBox; - const char* middleBox; - const char* rightBox; - - static const int MAX_FORMAT_CHARS = 12; - char intervalNameFormat[MAX_FORMAT_CHARS]; - char regNameFormat[MAX_FORMAT_CHARS]; - char shortRefPositionFormat[MAX_FORMAT_CHARS]; - char emptyRefPositionFormat[MAX_FORMAT_CHARS]; - char indentFormat[MAX_FORMAT_CHARS]; - static const int MAX_LEGEND_FORMAT_CHARS = 25; - char bbRefPosFormat[MAX_LEGEND_FORMAT_CHARS]; - char legendFormat[MAX_LEGEND_FORMAT_CHARS]; - - // How many rows have we printed since last printing a "title row"? - static const int MAX_ROWS_BETWEEN_TITLES = 50; - int rowCountSinceLastTitle; - // Current mask of registers being printed in the dump. - regMaskTP lastDumpedRegisters; - regMaskTP registersToDump; - int lastUsedRegNumIndex; - bool shouldDumpReg(regNumber regNum) - { - return (registersToDump & genRegMask(regNum)) != 0; - } - - void dumpRegRecordHeader(); - void dumpRegRecordTitle(); - void dumpRegRecordTitleIfNeeded(); - void dumpRegRecordTitleLines(); - void dumpRegRecords(); - // An abbreviated RefPosition dump for printing with column-based register state - void dumpRefPositionShort(RefPosition* refPosition, BasicBlock* currentBlock); - // Print the number of spaces occupied by a dumpRefPositionShort() - void dumpEmptyRefPosition(); - // A dump of Referent, in exactly regColumnWidth characters - void dumpIntervalName(Interval* interval); - - // Events during the allocation phase that cause some dump output - enum LsraDumpEvent{ - // Conflicting def/use - LSRA_EVENT_DEFUSE_CONFLICT, LSRA_EVENT_DEFUSE_FIXED_DELAY_USE, LSRA_EVENT_DEFUSE_CASE1, LSRA_EVENT_DEFUSE_CASE2, - LSRA_EVENT_DEFUSE_CASE3, LSRA_EVENT_DEFUSE_CASE4, LSRA_EVENT_DEFUSE_CASE5, LSRA_EVENT_DEFUSE_CASE6, - - // Spilling - LSRA_EVENT_SPILL, LSRA_EVENT_SPILL_EXTENDED_LIFETIME, LSRA_EVENT_RESTORE_PREVIOUS_INTERVAL, - LSRA_EVENT_RESTORE_PREVIOUS_INTERVAL_AFTER_SPILL, LSRA_EVENT_DONE_KILL_GC_REFS, - - // Block boundaries - LSRA_EVENT_START_BB, LSRA_EVENT_END_BB, - - // Miscellaneous - LSRA_EVENT_FREE_REGS, - - // Characteristics of the current RefPosition - LSRA_EVENT_INCREMENT_RANGE_END, // ??? 
- LSRA_EVENT_LAST_USE, LSRA_EVENT_LAST_USE_DELAYED, LSRA_EVENT_NEEDS_NEW_REG, - - // Allocation decisions - LSRA_EVENT_FIXED_REG, LSRA_EVENT_EXP_USE, LSRA_EVENT_ZERO_REF, LSRA_EVENT_NO_ENTRY_REG_ALLOCATED, - LSRA_EVENT_KEPT_ALLOCATION, LSRA_EVENT_COPY_REG, LSRA_EVENT_MOVE_REG, LSRA_EVENT_ALLOC_REG, - LSRA_EVENT_ALLOC_SPILLED_REG, LSRA_EVENT_NO_REG_ALLOCATED, LSRA_EVENT_RELOAD, LSRA_EVENT_SPECIAL_PUTARG, - LSRA_EVENT_REUSE_REG, - }; - void dumpLsraAllocationEvent(LsraDumpEvent event, - Interval* interval = nullptr, - regNumber reg = REG_NA, - BasicBlock* currentBlock = nullptr); - - void dumpBlockHeader(BasicBlock* block); - - void validateIntervals(); -#endif // DEBUG - -#if TRACK_LSRA_STATS - enum LsraStat{ - LSRA_STAT_SPILL, LSRA_STAT_COPY_REG, LSRA_STAT_RESOLUTION_MOV, LSRA_STAT_SPLIT_EDGE, - }; - - unsigned regCandidateVarCount; - void updateLsraStat(LsraStat stat, unsigned currentBBNum); - - void dumpLsraStats(FILE* file); - -#define INTRACK_STATS(x) x -#else // !TRACK_LSRA_STATS -#define INTRACK_STATS(x) -#endif // !TRACK_LSRA_STATS - - Compiler* compiler; - -private: -#if MEASURE_MEM_ALLOC - CompAllocator* lsraAllocator; -#endif - - CompAllocator* getAllocator(Compiler* comp) - { -#if MEASURE_MEM_ALLOC - if (lsraAllocator == nullptr) - { - lsraAllocator = new (comp, CMK_LSRA) CompAllocator(comp, CMK_LSRA); - } - return lsraAllocator; -#else - return comp->getAllocator(); -#endif - } - -#ifdef DEBUG - // This is used for dumping - RefPosition* activeRefPosition; -#endif // DEBUG - - IntervalList intervals; - - RegRecord physRegs[REG_COUNT]; - - // Map from tracked variable index to Interval*. - Interval** localVarIntervals; - - // Set of blocks that have been visited. - BlockSet bbVisitedSet; - void markBlockVisited(BasicBlock* block) - { - BlockSetOps::AddElemD(compiler, bbVisitedSet, block->bbNum); - } - void clearVisitedBlocks() - { - BlockSetOps::ClearD(compiler, bbVisitedSet); - } - bool isBlockVisited(BasicBlock* block) - { - return BlockSetOps::IsMember(compiler, bbVisitedSet, block->bbNum); - } - -#if DOUBLE_ALIGN - bool doDoubleAlign; -#endif - - // A map from bbNum to the block information used during register allocation. - LsraBlockInfo* blockInfo; - BasicBlock* findPredBlockForLiveIn(BasicBlock* block, BasicBlock* prevBlock DEBUGARG(bool* pPredBlockIsAllocated)); - - // The order in which the blocks will be allocated. - // This is any array of BasicBlock*, in the order in which they should be traversed. - BasicBlock** blockSequence; - // The verifiedAllBBs flag indicates whether we have verified that all BBs have been - // included in the blockSeuqence above, during setBlockSequence(). - bool verifiedAllBBs; - void setBlockSequence(); - int compareBlocksForSequencing(BasicBlock* block1, BasicBlock* block2, bool useBlockWeights); - BasicBlockList* blockSequenceWorkList; - bool blockSequencingDone; - void addToBlockSequenceWorkList(BlockSet sequencedBlockSet, BasicBlock* block, BlockSet& predSet); - void removeFromBlockSequenceWorkList(BasicBlockList* listNode, BasicBlockList* prevNode); - BasicBlock* getNextCandidateFromWorkList(); - - // The bbNum of the block being currently allocated or resolved. - unsigned int curBBNum; - // The ordinal of the block we're on (i.e. this is the curBBSeqNum-th block we've allocated). - unsigned int curBBSeqNum; - // The number of blocks that we've sequenced. - unsigned int bbSeqCount; - // The Location of the start of the current block. - LsraLocation curBBStartLocation; - // True if the method contains any critical edges. 
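The sequencing state above (blockSequence, curBBSeqNum, bbSeqCount) supports a simple iterator-style walk; this is a hypothetical, heavily simplified driver, not the allocator's actual loop:

    struct BasicBlock;

    struct SequenceWalker
    {
        BasicBlock** blockSequence; // filled in by setBlockSequence()
        unsigned     bbSeqCount;
        unsigned     curBBSeqNum = 0;

        BasicBlock* start()
        {
            curBBSeqNum = 0;
            return next();
        }
        BasicBlock* next()
        {
            return (curBBSeqNum < bbSeqCount) ? blockSequence[curBBSeqNum++] : nullptr;
        }
    };

    void walkAll(SequenceWalker& w)
    {
        for (BasicBlock* block = w.start(); block != nullptr; block = w.next())
        {
            // ... process this block's RefPositions ...
        }
    }
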
- bool hasCriticalEdges; - - // True if there are any register candidate lclVars available for allocation. - bool enregisterLocalVars; - - virtual bool willEnregisterLocalVars() const - { - return enregisterLocalVars; - } - - // Ordered list of RefPositions - RefPositionList refPositions; - - // Per-block variable location mappings: an array indexed by block number that yields a - // pointer to an array of regNumber, one per variable. - VarToRegMap* inVarToRegMaps; - VarToRegMap* outVarToRegMaps; - - // A temporary VarToRegMap used during the resolution of critical edges. - VarToRegMap sharedCriticalVarToRegMap; - - PhasedVar availableIntRegs; - PhasedVar availableFloatRegs; - PhasedVar availableDoubleRegs; - - // The set of all register candidates. Note that this may be a subset of tracked vars. - VARSET_TP registerCandidateVars; - // Current set of live register candidate vars, used during building of RefPositions to determine - // whether to preference to callee-save. - VARSET_TP currentLiveVars; - // Set of variables that may require resolution across an edge. - // This is first constructed during interval building, to contain all the lclVars that are live at BB edges. - // Then, any lclVar that is always in the same register is removed from the set. - VARSET_TP resolutionCandidateVars; - // This set contains all the lclVars that are ever spilled or split. - VARSET_TP splitOrSpilledVars; - // Set of floating point variables to consider for callee-save registers. - VARSET_TP fpCalleeSaveCandidateVars; -#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE -#if defined(_TARGET_AMD64_) - static bool varTypeNeedsPartialCalleeSave(var_types type) - { - return (emitTypeSize(type) == 32); - } - static const var_types LargeVectorSaveType = TYP_SIMD16; -#elif defined(_TARGET_ARM64_) - static bool varTypeNeedsPartialCalleeSave(var_types type) - { - // ARM64 ABI FP Callee save registers only require Callee to save lower 8 Bytes - // For SIMD types longer then 8 bytes Caller is responsible for saving and restoring Upper bytes. - return (emitTypeSize(type) == 16); - } - static const var_types LargeVectorSaveType = TYP_DOUBLE; -#else // !defined(_TARGET_AMD64_) && !defined(_TARGET_ARM64_) -#error("Unknown target architecture for FEATURE_SIMD") -#endif // !defined(_TARGET_AMD64_) && !defined(_TARGET_ARM64_) - - // Set of large vector (TYP_SIMD32 on AVX) variables. - VARSET_TP largeVectorVars; - // Set of large vector (TYP_SIMD32 on AVX) variables to consider for callee-save registers. 
- VARSET_TP largeVectorCalleeSaveCandidateVars; -#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE - - //----------------------------------------------------------------------- - // TreeNodeInfo methods - //----------------------------------------------------------------------- - - void TreeNodeInfoInit(GenTree* stmt); - - void TreeNodeInfoInitCheckByteable(GenTree* tree); - - bool CheckAndSetDelayFree(GenTree* delayUseSrc); - - void TreeNodeInfoInitSimple(GenTree* tree); - int GetOperandSourceCount(GenTree* node); - int GetIndirSourceCount(GenTreeIndir* indirTree); - void HandleFloatVarArgs(GenTreeCall* call, GenTree* argNode, bool* callHasFloatRegArgs); - - void TreeNodeInfoInitStoreLoc(GenTree* tree); - void TreeNodeInfoInitReturn(GenTree* tree); - void TreeNodeInfoInitShiftRotate(GenTree* tree); - void TreeNodeInfoInitPutArgReg(GenTreeUnOp* node); - void TreeNodeInfoInitCall(GenTreeCall* call); - void TreeNodeInfoInitCmp(GenTreePtr tree); - void TreeNodeInfoInitStructArg(GenTreePtr structArg); - void TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode); - void TreeNodeInfoInitModDiv(GenTree* tree); - void TreeNodeInfoInitIntrinsic(GenTree* tree); - void TreeNodeInfoInitStoreLoc(GenTreeLclVarCommon* tree); - void TreeNodeInfoInitIndir(GenTreeIndir* indirTree); - void TreeNodeInfoInitGCWriteBarrier(GenTree* tree); - void TreeNodeInfoInitCast(GenTree* tree); - -#ifdef _TARGET_X86_ - bool ExcludeNonByteableRegisters(GenTree* tree); -#endif - -#if defined(_TARGET_XARCH_) - // returns true if the tree can use the read-modify-write memory instruction form - bool isRMWRegOper(GenTreePtr tree); - void TreeNodeInfoInitMul(GenTreePtr tree); - void SetContainsAVXFlags(bool isFloatingPointType = true, unsigned sizeOfSIMDVector = 0); -#endif // defined(_TARGET_XARCH_) - -#ifdef FEATURE_SIMD - void TreeNodeInfoInitSIMD(GenTreeSIMD* tree); -#endif // FEATURE_SIMD - -#if FEATURE_HW_INTRINSICS - void TreeNodeInfoInitHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree); -#endif // FEATURE_HW_INTRINSICS - - void TreeNodeInfoInitPutArgStk(GenTreePutArgStk* argNode); -#ifdef _TARGET_ARM_ - void TreeNodeInfoInitPutArgSplit(GenTreePutArgSplit* tree); -#endif - void TreeNodeInfoInitLclHeap(GenTree* tree); -}; - -/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX -XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX -XX XX -XX Interval XX -XX XX -XX This is the fundamental data structure for linear scan register XX -XX allocation. It represents the live range(s) for a variable or temp. 
XX
-XX                                                                           XX
-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
-*/
-
-class Interval : public Referenceable
-{
-public:
-    Interval(RegisterType registerType, regMaskTP registerPreferences)
-        : registerPreferences(registerPreferences)
-        , relatedInterval(nullptr)
-        , assignedReg(nullptr)
-        , registerType(registerType)
-        , isLocalVar(false)
-        , isSplit(false)
-        , isSpilled(false)
-        , isInternal(false)
-        , isStructField(false)
-        , isPromotedStruct(false)
-        , hasConflictingDefUse(false)
-        , hasNonCommutativeRMWDef(false)
-        , isSpecialPutArg(false)
-        , preferCalleeSave(false)
-        , isConstant(false)
-        , physReg(REG_COUNT)
-#ifdef DEBUG
-        , intervalIndex(0)
-#endif
-        , varNum(0)
-    {
-    }
-
-#ifdef DEBUG
-    // print out representation
-    void dump();
-    // concise representation for embedding
-    void tinyDump();
-    // extremely concise representation
-    void microDump();
-#endif // DEBUG
-
-    void setLocalNumber(Compiler* compiler, unsigned lclNum, LinearScan* l);
-
-    // Fixed registers for which this Interval has a preference
-    regMaskTP registerPreferences;
-
-    // The relatedInterval is:
-    //  - for any other interval, it is the interval to which this interval
-    //    is currently preferenced (e.g. because they are related by a copy)
-    Interval* relatedInterval;
-
-    // The assignedReg is the RegRecord for the register to which this interval
-    // has been assigned at some point - if the interval is active, this is the
-    // register it currently occupies.
-    RegRecord* assignedReg;
-
-    // DECIDE : put this in a union or do something w/ inheritance?
-    // this is an interval for a physical register, not an allocatable entity
-
-    RegisterType registerType;
-    bool         isLocalVar : 1;
-    // Indicates whether this interval has been assigned to different registers
-    bool isSplit : 1;
-    // Indicates whether this interval is ever spilled
-    bool isSpilled : 1;
-    // indicates an interval representing the internal requirements for
-    // generating code for a node (temp registers internal to the node)
-    // Note that this interval may live beyond a node in the GT_ARR_LENREF/GT_IND
-    // case (though never lives beyond a stmt)
-    bool isInternal : 1;
-    // true if this is a LocalVar for a struct field
-    bool isStructField : 1;
-    // true iff this is a GT_LDOBJ for a fully promoted (PROMOTION_TYPE_INDEPENDENT) struct
-    bool isPromotedStruct : 1;
-    // true if this is an SDSU interval for which the def and use have conflicting register
-    // requirements
-    bool hasConflictingDefUse : 1;
-    // true if this interval is defined by a non-commutative 2-operand instruction
-    bool hasNonCommutativeRMWDef : 1;
-
-    // True if this interval is defined by a putArg, whose source is a non-last-use lclVar.
-    // During allocation, this flag will be cleared if the source is not already in the required register.
-    // Otherwise, we will leave the register allocated to the lclVar, but mark the RegRecord as
-    // isBusyUntilNextKill, so that it won't be reused if the lclVar goes dead before the call.
-    bool isSpecialPutArg : 1;
-
-    // True if this interval interferes with a call.
-    bool preferCalleeSave : 1;
-
-    // True if this interval is defined by a constant node that may be reused and/or may be
-    // able to reuse a constant that's already in a register.
-    bool isConstant : 1;
-
-    // The register to which it is currently assigned.
- regNumber physReg; - -#ifdef DEBUG - unsigned int intervalIndex; -#endif // DEBUG - - unsigned int varNum; // This is the "variable number": the index into the lvaTable array - - LclVarDsc* getLocalVar(Compiler* comp) - { - assert(isLocalVar); - return &(comp->lvaTable[this->varNum]); - } - - // Get the local tracked variable "index" (lvVarIndex), used in bitmasks. - unsigned getVarIndex(Compiler* comp) - { - LclVarDsc* varDsc = getLocalVar(comp); - assert(varDsc->lvTracked); // If this isn't true, we shouldn't be calling this function! - return varDsc->lvVarIndex; - } - - bool isAssignedTo(regNumber regNum) - { - // This uses regMasks to handle the case where a double actually occupies two registers - // TODO-Throughput: This could/should be done more cheaply. - return (physReg != REG_NA && (genRegMask(physReg, registerType) & genRegMask(regNum)) != RBM_NONE); - } - - // Assign the related interval. - void assignRelatedInterval(Interval* newRelatedInterval) - { -#ifdef DEBUG - if (VERBOSE) - { - printf("Assigning related "); - newRelatedInterval->microDump(); - printf(" to "); - this->microDump(); - printf("\n"); - } -#endif // DEBUG - relatedInterval = newRelatedInterval; - } - - // Assign the related interval, but only if it isn't already assigned. - void assignRelatedIntervalIfUnassigned(Interval* newRelatedInterval) - { - if (relatedInterval == nullptr) - { - assignRelatedInterval(newRelatedInterval); - } - else - { -#ifdef DEBUG - if (VERBOSE) - { - printf("Interval "); - this->microDump(); - printf(" already has a related interval\n"); - } -#endif // DEBUG - } - } - - // Update the registerPreferences on the interval. - // If there are conflicting requirements on this interval, set the preferences to - // the union of them. That way maybe we'll get at least one of them. - // An exception is made in the case where one of the existing or new - // preferences are all callee-save, in which case we "prefer" the callee-save - - void updateRegisterPreferences(regMaskTP preferences) - { - // We require registerPreferences to have been initialized. - assert(registerPreferences != RBM_NONE); - // It is invalid to update with empty preferences - assert(preferences != RBM_NONE); - - regMaskTP commonPreferences = (registerPreferences & preferences); - if (commonPreferences != RBM_NONE) - { - registerPreferences = commonPreferences; - return; - } - - // There are no preferences in common. - // Preferences need to reflect both cases where a var must occupy a specific register, - // as well as cases where a var is live when a register is killed. - // In the former case, we would like to record all such registers, however we don't - // really want to use any registers that will interfere. - // To approximate this, we never "or" together multi-reg sets, which are generally kill sets. - - if (!genMaxOneBit(preferences)) - { - // The new preference value is a multi-reg set, so it's probably a kill. - // Keep the new value. - registerPreferences = preferences; - return; - } - - if (!genMaxOneBit(registerPreferences)) - { - // The old preference value is a multi-reg set. - // Keep the existing preference set, as it probably reflects one or more kills. - // It may have been a union of multiple individual registers, but we can't - // distinguish that case without extra cost. - return; - } - - // If we reach here, we have two disjoint single-reg sets. - // Keep only the callee-save preferences, if not empty. - // Otherwise, take the union of the preferences. 
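A worked example of the merging rules laid out in the comments above (standalone stand-ins; the bit values are arbitrary single-register masks, and merge() is an illustrative restatement, not the method itself):

    #include <cassert>
    #include <cstdint>

    using regMaskTP = uint64_t;

    static bool maxOneBit(regMaskTP m)
    {
        return (m & (m - 1)) == 0;
    }

    // calleeSave is the callee-saved subset for the interval's register type.
    regMaskTP merge(regMaskTP current, regMaskTP incoming, regMaskTP calleeSave, bool preferCalleeSave)
    {
        regMaskTP common = current & incoming;
        if (common != 0)
        {
            return common; // intersect whenever possible
        }
        if (!maxOneBit(incoming))
        {
            return incoming; // multi-reg incoming set: probably a kill, keep it
        }
        if (!maxOneBit(current))
        {
            return current; // existing multi-reg set likely reflects kills
        }
        // Two disjoint single registers: prefer the callee-save subset of the
        // union when requested (and non-empty); otherwise take the union.
        regMaskTP unionMask = current | incoming;
        if (preferCalleeSave && ((unionMask & calleeSave) != 0))
        {
            return unionMask & calleeSave;
        }
        return unionMask;
    }

    int main()
    {
        assert(merge(0x6, 0x2, 0x8, false) == 0x2); // overlap: intersect
        assert(merge(0x2, 0x8, 0x8, true) == 0x8);  // disjoint: callee-save wins
        assert(merge(0x2, 0x8, 0x0, false) == 0xa); // disjoint: plain union
        return 0;
    }
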
- - regMaskTP newPreferences = registerPreferences | preferences; - - if (preferCalleeSave) - { - regMaskTP calleeSaveMask = (calleeSaveRegs(this->registerType) & (newPreferences)); - if (calleeSaveMask != RBM_NONE) - { - newPreferences = calleeSaveMask; - } - } - registerPreferences = newPreferences; - } -}; - -class RefPosition -{ -public: - // A RefPosition refers to either an Interval or a RegRecord. 'referent' points to one - // of these types. If it refers to a RegRecord, then 'isPhysRegRef' is true. If it - // refers to an Interval, then 'isPhysRegRef' is false. - // - // Q: can 'referent' be NULL? - - Referenceable* referent; - - // nextRefPosition is the next in code order. - // Note that in either case there is no need for these to be doubly linked, as they - // are only traversed in the forward direction, and are not moved. - RefPosition* nextRefPosition; - - // The remaining fields are common to both options - GenTree* treeNode; - unsigned int bbNum; - - // Prior to the allocation pass, registerAssignment captures the valid registers - // for this RefPosition. An empty set means that any register is valid. A non-empty - // set means that it must be one of the given registers (may be the full set if the - // only constraint is that it must reside in SOME register) - // After the allocation pass, this contains the actual assignment - LsraLocation nodeLocation; - regMaskTP registerAssignment; - - RefType refType; - - // NOTE: C++ only packs bitfields if the base type is the same. So make all the base - // NOTE: types of the logically "bool" types that follow 'unsigned char', so they match - // NOTE: RefType that precedes this, and multiRegIdx can also match. - - // Indicates whether this ref position is to be allocated a reg only if profitable. Currently these are the - // ref positions that lower/codegen has indicated as reg optional and is considered a contained memory operand if - // no reg is allocated. - unsigned char allocRegIfProfitable : 1; - - // Used by RefTypeDef/Use positions of a multi-reg call node. - // Indicates the position of the register that this ref position refers to. - // The max bits needed is based on max value of MAX_RET_REG_COUNT value - // across all targets and that happens 4 on on Arm. Hence index value - // would be 0..MAX_RET_REG_COUNT-1. - unsigned char multiRegIdx : 2; - - // Last Use - this may be true for multiple RefPositions in the same Interval - unsigned char lastUse : 1; - - // Spill and Copy info - // reload indicates that the value was spilled, and must be reloaded here. - // spillAfter indicates that the value is spilled here, so a spill must be added. - // copyReg indicates that the value needs to be copied to a specific register, - // but that it will also retain its current assigned register. - // moveReg indicates that the value needs to be moved to a different register, - // and that this will be its new assigned register. - // A RefPosition may have any flag individually or the following combinations: - // - reload and spillAfter (i.e. it remains in memory), but not in combination with copyReg or moveReg - // (reload cannot exist with copyReg or moveReg; it should be reloaded into the appropriate reg) - // - spillAfter and copyReg (i.e. it must be copied to a new reg for use, but is then spilled) - // - spillAfter and moveReg (i.e. 
it most be both spilled and moved) - // NOTE: a moveReg involves an explicit move, and would usually not be needed for a fixed Reg if it is going - // to be spilled, because the code generator will do the move to the fixed register, and doesn't need to - // record the new register location as the new "home" location of the lclVar. However, if there is a conflicting - // use at the same location (e.g. lclVar V1 is in rdx and needs to be in rcx, but V2 needs to be in rdx), then - // we need an explicit move. - // - copyReg and moveReg must not exist with each other. - - unsigned char reload : 1; - unsigned char spillAfter : 1; - unsigned char copyReg : 1; - unsigned char moveReg : 1; // true if this var is moved to a new register - - unsigned char isPhysRegRef : 1; // true if 'referent' points of a RegRecord, false if it points to an Interval - unsigned char isFixedRegRef : 1; - unsigned char isLocalDefUse : 1; - - // delayRegFree indicates that the register should not be freed right away, but instead wait - // until the next Location after it would normally be freed. This is used for the case of - // non-commutative binary operators, where op2 must not be assigned the same register as - // the target. We do this by not freeing it until after the target has been defined. - // Another option would be to actually change the Location of the op2 use until the same - // Location as the def, but then it could potentially reuse a register that has been freed - // from the other source(s), e.g. if it's a lastUse or spilled. - unsigned char delayRegFree : 1; - - // outOfOrder is marked on a (non-def) RefPosition that doesn't follow a definition of the - // register currently assigned to the Interval. This happens when we use the assigned - // register from a predecessor that is not the most recently allocated BasicBlock. - unsigned char outOfOrder : 1; - -#ifdef DEBUG - // Minimum number registers that needs to be ensured while - // constraining candidates for this ref position under - // LSRA stress. - unsigned minRegCandidateCount; - - // The unique RefPosition number, equal to its index in the - // refPositions list. Only used for debugging dumps. - unsigned rpNum; -#endif // DEBUG - - RefPosition(unsigned int bbNum, LsraLocation nodeLocation, GenTree* treeNode, RefType refType) - : referent(nullptr) - , nextRefPosition(nullptr) - , treeNode(treeNode) - , bbNum(bbNum) - , nodeLocation(nodeLocation) - , registerAssignment(RBM_NONE) - , refType(refType) - , multiRegIdx(0) - , lastUse(false) - , reload(false) - , spillAfter(false) - , copyReg(false) - , moveReg(false) - , isPhysRegRef(false) - , isFixedRegRef(false) - , isLocalDefUse(false) - , delayRegFree(false) - , outOfOrder(false) -#ifdef DEBUG - , minRegCandidateCount(1) - , rpNum(0) -#endif - { - } - - Interval* getInterval() - { - assert(!isPhysRegRef); - return (Interval*)referent; - } - void setInterval(Interval* i) - { - referent = i; - isPhysRegRef = false; - } - - RegRecord* getReg() - { - assert(isPhysRegRef); - return (RegRecord*)referent; - } - void setReg(RegRecord* r) - { - referent = r; - isPhysRegRef = true; - registerAssignment = genRegMask(r->regNum); - } - - regNumber assignedReg() - { - if (registerAssignment == RBM_NONE) - { - return REG_NA; - } - - return genRegNumFromMask(registerAssignment); - } - - // Returns true if it is a reference on a gentree node. 
-    bool IsActualRef()
-    {
-        return (refType == RefTypeDef || refType == RefTypeUse);
-    }
-
-    bool RequiresRegister()
-    {
-        return (IsActualRef()
-#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
-                || refType == RefTypeUpperVectorSaveDef || refType == RefTypeUpperVectorSaveUse
-#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
-                ) &&
-               !AllocateIfProfitable();
-    }
-
-    void setAllocateIfProfitable(bool val)
-    {
-        allocRegIfProfitable = val;
-    }
-
-    // Returns true whether this ref position is to be allocated
-    // a reg only if it is profitable.
-    bool AllocateIfProfitable()
-    {
-        // TODO-CQ: Right now if a ref position is marked as
-        // copyreg or movereg, then it is not treated as
-        // 'allocate if profitable'. This is an implementation
-        // limitation that needs to be addressed.
-        return allocRegIfProfitable && !copyReg && !moveReg;
-    }
-
-    void setMultiRegIdx(unsigned idx)
-    {
-        multiRegIdx = idx;
-        assert(multiRegIdx == idx);
-    }
-
-    unsigned getMultiRegIdx()
-    {
-        return multiRegIdx;
-    }
-
-    LsraLocation getRefEndLocation()
-    {
-        return delayRegFree ? nodeLocation + 1 : nodeLocation;
-    }
-
-    bool isIntervalRef()
-    {
-        return (!isPhysRegRef && (referent != nullptr));
-    }
-
-    // isTrueDef indicates that the RefPosition is a non-update def of a non-internal
-    // interval
-    bool isTrueDef()
-    {
-        return (refType == RefTypeDef && isIntervalRef() && !getInterval()->isInternal);
-    }
-
-    // isFixedRefOfRegMask indicates that the RefPosition has a fixed assignment to the register
-    // specified by the given mask
-    bool isFixedRefOfRegMask(regMaskTP regMask)
-    {
-        assert(genMaxOneBit(regMask));
-        return (registerAssignment == regMask);
-    }
-
-    // isFixedRefOfReg indicates that the RefPosition has a fixed assignment to the given register
-    bool isFixedRefOfReg(regNumber regNum)
-    {
-        return (isFixedRefOfRegMask(genRegMask(regNum)));
-    }
-
-#ifdef DEBUG
-    // operator= copies everything except 'rpNum', which must remain unique
-    RefPosition& operator=(const RefPosition& rp)
-    {
-        unsigned rpNumSave = rpNum;
-        memcpy(this, &rp, sizeof(rp));
-        rpNum = rpNumSave;
-        return *this;
-    }
-
-    void dump();
-#endif // DEBUG
-};
-
-#ifdef DEBUG
-void dumpRegMask(regMaskTP regs);
-#endif // DEBUG
-
-/*****************************************************************************/
-#endif //_LSRA_H_
-/*****************************************************************************/
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+/*****************************************************************************/
+
+#ifndef _LSRA_H_
+#define _LSRA_H_
+
+#include "arraylist.h"
+#include "smallhash.h"
+#include "nodeinfo.h"
+
+// Minor and forward-reference types
+class Interval;
+class RefPosition;
+class LinearScan;
+class RegRecord;
+
+template <class T>
+class ArrayStack;
+
+// LsraLocation tracks the linearized order of the nodes.
+// Each node is assigned two LsraLocations - one for all the uses and all but the last
+// def, and a second location for the last def (if any)
+
+typedef unsigned int LsraLocation;
+const unsigned int MinLocation = 0;
+const unsigned int MaxLocation = UINT_MAX;
+// max number of registers an operation could require internally (in addition to uses and defs)
+const unsigned int MaxInternalRegisters = 8;
+const unsigned int RegisterTypeCount    = 2;
+
+typedef var_types RegisterType;
+#define IntRegisterType TYP_INT
+#define FloatRegisterType TYP_FLOAT
+
+inline regMaskTP calleeSaveRegs(RegisterType rt)
+{
+    return varTypeIsIntegralOrI(rt) ? RBM_INT_CALLEE_SAVED : RBM_FLT_CALLEE_SAVED;
+}
+
+struct LocationInfo
+{
+    Interval*    interval;
+    GenTree*     treeNode;
+    LsraLocation loc;
+    TreeNodeInfo info;
+
+    LocationInfo(LsraLocation l, Interval* i, GenTree* t, unsigned regIdx = 0) : interval(i), treeNode(t), loc(l)
+    {
+    }
+
+    // default constructor for data structures
+    LocationInfo()
+    {
+    }
+};
+
+//------------------------------------------------------------------------
+// LocationInfoListNode: used to store a single `LocationInfo` value for a
+//                       node during `buildIntervals`.
+//
+// This is the node type for `LocationInfoList` below.
+//
+class LocationInfoListNode final : public LocationInfo
+{
+    friend class LocationInfoList;
+    friend class LocationInfoListNodePool;
+
+    LocationInfoListNode* m_next; // The next node in the list
+
+public:
+    LocationInfoListNode(LsraLocation l, Interval* i, GenTree* t, unsigned regIdx = 0) : LocationInfo(l, i, t, regIdx)
+    {
+    }
+
+    //------------------------------------------------------------------------
+    // LocationInfoListNode::Next: Returns the next node in the list.
+    LocationInfoListNode* Next() const
+    {
+        return m_next;
+    }
+};
+
+//------------------------------------------------------------------------
+// LocationInfoList: used to store a list of `LocationInfo` values for a
+//                   node during `buildIntervals`.
+//
+// This list of 'LocationInfoListNode's contains the source nodes consumed by
+// a node, and is created by 'TreeNodeInfoInit'.
+//
+class LocationInfoList final
+{
+    friend class LocationInfoListNodePool;
+
+    LocationInfoListNode* m_head; // The head of the list
+    LocationInfoListNode* m_tail; // The tail of the list
+
+public:
+    LocationInfoList() : m_head(nullptr), m_tail(nullptr)
+    {
+    }
+
+    LocationInfoList(LocationInfoListNode* node) : m_head(node), m_tail(node)
+    {
+        assert(m_head->m_next == nullptr);
+    }
+
+    //------------------------------------------------------------------------
+    // LocationInfoList::IsEmpty: Returns true if the list is empty.
+    //
+    bool IsEmpty() const
+    {
+        return m_head == nullptr;
+    }
+
+    //------------------------------------------------------------------------
+    // LocationInfoList::Begin: Returns the first node in the list.
+    //
+    LocationInfoListNode* Begin() const
+    {
+        return m_head;
+    }
+
+    //------------------------------------------------------------------------
+    // LocationInfoList::End: Returns the position after the last node in the
+    //                        list. The returned value is suitable for use as
+    //                        a sentinel for iteration.
+    //
+    LocationInfoListNode* End() const
+    {
+        return nullptr;
+    }
+
+    //------------------------------------------------------------------------
+    // LocationInfoList::Last: Returns the last node in the list, or nullptr
+    //                         if the list is empty.
+ // + LocationInfoListNode* Last() const + { + return m_tail; + } + + //------------------------------------------------------------------------ + // LocationInfoList::Append: Appends a node to the list. + // + // Arguments: + // node - The node to append. Must not be part of an existing list. + // + void Append(LocationInfoListNode* node) + { + assert(node->m_next == nullptr); + + if (m_tail == nullptr) + { + assert(m_head == nullptr); + m_head = node; + } + else + { + m_tail->m_next = node; + } + + m_tail = node; + } + //------------------------------------------------------------------------ + // LocationInfoList::Append: Appends another list to this list. + // + // Arguments: + // other - The list to append. + // + void Append(LocationInfoList other) + { + if (m_tail == nullptr) + { + assert(m_head == nullptr); + m_head = other.m_head; + } + else + { + m_tail->m_next = other.m_head; + } + + m_tail = other.m_tail; + } + + //------------------------------------------------------------------------ + // LocationInfoList::Prepend: Prepends a node to the list. + // + // Arguments: + // node - The node to prepend. Must not be part of an existing list. + // + void Prepend(LocationInfoListNode* node) + { + assert(node->m_next == nullptr); + + if (m_head == nullptr) + { + assert(m_tail == nullptr); + m_tail = node; + } + else + { + node->m_next = m_head; + } + + m_head = node; + } + + //------------------------------------------------------------------------ + // LocationInfoList::Add: Adds a node to the list. + // + // Arguments: + // node - The node to add. Must not be part of an existing list. + // prepend - True if it should be prepended (otherwise is appended) + // + void Add(LocationInfoListNode* node, bool prepend) + { + if (prepend) + { + Prepend(node); + } + else + { + Append(node); + } + } + + //------------------------------------------------------------------------ + // GetTreeNodeInfo - retrieve the TreeNodeInfo for the given node + // + // Notes: + // The TreeNodeInfoInit methods use this helper to retrieve the TreeNodeInfo for child nodes + // from the useList being constructed. Note that, if the user knows the order of the operands, + // it is expected that they should just retrieve them directly. + + TreeNodeInfo& GetTreeNodeInfo(GenTree* node) + { + for (LocationInfoListNode *listNode = Begin(), *end = End(); listNode != end; listNode = listNode->Next()) + { + if (listNode->treeNode == node) + { + return listNode->info; + } + } + assert(!"GetTreeNodeInfo didn't find the node"); + unreached(); + } + + //------------------------------------------------------------------------ + // LocationInfoList::GetSecond: Gets the second node in the list. + // + // Arguments: + // (DEBUG ONLY) treeNode - The GenTree* we expect to be in the second node. + // + LocationInfoListNode* GetSecond(INDEBUG(GenTree* treeNode)) + { + noway_assert((Begin() != nullptr) && (Begin()->Next() != nullptr)); + LocationInfoListNode* second = Begin()->Next(); + assert(second->treeNode == treeNode); + return second; + } +}; + +struct LsraBlockInfo +{ + // bbNum of the predecessor to use for the register location of live-in variables. + // 0 for fgFirstBB. + unsigned int predBBNum; + BasicBlock::weight_t weight; + bool hasCriticalInEdge; + bool hasCriticalOutEdge; + +#if TRACK_LSRA_STATS + // Per block maintained LSRA statistics. + + // Number of spills of local vars or tree temps in this basic block. + unsigned spillCount; + + // Number of GT_COPY nodes inserted in this basic block while allocating regs. 
+    // Note that GT_COPY nodes are also inserted as part of basic block boundary
+    // resolution; those are accounted against resolutionMovCount but not
+    // against copyRegCount.
+    unsigned copyRegCount;
+
+    // Number of resolution moves inserted in this basic block.
+    unsigned resolutionMovCount;
+
+    // Number of critical edges from this block that are split.
+    unsigned splitEdgeCount;
+#endif // TRACK_LSRA_STATS
+};
+
+// This is sort of a bit mask
+// The low order 2 bits will be 1 for defs, and 2 for uses
+enum RefType : unsigned char
+{
+#define DEF_REFTYPE(memberName, memberValue, shortName) memberName = memberValue,
+#include "lsra_reftypes.h"
+#undef DEF_REFTYPE
+};
+
+// position in a block (for resolution)
+enum BlockStartOrEnd
+{
+    BlockPositionStart = 0,
+    BlockPositionEnd   = 1,
+    PositionCount      = 2
+};
+
+inline bool RefTypeIsUse(RefType refType)
+{
+    return ((refType & RefTypeUse) == RefTypeUse);
+}
+
+inline bool RefTypeIsDef(RefType refType)
+{
+    return ((refType & RefTypeDef) == RefTypeDef);
+}
+
+typedef regNumberSmall* VarToRegMap;
+
+template <typename ElementType, CompMemKind MemKind>
+class ListElementAllocator
+{
+private:
+    template <typename U, CompMemKind CMK>
+    friend class ListElementAllocator;
+
+    Compiler* m_compiler;
+
+public:
+    ListElementAllocator(Compiler* compiler) : m_compiler(compiler)
+    {
+    }
+
+    template <typename U>
+    ListElementAllocator(const ListElementAllocator<U, MemKind>& other) : m_compiler(other.m_compiler)
+    {
+    }
+
+    ElementType* allocate(size_t count)
+    {
+        return reinterpret_cast<ElementType*>(m_compiler->compGetMem(sizeof(ElementType) * count, MemKind));
+    }
+
+    void deallocate(ElementType* pointer, size_t count)
+    {
+    }
+
+    template <typename U>
+    struct rebind
+    {
+        typedef ListElementAllocator<U, MemKind> allocator;
+    };
+};
+
+typedef ListElementAllocator<Interval, CMK_LSRA_Interval>       LinearScanMemoryAllocatorInterval;
+typedef ListElementAllocator<RefPosition, CMK_LSRA_RefPosition> LinearScanMemoryAllocatorRefPosition;
+
+typedef jitstd::list<Interval, LinearScanMemoryAllocatorInterval>       IntervalList;
+typedef jitstd::list<RefPosition, LinearScanMemoryAllocatorRefPosition> RefPositionList;
+
+class Referenceable
+{
+public:
+    Referenceable()
+    {
+        firstRefPosition  = nullptr;
+        recentRefPosition = nullptr;
+        lastRefPosition   = nullptr;
+        isActive          = false;
+    }
+
+    // A linked list of RefPositions. These are only traversed in the forward
+    // direction, and are not moved, so they don't need to be doubly linked
+    // (see RefPosition).
+
+    RefPosition* firstRefPosition;
+    RefPosition* recentRefPosition;
+    RefPosition* lastRefPosition;
+
+    bool isActive;
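+
+    // For illustration (a sketch, not additional interface): given the forward-only
+    // invariant above, a typical walk over a Referenceable's references looks like:
+    //
+    //     for (RefPosition* rp = firstRefPosition; rp != nullptr; rp = rp->nextRefPosition)
+    //     {
+    //         // visit rp; recentRefPosition serves as the traversal cursor
+    //     }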
+
+    // Get the position of the next reference which is at or greater than
+    // the current location (relies upon recentRefPosition being updated
+    // during traversal).
+    RefPosition* getNextRefPosition();
+    LsraLocation getNextRefLocation();
+};
+
+class RegRecord : public Referenceable
+{
+public:
+    RegRecord()
+    {
+        assignedInterval    = nullptr;
+        previousInterval    = nullptr;
+        regNum              = REG_NA;
+        isCalleeSave        = false;
+        registerType        = IntRegisterType;
+        isBusyUntilNextKill = false;
+    }
+
+    void init(regNumber reg)
+    {
+#ifdef _TARGET_ARM64_
+        // The Zero register, or the SP
+        if ((reg == REG_ZR) || (reg == REG_SP))
+        {
+            // IsGeneralRegister returns false for REG_ZR and REG_SP
+            regNum       = reg;
+            registerType = IntRegisterType;
+        }
+        else
+#endif
+            if (emitter::isFloatReg(reg))
+        {
+            registerType = FloatRegisterType;
+        }
+        else
+        {
+            // The constructor defaults to IntRegisterType
+            assert(emitter::isGeneralRegister(reg) && registerType == IntRegisterType);
+        }
+        regNum       = reg;
+        isCalleeSave = ((RBM_CALLEE_SAVED & genRegMask(reg)) != 0);
+    }
+
+#ifdef DEBUG
+    // print out representation
+    void dump();
+    // concise representation for embedding
+    void tinyDump();
+#endif // DEBUG
+
+    bool isFree();
+
+    // RefPosition   * getNextRefPosition();
+    // LsraLocation getNextRefLocation();
+
+    // DATA
+
+    // interval to which this register is currently allocated.
+    // If the interval is inactive (isActive == false) then it is not currently live,
+    // and the register can be unassigned (i.e. setting assignedInterval to nullptr)
+    // without spilling the register.
+    Interval* assignedInterval;
+    // Interval to which this register was previously allocated, and which was unassigned
+    // because it was inactive. This register will be reassigned to this Interval when
+    // assignedInterval becomes inactive.
+    Interval* previousInterval;
+
+    regNumber    regNum;
+    bool         isCalleeSave;
+    RegisterType registerType;
+    // This register must be considered busy until the next time it is explicitly killed.
+    // This is used so that putarg_reg can avoid killing its lclVar source, while avoiding
+    // the problem with the reg becoming free if the last-use is encountered before the call.
+    bool isBusyUntilNextKill;
+
+    bool conflictingFixedRegReference(RefPosition* refPosition);
+};
+
+inline bool leafInRange(GenTree* leaf, int lower, int upper)
+{
+    if (!leaf->IsIntCnsFitsInI32())
+    {
+        return false;
+    }
+    if (leaf->gtIntCon.gtIconVal < lower)
+    {
+        return false;
+    }
+    if (leaf->gtIntCon.gtIconVal > upper)
+    {
+        return false;
+    }
+
+    return true;
+}
+
+inline bool leafInRange(GenTree* leaf, int lower, int upper, int multiple)
+{
+    if (!leafInRange(leaf, lower, upper))
+    {
+        return false;
+    }
+    if (leaf->gtIntCon.gtIconVal % multiple)
+    {
+        return false;
+    }
+
+    return true;
+}
+
+inline bool leafAddInRange(GenTree* leaf, int lower, int upper, int multiple = 1)
+{
+    if (leaf->OperGet() != GT_ADD)
+    {
+        return false;
+    }
+    return leafInRange(leaf->gtOp.gtOp2, lower, upper, multiple);
+}
+
+inline bool isCandidateVar(LclVarDsc* varDsc)
+{
+    return varDsc->lvLRACandidate;
+}
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX                                                                           XX
+XX                           LinearScan                                      XX
+XX                                                                           XX
+XX This is the container for the Linear Scan data structures and methods.   XX
+XX                                                                           XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+// OPTION 1: The algorithm as described in "Optimized Interval Splitting in a
+// Linear Scan Register Allocator". It is driven by iterating over the Interval
+// lists. In this case, we need multiple IntervalLists, and Intervals will be
+// moved between them so they must be easily updated.
+
+// OPTION 2: The algorithm is driven by iterating over the RefPositions. In this
+// case, we only need a single IntervalList, and it won't be updated.
+// The RefPosition must refer to its Interval, and we need to be able to traverse
+// to the next RefPosition in code order
+// THIS IS THE OPTION CURRENTLY BEING PURSUED
+
+class LocationInfoList;
+class LocationInfoListNodePool;
+
+class LinearScan : public LinearScanInterface
+{
+    friend class RefPosition;
+    friend class Interval;
+    friend class Lowering;
+    friend class TreeNodeInfo;
+
+public:
+    // This could use further abstraction. From Compiler we need the tree,
+    // the flowgraph and the allocator.
+    LinearScan(Compiler* theCompiler);
+
+    // This is the main driver
+    virtual void doLinearScan();
+
+    // TreeNodeInfo contains three register masks: src candidates, dst candidates, and internal candidates.
+    // Instead of storing actual register masks, however, which are large, we store a small index into a table
+    // of register masks, stored in this class. We create only as many distinct register masks as are needed.
+    // All identical register masks get the same index. The register mask table contains:
+    // 1. A mask containing all eligible integer registers.
+    // 2. A mask containing all eligible floating-point registers.
+    // 3. A mask for each single register.
+    // 4. A mask for each combination of registers, created dynamically as required.
+    //
+    // Currently, the maximum number of masks allowed is a constant defined by 'numMasks'. The register mask
+    // table is never resized. It is also limited by the size of the index, currently an unsigned char.
+    CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if defined(_TARGET_ARM64_)
+    static const int numMasks = 128;
+#else
+    static const int numMasks = 64;
+#endif
+
+    regMaskTP* regMaskTable;
+    int        nextFreeMask;
+
+    typedef int RegMaskIndex;
+
+    // allint is 0, allfloat is 1, all the single-bit masks start at 2
+    enum KnownRegIndex
+    {
+        ALLINT_IDX           = 0,
+        ALLFLOAT_IDX         = 1,
+        FIRST_SINGLE_REG_IDX = 2
+    };
+
+    RegMaskIndex GetIndexForRegMask(regMaskTP mask);
+    regMaskTP GetRegMaskForIndex(RegMaskIndex index);
+    void RemoveRegisterFromMasks(regNumber reg);
+
+#ifdef DEBUG
+    void dspRegisterMaskTable();
+#endif // DEBUG
+
+    // Initialize the block traversal for LSRA.
+    // This resets the bbVisitedSet, and on the first invocation sets the blockSequence array,
+    // which determines the order in which blocks will be allocated (currently called during Lowering).
+    BasicBlock* startBlockSequence();
+    // Move to the next block in sequence, updating the current block information.
+    BasicBlock* moveToNextBlock();
+    // Get the next block to be scheduled without changing the current block,
+    // but updating the blockSequence during the first iteration if it is not fully computed.
+ BasicBlock* getNextBlock(); + + // This is called during code generation to update the location of variables + virtual void recordVarLocationsAtStartOfBB(BasicBlock* bb); + + // This does the dataflow analysis and builds the intervals + void buildIntervals(); + + // This is where the actual assignment is done + void allocateRegisters(); + + // This is the resolution phase, where cross-block mismatches are fixed up + void resolveRegisters(); + + void writeRegisters(RefPosition* currentRefPosition, GenTree* tree); + + // Insert a copy in the case where a tree node value must be moved to a different + // register at the point of use, or it is reloaded to a different register + // than the one it was spilled from + void insertCopyOrReload(BasicBlock* block, GenTree* tree, unsigned multiRegIdx, RefPosition* refPosition); + +#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE + // Insert code to save and restore the upper half of a vector that lives + // in a callee-save register at the point of a call (the upper half is + // not preserved). + void insertUpperVectorSaveAndReload(GenTree* tree, RefPosition* refPosition, BasicBlock* block); +#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE + + // resolve along one block-block edge + enum ResolveType + { + ResolveSplit, + ResolveJoin, + ResolveCritical, + ResolveSharedCritical, + ResolveTypeCount + }; +#ifdef DEBUG + static const char* resolveTypeName[ResolveTypeCount]; +#endif + + enum WhereToInsert + { + InsertAtTop, + InsertAtBottom + }; + +#ifdef _TARGET_ARM_ + void addResolutionForDouble(BasicBlock* block, + GenTreePtr insertionPoint, + Interval** sourceIntervals, + regNumberSmall* location, + regNumber toReg, + regNumber fromReg, + ResolveType resolveType); +#endif + void addResolution( + BasicBlock* block, GenTree* insertionPoint, Interval* interval, regNumber outReg, regNumber inReg); + + void handleOutgoingCriticalEdges(BasicBlock* block); + + void resolveEdge(BasicBlock* fromBlock, BasicBlock* toBlock, ResolveType resolveType, VARSET_VALARG_TP liveSet); + + void resolveEdges(); + + // Finally, the register assignments are written back to the tree nodes. + void recordRegisterAssignments(); + + // Keep track of how many temp locations we'll need for spill + void initMaxSpill(); + void updateMaxSpill(RefPosition* refPosition); + void recordMaxSpill(); + + // max simultaneous spill locations used of every type + unsigned int maxSpill[TYP_COUNT]; + unsigned int currentSpill[TYP_COUNT]; + bool needFloatTmpForFPCall; + bool needDoubleTmpForFPCall; + +#ifdef DEBUG +private: + //------------------------------------------------------------------------ + // Should we stress lsra? + // This uses the same COMPLUS variable as rsStressRegs (COMPlus_JitStressRegs) + // However, the possible values and their interpretation are entirely different. + // + // The mask bits are currently divided into fields in which each non-zero value + // is a distinct stress option (e.g. 0x3 is not a combination of 0x1 and 0x2). + // However, subject to possible constraints (to be determined), the different + // fields can be combined (e.g. 0x7 is a combination of 0x3 and 0x4). + // Note that the field values are declared in a public enum, but the actual bits are + // only accessed via accessors. 
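+    // As a worked example (an illustrative sketch; the constants are the enum values
+    // declared below, not additional interface): a COMPlus_JitStressRegs value of 0x43
+    // enables both LSRA_LIMIT_SMALL_SET and LSRA_TRAVERSE_PRED_FIRST, because each
+    // field is decoded independently:
+    //
+    //     (0x43 & LSRA_LIMIT_MASK)    == 0x03   // LSRA_LIMIT_SMALL_SET
+    //     (0x43 & LSRA_TRAVERSE_MASK) == 0x40   // LSRA_TRAVERSE_PRED_FIRST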
+
+    unsigned lsraStressMask;
+
+    // This controls the registers available for allocation
+    enum LsraStressLimitRegs{LSRA_LIMIT_NONE = 0, LSRA_LIMIT_CALLEE = 0x1, LSRA_LIMIT_CALLER = 0x2,
+                             LSRA_LIMIT_SMALL_SET = 0x3, LSRA_LIMIT_MASK = 0x3};
+
+    // When LSRA_LIMIT_SMALL_SET is specified, it is desirable to select a "mixed" set of caller- and callee-save
+    // registers, so as to get different coverage than limiting to callee or caller.
+    // At least for x86 and AMD64, and potentially other architectures that will support SIMD,
+    // we need a minimum of 5 fp regs in order to support the InitN intrinsic for Vector4.
+    // Hence the "SmallFPSet" has 5 elements.
+    CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if defined(_TARGET_AMD64_)
+#ifdef UNIX_AMD64_ABI
+    // On System V the RDI and RSI are not callee saved. Use R12 and R13 as callee saved registers.
+    static const regMaskTP LsraLimitSmallIntSet =
+        (RBM_EAX | RBM_ECX | RBM_EBX | RBM_ETW_FRAMED_EBP | RBM_R12 | RBM_R13);
+#else  // !UNIX_AMD64_ABI
+    // On Windows Amd64 use the RDI and RSI as callee saved registers.
+    static const regMaskTP LsraLimitSmallIntSet =
+        (RBM_EAX | RBM_ECX | RBM_EBX | RBM_ETW_FRAMED_EBP | RBM_ESI | RBM_EDI);
+#endif // !UNIX_AMD64_ABI
+    static const regMaskTP LsraLimitSmallFPSet = (RBM_XMM0 | RBM_XMM1 | RBM_XMM2 | RBM_XMM6 | RBM_XMM7);
+#elif defined(_TARGET_ARM_)
+    static const regMaskTP LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R3 | RBM_R4);
+    static const regMaskTP LsraLimitSmallFPSet  = (RBM_F0 | RBM_F1 | RBM_F2 | RBM_F16 | RBM_F17);
+#elif defined(_TARGET_ARM64_)
+    static const regMaskTP LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R19 | RBM_R20);
+    static const regMaskTP LsraLimitSmallFPSet  = (RBM_V0 | RBM_V1 | RBM_V2 | RBM_V8 | RBM_V9);
+#elif defined(_TARGET_X86_)
+    static const regMaskTP LsraLimitSmallIntSet = (RBM_EAX | RBM_ECX | RBM_EDI);
+    static const regMaskTP LsraLimitSmallFPSet  = (RBM_XMM0 | RBM_XMM1 | RBM_XMM2 | RBM_XMM6 | RBM_XMM7);
+#else
+#error Unsupported or unset target architecture
+#endif // target
+
+    LsraStressLimitRegs getStressLimitRegs()
+    {
+        return (LsraStressLimitRegs)(lsraStressMask & LSRA_LIMIT_MASK);
+    }
+
+    regMaskTP getConstrainedRegMask(regMaskTP regMaskActual, regMaskTP regMaskConstrain, unsigned minRegCount);
+    regMaskTP stressLimitRegs(RefPosition* refPosition, regMaskTP mask);
+
+    // This controls the heuristics used to select registers
+    // These can be combined.
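+    // For example (a sketch using the values below): a stress mask of 0x14 sets both
+    // LSRA_SELECT_REVERSE_HEURISTICS (0x04) and LSRA_SELECT_NEAREST (0x10), so
+    // doReverseSelect() and doSelectNearest() would both return true, since each
+    // accessor tests only its own bit within LSRA_SELECT_MASK.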
+    enum LsraSelect{LSRA_SELECT_DEFAULT = 0, LSRA_SELECT_REVERSE_HEURISTICS = 0x04,
+                    LSRA_SELECT_REVERSE_CALLER_CALLEE = 0x08, LSRA_SELECT_NEAREST = 0x10, LSRA_SELECT_MASK = 0x1c};
+    LsraSelect getSelectionHeuristics()
+    {
+        return (LsraSelect)(lsraStressMask & LSRA_SELECT_MASK);
+    }
+    bool doReverseSelect()
+    {
+        return ((lsraStressMask & LSRA_SELECT_REVERSE_HEURISTICS) != 0);
+    }
+    bool doReverseCallerCallee()
+    {
+        return ((lsraStressMask & LSRA_SELECT_REVERSE_CALLER_CALLEE) != 0);
+    }
+    bool doSelectNearest()
+    {
+        return ((lsraStressMask & LSRA_SELECT_NEAREST) != 0);
+    }
+
+    // This controls the order in which basic blocks are visited during allocation
+    enum LsraTraversalOrder{LSRA_TRAVERSE_LAYOUT = 0x20, LSRA_TRAVERSE_PRED_FIRST = 0x40,
+                            LSRA_TRAVERSE_RANDOM = 0x60, // NYI
+                            LSRA_TRAVERSE_DEFAULT = LSRA_TRAVERSE_PRED_FIRST, LSRA_TRAVERSE_MASK = 0x60};
+    LsraTraversalOrder getLsraTraversalOrder()
+    {
+        if ((lsraStressMask & LSRA_TRAVERSE_MASK) == 0)
+        {
+            return LSRA_TRAVERSE_DEFAULT;
+        }
+        return (LsraTraversalOrder)(lsraStressMask & LSRA_TRAVERSE_MASK);
+    }
+    bool isTraversalLayoutOrder()
+    {
+        return getLsraTraversalOrder() == LSRA_TRAVERSE_LAYOUT;
+    }
+    bool isTraversalPredFirstOrder()
+    {
+        return getLsraTraversalOrder() == LSRA_TRAVERSE_PRED_FIRST;
+    }
+
+    // This controls whether lifetimes should be extended to the entire method.
+    // Note that this has no effect under MinOpts
+    enum LsraExtendLifetimes{LSRA_DONT_EXTEND = 0, LSRA_EXTEND_LIFETIMES = 0x80, LSRA_EXTEND_LIFETIMES_MASK = 0x80};
+    LsraExtendLifetimes getLsraExtendLifeTimes()
+    {
+        return (LsraExtendLifetimes)(lsraStressMask & LSRA_EXTEND_LIFETIMES_MASK);
+    }
+    bool extendLifetimes()
+    {
+        return getLsraExtendLifeTimes() == LSRA_EXTEND_LIFETIMES;
+    }
+
+    // This controls whether variable locations should be set to the previous block in layout order
+    // (LSRA_BLOCK_BOUNDARY_LAYOUT), or to that of the highest-weight predecessor (LSRA_BLOCK_BOUNDARY_PRED -
+    // the default), or rotated (LSRA_BLOCK_BOUNDARY_ROTATE).
+    enum LsraBlockBoundaryLocations{LSRA_BLOCK_BOUNDARY_PRED = 0, LSRA_BLOCK_BOUNDARY_LAYOUT = 0x100,
+                                    LSRA_BLOCK_BOUNDARY_ROTATE = 0x200, LSRA_BLOCK_BOUNDARY_MASK = 0x300};
+    LsraBlockBoundaryLocations getLsraBlockBoundaryLocations()
+    {
+        return (LsraBlockBoundaryLocations)(lsraStressMask & LSRA_BLOCK_BOUNDARY_MASK);
+    }
+    regNumber rotateBlockStartLocation(Interval* interval, regNumber targetReg, regMaskTP availableRegs);
+
+    // This controls whether we always insert a GT_RELOAD instruction after a spill
+    // Note that this can be combined with LSRA_SPILL_ALWAYS (or not)
+    enum LsraReload{LSRA_NO_RELOAD_IF_SAME = 0, LSRA_ALWAYS_INSERT_RELOAD = 0x400, LSRA_RELOAD_MASK = 0x400};
+    LsraReload getLsraReload()
+    {
+        return (LsraReload)(lsraStressMask & LSRA_RELOAD_MASK);
+    }
+    bool alwaysInsertReload()
+    {
+        return getLsraReload() == LSRA_ALWAYS_INSERT_RELOAD;
+    }
+
+    // This controls whether we spill everywhere
+    enum LsraSpill{LSRA_DONT_SPILL_ALWAYS = 0, LSRA_SPILL_ALWAYS = 0x800, LSRA_SPILL_MASK = 0x800};
+    LsraSpill getLsraSpill()
+    {
+        return (LsraSpill)(lsraStressMask & LSRA_SPILL_MASK);
+    }
+    bool spillAlways()
+    {
+        return getLsraSpill() == LSRA_SPILL_ALWAYS;
+    }
+
+    // This controls whether RefPositions that lower/codegen have indicated as reg-optional should be
+    // allocated a reg at all.
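+    // Sketch of the intended effect (assuming the behavior described above, not an
+    // additional interface): with LSRA_REG_OPTIONAL_NO_ALLOC set, a RefPosition marked
+    // "allocate if profitable" is simply never given a register, so its operand remains
+    // a contained memory operand and codegen must handle that form.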
+ enum LsraRegOptionalControl{LSRA_REG_OPTIONAL_DEFAULT = 0, LSRA_REG_OPTIONAL_NO_ALLOC = 0x1000, + LSRA_REG_OPTIONAL_MASK = 0x1000}; + + LsraRegOptionalControl getLsraRegOptionalControl() + { + return (LsraRegOptionalControl)(lsraStressMask & LSRA_REG_OPTIONAL_MASK); + } + + bool regOptionalNoAlloc() + { + return getLsraRegOptionalControl() == LSRA_REG_OPTIONAL_NO_ALLOC; + } + + bool candidatesAreStressLimited() + { + return ((lsraStressMask & (LSRA_LIMIT_MASK | LSRA_SELECT_MASK)) != 0); + } + + // Dump support + void dumpOperandToLocationInfoMap(); + void lsraDumpIntervals(const char* msg); + void dumpRefPositions(const char* msg); + void dumpVarRefPositions(const char* msg); + + // Checking code + static bool IsLsraAdded(GenTree* node) + { + return ((node->gtDebugFlags & GTF_DEBUG_NODE_LSRA_ADDED) != 0); + } + static void SetLsraAdded(GenTree* node) + { + node->gtDebugFlags |= GTF_DEBUG_NODE_LSRA_ADDED; + } + static bool IsResolutionMove(GenTree* node); + static bool IsResolutionNode(LIR::Range& containingRange, GenTree* node); + + void verifyFinalAllocation(); + void verifyResolutionMove(GenTree* resolutionNode, LsraLocation currentLocation); +#else // !DEBUG + bool doSelectNearest() + { + return false; + } + bool extendLifetimes() + { + return false; + } + bool spillAlways() + { + return false; + } + // In a retail build we support only the default traversal order + bool isTraversalLayoutOrder() + { + return false; + } + bool isTraversalPredFirstOrder() + { + return true; + } + bool getLsraExtendLifeTimes() + { + return false; + } + static void SetLsraAdded(GenTree* node) + { + // do nothing; checked only under #DEBUG + } + bool candidatesAreStressLimited() + { + return false; + } +#endif // !DEBUG + +public: + // Used by Lowering when considering whether to split Longs, as well as by identifyCandidates(). 
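+    // (For instance - a sketch of the kinds of checks involved, not the authoritative
+    // list: an address-exposed local can never be a register candidate, while a tracked,
+    // non-exposed local of an enregisterable type typically can be.)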
+ bool isRegCandidate(LclVarDsc* varDsc); + + bool isContainableMemoryOp(GenTree* node); + +private: + // Determine which locals are candidates for allocation + void identifyCandidates(); + + // determine which locals are used in EH constructs we don't want to deal with + void identifyCandidatesExceptionDataflow(); + + void buildPhysRegRecords(); + +#ifdef DEBUG + void checkLastUses(BasicBlock* block); +#endif // DEBUG + + void setFrameType(); + + // Update allocations at start/end of block + void unassignIntervalBlockStart(RegRecord* regRecord, VarToRegMap inVarToRegMap); + void processBlockEndAllocation(BasicBlock* current); + + // Record variable locations at start/end of block + void processBlockStartLocations(BasicBlock* current, bool allocationPass); + void processBlockEndLocations(BasicBlock* current); + +#ifdef _TARGET_ARM_ + bool isSecondHalfReg(RegRecord* regRec, Interval* interval); + RegRecord* getSecondHalfRegRec(RegRecord* regRec); + RegRecord* findAnotherHalfRegRec(RegRecord* regRec); + bool canSpillDoubleReg(RegRecord* physRegRecord, LsraLocation refLocation, unsigned* recentAssignedRefWeight); + void unassignDoublePhysReg(RegRecord* doubleRegRecord); +#endif + void updateAssignedInterval(RegRecord* reg, Interval* interval, RegisterType regType); + void updatePreviousInterval(RegRecord* reg, Interval* interval, RegisterType regType); + bool canRestorePreviousInterval(RegRecord* regRec, Interval* assignedInterval); + bool isAssignedToInterval(Interval* interval, RegRecord* regRec); + bool isRefPositionActive(RefPosition* refPosition, LsraLocation refLocation); + bool canSpillReg(RegRecord* physRegRecord, LsraLocation refLocation, unsigned* recentAssignedRefWeight); + bool isRegInUse(RegRecord* regRec, RefPosition* refPosition); + + RefType CheckBlockType(BasicBlock* block, BasicBlock* prevBlock); + + // insert refpositions representing prolog zero-inits which will be added later + void insertZeroInitRefPositions(); + + void AddMapping(GenTree* node, LsraLocation loc); + + // add physreg refpositions for a tree node, based on calling convention and instruction selection predictions + void addRefsForPhysRegMask(regMaskTP mask, LsraLocation currentLoc, RefType refType, bool isLastUse); + + void resolveConflictingDefAndUse(Interval* interval, RefPosition* defRefPosition); + + void buildRefPositionsForNode(GenTree* tree, + BasicBlock* block, + LocationInfoListNodePool& listNodePool, + LsraLocation loc); + +#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE + VARSET_VALRET_TP buildUpperVectorSaveRefPositions(GenTree* tree, LsraLocation currentLoc); + void buildUpperVectorRestoreRefPositions(GenTree* tree, LsraLocation currentLoc, VARSET_VALARG_TP liveLargeVectors); +#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE + +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + // For AMD64 on SystemV machines. This method + // is called as replacement for raUpdateRegStateForArg + // that is used on Windows. On System V systems a struct can be passed + // partially using registers from the 2 register files. 
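+    // (Illustrative example, assuming the standard System V AMD64 classification: a
+    // 16-byte struct such as { double d; long l; } may be passed with one eightbyte in
+    // an SSE register and the other in an integer register, so the reg state of both
+    // register files must be updated for that single argument.)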
+ void unixAmd64UpdateRegStateForArg(LclVarDsc* argDsc); +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + + // Update reg state for an incoming register argument + void updateRegStateForArg(LclVarDsc* argDsc); + + inline bool isCandidateLocalRef(GenTree* tree) + { + if (tree->IsLocal()) + { + unsigned int lclNum = tree->gtLclVarCommon.gtLclNum; + assert(lclNum < compiler->lvaCount); + LclVarDsc* varDsc = compiler->lvaTable + tree->gtLclVarCommon.gtLclNum; + + return isCandidateVar(varDsc); + } + return false; + } + + static Compiler::fgWalkResult markAddrModeOperandsHelperMD(GenTree* tree, void* p); + + // Return the registers killed by the given tree node. + regMaskTP getKillSetForNode(GenTree* tree); + + // Given some tree node add refpositions for all the registers this node kills + bool buildKillPositionsForNode(GenTree* tree, LsraLocation currentLoc); + + regMaskTP allRegs(RegisterType rt); + regMaskTP allRegs(GenTree* tree); + regMaskTP allMultiRegCallNodeRegs(GenTreeCall* tree); + regMaskTP allSIMDRegs(); + regMaskTP internalFloatRegCandidates(); + + bool isMultiRegRelated(RefPosition* refPosition, LsraLocation location); + bool registerIsFree(regNumber regNum, RegisterType regType); + bool registerIsAvailable(RegRecord* physRegRecord, + LsraLocation currentLoc, + LsraLocation* nextRefLocationPtr, + RegisterType regType); + void freeRegister(RegRecord* physRegRecord); + void freeRegisters(regMaskTP regsToFree); + + var_types getDefType(GenTree* tree); + + RefPosition* defineNewInternalTemp(GenTree* tree, + RegisterType regType, + regMaskTP regMask DEBUGARG(unsigned minRegCandidateCount)); + + int buildInternalRegisterDefsForNode(GenTree* tree, + TreeNodeInfo* info, + RefPosition* defs[] DEBUGARG(unsigned minRegCandidateCount)); + + void buildInternalRegisterUsesForNode(GenTree* tree, + TreeNodeInfo* info, + RefPosition* defs[], + int total DEBUGARG(unsigned minRegCandidateCount)); + + void resolveLocalRef(BasicBlock* block, GenTree* treeNode, RefPosition* currentRefPosition); + + void insertMove(BasicBlock* block, GenTree* insertionPoint, unsigned lclNum, regNumber inReg, regNumber outReg); + + void insertSwap( + BasicBlock* block, GenTree* insertionPoint, unsigned lclNum1, regNumber reg1, unsigned lclNum2, regNumber reg2); + +public: + // TODO-Cleanup: unused? 
+ class PhysRegIntervalIterator + { + public: + PhysRegIntervalIterator(LinearScan* theLinearScan) + { + nextRegNumber = (regNumber)0; + linearScan = theLinearScan; + } + RegRecord* GetNext() + { + return &linearScan->physRegs[nextRegNumber]; + } + + private: + // This assumes that the physical registers are contiguous, starting + // with a register number of 0 + regNumber nextRegNumber; + LinearScan* linearScan; + }; + +private: + Interval* newInterval(RegisterType regType); + + Interval* getIntervalForLocalVar(unsigned varIndex) + { + assert(varIndex < compiler->lvaTrackedCount); + assert(localVarIntervals[varIndex] != nullptr); + return localVarIntervals[varIndex]; + } + + Interval* getIntervalForLocalVarNode(GenTreeLclVarCommon* tree) + { + LclVarDsc* varDsc = &compiler->lvaTable[tree->gtLclNum]; + assert(varDsc->lvTracked); + return getIntervalForLocalVar(varDsc->lvVarIndex); + } + + RegRecord* getRegisterRecord(regNumber regNum); + + RefPosition* newRefPositionRaw(LsraLocation nodeLocation, GenTree* treeNode, RefType refType); + + RefPosition* newRefPosition(Interval* theInterval, + LsraLocation theLocation, + RefType theRefType, + GenTree* theTreeNode, + regMaskTP mask, + unsigned multiRegIdx = 0 DEBUGARG(unsigned minRegCandidateCount = 1)); + + RefPosition* newRefPosition( + regNumber reg, LsraLocation theLocation, RefType theRefType, GenTree* theTreeNode, regMaskTP mask); + + void applyCalleeSaveHeuristics(RefPosition* rp); + + void associateRefPosWithInterval(RefPosition* rp); + + void associateRefPosWithRegister(RefPosition* rp); + + unsigned getWeight(RefPosition* refPos); + + /***************************************************************************** + * Register management + ****************************************************************************/ + RegisterType getRegisterType(Interval* currentInterval, RefPosition* refPosition); + regNumber tryAllocateFreeReg(Interval* current, RefPosition* refPosition); + regNumber allocateBusyReg(Interval* current, RefPosition* refPosition, bool allocateIfProfitable); + regNumber assignCopyReg(RefPosition* refPosition); + + bool isMatchingConstant(RegRecord* physRegRecord, RefPosition* refPosition); + bool isSpillCandidate(Interval* current, + RefPosition* refPosition, + RegRecord* physRegRecord, + LsraLocation& nextLocation); + void checkAndAssignInterval(RegRecord* regRec, Interval* interval); + void assignPhysReg(RegRecord* regRec, Interval* interval); + void assignPhysReg(regNumber reg, Interval* interval) + { + assignPhysReg(getRegisterRecord(reg), interval); + } + + bool isAssigned(RegRecord* regRec ARM_ARG(RegisterType newRegType)); + bool isAssigned(RegRecord* regRec, LsraLocation lastLocation ARM_ARG(RegisterType newRegType)); + void checkAndClearInterval(RegRecord* regRec, RefPosition* spillRefPosition); + void unassignPhysReg(RegRecord* regRec ARM_ARG(RegisterType newRegType)); + void unassignPhysReg(RegRecord* regRec, RefPosition* spillRefPosition); + void unassignPhysRegNoSpill(RegRecord* reg); + void unassignPhysReg(regNumber reg) + { + unassignPhysReg(getRegisterRecord(reg), nullptr); + } + + void setIntervalAsSpilled(Interval* interval); + void setIntervalAsSplit(Interval* interval); + void spillInterval(Interval* interval, RefPosition* fromRefPosition, RefPosition* toRefPosition); + + void spillGCRefs(RefPosition* killRefPosition); + + /***************************************************************************** + * For Resolution phase + 
****************************************************************************/
+    // TODO-Throughput: Consider refactoring this so that we keep a map from regs to vars for better scaling
+    unsigned int regMapCount;
+
+    // When we split edges, we create new blocks, and instead of expanding the VarToRegMaps, we
+    // rely on the property that the "in" map is the same as the "from" block of the edge, and the
+    // "out" map is the same as the "to" block of the edge (by construction).
+    // So, for any block whose bbNum is greater than bbNumMaxBeforeResolution, we use the
+    // splitBBNumToTargetBBNumMap.
+    // TODO-Throughput: We may want to look into the cost/benefit tradeoff of doing this vs. expanding
+    // the arrays.
+
+    unsigned bbNumMaxBeforeResolution;
+    struct SplitEdgeInfo
+    {
+        unsigned fromBBNum;
+        unsigned toBBNum;
+    };
+    typedef JitHashTable<unsigned, JitSmallPrimitiveKeyFuncs<unsigned>, SplitEdgeInfo> SplitBBNumToTargetBBNumMap;
+    SplitBBNumToTargetBBNumMap* splitBBNumToTargetBBNumMap;
+    SplitBBNumToTargetBBNumMap* getSplitBBNumToTargetBBNumMap()
+    {
+        if (splitBBNumToTargetBBNumMap == nullptr)
+        {
+            splitBBNumToTargetBBNumMap =
+                new (getAllocator(compiler)) SplitBBNumToTargetBBNumMap(getAllocator(compiler));
+        }
+        return splitBBNumToTargetBBNumMap;
+    }
+    SplitEdgeInfo getSplitEdgeInfo(unsigned int bbNum);
+
+    void initVarRegMaps();
+    void setInVarRegForBB(unsigned int bbNum, unsigned int varNum, regNumber reg);
+    void setOutVarRegForBB(unsigned int bbNum, unsigned int varNum, regNumber reg);
+    VarToRegMap getInVarToRegMap(unsigned int bbNum);
+    VarToRegMap getOutVarToRegMap(unsigned int bbNum);
+    void setVarReg(VarToRegMap map, unsigned int trackedVarIndex, regNumber reg);
+    regNumber getVarReg(VarToRegMap map, unsigned int trackedVarIndex);
+    // Initialize the incoming VarToRegMap to the given map values (generally a predecessor of
+    // the block)
+    VarToRegMap setInVarToRegMap(unsigned int bbNum, VarToRegMap srcVarToRegMap);
+
+    regNumber getTempRegForResolution(BasicBlock* fromBlock, BasicBlock* toBlock, var_types type);
+
+#ifdef DEBUG
+    void dumpVarToRegMap(VarToRegMap map);
+    void dumpInVarToRegMap(BasicBlock* block);
+    void dumpOutVarToRegMap(BasicBlock* block);
+
+    // There are three points at which a tuple-style dump is produced, and each
+    // differs slightly:
+    //   - In LSRA_DUMP_PRE, it does a simple dump of each node, with indications of what
+    //     tree nodes are consumed.
+    //   - In LSRA_DUMP_REFPOS, which is after the intervals are built, but before
+    //     register allocation, each node is dumped, along with all of the RefPositions,
+    //     The Intervals are identified as Lnnn for lclVar intervals, Innn for other
+    //     intervals, and Tnnn for internal temps.
+    //   - In LSRA_DUMP_POST, which is after register allocation, the registers are
+    //     shown.
+
+    enum LsraTupleDumpMode{LSRA_DUMP_PRE, LSRA_DUMP_REFPOS, LSRA_DUMP_POST};
+    void lsraGetOperandString(GenTree* tree, LsraTupleDumpMode mode, char* operandString, unsigned operandStringLength);
+    void lsraDispNode(GenTree* tree, LsraTupleDumpMode mode, bool hasDest);
+    void DumpOperandDefs(
+        GenTree* operand, bool& first, LsraTupleDumpMode mode, char* operandString, const unsigned operandStringLength);
+    void TupleStyleDump(LsraTupleDumpMode mode);
+
+    LsraLocation maxNodeLocation;
+
+    // Width of various fields - used to create a streamlined dump during allocation that shows the
+    // state of all the registers in columns.
+ int regColumnWidth; + int regTableIndent; + + const char* columnSeparator; + const char* line; + const char* leftBox; + const char* middleBox; + const char* rightBox; + + static const int MAX_FORMAT_CHARS = 12; + char intervalNameFormat[MAX_FORMAT_CHARS]; + char regNameFormat[MAX_FORMAT_CHARS]; + char shortRefPositionFormat[MAX_FORMAT_CHARS]; + char emptyRefPositionFormat[MAX_FORMAT_CHARS]; + char indentFormat[MAX_FORMAT_CHARS]; + static const int MAX_LEGEND_FORMAT_CHARS = 25; + char bbRefPosFormat[MAX_LEGEND_FORMAT_CHARS]; + char legendFormat[MAX_LEGEND_FORMAT_CHARS]; + + // How many rows have we printed since last printing a "title row"? + static const int MAX_ROWS_BETWEEN_TITLES = 50; + int rowCountSinceLastTitle; + // Current mask of registers being printed in the dump. + regMaskTP lastDumpedRegisters; + regMaskTP registersToDump; + int lastUsedRegNumIndex; + bool shouldDumpReg(regNumber regNum) + { + return (registersToDump & genRegMask(regNum)) != 0; + } + + void dumpRegRecordHeader(); + void dumpRegRecordTitle(); + void dumpRegRecordTitleIfNeeded(); + void dumpRegRecordTitleLines(); + void dumpRegRecords(); + // An abbreviated RefPosition dump for printing with column-based register state + void dumpRefPositionShort(RefPosition* refPosition, BasicBlock* currentBlock); + // Print the number of spaces occupied by a dumpRefPositionShort() + void dumpEmptyRefPosition(); + // A dump of Referent, in exactly regColumnWidth characters + void dumpIntervalName(Interval* interval); + + // Events during the allocation phase that cause some dump output + enum LsraDumpEvent{ + // Conflicting def/use + LSRA_EVENT_DEFUSE_CONFLICT, LSRA_EVENT_DEFUSE_FIXED_DELAY_USE, LSRA_EVENT_DEFUSE_CASE1, LSRA_EVENT_DEFUSE_CASE2, + LSRA_EVENT_DEFUSE_CASE3, LSRA_EVENT_DEFUSE_CASE4, LSRA_EVENT_DEFUSE_CASE5, LSRA_EVENT_DEFUSE_CASE6, + + // Spilling + LSRA_EVENT_SPILL, LSRA_EVENT_SPILL_EXTENDED_LIFETIME, LSRA_EVENT_RESTORE_PREVIOUS_INTERVAL, + LSRA_EVENT_RESTORE_PREVIOUS_INTERVAL_AFTER_SPILL, LSRA_EVENT_DONE_KILL_GC_REFS, + + // Block boundaries + LSRA_EVENT_START_BB, LSRA_EVENT_END_BB, + + // Miscellaneous + LSRA_EVENT_FREE_REGS, + + // Characteristics of the current RefPosition + LSRA_EVENT_INCREMENT_RANGE_END, // ??? 
+        LSRA_EVENT_LAST_USE, LSRA_EVENT_LAST_USE_DELAYED, LSRA_EVENT_NEEDS_NEW_REG,
+
+        // Allocation decisions
+        LSRA_EVENT_FIXED_REG, LSRA_EVENT_EXP_USE, LSRA_EVENT_ZERO_REF, LSRA_EVENT_NO_ENTRY_REG_ALLOCATED,
+        LSRA_EVENT_KEPT_ALLOCATION, LSRA_EVENT_COPY_REG, LSRA_EVENT_MOVE_REG, LSRA_EVENT_ALLOC_REG,
+        LSRA_EVENT_ALLOC_SPILLED_REG, LSRA_EVENT_NO_REG_ALLOCATED, LSRA_EVENT_RELOAD, LSRA_EVENT_SPECIAL_PUTARG,
+        LSRA_EVENT_REUSE_REG,
+    };
+    void dumpLsraAllocationEvent(LsraDumpEvent event,
+                                 Interval* interval = nullptr,
+                                 regNumber reg = REG_NA,
+                                 BasicBlock* currentBlock = nullptr);
+
+    void dumpBlockHeader(BasicBlock* block);
+
+    void validateIntervals();
+#endif // DEBUG
+
+#if TRACK_LSRA_STATS
+    enum LsraStat{
+        LSRA_STAT_SPILL, LSRA_STAT_COPY_REG, LSRA_STAT_RESOLUTION_MOV, LSRA_STAT_SPLIT_EDGE,
+    };
+
+    unsigned regCandidateVarCount;
+    void updateLsraStat(LsraStat stat, unsigned currentBBNum);
+
+    void dumpLsraStats(FILE* file);
+
+#define INTRACK_STATS(x) x
+#else // !TRACK_LSRA_STATS
+#define INTRACK_STATS(x)
+#endif // !TRACK_LSRA_STATS
+
+    Compiler* compiler;
+
+private:
+#if MEASURE_MEM_ALLOC
+    CompAllocator* lsraAllocator;
+#endif
+
+    CompAllocator* getAllocator(Compiler* comp)
+    {
+#if MEASURE_MEM_ALLOC
+        if (lsraAllocator == nullptr)
+        {
+            lsraAllocator = new (comp, CMK_LSRA) CompAllocator(comp, CMK_LSRA);
+        }
+        return lsraAllocator;
+#else
+        return comp->getAllocator();
+#endif
+    }
+
+#ifdef DEBUG
+    // This is used for dumping
+    RefPosition* activeRefPosition;
+#endif // DEBUG
+
+    IntervalList intervals;
+
+    RegRecord physRegs[REG_COUNT];
+
+    // Map from tracked variable index to Interval*.
+    Interval** localVarIntervals;
+
+    // Set of blocks that have been visited.
+    BlockSet bbVisitedSet;
+    void markBlockVisited(BasicBlock* block)
+    {
+        BlockSetOps::AddElemD(compiler, bbVisitedSet, block->bbNum);
+    }
+    void clearVisitedBlocks()
+    {
+        BlockSetOps::ClearD(compiler, bbVisitedSet);
+    }
+    bool isBlockVisited(BasicBlock* block)
+    {
+        return BlockSetOps::IsMember(compiler, bbVisitedSet, block->bbNum);
+    }
+
+#if DOUBLE_ALIGN
+    bool doDoubleAlign;
+#endif
+
+    // A map from bbNum to the block information used during register allocation.
+    LsraBlockInfo* blockInfo;
+    BasicBlock* findPredBlockForLiveIn(BasicBlock* block, BasicBlock* prevBlock DEBUGARG(bool* pPredBlockIsAllocated));
+
+    // The order in which the blocks will be allocated.
+    // This is an array of BasicBlock*, in the order in which they should be traversed.
+    BasicBlock** blockSequence;
+    // The verifiedAllBBs flag indicates whether we have verified that all BBs have been
+    // included in the blockSequence above, during setBlockSequence().
+    bool verifiedAllBBs;
+    void setBlockSequence();
+    int compareBlocksForSequencing(BasicBlock* block1, BasicBlock* block2, bool useBlockWeights);
+    BasicBlockList* blockSequenceWorkList;
+    bool blockSequencingDone;
+    void addToBlockSequenceWorkList(BlockSet sequencedBlockSet, BasicBlock* block, BlockSet& predSet);
+    void removeFromBlockSequenceWorkList(BasicBlockList* listNode, BasicBlockList* prevNode);
+    BasicBlock* getNextCandidateFromWorkList();
+
+    // The bbNum of the block being currently allocated or resolved.
+    unsigned int curBBNum;
+    // The current location
+    LsraLocation currentLoc;
+    // The ordinal of the block we're on (i.e. this is the curBBSeqNum-th block we've allocated).
+    unsigned int curBBSeqNum;
+    // The number of blocks that we've sequenced.
+    unsigned int bbSeqCount;
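+
+    // (Illustrative note, per the LsraLocation comment at the top of this file: each
+    // node receives two locations - its uses at the first and its last def at the
+    // second - so currentLoc advances by 2 per node, and a block containing N
+    // allocatable nodes spans roughly 2N locations.)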
+    // The Location of the start of the current block.
+    LsraLocation curBBStartLocation;
+    // True if the method contains any critical edges.
+    bool hasCriticalEdges;
+
+    // True if there are any register candidate lclVars available for allocation.
+    bool enregisterLocalVars;
+
+    virtual bool willEnregisterLocalVars() const
+    {
+        return enregisterLocalVars;
+    }
+
+    // Ordered list of RefPositions
+    RefPositionList refPositions;
+
+    // Per-block variable location mappings: an array indexed by block number that yields a
+    // pointer to an array of regNumber, one per variable.
+    VarToRegMap* inVarToRegMaps;
+    VarToRegMap* outVarToRegMaps;
+
+    // A temporary VarToRegMap used during the resolution of critical edges.
+    VarToRegMap sharedCriticalVarToRegMap;
+
+    PhasedVar<regMaskTP> availableIntRegs;
+    PhasedVar<regMaskTP> availableFloatRegs;
+    PhasedVar<regMaskTP> availableDoubleRegs;
+
+    // The set of all register candidates. Note that this may be a subset of tracked vars.
+    VARSET_TP registerCandidateVars;
+    // Current set of live register candidate vars, used during building of RefPositions to determine
+    // whether to preference to callee-save.
+    VARSET_TP currentLiveVars;
+    // Set of variables that may require resolution across an edge.
+    // This is first constructed during interval building, to contain all the lclVars that are live at BB edges.
+    // Then, any lclVar that is always in the same register is removed from the set.
+    VARSET_TP resolutionCandidateVars;
+    // This set contains all the lclVars that are ever spilled or split.
+    VARSET_TP splitOrSpilledVars;
+    // Set of floating point variables to consider for callee-save registers.
+    VARSET_TP fpCalleeSaveCandidateVars;
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+#if defined(_TARGET_AMD64_)
+    static bool varTypeNeedsPartialCalleeSave(var_types type)
+    {
+        return (emitTypeSize(type) == 32);
+    }
+    static const var_types LargeVectorSaveType = TYP_SIMD16;
+#elif defined(_TARGET_ARM64_)
+    static bool varTypeNeedsPartialCalleeSave(var_types type)
+    {
+        // The ARM64 ABI only requires the callee to save the lower 8 bytes of the FP callee-save registers.
+        // For SIMD types longer than 8 bytes, the caller is responsible for saving and restoring the upper bytes.
+        return (emitTypeSize(type) == 16);
+    }
+    static const var_types LargeVectorSaveType = TYP_DOUBLE;
+#else // !defined(_TARGET_AMD64_) && !defined(_TARGET_ARM64_)
+#error("Unknown target architecture for FEATURE_SIMD")
+#endif // !defined(_TARGET_AMD64_) && !defined(_TARGET_ARM64_)
+
+    // Set of large vector (TYP_SIMD32 on AVX) variables.
+    VARSET_TP largeVectorVars;
+    // Set of large vector (TYP_SIMD32 on AVX) variables to consider for callee-save registers.
+    VARSET_TP largeVectorCalleeSaveCandidateVars;
+#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+
+    //-----------------------------------------------------------------------
+    // TreeNodeInfo methods
+    //-----------------------------------------------------------------------
+
+    // The operandToLocationInfoMap is used for the transient TreeNodeInfo that is computed by
+    // the TreeNodeInfoInit methods, and used in building RefPositions.
+    typedef SmallHashTable<GenTree*, LocationInfoListNode*, 32> OperandToLocationInfoMap;
+    OperandToLocationInfoMap* operandToLocationInfoMap;
+    // The useList is constructed for each node by the TreeNodeInfoInit methods.
+    // It contains the TreeNodeInfo for its operands, in their order of use.
+    LocationInfoList useList;
+
+    // Get the LocationInfoListNode for the given node, and put it into the useList.
+    // The node must not be contained, and must have been processed by buildRefPositionsForNode().
+
+    // Get the LocationInfoListNode for the given node, and put it into the useList.
+    // The node must not be contained, and must have been processed by buildRefPositionsForNode().
+    void appendLocationInfoToList(GenTree* node)
+    {
+        LocationInfoListNode* locationInfo;
+        bool                  found = operandToLocationInfoMap->TryRemove(node, &locationInfo);
+        assert(found);
+        useList.Append(locationInfo);
+    }
+    // Get the LocationInfoListNode for the given node, and return it without putting it into the useList.
+    // The node must not be contained, and must have been processed by buildRefPositionsForNode().
+    LocationInfoListNode* getLocationInfo(GenTree* node)
+    {
+        LocationInfoListNode* locationInfo;
+        bool                  found = operandToLocationInfoMap->TryRemove(node, &locationInfo);
+        assert(found);
+        return locationInfo;
+    }
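// NOTE: illustrative sketch, not part of this change. Because both helpers above fetch
// via TryRemove, an operand's location info can be retrieved exactly once; a second
// request trips assert(found). The resulting discipline for a hypothetical two-operand
// node:
//
//     //     appendLocationInfoToList(node->gtOp1); // first (and only) fetch of op1
//     //     appendLocationInfoToList(node->gtOp2); // first (and only) fetch of op2
//     //     info->srcCount = 2;                    // srcCount mirrors the appends
//     // Re-fetching node->gtOp1 here would assert, catching double-counted uses.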
+    //------------------------------------------------------------------------
+    // appendBinaryLocationInfoToList: Get the LocationInfoListNodes for the operands of the
+    //    given node, and put them into the useList.
+    //
+    // Arguments:
+    //    node - a GenTreeOp
+    //
+    // Return Value:
+    //    The number of actual register operands.
+    //
+    // Notes:
+    //    The operands must already have been processed by buildRefPositionsForNode, and their
+    //    LocationInfoListNodes placed in the operandToLocationInfoMap.
+    //
+    int appendBinaryLocationInfoToList(GenTreeOp* node)
+    {
+        int      srcCount = 0;
+        GenTree* op1      = node->gtOp1;
+        GenTree* op2      = node->gtGetOp2IfPresent();
+        if (node->IsReverseOp() && op2 != nullptr)
+        {
+            srcCount += GetOperandInfo(op2);
+            op2 = nullptr;
+        }
+        if (op1 != nullptr)
+        {
+            srcCount += GetOperandInfo(op1);
+        }
+        if (op2 != nullptr)
+        {
+            srcCount += GetOperandInfo(op2);
+        }
+        return srcCount;
+    }
+
+    // This is the main entry point for computing the TreeNodeInfo for a node.
+    void TreeNodeInfoInit(GenTree* stmt, TreeNodeInfo* info);
+
+    void TreeNodeInfoInitCheckByteable(GenTree* tree, TreeNodeInfo* info);
+
+    bool CheckAndSetDelayFree(GenTree* delayUseSrc);
+
+    void TreeNodeInfoInitSimple(GenTree* tree, TreeNodeInfo* info);
+    int GetOperandInfo(GenTree* node);
+    int GetOperandInfo(GenTree* node, LocationInfoListNode** pFirstInfo);
+    int GetIndirInfo(GenTreeIndir* indirTree);
+    void HandleFloatVarArgs(GenTreeCall* call, TreeNodeInfo* info, GenTree* argNode, bool* callHasFloatRegArgs);
+
+    void TreeNodeInfoInitStoreLoc(GenTree* tree, TreeNodeInfo* info);
+    void TreeNodeInfoInitReturn(GenTree* tree, TreeNodeInfo* info);
+    // This method, unlike the others, returns the number of sources, since it may be called when
+    // 'tree' is contained.
+    int TreeNodeInfoInitShiftRotate(GenTree* tree, TreeNodeInfo* info);
+    void TreeNodeInfoInitPutArgReg(GenTreeUnOp* node, TreeNodeInfo* info);
+    void TreeNodeInfoInitCall(GenTreeCall* call, TreeNodeInfo* info);
+    void TreeNodeInfoInitCmp(GenTree* tree, TreeNodeInfo* info);
+    void TreeNodeInfoInitStructArg(GenTree* structArg, TreeNodeInfo* info);
+    void TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode, TreeNodeInfo* info);
+    void TreeNodeInfoInitModDiv(GenTree* tree, TreeNodeInfo* info);
+    void TreeNodeInfoInitIntrinsic(GenTree* tree, TreeNodeInfo* info);
+    void TreeNodeInfoInitStoreLoc(GenTreeLclVarCommon* tree, TreeNodeInfo* info);
+    void TreeNodeInfoInitIndir(GenTreeIndir* indirTree, TreeNodeInfo* info);
+    void TreeNodeInfoInitGCWriteBarrier(GenTree* tree, TreeNodeInfo* info);
+    void TreeNodeInfoInitCast(GenTree* tree, TreeNodeInfo* info);
+
+#ifdef _TARGET_X86_
+    bool ExcludeNonByteableRegisters(GenTree* tree);
+#endif
+
+#if defined(_TARGET_XARCH_)
+    // returns true if the tree can use the read-modify-write memory instruction form
+    bool isRMWRegOper(GenTree* tree);
+    void TreeNodeInfoInitMul(GenTree* tree, TreeNodeInfo* info);
+    void SetContainsAVXFlags(bool isFloatingPointType = true, unsigned sizeOfSIMDVector = 0);
+#endif // defined(_TARGET_XARCH_)
+
+#ifdef FEATURE_SIMD
+    void TreeNodeInfoInitSIMD(GenTreeSIMD* tree, TreeNodeInfo* info);
+#endif // FEATURE_SIMD
+
+#if FEATURE_HW_INTRINSICS
+    void TreeNodeInfoInitHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, TreeNodeInfo* info);
+#endif // FEATURE_HW_INTRINSICS
+
+    void TreeNodeInfoInitPutArgStk(GenTreePutArgStk* argNode, TreeNodeInfo* info);
+#ifdef _TARGET_ARM_
+    void TreeNodeInfoInitPutArgSplit(GenTreePutArgSplit* tree, TreeNodeInfo* info);
+#endif
+    void TreeNodeInfoInitLclHeap(GenTree* tree, TreeNodeInfo* info);
+};
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX                                                                             XX
+XX                           Interval                                          XX
+XX                                                                             XX
+XX This is the fundamental data structure for linear scan register             XX
+XX allocation. It represents the live range(s) for a variable or temp.         XX
+XX                                                                             XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+class Interval : public Referenceable
+{
+public:
+    Interval(RegisterType registerType, regMaskTP registerPreferences)
+        : registerPreferences(registerPreferences)
+        , relatedInterval(nullptr)
+        , assignedReg(nullptr)
+        , registerType(registerType)
+        , isLocalVar(false)
+        , isSplit(false)
+        , isSpilled(false)
+        , isInternal(false)
+        , isStructField(false)
+        , isPromotedStruct(false)
+        , hasConflictingDefUse(false)
+        , hasInterferingUses(false)
+        , isSpecialPutArg(false)
+        , preferCalleeSave(false)
+        , isConstant(false)
+        , isMultiReg(false)
+        , physReg(REG_COUNT)
+#ifdef DEBUG
+        , intervalIndex(0)
+#endif
+        , varNum(0)
+    {
+    }
+
+#ifdef DEBUG
+    // print out representation
+    void dump();
+    // concise representation for embedding
+    void tinyDump();
+    // extremely concise representation
+    void microDump();
+#endif // DEBUG
+
+    void setLocalNumber(Compiler* compiler, unsigned lclNum, LinearScan* l);
+
+    // Fixed registers for which this Interval has a preference
+    regMaskTP registerPreferences;
+
+    // The relatedInterval is the interval to which this interval is currently
+    // preferenced (e.g. because they are related by a copy).
+    Interval* relatedInterval;
+
+    // The assignedReg is the RegRecord for the register to which this interval
+    // has been assigned at some point - if the interval is active, this is the
+    // register it currently occupies.
+    RegRecord* assignedReg;
+
+    // DECIDE : put this in a union or do something w/ inheritance?
+    // this is an interval for a physical register, not an allocatable entity
+
+    RegisterType registerType;
+    bool         isLocalVar : 1;
+    // Indicates whether this interval has been assigned to different registers
+    bool isSplit : 1;
+    // Indicates whether this interval is ever spilled
+    bool isSpilled : 1;
+    // indicates an interval representing the internal requirements for
+    // generating code for a node (temp registers internal to the node)
+    // Note that this interval may live beyond a node in the GT_ARR_LENREF/GT_IND
+    // case (though never lives beyond a stmt)
+    bool isInternal : 1;
+    // true if this is a LocalVar for a struct field
+    bool isStructField : 1;
+    // true iff this is a GT_LDOBJ for a fully promoted (PROMOTION_TYPE_INDEPENDENT) struct
+    bool isPromotedStruct : 1;
+    // true if this is an SDSU interval for which the def and use have conflicting register
+    // requirements
+    bool hasConflictingDefUse : 1;
+    // true if this interval's defining node has "delayRegFree" uses, either due to it being an RMW instruction,
+    // OR because it requires an internal register that differs from the target.
+    bool hasInterferingUses : 1;
+
+    // True if this interval is defined by a putArg, whose source is a non-last-use lclVar.
+    // During allocation, this flag will be cleared if the source is not already in the required register.
+    // Otherwise, we will leave the register allocated to the lclVar, but mark the RegRecord as
+    // isBusyUntilNextKill, so that it won't be reused if the lclVar goes dead before the call.
+    bool isSpecialPutArg : 1;
+
+    // True if this interval interferes with a call.
+    bool preferCalleeSave : 1;
+
+    // True if this interval is defined by a constant node that may be reused and/or may be
+    // able to reuse a constant that's already in a register.
+    bool isConstant : 1;
+
+    // True if this Interval is defined by a node that produces multiple registers.
+    bool isMultiReg : 1;
+
+    // The register to which it is currently assigned.
+    regNumber physReg;
+
+#ifdef DEBUG
+    unsigned int intervalIndex;
+#endif // DEBUG
+
+    unsigned int varNum; // This is the "variable number": the index into the lvaTable array
+
+    LclVarDsc* getLocalVar(Compiler* comp)
+    {
+        assert(isLocalVar);
+        return &(comp->lvaTable[this->varNum]);
+    }
+
+    // Get the local tracked variable "index" (lvVarIndex), used in bitmasks.
+    unsigned getVarIndex(Compiler* comp)
+    {
+        LclVarDsc* varDsc = getLocalVar(comp);
+        assert(varDsc->lvTracked); // If this isn't true, we shouldn't be calling this function!
+        return varDsc->lvVarIndex;
+    }
+
+    bool isAssignedTo(regNumber regNum)
+    {
+        // This uses regMasks to handle the case where a double actually occupies two registers
+        // TODO-Throughput: This could/should be done more cheaply.
+        return (physReg != REG_NA && (genRegMask(physReg, registerType) & genRegMask(regNum)) != RBM_NONE);
+    }
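// NOTE: illustrative sketch, not part of this change. Why isAssignedTo intersects masks
// rather than comparing register numbers: on ARM32 a TYP_DOUBLE interval occupies a float
// register pair, so both halves must report as assigned. Assuming such a target:
//
//     //     physReg == REG_F0, registerType == TYP_DOUBLE
//     //     genRegMask(REG_F0, TYP_DOUBLE) == (RBM_F0 | RBM_F1)
//     //     => isAssignedTo(REG_F0) and isAssignedTo(REG_F1) are both true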
+
+    // Assign the related interval.
+    void assignRelatedInterval(Interval* newRelatedInterval)
+    {
+#ifdef DEBUG
+        if (VERBOSE)
+        {
+            printf("Assigning related ");
+            newRelatedInterval->microDump();
+            printf(" to ");
+            this->microDump();
+            printf("\n");
+        }
+#endif // DEBUG
+        relatedInterval = newRelatedInterval;
+    }
+
+    // Assign the related interval, but only if it isn't already assigned.
+    void assignRelatedIntervalIfUnassigned(Interval* newRelatedInterval)
+    {
+        if (relatedInterval == nullptr)
+        {
+            assignRelatedInterval(newRelatedInterval);
+        }
+        else
+        {
+#ifdef DEBUG
+            if (VERBOSE)
+            {
+                printf("Interval ");
+                this->microDump();
+                printf(" already has a related interval\n");
+            }
+#endif // DEBUG
+        }
+    }
+
+    // Update the registerPreferences on the interval.
+    // If there are conflicting requirements on this interval, set the preferences to
+    // the union of them. That way maybe we'll get at least one of them.
+    // An exception is made when this interval prefers callee-save registers
+    // (preferCalleeSave), in which case the union is narrowed to its callee-save
+    // subset, if that subset is non-empty.
+
+    void updateRegisterPreferences(regMaskTP preferences)
+    {
+        // We require registerPreferences to have been initialized.
+        assert(registerPreferences != RBM_NONE);
+        // It is invalid to update with empty preferences
+        assert(preferences != RBM_NONE);
+
+        regMaskTP commonPreferences = (registerPreferences & preferences);
+        if (commonPreferences != RBM_NONE)
+        {
+            registerPreferences = commonPreferences;
+            return;
+        }
+
+        // There are no preferences in common.
+        // Preferences need to reflect both cases where a var must occupy a specific register,
+        // as well as cases where a var is live when a register is killed.
+        // In the former case, we would like to record all such registers, however we don't
+        // really want to use any registers that will interfere.
+        // To approximate this, we never "or" together multi-reg sets, which are generally kill sets.
+
+        if (!genMaxOneBit(preferences))
+        {
+            // The new preference value is a multi-reg set, so it's probably a kill.
+            // Keep the new value.
+            registerPreferences = preferences;
+            return;
+        }
+
+        if (!genMaxOneBit(registerPreferences))
+        {
+            // The old preference value is a multi-reg set.
+            // Keep the existing preference set, as it probably reflects one or more kills.
+            // It may have been a union of multiple individual registers, but we can't
+            // distinguish that case without extra cost.
+            return;
+        }
+
+        // If we reach here, we have two disjoint single-reg sets.
+        // Keep only the callee-save preferences, if not empty.
+        // Otherwise, take the union of the preferences.
+
+        regMaskTP newPreferences = registerPreferences | preferences;
+
+        if (preferCalleeSave)
+        {
+            regMaskTP calleeSaveMask = (calleeSaveRegs(this->registerType) & (newPreferences));
+            if (calleeSaveMask != RBM_NONE)
+            {
+                newPreferences = calleeSaveMask;
+            }
+        }
+        registerPreferences = newPreferences;
+    }
+};
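// NOTE: illustrative sketch, not part of this change. A worked example of
// updateRegisterPreferences, assuming ARM register masks and an interval whose
// preferences start as a two-register set:
//
//     //     registerPreferences == (RBM_R4 | RBM_R5)
//     //     updateRegisterPreferences(RBM_R4);  // overlap exists -> narrowed to RBM_R4
//     //     updateRegisterPreferences(RBM_R6);  // disjoint single-reg sets -> unioned
//     //                                         // to RBM_R4 | RBM_R6
//     //     updateRegisterPreferences(RBM_CALLEE_TRASH); // no overlap, and the new set
//     //         // is multi-reg (likely a kill) -> it replaces the old preferences
//
// With preferCalleeSave set, the union in the second step would instead be narrowed to
// its callee-save subset, if that subset is non-empty.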
+
+class RefPosition
+{
+public:
+    // A RefPosition refers to either an Interval or a RegRecord. 'referent' points to one
+    // of these types. If it refers to a RegRecord, then 'isPhysRegRef' is true. If it
+    // refers to an Interval, then 'isPhysRegRef' is false.
+    //
+    // Q: can 'referent' be NULL?
+
+    Referenceable* referent;
+
+    // nextRefPosition is the next in code order.
+    // Note that in either case there is no need for these to be doubly linked, as they
+    // are only traversed in the forward direction, and are not moved.
+    RefPosition* nextRefPosition;
+
+    // The remaining fields are common to both kinds of referent.
+    GenTree*     treeNode;
+    unsigned int bbNum;
+
+    LsraLocation nodeLocation;
+
+    // Prior to the allocation pass, registerAssignment captures the valid registers
+    // for this RefPosition. An empty set means that any register is valid. A non-empty
+    // set means that it must be one of the given registers (may be the full set if the
+    // only constraint is that it must reside in SOME register)
+    // After the allocation pass, this contains the actual assignment
+    regMaskTP registerAssignment;
+
+    RefType refType;
+
+    // NOTE: C++ only packs bitfields if the base type is the same. So make all the base
+    // NOTE: types of the logically "bool" types that follow 'unsigned char', so they match
+    // NOTE: RefType that precedes this, and multiRegIdx can also match.
+
+    // Indicates whether this ref position is to be allocated a reg only if profitable. Currently these are the
+    // ref positions that lower/codegen has indicated as reg optional; such a position is treated as a contained
+    // memory operand if no reg is allocated.
+    unsigned char allocRegIfProfitable : 1;
+
+    // Used by RefTypeDef/Use positions of a multi-reg call node.
+    // Indicates the position of the register that this ref position refers to.
+    // The max bits needed is based on the max value of MAX_RET_REG_COUNT across
+    // all targets, which is currently 4 (on ARM). Hence the index value would be
+    // 0..MAX_RET_REG_COUNT-1.
+    unsigned char multiRegIdx : 2;
+
+    // Last Use - this may be true for multiple RefPositions in the same Interval
+    unsigned char lastUse : 1;
+
+    // Spill and Copy info
+    //   reload indicates that the value was spilled, and must be reloaded here.
+    //   spillAfter indicates that the value is spilled here, so a spill must be added.
+    //   copyReg indicates that the value needs to be copied to a specific register,
+    //      but that it will also retain its current assigned register.
+    //   moveReg indicates that the value needs to be moved to a different register,
+    //      and that this will be its new assigned register.
+    // A RefPosition may have any flag individually or the following combinations:
+    //  - reload and spillAfter (i.e. it remains in memory), but not in combination with copyReg or moveReg
+    //    (reload cannot exist with copyReg or moveReg; it should be reloaded into the appropriate reg)
+    //  - spillAfter and copyReg (i.e. it must be copied to a new reg for use, but is then spilled)
+    //  - spillAfter and moveReg (i.e. it must be both spilled and moved)
+    //    NOTE: a moveReg involves an explicit move, and would usually not be needed for a fixed Reg if it is going
+    //    to be spilled, because the code generator will do the move to the fixed register, and doesn't need to
+    //    record the new register location as the new "home" location of the lclVar. However, if there is a conflicting
+    //    use at the same location (e.g. lclVar V1 is in rdx and needs to be in rcx, but V2 needs to be in rdx), then
+    //    we need an explicit move.
+    //  - copyReg and moveReg must not exist with each other.
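// NOTE: illustrative sketch, not part of this change. The rules above could be asserted
// directly; a hypothetical debug helper makes the legal combinations explicit:
//
//     // void RefPosition::validateSpillCopyFlags() const
//     // {
//     //     assert(!(reload && (copyReg || moveReg))); // reload excludes copy/move
//     //     assert(!(copyReg && moveReg));             // mutually exclusive
//     //     // reload+spillAfter, spillAfter+copyReg, spillAfter+moveReg are all legal
//     // }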
+
+    unsigned char reload : 1;
+    unsigned char spillAfter : 1;
+    unsigned char copyReg : 1;
+    unsigned char moveReg : 1; // true if this var is moved to a new register
+
+    unsigned char isPhysRegRef : 1; // true if 'referent' points to a RegRecord, false if it points to an Interval
+    unsigned char isFixedRegRef : 1;
+    unsigned char isLocalDefUse : 1;
+
+    // delayRegFree indicates that the register should not be freed right away, but instead wait
+    // until the next Location after it would normally be freed. This is used for the case of
+    // non-commutative binary operators, where op2 must not be assigned the same register as
+    // the target. We do this by not freeing it until after the target has been defined.
+    // Another option would be to actually change the Location of the op2 use until the same
+    // Location as the def, but then it could potentially reuse a register that has been freed
+    // from the other source(s), e.g. if it's a lastUse or spilled.
+    unsigned char delayRegFree : 1;
+
+    // outOfOrder is marked on a (non-def) RefPosition that doesn't follow a definition of the
+    // register currently assigned to the Interval. This happens when we use the assigned
+    // register from a predecessor that is not the most recently allocated BasicBlock.
+    unsigned char outOfOrder : 1;
+
+#ifdef DEBUG
+    // Minimum number of registers that must remain available as candidates for this
+    // ref position when constraining candidates under LSRA stress.
+    unsigned minRegCandidateCount;
+
+    // The unique RefPosition number, equal to its index in the
+    // refPositions list. Only used for debugging dumps.
+    unsigned rpNum;
+#endif // DEBUG
+
+    RefPosition(unsigned int bbNum, LsraLocation nodeLocation, GenTree* treeNode, RefType refType)
+        : referent(nullptr)
+        , nextRefPosition(nullptr)
+        , treeNode(treeNode)
+        , bbNum(bbNum)
+        , nodeLocation(nodeLocation)
+        , registerAssignment(RBM_NONE)
+        , refType(refType)
+        , multiRegIdx(0)
+        , lastUse(false)
+        , reload(false)
+        , spillAfter(false)
+        , copyReg(false)
+        , moveReg(false)
+        , isPhysRegRef(false)
+        , isFixedRegRef(false)
+        , isLocalDefUse(false)
+        , delayRegFree(false)
+        , outOfOrder(false)
+#ifdef DEBUG
+        , minRegCandidateCount(1)
+        , rpNum(0)
+#endif
+    {
+    }
+
+    Interval* getInterval()
+    {
+        assert(!isPhysRegRef);
+        return (Interval*)referent;
+    }
+    void setInterval(Interval* i)
+    {
+        referent     = i;
+        isPhysRegRef = false;
+    }
+
+    RegRecord* getReg()
+    {
+        assert(isPhysRegRef);
+        return (RegRecord*)referent;
+    }
+    void setReg(RegRecord* r)
+    {
+        referent           = r;
+        isPhysRegRef       = true;
+        registerAssignment = genRegMask(r->regNum);
+    }
+
+    regNumber assignedReg()
+    {
+        if (registerAssignment == RBM_NONE)
+        {
+            return REG_NA;
+        }
+
+        return genRegNumFromMask(registerAssignment);
+    }
+
+    // Returns true if this RefPosition is a use or def on an actual GenTree node.
+    bool IsActualRef()
+    {
+        return (refType == RefTypeDef || refType == RefTypeUse);
+    }
+
+    bool RequiresRegister()
+    {
+        return (IsActualRef()
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+                || refType == RefTypeUpperVectorSaveDef || refType == RefTypeUpperVectorSaveUse
+#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+                ) &&
+               !AllocateIfProfitable();
+    }
+
+    void setAllocateIfProfitable(bool val)
+    {
+        allocRegIfProfitable = val;
+    }
+
+    // Returns true if this ref position is to be allocated a reg only if profitable.
+    bool AllocateIfProfitable()
+    {
+        // TODO-CQ: Right now if a ref position is marked as copyreg or movereg, then it is
+        // not treated as 'allocate if profitable'. This is an implementation limitation
+        // that needs to be addressed.
+        return allocRegIfProfitable && !copyReg && !moveReg;
+    }
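// NOTE: illustrative sketch, not part of this change. A use position marked with
// setAllocateIfProfitable(true) may legitimately end up with no register, in which case
// codegen treats the operand as contained memory, e.g. on xarch:
//
//     //     add rax, qword ptr [rbp-8] ; reg-optional op2 was not allocated a register
//     //     add rax, rcx               ; reg-optional op2 did receive a register
//
// The !copyReg && !moveReg carve-out above means a position that must be copied or moved
// is always treated as requiring a register, regardless of the reg-optional marking.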
+
+    void setMultiRegIdx(unsigned idx)
+    {
+        multiRegIdx = idx;
+        assert(multiRegIdx == idx);
+    }
+
+    unsigned getMultiRegIdx()
+    {
+        return multiRegIdx;
+    }
+
+    LsraLocation getRefEndLocation()
+    {
+        return delayRegFree ? nodeLocation + 1 : nodeLocation;
+    }
+
+    bool isIntervalRef()
+    {
+        return (!isPhysRegRef && (referent != nullptr));
+    }
+
+    // isTrueDef indicates that the RefPosition is a non-update def of a non-internal
+    // interval
+    bool isTrueDef()
+    {
+        return (refType == RefTypeDef && isIntervalRef() && !getInterval()->isInternal);
+    }
+
+    // isFixedRefOfRegMask indicates that the RefPosition has a fixed assignment to the register
+    // specified by the given mask
+    bool isFixedRefOfRegMask(regMaskTP regMask)
+    {
+        assert(genMaxOneBit(regMask));
+        return (registerAssignment == regMask);
+    }
+
+    // isFixedRefOfReg indicates that the RefPosition has a fixed assignment to the given register
+    bool isFixedRefOfReg(regNumber regNum)
+    {
+        return (isFixedRefOfRegMask(genRegMask(regNum)));
+    }
+
+#ifdef DEBUG
+    // operator= copies everything except 'rpNum', which must remain unique
+    RefPosition& operator=(const RefPosition& rp)
+    {
+        unsigned rpNumSave = rpNum;
+        memcpy(this, &rp, sizeof(rp));
+        rpNum = rpNumSave;
+        return *this;
+    }
+
+    void dump();
+#endif // DEBUG
+};
+
+#ifdef DEBUG
+void dumpRegMask(regMaskTP regs);
+#endif // DEBUG
+
+/*****************************************************************************/
+#endif //_LSRA_H_
+/*****************************************************************************/
diff --git a/src/coreclr/src/jit/lsraarm.cpp b/src/coreclr/src/jit/lsraarm.cpp
index 19d745fbfa31d65f5d4fb0e6fb27f7e4b03d2b76..eabb7cadf5e59542df705d96c1248f1478aed362 100644
--- a/src/coreclr/src/jit/lsraarm.cpp
+++ b/src/coreclr/src/jit/lsraarm.cpp
@@ -38,22 +38,25 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 // Return Value:
 //    None.
// -void LinearScan::TreeNodeInfoInitReturn(GenTree* tree) +void LinearScan::TreeNodeInfoInitReturn(GenTree* tree, TreeNodeInfo* info) { - TreeNodeInfo* info = &(tree->gtLsraInfo); - GenTree* op1 = tree->gtGetOp1(); + GenTree* op1 = tree->gtGetOp1(); assert(info->dstCount == 0); if (tree->TypeGet() == TYP_LONG) { assert((op1->OperGet() == GT_LONG) && op1->isContained()); - GenTree* loVal = op1->gtGetOp1(); - GenTree* hiVal = op1->gtGetOp2(); - info->srcCount = 2; - loVal->gtLsraInfo.setSrcCandidates(this, RBM_LNGRET_LO); - hiVal->gtLsraInfo.setSrcCandidates(this, RBM_LNGRET_HI); + GenTree* loVal = op1->gtGetOp1(); + GenTree* hiVal = op1->gtGetOp2(); + info->srcCount = 2; + LocationInfoListNode* loValInfo = getLocationInfo(loVal); + LocationInfoListNode* hiValInfo = getLocationInfo(hiVal); + loValInfo->info.setSrcCandidates(this, RBM_LNGRET_LO); + hiValInfo->info.setSrcCandidates(this, RBM_LNGRET_HI); + useList.Append(loValInfo); + useList.Append(hiValInfo); } - else + else if ((tree->TypeGet() != TYP_VOID) && !op1->isContained()) { regMaskTP useCandidates = RBM_NONE; @@ -95,17 +98,17 @@ void LinearScan::TreeNodeInfoInitReturn(GenTree* tree) } } + LocationInfoListNode* locationInfo = getLocationInfo(op1); if (useCandidates != RBM_NONE) { - tree->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(this, useCandidates); + locationInfo->info.setSrcCandidates(this, useCandidates); } + useList.Append(locationInfo); } } -void LinearScan::TreeNodeInfoInitLclHeap(GenTree* tree) +void LinearScan::TreeNodeInfoInitLclHeap(GenTree* tree, TreeNodeInfo* info) { - TreeNodeInfo* info = &(tree->gtLsraInfo); - assert(info->dstCount == 1); // Need a variable number of temp regs (see genLclHeap() in codegenarm.cpp): @@ -175,6 +178,7 @@ void LinearScan::TreeNodeInfoInitLclHeap(GenTree* tree) // target (regCnt) + tmp + [psp] info->srcCount = 1; info->internalIntCount = hasPspSym ? 2 : 1; + appendLocationInfoToList(size); } // If we are needed in temporary registers we should be sure that @@ -201,11 +205,10 @@ void LinearScan::TreeNodeInfoInitLclHeap(GenTree* tree) // requirements needed by LSRA to build the Interval Table (source, // destination and internal [temp] register counts). 
// -void LinearScan::TreeNodeInfoInit(GenTree* tree) +void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info) { - unsigned kind = tree->OperKind(); - TreeNodeInfo* info = &(tree->gtLsraInfo); - RegisterType registerType = TypeGet(tree); + unsigned kind = tree->OperKind(); + RegisterType registerType = TypeGet(tree); if (tree->isContained()) { @@ -235,7 +238,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree) case GT_STORE_LCL_FLD: case GT_STORE_LCL_VAR: - TreeNodeInfoInitStoreLoc(tree->AsLclVarCommon()); + TreeNodeInfoInitStoreLoc(tree->AsLclVarCommon(), info); break; case GT_NOP: @@ -260,6 +263,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree) op1 = tree->gtOp.gtOp1; assert(varTypeIsFloating(op1)); assert(op1->TypeGet() == tree->TypeGet()); + appendLocationInfoToList(op1); switch (tree->gtIntrinsic.gtIntrinsicId) { @@ -277,7 +281,6 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree) case GT_CAST: { - info->srcCount = 1; assert(info->dstCount == 1); // Non-overflow casts to/from float/double are done using SSE2 instructions @@ -287,6 +290,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree) var_types castToType = tree->CastToType(); GenTreePtr castOp = tree->gtCast.CastOp(); var_types castOpType = castOp->TypeGet(); + info->srcCount = GetOperandInfo(castOp); if (tree->gtFlags & GTF_UNSIGNED) { castOpType = genUnsignedType(castOpType); @@ -371,8 +375,9 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree) break; case GT_SWITCH_TABLE: - info->srcCount = 2; assert(info->dstCount == 0); + info->srcCount = appendBinaryLocationInfoToList(tree->AsOp()); + assert(info->srcCount == 2); break; case GT_ASG: @@ -395,9 +400,9 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree) // everything is made explicit by adding casts. assert(tree->gtOp.gtOp1->TypeGet() == tree->gtOp.gtOp2->TypeGet()); - info->srcCount = 2; assert(info->dstCount == 1); - + info->srcCount = appendBinaryLocationInfoToList(tree->AsOp()); + assert(info->srcCount == 2); break; } @@ -406,8 +411,9 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree) case GT_AND: case GT_OR: case GT_XOR: - info->srcCount = tree->gtOp.gtOp2->isContained() ? 1 : 2; assert(info->dstCount == 1); + info->srcCount = appendBinaryLocationInfoToList(tree->AsOp()); + assert(info->srcCount == (tree->gtOp.gtOp2->isContained() ? 1 : 2)); break; case GT_RETURNTRAP: @@ -415,6 +421,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree) // + a conditional call info->srcCount = 1; assert(info->dstCount == 0); + appendLocationInfoToList(tree->gtOp.gtOp1); break; case GT_MUL: @@ -430,14 +437,16 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree) case GT_MULHI: case GT_UDIV: { - info->srcCount = 2; assert(info->dstCount == 1); + info->srcCount = appendBinaryLocationInfoToList(tree->AsOp()); + assert(info->srcCount == 2); } break; case GT_MUL_LONG: - info->srcCount = 2; info->dstCount = 2; + info->srcCount = appendBinaryLocationInfoToList(tree->AsOp()); + assert(info->srcCount == 2); break; case GT_LIST: @@ -457,9 +466,11 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree) tree->ClearUnusedValue(); info->isLocalDefUse = false; - // An unused GT_LONG node needs to consume its sources. + // An unused GT_LONG node needs to consume its sources, but need not produce a register. 
             info->srcCount = 2;
             info->dstCount = 0;
+            appendLocationInfoToList(tree->gtGetOp1());
+            appendLocationInfoToList(tree->gtGetOp2());
             break;
 
         case GT_CNS_DBL:
@@ -481,7 +492,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree)
             break;
 
         case GT_RETURN:
-            TreeNodeInfoInitReturn(tree);
+            TreeNodeInfoInitReturn(tree, info);
             break;
 
         case GT_RETFILT:
@@ -496,7 +507,9 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree)
                 info->srcCount = 1;
                 info->setSrcCandidates(this, RBM_INTRET);
-                tree->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(this, RBM_INTRET);
+                LocationInfoListNode* locationInfo = getLocationInfo(tree->gtOp.gtOp1);
+                locationInfo->info.setSrcCandidates(this, RBM_INTRET);
+                useList.Append(locationInfo);
             }
             break;
@@ -508,6 +521,8 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree)
                 // Consumes arrLen & index - has no result
                 info->srcCount = 2;
                 assert(info->dstCount == 0);
+                appendLocationInfoToList(tree->AsBoundsChk()->gtIndex);
+                appendLocationInfoToList(tree->AsBoundsChk()->gtArrLen);
             }
             break;
@@ -519,6 +534,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree)
             break;
 
         case GT_ARR_INDEX:
+        {
             info->srcCount = 2;
             assert(info->dstCount == 1);
             info->internalIntCount = 1;
@@ -526,11 +542,16 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree)
 
             // For GT_ARR_INDEX, the lifetime of the arrObj must be extended because it is actually used multiple
             // times while the result is being computed.
-            tree->AsArrIndex()->ArrObj()->gtLsraInfo.isDelayFree = true;
-            info->hasDelayFreeSrc = true;
-            break;
+            LocationInfoListNode* arrObjInfo = getLocationInfo(tree->AsArrIndex()->ArrObj());
+            arrObjInfo->info.isDelayFree     = true;
+            useList.Append(arrObjInfo);
+            useList.Append(getLocationInfo(tree->AsArrIndex()->IndexExpr()));
+            info->hasDelayFreeSrc = true;
+        }
+        break;
 
         case GT_ARR_OFFSET:
+
             // This consumes the offset, if any, the arrObj and the effective index,
             // and produces the flattened offset for this dimension.
             assert(info->dstCount == 1);
@@ -545,7 +566,10 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree)
                 // from any of the operand's registers, but may be the same as targetReg.
                 info->internalIntCount = 1;
                 info->srcCount         = 3;
+                appendLocationInfoToList(tree->AsArrOffs()->gtOffset);
             }
+            appendLocationInfoToList(tree->AsArrOffs()->gtIndex);
+            appendLocationInfoToList(tree->AsArrOffs()->gtArrObj);
             break;
 
         case GT_LEA:
@@ -555,15 +579,17 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree)
 
             // This LEA is instantiating an address, so we set up the srcCount and dstCount here.
             info->srcCount = 0;
+            assert(info->dstCount == 1);
             if (lea->HasBase())
             {
                 info->srcCount++;
+                appendLocationInfoToList(tree->AsAddrMode()->Base());
             }
             if (lea->HasIndex())
            {
                 info->srcCount++;
+                appendLocationInfoToList(tree->AsAddrMode()->Index());
             }
-            assert(info->dstCount == 1);
 
             // An internal register may be needed too; the logic here should be in sync with
             // genLeaInstruction()'s requirements for such a register.
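// NOTE: illustrative sketch, not part of this change, using a placeholder mask. The
// recurring shape of this refactoring: operand requirements move off the operand's
// embedded gtLsraInfo and onto list nodes owned by LinearScan.
//
//     // Before: requirements were written directly onto the operand:
//     //     op1->gtLsraInfo.setSrcCandidates(this, RBM_SOMETHING);
//     // After: the operand's info is pulled from operandToLocationInfoMap and
//     // appended to useList in use order:
//     //     LocationInfoListNode* op1Info = getLocationInfo(op1);
//     //     op1Info->info.setSrcCandidates(this, RBM_SOMETHING);
//     //     useList.Append(op1Info);
//     // Uses with no special requirements reduce to appendLocationInfoToList(op1).
//
// RBM_SOMETHING is a placeholder, not a real mask.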
@@ -589,11 +615,13 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree) case GT_NEG: info->srcCount = 1; assert(info->dstCount == 1); + appendLocationInfoToList(tree->gtOp.gtOp1); break; case GT_NOT: info->srcCount = 1; assert(info->dstCount == 1); + appendLocationInfoToList(tree->gtOp.gtOp1); break; case GT_LSH: @@ -602,7 +630,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree) case GT_ROR: case GT_LSH_HI: case GT_RSH_LO: - TreeNodeInfoInitShiftRotate(tree); + TreeNodeInfoInitShiftRotate(tree, info); break; case GT_EQ: @@ -612,17 +640,18 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree) case GT_GE: case GT_GT: case GT_CMP: - TreeNodeInfoInitCmp(tree); + TreeNodeInfoInitCmp(tree, info); break; case GT_CKFINITE: info->srcCount = 1; assert(info->dstCount == 1); info->internalIntCount = 1; + appendLocationInfoToList(tree->gtOp.gtOp1); break; case GT_CALL: - TreeNodeInfoInitCall(tree->AsCall()); + TreeNodeInfoInitCall(tree->AsCall(), info); break; case GT_ADDR: @@ -639,7 +668,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree) case GT_STORE_BLK: case GT_STORE_OBJ: case GT_STORE_DYN_BLK: - TreeNodeInfoInitBlockStore(tree->AsBlk()); + TreeNodeInfoInitBlockStore(tree->AsBlk(), info); break; case GT_INIT_VAL: @@ -648,7 +677,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree) break; case GT_LCLHEAP: - TreeNodeInfoInitLclHeap(tree); + TreeNodeInfoInitLclHeap(tree, info); break; case GT_STOREIND: @@ -659,14 +688,15 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree) if (compiler->codeGen->gcInfo.gcIsWriteBarrierAsgNode(tree)) { info->srcCount = 2; - TreeNodeInfoInitGCWriteBarrier(tree); + TreeNodeInfoInitGCWriteBarrier(tree, info); break; } - TreeNodeInfoInitIndir(tree->AsIndir()); + TreeNodeInfoInitIndir(tree->AsIndir(), info); // No contained source on ARM. 
assert(!src->isContained()); info->srcCount++; + appendLocationInfoToList(src); } break; @@ -676,12 +706,13 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree) assert(!tree->gtGetOp1()->isContained()); info->srcCount = 1; info->internalIntCount = 1; + appendLocationInfoToList(tree->gtOp.gtOp1); break; case GT_IND: assert(info->dstCount == 1); info->srcCount = 1; - TreeNodeInfoInitIndir(tree->AsIndir()); + TreeNodeInfoInitIndir(tree->AsIndir(), info); break; case GT_CATCH_ARG: @@ -716,24 +747,28 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree) { assert(info->dstCount == 1); } + appendLocationInfoToList(tree->gtOp.gtOp1); break; case GT_PUTARG_SPLIT: - TreeNodeInfoInitPutArgSplit(tree->AsPutArgSplit()); + TreeNodeInfoInitPutArgSplit(tree->AsPutArgSplit(), info); break; case GT_PUTARG_STK: - TreeNodeInfoInitPutArgStk(tree->AsPutArgStk()); + TreeNodeInfoInitPutArgStk(tree->AsPutArgStk(), info); break; case GT_PUTARG_REG: - TreeNodeInfoInitPutArgReg(tree->AsUnOp()); + TreeNodeInfoInitPutArgReg(tree->AsUnOp(), info); break; case GT_BITCAST: { info->srcCount = 1; assert(info->dstCount == 1); + LocationInfoListNode* locationInfo = getLocationInfo(tree->gtOp.gtOp1); + locationInfo->info.isTgtPref = true; + useList.Append(locationInfo); regNumber argReg = tree->gtRegNum; regMaskTP argMask = genRegMask(argReg); @@ -748,7 +783,6 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree) info->setDstCandidates(this, argMask); info->setSrcCandidates(this, argMask); - tree->AsUnOp()->gtOp1->gtLsraInfo.isTgtPref = true; } break; @@ -782,14 +816,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree) } else if (kind & (GTK_SMPOP)) { - if (tree->gtGetOp2IfPresent() != nullptr) - { - info->srcCount = 2; - } - else - { - info->srcCount = 1; - } + info->srcCount = appendBinaryLocationInfoToList(tree->AsOp()); } else { @@ -798,9 +825,10 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree) break; case GT_INDEX_ADDR: - info->srcCount = 2; info->dstCount = 1; info->internalIntCount = 1; + info->srcCount = appendBinaryLocationInfoToList(tree->AsOp()); + assert(info->srcCount == 2); break; } // end switch (tree->OperGet()) @@ -812,6 +840,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree) assert((info->dstCount < 2) || tree->IsMultiRegNode()); assert(info->isLocalDefUse == (tree->IsValue() && tree->IsUnusedValue())); assert(!tree->IsUnusedValue() || (info->dstCount != 0)); + assert(info->dstCount == tree->GetRegisterDstCount()); } #endif // _TARGET_ARM_ diff --git a/src/coreclr/src/jit/lsraarm64.cpp b/src/coreclr/src/jit/lsraarm64.cpp index 7c9b60174c8f692fbdd5832914eab8aaf401acc3..93b34d99b640eab77340fc6d58bcf22b1b9a3e0e 100644 --- a/src/coreclr/src/jit/lsraarm64.cpp +++ b/src/coreclr/src/jit/lsraarm64.cpp @@ -44,11 +44,10 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX // requirements needed by LSRA to build the Interval Table (source, // destination and internal [temp] register counts). 
// -void LinearScan::TreeNodeInfoInit(GenTree* tree) +void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info) { - unsigned kind = tree->OperKind(); - TreeNodeInfo* info = &(tree->gtLsraInfo); - RegisterType registerType = TypeGet(tree); + unsigned kind = tree->OperKind(); + RegisterType registerType = TypeGet(tree); if (tree->isContained()) { @@ -83,14 +82,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree) } else if (kind & (GTK_SMPOP)) { - if (tree->gtGetOp2IfPresent() != nullptr) - { - info->srcCount = 2; - } - else - { - info->srcCount = 1; - } + info->srcCount = appendBinaryLocationInfoToList(tree->AsOp()); } else { @@ -102,7 +94,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree) case GT_STORE_LCL_VAR: info->srcCount = 1; assert(info->dstCount == 0); - TreeNodeInfoInitStoreLoc(tree->AsLclVarCommon()); + TreeNodeInfoInitStoreLoc(tree->AsLclVarCommon(), info); break; case GT_LIST: @@ -144,7 +136,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree) break; case GT_RETURN: - TreeNodeInfoInitReturn(tree); + TreeNodeInfoInitReturn(tree, info); break; case GT_RETFILT: @@ -161,7 +153,9 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree) assert(info->dstCount == 0); info->setSrcCandidates(this, RBM_INTRET); - tree->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(this, RBM_INTRET); + LocationInfoListNode* locationInfo = getLocationInfo(tree->gtOp.gtOp1); + locationInfo->info.setSrcCandidates(this, RBM_INTRET); + useList.Append(locationInfo); } break; @@ -203,7 +197,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree) break; case GT_SWITCH_TABLE: - info->srcCount = 2; + info->srcCount = appendBinaryLocationInfoToList(tree->AsOp()); info->internalIntCount = 1; assert(info->dstCount == 0); break; @@ -223,8 +217,6 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree) // No implicit conversions at this stage as the expectation is that // everything is made explicit by adding casts. assert(tree->gtOp.gtOp1->TypeGet() == tree->gtOp.gtOp2->TypeGet()); - - info->srcCount = 2; } __fallthrough; @@ -232,13 +224,14 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree) case GT_AND: case GT_OR: case GT_XOR: - info->srcCount = tree->gtOp.gtOp2->isContained() ? 1 : 2; + info->srcCount = appendBinaryLocationInfoToList(tree->AsOp()); assert(info->dstCount == 1); break; case GT_RETURNTRAP: // this just turns into a compare of its child with an int // + a conditional call + appendLocationInfoToList(tree->gtGetOp1()); info->srcCount = 1; assert(info->dstCount == 0); break; @@ -262,7 +255,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree) case GT_MULHI: case GT_UDIV: { - info->srcCount = 2; + info->srcCount = appendBinaryLocationInfoToList(tree->AsOp()); assert(info->dstCount == 1); } break; @@ -280,6 +273,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree) assert(varTypeIsFloating(op1)); assert(op1->TypeGet() == tree->TypeGet()); + appendLocationInfoToList(op1); info->srcCount = 1; assert(info->dstCount == 1); } @@ -287,7 +281,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree) #ifdef FEATURE_SIMD case GT_SIMD: - TreeNodeInfoInitSIMD(tree->AsSIMD()); + TreeNodeInfoInitSIMD(tree->AsSIMD(), info); break; #endif // FEATURE_SIMD @@ -297,6 +291,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree) // register. 
             // see CodeGen::genIntToIntCast()
+            appendLocationInfoToList(tree->gtGetOp1());
             info->srcCount = 1;
             assert(info->dstCount == 1);
@@ -339,11 +334,8 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree)
             break;
 
         case GT_NEG:
-            info->srcCount = 1;
-            assert(info->dstCount == 1);
-            break;
-
         case GT_NOT:
+            appendLocationInfoToList(tree->gtGetOp1());
             info->srcCount = 1;
             assert(info->dstCount == 1);
             break;
@@ -352,7 +344,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree)
         case GT_RSH:
         case GT_RSZ:
         case GT_ROR:
-            TreeNodeInfoInitShiftRotate(tree);
+            TreeNodeInfoInitShiftRotate(tree, info);
             break;
 
         case GT_EQ:
@@ -364,10 +356,11 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree)
         case GT_TEST_EQ:
         case GT_TEST_NE:
         case GT_JCMP:
-            TreeNodeInfoInitCmp(tree);
+            TreeNodeInfoInitCmp(tree, info);
             break;
 
         case GT_CKFINITE:
+            appendLocationInfoToList(tree->gtOp.gtOp1);
             info->srcCount = 1;
             assert(info->dstCount == 1);
             info->internalIntCount = 1;
@@ -383,11 +376,17 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree)
 
             // For ARMv8 exclusives the lifetime of the addr and data must be extended because
             // they may be used multiple times during retries
-            cmpXchgNode->gtOpLocation->gtLsraInfo.isDelayFree = true;
-            cmpXchgNode->gtOpValue->gtLsraInfo.isDelayFree = true;
+            LocationInfoListNode* locationInfo = getLocationInfo(tree->gtCmpXchg.gtOpLocation);
+            locationInfo->info.isDelayFree     = true;
+            useList.Append(locationInfo);
+            LocationInfoListNode* valueInfo = getLocationInfo(tree->gtCmpXchg.gtOpValue);
+            valueInfo->info.isDelayFree     = true;
+            useList.Append(valueInfo);
             if (!cmpXchgNode->gtOpComparand->isContained())
             {
-                cmpXchgNode->gtOpComparand->gtLsraInfo.isDelayFree = true;
+                LocationInfoListNode* comparandInfo = getLocationInfo(tree->gtCmpXchg.gtOpComparand);
+                comparandInfo->info.isDelayFree     = true;
+                useList.Append(comparandInfo);
             }
 
             info->hasDelayFreeSrc = true;
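// NOTE: illustrative sketch, not part of this change; register choices are arbitrary.
// Why the delay-free marking above matters: without hardware atomics the compare-exchange
// expands to an exclusive-monitor retry loop, roughly:
//
//     // retry:
//     //     ldaxr   w2, [x0]        ; load-acquire exclusive from 'location'
//     //     cmp     w2, w3          ; compare against 'comparand'
//     //     b.ne    done
//     //     stlxr   w4, w1, [x0]    ; try to store 'value'; w4 = 0 on success
//     //     cbnz    w4, retry       ; lost the monitor -> retry, re-reading [x0]
//     // done:
//
// Since every source may be re-read on a retry, none of them may share a register with
// the definition; isDelayFree keeps them live past the node's own definition point.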
@@ -399,33 +398,40 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree)
         case GT_LOCKADD:
         case GT_XADD:
         case GT_XCHG:
+        {
             assert(info->dstCount == (tree->TypeGet() == TYP_VOID) ? 0 : 1);
             info->srcCount         = tree->gtOp.gtOp2->isContained() ? 1 : 2;
             info->internalIntCount = (tree->OperGet() == GT_XCHG) ? 1 : 2;
 
             // For ARMv8 exclusives the lifetime of the addr and data must be extended because
             // they may be used multiple times during retries
-            tree->gtOp.gtOp1->gtLsraInfo.isDelayFree = true;
+            assert(!tree->gtOp.gtOp1->isContained());
+            LocationInfoListNode* op1Info = getLocationInfo(tree->gtOp.gtOp1);
+            op1Info->info.isDelayFree     = true;
+            useList.Append(op1Info);
             if (!tree->gtOp.gtOp2->isContained())
             {
-                tree->gtOp.gtOp2->gtLsraInfo.isDelayFree = true;
+                LocationInfoListNode* op2Info = getLocationInfo(tree->gtOp.gtOp2);
+                op2Info->info.isDelayFree     = true;
+                useList.Append(op2Info);
             }
             info->hasDelayFreeSrc = true;
 
             // Internals may not collide with target
             info->isInternalRegDelayFree = true;
-            break;
+        }
+        break;
 
         case GT_PUTARG_STK:
-            TreeNodeInfoInitPutArgStk(tree->AsPutArgStk());
+            TreeNodeInfoInitPutArgStk(tree->AsPutArgStk(), info);
             break;
 
         case GT_PUTARG_REG:
-            TreeNodeInfoInitPutArgReg(tree->AsUnOp());
+            TreeNodeInfoInitPutArgReg(tree->AsUnOp(), info);
             break;
 
         case GT_CALL:
-            TreeNodeInfoInitCall(tree->AsCall());
+            TreeNodeInfoInitCall(tree->AsCall(), info);
             break;
 
         case GT_ADDR:
@@ -449,7 +455,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree)
         case GT_STORE_BLK:
         case GT_STORE_OBJ:
         case GT_STORE_DYN_BLK:
-            TreeNodeInfoInitBlockStore(tree->AsBlk());
+            TreeNodeInfoInitBlockStore(tree->AsBlk(), info);
             break;
 
         case GT_INIT_VAL:
@@ -533,6 +539,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree)
             }
             else
             {
+                appendLocationInfoToList(size);
                 info->srcCount = 1;
                 if (!compiler->info.compInitMem)
                 {
@@ -564,19 +571,12 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree)
             {
                 GenTreeBoundsChk* node = tree->AsBoundsChk();
                 // Consumes arrLen & index - has no result
-                info->srcCount = 2;
                 assert(info->dstCount == 0);
 
                 GenTree* intCns = nullptr;
                 GenTree* other  = nullptr;
-                if (node->gtIndex->isContained() || node->gtArrLen->isContained())
-                {
-                    info->srcCount = 1;
-                }
-                else
-                {
-                    info->srcCount = 2;
-                }
+                info->srcCount = GetOperandInfo(tree->AsBoundsChk()->gtIndex);
+                info->srcCount += GetOperandInfo(tree->AsBoundsChk()->gtArrLen);
             }
             break;
@@ -588,6 +588,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree)
             break;
 
         case GT_ARR_INDEX:
+        {
             info->srcCount = 2;
             assert(info->dstCount == 1);
             info->internalIntCount = 1;
@@ -595,14 +596,25 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree)
 
             // For GT_ARR_INDEX, the lifetime of the arrObj must be extended because it is actually used multiple
             // times while the result is being computed.
-            tree->AsArrIndex()->ArrObj()->gtLsraInfo.isDelayFree = true;
-            info->hasDelayFreeSrc = true;
-            break;
+            LocationInfoListNode* arrObjInfo = getLocationInfo(tree->AsArrIndex()->ArrObj());
+            arrObjInfo->info.isDelayFree     = true;
+            useList.Append(arrObjInfo);
+            useList.Append(getLocationInfo(tree->AsArrIndex()->IndexExpr()));
+            info->hasDelayFreeSrc = true;
+        }
+        break;
 
         case GT_ARR_OFFSET:
             // This consumes the offset, if any, the arrObj and the effective index,
             // and produces the flattened offset for this dimension.
-            info->srcCount = tree->gtArrOffs.gtOffset->isContained() ?
2 : 3; + info->srcCount = 2; + if (!tree->gtArrOffs.gtOffset->isContained()) + { + appendLocationInfoToList(tree->AsArrOffs()->gtOffset); + info->srcCount++; + } + appendLocationInfoToList(tree->AsArrOffs()->gtIndex); + appendLocationInfoToList(tree->AsArrOffs()->gtArrObj); assert(info->dstCount == 1); info->internalIntCount = 1; break; @@ -620,10 +632,12 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree) if (base != nullptr) { info->srcCount++; + appendLocationInfoToList(base); } if (index != nullptr) { info->srcCount++; + appendLocationInfoToList(index); } assert(info->dstCount == 1); @@ -649,13 +663,14 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree) if (compiler->codeGen->gcInfo.gcIsWriteBarrierAsgNode(tree)) { info->srcCount = 2; - TreeNodeInfoInitGCWriteBarrier(tree); + TreeNodeInfoInitGCWriteBarrier(tree, info); break; } - TreeNodeInfoInitIndir(tree->AsIndir()); + TreeNodeInfoInitIndir(tree->AsIndir(), info); if (!tree->gtGetOp2()->isContained()) { + appendLocationInfoToList(tree->gtGetOp2()); info->srcCount++; } } @@ -666,13 +681,13 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree) // is required, and it is not a localDefUse. assert(info->dstCount == 0); assert(!tree->gtGetOp1()->isContained()); + appendLocationInfoToList(tree->gtOp.gtOp1); info->srcCount = 1; break; case GT_IND: assert(info->dstCount == 1); - info->srcCount = 1; - TreeNodeInfoInitIndir(tree->AsIndir()); + TreeNodeInfoInitIndir(tree->AsIndir(), info); break; case GT_CATCH_ARG: @@ -694,8 +709,8 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree) break; case GT_INDEX_ADDR: - info->srcCount = 2; - info->dstCount = 1; + assert(info->dstCount == 1); + info->srcCount = appendBinaryLocationInfoToList(tree->AsOp()); info->internalIntCount = 1; break; } // end switch (tree->OperGet()) @@ -708,6 +723,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree) assert((info->dstCount < 2) || tree->IsMultiRegCall()); assert(info->isLocalDefUse == (tree->IsValue() && tree->IsUnusedValue())); assert(!tree->IsUnusedValue() || (info->dstCount != 0)); + assert(info->dstCount == tree->GetRegisterDstCount()); } //------------------------------------------------------------------------ @@ -719,54 +735,57 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree) // Return Value: // None. // -void LinearScan::TreeNodeInfoInitReturn(GenTree* tree) +void LinearScan::TreeNodeInfoInitReturn(GenTree* tree, TreeNodeInfo* info) { - TreeNodeInfo* info = &(tree->gtLsraInfo); - GenTree* op1 = tree->gtGetOp1(); regMaskTP useCandidates = RBM_NONE; info->srcCount = ((tree->TypeGet() == TYP_VOID) || op1->isContained()) ? 
0 : 1; assert(info->dstCount == 0); - if (varTypeIsStruct(tree)) + if ((tree->TypeGet() != TYP_VOID) && !op1->isContained()) { - // op1 has to be either an lclvar or a multi-reg returning call - if (op1->OperGet() != GT_LCL_VAR) + if (varTypeIsStruct(tree)) { - noway_assert(op1->IsMultiRegCall()); + // op1 has to be either an lclvar or a multi-reg returning call + if (op1->OperGet() != GT_LCL_VAR) + { + noway_assert(op1->IsMultiRegCall()); - ReturnTypeDesc* retTypeDesc = op1->AsCall()->GetReturnTypeDesc(); - info->srcCount = retTypeDesc->GetReturnRegCount(); - useCandidates = retTypeDesc->GetABIReturnRegs(); + ReturnTypeDesc* retTypeDesc = op1->AsCall()->GetReturnTypeDesc(); + info->srcCount = retTypeDesc->GetReturnRegCount(); + useCandidates = retTypeDesc->GetABIReturnRegs(); + } } - } - else - { - // Non-struct type return - determine useCandidates - switch (tree->TypeGet()) + else { - case TYP_VOID: - useCandidates = RBM_NONE; - break; - case TYP_FLOAT: - useCandidates = RBM_FLOATRET; - break; - case TYP_DOUBLE: - useCandidates = RBM_DOUBLERET; - break; - case TYP_LONG: - useCandidates = RBM_LNGRET; - break; - default: - useCandidates = RBM_INTRET; - break; + // Non-struct type return - determine useCandidates + switch (tree->TypeGet()) + { + case TYP_VOID: + useCandidates = RBM_NONE; + break; + case TYP_FLOAT: + useCandidates = RBM_FLOATRET; + break; + case TYP_DOUBLE: + useCandidates = RBM_DOUBLERET; + break; + case TYP_LONG: + useCandidates = RBM_LNGRET; + break; + default: + useCandidates = RBM_INTRET; + break; + } } - } - if (useCandidates != RBM_NONE) - { - tree->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(this, useCandidates); + LocationInfoListNode* locationInfo = getLocationInfo(op1); + if (useCandidates != RBM_NONE) + { + locationInfo->info.setSrcCandidates(this, useCandidates); + } + useList.Append(locationInfo); } } @@ -780,10 +799,8 @@ void LinearScan::TreeNodeInfoInitReturn(GenTree* tree) // Return Value: // None. -void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree) +void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree, TreeNodeInfo* info) { - TreeNodeInfo* info = &(simdTree->gtLsraInfo); - // Only SIMDIntrinsicInit can be contained if (simdTree->isContained()) { @@ -791,13 +808,21 @@ void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree) } assert(info->dstCount == 1); - switch (simdTree->gtSIMDIntrinsicID) + GenTree* op1 = simdTree->gtOp.gtOp1; + GenTree* op2 = simdTree->gtOp.gtOp2; + if (!op1->OperIs(GT_LIST)) { - GenTree* op1; - GenTree* op2; + info->srcCount += GetOperandInfo(op1); + } + if ((op2 != nullptr) && !op2->isContained()) + { + info->srcCount += GetOperandInfo(op2); + } + switch (simdTree->gtSIMDIntrinsicID) + { case SIMDIntrinsicInit: - info->srcCount = simdTree->gtGetOp1()->isContained() ? 0 : 1; + assert(info->srcCount == (simdTree->gtGetOp1()->isContained() ? 0 : 1)); break; case SIMDIntrinsicCast: @@ -811,26 +836,15 @@ void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree) case SIMDIntrinsicConvertToUInt64: case SIMDIntrinsicWidenLo: case SIMDIntrinsicWidenHi: - info->srcCount = 1; + assert(info->srcCount == 1); break; case SIMDIntrinsicGetItem: + { op1 = simdTree->gtGetOp1(); op2 = simdTree->gtGetOp2(); - // We have an object and an item, which may be contained. - info->srcCount = (op2->isContained() ? 1 : 2); - - if (op1->isContained()) - { - // Although GT_IND of TYP_SIMD12 reserves an internal register for reading 4 and 8 bytes from memory - // and assembling them into target reg, it is not required in this case. 
- op1->gtLsraInfo.internalIntCount = 0; - op1->gtLsraInfo.internalFloatCount = 0; - info->srcCount -= 1; - info->srcCount += GetOperandSourceCount(op1); - } - + // We have an object and an index, either of which may be contained. if (!op2->IsCnsIntOrI() && (!op1->isContained() || op1->OperIsLocal())) { // If the index is not a constant and not contained or is a local @@ -838,8 +852,10 @@ void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree) info->internalIntCount = 1; // internal register must not clobber input index - op2->gtLsraInfo.isDelayFree = true; - info->hasDelayFreeSrc = true; + LocationInfoListNode* op2Info = + (op1->isContained()) ? useList.Begin() : useList.GetSecond(INDEBUG(op2)); + op2Info->info.isDelayFree = true; + info->hasDelayFreeSrc = true; } if (!op2->IsCnsIntOrI() && (!op1->isContained())) @@ -848,7 +864,8 @@ void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree) // we will use the SIMD temp location to store the vector. compiler->getSIMDInitTempVarNum(); } - break; + } + break; case SIMDIntrinsicAdd: case SIMDIntrinsicSub: @@ -865,7 +882,7 @@ void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree) case SIMDIntrinsicGreaterThan: case SIMDIntrinsicLessThanOrEqual: case SIMDIntrinsicGreaterThanOrEqual: - info->srcCount = 2; + assert(info->srcCount == 2); break; case SIMDIntrinsicSetX: @@ -873,16 +890,28 @@ void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree) case SIMDIntrinsicSetZ: case SIMDIntrinsicSetW: case SIMDIntrinsicNarrow: - info->srcCount = 2; + assert(info->srcCount == 2); // Op1 will write to dst before Op2 is free - simdTree->gtOp.gtOp2->gtLsraInfo.isDelayFree = true; - info->hasDelayFreeSrc = true; + useList.GetSecond(INDEBUG(simdTree->gtGetOp2()))->info.isDelayFree = true; + info->hasDelayFreeSrc = true; break; case SIMDIntrinsicInitN: { - info->srcCount = (short)(simdTree->gtSIMDSize / genTypeSize(simdTree->gtSIMDBaseType)); + var_types baseType = simdTree->gtSIMDBaseType; + info->srcCount = (short)(simdTree->gtSIMDSize / genTypeSize(baseType)); + int initCount = 0; + for (GenTree* list = op1; list != nullptr; list = list->gtGetOp2()) + { + assert(list->OperGet() == GT_LIST); + GenTree* listItem = list->gtGetOp1(); + assert(listItem->TypeGet() == baseType); + assert(!listItem->isContained()); + appendLocationInfoToList(listItem); + initCount++; + } + assert(initCount == info->srcCount); if (varTypeIsFloating(simdTree->gtSIMDBaseType)) { @@ -895,28 +924,30 @@ void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree) case SIMDIntrinsicInitArray: // We have an array and an index, which may be contained. - info->srcCount = simdTree->gtGetOp2()->isContained() ? 1 : 2; + assert(info->srcCount == (simdTree->gtGetOp2()->isContained() ? 1 : 2)); break; case SIMDIntrinsicOpEquality: case SIMDIntrinsicOpInEquality: - info->srcCount = simdTree->gtGetOp2()->isContained() ? 1 : 2; + assert(info->srcCount == (simdTree->gtGetOp2()->isContained() ? 1 : 2)); info->setInternalCandidates(this, RBM_ALLFLOAT); info->internalFloatCount = 1; break; case SIMDIntrinsicDotProduct: - info->srcCount = 2; + assert(info->srcCount == 2); info->setInternalCandidates(this, RBM_ALLFLOAT); info->internalFloatCount = 1; break; case SIMDIntrinsicSelect: // TODO-ARM64-CQ Allow lowering to see SIMDIntrinsicSelect so we can generate BSL VC, VA, VB - // bsl target register must be VC. Reserve a temp in case we need to shuffle things + // bsl target register must be VC. Reserve a temp in case we need to shuffle things. 
+ // This will require a different approach, as GenTreeSIMD has only two operands. + assert(!"SIMDIntrinsicSelect not yet supported"); + assert(info->srcCount == 3); info->setInternalCandidates(this, RBM_ALLFLOAT); info->internalFloatCount = 1; - info->srcCount = 3; break; case SIMDIntrinsicInitArrayX: diff --git a/src/coreclr/src/jit/lsraarmarch.cpp b/src/coreclr/src/jit/lsraarmarch.cpp index aad62fe406142e662c70f03100aeb94cbc421403..e0bbbb1bce04279f9c9edb1f612b27a18661c9dd 100644 --- a/src/coreclr/src/jit/lsraarmarch.cpp +++ b/src/coreclr/src/jit/lsraarmarch.cpp @@ -39,25 +39,12 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX // - Setting the appropriate candidates for a store of a multi-reg call return value. // - Handling of contained immediates. // -void LinearScan::TreeNodeInfoInitStoreLoc(GenTreeLclVarCommon* storeLoc) +void LinearScan::TreeNodeInfoInitStoreLoc(GenTreeLclVarCommon* storeLoc, TreeNodeInfo* info) { - TreeNodeInfo* info = &(storeLoc->gtLsraInfo); - GenTree* op1 = storeLoc->gtGetOp1(); + GenTree* op1 = storeLoc->gtGetOp1(); assert(info->dstCount == 0); -#ifdef _TARGET_ARM_ - if (varTypeIsLong(op1)) - { - info->srcCount = 2; - assert(!op1->OperIs(GT_LONG) || op1->isContained()); - } - else -#endif // _TARGET_ARM_ - if (op1->isContained()) - { - info->srcCount = 0; - } - else if (op1->IsMultiRegCall()) + if (op1->IsMultiRegCall()) { // This is the case of var = call where call is returning // a value in multiple return registers. @@ -67,15 +54,35 @@ void LinearScan::TreeNodeInfoInitStoreLoc(GenTreeLclVarCommon* storeLoc) // srcCount = number of registers in which the value is returned by call GenTreeCall* call = op1->AsCall(); ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc(); - info->srcCount = retTypeDesc->GetReturnRegCount(); + unsigned regCount = retTypeDesc->GetReturnRegCount(); + info->srcCount = regCount; // Call node srcCandidates = Bitwise-OR(allregs(GetReturnRegType(i))) for all i=0..RetRegCount-1 - regMaskTP srcCandidates = allMultiRegCallNodeRegs(call); - op1->gtLsraInfo.setSrcCandidates(this, srcCandidates); + regMaskTP srcCandidates = allMultiRegCallNodeRegs(call); + LocationInfoListNode* locInfo = getLocationInfo(op1); + locInfo->info.setSrcCandidates(this, srcCandidates); + useList.Append(locInfo); + } +#ifdef _TARGET_ARM_ + else if (varTypeIsLong(op1)) + { + // The only possible operands for a GT_STORE_LCL_VAR are a multireg call node, which we have + // handled above, or a GT_LONG node. + assert(!op1->OperIs(GT_LONG) || op1->isContained()); + info->srcCount = 2; + // TODO: Currently, GetOperandInfo always returns 1 for any non-contained node. + // Consider enhancing it to handle multi-reg nodes. + (void)GetOperandInfo(op1); + } +#endif // _TARGET_ARM_ + else if (op1->isContained()) + { + info->srcCount = 0; } else { info->srcCount = 1; + appendLocationInfoToList(op1); } #ifdef FEATURE_SIMD @@ -99,39 +106,30 @@ void LinearScan::TreeNodeInfoInitStoreLoc(GenTreeLclVarCommon* storeLoc) // Return Value: // None. // -void LinearScan::TreeNodeInfoInitCmp(GenTreePtr tree) +void LinearScan::TreeNodeInfoInitCmp(GenTreePtr tree, TreeNodeInfo* info) { - TreeNodeInfo* info = &(tree->gtLsraInfo); + info->srcCount = appendBinaryLocationInfoToList(tree->AsOp()); assert((info->dstCount == 1) || (tree->TypeGet() == TYP_VOID)); info->srcCount = tree->gtOp.gtOp2->isContained() ? 
1 : 2; } -void LinearScan::TreeNodeInfoInitGCWriteBarrier(GenTree* tree) +void LinearScan::TreeNodeInfoInitGCWriteBarrier(GenTree* tree, TreeNodeInfo* info) { - GenTreePtr dst = tree; - GenTreePtr addr = tree->gtOp.gtOp1; - GenTreePtr src = tree->gtOp.gtOp2; - - if (addr->OperGet() == GT_LEA) - { - // In the case where we are doing a helper assignment, if the dst - // is an indir through an lea, we need to actually instantiate the - // lea in a register - GenTreeAddrMode* lea = addr->AsAddrMode(); - - short leaSrcCount = 0; - if (lea->Base() != nullptr) - { - leaSrcCount++; - } - if (lea->Index() != nullptr) - { - leaSrcCount++; - } - lea->gtLsraInfo.srcCount = leaSrcCount; - lea->gtLsraInfo.dstCount = 1; - } + GenTreePtr dst = tree; + GenTreePtr addr = tree->gtOp.gtOp1; + GenTreePtr src = tree->gtOp.gtOp2; + LocationInfoListNode* addrInfo = getLocationInfo(addr); + LocationInfoListNode* srcInfo = getLocationInfo(src); + + // In the case where we are doing a helper assignment, even if the dst + // is an indir through an lea, we need to actually instantiate the + // lea in a register + assert(!addr->isContained() && !src->isContained()); + useList.Append(addrInfo); + useList.Append(srcInfo); + info->srcCount = 2; + assert(info->dstCount == 0); #if NOGC_WRITE_BARRIERS NYI_ARM("NOGC_WRITE_BARRIERS"); @@ -141,21 +139,21 @@ void LinearScan::TreeNodeInfoInitGCWriteBarrier(GenTree* tree) // the 'addr' goes into x14 (REG_WRITE_BARRIER_DST_BYREF) // the 'src' goes into x15 (REG_WRITE_BARRIER) // - addr->gtLsraInfo.setSrcCandidates(this, RBM_WRITE_BARRIER_DST_BYREF); - src->gtLsraInfo.setSrcCandidates(this, RBM_WRITE_BARRIER); + addrInfo->info.setSrcCandidates(this, RBM_WRITE_BARRIER_DST_BYREF); + srcInfo->info.setSrcCandidates(this, RBM_WRITE_BARRIER); #else // For the standard JIT Helper calls // op1 goes into REG_ARG_0 and // op2 goes into REG_ARG_1 // - addr->gtLsraInfo.setSrcCandidates(this, RBM_ARG_0); - src->gtLsraInfo.setSrcCandidates(this, RBM_ARG_1); + addrInfo->info.setSrcCandidates(this, RBM_ARG_0); + srcInfo->info.setSrcCandidates(this, RBM_ARG_1); #endif // NOGC_WRITE_BARRIERS // Both src and dst must reside in a register, which they should since we haven't set // either of them as contained. - assert(addr->gtLsraInfo.dstCount == 1); - assert(src->gtLsraInfo.dstCount == 1); + assert(addrInfo->info.dstCount == 1); + assert(srcInfo->info.dstCount == 1); } //------------------------------------------------------------------------ @@ -165,7 +163,7 @@ void LinearScan::TreeNodeInfoInitGCWriteBarrier(GenTree* tree) // Arguments: // indirTree - GT_IND, GT_STOREIND or block gentree node // -void LinearScan::TreeNodeInfoInitIndir(GenTreeIndir* indirTree) +void LinearScan::TreeNodeInfoInitIndir(GenTreeIndir* indirTree, TreeNodeInfo* info) { // If this is the rhs of a block copy (i.e. non-enregisterable struct), // it has no register requirements. @@ -174,9 +172,8 @@ void LinearScan::TreeNodeInfoInitIndir(GenTreeIndir* indirTree) return; } - TreeNodeInfo* info = &(indirTree->gtLsraInfo); - bool isStore = (indirTree->gtOper == GT_STOREIND); - info->srcCount = GetIndirSourceCount(indirTree); + bool isStore = (indirTree->gtOper == GT_STOREIND); + info->srcCount = GetIndirInfo(indirTree); GenTree* addr = indirTree->Addr(); GenTree* index = nullptr; @@ -250,40 +247,47 @@ void LinearScan::TreeNodeInfoInitIndir(GenTreeIndir* indirTree) // Return Value: // None. 
// -void LinearScan::TreeNodeInfoInitShiftRotate(GenTree* tree) +int LinearScan::TreeNodeInfoInitShiftRotate(GenTree* tree, TreeNodeInfo* info) { - TreeNodeInfo* info = &(tree->gtLsraInfo); - + GenTreePtr source = tree->gtOp.gtOp1; GenTreePtr shiftBy = tree->gtOp.gtOp2; - info->srcCount = shiftBy->isContained() ? 1 : 2; - info->dstCount = 1; + assert(info->dstCount == 1); + if (!shiftBy->isContained()) + { + appendLocationInfoToList(shiftBy); + info->srcCount = 1; + } #ifdef _TARGET_ARM_ // The first operand of a GT_LSH_HI and GT_RSH_LO oper is a GT_LONG so that // we can have a three operand form. Increment the srcCount. - GenTreePtr source = tree->gtOp.gtOp1; if (tree->OperGet() == GT_LSH_HI || tree->OperGet() == GT_RSH_LO) { assert((source->OperGet() == GT_LONG) && source->isContained()); - info->srcCount++; + info->srcCount += 2; + LocationInfoListNode* sourceLoInfo = getLocationInfo(source->gtOp.gtOp1); + useList.Append(sourceLoInfo); + LocationInfoListNode* sourceHiInfo = getLocationInfo(source->gtOp.gtOp2); + useList.Append(sourceHiInfo); if (tree->OperGet() == GT_LSH_HI) { - GenTreePtr sourceLo = source->gtOp.gtOp1; - sourceLo->gtLsraInfo.isDelayFree = true; + sourceLoInfo->info.isDelayFree = true; } else { - GenTreePtr sourceHi = source->gtOp.gtOp2; - sourceHi->gtLsraInfo.isDelayFree = true; + sourceHiInfo->info.isDelayFree = true; } - - source->gtLsraInfo.hasDelayFreeSrc = true; - info->hasDelayFreeSrc = true; + info->hasDelayFreeSrc = true; } - + else #endif // _TARGET_ARM_ + { + appendLocationInfoToList(source); + info->srcCount++; + } + return info->srcCount; } //------------------------------------------------------------------------ @@ -299,12 +303,12 @@ void LinearScan::TreeNodeInfoInitShiftRotate(GenTree* tree) // Return Value: // None. // -void LinearScan::TreeNodeInfoInitPutArgReg(GenTreeUnOp* node) +void LinearScan::TreeNodeInfoInitPutArgReg(GenTreeUnOp* node, TreeNodeInfo* info) { assert(node != nullptr); assert(node->OperIsPutArgReg()); - node->gtLsraInfo.srcCount = 1; - regNumber argReg = node->gtRegNum; + info->srcCount = 1; + regNumber argReg = node->gtRegNum; assert(argReg != REG_NA); // Set the register requirements for the node. @@ -315,19 +319,21 @@ void LinearScan::TreeNodeInfoInitPutArgReg(GenTreeUnOp* node) // The actual `long` types must have been transformed as a field list with two fields. if (node->TypeGet() == TYP_LONG) { - node->gtLsraInfo.srcCount++; - node->gtLsraInfo.dstCount = node->gtLsraInfo.srcCount; + info->srcCount++; + info->dstCount = info->srcCount; assert(genRegArgNext(argReg) == REG_NEXT(argReg)); argMask |= genRegMask(REG_NEXT(argReg)); } #endif // _TARGET_ARM_ - - node->gtLsraInfo.setDstCandidates(this, argMask); - node->gtLsraInfo.setSrcCandidates(this, argMask); + info->setDstCandidates(this, argMask); + info->setSrcCandidates(this, argMask); // To avoid redundant moves, have the argument operand computed in the // register in which the argument is passed to the call. 
- node->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(this, getUseCandidates(node)); + LocationInfoListNode* op1Info = getLocationInfo(node->gtOp.gtOp1); + op1Info->info.setSrcCandidates(this, info->getSrcCandidates(this)); + op1Info->info.isDelayFree = true; + useList.Append(op1Info); } //------------------------------------------------------------------------ @@ -346,7 +352,7 @@ void LinearScan::TreeNodeInfoInitPutArgReg(GenTreeUnOp* node) // Since the integer register is not associated with the arg node, we will reserve it as // an internal register on the call so that it is not used during the evaluation of the call node // (e.g. for the target). -void LinearScan::HandleFloatVarArgs(GenTreeCall* call, GenTree* argNode, bool* callHasFloatRegArgs) +void LinearScan::HandleFloatVarArgs(GenTreeCall* call, TreeNodeInfo* info, GenTree* argNode, bool* callHasFloatRegArgs) { #if FEATURE_VARARG if (call->IsVarargs() && varTypeIsFloating(argNode)) @@ -355,8 +361,8 @@ void LinearScan::HandleFloatVarArgs(GenTreeCall* call, GenTree* argNode, bool* c regNumber argReg = argNode->gtRegNum; regNumber targetReg = compiler->getCallArgIntRegister(argReg); - call->gtLsraInfo.setInternalIntCount(call->gtLsraInfo.internalIntCount + 1); - call->gtLsraInfo.addInternalCandidates(this, genRegMask(targetReg)); + info->setInternalIntCount(info->internalIntCount + 1); + info->addInternalCandidates(this, genRegMask(targetReg)); } #endif // FEATURE_VARARG } @@ -370,9 +376,8 @@ void LinearScan::HandleFloatVarArgs(GenTreeCall* call, GenTree* argNode, bool* c // Return Value: // None. // -void LinearScan::TreeNodeInfoInitCall(GenTreeCall* call) +void LinearScan::TreeNodeInfoInitCall(GenTreeCall* call, TreeNodeInfo* info) { - TreeNodeInfo* info = &(call->gtLsraInfo); bool hasMultiRegRetVal = false; ReturnTypeDesc* retTypeDesc = nullptr; @@ -396,7 +401,8 @@ void LinearScan::TreeNodeInfoInitCall(GenTreeCall* call) info->dstCount = 0; } - GenTree* ctrlExpr = call->gtControlExpr; + GenTree* ctrlExpr = call->gtControlExpr; + LocationInfoListNode* ctrlExprInfo = nullptr; if (call->gtCallType == CT_INDIRECT) { // either gtControlExpr != null or gtCallAddr != null. @@ -409,18 +415,18 @@ void LinearScan::TreeNodeInfoInitCall(GenTreeCall* call) // set reg requirements on call target represented as control sequence. if (ctrlExpr != nullptr) { + ctrlExprInfo = getLocationInfo(ctrlExpr); + // we should never see a gtControlExpr whose type is void. assert(ctrlExpr->TypeGet() != TYP_VOID); - info->srcCount++; - // In case of fast tail implemented as jmp, make sure that gtControlExpr is // computed into a register. if (call->IsFastTailCall()) { // Fast tail call - make sure that call target is always computed in R12(ARM32)/IP0(ARM64) // so that epilog sequence can generate "br xip0/r12" to achieve fast tail call. - ctrlExpr->gtLsraInfo.setSrcCandidates(this, RBM_FASTTAILCALL_TARGET); + ctrlExprInfo->info.setSrcCandidates(this, RBM_FASTTAILCALL_TARGET); } } #ifdef _TARGET_ARM_ @@ -492,8 +498,8 @@ void LinearScan::TreeNodeInfoInitCall(GenTreeCall* call) // have been decomposed. 
if (putArgChild->TypeGet() == TYP_LONG) { - argNode->gtLsraInfo.srcCount = 2; - expectedSlots = 2; + useList.GetTreeNodeInfo(argNode).srcCount = 2; + expectedSlots = 2; } else if (putArgChild->TypeGet() == TYP_DOUBLE) { @@ -515,6 +521,7 @@ void LinearScan::TreeNodeInfoInitCall(GenTreeCall* call) for (GenTreeFieldList* entry = argNode->AsFieldList(); entry != nullptr; entry = entry->Rest()) { info->srcCount++; + appendLocationInfoToList(entry->Current()); #ifdef DEBUG assert(entry->Current()->OperIs(GT_PUTARG_REG)); assert(entry->Current()->gtRegNum == argReg); @@ -534,25 +541,30 @@ void LinearScan::TreeNodeInfoInitCall(GenTreeCall* call) #ifdef _TARGET_ARM_ else if (argNode->OperGet() == GT_PUTARG_SPLIT) { - fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, argNode); - info->srcCount += argNode->AsPutArgSplit()->gtNumRegs; + unsigned regCount = argNode->AsPutArgSplit()->gtNumRegs; + assert(regCount == curArgTabEntry->numRegs); + info->srcCount += regCount; + appendLocationInfoToList(argNode); } #endif else { assert(argNode->OperIs(GT_PUTARG_REG)); assert(argNode->gtRegNum == argReg); - HandleFloatVarArgs(call, argNode, &callHasFloatRegArgs); - info->srcCount++; - + HandleFloatVarArgs(call, info, argNode, &callHasFloatRegArgs); #ifdef _TARGET_ARM_ - // The `double` types have been transformed to `long` on arm, + // The `double` types have been transformed to `long` on armel, // while the actual long types have been decomposed. if (argNode->TypeGet() == TYP_LONG) { - info->srcCount++; + info->srcCount += appendBinaryLocationInfoToList(argNode->AsOp()); } + else #endif // _TARGET_ARM_ + { + appendLocationInfoToList(argNode); + info->srcCount++; + } } } @@ -574,22 +586,19 @@ void LinearScan::TreeNodeInfoInitCall(GenTreeCall* call) #ifdef DEBUG fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, arg); assert(curArgTabEntry); +#endif +#ifdef _TARGET_ARM_ + // PUTARG_SPLIT nodes must be in the gtCallLateArgs list, since they + // define registers used by the call. + assert(arg->OperGet() != GT_PUTARG_SPLIT); #endif if (arg->gtOper == GT_PUTARG_STK) { assert(curArgTabEntry->regNum == REG_STK); } -#ifdef _TARGET_ARM_ - else if (arg->OperGet() == GT_PUTARG_SPLIT) - { - assert(arg->AsPutArgSplit()->gtNumRegs == curArgTabEntry->numRegs); - info->srcCount += arg->gtLsraInfo.dstCount; - } -#endif else { - TreeNodeInfo* argInfo = &(arg->gtLsraInfo); - assert((argInfo->dstCount == 0) || (argInfo->isLocalDefUse)); + assert(!arg->IsValue() || arg->IsUnusedValue()); } } args = args->gtOp.gtOp2; @@ -597,14 +606,20 @@ void LinearScan::TreeNodeInfoInitCall(GenTreeCall* call) // If it is a fast tail call, it is already preferenced to use IP0. // Therefore, no need set src candidates on call tgt again. - if (call->IsVarargs() && callHasFloatRegArgs && !call->IsFastTailCall() && (ctrlExpr != nullptr)) + if (call->IsVarargs() && callHasFloatRegArgs && !call->IsFastTailCall() && (ctrlExprInfo != nullptr)) { NYI_ARM("float reg varargs"); // Don't assign the call target to any of the argument registers because // we will use them to also pass floating point arguments as required // by Arm64 ABI. 
- ctrlExpr->gtLsraInfo.setSrcCandidates(this, allRegs(TYP_INT) & ~(RBM_ARG_REGS)); + ctrlExprInfo->info.setSrcCandidates(this, allRegs(TYP_INT) & ~(RBM_ARG_REGS)); + } + + if (ctrlExprInfo != nullptr) + { + useList.Append(ctrlExprInfo); + info->srcCount++; } #ifdef _TARGET_ARM_ @@ -629,14 +644,14 @@ void LinearScan::TreeNodeInfoInitCall(GenTreeCall* call) // Notes: // Set the child node(s) to be contained when we have a multireg arg // -void LinearScan::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* argNode) +void LinearScan::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* argNode, TreeNodeInfo* info) { assert(argNode->gtOper == GT_PUTARG_STK); GenTreePtr putArgChild = argNode->gtOp.gtOp1; - argNode->gtLsraInfo.srcCount = 0; - argNode->gtLsraInfo.dstCount = 0; + info->srcCount = 0; + info->dstCount = 0; // Do we have a TYP_STRUCT argument (or a GT_FIELD_LIST), if so it must be a multireg pass-by-value struct if ((putArgChild->TypeGet() == TYP_STRUCT) || (putArgChild->OperGet() == GT_FIELD_LIST)) @@ -649,54 +664,51 @@ void LinearScan::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* argNode) // We consume all of the items in the GT_FIELD_LIST for (GenTreeFieldList* current = putArgChild->AsFieldList(); current != nullptr; current = current->Rest()) { - argNode->gtLsraInfo.srcCount++; + appendLocationInfoToList(current->Current()); + info->srcCount++; } } else { #ifdef _TARGET_ARM64_ // We could use a ldp/stp sequence so we need two internal registers - argNode->gtLsraInfo.internalIntCount = 2; + info->internalIntCount = 2; #else // _TARGET_ARM_ // We could use a ldr/str sequence so we need an internal register - argNode->gtLsraInfo.internalIntCount = 1; + info->internalIntCount = 1; #endif // _TARGET_ARM_ if (putArgChild->OperGet() == GT_OBJ) { + assert(putArgChild->isContained()); GenTreePtr objChild = putArgChild->gtOp.gtOp1; if (objChild->OperGet() == GT_LCL_VAR_ADDR) { // We will generate all of the code for the GT_PUTARG_STK, the GT_OBJ and the GT_LCL_VAR_ADDR - // as one contained operation + // as one contained operation, and there are no source registers. // assert(objChild->isContained()); } + else + { + // We will generate all of the code for the GT_PUTARG_STK and its child node + // as one contained operation + // + appendLocationInfoToList(objChild); + info->srcCount = 1; + } + } + else + { + // No source registers. + assert(putArgChild->OperIs(GT_LCL_VAR)); } - - // We will generate all of the code for the GT_PUTARG_STK and its child node - // as one contained operation - // - argNode->gtLsraInfo.srcCount = putArgChild->gtLsraInfo.srcCount; - assert(putArgChild->isContained()); } } else { assert(!putArgChild->isContained()); -#if defined(_TARGET_ARM_) - // The `double` types have been transformed to `long` on armel, - // while the actual long types have been decomposed.
- const bool isDouble = (putArgChild->TypeGet() == TYP_LONG); - if (isDouble) - { - argNode->gtLsraInfo.srcCount = 2; - } - else -#endif // defined(_TARGET_ARM_) - { - argNode->gtLsraInfo.srcCount = 1; - } + info->srcCount = GetOperandInfo(putArgChild); } } @@ -713,14 +725,14 @@ void LinearScan::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* argNode) // Notes: // Set the child node(s) to be contained // -void LinearScan::TreeNodeInfoInitPutArgSplit(GenTreePutArgSplit* argNode) +void LinearScan::TreeNodeInfoInitPutArgSplit(GenTreePutArgSplit* argNode, TreeNodeInfo* info) { assert(argNode->gtOper == GT_PUTARG_SPLIT); GenTreePtr putArgChild = argNode->gtOp.gtOp1; // Registers for split argument corresponds to source - argNode->gtLsraInfo.dstCount = argNode->gtNumRegs; + info->dstCount = argNode->gtNumRegs; regNumber argReg = argNode->gtRegNum; regMaskTP argMask = RBM_NONE; @@ -728,8 +740,8 @@ void LinearScan::TreeNodeInfoInitPutArgSplit(GenTreePutArgSplit* argNode) { argMask |= genRegMask((regNumber)((unsigned)argReg + i)); } - argNode->gtLsraInfo.setDstCandidates(this, argMask); - argNode->gtLsraInfo.setSrcCandidates(this, argMask); + info->setDstCandidates(this, argMask); + info->setSrcCandidates(this, argMask); if (putArgChild->OperGet() == GT_FIELD_LIST) { @@ -747,19 +759,21 @@ void LinearScan::TreeNodeInfoInitPutArgSplit(GenTreePutArgSplit* argNode) { GenTreePtr node = fieldListPtr->gtGetOp1(); assert(!node->isContained()); - unsigned currentRegCount = node->gtLsraInfo.dstCount; - regMaskTP sourceMask = RBM_NONE; + LocationInfoListNode* nodeInfo = getLocationInfo(node); + unsigned currentRegCount = nodeInfo->info.dstCount; + regMaskTP sourceMask = RBM_NONE; if (sourceRegCount < argNode->gtNumRegs) { for (unsigned regIndex = 0; regIndex < currentRegCount; regIndex++) { sourceMask |= genRegMask((regNumber)((unsigned)argReg + sourceRegCount + regIndex)); } - node->gtLsraInfo.setSrcCandidates(this, sourceMask); + nodeInfo->info.setSrcCandidates(this, sourceMask); } sourceRegCount += currentRegCount; + useList.Append(nodeInfo); } - argNode->gtLsraInfo.srcCount = sourceRegCount; + info->srcCount += sourceRegCount; assert(putArgChild->isContained()); } else @@ -768,9 +782,9 @@ void LinearScan::TreeNodeInfoInitPutArgSplit(GenTreePutArgSplit* argNode) assert(putArgChild->OperGet() == GT_OBJ); // We can use a ldr/str sequence so we need an internal register - argNode->gtLsraInfo.internalIntCount = 1; - regMaskTP internalMask = RBM_ALLINT & ~argMask; - argNode->gtLsraInfo.setInternalCandidates(this, internalMask); + info->internalIntCount = 1; + regMaskTP internalMask = RBM_ALLINT & ~argMask; + info->setInternalCandidates(this, internalMask); GenTreePtr objChild = putArgChild->gtOp.gtOp1; if (objChild->OperGet() == GT_LCL_VAR_ADDR) @@ -782,7 +796,7 @@ void LinearScan::TreeNodeInfoInitPutArgSplit(GenTreePutArgSplit* argNode) } else { - argNode->gtLsraInfo.srcCount = GetIndirSourceCount(putArgChild->AsIndir()); + info->srcCount = GetIndirInfo(putArgChild->AsIndir()); } assert(putArgChild->isContained()); } @@ -798,19 +812,34 @@ void LinearScan::TreeNodeInfoInitPutArgSplit(GenTreePutArgSplit* argNode) // Return Value: // None. 
// -void LinearScan::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode) +void LinearScan::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode, TreeNodeInfo* info) { GenTree* dstAddr = blkNode->Addr(); unsigned size = blkNode->gtBlkSize; GenTree* source = blkNode->Data(); + LocationInfoListNode* dstAddrInfo = nullptr; + LocationInfoListNode* sourceInfo = nullptr; + LocationInfoListNode* sizeInfo = nullptr; + // Sources are dest address and initVal or source. // We may require an additional source or temp register for the size. - blkNode->gtLsraInfo.srcCount = GetOperandSourceCount(dstAddr); - assert(blkNode->gtLsraInfo.dstCount == 0); + if (!dstAddr->isContained()) + { + info->srcCount++; + dstAddrInfo = getLocationInfo(dstAddr); + } + assert(info->dstCount == 0); GenTreePtr srcAddrOrFill = nullptr; bool isInitBlk = blkNode->OperIsInitBlkOp(); + regMaskTP dstAddrRegMask = RBM_NONE; + regMaskTP sourceRegMask = RBM_NONE; + regMaskTP blkSizeRegMask = RBM_NONE; + + short internalIntCount = 0; + regMaskTP internalIntCandidates = RBM_NONE; + if (isInitBlk) { GenTreePtr initVal = source; @@ -820,6 +849,11 @@ void LinearScan::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode) initVal = initVal->gtGetOp1(); } srcAddrOrFill = initVal; + if (!initVal->isContained()) + { + info->srcCount++; + sourceInfo = getLocationInfo(initVal); + } if (blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindUnroll) { @@ -828,33 +862,15 @@ void LinearScan::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode) // code sequences to improve CQ. // For reference see the code in lsraxarch.cpp. NYI_ARM("initblk loop unrolling is currently not implemented."); - if (!initVal->isContained()) - { - blkNode->gtLsraInfo.srcCount++; - } } else { assert(blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindHelper); - // The helper follows the regular ABI. - dstAddr->gtLsraInfo.setSrcCandidates(this, RBM_ARG_0); assert(!initVal->isContained()); - blkNode->gtLsraInfo.srcCount++; - initVal->gtLsraInfo.setSrcCandidates(this, RBM_ARG_1); - if (size != 0) - { - // Reserve a temp register for the block size argument. - blkNode->gtLsraInfo.setInternalCandidates(this, RBM_ARG_2); - blkNode->gtLsraInfo.internalIntCount = 1; - } - else - { - // The block size argument is a third argument to GT_STORE_DYN_BLK - noway_assert(blkNode->gtOper == GT_STORE_DYN_BLK); - blkNode->gtLsraInfo.setSrcCount(3); - GenTree* sizeNode = blkNode->AsDynBlk()->gtDynamicSize; - sizeNode->gtLsraInfo.setSrcCandidates(this, RBM_ARG_2); - } + // The helper follows the regular ABI. + dstAddrRegMask = RBM_ARG_0; + sourceRegMask = RBM_ARG_1; + blkSizeRegMask = RBM_ARG_2; } } else @@ -863,43 +879,42 @@ void LinearScan::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode) // Sources are src and dest and size if not constant. if (source->gtOper == GT_IND) { - srcAddrOrFill = blkNode->Data()->gtGetOp1(); + assert(source->isContained()); + srcAddrOrFill = source->gtGetOp1(); + sourceInfo = getLocationInfo(srcAddrOrFill); + info->srcCount++; } if (blkNode->OperGet() == GT_STORE_OBJ) { // CopyObj // We don't need to materialize the struct size but we still need // a temporary register to perform the sequence of loads and stores. 
- blkNode->gtLsraInfo.internalIntCount = 1; + internalIntCount = 1; if (size >= 2 * REGSIZE_BYTES) { // We will use ldp/stp to reduce code size and improve performance // so we need to reserve an extra internal register - blkNode->gtLsraInfo.internalIntCount++; + internalIntCount++; } // We can't use the special Write Barrier registers, so exclude them from the mask - regMaskTP internalIntCandidates = RBM_ALLINT & ~(RBM_WRITE_BARRIER_DST_BYREF | RBM_WRITE_BARRIER_SRC_BYREF); - blkNode->gtLsraInfo.setInternalCandidates(this, internalIntCandidates); + internalIntCandidates = RBM_ALLINT & ~(RBM_WRITE_BARRIER_DST_BYREF | RBM_WRITE_BARRIER_SRC_BYREF); // If we have a dest address we want it in RBM_WRITE_BARRIER_DST_BYREF. - dstAddr->gtLsraInfo.setSrcCandidates(this, RBM_WRITE_BARRIER_DST_BYREF); + dstAddrRegMask = RBM_WRITE_BARRIER_DST_BYREF; // If we have a source address we want it in REG_WRITE_BARRIER_SRC_BYREF. // Otherwise, if it is a local, codegen will put its address in REG_WRITE_BARRIER_SRC_BYREF, // which is killed by a StoreObj (and thus needn't be reserved). if (srcAddrOrFill != nullptr) { - srcAddrOrFill->gtLsraInfo.setSrcCandidates(this, RBM_WRITE_BARRIER_SRC_BYREF); + sourceRegMask = RBM_WRITE_BARRIER_SRC_BYREF; } } else { // CopyBlk - short internalIntCount = 0; - regMaskTP internalIntCandidates = RBM_NONE; - if (blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindUnroll) { // In case of a CpBlk with a constant size and less than CPBLK_UNROLL_LIMIT size @@ -921,67 +936,73 @@ void LinearScan::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode) else { assert(blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindHelper); - dstAddr->gtLsraInfo.setSrcCandidates(this, RBM_ARG_0); + dstAddrRegMask = RBM_ARG_0; // The srcAddr goes in arg1. if (srcAddrOrFill != nullptr) { - srcAddrOrFill->gtLsraInfo.setSrcCandidates(this, RBM_ARG_1); - } - if (size != 0) - { - // Reserve a temp register for the block size argument. - internalIntCandidates |= RBM_ARG_2; - internalIntCount++; - } - else - { - // The block size argument is a third argument to GT_STORE_DYN_BLK - assert(blkNode->gtOper == GT_STORE_DYN_BLK); - blkNode->gtLsraInfo.srcCount++; - GenTree* blockSize = blkNode->AsDynBlk()->gtDynamicSize; - blockSize->gtLsraInfo.setSrcCandidates(this, RBM_ARG_2); + sourceRegMask = RBM_ARG_1; } - } - if (internalIntCount != 0) - { - blkNode->gtLsraInfo.internalIntCount = internalIntCount; - blkNode->gtLsraInfo.setInternalCandidates(this, internalIntCandidates); + blkSizeRegMask = RBM_ARG_2; } } - blkNode->gtLsraInfo.srcCount += GetOperandSourceCount(source); } -} - -//------------------------------------------------------------------------ -// GetOperandSourceCount: Get the source registers for an operand that might be contained. -// -// Arguments: -// node - The node of interest -// -// Return Value: -// The number of source registers used by the *parent* of this node. -// -int LinearScan::GetOperandSourceCount(GenTree* node) -{ - if (!node->isContained()) + if (dstAddrInfo != nullptr) { - return 1; + if (dstAddrRegMask != RBM_NONE) + { + dstAddrInfo->info.setSrcCandidates(this, dstAddrRegMask); + } + useList.Append(dstAddrInfo); } - -#if !defined(_TARGET_64BIT_) - if (node->OperIs(GT_LONG)) + if (sourceRegMask != RBM_NONE) { - return 2; + if (sourceInfo != nullptr) + { + sourceInfo->info.setSrcCandidates(this, sourceRegMask); + } + else + { + // This is a local source; we'll use a temp register for its address. 
+ internalIntCandidates |= sourceRegMask; + internalIntCount++; + } + } + if (sourceInfo != nullptr) + { + useList.Add(sourceInfo, blkNode->IsReverseOp()); } -#endif // !defined(_TARGET_64BIT_) - if (node->OperIsIndir()) + if (blkNode->OperIs(GT_STORE_DYN_BLK)) { - const unsigned srcCount = GetIndirSourceCount(node->AsIndir()); - return srcCount; + // The block size argument is a third argument to GT_STORE_DYN_BLK + info->srcCount++; + + GenTree* blockSize = blkNode->AsDynBlk()->gtDynamicSize; + sizeInfo = getLocationInfo(blockSize); + useList.Add(sizeInfo, blkNode->AsDynBlk()->gtEvalSizeFirst); } - return 0; + if (blkSizeRegMask != RBM_NONE) + { + if (size != 0) + { + // Reserve a temp register for the block size argument. + internalIntCandidates |= blkSizeRegMask; + internalIntCount++; + } + else + { + // The block size argument is a third argument to GT_STORE_DYN_BLK + assert((blkNode->gtOper == GT_STORE_DYN_BLK) && (sizeInfo != nullptr)); + info->setSrcCount(3); + sizeInfo->info.setSrcCandidates(this, blkSizeRegMask); + } + } + if (internalIntCount != 0) + { + info->internalIntCount = internalIntCount; + info->setInternalCandidates(this, internalIntCandidates); + } } #endif // _TARGET_ARMARCH_ diff --git a/src/coreclr/src/jit/lsraxarch.cpp b/src/coreclr/src/jit/lsraxarch.cpp index 6c5abcdca6700a385e833d44e17e74d97c5c86f3..850488c0c3d1ddfc7b5937eb52976de6d573f87e 100644 --- a/src/coreclr/src/jit/lsraxarch.cpp +++ b/src/coreclr/src/jit/lsraxarch.cpp @@ -39,17 +39,17 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX // - Setting the appropriate candidates for a store of a multi-reg call return value. // - Requesting an internal register for SIMD12 stores. // -void LinearScan::TreeNodeInfoInitStoreLoc(GenTreeLclVarCommon* storeLoc) +void LinearScan::TreeNodeInfoInitStoreLoc(GenTreeLclVarCommon* storeLoc, TreeNodeInfo* info) { - TreeNodeInfo* info = &(storeLoc->gtLsraInfo); assert(info->dstCount == 0); GenTree* op1 = storeLoc->gtGetOp1(); #ifdef _TARGET_X86_ if (op1->OperGet() == GT_LONG) { - assert(op1->isContained()); - info->srcCount = 2; + assert(op1->isContained() && !op1->gtOp.gtOp1->isContained() && !op1->gtOp.gtOp2->isContained()); + info->srcCount = appendBinaryLocationInfoToList(op1->AsOp()); + assert(info->srcCount == 2); } else #endif // _TARGET_X86_ @@ -67,16 +67,19 @@ void LinearScan::TreeNodeInfoInitStoreLoc(GenTreeLclVarCommon* storeLoc) // srcCount = number of registers in which the value is returned by call GenTreeCall* call = op1->AsCall(); ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc(); - info->srcCount = retTypeDesc->GetReturnRegCount(); + unsigned regCount = retTypeDesc->GetReturnRegCount(); + info->srcCount = regCount; // Call node srcCandidates = Bitwise-OR(allregs(GetReturnRegType(i))) for all i=0..RetRegCount-1 - regMaskTP srcCandidates = allMultiRegCallNodeRegs(call); - op1->gtLsraInfo.setSrcCandidates(this, srcCandidates); - return; + regMaskTP srcCandidates = allMultiRegCallNodeRegs(call); + LocationInfoListNode* locInfo = getLocationInfo(op1); + locInfo->info.setSrcCandidates(this, srcCandidates); + useList.Append(locInfo); } else { info->srcCount = 1; + appendLocationInfoToList(op1); } #ifdef FEATURE_SIMD @@ -108,10 +111,8 @@ void LinearScan::TreeNodeInfoInitStoreLoc(GenTreeLclVarCommon* storeLoc) // requirements needed by LSRA to build the Interval Table (source, // destination and internal [temp] register counts). 
// -void LinearScan::TreeNodeInfoInit(GenTree* tree) +void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info) { - TreeNodeInfo* info = &(tree->gtLsraInfo); - if (tree->isContained()) { info->dstCount = 0; @@ -138,7 +139,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree) switch (tree->OperGet()) { default: - TreeNodeInfoInitSimple(tree); + TreeNodeInfoInitSimple(tree, info); break; case GT_LCL_VAR: @@ -149,12 +150,12 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree) // is not, if they were marked regOptional they should now be marked contained instead. // TODO-XArch-CQ: When this is being called while RefPositions are being created, // use lvLRACandidate here instead. - if (info->regOptional) + if (tree->IsRegOptional()) { if (!compiler->lvaTable[tree->AsLclVarCommon()->gtLclNum].lvTracked || compiler->lvaTable[tree->AsLclVarCommon()->gtLclNum].lvDoNotEnregister) { - info->regOptional = false; + tree->ClearRegOptional(); tree->SetContained(); info->dstCount = 0; return; @@ -180,7 +181,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree) case GT_STORE_LCL_FLD: case GT_STORE_LCL_VAR: - TreeNodeInfoInitStoreLoc(tree->AsLclVarCommon()); + TreeNodeInfoInitStoreLoc(tree->AsLclVarCommon(), info); break; case GT_LIST: @@ -202,14 +203,14 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree) case GT_LONG: assert(tree->IsUnusedValue()); // Contained nodes are already processed, only unused GT_LONG can reach here. - // An unused GT_LONG doesn't produce any registers. + // An unused GT_LONG node needs to consume its sources, but need not produce a register. tree->gtType = TYP_VOID; tree->ClearUnusedValue(); info->isLocalDefUse = false; - - // An unused GT_LONG node needs to consume its sources. - info->srcCount = 2; - info->dstCount = 0; + info->srcCount = 2; + info->dstCount = 0; + appendLocationInfoToList(tree->gtGetOp1()); + appendLocationInfoToList(tree->gtGetOp2()); break; #endif // !defined(_TARGET_64BIT_) @@ -224,7 +225,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree) break; case GT_RETURN: - TreeNodeInfoInitReturn(tree); + TreeNodeInfoInitReturn(tree, info); break; case GT_RETFILT: @@ -240,7 +241,9 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree) info->srcCount = 1; info->setSrcCandidates(this, RBM_INTRET); - tree->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(this, RBM_INTRET); + LocationInfoListNode* locationInfo = getLocationInfo(tree->gtOp.gtOp1); + locationInfo->info.setSrcCandidates(this, RBM_INTRET); + useList.Append(locationInfo); } break; @@ -249,6 +252,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree) // has a type but no child case GT_NOP: info->srcCount = 0; + assert((tree->gtOp.gtOp1 == nullptr) || tree->isContained()); if (tree->TypeGet() != TYP_VOID && tree->gtOp.gtOp1 == nullptr) { assert(info->dstCount == 1); @@ -263,9 +267,8 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree) { info->srcCount = 0; assert(info->dstCount == 0); - GenTree* cmp = tree->gtGetOp1(); - assert(cmp->gtLsraInfo.dstCount == 0); + assert(!cmp->IsValue()); } break; @@ -300,9 +303,10 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree) break; case GT_SWITCH_TABLE: - info->srcCount = 2; info->internalIntCount = 1; assert(info->dstCount == 0); + info->srcCount = appendBinaryLocationInfoToList(tree->AsOp()); + assert(info->srcCount == 2); break; case GT_ASG: @@ -322,8 +326,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree) // Rather they only support "op xmm, mem/xmm" form. 
if (varTypeIsFloating(tree->TypeGet())) { - info->srcCount = GetOperandSourceCount(tree->gtOp.gtOp1); - info->srcCount += GetOperandSourceCount(tree->gtOp.gtOp2); + info->srcCount = appendBinaryLocationInfoToList(tree->AsOp()); break; } @@ -333,13 +336,12 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree) case GT_OR: case GT_XOR: case GT_BT: - info->srcCount = GetOperandSourceCount(tree->gtOp.gtOp1); - info->srcCount += GetOperandSourceCount(tree->gtOp.gtOp2); + info->srcCount = appendBinaryLocationInfoToList(tree->AsOp()); break; case GT_RETURNTRAP: - // This just turns into a compare of its child with an int + a conditional call - info->srcCount = tree->gtOp.gtOp1->isContained() ? 0 : 1; + // This just turns into a compare of its child with an int + a conditional call. + info->srcCount = GetOperandInfo(tree->gtOp.gtOp1); assert(info->dstCount == 0); info->internalIntCount = 1; info->setInternalCandidates(this, allRegs(TYP_INT)); @@ -349,7 +351,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree) case GT_DIV: case GT_UMOD: case GT_UDIV: - TreeNodeInfoInitModDiv(tree->AsOp()); + TreeNodeInfoInitModDiv(tree->AsOp(), info); break; case GT_MUL: @@ -357,37 +359,41 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree) #if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND) case GT_MUL_LONG: #endif - TreeNodeInfoInitMul(tree->AsOp()); + TreeNodeInfoInitMul(tree->AsOp(), info); break; case GT_INTRINSIC: - TreeNodeInfoInitIntrinsic(tree->AsOp()); + TreeNodeInfoInitIntrinsic(tree->AsOp(), info); break; #ifdef FEATURE_SIMD case GT_SIMD: - TreeNodeInfoInitSIMD(tree->AsSIMD()); + TreeNodeInfoInitSIMD(tree->AsSIMD(), info); break; #endif // FEATURE_SIMD #if FEATURE_HW_INTRINSICS case GT_HWIntrinsic: - TreeNodeInfoInitHWIntrinsic(tree->AsHWIntrinsic()); + TreeNodeInfoInitHWIntrinsic(tree->AsHWIntrinsic(), info); break; #endif // FEATURE_HW_INTRINSICS case GT_CAST: - TreeNodeInfoInitCast(tree); + TreeNodeInfoInitCast(tree, info); break; case GT_BITCAST: - info->srcCount = 1; - info->dstCount = 1; - tree->AsUnOp()->gtOp1->gtLsraInfo.isTgtPref = true; - break; + { + LocationInfoListNode* locationInfo = getLocationInfo(tree->gtOp.gtOp1); + locationInfo->info.isTgtPref = true; + useList.Append(locationInfo); + info->srcCount = 1; + info->dstCount = 1; + } + break; case GT_NEG: - info->srcCount = GetOperandSourceCount(tree->gtOp.gtOp1); + info->srcCount = GetOperandInfo(tree->gtOp.gtOp1); // TODO-XArch-CQ: // SSE instruction set doesn't have an instruction to negate a number. @@ -413,7 +419,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree) break; case GT_NOT: - info->srcCount = GetOperandSourceCount(tree->gtOp.gtOp1); + info->srcCount = GetOperandInfo(tree->gtOp.gtOp1); break; case GT_LSH: @@ -425,7 +431,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree) case GT_LSH_HI: case GT_RSH_LO: #endif - TreeNodeInfoInitShiftRotate(tree); + (void)TreeNodeInfoInitShiftRotate(tree, info); break; case GT_EQ: @@ -437,41 +443,47 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree) case GT_TEST_EQ: case GT_TEST_NE: case GT_CMP: - TreeNodeInfoInitCmp(tree); + TreeNodeInfoInitCmp(tree, info); break; case GT_CKFINITE: + appendLocationInfoToList(tree->gtOp.gtOp1); info->srcCount = 1; assert(info->dstCount == 1); info->internalIntCount = 1; break; case GT_CMPXCHG: + { info->srcCount = 3; assert(info->dstCount == 1); // comparand is preferenced to RAX. // Remaining two operands can be in any reg other than RAX. 
- tree->gtCmpXchg.gtOpComparand->gtLsraInfo.setSrcCandidates(this, RBM_RAX); - tree->gtCmpXchg.gtOpLocation->gtLsraInfo.setSrcCandidates(this, allRegs(TYP_INT) & ~RBM_RAX); - tree->gtCmpXchg.gtOpValue->gtLsraInfo.setSrcCandidates(this, allRegs(TYP_INT) & ~RBM_RAX); + LocationInfoListNode* locationInfo = getLocationInfo(tree->gtCmpXchg.gtOpLocation); + locationInfo->info.setSrcCandidates(this, allRegs(TYP_INT) & ~RBM_RAX); + useList.Append(locationInfo); + LocationInfoListNode* valueInfo = getLocationInfo(tree->gtCmpXchg.gtOpValue); + valueInfo->info.setSrcCandidates(this, allRegs(TYP_INT) & ~RBM_RAX); + useList.Append(valueInfo); info->setDstCandidates(this, RBM_RAX); - break; + LocationInfoListNode* comparandInfo = getLocationInfo(tree->gtCmpXchg.gtOpComparand); + comparandInfo->info.setSrcCandidates(this, RBM_RAX); + useList.Append(comparandInfo); + } + break; case GT_LOCKADD: - { - GenTreePtr op2 = tree->gtOp.gtOp2; - info->srcCount = op2->isContained() ? 1 : 2; + info->srcCount = appendBinaryLocationInfoToList(tree->AsOp()); assert(info->dstCount == (tree->TypeGet() == TYP_VOID) ? 0 : 1); - } - break; + break; case GT_PUTARG_REG: - TreeNodeInfoInitPutArgReg(tree->AsUnOp()); + TreeNodeInfoInitPutArgReg(tree->AsUnOp(), info); break; case GT_CALL: - TreeNodeInfoInitCall(tree->AsCall()); + TreeNodeInfoInitCall(tree->AsCall(), info); break; case GT_ADDR: @@ -497,14 +509,14 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree) #ifdef FEATURE_PUT_STRUCT_ARG_STK case GT_PUTARG_STK: - TreeNodeInfoInitPutArgStk(tree->AsPutArgStk()); + TreeNodeInfoInitPutArgStk(tree->AsPutArgStk(), info); break; #endif // FEATURE_PUT_STRUCT_ARG_STK case GT_STORE_BLK: case GT_STORE_OBJ: case GT_STORE_DYN_BLK: - TreeNodeInfoInitBlockStore(tree->AsBlk()); + TreeNodeInfoInitBlockStore(tree->AsBlk(), info); break; case GT_INIT_VAL: @@ -513,7 +525,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree) break; case GT_LCLHEAP: - TreeNodeInfoInitLclHeap(tree); + TreeNodeInfoInitLclHeap(tree, info); break; case GT_ARR_BOUNDS_CHECK: @@ -521,9 +533,10 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree) case GT_SIMD_CHK: #endif // FEATURE_SIMD // Consumes arrLen & index - has no result - info->srcCount = GetOperandSourceCount(tree->AsBoundsChk()->gtIndex); - info->srcCount += GetOperandSourceCount(tree->AsBoundsChk()->gtArrLen); + info->srcCount = 2; assert(info->dstCount == 0); + info->srcCount = GetOperandInfo(tree->AsBoundsChk()->gtIndex); + info->srcCount += GetOperandInfo(tree->AsBoundsChk()->gtArrLen); break; case GT_ARR_ELEM: @@ -533,13 +546,20 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree) break; case GT_ARR_INDEX: + { info->srcCount = 2; assert(info->dstCount == 1); + assert(!tree->AsArrIndex()->ArrObj()->isContained()); + assert(!tree->AsArrIndex()->IndexExpr()->isContained()); // For GT_ARR_INDEX, the lifetime of the arrObj must be extended because it is actually used multiple // times while the result is being computed. 
- tree->AsArrIndex()->ArrObj()->gtLsraInfo.isDelayFree = true; - info->hasDelayFreeSrc = true; - break; + LocationInfoListNode* arrObjInfo = getLocationInfo(tree->AsArrIndex()->ArrObj()); + arrObjInfo->info.isDelayFree = true; + useList.Append(arrObjInfo); + useList.Append(getLocationInfo(tree->AsArrIndex()->IndexExpr())); + info->hasDelayFreeSrc = true; + } + break; case GT_ARR_OFFSET: // This consumes the offset, if any, the arrObj and the effective index, @@ -551,12 +571,14 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree) } else { - info->srcCount++; // Here we simply need an internal register, which must be different // from any of the operand's registers, but may be the same as targetReg. info->srcCount = 3; info->internalIntCount = 1; + appendLocationInfoToList(tree->AsArrOffs()->gtOffset); } + appendLocationInfoToList(tree->AsArrOffs()->gtIndex); + appendLocationInfoToList(tree->AsArrOffs()->gtArrObj); break; case GT_LEA: @@ -567,29 +589,32 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree) if (tree->AsAddrMode()->HasBase()) { info->srcCount++; + appendLocationInfoToList(tree->AsAddrMode()->Base()); } if (tree->AsAddrMode()->HasIndex()) { info->srcCount++; + appendLocationInfoToList(tree->AsAddrMode()->Index()); } break; case GT_STOREIND: if (compiler->codeGen->gcInfo.gcIsWriteBarrierAsgNode(tree)) { - TreeNodeInfoInitGCWriteBarrier(tree); + TreeNodeInfoInitGCWriteBarrier(tree, info); break; } - TreeNodeInfoInitIndir(tree->AsIndir()); + TreeNodeInfoInitIndir(tree->AsIndir(), info); break; case GT_NULLCHECK: assert(info->dstCount == 0); + appendLocationInfoToList(tree->gtOp.gtOp1); info->srcCount = 1; break; case GT_IND: - TreeNodeInfoInitIndir(tree->AsIndir()); + TreeNodeInfoInitIndir(tree->AsIndir(), info); assert(info->dstCount == 1); break; @@ -613,8 +638,8 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree) break; case GT_INDEX_ADDR: - info->srcCount = 2; - info->dstCount = 1; + assert(info->dstCount == 1); + info->srcCount = appendBinaryLocationInfoToList(tree->AsOp()); if (tree->AsIndexAddr()->Index()->TypeGet() == TYP_I_IMPL) { @@ -658,9 +683,9 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree) // If we have a read-modify-write operation, we want to preference op1 to the target, // if it is not contained. - if (!op1->isContained()) + if (!op1->isContained() && !op1->OperIs(GT_LIST)) { - op1->gtLsraInfo.isTgtPref = true; + useList.GetTreeNodeInfo(op1).isTgtPref = true; } // Is this a non-commutative operator, or is op2 a contained memory op? 
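Every hunk above and below applies the same substitution: register constraints that used to be written into an operand's gtLsraInfo are now attached to a per-use LocationInfoListNode, and the defining node's own TreeNodeInfo arrives through the info parameter instead of living on the GenTree. A condensed illustration of the recurring pattern, assembled only from calls that appear in this patch (the operand variable and the RBM_RAX mask are placeholders):

    // Annotate one operand's use without storing anything on the GenTree.
    LocationInfoListNode* opInfo = getLocationInfo(operand); // allocate the use record
    opInfo->info.setSrcCandidates(this, RBM_RAX);            // constrain this use's registers
    opInfo->info.isDelayFree = true;                         // keep it live past the def
    useList.Append(opInfo);                                  // uses stay in evaluation order
    info->srcCount++;                                        // the parent consumes one more source

Ordering matters here: Append, Prepend, and the two-argument Add(node, prepend) keep useList in evaluation order, which is why the reverse-op cases (tree->IsReverseOp(), gtEvalSizeFirst) choose where a new record is placed.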
@@ -719,12 +744,13 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree) } } - TreeNodeInfoInitCheckByteable(tree); + TreeNodeInfoInitCheckByteable(tree, info); // We need to be sure that we've set info->srcCount and info->dstCount appropriately assert((info->dstCount < 2) || (tree->IsMultiRegCall() && info->dstCount == MAX_RET_REG_COUNT)); assert(info->isLocalDefUse == (tree->IsValue() && tree->IsUnusedValue())); assert(!tree->IsUnusedValue() || (info->dstCount != 0)); + assert(info->dstCount == tree->GetRegisterDstCount()); } //--------------------------------------------------------------------- @@ -752,22 +778,23 @@ bool LinearScan::CheckAndSetDelayFree(GenTree* delayUseSrc) { GenTree* base = delayUseSrc->AsIndir()->Base(); GenTree* index = delayUseSrc->AsIndir()->Index(); - if (base != nullptr) + if ((base != nullptr) && !base->isContained()) { - base->gtLsraInfo.isDelayFree = true; - returnValue = true; + useList.GetTreeNodeInfo(base).isDelayFree = true; + returnValue = true; } if (index != nullptr) { - index->gtLsraInfo.isDelayFree = true; - returnValue = true; + assert(!index->isContained()); + useList.GetTreeNodeInfo(index).isDelayFree = true; + returnValue = true; } } } else { - delayUseSrc->gtLsraInfo.isDelayFree = true; - returnValue = true; + useList.GetTreeNodeInfo(delayUseSrc).isDelayFree = true; + returnValue = true; } return returnValue; } @@ -782,11 +809,9 @@ bool LinearScan::CheckAndSetDelayFree(GenTree* delayUseSrc) // Return Value: // None. // -void LinearScan::TreeNodeInfoInitCheckByteable(GenTree* tree) +void LinearScan::TreeNodeInfoInitCheckByteable(GenTree* tree, TreeNodeInfo* info) { #ifdef _TARGET_X86_ - TreeNodeInfo* info = &(tree->gtLsraInfo); - // Exclude RBM_NON_BYTE_REGS from dst candidates of tree node and src candidates of operands // if the tree node is a byte type. // @@ -812,9 +837,10 @@ void LinearScan::TreeNodeInfoInitCheckByteable(GenTree* tree) // No need to set src candidates on a contained child operand. if (!op->isContained()) { - regMask = op->gtLsraInfo.getSrcCandidates(this); + TreeNodeInfo& op1Info = useList.GetTreeNodeInfo(op); + regMask = op1Info.getSrcCandidates(this); assert(regMask != RBM_NONE); - op->gtLsraInfo.setSrcCandidates(this, regMask & ~RBM_NON_BYTE_REGS); + op1Info.setSrcCandidates(this, regMask & ~RBM_NON_BYTE_REGS); } } @@ -823,9 +849,10 @@ void LinearScan::TreeNodeInfoInitCheckByteable(GenTree* tree) op = tree->gtOp.gtOp2; if (!op->isContained()) { - regMask = op->gtLsraInfo.getSrcCandidates(this); + TreeNodeInfo& op2Info = useList.GetTreeNodeInfo(op); + regMask = op2Info.getSrcCandidates(this); assert(regMask != RBM_NONE); - op->gtLsraInfo.setSrcCandidates(this, regMask & ~RBM_NON_BYTE_REGS); + op2Info.setSrcCandidates(this, regMask & ~RBM_NON_BYTE_REGS); } } } @@ -885,9 +912,8 @@ bool LinearScan::isRMWRegOper(GenTreePtr tree) // Return Value: // None. // -void LinearScan::TreeNodeInfoInitSimple(GenTree* tree) +void LinearScan::TreeNodeInfoInitSimple(GenTree* tree, TreeNodeInfo* info) { - TreeNodeInfo* info = &(tree->gtLsraInfo); if (tree->isContained()) { info->srcCount = 0; @@ -900,11 +926,7 @@ void LinearScan::TreeNodeInfoInitSimple(GenTree* tree) } else if (kind & (GTK_SMPOP)) { - if (tree->gtGetOp2IfPresent() != nullptr) - { - info->srcCount += GetOperandSourceCount(tree->gtOp.gtOp2); - } - info->srcCount += GetOperandSourceCount(tree->gtOp.gtOp1); + info->srcCount = appendBinaryLocationInfoToList(tree->AsOp()); } else { @@ -921,9 +943,8 @@ void LinearScan::TreeNodeInfoInitSimple(GenTree* tree) // Return Value: // None. 
// -void LinearScan::TreeNodeInfoInitReturn(GenTree* tree) +void LinearScan::TreeNodeInfoInitReturn(GenTree* tree, TreeNodeInfo* info) { - TreeNodeInfo* info = &(tree->gtLsraInfo); assert(info->dstCount == 0); GenTree* op1 = tree->gtGetOp1(); @@ -931,11 +952,15 @@ void LinearScan::TreeNodeInfoInitReturn(GenTree* tree) if (tree->TypeGet() == TYP_LONG) { assert((op1->OperGet() == GT_LONG) && op1->isContained()); - GenTree* loVal = op1->gtGetOp1(); - GenTree* hiVal = op1->gtGetOp2(); - info->srcCount = 2; - loVal->gtLsraInfo.setSrcCandidates(this, RBM_LNGRET_LO); - hiVal->gtLsraInfo.setSrcCandidates(this, RBM_LNGRET_HI); + GenTree* loVal = op1->gtGetOp1(); + GenTree* hiVal = op1->gtGetOp2(); + info->srcCount = 2; + LocationInfoListNode* loValInfo = getLocationInfo(loVal); + LocationInfoListNode* hiValInfo = getLocationInfo(hiVal); + loValInfo->info.setSrcCandidates(this, RBM_LNGRET_LO); + hiValInfo->info.setSrcCandidates(this, RBM_LNGRET_HI); + useList.Append(loValInfo); + useList.Append(hiValInfo); } else #endif // !defined(_TARGET_64BIT_) @@ -982,10 +1007,12 @@ void LinearScan::TreeNodeInfoInitReturn(GenTree* tree) } } + LocationInfoListNode* locationInfo = getLocationInfo(op1); if (useCandidates != RBM_NONE) { - op1->gtLsraInfo.setSrcCandidates(this, useCandidates); + locationInfo->info.setSrcCandidates(this, useCandidates); } + useList.Append(locationInfo); } } @@ -998,88 +1025,84 @@ void LinearScan::TreeNodeInfoInitReturn(GenTree* tree) // Return Value: // None. // -void LinearScan::TreeNodeInfoInitShiftRotate(GenTree* tree) +int LinearScan::TreeNodeInfoInitShiftRotate(GenTree* tree, TreeNodeInfo* info) { - TreeNodeInfo* info = &(tree->gtLsraInfo); - // For shift operations, we need that the number // of bits moved gets stored in CL in case // the number of bits to shift is not a constant. - GenTreePtr shiftBy = tree->gtOp.gtOp2; - GenTreePtr source = tree->gtOp.gtOp1; - + int srcCount = 0; + GenTreePtr shiftBy = tree->gtOp.gtOp2; + GenTreePtr source = tree->gtOp.gtOp1; + LocationInfoListNode* shiftByInfo = nullptr; // x64 can encode 8 bits of shift and it will use 5 or 6. (the others are masked off) // We will allow whatever can be encoded - hope you know what you are doing. - if (!shiftBy->isContained()) + if (shiftBy->isContained()) { - shiftBy->gtLsraInfo.setSrcCandidates(this, RBM_RCX); - if (source->isContained()) - { - if (source->OperIs(GT_IND)) - { - if (source->AsIndir()->Base() != nullptr) - { - source->AsIndir()->Base()->gtLsraInfo.setSrcCandidates(this, allRegs(TYP_INT) & ~RBM_RCX); - } - if (source->AsIndir()->Index() != nullptr) - { - source->AsIndir()->Index()->gtLsraInfo.setSrcCandidates(this, allRegs(TYP_INT) & ~RBM_RCX); - } - } - } - else + srcCount += GetOperandInfo(source); + } + else + { + srcCount++; + shiftByInfo = getLocationInfo(shiftBy); + shiftByInfo->info.setSrcCandidates(this, RBM_RCX); + info->setDstCandidates(this, allRegs(TYP_INT) & ~RBM_RCX); + LocationInfoListNode* sourceInfo; + srcCount += GetOperandInfo(source, &sourceInfo); + for (; sourceInfo != nullptr; sourceInfo = sourceInfo->Next()) { - source->gtLsraInfo.setSrcCandidates(this, allRegs(TYP_INT) & ~RBM_RCX); + sourceInfo->info.setSrcCandidates(this, allRegs(TYP_INT) & ~RBM_RCX); } - info->setDstCandidates(this, allRegs(TYP_INT) & ~RBM_RCX); } - // Note that Rotate Left/Right instructions don't set ZF and SF flags. - // - // If the operand being shifted is 32-bits then upper three bits are masked - // by hardware to get actual shift count. 
Similarly for 64-bit operands - // shift count is narrowed to [0..63]. If the resulting shift count is zero, - // then shift operation won't modify flags. - // - // TODO-CQ-XARCH: We can optimize generating 'test' instruction for GT_EQ/NE(shift, 0) - // if the shift count is known to be non-zero and in the range depending on the - // operand size. +// Note that Rotate Left/Right instructions don't set ZF and SF flags. +// +// If the operand being shifted is 32-bits then upper three bits are masked +// by hardware to get actual shift count. Similarly for 64-bit operands +// shift count is narrowed to [0..63]. If the resulting shift count is zero, +// then shift operation won't modify flags. +// +// TODO-CQ-XARCH: We can optimize generating 'test' instruction for GT_EQ/NE(shift, 0) +// if the shift count is known to be non-zero and in the range depending on the +// operand size. - if (!tree->isContained()) - { #ifdef _TARGET_X86_ - // The first operand of a GT_LSH_HI and GT_RSH_LO oper is a GT_LONG so that - // we can have a three operand form. Increment the srcCount. - if (tree->OperGet() == GT_LSH_HI || tree->OperGet() == GT_RSH_LO) - { - assert((source->OperGet() == GT_LONG) && source->isContained()); + // The first operand of a GT_LSH_HI and GT_RSH_LO oper is a GT_LONG so that + // we can have a three operand form. Increment the srcCount. + if (tree->OperGet() == GT_LSH_HI || tree->OperGet() == GT_RSH_LO) + { + assert((source->OperGet() == GT_LONG) && source->isContained()); - if (tree->OperGet() == GT_LSH_HI) - { - GenTreePtr sourceLo = source->gtOp.gtOp1; - sourceLo->gtLsraInfo.isDelayFree = true; - } - else - { - GenTreePtr sourceHi = source->gtOp.gtOp2; - sourceHi->gtLsraInfo.isDelayFree = true; - } + GenTreePtr sourceLo = source->gtOp.gtOp1; + LocationInfoListNode* sourceLoInfo = useList.Begin(); + LocationInfoListNode* sourceHiInfo = useList.GetSecond(INDEBUG(source->gtGetOp2())); - source->gtLsraInfo.hasDelayFreeSrc = true; - info->hasDelayFreeSrc = true; - info->srcCount += 2; + info->hasDelayFreeSrc = true; + if (tree->OperGet() == GT_LSH_HI) + { + sourceLoInfo->info.isDelayFree = true; } else + { + sourceHiInfo->info.isDelayFree = true; + } + } #endif - if (!source->isContained()) + if (shiftByInfo != nullptr) + { + if (tree->IsReverseOp()) { - info->srcCount++; + useList.Prepend(shiftByInfo); } - if (!shiftBy->isContained()) + else { - info->srcCount++; + useList.Append(shiftByInfo); } } + if (!tree->isContained()) + { + info->srcCount = srcCount; + } + return srcCount; } //------------------------------------------------------------------------ @@ -1095,22 +1118,25 @@ void LinearScan::TreeNodeInfoInitShiftRotate(GenTree* tree) // Return Value: // None. // -void LinearScan::TreeNodeInfoInitPutArgReg(GenTreeUnOp* node) +void LinearScan::TreeNodeInfoInitPutArgReg(GenTreeUnOp* node, TreeNodeInfo* info) { assert(node != nullptr); assert(node->OperIsPutArgReg()); - node->gtLsraInfo.srcCount = 1; - regNumber argReg = node->gtRegNum; + info->srcCount = 1; + regNumber argReg = node->gtRegNum; assert(argReg != REG_NA); // Set the register requirements for the node. const regMaskTP argMask = genRegMask(argReg); - node->gtLsraInfo.setDstCandidates(this, argMask); - node->gtLsraInfo.setSrcCandidates(this, argMask); + info->setDstCandidates(this, argMask); + info->setSrcCandidates(this, argMask); // To avoid redundant moves, have the argument operand computed in the // register in which the argument is passed to the call. 
- node->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(this, getUseCandidates(node)); + LocationInfoListNode* op1Info = getLocationInfo(node->gtOp.gtOp1); + op1Info->info.setSrcCandidates(this, info->getSrcCandidates(this)); + op1Info->info.isDelayFree = true; + useList.Append(op1Info); } //------------------------------------------------------------------------ @@ -1129,7 +1155,7 @@ void LinearScan::TreeNodeInfoInitPutArgReg(GenTreeUnOp* node) // Since the integer register is not associated with the arg node, we will reserve it as // an internal register on the call so that it is not used during the evaluation of the call node // (e.g. for the target). -void LinearScan::HandleFloatVarArgs(GenTreeCall* call, GenTree* argNode, bool* callHasFloatRegArgs) +void LinearScan::HandleFloatVarArgs(GenTreeCall* call, TreeNodeInfo* info, GenTree* argNode, bool* callHasFloatRegArgs) { #if FEATURE_VARARG if (call->IsVarargs() && varTypeIsFloating(argNode)) @@ -1138,8 +1164,8 @@ void LinearScan::HandleFloatVarArgs(GenTreeCall* call, GenTree* argNode, bool* c regNumber argReg = argNode->gtRegNum; regNumber targetReg = compiler->getCallArgIntRegister(argReg); - call->gtLsraInfo.setInternalIntCount(call->gtLsraInfo.internalIntCount + 1); - call->gtLsraInfo.addInternalCandidates(this, genRegMask(targetReg)); + info->setInternalIntCount(info->internalIntCount + 1); + info->addInternalCandidates(this, genRegMask(targetReg)); } #endif // FEATURE_VARARG } @@ -1153,9 +1179,8 @@ void LinearScan::HandleFloatVarArgs(GenTreeCall* call, GenTree* argNode, bool* c // Return Value: // None. // -void LinearScan::TreeNodeInfoInitCall(GenTreeCall* call) +void LinearScan::TreeNodeInfoInitCall(GenTreeCall* call, TreeNodeInfo* info) { - TreeNodeInfo* info = &(call->gtLsraInfo); bool hasMultiRegRetVal = false; ReturnTypeDesc* retTypeDesc = nullptr; @@ -1180,45 +1205,13 @@ void LinearScan::TreeNodeInfoInitCall(GenTreeCall* call) assert(info->dstCount == 0); } - GenTree* ctrlExpr = call->gtControlExpr; + GenTree* ctrlExpr = call->gtControlExpr; + LocationInfoListNode* ctrlExprInfo = nullptr; if (call->gtCallType == CT_INDIRECT) { ctrlExpr = call->gtCallAddr; } - // set reg requirements on call target represented as control sequence. - if (ctrlExpr != nullptr) - { - // In case of fast tail implemented as jmp, make sure that gtControlExpr is - // computed into a register. - if (call->IsFastTailCall()) - { - { - // Fast tail call - make sure that call target is always computed in RAX - // so that epilog sequence can generate "jmp rax" to achieve fast tail call. - ctrlExpr->gtLsraInfo.setSrcCandidates(this, RBM_RAX); - } - } -#ifdef _TARGET_X86_ - else - { - // On x86, we need to generate a very specific pattern for indirect VSD calls: - // - // 3-byte nop - // call dword ptr [eax] - // - // Where EAX is also used as an argument to the stub dispatch helper. Make - // sure that the call target address is computed into EAX in this case. - if (call->IsVirtualStub() && (call->gtCallType == CT_INDIRECT)) - { - assert(ctrlExpr->isIndir() && ctrlExpr->isContained()); - ctrlExpr->gtGetOp1()->gtLsraInfo.setSrcCandidates(this, RBM_VIRTUAL_STUB_TARGET); - } - } -#endif // _TARGET_X86_ - info->srcCount += GetOperandSourceCount(ctrlExpr); - } - // If this is a varargs call, we will clear the internal candidates in case we need // to reserve some integer registers for copying float args. 
// We have to do this because otherwise the default candidates are allRegs, and adding @@ -1293,7 +1286,8 @@ void LinearScan::TreeNodeInfoInitCall(GenTreeCall* call) if (argNode->OperIsPutArgReg()) { info->srcCount++; - HandleFloatVarArgs(call, argNode, &callHasFloatRegArgs); + HandleFloatVarArgs(call, info, argNode, &callHasFloatRegArgs); + appendLocationInfoToList(argNode); } #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING else if (argNode->OperGet() == GT_FIELD_LIST) @@ -1302,7 +1296,8 @@ void LinearScan::TreeNodeInfoInitCall(GenTreeCall* call) { assert(entry->Current()->OperIsPutArgReg()); info->srcCount++; - HandleFloatVarArgs(call, argNode, &callHasFloatRegArgs); + HandleFloatVarArgs(call, info, argNode, &callHasFloatRegArgs); + appendLocationInfoToList(entry->Current()); } } #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING @@ -1327,7 +1322,7 @@ void LinearScan::TreeNodeInfoInitCall(GenTreeCall* call) if (argNode->TypeGet() == TYP_STRUCT) { assert(argNode->gtOp.gtOp1 != nullptr && argNode->gtOp.gtOp1->OperGet() == GT_OBJ); - assert(argNode->gtLsraInfo.srcCount == 0); + assert(argNode->gtOp.gtOp1->isContained()); } #endif // FEATURE_PUT_STRUCT_ARG_STK continue; @@ -1367,29 +1362,67 @@ void LinearScan::TreeNodeInfoInitCall(GenTreeCall* call) while (args) { GenTreePtr arg = args->gtOp.gtOp1; - if (!(args->gtFlags & GTF_LATE_ARG)) + if (!(arg->gtFlags & GTF_LATE_ARG) && !arg) { - TreeNodeInfo* argInfo = &(arg->gtLsraInfo); - if ((argInfo->dstCount != 0) && !arg->IsArgPlaceHolderNode() && !arg->isContained()) + if (arg->IsValue() && !arg->isContained()) { - argInfo->isLocalDefUse = true; + // argInfo->isLocalDefUse = true; + assert(arg->IsUnusedValue()); } - assert(argInfo->dstCount == 0); + // assert(argInfo->dstCount == 0); } args = args->gtOp.gtOp2; } -#if FEATURE_VARARG - // If it is a fast tail call, it is already preferenced to use RAX. - // Therefore, no need set src candidates on call tgt again. - if (call->IsVarargs() && callHasFloatRegArgs && !call->IsFastTailCall() && (ctrlExpr != nullptr)) + // set reg requirements on call target represented as control sequence. + if (ctrlExpr != nullptr) { - // Don't assign the call target to any of the argument registers because - // we will use them to also pass floating point arguments as required - // by Amd64 ABI. - ctrlExpr->gtLsraInfo.setSrcCandidates(this, allRegs(TYP_INT) & ~(RBM_ARG_REGS)); - } + LocationInfoListNode* ctrlExprInfo = nullptr; + int ctrlExprCount = GetOperandInfo(ctrlExpr); + if (ctrlExprCount != 0) + { + assert(ctrlExprCount == 1); + ctrlExprInfo = useList.Last(); + info->srcCount++; + } + + // In case of fast tail implemented as jmp, make sure that gtControlExpr is + // computed into a register. + if (call->IsFastTailCall()) + { + assert(!ctrlExpr->isContained() && ctrlExprInfo != nullptr); + // Fast tail call - make sure that call target is always computed in RAX + // so that epilog sequence can generate "jmp rax" to achieve fast tail call. + ctrlExprInfo->info.setSrcCandidates(this, RBM_RAX); + } +#ifdef _TARGET_X86_ + else if (call->IsVirtualStub() && (call->gtCallType == CT_INDIRECT)) + { + // On x86, we need to generate a very specific pattern for indirect VSD calls: + // + // 3-byte nop + // call dword ptr [eax] + // + // Where EAX is also used as an argument to the stub dispatch helper. Make + // sure that the call target address is computed into EAX in this case. 
+ assert(ctrlExprInfo != nullptr); + assert(ctrlExpr->isIndir() && ctrlExpr->isContained()); + ctrlExprInfo->info.setSrcCandidates(this, RBM_VIRTUAL_STUB_TARGET); + } +#endif // _TARGET_X86_ + +#if FEATURE_VARARG + // If it is a fast tail call, it is already preferenced to use RAX. + // Therefore, no need set src candidates on call tgt again. + if (call->IsVarargs() && callHasFloatRegArgs && !call->IsFastTailCall() && (ctrlExprInfo != nullptr)) + { + // Don't assign the call target to any of the argument registers because + // we will use them to also pass floating point arguments as required + // by Amd64 ABI. + ctrlExprInfo->info.setSrcCandidates(this, allRegs(TYP_INT) & ~(RBM_ARG_REGS)); + } #endif // !FEATURE_VARARG + } } //------------------------------------------------------------------------ @@ -1401,17 +1434,25 @@ void LinearScan::TreeNodeInfoInitCall(GenTreeCall* call) // Return Value: // None. // -void LinearScan::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode) +void LinearScan::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode, TreeNodeInfo* info) { GenTree* dstAddr = blkNode->Addr(); unsigned size = blkNode->gtBlkSize; GenTree* source = blkNode->Data(); + LocationInfoListNode* dstAddrInfo = nullptr; + LocationInfoListNode* sourceInfo = nullptr; + LocationInfoListNode* sizeInfo = nullptr; + // Sources are dest address, initVal or source. // We may require an additional source or temp register for the size. - blkNode->gtLsraInfo.srcCount = GetOperandSourceCount(dstAddr); - assert(blkNode->gtLsraInfo.dstCount == 0); - blkNode->gtLsraInfo.setInternalCandidates(this, RBM_NONE); + if (!dstAddr->isContained()) + { + info->srcCount++; + dstAddrInfo = getLocationInfo(dstAddr); + } + assert(info->dstCount == 0); + info->setInternalCandidates(this, RBM_NONE); GenTreePtr srcAddrOrFill = nullptr; bool isInitBlk = blkNode->OperIsInitBlkOp(); @@ -1430,7 +1471,8 @@ void LinearScan::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode) srcAddrOrFill = initVal; if (!initVal->isContained()) { - blkNode->gtLsraInfo.srcCount++; + info->srcCount++; + sourceInfo = getLocationInfo(initVal); } switch (blkNode->gtBlkOpKind) @@ -1440,8 +1482,8 @@ void LinearScan::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode) if (size >= XMM_REGSIZE_BYTES) { // Reserve an XMM register to fill it with a pack of 16 init value constants. - blkNode->gtLsraInfo.internalFloatCount = 1; - blkNode->gtLsraInfo.setInternalCandidates(this, internalFloatRegCandidates()); + info->internalFloatCount = 1; + info->setInternalCandidates(this, internalFloatRegCandidates()); // use XMM register to fill with constants, it's AVX instruction and set the flag SetContainsAVXFlags(); } @@ -1463,7 +1505,6 @@ void LinearScan::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode) // b) The fill value has to be in RAX. // c) The buffer size will go in RCX. dstAddrRegMask = RBM_RDI; - srcAddrOrFill = initVal; sourceRegMask = RBM_RAX; blkSizeRegMask = RBM_RCX; break; @@ -1490,7 +1531,13 @@ void LinearScan::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode) // CopyObj or CopyBlk if (source->gtOper == GT_IND) { + assert(source->isContained()); srcAddrOrFill = source->gtGetOp1(); + if (!srcAddrOrFill->isContained()) + { + sourceInfo = getLocationInfo(srcAddrOrFill); + info->srcCount++; + } } if (blkNode->OperGet() == GT_STORE_OBJ) { @@ -1516,7 +1563,7 @@ void LinearScan::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode) // RBM_NON_BYTE_REGS from internal candidates. 
                 if ((size & (XMM_REGSIZE_BYTES - 1)) != 0)
                 {
-                    blkNode->gtLsraInfo.internalIntCount++;
+                    info->internalIntCount++;
                     regMaskTP regMask = allRegs(TYP_INT);
 
 #ifdef _TARGET_X86_
@@ -1525,7 +1572,7 @@ void LinearScan::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
                         regMask &= ~RBM_NON_BYTE_REGS;
                     }
 #endif
-                    blkNode->gtLsraInfo.setInternalCandidates(this, regMask);
+                    info->setInternalCandidates(this, regMask);
                 }
 
                 if (size >= XMM_REGSIZE_BYTES)
@@ -1533,8 +1580,8 @@ void LinearScan::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
                     // If we have a buffer larger than XMM_REGSIZE_BYTES,
                     // reserve an XMM register to use it for a
                     // series of 16-byte loads and stores.
-                    blkNode->gtLsraInfo.internalFloatCount = 1;
-                    blkNode->gtLsraInfo.addInternalCandidates(this, internalFloatRegCandidates());
+                    info->internalFloatCount = 1;
+                    info->addInternalCandidates(this, internalFloatRegCandidates());
                     // Uses XMM reg for load and store and hence check to see whether AVX instructions
                     // are used for codegen, set ContainsAVX flag
                     SetContainsAVXFlags();
@@ -1568,41 +1615,58 @@ void LinearScan::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
                     unreached();
             }
         }
-        blkNode->gtLsraInfo.srcCount += GetOperandSourceCount(source);
     }
 
-    if (dstAddrRegMask != RBM_NONE)
+    if (dstAddrInfo != nullptr)
     {
-        dstAddr->gtLsraInfo.setSrcCandidates(this, dstAddrRegMask);
+        if (dstAddrRegMask != RBM_NONE)
+        {
+            dstAddrInfo->info.setSrcCandidates(this, dstAddrRegMask);
+        }
+        useList.Append(dstAddrInfo);
     }
     if (sourceRegMask != RBM_NONE)
     {
-        if (srcAddrOrFill != nullptr)
+        if (sourceInfo != nullptr)
        {
-            srcAddrOrFill->gtLsraInfo.setSrcCandidates(this, sourceRegMask);
+            sourceInfo->info.setSrcCandidates(this, sourceRegMask);
         }
         else
        {
             // This is a local source; we'll use a temp register for its address.
-            blkNode->gtLsraInfo.addInternalCandidates(this, sourceRegMask);
-            blkNode->gtLsraInfo.internalIntCount++;
+            info->addInternalCandidates(this, sourceRegMask);
+            info->internalIntCount++;
         }
     }
+    if (sourceInfo != nullptr)
+    {
+        useList.Add(sourceInfo, blkNode->IsReverseOp());
+    }
+
+    if (blkNode->OperIs(GT_STORE_DYN_BLK))
+    {
+        // The block size argument is a third argument to GT_STORE_DYN_BLK
+        info->srcCount++;
+
+        GenTree* blockSize = blkNode->AsDynBlk()->gtDynamicSize;
+        sizeInfo           = getLocationInfo(blockSize);
+        useList.Add(sizeInfo, blkNode->AsDynBlk()->gtEvalSizeFirst);
+    }
+
     if (blkSizeRegMask != RBM_NONE)
     {
         if (size != 0)
        {
             // Reserve a temp register for the block size argument.
-            blkNode->gtLsraInfo.addInternalCandidates(this, blkSizeRegMask);
-            blkNode->gtLsraInfo.internalIntCount++;
+            info->addInternalCandidates(this, blkSizeRegMask);
+            info->internalIntCount++;
         }
         else
        {
             // The block size argument is a third argument to GT_STORE_DYN_BLK
-            assert(blkNode->gtOper == GT_STORE_DYN_BLK);
-            blkNode->gtLsraInfo.setSrcCount(3);
-            GenTree* blockSize = blkNode->AsDynBlk()->gtDynamicSize;
-            blockSize->gtLsraInfo.setSrcCandidates(this, blkSizeRegMask);
+            assert((blkNode->gtOper == GT_STORE_DYN_BLK) && (sizeInfo != nullptr));
+            info->setSrcCount(3);
+            sizeInfo->info.setSrcCandidates(this, blkSizeRegMask);
         }
     }
 }
@@ -1617,10 +1681,9 @@ void LinearScan::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
 // Return Value:
 //    None.
 //
-void LinearScan::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk)
+void LinearScan::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk, TreeNodeInfo* info)
 {
-    TreeNodeInfo* info = &(putArgStk->gtLsraInfo);
-    info->srcCount     = 0;
+    info->srcCount = 0;
     assert(info->dstCount == 0);
 
     if (putArgStk->gtOp1->gtOper == GT_FIELD_LIST)
@@ -1672,6 +1735,7 @@ void LinearScan::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk)
             fieldCount++;
             if (!fieldNode->isContained())
            {
+                appendLocationInfoToList(fieldNode);
                 info->srcCount++;
             }
         }
@@ -1693,7 +1757,6 @@ void LinearScan::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk)
        // For PutArgStk of a TYP_SIMD12, we need a SIMD temp register.
         if (needsSimdTemp)
        {
-            info->srcCount = putArgStk->gtOp1->gtLsraInfo.dstCount;
             assert(info->dstCount == 0);
             info->internalFloatCount += 1;
             info->addInternalCandidates(this, allSIMDRegs());
@@ -1711,7 +1774,8 @@ void LinearScan::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk)
     // For PutArgStk of a TYP_SIMD12, we need an extra register.
     if (putArgStk->isSIMD12())
     {
-        info->srcCount           = putArgStk->gtOp1->gtLsraInfo.dstCount;
+        appendLocationInfoToList(putArgStk->gtOp1);
+        info->srcCount           = 1;
         info->internalFloatCount = 1;
         info->setInternalCandidates(this, allSIMDRegs());
         return;
     }
@@ -1720,14 +1784,14 @@ void LinearScan::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk)
 
     if (type != TYP_STRUCT)
     {
-        TreeNodeInfoInitSimple(putArgStk);
+        TreeNodeInfoInitSimple(putArgStk, info);
         return;
     }
 
     GenTreePtr dst     = putArgStk;
     GenTreePtr srcAddr = nullptr;
 
-    info->srcCount = GetOperandSourceCount(src);
+    info->srcCount = GetOperandInfo(src);
 
     // If we have a buffer between XMM_REGSIZE_BYTES and CPBLK_UNROLL_LIMIT bytes, we'll use SSE2.
     // Structs and buffer with sizes <= CPBLK_UNROLL_LIMIT bytes are occurring in more than 95% of
@@ -1792,10 +1856,8 @@ void LinearScan::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk)
 // Return Value:
 //    None.
 //
-void LinearScan::TreeNodeInfoInitLclHeap(GenTree* tree)
+void LinearScan::TreeNodeInfoInitLclHeap(GenTree* tree, TreeNodeInfo* info)
 {
-    TreeNodeInfo* info = &(tree->gtLsraInfo);
-
     info->srcCount = 1;
     assert(info->dstCount == 1);
 
@@ -1867,6 +1929,7 @@ void LinearScan::TreeNodeInfoInitLclHeap(GenTree* tree)
         }
         else
        {
+            appendLocationInfoToList(size);
             if (!compiler->info.compInitMem)
            {
                 info->internalIntCount = 2;
@@ -1887,19 +1950,16 @@ void LinearScan::TreeNodeInfoInitLclHeap(GenTree* tree)
 // Return Value:
 //    None.
 //
-void LinearScan::TreeNodeInfoInitModDiv(GenTree* tree)
+void LinearScan::TreeNodeInfoInitModDiv(GenTree* tree, TreeNodeInfo* info)
 {
-    TreeNodeInfo* info = &(tree->gtLsraInfo);
-
     GenTree* op1 = tree->gtGetOp1();
     GenTree* op2 = tree->gtGetOp2();
 
-    info->srcCount = GetOperandSourceCount(op1);
-    info->srcCount += GetOperandSourceCount(op2);
     assert(info->dstCount == 1);
 
     if (varTypeIsFloating(tree->TypeGet()))
     {
+        info->srcCount = appendBinaryLocationInfoToList(tree->AsOp());
         return;
     }
@@ -1936,33 +1996,29 @@ void LinearScan::TreeNodeInfoInitModDiv(GenTree* tree)
         info->internalIntCount = 1;
         info->setInternalCandidates(this, allRegs(TYP_INT));
 
-        loVal->gtLsraInfo.setSrcCandidates(this, RBM_EAX);
-        hiVal->gtLsraInfo.setSrcCandidates(this, RBM_EDX);
+        LocationInfoListNode* loValInfo = getLocationInfo(loVal);
+        LocationInfoListNode* hiValInfo = getLocationInfo(hiVal);
+        loValInfo->info.setSrcCandidates(this, RBM_EAX);
+        hiValInfo->info.setSrcCandidates(this, RBM_EDX);
+        useList.Append(loValInfo);
+        useList.Append(hiValInfo);
+        info->srcCount = 2;
     }
     else
 #endif
     {
         // If possible would like to have op1 in RAX to avoid a register move
-        op1->gtLsraInfo.setSrcCandidates(this, RBM_RAX);
+        LocationInfoListNode* op1Info = getLocationInfo(op1);
+        op1Info->info.setSrcCandidates(this, RBM_RAX);
+        useList.Append(op1Info);
+        info->srcCount = 1;
     }
 
-    if (op2->isContained())
-    {
-        if (op2->gtOper == GT_IND)
-        {
-            if (op2->AsIndir()->Base() != nullptr)
-            {
-                op2->AsIndir()->Base()->gtLsraInfo.setSrcCandidates(this, allRegs(TYP_INT) & ~(RBM_RAX | RBM_RDX));
-            }
-            if (op2->AsIndir()->Index() != nullptr)
-            {
-                op2->AsIndir()->Index()->gtLsraInfo.setSrcCandidates(this, allRegs(TYP_INT) & ~(RBM_RAX | RBM_RDX));
-            }
-        }
-    }
-    else
+    LocationInfoListNode* op2Info;
+    info->srcCount += GetOperandInfo(op2, &op2Info);
+    for (; op2Info != nullptr; op2Info = op2Info->Next())
     {
-        op2->gtLsraInfo.setSrcCandidates(this, allRegs(TYP_INT) & ~(RBM_RAX | RBM_RDX));
+        op2Info->info.setSrcCandidates(this, allRegs(TYP_INT) & ~(RBM_RAX | RBM_RDX));
     }
 }
 
@@ -1975,16 +2031,14 @@ void LinearScan::TreeNodeInfoInitModDiv(GenTree* tree)
 // Return Value:
 //    None.
 //
-void LinearScan::TreeNodeInfoInitIntrinsic(GenTree* tree)
+void LinearScan::TreeNodeInfoInitIntrinsic(GenTree* tree, TreeNodeInfo* info)
 {
-    TreeNodeInfo* info = &(tree->gtLsraInfo);
-
     // Both operand and its result must be of floating point type.
     GenTree* op1 = tree->gtGetOp1();
     assert(varTypeIsFloating(op1));
     assert(op1->TypeGet() == tree->TypeGet());
 
-    info->srcCount = GetOperandSourceCount(op1);
+    info->srcCount = GetOperandInfo(op1);
     assert(info->dstCount == 1);
 
     switch (tree->gtIntrinsic.gtIntrinsicId)
@@ -2038,10 +2092,8 @@ void LinearScan::TreeNodeInfoInitIntrinsic(GenTree* tree)
 // Return Value:
 //    None.
 
-void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree)
+void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree, TreeNodeInfo* info)
 {
-    TreeNodeInfo* info = &(simdTree->gtLsraInfo);
-
     // Only SIMDIntrinsicInit can be contained. Other than that,
     // only SIMDIntrinsicOpEquality and SIMDIntrinsicOpInEquality can have 0 dstCount.
     if (simdTree->isContained())
@@ -2054,27 +2106,22 @@ void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree)
                (simdTree->gtSIMDIntrinsicID == SIMDIntrinsicOpInEquality));
     }
     SetContainsAVXFlags(true, simdTree->gtSIMDSize);
-    switch (simdTree->gtSIMDIntrinsicID)
+    GenTree* op1   = simdTree->gtOp.gtOp1;
+    GenTree* op2   = simdTree->gtOp.gtOp2;
+    info->srcCount = 0;
+    if (!op1->OperIs(GT_LIST))
+    {
+        info->srcCount += GetOperandInfo(op1);
+    }
+    if ((op2 != nullptr) && !op2->isContained())
     {
-        GenTree* op1;
-        GenTree* op2;
+        info->srcCount += GetOperandInfo(op2);
+    }
 
+    switch (simdTree->gtSIMDIntrinsicID)
+    {
         case SIMDIntrinsicInit:
        {
-            op1 = simdTree->gtOp.gtOp1;
-
-#if !defined(_TARGET_64BIT_)
-            if (op1->OperGet() == GT_LONG)
-            {
-                info->srcCount = 2;
-                assert(op1->isContained());
-            }
-            else
-#endif // !defined(_TARGET_64BIT_)
-            {
-                info->srcCount = 1;
-            }
-
             // This sets all fields of a SIMD struct to the given value.
             // Mark op1 as contained if it is either zero or int constant of all 1's,
             // or a float constant with 16 or 32 byte simdType (AVX case)
@@ -2085,7 +2132,7 @@ void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree)
 #if !defined(_TARGET_64BIT_)
                 if (op1->OperGet() == GT_LONG)
                 {
-                    op1->SetContained();
+                    assert(op1->isContained());
                     GenTree* op1lo = op1->gtGetOp1();
                     GenTree* op1hi = op1->gtGetOp2();
 
@@ -2094,28 +2141,35 @@ void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree)
                     assert(op1hi->isContained());
                     assert((op1lo->IsIntegralConst(0) && op1hi->IsIntegralConst(0)) ||
                            (op1lo->IsIntegralConst(-1) && op1hi->IsIntegralConst(-1)));
-                    info->srcCount = 0;
+                    assert(info->srcCount == 0);
                 }
                 else
                {
-                    // need a temp
+                    assert(info->srcCount == 2);
                     info->internalFloatCount = 1;
                     info->setInternalCandidates(this, allSIMDRegs());
                     info->isInternalRegDelayFree = true;
-                    info->srcCount               = 2;
                 }
             }
-            else
 #endif // !defined(_TARGET_64BIT_)
-            {
-                info->srcCount = op1->isContained() ? 0 : 1;
-            }
         }
         break;
 
         case SIMDIntrinsicInitN:
        {
-            info->srcCount = (short)(simdTree->gtSIMDSize / genTypeSize(simdTree->gtSIMDBaseType));
+            var_types baseType = simdTree->gtSIMDBaseType;
+            info->srcCount     = (short)(simdTree->gtSIMDSize / genTypeSize(baseType));
+            int initCount      = 0;
+            for (GenTree* list = op1; list != nullptr; list = list->gtGetOp2())
+            {
+                assert(list->OperGet() == GT_LIST);
+                GenTree* listItem = list->gtGetOp1();
+                assert(listItem->TypeGet() == baseType);
+                assert(!listItem->isContained());
+                appendLocationInfoToList(listItem);
+                initCount++;
+            }
+            assert(initCount == info->srcCount);
 
             // Need an internal register to stitch together all the values into a single vector in a SIMD reg.
             info->internalFloatCount = 1;
@@ -2125,13 +2179,13 @@ void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree)
 
         case SIMDIntrinsicInitArray:
             // We have an array and an index, which may be contained.
-            info->srcCount = simdTree->gtGetOp2()->isContained() ? 1 : 2;
+            assert(info->srcCount == (simdTree->gtGetOp2()->isContained() ? 1 : 2));
             break;
 
         case SIMDIntrinsicDiv:
             // SSE2 has no instruction support for division on integer vectors
             noway_assert(varTypeIsFloating(simdTree->gtSIMDBaseType));
-            info->srcCount = 2;
+            assert(info->srcCount == 2);
             break;
 
         case SIMDIntrinsicAbs:
@@ -2142,13 +2196,13 @@ void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree)
             assert(simdTree->gtSIMDBaseType == TYP_INT || simdTree->gtSIMDBaseType == TYP_SHORT ||
                    simdTree->gtSIMDBaseType == TYP_BYTE);
             assert(compiler->getSIMDSupportLevel() >= SIMD_SSE4_Supported);
-            info->srcCount = 1;
+            assert(info->srcCount == 1);
             break;
 
         case SIMDIntrinsicSqrt:
             // SSE2 has no instruction support for sqrt on integer vectors.
             noway_assert(varTypeIsFloating(simdTree->gtSIMDBaseType));
-            info->srcCount = 1;
+            assert(info->srcCount == 1);
             break;
 
         case SIMDIntrinsicAdd:
@@ -2160,7 +2214,7 @@ void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree)
         case SIMDIntrinsicBitwiseXor:
         case SIMDIntrinsicMin:
         case SIMDIntrinsicMax:
-            info->srcCount = 2;
+            assert(info->srcCount == 2);
 
             // SSE2 32-bit integer multiplication requires two temp regs
             if (simdTree->gtSIMDIntrinsicID == SIMDIntrinsicMul && simdTree->gtSIMDBaseType == TYP_INT &&
@@ -2172,14 +2226,14 @@ void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree)
             break;
 
         case SIMDIntrinsicEqual:
-            info->srcCount = 2;
+            assert(info->srcCount == 2);
             break;
 
         // SSE2 doesn't support < and <= directly on int vectors.
         // Instead we need to use > and >= with swapped operands.
         case SIMDIntrinsicLessThan:
         case SIMDIntrinsicLessThanOrEqual:
-            info->srcCount = 2;
+            assert(info->srcCount == 2);
             noway_assert(!varTypeIsIntegral(simdTree->gtSIMDBaseType));
             break;
 
@@ -2188,7 +2242,7 @@ void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree)
         // Instead we need to use < and <= with swapped operands.
         case SIMDIntrinsicGreaterThan:
             noway_assert(!varTypeIsFloating(simdTree->gtSIMDBaseType));
-            info->srcCount = 2;
+            assert(info->srcCount == 2);
             break;
 
         case SIMDIntrinsicOpEquality:
@@ -2198,14 +2252,14 @@ void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree)
                 // If the second operand is contained then ContainCheckSIMD has determined
                 // that PTEST can be used. We only need a single source register and no
                 // internal registers.
-                info->srcCount = 1;
+                assert(info->srcCount == 1);
             }
             else
            {
                 // Can't use PTEST so we need 2 source registers, 1 internal SIMD register
                 // (to hold the result of PCMPEQD or other similar SIMD compare instruction)
                 // and one internal INT register (to hold the result of PMOVMSKB).
-                info->srcCount = 2;
+                assert(info->srcCount == 2);
                 info->internalFloatCount = 1;
                 info->setInternalCandidates(this, allSIMDRegs());
                 info->internalIntCount = 1;
@@ -2250,7 +2304,7 @@ void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree)
                 info->internalFloatCount = (compiler->getSIMDSupportLevel() == SIMD_AVX2_Supported) ? 2 : 1;
                 info->setInternalCandidates(this, allSIMDRegs());
             }
-            info->srcCount = 2;
+            assert(info->srcCount == 2);
             break;
 
         case SIMDIntrinsicGetItem:
@@ -2260,27 +2314,11 @@ void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree)
             //  - index (which element to get)
             // The result is baseType of SIMD struct.
             // op1 may be a contained memory op, but if so we will consume its address.
-            info->srcCount = 0;
-            op1            = simdTree->gtOp.gtOp1;
-            op2            = simdTree->gtOp.gtOp2;
-
-            // op2 may be a contained constant.
-            if (!op2->isContained())
-            {
-                info->srcCount++;
-            }
+            op1 = simdTree->gtOp.gtOp1;
+            op2 = simdTree->gtOp.gtOp2;
 
-            if (op1->isContained())
-            {
-                // Although GT_IND of TYP_SIMD12 reserves an internal float
-                // register for reading 4 and 8 bytes from memory and
-                // assembling them into target XMM reg, it is not required
-                // in this case.
-                op1->gtLsraInfo.internalIntCount   = 0;
-                op1->gtLsraInfo.internalFloatCount = 0;
-                info->srcCount += GetOperandSourceCount(op1);
-            }
-            else
+            if (!op1->isContained())
             {
                 // If the index is not a constant, we will use the SIMD temp location to store the vector.
                 // Otherwise, if the baseType is floating point, the targetReg will be a xmm reg and we
@@ -2291,7 +2329,6 @@ void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree)
                 // In all other cases with constant index, we need a temp xmm register to extract the
                 // element if index is other than zero.
 
-                info->srcCount++;
                 if (!op2->IsCnsIntOrI())
                {
                     (void)compiler->getSIMDInitTempVarNum();
@@ -2324,7 +2361,7 @@ void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree)
         case SIMDIntrinsicSetY:
         case SIMDIntrinsicSetZ:
         case SIMDIntrinsicSetW:
-            info->srcCount = 2;
+            assert(info->srcCount == 2);
 
             // We need an internal integer register for SSE2 codegen
             if (compiler->getSIMDSupportLevel() == SIMD_SSE2_Supported)
@@ -2336,11 +2373,11 @@ void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree)
             break;
 
         case SIMDIntrinsicCast:
-            info->srcCount = 1;
+            assert(info->srcCount == 1);
             break;
 
         case SIMDIntrinsicConvertToSingle:
-            info->srcCount = 1;
+            assert(info->srcCount == 1);
             if (simdTree->gtSIMDBaseType == TYP_UINT)
            {
                 // We need an internal register different from targetReg.
@@ -2353,12 +2390,12 @@ void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree)
 
         case SIMDIntrinsicConvertToUInt32:
         case SIMDIntrinsicConvertToInt32:
-            info->srcCount = 1;
+            assert(info->srcCount == 1);
             break;
 
         case SIMDIntrinsicWidenLo:
         case SIMDIntrinsicWidenHi:
-            info->srcCount = 1;
+            assert(info->srcCount == 1);
             if (varTypeIsIntegral(simdTree->gtSIMDBaseType))
            {
                 // We need an internal register different from targetReg.
@@ -2370,9 +2407,9 @@ void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree)
 
         case SIMDIntrinsicConvertToInt64:
         case SIMDIntrinsicConvertToUInt64:
+            assert(info->srcCount == 1);
             // We need an internal register different from targetReg.
             info->isInternalRegDelayFree = true;
-            info->srcCount               = 1;
             info->internalIntCount       = 1;
             if (compiler->getSIMDSupportLevel() == SIMD_AVX2_Supported)
            {
@@ -2386,9 +2423,9 @@ void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree)
             break;
 
         case SIMDIntrinsicConvertToDouble:
+            assert(info->srcCount == 1);
             // We need an internal register different from targetReg.
             info->isInternalRegDelayFree = true;
-            info->srcCount               = 1;
             info->internalIntCount       = 1;
 #ifdef _TARGET_X86_
             if (simdTree->gtSIMDBaseType == TYP_LONG)
@@ -2409,9 +2446,9 @@ void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree)
             break;
 
         case SIMDIntrinsicNarrow:
+            assert(info->srcCount == 2);
             // We need an internal register different from targetReg.
             info->isInternalRegDelayFree = true;
-            info->srcCount               = 2;
             if ((compiler->getSIMDSupportLevel() == SIMD_AVX2_Supported) && (simdTree->gtSIMDBaseType != TYP_DOUBLE))
            {
                 info->internalFloatCount = 2;
@@ -2424,9 +2461,9 @@ void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree)
             break;
 
         case SIMDIntrinsicShuffleSSE2:
+            assert(info->srcCount == 1);
             // Second operand is an integer constant and marked as contained.
             assert(simdTree->gtOp.gtOp2->isContainedIntOrIImmed());
-            info->srcCount = 1;
             break;
 
         case SIMDIntrinsicGetX:
@@ -2457,7 +2494,7 @@ void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree)
 // Return Value:
 //    None.
 
-void LinearScan::TreeNodeInfoInitHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
+void LinearScan::TreeNodeInfoInitHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, TreeNodeInfo* info)
 {
     NamedIntrinsic intrinsicID = intrinsicTree->gtHWIntrinsicId;
     InstructionSet isa         = compiler->isaOfHWIntrinsic(intrinsicID);
@@ -2465,12 +2502,11 @@ void LinearScan::TreeNodeInfoInitHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
     {
         SetContainsAVXFlags(true, 32);
     }
-    TreeNodeInfo* info = &(intrinsicTree->gtLsraInfo);
+    info->srcCount += GetOperandInfo(intrinsicTree->gtOp.gtOp1);
     if (intrinsicTree->gtGetOp2IfPresent() != nullptr)
     {
-        info->srcCount += GetOperandSourceCount(intrinsicTree->gtOp.gtOp2);
+        info->srcCount += GetOperandInfo(intrinsicTree->gtOp.gtOp2);
     }
-    info->srcCount += GetOperandSourceCount(intrinsicTree->gtOp.gtOp1);
 
 #ifdef _TARGET_X86_
     if (intrinsicTree->gtHWIntrinsicId == NI_SSE42_Crc32)
@@ -2482,7 +2518,8 @@ void LinearScan::TreeNodeInfoInitHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
         var_types srcType = intrinsicTree->gtSIMDBaseType;
         if (varTypeIsByte(srcType))
        {
-            intrinsicTree->gtOp.gtOp2->gtLsraInfo.setSrcCandidates(this, RBM_BYTE_REGS);
+            LocationInfoListNode* op2Info = useList.GetSecond(INDEBUG(intrinsicTree->gtGetOp2()));
+            op2Info->info.setSrcCandidates(this, RBM_BYTE_REGS);
         }
     }
 #endif
@@ -2498,10 +2535,8 @@ void LinearScan::TreeNodeInfoInitHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
 // Return Value:
 //    None.
 //
-void LinearScan::TreeNodeInfoInitCast(GenTree* tree)
+void LinearScan::TreeNodeInfoInitCast(GenTree* tree, TreeNodeInfo* info)
 {
-    TreeNodeInfo* info = &(tree->gtLsraInfo);
-
     // TODO-XArch-CQ: Int-To-Int conversions - castOp cannot be a memory op and must have an assigned register.
     //         see CodeGen::genIntToIntCast()
 
@@ -2513,7 +2548,7 @@ void LinearScan::TreeNodeInfoInitCast(GenTree* tree)
     GenTreePtr castOp     = tree->gtCast.CastOp();
     var_types  castOpType = castOp->TypeGet();
 
-    info->srcCount = GetOperandSourceCount(castOp);
+    info->srcCount = GetOperandInfo(castOp);
     assert(info->dstCount == 1);
     if (tree->gtFlags & GTF_UNSIGNED)
     {
@@ -2542,19 +2577,23 @@ void LinearScan::TreeNodeInfoInitCast(GenTree* tree)
 // Return Value:
 //    None.
 //
-void LinearScan::TreeNodeInfoInitGCWriteBarrier(GenTree* tree)
+void LinearScan::TreeNodeInfoInitGCWriteBarrier(GenTree* tree, TreeNodeInfo* info)
 {
     assert(tree->OperGet() == GT_STOREIND);
 
-    GenTreeStoreInd* dst  = tree->AsStoreInd();
-    GenTreePtr       addr = dst->Addr();
-    GenTreePtr       src  = dst->Data();
+    GenTreeStoreInd*      dst      = tree->AsStoreInd();
+    GenTreePtr            addr     = dst->Addr();
+    GenTreePtr            src      = dst->Data();
+    LocationInfoListNode* addrInfo = getLocationInfo(addr);
+    LocationInfoListNode* srcInfo  = getLocationInfo(src);
 
     // In the case where we are doing a helper assignment, we need to actually instantiate the
     // address in a register.
-    assert(!addr->isContained());
-    tree->gtLsraInfo.srcCount = 1 + GetIndirSourceCount(dst);
-    assert(tree->gtLsraInfo.dstCount == 0);
+    assert(!addr->isContained() && !src->isContained());
+    useList.Append(addrInfo);
+    useList.Append(srcInfo);
+    info->srcCount = 2;
+    assert(info->dstCount == 0);
 
     bool useOptimizedWriteBarrierHelper = false; // By default, assume no optimized write barriers.
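Before the candidate-assignment hunks that follow, it is worth spelling out the pattern this change applies throughout: register constraints that were previously written onto an operand's `gtLsraInfo` are now written onto a per-use `LocationInfoListNode` appended to the consuming node's use list. The sketch below is a hypothetical illustration only, assuming the `LinearScan` members that appear elsewhere in this diff (`getLocationInfo`, `useList`, `TreeNodeInfo::setSrcCandidates`); the handler name and the `RBM_ARG_0` constraint are not part of the patch.

```cpp
// Hypothetical sketch (not part of the patch): the per-use pattern adopted here.
// The constraint is recorded on the use's LocationInfo, not on the operand node.
void LinearScan::TreeNodeInfoInitExample(GenTreeOp* node, TreeNodeInfo* info)
{
    GenTree* op1 = node->gtGetOp1();
    assert(!op1->isContained()); // a contained operand would contribute no use of its own

    LocationInfoListNode* op1Info = getLocationInfo(op1); // build the use record
    op1Info->info.setSrcCandidates(this, RBM_ARG_0);      // constrain this particular use
    useList.Append(op1Info);                              // uses are consumed in list order
    info->srcCount = 1;                                   // one register source
}
```

Because the record is per-use rather than per-node, two consumers of the same value can impose different candidate sets without clobbering each other, which the old `gtLsraInfo`-based scheme could not express.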
@@ -2576,8 +2615,8 @@ void LinearScan::TreeNodeInfoInitGCWriteBarrier(GenTree* tree)
         // Special write barrier:
         // op1 (addr) goes into REG_WRITE_BARRIER (rdx) and
         // op2 (src) goes into any int register.
-        addr->gtLsraInfo.setSrcCandidates(this, RBM_WRITE_BARRIER);
-        src->gtLsraInfo.setSrcCandidates(this, RBM_WRITE_BARRIER_SRC);
+        addrInfo->info.setSrcCandidates(this, RBM_WRITE_BARRIER);
+        srcInfo->info.setSrcCandidates(this, RBM_WRITE_BARRIER_SRC);
     }
 
 #else // !defined(_TARGET_X86_)
@@ -2591,14 +2630,14 @@ void LinearScan::TreeNodeInfoInitGCWriteBarrier(GenTree* tree)
         // For the standard JIT Helper calls:
         // op1 (addr) goes into REG_ARG_0 and
         // op2 (src) goes into REG_ARG_1
-        addr->gtLsraInfo.setSrcCandidates(this, RBM_ARG_0);
-        src->gtLsraInfo.setSrcCandidates(this, RBM_ARG_1);
+        addrInfo->info.setSrcCandidates(this, RBM_ARG_0);
+        srcInfo->info.setSrcCandidates(this, RBM_ARG_1);
     }
 
     // Both src and dst must reside in a register, which they should since we haven't set
     // either of them as contained.
-    assert(addr->gtLsraInfo.dstCount == 1);
-    assert(src->gtLsraInfo.dstCount == 1);
+    assert(addrInfo->info.dstCount == 1);
+    assert(srcInfo->info.dstCount == 1);
 }
 
 //-----------------------------------------------------------------------------------------
@@ -2607,7 +2646,7 @@ void LinearScan::TreeNodeInfoInitGCWriteBarrier(GenTree* tree)
 // Arguments:
 //    indirTree - GT_IND or GT_STOREIND gentree node
 //
-void LinearScan::TreeNodeInfoInitIndir(GenTreeIndir* indirTree)
+void LinearScan::TreeNodeInfoInitIndir(GenTreeIndir* indirTree, TreeNodeInfo* info)
 {
     // If this is the rhs of a block copy (i.e. non-enregisterable struct),
     // it has no register requirements.
@@ -2616,9 +2655,7 @@ void LinearScan::TreeNodeInfoInitIndir(GenTreeIndir* indirTree)
         return;
     }
 
-    TreeNodeInfo* info = &(indirTree->gtLsraInfo);
-
-    info->srcCount = GetIndirSourceCount(indirTree);
+    int indirSrcCount = GetIndirInfo(indirTree);
     if (indirTree->gtOper == GT_STOREIND)
     {
         GenTree* source = indirTree->gtOp.gtOp2;
@@ -2631,7 +2668,11 @@ void LinearScan::TreeNodeInfoInitIndir(GenTreeIndir* indirTree)
 
             if (source->OperIsShiftOrRotate())
            {
-                TreeNodeInfoInitShiftRotate(source);
+                info->srcCount += TreeNodeInfoInitShiftRotate(source, info);
+            }
+            else
+            {
+                info->srcCount += appendBinaryLocationInfoToList(source->AsOp());
             }
             if (indirTree->AsStoreInd()->IsRMWDstOp1())
            {
@@ -2646,35 +2687,42 @@ void LinearScan::TreeNodeInfoInitIndir(GenTreeIndir* indirTree)
             }
             if (nonMemSource != nullptr)
            {
-                info->srcCount += GetOperandSourceCount(nonMemSource);
                 assert(!nonMemSource->isContained() || (!nonMemSource->isMemoryOp() && !nonMemSource->IsLocal()));
 #ifdef _TARGET_X86_
                 if (varTypeIsByte(indirTree) && !nonMemSource->isContained())
                {
                     // If storeInd is of TYP_BYTE, set source to byteable registers.
-                    regMaskTP regMask = nonMemSource->gtLsraInfo.getSrcCandidates(this);
+                    TreeNodeInfo& nonMemSourceInfo = useList.GetTreeNodeInfo(nonMemSource);
+                    regMaskTP     regMask          = nonMemSourceInfo.getSrcCandidates(this);
                     regMask &= ~RBM_NON_BYTE_REGS;
                     assert(regMask != RBM_NONE);
-                    nonMemSource->gtLsraInfo.setSrcCandidates(this, regMask);
+                    nonMemSourceInfo.setSrcCandidates(this, regMask);
                 }
 #endif
             }
         }
         else
        {
-            info->srcCount += GetOperandSourceCount(source);
-        }
 #ifdef _TARGET_X86_
-        if (varTypeIsByte(indirTree) && !source->isContained())
-        {
-            // If storeInd is of TYP_BYTE, set source to byteable registers.
-            regMaskTP regMask = source->gtLsraInfo.getSrcCandidates(this);
-            regMask &= ~RBM_NON_BYTE_REGS;
-            assert(regMask != RBM_NONE);
-            source->gtLsraInfo.setSrcCandidates(this, regMask);
-        }
+            if (varTypeIsByte(indirTree) && !source->isContained())
+            {
+                // If storeInd is of TYP_BYTE, set source to byteable registers.
+                LocationInfoListNode* sourceInfo = getLocationInfo(source);
+                regMaskTP             regMask    = sourceInfo->info.getSrcCandidates(this);
+                regMask &= ~RBM_NON_BYTE_REGS;
+                assert(regMask != RBM_NONE);
+                sourceInfo->info.setSrcCandidates(this, regMask);
+                useList.Append(sourceInfo);
+                info->srcCount++;
+            }
+            else
 #endif
+            {
+                info->srcCount += GetOperandInfo(source);
+            }
+        }
     }
+    info->srcCount += indirSrcCount;
 
 #ifdef FEATURE_SIMD
     if (indirTree->TypeGet() == TYP_SIMD12)
@@ -2712,12 +2760,11 @@ void LinearScan::TreeNodeInfoInitIndir(GenTreeIndir* indirTree)
 // Return Value:
 //    None.
 //
-void LinearScan::TreeNodeInfoInitCmp(GenTreePtr tree)
+void LinearScan::TreeNodeInfoInitCmp(GenTreePtr tree, TreeNodeInfo* info)
 {
     assert(tree->OperIsCompare() || tree->OperIs(GT_CMP));
 
-    TreeNodeInfo* info = &(tree->gtLsraInfo);
-    info->srcCount     = 0;
+    info->srcCount = 0;
     assert((info->dstCount == 1) || (tree->TypeGet() == TYP_VOID));
 
 #ifdef _TARGET_X86_
@@ -2734,11 +2781,7 @@ void LinearScan::TreeNodeInfoInitCmp(GenTreePtr tree)
     var_types op1Type = op1->TypeGet();
     var_types op2Type = op2->TypeGet();
 
-    if (op1->TypeGet() != TYP_VOID)
-    {
-        info->srcCount += GetOperandSourceCount(op1);
-    }
-    info->srcCount += GetOperandSourceCount(op2);
+    info->srcCount = appendBinaryLocationInfoToList(tree->AsOp());
 }
 
 //------------------------------------------------------------------------
@@ -2750,18 +2793,16 @@ void LinearScan::TreeNodeInfoInitCmp(GenTreePtr tree)
 // Return Value:
 //    None.
 //
-void LinearScan::TreeNodeInfoInitMul(GenTreePtr tree)
+void LinearScan::TreeNodeInfoInitMul(GenTreePtr tree, TreeNodeInfo* info)
 {
 #if defined(_TARGET_X86_)
     assert(tree->OperIs(GT_MUL, GT_MULHI, GT_MUL_LONG));
 #else
     assert(tree->OperIs(GT_MUL, GT_MULHI));
 #endif
-    TreeNodeInfo* info = &(tree->gtLsraInfo);
-
-    GenTree* op1 = tree->gtOp.gtOp1;
-    GenTree* op2 = tree->gtOp.gtOp2;
-    info->srcCount = GetOperandSourceCount(op1);
-    info->srcCount += GetOperandSourceCount(op2);
+    GenTree* op1   = tree->gtOp.gtOp1;
+    GenTree* op2   = tree->gtOp.gtOp2;
+    info->srcCount = appendBinaryLocationInfoToList(tree->AsOp());
     assert(info->dstCount == 1);
 
     // Case of float/double mul.
@@ -2961,37 +3002,6 @@ bool LinearScan::ExcludeNonByteableRegisters(GenTree* tree)
 }
 #endif // _TARGET_X86_
 
-//------------------------------------------------------------------------
-// GetOperandSourceCount: Get the source registers for an operand that might be contained.
-//
-// Arguments:
-//    node      - The node of interest
-//
-// Return Value:
-//    The number of source registers used by the *parent* of this node.
-//
-int LinearScan::GetOperandSourceCount(GenTree* node)
-{
-    if (!node->isContained())
-    {
-        return 1;
-    }
-
-#if !defined(_TARGET_64BIT_)
-    if (node->OperIs(GT_LONG))
-    {
-        return 2;
-    }
-#endif // !defined(_TARGET_64BIT_)
-    if (node->OperIsIndir())
-    {
-        const unsigned srcCount = GetIndirSourceCount(node->AsIndir());
-        return srcCount;
-    }
-
-    return 0;
-}
-
 #endif // _TARGET_XARCH_
 
 #endif // !LEGACY_BACKEND
diff --git a/src/coreclr/src/jit/nodeinfo.h b/src/coreclr/src/jit/nodeinfo.h
index 5f03da27764c220dea3a10120fcfedd9d1447f9a..d689037630073b69581c4a3049acad0437603146 100644
--- a/src/coreclr/src/jit/nodeinfo.h
+++ b/src/coreclr/src/jit/nodeinfo.h
@@ -15,7 +15,6 @@ class TreeNodeInfo
 public:
     TreeNodeInfo()
     {
-        loc                  = 0;
         _dstCount            = 0;
         _srcCount            = 0;
         _internalIntCount    = 0;
@@ -25,12 +24,9 @@ public:
         dstCandsIndex        = 0;
         internalCandsIndex   = 0;
         isLocalDefUse        = false;
-        isLsraAdded          = false;
         isDelayFree          = false;
         hasDelayFreeSrc      = false;
         isTgtPref            = false;
-        regOptional          = false;
-        definesAnyRegisters  = false;
         isInternalRegDelayFree = false;
 #ifdef DEBUG
         isInitialized = false;
@@ -97,8 +93,6 @@ public:
     void setInternalCandidates(LinearScan* lsra, regMaskTP mask);
     void addInternalCandidates(LinearScan* lsra, regMaskTP mask);
 
-    LsraLocation loc;
-
 public:
     unsigned char srcCandsIndex;
     unsigned char dstCandsIndex;
@@ -116,9 +110,6 @@ public:
     // nodes, or top-level nodes that are non-void.
     unsigned char isLocalDefUse : 1;
 
-    // Is this node added by LSRA, e.g. as a resolution or copy/reload move.
-    unsigned char isLsraAdded : 1;
-
     // isDelayFree is set when the register defined by this node will interfere with the destination
     // of the consuming node, and therefore it must not be freed immediately after use.
     unsigned char isDelayFree : 1;
@@ -132,14 +123,6 @@ public:
     // in the same register as op1.
     unsigned char isTgtPref : 1;
 
-    // Whether a spilled second src can be treated as a contained operand
-    unsigned char regOptional : 1;
-
-    // Whether or not a node defines any registers, whether directly (for nodes where dstCout is non-zero)
-    // or indirectly (for contained nodes, which propagate the transitive closure of the registers
-    // defined by their inputs). Used during buildRefPositionsForNode in order to avoid unnecessary work.
-    unsigned char definesAnyRegisters : 1;
-
     // Whether internal register needs to be different from targetReg
     // in which result is produced.
     unsigned char isInternalRegDelayFree : 1;
@@ -151,7 +134,7 @@ public:
 
 public:
     // Initializes the TreeNodeInfo value with the given values.
-    void Initialize(LinearScan* lsra, GenTree* node, LsraLocation location);
+    void Initialize(LinearScan* lsra, GenTree* node);
 
 #ifdef DEBUG
     void dump(LinearScan* lsra);
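For reference, the `GetOperandSourceCount` helper deleted above encoded the source-counting rule that the new `GetOperandInfo`/`GetIndirInfo` path is expected to preserve while additionally building the use list: a non-contained operand contributes one source, a contained `GT_LONG` pair contributes two, and a contained indirection contributes whatever its address mode consumes. A hedged restatement of that rule in isolation follows; the helper name is hypothetical, and `GetIndirInfo` is the replacement shown in `TreeNodeInfoInitIndir` (which, in the actual change, also appends use records as a side effect).

```cpp
// Hypothetical standalone restatement of the counting rule; not part of the patch.
int LinearScan::CountOperandSources(GenTree* node)
{
    if (!node->isContained())
    {
        return 1; // a register-producing operand is a single source
    }
#if !defined(_TARGET_64BIT_)
    if (node->OperIs(GT_LONG))
    {
        return 2; // a contained GT_LONG consumes its lo/hi halves
    }
#endif // !defined(_TARGET_64BIT_)
    if (node->OperIsIndir())
    {
        // A contained indirection consumes the sources of its address mode
        // (base and/or index registers, if they are not themselves contained).
        return GetIndirInfo(node->AsIndir());
    }
    return 0; // other contained nodes (e.g. immediates) consume no registers
}
```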