From 58fda174ec730750775920db4f116d16e7ba82a6 Mon Sep 17 00:00:00 2001 From: Mike Danes Date: Sat, 29 Apr 2017 13:50:26 +0300 Subject: [PATCH] Use BT in switch lowering Commit migrated from https://github.com/dotnet/coreclr/commit/9c1e0e042d0b48793449543603d046450e0e5527 --- src/coreclr/src/jit/lower.cpp | 228 +++++++++++++++++++++++++++++----- src/coreclr/src/jit/lower.h | 3 + 2 files changed, 202 insertions(+), 29 deletions(-) diff --git a/src/coreclr/src/jit/lower.cpp b/src/coreclr/src/jit/lower.cpp index 85561d3b6a1..9d63c4b5224 100644 --- a/src/coreclr/src/jit/lower.cpp +++ b/src/coreclr/src/jit/lower.cpp @@ -721,39 +721,40 @@ GenTree* Lowering::LowerSwitch(GenTree* node) } else { - // Lower the switch into an indirect branch using a jump table: - // - // 1. Create the constant for the default case - // 2. Generate a GT_GE condition to compare to the default case - // 3. Generate a GT_JTRUE to jump. - // 4. Load the jump table address into a local (presumably the just - // created constant for GT_SWITCH). - // 5. Create a new node for the lowered switch, this will both generate - // the branch table and also will be responsible for the indirect - // branch. - - JITDUMP("Lowering switch BB%02u: using jump table expansion\n", originalSwitchBB->bbNum); - - GenTree* switchValue = comp->gtNewLclvNode(tempLclNum, tempLclType); -#ifdef _TARGET_64BIT_ - if (tempLclType != TYP_I_IMPL) + // At this point the default case has already been handled and we need to generate a jump + // table based switch or a bit test based switch at the end of afterDefaultCondBlock. Both + // switch variants need the switch value so create the necessary LclVar node here. 
+ GenTree* switchValue = comp->gtNewLclvNode(tempLclNum, tempLclType); + LIR::Range& switchBlockRange = LIR::AsRange(afterDefaultCondBlock); + tempVarDsc->incRefCnts(blockWeight, comp); + switchBlockRange.InsertAtEnd(switchValue); + + // Try generating a bit test based switch first, + // if that's not possible a jump table based switch will be generated. + if (!TryLowerSwitchToBitTest(jumpTab, jumpCnt, targetCnt, afterDefaultCondBlock, switchValue)) { - // Note that the switch value is unsigned so the cast should be unsigned as well. - switchValue = comp->gtNewCastNode(TYP_I_IMPL, switchValue, TYP_U_IMPL); - switchValue->gtFlags |= GTF_UNSIGNED; - } + JITDUMP("Lowering switch BB%02u: using jump table expansion\n", originalSwitchBB->bbNum); + +#ifdef _TARGET_64BIT_ + if (tempLclType != TYP_I_IMPL) + { + // SWITCH_TABLE expects the switch value (the index into the jump table) to be TYP_I_IMPL. + // Note that the switch value is unsigned so the cast should be unsigned as well. + switchValue = comp->gtNewCastNode(TYP_I_IMPL, switchValue, TYP_U_IMPL); + switchValue->gtFlags |= GTF_UNSIGNED; + switchBlockRange.InsertAtEnd(switchValue); + } #endif - GenTreePtr gtTableSwitch = - comp->gtNewOperNode(GT_SWITCH_TABLE, TYP_VOID, switchValue, comp->gtNewJmpTableNode()); - /* Increment the lvRefCnt and lvRefCntWtd for temp */ - tempVarDsc->incRefCnts(blockWeight, comp); - // this block no longer branches to the default block - afterDefaultCondBlock->bbJumpSwt->removeDefault(); - comp->fgInvalidateSwitchDescMapEntry(afterDefaultCondBlock); + GenTree* switchTable = comp->gtNewJmpTableNode(); + GenTree* switchJump = comp->gtNewOperNode(GT_SWITCH_TABLE, TYP_VOID, switchValue, switchTable); + switchBlockRange.InsertAfter(switchValue, switchTable, switchJump); - LIR::Range& afterDefaultCondBBRange = LIR::AsRange(afterDefaultCondBlock); - afterDefaultCondBBRange.InsertAtEnd(LIR::SeqTree(comp, gtTableSwitch)); + // this block no longer branches to the default block + 
afterDefaultCondBlock->bbJumpSwt->removeDefault(); + } + + comp->fgInvalidateSwitchDescMapEntry(afterDefaultCondBlock); } GenTree* next = node->gtNext; @@ -765,6 +766,175 @@ GenTree* Lowering::LowerSwitch(GenTree* node) return next; } +//------------------------------------------------------------------------ +// TryLowerSwitchToBitTest: Attempts to transform a jump table switch into a bit test. +// +// Arguments: +// jumpTable - The jump table +// jumpCount - The number of blocks in the jump table +// targetCount - The number of distinct blocks in the jump table +// bbSwitch - The switch block +// switchValue - A LclVar node that provides the switch value +// +// Return value: +// true if the switch has been lowered to a bit test +// +// Notes: +// If the jump table contains at most 32 (64 on 64 bit targets) entries, not counting the default case, and there +// are at most 2 distinct jump targets then the jump table can be converted to a word +// of bits where a 0 bit corresponds to one jump target and a 1 bit corresponds to the +// other jump target. Instead of the indirect jump a BT-JCC sequence is used to jump +// to the appropriate target: +// mov eax, 245 ; jump table converted to a "bit table" +// bt eax, ebx ; ebx is supposed to contain the switch value +// jc target1 +// target0: +// ... +// target1: +// Such code is both shorter and faster (in part due to the removal of a memory load) +// than the traditional jump table based code. And of course, it also avoids the need +// to emit the jump table itself that can reach up to 256 bytes (for 64 entries). +// +bool Lowering::TryLowerSwitchToBitTest( + BasicBlock* jumpTable[], unsigned jumpCount, unsigned targetCount, BasicBlock* bbSwitch, GenTree* switchValue) +{ +#ifndef _TARGET_XARCH_ + // Other architectures may use this if they substitute GT_BT with equivalent code.
+ return false; +#else + assert(jumpCount >= 2); + assert(targetCount >= 2); + assert(bbSwitch->bbJumpKind == BBJ_SWITCH); + assert(switchValue->OperIs(GT_LCL_VAR)); + + // + // Quick check to see if it's worth going through the jump table. The bit test switch supports + // up to 2 targets but targetCount also includes the default block so we need to allow 3 targets. + // We'll ensure that there are only 2 targets when building the bit table. + // + + if (targetCount > 3) + { + return false; + } + + // + // The number of bits in the bit table is the same as the number of jump table entries. But the + // jump table also includes the default target (at the end) so we need to ignore it. The default + // has already been handled by a JTRUE(GT(switchValue, jumpCount - 2)) that LowerSwitch generates. + // + + const unsigned bitCount = jumpCount - 1; + + if (bitCount > (genTypeSize(TYP_I_IMPL) * 8)) + { + return false; + } + + // + // Build a bit table where a bit set to 0 corresponds to bbCase0 and a bit set to 1 corresponds to + // bbCase1. Simply use the first block in the jump table as bbCase1, later we can invert the bit + // table and/or swap the blocks if it's beneficial. + // + + BasicBlock* bbCase0 = nullptr; + BasicBlock* bbCase1 = jumpTable[0]; + size_t bitTable = 1; + + for (unsigned bitIndex = 1; bitIndex < bitCount; bitIndex++) + { + if (jumpTable[bitIndex] == bbCase1) + { + bitTable |= (size_t(1) << bitIndex); + } + else if (bbCase0 == nullptr) + { + bbCase0 = jumpTable[bitIndex]; + } + else if (jumpTable[bitIndex] != bbCase0) + { + // If it's neither bbCase0 nor bbCase1 then it means we have 3 targets. There can't be more + // than 3 because of the check at the start of the function. + assert(targetCount == 3); + return false; + } + } + + // + // One of the case blocks has to follow the switch block. 
This requirement could be avoided + // by adding a BBJ_ALWAYS block after the switch block but doing that sometimes negatively + // impacts register allocation. + // + + if ((bbSwitch->bbNext != bbCase0) && (bbSwitch->bbNext != bbCase1)) + { + return false; + } + +#ifdef _TARGET_64BIT_ + // + // See if we can avoid an 8 byte immediate on 64 bit targets. If all upper 32 bits are 1 + // then inverting the bit table will make them 0 so that the table now fits in 32 bits. + // Note that this does not change the number of bits in the bit table, it just takes + // advantage of the fact that loading a 32 bit immediate into a 64 bit register zero + // extends the immediate value to 64 bit. + // + + if (~bitTable <= UINT32_MAX) + { + bitTable = ~bitTable; + std::swap(bbCase0, bbCase1); + } +#endif + + // + // Rewire the blocks as needed and figure out the condition to use for JCC. + // + + genTreeOps bbSwitchCondition = GT_NONE; + bbSwitch->bbJumpKind = BBJ_COND; + + comp->fgRemoveAllRefPreds(bbCase1, bbSwitch); + comp->fgRemoveAllRefPreds(bbCase0, bbSwitch); + + if (bbSwitch->bbNext == bbCase0) + { + // GT_LT + GTF_UNSIGNED generates JC so we jump to bbCase1 when the bit is set + bbSwitchCondition = GT_LT; + bbSwitch->bbJumpDest = bbCase1; + + comp->fgAddRefPred(bbCase0, bbSwitch); + comp->fgAddRefPred(bbCase1, bbSwitch); + } + else + { + assert(bbSwitch->bbNext == bbCase1); + + // GT_GE + GTF_UNSIGNED generates JNC so we jump to bbCase0 when the bit is not set + bbSwitchCondition = GT_GE; + bbSwitch->bbJumpDest = bbCase0; + + comp->fgAddRefPred(bbCase0, bbSwitch); + comp->fgAddRefPred(bbCase1, bbSwitch); + } + + // + // Append BT(bitTable, switchValue) and JCC(condition) to the switch block. + // + + var_types bitTableType = (bitCount <= (genTypeSize(TYP_INT) * 8)) ?
TYP_INT : TYP_LONG; + GenTree* bitTableIcon = comp->gtNewIconNode(bitTable, bitTableType); + GenTree* bitTest = comp->gtNewOperNode(GT_BT, TYP_VOID, bitTableIcon, switchValue); + bitTest->gtFlags |= GTF_SET_FLAGS; + GenTreeCC* jcc = new (comp, GT_JCC) GenTreeCC(GT_JCC, bbSwitchCondition); + jcc->gtFlags |= GTF_UNSIGNED | GTF_USE_FLAGS; + + LIR::AsRange(bbSwitch).InsertAfter(switchValue, bitTableIcon, bitTest, jcc); + + return true; +#endif // _TARGET_XARCH_ +} + // NOTE: this method deliberately does not update the call arg table. It must only // be used by NewPutArg and LowerArg; these functions are responsible for updating // the call arg table as necessary. diff --git a/src/coreclr/src/jit/lower.h b/src/coreclr/src/jit/lower.h index 2be57655e61..97b0ccd590a 100644 --- a/src/coreclr/src/jit/lower.h +++ b/src/coreclr/src/jit/lower.h @@ -293,6 +293,9 @@ private: void AddrModeCleanupHelper(GenTreeAddrMode* addrMode, GenTree* node); GenTree* LowerSwitch(GenTree* node); + bool TryLowerSwitchToBitTest( + BasicBlock* jumpTable[], unsigned jumpCount, unsigned targetCount, BasicBlock* bbSwitch, GenTree* switchValue); + void LowerCast(GenTree* node); #if !CPU_LOAD_STORE_ARCH -- GitLab