diff --git a/src/cpu/x86/vm/assembler_x86.cpp b/src/cpu/x86/vm/assembler_x86.cpp index 49cc9ed65cf1875c85732123b3f31cc2a4b22c5f..d305ac185374ec0192695f6e0c9ef6cd168e18cb 100644 --- a/src/cpu/x86/vm/assembler_x86.cpp +++ b/src/cpu/x86/vm/assembler_x86.cpp @@ -1089,6 +1089,21 @@ void Assembler::andl(Register dst, Register src) { emit_arith(0x23, 0xC0, dst, src); } +void Assembler::andnl(Register dst, Register src1, Register src2) { + assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); + int encode = vex_prefix_0F38_and_encode(dst, src1, src2); + emit_int8((unsigned char)0xF2); + emit_int8((unsigned char)(0xC0 | encode)); +} + +void Assembler::andnl(Register dst, Register src1, Address src2) { + InstructionMark im(this); + assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); + vex_prefix_0F38(dst, src1, src2); + emit_int8((unsigned char)0xF2); + emit_operand(dst, src2); +} + void Assembler::bsfl(Register dst, Register src) { int encode = prefix_and_encode(dst->encoding(), src->encoding()); emit_int8(0x0F); @@ -1110,6 +1125,51 @@ void Assembler::bswapl(Register reg) { // bswap emit_int8((unsigned char)(0xC8 | encode)); } +void Assembler::blsil(Register dst, Register src) { + assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); + int encode = vex_prefix_0F38_and_encode(rbx, dst, src); + emit_int8((unsigned char)0xF3); + emit_int8((unsigned char)(0xC0 | encode)); +} + +void Assembler::blsil(Register dst, Address src) { + InstructionMark im(this); + assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); + vex_prefix_0F38(rbx, dst, src); + emit_int8((unsigned char)0xF3); + emit_operand(rbx, src); +} + +void Assembler::blsmskl(Register dst, Register src) { + assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); + int encode = vex_prefix_0F38_and_encode(rdx, dst, src); + emit_int8((unsigned char)0xF3); + emit_int8((unsigned char)(0xC0 | encode)); +} + +void Assembler::blsmskl(Register dst, Address src) { + InstructionMark im(this); + assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); + vex_prefix_0F38(rdx, dst, src); + emit_int8((unsigned char)0xF3); + emit_operand(rdx, src); +} + +void Assembler::blsrl(Register dst, Register src) { + assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); + int encode = vex_prefix_0F38_and_encode(rcx, dst, src); + emit_int8((unsigned char)0xF3); + emit_int8((unsigned char)(0xC0 | encode)); +} + +void Assembler::blsrl(Register dst, Address src) { + InstructionMark im(this); + assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); + vex_prefix_0F38(rcx, dst, src); + emit_int8((unsigned char)0xF3); + emit_operand(rcx, src); +} + void Assembler::call(Label& L, relocInfo::relocType rtype) { // suspect disp32 is always good int operand = LP64_ONLY(disp32_operand) NOT_LP64(imm_operand); @@ -2878,6 +2938,24 @@ void Assembler::testl(Register dst, Address src) { emit_operand(dst, src); } +void Assembler::tzcntl(Register dst, Register src) { + assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported"); + emit_int8((unsigned char)0xF3); + int encode = prefix_and_encode(dst->encoding(), src->encoding()); + emit_int8(0x0F); + emit_int8((unsigned char)0xBC); + emit_int8((unsigned char)0xC0 | encode); +} + +void Assembler::tzcntq(Register dst, Register src) { + assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported"); + emit_int8((unsigned char)0xF3); + int encode = prefixq_and_encode(dst->encoding(), src->encoding()); + emit_int8(0x0F); + emit_int8((unsigned char)0xBC); + emit_int8((unsigned char)(0xC0 | encode)); +} + void Assembler::ucomisd(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66); @@ -4837,6 +4915,21 @@ void Assembler::andq(Register dst, Register src) { emit_arith(0x23, 0xC0, dst, src); } +void Assembler::andnq(Register dst, Register src1, Register src2) { + assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); + int encode = vex_prefix_0F38_and_encode_q(dst, src1, src2); + emit_int8((unsigned char)0xF2); + emit_int8((unsigned char)(0xC0 | encode)); +} + +void Assembler::andnq(Register dst, Register src1, Address src2) { + InstructionMark im(this); + assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); + vex_prefix_0F38_q(dst, src1, src2); + emit_int8((unsigned char)0xF2); + emit_operand(dst, src2); +} + void Assembler::bsfq(Register dst, Register src) { int encode = prefixq_and_encode(dst->encoding(), src->encoding()); emit_int8(0x0F); @@ -4858,6 +4951,51 @@ void Assembler::bswapq(Register reg) { emit_int8((unsigned char)(0xC8 | encode)); } +void Assembler::blsiq(Register dst, Register src) { + assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); + int encode = vex_prefix_0F38_and_encode_q(rbx, dst, src); + emit_int8((unsigned char)0xF3); + emit_int8((unsigned char)(0xC0 | encode)); +} + +void Assembler::blsiq(Register dst, Address src) { + InstructionMark im(this); + assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); + vex_prefix_0F38_q(rbx, dst, src); + emit_int8((unsigned char)0xF3); + emit_operand(rbx, src); +} + +void Assembler::blsmskq(Register dst, Register src) { + assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); + int encode = vex_prefix_0F38_and_encode_q(rdx, dst, src); + emit_int8((unsigned char)0xF3); + emit_int8((unsigned char)(0xC0 | encode)); +} + +void Assembler::blsmskq(Register dst, Address src) { + InstructionMark im(this); + assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); + vex_prefix_0F38_q(rdx, dst, src); + emit_int8((unsigned char)0xF3); + emit_operand(rdx, src); +} + +void Assembler::blsrq(Register dst, Register src) { + assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); + int encode = vex_prefix_0F38_and_encode_q(rcx, dst, src); + emit_int8((unsigned char)0xF3); + emit_int8((unsigned char)(0xC0 | encode)); +} + +void Assembler::blsrq(Register dst, Address src) { + InstructionMark im(this); + assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); + vex_prefix_0F38_q(rcx, dst, src); + emit_int8((unsigned char)0xF3); + emit_operand(rcx, src); +} + void Assembler::cdqq() { prefix(REX_W); emit_int8((unsigned char)0x99); diff --git a/src/cpu/x86/vm/assembler_x86.hpp b/src/cpu/x86/vm/assembler_x86.hpp index 1ad66bd6a9544eb9d86d9a146bb88df3ffacc6d3..95ca231cef4d9bb2a6cff2e14f33f23eb78d1740 100644 --- a/src/cpu/x86/vm/assembler_x86.hpp +++ b/src/cpu/x86/vm/assembler_x86.hpp @@ -590,10 +590,35 @@ private: vex_prefix(src, nds_enc, dst_enc, pre, VEX_OPCODE_0F, false, vector256); } + void vex_prefix_0F38(Register dst, Register nds, Address src) { + bool vex_w = false; + bool vector256 = false; + vex_prefix(src, nds->encoding(), dst->encoding(), + VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector256); + } + + void vex_prefix_0F38_q(Register dst, Register nds, Address src) { + bool vex_w = true; + bool vector256 = false; + vex_prefix(src, nds->encoding(), dst->encoding(), + VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector256); + } int vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool vex_w, bool vector256); + int vex_prefix_0F38_and_encode(Register dst, Register nds, Register src) { + bool vex_w = false; + bool vector256 = false; + return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), + VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector256); + } + int vex_prefix_0F38_and_encode_q(Register dst, Register nds, Register src) { + bool vex_w = true; + bool vector256 = false; + return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), + VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector256); + } int vex_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre, bool vector256 = false, VexOpcode opc = VEX_OPCODE_0F) { @@ -897,6 +922,27 @@ private: void andq(Register dst, Address src); void andq(Register dst, Register src); + // BMI instructions + void andnl(Register dst, Register src1, Register src2); + void andnl(Register dst, Register src1, Address src2); + void andnq(Register dst, Register src1, Register src2); + void andnq(Register dst, Register src1, Address src2); + + void blsil(Register dst, Register src); + void blsil(Register dst, Address src); + void blsiq(Register dst, Register src); + void blsiq(Register dst, Address src); + + void blsmskl(Register dst, Register src); + void blsmskl(Register dst, Address src); + void blsmskq(Register dst, Register src); + void blsmskq(Register dst, Address src); + + void blsrl(Register dst, Register src); + void blsrl(Register dst, Address src); + void blsrq(Register dst, Register src); + void blsrq(Register dst, Address src); + void bsfl(Register dst, Register src); void bsrl(Register dst, Register src); @@ -1574,6 +1620,9 @@ private: void testq(Register dst, int32_t imm32); void testq(Register dst, Register src); + // BMI - count trailing zeros + void tzcntl(Register dst, Register src); + void tzcntq(Register dst, Register src); // Unordered Compare Scalar Double-Precision Floating-Point Values and set EFLAGS void ucomisd(XMMRegister dst, Address src); diff --git a/src/cpu/x86/vm/globals_x86.hpp b/src/cpu/x86/vm/globals_x86.hpp index b194ffbcfbe4ee1952c0deaec5b5a5ce2cb93c11..1bc5d1e6828c0abcdf29edfffec76f3e204bc3d4 100644 --- a/src/cpu/x86/vm/globals_x86.hpp +++ b/src/cpu/x86/vm/globals_x86.hpp @@ -134,5 +134,11 @@ define_pd_global(uintx, TypeProfileLevel, 111); \ product(bool, UseCountLeadingZerosInstruction, false, \ "Use count leading zeros instruction") \ + \ + product(bool, UseCountTrailingZerosInstruction, false, \ + "Use count trailing zeros instruction") \ + \ + product(bool, UseBMI1Instructions, false, \ + "Use BMI instructions") #endif // CPU_X86_VM_GLOBALS_X86_HPP diff --git a/src/cpu/x86/vm/vm_version_x86.cpp b/src/cpu/x86/vm/vm_version_x86.cpp index de38b4a3a53cfb272397c77c3e3e2933234935f1..b949e4e299874d8be1d2b2240d9347ef59b94a0b 100644 --- a/src/cpu/x86/vm/vm_version_x86.cpp +++ b/src/cpu/x86/vm/vm_version_x86.cpp @@ -429,7 +429,7 @@ void VM_Version::get_processor_features() { } char buf[256]; - jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", + jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", cores_per_cpu(), threads_per_core(), cpu_family(), _model, _stepping, (supports_cmov() ? ", cmov" : ""), @@ -455,7 +455,9 @@ void VM_Version::get_processor_features() { (supports_ht() ? ", ht": ""), (supports_tsc() ? ", tsc": ""), (supports_tscinv_bit() ? ", tscinvbit": ""), - (supports_tscinv() ? ", tscinv": "")); + (supports_tscinv() ? ", tscinv": ""), + (supports_bmi1() ? ", bmi1" : ""), + (supports_bmi2() ? ", bmi2" : "")); _features_str = strdup(buf); // UseSSE is set to the smaller of what hardware supports and what @@ -600,13 +602,6 @@ void VM_Version::get_processor_features() { } } - // Use count leading zeros count instruction if available. - if (supports_lzcnt()) { - if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) { - UseCountLeadingZerosInstruction = true; - } - } - // some defaults for AMD family 15h if ( cpu_family() == 0x15 ) { // On family 15h processors default is no sw prefetch @@ -692,6 +687,35 @@ void VM_Version::get_processor_features() { } #endif // COMPILER2 + // Use count leading zeros count instruction if available. + if (supports_lzcnt()) { + if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) { + UseCountLeadingZerosInstruction = true; + } + } else if (UseCountLeadingZerosInstruction) { + warning("lzcnt instruction is not available on this CPU"); + FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false); + } + + if (supports_bmi1()) { + if (FLAG_IS_DEFAULT(UseBMI1Instructions)) { + UseBMI1Instructions = true; + } + } else if (UseBMI1Instructions) { + warning("BMI1 instructions are not available on this CPU"); + FLAG_SET_DEFAULT(UseBMI1Instructions, false); + } + + // Use count trailing zeros instruction if available + if (supports_bmi1()) { + if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) { + UseCountTrailingZerosInstruction = UseBMI1Instructions; + } + } else if (UseCountTrailingZerosInstruction) { + warning("tzcnt instruction is not available on this CPU"); + FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, false); + } + // Use population count instruction if available. if (supports_popcnt()) { if (FLAG_IS_DEFAULT(UsePopCountInstruction)) { diff --git a/src/cpu/x86/vm/vm_version_x86.hpp b/src/cpu/x86/vm/vm_version_x86.hpp index 86e9b662d52b052e1a70b8f03011c5faa54195b5..07d6445187004a5e507d24ee1a19c9649ca07661 100644 --- a/src/cpu/x86/vm/vm_version_x86.hpp +++ b/src/cpu/x86/vm/vm_version_x86.hpp @@ -141,7 +141,8 @@ public: struct { uint32_t LahfSahf : 1, CmpLegacy : 1, - : 4, + : 3, + lzcnt_intel : 1, lzcnt : 1, sse4a : 1, misalignsse : 1, @@ -251,7 +252,9 @@ protected: CPU_AVX2 = (1 << 18), CPU_AES = (1 << 19), CPU_ERMS = (1 << 20), // enhanced 'rep movsb/stosb' instructions - CPU_CLMUL = (1 << 21) // carryless multiply for CRC + CPU_CLMUL = (1 << 21), // carryless multiply for CRC + CPU_BMI1 = (1 << 22), + CPU_BMI2 = (1 << 23) } cpuFeatureFlags; enum { @@ -423,6 +426,8 @@ protected: if (_cpuid_info.sef_cpuid7_ebx.bits.avx2 != 0) result |= CPU_AVX2; } + if(_cpuid_info.sef_cpuid7_ebx.bits.bmi1 != 0) + result |= CPU_BMI1; if (_cpuid_info.std_cpuid1_edx.bits.tsc != 0) result |= CPU_TSC; if (_cpuid_info.ext_cpuid7_edx.bits.tsc_invariance != 0) @@ -444,6 +449,13 @@ protected: if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0) result |= CPU_SSE4A; } + // Intel features. + if(is_intel()) { + if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0) + result |= CPU_BMI2; + if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0) + result |= CPU_LZCNT; + } return result; } @@ -560,7 +572,8 @@ public: static bool supports_aes() { return (_cpuFeatures & CPU_AES) != 0; } static bool supports_erms() { return (_cpuFeatures & CPU_ERMS) != 0; } static bool supports_clmul() { return (_cpuFeatures & CPU_CLMUL) != 0; } - + static bool supports_bmi1() { return (_cpuFeatures & CPU_BMI1) != 0; } + static bool supports_bmi2() { return (_cpuFeatures & CPU_BMI2) != 0; } // Intel features static bool is_intel_family_core() { return is_intel() && extended_cpu_family() == CPU_FAMILY_INTEL_CORE; } diff --git a/src/cpu/x86/vm/x86_32.ad b/src/cpu/x86/vm/x86_32.ad index f4a68abc789f103867635a6fea6e1b6ba9ec99eb..f0bd571a8b131408329ca4452424a0a05566b2d1 100644 --- a/src/cpu/x86/vm/x86_32.ad +++ b/src/cpu/x86/vm/x86_32.ad @@ -5155,6 +5155,19 @@ instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{ %} instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{ + predicate(UseCountTrailingZerosInstruction); + match(Set dst (CountTrailingZerosI src)); + effect(KILL cr); + + format %{ "TZCNT $dst, $src\t# count trailing zeros (int)" %} + ins_encode %{ + __ tzcntl($dst$$Register, $src$$Register); + %} + ins_pipe(ialu_reg); +%} + +instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{ + predicate(!UseCountTrailingZerosInstruction); match(Set dst (CountTrailingZerosI src)); effect(KILL cr); @@ -5174,6 +5187,30 @@ instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{ %} instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{ + predicate(UseCountTrailingZerosInstruction); + match(Set dst (CountTrailingZerosL src)); + effect(TEMP dst, KILL cr); + + format %{ "TZCNT $dst, $src.lo\t# count trailing zeros (long) \n\t" + "JNC done\n\t" + "TZCNT $dst, $src.hi\n\t" + "ADD $dst, 32\n" + "done:" %} + ins_encode %{ + Register Rdst = $dst$$Register; + Register Rsrc = $src$$Register; + Label done; + __ tzcntl(Rdst, Rsrc); + __ jccb(Assembler::carryClear, done); + __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc)); + __ addl(Rdst, BitsPerInt); + __ bind(done); + %} + ins_pipe(ialu_reg); +%} + +instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{ + predicate(!UseCountTrailingZerosInstruction); match(Set dst (CountTrailingZerosL src)); effect(TEMP dst, KILL cr); @@ -8017,6 +8054,123 @@ instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ ins_pipe( ialu_mem_imm ); %} +// BMI1 instructions +instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{ + match(Set dst (AndI (XorI src1 minus_1) src2)); + predicate(UseBMI1Instructions); + effect(KILL cr); + + format %{ "ANDNL $dst, $src1, $src2" %} + + ins_encode %{ + __ andnl($dst$$Register, $src1$$Register, $src2$$Register); + %} + ins_pipe(ialu_reg); +%} + +instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{ + match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) )); + predicate(UseBMI1Instructions); + effect(KILL cr); + + ins_cost(125); + format %{ "ANDNL $dst, $src1, $src2" %} + + ins_encode %{ + __ andnl($dst$$Register, $src1$$Register, $src2$$Address); + %} + ins_pipe(ialu_reg_mem); +%} + +instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{ + match(Set dst (AndI (SubI imm_zero src) src)); + predicate(UseBMI1Instructions); + effect(KILL cr); + + format %{ "BLSIL $dst, $src" %} + + ins_encode %{ + __ blsil($dst$$Register, $src$$Register); + %} + ins_pipe(ialu_reg); +%} + +instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{ + match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) )); + predicate(UseBMI1Instructions); + effect(KILL cr); + + ins_cost(125); + format %{ "BLSIL $dst, $src" %} + + ins_encode %{ + __ blsil($dst$$Register, $src$$Address); + %} + ins_pipe(ialu_reg_mem); +%} + +instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr) +%{ + match(Set dst (XorI (AddI src minus_1) src)); + predicate(UseBMI1Instructions); + effect(KILL cr); + + format %{ "BLSMSKL $dst, $src" %} + + ins_encode %{ + __ blsmskl($dst$$Register, $src$$Register); + %} + + ins_pipe(ialu_reg); +%} + +instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr) +%{ + match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) )); + predicate(UseBMI1Instructions); + effect(KILL cr); + + ins_cost(125); + format %{ "BLSMSKL $dst, $src" %} + + ins_encode %{ + __ blsmskl($dst$$Register, $src$$Address); + %} + + ins_pipe(ialu_reg_mem); +%} + +instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr) +%{ + match(Set dst (AndI (AddI src minus_1) src) ); + predicate(UseBMI1Instructions); + effect(KILL cr); + + format %{ "BLSRL $dst, $src" %} + + ins_encode %{ + __ blsrl($dst$$Register, $src$$Register); + %} + + ins_pipe(ialu_reg); +%} + +instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr) +%{ + match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) )); + predicate(UseBMI1Instructions); + effect(KILL cr); + + ins_cost(125); + format %{ "BLSRL $dst, $src" %} + + ins_encode %{ + __ blsrl($dst$$Register, $src$$Address); + %} + + ins_pipe(ialu_reg_mem); +%} + // Or Instructions // Or Register with Register instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ @@ -8639,6 +8793,210 @@ instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ ins_pipe( ialu_reg_long_mem ); %} +// BMI1 instructions +instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{ + match(Set dst (AndL (XorL src1 minus_1) src2)); + predicate(UseBMI1Instructions); + effect(KILL cr, TEMP dst); + + format %{ "ANDNL $dst.lo, $src1.lo, $src2.lo\n\t" + "ANDNL $dst.hi, $src1.hi, $src2.hi" + %} + + ins_encode %{ + Register Rdst = $dst$$Register; + Register Rsrc1 = $src1$$Register; + Register Rsrc2 = $src2$$Register; + __ andnl(Rdst, Rsrc1, Rsrc2); + __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2)); + %} + ins_pipe(ialu_reg_reg_long); +%} + +instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{ + match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) )); + predicate(UseBMI1Instructions); + effect(KILL cr, TEMP dst); + + ins_cost(125); + format %{ "ANDNL $dst.lo, $src1.lo, $src2\n\t" + "ANDNL $dst.hi, $src1.hi, $src2+4" + %} + + ins_encode %{ + Register Rdst = $dst$$Register; + Register Rsrc1 = $src1$$Register; + Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none); + + __ andnl(Rdst, Rsrc1, $src2$$Address); + __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi); + %} + ins_pipe(ialu_reg_mem); +%} + +instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{ + match(Set dst (AndL (SubL imm_zero src) src)); + predicate(UseBMI1Instructions); + effect(KILL cr, TEMP dst); + + format %{ "MOVL $dst.hi, 0\n\t" + "BLSIL $dst.lo, $src.lo\n\t" + "JNZ done\n\t" + "BLSIL $dst.hi, $src.hi\n" + "done:" + %} + + ins_encode %{ + Label done; + Register Rdst = $dst$$Register; + Register Rsrc = $src$$Register; + __ movl(HIGH_FROM_LOW(Rdst), 0); + __ blsil(Rdst, Rsrc); + __ jccb(Assembler::notZero, done); + __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); + __ bind(done); + %} + ins_pipe(ialu_reg); +%} + +instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{ + match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) )); + predicate(UseBMI1Instructions); + effect(KILL cr, TEMP dst); + + ins_cost(125); + format %{ "MOVL $dst.hi, 0\n\t" + "BLSIL $dst.lo, $src\n\t" + "JNZ done\n\t" + "BLSIL $dst.hi, $src+4\n" + "done:" + %} + + ins_encode %{ + Label done; + Register Rdst = $dst$$Register; + Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); + + __ movl(HIGH_FROM_LOW(Rdst), 0); + __ blsil(Rdst, $src$$Address); + __ jccb(Assembler::notZero, done); + __ blsil(HIGH_FROM_LOW(Rdst), src_hi); + __ bind(done); + %} + ins_pipe(ialu_reg_mem); +%} + +instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr) +%{ + match(Set dst (XorL (AddL src minus_1) src)); + predicate(UseBMI1Instructions); + effect(KILL cr, TEMP dst); + + format %{ "MOVL $dst.hi, 0\n\t" + "BLSMSKL $dst.lo, $src.lo\n\t" + "JNC done\n\t" + "BLSMSKL $dst.hi, $src.hi\n" + "done:" + %} + + ins_encode %{ + Label done; + Register Rdst = $dst$$Register; + Register Rsrc = $src$$Register; + __ movl(HIGH_FROM_LOW(Rdst), 0); + __ blsmskl(Rdst, Rsrc); + __ jccb(Assembler::carryClear, done); + __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); + __ bind(done); + %} + + ins_pipe(ialu_reg); +%} + +instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr) +%{ + match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) )); + predicate(UseBMI1Instructions); + effect(KILL cr, TEMP dst); + + ins_cost(125); + format %{ "MOVL $dst.hi, 0\n\t" + "BLSMSKL $dst.lo, $src\n\t" + "JNC done\n\t" + "BLSMSKL $dst.hi, $src+4\n" + "done:" + %} + + ins_encode %{ + Label done; + Register Rdst = $dst$$Register; + Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); + + __ movl(HIGH_FROM_LOW(Rdst), 0); + __ blsmskl(Rdst, $src$$Address); + __ jccb(Assembler::carryClear, done); + __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi); + __ bind(done); + %} + + ins_pipe(ialu_reg_mem); +%} + +instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr) +%{ + match(Set dst (AndL (AddL src minus_1) src) ); + predicate(UseBMI1Instructions); + effect(KILL cr, TEMP dst); + + format %{ "MOVL $dst.hi, $src.hi\n\t" + "BLSRL $dst.lo, $src.lo\n\t" + "JNC done\n\t" + "BLSRL $dst.hi, $src.hi\n" + "done:" + %} + + ins_encode %{ + Label done; + Register Rdst = $dst$$Register; + Register Rsrc = $src$$Register; + __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); + __ blsrl(Rdst, Rsrc); + __ jccb(Assembler::carryClear, done); + __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); + __ bind(done); + %} + + ins_pipe(ialu_reg); +%} + +instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr) +%{ + match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) )); + predicate(UseBMI1Instructions); + effect(KILL cr, TEMP dst); + + ins_cost(125); + format %{ "MOVL $dst.hi, $src+4\n\t" + "BLSRL $dst.lo, $src\n\t" + "JNC done\n\t" + "BLSRL $dst.hi, $src+4\n" + "done:" + %} + + ins_encode %{ + Label done; + Register Rdst = $dst$$Register; + Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); + __ movl(HIGH_FROM_LOW(Rdst), src_hi); + __ blsrl(Rdst, $src$$Address); + __ jccb(Assembler::carryClear, done); + __ blsrl(HIGH_FROM_LOW(Rdst), src_hi); + __ bind(done); + %} + + ins_pipe(ialu_reg_mem); +%} + // Or Long Register with Register instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ match(Set dst (OrL dst src)); diff --git a/src/cpu/x86/vm/x86_64.ad b/src/cpu/x86/vm/x86_64.ad index f82b8f381c94a96c92a3c2c131fa089cb436d4c3..220aff9a8407d988e991248f35f9a775aca805f0 100644 --- a/src/cpu/x86/vm/x86_64.ad +++ b/src/cpu/x86/vm/x86_64.ad @@ -6014,6 +6014,19 @@ instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{ %} instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{ + predicate(UseCountTrailingZerosInstruction); + match(Set dst (CountTrailingZerosI src)); + effect(KILL cr); + + format %{ "tzcntl $dst, $src\t# count trailing zeros (int)" %} + ins_encode %{ + __ tzcntl($dst$$Register, $src$$Register); + %} + ins_pipe(ialu_reg); +%} + +instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, rFlagsReg cr) %{ + predicate(!UseCountTrailingZerosInstruction); match(Set dst (CountTrailingZerosI src)); effect(KILL cr); @@ -6033,6 +6046,19 @@ instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{ %} instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{ + predicate(UseCountTrailingZerosInstruction); + match(Set dst (CountTrailingZerosL src)); + effect(KILL cr); + + format %{ "tzcntq $dst, $src\t# count trailing zeros (long)" %} + ins_encode %{ + __ tzcntq($dst$$Register, $src$$Register); + %} + ins_pipe(ialu_reg); +%} + +instruct countTrailingZerosL_bsf(rRegI dst, rRegL src, rFlagsReg cr) %{ + predicate(!UseCountTrailingZerosInstruction); match(Set dst (CountTrailingZerosL src)); effect(KILL cr); @@ -8612,6 +8638,122 @@ instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr) ins_pipe(ialu_mem_imm); %} +// BMI1 instructions +instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, rFlagsReg cr) %{ + match(Set dst (AndI (XorI src1 minus_1) (LoadI src2))); + predicate(UseBMI1Instructions); + effect(KILL cr); + + ins_cost(125); + format %{ "andnl $dst, $src1, $src2" %} + + ins_encode %{ + __ andnl($dst$$Register, $src1$$Register, $src2$$Address); + %} + ins_pipe(ialu_reg_mem); +%} + +instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, rFlagsReg cr) %{ + match(Set dst (AndI (XorI src1 minus_1) src2)); + predicate(UseBMI1Instructions); + effect(KILL cr); + + format %{ "andnl $dst, $src1, $src2" %} + + ins_encode %{ + __ andnl($dst$$Register, $src1$$Register, $src2$$Register); + %} + ins_pipe(ialu_reg); +%} + +instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, rFlagsReg cr) %{ + match(Set dst (AndI (SubI imm_zero src) src)); + predicate(UseBMI1Instructions); + effect(KILL cr); + + format %{ "blsil $dst, $src" %} + + ins_encode %{ + __ blsil($dst$$Register, $src$$Register); + %} + ins_pipe(ialu_reg); +%} + +instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, rFlagsReg cr) %{ + match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) )); + predicate(UseBMI1Instructions); + effect(KILL cr); + + ins_cost(125); + format %{ "blsil $dst, $src" %} + + ins_encode %{ + __ blsil($dst$$Register, $src$$Address); + %} + ins_pipe(ialu_reg_mem); +%} + +instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr) +%{ + match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) ); + predicate(UseBMI1Instructions); + effect(KILL cr); + + ins_cost(125); + format %{ "blsmskl $dst, $src" %} + + ins_encode %{ + __ blsmskl($dst$$Register, $src$$Address); + %} + ins_pipe(ialu_reg_mem); +%} + +instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr) +%{ + match(Set dst (XorI (AddI src minus_1) src)); + predicate(UseBMI1Instructions); + effect(KILL cr); + + format %{ "blsmskl $dst, $src" %} + + ins_encode %{ + __ blsmskl($dst$$Register, $src$$Register); + %} + + ins_pipe(ialu_reg); +%} + +instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr) +%{ + match(Set dst (AndI (AddI src minus_1) src) ); + predicate(UseBMI1Instructions); + effect(KILL cr); + + format %{ "blsrl $dst, $src" %} + + ins_encode %{ + __ blsrl($dst$$Register, $src$$Register); + %} + + ins_pipe(ialu_reg_mem); +%} + +instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr) +%{ + match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) ); + predicate(UseBMI1Instructions); + effect(KILL cr); + + ins_cost(125); + format %{ "blsrl $dst, $src" %} + + ins_encode %{ + __ blsrl($dst$$Register, $src$$Address); + %} + + ins_pipe(ialu_reg); +%} + // Or Instructions // Or Register with Register instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr) @@ -8843,6 +8985,122 @@ instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr) ins_pipe(ialu_mem_imm); %} +// BMI1 instructions +instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1, rFlagsReg cr) %{ + match(Set dst (AndL (XorL src1 minus_1) (LoadL src2))); + predicate(UseBMI1Instructions); + effect(KILL cr); + + ins_cost(125); + format %{ "andnq $dst, $src1, $src2" %} + + ins_encode %{ + __ andnq($dst$$Register, $src1$$Register, $src2$$Address); + %} + ins_pipe(ialu_reg_mem); +%} + +instruct andnL_rReg_rReg_rReg(rRegL dst, rRegL src1, rRegL src2, immL_M1 minus_1, rFlagsReg cr) %{ + match(Set dst (AndL (XorL src1 minus_1) src2)); + predicate(UseBMI1Instructions); + effect(KILL cr); + + format %{ "andnq $dst, $src1, $src2" %} + + ins_encode %{ + __ andnq($dst$$Register, $src1$$Register, $src2$$Register); + %} + ins_pipe(ialu_reg_mem); +%} + +instruct blsiL_rReg_rReg(rRegL dst, rRegL src, immL0 imm_zero, rFlagsReg cr) %{ + match(Set dst (AndL (SubL imm_zero src) src)); + predicate(UseBMI1Instructions); + effect(KILL cr); + + format %{ "blsiq $dst, $src" %} + + ins_encode %{ + __ blsiq($dst$$Register, $src$$Register); + %} + ins_pipe(ialu_reg); +%} + +instruct blsiL_rReg_mem(rRegL dst, memory src, immL0 imm_zero, rFlagsReg cr) %{ + match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) )); + predicate(UseBMI1Instructions); + effect(KILL cr); + + ins_cost(125); + format %{ "blsiq $dst, $src" %} + + ins_encode %{ + __ blsiq($dst$$Register, $src$$Address); + %} + ins_pipe(ialu_reg_mem); +%} + +instruct blsmskL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr) +%{ + match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) ); + predicate(UseBMI1Instructions); + effect(KILL cr); + + ins_cost(125); + format %{ "blsmskq $dst, $src" %} + + ins_encode %{ + __ blsmskq($dst$$Register, $src$$Address); + %} + ins_pipe(ialu_reg_mem); +%} + +instruct blsmskL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr) +%{ + match(Set dst (XorL (AddL src minus_1) src)); + predicate(UseBMI1Instructions); + effect(KILL cr); + + format %{ "blsmskq $dst, $src" %} + + ins_encode %{ + __ blsmskq($dst$$Register, $src$$Register); + %} + + ins_pipe(ialu_reg); +%} + +instruct blsrL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr) +%{ + match(Set dst (AndL (AddL src minus_1) src) ); + predicate(UseBMI1Instructions); + effect(KILL cr); + + format %{ "blsrq $dst, $src" %} + + ins_encode %{ + __ blsrq($dst$$Register, $src$$Register); + %} + + ins_pipe(ialu_reg); +%} + +instruct blsrL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr) +%{ + match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) ); + predicate(UseBMI1Instructions); + effect(KILL cr); + + ins_cost(125); + format %{ "blsrq $dst, $src" %} + + ins_encode %{ + __ blsrq($dst$$Register, $src$$Address); + %} + + ins_pipe(ialu_reg); +%} + // Or Instructions // Or Register with Register instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr) diff --git a/src/share/vm/adlc/formssel.cpp b/src/share/vm/adlc/formssel.cpp index 01aedd36a70de48ac161de91b4af11935cb80314..b7cd6736ed534b99c65572829f60c0ca4d9119e3 100644 --- a/src/share/vm/adlc/formssel.cpp +++ b/src/share/vm/adlc/formssel.cpp @@ -649,6 +649,7 @@ int InstructForm::memory_operand(FormDict &globals) const { int USE_of_memory = 0; int DEF_of_memory = 0; const char* last_memory_DEF = NULL; // to test DEF/USE pairing in asserts + const char* last_memory_USE = NULL; Component *unique = NULL; Component *comp = NULL; ComponentList &components = (ComponentList &)_components; @@ -670,7 +671,16 @@ int InstructForm::memory_operand(FormDict &globals) const { assert(0 == strcmp(last_memory_DEF, comp->_name), "every memory DEF is followed by a USE of the same name"); last_memory_DEF = NULL; } - USE_of_memory++; + // Handles same memory being used multiple times in the case of BMI1 instructions. + if (last_memory_USE != NULL) { + if (strcmp(comp->_name, last_memory_USE) != 0) { + USE_of_memory++; + } + } else { + USE_of_memory++; + } + last_memory_USE = comp->_name; + if (DEF_of_memory == 0) // defs take precedence unique = comp; } else { diff --git a/src/share/vm/opto/matcher.cpp b/src/share/vm/opto/matcher.cpp index 5088deb7c68c8d3b72265fc0c3e2dbe3aee5b5aa..81e7455a8beb94a0547055fca41395faf1e701c2 100644 --- a/src/share/vm/opto/matcher.cpp +++ b/src/share/vm/opto/matcher.cpp @@ -1908,6 +1908,105 @@ OptoReg::Name Matcher::find_receiver( bool is_outgoing ) { return OptoReg::as_OptoReg(regs.first()); } +// This function identifies sub-graphs in which a 'load' node is +// input to two different nodes, and such that it can be matched +// with BMI instructions like blsi, blsr, etc. +// Example : for b = -a[i] & a[i] can be matched to blsi r32, m32. +// The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL* +// refers to the same node. +#ifdef X86 +// Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop) +// This is a temporary solution until we make DAGs expressible in ADL. +template +class FusedPatternMatcher { + Node* _op1_node; + Node* _mop_node; + int _con_op; + + static int match_next(Node* n, int next_op, int next_op_idx) { + if (n->in(1) == NULL || n->in(2) == NULL) { + return -1; + } + + if (next_op_idx == -1) { // n is commutative, try rotations + if (n->in(1)->Opcode() == next_op) { + return 1; + } else if (n->in(2)->Opcode() == next_op) { + return 2; + } + } else { + assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index"); + if (n->in(next_op_idx)->Opcode() == next_op) { + return next_op_idx; + } + } + return -1; + } +public: + FusedPatternMatcher(Node* op1_node, Node *mop_node, int con_op) : + _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { } + + bool match(int op1, int op1_op2_idx, // op1 and the index of the op1->op2 edge, -1 if op1 is commutative + int op2, int op2_con_idx, // op2 and the index of the op2->con edge, -1 if op2 is commutative + typename ConType::NativeType con_value) { + if (_op1_node->Opcode() != op1) { + return false; + } + if (_mop_node->outcnt() > 2) { + return false; + } + op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx); + if (op1_op2_idx == -1) { + return false; + } + // Memory operation must be the other edge + int op1_mop_idx = (op1_op2_idx & 1) + 1; + + // Check that the mop node is really what we want + if (_op1_node->in(op1_mop_idx) == _mop_node) { + Node *op2_node = _op1_node->in(op1_op2_idx); + if (op2_node->outcnt() > 1) { + return false; + } + assert(op2_node->Opcode() == op2, "Should be"); + op2_con_idx = match_next(op2_node, _con_op, op2_con_idx); + if (op2_con_idx == -1) { + return false; + } + // Memory operation must be the other edge + int op2_mop_idx = (op2_con_idx & 1) + 1; + // Check that the memory operation is the same node + if (op2_node->in(op2_mop_idx) == _mop_node) { + // Now check the constant + const Type* con_type = op2_node->in(op2_con_idx)->bottom_type(); + if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) { + return true; + } + } + } + return false; + } +}; + + +bool Matcher::is_bmi_pattern(Node *n, Node *m) { + if (n != NULL && m != NULL) { + if (m->Opcode() == Op_LoadI) { + FusedPatternMatcher bmii(n, m, Op_ConI); + return bmii.match(Op_AndI, -1, Op_SubI, 1, 0) || + bmii.match(Op_AndI, -1, Op_AddI, -1, -1) || + bmii.match(Op_XorI, -1, Op_AddI, -1, -1); + } else if (m->Opcode() == Op_LoadL) { + FusedPatternMatcher bmil(n, m, Op_ConL); + return bmil.match(Op_AndL, -1, Op_SubL, 1, 0) || + bmil.match(Op_AndL, -1, Op_AddL, -1, -1) || + bmil.match(Op_XorL, -1, Op_AddL, -1, -1); + } + } + return false; +} +#endif // X86 + // A method-klass-holder may be passed in the inline_cache_reg // and then expanded into the inline_cache_reg and a method_oop register // defined in ad_.cpp @@ -2063,6 +2162,14 @@ void Matcher::find_shared( Node *n ) { set_shared(m->in(AddPNode::Base)->in(1)); } + // if 'n' and 'm' are part of a graph for BMI instruction, clone this node. +#ifdef X86 + if (UseBMI1Instructions && is_bmi_pattern(n, m)) { + mstack.push(m, Visit); + continue; + } +#endif + // Clone addressing expressions as they are "free" in memory access instructions if( mem_op && i == MemNode::Address && mop == Op_AddP ) { // Some inputs for address expression are not put on stack diff --git a/src/share/vm/opto/matcher.hpp b/src/share/vm/opto/matcher.hpp index 5ebf59068134586f37c65fec1e04fbfc71a70e86..6d90b9fb7195afd6ef423147dd73fe1001fb8517 100644 --- a/src/share/vm/opto/matcher.hpp +++ b/src/share/vm/opto/matcher.hpp @@ -79,6 +79,9 @@ class Matcher : public PhaseTransform { // Find shared Nodes, or Nodes that otherwise are Matcher roots void find_shared( Node *n ); +#ifdef X86 + bool is_bmi_pattern(Node *n, Node *m); +#endif // Debug and profile information for nodes in old space: GrowableArray* _old_node_note_array; diff --git a/test/compiler/codegen/BMI1.java b/test/compiler/codegen/BMI1.java new file mode 100644 index 0000000000000000000000000000000000000000..ada9cf06c3462e24b39b981edcb8d8e5eb109625 --- /dev/null +++ b/test/compiler/codegen/BMI1.java @@ -0,0 +1,301 @@ +/* + * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* + * @test + * @bug 8031321 + * @summary Support BMI1 instructions on x86/x64 + * @run main/othervm -Xbatch -XX:-TieredCompilation -XX:CompileCommand=compileonly,BMITests.* BMI1 + * + */ + +class MemI { + public int x; + public MemI(int x) { this.x = x; } +} + +class MemL { + public long x; + public MemL(long x) { this.x = x; } +} + +class BMITests { + static int andnl(int src1, int src2) { + return ~src1 & src2; + } + static long andnq(long src1, long src2) { + return ~src1 & src2; + } + static int andnl(int src1, MemI src2) { + return ~src1 & src2.x; + } + static long andnq(long src1, MemL src2) { + return ~src1 & src2.x; + } + static int blsil(int src1) { + return src1 & -src1; + } + static long blsiq(long src1) { + return src1 & -src1; + } + static int blsil(MemI src1) { + return src1.x & -src1.x; + } + static long blsiq(MemL src1) { + return src1.x & -src1.x; + } + static int blsmskl(int src1) { + return (src1 - 1) ^ src1; + } + static long blsmskq(long src1) { + return (src1 - 1) ^ src1; + } + static int blsmskl(MemI src1) { + return (src1.x - 1) ^ src1.x; + } + static long blsmskq(MemL src1) { + return (src1.x - 1) ^ src1.x; + } + static int blsrl(int src1) { + return (src1 - 1) & src1; + } + static long blsrq(long src1) { + return (src1 - 1) & src1; + } + static int blsrl(MemI src1) { + return (src1.x - 1) & src1.x; + } + static long blsrq(MemL src1) { + return (src1.x - 1) & src1.x; + } + static int lzcntl(int src1) { + return Integer.numberOfLeadingZeros(src1); + } + static int lzcntq(long src1) { + return Long.numberOfLeadingZeros(src1); + } + static int tzcntl(int src1) { + return Integer.numberOfTrailingZeros(src1); + } + static int tzcntq(long src1) { + return Long.numberOfTrailingZeros(src1); + } +} + +public class BMI1 { + private final static int ITERATIONS = 1000000; + + public static void main(String[] args) { + int ix = 0x01234567; + int iy = 0x89abcdef; + MemI imy = new MemI(iy); + long lx = 0x0123456701234567L; + long ly = 0x89abcdef89abcdefL; + MemL lmy = new MemL(ly); + + { // match(Set dst (AndI (XorI src1 minus_1) src2)) + int z = BMITests.andnl(ix, iy); + for (int i = 0; i < ITERATIONS; i++) { + int ii = BMITests.andnl(ix, iy); + if (ii != z) { + throw new Error("andnl with register failed"); + } + } + } + { // match(Set dst (AndL (XorL src1 minus_1) src2)) + long z = BMITests.andnq(lx, ly); + for (int i = 0; i < ITERATIONS; i++) { + long ll = BMITests.andnq(lx, ly); + if (ll != z) { + throw new Error("andnq with register failed"); + } + } + } + { // match(Set dst (AndI (XorI src1 minus_1) (LoadI src2))) + int z = BMITests.andnl(ix, imy); + for (int i = 0; i < ITERATIONS; i++) { + int ii = BMITests.andnl(ix, imy); + if (ii != z) { + throw new Error("andnl with memory failed"); + } + } + } + { // match(Set dst (AndL (XorL src1 minus_1) (LoadL src2))) + long z = BMITests.andnq(lx, lmy); + for (int i = 0; i < ITERATIONS; i++) { + long ll = BMITests.andnq(lx, lmy); + if (ll != z) { + throw new Error("andnq with memory failed"); + } + } + } + { // match(Set dst (AndI (SubI imm_zero src) src)) + int z = BMITests.blsil(ix); + for (int i = 0; i < ITERATIONS; i++) { + int ii = BMITests.blsil(ix); + if (ii != z) { + throw new Error("blsil with register failed"); + } + } + } + { // match(Set dst (AndL (SubL imm_zero src) src)) + long z = BMITests.blsiq(lx); + for (int i = 0; i < ITERATIONS; i++) { + long ll = BMITests.blsiq(lx); + if (ll != z) { + throw new Error("blsiq with register failed"); + } + } + } + { // match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) )) + int z = BMITests.blsil(imy); + for (int i = 0; i < ITERATIONS; i++) { + int ii = BMITests.blsil(imy); + if (ii != z) { + throw new Error("blsil with memory failed"); + } + } + } + { // match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) )) + long z = BMITests.blsiq(lmy); + for (int i = 0; i < ITERATIONS; i++) { + long ll = BMITests.blsiq(lmy); + if (ll != z) { + throw new Error("blsiq with memory failed"); + } + } + } + + { // match(Set dst (XorI (AddI src minus_1) src)) + int z = BMITests.blsmskl(ix); + for (int i = 0; i < ITERATIONS; i++) { + int ii = BMITests.blsmskl(ix); + if (ii != z) { + throw new Error("blsmskl with register failed"); + } + } + } + { // match(Set dst (XorL (AddL src minus_1) src)) + long z = BMITests.blsmskq(lx); + for (int i = 0; i < ITERATIONS; i++) { + long ll = BMITests.blsmskq(lx); + if (ll != z) { + throw new Error("blsmskq with register failed"); + } + } + } + { // match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) ) + int z = BMITests.blsmskl(imy); + for (int i = 0; i < ITERATIONS; i++) { + int ii = BMITests.blsmskl(imy); + if (ii != z) { + throw new Error("blsmskl with memory failed"); + } + } + } + { // match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) ) + long z = BMITests.blsmskq(lmy); + for (int i = 0; i < ITERATIONS; i++) { + long ll = BMITests.blsmskq(lmy); + if (ll != z) { + throw new Error("blsmskq with memory failed"); + } + } + } + + { // match(Set dst (AndI (AddI src minus_1) src) ) + int z = BMITests.blsrl(ix); + for (int i = 0; i < ITERATIONS; i++) { + int ii = BMITests.blsrl(ix); + if (ii != z) { + throw new Error("blsrl with register failed"); + } + } + } + { // match(Set dst (AndL (AddL src minus_1) src) ) + long z = BMITests.blsrq(lx); + for (int i = 0; i < ITERATIONS; i++) { + long ll = BMITests.blsrq(lx); + if (ll != z) { + throw new Error("blsrq with register failed"); + } + } + } + { // match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) ) + int z = BMITests.blsrl(imy); + for (int i = 0; i < ITERATIONS; i++) { + int ii = BMITests.blsrl(imy); + if (ii != z) { + throw new Error("blsrl with memory failed"); + } + } + } + { // match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) ) + long z = BMITests.blsrq(lmy); + for (int i = 0; i < ITERATIONS; i++) { + long ll = BMITests.blsrq(lmy); + if (ll != z) { + throw new Error("blsrq with memory failed"); + } + } + } + + { + int z = BMITests.lzcntl(ix); + for (int i = 0; i < ITERATIONS; i++) { + int ii = BMITests.lzcntl(ix); + if (ii != z) { + throw new Error("lzcntl failed"); + } + } + } + { + int z = BMITests.lzcntq(lx); + for (int i = 0; i < ITERATIONS; i++) { + int ii = BMITests.lzcntq(lx); + if (ii != z) { + throw new Error("lzcntq failed"); + } + } + } + + { + int z = BMITests.tzcntl(ix); + for (int i = 0; i < ITERATIONS; i++) { + int ii = BMITests.tzcntl(ix); + if (ii != z) { + throw new Error("tzcntl failed"); + } + } + } + { + int z = BMITests.tzcntq(lx); + for (int i = 0; i < ITERATIONS; i++) { + int ii = BMITests.tzcntq(lx); + if (ii != z) { + throw new Error("tzcntq failed"); + } + } + } + } +}