提交 3cacdd3a 编写于 作者: T twisti

6823354: Add intrinsics for {Integer,Long}.{numberOfLeadingZeros,numberOfTrailingZeros}()

Summary: These methods can be intrinsified by using bit scan, bit test, and population count instructions.
Reviewed-by: kvn, never
上级 cb3436e7
......@@ -1712,6 +1712,23 @@ static FloatRegister reg_to_DoubleFloatRegister_object(int register_encoding) {
return as_DoubleFloatRegister(register_encoding);
}
// Returns true when the matcher has a rule for the given ideal opcode and the
// predicate guarding that rule (if any) currently holds.
const bool Matcher::match_rule_supported(int opcode) {
  // Without a match rule there is nothing a predicate could enable.
  if (!has_match_rule(opcode)) {
    return false;
  }

  // The zero-count opcodes are only usable when the population count
  // instruction is enabled.
  const bool is_zero_count_op = opcode == Op_CountLeadingZerosI  ||
                                opcode == Op_CountLeadingZerosL  ||
                                opcode == Op_CountTrailingZerosI ||
                                opcode == Op_CountTrailingZerosL;
  if (is_zero_count_op && !UsePopCountInstruction) {
    return false;
  }

  return true;  // Per default match rules are supported.
}
// Converts a register number into an offset relative to the FP register chunk.
int Matcher::regnum_to_fpu_offset(int regnum) {
  // The FP registers are in the second chunk; subtract the chunk offset.
  const int fp_chunk_offset = 32;
  return regnum - fp_chunk_offset;
}
......@@ -9188,6 +9205,145 @@ instruct array_equals(o0RegP ary1, o1RegP ary2, g3RegP tmp1, g4RegP tmp2, notemp
ins_pipe(long_memory_op);
%}
//---------- Zeros Count Instructions ------------------------------------------
// Integer.numberOfLeadingZeros(int) via bit smearing followed by POPC.
//
// The highest set bit of the value is propagated into every lower bit
// position; the number of leading zeros is then the word size minus the
// population count of the smeared value.
instruct countLeadingZerosI(iRegI dst, iRegI src, iRegI tmp, flagsReg cr) %{
  predicate(UsePopCountInstruction);  // See Matcher::match_rule_supported
  match(Set dst (CountLeadingZerosI src));
  effect(TEMP dst, TEMP tmp, KILL cr);

  // x |= (x >> 1);
  // x |= (x >> 2);
  // x |= (x >> 4);
  // x |= (x >> 8);
  // x |= (x >> 16);
  // return (WORDBITS - popc(x));
  format %{ "SRL $src,1,$tmp\t! count leading zeros (int)\n\t"
            "SRL $src,R_G0,$dst\t! 32-bit zero extend\n\t"
            "OR $dst,$tmp,$dst\n\t"
            "SRL $dst,2,$tmp\n\t"
            "OR $dst,$tmp,$dst\n\t"
            "SRL $dst,4,$tmp\n\t"
            "OR $dst,$tmp,$dst\n\t"
            "SRL $dst,8,$tmp\n\t"
            "OR $dst,$tmp,$dst\n\t"
            "SRL $dst,16,$tmp\n\t"
            "OR $dst,$tmp,$dst\n\t"
            "POPC $dst,$dst\n\t"
            "MOV 32,$tmp\n\t"
            "SUB $tmp,$dst,$dst" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Register Rtmp = $tmp$$Register;
    __ srl(Rsrc, 1,    Rtmp);
    // POPC counts the whole 64-bit register, so the upper half of the source
    // must not leak into the smeared value.  Shift by G0 (zero) to get a
    // zero-extended copy, the same idiom countTrailingZerosI uses before POPC.
    __ srl(Rsrc, G0,   Rdst);
    __ or3(Rdst, Rtmp, Rdst);
    __ srl(Rdst, 2,    Rtmp);
    __ or3(Rdst, Rtmp, Rdst);
    __ srl(Rdst, 4,    Rtmp);
    __ or3(Rdst, Rtmp, Rdst);
    __ srl(Rdst, 8,    Rtmp);
    __ or3(Rdst, Rtmp, Rdst);
    __ srl(Rdst, 16,   Rtmp);
    __ or3(Rdst, Rtmp, Rdst);
    __ popc(Rdst, Rdst);
    __ mov(BitsPerInt, Rtmp);
    __ sub(Rtmp, Rdst, Rdst);          // result = 32 - popc(smeared x)
  %}
  ins_pipe(ialu_reg);
%}
// Long.numberOfLeadingZeros(long) via bit smearing followed by POPC.
//
// Same scheme as the int variant, with 64-bit shifts (SRLX) and one extra
// smear step for the upper word.
instruct countLeadingZerosL(iRegI dst, iRegL src, iRegL tmp, flagsReg cr) %{
  predicate(UsePopCountInstruction);  // See Matcher::match_rule_supported
  match(Set dst (CountLeadingZerosL src));
  effect(TEMP dst, TEMP tmp, KILL cr);

  // x |= (x >> 1);
  // x |= (x >> 2);
  // x |= (x >> 4);
  // x |= (x >> 8);
  // x |= (x >> 16);
  // x |= (x >> 32);
  // return (WORDBITS - popc(x));
  // The first shift writes $tmp (see the encoding below), so print it that way.
  format %{ "SRLX $src,1,$tmp\t! count leading zeros (long)\n\t"
            "OR $src,$tmp,$dst\n\t"
            "SRLX $dst,2,$tmp\n\t"
            "OR $dst,$tmp,$dst\n\t"
            "SRLX $dst,4,$tmp\n\t"
            "OR $dst,$tmp,$dst\n\t"
            "SRLX $dst,8,$tmp\n\t"
            "OR $dst,$tmp,$dst\n\t"
            "SRLX $dst,16,$tmp\n\t"
            "OR $dst,$tmp,$dst\n\t"
            "SRLX $dst,32,$tmp\n\t"
            "OR $dst,$tmp,$dst\n\t"
            "POPC $dst,$dst\n\t"
            "MOV 64,$tmp\n\t"
            "SUB $tmp,$dst,$dst" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Register Rtmp = $tmp$$Register;
    __ srlx(Rsrc, 1,    Rtmp);
    __ or3( Rsrc, Rtmp, Rdst);
    __ srlx(Rdst, 2,    Rtmp);
    __ or3( Rdst, Rtmp, Rdst);
    __ srlx(Rdst, 4,    Rtmp);
    __ or3( Rdst, Rtmp, Rdst);
    __ srlx(Rdst, 8,    Rtmp);
    __ or3( Rdst, Rtmp, Rdst);
    __ srlx(Rdst, 16,   Rtmp);
    __ or3( Rdst, Rtmp, Rdst);
    __ srlx(Rdst, 32,   Rtmp);
    __ or3( Rdst, Rtmp, Rdst);
    __ popc(Rdst, Rdst);
    __ mov(BitsPerLong, Rtmp);
    __ sub(Rtmp, Rdst, Rdst);          // result = 64 - popc(smeared x)
  %}
  ins_pipe(ialu_reg);
%}
// Integer.numberOfTrailingZeros(int): build a mask of the bits below the
// lowest set bit and count them with POPC.
instruct countTrailingZerosI(iRegI dst, iRegI src, flagsReg cr) %{
  predicate(UsePopCountInstruction);  // See Matcher::match_rule_supported
  match(Set dst (CountTrailingZerosI src));
  effect(TEMP dst, KILL cr);

  // return popc(~x & (x - 1));
  format %{ "SUB $src,1,$dst\t! count trailing zeros (int)\n\t"
            "ANDN $dst,$src,$dst\n\t"
            "SRL $dst,R_G0,$dst\n\t"
            "POPC $dst,$dst" %}
  ins_encode %{
    Register result = $dst$$Register;
    Register value  = $src$$Register;
    __ sub(value, 1, result);        // result = x - 1
    __ andn(result, value, result);  // result = (x - 1) & ~x
    // Shift by G0; presumably this clears the upper 32 bits so that POPC
    // only sees the int part -- confirm against the SPARC V9 manual.
    __ srl(result, G0, result);
    __ popc(result, result);
  %}
  ins_pipe(ialu_reg);
%}
// Long.numberOfTrailingZeros(long): build a mask of the bits below the lowest
// set bit and count them with POPC.
instruct countTrailingZerosL(iRegI dst, iRegL src, flagsReg cr) %{
  predicate(UsePopCountInstruction);  // See Matcher::match_rule_supported
  match(Set dst (CountTrailingZerosL src));
  effect(TEMP dst, KILL cr);

  // return popc(~x & (x - 1));
  format %{ "SUB $src,1,$dst\t! count trailing zeros (long)\n\t"
            "ANDN $dst,$src,$dst\n\t"
            "POPC $dst,$dst" %}
  ins_encode %{
    Register result = $dst$$Register;
    Register value  = $src$$Register;
    __ sub(value, 1, result);        // result = x - 1
    __ andn(result, value, result);  // result = (x - 1) & ~x
    __ popc(result, result);
  %}
  ins_pipe(ialu_reg);
%}
//---------- Population Count Instructions -------------------------------------
instruct popCountI(iRegI dst, iRegI src) %{
......
......@@ -952,6 +952,21 @@ void Assembler::andpd(XMMRegister dst, Address src) {
emit_operand(dst, src);
}
// Emits the register-to-register form of BSF for 32-bit operands:
// opcode bytes 0x0F 0xBC followed by a register-direct ModRM byte.
void Assembler::bsfl(Register dst, Register src) {
  // Has to run before the opcode bytes are emitted; presumably it emits any
  // required prefix and returns the register bits for the ModRM byte.
  const int modrm_regs = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBC);
  emit_byte(0xC0 | modrm_regs);
}
// Emits the register-to-register form of BSR for 32-bit operands:
// opcode bytes 0x0F 0xBD followed by a register-direct ModRM byte.
// Asserts that the CPU does not report LZCNT support.
void Assembler::bsrl(Register dst, Register src) {
  assert(!VM_Version::supports_lzcnt(), "encoding is treated as LZCNT");
  // Has to run before the opcode bytes are emitted; presumably it emits any
  // required prefix and returns the register bits for the ModRM byte.
  const int modrm_regs = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBD);
  emit_byte(0xC0 | modrm_regs);
}
void Assembler::bswapl(Register reg) { // bswap
int encode = prefix_and_encode(reg->encoding());
emit_byte(0x0F);
......@@ -1438,6 +1453,15 @@ void Assembler::lock() {
}
}
// Emits the register-to-register form of LZCNT for 32-bit operands:
// 0xF3, then whatever prefix_and_encode emits, then 0x0F 0xBD and a
// register-direct ModRM byte.  Asserts that the CPU reports LZCNT support.
void Assembler::lzcntl(Register dst, Register src) {
  assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
  // The 0xF3 byte is emitted first, ahead of anything prefix_and_encode adds.
  emit_byte(0xF3);
  const int modrm_regs = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBD);
  emit_byte(0xC0 | modrm_regs);
}
// Emit mfence instruction
void Assembler::mfence() {
NOT_LP64(assert(VM_Version::supports_sse2(), "unsupported");)
......@@ -3688,6 +3712,21 @@ void Assembler::andq(Register dst, Register src) {
emit_arith(0x23, 0xC0, dst, src);
}
// Emits the register-to-register form of BSF for 64-bit operands:
// opcode bytes 0x0F 0xBC followed by a register-direct ModRM byte.
void Assembler::bsfq(Register dst, Register src) {
  // Has to run before the opcode bytes are emitted; presumably it emits the
  // 64-bit operand-size prefix and returns the register bits for ModRM.
  const int modrm_regs = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBC);
  emit_byte(0xC0 | modrm_regs);
}
// Emits the register-to-register form of BSR for 64-bit operands:
// opcode bytes 0x0F 0xBD followed by a register-direct ModRM byte.
// Asserts that the CPU does not report LZCNT support.
void Assembler::bsrq(Register dst, Register src) {
  assert(!VM_Version::supports_lzcnt(), "encoding is treated as LZCNT");
  // Has to run before the opcode bytes are emitted; presumably it emits the
  // 64-bit operand-size prefix and returns the register bits for ModRM.
  const int modrm_regs = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBD);
  emit_byte(0xC0 | modrm_regs);
}
void Assembler::bswapq(Register reg) {
int encode = prefixq_and_encode(reg->encoding());
emit_byte(0x0F);
......@@ -3941,6 +3980,15 @@ void Assembler::cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder con
emit_data((int)imm32, rspec, narrow_oop_operand);
}
// Emits the register-to-register form of LZCNT for 64-bit operands:
// 0xF3, then whatever prefixq_and_encode emits, then 0x0F 0xBD and a
// register-direct ModRM byte.  Asserts that the CPU reports LZCNT support.
void Assembler::lzcntq(Register dst, Register src) {
  assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
  // The 0xF3 byte is emitted first, ahead of anything prefixq_and_encode adds.
  emit_byte(0xF3);
  const int modrm_regs = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBD);
  emit_byte(0xC0 | modrm_regs);
}
void Assembler::movdq(XMMRegister dst, Register src) {
// table D-1 says MMX/SSE2
NOT_LP64(assert(VM_Version::supports_sse2() || VM_Version::supports_mmx(), ""));
......
......@@ -757,6 +757,14 @@ private:
void andpd(XMMRegister dst, Address src);
void andpd(XMMRegister dst, XMMRegister src);
void bsfl(Register dst, Register src);
void bsrl(Register dst, Register src);
#ifdef _LP64
void bsfq(Register dst, Register src);
void bsrq(Register dst, Register src);
#endif
void bswapl(Register reg);
void bswapq(Register reg);
......@@ -1061,6 +1069,12 @@ private:
void lock();
void lzcntl(Register dst, Register src);
#ifdef _LP64
void lzcntq(Register dst, Register src);
#endif
enum Membar_mask_bits {
StoreStore = 1 << 3,
LoadStore = 1 << 2,
......
......@@ -284,7 +284,7 @@ void VM_Version::get_processor_features() {
}
char buf[256];
jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
cores_per_cpu(), threads_per_core(),
cpu_family(), _model, _stepping,
(supports_cmov() ? ", cmov" : ""),
......@@ -301,6 +301,7 @@ void VM_Version::get_processor_features() {
(supports_mmx_ext() ? ", mmxext" : ""),
(supports_3dnow() ? ", 3dnow" : ""),
(supports_3dnow2() ? ", 3dnowext" : ""),
(supports_lzcnt() ? ", lzcnt": ""),
(supports_sse4a() ? ", sse4a": ""),
(supports_ht() ? ", ht": ""));
_features_str = strdup(buf);
......@@ -364,6 +365,13 @@ void VM_Version::get_processor_features() {
UseXmmI2D = false;
}
}
// Use the count leading zeros instruction if available.
if (supports_lzcnt()) {
if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) {
UseCountLeadingZerosInstruction = true;
}
}
}
if( is_intel() ) { // Intel cpus specific settings
......
......@@ -120,7 +120,7 @@ public:
uint32_t LahfSahf : 1,
CmpLegacy : 1,
: 4,
abm : 1,
lzcnt : 1,
sse4a : 1,
misalignsse : 1,
prefetchw : 1,
......@@ -182,7 +182,8 @@ protected:
CPU_SSE4A = (1 << 10),
CPU_SSE4_1 = (1 << 11),
CPU_SSE4_2 = (1 << 12),
CPU_POPCNT = (1 << 13)
CPU_POPCNT = (1 << 13),
CPU_LZCNT = (1 << 14)
} cpuFeatureFlags;
// cpuid information block. All info derived from executing cpuid with
......@@ -277,8 +278,6 @@ protected:
if (_cpuid_info.std_cpuid1_edx.bits.mmx != 0 || is_amd() &&
_cpuid_info.ext_cpuid1_edx.bits.mmx != 0)
result |= CPU_MMX;
if (is_amd() && _cpuid_info.ext_cpuid1_edx.bits.tdnow != 0)
result |= CPU_3DNOW;
if (_cpuid_info.std_cpuid1_edx.bits.sse != 0)
result |= CPU_SSE;
if (_cpuid_info.std_cpuid1_edx.bits.sse2 != 0)
......@@ -287,14 +286,23 @@ protected:
result |= CPU_SSE3;
if (_cpuid_info.std_cpuid1_ecx.bits.ssse3 != 0)
result |= CPU_SSSE3;
if (is_amd() && _cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0)
result |= CPU_SSE4A;
if (_cpuid_info.std_cpuid1_ecx.bits.sse4_1 != 0)
result |= CPU_SSE4_1;
if (_cpuid_info.std_cpuid1_ecx.bits.sse4_2 != 0)
result |= CPU_SSE4_2;
if (_cpuid_info.std_cpuid1_ecx.bits.popcnt != 0)
result |= CPU_POPCNT;
// AMD features.
if (is_amd()) {
if (_cpuid_info.ext_cpuid1_edx.bits.tdnow != 0)
result |= CPU_3DNOW;
if (_cpuid_info.ext_cpuid1_ecx.bits.lzcnt != 0)
result |= CPU_LZCNT;
if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0)
result |= CPU_SSE4A;
}
return result;
}
......@@ -391,6 +399,7 @@ public:
// Feature queries.  Those that test _cpuFeatures read a bit of the cached
// CPU_* feature mask; the others inspect the raw cpuid information directly.

// True when the CPU_3DNOW bit is set in _cpuFeatures.
static bool supports_3dnow() { return (_cpuFeatures & CPU_3DNOW) != 0; }
// True on AMD CPUs whose extended cpuid EDX reports mmx_amd.
static bool supports_mmx_ext() { return is_amd() && _cpuid_info.ext_cpuid1_edx.bits.mmx_amd != 0; }
// True on AMD CPUs whose extended cpuid EDX reports tdnow2.
static bool supports_3dnow2() { return is_amd() && _cpuid_info.ext_cpuid1_edx.bits.tdnow2 != 0; }
// True when the CPU_LZCNT bit is set in _cpuFeatures.
static bool supports_lzcnt() { return (_cpuFeatures & CPU_LZCNT) != 0; }
// True when the CPU_SSE4A bit is set in _cpuFeatures.
static bool supports_sse4a() { return (_cpuFeatures & CPU_SSE4A) != 0; }
// Unconditionally true.
static bool supports_compare_and_exchange() { return true; }
......
......@@ -1281,6 +1281,13 @@ static void emit_float_constant(CodeBuffer& cbuf, float x) {
}
// Returns true when the matcher has a rule for the given ideal opcode.
// No opcode needs an additional predicate check on this platform.
const bool Matcher::match_rule_supported(int opcode) {
  // Per default match rules are supported.
  return has_match_rule(opcode) ? true : false;
}
// Converts a register number into an offset relative to the FP register chunk.
int Matcher::regnum_to_fpu_offset(int regnum) {
  // The FP registers are in the second chunk; subtract the chunk offset.
  const int fp_chunk_offset = 32;
  return regnum - fp_chunk_offset;
}
......@@ -6644,6 +6651,153 @@ instruct bytes_reverse_long(eRegL dst) %{
%}
//---------- Zeros Count Instructions ------------------------------------------
// Integer.numberOfLeadingZeros(int) using LZCNT; selected when
// UseCountLeadingZerosInstruction is on.
instruct countLeadingZerosI(eRegI dst, eRegI src, eFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "LZCNT $dst, $src\t# count leading zeros (int)" %}
  ins_encode %{
    Register result = $dst$$Register;
    Register value  = $src$$Register;
    __ lzcntl(result, value);
  %}
  ins_pipe(ialu_reg);
%}
// Integer.numberOfLeadingZeros(int) using BSR; selected when
// UseCountLeadingZerosInstruction is off.
instruct countLeadingZerosI_bsr(eRegI dst, eRegI src, eFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "BSR $dst, $src\t# count leading zeros (int)\n\t"
            "JNZ skip\n\t"
            "MOV $dst, -1\n"
            "skip:\n\t"
            "NEG $dst\n\t"
            "ADD $dst, 31" %}
  ins_encode %{
    Register result = $dst$$Register;
    Register value  = $src$$Register;
    Label have_bit_index;
    __ bsrl(result, value);
    __ jccb(Assembler::notZero, have_bit_index);
    // Zero input: substitute index -1 so the arithmetic below yields 32.
    __ movl(result, -1);
    __ bind(have_bit_index);
    // result = (BitsPerInt - 1) - index
    __ negl(result);
    __ addl(result, BitsPerInt - 1);
  %}
  ins_pipe(ialu_reg);
%}
// Long.numberOfLeadingZeros(long) on a register pair using LZCNT; selected
// when UseCountLeadingZerosInstruction is on.
instruct countLeadingZerosL(eRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "LZCNT $dst, $src.hi\t# count leading zeros (long)\n\t"
            "JNC done\n\t"
            "LZCNT $dst, $src.lo\n\t"
            "ADD $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register result = $dst$$Register;
    Register lo     = $src$$Register;
    Label finished;
    // Count in the high word first; carry clear means we are done.
    __ lzcntl(result, HIGH_FROM_LOW(lo));
    __ jccb(Assembler::carryClear, finished);
    // Otherwise count in the low word and add the width of the high word.
    __ lzcntl(result, lo);
    __ addl(result, BitsPerInt);
    __ bind(finished);
  %}
  ins_pipe(ialu_reg);
%}
// Long.numberOfLeadingZeros(long) on a register pair using BSR; selected when
// UseCountLeadingZerosInstruction is off.
instruct countLeadingZerosL_bsr(eRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "BSR $dst, $src.hi\t# count leading zeros (long)\n\t"
            "JZ msw_is_zero\n\t"
            "ADD $dst, 32\n\t"
            "JMP not_zero\n"
            "msw_is_zero:\n\t"
            "BSR $dst, $src.lo\n\t"
            "JNZ not_zero\n\t"
            "MOV $dst, -1\n"
            "not_zero:\n\t"
            "NEG $dst\n\t"
            "ADD $dst, 63\n" %}
  ins_encode %{
    Register result = $dst$$Register;
    Register lo     = $src$$Register;
    Label high_word_zero;
    Label have_bit_index;
    // Scan the high word; a hit is index + 32 within the 64-bit value.
    __ bsrl(result, HIGH_FROM_LOW(lo));
    __ jccb(Assembler::zero, high_word_zero);
    __ addl(result, BitsPerInt);
    __ jmpb(have_bit_index);
    __ bind(high_word_zero);
    // Scan the low word; if that is zero too, use index -1 so the result is 64.
    __ bsrl(result, lo);
    __ jccb(Assembler::notZero, have_bit_index);
    __ movl(result, -1);
    __ bind(have_bit_index);
    // result = (BitsPerLong - 1) - index
    __ negl(result);
    __ addl(result, BitsPerLong - 1);
  %}
  ins_pipe(ialu_reg);
%}
// Integer.numberOfTrailingZeros(int) using BSF, with an explicit result of 32
// for a zero input.
instruct countTrailingZerosI(eRegI dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "BSF $dst, $src\t# count trailing zeros (int)\n\t"
            "JNZ done\n\t"
            "MOV $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register result = $dst$$Register;
    Register value  = $src$$Register;
    Label finished;
    __ bsfl(result, value);
    __ jccb(Assembler::notZero, finished);
    // Zero input: the answer is the full width.
    __ movl(result, BitsPerInt);
    __ bind(finished);
  %}
  ins_pipe(ialu_reg);
%}
// Long.numberOfTrailingZeros(long) on a register pair using BSF, with an
// explicit result of 64 when both words are zero.
instruct countTrailingZerosL(eRegI dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (CountTrailingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "BSF $dst, $src.lo\t# count trailing zeros (long)\n\t"
            "JNZ done\n\t"
            "BSF $dst, $src.hi\n\t"
            "JNZ msw_not_zero\n\t"
            "MOV $dst, 32\n"
            "msw_not_zero:\n\t"
            "ADD $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register result = $dst$$Register;
    Register lo     = $src$$Register;
    Label high_word_hit;
    Label finished;
    // A set bit in the low word gives the answer directly.
    __ bsfl(result, lo);
    __ jccb(Assembler::notZero, finished);
    // Otherwise scan the high word; 32 stands in when it is zero as well.
    __ bsfl(result, HIGH_FROM_LOW(lo));
    __ jccb(Assembler::notZero, high_word_hit);
    __ movl(result, BitsPerInt);
    __ bind(high_word_hit);
    // Account for the 32 zero bits of the low word.
    __ addl(result, BitsPerInt);
    __ bind(finished);
  %}
  ins_pipe(ialu_reg);
%}
//---------- Population Count Instructions -------------------------------------
instruct popCountI(eRegI dst, eRegI src) %{
......
......@@ -1980,6 +1980,13 @@ static void emit_float_constant(CodeBuffer& cbuf, float x) {
}
// Returns true when the matcher has a rule for the given ideal opcode.
// No opcode needs an additional predicate check on this platform.
const bool Matcher::match_rule_supported(int opcode) {
  // Per default match rules are supported.
  return has_match_rule(opcode) ? true : false;
}
int Matcher::regnum_to_fpu_offset(int regnum)
{
return regnum - 32; // The FP registers are in the second chunk
......@@ -7656,6 +7663,121 @@ instruct storeL_reversed(memory dst, rRegL src) %{
%}
//---------- Zeros Count Instructions ------------------------------------------
// Integer.numberOfLeadingZeros(int) using LZCNT; selected when
// UseCountLeadingZerosInstruction is on.
instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "lzcntl $dst, $src\t# count leading zeros (int)" %}
  ins_encode %{
    Register result = $dst$$Register;
    Register value  = $src$$Register;
    __ lzcntl(result, value);
  %}
  ins_pipe(ialu_reg);
%}
// Integer.numberOfLeadingZeros(int) using BSR; selected when
// UseCountLeadingZerosInstruction is off.
instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "bsrl $dst, $src\t# count leading zeros (int)\n\t"
            "jnz skip\n\t"
            "movl $dst, -1\n"
            "skip:\n\t"
            "negl $dst\n\t"
            "addl $dst, 31" %}
  ins_encode %{
    Register result = $dst$$Register;
    Register value  = $src$$Register;
    Label have_bit_index;
    __ bsrl(result, value);
    __ jccb(Assembler::notZero, have_bit_index);
    // Zero input: substitute index -1 so the arithmetic below yields 32.
    __ movl(result, -1);
    __ bind(have_bit_index);
    // result = (BitsPerInt - 1) - index
    __ negl(result);
    __ addl(result, BitsPerInt - 1);
  %}
  ins_pipe(ialu_reg);
%}
// Long.numberOfLeadingZeros(long) using LZCNT; selected when
// UseCountLeadingZerosInstruction is on.
instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(KILL cr);

  format %{ "lzcntq $dst, $src\t# count leading zeros (long)" %}
  ins_encode %{
    Register result = $dst$$Register;
    Register value  = $src$$Register;
    __ lzcntq(result, value);
  %}
  ins_pipe(ialu_reg);
%}
// Long.numberOfLeadingZeros(long) using BSR; selected when
// UseCountLeadingZerosInstruction is off.
instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(KILL cr);

  format %{ "bsrq $dst, $src\t# count leading zeros (long)\n\t"
            "jnz skip\n\t"
            "movl $dst, -1\n"
            "skip:\n\t"
            "negl $dst\n\t"
            "addl $dst, 63" %}
  ins_encode %{
    Register result = $dst$$Register;
    Register value  = $src$$Register;
    Label have_bit_index;
    __ bsrq(result, value);
    __ jccb(Assembler::notZero, have_bit_index);
    // Zero input: substitute index -1 so the arithmetic below yields 64.
    __ movl(result, -1);
    __ bind(have_bit_index);
    // result = (BitsPerLong - 1) - index
    __ negl(result);
    __ addl(result, BitsPerLong - 1);
  %}
  ins_pipe(ialu_reg);
%}
// Integer.numberOfTrailingZeros(int) using BSF, with an explicit result of 32
// for a zero input.
instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "bsfl $dst, $src\t# count trailing zeros (int)\n\t"
            "jnz done\n\t"
            "movl $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register result = $dst$$Register;
    Register value  = $src$$Register;
    Label finished;
    __ bsfl(result, value);
    __ jccb(Assembler::notZero, finished);
    // Zero input: the answer is the full width.
    __ movl(result, BitsPerInt);
    __ bind(finished);
  %}
  ins_pipe(ialu_reg);
%}
// Long.numberOfTrailingZeros(long) using BSF, with an explicit result of 64
// for a zero input.
instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (CountTrailingZerosL src));
  effect(KILL cr);

  format %{ "bsfq $dst, $src\t# count trailing zeros (long)\n\t"
            "jnz done\n\t"
            "movl $dst, 64\n"
            "done:" %}
  ins_encode %{
    Register result = $dst$$Register;
    Register value  = $src$$Register;
    Label finished;
    __ bsfq(result, value);
    __ jccb(Assembler::notZero, finished);
    // Zero input: the answer is the full width.
    __ movl(result, BitsPerLong);
    __ bind(finished);
  %}
  ins_pipe(ialu_reg);
%}
//---------- Population Count Instructions -------------------------------------
instruct popCountI(rRegI dst, rRegI src) %{
......
......@@ -313,6 +313,8 @@
template(value_name, "value") \
template(frontCacheEnabled_name, "frontCacheEnabled") \
template(stringCacheEnabled_name, "stringCacheEnabled") \
template(numberOfLeadingZeros_name, "numberOfLeadingZeros") \
template(numberOfTrailingZeros_name, "numberOfTrailingZeros") \
template(bitCount_name, "bitCount") \
template(profile_name, "profile") \
template(equals_name, "equals") \
......@@ -559,6 +561,12 @@
do_intrinsic(_longBitsToDouble, java_lang_Double, longBitsToDouble_name, long_double_signature, F_S) \
do_name( longBitsToDouble_name, "longBitsToDouble") \
\
do_intrinsic(_numberOfLeadingZeros_i, java_lang_Integer, numberOfLeadingZeros_name,int_int_signature, F_S) \
do_intrinsic(_numberOfLeadingZeros_l, java_lang_Long, numberOfLeadingZeros_name,long_int_signature, F_S) \
\
do_intrinsic(_numberOfTrailingZeros_i, java_lang_Integer, numberOfTrailingZeros_name,int_int_signature, F_S) \
do_intrinsic(_numberOfTrailingZeros_l, java_lang_Long, numberOfTrailingZeros_name,long_int_signature, F_S) \
\
do_intrinsic(_bitCount_i, java_lang_Integer, bitCount_name, int_int_signature, F_S) \
do_intrinsic(_bitCount_l, java_lang_Long, bitCount_name, long_int_signature, F_S) \
\
......
......@@ -104,6 +104,10 @@ macro(ConvL2I)
macro(CosD)
macro(CountedLoop)
macro(CountedLoopEnd)
macro(CountLeadingZerosI)
macro(CountLeadingZerosL)
macro(CountTrailingZerosI)
macro(CountTrailingZerosL)
macro(CreateEx)
macro(DecodeN)
macro(DivD)
......
......@@ -1255,3 +1255,93 @@ const Type *MoveD2LNode::Value( PhaseTransform *phase ) const {
v.set_jdouble(td->getd());
return TypeLong::make( v.get_jlong() );
}
//------------------------------Value------------------------------------------
// Constant-folds the node: for a constant int input the result is the constant
// number of leading zero bits; otherwise the result type is TypeInt::INT.
const Type* CountLeadingZerosINode::Value(PhaseTransform* phase) const {
  const Type* in_type = phase->type(in(1));
  if (in_type == Type::TOP) return Type::TOP;

  const TypeInt* int_type = in_type->isa_int();
  if (!int_type || !int_type->is_con()) {
    return TypeInt::INT;
  }

  const jint con = int_type->get_con();
  if (con == 0) {
    return TypeInt::make(BitsPerInt);
  }

  // HD, Figure 5-6: binary search for the highest set bit.
  unsigned int bits = con;
  int zeros = 1;
  if (bits >> 16 == 0) { zeros += 16; bits <<= 16; }
  if (bits >> 24 == 0) { zeros +=  8; bits <<=  8; }
  if (bits >> 28 == 0) { zeros +=  4; bits <<=  4; }
  if (bits >> 30 == 0) { zeros +=  2; bits <<=  2; }
  zeros -= bits >> 31;
  return TypeInt::make(zeros);
}
//------------------------------Value------------------------------------------
// Constant-folds the node: for a constant long input the result is the
// constant number of leading zero bits; otherwise the result type is
// TypeInt::INT.
const Type* CountLeadingZerosLNode::Value(PhaseTransform* phase) const {
  const Type* in_type = phase->type(in(1));
  if (in_type == Type::TOP) return Type::TOP;

  const TypeLong* long_type = in_type->isa_long();
  if (!long_type || !long_type->is_con()) {
    return TypeInt::INT;
  }

  const jlong con = long_type->get_con();
  if (con == 0) {
    return TypeInt::make(BitsPerLong);
  }

  // HD, Figure 5-6: start with the high word, fall back to the low word when
  // the high word is empty, then binary search for the highest set bit.
  int zeros = 1;
  unsigned int bits = (((julong) con) >> 32);
  if (bits == 0) { zeros += 32; bits = (int) con; }
  if (bits >> 16 == 0) { zeros += 16; bits <<= 16; }
  if (bits >> 24 == 0) { zeros +=  8; bits <<=  8; }
  if (bits >> 28 == 0) { zeros +=  4; bits <<=  4; }
  if (bits >> 30 == 0) { zeros +=  2; bits <<=  2; }
  zeros -= bits >> 31;
  return TypeInt::make(zeros);
}
//------------------------------Value------------------------------------------
// Constant-folds the node: for a constant int input the result is the constant
// number of trailing zero bits; otherwise the result type is TypeInt::INT.
const Type* CountTrailingZerosINode::Value(PhaseTransform* phase) const {
  const Type* t = phase->type(in(1));
  if (t == Type::TOP) return Type::TOP;
  const TypeInt* ti = t->isa_int();
  if (ti && ti->is_con()) {
    jint i = ti->get_con();
    // HD, Figure 5-14
    if (i == 0)
      return TypeInt::make(BitsPerInt);
    // Do the shifting on an unsigned copy: left-shifting a negative or
    // overflowing signed int is undefined behavior, whereas unsigned shifts
    // simply discard the bits shifted out, which is what the algorithm needs.
    unsigned int x = (unsigned int) i;
    unsigned int y;
    int n = 31;
    y = x << 16; if (y != 0) { n = n - 16; x = y; }
    y = x <<  8; if (y != 0) { n = n -  8; x = y; }
    y = x <<  4; if (y != 0) { n = n -  4; x = y; }
    y = x <<  2; if (y != 0) { n = n -  2; x = y; }
    y = x <<  1; if (y != 0) { n = n -  1; }
    return TypeInt::make(n);
  }
  return TypeInt::INT;
}
//------------------------------Value------------------------------------------
// Constant-folds the node: for a constant long input the result is the
// constant number of trailing zero bits; otherwise the result type is
// TypeInt::INT.
const Type* CountTrailingZerosLNode::Value(PhaseTransform* phase) const {
  const Type* t = phase->type(in(1));
  if (t == Type::TOP) return Type::TOP;
  const TypeLong* tl = t->isa_long();
  if (tl && tl->is_con()) {
    jlong l = tl->get_con();
    // HD, Figure 5-14
    if (l == 0)
      return TypeInt::make(BitsPerLong);
    // Work on unsigned 32-bit halves: left-shifting a negative or overflowing
    // signed int is undefined behavior, whereas unsigned shifts simply discard
    // the bits shifted out, which is what the algorithm needs.
    unsigned int x, y;
    int n = 63;
    // Use the low word if it has any bit set, otherwise the high word.
    y = (unsigned int) l;
    if (y != 0) { n = n - 32; x = y; } else x = (unsigned int) (((julong) l) >> 32);
    y = x << 16; if (y != 0) { n = n - 16; x = y; }
    y = x <<  8; if (y != 0) { n = n -  8; x = y; }
    y = x <<  4; if (y != 0) { n = n -  4; x = y; }
    y = x <<  2; if (y != 0) { n = n -  2; x = y; }
    y = x <<  1; if (y != 0) { n = n -  1; }
    return TypeInt::make(n);
  }
  return TypeInt::INT;
}
......@@ -636,22 +636,62 @@ class MoveD2LNode : public Node {
virtual const Type* Value( PhaseTransform *phase ) const;
};
//---------- CountBitsNode -----------------------------------------------------
// Common base class for the bit-counting nodes that derive from it.
// The operand is passed to Node as the second constructor argument (the
// first is 0); whatever the operand type, the result is typed TypeInt::INT
// and uses ideal register class Op_RegI.
class CountBitsNode : public Node {
public:
CountBitsNode(Node* in1) : Node(0, in1) {}
// The result is always an int.
const Type* bottom_type() const { return TypeInt::INT; }
virtual uint ideal_reg() const { return Op_RegI; }
};
//---------- CountLeadingZerosINode --------------------------------------------
// Count leading zeros (0-bit count starting from MSB) of an integer.
class CountLeadingZerosINode : public CountBitsNode {
public:
CountLeadingZerosINode(Node* in1) : CountBitsNode(in1) {}
virtual int Opcode() const;
// Type of the result for the given phase; defined out of line.
virtual const Type* Value(PhaseTransform* phase) const;
};
//---------- CountLeadingZerosLNode --------------------------------------------
// Count leading zeros (0-bit count starting from MSB) of a long.
// The result is still an int (see CountBitsNode::bottom_type).
class CountLeadingZerosLNode : public CountBitsNode {
public:
CountLeadingZerosLNode(Node* in1) : CountBitsNode(in1) {}
virtual int Opcode() const;
// Type of the result for the given phase; defined out of line.
virtual const Type* Value(PhaseTransform* phase) const;
};
//---------- CountTrailingZerosINode -------------------------------------------
// Count trailing zeros (0-bit count starting from LSB) of an integer.
class CountTrailingZerosINode : public CountBitsNode {
public:
CountTrailingZerosINode(Node* in1) : CountBitsNode(in1) {}
virtual int Opcode() const;
// Type of the result for the given phase; defined out of line.
virtual const Type* Value(PhaseTransform* phase) const;
};
//---------- CountTrailingZerosLNode -------------------------------------------
// Count trailing zeros (0-bit count starting from LSB) of a long.
// The result is still an int (see CountBitsNode::bottom_type).
class CountTrailingZerosLNode : public CountBitsNode {
public:
CountTrailingZerosLNode(Node* in1) : CountBitsNode(in1) {}
virtual int Opcode() const;
// Type of the result for the given phase; defined out of line.
virtual const Type* Value(PhaseTransform* phase) const;
};
//---------- PopCountINode -----------------------------------------------------
// Population count (bit count) of an integer.
class PopCountINode : public Node {
class PopCountINode : public CountBitsNode {
public:
PopCountINode(Node* in1) : Node(0, in1) {}
PopCountINode(Node* in1) : CountBitsNode(in1) {}
virtual int Opcode() const;
const Type* bottom_type() const { return TypeInt::INT; }
virtual uint ideal_reg() const { return Op_RegI; }
};
//---------- PopCountLNode -----------------------------------------------------
// Population count (bit count) of a long.
class PopCountLNode : public Node {
class PopCountLNode : public CountBitsNode {
public:
PopCountLNode(Node* in1) : Node(0, in1) {}
PopCountLNode(Node* in1) : CountBitsNode(in1) {}
virtual int Opcode() const;
const Type* bottom_type() const { return TypeInt::INT; }
virtual uint ideal_reg() const { return Op_RegI; }
};
......@@ -222,6 +222,8 @@ class LibraryCallKit : public GraphKit {
bool inline_unsafe_CAS(BasicType type);
bool inline_unsafe_ordered_store(BasicType type);
bool inline_fp_conversions(vmIntrinsics::ID id);
bool inline_numberOfLeadingZeros(vmIntrinsics::ID id);
bool inline_numberOfTrailingZeros(vmIntrinsics::ID id);
bool inline_bitCount(vmIntrinsics::ID id);
bool inline_reverseBytes(vmIntrinsics::ID id);
};
......@@ -630,6 +632,14 @@ bool LibraryCallKit::try_to_inline() {
case vmIntrinsics::_longBitsToDouble:
return inline_fp_conversions(intrinsic_id());
case vmIntrinsics::_numberOfLeadingZeros_i:
case vmIntrinsics::_numberOfLeadingZeros_l:
return inline_numberOfLeadingZeros(intrinsic_id());
case vmIntrinsics::_numberOfTrailingZeros_i:
case vmIntrinsics::_numberOfTrailingZeros_l:
return inline_numberOfTrailingZeros(intrinsic_id());
case vmIntrinsics::_bitCount_i:
case vmIntrinsics::_bitCount_l:
return inline_bitCount(intrinsic_id());
......@@ -1844,6 +1854,48 @@ inline Node* LibraryCallKit::make_unsafe_address(Node* base, Node* offset) {
}
}
//-------------------inline_numberOfLeadingZeros_int/long-----------------------
// inline int Integer.numberOfLeadingZeros(int)
// inline int Long.numberOfLeadingZeros(long)
// Replaces a call to Integer/Long.numberOfLeadingZeros with the matching
// CountLeadingZeros node.  Returns false (no intrinsic) when the matcher does
// not support the required opcode, true once the node has been pushed.
bool LibraryCallKit::inline_numberOfLeadingZeros(vmIntrinsics::ID id) {
  const bool is_int  = (id == vmIntrinsics::_numberOfLeadingZeros_i);
  const bool is_long = (id == vmIntrinsics::_numberOfLeadingZeros_l);
  assert(id == vmIntrinsics::_numberOfLeadingZeros_i || id == vmIntrinsics::_numberOfLeadingZeros_l, "not numberOfLeadingZeros");

  // Bail out before touching the expression stack if the platform lacks the rule.
  if (is_int  && !Matcher::match_rule_supported(Op_CountLeadingZerosI)) return false;
  if (is_long && !Matcher::match_rule_supported(Op_CountLeadingZerosL)) return false;

  _sp += arg_size();  // restore stack pointer

  if (is_int) {
    Node* arg = pop();
    Node* count = new (C, 2) CountLeadingZerosINode(arg);
    push(_gvn.transform(count));
  } else if (is_long) {
    Node* arg = pop_pair();
    Node* count = new (C, 2) CountLeadingZerosLNode(arg);
    push(_gvn.transform(count));
  } else {
    ShouldNotReachHere();
  }
  return true;
}
//-------------------inline_numberOfTrailingZeros_int/long----------------------
// inline int Integer.numberOfTrailingZeros(int)
// inline int Long.numberOfTrailingZeros(long)
// Replaces a call to Integer/Long.numberOfTrailingZeros with the matching
// CountTrailingZeros node.  Returns false (no intrinsic) when the matcher does
// not support the required opcode, true once the node has been pushed.
bool LibraryCallKit::inline_numberOfTrailingZeros(vmIntrinsics::ID id) {
  const bool is_int  = (id == vmIntrinsics::_numberOfTrailingZeros_i);
  const bool is_long = (id == vmIntrinsics::_numberOfTrailingZeros_l);
  assert(id == vmIntrinsics::_numberOfTrailingZeros_i || id == vmIntrinsics::_numberOfTrailingZeros_l, "not numberOfTrailingZeros");

  // Bail out before touching the expression stack if the platform lacks the rule.
  if (is_int  && !Matcher::match_rule_supported(Op_CountTrailingZerosI)) return false;
  if (is_long && !Matcher::match_rule_supported(Op_CountTrailingZerosL)) return false;

  _sp += arg_size();  // restore stack pointer

  if (is_int) {
    Node* arg = pop();
    Node* count = new (C, 2) CountTrailingZerosINode(arg);
    push(_gvn.transform(count));
  } else if (is_long) {
    Node* arg = pop_pair();
    Node* count = new (C, 2) CountTrailingZerosLNode(arg);
    push(_gvn.transform(count));
  } else {
    ShouldNotReachHere();
  }
  return true;
}
//----------------------------inline_bitCount_int/long-----------------------
// inline int Integer.bitCount(int)
// inline int Long.bitCount(long)
......
......@@ -225,10 +225,16 @@ public:
OptoRegPair *_parm_regs; // Array of machine registers per argument
RegMask *_calling_convention_mask; // Array of RegMasks per argument
// Does matcher support this ideal node?
// Does matcher have a match rule for this ideal node?
static const bool has_match_rule(int opcode);
static const bool _hasMatchRule[_last_opcode];
// Does matcher have a match rule for this ideal node and is the
// predicate (if there is one) true?
// NOTE: If this function is used more commonly in the future, ADLC
// should generate this one.
static const bool match_rule_supported(int opcode);
// Used to determine if we have fast l2f conversion
// USII has it, USIII doesn't
static const bool convL2FSupported(void);
......
......@@ -2185,6 +2185,9 @@ class CommandLineFlags {
diagnostic(bool, PrintIntrinsics, false, \
"prints attempted and successful inlining of intrinsics") \
\
product(bool, UseCountLeadingZerosInstruction, false, \
"Use count leading zeros instruction") \
\
product(bool, UsePopCountInstruction, false, \
"Use population count instruction") \
\
......
/*
* Copyright 2009 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*/
/**
* @test
* @bug 6823354
* @summary These methods can be intrinsified by using bit scan, bit test, and population count instructions.
*
* @run main/othervm -Xcomp -XX:CompileOnly=Test6823354.lzcomp,Test6823354.tzcomp,.dolzcomp,.dotzcomp Test6823354
*/
import java.net.URLClassLoader;
public class Test6823354 {
// Arrays of corner case values.
static final int[] ia = new int[] { 0, 1, -1, Integer.MIN_VALUE, Integer.MAX_VALUE };
static final long[] la = new long[] { 0L, 1L, -1L, Long.MIN_VALUE, Long.MAX_VALUE };
/**
 * Test entry point: touches the four library methods once, then runs the
 * leading-zeros and trailing-zeros checks.
 */
public static void main(String[] args) throws Exception {
    // Load the classes and the methods.
    final int  warmupInt  = 0;
    final long warmupLong = 0;
    Integer.numberOfLeadingZeros(warmupInt);
    Integer.numberOfTrailingZeros(warmupInt);
    Long.numberOfLeadingZeros(warmupLong);
    Long.numberOfTrailingZeros(warmupLong);

    lz();
    tz();
}
/**
 * Checks numberOfLeadingZeros for int and long: compares lzcomp against lzint
 * on corner cases and on every single-bit value, and runs the constant-input
 * test classes on the same values.
 *
 * @throws Exception if one of the helper test classes fails
 */
static void lz() throws Exception {
    // int

    // Test corner cases.
    for (int x : ia) {
        check(x, lzcomp(x), lzint(x));
    }

    // Test all possible return values.
    for (int i = 0; i < Integer.SIZE; i++) {
        int x = 1 << i;
        check(x, lzcomp(x), lzint(x));
    }

    String classname = "Test6823354$lzconI";

    // Test Ideal optimizations (constant values).
    for (int x : ia) {
        testclass(classname, x);
    }

    // Test Ideal optimizations (constant values).
    for (int i = 0; i < Integer.SIZE; i++) {
        int x = 1 << i;
        testclass(classname, x);
    }

    // long

    // Test corner cases.  Iterate over the long array itself; bounding this
    // loop by the int array's length only works while both arrays happen to
    // have the same number of entries.
    for (long x : la) {
        check(x, lzcomp(x), lzint(x));
    }

    // Test all possible return values.
    for (int i = 0; i < Long.SIZE; i++) {
        long x = 1L << i;
        check(x, lzcomp(x), lzint(x));
    }

    classname = "Test6823354$lzconL";

    // Test Ideal optimizations (constant values).
    for (long x : la) {
        testclass(classname, x);
    }

    // Test Ideal optimizations (constant values).
    for (int i = 0; i < Long.SIZE; i++) {
        long x = 1L << i;
        testclass(classname, x);
    }
}
static void tz() throws Exception {
// int
// Test corner cases.
for (int i = 0; i < ia.length; i++) {
int x = ia[i];
check(x, tzcomp(x), tzint(x));
}
// Test all possible return values.
for (int i = 0; i < Integer.SIZE; i++) {
int x = 1 << i;
check(x, tzcomp(x), tzint(x));
}
String classname = "Test6823354$tzconI";
// Test Ideal optimizations (constant values).
for (int i = 0; i < ia.length; i++) {
testclass(classname, ia[i]);
}
// Test Ideal optimizations (constant values).
for (int i = 0; i < Integer.SIZE; i++) {
int x = 1 << i;
testclass(classname, x);
}
// long
// Test corner cases.
for (int i = 0; i < la.length; i++) {
long x = la[i];
check(x, tzcomp(x), tzint(x));
}
// Test all possible return values.
for (int i = 0; i < Long.SIZE; i++) {
long x = 1L << i;
check(x, tzcomp(x), tzint(x));
}
classname = "Test6823354$tzconL";
// Test Ideal optimizations (constant values).
for (int i = 0; i < la.length; i++) {
testclass(classname, la[i]);
}
// Test Ideal optimizations (constant values).
for (int i = 0; i < Long.SIZE; i++) {
long x = 1L << i;
testclass(classname, x);
}
}
static void check(int value, int result, int expected) {
//System.out.println(value + ": " + result + ", " + expected);
if (result != expected)
throw new InternalError(value + " failed: " + result + " != " + expected);
}
static void check(long value, long result, long expected) {
//System.out.println(value + ": " + result + ", " + expected);
if (result != expected)
throw new InternalError(value + " failed: " + result + " != " + expected);
}
static int lzint( int i) { return Integer.numberOfLeadingZeros(i); }
static int lzcomp(int i) { return Integer.numberOfLeadingZeros(i); }
static int lzint( long l) { return Long.numberOfLeadingZeros(l); }
static int lzcomp(long l) { return Long.numberOfLeadingZeros(l); }
static int tzint( int i) { return Integer.numberOfTrailingZeros(i); }
static int tzcomp(int i) { return Integer.numberOfTrailingZeros(i); }
static int tzint( long l) { return Long.numberOfTrailingZeros(l); }
static int tzcomp(long l) { return Long.numberOfTrailingZeros(l); }
static void testclass(String classname, int x) throws Exception {
System.setProperty("value", "" + x);
loadandrunclass(classname);
}
static void testclass(String classname, long x) throws Exception {
System.setProperty("value", "" + x);
loadandrunclass(classname);
}
static void loadandrunclass(String classname) throws Exception {
Class cl = Class.forName(classname);
URLClassLoader apploader = (URLClassLoader) cl.getClassLoader();
ClassLoader loader = new URLClassLoader(apploader.getURLs(), apploader.getParent());
Class c = loader.loadClass(classname);
Runnable r = (Runnable) c.newInstance();
r.run();
}
public static class lzconI implements Runnable {
static final int VALUE;
static {
int value = 0;
try {
value = Integer.decode(System.getProperty("value"));
} catch (Throwable e) {}
VALUE = value;
}
public void run() { check(VALUE, lzint(VALUE), dolzcomp()); }
static int dolzcomp() { return lzcomp(VALUE); }
}
public static class lzconL implements Runnable {
static final long VALUE;
static {
long value = 0;
try {
value = Long.decode(System.getProperty("value"));
} catch (Throwable e) {}
VALUE = value;
}
public void run() { check(VALUE, lzint(VALUE), dolzcomp()); }
static int dolzcomp() { return lzcomp(VALUE); }
}
public static class tzconI implements Runnable {
static final int VALUE;
static {
int value = 0;
try {
value = Integer.decode(System.getProperty("value"));
} catch (Throwable e) {}
VALUE = value;
}
public void run() { check(VALUE, tzint(VALUE), dotzcomp()); }
static int dotzcomp() { return tzcomp(VALUE); }
}
public static class tzconL implements Runnable {
static final long VALUE;
static {
long value = 0;
try {
value = Long.decode(System.getProperty("value"));
} catch (Throwable e) {}
VALUE = value;
}
public void run() { check(VALUE, tzint(VALUE), dotzcomp()); }
static int dotzcomp() { return tzcomp(VALUE); }
}
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册