Commit 138777ae, authored by xlu

Merge

@@ -1575,6 +1575,35 @@ void Assembler::movdqa(Address dst, XMMRegister src) {
   emit_operand(src, dst);
 }
 
+void Assembler::movdqu(XMMRegister dst, Address src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  InstructionMark im(this);
+  emit_byte(0xF3);
+  prefix(src, dst);
+  emit_byte(0x0F);
+  emit_byte(0x6F);
+  emit_operand(dst, src);
+}
+
+void Assembler::movdqu(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_byte(0xF3);
+  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
+  emit_byte(0x0F);
+  emit_byte(0x6F);
+  emit_byte(0xC0 | encode);
+}
+
+void Assembler::movdqu(Address dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  InstructionMark im(this);
+  emit_byte(0xF3);
+  prefix(dst, src);
+  emit_byte(0x0F);
+  emit_byte(0x7F);
+  emit_operand(src, dst);
+}
+
 // Uses zero extension on 64bit
 void Assembler::movl(Register dst, int32_t imm32) {
...
@@ -1055,6 +1055,11 @@ private:
   void movdqa(XMMRegister dst, Address src);
   void movdqa(XMMRegister dst, XMMRegister src);
 
+  // Move Unaligned Double Quadword
+  void movdqu(Address dst, XMMRegister src);
+  void movdqu(XMMRegister dst, Address src);
+  void movdqu(XMMRegister dst, XMMRegister src);
+
   void movl(Register dst, int32_t imm32);
   void movl(Address dst, int32_t imm32);
   void movl(Register dst, Register src);
...
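A note on the encoding for reviewers who don't read x86 opcode maps: MOVDQU is the F3-prefixed form of the 0F 6F/0F 7F moves, i.e. F3 0F 6F /r for a load and F3 0F 7F /r for a store, which is exactly the byte sequence the emitters above produce. Below is a minimal stand-alone sketch of the load form; the helper name and buffer type are illustrative, not part of the patch or of HotSpot's Assembler API.

```cpp
#include <cstdint>
#include <vector>

// Illustrative stand-alone encoder for the load form emitted above:
// F3 0F 6F /r == movdqu xmm, m128. Handles only "movdqu xmmN, [base]"
// with no REX prefix; base must not be ESP/EBP (those need SIB/disp8).
std::vector<uint8_t> encode_movdqu_load(int xmm, int base_gpr) {
  std::vector<uint8_t> code;
  code.push_back(0xF3);  // mandatory prefix selecting the unaligned form
  code.push_back(0x0F);  // two-byte opcode escape
  code.push_back(0x6F);  // MOVDQU xmm, xmm/m128 (0x7F is the store)
  // ModRM: mod=00 (register-indirect), reg=xmm number, rm=base register
  code.push_back(uint8_t(((xmm & 7) << 3) | (base_gpr & 7)));
  return code;
}
// encode_movdqu_load(0, 6) yields F3 0F 6F 06, i.e. movdqu xmm0, [esi].
```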
@@ -791,6 +791,69 @@ class StubGenerator: public StubCodeGenerator {
     }
   }
 
+  // Copy 64 bytes chunks
+  //
+  // Inputs:
+  //   from        - source array address
+  //   to_from     - destination array address - from
+  //   qword_count - 8-bytes element count, negative
+  //
+  void xmm_copy_forward(Register from, Register to_from, Register qword_count) {
+    assert( UseSSE >= 2, "supported cpu only" );
+    Label L_copy_64_bytes_loop, L_copy_64_bytes, L_copy_8_bytes, L_exit;
+    // Copy 64-byte chunks
+    __ jmpb(L_copy_64_bytes);
+    __ align(16);
+  __ BIND(L_copy_64_bytes_loop);
+    if(UseUnalignedLoadStores) {
+      __ movdqu(xmm0, Address(from, 0));
+      __ movdqu(Address(from, to_from, Address::times_1, 0), xmm0);
+      __ movdqu(xmm1, Address(from, 16));
+      __ movdqu(Address(from, to_from, Address::times_1, 16), xmm1);
+      __ movdqu(xmm2, Address(from, 32));
+      __ movdqu(Address(from, to_from, Address::times_1, 32), xmm2);
+      __ movdqu(xmm3, Address(from, 48));
+      __ movdqu(Address(from, to_from, Address::times_1, 48), xmm3);
+    } else {
+      __ movq(xmm0, Address(from, 0));
+      __ movq(Address(from, to_from, Address::times_1, 0), xmm0);
+      __ movq(xmm1, Address(from, 8));
+      __ movq(Address(from, to_from, Address::times_1, 8), xmm1);
+      __ movq(xmm2, Address(from, 16));
+      __ movq(Address(from, to_from, Address::times_1, 16), xmm2);
+      __ movq(xmm3, Address(from, 24));
+      __ movq(Address(from, to_from, Address::times_1, 24), xmm3);
+      __ movq(xmm4, Address(from, 32));
+      __ movq(Address(from, to_from, Address::times_1, 32), xmm4);
+      __ movq(xmm5, Address(from, 40));
+      __ movq(Address(from, to_from, Address::times_1, 40), xmm5);
+      __ movq(xmm6, Address(from, 48));
+      __ movq(Address(from, to_from, Address::times_1, 48), xmm6);
+      __ movq(xmm7, Address(from, 56));
+      __ movq(Address(from, to_from, Address::times_1, 56), xmm7);
+    }
+    __ addl(from, 64);
+  __ BIND(L_copy_64_bytes);
+    __ subl(qword_count, 8);
+    __ jcc(Assembler::greaterEqual, L_copy_64_bytes_loop);
+    __ addl(qword_count, 8);
+    __ jccb(Assembler::zero, L_exit);
+    //
+    // length is too short, just copy qwords
+    //
+  __ BIND(L_copy_8_bytes);
+    __ movq(xmm0, Address(from, 0));
+    __ movq(Address(from, to_from, Address::times_1), xmm0);
+    __ addl(from, 8);
+    __ decrement(qword_count);
+    __ jcc(Assembler::greater, L_copy_8_bytes);
+  __ BIND(L_exit);
+  }
+
   // Copy 64 bytes chunks
   //
   // Inputs:
@@ -799,6 +862,7 @@ class StubGenerator: public StubCodeGenerator {
   //   qword_count - 8-bytes element count, negative
   //
   void mmx_copy_forward(Register from, Register to_from, Register qword_count) {
+    assert( VM_Version::supports_mmx(), "supported cpu only" );
     Label L_copy_64_bytes_loop, L_copy_64_bytes, L_copy_8_bytes, L_exit;
     // Copy 64-byte chunks
     __ jmpb(L_copy_64_bytes);
@@ -876,7 +940,7 @@ class StubGenerator: public StubCodeGenerator {
     __ subptr(to, from); // to --> to_from
     __ cmpl(count, 2<<shift); // Short arrays (< 8 bytes) copy by element
     __ jcc(Assembler::below, L_copy_4_bytes); // use unsigned cmp
-    if (!aligned && (t == T_BYTE || t == T_SHORT)) {
+    if (!UseUnalignedLoadStores && !aligned && (t == T_BYTE || t == T_SHORT)) {
       // align source address at 4 bytes address boundary
       if (t == T_BYTE) {
         // One byte misalignment happens only for byte arrays
@@ -906,20 +970,26 @@ class StubGenerator: public StubCodeGenerator {
         __ mov(count, rax); // restore 'count'
         __ jmpb(L_copy_2_bytes); // all dwords were copied
       } else {
-        // align to 8 bytes, we know we are 4 byte aligned to start
-        __ testptr(from, 4);
-        __ jccb(Assembler::zero, L_copy_64_bytes);
-        __ movl(rax, Address(from, 0));
-        __ movl(Address(from, to_from, Address::times_1, 0), rax);
-        __ addptr(from, 4);
-        __ subl(count, 1<<shift);
+        if (!UseUnalignedLoadStores) {
+          // align to 8 bytes, we know we are 4 byte aligned to start
+          __ testptr(from, 4);
+          __ jccb(Assembler::zero, L_copy_64_bytes);
+          __ movl(rax, Address(from, 0));
+          __ movl(Address(from, to_from, Address::times_1, 0), rax);
+          __ addptr(from, 4);
+          __ subl(count, 1<<shift);
+        }
       __ BIND(L_copy_64_bytes);
         __ mov(rax, count);
         __ shrl(rax, shift+1); // 8 bytes chunk count
         //
         // Copy 8-byte chunks through MMX registers, 8 per iteration of the loop
         //
-        mmx_copy_forward(from, to_from, rax);
+        if (UseXMMForArrayCopy) {
+          xmm_copy_forward(from, to_from, rax);
+        } else {
+          mmx_copy_forward(from, to_from, rax);
+        }
       }
       // copy tailing dword
     __ BIND(L_copy_4_bytes);
@@ -1069,13 +1139,20 @@ class StubGenerator: public StubCodeGenerator {
       __ align(16);
       // Move 8 bytes
     __ BIND(L_copy_8_bytes_loop);
-      __ movq(mmx0, Address(from, count, sf, 0));
-      __ movq(Address(to, count, sf, 0), mmx0);
+      if (UseXMMForArrayCopy) {
+        __ movq(xmm0, Address(from, count, sf, 0));
+        __ movq(Address(to, count, sf, 0), xmm0);
+      } else {
+        __ movq(mmx0, Address(from, count, sf, 0));
+        __ movq(Address(to, count, sf, 0), mmx0);
+      }
    __ BIND(L_copy_8_bytes);
      __ subl(count, 2<<shift);
      __ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop);
      __ addl(count, 2<<shift);
-      __ emms();
+      if (!UseXMMForArrayCopy) {
+        __ emms();
+      }
     }
   __ BIND(L_copy_4_bytes);
     // copy prefix qword
@@ -1143,7 +1220,11 @@ class StubGenerator: public StubCodeGenerator {
     __ subptr(to, from); // to --> to_from
     if (VM_Version::supports_mmx()) {
-      mmx_copy_forward(from, to_from, count);
+      if (UseXMMForArrayCopy) {
+        xmm_copy_forward(from, to_from, count);
+      } else {
+        mmx_copy_forward(from, to_from, count);
+      }
     } else {
       __ jmpb(L_copy_8_bytes);
       __ align(16);
@@ -1196,8 +1277,13 @@ class StubGenerator: public StubCodeGenerator {
     __ align(16);
   __ BIND(L_copy_8_bytes_loop);
     if (VM_Version::supports_mmx()) {
-      __ movq(mmx0, Address(from, count, Address::times_8));
-      __ movq(Address(to, count, Address::times_8), mmx0);
+      if (UseXMMForArrayCopy) {
+        __ movq(xmm0, Address(from, count, Address::times_8));
+        __ movq(Address(to, count, Address::times_8), xmm0);
+      } else {
+        __ movq(mmx0, Address(from, count, Address::times_8));
+        __ movq(Address(to, count, Address::times_8), mmx0);
+      }
     } else {
       __ fild_d(Address(from, count, Address::times_8));
       __ fistp_d(Address(to, count, Address::times_8));
@@ -1206,7 +1292,7 @@ class StubGenerator: public StubCodeGenerator {
     __ decrement(count);
     __ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop);
-    if (VM_Version::supports_mmx()) {
+    if (VM_Version::supports_mmx() && !UseXMMForArrayCopy) {
       __ emms();
     }
     inc_copy_counter_np(T_LONG);
...
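One addressing trick in these stubs is worth spelling out: `to_from` holds `to - from`, so the loop advances a single pointer and forms every destination as `Address(from, to_from, times_1, disp)`. The following plain-C++ sketch mirrors that structure; the helper name is hypothetical and it assumes, like the stub's qword counts, that the length is a multiple of 8 bytes.

```cpp
#include <cstdint>
#include <cstring>

// Sketch of the single-moving-pointer structure used by xmm_copy_forward:
// keep the delta (to - from) once and derive each store address from the
// advancing source pointer, mirroring Address(from, to_from, times_1, disp).
void copy_forward_sketch(uint8_t* from, uint8_t* to, size_t bytes) {
  ptrdiff_t to_from = to - from;              // computed once, like subptr(to, from)
  uint8_t* chunk_end = from + (bytes & ~size_t(63));
  while (from < chunk_end) {                  // 64-byte chunks
    std::memcpy(from + to_from, from, 64);    // stands in for the 4 movdqu / 8 movq
    from += 64;
  }
  for (size_t i = 0; i < (bytes & 63); i += 8)  // qword tail, like L_copy_8_bytes
    std::memcpy(from + to_from + i, from + i, 8);
}
```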
@@ -1251,6 +1251,7 @@ class StubGenerator: public StubCodeGenerator {
     }
   }
 
+  // Copy big chunks forward
   //
   // Inputs:
@@ -1268,14 +1269,22 @@ class StubGenerator: public StubCodeGenerator {
     Label L_loop;
     __ align(16);
   __ BIND(L_loop);
-    __ movq(to, Address(end_from, qword_count, Address::times_8, -24));
-    __ movq(Address(end_to, qword_count, Address::times_8, -24), to);
-    __ movq(to, Address(end_from, qword_count, Address::times_8, -16));
-    __ movq(Address(end_to, qword_count, Address::times_8, -16), to);
-    __ movq(to, Address(end_from, qword_count, Address::times_8, - 8));
-    __ movq(Address(end_to, qword_count, Address::times_8, - 8), to);
-    __ movq(to, Address(end_from, qword_count, Address::times_8, - 0));
-    __ movq(Address(end_to, qword_count, Address::times_8, - 0), to);
+    if(UseUnalignedLoadStores) {
+      __ movdqu(xmm0, Address(end_from, qword_count, Address::times_8, -24));
+      __ movdqu(Address(end_to, qword_count, Address::times_8, -24), xmm0);
+      __ movdqu(xmm1, Address(end_from, qword_count, Address::times_8, - 8));
+      __ movdqu(Address(end_to, qword_count, Address::times_8, - 8), xmm1);
+
+    } else {
+      __ movq(to, Address(end_from, qword_count, Address::times_8, -24));
+      __ movq(Address(end_to, qword_count, Address::times_8, -24), to);
+      __ movq(to, Address(end_from, qword_count, Address::times_8, -16));
+      __ movq(Address(end_to, qword_count, Address::times_8, -16), to);
+      __ movq(to, Address(end_from, qword_count, Address::times_8, - 8));
+      __ movq(Address(end_to, qword_count, Address::times_8, - 8), to);
+      __ movq(to, Address(end_from, qword_count, Address::times_8, - 0));
+      __ movq(Address(end_to, qword_count, Address::times_8, - 0), to);
+    }
   __ BIND(L_copy_32_bytes);
     __ addptr(qword_count, 4);
     __ jcc(Assembler::lessEqual, L_loop);
@@ -1301,14 +1310,22 @@ class StubGenerator: public StubCodeGenerator {
     Label L_loop;
     __ align(16);
   __ BIND(L_loop);
-    __ movq(to, Address(from, qword_count, Address::times_8, 24));
-    __ movq(Address(dest, qword_count, Address::times_8, 24), to);
-    __ movq(to, Address(from, qword_count, Address::times_8, 16));
-    __ movq(Address(dest, qword_count, Address::times_8, 16), to);
-    __ movq(to, Address(from, qword_count, Address::times_8, 8));
-    __ movq(Address(dest, qword_count, Address::times_8, 8), to);
-    __ movq(to, Address(from, qword_count, Address::times_8, 0));
-    __ movq(Address(dest, qword_count, Address::times_8, 0), to);
+    if(UseUnalignedLoadStores) {
+      __ movdqu(xmm0, Address(from, qword_count, Address::times_8, 16));
+      __ movdqu(Address(dest, qword_count, Address::times_8, 16), xmm0);
+      __ movdqu(xmm1, Address(from, qword_count, Address::times_8, 0));
+      __ movdqu(Address(dest, qword_count, Address::times_8, 0), xmm1);
+
+    } else {
+      __ movq(to, Address(from, qword_count, Address::times_8, 24));
+      __ movq(Address(dest, qword_count, Address::times_8, 24), to);
+      __ movq(to, Address(from, qword_count, Address::times_8, 16));
+      __ movq(Address(dest, qword_count, Address::times_8, 16), to);
+      __ movq(to, Address(from, qword_count, Address::times_8, 8));
+      __ movq(Address(dest, qword_count, Address::times_8, 8), to);
+      __ movq(to, Address(from, qword_count, Address::times_8, 0));
+      __ movq(Address(dest, qword_count, Address::times_8, 0), to);
+    }
   __ BIND(L_copy_32_bytes);
     __ subptr(qword_count, 4);
     __ jcc(Assembler::greaterEqual, L_loop);
...
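What the movdqu path buys on x86_64: two 16-byte unaligned moves replace four 8-byte register round-trips per 32-byte chunk. The same loop, sketched with SSE2 intrinsics rather than the stub generator (a sketch under the assumption that `bytes` is a multiple of 32):

```cpp
#include <emmintrin.h>  // SSE2 intrinsics
#include <cstdint>

// _mm_loadu_si128/_mm_storeu_si128 compile to movdqu, so neither pointer
// needs 16-byte alignment, exactly like the UseUnalignedLoadStores path.
void copy_32byte_chunks(const uint8_t* from, uint8_t* to, size_t bytes) {
  for (size_t i = 0; i < bytes; i += 32) {
    __m128i lo = _mm_loadu_si128(reinterpret_cast<const __m128i*>(from + i));
    __m128i hi = _mm_loadu_si128(reinterpret_cast<const __m128i*>(from + i + 16));
    _mm_storeu_si128(reinterpret_cast<__m128i*>(to + i), lo);
    _mm_storeu_si128(reinterpret_cast<__m128i*>(to + i + 16), hi);
  }
}
```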
@@ -242,9 +242,11 @@ void VM_Version::get_processor_features() {
   _supports_cx8 = supports_cmpxchg8();
   // if the OS doesn't support SSE, we can't use this feature even if the HW does
   if( !os::supports_sse())
-    _cpuFeatures &= ~(CPU_SSE|CPU_SSE2|CPU_SSE3|CPU_SSSE3|CPU_SSE4|CPU_SSE4A);
+    _cpuFeatures &= ~(CPU_SSE|CPU_SSE2|CPU_SSE3|CPU_SSSE3|CPU_SSE4A|CPU_SSE4_1|CPU_SSE4_2);
-  if (UseSSE < 4)
-    _cpuFeatures &= ~CPU_SSE4;
+  if (UseSSE < 4) {
+    _cpuFeatures &= ~CPU_SSE4_1;
+    _cpuFeatures &= ~CPU_SSE4_2;
+  }
   if (UseSSE < 3) {
     _cpuFeatures &= ~CPU_SSE3;
     _cpuFeatures &= ~CPU_SSSE3;
@@ -261,7 +263,7 @@ void VM_Version::get_processor_features() {
   }
 
   char buf[256];
-  jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
+  jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
               cores_per_cpu(), threads_per_core(),
               cpu_family(), _model, _stepping,
               (supports_cmov() ? ", cmov" : ""),
@@ -272,7 +274,8 @@ void VM_Version::get_processor_features() {
               (supports_sse2() ? ", sse2" : ""),
               (supports_sse3() ? ", sse3" : ""),
               (supports_ssse3()? ", ssse3": ""),
-              (supports_sse4() ? ", sse4" : ""),
+              (supports_sse4_1() ? ", sse4.1" : ""),
+              (supports_sse4_2() ? ", sse4.2" : ""),
               (supports_mmx_ext() ? ", mmxext" : ""),
               (supports_3dnow() ? ", 3dnow" : ""),
               (supports_3dnow2() ? ", 3dnowext" : ""),
@@ -285,7 +288,7 @@ void VM_Version::get_processor_features() {
   // older Pentiums which do not support it.
   if( UseSSE > 4 ) UseSSE=4;
   if( UseSSE < 0 ) UseSSE=0;
-  if( !supports_sse4() ) // Drop to 3 if no SSE4 support
+  if( !supports_sse4_1() ) // Drop to 3 if no SSE4 support
     UseSSE = MIN2((intx)3,UseSSE);
   if( !supports_sse3() ) // Drop to 2 if no SSE3 support
     UseSSE = MIN2((intx)2,UseSSE);
@@ -375,6 +378,14 @@ void VM_Version::get_processor_features() {
         MaxLoopPad = 11;
       }
 #endif // COMPILER2
+      if( FLAG_IS_DEFAULT(UseXMMForArrayCopy) ) {
+        UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
+      }
+      if( supports_sse4_2() && supports_ht() ) { // Newest Intel cpus
+        if( FLAG_IS_DEFAULT(UseUnalignedLoadStores) && UseXMMForArrayCopy ) {
+          UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
+        }
+      }
     }
   }
@@ -413,7 +424,7 @@ void VM_Version::get_processor_features() {
 #ifndef PRODUCT
   if (PrintMiscellaneous && Verbose) {
-    tty->print_cr("Logical CPUs per package: %u",
+    tty->print_cr("Logical CPUs per core: %u",
                   logical_processors_per_package());
     tty->print_cr("UseSSE=%d",UseSSE);
     tty->print("Allocation: ");
...
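The new feature bits come straight from CPUID leaf 1: ECX bit 19 reports SSE4.1 and bit 20 reports SSE4.2. A quick stand-alone probe of the same bits, using the GCC/Clang `<cpuid.h>` helper (MSVC would use `__cpuidex` instead):

```cpp
#include <cpuid.h>
#include <cstdio>

int main() {
  unsigned eax, ebx, ecx, edx;
  if (__get_cpuid(1, &eax, &ebx, &ecx, &edx)) {
    std::printf("sse4.1: %u\n", (ecx >> 19) & 1);  // ECX bit 19
    std::printf("sse4.2: %u\n", (ecx >> 20) & 1);  // ECX bit 20
  }
  return 0;
}
```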
@@ -68,9 +68,9 @@ public:
                cmpxchg16: 1,
                         : 4,
                dca      : 1,
-                        : 4,
-               popcnt   : 1,
-                        : 8;
+               sse4_1   : 1,
+               sse4_2   : 1,
+                        : 11;
       } bits;
   };
@@ -177,8 +177,9 @@ protected:
     CPU_SSE2 = (1 << 7),
     CPU_SSE3 = (1 << 8), // sse3 comes from cpuid 1 (ECX)
     CPU_SSSE3= (1 << 9),
-    CPU_SSE4 = (1 <<10),
-    CPU_SSE4A= (1 <<11)
+    CPU_SSE4A= (1 <<10),
+    CPU_SSE4_1 = (1 << 11),
+    CPU_SSE4_2 = (1 << 12)
   } cpuFeatureFlags;
 
   // cpuid information block. All info derived from executing cpuid with
@@ -240,22 +241,14 @@ protected:
   static CpuidInfo _cpuid_info;
 
   // Extractors and predicates
-  static bool is_extended_cpu_family() {
-    const uint32_t Extended_Cpu_Family = 0xf;
-    return _cpuid_info.std_cpuid1_rax.bits.family == Extended_Cpu_Family;
-  }
   static uint32_t extended_cpu_family() {
     uint32_t result = _cpuid_info.std_cpuid1_rax.bits.family;
-    if (is_extended_cpu_family()) {
-      result += _cpuid_info.std_cpuid1_rax.bits.ext_family;
-    }
+    result += _cpuid_info.std_cpuid1_rax.bits.ext_family;
     return result;
   }
   static uint32_t extended_cpu_model() {
     uint32_t result = _cpuid_info.std_cpuid1_rax.bits.model;
-    if (is_extended_cpu_family()) {
-      result |= _cpuid_info.std_cpuid1_rax.bits.ext_model << 4;
-    }
+    result |= _cpuid_info.std_cpuid1_rax.bits.ext_model << 4;
     return result;
   }
   static uint32_t cpu_stepping() {
@@ -293,6 +286,10 @@ protected:
       result |= CPU_SSSE3;
     if (is_amd() && _cpuid_info.ext_cpuid1_rcx.bits.sse4a != 0)
      result |= CPU_SSE4A;
+    if (_cpuid_info.std_cpuid1_rcx.bits.sse4_1 != 0)
+      result |= CPU_SSE4_1;
+    if (_cpuid_info.std_cpuid1_rcx.bits.sse4_2 != 0)
+      result |= CPU_SSE4_2;
     return result;
   }
@@ -380,7 +377,8 @@ public:
   static bool supports_sse2()   { return (_cpuFeatures & CPU_SSE2) != 0; }
   static bool supports_sse3()   { return (_cpuFeatures & CPU_SSE3) != 0; }
   static bool supports_ssse3()  { return (_cpuFeatures & CPU_SSSE3)!= 0; }
-  static bool supports_sse4()   { return (_cpuFeatures & CPU_SSE4) != 0; }
+  static bool supports_sse4_1() { return (_cpuFeatures & CPU_SSE4_1) != 0; }
+  static bool supports_sse4_2() { return (_cpuFeatures & CPU_SSE4_2) != 0; }
   //
   // AMD features
   //
...
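The simplified extractors above decode CPUID leaf-1 EAX as: stepping [3:0], model [7:4], family [11:8], ext_model [19:16], ext_family [27:20]. Like the patched code, the sketch below merges the extended fields unconditionally; strictly, Intel's manual applies ext_family only when family == 0xF (and ext_model when family is 0x6 or 0xF), which is the check the removed `is_extended_cpu_family()` used to perform.

```cpp
#include <cstdint>

struct CpuSignature { uint32_t family, model, stepping; };

// Decode of CPUID leaf-1 EAX matching the patched extractors: the
// extended fields are folded in unconditionally.
CpuSignature decode_leaf1_eax(uint32_t eax) {
  CpuSignature s;
  s.stepping = eax & 0xF;
  s.family   = ((eax >> 8) & 0xF) + ((eax >> 20) & 0xFF);        // family + ext_family
  s.model    = ((eax >> 4) & 0xF) | (((eax >> 16) & 0xF) << 4);  // ext_model:model
  return s;
}
```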
@@ -186,8 +186,10 @@ void VM_Version::get_processor_features() {
   if (!VM_Version::supports_sse2()) {
     vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported");
   }
-  if (UseSSE < 4)
-    _cpuFeatures &= ~CPU_SSE4;
+  if (UseSSE < 4) {
+    _cpuFeatures &= ~CPU_SSE4_1;
+    _cpuFeatures &= ~CPU_SSE4_2;
+  }
   if (UseSSE < 3) {
     _cpuFeatures &= ~CPU_SSE3;
     _cpuFeatures &= ~CPU_SSSE3;
@@ -204,7 +206,7 @@ void VM_Version::get_processor_features() {
   }
 
   char buf[256];
-  jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
+  jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
               cores_per_cpu(), threads_per_core(),
               cpu_family(), _model, _stepping,
               (supports_cmov() ? ", cmov" : ""),
@@ -215,7 +217,8 @@ void VM_Version::get_processor_features() {
               (supports_sse2() ? ", sse2" : ""),
               (supports_sse3() ? ", sse3" : ""),
               (supports_ssse3()? ", ssse3": ""),
-              (supports_sse4() ? ", sse4" : ""),
+              (supports_sse4_1() ? ", sse4.1" : ""),
+              (supports_sse4_2() ? ", sse4.2" : ""),
               (supports_mmx_ext() ? ", mmxext" : ""),
               (supports_3dnow() ? ", 3dnow" : ""),
               (supports_3dnow2() ? ", 3dnowext" : ""),
@@ -228,7 +231,7 @@ void VM_Version::get_processor_features() {
   // older Pentiums which do not support it.
   if( UseSSE > 4 ) UseSSE=4;
   if( UseSSE < 0 ) UseSSE=0;
-  if( !supports_sse4() ) // Drop to 3 if no SSE4 support
+  if( !supports_sse4_1() ) // Drop to 3 if no SSE4 support
     UseSSE = MIN2((intx)3,UseSSE);
   if( !supports_sse3() ) // Drop to 2 if no SSE3 support
     UseSSE = MIN2((intx)2,UseSSE);
@@ -314,6 +317,14 @@ void VM_Version::get_processor_features() {
         MaxLoopPad = 11;
       }
 #endif // COMPILER2
+      if( FLAG_IS_DEFAULT(UseXMMForArrayCopy) ) {
+        UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
+      }
+      if( supports_sse4_2() && supports_ht() ) { // Newest Intel cpus
+        if( FLAG_IS_DEFAULT(UseUnalignedLoadStores) && UseXMMForArrayCopy ) {
+          UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
+        }
+      }
     }
   }
@@ -355,7 +366,7 @@ void VM_Version::get_processor_features() {
 #ifndef PRODUCT
   if (PrintMiscellaneous && Verbose) {
-    tty->print_cr("Logical CPUs per package: %u",
+    tty->print_cr("Logical CPUs per core: %u",
                   logical_processors_per_package());
     tty->print_cr("UseSSE=%d",UseSSE);
     tty->print("Allocation: ");
...
@@ -68,9 +68,9 @@ public:
                cmpxchg16: 1,
                         : 4,
                dca      : 1,
-                        : 4,
-               popcnt   : 1,
-                        : 8;
+               sse4_1   : 1,
+               sse4_2   : 1,
+                        : 11;
       } bits;
   };
@@ -177,8 +177,9 @@ protected:
     CPU_SSE2 = (1 << 7),
     CPU_SSE3 = (1 << 8),
     CPU_SSSE3= (1 << 9),
-    CPU_SSE4 = (1 <<10),
-    CPU_SSE4A= (1 <<11)
+    CPU_SSE4A= (1 <<10),
+    CPU_SSE4_1 = (1 << 11),
+    CPU_SSE4_2 = (1 << 12)
   } cpuFeatureFlags;
 
   // cpuid information block. All info derived from executing cpuid with
@@ -240,22 +241,14 @@ protected:
   static CpuidInfo _cpuid_info;
 
   // Extractors and predicates
-  static bool is_extended_cpu_family() {
-    const uint32_t Extended_Cpu_Family = 0xf;
-    return _cpuid_info.std_cpuid1_eax.bits.family == Extended_Cpu_Family;
-  }
   static uint32_t extended_cpu_family() {
     uint32_t result = _cpuid_info.std_cpuid1_eax.bits.family;
-    if (is_extended_cpu_family()) {
-      result += _cpuid_info.std_cpuid1_eax.bits.ext_family;
-    }
+    result += _cpuid_info.std_cpuid1_eax.bits.ext_family;
     return result;
   }
   static uint32_t extended_cpu_model() {
     uint32_t result = _cpuid_info.std_cpuid1_eax.bits.model;
-    if (is_extended_cpu_family()) {
-      result |= _cpuid_info.std_cpuid1_eax.bits.ext_model << 4;
-    }
+    result |= _cpuid_info.std_cpuid1_eax.bits.ext_model << 4;
     return result;
   }
   static uint32_t cpu_stepping() {
@@ -293,6 +286,10 @@ protected:
       result |= CPU_SSSE3;
     if (is_amd() && _cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0)
       result |= CPU_SSE4A;
+    if (_cpuid_info.std_cpuid1_ecx.bits.sse4_1 != 0)
+      result |= CPU_SSE4_1;
+    if (_cpuid_info.std_cpuid1_ecx.bits.sse4_2 != 0)
+      result |= CPU_SSE4_2;
     return result;
   }
@@ -380,7 +377,8 @@ public:
   static bool supports_sse2()   { return (_cpuFeatures & CPU_SSE2) != 0; }
   static bool supports_sse3()   { return (_cpuFeatures & CPU_SSE3) != 0; }
   static bool supports_ssse3()  { return (_cpuFeatures & CPU_SSSE3)!= 0; }
-  static bool supports_sse4()   { return (_cpuFeatures & CPU_SSE4) != 0; }
+  static bool supports_sse4_1() { return (_cpuFeatures & CPU_SSE4_1) != 0; }
+  static bool supports_sse4_2() { return (_cpuFeatures & CPU_SSE4_2) != 0; }
   //
   // AMD features
   //
...
@@ -4810,6 +4810,16 @@ operand immL0() %{
   interface(CONST_INTER);
 %}
 
+// Long Immediate zero
+operand immL_M1() %{
+  predicate( n->get_long() == -1L );
+  match(ConL);
+  op_cost(0);
+
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
 // Long immediate from 0 to 127.
 // Used for a shorter form of long mul by 10.
 operand immL_127() %{
@@ -8621,6 +8631,18 @@ instruct xorI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
   ins_pipe( ialu_reg_reg );
 %}
 
+// Xor Register with Immediate -1
+instruct xorI_eReg_im1(eRegI dst, immI_M1 imm) %{
+  match(Set dst (XorI dst imm));
+
+  size(2);
+  format %{ "NOT $dst" %}
+  ins_encode %{
+     __ notl($dst$$Register);
+  %}
+  ins_pipe( ialu_reg );
+%}
+
 // Xor Register with Immediate
 instruct xorI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
   match(Set dst (XorI dst src));
@@ -8938,6 +8960,18 @@ instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
   ins_pipe( ialu_reg_reg_long );
 %}
 
+// Xor Long Register with Immediate -1
+instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
+  match(Set dst (XorL dst imm));
+  format %{ "NOT $dst.lo\n\t"
+            "NOT $dst.hi" %}
+  ins_encode %{
+     __ notl($dst$$Register);
+     __ notl(HIGH_FROM_LOW($dst$$Register));
+  %}
+  ins_pipe( ialu_reg_long );
+%}
+
 // Xor Long Register with Immediate
 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
   match(Set dst (XorL dst src));
...
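The new matcher rules rest on the identity `x ^ -1 == ~x`, which lets the back end emit the one-operand NOT (2 bytes for a register, hence the `size(2)` above) instead of the 6-byte `xor r32, imm32`. The 64-bit-on-32-bit form simply NOTs both halves, since no carry crosses them. A quick check of both facts:

```cpp
#include <cassert>
#include <cstdint>

int main() {
  // XOR with an all-ones pattern is bitwise NOT, for any width.
  for (int32_t x : {0, 1, -1, 0x12345678, INT32_MIN}) {
    assert((x ^ -1) == ~x);
  }
  // The long form on 32-bit just complements each half independently:
  uint64_t v = 0x0123456789ABCDEFull;
  uint32_t lo = ~uint32_t(v), hi = ~uint32_t(v >> 32);
  assert(((uint64_t(hi) << 32) | lo) == ~v);
  return 0;
}
```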
@@ -9309,6 +9309,17 @@ instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
   ins_pipe(ialu_reg_reg);
 %}
 
+// Xor Register with Immediate -1
+instruct xorI_rReg_im1(rRegI dst, immI_M1 imm) %{
+  match(Set dst (XorI dst imm));
+
+  format %{ "not $dst" %}
+  ins_encode %{
+     __ notl($dst$$Register);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
 // Xor Register with Immediate
 instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
 %{
@@ -9529,6 +9540,17 @@ instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
   ins_pipe(ialu_reg_reg);
 %}
 
+// Xor Register with Immediate -1
+instruct xorL_rReg_im1(rRegL dst, immL_M1 imm) %{
+  match(Set dst (XorL dst imm));
+
+  format %{ "notq $dst" %}
+  ins_encode %{
+     __ notq($dst$$Register);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
 // Xor Register with Immediate
 instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
 %{
...
@@ -156,7 +156,8 @@ Node *AddNode::Ideal(PhaseGVN *phase, bool can_reshape) {
   if( add1_op == this_op && !con_right ) {
     Node *a12 = add1->in(2);
     const Type *t12 = phase->type( a12 );
-    if( t12->singleton() && t12 != Type::TOP && (add1 != add1->in(1)) ) {
+    if( t12->singleton() && t12 != Type::TOP && (add1 != add1->in(1)) &&
+        !(add1->in(1)->is_Phi() && add1->in(1)->as_Phi()->is_tripcount()) ) {
       assert(add1->in(1) != this, "dead loop in AddNode::Ideal");
       add2 = add1->clone();
       add2->set_req(2, in(2));
@@ -173,7 +174,8 @@ Node *AddNode::Ideal(PhaseGVN *phase, bool can_reshape) {
   if( add2_op == this_op && !con_left ) {
     Node *a22 = add2->in(2);
     const Type *t22 = phase->type( a22 );
-    if( t22->singleton() && t22 != Type::TOP && (add2 != add2->in(1)) ) {
+    if( t22->singleton() && t22 != Type::TOP && (add2 != add2->in(1)) &&
+        !(add2->in(1)->is_Phi() && add2->in(1)->as_Phi()->is_tripcount()) ) {
       assert(add2->in(1) != this, "dead loop in AddNode::Ideal");
       Node *addx = add2->clone();
       addx->set_req(1, in(1));
@@ -225,34 +227,63 @@ const Type *AddNode::add_of_identity( const Type *t1, const Type *t2 ) const {
 //=============================================================================
 //------------------------------Idealize---------------------------------------
 Node *AddINode::Ideal(PhaseGVN *phase, bool can_reshape) {
-  int op1 = in(1)->Opcode();
-  int op2 = in(2)->Opcode();
+  Node* in1 = in(1);
+  Node* in2 = in(2);
+  int op1 = in1->Opcode();
+  int op2 = in2->Opcode();
   // Fold (con1-x)+con2 into (con1+con2)-x
+  if ( op1 == Op_AddI && op2 == Op_SubI ) {
+    // Swap edges to try optimizations below
+    in1 = in2;
+    in2 = in(1);
+    op1 = op2;
+    op2 = in2->Opcode();
+  }
   if( op1 == Op_SubI ) {
-    const Type *t_sub1 = phase->type( in(1)->in(1) );
-    const Type *t_2 = phase->type( in(2) );
+    const Type *t_sub1 = phase->type( in1->in(1) );
+    const Type *t_2 = phase->type( in2 );
     if( t_sub1->singleton() && t_2->singleton() && t_sub1 != Type::TOP && t_2 != Type::TOP )
       return new (phase->C, 3) SubINode(phase->makecon( add_ring( t_sub1, t_2 ) ),
-                              in(1)->in(2) );
+                              in1->in(2) );
     // Convert "(a-b)+(c-d)" into "(a+c)-(b+d)"
     if( op2 == Op_SubI ) {
       // Check for dead cycle: d = (a-b)+(c-d)
-      assert( in(1)->in(2) != this && in(2)->in(2) != this,
+      assert( in1->in(2) != this && in2->in(2) != this,
               "dead loop in AddINode::Ideal" );
       Node *sub = new (phase->C, 3) SubINode(NULL, NULL);
-      sub->init_req(1, phase->transform(new (phase->C, 3) AddINode(in(1)->in(1), in(2)->in(1) ) ));
-      sub->init_req(2, phase->transform(new (phase->C, 3) AddINode(in(1)->in(2), in(2)->in(2) ) ));
+      sub->init_req(1, phase->transform(new (phase->C, 3) AddINode(in1->in(1), in2->in(1) ) ));
+      sub->init_req(2, phase->transform(new (phase->C, 3) AddINode(in1->in(2), in2->in(2) ) ));
       return sub;
     }
+    // Convert "(a-b)+(b+c)" into "(a+c)"
+    if( op2 == Op_AddI && in1->in(2) == in2->in(1) ) {
+      assert(in1->in(1) != this && in2->in(2) != this,"dead loop in AddINode::Ideal");
+      return new (phase->C, 3) AddINode(in1->in(1), in2->in(2));
+    }
+    // Convert "(a-b)+(c+b)" into "(a+c)"
+    if( op2 == Op_AddI && in1->in(2) == in2->in(2) ) {
+      assert(in1->in(1) != this && in2->in(1) != this,"dead loop in AddINode::Ideal");
+      return new (phase->C, 3) AddINode(in1->in(1), in2->in(1));
+    }
+    // Convert "(a-b)+(b-c)" into "(a-c)"
+    if( op2 == Op_SubI && in1->in(2) == in2->in(1) ) {
+      assert(in1->in(1) != this && in2->in(2) != this,"dead loop in AddINode::Ideal");
+      return new (phase->C, 3) SubINode(in1->in(1), in2->in(2));
+    }
+    // Convert "(a-b)+(c-a)" into "(c-b)"
+    if( op2 == Op_SubI && in1->in(1) == in2->in(2) ) {
+      assert(in1->in(2) != this && in2->in(1) != this,"dead loop in AddINode::Ideal");
+      return new (phase->C, 3) SubINode(in2->in(1), in1->in(2));
+    }
   }
 
   // Convert "x+(0-y)" into "(x-y)"
-  if( op2 == Op_SubI && phase->type(in(2)->in(1)) == TypeInt::ZERO )
-    return new (phase->C, 3) SubINode(in(1), in(2)->in(2) );
+  if( op2 == Op_SubI && phase->type(in2->in(1)) == TypeInt::ZERO )
+    return new (phase->C, 3) SubINode(in1, in2->in(2) );
 
   // Convert "(0-y)+x" into "(x-y)"
-  if( op1 == Op_SubI && phase->type(in(1)->in(1)) == TypeInt::ZERO )
-    return new (phase->C, 3) SubINode( in(2), in(1)->in(2) );
+  if( op1 == Op_SubI && phase->type(in1->in(1)) == TypeInt::ZERO )
+    return new (phase->C, 3) SubINode( in2, in1->in(2) );
 
   // Convert (x>>>z)+y into (x+(y<<z))>>>z for small constant z and y.
   // Helps with array allocation math constant folding
@@ -266,15 +297,15 @@ Node *AddINode::Ideal(PhaseGVN *phase, bool can_reshape) {
   // Have not observed cases where type information exists to support
   // positive y and (x <= -(y << z))
   if( op1 == Op_URShiftI && op2 == Op_ConI &&
-      in(1)->in(2)->Opcode() == Op_ConI ) {
-    jint z = phase->type( in(1)->in(2) )->is_int()->get_con() & 0x1f; // only least significant 5 bits matter
-    jint y = phase->type( in(2) )->is_int()->get_con();
+      in1->in(2)->Opcode() == Op_ConI ) {
+    jint z = phase->type( in1->in(2) )->is_int()->get_con() & 0x1f; // only least significant 5 bits matter
+    jint y = phase->type( in2 )->is_int()->get_con();
 
     if( z < 5 && -5 < y && y < 0 ) {
-      const Type *t_in11 = phase->type(in(1)->in(1));
+      const Type *t_in11 = phase->type(in1->in(1));
       if( t_in11 != Type::TOP && (t_in11->is_int()->_lo >= -(y << z)) ) {
-        Node *a = phase->transform( new (phase->C, 3) AddINode( in(1)->in(1), phase->intcon(y<<z) ) );
-        return new (phase->C, 3) URShiftINode( a, in(1)->in(2) );
+        Node *a = phase->transform( new (phase->C, 3) AddINode( in1->in(1), phase->intcon(y<<z) ) );
+        return new (phase->C, 3) URShiftINode( a, in1->in(2) );
       }
     }
   }
@@ -328,39 +359,73 @@ const Type *AddINode::add_ring( const Type *t0, const Type *t1 ) const {
 //=============================================================================
 //------------------------------Idealize---------------------------------------
 Node *AddLNode::Ideal(PhaseGVN *phase, bool can_reshape) {
-  int op1 = in(1)->Opcode();
-  int op2 = in(2)->Opcode();
+  Node* in1 = in(1);
+  Node* in2 = in(2);
+  int op1 = in1->Opcode();
+  int op2 = in2->Opcode();
+
+  // Fold (con1-x)+con2 into (con1+con2)-x
+  if ( op1 == Op_AddL && op2 == Op_SubL ) {
+    // Swap edges to try optimizations below
+    in1 = in2;
+    in2 = in(1);
+    op1 = op2;
+    op2 = in2->Opcode();
+  }
   // Fold (con1-x)+con2 into (con1+con2)-x
   if( op1 == Op_SubL ) {
-    const Type *t_sub1 = phase->type( in(1)->in(1) );
-    const Type *t_2 = phase->type( in(2) );
+    const Type *t_sub1 = phase->type( in1->in(1) );
+    const Type *t_2 = phase->type( in2 );
     if( t_sub1->singleton() && t_2->singleton() && t_sub1 != Type::TOP && t_2 != Type::TOP )
       return new (phase->C, 3) SubLNode(phase->makecon( add_ring( t_sub1, t_2 ) ),
-                              in(1)->in(2) );
+                              in1->in(2) );
     // Convert "(a-b)+(c-d)" into "(a+c)-(b+d)"
     if( op2 == Op_SubL ) {
       // Check for dead cycle: d = (a-b)+(c-d)
-      assert( in(1)->in(2) != this && in(2)->in(2) != this,
+      assert( in1->in(2) != this && in2->in(2) != this,
              "dead loop in AddLNode::Ideal" );
       Node *sub = new (phase->C, 3) SubLNode(NULL, NULL);
-      sub->init_req(1, phase->transform(new (phase->C, 3) AddLNode(in(1)->in(1), in(2)->in(1) ) ));
-      sub->init_req(2, phase->transform(new (phase->C, 3) AddLNode(in(1)->in(2), in(2)->in(2) ) ));
+      sub->init_req(1, phase->transform(new (phase->C, 3) AddLNode(in1->in(1), in2->in(1) ) ));
+      sub->init_req(2, phase->transform(new (phase->C, 3) AddLNode(in1->in(2), in2->in(2) ) ));
       return sub;
     }
+    // Convert "(a-b)+(b+c)" into "(a+c)"
+    if( op2 == Op_AddL && in1->in(2) == in2->in(1) ) {
+      assert(in1->in(1) != this && in2->in(2) != this,"dead loop in AddLNode::Ideal");
+      return new (phase->C, 3) AddLNode(in1->in(1), in2->in(2));
+    }
+    // Convert "(a-b)+(c+b)" into "(a+c)"
+    if( op2 == Op_AddL && in1->in(2) == in2->in(2) ) {
+      assert(in1->in(1) != this && in2->in(1) != this,"dead loop in AddLNode::Ideal");
+      return new (phase->C, 3) AddLNode(in1->in(1), in2->in(1));
+    }
+    // Convert "(a-b)+(b-c)" into "(a-c)"
+    if( op2 == Op_SubL && in1->in(2) == in2->in(1) ) {
+      assert(in1->in(1) != this && in2->in(2) != this,"dead loop in AddLNode::Ideal");
+      return new (phase->C, 3) SubLNode(in1->in(1), in2->in(2));
+    }
+    // Convert "(a-b)+(c-a)" into "(c-b)"
+    if( op2 == Op_SubL && in1->in(1) == in1->in(2) ) {
+      assert(in1->in(2) != this && in2->in(1) != this,"dead loop in AddLNode::Ideal");
+      return new (phase->C, 3) SubLNode(in2->in(1), in1->in(2));
+    }
   }
 
   // Convert "x+(0-y)" into "(x-y)"
-  if( op2 == Op_SubL && phase->type(in(2)->in(1)) == TypeLong::ZERO )
-    return new (phase->C, 3) SubLNode(in(1), in(2)->in(2) );
+  if( op2 == Op_SubL && phase->type(in2->in(1)) == TypeLong::ZERO )
+    return new (phase->C, 3) SubLNode( in1, in2->in(2) );
+
+  // Convert "(0-y)+x" into "(x-y)"
+  if( op1 == Op_SubL && phase->type(in1->in(1)) == TypeInt::ZERO )
+    return new (phase->C, 3) SubLNode( in2, in1->in(2) );
 
   // Convert "X+X+X+X+X...+X+Y" into "k*X+Y" or really convert "X+(X+Y)"
   // into "(X<<1)+Y" and let shift-folding happen.
   if( op2 == Op_AddL &&
-      in(2)->in(1) == in(1) &&
+      in2->in(1) == in1 &&
       op1 != Op_ConL &&
       0 ) {
-    Node *shift = phase->transform(new (phase->C, 3) LShiftLNode(in(1),phase->intcon(1)));
-    return new (phase->C, 3) AddLNode(shift,in(2)->in(2));
+    Node *shift = phase->transform(new (phase->C, 3) LShiftLNode(in1,phase->intcon(1)));
+    return new (phase->C, 3) AddLNode(shift,in2->in(2));
   }
 
   return AddNode::Ideal(phase, can_reshape);
...
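Sanity check for the four new rewrites: they are ordinary ring identities and hold under the wraparound arithmetic C2 models for Java int/long, so no overflow reasoning is required. (Two details a reviewer may want to double-check in the diff itself: the AddL guard for the "(a-b)+(c-a)" case compares `in1->in(1) == in1->in(2)` where the AddI version compares against `in2->in(2)`, and the AddL "(0-y)+x" case tests `TypeInt::ZERO` rather than `TypeLong::ZERO`.) Modeled here with `uint32_t`, since signed overflow is UB in C++ while Java int wraps:

```cpp
#include <cassert>
#include <cstdint>

int main() {
  uint32_t a = 0xDEADBEEFu, b = 0x12345678u, c = 0x0F00BA42u;
  assert((a - b) + (b + c) == a + c);  // "(a-b)+(b+c)" into "(a+c)"
  assert((a - b) + (c + b) == a + c);  // "(a-b)+(c+b)" into "(a+c)"
  assert((a - b) + (b - c) == a - c);  // "(a-b)+(b-c)" into "(a-c)"
  assert((a - b) + (c - a) == c - b);  // "(a-b)+(c-a)" into "(c-b)"
  return 0;
}
```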
@@ -1817,6 +1817,12 @@ Node *PhiNode::Ideal(PhaseGVN *phase, bool can_reshape) {
   return progress;              // Return any progress
 }
 
+//------------------------------is_tripcount-----------------------------------
+bool PhiNode::is_tripcount() const {
+  return (in(0) != NULL && in(0)->is_CountedLoop() &&
+          in(0)->as_CountedLoop()->phi() == this);
+}
+
 //------------------------------out_RegMask------------------------------------
 const RegMask &PhiNode::in_RegMask(uint i) const {
   return i ? out_RegMask() : RegMask::Empty;
@@ -1832,9 +1838,7 @@ const RegMask &PhiNode::out_RegMask() const {
 #ifndef PRODUCT
 void PhiNode::dump_spec(outputStream *st) const {
   TypeNode::dump_spec(st);
-  if (in(0) != NULL &&
-      in(0)->is_CountedLoop() &&
-      in(0)->as_CountedLoop()->phi() == this) {
+  if (is_tripcount()) {
     st->print(" #tripcount");
   }
 }
...
@@ -162,6 +162,8 @@ public:
     return NULL;  // not a copy!
   }
 
+  bool is_tripcount() const;
+
   // Determine a unique non-trivial input, if any.
   // Ignore casts if it helps.  Return NULL on failure.
   Node* unique_input(PhaseTransform *phase);
...
@@ -110,10 +110,13 @@ static Node *transform_int_divide( PhaseGVN *phase, Node *dividend, jint divisor
     } else if( dividend->Opcode() == Op_AndI ) {
       // An AND mask of sufficient size clears the low bits and
       // I can avoid rounding.
-      const TypeInt *andconi = phase->type( dividend->in(2) )->isa_int();
-      if( andconi && andconi->is_con(-d) ) {
-        dividend = dividend->in(1);
-        needs_rounding = false;
+      const TypeInt *andconi_t = phase->type( dividend->in(2) )->isa_int();
+      if( andconi_t && andconi_t->is_con() ) {
+        jint andconi = andconi_t->get_con();
+        if( andconi < 0 && is_power_of_2(-andconi) && (-andconi) >= d ) {
+          dividend = dividend->in(1);
+          needs_rounding = false;
+        }
       }
     }
@@ -316,10 +319,13 @@ static Node *transform_long_divide( PhaseGVN *phase, Node *dividend, jlong divis
     } else if( dividend->Opcode() == Op_AndL ) {
       // An AND mask of sufficient size clears the low bits and
       // I can avoid rounding.
-      const TypeLong *andconl = phase->type( dividend->in(2) )->isa_long();
-      if( andconl && andconl->is_con(-d)) {
-        dividend = dividend->in(1);
-        needs_rounding = false;
+      const TypeLong *andconl_t = phase->type( dividend->in(2) )->isa_long();
+      if( andconl_t && andconl_t->is_con() ) {
+        jlong andconl = andconl_t->get_con();
+        if( andconl < 0 && is_power_of_2_long(-andconl) && (-andconl) >= d ) {
+          dividend = dividend->in(1);
+          needs_rounding = false;
+        }
       }
     }
@@ -704,11 +710,18 @@ const Type *DivDNode::Value( PhaseTransform *phase ) const {
   if( t2 == TypeD::ONE )
     return t1;
 
-  // If divisor is a constant and not zero, divide them numbers
-  if( t1->base() == Type::DoubleCon &&
-      t2->base() == Type::DoubleCon &&
-      t2->getd() != 0.0 ) // could be negative zero
-    return TypeD::make( t1->getd()/t2->getd() );
+#if defined(IA32)
+  if (!phase->C->method()->is_strict())
+    // Can't trust native compilers to properly fold strict double
+    // division with round-to-zero on this platform.
+#endif
+  {
+    // If divisor is a constant and not zero, divide them numbers
+    if( t1->base() == Type::DoubleCon &&
+        t2->base() == Type::DoubleCon &&
+        t2->getd() != 0.0 ) // could be negative zero
+      return TypeD::make( t1->getd()/t2->getd() );
+  }
 
   // If the dividend is a constant zero
   // Note: if t1 and t2 are zero then result is NaN (JVMS page 213)
...
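The relaxed guard keeps the intent of the old `is_con(-d)` check but accepts any negative power-of-two mask at least as large as the divisor (so `x & -16` also qualifies for `d = 8`): once the low log2(d) bits are known zero, a signed right shift is exact and the usual negative-dividend rounding fix-up can be dropped. Illustration for d = 8; this assumes arithmetic right shift on signed values, which every mainstream compiler provides:

```cpp
#include <cassert>
#include <cstdint>

// After x & -8 the low three bits are zero, so dividing by 8 degenerates
// to an arithmetic shift with no rounding adjustment, even for negatives.
int32_t div8_after_mask(int32_t x) {
  int32_t masked = x & -8;   // a multiple of 8, positive or negative
  return masked >> 3;        // exact quotient
}

int main() {
  for (int32_t x : {100, -100, 7, -7, 0, -2147483640}) {
    assert(div8_after_mask(x) == (x & -8) / 8);
  }
  return 0;
}
```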
@@ -679,6 +679,10 @@ void PhaseIdealLoop::insert_pre_post_loops( IdealLoopTree *loop, Node_List &old_
   CountedLoopNode *post_head = old_new[main_head->_idx]->as_CountedLoop();
   post_head->set_post_loop(main_head);
 
+  // Reduce the post-loop trip count.
+  CountedLoopEndNode* post_end = old_new[main_end ->_idx]->as_CountedLoopEnd();
+  post_end->_prob = PROB_FAIR;
+
   // Build the main-loop normal exit.
   IfFalseNode *new_main_exit = new (C, 1) IfFalseNode(main_end);
   _igvn.register_new_node_with_optimizer( new_main_exit );
@@ -748,6 +752,9 @@ void PhaseIdealLoop::insert_pre_post_loops( IdealLoopTree *loop, Node_List &old_
   pre_head->set_pre_loop(main_head);
   Node *pre_incr = old_new[incr->_idx];
 
+  // Reduce the pre-loop trip count.
+  pre_end->_prob = PROB_FAIR;
+
   // Find the pre-loop normal exit.
   Node* pre_exit = pre_end->proj_out(false);
   assert( pre_exit->Opcode() == Op_IfFalse, "" );
@@ -767,8 +774,8 @@ void PhaseIdealLoop::insert_pre_post_loops( IdealLoopTree *loop, Node_List &old_
   register_new_node( min_cmp , new_pre_exit );
   register_new_node( min_bol , new_pre_exit );
 
-  // Build the IfNode
-  IfNode *min_iff = new (C, 2) IfNode( new_pre_exit, min_bol, PROB_FAIR, COUNT_UNKNOWN );
+  // Build the IfNode (assume the main-loop is executed always).
+  IfNode *min_iff = new (C, 2) IfNode( new_pre_exit, min_bol, PROB_ALWAYS, COUNT_UNKNOWN );
   _igvn.register_new_node_with_optimizer( min_iff );
   set_idom(min_iff, new_pre_exit, dd_main_head);
   set_loop(min_iff, loop->_parent);
@@ -1583,10 +1590,10 @@ bool IdealLoopTree::policy_do_remove_empty_loop( PhaseIdealLoop *phase ) {
 //=============================================================================
 //------------------------------iteration_split_impl---------------------------
-void IdealLoopTree::iteration_split_impl( PhaseIdealLoop *phase, Node_List &old_new ) {
+bool IdealLoopTree::iteration_split_impl( PhaseIdealLoop *phase, Node_List &old_new ) {
   // Check and remove empty loops (spam micro-benchmarks)
   if( policy_do_remove_empty_loop(phase) )
-    return;                     // Here we removed an empty loop
+    return true;                // Here we removed an empty loop
 
   bool should_peel = policy_peeling(phase); // Should we peel?
@@ -1596,7 +1603,8 @@ bool IdealLoopTree::iteration_split_impl( PhaseIdealLoop *phase, Node_List &old_
   // This removes loop-invariant tests (usually null checks).
   if( !_head->is_CountedLoop() ) { // Non-counted loop
     if (PartialPeelLoop && phase->partial_peel(this, old_new)) {
-      return;
+      // Partial peel succeeded so terminate this round of loop opts
+      return false;
     }
     if( should_peel ) {            // Should we peel?
 #ifndef PRODUCT
@@ -1606,14 +1614,14 @@ bool IdealLoopTree::iteration_split_impl( PhaseIdealLoop *phase, Node_List &old_
     } else if( should_unswitch ) {
       phase->do_unswitching(this, old_new);
     }
-    return;
+    return true;
   }
   CountedLoopNode *cl = _head->as_CountedLoop();
 
-  if( !cl->loopexit() ) return; // Ignore various kinds of broken loops
+  if( !cl->loopexit() ) return true; // Ignore various kinds of broken loops
 
   // Do nothing special to pre- and post- loops
-  if( cl->is_pre_loop() || cl->is_post_loop() ) return;
+  if( cl->is_pre_loop() || cl->is_post_loop() ) return true;
 
   // Compute loop trip count from profile data
   compute_profile_trip_cnt(phase);
@@ -1626,11 +1634,11 @@ bool IdealLoopTree::iteration_split_impl( PhaseIdealLoop *phase, Node_List &old_
       // Here we did some unrolling and peeling.  Eventually we will
       // completely unroll this loop and it will no longer be a loop.
       phase->do_maximally_unroll(this,old_new);
-      return;
+      return true;
     }
     if (should_unswitch) {
       phase->do_unswitching(this, old_new);
-      return;
+      return true;
     }
   }
@@ -1691,14 +1699,16 @@ bool IdealLoopTree::iteration_split_impl( PhaseIdealLoop *phase, Node_List &old_
     if( should_peel )           // Might want to peel but do nothing else
       phase->do_peeling(this,old_new);
   }
+  return true;
 }
 
 //=============================================================================
 //------------------------------iteration_split--------------------------------
-void IdealLoopTree::iteration_split( PhaseIdealLoop *phase, Node_List &old_new ) {
+bool IdealLoopTree::iteration_split( PhaseIdealLoop *phase, Node_List &old_new ) {
   // Recursively iteration split nested loops
-  if( _child ) _child->iteration_split( phase, old_new );
+  if( _child && !_child->iteration_split( phase, old_new ))
+    return false;
 
   // Clean out prior deadwood
   DCE_loop_body();
@@ -1720,7 +1730,9 @@ bool IdealLoopTree::iteration_split( PhaseIdealLoop *phase, Node_List &old_new )
       _allow_optimizations &&
       !tail()->is_top() ) {     // Also ignore the occasional dead backedge
     if (!_has_call) {
-      iteration_split_impl( phase, old_new );
+      if (!iteration_split_impl( phase, old_new )) {
+        return false;
+      }
     } else if (policy_unswitching(phase)) {
       phase->do_unswitching(this, old_new);
     }
@@ -1729,5 +1741,7 @@ bool IdealLoopTree::iteration_split( PhaseIdealLoop *phase, Node_List &old_new )
   // Minor offset re-organization to remove loop-fallout uses of
   // trip counter.
   if( _head->is_CountedLoop() ) phase->reorg_offsets( this );
-  if( _next ) _next->iteration_split( phase, old_new );
+  if( _next && !_next->iteration_split( phase, old_new ))
+    return false;
+  return true;
 }
@@ -325,12 +325,14 @@ public:
   // Returns TRUE if loop tree is structurally changed.
   bool beautify_loops( PhaseIdealLoop *phase );
 
-  // Perform iteration-splitting on inner loops.  Split iterations to avoid
-  // range checks or one-shot null checks.
-  void iteration_split( PhaseIdealLoop *phase, Node_List &old_new );
+  // Perform iteration-splitting on inner loops.  Split iterations to
+  // avoid range checks or one-shot null checks.  Returns false if the
+  // current round of loop opts should stop.
+  bool iteration_split( PhaseIdealLoop *phase, Node_List &old_new );
 
-  // Driver for various flavors of iteration splitting
-  void iteration_split_impl( PhaseIdealLoop *phase, Node_List &old_new );
+  // Driver for various flavors of iteration splitting.  Returns false
+  // if the current round of loop opts should stop.
+  bool iteration_split_impl( PhaseIdealLoop *phase, Node_List &old_new );
 
   // Given dominators, try to find loops with calls that must always be
   // executed (call dominates loop tail).  These loops do not need non-call
...
...@@ -1903,9 +1903,6 @@ void PhaseIdealLoop::clone_for_use_outside_loop( IdealLoopTree *loop, Node* n, N

       // Use in a phi is considered a use in the associated predecessor block
       use_c = use->in(0)->in(j);
     }
-    if (use_c->is_CountedLoop()) {
-      use_c = use_c->in(LoopNode::EntryControl);
-    }
     set_ctrl(n_clone, use_c);
     assert(!loop->is_member(get_loop(use_c)), "should be outside loop");
     get_loop(use_c)->_body.push(n_clone);
...
...@@ -152,6 +152,14 @@ const Type *MulNode::Value( PhaseTransform *phase ) const {

   if( t1 == Type::BOTTOM || t2 == Type::BOTTOM )
     return bottom_type();

+#if defined(IA32)
+  // Can't trust native compilers to properly fold strict double
+  // multiplication with round-to-zero on this platform.
+  if (op == Op_MulD && phase->C->method()->is_strict()) {
+    return TypeD::DOUBLE;
+  }
+#endif
+
   return mul_ring(t1,t2);            // Local flavor of type multiplication
 }
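The IA32 guard above bails out of compile-time folding for strict double multiplies because the C++ compiler building the VM may evaluate the product at x87 extended precision, which need not match Java's strict 64-bit semantics. A standalone illustration of that divergence follows; it is not HotSpot code, and the actual output depends on the host compiler and FPU configuration (on an SSE2 target the runtime value is +inf):

#include <cfloat>
#include <cstdio>

int main() {
  // Forced runtime evaluation: in strict IEEE double, DBL_MAX * 2.0
  // overflows to +inf, and +inf / 2.0 stays +inf.
  volatile double v = DBL_MAX;          // volatile blocks constant folding
  double at_runtime = v * 2.0 / 2.0;

  // Constant-folded by the host compiler: a compiler folding with 80-bit
  // x87 intermediates finds DBL_MAX * 2.0 representable and can produce
  // DBL_MAX instead of +inf -- the mismatch the #if defined(IA32) guard
  // avoids by refusing to fold at all.
  double folded = DBL_MAX * 2.0 / 2.0;

  printf("runtime=%g folded=%g\n", at_runtime, folded);
  return 0;
}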
...@@ -360,7 +368,7 @@ const Type *MulFNode::mul_ring(const Type *t0, const Type *t1) const {

 // Compute the product type of two double ranges into this node.
 const Type *MulDNode::mul_ring(const Type *t0, const Type *t1) const {
   if( t0 == Type::DOUBLE || t1 == Type::DOUBLE ) return Type::DOUBLE;
-  // We must be adding 2 double constants.
+  // We must be multiplying 2 double constants.
   return TypeD::make( t0->getd() * t1->getd() );
 }
...
...@@ -1320,7 +1320,8 @@ public:

   Node *pop() {
     if( _clock_index >= size() ) _clock_index = 0;
     Node *b = at(_clock_index);
-    map( _clock_index++, Node_List::pop());
+    map( _clock_index, Node_List::pop());
+    if (size() != 0) _clock_index++; // Always start from 0
     _in_worklist >>= b->_idx;
     return b;
   }
...
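The pop above removes the element under the round-robin "clock hand" by overwriting it with the last element; the fix makes the hand advance only while elements remain, so a drained worklist starts its next scan at index 0. A self-contained model of the fixed behavior on a std::vector (hypothetical names, not the Node_List API):

#include <cassert>
#include <vector>

struct Worklist {
  std::vector<int> items;
  size_t clock_index = 0;

  int pop() {
    if (clock_index >= items.size()) clock_index = 0;
    int b = items[clock_index];
    items[clock_index] = items.back();  // map(_clock_index, Node_List::pop())
    items.pop_back();
    if (!items.empty()) clock_index++;  // the fixed, conditional increment
    return b;
  }
};

int main() {
  Worklist w;
  w.items = {10, 20, 30};
  while (!w.items.empty()) w.pop();
  assert(w.clock_index == 0);  // unconditional ++ would leave a stale hand
  return 0;
}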
...@@ -34,7 +34,7 @@ static bool is_single_register(uint x) {

 #endif
 }

-//------------------------------may_be_copy_of_callee-----------------------------
+//---------------------------may_be_copy_of_callee-----------------------------
 // Check to see if we can possibly be a copy of a callee-save value.
 bool PhaseChaitin::may_be_copy_of_callee( Node *def ) const {
   // Short circuit if there are no callee save registers

...@@ -225,6 +225,20 @@ int PhaseChaitin::elide_copy( Node *n, int k, Block *current_block, Node_List &v

   // Scan all registers to see if this value is around already
   for( uint reg = 0; reg < (uint)_max_reg; reg++ ) {
+    if (reg == (uint)nk_reg) {
+      // Found ourselves so check if there is only one user of this
+      // copy and keep on searching for a better copy if so.
+      bool ignore_self = true;
+      x = n->in(k);
+      DUIterator_Fast imax, i = x->fast_outs(imax);
+      Node* first = x->fast_out(i); i++;
+      while (i < imax && ignore_self) {
+        Node* use = x->fast_out(i); i++;
+        if (use != first) ignore_self = false;
+      }
+      if (ignore_self) continue;
+    }
+
     Node *vv = value[reg];
     if( !single ) {             // Doubles check for aligned-adjacent pair
       if( (reg&1)==0 ) continue; // Wrong half of a pair
...
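The new block makes the register scan skip over the copy's own register when that copy has only one distinct user, so the allocator keeps looking for a better existing copy of the value. A standalone model of that "only one user" test, with hypothetical types rather than HotSpot's DUIterator machinery:

#include <vector>

// A def has a single user when every def-use edge out of it leads to the
// same node, even if that node occupies several edge slots.
struct Node {
  std::vector<Node*> outs;  // def-use edges; one user may appear repeatedly
};

static bool single_user(const Node& def) {
  if (def.outs.empty()) return true;
  const Node* first = def.outs[0];
  for (size_t i = 1; i < def.outs.size(); i++)
    if (def.outs[i] != first) return false;  // a second, distinct user
  return true;
}

int main() {
  Node def, a, b;
  def.outs = {&a, &a};      // one user on two edges: still "single"
  bool only_one = single_user(def);
  def.outs.push_back(&b);   // a genuinely different user appears
  return (only_one && !single_user(def)) ? 0 : 1;
}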
...@@ -206,6 +206,14 @@ Node *SubINode::Ideal(PhaseGVN *phase, bool can_reshape){

   if( op1 == Op_AddI && op2 == Op_AddI && in1->in(2) == in2->in(2) )
     return new (phase->C, 3) SubINode( in1->in(1), in2->in(1) );

+  // Convert "(A+X) - (X+B)" into "A - B"
+  if( op1 == Op_AddI && op2 == Op_AddI && in1->in(2) == in2->in(1) )
+    return new (phase->C, 3) SubINode( in1->in(1), in2->in(2) );
+
+  // Convert "(X+A) - (B+X)" into "A - B"
+  if( op1 == Op_AddI && op2 == Op_AddI && in1->in(1) == in2->in(2) )
+    return new (phase->C, 3) SubINode( in1->in(2), in2->in(1) );
+
   // Convert "A-(B-C)" into (A+C)-B", since add is commutative and generally
   // nicer to optimize than subtract.
   if( op2 == Op_SubI && in2->outcnt() == 1) {
...
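Both new identities are sound even when the inner additions overflow, because Java int arithmetic wraps modulo 2^32. This is modeled below with uint32_t (signed overflow would be undefined in C++); the helper name is illustrative only:

#include <cassert>
#include <cstdint>

// (A+X) - (X+B) == A - B under wrapping 32-bit arithmetic, which is what
// makes the SubINode rewrites above safe for overflowing intermediates.
static uint32_t via_adds(uint32_t a, uint32_t b, uint32_t x) {
  return (a + x) - (x + b);
}

int main() {
  // Deliberately overflowing intermediates:
  uint32_t a = 0x7fffffffu, b = 5u, x = 0x40000000u;
  assert(via_adds(a, b, x) == a - b);
  assert((x + a) - (b + x) == a - b);  // the second, mirrored identity
  return 0;
}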
...@@ -997,6 +997,12 @@ class CommandLineFlags {

   product(bool, UseXmmI2F, false,                                          \
           "Use SSE2 CVTDQ2PS instruction to convert Integer to Float")     \
                                                                            \
+  product(bool, UseXMMForArrayCopy, false,                                 \
+          "Use SSE2 MOVQ instruction for Arraycopy")                       \
+                                                                           \
+  product(bool, UseUnalignedLoadStores, false,                             \
+          "Use SSE2 MOVDQU instruction for Arraycopy")                     \
+                                                                           \
   product(intx, FieldsAllocationStyle, 1,                                  \
           "0 - type based with oops first, 1 - with oops last")            \
                                                                            \

...@@ -2555,7 +2561,7 @@ class CommandLineFlags {

   develop(intx, MaxRecursiveInlineLevel, 1,                                \
           "maximum number of nested recursive calls that are inlined")     \
                                                                            \
-  develop(intx, InlineSmallCode, 1000,                                     \
+  product(intx, InlineSmallCode, 1000,                                     \
           "Only inline already compiled methods if their code size is "    \
           "less than this")                                                \
                                                                            \
...
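Both arraycopy flags default to false and, judging by the xmm_copy_forward stub earlier in this change, are meant to be switched on per CPU family, for example -XX:+UseUnalignedLoadStores on processors where unaligned movdqu is cheap and -XX:+UseXMMForArrayCopy where 8-byte movq copies beat the integer path. Promoting InlineSmallCode from develop to product also means the inlining threshold becomes tunable with -XX:InlineSmallCode=1000 (or another byte count) in release builds, not only in debug builds.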
...@@ -29,6 +29,8 @@

  */

 public class Test6700047 {
+    static byte[] dummy = new byte[256];
+
     public static void main(String[] args) {
         for (int i = 0; i < 100000; i++) {
             intToLeftPaddedAsciiBytes();

...@@ -53,6 +55,7 @@ public class Test6700047 {

         if (offset > 0) {
             for(int j = 0; j < offset; j++) {
                 result++;
+                dummy[i] = 0;
             }
         }
         return result;
...
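The regression test change is small but, as far as one can tell, deliberate: the static dummy array and the store into it give the innermost loop a real memory effect, so the compiler cannot reduce the loop to pure arithmetic, and the repeated calls in main are there to get the method compiled so the loop transformations fixed above are actually exercised.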