提交 41479e87 编写于 作者: K kvn

7116452: Add support for AVX instructions

Summary: Added support for AVX extension to the x86 instruction set.
Reviewed-by: never
上级 8c079e48
此差异已折叠。
......@@ -503,7 +503,31 @@ class Assembler : public AbstractAssembler {
REX_WR = 0x4C,
REX_WRB = 0x4D,
REX_WRX = 0x4E,
REX_WRXB = 0x4F
REX_WRXB = 0x4F,
VEX_3bytes = 0xC4,
VEX_2bytes = 0xC5
};
enum VexPrefix {
VEX_B = 0x20,
VEX_X = 0x40,
VEX_R = 0x80,
VEX_W = 0x80
};
enum VexSimdPrefix {
VEX_SIMD_NONE = 0x0,
VEX_SIMD_66 = 0x1,
VEX_SIMD_F3 = 0x2,
VEX_SIMD_F2 = 0x3
};
enum VexOpcode {
VEX_OPCODE_NONE = 0x0,
VEX_OPCODE_0F = 0x1,
VEX_OPCODE_0F_38 = 0x2,
VEX_OPCODE_0F_3A = 0x3
};
enum WhichOperand {
......@@ -546,12 +570,88 @@ private:
void prefixq(Address adr);
void prefix(Address adr, Register reg, bool byteinst = false);
void prefixq(Address adr, Register reg);
void prefix(Address adr, XMMRegister reg);
void prefixq(Address adr, Register reg);
void prefixq(Address adr, XMMRegister reg);
void prefetch_prefix(Address src);
void rex_prefix(Address adr, XMMRegister xreg,
VexSimdPrefix pre, VexOpcode opc, bool rex_w);
int rex_prefix_and_encode(int dst_enc, int src_enc,
VexSimdPrefix pre, VexOpcode opc, bool rex_w);
void vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w,
int nds_enc, VexSimdPrefix pre, VexOpcode opc,
bool vector256);
void vex_prefix(Address adr, int nds_enc, int xreg_enc,
VexSimdPrefix pre, VexOpcode opc,
bool vex_w, bool vector256);
int vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc,
VexSimdPrefix pre, VexOpcode opc,
bool vex_w, bool vector256);
void simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr,
VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F,
bool rex_w = false, bool vector256 = false);
void simd_prefix(XMMRegister dst, Address src,
VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) {
simd_prefix(dst, xnoreg, src, pre, opc);
}
void simd_prefix(Address dst, XMMRegister src, VexSimdPrefix pre) {
simd_prefix(src, dst, pre);
}
void simd_prefix_q(XMMRegister dst, XMMRegister nds, Address src,
VexSimdPrefix pre) {
bool rex_w = true;
simd_prefix(dst, nds, src, pre, VEX_OPCODE_0F, rex_w);
}
int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src,
VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F,
bool rex_w = false, bool vector256 = false);
int simd_prefix_and_encode(XMMRegister dst, XMMRegister src,
VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) {
return simd_prefix_and_encode(dst, xnoreg, src, pre, opc);
}
// Move/convert 32-bit integer value.
int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, Register src,
VexSimdPrefix pre) {
// It is OK to cast from Register to XMMRegister to pass argument here
// since only encoding is used in simd_prefix_and_encode() and number of
// Gen and Xmm registers are the same.
return simd_prefix_and_encode(dst, nds, as_XMMRegister(src->encoding()), pre);
}
int simd_prefix_and_encode(XMMRegister dst, Register src, VexSimdPrefix pre) {
return simd_prefix_and_encode(dst, xnoreg, src, pre);
}
int simd_prefix_and_encode(Register dst, XMMRegister src,
VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) {
return simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, pre, opc);
}
// Move/convert 64-bit integer value.
int simd_prefix_and_encode_q(XMMRegister dst, XMMRegister nds, Register src,
VexSimdPrefix pre) {
bool rex_w = true;
return simd_prefix_and_encode(dst, nds, as_XMMRegister(src->encoding()), pre, VEX_OPCODE_0F, rex_w);
}
int simd_prefix_and_encode_q(XMMRegister dst, Register src, VexSimdPrefix pre) {
return simd_prefix_and_encode_q(dst, xnoreg, src, pre);
}
int simd_prefix_and_encode_q(Register dst, XMMRegister src,
VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) {
bool rex_w = true;
return simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, pre, opc, rex_w);
}
// Helper functions for groups of instructions
void emit_arith_b(int op1, int op2, Register dst, int imm8);
......@@ -764,6 +864,7 @@ private:
void addss(XMMRegister dst, Address src);
void addss(XMMRegister dst, XMMRegister src);
void andl(Address dst, int32_t imm32);
void andl(Register dst, int32_t imm32);
void andl(Register dst, Address src);
void andl(Register dst, Register src);
......@@ -774,9 +875,11 @@ private:
void andq(Register dst, Register src);
// Bitwise Logical AND of Packed Double-Precision Floating-Point Values
void andpd(XMMRegister dst, Address src);
void andpd(XMMRegister dst, XMMRegister src);
// Bitwise Logical AND of Packed Single-Precision Floating-Point Values
void andps(XMMRegister dst, XMMRegister src);
void bsfl(Register dst, Register src);
void bsrl(Register dst, Register src);
......@@ -837,9 +940,11 @@ private:
// Ordered Compare Scalar Double-Precision Floating-Point Values and set EFLAGS
void comisd(XMMRegister dst, Address src);
void comisd(XMMRegister dst, XMMRegister src);
// Ordered Compare Scalar Single-Precision Floating-Point Values and set EFLAGS
void comiss(XMMRegister dst, Address src);
void comiss(XMMRegister dst, XMMRegister src);
// Identify processor type and features
void cpuid() {
......@@ -849,14 +954,19 @@ private:
// Convert Scalar Double-Precision Floating-Point Value to Scalar Single-Precision Floating-Point Value
void cvtsd2ss(XMMRegister dst, XMMRegister src);
void cvtsd2ss(XMMRegister dst, Address src);
// Convert Doubleword Integer to Scalar Double-Precision Floating-Point Value
void cvtsi2sdl(XMMRegister dst, Register src);
void cvtsi2sdl(XMMRegister dst, Address src);
void cvtsi2sdq(XMMRegister dst, Register src);
void cvtsi2sdq(XMMRegister dst, Address src);
// Convert Doubleword Integer to Scalar Single-Precision Floating-Point Value
void cvtsi2ssl(XMMRegister dst, Register src);
void cvtsi2ssl(XMMRegister dst, Address src);
void cvtsi2ssq(XMMRegister dst, Register src);
void cvtsi2ssq(XMMRegister dst, Address src);
// Convert Packed Signed Doubleword Integers to Packed Double-Precision Floating-Point Value
void cvtdq2pd(XMMRegister dst, XMMRegister src);
......@@ -866,6 +976,7 @@ private:
// Convert Scalar Single-Precision Floating-Point Value to Scalar Double-Precision Floating-Point Value
void cvtss2sd(XMMRegister dst, XMMRegister src);
void cvtss2sd(XMMRegister dst, Address src);
// Convert with Truncation Scalar Double-Precision Floating-Point Value to Doubleword Integer
void cvttsd2sil(Register dst, Address src);
......@@ -1140,8 +1251,6 @@ private:
void movdq(Register dst, XMMRegister src);
// Move Aligned Double Quadword
void movdqa(Address dst, XMMRegister src);
void movdqa(XMMRegister dst, Address src);
void movdqa(XMMRegister dst, XMMRegister src);
// Move Unaligned Double Quadword
......@@ -1261,10 +1370,18 @@ private:
void orq(Register dst, Address src);
void orq(Register dst, Register src);
// Pack with unsigned saturation
void packuswb(XMMRegister dst, XMMRegister src);
void packuswb(XMMRegister dst, Address src);
// SSE4.2 string instructions
void pcmpestri(XMMRegister xmm1, XMMRegister xmm2, int imm8);
void pcmpestri(XMMRegister xmm1, Address src, int imm8);
// SSE4.1 packed move
void pmovzxbw(XMMRegister dst, XMMRegister src);
void pmovzxbw(XMMRegister dst, Address src);
#ifndef _LP64 // no 32bit push/pop on amd64
void popl(Address dst);
#endif
......@@ -1292,6 +1409,7 @@ private:
// POR - Bitwise logical OR
void por(XMMRegister dst, XMMRegister src);
void por(XMMRegister dst, Address src);
// Shuffle Packed Doublewords
void pshufd(XMMRegister dst, XMMRegister src, int mode);
......@@ -1313,6 +1431,11 @@ private:
// Interleave Low Bytes
void punpcklbw(XMMRegister dst, XMMRegister src);
void punpcklbw(XMMRegister dst, Address src);
// Interleave Low Doublewords
void punpckldq(XMMRegister dst, XMMRegister src);
void punpckldq(XMMRegister dst, Address src);
#ifndef _LP64 // no 32bit push/pop on amd64
void pushl(Address src);
......@@ -1429,6 +1552,13 @@ private:
void xchgq(Register reg, Address adr);
void xchgq(Register dst, Register src);
// Get Value of Extended Control Register
void xgetbv() {
emit_byte(0x0F);
emit_byte(0x01);
emit_byte(0xD0);
}
void xorl(Register dst, int32_t imm32);
void xorl(Register dst, Address src);
void xorl(Register dst, Register src);
......@@ -1437,14 +1567,21 @@ private:
void xorq(Register dst, Register src);
// Bitwise Logical XOR of Packed Double-Precision Floating-Point Values
void xorpd(XMMRegister dst, Address src);
void xorpd(XMMRegister dst, XMMRegister src);
// Bitwise Logical XOR of Packed Single-Precision Floating-Point Values
void xorps(XMMRegister dst, Address src);
void xorps(XMMRegister dst, XMMRegister src);
void set_byte_if_not_zero(Register dst); // sets reg to 1 if not zero, otherwise 0
protected:
// Next instructions require address alignment 16 bytes SSE mode.
// They should be called only from corresponding MacroAssembler instructions.
void andpd(XMMRegister dst, Address src);
void andps(XMMRegister dst, Address src);
void xorpd(XMMRegister dst, Address src);
void xorps(XMMRegister dst, Address src);
};
......@@ -2175,9 +2312,15 @@ class MacroAssembler: public Assembler {
void andpd(XMMRegister dst, Address src) { Assembler::andpd(dst, src); }
void andpd(XMMRegister dst, AddressLiteral src);
void andps(XMMRegister dst, XMMRegister src) { Assembler::andps(dst, src); }
void andps(XMMRegister dst, Address src) { Assembler::andps(dst, src); }
void andps(XMMRegister dst, AddressLiteral src);
void comiss(XMMRegister dst, XMMRegister src) { Assembler::comiss(dst, src); }
void comiss(XMMRegister dst, Address src) { Assembler::comiss(dst, src); }
void comiss(XMMRegister dst, AddressLiteral src);
void comisd(XMMRegister dst, XMMRegister src) { Assembler::comisd(dst, src); }
void comisd(XMMRegister dst, Address src) { Assembler::comisd(dst, src); }
void comisd(XMMRegister dst, AddressLiteral src);
......@@ -2211,62 +2354,62 @@ private:
void movss(XMMRegister dst, Address src) { Assembler::movss(dst, src); }
void movss(XMMRegister dst, AddressLiteral src);
void movlpd(XMMRegister dst, Address src) {Assembler::movlpd(dst, src); }
void movlpd(XMMRegister dst, Address src) {Assembler::movlpd(dst, src); }
void movlpd(XMMRegister dst, AddressLiteral src);
public:
void addsd(XMMRegister dst, XMMRegister src) { Assembler::addsd(dst, src); }
void addsd(XMMRegister dst, Address src) { Assembler::addsd(dst, src); }
void addsd(XMMRegister dst, AddressLiteral src) { Assembler::addsd(dst, as_Address(src)); }
void addsd(XMMRegister dst, AddressLiteral src);
void addss(XMMRegister dst, XMMRegister src) { Assembler::addss(dst, src); }
void addss(XMMRegister dst, Address src) { Assembler::addss(dst, src); }
void addss(XMMRegister dst, AddressLiteral src) { Assembler::addss(dst, as_Address(src)); }
void addss(XMMRegister dst, AddressLiteral src);
void divsd(XMMRegister dst, XMMRegister src) { Assembler::divsd(dst, src); }
void divsd(XMMRegister dst, Address src) { Assembler::divsd(dst, src); }
void divsd(XMMRegister dst, AddressLiteral src) { Assembler::divsd(dst, as_Address(src)); }
void divsd(XMMRegister dst, AddressLiteral src);
void divss(XMMRegister dst, XMMRegister src) { Assembler::divss(dst, src); }
void divss(XMMRegister dst, Address src) { Assembler::divss(dst, src); }
void divss(XMMRegister dst, AddressLiteral src) { Assembler::divss(dst, as_Address(src)); }
void divss(XMMRegister dst, AddressLiteral src);
void movsd(XMMRegister dst, XMMRegister src) { Assembler::movsd(dst, src); }
void movsd(Address dst, XMMRegister src) { Assembler::movsd(dst, src); }
void movsd(XMMRegister dst, Address src) { Assembler::movsd(dst, src); }
void movsd(XMMRegister dst, AddressLiteral src) { Assembler::movsd(dst, as_Address(src)); }
void movsd(XMMRegister dst, AddressLiteral src);
void mulsd(XMMRegister dst, XMMRegister src) { Assembler::mulsd(dst, src); }
void mulsd(XMMRegister dst, Address src) { Assembler::mulsd(dst, src); }
void mulsd(XMMRegister dst, AddressLiteral src) { Assembler::mulsd(dst, as_Address(src)); }
void mulsd(XMMRegister dst, AddressLiteral src);
void mulss(XMMRegister dst, XMMRegister src) { Assembler::mulss(dst, src); }
void mulss(XMMRegister dst, Address src) { Assembler::mulss(dst, src); }
void mulss(XMMRegister dst, AddressLiteral src) { Assembler::mulss(dst, as_Address(src)); }
void mulss(XMMRegister dst, AddressLiteral src);
void sqrtsd(XMMRegister dst, XMMRegister src) { Assembler::sqrtsd(dst, src); }
void sqrtsd(XMMRegister dst, Address src) { Assembler::sqrtsd(dst, src); }
void sqrtsd(XMMRegister dst, AddressLiteral src) { Assembler::sqrtsd(dst, as_Address(src)); }
void sqrtsd(XMMRegister dst, AddressLiteral src);
void sqrtss(XMMRegister dst, XMMRegister src) { Assembler::sqrtss(dst, src); }
void sqrtss(XMMRegister dst, Address src) { Assembler::sqrtss(dst, src); }
void sqrtss(XMMRegister dst, AddressLiteral src) { Assembler::sqrtss(dst, as_Address(src)); }
void sqrtss(XMMRegister dst, AddressLiteral src);
void subsd(XMMRegister dst, XMMRegister src) { Assembler::subsd(dst, src); }
void subsd(XMMRegister dst, Address src) { Assembler::subsd(dst, src); }
void subsd(XMMRegister dst, AddressLiteral src) { Assembler::subsd(dst, as_Address(src)); }
void subsd(XMMRegister dst, AddressLiteral src);
void subss(XMMRegister dst, XMMRegister src) { Assembler::subss(dst, src); }
void subss(XMMRegister dst, Address src) { Assembler::subss(dst, src); }
void subss(XMMRegister dst, AddressLiteral src) { Assembler::subss(dst, as_Address(src)); }
void subss(XMMRegister dst, AddressLiteral src);
void ucomiss(XMMRegister dst, XMMRegister src) { Assembler::ucomiss(dst, src); }
void ucomiss(XMMRegister dst, Address src) { Assembler::ucomiss(dst, src); }
void ucomiss(XMMRegister dst, Address src) { Assembler::ucomiss(dst, src); }
void ucomiss(XMMRegister dst, AddressLiteral src);
void ucomisd(XMMRegister dst, XMMRegister src) { Assembler::ucomisd(dst, src); }
void ucomisd(XMMRegister dst, Address src) { Assembler::ucomisd(dst, src); }
void ucomisd(XMMRegister dst, Address src) { Assembler::ucomisd(dst, src); }
void ucomisd(XMMRegister dst, AddressLiteral src);
// Bitwise Logical XOR of Packed Double-Precision Floating-Point Values
......
......@@ -86,6 +86,7 @@ inline void Assembler::prefix(Address adr, Register reg, bool byteinst) {}
inline void Assembler::prefixq(Address adr, Register reg) {}
inline void Assembler::prefix(Address adr, XMMRegister reg) {}
inline void Assembler::prefixq(Address adr, XMMRegister reg) {}
#else
inline void Assembler::emit_long64(jlong x) {
*(jlong*) _code_pos = x;
......
......@@ -237,9 +237,21 @@ int NativeMovRegMem::instruction_start() const {
int off = 0;
u_char instr_0 = ubyte_at(off);
// See comment in Assembler::locate_operand() about VEX prefixes.
if (instr_0 == instruction_VEX_prefix_2bytes) {
assert((UseAVX > 0), "shouldn't have VEX prefix");
NOT_LP64(assert((0xC0 & ubyte_at(1)) == 0xC0, "shouldn't have LDS and LES instructions"));
return 2;
}
if (instr_0 == instruction_VEX_prefix_3bytes) {
assert((UseAVX > 0), "shouldn't have VEX prefix");
NOT_LP64(assert((0xC0 & ubyte_at(1)) == 0xC0, "shouldn't have LDS and LES instructions"));
return 3;
}
// First check to see if we have a (prefixed or not) xor
if ( instr_0 >= instruction_prefix_wide_lo && // 0x40
instr_0 <= instruction_prefix_wide_hi) { // 0x4f
if (instr_0 >= instruction_prefix_wide_lo && // 0x40
instr_0 <= instruction_prefix_wide_hi) { // 0x4f
off++;
instr_0 = ubyte_at(off);
}
......@@ -256,13 +268,13 @@ int NativeMovRegMem::instruction_start() const {
instr_0 = ubyte_at(off);
}
if ( instr_0 == instruction_code_xmm_ss_prefix || // 0xf3
if ( instr_0 == instruction_code_xmm_ss_prefix || // 0xf3
instr_0 == instruction_code_xmm_sd_prefix) { // 0xf2
off++;
instr_0 = ubyte_at(off);
}
if ( instr_0 >= instruction_prefix_wide_lo && // 0x40
if ( instr_0 >= instruction_prefix_wide_lo && // 0x40
instr_0 <= instruction_prefix_wide_hi) { // 0x4f
off++;
instr_0 = ubyte_at(off);
......
......@@ -287,6 +287,9 @@ class NativeMovRegMem: public NativeInstruction {
instruction_code_xmm_store = 0x11,
instruction_code_xmm_lpd = 0x12,
instruction_VEX_prefix_2bytes = Assembler::VEX_2bytes,
instruction_VEX_prefix_3bytes = Assembler::VEX_3bytes,
instruction_size = 4,
instruction_offset = 0,
data_offset = 2,
......
......@@ -53,6 +53,7 @@ REGISTER_DEFINITION(Register, r14);
REGISTER_DEFINITION(Register, r15);
#endif // AMD64
REGISTER_DEFINITION(XMMRegister, xnoreg);
REGISTER_DEFINITION(XMMRegister, xmm0 );
REGISTER_DEFINITION(XMMRegister, xmm1 );
REGISTER_DEFINITION(XMMRegister, xmm2 );
......@@ -115,6 +116,7 @@ REGISTER_DEFINITION(Register, r12_heapbase);
REGISTER_DEFINITION(Register, r15_thread);
#endif // AMD64
REGISTER_DEFINITION(MMXRegister, mnoreg );
REGISTER_DEFINITION(MMXRegister, mmx0 );
REGISTER_DEFINITION(MMXRegister, mmx1 );
REGISTER_DEFINITION(MMXRegister, mmx2 );
......
......@@ -50,7 +50,7 @@ const char* VM_Version::_features_str = "";
VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };
static BufferBlob* stub_blob;
static const int stub_size = 400;
static const int stub_size = 500;
extern "C" {
typedef void (*getPsrInfo_stub_t)(void*);
......@@ -73,7 +73,7 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);
Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
Label ext_cpuid1, ext_cpuid5, done;
Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, done;
StubCodeMark mark(this, "VM_Version", "getPsrInfo_stub");
# define __ _masm->
......@@ -229,6 +229,41 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
__ movl(Address(rsi, 8), rcx);
__ movl(Address(rsi,12), rdx);
//
// Check if OS has enabled XGETBV instruction to access XCR0
// (OSXSAVE feature flag) and CPU supports AVX
//
__ andl(rcx, 0x18000000);
__ cmpl(rcx, 0x18000000);
__ jccb(Assembler::notEqual, sef_cpuid);
//
// XCR0, XFEATURE_ENABLED_MASK register
//
__ xorl(rcx, rcx); // zero for XCR0 register
__ xgetbv();
__ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
__ movl(Address(rsi, 0), rax);
__ movl(Address(rsi, 4), rdx);
//
// cpuid(0x7) Structured Extended Features
//
__ bind(sef_cpuid);
__ movl(rax, 7);
__ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
__ jccb(Assembler::greater, ext_cpuid);
__ xorl(rcx, rcx);
__ cpuid();
__ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
__ movl(Address(rsi, 0), rax);
__ movl(Address(rsi, 4), rbx);
//
// Extended cpuid(0x80000000)
//
__ bind(ext_cpuid);
__ movl(rax, 0x80000000);
__ cpuid();
__ cmpl(rax, 0x80000000); // Is cpuid(0x80000001) supported?
......@@ -359,13 +394,19 @@ void VM_Version::get_processor_features() {
if (UseSSE < 1)
_cpuFeatures &= ~CPU_SSE;
if (UseAVX < 2)
_cpuFeatures &= ~CPU_AVX2;
if (UseAVX < 1)
_cpuFeatures &= ~CPU_AVX;
if (logical_processors_per_package() == 1) {
// HT processor could be installed on a system which doesn't support HT.
_cpuFeatures &= ~CPU_HT;
}
char buf[256];
jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
cores_per_cpu(), threads_per_core(),
cpu_family(), _model, _stepping,
(supports_cmov() ? ", cmov" : ""),
......@@ -379,6 +420,8 @@ void VM_Version::get_processor_features() {
(supports_sse4_1() ? ", sse4.1" : ""),
(supports_sse4_2() ? ", sse4.2" : ""),
(supports_popcnt() ? ", popcnt" : ""),
(supports_avx() ? ", avx" : ""),
(supports_avx2() ? ", avx2" : ""),
(supports_mmx_ext() ? ", mmxext" : ""),
(supports_3dnow_prefetch() ? ", 3dnowpref" : ""),
(supports_lzcnt() ? ", lzcnt": ""),
......@@ -389,17 +432,24 @@ void VM_Version::get_processor_features() {
// UseSSE is set to the smaller of what hardware supports and what
// the command line requires. I.e., you cannot set UseSSE to 2 on
// older Pentiums which do not support it.
if( UseSSE > 4 ) UseSSE=4;
if( UseSSE < 0 ) UseSSE=0;
if( !supports_sse4_1() ) // Drop to 3 if no SSE4 support
if (UseSSE > 4) UseSSE=4;
if (UseSSE < 0) UseSSE=0;
if (!supports_sse4_1()) // Drop to 3 if no SSE4 support
UseSSE = MIN2((intx)3,UseSSE);
if( !supports_sse3() ) // Drop to 2 if no SSE3 support
if (!supports_sse3()) // Drop to 2 if no SSE3 support
UseSSE = MIN2((intx)2,UseSSE);
if( !supports_sse2() ) // Drop to 1 if no SSE2 support
if (!supports_sse2()) // Drop to 1 if no SSE2 support
UseSSE = MIN2((intx)1,UseSSE);
if( !supports_sse () ) // Drop to 0 if no SSE support
if (!supports_sse ()) // Drop to 0 if no SSE support
UseSSE = 0;
if (UseAVX > 2) UseAVX=2;
if (UseAVX < 0) UseAVX=0;
if (!supports_avx2()) // Drop to 1 if no AVX2 support
UseAVX = MIN2((intx)1,UseAVX);
if (!supports_avx ()) // Drop to 0 if no AVX support
UseAVX = 0;
// On new cpus instructions which update whole XMM register should be used
// to prevent partial register stall due to dependencies on high half.
//
......@@ -534,6 +584,9 @@ void VM_Version::get_processor_features() {
if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
UsePopCountInstruction = true;
}
} else if (UsePopCountInstruction) {
warning("POPCNT instruction is not available on this CPU");
FLAG_SET_DEFAULT(UsePopCountInstruction, false);
}
#ifdef COMPILER2
......@@ -605,7 +658,11 @@ void VM_Version::get_processor_features() {
if (PrintMiscellaneous && Verbose) {
tty->print_cr("Logical CPUs per core: %u",
logical_processors_per_package());
tty->print_cr("UseSSE=%d",UseSSE);
tty->print("UseSSE=%d",UseSSE);
if (UseAVX > 0) {
tty->print(" UseAVX=%d",UseAVX);
}
tty->cr();
tty->print("Allocation");
if (AllocatePrefetchStyle <= 0 || UseSSE == 0 && !supports_3dnow_prefetch()) {
tty->print_cr(": no prefetching");
......
......@@ -78,7 +78,10 @@ public:
sse4_2 : 1,
: 2,
popcnt : 1,
: 8;
: 3,
osxsave : 1,
avx : 1,
: 3;
} bits;
};
......@@ -176,6 +179,34 @@ public:
} bits;
};
union SefCpuid7Eax {
uint32_t value;
};
union SefCpuid7Ebx {
uint32_t value;
struct {
uint32_t fsgsbase : 1,
: 2,
bmi1 : 1,
: 1,
avx2 : 1,
: 2,
bmi2 : 1,
: 23;
} bits;
};
union XemXcr0Eax {
uint32_t value;
struct {
uint32_t x87 : 1,
sse : 1,
ymm : 1,
: 29;
} bits;
};
protected:
static int _cpu;
static int _model;
......@@ -200,7 +231,9 @@ protected:
CPU_SSE4_1 = (1 << 11),
CPU_SSE4_2 = (1 << 12),
CPU_POPCNT = (1 << 13),
CPU_LZCNT = (1 << 14)
CPU_LZCNT = (1 << 14),
CPU_AVX = (1 << 15),
CPU_AVX2 = (1 << 16)
} cpuFeatureFlags;
// cpuid information block. All info derived from executing cpuid with
......@@ -228,6 +261,12 @@ protected:
uint32_t dcp_cpuid4_ecx; // unused currently
uint32_t dcp_cpuid4_edx; // unused currently
// cpuid function 7 (structured extended features)
SefCpuid7Eax sef_cpuid7_eax;
SefCpuid7Ebx sef_cpuid7_ebx;
uint32_t sef_cpuid7_ecx; // unused currently
uint32_t sef_cpuid7_edx; // unused currently
// cpuid function 0xB (processor topology)
// ecx = 0
uint32_t tpl_cpuidB0_eax;
......@@ -275,6 +314,10 @@ protected:
uint32_t ext_cpuid8_ebx; // reserved
ExtCpuid8Ecx ext_cpuid8_ecx;
uint32_t ext_cpuid8_edx; // reserved
// extended control register XCR0 (the XFEATURE_ENABLED_MASK register)
XemXcr0Eax xem_xcr0_eax;
uint32_t xem_xcr0_edx; // reserved
};
// The actual cpuid info block
......@@ -328,6 +371,14 @@ protected:
result |= CPU_SSE4_2;
if (_cpuid_info.std_cpuid1_ecx.bits.popcnt != 0)
result |= CPU_POPCNT;
if (_cpuid_info.std_cpuid1_ecx.bits.avx != 0 &&
_cpuid_info.std_cpuid1_ecx.bits.osxsave != 0 &&
_cpuid_info.xem_xcr0_eax.bits.sse != 0 &&
_cpuid_info.xem_xcr0_eax.bits.ymm != 0) {
result |= CPU_AVX;
if (_cpuid_info.sef_cpuid7_ebx.bits.avx2 != 0)
result |= CPU_AVX2;
}
// AMD features.
if (is_amd()) {
......@@ -350,12 +401,14 @@ public:
static ByteSize std_cpuid0_offset() { return byte_offset_of(CpuidInfo, std_max_function); }
static ByteSize std_cpuid1_offset() { return byte_offset_of(CpuidInfo, std_cpuid1_eax); }
static ByteSize dcp_cpuid4_offset() { return byte_offset_of(CpuidInfo, dcp_cpuid4_eax); }
static ByteSize sef_cpuid7_offset() { return byte_offset_of(CpuidInfo, sef_cpuid7_eax); }
static ByteSize ext_cpuid1_offset() { return byte_offset_of(CpuidInfo, ext_cpuid1_eax); }
static ByteSize ext_cpuid5_offset() { return byte_offset_of(CpuidInfo, ext_cpuid5_eax); }
static ByteSize ext_cpuid8_offset() { return byte_offset_of(CpuidInfo, ext_cpuid8_eax); }
static ByteSize tpl_cpuidB0_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB0_eax); }
static ByteSize tpl_cpuidB1_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB1_eax); }
static ByteSize tpl_cpuidB2_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB2_eax); }
static ByteSize xem_xcr0_offset() { return byte_offset_of(CpuidInfo, xem_xcr0_eax); }
// Initialization
static void initialize();
......@@ -447,6 +500,8 @@ public:
static bool supports_sse4_1() { return (_cpuFeatures & CPU_SSE4_1) != 0; }
static bool supports_sse4_2() { return (_cpuFeatures & CPU_SSE4_2) != 0; }
static bool supports_popcnt() { return (_cpuFeatures & CPU_POPCNT) != 0; }
static bool supports_avx() { return (_cpuFeatures & CPU_AVX) != 0; }
static bool supports_avx2() { return (_cpuFeatures & CPU_AVX2) != 0; }
//
// AMD features
//
......
此差异已折叠。
此差异已折叠。
......@@ -525,6 +525,9 @@ class CommandLineFlags {
product(intx, UseSSE, 99, \
"Highest supported SSE instructions set on x86/x64") \
\
product(intx, UseAVX, 99, \
"Highest supported AVX instructions set on x86/x64") \
\
product(intx, UseVIS, 99, \
"Highest supported VIS instructions set on Sparc") \
\
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册