提交 41479e87 编写于 作者: K kvn

7116452: Add support for AVX instructions

Summary: Added support for AVX extension to the x86 instruction set.
Reviewed-by: never
上级 8c079e48
此差异已折叠。
...@@ -503,7 +503,31 @@ class Assembler : public AbstractAssembler { ...@@ -503,7 +503,31 @@ class Assembler : public AbstractAssembler {
REX_WR = 0x4C, REX_WR = 0x4C,
REX_WRB = 0x4D, REX_WRB = 0x4D,
REX_WRX = 0x4E, REX_WRX = 0x4E,
REX_WRXB = 0x4F REX_WRXB = 0x4F,
VEX_3bytes = 0xC4,
VEX_2bytes = 0xC5
};
enum VexPrefix {
VEX_B = 0x20,
VEX_X = 0x40,
VEX_R = 0x80,
VEX_W = 0x80
};
enum VexSimdPrefix {
VEX_SIMD_NONE = 0x0,
VEX_SIMD_66 = 0x1,
VEX_SIMD_F3 = 0x2,
VEX_SIMD_F2 = 0x3
};
  // Values for the VEX.mmmmm field: the legacy opcode-map escape bytes
  // (none, 0F, 0F 38, 0F 3A) folded into the VEX prefix.
  enum VexOpcode {
    VEX_OPCODE_NONE = 0x0,
    VEX_OPCODE_0F = 0x1,
    VEX_OPCODE_0F_38 = 0x2,
    VEX_OPCODE_0F_3A = 0x3
}; };
enum WhichOperand { enum WhichOperand {
...@@ -546,12 +570,88 @@ private: ...@@ -546,12 +570,88 @@ private:
void prefixq(Address adr); void prefixq(Address adr);
void prefix(Address adr, Register reg, bool byteinst = false); void prefix(Address adr, Register reg, bool byteinst = false);
void prefixq(Address adr, Register reg);
void prefix(Address adr, XMMRegister reg); void prefix(Address adr, XMMRegister reg);
void prefixq(Address adr, Register reg);
void prefixq(Address adr, XMMRegister reg);
void prefetch_prefix(Address src); void prefetch_prefix(Address src);
  // Emit a legacy (REX + mandatory byte) SIMD prefix for a reg/mem operand.
  void rex_prefix(Address adr, XMMRegister xreg,
                  VexSimdPrefix pre, VexOpcode opc, bool rex_w);
  // Same for a reg/reg operand; returns the register encoding to be used
  // when emitting the ModRM byte.
  int rex_prefix_and_encode(int dst_enc, int src_enc,
                            VexSimdPrefix pre, VexOpcode opc, bool rex_w);

  // Emit a VEX prefix (2- or 3-byte form) from explicit bit components.
  // nds_enc encodes the non-destructive source register (VEX.vvvv);
  // vector256 selects 256-bit (YMM) over 128-bit (XMM) vector length.
  void vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w,
                  int nds_enc, VexSimdPrefix pre, VexOpcode opc,
                  bool vector256);

  // VEX prefix for a reg/mem operand.
  void vex_prefix(Address adr, int nds_enc, int xreg_enc,
                  VexSimdPrefix pre, VexOpcode opc,
                  bool vex_w, bool vector256);

  // VEX prefix for a reg/reg operand; returns the ModRM register encoding.
  int vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc,
                            VexSimdPrefix pre, VexOpcode opc,
                            bool vex_w, bool vector256);

  // Emit the appropriate SIMD prefix for the current CPU mode.
  // NOTE(review): presumably selects VEX vs. legacy REX encoding based on
  // UseAVX — confirm against the definition in assembler_x86.cpp.
  void simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr,
                   VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F,
                   bool rex_w = false, bool vector256 = false);

  // Load form: no non-destructive source register (xnoreg).
  void simd_prefix(XMMRegister dst, Address src,
                   VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) {
    simd_prefix(dst, xnoreg, src, pre, opc);
  }
  // Store form: same prefix encoding as the load form with the register
  // and memory operands swapped.
  void simd_prefix(Address dst, XMMRegister src, VexSimdPrefix pre) {
    simd_prefix(src, dst, pre);
  }
  // 64-bit-operand (REX.W/VEX.W = 1) variant.
  void simd_prefix_q(XMMRegister dst, XMMRegister nds, Address src,
                     VexSimdPrefix pre) {
    bool rex_w = true;
    simd_prefix(dst, nds, src, pre, VEX_OPCODE_0F, rex_w);
  }

  // Reg/reg form; returns the register encoding for the ModRM byte.
  int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src,
                             VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F,
                             bool rex_w = false, bool vector256 = false);

  // Two-operand form: no non-destructive source register.
  int simd_prefix_and_encode(XMMRegister dst, XMMRegister src,
                             VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) {
    return simd_prefix_and_encode(dst, xnoreg, src, pre, opc);
  }

  // Move/convert 32-bit integer value.
  int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, Register src,
                             VexSimdPrefix pre) {
    // It is OK to cast from Register to XMMRegister to pass argument here
    // since only encoding is used in simd_prefix_and_encode() and number of
    // Gen and Xmm registers are the same.
    return simd_prefix_and_encode(dst, nds, as_XMMRegister(src->encoding()), pre);
  }
  int simd_prefix_and_encode(XMMRegister dst, Register src, VexSimdPrefix pre) {
    return simd_prefix_and_encode(dst, xnoreg, src, pre);
  }
  int simd_prefix_and_encode(Register dst, XMMRegister src,
                             VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) {
    return simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, pre, opc);
  }

  // Move/convert 64-bit integer value.
  int simd_prefix_and_encode_q(XMMRegister dst, XMMRegister nds, Register src,
                               VexSimdPrefix pre) {
    bool rex_w = true;  // force REX.W/VEX.W for 64-bit operand size
    return simd_prefix_and_encode(dst, nds, as_XMMRegister(src->encoding()), pre, VEX_OPCODE_0F, rex_w);
  }
  int simd_prefix_and_encode_q(XMMRegister dst, Register src, VexSimdPrefix pre) {
    return simd_prefix_and_encode_q(dst, xnoreg, src, pre);
  }
  int simd_prefix_and_encode_q(Register dst, XMMRegister src,
                               VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) {
    bool rex_w = true;  // force REX.W/VEX.W for 64-bit operand size
    return simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, pre, opc, rex_w);
  }
// Helper functions for groups of instructions // Helper functions for groups of instructions
void emit_arith_b(int op1, int op2, Register dst, int imm8); void emit_arith_b(int op1, int op2, Register dst, int imm8);
...@@ -764,6 +864,7 @@ private: ...@@ -764,6 +864,7 @@ private:
void addss(XMMRegister dst, Address src); void addss(XMMRegister dst, Address src);
void addss(XMMRegister dst, XMMRegister src); void addss(XMMRegister dst, XMMRegister src);
void andl(Address dst, int32_t imm32);
void andl(Register dst, int32_t imm32); void andl(Register dst, int32_t imm32);
void andl(Register dst, Address src); void andl(Register dst, Address src);
void andl(Register dst, Register src); void andl(Register dst, Register src);
...@@ -774,9 +875,11 @@ private: ...@@ -774,9 +875,11 @@ private:
void andq(Register dst, Register src); void andq(Register dst, Register src);
// Bitwise Logical AND of Packed Double-Precision Floating-Point Values // Bitwise Logical AND of Packed Double-Precision Floating-Point Values
void andpd(XMMRegister dst, Address src);
void andpd(XMMRegister dst, XMMRegister src); void andpd(XMMRegister dst, XMMRegister src);
// Bitwise Logical AND of Packed Single-Precision Floating-Point Values
void andps(XMMRegister dst, XMMRegister src);
void bsfl(Register dst, Register src); void bsfl(Register dst, Register src);
void bsrl(Register dst, Register src); void bsrl(Register dst, Register src);
...@@ -837,9 +940,11 @@ private: ...@@ -837,9 +940,11 @@ private:
// Ordered Compare Scalar Double-Precision Floating-Point Values and set EFLAGS // Ordered Compare Scalar Double-Precision Floating-Point Values and set EFLAGS
void comisd(XMMRegister dst, Address src); void comisd(XMMRegister dst, Address src);
void comisd(XMMRegister dst, XMMRegister src);
// Ordered Compare Scalar Single-Precision Floating-Point Values and set EFLAGS // Ordered Compare Scalar Single-Precision Floating-Point Values and set EFLAGS
void comiss(XMMRegister dst, Address src); void comiss(XMMRegister dst, Address src);
void comiss(XMMRegister dst, XMMRegister src);
// Identify processor type and features // Identify processor type and features
void cpuid() { void cpuid() {
...@@ -849,14 +954,19 @@ private: ...@@ -849,14 +954,19 @@ private:
// Convert Scalar Double-Precision Floating-Point Value to Scalar Single-Precision Floating-Point Value // Convert Scalar Double-Precision Floating-Point Value to Scalar Single-Precision Floating-Point Value
void cvtsd2ss(XMMRegister dst, XMMRegister src); void cvtsd2ss(XMMRegister dst, XMMRegister src);
void cvtsd2ss(XMMRegister dst, Address src);
// Convert Doubleword Integer to Scalar Double-Precision Floating-Point Value // Convert Doubleword Integer to Scalar Double-Precision Floating-Point Value
void cvtsi2sdl(XMMRegister dst, Register src); void cvtsi2sdl(XMMRegister dst, Register src);
void cvtsi2sdl(XMMRegister dst, Address src);
void cvtsi2sdq(XMMRegister dst, Register src); void cvtsi2sdq(XMMRegister dst, Register src);
void cvtsi2sdq(XMMRegister dst, Address src);
// Convert Doubleword Integer to Scalar Single-Precision Floating-Point Value // Convert Doubleword Integer to Scalar Single-Precision Floating-Point Value
void cvtsi2ssl(XMMRegister dst, Register src); void cvtsi2ssl(XMMRegister dst, Register src);
void cvtsi2ssl(XMMRegister dst, Address src);
void cvtsi2ssq(XMMRegister dst, Register src); void cvtsi2ssq(XMMRegister dst, Register src);
void cvtsi2ssq(XMMRegister dst, Address src);
// Convert Packed Signed Doubleword Integers to Packed Double-Precision Floating-Point Value // Convert Packed Signed Doubleword Integers to Packed Double-Precision Floating-Point Value
void cvtdq2pd(XMMRegister dst, XMMRegister src); void cvtdq2pd(XMMRegister dst, XMMRegister src);
...@@ -866,6 +976,7 @@ private: ...@@ -866,6 +976,7 @@ private:
// Convert Scalar Single-Precision Floating-Point Value to Scalar Double-Precision Floating-Point Value // Convert Scalar Single-Precision Floating-Point Value to Scalar Double-Precision Floating-Point Value
void cvtss2sd(XMMRegister dst, XMMRegister src); void cvtss2sd(XMMRegister dst, XMMRegister src);
void cvtss2sd(XMMRegister dst, Address src);
// Convert with Truncation Scalar Double-Precision Floating-Point Value to Doubleword Integer // Convert with Truncation Scalar Double-Precision Floating-Point Value to Doubleword Integer
void cvttsd2sil(Register dst, Address src); void cvttsd2sil(Register dst, Address src);
...@@ -1140,8 +1251,6 @@ private: ...@@ -1140,8 +1251,6 @@ private:
void movdq(Register dst, XMMRegister src); void movdq(Register dst, XMMRegister src);
// Move Aligned Double Quadword // Move Aligned Double Quadword
void movdqa(Address dst, XMMRegister src);
void movdqa(XMMRegister dst, Address src);
void movdqa(XMMRegister dst, XMMRegister src); void movdqa(XMMRegister dst, XMMRegister src);
// Move Unaligned Double Quadword // Move Unaligned Double Quadword
...@@ -1261,10 +1370,18 @@ private: ...@@ -1261,10 +1370,18 @@ private:
void orq(Register dst, Address src); void orq(Register dst, Address src);
void orq(Register dst, Register src); void orq(Register dst, Register src);
// Pack with unsigned saturation
void packuswb(XMMRegister dst, XMMRegister src);
void packuswb(XMMRegister dst, Address src);
// SSE4.2 string instructions // SSE4.2 string instructions
void pcmpestri(XMMRegister xmm1, XMMRegister xmm2, int imm8); void pcmpestri(XMMRegister xmm1, XMMRegister xmm2, int imm8);
void pcmpestri(XMMRegister xmm1, Address src, int imm8); void pcmpestri(XMMRegister xmm1, Address src, int imm8);
// SSE4.1 packed move
void pmovzxbw(XMMRegister dst, XMMRegister src);
void pmovzxbw(XMMRegister dst, Address src);
#ifndef _LP64 // no 32bit push/pop on amd64 #ifndef _LP64 // no 32bit push/pop on amd64
void popl(Address dst); void popl(Address dst);
#endif #endif
...@@ -1292,6 +1409,7 @@ private: ...@@ -1292,6 +1409,7 @@ private:
// POR - Bitwise logical OR // POR - Bitwise logical OR
void por(XMMRegister dst, XMMRegister src); void por(XMMRegister dst, XMMRegister src);
void por(XMMRegister dst, Address src);
// Shuffle Packed Doublewords // Shuffle Packed Doublewords
void pshufd(XMMRegister dst, XMMRegister src, int mode); void pshufd(XMMRegister dst, XMMRegister src, int mode);
...@@ -1313,6 +1431,11 @@ private: ...@@ -1313,6 +1431,11 @@ private:
// Interleave Low Bytes // Interleave Low Bytes
void punpcklbw(XMMRegister dst, XMMRegister src); void punpcklbw(XMMRegister dst, XMMRegister src);
void punpcklbw(XMMRegister dst, Address src);
// Interleave Low Doublewords
void punpckldq(XMMRegister dst, XMMRegister src);
void punpckldq(XMMRegister dst, Address src);
#ifndef _LP64 // no 32bit push/pop on amd64 #ifndef _LP64 // no 32bit push/pop on amd64
void pushl(Address src); void pushl(Address src);
...@@ -1429,6 +1552,13 @@ private: ...@@ -1429,6 +1552,13 @@ private:
void xchgq(Register reg, Address adr); void xchgq(Register reg, Address adr);
void xchgq(Register dst, Register src); void xchgq(Register dst, Register src);
  // Get Value of Extended Control Register.
  // XGETBV (0F 01 D0): reads the extended control register selected by
  // ECX into EDX:EAX. With ECX = 0 it returns XCR0, the
  // XFEATURE_ENABLED_MASK, used to check OS support for AVX/YMM state.
  void xgetbv() {
    emit_byte(0x0F);
    emit_byte(0x01);
    emit_byte(0xD0);
  }
void xorl(Register dst, int32_t imm32); void xorl(Register dst, int32_t imm32);
void xorl(Register dst, Address src); void xorl(Register dst, Address src);
void xorl(Register dst, Register src); void xorl(Register dst, Register src);
...@@ -1437,14 +1567,21 @@ private: ...@@ -1437,14 +1567,21 @@ private:
void xorq(Register dst, Register src); void xorq(Register dst, Register src);
// Bitwise Logical XOR of Packed Double-Precision Floating-Point Values // Bitwise Logical XOR of Packed Double-Precision Floating-Point Values
void xorpd(XMMRegister dst, Address src);
void xorpd(XMMRegister dst, XMMRegister src); void xorpd(XMMRegister dst, XMMRegister src);
// Bitwise Logical XOR of Packed Single-Precision Floating-Point Values // Bitwise Logical XOR of Packed Single-Precision Floating-Point Values
void xorps(XMMRegister dst, Address src);
void xorps(XMMRegister dst, XMMRegister src); void xorps(XMMRegister dst, XMMRegister src);
void set_byte_if_not_zero(Register dst); // sets reg to 1 if not zero, otherwise 0 void set_byte_if_not_zero(Register dst); // sets reg to 1 if not zero, otherwise 0
 protected:
  // The following instructions require 16-byte-aligned memory operands in
  // SSE mode (legacy aligned packed encodings fault on unaligned access).
  // They should be called only from the corresponding MacroAssembler
  // instructions, which are responsible for the alignment guarantee.
  void andpd(XMMRegister dst, Address src);
  void andps(XMMRegister dst, Address src);
  void xorpd(XMMRegister dst, Address src);
  void xorps(XMMRegister dst, Address src);
}; };
...@@ -2175,9 +2312,15 @@ class MacroAssembler: public Assembler { ...@@ -2175,9 +2312,15 @@ class MacroAssembler: public Assembler {
void andpd(XMMRegister dst, Address src) { Assembler::andpd(dst, src); } void andpd(XMMRegister dst, Address src) { Assembler::andpd(dst, src); }
void andpd(XMMRegister dst, AddressLiteral src); void andpd(XMMRegister dst, AddressLiteral src);
void andps(XMMRegister dst, XMMRegister src) { Assembler::andps(dst, src); }
void andps(XMMRegister dst, Address src) { Assembler::andps(dst, src); }
void andps(XMMRegister dst, AddressLiteral src);
void comiss(XMMRegister dst, XMMRegister src) { Assembler::comiss(dst, src); }
void comiss(XMMRegister dst, Address src) { Assembler::comiss(dst, src); } void comiss(XMMRegister dst, Address src) { Assembler::comiss(dst, src); }
void comiss(XMMRegister dst, AddressLiteral src); void comiss(XMMRegister dst, AddressLiteral src);
void comisd(XMMRegister dst, XMMRegister src) { Assembler::comisd(dst, src); }
void comisd(XMMRegister dst, Address src) { Assembler::comisd(dst, src); } void comisd(XMMRegister dst, Address src) { Assembler::comisd(dst, src); }
void comisd(XMMRegister dst, AddressLiteral src); void comisd(XMMRegister dst, AddressLiteral src);
...@@ -2211,62 +2354,62 @@ private: ...@@ -2211,62 +2354,62 @@ private:
void movss(XMMRegister dst, Address src) { Assembler::movss(dst, src); } void movss(XMMRegister dst, Address src) { Assembler::movss(dst, src); }
void movss(XMMRegister dst, AddressLiteral src); void movss(XMMRegister dst, AddressLiteral src);
void movlpd(XMMRegister dst, Address src) {Assembler::movlpd(dst, src); } void movlpd(XMMRegister dst, Address src) {Assembler::movlpd(dst, src); }
void movlpd(XMMRegister dst, AddressLiteral src); void movlpd(XMMRegister dst, AddressLiteral src);
public: public:
void addsd(XMMRegister dst, XMMRegister src) { Assembler::addsd(dst, src); } void addsd(XMMRegister dst, XMMRegister src) { Assembler::addsd(dst, src); }
void addsd(XMMRegister dst, Address src) { Assembler::addsd(dst, src); } void addsd(XMMRegister dst, Address src) { Assembler::addsd(dst, src); }
void addsd(XMMRegister dst, AddressLiteral src) { Assembler::addsd(dst, as_Address(src)); } void addsd(XMMRegister dst, AddressLiteral src);
void addss(XMMRegister dst, XMMRegister src) { Assembler::addss(dst, src); } void addss(XMMRegister dst, XMMRegister src) { Assembler::addss(dst, src); }
void addss(XMMRegister dst, Address src) { Assembler::addss(dst, src); } void addss(XMMRegister dst, Address src) { Assembler::addss(dst, src); }
void addss(XMMRegister dst, AddressLiteral src) { Assembler::addss(dst, as_Address(src)); } void addss(XMMRegister dst, AddressLiteral src);
void divsd(XMMRegister dst, XMMRegister src) { Assembler::divsd(dst, src); } void divsd(XMMRegister dst, XMMRegister src) { Assembler::divsd(dst, src); }
void divsd(XMMRegister dst, Address src) { Assembler::divsd(dst, src); } void divsd(XMMRegister dst, Address src) { Assembler::divsd(dst, src); }
void divsd(XMMRegister dst, AddressLiteral src) { Assembler::divsd(dst, as_Address(src)); } void divsd(XMMRegister dst, AddressLiteral src);
void divss(XMMRegister dst, XMMRegister src) { Assembler::divss(dst, src); } void divss(XMMRegister dst, XMMRegister src) { Assembler::divss(dst, src); }
void divss(XMMRegister dst, Address src) { Assembler::divss(dst, src); } void divss(XMMRegister dst, Address src) { Assembler::divss(dst, src); }
void divss(XMMRegister dst, AddressLiteral src) { Assembler::divss(dst, as_Address(src)); } void divss(XMMRegister dst, AddressLiteral src);
void movsd(XMMRegister dst, XMMRegister src) { Assembler::movsd(dst, src); } void movsd(XMMRegister dst, XMMRegister src) { Assembler::movsd(dst, src); }
void movsd(Address dst, XMMRegister src) { Assembler::movsd(dst, src); } void movsd(Address dst, XMMRegister src) { Assembler::movsd(dst, src); }
void movsd(XMMRegister dst, Address src) { Assembler::movsd(dst, src); } void movsd(XMMRegister dst, Address src) { Assembler::movsd(dst, src); }
void movsd(XMMRegister dst, AddressLiteral src) { Assembler::movsd(dst, as_Address(src)); } void movsd(XMMRegister dst, AddressLiteral src);
void mulsd(XMMRegister dst, XMMRegister src) { Assembler::mulsd(dst, src); } void mulsd(XMMRegister dst, XMMRegister src) { Assembler::mulsd(dst, src); }
void mulsd(XMMRegister dst, Address src) { Assembler::mulsd(dst, src); } void mulsd(XMMRegister dst, Address src) { Assembler::mulsd(dst, src); }
void mulsd(XMMRegister dst, AddressLiteral src) { Assembler::mulsd(dst, as_Address(src)); } void mulsd(XMMRegister dst, AddressLiteral src);
void mulss(XMMRegister dst, XMMRegister src) { Assembler::mulss(dst, src); } void mulss(XMMRegister dst, XMMRegister src) { Assembler::mulss(dst, src); }
void mulss(XMMRegister dst, Address src) { Assembler::mulss(dst, src); } void mulss(XMMRegister dst, Address src) { Assembler::mulss(dst, src); }
void mulss(XMMRegister dst, AddressLiteral src) { Assembler::mulss(dst, as_Address(src)); } void mulss(XMMRegister dst, AddressLiteral src);
void sqrtsd(XMMRegister dst, XMMRegister src) { Assembler::sqrtsd(dst, src); } void sqrtsd(XMMRegister dst, XMMRegister src) { Assembler::sqrtsd(dst, src); }
void sqrtsd(XMMRegister dst, Address src) { Assembler::sqrtsd(dst, src); } void sqrtsd(XMMRegister dst, Address src) { Assembler::sqrtsd(dst, src); }
void sqrtsd(XMMRegister dst, AddressLiteral src) { Assembler::sqrtsd(dst, as_Address(src)); } void sqrtsd(XMMRegister dst, AddressLiteral src);
void sqrtss(XMMRegister dst, XMMRegister src) { Assembler::sqrtss(dst, src); } void sqrtss(XMMRegister dst, XMMRegister src) { Assembler::sqrtss(dst, src); }
void sqrtss(XMMRegister dst, Address src) { Assembler::sqrtss(dst, src); } void sqrtss(XMMRegister dst, Address src) { Assembler::sqrtss(dst, src); }
void sqrtss(XMMRegister dst, AddressLiteral src) { Assembler::sqrtss(dst, as_Address(src)); } void sqrtss(XMMRegister dst, AddressLiteral src);
void subsd(XMMRegister dst, XMMRegister src) { Assembler::subsd(dst, src); } void subsd(XMMRegister dst, XMMRegister src) { Assembler::subsd(dst, src); }
void subsd(XMMRegister dst, Address src) { Assembler::subsd(dst, src); } void subsd(XMMRegister dst, Address src) { Assembler::subsd(dst, src); }
void subsd(XMMRegister dst, AddressLiteral src) { Assembler::subsd(dst, as_Address(src)); } void subsd(XMMRegister dst, AddressLiteral src);
void subss(XMMRegister dst, XMMRegister src) { Assembler::subss(dst, src); } void subss(XMMRegister dst, XMMRegister src) { Assembler::subss(dst, src); }
void subss(XMMRegister dst, Address src) { Assembler::subss(dst, src); } void subss(XMMRegister dst, Address src) { Assembler::subss(dst, src); }
void subss(XMMRegister dst, AddressLiteral src) { Assembler::subss(dst, as_Address(src)); } void subss(XMMRegister dst, AddressLiteral src);
void ucomiss(XMMRegister dst, XMMRegister src) { Assembler::ucomiss(dst, src); } void ucomiss(XMMRegister dst, XMMRegister src) { Assembler::ucomiss(dst, src); }
void ucomiss(XMMRegister dst, Address src) { Assembler::ucomiss(dst, src); } void ucomiss(XMMRegister dst, Address src) { Assembler::ucomiss(dst, src); }
void ucomiss(XMMRegister dst, AddressLiteral src); void ucomiss(XMMRegister dst, AddressLiteral src);
void ucomisd(XMMRegister dst, XMMRegister src) { Assembler::ucomisd(dst, src); } void ucomisd(XMMRegister dst, XMMRegister src) { Assembler::ucomisd(dst, src); }
void ucomisd(XMMRegister dst, Address src) { Assembler::ucomisd(dst, src); } void ucomisd(XMMRegister dst, Address src) { Assembler::ucomisd(dst, src); }
void ucomisd(XMMRegister dst, AddressLiteral src); void ucomisd(XMMRegister dst, AddressLiteral src);
// Bitwise Logical XOR of Packed Double-Precision Floating-Point Values // Bitwise Logical XOR of Packed Double-Precision Floating-Point Values
......
...@@ -86,6 +86,7 @@ inline void Assembler::prefix(Address adr, Register reg, bool byteinst) {} ...@@ -86,6 +86,7 @@ inline void Assembler::prefix(Address adr, Register reg, bool byteinst) {}
inline void Assembler::prefixq(Address adr, Register reg) {} inline void Assembler::prefixq(Address adr, Register reg) {}
inline void Assembler::prefix(Address adr, XMMRegister reg) {} inline void Assembler::prefix(Address adr, XMMRegister reg) {}
inline void Assembler::prefixq(Address adr, XMMRegister reg) {}
#else #else
inline void Assembler::emit_long64(jlong x) { inline void Assembler::emit_long64(jlong x) {
*(jlong*) _code_pos = x; *(jlong*) _code_pos = x;
......
...@@ -237,9 +237,21 @@ int NativeMovRegMem::instruction_start() const { ...@@ -237,9 +237,21 @@ int NativeMovRegMem::instruction_start() const {
int off = 0; int off = 0;
u_char instr_0 = ubyte_at(off); u_char instr_0 = ubyte_at(off);
// See comment in Assembler::locate_operand() about VEX prefixes.
if (instr_0 == instruction_VEX_prefix_2bytes) {
assert((UseAVX > 0), "shouldn't have VEX prefix");
NOT_LP64(assert((0xC0 & ubyte_at(1)) == 0xC0, "shouldn't have LDS and LES instructions"));
return 2;
}
if (instr_0 == instruction_VEX_prefix_3bytes) {
assert((UseAVX > 0), "shouldn't have VEX prefix");
NOT_LP64(assert((0xC0 & ubyte_at(1)) == 0xC0, "shouldn't have LDS and LES instructions"));
return 3;
}
// First check to see if we have a (prefixed or not) xor // First check to see if we have a (prefixed or not) xor
if ( instr_0 >= instruction_prefix_wide_lo && // 0x40 if (instr_0 >= instruction_prefix_wide_lo && // 0x40
instr_0 <= instruction_prefix_wide_hi) { // 0x4f instr_0 <= instruction_prefix_wide_hi) { // 0x4f
off++; off++;
instr_0 = ubyte_at(off); instr_0 = ubyte_at(off);
} }
...@@ -256,13 +268,13 @@ int NativeMovRegMem::instruction_start() const { ...@@ -256,13 +268,13 @@ int NativeMovRegMem::instruction_start() const {
instr_0 = ubyte_at(off); instr_0 = ubyte_at(off);
} }
if ( instr_0 == instruction_code_xmm_ss_prefix || // 0xf3 if ( instr_0 == instruction_code_xmm_ss_prefix || // 0xf3
instr_0 == instruction_code_xmm_sd_prefix) { // 0xf2 instr_0 == instruction_code_xmm_sd_prefix) { // 0xf2
off++; off++;
instr_0 = ubyte_at(off); instr_0 = ubyte_at(off);
} }
if ( instr_0 >= instruction_prefix_wide_lo && // 0x40 if ( instr_0 >= instruction_prefix_wide_lo && // 0x40
instr_0 <= instruction_prefix_wide_hi) { // 0x4f instr_0 <= instruction_prefix_wide_hi) { // 0x4f
off++; off++;
instr_0 = ubyte_at(off); instr_0 = ubyte_at(off);
......
...@@ -287,6 +287,9 @@ class NativeMovRegMem: public NativeInstruction { ...@@ -287,6 +287,9 @@ class NativeMovRegMem: public NativeInstruction {
instruction_code_xmm_store = 0x11, instruction_code_xmm_store = 0x11,
instruction_code_xmm_lpd = 0x12, instruction_code_xmm_lpd = 0x12,
instruction_VEX_prefix_2bytes = Assembler::VEX_2bytes,
instruction_VEX_prefix_3bytes = Assembler::VEX_3bytes,
instruction_size = 4, instruction_size = 4,
instruction_offset = 0, instruction_offset = 0,
data_offset = 2, data_offset = 2,
......
...@@ -53,6 +53,7 @@ REGISTER_DEFINITION(Register, r14); ...@@ -53,6 +53,7 @@ REGISTER_DEFINITION(Register, r14);
REGISTER_DEFINITION(Register, r15); REGISTER_DEFINITION(Register, r15);
#endif // AMD64 #endif // AMD64
REGISTER_DEFINITION(XMMRegister, xnoreg);
REGISTER_DEFINITION(XMMRegister, xmm0 ); REGISTER_DEFINITION(XMMRegister, xmm0 );
REGISTER_DEFINITION(XMMRegister, xmm1 ); REGISTER_DEFINITION(XMMRegister, xmm1 );
REGISTER_DEFINITION(XMMRegister, xmm2 ); REGISTER_DEFINITION(XMMRegister, xmm2 );
...@@ -115,6 +116,7 @@ REGISTER_DEFINITION(Register, r12_heapbase); ...@@ -115,6 +116,7 @@ REGISTER_DEFINITION(Register, r12_heapbase);
REGISTER_DEFINITION(Register, r15_thread); REGISTER_DEFINITION(Register, r15_thread);
#endif // AMD64 #endif // AMD64
REGISTER_DEFINITION(MMXRegister, mnoreg );
REGISTER_DEFINITION(MMXRegister, mmx0 ); REGISTER_DEFINITION(MMXRegister, mmx0 );
REGISTER_DEFINITION(MMXRegister, mmx1 ); REGISTER_DEFINITION(MMXRegister, mmx1 );
REGISTER_DEFINITION(MMXRegister, mmx2 ); REGISTER_DEFINITION(MMXRegister, mmx2 );
......
...@@ -50,7 +50,7 @@ const char* VM_Version::_features_str = ""; ...@@ -50,7 +50,7 @@ const char* VM_Version::_features_str = "";
VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, }; VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };
static BufferBlob* stub_blob; static BufferBlob* stub_blob;
static const int stub_size = 400; static const int stub_size = 500;
extern "C" { extern "C" {
typedef void (*getPsrInfo_stub_t)(void*); typedef void (*getPsrInfo_stub_t)(void*);
...@@ -73,7 +73,7 @@ class VM_Version_StubGenerator: public StubCodeGenerator { ...@@ -73,7 +73,7 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT); const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);
Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4; Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
Label ext_cpuid1, ext_cpuid5, done; Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, done;
StubCodeMark mark(this, "VM_Version", "getPsrInfo_stub"); StubCodeMark mark(this, "VM_Version", "getPsrInfo_stub");
# define __ _masm-> # define __ _masm->
...@@ -229,6 +229,41 @@ class VM_Version_StubGenerator: public StubCodeGenerator { ...@@ -229,6 +229,41 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
__ movl(Address(rsi, 8), rcx); __ movl(Address(rsi, 8), rcx);
__ movl(Address(rsi,12), rdx); __ movl(Address(rsi,12), rdx);
//
// Check if OS has enabled XGETBV instruction to access XCR0
// (OSXSAVE feature flag) and CPU supports AVX
//
__ andl(rcx, 0x18000000);
__ cmpl(rcx, 0x18000000);
__ jccb(Assembler::notEqual, sef_cpuid);
//
// XCR0, XFEATURE_ENABLED_MASK register
//
__ xorl(rcx, rcx); // zero for XCR0 register
__ xgetbv();
__ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
__ movl(Address(rsi, 0), rax);
__ movl(Address(rsi, 4), rdx);
//
// cpuid(0x7) Structured Extended Features
//
__ bind(sef_cpuid);
__ movl(rax, 7);
__ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
__ jccb(Assembler::greater, ext_cpuid);
__ xorl(rcx, rcx);
__ cpuid();
__ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
__ movl(Address(rsi, 0), rax);
__ movl(Address(rsi, 4), rbx);
//
// Extended cpuid(0x80000000)
//
__ bind(ext_cpuid);
__ movl(rax, 0x80000000); __ movl(rax, 0x80000000);
__ cpuid(); __ cpuid();
__ cmpl(rax, 0x80000000); // Is cpuid(0x80000001) supported? __ cmpl(rax, 0x80000000); // Is cpuid(0x80000001) supported?
...@@ -359,13 +394,19 @@ void VM_Version::get_processor_features() { ...@@ -359,13 +394,19 @@ void VM_Version::get_processor_features() {
if (UseSSE < 1) if (UseSSE < 1)
_cpuFeatures &= ~CPU_SSE; _cpuFeatures &= ~CPU_SSE;
if (UseAVX < 2)
_cpuFeatures &= ~CPU_AVX2;
if (UseAVX < 1)
_cpuFeatures &= ~CPU_AVX;
if (logical_processors_per_package() == 1) { if (logical_processors_per_package() == 1) {
// HT processor could be installed on a system which doesn't support HT. // HT processor could be installed on a system which doesn't support HT.
_cpuFeatures &= ~CPU_HT; _cpuFeatures &= ~CPU_HT;
} }
char buf[256]; char buf[256];
jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
cores_per_cpu(), threads_per_core(), cores_per_cpu(), threads_per_core(),
cpu_family(), _model, _stepping, cpu_family(), _model, _stepping,
(supports_cmov() ? ", cmov" : ""), (supports_cmov() ? ", cmov" : ""),
...@@ -379,6 +420,8 @@ void VM_Version::get_processor_features() { ...@@ -379,6 +420,8 @@ void VM_Version::get_processor_features() {
(supports_sse4_1() ? ", sse4.1" : ""), (supports_sse4_1() ? ", sse4.1" : ""),
(supports_sse4_2() ? ", sse4.2" : ""), (supports_sse4_2() ? ", sse4.2" : ""),
(supports_popcnt() ? ", popcnt" : ""), (supports_popcnt() ? ", popcnt" : ""),
(supports_avx() ? ", avx" : ""),
(supports_avx2() ? ", avx2" : ""),
(supports_mmx_ext() ? ", mmxext" : ""), (supports_mmx_ext() ? ", mmxext" : ""),
(supports_3dnow_prefetch() ? ", 3dnowpref" : ""), (supports_3dnow_prefetch() ? ", 3dnowpref" : ""),
(supports_lzcnt() ? ", lzcnt": ""), (supports_lzcnt() ? ", lzcnt": ""),
...@@ -389,17 +432,24 @@ void VM_Version::get_processor_features() { ...@@ -389,17 +432,24 @@ void VM_Version::get_processor_features() {
// UseSSE is set to the smaller of what hardware supports and what // UseSSE is set to the smaller of what hardware supports and what
// the command line requires. I.e., you cannot set UseSSE to 2 on // the command line requires. I.e., you cannot set UseSSE to 2 on
// older Pentiums which do not support it. // older Pentiums which do not support it.
if( UseSSE > 4 ) UseSSE=4; if (UseSSE > 4) UseSSE=4;
if( UseSSE < 0 ) UseSSE=0; if (UseSSE < 0) UseSSE=0;
if( !supports_sse4_1() ) // Drop to 3 if no SSE4 support if (!supports_sse4_1()) // Drop to 3 if no SSE4 support
UseSSE = MIN2((intx)3,UseSSE); UseSSE = MIN2((intx)3,UseSSE);
if( !supports_sse3() ) // Drop to 2 if no SSE3 support if (!supports_sse3()) // Drop to 2 if no SSE3 support
UseSSE = MIN2((intx)2,UseSSE); UseSSE = MIN2((intx)2,UseSSE);
if( !supports_sse2() ) // Drop to 1 if no SSE2 support if (!supports_sse2()) // Drop to 1 if no SSE2 support
UseSSE = MIN2((intx)1,UseSSE); UseSSE = MIN2((intx)1,UseSSE);
if( !supports_sse () ) // Drop to 0 if no SSE support if (!supports_sse ()) // Drop to 0 if no SSE support
UseSSE = 0; UseSSE = 0;
if (UseAVX > 2) UseAVX=2;
if (UseAVX < 0) UseAVX=0;
if (!supports_avx2()) // Drop to 1 if no AVX2 support
UseAVX = MIN2((intx)1,UseAVX);
if (!supports_avx ()) // Drop to 0 if no AVX support
UseAVX = 0;
// On new cpus instructions which update whole XMM register should be used // On new cpus instructions which update whole XMM register should be used
// to prevent partial register stall due to dependencies on high half. // to prevent partial register stall due to dependencies on high half.
// //
...@@ -534,6 +584,9 @@ void VM_Version::get_processor_features() { ...@@ -534,6 +584,9 @@ void VM_Version::get_processor_features() {
if (FLAG_IS_DEFAULT(UsePopCountInstruction)) { if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
UsePopCountInstruction = true; UsePopCountInstruction = true;
} }
} else if (UsePopCountInstruction) {
warning("POPCNT instruction is not available on this CPU");
FLAG_SET_DEFAULT(UsePopCountInstruction, false);
} }
#ifdef COMPILER2 #ifdef COMPILER2
...@@ -605,7 +658,11 @@ void VM_Version::get_processor_features() { ...@@ -605,7 +658,11 @@ void VM_Version::get_processor_features() {
if (PrintMiscellaneous && Verbose) { if (PrintMiscellaneous && Verbose) {
tty->print_cr("Logical CPUs per core: %u", tty->print_cr("Logical CPUs per core: %u",
logical_processors_per_package()); logical_processors_per_package());
tty->print_cr("UseSSE=%d",UseSSE); tty->print("UseSSE=%d",UseSSE);
if (UseAVX > 0) {
tty->print(" UseAVX=%d",UseAVX);
}
tty->cr();
tty->print("Allocation"); tty->print("Allocation");
if (AllocatePrefetchStyle <= 0 || UseSSE == 0 && !supports_3dnow_prefetch()) { if (AllocatePrefetchStyle <= 0 || UseSSE == 0 && !supports_3dnow_prefetch()) {
tty->print_cr(": no prefetching"); tty->print_cr(": no prefetching");
......
...@@ -78,7 +78,10 @@ public: ...@@ -78,7 +78,10 @@ public:
sse4_2 : 1, sse4_2 : 1,
: 2, : 2,
popcnt : 1, popcnt : 1,
: 8; : 3,
osxsave : 1,
avx : 1,
: 3;
} bits; } bits;
}; };
...@@ -176,6 +179,34 @@ public: ...@@ -176,6 +179,34 @@ public:
} bits; } bits;
}; };
union SefCpuid7Eax {
uint32_t value;
};
union SefCpuid7Ebx {
uint32_t value;
struct {
uint32_t fsgsbase : 1,
: 2,
bmi1 : 1,
: 1,
avx2 : 1,
: 2,
bmi2 : 1,
: 23;
} bits;
};
union XemXcr0Eax {
uint32_t value;
struct {
uint32_t x87 : 1,
sse : 1,
ymm : 1,
: 29;
} bits;
};
protected: protected:
static int _cpu; static int _cpu;
static int _model; static int _model;
...@@ -200,7 +231,9 @@ protected: ...@@ -200,7 +231,9 @@ protected:
CPU_SSE4_1 = (1 << 11), CPU_SSE4_1 = (1 << 11),
CPU_SSE4_2 = (1 << 12), CPU_SSE4_2 = (1 << 12),
CPU_POPCNT = (1 << 13), CPU_POPCNT = (1 << 13),
CPU_LZCNT = (1 << 14) CPU_LZCNT = (1 << 14),
CPU_AVX = (1 << 15),
CPU_AVX2 = (1 << 16)
} cpuFeatureFlags; } cpuFeatureFlags;
// cpuid information block. All info derived from executing cpuid with // cpuid information block. All info derived from executing cpuid with
...@@ -228,6 +261,12 @@ protected: ...@@ -228,6 +261,12 @@ protected:
uint32_t dcp_cpuid4_ecx; // unused currently uint32_t dcp_cpuid4_ecx; // unused currently
uint32_t dcp_cpuid4_edx; // unused currently uint32_t dcp_cpuid4_edx; // unused currently
// cpuid function 7 (structured extended features)
SefCpuid7Eax sef_cpuid7_eax;
SefCpuid7Ebx sef_cpuid7_ebx;
uint32_t sef_cpuid7_ecx; // unused currently
uint32_t sef_cpuid7_edx; // unused currently
// cpuid function 0xB (processor topology) // cpuid function 0xB (processor topology)
// ecx = 0 // ecx = 0
uint32_t tpl_cpuidB0_eax; uint32_t tpl_cpuidB0_eax;
...@@ -275,6 +314,10 @@ protected: ...@@ -275,6 +314,10 @@ protected:
uint32_t ext_cpuid8_ebx; // reserved uint32_t ext_cpuid8_ebx; // reserved
ExtCpuid8Ecx ext_cpuid8_ecx; ExtCpuid8Ecx ext_cpuid8_ecx;
uint32_t ext_cpuid8_edx; // reserved uint32_t ext_cpuid8_edx; // reserved
// extended control register XCR0 (the XFEATURE_ENABLED_MASK register)
XemXcr0Eax xem_xcr0_eax;
uint32_t xem_xcr0_edx; // reserved
}; };
// The actual cpuid info block // The actual cpuid info block
...@@ -328,6 +371,14 @@ protected: ...@@ -328,6 +371,14 @@ protected:
result |= CPU_SSE4_2; result |= CPU_SSE4_2;
if (_cpuid_info.std_cpuid1_ecx.bits.popcnt != 0) if (_cpuid_info.std_cpuid1_ecx.bits.popcnt != 0)
result |= CPU_POPCNT; result |= CPU_POPCNT;
if (_cpuid_info.std_cpuid1_ecx.bits.avx != 0 &&
_cpuid_info.std_cpuid1_ecx.bits.osxsave != 0 &&
_cpuid_info.xem_xcr0_eax.bits.sse != 0 &&
_cpuid_info.xem_xcr0_eax.bits.ymm != 0) {
result |= CPU_AVX;
if (_cpuid_info.sef_cpuid7_ebx.bits.avx2 != 0)
result |= CPU_AVX2;
}
// AMD features. // AMD features.
if (is_amd()) { if (is_amd()) {
...@@ -350,12 +401,14 @@ public: ...@@ -350,12 +401,14 @@ public:
static ByteSize std_cpuid0_offset() { return byte_offset_of(CpuidInfo, std_max_function); } static ByteSize std_cpuid0_offset() { return byte_offset_of(CpuidInfo, std_max_function); }
static ByteSize std_cpuid1_offset() { return byte_offset_of(CpuidInfo, std_cpuid1_eax); } static ByteSize std_cpuid1_offset() { return byte_offset_of(CpuidInfo, std_cpuid1_eax); }
static ByteSize dcp_cpuid4_offset() { return byte_offset_of(CpuidInfo, dcp_cpuid4_eax); } static ByteSize dcp_cpuid4_offset() { return byte_offset_of(CpuidInfo, dcp_cpuid4_eax); }
static ByteSize sef_cpuid7_offset() { return byte_offset_of(CpuidInfo, sef_cpuid7_eax); }
static ByteSize ext_cpuid1_offset() { return byte_offset_of(CpuidInfo, ext_cpuid1_eax); } static ByteSize ext_cpuid1_offset() { return byte_offset_of(CpuidInfo, ext_cpuid1_eax); }
static ByteSize ext_cpuid5_offset() { return byte_offset_of(CpuidInfo, ext_cpuid5_eax); } static ByteSize ext_cpuid5_offset() { return byte_offset_of(CpuidInfo, ext_cpuid5_eax); }
static ByteSize ext_cpuid8_offset() { return byte_offset_of(CpuidInfo, ext_cpuid8_eax); } static ByteSize ext_cpuid8_offset() { return byte_offset_of(CpuidInfo, ext_cpuid8_eax); }
static ByteSize tpl_cpuidB0_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB0_eax); } static ByteSize tpl_cpuidB0_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB0_eax); }
static ByteSize tpl_cpuidB1_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB1_eax); } static ByteSize tpl_cpuidB1_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB1_eax); }
static ByteSize tpl_cpuidB2_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB2_eax); } static ByteSize tpl_cpuidB2_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB2_eax); }
static ByteSize xem_xcr0_offset() { return byte_offset_of(CpuidInfo, xem_xcr0_eax); }
// Initialization // Initialization
static void initialize(); static void initialize();
...@@ -447,6 +500,8 @@ public: ...@@ -447,6 +500,8 @@ public:
static bool supports_sse4_1() { return (_cpuFeatures & CPU_SSE4_1) != 0; } static bool supports_sse4_1() { return (_cpuFeatures & CPU_SSE4_1) != 0; }
static bool supports_sse4_2() { return (_cpuFeatures & CPU_SSE4_2) != 0; } static bool supports_sse4_2() { return (_cpuFeatures & CPU_SSE4_2) != 0; }
static bool supports_popcnt() { return (_cpuFeatures & CPU_POPCNT) != 0; } static bool supports_popcnt() { return (_cpuFeatures & CPU_POPCNT) != 0; }
static bool supports_avx() { return (_cpuFeatures & CPU_AVX) != 0; }
static bool supports_avx2() { return (_cpuFeatures & CPU_AVX2) != 0; }
// //
// AMD features // AMD features
// //
......
此差异已折叠。
此差异已折叠。
...@@ -525,6 +525,9 @@ class CommandLineFlags { ...@@ -525,6 +525,9 @@ class CommandLineFlags {
product(intx, UseSSE, 99, \ product(intx, UseSSE, 99, \
"Highest supported SSE instructions set on x86/x64") \ "Highest supported SSE instructions set on x86/x64") \
\ \
product(intx, UseAVX, 99, \
"Highest supported AVX instructions set on x86/x64") \
\
product(intx, UseVIS, 99, \ product(intx, UseVIS, 99, \
"Highest supported VIS instructions set on Sparc") \ "Highest supported VIS instructions set on Sparc") \
\ \
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册