Commit 41479e87 authored by K kvn

7116452: Add support for AVX instructions

Summary: Added support for AVX extension to the x86 instruction set.
Reviewed-by: never
Parent 8c079e48
@@ -533,6 +533,19 @@ address Assembler::locate_operand(address inst, WhichOperand which) {
case 0x0F: // movx..., etc.
switch (0xFF & *ip++) {
case 0x3A: // pcmpestri
tail_size = 1;
case 0x38: // ptest, pmovzxbw
ip++; // skip opcode
debug_only(has_disp32 = true); // has both kinds of operands!
break;
case 0x70: // pshufd r, r/a, #8
debug_only(has_disp32 = true); // has both kinds of operands!
case 0x73: // psrldq r, #8
tail_size = 1;
break;
case 0x12: // movlps
case 0x28: // movaps
case 0x2E: // ucomiss
@@ -543,9 +556,7 @@ address Assembler::locate_operand(address inst, WhichOperand which) {
case 0x57: // xorps
case 0x6E: // movd
case 0x7E: // movd
case 0xAE: // ldmxcsr a
// 64bit side says it these have both operands but that doesn't
// appear to be true
case 0xAE: // ldmxcsr, stmxcsr, fxrstor, fxsave, clflush
debug_only(has_disp32 = true);
break;
@@ -565,6 +576,12 @@ address Assembler::locate_operand(address inst, WhichOperand which) {
// fall out of the switch to decode the address
break;
case 0xC4: // pinsrw r, a, #8
debug_only(has_disp32 = true);
case 0xC5: // pextrw r, r, #8
tail_size = 1; // the imm8
break;
case 0xAC: // shrd r, a, #8
debug_only(has_disp32 = true);
tail_size = 1; // the imm8
@@ -625,11 +642,44 @@ address Assembler::locate_operand(address inst, WhichOperand which) {
tail_size = 1; // the imm8
break;
case 0xE8: // call rdisp32
case 0xE9: // jmp rdisp32
if (which == end_pc_operand) return ip + 4;
assert(which == call32_operand, "call has no disp32 or imm");
return ip;
case 0xC4: // VEX_3bytes
case 0xC5: // VEX_2bytes
assert((UseAVX > 0), "shouldn't have VEX prefix");
assert(ip == inst+1, "no prefixes allowed");
// C4 and C5 are also used as opcodes for PINSRW and PEXTRW instructions
// but they have prefix 0x0F and processed when 0x0F processed above.
//
// In 32-bit mode the VEX first byte C4 and C5 alias onto LDS and LES
// instructions (these instructions are not supported in 64-bit mode).
// To distinguish them bits [7:6] are set in the VEX second byte since
// ModRM byte can not be of the form 11xxxxxx in 32-bit mode. To set
// those VEX bits REX and vvvv bits are inverted.
//
// Fortunately C2 doesn't generate these instructions so we don't need
// to check for them in product version.
// Check second byte
NOT_LP64(assert((0xC0 & *ip) == 0xC0, "shouldn't have LDS and LES instructions"));
// First byte
if ((0xFF & *inst) == VEX_3bytes) {
ip++; // third byte
is_64bit = ((VEX_W & *ip) == VEX_W);
}
ip++; // opcode
// To find the end of instruction (which == end_pc_operand).
switch (0xFF & *ip) {
case 0x61: // pcmpestri r, r/a, #8
case 0x70: // pshufd r, r/a, #8
case 0x73: // psrldq r, #8
tail_size = 1; // the imm8
break;
default:
break;
}
ip++; // skip opcode
debug_only(has_disp32 = true); // has both kinds of operands!
break;
case 0xD1: // sal a, 1; sar a, 1; shl a, 1; shr a, 1
case 0xD3: // sal a, %cl; sar a, %cl; shl a, %cl; shr a, %cl
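For reference, the disambiguation described in the VEX comment above can be illustrated with a small standalone C++ sketch. This is illustration only, not part of the change; the byte sequences used are ordinary x86 encodings, and the check mirrors the property the NOT_LP64 assert in the new case relies on: after a 0xC4/0xC5 byte, a VEX prefix requires bits [7:6] of the next byte to be set, which the ModRM byte of the 32-bit-only LDS/LES forms can never have.

// Standalone sketch (plain C++, not HotSpot code) of the C4/C5 vs. LDS/LES check.
#include <cstdint>
#include <cstdio>

static bool looks_like_vex(const uint8_t* ip) {
  // VEX: 0xC4/0xC5 followed by a byte whose top two bits are 11.
  return (ip[0] == 0xC4 || ip[0] == 0xC5) && ((ip[1] & 0xC0) == 0xC0);
}

int main() {
  const uint8_t vex_insn[] = { 0xC5, 0xF3, 0x58, 0xC2 };  // vaddsd xmm0, xmm1, xmm2
  const uint8_t lds_insn[] = { 0xC5, 0x06 };              // lds eax, [esi] (32-bit only)
  std::printf("%d %d\n", looks_like_vex(vex_insn), looks_like_vex(lds_insn));  // prints: 1 0
  return 0;
}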
@@ -643,6 +693,12 @@ address Assembler::locate_operand(address inst, WhichOperand which) {
debug_only(has_disp32 = true);
break;
case 0xE8: // call rdisp32
case 0xE9: // jmp rdisp32
if (which == end_pc_operand) return ip + 4;
assert(which == call32_operand, "call has no disp32 or imm");
return ip;
case 0xF0: // Lock
assert(os::is_MP(), "only on MP");
goto again_after_prefix;
@@ -918,9 +974,7 @@ void Assembler::addr_nop_8() {
void Assembler::addsd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
emit_byte(0xF2);
int encode = prefix_and_encode(dst->encoding(), src->encoding());
emit_byte(0x0F);
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
emit_byte(0x58);
emit_byte(0xC0 | encode);
}
@@ -928,18 +982,14 @@ void Assembler::addsd(XMMRegister dst, XMMRegister src) {
void Assembler::addsd(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionMark im(this);
emit_byte(0xF2);
prefix(src, dst);
emit_byte(0x0F);
simd_prefix(dst, dst, src, VEX_SIMD_F2);
emit_byte(0x58);
emit_operand(dst, src);
}
void Assembler::addss(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
emit_byte(0xF3);
int encode = prefix_and_encode(dst->encoding(), src->encoding());
emit_byte(0x0F);
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
emit_byte(0x58);
emit_byte(0xC0 | encode);
}
@@ -947,13 +997,19 @@ void Assembler::addss(XMMRegister dst, XMMRegister src) {
void Assembler::addss(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
InstructionMark im(this);
emit_byte(0xF3);
prefix(src, dst);
emit_byte(0x0F);
simd_prefix(dst, dst, src, VEX_SIMD_F3);
emit_byte(0x58);
emit_operand(dst, src);
}
void Assembler::andl(Address dst, int32_t imm32) {
InstructionMark im(this);
prefix(dst);
emit_byte(0x81);
emit_operand(rsp, dst, 4);
emit_long(imm32);
}
void Assembler::andl(Register dst, int32_t imm32) {
prefix(dst);
emit_arith(0x81, 0xE0, dst, imm32);
@@ -974,13 +1030,33 @@ void Assembler::andl(Register dst, Register src) {
void Assembler::andpd(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionMark im(this);
emit_byte(0x66);
prefix(src, dst);
emit_byte(0x0F);
simd_prefix(dst, dst, src, VEX_SIMD_66);
emit_byte(0x54);
emit_operand(dst, src);
}
void Assembler::andpd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
emit_byte(0x54);
emit_byte(0xC0 | encode);
}
void Assembler::andps(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
InstructionMark im(this);
simd_prefix(dst, dst, src, VEX_SIMD_NONE);
emit_byte(0x54);
emit_operand(dst, src);
}
void Assembler::andps(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE);
emit_byte(0x54);
emit_byte(0xC0 | encode);
}
void Assembler::bsfl(Register dst, Register src) {
int encode = prefix_and_encode(dst->encoding(), src->encoding());
emit_byte(0x0F);
@@ -1025,19 +1101,7 @@ void Assembler::call(Label& L, relocInfo::relocType rtype) {
}
void Assembler::call(Register dst) {
int encode = prefix_and_encode(dst->encoding());
// This was originally using a 32bit register encoding
// and surely we want 64bit!
// this is a 32bit encoding but in 64bit mode the default
// operand size is 64bit so there is no need for the
// wide prefix. So prefix only happens if we use the
// new registers. Much like push/pop.
int x = offset();
// this may be true but dbx disassembles it as if it
// were 32bits...
// int encode = prefix_and_encode(dst->encoding());
// if (offset() != x) assert(dst->encoding() >= 8, "what?");
int encode = prefixq_and_encode(dst->encoding());
emit_byte(0xFF);
emit_byte(0xD0 | encode);
}
@@ -1157,87 +1221,119 @@ void Assembler::comisd(XMMRegister dst, Address src) {
// NOTE: dbx seems to decode this as comiss even though the
// 0x66 is there. Strangly ucomisd comes out correct
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
emit_byte(0x66);
comiss(dst, src);
InstructionMark im(this);
simd_prefix(dst, src, VEX_SIMD_66);
emit_byte(0x2F);
emit_operand(dst, src);
}
void Assembler::comisd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
emit_byte(0x2F);
emit_byte(0xC0 | encode);
} }
void Assembler::comiss(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
InstructionMark im(this);
prefix(src, dst);
emit_byte(0x0F);
simd_prefix(dst, src, VEX_SIMD_NONE);
emit_byte(0x2F);
emit_operand(dst, src);
}
void Assembler::comiss(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE);
emit_byte(0x2F);
emit_byte(0xC0 | encode);
}
void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
emit_byte(0xF3);
int encode = prefix_and_encode(dst->encoding(), src->encoding());
emit_byte(0x0F);
int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3);
emit_byte(0xE6);
emit_byte(0xC0 | encode);
}
void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
int encode = prefix_and_encode(dst->encoding(), src->encoding());
emit_byte(0x0F);
int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE);
emit_byte(0x5B);
emit_byte(0xC0 | encode);
}
void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
emit_byte(0xF2);
int encode = prefix_and_encode(dst->encoding(), src->encoding());
emit_byte(0x0F);
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
emit_byte(0x5A);
emit_byte(0xC0 | encode);
}
void Assembler::cvtsd2ss(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionMark im(this);
simd_prefix(dst, dst, src, VEX_SIMD_F2);
emit_byte(0x5A);
emit_operand(dst, src);
}
void Assembler::cvtsi2sdl(XMMRegister dst, Register src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
emit_byte(0xF2);
int encode = prefix_and_encode(dst->encoding(), src->encoding());
emit_byte(0x0F);
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
emit_byte(0x2A);
emit_byte(0xC0 | encode);
}
void Assembler::cvtsi2sdl(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionMark im(this);
simd_prefix(dst, dst, src, VEX_SIMD_F2);
emit_byte(0x2A);
emit_operand(dst, src);
}
void Assembler::cvtsi2ssl(XMMRegister dst, Register src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
emit_byte(0xF3);
int encode = prefix_and_encode(dst->encoding(), src->encoding());
emit_byte(0x0F);
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
emit_byte(0x2A);
emit_byte(0xC0 | encode);
}
void Assembler::cvtsi2ssl(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
InstructionMark im(this);
simd_prefix(dst, dst, src, VEX_SIMD_F3);
emit_byte(0x2A);
emit_operand(dst, src);
}
void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
emit_byte(0xF3);
int encode = prefix_and_encode(dst->encoding(), src->encoding());
emit_byte(0x0F);
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
emit_byte(0x5A);
emit_byte(0xC0 | encode);
}
void Assembler::cvtss2sd(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionMark im(this);
simd_prefix(dst, dst, src, VEX_SIMD_F3);
emit_byte(0x5A);
emit_operand(dst, src);
}
void Assembler::cvttsd2sil(Register dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
emit_byte(0xF2);
int encode = prefix_and_encode(dst->encoding(), src->encoding());
emit_byte(0x0F);
int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F2);
emit_byte(0x2C);
emit_byte(0xC0 | encode);
}
void Assembler::cvttss2sil(Register dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
emit_byte(0xF3);
int encode = prefix_and_encode(dst->encoding(), src->encoding());
emit_byte(0x0F);
int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3);
emit_byte(0x2C);
emit_byte(0xC0 | encode);
}
@@ -1253,18 +1349,14 @@ void Assembler::decl(Address dst) {
void Assembler::divsd(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionMark im(this);
emit_byte(0xF2);
prefix(src, dst);
emit_byte(0x0F);
simd_prefix(dst, dst, src, VEX_SIMD_F2);
emit_byte(0x5E);
emit_operand(dst, src);
}
void Assembler::divsd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
emit_byte(0xF2);
int encode = prefix_and_encode(dst->encoding(), src->encoding());
emit_byte(0x0F);
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
emit_byte(0x5E);
emit_byte(0xC0 | encode);
}
@@ -1272,18 +1364,14 @@ void Assembler::divsd(XMMRegister dst, XMMRegister src) {
void Assembler::divss(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
InstructionMark im(this);
emit_byte(0xF3);
prefix(src, dst);
emit_byte(0x0F);
simd_prefix(dst, dst, src, VEX_SIMD_F3);
emit_byte(0x5E);
emit_operand(dst, src);
}
void Assembler::divss(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
emit_byte(0xF3);
int encode = prefix_and_encode(dst->encoding(), src->encoding());
emit_byte(0x0F);
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
emit_byte(0x5E);
emit_byte(0xC0 | encode);
}
@@ -1509,49 +1597,16 @@ void Assembler::mov(Register dst, Register src) {
void Assembler::movapd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
int dstenc = dst->encoding();
int srcenc = src->encoding();
emit_byte(0x66);
if (dstenc < 8) {
if (srcenc >= 8) {
prefix(REX_B);
srcenc -= 8;
}
} else {
if (srcenc < 8) {
prefix(REX_R);
} else {
prefix(REX_RB);
srcenc -= 8;
}
dstenc -= 8;
}
emit_byte(0x0F);
emit_byte(0x28);
emit_byte(0xC0 | dstenc << 3 | srcenc);
emit_byte(0xC0 | encode);
}
void Assembler::movaps(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE);
int dstenc = dst->encoding();
int srcenc = src->encoding();
if (dstenc < 8) {
if (srcenc >= 8) {
prefix(REX_B);
srcenc -= 8;
}
} else {
if (srcenc < 8) {
prefix(REX_R);
} else {
prefix(REX_RB);
srcenc -= 8;
}
dstenc -= 8;
}
emit_byte(0x0F);
emit_byte(0x28);
emit_byte(0xC0 | dstenc << 3 | srcenc);
emit_byte(0xC0 | encode);
}
void Assembler::movb(Register dst, Address src) {
@@ -1582,19 +1637,15 @@ void Assembler::movb(Address dst, Register src) {
void Assembler::movdl(XMMRegister dst, Register src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
emit_byte(0x66);
int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
int encode = prefix_and_encode(dst->encoding(), src->encoding());
emit_byte(0x0F);
emit_byte(0x6E);
emit_byte(0xC0 | encode);
}
void Assembler::movdl(Register dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
emit_byte(0x66);
// swap src/dst to get correct prefix
int encode = prefix_and_encode(src->encoding(), dst->encoding());
int encode = simd_prefix_and_encode(src, dst, VEX_SIMD_66);
emit_byte(0x0F);
emit_byte(0x7E);
emit_byte(0xC0 | encode);
}
@@ -1602,58 +1653,29 @@ void Assembler::movdl(Register dst, XMMRegister src) {
void Assembler::movdl(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionMark im(this);
emit_byte(0x66);
simd_prefix(dst, src, VEX_SIMD_66);
prefix(src, dst);
emit_byte(0x0F);
emit_byte(0x6E);
emit_operand(dst, src);
}
void Assembler::movdqa(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionMark im(this);
emit_byte(0x66);
prefix(src, dst);
emit_byte(0x0F);
emit_byte(0x6F);
emit_operand(dst, src);
}
void Assembler::movdqa(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
emit_byte(0x66);
int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
int encode = prefixq_and_encode(dst->encoding(), src->encoding());
emit_byte(0x0F);
emit_byte(0x6F);
emit_byte(0xC0 | encode);
}
void Assembler::movdqa(Address dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionMark im(this);
emit_byte(0x66);
prefix(dst, src);
emit_byte(0x0F);
emit_byte(0x7F);
emit_operand(src, dst);
}
void Assembler::movdqu(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionMark im(this);
emit_byte(0xF3);
simd_prefix(dst, src, VEX_SIMD_F3);
prefix(src, dst);
emit_byte(0x0F);
emit_byte(0x6F);
emit_operand(dst, src);
}
void Assembler::movdqu(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
emit_byte(0xF3);
int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3);
int encode = prefixq_and_encode(dst->encoding(), src->encoding());
emit_byte(0x0F);
emit_byte(0x6F);
emit_byte(0xC0 | encode);
}
@@ -1661,9 +1683,7 @@ void Assembler::movdqu(XMMRegister dst, XMMRegister src) {
void Assembler::movdqu(Address dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionMark im(this);
emit_byte(0xF3);
simd_prefix(dst, src, VEX_SIMD_F3);
prefix(dst, src);
emit_byte(0x0F);
emit_byte(0x7F);
emit_operand(src, dst);
}
@@ -1710,9 +1730,7 @@ void Assembler::movl(Address dst, Register src) {
void Assembler::movlpd(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionMark im(this);
emit_byte(0x66);
simd_prefix(dst, dst, src, VEX_SIMD_66);
prefix(src, dst);
emit_byte(0x0F);
emit_byte(0x12);
emit_operand(dst, src);
}
@@ -1740,9 +1758,7 @@ void Assembler::movq( Address dst, MMXRegister src ) {
void Assembler::movq(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionMark im(this);
emit_byte(0xF3);
simd_prefix(dst, src, VEX_SIMD_F3);
prefix(src, dst);
emit_byte(0x0F);
emit_byte(0x7E);
emit_operand(dst, src);
}
@@ -1750,9 +1766,7 @@ void Assembler::movq(XMMRegister dst, Address src) {
void Assembler::movq(Address dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionMark im(this);
emit_byte(0x66);
simd_prefix(dst, src, VEX_SIMD_66);
prefix(dst, src);
emit_byte(0x0F);
emit_byte(0xD6);
emit_operand(src, dst);
}
@@ -1775,9 +1789,7 @@ void Assembler::movsbl(Register dst, Register src) { // movsxb
void Assembler::movsd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
emit_byte(0xF2);
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
int encode = prefix_and_encode(dst->encoding(), src->encoding());
emit_byte(0x0F);
emit_byte(0x10);
emit_byte(0xC0 | encode);
}
@@ -1785,9 +1797,7 @@ void Assembler::movsd(XMMRegister dst, XMMRegister src) {
void Assembler::movsd(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionMark im(this);
emit_byte(0xF2);
simd_prefix(dst, src, VEX_SIMD_F2);
prefix(src, dst);
emit_byte(0x0F);
emit_byte(0x10);
emit_operand(dst, src);
}
@@ -1795,18 +1805,14 @@ void Assembler::movsd(XMMRegister dst, Address src) {
void Assembler::movsd(Address dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionMark im(this);
emit_byte(0xF2);
simd_prefix(dst, src, VEX_SIMD_F2);
prefix(dst, src);
emit_byte(0x0F);
emit_byte(0x11);
emit_operand(src, dst);
}
void Assembler::movss(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
emit_byte(0xF3);
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
int encode = prefix_and_encode(dst->encoding(), src->encoding());
emit_byte(0x0F);
emit_byte(0x10);
emit_byte(0xC0 | encode);
}
@@ -1814,9 +1820,7 @@ void Assembler::movss(XMMRegister dst, XMMRegister src) {
void Assembler::movss(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
InstructionMark im(this);
emit_byte(0xF3);
simd_prefix(dst, src, VEX_SIMD_F3);
prefix(src, dst);
emit_byte(0x0F);
emit_byte(0x10);
emit_operand(dst, src);
}
@@ -1824,9 +1828,7 @@ void Assembler::movss(XMMRegister dst, Address src) {
void Assembler::movss(Address dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
InstructionMark im(this);
emit_byte(0xF3);
simd_prefix(dst, src, VEX_SIMD_F3);
prefix(dst, src);
emit_byte(0x0F);
emit_byte(0x11);
emit_operand(src, dst);
}
@@ -1919,18 +1921,14 @@ void Assembler::mull(Register src) {
void Assembler::mulsd(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionMark im(this);
emit_byte(0xF2);
prefix(src, dst);
emit_byte(0x0F);
simd_prefix(dst, dst, src, VEX_SIMD_F2);
emit_byte(0x59);
emit_operand(dst, src);
}
void Assembler::mulsd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
emit_byte(0xF2);
int encode = prefix_and_encode(dst->encoding(), src->encoding());
emit_byte(0x0F);
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
emit_byte(0x59);
emit_byte(0xC0 | encode);
}
@@ -1938,18 +1936,14 @@ void Assembler::mulsd(XMMRegister dst, XMMRegister src) {
void Assembler::mulss(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
InstructionMark im(this);
emit_byte(0xF3);
prefix(src, dst);
emit_byte(0x0F);
simd_prefix(dst, dst, src, VEX_SIMD_F3);
emit_byte(0x59);
emit_operand(dst, src);
}
void Assembler::mulss(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
emit_byte(0xF3);
int encode = prefix_and_encode(dst->encoding(), src->encoding());
emit_byte(0x0F);
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
emit_byte(0x59);
emit_byte(0xC0 | encode);
}
@@ -2237,14 +2231,26 @@ void Assembler::orl(Register dst, Register src) {
emit_arith(0x0B, 0xC0, dst, src);
}
void Assembler::packuswb(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
InstructionMark im(this);
simd_prefix(dst, dst, src, VEX_SIMD_66);
emit_byte(0x67);
emit_operand(dst, src);
}
void Assembler::packuswb(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
emit_byte(0x67);
emit_byte(0xC0 | encode);
}
void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) {
assert(VM_Version::supports_sse4_2(), "");
InstructionMark im(this);
emit_byte(0x66);
simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A);
prefix(src, dst);
emit_byte(0x0F);
emit_byte(0x3A);
emit_byte(0x61);
emit_operand(dst, src);
emit_byte(imm8);
@@ -2252,16 +2258,27 @@ void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) {
void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) {
assert(VM_Version::supports_sse4_2(), "");
int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A);
emit_byte(0x66);
int encode = prefixq_and_encode(dst->encoding(), src->encoding());
emit_byte(0x0F);
emit_byte(0x3A);
emit_byte(0x61);
emit_byte(0xC0 | encode);
emit_byte(imm8);
}
void Assembler::pmovzxbw(XMMRegister dst, Address src) {
assert(VM_Version::supports_sse4_1(), "");
InstructionMark im(this);
simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
emit_byte(0x30);
emit_operand(dst, src);
}
void Assembler::pmovzxbw(XMMRegister dst, XMMRegister src) {
assert(VM_Version::supports_sse4_1(), "");
int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
emit_byte(0x30);
emit_byte(0xC0 | encode);
}
// generic
void Assembler::pop(Register dst) {
int encode = prefix_and_encode(dst->encoding());
@@ -2360,22 +2377,24 @@ void Assembler::prefix(Prefix p) {
void Assembler::por(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
emit_byte(0x66);
int encode = prefix_and_encode(dst->encoding(), src->encoding());
emit_byte(0x0F);
emit_byte(0xEB);
emit_byte(0xC0 | encode);
}
void Assembler::por(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
InstructionMark im(this);
simd_prefix(dst, dst, src, VEX_SIMD_66);
emit_byte(0xEB);
emit_operand(dst, src);
}
void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) {
assert(isByte(mode), "invalid value");
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
emit_byte(0x66);
int encode = prefix_and_encode(dst->encoding(), src->encoding());
emit_byte(0x0F);
emit_byte(0x70);
emit_byte(0xC0 | encode);
emit_byte(mode & 0xFF);
@@ -2385,11 +2404,9 @@ void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) {
void Assembler::pshufd(XMMRegister dst, Address src, int mode) {
assert(isByte(mode), "invalid value");
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
InstructionMark im(this);
emit_byte(0x66);
simd_prefix(dst, src, VEX_SIMD_66);
prefix(src, dst);
emit_byte(0x0F);
emit_byte(0x70);
emit_operand(dst, src);
emit_byte(mode & 0xFF);
@@ -2398,10 +2415,7 @@ void Assembler::pshufd(XMMRegister dst, Address src, int mode) {
void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
assert(isByte(mode), "invalid value");
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F2);
emit_byte(0xF2);
int encode = prefix_and_encode(dst->encoding(), src->encoding());
emit_byte(0x0F);
emit_byte(0x70);
emit_byte(0xC0 | encode);
emit_byte(mode & 0xFF);
@@ -2410,11 +2424,9 @@ void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
void Assembler::pshuflw(XMMRegister dst, Address src, int mode) {
assert(isByte(mode), "invalid value");
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
InstructionMark im(this);
emit_byte(0xF2);
simd_prefix(dst, src, VEX_SIMD_F2);
prefix(src, dst); // QQ new
emit_byte(0x0F);
emit_byte(0x70);
emit_operand(dst, src);
emit_byte(mode & 0xFF);
@@ -2425,11 +2437,8 @@ void Assembler::psrlq(XMMRegister dst, int shift) {
// HMM Table D-1 says sse2 or mmx.
// Do not confuse it with psrldq SSE2 instruction which
// shifts 128 bit value in xmm register by number of bytes.
NOT_LP64(assert(VM_Version::supports_sse(), ""));
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66);
int encode = prefixq_and_encode(xmm2->encoding(), dst->encoding());
emit_byte(0x66);
emit_byte(0x0F);
emit_byte(0x73);
emit_byte(0xC0 | encode);
emit_byte(shift);
@@ -2438,10 +2447,7 @@ void Assembler::psrlq(XMMRegister dst, int shift) {
void Assembler::psrldq(XMMRegister dst, int shift) {
// Shift 128 bit value in xmm register by number of bytes.
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
int encode = simd_prefix_and_encode(xmm3, dst, dst, VEX_SIMD_66);
int encode = prefixq_and_encode(xmm3->encoding(), dst->encoding());
emit_byte(0x66);
emit_byte(0x0F);
emit_byte(0x73);
emit_byte(0xC0 | encode);
emit_byte(shift);
@@ -2449,36 +2455,52 @@ void Assembler::psrldq(XMMRegister dst, int shift) {
void Assembler::ptest(XMMRegister dst, Address src) {
assert(VM_Version::supports_sse4_1(), "");
assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
InstructionMark im(this);
emit_byte(0x66);
simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
prefix(src, dst);
emit_byte(0x0F);
emit_byte(0x38);
emit_byte(0x17);
emit_operand(dst, src);
}
void Assembler::ptest(XMMRegister dst, XMMRegister src) {
assert(VM_Version::supports_sse4_1(), "");
int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
emit_byte(0x66);
int encode = prefixq_and_encode(dst->encoding(), src->encoding());
emit_byte(0x0F);
emit_byte(0x38);
emit_byte(0x17);
emit_byte(0xC0 | encode);
}
void Assembler::punpcklbw(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
InstructionMark im(this);
simd_prefix(dst, dst, src, VEX_SIMD_66);
emit_byte(0x60);
emit_operand(dst, src);
}
void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
emit_byte(0x66);
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
int encode = prefix_and_encode(dst->encoding(), src->encoding());
emit_byte(0x0F);
emit_byte(0x60);
emit_byte(0xC0 | encode);
}
void Assembler::punpckldq(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
InstructionMark im(this);
simd_prefix(dst, dst, src, VEX_SIMD_66);
emit_byte(0x62);
emit_operand(dst, src);
}
void Assembler::punpckldq(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
emit_byte(0x62);
emit_byte(0xC0 | encode);
}
void Assembler::push(int32_t imm32) {
// in 64bits we push 64bits onto the stack but only
// take a 32bit immediate
@@ -2508,20 +2530,16 @@ void Assembler::pushl(Address src) {
void Assembler::pxor(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
InstructionMark im(this);
emit_byte(0x66);
simd_prefix(dst, dst, src, VEX_SIMD_66);
prefix(src, dst);
emit_byte(0x0F);
emit_byte(0xEF);
emit_operand(dst, src);
}
void Assembler::pxor(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionMark im(this);
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
emit_byte(0x66);
int encode = prefix_and_encode(dst->encoding(), src->encoding());
emit_byte(0x0F);
emit_byte(0xEF);
emit_byte(0xC0 | encode);
}
@@ -2683,12 +2701,8 @@ void Assembler::smovl() {
}
void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) {
// HMM Table D-1 says sse2
// NOT_LP64(assert(VM_Version::supports_sse(), ""));
NOT_LP64(assert(VM_Version::supports_sse2(), "")); NOT_LP64(assert(VM_Version::supports_sse2(), ""));
emit_byte(0xF2); int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
int encode = prefix_and_encode(dst->encoding(), src->encoding());
emit_byte(0x0F);
emit_byte(0x51);
emit_byte(0xC0 | encode);
}
@@ -2696,30 +2710,22 @@ void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) {
void Assembler::sqrtsd(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionMark im(this);
emit_byte(0xF2);
simd_prefix(dst, dst, src, VEX_SIMD_F2);
prefix(src, dst);
emit_byte(0x0F);
emit_byte(0x51);
emit_operand(dst, src);
}
void Assembler::sqrtss(XMMRegister dst, XMMRegister src) {
// HMM Table D-1 says sse2
// NOT_LP64(assert(VM_Version::supports_sse(), ""));
NOT_LP64(assert(VM_Version::supports_sse(), ""));
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
emit_byte(0xF3);
int encode = prefix_and_encode(dst->encoding(), src->encoding());
emit_byte(0x0F);
emit_byte(0x51);
emit_byte(0xC0 | encode);
}
void Assembler::sqrtss(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
NOT_LP64(assert(VM_Version::supports_sse(), ""));
InstructionMark im(this);
emit_byte(0xF3);
simd_prefix(dst, dst, src, VEX_SIMD_F3);
prefix(src, dst);
emit_byte(0x0F);
emit_byte(0x51);
emit_operand(dst, src);
}
@@ -2765,9 +2771,7 @@ void Assembler::subl(Register dst, Register src) {
void Assembler::subsd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
emit_byte(0xF2);
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
int encode = prefix_and_encode(dst->encoding(), src->encoding());
emit_byte(0x0F);
emit_byte(0x5C);
emit_byte(0xC0 | encode);
}
@@ -2775,18 +2779,14 @@ void Assembler::subsd(XMMRegister dst, XMMRegister src) {
void Assembler::subsd(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionMark im(this);
emit_byte(0xF2);
prefix(src, dst);
emit_byte(0x0F);
simd_prefix(dst, dst, src, VEX_SIMD_F2);
emit_byte(0x5C);
emit_operand(dst, src);
}
void Assembler::subss(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
emit_byte(0xF3);
int encode = prefix_and_encode(dst->encoding(), src->encoding());
emit_byte(0x0F);
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
emit_byte(0x5C);
emit_byte(0xC0 | encode);
}
@@ -2794,9 +2794,7 @@ void Assembler::subss(XMMRegister dst, XMMRegister src) {
void Assembler::subss(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
InstructionMark im(this);
emit_byte(0xF3);
simd_prefix(dst, dst, src, VEX_SIMD_F3);
prefix(src, dst);
emit_byte(0x0F);
emit_byte(0x5C);
emit_operand(dst, src);
}
@@ -2836,30 +2834,30 @@ void Assembler::testl(Register dst, Address src) {
void Assembler::ucomisd(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
emit_byte(0x66);
ucomiss(dst, src);
InstructionMark im(this);
simd_prefix(dst, src, VEX_SIMD_66);
emit_byte(0x2E);
emit_operand(dst, src);
} }
void Assembler::ucomisd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
emit_byte(0x66);
ucomiss(dst, src);
int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
emit_byte(0x2E);
emit_byte(0xC0 | encode);
}
void Assembler::ucomiss(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
InstructionMark im(this);
prefix(src, dst);
emit_byte(0x0F);
simd_prefix(dst, src, VEX_SIMD_NONE);
emit_byte(0x2E);
emit_operand(dst, src);
}
void Assembler::ucomiss(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
int encode = prefix_and_encode(dst->encoding(), src->encoding());
emit_byte(0x0F);
int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE);
emit_byte(0x2E);
emit_byte(0xC0 | encode);
}
@@ -2905,16 +2903,15 @@ void Assembler::xorl(Register dst, Register src) {
void Assembler::xorpd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
emit_byte(0x66);
xorps(dst, src);
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
emit_byte(0x57);
emit_byte(0xC0 | encode);
}
void Assembler::xorpd(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionMark im(this);
emit_byte(0x66);
simd_prefix(dst, dst, src, VEX_SIMD_66);
prefix(src, dst);
emit_byte(0x0F);
emit_byte(0x57);
emit_operand(dst, src);
}
@@ -2922,8 +2919,7 @@ void Assembler::xorpd(XMMRegister dst, Address src) {
void Assembler::xorps(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
int encode = prefix_and_encode(dst->encoding(), src->encoding());
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE);
emit_byte(0x0F);
emit_byte(0x57);
emit_byte(0xC0 | encode);
}
@@ -2931,8 +2927,7 @@ void Assembler::xorps(XMMRegister dst, XMMRegister src) {
void Assembler::xorps(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
InstructionMark im(this);
prefix(src, dst);
simd_prefix(dst, dst, src, VEX_SIMD_NONE);
emit_byte(0x0F);
emit_byte(0x57);
emit_operand(dst, src);
}
@@ -3394,6 +3389,108 @@ void Assembler::fyl2x() {
emit_byte(0xF1);
}
// SSE SIMD prefix byte values corresponding to VexSimdPrefix encoding.
static int simd_pre[4] = { 0, 0x66, 0xF3, 0xF2 };
// SSE opcode second byte values (first is 0x0F) corresponding to VexOpcode encoding.
static int simd_opc[4] = { 0, 0, 0x38, 0x3A };
// Generate SSE legacy REX prefix and SIMD opcode based on VEX encoding.
void Assembler::rex_prefix(Address adr, XMMRegister xreg, VexSimdPrefix pre, VexOpcode opc, bool rex_w) {
if (pre > 0) {
emit_byte(simd_pre[pre]);
}
if (rex_w) {
prefixq(adr, xreg);
} else {
prefix(adr, xreg);
}
if (opc > 0) {
emit_byte(0x0F);
int opc2 = simd_opc[opc];
if (opc2 > 0) {
emit_byte(opc2);
}
}
}
int Assembler::rex_prefix_and_encode(int dst_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool rex_w) {
if (pre > 0) {
emit_byte(simd_pre[pre]);
}
int encode = (rex_w) ? prefixq_and_encode(dst_enc, src_enc) :
prefix_and_encode(dst_enc, src_enc);
if (opc > 0) {
emit_byte(0x0F);
int opc2 = simd_opc[opc];
if (opc2 > 0) {
emit_byte(opc2);
}
}
return encode;
}
void Assembler::vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, int nds_enc, VexSimdPrefix pre, VexOpcode opc, bool vector256) {
if (vex_b || vex_x || vex_w || (opc == VEX_OPCODE_0F_38) || (opc == VEX_OPCODE_0F_3A)) {
prefix(VEX_3bytes);
int byte1 = (vex_r ? VEX_R : 0) | (vex_x ? VEX_X : 0) | (vex_b ? VEX_B : 0);
byte1 = (~byte1) & 0xE0;
byte1 |= opc;
a_byte(byte1);
int byte2 = ((~nds_enc) & 0xf) << 3;
byte2 |= (vex_w ? VEX_W : 0) | (vector256 ? 4 : 0) | pre;
emit_byte(byte2);
} else {
prefix(VEX_2bytes);
int byte1 = vex_r ? VEX_R : 0;
byte1 = (~byte1) & 0x80;
byte1 |= ((~nds_enc) & 0xf) << 3;
byte1 |= (vector256 ? 4 : 0) | pre;
emit_byte(byte1);
}
}
void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc, bool vex_w, bool vector256){
bool vex_r = (xreg_enc >= 8);
bool vex_b = adr.base_needs_rex();
bool vex_x = adr.index_needs_rex();
vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector256);
}
int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool vex_w, bool vector256) {
bool vex_r = (dst_enc >= 8);
bool vex_b = (src_enc >= 8);
bool vex_x = false;
vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector256);
return (((dst_enc & 7) << 3) | (src_enc & 7));
}
void Assembler::simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre, VexOpcode opc, bool rex_w, bool vector256) {
if (UseAVX > 0) {
int xreg_enc = xreg->encoding();
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
vex_prefix(adr, nds_enc, xreg_enc, pre, opc, rex_w, vector256);
} else {
assert((nds == xreg) || (nds == xnoreg), "wrong sse encoding");
rex_prefix(adr, xreg, pre, opc, rex_w);
}
}
int Assembler::simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre, VexOpcode opc, bool rex_w, bool vector256) {
int dst_enc = dst->encoding();
int src_enc = src->encoding();
if (UseAVX > 0) {
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, rex_w, vector256);
} else {
assert((nds == dst) || (nds == src) || (nds == xnoreg), "wrong sse encoding");
return rex_prefix_and_encode(dst_enc, src_enc, pre, opc, rex_w);
}
}
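To make the new helpers concrete, here is a minimal standalone C++ sketch. It is not the HotSpot code; it only mirrors the 2-byte VEX branch of vex_prefix_and_encode() above for a register-register operand, and uses vaddsd xmm0, xmm1, xmm2 as the worked example (pp = VEX_SIMD_F2, L = 0 for a 128-bit operation).

// Standalone sketch of the 2-byte VEX layout: C5, then ~R | (~vvvv << 3) | (L << 2) | pp.
#include <cstdint>
#include <cstdio>

int main() {
  int dst = 0, nds = 1, src = 2;                      // xmm0, xmm1, xmm2
  int pp = 3;                                          // VEX_SIMD_F2 in the VexSimdPrefix enum (see simd_pre[])
  bool vex_r = (dst >= 8);
  uint8_t byte1 = (~(vex_r ? 0x80 : 0x00)) & 0x80;     // inverted REX.R in bit 7
  byte1 |= ((~nds) & 0xF) << 3;                        // inverted vvvv = the nds (second source) register
  byte1 |= pp;                                         // L = 0 (128-bit), pp selects the F2 prefix
  uint8_t modrm = 0xC0 | ((dst & 7) << 3) | (src & 7); // register-register ModRM, as in the emit_byte(0xC0 | encode) calls
  std::printf("C5 %02X 58 %02X\n", byte1, modrm);      // prints: C5 F3 58 C2
  return 0;
}

Running it prints C5 F3 58 C2, the conventional encoding of vaddsd xmm0, xmm1, xmm2. The 3-byte 0xC4 form is only needed when REX.X, REX.B, REX.W or the 0F 38 / 0F 3A opcode maps are involved, which is the condition tested at the top of vex_prefix() above.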
#ifndef _LP64
@@ -3756,6 +3853,38 @@ void Assembler::prefix(Address adr, XMMRegister reg) {
}
}
void Assembler::prefixq(Address adr, XMMRegister src) {
if (src->encoding() < 8) {
if (adr.base_needs_rex()) {
if (adr.index_needs_rex()) {
prefix(REX_WXB);
} else {
prefix(REX_WB);
}
} else {
if (adr.index_needs_rex()) {
prefix(REX_WX);
} else {
prefix(REX_W);
}
}
} else {
if (adr.base_needs_rex()) {
if (adr.index_needs_rex()) {
prefix(REX_WRXB);
} else {
prefix(REX_WRB);
}
} else {
if (adr.index_needs_rex()) {
prefix(REX_WRX);
} else {
prefix(REX_WR);
}
}
}
}
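As an aside, the REX_W* constants this helper selects are all instances of the single REX byte 0100WRXB. A tiny C++ sketch of that layout (illustration only, not HotSpot code; the helper itself assumes REX.W is always wanted):

// REX byte: 0100 W R X B
#include <cstdint>
static uint8_t rex(bool w, bool r, bool x, bool b) {
  return 0x40 | (w << 3) | (r << 2) | (x << 1) | (uint8_t)b;
}
// rex(1,0,0,0) == 0x48 (REX_W), rex(1,0,0,1) == 0x49 (REX_WB), rex(1,1,1,1) == 0x4F (REX_WRXB)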
void Assembler::adcq(Register dst, int32_t imm32) {
(void) prefixq_and_encode(dst->encoding());
emit_arith(0x81, 0xD0, dst, imm32);
@@ -3918,36 +4047,44 @@ void Assembler::cmpxchgq(Register reg, Address adr) {
void Assembler::cvtsi2sdq(XMMRegister dst, Register src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
emit_byte(0xF2);
int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F2);
int encode = prefixq_and_encode(dst->encoding(), src->encoding());
emit_byte(0x0F);
emit_byte(0x2A);
emit_byte(0xC0 | encode);
}
void Assembler::cvtsi2sdq(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionMark im(this);
simd_prefix_q(dst, dst, src, VEX_SIMD_F2);
emit_byte(0x2A);
emit_operand(dst, src);
}
void Assembler::cvtsi2ssq(XMMRegister dst, Register src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
emit_byte(0xF3);
int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F3);
int encode = prefixq_and_encode(dst->encoding(), src->encoding());
emit_byte(0x0F);
emit_byte(0x2A); emit_byte(0x2A);
emit_byte(0xC0 | encode); emit_byte(0xC0 | encode);
} }
void Assembler::cvtsi2ssq(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
InstructionMark im(this);
simd_prefix_q(dst, dst, src, VEX_SIMD_F3);
emit_byte(0x2A);
emit_operand(dst, src);
}
void Assembler::cvttsd2siq(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F2);
  emit_byte(0x2C);
  emit_byte(0xC0 | encode);
}

void Assembler::cvttss2siq(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F3);
  emit_byte(0x2C);
  emit_byte(0xC0 | encode);
}
@@ -4107,21 +4244,17 @@ void Assembler::lzcntq(Register dst, Register src) {
void Assembler::movdq(XMMRegister dst, Register src) {
  // table D-1 says MMX/SSE2
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_66);
  emit_byte(0x6E);
  emit_byte(0xC0 | encode);
}

void Assembler::movdq(Register dst, XMMRegister src) {
  // table D-1 says MMX/SSE2
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  // swap src/dst to get correct prefix
  int encode = simd_prefix_and_encode_q(src, dst, VEX_SIMD_66);
  emit_byte(0x7E);
  emit_byte(0xC0 | encode);
}
@@ -5680,6 +5813,24 @@ void MacroAssembler::addptr(Address dst, Register src) {
  LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src));
}
void MacroAssembler::addsd(XMMRegister dst, AddressLiteral src) {
if (reachable(src)) {
Assembler::addsd(dst, as_Address(src));
} else {
lea(rscratch1, src);
Assembler::addsd(dst, Address(rscratch1, 0));
}
}
void MacroAssembler::addss(XMMRegister dst, AddressLiteral src) {
if (reachable(src)) {
addss(dst, as_Address(src));
} else {
lea(rscratch1, src);
addss(dst, Address(rscratch1, 0));
}
}
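These AddressLiteral wrappers all follow the same pattern: use the literal directly when reachable() says a 32-bit displacement can reach it, otherwise materialize the address in rscratch1 with lea and go through a plain Address. A small model of that reachability test (an assumption-laden sketch, not the VM's reachable() implementation):

#include <cstdint>
#include <cstdio>

// Sketch of the "reachable or scratch register" decision: a RIP-relative
// disp32 can only reach +/-2GB from the end of the instruction, so a literal
// outside that window has to be loaded through a scratch register first.
static bool reachable_as_disp32(int64_t target, int64_t code_addr) {
  int64_t disp = target - (code_addr + 5);      // assume a 5-byte instruction
  return disp == (int64_t)(int32_t)disp;        // fits in a signed 32-bit field
}

int main() {
  int64_t code = 0x7f0000000000LL;
  printf("%d\n", reachable_as_disp32(code + 0x1000, code));    // 1: direct operand
  printf("%d\n", reachable_as_disp32(0x10000000000LL, code));  // 0: lea into scratch
  return 0;
}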
void MacroAssembler::align(int modulus) {
  if (offset() % modulus != 0) {
    nop(modulus - (offset() % modulus));
@@ -5687,11 +5838,24 @@ void MacroAssembler::align(int modulus) {
  }
}

void MacroAssembler::andpd(XMMRegister dst, AddressLiteral src) {
  // Used in sign-masking with aligned address.
  assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
  if (reachable(src)) {
    Assembler::andpd(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::andpd(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::andps(XMMRegister dst, AddressLiteral src) {
  // Used in sign-masking with aligned address.
  assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
  if (reachable(src)) {
    Assembler::andps(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::andps(dst, Address(rscratch1, 0));
  }
}
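The new asserts in andpd/andps record why these live behind the MacroAssembler: the legacy SSE memory forms fault on operands that are not 16-byte aligned, while the VEX-encoded forms do not, hence the (UseAVX > 0) escape. A tiny sketch of providing such an aligned constant (illustrative only, not the VM's actual sign-mask stubs):

#include <cstdint>
#include <cstdio>

// A 16-byte-aligned 128-bit mask, as ANDPD/ANDPS with a memory operand would
// require in SSE mode.
struct alignas(16) SignMask {
  uint64_t lo, hi;
};

int main() {
  static const SignMask double_sign_mask = {0x7FFFFFFFFFFFFFFFULL, 0x7FFFFFFFFFFFFFFFULL};
  printf("aligned: %d\n", ((uintptr_t)&double_sign_mask & 15) == 0);  // expected: 1
  return 0;
}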
@@ -6268,19 +6432,19 @@ void MacroAssembler::cmpxchgptr(Register reg, Address adr) {
void MacroAssembler::comisd(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::comisd(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::comisd(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::comiss(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::comiss(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::comiss(dst, Address(rscratch1, 0));
  }
}
@@ -6364,6 +6528,24 @@ void MacroAssembler::division_with_shift (Register reg, int shift_value) {
  sarl(reg, shift_value);
}
void MacroAssembler::divsd(XMMRegister dst, AddressLiteral src) {
if (reachable(src)) {
Assembler::divsd(dst, as_Address(src));
} else {
lea(rscratch1, src);
Assembler::divsd(dst, Address(rscratch1, 0));
}
}
void MacroAssembler::divss(XMMRegister dst, AddressLiteral src) {
if (reachable(src)) {
Assembler::divss(dst, as_Address(src));
} else {
lea(rscratch1, src);
Assembler::divss(dst, Address(rscratch1, 0));
}
}
// !defined(COMPILER2) is because of stupid core builds
#if !defined(_LP64) || defined(COMPILER1) || !defined(COMPILER2)
void MacroAssembler::empty_FPU_stack() {
@@ -6803,12 +6985,39 @@ void MacroAssembler::movptr(Address dst, Register src) {
  LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
}
void MacroAssembler::movsd(XMMRegister dst, AddressLiteral src) {
if (reachable(src)) {
Assembler::movsd(dst, as_Address(src));
} else {
lea(rscratch1, src);
Assembler::movsd(dst, Address(rscratch1, 0));
}
}
void MacroAssembler::movss(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::movss(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::movss(dst, Address(rscratch1, 0));
  }
}
void MacroAssembler::mulsd(XMMRegister dst, AddressLiteral src) {
if (reachable(src)) {
Assembler::mulsd(dst, as_Address(src));
} else {
lea(rscratch1, src);
Assembler::mulsd(dst, Address(rscratch1, 0));
}
}
void MacroAssembler::mulss(XMMRegister dst, AddressLiteral src) {
if (reachable(src)) {
Assembler::mulss(dst, as_Address(src));
} else {
lea(rscratch1, src);
Assembler::mulss(dst, Address(rscratch1, 0));
  }
}
@@ -6990,6 +7199,42 @@ void MacroAssembler::testl(Register dst, AddressLiteral src) {
  testl(dst, as_Address(src));
}
void MacroAssembler::sqrtsd(XMMRegister dst, AddressLiteral src) {
if (reachable(src)) {
Assembler::sqrtsd(dst, as_Address(src));
} else {
lea(rscratch1, src);
Assembler::sqrtsd(dst, Address(rscratch1, 0));
}
}
void MacroAssembler::sqrtss(XMMRegister dst, AddressLiteral src) {
if (reachable(src)) {
Assembler::sqrtss(dst, as_Address(src));
} else {
lea(rscratch1, src);
Assembler::sqrtss(dst, Address(rscratch1, 0));
}
}
void MacroAssembler::subsd(XMMRegister dst, AddressLiteral src) {
if (reachable(src)) {
Assembler::subsd(dst, as_Address(src));
} else {
lea(rscratch1, src);
Assembler::subsd(dst, Address(rscratch1, 0));
}
}
void MacroAssembler::subss(XMMRegister dst, AddressLiteral src) {
if (reachable(src)) {
Assembler::subss(dst, as_Address(src));
} else {
lea(rscratch1, src);
Assembler::subss(dst, Address(rscratch1, 0));
}
}
//////////////////////////////////////////////////////////////////////////////////
#ifndef SERIALGC
@@ -7875,28 +8120,42 @@ void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::ucomisd(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::ucomisd(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::ucomiss(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::ucomiss(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::ucomiss(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src) {
  // Used in sign-bit flipping with aligned address.
  assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
  if (reachable(src)) {
    Assembler::xorpd(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::xorpd(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) {
  // Used in sign-bit flipping with aligned address.
  assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
  if (reachable(src)) {
    Assembler::xorps(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::xorps(dst, Address(rscratch1, 0));
  }
}
...
@@ -503,7 +503,31 @@ class Assembler : public AbstractAssembler {
    REX_WR   = 0x4C,
    REX_WRB  = 0x4D,
    REX_WRX  = 0x4E,
    REX_WRXB = 0x4F,
VEX_3bytes = 0xC4,
VEX_2bytes = 0xC5
};
enum VexPrefix {
VEX_B = 0x20,
VEX_X = 0x40,
VEX_R = 0x80,
VEX_W = 0x80
};
enum VexSimdPrefix {
VEX_SIMD_NONE = 0x0,
VEX_SIMD_66 = 0x1,
VEX_SIMD_F3 = 0x2,
VEX_SIMD_F2 = 0x3
};
enum VexOpcode {
VEX_OPCODE_NONE = 0x0,
VEX_OPCODE_0F = 0x1,
VEX_OPCODE_0F_38 = 0x2,
VEX_OPCODE_0F_3A = 0x3
  };

  enum WhichOperand {
@@ -546,12 +570,88 @@ private:
  void prefixq(Address adr);

  void prefix(Address adr, Register reg,  bool byteinst = false);
  void prefix(Address adr, XMMRegister reg);
  void prefixq(Address adr, Register reg);
  void prefixq(Address adr, XMMRegister reg);

  void prefetch_prefix(Address src);
void rex_prefix(Address adr, XMMRegister xreg,
VexSimdPrefix pre, VexOpcode opc, bool rex_w);
int rex_prefix_and_encode(int dst_enc, int src_enc,
VexSimdPrefix pre, VexOpcode opc, bool rex_w);
void vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w,
int nds_enc, VexSimdPrefix pre, VexOpcode opc,
bool vector256);
void vex_prefix(Address adr, int nds_enc, int xreg_enc,
VexSimdPrefix pre, VexOpcode opc,
bool vex_w, bool vector256);
int vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc,
VexSimdPrefix pre, VexOpcode opc,
bool vex_w, bool vector256);
void simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr,
VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F,
bool rex_w = false, bool vector256 = false);
void simd_prefix(XMMRegister dst, Address src,
VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) {
simd_prefix(dst, xnoreg, src, pre, opc);
}
void simd_prefix(Address dst, XMMRegister src, VexSimdPrefix pre) {
simd_prefix(src, dst, pre);
}
void simd_prefix_q(XMMRegister dst, XMMRegister nds, Address src,
VexSimdPrefix pre) {
bool rex_w = true;
simd_prefix(dst, nds, src, pre, VEX_OPCODE_0F, rex_w);
}
int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src,
VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F,
bool rex_w = false, bool vector256 = false);
int simd_prefix_and_encode(XMMRegister dst, XMMRegister src,
VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) {
return simd_prefix_and_encode(dst, xnoreg, src, pre, opc);
}
// Move/convert 32-bit integer value.
int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, Register src,
VexSimdPrefix pre) {
// It is OK to cast from Register to XMMRegister to pass argument here
// since only encoding is used in simd_prefix_and_encode() and number of
// Gen and Xmm registers are the same.
return simd_prefix_and_encode(dst, nds, as_XMMRegister(src->encoding()), pre);
}
int simd_prefix_and_encode(XMMRegister dst, Register src, VexSimdPrefix pre) {
return simd_prefix_and_encode(dst, xnoreg, src, pre);
}
int simd_prefix_and_encode(Register dst, XMMRegister src,
VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) {
return simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, pre, opc);
}
// Move/convert 64-bit integer value.
int simd_prefix_and_encode_q(XMMRegister dst, XMMRegister nds, Register src,
VexSimdPrefix pre) {
bool rex_w = true;
return simd_prefix_and_encode(dst, nds, as_XMMRegister(src->encoding()), pre, VEX_OPCODE_0F, rex_w);
}
int simd_prefix_and_encode_q(XMMRegister dst, Register src, VexSimdPrefix pre) {
return simd_prefix_and_encode_q(dst, xnoreg, src, pre);
}
int simd_prefix_and_encode_q(Register dst, XMMRegister src,
VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) {
bool rex_w = true;
return simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, pre, opc, rex_w);
}
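All of the *_and_encode helpers above return ((dst & 7) << 3) | (src & 7); the caller then ORs in 0xC0 to form a register-to-register ModRM byte, with the high bits of each register number carried by the REX or VEX prefix. A quick standalone illustration:

#include <cstdint>
#include <cstdio>

// The value returned by the encode helpers becomes the low six bits of a
// register-register ModRM byte (mod = 11).
static uint8_t modrm_reg_reg(int dst_enc, int src_enc) {
  return uint8_t(0xC0 | ((dst_enc & 7) << 3) | (src_enc & 7));
}

int main() {
  printf("%02X\n", modrm_reg_reg(0, 1));   // C1: xmm0, xmm1
  printf("%02X\n", modrm_reg_reg(9, 12));  // CC: high registers rely on the prefix R and B bits
  return 0;
}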
  // Helper functions for groups of instructions
  void emit_arith_b(int op1, int op2, Register dst, int imm8);
@@ -764,6 +864,7 @@ private:
  void addss(XMMRegister dst, Address src);
  void addss(XMMRegister dst, XMMRegister src);

  void andl(Address dst, int32_t imm32);
  void andl(Register dst, int32_t imm32);
  void andl(Register dst, Address src);
  void andl(Register dst, Register src);
@@ -774,9 +875,11 @@ private:
  void andq(Register dst, Register src);

  // Bitwise Logical AND of Packed Double-Precision Floating-Point Values
void andpd(XMMRegister dst, Address src);
void andpd(XMMRegister dst, XMMRegister src); void andpd(XMMRegister dst, XMMRegister src);
// Bitwise Logical AND of Packed Single-Precision Floating-Point Values
void andps(XMMRegister dst, XMMRegister src);
  void bsfl(Register dst, Register src);
  void bsrl(Register dst, Register src);
@@ -837,9 +940,11 @@ private:
// Ordered Compare Scalar Double-Precision Floating-Point Values and set EFLAGS // Ordered Compare Scalar Double-Precision Floating-Point Values and set EFLAGS
void comisd(XMMRegister dst, Address src); void comisd(XMMRegister dst, Address src);
void comisd(XMMRegister dst, XMMRegister src);
// Ordered Compare Scalar Single-Precision Floating-Point Values and set EFLAGS // Ordered Compare Scalar Single-Precision Floating-Point Values and set EFLAGS
void comiss(XMMRegister dst, Address src); void comiss(XMMRegister dst, Address src);
void comiss(XMMRegister dst, XMMRegister src);
// Identify processor type and features // Identify processor type and features
void cpuid() { void cpuid() {
...@@ -849,14 +954,19 @@ private: ...@@ -849,14 +954,19 @@ private:
// Convert Scalar Double-Precision Floating-Point Value to Scalar Single-Precision Floating-Point Value // Convert Scalar Double-Precision Floating-Point Value to Scalar Single-Precision Floating-Point Value
void cvtsd2ss(XMMRegister dst, XMMRegister src); void cvtsd2ss(XMMRegister dst, XMMRegister src);
void cvtsd2ss(XMMRegister dst, Address src);
// Convert Doubleword Integer to Scalar Double-Precision Floating-Point Value // Convert Doubleword Integer to Scalar Double-Precision Floating-Point Value
void cvtsi2sdl(XMMRegister dst, Register src); void cvtsi2sdl(XMMRegister dst, Register src);
void cvtsi2sdl(XMMRegister dst, Address src);
void cvtsi2sdq(XMMRegister dst, Register src); void cvtsi2sdq(XMMRegister dst, Register src);
void cvtsi2sdq(XMMRegister dst, Address src);
// Convert Doubleword Integer to Scalar Single-Precision Floating-Point Value // Convert Doubleword Integer to Scalar Single-Precision Floating-Point Value
void cvtsi2ssl(XMMRegister dst, Register src); void cvtsi2ssl(XMMRegister dst, Register src);
void cvtsi2ssl(XMMRegister dst, Address src);
void cvtsi2ssq(XMMRegister dst, Register src); void cvtsi2ssq(XMMRegister dst, Register src);
void cvtsi2ssq(XMMRegister dst, Address src);
// Convert Packed Signed Doubleword Integers to Packed Double-Precision Floating-Point Value // Convert Packed Signed Doubleword Integers to Packed Double-Precision Floating-Point Value
void cvtdq2pd(XMMRegister dst, XMMRegister src); void cvtdq2pd(XMMRegister dst, XMMRegister src);
...@@ -866,6 +976,7 @@ private: ...@@ -866,6 +976,7 @@ private:
// Convert Scalar Single-Precision Floating-Point Value to Scalar Double-Precision Floating-Point Value // Convert Scalar Single-Precision Floating-Point Value to Scalar Double-Precision Floating-Point Value
void cvtss2sd(XMMRegister dst, XMMRegister src); void cvtss2sd(XMMRegister dst, XMMRegister src);
void cvtss2sd(XMMRegister dst, Address src);
// Convert with Truncation Scalar Double-Precision Floating-Point Value to Doubleword Integer // Convert with Truncation Scalar Double-Precision Floating-Point Value to Doubleword Integer
void cvttsd2sil(Register dst, Address src); void cvttsd2sil(Register dst, Address src);
...@@ -1140,8 +1251,6 @@ private: ...@@ -1140,8 +1251,6 @@ private:
void movdq(Register dst, XMMRegister src); void movdq(Register dst, XMMRegister src);
// Move Aligned Double Quadword // Move Aligned Double Quadword
void movdqa(Address dst, XMMRegister src);
void movdqa(XMMRegister dst, Address src);
void movdqa(XMMRegister dst, XMMRegister src); void movdqa(XMMRegister dst, XMMRegister src);
// Move Unaligned Double Quadword // Move Unaligned Double Quadword
...@@ -1261,10 +1370,18 @@ private: ...@@ -1261,10 +1370,18 @@ private:
void orq(Register dst, Address src); void orq(Register dst, Address src);
void orq(Register dst, Register src); void orq(Register dst, Register src);
// Pack with unsigned saturation
void packuswb(XMMRegister dst, XMMRegister src);
void packuswb(XMMRegister dst, Address src);
// SSE4.2 string instructions // SSE4.2 string instructions
void pcmpestri(XMMRegister xmm1, XMMRegister xmm2, int imm8); void pcmpestri(XMMRegister xmm1, XMMRegister xmm2, int imm8);
void pcmpestri(XMMRegister xmm1, Address src, int imm8); void pcmpestri(XMMRegister xmm1, Address src, int imm8);
// SSE4.1 packed move
void pmovzxbw(XMMRegister dst, XMMRegister src);
void pmovzxbw(XMMRegister dst, Address src);
#ifndef _LP64 // no 32bit push/pop on amd64 #ifndef _LP64 // no 32bit push/pop on amd64
void popl(Address dst); void popl(Address dst);
#endif #endif
...@@ -1292,6 +1409,7 @@ private: ...@@ -1292,6 +1409,7 @@ private:
// POR - Bitwise logical OR // POR - Bitwise logical OR
void por(XMMRegister dst, XMMRegister src); void por(XMMRegister dst, XMMRegister src);
void por(XMMRegister dst, Address src);
// Shuffle Packed Doublewords // Shuffle Packed Doublewords
void pshufd(XMMRegister dst, XMMRegister src, int mode); void pshufd(XMMRegister dst, XMMRegister src, int mode);
...@@ -1313,6 +1431,11 @@ private: ...@@ -1313,6 +1431,11 @@ private:
// Interleave Low Bytes // Interleave Low Bytes
void punpcklbw(XMMRegister dst, XMMRegister src); void punpcklbw(XMMRegister dst, XMMRegister src);
void punpcklbw(XMMRegister dst, Address src);
// Interleave Low Doublewords
void punpckldq(XMMRegister dst, XMMRegister src);
void punpckldq(XMMRegister dst, Address src);
#ifndef _LP64 // no 32bit push/pop on amd64 #ifndef _LP64 // no 32bit push/pop on amd64
void pushl(Address src); void pushl(Address src);
...@@ -1429,6 +1552,13 @@ private: ...@@ -1429,6 +1552,13 @@ private:
void xchgq(Register reg, Address adr); void xchgq(Register reg, Address adr);
void xchgq(Register dst, Register src); void xchgq(Register dst, Register src);
// Get Value of Extended Control Register
void xgetbv() {
emit_byte(0x0F);
emit_byte(0x01);
emit_byte(0xD0);
}
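xgetbv() emits 0F 01 D0, i.e. XGETBV with ECX selecting the XCR0 register; EDX:EAX then reports which state components the OS saves. What the VM later checks in that value can be sketched as follows (bit numbers per the instruction set manuals; an illustration, not VM code):

#include <cstdint>
#include <cstdio>

// AVX is only usable when the OS has enabled both XMM (bit 1) and YMM (bit 2)
// state saving in XCR0.
static bool os_enabled_avx_state(uint64_t xcr0) {
  const uint64_t XCR0_SSE = 1u << 1;
  const uint64_t XCR0_YMM = 1u << 2;
  return (xcr0 & (XCR0_SSE | XCR0_YMM)) == (XCR0_SSE | XCR0_YMM);
}

int main() {
  printf("%d\n", os_enabled_avx_state(0x7));  // x87+SSE+YMM enabled -> 1
  printf("%d\n", os_enabled_avx_state(0x3));  // YMM state not enabled -> 0
  return 0;
}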
void xorl(Register dst, int32_t imm32); void xorl(Register dst, int32_t imm32);
void xorl(Register dst, Address src); void xorl(Register dst, Address src);
void xorl(Register dst, Register src); void xorl(Register dst, Register src);
...@@ -1437,14 +1567,21 @@ private: ...@@ -1437,14 +1567,21 @@ private:
void xorq(Register dst, Register src); void xorq(Register dst, Register src);
// Bitwise Logical XOR of Packed Double-Precision Floating-Point Values // Bitwise Logical XOR of Packed Double-Precision Floating-Point Values
void xorpd(XMMRegister dst, Address src);
void xorpd(XMMRegister dst, XMMRegister src); void xorpd(XMMRegister dst, XMMRegister src);
// Bitwise Logical XOR of Packed Single-Precision Floating-Point Values // Bitwise Logical XOR of Packed Single-Precision Floating-Point Values
void xorps(XMMRegister dst, Address src);
void xorps(XMMRegister dst, XMMRegister src); void xorps(XMMRegister dst, XMMRegister src);
void set_byte_if_not_zero(Register dst); // sets reg to 1 if not zero, otherwise 0 void set_byte_if_not_zero(Register dst); // sets reg to 1 if not zero, otherwise 0
protected:
// Next instructions require address alignment 16 bytes SSE mode.
// They should be called only from corresponding MacroAssembler instructions.
void andpd(XMMRegister dst, Address src);
void andps(XMMRegister dst, Address src);
void xorpd(XMMRegister dst, Address src);
void xorps(XMMRegister dst, Address src);
};
@@ -2175,9 +2312,15 @@ class MacroAssembler: public Assembler {
  void andpd(XMMRegister dst, Address src) { Assembler::andpd(dst, src); }
  void andpd(XMMRegister dst, AddressLiteral src);

  void andps(XMMRegister dst, XMMRegister src) { Assembler::andps(dst, src); }
  void andps(XMMRegister dst, Address src) { Assembler::andps(dst, src); }
  void andps(XMMRegister dst, AddressLiteral src);

  void comiss(XMMRegister dst, XMMRegister src) { Assembler::comiss(dst, src); }
  void comiss(XMMRegister dst, Address src) { Assembler::comiss(dst, src); }
  void comiss(XMMRegister dst, AddressLiteral src);

  void comisd(XMMRegister dst, XMMRegister src) { Assembler::comisd(dst, src); }
  void comisd(XMMRegister dst, Address src) { Assembler::comisd(dst, src); }
  void comisd(XMMRegister dst, AddressLiteral src);
@@ -2218,48 +2361,48 @@ public:
  void addsd(XMMRegister dst, XMMRegister src)    { Assembler::addsd(dst, src); }
  void addsd(XMMRegister dst, Address src)        { Assembler::addsd(dst, src); }
  void addsd(XMMRegister dst, AddressLiteral src);

  void addss(XMMRegister dst, XMMRegister src)    { Assembler::addss(dst, src); }
  void addss(XMMRegister dst, Address src)        { Assembler::addss(dst, src); }
  void addss(XMMRegister dst, AddressLiteral src);

  void divsd(XMMRegister dst, XMMRegister src)    { Assembler::divsd(dst, src); }
  void divsd(XMMRegister dst, Address src)        { Assembler::divsd(dst, src); }
  void divsd(XMMRegister dst, AddressLiteral src);

  void divss(XMMRegister dst, XMMRegister src)    { Assembler::divss(dst, src); }
  void divss(XMMRegister dst, Address src)        { Assembler::divss(dst, src); }
  void divss(XMMRegister dst, AddressLiteral src);

  void movsd(XMMRegister dst, XMMRegister src) { Assembler::movsd(dst, src); }
  void movsd(Address dst, XMMRegister src)     { Assembler::movsd(dst, src); }
  void movsd(XMMRegister dst, Address src)     { Assembler::movsd(dst, src); }
  void movsd(XMMRegister dst, AddressLiteral src);

  void mulsd(XMMRegister dst, XMMRegister src)    { Assembler::mulsd(dst, src); }
  void mulsd(XMMRegister dst, Address src)        { Assembler::mulsd(dst, src); }
  void mulsd(XMMRegister dst, AddressLiteral src);

  void mulss(XMMRegister dst, XMMRegister src)    { Assembler::mulss(dst, src); }
  void mulss(XMMRegister dst, Address src)        { Assembler::mulss(dst, src); }
  void mulss(XMMRegister dst, AddressLiteral src);

  void sqrtsd(XMMRegister dst, XMMRegister src)    { Assembler::sqrtsd(dst, src); }
  void sqrtsd(XMMRegister dst, Address src)        { Assembler::sqrtsd(dst, src); }
  void sqrtsd(XMMRegister dst, AddressLiteral src);

  void sqrtss(XMMRegister dst, XMMRegister src)    { Assembler::sqrtss(dst, src); }
  void sqrtss(XMMRegister dst, Address src)        { Assembler::sqrtss(dst, src); }
  void sqrtss(XMMRegister dst, AddressLiteral src);

  void subsd(XMMRegister dst, XMMRegister src)    { Assembler::subsd(dst, src); }
  void subsd(XMMRegister dst, Address src)        { Assembler::subsd(dst, src); }
  void subsd(XMMRegister dst, AddressLiteral src);

  void subss(XMMRegister dst, XMMRegister src)    { Assembler::subss(dst, src); }
  void subss(XMMRegister dst, Address src)        { Assembler::subss(dst, src); }
  void subss(XMMRegister dst, AddressLiteral src);

  void ucomiss(XMMRegister dst, XMMRegister src) { Assembler::ucomiss(dst, src); }
  void ucomiss(XMMRegister dst, Address src) { Assembler::ucomiss(dst, src); }
...
@@ -86,6 +86,7 @@ inline void Assembler::prefix(Address adr, Register reg, bool byteinst) {}
inline void Assembler::prefixq(Address adr, Register reg) {}

inline void Assembler::prefix(Address adr, XMMRegister reg) {}
inline void Assembler::prefixq(Address adr, XMMRegister reg) {}
#else
inline void Assembler::emit_long64(jlong x) {
  *(jlong*) _code_pos = x;
...
@@ -237,8 +237,20 @@ int NativeMovRegMem::instruction_start() const {
  int off = 0;
  u_char instr_0 = ubyte_at(off);
// See comment in Assembler::locate_operand() about VEX prefixes.
if (instr_0 == instruction_VEX_prefix_2bytes) {
assert((UseAVX > 0), "shouldn't have VEX prefix");
NOT_LP64(assert((0xC0 & ubyte_at(1)) == 0xC0, "shouldn't have LDS and LES instructions"));
return 2;
}
if (instr_0 == instruction_VEX_prefix_3bytes) {
assert((UseAVX > 0), "shouldn't have VEX prefix");
NOT_LP64(assert((0xC0 & ubyte_at(1)) == 0xC0, "shouldn't have LDS and LES instructions"));
return 3;
}
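With VEX prefixes the opcode of a patchable XMM move no longer sits right after the legacy prefixes, which is why instruction_start() now returns 2 or 3 for the C5/C4 forms before falling back to the old prefix scan. A minimal sketch of that distinction (a hypothetical helper, not the VM code):

#include <cstdint>
#include <cstdio>

// 0xC5 starts a 2-byte VEX prefix, 0xC4 a 3-byte one; anything else falls back
// to the existing legacy-prefix handling.
static int vex_prefix_size(const uint8_t* code) {
  if (code[0] == 0xC5) return 2;   // C5 <vvvv.L.pp> <opcode> ...
  if (code[0] == 0xC4) return 3;   // C4 <R.X.B.mmmmm> <W.vvvv.L.pp> <opcode> ...
  return 0;
}

int main() {
  const uint8_t vmovss[] = {0xC5, 0xFA, 0x10, 0x45, 0x08};  // vmovss 0x8(%rbp),%xmm0
  const uint8_t movss[]  = {0xF3, 0x0F, 0x10, 0x45, 0x08};  // movss  0x8(%rbp),%xmm0
  printf("%d %d\n", vex_prefix_size(vmovss), vex_prefix_size(movss));  // prints "2 0"
  return 0;
}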
  // First check to see if we have a (prefixed or not) xor
  if (instr_0 >= instruction_prefix_wide_lo && // 0x40
      instr_0 <= instruction_prefix_wide_hi) { // 0x4f
    off++;
    instr_0 = ubyte_at(off);
...
@@ -287,6 +287,9 @@ class NativeMovRegMem: public NativeInstruction {
    instruction_code_xmm_store          = 0x11,
    instruction_code_xmm_lpd            = 0x12,

    instruction_VEX_prefix_2bytes       = Assembler::VEX_2bytes,
    instruction_VEX_prefix_3bytes       = Assembler::VEX_3bytes,

    instruction_size                    = 4,
    instruction_offset                  = 0,
    data_offset                         = 2,
...
@@ -53,6 +53,7 @@ REGISTER_DEFINITION(Register, r14);
REGISTER_DEFINITION(Register, r15);
#endif // AMD64

REGISTER_DEFINITION(XMMRegister, xnoreg);
REGISTER_DEFINITION(XMMRegister, xmm0 );
REGISTER_DEFINITION(XMMRegister, xmm1 );
REGISTER_DEFINITION(XMMRegister, xmm2 );
@@ -115,6 +116,7 @@ REGISTER_DEFINITION(Register, r12_heapbase);
REGISTER_DEFINITION(Register, r15_thread);
#endif // AMD64

REGISTER_DEFINITION(MMXRegister, mnoreg );
REGISTER_DEFINITION(MMXRegister, mmx0 );
REGISTER_DEFINITION(MMXRegister, mmx1 );
REGISTER_DEFINITION(MMXRegister, mmx2 );
...
@@ -50,7 +50,7 @@ const char* VM_Version::_features_str = "";
VM_Version::CpuidInfo VM_Version::_cpuid_info   = { 0, };

static BufferBlob* stub_blob;
static const int stub_size = 500;

extern "C" {
  typedef void (*getPsrInfo_stub_t)(void*);
@@ -73,7 +73,7 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
    const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);

    Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
    Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, done;

    StubCodeMark mark(this, "VM_Version", "getPsrInfo_stub");
#   define __ _masm->
@@ -229,6 +229,41 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);
//
// Check if OS has enabled XGETBV instruction to access XCR0
// (OSXSAVE feature flag) and CPU supports AVX
//
__ andl(rcx, 0x18000000);
__ cmpl(rcx, 0x18000000);
__ jccb(Assembler::notEqual, sef_cpuid);
//
// XCR0, XFEATURE_ENABLED_MASK register
//
__ xorl(rcx, rcx); // zero for XCR0 register
__ xgetbv();
__ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
__ movl(Address(rsi, 0), rax);
__ movl(Address(rsi, 4), rdx);
//
// cpuid(0x7) Structured Extended Features
//
__ bind(sef_cpuid);
__ movl(rax, 7);
__ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
__ jccb(Assembler::greater, ext_cpuid);
__ xorl(rcx, rcx);
__ cpuid();
__ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
__ movl(Address(rsi, 0), rax);
__ movl(Address(rsi, 4), rbx);
//
// Extended cpuid(0x80000000)
//
__ bind(ext_cpuid);
    __ movl(rax, 0x80000000);
    __ cpuid();
    __ cmpl(rax, 0x80000000);     // Is cpuid(0x80000001) supported?
@@ -359,13 +394,19 @@ void VM_Version::get_processor_features() {
  if (UseSSE < 1)
    _cpuFeatures &= ~CPU_SSE;

  if (UseAVX < 2)
    _cpuFeatures &= ~CPU_AVX2;
  if (UseAVX < 1)
    _cpuFeatures &= ~CPU_AVX;

  if (logical_processors_per_package() == 1) {
    // HT processor could be installed on a system which doesn't support HT.
    _cpuFeatures &= ~CPU_HT;
  }

  char buf[256];
  jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
               cores_per_cpu(), threads_per_core(),
               cpu_family(), _model, _stepping,
               (supports_cmov() ? ", cmov" : ""),
@@ -379,6 +420,8 @@ void VM_Version::get_processor_features() {
               (supports_sse4_1() ? ", sse4.1" : ""),
               (supports_sse4_2() ? ", sse4.2" : ""),
               (supports_popcnt() ? ", popcnt" : ""),
               (supports_avx()    ? ", avx" : ""),
               (supports_avx2()   ? ", avx2" : ""),
               (supports_mmx_ext() ? ", mmxext" : ""),
               (supports_3dnow_prefetch() ? ", 3dnowpref" : ""),
               (supports_lzcnt()   ? ", lzcnt": ""),
@@ -389,17 +432,24 @@ void VM_Version::get_processor_features() {
  // UseSSE is set to the smaller of what hardware supports and what
  // the command line requires.  I.e., you cannot set UseSSE to 2 on
  // older Pentiums which do not support it.
  if (UseSSE > 4) UseSSE=4;
  if (UseSSE < 0) UseSSE=0;
  if (!supports_sse4_1()) // Drop to 3 if no SSE4 support
    UseSSE = MIN2((intx)3,UseSSE);
  if (!supports_sse3()) // Drop to 2 if no SSE3 support
    UseSSE = MIN2((intx)2,UseSSE);
  if (!supports_sse2()) // Drop to 1 if no SSE2 support
    UseSSE = MIN2((intx)1,UseSSE);
  if (!supports_sse ()) // Drop to 0 if no SSE support
    UseSSE = 0;
if (UseAVX > 2) UseAVX=2;
if (UseAVX < 0) UseAVX=0;
if (!supports_avx2()) // Drop to 1 if no AVX2 support
UseAVX = MIN2((intx)1,UseAVX);
if (!supports_avx ()) // Drop to 0 if no AVX support
UseAVX = 0;
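UseAVX is clamped the same way UseSSE is just above: capped at what the flag allows, then lowered to what the hardware actually reports. The net effect in isolation, as a sketch with the supports_* answers passed in as plain booleans:

#include <algorithm>
#include <cstdio>

// Cap the requested level at 2, then drop to what the hardware supports
// (2 -> AVX2, 1 -> AVX, 0 -> neither).
static int clamp_use_avx(int requested, bool has_avx, bool has_avx2) {
  int use_avx = std::min(std::max(requested, 0), 2);
  if (!has_avx2) use_avx = std::min(use_avx, 1);
  if (!has_avx)  use_avx = 0;
  return use_avx;
}

int main() {
  printf("%d\n", clamp_use_avx(2, true,  false)); // 1: AVX but no AVX2
  printf("%d\n", clamp_use_avx(2, false, false)); // 0: no AVX at all
  printf("%d\n", clamp_use_avx(9, true,  true));  // 2: capped at the maximum level
  return 0;
}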
  // On new cpus instructions which update whole XMM register should be used
  // to prevent partial register stall due to dependencies on high half.
  //
@@ -534,6 +584,9 @@ void VM_Version::get_processor_features() {
    if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
      UsePopCountInstruction = true;
    }
  } else if (UsePopCountInstruction) {
    warning("POPCNT instruction is not available on this CPU");
    FLAG_SET_DEFAULT(UsePopCountInstruction, false);
  }

#ifdef COMPILER2
@@ -605,7 +658,11 @@ void VM_Version::get_processor_features() {
  if (PrintMiscellaneous && Verbose) {
    tty->print_cr("Logical CPUs per core: %u",
                  logical_processors_per_package());
    tty->print("UseSSE=%d",UseSSE);
    if (UseAVX > 0) {
      tty->print("  UseAVX=%d",UseAVX);
    }
    tty->cr();
    tty->print("Allocation");
    if (AllocatePrefetchStyle <= 0 || UseSSE == 0 && !supports_3dnow_prefetch()) {
      tty->print_cr(": no prefetching");
...
@@ -78,7 +78,10 @@ public:
               sse4_2   : 1,
                        : 2,
               popcnt   : 1,
                        : 3,
               osxsave  : 1,
               avx      : 1,
                        : 3;
    } bits;
  };
@@ -176,6 +179,34 @@ public:
    } bits;
  };
union SefCpuid7Eax {
uint32_t value;
};
union SefCpuid7Ebx {
uint32_t value;
struct {
uint32_t fsgsbase : 1,
: 2,
bmi1 : 1,
: 1,
avx2 : 1,
: 2,
bmi2 : 1,
: 23;
} bits;
};
union XemXcr0Eax {
uint32_t value;
struct {
uint32_t x87 : 1,
sse : 1,
ymm : 1,
: 29;
} bits;
};
protected:
  static int _cpu;
  static int _model;
@@ -200,7 +231,9 @@ protected:
    CPU_SSE4_1 = (1 << 11),
    CPU_SSE4_2 = (1 << 12),
    CPU_POPCNT = (1 << 13),
    CPU_LZCNT  = (1 << 14),
    CPU_AVX    = (1 << 15),
    CPU_AVX2   = (1 << 16)
  } cpuFeatureFlags;
// cpuid information block. All info derived from executing cpuid with // cpuid information block. All info derived from executing cpuid with
...@@ -228,6 +261,12 @@ protected: ...@@ -228,6 +261,12 @@ protected:
uint32_t dcp_cpuid4_ecx; // unused currently uint32_t dcp_cpuid4_ecx; // unused currently
uint32_t dcp_cpuid4_edx; // unused currently uint32_t dcp_cpuid4_edx; // unused currently
// cpuid function 7 (structured extended features)
SefCpuid7Eax sef_cpuid7_eax;
SefCpuid7Ebx sef_cpuid7_ebx;
uint32_t sef_cpuid7_ecx; // unused currently
uint32_t sef_cpuid7_edx; // unused currently
// cpuid function 0xB (processor topology) // cpuid function 0xB (processor topology)
// ecx = 0 // ecx = 0
uint32_t tpl_cpuidB0_eax; uint32_t tpl_cpuidB0_eax;
...@@ -275,6 +314,10 @@ protected: ...@@ -275,6 +314,10 @@ protected:
uint32_t ext_cpuid8_ebx; // reserved uint32_t ext_cpuid8_ebx; // reserved
ExtCpuid8Ecx ext_cpuid8_ecx; ExtCpuid8Ecx ext_cpuid8_ecx;
uint32_t ext_cpuid8_edx; // reserved uint32_t ext_cpuid8_edx; // reserved
// extended control register XCR0 (the XFEATURE_ENABLED_MASK register)
XemXcr0Eax xem_xcr0_eax;
uint32_t xem_xcr0_edx; // reserved
}; };
// The actual cpuid info block // The actual cpuid info block
...@@ -328,6 +371,14 @@ protected: ...@@ -328,6 +371,14 @@ protected:
result |= CPU_SSE4_2; result |= CPU_SSE4_2;
if (_cpuid_info.std_cpuid1_ecx.bits.popcnt != 0) if (_cpuid_info.std_cpuid1_ecx.bits.popcnt != 0)
result |= CPU_POPCNT; result |= CPU_POPCNT;
if (_cpuid_info.std_cpuid1_ecx.bits.avx != 0 &&
_cpuid_info.std_cpuid1_ecx.bits.osxsave != 0 &&
_cpuid_info.xem_xcr0_eax.bits.sse != 0 &&
_cpuid_info.xem_xcr0_eax.bits.ymm != 0) {
result |= CPU_AVX;
if (_cpuid_info.sef_cpuid7_ebx.bits.avx2 != 0)
result |= CPU_AVX2;
}
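The feature mapping above only reports CPU_AVX when CPUID advertises both AVX and OSXSAVE and XCR0 confirms the OS saves XMM and YMM state, with CPU_AVX2 added only on top of that. Roughly the same sequence can be reproduced outside the VM as follows (assumes GCC/Clang's <cpuid.h> on x86; the inline XGETBV bytes are the same 0F 01 D0 that Assembler::xgetbv() emits):

#include <cpuid.h>    // GCC/Clang helper, an assumption of this sketch
#include <cstdint>
#include <cstdio>

// CPUID.1:ECX must report OSXSAVE (bit 27) and AVX (bit 28), and XCR0 must
// show that the OS saves XMM (bit 1) and YMM (bit 2) state.
static bool cpu_and_os_support_avx() {
  unsigned eax, ebx, ecx, edx;
  if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx)) return false;
  const unsigned OSXSAVE = 1u << 27, AVX = 1u << 28;
  if ((ecx & (OSXSAVE | AVX)) != (OSXSAVE | AVX)) return false;
  uint32_t xcr0_lo, xcr0_hi;
  __asm__ volatile(".byte 0x0F, 0x01, 0xD0"   // XGETBV
                   : "=a"(xcr0_lo), "=d"(xcr0_hi) : "c"(0));
  return (xcr0_lo & 0x6) == 0x6;
}

int main() {
  printf("AVX usable: %d\n", cpu_and_os_support_avx());
  return 0;
}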
// AMD features. // AMD features.
if (is_amd()) { if (is_amd()) {
...@@ -350,12 +401,14 @@ public: ...@@ -350,12 +401,14 @@ public:
static ByteSize std_cpuid0_offset() { return byte_offset_of(CpuidInfo, std_max_function); } static ByteSize std_cpuid0_offset() { return byte_offset_of(CpuidInfo, std_max_function); }
static ByteSize std_cpuid1_offset() { return byte_offset_of(CpuidInfo, std_cpuid1_eax); } static ByteSize std_cpuid1_offset() { return byte_offset_of(CpuidInfo, std_cpuid1_eax); }
static ByteSize dcp_cpuid4_offset() { return byte_offset_of(CpuidInfo, dcp_cpuid4_eax); } static ByteSize dcp_cpuid4_offset() { return byte_offset_of(CpuidInfo, dcp_cpuid4_eax); }
static ByteSize sef_cpuid7_offset() { return byte_offset_of(CpuidInfo, sef_cpuid7_eax); }
static ByteSize ext_cpuid1_offset() { return byte_offset_of(CpuidInfo, ext_cpuid1_eax); } static ByteSize ext_cpuid1_offset() { return byte_offset_of(CpuidInfo, ext_cpuid1_eax); }
static ByteSize ext_cpuid5_offset() { return byte_offset_of(CpuidInfo, ext_cpuid5_eax); } static ByteSize ext_cpuid5_offset() { return byte_offset_of(CpuidInfo, ext_cpuid5_eax); }
static ByteSize ext_cpuid8_offset() { return byte_offset_of(CpuidInfo, ext_cpuid8_eax); } static ByteSize ext_cpuid8_offset() { return byte_offset_of(CpuidInfo, ext_cpuid8_eax); }
static ByteSize tpl_cpuidB0_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB0_eax); } static ByteSize tpl_cpuidB0_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB0_eax); }
static ByteSize tpl_cpuidB1_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB1_eax); } static ByteSize tpl_cpuidB1_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB1_eax); }
static ByteSize tpl_cpuidB2_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB2_eax); } static ByteSize tpl_cpuidB2_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB2_eax); }
static ByteSize xem_xcr0_offset() { return byte_offset_of(CpuidInfo, xem_xcr0_eax); }
// Initialization // Initialization
static void initialize(); static void initialize();
...@@ -447,6 +500,8 @@ public: ...@@ -447,6 +500,8 @@ public:
static bool supports_sse4_1() { return (_cpuFeatures & CPU_SSE4_1) != 0; } static bool supports_sse4_1() { return (_cpuFeatures & CPU_SSE4_1) != 0; }
static bool supports_sse4_2() { return (_cpuFeatures & CPU_SSE4_2) != 0; } static bool supports_sse4_2() { return (_cpuFeatures & CPU_SSE4_2) != 0; }
static bool supports_popcnt() { return (_cpuFeatures & CPU_POPCNT) != 0; } static bool supports_popcnt() { return (_cpuFeatures & CPU_POPCNT) != 0; }
static bool supports_avx() { return (_cpuFeatures & CPU_AVX) != 0; }
static bool supports_avx2() { return (_cpuFeatures & CPU_AVX2) != 0; }
// //
// AMD features // AMD features
// //
......
@@ -281,7 +281,7 @@ static int pre_call_FPU_size() {
}

static int preserve_SP_size() {
  return 2;  // op, rm(reg/reg)
}

// !!!!! Special hack to get all type of calls to specify the byte offset
@@ -495,14 +495,34 @@ void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
  }
}
void emit_cmpfp_fixup(MacroAssembler& _masm) {
  Label exit;
  __ jccb(Assembler::noParity, exit);
  __ pushf();
  //
  // comiss/ucomiss instructions set ZF,PF,CF flags and
  // zero OF,AF,SF for NaN values.
  // Fixup flags by zeroing ZF,PF so that compare of NaN
  // values returns 'less than' result (CF is set).
  // Leave the rest of flags unchanged.
  //
  //    7 6 5 4 3 2 1 0
  //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  //    0 0 1 0 1 0 1 1   (0x2B)
  //
  __ andl(Address(rsp, 0), 0xffffff2b);
  __ popf();
  __ bind(exit);
}

void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
  Label done;
  __ movl(dst, -1);
  __ jcc(Assembler::parity, done);
  __ jcc(Assembler::below, done);
  __ setb(Assembler::notEqual, dst);
  __ movzbl(dst, dst);
  __ bind(done);
}
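emit_cmpfp_fixup() relies on the 0xffffff2b mask keeping only CF (plus the reserved bits) out of the flags an unordered compare produces. The arithmetic in isolation:

#include <cstdio>

// ucomiss on unordered (NaN) input sets ZF, PF and CF; masking the flags byte
// with 0x2B (the low byte of 0xffffff2b) leaves only CF set, which the
// following jcc conditions read as "less than".
int main() {
  unsigned nan_flags = 0x45;              // ZF | PF | CF
  unsigned fixed     = nan_flags & 0x2B;
  printf("%02X -> %02X\n", nan_flags, fixed);  // prints "45 -> 01"
  return 0;
}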
...@@ -792,32 +812,32 @@ static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset ...@@ -792,32 +812,32 @@ static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset
// Helper for XMM registers. Extra opcode bits, limited syntax. // Helper for XMM registers. Extra opcode bits, limited syntax.
static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load, static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
int offset, int reg_lo, int reg_hi, int size, outputStream* st ) { int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
if( cbuf ) { if (cbuf) {
if( reg_lo+1 == reg_hi ) { // double move? MacroAssembler _masm(cbuf);
if( is_load && !UseXmmLoadAndClearUpper ) if (reg_lo+1 == reg_hi) { // double move?
emit_opcode(*cbuf, 0x66 ); // use 'movlpd' for load if (is_load) {
else __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
emit_opcode(*cbuf, 0xF2 ); // use 'movsd' otherwise
} else { } else {
emit_opcode(*cbuf, 0xF3 ); __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
} }
emit_opcode(*cbuf, 0x0F ); } else {
if( reg_lo+1 == reg_hi && is_load && !UseXmmLoadAndClearUpper ) if (is_load) {
emit_opcode(*cbuf, 0x12 ); // use 'movlpd' for load __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
else } else {
emit_opcode(*cbuf, is_load ? 0x10 : 0x11 ); __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
encode_RegMem(*cbuf, Matcher::_regEncode[reg_lo], ESP_enc, 0x4, 0, offset, false); }
}
#ifndef PRODUCT #ifndef PRODUCT
} else if( !do_size ) { } else if (!do_size) {
if( size != 0 ) st->print("\n\t"); if (size != 0) st->print("\n\t");
if( reg_lo+1 == reg_hi ) { // double move? if (reg_lo+1 == reg_hi) { // double move?
if( is_load ) st->print("%s %s,[ESP + #%d]", if (is_load) st->print("%s %s,[ESP + #%d]",
UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD", UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
Matcher::regName[reg_lo], offset); Matcher::regName[reg_lo], offset);
else st->print("MOVSD [ESP + #%d],%s", else st->print("MOVSD [ESP + #%d],%s",
offset, Matcher::regName[reg_lo]); offset, Matcher::regName[reg_lo]);
} else { } else {
if( is_load ) st->print("MOVSS %s,[ESP + #%d]", if (is_load) st->print("MOVSS %s,[ESP + #%d]",
Matcher::regName[reg_lo], offset); Matcher::regName[reg_lo], offset);
else st->print("MOVSS [ESP + #%d],%s", else st->print("MOVSS [ESP + #%d],%s",
offset, Matcher::regName[reg_lo]); offset, Matcher::regName[reg_lo]);
...@@ -825,59 +845,55 @@ static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load, ...@@ -825,59 +845,55 @@ static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
#endif #endif
} }
int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4); int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
// VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes.
return size+5+offset_size; return size+5+offset_size;
} }
static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
int src_hi, int dst_hi, int size, outputStream* st ) { int src_hi, int dst_hi, int size, outputStream* st ) {
if( UseXmmRegToRegMoveAll ) {//Use movaps,movapd to move between xmm registers if (cbuf) {
if( cbuf ) { MacroAssembler _masm(cbuf);
if( (src_lo+1 == src_hi && dst_lo+1 == dst_hi) ) { if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
emit_opcode(*cbuf, 0x66 ); __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
as_XMMRegister(Matcher::_regEncode[src_lo]));
} else {
__ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
as_XMMRegister(Matcher::_regEncode[src_lo]));
} }
emit_opcode(*cbuf, 0x0F );
emit_opcode(*cbuf, 0x28 );
emit_rm (*cbuf, 0x3, Matcher::_regEncode[dst_lo], Matcher::_regEncode[src_lo] );
#ifndef PRODUCT #ifndef PRODUCT
} else if( !do_size ) { } else if (!do_size) {
if( size != 0 ) st->print("\n\t"); if (size != 0) st->print("\n\t");
if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move? if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]); st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
} else { } else {
st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]); st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
} }
#endif
}
return size + ((src_lo+1 == src_hi && dst_lo+1 == dst_hi) ? 4 : 3);
} else { } else {
if( cbuf ) {
emit_opcode(*cbuf, (src_lo+1 == src_hi && dst_lo+1 == dst_hi) ? 0xF2 : 0xF3 );
emit_opcode(*cbuf, 0x0F );
emit_opcode(*cbuf, 0x10 );
emit_rm (*cbuf, 0x3, Matcher::_regEncode[dst_lo], Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
} else if( !do_size ) {
if( size != 0 ) st->print("\n\t");
if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move? if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]); st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
} else { } else {
st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]); st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
} }
#endif
} }
return size+4; #endif
} }
// VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes.
// Only MOVAPS SSE prefix uses 1 byte.
int sz = 4;
if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
return size + sz;
} }
static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
int src_hi, int dst_hi, int size, outputStream* st ) { int src_hi, int dst_hi, int size, outputStream* st ) {
// 32-bit // 32-bit
if (cbuf) { if (cbuf) {
emit_opcode(*cbuf, 0x66); MacroAssembler _masm(cbuf);
emit_opcode(*cbuf, 0x0F); __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
emit_opcode(*cbuf, 0x6E); as_Register(Matcher::_regEncode[src_lo]));
emit_rm(*cbuf, 0x3, Matcher::_regEncode[dst_lo] & 7, Matcher::_regEncode[src_lo] & 7);
#ifndef PRODUCT #ifndef PRODUCT
} else if (!do_size) { } else if (!do_size) {
st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]); st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
...@@ -891,10 +907,9 @@ static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int ...@@ -891,10 +907,9 @@ static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int
int src_hi, int dst_hi, int size, outputStream* st ) { int src_hi, int dst_hi, int size, outputStream* st ) {
// 32-bit // 32-bit
if (cbuf) { if (cbuf) {
emit_opcode(*cbuf, 0x66); MacroAssembler _masm(cbuf);
emit_opcode(*cbuf, 0x0F); __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
emit_opcode(*cbuf, 0x7E); as_XMMRegister(Matcher::_regEncode[src_lo]));
emit_rm(*cbuf, 0x3, Matcher::_regEncode[src_lo] & 7, Matcher::_regEncode[dst_lo] & 7);
#ifndef PRODUCT #ifndef PRODUCT
} else if (!do_size) { } else if (!do_size) {
st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]); st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
...@@ -1931,11 +1946,6 @@ encode %{ ...@@ -1931,11 +1946,6 @@ encode %{
%} %}
enc_class Xor_Reg (eRegI dst) %{
emit_opcode(cbuf, 0x33);
emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
%}
// Following encoding is no longer used, but may be restored if calling // Following encoding is no longer used, but may be restored if calling
// convention changes significantly. // convention changes significantly.
// Became: Xor_Reg(EBP), Java_To_Runtime( labl ) // Became: Xor_Reg(EBP), Java_To_Runtime( labl )
...@@ -2013,64 +2023,6 @@ encode %{ ...@@ -2013,64 +2023,6 @@ encode %{
%} %}
enc_class MovI2X_reg(regX dst, eRegI src) %{
emit_opcode(cbuf, 0x66 ); // MOVD dst,src
emit_opcode(cbuf, 0x0F );
emit_opcode(cbuf, 0x6E );
emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
%}
enc_class MovX2I_reg(eRegI dst, regX src) %{
emit_opcode(cbuf, 0x66 ); // MOVD dst,src
emit_opcode(cbuf, 0x0F );
emit_opcode(cbuf, 0x7E );
emit_rm(cbuf, 0x3, $src$$reg, $dst$$reg);
%}
enc_class MovL2XD_reg(regXD dst, eRegL src, regXD tmp) %{
{ // MOVD $dst,$src.lo
emit_opcode(cbuf,0x66);
emit_opcode(cbuf,0x0F);
emit_opcode(cbuf,0x6E);
emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
}
{ // MOVD $tmp,$src.hi
emit_opcode(cbuf,0x66);
emit_opcode(cbuf,0x0F);
emit_opcode(cbuf,0x6E);
emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
}
{ // PUNPCKLDQ $dst,$tmp
emit_opcode(cbuf,0x66);
emit_opcode(cbuf,0x0F);
emit_opcode(cbuf,0x62);
emit_rm(cbuf, 0x3, $dst$$reg, $tmp$$reg);
}
%}
enc_class MovXD2L_reg(eRegL dst, regXD src, regXD tmp) %{
{ // MOVD $dst.lo,$src
emit_opcode(cbuf,0x66);
emit_opcode(cbuf,0x0F);
emit_opcode(cbuf,0x7E);
emit_rm(cbuf, 0x3, $src$$reg, $dst$$reg);
}
{ // PSHUFLW $tmp,$src,0x4E (01001110b)
emit_opcode(cbuf,0xF2);
emit_opcode(cbuf,0x0F);
emit_opcode(cbuf,0x70);
emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
emit_d8(cbuf, 0x4E);
}
{ // MOVD $dst.hi,$tmp
emit_opcode(cbuf,0x66);
emit_opcode(cbuf,0x0F);
emit_opcode(cbuf,0x7E);
emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg));
}
%}
// Encode a reg-reg copy. If it is useless, then empty encoding. // Encode a reg-reg copy. If it is useless, then empty encoding.
enc_class enc_Copy( eRegI dst, eRegI src ) %{ enc_class enc_Copy( eRegI dst, eRegI src ) %{
encode_Copy( cbuf, $dst$$reg, $src$$reg ); encode_Copy( cbuf, $dst$$reg, $src$$reg );
...@@ -2080,11 +2032,6 @@ encode %{ ...@@ -2080,11 +2032,6 @@ encode %{
encode_Copy( cbuf, $dst$$reg, $src$$reg ); encode_Copy( cbuf, $dst$$reg, $src$$reg );
%} %}
// Encode xmm reg-reg copy. If it is useless, then empty encoding.
enc_class enc_CopyXD( RegXD dst, RegXD src ) %{
encode_CopyXD( cbuf, $dst$$reg, $src$$reg );
%}
enc_class RegReg (eRegI dst, eRegI src) %{ // RegReg(Many) enc_class RegReg (eRegI dst, eRegI src) %{ // RegReg(Many)
emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
%} %}
...@@ -2634,116 +2581,59 @@ encode %{ ...@@ -2634,116 +2581,59 @@ encode %{
} }
%} %}
enc_class Push_ModD_encoding( regXD src0, regXD src1) %{ enc_class Push_ModD_encoding(regXD src0, regXD src1) %{
// Allocate a word MacroAssembler _masm(&cbuf);
emit_opcode(cbuf,0x83); // SUB ESP,8 __ subptr(rsp, 8);
emit_opcode(cbuf,0xEC); __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
emit_d8(cbuf,0x08); __ fld_d(Address(rsp, 0));
__ movdbl(Address(rsp, 0), $src0$$XMMRegister);
emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], src1 __ fld_d(Address(rsp, 0));
emit_opcode (cbuf, 0x0F );
emit_opcode (cbuf, 0x11 );
encode_RegMem(cbuf, $src1$$reg, ESP_enc, 0x4, 0, 0, false);
emit_opcode(cbuf,0xDD ); // FLD_D [ESP]
encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], src0
emit_opcode (cbuf, 0x0F );
emit_opcode (cbuf, 0x11 );
encode_RegMem(cbuf, $src0$$reg, ESP_enc, 0x4, 0, 0, false);
emit_opcode(cbuf,0xDD ); // FLD_D [ESP]
encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
%} %}
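// A sketch of the x87 stack bookkeeping assumed above (Push_ModD_encoding is
// presumably paired with an FPREM-style remainder loop and Push_ResultXD):
//   FLD_D [ESP] (src1)  -> ST(0) = src1
//   FLD_D [ESP] (src0)  -> ST(0) = src0, ST(1) = src1
// i.e. the dividend ends up on top, matching FPREM's ST(0) = ST(0) rem ST(1);
// Push_ResultXD later pops the result back into an XMM register and releases
// the 8-byte scratch slot with ADD ESP,8.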
enc_class Push_ModX_encoding( regX src0, regX src1) %{ enc_class Push_ModX_encoding(regX src0, regX src1) %{
// Allocate a word MacroAssembler _masm(&cbuf);
emit_opcode(cbuf,0x83); // SUB ESP,4 __ subptr(rsp, 4);
emit_opcode(cbuf,0xEC); __ movflt(Address(rsp, 0), $src1$$XMMRegister);
emit_d8(cbuf,0x04); __ fld_s(Address(rsp, 0));
__ movflt(Address(rsp, 0), $src0$$XMMRegister);
emit_opcode (cbuf, 0xF3 ); // MOVSS [ESP], src1 __ fld_s(Address(rsp, 0));
emit_opcode (cbuf, 0x0F );
emit_opcode (cbuf, 0x11 );
encode_RegMem(cbuf, $src1$$reg, ESP_enc, 0x4, 0, 0, false);
emit_opcode(cbuf,0xD9 ); // FLD [ESP]
encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
emit_opcode (cbuf, 0xF3 ); // MOVSS [ESP], src0
emit_opcode (cbuf, 0x0F );
emit_opcode (cbuf, 0x11 );
encode_RegMem(cbuf, $src0$$reg, ESP_enc, 0x4, 0, 0, false);
emit_opcode(cbuf,0xD9 ); // FLD [ESP]
encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
%} %}
enc_class Push_ResultXD(regXD dst) %{ enc_class Push_ResultXD(regXD dst) %{
store_to_stackslot( cbuf, 0xDD, 0x03, 0 ); //FSTP [ESP] MacroAssembler _masm(&cbuf);
__ fstp_d(Address(rsp, 0));
// UseXmmLoadAndClearUpper ? movsd dst,[esp] : movlpd dst,[esp] __ movdbl($dst$$XMMRegister, Address(rsp, 0));
emit_opcode (cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66); __ addptr(rsp, 8);
emit_opcode (cbuf, 0x0F );
emit_opcode (cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12);
encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false);
emit_opcode(cbuf,0x83); // ADD ESP,8
emit_opcode(cbuf,0xC4);
emit_d8(cbuf,0x08);
%} %}
enc_class Push_ResultX(regX dst, immI d8) %{ enc_class Push_ResultX(regX dst, immI d8) %{
store_to_stackslot( cbuf, 0xD9, 0x03, 0 ); //FSTP_S [ESP] MacroAssembler _masm(&cbuf);
__ fstp_s(Address(rsp, 0));
emit_opcode (cbuf, 0xF3 ); // MOVSS dst(xmm), [ESP] __ movflt($dst$$XMMRegister, Address(rsp, 0));
emit_opcode (cbuf, 0x0F ); __ addptr(rsp, $d8$$constant);
emit_opcode (cbuf, 0x10 );
encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false);
emit_opcode(cbuf,0x83); // ADD ESP,d8 (4 or 8)
emit_opcode(cbuf,0xC4);
emit_d8(cbuf,$d8$$constant);
%} %}
enc_class Push_SrcXD(regXD src) %{ enc_class Push_SrcXD(regXD src) %{
// Allocate a word MacroAssembler _masm(&cbuf);
emit_opcode(cbuf,0x83); // SUB ESP,8 __ subptr(rsp, 8);
emit_opcode(cbuf,0xEC); __ movdbl(Address(rsp, 0), $src$$XMMRegister);
emit_d8(cbuf,0x08); __ fld_d(Address(rsp, 0));
emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], src
emit_opcode (cbuf, 0x0F );
emit_opcode (cbuf, 0x11 );
encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
emit_opcode(cbuf,0xDD ); // FLD_D [ESP]
encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
%} %}
enc_class push_stack_temp_qword() %{ enc_class push_stack_temp_qword() %{
emit_opcode(cbuf,0x83); // SUB ESP,8 MacroAssembler _masm(&cbuf);
emit_opcode(cbuf,0xEC); __ subptr(rsp, 8);
emit_d8 (cbuf,0x08);
%} %}
enc_class pop_stack_temp_qword() %{ enc_class pop_stack_temp_qword() %{
emit_opcode(cbuf,0x83); // ADD ESP,8 MacroAssembler _masm(&cbuf);
emit_opcode(cbuf,0xC4); __ addptr(rsp, 8);
emit_d8 (cbuf,0x08);
%} %}
enc_class push_xmm_to_fpr1( regXD xmm_src ) %{ enc_class push_xmm_to_fpr1(regXD src) %{
emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], xmm_src MacroAssembler _masm(&cbuf);
emit_opcode (cbuf, 0x0F ); __ movdbl(Address(rsp, 0), $src$$XMMRegister);
emit_opcode (cbuf, 0x11 ); __ fld_d(Address(rsp, 0));
encode_RegMem(cbuf, $xmm_src$$reg, ESP_enc, 0x4, 0, 0, false);
emit_opcode(cbuf,0xDD ); // FLD_D [ESP]
encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
%} %}
// Compute X^Y using Intel's fast hardware instructions, if possible. // Compute X^Y using Intel's fast hardware instructions, if possible.
...@@ -2922,24 +2812,6 @@ encode %{ ...@@ -2922,24 +2812,6 @@ encode %{
%} %}
// XMM version of CmpF_Result. Because the XMM compare
// instructions set EFLAGS directly, it is simpler than
// the float version above.
enc_class CmpX_Result(eRegI dst) %{
MacroAssembler _masm(&cbuf);
Label nan, inc, done;
__ jccb(Assembler::parity, nan);
__ jccb(Assembler::equal, done);
__ jccb(Assembler::above, inc);
__ bind(nan);
__ decrement(as_Register($dst$$reg)); // NO L qqq
__ jmpb(done);
__ bind(inc);
__ increment(as_Register($dst$$reg)); // NO L qqq
__ bind(done);
%}
// Compare the longs and set flags // Compare the longs and set flags
// BROKEN! Do Not use as-is // BROKEN! Do Not use as-is
enc_class cmpl_test( eRegL src1, eRegL src2 ) %{ enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
...@@ -3162,48 +3034,6 @@ encode %{ ...@@ -3162,48 +3034,6 @@ encode %{
emit_d8 (cbuf,0 ); emit_d8 (cbuf,0 );
%} %}
enc_class movq_ld(regXD dst, memory mem) %{
MacroAssembler _masm(&cbuf);
__ movq($dst$$XMMRegister, $mem$$Address);
%}
enc_class movq_st(memory mem, regXD src) %{
MacroAssembler _masm(&cbuf);
__ movq($mem$$Address, $src$$XMMRegister);
%}
enc_class pshufd_8x8(regX dst, regX src) %{
MacroAssembler _masm(&cbuf);
encode_CopyXD(cbuf, $dst$$reg, $src$$reg);
__ punpcklbw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg));
__ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg), 0x00);
%}
enc_class pshufd_4x16(regX dst, regX src) %{
MacroAssembler _masm(&cbuf);
__ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), 0x00);
%}
enc_class pshufd(regXD dst, regXD src, int mode) %{
MacroAssembler _masm(&cbuf);
__ pshufd(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), $mode);
%}
enc_class pxor(regXD dst, regXD src) %{
MacroAssembler _masm(&cbuf);
__ pxor(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg));
%}
enc_class mov_i2x(regXD dst, eRegI src) %{
MacroAssembler _masm(&cbuf);
__ movdl(as_XMMRegister($dst$$reg), as_Register($src$$reg));
%}
// Because the transitions from emitted code to the runtime // Because the transitions from emitted code to the runtime
// monitorenter/exit helper stubs are so slow it's critical that // monitorenter/exit helper stubs are so slow it's critical that
...@@ -3842,273 +3672,6 @@ encode %{ ...@@ -3842,273 +3672,6 @@ encode %{
// Carry on here... // Carry on here...
%} %}
enc_class X2L_encoding( regX src ) %{
// Allocate a word
emit_opcode(cbuf,0x83); // SUB ESP,8
emit_opcode(cbuf,0xEC);
emit_d8(cbuf,0x08);
emit_opcode (cbuf, 0xF3 ); // MOVSS [ESP], src
emit_opcode (cbuf, 0x0F );
emit_opcode (cbuf, 0x11 );
encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
emit_opcode(cbuf,0xD9 ); // FLD_S [ESP]
encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
emit_opcode(cbuf,0xD9); // FLDCW trunc
emit_opcode(cbuf,0x2D);
emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
// Encoding assumes a double has been pushed into FPR0.
// Store down the double as a long, popping the FPU stack
emit_opcode(cbuf,0xDF); // FISTP [ESP]
emit_opcode(cbuf,0x3C);
emit_d8(cbuf,0x24);
// Restore the rounding mode; mask the exception
emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode
emit_opcode(cbuf,0x2D);
emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
: (int)StubRoutines::addr_fpu_cntrl_wrd_std());
// Load the converted int; adjust CPU stack
emit_opcode(cbuf,0x58); // POP EAX
emit_opcode(cbuf,0x5A); // POP EDX
emit_opcode(cbuf,0x81); // CMP EDX,imm
emit_d8 (cbuf,0xFA); // rdx
emit_d32 (cbuf,0x80000000);// 0x80000000
emit_opcode(cbuf,0x75); // JNE around_slow_call
emit_d8 (cbuf,0x13+4); // Size of slow_call
emit_opcode(cbuf,0x85); // TEST EAX,EAX
emit_opcode(cbuf,0xC0); // 2/rax,/rax,
emit_opcode(cbuf,0x75); // JNE around_slow_call
emit_d8 (cbuf,0x13); // Size of slow_call
// Allocate a word
emit_opcode(cbuf,0x83); // SUB ESP,4
emit_opcode(cbuf,0xEC);
emit_d8(cbuf,0x04);
emit_opcode (cbuf, 0xF3 ); // MOVSS [ESP], src
emit_opcode (cbuf, 0x0F );
emit_opcode (cbuf, 0x11 );
encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
emit_opcode(cbuf,0xD9 ); // FLD_S [ESP]
encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
emit_opcode(cbuf,0x83); // ADD ESP,4
emit_opcode(cbuf,0xC4);
emit_d8(cbuf,0x04);
// CALL directly to the runtime
cbuf.set_insts_mark();
emit_opcode(cbuf,0xE8); // Call into runtime
emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
// Carry on here...
%}
enc_class XD2L_encoding( regXD src ) %{
// Allocate a word
emit_opcode(cbuf,0x83); // SUB ESP,8
emit_opcode(cbuf,0xEC);
emit_d8(cbuf,0x08);
emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], src
emit_opcode (cbuf, 0x0F );
emit_opcode (cbuf, 0x11 );
encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
emit_opcode(cbuf,0xDD ); // FLD_D [ESP]
encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
emit_opcode(cbuf,0xD9); // FLDCW trunc
emit_opcode(cbuf,0x2D);
emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
// Encoding assumes a double has been pushed into FPR0.
// Store down the double as a long, popping the FPU stack
emit_opcode(cbuf,0xDF); // FISTP [ESP]
emit_opcode(cbuf,0x3C);
emit_d8(cbuf,0x24);
// Restore the rounding mode; mask the exception
emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode
emit_opcode(cbuf,0x2D);
emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
: (int)StubRoutines::addr_fpu_cntrl_wrd_std());
// Load the converted int; adjust CPU stack
emit_opcode(cbuf,0x58); // POP EAX
emit_opcode(cbuf,0x5A); // POP EDX
emit_opcode(cbuf,0x81); // CMP EDX,imm
emit_d8 (cbuf,0xFA); // rdx
emit_d32 (cbuf,0x80000000); // 0x80000000
emit_opcode(cbuf,0x75); // JNE around_slow_call
emit_d8 (cbuf,0x13+4); // Size of slow_call
emit_opcode(cbuf,0x85); // TEST EAX,EAX
emit_opcode(cbuf,0xC0); // 2/rax,/rax,
emit_opcode(cbuf,0x75); // JNE around_slow_call
emit_d8 (cbuf,0x13); // Size of slow_call
// Push src onto stack slow-path
// Allocate a word
emit_opcode(cbuf,0x83); // SUB ESP,8
emit_opcode(cbuf,0xEC);
emit_d8(cbuf,0x08);
emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], src
emit_opcode (cbuf, 0x0F );
emit_opcode (cbuf, 0x11 );
encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
emit_opcode(cbuf,0xDD ); // FLD_D [ESP]
encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
emit_opcode(cbuf,0x83); // ADD ESP,8
emit_opcode(cbuf,0xC4);
emit_d8(cbuf,0x08);
// CALL directly to the runtime
cbuf.set_insts_mark();
emit_opcode(cbuf,0xE8); // Call into runtime
emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
// Carry on here...
%}
enc_class D2X_encoding( regX dst, regD src ) %{
// Allocate a word
emit_opcode(cbuf,0x83); // SUB ESP,4
emit_opcode(cbuf,0xEC);
emit_d8(cbuf,0x04);
int pop = 0x02;
if ($src$$reg != FPR1L_enc) {
emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1)
emit_d8( cbuf, 0xC0-1+$src$$reg );
pop = 0x03;
}
store_to_stackslot( cbuf, 0xD9, pop, 0 ); // FST<P>_S [ESP]
emit_opcode (cbuf, 0xF3 ); // MOVSS dst(xmm), [ESP]
emit_opcode (cbuf, 0x0F );
emit_opcode (cbuf, 0x10 );
encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false);
emit_opcode(cbuf,0x83); // ADD ESP,4
emit_opcode(cbuf,0xC4);
emit_d8(cbuf,0x04);
// Carry on here...
%}
enc_class FX2I_encoding( regX src, eRegI dst ) %{
emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
// Compare the result to see if we need to go to the slow path
emit_opcode(cbuf,0x81); // CMP dst,imm
emit_rm (cbuf,0x3,0x7,$dst$$reg);
emit_d32 (cbuf,0x80000000); // 0x80000000
emit_opcode(cbuf,0x75); // JNE around_slow_call
emit_d8 (cbuf,0x13); // Size of slow_call
// Store xmm to a temp memory
// location and push it onto stack.
emit_opcode(cbuf,0x83); // SUB ESP,4
emit_opcode(cbuf,0xEC);
emit_d8(cbuf, $primary ? 0x8 : 0x4);
emit_opcode (cbuf, $primary ? 0xF2 : 0xF3 ); // MOVSS [ESP], xmm
emit_opcode (cbuf, 0x0F );
emit_opcode (cbuf, 0x11 );
encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
emit_opcode(cbuf, $primary ? 0xDD : 0xD9 ); // FLD [ESP]
encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
emit_opcode(cbuf,0x83); // ADD ESP,4
emit_opcode(cbuf,0xC4);
emit_d8(cbuf, $primary ? 0x8 : 0x4);
// CALL directly to the runtime
cbuf.set_insts_mark();
emit_opcode(cbuf,0xE8); // Call into runtime
emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
// Carry on here...
%}
enc_class X2D_encoding( regD dst, regX src ) %{
// Allocate a word
emit_opcode(cbuf,0x83); // SUB ESP,4
emit_opcode(cbuf,0xEC);
emit_d8(cbuf,0x04);
emit_opcode (cbuf, 0xF3 ); // MOVSS [ESP], xmm
emit_opcode (cbuf, 0x0F );
emit_opcode (cbuf, 0x11 );
encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
emit_opcode(cbuf,0xD9 ); // FLD_S [ESP]
encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
emit_opcode(cbuf,0x83); // ADD ESP,4
emit_opcode(cbuf,0xC4);
emit_d8(cbuf,0x04);
// Carry on here...
%}
enc_class AbsXF_encoding(regX dst) %{
address signmask_address=(address)float_signmask_pool;
// andpd:\tANDPS $dst,[signconst]
emit_opcode(cbuf, 0x0F);
emit_opcode(cbuf, 0x54);
emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
emit_d32(cbuf, (int)signmask_address);
%}
enc_class AbsXD_encoding(regXD dst) %{
address signmask_address=(address)double_signmask_pool;
// andpd:\tANDPD $dst,[signconst]
emit_opcode(cbuf, 0x66);
emit_opcode(cbuf, 0x0F);
emit_opcode(cbuf, 0x54);
emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
emit_d32(cbuf, (int)signmask_address);
%}
enc_class NegXF_encoding(regX dst) %{
address signmask_address=(address)float_signflip_pool;
// andpd:\tXORPS $dst,[signconst]
emit_opcode(cbuf, 0x0F);
emit_opcode(cbuf, 0x57);
emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
emit_d32(cbuf, (int)signmask_address);
%}
enc_class NegXD_encoding(regXD dst) %{
address signmask_address=(address)double_signflip_pool;
// andpd:\tXORPD $dst,[signconst]
emit_opcode(cbuf, 0x66);
emit_opcode(cbuf, 0x0F);
emit_opcode(cbuf, 0x57);
emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
emit_d32(cbuf, (int)signmask_address);
%}
enc_class FMul_ST_reg( eRegF src1 ) %{ enc_class FMul_ST_reg( eRegF src1 ) %{
// Operand was loaded from memory into fp ST (stack top) // Operand was loaded from memory into fp ST (stack top)
// FMUL ST,$src /* D8 C8+i */ // FMUL ST,$src /* D8 C8+i */
...@@ -4176,66 +3739,6 @@ encode %{ ...@@ -4176,66 +3739,6 @@ encode %{
store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp ); store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
%} %}
enc_class enc_loadLX_volatile( memory mem, stackSlotL dst, regXD tmp ) %{
{ // Atomic long load
// UseXmmLoadAndClearUpper ? movsd $tmp,$mem : movlpd $tmp,$mem
emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
emit_opcode(cbuf,0x0F);
emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0x10 : 0x12);
int base = $mem$$base;
int index = $mem$$index;
int scale = $mem$$scale;
int displace = $mem$$disp;
bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
}
{ // MOVSD $dst,$tmp ! atomic long store
emit_opcode(cbuf,0xF2);
emit_opcode(cbuf,0x0F);
emit_opcode(cbuf,0x11);
int base = $dst$$base;
int index = $dst$$index;
int scale = $dst$$scale;
int displace = $dst$$disp;
bool disp_is_oop = $dst->disp_is_oop(); // disp-as-oop when working with static globals
encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
}
%}
enc_class enc_loadLX_reg_volatile( memory mem, eRegL dst, regXD tmp ) %{
{ // Atomic long load
// UseXmmLoadAndClearUpper ? movsd $tmp,$mem : movlpd $tmp,$mem
emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
emit_opcode(cbuf,0x0F);
emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0x10 : 0x12);
int base = $mem$$base;
int index = $mem$$index;
int scale = $mem$$scale;
int displace = $mem$$disp;
bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
}
{ // MOVD $dst.lo,$tmp
emit_opcode(cbuf,0x66);
emit_opcode(cbuf,0x0F);
emit_opcode(cbuf,0x7E);
emit_rm(cbuf, 0x3, $tmp$$reg, $dst$$reg);
}
{ // PSRLQ $tmp,32
emit_opcode(cbuf,0x66);
emit_opcode(cbuf,0x0F);
emit_opcode(cbuf,0x73);
emit_rm(cbuf, 0x3, 0x02, $tmp$$reg);
emit_d8(cbuf, 0x20);
}
{ // MOVD $dst.hi,$tmp
emit_opcode(cbuf,0x66);
emit_opcode(cbuf,0x0F);
emit_opcode(cbuf,0x7E);
emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg));
}
%}
// Volatile Store Long. Must be atomic, so move it into // Volatile Store Long. Must be atomic, so move it into
// the FP TOS and then do a 64-bit FIST. Has to probe the // the FP TOS and then do a 64-bit FIST. Has to probe the
// target address before the store (for null-ptr checks) // target address before the store (for null-ptr checks)
...@@ -4243,74 +3746,14 @@ encode %{ ...@@ -4243,74 +3746,14 @@ encode %{
enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{ enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp ); store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
cbuf.set_insts_mark(); // Mark start of FIST in case $mem has an oop cbuf.set_insts_mark(); // Mark start of FIST in case $mem has an oop
emit_opcode(cbuf,0xDF); emit_opcode(cbuf,0xDF);
int rm_byte_opcode = 0x07; int rm_byte_opcode = 0x07;
int base = $mem$$base;
int index = $mem$$index;
int scale = $mem$$scale;
int displace = $mem$$disp;
bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop);
%}
enc_class enc_storeLX_volatile( memory mem, stackSlotL src, regXD tmp) %{
{ // Atomic long load
// UseXmmLoadAndClearUpper ? movsd $tmp,[$src] : movlpd $tmp,[$src]
emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
emit_opcode(cbuf,0x0F);
emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0x10 : 0x12);
int base = $src$$base;
int index = $src$$index;
int scale = $src$$scale;
int displace = $src$$disp;
bool disp_is_oop = $src->disp_is_oop(); // disp-as-oop when working with static globals
encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
}
cbuf.set_insts_mark(); // Mark start of MOVSD in case $mem has an oop
{ // MOVSD $mem,$tmp ! atomic long store
emit_opcode(cbuf,0xF2);
emit_opcode(cbuf,0x0F);
emit_opcode(cbuf,0x11);
int base = $mem$$base;
int index = $mem$$index;
int scale = $mem$$scale;
int displace = $mem$$disp;
bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
}
%}
enc_class enc_storeLX_reg_volatile( memory mem, eRegL src, regXD tmp, regXD tmp2) %{
{ // MOVD $tmp,$src.lo
emit_opcode(cbuf,0x66);
emit_opcode(cbuf,0x0F);
emit_opcode(cbuf,0x6E);
emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
}
{ // MOVD $tmp2,$src.hi
emit_opcode(cbuf,0x66);
emit_opcode(cbuf,0x0F);
emit_opcode(cbuf,0x6E);
emit_rm(cbuf, 0x3, $tmp2$$reg, HIGH_FROM_LOW($src$$reg));
}
{ // PUNPCKLDQ $tmp,$tmp2
emit_opcode(cbuf,0x66);
emit_opcode(cbuf,0x0F);
emit_opcode(cbuf,0x62);
emit_rm(cbuf, 0x3, $tmp$$reg, $tmp2$$reg);
}
cbuf.set_insts_mark(); // Mark start of MOVSD in case $mem has an oop
{ // MOVSD $mem,$tmp ! atomic long store
emit_opcode(cbuf,0xF2);
emit_opcode(cbuf,0x0F);
emit_opcode(cbuf,0x11);
int base = $mem$$base; int base = $mem$$base;
int index = $mem$$index; int index = $mem$$index;
int scale = $mem$$scale; int scale = $mem$$scale;
int displace = $mem$$disp; int displace = $mem$$disp;
bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop); encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop);
}
%} %}
// Safepoint Poll. This polls the safepoint page, and causes an // Safepoint Poll. This polls the safepoint page, and causes an
...@@ -6877,7 +6320,10 @@ instruct loadLX_volatile(stackSlotL dst, memory mem, regXD tmp) %{ ...@@ -6877,7 +6320,10 @@ instruct loadLX_volatile(stackSlotL dst, memory mem, regXD tmp) %{
ins_cost(180); ins_cost(180);
format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t" format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
"MOVSD $dst,$tmp" %} "MOVSD $dst,$tmp" %}
ins_encode(enc_loadLX_volatile(mem, dst, tmp)); ins_encode %{
__ movdbl($tmp$$XMMRegister, $mem$$Address);
__ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
%}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
...@@ -6890,7 +6336,12 @@ instruct loadLX_reg_volatile(eRegL dst, memory mem, regXD tmp) %{ ...@@ -6890,7 +6336,12 @@ instruct loadLX_reg_volatile(eRegL dst, memory mem, regXD tmp) %{
"MOVD $dst.lo,$tmp\n\t" "MOVD $dst.lo,$tmp\n\t"
"PSRLQ $tmp,32\n\t" "PSRLQ $tmp,32\n\t"
"MOVD $dst.hi,$tmp" %} "MOVD $dst.hi,$tmp" %}
ins_encode(enc_loadLX_reg_volatile(mem, dst, tmp)); ins_encode %{
__ movdbl($tmp$$XMMRegister, $mem$$Address);
__ movdl($dst$$Register, $tmp$$XMMRegister);
__ psrlq($tmp$$XMMRegister, 32);
__ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
%}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
...@@ -6948,7 +6399,9 @@ instruct loadXD(regXD dst, memory mem) %{ ...@@ -6948,7 +6399,9 @@ instruct loadXD(regXD dst, memory mem) %{
match(Set dst (LoadD mem)); match(Set dst (LoadD mem));
ins_cost(145); ins_cost(145);
format %{ "MOVSD $dst,$mem" %} format %{ "MOVSD $dst,$mem" %}
ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x10), RegMem(dst,mem)); ins_encode %{
__ movdbl ($dst$$XMMRegister, $mem$$Address);
%}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
...@@ -6957,7 +6410,9 @@ instruct loadXD_partial(regXD dst, memory mem) %{ ...@@ -6957,7 +6410,9 @@ instruct loadXD_partial(regXD dst, memory mem) %{
match(Set dst (LoadD mem)); match(Set dst (LoadD mem));
ins_cost(145); ins_cost(145);
format %{ "MOVLPD $dst,$mem" %} format %{ "MOVLPD $dst,$mem" %}
ins_encode( Opcode(0x66), Opcode(0x0F), Opcode(0x12), RegMem(dst,mem)); ins_encode %{
__ movdbl ($dst$$XMMRegister, $mem$$Address);
%}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
...@@ -6968,7 +6423,9 @@ instruct loadX(regX dst, memory mem) %{ ...@@ -6968,7 +6423,9 @@ instruct loadX(regX dst, memory mem) %{
match(Set dst (LoadF mem)); match(Set dst (LoadF mem));
ins_cost(145); ins_cost(145);
format %{ "MOVSS $dst,$mem" %} format %{ "MOVSS $dst,$mem" %}
ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x10), RegMem(dst,mem)); ins_encode %{
__ movflt ($dst$$XMMRegister, $mem$$Address);
%}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
...@@ -6992,7 +6449,9 @@ instruct loadA8B(regXD dst, memory mem) %{ ...@@ -6992,7 +6449,9 @@ instruct loadA8B(regXD dst, memory mem) %{
match(Set dst (Load8B mem)); match(Set dst (Load8B mem));
ins_cost(125); ins_cost(125);
format %{ "MOVQ $dst,$mem\t! packed8B" %} format %{ "MOVQ $dst,$mem\t! packed8B" %}
ins_encode( movq_ld(dst, mem)); ins_encode %{
__ movq($dst$$XMMRegister, $mem$$Address);
%}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
...@@ -7002,7 +6461,9 @@ instruct loadA4S(regXD dst, memory mem) %{ ...@@ -7002,7 +6461,9 @@ instruct loadA4S(regXD dst, memory mem) %{
match(Set dst (Load4S mem)); match(Set dst (Load4S mem));
ins_cost(125); ins_cost(125);
format %{ "MOVQ $dst,$mem\t! packed4S" %} format %{ "MOVQ $dst,$mem\t! packed4S" %}
ins_encode( movq_ld(dst, mem)); ins_encode %{
__ movq($dst$$XMMRegister, $mem$$Address);
%}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
...@@ -7012,7 +6473,9 @@ instruct loadA4C(regXD dst, memory mem) %{ ...@@ -7012,7 +6473,9 @@ instruct loadA4C(regXD dst, memory mem) %{
match(Set dst (Load4C mem)); match(Set dst (Load4C mem));
ins_cost(125); ins_cost(125);
format %{ "MOVQ $dst,$mem\t! packed4C" %} format %{ "MOVQ $dst,$mem\t! packed4C" %}
ins_encode( movq_ld(dst, mem)); ins_encode %{
__ movq($dst$$XMMRegister, $mem$$Address);
%}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
...@@ -7022,7 +6485,9 @@ instruct load2IU(regXD dst, memory mem) %{ ...@@ -7022,7 +6485,9 @@ instruct load2IU(regXD dst, memory mem) %{
match(Set dst (Load2I mem)); match(Set dst (Load2I mem));
ins_cost(125); ins_cost(125);
format %{ "MOVQ $dst,$mem\t! packed2I" %} format %{ "MOVQ $dst,$mem\t! packed2I" %}
ins_encode( movq_ld(dst, mem)); ins_encode %{
__ movq($dst$$XMMRegister, $mem$$Address);
%}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
...@@ -7032,7 +6497,9 @@ instruct loadA2F(regXD dst, memory mem) %{ ...@@ -7032,7 +6497,9 @@ instruct loadA2F(regXD dst, memory mem) %{
match(Set dst (Load2F mem)); match(Set dst (Load2F mem));
ins_cost(145); ins_cost(145);
format %{ "MOVQ $dst,$mem\t! packed2F" %} format %{ "MOVQ $dst,$mem\t! packed2F" %}
ins_encode( movq_ld(dst, mem)); ins_encode %{
__ movq($dst$$XMMRegister, $mem$$Address);
%}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
...@@ -7258,7 +6725,9 @@ instruct loadConXD0(regXD dst, immXD0 src) %{ ...@@ -7258,7 +6725,9 @@ instruct loadConXD0(regXD dst, immXD0 src) %{
match(Set dst src); match(Set dst src);
ins_cost(100); ins_cost(100);
format %{ "XORPD $dst,$dst\t# double 0.0" %} format %{ "XORPD $dst,$dst\t# double 0.0" %}
ins_encode( Opcode(0x66), Opcode(0x0F), Opcode(0x57), RegReg(dst,dst)); ins_encode %{
__ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
%}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
...@@ -7560,8 +7029,11 @@ instruct storeLX_volatile(memory mem, stackSlotL src, regXD tmp, eFlagsReg cr) % ...@@ -7560,8 +7029,11 @@ instruct storeLX_volatile(memory mem, stackSlotL src, regXD tmp, eFlagsReg cr) %
format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t" format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
"MOVSD $tmp,$src\n\t" "MOVSD $tmp,$src\n\t"
"MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %} "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
opcode(0x3B); ins_encode %{
ins_encode( OpcP, RegMem( EAX, mem ), enc_storeLX_volatile(mem, src, tmp)); __ cmpl(rax, $mem$$Address);
__ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
__ movdbl($mem$$Address, $tmp$$XMMRegister);
%}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
...@@ -7575,8 +7047,13 @@ instruct storeLX_reg_volatile(memory mem, eRegL src, regXD tmp2, regXD tmp, eFla ...@@ -7575,8 +7047,13 @@ instruct storeLX_reg_volatile(memory mem, eRegL src, regXD tmp2, regXD tmp, eFla
"MOVD $tmp2,$src.hi\n\t" "MOVD $tmp2,$src.hi\n\t"
"PUNPCKLDQ $tmp,$tmp2\n\t" "PUNPCKLDQ $tmp,$tmp2\n\t"
"MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %} "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
opcode(0x3B); ins_encode %{
ins_encode( OpcP, RegMem( EAX, mem ), enc_storeLX_reg_volatile(mem, src, tmp, tmp2)); __ cmpl(rax, $mem$$Address);
__ movdl($tmp$$XMMRegister, $src$$Register);
__ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
__ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
__ movdbl($mem$$Address, $tmp$$XMMRegister);
%}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
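// Lane bookkeeping for the 64-bit atomic store above (a worked sketch):
//   MOVD      tmp,  src.lo  -> tmp  = 0 : src.lo
//   MOVD      tmp2, src.hi  -> tmp2 = 0 : src.hi
//   PUNPCKLDQ tmp,  tmp2    -> tmp[31:0] = src.lo, tmp[63:32] = src.hi
//   MOVSD     mem,  tmp     -> a single 8-byte store, hence atomic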
...@@ -7643,7 +7120,9 @@ instruct storeA8B(memory mem, regXD src) %{ ...@@ -7643,7 +7120,9 @@ instruct storeA8B(memory mem, regXD src) %{
match(Set mem (Store8B mem src)); match(Set mem (Store8B mem src));
ins_cost(145); ins_cost(145);
format %{ "MOVQ $mem,$src\t! packed8B" %} format %{ "MOVQ $mem,$src\t! packed8B" %}
ins_encode( movq_st(mem, src)); ins_encode %{
__ movq($mem$$Address, $src$$XMMRegister);
%}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
...@@ -7653,7 +7132,9 @@ instruct storeA4C(memory mem, regXD src) %{ ...@@ -7653,7 +7132,9 @@ instruct storeA4C(memory mem, regXD src) %{
match(Set mem (Store4C mem src)); match(Set mem (Store4C mem src));
ins_cost(145); ins_cost(145);
format %{ "MOVQ $mem,$src\t! packed4C" %} format %{ "MOVQ $mem,$src\t! packed4C" %}
ins_encode( movq_st(mem, src)); ins_encode %{
__ movq($mem$$Address, $src$$XMMRegister);
%}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
...@@ -7663,7 +7144,9 @@ instruct storeA2I(memory mem, regXD src) %{ ...@@ -7663,7 +7144,9 @@ instruct storeA2I(memory mem, regXD src) %{
match(Set mem (Store2I mem src)); match(Set mem (Store2I mem src));
ins_cost(145); ins_cost(145);
format %{ "MOVQ $mem,$src\t! packed2I" %} format %{ "MOVQ $mem,$src\t! packed2I" %}
ins_encode( movq_st(mem, src)); ins_encode %{
__ movq($mem$$Address, $src$$XMMRegister);
%}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
...@@ -7709,7 +7192,9 @@ instruct storeXD(memory mem, regXD src) %{ ...@@ -7709,7 +7192,9 @@ instruct storeXD(memory mem, regXD src) %{
match(Set mem (StoreD mem src)); match(Set mem (StoreD mem src));
ins_cost(95); ins_cost(95);
format %{ "MOVSD $mem,$src" %} format %{ "MOVSD $mem,$src" %}
ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x11), RegMem(src, mem)); ins_encode %{
__ movdbl($mem$$Address, $src$$XMMRegister);
%}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
...@@ -7720,7 +7205,9 @@ instruct storeX(memory mem, regX src) %{ ...@@ -7720,7 +7205,9 @@ instruct storeX(memory mem, regX src) %{
match(Set mem (StoreF mem src)); match(Set mem (StoreF mem src));
ins_cost(95); ins_cost(95);
format %{ "MOVSS $mem,$src" %} format %{ "MOVSS $mem,$src" %}
ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x11), RegMem(src, mem)); ins_encode %{
__ movflt($mem$$Address, $src$$XMMRegister);
%}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
...@@ -7730,7 +7217,9 @@ instruct storeA2F(memory mem, regXD src) %{ ...@@ -7730,7 +7217,9 @@ instruct storeA2F(memory mem, regXD src) %{
match(Set mem (Store2F mem src)); match(Set mem (Store2F mem src));
ins_cost(145); ins_cost(145);
format %{ "MOVQ $mem,$src\t! packed2F" %} format %{ "MOVQ $mem,$src\t! packed2F" %}
ins_encode( movq_st(mem, src)); ins_encode %{
__ movq($mem$$Address, $src$$XMMRegister);
%}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
...@@ -8440,7 +7929,7 @@ instruct loadPLocked(eRegP dst, memory mem) %{ ...@@ -8440,7 +7929,7 @@ instruct loadPLocked(eRegP dst, memory mem) %{
%} %}
// LoadLong-locked - same as a volatile long load when used with compare-swap // LoadLong-locked - same as a volatile long load when used with compare-swap
instruct loadLLocked(stackSlotL dst, load_long_memory mem) %{ instruct loadLLocked(stackSlotL dst, memory mem) %{
predicate(UseSSE<=1); predicate(UseSSE<=1);
match(Set dst (LoadLLocked mem)); match(Set dst (LoadLLocked mem));
...@@ -8451,18 +7940,21 @@ instruct loadLLocked(stackSlotL dst, load_long_memory mem) %{ ...@@ -8451,18 +7940,21 @@ instruct loadLLocked(stackSlotL dst, load_long_memory mem) %{
ins_pipe( fpu_reg_mem ); ins_pipe( fpu_reg_mem );
%} %}
instruct loadLX_Locked(stackSlotL dst, load_long_memory mem, regXD tmp) %{ instruct loadLX_Locked(stackSlotL dst, memory mem, regXD tmp) %{
predicate(UseSSE>=2); predicate(UseSSE>=2);
match(Set dst (LoadLLocked mem)); match(Set dst (LoadLLocked mem));
effect(TEMP tmp); effect(TEMP tmp);
ins_cost(180); ins_cost(180);
format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t" format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
"MOVSD $dst,$tmp" %} "MOVSD $dst,$tmp" %}
ins_encode(enc_loadLX_volatile(mem, dst, tmp)); ins_encode %{
__ movdbl($tmp$$XMMRegister, $mem$$Address);
__ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
%}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
instruct loadLX_reg_Locked(eRegL dst, load_long_memory mem, regXD tmp) %{ instruct loadLX_reg_Locked(eRegL dst, memory mem, regXD tmp) %{
predicate(UseSSE>=2); predicate(UseSSE>=2);
match(Set dst (LoadLLocked mem)); match(Set dst (LoadLLocked mem));
effect(TEMP tmp); effect(TEMP tmp);
...@@ -8471,7 +7963,12 @@ instruct loadLX_reg_Locked(eRegL dst, load_long_memory mem, regXD tmp) %{ ...@@ -8471,7 +7963,12 @@ instruct loadLX_reg_Locked(eRegL dst, load_long_memory mem, regXD tmp) %{
"MOVD $dst.lo,$tmp\n\t" "MOVD $dst.lo,$tmp\n\t"
"PSRLQ $tmp,32\n\t" "PSRLQ $tmp,32\n\t"
"MOVD $dst.hi,$tmp" %} "MOVD $dst.hi,$tmp" %}
ins_encode(enc_loadLX_reg_volatile(mem, dst, tmp)); ins_encode %{
__ movdbl($tmp$$XMMRegister, $mem$$Address);
__ movdl($dst$$Register, $tmp$$XMMRegister);
__ psrlq($tmp$$XMMRegister, 32);
__ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
%}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
...@@ -10133,98 +9630,100 @@ instruct cmpD_reg(eRegI dst, regD src1, regD src2, eAXRegI rax, eFlagsReg cr) %{ ...@@ -10133,98 +9630,100 @@ instruct cmpD_reg(eRegI dst, regD src1, regD src2, eAXRegI rax, eFlagsReg cr) %{
%} %}
// float compare and set condition codes in EFLAGS by XMM regs // float compare and set condition codes in EFLAGS by XMM regs
instruct cmpXD_cc(eFlagsRegU cr, regXD dst, regXD src, eAXRegI rax) %{ instruct cmpXD_cc(eFlagsRegU cr, regXD src1, regXD src2) %{
predicate(UseSSE>=2); predicate(UseSSE>=2);
match(Set cr (CmpD dst src)); match(Set cr (CmpD src1 src2));
effect(KILL rax); ins_cost(145);
ins_cost(125); format %{ "UCOMISD $src1,$src2\n\t"
format %{ "COMISD $dst,$src\n" "JNP,s exit\n\t"
"\tJNP exit\n" "PUSHF\t# saw NaN, set CF\n\t"
"\tMOV ah,1 // saw a NaN, set CF\n" "AND [rsp], #0xffffff2b\n\t"
"\tSAHF\n" "POPF\n"
"exit:\tNOP // avoid branch to branch" %} "exit:" %}
opcode(0x66, 0x0F, 0x2F); ins_encode %{
ins_encode(OpcP, OpcS, Opcode(tertiary), RegReg(dst, src), cmpF_P6_fixup); __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
emit_cmpfp_fixup(_masm);
%}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
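// A minimal sketch of what emit_cmpfp_fixup() is assumed to expand to, taken
// from the format string above (make an unordered compare read as "below"):
  Label exit;
  __ jccb(Assembler::noParity, exit);   // ordered compare: flags already correct
  __ pushf();                           // saw a NaN: edit EFLAGS on the stack
  __ andl(Address(rsp, 0), 0xffffff2b); // clear ZF/PF/AF/SF, keep CF (set by NaN)
  __ popf();
  __ bind(exit);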
instruct cmpXD_ccCF(eFlagsRegUCF cr, regXD dst, regXD src) %{ instruct cmpXD_ccCF(eFlagsRegUCF cr, regXD src1, regXD src2) %{
predicate(UseSSE>=2); predicate(UseSSE>=2);
match(Set cr (CmpD dst src)); match(Set cr (CmpD src1 src2));
ins_cost(100); ins_cost(100);
format %{ "COMISD $dst,$src" %} format %{ "UCOMISD $src1,$src2" %}
opcode(0x66, 0x0F, 0x2F); ins_encode %{
ins_encode(OpcP, OpcS, Opcode(tertiary), RegReg(dst, src)); __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
%}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
// float compare and set condition codes in EFLAGS by XMM regs // float compare and set condition codes in EFLAGS by XMM regs
instruct cmpXD_ccmem(eFlagsRegU cr, regXD dst, memory src, eAXRegI rax) %{ instruct cmpXD_ccmem(eFlagsRegU cr, regXD src1, memory src2) %{
predicate(UseSSE>=2); predicate(UseSSE>=2);
match(Set cr (CmpD dst (LoadD src))); match(Set cr (CmpD src1 (LoadD src2)));
effect(KILL rax);
ins_cost(145); ins_cost(145);
format %{ "COMISD $dst,$src\n" format %{ "UCOMISD $src1,$src2\n\t"
"\tJNP exit\n" "JNP,s exit\n\t"
"\tMOV ah,1 // saw a NaN, set CF\n" "PUSHF\t# saw NaN, set CF\n\t"
"\tSAHF\n" "AND [rsp], #0xffffff2b\n\t"
"exit:\tNOP // avoid branch to branch" %} "POPF\n"
opcode(0x66, 0x0F, 0x2F); "exit:" %}
ins_encode(OpcP, OpcS, Opcode(tertiary), RegMem(dst, src), cmpF_P6_fixup); ins_encode %{
__ ucomisd($src1$$XMMRegister, $src2$$Address);
emit_cmpfp_fixup(_masm);
%}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
instruct cmpXD_ccmemCF(eFlagsRegUCF cr, regXD dst, memory src) %{ instruct cmpXD_ccmemCF(eFlagsRegUCF cr, regXD src1, memory src2) %{
predicate(UseSSE>=2); predicate(UseSSE>=2);
match(Set cr (CmpD dst (LoadD src))); match(Set cr (CmpD src1 (LoadD src2)));
ins_cost(100); ins_cost(100);
format %{ "COMISD $dst,$src" %} format %{ "UCOMISD $src1,$src2" %}
opcode(0x66, 0x0F, 0x2F); ins_encode %{
ins_encode(OpcP, OpcS, Opcode(tertiary), RegMem(dst, src)); __ ucomisd($src1$$XMMRegister, $src2$$Address);
%}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
// Compare into -1,0,1 in XMM // Compare into -1,0,1 in XMM
instruct cmpXD_reg(eRegI dst, regXD src1, regXD src2, eFlagsReg cr) %{ instruct cmpXD_reg(xRegI dst, regXD src1, regXD src2, eFlagsReg cr) %{
predicate(UseSSE>=2); predicate(UseSSE>=2);
match(Set dst (CmpD3 src1 src2)); match(Set dst (CmpD3 src1 src2));
effect(KILL cr); effect(KILL cr);
ins_cost(255); ins_cost(255);
format %{ "XOR $dst,$dst\n" format %{ "UCOMISD $src1, $src2\n\t"
"\tCOMISD $src1,$src2\n" "MOV $dst, #-1\n\t"
"\tJP,s nan\n" "JP,s done\n\t"
"\tJEQ,s exit\n" "JB,s done\n\t"
"\tJA,s inc\n" "SETNE $dst\n\t"
"nan:\tDEC $dst\n" "MOVZB $dst, $dst\n"
"\tJMP,s exit\n" "done:" %}
"inc:\tINC $dst\n" ins_encode %{
"exit:" __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
emit_cmpfp3(_masm, $dst$$Register);
%} %}
opcode(0x66, 0x0F, 0x2F);
ins_encode(Xor_Reg(dst), OpcP, OpcS, Opcode(tertiary), RegReg(src1, src2),
CmpX_Result(dst));
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
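// A minimal sketch of what emit_cmpfp3() is assumed to expand to, taken from the
// format string above (materialize -1/0/+1; SETcc needs a byte register, hence
// the xRegI operand):
  Label done;
  __ movl($dst$$Register, -1);                   // default: NaN or less-than
  __ jccb(Assembler::parity, done);              // unordered   -> -1
  __ jccb(Assembler::below, done);               // src1 < src2 -> -1
  __ setb(Assembler::notEqual, $dst$$Register);  // 0 if equal, 1 if greater
  __ movzbl($dst$$Register, $dst$$Register);     // zero-extend the byte result
  __ bind(done);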
// Compare into -1,0,1 in XMM and memory // Compare into -1,0,1 in XMM and memory
instruct cmpXD_regmem(eRegI dst, regXD src1, memory mem, eFlagsReg cr) %{ instruct cmpXD_regmem(xRegI dst, regXD src1, memory src2, eFlagsReg cr) %{
predicate(UseSSE>=2); predicate(UseSSE>=2);
match(Set dst (CmpD3 src1 (LoadD mem))); match(Set dst (CmpD3 src1 (LoadD src2)));
effect(KILL cr); effect(KILL cr);
ins_cost(275); ins_cost(275);
format %{ "COMISD $src1,$mem\n" format %{ "UCOMISD $src1, $src2\n\t"
"\tMOV $dst,0\t\t# do not blow flags\n" "MOV $dst, #-1\n\t"
"\tJP,s nan\n" "JP,s done\n\t"
"\tJEQ,s exit\n" "JB,s done\n\t"
"\tJA,s inc\n" "SETNE $dst\n\t"
"nan:\tDEC $dst\n" "MOVZB $dst, $dst\n"
"\tJMP,s exit\n" "done:" %}
"inc:\tINC $dst\n" ins_encode %{
"exit:" __ ucomisd($src1$$XMMRegister, $src2$$Address);
emit_cmpfp3(_masm, $dst$$Register);
%} %}
opcode(0x66, 0x0F, 0x2F);
ins_encode(OpcP, OpcS, Opcode(tertiary), RegMem(src1, mem),
LdImmI(dst,0x0), CmpX_Result(dst));
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
...@@ -10283,8 +9782,12 @@ instruct absD_reg(regDPR1 dst, regDPR1 src) %{ ...@@ -10283,8 +9782,12 @@ instruct absD_reg(regDPR1 dst, regDPR1 src) %{
instruct absXD_reg( regXD dst ) %{ instruct absXD_reg( regXD dst ) %{
predicate(UseSSE>=2); predicate(UseSSE>=2);
match(Set dst (AbsD dst)); match(Set dst (AbsD dst));
ins_cost(150);
format %{ "ANDPD $dst,[0x7FFFFFFFFFFFFFFF]\t# ABS D by sign masking" %} format %{ "ANDPD $dst,[0x7FFFFFFFFFFFFFFF]\t# ABS D by sign masking" %}
ins_encode( AbsXD_encoding(dst)); ins_encode %{
__ andpd($dst$$XMMRegister,
ExternalAddress((address)double_signmask_pool));
%}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
...@@ -10301,6 +9804,7 @@ instruct negD_reg(regDPR1 dst, regDPR1 src) %{ ...@@ -10301,6 +9804,7 @@ instruct negD_reg(regDPR1 dst, regDPR1 src) %{
instruct negXD_reg( regXD dst ) %{ instruct negXD_reg( regXD dst ) %{
predicate(UseSSE>=2); predicate(UseSSE>=2);
match(Set dst (NegD dst)); match(Set dst (NegD dst));
ins_cost(150);
format %{ "XORPD $dst,[0x8000000000000000]\t# CHS D by sign flipping" %} format %{ "XORPD $dst,[0x8000000000000000]\t# CHS D by sign flipping" %}
ins_encode %{ ins_encode %{
__ xorpd($dst$$XMMRegister, __ xorpd($dst$$XMMRegister,
...@@ -10414,7 +9918,9 @@ instruct addXD_reg(regXD dst, regXD src) %{ ...@@ -10414,7 +9918,9 @@ instruct addXD_reg(regXD dst, regXD src) %{
predicate(UseSSE>=2); predicate(UseSSE>=2);
match(Set dst (AddD dst src)); match(Set dst (AddD dst src));
format %{ "ADDSD $dst,$src" %} format %{ "ADDSD $dst,$src" %}
ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x58), RegReg(dst, src)); ins_encode %{
__ addsd($dst$$XMMRegister, $src$$XMMRegister);
%}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
...@@ -10432,7 +9938,9 @@ instruct addXD_mem(regXD dst, memory mem) %{ ...@@ -10432,7 +9938,9 @@ instruct addXD_mem(regXD dst, memory mem) %{
predicate(UseSSE>=2); predicate(UseSSE>=2);
match(Set dst (AddD dst (LoadD mem))); match(Set dst (AddD dst (LoadD mem)));
format %{ "ADDSD $dst,$mem" %} format %{ "ADDSD $dst,$mem" %}
ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x58), RegMem(dst,mem)); ins_encode %{
__ addsd($dst$$XMMRegister, $mem$$Address);
%}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
...@@ -10440,14 +9948,18 @@ instruct addXD_mem(regXD dst, memory mem) %{ ...@@ -10440,14 +9948,18 @@ instruct addXD_mem(regXD dst, memory mem) %{
instruct subXD_reg(regXD dst, regXD src) %{ instruct subXD_reg(regXD dst, regXD src) %{
predicate(UseSSE>=2); predicate(UseSSE>=2);
match(Set dst (SubD dst src)); match(Set dst (SubD dst src));
ins_cost(150);
format %{ "SUBSD $dst,$src" %} format %{ "SUBSD $dst,$src" %}
ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5C), RegReg(dst, src)); ins_encode %{
__ subsd($dst$$XMMRegister, $src$$XMMRegister);
%}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
instruct subXD_imm(regXD dst, immXD con) %{ instruct subXD_imm(regXD dst, immXD con) %{
predicate(UseSSE>=2); predicate(UseSSE>=2);
match(Set dst (SubD dst con)); match(Set dst (SubD dst con));
ins_cost(150);
format %{ "SUBSD $dst,[$constantaddress]\t# load from constant table: double=$con" %} format %{ "SUBSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
ins_encode %{ ins_encode %{
__ subsd($dst$$XMMRegister, $constantaddress($con)); __ subsd($dst$$XMMRegister, $constantaddress($con));
...@@ -10458,8 +9970,11 @@ instruct subXD_imm(regXD dst, immXD con) %{ ...@@ -10458,8 +9970,11 @@ instruct subXD_imm(regXD dst, immXD con) %{
instruct subXD_mem(regXD dst, memory mem) %{ instruct subXD_mem(regXD dst, memory mem) %{
predicate(UseSSE>=2); predicate(UseSSE>=2);
match(Set dst (SubD dst (LoadD mem))); match(Set dst (SubD dst (LoadD mem)));
ins_cost(150);
format %{ "SUBSD $dst,$mem" %} format %{ "SUBSD $dst,$mem" %}
ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5C), RegMem(dst,mem)); ins_encode %{
__ subsd($dst$$XMMRegister, $mem$$Address);
%}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
...@@ -10468,7 +9983,9 @@ instruct mulXD_reg(regXD dst, regXD src) %{ ...@@ -10468,7 +9983,9 @@ instruct mulXD_reg(regXD dst, regXD src) %{
predicate(UseSSE>=2); predicate(UseSSE>=2);
match(Set dst (MulD dst src)); match(Set dst (MulD dst src));
format %{ "MULSD $dst,$src" %} format %{ "MULSD $dst,$src" %}
ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x59), RegReg(dst, src)); ins_encode %{
__ mulsd($dst$$XMMRegister, $src$$XMMRegister);
%}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
...@@ -10486,7 +10003,9 @@ instruct mulXD_mem(regXD dst, memory mem) %{ ...@@ -10486,7 +10003,9 @@ instruct mulXD_mem(regXD dst, memory mem) %{
predicate(UseSSE>=2); predicate(UseSSE>=2);
match(Set dst (MulD dst (LoadD mem))); match(Set dst (MulD dst (LoadD mem)));
format %{ "MULSD $dst,$mem" %} format %{ "MULSD $dst,$mem" %}
ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x59), RegMem(dst,mem)); ins_encode %{
__ mulsd($dst$$XMMRegister, $mem$$Address);
%}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
...@@ -10496,7 +10015,9 @@ instruct divXD_reg(regXD dst, regXD src) %{ ...@@ -10496,7 +10015,9 @@ instruct divXD_reg(regXD dst, regXD src) %{
match(Set dst (DivD dst src)); match(Set dst (DivD dst src));
format %{ "DIVSD $dst,$src" %} format %{ "DIVSD $dst,$src" %}
opcode(0xF2, 0x0F, 0x5E); opcode(0xF2, 0x0F, 0x5E);
ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5E), RegReg(dst, src)); ins_encode %{
__ divsd($dst$$XMMRegister, $src$$XMMRegister);
%}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
...@@ -10514,7 +10035,9 @@ instruct divXD_mem(regXD dst, memory mem) %{ ...@@ -10514,7 +10035,9 @@ instruct divXD_mem(regXD dst, memory mem) %{
predicate(UseSSE>=2); predicate(UseSSE>=2);
match(Set dst (DivD dst (LoadD mem))); match(Set dst (DivD dst (LoadD mem)));
format %{ "DIVSD $dst,$mem" %} format %{ "DIVSD $dst,$mem" %}
ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5E), RegMem(dst,mem)); ins_encode %{
__ divsd($dst$$XMMRegister, $mem$$Address);
%}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
...@@ -11146,96 +10669,100 @@ instruct cmpF_reg(eRegI dst, regF src1, regF src2, eAXRegI rax, eFlagsReg cr) %{ ...@@ -11146,96 +10669,100 @@ instruct cmpF_reg(eRegI dst, regF src1, regF src2, eAXRegI rax, eFlagsReg cr) %{
%} %}
// float compare and set condition codes in EFLAGS by XMM regs // float compare and set condition codes in EFLAGS by XMM regs
instruct cmpX_cc(eFlagsRegU cr, regX dst, regX src, eAXRegI rax) %{ instruct cmpX_cc(eFlagsRegU cr, regX src1, regX src2) %{
predicate(UseSSE>=1); predicate(UseSSE>=1);
match(Set cr (CmpF dst src)); match(Set cr (CmpF src1 src2));
effect(KILL rax);
ins_cost(145); ins_cost(145);
format %{ "COMISS $dst,$src\n" format %{ "UCOMISS $src1,$src2\n\t"
"\tJNP exit\n" "JNP,s exit\n\t"
"\tMOV ah,1 // saw a NaN, set CF\n" "PUSHF\t# saw NaN, set CF\n\t"
"\tSAHF\n" "AND [rsp], #0xffffff2b\n\t"
"exit:\tNOP // avoid branch to branch" %} "POPF\n"
opcode(0x0F, 0x2F); "exit:" %}
ins_encode(OpcP, OpcS, RegReg(dst, src), cmpF_P6_fixup); ins_encode %{
__ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
emit_cmpfp_fixup(_masm);
%}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
instruct cmpX_ccCF(eFlagsRegUCF cr, regX dst, regX src) %{ instruct cmpX_ccCF(eFlagsRegUCF cr, regX src1, regX src2) %{
predicate(UseSSE>=1); predicate(UseSSE>=1);
match(Set cr (CmpF dst src)); match(Set cr (CmpF src1 src2));
ins_cost(100); ins_cost(100);
format %{ "COMISS $dst,$src" %} format %{ "UCOMISS $src1,$src2" %}
opcode(0x0F, 0x2F); ins_encode %{
ins_encode(OpcP, OpcS, RegReg(dst, src)); __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
%}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
// float compare and set condition codes in EFLAGS by XMM regs // float compare and set condition codes in EFLAGS by XMM regs
instruct cmpX_ccmem(eFlagsRegU cr, regX dst, memory src, eAXRegI rax) %{ instruct cmpX_ccmem(eFlagsRegU cr, regX src1, memory src2) %{
predicate(UseSSE>=1); predicate(UseSSE>=1);
match(Set cr (CmpF dst (LoadF src))); match(Set cr (CmpF src1 (LoadF src2)));
effect(KILL rax);
ins_cost(165); ins_cost(165);
format %{ "COMISS $dst,$src\n" format %{ "UCOMISS $src1,$src2\n\t"
"\tJNP exit\n" "JNP,s exit\n\t"
"\tMOV ah,1 // saw a NaN, set CF\n" "PUSHF\t# saw NaN, set CF\n\t"
"\tSAHF\n" "AND [rsp], #0xffffff2b\n\t"
"exit:\tNOP // avoid branch to branch" %} "POPF\n"
opcode(0x0F, 0x2F); "exit:" %}
ins_encode(OpcP, OpcS, RegMem(dst, src), cmpF_P6_fixup); ins_encode %{
__ ucomiss($src1$$XMMRegister, $src2$$Address);
emit_cmpfp_fixup(_masm);
%}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
instruct cmpX_ccmemCF(eFlagsRegUCF cr, regX dst, memory src) %{ instruct cmpX_ccmemCF(eFlagsRegUCF cr, regX src1, memory src2) %{
predicate(UseSSE>=1); predicate(UseSSE>=1);
match(Set cr (CmpF dst (LoadF src))); match(Set cr (CmpF src1 (LoadF src2)));
ins_cost(100); ins_cost(100);
format %{ "COMISS $dst,$src" %} format %{ "UCOMISS $src1,$src2" %}
opcode(0x0F, 0x2F); ins_encode %{
ins_encode(OpcP, OpcS, RegMem(dst, src)); __ ucomiss($src1$$XMMRegister, $src2$$Address);
%}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
// Compare into -1,0,1 in XMM // Compare into -1,0,1 in XMM
instruct cmpX_reg(eRegI dst, regX src1, regX src2, eFlagsReg cr) %{ instruct cmpX_reg(xRegI dst, regX src1, regX src2, eFlagsReg cr) %{
predicate(UseSSE>=1); predicate(UseSSE>=1);
match(Set dst (CmpF3 src1 src2)); match(Set dst (CmpF3 src1 src2));
effect(KILL cr); effect(KILL cr);
ins_cost(255); ins_cost(255);
format %{ "XOR $dst,$dst\n" format %{ "UCOMISS $src1, $src2\n\t"
"\tCOMISS $src1,$src2\n" "MOV $dst, #-1\n\t"
"\tJP,s nan\n" "JP,s done\n\t"
"\tJEQ,s exit\n" "JB,s done\n\t"
"\tJA,s inc\n" "SETNE $dst\n\t"
"nan:\tDEC $dst\n" "MOVZB $dst, $dst\n"
"\tJMP,s exit\n" "done:" %}
"inc:\tINC $dst\n" ins_encode %{
"exit:" __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
emit_cmpfp3(_masm, $dst$$Register);
%} %}
opcode(0x0F, 0x2F);
ins_encode(Xor_Reg(dst), OpcP, OpcS, RegReg(src1, src2), CmpX_Result(dst));
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
// Compare into -1,0,1 in XMM and memory // Compare into -1,0,1 in XMM and memory
instruct cmpX_regmem(eRegI dst, regX src1, memory mem, eFlagsReg cr) %{ instruct cmpX_regmem(xRegI dst, regX src1, memory src2, eFlagsReg cr) %{
predicate(UseSSE>=1); predicate(UseSSE>=1);
match(Set dst (CmpF3 src1 (LoadF mem))); match(Set dst (CmpF3 src1 (LoadF src2)));
effect(KILL cr); effect(KILL cr);
ins_cost(275); ins_cost(275);
format %{ "COMISS $src1,$mem\n" format %{ "UCOMISS $src1, $src2\n\t"
"\tMOV $dst,0\t\t# do not blow flags\n" "MOV $dst, #-1\n\t"
"\tJP,s nan\n" "JP,s done\n\t"
"\tJEQ,s exit\n" "JB,s done\n\t"
"\tJA,s inc\n" "SETNE $dst\n\t"
"nan:\tDEC $dst\n" "MOVZB $dst, $dst\n"
"\tJMP,s exit\n" "done:" %}
"inc:\tINC $dst\n" ins_encode %{
"exit:" __ ucomiss($src1$$XMMRegister, $src2$$Address);
emit_cmpfp3(_masm, $dst$$Register);
%} %}
opcode(0x0F, 0x2F);
ins_encode(OpcP, OpcS, RegMem(src1, mem), LdImmI(dst,0x0), CmpX_Result(dst));
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
...@@ -11295,7 +10822,9 @@ instruct addX_reg(regX dst, regX src) %{ ...@@ -11295,7 +10822,9 @@ instruct addX_reg(regX dst, regX src) %{
predicate(UseSSE>=1); predicate(UseSSE>=1);
match(Set dst (AddF dst src)); match(Set dst (AddF dst src));
format %{ "ADDSS $dst,$src" %} format %{ "ADDSS $dst,$src" %}
ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x58), RegReg(dst, src)); ins_encode %{
__ addss($dst$$XMMRegister, $src$$XMMRegister);
%}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
...@@ -11313,7 +10842,9 @@ instruct addX_mem(regX dst, memory mem) %{ ...@@ -11313,7 +10842,9 @@ instruct addX_mem(regX dst, memory mem) %{
predicate(UseSSE>=1); predicate(UseSSE>=1);
match(Set dst (AddF dst (LoadF mem))); match(Set dst (AddF dst (LoadF mem)));
format %{ "ADDSS $dst,$mem" %} format %{ "ADDSS $dst,$mem" %}
ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x58), RegMem(dst, mem)); ins_encode %{
__ addss($dst$$XMMRegister, $mem$$Address);
%}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
...@@ -11321,14 +10852,18 @@ instruct addX_mem(regX dst, memory mem) %{ ...@@ -11321,14 +10852,18 @@ instruct addX_mem(regX dst, memory mem) %{
instruct subX_reg(regX dst, regX src) %{ instruct subX_reg(regX dst, regX src) %{
predicate(UseSSE>=1); predicate(UseSSE>=1);
match(Set dst (SubF dst src)); match(Set dst (SubF dst src));
ins_cost(150);
format %{ "SUBSS $dst,$src" %} format %{ "SUBSS $dst,$src" %}
ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5C), RegReg(dst, src)); ins_encode %{
__ subss($dst$$XMMRegister, $src$$XMMRegister);
%}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
instruct subX_imm(regX dst, immXF con) %{ instruct subX_imm(regX dst, immXF con) %{
predicate(UseSSE>=1); predicate(UseSSE>=1);
match(Set dst (SubF dst con)); match(Set dst (SubF dst con));
ins_cost(150);
format %{ "SUBSS $dst,[$constantaddress]\t# load from constant table: float=$con" %} format %{ "SUBSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
ins_encode %{ ins_encode %{
__ subss($dst$$XMMRegister, $constantaddress($con)); __ subss($dst$$XMMRegister, $constantaddress($con));
...@@ -11339,8 +10874,11 @@ instruct subX_imm(regX dst, immXF con) %{ ...@@ -11339,8 +10874,11 @@ instruct subX_imm(regX dst, immXF con) %{
instruct subX_mem(regX dst, memory mem) %{ instruct subX_mem(regX dst, memory mem) %{
predicate(UseSSE>=1); predicate(UseSSE>=1);
match(Set dst (SubF dst (LoadF mem))); match(Set dst (SubF dst (LoadF mem)));
ins_cost(150);
format %{ "SUBSS $dst,$mem" %} format %{ "SUBSS $dst,$mem" %}
ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5C), RegMem(dst,mem)); ins_encode %{
__ subss($dst$$XMMRegister, $mem$$Address);
%}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
...@@ -11349,7 +10887,9 @@ instruct mulX_reg(regX dst, regX src) %{ ...@@ -11349,7 +10887,9 @@ instruct mulX_reg(regX dst, regX src) %{
predicate(UseSSE>=1); predicate(UseSSE>=1);
match(Set dst (MulF dst src)); match(Set dst (MulF dst src));
format %{ "MULSS $dst,$src" %} format %{ "MULSS $dst,$src" %}
ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x59), RegReg(dst, src)); ins_encode %{
__ mulss($dst$$XMMRegister, $src$$XMMRegister);
%}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
...@@ -11367,7 +10907,9 @@ instruct mulX_mem(regX dst, memory mem) %{ ...@@ -11367,7 +10907,9 @@ instruct mulX_mem(regX dst, memory mem) %{
predicate(UseSSE>=1); predicate(UseSSE>=1);
match(Set dst (MulF dst (LoadF mem))); match(Set dst (MulF dst (LoadF mem)));
format %{ "MULSS $dst,$mem" %} format %{ "MULSS $dst,$mem" %}
ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x59), RegMem(dst,mem)); ins_encode %{
__ mulss($dst$$XMMRegister, $mem$$Address);
%}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
...@@ -11376,7 +10918,9 @@ instruct divX_reg(regX dst, regX src) %{ ...@@ -11376,7 +10918,9 @@ instruct divX_reg(regX dst, regX src) %{
predicate(UseSSE>=1); predicate(UseSSE>=1);
match(Set dst (DivF dst src)); match(Set dst (DivF dst src));
format %{ "DIVSS $dst,$src" %} format %{ "DIVSS $dst,$src" %}
ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5E), RegReg(dst, src)); ins_encode %{
__ divss($dst$$XMMRegister, $src$$XMMRegister);
%}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
...@@ -11394,7 +10938,9 @@ instruct divX_mem(regX dst, memory mem) %{ ...@@ -11394,7 +10938,9 @@ instruct divX_mem(regX dst, memory mem) %{
predicate(UseSSE>=1); predicate(UseSSE>=1);
match(Set dst (DivF dst (LoadF mem))); match(Set dst (DivF dst (LoadF mem)));
format %{ "DIVSS $dst,$mem" %} format %{ "DIVSS $dst,$mem" %}
ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5E), RegMem(dst,mem)); ins_encode %{
__ divss($dst$$XMMRegister, $mem$$Address);
%}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
...@@ -11402,16 +10948,22 @@ instruct divX_mem(regX dst, memory mem) %{ ...@@ -11402,16 +10948,22 @@ instruct divX_mem(regX dst, memory mem) %{
instruct sqrtX_reg(regX dst, regX src) %{ instruct sqrtX_reg(regX dst, regX src) %{
predicate(UseSSE>=1); predicate(UseSSE>=1);
match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
ins_cost(150);
format %{ "SQRTSS $dst,$src" %} format %{ "SQRTSS $dst,$src" %}
ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x51), RegReg(dst, src)); ins_encode %{
__ sqrtss($dst$$XMMRegister, $src$$XMMRegister);
%}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
instruct sqrtX_mem(regX dst, memory mem) %{ instruct sqrtX_mem(regX dst, memory mem) %{
predicate(UseSSE>=1); predicate(UseSSE>=1);
match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF mem))))); match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF mem)))));
ins_cost(150);
format %{ "SQRTSS $dst,$mem" %} format %{ "SQRTSS $dst,$mem" %}
ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x51), RegMem(dst, mem)); ins_encode %{
__ sqrtss($dst$$XMMRegister, $mem$$Address);
%}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
...@@ -11419,16 +10971,22 @@ instruct sqrtX_mem(regX dst, memory mem) %{ ...@@ -11419,16 +10971,22 @@ instruct sqrtX_mem(regX dst, memory mem) %{
instruct sqrtXD_reg(regXD dst, regXD src) %{ instruct sqrtXD_reg(regXD dst, regXD src) %{
predicate(UseSSE>=2); predicate(UseSSE>=2);
match(Set dst (SqrtD src)); match(Set dst (SqrtD src));
ins_cost(150);
format %{ "SQRTSD $dst,$src" %} format %{ "SQRTSD $dst,$src" %}
ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x51), RegReg(dst, src)); ins_encode %{
__ sqrtsd($dst$$XMMRegister, $src$$XMMRegister);
%}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
instruct sqrtXD_mem(regXD dst, memory mem) %{ instruct sqrtXD_mem(regXD dst, memory mem) %{
predicate(UseSSE>=2); predicate(UseSSE>=2);
match(Set dst (SqrtD (LoadD mem))); match(Set dst (SqrtD (LoadD mem)));
ins_cost(150);
format %{ "SQRTSD $dst,$mem" %} format %{ "SQRTSD $dst,$mem" %}
ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x51), RegMem(dst, mem)); ins_encode %{
__ sqrtsd($dst$$XMMRegister, $mem$$Address);
%}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
...@@ -11445,8 +11003,12 @@ instruct absF_reg(regFPR1 dst, regFPR1 src) %{ ...@@ -11445,8 +11003,12 @@ instruct absF_reg(regFPR1 dst, regFPR1 src) %{
instruct absX_reg(regX dst ) %{ instruct absX_reg(regX dst ) %{
predicate(UseSSE>=1); predicate(UseSSE>=1);
match(Set dst (AbsF dst)); match(Set dst (AbsF dst));
ins_cost(150);
format %{ "ANDPS $dst,[0x7FFFFFFF]\t# ABS F by sign masking" %} format %{ "ANDPS $dst,[0x7FFFFFFF]\t# ABS F by sign masking" %}
ins_encode( AbsXF_encoding(dst)); ins_encode %{
__ andps($dst$$XMMRegister,
ExternalAddress((address)float_signmask_pool));
%}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
...@@ -11463,8 +11025,12 @@ instruct negF_reg(regFPR1 dst, regFPR1 src) %{ ...@@ -11463,8 +11025,12 @@ instruct negF_reg(regFPR1 dst, regFPR1 src) %{
instruct negX_reg( regX dst ) %{ instruct negX_reg( regX dst ) %{
predicate(UseSSE>=1); predicate(UseSSE>=1);
match(Set dst (NegF dst)); match(Set dst (NegF dst));
ins_cost(150);
format %{ "XORPS $dst,[0x80000000]\t# CHS F by sign flipping" %} format %{ "XORPS $dst,[0x80000000]\t# CHS F by sign flipping" %}
ins_encode( NegXF_encoding(dst)); ins_encode %{
__ xorps($dst$$XMMRegister,
ExternalAddress((address)float_signflip_pool));
%}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
...@@ -11870,7 +11436,17 @@ instruct convD2X_reg(regX dst, regD src, eFlagsReg cr) %{ ...@@ -11870,7 +11436,17 @@ instruct convD2X_reg(regX dst, regD src, eFlagsReg cr) %{
"FST_S [ESP],$src\t# F-round\n\t" "FST_S [ESP],$src\t# F-round\n\t"
"MOVSS $dst,[ESP]\n\t" "MOVSS $dst,[ESP]\n\t"
"ADD ESP,4" %} "ADD ESP,4" %}
ins_encode( D2X_encoding(dst, src) ); ins_encode %{
__ subptr(rsp, 4);
if ($src$$reg != FPR1L_enc) {
__ fld_s($src$$reg-1);
__ fstp_s(Address(rsp, 0));
} else {
__ fst_s(Address(rsp, 0));
}
__ movflt($dst$$XMMRegister, Address(rsp, 0));
__ addptr(rsp, 4);
%}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
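The round trip through a 4-byte stack slot above is what actually performs the double-to-float rounding: FST_S stores the x87 value as a 32-bit IEEE float, and MOVSS only picks up the already-rounded bits. A minimal standalone sketch of that semantic (plain C++, not HotSpot code; the cast stands in for the FST_S store):

#include <cstdio>

int main() {
  double d = 0.1;                    // not exactly representable as a float
  float  f = static_cast<float>(d);  // same rounding the FST_S store performs
  std::printf("double %.17g\nfloat  %.17g\n", d, static_cast<double>(f));
  return 0;
}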
...@@ -11879,8 +11455,9 @@ instruct convXD2X_reg(regX dst, regXD src) %{ ...@@ -11879,8 +11455,9 @@ instruct convXD2X_reg(regX dst, regXD src) %{
predicate(UseSSE>=2); predicate(UseSSE>=2);
match(Set dst (ConvD2F src)); match(Set dst (ConvD2F src));
format %{ "CVTSD2SS $dst,$src\t# F-round" %} format %{ "CVTSD2SS $dst,$src\t# F-round" %}
opcode(0xF2, 0x0F, 0x5A); ins_encode %{
ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src)); __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
%}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
...@@ -11910,7 +11487,13 @@ instruct convX2D_reg(regD dst, regX src, eFlagsReg cr) %{ ...@@ -11910,7 +11487,13 @@ instruct convX2D_reg(regD dst, regX src, eFlagsReg cr) %{
"FLD_S [ESP]\n\t" "FLD_S [ESP]\n\t"
"ADD ESP,4\n\t" "ADD ESP,4\n\t"
"FSTP $dst\t# D-round" %} "FSTP $dst\t# D-round" %}
ins_encode( X2D_encoding(dst, src), Pop_Reg_D(dst)); ins_encode %{
__ subptr(rsp, 4);
__ movflt(Address(rsp, 0), $src$$XMMRegister);
__ fld_s(Address(rsp, 0));
__ addptr(rsp, 4);
__ fstp_d($dst$$reg);
%}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
...@@ -11918,8 +11501,9 @@ instruct convX2XD_reg(regXD dst, regX src) %{ ...@@ -11918,8 +11501,9 @@ instruct convX2XD_reg(regXD dst, regX src) %{
predicate(UseSSE>=2); predicate(UseSSE>=2);
match(Set dst (ConvF2D src)); match(Set dst (ConvF2D src));
format %{ "CVTSS2SD $dst,$src\t# D-round" %} format %{ "CVTSS2SD $dst,$src\t# D-round" %}
opcode(0xF3, 0x0F, 0x5A); ins_encode %{
ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src)); __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
%}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
...@@ -11957,8 +11541,18 @@ instruct convXD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regXD src, eFlagsReg cr ) % ...@@ -11957,8 +11541,18 @@ instruct convXD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regXD src, eFlagsReg cr ) %
"ADD ESP, 8\n\t" "ADD ESP, 8\n\t"
"CALL d2i_wrapper\n" "CALL d2i_wrapper\n"
"fast:" %} "fast:" %}
opcode(0x1); // double-precision conversion ins_encode %{
ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x2C), FX2I_encoding(src,dst)); Label fast;
__ cvttsd2sil($dst$$Register, $src$$XMMRegister);
__ cmpl($dst$$Register, 0x80000000);
__ jccb(Assembler::notEqual, fast);
__ subptr(rsp, 8);
__ movdbl(Address(rsp, 0), $src$$XMMRegister);
__ fld_d(Address(rsp, 0));
__ addptr(rsp, 8);
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
__ bind(fast);
%}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
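CVTTSD2SI reports overflow and NaN by producing the "integer indefinite" value 0x80000000, which is why the encoding above only takes the slow path (the d2i_wrapper call) when it sees that sentinel. A standalone sketch of the Java semantics that slow path has to deliver — an illustration of the required result, not the wrapper's actual implementation:

#include <cstdint>
#include <cmath>
#include <cstdio>

// Java's (int) narrowing of a double: NaN -> 0, out-of-range saturates.
static int32_t java_d2i(double d) {
  if (std::isnan(d))              return 0;
  if (d >= 2147483647.0)          return INT32_MAX;
  if (d <= -2147483648.0)         return INT32_MIN;
  return static_cast<int32_t>(d); // in range: matches what CVTTSD2SI already produced
}

int main() {
  std::printf("%d %d %d\n", java_d2i(1e18), java_d2i(-1e18), java_d2i(NAN));
  return 0;
}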
...@@ -12004,9 +11598,36 @@ instruct convXD2L_reg_reg( eADXRegL dst, regXD src, eFlagsReg cr ) %{ ...@@ -12004,9 +11598,36 @@ instruct convXD2L_reg_reg( eADXRegL dst, regXD src, eFlagsReg cr ) %{
"SUB ESP,8\n\t" "SUB ESP,8\n\t"
"MOVSD [ESP],$src\n\t" "MOVSD [ESP],$src\n\t"
"FLD_D [ESP]\n\t" "FLD_D [ESP]\n\t"
"ADD ESP,8\n\t"
"CALL d2l_wrapper\n" "CALL d2l_wrapper\n"
"fast:" %} "fast:" %}
ins_encode( XD2L_encoding(src) ); ins_encode %{
Label fast;
__ subptr(rsp, 8);
__ movdbl(Address(rsp, 0), $src$$XMMRegister);
__ fld_d(Address(rsp, 0));
__ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
__ fistp_d(Address(rsp, 0));
// Restore the rounding mode, mask the exception
if (Compile::current()->in_24_bit_fp_mode()) {
__ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
} else {
__ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
}
// Load the converted long, adjust CPU stack
__ pop(rax);
__ pop(rdx);
__ cmpl(rdx, 0x80000000);
__ jccb(Assembler::notEqual, fast);
__ testl(rax, rax);
__ jccb(Assembler::notEqual, fast);
__ subptr(rsp, 8);
__ movdbl(Address(rsp, 0), $src$$XMMRegister);
__ fld_d(Address(rsp, 0));
__ addptr(rsp, 8);
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
__ bind(fast);
%}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
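The FLDCW of addr_fpu_cntrl_wrd_trunc() above exists because FISTP rounds according to the FPU control word, and Java's long conversion requires truncation toward zero; the original word (24-bit or standard) is restored immediately afterwards. A small standalone analogue of that control-word effect, using the C rounding-mode API instead of raw x87 state (a sketch, not HotSpot code):

#include <cfenv>
#include <cmath>
#include <cstdio>

int main() {
  double d = -2.7;
  std::fesetround(FE_TOWARDZERO);
  std::printf("toward zero: %g\n", std::nearbyint(d));  // -2, what FISTP gives after the trunc FLDCW
  std::fesetround(FE_TONEAREST);
  std::printf("default:     %g\n", std::nearbyint(d));  // -3, why the mode must be switched first
  return 0;
}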
...@@ -12050,8 +11671,18 @@ instruct convX2I_reg(eAXRegI dst, eDXRegI tmp, regX src, eFlagsReg cr ) %{ ...@@ -12050,8 +11671,18 @@ instruct convX2I_reg(eAXRegI dst, eDXRegI tmp, regX src, eFlagsReg cr ) %{
"ADD ESP, 4\n\t" "ADD ESP, 4\n\t"
"CALL d2i_wrapper\n" "CALL d2i_wrapper\n"
"fast:" %} "fast:" %}
opcode(0x0); // single-precision conversion ins_encode %{
ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x2C), FX2I_encoding(src,dst)); Label fast;
__ cvttss2sil($dst$$Register, $src$$XMMRegister);
__ cmpl($dst$$Register, 0x80000000);
__ jccb(Assembler::notEqual, fast);
__ subptr(rsp, 4);
__ movflt(Address(rsp, 0), $src$$XMMRegister);
__ fld_s(Address(rsp, 0));
__ addptr(rsp, 4);
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
__ bind(fast);
%}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
...@@ -12101,7 +11732,33 @@ instruct convX2L_reg_reg( eADXRegL dst, regX src, eFlagsReg cr ) %{ ...@@ -12101,7 +11732,33 @@ instruct convX2L_reg_reg( eADXRegL dst, regX src, eFlagsReg cr ) %{
"ADD ESP,4\n\t" "ADD ESP,4\n\t"
"CALL d2l_wrapper\n" "CALL d2l_wrapper\n"
"fast:" %} "fast:" %}
ins_encode( X2L_encoding(src) ); ins_encode %{
Label fast;
__ subptr(rsp, 8);
__ movflt(Address(rsp, 0), $src$$XMMRegister);
__ fld_s(Address(rsp, 0));
__ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
__ fistp_d(Address(rsp, 0));
// Restore the rounding mode, mask the exception
if (Compile::current()->in_24_bit_fp_mode()) {
__ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
} else {
__ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
}
// Load the converted long, adjust CPU stack
__ pop(rax);
__ pop(rdx);
__ cmpl(rdx, 0x80000000);
__ jccb(Assembler::notEqual, fast);
__ testl(rax, rax);
__ jccb(Assembler::notEqual, fast);
__ subptr(rsp, 4);
__ movflt(Address(rsp, 0), $src$$XMMRegister);
__ fld_s(Address(rsp, 0));
__ addptr(rsp, 4);
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
__ bind(fast);
%}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
...@@ -12119,8 +11776,9 @@ instruct convI2XD_reg(regXD dst, eRegI src) %{ ...@@ -12119,8 +11776,9 @@ instruct convI2XD_reg(regXD dst, eRegI src) %{
predicate( UseSSE>=2 && !UseXmmI2D ); predicate( UseSSE>=2 && !UseXmmI2D );
match(Set dst (ConvI2D src)); match(Set dst (ConvI2D src));
format %{ "CVTSI2SD $dst,$src" %} format %{ "CVTSI2SD $dst,$src" %}
opcode(0xF2, 0x0F, 0x2A); ins_encode %{
ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src)); __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
%}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
...@@ -12128,8 +11786,9 @@ instruct convI2XD_mem(regXD dst, memory mem) %{ ...@@ -12128,8 +11786,9 @@ instruct convI2XD_mem(regXD dst, memory mem) %{
predicate( UseSSE>=2 ); predicate( UseSSE>=2 );
match(Set dst (ConvI2D (LoadI mem))); match(Set dst (ConvI2D (LoadI mem)));
format %{ "CVTSI2SD $dst,$mem" %} format %{ "CVTSI2SD $dst,$mem" %}
opcode(0xF2, 0x0F, 0x2A); ins_encode %{
ins_encode( OpcP, OpcS, Opcode(tertiary), RegMem(dst, mem)); __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
%}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
...@@ -12225,9 +11884,9 @@ instruct convI2X_reg(regX dst, eRegI src) %{ ...@@ -12225,9 +11884,9 @@ instruct convI2X_reg(regX dst, eRegI src) %{
predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F ); predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
match(Set dst (ConvI2F src)); match(Set dst (ConvI2F src));
format %{ "CVTSI2SS $dst, $src" %} format %{ "CVTSI2SS $dst, $src" %}
ins_encode %{
opcode(0xF3, 0x0F, 0x2A); /* F3 0F 2A /r */ __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src)); %}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
...@@ -12351,8 +12010,9 @@ instruct MoveF2I_stack_reg(eRegI dst, stackSlotF src) %{ ...@@ -12351,8 +12010,9 @@ instruct MoveF2I_stack_reg(eRegI dst, stackSlotF src) %{
effect( DEF dst, USE src ); effect( DEF dst, USE src );
ins_cost(100); ins_cost(100);
format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %} format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %}
opcode(0x8B); ins_encode %{
ins_encode( OpcP, RegMem(dst,src)); __ movl($dst$$Register, Address(rsp, $src$$disp));
%}
ins_pipe( ialu_reg_mem ); ins_pipe( ialu_reg_mem );
%} %}
...@@ -12374,7 +12034,9 @@ instruct MoveF2I_reg_stack_sse(stackSlotI dst, regX src) %{ ...@@ -12374,7 +12034,9 @@ instruct MoveF2I_reg_stack_sse(stackSlotI dst, regX src) %{
ins_cost(95); ins_cost(95);
format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %} format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %}
ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x11), RegMem(src, dst)); ins_encode %{
__ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
%}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
...@@ -12384,7 +12046,9 @@ instruct MoveF2I_reg_reg_sse(eRegI dst, regX src) %{ ...@@ -12384,7 +12046,9 @@ instruct MoveF2I_reg_reg_sse(eRegI dst, regX src) %{
effect( DEF dst, USE src ); effect( DEF dst, USE src );
ins_cost(85); ins_cost(85);
format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %} format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %}
ins_encode( MovX2I_reg(dst, src)); ins_encode %{
__ movdl($dst$$Register, $src$$XMMRegister);
%}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
...@@ -12394,8 +12058,9 @@ instruct MoveI2F_reg_stack(stackSlotF dst, eRegI src) %{ ...@@ -12394,8 +12058,9 @@ instruct MoveI2F_reg_stack(stackSlotF dst, eRegI src) %{
ins_cost(100); ins_cost(100);
format %{ "MOV $dst,$src\t# MoveI2F_reg_stack" %} format %{ "MOV $dst,$src\t# MoveI2F_reg_stack" %}
opcode(0x89); ins_encode %{
ins_encode( OpcPRegSS( dst, src ) ); __ movl(Address(rsp, $dst$$disp), $src$$Register);
%}
ins_pipe( ialu_mem_reg ); ins_pipe( ialu_mem_reg );
%} %}
...@@ -12421,7 +12086,9 @@ instruct MoveI2F_stack_reg_sse(regX dst, stackSlotI src) %{ ...@@ -12421,7 +12086,9 @@ instruct MoveI2F_stack_reg_sse(regX dst, stackSlotI src) %{
ins_cost(95); ins_cost(95);
format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %} format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %}
ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x10), RegMem(dst,src)); ins_encode %{
__ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
%}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
...@@ -12432,7 +12099,9 @@ instruct MoveI2F_reg_reg_sse(regX dst, eRegI src) %{ ...@@ -12432,7 +12099,9 @@ instruct MoveI2F_reg_reg_sse(regX dst, eRegI src) %{
ins_cost(85); ins_cost(85);
format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %} format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %}
ins_encode( MovI2X_reg(dst, src) ); ins_encode %{
__ movdl($dst$$XMMRegister, $src$$Register);
%}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
...@@ -12464,9 +12133,10 @@ instruct MoveD2L_reg_stack_sse(stackSlotL dst, regXD src) %{ ...@@ -12464,9 +12133,10 @@ instruct MoveD2L_reg_stack_sse(stackSlotL dst, regXD src) %{
match(Set dst (MoveD2L src)); match(Set dst (MoveD2L src));
effect(DEF dst, USE src); effect(DEF dst, USE src);
ins_cost(95); ins_cost(95);
format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %} format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %}
ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x11), RegMem(src,dst)); ins_encode %{
__ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
%}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
...@@ -12478,7 +12148,11 @@ instruct MoveD2L_reg_reg_sse(eRegL dst, regXD src, regXD tmp) %{ ...@@ -12478,7 +12148,11 @@ instruct MoveD2L_reg_reg_sse(eRegL dst, regXD src, regXD tmp) %{
format %{ "MOVD $dst.lo,$src\n\t" format %{ "MOVD $dst.lo,$src\n\t"
"PSHUFLW $tmp,$src,0x4E\n\t" "PSHUFLW $tmp,$src,0x4E\n\t"
"MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %} "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
ins_encode( MovXD2L_reg(dst, src, tmp) ); ins_encode %{
__ movdl($dst$$Register, $src$$XMMRegister);
__ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
__ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
%}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
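MoveD2L is a pure bit copy, so the MOVD/PSHUFLW(0x4E)/MOVD sequence above just splits the raw 64 bits of the double into the low and high 32-bit halves of the long register pair; PSHUFLW 0x4E swaps the two dwords of the low quadword so the second MOVD can reach the upper half. The same computation in plain C++, for reference (a sketch, not HotSpot code):

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  double d = 3.5;
  uint64_t bits;
  std::memcpy(&bits, &d, sizeof bits);              // MoveD2L: reinterpret, no value conversion
  uint32_t lo = static_cast<uint32_t>(bits);        // first MOVD
  uint32_t hi = static_cast<uint32_t>(bits >> 32);  // PSHUFLW 0x4E + second MOVD
  std::printf("lo=0x%08X hi=0x%08X\n", lo, hi);     // 3.5 -> lo=0x00000000 hi=0x400C0000
  return 0;
}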
...@@ -12517,7 +12191,9 @@ instruct MoveL2D_stack_reg_sse(regXD dst, stackSlotL src) %{ ...@@ -12517,7 +12191,9 @@ instruct MoveL2D_stack_reg_sse(regXD dst, stackSlotL src) %{
ins_cost(95); ins_cost(95);
format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %} format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %}
ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x10), RegMem(dst,src)); ins_encode %{
__ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
%}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
...@@ -12528,7 +12204,9 @@ instruct MoveL2D_stack_reg_sse_partial(regXD dst, stackSlotL src) %{ ...@@ -12528,7 +12204,9 @@ instruct MoveL2D_stack_reg_sse_partial(regXD dst, stackSlotL src) %{
ins_cost(95); ins_cost(95);
format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %} format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
ins_encode( Opcode(0x66), Opcode(0x0F), Opcode(0x12), RegMem(dst,src)); ins_encode %{
__ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
%}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
...@@ -12540,7 +12218,11 @@ instruct MoveL2D_reg_reg_sse(regXD dst, eRegL src, regXD tmp) %{ ...@@ -12540,7 +12218,11 @@ instruct MoveL2D_reg_reg_sse(regXD dst, eRegL src, regXD tmp) %{
format %{ "MOVD $dst,$src.lo\n\t" format %{ "MOVD $dst,$src.lo\n\t"
"MOVD $tmp,$src.hi\n\t" "MOVD $tmp,$src.hi\n\t"
"PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %} "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
ins_encode( MovL2XD_reg(dst, src, tmp) ); ins_encode %{
__ movdl($dst$$XMMRegister, $src$$Register);
__ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
__ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
%}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
...@@ -12551,7 +12233,13 @@ instruct Repl8B_reg(regXD dst, regXD src) %{ ...@@ -12551,7 +12233,13 @@ instruct Repl8B_reg(regXD dst, regXD src) %{
format %{ "MOVDQA $dst,$src\n\t" format %{ "MOVDQA $dst,$src\n\t"
"PUNPCKLBW $dst,$dst\n\t" "PUNPCKLBW $dst,$dst\n\t"
"PSHUFLW $dst,$dst,0x00\t! replicate8B" %} "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
ins_encode( pshufd_8x8(dst, src)); ins_encode %{
if ($dst$$reg != $src$$reg) {
__ movdqa($dst$$XMMRegister, $src$$XMMRegister);
}
__ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
__ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
%}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
...@@ -12562,7 +12250,11 @@ instruct Repl8B_eRegI(regXD dst, eRegI src) %{ ...@@ -12562,7 +12250,11 @@ instruct Repl8B_eRegI(regXD dst, eRegI src) %{
format %{ "MOVD $dst,$src\n\t" format %{ "MOVD $dst,$src\n\t"
"PUNPCKLBW $dst,$dst\n\t" "PUNPCKLBW $dst,$dst\n\t"
"PSHUFLW $dst,$dst,0x00\t! replicate8B" %} "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
ins_encode( mov_i2x(dst, src), pshufd_8x8(dst, dst)); ins_encode %{
__ movdl($dst$$XMMRegister, $src$$Register);
__ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
__ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
%}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
...@@ -12571,7 +12263,9 @@ instruct Repl8B_immI0(regXD dst, immI0 zero) %{ ...@@ -12571,7 +12263,9 @@ instruct Repl8B_immI0(regXD dst, immI0 zero) %{
predicate(UseSSE>=2); predicate(UseSSE>=2);
match(Set dst (Replicate8B zero)); match(Set dst (Replicate8B zero));
format %{ "PXOR $dst,$dst\t! replicate8B" %} format %{ "PXOR $dst,$dst\t! replicate8B" %}
ins_encode( pxor(dst, dst)); ins_encode %{
__ pxor($dst$$XMMRegister, $dst$$XMMRegister);
%}
ins_pipe( fpu_reg_reg ); ins_pipe( fpu_reg_reg );
%} %}
...@@ -12580,7 +12274,9 @@ instruct Repl4S_reg(regXD dst, regXD src) %{ ...@@ -12580,7 +12274,9 @@ instruct Repl4S_reg(regXD dst, regXD src) %{
predicate(UseSSE>=2); predicate(UseSSE>=2);
match(Set dst (Replicate4S src)); match(Set dst (Replicate4S src));
format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %} format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %}
ins_encode( pshufd_4x16(dst, src)); ins_encode %{
__ pshuflw($dst$$XMMRegister, $src$$XMMRegister, 0x00);
%}
ins_pipe( fpu_reg_reg ); ins_pipe( fpu_reg_reg );
%} %}
...@@ -12590,7 +12286,10 @@ instruct Repl4S_eRegI(regXD dst, eRegI src) %{ ...@@ -12590,7 +12286,10 @@ instruct Repl4S_eRegI(regXD dst, eRegI src) %{
match(Set dst (Replicate4S src)); match(Set dst (Replicate4S src));
format %{ "MOVD $dst,$src\n\t" format %{ "MOVD $dst,$src\n\t"
"PSHUFLW $dst,$dst,0x00\t! replicate4S" %} "PSHUFLW $dst,$dst,0x00\t! replicate4S" %}
ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst)); ins_encode %{
__ movdl($dst$$XMMRegister, $src$$Register);
__ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
%}
ins_pipe( fpu_reg_reg ); ins_pipe( fpu_reg_reg );
%} %}
...@@ -12599,7 +12298,9 @@ instruct Repl4S_immI0(regXD dst, immI0 zero) %{ ...@@ -12599,7 +12298,9 @@ instruct Repl4S_immI0(regXD dst, immI0 zero) %{
predicate(UseSSE>=2); predicate(UseSSE>=2);
match(Set dst (Replicate4S zero)); match(Set dst (Replicate4S zero));
format %{ "PXOR $dst,$dst\t! replicate4S" %} format %{ "PXOR $dst,$dst\t! replicate4S" %}
ins_encode( pxor(dst, dst)); ins_encode %{
__ pxor($dst$$XMMRegister, $dst$$XMMRegister);
%}
ins_pipe( fpu_reg_reg ); ins_pipe( fpu_reg_reg );
%} %}
...@@ -12608,7 +12309,9 @@ instruct Repl4C_reg(regXD dst, regXD src) %{ ...@@ -12608,7 +12309,9 @@ instruct Repl4C_reg(regXD dst, regXD src) %{
predicate(UseSSE>=2); predicate(UseSSE>=2);
match(Set dst (Replicate4C src)); match(Set dst (Replicate4C src));
format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %} format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %}
ins_encode( pshufd_4x16(dst, src)); ins_encode %{
__ pshuflw($dst$$XMMRegister, $src$$XMMRegister, 0x00);
%}
ins_pipe( fpu_reg_reg ); ins_pipe( fpu_reg_reg );
%} %}
...@@ -12618,7 +12321,10 @@ instruct Repl4C_eRegI(regXD dst, eRegI src) %{ ...@@ -12618,7 +12321,10 @@ instruct Repl4C_eRegI(regXD dst, eRegI src) %{
match(Set dst (Replicate4C src)); match(Set dst (Replicate4C src));
format %{ "MOVD $dst,$src\n\t" format %{ "MOVD $dst,$src\n\t"
"PSHUFLW $dst,$dst,0x00\t! replicate4C" %} "PSHUFLW $dst,$dst,0x00\t! replicate4C" %}
ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst)); ins_encode %{
__ movdl($dst$$XMMRegister, $src$$Register);
__ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
%}
ins_pipe( fpu_reg_reg ); ins_pipe( fpu_reg_reg );
%} %}
...@@ -12627,7 +12333,9 @@ instruct Repl4C_immI0(regXD dst, immI0 zero) %{ ...@@ -12627,7 +12333,9 @@ instruct Repl4C_immI0(regXD dst, immI0 zero) %{
predicate(UseSSE>=2); predicate(UseSSE>=2);
match(Set dst (Replicate4C zero)); match(Set dst (Replicate4C zero));
format %{ "PXOR $dst,$dst\t! replicate4C" %} format %{ "PXOR $dst,$dst\t! replicate4C" %}
ins_encode( pxor(dst, dst)); ins_encode %{
__ pxor($dst$$XMMRegister, $dst$$XMMRegister);
%}
ins_pipe( fpu_reg_reg ); ins_pipe( fpu_reg_reg );
%} %}
...@@ -12636,7 +12344,9 @@ instruct Repl2I_reg(regXD dst, regXD src) %{ ...@@ -12636,7 +12344,9 @@ instruct Repl2I_reg(regXD dst, regXD src) %{
predicate(UseSSE>=2); predicate(UseSSE>=2);
match(Set dst (Replicate2I src)); match(Set dst (Replicate2I src));
format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %} format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %}
ins_encode( pshufd(dst, src, 0x00)); ins_encode %{
__ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
%}
ins_pipe( fpu_reg_reg ); ins_pipe( fpu_reg_reg );
%} %}
...@@ -12646,7 +12356,10 @@ instruct Repl2I_eRegI(regXD dst, eRegI src) %{ ...@@ -12646,7 +12356,10 @@ instruct Repl2I_eRegI(regXD dst, eRegI src) %{
match(Set dst (Replicate2I src)); match(Set dst (Replicate2I src));
format %{ "MOVD $dst,$src\n\t" format %{ "MOVD $dst,$src\n\t"
"PSHUFD $dst,$dst,0x00\t! replicate2I" %} "PSHUFD $dst,$dst,0x00\t! replicate2I" %}
ins_encode( mov_i2x(dst, src), pshufd(dst, dst, 0x00)); ins_encode %{
__ movdl($dst$$XMMRegister, $src$$Register);
__ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
%}
ins_pipe( fpu_reg_reg ); ins_pipe( fpu_reg_reg );
%} %}
...@@ -12655,7 +12368,9 @@ instruct Repl2I_immI0(regXD dst, immI0 zero) %{ ...@@ -12655,7 +12368,9 @@ instruct Repl2I_immI0(regXD dst, immI0 zero) %{
predicate(UseSSE>=2); predicate(UseSSE>=2);
match(Set dst (Replicate2I zero)); match(Set dst (Replicate2I zero));
format %{ "PXOR $dst,$dst\t! replicate2I" %} format %{ "PXOR $dst,$dst\t! replicate2I" %}
ins_encode( pxor(dst, dst)); ins_encode %{
__ pxor($dst$$XMMRegister, $dst$$XMMRegister);
%}
ins_pipe( fpu_reg_reg ); ins_pipe( fpu_reg_reg );
%} %}
...@@ -12664,7 +12379,9 @@ instruct Repl2F_reg(regXD dst, regXD src) %{ ...@@ -12664,7 +12379,9 @@ instruct Repl2F_reg(regXD dst, regXD src) %{
predicate(UseSSE>=2); predicate(UseSSE>=2);
match(Set dst (Replicate2F src)); match(Set dst (Replicate2F src));
format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %} format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
ins_encode( pshufd(dst, src, 0xe0)); ins_encode %{
__ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0xe0);
%}
ins_pipe( fpu_reg_reg ); ins_pipe( fpu_reg_reg );
%} %}
...@@ -12673,7 +12390,9 @@ instruct Repl2F_regX(regXD dst, regX src) %{ ...@@ -12673,7 +12390,9 @@ instruct Repl2F_regX(regXD dst, regX src) %{
predicate(UseSSE>=2); predicate(UseSSE>=2);
match(Set dst (Replicate2F src)); match(Set dst (Replicate2F src));
format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %} format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
ins_encode( pshufd(dst, src, 0xe0)); ins_encode %{
__ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0xe0);
%}
ins_pipe( fpu_reg_reg ); ins_pipe( fpu_reg_reg );
%} %}
...@@ -12682,7 +12401,9 @@ instruct Repl2F_immXF0(regXD dst, immXF0 zero) %{ ...@@ -12682,7 +12401,9 @@ instruct Repl2F_immXF0(regXD dst, immXF0 zero) %{
predicate(UseSSE>=2); predicate(UseSSE>=2);
match(Set dst (Replicate2F zero)); match(Set dst (Replicate2F zero));
format %{ "PXOR $dst,$dst\t! replicate2F" %} format %{ "PXOR $dst,$dst\t! replicate2F" %}
ins_encode( pxor(dst, dst)); ins_encode %{
__ pxor($dst$$XMMRegister, $dst$$XMMRegister);
%}
ins_pipe( fpu_reg_reg ); ins_pipe( fpu_reg_reg );
%} %}
......
...@@ -552,7 +552,7 @@ source %{ ...@@ -552,7 +552,7 @@ source %{
#define __ _masm. #define __ _masm.
static int preserve_SP_size() { static int preserve_SP_size() {
return LP64_ONLY(1 +) 2; // [rex,] op, rm(reg/reg) return 3; // rex.w, op, rm(reg/reg)
} }
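For reference, the fixed count of 3 corresponds to a REX.W-prefixed reg/reg MOV: one REX byte, one opcode byte, one ModRM byte. A tiny illustration with an arbitrary register pair (the exact registers preserve_SP uses are beside the point here):

#include <cstdio>

int main() {
  const unsigned char rex_w = 0x48;                 // REX.W
  const unsigned char opc   = 0x8B;                 // MOV r64, r/m64
  const unsigned char modrm = 0xC0 | (5 << 3) | 4;  // mod=11, reg=rbp, rm=rsp
  std::printf("%02X %02X %02X  (3 bytes: mov rbp, rsp)\n", rex_w, opc, modrm);
  return 0;
}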
// !!!!! Special hack to get all types of calls to specify the byte offset // !!!!! Special hack to get all types of calls to specify the byte offset
...@@ -797,48 +797,35 @@ void encode_RegMem(CodeBuffer &cbuf, ...@@ -797,48 +797,35 @@ void encode_RegMem(CodeBuffer &cbuf,
} }
} }
void encode_copy(CodeBuffer &cbuf, int dstenc, int srcenc)
{
if (dstenc != srcenc) {
if (dstenc < 8) {
if (srcenc >= 8) {
emit_opcode(cbuf, Assembler::REX_B);
srcenc -= 8;
}
} else {
if (srcenc < 8) {
emit_opcode(cbuf, Assembler::REX_R);
} else {
emit_opcode(cbuf, Assembler::REX_RB);
srcenc -= 8;
}
dstenc -= 8;
}
emit_opcode(cbuf, 0x8B);
emit_rm(cbuf, 0x3, dstenc, srcenc);
}
}
void encode_CopyXD( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
if( dst_encoding == src_encoding ) {
// reg-reg copy, use an empty encoding
} else {
MacroAssembler _masm(&cbuf);
__ movdqa(as_XMMRegister(dst_encoding), as_XMMRegister(src_encoding));
}
}
// This could be in MacroAssembler but it's fairly C2 specific // This could be in MacroAssembler but it's fairly C2 specific
void emit_cmpfp_fixup(MacroAssembler& _masm) { void emit_cmpfp_fixup(MacroAssembler& _masm) {
Label exit; Label exit;
__ jccb(Assembler::noParity, exit); __ jccb(Assembler::noParity, exit);
__ pushf(); __ pushf();
//
// comiss/ucomiss instructions set ZF,PF,CF flags and
// zero OF,AF,SF for NaN values.
// Fixup flags by zeroing ZF,PF so that compare of NaN
// values returns 'less than' result (CF is set).
// Leave the rest of flags unchanged.
//
// 7 6 5 4 3 2 1 0
// |S|Z|r|A|r|P|r|C| (r - reserved bit)
// 0 0 1 0 1 0 1 1 (0x2B)
//
__ andq(Address(rsp, 0), 0xffffff2b); __ andq(Address(rsp, 0), 0xffffff2b);
__ popf(); __ popf();
__ bind(exit); __ bind(exit);
__ nop(); // (target for branch to avoid branch to branch) }
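To make the mask concrete: for a NaN operand comiss/ucomiss leave ZF, PF and CF set, and AND-ing the saved flags with 0x2B clears ZF and PF while keeping CF, so the subsequent unsigned-below test sees a plain "less than". A standalone check of that arithmetic (not HotSpot code):

#include <cstdint>
#include <cstdio>

int main() {
  const uint8_t CF = 1u << 0, PF = 1u << 2, ZF = 1u << 6;
  uint8_t nan_flags = static_cast<uint8_t>(ZF | PF | CF);     // what comiss/ucomiss set for NaN
  uint8_t fixed     = static_cast<uint8_t>(nan_flags & 0x2B); // the andq mask above
  std::printf("before=0x%02X after=0x%02X (CF only: %s)\n",
              nan_flags, fixed, fixed == CF ? "yes" : "no");
  return 0;
}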
void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
Label done;
__ movl(dst, -1);
__ jcc(Assembler::parity, done);
__ jcc(Assembler::below, done);
__ setb(Assembler::notEqual, dst);
__ movzbl(dst, dst);
__ bind(done);
} }
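emit_cmpfp3 packs the flags into a three-way result: -1 when the compare was unordered (parity) or below, 0 when equal, 1 when above — the fcmpl/dcmpl flavour of Java's floating-point compare, assuming the usual ucomis(src1, src2) operand order at the call site. A sketch of that mapping in plain C++ (illustrative only):

#include <cmath>
#include <cstdio>

static int cmpfp3(double a, double b) {
  if (std::isnan(a) || std::isnan(b)) return -1;  // the parity branch
  if (a < b)                          return -1;  // the below branch
  return (a == b) ? 0 : 1;                        // setne + movzbl
}

int main() {
  std::printf("%d %d %d %d\n", cmpfp3(1, 2), cmpfp3(2, 2), cmpfp3(3, 2), cmpfp3(NAN, 2));
  return 0;
}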
...@@ -1274,16 +1261,8 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf, ...@@ -1274,16 +1261,8 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
// 64-bit // 64-bit
int offset = ra_->reg2offset(src_first); int offset = ra_->reg2offset(src_first);
if (cbuf) { if (cbuf) {
emit_opcode(*cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66); MacroAssembler _masm(cbuf);
if (Matcher::_regEncode[dst_first] >= 8) { __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
emit_opcode(*cbuf, Assembler::REX_R);
}
emit_opcode(*cbuf, 0x0F);
emit_opcode(*cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12);
encode_RegMem(*cbuf,
Matcher::_regEncode[dst_first],
RSP_enc, 0x4, 0, offset,
false);
#ifndef PRODUCT #ifndef PRODUCT
} else if (!do_size) { } else if (!do_size) {
st->print("%s %s, [rsp + #%d]\t# spill", st->print("%s %s, [rsp + #%d]\t# spill",
...@@ -1294,25 +1273,17 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf, ...@@ -1294,25 +1273,17 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
} }
return return
((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
((Matcher::_regEncode[dst_first] < 8) ((Matcher::_regEncode[dst_first] >= 8)
? 5 ? 6
: 6); // REX : (5 + ((UseAVX>0)?1:0))); // REX
} else { } else {
// 32-bit // 32-bit
assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
int offset = ra_->reg2offset(src_first); int offset = ra_->reg2offset(src_first);
if (cbuf) { if (cbuf) {
emit_opcode(*cbuf, 0xF3); MacroAssembler _masm(cbuf);
if (Matcher::_regEncode[dst_first] >= 8) { __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
emit_opcode(*cbuf, Assembler::REX_R);
}
emit_opcode(*cbuf, 0x0F);
emit_opcode(*cbuf, 0x10);
encode_RegMem(*cbuf,
Matcher::_regEncode[dst_first],
RSP_enc, 0x4, 0, offset,
false);
#ifndef PRODUCT #ifndef PRODUCT
} else if (!do_size) { } else if (!do_size) {
st->print("movss %s, [rsp + #%d]\t# spill", st->print("movss %s, [rsp + #%d]\t# spill",
...@@ -1322,9 +1293,9 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf, ...@@ -1322,9 +1293,9 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
} }
return return
((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
((Matcher::_regEncode[dst_first] < 8) ((Matcher::_regEncode[dst_first] >= 8)
? 5 ? 6
: 6); // REX : (5 + ((UseAVX>0)?1:0))); // REX
} }
} }
} else if (src_first_rc == rc_int) { } else if (src_first_rc == rc_int) {
...@@ -1450,25 +1421,8 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf, ...@@ -1450,25 +1421,8 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
(dst_first & 1) == 0 && dst_first + 1 == dst_second) { (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
// 64-bit // 64-bit
if (cbuf) { if (cbuf) {
emit_opcode(*cbuf, 0x66); MacroAssembler _masm(cbuf);
if (Matcher::_regEncode[dst_first] < 8) { __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
if (Matcher::_regEncode[src_first] < 8) {
emit_opcode(*cbuf, Assembler::REX_W);
} else {
emit_opcode(*cbuf, Assembler::REX_WB);
}
} else {
if (Matcher::_regEncode[src_first] < 8) {
emit_opcode(*cbuf, Assembler::REX_WR);
} else {
emit_opcode(*cbuf, Assembler::REX_WRB);
}
}
emit_opcode(*cbuf, 0x0F);
emit_opcode(*cbuf, 0x6E);
emit_rm(*cbuf, 0x3,
Matcher::_regEncode[dst_first] & 7,
Matcher::_regEncode[src_first] & 7);
#ifndef PRODUCT #ifndef PRODUCT
} else if (!do_size) { } else if (!do_size) {
st->print("movdq %s, %s\t# spill", st->print("movdq %s, %s\t# spill",
...@@ -1482,23 +1436,8 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf, ...@@ -1482,23 +1436,8 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
if (cbuf) { if (cbuf) {
emit_opcode(*cbuf, 0x66); MacroAssembler _masm(cbuf);
if (Matcher::_regEncode[dst_first] < 8) { __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
if (Matcher::_regEncode[src_first] >= 8) {
emit_opcode(*cbuf, Assembler::REX_B);
}
} else {
if (Matcher::_regEncode[src_first] < 8) {
emit_opcode(*cbuf, Assembler::REX_R);
} else {
emit_opcode(*cbuf, Assembler::REX_RB);
}
}
emit_opcode(*cbuf, 0x0F);
emit_opcode(*cbuf, 0x6E);
emit_rm(*cbuf, 0x3,
Matcher::_regEncode[dst_first] & 7,
Matcher::_regEncode[src_first] & 7);
#ifndef PRODUCT #ifndef PRODUCT
} else if (!do_size) { } else if (!do_size) {
st->print("movdl %s, %s\t# spill", st->print("movdl %s, %s\t# spill",
...@@ -1507,9 +1446,9 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf, ...@@ -1507,9 +1446,9 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
#endif #endif
} }
return return
(Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8) (Matcher::_regEncode[src_first] >= 8 || Matcher::_regEncode[dst_first] >= 8)
? 4 ? 5
: 5; // REX : (4 + ((UseAVX>0)?1:0)); // REX
} }
} }
} else if (src_first_rc == rc_float) { } else if (src_first_rc == rc_float) {
...@@ -1521,16 +1460,8 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf, ...@@ -1521,16 +1460,8 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
// 64-bit // 64-bit
int offset = ra_->reg2offset(dst_first); int offset = ra_->reg2offset(dst_first);
if (cbuf) { if (cbuf) {
emit_opcode(*cbuf, 0xF2); MacroAssembler _masm(cbuf);
if (Matcher::_regEncode[src_first] >= 8) { __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
emit_opcode(*cbuf, Assembler::REX_R);
}
emit_opcode(*cbuf, 0x0F);
emit_opcode(*cbuf, 0x11);
encode_RegMem(*cbuf,
Matcher::_regEncode[src_first],
RSP_enc, 0x4, 0, offset,
false);
#ifndef PRODUCT #ifndef PRODUCT
} else if (!do_size) { } else if (!do_size) {
st->print("movsd [rsp + #%d], %s\t# spill", st->print("movsd [rsp + #%d], %s\t# spill",
...@@ -1540,25 +1471,17 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf, ...@@ -1540,25 +1471,17 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
} }
return return
((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
((Matcher::_regEncode[src_first] < 8) ((Matcher::_regEncode[src_first] >= 8)
? 5 ? 6
: 6); // REX : (5 + ((UseAVX>0)?1:0))); // REX
} else { } else {
// 32-bit // 32-bit
assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
int offset = ra_->reg2offset(dst_first); int offset = ra_->reg2offset(dst_first);
if (cbuf) { if (cbuf) {
emit_opcode(*cbuf, 0xF3); MacroAssembler _masm(cbuf);
if (Matcher::_regEncode[src_first] >= 8) { __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
emit_opcode(*cbuf, Assembler::REX_R);
}
emit_opcode(*cbuf, 0x0F);
emit_opcode(*cbuf, 0x11);
encode_RegMem(*cbuf,
Matcher::_regEncode[src_first],
RSP_enc, 0x4, 0, offset,
false);
#ifndef PRODUCT #ifndef PRODUCT
} else if (!do_size) { } else if (!do_size) {
st->print("movss [rsp + #%d], %s\t# spill", st->print("movss [rsp + #%d], %s\t# spill",
...@@ -1568,9 +1491,9 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf, ...@@ -1568,9 +1491,9 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
} }
return return
((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
((Matcher::_regEncode[src_first] < 8) ((Matcher::_regEncode[src_first] >= 8)
? 5 ? 6
: 6); // REX : (5 + ((UseAVX>0)?1:0))); // REX
} }
} else if (dst_first_rc == rc_int) { } else if (dst_first_rc == rc_int) {
// xmm -> gpr // xmm -> gpr
...@@ -1578,25 +1501,8 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf, ...@@ -1578,25 +1501,8 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
(dst_first & 1) == 0 && dst_first + 1 == dst_second) { (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
// 64-bit // 64-bit
if (cbuf) { if (cbuf) {
emit_opcode(*cbuf, 0x66); MacroAssembler _masm(cbuf);
if (Matcher::_regEncode[dst_first] < 8) { __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
if (Matcher::_regEncode[src_first] < 8) {
emit_opcode(*cbuf, Assembler::REX_W);
} else {
emit_opcode(*cbuf, Assembler::REX_WR); // attention!
}
} else {
if (Matcher::_regEncode[src_first] < 8) {
emit_opcode(*cbuf, Assembler::REX_WB); // attention!
} else {
emit_opcode(*cbuf, Assembler::REX_WRB);
}
}
emit_opcode(*cbuf, 0x0F);
emit_opcode(*cbuf, 0x7E);
emit_rm(*cbuf, 0x3,
Matcher::_regEncode[src_first] & 7,
Matcher::_regEncode[dst_first] & 7);
#ifndef PRODUCT #ifndef PRODUCT
} else if (!do_size) { } else if (!do_size) {
st->print("movdq %s, %s\t# spill", st->print("movdq %s, %s\t# spill",
...@@ -1610,23 +1516,8 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf, ...@@ -1610,23 +1516,8 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
if (cbuf) { if (cbuf) {
emit_opcode(*cbuf, 0x66); MacroAssembler _masm(cbuf);
if (Matcher::_regEncode[dst_first] < 8) { __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
if (Matcher::_regEncode[src_first] >= 8) {
emit_opcode(*cbuf, Assembler::REX_R); // attention!
}
} else {
if (Matcher::_regEncode[src_first] < 8) {
emit_opcode(*cbuf, Assembler::REX_B); // attention!
} else {
emit_opcode(*cbuf, Assembler::REX_RB);
}
}
emit_opcode(*cbuf, 0x0F);
emit_opcode(*cbuf, 0x7E);
emit_rm(*cbuf, 0x3,
Matcher::_regEncode[src_first] & 7,
Matcher::_regEncode[dst_first] & 7);
#ifndef PRODUCT #ifndef PRODUCT
} else if (!do_size) { } else if (!do_size) {
st->print("movdl %s, %s\t# spill", st->print("movdl %s, %s\t# spill",
...@@ -1635,9 +1526,9 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf, ...@@ -1635,9 +1526,9 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
#endif #endif
} }
return return
(Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8) (Matcher::_regEncode[src_first] >= 8 || Matcher::_regEncode[dst_first] >= 8)
? 4 ? 5
: 5; // REX : (4 + ((UseAVX>0)?1:0)); // REX
} }
} else if (dst_first_rc == rc_float) { } else if (dst_first_rc == rc_float) {
// xmm -> xmm // xmm -> xmm
...@@ -1645,23 +1536,8 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf, ...@@ -1645,23 +1536,8 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
(dst_first & 1) == 0 && dst_first + 1 == dst_second) { (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
// 64-bit // 64-bit
if (cbuf) { if (cbuf) {
emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x66 : 0xF2); MacroAssembler _masm(cbuf);
if (Matcher::_regEncode[dst_first] < 8) { __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
if (Matcher::_regEncode[src_first] >= 8) {
emit_opcode(*cbuf, Assembler::REX_B);
}
} else {
if (Matcher::_regEncode[src_first] < 8) {
emit_opcode(*cbuf, Assembler::REX_R);
} else {
emit_opcode(*cbuf, Assembler::REX_RB);
}
}
emit_opcode(*cbuf, 0x0F);
emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
emit_rm(*cbuf, 0x3,
Matcher::_regEncode[dst_first] & 7,
Matcher::_regEncode[src_first] & 7);
#ifndef PRODUCT #ifndef PRODUCT
} else if (!do_size) { } else if (!do_size) {
st->print("%s %s, %s\t# spill", st->print("%s %s, %s\t# spill",
...@@ -1671,32 +1547,16 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf, ...@@ -1671,32 +1547,16 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
#endif #endif
} }
return return
(Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8) (Matcher::_regEncode[src_first] >= 8 || Matcher::_regEncode[dst_first] >= 8)
? 4 ? 5
: 5; // REX : (4 + ((UseAVX>0)?1:0)); // REX
} else { } else {
// 32-bit // 32-bit
assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
if (cbuf) { if (cbuf) {
if (!UseXmmRegToRegMoveAll) MacroAssembler _masm(cbuf);
emit_opcode(*cbuf, 0xF3); __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
if (Matcher::_regEncode[dst_first] < 8) {
if (Matcher::_regEncode[src_first] >= 8) {
emit_opcode(*cbuf, Assembler::REX_B);
}
} else {
if (Matcher::_regEncode[src_first] < 8) {
emit_opcode(*cbuf, Assembler::REX_R);
} else {
emit_opcode(*cbuf, Assembler::REX_RB);
}
}
emit_opcode(*cbuf, 0x0F);
emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
emit_rm(*cbuf, 0x3,
Matcher::_regEncode[dst_first] & 7,
Matcher::_regEncode[src_first] & 7);
#ifndef PRODUCT #ifndef PRODUCT
} else if (!do_size) { } else if (!do_size) {
st->print("%s %s, %s\t# spill", st->print("%s %s, %s\t# spill",
...@@ -1705,10 +1565,10 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf, ...@@ -1705,10 +1565,10 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
Matcher::regName[src_first]); Matcher::regName[src_first]);
#endif #endif
} }
return return ((UseAVX>0) ? 5:
(Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8) ((Matcher::_regEncode[src_first] >= 8 || Matcher::_regEncode[dst_first] >= 8)
? (UseXmmRegToRegMoveAll ? 3 : 4) ? (UseXmmRegToRegMoveAll ? 4 : 5)
: (UseXmmRegToRegMoveAll ? 4 : 5); // REX : (UseXmmRegToRegMoveAll ? 3 : 4))); // REX
} }
} }
} }
...@@ -2205,47 +2065,6 @@ encode %{ ...@@ -2205,47 +2065,6 @@ encode %{
emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7); emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
%} %}
enc_class cmpfp_fixup() %{
MacroAssembler _masm(&cbuf);
emit_cmpfp_fixup(_masm);
%}
enc_class cmpfp3(rRegI dst)
%{
int dstenc = $dst$$reg;
// movl $dst, -1
if (dstenc >= 8) {
emit_opcode(cbuf, Assembler::REX_B);
}
emit_opcode(cbuf, 0xB8 | (dstenc & 7));
emit_d32(cbuf, -1);
// jp,s done
emit_opcode(cbuf, 0x7A);
emit_d8(cbuf, dstenc < 4 ? 0x08 : 0x0A);
// jb,s done
emit_opcode(cbuf, 0x72);
emit_d8(cbuf, dstenc < 4 ? 0x06 : 0x08);
// setne $dst
if (dstenc >= 4) {
emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_B);
}
emit_opcode(cbuf, 0x0F);
emit_opcode(cbuf, 0x95);
emit_opcode(cbuf, 0xC0 | (dstenc & 7));
// movzbl $dst, $dst
if (dstenc >= 4) {
emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_RB);
}
emit_opcode(cbuf, 0x0F);
emit_opcode(cbuf, 0xB6);
emit_rm(cbuf, 0x3, dstenc & 7, dstenc & 7);
%}
enc_class cdql_enc(no_rax_rdx_RegI div) enc_class cdql_enc(no_rax_rdx_RegI div)
%{ %{
// Full implementation of Java idiv and irem; checks for // Full implementation of Java idiv and irem; checks for
...@@ -2472,55 +2291,6 @@ encode %{ ...@@ -2472,55 +2291,6 @@ encode %{
emit_cc(cbuf, $secondary, $cop$$cmpcode); emit_cc(cbuf, $secondary, $cop$$cmpcode);
%} %}
enc_class enc_cmovf_branch(cmpOp cop, regF dst, regF src)
%{
// Invert sense of branch from sense of cmov
emit_cc(cbuf, 0x70, $cop$$cmpcode ^ 1);
emit_d8(cbuf, ($dst$$reg < 8 && $src$$reg < 8)
? (UseXmmRegToRegMoveAll ? 3 : 4)
: (UseXmmRegToRegMoveAll ? 4 : 5) ); // REX
// UseXmmRegToRegMoveAll ? movaps(dst, src) : movss(dst, src)
if (!UseXmmRegToRegMoveAll) emit_opcode(cbuf, 0xF3);
if ($dst$$reg < 8) {
if ($src$$reg >= 8) {
emit_opcode(cbuf, Assembler::REX_B);
}
} else {
if ($src$$reg < 8) {
emit_opcode(cbuf, Assembler::REX_R);
} else {
emit_opcode(cbuf, Assembler::REX_RB);
}
}
emit_opcode(cbuf, 0x0F);
emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
%}
enc_class enc_cmovd_branch(cmpOp cop, regD dst, regD src)
%{
// Invert sense of branch from sense of cmov
emit_cc(cbuf, 0x70, $cop$$cmpcode ^ 1);
emit_d8(cbuf, $dst$$reg < 8 && $src$$reg < 8 ? 4 : 5); // REX
// UseXmmRegToRegMoveAll ? movapd(dst, src) : movsd(dst, src)
emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x66 : 0xF2);
if ($dst$$reg < 8) {
if ($src$$reg >= 8) {
emit_opcode(cbuf, Assembler::REX_B);
}
} else {
if ($src$$reg < 8) {
emit_opcode(cbuf, Assembler::REX_R);
} else {
emit_opcode(cbuf, Assembler::REX_RB);
}
}
emit_opcode(cbuf, 0x0F);
emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
%}
enc_class enc_PartialSubtypeCheck() enc_class enc_PartialSubtypeCheck()
%{ %{
Register Rrdi = as_Register(RDI_enc); // result register Register Rrdi = as_Register(RDI_enc); // result register
...@@ -2751,68 +2521,6 @@ encode %{ ...@@ -2751,68 +2521,6 @@ encode %{
} }
%} %}
// Encode a reg-reg copy. If it is useless, then empty encoding.
enc_class enc_copy(rRegI dst, rRegI src)
%{
encode_copy(cbuf, $dst$$reg, $src$$reg);
%}
// Encode xmm reg-reg copy. If it is useless, then empty encoding.
enc_class enc_CopyXD( RegD dst, RegD src ) %{
encode_CopyXD( cbuf, $dst$$reg, $src$$reg );
%}
enc_class enc_copy_always(rRegI dst, rRegI src)
%{
int srcenc = $src$$reg;
int dstenc = $dst$$reg;
if (dstenc < 8) {
if (srcenc >= 8) {
emit_opcode(cbuf, Assembler::REX_B);
srcenc -= 8;
}
} else {
if (srcenc < 8) {
emit_opcode(cbuf, Assembler::REX_R);
} else {
emit_opcode(cbuf, Assembler::REX_RB);
srcenc -= 8;
}
dstenc -= 8;
}
emit_opcode(cbuf, 0x8B);
emit_rm(cbuf, 0x3, dstenc, srcenc);
%}
enc_class enc_copy_wide(rRegL dst, rRegL src)
%{
int srcenc = $src$$reg;
int dstenc = $dst$$reg;
if (dstenc != srcenc) {
if (dstenc < 8) {
if (srcenc < 8) {
emit_opcode(cbuf, Assembler::REX_W);
} else {
emit_opcode(cbuf, Assembler::REX_WB);
srcenc -= 8;
}
} else {
if (srcenc < 8) {
emit_opcode(cbuf, Assembler::REX_WR);
} else {
emit_opcode(cbuf, Assembler::REX_WRB);
srcenc -= 8;
}
dstenc -= 8;
}
emit_opcode(cbuf, 0x8B);
emit_rm(cbuf, 0x3, dstenc, srcenc);
}
%}
enc_class Con32(immI src) enc_class Con32(immI src)
%{ %{
// Output immediate // Output immediate
...@@ -3212,92 +2920,19 @@ encode %{ ...@@ -3212,92 +2920,19 @@ encode %{
%} %}
enc_class Push_ResultXD(regD dst) %{ enc_class Push_ResultXD(regD dst) %{
int dstenc = $dst$$reg;
store_to_stackslot( cbuf, 0xDD, 0x03, 0 ); //FSTP [RSP]
// UseXmmLoadAndClearUpper ? movsd dst,[rsp] : movlpd dst,[rsp]
emit_opcode (cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
if (dstenc >= 8) {
emit_opcode(cbuf, Assembler::REX_R);
}
emit_opcode (cbuf, 0x0F );
emit_opcode (cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12 );
encode_RegMem(cbuf, dstenc, RSP_enc, 0x4, 0, 0, false);
// add rsp,8
emit_opcode(cbuf, Assembler::REX_W);
emit_opcode(cbuf,0x83);
emit_rm(cbuf,0x3, 0x0, RSP_enc);
emit_d8(cbuf,0x08);
%}
enc_class Push_SrcXD(regD src) %{
int srcenc = $src$$reg;
// subq rsp,#8
emit_opcode(cbuf, Assembler::REX_W);
emit_opcode(cbuf, 0x83);
emit_rm(cbuf, 0x3, 0x5, RSP_enc);
emit_d8(cbuf, 0x8);
// movsd [rsp],src
emit_opcode(cbuf, 0xF2);
if (srcenc >= 8) {
emit_opcode(cbuf, Assembler::REX_R);
}
emit_opcode(cbuf, 0x0F);
emit_opcode(cbuf, 0x11);
encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false);
// fldd [rsp]
emit_opcode(cbuf, 0x66);
emit_opcode(cbuf, 0xDD);
encode_RegMem(cbuf, 0x0, RSP_enc, 0x4, 0, 0, false);
%}
enc_class movq_ld(regD dst, memory mem) %{
MacroAssembler _masm(&cbuf);
__ movq($dst$$XMMRegister, $mem$$Address);
%}
enc_class movq_st(memory mem, regD src) %{
MacroAssembler _masm(&cbuf);
__ movq($mem$$Address, $src$$XMMRegister);
%}
enc_class pshufd_8x8(regF dst, regF src) %{
MacroAssembler _masm(&cbuf);
encode_CopyXD(cbuf, $dst$$reg, $src$$reg);
__ punpcklbw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg));
__ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg), 0x00);
%}
enc_class pshufd_4x16(regF dst, regF src) %{
MacroAssembler _masm(&cbuf); MacroAssembler _masm(&cbuf);
__ fstp_d(Address(rsp, 0));
__ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), 0x00); __ movdbl($dst$$XMMRegister, Address(rsp, 0));
__ addptr(rsp, 8);
%} %}
enc_class pshufd(regD dst, regD src, int mode) %{ enc_class Push_SrcXD(regD src) %{
MacroAssembler _masm(&cbuf); MacroAssembler _masm(&cbuf);
__ subptr(rsp, 8);
__ pshufd(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), $mode); __ movdbl(Address(rsp, 0), $src$$XMMRegister);
__ fld_d(Address(rsp, 0));
%} %}
enc_class pxor(regD dst, regD src) %{
MacroAssembler _masm(&cbuf);
__ pxor(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg));
%}
enc_class mov_i2x(regD dst, rRegI src) %{
MacroAssembler _masm(&cbuf);
__ movdl(as_XMMRegister($dst$$reg), as_Register($src$$reg));
%}
// obj: object to lock // obj: object to lock
// box: box address (header location) -- killed // box: box address (header location) -- killed
...@@ -3534,303 +3169,6 @@ encode %{ ...@@ -3534,303 +3169,6 @@ encode %{
RELOC_DISP32); RELOC_DISP32);
%} %}
enc_class absF_encoding(regF dst)
%{
int dstenc = $dst$$reg;
address signmask_address = (address) StubRoutines::x86::float_sign_mask();
cbuf.set_insts_mark();
if (dstenc >= 8) {
emit_opcode(cbuf, Assembler::REX_R);
dstenc -= 8;
}
// XXX reg_mem doesn't support RIP-relative addressing yet
emit_opcode(cbuf, 0x0F);
emit_opcode(cbuf, 0x54);
emit_rm(cbuf, 0x0, dstenc, 0x5); // 00 reg 101
emit_d32_reloc(cbuf, signmask_address);
%}
enc_class absD_encoding(regD dst)
%{
int dstenc = $dst$$reg;
address signmask_address = (address) StubRoutines::x86::double_sign_mask();
cbuf.set_insts_mark();
emit_opcode(cbuf, 0x66);
if (dstenc >= 8) {
emit_opcode(cbuf, Assembler::REX_R);
dstenc -= 8;
}
// XXX reg_mem doesn't support RIP-relative addressing yet
emit_opcode(cbuf, 0x0F);
emit_opcode(cbuf, 0x54);
emit_rm(cbuf, 0x0, dstenc, 0x5); // 00 reg 101
emit_d32_reloc(cbuf, signmask_address);
%}
enc_class negF_encoding(regF dst)
%{
int dstenc = $dst$$reg;
address signflip_address = (address) StubRoutines::x86::float_sign_flip();
cbuf.set_insts_mark();
if (dstenc >= 8) {
emit_opcode(cbuf, Assembler::REX_R);
dstenc -= 8;
}
// XXX reg_mem doesn't support RIP-relative addressing yet
emit_opcode(cbuf, 0x0F);
emit_opcode(cbuf, 0x57);
emit_rm(cbuf, 0x0, dstenc, 0x5); // 00 reg 101
emit_d32_reloc(cbuf, signflip_address);
%}
enc_class negD_encoding(regD dst)
%{
int dstenc = $dst$$reg;
address signflip_address = (address) StubRoutines::x86::double_sign_flip();
cbuf.set_insts_mark();
emit_opcode(cbuf, 0x66);
if (dstenc >= 8) {
emit_opcode(cbuf, Assembler::REX_R);
dstenc -= 8;
}
// XXX reg_mem doesn't support RIP-relative addressing yet
emit_opcode(cbuf, 0x0F);
emit_opcode(cbuf, 0x57);
emit_rm(cbuf, 0x0, dstenc, 0x5); // 00 reg 101
emit_d32_reloc(cbuf, signflip_address);
%}
enc_class f2i_fixup(rRegI dst, regF src)
%{
int dstenc = $dst$$reg;
int srcenc = $src$$reg;
// cmpl $dst, #0x80000000
if (dstenc >= 8) {
emit_opcode(cbuf, Assembler::REX_B);
}
emit_opcode(cbuf, 0x81);
emit_rm(cbuf, 0x3, 0x7, dstenc & 7);
emit_d32(cbuf, 0x80000000);
// jne,s done
emit_opcode(cbuf, 0x75);
if (srcenc < 8 && dstenc < 8) {
emit_d8(cbuf, 0xF);
} else if (srcenc >= 8 && dstenc >= 8) {
emit_d8(cbuf, 0x11);
} else {
emit_d8(cbuf, 0x10);
}
// subq rsp, #8
emit_opcode(cbuf, Assembler::REX_W);
emit_opcode(cbuf, 0x83);
emit_rm(cbuf, 0x3, 0x5, RSP_enc);
emit_d8(cbuf, 8);
// movss [rsp], $src
emit_opcode(cbuf, 0xF3);
if (srcenc >= 8) {
emit_opcode(cbuf, Assembler::REX_R);
}
emit_opcode(cbuf, 0x0F);
emit_opcode(cbuf, 0x11);
encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
// call f2i_fixup
cbuf.set_insts_mark();
emit_opcode(cbuf, 0xE8);
emit_d32_reloc(cbuf,
(int)
(StubRoutines::x86::f2i_fixup() - cbuf.insts_end() - 4),
runtime_call_Relocation::spec(),
RELOC_DISP32);
// popq $dst
if (dstenc >= 8) {
emit_opcode(cbuf, Assembler::REX_B);
}
emit_opcode(cbuf, 0x58 | (dstenc & 7));
// done:
%}
enc_class f2l_fixup(rRegL dst, regF src)
%{
int dstenc = $dst$$reg;
int srcenc = $src$$reg;
address const_address = (address) StubRoutines::x86::double_sign_flip();
// cmpq $dst, [0x8000000000000000]
cbuf.set_insts_mark();
emit_opcode(cbuf, dstenc < 8 ? Assembler::REX_W : Assembler::REX_WR);
emit_opcode(cbuf, 0x39);
// XXX reg_mem doesn't support RIP-relative addressing yet
emit_rm(cbuf, 0x0, dstenc & 7, 0x5); // 00 reg 101
emit_d32_reloc(cbuf, const_address);
// jne,s done
emit_opcode(cbuf, 0x75);
if (srcenc < 8 && dstenc < 8) {
emit_d8(cbuf, 0xF);
} else if (srcenc >= 8 && dstenc >= 8) {
emit_d8(cbuf, 0x11);
} else {
emit_d8(cbuf, 0x10);
}
// subq rsp, #8
emit_opcode(cbuf, Assembler::REX_W);
emit_opcode(cbuf, 0x83);
emit_rm(cbuf, 0x3, 0x5, RSP_enc);
emit_d8(cbuf, 8);
// movss [rsp], $src
emit_opcode(cbuf, 0xF3);
if (srcenc >= 8) {
emit_opcode(cbuf, Assembler::REX_R);
}
emit_opcode(cbuf, 0x0F);
emit_opcode(cbuf, 0x11);
encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
// call f2l_fixup
cbuf.set_insts_mark();
emit_opcode(cbuf, 0xE8);
emit_d32_reloc(cbuf,
(int)
(StubRoutines::x86::f2l_fixup() - cbuf.insts_end() - 4),
runtime_call_Relocation::spec(),
RELOC_DISP32);
// popq $dst
if (dstenc >= 8) {
emit_opcode(cbuf, Assembler::REX_B);
}
emit_opcode(cbuf, 0x58 | (dstenc & 7));
// done:
%}
enc_class d2i_fixup(rRegI dst, regD src)
%{
int dstenc = $dst$$reg;
int srcenc = $src$$reg;
// cmpl $dst, #0x80000000
if (dstenc >= 8) {
emit_opcode(cbuf, Assembler::REX_B);
}
emit_opcode(cbuf, 0x81);
emit_rm(cbuf, 0x3, 0x7, dstenc & 7);
emit_d32(cbuf, 0x80000000);
// jne,s done
emit_opcode(cbuf, 0x75);
if (srcenc < 8 && dstenc < 8) {
emit_d8(cbuf, 0xF);
} else if (srcenc >= 8 && dstenc >= 8) {
emit_d8(cbuf, 0x11);
} else {
emit_d8(cbuf, 0x10);
}
// subq rsp, #8
emit_opcode(cbuf, Assembler::REX_W);
emit_opcode(cbuf, 0x83);
emit_rm(cbuf, 0x3, 0x5, RSP_enc);
emit_d8(cbuf, 8);
// movsd [rsp], $src
emit_opcode(cbuf, 0xF2);
if (srcenc >= 8) {
emit_opcode(cbuf, Assembler::REX_R);
}
emit_opcode(cbuf, 0x0F);
emit_opcode(cbuf, 0x11);
encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
// call d2i_fixup
cbuf.set_insts_mark();
emit_opcode(cbuf, 0xE8);
emit_d32_reloc(cbuf,
(int)
(StubRoutines::x86::d2i_fixup() - cbuf.insts_end() - 4),
runtime_call_Relocation::spec(),
RELOC_DISP32);
// popq $dst
if (dstenc >= 8) {
emit_opcode(cbuf, Assembler::REX_B);
}
emit_opcode(cbuf, 0x58 | (dstenc & 7));
// done:
%}
enc_class d2l_fixup(rRegL dst, regD src)
%{
int dstenc = $dst$$reg;
int srcenc = $src$$reg;
address const_address = (address) StubRoutines::x86::double_sign_flip();
// cmpq $dst, [0x8000000000000000]
cbuf.set_insts_mark();
emit_opcode(cbuf, dstenc < 8 ? Assembler::REX_W : Assembler::REX_WR);
emit_opcode(cbuf, 0x39);
// XXX reg_mem doesn't support RIP-relative addressing yet
emit_rm(cbuf, 0x0, dstenc & 7, 0x5); // 00 reg 101
emit_d32_reloc(cbuf, const_address);
// jne,s done
emit_opcode(cbuf, 0x75);
if (srcenc < 8 && dstenc < 8) {
emit_d8(cbuf, 0xF);
} else if (srcenc >= 8 && dstenc >= 8) {
emit_d8(cbuf, 0x11);
} else {
emit_d8(cbuf, 0x10);
}
// subq rsp, #8
emit_opcode(cbuf, Assembler::REX_W);
emit_opcode(cbuf, 0x83);
emit_rm(cbuf, 0x3, 0x5, RSP_enc);
emit_d8(cbuf, 8);
// movsd [rsp], $src
emit_opcode(cbuf, 0xF2);
if (srcenc >= 8) {
emit_opcode(cbuf, Assembler::REX_R);
}
emit_opcode(cbuf, 0x0F);
emit_opcode(cbuf, 0x11);
encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
// call d2l_fixup
cbuf.set_insts_mark();
emit_opcode(cbuf, 0xE8);
emit_d32_reloc(cbuf,
(int)
(StubRoutines::x86::d2l_fixup() - cbuf.insts_end() - 4),
runtime_call_Relocation::spec(),
RELOC_DISP32);
// popq $dst
if (dstenc >= 8) {
emit_opcode(cbuf, Assembler::REX_B);
}
emit_opcode(cbuf, 0x58 | (dstenc & 7));
// done:
%}
%} %}
...@@ -6156,8 +5494,9 @@ instruct loadF(regF dst, memory mem) ...@@ -6156,8 +5494,9 @@ instruct loadF(regF dst, memory mem)
ins_cost(145); // XXX ins_cost(145); // XXX
format %{ "movss $dst, $mem\t# float" %} format %{ "movss $dst, $mem\t# float" %}
opcode(0xF3, 0x0F, 0x10); ins_encode %{
ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem)); __ movflt($dst$$XMMRegister, $mem$$Address);
%}
ins_pipe(pipe_slow); // XXX ins_pipe(pipe_slow); // XXX
%} %}
...@@ -6169,8 +5508,9 @@ instruct loadD_partial(regD dst, memory mem) ...@@ -6169,8 +5508,9 @@ instruct loadD_partial(regD dst, memory mem)
ins_cost(145); // XXX ins_cost(145); // XXX
format %{ "movlpd $dst, $mem\t# double" %} format %{ "movlpd $dst, $mem\t# double" %}
opcode(0x66, 0x0F, 0x12); ins_encode %{
ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem)); __ movdbl($dst$$XMMRegister, $mem$$Address);
%}
ins_pipe(pipe_slow); // XXX ins_pipe(pipe_slow); // XXX
%} %}
...@@ -6181,8 +5521,9 @@ instruct loadD(regD dst, memory mem) ...@@ -6181,8 +5521,9 @@ instruct loadD(regD dst, memory mem)
ins_cost(145); // XXX ins_cost(145); // XXX
format %{ "movsd $dst, $mem\t# double" %} format %{ "movsd $dst, $mem\t# double" %}
opcode(0xF2, 0x0F, 0x10); ins_encode %{
ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem)); __ movdbl($dst$$XMMRegister, $mem$$Address);
%}
ins_pipe(pipe_slow); // XXX ins_pipe(pipe_slow); // XXX
%} %}
@@ -6191,7 +5532,9 @@ instruct loadA8B(regD dst, memory mem) %{
   match(Set dst (Load8B mem));
   ins_cost(125);
   format %{ "MOVQ $dst,$mem\t! packed8B" %}
-  ins_encode( movq_ld(dst, mem));
+  ins_encode %{
+    __ movq($dst$$XMMRegister, $mem$$Address);
+  %}
   ins_pipe( pipe_slow );
 %}
@@ -6200,7 +5543,9 @@ instruct loadA4S(regD dst, memory mem) %{
   match(Set dst (Load4S mem));
   ins_cost(125);
   format %{ "MOVQ $dst,$mem\t! packed4S" %}
-  ins_encode( movq_ld(dst, mem));
+  ins_encode %{
+    __ movq($dst$$XMMRegister, $mem$$Address);
+  %}
   ins_pipe( pipe_slow );
 %}
@@ -6209,7 +5554,9 @@ instruct loadA4C(regD dst, memory mem) %{
   match(Set dst (Load4C mem));
   ins_cost(125);
   format %{ "MOVQ $dst,$mem\t! packed4C" %}
-  ins_encode( movq_ld(dst, mem));
+  ins_encode %{
+    __ movq($dst$$XMMRegister, $mem$$Address);
+  %}
   ins_pipe( pipe_slow );
 %}
@@ -6218,16 +5565,20 @@ instruct load2IU(regD dst, memory mem) %{
   match(Set dst (Load2I mem));
   ins_cost(125);
   format %{ "MOVQ $dst,$mem\t! packed2I" %}
-  ins_encode( movq_ld(dst, mem));
+  ins_encode %{
+    __ movq($dst$$XMMRegister, $mem$$Address);
+  %}
   ins_pipe( pipe_slow );
 %}
 // Load Aligned Packed Single to XMM
 instruct loadA2F(regD dst, memory mem) %{
   match(Set dst (Load2F mem));
-  ins_cost(145);
+  ins_cost(125);
   format %{ "MOVQ $dst,$mem\t! packed2F" %}
-  ins_encode( movq_ld(dst, mem));
+  ins_encode %{
+    __ movq($dst$$XMMRegister, $mem$$Address);
+  %}
   ins_pipe( pipe_slow );
 %}
@@ -6540,8 +5891,9 @@ instruct loadConF0(regF dst, immF0 src)
   ins_cost(100);
   format %{ "xorps $dst, $dst\t# float 0.0" %}
-  opcode(0x0F, 0x57);
-  ins_encode(REX_reg_reg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
+  ins_encode %{
+    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
   ins_pipe(pipe_slow);
 %}
@@ -6562,8 +5914,9 @@ instruct loadConD0(regD dst, immD0 src)
   ins_cost(100);
   format %{ "xorpd $dst, $dst\t# double 0.0" %}
-  opcode(0x66, 0x0F, 0x57);
-  ins_encode(OpcP, REX_reg_reg(dst, dst), OpcS, OpcT, reg_reg(dst, dst));
+  ins_encode %{
+    __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
   ins_pipe(pipe_slow);
 %}
@@ -6606,8 +5959,9 @@ instruct loadSSF(regF dst, stackSlotF src)
   ins_cost(125);
   format %{ "movss $dst, $src\t# float stk" %}
-  opcode(0xF3, 0x0F, 0x10);
-  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
+  ins_encode %{
+    __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
+  %}
   ins_pipe(pipe_slow); // XXX
 %}
@@ -6972,7 +6326,9 @@ instruct storeA8B(memory mem, regD src) %{
   match(Set mem (Store8B mem src));
   ins_cost(145);
   format %{ "MOVQ $mem,$src\t! packed8B" %}
-  ins_encode( movq_st(mem, src));
+  ins_encode %{
+    __ movq($mem$$Address, $src$$XMMRegister);
+  %}
   ins_pipe( pipe_slow );
 %}
@@ -6981,7 +6337,9 @@ instruct storeA4C(memory mem, regD src) %{
   match(Set mem (Store4C mem src));
   ins_cost(145);
   format %{ "MOVQ $mem,$src\t! packed4C" %}
-  ins_encode( movq_st(mem, src));
+  ins_encode %{
+    __ movq($mem$$Address, $src$$XMMRegister);
+  %}
   ins_pipe( pipe_slow );
 %}
@@ -6990,7 +6348,9 @@ instruct storeA2I(memory mem, regD src) %{
   match(Set mem (Store2I mem src));
   ins_cost(145);
   format %{ "MOVQ $mem,$src\t! packed2I" %}
-  ins_encode( movq_st(mem, src));
+  ins_encode %{
+    __ movq($mem$$Address, $src$$XMMRegister);
+  %}
   ins_pipe( pipe_slow );
 %}
@@ -7024,7 +6384,9 @@ instruct storeA2F(memory mem, regD src) %{
   match(Set mem (Store2F mem src));
   ins_cost(145);
   format %{ "MOVQ $mem,$src\t! packed2F" %}
-  ins_encode( movq_st(mem, src));
+  ins_encode %{
+    __ movq($mem$$Address, $src$$XMMRegister);
+  %}
   ins_pipe( pipe_slow );
 %}
@@ -7035,8 +6397,9 @@ instruct storeF(memory mem, regF src)
   ins_cost(95); // XXX
   format %{ "movss $mem, $src\t# float" %}
-  opcode(0xF3, 0x0F, 0x11);
-  ins_encode(OpcP, REX_reg_mem(src, mem), OpcS, OpcT, reg_mem(src, mem));
+  ins_encode %{
+    __ movflt($mem$$Address, $src$$XMMRegister);
+  %}
   ins_pipe(pipe_slow); // XXX
 %}
@@ -7072,8 +6435,9 @@ instruct storeD(memory mem, regD src)
   ins_cost(95); // XXX
   format %{ "movsd $mem, $src\t# double" %}
-  opcode(0xF2, 0x0F, 0x11);
-  ins_encode(OpcP, REX_reg_mem(src, mem), OpcS, OpcT, reg_mem(src, mem));
+  ins_encode %{
+    __ movdbl($mem$$Address, $src$$XMMRegister);
+  %}
   ins_pipe(pipe_slow); // XXX
 %}
@@ -7142,8 +6506,9 @@ instruct storeSSF(stackSlotF dst, regF src)
   ins_cost(95); // XXX
   format %{ "movss $dst, $src\t# float stk" %}
-  opcode(0xF3, 0x0F, 0x11);
-  ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
+  ins_encode %{
+    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
+  %}
   ins_pipe(pipe_slow); // XXX
 %}
@@ -7153,8 +6518,9 @@ instruct storeSSD(stackSlotD dst, regD src)
   ins_cost(95); // XXX
   format %{ "movsd $dst, $src\t# double stk" %}
-  opcode(0xF2, 0x0F, 0x11);
-  ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
+  ins_encode %{
+    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
+  %}
   ins_pipe(pipe_slow); // XXX
 %}
@@ -7451,7 +6817,11 @@ instruct castX2P(rRegP dst, rRegL src)
   match(Set dst (CastX2P src));
   format %{ "movq $dst, $src\t# long->ptr" %}
-  ins_encode(enc_copy_wide(dst, src));
+  ins_encode %{
+    if ($dst$$reg != $src$$reg) {
+      __ movptr($dst$$Register, $src$$Register);
+    }
+  %}
   ins_pipe(ialu_reg_reg); // XXX
 %}
@@ -7460,7 +6830,11 @@ instruct castP2X(rRegL dst, rRegP src)
   match(Set dst (CastP2X src));
   format %{ "movq $dst, $src\t# ptr -> long" %}
-  ins_encode(enc_copy_wide(dst, src));
+  ins_encode %{
+    if ($dst$$reg != $src$$reg) {
+      __ movptr($dst$$Register, $src$$Register);
+    }
+  %}
   ins_pipe(ialu_reg_reg); // XXX
 %}
@@ -7813,7 +7187,13 @@ instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
   format %{ "jn$cop skip\t# signed cmove float\n\t"
             "movss $dst, $src\n"
             "skip:" %}
-  ins_encode(enc_cmovf_branch(cop, dst, src));
+  ins_encode %{
+    Label Lskip;
+    // Invert sense of branch from sense of CMOV
+    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
+    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
+    __ bind(Lskip);
+  %}
   ins_pipe(pipe_slow);
 %}
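A note on the $cop$$cmpcode^1 trick used in the cmove encodings above and below: x86 condition-code encodings come in complementary pairs that differ only in bit 0 (e.g. equal 0x4 / notEqual 0x5, below 0x2 / aboveEqual 0x3), so XOR-ing the code with 1 yields the negated condition. A minimal sketch of that fact, not part of the patch:

// Sketch: negating an x86 condition by flipping bit 0 of its encoding.
static Assembler::Condition negate_condition(Assembler::Condition cc) {
  return (Assembler::Condition)((int)cc ^ 1);
}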
@@ -7837,7 +7217,13 @@ instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
   format %{ "jn$cop skip\t# unsigned cmove float\n\t"
             "movss $dst, $src\n"
             "skip:" %}
-  ins_encode(enc_cmovf_branch(cop, dst, src));
+  ins_encode %{
+    Label Lskip;
+    // Invert sense of branch from sense of CMOV
+    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
+    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
+    __ bind(Lskip);
+  %}
   ins_pipe(pipe_slow);
 %}
@@ -7857,7 +7243,13 @@ instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
   format %{ "jn$cop skip\t# signed cmove double\n\t"
             "movsd $dst, $src\n"
             "skip:" %}
-  ins_encode(enc_cmovd_branch(cop, dst, src));
+  ins_encode %{
+    Label Lskip;
+    // Invert sense of branch from sense of CMOV
+    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
+    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
+    __ bind(Lskip);
+  %}
   ins_pipe(pipe_slow);
 %}
@@ -7869,7 +7261,13 @@ instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
   format %{ "jn$cop skip\t# unsigned cmove double\n\t"
             "movsd $dst, $src\n"
             "skip:" %}
-  ins_encode(enc_cmovd_branch(cop, dst, src));
+  ins_encode %{
+    Label Lskip;
+    // Invert sense of branch from sense of CMOV
+    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
+    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
+    __ bind(Lskip);
+  %}
   ins_pipe(pipe_slow);
 %}
@@ -10191,17 +9589,18 @@ instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
             "pushfq\t# saw NaN, set CF\n\t"
             "andq [rsp], #0xffffff2b\n\t"
             "popfq\n"
-            "exit: nop\t# avoid branch to branch" %}
-  opcode(0x0F, 0x2E);
-  ins_encode(REX_reg_reg(src1, src2), OpcP, OpcS, reg_reg(src1, src2),
-             cmpfp_fixup);
+            "exit:" %}
+  ins_encode %{
+    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
+    emit_cmpfp_fixup(_masm);
+  %}
   ins_pipe(pipe_slow);
 %}
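The old cmpfp_fixup encoding class is replaced by the emit_cmpfp_fixup(_masm) helper called above. Judging from the format string (skip over a pushfq/andq/popfq sequence unless the compare was unordered), the helper presumably looks roughly like the sketch below; its real definition is elsewhere in this patch:

// Sketch only. After ucomiss/ucomisd an unordered compare sets PF, ZF and CF;
// the andq clears ZF and PF on the saved flags while keeping CF, so a NaN
// operand ends up looking like "below" to the unsigned-flags users.
static void emit_cmpfp_fixup(MacroAssembler& _masm) {
  Label exit;
  __ jccb(Assembler::noParity, exit);
  __ pushf();
  __ andq(Address(rsp, 0), 0xffffff2b);
  __ popf();
  __ bind(exit);
}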
 instruct cmpF_cc_reg_CF(rFlagsRegUCF cr, regF src1, regF src2) %{
   match(Set cr (CmpF src1 src2));
-  ins_cost(145);
+  ins_cost(100);
   format %{ "ucomiss $src1, $src2" %}
   ins_encode %{
     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
@@ -10219,10 +9618,11 @@ instruct cmpF_cc_mem(rFlagsRegU cr, regF src1, memory src2)
             "pushfq\t# saw NaN, set CF\n\t"
             "andq [rsp], #0xffffff2b\n\t"
             "popfq\n"
-            "exit: nop\t# avoid branch to branch" %}
-  opcode(0x0F, 0x2E);
-  ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, reg_mem(src1, src2),
-             cmpfp_fixup);
+            "exit:" %}
+  ins_encode %{
+    __ ucomiss($src1$$XMMRegister, $src2$$Address);
+    emit_cmpfp_fixup(_masm);
+  %}
   ins_pipe(pipe_slow);
 %}
@@ -10231,8 +9631,9 @@ instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{
   ins_cost(100);
   format %{ "ucomiss $src1, $src2" %}
-  opcode(0x0F, 0x2E);
-  ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, reg_mem(src1, src2));
+  ins_encode %{
+    __ ucomiss($src1$$XMMRegister, $src2$$Address);
+  %}
   ins_pipe(pipe_slow);
 %}
@@ -10245,7 +9646,7 @@ instruct cmpF_cc_imm(rFlagsRegU cr, regF src, immF con) %{
             "pushfq\t# saw NaN, set CF\n\t"
             "andq [rsp], #0xffffff2b\n\t"
             "popfq\n"
-            "exit: nop\t# avoid branch to branch" %}
+            "exit:" %}
   ins_encode %{
     __ ucomiss($src$$XMMRegister, $constantaddress($con));
     emit_cmpfp_fixup(_masm);
@@ -10273,10 +9674,11 @@ instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
             "pushfq\t# saw NaN, set CF\n\t"
             "andq [rsp], #0xffffff2b\n\t"
             "popfq\n"
-            "exit: nop\t# avoid branch to branch" %}
-  opcode(0x66, 0x0F, 0x2E);
-  ins_encode(OpcP, REX_reg_reg(src1, src2), OpcS, OpcT, reg_reg(src1, src2),
-             cmpfp_fixup);
+            "exit:" %}
+  ins_encode %{
+    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
+    emit_cmpfp_fixup(_masm);
+  %}
   ins_pipe(pipe_slow);
 %}
@@ -10301,10 +9703,11 @@ instruct cmpD_cc_mem(rFlagsRegU cr, regD src1, memory src2)
             "pushfq\t# saw NaN, set CF\n\t"
             "andq [rsp], #0xffffff2b\n\t"
             "popfq\n"
-            "exit: nop\t# avoid branch to branch" %}
-  opcode(0x66, 0x0F, 0x2E);
-  ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, reg_mem(src1, src2),
-             cmpfp_fixup);
+            "exit:" %}
+  ins_encode %{
+    __ ucomisd($src1$$XMMRegister, $src2$$Address);
+    emit_cmpfp_fixup(_masm);
+  %}
   ins_pipe(pipe_slow);
 %}
@@ -10313,8 +9716,9 @@ instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{
   ins_cost(100);
   format %{ "ucomisd $src1, $src2" %}
-  opcode(0x66, 0x0F, 0x2E);
-  ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, reg_mem(src1, src2));
+  ins_encode %{
+    __ ucomisd($src1$$XMMRegister, $src2$$Address);
+  %}
   ins_pipe(pipe_slow);
 %}
@@ -10327,7 +9731,7 @@ instruct cmpD_cc_imm(rFlagsRegU cr, regD src, immD con) %{
             "pushfq\t# saw NaN, set CF\n\t"
             "andq [rsp], #0xffffff2b\n\t"
             "popfq\n"
-            "exit: nop\t# avoid branch to branch" %}
+            "exit:" %}
   ins_encode %{
     __ ucomisd($src$$XMMRegister, $constantaddress($con));
     emit_cmpfp_fixup(_masm);
@@ -10359,10 +9763,10 @@ instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
             "setne $dst\n\t"
             "movzbl $dst, $dst\n"
             "done:" %}
-  opcode(0x0F, 0x2E);
-  ins_encode(REX_reg_reg(src1, src2), OpcP, OpcS, reg_reg(src1, src2),
-             cmpfp3(dst));
+  ins_encode %{
+    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
+    emit_cmpfp3(_masm, $dst$$Register);
+  %}
   ins_pipe(pipe_slow);
 %}
@@ -10380,10 +9784,10 @@ instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
             "setne $dst\n\t"
             "movzbl $dst, $dst\n"
             "done:" %}
-  opcode(0x0F, 0x2E);
-  ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, reg_mem(src1, src2),
-             cmpfp3(dst));
+  ins_encode %{
+    __ ucomiss($src1$$XMMRegister, $src2$$Address);
+    emit_cmpfp3(_masm, $dst$$Register);
+  %}
   ins_pipe(pipe_slow);
 %}
@@ -10401,15 +9805,8 @@ instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr) %{
             "movzbl $dst, $dst\n"
             "done:" %}
   ins_encode %{
-    Label L_done;
-    Register Rdst = $dst$$Register;
     __ ucomiss($src$$XMMRegister, $constantaddress($con));
-    __ movl(Rdst, -1);
-    __ jcc(Assembler::parity, L_done);
-    __ jcc(Assembler::below, L_done);
-    __ setb(Assembler::notEqual, Rdst);
-    __ movzbl(Rdst, Rdst);
-    __ bind(L_done);
+    emit_cmpfp3(_masm, $dst$$Register);
   %}
   ins_pipe(pipe_slow);
 %}
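The three-way compare sequence that used to be spelled out here (and again in cmpD_imm below) is now factored into emit_cmpfp3(_masm, reg). Reconstructed from the encoding it replaces, the helper is essentially:

// Sketch of emit_cmpfp3, taken from the removed inline code above:
// dst = -1 if less or unordered, 0 if equal, 1 if greater.
static void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
  Label done;
  __ movl(dst, -1);
  __ jcc(Assembler::parity, done);    // unordered -> -1
  __ jcc(Assembler::below,  done);    // less      -> -1
  __ setb(Assembler::notEqual, dst);  // equal -> 0, greater -> 1
  __ movzbl(dst, dst);
  __ bind(done);
}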
@@ -10428,10 +9825,10 @@ instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
             "setne $dst\n\t"
             "movzbl $dst, $dst\n"
             "done:" %}
-  opcode(0x66, 0x0F, 0x2E);
-  ins_encode(OpcP, REX_reg_reg(src1, src2), OpcS, OpcT, reg_reg(src1, src2),
-             cmpfp3(dst));
+  ins_encode %{
+    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
+    emit_cmpfp3(_masm, $dst$$Register);
+  %}
   ins_pipe(pipe_slow);
 %}
@@ -10449,10 +9846,10 @@ instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
             "setne $dst\n\t"
             "movzbl $dst, $dst\n"
             "done:" %}
-  opcode(0x66, 0x0F, 0x2E);
-  ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, reg_mem(src1, src2),
-             cmpfp3(dst));
+  ins_encode %{
+    __ ucomisd($src1$$XMMRegister, $src2$$Address);
+    emit_cmpfp3(_masm, $dst$$Register);
+  %}
   ins_pipe(pipe_slow);
 %}
@@ -10470,15 +9867,8 @@ instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
             "movzbl $dst, $dst\n"
             "done:" %}
   ins_encode %{
-    Register Rdst = $dst$$Register;
-    Label L_done;
     __ ucomisd($src$$XMMRegister, $constantaddress($con));
-    __ movl(Rdst, -1);
-    __ jcc(Assembler::parity, L_done);
-    __ jcc(Assembler::below, L_done);
-    __ setb(Assembler::notEqual, Rdst);
-    __ movzbl(Rdst, Rdst);
-    __ bind(L_done);
+    emit_cmpfp3(_masm, $dst$$Register);
   %}
   ins_pipe(pipe_slow);
 %}
@@ -10489,8 +9879,9 @@ instruct addF_reg(regF dst, regF src)
   format %{ "addss $dst, $src" %}
   ins_cost(150); // XXX
-  opcode(0xF3, 0x0F, 0x58);
-  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
+  ins_encode %{
+    __ addss($dst$$XMMRegister, $src$$XMMRegister);
+  %}
   ins_pipe(pipe_slow);
 %}
@@ -10500,8 +9891,9 @@ instruct addF_mem(regF dst, memory src)
   format %{ "addss $dst, $src" %}
   ins_cost(150); // XXX
-  opcode(0xF3, 0x0F, 0x58);
-  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
+  ins_encode %{
+    __ addss($dst$$XMMRegister, $src$$Address);
+  %}
   ins_pipe(pipe_slow);
 %}
@@ -10521,8 +9913,9 @@ instruct addD_reg(regD dst, regD src)
   format %{ "addsd $dst, $src" %}
   ins_cost(150); // XXX
-  opcode(0xF2, 0x0F, 0x58);
-  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
+  ins_encode %{
+    __ addsd($dst$$XMMRegister, $src$$XMMRegister);
+  %}
   ins_pipe(pipe_slow);
 %}
@@ -10532,8 +9925,9 @@ instruct addD_mem(regD dst, memory src)
   format %{ "addsd $dst, $src" %}
   ins_cost(150); // XXX
-  opcode(0xF2, 0x0F, 0x58);
-  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
+  ins_encode %{
+    __ addsd($dst$$XMMRegister, $src$$Address);
+  %}
   ins_pipe(pipe_slow);
 %}
@@ -10553,8 +9947,9 @@ instruct subF_reg(regF dst, regF src)
   format %{ "subss $dst, $src" %}
   ins_cost(150); // XXX
-  opcode(0xF3, 0x0F, 0x5C);
-  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
+  ins_encode %{
+    __ subss($dst$$XMMRegister, $src$$XMMRegister);
+  %}
   ins_pipe(pipe_slow);
 %}
@@ -10564,8 +9959,9 @@ instruct subF_mem(regF dst, memory src)
   format %{ "subss $dst, $src" %}
   ins_cost(150); // XXX
-  opcode(0xF3, 0x0F, 0x5C);
-  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
+  ins_encode %{
+    __ subss($dst$$XMMRegister, $src$$Address);
+  %}
   ins_pipe(pipe_slow);
 %}
@@ -10585,8 +9981,9 @@ instruct subD_reg(regD dst, regD src)
   format %{ "subsd $dst, $src" %}
   ins_cost(150); // XXX
-  opcode(0xF2, 0x0F, 0x5C);
-  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
+  ins_encode %{
+    __ subsd($dst$$XMMRegister, $src$$XMMRegister);
+  %}
   ins_pipe(pipe_slow);
 %}
@@ -10596,8 +9993,9 @@ instruct subD_mem(regD dst, memory src)
   format %{ "subsd $dst, $src" %}
   ins_cost(150); // XXX
-  opcode(0xF2, 0x0F, 0x5C);
-  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
+  ins_encode %{
+    __ subsd($dst$$XMMRegister, $src$$Address);
+  %}
   ins_pipe(pipe_slow);
 %}
@@ -10617,8 +10015,9 @@ instruct mulF_reg(regF dst, regF src)
   format %{ "mulss $dst, $src" %}
   ins_cost(150); // XXX
-  opcode(0xF3, 0x0F, 0x59);
-  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
+  ins_encode %{
+    __ mulss($dst$$XMMRegister, $src$$XMMRegister);
+  %}
   ins_pipe(pipe_slow);
 %}
@@ -10628,8 +10027,9 @@ instruct mulF_mem(regF dst, memory src)
   format %{ "mulss $dst, $src" %}
   ins_cost(150); // XXX
-  opcode(0xF3, 0x0F, 0x59);
-  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
+  ins_encode %{
+    __ mulss($dst$$XMMRegister, $src$$Address);
+  %}
   ins_pipe(pipe_slow);
 %}
@@ -10649,8 +10049,9 @@ instruct mulD_reg(regD dst, regD src)
   format %{ "mulsd $dst, $src" %}
   ins_cost(150); // XXX
-  opcode(0xF2, 0x0F, 0x59);
-  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
+  ins_encode %{
+    __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
+  %}
   ins_pipe(pipe_slow);
 %}
@@ -10660,8 +10061,9 @@ instruct mulD_mem(regD dst, memory src)
   format %{ "mulsd $dst, $src" %}
   ins_cost(150); // XXX
-  opcode(0xF2, 0x0F, 0x59);
-  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
+  ins_encode %{
+    __ mulsd($dst$$XMMRegister, $src$$Address);
+  %}
   ins_pipe(pipe_slow);
 %}
@@ -10681,8 +10083,9 @@ instruct divF_reg(regF dst, regF src)
   format %{ "divss $dst, $src" %}
   ins_cost(150); // XXX
-  opcode(0xF3, 0x0F, 0x5E);
-  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
+  ins_encode %{
+    __ divss($dst$$XMMRegister, $src$$XMMRegister);
+  %}
   ins_pipe(pipe_slow);
 %}
@@ -10692,8 +10095,9 @@ instruct divF_mem(regF dst, memory src)
   format %{ "divss $dst, $src" %}
   ins_cost(150); // XXX
-  opcode(0xF3, 0x0F, 0x5E);
-  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
+  ins_encode %{
+    __ divss($dst$$XMMRegister, $src$$Address);
+  %}
   ins_pipe(pipe_slow);
 %}
@@ -10713,8 +10117,9 @@ instruct divD_reg(regD dst, regD src)
   format %{ "divsd $dst, $src" %}
   ins_cost(150); // XXX
-  opcode(0xF2, 0x0F, 0x5E);
-  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
+  ins_encode %{
+    __ divsd($dst$$XMMRegister, $src$$XMMRegister);
+  %}
   ins_pipe(pipe_slow);
 %}
@@ -10724,8 +10129,9 @@ instruct divD_mem(regD dst, memory src)
   format %{ "divsd $dst, $src" %}
   ins_cost(150); // XXX
-  opcode(0xF2, 0x0F, 0x5E);
-  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
+  ins_encode %{
+    __ divsd($dst$$XMMRegister, $src$$Address);
+  %}
   ins_pipe(pipe_slow);
 %}
@@ -10745,8 +10151,9 @@ instruct sqrtF_reg(regF dst, regF src)
   format %{ "sqrtss $dst, $src" %}
   ins_cost(150); // XXX
-  opcode(0xF3, 0x0F, 0x51);
-  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
+  ins_encode %{
+    __ sqrtss($dst$$XMMRegister, $src$$XMMRegister);
+  %}
   ins_pipe(pipe_slow);
 %}
@@ -10756,8 +10163,9 @@ instruct sqrtF_mem(regF dst, memory src)
   format %{ "sqrtss $dst, $src" %}
   ins_cost(150); // XXX
-  opcode(0xF3, 0x0F, 0x51);
-  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
+  ins_encode %{
+    __ sqrtss($dst$$XMMRegister, $src$$Address);
+  %}
   ins_pipe(pipe_slow);
 %}
@@ -10777,8 +10185,9 @@ instruct sqrtD_reg(regD dst, regD src)
   format %{ "sqrtsd $dst, $src" %}
   ins_cost(150); // XXX
-  opcode(0xF2, 0x0F, 0x51);
-  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
+  ins_encode %{
+    __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister);
+  %}
   ins_pipe(pipe_slow);
 %}
@@ -10788,8 +10197,9 @@ instruct sqrtD_mem(regD dst, memory src)
   format %{ "sqrtsd $dst, $src" %}
   ins_cost(150); // XXX
-  opcode(0xF2, 0x0F, 0x51);
-  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
+  ins_encode %{
+    __ sqrtsd($dst$$XMMRegister, $src$$Address);
+  %}
   ins_pipe(pipe_slow);
 %}
@@ -10806,38 +10216,50 @@ instruct sqrtD_imm(regD dst, immD con) %{
 instruct absF_reg(regF dst)
 %{
   match(Set dst (AbsF dst));
+  ins_cost(150); // XXX
   format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %}
-  ins_encode(absF_encoding(dst));
+  ins_encode %{
+    __ andps($dst$$XMMRegister,
+             ExternalAddress((address) StubRoutines::x86::float_sign_mask()));
+  %}
   ins_pipe(pipe_slow);
 %}
 instruct absD_reg(regD dst)
 %{
   match(Set dst (AbsD dst));
+  ins_cost(150); // XXX
   format %{ "andpd $dst, [0x7fffffffffffffff]\t"
             "# abs double by sign masking" %}
-  ins_encode(absD_encoding(dst));
+  ins_encode %{
+    __ andpd($dst$$XMMRegister,
+             ExternalAddress((address) StubRoutines::x86::double_sign_mask()));
+  %}
   ins_pipe(pipe_slow);
 %}
 instruct negF_reg(regF dst)
 %{
   match(Set dst (NegF dst));
+  ins_cost(150); // XXX
   format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %}
-  ins_encode(negF_encoding(dst));
+  ins_encode %{
+    __ xorps($dst$$XMMRegister,
+             ExternalAddress((address) StubRoutines::x86::float_sign_flip()));
+  %}
   ins_pipe(pipe_slow);
 %}
 instruct negD_reg(regD dst)
 %{
   match(Set dst (NegD dst));
+  ins_cost(150); // XXX
   format %{ "xorpd $dst, [0x8000000000000000]\t"
             "# neg double by sign flipping" %}
-  ins_encode(negD_encoding(dst));
+  ins_encode %{
+    __ xorpd($dst$$XMMRegister,
+             ExternalAddress((address) StubRoutines::x86::double_sign_flip()));
+  %}
   ins_pipe(pipe_slow);
 %}
@@ -10929,8 +10351,9 @@ instruct convF2D_reg_reg(regD dst, regF src)
   match(Set dst (ConvF2D src));
   format %{ "cvtss2sd $dst, $src" %}
-  opcode(0xF3, 0x0F, 0x5A);
-  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
+  ins_encode %{
+    __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
+  %}
   ins_pipe(pipe_slow); // XXX
 %}
@@ -10939,8 +10362,9 @@ instruct convF2D_reg_mem(regD dst, memory src)
   match(Set dst (ConvF2D (LoadF src)));
   format %{ "cvtss2sd $dst, $src" %}
-  opcode(0xF3, 0x0F, 0x5A);
-  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
+  ins_encode %{
+    __ cvtss2sd ($dst$$XMMRegister, $src$$Address);
+  %}
   ins_pipe(pipe_slow); // XXX
 %}
@@ -10949,8 +10373,9 @@ instruct convD2F_reg_reg(regF dst, regD src)
   match(Set dst (ConvD2F src));
   format %{ "cvtsd2ss $dst, $src" %}
-  opcode(0xF2, 0x0F, 0x5A);
-  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
+  ins_encode %{
+    __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
+  %}
   ins_pipe(pipe_slow); // XXX
 %}
@@ -10959,8 +10384,9 @@ instruct convD2F_reg_mem(regF dst, memory src)
   match(Set dst (ConvD2F (LoadD src)));
   format %{ "cvtsd2ss $dst, $src" %}
-  opcode(0xF2, 0x0F, 0x5A);
-  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
+  ins_encode %{
+    __ cvtsd2ss ($dst$$XMMRegister, $src$$Address);
+  %}
   ins_pipe(pipe_slow); // XXX
 %}
@@ -10978,9 +10404,17 @@ instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
             "call f2i_fixup\n\t"
             "popq $dst\n"
             "done: "%}
-  opcode(0xF3, 0x0F, 0x2C);
-  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src),
-             f2i_fixup(dst, src));
+  ins_encode %{
+    Label done;
+    __ cvttss2sil($dst$$Register, $src$$XMMRegister);
+    __ cmpl($dst$$Register, 0x80000000);
+    __ jccb(Assembler::notEqual, done);
+    __ subptr(rsp, 8);
+    __ movflt(Address(rsp, 0), $src$$XMMRegister);
+    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::f2i_fixup())));
+    __ pop($dst$$Register);
+    __ bind(done);
+  %}
   ins_pipe(pipe_slow);
 %}
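Worth noting for this and the three conversions that follow: cvttss2si/cvttsd2si return the x86 "integer indefinite" value (0x80000000, or 0x8000000000000000 for the 64-bit forms, the same bit pattern held by the double_sign_flip stub constant) whenever the source is NaN or out of range, so a single compare against that value routes every inconvertible input to the fixup stub. A scalar restatement of the test, as a sketch only:

// Not HotSpot code: the slow-path criterion used by convF2I above.
static bool f2i_needs_fixup(int fast_result) {
  return fast_result == (int) 0x80000000;  // "integer indefinite" sentinel
}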
@@ -10997,9 +10431,18 @@ instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
             "call f2l_fixup\n\t"
             "popq $dst\n"
             "done: "%}
-  opcode(0xF3, 0x0F, 0x2C);
-  ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src),
-             f2l_fixup(dst, src));
+  ins_encode %{
+    Label done;
+    __ cvttss2siq($dst$$Register, $src$$XMMRegister);
+    __ cmp64($dst$$Register,
+             ExternalAddress((address) StubRoutines::x86::double_sign_flip()));
+    __ jccb(Assembler::notEqual, done);
+    __ subptr(rsp, 8);
+    __ movflt(Address(rsp, 0), $src$$XMMRegister);
+    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::f2l_fixup())));
+    __ pop($dst$$Register);
+    __ bind(done);
+  %}
   ins_pipe(pipe_slow);
 %}
@@ -11016,9 +10459,17 @@ instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
             "call d2i_fixup\n\t"
             "popq $dst\n"
             "done: "%}
-  opcode(0xF2, 0x0F, 0x2C);
-  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src),
-             d2i_fixup(dst, src));
+  ins_encode %{
+    Label done;
+    __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
+    __ cmpl($dst$$Register, 0x80000000);
+    __ jccb(Assembler::notEqual, done);
+    __ subptr(rsp, 8);
+    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
+    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_fixup())));
+    __ pop($dst$$Register);
+    __ bind(done);
+  %}
   ins_pipe(pipe_slow);
 %}
@@ -11035,9 +10486,18 @@ instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
             "call d2l_fixup\n\t"
             "popq $dst\n"
             "done: "%}
-  opcode(0xF2, 0x0F, 0x2C);
-  ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src),
-             d2l_fixup(dst, src));
+  ins_encode %{
+    Label done;
+    __ cvttsd2siq($dst$$Register, $src$$XMMRegister);
+    __ cmp64($dst$$Register,
+             ExternalAddress((address) StubRoutines::x86::double_sign_flip()));
+    __ jccb(Assembler::notEqual, done);
+    __ subptr(rsp, 8);
+    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
+    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_fixup())));
+    __ pop($dst$$Register);
+    __ bind(done);
+  %}
   ins_pipe(pipe_slow);
 %}
@@ -11047,8 +10507,9 @@ instruct convI2F_reg_reg(regF dst, rRegI src)
   match(Set dst (ConvI2F src));
   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
-  opcode(0xF3, 0x0F, 0x2A);
-  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
+  ins_encode %{
+    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
+  %}
   ins_pipe(pipe_slow); // XXX
 %}
@@ -11057,8 +10518,9 @@ instruct convI2F_reg_mem(regF dst, memory src)
   match(Set dst (ConvI2F (LoadI src)));
   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
-  opcode(0xF3, 0x0F, 0x2A);
-  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
+  ins_encode %{
+    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Address);
+  %}
   ins_pipe(pipe_slow); // XXX
 %}
@@ -11068,8 +10530,9 @@ instruct convI2D_reg_reg(regD dst, rRegI src)
   match(Set dst (ConvI2D src));
   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
-  opcode(0xF2, 0x0F, 0x2A);
-  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
+  ins_encode %{
+    __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
+  %}
   ins_pipe(pipe_slow); // XXX
 %}
@@ -11078,8 +10541,9 @@ instruct convI2D_reg_mem(regD dst, memory src)
   match(Set dst (ConvI2D (LoadI src)));
   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
-  opcode(0xF2, 0x0F, 0x2A);
-  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
+  ins_encode %{
+    __ cvtsi2sdl ($dst$$XMMRegister, $src$$Address);
+  %}
   ins_pipe(pipe_slow); // XXX
 %}
@@ -11116,8 +10580,9 @@ instruct convL2F_reg_reg(regF dst, rRegL src)
   match(Set dst (ConvL2F src));
   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
-  opcode(0xF3, 0x0F, 0x2A);
-  ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src));
+  ins_encode %{
+    __ cvtsi2ssq ($dst$$XMMRegister, $src$$Register);
+  %}
   ins_pipe(pipe_slow); // XXX
 %}
@@ -11126,8 +10591,9 @@ instruct convL2F_reg_mem(regF dst, memory src)
   match(Set dst (ConvL2F (LoadL src)));
   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
-  opcode(0xF3, 0x0F, 0x2A);
-  ins_encode(OpcP, REX_reg_mem_wide(dst, src), OpcS, OpcT, reg_mem(dst, src));
+  ins_encode %{
+    __ cvtsi2ssq ($dst$$XMMRegister, $src$$Address);
+  %}
   ins_pipe(pipe_slow); // XXX
 %}
@@ -11136,8 +10602,9 @@ instruct convL2D_reg_reg(regD dst, rRegL src)
   match(Set dst (ConvL2D src));
   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
-  opcode(0xF2, 0x0F, 0x2A);
-  ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src));
+  ins_encode %{
+    __ cvtsi2sdq ($dst$$XMMRegister, $src$$Register);
+  %}
   ins_pipe(pipe_slow); // XXX
 %}
@@ -11146,8 +10613,9 @@ instruct convL2D_reg_mem(regD dst, memory src)
   match(Set dst (ConvL2D (LoadL src)));
   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
-  opcode(0xF2, 0x0F, 0x2A);
-  ins_encode(OpcP, REX_reg_mem_wide(dst, src), OpcS, OpcT, reg_mem(dst, src));
+  ins_encode %{
+    __ cvtsi2sdq ($dst$$XMMRegister, $src$$Address);
+  %}
   ins_pipe(pipe_slow); // XXX
 %}
@@ -11186,7 +10654,11 @@ instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
   match(Set dst (AndL (ConvI2L src) mask));
   format %{ "movl $dst, $src\t# i2l zero-extend\n\t" %}
-  ins_encode(enc_copy(dst, src));
+  ins_encode %{
+    if ($dst$$reg != $src$$reg) {
+      __ movl($dst$$Register, $src$$Register);
+    }
+  %}
   ins_pipe(ialu_reg_reg);
 %}
@@ -11196,8 +10668,9 @@ instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
   match(Set dst (AndL (ConvI2L (LoadI src)) mask));
   format %{ "movl $dst, $src\t# i2l zero-extend\n\t" %}
-  opcode(0x8B);
-  ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
+  ins_encode %{
+    __ movl($dst$$Register, $src$$Address);
+  %}
   ins_pipe(ialu_reg_mem);
 %}
@@ -11206,7 +10679,9 @@ instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
   match(Set dst (AndL src mask));
   format %{ "movl $dst, $src\t# zero-extend long" %}
-  ins_encode(enc_copy_always(dst, src));
+  ins_encode %{
+    __ movl($dst$$Register, $src$$Register);
+  %}
   ins_pipe(ialu_reg_reg);
 %}
@@ -11215,7 +10690,9 @@ instruct convL2I_reg_reg(rRegI dst, rRegL src)
   match(Set dst (ConvL2I src));
   format %{ "movl $dst, $src\t# l2i" %}
-  ins_encode(enc_copy_always(dst, src));
+  ins_encode %{
+    __ movl($dst$$Register, $src$$Register);
+  %}
   ins_pipe(ialu_reg_reg);
 %}
@@ -11226,8 +10703,9 @@ instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
   ins_cost(125);
   format %{ "movl $dst, $src\t# MoveF2I_stack_reg" %}
-  opcode(0x8B);
-  ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
+  ins_encode %{
+    __ movl($dst$$Register, Address(rsp, $src$$disp));
+  %}
   ins_pipe(ialu_reg_mem);
 %}
@@ -11237,8 +10715,9 @@ instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
   ins_cost(125);
   format %{ "movss $dst, $src\t# MoveI2F_stack_reg" %}
-  opcode(0xF3, 0x0F, 0x10);
-  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
+  ins_encode %{
+    __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
+  %}
   ins_pipe(pipe_slow);
 %}
@@ -11248,8 +10727,9 @@ instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
   ins_cost(125);
   format %{ "movq $dst, $src\t# MoveD2L_stack_reg" %}
-  opcode(0x8B);
-  ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
+  ins_encode %{
+    __ movq($dst$$Register, Address(rsp, $src$$disp));
+  %}
   ins_pipe(ialu_reg_mem);
 %}
@@ -11260,8 +10740,9 @@ instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
   ins_cost(125);
   format %{ "movlpd $dst, $src\t# MoveL2D_stack_reg" %}
-  opcode(0x66, 0x0F, 0x12);
-  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
+  ins_encode %{
+    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
+  %}
   ins_pipe(pipe_slow);
 %}
@@ -11272,8 +10753,9 @@ instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
   ins_cost(125);
   format %{ "movsd $dst, $src\t# MoveL2D_stack_reg" %}
-  opcode(0xF2, 0x0F, 0x10);
-  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
+  ins_encode %{
+    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
+  %}
   ins_pipe(pipe_slow);
 %}
@@ -11284,8 +10766,9 @@ instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
   ins_cost(95); // XXX
   format %{ "movss $dst, $src\t# MoveF2I_reg_stack" %}
-  opcode(0xF3, 0x0F, 0x11);
-  ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
+  ins_encode %{
+    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
+  %}
   ins_pipe(pipe_slow);
 %}
@@ -11295,8 +10778,9 @@ instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
   ins_cost(100);
   format %{ "movl $dst, $src\t# MoveI2F_reg_stack" %}
-  opcode(0x89);
-  ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
+  ins_encode %{
+    __ movl(Address(rsp, $dst$$disp), $src$$Register);
+  %}
   ins_pipe( ialu_mem_reg );
 %}
@@ -11306,8 +10790,9 @@ instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
   ins_cost(95); // XXX
   format %{ "movsd $dst, $src\t# MoveL2D_reg_stack" %}
-  opcode(0xF2, 0x0F, 0x11);
-  ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
+  ins_encode %{
+    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
+  %}
   ins_pipe(pipe_slow);
 %}
@@ -11317,8 +10802,9 @@ instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
   ins_cost(100);
   format %{ "movq $dst, $src\t# MoveL2D_reg_stack" %}
-  opcode(0x89);
-  ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
+  ins_encode %{
+    __ movq(Address(rsp, $dst$$disp), $src$$Register);
+  %}
   ins_pipe(ialu_mem_reg);
 %}
@@ -11327,7 +10813,9 @@ instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
   effect(DEF dst, USE src);
   ins_cost(85);
   format %{ "movd $dst,$src\t# MoveF2I" %}
-  ins_encode %{ __ movdl($dst$$Register, $src$$XMMRegister); %}
+  ins_encode %{
+    __ movdl($dst$$Register, $src$$XMMRegister);
+  %}
   ins_pipe( pipe_slow );
 %}
@@ -11336,7 +10824,9 @@ instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
   effect(DEF dst, USE src);
   ins_cost(85);
   format %{ "movd $dst,$src\t# MoveD2L" %}
-  ins_encode %{ __ movdq($dst$$Register, $src$$XMMRegister); %}
+  ins_encode %{
+    __ movdq($dst$$Register, $src$$XMMRegister);
+  %}
   ins_pipe( pipe_slow );
 %}
@@ -11346,7 +10836,9 @@ instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
   effect(DEF dst, USE src);
   ins_cost(300);
   format %{ "movd $dst,$src\t# MoveI2F" %}
-  ins_encode %{ __ movdl($dst$$XMMRegister, $src$$Register); %}
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+  %}
   ins_pipe( pipe_slow );
 %}
@@ -11355,7 +10847,9 @@ instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
   effect(DEF dst, USE src);
   ins_cost(300);
   format %{ "movd $dst,$src\t# MoveL2D" %}
-  ins_encode %{ __ movdq($dst$$XMMRegister, $src$$Register); %}
+  ins_encode %{
+    __ movdq($dst$$XMMRegister, $src$$Register);
+  %}
   ins_pipe( pipe_slow );
 %}
@@ -11365,7 +10859,13 @@ instruct Repl8B_reg(regD dst, regD src) %{
   format %{ "MOVDQA $dst,$src\n\t"
             "PUNPCKLBW $dst,$dst\n\t"
            "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
-  ins_encode( pshufd_8x8(dst, src));
+  ins_encode %{
+    if ($dst$$reg != $src$$reg) {
+      __ movdqa($dst$$XMMRegister, $src$$XMMRegister);
+    }
+    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
+    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+  %}
   ins_pipe( pipe_slow );
 %}
@@ -11375,7 +10875,11 @@ instruct Repl8B_rRegI(regD dst, rRegI src) %{
   format %{ "MOVD $dst,$src\n\t"
             "PUNPCKLBW $dst,$dst\n\t"
             "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
-  ins_encode( mov_i2x(dst, src), pshufd_8x8(dst, dst));
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
+    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+  %}
   ins_pipe( pipe_slow );
 %}
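For readers unfamiliar with the replicate idiom spelled out above: movdl puts the int in lane 0, punpcklbw doubles up the low bytes, and pshuflw with immediate 0x00 broadcasts the low 16-bit word, leaving eight copies of the source's low byte. A scalar model of the result (sketch, not HotSpot code):

#include <stdint.h>
// Scalar equivalent of Repl8B_rRegI: every byte of the 64-bit result is the
// low byte of the source int.
static uint64_t replicate8B(uint32_t src) {
  uint64_t b = src & 0xFF;
  return b * 0x0101010101010101ULL;
}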
@@ -11383,7 +10887,9 @@ instruct Repl8B_rRegI(regD dst, rRegI src) %{
 instruct Repl8B_immI0(regD dst, immI0 zero) %{
   match(Set dst (Replicate8B zero));
   format %{ "PXOR $dst,$dst\t! replicate8B" %}
-  ins_encode( pxor(dst, dst));
+  ins_encode %{
+    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
   ins_pipe( fpu_reg_reg );
 %}
@@ -11391,7 +10897,9 @@ instruct Repl8B_immI0(regD dst, immI0 zero) %{
 instruct Repl4S_reg(regD dst, regD src) %{
   match(Set dst (Replicate4S src));
   format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %}
-  ins_encode( pshufd_4x16(dst, src));
+  ins_encode %{
+    __ pshuflw($dst$$XMMRegister, $src$$XMMRegister, 0x00);
+  %}
   ins_pipe( fpu_reg_reg );
 %}
@@ -11400,7 +10908,10 @@ instruct Repl4S_rRegI(regD dst, rRegI src) %{
   match(Set dst (Replicate4S src));
   format %{ "MOVD $dst,$src\n\t"
             "PSHUFLW $dst,$dst,0x00\t! replicate4S" %}
-  ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+  %}
   ins_pipe( fpu_reg_reg );
 %}
@@ -11408,7 +10919,9 @@ instruct Repl4S_rRegI(regD dst, rRegI src) %{
 instruct Repl4S_immI0(regD dst, immI0 zero) %{
   match(Set dst (Replicate4S zero));
   format %{ "PXOR $dst,$dst\t! replicate4S" %}
-  ins_encode( pxor(dst, dst));
+  ins_encode %{
+    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
   ins_pipe( fpu_reg_reg );
 %}
@@ -11416,7 +10929,9 @@ instruct Repl4S_immI0(regD dst, immI0 zero) %{
 instruct Repl4C_reg(regD dst, regD src) %{
   match(Set dst (Replicate4C src));
   format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %}
-  ins_encode( pshufd_4x16(dst, src));
+  ins_encode %{
+    __ pshuflw($dst$$XMMRegister, $src$$XMMRegister, 0x00);
+  %}
   ins_pipe( fpu_reg_reg );
 %}
@@ -11425,7 +10940,10 @@ instruct Repl4C_rRegI(regD dst, rRegI src) %{
   match(Set dst (Replicate4C src));
   format %{ "MOVD $dst,$src\n\t"
             "PSHUFLW $dst,$dst,0x00\t! replicate4C" %}
-  ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+  %}
   ins_pipe( fpu_reg_reg );
 %}
@@ -11433,7 +10951,9 @@ instruct Repl4C_rRegI(regD dst, rRegI src) %{
 instruct Repl4C_immI0(regD dst, immI0 zero) %{
   match(Set dst (Replicate4C zero));
   format %{ "PXOR $dst,$dst\t! replicate4C" %}
-  ins_encode( pxor(dst, dst));
+  ins_encode %{
+    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
   ins_pipe( fpu_reg_reg );
 %}
@@ -11441,7 +10961,9 @@ instruct Repl4C_immI0(regD dst, immI0 zero) %{
 instruct Repl2I_reg(regD dst, regD src) %{
   match(Set dst (Replicate2I src));
   format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %}
-  ins_encode( pshufd(dst, src, 0x00));
+  ins_encode %{
+    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
+  %}
   ins_pipe( fpu_reg_reg );
 %}
@@ -11450,7 +10972,10 @@ instruct Repl2I_rRegI(regD dst, rRegI src) %{
   match(Set dst (Replicate2I src));
   format %{ "MOVD $dst,$src\n\t"
             "PSHUFD $dst,$dst,0x00\t! replicate2I" %}
-  ins_encode( mov_i2x(dst, src), pshufd(dst, dst, 0x00));
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+  %}
   ins_pipe( fpu_reg_reg );
 %}
@@ -11458,7 +10983,9 @@ instruct Repl2I_rRegI(regD dst, rRegI src) %{
 instruct Repl2I_immI0(regD dst, immI0 zero) %{
   match(Set dst (Replicate2I zero));
   format %{ "PXOR $dst,$dst\t! replicate2I" %}
-  ins_encode( pxor(dst, dst));
+  ins_encode %{
+    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
   ins_pipe( fpu_reg_reg );
 %}
@@ -11466,7 +10993,9 @@ instruct Repl2I_immI0(regD dst, immI0 zero) %{
 instruct Repl2F_reg(regD dst, regD src) %{
   match(Set dst (Replicate2F src));
   format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
-  ins_encode( pshufd(dst, src, 0xe0));
+  ins_encode %{
+    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0xe0);
+  %}
   ins_pipe( fpu_reg_reg );
 %}
@@ -11474,7 +11003,9 @@ instruct Repl2F_reg(regD dst, regD src) %{
 instruct Repl2F_regF(regD dst, regF src) %{
   match(Set dst (Replicate2F src));
   format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
-  ins_encode( pshufd(dst, src, 0xe0));
+  ins_encode %{
+    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0xe0);
+  %}
   ins_pipe( fpu_reg_reg );
 %}
@@ -11482,7 +11013,9 @@ instruct Repl2F_regF(regD dst, regF src) %{
 instruct Repl2F_immF0(regD dst, immF0 zero) %{
   match(Set dst (Replicate2F zero));
   format %{ "PXOR $dst,$dst\t! replicate2F" %}
-  ins_encode( pxor(dst, dst));
+  ins_encode %{
+    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
   ins_pipe( fpu_reg_reg );
 %}
@@ -525,6 +525,9 @@ class CommandLineFlags {
   product(intx, UseSSE, 99,                                                 \
           "Highest supported SSE instructions set on x86/x64")              \
                                                                             \
+  product(intx, UseAVX, 99,                                                 \
+          "Highest supported AVX instructions set on x86/x64")              \
+                                                                            \
   product(intx, UseVIS, 99,                                                 \
           "Highest supported VIS instructions set on Sparc")                \
                                                                             \
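Like UseSSE just above it, the new UseAVX flag defaults to 99, meaning "use the highest level the CPU and OS actually support"; the clamping to what the hardware reports happens in the CPU-feature detection code, which is not part of this hunk. A user who wants to keep the JIT away from AVX entirely can presumably lower it on the command line, e.g. -XX:UseAVX=0, mirroring how -XX:UseSSE is used.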