提交 160654b5 编写于 作者: R roland

Merge

......@@ -298,7 +298,7 @@ void PatchingStub::emit_code(LIR_Assembler* ce) {
for (int i = 0; i < _bytes_to_copy; i++) {
address ptr = (address)(_pc_start + i);
int a_byte = (*ptr) & 0xFF;
__ a_byte (a_byte);
__ emit_int8 (a_byte);
}
}
......@@ -340,10 +340,10 @@ void PatchingStub::emit_code(LIR_Assembler* ce) {
int being_initialized_entry_offset = __ offset() - being_initialized_entry + sizeof_patch_record;
// Emit the patch record. We need to emit a full word, so emit an extra empty byte
__ a_byte(0);
__ a_byte(being_initialized_entry_offset);
__ a_byte(bytes_to_skip);
__ a_byte(_bytes_to_copy);
__ emit_int8(0);
__ emit_int8(being_initialized_entry_offset);
__ emit_int8(bytes_to_skip);
__ emit_int8(_bytes_to_copy);
address patch_info_pc = __ pc();
assert(patch_info_pc - end_of_patch == bytes_to_skip, "incorrect patch info");
......
......@@ -100,34 +100,6 @@ const char* Argument::name() const {
bool AbstractAssembler::pd_check_instruction_mark() { return false; }
#endif
// Debug helper: writes a short mnemonic for the instruction word 'inst' to tty.
// Only call_op and branch_op instructions are decoded; any other opcode, and
// any branch whose inv_op2() value is not listed below, prints as "????".
void MacroAssembler::print_instruction(int inst) {
  const char* mnemonic = "????";
  const int op = inv_op(inst);
  if (op == call_op) {
    mnemonic = "call";
  } else if (op == branch_op) {
    // Branches are further classified by inv_op2(inst).
    switch (inv_op2(inst)) {
      case fb_op2:  mnemonic = "fb";  break;
      case fbp_op2: mnemonic = "fbp"; break;
      case br_op2:  mnemonic = "br";  break;
      case bp_op2:  mnemonic = "bp";  break;
      case cb_op2:  mnemonic = "cb";  break;
      case bpr_op2:
        // The same op2 value covers bpr and the cbcond forms; is_cbcond()
        // tells them apart and is_cxb() picks between "cxb" and "cwb".
        if (!is_cbcond(inst)) {
          mnemonic = "bpr";
        } else {
          mnemonic = is_cxb(inst) ? "cxb" : "cwb";
        }
        break;
      default:
        break;  // keep "????"
    }
  }
  ::tty->print("%s", mnemonic);
}
// Patch instruction inst at offset inst_pos to refer to dest_pos
// and return the resulting instruction.
// We should have pcs, not offsets, but since all is relative, it will work out
......
......@@ -603,7 +603,6 @@ class MacroAssembler : public Assembler {
friend class Label;
protected:
static void print_instruction(int inst);
static int patched_branch(int dest_pos, int inst, int inst_pos);
static int branch_destination(int inst, int pos);
......@@ -759,9 +758,6 @@ class MacroAssembler : public Assembler {
// Required platform-specific helpers for Label::patch_instructions.
// They _shadow_ the declarations in AbstractAssembler, which are undefined.
void pd_patch_instruction(address branch, address target);
#ifndef PRODUCT
static void pd_print_patched_instruction(address branch);
#endif
// sethi Macro handles optimizations and relocations
private:
......
......@@ -43,14 +43,6 @@ inline void MacroAssembler::pd_patch_instruction(address branch, address target)
stub_inst = patched_branch(target - branch, stub_inst, 0);
}
#ifndef PRODUCT
// Non-product printer for a branch that still awaits patching: reads the jint
// instruction word stored at 'branch', hands it to print_instruction(), and
// then appends the " (unresolved)" marker on tty.
inline void MacroAssembler::pd_print_patched_instruction(address branch) {
  const jint* inst_addr = (jint*) branch;
  print_instruction(*inst_addr);
  ::tty->print("%s", " (unresolved)");
}
#endif // PRODUCT
// Use the right loads/stores for the platform
inline void MacroAssembler::ld_ptr( Register s1, Register s2, Register d ) {
#ifdef _LP64
......
......@@ -10224,7 +10224,7 @@ instruct array_equals(o0RegP ary1, o1RegP ary2, g3RegI tmp1, notemp_iRegI result
//---------- Zeros Count Instructions ------------------------------------------
instruct countLeadingZerosI(iRegI dst, iRegI src, iRegI tmp, flagsReg cr) %{
instruct countLeadingZerosI(iRegIsafe dst, iRegI src, iRegI tmp, flagsReg cr) %{
predicate(UsePopCountInstruction); // See Matcher::match_rule_supported
match(Set dst (CountLeadingZerosI src));
effect(TEMP dst, TEMP tmp, KILL cr);
......@@ -10321,7 +10321,7 @@ instruct countLeadingZerosL(iRegIsafe dst, iRegL src, iRegL tmp, flagsReg cr) %{
ins_pipe(ialu_reg);
%}
instruct countTrailingZerosI(iRegI dst, iRegI src, flagsReg cr) %{
instruct countTrailingZerosI(iRegIsafe dst, iRegI src, flagsReg cr) %{
predicate(UsePopCountInstruction); // See Matcher::match_rule_supported
match(Set dst (CountTrailingZerosI src));
effect(TEMP dst, KILL cr);
......@@ -10364,19 +10364,21 @@ instruct countTrailingZerosL(iRegIsafe dst, iRegL src, flagsReg cr) %{
//---------- Population Count Instructions -------------------------------------
instruct popCountI(iRegI dst, iRegI src) %{
instruct popCountI(iRegIsafe dst, iRegI src) %{
predicate(UsePopCountInstruction);
match(Set dst (PopCountI src));
format %{ "POPC $src, $dst" %}
format %{ "SRL $src, G0, $dst\t! clear upper word for 64 bit POPC\n\t"
"POPC $dst, $dst" %}
ins_encode %{
__ popc($src$$Register, $dst$$Register);
__ srl($src$$Register, G0, $dst$$Register);
__ popc($dst$$Register, $dst$$Register);
%}
ins_pipe(ialu_reg);
%}
// Note: Long.bitCount(long) returns an int.
instruct popCountL(iRegI dst, iRegL src) %{
instruct popCountL(iRegIsafe dst, iRegL src) %{
predicate(UsePopCountInstruction);
match(Set dst (PopCountL src));
......
......@@ -434,7 +434,7 @@ void TemplateInterpreterGenerator::generate_stack_overflow_check(Register Rframe
// the frame is greater than one page in size, so check against
// the bottom of the stack
__ cmp_and_brx_short(SP, Rscratch, Assembler::greater, Assembler::pt, after_frame_check);
__ cmp_and_brx_short(SP, Rscratch, Assembler::greaterUnsigned, Assembler::pt, after_frame_check);
// the stack will overflow, throw an exception
......
......@@ -313,10 +313,10 @@ void PatchingStub::emit_code(LIR_Assembler* ce) {
#endif
} else {
// make a copy of the code which is going to be patched.
for ( int i = 0; i < _bytes_to_copy; i++) {
for (int i = 0; i < _bytes_to_copy; i++) {
address ptr = (address)(_pc_start + i);
int a_byte = (*ptr) & 0xFF;
__ a_byte (a_byte);
__ emit_int8(a_byte);
*ptr = 0x90; // make the site look like a nop
}
}
......@@ -363,11 +363,11 @@ void PatchingStub::emit_code(LIR_Assembler* ce) {
// emit the offsets needed to find the code to patch
int being_initialized_entry_offset = __ pc() - being_initialized_entry + sizeof_patch_record;
__ a_byte(0xB8);
__ a_byte(0);
__ a_byte(being_initialized_entry_offset);
__ a_byte(bytes_to_skip);
__ a_byte(_bytes_to_copy);
__ emit_int8((unsigned char)0xB8);
__ emit_int8(0);
__ emit_int8(being_initialized_entry_offset);
__ emit_int8(bytes_to_skip);
__ emit_int8(_bytes_to_copy);
address patch_info_pc = __ pc();
assert(patch_info_pc - end_of_patch == bytes_to_skip, "incorrect patch info");
......
......@@ -1023,7 +1023,7 @@ void MacroAssembler::lea(Address dst, AddressLiteral adr) {
void MacroAssembler::leave() {
// %%% is this really better? Why not on 32bit too?
emit_byte(0xC9); // LEAVE
emit_int8((unsigned char)0xC9); // LEAVE
}
void MacroAssembler::lneg(Register hi, Register lo) {
......@@ -2112,11 +2112,11 @@ void MacroAssembler::fat_nop() {
if (UseAddressNop) {
addr_nop_5();
} else {
emit_byte(0x26); // es:
emit_byte(0x2e); // cs:
emit_byte(0x64); // fs:
emit_byte(0x65); // gs:
emit_byte(0x90);
emit_int8(0x26); // es:
emit_int8(0x2e); // cs:
emit_int8(0x64); // fs:
emit_int8(0x65); // gs:
emit_int8((unsigned char)0x90);
}
}
......@@ -2534,12 +2534,12 @@ void MacroAssembler::jump_cc(Condition cc, AddressLiteral dst) {
int offs = (intptr_t)dst.target() - ((intptr_t)pc());
if (dst.reloc() == relocInfo::none && is8bit(offs - short_size)) {
// 0111 tttn #8-bit disp
emit_byte(0x70 | cc);
emit_byte((offs - short_size) & 0xFF);
emit_int8(0x70 | cc);
emit_int8((offs - short_size) & 0xFF);
} else {
// 0000 1111 1000 tttn #32-bit disp
emit_byte(0x0F);
emit_byte(0x80 | cc);
emit_int8(0x0F);
emit_int8((unsigned char)(0x80 | cc));
emit_long(offs - long_size);
}
} else {
......@@ -3085,7 +3085,8 @@ void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) {
void MacroAssembler::pshufb(XMMRegister dst, AddressLiteral src) {
// Used in sign-bit flipping with aligned address.
assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
bool aligned_adr = (((intptr_t)src.target() & 15) == 0);
assert((UseAVX > 0) || aligned_adr, "SSE mode requires address alignment 16 bytes");
if (reachable(src)) {
Assembler::pshufb(dst, as_Address(src));
} else {
......
......@@ -126,25 +126,6 @@ class MacroAssembler: public Assembler {
}
}
#ifndef PRODUCT
// Non-product printer: classifies the instruction at 'branch' by its first
// opcode byte only and prints "<mnemonic> (unresolved)" to tty.
//   0xE8        -> "call"
//   0xE9, 0xEB  -> "jmp"
//   0x70..0x7F  -> "jcc"
//   0x0F        -> "jcc" (the byte following 0x0F is not inspected)
// Every other first byte prints as "????".
static void pd_print_patched_instruction(address branch) {
  const unsigned char opcode = branch[0];
  const char* mnemonic = "????";
  switch (opcode) {
    case 0xE8:
      mnemonic = "call";
      break;
    case 0xE9:
    case 0xEB:
      mnemonic = "jmp";
      break;
    case 0x0F:
      mnemonic = "jcc";
      break;
    default:
      // High nibble 0x7 selects the whole 0x70..0x7F range.
      if ((opcode & 0xF0) == 0x70) {
        mnemonic = "jcc";
      }
      break;
  }
  tty->print("%s (unresolved)", mnemonic);
}
#endif
// The following 4 methods return the offset of the appropriate move instruction
// Support for fast byte/short loading with zero extension (depending on particular CPU)
......
......@@ -2174,13 +2174,13 @@ class StubGenerator: public StubCodeGenerator {
// c_rarg2 - K (key) in little endian int array
//
address generate_aescrypt_encryptBlock() {
assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
assert(UseAES, "need AES instructions and misaligned SSE support");
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock");
Label L_doLast;
address start = __ pc();
const Register from = rsi; // source array address
const Register from = rdx; // source array address
const Register to = rdx; // destination array address
const Register key = rcx; // key array address
const Register keylen = rax;
......@@ -2189,47 +2189,74 @@ class StubGenerator: public StubCodeGenerator {
const Address key_param (rbp, 8+8);
const XMMRegister xmm_result = xmm0;
const XMMRegister xmm_temp = xmm1;
const XMMRegister xmm_key_shuf_mask = xmm2;
const XMMRegister xmm_key_shuf_mask = xmm1;
const XMMRegister xmm_temp1 = xmm2;
const XMMRegister xmm_temp2 = xmm3;
const XMMRegister xmm_temp3 = xmm4;
const XMMRegister xmm_temp4 = xmm5;
__ enter(); // required for proper stackwalking of RuntimeStub frame
__ push(rsi);
__ movptr(from , from_param);
__ movptr(to , to_param);
__ movptr(key , key_param);
__ enter(); // required for proper stackwalking of RuntimeStub frame
__ movptr(from, from_param);
__ movptr(key, key_param);
// keylen could be only {11, 13, 15} * 4 = {44, 52, 60}
__ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
// keylen = # of 32-bit words, convert to 128-bit words
__ shrl(keylen, 2);
__ subl(keylen, 11); // every key has at least 11 128-bit words, some have more
__ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
__ movdqu(xmm_result, Address(from, 0)); // get 16 bytes of input
__ movptr(to, to_param);
// For encryption, the java expanded key ordering is just what we need
load_key(xmm_temp, key, 0x00, xmm_key_shuf_mask);
__ pxor(xmm_result, xmm_temp);
for (int offset = 0x10; offset <= 0x90; offset += 0x10) {
aes_enc_key(xmm_result, xmm_temp, key, offset, xmm_key_shuf_mask);
}
load_key (xmm_temp, key, 0xa0, xmm_key_shuf_mask);
__ cmpl(keylen, 0);
__ jcc(Assembler::equal, L_doLast);
__ aesenc(xmm_result, xmm_temp); // only in 192 and 256 bit keys
aes_enc_key(xmm_result, xmm_temp, key, 0xb0, xmm_key_shuf_mask);
load_key(xmm_temp, key, 0xc0, xmm_key_shuf_mask);
__ subl(keylen, 2);
__ jcc(Assembler::equal, L_doLast);
__ aesenc(xmm_result, xmm_temp); // only in 256 bit keys
aes_enc_key(xmm_result, xmm_temp, key, 0xd0, xmm_key_shuf_mask);
load_key(xmm_temp, key, 0xe0, xmm_key_shuf_mask);
load_key(xmm_temp1, key, 0x00, xmm_key_shuf_mask);
__ pxor(xmm_result, xmm_temp1);
load_key(xmm_temp1, key, 0x10, xmm_key_shuf_mask);
load_key(xmm_temp2, key, 0x20, xmm_key_shuf_mask);
load_key(xmm_temp3, key, 0x30, xmm_key_shuf_mask);
load_key(xmm_temp4, key, 0x40, xmm_key_shuf_mask);
__ aesenc(xmm_result, xmm_temp1);
__ aesenc(xmm_result, xmm_temp2);
__ aesenc(xmm_result, xmm_temp3);
__ aesenc(xmm_result, xmm_temp4);
load_key(xmm_temp1, key, 0x50, xmm_key_shuf_mask);
load_key(xmm_temp2, key, 0x60, xmm_key_shuf_mask);
load_key(xmm_temp3, key, 0x70, xmm_key_shuf_mask);
load_key(xmm_temp4, key, 0x80, xmm_key_shuf_mask);
__ aesenc(xmm_result, xmm_temp1);
__ aesenc(xmm_result, xmm_temp2);
__ aesenc(xmm_result, xmm_temp3);
__ aesenc(xmm_result, xmm_temp4);
load_key(xmm_temp1, key, 0x90, xmm_key_shuf_mask);
load_key(xmm_temp2, key, 0xa0, xmm_key_shuf_mask);
__ cmpl(keylen, 44);
__ jccb(Assembler::equal, L_doLast);
__ aesenc(xmm_result, xmm_temp1);
__ aesenc(xmm_result, xmm_temp2);
load_key(xmm_temp1, key, 0xb0, xmm_key_shuf_mask);
load_key(xmm_temp2, key, 0xc0, xmm_key_shuf_mask);
__ cmpl(keylen, 52);
__ jccb(Assembler::equal, L_doLast);
__ aesenc(xmm_result, xmm_temp1);
__ aesenc(xmm_result, xmm_temp2);
load_key(xmm_temp1, key, 0xd0, xmm_key_shuf_mask);
load_key(xmm_temp2, key, 0xe0, xmm_key_shuf_mask);
__ BIND(L_doLast);
__ aesenclast(xmm_result, xmm_temp);
__ aesenc(xmm_result, xmm_temp1);
__ aesenclast(xmm_result, xmm_temp2);
__ movdqu(Address(to, 0), xmm_result); // store the result
__ xorptr(rax, rax); // return 0
__ pop(rsi);
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
......@@ -2245,13 +2272,13 @@ class StubGenerator: public StubCodeGenerator {
// c_rarg2 - K (key) in little endian int array
//
address generate_aescrypt_decryptBlock() {
assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
assert(UseAES, "need AES instructions and misaligned SSE support");
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock");
Label L_doLast;
address start = __ pc();
const Register from = rsi; // source array address
const Register from = rdx; // source array address
const Register to = rdx; // destination array address
const Register key = rcx; // key array address
const Register keylen = rax;
......@@ -2260,51 +2287,76 @@ class StubGenerator: public StubCodeGenerator {
const Address key_param (rbp, 8+8);
const XMMRegister xmm_result = xmm0;
const XMMRegister xmm_temp = xmm1;
const XMMRegister xmm_key_shuf_mask = xmm2;
const XMMRegister xmm_key_shuf_mask = xmm1;
const XMMRegister xmm_temp1 = xmm2;
const XMMRegister xmm_temp2 = xmm3;
const XMMRegister xmm_temp3 = xmm4;
const XMMRegister xmm_temp4 = xmm5;
__ enter(); // required for proper stackwalking of RuntimeStub frame
__ push(rsi);
__ movptr(from , from_param);
__ movptr(to , to_param);
__ movptr(key , key_param);
__ movptr(from, from_param);
__ movptr(key, key_param);
// keylen could be only {11, 13, 15} * 4 = {44, 52, 60}
__ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
// keylen = # of 32-bit words, convert to 128-bit words
__ shrl(keylen, 2);
__ subl(keylen, 11); // every key has at least 11 128-bit words, some have more
__ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
__ movdqu(xmm_result, Address(from, 0));
__ movptr(to, to_param);
// for decryption java expanded key ordering is rotated one position from what we want
// so we start from 0x10 here and hit 0x00 last
// we don't know if the key is aligned, hence not using load-execute form
load_key(xmm_temp, key, 0x10, xmm_key_shuf_mask);
__ pxor (xmm_result, xmm_temp);
for (int offset = 0x20; offset <= 0xa0; offset += 0x10) {
aes_dec_key(xmm_result, xmm_temp, key, offset, xmm_key_shuf_mask);
}
__ cmpl(keylen, 0);
__ jcc(Assembler::equal, L_doLast);
// only in 192 and 256 bit keys
aes_dec_key(xmm_result, xmm_temp, key, 0xb0, xmm_key_shuf_mask);
aes_dec_key(xmm_result, xmm_temp, key, 0xc0, xmm_key_shuf_mask);
__ subl(keylen, 2);
__ jcc(Assembler::equal, L_doLast);
// only in 256 bit keys
aes_dec_key(xmm_result, xmm_temp, key, 0xd0, xmm_key_shuf_mask);
aes_dec_key(xmm_result, xmm_temp, key, 0xe0, xmm_key_shuf_mask);
load_key(xmm_temp1, key, 0x10, xmm_key_shuf_mask);
load_key(xmm_temp2, key, 0x20, xmm_key_shuf_mask);
load_key(xmm_temp3, key, 0x30, xmm_key_shuf_mask);
load_key(xmm_temp4, key, 0x40, xmm_key_shuf_mask);
__ pxor (xmm_result, xmm_temp1);
__ aesdec(xmm_result, xmm_temp2);
__ aesdec(xmm_result, xmm_temp3);
__ aesdec(xmm_result, xmm_temp4);
load_key(xmm_temp1, key, 0x50, xmm_key_shuf_mask);
load_key(xmm_temp2, key, 0x60, xmm_key_shuf_mask);
load_key(xmm_temp3, key, 0x70, xmm_key_shuf_mask);
load_key(xmm_temp4, key, 0x80, xmm_key_shuf_mask);
__ aesdec(xmm_result, xmm_temp1);
__ aesdec(xmm_result, xmm_temp2);
__ aesdec(xmm_result, xmm_temp3);
__ aesdec(xmm_result, xmm_temp4);
load_key(xmm_temp1, key, 0x90, xmm_key_shuf_mask);
load_key(xmm_temp2, key, 0xa0, xmm_key_shuf_mask);
load_key(xmm_temp3, key, 0x00, xmm_key_shuf_mask);
__ cmpl(keylen, 44);
__ jccb(Assembler::equal, L_doLast);
__ aesdec(xmm_result, xmm_temp1);
__ aesdec(xmm_result, xmm_temp2);
load_key(xmm_temp1, key, 0xb0, xmm_key_shuf_mask);
load_key(xmm_temp2, key, 0xc0, xmm_key_shuf_mask);
__ cmpl(keylen, 52);
__ jccb(Assembler::equal, L_doLast);
__ aesdec(xmm_result, xmm_temp1);
__ aesdec(xmm_result, xmm_temp2);
load_key(xmm_temp1, key, 0xd0, xmm_key_shuf_mask);
load_key(xmm_temp2, key, 0xe0, xmm_key_shuf_mask);
__ BIND(L_doLast);
// for decryption the aesdeclast operation is always on key+0x00
load_key(xmm_temp, key, 0x00, xmm_key_shuf_mask);
__ aesdeclast(xmm_result, xmm_temp);
__ aesdec(xmm_result, xmm_temp1);
__ aesdec(xmm_result, xmm_temp2);
// for decryption the aesdeclast operation is always on key+0x00
__ aesdeclast(xmm_result, xmm_temp3);
__ movdqu(Address(to, 0), xmm_result); // store the result
__ xorptr(rax, rax); // return 0
__ pop(rsi);
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
......@@ -2340,7 +2392,7 @@ class StubGenerator: public StubCodeGenerator {
// c_rarg4 - input length
//
address generate_cipherBlockChaining_encryptAESCrypt() {
assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
assert(UseAES, "need AES instructions and misaligned SSE support");
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt");
address start = __ pc();
......@@ -2393,7 +2445,7 @@ class StubGenerator: public StubCodeGenerator {
__ jcc(Assembler::notEqual, L_key_192_256);
// 128 bit code follows here
__ movptr(pos, 0);
__ movl(pos, 0);
__ align(OptoLoopAlignment);
__ BIND(L_loopTop_128);
__ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input
......@@ -2423,15 +2475,15 @@ class StubGenerator: public StubCodeGenerator {
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
__ BIND(L_key_192_256);
// here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256)
__ BIND(L_key_192_256);
// here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256)
__ cmpl(rax, 52);
__ jcc(Assembler::notEqual, L_key_256);
// 192-bit code follows here (could be changed to use more xmm registers)
__ movptr(pos, 0);
__ align(OptoLoopAlignment);
__ BIND(L_loopTop_192);
__ movl(pos, 0);
__ align(OptoLoopAlignment);
__ BIND(L_loopTop_192);
__ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input
__ pxor (xmm_result, xmm_temp); // xor with the current r vector
......@@ -2452,11 +2504,11 @@ class StubGenerator: public StubCodeGenerator {
__ jcc(Assembler::notEqual, L_loopTop_192);
__ jmp(L_exit);
__ BIND(L_key_256);
__ BIND(L_key_256);
// 256-bit code follows here (could be changed to use more xmm registers)
__ movptr(pos, 0);
__ align(OptoLoopAlignment);
__ BIND(L_loopTop_256);
__ movl(pos, 0);
__ align(OptoLoopAlignment);
__ BIND(L_loopTop_256);
__ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input
__ pxor (xmm_result, xmm_temp); // xor with the current r vector
......@@ -2495,7 +2547,7 @@ class StubGenerator: public StubCodeGenerator {
//
address generate_cipherBlockChaining_decryptAESCrypt() {
assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
assert(UseAES, "need AES instructions and misaligned SSE support");
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt");
address start = __ pc();
......@@ -2556,9 +2608,9 @@ class StubGenerator: public StubCodeGenerator {
// 128-bit code follows here, parallelized
__ movptr(pos, 0);
__ align(OptoLoopAlignment);
__ BIND(L_singleBlock_loopTop_128);
__ movl(pos, 0);
__ align(OptoLoopAlignment);
__ BIND(L_singleBlock_loopTop_128);
__ cmpptr(len_reg, 0); // any blocks left??
__ jcc(Assembler::equal, L_exit);
__ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input
......@@ -2597,7 +2649,7 @@ class StubGenerator: public StubCodeGenerator {
__ jcc(Assembler::notEqual, L_key_256);
// 192-bit code follows here (could be optimized to use parallelism)
__ movptr(pos, 0);
__ movl(pos, 0);
__ align(OptoLoopAlignment);
__ BIND(L_singleBlock_loopTop_192);
__ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input
......@@ -2622,7 +2674,7 @@ class StubGenerator: public StubCodeGenerator {
__ BIND(L_key_256);
// 256-bit code follows here (could be optimized to use parallelism)
__ movptr(pos, 0);
__ movl(pos, 0);
__ align(OptoLoopAlignment);
__ BIND(L_singleBlock_loopTop_256);
__ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input
......
......@@ -489,8 +489,8 @@ void VM_Version::get_processor_features() {
}
// The AES intrinsic stubs require AES instruction support (of course)
// but also require AVX and sse3 modes for instructions it use.
if (UseAES && (UseAVX > 0) && (UseSSE > 2)) {
// but also require sse3 mode for the instructions they use.
if (UseAES && (UseSSE > 2)) {
if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
UseAESIntrinsics = true;
}
......
......@@ -56,15 +56,9 @@ void Assembler::pd_patch_instruction(address branch, address target) {
ShouldNotCallThis();
}
#ifndef PRODUCT
// Printing a patched instruction is not implemented here: the body only
// invokes ShouldNotCallThis(), and 'branch' is never read.
void Assembler::pd_print_patched_instruction(address branch) {
ShouldNotCallThis();
}
#endif // PRODUCT
void MacroAssembler::align(int modulus) {
while (offset() % modulus != 0)
emit_byte(AbstractAssembler::code_fill_byte());
emit_int8(AbstractAssembler::code_fill_byte());
}
void MacroAssembler::bang_stack_with_offset(int offset) {
......@@ -72,8 +66,7 @@ void MacroAssembler::bang_stack_with_offset(int offset) {
}
void MacroAssembler::advance(int bytes) {
_code_pos += bytes;
sync();
code_section()->set_end(code_section()->end() + bytes);
}
RegisterOrConstant MacroAssembler::delayed_value_impl(
......
......@@ -37,9 +37,6 @@ class Assembler : public AbstractAssembler {
public:
void pd_patch_instruction(address branch, address target);
#ifndef PRODUCT
static void pd_print_patched_instruction(address branch);
#endif // PRODUCT
};
class MacroAssembler : public Assembler {
......
......@@ -116,7 +116,7 @@ void MacroAssembler::get_thread(Register thread) {
ThreadLocalStorage::pd_tlsAccessMode tlsMode = ThreadLocalStorage::pd_getTlsAccessMode ();
if (tlsMode == ThreadLocalStorage::pd_tlsAccessIndirect) { // T1
// Use thread as a temporary: mov r, gs:[0]; mov r, [r+tlsOffset]
emit_byte (segment);
emit_int8 (segment);
// ExternalAddress doesn't work because it can't take NULL
AddressLiteral null(0, relocInfo::none);
movptr (thread, null);
......@@ -125,7 +125,7 @@ void MacroAssembler::get_thread(Register thread) {
} else
if (tlsMode == ThreadLocalStorage::pd_tlsAccessDirect) { // T2
// mov r, gs:[tlsOffset]
emit_byte (segment);
emit_int8 (segment);
AddressLiteral tls_off((address)ThreadLocalStorage::pd_getTlsOffset(), relocInfo::none);
movptr (thread, tls_off);
return ;
......
......@@ -30,7 +30,7 @@
void MacroAssembler::int3() {
emit_byte(0xCC);
emit_int8((unsigned char)0xCC);
}
#ifndef _LP64
......
......@@ -109,37 +109,6 @@ void AbstractAssembler::flush() {
ICache::invalidate_range(addr_at(0), offset());
}
void AbstractAssembler::a_byte(int x) {
emit_byte(x);
}
// Emits 'x' as a 32-bit value into the current code section. Calls
// emit_int32() directly; the emit_long() wrapper it replaces is a plain
// forwarder to it.
void AbstractAssembler::a_long(jint x) {
  emit_int32(x);
}
// Labels refer to positions in the (to be) generated code. There are bound
// and unbound
//
// Bound labels refer to known positions in the already generated code.
// offset() is the position the label refers to.
//
// Unbound labels refer to unknown positions in the code to be generated; it
// may contain a list of unresolved displacements that refer to it
#ifndef PRODUCT
// Non-product dump of a label's state to tty:
//  - bound:   prints "bound label to <loc_pos>|<loc_sect>"
//  - unbound: delegates to Label::print_instructions()
//  - neither: reports the inconsistent state together with the raw loc()
void AbstractAssembler::print(Label& L) {
  if (L.is_bound()) {
    tty->print_cr("bound label to %d|%d", L.loc_pos(), L.loc_sect());
    return;
  }
  if (!L.is_unbound()) {
    tty->print_cr("label in inconsistent state (loc = %d)", L.loc());
    return;
  }
  // print_instructions() takes a MacroAssembler*, hence the downcast of 'this'.
  L.print_instructions((MacroAssembler*)this);
}
#endif // PRODUCT
void AbstractAssembler::bind(Label& L) {
if (L.is_bound()) {
// Assembler can bind a label more than once to the same place.
......@@ -342,28 +311,3 @@ bool MacroAssembler::needs_explicit_null_check(intptr_t offset) {
#endif
return offset < 0 || os::vm_page_size() <= offset;
}
#ifndef PRODUCT
// Non-product dump of every patch site recorded on this label.
// For each of the _patch_index recorded locators it prints an "unbound label"
// line and the site's position|section. A site in SECT_CONSTS is shown as the
// pointer value stored there; any other site is printed through
// masm->pd_print_patched_instruction().
void Label::print_instructions(MacroAssembler* masm) const {
  CodeBuffer* cb = masm->code();
  for (int i = 0; i < _patch_index; ++i) {
    // The first PatchCacheSize locators live in _patches; the rest spill
    // into _patch_overflow.
    const int branch_loc = (i < PatchCacheSize)
                             ? _patches[i]
                             : _patch_overflow->at(i - PatchCacheSize);
    const int branch_pos  = CodeBuffer::locator_pos(branch_loc);
    const int branch_sect = CodeBuffer::locator_sect(branch_loc);
    address branch = cb->locator_address(branch_loc);

    tty->print_cr("unbound label");
    tty->print("@ %d|%d ", branch_pos, branch_sect);
    if (branch_sect == CodeBuffer::SECT_CONSTS) {
      // Constant-section entry: print the stored address and end the line.
      tty->print_cr(PTR_FORMAT, *(address*)branch);
    } else {
      masm->pd_print_patched_instruction(branch);
      tty->cr();
    }
  }
}
#endif // ndef PRODUCT
......@@ -216,17 +216,6 @@ class AbstractAssembler : public ResourceObj {
bool isByte(int x) const { return 0 <= x && x < 0x100; }
bool isShiftCount(int x) const { return 0 <= x && x < 32; }
void emit_int8( int8_t x) { code_section()->emit_int8( x); }
void emit_int16( int16_t x) { code_section()->emit_int16( x); }
void emit_int32( int32_t x) { code_section()->emit_int32( x); }
void emit_int64( int64_t x) { code_section()->emit_int64( x); }
void emit_float( jfloat x) { code_section()->emit_float( x); }
void emit_double( jdouble x) { code_section()->emit_double( x); }
void emit_address(address x) { code_section()->emit_address(x); }
void emit_byte(int x) { emit_int8 (x); } // deprecated
void emit_word(int x) { emit_int16(x); } // deprecated
void emit_long(jint x) { emit_int32(x); } // deprecated
// Instruction boundaries (required when emitting relocatable values).
......@@ -277,9 +266,6 @@ class AbstractAssembler : public ResourceObj {
};
#endif
// Label functions
void print(Label& L);
public:
// Creation
......@@ -288,6 +274,15 @@ class AbstractAssembler : public ResourceObj {
// ensure buf contains all code (call this before using/copying the code)
void flush();
void emit_int8( int8_t x) { code_section()->emit_int8( x); }
void emit_int16( int16_t x) { code_section()->emit_int16( x); }
void emit_int32( int32_t x) { code_section()->emit_int32( x); }
void emit_int64( int64_t x) { code_section()->emit_int64( x); }
void emit_float( jfloat x) { code_section()->emit_float( x); }
void emit_double( jdouble x) { code_section()->emit_double( x); }
void emit_address(address x) { code_section()->emit_address(x); }
// min and max values for signed immediate ranges
static int min_simm(int nbits) { return -(intptr_t(1) << (nbits - 1)) ; }
static int max_simm(int nbits) { return (intptr_t(1) << (nbits - 1)) - 1; }
......@@ -327,8 +322,6 @@ class AbstractAssembler : public ResourceObj {
void clear_inst_mark() { code_section()->clear_mark(); }
// Constants in code
void a_byte(int x);
void a_long(jint x);
void relocate(RelocationHolder const& rspec, int format = 0) {
assert(!pd_check_instruction_mark()
|| inst_mark() == NULL || inst_mark() == code_section()->end(),
......@@ -441,15 +434,6 @@ class AbstractAssembler : public ResourceObj {
*/
void pd_patch_instruction(address branch, address target);
#ifndef PRODUCT
/**
* Platform-dependent method of printing an instruction that needs to be
* patched.
*
* @param branch the instruction to be patched in the buffer.
*/
static void pd_print_patched_instruction(address branch);
#endif // PRODUCT
};
#ifdef TARGET_ARCH_x86
......
......@@ -3442,6 +3442,11 @@ bool GraphBuilder::try_inline_intrinsics(ciMethod* callee) {
preserves_state = true;
break;
case vmIntrinsics::_loadFence :
case vmIntrinsics::_storeFence:
case vmIntrinsics::_fullFence :
break;
default : return false; // do not inline
}
// create intrinsic node
......
......@@ -2977,6 +2977,16 @@ void LIRGenerator::do_Intrinsic(Intrinsic* x) {
do_CompareAndSwap(x, longType);
break;
case vmIntrinsics::_loadFence :
if (os::is_MP()) __ membar_acquire();
break;
case vmIntrinsics::_storeFence:
if (os::is_MP()) __ membar_release();
break;
case vmIntrinsics::_fullFence :
if (os::is_MP()) __ membar();
break;
case vmIntrinsics::_Reference_get:
do_Reference_get(x);
break;
......
......@@ -366,10 +366,12 @@ bool ciField::will_link(ciInstanceKlass* accessing_klass,
// ------------------------------------------------------------------
// ciField::print
void ciField::print() {
tty->print("<ciField ");
tty->print("<ciField name=");
_holder->print_name();
tty->print(".");
_name->print_symbol();
tty->print(" signature=");
_signature->print_symbol();
tty->print(" offset=%d type=", _offset);
if (_type != NULL) _type->print_name();
else tty->print("(reference)");
......
......@@ -169,16 +169,18 @@ void ClassLoaderData::add_dependency(Handle dependency, TRAPS) {
ok = (objArrayOop)ok->obj_at(1);
}
// Must handle over GC points
assert (last != NULL, "dependencies should be initialized");
objArrayHandle last_handle(THREAD, last);
// Create a new dependency node with fields for (class_loader or mirror, next)
objArrayOop deps = oopFactory::new_objectArray(2, CHECK);
deps->obj_at_put(0, dependency());
// Must handle over more GC points
// Must handle over GC points
objArrayHandle new_dependency(THREAD, deps);
// Add the dependency under lock
assert (last != NULL, "dependencies should be initialized");
objArrayHandle last_handle(THREAD, last);
locked_add_dependency(last_handle, new_dependency);
}
......
......@@ -756,6 +756,15 @@
do_intrinsic(_unpark, sun_misc_Unsafe, unpark_name, unpark_signature, F_RN) \
do_name( unpark_name, "unpark") \
do_alias( unpark_signature, /*(LObject;)V*/ object_void_signature) \
do_intrinsic(_loadFence, sun_misc_Unsafe, loadFence_name, loadFence_signature, F_RN) \
do_name( loadFence_name, "loadFence") \
do_alias( loadFence_signature, void_method_signature) \
do_intrinsic(_storeFence, sun_misc_Unsafe, storeFence_name, storeFence_signature, F_RN) \
do_name( storeFence_name, "storeFence") \
do_alias( storeFence_signature, void_method_signature) \
do_intrinsic(_fullFence, sun_misc_Unsafe, fullFence_name, fullFence_signature, F_RN) \
do_name( fullFence_name, "fullFence") \
do_alias( fullFence_signature, void_method_signature) \
\
/* unsafe memory references (there are a lot of them...) */ \
do_signature(getObject_signature, "(Ljava/lang/Object;J)Ljava/lang/Object;") \
......@@ -897,12 +906,14 @@
do_intrinsic(_getAndAddLong, sun_misc_Unsafe, getAndAddLong_name, getAndAddLong_signature, F_R) \
do_name( getAndAddLong_name, "getAndAddLong") \
do_signature(getAndAddLong_signature, "(Ljava/lang/Object;JJ)J" ) \
do_intrinsic(_getAndSetInt, sun_misc_Unsafe, getAndSet_name, getAndSetInt_signature, F_R) \
do_name( getAndSet_name, "getAndSet") \
do_intrinsic(_getAndSetInt, sun_misc_Unsafe, getAndSetInt_name, getAndSetInt_signature, F_R) \
do_name( getAndSetInt_name, "getAndSetInt") \
do_alias( getAndSetInt_signature, /*"(Ljava/lang/Object;JI)I"*/ getAndAddInt_signature) \
do_intrinsic(_getAndSetLong, sun_misc_Unsafe, getAndSet_name, getAndSetLong_signature, F_R) \
do_intrinsic(_getAndSetLong, sun_misc_Unsafe, getAndSetLong_name, getAndSetLong_signature, F_R) \
do_name( getAndSetLong_name, "getAndSetLong") \
do_alias( getAndSetLong_signature, /*"(Ljava/lang/Object;JJ)J"*/ getAndAddLong_signature) \
do_intrinsic(_getAndSetObject, sun_misc_Unsafe, getAndSet_name, getAndSetObject_signature, F_R) \
do_intrinsic(_getAndSetObject, sun_misc_Unsafe, getAndSetObject_name, getAndSetObject_signature, F_R)\
do_name( getAndSetObject_name, "getAndSetObject") \
do_signature(getAndSetObject_signature, "(Ljava/lang/Object;JLjava/lang/Object;)Ljava/lang/Object;" ) \
\
/* prefetch_signature is shared by all prefetch variants */ \
......
......@@ -538,6 +538,7 @@ void CompilerOracle::parse_from_line(char* line) {
if (match != NULL) {
if (!_quiet) {
ResourceMark rm;
tty->print("CompilerOracle: %s ", command_names[command]);
match->print();
}
......
......@@ -189,6 +189,11 @@ Node *AddNode::Ideal(PhaseGVN *phase, bool can_reshape) {
set_req(1, addx);
set_req(2, a22);
progress = this;
PhaseIterGVN *igvn = phase->is_IterGVN();
if (add2->outcnt() == 0 && igvn) {
// add disconnected.
igvn->_worklist.push(add2);
}
}
}
......@@ -624,6 +629,11 @@ Node *AddPNode::Ideal(PhaseGVN *phase, bool can_reshape) {
if( t22->singleton() && (t22 != Type::TOP) ) { // Right input is an add of a constant?
set_req(Address, phase->transform(new (phase->C) AddPNode(in(Base),in(Address),add->in(1))));
set_req(Offset, add->in(2));
PhaseIterGVN *igvn = phase->is_IterGVN();
if (add->outcnt() == 0 && igvn) {
// add disconnected.
igvn->_worklist.push((Node*)add);
}
return this; // Made progress
}
}
......
......@@ -403,7 +403,7 @@ const char* InlineTree::check_can_parse(ciMethod* callee) {
//------------------------------print_inlining---------------------------------
// Really, the failure_msg can be a success message also.
void InlineTree::print_inlining(ciMethod* callee_method, int caller_bci, const char* failure_msg) const {
CompileTask::print_inlining(callee_method, inline_level(), caller_bci, failure_msg ? failure_msg : "inline");
C->print_inlining(callee_method, inline_level(), caller_bci, failure_msg ? failure_msg : "inline");
if (callee_method == NULL) tty->print(" callee not monotonic or profiled");
if (Verbose && callee_method) {
const InlineTree *top = this;
......
......@@ -274,6 +274,9 @@ class LateInlineCallGenerator : public DirectCallGenerator {
virtual void do_late_inline();
virtual JVMState* generate(JVMState* jvms) {
Compile *C = Compile::current();
C->print_inlining_skip(this);
// Record that this call site should be revisited once the main
// parse is finished.
Compile::current()->add_late_inline(this);
......@@ -284,7 +287,6 @@ class LateInlineCallGenerator : public DirectCallGenerator {
// as is done for allocations and macro expansion.
return DirectCallGenerator::generate(jvms);
}
};
......@@ -307,7 +309,9 @@ void LateInlineCallGenerator::do_late_inline() {
// Make sure the state is a MergeMem for parsing.
if (!map->in(TypeFunc::Memory)->is_MergeMem()) {
map->set_req(TypeFunc::Memory, MergeMemNode::make(C, map->in(TypeFunc::Memory)));
Node* mem = MergeMemNode::make(C, map->in(TypeFunc::Memory));
C->initial_gvn()->set_type_bottom(mem);
map->set_req(TypeFunc::Memory, mem);
}
// Make enough space for the expression stack and transfer the incoming arguments
......@@ -320,6 +324,8 @@ void LateInlineCallGenerator::do_late_inline() {
}
}
C->print_inlining_insert(this);
CompileLog* log = C->log();
if (log != NULL) {
log->head("late_inline method='%d'", log->identify(method()));
......@@ -608,7 +614,7 @@ CallGenerator* CallGenerator::for_method_handle_inline(JVMState* jvms, ciMethod*
if (cg != NULL && cg->is_inline())
return cg;
} else {
if (PrintInlining) CompileTask::print_inlining(callee, jvms->depth() - 1, jvms->bci(), "receiver not constant");
if (PrintInlining) C->print_inlining(callee, jvms->depth() - 1, jvms->bci(), "receiver not constant");
}
}
break;
......
......@@ -147,9 +147,9 @@ class CallGenerator : public ResourceObj {
CallGenerator* cg);
virtual Node* generate_predicate(JVMState* jvms) { return NULL; };
// Static helper: when PrintInlining is true, forwards callee, inline_level,
// bci and msg to a print_inlining routine; otherwise does nothing.
// NOTE(review): this span appears to be diff residue with the +/- markers
// stripped, so each replaced line is immediately followed by its replacement:
// the signature gains a leading Compile* C parameter, and the call target
// moves from CompileTask::print_inlining to C->print_inlining. Only one line
// of each pair belongs in real source -- confirm against the actual file.
static void print_inlining(ciMethod* callee, int inline_level, int bci, const char* msg) {
static void print_inlining(Compile* C, ciMethod* callee, int inline_level, int bci, const char* msg) {
if (PrintInlining)
CompileTask::print_inlining(callee, inline_level, bci, msg);
C->print_inlining(callee, inline_level, bci, msg);
}
};
......
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册