diff --git a/.hgtags b/.hgtags index c04b27715a86cf07d30f85abbbe52f96f60ac95d..7db2a557cb4b5cb33083f5af56f79b25f101801b 100644 --- a/.hgtags +++ b/.hgtags @@ -299,3 +299,7 @@ cfc5309f03b7bd6c1567618b63cf1fc74c0f2a8f hs25-b10 b61d9c88b759d1594b8af1655598e8fa00393672 hs25-b11 25bdce771bb3a7ae9825261a284d292cda700122 jdk8-b67 a35a72dd2e1255239d31f796f9f693e49b36bc9f hs25-b12 +121aa71316af6cd877bf455e775fa3fdbcdd4b65 jdk8-b68 +b6c9c0109a608eedbb6b868d260952990e3c91fe hs25-b13 +cb8a4e04bc8c104de8a2f67463c7e31232bf8d68 jdk8-b69 +990bbd393c239d95310ccc38094e57923bbf1d4a hs25-b14 diff --git a/agent/src/share/classes/sun/jvm/hotspot/oops/ConstMethod.java b/agent/src/share/classes/sun/jvm/hotspot/oops/ConstMethod.java index 3e30820206a5b4864dd6df67f2da5d59b26e3be8..ad495d73bea91e1e08f59cc08f285251ab23b76e 100644 --- a/agent/src/share/classes/sun/jvm/hotspot/oops/ConstMethod.java +++ b/agent/src/share/classes/sun/jvm/hotspot/oops/ConstMethod.java @@ -69,6 +69,8 @@ public class ConstMethod extends VMObject { signatureIndex = new CIntField(type.getCIntegerField("_signature_index"), 0); idnum = new CIntField(type.getCIntegerField("_method_idnum"), 0); maxStack = new CIntField(type.getCIntegerField("_max_stack"), 0); + maxLocals = new CIntField(type.getCIntegerField("_max_locals"), 0); + sizeOfParameters = new CIntField(type.getCIntegerField("_size_of_parameters"), 0); // start of byte code bytecodeOffset = type.getSize(); @@ -96,6 +98,8 @@ public class ConstMethod extends VMObject { private static CIntField signatureIndex; private static CIntField idnum; private static CIntField maxStack; + private static CIntField maxLocals; + private static CIntField sizeOfParameters; // start of bytecode private static long bytecodeOffset; @@ -151,6 +155,14 @@ public class ConstMethod extends VMObject { return maxStack.getValue(this); } + public long getMaxLocals() { + return maxLocals.getValue(this); + } + + public long getSizeOfParameters() { + return sizeOfParameters.getValue(this); + } + public Symbol getName() { return getMethod().getName(); } @@ -247,6 +259,8 @@ public class ConstMethod extends VMObject { visitor.doCInt(signatureIndex, true); visitor.doCInt(codeSize, true); visitor.doCInt(maxStack, true); + visitor.doCInt(maxLocals, true); + visitor.doCInt(sizeOfParameters, true); } // Accessors diff --git a/agent/src/share/classes/sun/jvm/hotspot/oops/Method.java b/agent/src/share/classes/sun/jvm/hotspot/oops/Method.java index a12d0b1e84e5f2afd62bcd292a519c99dece8ffc..31dc39c543171f1cbefe3276c1f1be7ce4a1f03d 100644 --- a/agent/src/share/classes/sun/jvm/hotspot/oops/Method.java +++ b/agent/src/share/classes/sun/jvm/hotspot/oops/Method.java @@ -50,8 +50,6 @@ public class Method extends Metadata { constMethod = type.getAddressField("_constMethod"); methodData = type.getAddressField("_method_data"); methodSize = new CIntField(type.getCIntegerField("_method_size"), 0); - maxLocals = new CIntField(type.getCIntegerField("_max_locals"), 0); - sizeOfParameters = new CIntField(type.getCIntegerField("_size_of_parameters"), 0); accessFlags = new CIntField(type.getCIntegerField("_access_flags"), 0); code = type.getAddressField("_code"); vtableIndex = new CIntField(type.getCIntegerField("_vtable_index"), 0); @@ -83,8 +81,6 @@ public class Method extends Metadata { private static AddressField constMethod; private static AddressField methodData; private static CIntField methodSize; - private static CIntField maxLocals; - private static CIntField sizeOfParameters; private static CIntField accessFlags; private static CIntField vtableIndex; private static CIntField invocationCounter; @@ -134,8 +130,8 @@ public class Method extends Metadata { /** WARNING: this is in words, not useful in this system; use getObjectSize() instead */ public long getMethodSize() { return methodSize.getValue(this); } public long getMaxStack() { return getConstMethod().getMaxStack(); } - public long getMaxLocals() { return maxLocals.getValue(this); } - public long getSizeOfParameters() { return sizeOfParameters.getValue(this); } + public long getMaxLocals() { return getConstMethod().getMaxLocals(); } + public long getSizeOfParameters() { return getConstMethod().getSizeOfParameters(); } public long getNameIndex() { return getConstMethod().getNameIndex(); } public long getSignatureIndex() { return getConstMethod().getSignatureIndex(); } public long getGenericSignatureIndex() { return getConstMethod().getGenericSignatureIndex(); } @@ -282,8 +278,6 @@ public class Method extends Metadata { public void iterateFields(MetadataVisitor visitor) { visitor.doCInt(methodSize, true); - visitor.doCInt(maxLocals, true); - visitor.doCInt(sizeOfParameters, true); visitor.doCInt(accessFlags, true); } diff --git a/make/hotspot_version b/make/hotspot_version index 7883b9bfb56d4cac69b051f3e370cd67569761f0..d6959b3b86c3b16d8e07848f4b2324998f6172c9 100644 --- a/make/hotspot_version +++ b/make/hotspot_version @@ -35,7 +35,7 @@ HOTSPOT_VM_COPYRIGHT=Copyright 2012 HS_MAJOR_VER=25 HS_MINOR_VER=0 -HS_BUILD_NUMBER=13 +HS_BUILD_NUMBER=15 JDK_MAJOR_VER=1 JDK_MINOR_VER=8 diff --git a/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp b/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp index 30a8983f9eb330e1b8f379a92aaa09ab05368fe1..fa5ce4fa6bef53540a3f54877cae06ad89206602 100644 --- a/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp +++ b/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp @@ -298,7 +298,7 @@ void PatchingStub::emit_code(LIR_Assembler* ce) { for (int i = 0; i < _bytes_to_copy; i++) { address ptr = (address)(_pc_start + i); int a_byte = (*ptr) & 0xFF; - __ a_byte (a_byte); + __ emit_int8 (a_byte); } } @@ -340,10 +340,10 @@ void PatchingStub::emit_code(LIR_Assembler* ce) { int being_initialized_entry_offset = __ offset() - being_initialized_entry + sizeof_patch_record; // Emit the patch record. We need to emit a full word, so emit an extra empty byte - __ a_byte(0); - __ a_byte(being_initialized_entry_offset); - __ a_byte(bytes_to_skip); - __ a_byte(_bytes_to_copy); + __ emit_int8(0); + __ emit_int8(being_initialized_entry_offset); + __ emit_int8(bytes_to_skip); + __ emit_int8(_bytes_to_copy); address patch_info_pc = __ pc(); assert(patch_info_pc - end_of_patch == bytes_to_skip, "incorrect patch info"); diff --git a/src/cpu/sparc/vm/cppInterpreter_sparc.cpp b/src/cpu/sparc/vm/cppInterpreter_sparc.cpp index eacb4d3fd668840dd69b6382dd0ffe3c8608d857..7031597b6cfa7aa3a95a76585e48bb0b92f500fa 100644 --- a/src/cpu/sparc/vm/cppInterpreter_sparc.cpp +++ b/src/cpu/sparc/vm/cppInterpreter_sparc.cpp @@ -582,7 +582,9 @@ address InterpreterGenerator::generate_native_entry(bool synchronized) { // the following temporary registers are used during frame creation const Register Gtmp1 = G3_scratch ; const Register Gtmp2 = G1_scratch; - const Address size_of_parameters(G5_method, 0, in_bytes(Method::size_of_parameters_offset())); + const Register RconstMethod = Gtmp1; + const Address constMethod(G5_method, 0, in_bytes(Method::const_offset())); + const Address size_of_parameters(RconstMethod, 0, in_bytes(ConstMethod::size_of_parameters_offset())); bool inc_counter = UseCompiler || CountCompiledCalls; @@ -618,6 +620,7 @@ address InterpreterGenerator::generate_native_entry(bool synchronized) { } #endif // ASSERT + __ ld_ptr(constMethod, RconstMethod); __ lduh(size_of_parameters, Gtmp1); __ sll(Gtmp1, LogBytesPerWord, Gtmp2); // parameter size in bytes __ add(Gargs, Gtmp2, Gargs); // points to first local + BytesPerWord @@ -1047,8 +1050,6 @@ void CppInterpreterGenerator::generate_compute_interpreter_state(const Register const Register Gtmp = G3_scratch; const Address constMethod (G5_method, 0, in_bytes(Method::const_offset())); const Address access_flags (G5_method, 0, in_bytes(Method::access_flags_offset())); - const Address size_of_parameters(G5_method, 0, in_bytes(Method::size_of_parameters_offset())); - const Address size_of_locals (G5_method, 0, in_bytes(Method::size_of_locals_offset())); // slop factor is two extra slots on the expression stack so that // we always have room to store a result when returning from a call without parameters @@ -1066,6 +1067,9 @@ void CppInterpreterGenerator::generate_compute_interpreter_state(const Register // Now compute new frame size if (native) { + const Register RconstMethod = Gtmp; + const Address size_of_parameters(RconstMethod, 0, in_bytes(ConstMethod::size_of_parameters_offset())); + __ ld_ptr(constMethod, RconstMethod); __ lduh( size_of_parameters, Gtmp ); __ calc_mem_param_words(Gtmp, Gtmp); // space for native call parameters passed on the stack in words } else { @@ -1236,9 +1240,13 @@ void CppInterpreterGenerator::generate_compute_interpreter_state(const Register } if (init_value != noreg) { Label clear_loop; + const Register RconstMethod = O1; + const Address size_of_parameters(RconstMethod, 0, in_bytes(ConstMethod::size_of_parameters_offset())); + const Address size_of_locals (RconstMethod, 0, in_bytes(ConstMethod::size_of_locals_offset())); // NOTE: If you change the frame layout, this code will need to // be updated! + __ ld_ptr( constMethod, RconstMethod ); __ lduh( size_of_locals, O2 ); __ lduh( size_of_parameters, O1 ); __ sll( O2, LogBytesPerWord, O2); @@ -1483,13 +1491,16 @@ void CppInterpreterGenerator::adjust_callers_stack(Register args) { // // assert_different_registers(state, prev_state); const Register Gtmp = G3_scratch; + const RconstMethod = G3_scratch; const Register tmp = O2; - const Address size_of_parameters(G5_method, 0, in_bytes(Method::size_of_parameters_offset())); - const Address size_of_locals (G5_method, 0, in_bytes(Method::size_of_locals_offset())); + const Address constMethod(G5_method, 0, in_bytes(Method::const_offset())); + const Address size_of_parameters(RconstMethod, 0, in_bytes(ConstMethod::size_of_parameters_offset())); + const Address size_of_locals (RconstMethod, 0, in_bytes(ConstMethod::size_of_locals_offset())); + __ ld_ptr(constMethod, RconstMethod); __ lduh(size_of_parameters, tmp); - __ sll(tmp, LogBytesPerWord, Gtmp); // parameter size in bytes - __ add(args, Gtmp, Gargs); // points to first local + BytesPerWord + __ sll(tmp, LogBytesPerWord, Gargs); // parameter size in bytes + __ add(args, Gargs, Gargs); // points to first local + BytesPerWord // NEW __ add(Gargs, -wordSize, Gargs); // points to first local[0] // determine extra space for non-argument locals & adjust caller's SP @@ -1541,8 +1552,6 @@ address InterpreterGenerator::generate_normal_entry(bool synchronized) { const Address constMethod (G5_method, 0, in_bytes(Method::const_offset())); const Address access_flags (G5_method, 0, in_bytes(Method::access_flags_offset())); - const Address size_of_parameters(G5_method, 0, in_bytes(Method::size_of_parameters_offset())); - const Address size_of_locals (G5_method, 0, in_bytes(Method::size_of_locals_offset())); address entry_point = __ pc(); __ mov(G0, prevState); // no current activation @@ -1750,7 +1759,9 @@ address InterpreterGenerator::generate_normal_entry(bool synchronized) { __ ld_ptr(STATE(_result._to_call._callee), L4_scratch); // called method __ ld_ptr(STATE(_stack), L1_scratch); // get top of java expr stack - __ lduh(L4_scratch, in_bytes(Method::size_of_parameters_offset()), L2_scratch); // get parameter size + // get parameter size + __ ld_ptr(L4_scratch, in_bytes(Method::const_offset()), L2_scratch); + __ lduh(L2_scratch, in_bytes(ConstMethod::size_of_parameters_offset()), L2_scratch); __ sll(L2_scratch, LogBytesPerWord, L2_scratch ); // parameter size in bytes __ add(L1_scratch, L2_scratch, L1_scratch); // stack destination for result __ ld(L4_scratch, in_bytes(Method::result_index_offset()), L3_scratch); // called method result type index diff --git a/src/cpu/sparc/vm/macroAssembler_sparc.cpp b/src/cpu/sparc/vm/macroAssembler_sparc.cpp index 2801a35a10d1e7c7f912ce313c4916bb9a0bf2b7..ac56573e76658b94b2df171cbb2868fa0a63a29f 100644 --- a/src/cpu/sparc/vm/macroAssembler_sparc.cpp +++ b/src/cpu/sparc/vm/macroAssembler_sparc.cpp @@ -100,34 +100,6 @@ const char* Argument::name() const { bool AbstractAssembler::pd_check_instruction_mark() { return false; } #endif - -void MacroAssembler::print_instruction(int inst) { - const char* s; - switch (inv_op(inst)) { - default: s = "????"; break; - case call_op: s = "call"; break; - case branch_op: - switch (inv_op2(inst)) { - case fb_op2: s = "fb"; break; - case fbp_op2: s = "fbp"; break; - case br_op2: s = "br"; break; - case bp_op2: s = "bp"; break; - case cb_op2: s = "cb"; break; - case bpr_op2: { - if (is_cbcond(inst)) { - s = is_cxb(inst) ? "cxb" : "cwb"; - } else { - s = "bpr"; - } - break; - } - default: s = "????"; break; - } - } - ::tty->print("%s", s); -} - - // Patch instruction inst at offset inst_pos to refer to dest_pos // and return the resulting instruction. // We should have pcs, not offsets, but since all is relative, it will work out diff --git a/src/cpu/sparc/vm/macroAssembler_sparc.hpp b/src/cpu/sparc/vm/macroAssembler_sparc.hpp index 1e1e6de9e47d8d302efe8a9d6e2932c15207db70..dffef7e794b291aef2d0edbb2692941263b056e5 100644 --- a/src/cpu/sparc/vm/macroAssembler_sparc.hpp +++ b/src/cpu/sparc/vm/macroAssembler_sparc.hpp @@ -603,7 +603,6 @@ class MacroAssembler : public Assembler { friend class Label; protected: - static void print_instruction(int inst); static int patched_branch(int dest_pos, int inst, int inst_pos); static int branch_destination(int inst, int pos); @@ -759,9 +758,6 @@ class MacroAssembler : public Assembler { // Required platform-specific helpers for Label::patch_instructions. // They _shadow_ the declarations in AbstractAssembler, which are undefined. void pd_patch_instruction(address branch, address target); -#ifndef PRODUCT - static void pd_print_patched_instruction(address branch); -#endif // sethi Macro handles optimizations and relocations private: diff --git a/src/cpu/sparc/vm/macroAssembler_sparc.inline.hpp b/src/cpu/sparc/vm/macroAssembler_sparc.inline.hpp index e9bdaea79ad4b6d161ea5ba44c7b73b31b8b21a2..871853c684d0420edb946ee428108be35629d0ec 100644 --- a/src/cpu/sparc/vm/macroAssembler_sparc.inline.hpp +++ b/src/cpu/sparc/vm/macroAssembler_sparc.inline.hpp @@ -43,14 +43,6 @@ inline void MacroAssembler::pd_patch_instruction(address branch, address target) stub_inst = patched_branch(target - branch, stub_inst, 0); } -#ifndef PRODUCT -inline void MacroAssembler::pd_print_patched_instruction(address branch) { - jint stub_inst = *(jint*) branch; - print_instruction(stub_inst); - ::tty->print("%s", " (unresolved)"); -} -#endif // PRODUCT - // Use the right loads/stores for the platform inline void MacroAssembler::ld_ptr( Register s1, Register s2, Register d ) { #ifdef _LP64 diff --git a/src/cpu/sparc/vm/methodHandles_sparc.cpp b/src/cpu/sparc/vm/methodHandles_sparc.cpp index f4637bf76de78af1a9b8e0c9b2504c6af046f00f..546cfaf08f1b2987a9522cac4cc82a7de38a33af 100644 --- a/src/cpu/sparc/vm/methodHandles_sparc.cpp +++ b/src/cpu/sparc/vm/methodHandles_sparc.cpp @@ -171,7 +171,8 @@ void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm, if (VerifyMethodHandles && !for_compiler_entry) { // make sure recv is already on stack - __ load_sized_value(Address(method_temp, Method::size_of_parameters_offset()), + __ ld_ptr(method_temp, in_bytes(Method::const_offset()), temp2); + __ load_sized_value(Address(temp2, ConstMethod::size_of_parameters_offset()), temp2, sizeof(u2), /*is_signed*/ false); // assert(sizeof(u2) == sizeof(Method::_size_of_parameters), ""); @@ -233,7 +234,8 @@ address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* int ref_kind = signature_polymorphic_intrinsic_ref_kind(iid); assert(ref_kind != 0 || iid == vmIntrinsics::_invokeBasic, "must be _invokeBasic or a linkTo intrinsic"); if (ref_kind == 0 || MethodHandles::ref_kind_has_receiver(ref_kind)) { - __ load_sized_value(Address(G5_method, Method::size_of_parameters_offset()), + __ ld_ptr(G5_method, in_bytes(Method::const_offset()), O4_param_size); + __ load_sized_value(Address(O4_param_size, ConstMethod::size_of_parameters_offset()), O4_param_size, sizeof(u2), /*is_signed*/ false); // assert(sizeof(u2) == sizeof(Method::_size_of_parameters), ""); diff --git a/src/cpu/sparc/vm/sparc.ad b/src/cpu/sparc/vm/sparc.ad index b7029faa604ebf069e802981311f663479819df9..96accce02565c45f0a72b876487354c45059b66f 100644 --- a/src/cpu/sparc/vm/sparc.ad +++ b/src/cpu/sparc/vm/sparc.ad @@ -10224,7 +10224,7 @@ instruct array_equals(o0RegP ary1, o1RegP ary2, g3RegI tmp1, notemp_iRegI result //---------- Zeros Count Instructions ------------------------------------------ -instruct countLeadingZerosI(iRegI dst, iRegI src, iRegI tmp, flagsReg cr) %{ +instruct countLeadingZerosI(iRegIsafe dst, iRegI src, iRegI tmp, flagsReg cr) %{ predicate(UsePopCountInstruction); // See Matcher::match_rule_supported match(Set dst (CountLeadingZerosI src)); effect(TEMP dst, TEMP tmp, KILL cr); @@ -10321,7 +10321,7 @@ instruct countLeadingZerosL(iRegIsafe dst, iRegL src, iRegL tmp, flagsReg cr) %{ ins_pipe(ialu_reg); %} -instruct countTrailingZerosI(iRegI dst, iRegI src, flagsReg cr) %{ +instruct countTrailingZerosI(iRegIsafe dst, iRegI src, flagsReg cr) %{ predicate(UsePopCountInstruction); // See Matcher::match_rule_supported match(Set dst (CountTrailingZerosI src)); effect(TEMP dst, KILL cr); @@ -10364,19 +10364,21 @@ instruct countTrailingZerosL(iRegIsafe dst, iRegL src, flagsReg cr) %{ //---------- Population Count Instructions ------------------------------------- -instruct popCountI(iRegI dst, iRegI src) %{ +instruct popCountI(iRegIsafe dst, iRegI src) %{ predicate(UsePopCountInstruction); match(Set dst (PopCountI src)); - format %{ "POPC $src, $dst" %} + format %{ "SRL $src, G0, $dst\t! clear upper word for 64 bit POPC\n\t" + "POPC $dst, $dst" %} ins_encode %{ - __ popc($src$$Register, $dst$$Register); + __ srl($src$$Register, G0, $dst$$Register); + __ popc($dst$$Register, $dst$$Register); %} ins_pipe(ialu_reg); %} // Note: Long.bitCount(long) returns an int. -instruct popCountL(iRegI dst, iRegL src) %{ +instruct popCountL(iRegIsafe dst, iRegL src) %{ predicate(UsePopCountInstruction); match(Set dst (PopCountL src)); diff --git a/src/cpu/sparc/vm/templateInterpreter_sparc.cpp b/src/cpu/sparc/vm/templateInterpreter_sparc.cpp index 2de866adb0744fb1b5a33e1fef282680fb0b81ad..0827c0b1a9c420802ac2032ccc412a02a879ff20 100644 --- a/src/cpu/sparc/vm/templateInterpreter_sparc.cpp +++ b/src/cpu/sparc/vm/templateInterpreter_sparc.cpp @@ -434,7 +434,7 @@ void TemplateInterpreterGenerator::generate_stack_overflow_check(Register Rframe // the frame is greater than one page in size, so check against // the bottom of the stack - __ cmp_and_brx_short(SP, Rscratch, Assembler::greater, Assembler::pt, after_frame_check); + __ cmp_and_brx_short(SP, Rscratch, Assembler::greaterUnsigned, Assembler::pt, after_frame_check); // the stack will overflow, throw an exception @@ -494,9 +494,6 @@ void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) { // (gri - 2/25/2000) - const Address size_of_parameters(G5_method, Method::size_of_parameters_offset()); - const Address size_of_locals (G5_method, Method::size_of_locals_offset()); - const Address constMethod (G5_method, Method::const_offset()); int rounded_vm_local_words = round_to( frame::interpreter_frame_vm_local_words, WordsPerLong ); const int extra_space = @@ -506,11 +503,15 @@ void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) { (native_call ? frame::interpreter_frame_extra_outgoing_argument_words : 0); const Register Glocals_size = G3; + const Register RconstMethod = Glocals_size; const Register Otmp1 = O3; const Register Otmp2 = O4; // Lscratch can't be used as a temporary because the call_stub uses // it to assert that the stack frame was setup correctly. + const Address constMethod (G5_method, Method::const_offset()); + const Address size_of_parameters(RconstMethod, ConstMethod::size_of_parameters_offset()); + __ ld_ptr( constMethod, RconstMethod ); __ lduh( size_of_parameters, Glocals_size); // Gargs points to first local + BytesPerWord @@ -530,6 +531,8 @@ void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) { // // Compute number of locals in method apart from incoming parameters // + const Address size_of_locals (Otmp1, ConstMethod::size_of_locals_offset()); + __ ld_ptr( constMethod, Otmp1 ); __ lduh( size_of_locals, Otmp1 ); __ sub( Otmp1, Glocals_size, Glocals_size ); __ round_to( Glocals_size, WordsPerLong ); @@ -1256,8 +1259,7 @@ address InterpreterGenerator::generate_normal_entry(bool synchronized) { // make sure registers are different! assert_different_registers(G2_thread, G5_method, Gargs, Gtmp1, Gtmp2); - const Address size_of_parameters(G5_method, Method::size_of_parameters_offset()); - const Address size_of_locals (G5_method, Method::size_of_locals_offset()); + const Address constMethod (G5_method, Method::const_offset()); // Seems like G5_method is live at the point this is used. So we could make this look consistent // and use in the asserts. const Address access_flags (Lmethod, Method::access_flags_offset()); @@ -1307,8 +1309,13 @@ address InterpreterGenerator::generate_normal_entry(bool synchronized) { init_value = G0; Label clear_loop; + const Register RconstMethod = O1; + const Address size_of_parameters(RconstMethod, ConstMethod::size_of_parameters_offset()); + const Address size_of_locals (RconstMethod, ConstMethod::size_of_locals_offset()); + // NOTE: If you change the frame layout, this code will need to // be updated! + __ ld_ptr( constMethod, RconstMethod ); __ lduh( size_of_locals, O2 ); __ lduh( size_of_parameters, O1 ); __ sll( O2, Interpreter::logStackElementSize, O2); @@ -1823,9 +1830,13 @@ void TemplateInterpreterGenerator::generate_throw_exception() { const Register Gtmp1 = G3_scratch; const Register Gtmp2 = G1_scratch; + const Register RconstMethod = Gtmp1; + const Address constMethod(Lmethod, Method::const_offset()); + const Address size_of_parameters(RconstMethod, ConstMethod::size_of_parameters_offset()); // Compute size of arguments for saving when returning to deoptimized caller - __ lduh(Lmethod, in_bytes(Method::size_of_parameters_offset()), Gtmp1); + __ ld_ptr(constMethod, RconstMethod); + __ lduh(size_of_parameters, Gtmp1); __ sll(Gtmp1, Interpreter::logStackElementSize, Gtmp1); __ sub(Llocals, Gtmp1, Gtmp2); __ add(Gtmp2, wordSize, Gtmp2); diff --git a/src/cpu/sparc/vm/templateTable_sparc.cpp b/src/cpu/sparc/vm/templateTable_sparc.cpp index 5b1887dbbcae19b6ad50f593e16fa1471cd29846..931342ded121a8db79f5218e1ee9e7d7b880e612 100644 --- a/src/cpu/sparc/vm/templateTable_sparc.cpp +++ b/src/cpu/sparc/vm/templateTable_sparc.cpp @@ -3040,7 +3040,8 @@ void TemplateTable::invokevfinal_helper(Register Rscratch, Register Rret) { Register Rtemp = G4_scratch; // Load receiver from stack slot - __ lduh(G5_method, in_bytes(Method::size_of_parameters_offset()), G4_scratch); + __ ld_ptr(G5_method, in_bytes(Method::const_offset()), G4_scratch); + __ lduh(G4_scratch, in_bytes(ConstMethod::size_of_parameters_offset()), G4_scratch); __ load_receiver(G4_scratch, O0); // receiver NULL check diff --git a/src/cpu/x86/vm/assembler_x86.cpp b/src/cpu/x86/vm/assembler_x86.cpp index 0a51534ed2425bdc7dec0a922ab695a8d427df76..6f97f963084c3a6475b9f305e9a3ffdf6d838f8c 100644 --- a/src/cpu/x86/vm/assembler_x86.cpp +++ b/src/cpu/x86/vm/assembler_x86.cpp @@ -226,9 +226,9 @@ void Assembler::emit_arith_b(int op1, int op2, Register dst, int imm8) { assert(isByte(op1) && isByte(op2), "wrong opcode"); assert(isByte(imm8), "not a byte"); assert((op1 & 0x01) == 0, "should be 8bit operation"); - emit_byte(op1); - emit_byte(op2 | encode(dst)); - emit_byte(imm8); + emit_int8(op1); + emit_int8(op2 | encode(dst)); + emit_int8(imm8); } @@ -237,12 +237,12 @@ void Assembler::emit_arith(int op1, int op2, Register dst, int32_t imm32) { assert((op1 & 0x01) == 1, "should be 32bit operation"); assert((op1 & 0x02) == 0, "sign-extension bit should not be set"); if (is8bit(imm32)) { - emit_byte(op1 | 0x02); // set sign bit - emit_byte(op2 | encode(dst)); - emit_byte(imm32 & 0xFF); + emit_int8(op1 | 0x02); // set sign bit + emit_int8(op2 | encode(dst)); + emit_int8(imm32 & 0xFF); } else { - emit_byte(op1); - emit_byte(op2 | encode(dst)); + emit_int8(op1); + emit_int8(op2 | encode(dst)); emit_long(imm32); } } @@ -252,8 +252,8 @@ void Assembler::emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32) assert(isByte(op1) && isByte(op2), "wrong opcode"); assert((op1 & 0x01) == 1, "should be 32bit operation"); assert((op1 & 0x02) == 0, "sign-extension bit should not be set"); - emit_byte(op1); - emit_byte(op2 | encode(dst)); + emit_int8(op1); + emit_int8(op2 | encode(dst)); emit_long(imm32); } @@ -262,11 +262,11 @@ void Assembler::emit_arith_operand(int op1, Register rm, Address adr, int32_t im assert((op1 & 0x01) == 1, "should be 32bit operation"); assert((op1 & 0x02) == 0, "sign-extension bit should not be set"); if (is8bit(imm32)) { - emit_byte(op1 | 0x02); // set sign bit + emit_int8(op1 | 0x02); // set sign bit emit_operand(rm, adr, 1); - emit_byte(imm32 & 0xFF); + emit_int8(imm32 & 0xFF); } else { - emit_byte(op1); + emit_int8(op1); emit_operand(rm, adr, 4); emit_long(imm32); } @@ -275,8 +275,8 @@ void Assembler::emit_arith_operand(int op1, Register rm, Address adr, int32_t im void Assembler::emit_arith(int op1, int op2, Register dst, Register src) { assert(isByte(op1) && isByte(op2), "wrong opcode"); - emit_byte(op1); - emit_byte(op2 | encode(dst) << 3 | encode(src)); + emit_int8(op1); + emit_int8(op2 | encode(dst) << 3 | encode(src)); } @@ -301,21 +301,21 @@ void Assembler::emit_operand(Register reg, Register base, Register index, // [base + index*scale] // [00 reg 100][ss index base] assert(index != rsp, "illegal addressing mode"); - emit_byte(0x04 | regenc); - emit_byte(scale << 6 | indexenc | baseenc); + emit_int8(0x04 | regenc); + emit_int8(scale << 6 | indexenc | baseenc); } else if (is8bit(disp) && rtype == relocInfo::none) { // [base + index*scale + imm8] // [01 reg 100][ss index base] imm8 assert(index != rsp, "illegal addressing mode"); - emit_byte(0x44 | regenc); - emit_byte(scale << 6 | indexenc | baseenc); - emit_byte(disp & 0xFF); + emit_int8(0x44 | regenc); + emit_int8(scale << 6 | indexenc | baseenc); + emit_int8(disp & 0xFF); } else { // [base + index*scale + disp32] // [10 reg 100][ss index base] disp32 assert(index != rsp, "illegal addressing mode"); - emit_byte(0x84 | regenc); - emit_byte(scale << 6 | indexenc | baseenc); + emit_int8(0x84 | regenc); + emit_int8(scale << 6 | indexenc | baseenc); emit_data(disp, rspec, disp32_operand); } } else if (base == rsp LP64_ONLY(|| base == r12)) { @@ -323,19 +323,19 @@ void Assembler::emit_operand(Register reg, Register base, Register index, if (disp == 0 && rtype == relocInfo::none) { // [rsp] // [00 reg 100][00 100 100] - emit_byte(0x04 | regenc); - emit_byte(0x24); + emit_int8(0x04 | regenc); + emit_int8(0x24); } else if (is8bit(disp) && rtype == relocInfo::none) { // [rsp + imm8] // [01 reg 100][00 100 100] disp8 - emit_byte(0x44 | regenc); - emit_byte(0x24); - emit_byte(disp & 0xFF); + emit_int8(0x44 | regenc); + emit_int8(0x24); + emit_int8(disp & 0xFF); } else { // [rsp + imm32] // [10 reg 100][00 100 100] disp32 - emit_byte(0x84 | regenc); - emit_byte(0x24); + emit_int8(0x84 | regenc); + emit_int8(0x24); emit_data(disp, rspec, disp32_operand); } } else { @@ -345,16 +345,16 @@ void Assembler::emit_operand(Register reg, Register base, Register index, base != rbp LP64_ONLY(&& base != r13)) { // [base] // [00 reg base] - emit_byte(0x00 | regenc | baseenc); + emit_int8(0x00 | regenc | baseenc); } else if (is8bit(disp) && rtype == relocInfo::none) { // [base + disp8] // [01 reg base] disp8 - emit_byte(0x40 | regenc | baseenc); - emit_byte(disp & 0xFF); + emit_int8(0x40 | regenc | baseenc); + emit_int8(disp & 0xFF); } else { // [base + disp32] // [10 reg base] disp32 - emit_byte(0x80 | regenc | baseenc); + emit_int8(0x80 | regenc | baseenc); emit_data(disp, rspec, disp32_operand); } } @@ -364,14 +364,14 @@ void Assembler::emit_operand(Register reg, Register base, Register index, // [index*scale + disp] // [00 reg 100][ss index 101] disp32 assert(index != rsp, "illegal addressing mode"); - emit_byte(0x04 | regenc); - emit_byte(scale << 6 | indexenc | 0x05); + emit_int8(0x04 | regenc); + emit_int8(scale << 6 | indexenc | 0x05); emit_data(disp, rspec, disp32_operand); } else if (rtype != relocInfo::none ) { // [disp] (64bit) RIP-RELATIVE (32bit) abs // [00 000 101] disp32 - emit_byte(0x05 | regenc); + emit_int8(0x05 | regenc); // Note that the RIP-rel. correction applies to the generated // disp field, but _not_ to the target address in the rspec. @@ -391,8 +391,8 @@ void Assembler::emit_operand(Register reg, Register base, Register index, // 32bit never did this, did everything as the rip-rel/disp code above // [disp] ABSOLUTE // [00 reg 100][00 100 101] disp32 - emit_byte(0x04 | regenc); - emit_byte(0x25); + emit_int8(0x04 | regenc); + emit_int8(0x25); emit_data(disp, rspec, disp32_operand); } } @@ -883,8 +883,8 @@ void Assembler::emit_operand(Address adr, MMXRegister reg) { void Assembler::emit_farith(int b1, int b2, int i) { assert(isByte(b1) && isByte(b2), "wrong opcode"); assert(0 <= i && i < 8, "illegal stack offset"); - emit_byte(b1); - emit_byte(b2 + i); + emit_int8(b1); + emit_int8(b2 + i); } @@ -899,7 +899,7 @@ void Assembler::adcl(Address dst, int32_t imm32) { void Assembler::adcl(Address dst, Register src) { InstructionMark im(this); prefix(dst, src); - emit_byte(0x11); + emit_int8(0x11); emit_operand(src, dst); } @@ -911,7 +911,7 @@ void Assembler::adcl(Register dst, int32_t imm32) { void Assembler::adcl(Register dst, Address src) { InstructionMark im(this); prefix(src, dst); - emit_byte(0x13); + emit_int8(0x13); emit_operand(dst, src); } @@ -929,7 +929,7 @@ void Assembler::addl(Address dst, int32_t imm32) { void Assembler::addl(Address dst, Register src) { InstructionMark im(this); prefix(dst, src); - emit_byte(0x01); + emit_int8(0x01); emit_operand(src, dst); } @@ -941,7 +941,7 @@ void Assembler::addl(Register dst, int32_t imm32) { void Assembler::addl(Register dst, Address src) { InstructionMark im(this); prefix(src, dst); - emit_byte(0x03); + emit_int8(0x03); emit_operand(dst, src); } @@ -953,38 +953,40 @@ void Assembler::addl(Register dst, Register src) { void Assembler::addr_nop_4() { assert(UseAddressNop, "no CPU support"); // 4 bytes: NOP DWORD PTR [EAX+0] - emit_byte(0x0F); - emit_byte(0x1F); - emit_byte(0x40); // emit_rm(cbuf, 0x1, EAX_enc, EAX_enc); - emit_byte(0); // 8-bits offset (1 byte) + emit_int8(0x0F); + emit_int8(0x1F); + emit_int8(0x40); // emit_rm(cbuf, 0x1, EAX_enc, EAX_enc); + emit_int8(0); // 8-bits offset (1 byte) } void Assembler::addr_nop_5() { assert(UseAddressNop, "no CPU support"); // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset - emit_byte(0x0F); - emit_byte(0x1F); - emit_byte(0x44); // emit_rm(cbuf, 0x1, EAX_enc, 0x4); - emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc); - emit_byte(0); // 8-bits offset (1 byte) + emit_int8(0x0F); + emit_int8(0x1F); + emit_int8(0x44); // emit_rm(cbuf, 0x1, EAX_enc, 0x4); + emit_int8(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc); + emit_int8(0); // 8-bits offset (1 byte) } void Assembler::addr_nop_7() { assert(UseAddressNop, "no CPU support"); // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset - emit_byte(0x0F); - emit_byte(0x1F); - emit_byte(0x80); // emit_rm(cbuf, 0x2, EAX_enc, EAX_enc); + emit_int8(0x0F); + emit_int8(0x1F); + emit_int8((unsigned char)0x80); + // emit_rm(cbuf, 0x2, EAX_enc, EAX_enc); emit_long(0); // 32-bits offset (4 bytes) } void Assembler::addr_nop_8() { assert(UseAddressNop, "no CPU support"); // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset - emit_byte(0x0F); - emit_byte(0x1F); - emit_byte(0x84); // emit_rm(cbuf, 0x2, EAX_enc, 0x4); - emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc); + emit_int8(0x0F); + emit_int8(0x1F); + emit_int8((unsigned char)0x84); + // emit_rm(cbuf, 0x2, EAX_enc, 0x4); + emit_int8(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc); emit_long(0); // 32-bits offset (4 bytes) } @@ -1012,67 +1014,67 @@ void Assembler::aesdec(XMMRegister dst, Address src) { assert(VM_Version::supports_aes(), ""); InstructionMark im(this); simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); - emit_byte(0xde); + emit_int8((unsigned char)0xDE); emit_operand(dst, src); } void Assembler::aesdec(XMMRegister dst, XMMRegister src) { assert(VM_Version::supports_aes(), ""); int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); - emit_byte(0xde); - emit_byte(0xC0 | encode); + emit_int8((unsigned char)0xDE); + emit_int8(0xC0 | encode); } void Assembler::aesdeclast(XMMRegister dst, Address src) { assert(VM_Version::supports_aes(), ""); InstructionMark im(this); simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); - emit_byte(0xdf); + emit_int8((unsigned char)0xDF); emit_operand(dst, src); } void Assembler::aesdeclast(XMMRegister dst, XMMRegister src) { assert(VM_Version::supports_aes(), ""); int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); - emit_byte(0xdf); - emit_byte(0xC0 | encode); + emit_int8((unsigned char)0xDF); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::aesenc(XMMRegister dst, Address src) { assert(VM_Version::supports_aes(), ""); InstructionMark im(this); simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); - emit_byte(0xdc); + emit_int8((unsigned char)0xDC); emit_operand(dst, src); } void Assembler::aesenc(XMMRegister dst, XMMRegister src) { assert(VM_Version::supports_aes(), ""); int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); - emit_byte(0xdc); - emit_byte(0xC0 | encode); + emit_int8((unsigned char)0xDC); + emit_int8(0xC0 | encode); } void Assembler::aesenclast(XMMRegister dst, Address src) { assert(VM_Version::supports_aes(), ""); InstructionMark im(this); simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); - emit_byte(0xdd); + emit_int8((unsigned char)0xDD); emit_operand(dst, src); } void Assembler::aesenclast(XMMRegister dst, XMMRegister src) { assert(VM_Version::supports_aes(), ""); int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); - emit_byte(0xdd); - emit_byte(0xC0 | encode); + emit_int8((unsigned char)0xDD); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::andl(Address dst, int32_t imm32) { InstructionMark im(this); prefix(dst); - emit_byte(0x81); + emit_int8((unsigned char)0x81); emit_operand(rsp, dst, 4); emit_long(imm32); } @@ -1085,7 +1087,7 @@ void Assembler::andl(Register dst, int32_t imm32) { void Assembler::andl(Register dst, Address src) { InstructionMark im(this); prefix(src, dst); - emit_byte(0x23); + emit_int8(0x23); emit_operand(dst, src); } @@ -1096,23 +1098,23 @@ void Assembler::andl(Register dst, Register src) { void Assembler::bsfl(Register dst, Register src) { int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); - emit_byte(0xBC); - emit_byte(0xC0 | encode); + emit_int8(0x0F); + emit_int8((unsigned char)0xBC); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::bsrl(Register dst, Register src) { assert(!VM_Version::supports_lzcnt(), "encoding is treated as LZCNT"); int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); - emit_byte(0xBD); - emit_byte(0xC0 | encode); + emit_int8(0x0F); + emit_int8((unsigned char)0xBD); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::bswapl(Register reg) { // bswap int encode = prefix_and_encode(reg->encoding()); - emit_byte(0x0F); - emit_byte(0xC8 | encode); + emit_int8(0x0F); + emit_int8((unsigned char)(0xC8 | encode)); } void Assembler::call(Label& L, relocInfo::relocType rtype) { @@ -1125,36 +1127,36 @@ void Assembler::call(Label& L, relocInfo::relocType rtype) { assert(offs <= 0, "assembler error"); InstructionMark im(this); // 1110 1000 #32-bit disp - emit_byte(0xE8); + emit_int8((unsigned char)0xE8); emit_data(offs - long_size, rtype, operand); } else { InstructionMark im(this); // 1110 1000 #32-bit disp L.add_patch_at(code(), locator()); - emit_byte(0xE8); + emit_int8((unsigned char)0xE8); emit_data(int(0), rtype, operand); } } void Assembler::call(Register dst) { int encode = prefix_and_encode(dst->encoding()); - emit_byte(0xFF); - emit_byte(0xD0 | encode); + emit_int8((unsigned char)0xFF); + emit_int8((unsigned char)(0xD0 | encode)); } void Assembler::call(Address adr) { InstructionMark im(this); prefix(adr); - emit_byte(0xFF); + emit_int8((unsigned char)0xFF); emit_operand(rdx, adr); } void Assembler::call_literal(address entry, RelocationHolder const& rspec) { assert(entry != NULL, "call most probably wrong"); InstructionMark im(this); - emit_byte(0xE8); + emit_int8((unsigned char)0xE8); intptr_t disp = entry - (pc() + sizeof(int32_t)); assert(is_simm32(disp), "must be 32bit offset (call2)"); // Technically, should use call32_operand, but this format is @@ -1165,42 +1167,42 @@ void Assembler::call_literal(address entry, RelocationHolder const& rspec) { } void Assembler::cdql() { - emit_byte(0x99); + emit_int8((unsigned char)0x99); } void Assembler::cld() { - emit_byte(0xfc); + emit_int8((unsigned char)0xFC); } void Assembler::cmovl(Condition cc, Register dst, Register src) { NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction")); int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); - emit_byte(0x40 | cc); - emit_byte(0xC0 | encode); + emit_int8(0x0F); + emit_int8(0x40 | cc); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::cmovl(Condition cc, Register dst, Address src) { NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction")); prefix(src, dst); - emit_byte(0x0F); - emit_byte(0x40 | cc); + emit_int8(0x0F); + emit_int8(0x40 | cc); emit_operand(dst, src); } void Assembler::cmpb(Address dst, int imm8) { InstructionMark im(this); prefix(dst); - emit_byte(0x80); + emit_int8((unsigned char)0x80); emit_operand(rdi, dst, 1); - emit_byte(imm8); + emit_int8(imm8); } void Assembler::cmpl(Address dst, int32_t imm32) { InstructionMark im(this); prefix(dst); - emit_byte(0x81); + emit_int8((unsigned char)0x81); emit_operand(rdi, dst, 4); emit_long(imm32); } @@ -1219,17 +1221,17 @@ void Assembler::cmpl(Register dst, Register src) { void Assembler::cmpl(Register dst, Address src) { InstructionMark im(this); prefix(src, dst); - emit_byte(0x3B); + emit_int8((unsigned char)0x3B); emit_operand(dst, src); } void Assembler::cmpw(Address dst, int imm16) { InstructionMark im(this); assert(!dst.base_needs_rex() && !dst.index_needs_rex(), "no extended registers"); - emit_byte(0x66); - emit_byte(0x81); + emit_int8(0x66); + emit_int8((unsigned char)0x81); emit_operand(rdi, dst, 2); - emit_word(imm16); + emit_int16(imm16); } // The 32-bit cmpxchg compares the value at adr with the contents of rax, @@ -1238,8 +1240,8 @@ void Assembler::cmpw(Address dst, int imm16) { void Assembler::cmpxchgl(Register reg, Address adr) { // cmpxchg InstructionMark im(this); prefix(adr, reg); - emit_byte(0x0F); - emit_byte(0xB1); + emit_int8(0x0F); + emit_int8((unsigned char)0xB1); emit_operand(reg, adr); } @@ -1266,8 +1268,8 @@ void Assembler::comiss(XMMRegister dst, XMMRegister src) { } void Assembler::cpuid() { - emit_byte(0x0F); - emit_byte(0xA2); + emit_int8(0x0F); + emit_int8((unsigned char)0xA2); } void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) { @@ -1293,8 +1295,8 @@ void Assembler::cvtsd2ss(XMMRegister dst, Address src) { void Assembler::cvtsi2sdl(XMMRegister dst, Register src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2); - emit_byte(0x2A); - emit_byte(0xC0 | encode); + emit_int8(0x2A); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::cvtsi2sdl(XMMRegister dst, Address src) { @@ -1305,8 +1307,8 @@ void Assembler::cvtsi2sdl(XMMRegister dst, Address src) { void Assembler::cvtsi2ssl(XMMRegister dst, Register src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3); - emit_byte(0x2A); - emit_byte(0xC0 | encode); + emit_int8(0x2A); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::cvtsi2ssl(XMMRegister dst, Address src) { @@ -1328,22 +1330,22 @@ void Assembler::cvtss2sd(XMMRegister dst, Address src) { void Assembler::cvttsd2sil(Register dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F2); - emit_byte(0x2C); - emit_byte(0xC0 | encode); + emit_int8(0x2C); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::cvttss2sil(Register dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3); - emit_byte(0x2C); - emit_byte(0xC0 | encode); + emit_int8(0x2C); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::decl(Address dst) { // Don't use it directly. Use MacroAssembler::decrement() instead. InstructionMark im(this); prefix(dst); - emit_byte(0xFF); + emit_int8((unsigned char)0xFF); emit_operand(rcx, dst); } @@ -1369,43 +1371,43 @@ void Assembler::divss(XMMRegister dst, XMMRegister src) { void Assembler::emms() { NOT_LP64(assert(VM_Version::supports_mmx(), "")); - emit_byte(0x0F); - emit_byte(0x77); + emit_int8(0x0F); + emit_int8(0x77); } void Assembler::hlt() { - emit_byte(0xF4); + emit_int8((unsigned char)0xF4); } void Assembler::idivl(Register src) { int encode = prefix_and_encode(src->encoding()); - emit_byte(0xF7); - emit_byte(0xF8 | encode); + emit_int8((unsigned char)0xF7); + emit_int8((unsigned char)(0xF8 | encode)); } void Assembler::divl(Register src) { // Unsigned int encode = prefix_and_encode(src->encoding()); - emit_byte(0xF7); - emit_byte(0xF0 | encode); + emit_int8((unsigned char)0xF7); + emit_int8((unsigned char)(0xF0 | encode)); } void Assembler::imull(Register dst, Register src) { int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); - emit_byte(0xAF); - emit_byte(0xC0 | encode); + emit_int8(0x0F); + emit_int8((unsigned char)0xAF); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::imull(Register dst, Register src, int value) { int encode = prefix_and_encode(dst->encoding(), src->encoding()); if (is8bit(value)) { - emit_byte(0x6B); - emit_byte(0xC0 | encode); - emit_byte(value & 0xFF); + emit_int8(0x6B); + emit_int8((unsigned char)(0xC0 | encode)); + emit_int8(value & 0xFF); } else { - emit_byte(0x69); - emit_byte(0xC0 | encode); + emit_int8(0x69); + emit_int8((unsigned char)(0xC0 | encode)); emit_long(value); } } @@ -1414,7 +1416,7 @@ void Assembler::incl(Address dst) { // Don't use it directly. Use MacroAssembler::increment() instead. InstructionMark im(this); prefix(dst); - emit_byte(0xFF); + emit_int8((unsigned char)0xFF); emit_operand(rax, dst); } @@ -1430,14 +1432,14 @@ void Assembler::jcc(Condition cc, Label& L, bool maybe_short) { intptr_t offs = (intptr_t)dst - (intptr_t)pc(); if (maybe_short && is8bit(offs - short_size)) { // 0111 tttn #8-bit disp - emit_byte(0x70 | cc); - emit_byte((offs - short_size) & 0xFF); + emit_int8(0x70 | cc); + emit_int8((offs - short_size) & 0xFF); } else { // 0000 1111 1000 tttn #32-bit disp assert(is_simm32(offs - long_size), "must be 32bit offset (call4)"); - emit_byte(0x0F); - emit_byte(0x80 | cc); + emit_int8(0x0F); + emit_int8((unsigned char)(0x80 | cc)); emit_long(offs - long_size); } } else { @@ -1446,8 +1448,8 @@ void Assembler::jcc(Condition cc, Label& L, bool maybe_short) { // Note: use jccb() if label to be bound is very close to get // an 8-bit displacement L.add_patch_at(code(), locator()); - emit_byte(0x0F); - emit_byte(0x80 | cc); + emit_int8(0x0F); + emit_int8((unsigned char)(0x80 | cc)); emit_long(0); } } @@ -1466,20 +1468,20 @@ void Assembler::jccb(Condition cc, Label& L) { #endif intptr_t offs = (intptr_t)entry - (intptr_t)pc(); // 0111 tttn #8-bit disp - emit_byte(0x70 | cc); - emit_byte((offs - short_size) & 0xFF); + emit_int8(0x70 | cc); + emit_int8((offs - short_size) & 0xFF); } else { InstructionMark im(this); L.add_patch_at(code(), locator()); - emit_byte(0x70 | cc); - emit_byte(0); + emit_int8(0x70 | cc); + emit_int8(0); } } void Assembler::jmp(Address adr) { InstructionMark im(this); prefix(adr); - emit_byte(0xFF); + emit_int8((unsigned char)0xFF); emit_operand(rsp, adr); } @@ -1492,10 +1494,10 @@ void Assembler::jmp(Label& L, bool maybe_short) { const int long_size = 5; intptr_t offs = entry - pc(); if (maybe_short && is8bit(offs - short_size)) { - emit_byte(0xEB); - emit_byte((offs - short_size) & 0xFF); + emit_int8((unsigned char)0xEB); + emit_int8((offs - short_size) & 0xFF); } else { - emit_byte(0xE9); + emit_int8((unsigned char)0xE9); emit_long(offs - long_size); } } else { @@ -1505,20 +1507,20 @@ void Assembler::jmp(Label& L, bool maybe_short) { // force an 8-bit displacement. InstructionMark im(this); L.add_patch_at(code(), locator()); - emit_byte(0xE9); + emit_int8((unsigned char)0xE9); emit_long(0); } } void Assembler::jmp(Register entry) { int encode = prefix_and_encode(entry->encoding()); - emit_byte(0xFF); - emit_byte(0xE0 | encode); + emit_int8((unsigned char)0xFF); + emit_int8((unsigned char)(0xE0 | encode)); } void Assembler::jmp_literal(address dest, RelocationHolder const& rspec) { InstructionMark im(this); - emit_byte(0xE9); + emit_int8((unsigned char)0xE9); assert(dest != NULL, "must have a target"); intptr_t disp = dest - (pc() + sizeof(int32_t)); assert(is_simm32(disp), "must be 32bit offset (jmp)"); @@ -1539,13 +1541,13 @@ void Assembler::jmpb(Label& L) { assert(is8bit(dist), "Dispacement too large for a short jmp"); #endif intptr_t offs = entry - pc(); - emit_byte(0xEB); - emit_byte((offs - short_size) & 0xFF); + emit_int8((unsigned char)0xEB); + emit_int8((offs - short_size) & 0xFF); } else { InstructionMark im(this); L.add_patch_at(code(), locator()); - emit_byte(0xEB); - emit_byte(0); + emit_int8((unsigned char)0xEB); + emit_int8(0); } } @@ -1553,46 +1555,46 @@ void Assembler::ldmxcsr( Address src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); InstructionMark im(this); prefix(src); - emit_byte(0x0F); - emit_byte(0xAE); + emit_int8(0x0F); + emit_int8((unsigned char)0xAE); emit_operand(as_Register(2), src); } void Assembler::leal(Register dst, Address src) { InstructionMark im(this); #ifdef _LP64 - emit_byte(0x67); // addr32 + emit_int8(0x67); // addr32 prefix(src, dst); #endif // LP64 - emit_byte(0x8D); + emit_int8((unsigned char)0x8D); emit_operand(dst, src); } void Assembler::lfence() { - emit_byte(0x0F); - emit_byte(0xAE); - emit_byte(0xE8); + emit_int8(0x0F); + emit_int8((unsigned char)0xAE); + emit_int8((unsigned char)0xE8); } void Assembler::lock() { - emit_byte(0xF0); + emit_int8((unsigned char)0xF0); } void Assembler::lzcntl(Register dst, Register src) { assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR"); - emit_byte(0xF3); + emit_int8((unsigned char)0xF3); int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); - emit_byte(0xBD); - emit_byte(0xC0 | encode); + emit_int8(0x0F); + emit_int8((unsigned char)0xBD); + emit_int8((unsigned char)(0xC0 | encode)); } // Emit mfence instruction void Assembler::mfence() { NOT_LP64(assert(VM_Version::supports_sse2(), "unsupported");) - emit_byte( 0x0F ); - emit_byte( 0xAE ); - emit_byte( 0xF0 ); + emit_int8(0x0F); + emit_int8((unsigned char)0xAE); + emit_int8((unsigned char)0xF0); } void Assembler::mov(Register dst, Register src) { @@ -1612,15 +1614,15 @@ void Assembler::movaps(XMMRegister dst, XMMRegister src) { void Assembler::movlhps(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); int encode = simd_prefix_and_encode(dst, src, src, VEX_SIMD_NONE); - emit_byte(0x16); - emit_byte(0xC0 | encode); + emit_int8(0x16); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::movb(Register dst, Address src) { NOT_LP64(assert(dst->has_byte_register(), "must have byte register")); InstructionMark im(this); prefix(src, dst, true); - emit_byte(0x8A); + emit_int8((unsigned char)0x8A); emit_operand(dst, src); } @@ -1628,9 +1630,9 @@ void Assembler::movb(Register dst, Address src) { void Assembler::movb(Address dst, int imm8) { InstructionMark im(this); prefix(dst); - emit_byte(0xC6); + emit_int8((unsigned char)0xC6); emit_operand(rax, dst, 1); - emit_byte(imm8); + emit_int8(imm8); } @@ -1638,30 +1640,30 @@ void Assembler::movb(Address dst, Register src) { assert(src->has_byte_register(), "must have byte register"); InstructionMark im(this); prefix(dst, src, true); - emit_byte(0x88); + emit_int8((unsigned char)0x88); emit_operand(src, dst); } void Assembler::movdl(XMMRegister dst, Register src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66); - emit_byte(0x6E); - emit_byte(0xC0 | encode); + emit_int8(0x6E); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::movdl(Register dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); // swap src/dst to get correct prefix int encode = simd_prefix_and_encode(src, dst, VEX_SIMD_66); - emit_byte(0x7E); - emit_byte(0xC0 | encode); + emit_int8(0x7E); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::movdl(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); simd_prefix(dst, src, VEX_SIMD_66); - emit_byte(0x6E); + emit_int8(0x6E); emit_operand(dst, src); } @@ -1669,7 +1671,7 @@ void Assembler::movdl(Address dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); simd_prefix(dst, src, VEX_SIMD_66); - emit_byte(0x7E); + emit_int8(0x7E); emit_operand(src, dst); } @@ -1692,7 +1694,7 @@ void Assembler::movdqu(Address dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); simd_prefix(dst, src, VEX_SIMD_F3); - emit_byte(0x7F); + emit_int8(0x7F); emit_operand(src, dst); } @@ -1701,8 +1703,8 @@ void Assembler::vmovdqu(XMMRegister dst, XMMRegister src) { assert(UseAVX, ""); bool vector256 = true; int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, vector256); - emit_byte(0x6F); - emit_byte(0xC0 | encode); + emit_int8(0x6F); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vmovdqu(XMMRegister dst, Address src) { @@ -1710,7 +1712,7 @@ void Assembler::vmovdqu(XMMRegister dst, Address src) { InstructionMark im(this); bool vector256 = true; vex_prefix(dst, xnoreg, src, VEX_SIMD_F3, vector256); - emit_byte(0x6F); + emit_int8(0x6F); emit_operand(dst, src); } @@ -1721,7 +1723,7 @@ void Assembler::vmovdqu(Address dst, XMMRegister src) { // swap src<->dst for encoding assert(src != xnoreg, "sanity"); vex_prefix(src, xnoreg, dst, VEX_SIMD_F3, vector256); - emit_byte(0x7F); + emit_int8(0x7F); emit_operand(src, dst); } @@ -1729,27 +1731,27 @@ void Assembler::vmovdqu(Address dst, XMMRegister src) { void Assembler::movl(Register dst, int32_t imm32) { int encode = prefix_and_encode(dst->encoding()); - emit_byte(0xB8 | encode); + emit_int8((unsigned char)(0xB8 | encode)); emit_long(imm32); } void Assembler::movl(Register dst, Register src) { int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x8B); - emit_byte(0xC0 | encode); + emit_int8((unsigned char)0x8B); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::movl(Register dst, Address src) { InstructionMark im(this); prefix(src, dst); - emit_byte(0x8B); + emit_int8((unsigned char)0x8B); emit_operand(dst, src); } void Assembler::movl(Address dst, int32_t imm32) { InstructionMark im(this); prefix(dst); - emit_byte(0xC7); + emit_int8((unsigned char)0xC7); emit_operand(rax, dst, 4); emit_long(imm32); } @@ -1757,7 +1759,7 @@ void Assembler::movl(Address dst, int32_t imm32) { void Assembler::movl(Address dst, Register src) { InstructionMark im(this); prefix(dst, src); - emit_byte(0x89); + emit_int8((unsigned char)0x89); emit_operand(src, dst); } @@ -1771,15 +1773,15 @@ void Assembler::movlpd(XMMRegister dst, Address src) { void Assembler::movq( MMXRegister dst, Address src ) { assert( VM_Version::supports_mmx(), "" ); - emit_byte(0x0F); - emit_byte(0x6F); + emit_int8(0x0F); + emit_int8(0x6F); emit_operand(dst, src); } void Assembler::movq( Address dst, MMXRegister src ) { assert( VM_Version::supports_mmx(), "" ); - emit_byte(0x0F); - emit_byte(0x7F); + emit_int8(0x0F); + emit_int8(0x7F); // workaround gcc (3.2.1-7a) bug // In that version of gcc with only an emit_operand(MMX, Address) // gcc will tail jump and try and reverse the parameters completely @@ -1793,7 +1795,7 @@ void Assembler::movq(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); simd_prefix(dst, src, VEX_SIMD_F3); - emit_byte(0x7E); + emit_int8(0x7E); emit_operand(dst, src); } @@ -1801,24 +1803,24 @@ void Assembler::movq(Address dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); simd_prefix(dst, src, VEX_SIMD_66); - emit_byte(0xD6); + emit_int8((unsigned char)0xD6); emit_operand(src, dst); } void Assembler::movsbl(Register dst, Address src) { // movsxb InstructionMark im(this); prefix(src, dst); - emit_byte(0x0F); - emit_byte(0xBE); + emit_int8(0x0F); + emit_int8((unsigned char)0xBE); emit_operand(dst, src); } void Assembler::movsbl(Register dst, Register src) { // movsxb NOT_LP64(assert(src->has_byte_register(), "must have byte register")); int encode = prefix_and_encode(dst->encoding(), src->encoding(), true); - emit_byte(0x0F); - emit_byte(0xBE); - emit_byte(0xC0 | encode); + emit_int8(0x0F); + emit_int8((unsigned char)0xBE); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::movsd(XMMRegister dst, XMMRegister src) { @@ -1835,7 +1837,7 @@ void Assembler::movsd(Address dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); simd_prefix(dst, src, VEX_SIMD_F2); - emit_byte(0x11); + emit_int8(0x11); emit_operand(src, dst); } @@ -1853,93 +1855,93 @@ void Assembler::movss(Address dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); InstructionMark im(this); simd_prefix(dst, src, VEX_SIMD_F3); - emit_byte(0x11); + emit_int8(0x11); emit_operand(src, dst); } void Assembler::movswl(Register dst, Address src) { // movsxw InstructionMark im(this); prefix(src, dst); - emit_byte(0x0F); - emit_byte(0xBF); + emit_int8(0x0F); + emit_int8((unsigned char)0xBF); emit_operand(dst, src); } void Assembler::movswl(Register dst, Register src) { // movsxw int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); - emit_byte(0xBF); - emit_byte(0xC0 | encode); + emit_int8(0x0F); + emit_int8((unsigned char)0xBF); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::movw(Address dst, int imm16) { InstructionMark im(this); - emit_byte(0x66); // switch to 16-bit mode + emit_int8(0x66); // switch to 16-bit mode prefix(dst); - emit_byte(0xC7); + emit_int8((unsigned char)0xC7); emit_operand(rax, dst, 2); - emit_word(imm16); + emit_int16(imm16); } void Assembler::movw(Register dst, Address src) { InstructionMark im(this); - emit_byte(0x66); + emit_int8(0x66); prefix(src, dst); - emit_byte(0x8B); + emit_int8((unsigned char)0x8B); emit_operand(dst, src); } void Assembler::movw(Address dst, Register src) { InstructionMark im(this); - emit_byte(0x66); + emit_int8(0x66); prefix(dst, src); - emit_byte(0x89); + emit_int8((unsigned char)0x89); emit_operand(src, dst); } void Assembler::movzbl(Register dst, Address src) { // movzxb InstructionMark im(this); prefix(src, dst); - emit_byte(0x0F); - emit_byte(0xB6); + emit_int8(0x0F); + emit_int8((unsigned char)0xB6); emit_operand(dst, src); } void Assembler::movzbl(Register dst, Register src) { // movzxb NOT_LP64(assert(src->has_byte_register(), "must have byte register")); int encode = prefix_and_encode(dst->encoding(), src->encoding(), true); - emit_byte(0x0F); - emit_byte(0xB6); - emit_byte(0xC0 | encode); + emit_int8(0x0F); + emit_int8((unsigned char)0xB6); + emit_int8(0xC0 | encode); } void Assembler::movzwl(Register dst, Address src) { // movzxw InstructionMark im(this); prefix(src, dst); - emit_byte(0x0F); - emit_byte(0xB7); + emit_int8(0x0F); + emit_int8((unsigned char)0xB7); emit_operand(dst, src); } void Assembler::movzwl(Register dst, Register src) { // movzxw int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); - emit_byte(0xB7); - emit_byte(0xC0 | encode); + emit_int8(0x0F); + emit_int8((unsigned char)0xB7); + emit_int8(0xC0 | encode); } void Assembler::mull(Address src) { InstructionMark im(this); prefix(src); - emit_byte(0xF7); + emit_int8((unsigned char)0xF7); emit_operand(rsp, src); } void Assembler::mull(Register src) { int encode = prefix_and_encode(src->encoding()); - emit_byte(0xF7); - emit_byte(0xE0 | encode); + emit_int8((unsigned char)0xF7); + emit_int8((unsigned char)(0xE0 | encode)); } void Assembler::mulsd(XMMRegister dst, Address src) { @@ -1964,8 +1966,8 @@ void Assembler::mulss(XMMRegister dst, XMMRegister src) { void Assembler::negl(Register dst) { int encode = prefix_and_encode(dst->encoding()); - emit_byte(0xF7); - emit_byte(0xD8 | encode); + emit_int8((unsigned char)0xF7); + emit_int8((unsigned char)(0xD8 | encode)); } void Assembler::nop(int i) { @@ -1976,7 +1978,7 @@ void Assembler::nop(int i) { // speed is not an issue so simply use the single byte traditional nop // to do alignment. - for (; i > 0 ; i--) emit_byte(0x90); + for (; i > 0 ; i--) emit_int8((unsigned char)0x90); return; #endif // ASSERT @@ -2006,33 +2008,35 @@ void Assembler::nop(int i) { while(i >= 15) { // For Intel don't generate consecutive addess nops (mix with regular nops) i -= 15; - emit_byte(0x66); // size prefix - emit_byte(0x66); // size prefix - emit_byte(0x66); // size prefix + emit_int8(0x66); // size prefix + emit_int8(0x66); // size prefix + emit_int8(0x66); // size prefix addr_nop_8(); - emit_byte(0x66); // size prefix - emit_byte(0x66); // size prefix - emit_byte(0x66); // size prefix - emit_byte(0x90); // nop + emit_int8(0x66); // size prefix + emit_int8(0x66); // size prefix + emit_int8(0x66); // size prefix + emit_int8((unsigned char)0x90); + // nop } switch (i) { case 14: - emit_byte(0x66); // size prefix + emit_int8(0x66); // size prefix case 13: - emit_byte(0x66); // size prefix + emit_int8(0x66); // size prefix case 12: addr_nop_8(); - emit_byte(0x66); // size prefix - emit_byte(0x66); // size prefix - emit_byte(0x66); // size prefix - emit_byte(0x90); // nop + emit_int8(0x66); // size prefix + emit_int8(0x66); // size prefix + emit_int8(0x66); // size prefix + emit_int8((unsigned char)0x90); + // nop break; case 11: - emit_byte(0x66); // size prefix + emit_int8(0x66); // size prefix case 10: - emit_byte(0x66); // size prefix + emit_int8(0x66); // size prefix case 9: - emit_byte(0x66); // size prefix + emit_int8(0x66); // size prefix case 8: addr_nop_8(); break; @@ -2040,7 +2044,7 @@ void Assembler::nop(int i) { addr_nop_7(); break; case 6: - emit_byte(0x66); // size prefix + emit_int8(0x66); // size prefix case 5: addr_nop_5(); break; @@ -2049,11 +2053,12 @@ void Assembler::nop(int i) { break; case 3: // Don't use "0x0F 0x1F 0x00" - need patching safe padding - emit_byte(0x66); // size prefix + emit_int8(0x66); // size prefix case 2: - emit_byte(0x66); // size prefix + emit_int8(0x66); // size prefix case 1: - emit_byte(0x90); // nop + emit_int8((unsigned char)0x90); + // nop break; default: assert(i == 0, " "); @@ -2086,24 +2091,24 @@ void Assembler::nop(int i) { while(i >= 22) { i -= 11; - emit_byte(0x66); // size prefix - emit_byte(0x66); // size prefix - emit_byte(0x66); // size prefix + emit_int8(0x66); // size prefix + emit_int8(0x66); // size prefix + emit_int8(0x66); // size prefix addr_nop_8(); } // Generate first nop for size between 21-12 switch (i) { case 21: i -= 1; - emit_byte(0x66); // size prefix + emit_int8(0x66); // size prefix case 20: case 19: i -= 1; - emit_byte(0x66); // size prefix + emit_int8(0x66); // size prefix case 18: case 17: i -= 1; - emit_byte(0x66); // size prefix + emit_int8(0x66); // size prefix case 16: case 15: i -= 8; @@ -2116,7 +2121,7 @@ void Assembler::nop(int i) { break; case 12: i -= 6; - emit_byte(0x66); // size prefix + emit_int8(0x66); // size prefix addr_nop_5(); break; default: @@ -2126,11 +2131,11 @@ void Assembler::nop(int i) { // Generate second nop for size between 11-1 switch (i) { case 11: - emit_byte(0x66); // size prefix + emit_int8(0x66); // size prefix case 10: - emit_byte(0x66); // size prefix + emit_int8(0x66); // size prefix case 9: - emit_byte(0x66); // size prefix + emit_int8(0x66); // size prefix case 8: addr_nop_8(); break; @@ -2138,7 +2143,7 @@ void Assembler::nop(int i) { addr_nop_7(); break; case 6: - emit_byte(0x66); // size prefix + emit_int8(0x66); // size prefix case 5: addr_nop_5(); break; @@ -2147,11 +2152,12 @@ void Assembler::nop(int i) { break; case 3: // Don't use "0x0F 0x1F 0x00" - need patching safe padding - emit_byte(0x66); // size prefix + emit_int8(0x66); // size prefix case 2: - emit_byte(0x66); // size prefix + emit_int8(0x66); // size prefix case 1: - emit_byte(0x90); // nop + emit_int8((unsigned char)0x90); + // nop break; default: assert(i == 0, " "); @@ -2174,42 +2180,43 @@ void Assembler::nop(int i) { // while(i > 12) { i -= 4; - emit_byte(0x66); // size prefix - emit_byte(0x66); - emit_byte(0x66); - emit_byte(0x90); // nop + emit_int8(0x66); // size prefix + emit_int8(0x66); + emit_int8(0x66); + emit_int8((unsigned char)0x90); + // nop } // 1 - 12 nops if(i > 8) { if(i > 9) { i -= 1; - emit_byte(0x66); + emit_int8(0x66); } i -= 3; - emit_byte(0x66); - emit_byte(0x66); - emit_byte(0x90); + emit_int8(0x66); + emit_int8(0x66); + emit_int8((unsigned char)0x90); } // 1 - 8 nops if(i > 4) { if(i > 6) { i -= 1; - emit_byte(0x66); + emit_int8(0x66); } i -= 3; - emit_byte(0x66); - emit_byte(0x66); - emit_byte(0x90); + emit_int8(0x66); + emit_int8(0x66); + emit_int8((unsigned char)0x90); } switch (i) { case 4: - emit_byte(0x66); + emit_int8(0x66); case 3: - emit_byte(0x66); + emit_int8(0x66); case 2: - emit_byte(0x66); + emit_int8(0x66); case 1: - emit_byte(0x90); + emit_int8((unsigned char)0x90); break; default: assert(i == 0, " "); @@ -2218,8 +2225,8 @@ void Assembler::nop(int i) { void Assembler::notl(Register dst) { int encode = prefix_and_encode(dst->encoding()); - emit_byte(0xF7); - emit_byte(0xD0 | encode ); + emit_int8((unsigned char)0xF7); + emit_int8((unsigned char)(0xD0 | encode)); } void Assembler::orl(Address dst, int32_t imm32) { @@ -2236,7 +2243,7 @@ void Assembler::orl(Register dst, int32_t imm32) { void Assembler::orl(Register dst, Address src) { InstructionMark im(this); prefix(src, dst); - emit_byte(0x0B); + emit_int8(0x0B); emit_operand(dst, src); } @@ -2260,61 +2267,61 @@ void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) { assert(VM_Version::supports_sse4_2(), ""); InstructionMark im(this); simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A); - emit_byte(0x61); + emit_int8(0x61); emit_operand(dst, src); - emit_byte(imm8); + emit_int8(imm8); } void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) { assert(VM_Version::supports_sse4_2(), ""); int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A); - emit_byte(0x61); - emit_byte(0xC0 | encode); - emit_byte(imm8); + emit_int8(0x61); + emit_int8((unsigned char)(0xC0 | encode)); + emit_int8(imm8); } void Assembler::pmovzxbw(XMMRegister dst, Address src) { assert(VM_Version::supports_sse4_1(), ""); InstructionMark im(this); simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); - emit_byte(0x30); + emit_int8(0x30); emit_operand(dst, src); } void Assembler::pmovzxbw(XMMRegister dst, XMMRegister src) { assert(VM_Version::supports_sse4_1(), ""); int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38); - emit_byte(0x30); - emit_byte(0xC0 | encode); + emit_int8(0x30); + emit_int8((unsigned char)(0xC0 | encode)); } // generic void Assembler::pop(Register dst) { int encode = prefix_and_encode(dst->encoding()); - emit_byte(0x58 | encode); + emit_int8(0x58 | encode); } void Assembler::popcntl(Register dst, Address src) { assert(VM_Version::supports_popcnt(), "must support"); InstructionMark im(this); - emit_byte(0xF3); + emit_int8((unsigned char)0xF3); prefix(src, dst); - emit_byte(0x0F); - emit_byte(0xB8); + emit_int8(0x0F); + emit_int8((unsigned char)0xB8); emit_operand(dst, src); } void Assembler::popcntl(Register dst, Register src) { assert(VM_Version::supports_popcnt(), "must support"); - emit_byte(0xF3); + emit_int8((unsigned char)0xF3); int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); - emit_byte(0xB8); - emit_byte(0xC0 | encode); + emit_int8(0x0F); + emit_int8((unsigned char)0xB8); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::popf() { - emit_byte(0x9D); + emit_int8((unsigned char)0x9D); } #ifndef _LP64 // no 32bit push/pop on amd64 @@ -2322,21 +2329,21 @@ void Assembler::popl(Address dst) { // NOTE: this will adjust stack by 8byte on 64bits InstructionMark im(this); prefix(dst); - emit_byte(0x8F); + emit_int8((unsigned char)0x8F); emit_operand(rax, dst); } #endif void Assembler::prefetch_prefix(Address src) { prefix(src); - emit_byte(0x0F); + emit_int8(0x0F); } void Assembler::prefetchnta(Address src) { NOT_LP64(assert(VM_Version::supports_sse(), "must support")); InstructionMark im(this); prefetch_prefix(src); - emit_byte(0x18); + emit_int8(0x18); emit_operand(rax, src); // 0, src } @@ -2344,7 +2351,7 @@ void Assembler::prefetchr(Address src) { assert(VM_Version::supports_3dnow_prefetch(), "must support"); InstructionMark im(this); prefetch_prefix(src); - emit_byte(0x0D); + emit_int8(0x0D); emit_operand(rax, src); // 0, src } @@ -2352,7 +2359,7 @@ void Assembler::prefetcht0(Address src) { NOT_LP64(assert(VM_Version::supports_sse(), "must support")); InstructionMark im(this); prefetch_prefix(src); - emit_byte(0x18); + emit_int8(0x18); emit_operand(rcx, src); // 1, src } @@ -2360,7 +2367,7 @@ void Assembler::prefetcht1(Address src) { NOT_LP64(assert(VM_Version::supports_sse(), "must support")); InstructionMark im(this); prefetch_prefix(src); - emit_byte(0x18); + emit_int8(0x18); emit_operand(rdx, src); // 2, src } @@ -2368,7 +2375,7 @@ void Assembler::prefetcht2(Address src) { NOT_LP64(assert(VM_Version::supports_sse(), "must support")); InstructionMark im(this); prefetch_prefix(src); - emit_byte(0x18); + emit_int8(0x18); emit_operand(rbx, src); // 3, src } @@ -2376,27 +2383,26 @@ void Assembler::prefetchw(Address src) { assert(VM_Version::supports_3dnow_prefetch(), "must support"); InstructionMark im(this); prefetch_prefix(src); - emit_byte(0x0D); + emit_int8(0x0D); emit_operand(rcx, src); // 1, src } void Assembler::prefix(Prefix p) { - a_byte(p); + emit_int8(p); } void Assembler::pshufb(XMMRegister dst, XMMRegister src) { assert(VM_Version::supports_ssse3(), ""); int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); - emit_byte(0x00); - emit_byte(0xC0 | encode); + emit_int8(0x00); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::pshufb(XMMRegister dst, Address src) { assert(VM_Version::supports_ssse3(), ""); - assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); InstructionMark im(this); simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); - emit_byte(0x00); + emit_int8(0x00); emit_operand(dst, src); } @@ -2404,7 +2410,7 @@ void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) { assert(isByte(mode), "invalid value"); NOT_LP64(assert(VM_Version::supports_sse2(), "")); emit_simd_arith_nonds(0x70, dst, src, VEX_SIMD_66); - emit_byte(mode & 0xFF); + emit_int8(mode & 0xFF); } @@ -2414,16 +2420,16 @@ void Assembler::pshufd(XMMRegister dst, Address src, int mode) { assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); InstructionMark im(this); simd_prefix(dst, src, VEX_SIMD_66); - emit_byte(0x70); + emit_int8(0x70); emit_operand(dst, src); - emit_byte(mode & 0xFF); + emit_int8(mode & 0xFF); } void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) { assert(isByte(mode), "invalid value"); NOT_LP64(assert(VM_Version::supports_sse2(), "")); emit_simd_arith_nonds(0x70, dst, src, VEX_SIMD_F2); - emit_byte(mode & 0xFF); + emit_int8(mode & 0xFF); } void Assembler::pshuflw(XMMRegister dst, Address src, int mode) { @@ -2432,18 +2438,18 @@ void Assembler::pshuflw(XMMRegister dst, Address src, int mode) { assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); InstructionMark im(this); simd_prefix(dst, src, VEX_SIMD_F2); - emit_byte(0x70); + emit_int8(0x70); emit_operand(dst, src); - emit_byte(mode & 0xFF); + emit_int8(mode & 0xFF); } void Assembler::psrldq(XMMRegister dst, int shift) { // Shift 128 bit value in xmm register by number of bytes. NOT_LP64(assert(VM_Version::supports_sse2(), "")); int encode = simd_prefix_and_encode(xmm3, dst, dst, VEX_SIMD_66); - emit_byte(0x73); - emit_byte(0xC0 | encode); - emit_byte(shift); + emit_int8(0x73); + emit_int8((unsigned char)(0xC0 | encode)); + emit_int8(shift); } void Assembler::ptest(XMMRegister dst, Address src) { @@ -2451,15 +2457,15 @@ void Assembler::ptest(XMMRegister dst, Address src) { assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); InstructionMark im(this); simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); - emit_byte(0x17); + emit_int8(0x17); emit_operand(dst, src); } void Assembler::ptest(XMMRegister dst, XMMRegister src) { assert(VM_Version::supports_sse4_1(), ""); int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38); - emit_byte(0x17); - emit_byte(0xC0 | encode); + emit_int8(0x17); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::punpcklbw(XMMRegister dst, Address src) { @@ -2492,18 +2498,18 @@ void Assembler::punpcklqdq(XMMRegister dst, XMMRegister src) { void Assembler::push(int32_t imm32) { // in 64bits we push 64bits onto the stack but only // take a 32bit immediate - emit_byte(0x68); + emit_int8(0x68); emit_long(imm32); } void Assembler::push(Register src) { int encode = prefix_and_encode(src->encoding()); - emit_byte(0x50 | encode); + emit_int8(0x50 | encode); } void Assembler::pushf() { - emit_byte(0x9C); + emit_int8((unsigned char)0x9C); } #ifndef _LP64 // no 32bit push/pop on amd64 @@ -2511,7 +2517,7 @@ void Assembler::pushl(Address src) { // Note this will push 64bit on 64bit InstructionMark im(this); prefix(src); - emit_byte(0xFF); + emit_int8((unsigned char)0xFF); emit_operand(rsi, src); } #endif @@ -2520,58 +2526,58 @@ void Assembler::rcll(Register dst, int imm8) { assert(isShiftCount(imm8), "illegal shift count"); int encode = prefix_and_encode(dst->encoding()); if (imm8 == 1) { - emit_byte(0xD1); - emit_byte(0xD0 | encode); + emit_int8((unsigned char)0xD1); + emit_int8((unsigned char)(0xD0 | encode)); } else { - emit_byte(0xC1); - emit_byte(0xD0 | encode); - emit_byte(imm8); + emit_int8((unsigned char)0xC1); + emit_int8((unsigned char)0xD0 | encode); + emit_int8(imm8); } } // copies data from [esi] to [edi] using rcx pointer sized words // generic void Assembler::rep_mov() { - emit_byte(0xF3); + emit_int8((unsigned char)0xF3); // MOVSQ LP64_ONLY(prefix(REX_W)); - emit_byte(0xA5); + emit_int8((unsigned char)0xA5); } // sets rcx pointer sized words with rax, value at [edi] // generic void Assembler::rep_set() { // rep_set - emit_byte(0xF3); + emit_int8((unsigned char)0xF3); // STOSQ LP64_ONLY(prefix(REX_W)); - emit_byte(0xAB); + emit_int8((unsigned char)0xAB); } // scans rcx pointer sized words at [edi] for occurance of rax, // generic void Assembler::repne_scan() { // repne_scan - emit_byte(0xF2); + emit_int8((unsigned char)0xF2); // SCASQ LP64_ONLY(prefix(REX_W)); - emit_byte(0xAF); + emit_int8((unsigned char)0xAF); } #ifdef _LP64 // scans rcx 4 byte words at [edi] for occurance of rax, // generic void Assembler::repne_scanl() { // repne_scan - emit_byte(0xF2); + emit_int8((unsigned char)0xF2); // SCASL - emit_byte(0xAF); + emit_int8((unsigned char)0xAF); } #endif void Assembler::ret(int imm16) { if (imm16 == 0) { - emit_byte(0xC3); + emit_int8((unsigned char)0xC3); } else { - emit_byte(0xC2); - emit_word(imm16); + emit_int8((unsigned char)0xC2); + emit_int16(imm16); } } @@ -2580,26 +2586,26 @@ void Assembler::sahf() { // Not supported in 64bit mode ShouldNotReachHere(); #endif - emit_byte(0x9E); + emit_int8((unsigned char)0x9E); } void Assembler::sarl(Register dst, int imm8) { int encode = prefix_and_encode(dst->encoding()); assert(isShiftCount(imm8), "illegal shift count"); if (imm8 == 1) { - emit_byte(0xD1); - emit_byte(0xF8 | encode); + emit_int8((unsigned char)0xD1); + emit_int8((unsigned char)(0xF8 | encode)); } else { - emit_byte(0xC1); - emit_byte(0xF8 | encode); - emit_byte(imm8); + emit_int8((unsigned char)0xC1); + emit_int8((unsigned char)(0xF8 | encode)); + emit_int8(imm8); } } void Assembler::sarl(Register dst) { int encode = prefix_and_encode(dst->encoding()); - emit_byte(0xD3); - emit_byte(0xF8 | encode); + emit_int8((unsigned char)0xD3); + emit_int8((unsigned char)(0xF8 | encode)); } void Assembler::sbbl(Address dst, int32_t imm32) { @@ -2617,7 +2623,7 @@ void Assembler::sbbl(Register dst, int32_t imm32) { void Assembler::sbbl(Register dst, Address src) { InstructionMark im(this); prefix(src, dst); - emit_byte(0x1B); + emit_int8(0x1B); emit_operand(dst, src); } @@ -2629,47 +2635,47 @@ void Assembler::sbbl(Register dst, Register src) { void Assembler::setb(Condition cc, Register dst) { assert(0 <= cc && cc < 16, "illegal cc"); int encode = prefix_and_encode(dst->encoding(), true); - emit_byte(0x0F); - emit_byte(0x90 | cc); - emit_byte(0xC0 | encode); + emit_int8(0x0F); + emit_int8((unsigned char)0x90 | cc); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::shll(Register dst, int imm8) { assert(isShiftCount(imm8), "illegal shift count"); int encode = prefix_and_encode(dst->encoding()); if (imm8 == 1 ) { - emit_byte(0xD1); - emit_byte(0xE0 | encode); + emit_int8((unsigned char)0xD1); + emit_int8((unsigned char)(0xE0 | encode)); } else { - emit_byte(0xC1); - emit_byte(0xE0 | encode); - emit_byte(imm8); + emit_int8((unsigned char)0xC1); + emit_int8((unsigned char)(0xE0 | encode)); + emit_int8(imm8); } } void Assembler::shll(Register dst) { int encode = prefix_and_encode(dst->encoding()); - emit_byte(0xD3); - emit_byte(0xE0 | encode); + emit_int8((unsigned char)0xD3); + emit_int8((unsigned char)(0xE0 | encode)); } void Assembler::shrl(Register dst, int imm8) { assert(isShiftCount(imm8), "illegal shift count"); int encode = prefix_and_encode(dst->encoding()); - emit_byte(0xC1); - emit_byte(0xE8 | encode); - emit_byte(imm8); + emit_int8((unsigned char)0xC1); + emit_int8((unsigned char)(0xE8 | encode)); + emit_int8(imm8); } void Assembler::shrl(Register dst) { int encode = prefix_and_encode(dst->encoding()); - emit_byte(0xD3); - emit_byte(0xE8 | encode); + emit_int8((unsigned char)0xD3); + emit_int8((unsigned char)(0xE8 | encode)); } // copies a single word from [esi] to [edi] void Assembler::smovl() { - emit_byte(0xA5); + emit_int8((unsigned char)0xA5); } void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) { @@ -2688,7 +2694,7 @@ void Assembler::sqrtss(XMMRegister dst, XMMRegister src) { } void Assembler::std() { - emit_byte(0xfd); + emit_int8((unsigned char)0xFD); } void Assembler::sqrtss(XMMRegister dst, Address src) { @@ -2700,8 +2706,8 @@ void Assembler::stmxcsr( Address dst) { NOT_LP64(assert(VM_Version::supports_sse(), "")); InstructionMark im(this); prefix(dst); - emit_byte(0x0F); - emit_byte(0xAE); + emit_int8(0x0F); + emit_int8((unsigned char)0xAE); emit_operand(as_Register(3), dst); } @@ -2714,7 +2720,7 @@ void Assembler::subl(Address dst, int32_t imm32) { void Assembler::subl(Address dst, Register src) { InstructionMark im(this); prefix(dst, src); - emit_byte(0x29); + emit_int8(0x29); emit_operand(src, dst); } @@ -2732,7 +2738,7 @@ void Assembler::subl_imm32(Register dst, int32_t imm32) { void Assembler::subl(Register dst, Address src) { InstructionMark im(this); prefix(src, dst); - emit_byte(0x2B); + emit_int8(0x2B); emit_operand(dst, src); } @@ -2773,11 +2779,11 @@ void Assembler::testl(Register dst, int32_t imm32) { // 8bit operands int encode = dst->encoding(); if (encode == 0) { - emit_byte(0xA9); + emit_int8((unsigned char)0xA9); } else { encode = prefix_and_encode(encode); - emit_byte(0xF7); - emit_byte(0xC0 | encode); + emit_int8((unsigned char)0xF7); + emit_int8((unsigned char)(0xC0 | encode)); } emit_long(imm32); } @@ -2790,7 +2796,7 @@ void Assembler::testl(Register dst, Register src) { void Assembler::testl(Register dst, Address src) { InstructionMark im(this); prefix(src, dst); - emit_byte(0x85); + emit_int8((unsigned char)0x85); emit_operand(dst, src); } @@ -2818,28 +2824,28 @@ void Assembler::ucomiss(XMMRegister dst, XMMRegister src) { void Assembler::xaddl(Address dst, Register src) { InstructionMark im(this); prefix(dst, src); - emit_byte(0x0F); - emit_byte(0xC1); + emit_int8(0x0F); + emit_int8((unsigned char)0xC1); emit_operand(src, dst); } void Assembler::xchgl(Register dst, Address src) { // xchg InstructionMark im(this); prefix(src, dst); - emit_byte(0x87); + emit_int8((unsigned char)0x87); emit_operand(dst, src); } void Assembler::xchgl(Register dst, Register src) { int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x87); - emit_byte(0xc0 | encode); + emit_int8((unsigned char)0x87); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::xgetbv() { - emit_byte(0x0F); - emit_byte(0x01); - emit_byte(0xD0); + emit_int8(0x0F); + emit_int8(0x01); + emit_int8((unsigned char)0xD0); } void Assembler::xorl(Register dst, int32_t imm32) { @@ -2850,7 +2856,7 @@ void Assembler::xorl(Register dst, int32_t imm32) { void Assembler::xorl(Register dst, Address src) { InstructionMark im(this); prefix(src, dst); - emit_byte(0x33); + emit_int8(0x33); emit_operand(dst, src); } @@ -3276,8 +3282,8 @@ void Assembler::pmullw(XMMRegister dst, XMMRegister src) { void Assembler::pmulld(XMMRegister dst, XMMRegister src) { assert(VM_Version::supports_sse4_1(), ""); int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); - emit_byte(0x40); - emit_byte(0xC0 | encode); + emit_int8(0x40); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { @@ -3288,8 +3294,8 @@ void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_38); - emit_byte(0x40); - emit_byte(0xC0 | encode); + emit_int8(0x40); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { @@ -3303,7 +3309,7 @@ void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, Address src, bool vect int dst_enc = dst->encoding(); int nds_enc = nds->is_valid() ? nds->encoding() : 0; vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, false, vector256); - emit_byte(0x40); + emit_int8(0x40); emit_operand(dst, src); } @@ -3312,27 +3318,27 @@ void Assembler::psllw(XMMRegister dst, int shift) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); // XMM6 is for /6 encoding: 66 0F 71 /6 ib int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66); - emit_byte(0x71); - emit_byte(0xC0 | encode); - emit_byte(shift & 0xFF); + emit_int8(0x71); + emit_int8((unsigned char)(0xC0 | encode)); + emit_int8(shift & 0xFF); } void Assembler::pslld(XMMRegister dst, int shift) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); // XMM6 is for /6 encoding: 66 0F 72 /6 ib int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66); - emit_byte(0x72); - emit_byte(0xC0 | encode); - emit_byte(shift & 0xFF); + emit_int8(0x72); + emit_int8((unsigned char)(0xC0 | encode)); + emit_int8(shift & 0xFF); } void Assembler::psllq(XMMRegister dst, int shift) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); // XMM6 is for /6 encoding: 66 0F 73 /6 ib int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66); - emit_byte(0x73); - emit_byte(0xC0 | encode); - emit_byte(shift & 0xFF); + emit_int8(0x73); + emit_int8((unsigned char)(0xC0 | encode)); + emit_int8(shift & 0xFF); } void Assembler::psllw(XMMRegister dst, XMMRegister shift) { @@ -3354,21 +3360,21 @@ void Assembler::vpsllw(XMMRegister dst, XMMRegister src, int shift, bool vector2 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); // XMM6 is for /6 encoding: 66 0F 71 /6 ib emit_vex_arith(0x71, xmm6, dst, src, VEX_SIMD_66, vector256); - emit_byte(shift & 0xFF); + emit_int8(shift & 0xFF); } void Assembler::vpslld(XMMRegister dst, XMMRegister src, int shift, bool vector256) { assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); // XMM6 is for /6 encoding: 66 0F 72 /6 ib emit_vex_arith(0x72, xmm6, dst, src, VEX_SIMD_66, vector256); - emit_byte(shift & 0xFF); + emit_int8(shift & 0xFF); } void Assembler::vpsllq(XMMRegister dst, XMMRegister src, int shift, bool vector256) { assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); // XMM6 is for /6 encoding: 66 0F 73 /6 ib emit_vex_arith(0x73, xmm6, dst, src, VEX_SIMD_66, vector256); - emit_byte(shift & 0xFF); + emit_int8(shift & 0xFF); } void Assembler::vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) { @@ -3391,18 +3397,18 @@ void Assembler::psrlw(XMMRegister dst, int shift) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); // XMM2 is for /2 encoding: 66 0F 71 /2 ib int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66); - emit_byte(0x71); - emit_byte(0xC0 | encode); - emit_byte(shift & 0xFF); + emit_int8(0x71); + emit_int8((unsigned char)(0xC0 | encode)); + emit_int8(shift & 0xFF); } void Assembler::psrld(XMMRegister dst, int shift) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); // XMM2 is for /2 encoding: 66 0F 72 /2 ib int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66); - emit_byte(0x72); - emit_byte(0xC0 | encode); - emit_byte(shift & 0xFF); + emit_int8(0x72); + emit_int8((unsigned char)(0xC0 | encode)); + emit_int8(shift & 0xFF); } void Assembler::psrlq(XMMRegister dst, int shift) { @@ -3411,9 +3417,9 @@ void Assembler::psrlq(XMMRegister dst, int shift) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); // XMM2 is for /2 encoding: 66 0F 73 /2 ib int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66); - emit_byte(0x73); - emit_byte(0xC0 | encode); - emit_byte(shift & 0xFF); + emit_int8(0x73); + emit_int8((unsigned char)(0xC0 | encode)); + emit_int8(shift & 0xFF); } void Assembler::psrlw(XMMRegister dst, XMMRegister shift) { @@ -3435,21 +3441,21 @@ void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, int shift, bool vector2 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); // XMM2 is for /2 encoding: 66 0F 73 /2 ib emit_vex_arith(0x71, xmm2, dst, src, VEX_SIMD_66, vector256); - emit_byte(shift & 0xFF); + emit_int8(shift & 0xFF); } void Assembler::vpsrld(XMMRegister dst, XMMRegister src, int shift, bool vector256) { assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); // XMM2 is for /2 encoding: 66 0F 73 /2 ib emit_vex_arith(0x72, xmm2, dst, src, VEX_SIMD_66, vector256); - emit_byte(shift & 0xFF); + emit_int8(shift & 0xFF); } void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, int shift, bool vector256) { assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); // XMM2 is for /2 encoding: 66 0F 73 /2 ib emit_vex_arith(0x73, xmm2, dst, src, VEX_SIMD_66, vector256); - emit_byte(shift & 0xFF); + emit_int8(shift & 0xFF); } void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) { @@ -3472,18 +3478,18 @@ void Assembler::psraw(XMMRegister dst, int shift) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); // XMM4 is for /4 encoding: 66 0F 71 /4 ib int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66); - emit_byte(0x71); - emit_byte(0xC0 | encode); - emit_byte(shift & 0xFF); + emit_int8(0x71); + emit_int8((unsigned char)(0xC0 | encode)); + emit_int8(shift & 0xFF); } void Assembler::psrad(XMMRegister dst, int shift) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); // XMM4 is for /4 encoding: 66 0F 72 /4 ib int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66); - emit_byte(0x72); - emit_byte(0xC0 | encode); - emit_byte(shift & 0xFF); + emit_int8(0x72); + emit_int8((unsigned char)(0xC0 | encode)); + emit_int8(shift & 0xFF); } void Assembler::psraw(XMMRegister dst, XMMRegister shift) { @@ -3500,14 +3506,14 @@ void Assembler::vpsraw(XMMRegister dst, XMMRegister src, int shift, bool vector2 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); // XMM4 is for /4 encoding: 66 0F 71 /4 ib emit_vex_arith(0x71, xmm4, dst, src, VEX_SIMD_66, vector256); - emit_byte(shift & 0xFF); + emit_int8(shift & 0xFF); } void Assembler::vpsrad(XMMRegister dst, XMMRegister src, int shift, bool vector256) { assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); // XMM4 is for /4 encoding: 66 0F 71 /4 ib emit_vex_arith(0x72, xmm4, dst, src, VEX_SIMD_66, vector256); - emit_byte(shift & 0xFF); + emit_int8(shift & 0xFF); } void Assembler::vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) { @@ -3572,11 +3578,11 @@ void Assembler::vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src) assert(VM_Version::supports_avx(), ""); bool vector256 = true; int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_3A); - emit_byte(0x18); - emit_byte(0xC0 | encode); + emit_int8(0x18); + emit_int8((unsigned char)(0xC0 | encode)); // 0x00 - insert into lower 128 bits // 0x01 - insert into upper 128 bits - emit_byte(0x01); + emit_int8(0x01); } void Assembler::vinsertf128h(XMMRegister dst, Address src) { @@ -3587,10 +3593,10 @@ void Assembler::vinsertf128h(XMMRegister dst, Address src) { int dst_enc = dst->encoding(); // swap src<->dst for encoding vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256); - emit_byte(0x18); + emit_int8(0x18); emit_operand(dst, src); // 0x01 - insert into upper 128 bits - emit_byte(0x01); + emit_int8(0x01); } void Assembler::vextractf128h(Address dst, XMMRegister src) { @@ -3600,21 +3606,21 @@ void Assembler::vextractf128h(Address dst, XMMRegister src) { assert(src != xnoreg, "sanity"); int src_enc = src->encoding(); vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256); - emit_byte(0x19); + emit_int8(0x19); emit_operand(src, dst); // 0x01 - extract from upper 128 bits - emit_byte(0x01); + emit_int8(0x01); } void Assembler::vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src) { assert(VM_Version::supports_avx2(), ""); bool vector256 = true; int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_3A); - emit_byte(0x38); - emit_byte(0xC0 | encode); + emit_int8(0x38); + emit_int8((unsigned char)(0xC0 | encode)); // 0x00 - insert into lower 128 bits // 0x01 - insert into upper 128 bits - emit_byte(0x01); + emit_int8(0x01); } void Assembler::vinserti128h(XMMRegister dst, Address src) { @@ -3625,10 +3631,10 @@ void Assembler::vinserti128h(XMMRegister dst, Address src) { int dst_enc = dst->encoding(); // swap src<->dst for encoding vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256); - emit_byte(0x38); + emit_int8(0x38); emit_operand(dst, src); // 0x01 - insert into upper 128 bits - emit_byte(0x01); + emit_int8(0x01); } void Assembler::vextracti128h(Address dst, XMMRegister src) { @@ -3638,16 +3644,16 @@ void Assembler::vextracti128h(Address dst, XMMRegister src) { assert(src != xnoreg, "sanity"); int src_enc = src->encoding(); vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256); - emit_byte(0x39); + emit_int8(0x39); emit_operand(src, dst); // 0x01 - extract from upper 128 bits - emit_byte(0x01); + emit_int8(0x01); } void Assembler::vzeroupper() { assert(VM_Version::supports_avx(), ""); (void)vex_prefix_and_encode(xmm0, xmm0, xmm0, VEX_SIMD_NONE); - emit_byte(0x77); + emit_int8(0x77); } @@ -3657,15 +3663,15 @@ void Assembler::vzeroupper() { void Assembler::cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec) { // NO PREFIX AS NEVER 64BIT InstructionMark im(this); - emit_byte(0x81); - emit_byte(0xF8 | src1->encoding()); + emit_int8((unsigned char)0x81); + emit_int8((unsigned char)(0xF8 | src1->encoding())); emit_data(imm32, rspec, 0); } void Assembler::cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec) { // NO PREFIX AS NEVER 64BIT (not even 32bit versions of 64bit regs InstructionMark im(this); - emit_byte(0x81); + emit_int8((unsigned char)0x81); emit_operand(rdi, src1); emit_data(imm32, rspec, 0); } @@ -3675,14 +3681,14 @@ void Assembler::cmp_literal32(Address src1, int32_t imm32, RelocationHolder cons // into rdx:rax. The ZF is set if the compared values were equal, and cleared otherwise. void Assembler::cmpxchg8(Address adr) { InstructionMark im(this); - emit_byte(0x0F); - emit_byte(0xc7); + emit_int8(0x0F); + emit_int8((unsigned char)0xC7); emit_operand(rcx, adr); } void Assembler::decl(Register dst) { // Don't use it directly. Use MacroAssembler::decrementl() instead. - emit_byte(0x48 | dst->encoding()); + emit_int8(0x48 | dst->encoding()); } #endif // _LP64 @@ -3690,8 +3696,8 @@ void Assembler::decl(Register dst) { // 64bit typically doesn't use the x87 but needs to for the trig funcs void Assembler::fabs() { - emit_byte(0xD9); - emit_byte(0xE1); + emit_int8((unsigned char)0xD9); + emit_int8((unsigned char)0xE1); } void Assembler::fadd(int i) { @@ -3700,13 +3706,13 @@ void Assembler::fadd(int i) { void Assembler::fadd_d(Address src) { InstructionMark im(this); - emit_byte(0xDC); + emit_int8((unsigned char)0xDC); emit_operand32(rax, src); } void Assembler::fadd_s(Address src) { InstructionMark im(this); - emit_byte(0xD8); + emit_int8((unsigned char)0xD8); emit_operand32(rax, src); } @@ -3719,8 +3725,8 @@ void Assembler::faddp(int i) { } void Assembler::fchs() { - emit_byte(0xD9); - emit_byte(0xE0); + emit_int8((unsigned char)0xD9); + emit_int8((unsigned char)0xE0); } void Assembler::fcom(int i) { @@ -3733,29 +3739,29 @@ void Assembler::fcomp(int i) { void Assembler::fcomp_d(Address src) { InstructionMark im(this); - emit_byte(0xDC); + emit_int8((unsigned char)0xDC); emit_operand32(rbx, src); } void Assembler::fcomp_s(Address src) { InstructionMark im(this); - emit_byte(0xD8); + emit_int8((unsigned char)0xD8); emit_operand32(rbx, src); } void Assembler::fcompp() { - emit_byte(0xDE); - emit_byte(0xD9); + emit_int8((unsigned char)0xDE); + emit_int8((unsigned char)0xD9); } void Assembler::fcos() { - emit_byte(0xD9); - emit_byte(0xFF); + emit_int8((unsigned char)0xD9); + emit_int8((unsigned char)0xFF); } void Assembler::fdecstp() { - emit_byte(0xD9); - emit_byte(0xF6); + emit_int8((unsigned char)0xD9); + emit_int8((unsigned char)0xF6); } void Assembler::fdiv(int i) { @@ -3764,13 +3770,13 @@ void Assembler::fdiv(int i) { void Assembler::fdiv_d(Address src) { InstructionMark im(this); - emit_byte(0xDC); + emit_int8((unsigned char)0xDC); emit_operand32(rsi, src); } void Assembler::fdiv_s(Address src) { InstructionMark im(this); - emit_byte(0xD8); + emit_int8((unsigned char)0xD8); emit_operand32(rsi, src); } @@ -3791,13 +3797,13 @@ void Assembler::fdivr(int i) { void Assembler::fdivr_d(Address src) { InstructionMark im(this); - emit_byte(0xDC); + emit_int8((unsigned char)0xDC); emit_operand32(rdi, src); } void Assembler::fdivr_s(Address src) { InstructionMark im(this); - emit_byte(0xD8); + emit_int8((unsigned char)0xD8); emit_operand32(rdi, src); } @@ -3815,59 +3821,59 @@ void Assembler::ffree(int i) { void Assembler::fild_d(Address adr) { InstructionMark im(this); - emit_byte(0xDF); + emit_int8((unsigned char)0xDF); emit_operand32(rbp, adr); } void Assembler::fild_s(Address adr) { InstructionMark im(this); - emit_byte(0xDB); + emit_int8((unsigned char)0xDB); emit_operand32(rax, adr); } void Assembler::fincstp() { - emit_byte(0xD9); - emit_byte(0xF7); + emit_int8((unsigned char)0xD9); + emit_int8((unsigned char)0xF7); } void Assembler::finit() { - emit_byte(0x9B); - emit_byte(0xDB); - emit_byte(0xE3); + emit_int8((unsigned char)0x9B); + emit_int8((unsigned char)0xDB); + emit_int8((unsigned char)0xE3); } void Assembler::fist_s(Address adr) { InstructionMark im(this); - emit_byte(0xDB); + emit_int8((unsigned char)0xDB); emit_operand32(rdx, adr); } void Assembler::fistp_d(Address adr) { InstructionMark im(this); - emit_byte(0xDF); + emit_int8((unsigned char)0xDF); emit_operand32(rdi, adr); } void Assembler::fistp_s(Address adr) { InstructionMark im(this); - emit_byte(0xDB); + emit_int8((unsigned char)0xDB); emit_operand32(rbx, adr); } void Assembler::fld1() { - emit_byte(0xD9); - emit_byte(0xE8); + emit_int8((unsigned char)0xD9); + emit_int8((unsigned char)0xE8); } void Assembler::fld_d(Address adr) { InstructionMark im(this); - emit_byte(0xDD); + emit_int8((unsigned char)0xDD); emit_operand32(rax, adr); } void Assembler::fld_s(Address adr) { InstructionMark im(this); - emit_byte(0xD9); + emit_int8((unsigned char)0xD9); emit_operand32(rax, adr); } @@ -3878,35 +3884,35 @@ void Assembler::fld_s(int index) { void Assembler::fld_x(Address adr) { InstructionMark im(this); - emit_byte(0xDB); + emit_int8((unsigned char)0xDB); emit_operand32(rbp, adr); } void Assembler::fldcw(Address src) { InstructionMark im(this); - emit_byte(0xd9); + emit_int8((unsigned char)0xD9); emit_operand32(rbp, src); } void Assembler::fldenv(Address src) { InstructionMark im(this); - emit_byte(0xD9); + emit_int8((unsigned char)0xD9); emit_operand32(rsp, src); } void Assembler::fldlg2() { - emit_byte(0xD9); - emit_byte(0xEC); + emit_int8((unsigned char)0xD9); + emit_int8((unsigned char)0xEC); } void Assembler::fldln2() { - emit_byte(0xD9); - emit_byte(0xED); + emit_int8((unsigned char)0xD9); + emit_int8((unsigned char)0xED); } void Assembler::fldz() { - emit_byte(0xD9); - emit_byte(0xEE); + emit_int8((unsigned char)0xD9); + emit_int8((unsigned char)0xEE); } void Assembler::flog() { @@ -3927,13 +3933,13 @@ void Assembler::fmul(int i) { void Assembler::fmul_d(Address src) { InstructionMark im(this); - emit_byte(0xDC); + emit_int8((unsigned char)0xDC); emit_operand32(rcx, src); } void Assembler::fmul_s(Address src) { InstructionMark im(this); - emit_byte(0xD8); + emit_int8((unsigned char)0xD8); emit_operand32(rcx, src); } @@ -3947,63 +3953,63 @@ void Assembler::fmulp(int i) { void Assembler::fnsave(Address dst) { InstructionMark im(this); - emit_byte(0xDD); + emit_int8((unsigned char)0xDD); emit_operand32(rsi, dst); } void Assembler::fnstcw(Address src) { InstructionMark im(this); - emit_byte(0x9B); - emit_byte(0xD9); + emit_int8((unsigned char)0x9B); + emit_int8((unsigned char)0xD9); emit_operand32(rdi, src); } void Assembler::fnstsw_ax() { - emit_byte(0xdF); - emit_byte(0xE0); + emit_int8((unsigned char)0xDF); + emit_int8((unsigned char)0xE0); } void Assembler::fprem() { - emit_byte(0xD9); - emit_byte(0xF8); + emit_int8((unsigned char)0xD9); + emit_int8((unsigned char)0xF8); } void Assembler::fprem1() { - emit_byte(0xD9); - emit_byte(0xF5); + emit_int8((unsigned char)0xD9); + emit_int8((unsigned char)0xF5); } void Assembler::frstor(Address src) { InstructionMark im(this); - emit_byte(0xDD); + emit_int8((unsigned char)0xDD); emit_operand32(rsp, src); } void Assembler::fsin() { - emit_byte(0xD9); - emit_byte(0xFE); + emit_int8((unsigned char)0xD9); + emit_int8((unsigned char)0xFE); } void Assembler::fsqrt() { - emit_byte(0xD9); - emit_byte(0xFA); + emit_int8((unsigned char)0xD9); + emit_int8((unsigned char)0xFA); } void Assembler::fst_d(Address adr) { InstructionMark im(this); - emit_byte(0xDD); + emit_int8((unsigned char)0xDD); emit_operand32(rdx, adr); } void Assembler::fst_s(Address adr) { InstructionMark im(this); - emit_byte(0xD9); + emit_int8((unsigned char)0xD9); emit_operand32(rdx, adr); } void Assembler::fstp_d(Address adr) { InstructionMark im(this); - emit_byte(0xDD); + emit_int8((unsigned char)0xDD); emit_operand32(rbx, adr); } @@ -4013,13 +4019,13 @@ void Assembler::fstp_d(int index) { void Assembler::fstp_s(Address adr) { InstructionMark im(this); - emit_byte(0xD9); + emit_int8((unsigned char)0xD9); emit_operand32(rbx, adr); } void Assembler::fstp_x(Address adr) { InstructionMark im(this); - emit_byte(0xDB); + emit_int8((unsigned char)0xDB); emit_operand32(rdi, adr); } @@ -4029,13 +4035,13 @@ void Assembler::fsub(int i) { void Assembler::fsub_d(Address src) { InstructionMark im(this); - emit_byte(0xDC); + emit_int8((unsigned char)0xDC); emit_operand32(rsp, src); } void Assembler::fsub_s(Address src) { InstructionMark im(this); - emit_byte(0xD8); + emit_int8((unsigned char)0xD8); emit_operand32(rsp, src); } @@ -4053,13 +4059,13 @@ void Assembler::fsubr(int i) { void Assembler::fsubr_d(Address src) { InstructionMark im(this); - emit_byte(0xDC); + emit_int8((unsigned char)0xDC); emit_operand32(rbp, src); } void Assembler::fsubr_s(Address src) { InstructionMark im(this); - emit_byte(0xD8); + emit_int8((unsigned char)0xD8); emit_operand32(rbp, src); } @@ -4072,15 +4078,15 @@ void Assembler::fsubrp(int i) { } void Assembler::ftan() { - emit_byte(0xD9); - emit_byte(0xF2); - emit_byte(0xDD); - emit_byte(0xD8); + emit_int8((unsigned char)0xD9); + emit_int8((unsigned char)0xF2); + emit_int8((unsigned char)0xDD); + emit_int8((unsigned char)0xD8); } void Assembler::ftst() { - emit_byte(0xD9); - emit_byte(0xE4); + emit_int8((unsigned char)0xD9); + emit_int8((unsigned char)0xE4); } void Assembler::fucomi(int i) { @@ -4096,7 +4102,7 @@ void Assembler::fucomip(int i) { } void Assembler::fwait() { - emit_byte(0x9B); + emit_int8((unsigned char)0x9B); } void Assembler::fxch(int i) { @@ -4104,23 +4110,23 @@ void Assembler::fxch(int i) { } void Assembler::fyl2x() { - emit_byte(0xD9); - emit_byte(0xF1); + emit_int8((unsigned char)0xD9); + emit_int8((unsigned char)0xF1); } void Assembler::frndint() { - emit_byte(0xD9); - emit_byte(0xFC); + emit_int8((unsigned char)0xD9); + emit_int8((unsigned char)0xFC); } void Assembler::f2xm1() { - emit_byte(0xD9); - emit_byte(0xF0); + emit_int8((unsigned char)0xD9); + emit_int8((unsigned char)0xF0); } void Assembler::fldl2e() { - emit_byte(0xD9); - emit_byte(0xEA); + emit_int8((unsigned char)0xD9); + emit_int8((unsigned char)0xEA); } // SSE SIMD prefix byte values corresponding to VexSimdPrefix encoding. @@ -4131,7 +4137,7 @@ static int simd_opc[4] = { 0, 0, 0x38, 0x3A }; // Generate SSE legacy REX prefix and SIMD opcode based on VEX encoding. void Assembler::rex_prefix(Address adr, XMMRegister xreg, VexSimdPrefix pre, VexOpcode opc, bool rex_w) { if (pre > 0) { - emit_byte(simd_pre[pre]); + emit_int8(simd_pre[pre]); } if (rex_w) { prefixq(adr, xreg); @@ -4139,25 +4145,25 @@ void Assembler::rex_prefix(Address adr, XMMRegister xreg, VexSimdPrefix pre, Vex prefix(adr, xreg); } if (opc > 0) { - emit_byte(0x0F); + emit_int8(0x0F); int opc2 = simd_opc[opc]; if (opc2 > 0) { - emit_byte(opc2); + emit_int8(opc2); } } } int Assembler::rex_prefix_and_encode(int dst_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool rex_w) { if (pre > 0) { - emit_byte(simd_pre[pre]); + emit_int8(simd_pre[pre]); } int encode = (rex_w) ? prefixq_and_encode(dst_enc, src_enc) : prefix_and_encode(dst_enc, src_enc); if (opc > 0) { - emit_byte(0x0F); + emit_int8(0x0F); int opc2 = simd_opc[opc]; if (opc2 > 0) { - emit_byte(opc2); + emit_int8(opc2); } } return encode; @@ -4171,11 +4177,11 @@ void Assembler::vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, int n int byte1 = (vex_r ? VEX_R : 0) | (vex_x ? VEX_X : 0) | (vex_b ? VEX_B : 0); byte1 = (~byte1) & 0xE0; byte1 |= opc; - a_byte(byte1); + emit_int8(byte1); int byte2 = ((~nds_enc) & 0xf) << 3; byte2 |= (vex_w ? VEX_W : 0) | (vector256 ? 4 : 0) | pre; - emit_byte(byte2); + emit_int8(byte2); } else { prefix(VEX_2bytes); @@ -4183,7 +4189,7 @@ void Assembler::vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, int n byte1 = (~byte1) & 0x80; byte1 |= ((~nds_enc) & 0xf) << 3; byte1 |= (vector256 ? 4 : 0) | pre; - emit_byte(byte1); + emit_int8(byte1); } } @@ -4229,28 +4235,28 @@ int Assembler::simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegis void Assembler::emit_simd_arith(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre) { InstructionMark im(this); simd_prefix(dst, dst, src, pre); - emit_byte(opcode); + emit_int8(opcode); emit_operand(dst, src); } void Assembler::emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre) { int encode = simd_prefix_and_encode(dst, dst, src, pre); - emit_byte(opcode); - emit_byte(0xC0 | encode); + emit_int8(opcode); + emit_int8((unsigned char)(0xC0 | encode)); } // Versions with no second source register (non-destructive source). void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre) { InstructionMark im(this); simd_prefix(dst, xnoreg, src, pre); - emit_byte(opcode); + emit_int8(opcode); emit_operand(dst, src); } void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre) { int encode = simd_prefix_and_encode(dst, xnoreg, src, pre); - emit_byte(opcode); - emit_byte(0xC0 | encode); + emit_int8(opcode); + emit_int8((unsigned char)(0xC0 | encode)); } // 3-operands AVX instructions @@ -4258,22 +4264,22 @@ void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds, Address src, VexSimdPrefix pre, bool vector256) { InstructionMark im(this); vex_prefix(dst, nds, src, pre, vector256); - emit_byte(opcode); + emit_int8(opcode); emit_operand(dst, src); } void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre, bool vector256) { int encode = vex_prefix_and_encode(dst, nds, src, pre, vector256); - emit_byte(opcode); - emit_byte(0xC0 | encode); + emit_int8(opcode); + emit_int8((unsigned char)(0xC0 | encode)); } #ifndef _LP64 void Assembler::incl(Register dst) { // Don't use it directly. Use MacroAssembler::incrementl() instead. - emit_byte(0x40 | dst->encoding()); + emit_int8(0x40 | dst->encoding()); } void Assembler::lea(Register dst, Address src) { @@ -4282,7 +4288,7 @@ void Assembler::lea(Register dst, Address src) { void Assembler::mov_literal32(Address dst, int32_t imm32, RelocationHolder const& rspec) { InstructionMark im(this); - emit_byte(0xC7); + emit_int8((unsigned char)0xC7); emit_operand(rax, dst); emit_data((int)imm32, rspec, 0); } @@ -4290,49 +4296,49 @@ void Assembler::mov_literal32(Address dst, int32_t imm32, RelocationHolder cons void Assembler::mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec) { InstructionMark im(this); int encode = prefix_and_encode(dst->encoding()); - emit_byte(0xB8 | encode); + emit_int8((unsigned char)(0xB8 | encode)); emit_data((int)imm32, rspec, 0); } void Assembler::popa() { // 32bit - emit_byte(0x61); + emit_int8(0x61); } void Assembler::push_literal32(int32_t imm32, RelocationHolder const& rspec) { InstructionMark im(this); - emit_byte(0x68); + emit_int8(0x68); emit_data(imm32, rspec, 0); } void Assembler::pusha() { // 32bit - emit_byte(0x60); + emit_int8(0x60); } void Assembler::set_byte_if_not_zero(Register dst) { - emit_byte(0x0F); - emit_byte(0x95); - emit_byte(0xE0 | dst->encoding()); + emit_int8(0x0F); + emit_int8((unsigned char)0x95); + emit_int8((unsigned char)(0xE0 | dst->encoding())); } void Assembler::shldl(Register dst, Register src) { - emit_byte(0x0F); - emit_byte(0xA5); - emit_byte(0xC0 | src->encoding() << 3 | dst->encoding()); + emit_int8(0x0F); + emit_int8((unsigned char)0xA5); + emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding())); } void Assembler::shrdl(Register dst, Register src) { - emit_byte(0x0F); - emit_byte(0xAD); - emit_byte(0xC0 | src->encoding() << 3 | dst->encoding()); + emit_int8(0x0F); + emit_int8((unsigned char)0xAD); + emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding())); } #else // LP64 void Assembler::set_byte_if_not_zero(Register dst) { int enc = prefix_and_encode(dst->encoding(), true); - emit_byte(0x0F); - emit_byte(0x95); - emit_byte(0xE0 | enc); + emit_int8(0x0F); + emit_int8((unsigned char)0x95); + emit_int8((unsigned char)(0xE0 | enc)); } // 64bit only pieces of the assembler @@ -4670,7 +4676,7 @@ void Assembler::adcq(Register dst, int32_t imm32) { void Assembler::adcq(Register dst, Address src) { InstructionMark im(this); prefixq(src, dst); - emit_byte(0x13); + emit_int8(0x13); emit_operand(dst, src); } @@ -4688,7 +4694,7 @@ void Assembler::addq(Address dst, int32_t imm32) { void Assembler::addq(Address dst, Register src) { InstructionMark im(this); prefixq(dst, src); - emit_byte(0x01); + emit_int8(0x01); emit_operand(src, dst); } @@ -4700,7 +4706,7 @@ void Assembler::addq(Register dst, int32_t imm32) { void Assembler::addq(Register dst, Address src) { InstructionMark im(this); prefixq(src, dst); - emit_byte(0x03); + emit_int8(0x03); emit_operand(dst, src); } @@ -4712,7 +4718,7 @@ void Assembler::addq(Register dst, Register src) { void Assembler::andq(Address dst, int32_t imm32) { InstructionMark im(this); prefixq(dst); - emit_byte(0x81); + emit_int8((unsigned char)0x81); emit_operand(rsp, dst, 4); emit_long(imm32); } @@ -4725,7 +4731,7 @@ void Assembler::andq(Register dst, int32_t imm32) { void Assembler::andq(Register dst, Address src) { InstructionMark im(this); prefixq(src, dst); - emit_byte(0x23); + emit_int8(0x23); emit_operand(dst, src); } @@ -4736,56 +4742,56 @@ void Assembler::andq(Register dst, Register src) { void Assembler::bsfq(Register dst, Register src) { int encode = prefixq_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); - emit_byte(0xBC); - emit_byte(0xC0 | encode); + emit_int8(0x0F); + emit_int8((unsigned char)0xBC); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::bsrq(Register dst, Register src) { assert(!VM_Version::supports_lzcnt(), "encoding is treated as LZCNT"); int encode = prefixq_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); - emit_byte(0xBD); - emit_byte(0xC0 | encode); + emit_int8(0x0F); + emit_int8((unsigned char)0xBD); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::bswapq(Register reg) { int encode = prefixq_and_encode(reg->encoding()); - emit_byte(0x0F); - emit_byte(0xC8 | encode); + emit_int8(0x0F); + emit_int8((unsigned char)(0xC8 | encode)); } void Assembler::cdqq() { prefix(REX_W); - emit_byte(0x99); + emit_int8((unsigned char)0x99); } void Assembler::clflush(Address adr) { prefix(adr); - emit_byte(0x0F); - emit_byte(0xAE); + emit_int8(0x0F); + emit_int8((unsigned char)0xAE); emit_operand(rdi, adr); } void Assembler::cmovq(Condition cc, Register dst, Register src) { int encode = prefixq_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); - emit_byte(0x40 | cc); - emit_byte(0xC0 | encode); + emit_int8(0x0F); + emit_int8(0x40 | cc); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::cmovq(Condition cc, Register dst, Address src) { InstructionMark im(this); prefixq(src, dst); - emit_byte(0x0F); - emit_byte(0x40 | cc); + emit_int8(0x0F); + emit_int8(0x40 | cc); emit_operand(dst, src); } void Assembler::cmpq(Address dst, int32_t imm32) { InstructionMark im(this); prefixq(dst); - emit_byte(0x81); + emit_int8((unsigned char)0x81); emit_operand(rdi, dst, 4); emit_long(imm32); } @@ -4798,7 +4804,7 @@ void Assembler::cmpq(Register dst, int32_t imm32) { void Assembler::cmpq(Address dst, Register src) { InstructionMark im(this); prefixq(dst, src); - emit_byte(0x3B); + emit_int8(0x3B); emit_operand(src, dst); } @@ -4810,122 +4816,122 @@ void Assembler::cmpq(Register dst, Register src) { void Assembler::cmpq(Register dst, Address src) { InstructionMark im(this); prefixq(src, dst); - emit_byte(0x3B); + emit_int8(0x3B); emit_operand(dst, src); } void Assembler::cmpxchgq(Register reg, Address adr) { InstructionMark im(this); prefixq(adr, reg); - emit_byte(0x0F); - emit_byte(0xB1); + emit_int8(0x0F); + emit_int8((unsigned char)0xB1); emit_operand(reg, adr); } void Assembler::cvtsi2sdq(XMMRegister dst, Register src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F2); - emit_byte(0x2A); - emit_byte(0xC0 | encode); + emit_int8(0x2A); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::cvtsi2sdq(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); simd_prefix_q(dst, dst, src, VEX_SIMD_F2); - emit_byte(0x2A); + emit_int8(0x2A); emit_operand(dst, src); } void Assembler::cvtsi2ssq(XMMRegister dst, Register src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F3); - emit_byte(0x2A); - emit_byte(0xC0 | encode); + emit_int8(0x2A); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::cvtsi2ssq(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); InstructionMark im(this); simd_prefix_q(dst, dst, src, VEX_SIMD_F3); - emit_byte(0x2A); + emit_int8(0x2A); emit_operand(dst, src); } void Assembler::cvttsd2siq(Register dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F2); - emit_byte(0x2C); - emit_byte(0xC0 | encode); + emit_int8(0x2C); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::cvttss2siq(Register dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F3); - emit_byte(0x2C); - emit_byte(0xC0 | encode); + emit_int8(0x2C); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::decl(Register dst) { // Don't use it directly. Use MacroAssembler::decrementl() instead. // Use two-byte form (one-byte form is a REX prefix in 64-bit mode) int encode = prefix_and_encode(dst->encoding()); - emit_byte(0xFF); - emit_byte(0xC8 | encode); + emit_int8((unsigned char)0xFF); + emit_int8((unsigned char)(0xC8 | encode)); } void Assembler::decq(Register dst) { // Don't use it directly. Use MacroAssembler::decrementq() instead. // Use two-byte form (one-byte from is a REX prefix in 64-bit mode) int encode = prefixq_and_encode(dst->encoding()); - emit_byte(0xFF); - emit_byte(0xC8 | encode); + emit_int8((unsigned char)0xFF); + emit_int8(0xC8 | encode); } void Assembler::decq(Address dst) { // Don't use it directly. Use MacroAssembler::decrementq() instead. InstructionMark im(this); prefixq(dst); - emit_byte(0xFF); + emit_int8((unsigned char)0xFF); emit_operand(rcx, dst); } void Assembler::fxrstor(Address src) { prefixq(src); - emit_byte(0x0F); - emit_byte(0xAE); + emit_int8(0x0F); + emit_int8((unsigned char)0xAE); emit_operand(as_Register(1), src); } void Assembler::fxsave(Address dst) { prefixq(dst); - emit_byte(0x0F); - emit_byte(0xAE); + emit_int8(0x0F); + emit_int8((unsigned char)0xAE); emit_operand(as_Register(0), dst); } void Assembler::idivq(Register src) { int encode = prefixq_and_encode(src->encoding()); - emit_byte(0xF7); - emit_byte(0xF8 | encode); + emit_int8((unsigned char)0xF7); + emit_int8((unsigned char)(0xF8 | encode)); } void Assembler::imulq(Register dst, Register src) { int encode = prefixq_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); - emit_byte(0xAF); - emit_byte(0xC0 | encode); + emit_int8(0x0F); + emit_int8((unsigned char)0xAF); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::imulq(Register dst, Register src, int value) { int encode = prefixq_and_encode(dst->encoding(), src->encoding()); if (is8bit(value)) { - emit_byte(0x6B); - emit_byte(0xC0 | encode); - emit_byte(value & 0xFF); + emit_int8(0x6B); + emit_int8((unsigned char)(0xC0 | encode)); + emit_int8(value & 0xFF); } else { - emit_byte(0x69); - emit_byte(0xC0 | encode); + emit_int8(0x69); + emit_int8((unsigned char)(0xC0 | encode)); emit_long(value); } } @@ -4934,23 +4940,23 @@ void Assembler::incl(Register dst) { // Don't use it directly. Use MacroAssembler::incrementl() instead. // Use two-byte form (one-byte from is a REX prefix in 64-bit mode) int encode = prefix_and_encode(dst->encoding()); - emit_byte(0xFF); - emit_byte(0xC0 | encode); + emit_int8((unsigned char)0xFF); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::incq(Register dst) { // Don't use it directly. Use MacroAssembler::incrementq() instead. // Use two-byte form (one-byte from is a REX prefix in 64-bit mode) int encode = prefixq_and_encode(dst->encoding()); - emit_byte(0xFF); - emit_byte(0xC0 | encode); + emit_int8((unsigned char)0xFF); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::incq(Address dst) { // Don't use it directly. Use MacroAssembler::incrementq() instead. InstructionMark im(this); prefixq(dst); - emit_byte(0xFF); + emit_int8((unsigned char)0xFF); emit_operand(rax, dst); } @@ -4961,35 +4967,35 @@ void Assembler::lea(Register dst, Address src) { void Assembler::leaq(Register dst, Address src) { InstructionMark im(this); prefixq(src, dst); - emit_byte(0x8D); + emit_int8((unsigned char)0x8D); emit_operand(dst, src); } void Assembler::mov64(Register dst, int64_t imm64) { InstructionMark im(this); int encode = prefixq_and_encode(dst->encoding()); - emit_byte(0xB8 | encode); + emit_int8((unsigned char)(0xB8 | encode)); emit_int64(imm64); } void Assembler::mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec) { InstructionMark im(this); int encode = prefixq_and_encode(dst->encoding()); - emit_byte(0xB8 | encode); + emit_int8(0xB8 | encode); emit_data64(imm64, rspec); } void Assembler::mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec) { InstructionMark im(this); int encode = prefix_and_encode(dst->encoding()); - emit_byte(0xB8 | encode); + emit_int8((unsigned char)(0xB8 | encode)); emit_data((int)imm32, rspec, narrow_oop_operand); } void Assembler::mov_narrow_oop(Address dst, int32_t imm32, RelocationHolder const& rspec) { InstructionMark im(this); prefix(dst); - emit_byte(0xC7); + emit_int8((unsigned char)0xC7); emit_operand(rax, dst, 4); emit_data((int)imm32, rspec, narrow_oop_operand); } @@ -4997,34 +5003,34 @@ void Assembler::mov_narrow_oop(Address dst, int32_t imm32, RelocationHolder con void Assembler::cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec) { InstructionMark im(this); int encode = prefix_and_encode(src1->encoding()); - emit_byte(0x81); - emit_byte(0xF8 | encode); + emit_int8((unsigned char)0x81); + emit_int8((unsigned char)(0xF8 | encode)); emit_data((int)imm32, rspec, narrow_oop_operand); } void Assembler::cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec) { InstructionMark im(this); prefix(src1); - emit_byte(0x81); + emit_int8((unsigned char)0x81); emit_operand(rax, src1, 4); emit_data((int)imm32, rspec, narrow_oop_operand); } void Assembler::lzcntq(Register dst, Register src) { assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR"); - emit_byte(0xF3); + emit_int8((unsigned char)0xF3); int encode = prefixq_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); - emit_byte(0xBD); - emit_byte(0xC0 | encode); + emit_int8(0x0F); + emit_int8((unsigned char)0xBD); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::movdq(XMMRegister dst, Register src) { // table D-1 says MMX/SSE2 NOT_LP64(assert(VM_Version::supports_sse2(), "")); int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_66); - emit_byte(0x6E); - emit_byte(0xC0 | encode); + emit_int8(0x6E); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::movdq(Register dst, XMMRegister src) { @@ -5032,43 +5038,43 @@ void Assembler::movdq(Register dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); // swap src/dst to get correct prefix int encode = simd_prefix_and_encode_q(src, dst, VEX_SIMD_66); - emit_byte(0x7E); - emit_byte(0xC0 | encode); + emit_int8(0x7E); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::movq(Register dst, Register src) { int encode = prefixq_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x8B); - emit_byte(0xC0 | encode); + emit_int8((unsigned char)0x8B); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::movq(Register dst, Address src) { InstructionMark im(this); prefixq(src, dst); - emit_byte(0x8B); + emit_int8((unsigned char)0x8B); emit_operand(dst, src); } void Assembler::movq(Address dst, Register src) { InstructionMark im(this); prefixq(dst, src); - emit_byte(0x89); + emit_int8((unsigned char)0x89); emit_operand(src, dst); } void Assembler::movsbq(Register dst, Address src) { InstructionMark im(this); prefixq(src, dst); - emit_byte(0x0F); - emit_byte(0xBE); + emit_int8(0x0F); + emit_int8((unsigned char)0xBE); emit_operand(dst, src); } void Assembler::movsbq(Register dst, Register src) { int encode = prefixq_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); - emit_byte(0xBE); - emit_byte(0xC0 | encode); + emit_int8(0x0F); + emit_int8((unsigned char)0xBE); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::movslq(Register dst, int32_t imm32) { @@ -5078,7 +5084,7 @@ void Assembler::movslq(Register dst, int32_t imm32) { ShouldNotReachHere(); InstructionMark im(this); int encode = prefixq_and_encode(dst->encoding()); - emit_byte(0xC7 | encode); + emit_int8((unsigned char)(0xC7 | encode)); emit_long(imm32); } @@ -5086,7 +5092,7 @@ void Assembler::movslq(Address dst, int32_t imm32) { assert(is_simm32(imm32), "lost bits"); InstructionMark im(this); prefixq(dst); - emit_byte(0xC7); + emit_int8((unsigned char)0xC7); emit_operand(rax, dst, 4); emit_long(imm32); } @@ -5094,77 +5100,77 @@ void Assembler::movslq(Address dst, int32_t imm32) { void Assembler::movslq(Register dst, Address src) { InstructionMark im(this); prefixq(src, dst); - emit_byte(0x63); + emit_int8(0x63); emit_operand(dst, src); } void Assembler::movslq(Register dst, Register src) { int encode = prefixq_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x63); - emit_byte(0xC0 | encode); + emit_int8(0x63); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::movswq(Register dst, Address src) { InstructionMark im(this); prefixq(src, dst); - emit_byte(0x0F); - emit_byte(0xBF); + emit_int8(0x0F); + emit_int8((unsigned char)0xBF); emit_operand(dst, src); } void Assembler::movswq(Register dst, Register src) { int encode = prefixq_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); - emit_byte(0xBF); - emit_byte(0xC0 | encode); + emit_int8((unsigned char)0x0F); + emit_int8((unsigned char)0xBF); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::movzbq(Register dst, Address src) { InstructionMark im(this); prefixq(src, dst); - emit_byte(0x0F); - emit_byte(0xB6); + emit_int8((unsigned char)0x0F); + emit_int8((unsigned char)0xB6); emit_operand(dst, src); } void Assembler::movzbq(Register dst, Register src) { int encode = prefixq_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); - emit_byte(0xB6); - emit_byte(0xC0 | encode); + emit_int8(0x0F); + emit_int8((unsigned char)0xB6); + emit_int8(0xC0 | encode); } void Assembler::movzwq(Register dst, Address src) { InstructionMark im(this); prefixq(src, dst); - emit_byte(0x0F); - emit_byte(0xB7); + emit_int8((unsigned char)0x0F); + emit_int8((unsigned char)0xB7); emit_operand(dst, src); } void Assembler::movzwq(Register dst, Register src) { int encode = prefixq_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); - emit_byte(0xB7); - emit_byte(0xC0 | encode); + emit_int8((unsigned char)0x0F); + emit_int8((unsigned char)0xB7); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::negq(Register dst) { int encode = prefixq_and_encode(dst->encoding()); - emit_byte(0xF7); - emit_byte(0xD8 | encode); + emit_int8((unsigned char)0xF7); + emit_int8((unsigned char)(0xD8 | encode)); } void Assembler::notq(Register dst) { int encode = prefixq_and_encode(dst->encoding()); - emit_byte(0xF7); - emit_byte(0xD0 | encode); + emit_int8((unsigned char)0xF7); + emit_int8((unsigned char)(0xD0 | encode)); } void Assembler::orq(Address dst, int32_t imm32) { InstructionMark im(this); prefixq(dst); - emit_byte(0x81); + emit_int8((unsigned char)0x81); emit_operand(rcx, dst, 4); emit_long(imm32); } @@ -5177,7 +5183,7 @@ void Assembler::orq(Register dst, int32_t imm32) { void Assembler::orq(Register dst, Address src) { InstructionMark im(this); prefixq(src, dst); - emit_byte(0x0B); + emit_int8(0x0B); emit_operand(dst, src); } @@ -5210,26 +5216,26 @@ void Assembler::popa() { // 64bit void Assembler::popcntq(Register dst, Address src) { assert(VM_Version::supports_popcnt(), "must support"); InstructionMark im(this); - emit_byte(0xF3); + emit_int8((unsigned char)0xF3); prefixq(src, dst); - emit_byte(0x0F); - emit_byte(0xB8); + emit_int8((unsigned char)0x0F); + emit_int8((unsigned char)0xB8); emit_operand(dst, src); } void Assembler::popcntq(Register dst, Register src) { assert(VM_Version::supports_popcnt(), "must support"); - emit_byte(0xF3); + emit_int8((unsigned char)0xF3); int encode = prefixq_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); - emit_byte(0xB8); - emit_byte(0xC0 | encode); + emit_int8((unsigned char)0x0F); + emit_int8((unsigned char)0xB8); + emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::popq(Address dst) { InstructionMark im(this); prefixq(dst); - emit_byte(0x8F); + emit_int8((unsigned char)0x8F); emit_operand(rax, dst); } @@ -5261,7 +5267,7 @@ void Assembler::pusha() { // 64bit void Assembler::pushq(Address src) { InstructionMark im(this); prefixq(src); - emit_byte(0xFF); + emit_int8((unsigned char)0xFF); emit_operand(rsi, src); } @@ -5269,31 +5275,31 @@ void Assembler::rclq(Register dst, int imm8) { assert(isShiftCount(imm8 >> 1), "illegal shift count"); int encode = prefixq_and_encode(dst->encoding()); if (imm8 == 1) { - emit_byte(0xD1); - emit_byte(0xD0 | encode); + emit_int8((unsigned char)0xD1); + emit_int8((unsigned char)(0xD0 | encode)); } else { - emit_byte(0xC1); - emit_byte(0xD0 | encode); - emit_byte(imm8); + emit_int8((unsigned char)0xC1); + emit_int8((unsigned char)(0xD0 | encode)); + emit_int8(imm8); } } void Assembler::sarq(Register dst, int imm8) { assert(isShiftCount(imm8 >> 1), "illegal shift count"); int encode = prefixq_and_encode(dst->encoding()); if (imm8 == 1) { - emit_byte(0xD1); - emit_byte(0xF8 | encode); + emit_int8((unsigned char)0xD1); + emit_int8((unsigned char)(0xF8 | encode)); } else { - emit_byte(0xC1); - emit_byte(0xF8 | encode); - emit_byte(imm8); + emit_int8((unsigned char)0xC1); + emit_int8((unsigned char)(0xF8 | encode)); + emit_int8(imm8); } } void Assembler::sarq(Register dst) { int encode = prefixq_and_encode(dst->encoding()); - emit_byte(0xD3); - emit_byte(0xF8 | encode); + emit_int8((unsigned char)0xD3); + emit_int8((unsigned char)(0xF8 | encode)); } void Assembler::sbbq(Address dst, int32_t imm32) { @@ -5310,7 +5316,7 @@ void Assembler::sbbq(Register dst, int32_t imm32) { void Assembler::sbbq(Register dst, Address src) { InstructionMark im(this); prefixq(src, dst); - emit_byte(0x1B); + emit_int8(0x1B); emit_operand(dst, src); } @@ -5323,33 +5329,33 @@ void Assembler::shlq(Register dst, int imm8) { assert(isShiftCount(imm8 >> 1), "illegal shift count"); int encode = prefixq_and_encode(dst->encoding()); if (imm8 == 1) { - emit_byte(0xD1); - emit_byte(0xE0 | encode); + emit_int8((unsigned char)0xD1); + emit_int8((unsigned char)(0xE0 | encode)); } else { - emit_byte(0xC1); - emit_byte(0xE0 | encode); - emit_byte(imm8); + emit_int8((unsigned char)0xC1); + emit_int8((unsigned char)(0xE0 | encode)); + emit_int8(imm8); } } void Assembler::shlq(Register dst) { int encode = prefixq_and_encode(dst->encoding()); - emit_byte(0xD3); - emit_byte(0xE0 | encode); + emit_int8((unsigned char)0xD3); + emit_int8((unsigned char)(0xE0 | encode)); } void Assembler::shrq(Register dst, int imm8) { assert(isShiftCount(imm8 >> 1), "illegal shift count"); int encode = prefixq_and_encode(dst->encoding()); - emit_byte(0xC1); - emit_byte(0xE8 | encode); - emit_byte(imm8); + emit_int8((unsigned char)0xC1); + emit_int8((unsigned char)(0xE8 | encode)); + emit_int8(imm8); } void Assembler::shrq(Register dst) { int encode = prefixq_and_encode(dst->encoding()); - emit_byte(0xD3); - emit_byte(0xE8 | encode); + emit_int8((unsigned char)0xD3); + emit_int8(0xE8 | encode); } void Assembler::subq(Address dst, int32_t imm32) { @@ -5361,7 +5367,7 @@ void Assembler::subq(Address dst, int32_t imm32) { void Assembler::subq(Address dst, Register src) { InstructionMark im(this); prefixq(dst, src); - emit_byte(0x29); + emit_int8(0x29); emit_operand(src, dst); } @@ -5379,7 +5385,7 @@ void Assembler::subq_imm32(Register dst, int32_t imm32) { void Assembler::subq(Register dst, Address src) { InstructionMark im(this); prefixq(src, dst); - emit_byte(0x2B); + emit_int8(0x2B); emit_operand(dst, src); } @@ -5395,11 +5401,11 @@ void Assembler::testq(Register dst, int32_t imm32) { int encode = dst->encoding(); if (encode == 0) { prefix(REX_W); - emit_byte(0xA9); + emit_int8((unsigned char)0xA9); } else { encode = prefixq_and_encode(encode); - emit_byte(0xF7); - emit_byte(0xC0 | encode); + emit_int8((unsigned char)0xF7); + emit_int8((unsigned char)(0xC0 | encode)); } emit_long(imm32); } @@ -5412,22 +5418,22 @@ void Assembler::testq(Register dst, Register src) { void Assembler::xaddq(Address dst, Register src) { InstructionMark im(this); prefixq(dst, src); - emit_byte(0x0F); - emit_byte(0xC1); + emit_int8(0x0F); + emit_int8((unsigned char)0xC1); emit_operand(src, dst); } void Assembler::xchgq(Register dst, Address src) { InstructionMark im(this); prefixq(src, dst); - emit_byte(0x87); + emit_int8((unsigned char)0x87); emit_operand(dst, src); } void Assembler::xchgq(Register dst, Register src) { int encode = prefixq_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x87); - emit_byte(0xc0 | encode); + emit_int8((unsigned char)0x87); + emit_int8((unsigned char)(0xc0 | encode)); } void Assembler::xorq(Register dst, Register src) { @@ -5438,7 +5444,7 @@ void Assembler::xorq(Register dst, Register src) { void Assembler::xorq(Register dst, Address src) { InstructionMark im(this); prefixq(src, dst); - emit_byte(0x33); + emit_int8(0x33); emit_operand(dst, src); } diff --git a/src/cpu/x86/vm/c1_CodeStubs_x86.cpp b/src/cpu/x86/vm/c1_CodeStubs_x86.cpp index be2c1097a45920148ce351b73910238c5e0ba1e7..53c7cbacd1c28a8c54c5281fc518eaeb17af2ac5 100644 --- a/src/cpu/x86/vm/c1_CodeStubs_x86.cpp +++ b/src/cpu/x86/vm/c1_CodeStubs_x86.cpp @@ -313,10 +313,10 @@ void PatchingStub::emit_code(LIR_Assembler* ce) { #endif } else { // make a copy the code which is going to be patched. - for ( int i = 0; i < _bytes_to_copy; i++) { + for (int i = 0; i < _bytes_to_copy; i++) { address ptr = (address)(_pc_start + i); int a_byte = (*ptr) & 0xFF; - __ a_byte (a_byte); + __ emit_int8(a_byte); *ptr = 0x90; // make the site look like a nop } } @@ -363,11 +363,11 @@ void PatchingStub::emit_code(LIR_Assembler* ce) { // emit the offsets needed to find the code to patch int being_initialized_entry_offset = __ pc() - being_initialized_entry + sizeof_patch_record; - __ a_byte(0xB8); - __ a_byte(0); - __ a_byte(being_initialized_entry_offset); - __ a_byte(bytes_to_skip); - __ a_byte(_bytes_to_copy); + __ emit_int8((unsigned char)0xB8); + __ emit_int8(0); + __ emit_int8(being_initialized_entry_offset); + __ emit_int8(bytes_to_skip); + __ emit_int8(_bytes_to_copy); address patch_info_pc = __ pc(); assert(patch_info_pc - end_of_patch == bytes_to_skip, "incorrect patch info"); diff --git a/src/cpu/x86/vm/cppInterpreter_x86.cpp b/src/cpu/x86/vm/cppInterpreter_x86.cpp index 946c400e59021d8bddde3016d0b02f13cfc330da..9c3a2f31aebab64a2e96af8b1c55e97f36443239 100644 --- a/src/cpu/x86/vm/cppInterpreter_x86.cpp +++ b/src/cpu/x86/vm/cppInterpreter_x86.cpp @@ -611,8 +611,6 @@ void InterpreterGenerator::generate_counter_overflow(Label* do_continue) { // C++ interpreter only // rsi/r13 - previous interpreter state pointer - const Address size_of_parameters(rbx, Method::size_of_parameters_offset()); - // InterpreterRuntime::frequency_counter_overflow takes one argument // indicating if the counter overflow occurs at a backwards branch (non-NULL bcp). // The call returns the address of the verified entry point for the method or NULL @@ -977,15 +975,16 @@ address InterpreterGenerator::generate_native_entry(bool synchronized) { // to save/restore. address entry_point = __ pc(); - const Address size_of_parameters(rbx, Method::size_of_parameters_offset()); - const Address size_of_locals (rbx, Method::size_of_locals_offset()); + const Address constMethod (rbx, Method::const_offset()); const Address invocation_counter(rbx, Method::invocation_counter_offset() + InvocationCounter::counter_offset()); const Address access_flags (rbx, Method::access_flags_offset()); + const Address size_of_parameters(rcx, ConstMethod::size_of_parameters_offset()); // rsi/r13 == state/locals rdi == prevstate const Register locals = rdi; // get parameter size (always needed) + __ movptr(rcx, constMethod); __ load_unsigned_short(rcx, size_of_parameters); // rbx: Method* @@ -994,6 +993,7 @@ address InterpreterGenerator::generate_native_entry(bool synchronized) { // for natives the size of locals is zero // compute beginning of parameters /locals + __ lea(locals, Address(rsp, rcx, Address::times_ptr, -wordSize)); // initialize fixed part of activation frame @@ -1107,11 +1107,14 @@ address InterpreterGenerator::generate_native_entry(bool synchronized) { const Register method = rbx; const Register thread = LP64_ONLY(r15_thread) NOT_LP64(rdi); const Register t = InterpreterRuntime::SignatureHandlerGenerator::temp(); // rcx|rscratch1 + const Address constMethod (method, Method::const_offset()); + const Address size_of_parameters(t, ConstMethod::size_of_parameters_offset()); // allocate space for parameters __ movptr(method, STATE(_method)); __ verify_method_ptr(method); - __ load_unsigned_short(t, Address(method, Method::size_of_parameters_offset())); + __ movptr(t, constMethod); + __ load_unsigned_short(t, size_of_parameters); __ shll(t, 2); #ifdef _LP64 __ subptr(rsp, t); @@ -1700,15 +1703,17 @@ address InterpreterGenerator::generate_normal_entry(bool synchronized) { // save sender sp __ push(rcx); - const Address size_of_parameters(rbx, Method::size_of_parameters_offset()); - const Address size_of_locals (rbx, Method::size_of_locals_offset()); + const Address constMethod (rbx, Method::const_offset()); const Address access_flags (rbx, Method::access_flags_offset()); + const Address size_of_parameters(rdx, ConstMethod::size_of_parameters_offset()); + const Address size_of_locals (rdx, ConstMethod::size_of_locals_offset()); // const Address monitor_block_top (rbp, frame::interpreter_frame_monitor_block_top_offset * wordSize); // const Address monitor_block_bot (rbp, frame::interpreter_frame_initial_sp_offset * wordSize); // const Address monitor(rbp, frame::interpreter_frame_initial_sp_offset * wordSize - (int)sizeof(BasicObjectLock)); // get parameter size (always needed) + __ movptr(rdx, constMethod); __ load_unsigned_short(rcx, size_of_parameters); // rbx: Method* @@ -1989,7 +1994,9 @@ address InterpreterGenerator::generate_normal_entry(bool synchronized) { __ movptr(rbx, STATE(_result._to_call._callee)); // callee left args on top of expression stack, remove them - __ load_unsigned_short(rcx, Address(rbx, Method::size_of_parameters_offset())); + __ movptr(rcx, constMethod); + __ load_unsigned_short(rcx, Address(rcx, ConstMethod::size_of_parameters_offset())); + __ lea(rsp, Address(rsp, rcx, Address::times_ptr)); __ movl(rcx, Address(rbx, Method::result_index_offset())); @@ -2159,7 +2166,9 @@ address InterpreterGenerator::generate_normal_entry(bool synchronized) { // Make it look like call_stub calling conventions // Get (potential) receiver - __ load_unsigned_short(rcx, size_of_parameters); // get size of parameters in words + // get size of parameters in words + __ movptr(rcx, constMethod); + __ load_unsigned_short(rcx, Address(rcx, ConstMethod::size_of_parameters_offset())); ExternalAddress recursive(CAST_FROM_FN_PTR(address, RecursiveInterpreterActivation)); __ pushptr(recursive.addr()); // make it look good in the debugger diff --git a/src/cpu/x86/vm/macroAssembler_x86.cpp b/src/cpu/x86/vm/macroAssembler_x86.cpp index fa2d7fa4376d292be3a0bac2edf5e2add5549bd1..b9d85636cf88c6c27614423506865482291de16a 100644 --- a/src/cpu/x86/vm/macroAssembler_x86.cpp +++ b/src/cpu/x86/vm/macroAssembler_x86.cpp @@ -1023,7 +1023,7 @@ void MacroAssembler::lea(Address dst, AddressLiteral adr) { void MacroAssembler::leave() { // %%% is this really better? Why not on 32bit too? - emit_byte(0xC9); // LEAVE + emit_int8((unsigned char)0xC9); // LEAVE } void MacroAssembler::lneg(Register hi, Register lo) { @@ -2112,11 +2112,11 @@ void MacroAssembler::fat_nop() { if (UseAddressNop) { addr_nop_5(); } else { - emit_byte(0x26); // es: - emit_byte(0x2e); // cs: - emit_byte(0x64); // fs: - emit_byte(0x65); // gs: - emit_byte(0x90); + emit_int8(0x26); // es: + emit_int8(0x2e); // cs: + emit_int8(0x64); // fs: + emit_int8(0x65); // gs: + emit_int8((unsigned char)0x90); } } @@ -2534,12 +2534,12 @@ void MacroAssembler::jump_cc(Condition cc, AddressLiteral dst) { int offs = (intptr_t)dst.target() - ((intptr_t)pc()); if (dst.reloc() == relocInfo::none && is8bit(offs - short_size)) { // 0111 tttn #8-bit disp - emit_byte(0x70 | cc); - emit_byte((offs - short_size) & 0xFF); + emit_int8(0x70 | cc); + emit_int8((offs - short_size) & 0xFF); } else { // 0000 1111 1000 tttn #32-bit disp - emit_byte(0x0F); - emit_byte(0x80 | cc); + emit_int8(0x0F); + emit_int8((unsigned char)(0x80 | cc)); emit_long(offs - long_size); } } else { @@ -3085,7 +3085,8 @@ void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) { void MacroAssembler::pshufb(XMMRegister dst, AddressLiteral src) { // Used in sign-bit flipping with aligned address. - assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); + bool aligned_adr = (((intptr_t)src.target() & 15) == 0); + assert((UseAVX > 0) || aligned_adr, "SSE mode requires address alignment 16 bytes"); if (reachable(src)) { Assembler::pshufb(dst, as_Address(src)); } else { diff --git a/src/cpu/x86/vm/macroAssembler_x86.hpp b/src/cpu/x86/vm/macroAssembler_x86.hpp index 36d4d2311550b430a945cbb6a7ef19fbc776cc3a..10eab0c8a4fe648bd06e01acee507adc517f3326 100644 --- a/src/cpu/x86/vm/macroAssembler_x86.hpp +++ b/src/cpu/x86/vm/macroAssembler_x86.hpp @@ -126,25 +126,6 @@ class MacroAssembler: public Assembler { } } -#ifndef PRODUCT - static void pd_print_patched_instruction(address branch) { - const char* s; - unsigned char op = branch[0]; - if (op == 0xE8) { - s = "call"; - } else if (op == 0xE9 || op == 0xEB) { - s = "jmp"; - } else if ((op & 0xF0) == 0x70) { - s = "jcc"; - } else if (op == 0x0F) { - s = "jcc"; - } else { - s = "????"; - } - tty->print("%s (unresolved)", s); - } -#endif - // The following 4 methods return the offset of the appropriate move instruction // Support for fast byte/short loading with zero extension (depending on particular CPU) diff --git a/src/cpu/x86/vm/methodHandles_x86.cpp b/src/cpu/x86/vm/methodHandles_x86.cpp index 7da3a2c423ab87650843b8056877a495ac45b9bf..95d5123f406fb3d6a2605359f5181384b0ec31ab 100644 --- a/src/cpu/x86/vm/methodHandles_x86.cpp +++ b/src/cpu/x86/vm/methodHandles_x86.cpp @@ -169,8 +169,9 @@ void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm, if (VerifyMethodHandles && !for_compiler_entry) { // make sure recv is already on stack + __ movptr(temp2, Address(method_temp, Method::const_offset())); __ load_sized_value(temp2, - Address(method_temp, Method::size_of_parameters_offset()), + Address(temp2, ConstMethod::size_of_parameters_offset()), sizeof(u2), /*is_signed*/ false); // assert(sizeof(u2) == sizeof(Method::_size_of_parameters), ""); Label L; @@ -234,8 +235,9 @@ address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* int ref_kind = signature_polymorphic_intrinsic_ref_kind(iid); assert(ref_kind != 0 || iid == vmIntrinsics::_invokeBasic, "must be _invokeBasic or a linkTo intrinsic"); if (ref_kind == 0 || MethodHandles::ref_kind_has_receiver(ref_kind)) { + __ movptr(rdx_argp, Address(rbx_method, Method::const_offset())); __ load_sized_value(rdx_argp, - Address(rbx_method, Method::size_of_parameters_offset()), + Address(rdx_argp, ConstMethod::size_of_parameters_offset()), sizeof(u2), /*is_signed*/ false); // assert(sizeof(u2) == sizeof(Method::_size_of_parameters), ""); rdx_first_arg_addr = __ argument_address(rdx_argp, -1); diff --git a/src/cpu/x86/vm/stubGenerator_x86_32.cpp b/src/cpu/x86/vm/stubGenerator_x86_32.cpp index 52e3f4169af3363248920ce0b244f27cef0cf7c6..3bf5dc5e0fcc8ede911feb11c74e58eb502e20e7 100644 --- a/src/cpu/x86/vm/stubGenerator_x86_32.cpp +++ b/src/cpu/x86/vm/stubGenerator_x86_32.cpp @@ -2174,13 +2174,13 @@ class StubGenerator: public StubCodeGenerator { // c_rarg2 - K (key) in little endian int array // address generate_aescrypt_encryptBlock() { - assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support"); + assert(UseAES, "need AES instructions and misaligned SSE support"); __ align(CodeEntryAlignment); StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock"); Label L_doLast; address start = __ pc(); - const Register from = rsi; // source array address + const Register from = rdx; // source array address const Register to = rdx; // destination array address const Register key = rcx; // key array address const Register keylen = rax; @@ -2189,47 +2189,74 @@ class StubGenerator: public StubCodeGenerator { const Address key_param (rbp, 8+8); const XMMRegister xmm_result = xmm0; - const XMMRegister xmm_temp = xmm1; - const XMMRegister xmm_key_shuf_mask = xmm2; + const XMMRegister xmm_key_shuf_mask = xmm1; + const XMMRegister xmm_temp1 = xmm2; + const XMMRegister xmm_temp2 = xmm3; + const XMMRegister xmm_temp3 = xmm4; + const XMMRegister xmm_temp4 = xmm5; - __ enter(); // required for proper stackwalking of RuntimeStub frame - __ push(rsi); - __ movptr(from , from_param); - __ movptr(to , to_param); - __ movptr(key , key_param); + __ enter(); // required for proper stackwalking of RuntimeStub frame + __ movptr(from, from_param); + __ movptr(key, key_param); + // keylen could be only {11, 13, 15} * 4 = {44, 52, 60} __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); - // keylen = # of 32-bit words, convert to 128-bit words - __ shrl(keylen, 2); - __ subl(keylen, 11); // every key has at least 11 128-bit words, some have more __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); __ movdqu(xmm_result, Address(from, 0)); // get 16 bytes of input + __ movptr(to, to_param); // For encryption, the java expanded key ordering is just what we need - load_key(xmm_temp, key, 0x00, xmm_key_shuf_mask); - __ pxor(xmm_result, xmm_temp); - for (int offset = 0x10; offset <= 0x90; offset += 0x10) { - aes_enc_key(xmm_result, xmm_temp, key, offset, xmm_key_shuf_mask); - } - load_key (xmm_temp, key, 0xa0, xmm_key_shuf_mask); - __ cmpl(keylen, 0); - __ jcc(Assembler::equal, L_doLast); - __ aesenc(xmm_result, xmm_temp); // only in 192 and 256 bit keys - aes_enc_key(xmm_result, xmm_temp, key, 0xb0, xmm_key_shuf_mask); - load_key(xmm_temp, key, 0xc0, xmm_key_shuf_mask); - __ subl(keylen, 2); - __ jcc(Assembler::equal, L_doLast); - __ aesenc(xmm_result, xmm_temp); // only in 256 bit keys - aes_enc_key(xmm_result, xmm_temp, key, 0xd0, xmm_key_shuf_mask); - load_key(xmm_temp, key, 0xe0, xmm_key_shuf_mask); + load_key(xmm_temp1, key, 0x00, xmm_key_shuf_mask); + __ pxor(xmm_result, xmm_temp1); + + load_key(xmm_temp1, key, 0x10, xmm_key_shuf_mask); + load_key(xmm_temp2, key, 0x20, xmm_key_shuf_mask); + load_key(xmm_temp3, key, 0x30, xmm_key_shuf_mask); + load_key(xmm_temp4, key, 0x40, xmm_key_shuf_mask); + + __ aesenc(xmm_result, xmm_temp1); + __ aesenc(xmm_result, xmm_temp2); + __ aesenc(xmm_result, xmm_temp3); + __ aesenc(xmm_result, xmm_temp4); + + load_key(xmm_temp1, key, 0x50, xmm_key_shuf_mask); + load_key(xmm_temp2, key, 0x60, xmm_key_shuf_mask); + load_key(xmm_temp3, key, 0x70, xmm_key_shuf_mask); + load_key(xmm_temp4, key, 0x80, xmm_key_shuf_mask); + + __ aesenc(xmm_result, xmm_temp1); + __ aesenc(xmm_result, xmm_temp2); + __ aesenc(xmm_result, xmm_temp3); + __ aesenc(xmm_result, xmm_temp4); + + load_key(xmm_temp1, key, 0x90, xmm_key_shuf_mask); + load_key(xmm_temp2, key, 0xa0, xmm_key_shuf_mask); + + __ cmpl(keylen, 44); + __ jccb(Assembler::equal, L_doLast); + + __ aesenc(xmm_result, xmm_temp1); + __ aesenc(xmm_result, xmm_temp2); + + load_key(xmm_temp1, key, 0xb0, xmm_key_shuf_mask); + load_key(xmm_temp2, key, 0xc0, xmm_key_shuf_mask); + + __ cmpl(keylen, 52); + __ jccb(Assembler::equal, L_doLast); + + __ aesenc(xmm_result, xmm_temp1); + __ aesenc(xmm_result, xmm_temp2); + + load_key(xmm_temp1, key, 0xd0, xmm_key_shuf_mask); + load_key(xmm_temp2, key, 0xe0, xmm_key_shuf_mask); __ BIND(L_doLast); - __ aesenclast(xmm_result, xmm_temp); + __ aesenc(xmm_result, xmm_temp1); + __ aesenclast(xmm_result, xmm_temp2); __ movdqu(Address(to, 0), xmm_result); // store the result __ xorptr(rax, rax); // return 0 - __ pop(rsi); __ leave(); // required for proper stackwalking of RuntimeStub frame __ ret(0); @@ -2245,13 +2272,13 @@ class StubGenerator: public StubCodeGenerator { // c_rarg2 - K (key) in little endian int array // address generate_aescrypt_decryptBlock() { - assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support"); + assert(UseAES, "need AES instructions and misaligned SSE support"); __ align(CodeEntryAlignment); StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock"); Label L_doLast; address start = __ pc(); - const Register from = rsi; // source array address + const Register from = rdx; // source array address const Register to = rdx; // destination array address const Register key = rcx; // key array address const Register keylen = rax; @@ -2260,51 +2287,76 @@ class StubGenerator: public StubCodeGenerator { const Address key_param (rbp, 8+8); const XMMRegister xmm_result = xmm0; - const XMMRegister xmm_temp = xmm1; - const XMMRegister xmm_key_shuf_mask = xmm2; + const XMMRegister xmm_key_shuf_mask = xmm1; + const XMMRegister xmm_temp1 = xmm2; + const XMMRegister xmm_temp2 = xmm3; + const XMMRegister xmm_temp3 = xmm4; + const XMMRegister xmm_temp4 = xmm5; __ enter(); // required for proper stackwalking of RuntimeStub frame - __ push(rsi); - __ movptr(from , from_param); - __ movptr(to , to_param); - __ movptr(key , key_param); + __ movptr(from, from_param); + __ movptr(key, key_param); + // keylen could be only {11, 13, 15} * 4 = {44, 52, 60} __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); - // keylen = # of 32-bit words, convert to 128-bit words - __ shrl(keylen, 2); - __ subl(keylen, 11); // every key has at least 11 128-bit words, some have more __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); __ movdqu(xmm_result, Address(from, 0)); + __ movptr(to, to_param); // for decryption java expanded key ordering is rotated one position from what we want // so we start from 0x10 here and hit 0x00 last // we don't know if the key is aligned, hence not using load-execute form - load_key(xmm_temp, key, 0x10, xmm_key_shuf_mask); - __ pxor (xmm_result, xmm_temp); - for (int offset = 0x20; offset <= 0xa0; offset += 0x10) { - aes_dec_key(xmm_result, xmm_temp, key, offset, xmm_key_shuf_mask); - } - __ cmpl(keylen, 0); - __ jcc(Assembler::equal, L_doLast); - // only in 192 and 256 bit keys - aes_dec_key(xmm_result, xmm_temp, key, 0xb0, xmm_key_shuf_mask); - aes_dec_key(xmm_result, xmm_temp, key, 0xc0, xmm_key_shuf_mask); - __ subl(keylen, 2); - __ jcc(Assembler::equal, L_doLast); - // only in 256 bit keys - aes_dec_key(xmm_result, xmm_temp, key, 0xd0, xmm_key_shuf_mask); - aes_dec_key(xmm_result, xmm_temp, key, 0xe0, xmm_key_shuf_mask); + load_key(xmm_temp1, key, 0x10, xmm_key_shuf_mask); + load_key(xmm_temp2, key, 0x20, xmm_key_shuf_mask); + load_key(xmm_temp3, key, 0x30, xmm_key_shuf_mask); + load_key(xmm_temp4, key, 0x40, xmm_key_shuf_mask); + + __ pxor (xmm_result, xmm_temp1); + __ aesdec(xmm_result, xmm_temp2); + __ aesdec(xmm_result, xmm_temp3); + __ aesdec(xmm_result, xmm_temp4); + + load_key(xmm_temp1, key, 0x50, xmm_key_shuf_mask); + load_key(xmm_temp2, key, 0x60, xmm_key_shuf_mask); + load_key(xmm_temp3, key, 0x70, xmm_key_shuf_mask); + load_key(xmm_temp4, key, 0x80, xmm_key_shuf_mask); + + __ aesdec(xmm_result, xmm_temp1); + __ aesdec(xmm_result, xmm_temp2); + __ aesdec(xmm_result, xmm_temp3); + __ aesdec(xmm_result, xmm_temp4); + + load_key(xmm_temp1, key, 0x90, xmm_key_shuf_mask); + load_key(xmm_temp2, key, 0xa0, xmm_key_shuf_mask); + load_key(xmm_temp3, key, 0x00, xmm_key_shuf_mask); + + __ cmpl(keylen, 44); + __ jccb(Assembler::equal, L_doLast); + + __ aesdec(xmm_result, xmm_temp1); + __ aesdec(xmm_result, xmm_temp2); + + load_key(xmm_temp1, key, 0xb0, xmm_key_shuf_mask); + load_key(xmm_temp2, key, 0xc0, xmm_key_shuf_mask); + + __ cmpl(keylen, 52); + __ jccb(Assembler::equal, L_doLast); + + __ aesdec(xmm_result, xmm_temp1); + __ aesdec(xmm_result, xmm_temp2); + + load_key(xmm_temp1, key, 0xd0, xmm_key_shuf_mask); + load_key(xmm_temp2, key, 0xe0, xmm_key_shuf_mask); __ BIND(L_doLast); - // for decryption the aesdeclast operation is always on key+0x00 - load_key(xmm_temp, key, 0x00, xmm_key_shuf_mask); - __ aesdeclast(xmm_result, xmm_temp); + __ aesdec(xmm_result, xmm_temp1); + __ aesdec(xmm_result, xmm_temp2); + // for decryption the aesdeclast operation is always on key+0x00 + __ aesdeclast(xmm_result, xmm_temp3); __ movdqu(Address(to, 0), xmm_result); // store the result - __ xorptr(rax, rax); // return 0 - __ pop(rsi); __ leave(); // required for proper stackwalking of RuntimeStub frame __ ret(0); @@ -2340,7 +2392,7 @@ class StubGenerator: public StubCodeGenerator { // c_rarg4 - input length // address generate_cipherBlockChaining_encryptAESCrypt() { - assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support"); + assert(UseAES, "need AES instructions and misaligned SSE support"); __ align(CodeEntryAlignment); StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt"); address start = __ pc(); @@ -2393,7 +2445,7 @@ class StubGenerator: public StubCodeGenerator { __ jcc(Assembler::notEqual, L_key_192_256); // 128 bit code follows here - __ movptr(pos, 0); + __ movl(pos, 0); __ align(OptoLoopAlignment); __ BIND(L_loopTop_128); __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input @@ -2423,15 +2475,15 @@ class StubGenerator: public StubCodeGenerator { __ leave(); // required for proper stackwalking of RuntimeStub frame __ ret(0); - __ BIND(L_key_192_256); - // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256) + __ BIND(L_key_192_256); + // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256) __ cmpl(rax, 52); __ jcc(Assembler::notEqual, L_key_256); // 192-bit code follows here (could be changed to use more xmm registers) - __ movptr(pos, 0); - __ align(OptoLoopAlignment); - __ BIND(L_loopTop_192); + __ movl(pos, 0); + __ align(OptoLoopAlignment); + __ BIND(L_loopTop_192); __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input __ pxor (xmm_result, xmm_temp); // xor with the current r vector @@ -2452,11 +2504,11 @@ class StubGenerator: public StubCodeGenerator { __ jcc(Assembler::notEqual, L_loopTop_192); __ jmp(L_exit); - __ BIND(L_key_256); + __ BIND(L_key_256); // 256-bit code follows here (could be changed to use more xmm registers) - __ movptr(pos, 0); - __ align(OptoLoopAlignment); - __ BIND(L_loopTop_256); + __ movl(pos, 0); + __ align(OptoLoopAlignment); + __ BIND(L_loopTop_256); __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input __ pxor (xmm_result, xmm_temp); // xor with the current r vector @@ -2495,7 +2547,7 @@ class StubGenerator: public StubCodeGenerator { // address generate_cipherBlockChaining_decryptAESCrypt() { - assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support"); + assert(UseAES, "need AES instructions and misaligned SSE support"); __ align(CodeEntryAlignment); StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt"); address start = __ pc(); @@ -2556,9 +2608,9 @@ class StubGenerator: public StubCodeGenerator { // 128-bit code follows here, parallelized - __ movptr(pos, 0); - __ align(OptoLoopAlignment); - __ BIND(L_singleBlock_loopTop_128); + __ movl(pos, 0); + __ align(OptoLoopAlignment); + __ BIND(L_singleBlock_loopTop_128); __ cmpptr(len_reg, 0); // any blocks left?? __ jcc(Assembler::equal, L_exit); __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input @@ -2597,7 +2649,7 @@ class StubGenerator: public StubCodeGenerator { __ jcc(Assembler::notEqual, L_key_256); // 192-bit code follows here (could be optimized to use parallelism) - __ movptr(pos, 0); + __ movl(pos, 0); __ align(OptoLoopAlignment); __ BIND(L_singleBlock_loopTop_192); __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input @@ -2622,7 +2674,7 @@ class StubGenerator: public StubCodeGenerator { __ BIND(L_key_256); // 256-bit code follows here (could be optimized to use parallelism) - __ movptr(pos, 0); + __ movl(pos, 0); __ align(OptoLoopAlignment); __ BIND(L_singleBlock_loopTop_256); __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input diff --git a/src/cpu/x86/vm/stubGenerator_x86_64.cpp b/src/cpu/x86/vm/stubGenerator_x86_64.cpp index 48f4af8dc17598be11605a7683556b6577dcf464..00fa0e9ccc0cdb1ce0e22d3e61b3a1375fc87806 100644 --- a/src/cpu/x86/vm/stubGenerator_x86_64.cpp +++ b/src/cpu/x86/vm/stubGenerator_x86_64.cpp @@ -2953,21 +2953,6 @@ class StubGenerator: public StubCodeGenerator { } } - // aesenc using specified key+offset - // can optionally specify that the shuffle mask is already in an xmmregister - void aes_enc_key(XMMRegister xmmdst, XMMRegister xmmtmp, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) { - load_key(xmmtmp, key, offset, xmm_shuf_mask); - __ aesenc(xmmdst, xmmtmp); - } - - // aesdec using specified key+offset - // can optionally specify that the shuffle mask is already in an xmmregister - void aes_dec_key(XMMRegister xmmdst, XMMRegister xmmtmp, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) { - load_key(xmmtmp, key, offset, xmm_shuf_mask); - __ aesdec(xmmdst, xmmtmp); - } - - // Arguments: // // Inputs: @@ -2976,7 +2961,7 @@ class StubGenerator: public StubCodeGenerator { // c_rarg2 - K (key) in little endian int array // address generate_aescrypt_encryptBlock() { - assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support"); + assert(UseAES, "need AES instructions and misaligned SSE support"); __ align(CodeEntryAlignment); StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock"); Label L_doLast; @@ -2988,15 +2973,17 @@ class StubGenerator: public StubCodeGenerator { const Register keylen = rax; const XMMRegister xmm_result = xmm0; - const XMMRegister xmm_temp = xmm1; - const XMMRegister xmm_key_shuf_mask = xmm2; + const XMMRegister xmm_key_shuf_mask = xmm1; + // On win64 xmm6-xmm15 must be preserved so don't use them. + const XMMRegister xmm_temp1 = xmm2; + const XMMRegister xmm_temp2 = xmm3; + const XMMRegister xmm_temp3 = xmm4; + const XMMRegister xmm_temp4 = xmm5; __ enter(); // required for proper stackwalking of RuntimeStub frame + // keylen could be only {11, 13, 15} * 4 = {44, 52, 60} __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); - // keylen = # of 32-bit words, convert to 128-bit words - __ shrl(keylen, 2); - __ subl(keylen, 11); // every key has at least 11 128-bit words, some have more __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); __ movdqu(xmm_result, Address(from, 0)); // get 16 bytes of input @@ -3004,25 +2991,53 @@ class StubGenerator: public StubCodeGenerator { // For encryption, the java expanded key ordering is just what we need // we don't know if the key is aligned, hence not using load-execute form - load_key(xmm_temp, key, 0x00, xmm_key_shuf_mask); - __ pxor(xmm_result, xmm_temp); - for (int offset = 0x10; offset <= 0x90; offset += 0x10) { - aes_enc_key(xmm_result, xmm_temp, key, offset, xmm_key_shuf_mask); - } - load_key (xmm_temp, key, 0xa0, xmm_key_shuf_mask); - __ cmpl(keylen, 0); - __ jcc(Assembler::equal, L_doLast); - __ aesenc(xmm_result, xmm_temp); // only in 192 and 256 bit keys - aes_enc_key(xmm_result, xmm_temp, key, 0xb0, xmm_key_shuf_mask); - load_key(xmm_temp, key, 0xc0, xmm_key_shuf_mask); - __ subl(keylen, 2); - __ jcc(Assembler::equal, L_doLast); - __ aesenc(xmm_result, xmm_temp); // only in 256 bit keys - aes_enc_key(xmm_result, xmm_temp, key, 0xd0, xmm_key_shuf_mask); - load_key(xmm_temp, key, 0xe0, xmm_key_shuf_mask); + load_key(xmm_temp1, key, 0x00, xmm_key_shuf_mask); + __ pxor(xmm_result, xmm_temp1); + + load_key(xmm_temp1, key, 0x10, xmm_key_shuf_mask); + load_key(xmm_temp2, key, 0x20, xmm_key_shuf_mask); + load_key(xmm_temp3, key, 0x30, xmm_key_shuf_mask); + load_key(xmm_temp4, key, 0x40, xmm_key_shuf_mask); + + __ aesenc(xmm_result, xmm_temp1); + __ aesenc(xmm_result, xmm_temp2); + __ aesenc(xmm_result, xmm_temp3); + __ aesenc(xmm_result, xmm_temp4); + + load_key(xmm_temp1, key, 0x50, xmm_key_shuf_mask); + load_key(xmm_temp2, key, 0x60, xmm_key_shuf_mask); + load_key(xmm_temp3, key, 0x70, xmm_key_shuf_mask); + load_key(xmm_temp4, key, 0x80, xmm_key_shuf_mask); + + __ aesenc(xmm_result, xmm_temp1); + __ aesenc(xmm_result, xmm_temp2); + __ aesenc(xmm_result, xmm_temp3); + __ aesenc(xmm_result, xmm_temp4); + + load_key(xmm_temp1, key, 0x90, xmm_key_shuf_mask); + load_key(xmm_temp2, key, 0xa0, xmm_key_shuf_mask); + + __ cmpl(keylen, 44); + __ jccb(Assembler::equal, L_doLast); + + __ aesenc(xmm_result, xmm_temp1); + __ aesenc(xmm_result, xmm_temp2); + + load_key(xmm_temp1, key, 0xb0, xmm_key_shuf_mask); + load_key(xmm_temp2, key, 0xc0, xmm_key_shuf_mask); + + __ cmpl(keylen, 52); + __ jccb(Assembler::equal, L_doLast); + + __ aesenc(xmm_result, xmm_temp1); + __ aesenc(xmm_result, xmm_temp2); + + load_key(xmm_temp1, key, 0xd0, xmm_key_shuf_mask); + load_key(xmm_temp2, key, 0xe0, xmm_key_shuf_mask); __ BIND(L_doLast); - __ aesenclast(xmm_result, xmm_temp); + __ aesenc(xmm_result, xmm_temp1); + __ aesenclast(xmm_result, xmm_temp2); __ movdqu(Address(to, 0), xmm_result); // store the result __ xorptr(rax, rax); // return 0 __ leave(); // required for proper stackwalking of RuntimeStub frame @@ -3040,7 +3055,7 @@ class StubGenerator: public StubCodeGenerator { // c_rarg2 - K (key) in little endian int array // address generate_aescrypt_decryptBlock() { - assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support"); + assert(UseAES, "need AES instructions and misaligned SSE support"); __ align(CodeEntryAlignment); StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock"); Label L_doLast; @@ -3052,15 +3067,17 @@ class StubGenerator: public StubCodeGenerator { const Register keylen = rax; const XMMRegister xmm_result = xmm0; - const XMMRegister xmm_temp = xmm1; - const XMMRegister xmm_key_shuf_mask = xmm2; + const XMMRegister xmm_key_shuf_mask = xmm1; + // On win64 xmm6-xmm15 must be preserved so don't use them. + const XMMRegister xmm_temp1 = xmm2; + const XMMRegister xmm_temp2 = xmm3; + const XMMRegister xmm_temp3 = xmm4; + const XMMRegister xmm_temp4 = xmm5; __ enter(); // required for proper stackwalking of RuntimeStub frame + // keylen could be only {11, 13, 15} * 4 = {44, 52, 60} __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); - // keylen = # of 32-bit words, convert to 128-bit words - __ shrl(keylen, 2); - __ subl(keylen, 11); // every key has at least 11 128-bit words, some have more __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); __ movdqu(xmm_result, Address(from, 0)); @@ -3068,29 +3085,55 @@ class StubGenerator: public StubCodeGenerator { // for decryption java expanded key ordering is rotated one position from what we want // so we start from 0x10 here and hit 0x00 last // we don't know if the key is aligned, hence not using load-execute form - load_key(xmm_temp, key, 0x10, xmm_key_shuf_mask); - __ pxor (xmm_result, xmm_temp); - for (int offset = 0x20; offset <= 0xa0; offset += 0x10) { - aes_dec_key(xmm_result, xmm_temp, key, offset, xmm_key_shuf_mask); - } - __ cmpl(keylen, 0); - __ jcc(Assembler::equal, L_doLast); - // only in 192 and 256 bit keys - aes_dec_key(xmm_result, xmm_temp, key, 0xb0, xmm_key_shuf_mask); - aes_dec_key(xmm_result, xmm_temp, key, 0xc0, xmm_key_shuf_mask); - __ subl(keylen, 2); - __ jcc(Assembler::equal, L_doLast); - // only in 256 bit keys - aes_dec_key(xmm_result, xmm_temp, key, 0xd0, xmm_key_shuf_mask); - aes_dec_key(xmm_result, xmm_temp, key, 0xe0, xmm_key_shuf_mask); + load_key(xmm_temp1, key, 0x10, xmm_key_shuf_mask); + load_key(xmm_temp2, key, 0x20, xmm_key_shuf_mask); + load_key(xmm_temp3, key, 0x30, xmm_key_shuf_mask); + load_key(xmm_temp4, key, 0x40, xmm_key_shuf_mask); + + __ pxor (xmm_result, xmm_temp1); + __ aesdec(xmm_result, xmm_temp2); + __ aesdec(xmm_result, xmm_temp3); + __ aesdec(xmm_result, xmm_temp4); + + load_key(xmm_temp1, key, 0x50, xmm_key_shuf_mask); + load_key(xmm_temp2, key, 0x60, xmm_key_shuf_mask); + load_key(xmm_temp3, key, 0x70, xmm_key_shuf_mask); + load_key(xmm_temp4, key, 0x80, xmm_key_shuf_mask); + + __ aesdec(xmm_result, xmm_temp1); + __ aesdec(xmm_result, xmm_temp2); + __ aesdec(xmm_result, xmm_temp3); + __ aesdec(xmm_result, xmm_temp4); + + load_key(xmm_temp1, key, 0x90, xmm_key_shuf_mask); + load_key(xmm_temp2, key, 0xa0, xmm_key_shuf_mask); + load_key(xmm_temp3, key, 0x00, xmm_key_shuf_mask); + + __ cmpl(keylen, 44); + __ jccb(Assembler::equal, L_doLast); + + __ aesdec(xmm_result, xmm_temp1); + __ aesdec(xmm_result, xmm_temp2); + + load_key(xmm_temp1, key, 0xb0, xmm_key_shuf_mask); + load_key(xmm_temp2, key, 0xc0, xmm_key_shuf_mask); + + __ cmpl(keylen, 52); + __ jccb(Assembler::equal, L_doLast); + + __ aesdec(xmm_result, xmm_temp1); + __ aesdec(xmm_result, xmm_temp2); + + load_key(xmm_temp1, key, 0xd0, xmm_key_shuf_mask); + load_key(xmm_temp2, key, 0xe0, xmm_key_shuf_mask); __ BIND(L_doLast); - // for decryption the aesdeclast operation is always on key+0x00 - load_key(xmm_temp, key, 0x00, xmm_key_shuf_mask); - __ aesdeclast(xmm_result, xmm_temp); + __ aesdec(xmm_result, xmm_temp1); + __ aesdec(xmm_result, xmm_temp2); + // for decryption the aesdeclast operation is always on key+0x00 + __ aesdeclast(xmm_result, xmm_temp3); __ movdqu(Address(to, 0), xmm_result); // store the result - __ xorptr(rax, rax); // return 0 __ leave(); // required for proper stackwalking of RuntimeStub frame __ ret(0); @@ -3109,7 +3152,7 @@ class StubGenerator: public StubCodeGenerator { // c_rarg4 - input length // address generate_cipherBlockChaining_encryptAESCrypt() { - assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support"); + assert(UseAES, "need AES instructions and misaligned SSE support"); __ align(CodeEntryAlignment); StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt"); address start = __ pc(); @@ -3133,16 +3176,19 @@ class StubGenerator: public StubCodeGenerator { const XMMRegister xmm_temp = xmm1; // keys 0-10 preloaded into xmm2-xmm12 const int XMM_REG_NUM_KEY_FIRST = 2; - const int XMM_REG_NUM_KEY_LAST = 12; + const int XMM_REG_NUM_KEY_LAST = 15; const XMMRegister xmm_key0 = as_XMMRegister(XMM_REG_NUM_KEY_FIRST); - const XMMRegister xmm_key10 = as_XMMRegister(XMM_REG_NUM_KEY_LAST); + const XMMRegister xmm_key10 = as_XMMRegister(XMM_REG_NUM_KEY_FIRST+10); + const XMMRegister xmm_key11 = as_XMMRegister(XMM_REG_NUM_KEY_FIRST+11); + const XMMRegister xmm_key12 = as_XMMRegister(XMM_REG_NUM_KEY_FIRST+12); + const XMMRegister xmm_key13 = as_XMMRegister(XMM_REG_NUM_KEY_FIRST+13); __ enter(); // required for proper stackwalking of RuntimeStub frame #ifdef _WIN64 // on win64, fill len_reg from stack position __ movl(len_reg, len_mem); - // save the xmm registers which must be preserved 6-12 + // save the xmm registers which must be preserved 6-15 __ subptr(rsp, -rsp_after_call_off * wordSize); for (int i = 6; i <= XMM_REG_NUM_KEY_LAST; i++) { __ movdqu(xmm_save(i), as_XMMRegister(i)); @@ -3151,12 +3197,11 @@ class StubGenerator: public StubCodeGenerator { const XMMRegister xmm_key_shuf_mask = xmm_temp; // used temporarily to swap key bytes up front __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); - // load up xmm regs 2 thru 12 with key 0x00 - 0xa0 - for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x00; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { + // load up xmm regs xmm2 thru xmm12 with key 0x00 - 0xa0 + for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x00; rnum <= XMM_REG_NUM_KEY_FIRST+10; rnum++) { load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask); offset += 0x10; } - __ movdqu(xmm_result, Address(rvec, 0x00)); // initialize xmm_result with r vec // now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array (52=192, or 60=256)) @@ -3167,16 +3212,15 @@ class StubGenerator: public StubCodeGenerator { // 128 bit code follows here __ movptr(pos, 0); __ align(OptoLoopAlignment); + __ BIND(L_loopTop_128); __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input __ pxor (xmm_result, xmm_temp); // xor with the current r vector - __ pxor (xmm_result, xmm_key0); // do the aes rounds - for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST - 1; rnum++) { + for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_FIRST + 9; rnum++) { __ aesenc(xmm_result, as_XMMRegister(rnum)); } __ aesenclast(xmm_result, xmm_key10); - __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output // no need to store r to memory until we exit __ addptr(pos, AESBlockSize); @@ -3198,24 +3242,23 @@ class StubGenerator: public StubCodeGenerator { __ BIND(L_key_192_256); // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256) + load_key(xmm_key11, key, 0xb0, xmm_key_shuf_mask); + load_key(xmm_key12, key, 0xc0, xmm_key_shuf_mask); __ cmpl(rax, 52); __ jcc(Assembler::notEqual, L_key_256); // 192-bit code follows here (could be changed to use more xmm registers) __ movptr(pos, 0); __ align(OptoLoopAlignment); + __ BIND(L_loopTop_192); __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input __ pxor (xmm_result, xmm_temp); // xor with the current r vector - __ pxor (xmm_result, xmm_key0); // do the aes rounds - for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { + for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_FIRST + 11; rnum++) { __ aesenc(xmm_result, as_XMMRegister(rnum)); } - aes_enc_key(xmm_result, xmm_temp, key, 0xb0); - load_key(xmm_temp, key, 0xc0); - __ aesenclast(xmm_result, xmm_temp); - + __ aesenclast(xmm_result, xmm_key12); __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output // no need to store r to memory until we exit __ addptr(pos, AESBlockSize); @@ -3225,22 +3268,19 @@ class StubGenerator: public StubCodeGenerator { __ BIND(L_key_256); // 256-bit code follows here (could be changed to use more xmm registers) + load_key(xmm_key13, key, 0xd0, xmm_key_shuf_mask); __ movptr(pos, 0); __ align(OptoLoopAlignment); + __ BIND(L_loopTop_256); __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input __ pxor (xmm_result, xmm_temp); // xor with the current r vector - __ pxor (xmm_result, xmm_key0); // do the aes rounds - for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { + for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_FIRST + 13; rnum++) { __ aesenc(xmm_result, as_XMMRegister(rnum)); } - aes_enc_key(xmm_result, xmm_temp, key, 0xb0); - aes_enc_key(xmm_result, xmm_temp, key, 0xc0); - aes_enc_key(xmm_result, xmm_temp, key, 0xd0); load_key(xmm_temp, key, 0xe0); __ aesenclast(xmm_result, xmm_temp); - __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output // no need to store r to memory until we exit __ addptr(pos, AESBlockSize); @@ -3267,7 +3307,7 @@ class StubGenerator: public StubCodeGenerator { // address generate_cipherBlockChaining_decryptAESCrypt_Parallel() { - assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support"); + assert(UseAES, "need AES instructions and misaligned SSE support"); __ align(CodeEntryAlignment); StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt"); address start = __ pc(); @@ -3288,12 +3328,10 @@ class StubGenerator: public StubCodeGenerator { #endif const Register pos = rax; - // xmm register assignments for the loops below - const XMMRegister xmm_result = xmm0; // keys 0-10 preloaded into xmm2-xmm12 const int XMM_REG_NUM_KEY_FIRST = 5; const int XMM_REG_NUM_KEY_LAST = 15; - const XMMRegister xmm_key_first = as_XMMRegister(XMM_REG_NUM_KEY_FIRST); + const XMMRegister xmm_key_first = as_XMMRegister(XMM_REG_NUM_KEY_FIRST); const XMMRegister xmm_key_last = as_XMMRegister(XMM_REG_NUM_KEY_LAST); __ enter(); // required for proper stackwalking of RuntimeStub frame @@ -3312,13 +3350,14 @@ class StubGenerator: public StubCodeGenerator { const XMMRegister xmm_key_shuf_mask = xmm1; // used temporarily to swap key bytes up front __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); // load up xmm regs 5 thru 15 with key 0x10 - 0xa0 - 0x00 - for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x10; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { - if (rnum == XMM_REG_NUM_KEY_LAST) offset = 0x00; + for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x10; rnum < XMM_REG_NUM_KEY_LAST; rnum++) { load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask); offset += 0x10; } + load_key(xmm_key_last, key, 0x00, xmm_key_shuf_mask); const XMMRegister xmm_prev_block_cipher = xmm1; // holds cipher of previous block + // registers holding the four results in the parallelized loop const XMMRegister xmm_result0 = xmm0; const XMMRegister xmm_result1 = xmm2; @@ -3376,8 +3415,12 @@ class StubGenerator: public StubCodeGenerator { __ jmp(L_multiBlock_loopTop_128); // registers used in the non-parallelized loops + // xmm register assignments for the loops below + const XMMRegister xmm_result = xmm0; const XMMRegister xmm_prev_block_cipher_save = xmm2; - const XMMRegister xmm_temp = xmm3; + const XMMRegister xmm_key11 = xmm3; + const XMMRegister xmm_key12 = xmm4; + const XMMRegister xmm_temp = xmm4; __ align(OptoLoopAlignment); __ BIND(L_singleBlock_loopTop_128); @@ -3415,12 +3458,15 @@ class StubGenerator: public StubCodeGenerator { __ BIND(L_key_192_256); // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256) + load_key(xmm_key11, key, 0xb0); __ cmpl(rax, 52); __ jcc(Assembler::notEqual, L_key_256); // 192-bit code follows here (could be optimized to use parallelism) + load_key(xmm_key12, key, 0xc0); // 192-bit key goes up to c0 __ movptr(pos, 0); __ align(OptoLoopAlignment); + __ BIND(L_singleBlock_loopTop_192); __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input __ movdqa(xmm_prev_block_cipher_save, xmm_result); // save for next r vector @@ -3428,14 +3474,13 @@ class StubGenerator: public StubCodeGenerator { for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST - 1; rnum++) { __ aesdec(xmm_result, as_XMMRegister(rnum)); } - aes_dec_key(xmm_result, xmm_temp, key, 0xb0); // 192-bit key goes up to c0 - aes_dec_key(xmm_result, xmm_temp, key, 0xc0); + __ aesdec(xmm_result, xmm_key11); + __ aesdec(xmm_result, xmm_key12); __ aesdeclast(xmm_result, xmm_key_last); // xmm15 always came from key+0 __ pxor (xmm_result, xmm_prev_block_cipher); // xor with the current r vector - __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output + __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output // no need to store r to memory until we exit - __ movdqa(xmm_prev_block_cipher, xmm_prev_block_cipher_save); // set up next r vector with cipher input from this block - + __ movdqa(xmm_prev_block_cipher, xmm_prev_block_cipher_save); // set up next r vector with cipher input from this block __ addptr(pos, AESBlockSize); __ subptr(len_reg, AESBlockSize); __ jcc(Assembler::notEqual,L_singleBlock_loopTop_192); @@ -3445,23 +3490,26 @@ class StubGenerator: public StubCodeGenerator { // 256-bit code follows here (could be optimized to use parallelism) __ movptr(pos, 0); __ align(OptoLoopAlignment); + __ BIND(L_singleBlock_loopTop_256); - __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input + __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input __ movdqa(xmm_prev_block_cipher_save, xmm_result); // save for next r vector __ pxor (xmm_result, xmm_key_first); // do the aes dec rounds for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST - 1; rnum++) { __ aesdec(xmm_result, as_XMMRegister(rnum)); } - aes_dec_key(xmm_result, xmm_temp, key, 0xb0); // 256-bit key goes up to e0 - aes_dec_key(xmm_result, xmm_temp, key, 0xc0); - aes_dec_key(xmm_result, xmm_temp, key, 0xd0); - aes_dec_key(xmm_result, xmm_temp, key, 0xe0); - __ aesdeclast(xmm_result, xmm_key_last); // xmm15 came from key+0 + __ aesdec(xmm_result, xmm_key11); + load_key(xmm_temp, key, 0xc0); + __ aesdec(xmm_result, xmm_temp); + load_key(xmm_temp, key, 0xd0); + __ aesdec(xmm_result, xmm_temp); + load_key(xmm_temp, key, 0xe0); // 256-bit key goes up to e0 + __ aesdec(xmm_result, xmm_temp); + __ aesdeclast(xmm_result, xmm_key_last); // xmm15 came from key+0 __ pxor (xmm_result, xmm_prev_block_cipher); // xor with the current r vector - __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output + __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output // no need to store r to memory until we exit - __ movdqa(xmm_prev_block_cipher, xmm_prev_block_cipher_save); // set up next r vector with cipher input from this block - + __ movdqa(xmm_prev_block_cipher, xmm_prev_block_cipher_save); // set up next r vector with cipher input from this block __ addptr(pos, AESBlockSize); __ subptr(len_reg, AESBlockSize); __ jcc(Assembler::notEqual,L_singleBlock_loopTop_256); diff --git a/src/cpu/x86/vm/templateInterpreter_x86_32.cpp b/src/cpu/x86/vm/templateInterpreter_x86_32.cpp index 19a2a45c9f7e15eda230641cadd6e90f200f2d61..aabc3dbbacd40c9fda33afdd62e5064bb25bb8f2 100644 --- a/src/cpu/x86/vm/templateInterpreter_x86_32.cpp +++ b/src/cpu/x86/vm/templateInterpreter_x86_32.cpp @@ -424,8 +424,6 @@ void InterpreterGenerator::generate_counter_overflow(Label* do_continue) { // C++ interpreter only // rsi - previous interpreter state pointer - const Address size_of_parameters(rbx, Method::size_of_parameters_offset()); - // InterpreterRuntime::frequency_counter_overflow takes one argument // indicating if the counter overflow occurs at a backwards branch (non-NULL bcp). // The call returns the address of the verified entry point for the method or NULL @@ -868,12 +866,13 @@ address InterpreterGenerator::generate_native_entry(bool synchronized) { // rsi: previous interpreter state (C++ interpreter) must preserve address entry_point = __ pc(); - - const Address size_of_parameters(rbx, Method::size_of_parameters_offset()); + const Address constMethod (rbx, Method::const_offset()); const Address invocation_counter(rbx, Method::invocation_counter_offset() + InvocationCounter::counter_offset()); const Address access_flags (rbx, Method::access_flags_offset()); + const Address size_of_parameters(rcx, ConstMethod::size_of_parameters_offset()); // get parameter size (always needed) + __ movptr(rcx, constMethod); __ load_unsigned_short(rcx, size_of_parameters); // native calls don't need the stack size check since they have no expression stack @@ -988,7 +987,9 @@ address InterpreterGenerator::generate_native_entry(bool synchronized) { // allocate space for parameters __ get_method(method); - __ load_unsigned_short(t, Address(method, Method::size_of_parameters_offset())); + __ movptr(t, Address(method, Method::const_offset())); + __ load_unsigned_short(t, Address(t, ConstMethod::size_of_parameters_offset())); + __ shlptr(t, Interpreter::logStackElementSize); __ addptr(t, 2*wordSize); // allocate two more slots for JNIEnv and possible mirror __ subptr(rsp, t); @@ -1297,13 +1298,14 @@ address InterpreterGenerator::generate_normal_entry(bool synchronized) { // rsi: sender sp address entry_point = __ pc(); - - const Address size_of_parameters(rbx, Method::size_of_parameters_offset()); - const Address size_of_locals (rbx, Method::size_of_locals_offset()); + const Address constMethod (rbx, Method::const_offset()); const Address invocation_counter(rbx, Method::invocation_counter_offset() + InvocationCounter::counter_offset()); const Address access_flags (rbx, Method::access_flags_offset()); + const Address size_of_parameters(rdx, ConstMethod::size_of_parameters_offset()); + const Address size_of_locals (rdx, ConstMethod::size_of_locals_offset()); // get parameter size (always needed) + __ movptr(rdx, constMethod); __ load_unsigned_short(rcx, size_of_parameters); // rbx,: Method* @@ -1734,7 +1736,8 @@ void TemplateInterpreterGenerator::generate_throw_exception() { // Compute size of arguments for saving when returning to deoptimized caller __ get_method(rax); - __ load_unsigned_short(rax, Address(rax, in_bytes(Method::size_of_parameters_offset()))); + __ movptr(rax, Address(rax, Method::const_offset())); + __ load_unsigned_short(rax, Address(rax, ConstMethod::size_of_parameters_offset())); __ shlptr(rax, Interpreter::logStackElementSize); __ restore_locals(); __ subptr(rdi, rax); diff --git a/src/cpu/x86/vm/templateInterpreter_x86_64.cpp b/src/cpu/x86/vm/templateInterpreter_x86_64.cpp index e2b46fc78bca4790b482bfab14bbff21b9fe5e2e..3e3cc0fc1f5d9f1afa17833cb0ddf7528ec94427 100644 --- a/src/cpu/x86/vm/templateInterpreter_x86_64.cpp +++ b/src/cpu/x86/vm/templateInterpreter_x86_64.cpp @@ -369,9 +369,6 @@ void InterpreterGenerator::generate_counter_overflow(Label* do_continue) { // Everything as it was on entry // rdx is not restored. Doesn't appear to really be set. - const Address size_of_parameters(rbx, - Method::size_of_parameters_offset()); - // InterpreterRuntime::frequency_counter_overflow takes two // arguments, the first (thread) is passed by call_VM, the second // indicates if the counter overflow occurs at a backwards branch @@ -844,14 +841,17 @@ address InterpreterGenerator::generate_native_entry(bool synchronized) { address entry_point = __ pc(); - const Address size_of_parameters(rbx, Method:: - size_of_parameters_offset()); + const Address constMethod (rbx, Method::const_offset()); const Address invocation_counter(rbx, Method:: invocation_counter_offset() + InvocationCounter::counter_offset()); const Address access_flags (rbx, Method::access_flags_offset()); + const Address size_of_parameters(rcx, ConstMethod:: + size_of_parameters_offset()); + // get parameter size (always needed) + __ movptr(rcx, constMethod); __ load_unsigned_short(rcx, size_of_parameters); // native calls don't need the stack size check since they have no @@ -967,9 +967,8 @@ address InterpreterGenerator::generate_native_entry(bool synchronized) { // allocate space for parameters __ get_method(method); - __ load_unsigned_short(t, - Address(method, - Method::size_of_parameters_offset())); + __ movptr(t, Address(method, Method::const_offset())); + __ load_unsigned_short(t, Address(t, ConstMethod::size_of_parameters_offset())); __ shll(t, Interpreter::logStackElementSize); __ subptr(rsp, t); @@ -1302,15 +1301,18 @@ address InterpreterGenerator::generate_normal_entry(bool synchronized) { // r13: sender sp address entry_point = __ pc(); - const Address size_of_parameters(rbx, - Method::size_of_parameters_offset()); - const Address size_of_locals(rbx, Method::size_of_locals_offset()); + const Address constMethod(rbx, Method::const_offset()); const Address invocation_counter(rbx, Method::invocation_counter_offset() + InvocationCounter::counter_offset()); const Address access_flags(rbx, Method::access_flags_offset()); + const Address size_of_parameters(rdx, + ConstMethod::size_of_parameters_offset()); + const Address size_of_locals(rdx, ConstMethod::size_of_locals_offset()); + // get parameter size (always needed) + __ movptr(rdx, constMethod); __ load_unsigned_short(rcx, size_of_parameters); // rbx: Method* @@ -1752,7 +1754,8 @@ void TemplateInterpreterGenerator::generate_throw_exception() { // Compute size of arguments for saving when returning to // deoptimized caller __ get_method(rax); - __ load_unsigned_short(rax, Address(rax, in_bytes(Method:: + __ movptr(rax, Address(rax, Method::const_offset())); + __ load_unsigned_short(rax, Address(rax, in_bytes(ConstMethod:: size_of_parameters_offset()))); __ shll(rax, Interpreter::logStackElementSize); __ restore_locals(); // XXX do we need this? diff --git a/src/cpu/x86/vm/vm_version_x86.cpp b/src/cpu/x86/vm/vm_version_x86.cpp index f48e66012a815e0acc8bd54745068b01b7567e17..fc0466767393ad21dab5e24c67916244a8aec901 100644 --- a/src/cpu/x86/vm/vm_version_x86.cpp +++ b/src/cpu/x86/vm/vm_version_x86.cpp @@ -489,8 +489,8 @@ void VM_Version::get_processor_features() { } // The AES intrinsic stubs require AES instruction support (of course) - // but also require AVX and sse3 modes for instructions it use. - if (UseAES && (UseAVX > 0) && (UseSSE > 2)) { + // but also require sse3 mode for instructions it use. + if (UseAES && (UseSSE > 2)) { if (FLAG_IS_DEFAULT(UseAESIntrinsics)) { UseAESIntrinsics = true; } diff --git a/src/cpu/zero/vm/assembler_zero.cpp b/src/cpu/zero/vm/assembler_zero.cpp index e37938c1fa73fdce9a101f8c327655a843bc035c..d70a2adc746eec7fa3bb61fe9a055fa19bbab8cb 100644 --- a/src/cpu/zero/vm/assembler_zero.cpp +++ b/src/cpu/zero/vm/assembler_zero.cpp @@ -56,15 +56,9 @@ void Assembler::pd_patch_instruction(address branch, address target) { ShouldNotCallThis(); } -#ifndef PRODUCT -void Assembler::pd_print_patched_instruction(address branch) { - ShouldNotCallThis(); -} -#endif // PRODUCT - void MacroAssembler::align(int modulus) { while (offset() % modulus != 0) - emit_byte(AbstractAssembler::code_fill_byte()); + emit_int8(AbstractAssembler::code_fill_byte()); } void MacroAssembler::bang_stack_with_offset(int offset) { @@ -72,8 +66,7 @@ void MacroAssembler::bang_stack_with_offset(int offset) { } void MacroAssembler::advance(int bytes) { - _code_pos += bytes; - sync(); + code_section()->set_end(code_section()->end() + bytes); } RegisterOrConstant MacroAssembler::delayed_value_impl( diff --git a/src/cpu/zero/vm/assembler_zero.hpp b/src/cpu/zero/vm/assembler_zero.hpp index 724153982ac2e7601f3d840bcceb2eba66823480..f9564a54c0ac5acbd9908947d4de7e3084262177 100644 --- a/src/cpu/zero/vm/assembler_zero.hpp +++ b/src/cpu/zero/vm/assembler_zero.hpp @@ -37,9 +37,6 @@ class Assembler : public AbstractAssembler { public: void pd_patch_instruction(address branch, address target); -#ifndef PRODUCT - static void pd_print_patched_instruction(address branch); -#endif // PRODUCT }; class MacroAssembler : public Assembler { diff --git a/src/os/posix/vm/os_posix.cpp b/src/os/posix/vm/os_posix.cpp index cbffa3279af40546a263fefce488d879966fcf49..af75b3b362d8156ee770fd635f065474e50be648 100644 --- a/src/os/posix/vm/os_posix.cpp +++ b/src/os/posix/vm/os_posix.cpp @@ -93,6 +93,47 @@ void os::wait_for_keypress_at_exit(void) { return; } +// Multiple threads can race in this code, and can remap over each other with MAP_FIXED, +// so on posix, unmap the section at the start and at the end of the chunk that we mapped +// rather than unmapping and remapping the whole chunk to get requested alignment. +char* os::reserve_memory_aligned(size_t size, size_t alignment) { + assert((alignment & (os::vm_allocation_granularity() - 1)) == 0, + "Alignment must be a multiple of allocation granularity (page size)"); + assert((size & (alignment -1)) == 0, "size must be 'alignment' aligned"); + + size_t extra_size = size + alignment; + assert(extra_size >= size, "overflow, size is too large to allow alignment"); + + char* extra_base = os::reserve_memory(extra_size, NULL, alignment); + + if (extra_base == NULL) { + return NULL; + } + + // Do manual alignment + char* aligned_base = (char*) align_size_up((uintptr_t) extra_base, alignment); + + // [ | | ] + // ^ extra_base + // ^ extra_base + begin_offset == aligned_base + // extra_base + begin_offset + size ^ + // extra_base + extra_size ^ + // |<>| == begin_offset + // end_offset == |<>| + size_t begin_offset = aligned_base - extra_base; + size_t end_offset = (extra_base + extra_size) - (aligned_base + size); + + if (begin_offset > 0) { + os::release_memory(extra_base, begin_offset); + } + + if (end_offset > 0) { + os::release_memory(extra_base + begin_offset + size, end_offset); + } + + return aligned_base; +} + void os::Posix::print_load_average(outputStream* st) { st->print("load average:"); double loadavg[3]; diff --git a/src/os/windows/vm/os_windows.cpp b/src/os/windows/vm/os_windows.cpp index 2881bd519466427dab6603ae0334a0d1c7461f15..b8498d5a1e26ca28da021240c17671dc524b2ba3 100644 --- a/src/os/windows/vm/os_windows.cpp +++ b/src/os/windows/vm/os_windows.cpp @@ -2895,6 +2895,36 @@ void os::pd_split_reserved_memory(char *base, size_t size, size_t split, } } +// Multiple threads can race in this code but it's not possible to unmap small sections of +// virtual space to get requested alignment, like posix-like os's. +// Windows prevents multiple thread from remapping over each other so this loop is thread-safe. +char* os::reserve_memory_aligned(size_t size, size_t alignment) { + assert((alignment & (os::vm_allocation_granularity() - 1)) == 0, + "Alignment must be a multiple of allocation granularity (page size)"); + assert((size & (alignment -1)) == 0, "size must be 'alignment' aligned"); + + size_t extra_size = size + alignment; + assert(extra_size >= size, "overflow, size is too large to allow alignment"); + + char* aligned_base = NULL; + + do { + char* extra_base = os::reserve_memory(extra_size, NULL, alignment); + if (extra_base == NULL) { + return NULL; + } + // Do manual alignment + aligned_base = (char*) align_size_up((uintptr_t) extra_base, alignment); + + os::release_memory(extra_base, extra_size); + + aligned_base = os::reserve_memory(size, aligned_base); + + } while (aligned_base == NULL); + + return aligned_base; +} + char* os::pd_reserve_memory(size_t bytes, char* addr, size_t alignment_hint) { assert((size_t)addr % os::vm_allocation_granularity() == 0, "reserve alignment"); diff --git a/src/os_cpu/solaris_x86/vm/assembler_solaris_x86.cpp b/src/os_cpu/solaris_x86/vm/assembler_solaris_x86.cpp index ed25848bca81d3a83faae9453e8eb96437a41d34..203da611c01850fb1a01742f26f6e54e5e8a1161 100644 --- a/src/os_cpu/solaris_x86/vm/assembler_solaris_x86.cpp +++ b/src/os_cpu/solaris_x86/vm/assembler_solaris_x86.cpp @@ -116,7 +116,7 @@ void MacroAssembler::get_thread(Register thread) { ThreadLocalStorage::pd_tlsAccessMode tlsMode = ThreadLocalStorage::pd_getTlsAccessMode (); if (tlsMode == ThreadLocalStorage::pd_tlsAccessIndirect) { // T1 // Use thread as a temporary: mov r, gs:[0]; mov r, [r+tlsOffset] - emit_byte (segment); + emit_int8 (segment); // ExternalAddress doesn't work because it can't take NULL AddressLiteral null(0, relocInfo::none); movptr (thread, null); @@ -125,7 +125,7 @@ void MacroAssembler::get_thread(Register thread) { } else if (tlsMode == ThreadLocalStorage::pd_tlsAccessDirect) { // T2 // mov r, gs:[tlsOffset] - emit_byte (segment); + emit_int8 (segment); AddressLiteral tls_off((address)ThreadLocalStorage::pd_getTlsOffset(), relocInfo::none); movptr (thread, tls_off); return ; diff --git a/src/os_cpu/windows_x86/vm/assembler_windows_x86.cpp b/src/os_cpu/windows_x86/vm/assembler_windows_x86.cpp index ce629e20762ac9797e18e28ba8c7e3f102623820..ce48421c6d4469b53ef006b9b8c0d6758d32212f 100644 --- a/src/os_cpu/windows_x86/vm/assembler_windows_x86.cpp +++ b/src/os_cpu/windows_x86/vm/assembler_windows_x86.cpp @@ -30,7 +30,7 @@ void MacroAssembler::int3() { - emit_byte(0xCC); + emit_int8((unsigned char)0xCC); } #ifndef _LP64 diff --git a/src/share/vm/asm/assembler.cpp b/src/share/vm/asm/assembler.cpp index 6a34989ec7b66b12a01693ac06de1f8679533afe..669f49039fc346102a328f35f1ff750fa31ca606 100644 --- a/src/share/vm/asm/assembler.cpp +++ b/src/share/vm/asm/assembler.cpp @@ -109,37 +109,6 @@ void AbstractAssembler::flush() { ICache::invalidate_range(addr_at(0), offset()); } - -void AbstractAssembler::a_byte(int x) { - emit_byte(x); -} - - -void AbstractAssembler::a_long(jint x) { - emit_long(x); -} - -// Labels refer to positions in the (to be) generated code. There are bound -// and unbound -// -// Bound labels refer to known positions in the already generated code. -// offset() is the position the label refers to. -// -// Unbound labels refer to unknown positions in the code to be generated; it -// may contain a list of unresolved displacements that refer to it -#ifndef PRODUCT -void AbstractAssembler::print(Label& L) { - if (L.is_bound()) { - tty->print_cr("bound label to %d|%d", L.loc_pos(), L.loc_sect()); - } else if (L.is_unbound()) { - L.print_instructions((MacroAssembler*)this); - } else { - tty->print_cr("label in inconsistent state (loc = %d)", L.loc()); - } -} -#endif // PRODUCT - - void AbstractAssembler::bind(Label& L) { if (L.is_bound()) { // Assembler can bind a label more than once to the same place. @@ -342,28 +311,3 @@ bool MacroAssembler::needs_explicit_null_check(intptr_t offset) { #endif return offset < 0 || os::vm_page_size() <= offset; } - -#ifndef PRODUCT -void Label::print_instructions(MacroAssembler* masm) const { - CodeBuffer* cb = masm->code(); - for (int i = 0; i < _patch_index; ++i) { - int branch_loc; - if (i >= PatchCacheSize) { - branch_loc = _patch_overflow->at(i - PatchCacheSize); - } else { - branch_loc = _patches[i]; - } - int branch_pos = CodeBuffer::locator_pos(branch_loc); - int branch_sect = CodeBuffer::locator_sect(branch_loc); - address branch = cb->locator_address(branch_loc); - tty->print_cr("unbound label"); - tty->print("@ %d|%d ", branch_pos, branch_sect); - if (branch_sect == CodeBuffer::SECT_CONSTS) { - tty->print_cr(PTR_FORMAT, *(address*)branch); - continue; - } - masm->pd_print_patched_instruction(branch); - tty->cr(); - } -} -#endif // ndef PRODUCT diff --git a/src/share/vm/asm/assembler.hpp b/src/share/vm/asm/assembler.hpp index 5de33dd503edd959f7ffe343bc46488843606065..ca26ea5bba9f34ba92aa004d2abb0f7b8930937d 100644 --- a/src/share/vm/asm/assembler.hpp +++ b/src/share/vm/asm/assembler.hpp @@ -216,17 +216,6 @@ class AbstractAssembler : public ResourceObj { bool isByte(int x) const { return 0 <= x && x < 0x100; } bool isShiftCount(int x) const { return 0 <= x && x < 32; } - void emit_int8( int8_t x) { code_section()->emit_int8( x); } - void emit_int16( int16_t x) { code_section()->emit_int16( x); } - void emit_int32( int32_t x) { code_section()->emit_int32( x); } - void emit_int64( int64_t x) { code_section()->emit_int64( x); } - - void emit_float( jfloat x) { code_section()->emit_float( x); } - void emit_double( jdouble x) { code_section()->emit_double( x); } - void emit_address(address x) { code_section()->emit_address(x); } - - void emit_byte(int x) { emit_int8 (x); } // deprecated - void emit_word(int x) { emit_int16(x); } // deprecated void emit_long(jint x) { emit_int32(x); } // deprecated // Instruction boundaries (required when emitting relocatable values). @@ -277,9 +266,6 @@ class AbstractAssembler : public ResourceObj { }; #endif - // Label functions - void print(Label& L); - public: // Creation @@ -288,6 +274,15 @@ class AbstractAssembler : public ResourceObj { // ensure buf contains all code (call this before using/copying the code) void flush(); + void emit_int8( int8_t x) { code_section()->emit_int8( x); } + void emit_int16( int16_t x) { code_section()->emit_int16( x); } + void emit_int32( int32_t x) { code_section()->emit_int32( x); } + void emit_int64( int64_t x) { code_section()->emit_int64( x); } + + void emit_float( jfloat x) { code_section()->emit_float( x); } + void emit_double( jdouble x) { code_section()->emit_double( x); } + void emit_address(address x) { code_section()->emit_address(x); } + // min and max values for signed immediate ranges static int min_simm(int nbits) { return -(intptr_t(1) << (nbits - 1)) ; } static int max_simm(int nbits) { return (intptr_t(1) << (nbits - 1)) - 1; } @@ -327,8 +322,6 @@ class AbstractAssembler : public ResourceObj { void clear_inst_mark() { code_section()->clear_mark(); } // Constants in code - void a_byte(int x); - void a_long(jint x); void relocate(RelocationHolder const& rspec, int format = 0) { assert(!pd_check_instruction_mark() || inst_mark() == NULL || inst_mark() == code_section()->end(), @@ -441,15 +434,6 @@ class AbstractAssembler : public ResourceObj { */ void pd_patch_instruction(address branch, address target); -#ifndef PRODUCT - /** - * Platform-dependent method of printing an instruction that needs to be - * patched. - * - * @param branch the instruction to be patched in the buffer. - */ - static void pd_print_patched_instruction(address branch); -#endif // PRODUCT }; #ifdef TARGET_ARCH_x86 diff --git a/src/share/vm/c1/c1_GraphBuilder.cpp b/src/share/vm/c1/c1_GraphBuilder.cpp index 70b69157530a278ff747d0f6cbcb597a5450da1b..85c3aa1c9a9e84ce411566ac1e4e85420689f434 100644 --- a/src/share/vm/c1/c1_GraphBuilder.cpp +++ b/src/share/vm/c1/c1_GraphBuilder.cpp @@ -3442,6 +3442,11 @@ bool GraphBuilder::try_inline_intrinsics(ciMethod* callee) { preserves_state = true; break; + case vmIntrinsics::_loadFence : + case vmIntrinsics::_storeFence: + case vmIntrinsics::_fullFence : + break; + default : return false; // do not inline } // create intrinsic node diff --git a/src/share/vm/c1/c1_LIRGenerator.cpp b/src/share/vm/c1/c1_LIRGenerator.cpp index cf865f1b557e378d5e2cd08d2e27b8f763241792..65bc34e3de65d867422946a33e39f6fa28b7f77f 100644 --- a/src/share/vm/c1/c1_LIRGenerator.cpp +++ b/src/share/vm/c1/c1_LIRGenerator.cpp @@ -2977,6 +2977,16 @@ void LIRGenerator::do_Intrinsic(Intrinsic* x) { do_CompareAndSwap(x, longType); break; + case vmIntrinsics::_loadFence : + if (os::is_MP()) __ membar_acquire(); + break; + case vmIntrinsics::_storeFence: + if (os::is_MP()) __ membar_release(); + break; + case vmIntrinsics::_fullFence : + if (os::is_MP()) __ membar(); + break; + case vmIntrinsics::_Reference_get: do_Reference_get(x); break; diff --git a/src/share/vm/ci/ciField.cpp b/src/share/vm/ci/ciField.cpp index 5cfa47b277a2d385e2aeb5949449cf656e6a11fd..fe967554c3a62458a694bcb5bdb6c81df0425207 100644 --- a/src/share/vm/ci/ciField.cpp +++ b/src/share/vm/ci/ciField.cpp @@ -366,10 +366,12 @@ bool ciField::will_link(ciInstanceKlass* accessing_klass, // ------------------------------------------------------------------ // ciField::print void ciField::print() { - tty->print("print("print_symbol(); + tty->print(" signature="); + _signature->print_symbol(); tty->print(" offset=%d type=", _offset); if (_type != NULL) _type->print_name(); else tty->print("(reference)"); diff --git a/src/share/vm/classfile/classLoaderData.cpp b/src/share/vm/classfile/classLoaderData.cpp index 41e6815dbd02b866ba42496f447d84aa5e9662a3..eaf829738d0a15f897f1c307d4062e2c4daa5c20 100644 --- a/src/share/vm/classfile/classLoaderData.cpp +++ b/src/share/vm/classfile/classLoaderData.cpp @@ -169,16 +169,18 @@ void ClassLoaderData::add_dependency(Handle dependency, TRAPS) { ok = (objArrayOop)ok->obj_at(1); } + // Must handle over GC points + assert (last != NULL, "dependencies should be initialized"); + objArrayHandle last_handle(THREAD, last); + // Create a new dependency node with fields for (class_loader or mirror, next) objArrayOop deps = oopFactory::new_objectArray(2, CHECK); deps->obj_at_put(0, dependency()); - // Must handle over more GC points + // Must handle over GC points objArrayHandle new_dependency(THREAD, deps); // Add the dependency under lock - assert (last != NULL, "dependencies should be initialized"); - objArrayHandle last_handle(THREAD, last); locked_add_dependency(last_handle, new_dependency); } diff --git a/src/share/vm/classfile/javaClasses.cpp b/src/share/vm/classfile/javaClasses.cpp index 79a35008575e8274e04dbcc6e647912bd2b6dc32..9c2caaa7397fc17f5ac33a9ecd56feda8aecdbd7 100644 --- a/src/share/vm/classfile/javaClasses.cpp +++ b/src/share/vm/classfile/javaClasses.cpp @@ -327,14 +327,14 @@ jchar* java_lang_String::as_unicode_string(oop java_string, int& length) { return result; } -unsigned int java_lang_String::to_hash(oop java_string) { +unsigned int java_lang_String::hash_code(oop java_string) { int length = java_lang_String::length(java_string); - // Zero length string will hash to zero with String.toHash() function. + // Zero length string will hash to zero with String.hashCode() function. if (length == 0) return 0; typeArrayOop value = java_lang_String::value(java_string); int offset = java_lang_String::offset(java_string); - return java_lang_String::to_hash(value->char_at_addr(offset), length); + return java_lang_String::hash_code(value->char_at_addr(offset), length); } char* java_lang_String::as_quoted_ascii(oop java_string) { diff --git a/src/share/vm/classfile/javaClasses.hpp b/src/share/vm/classfile/javaClasses.hpp index 1a4b51f59978d948be975d4286bd91c84c329ff0..00ce4576499d3b8c9f44c24f54a185f86027aee7 100644 --- a/src/share/vm/classfile/javaClasses.hpp +++ b/src/share/vm/classfile/javaClasses.hpp @@ -166,8 +166,8 @@ class java_lang_String : AllStatic { // objects in the shared archive file. // hash P(31) from Kernighan & Ritchie // - // For this reason, THIS ALGORITHM MUST MATCH String.toHash(). - template static unsigned int to_hash(T* s, int len) { + // For this reason, THIS ALGORITHM MUST MATCH String.hashCode(). + template static unsigned int hash_code(T* s, int len) { unsigned int h = 0; while (len-- > 0) { h = 31*h + (unsigned int) *s; @@ -175,10 +175,10 @@ class java_lang_String : AllStatic { } return h; } - static unsigned int to_hash(oop java_string); + static unsigned int hash_code(oop java_string); // This is the string hash code used by the StringTable, which may be - // the same as String.toHash or an alternate hash code. + // the same as String.hashCode or an alternate hash code. static unsigned int hash_string(oop java_string); static bool equals(oop java_string, jchar* chars, int len); diff --git a/src/share/vm/classfile/symbolTable.cpp b/src/share/vm/classfile/symbolTable.cpp index 3c93662052124ad904d04cd53f47c0b26c38dc41..87de232d7a509f483219e6e95a44ff5f1f91298a 100644 --- a/src/share/vm/classfile/symbolTable.cpp +++ b/src/share/vm/classfile/symbolTable.cpp @@ -179,7 +179,7 @@ Symbol* SymbolTable::lookup(int index, const char* name, unsigned int SymbolTable::hash_symbol(const char* s, int len) { return use_alternate_hashcode() ? AltHashing::murmur3_32(seed(), (const jbyte*)s, len) : - java_lang_String::to_hash(s, len); + java_lang_String::hash_code(s, len); } @@ -617,7 +617,7 @@ bool StringTable::_needs_rehashing = false; // Pick hashing algorithm unsigned int StringTable::hash_string(const jchar* s, int len) { return use_alternate_hashcode() ? AltHashing::murmur3_32(seed(), s, len) : - java_lang_String::to_hash(s, len); + java_lang_String::hash_code(s, len); } oop StringTable::lookup(int index, jchar* name, diff --git a/src/share/vm/classfile/vmSymbols.hpp b/src/share/vm/classfile/vmSymbols.hpp index 5c9b6327eaaa5b24f638abb369dab6d36bacf049..e5425b4f24637700444a83ea1cfbe3628a922b26 100644 --- a/src/share/vm/classfile/vmSymbols.hpp +++ b/src/share/vm/classfile/vmSymbols.hpp @@ -761,6 +761,15 @@ do_intrinsic(_unpark, sun_misc_Unsafe, unpark_name, unpark_signature, F_RN) \ do_name( unpark_name, "unpark") \ do_alias( unpark_signature, /*(LObject;)V*/ object_void_signature) \ + do_intrinsic(_loadFence, sun_misc_Unsafe, loadFence_name, loadFence_signature, F_RN) \ + do_name( loadFence_name, "loadFence") \ + do_alias( loadFence_signature, void_method_signature) \ + do_intrinsic(_storeFence, sun_misc_Unsafe, storeFence_name, storeFence_signature, F_RN) \ + do_name( storeFence_name, "storeFence") \ + do_alias( storeFence_signature, void_method_signature) \ + do_intrinsic(_fullFence, sun_misc_Unsafe, fullFence_name, fullFence_signature, F_RN) \ + do_name( fullFence_name, "fullFence") \ + do_alias( fullFence_signature, void_method_signature) \ \ /* unsafe memory references (there are a lot of them...) */ \ do_signature(getObject_signature, "(Ljava/lang/Object;J)Ljava/lang/Object;") \ @@ -902,12 +911,14 @@ do_intrinsic(_getAndAddLong, sun_misc_Unsafe, getAndAddLong_name, getAndAddLong_signature, F_R) \ do_name( getAndAddLong_name, "getAndAddLong") \ do_signature(getAndAddLong_signature, "(Ljava/lang/Object;JJ)J" ) \ - do_intrinsic(_getAndSetInt, sun_misc_Unsafe, getAndSet_name, getAndSetInt_signature, F_R) \ - do_name( getAndSet_name, "getAndSet") \ + do_intrinsic(_getAndSetInt, sun_misc_Unsafe, getAndSetInt_name, getAndSetInt_signature, F_R) \ + do_name( getAndSetInt_name, "getAndSetInt") \ do_alias( getAndSetInt_signature, /*"(Ljava/lang/Object;JI)I"*/ getAndAddInt_signature) \ - do_intrinsic(_getAndSetLong, sun_misc_Unsafe, getAndSet_name, getAndSetLong_signature, F_R) \ + do_intrinsic(_getAndSetLong, sun_misc_Unsafe, getAndSetLong_name, getAndSetLong_signature, F_R) \ + do_name( getAndSetLong_name, "getAndSetLong") \ do_alias( getAndSetLong_signature, /*"(Ljava/lang/Object;JJ)J"*/ getAndAddLong_signature) \ - do_intrinsic(_getAndSetObject, sun_misc_Unsafe, getAndSet_name, getAndSetObject_signature, F_R) \ + do_intrinsic(_getAndSetObject, sun_misc_Unsafe, getAndSetObject_name, getAndSetObject_signature, F_R)\ + do_name( getAndSetObject_name, "getAndSetObject") \ do_signature(getAndSetObject_signature, "(Ljava/lang/Object;JLjava/lang/Object;)Ljava/lang/Object;" ) \ \ /* prefetch_signature is shared by all prefetch variants */ \ diff --git a/src/share/vm/compiler/compilerOracle.cpp b/src/share/vm/compiler/compilerOracle.cpp index 7e620cec9fd7fc1a2526b57c2df6a2c581fe38d2..73e348bc97c4f09e46c1afe87aabb13fe871be49 100644 --- a/src/share/vm/compiler/compilerOracle.cpp +++ b/src/share/vm/compiler/compilerOracle.cpp @@ -538,6 +538,7 @@ void CompilerOracle::parse_from_line(char* line) { if (match != NULL) { if (!_quiet) { + ResourceMark rm; tty->print("CompilerOracle: %s ", command_names[command]); match->print(); } diff --git a/src/share/vm/gc_implementation/g1/concurrentMark.cpp b/src/share/vm/gc_implementation/g1/concurrentMark.cpp index d3dc1eb7bb73f169b439721ee45949584babc9f0..e03b2f41cfc4c072515104f99cc104f0b4507c2e 100644 --- a/src/share/vm/gc_implementation/g1/concurrentMark.cpp +++ b/src/share/vm/gc_implementation/g1/concurrentMark.cpp @@ -46,27 +46,11 @@ // Concurrent marking bit map wrapper -CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter) : - _bm((uintptr_t*)NULL,0), +CMBitMapRO::CMBitMapRO(int shifter) : + _bm(), _shifter(shifter) { - _bmStartWord = (HeapWord*)(rs.base()); - _bmWordSize = rs.size()/HeapWordSize; // rs.size() is in bytes - ReservedSpace brs(ReservedSpace::allocation_align_size_up( - (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1)); - - MemTracker::record_virtual_memory_type((address)brs.base(), mtGC); - - guarantee(brs.is_reserved(), "couldn't allocate concurrent marking bit map"); - // For now we'll just commit all of the bit map up fromt. - // Later on we'll try to be more parsimonious with swap. - guarantee(_virtual_space.initialize(brs, brs.size()), - "couldn't reseve backing store for concurrent marking bit map"); - assert(_virtual_space.committed_size() == brs.size(), - "didn't reserve backing store for all of concurrent marking bit map?"); - _bm.set_map((uintptr_t*)_virtual_space.low()); - assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >= - _bmWordSize, "inconsistency in bit map sizing"); - _bm.set_size(_bmWordSize >> _shifter); + _bmStartWord = 0; + _bmWordSize = 0; } HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr, @@ -108,15 +92,40 @@ int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const { } #ifndef PRODUCT -bool CMBitMapRO::covers(ReservedSpace rs) const { +bool CMBitMapRO::covers(ReservedSpace heap_rs) const { // assert(_bm.map() == _virtual_space.low(), "map inconsistency"); assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize, "size inconsistency"); - return _bmStartWord == (HeapWord*)(rs.base()) && - _bmWordSize == rs.size()>>LogHeapWordSize; + return _bmStartWord == (HeapWord*)(heap_rs.base()) && + _bmWordSize == heap_rs.size()>>LogHeapWordSize; } #endif +bool CMBitMap::allocate(ReservedSpace heap_rs) { + _bmStartWord = (HeapWord*)(heap_rs.base()); + _bmWordSize = heap_rs.size()/HeapWordSize; // heap_rs.size() is in bytes + ReservedSpace brs(ReservedSpace::allocation_align_size_up( + (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1)); + if (!brs.is_reserved()) { + warning("ConcurrentMark marking bit map allocation failure"); + return false; + } + MemTracker::record_virtual_memory_type((address)brs.base(), mtGC); + // For now we'll just commit all of the bit map up front. + // Later on we'll try to be more parsimonious with swap. + if (!_virtual_space.initialize(brs, brs.size())) { + warning("ConcurrentMark marking bit map backing store failure"); + return false; + } + assert(_virtual_space.committed_size() == brs.size(), + "didn't reserve backing store for all of concurrent marking bit map?"); + _bm.set_map((uintptr_t*)_virtual_space.low()); + assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >= + _bmWordSize, "inconsistency in bit map sizing"); + _bm.set_size(_bmWordSize >> _shifter); + return true; +} + void CMBitMap::clearAll() { _bm.clear(); return; @@ -163,20 +172,79 @@ CMMarkStack::CMMarkStack(ConcurrentMark* cm) : #endif {} -void CMMarkStack::allocate(size_t size) { - _base = NEW_C_HEAP_ARRAY(oop, size, mtGC); - if (_base == NULL) { - vm_exit_during_initialization("Failed to allocate CM region mark stack"); +bool CMMarkStack::allocate(size_t capacity) { + // allocate a stack of the requisite depth + ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop))); + if (!rs.is_reserved()) { + warning("ConcurrentMark MarkStack allocation failure"); + return false; } - _index = 0; - _capacity = (jint) size; + MemTracker::record_virtual_memory_type((address)rs.base(), mtGC); + if (!_virtual_space.initialize(rs, rs.size())) { + warning("ConcurrentMark MarkStack backing store failure"); + // Release the virtual memory reserved for the marking stack + rs.release(); + return false; + } + assert(_virtual_space.committed_size() == rs.size(), + "Didn't reserve backing store for all of ConcurrentMark stack?"); + _base = (oop*) _virtual_space.low(); + setEmpty(); + _capacity = (jint) capacity; _saved_index = -1; NOT_PRODUCT(_max_depth = 0); + return true; +} + +void CMMarkStack::expand() { + // Called, during remark, if we've overflown the marking stack during marking. + assert(isEmpty(), "stack should been emptied while handling overflow"); + assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted"); + // Clear expansion flag + _should_expand = false; + if (_capacity == (jint) MarkStackSizeMax) { + if (PrintGCDetails && Verbose) { + gclog_or_tty->print_cr(" (benign) Can't expand marking stack capacity, at max size limit"); + } + return; + } + // Double capacity if possible + jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax); + // Do not give up existing stack until we have managed to + // get the double capacity that we desired. + ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity * + sizeof(oop))); + if (rs.is_reserved()) { + // Release the backing store associated with old stack + _virtual_space.release(); + // Reinitialize virtual space for new stack + if (!_virtual_space.initialize(rs, rs.size())) { + fatal("Not enough swap for expanded marking stack capacity"); + } + _base = (oop*)(_virtual_space.low()); + _index = 0; + _capacity = new_capacity; + } else { + if (PrintGCDetails && Verbose) { + // Failed to double capacity, continue; + gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from " + SIZE_FORMAT"K to " SIZE_FORMAT"K", + _capacity / K, new_capacity / K); + } + } +} + +void CMMarkStack::set_should_expand() { + // If we're resetting the marking state because of an + // marking stack overflow, record that we should, if + // possible, expand the stack. + _should_expand = _cm->has_overflown(); } CMMarkStack::~CMMarkStack() { if (_base != NULL) { - FREE_C_HEAP_ARRAY(oop, _base, mtGC); + _base = NULL; + _virtual_space.release(); } } @@ -217,7 +285,7 @@ void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) { jint res = Atomic::cmpxchg(next_index, &_index, index); if (res == index) { for (int i = 0; i < n; i++) { - int ind = index + i; + int ind = index + i; assert(ind < _capacity, "By overflow test above."); _base[ind] = ptr_arr[i]; } @@ -228,7 +296,6 @@ void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) { } } - void CMMarkStack::par_push_arr(oop* ptr_arr, int n) { MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); jint start = _index; @@ -244,9 +311,9 @@ void CMMarkStack::par_push_arr(oop* ptr_arr, int n) { assert(ind < _capacity, "By overflow test above."); _base[ind] = ptr_arr[i]; } + NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index)); } - bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) { MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); jint index = _index; @@ -255,7 +322,7 @@ bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) { return false; } else { int k = MIN2(max, index); - jint new_ind = index - k; + jint new_ind = index - k; for (int j = 0; j < k; j++) { ptr_arr[j] = _base[new_ind + j]; } @@ -404,9 +471,10 @@ uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) { return MAX2((n_par_threads + 2) / 4, 1U); } -ConcurrentMark::ConcurrentMark(ReservedSpace rs, uint max_regions) : - _markBitMap1(rs, MinObjAlignment - 1), - _markBitMap2(rs, MinObjAlignment - 1), +ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, ReservedSpace heap_rs) : + _g1h(g1h), + _markBitMap1(MinObjAlignment - 1), + _markBitMap2(MinObjAlignment - 1), _parallel_marking_threads(0), _max_parallel_marking_threads(0), @@ -415,10 +483,10 @@ ConcurrentMark::ConcurrentMark(ReservedSpace rs, uint max_regions) : _cleanup_sleep_factor(0.0), _cleanup_task_overhead(1.0), _cleanup_list("Cleanup List"), - _region_bm((BitMap::idx_t) max_regions, false /* in_resource_area*/), - _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >> - CardTableModRefBS::card_shift, - false /* in_resource_area*/), + _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/), + _card_bm((heap_rs.size() + CardTableModRefBS::card_size - 1) >> + CardTableModRefBS::card_shift, + false /* in_resource_area*/), _prevMarkBitMap(&_markBitMap1), _nextMarkBitMap(&_markBitMap2), @@ -449,7 +517,8 @@ ConcurrentMark::ConcurrentMark(ReservedSpace rs, uint max_regions) : _parallel_workers(NULL), _count_card_bitmaps(NULL), - _count_marked_bytes(NULL) { + _count_marked_bytes(NULL), + _completed_initialization(false) { CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel; if (verbose_level < no_verbose) { verbose_level = no_verbose; @@ -464,61 +533,34 @@ ConcurrentMark::ConcurrentMark(ReservedSpace rs, uint max_regions) : "heap end = "PTR_FORMAT, _heap_start, _heap_end); } - _markStack.allocate(MarkStackSize); + if (!_markBitMap1.allocate(heap_rs)) { + warning("Failed to allocate first CM bit map"); + return; + } + if (!_markBitMap2.allocate(heap_rs)) { + warning("Failed to allocate second CM bit map"); + return; + } // Create & start a ConcurrentMark thread. _cmThread = new ConcurrentMarkThread(this); assert(cmThread() != NULL, "CM Thread should have been created"); assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm"); - _g1h = G1CollectedHeap::heap(); assert(CGC_lock != NULL, "Where's the CGC_lock?"); - assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency"); - assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency"); + assert(_markBitMap1.covers(heap_rs), "_markBitMap1 inconsistency"); + assert(_markBitMap2.covers(heap_rs), "_markBitMap2 inconsistency"); SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set(); satb_qs.set_buffer_size(G1SATBBufferSize); _root_regions.init(_g1h, this); - _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC); - _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC); - - _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap, _max_worker_id, mtGC); - _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC); - - BitMap::idx_t card_bm_size = _card_bm.size(); - - // so that the assertion in MarkingTaskQueue::task_queue doesn't fail - _active_tasks = _max_worker_id; - for (uint i = 0; i < _max_worker_id; ++i) { - CMTaskQueue* task_queue = new CMTaskQueue(); - task_queue->initialize(); - _task_queues->register_queue(i, task_queue); - - _count_card_bitmaps[i] = BitMap(card_bm_size, false); - _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, (size_t) max_regions, mtGC); - - _tasks[i] = new CMTask(i, this, - _count_marked_bytes[i], - &_count_card_bitmaps[i], - task_queue, _task_queues); - - _accum_task_vtime[i] = 0.0; - } - - // Calculate the card number for the bottom of the heap. Used - // in biasing indexes into the accounting card bitmaps. - _heap_bottom_card_num = - intptr_t(uintptr_t(_g1h->reserved_region().start()) >> - CardTableModRefBS::card_shift); - - // Clear all the liveness counting data - clear_all_count_data(); - if (ConcGCThreads > ParallelGCThreads) { - vm_exit_during_initialization("Can't have more ConcGCThreads " - "than ParallelGCThreads."); + warning("Can't have more ConcGCThreads (" UINT32_FORMAT ") " + "than ParallelGCThreads (" UINT32_FORMAT ").", + ConcGCThreads, ParallelGCThreads); + return; } if (ParallelGCThreads == 0) { // if we are not running with any parallel GC threads we will not @@ -590,9 +632,86 @@ ConcurrentMark::ConcurrentMark(ReservedSpace rs, uint max_regions) : } } + if (FLAG_IS_DEFAULT(MarkStackSize)) { + uintx mark_stack_size = + MIN2(MarkStackSizeMax, + MAX2(MarkStackSize, (uintx) (parallel_marking_threads() * TASKQUEUE_SIZE))); + // Verify that the calculated value for MarkStackSize is in range. + // It would be nice to use the private utility routine from Arguments. + if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) { + warning("Invalid value calculated for MarkStackSize (" UINTX_FORMAT "): " + "must be between " UINTX_FORMAT " and " UINTX_FORMAT, + mark_stack_size, 1, MarkStackSizeMax); + return; + } + FLAG_SET_ERGO(uintx, MarkStackSize, mark_stack_size); + } else { + // Verify MarkStackSize is in range. + if (FLAG_IS_CMDLINE(MarkStackSize)) { + if (FLAG_IS_DEFAULT(MarkStackSizeMax)) { + if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) { + warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT "): " + "must be between " UINTX_FORMAT " and " UINTX_FORMAT, + MarkStackSize, 1, MarkStackSizeMax); + return; + } + } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) { + if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) { + warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT ")" + " or for MarkStackSizeMax (" UINTX_FORMAT ")", + MarkStackSize, MarkStackSizeMax); + return; + } + } + } + } + + if (!_markStack.allocate(MarkStackSize)) { + warning("Failed to allocate CM marking stack"); + return; + } + + _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC); + _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC); + + _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap, _max_worker_id, mtGC); + _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC); + + BitMap::idx_t card_bm_size = _card_bm.size(); + + // so that the assertion in MarkingTaskQueue::task_queue doesn't fail + _active_tasks = _max_worker_id; + + size_t max_regions = (size_t) _g1h->max_regions(); + for (uint i = 0; i < _max_worker_id; ++i) { + CMTaskQueue* task_queue = new CMTaskQueue(); + task_queue->initialize(); + _task_queues->register_queue(i, task_queue); + + _count_card_bitmaps[i] = BitMap(card_bm_size, false); + _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC); + + _tasks[i] = new CMTask(i, this, + _count_marked_bytes[i], + &_count_card_bitmaps[i], + task_queue, _task_queues); + + _accum_task_vtime[i] = 0.0; + } + + // Calculate the card number for the bottom of the heap. Used + // in biasing indexes into the accounting card bitmaps. + _heap_bottom_card_num = + intptr_t(uintptr_t(_g1h->reserved_region().start()) >> + CardTableModRefBS::card_shift); + + // Clear all the liveness counting data + clear_all_count_data(); + // so that the call below can read a sensible value - _heap_start = (HeapWord*) rs.base(); + _heap_start = (HeapWord*) heap_rs.base(); set_non_marking_state(); + _completed_initialization = true; } void ConcurrentMark::update_g1_committed(bool force) { @@ -1165,6 +1284,11 @@ void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) { assert(!restart_for_overflow(), "sanity"); } + // Expand the marking stack, if we have to and if we can. + if (_markStack.should_expand()) { + _markStack.expand(); + } + // Reset the marking state if marking completed if (!restart_for_overflow()) { set_non_marking_state(); @@ -2785,7 +2909,7 @@ void ConcurrentMark::verify_no_cset_oops(bool verify_stacks, // Verify entries on the task queues for (uint i = 0; i < _max_worker_id; i += 1) { cl.set_phase(VerifyNoCSetOopsQueues, i); - OopTaskQueue* queue = _task_queues->queue(i); + CMTaskQueue* queue = _task_queues->queue(i); queue->oops_do(&cl); } } @@ -2840,8 +2964,8 @@ void ConcurrentMark::verify_no_cset_oops(bool verify_stacks, #endif // PRODUCT void ConcurrentMark::clear_marking_state(bool clear_overflow) { - _markStack.setEmpty(); - _markStack.clear_overflow(); + _markStack.set_should_expand(); + _markStack.setEmpty(); // Also clears the _markStack overflow flag if (clear_overflow) { clear_has_overflown(); } else { @@ -2850,7 +2974,7 @@ void ConcurrentMark::clear_marking_state(bool clear_overflow) { _finger = _heap_start; for (uint i = 0; i < _max_worker_id; ++i) { - OopTaskQueue* queue = _task_queues->queue(i); + CMTaskQueue* queue = _task_queues->queue(i); queue->set_empty(); } } diff --git a/src/share/vm/gc_implementation/g1/concurrentMark.hpp b/src/share/vm/gc_implementation/g1/concurrentMark.hpp index 5bac7a6b34d5e40686c5437a545cc8dcfc871e40..8089c98294aa352a2715cc930f3c7ebd609f5eb1 100644 --- a/src/share/vm/gc_implementation/g1/concurrentMark.hpp +++ b/src/share/vm/gc_implementation/g1/concurrentMark.hpp @@ -63,7 +63,7 @@ class CMBitMapRO VALUE_OBJ_CLASS_SPEC { public: // constructor - CMBitMapRO(ReservedSpace rs, int shifter); + CMBitMapRO(int shifter); enum { do_yield = true }; @@ -117,8 +117,11 @@ class CMBitMap : public CMBitMapRO { public: // constructor - CMBitMap(ReservedSpace rs, int shifter) : - CMBitMapRO(rs, shifter) {} + CMBitMap(int shifter) : + CMBitMapRO(shifter) {} + + // Allocates the back store for the marking bitmap + bool allocate(ReservedSpace heap_rs); // write marks void mark(HeapWord* addr) { @@ -155,17 +158,18 @@ class CMBitMap : public CMBitMapRO { MemRegion getAndClearMarkedRegion(HeapWord* addr, HeapWord* end_addr); }; -// Represents a marking stack used by the CM collector. -// Ideally this should be GrowableArray<> just like MSC's marking stack(s). +// Represents a marking stack used by ConcurrentMarking in the G1 collector. class CMMarkStack VALUE_OBJ_CLASS_SPEC { + VirtualSpace _virtual_space; // Underlying backing store for actual stack ConcurrentMark* _cm; oop* _base; // bottom of stack - jint _index; // one more than last occupied index - jint _capacity; // max #elements - jint _saved_index; // value of _index saved at start of GC - NOT_PRODUCT(jint _max_depth;) // max depth plumbed during run + jint _index; // one more than last occupied index + jint _capacity; // max #elements + jint _saved_index; // value of _index saved at start of GC + NOT_PRODUCT(jint _max_depth;) // max depth plumbed during run - bool _overflow; + bool _overflow; + bool _should_expand; DEBUG_ONLY(bool _drain_in_progress;) DEBUG_ONLY(bool _drain_in_progress_yields;) @@ -173,7 +177,13 @@ class CMMarkStack VALUE_OBJ_CLASS_SPEC { CMMarkStack(ConcurrentMark* cm); ~CMMarkStack(); - void allocate(size_t size); +#ifndef PRODUCT + jint max_depth() const { + return _max_depth; + } +#endif + + bool allocate(size_t capacity); oop pop() { if (!isEmpty()) { @@ -231,11 +241,17 @@ class CMMarkStack VALUE_OBJ_CLASS_SPEC { bool isEmpty() { return _index == 0; } bool isFull() { return _index == _capacity; } - int maxElems() { return _capacity; } + int maxElems() { return _capacity; } bool overflow() { return _overflow; } void clear_overflow() { _overflow = false; } + bool should_expand() const { return _should_expand; } + void set_should_expand(); + + // Expand the stack, typically in response to an overflow condition + void expand(); + int size() { return _index; } void setEmpty() { _index = 0; clear_overflow(); } @@ -344,6 +360,7 @@ public: class ConcurrentMarkThread; class ConcurrentMark: public CHeapObj { + friend class CMMarkStack; friend class ConcurrentMarkThread; friend class CMTask; friend class CMBitMapClosure; @@ -577,6 +594,9 @@ protected: // the card bitmaps. intptr_t _heap_bottom_card_num; + // Set to true when initialization is complete + bool _completed_initialization; + public: // Manipulation of the global mark stack. // Notice that the first mark_stack_push is CAS-based, whereas the @@ -636,7 +656,7 @@ public: return _task_queues->steal(worker_id, hash_seed, obj); } - ConcurrentMark(ReservedSpace rs, uint max_regions); + ConcurrentMark(G1CollectedHeap* g1h, ReservedSpace heap_rs); ~ConcurrentMark(); ConcurrentMarkThread* cmThread() { return _cmThread; } @@ -907,6 +927,11 @@ public: // Should *not* be called from parallel code. inline bool mark_and_count(oop obj); + // Returns true if initialization was successfully completed. + bool completed_initialization() const { + return _completed_initialization; + } + protected: // Clear all the per-task bitmaps and arrays used to store the // counting data. diff --git a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp index 70b45fea4f09680cc2d74062f75c6a517022f6e7..448c5707787cad1e789605d3f51c6c44ae0ea2f5 100644 --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp @@ -2079,7 +2079,11 @@ jint G1CollectedHeap::initialize() { // Create the ConcurrentMark data structure and thread. // (Must do this late, so that "max_regions" is defined.) - _cm = new ConcurrentMark(heap_rs, max_regions()); + _cm = new ConcurrentMark(this, heap_rs); + if (_cm == NULL || !_cm->completed_initialization()) { + vm_shutdown_during_initialization("Could not create/initialize ConcurrentMark"); + return JNI_ENOMEM; + } _cmThread = _cm->cmThread(); // Initialize the from_card cache structure of HeapRegionRemSet. @@ -2087,7 +2091,7 @@ jint G1CollectedHeap::initialize() { // Now expand into the initial heap size. if (!expand(init_byte_size)) { - vm_exit_during_initialization("Failed to allocate initial heap."); + vm_shutdown_during_initialization("Failed to allocate initial heap."); return JNI_ENOMEM; } diff --git a/src/share/vm/gc_implementation/parallelScavenge/adjoiningVirtualSpaces.cpp b/src/share/vm/gc_implementation/parallelScavenge/adjoiningVirtualSpaces.cpp index e21f7a7a6e443dad2ea0282129170edc82f58d91..e5ca1f5c097bba5582ad020de0d2ed1db7aa448e 100644 --- a/src/share/vm/gc_implementation/parallelScavenge/adjoiningVirtualSpaces.cpp +++ b/src/share/vm/gc_implementation/parallelScavenge/adjoiningVirtualSpaces.cpp @@ -24,6 +24,7 @@ #include "precompiled.hpp" #include "gc_implementation/parallelScavenge/adjoiningVirtualSpaces.hpp" +#include "memory/allocation.inline.hpp" #include "runtime/java.hpp" AdjoiningVirtualSpaces::AdjoiningVirtualSpaces(ReservedSpace rs, diff --git a/src/share/vm/gc_implementation/shared/gcStats.cpp b/src/share/vm/gc_implementation/shared/gcStats.cpp index faef280f632d4b61d8b2644ce07a2e890a8d10bf..745f8f3ab65eb60ca973d8209715c49d6f6f5e24 100644 --- a/src/share/vm/gc_implementation/shared/gcStats.cpp +++ b/src/share/vm/gc_implementation/shared/gcStats.cpp @@ -25,6 +25,7 @@ #include "precompiled.hpp" #include "gc_implementation/shared/gcStats.hpp" #include "gc_implementation/shared/gcUtil.hpp" +#include "memory/allocation.inline.hpp" GCStats::GCStats() { _avg_promoted = new AdaptivePaddedNoZeroDevAverage( diff --git a/src/share/vm/memory/allocation.hpp b/src/share/vm/memory/allocation.hpp index 8bf63ac72f64c17141f39c319b8808b5ff15e67e..3cade72c2603ff3c4c03a980258b138bd9f0b429 100644 --- a/src/share/vm/memory/allocation.hpp +++ b/src/share/vm/memory/allocation.hpp @@ -202,7 +202,7 @@ template class CHeapObj ALLOCATION_SUPER_CLASS_SPEC { // Calling new or delete will result in fatal error. class StackObj ALLOCATION_SUPER_CLASS_SPEC { - public: + private: void* operator new(size_t size); void operator delete(void* p); }; @@ -226,7 +226,7 @@ class StackObj ALLOCATION_SUPER_CLASS_SPEC { // be defined as a an empty string "". // class _ValueObj { - public: + private: void* operator new(size_t size); void operator delete(void* p); }; diff --git a/src/share/vm/memory/metaspace.cpp b/src/share/vm/memory/metaspace.cpp index 42870cd0fda3b8022a59358decd6a226e50f1230..6c0b4210faa55b25ca01aeb20d04b215b2892ba9 100644 --- a/src/share/vm/memory/metaspace.cpp +++ b/src/share/vm/memory/metaspace.cpp @@ -2192,11 +2192,6 @@ void SpaceManager::mangle_freed_chunks() { // MetaspaceAux -size_t MetaspaceAux::used_in_bytes() { - return (Metaspace::class_space_list()->used_words_sum() + - Metaspace::space_list()->used_words_sum()) * BytesPerWord; -} - size_t MetaspaceAux::used_in_bytes(Metaspace::MetadataType mdtype) { size_t used = 0; ClassLoaderDataGraphMetaspaceIterator iter; @@ -2222,14 +2217,6 @@ size_t MetaspaceAux::free_in_bytes(Metaspace::MetadataType mdtype) { return free * BytesPerWord; } -// The total words available for metadata allocation. This -// uses Metaspace capacity_words() which is the total words -// in chunks allocated for a Metaspace. -size_t MetaspaceAux::capacity_in_bytes() { - return (Metaspace::class_space_list()->capacity_words_sum() + - Metaspace::space_list()->capacity_words_sum()) * BytesPerWord; -} - size_t MetaspaceAux::capacity_in_bytes(Metaspace::MetadataType mdtype) { size_t capacity = free_chunks_total(mdtype); ClassLoaderDataGraphMetaspaceIterator iter; @@ -2242,11 +2229,6 @@ size_t MetaspaceAux::capacity_in_bytes(Metaspace::MetadataType mdtype) { return capacity * BytesPerWord; } -size_t MetaspaceAux::reserved_in_bytes() { - return (Metaspace::class_space_list()->virtual_space_total() + - Metaspace::space_list()->virtual_space_total()) * BytesPerWord; -} - size_t MetaspaceAux::reserved_in_bytes(Metaspace::MetadataType mdtype) { size_t reserved = (mdtype == Metaspace::ClassType) ? Metaspace::class_space_list()->virtual_space_total() : diff --git a/src/share/vm/memory/metaspace.hpp b/src/share/vm/memory/metaspace.hpp index 993419436bba731c1601f11177e9ba2d8c9d8133..bfc6d90ae6588d5c0467a9917922eca646910752 100644 --- a/src/share/vm/memory/metaspace.hpp +++ b/src/share/vm/memory/metaspace.hpp @@ -156,16 +156,25 @@ class MetaspaceAux : AllStatic { public: // Total of space allocated to metadata in all Metaspaces - static size_t used_in_bytes(); + static size_t used_in_bytes() { + return used_in_bytes(Metaspace::ClassType) + + used_in_bytes(Metaspace::NonClassType); + } // Total of available space in all Metaspaces // Total of capacity allocated to all Metaspaces. This includes // space in Metachunks not yet allocated and in the Metachunk // freelist. - static size_t capacity_in_bytes(); + static size_t capacity_in_bytes() { + return capacity_in_bytes(Metaspace::ClassType) + + capacity_in_bytes(Metaspace::NonClassType); + } // Total space reserved in all Metaspaces - static size_t reserved_in_bytes(); + static size_t reserved_in_bytes() { + return reserved_in_bytes(Metaspace::ClassType) + + reserved_in_bytes(Metaspace::NonClassType); + } static size_t min_chunk_size(); diff --git a/src/share/vm/oops/constMethod.hpp b/src/share/vm/oops/constMethod.hpp index 771bf7b169c89ea3aa89385c09adc4e7c1597067..2819b3e7756fa301472ca4537d178db4d9368696 100644 --- a/src/share/vm/oops/constMethod.hpp +++ b/src/share/vm/oops/constMethod.hpp @@ -46,6 +46,7 @@ // | interp_kind | flags | code_size | // | name index | signature index | // | method_idnum | max_stack | +// | max_locals | size_of_parameters | // |------------------------------------------------------| // | | // | byte codes | @@ -150,7 +151,8 @@ private: // initially corresponds to the index into the methods array. // but this may change with redefinition u2 _max_stack; // Maximum number of entries on the expression stack - + u2 _max_locals; // Number of local variables used by this method + u2 _size_of_parameters; // size of the parameter block (receiver + arguments) in words // Constructor ConstMethod(int byte_code_size, @@ -338,6 +340,11 @@ public: static ByteSize max_stack_offset() { return byte_offset_of(ConstMethod, _max_stack); } + static ByteSize size_of_locals_offset() + { return byte_offset_of(ConstMethod, _max_locals); } + static ByteSize size_of_parameters_offset() + { return byte_offset_of(ConstMethod, _size_of_parameters); } + // Unique id for the method static const u2 MAX_IDNUM; @@ -349,6 +356,14 @@ public: int max_stack() const { return _max_stack; } void set_max_stack(int size) { _max_stack = size; } + // max locals + int max_locals() const { return _max_locals; } + void set_max_locals(int size) { _max_locals = size; } + + // size of parameters + int size_of_parameters() const { return _size_of_parameters; } + void set_size_of_parameters(int size) { _size_of_parameters = size; } + // Deallocation for RedefineClasses void deallocate_contents(ClassLoaderData* loader_data); bool is_klass() const { return false; } diff --git a/src/share/vm/oops/method.hpp b/src/share/vm/oops/method.hpp index 5cf153197895ca48b99a94f0805a425c627a537f..50305294ec925d5ef727352cb55b94d00acab801 100644 --- a/src/share/vm/oops/method.hpp +++ b/src/share/vm/oops/method.hpp @@ -73,8 +73,7 @@ // |------------------------------------------------------| // | result_index (C++ interpreter only) | // |------------------------------------------------------| -// | method_size | max_locals | -// | size_of_parameters | intrinsic_id| flags | +// | method_size | intrinsic_id| flags | // |------------------------------------------------------| // | throwout_count | num_breakpoints | // |------------------------------------------------------| @@ -116,8 +115,6 @@ class Method : public Metadata { int _result_index; // C++ interpreter needs for converting results to/from stack #endif u2 _method_size; // size of this object - u2 _max_locals; // Number of local variables used by this method - u2 _size_of_parameters; // size of the parameter block (receiver + arguments) in words u1 _intrinsic_id; // vmSymbols::intrinsic_id (0 == _none) u1 _jfr_towrite : 1, // Flags _force_inline : 1, @@ -299,8 +296,8 @@ class Method : public Metadata { void set_max_stack(int size) { constMethod()->set_max_stack(size); } // max locals - int max_locals() const { return _max_locals; } - void set_max_locals(int size) { _max_locals = size; } + int max_locals() const { return constMethod()->max_locals(); } + void set_max_locals(int size) { constMethod()->set_max_locals(size); } int highest_comp_level() const; void set_highest_comp_level(int level); @@ -318,7 +315,8 @@ class Method : public Metadata { void set_interpreter_throwout_count(int count) { _interpreter_throwout_count = count; } // size of parameters - int size_of_parameters() const { return _size_of_parameters; } + int size_of_parameters() const { return constMethod()->size_of_parameters(); } + void set_size_of_parameters(int size) { constMethod()->set_size_of_parameters(size); } bool has_stackmap_table() const { return constMethod()->has_stackmap_table(); @@ -595,8 +593,6 @@ class Method : public Metadata { #ifdef CC_INTERP static ByteSize result_index_offset() { return byte_offset_of(Method, _result_index ); } #endif /* CC_INTERP */ - static ByteSize size_of_locals_offset() { return byte_offset_of(Method, _max_locals ); } - static ByteSize size_of_parameters_offset() { return byte_offset_of(Method, _size_of_parameters); } static ByteSize from_compiled_offset() { return byte_offset_of(Method, _from_compiled_entry); } static ByteSize code_offset() { return byte_offset_of(Method, _code); } static ByteSize invocation_counter_offset() { return byte_offset_of(Method, _invocation_counter); } @@ -804,9 +800,6 @@ class Method : public Metadata { Array* methods_type_annotations, bool idempotent = false); - // size of parameters - void set_size_of_parameters(int size) { _size_of_parameters = size; } - // Deallocation function for redefine classes or if an error occurs void deallocate_contents(ClassLoaderData* loader_data); diff --git a/src/share/vm/opto/addnode.cpp b/src/share/vm/opto/addnode.cpp index 6e54990281beb1f163cf5785316f61b24315a637..045e4f20918779eb4a11f0351b2d0dd1f486907f 100644 --- a/src/share/vm/opto/addnode.cpp +++ b/src/share/vm/opto/addnode.cpp @@ -189,6 +189,11 @@ Node *AddNode::Ideal(PhaseGVN *phase, bool can_reshape) { set_req(1, addx); set_req(2, a22); progress = this; + PhaseIterGVN *igvn = phase->is_IterGVN(); + if (add2->outcnt() == 0 && igvn) { + // add disconnected. + igvn->_worklist.push(add2); + } } } @@ -624,6 +629,11 @@ Node *AddPNode::Ideal(PhaseGVN *phase, bool can_reshape) { if( t22->singleton() && (t22 != Type::TOP) ) { // Right input is an add of a constant? set_req(Address, phase->transform(new (phase->C) AddPNode(in(Base),in(Address),add->in(1)))); set_req(Offset, add->in(2)); + PhaseIterGVN *igvn = phase->is_IterGVN(); + if (add->outcnt() == 0 && igvn) { + // add disconnected. + igvn->_worklist.push((Node*)add); + } return this; // Made progress } } diff --git a/src/share/vm/opto/bytecodeInfo.cpp b/src/share/vm/opto/bytecodeInfo.cpp index 80117960dc58cccbe16f66701123200e9c15cda7..7392ee0f614dbd4e0474ea37c6e4a8be8b239c1e 100644 --- a/src/share/vm/opto/bytecodeInfo.cpp +++ b/src/share/vm/opto/bytecodeInfo.cpp @@ -403,7 +403,7 @@ const char* InlineTree::check_can_parse(ciMethod* callee) { //------------------------------print_inlining--------------------------------- // Really, the failure_msg can be a success message also. void InlineTree::print_inlining(ciMethod* callee_method, int caller_bci, const char* failure_msg) const { - CompileTask::print_inlining(callee_method, inline_level(), caller_bci, failure_msg ? failure_msg : "inline"); + C->print_inlining(callee_method, inline_level(), caller_bci, failure_msg ? failure_msg : "inline"); if (callee_method == NULL) tty->print(" callee not monotonic or profiled"); if (Verbose && callee_method) { const InlineTree *top = this; diff --git a/src/share/vm/opto/callGenerator.cpp b/src/share/vm/opto/callGenerator.cpp index 33d8c0c6241534d5127d5c0d11bb9bcd134d0400..66b53abf8adb5d1670e68b3f4711b27cb4b1fe13 100644 --- a/src/share/vm/opto/callGenerator.cpp +++ b/src/share/vm/opto/callGenerator.cpp @@ -274,6 +274,9 @@ class LateInlineCallGenerator : public DirectCallGenerator { virtual void do_late_inline(); virtual JVMState* generate(JVMState* jvms) { + Compile *C = Compile::current(); + C->print_inlining_skip(this); + // Record that this call site should be revisited once the main // parse is finished. Compile::current()->add_late_inline(this); @@ -284,7 +287,6 @@ class LateInlineCallGenerator : public DirectCallGenerator { // as is done for allocations and macro expansion. return DirectCallGenerator::generate(jvms); } - }; @@ -307,7 +309,9 @@ void LateInlineCallGenerator::do_late_inline() { // Make sure the state is a MergeMem for parsing. if (!map->in(TypeFunc::Memory)->is_MergeMem()) { - map->set_req(TypeFunc::Memory, MergeMemNode::make(C, map->in(TypeFunc::Memory))); + Node* mem = MergeMemNode::make(C, map->in(TypeFunc::Memory)); + C->initial_gvn()->set_type_bottom(mem); + map->set_req(TypeFunc::Memory, mem); } // Make enough space for the expression stack and transfer the incoming arguments @@ -320,6 +324,8 @@ void LateInlineCallGenerator::do_late_inline() { } } + C->print_inlining_insert(this); + CompileLog* log = C->log(); if (log != NULL) { log->head("late_inline method='%d'", log->identify(method())); @@ -608,7 +614,7 @@ CallGenerator* CallGenerator::for_method_handle_inline(JVMState* jvms, ciMethod* if (cg != NULL && cg->is_inline()) return cg; } else { - if (PrintInlining) CompileTask::print_inlining(callee, jvms->depth() - 1, jvms->bci(), "receiver not constant"); + if (PrintInlining) C->print_inlining(callee, jvms->depth() - 1, jvms->bci(), "receiver not constant"); } } break; diff --git a/src/share/vm/opto/callGenerator.hpp b/src/share/vm/opto/callGenerator.hpp index ae59173bf7d40e6585842bdd77a90388ab14c737..8b3bdada39f924a5739ee2bba51219092ddb9697 100644 --- a/src/share/vm/opto/callGenerator.hpp +++ b/src/share/vm/opto/callGenerator.hpp @@ -147,9 +147,9 @@ class CallGenerator : public ResourceObj { CallGenerator* cg); virtual Node* generate_predicate(JVMState* jvms) { return NULL; }; - static void print_inlining(ciMethod* callee, int inline_level, int bci, const char* msg) { + static void print_inlining(Compile* C, ciMethod* callee, int inline_level, int bci, const char* msg) { if (PrintInlining) - CompileTask::print_inlining(callee, inline_level, bci, msg); + C->print_inlining(callee, inline_level, bci, msg); } }; diff --git a/src/share/vm/opto/callnode.cpp b/src/share/vm/opto/callnode.cpp index 067e9197d20ffb18ce0d9045fc924adbb4cc243c..95f1934c6c185709b57f0f5a4c6157eb31d0c040 100644 --- a/src/share/vm/opto/callnode.cpp +++ b/src/share/vm/opto/callnode.cpp @@ -751,7 +751,7 @@ void CallNode::extract_projections(CallProjections* projs, bool separate_io_proj projs->fallthrough_ioproj = pn; for (DUIterator j = pn->outs(); pn->has_out(j); j++) { Node* e = pn->out(j); - if (e->Opcode() == Op_CreateEx && e->in(0)->is_CatchProj()) { + if (e->Opcode() == Op_CreateEx && e->in(0)->is_CatchProj() && e->outcnt() > 0) { assert(projs->exobj == NULL, "only one"); projs->exobj = e; } diff --git a/src/share/vm/opto/cfgnode.cpp b/src/share/vm/opto/cfgnode.cpp index 24d51afd2fa952929628802afbf12d5a18e0cb7a..3ee9fffe04be4d8c620dc4c2996bef146ce612e9 100644 --- a/src/share/vm/opto/cfgnode.cpp +++ b/src/share/vm/opto/cfgnode.cpp @@ -1566,6 +1566,10 @@ Node *PhiNode::Ideal(PhaseGVN *phase, bool can_reshape) { Node* n = in(j); // Get the input if (rc == NULL || phase->type(rc) == Type::TOP) { if (n != top) { // Not already top? + PhaseIterGVN *igvn = phase->is_IterGVN(); + if (can_reshape && igvn != NULL) { + igvn->_worklist.push(r); + } set_req(j, top); // Nuke it down progress = this; // Record progress } diff --git a/src/share/vm/opto/compile.cpp b/src/share/vm/opto/compile.cpp index ffb7ed7b8d58f057b1f418311d20b6e659d0b0aa..5ea8049c7359042f360e0f8dc5020f7dc79b80cf 100644 --- a/src/share/vm/opto/compile.cpp +++ b/src/share/vm/opto/compile.cpp @@ -610,7 +610,9 @@ Compile::Compile( ciEnv* ci_env, C2Compiler* compiler, ciMethod* target, int osr _trace_opto_output(TraceOptoOutput || method()->has_option("TraceOptoOutput")), _printer(IdealGraphPrinter::printer()), #endif - _congraph(NULL) { + _congraph(NULL), + _print_inlining_list(NULL), + _print_inlining(0) { C = this; CompileWrapper cw(this); @@ -666,6 +668,9 @@ Compile::Compile( ciEnv* ci_env, C2Compiler* compiler, ciMethod* target, int osr PhaseGVN gvn(node_arena(), estimated_size); set_initial_gvn(&gvn); + if (PrintInlining) { + _print_inlining_list = new (comp_arena())GrowableArray(comp_arena(), 1, 1, PrintInliningBuffer()); + } { // Scope for timing the parser TracePhase t3("parse", &_t_parser, true); @@ -754,6 +759,7 @@ Compile::Compile( ciEnv* ci_env, C2Compiler* compiler, ciMethod* target, int osr } } assert(_late_inlines.length() == 0, "should have been processed"); + dump_inlining(); print_method("Before RemoveUseless", 3); @@ -899,7 +905,9 @@ Compile::Compile( ciEnv* ci_env, #endif _dead_node_list(comp_arena()), _dead_node_count(0), - _congraph(NULL) { + _congraph(NULL), + _print_inlining_list(NULL), + _print_inlining(0) { C = this; #ifndef PRODUCT @@ -3351,3 +3359,11 @@ void Compile::ConstantTable::fill_jump_table(CodeBuffer& cb, MachConstantNode* n cb.consts()->relocate((address) constant_addr, relocInfo::internal_word_type); } } + +void Compile::dump_inlining() { + if (PrintInlining) { + for (int i = 0; i < _print_inlining_list->length(); i++) { + tty->print(_print_inlining_list->at(i).ss()->as_string()); + } + } +} diff --git a/src/share/vm/opto/compile.hpp b/src/share/vm/opto/compile.hpp index aacb72cbfb45dba47a48fe9e52e017e274dd1b13..32767e2ddd8958d46a9eefb76fa4ab90521c80bd 100644 --- a/src/share/vm/opto/compile.hpp +++ b/src/share/vm/opto/compile.hpp @@ -30,6 +30,7 @@ #include "code/debugInfoRec.hpp" #include "code/exceptionHandlerTable.hpp" #include "compiler/compilerOracle.hpp" +#include "compiler/compileBroker.hpp" #include "libadt/dict.hpp" #include "libadt/port.hpp" #include "libadt/vectset.hpp" @@ -369,6 +370,61 @@ class Compile : public Phase { GrowableArray _late_inlines; // List of CallGenerators to be revisited after // main parsing has finished. + // Inlining may not happen in parse order which would make + // PrintInlining output confusing. Keep track of PrintInlining + // pieces in order. + class PrintInliningBuffer : public ResourceObj { + private: + CallGenerator* _cg; + stringStream* _ss; + + public: + PrintInliningBuffer() + : _cg(NULL) { _ss = new stringStream(); } + + stringStream* ss() const { return _ss; } + CallGenerator* cg() const { return _cg; } + void set_cg(CallGenerator* cg) { _cg = cg; } + }; + + GrowableArray* _print_inlining_list; + int _print_inlining; + + public: + + outputStream* print_inlining_stream() const { + return _print_inlining_list->at(_print_inlining).ss(); + } + + void print_inlining_skip(CallGenerator* cg) { + if (PrintInlining) { + _print_inlining_list->at(_print_inlining).set_cg(cg); + _print_inlining++; + _print_inlining_list->insert_before(_print_inlining, PrintInliningBuffer()); + } + } + + void print_inlining_insert(CallGenerator* cg) { + if (PrintInlining) { + for (int i = 0; i < _print_inlining_list->length(); i++) { + if (_print_inlining_list->at(i).cg() == cg) { + _print_inlining_list->insert_before(i+1, PrintInliningBuffer()); + _print_inlining = i+1; + _print_inlining_list->at(i).set_cg(NULL); + return; + } + } + ShouldNotReachHere(); + } + } + + void print_inlining(ciMethod* method, int inline_level, int bci, const char* msg = NULL) { + stringStream ss; + CompileTask::print_inlining(&ss, method, inline_level, bci, msg); + print_inlining_stream()->print(ss.as_string()); + } + + private: // Matching, CFG layout, allocation, code generation PhaseCFG* _cfg; // Results of CFG finding bool _select_24_bit_instr; // We selected an instruction with a 24-bit result @@ -591,7 +647,7 @@ class Compile : public Phase { void reset_dead_node_list() { _dead_node_list.Reset(); _dead_node_count = 0; } - uint live_nodes() { + uint live_nodes() const { int val = _unique - _dead_node_count; assert (val >= 0, err_msg_res("number of tracked dead nodes %d more than created nodes %d", _unique, _dead_node_count)); return (uint) val; @@ -702,7 +758,7 @@ class Compile : public Phase { void identify_useful_nodes(Unique_Node_List &useful); void update_dead_node_list(Unique_Node_List &useful); - void remove_useless_nodes (Unique_Node_List &useful); + void remove_useless_nodes (Unique_Node_List &useful); WarmCallInfo* warm_calls() const { return _warm_calls; } void set_warm_calls(WarmCallInfo* l) { _warm_calls = l; } @@ -711,6 +767,8 @@ class Compile : public Phase { // Record this CallGenerator for inlining at the end of parsing. void add_late_inline(CallGenerator* cg) { _late_inlines.push(cg); } + void dump_inlining(); + // Matching, CFG layout, allocation, code generation PhaseCFG* cfg() { return _cfg; } bool select_24_bit_instr() const { return _select_24_bit_instr; } diff --git a/src/share/vm/opto/doCall.cpp b/src/share/vm/opto/doCall.cpp index ca9a0a6d67e476a9d76a004c58bd87ad08f98b9e..1f21043c32ea6780c4acdf43a36d43fdcabf9067 100644 --- a/src/share/vm/opto/doCall.cpp +++ b/src/share/vm/opto/doCall.cpp @@ -40,19 +40,24 @@ #include "prims/nativeLookup.hpp" #include "runtime/sharedRuntime.hpp" -void trace_type_profile(ciMethod *method, int depth, int bci, ciMethod *prof_method, ciKlass *prof_klass, int site_count, int receiver_count) { +void trace_type_profile(Compile* C, ciMethod *method, int depth, int bci, ciMethod *prof_method, ciKlass *prof_klass, int site_count, int receiver_count) { if (TraceTypeProfile || PrintInlining NOT_PRODUCT(|| PrintOptoInlining)) { + outputStream* out = tty; if (!PrintInlining) { if (NOT_PRODUCT(!PrintOpto &&) !PrintCompilation) { method->print_short_name(); tty->cr(); } CompileTask::print_inlining(prof_method, depth, bci); + } else { + out = C->print_inlining_stream(); } - CompileTask::print_inline_indent(depth); - tty->print(" \\-> TypeProfile (%d/%d counts) = ", receiver_count, site_count); - prof_klass->name()->print_symbol(); - tty->cr(); + CompileTask::print_inline_indent(depth, out); + out->print(" \\-> TypeProfile (%d/%d counts) = ", receiver_count, site_count); + stringStream ss; + prof_klass->name()->print_symbol_on(&ss); + out->print(ss.as_string()); + out->cr(); } } @@ -233,13 +238,13 @@ CallGenerator* Compile::call_generator(ciMethod* callee, int vtable_index, bool } if (miss_cg != NULL) { if (next_hit_cg != NULL) { - trace_type_profile(jvms->method(), jvms->depth() - 1, jvms->bci(), next_receiver_method, profile.receiver(1), site_count, profile.receiver_count(1)); + trace_type_profile(C, jvms->method(), jvms->depth() - 1, jvms->bci(), next_receiver_method, profile.receiver(1), site_count, profile.receiver_count(1)); // We don't need to record dependency on a receiver here and below. // Whenever we inline, the dependency is added by Parse::Parse(). miss_cg = CallGenerator::for_predicted_call(profile.receiver(1), miss_cg, next_hit_cg, PROB_MAX); } if (miss_cg != NULL) { - trace_type_profile(jvms->method(), jvms->depth() - 1, jvms->bci(), receiver_method, profile.receiver(0), site_count, receiver_count); + trace_type_profile(C, jvms->method(), jvms->depth() - 1, jvms->bci(), receiver_method, profile.receiver(0), site_count, receiver_count); CallGenerator* cg = CallGenerator::for_predicted_call(profile.receiver(0), miss_cg, hit_cg, profile.receiver_prob(0)); if (cg != NULL) return cg; } diff --git a/src/share/vm/opto/graphKit.cpp b/src/share/vm/opto/graphKit.cpp index ba7ec2caf77443eb8dd6a6b9698e9bc7a9370686..115952ad65ce285ac7dd35a89344c4a07e0a500e 100644 --- a/src/share/vm/opto/graphKit.cpp +++ b/src/share/vm/opto/graphKit.cpp @@ -1771,11 +1771,21 @@ void GraphKit::replace_call(CallNode* call, Node* result) { CallProjections callprojs; call->extract_projections(&callprojs, true); - // Replace all the old call edges with the edges from the inlining result - C->gvn_replace_by(callprojs.fallthrough_catchproj, final_state->in(TypeFunc::Control)); - C->gvn_replace_by(callprojs.fallthrough_memproj, final_state->in(TypeFunc::Memory)); - C->gvn_replace_by(callprojs.fallthrough_ioproj, final_state->in(TypeFunc::I_O)); + Node* init_mem = call->in(TypeFunc::Memory); Node* final_mem = final_state->in(TypeFunc::Memory); + Node* final_ctl = final_state->in(TypeFunc::Control); + Node* final_io = final_state->in(TypeFunc::I_O); + + // Replace all the old call edges with the edges from the inlining result + if (callprojs.fallthrough_catchproj != NULL) { + C->gvn_replace_by(callprojs.fallthrough_catchproj, final_ctl); + } + if (callprojs.fallthrough_memproj != NULL) { + C->gvn_replace_by(callprojs.fallthrough_memproj, final_mem); + } + if (callprojs.fallthrough_ioproj != NULL) { + C->gvn_replace_by(callprojs.fallthrough_ioproj, final_io); + } // Replace the result with the new result if it exists and is used if (callprojs.resproj != NULL && result != NULL) { @@ -2980,7 +2990,7 @@ Node* GraphKit::set_output_for_allocation(AllocateNode* alloc, set_control( _gvn.transform(new (C) ProjNode(allocx, TypeFunc::Control) ) ); // create memory projection for i_o set_memory ( _gvn.transform( new (C) ProjNode(allocx, TypeFunc::Memory, true) ), rawidx ); - make_slow_call_ex(allocx, env()->OutOfMemoryError_klass(), true); + make_slow_call_ex(allocx, env()->Throwable_klass(), true); // create a memory projection as for the normal control path Node* malloc = _gvn.transform(new (C) ProjNode(allocx, TypeFunc::Memory)); diff --git a/src/share/vm/opto/library_call.cpp b/src/share/vm/opto/library_call.cpp index 53b89e4d71c62324f1cc3f5e06d82a7ddf54ccdf..313339edfded5ce58382654ed6956881f451c5f4 100644 --- a/src/share/vm/opto/library_call.cpp +++ b/src/share/vm/opto/library_call.cpp @@ -282,6 +282,7 @@ class LibraryCallKit : public GraphKit { typedef enum { LS_xadd, LS_xchg, LS_cmpxchg } LoadStoreKind; bool inline_unsafe_load_store(BasicType type, LoadStoreKind kind); bool inline_unsafe_ordered_store(BasicType type); + bool inline_unsafe_fence(vmIntrinsics::ID id); bool inline_fp_conversions(vmIntrinsics::ID id); bool inline_number_methods(vmIntrinsics::ID id); bool inline_reference_get(); @@ -334,6 +335,9 @@ CallGenerator* Compile::make_vm_intrinsic(ciMethod* m, bool is_virtual) { case vmIntrinsics::_getAndSetInt: case vmIntrinsics::_getAndSetLong: case vmIntrinsics::_getAndSetObject: + case vmIntrinsics::_loadFence: + case vmIntrinsics::_storeFence: + case vmIntrinsics::_fullFence: break; // InlineNatives does not control String.compareTo case vmIntrinsics::_Reference_get: break; // InlineNatives does not control Reference.get @@ -536,7 +540,7 @@ JVMState* LibraryIntrinsic::generate(JVMState* jvms) { // Try to inline the intrinsic. if (kit.try_to_inline()) { if (PrintIntrinsics || PrintInlining NOT_PRODUCT( || PrintOptoInlining) ) { - CompileTask::print_inlining(callee, jvms->depth() - 1, bci, is_virtual() ? "(intrinsic, virtual)" : "(intrinsic)"); + C->print_inlining(callee, jvms->depth() - 1, bci, is_virtual() ? "(intrinsic, virtual)" : "(intrinsic)"); } C->gather_intrinsic_statistics(intrinsic_id(), is_virtual(), Compile::_intrinsic_worked); if (C->log()) { @@ -555,7 +559,7 @@ JVMState* LibraryIntrinsic::generate(JVMState* jvms) { if (jvms->has_method()) { // Not a root compile. const char* msg = is_virtual() ? "failed to inline (intrinsic, virtual)" : "failed to inline (intrinsic)"; - CompileTask::print_inlining(callee, jvms->depth() - 1, bci, msg); + C->print_inlining(callee, jvms->depth() - 1, bci, msg); } else { // Root compile tty->print("Did not generate intrinsic %s%s at bci:%d in", @@ -585,7 +589,7 @@ Node* LibraryIntrinsic::generate_predicate(JVMState* jvms) { Node* slow_ctl = kit.try_to_predicate(); if (!kit.failing()) { if (PrintIntrinsics || PrintInlining NOT_PRODUCT( || PrintOptoInlining) ) { - CompileTask::print_inlining(callee, jvms->depth() - 1, bci, is_virtual() ? "(intrinsic, virtual)" : "(intrinsic)"); + C->print_inlining(callee, jvms->depth() - 1, bci, is_virtual() ? "(intrinsic, virtual)" : "(intrinsic)"); } C->gather_intrinsic_statistics(intrinsic_id(), is_virtual(), Compile::_intrinsic_worked); if (C->log()) { @@ -602,12 +606,12 @@ Node* LibraryIntrinsic::generate_predicate(JVMState* jvms) { if (jvms->has_method()) { // Not a root compile. const char* msg = "failed to generate predicate for intrinsic"; - CompileTask::print_inlining(kit.callee(), jvms->depth() - 1, bci, msg); + C->print_inlining(kit.callee(), jvms->depth() - 1, bci, msg); } else { // Root compile - tty->print("Did not generate predicate for intrinsic %s%s at bci:%d in", - vmIntrinsics::name_at(intrinsic_id()), - (is_virtual() ? " (virtual)" : ""), bci); + C->print_inlining_stream()->print("Did not generate predicate for intrinsic %s%s at bci:%d in", + vmIntrinsics::name_at(intrinsic_id()), + (is_virtual() ? " (virtual)" : ""), bci); } } C->gather_intrinsic_statistics(intrinsic_id(), is_virtual(), Compile::_intrinsic_failed); @@ -732,6 +736,10 @@ bool LibraryCallKit::try_to_inline() { case vmIntrinsics::_getAndSetLong: return inline_unsafe_load_store(T_LONG, LS_xchg); case vmIntrinsics::_getAndSetObject: return inline_unsafe_load_store(T_OBJECT, LS_xchg); + case vmIntrinsics::_loadFence: + case vmIntrinsics::_storeFence: + case vmIntrinsics::_fullFence: return inline_unsafe_fence(intrinsic_id()); + case vmIntrinsics::_currentThread: return inline_native_currentThread(); case vmIntrinsics::_isInterrupted: return inline_native_isInterrupted(); @@ -2840,6 +2848,26 @@ bool LibraryCallKit::inline_unsafe_ordered_store(BasicType type) { return true; } +bool LibraryCallKit::inline_unsafe_fence(vmIntrinsics::ID id) { + // Regardless of form, don't allow previous ld/st to move down, + // then issue acquire, release, or volatile mem_bar. + insert_mem_bar(Op_MemBarCPUOrder); + switch(id) { + case vmIntrinsics::_loadFence: + insert_mem_bar(Op_MemBarAcquire); + return true; + case vmIntrinsics::_storeFence: + insert_mem_bar(Op_MemBarRelease); + return true; + case vmIntrinsics::_fullFence: + insert_mem_bar(Op_MemBarVolatile); + return true; + default: + fatal_unexpected_iid(id); + return false; + } +} + //----------------------------inline_unsafe_allocate--------------------------- // public native Object sun.mics.Unsafe.allocateInstance(Class cls); bool LibraryCallKit::inline_unsafe_allocate() { @@ -2952,14 +2980,23 @@ bool LibraryCallKit::inline_native_isInterrupted() { // We only go to the fast case code if we pass two guards. // Paths which do not pass are accumulated in the slow_region. + + enum { + no_int_result_path = 1, // t == Thread.current() && !TLS._osthread._interrupted + no_clear_result_path = 2, // t == Thread.current() && TLS._osthread._interrupted && !clear_int + slow_result_path = 3, // slow path: t.isInterrupted(clear_int) + PATH_LIMIT + }; + + // Ensure that it's not possible to move the load of TLS._osthread._interrupted flag + // out of the function. + insert_mem_bar(Op_MemBarCPUOrder); + + RegionNode* result_rgn = new (C) RegionNode(PATH_LIMIT); + PhiNode* result_val = new (C) PhiNode(result_rgn, TypeInt::BOOL); + RegionNode* slow_region = new (C) RegionNode(1); record_for_igvn(slow_region); - RegionNode* result_rgn = new (C) RegionNode(1+3); // fast1, fast2, slow - PhiNode* result_val = new (C) PhiNode(result_rgn, TypeInt::BOOL); - enum { no_int_result_path = 1, - no_clear_result_path = 2, - slow_result_path = 3 - }; // (a) Receiving thread must be the current thread. Node* rec_thr = argument(0); @@ -2968,14 +3005,13 @@ bool LibraryCallKit::inline_native_isInterrupted() { Node* cmp_thr = _gvn.transform( new (C) CmpPNode(cur_thr, rec_thr) ); Node* bol_thr = _gvn.transform( new (C) BoolNode(cmp_thr, BoolTest::ne) ); - bool known_current_thread = (_gvn.type(bol_thr) == TypeInt::ZERO); - if (!known_current_thread) - generate_slow_guard(bol_thr, slow_region); + generate_slow_guard(bol_thr, slow_region); // (b) Interrupt bit on TLS must be false. Node* p = basic_plus_adr(top()/*!oop*/, tls_ptr, in_bytes(JavaThread::osthread_offset())); Node* osthread = make_load(NULL, p, TypeRawPtr::NOTNULL, T_ADDRESS); p = basic_plus_adr(top()/*!oop*/, osthread, in_bytes(OSThread::interrupted_offset())); + // Set the control input on the field _interrupted read to prevent it floating up. Node* int_bit = make_load(control(), p, TypeInt::BOOL, T_INT); Node* cmp_bit = _gvn.transform( new (C) CmpINode(int_bit, intcon(0)) ); @@ -3020,22 +3056,20 @@ bool LibraryCallKit::inline_native_isInterrupted() { Node* slow_val = set_results_for_java_call(slow_call); // this->control() comes from set_results_for_java_call - // If we know that the result of the slow call will be true, tell the optimizer! - if (known_current_thread) slow_val = intcon(1); - Node* fast_io = slow_call->in(TypeFunc::I_O); Node* fast_mem = slow_call->in(TypeFunc::Memory); + // These two phis are pre-filled with copies of of the fast IO and Memory - Node* io_phi = PhiNode::make(result_rgn, fast_io, Type::ABIO); - Node* mem_phi = PhiNode::make(result_rgn, fast_mem, Type::MEMORY, TypePtr::BOTTOM); + PhiNode* result_mem = PhiNode::make(result_rgn, fast_mem, Type::MEMORY, TypePtr::BOTTOM); + PhiNode* result_io = PhiNode::make(result_rgn, fast_io, Type::ABIO); result_rgn->init_req(slow_result_path, control()); - io_phi ->init_req(slow_result_path, i_o()); - mem_phi ->init_req(slow_result_path, reset_memory()); + result_io ->init_req(slow_result_path, i_o()); + result_mem->init_req(slow_result_path, reset_memory()); result_val->init_req(slow_result_path, slow_val); - set_all_memory( _gvn.transform(mem_phi) ); - set_i_o( _gvn.transform(io_phi) ); + set_all_memory(_gvn.transform(result_mem)); + set_i_o( _gvn.transform(result_io)); } C->set_has_split_ifs(true); // Has chance for split-if optimization @@ -3319,7 +3353,7 @@ bool LibraryCallKit::inline_native_subtype_check() { Node* arg = args[which_arg]; arg = null_check(arg); if (stopped()) break; - args[which_arg] = _gvn.transform(arg); + args[which_arg] = arg; Node* p = basic_plus_adr(arg, class_klass_offset); Node* kls = LoadKlassNode::make(_gvn, immutable_memory(), p, adr_type, kls_type); diff --git a/src/share/vm/opto/node.cpp b/src/share/vm/opto/node.cpp index 4c83d906003ab8225bb336b7a60db97c01dbb1e9..9983c84d8e26d8494e6e3ec84bdcbaa51b3f1cf8 100644 --- a/src/share/vm/opto/node.cpp +++ b/src/share/vm/opto/node.cpp @@ -1839,15 +1839,16 @@ uint Node::match_edge(uint idx) const { return idx; // True for other than index 0 (control) } +static RegMask _not_used_at_all; // Register classes are defined for specific machines const RegMask &Node::out_RegMask() const { ShouldNotCallThis(); - return *(new RegMask()); + return _not_used_at_all; } const RegMask &Node::in_RegMask(uint) const { ShouldNotCallThis(); - return *(new RegMask()); + return _not_used_at_all; } //============================================================================= diff --git a/src/share/vm/opto/parse3.cpp b/src/share/vm/opto/parse3.cpp index 0f92b53f135ebd44a05cb779948444d0811bc5ec..ac6f4ec8f7662af4c6f4f0d874942858d928f57d 100644 --- a/src/share/vm/opto/parse3.cpp +++ b/src/share/vm/opto/parse3.cpp @@ -509,6 +509,7 @@ void Parse::do_multianewarray() { makecon(TypeKlassPtr::make(array_klass)), dims); } + make_slow_call_ex(c, env()->Throwable_klass(), false); Node* res = _gvn.transform(new (C) ProjNode(c, TypeFunc::Parms)); diff --git a/src/share/vm/opto/runtime.cpp b/src/share/vm/opto/runtime.cpp index fb90705726ea2ab32b3170242ba828fe8c000562..7edb97e0bbaba8055de57cf124181db76d76263c 100644 --- a/src/share/vm/opto/runtime.cpp +++ b/src/share/vm/opto/runtime.cpp @@ -989,7 +989,7 @@ JRT_ENTRY_NO_ASYNC(address, OptoRuntime::handle_exception_C_helper(JavaThread* t // since we're notifying the VM on every catch. // Force deoptimization and the rest of the lookup // will be fine. - deoptimize_caller_frame(thread, true); + deoptimize_caller_frame(thread); } // Check the stack guard pages. If enabled, look for handler in this frame; @@ -1143,19 +1143,24 @@ const TypeFunc *OptoRuntime::rethrow_Type() { void OptoRuntime::deoptimize_caller_frame(JavaThread *thread, bool doit) { - // Deoptimize frame - if (doit) { - // Called from within the owner thread, so no need for safepoint - RegisterMap reg_map(thread); - frame stub_frame = thread->last_frame(); - assert(stub_frame.is_runtime_frame() || exception_blob()->contains(stub_frame.pc()), "sanity check"); - frame caller_frame = stub_frame.sender(®_map); - - // Deoptimize the caller frame. - Deoptimization::deoptimize_frame(thread, caller_frame.id()); + // Deoptimize the caller before continuing, as the compiled + // exception handler table may not be valid. + if (!StressCompiledExceptionHandlers && doit) { + deoptimize_caller_frame(thread); } } +void OptoRuntime::deoptimize_caller_frame(JavaThread *thread) { + // Called from within the owner thread, so no need for safepoint + RegisterMap reg_map(thread); + frame stub_frame = thread->last_frame(); + assert(stub_frame.is_runtime_frame() || exception_blob()->contains(stub_frame.pc()), "sanity check"); + frame caller_frame = stub_frame.sender(®_map); + + // Deoptimize the caller frame. + Deoptimization::deoptimize_frame(thread, caller_frame.id()); +} + bool OptoRuntime::is_deoptimized_caller_frame(JavaThread *thread) { // Called from within the owner thread, so no need for safepoint diff --git a/src/share/vm/opto/runtime.hpp b/src/share/vm/opto/runtime.hpp index 13da255b74299f696b20a2aca3bf050a85f1952a..295b71237577376ef7d1517af6ff6b843d143e80 100644 --- a/src/share/vm/opto/runtime.hpp +++ b/src/share/vm/opto/runtime.hpp @@ -174,6 +174,7 @@ private: static address handle_exception_C (JavaThread* thread); static address handle_exception_C_helper(JavaThread* thread, nmethod*& nm); static address rethrow_C (oopDesc* exception, JavaThread *thread, address return_pc ); + static void deoptimize_caller_frame (JavaThread *thread); static void deoptimize_caller_frame (JavaThread *thread, bool doit); static bool is_deoptimized_caller_frame (JavaThread *thread); diff --git a/src/share/vm/opto/stringopts.cpp b/src/share/vm/opto/stringopts.cpp index c471887d88101b9913936e9fb99cac5aafc92b29..4e00945c73ae009385611d30563917888b9630f7 100644 --- a/src/share/vm/opto/stringopts.cpp +++ b/src/share/vm/opto/stringopts.cpp @@ -744,7 +744,9 @@ bool StringConcat::validate_control_flow() { ctrl_path.push(cn); ctrl_path.push(cn->proj_out(0)); ctrl_path.push(cn->proj_out(0)->unique_out()); - ctrl_path.push(cn->proj_out(0)->unique_out()->as_Catch()->proj_out(0)); + if (cn->proj_out(0)->unique_out()->as_Catch()->proj_out(0) != NULL) { + ctrl_path.push(cn->proj_out(0)->unique_out()->as_Catch()->proj_out(0)); + } } else { ShouldNotReachHere(); } @@ -762,6 +764,12 @@ bool StringConcat::validate_control_flow() { } else if (ptr->is_IfTrue()) { IfNode* iff = ptr->in(0)->as_If(); BoolNode* b = iff->in(1)->isa_Bool(); + + if (b == NULL) { + fail = true; + break; + } + Node* cmp = b->in(1); Node* v1 = cmp->in(1); Node* v2 = cmp->in(2); @@ -1408,71 +1416,76 @@ void PhaseStringOpts::replace_string_concat(StringConcat* sc) { Deoptimization::Action_make_not_entrant); } - // length now contains the number of characters needed for the - // char[] so create a new AllocateArray for the char[] - Node* char_array = NULL; - { - PreserveReexecuteState preexecs(&kit); - // The original jvms is for an allocation of either a String or - // StringBuffer so no stack adjustment is necessary for proper - // reexecution. If we deoptimize in the slow path the bytecode - // will be reexecuted and the char[] allocation will be thrown away. - kit.jvms()->set_should_reexecute(true); - char_array = kit.new_array(__ makecon(TypeKlassPtr::make(ciTypeArrayKlass::make(T_CHAR))), - length, 1); - } - - // Mark the allocation so that zeroing is skipped since the code - // below will overwrite the entire array - AllocateArrayNode* char_alloc = AllocateArrayNode::Ideal_array_allocation(char_array, _gvn); - char_alloc->maybe_set_complete(_gvn); - - // Now copy the string representations into the final char[] - Node* start = __ intcon(0); - for (int argi = 0; argi < sc->num_arguments(); argi++) { - Node* arg = sc->argument(argi); - switch (sc->mode(argi)) { - case StringConcat::IntMode: { - Node* end = __ AddI(start, string_sizes->in(argi)); - // getChars words backwards so pass the ending point as well as the start - int_getChars(kit, arg, char_array, start, end); - start = end; - break; - } - case StringConcat::StringNullCheckMode: - case StringConcat::StringMode: { - start = copy_string(kit, arg, char_array, start); - break; - } - case StringConcat::CharMode: { - __ store_to_memory(kit.control(), kit.array_element_address(char_array, start, T_CHAR), - arg, T_CHAR, char_adr_idx); - start = __ AddI(start, __ intcon(1)); - break; + Node* result; + if (!kit.stopped()) { + + // length now contains the number of characters needed for the + // char[] so create a new AllocateArray for the char[] + Node* char_array = NULL; + { + PreserveReexecuteState preexecs(&kit); + // The original jvms is for an allocation of either a String or + // StringBuffer so no stack adjustment is necessary for proper + // reexecution. If we deoptimize in the slow path the bytecode + // will be reexecuted and the char[] allocation will be thrown away. + kit.jvms()->set_should_reexecute(true); + char_array = kit.new_array(__ makecon(TypeKlassPtr::make(ciTypeArrayKlass::make(T_CHAR))), + length, 1); + } + + // Mark the allocation so that zeroing is skipped since the code + // below will overwrite the entire array + AllocateArrayNode* char_alloc = AllocateArrayNode::Ideal_array_allocation(char_array, _gvn); + char_alloc->maybe_set_complete(_gvn); + + // Now copy the string representations into the final char[] + Node* start = __ intcon(0); + for (int argi = 0; argi < sc->num_arguments(); argi++) { + Node* arg = sc->argument(argi); + switch (sc->mode(argi)) { + case StringConcat::IntMode: { + Node* end = __ AddI(start, string_sizes->in(argi)); + // getChars words backwards so pass the ending point as well as the start + int_getChars(kit, arg, char_array, start, end); + start = end; + break; + } + case StringConcat::StringNullCheckMode: + case StringConcat::StringMode: { + start = copy_string(kit, arg, char_array, start); + break; + } + case StringConcat::CharMode: { + __ store_to_memory(kit.control(), kit.array_element_address(char_array, start, T_CHAR), + arg, T_CHAR, char_adr_idx); + start = __ AddI(start, __ intcon(1)); + break; + } + default: + ShouldNotReachHere(); } - default: - ShouldNotReachHere(); } - } - // If we're not reusing an existing String allocation then allocate one here. - Node* result = sc->string_alloc(); - if (result == NULL) { - PreserveReexecuteState preexecs(&kit); - // The original jvms is for an allocation of either a String or - // StringBuffer so no stack adjustment is necessary for proper - // reexecution. - kit.jvms()->set_should_reexecute(true); - result = kit.new_instance(__ makecon(TypeKlassPtr::make(C->env()->String_klass()))); - } + // If we're not reusing an existing String allocation then allocate one here. + result = sc->string_alloc(); + if (result == NULL) { + PreserveReexecuteState preexecs(&kit); + // The original jvms is for an allocation of either a String or + // StringBuffer so no stack adjustment is necessary for proper + // reexecution. + kit.jvms()->set_should_reexecute(true); + result = kit.new_instance(__ makecon(TypeKlassPtr::make(C->env()->String_klass()))); + } - // Intialize the string - if (java_lang_String::has_offset_field()) { - kit.store_String_offset(kit.control(), result, __ intcon(0)); - kit.store_String_length(kit.control(), result, length); + // Intialize the string + if (java_lang_String::has_offset_field()) { + kit.store_String_offset(kit.control(), result, __ intcon(0)); + kit.store_String_length(kit.control(), result, length); + } + kit.store_String_value(kit.control(), result, char_array); + } else { + result = C->top(); } - kit.store_String_value(kit.control(), result, char_array); - // hook up the outgoing control and result kit.replace_call(sc->end(), result); diff --git a/src/share/vm/prims/methodHandles.cpp b/src/share/vm/prims/methodHandles.cpp index 67e6f954c80c2b0d9bb6e57d6e3338e1a1aabdcc..c3b587967238c544b95a485db22d47899a0d0ffe 100644 --- a/src/share/vm/prims/methodHandles.cpp +++ b/src/share/vm/prims/methodHandles.cpp @@ -1168,8 +1168,8 @@ JVM_ENTRY(void, MHN_setCallSiteTargetNormal(JNIEnv* env, jobject igcls, jobject // Walk all nmethods depending on this call site. MutexLocker mu(Compile_lock, thread); Universe::flush_dependents_on(call_site, target); + java_lang_invoke_CallSite::set_target(call_site(), target()); } - java_lang_invoke_CallSite::set_target(call_site(), target()); } JVM_END @@ -1180,8 +1180,8 @@ JVM_ENTRY(void, MHN_setCallSiteTargetVolatile(JNIEnv* env, jobject igcls, jobjec // Walk all nmethods depending on this call site. MutexLocker mu(Compile_lock, thread); Universe::flush_dependents_on(call_site, target); + java_lang_invoke_CallSite::set_target_volatile(call_site(), target()); } - java_lang_invoke_CallSite::set_target_volatile(call_site(), target()); } JVM_END diff --git a/src/share/vm/prims/unsafe.cpp b/src/share/vm/prims/unsafe.cpp index cb7f0fcf3df353e8bcc8a8d47484aa83fd68dd06..e56277b65d7796a0a3c315c3092ce25ee36bc42e 100644 --- a/src/share/vm/prims/unsafe.cpp +++ b/src/share/vm/prims/unsafe.cpp @@ -468,6 +468,21 @@ UNSAFE_ENTRY(void, Unsafe_SetOrderedLong(JNIEnv *env, jobject unsafe, jobject ob #endif UNSAFE_END +UNSAFE_ENTRY(void, Unsafe_LoadFence(JNIEnv *env, jobject unsafe)) + UnsafeWrapper("Unsafe_LoadFence"); + OrderAccess::acquire(); +UNSAFE_END + +UNSAFE_ENTRY(void, Unsafe_StoreFence(JNIEnv *env, jobject unsafe)) + UnsafeWrapper("Unsafe_StoreFence"); + OrderAccess::release(); +UNSAFE_END + +UNSAFE_ENTRY(void, Unsafe_FullFence(JNIEnv *env, jobject unsafe)) + UnsafeWrapper("Unsafe_FullFence"); + OrderAccess::fence(); +UNSAFE_END + ////// Data in the C heap. // Note: These do not throw NullPointerException for bad pointers. @@ -1550,6 +1565,9 @@ static JNINativeMethod methods[] = { {CC"putOrderedObject", CC"("OBJ"J"OBJ")V", FN_PTR(Unsafe_SetOrderedObject)}, {CC"putOrderedInt", CC"("OBJ"JI)V", FN_PTR(Unsafe_SetOrderedInt)}, {CC"putOrderedLong", CC"("OBJ"JJ)V", FN_PTR(Unsafe_SetOrderedLong)}, + {CC"loadFence", CC"()V", FN_PTR(Unsafe_LoadFence)}, + {CC"storeFence", CC"()V", FN_PTR(Unsafe_StoreFence)}, + {CC"fullFence", CC"()V", FN_PTR(Unsafe_FullFence)}, {CC"park", CC"(ZJ)V", FN_PTR(Unsafe_Park)}, {CC"unpark", CC"("OBJ")V", FN_PTR(Unsafe_Unpark)} diff --git a/src/share/vm/runtime/arguments.cpp b/src/share/vm/runtime/arguments.cpp index ef8bdab4798e588145de47312c4d32edbea77d46..b42f4ce4891fe26eecb22ed1e571510f2ac722b7 100644 --- a/src/share/vm/runtime/arguments.cpp +++ b/src/share/vm/runtime/arguments.cpp @@ -1499,13 +1499,12 @@ void Arguments::set_g1_gc_flags() { Abstract_VM_Version::parallel_worker_threads()); } - if (FLAG_IS_DEFAULT(MarkStackSize)) { - FLAG_SET_DEFAULT(MarkStackSize, 128 * TASKQUEUE_SIZE); - } - if (PrintGCDetails && Verbose) { - tty->print_cr("MarkStackSize: %uk MarkStackSizeMax: %uk", - MarkStackSize / K, MarkStackSizeMax / K); - tty->print_cr("ConcGCThreads: %u", ConcGCThreads); + // MarkStackSize will be set (if it hasn't been set by the user) + // when concurrent marking is initialized. + // Its value will be based upon the number of parallel marking threads. + // But we do set the maximum mark stack size here. + if (FLAG_IS_DEFAULT(MarkStackSizeMax)) { + FLAG_SET_DEFAULT(MarkStackSizeMax, 128 * TASKQUEUE_SIZE); } if (FLAG_IS_DEFAULT(GCTimeRatio) || GCTimeRatio == 0) { @@ -1517,6 +1516,12 @@ void Arguments::set_g1_gc_flags() { // is allocation). We might consider increase it further. FLAG_SET_DEFAULT(GCTimeRatio, 9); } + + if (PrintGCDetails && Verbose) { + tty->print_cr("MarkStackSize: %uk MarkStackSizeMax: %uk", + MarkStackSize / K, MarkStackSizeMax / K); + tty->print_cr("ConcGCThreads: %u", ConcGCThreads); + } } void Arguments::set_heap_size() { @@ -1980,6 +1985,9 @@ bool Arguments::check_vm_args_consistency() { status = status && verify_min_value(ClassMetaspaceSize, 1*M, "ClassMetaspaceSize"); + status = status && verify_interval(MarkStackSizeMax, + 1, (max_jint - 1), "MarkStackSizeMax"); + #ifdef SPARC if (UseConcMarkSweepGC || UseG1GC) { // Issue a stern warning if the user has explicitly set diff --git a/src/share/vm/runtime/globals.hpp b/src/share/vm/runtime/globals.hpp index 7743835dcacf73da11d234b215815350a041f943..dd3750cafb6e22233da4471fc67a22ed07d01772 100644 --- a/src/share/vm/runtime/globals.hpp +++ b/src/share/vm/runtime/globals.hpp @@ -922,6 +922,9 @@ class CommandLineFlags { develop(bool, PrintExceptionHandlers, false, \ "Print exception handler tables for all nmethods when generated") \ \ + develop(bool, StressCompiledExceptionHandlers, false, \ + "Exercise compiled exception handlers") \ + \ develop(bool, InterceptOSException, false, \ "Starts debugger when an implicit OS (e.g., NULL) " \ "exception happens") \ diff --git a/src/share/vm/runtime/os.hpp b/src/share/vm/runtime/os.hpp index ad43ddf83f0f2f414e61c9af1faa60b2e2ca521e..235005f9890c8fdb87edd583ac71f60fbed7725c 100644 --- a/src/share/vm/runtime/os.hpp +++ b/src/share/vm/runtime/os.hpp @@ -255,6 +255,7 @@ class os: AllStatic { static int vm_allocation_granularity(); static char* reserve_memory(size_t bytes, char* addr = 0, size_t alignment_hint = 0); + static char* reserve_memory_aligned(size_t size, size_t alignment); static char* attempt_reserve_memory_at(size_t bytes, char* addr); static void split_reserved_memory(char *base, size_t size, size_t split, bool realloc); diff --git a/src/share/vm/runtime/thread.cpp b/src/share/vm/runtime/thread.cpp index 9ca52bec2cf389c1dbfa0181641b6a74b724f7b2..db3cace3d64b3456600fef6c381bc237f6ef462c 100644 --- a/src/share/vm/runtime/thread.cpp +++ b/src/share/vm/runtime/thread.cpp @@ -2190,7 +2190,7 @@ void JavaThread::send_thread_stop(oop java_throwable) { // BiasedLocking needs an updated RegisterMap for the revoke monitors pass RegisterMap reg_map(this, UseBiasedLocking); frame compiled_frame = f.sender(®_map); - if (compiled_frame.can_be_deoptimized()) { + if (!StressCompiledExceptionHandlers && compiled_frame.can_be_deoptimized()) { Deoptimization::deoptimize(this, compiled_frame, ®_map); } } @@ -3527,11 +3527,12 @@ jint Threads::create_vm(JavaVMInitArgs* args, bool* canTryAgain) { java_lang_Thread::set_thread_status(thread_object, java_lang_Thread::RUNNABLE); - // The VM preresolve methods to these classes. Make sure that get initialized - initialize_class(vmSymbols::java_lang_reflect_Method(), CHECK_0); - initialize_class(vmSymbols::java_lang_ref_Finalizer(), CHECK_0); // The VM creates & returns objects of this class. Make sure it's initialized. initialize_class(vmSymbols::java_lang_Class(), CHECK_0); + + // The VM preresolves methods to these classes. Make sure that they get initialized + initialize_class(vmSymbols::java_lang_reflect_Method(), CHECK_0); + initialize_class(vmSymbols::java_lang_ref_Finalizer(), CHECK_0); call_initializeSystemClass(CHECK_0); // get the Java runtime name after java.lang.System is initialized diff --git a/src/share/vm/runtime/virtualspace.cpp b/src/share/vm/runtime/virtualspace.cpp index 18c147a6ad32a8f58da00c2862087854f3164404..f33c1995e342d67849f134bbd87a9c77488b4720 100644 --- a/src/share/vm/runtime/virtualspace.cpp +++ b/src/share/vm/runtime/virtualspace.cpp @@ -329,20 +329,9 @@ void ReservedSpace::initialize(size_t size, size_t alignment, bool large, if ((((size_t)base + noaccess_prefix) & (alignment - 1)) != 0) { // Base not aligned, retry if (!os::release_memory(base, size)) fatal("os::release_memory failed"); - // Reserve size large enough to do manual alignment and - // increase size to a multiple of the desired alignment + // Make sure that size is aligned size = align_size_up(size, alignment); - size_t extra_size = size + alignment; - do { - char* extra_base = os::reserve_memory(extra_size, NULL, alignment); - if (extra_base == NULL) return; - // Do manual alignement - base = (char*) align_size_up((uintptr_t) extra_base, alignment); - assert(base >= extra_base, "just checking"); - // Re-reserve the region at the aligned base address. - os::release_memory(extra_base, extra_size); - base = os::reserve_memory(size, base); - } while (base == NULL); + base = os::reserve_memory_aligned(size, alignment); if (requested_address != 0 && failed_to_reserve_as_requested(base, requested_address, size, false)) { diff --git a/src/share/vm/runtime/vmStructs.cpp b/src/share/vm/runtime/vmStructs.cpp index 9346d6c72a72fb0d90c2279fdf9f8c6a62f6307c..9ac10e26d4c043b7cad9dd8818ba6a98ef1c636c 100644 --- a/src/share/vm/runtime/vmStructs.cpp +++ b/src/share/vm/runtime/vmStructs.cpp @@ -355,8 +355,6 @@ typedef BinaryTreeDictionary MetablockTreeDictionary; nonstatic_field(Method, _access_flags, AccessFlags) \ nonstatic_field(Method, _vtable_index, int) \ nonstatic_field(Method, _method_size, u2) \ - nonstatic_field(Method, _max_locals, u2) \ - nonstatic_field(Method, _size_of_parameters, u2) \ nonstatic_field(Method, _interpreter_throwout_count, u2) \ nonstatic_field(Method, _number_of_breakpoints, u2) \ nonstatic_field(Method, _invocation_counter, InvocationCounter) \ @@ -378,6 +376,8 @@ typedef BinaryTreeDictionary MetablockTreeDictionary; nonstatic_field(ConstMethod, _signature_index, u2) \ nonstatic_field(ConstMethod, _method_idnum, u2) \ nonstatic_field(ConstMethod, _max_stack, u2) \ + nonstatic_field(ConstMethod, _max_locals, u2) \ + nonstatic_field(ConstMethod, _size_of_parameters, u2) \ nonstatic_field(ObjArrayKlass, _element_klass, Klass*) \ nonstatic_field(ObjArrayKlass, _bottom_klass, Klass*) \ volatile_nonstatic_field(Symbol, _refcount, int) \ diff --git a/src/share/vm/services/memBaseline.hpp b/src/share/vm/services/memBaseline.hpp index 5f98e30d451d9da3151ee6a64f16e575b4bf738b..c409ec3face44d3f5db3a29ee14c72b0c45e46a2 100644 --- a/src/share/vm/services/memBaseline.hpp +++ b/src/share/vm/services/memBaseline.hpp @@ -334,7 +334,7 @@ class MemBaseline : public _ValueObj { // create a memory baseline MemBaseline(); - virtual ~MemBaseline(); + ~MemBaseline(); inline bool baselined() const { return _baselined; diff --git a/src/share/vm/utilities/workgroup.hpp b/src/share/vm/utilities/workgroup.hpp index b82478eac1d55ad3797633bff0d7089e69a8c8ee..6a93536246bc849952dd76ec08510bf8b2f3d11a 100644 --- a/src/share/vm/utilities/workgroup.hpp +++ b/src/share/vm/utilities/workgroup.hpp @@ -90,7 +90,7 @@ protected: NOT_PRODUCT(_name = name); _counter = 0; } - virtual ~AbstractGangTask() { } + ~AbstractGangTask() { } public: }; diff --git a/src/share/vm/utilities/yieldingWorkgroup.hpp b/src/share/vm/utilities/yieldingWorkgroup.hpp index a39f8227f579e49f2e7b0c24c4638cd506cbbe7a..f9646f91f7fdb993f4053374c055f2eb750ae880 100644 --- a/src/share/vm/utilities/yieldingWorkgroup.hpp +++ b/src/share/vm/utilities/yieldingWorkgroup.hpp @@ -106,7 +106,7 @@ protected: _status(INACTIVE), _gang(NULL) { } - virtual ~YieldingFlexibleGangTask() { } + ~YieldingFlexibleGangTask() { } friend class YieldingFlexibleWorkGang; friend class YieldingFlexibleGangWorker; diff --git a/test/compiler/7184394/TestAESBase.java b/test/compiler/7184394/TestAESBase.java index ad6c835cc8458036f7d1679a44765578f442c876..511b97dc6c6b5216fda18172a9e91f15d806a93e 100644 --- a/test/compiler/7184394/TestAESBase.java +++ b/test/compiler/7184394/TestAESBase.java @@ -54,7 +54,6 @@ abstract public class TestAESBase { String paddingStr = "PKCS5Padding"; AlgorithmParameters algParams; SecretKey key; - int ivLen; static int numThreads = 0; int threadId; @@ -68,7 +67,7 @@ abstract public class TestAESBase { public void prepare() { try { - System.out.println("\nmsgSize=" + msgSize + ", key size=" + keySize + ", reInit=" + !noReinit + ", checkOutput=" + checkOutput); + System.out.println("\nalgorithm=" + algorithm + ", mode=" + mode + ", msgSize=" + msgSize + ", keySize=" + keySize + ", noReinit=" + noReinit + ", checkOutput=" + checkOutput); int keyLenBytes = (keySize == 0 ? 16 : keySize/8); byte keyBytes[] = new byte[keyLenBytes]; @@ -90,10 +89,14 @@ abstract public class TestAESBase { cipher = Cipher.getInstance(algorithm + "/" + mode + "/" + paddingStr, "SunJCE"); dCipher = Cipher.getInstance(algorithm + "/" + mode + "/" + paddingStr, "SunJCE"); - ivLen = (algorithm.equals("AES") ? 16 : algorithm.equals("DES") ? 8 : 0); - IvParameterSpec initVector = new IvParameterSpec(new byte[ivLen]); - - cipher.init(Cipher.ENCRYPT_MODE, key, initVector); + if (mode.equals("CBC")) { + int ivLen = (algorithm.equals("AES") ? 16 : algorithm.equals("DES") ? 8 : 0); + IvParameterSpec initVector = new IvParameterSpec(new byte[ivLen]); + cipher.init(Cipher.ENCRYPT_MODE, key, initVector); + } else { + algParams = cipher.getParameters(); + cipher.init(Cipher.ENCRYPT_MODE, key, algParams); + } algParams = cipher.getParameters(); dCipher.init(Cipher.DECRYPT_MODE, key, algParams); if (threadId == 0) { diff --git a/test/compiler/7184394/TestAESMain.java b/test/compiler/7184394/TestAESMain.java index ca2cb43dc8692f0507743cdffce40209414987f0..48baf572b3ecdf1d6f85ffa04e2f6c1d51fa9d79 100644 --- a/test/compiler/7184394/TestAESMain.java +++ b/test/compiler/7184394/TestAESMain.java @@ -27,7 +27,8 @@ * @bug 7184394 * @summary add intrinsics to use AES instructions * - * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true TestAESMain + * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=CBC TestAESMain + * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=ECB TestAESMain * * @author Tom Deneau */ diff --git a/test/compiler/8004741/Test8004741.java b/test/compiler/8004741/Test8004741.java new file mode 100644 index 0000000000000000000000000000000000000000..95e63b9c0c1c1bba875fdfaba67e7cda61b75e65 --- /dev/null +++ b/test/compiler/8004741/Test8004741.java @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* + * @test Test8004741.java + * @bug 8004741 + * @summary Missing compiled exception handle table entry for multidimensional array allocation + * @run main/othervm -Xmx64m -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:+StressCompiledExceptionHandlers Test8004741 + * + */ + +import java.util.*; + +public class Test8004741 extends Thread { + + static int[][] test(int a, int b) throws Exception { + int[][] ar = null; + try { + ar = new int[a][b]; + } catch (Error e) { + System.out.println("test got Error"); + passed = true; + throw(e); + } catch (Exception e) { + System.out.println("test got Exception"); + throw(e); + } + return ar; + } + + static boolean passed = false; + + public void run() { + System.out.println("test started"); + try { + while(true) { + test(2,20000); + } + } catch (ThreadDeath e) { + System.out.println("test got ThreadDeath"); + passed = true; + } catch (Error e) { + e.printStackTrace(); + System.out.println("test got Error"); + } catch (Exception e) { + e.printStackTrace(); + System.out.println("test got Exception"); + } + } + + public static void main(String[] args) throws Exception { + for (int n = 0; n < 11000; n++) { + test(2, 20); + } + + // First test exception catch + Test8004741 t = new Test8004741(); + + passed = false; + t.start(); + Thread.sleep(1000); + t.stop(); + + Thread.sleep(5000); + t.join(); + if (passed) { + System.out.println("PASSED"); + } else { + System.out.println("FAILED"); + System.exit(97); + } + } + +}; diff --git a/test/compiler/8005033/Test8005033.java b/test/compiler/8005033/Test8005033.java new file mode 100644 index 0000000000000000000000000000000000000000..e67063908414e40854f130181538e19b0f3fd2a1 --- /dev/null +++ b/test/compiler/8005033/Test8005033.java @@ -0,0 +1,50 @@ +/* + * Copyright 2012 SAP AG. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/** + * @test + * @bug 8005033 + * @summary On sparcv9, C2's intrinsic for Integer.bitCount(OV) returns wrong result if OV is the result of an operation with int overflow. + * @run main/othervm -Xcomp -XX:CompileOnly=Test8005033::testBitCount Test8005033 + * @author Richard Reingruber richard DOT reingruber AT sap DOT com + */ + +public class Test8005033 { + public static int MINUS_ONE = -1; + + public static void main(String[] args) { + System.out.println("EXECUTING test."); + Integer.bitCount(1); // load class + int expectedBitCount = 0; + int calculatedBitCount = testBitCount(); + if (expectedBitCount != calculatedBitCount) { + throw new InternalError("got " + calculatedBitCount + " but expected " + expectedBitCount); + } + System.out.println("SUCCESSFULLY passed test."); + } + + // testBitCount will be compiled using the Integer.bitCount() intrinsic if possible + private static int testBitCount() { + return Integer.bitCount(MINUS_ONE+1); // -1 + 1 => int overflow + } +} diff --git a/test/sanity/ExecuteInternalVMTests.java b/test/sanity/ExecuteInternalVMTests.java new file mode 100644 index 0000000000000000000000000000000000000000..b3e3724f287523bb656c8d58985f4c0bc960de5e --- /dev/null +++ b/test/sanity/ExecuteInternalVMTests.java @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +/* @test ExecuteInternalVMTests + * @bug 8004691 + * @summary Add a jtreg test that exercises the ExecuteInternalVMTests flag + * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+ExecuteInternalVMTests ExecuteInternalVMTests + */ +public class ExecuteInternalVMTests { + public static void main(String[] args) throws Exception { + // The tests that are run are the HotSpot internal tests which are + // executed only when the flag -XX:+ExecuteInternalVMTests is used. + + // The flag -XX:+ExecuteInternalVMTests can only be used for + // non-product builds of HotSpot. Therefore, the flag + // -XX:+IgnoreUnrecognizedVMOptions is also used, which means that this + // test will do nothing on a product build. + } +}