diff --git a/.hgtags b/.hgtags
index c04b27715a86cf07d30f85abbbe52f96f60ac95d..7db2a557cb4b5cb33083f5af56f79b25f101801b 100644
--- a/.hgtags
+++ b/.hgtags
@@ -299,3 +299,7 @@ cfc5309f03b7bd6c1567618b63cf1fc74c0f2a8f hs25-b10
 b61d9c88b759d1594b8af1655598e8fa00393672 hs25-b11
 25bdce771bb3a7ae9825261a284d292cda700122 jdk8-b67
 a35a72dd2e1255239d31f796f9f693e49b36bc9f hs25-b12
+121aa71316af6cd877bf455e775fa3fdbcdd4b65 jdk8-b68
+b6c9c0109a608eedbb6b868d260952990e3c91fe hs25-b13
+cb8a4e04bc8c104de8a2f67463c7e31232bf8d68 jdk8-b69
+990bbd393c239d95310ccc38094e57923bbf1d4a hs25-b14
diff --git a/agent/src/share/classes/sun/jvm/hotspot/oops/ConstMethod.java b/agent/src/share/classes/sun/jvm/hotspot/oops/ConstMethod.java
index 3e30820206a5b4864dd6df67f2da5d59b26e3be8..ad495d73bea91e1e08f59cc08f285251ab23b76e 100644
--- a/agent/src/share/classes/sun/jvm/hotspot/oops/ConstMethod.java
+++ b/agent/src/share/classes/sun/jvm/hotspot/oops/ConstMethod.java
@@ -69,6 +69,8 @@ public class ConstMethod extends VMObject {
     signatureIndex             = new CIntField(type.getCIntegerField("_signature_index"), 0);
     idnum                      = new CIntField(type.getCIntegerField("_method_idnum"), 0);
     maxStack                   = new CIntField(type.getCIntegerField("_max_stack"), 0);
+    maxLocals                  = new CIntField(type.getCIntegerField("_max_locals"), 0);
+    sizeOfParameters           = new CIntField(type.getCIntegerField("_size_of_parameters"), 0);
 
     // start of byte code
     bytecodeOffset = type.getSize();
@@ -96,6 +98,8 @@ public class ConstMethod extends VMObject {
   private static CIntField signatureIndex;
   private static CIntField idnum;
   private static CIntField maxStack;
+  private static CIntField maxLocals;
+  private static CIntField sizeOfParameters;
 
   // start of bytecode
   private static long bytecodeOffset;
@@ -151,6 +155,14 @@ public class ConstMethod extends VMObject {
     return maxStack.getValue(this);
   }
 
+  public long getMaxLocals() {
+    return maxLocals.getValue(this);
+  }
+
+  public long getSizeOfParameters() {
+    return sizeOfParameters.getValue(this);
+  }
+
   public Symbol getName() {
     return getMethod().getName();
   }
@@ -247,6 +259,8 @@ public class ConstMethod extends VMObject {
       visitor.doCInt(signatureIndex, true);
       visitor.doCInt(codeSize, true);
       visitor.doCInt(maxStack, true);
+      visitor.doCInt(maxLocals, true);
+      visitor.doCInt(sizeOfParameters, true);
     }
 
   // Accessors
diff --git a/agent/src/share/classes/sun/jvm/hotspot/oops/Method.java b/agent/src/share/classes/sun/jvm/hotspot/oops/Method.java
index a12d0b1e84e5f2afd62bcd292a519c99dece8ffc..31dc39c543171f1cbefe3276c1f1be7ce4a1f03d 100644
--- a/agent/src/share/classes/sun/jvm/hotspot/oops/Method.java
+++ b/agent/src/share/classes/sun/jvm/hotspot/oops/Method.java
@@ -50,8 +50,6 @@ public class Method extends Metadata {
     constMethod                = type.getAddressField("_constMethod");
     methodData                 = type.getAddressField("_method_data");
     methodSize                 = new CIntField(type.getCIntegerField("_method_size"), 0);
-    maxLocals                  = new CIntField(type.getCIntegerField("_max_locals"), 0);
-    sizeOfParameters           = new CIntField(type.getCIntegerField("_size_of_parameters"), 0);
     accessFlags                = new CIntField(type.getCIntegerField("_access_flags"), 0);
     code                       = type.getAddressField("_code");
     vtableIndex                = new CIntField(type.getCIntegerField("_vtable_index"), 0);
@@ -83,8 +81,6 @@ public class Method extends Metadata {
   private static AddressField  constMethod;
   private static AddressField  methodData;
   private static CIntField methodSize;
-  private static CIntField maxLocals;
-  private static CIntField sizeOfParameters;
   private static CIntField accessFlags;
   private static CIntField vtableIndex;
   private static CIntField invocationCounter;
@@ -134,8 +130,8 @@ public class Method extends Metadata {
   /** WARNING: this is in words, not useful in this system; use getObjectSize() instead */
   public long         getMethodSize()                 { return                methodSize.getValue(this);        }
   public long         getMaxStack()                   { return                getConstMethod().getMaxStack();   }
-  public long         getMaxLocals()                  { return                maxLocals.getValue(this);         }
-  public long         getSizeOfParameters()           { return                sizeOfParameters.getValue(this);  }
+  public long         getMaxLocals()                  { return                getConstMethod().getMaxLocals();         }
+  public long         getSizeOfParameters()           { return                getConstMethod().getSizeOfParameters();  }
   public long         getNameIndex()                  { return                getConstMethod().getNameIndex();  }
   public long         getSignatureIndex()             { return            getConstMethod().getSignatureIndex(); }
   public long         getGenericSignatureIndex()      { return     getConstMethod().getGenericSignatureIndex(); }
@@ -282,8 +278,6 @@ public class Method extends Metadata {
 
   public void iterateFields(MetadataVisitor visitor) {
       visitor.doCInt(methodSize, true);
-      visitor.doCInt(maxLocals, true);
-      visitor.doCInt(sizeOfParameters, true);
       visitor.doCInt(accessFlags, true);
     }
 
diff --git a/make/hotspot_version b/make/hotspot_version
index 7883b9bfb56d4cac69b051f3e370cd67569761f0..d6959b3b86c3b16d8e07848f4b2324998f6172c9 100644
--- a/make/hotspot_version
+++ b/make/hotspot_version
@@ -35,7 +35,7 @@ HOTSPOT_VM_COPYRIGHT=Copyright 2012
 
 HS_MAJOR_VER=25
 HS_MINOR_VER=0
-HS_BUILD_NUMBER=13
+HS_BUILD_NUMBER=15
 
 JDK_MAJOR_VER=1
 JDK_MINOR_VER=8
diff --git a/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp b/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp
index 30a8983f9eb330e1b8f379a92aaa09ab05368fe1..fa5ce4fa6bef53540a3f54877cae06ad89206602 100644
--- a/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp
+++ b/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp
@@ -298,7 +298,7 @@ void PatchingStub::emit_code(LIR_Assembler* ce) {
     for (int i = 0; i < _bytes_to_copy; i++) {
       address ptr = (address)(_pc_start + i);
       int a_byte = (*ptr) & 0xFF;
-      __ a_byte (a_byte);
+      __ emit_int8 (a_byte);
     }
   }
 
@@ -340,10 +340,10 @@ void PatchingStub::emit_code(LIR_Assembler* ce) {
   int being_initialized_entry_offset = __ offset() - being_initialized_entry + sizeof_patch_record;
 
   // Emit the patch record.  We need to emit a full word, so emit an extra empty byte
-  __ a_byte(0);
-  __ a_byte(being_initialized_entry_offset);
-  __ a_byte(bytes_to_skip);
-  __ a_byte(_bytes_to_copy);
+  __ emit_int8(0);
+  __ emit_int8(being_initialized_entry_offset);
+  __ emit_int8(bytes_to_skip);
+  __ emit_int8(_bytes_to_copy);
   address patch_info_pc = __ pc();
   assert(patch_info_pc - end_of_patch == bytes_to_skip, "incorrect patch info");
 
diff --git a/src/cpu/sparc/vm/cppInterpreter_sparc.cpp b/src/cpu/sparc/vm/cppInterpreter_sparc.cpp
index eacb4d3fd668840dd69b6382dd0ffe3c8608d857..7031597b6cfa7aa3a95a76585e48bb0b92f500fa 100644
--- a/src/cpu/sparc/vm/cppInterpreter_sparc.cpp
+++ b/src/cpu/sparc/vm/cppInterpreter_sparc.cpp
@@ -582,7 +582,9 @@ address InterpreterGenerator::generate_native_entry(bool synchronized) {
   // the following temporary registers are used during frame creation
   const Register Gtmp1 = G3_scratch ;
   const Register Gtmp2 = G1_scratch;
-  const Address size_of_parameters(G5_method, 0, in_bytes(Method::size_of_parameters_offset()));
+  const Register RconstMethod = Gtmp1;
+  const Address constMethod(G5_method, 0, in_bytes(Method::const_offset()));
+  const Address size_of_parameters(RconstMethod, 0, in_bytes(ConstMethod::size_of_parameters_offset()));
 
   bool inc_counter  = UseCompiler || CountCompiledCalls;
 
@@ -618,6 +620,7 @@ address InterpreterGenerator::generate_native_entry(bool synchronized) {
   }
 #endif // ASSERT
 
+  __ ld_ptr(constMethod, RconstMethod);
   __ lduh(size_of_parameters, Gtmp1);
   __ sll(Gtmp1, LogBytesPerWord, Gtmp2);       // parameter size in bytes
   __ add(Gargs, Gtmp2, Gargs);                 // points to first local + BytesPerWord
@@ -1047,8 +1050,6 @@ void CppInterpreterGenerator::generate_compute_interpreter_state(const Register
   const Register Gtmp = G3_scratch;
   const Address constMethod       (G5_method, 0, in_bytes(Method::const_offset()));
   const Address access_flags      (G5_method, 0, in_bytes(Method::access_flags_offset()));
-  const Address size_of_parameters(G5_method, 0, in_bytes(Method::size_of_parameters_offset()));
-  const Address size_of_locals    (G5_method, 0, in_bytes(Method::size_of_locals_offset()));
 
   // slop factor is two extra slots on the expression stack so that
   // we always have room to store a result when returning from a call without parameters
@@ -1066,6 +1067,9 @@ void CppInterpreterGenerator::generate_compute_interpreter_state(const Register
   // Now compute new frame size
 
   if (native) {
+    const Register RconstMethod = Gtmp;
+    const Address size_of_parameters(RconstMethod, 0, in_bytes(ConstMethod::size_of_parameters_offset()));
+    __ ld_ptr(constMethod, RconstMethod);
     __ lduh( size_of_parameters, Gtmp );
     __ calc_mem_param_words(Gtmp, Gtmp);     // space for native call parameters passed on the stack in words
   } else {
@@ -1236,9 +1240,13 @@ void CppInterpreterGenerator::generate_compute_interpreter_state(const Register
     }
     if (init_value != noreg) {
       Label clear_loop;
+      const Register RconstMethod = O1;
+      const Address size_of_parameters(RconstMethod, 0, in_bytes(ConstMethod::size_of_parameters_offset()));
+      const Address size_of_locals    (RconstMethod, 0, in_bytes(ConstMethod::size_of_locals_offset()));
 
       // NOTE: If you change the frame layout, this code will need to
       // be updated!
+      __ ld_ptr( constMethod, RconstMethod );
       __ lduh( size_of_locals, O2 );
       __ lduh( size_of_parameters, O1 );
       __ sll( O2, LogBytesPerWord, O2);
@@ -1483,13 +1491,16 @@ void CppInterpreterGenerator::adjust_callers_stack(Register args) {
 //
 //  assert_different_registers(state, prev_state);
   const Register Gtmp = G3_scratch;
+  const RconstMethod = G3_scratch;
   const Register tmp = O2;
-  const Address size_of_parameters(G5_method, 0, in_bytes(Method::size_of_parameters_offset()));
-  const Address size_of_locals    (G5_method, 0, in_bytes(Method::size_of_locals_offset()));
+  const Address constMethod(G5_method, 0, in_bytes(Method::const_offset()));
+  const Address size_of_parameters(RconstMethod, 0, in_bytes(ConstMethod::size_of_parameters_offset()));
+  const Address size_of_locals    (RconstMethod, 0, in_bytes(ConstMethod::size_of_locals_offset()));
 
+  __ ld_ptr(constMethod, RconstMethod);
   __ lduh(size_of_parameters, tmp);
-  __ sll(tmp, LogBytesPerWord, Gtmp);       // parameter size in bytes
-  __ add(args, Gtmp, Gargs);                // points to first local + BytesPerWord
+  __ sll(tmp, LogBytesPerWord, Gargs);       // parameter size in bytes
+  __ add(args, Gargs, Gargs);                // points to first local + BytesPerWord
   // NEW
   __ add(Gargs, -wordSize, Gargs);             // points to first local[0]
   // determine extra space for non-argument locals & adjust caller's SP
@@ -1541,8 +1552,6 @@ address InterpreterGenerator::generate_normal_entry(bool synchronized) {
 
   const Address constMethod       (G5_method, 0, in_bytes(Method::const_offset()));
   const Address access_flags      (G5_method, 0, in_bytes(Method::access_flags_offset()));
-  const Address size_of_parameters(G5_method, 0, in_bytes(Method::size_of_parameters_offset()));
-  const Address size_of_locals    (G5_method, 0, in_bytes(Method::size_of_locals_offset()));
 
   address entry_point = __ pc();
   __ mov(G0, prevState);                                                 // no current activation
@@ -1750,7 +1759,9 @@ address InterpreterGenerator::generate_normal_entry(bool synchronized) {
 
   __ ld_ptr(STATE(_result._to_call._callee), L4_scratch);                        // called method
   __ ld_ptr(STATE(_stack), L1_scratch);                                          // get top of java expr stack
-  __ lduh(L4_scratch, in_bytes(Method::size_of_parameters_offset()), L2_scratch); // get parameter size
+  // get parameter size
+  __ ld_ptr(L4_scratch, in_bytes(Method::const_offset()), L2_scratch);
+  __ lduh(L2_scratch, in_bytes(ConstMethod::size_of_parameters_offset()), L2_scratch);
   __ sll(L2_scratch, LogBytesPerWord, L2_scratch     );                           // parameter size in bytes
   __ add(L1_scratch, L2_scratch, L1_scratch);                                      // stack destination for result
   __ ld(L4_scratch, in_bytes(Method::result_index_offset()), L3_scratch); // called method result type index
diff --git a/src/cpu/sparc/vm/macroAssembler_sparc.cpp b/src/cpu/sparc/vm/macroAssembler_sparc.cpp
index 2801a35a10d1e7c7f912ce313c4916bb9a0bf2b7..ac56573e76658b94b2df171cbb2868fa0a63a29f 100644
--- a/src/cpu/sparc/vm/macroAssembler_sparc.cpp
+++ b/src/cpu/sparc/vm/macroAssembler_sparc.cpp
@@ -100,34 +100,6 @@ const char* Argument::name() const {
 bool AbstractAssembler::pd_check_instruction_mark() { return false; }
 #endif
 
-
-void MacroAssembler::print_instruction(int inst) {
-  const char* s;
-  switch (inv_op(inst)) {
-  default:         s = "????"; break;
-  case call_op:    s = "call"; break;
-  case branch_op:
-    switch (inv_op2(inst)) {
-      case fb_op2:     s = "fb";   break;
-      case fbp_op2:    s = "fbp";  break;
-      case br_op2:     s = "br";   break;
-      case bp_op2:     s = "bp";   break;
-      case cb_op2:     s = "cb";   break;
-      case bpr_op2: {
-        if (is_cbcond(inst)) {
-          s = is_cxb(inst) ? "cxb" : "cwb";
-        } else {
-          s = "bpr";
-        }
-        break;
-      }
-      default:         s = "????"; break;
-    }
-  }
-  ::tty->print("%s", s);
-}
-
-
 // Patch instruction inst at offset inst_pos to refer to dest_pos
 // and return the resulting instruction.
 // We should have pcs, not offsets, but since all is relative, it will work out
diff --git a/src/cpu/sparc/vm/macroAssembler_sparc.hpp b/src/cpu/sparc/vm/macroAssembler_sparc.hpp
index 1e1e6de9e47d8d302efe8a9d6e2932c15207db70..dffef7e794b291aef2d0edbb2692941263b056e5 100644
--- a/src/cpu/sparc/vm/macroAssembler_sparc.hpp
+++ b/src/cpu/sparc/vm/macroAssembler_sparc.hpp
@@ -603,7 +603,6 @@ class MacroAssembler : public Assembler {
   friend class Label;
 
  protected:
-  static void print_instruction(int inst);
   static int  patched_branch(int dest_pos, int inst, int inst_pos);
   static int  branch_destination(int inst, int pos);
 
@@ -759,9 +758,6 @@ class MacroAssembler : public Assembler {
   // Required platform-specific helpers for Label::patch_instructions.
   // They _shadow_ the declarations in AbstractAssembler, which are undefined.
   void pd_patch_instruction(address branch, address target);
-#ifndef PRODUCT
-  static void pd_print_patched_instruction(address branch);
-#endif
 
   // sethi Macro handles optimizations and relocations
 private:
diff --git a/src/cpu/sparc/vm/macroAssembler_sparc.inline.hpp b/src/cpu/sparc/vm/macroAssembler_sparc.inline.hpp
index e9bdaea79ad4b6d161ea5ba44c7b73b31b8b21a2..871853c684d0420edb946ee428108be35629d0ec 100644
--- a/src/cpu/sparc/vm/macroAssembler_sparc.inline.hpp
+++ b/src/cpu/sparc/vm/macroAssembler_sparc.inline.hpp
@@ -43,14 +43,6 @@ inline void MacroAssembler::pd_patch_instruction(address branch, address target)
   stub_inst = patched_branch(target - branch, stub_inst, 0);
 }
 
-#ifndef PRODUCT
-inline void MacroAssembler::pd_print_patched_instruction(address branch) {
-  jint stub_inst = *(jint*) branch;
-  print_instruction(stub_inst);
-  ::tty->print("%s", " (unresolved)");
-}
-#endif // PRODUCT
-
 // Use the right loads/stores for the platform
 inline void MacroAssembler::ld_ptr( Register s1, Register s2, Register d ) {
 #ifdef _LP64
diff --git a/src/cpu/sparc/vm/methodHandles_sparc.cpp b/src/cpu/sparc/vm/methodHandles_sparc.cpp
index f4637bf76de78af1a9b8e0c9b2504c6af046f00f..546cfaf08f1b2987a9522cac4cc82a7de38a33af 100644
--- a/src/cpu/sparc/vm/methodHandles_sparc.cpp
+++ b/src/cpu/sparc/vm/methodHandles_sparc.cpp
@@ -171,7 +171,8 @@ void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm,
 
   if (VerifyMethodHandles && !for_compiler_entry) {
     // make sure recv is already on stack
-    __ load_sized_value(Address(method_temp, Method::size_of_parameters_offset()),
+    __ ld_ptr(method_temp, in_bytes(Method::const_offset()), temp2);
+    __ load_sized_value(Address(temp2, ConstMethod::size_of_parameters_offset()),
                         temp2,
                         sizeof(u2), /*is_signed*/ false);
     // assert(sizeof(u2) == sizeof(Method::_size_of_parameters), "");
@@ -233,7 +234,8 @@ address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler*
   int ref_kind = signature_polymorphic_intrinsic_ref_kind(iid);
   assert(ref_kind != 0 || iid == vmIntrinsics::_invokeBasic, "must be _invokeBasic or a linkTo intrinsic");
   if (ref_kind == 0 || MethodHandles::ref_kind_has_receiver(ref_kind)) {
-    __ load_sized_value(Address(G5_method, Method::size_of_parameters_offset()),
+    __ ld_ptr(G5_method, in_bytes(Method::const_offset()), O4_param_size);
+    __ load_sized_value(Address(O4_param_size, ConstMethod::size_of_parameters_offset()),
                         O4_param_size,
                         sizeof(u2), /*is_signed*/ false);
     // assert(sizeof(u2) == sizeof(Method::_size_of_parameters), "");
diff --git a/src/cpu/sparc/vm/sparc.ad b/src/cpu/sparc/vm/sparc.ad
index b7029faa604ebf069e802981311f663479819df9..96accce02565c45f0a72b876487354c45059b66f 100644
--- a/src/cpu/sparc/vm/sparc.ad
+++ b/src/cpu/sparc/vm/sparc.ad
@@ -10224,7 +10224,7 @@ instruct array_equals(o0RegP ary1, o1RegP ary2, g3RegI tmp1, notemp_iRegI result
 
 //---------- Zeros Count Instructions ------------------------------------------
 
-instruct countLeadingZerosI(iRegI dst, iRegI src, iRegI tmp, flagsReg cr) %{
+instruct countLeadingZerosI(iRegIsafe dst, iRegI src, iRegI tmp, flagsReg cr) %{
   predicate(UsePopCountInstruction);  // See Matcher::match_rule_supported
   match(Set dst (CountLeadingZerosI src));
   effect(TEMP dst, TEMP tmp, KILL cr);
@@ -10321,7 +10321,7 @@ instruct countLeadingZerosL(iRegIsafe dst, iRegL src, iRegL tmp, flagsReg cr) %{
   ins_pipe(ialu_reg);
 %}
 
-instruct countTrailingZerosI(iRegI dst, iRegI src, flagsReg cr) %{
+instruct countTrailingZerosI(iRegIsafe dst, iRegI src, flagsReg cr) %{
   predicate(UsePopCountInstruction);  // See Matcher::match_rule_supported
   match(Set dst (CountTrailingZerosI src));
   effect(TEMP dst, KILL cr);
@@ -10364,19 +10364,21 @@ instruct countTrailingZerosL(iRegIsafe dst, iRegL src, flagsReg cr) %{
 
 //---------- Population Count Instructions -------------------------------------
 
-instruct popCountI(iRegI dst, iRegI src) %{
+instruct popCountI(iRegIsafe dst, iRegI src) %{
   predicate(UsePopCountInstruction);
   match(Set dst (PopCountI src));
 
-  format %{ "POPC   $src, $dst" %}
+  format %{ "SRL    $src, G0, $dst\t! clear upper word for 64 bit POPC\n\t"
+            "POPC   $dst, $dst" %}
   ins_encode %{
-    __ popc($src$$Register, $dst$$Register);
+    __ srl($src$$Register, G0, $dst$$Register);
+    __ popc($dst$$Register, $dst$$Register);
   %}
   ins_pipe(ialu_reg);
 %}
 
 // Note: Long.bitCount(long) returns an int.
-instruct popCountL(iRegI dst, iRegL src) %{
+instruct popCountL(iRegIsafe dst, iRegL src) %{
   predicate(UsePopCountInstruction);
   match(Set dst (PopCountL src));
 
diff --git a/src/cpu/sparc/vm/templateInterpreter_sparc.cpp b/src/cpu/sparc/vm/templateInterpreter_sparc.cpp
index 2de866adb0744fb1b5a33e1fef282680fb0b81ad..0827c0b1a9c420802ac2032ccc412a02a879ff20 100644
--- a/src/cpu/sparc/vm/templateInterpreter_sparc.cpp
+++ b/src/cpu/sparc/vm/templateInterpreter_sparc.cpp
@@ -434,7 +434,7 @@ void TemplateInterpreterGenerator::generate_stack_overflow_check(Register Rframe
 
   // the frame is greater than one page in size, so check against
   // the bottom of the stack
-  __ cmp_and_brx_short(SP, Rscratch, Assembler::greater, Assembler::pt, after_frame_check);
+  __ cmp_and_brx_short(SP, Rscratch, Assembler::greaterUnsigned, Assembler::pt, after_frame_check);
 
   // the stack will overflow, throw an exception
 
@@ -494,9 +494,6 @@ void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) {
   // (gri - 2/25/2000)
 
 
-  const Address size_of_parameters(G5_method, Method::size_of_parameters_offset());
-  const Address size_of_locals    (G5_method, Method::size_of_locals_offset());
-  const Address constMethod       (G5_method, Method::const_offset());
   int rounded_vm_local_words = round_to( frame::interpreter_frame_vm_local_words, WordsPerLong );
 
   const int extra_space =
@@ -506,11 +503,15 @@ void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) {
     (native_call ? frame::interpreter_frame_extra_outgoing_argument_words : 0);
 
   const Register Glocals_size = G3;
+  const Register RconstMethod = Glocals_size;
   const Register Otmp1 = O3;
   const Register Otmp2 = O4;
   // Lscratch can't be used as a temporary because the call_stub uses
   // it to assert that the stack frame was setup correctly.
+  const Address constMethod       (G5_method, Method::const_offset());
+  const Address size_of_parameters(RconstMethod, ConstMethod::size_of_parameters_offset());
 
+  __ ld_ptr( constMethod, RconstMethod );
   __ lduh( size_of_parameters, Glocals_size);
 
   // Gargs points to first local + BytesPerWord
@@ -530,6 +531,8 @@ void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) {
     //
     // Compute number of locals in method apart from incoming parameters
     //
+    const Address size_of_locals    (Otmp1, ConstMethod::size_of_locals_offset());
+    __ ld_ptr( constMethod, Otmp1 );
     __ lduh( size_of_locals, Otmp1 );
     __ sub( Otmp1, Glocals_size, Glocals_size );
     __ round_to( Glocals_size, WordsPerLong );
@@ -1256,8 +1259,7 @@ address InterpreterGenerator::generate_normal_entry(bool synchronized) {
   // make sure registers are different!
   assert_different_registers(G2_thread, G5_method, Gargs, Gtmp1, Gtmp2);
 
-  const Address size_of_parameters(G5_method, Method::size_of_parameters_offset());
-  const Address size_of_locals    (G5_method, Method::size_of_locals_offset());
+  const Address constMethod       (G5_method, Method::const_offset());
   // Seems like G5_method is live at the point this is used. So we could make this look consistent
   // and use in the asserts.
   const Address access_flags      (Lmethod,   Method::access_flags_offset());
@@ -1307,8 +1309,13 @@ address InterpreterGenerator::generate_normal_entry(bool synchronized) {
   init_value = G0;
   Label clear_loop;
 
+  const Register RconstMethod = O1;
+  const Address size_of_parameters(RconstMethod, ConstMethod::size_of_parameters_offset());
+  const Address size_of_locals    (RconstMethod, ConstMethod::size_of_locals_offset());
+
   // NOTE: If you change the frame layout, this code will need to
   // be updated!
+  __ ld_ptr( constMethod, RconstMethod );
   __ lduh( size_of_locals, O2 );
   __ lduh( size_of_parameters, O1 );
   __ sll( O2, Interpreter::logStackElementSize, O2);
@@ -1823,9 +1830,13 @@ void TemplateInterpreterGenerator::generate_throw_exception() {
 
     const Register Gtmp1 = G3_scratch;
     const Register Gtmp2 = G1_scratch;
+    const Register RconstMethod = Gtmp1;
+    const Address constMethod(Lmethod, Method::const_offset());
+    const Address size_of_parameters(RconstMethod, ConstMethod::size_of_parameters_offset());
 
     // Compute size of arguments for saving when returning to deoptimized caller
-    __ lduh(Lmethod, in_bytes(Method::size_of_parameters_offset()), Gtmp1);
+    __ ld_ptr(constMethod, RconstMethod);
+    __ lduh(size_of_parameters, Gtmp1);
     __ sll(Gtmp1, Interpreter::logStackElementSize, Gtmp1);
     __ sub(Llocals, Gtmp1, Gtmp2);
     __ add(Gtmp2, wordSize, Gtmp2);
diff --git a/src/cpu/sparc/vm/templateTable_sparc.cpp b/src/cpu/sparc/vm/templateTable_sparc.cpp
index 5b1887dbbcae19b6ad50f593e16fa1471cd29846..931342ded121a8db79f5218e1ee9e7d7b880e612 100644
--- a/src/cpu/sparc/vm/templateTable_sparc.cpp
+++ b/src/cpu/sparc/vm/templateTable_sparc.cpp
@@ -3040,7 +3040,8 @@ void TemplateTable::invokevfinal_helper(Register Rscratch, Register Rret) {
   Register Rtemp = G4_scratch;
 
   // Load receiver from stack slot
-  __ lduh(G5_method, in_bytes(Method::size_of_parameters_offset()), G4_scratch);
+  __ ld_ptr(G5_method, in_bytes(Method::const_offset()), G4_scratch);
+  __ lduh(G4_scratch, in_bytes(ConstMethod::size_of_parameters_offset()), G4_scratch);
   __ load_receiver(G4_scratch, O0);
 
   // receiver NULL check
diff --git a/src/cpu/x86/vm/assembler_x86.cpp b/src/cpu/x86/vm/assembler_x86.cpp
index 0a51534ed2425bdc7dec0a922ab695a8d427df76..6f97f963084c3a6475b9f305e9a3ffdf6d838f8c 100644
--- a/src/cpu/x86/vm/assembler_x86.cpp
+++ b/src/cpu/x86/vm/assembler_x86.cpp
@@ -226,9 +226,9 @@ void Assembler::emit_arith_b(int op1, int op2, Register dst, int imm8) {
   assert(isByte(op1) && isByte(op2), "wrong opcode");
   assert(isByte(imm8), "not a byte");
   assert((op1 & 0x01) == 0, "should be 8bit operation");
-  emit_byte(op1);
-  emit_byte(op2 | encode(dst));
-  emit_byte(imm8);
+  emit_int8(op1);
+  emit_int8(op2 | encode(dst));
+  emit_int8(imm8);
 }
 
 
@@ -237,12 +237,12 @@ void Assembler::emit_arith(int op1, int op2, Register dst, int32_t imm32) {
   assert((op1 & 0x01) == 1, "should be 32bit operation");
   assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
   if (is8bit(imm32)) {
-    emit_byte(op1 | 0x02); // set sign bit
-    emit_byte(op2 | encode(dst));
-    emit_byte(imm32 & 0xFF);
+    emit_int8(op1 | 0x02); // set sign bit
+    emit_int8(op2 | encode(dst));
+    emit_int8(imm32 & 0xFF);
   } else {
-    emit_byte(op1);
-    emit_byte(op2 | encode(dst));
+    emit_int8(op1);
+    emit_int8(op2 | encode(dst));
     emit_long(imm32);
   }
 }
@@ -252,8 +252,8 @@ void Assembler::emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32)
   assert(isByte(op1) && isByte(op2), "wrong opcode");
   assert((op1 & 0x01) == 1, "should be 32bit operation");
   assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
-  emit_byte(op1);
-  emit_byte(op2 | encode(dst));
+  emit_int8(op1);
+  emit_int8(op2 | encode(dst));
   emit_long(imm32);
 }
 
@@ -262,11 +262,11 @@ void Assembler::emit_arith_operand(int op1, Register rm, Address adr, int32_t im
   assert((op1 & 0x01) == 1, "should be 32bit operation");
   assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
   if (is8bit(imm32)) {
-    emit_byte(op1 | 0x02); // set sign bit
+    emit_int8(op1 | 0x02); // set sign bit
     emit_operand(rm, adr, 1);
-    emit_byte(imm32 & 0xFF);
+    emit_int8(imm32 & 0xFF);
   } else {
-    emit_byte(op1);
+    emit_int8(op1);
     emit_operand(rm, adr, 4);
     emit_long(imm32);
   }
@@ -275,8 +275,8 @@ void Assembler::emit_arith_operand(int op1, Register rm, Address adr, int32_t im
 
 void Assembler::emit_arith(int op1, int op2, Register dst, Register src) {
   assert(isByte(op1) && isByte(op2), "wrong opcode");
-  emit_byte(op1);
-  emit_byte(op2 | encode(dst) << 3 | encode(src));
+  emit_int8(op1);
+  emit_int8(op2 | encode(dst) << 3 | encode(src));
 }
 
 
@@ -301,21 +301,21 @@ void Assembler::emit_operand(Register reg, Register base, Register index,
         // [base + index*scale]
         // [00 reg 100][ss index base]
         assert(index != rsp, "illegal addressing mode");
-        emit_byte(0x04 | regenc);
-        emit_byte(scale << 6 | indexenc | baseenc);
+        emit_int8(0x04 | regenc);
+        emit_int8(scale << 6 | indexenc | baseenc);
       } else if (is8bit(disp) && rtype == relocInfo::none) {
         // [base + index*scale + imm8]
         // [01 reg 100][ss index base] imm8
         assert(index != rsp, "illegal addressing mode");
-        emit_byte(0x44 | regenc);
-        emit_byte(scale << 6 | indexenc | baseenc);
-        emit_byte(disp & 0xFF);
+        emit_int8(0x44 | regenc);
+        emit_int8(scale << 6 | indexenc | baseenc);
+        emit_int8(disp & 0xFF);
       } else {
         // [base + index*scale + disp32]
         // [10 reg 100][ss index base] disp32
         assert(index != rsp, "illegal addressing mode");
-        emit_byte(0x84 | regenc);
-        emit_byte(scale << 6 | indexenc | baseenc);
+        emit_int8(0x84 | regenc);
+        emit_int8(scale << 6 | indexenc | baseenc);
         emit_data(disp, rspec, disp32_operand);
       }
     } else if (base == rsp LP64_ONLY(|| base == r12)) {
@@ -323,19 +323,19 @@ void Assembler::emit_operand(Register reg, Register base, Register index,
       if (disp == 0 && rtype == relocInfo::none) {
         // [rsp]
         // [00 reg 100][00 100 100]
-        emit_byte(0x04 | regenc);
-        emit_byte(0x24);
+        emit_int8(0x04 | regenc);
+        emit_int8(0x24);
       } else if (is8bit(disp) && rtype == relocInfo::none) {
         // [rsp + imm8]
         // [01 reg 100][00 100 100] disp8
-        emit_byte(0x44 | regenc);
-        emit_byte(0x24);
-        emit_byte(disp & 0xFF);
+        emit_int8(0x44 | regenc);
+        emit_int8(0x24);
+        emit_int8(disp & 0xFF);
       } else {
         // [rsp + imm32]
         // [10 reg 100][00 100 100] disp32
-        emit_byte(0x84 | regenc);
-        emit_byte(0x24);
+        emit_int8(0x84 | regenc);
+        emit_int8(0x24);
         emit_data(disp, rspec, disp32_operand);
       }
     } else {
@@ -345,16 +345,16 @@ void Assembler::emit_operand(Register reg, Register base, Register index,
           base != rbp LP64_ONLY(&& base != r13)) {
         // [base]
         // [00 reg base]
-        emit_byte(0x00 | regenc | baseenc);
+        emit_int8(0x00 | regenc | baseenc);
       } else if (is8bit(disp) && rtype == relocInfo::none) {
         // [base + disp8]
         // [01 reg base] disp8
-        emit_byte(0x40 | regenc | baseenc);
-        emit_byte(disp & 0xFF);
+        emit_int8(0x40 | regenc | baseenc);
+        emit_int8(disp & 0xFF);
       } else {
         // [base + disp32]
         // [10 reg base] disp32
-        emit_byte(0x80 | regenc | baseenc);
+        emit_int8(0x80 | regenc | baseenc);
         emit_data(disp, rspec, disp32_operand);
       }
     }
@@ -364,14 +364,14 @@ void Assembler::emit_operand(Register reg, Register base, Register index,
       // [index*scale + disp]
       // [00 reg 100][ss index 101] disp32
       assert(index != rsp, "illegal addressing mode");
-      emit_byte(0x04 | regenc);
-      emit_byte(scale << 6 | indexenc | 0x05);
+      emit_int8(0x04 | regenc);
+      emit_int8(scale << 6 | indexenc | 0x05);
       emit_data(disp, rspec, disp32_operand);
     } else if (rtype != relocInfo::none ) {
       // [disp] (64bit) RIP-RELATIVE (32bit) abs
       // [00 000 101] disp32
 
-      emit_byte(0x05 | regenc);
+      emit_int8(0x05 | regenc);
       // Note that the RIP-rel. correction applies to the generated
       // disp field, but _not_ to the target address in the rspec.
 
@@ -391,8 +391,8 @@ void Assembler::emit_operand(Register reg, Register base, Register index,
       // 32bit never did this, did everything as the rip-rel/disp code above
       // [disp] ABSOLUTE
       // [00 reg 100][00 100 101] disp32
-      emit_byte(0x04 | regenc);
-      emit_byte(0x25);
+      emit_int8(0x04 | regenc);
+      emit_int8(0x25);
       emit_data(disp, rspec, disp32_operand);
     }
   }
@@ -883,8 +883,8 @@ void Assembler::emit_operand(Address adr, MMXRegister reg) {
 void Assembler::emit_farith(int b1, int b2, int i) {
   assert(isByte(b1) && isByte(b2), "wrong opcode");
   assert(0 <= i &&  i < 8, "illegal stack offset");
-  emit_byte(b1);
-  emit_byte(b2 + i);
+  emit_int8(b1);
+  emit_int8(b2 + i);
 }
 
 
@@ -899,7 +899,7 @@ void Assembler::adcl(Address dst, int32_t imm32) {
 void Assembler::adcl(Address dst, Register src) {
   InstructionMark im(this);
   prefix(dst, src);
-  emit_byte(0x11);
+  emit_int8(0x11);
   emit_operand(src, dst);
 }
 
@@ -911,7 +911,7 @@ void Assembler::adcl(Register dst, int32_t imm32) {
 void Assembler::adcl(Register dst, Address src) {
   InstructionMark im(this);
   prefix(src, dst);
-  emit_byte(0x13);
+  emit_int8(0x13);
   emit_operand(dst, src);
 }
 
@@ -929,7 +929,7 @@ void Assembler::addl(Address dst, int32_t imm32) {
 void Assembler::addl(Address dst, Register src) {
   InstructionMark im(this);
   prefix(dst, src);
-  emit_byte(0x01);
+  emit_int8(0x01);
   emit_operand(src, dst);
 }
 
@@ -941,7 +941,7 @@ void Assembler::addl(Register dst, int32_t imm32) {
 void Assembler::addl(Register dst, Address src) {
   InstructionMark im(this);
   prefix(src, dst);
-  emit_byte(0x03);
+  emit_int8(0x03);
   emit_operand(dst, src);
 }
 
@@ -953,38 +953,40 @@ void Assembler::addl(Register dst, Register src) {
 void Assembler::addr_nop_4() {
   assert(UseAddressNop, "no CPU support");
   // 4 bytes: NOP DWORD PTR [EAX+0]
-  emit_byte(0x0F);
-  emit_byte(0x1F);
-  emit_byte(0x40); // emit_rm(cbuf, 0x1, EAX_enc, EAX_enc);
-  emit_byte(0);    // 8-bits offset (1 byte)
+  emit_int8(0x0F);
+  emit_int8(0x1F);
+  emit_int8(0x40); // emit_rm(cbuf, 0x1, EAX_enc, EAX_enc);
+  emit_int8(0);    // 8-bits offset (1 byte)
 }
 
 void Assembler::addr_nop_5() {
   assert(UseAddressNop, "no CPU support");
   // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset
-  emit_byte(0x0F);
-  emit_byte(0x1F);
-  emit_byte(0x44); // emit_rm(cbuf, 0x1, EAX_enc, 0x4);
-  emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
-  emit_byte(0);    // 8-bits offset (1 byte)
+  emit_int8(0x0F);
+  emit_int8(0x1F);
+  emit_int8(0x44); // emit_rm(cbuf, 0x1, EAX_enc, 0x4);
+  emit_int8(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
+  emit_int8(0);    // 8-bits offset (1 byte)
 }
 
 void Assembler::addr_nop_7() {
   assert(UseAddressNop, "no CPU support");
   // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset
-  emit_byte(0x0F);
-  emit_byte(0x1F);
-  emit_byte(0x80); // emit_rm(cbuf, 0x2, EAX_enc, EAX_enc);
+  emit_int8(0x0F);
+  emit_int8(0x1F);
+  emit_int8((unsigned char)0x80);
+                   // emit_rm(cbuf, 0x2, EAX_enc, EAX_enc);
   emit_long(0);    // 32-bits offset (4 bytes)
 }
 
 void Assembler::addr_nop_8() {
   assert(UseAddressNop, "no CPU support");
   // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset
-  emit_byte(0x0F);
-  emit_byte(0x1F);
-  emit_byte(0x84); // emit_rm(cbuf, 0x2, EAX_enc, 0x4);
-  emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
+  emit_int8(0x0F);
+  emit_int8(0x1F);
+  emit_int8((unsigned char)0x84);
+                   // emit_rm(cbuf, 0x2, EAX_enc, 0x4);
+  emit_int8(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
   emit_long(0);    // 32-bits offset (4 bytes)
 }
 
@@ -1012,67 +1014,67 @@ void Assembler::aesdec(XMMRegister dst, Address src) {
   assert(VM_Version::supports_aes(), "");
   InstructionMark im(this);
   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
-  emit_byte(0xde);
+  emit_int8((unsigned char)0xDE);
   emit_operand(dst, src);
 }
 
 void Assembler::aesdec(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_aes(), "");
   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
-  emit_byte(0xde);
-  emit_byte(0xC0 | encode);
+  emit_int8((unsigned char)0xDE);
+  emit_int8(0xC0 | encode);
 }
 
 void Assembler::aesdeclast(XMMRegister dst, Address src) {
   assert(VM_Version::supports_aes(), "");
   InstructionMark im(this);
   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
-  emit_byte(0xdf);
+  emit_int8((unsigned char)0xDF);
   emit_operand(dst, src);
 }
 
 void Assembler::aesdeclast(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_aes(), "");
   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
-  emit_byte(0xdf);
-  emit_byte(0xC0 | encode);
+  emit_int8((unsigned char)0xDF);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::aesenc(XMMRegister dst, Address src) {
   assert(VM_Version::supports_aes(), "");
   InstructionMark im(this);
   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
-  emit_byte(0xdc);
+  emit_int8((unsigned char)0xDC);
   emit_operand(dst, src);
 }
 
 void Assembler::aesenc(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_aes(), "");
   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
-  emit_byte(0xdc);
-  emit_byte(0xC0 | encode);
+  emit_int8((unsigned char)0xDC);
+  emit_int8(0xC0 | encode);
 }
 
 void Assembler::aesenclast(XMMRegister dst, Address src) {
   assert(VM_Version::supports_aes(), "");
   InstructionMark im(this);
   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
-  emit_byte(0xdd);
+  emit_int8((unsigned char)0xDD);
   emit_operand(dst, src);
 }
 
 void Assembler::aesenclast(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_aes(), "");
   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
-  emit_byte(0xdd);
-  emit_byte(0xC0 | encode);
+  emit_int8((unsigned char)0xDD);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 
 void Assembler::andl(Address dst, int32_t imm32) {
   InstructionMark im(this);
   prefix(dst);
-  emit_byte(0x81);
+  emit_int8((unsigned char)0x81);
   emit_operand(rsp, dst, 4);
   emit_long(imm32);
 }
@@ -1085,7 +1087,7 @@ void Assembler::andl(Register dst, int32_t imm32) {
 void Assembler::andl(Register dst, Address src) {
   InstructionMark im(this);
   prefix(src, dst);
-  emit_byte(0x23);
+  emit_int8(0x23);
   emit_operand(dst, src);
 }
 
@@ -1096,23 +1098,23 @@ void Assembler::andl(Register dst, Register src) {
 
 void Assembler::bsfl(Register dst, Register src) {
   int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
-  emit_byte(0xBC);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xBC);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::bsrl(Register dst, Register src) {
   assert(!VM_Version::supports_lzcnt(), "encoding is treated as LZCNT");
   int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
-  emit_byte(0xBD);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xBD);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::bswapl(Register reg) { // bswap
   int encode = prefix_and_encode(reg->encoding());
-  emit_byte(0x0F);
-  emit_byte(0xC8 | encode);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)(0xC8 | encode));
 }
 
 void Assembler::call(Label& L, relocInfo::relocType rtype) {
@@ -1125,36 +1127,36 @@ void Assembler::call(Label& L, relocInfo::relocType rtype) {
     assert(offs <= 0, "assembler error");
     InstructionMark im(this);
     // 1110 1000 #32-bit disp
-    emit_byte(0xE8);
+    emit_int8((unsigned char)0xE8);
     emit_data(offs - long_size, rtype, operand);
   } else {
     InstructionMark im(this);
     // 1110 1000 #32-bit disp
     L.add_patch_at(code(), locator());
 
-    emit_byte(0xE8);
+    emit_int8((unsigned char)0xE8);
     emit_data(int(0), rtype, operand);
   }
 }
 
 void Assembler::call(Register dst) {
   int encode = prefix_and_encode(dst->encoding());
-  emit_byte(0xFF);
-  emit_byte(0xD0 | encode);
+  emit_int8((unsigned char)0xFF);
+  emit_int8((unsigned char)(0xD0 | encode));
 }
 
 
 void Assembler::call(Address adr) {
   InstructionMark im(this);
   prefix(adr);
-  emit_byte(0xFF);
+  emit_int8((unsigned char)0xFF);
   emit_operand(rdx, adr);
 }
 
 void Assembler::call_literal(address entry, RelocationHolder const& rspec) {
   assert(entry != NULL, "call most probably wrong");
   InstructionMark im(this);
-  emit_byte(0xE8);
+  emit_int8((unsigned char)0xE8);
   intptr_t disp = entry - (pc() + sizeof(int32_t));
   assert(is_simm32(disp), "must be 32bit offset (call2)");
   // Technically, should use call32_operand, but this format is
@@ -1165,42 +1167,42 @@ void Assembler::call_literal(address entry, RelocationHolder const& rspec) {
 }
 
 void Assembler::cdql() {
-  emit_byte(0x99);
+  emit_int8((unsigned char)0x99);
 }
 
 void Assembler::cld() {
-  emit_byte(0xfc);
+  emit_int8((unsigned char)0xFC);
 }
 
 void Assembler::cmovl(Condition cc, Register dst, Register src) {
   NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
   int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
-  emit_byte(0x40 | cc);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x0F);
+  emit_int8(0x40 | cc);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 
 void Assembler::cmovl(Condition cc, Register dst, Address src) {
   NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
   prefix(src, dst);
-  emit_byte(0x0F);
-  emit_byte(0x40 | cc);
+  emit_int8(0x0F);
+  emit_int8(0x40 | cc);
   emit_operand(dst, src);
 }
 
 void Assembler::cmpb(Address dst, int imm8) {
   InstructionMark im(this);
   prefix(dst);
-  emit_byte(0x80);
+  emit_int8((unsigned char)0x80);
   emit_operand(rdi, dst, 1);
-  emit_byte(imm8);
+  emit_int8(imm8);
 }
 
 void Assembler::cmpl(Address dst, int32_t imm32) {
   InstructionMark im(this);
   prefix(dst);
-  emit_byte(0x81);
+  emit_int8((unsigned char)0x81);
   emit_operand(rdi, dst, 4);
   emit_long(imm32);
 }
@@ -1219,17 +1221,17 @@ void Assembler::cmpl(Register dst, Register src) {
 void Assembler::cmpl(Register dst, Address  src) {
   InstructionMark im(this);
   prefix(src, dst);
-  emit_byte(0x3B);
+  emit_int8((unsigned char)0x3B);
   emit_operand(dst, src);
 }
 
 void Assembler::cmpw(Address dst, int imm16) {
   InstructionMark im(this);
   assert(!dst.base_needs_rex() && !dst.index_needs_rex(), "no extended registers");
-  emit_byte(0x66);
-  emit_byte(0x81);
+  emit_int8(0x66);
+  emit_int8((unsigned char)0x81);
   emit_operand(rdi, dst, 2);
-  emit_word(imm16);
+  emit_int16(imm16);
 }
 
 // The 32-bit cmpxchg compares the value at adr with the contents of rax,
@@ -1238,8 +1240,8 @@ void Assembler::cmpw(Address dst, int imm16) {
 void Assembler::cmpxchgl(Register reg, Address adr) { // cmpxchg
   InstructionMark im(this);
   prefix(adr, reg);
-  emit_byte(0x0F);
-  emit_byte(0xB1);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xB1);
   emit_operand(reg, adr);
 }
 
@@ -1266,8 +1268,8 @@ void Assembler::comiss(XMMRegister dst, XMMRegister src) {
 }
 
 void Assembler::cpuid() {
-  emit_byte(0x0F);
-  emit_byte(0xA2);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xA2);
 }
 
 void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
@@ -1293,8 +1295,8 @@ void Assembler::cvtsd2ss(XMMRegister dst, Address src) {
 void Assembler::cvtsi2sdl(XMMRegister dst, Register src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
-  emit_byte(0x2A);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x2A);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::cvtsi2sdl(XMMRegister dst, Address src) {
@@ -1305,8 +1307,8 @@ void Assembler::cvtsi2sdl(XMMRegister dst, Address src) {
 void Assembler::cvtsi2ssl(XMMRegister dst, Register src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
-  emit_byte(0x2A);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x2A);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::cvtsi2ssl(XMMRegister dst, Address src) {
@@ -1328,22 +1330,22 @@ void Assembler::cvtss2sd(XMMRegister dst, Address src) {
 void Assembler::cvttsd2sil(Register dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F2);
-  emit_byte(0x2C);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x2C);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::cvttss2sil(Register dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
   int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3);
-  emit_byte(0x2C);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x2C);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::decl(Address dst) {
   // Don't use it directly. Use MacroAssembler::decrement() instead.
   InstructionMark im(this);
   prefix(dst);
-  emit_byte(0xFF);
+  emit_int8((unsigned char)0xFF);
   emit_operand(rcx, dst);
 }
 
@@ -1369,43 +1371,43 @@ void Assembler::divss(XMMRegister dst, XMMRegister src) {
 
 void Assembler::emms() {
   NOT_LP64(assert(VM_Version::supports_mmx(), ""));
-  emit_byte(0x0F);
-  emit_byte(0x77);
+  emit_int8(0x0F);
+  emit_int8(0x77);
 }
 
 void Assembler::hlt() {
-  emit_byte(0xF4);
+  emit_int8((unsigned char)0xF4);
 }
 
 void Assembler::idivl(Register src) {
   int encode = prefix_and_encode(src->encoding());
-  emit_byte(0xF7);
-  emit_byte(0xF8 | encode);
+  emit_int8((unsigned char)0xF7);
+  emit_int8((unsigned char)(0xF8 | encode));
 }
 
 void Assembler::divl(Register src) { // Unsigned
   int encode = prefix_and_encode(src->encoding());
-  emit_byte(0xF7);
-  emit_byte(0xF0 | encode);
+  emit_int8((unsigned char)0xF7);
+  emit_int8((unsigned char)(0xF0 | encode));
 }
 
 void Assembler::imull(Register dst, Register src) {
   int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
-  emit_byte(0xAF);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xAF);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 
 void Assembler::imull(Register dst, Register src, int value) {
   int encode = prefix_and_encode(dst->encoding(), src->encoding());
   if (is8bit(value)) {
-    emit_byte(0x6B);
-    emit_byte(0xC0 | encode);
-    emit_byte(value & 0xFF);
+    emit_int8(0x6B);
+    emit_int8((unsigned char)(0xC0 | encode));
+    emit_int8(value & 0xFF);
   } else {
-    emit_byte(0x69);
-    emit_byte(0xC0 | encode);
+    emit_int8(0x69);
+    emit_int8((unsigned char)(0xC0 | encode));
     emit_long(value);
   }
 }
@@ -1414,7 +1416,7 @@ void Assembler::incl(Address dst) {
   // Don't use it directly. Use MacroAssembler::increment() instead.
   InstructionMark im(this);
   prefix(dst);
-  emit_byte(0xFF);
+  emit_int8((unsigned char)0xFF);
   emit_operand(rax, dst);
 }
 
@@ -1430,14 +1432,14 @@ void Assembler::jcc(Condition cc, Label& L, bool maybe_short) {
     intptr_t offs = (intptr_t)dst - (intptr_t)pc();
     if (maybe_short && is8bit(offs - short_size)) {
       // 0111 tttn #8-bit disp
-      emit_byte(0x70 | cc);
-      emit_byte((offs - short_size) & 0xFF);
+      emit_int8(0x70 | cc);
+      emit_int8((offs - short_size) & 0xFF);
     } else {
       // 0000 1111 1000 tttn #32-bit disp
       assert(is_simm32(offs - long_size),
              "must be 32bit offset (call4)");
-      emit_byte(0x0F);
-      emit_byte(0x80 | cc);
+      emit_int8(0x0F);
+      emit_int8((unsigned char)(0x80 | cc));
       emit_long(offs - long_size);
     }
   } else {
@@ -1446,8 +1448,8 @@ void Assembler::jcc(Condition cc, Label& L, bool maybe_short) {
     // Note: use jccb() if label to be bound is very close to get
     //       an 8-bit displacement
     L.add_patch_at(code(), locator());
-    emit_byte(0x0F);
-    emit_byte(0x80 | cc);
+    emit_int8(0x0F);
+    emit_int8((unsigned char)(0x80 | cc));
     emit_long(0);
   }
 }
@@ -1466,20 +1468,20 @@ void Assembler::jccb(Condition cc, Label& L) {
 #endif
     intptr_t offs = (intptr_t)entry - (intptr_t)pc();
     // 0111 tttn #8-bit disp
-    emit_byte(0x70 | cc);
-    emit_byte((offs - short_size) & 0xFF);
+    emit_int8(0x70 | cc);
+    emit_int8((offs - short_size) & 0xFF);
   } else {
     InstructionMark im(this);
     L.add_patch_at(code(), locator());
-    emit_byte(0x70 | cc);
-    emit_byte(0);
+    emit_int8(0x70 | cc);
+    emit_int8(0);
   }
 }
 
 void Assembler::jmp(Address adr) {
   InstructionMark im(this);
   prefix(adr);
-  emit_byte(0xFF);
+  emit_int8((unsigned char)0xFF);
   emit_operand(rsp, adr);
 }
 
@@ -1492,10 +1494,10 @@ void Assembler::jmp(Label& L, bool maybe_short) {
     const int long_size = 5;
     intptr_t offs = entry - pc();
     if (maybe_short && is8bit(offs - short_size)) {
-      emit_byte(0xEB);
-      emit_byte((offs - short_size) & 0xFF);
+      emit_int8((unsigned char)0xEB);
+      emit_int8((offs - short_size) & 0xFF);
     } else {
-      emit_byte(0xE9);
+      emit_int8((unsigned char)0xE9);
       emit_long(offs - long_size);
     }
   } else {
@@ -1505,20 +1507,20 @@ void Assembler::jmp(Label& L, bool maybe_short) {
     // force an 8-bit displacement.
     InstructionMark im(this);
     L.add_patch_at(code(), locator());
-    emit_byte(0xE9);
+    emit_int8((unsigned char)0xE9);
     emit_long(0);
   }
 }
 
 void Assembler::jmp(Register entry) {
   int encode = prefix_and_encode(entry->encoding());
-  emit_byte(0xFF);
-  emit_byte(0xE0 | encode);
+  emit_int8((unsigned char)0xFF);
+  emit_int8((unsigned char)(0xE0 | encode));
 }
 
 void Assembler::jmp_literal(address dest, RelocationHolder const& rspec) {
   InstructionMark im(this);
-  emit_byte(0xE9);
+  emit_int8((unsigned char)0xE9);
   assert(dest != NULL, "must have a target");
   intptr_t disp = dest - (pc() + sizeof(int32_t));
   assert(is_simm32(disp), "must be 32bit offset (jmp)");
@@ -1539,13 +1541,13 @@ void Assembler::jmpb(Label& L) {
     assert(is8bit(dist), "Dispacement too large for a short jmp");
 #endif
     intptr_t offs = entry - pc();
-    emit_byte(0xEB);
-    emit_byte((offs - short_size) & 0xFF);
+    emit_int8((unsigned char)0xEB);
+    emit_int8((offs - short_size) & 0xFF);
   } else {
     InstructionMark im(this);
     L.add_patch_at(code(), locator());
-    emit_byte(0xEB);
-    emit_byte(0);
+    emit_int8((unsigned char)0xEB);
+    emit_int8(0);
   }
 }
 
@@ -1553,46 +1555,46 @@ void Assembler::ldmxcsr( Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
   InstructionMark im(this);
   prefix(src);
-  emit_byte(0x0F);
-  emit_byte(0xAE);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xAE);
   emit_operand(as_Register(2), src);
 }
 
 void Assembler::leal(Register dst, Address src) {
   InstructionMark im(this);
 #ifdef _LP64
-  emit_byte(0x67); // addr32
+  emit_int8(0x67); // addr32
   prefix(src, dst);
 #endif // LP64
-  emit_byte(0x8D);
+  emit_int8((unsigned char)0x8D);
   emit_operand(dst, src);
 }
 
 void Assembler::lfence() {
-  emit_byte(0x0F);
-  emit_byte(0xAE);
-  emit_byte(0xE8);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xAE);
+  emit_int8((unsigned char)0xE8);
 }
 
 void Assembler::lock() {
-  emit_byte(0xF0);
+  emit_int8((unsigned char)0xF0);
 }
 
 void Assembler::lzcntl(Register dst, Register src) {
   assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
-  emit_byte(0xF3);
+  emit_int8((unsigned char)0xF3);
   int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
-  emit_byte(0xBD);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xBD);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 // Emit mfence instruction
 void Assembler::mfence() {
   NOT_LP64(assert(VM_Version::supports_sse2(), "unsupported");)
-  emit_byte( 0x0F );
-  emit_byte( 0xAE );
-  emit_byte( 0xF0 );
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xAE);
+  emit_int8((unsigned char)0xF0);
 }
 
 void Assembler::mov(Register dst, Register src) {
@@ -1612,15 +1614,15 @@ void Assembler::movaps(XMMRegister dst, XMMRegister src) {
 void Assembler::movlhps(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
   int encode = simd_prefix_and_encode(dst, src, src, VEX_SIMD_NONE);
-  emit_byte(0x16);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x16);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::movb(Register dst, Address src) {
   NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
   InstructionMark im(this);
   prefix(src, dst, true);
-  emit_byte(0x8A);
+  emit_int8((unsigned char)0x8A);
   emit_operand(dst, src);
 }
 
@@ -1628,9 +1630,9 @@ void Assembler::movb(Register dst, Address src) {
 void Assembler::movb(Address dst, int imm8) {
   InstructionMark im(this);
    prefix(dst);
-  emit_byte(0xC6);
+  emit_int8((unsigned char)0xC6);
   emit_operand(rax, dst, 1);
-  emit_byte(imm8);
+  emit_int8(imm8);
 }
 
 
@@ -1638,30 +1640,30 @@ void Assembler::movb(Address dst, Register src) {
   assert(src->has_byte_register(), "must have byte register");
   InstructionMark im(this);
   prefix(dst, src, true);
-  emit_byte(0x88);
+  emit_int8((unsigned char)0x88);
   emit_operand(src, dst);
 }
 
 void Assembler::movdl(XMMRegister dst, Register src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
-  emit_byte(0x6E);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x6E);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::movdl(Register dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   // swap src/dst to get correct prefix
   int encode = simd_prefix_and_encode(src, dst, VEX_SIMD_66);
-  emit_byte(0x7E);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x7E);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::movdl(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionMark im(this);
   simd_prefix(dst, src, VEX_SIMD_66);
-  emit_byte(0x6E);
+  emit_int8(0x6E);
   emit_operand(dst, src);
 }
 
@@ -1669,7 +1671,7 @@ void Assembler::movdl(Address dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionMark im(this);
   simd_prefix(dst, src, VEX_SIMD_66);
-  emit_byte(0x7E);
+  emit_int8(0x7E);
   emit_operand(src, dst);
 }
 
@@ -1692,7 +1694,7 @@ void Assembler::movdqu(Address dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionMark im(this);
   simd_prefix(dst, src, VEX_SIMD_F3);
-  emit_byte(0x7F);
+  emit_int8(0x7F);
   emit_operand(src, dst);
 }
 
@@ -1701,8 +1703,8 @@ void Assembler::vmovdqu(XMMRegister dst, XMMRegister src) {
   assert(UseAVX, "");
   bool vector256 = true;
   int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, vector256);
-  emit_byte(0x6F);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x6F);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vmovdqu(XMMRegister dst, Address src) {
@@ -1710,7 +1712,7 @@ void Assembler::vmovdqu(XMMRegister dst, Address src) {
   InstructionMark im(this);
   bool vector256 = true;
   vex_prefix(dst, xnoreg, src, VEX_SIMD_F3, vector256);
-  emit_byte(0x6F);
+  emit_int8(0x6F);
   emit_operand(dst, src);
 }
 
@@ -1721,7 +1723,7 @@ void Assembler::vmovdqu(Address dst, XMMRegister src) {
   // swap src<->dst for encoding
   assert(src != xnoreg, "sanity");
   vex_prefix(src, xnoreg, dst, VEX_SIMD_F3, vector256);
-  emit_byte(0x7F);
+  emit_int8(0x7F);
   emit_operand(src, dst);
 }
 
@@ -1729,27 +1731,27 @@ void Assembler::vmovdqu(Address dst, XMMRegister src) {
 
 void Assembler::movl(Register dst, int32_t imm32) {
   int encode = prefix_and_encode(dst->encoding());
-  emit_byte(0xB8 | encode);
+  emit_int8((unsigned char)(0xB8 | encode));
   emit_long(imm32);
 }
 
 void Assembler::movl(Register dst, Register src) {
   int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x8B);
-  emit_byte(0xC0 | encode);
+  emit_int8((unsigned char)0x8B);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::movl(Register dst, Address src) {
   InstructionMark im(this);
   prefix(src, dst);
-  emit_byte(0x8B);
+  emit_int8((unsigned char)0x8B);
   emit_operand(dst, src);
 }
 
 void Assembler::movl(Address dst, int32_t imm32) {
   InstructionMark im(this);
   prefix(dst);
-  emit_byte(0xC7);
+  emit_int8((unsigned char)0xC7);
   emit_operand(rax, dst, 4);
   emit_long(imm32);
 }
@@ -1757,7 +1759,7 @@ void Assembler::movl(Address dst, int32_t imm32) {
 void Assembler::movl(Address dst, Register src) {
   InstructionMark im(this);
   prefix(dst, src);
-  emit_byte(0x89);
+  emit_int8((unsigned char)0x89);
   emit_operand(src, dst);
 }
 
@@ -1771,15 +1773,15 @@ void Assembler::movlpd(XMMRegister dst, Address src) {
 
 void Assembler::movq( MMXRegister dst, Address src ) {
   assert( VM_Version::supports_mmx(), "" );
-  emit_byte(0x0F);
-  emit_byte(0x6F);
+  emit_int8(0x0F);
+  emit_int8(0x6F);
   emit_operand(dst, src);
 }
 
 void Assembler::movq( Address dst, MMXRegister src ) {
   assert( VM_Version::supports_mmx(), "" );
-  emit_byte(0x0F);
-  emit_byte(0x7F);
+  emit_int8(0x0F);
+  emit_int8(0x7F);
   // workaround gcc (3.2.1-7a) bug
   // In that version of gcc with only an emit_operand(MMX, Address)
   // gcc will tail jump and try and reverse the parameters completely
@@ -1793,7 +1795,7 @@ void Assembler::movq(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionMark im(this);
   simd_prefix(dst, src, VEX_SIMD_F3);
-  emit_byte(0x7E);
+  emit_int8(0x7E);
   emit_operand(dst, src);
 }
 
@@ -1801,24 +1803,24 @@ void Assembler::movq(Address dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionMark im(this);
   simd_prefix(dst, src, VEX_SIMD_66);
-  emit_byte(0xD6);
+  emit_int8((unsigned char)0xD6);
   emit_operand(src, dst);
 }
 
 void Assembler::movsbl(Register dst, Address src) { // movsxb
   InstructionMark im(this);
   prefix(src, dst);
-  emit_byte(0x0F);
-  emit_byte(0xBE);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xBE);
   emit_operand(dst, src);
 }
 
 void Assembler::movsbl(Register dst, Register src) { // movsxb
   NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
   int encode = prefix_and_encode(dst->encoding(), src->encoding(), true);
-  emit_byte(0x0F);
-  emit_byte(0xBE);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xBE);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::movsd(XMMRegister dst, XMMRegister src) {
@@ -1835,7 +1837,7 @@ void Assembler::movsd(Address dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionMark im(this);
   simd_prefix(dst, src, VEX_SIMD_F2);
-  emit_byte(0x11);
+  emit_int8(0x11);
   emit_operand(src, dst);
 }
 
@@ -1853,93 +1855,93 @@ void Assembler::movss(Address dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
   InstructionMark im(this);
   simd_prefix(dst, src, VEX_SIMD_F3);
-  emit_byte(0x11);
+  emit_int8(0x11);
   emit_operand(src, dst);
 }
 
 void Assembler::movswl(Register dst, Address src) { // movsxw
   InstructionMark im(this);
   prefix(src, dst);
-  emit_byte(0x0F);
-  emit_byte(0xBF);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xBF);
   emit_operand(dst, src);
 }
 
 void Assembler::movswl(Register dst, Register src) { // movsxw
   int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
-  emit_byte(0xBF);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xBF);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::movw(Address dst, int imm16) {
   InstructionMark im(this);
 
-  emit_byte(0x66); // switch to 16-bit mode
+  emit_int8(0x66); // switch to 16-bit mode
   prefix(dst);
-  emit_byte(0xC7);
+  emit_int8((unsigned char)0xC7);
   emit_operand(rax, dst, 2);
-  emit_word(imm16);
+  emit_int16(imm16);
 }
 
 void Assembler::movw(Register dst, Address src) {
   InstructionMark im(this);
-  emit_byte(0x66);
+  emit_int8(0x66);
   prefix(src, dst);
-  emit_byte(0x8B);
+  emit_int8((unsigned char)0x8B);
   emit_operand(dst, src);
 }
 
 void Assembler::movw(Address dst, Register src) {
   InstructionMark im(this);
-  emit_byte(0x66);
+  emit_int8(0x66);
   prefix(dst, src);
-  emit_byte(0x89);
+  emit_int8((unsigned char)0x89);
   emit_operand(src, dst);
 }
 
 void Assembler::movzbl(Register dst, Address src) { // movzxb
   InstructionMark im(this);
   prefix(src, dst);
-  emit_byte(0x0F);
-  emit_byte(0xB6);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xB6);
   emit_operand(dst, src);
 }
 
 void Assembler::movzbl(Register dst, Register src) { // movzxb
   NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
   int encode = prefix_and_encode(dst->encoding(), src->encoding(), true);
-  emit_byte(0x0F);
-  emit_byte(0xB6);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xB6);
+  emit_int8(0xC0 | encode);
 }
 
 void Assembler::movzwl(Register dst, Address src) { // movzxw
   InstructionMark im(this);
   prefix(src, dst);
-  emit_byte(0x0F);
-  emit_byte(0xB7);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xB7);
   emit_operand(dst, src);
 }
 
 void Assembler::movzwl(Register dst, Register src) { // movzxw
   int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
-  emit_byte(0xB7);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xB7);
+  emit_int8(0xC0 | encode);
 }
 
 void Assembler::mull(Address src) {
   InstructionMark im(this);
   prefix(src);
-  emit_byte(0xF7);
+  emit_int8((unsigned char)0xF7);
   emit_operand(rsp, src);
 }
 
 void Assembler::mull(Register src) {
   int encode = prefix_and_encode(src->encoding());
-  emit_byte(0xF7);
-  emit_byte(0xE0 | encode);
+  emit_int8((unsigned char)0xF7);
+  emit_int8((unsigned char)(0xE0 | encode));
 }
 
 void Assembler::mulsd(XMMRegister dst, Address src) {
@@ -1964,8 +1966,8 @@ void Assembler::mulss(XMMRegister dst, XMMRegister src) {
 
 void Assembler::negl(Register dst) {
   int encode = prefix_and_encode(dst->encoding());
-  emit_byte(0xF7);
-  emit_byte(0xD8 | encode);
+  emit_int8((unsigned char)0xF7);
+  emit_int8((unsigned char)(0xD8 | encode));
 }
 
 void Assembler::nop(int i) {
@@ -1976,7 +1978,7 @@ void Assembler::nop(int i) {
   // speed is not an issue so simply use the single byte traditional nop
   // to do alignment.
 
-  for (; i > 0 ; i--) emit_byte(0x90);
+  for (; i > 0 ; i--) emit_int8((unsigned char)0x90);
   return;
 
 #endif // ASSERT
@@ -2006,33 +2008,35 @@ void Assembler::nop(int i) {
     while(i >= 15) {
       // For Intel don't generate consecutive addess nops (mix with regular nops)
       i -= 15;
-      emit_byte(0x66);   // size prefix
-      emit_byte(0x66);   // size prefix
-      emit_byte(0x66);   // size prefix
+      emit_int8(0x66);   // size prefix
+      emit_int8(0x66);   // size prefix
+      emit_int8(0x66);   // size prefix
       addr_nop_8();
-      emit_byte(0x66);   // size prefix
-      emit_byte(0x66);   // size prefix
-      emit_byte(0x66);   // size prefix
-      emit_byte(0x90);   // nop
+      emit_int8(0x66);   // size prefix
+      emit_int8(0x66);   // size prefix
+      emit_int8(0x66);   // size prefix
+      emit_int8((unsigned char)0x90);
+                         // nop
     }
     switch (i) {
       case 14:
-        emit_byte(0x66); // size prefix
+        emit_int8(0x66); // size prefix
       case 13:
-        emit_byte(0x66); // size prefix
+        emit_int8(0x66); // size prefix
       case 12:
         addr_nop_8();
-        emit_byte(0x66); // size prefix
-        emit_byte(0x66); // size prefix
-        emit_byte(0x66); // size prefix
-        emit_byte(0x90); // nop
+        emit_int8(0x66); // size prefix
+        emit_int8(0x66); // size prefix
+        emit_int8(0x66); // size prefix
+        emit_int8((unsigned char)0x90);
+                         // nop
         break;
       case 11:
-        emit_byte(0x66); // size prefix
+        emit_int8(0x66); // size prefix
       case 10:
-        emit_byte(0x66); // size prefix
+        emit_int8(0x66); // size prefix
       case 9:
-        emit_byte(0x66); // size prefix
+        emit_int8(0x66); // size prefix
       case 8:
         addr_nop_8();
         break;
@@ -2040,7 +2044,7 @@ void Assembler::nop(int i) {
         addr_nop_7();
         break;
       case 6:
-        emit_byte(0x66); // size prefix
+        emit_int8(0x66); // size prefix
       case 5:
         addr_nop_5();
         break;
@@ -2049,11 +2053,12 @@ void Assembler::nop(int i) {
         break;
       case 3:
         // Don't use "0x0F 0x1F 0x00" - need patching safe padding
-        emit_byte(0x66); // size prefix
+        emit_int8(0x66); // size prefix
       case 2:
-        emit_byte(0x66); // size prefix
+        emit_int8(0x66); // size prefix
       case 1:
-        emit_byte(0x90); // nop
+        emit_int8((unsigned char)0x90);
+                         // nop
         break;
       default:
         assert(i == 0, " ");
@@ -2086,24 +2091,24 @@ void Assembler::nop(int i) {
 
     while(i >= 22) {
       i -= 11;
-      emit_byte(0x66); // size prefix
-      emit_byte(0x66); // size prefix
-      emit_byte(0x66); // size prefix
+      emit_int8(0x66); // size prefix
+      emit_int8(0x66); // size prefix
+      emit_int8(0x66); // size prefix
       addr_nop_8();
     }
     // Generate first nop for size between 21-12
     switch (i) {
       case 21:
         i -= 1;
-        emit_byte(0x66); // size prefix
+        emit_int8(0x66); // size prefix
       case 20:
       case 19:
         i -= 1;
-        emit_byte(0x66); // size prefix
+        emit_int8(0x66); // size prefix
       case 18:
       case 17:
         i -= 1;
-        emit_byte(0x66); // size prefix
+        emit_int8(0x66); // size prefix
       case 16:
       case 15:
         i -= 8;
@@ -2116,7 +2121,7 @@ void Assembler::nop(int i) {
         break;
       case 12:
         i -= 6;
-        emit_byte(0x66); // size prefix
+        emit_int8(0x66); // size prefix
         addr_nop_5();
         break;
       default:
@@ -2126,11 +2131,11 @@ void Assembler::nop(int i) {
     // Generate second nop for size between 11-1
     switch (i) {
       case 11:
-        emit_byte(0x66); // size prefix
+        emit_int8(0x66); // size prefix
       case 10:
-        emit_byte(0x66); // size prefix
+        emit_int8(0x66); // size prefix
       case 9:
-        emit_byte(0x66); // size prefix
+        emit_int8(0x66); // size prefix
       case 8:
         addr_nop_8();
         break;
@@ -2138,7 +2143,7 @@ void Assembler::nop(int i) {
         addr_nop_7();
         break;
       case 6:
-        emit_byte(0x66); // size prefix
+        emit_int8(0x66); // size prefix
       case 5:
         addr_nop_5();
         break;
@@ -2147,11 +2152,12 @@ void Assembler::nop(int i) {
         break;
       case 3:
         // Don't use "0x0F 0x1F 0x00" - need patching safe padding
-        emit_byte(0x66); // size prefix
+        emit_int8(0x66); // size prefix
       case 2:
-        emit_byte(0x66); // size prefix
+        emit_int8(0x66); // size prefix
       case 1:
-        emit_byte(0x90); // nop
+        emit_int8((unsigned char)0x90);
+                         // nop
         break;
       default:
         assert(i == 0, " ");
@@ -2174,42 +2180,43 @@ void Assembler::nop(int i) {
   //
   while(i > 12) {
     i -= 4;
-    emit_byte(0x66); // size prefix
-    emit_byte(0x66);
-    emit_byte(0x66);
-    emit_byte(0x90); // nop
+    emit_int8(0x66); // size prefix
+    emit_int8(0x66);
+    emit_int8(0x66);
+    emit_int8((unsigned char)0x90);
+                     // nop
   }
   // 1 - 12 nops
   if(i > 8) {
     if(i > 9) {
       i -= 1;
-      emit_byte(0x66);
+      emit_int8(0x66);
     }
     i -= 3;
-    emit_byte(0x66);
-    emit_byte(0x66);
-    emit_byte(0x90);
+    emit_int8(0x66);
+    emit_int8(0x66);
+    emit_int8((unsigned char)0x90);
   }
   // 1 - 8 nops
   if(i > 4) {
     if(i > 6) {
       i -= 1;
-      emit_byte(0x66);
+      emit_int8(0x66);
     }
     i -= 3;
-    emit_byte(0x66);
-    emit_byte(0x66);
-    emit_byte(0x90);
+    emit_int8(0x66);
+    emit_int8(0x66);
+    emit_int8((unsigned char)0x90);
   }
   switch (i) {
     case 4:
-      emit_byte(0x66);
+      emit_int8(0x66);
     case 3:
-      emit_byte(0x66);
+      emit_int8(0x66);
     case 2:
-      emit_byte(0x66);
+      emit_int8(0x66);
     case 1:
-      emit_byte(0x90);
+      emit_int8((unsigned char)0x90);
       break;
     default:
       assert(i == 0, " ");
@@ -2218,8 +2225,8 @@ void Assembler::nop(int i) {
 
 void Assembler::notl(Register dst) {
   int encode = prefix_and_encode(dst->encoding());
-  emit_byte(0xF7);
-  emit_byte(0xD0 | encode );
+  emit_int8((unsigned char)0xF7);
+  emit_int8((unsigned char)(0xD0 | encode));
 }
 
 void Assembler::orl(Address dst, int32_t imm32) {
@@ -2236,7 +2243,7 @@ void Assembler::orl(Register dst, int32_t imm32) {
 void Assembler::orl(Register dst, Address src) {
   InstructionMark im(this);
   prefix(src, dst);
-  emit_byte(0x0B);
+  emit_int8(0x0B);
   emit_operand(dst, src);
 }
 
@@ -2260,61 +2267,61 @@ void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) {
   assert(VM_Version::supports_sse4_2(), "");
   InstructionMark im(this);
   simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A);
-  emit_byte(0x61);
+  emit_int8(0x61);
   emit_operand(dst, src);
-  emit_byte(imm8);
+  emit_int8(imm8);
 }
 
 void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) {
   assert(VM_Version::supports_sse4_2(), "");
   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A);
-  emit_byte(0x61);
-  emit_byte(0xC0 | encode);
-  emit_byte(imm8);
+  emit_int8(0x61);
+  emit_int8((unsigned char)(0xC0 | encode));
+  emit_int8(imm8);
 }
 
 void Assembler::pmovzxbw(XMMRegister dst, Address src) {
   assert(VM_Version::supports_sse4_1(), "");
   InstructionMark im(this);
   simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
-  emit_byte(0x30);
+  emit_int8(0x30);
   emit_operand(dst, src);
 }
 
 void Assembler::pmovzxbw(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_sse4_1(), "");
   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
-  emit_byte(0x30);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x30);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 // generic
 void Assembler::pop(Register dst) {
   int encode = prefix_and_encode(dst->encoding());
-  emit_byte(0x58 | encode);
+  emit_int8(0x58 | encode);
 }
 
 void Assembler::popcntl(Register dst, Address src) {
   assert(VM_Version::supports_popcnt(), "must support");
   InstructionMark im(this);
-  emit_byte(0xF3);
+  emit_int8((unsigned char)0xF3);
   prefix(src, dst);
-  emit_byte(0x0F);
-  emit_byte(0xB8);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xB8);
   emit_operand(dst, src);
 }
 
 void Assembler::popcntl(Register dst, Register src) {
   assert(VM_Version::supports_popcnt(), "must support");
-  emit_byte(0xF3);
+  emit_int8((unsigned char)0xF3);
   int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
-  emit_byte(0xB8);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xB8);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::popf() {
-  emit_byte(0x9D);
+  emit_int8((unsigned char)0x9D);
 }
 
 #ifndef _LP64 // no 32bit push/pop on amd64
@@ -2322,21 +2329,21 @@ void Assembler::popl(Address dst) {
   // NOTE: this will adjust stack by 8byte on 64bits
   InstructionMark im(this);
   prefix(dst);
-  emit_byte(0x8F);
+  emit_int8((unsigned char)0x8F);
   emit_operand(rax, dst);
 }
 #endif
 
 void Assembler::prefetch_prefix(Address src) {
   prefix(src);
-  emit_byte(0x0F);
+  emit_int8(0x0F);
 }
 
 void Assembler::prefetchnta(Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
   InstructionMark im(this);
   prefetch_prefix(src);
-  emit_byte(0x18);
+  emit_int8(0x18);
   emit_operand(rax, src); // 0, src
 }
 
@@ -2344,7 +2351,7 @@ void Assembler::prefetchr(Address src) {
   assert(VM_Version::supports_3dnow_prefetch(), "must support");
   InstructionMark im(this);
   prefetch_prefix(src);
-  emit_byte(0x0D);
+  emit_int8(0x0D);
   emit_operand(rax, src); // 0, src
 }
 
@@ -2352,7 +2359,7 @@ void Assembler::prefetcht0(Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
   InstructionMark im(this);
   prefetch_prefix(src);
-  emit_byte(0x18);
+  emit_int8(0x18);
   emit_operand(rcx, src); // 1, src
 }
 
@@ -2360,7 +2367,7 @@ void Assembler::prefetcht1(Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
   InstructionMark im(this);
   prefetch_prefix(src);
-  emit_byte(0x18);
+  emit_int8(0x18);
   emit_operand(rdx, src); // 2, src
 }
 
@@ -2368,7 +2375,7 @@ void Assembler::prefetcht2(Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
   InstructionMark im(this);
   prefetch_prefix(src);
-  emit_byte(0x18);
+  emit_int8(0x18);
   emit_operand(rbx, src); // 3, src
 }
 
@@ -2376,27 +2383,26 @@ void Assembler::prefetchw(Address src) {
   assert(VM_Version::supports_3dnow_prefetch(), "must support");
   InstructionMark im(this);
   prefetch_prefix(src);
-  emit_byte(0x0D);
+  emit_int8(0x0D);
   emit_operand(rcx, src); // 1, src
 }
 
 void Assembler::prefix(Prefix p) {
-  a_byte(p);
+  emit_int8(p);
 }
 
 void Assembler::pshufb(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_ssse3(), "");
   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
-  emit_byte(0x00);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x00);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::pshufb(XMMRegister dst, Address src) {
   assert(VM_Version::supports_ssse3(), "");
-  assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
   InstructionMark im(this);
   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
-  emit_byte(0x00);
+  emit_int8(0x00);
   emit_operand(dst, src);
 }
 
@@ -2404,7 +2410,7 @@ void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) {
   assert(isByte(mode), "invalid value");
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   emit_simd_arith_nonds(0x70, dst, src, VEX_SIMD_66);
-  emit_byte(mode & 0xFF);
+  emit_int8(mode & 0xFF);
 
 }
 
@@ -2414,16 +2420,16 @@ void Assembler::pshufd(XMMRegister dst, Address src, int mode) {
   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
   InstructionMark im(this);
   simd_prefix(dst, src, VEX_SIMD_66);
-  emit_byte(0x70);
+  emit_int8(0x70);
   emit_operand(dst, src);
-  emit_byte(mode & 0xFF);
+  emit_int8(mode & 0xFF);
 }
 
 void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
   assert(isByte(mode), "invalid value");
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   emit_simd_arith_nonds(0x70, dst, src, VEX_SIMD_F2);
-  emit_byte(mode & 0xFF);
+  emit_int8(mode & 0xFF);
 }
 
 void Assembler::pshuflw(XMMRegister dst, Address src, int mode) {
@@ -2432,18 +2438,18 @@ void Assembler::pshuflw(XMMRegister dst, Address src, int mode) {
   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
   InstructionMark im(this);
   simd_prefix(dst, src, VEX_SIMD_F2);
-  emit_byte(0x70);
+  emit_int8(0x70);
   emit_operand(dst, src);
-  emit_byte(mode & 0xFF);
+  emit_int8(mode & 0xFF);
 }
 
 void Assembler::psrldq(XMMRegister dst, int shift) {
   // Shift 128 bit value in xmm register by number of bytes.
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   int encode = simd_prefix_and_encode(xmm3, dst, dst, VEX_SIMD_66);
-  emit_byte(0x73);
-  emit_byte(0xC0 | encode);
-  emit_byte(shift);
+  emit_int8(0x73);
+  emit_int8((unsigned char)(0xC0 | encode));
+  emit_int8(shift);
 }
 
 void Assembler::ptest(XMMRegister dst, Address src) {
@@ -2451,15 +2457,15 @@ void Assembler::ptest(XMMRegister dst, Address src) {
   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
   InstructionMark im(this);
   simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
-  emit_byte(0x17);
+  emit_int8(0x17);
   emit_operand(dst, src);
 }
 
 void Assembler::ptest(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_sse4_1(), "");
   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
-  emit_byte(0x17);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x17);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::punpcklbw(XMMRegister dst, Address src) {
@@ -2492,18 +2498,18 @@ void Assembler::punpcklqdq(XMMRegister dst, XMMRegister src) {
 void Assembler::push(int32_t imm32) {
   // in 64bits we push 64bits onto the stack but only
   // take a 32bit immediate
-  emit_byte(0x68);
+  emit_int8(0x68);
   emit_long(imm32);
 }
 
 void Assembler::push(Register src) {
   int encode = prefix_and_encode(src->encoding());
 
-  emit_byte(0x50 | encode);
+  emit_int8(0x50 | encode);
 }
 
 void Assembler::pushf() {
-  emit_byte(0x9C);
+  emit_int8((unsigned char)0x9C);
 }
 
 #ifndef _LP64 // no 32bit push/pop on amd64
@@ -2511,7 +2517,7 @@ void Assembler::pushl(Address src) {
   // Note this will push 64bit on 64bit
   InstructionMark im(this);
   prefix(src);
-  emit_byte(0xFF);
+  emit_int8((unsigned char)0xFF);
   emit_operand(rsi, src);
 }
 #endif
@@ -2520,58 +2526,58 @@ void Assembler::rcll(Register dst, int imm8) {
   assert(isShiftCount(imm8), "illegal shift count");
   int encode = prefix_and_encode(dst->encoding());
   if (imm8 == 1) {
-    emit_byte(0xD1);
-    emit_byte(0xD0 | encode);
+    emit_int8((unsigned char)0xD1);
+    emit_int8((unsigned char)(0xD0 | encode));
   } else {
-    emit_byte(0xC1);
-    emit_byte(0xD0 | encode);
-    emit_byte(imm8);
+    emit_int8((unsigned char)0xC1);
+    emit_int8((unsigned char)0xD0 | encode);
+    emit_int8(imm8);
   }
 }
 
 // copies data from [esi] to [edi] using rcx pointer sized words
 // generic
 void Assembler::rep_mov() {
-  emit_byte(0xF3);
+  emit_int8((unsigned char)0xF3);
   // MOVSQ
   LP64_ONLY(prefix(REX_W));
-  emit_byte(0xA5);
+  emit_int8((unsigned char)0xA5);
 }
 
 // sets rcx pointer sized words with rax, value at [edi]
 // generic
 void Assembler::rep_set() { // rep_set
-  emit_byte(0xF3);
+  emit_int8((unsigned char)0xF3);
   // STOSQ
   LP64_ONLY(prefix(REX_W));
-  emit_byte(0xAB);
+  emit_int8((unsigned char)0xAB);
 }
 
 // scans rcx pointer sized words at [edi] for occurance of rax,
 // generic
 void Assembler::repne_scan() { // repne_scan
-  emit_byte(0xF2);
+  emit_int8((unsigned char)0xF2);
   // SCASQ
   LP64_ONLY(prefix(REX_W));
-  emit_byte(0xAF);
+  emit_int8((unsigned char)0xAF);
 }
 
 #ifdef _LP64
 // scans rcx 4 byte words at [edi] for occurance of rax,
 // generic
 void Assembler::repne_scanl() { // repne_scan
-  emit_byte(0xF2);
+  emit_int8((unsigned char)0xF2);
   // SCASL
-  emit_byte(0xAF);
+  emit_int8((unsigned char)0xAF);
 }
 #endif
 
 void Assembler::ret(int imm16) {
   if (imm16 == 0) {
-    emit_byte(0xC3);
+    emit_int8((unsigned char)0xC3);
   } else {
-    emit_byte(0xC2);
-    emit_word(imm16);
+    emit_int8((unsigned char)0xC2);
+    emit_int16(imm16);
   }
 }
 
@@ -2580,26 +2586,26 @@ void Assembler::sahf() {
   // Not supported in 64bit mode
   ShouldNotReachHere();
 #endif
-  emit_byte(0x9E);
+  emit_int8((unsigned char)0x9E);
 }
 
 void Assembler::sarl(Register dst, int imm8) {
   int encode = prefix_and_encode(dst->encoding());
   assert(isShiftCount(imm8), "illegal shift count");
   if (imm8 == 1) {
-    emit_byte(0xD1);
-    emit_byte(0xF8 | encode);
+    emit_int8((unsigned char)0xD1);
+    emit_int8((unsigned char)(0xF8 | encode));
   } else {
-    emit_byte(0xC1);
-    emit_byte(0xF8 | encode);
-    emit_byte(imm8);
+    emit_int8((unsigned char)0xC1);
+    emit_int8((unsigned char)(0xF8 | encode));
+    emit_int8(imm8);
   }
 }
 
 void Assembler::sarl(Register dst) {
   int encode = prefix_and_encode(dst->encoding());
-  emit_byte(0xD3);
-  emit_byte(0xF8 | encode);
+  emit_int8((unsigned char)0xD3);
+  emit_int8((unsigned char)(0xF8 | encode));
 }
 
 void Assembler::sbbl(Address dst, int32_t imm32) {
@@ -2617,7 +2623,7 @@ void Assembler::sbbl(Register dst, int32_t imm32) {
 void Assembler::sbbl(Register dst, Address src) {
   InstructionMark im(this);
   prefix(src, dst);
-  emit_byte(0x1B);
+  emit_int8(0x1B);
   emit_operand(dst, src);
 }
 
@@ -2629,47 +2635,47 @@ void Assembler::sbbl(Register dst, Register src) {
 void Assembler::setb(Condition cc, Register dst) {
   assert(0 <= cc && cc < 16, "illegal cc");
   int encode = prefix_and_encode(dst->encoding(), true);
-  emit_byte(0x0F);
-  emit_byte(0x90 | cc);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0x90 | cc);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::shll(Register dst, int imm8) {
   assert(isShiftCount(imm8), "illegal shift count");
   int encode = prefix_and_encode(dst->encoding());
   if (imm8 == 1 ) {
-    emit_byte(0xD1);
-    emit_byte(0xE0 | encode);
+    emit_int8((unsigned char)0xD1);
+    emit_int8((unsigned char)(0xE0 | encode));
   } else {
-    emit_byte(0xC1);
-    emit_byte(0xE0 | encode);
-    emit_byte(imm8);
+    emit_int8((unsigned char)0xC1);
+    emit_int8((unsigned char)(0xE0 | encode));
+    emit_int8(imm8);
   }
 }
 
 void Assembler::shll(Register dst) {
   int encode = prefix_and_encode(dst->encoding());
-  emit_byte(0xD3);
-  emit_byte(0xE0 | encode);
+  emit_int8((unsigned char)0xD3);
+  emit_int8((unsigned char)(0xE0 | encode));
 }
 
 void Assembler::shrl(Register dst, int imm8) {
   assert(isShiftCount(imm8), "illegal shift count");
   int encode = prefix_and_encode(dst->encoding());
-  emit_byte(0xC1);
-  emit_byte(0xE8 | encode);
-  emit_byte(imm8);
+  emit_int8((unsigned char)0xC1);
+  emit_int8((unsigned char)(0xE8 | encode));
+  emit_int8(imm8);
 }
 
 void Assembler::shrl(Register dst) {
   int encode = prefix_and_encode(dst->encoding());
-  emit_byte(0xD3);
-  emit_byte(0xE8 | encode);
+  emit_int8((unsigned char)0xD3);
+  emit_int8((unsigned char)(0xE8 | encode));
 }
 
 // copies a single word from [esi] to [edi]
 void Assembler::smovl() {
-  emit_byte(0xA5);
+  emit_int8((unsigned char)0xA5);
 }
 
 void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) {
@@ -2688,7 +2694,7 @@ void Assembler::sqrtss(XMMRegister dst, XMMRegister src) {
 }
 
 void Assembler::std() {
-  emit_byte(0xfd);
+  emit_int8((unsigned char)0xFD);
 }
 
 void Assembler::sqrtss(XMMRegister dst, Address src) {
@@ -2700,8 +2706,8 @@ void Assembler::stmxcsr( Address dst) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
   InstructionMark im(this);
   prefix(dst);
-  emit_byte(0x0F);
-  emit_byte(0xAE);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xAE);
   emit_operand(as_Register(3), dst);
 }
 
@@ -2714,7 +2720,7 @@ void Assembler::subl(Address dst, int32_t imm32) {
 void Assembler::subl(Address dst, Register src) {
   InstructionMark im(this);
   prefix(dst, src);
-  emit_byte(0x29);
+  emit_int8(0x29);
   emit_operand(src, dst);
 }
 
@@ -2732,7 +2738,7 @@ void Assembler::subl_imm32(Register dst, int32_t imm32) {
 void Assembler::subl(Register dst, Address src) {
   InstructionMark im(this);
   prefix(src, dst);
-  emit_byte(0x2B);
+  emit_int8(0x2B);
   emit_operand(dst, src);
 }
 
@@ -2773,11 +2779,11 @@ void Assembler::testl(Register dst, int32_t imm32) {
   // 8bit operands
   int encode = dst->encoding();
   if (encode == 0) {
-    emit_byte(0xA9);
+    emit_int8((unsigned char)0xA9);
   } else {
     encode = prefix_and_encode(encode);
-    emit_byte(0xF7);
-    emit_byte(0xC0 | encode);
+    emit_int8((unsigned char)0xF7);
+    emit_int8((unsigned char)(0xC0 | encode));
   }
   emit_long(imm32);
 }
@@ -2790,7 +2796,7 @@ void Assembler::testl(Register dst, Register src) {
 void Assembler::testl(Register dst, Address  src) {
   InstructionMark im(this);
   prefix(src, dst);
-  emit_byte(0x85);
+  emit_int8((unsigned char)0x85);
   emit_operand(dst, src);
 }
 
@@ -2818,28 +2824,28 @@ void Assembler::ucomiss(XMMRegister dst, XMMRegister src) {
 void Assembler::xaddl(Address dst, Register src) {
   InstructionMark im(this);
   prefix(dst, src);
-  emit_byte(0x0F);
-  emit_byte(0xC1);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xC1);
   emit_operand(src, dst);
 }
 
 void Assembler::xchgl(Register dst, Address src) { // xchg
   InstructionMark im(this);
   prefix(src, dst);
-  emit_byte(0x87);
+  emit_int8((unsigned char)0x87);
   emit_operand(dst, src);
 }
 
 void Assembler::xchgl(Register dst, Register src) {
   int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x87);
-  emit_byte(0xc0 | encode);
+  emit_int8((unsigned char)0x87);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::xgetbv() {
-  emit_byte(0x0F);
-  emit_byte(0x01);
-  emit_byte(0xD0);
+  emit_int8(0x0F);
+  emit_int8(0x01);
+  emit_int8((unsigned char)0xD0);
 }
 
 void Assembler::xorl(Register dst, int32_t imm32) {
@@ -2850,7 +2856,7 @@ void Assembler::xorl(Register dst, int32_t imm32) {
 void Assembler::xorl(Register dst, Address src) {
   InstructionMark im(this);
   prefix(src, dst);
-  emit_byte(0x33);
+  emit_int8(0x33);
   emit_operand(dst, src);
 }
 
@@ -3276,8 +3282,8 @@ void Assembler::pmullw(XMMRegister dst, XMMRegister src) {
 void Assembler::pmulld(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_sse4_1(), "");
   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
-  emit_byte(0x40);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x40);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
@@ -3288,8 +3294,8 @@ void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool
 void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_38);
-  emit_byte(0x40);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x40);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
@@ -3303,7 +3309,7 @@ void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, Address src, bool vect
   int dst_enc = dst->encoding();
   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
   vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, false, vector256);
-  emit_byte(0x40);
+  emit_int8(0x40);
   emit_operand(dst, src);
 }
 
@@ -3312,27 +3318,27 @@ void Assembler::psllw(XMMRegister dst, int shift) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   // XMM6 is for /6 encoding: 66 0F 71 /6 ib
   int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66);
-  emit_byte(0x71);
-  emit_byte(0xC0 | encode);
-  emit_byte(shift & 0xFF);
+  emit_int8(0x71);
+  emit_int8((unsigned char)(0xC0 | encode));
+  emit_int8(shift & 0xFF);
 }
 
 void Assembler::pslld(XMMRegister dst, int shift) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   // XMM6 is for /6 encoding: 66 0F 72 /6 ib
   int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66);
-  emit_byte(0x72);
-  emit_byte(0xC0 | encode);
-  emit_byte(shift & 0xFF);
+  emit_int8(0x72);
+  emit_int8((unsigned char)(0xC0 | encode));
+  emit_int8(shift & 0xFF);
 }
 
 void Assembler::psllq(XMMRegister dst, int shift) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   // XMM6 is for /6 encoding: 66 0F 73 /6 ib
   int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66);
-  emit_byte(0x73);
-  emit_byte(0xC0 | encode);
-  emit_byte(shift & 0xFF);
+  emit_int8(0x73);
+  emit_int8((unsigned char)(0xC0 | encode));
+  emit_int8(shift & 0xFF);
 }
 
 void Assembler::psllw(XMMRegister dst, XMMRegister shift) {
@@ -3354,21 +3360,21 @@ void Assembler::vpsllw(XMMRegister dst, XMMRegister src, int shift, bool vector2
   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
   // XMM6 is for /6 encoding: 66 0F 71 /6 ib
   emit_vex_arith(0x71, xmm6, dst, src, VEX_SIMD_66, vector256);
-  emit_byte(shift & 0xFF);
+  emit_int8(shift & 0xFF);
 }
 
 void Assembler::vpslld(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
   // XMM6 is for /6 encoding: 66 0F 72 /6 ib
   emit_vex_arith(0x72, xmm6, dst, src, VEX_SIMD_66, vector256);
-  emit_byte(shift & 0xFF);
+  emit_int8(shift & 0xFF);
 }
 
 void Assembler::vpsllq(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
   // XMM6 is for /6 encoding: 66 0F 73 /6 ib
   emit_vex_arith(0x73, xmm6, dst, src, VEX_SIMD_66, vector256);
-  emit_byte(shift & 0xFF);
+  emit_int8(shift & 0xFF);
 }
 
 void Assembler::vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
@@ -3391,18 +3397,18 @@ void Assembler::psrlw(XMMRegister dst, int shift) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   // XMM2 is for /2 encoding: 66 0F 71 /2 ib
   int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66);
-  emit_byte(0x71);
-  emit_byte(0xC0 | encode);
-  emit_byte(shift & 0xFF);
+  emit_int8(0x71);
+  emit_int8((unsigned char)(0xC0 | encode));
+  emit_int8(shift & 0xFF);
 }
 
 void Assembler::psrld(XMMRegister dst, int shift) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   // XMM2 is for /2 encoding: 66 0F 72 /2 ib
   int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66);
-  emit_byte(0x72);
-  emit_byte(0xC0 | encode);
-  emit_byte(shift & 0xFF);
+  emit_int8(0x72);
+  emit_int8((unsigned char)(0xC0 | encode));
+  emit_int8(shift & 0xFF);
 }
 
 void Assembler::psrlq(XMMRegister dst, int shift) {
@@ -3411,9 +3417,9 @@ void Assembler::psrlq(XMMRegister dst, int shift) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   // XMM2 is for /2 encoding: 66 0F 73 /2 ib
   int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66);
-  emit_byte(0x73);
-  emit_byte(0xC0 | encode);
-  emit_byte(shift & 0xFF);
+  emit_int8(0x73);
+  emit_int8((unsigned char)(0xC0 | encode));
+  emit_int8(shift & 0xFF);
 }
 
 void Assembler::psrlw(XMMRegister dst, XMMRegister shift) {
@@ -3435,21 +3441,21 @@ void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, int shift, bool vector2
   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
   // XMM2 is for /2 encoding: 66 0F 73 /2 ib
   emit_vex_arith(0x71, xmm2, dst, src, VEX_SIMD_66, vector256);
-  emit_byte(shift & 0xFF);
+  emit_int8(shift & 0xFF);
 }
 
 void Assembler::vpsrld(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
   // XMM2 is for /2 encoding: 66 0F 73 /2 ib
   emit_vex_arith(0x72, xmm2, dst, src, VEX_SIMD_66, vector256);
-  emit_byte(shift & 0xFF);
+  emit_int8(shift & 0xFF);
 }
 
 void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
   // XMM2 is for /2 encoding: 66 0F 73 /2 ib
   emit_vex_arith(0x73, xmm2, dst, src, VEX_SIMD_66, vector256);
-  emit_byte(shift & 0xFF);
+  emit_int8(shift & 0xFF);
 }
 
 void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
@@ -3472,18 +3478,18 @@ void Assembler::psraw(XMMRegister dst, int shift) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   // XMM4 is for /4 encoding: 66 0F 71 /4 ib
   int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66);
-  emit_byte(0x71);
-  emit_byte(0xC0 | encode);
-  emit_byte(shift & 0xFF);
+  emit_int8(0x71);
+  emit_int8((unsigned char)(0xC0 | encode));
+  emit_int8(shift & 0xFF);
 }
 
 void Assembler::psrad(XMMRegister dst, int shift) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   // XMM4 is for /4 encoding: 66 0F 72 /4 ib
   int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66);
-  emit_byte(0x72);
-  emit_byte(0xC0 | encode);
-  emit_byte(shift & 0xFF);
+  emit_int8(0x72);
+  emit_int8((unsigned char)(0xC0 | encode));
+  emit_int8(shift & 0xFF);
 }
 
 void Assembler::psraw(XMMRegister dst, XMMRegister shift) {
@@ -3500,14 +3506,14 @@ void Assembler::vpsraw(XMMRegister dst, XMMRegister src, int shift, bool vector2
   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
   // XMM4 is for /4 encoding: 66 0F 71 /4 ib
   emit_vex_arith(0x71, xmm4, dst, src, VEX_SIMD_66, vector256);
-  emit_byte(shift & 0xFF);
+  emit_int8(shift & 0xFF);
 }
 
 void Assembler::vpsrad(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
   // XMM4 is for /4 encoding: 66 0F 71 /4 ib
   emit_vex_arith(0x72, xmm4, dst, src, VEX_SIMD_66, vector256);
-  emit_byte(shift & 0xFF);
+  emit_int8(shift & 0xFF);
 }
 
 void Assembler::vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
@@ -3572,11 +3578,11 @@ void Assembler::vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src)
   assert(VM_Version::supports_avx(), "");
   bool vector256 = true;
   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_3A);
-  emit_byte(0x18);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x18);
+  emit_int8((unsigned char)(0xC0 | encode));
   // 0x00 - insert into lower 128 bits
   // 0x01 - insert into upper 128 bits
-  emit_byte(0x01);
+  emit_int8(0x01);
 }
 
 void Assembler::vinsertf128h(XMMRegister dst, Address src) {
@@ -3587,10 +3593,10 @@ void Assembler::vinsertf128h(XMMRegister dst, Address src) {
   int dst_enc = dst->encoding();
   // swap src<->dst for encoding
   vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256);
-  emit_byte(0x18);
+  emit_int8(0x18);
   emit_operand(dst, src);
   // 0x01 - insert into upper 128 bits
-  emit_byte(0x01);
+  emit_int8(0x01);
 }
 
 void Assembler::vextractf128h(Address dst, XMMRegister src) {
@@ -3600,21 +3606,21 @@ void Assembler::vextractf128h(Address dst, XMMRegister src) {
   assert(src != xnoreg, "sanity");
   int src_enc = src->encoding();
   vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256);
-  emit_byte(0x19);
+  emit_int8(0x19);
   emit_operand(src, dst);
   // 0x01 - extract from upper 128 bits
-  emit_byte(0x01);
+  emit_int8(0x01);
 }
 
 void Assembler::vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   assert(VM_Version::supports_avx2(), "");
   bool vector256 = true;
   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_3A);
-  emit_byte(0x38);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x38);
+  emit_int8((unsigned char)(0xC0 | encode));
   // 0x00 - insert into lower 128 bits
   // 0x01 - insert into upper 128 bits
-  emit_byte(0x01);
+  emit_int8(0x01);
 }
 
 void Assembler::vinserti128h(XMMRegister dst, Address src) {
@@ -3625,10 +3631,10 @@ void Assembler::vinserti128h(XMMRegister dst, Address src) {
   int dst_enc = dst->encoding();
   // swap src<->dst for encoding
   vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256);
-  emit_byte(0x38);
+  emit_int8(0x38);
   emit_operand(dst, src);
   // 0x01 - insert into upper 128 bits
-  emit_byte(0x01);
+  emit_int8(0x01);
 }
 
 void Assembler::vextracti128h(Address dst, XMMRegister src) {
@@ -3638,16 +3644,16 @@ void Assembler::vextracti128h(Address dst, XMMRegister src) {
   assert(src != xnoreg, "sanity");
   int src_enc = src->encoding();
   vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256);
-  emit_byte(0x39);
+  emit_int8(0x39);
   emit_operand(src, dst);
   // 0x01 - extract from upper 128 bits
-  emit_byte(0x01);
+  emit_int8(0x01);
 }
 
 void Assembler::vzeroupper() {
   assert(VM_Version::supports_avx(), "");
   (void)vex_prefix_and_encode(xmm0, xmm0, xmm0, VEX_SIMD_NONE);
-  emit_byte(0x77);
+  emit_int8(0x77);
 }
 
 
@@ -3657,15 +3663,15 @@ void Assembler::vzeroupper() {
 void Assembler::cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec) {
   // NO PREFIX AS NEVER 64BIT
   InstructionMark im(this);
-  emit_byte(0x81);
-  emit_byte(0xF8 | src1->encoding());
+  emit_int8((unsigned char)0x81);
+  emit_int8((unsigned char)(0xF8 | src1->encoding()));
   emit_data(imm32, rspec, 0);
 }
 
 void Assembler::cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec) {
   // NO PREFIX AS NEVER 64BIT (not even 32bit versions of 64bit regs
   InstructionMark im(this);
-  emit_byte(0x81);
+  emit_int8((unsigned char)0x81);
   emit_operand(rdi, src1);
   emit_data(imm32, rspec, 0);
 }
@@ -3675,14 +3681,14 @@ void Assembler::cmp_literal32(Address src1, int32_t imm32, RelocationHolder cons
 // into rdx:rax.  The ZF is set if the compared values were equal, and cleared otherwise.
 void Assembler::cmpxchg8(Address adr) {
   InstructionMark im(this);
-  emit_byte(0x0F);
-  emit_byte(0xc7);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xC7);
   emit_operand(rcx, adr);
 }
 
 void Assembler::decl(Register dst) {
   // Don't use it directly. Use MacroAssembler::decrementl() instead.
- emit_byte(0x48 | dst->encoding());
+ emit_int8(0x48 | dst->encoding());
 }
 
 #endif // _LP64
@@ -3690,8 +3696,8 @@ void Assembler::decl(Register dst) {
 // 64bit typically doesn't use the x87 but needs to for the trig funcs
 
 void Assembler::fabs() {
-  emit_byte(0xD9);
-  emit_byte(0xE1);
+  emit_int8((unsigned char)0xD9);
+  emit_int8((unsigned char)0xE1);
 }
 
 void Assembler::fadd(int i) {
@@ -3700,13 +3706,13 @@ void Assembler::fadd(int i) {
 
 void Assembler::fadd_d(Address src) {
   InstructionMark im(this);
-  emit_byte(0xDC);
+  emit_int8((unsigned char)0xDC);
   emit_operand32(rax, src);
 }
 
 void Assembler::fadd_s(Address src) {
   InstructionMark im(this);
-  emit_byte(0xD8);
+  emit_int8((unsigned char)0xD8);
   emit_operand32(rax, src);
 }
 
@@ -3719,8 +3725,8 @@ void Assembler::faddp(int i) {
 }
 
 void Assembler::fchs() {
-  emit_byte(0xD9);
-  emit_byte(0xE0);
+  emit_int8((unsigned char)0xD9);
+  emit_int8((unsigned char)0xE0);
 }
 
 void Assembler::fcom(int i) {
@@ -3733,29 +3739,29 @@ void Assembler::fcomp(int i) {
 
 void Assembler::fcomp_d(Address src) {
   InstructionMark im(this);
-  emit_byte(0xDC);
+  emit_int8((unsigned char)0xDC);
   emit_operand32(rbx, src);
 }
 
 void Assembler::fcomp_s(Address src) {
   InstructionMark im(this);
-  emit_byte(0xD8);
+  emit_int8((unsigned char)0xD8);
   emit_operand32(rbx, src);
 }
 
 void Assembler::fcompp() {
-  emit_byte(0xDE);
-  emit_byte(0xD9);
+  emit_int8((unsigned char)0xDE);
+  emit_int8((unsigned char)0xD9);
 }
 
 void Assembler::fcos() {
-  emit_byte(0xD9);
-  emit_byte(0xFF);
+  emit_int8((unsigned char)0xD9);
+  emit_int8((unsigned char)0xFF);
 }
 
 void Assembler::fdecstp() {
-  emit_byte(0xD9);
-  emit_byte(0xF6);
+  emit_int8((unsigned char)0xD9);
+  emit_int8((unsigned char)0xF6);
 }
 
 void Assembler::fdiv(int i) {
@@ -3764,13 +3770,13 @@ void Assembler::fdiv(int i) {
 
 void Assembler::fdiv_d(Address src) {
   InstructionMark im(this);
-  emit_byte(0xDC);
+  emit_int8((unsigned char)0xDC);
   emit_operand32(rsi, src);
 }
 
 void Assembler::fdiv_s(Address src) {
   InstructionMark im(this);
-  emit_byte(0xD8);
+  emit_int8((unsigned char)0xD8);
   emit_operand32(rsi, src);
 }
 
@@ -3791,13 +3797,13 @@ void Assembler::fdivr(int i) {
 
 void Assembler::fdivr_d(Address src) {
   InstructionMark im(this);
-  emit_byte(0xDC);
+  emit_int8((unsigned char)0xDC);
   emit_operand32(rdi, src);
 }
 
 void Assembler::fdivr_s(Address src) {
   InstructionMark im(this);
-  emit_byte(0xD8);
+  emit_int8((unsigned char)0xD8);
   emit_operand32(rdi, src);
 }
 
@@ -3815,59 +3821,59 @@ void Assembler::ffree(int i) {
 
 void Assembler::fild_d(Address adr) {
   InstructionMark im(this);
-  emit_byte(0xDF);
+  emit_int8((unsigned char)0xDF);
   emit_operand32(rbp, adr);
 }
 
 void Assembler::fild_s(Address adr) {
   InstructionMark im(this);
-  emit_byte(0xDB);
+  emit_int8((unsigned char)0xDB);
   emit_operand32(rax, adr);
 }
 
 void Assembler::fincstp() {
-  emit_byte(0xD9);
-  emit_byte(0xF7);
+  emit_int8((unsigned char)0xD9);
+  emit_int8((unsigned char)0xF7);
 }
 
 void Assembler::finit() {
-  emit_byte(0x9B);
-  emit_byte(0xDB);
-  emit_byte(0xE3);
+  emit_int8((unsigned char)0x9B);
+  emit_int8((unsigned char)0xDB);
+  emit_int8((unsigned char)0xE3);
 }
 
 void Assembler::fist_s(Address adr) {
   InstructionMark im(this);
-  emit_byte(0xDB);
+  emit_int8((unsigned char)0xDB);
   emit_operand32(rdx, adr);
 }
 
 void Assembler::fistp_d(Address adr) {
   InstructionMark im(this);
-  emit_byte(0xDF);
+  emit_int8((unsigned char)0xDF);
   emit_operand32(rdi, adr);
 }
 
 void Assembler::fistp_s(Address adr) {
   InstructionMark im(this);
-  emit_byte(0xDB);
+  emit_int8((unsigned char)0xDB);
   emit_operand32(rbx, adr);
 }
 
 void Assembler::fld1() {
-  emit_byte(0xD9);
-  emit_byte(0xE8);
+  emit_int8((unsigned char)0xD9);
+  emit_int8((unsigned char)0xE8);
 }
 
 void Assembler::fld_d(Address adr) {
   InstructionMark im(this);
-  emit_byte(0xDD);
+  emit_int8((unsigned char)0xDD);
   emit_operand32(rax, adr);
 }
 
 void Assembler::fld_s(Address adr) {
   InstructionMark im(this);
-  emit_byte(0xD9);
+  emit_int8((unsigned char)0xD9);
   emit_operand32(rax, adr);
 }
 
@@ -3878,35 +3884,35 @@ void Assembler::fld_s(int index) {
 
 void Assembler::fld_x(Address adr) {
   InstructionMark im(this);
-  emit_byte(0xDB);
+  emit_int8((unsigned char)0xDB);
   emit_operand32(rbp, adr);
 }
 
 void Assembler::fldcw(Address src) {
   InstructionMark im(this);
-  emit_byte(0xd9);
+  emit_int8((unsigned char)0xD9);
   emit_operand32(rbp, src);
 }
 
 void Assembler::fldenv(Address src) {
   InstructionMark im(this);
-  emit_byte(0xD9);
+  emit_int8((unsigned char)0xD9);
   emit_operand32(rsp, src);
 }
 
 void Assembler::fldlg2() {
-  emit_byte(0xD9);
-  emit_byte(0xEC);
+  emit_int8((unsigned char)0xD9);
+  emit_int8((unsigned char)0xEC);
 }
 
 void Assembler::fldln2() {
-  emit_byte(0xD9);
-  emit_byte(0xED);
+  emit_int8((unsigned char)0xD9);
+  emit_int8((unsigned char)0xED);
 }
 
 void Assembler::fldz() {
-  emit_byte(0xD9);
-  emit_byte(0xEE);
+  emit_int8((unsigned char)0xD9);
+  emit_int8((unsigned char)0xEE);
 }
 
 void Assembler::flog() {
@@ -3927,13 +3933,13 @@ void Assembler::fmul(int i) {
 
 void Assembler::fmul_d(Address src) {
   InstructionMark im(this);
-  emit_byte(0xDC);
+  emit_int8((unsigned char)0xDC);
   emit_operand32(rcx, src);
 }
 
 void Assembler::fmul_s(Address src) {
   InstructionMark im(this);
-  emit_byte(0xD8);
+  emit_int8((unsigned char)0xD8);
   emit_operand32(rcx, src);
 }
 
@@ -3947,63 +3953,63 @@ void Assembler::fmulp(int i) {
 
 void Assembler::fnsave(Address dst) {
   InstructionMark im(this);
-  emit_byte(0xDD);
+  emit_int8((unsigned char)0xDD);
   emit_operand32(rsi, dst);
 }
 
 void Assembler::fnstcw(Address src) {
   InstructionMark im(this);
-  emit_byte(0x9B);
-  emit_byte(0xD9);
+  emit_int8((unsigned char)0x9B);
+  emit_int8((unsigned char)0xD9);
   emit_operand32(rdi, src);
 }
 
 void Assembler::fnstsw_ax() {
-  emit_byte(0xdF);
-  emit_byte(0xE0);
+  emit_int8((unsigned char)0xDF);
+  emit_int8((unsigned char)0xE0);
 }
 
 void Assembler::fprem() {
-  emit_byte(0xD9);
-  emit_byte(0xF8);
+  emit_int8((unsigned char)0xD9);
+  emit_int8((unsigned char)0xF8);
 }
 
 void Assembler::fprem1() {
-  emit_byte(0xD9);
-  emit_byte(0xF5);
+  emit_int8((unsigned char)0xD9);
+  emit_int8((unsigned char)0xF5);
 }
 
 void Assembler::frstor(Address src) {
   InstructionMark im(this);
-  emit_byte(0xDD);
+  emit_int8((unsigned char)0xDD);
   emit_operand32(rsp, src);
 }
 
 void Assembler::fsin() {
-  emit_byte(0xD9);
-  emit_byte(0xFE);
+  emit_int8((unsigned char)0xD9);
+  emit_int8((unsigned char)0xFE);
 }
 
 void Assembler::fsqrt() {
-  emit_byte(0xD9);
-  emit_byte(0xFA);
+  emit_int8((unsigned char)0xD9);
+  emit_int8((unsigned char)0xFA);
 }
 
 void Assembler::fst_d(Address adr) {
   InstructionMark im(this);
-  emit_byte(0xDD);
+  emit_int8((unsigned char)0xDD);
   emit_operand32(rdx, adr);
 }
 
 void Assembler::fst_s(Address adr) {
   InstructionMark im(this);
-  emit_byte(0xD9);
+  emit_int8((unsigned char)0xD9);
   emit_operand32(rdx, adr);
 }
 
 void Assembler::fstp_d(Address adr) {
   InstructionMark im(this);
-  emit_byte(0xDD);
+  emit_int8((unsigned char)0xDD);
   emit_operand32(rbx, adr);
 }
 
@@ -4013,13 +4019,13 @@ void Assembler::fstp_d(int index) {
 
 void Assembler::fstp_s(Address adr) {
   InstructionMark im(this);
-  emit_byte(0xD9);
+  emit_int8((unsigned char)0xD9);
   emit_operand32(rbx, adr);
 }
 
 void Assembler::fstp_x(Address adr) {
   InstructionMark im(this);
-  emit_byte(0xDB);
+  emit_int8((unsigned char)0xDB);
   emit_operand32(rdi, adr);
 }
 
@@ -4029,13 +4035,13 @@ void Assembler::fsub(int i) {
 
 void Assembler::fsub_d(Address src) {
   InstructionMark im(this);
-  emit_byte(0xDC);
+  emit_int8((unsigned char)0xDC);
   emit_operand32(rsp, src);
 }
 
 void Assembler::fsub_s(Address src) {
   InstructionMark im(this);
-  emit_byte(0xD8);
+  emit_int8((unsigned char)0xD8);
   emit_operand32(rsp, src);
 }
 
@@ -4053,13 +4059,13 @@ void Assembler::fsubr(int i) {
 
 void Assembler::fsubr_d(Address src) {
   InstructionMark im(this);
-  emit_byte(0xDC);
+  emit_int8((unsigned char)0xDC);
   emit_operand32(rbp, src);
 }
 
 void Assembler::fsubr_s(Address src) {
   InstructionMark im(this);
-  emit_byte(0xD8);
+  emit_int8((unsigned char)0xD8);
   emit_operand32(rbp, src);
 }
 
@@ -4072,15 +4078,15 @@ void Assembler::fsubrp(int i) {
 }
 
 void Assembler::ftan() {
-  emit_byte(0xD9);
-  emit_byte(0xF2);
-  emit_byte(0xDD);
-  emit_byte(0xD8);
+  emit_int8((unsigned char)0xD9);
+  emit_int8((unsigned char)0xF2);
+  emit_int8((unsigned char)0xDD);
+  emit_int8((unsigned char)0xD8);
 }
 
 void Assembler::ftst() {
-  emit_byte(0xD9);
-  emit_byte(0xE4);
+  emit_int8((unsigned char)0xD9);
+  emit_int8((unsigned char)0xE4);
 }
 
 void Assembler::fucomi(int i) {
@@ -4096,7 +4102,7 @@ void Assembler::fucomip(int i) {
 }
 
 void Assembler::fwait() {
-  emit_byte(0x9B);
+  emit_int8((unsigned char)0x9B);
 }
 
 void Assembler::fxch(int i) {
@@ -4104,23 +4110,23 @@ void Assembler::fxch(int i) {
 }
 
 void Assembler::fyl2x() {
-  emit_byte(0xD9);
-  emit_byte(0xF1);
+  emit_int8((unsigned char)0xD9);
+  emit_int8((unsigned char)0xF1);
 }
 
 void Assembler::frndint() {
-  emit_byte(0xD9);
-  emit_byte(0xFC);
+  emit_int8((unsigned char)0xD9);
+  emit_int8((unsigned char)0xFC);
 }
 
 void Assembler::f2xm1() {
-  emit_byte(0xD9);
-  emit_byte(0xF0);
+  emit_int8((unsigned char)0xD9);
+  emit_int8((unsigned char)0xF0);
 }
 
 void Assembler::fldl2e() {
-  emit_byte(0xD9);
-  emit_byte(0xEA);
+  emit_int8((unsigned char)0xD9);
+  emit_int8((unsigned char)0xEA);
 }
 
 // SSE SIMD prefix byte values corresponding to VexSimdPrefix encoding.
@@ -4131,7 +4137,7 @@ static int simd_opc[4] = { 0,    0, 0x38, 0x3A };
 // Generate SSE legacy REX prefix and SIMD opcode based on VEX encoding.
 void Assembler::rex_prefix(Address adr, XMMRegister xreg, VexSimdPrefix pre, VexOpcode opc, bool rex_w) {
   if (pre > 0) {
-    emit_byte(simd_pre[pre]);
+    emit_int8(simd_pre[pre]);
   }
   if (rex_w) {
     prefixq(adr, xreg);
@@ -4139,25 +4145,25 @@ void Assembler::rex_prefix(Address adr, XMMRegister xreg, VexSimdPrefix pre, Vex
     prefix(adr, xreg);
   }
   if (opc > 0) {
-    emit_byte(0x0F);
+    emit_int8(0x0F);
     int opc2 = simd_opc[opc];
     if (opc2 > 0) {
-      emit_byte(opc2);
+      emit_int8(opc2);
     }
   }
 }
 
 int Assembler::rex_prefix_and_encode(int dst_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool rex_w) {
   if (pre > 0) {
-    emit_byte(simd_pre[pre]);
+    emit_int8(simd_pre[pre]);
   }
   int encode = (rex_w) ? prefixq_and_encode(dst_enc, src_enc) :
                           prefix_and_encode(dst_enc, src_enc);
   if (opc > 0) {
-    emit_byte(0x0F);
+    emit_int8(0x0F);
     int opc2 = simd_opc[opc];
     if (opc2 > 0) {
-      emit_byte(opc2);
+      emit_int8(opc2);
     }
   }
   return encode;
@@ -4171,11 +4177,11 @@ void Assembler::vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, int n
     int byte1 = (vex_r ? VEX_R : 0) | (vex_x ? VEX_X : 0) | (vex_b ? VEX_B : 0);
     byte1 = (~byte1) & 0xE0;
     byte1 |= opc;
-    a_byte(byte1);
+    emit_int8(byte1);
 
     int byte2 = ((~nds_enc) & 0xf) << 3;
     byte2 |= (vex_w ? VEX_W : 0) | (vector256 ? 4 : 0) | pre;
-    emit_byte(byte2);
+    emit_int8(byte2);
   } else {
     prefix(VEX_2bytes);
 
@@ -4183,7 +4189,7 @@ void Assembler::vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, int n
     byte1 = (~byte1) & 0x80;
     byte1 |= ((~nds_enc) & 0xf) << 3;
     byte1 |= (vector256 ? 4 : 0) | pre;
-    emit_byte(byte1);
+    emit_int8(byte1);
   }
 }
 
@@ -4229,28 +4235,28 @@ int Assembler::simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegis
 void Assembler::emit_simd_arith(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre) {
   InstructionMark im(this);
   simd_prefix(dst, dst, src, pre);
-  emit_byte(opcode);
+  emit_int8(opcode);
   emit_operand(dst, src);
 }
 
 void Assembler::emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre) {
   int encode = simd_prefix_and_encode(dst, dst, src, pre);
-  emit_byte(opcode);
-  emit_byte(0xC0 | encode);
+  emit_int8(opcode);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 // Versions with no second source register (non-destructive source).
 void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre) {
   InstructionMark im(this);
   simd_prefix(dst, xnoreg, src, pre);
-  emit_byte(opcode);
+  emit_int8(opcode);
   emit_operand(dst, src);
 }
 
 void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre) {
   int encode = simd_prefix_and_encode(dst, xnoreg, src, pre);
-  emit_byte(opcode);
-  emit_byte(0xC0 | encode);
+  emit_int8(opcode);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 // 3-operands AVX instructions
@@ -4258,22 +4264,22 @@ void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
                                Address src, VexSimdPrefix pre, bool vector256) {
   InstructionMark im(this);
   vex_prefix(dst, nds, src, pre, vector256);
-  emit_byte(opcode);
+  emit_int8(opcode);
   emit_operand(dst, src);
 }
 
 void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
                                XMMRegister src, VexSimdPrefix pre, bool vector256) {
   int encode = vex_prefix_and_encode(dst, nds, src, pre, vector256);
-  emit_byte(opcode);
-  emit_byte(0xC0 | encode);
+  emit_int8(opcode);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 #ifndef _LP64
 
 void Assembler::incl(Register dst) {
   // Don't use it directly. Use MacroAssembler::incrementl() instead.
-  emit_byte(0x40 | dst->encoding());
+  emit_int8(0x40 | dst->encoding());
 }
 
 void Assembler::lea(Register dst, Address src) {
@@ -4282,7 +4288,7 @@ void Assembler::lea(Register dst, Address src) {
 
 void Assembler::mov_literal32(Address dst, int32_t imm32,  RelocationHolder const& rspec) {
   InstructionMark im(this);
-  emit_byte(0xC7);
+  emit_int8((unsigned char)0xC7);
   emit_operand(rax, dst);
   emit_data((int)imm32, rspec, 0);
 }
@@ -4290,49 +4296,49 @@ void Assembler::mov_literal32(Address dst, int32_t imm32,  RelocationHolder cons
 void Assembler::mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec) {
   InstructionMark im(this);
   int encode = prefix_and_encode(dst->encoding());
-  emit_byte(0xB8 | encode);
+  emit_int8((unsigned char)(0xB8 | encode));
   emit_data((int)imm32, rspec, 0);
 }
 
 void Assembler::popa() { // 32bit
-  emit_byte(0x61);
+  emit_int8(0x61);
 }
 
 void Assembler::push_literal32(int32_t imm32, RelocationHolder const& rspec) {
   InstructionMark im(this);
-  emit_byte(0x68);
+  emit_int8(0x68);
   emit_data(imm32, rspec, 0);
 }
 
 void Assembler::pusha() { // 32bit
-  emit_byte(0x60);
+  emit_int8(0x60);
 }
 
 void Assembler::set_byte_if_not_zero(Register dst) {
-  emit_byte(0x0F);
-  emit_byte(0x95);
-  emit_byte(0xE0 | dst->encoding());
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0x95);
+  emit_int8((unsigned char)(0xE0 | dst->encoding()));
 }
 
 void Assembler::shldl(Register dst, Register src) {
-  emit_byte(0x0F);
-  emit_byte(0xA5);
-  emit_byte(0xC0 | src->encoding() << 3 | dst->encoding());
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xA5);
+  emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding()));
 }
 
 void Assembler::shrdl(Register dst, Register src) {
-  emit_byte(0x0F);
-  emit_byte(0xAD);
-  emit_byte(0xC0 | src->encoding() << 3 | dst->encoding());
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xAD);
+  emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding()));
 }
 
 #else // LP64
 
 void Assembler::set_byte_if_not_zero(Register dst) {
   int enc = prefix_and_encode(dst->encoding(), true);
-  emit_byte(0x0F);
-  emit_byte(0x95);
-  emit_byte(0xE0 | enc);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0x95);
+  emit_int8((unsigned char)(0xE0 | enc));
 }
 
 // 64bit only pieces of the assembler
@@ -4670,7 +4676,7 @@ void Assembler::adcq(Register dst, int32_t imm32) {
 void Assembler::adcq(Register dst, Address src) {
   InstructionMark im(this);
   prefixq(src, dst);
-  emit_byte(0x13);
+  emit_int8(0x13);
   emit_operand(dst, src);
 }
 
@@ -4688,7 +4694,7 @@ void Assembler::addq(Address dst, int32_t imm32) {
 void Assembler::addq(Address dst, Register src) {
   InstructionMark im(this);
   prefixq(dst, src);
-  emit_byte(0x01);
+  emit_int8(0x01);
   emit_operand(src, dst);
 }
 
@@ -4700,7 +4706,7 @@ void Assembler::addq(Register dst, int32_t imm32) {
 void Assembler::addq(Register dst, Address src) {
   InstructionMark im(this);
   prefixq(src, dst);
-  emit_byte(0x03);
+  emit_int8(0x03);
   emit_operand(dst, src);
 }
 
@@ -4712,7 +4718,7 @@ void Assembler::addq(Register dst, Register src) {
 void Assembler::andq(Address dst, int32_t imm32) {
   InstructionMark im(this);
   prefixq(dst);
-  emit_byte(0x81);
+  emit_int8((unsigned char)0x81);
   emit_operand(rsp, dst, 4);
   emit_long(imm32);
 }
@@ -4725,7 +4731,7 @@ void Assembler::andq(Register dst, int32_t imm32) {
 void Assembler::andq(Register dst, Address src) {
   InstructionMark im(this);
   prefixq(src, dst);
-  emit_byte(0x23);
+  emit_int8(0x23);
   emit_operand(dst, src);
 }
 
@@ -4736,56 +4742,56 @@ void Assembler::andq(Register dst, Register src) {
 
 void Assembler::bsfq(Register dst, Register src) {
   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
-  emit_byte(0xBC);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xBC);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::bsrq(Register dst, Register src) {
   assert(!VM_Version::supports_lzcnt(), "encoding is treated as LZCNT");
   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
-  emit_byte(0xBD);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xBD);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::bswapq(Register reg) {
   int encode = prefixq_and_encode(reg->encoding());
-  emit_byte(0x0F);
-  emit_byte(0xC8 | encode);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)(0xC8 | encode));
 }
 
 void Assembler::cdqq() {
   prefix(REX_W);
-  emit_byte(0x99);
+  emit_int8((unsigned char)0x99);
 }
 
 void Assembler::clflush(Address adr) {
   prefix(adr);
-  emit_byte(0x0F);
-  emit_byte(0xAE);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xAE);
   emit_operand(rdi, adr);
 }
 
 void Assembler::cmovq(Condition cc, Register dst, Register src) {
   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
-  emit_byte(0x40 | cc);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x0F);
+  emit_int8(0x40 | cc);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::cmovq(Condition cc, Register dst, Address src) {
   InstructionMark im(this);
   prefixq(src, dst);
-  emit_byte(0x0F);
-  emit_byte(0x40 | cc);
+  emit_int8(0x0F);
+  emit_int8(0x40 | cc);
   emit_operand(dst, src);
 }
 
 void Assembler::cmpq(Address dst, int32_t imm32) {
   InstructionMark im(this);
   prefixq(dst);
-  emit_byte(0x81);
+  emit_int8((unsigned char)0x81);
   emit_operand(rdi, dst, 4);
   emit_long(imm32);
 }
@@ -4798,7 +4804,7 @@ void Assembler::cmpq(Register dst, int32_t imm32) {
 void Assembler::cmpq(Address dst, Register src) {
   InstructionMark im(this);
   prefixq(dst, src);
-  emit_byte(0x3B);
+  emit_int8(0x3B);
   emit_operand(src, dst);
 }
 
@@ -4810,122 +4816,122 @@ void Assembler::cmpq(Register dst, Register src) {
 void Assembler::cmpq(Register dst, Address  src) {
   InstructionMark im(this);
   prefixq(src, dst);
-  emit_byte(0x3B);
+  emit_int8(0x3B);
   emit_operand(dst, src);
 }
 
 void Assembler::cmpxchgq(Register reg, Address adr) {
   InstructionMark im(this);
   prefixq(adr, reg);
-  emit_byte(0x0F);
-  emit_byte(0xB1);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xB1);
   emit_operand(reg, adr);
 }
 
 void Assembler::cvtsi2sdq(XMMRegister dst, Register src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F2);
-  emit_byte(0x2A);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x2A);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::cvtsi2sdq(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionMark im(this);
   simd_prefix_q(dst, dst, src, VEX_SIMD_F2);
-  emit_byte(0x2A);
+  emit_int8(0x2A);
   emit_operand(dst, src);
 }
 
 void Assembler::cvtsi2ssq(XMMRegister dst, Register src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
   int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F3);
-  emit_byte(0x2A);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x2A);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::cvtsi2ssq(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
   InstructionMark im(this);
   simd_prefix_q(dst, dst, src, VEX_SIMD_F3);
-  emit_byte(0x2A);
+  emit_int8(0x2A);
   emit_operand(dst, src);
 }
 
 void Assembler::cvttsd2siq(Register dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F2);
-  emit_byte(0x2C);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x2C);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::cvttss2siq(Register dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
   int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F3);
-  emit_byte(0x2C);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x2C);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::decl(Register dst) {
   // Don't use it directly. Use MacroAssembler::decrementl() instead.
   // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
   int encode = prefix_and_encode(dst->encoding());
-  emit_byte(0xFF);
-  emit_byte(0xC8 | encode);
+  emit_int8((unsigned char)0xFF);
+  emit_int8((unsigned char)(0xC8 | encode));
 }
 
 void Assembler::decq(Register dst) {
   // Don't use it directly. Use MacroAssembler::decrementq() instead.
   // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
   int encode = prefixq_and_encode(dst->encoding());
-  emit_byte(0xFF);
-  emit_byte(0xC8 | encode);
+  emit_int8((unsigned char)0xFF);
+  emit_int8(0xC8 | encode);
 }
 
 void Assembler::decq(Address dst) {
   // Don't use it directly. Use MacroAssembler::decrementq() instead.
   InstructionMark im(this);
   prefixq(dst);
-  emit_byte(0xFF);
+  emit_int8((unsigned char)0xFF);
   emit_operand(rcx, dst);
 }
 
 void Assembler::fxrstor(Address src) {
   prefixq(src);
-  emit_byte(0x0F);
-  emit_byte(0xAE);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xAE);
   emit_operand(as_Register(1), src);
 }
 
 void Assembler::fxsave(Address dst) {
   prefixq(dst);
-  emit_byte(0x0F);
-  emit_byte(0xAE);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xAE);
   emit_operand(as_Register(0), dst);
 }
 
 void Assembler::idivq(Register src) {
   int encode = prefixq_and_encode(src->encoding());
-  emit_byte(0xF7);
-  emit_byte(0xF8 | encode);
+  emit_int8((unsigned char)0xF7);
+  emit_int8((unsigned char)(0xF8 | encode));
 }
 
 void Assembler::imulq(Register dst, Register src) {
   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
-  emit_byte(0xAF);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xAF);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::imulq(Register dst, Register src, int value) {
   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
   if (is8bit(value)) {
-    emit_byte(0x6B);
-    emit_byte(0xC0 | encode);
-    emit_byte(value & 0xFF);
+    emit_int8(0x6B);
+    emit_int8((unsigned char)(0xC0 | encode));
+    emit_int8(value & 0xFF);
   } else {
-    emit_byte(0x69);
-    emit_byte(0xC0 | encode);
+    emit_int8(0x69);
+    emit_int8((unsigned char)(0xC0 | encode));
     emit_long(value);
   }
 }
@@ -4934,23 +4940,23 @@ void Assembler::incl(Register dst) {
   // Don't use it directly. Use MacroAssembler::incrementl() instead.
   // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
   int encode = prefix_and_encode(dst->encoding());
-  emit_byte(0xFF);
-  emit_byte(0xC0 | encode);
+  emit_int8((unsigned char)0xFF);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::incq(Register dst) {
   // Don't use it directly. Use MacroAssembler::incrementq() instead.
   // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
   int encode = prefixq_and_encode(dst->encoding());
-  emit_byte(0xFF);
-  emit_byte(0xC0 | encode);
+  emit_int8((unsigned char)0xFF);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::incq(Address dst) {
   // Don't use it directly. Use MacroAssembler::incrementq() instead.
   InstructionMark im(this);
   prefixq(dst);
-  emit_byte(0xFF);
+  emit_int8((unsigned char)0xFF);
   emit_operand(rax, dst);
 }
 
@@ -4961,35 +4967,35 @@ void Assembler::lea(Register dst, Address src) {
 void Assembler::leaq(Register dst, Address src) {
   InstructionMark im(this);
   prefixq(src, dst);
-  emit_byte(0x8D);
+  emit_int8((unsigned char)0x8D);
   emit_operand(dst, src);
 }
 
 void Assembler::mov64(Register dst, int64_t imm64) {
   InstructionMark im(this);
   int encode = prefixq_and_encode(dst->encoding());
-  emit_byte(0xB8 | encode);
+  emit_int8((unsigned char)(0xB8 | encode));
   emit_int64(imm64);
 }
 
 void Assembler::mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec) {
   InstructionMark im(this);
   int encode = prefixq_and_encode(dst->encoding());
-  emit_byte(0xB8 | encode);
+  emit_int8(0xB8 | encode);
   emit_data64(imm64, rspec);
 }
 
 void Assembler::mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec) {
   InstructionMark im(this);
   int encode = prefix_and_encode(dst->encoding());
-  emit_byte(0xB8 | encode);
+  emit_int8((unsigned char)(0xB8 | encode));
   emit_data((int)imm32, rspec, narrow_oop_operand);
 }
 
 void Assembler::mov_narrow_oop(Address dst, int32_t imm32,  RelocationHolder const& rspec) {
   InstructionMark im(this);
   prefix(dst);
-  emit_byte(0xC7);
+  emit_int8((unsigned char)0xC7);
   emit_operand(rax, dst, 4);
   emit_data((int)imm32, rspec, narrow_oop_operand);
 }
@@ -4997,34 +5003,34 @@ void Assembler::mov_narrow_oop(Address dst, int32_t imm32,  RelocationHolder con
 void Assembler::cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec) {
   InstructionMark im(this);
   int encode = prefix_and_encode(src1->encoding());
-  emit_byte(0x81);
-  emit_byte(0xF8 | encode);
+  emit_int8((unsigned char)0x81);
+  emit_int8((unsigned char)(0xF8 | encode));
   emit_data((int)imm32, rspec, narrow_oop_operand);
 }
 
 void Assembler::cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec) {
   InstructionMark im(this);
   prefix(src1);
-  emit_byte(0x81);
+  emit_int8((unsigned char)0x81);
   emit_operand(rax, src1, 4);
   emit_data((int)imm32, rspec, narrow_oop_operand);
 }
 
 void Assembler::lzcntq(Register dst, Register src) {
   assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
-  emit_byte(0xF3);
+  emit_int8((unsigned char)0xF3);
   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
-  emit_byte(0xBD);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xBD);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::movdq(XMMRegister dst, Register src) {
   // table D-1 says MMX/SSE2
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_66);
-  emit_byte(0x6E);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x6E);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::movdq(Register dst, XMMRegister src) {
@@ -5032,43 +5038,43 @@ void Assembler::movdq(Register dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   // swap src/dst to get correct prefix
   int encode = simd_prefix_and_encode_q(src, dst, VEX_SIMD_66);
-  emit_byte(0x7E);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x7E);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::movq(Register dst, Register src) {
   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x8B);
-  emit_byte(0xC0 | encode);
+  emit_int8((unsigned char)0x8B);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::movq(Register dst, Address src) {
   InstructionMark im(this);
   prefixq(src, dst);
-  emit_byte(0x8B);
+  emit_int8((unsigned char)0x8B);
   emit_operand(dst, src);
 }
 
 void Assembler::movq(Address dst, Register src) {
   InstructionMark im(this);
   prefixq(dst, src);
-  emit_byte(0x89);
+  emit_int8((unsigned char)0x89);
   emit_operand(src, dst);
 }
 
 void Assembler::movsbq(Register dst, Address src) {
   InstructionMark im(this);
   prefixq(src, dst);
-  emit_byte(0x0F);
-  emit_byte(0xBE);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xBE);
   emit_operand(dst, src);
 }
 
 void Assembler::movsbq(Register dst, Register src) {
   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
-  emit_byte(0xBE);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xBE);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::movslq(Register dst, int32_t imm32) {
@@ -5078,7 +5084,7 @@ void Assembler::movslq(Register dst, int32_t imm32) {
   ShouldNotReachHere();
   InstructionMark im(this);
   int encode = prefixq_and_encode(dst->encoding());
-  emit_byte(0xC7 | encode);
+  emit_int8((unsigned char)(0xC7 | encode));
   emit_long(imm32);
 }
 
@@ -5086,7 +5092,7 @@ void Assembler::movslq(Address dst, int32_t imm32) {
   assert(is_simm32(imm32), "lost bits");
   InstructionMark im(this);
   prefixq(dst);
-  emit_byte(0xC7);
+  emit_int8((unsigned char)0xC7);
   emit_operand(rax, dst, 4);
   emit_long(imm32);
 }
@@ -5094,77 +5100,77 @@ void Assembler::movslq(Address dst, int32_t imm32) {
 void Assembler::movslq(Register dst, Address src) {
   InstructionMark im(this);
   prefixq(src, dst);
-  emit_byte(0x63);
+  emit_int8(0x63);
   emit_operand(dst, src);
 }
 
 void Assembler::movslq(Register dst, Register src) {
   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x63);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x63);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::movswq(Register dst, Address src) {
   InstructionMark im(this);
   prefixq(src, dst);
-  emit_byte(0x0F);
-  emit_byte(0xBF);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xBF);
   emit_operand(dst, src);
 }
 
 void Assembler::movswq(Register dst, Register src) {
   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
-  emit_byte(0xBF);
-  emit_byte(0xC0 | encode);
+  emit_int8((unsigned char)0x0F);
+  emit_int8((unsigned char)0xBF);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::movzbq(Register dst, Address src) {
   InstructionMark im(this);
   prefixq(src, dst);
-  emit_byte(0x0F);
-  emit_byte(0xB6);
+  emit_int8((unsigned char)0x0F);
+  emit_int8((unsigned char)0xB6);
   emit_operand(dst, src);
 }
 
 void Assembler::movzbq(Register dst, Register src) {
   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
-  emit_byte(0xB6);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xB6);
+  emit_int8(0xC0 | encode);
 }
 
 void Assembler::movzwq(Register dst, Address src) {
   InstructionMark im(this);
   prefixq(src, dst);
-  emit_byte(0x0F);
-  emit_byte(0xB7);
+  emit_int8((unsigned char)0x0F);
+  emit_int8((unsigned char)0xB7);
   emit_operand(dst, src);
 }
 
 void Assembler::movzwq(Register dst, Register src) {
   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
-  emit_byte(0xB7);
-  emit_byte(0xC0 | encode);
+  emit_int8((unsigned char)0x0F);
+  emit_int8((unsigned char)0xB7);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::negq(Register dst) {
   int encode = prefixq_and_encode(dst->encoding());
-  emit_byte(0xF7);
-  emit_byte(0xD8 | encode);
+  emit_int8((unsigned char)0xF7);
+  emit_int8((unsigned char)(0xD8 | encode));
 }
 
 void Assembler::notq(Register dst) {
   int encode = prefixq_and_encode(dst->encoding());
-  emit_byte(0xF7);
-  emit_byte(0xD0 | encode);
+  emit_int8((unsigned char)0xF7);
+  emit_int8((unsigned char)(0xD0 | encode));
 }
 
 void Assembler::orq(Address dst, int32_t imm32) {
   InstructionMark im(this);
   prefixq(dst);
-  emit_byte(0x81);
+  emit_int8((unsigned char)0x81);
   emit_operand(rcx, dst, 4);
   emit_long(imm32);
 }
@@ -5177,7 +5183,7 @@ void Assembler::orq(Register dst, int32_t imm32) {
 void Assembler::orq(Register dst, Address src) {
   InstructionMark im(this);
   prefixq(src, dst);
-  emit_byte(0x0B);
+  emit_int8(0x0B);
   emit_operand(dst, src);
 }
 
@@ -5210,26 +5216,26 @@ void Assembler::popa() { // 64bit
 void Assembler::popcntq(Register dst, Address src) {
   assert(VM_Version::supports_popcnt(), "must support");
   InstructionMark im(this);
-  emit_byte(0xF3);
+  emit_int8((unsigned char)0xF3);
   prefixq(src, dst);
-  emit_byte(0x0F);
-  emit_byte(0xB8);
+  emit_int8((unsigned char)0x0F);
+  emit_int8((unsigned char)0xB8);
   emit_operand(dst, src);
 }
 
 void Assembler::popcntq(Register dst, Register src) {
   assert(VM_Version::supports_popcnt(), "must support");
-  emit_byte(0xF3);
+  emit_int8((unsigned char)0xF3);
   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
-  emit_byte(0xB8);
-  emit_byte(0xC0 | encode);
+  emit_int8((unsigned char)0x0F);
+  emit_int8((unsigned char)0xB8);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::popq(Address dst) {
   InstructionMark im(this);
   prefixq(dst);
-  emit_byte(0x8F);
+  emit_int8((unsigned char)0x8F);
   emit_operand(rax, dst);
 }
 
@@ -5261,7 +5267,7 @@ void Assembler::pusha() { // 64bit
 void Assembler::pushq(Address src) {
   InstructionMark im(this);
   prefixq(src);
-  emit_byte(0xFF);
+  emit_int8((unsigned char)0xFF);
   emit_operand(rsi, src);
 }
 
@@ -5269,31 +5275,31 @@ void Assembler::rclq(Register dst, int imm8) {
   assert(isShiftCount(imm8 >> 1), "illegal shift count");
   int encode = prefixq_and_encode(dst->encoding());
   if (imm8 == 1) {
-    emit_byte(0xD1);
-    emit_byte(0xD0 | encode);
+    emit_int8((unsigned char)0xD1);
+    emit_int8((unsigned char)(0xD0 | encode));
   } else {
-    emit_byte(0xC1);
-    emit_byte(0xD0 | encode);
-    emit_byte(imm8);
+    emit_int8((unsigned char)0xC1);
+    emit_int8((unsigned char)(0xD0 | encode));
+    emit_int8(imm8);
   }
 }
 void Assembler::sarq(Register dst, int imm8) {
   assert(isShiftCount(imm8 >> 1), "illegal shift count");
   int encode = prefixq_and_encode(dst->encoding());
   if (imm8 == 1) {
-    emit_byte(0xD1);
-    emit_byte(0xF8 | encode);
+    emit_int8((unsigned char)0xD1);
+    emit_int8((unsigned char)(0xF8 | encode));
   } else {
-    emit_byte(0xC1);
-    emit_byte(0xF8 | encode);
-    emit_byte(imm8);
+    emit_int8((unsigned char)0xC1);
+    emit_int8((unsigned char)(0xF8 | encode));
+    emit_int8(imm8);
   }
 }
 
 void Assembler::sarq(Register dst) {
   int encode = prefixq_and_encode(dst->encoding());
-  emit_byte(0xD3);
-  emit_byte(0xF8 | encode);
+  emit_int8((unsigned char)0xD3);
+  emit_int8((unsigned char)(0xF8 | encode));
 }
 
 void Assembler::sbbq(Address dst, int32_t imm32) {
@@ -5310,7 +5316,7 @@ void Assembler::sbbq(Register dst, int32_t imm32) {
 void Assembler::sbbq(Register dst, Address src) {
   InstructionMark im(this);
   prefixq(src, dst);
-  emit_byte(0x1B);
+  emit_int8(0x1B);
   emit_operand(dst, src);
 }
 
@@ -5323,33 +5329,33 @@ void Assembler::shlq(Register dst, int imm8) {
   assert(isShiftCount(imm8 >> 1), "illegal shift count");
   int encode = prefixq_and_encode(dst->encoding());
   if (imm8 == 1) {
-    emit_byte(0xD1);
-    emit_byte(0xE0 | encode);
+    emit_int8((unsigned char)0xD1);
+    emit_int8((unsigned char)(0xE0 | encode));
   } else {
-    emit_byte(0xC1);
-    emit_byte(0xE0 | encode);
-    emit_byte(imm8);
+    emit_int8((unsigned char)0xC1);
+    emit_int8((unsigned char)(0xE0 | encode));
+    emit_int8(imm8);
   }
 }
 
 void Assembler::shlq(Register dst) {
   int encode = prefixq_and_encode(dst->encoding());
-  emit_byte(0xD3);
-  emit_byte(0xE0 | encode);
+  emit_int8((unsigned char)0xD3);
+  emit_int8((unsigned char)(0xE0 | encode));
 }
 
 void Assembler::shrq(Register dst, int imm8) {
   assert(isShiftCount(imm8 >> 1), "illegal shift count");
   int encode = prefixq_and_encode(dst->encoding());
-  emit_byte(0xC1);
-  emit_byte(0xE8 | encode);
-  emit_byte(imm8);
+  emit_int8((unsigned char)0xC1);
+  emit_int8((unsigned char)(0xE8 | encode));
+  emit_int8(imm8);
 }
 
 void Assembler::shrq(Register dst) {
   int encode = prefixq_and_encode(dst->encoding());
-  emit_byte(0xD3);
-  emit_byte(0xE8 | encode);
+  emit_int8((unsigned char)0xD3);
+  emit_int8(0xE8 | encode);
 }
 
 void Assembler::subq(Address dst, int32_t imm32) {
@@ -5361,7 +5367,7 @@ void Assembler::subq(Address dst, int32_t imm32) {
 void Assembler::subq(Address dst, Register src) {
   InstructionMark im(this);
   prefixq(dst, src);
-  emit_byte(0x29);
+  emit_int8(0x29);
   emit_operand(src, dst);
 }
 
@@ -5379,7 +5385,7 @@ void Assembler::subq_imm32(Register dst, int32_t imm32) {
 void Assembler::subq(Register dst, Address src) {
   InstructionMark im(this);
   prefixq(src, dst);
-  emit_byte(0x2B);
+  emit_int8(0x2B);
   emit_operand(dst, src);
 }
 
@@ -5395,11 +5401,11 @@ void Assembler::testq(Register dst, int32_t imm32) {
   int encode = dst->encoding();
   if (encode == 0) {
     prefix(REX_W);
-    emit_byte(0xA9);
+    emit_int8((unsigned char)0xA9);
   } else {
     encode = prefixq_and_encode(encode);
-    emit_byte(0xF7);
-    emit_byte(0xC0 | encode);
+    emit_int8((unsigned char)0xF7);
+    emit_int8((unsigned char)(0xC0 | encode));
   }
   emit_long(imm32);
 }
@@ -5412,22 +5418,22 @@ void Assembler::testq(Register dst, Register src) {
 void Assembler::xaddq(Address dst, Register src) {
   InstructionMark im(this);
   prefixq(dst, src);
-  emit_byte(0x0F);
-  emit_byte(0xC1);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xC1);
   emit_operand(src, dst);
 }
 
 void Assembler::xchgq(Register dst, Address src) {
   InstructionMark im(this);
   prefixq(src, dst);
-  emit_byte(0x87);
+  emit_int8((unsigned char)0x87);
   emit_operand(dst, src);
 }
 
 void Assembler::xchgq(Register dst, Register src) {
   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x87);
-  emit_byte(0xc0 | encode);
+  emit_int8((unsigned char)0x87);
+  emit_int8((unsigned char)(0xc0 | encode));
 }
 
 void Assembler::xorq(Register dst, Register src) {
@@ -5438,7 +5444,7 @@ void Assembler::xorq(Register dst, Register src) {
 void Assembler::xorq(Register dst, Address src) {
   InstructionMark im(this);
   prefixq(src, dst);
-  emit_byte(0x33);
+  emit_int8(0x33);
   emit_operand(dst, src);
 }
 
diff --git a/src/cpu/x86/vm/c1_CodeStubs_x86.cpp b/src/cpu/x86/vm/c1_CodeStubs_x86.cpp
index be2c1097a45920148ce351b73910238c5e0ba1e7..53c7cbacd1c28a8c54c5281fc518eaeb17af2ac5 100644
--- a/src/cpu/x86/vm/c1_CodeStubs_x86.cpp
+++ b/src/cpu/x86/vm/c1_CodeStubs_x86.cpp
@@ -313,10 +313,10 @@ void PatchingStub::emit_code(LIR_Assembler* ce) {
 #endif
   } else {
     // make a copy the code which is going to be patched.
-    for ( int i = 0; i < _bytes_to_copy; i++) {
+    for (int i = 0; i < _bytes_to_copy; i++) {
       address ptr = (address)(_pc_start + i);
       int a_byte = (*ptr) & 0xFF;
-      __ a_byte (a_byte);
+      __ emit_int8(a_byte);
       *ptr = 0x90; // make the site look like a nop
     }
   }
@@ -363,11 +363,11 @@ void PatchingStub::emit_code(LIR_Assembler* ce) {
   // emit the offsets needed to find the code to patch
   int being_initialized_entry_offset = __ pc() - being_initialized_entry + sizeof_patch_record;
 
-  __ a_byte(0xB8);
-  __ a_byte(0);
-  __ a_byte(being_initialized_entry_offset);
-  __ a_byte(bytes_to_skip);
-  __ a_byte(_bytes_to_copy);
+  __ emit_int8((unsigned char)0xB8);
+  __ emit_int8(0);
+  __ emit_int8(being_initialized_entry_offset);
+  __ emit_int8(bytes_to_skip);
+  __ emit_int8(_bytes_to_copy);
   address patch_info_pc = __ pc();
   assert(patch_info_pc - end_of_patch == bytes_to_skip, "incorrect patch info");
 
diff --git a/src/cpu/x86/vm/cppInterpreter_x86.cpp b/src/cpu/x86/vm/cppInterpreter_x86.cpp
index 946c400e59021d8bddde3016d0b02f13cfc330da..9c3a2f31aebab64a2e96af8b1c55e97f36443239 100644
--- a/src/cpu/x86/vm/cppInterpreter_x86.cpp
+++ b/src/cpu/x86/vm/cppInterpreter_x86.cpp
@@ -611,8 +611,6 @@ void InterpreterGenerator::generate_counter_overflow(Label* do_continue) {
   // C++ interpreter only
   // rsi/r13 - previous interpreter state pointer
 
-  const Address size_of_parameters(rbx, Method::size_of_parameters_offset());
-
   // InterpreterRuntime::frequency_counter_overflow takes one argument
   // indicating if the counter overflow occurs at a backwards branch (non-NULL bcp).
   // The call returns the address of the verified entry point for the method or NULL
@@ -977,15 +975,16 @@ address InterpreterGenerator::generate_native_entry(bool synchronized) {
   //      to save/restore.
   address entry_point = __ pc();
 
-  const Address size_of_parameters(rbx, Method::size_of_parameters_offset());
-  const Address size_of_locals    (rbx, Method::size_of_locals_offset());
+  const Address constMethod       (rbx, Method::const_offset());
   const Address invocation_counter(rbx, Method::invocation_counter_offset() + InvocationCounter::counter_offset());
   const Address access_flags      (rbx, Method::access_flags_offset());
+  const Address size_of_parameters(rcx, ConstMethod::size_of_parameters_offset());
 
   // rsi/r13 == state/locals rdi == prevstate
   const Register locals = rdi;
 
   // get parameter size (always needed)
+  __ movptr(rcx, constMethod);
   __ load_unsigned_short(rcx, size_of_parameters);
 
   // rbx: Method*
@@ -994,6 +993,7 @@ address InterpreterGenerator::generate_native_entry(bool synchronized) {
   // for natives the size of locals is zero
 
   // compute beginning of parameters /locals
+
   __ lea(locals, Address(rsp, rcx, Address::times_ptr, -wordSize));
 
   // initialize fixed part of activation frame
@@ -1107,11 +1107,14 @@ address InterpreterGenerator::generate_native_entry(bool synchronized) {
   const Register method = rbx;
   const Register thread = LP64_ONLY(r15_thread) NOT_LP64(rdi);
   const Register t      = InterpreterRuntime::SignatureHandlerGenerator::temp();    // rcx|rscratch1
+  const Address constMethod       (method, Method::const_offset());
+  const Address size_of_parameters(t, ConstMethod::size_of_parameters_offset());
 
   // allocate space for parameters
   __ movptr(method, STATE(_method));
   __ verify_method_ptr(method);
-  __ load_unsigned_short(t, Address(method, Method::size_of_parameters_offset()));
+  __ movptr(t, constMethod);
+  __ load_unsigned_short(t, size_of_parameters);
   __ shll(t, 2);
 #ifdef _LP64
   __ subptr(rsp, t);
@@ -1700,15 +1703,17 @@ address InterpreterGenerator::generate_normal_entry(bool synchronized) {
   // save sender sp
   __ push(rcx);
 
-  const Address size_of_parameters(rbx, Method::size_of_parameters_offset());
-  const Address size_of_locals    (rbx, Method::size_of_locals_offset());
+  const Address constMethod       (rbx, Method::const_offset());
   const Address access_flags      (rbx, Method::access_flags_offset());
+  const Address size_of_parameters(rdx, ConstMethod::size_of_parameters_offset());
+  const Address size_of_locals    (rdx, ConstMethod::size_of_locals_offset());
 
   // const Address monitor_block_top (rbp, frame::interpreter_frame_monitor_block_top_offset * wordSize);
   // const Address monitor_block_bot (rbp, frame::interpreter_frame_initial_sp_offset        * wordSize);
   // const Address monitor(rbp, frame::interpreter_frame_initial_sp_offset * wordSize - (int)sizeof(BasicObjectLock));
 
   // get parameter size (always needed)
+  __ movptr(rdx, constMethod);
   __ load_unsigned_short(rcx, size_of_parameters);
 
   // rbx: Method*
@@ -1989,7 +1994,9 @@ address InterpreterGenerator::generate_normal_entry(bool synchronized) {
   __ movptr(rbx, STATE(_result._to_call._callee));
 
   // callee left args on top of expression stack, remove them
-  __ load_unsigned_short(rcx, Address(rbx, Method::size_of_parameters_offset()));
+  __ movptr(rcx, constMethod);
+  __ load_unsigned_short(rcx, Address(rcx, ConstMethod::size_of_parameters_offset()));
+
   __ lea(rsp, Address(rsp, rcx, Address::times_ptr));
 
   __ movl(rcx, Address(rbx, Method::result_index_offset()));
@@ -2159,7 +2166,9 @@ address InterpreterGenerator::generate_normal_entry(bool synchronized) {
   // Make it look like call_stub calling conventions
 
   // Get (potential) receiver
-  __ load_unsigned_short(rcx, size_of_parameters);                   // get size of parameters in words
+  // get size of parameters in words
+  __ movptr(rcx, constMethod);
+  __ load_unsigned_short(rcx, Address(rcx, ConstMethod::size_of_parameters_offset()));
 
   ExternalAddress recursive(CAST_FROM_FN_PTR(address, RecursiveInterpreterActivation));
   __ pushptr(recursive.addr());                                      // make it look good in the debugger
diff --git a/src/cpu/x86/vm/macroAssembler_x86.cpp b/src/cpu/x86/vm/macroAssembler_x86.cpp
index fa2d7fa4376d292be3a0bac2edf5e2add5549bd1..b9d85636cf88c6c27614423506865482291de16a 100644
--- a/src/cpu/x86/vm/macroAssembler_x86.cpp
+++ b/src/cpu/x86/vm/macroAssembler_x86.cpp
@@ -1023,7 +1023,7 @@ void MacroAssembler::lea(Address dst, AddressLiteral adr) {
 
 void MacroAssembler::leave() {
   // %%% is this really better? Why not on 32bit too?
-  emit_byte(0xC9); // LEAVE
+  emit_int8((unsigned char)0xC9); // LEAVE
 }
 
 void MacroAssembler::lneg(Register hi, Register lo) {
@@ -2112,11 +2112,11 @@ void MacroAssembler::fat_nop() {
   if (UseAddressNop) {
     addr_nop_5();
   } else {
-    emit_byte(0x26); // es:
-    emit_byte(0x2e); // cs:
-    emit_byte(0x64); // fs:
-    emit_byte(0x65); // gs:
-    emit_byte(0x90);
+    emit_int8(0x26); // es:
+    emit_int8(0x2e); // cs:
+    emit_int8(0x64); // fs:
+    emit_int8(0x65); // gs:
+    emit_int8((unsigned char)0x90);
   }
 }
 
@@ -2534,12 +2534,12 @@ void MacroAssembler::jump_cc(Condition cc, AddressLiteral dst) {
     int offs = (intptr_t)dst.target() - ((intptr_t)pc());
     if (dst.reloc() == relocInfo::none && is8bit(offs - short_size)) {
       // 0111 tttn #8-bit disp
-      emit_byte(0x70 | cc);
-      emit_byte((offs - short_size) & 0xFF);
+      emit_int8(0x70 | cc);
+      emit_int8((offs - short_size) & 0xFF);
     } else {
       // 0000 1111 1000 tttn #32-bit disp
-      emit_byte(0x0F);
-      emit_byte(0x80 | cc);
+      emit_int8(0x0F);
+      emit_int8((unsigned char)(0x80 | cc));
       emit_long(offs - long_size);
     }
   } else {
@@ -3085,7 +3085,8 @@ void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) {
 
 void MacroAssembler::pshufb(XMMRegister dst, AddressLiteral src) {
   // Used in sign-bit flipping with aligned address.
-  assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
+  bool aligned_adr = (((intptr_t)src.target() & 15) == 0);
+  assert((UseAVX > 0) || aligned_adr, "SSE mode requires address alignment 16 bytes");
   if (reachable(src)) {
     Assembler::pshufb(dst, as_Address(src));
   } else {
diff --git a/src/cpu/x86/vm/macroAssembler_x86.hpp b/src/cpu/x86/vm/macroAssembler_x86.hpp
index 36d4d2311550b430a945cbb6a7ef19fbc776cc3a..10eab0c8a4fe648bd06e01acee507adc517f3326 100644
--- a/src/cpu/x86/vm/macroAssembler_x86.hpp
+++ b/src/cpu/x86/vm/macroAssembler_x86.hpp
@@ -126,25 +126,6 @@ class MacroAssembler: public Assembler {
     }
   }
 
-#ifndef PRODUCT
-  static void pd_print_patched_instruction(address branch) {
-    const char* s;
-    unsigned char op = branch[0];
-    if (op == 0xE8) {
-      s = "call";
-    } else if (op == 0xE9 || op == 0xEB) {
-      s = "jmp";
-    } else if ((op & 0xF0) == 0x70) {
-      s = "jcc";
-    } else if (op == 0x0F) {
-      s = "jcc";
-    } else {
-      s = "????";
-    }
-    tty->print("%s (unresolved)", s);
-  }
-#endif
-
   // The following 4 methods return the offset of the appropriate move instruction
 
   // Support for fast byte/short loading with zero extension (depending on particular CPU)
diff --git a/src/cpu/x86/vm/methodHandles_x86.cpp b/src/cpu/x86/vm/methodHandles_x86.cpp
index 7da3a2c423ab87650843b8056877a495ac45b9bf..95d5123f406fb3d6a2605359f5181384b0ec31ab 100644
--- a/src/cpu/x86/vm/methodHandles_x86.cpp
+++ b/src/cpu/x86/vm/methodHandles_x86.cpp
@@ -169,8 +169,9 @@ void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm,
 
   if (VerifyMethodHandles && !for_compiler_entry) {
     // make sure recv is already on stack
+    __ movptr(temp2, Address(method_temp, Method::const_offset()));
     __ load_sized_value(temp2,
-                        Address(method_temp, Method::size_of_parameters_offset()),
+                        Address(temp2, ConstMethod::size_of_parameters_offset()),
                         sizeof(u2), /*is_signed*/ false);
     // assert(sizeof(u2) == sizeof(Method::_size_of_parameters), "");
     Label L;
@@ -234,8 +235,9 @@ address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler*
   int ref_kind = signature_polymorphic_intrinsic_ref_kind(iid);
   assert(ref_kind != 0 || iid == vmIntrinsics::_invokeBasic, "must be _invokeBasic or a linkTo intrinsic");
   if (ref_kind == 0 || MethodHandles::ref_kind_has_receiver(ref_kind)) {
+    __ movptr(rdx_argp, Address(rbx_method, Method::const_offset()));
     __ load_sized_value(rdx_argp,
-                        Address(rbx_method, Method::size_of_parameters_offset()),
+                        Address(rdx_argp, ConstMethod::size_of_parameters_offset()),
                         sizeof(u2), /*is_signed*/ false);
     // assert(sizeof(u2) == sizeof(Method::_size_of_parameters), "");
     rdx_first_arg_addr = __ argument_address(rdx_argp, -1);
diff --git a/src/cpu/x86/vm/stubGenerator_x86_32.cpp b/src/cpu/x86/vm/stubGenerator_x86_32.cpp
index 52e3f4169af3363248920ce0b244f27cef0cf7c6..3bf5dc5e0fcc8ede911feb11c74e58eb502e20e7 100644
--- a/src/cpu/x86/vm/stubGenerator_x86_32.cpp
+++ b/src/cpu/x86/vm/stubGenerator_x86_32.cpp
@@ -2174,13 +2174,13 @@ class StubGenerator: public StubCodeGenerator {
   //   c_rarg2   - K (key) in little endian int array
   //
   address generate_aescrypt_encryptBlock() {
-    assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
+    assert(UseAES, "need AES instructions and misaligned SSE support");
     __ align(CodeEntryAlignment);
     StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock");
     Label L_doLast;
     address start = __ pc();
 
-    const Register from        = rsi;      // source array address
+    const Register from        = rdx;      // source array address
     const Register to          = rdx;      // destination array address
     const Register key         = rcx;      // key array address
     const Register keylen      = rax;
@@ -2189,47 +2189,74 @@ class StubGenerator: public StubCodeGenerator {
     const Address  key_param (rbp, 8+8);
 
     const XMMRegister xmm_result = xmm0;
-    const XMMRegister xmm_temp   = xmm1;
-    const XMMRegister xmm_key_shuf_mask = xmm2;
+    const XMMRegister xmm_key_shuf_mask = xmm1;
+    const XMMRegister xmm_temp1  = xmm2;
+    const XMMRegister xmm_temp2  = xmm3;
+    const XMMRegister xmm_temp3  = xmm4;
+    const XMMRegister xmm_temp4  = xmm5;
 
-    __ enter(); // required for proper stackwalking of RuntimeStub frame
-    __ push(rsi);
-    __ movptr(from , from_param);
-    __ movptr(to   , to_param);
-    __ movptr(key  , key_param);
+    __ enter();   // required for proper stackwalking of RuntimeStub frame
+    __ movptr(from, from_param);
+    __ movptr(key, key_param);
 
+    // keylen could be only {11, 13, 15} * 4 = {44, 52, 60}
     __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
-    // keylen = # of 32-bit words, convert to 128-bit words
-    __ shrl(keylen, 2);
-    __ subl(keylen, 11);   // every key has at least 11 128-bit words, some have more
 
     __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
     __ movdqu(xmm_result, Address(from, 0));  // get 16 bytes of input
+    __ movptr(to, to_param);
 
     // For encryption, the java expanded key ordering is just what we need
 
-    load_key(xmm_temp, key, 0x00, xmm_key_shuf_mask);
-    __ pxor(xmm_result, xmm_temp);
-    for (int offset = 0x10; offset <= 0x90; offset += 0x10) {
-      aes_enc_key(xmm_result, xmm_temp, key, offset, xmm_key_shuf_mask);
-    }
-    load_key  (xmm_temp, key, 0xa0, xmm_key_shuf_mask);
-    __ cmpl(keylen, 0);
-    __ jcc(Assembler::equal, L_doLast);
-    __ aesenc(xmm_result, xmm_temp);                   // only in 192 and 256 bit keys
-    aes_enc_key(xmm_result, xmm_temp, key, 0xb0, xmm_key_shuf_mask);
-    load_key(xmm_temp, key, 0xc0, xmm_key_shuf_mask);
-    __ subl(keylen, 2);
-    __ jcc(Assembler::equal, L_doLast);
-    __ aesenc(xmm_result, xmm_temp);                   // only in 256 bit keys
-    aes_enc_key(xmm_result, xmm_temp, key, 0xd0, xmm_key_shuf_mask);
-    load_key(xmm_temp, key, 0xe0, xmm_key_shuf_mask);
+    load_key(xmm_temp1, key, 0x00, xmm_key_shuf_mask);
+    __ pxor(xmm_result, xmm_temp1);
+
+    load_key(xmm_temp1, key, 0x10, xmm_key_shuf_mask);
+    load_key(xmm_temp2, key, 0x20, xmm_key_shuf_mask);
+    load_key(xmm_temp3, key, 0x30, xmm_key_shuf_mask);
+    load_key(xmm_temp4, key, 0x40, xmm_key_shuf_mask);
+
+    __ aesenc(xmm_result, xmm_temp1);
+    __ aesenc(xmm_result, xmm_temp2);
+    __ aesenc(xmm_result, xmm_temp3);
+    __ aesenc(xmm_result, xmm_temp4);
+
+    load_key(xmm_temp1, key, 0x50, xmm_key_shuf_mask);
+    load_key(xmm_temp2, key, 0x60, xmm_key_shuf_mask);
+    load_key(xmm_temp3, key, 0x70, xmm_key_shuf_mask);
+    load_key(xmm_temp4, key, 0x80, xmm_key_shuf_mask);
+
+    __ aesenc(xmm_result, xmm_temp1);
+    __ aesenc(xmm_result, xmm_temp2);
+    __ aesenc(xmm_result, xmm_temp3);
+    __ aesenc(xmm_result, xmm_temp4);
+
+    load_key(xmm_temp1, key, 0x90, xmm_key_shuf_mask);
+    load_key(xmm_temp2, key, 0xa0, xmm_key_shuf_mask);
+
+    __ cmpl(keylen, 44);
+    __ jccb(Assembler::equal, L_doLast);
+
+    __ aesenc(xmm_result, xmm_temp1);
+    __ aesenc(xmm_result, xmm_temp2);
+
+    load_key(xmm_temp1, key, 0xb0, xmm_key_shuf_mask);
+    load_key(xmm_temp2, key, 0xc0, xmm_key_shuf_mask);
+
+    __ cmpl(keylen, 52);
+    __ jccb(Assembler::equal, L_doLast);
+
+    __ aesenc(xmm_result, xmm_temp1);
+    __ aesenc(xmm_result, xmm_temp2);
+
+    load_key(xmm_temp1, key, 0xd0, xmm_key_shuf_mask);
+    load_key(xmm_temp2, key, 0xe0, xmm_key_shuf_mask);
 
     __ BIND(L_doLast);
-    __ aesenclast(xmm_result, xmm_temp);
+    __ aesenc(xmm_result, xmm_temp1);
+    __ aesenclast(xmm_result, xmm_temp2);
     __ movdqu(Address(to, 0), xmm_result);        // store the result
     __ xorptr(rax, rax); // return 0
-    __ pop(rsi);
     __ leave(); // required for proper stackwalking of RuntimeStub frame
     __ ret(0);
 
@@ -2245,13 +2272,13 @@ class StubGenerator: public StubCodeGenerator {
   //   c_rarg2   - K (key) in little endian int array
   //
   address generate_aescrypt_decryptBlock() {
-    assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
+    assert(UseAES, "need AES instructions and misaligned SSE support");
     __ align(CodeEntryAlignment);
     StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock");
     Label L_doLast;
     address start = __ pc();
 
-    const Register from        = rsi;      // source array address
+    const Register from        = rdx;      // source array address
     const Register to          = rdx;      // destination array address
     const Register key         = rcx;      // key array address
     const Register keylen      = rax;
@@ -2260,51 +2287,76 @@ class StubGenerator: public StubCodeGenerator {
     const Address  key_param (rbp, 8+8);
 
     const XMMRegister xmm_result = xmm0;
-    const XMMRegister xmm_temp   = xmm1;
-    const XMMRegister xmm_key_shuf_mask = xmm2;
+    const XMMRegister xmm_key_shuf_mask = xmm1;
+    const XMMRegister xmm_temp1  = xmm2;
+    const XMMRegister xmm_temp2  = xmm3;
+    const XMMRegister xmm_temp3  = xmm4;
+    const XMMRegister xmm_temp4  = xmm5;
 
     __ enter(); // required for proper stackwalking of RuntimeStub frame
-    __ push(rsi);
-    __ movptr(from , from_param);
-    __ movptr(to   , to_param);
-    __ movptr(key  , key_param);
+    __ movptr(from, from_param);
+    __ movptr(key, key_param);
 
+    // keylen could be only {11, 13, 15} * 4 = {44, 52, 60}
     __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
-    // keylen = # of 32-bit words, convert to 128-bit words
-    __ shrl(keylen, 2);
-    __ subl(keylen, 11);   // every key has at least 11 128-bit words, some have more
 
     __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
     __ movdqu(xmm_result, Address(from, 0));
+    __ movptr(to, to_param);
 
     // for decryption java expanded key ordering is rotated one position from what we want
     // so we start from 0x10 here and hit 0x00 last
     // we don't know if the key is aligned, hence not using load-execute form
-    load_key(xmm_temp, key, 0x10, xmm_key_shuf_mask);
-    __ pxor  (xmm_result, xmm_temp);
-    for (int offset = 0x20; offset <= 0xa0; offset += 0x10) {
-      aes_dec_key(xmm_result, xmm_temp, key, offset, xmm_key_shuf_mask);
-    }
-    __ cmpl(keylen, 0);
-    __ jcc(Assembler::equal, L_doLast);
-    // only in 192 and 256 bit keys
-    aes_dec_key(xmm_result, xmm_temp, key, 0xb0, xmm_key_shuf_mask);
-    aes_dec_key(xmm_result, xmm_temp, key, 0xc0, xmm_key_shuf_mask);
-    __ subl(keylen, 2);
-    __ jcc(Assembler::equal, L_doLast);
-    // only in 256 bit keys
-    aes_dec_key(xmm_result, xmm_temp, key, 0xd0, xmm_key_shuf_mask);
-    aes_dec_key(xmm_result, xmm_temp, key, 0xe0, xmm_key_shuf_mask);
+    load_key(xmm_temp1, key, 0x10, xmm_key_shuf_mask);
+    load_key(xmm_temp2, key, 0x20, xmm_key_shuf_mask);
+    load_key(xmm_temp3, key, 0x30, xmm_key_shuf_mask);
+    load_key(xmm_temp4, key, 0x40, xmm_key_shuf_mask);
+
+    __ pxor  (xmm_result, xmm_temp1);
+    __ aesdec(xmm_result, xmm_temp2);
+    __ aesdec(xmm_result, xmm_temp3);
+    __ aesdec(xmm_result, xmm_temp4);
+
+    load_key(xmm_temp1, key, 0x50, xmm_key_shuf_mask);
+    load_key(xmm_temp2, key, 0x60, xmm_key_shuf_mask);
+    load_key(xmm_temp3, key, 0x70, xmm_key_shuf_mask);
+    load_key(xmm_temp4, key, 0x80, xmm_key_shuf_mask);
+
+    __ aesdec(xmm_result, xmm_temp1);
+    __ aesdec(xmm_result, xmm_temp2);
+    __ aesdec(xmm_result, xmm_temp3);
+    __ aesdec(xmm_result, xmm_temp4);
+
+    load_key(xmm_temp1, key, 0x90, xmm_key_shuf_mask);
+    load_key(xmm_temp2, key, 0xa0, xmm_key_shuf_mask);
+    load_key(xmm_temp3, key, 0x00, xmm_key_shuf_mask);
+
+    __ cmpl(keylen, 44);
+    __ jccb(Assembler::equal, L_doLast);
+
+    __ aesdec(xmm_result, xmm_temp1);
+    __ aesdec(xmm_result, xmm_temp2);
+
+    load_key(xmm_temp1, key, 0xb0, xmm_key_shuf_mask);
+    load_key(xmm_temp2, key, 0xc0, xmm_key_shuf_mask);
+
+    __ cmpl(keylen, 52);
+    __ jccb(Assembler::equal, L_doLast);
+
+    __ aesdec(xmm_result, xmm_temp1);
+    __ aesdec(xmm_result, xmm_temp2);
+
+    load_key(xmm_temp1, key, 0xd0, xmm_key_shuf_mask);
+    load_key(xmm_temp2, key, 0xe0, xmm_key_shuf_mask);
 
     __ BIND(L_doLast);
-    // for decryption the aesdeclast operation is always on key+0x00
-    load_key(xmm_temp, key, 0x00, xmm_key_shuf_mask);
-    __ aesdeclast(xmm_result, xmm_temp);
+    __ aesdec(xmm_result, xmm_temp1);
+    __ aesdec(xmm_result, xmm_temp2);
 
+    // for decryption the aesdeclast operation is always on key+0x00
+    __ aesdeclast(xmm_result, xmm_temp3);
     __ movdqu(Address(to, 0), xmm_result);  // store the result
-
     __ xorptr(rax, rax); // return 0
-    __ pop(rsi);
     __ leave(); // required for proper stackwalking of RuntimeStub frame
     __ ret(0);
 
@@ -2340,7 +2392,7 @@ class StubGenerator: public StubCodeGenerator {
   //   c_rarg4   - input length
   //
   address generate_cipherBlockChaining_encryptAESCrypt() {
-    assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
+    assert(UseAES, "need AES instructions and misaligned SSE support");
     __ align(CodeEntryAlignment);
     StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt");
     address start = __ pc();
@@ -2393,7 +2445,7 @@ class StubGenerator: public StubCodeGenerator {
     __ jcc(Assembler::notEqual, L_key_192_256);
 
     // 128 bit code follows here
-    __ movptr(pos, 0);
+    __ movl(pos, 0);
     __ align(OptoLoopAlignment);
     __ BIND(L_loopTop_128);
     __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of input
@@ -2423,15 +2475,15 @@ class StubGenerator: public StubCodeGenerator {
     __ leave();                                  // required for proper stackwalking of RuntimeStub frame
     __ ret(0);
 
-  __ BIND(L_key_192_256);
-  // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256)
+    __ BIND(L_key_192_256);
+    // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256)
     __ cmpl(rax, 52);
     __ jcc(Assembler::notEqual, L_key_256);
 
     // 192-bit code follows here (could be changed to use more xmm registers)
-    __ movptr(pos, 0);
-  __ align(OptoLoopAlignment);
-  __ BIND(L_loopTop_192);
+    __ movl(pos, 0);
+    __ align(OptoLoopAlignment);
+    __ BIND(L_loopTop_192);
     __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of input
     __ pxor  (xmm_result, xmm_temp);                                // xor with the current r vector
 
@@ -2452,11 +2504,11 @@ class StubGenerator: public StubCodeGenerator {
     __ jcc(Assembler::notEqual, L_loopTop_192);
     __ jmp(L_exit);
 
-  __ BIND(L_key_256);
+    __ BIND(L_key_256);
     // 256-bit code follows here (could be changed to use more xmm registers)
-    __ movptr(pos, 0);
-  __ align(OptoLoopAlignment);
-  __ BIND(L_loopTop_256);
+    __ movl(pos, 0);
+    __ align(OptoLoopAlignment);
+    __ BIND(L_loopTop_256);
     __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of input
     __ pxor  (xmm_result, xmm_temp);                                // xor with the current r vector
 
@@ -2495,7 +2547,7 @@ class StubGenerator: public StubCodeGenerator {
   //
 
   address generate_cipherBlockChaining_decryptAESCrypt() {
-    assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
+    assert(UseAES, "need AES instructions and misaligned SSE support");
     __ align(CodeEntryAlignment);
     StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt");
     address start = __ pc();
@@ -2556,9 +2608,9 @@ class StubGenerator: public StubCodeGenerator {
 
 
     // 128-bit code follows here, parallelized
-    __ movptr(pos, 0);
-  __ align(OptoLoopAlignment);
-  __ BIND(L_singleBlock_loopTop_128);
+    __ movl(pos, 0);
+    __ align(OptoLoopAlignment);
+    __ BIND(L_singleBlock_loopTop_128);
     __ cmpptr(len_reg, 0);           // any blocks left??
     __ jcc(Assembler::equal, L_exit);
     __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of cipher input
@@ -2597,7 +2649,7 @@ class StubGenerator: public StubCodeGenerator {
     __ jcc(Assembler::notEqual, L_key_256);
 
     // 192-bit code follows here (could be optimized to use parallelism)
-    __ movptr(pos, 0);
+    __ movl(pos, 0);
     __ align(OptoLoopAlignment);
     __ BIND(L_singleBlock_loopTop_192);
     __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of cipher input
@@ -2622,7 +2674,7 @@ class StubGenerator: public StubCodeGenerator {
 
     __ BIND(L_key_256);
     // 256-bit code follows here (could be optimized to use parallelism)
-    __ movptr(pos, 0);
+    __ movl(pos, 0);
     __ align(OptoLoopAlignment);
     __ BIND(L_singleBlock_loopTop_256);
     __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of cipher input
diff --git a/src/cpu/x86/vm/stubGenerator_x86_64.cpp b/src/cpu/x86/vm/stubGenerator_x86_64.cpp
index 48f4af8dc17598be11605a7683556b6577dcf464..00fa0e9ccc0cdb1ce0e22d3e61b3a1375fc87806 100644
--- a/src/cpu/x86/vm/stubGenerator_x86_64.cpp
+++ b/src/cpu/x86/vm/stubGenerator_x86_64.cpp
@@ -2953,21 +2953,6 @@ class StubGenerator: public StubCodeGenerator {
     }
   }
 
-  // aesenc using specified key+offset
-  // can optionally specify that the shuffle mask is already in an xmmregister
-  void aes_enc_key(XMMRegister xmmdst, XMMRegister xmmtmp, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) {
-    load_key(xmmtmp, key, offset, xmm_shuf_mask);
-    __ aesenc(xmmdst, xmmtmp);
-  }
-
-  // aesdec using specified key+offset
-  // can optionally specify that the shuffle mask is already in an xmmregister
-  void aes_dec_key(XMMRegister xmmdst, XMMRegister xmmtmp, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) {
-    load_key(xmmtmp, key, offset, xmm_shuf_mask);
-    __ aesdec(xmmdst, xmmtmp);
-  }
-
-
   // Arguments:
   //
   // Inputs:
@@ -2976,7 +2961,7 @@ class StubGenerator: public StubCodeGenerator {
   //   c_rarg2   - K (key) in little endian int array
   //
   address generate_aescrypt_encryptBlock() {
-    assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
+    assert(UseAES, "need AES instructions and misaligned SSE support");
     __ align(CodeEntryAlignment);
     StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock");
     Label L_doLast;
@@ -2988,15 +2973,17 @@ class StubGenerator: public StubCodeGenerator {
     const Register keylen      = rax;
 
     const XMMRegister xmm_result = xmm0;
-    const XMMRegister xmm_temp   = xmm1;
-    const XMMRegister xmm_key_shuf_mask = xmm2;
+    const XMMRegister xmm_key_shuf_mask = xmm1;
+    // On win64 xmm6-xmm15 must be preserved so don't use them.
+    const XMMRegister xmm_temp1  = xmm2;
+    const XMMRegister xmm_temp2  = xmm3;
+    const XMMRegister xmm_temp3  = xmm4;
+    const XMMRegister xmm_temp4  = xmm5;
 
     __ enter(); // required for proper stackwalking of RuntimeStub frame
 
+    // keylen could be only {11, 13, 15} * 4 = {44, 52, 60}
     __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
-    // keylen = # of 32-bit words, convert to 128-bit words
-    __ shrl(keylen, 2);
-    __ subl(keylen, 11);   // every key has at least 11 128-bit words, some have more
 
     __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
     __ movdqu(xmm_result, Address(from, 0));  // get 16 bytes of input
@@ -3004,25 +2991,53 @@ class StubGenerator: public StubCodeGenerator {
     // For encryption, the java expanded key ordering is just what we need
     // we don't know if the key is aligned, hence not using load-execute form
 
-    load_key(xmm_temp, key, 0x00, xmm_key_shuf_mask);
-    __ pxor(xmm_result, xmm_temp);
-    for (int offset = 0x10; offset <= 0x90; offset += 0x10) {
-      aes_enc_key(xmm_result, xmm_temp, key, offset, xmm_key_shuf_mask);
-    }
-    load_key  (xmm_temp, key, 0xa0, xmm_key_shuf_mask);
-    __ cmpl(keylen, 0);
-    __ jcc(Assembler::equal, L_doLast);
-    __ aesenc(xmm_result, xmm_temp);                   // only in 192 and 256 bit keys
-    aes_enc_key(xmm_result, xmm_temp, key, 0xb0, xmm_key_shuf_mask);
-    load_key(xmm_temp, key, 0xc0, xmm_key_shuf_mask);
-    __ subl(keylen, 2);
-    __ jcc(Assembler::equal, L_doLast);
-    __ aesenc(xmm_result, xmm_temp);                   // only in 256 bit keys
-    aes_enc_key(xmm_result, xmm_temp, key, 0xd0, xmm_key_shuf_mask);
-    load_key(xmm_temp, key, 0xe0, xmm_key_shuf_mask);
+    load_key(xmm_temp1, key, 0x00, xmm_key_shuf_mask);
+    __ pxor(xmm_result, xmm_temp1);
+
+    load_key(xmm_temp1, key, 0x10, xmm_key_shuf_mask);
+    load_key(xmm_temp2, key, 0x20, xmm_key_shuf_mask);
+    load_key(xmm_temp3, key, 0x30, xmm_key_shuf_mask);
+    load_key(xmm_temp4, key, 0x40, xmm_key_shuf_mask);
+
+    __ aesenc(xmm_result, xmm_temp1);
+    __ aesenc(xmm_result, xmm_temp2);
+    __ aesenc(xmm_result, xmm_temp3);
+    __ aesenc(xmm_result, xmm_temp4);
+
+    load_key(xmm_temp1, key, 0x50, xmm_key_shuf_mask);
+    load_key(xmm_temp2, key, 0x60, xmm_key_shuf_mask);
+    load_key(xmm_temp3, key, 0x70, xmm_key_shuf_mask);
+    load_key(xmm_temp4, key, 0x80, xmm_key_shuf_mask);
+
+    __ aesenc(xmm_result, xmm_temp1);
+    __ aesenc(xmm_result, xmm_temp2);
+    __ aesenc(xmm_result, xmm_temp3);
+    __ aesenc(xmm_result, xmm_temp4);
+
+    load_key(xmm_temp1, key, 0x90, xmm_key_shuf_mask);
+    load_key(xmm_temp2, key, 0xa0, xmm_key_shuf_mask);
+
+    __ cmpl(keylen, 44);
+    __ jccb(Assembler::equal, L_doLast);
+
+    __ aesenc(xmm_result, xmm_temp1);
+    __ aesenc(xmm_result, xmm_temp2);
+
+    load_key(xmm_temp1, key, 0xb0, xmm_key_shuf_mask);
+    load_key(xmm_temp2, key, 0xc0, xmm_key_shuf_mask);
+
+    __ cmpl(keylen, 52);
+    __ jccb(Assembler::equal, L_doLast);
+
+    __ aesenc(xmm_result, xmm_temp1);
+    __ aesenc(xmm_result, xmm_temp2);
+
+    load_key(xmm_temp1, key, 0xd0, xmm_key_shuf_mask);
+    load_key(xmm_temp2, key, 0xe0, xmm_key_shuf_mask);
 
     __ BIND(L_doLast);
-    __ aesenclast(xmm_result, xmm_temp);
+    __ aesenc(xmm_result, xmm_temp1);
+    __ aesenclast(xmm_result, xmm_temp2);
     __ movdqu(Address(to, 0), xmm_result);        // store the result
     __ xorptr(rax, rax); // return 0
     __ leave(); // required for proper stackwalking of RuntimeStub frame
@@ -3040,7 +3055,7 @@ class StubGenerator: public StubCodeGenerator {
   //   c_rarg2   - K (key) in little endian int array
   //
   address generate_aescrypt_decryptBlock() {
-    assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
+    assert(UseAES, "need AES instructions and misaligned SSE support");
     __ align(CodeEntryAlignment);
     StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock");
     Label L_doLast;
@@ -3052,15 +3067,17 @@ class StubGenerator: public StubCodeGenerator {
     const Register keylen      = rax;
 
     const XMMRegister xmm_result = xmm0;
-    const XMMRegister xmm_temp   = xmm1;
-    const XMMRegister xmm_key_shuf_mask = xmm2;
+    const XMMRegister xmm_key_shuf_mask = xmm1;
+    // On win64 xmm6-xmm15 must be preserved so don't use them.
+    const XMMRegister xmm_temp1  = xmm2;
+    const XMMRegister xmm_temp2  = xmm3;
+    const XMMRegister xmm_temp3  = xmm4;
+    const XMMRegister xmm_temp4  = xmm5;
 
     __ enter(); // required for proper stackwalking of RuntimeStub frame
 
+    // keylen could be only {11, 13, 15} * 4 = {44, 52, 60}
     __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
-    // keylen = # of 32-bit words, convert to 128-bit words
-    __ shrl(keylen, 2);
-    __ subl(keylen, 11);   // every key has at least 11 128-bit words, some have more
 
     __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
     __ movdqu(xmm_result, Address(from, 0));
@@ -3068,29 +3085,55 @@ class StubGenerator: public StubCodeGenerator {
     // for decryption java expanded key ordering is rotated one position from what we want
     // so we start from 0x10 here and hit 0x00 last
     // we don't know if the key is aligned, hence not using load-execute form
-    load_key(xmm_temp, key, 0x10, xmm_key_shuf_mask);
-    __ pxor  (xmm_result, xmm_temp);
-    for (int offset = 0x20; offset <= 0xa0; offset += 0x10) {
-      aes_dec_key(xmm_result, xmm_temp, key, offset, xmm_key_shuf_mask);
-    }
-    __ cmpl(keylen, 0);
-    __ jcc(Assembler::equal, L_doLast);
-    // only in 192 and 256 bit keys
-    aes_dec_key(xmm_result, xmm_temp, key, 0xb0, xmm_key_shuf_mask);
-    aes_dec_key(xmm_result, xmm_temp, key, 0xc0, xmm_key_shuf_mask);
-    __ subl(keylen, 2);
-    __ jcc(Assembler::equal, L_doLast);
-    // only in 256 bit keys
-    aes_dec_key(xmm_result, xmm_temp, key, 0xd0, xmm_key_shuf_mask);
-    aes_dec_key(xmm_result, xmm_temp, key, 0xe0, xmm_key_shuf_mask);
+    load_key(xmm_temp1, key, 0x10, xmm_key_shuf_mask);
+    load_key(xmm_temp2, key, 0x20, xmm_key_shuf_mask);
+    load_key(xmm_temp3, key, 0x30, xmm_key_shuf_mask);
+    load_key(xmm_temp4, key, 0x40, xmm_key_shuf_mask);
+
+    __ pxor  (xmm_result, xmm_temp1);
+    __ aesdec(xmm_result, xmm_temp2);
+    __ aesdec(xmm_result, xmm_temp3);
+    __ aesdec(xmm_result, xmm_temp4);
+
+    load_key(xmm_temp1, key, 0x50, xmm_key_shuf_mask);
+    load_key(xmm_temp2, key, 0x60, xmm_key_shuf_mask);
+    load_key(xmm_temp3, key, 0x70, xmm_key_shuf_mask);
+    load_key(xmm_temp4, key, 0x80, xmm_key_shuf_mask);
+
+    __ aesdec(xmm_result, xmm_temp1);
+    __ aesdec(xmm_result, xmm_temp2);
+    __ aesdec(xmm_result, xmm_temp3);
+    __ aesdec(xmm_result, xmm_temp4);
+
+    load_key(xmm_temp1, key, 0x90, xmm_key_shuf_mask);
+    load_key(xmm_temp2, key, 0xa0, xmm_key_shuf_mask);
+    load_key(xmm_temp3, key, 0x00, xmm_key_shuf_mask);
+
+    __ cmpl(keylen, 44);
+    __ jccb(Assembler::equal, L_doLast);
+
+    __ aesdec(xmm_result, xmm_temp1);
+    __ aesdec(xmm_result, xmm_temp2);
+
+    load_key(xmm_temp1, key, 0xb0, xmm_key_shuf_mask);
+    load_key(xmm_temp2, key, 0xc0, xmm_key_shuf_mask);
+
+    __ cmpl(keylen, 52);
+    __ jccb(Assembler::equal, L_doLast);
+
+    __ aesdec(xmm_result, xmm_temp1);
+    __ aesdec(xmm_result, xmm_temp2);
+
+    load_key(xmm_temp1, key, 0xd0, xmm_key_shuf_mask);
+    load_key(xmm_temp2, key, 0xe0, xmm_key_shuf_mask);
 
     __ BIND(L_doLast);
-    // for decryption the aesdeclast operation is always on key+0x00
-    load_key(xmm_temp, key, 0x00, xmm_key_shuf_mask);
-    __ aesdeclast(xmm_result, xmm_temp);
+    __ aesdec(xmm_result, xmm_temp1);
+    __ aesdec(xmm_result, xmm_temp2);
 
+    // for decryption the aesdeclast operation is always on key+0x00
+    __ aesdeclast(xmm_result, xmm_temp3);
     __ movdqu(Address(to, 0), xmm_result);  // store the result
-
     __ xorptr(rax, rax); // return 0
     __ leave(); // required for proper stackwalking of RuntimeStub frame
     __ ret(0);
@@ -3109,7 +3152,7 @@ class StubGenerator: public StubCodeGenerator {
   //   c_rarg4   - input length
   //
   address generate_cipherBlockChaining_encryptAESCrypt() {
-    assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
+    assert(UseAES, "need AES instructions and misaligned SSE support");
     __ align(CodeEntryAlignment);
     StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt");
     address start = __ pc();
@@ -3133,16 +3176,19 @@ class StubGenerator: public StubCodeGenerator {
     const XMMRegister xmm_temp   = xmm1;
     // keys 0-10 preloaded into xmm2-xmm12
     const int XMM_REG_NUM_KEY_FIRST = 2;
-    const int XMM_REG_NUM_KEY_LAST  = 12;
+    const int XMM_REG_NUM_KEY_LAST  = 15;
     const XMMRegister xmm_key0   = as_XMMRegister(XMM_REG_NUM_KEY_FIRST);
-    const XMMRegister xmm_key10  = as_XMMRegister(XMM_REG_NUM_KEY_LAST);
+    const XMMRegister xmm_key10  = as_XMMRegister(XMM_REG_NUM_KEY_FIRST+10);
+    const XMMRegister xmm_key11  = as_XMMRegister(XMM_REG_NUM_KEY_FIRST+11);
+    const XMMRegister xmm_key12  = as_XMMRegister(XMM_REG_NUM_KEY_FIRST+12);
+    const XMMRegister xmm_key13  = as_XMMRegister(XMM_REG_NUM_KEY_FIRST+13);
 
     __ enter(); // required for proper stackwalking of RuntimeStub frame
 
 #ifdef _WIN64
     // on win64, fill len_reg from stack position
     __ movl(len_reg, len_mem);
-    // save the xmm registers which must be preserved 6-12
+    // save the xmm registers which must be preserved 6-15
     __ subptr(rsp, -rsp_after_call_off * wordSize);
     for (int i = 6; i <= XMM_REG_NUM_KEY_LAST; i++) {
       __ movdqu(xmm_save(i), as_XMMRegister(i));
@@ -3151,12 +3197,11 @@ class StubGenerator: public StubCodeGenerator {
 
     const XMMRegister xmm_key_shuf_mask = xmm_temp;  // used temporarily to swap key bytes up front
     __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
-    // load up xmm regs 2 thru 12 with key 0x00 - 0xa0
-    for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x00; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
+    // load up xmm regs xmm2 thru xmm12 with key 0x00 - 0xa0
+    for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x00; rnum <= XMM_REG_NUM_KEY_FIRST+10; rnum++) {
       load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask);
       offset += 0x10;
     }
-
     __ movdqu(xmm_result, Address(rvec, 0x00));   // initialize xmm_result with r vec
 
     // now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array (52=192, or 60=256))
@@ -3167,16 +3212,15 @@ class StubGenerator: public StubCodeGenerator {
     // 128 bit code follows here
     __ movptr(pos, 0);
     __ align(OptoLoopAlignment);
+
     __ BIND(L_loopTop_128);
     __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of input
     __ pxor  (xmm_result, xmm_temp);               // xor with the current r vector
-
     __ pxor  (xmm_result, xmm_key0);               // do the aes rounds
-    for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST - 1; rnum++) {
+    for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_FIRST + 9; rnum++) {
       __ aesenc(xmm_result, as_XMMRegister(rnum));
     }
     __ aesenclast(xmm_result, xmm_key10);
-
     __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);     // store into the next 16 bytes of output
     // no need to store r to memory until we exit
     __ addptr(pos, AESBlockSize);
@@ -3198,24 +3242,23 @@ class StubGenerator: public StubCodeGenerator {
 
     __ BIND(L_key_192_256);
     // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256)
+    load_key(xmm_key11, key, 0xb0, xmm_key_shuf_mask);
+    load_key(xmm_key12, key, 0xc0, xmm_key_shuf_mask);
     __ cmpl(rax, 52);
     __ jcc(Assembler::notEqual, L_key_256);
 
     // 192-bit code follows here (could be changed to use more xmm registers)
     __ movptr(pos, 0);
     __ align(OptoLoopAlignment);
+
     __ BIND(L_loopTop_192);
     __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of input
     __ pxor  (xmm_result, xmm_temp);               // xor with the current r vector
-
     __ pxor  (xmm_result, xmm_key0);               // do the aes rounds
-    for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum  <= XMM_REG_NUM_KEY_LAST; rnum++) {
+    for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum  <= XMM_REG_NUM_KEY_FIRST + 11; rnum++) {
       __ aesenc(xmm_result, as_XMMRegister(rnum));
     }
-    aes_enc_key(xmm_result, xmm_temp, key, 0xb0);
-    load_key(xmm_temp, key, 0xc0);
-    __ aesenclast(xmm_result, xmm_temp);
-
+    __ aesenclast(xmm_result, xmm_key12);
     __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);     // store into the next 16 bytes of output
     // no need to store r to memory until we exit
     __ addptr(pos, AESBlockSize);
@@ -3225,22 +3268,19 @@ class StubGenerator: public StubCodeGenerator {
 
     __ BIND(L_key_256);
     // 256-bit code follows here (could be changed to use more xmm registers)
+    load_key(xmm_key13, key, 0xd0, xmm_key_shuf_mask);
     __ movptr(pos, 0);
     __ align(OptoLoopAlignment);
+
     __ BIND(L_loopTop_256);
     __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of input
     __ pxor  (xmm_result, xmm_temp);               // xor with the current r vector
-
     __ pxor  (xmm_result, xmm_key0);               // do the aes rounds
-    for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum  <= XMM_REG_NUM_KEY_LAST; rnum++) {
+    for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum  <= XMM_REG_NUM_KEY_FIRST + 13; rnum++) {
       __ aesenc(xmm_result, as_XMMRegister(rnum));
     }
-    aes_enc_key(xmm_result, xmm_temp, key, 0xb0);
-    aes_enc_key(xmm_result, xmm_temp, key, 0xc0);
-    aes_enc_key(xmm_result, xmm_temp, key, 0xd0);
     load_key(xmm_temp, key, 0xe0);
     __ aesenclast(xmm_result, xmm_temp);
-
     __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);     // store into the next 16 bytes of output
     // no need to store r to memory until we exit
     __ addptr(pos, AESBlockSize);
@@ -3267,7 +3307,7 @@ class StubGenerator: public StubCodeGenerator {
   //
 
   address generate_cipherBlockChaining_decryptAESCrypt_Parallel() {
-    assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
+    assert(UseAES, "need AES instructions and misaligned SSE support");
     __ align(CodeEntryAlignment);
     StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt");
     address start = __ pc();
@@ -3288,12 +3328,10 @@ class StubGenerator: public StubCodeGenerator {
 #endif
     const Register pos         = rax;
 
-    // xmm register assignments for the loops below
-    const XMMRegister xmm_result = xmm0;
     // keys 0-10 preloaded into xmm2-xmm12
     const int XMM_REG_NUM_KEY_FIRST = 5;
     const int XMM_REG_NUM_KEY_LAST  = 15;
-    const XMMRegister xmm_key_first   = as_XMMRegister(XMM_REG_NUM_KEY_FIRST);
+    const XMMRegister xmm_key_first = as_XMMRegister(XMM_REG_NUM_KEY_FIRST);
     const XMMRegister xmm_key_last  = as_XMMRegister(XMM_REG_NUM_KEY_LAST);
 
     __ enter(); // required for proper stackwalking of RuntimeStub frame
@@ -3312,13 +3350,14 @@ class StubGenerator: public StubCodeGenerator {
     const XMMRegister xmm_key_shuf_mask = xmm1;  // used temporarily to swap key bytes up front
     __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
     // load up xmm regs 5 thru 15 with key 0x10 - 0xa0 - 0x00
-    for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x10; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
-      if (rnum == XMM_REG_NUM_KEY_LAST) offset = 0x00;
+    for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x10; rnum < XMM_REG_NUM_KEY_LAST; rnum++) {
       load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask);
       offset += 0x10;
     }
+    load_key(xmm_key_last, key, 0x00, xmm_key_shuf_mask);
 
     const XMMRegister xmm_prev_block_cipher = xmm1;  // holds cipher of previous block
+
     // registers holding the four results in the parallelized loop
     const XMMRegister xmm_result0 = xmm0;
     const XMMRegister xmm_result1 = xmm2;
@@ -3376,8 +3415,12 @@ class StubGenerator: public StubCodeGenerator {
     __ jmp(L_multiBlock_loopTop_128);
 
     // registers used in the non-parallelized loops
+    // xmm register assignments for the loops below
+    const XMMRegister xmm_result = xmm0;
     const XMMRegister xmm_prev_block_cipher_save = xmm2;
-    const XMMRegister xmm_temp   = xmm3;
+    const XMMRegister xmm_key11 = xmm3;
+    const XMMRegister xmm_key12 = xmm4;
+    const XMMRegister xmm_temp  = xmm4;
 
     __ align(OptoLoopAlignment);
     __ BIND(L_singleBlock_loopTop_128);
@@ -3415,12 +3458,15 @@ class StubGenerator: public StubCodeGenerator {
 
     __ BIND(L_key_192_256);
     // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256)
+    load_key(xmm_key11, key, 0xb0);
     __ cmpl(rax, 52);
     __ jcc(Assembler::notEqual, L_key_256);
 
     // 192-bit code follows here (could be optimized to use parallelism)
+    load_key(xmm_key12, key, 0xc0);     // 192-bit key goes up to c0
     __ movptr(pos, 0);
     __ align(OptoLoopAlignment);
+
     __ BIND(L_singleBlock_loopTop_192);
     __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of cipher input
     __ movdqa(xmm_prev_block_cipher_save, xmm_result);              // save for next r vector
@@ -3428,14 +3474,13 @@ class StubGenerator: public StubCodeGenerator {
     for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST - 1; rnum++) {
       __ aesdec(xmm_result, as_XMMRegister(rnum));
     }
-    aes_dec_key(xmm_result, xmm_temp, key, 0xb0);     // 192-bit key goes up to c0
-    aes_dec_key(xmm_result, xmm_temp, key, 0xc0);
+    __ aesdec(xmm_result, xmm_key11);
+    __ aesdec(xmm_result, xmm_key12);
     __ aesdeclast(xmm_result, xmm_key_last);                    // xmm15 always came from key+0
     __ pxor  (xmm_result, xmm_prev_block_cipher);               // xor with the current r vector
-    __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);     // store into the next 16 bytes of output
+    __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);  // store into the next 16 bytes of output
     // no need to store r to memory until we exit
-    __ movdqa(xmm_prev_block_cipher, xmm_prev_block_cipher_save);              // set up next r vector with cipher input from this block
-
+    __ movdqa(xmm_prev_block_cipher, xmm_prev_block_cipher_save);  // set up next r vector with cipher input from this block
     __ addptr(pos, AESBlockSize);
     __ subptr(len_reg, AESBlockSize);
     __ jcc(Assembler::notEqual,L_singleBlock_loopTop_192);
@@ -3445,23 +3490,26 @@ class StubGenerator: public StubCodeGenerator {
     // 256-bit code follows here (could be optimized to use parallelism)
     __ movptr(pos, 0);
     __ align(OptoLoopAlignment);
+
     __ BIND(L_singleBlock_loopTop_256);
-    __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of cipher input
+    __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input
     __ movdqa(xmm_prev_block_cipher_save, xmm_result);              // save for next r vector
     __ pxor  (xmm_result, xmm_key_first);               // do the aes dec rounds
     for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST - 1; rnum++) {
       __ aesdec(xmm_result, as_XMMRegister(rnum));
     }
-    aes_dec_key(xmm_result, xmm_temp, key, 0xb0);     // 256-bit key goes up to e0
-    aes_dec_key(xmm_result, xmm_temp, key, 0xc0);
-    aes_dec_key(xmm_result, xmm_temp, key, 0xd0);
-    aes_dec_key(xmm_result, xmm_temp, key, 0xe0);
-    __ aesdeclast(xmm_result, xmm_key_last);             // xmm15 came from key+0
+    __ aesdec(xmm_result, xmm_key11);
+    load_key(xmm_temp, key, 0xc0);
+    __ aesdec(xmm_result, xmm_temp);
+    load_key(xmm_temp, key, 0xd0);
+    __ aesdec(xmm_result, xmm_temp);
+    load_key(xmm_temp, key, 0xe0);     // 256-bit key goes up to e0
+    __ aesdec(xmm_result, xmm_temp);
+    __ aesdeclast(xmm_result, xmm_key_last);          // xmm15 came from key+0
     __ pxor  (xmm_result, xmm_prev_block_cipher);               // xor with the current r vector
-    __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);     // store into the next 16 bytes of output
+    __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);  // store into the next 16 bytes of output
     // no need to store r to memory until we exit
-    __ movdqa(xmm_prev_block_cipher, xmm_prev_block_cipher_save);              // set up next r vector with cipher input from this block
-
+    __ movdqa(xmm_prev_block_cipher, xmm_prev_block_cipher_save);  // set up next r vector with cipher input from this block
     __ addptr(pos, AESBlockSize);
     __ subptr(len_reg, AESBlockSize);
     __ jcc(Assembler::notEqual,L_singleBlock_loopTop_256);
diff --git a/src/cpu/x86/vm/templateInterpreter_x86_32.cpp b/src/cpu/x86/vm/templateInterpreter_x86_32.cpp
index 19a2a45c9f7e15eda230641cadd6e90f200f2d61..aabc3dbbacd40c9fda33afdd62e5064bb25bb8f2 100644
--- a/src/cpu/x86/vm/templateInterpreter_x86_32.cpp
+++ b/src/cpu/x86/vm/templateInterpreter_x86_32.cpp
@@ -424,8 +424,6 @@ void InterpreterGenerator::generate_counter_overflow(Label* do_continue) {
   // C++ interpreter only
   // rsi - previous interpreter state pointer
 
-  const Address size_of_parameters(rbx, Method::size_of_parameters_offset());
-
   // InterpreterRuntime::frequency_counter_overflow takes one argument
   // indicating if the counter overflow occurs at a backwards branch (non-NULL bcp).
   // The call returns the address of the verified entry point for the method or NULL
@@ -868,12 +866,13 @@ address InterpreterGenerator::generate_native_entry(bool synchronized) {
   // rsi: previous interpreter state (C++ interpreter) must preserve
   address entry_point = __ pc();
 
-
-  const Address size_of_parameters(rbx, Method::size_of_parameters_offset());
+  const Address constMethod       (rbx, Method::const_offset());
   const Address invocation_counter(rbx, Method::invocation_counter_offset() + InvocationCounter::counter_offset());
   const Address access_flags      (rbx, Method::access_flags_offset());
+  const Address size_of_parameters(rcx, ConstMethod::size_of_parameters_offset());
 
   // get parameter size (always needed)
+  __ movptr(rcx, constMethod);
   __ load_unsigned_short(rcx, size_of_parameters);
 
   // native calls don't need the stack size check since they have no expression stack
@@ -988,7 +987,9 @@ address InterpreterGenerator::generate_native_entry(bool synchronized) {
 
   // allocate space for parameters
   __ get_method(method);
-  __ load_unsigned_short(t, Address(method, Method::size_of_parameters_offset()));
+  __ movptr(t, Address(method, Method::const_offset()));
+  __ load_unsigned_short(t, Address(t, ConstMethod::size_of_parameters_offset()));
+
   __ shlptr(t, Interpreter::logStackElementSize);
   __ addptr(t, 2*wordSize);     // allocate two more slots for JNIEnv and possible mirror
   __ subptr(rsp, t);
@@ -1297,13 +1298,14 @@ address InterpreterGenerator::generate_normal_entry(bool synchronized) {
   // rsi: sender sp
   address entry_point = __ pc();
 
-
-  const Address size_of_parameters(rbx, Method::size_of_parameters_offset());
-  const Address size_of_locals    (rbx, Method::size_of_locals_offset());
+  const Address constMethod       (rbx, Method::const_offset());
   const Address invocation_counter(rbx, Method::invocation_counter_offset() + InvocationCounter::counter_offset());
   const Address access_flags      (rbx, Method::access_flags_offset());
+  const Address size_of_parameters(rdx, ConstMethod::size_of_parameters_offset());
+  const Address size_of_locals    (rdx, ConstMethod::size_of_locals_offset());
 
   // get parameter size (always needed)
+  __ movptr(rdx, constMethod);
   __ load_unsigned_short(rcx, size_of_parameters);
 
   // rbx,: Method*
@@ -1734,7 +1736,8 @@ void TemplateInterpreterGenerator::generate_throw_exception() {
 
     // Compute size of arguments for saving when returning to deoptimized caller
     __ get_method(rax);
-    __ load_unsigned_short(rax, Address(rax, in_bytes(Method::size_of_parameters_offset())));
+    __ movptr(rax, Address(rax, Method::const_offset()));
+    __ load_unsigned_short(rax, Address(rax, ConstMethod::size_of_parameters_offset()));
     __ shlptr(rax, Interpreter::logStackElementSize);
     __ restore_locals();
     __ subptr(rdi, rax);
diff --git a/src/cpu/x86/vm/templateInterpreter_x86_64.cpp b/src/cpu/x86/vm/templateInterpreter_x86_64.cpp
index e2b46fc78bca4790b482bfab14bbff21b9fe5e2e..3e3cc0fc1f5d9f1afa17833cb0ddf7528ec94427 100644
--- a/src/cpu/x86/vm/templateInterpreter_x86_64.cpp
+++ b/src/cpu/x86/vm/templateInterpreter_x86_64.cpp
@@ -369,9 +369,6 @@ void InterpreterGenerator::generate_counter_overflow(Label* do_continue) {
   // Everything as it was on entry
   // rdx is not restored. Doesn't appear to really be set.
 
-  const Address size_of_parameters(rbx,
-                                   Method::size_of_parameters_offset());
-
   // InterpreterRuntime::frequency_counter_overflow takes two
   // arguments, the first (thread) is passed by call_VM, the second
   // indicates if the counter overflow occurs at a backwards branch
@@ -844,14 +841,17 @@ address InterpreterGenerator::generate_native_entry(bool synchronized) {
 
   address entry_point = __ pc();
 
-  const Address size_of_parameters(rbx, Method::
-                                        size_of_parameters_offset());
+  const Address constMethod       (rbx, Method::const_offset());
   const Address invocation_counter(rbx, Method::
                                         invocation_counter_offset() +
                                         InvocationCounter::counter_offset());
   const Address access_flags      (rbx, Method::access_flags_offset());
+  const Address size_of_parameters(rcx, ConstMethod::
+                                        size_of_parameters_offset());
+
 
   // get parameter size (always needed)
+  __ movptr(rcx, constMethod);
   __ load_unsigned_short(rcx, size_of_parameters);
 
   // native calls don't need the stack size check since they have no
@@ -967,9 +967,8 @@ address InterpreterGenerator::generate_native_entry(bool synchronized) {
 
   // allocate space for parameters
   __ get_method(method);
-  __ load_unsigned_short(t,
-                         Address(method,
-                                 Method::size_of_parameters_offset()));
+  __ movptr(t, Address(method, Method::const_offset()));
+  __ load_unsigned_short(t, Address(t, ConstMethod::size_of_parameters_offset()));
   __ shll(t, Interpreter::logStackElementSize);
 
   __ subptr(rsp, t);
@@ -1302,15 +1301,18 @@ address InterpreterGenerator::generate_normal_entry(bool synchronized) {
   // r13: sender sp
   address entry_point = __ pc();
 
-  const Address size_of_parameters(rbx,
-                                   Method::size_of_parameters_offset());
-  const Address size_of_locals(rbx, Method::size_of_locals_offset());
+  const Address constMethod(rbx, Method::const_offset());
   const Address invocation_counter(rbx,
                                    Method::invocation_counter_offset() +
                                    InvocationCounter::counter_offset());
   const Address access_flags(rbx, Method::access_flags_offset());
+  const Address size_of_parameters(rdx,
+                                   ConstMethod::size_of_parameters_offset());
+  const Address size_of_locals(rdx, ConstMethod::size_of_locals_offset());
+
 
   // get parameter size (always needed)
+  __ movptr(rdx, constMethod);
   __ load_unsigned_short(rcx, size_of_parameters);
 
   // rbx: Method*
@@ -1752,7 +1754,8 @@ void TemplateInterpreterGenerator::generate_throw_exception() {
     // Compute size of arguments for saving when returning to
     // deoptimized caller
     __ get_method(rax);
-    __ load_unsigned_short(rax, Address(rax, in_bytes(Method::
+    __ movptr(rax, Address(rax, Method::const_offset()));
+    __ load_unsigned_short(rax, Address(rax, in_bytes(ConstMethod::
                                                 size_of_parameters_offset())));
     __ shll(rax, Interpreter::logStackElementSize);
     __ restore_locals(); // XXX do we need this?
diff --git a/src/cpu/x86/vm/vm_version_x86.cpp b/src/cpu/x86/vm/vm_version_x86.cpp
index f48e66012a815e0acc8bd54745068b01b7567e17..fc0466767393ad21dab5e24c67916244a8aec901 100644
--- a/src/cpu/x86/vm/vm_version_x86.cpp
+++ b/src/cpu/x86/vm/vm_version_x86.cpp
@@ -489,8 +489,8 @@ void VM_Version::get_processor_features() {
   }
 
   // The AES intrinsic stubs require AES instruction support (of course)
-  // but also require AVX and sse3 modes for instructions it use.
-  if (UseAES && (UseAVX > 0) && (UseSSE > 2)) {
+  // but also require sse3 mode for instructions it use.
+  if (UseAES && (UseSSE > 2)) {
     if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
       UseAESIntrinsics = true;
     }
diff --git a/src/cpu/zero/vm/assembler_zero.cpp b/src/cpu/zero/vm/assembler_zero.cpp
index e37938c1fa73fdce9a101f8c327655a843bc035c..d70a2adc746eec7fa3bb61fe9a055fa19bbab8cb 100644
--- a/src/cpu/zero/vm/assembler_zero.cpp
+++ b/src/cpu/zero/vm/assembler_zero.cpp
@@ -56,15 +56,9 @@ void Assembler::pd_patch_instruction(address branch, address target) {
   ShouldNotCallThis();
 }
 
-#ifndef PRODUCT
-void Assembler::pd_print_patched_instruction(address branch) {
-  ShouldNotCallThis();
-}
-#endif // PRODUCT
-
 void MacroAssembler::align(int modulus) {
   while (offset() % modulus != 0)
-    emit_byte(AbstractAssembler::code_fill_byte());
+    emit_int8(AbstractAssembler::code_fill_byte());
 }
 
 void MacroAssembler::bang_stack_with_offset(int offset) {
@@ -72,8 +66,7 @@ void MacroAssembler::bang_stack_with_offset(int offset) {
 }
 
 void MacroAssembler::advance(int bytes) {
-  _code_pos += bytes;
-  sync();
+  code_section()->set_end(code_section()->end() + bytes);
 }
 
 RegisterOrConstant MacroAssembler::delayed_value_impl(
diff --git a/src/cpu/zero/vm/assembler_zero.hpp b/src/cpu/zero/vm/assembler_zero.hpp
index 724153982ac2e7601f3d840bcceb2eba66823480..f9564a54c0ac5acbd9908947d4de7e3084262177 100644
--- a/src/cpu/zero/vm/assembler_zero.hpp
+++ b/src/cpu/zero/vm/assembler_zero.hpp
@@ -37,9 +37,6 @@ class Assembler : public AbstractAssembler {
 
  public:
   void pd_patch_instruction(address branch, address target);
-#ifndef PRODUCT
-  static void pd_print_patched_instruction(address branch);
-#endif // PRODUCT
 };
 
 class MacroAssembler : public Assembler {
diff --git a/src/os/posix/vm/os_posix.cpp b/src/os/posix/vm/os_posix.cpp
index cbffa3279af40546a263fefce488d879966fcf49..af75b3b362d8156ee770fd635f065474e50be648 100644
--- a/src/os/posix/vm/os_posix.cpp
+++ b/src/os/posix/vm/os_posix.cpp
@@ -93,6 +93,47 @@ void os::wait_for_keypress_at_exit(void) {
   return;
 }
 
+// Multiple threads can race in this code, and can remap over each other with MAP_FIXED,
+// so on posix, unmap the section at the start and at the end of the chunk that we mapped
+// rather than unmapping and remapping the whole chunk to get requested alignment.
+char* os::reserve_memory_aligned(size_t size, size_t alignment) {
+  assert((alignment & (os::vm_allocation_granularity() - 1)) == 0,
+      "Alignment must be a multiple of allocation granularity (page size)");
+  assert((size & (alignment -1)) == 0, "size must be 'alignment' aligned");
+
+  size_t extra_size = size + alignment;
+  assert(extra_size >= size, "overflow, size is too large to allow alignment");
+
+  char* extra_base = os::reserve_memory(extra_size, NULL, alignment);
+
+  if (extra_base == NULL) {
+    return NULL;
+  }
+
+  // Do manual alignment
+  char* aligned_base = (char*) align_size_up((uintptr_t) extra_base, alignment);
+
+  // [  |                                       |  ]
+  // ^ extra_base
+  //    ^ extra_base + begin_offset == aligned_base
+  //     extra_base + begin_offset + size       ^
+  //                       extra_base + extra_size ^
+  // |<>| == begin_offset
+  //                              end_offset == |<>|
+  size_t begin_offset = aligned_base - extra_base;
+  size_t end_offset = (extra_base + extra_size) - (aligned_base + size);
+
+  if (begin_offset > 0) {
+      os::release_memory(extra_base, begin_offset);
+  }
+
+  if (end_offset > 0) {
+      os::release_memory(extra_base + begin_offset + size, end_offset);
+  }
+
+  return aligned_base;
+}
+
 void os::Posix::print_load_average(outputStream* st) {
   st->print("load average:");
   double loadavg[3];
diff --git a/src/os/windows/vm/os_windows.cpp b/src/os/windows/vm/os_windows.cpp
index 2881bd519466427dab6603ae0334a0d1c7461f15..b8498d5a1e26ca28da021240c17671dc524b2ba3 100644
--- a/src/os/windows/vm/os_windows.cpp
+++ b/src/os/windows/vm/os_windows.cpp
@@ -2895,6 +2895,36 @@ void os::pd_split_reserved_memory(char *base, size_t size, size_t split,
   }
 }
 
+// Multiple threads can race in this code but it's not possible to unmap small sections of
+// virtual space to get requested alignment, like posix-like os's.
+// Windows prevents multiple thread from remapping over each other so this loop is thread-safe.
+char* os::reserve_memory_aligned(size_t size, size_t alignment) {
+  assert((alignment & (os::vm_allocation_granularity() - 1)) == 0,
+      "Alignment must be a multiple of allocation granularity (page size)");
+  assert((size & (alignment -1)) == 0, "size must be 'alignment' aligned");
+
+  size_t extra_size = size + alignment;
+  assert(extra_size >= size, "overflow, size is too large to allow alignment");
+
+  char* aligned_base = NULL;
+
+  do {
+    char* extra_base = os::reserve_memory(extra_size, NULL, alignment);
+    if (extra_base == NULL) {
+      return NULL;
+    }
+    // Do manual alignment
+    aligned_base = (char*) align_size_up((uintptr_t) extra_base, alignment);
+
+    os::release_memory(extra_base, extra_size);
+
+    aligned_base = os::reserve_memory(size, aligned_base);
+
+  } while (aligned_base == NULL);
+
+  return aligned_base;
+}
+
 char* os::pd_reserve_memory(size_t bytes, char* addr, size_t alignment_hint) {
   assert((size_t)addr % os::vm_allocation_granularity() == 0,
          "reserve alignment");
diff --git a/src/os_cpu/solaris_x86/vm/assembler_solaris_x86.cpp b/src/os_cpu/solaris_x86/vm/assembler_solaris_x86.cpp
index ed25848bca81d3a83faae9453e8eb96437a41d34..203da611c01850fb1a01742f26f6e54e5e8a1161 100644
--- a/src/os_cpu/solaris_x86/vm/assembler_solaris_x86.cpp
+++ b/src/os_cpu/solaris_x86/vm/assembler_solaris_x86.cpp
@@ -116,7 +116,7 @@ void MacroAssembler::get_thread(Register thread) {
   ThreadLocalStorage::pd_tlsAccessMode tlsMode = ThreadLocalStorage::pd_getTlsAccessMode ();
   if (tlsMode == ThreadLocalStorage::pd_tlsAccessIndirect) {            // T1
      // Use thread as a temporary: mov r, gs:[0]; mov r, [r+tlsOffset]
-     emit_byte (segment);
+     emit_int8 (segment);
      // ExternalAddress doesn't work because it can't take NULL
      AddressLiteral null(0, relocInfo::none);
      movptr (thread, null);
@@ -125,7 +125,7 @@ void MacroAssembler::get_thread(Register thread) {
   } else
   if (tlsMode == ThreadLocalStorage::pd_tlsAccessDirect) {              // T2
      // mov r, gs:[tlsOffset]
-     emit_byte (segment);
+     emit_int8 (segment);
      AddressLiteral tls_off((address)ThreadLocalStorage::pd_getTlsOffset(), relocInfo::none);
      movptr (thread, tls_off);
      return ;
diff --git a/src/os_cpu/windows_x86/vm/assembler_windows_x86.cpp b/src/os_cpu/windows_x86/vm/assembler_windows_x86.cpp
index ce629e20762ac9797e18e28ba8c7e3f102623820..ce48421c6d4469b53ef006b9b8c0d6758d32212f 100644
--- a/src/os_cpu/windows_x86/vm/assembler_windows_x86.cpp
+++ b/src/os_cpu/windows_x86/vm/assembler_windows_x86.cpp
@@ -30,7 +30,7 @@
 
 
 void MacroAssembler::int3() {
-  emit_byte(0xCC);
+  emit_int8((unsigned char)0xCC);
 }
 
 #ifndef _LP64
diff --git a/src/share/vm/asm/assembler.cpp b/src/share/vm/asm/assembler.cpp
index 6a34989ec7b66b12a01693ac06de1f8679533afe..669f49039fc346102a328f35f1ff750fa31ca606 100644
--- a/src/share/vm/asm/assembler.cpp
+++ b/src/share/vm/asm/assembler.cpp
@@ -109,37 +109,6 @@ void AbstractAssembler::flush() {
   ICache::invalidate_range(addr_at(0), offset());
 }
 
-
-void AbstractAssembler::a_byte(int x) {
-  emit_byte(x);
-}
-
-
-void AbstractAssembler::a_long(jint x) {
-  emit_long(x);
-}
-
-// Labels refer to positions in the (to be) generated code.  There are bound
-// and unbound
-//
-// Bound labels refer to known positions in the already generated code.
-// offset() is the position the label refers to.
-//
-// Unbound labels refer to unknown positions in the code to be generated; it
-// may contain a list of unresolved displacements that refer to it
-#ifndef PRODUCT
-void AbstractAssembler::print(Label& L) {
-  if (L.is_bound()) {
-    tty->print_cr("bound label to %d|%d", L.loc_pos(), L.loc_sect());
-  } else if (L.is_unbound()) {
-    L.print_instructions((MacroAssembler*)this);
-  } else {
-    tty->print_cr("label in inconsistent state (loc = %d)", L.loc());
-  }
-}
-#endif // PRODUCT
-
-
 void AbstractAssembler::bind(Label& L) {
   if (L.is_bound()) {
     // Assembler can bind a label more than once to the same place.
@@ -342,28 +311,3 @@ bool MacroAssembler::needs_explicit_null_check(intptr_t offset) {
 #endif
   return offset < 0 || os::vm_page_size() <= offset;
 }
-
-#ifndef PRODUCT
-void Label::print_instructions(MacroAssembler* masm) const {
-  CodeBuffer* cb = masm->code();
-  for (int i = 0; i < _patch_index; ++i) {
-    int branch_loc;
-    if (i >= PatchCacheSize) {
-      branch_loc = _patch_overflow->at(i - PatchCacheSize);
-    } else {
-      branch_loc = _patches[i];
-    }
-    int branch_pos  = CodeBuffer::locator_pos(branch_loc);
-    int branch_sect = CodeBuffer::locator_sect(branch_loc);
-    address branch = cb->locator_address(branch_loc);
-    tty->print_cr("unbound label");
-    tty->print("@ %d|%d ", branch_pos, branch_sect);
-    if (branch_sect == CodeBuffer::SECT_CONSTS) {
-      tty->print_cr(PTR_FORMAT, *(address*)branch);
-      continue;
-    }
-    masm->pd_print_patched_instruction(branch);
-    tty->cr();
-  }
-}
-#endif // ndef PRODUCT
diff --git a/src/share/vm/asm/assembler.hpp b/src/share/vm/asm/assembler.hpp
index 5de33dd503edd959f7ffe343bc46488843606065..ca26ea5bba9f34ba92aa004d2abb0f7b8930937d 100644
--- a/src/share/vm/asm/assembler.hpp
+++ b/src/share/vm/asm/assembler.hpp
@@ -216,17 +216,6 @@ class AbstractAssembler : public ResourceObj  {
   bool isByte(int x) const             { return 0 <= x && x < 0x100; }
   bool isShiftCount(int x) const       { return 0 <= x && x < 32; }
 
-  void emit_int8(   int8_t  x) { code_section()->emit_int8(   x); }
-  void emit_int16(  int16_t x) { code_section()->emit_int16(  x); }
-  void emit_int32(  int32_t x) { code_section()->emit_int32(  x); }
-  void emit_int64(  int64_t x) { code_section()->emit_int64(  x); }
-
-  void emit_float(  jfloat  x) { code_section()->emit_float(  x); }
-  void emit_double( jdouble x) { code_section()->emit_double( x); }
-  void emit_address(address x) { code_section()->emit_address(x); }
-
-  void emit_byte(int x)  { emit_int8 (x); }  // deprecated
-  void emit_word(int x)  { emit_int16(x); }  // deprecated
   void emit_long(jint x) { emit_int32(x); }  // deprecated
 
   // Instruction boundaries (required when emitting relocatable values).
@@ -277,9 +266,6 @@ class AbstractAssembler : public ResourceObj  {
   };
 #endif
 
-  // Label functions
-  void print(Label& L);
-
  public:
 
   // Creation
@@ -288,6 +274,15 @@ class AbstractAssembler : public ResourceObj  {
   // ensure buf contains all code (call this before using/copying the code)
   void flush();
 
+  void emit_int8(   int8_t  x) { code_section()->emit_int8(   x); }
+  void emit_int16(  int16_t x) { code_section()->emit_int16(  x); }
+  void emit_int32(  int32_t x) { code_section()->emit_int32(  x); }
+  void emit_int64(  int64_t x) { code_section()->emit_int64(  x); }
+
+  void emit_float(  jfloat  x) { code_section()->emit_float(  x); }
+  void emit_double( jdouble x) { code_section()->emit_double( x); }
+  void emit_address(address x) { code_section()->emit_address(x); }
+
   // min and max values for signed immediate ranges
   static int min_simm(int nbits) { return -(intptr_t(1) << (nbits - 1))    ; }
   static int max_simm(int nbits) { return  (intptr_t(1) << (nbits - 1)) - 1; }
@@ -327,8 +322,6 @@ class AbstractAssembler : public ResourceObj  {
   void    clear_inst_mark()       {        code_section()->clear_mark(); }
 
   // Constants in code
-  void a_byte(int x);
-  void a_long(jint x);
   void relocate(RelocationHolder const& rspec, int format = 0) {
     assert(!pd_check_instruction_mark()
         || inst_mark() == NULL || inst_mark() == code_section()->end(),
@@ -441,15 +434,6 @@ class AbstractAssembler : public ResourceObj  {
    */
   void pd_patch_instruction(address branch, address target);
 
-#ifndef PRODUCT
-  /**
-   * Platform-dependent method of printing an instruction that needs to be
-   * patched.
-   *
-   * @param branch the instruction to be patched in the buffer.
-   */
-  static void pd_print_patched_instruction(address branch);
-#endif // PRODUCT
 };
 
 #ifdef TARGET_ARCH_x86
diff --git a/src/share/vm/c1/c1_GraphBuilder.cpp b/src/share/vm/c1/c1_GraphBuilder.cpp
index 70b69157530a278ff747d0f6cbcb597a5450da1b..85c3aa1c9a9e84ce411566ac1e4e85420689f434 100644
--- a/src/share/vm/c1/c1_GraphBuilder.cpp
+++ b/src/share/vm/c1/c1_GraphBuilder.cpp
@@ -3442,6 +3442,11 @@ bool GraphBuilder::try_inline_intrinsics(ciMethod* callee) {
       preserves_state = true;
       break;
 
+    case vmIntrinsics::_loadFence :
+    case vmIntrinsics::_storeFence:
+    case vmIntrinsics::_fullFence :
+      break;
+
     default                       : return false; // do not inline
   }
   // create intrinsic node
diff --git a/src/share/vm/c1/c1_LIRGenerator.cpp b/src/share/vm/c1/c1_LIRGenerator.cpp
index cf865f1b557e378d5e2cd08d2e27b8f763241792..65bc34e3de65d867422946a33e39f6fa28b7f77f 100644
--- a/src/share/vm/c1/c1_LIRGenerator.cpp
+++ b/src/share/vm/c1/c1_LIRGenerator.cpp
@@ -2977,6 +2977,16 @@ void LIRGenerator::do_Intrinsic(Intrinsic* x) {
     do_CompareAndSwap(x, longType);
     break;
 
+  case vmIntrinsics::_loadFence :
+    if (os::is_MP()) __ membar_acquire();
+    break;
+  case vmIntrinsics::_storeFence:
+    if (os::is_MP()) __ membar_release();
+    break;
+  case vmIntrinsics::_fullFence :
+    if (os::is_MP()) __ membar();
+    break;
+
   case vmIntrinsics::_Reference_get:
     do_Reference_get(x);
     break;
diff --git a/src/share/vm/ci/ciField.cpp b/src/share/vm/ci/ciField.cpp
index 5cfa47b277a2d385e2aeb5949449cf656e6a11fd..fe967554c3a62458a694bcb5bdb6c81df0425207 100644
--- a/src/share/vm/ci/ciField.cpp
+++ b/src/share/vm/ci/ciField.cpp
@@ -366,10 +366,12 @@ bool ciField::will_link(ciInstanceKlass* accessing_klass,
 // ------------------------------------------------------------------
 // ciField::print
 void ciField::print() {
-  tty->print("<ciField ");
+  tty->print("<ciField name=");
   _holder->print_name();
   tty->print(".");
   _name->print_symbol();
+  tty->print(" signature=");
+  _signature->print_symbol();
   tty->print(" offset=%d type=", _offset);
   if (_type != NULL) _type->print_name();
   else               tty->print("(reference)");
diff --git a/src/share/vm/classfile/classLoaderData.cpp b/src/share/vm/classfile/classLoaderData.cpp
index 41e6815dbd02b866ba42496f447d84aa5e9662a3..eaf829738d0a15f897f1c307d4062e2c4daa5c20 100644
--- a/src/share/vm/classfile/classLoaderData.cpp
+++ b/src/share/vm/classfile/classLoaderData.cpp
@@ -169,16 +169,18 @@ void ClassLoaderData::add_dependency(Handle dependency, TRAPS) {
     ok = (objArrayOop)ok->obj_at(1);
   }
 
+  // Must handle over GC points
+  assert (last != NULL, "dependencies should be initialized");
+  objArrayHandle last_handle(THREAD, last);
+
   // Create a new dependency node with fields for (class_loader or mirror, next)
   objArrayOop deps = oopFactory::new_objectArray(2, CHECK);
   deps->obj_at_put(0, dependency());
 
-  // Must handle over more GC points
+  // Must handle over GC points
   objArrayHandle new_dependency(THREAD, deps);
 
   // Add the dependency under lock
-  assert (last != NULL, "dependencies should be initialized");
-  objArrayHandle last_handle(THREAD, last);
   locked_add_dependency(last_handle, new_dependency);
 }
 
diff --git a/src/share/vm/classfile/javaClasses.cpp b/src/share/vm/classfile/javaClasses.cpp
index 79a35008575e8274e04dbcc6e647912bd2b6dc32..9c2caaa7397fc17f5ac33a9ecd56feda8aecdbd7 100644
--- a/src/share/vm/classfile/javaClasses.cpp
+++ b/src/share/vm/classfile/javaClasses.cpp
@@ -327,14 +327,14 @@ jchar* java_lang_String::as_unicode_string(oop java_string, int& length) {
   return result;
 }
 
-unsigned int java_lang_String::to_hash(oop java_string) {
+unsigned int java_lang_String::hash_code(oop java_string) {
   int          length = java_lang_String::length(java_string);
-  // Zero length string will hash to zero with String.toHash() function.
+  // Zero length string will hash to zero with String.hashCode() function.
   if (length == 0) return 0;
 
   typeArrayOop value  = java_lang_String::value(java_string);
   int          offset = java_lang_String::offset(java_string);
-  return java_lang_String::to_hash(value->char_at_addr(offset), length);
+  return java_lang_String::hash_code(value->char_at_addr(offset), length);
 }
 
 char* java_lang_String::as_quoted_ascii(oop java_string) {
diff --git a/src/share/vm/classfile/javaClasses.hpp b/src/share/vm/classfile/javaClasses.hpp
index 1a4b51f59978d948be975d4286bd91c84c329ff0..00ce4576499d3b8c9f44c24f54a185f86027aee7 100644
--- a/src/share/vm/classfile/javaClasses.hpp
+++ b/src/share/vm/classfile/javaClasses.hpp
@@ -166,8 +166,8 @@ class java_lang_String : AllStatic {
   // objects in the shared archive file.
   // hash P(31) from Kernighan & Ritchie
   //
-  // For this reason, THIS ALGORITHM MUST MATCH String.toHash().
-  template <typename T> static unsigned int to_hash(T* s, int len) {
+  // For this reason, THIS ALGORITHM MUST MATCH String.hashCode().
+  template <typename T> static unsigned int hash_code(T* s, int len) {
     unsigned int h = 0;
     while (len-- > 0) {
       h = 31*h + (unsigned int) *s;
@@ -175,10 +175,10 @@ class java_lang_String : AllStatic {
     }
     return h;
   }
-  static unsigned int to_hash(oop java_string);
+  static unsigned int hash_code(oop java_string);
 
   // This is the string hash code used by the StringTable, which may be
-  // the same as String.toHash or an alternate hash code.
+  // the same as String.hashCode or an alternate hash code.
   static unsigned int hash_string(oop java_string);
 
   static bool equals(oop java_string, jchar* chars, int len);
diff --git a/src/share/vm/classfile/symbolTable.cpp b/src/share/vm/classfile/symbolTable.cpp
index 3c93662052124ad904d04cd53f47c0b26c38dc41..87de232d7a509f483219e6e95a44ff5f1f91298a 100644
--- a/src/share/vm/classfile/symbolTable.cpp
+++ b/src/share/vm/classfile/symbolTable.cpp
@@ -179,7 +179,7 @@ Symbol* SymbolTable::lookup(int index, const char* name,
 unsigned int SymbolTable::hash_symbol(const char* s, int len) {
   return use_alternate_hashcode() ?
            AltHashing::murmur3_32(seed(), (const jbyte*)s, len) :
-           java_lang_String::to_hash(s, len);
+           java_lang_String::hash_code(s, len);
 }
 
 
@@ -617,7 +617,7 @@ bool StringTable::_needs_rehashing = false;
 // Pick hashing algorithm
 unsigned int StringTable::hash_string(const jchar* s, int len) {
   return use_alternate_hashcode() ? AltHashing::murmur3_32(seed(), s, len) :
-                                    java_lang_String::to_hash(s, len);
+                                    java_lang_String::hash_code(s, len);
 }
 
 oop StringTable::lookup(int index, jchar* name,
diff --git a/src/share/vm/classfile/vmSymbols.hpp b/src/share/vm/classfile/vmSymbols.hpp
index 5c9b6327eaaa5b24f638abb369dab6d36bacf049..e5425b4f24637700444a83ea1cfbe3628a922b26 100644
--- a/src/share/vm/classfile/vmSymbols.hpp
+++ b/src/share/vm/classfile/vmSymbols.hpp
@@ -761,6 +761,15 @@
   do_intrinsic(_unpark,                   sun_misc_Unsafe,        unpark_name, unpark_signature,                 F_RN)  \
    do_name(     unpark_name,                                     "unpark")                                              \
    do_alias(    unpark_signature,                               /*(LObject;)V*/ object_void_signature)                  \
+  do_intrinsic(_loadFence,                sun_misc_Unsafe,        loadFence_name, loadFence_signature,           F_RN)  \
+   do_name(     loadFence_name,                                  "loadFence")                                           \
+   do_alias(    loadFence_signature,                              void_method_signature)                                \
+  do_intrinsic(_storeFence,               sun_misc_Unsafe,        storeFence_name, storeFence_signature,         F_RN)  \
+   do_name(     storeFence_name,                                 "storeFence")                                          \
+   do_alias(    storeFence_signature,                             void_method_signature)                                \
+  do_intrinsic(_fullFence,                sun_misc_Unsafe,        fullFence_name, fullFence_signature,           F_RN)  \
+   do_name(     fullFence_name,                                  "fullFence")                                           \
+   do_alias(    fullFence_signature,                              void_method_signature)                                \
                                                                                                                         \
   /* unsafe memory references (there are a lot of them...) */                                                           \
   do_signature(getObject_signature,       "(Ljava/lang/Object;J)Ljava/lang/Object;")                                    \
@@ -902,12 +911,14 @@
   do_intrinsic(_getAndAddLong,            sun_misc_Unsafe,        getAndAddLong_name, getAndAddLong_signature, F_R)     \
    do_name(     getAndAddLong_name,                               "getAndAddLong")                                      \
    do_signature(getAndAddLong_signature,                          "(Ljava/lang/Object;JJ)J" )                           \
-  do_intrinsic(_getAndSetInt,             sun_misc_Unsafe,        getAndSet_name, getAndSetInt_signature, F_R)          \
-   do_name(     getAndSet_name,                                   "getAndSet")                                          \
+  do_intrinsic(_getAndSetInt,             sun_misc_Unsafe,        getAndSetInt_name, getAndSetInt_signature, F_R)       \
+   do_name(     getAndSetInt_name,                                "getAndSetInt")                                       \
    do_alias(    getAndSetInt_signature,                         /*"(Ljava/lang/Object;JI)I"*/ getAndAddInt_signature)   \
-  do_intrinsic(_getAndSetLong,            sun_misc_Unsafe,        getAndSet_name, getAndSetLong_signature, F_R)         \
+  do_intrinsic(_getAndSetLong,            sun_misc_Unsafe,        getAndSetLong_name, getAndSetLong_signature, F_R)     \
+   do_name(     getAndSetLong_name,                               "getAndSetLong")                                      \
    do_alias(    getAndSetLong_signature,                        /*"(Ljava/lang/Object;JJ)J"*/ getAndAddLong_signature)  \
-  do_intrinsic(_getAndSetObject,          sun_misc_Unsafe,        getAndSet_name, getAndSetObject_signature,  F_R)      \
+  do_intrinsic(_getAndSetObject,          sun_misc_Unsafe,        getAndSetObject_name, getAndSetObject_signature,  F_R)\
+   do_name(     getAndSetObject_name,                             "getAndSetObject")                                    \
    do_signature(getAndSetObject_signature,                        "(Ljava/lang/Object;JLjava/lang/Object;)Ljava/lang/Object;" ) \
                                                                                                                         \
   /* prefetch_signature is shared by all prefetch variants */                                                           \
diff --git a/src/share/vm/compiler/compilerOracle.cpp b/src/share/vm/compiler/compilerOracle.cpp
index 7e620cec9fd7fc1a2526b57c2df6a2c581fe38d2..73e348bc97c4f09e46c1afe87aabb13fe871be49 100644
--- a/src/share/vm/compiler/compilerOracle.cpp
+++ b/src/share/vm/compiler/compilerOracle.cpp
@@ -538,6 +538,7 @@ void CompilerOracle::parse_from_line(char* line) {
 
   if (match != NULL) {
     if (!_quiet) {
+      ResourceMark rm;
       tty->print("CompilerOracle: %s ", command_names[command]);
       match->print();
     }
diff --git a/src/share/vm/gc_implementation/g1/concurrentMark.cpp b/src/share/vm/gc_implementation/g1/concurrentMark.cpp
index d3dc1eb7bb73f169b439721ee45949584babc9f0..e03b2f41cfc4c072515104f99cc104f0b4507c2e 100644
--- a/src/share/vm/gc_implementation/g1/concurrentMark.cpp
+++ b/src/share/vm/gc_implementation/g1/concurrentMark.cpp
@@ -46,27 +46,11 @@
 
 // Concurrent marking bit map wrapper
 
-CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter) :
-  _bm((uintptr_t*)NULL,0),
+CMBitMapRO::CMBitMapRO(int shifter) :
+  _bm(),
   _shifter(shifter) {
-  _bmStartWord = (HeapWord*)(rs.base());
-  _bmWordSize  = rs.size()/HeapWordSize;    // rs.size() is in bytes
-  ReservedSpace brs(ReservedSpace::allocation_align_size_up(
-                     (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
-
-  MemTracker::record_virtual_memory_type((address)brs.base(), mtGC);
-
-  guarantee(brs.is_reserved(), "couldn't allocate concurrent marking bit map");
-  // For now we'll just commit all of the bit map up fromt.
-  // Later on we'll try to be more parsimonious with swap.
-  guarantee(_virtual_space.initialize(brs, brs.size()),
-            "couldn't reseve backing store for concurrent marking bit map");
-  assert(_virtual_space.committed_size() == brs.size(),
-         "didn't reserve backing store for all of concurrent marking bit map?");
-  _bm.set_map((uintptr_t*)_virtual_space.low());
-  assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
-         _bmWordSize, "inconsistency in bit map sizing");
-  _bm.set_size(_bmWordSize >> _shifter);
+  _bmStartWord = 0;
+  _bmWordSize = 0;
 }
 
 HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
@@ -108,15 +92,40 @@ int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
 }
 
 #ifndef PRODUCT
-bool CMBitMapRO::covers(ReservedSpace rs) const {
+bool CMBitMapRO::covers(ReservedSpace heap_rs) const {
   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
   assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
          "size inconsistency");
-  return _bmStartWord == (HeapWord*)(rs.base()) &&
-         _bmWordSize  == rs.size()>>LogHeapWordSize;
+  return _bmStartWord == (HeapWord*)(heap_rs.base()) &&
+         _bmWordSize  == heap_rs.size()>>LogHeapWordSize;
 }
 #endif
 
+bool CMBitMap::allocate(ReservedSpace heap_rs) {
+  _bmStartWord = (HeapWord*)(heap_rs.base());
+  _bmWordSize  = heap_rs.size()/HeapWordSize;    // heap_rs.size() is in bytes
+  ReservedSpace brs(ReservedSpace::allocation_align_size_up(
+                     (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
+  if (!brs.is_reserved()) {
+    warning("ConcurrentMark marking bit map allocation failure");
+    return false;
+  }
+  MemTracker::record_virtual_memory_type((address)brs.base(), mtGC);
+  // For now we'll just commit all of the bit map up front.
+  // Later on we'll try to be more parsimonious with swap.
+  if (!_virtual_space.initialize(brs, brs.size())) {
+    warning("ConcurrentMark marking bit map backing store failure");
+    return false;
+  }
+  assert(_virtual_space.committed_size() == brs.size(),
+         "didn't reserve backing store for all of concurrent marking bit map?");
+  _bm.set_map((uintptr_t*)_virtual_space.low());
+  assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
+         _bmWordSize, "inconsistency in bit map sizing");
+  _bm.set_size(_bmWordSize >> _shifter);
+  return true;
+}
+
 void CMBitMap::clearAll() {
   _bm.clear();
   return;
@@ -163,20 +172,79 @@ CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
 #endif
 {}
 
-void CMMarkStack::allocate(size_t size) {
-  _base = NEW_C_HEAP_ARRAY(oop, size, mtGC);
-  if (_base == NULL) {
-    vm_exit_during_initialization("Failed to allocate CM region mark stack");
+bool CMMarkStack::allocate(size_t capacity) {
+  // allocate a stack of the requisite depth
+  ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
+  if (!rs.is_reserved()) {
+    warning("ConcurrentMark MarkStack allocation failure");
+    return false;
   }
-  _index = 0;
-  _capacity = (jint) size;
+  MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
+  if (!_virtual_space.initialize(rs, rs.size())) {
+    warning("ConcurrentMark MarkStack backing store failure");
+    // Release the virtual memory reserved for the marking stack
+    rs.release();
+    return false;
+  }
+  assert(_virtual_space.committed_size() == rs.size(),
+         "Didn't reserve backing store for all of ConcurrentMark stack?");
+  _base = (oop*) _virtual_space.low();
+  setEmpty();
+  _capacity = (jint) capacity;
   _saved_index = -1;
   NOT_PRODUCT(_max_depth = 0);
+  return true;
+}
+
+void CMMarkStack::expand() {
+  // Called, during remark, if we've overflown the marking stack during marking.
+  assert(isEmpty(), "stack should been emptied while handling overflow");
+  assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
+  // Clear expansion flag
+  _should_expand = false;
+  if (_capacity == (jint) MarkStackSizeMax) {
+    if (PrintGCDetails && Verbose) {
+      gclog_or_tty->print_cr(" (benign) Can't expand marking stack capacity, at max size limit");
+    }
+    return;
+  }
+  // Double capacity if possible
+  jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
+  // Do not give up existing stack until we have managed to
+  // get the double capacity that we desired.
+  ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
+                                                           sizeof(oop)));
+  if (rs.is_reserved()) {
+    // Release the backing store associated with old stack
+    _virtual_space.release();
+    // Reinitialize virtual space for new stack
+    if (!_virtual_space.initialize(rs, rs.size())) {
+      fatal("Not enough swap for expanded marking stack capacity");
+    }
+    _base = (oop*)(_virtual_space.low());
+    _index = 0;
+    _capacity = new_capacity;
+  } else {
+    if (PrintGCDetails && Verbose) {
+      // Failed to double capacity, continue;
+      gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from "
+                          SIZE_FORMAT"K to " SIZE_FORMAT"K",
+                          _capacity / K, new_capacity / K);
+    }
+  }
+}
+
+void CMMarkStack::set_should_expand() {
+  // If we're resetting the marking state because of an
+  // marking stack overflow, record that we should, if
+  // possible, expand the stack.
+  _should_expand = _cm->has_overflown();
 }
 
 CMMarkStack::~CMMarkStack() {
   if (_base != NULL) {
-    FREE_C_HEAP_ARRAY(oop, _base, mtGC);
+    _base = NULL;
+    _virtual_space.release();
   }
 }
 
@@ -217,7 +285,7 @@ void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
     jint res = Atomic::cmpxchg(next_index, &_index, index);
     if (res == index) {
       for (int i = 0; i < n; i++) {
-        int ind = index + i;
+        int  ind = index + i;
         assert(ind < _capacity, "By overflow test above.");
         _base[ind] = ptr_arr[i];
       }
@@ -228,7 +296,6 @@ void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
   }
 }
 
-
 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
   jint start = _index;
@@ -244,9 +311,9 @@ void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
     assert(ind < _capacity, "By overflow test above.");
     _base[ind] = ptr_arr[i];
   }
+  NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
 }
 
-
 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
   jint index = _index;
@@ -255,7 +322,7 @@ bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
     return false;
   } else {
     int k = MIN2(max, index);
-    jint new_ind = index - k;
+    jint  new_ind = index - k;
     for (int j = 0; j < k; j++) {
       ptr_arr[j] = _base[new_ind + j];
     }
@@ -404,9 +471,10 @@ uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
   return MAX2((n_par_threads + 2) / 4, 1U);
 }
 
-ConcurrentMark::ConcurrentMark(ReservedSpace rs, uint max_regions) :
-  _markBitMap1(rs, MinObjAlignment - 1),
-  _markBitMap2(rs, MinObjAlignment - 1),
+ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, ReservedSpace heap_rs) :
+  _g1h(g1h),
+  _markBitMap1(MinObjAlignment - 1),
+  _markBitMap2(MinObjAlignment - 1),
 
   _parallel_marking_threads(0),
   _max_parallel_marking_threads(0),
@@ -415,10 +483,10 @@ ConcurrentMark::ConcurrentMark(ReservedSpace rs, uint max_regions) :
   _cleanup_sleep_factor(0.0),
   _cleanup_task_overhead(1.0),
   _cleanup_list("Cleanup List"),
-  _region_bm((BitMap::idx_t) max_regions, false /* in_resource_area*/),
-  _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
-           CardTableModRefBS::card_shift,
-           false /* in_resource_area*/),
+  _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
+  _card_bm((heap_rs.size() + CardTableModRefBS::card_size - 1) >>
+            CardTableModRefBS::card_shift,
+            false /* in_resource_area*/),
 
   _prevMarkBitMap(&_markBitMap1),
   _nextMarkBitMap(&_markBitMap2),
@@ -449,7 +517,8 @@ ConcurrentMark::ConcurrentMark(ReservedSpace rs, uint max_regions) :
   _parallel_workers(NULL),
 
   _count_card_bitmaps(NULL),
-  _count_marked_bytes(NULL) {
+  _count_marked_bytes(NULL),
+  _completed_initialization(false) {
   CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
   if (verbose_level < no_verbose) {
     verbose_level = no_verbose;
@@ -464,61 +533,34 @@ ConcurrentMark::ConcurrentMark(ReservedSpace rs, uint max_regions) :
                            "heap end = "PTR_FORMAT, _heap_start, _heap_end);
   }
 
-  _markStack.allocate(MarkStackSize);
+  if (!_markBitMap1.allocate(heap_rs)) {
+    warning("Failed to allocate first CM bit map");
+    return;
+  }
+  if (!_markBitMap2.allocate(heap_rs)) {
+    warning("Failed to allocate second CM bit map");
+    return;
+  }
 
   // Create & start a ConcurrentMark thread.
   _cmThread = new ConcurrentMarkThread(this);
   assert(cmThread() != NULL, "CM Thread should have been created");
   assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
 
-  _g1h = G1CollectedHeap::heap();
   assert(CGC_lock != NULL, "Where's the CGC_lock?");
-  assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
-  assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");
+  assert(_markBitMap1.covers(heap_rs), "_markBitMap1 inconsistency");
+  assert(_markBitMap2.covers(heap_rs), "_markBitMap2 inconsistency");
 
   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
   satb_qs.set_buffer_size(G1SATBBufferSize);
 
   _root_regions.init(_g1h, this);
 
-  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
-  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);
-
-  _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap,  _max_worker_id, mtGC);
-  _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);
-
-  BitMap::idx_t card_bm_size = _card_bm.size();
-
-  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
-  _active_tasks = _max_worker_id;
-  for (uint i = 0; i < _max_worker_id; ++i) {
-    CMTaskQueue* task_queue = new CMTaskQueue();
-    task_queue->initialize();
-    _task_queues->register_queue(i, task_queue);
-
-    _count_card_bitmaps[i] = BitMap(card_bm_size, false);
-    _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, (size_t) max_regions, mtGC);
-
-    _tasks[i] = new CMTask(i, this,
-                           _count_marked_bytes[i],
-                           &_count_card_bitmaps[i],
-                           task_queue, _task_queues);
-
-    _accum_task_vtime[i] = 0.0;
-  }
-
-  // Calculate the card number for the bottom of the heap. Used
-  // in biasing indexes into the accounting card bitmaps.
-  _heap_bottom_card_num =
-    intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
-                                CardTableModRefBS::card_shift);
-
-  // Clear all the liveness counting data
-  clear_all_count_data();
-
   if (ConcGCThreads > ParallelGCThreads) {
-    vm_exit_during_initialization("Can't have more ConcGCThreads "
-                                  "than ParallelGCThreads.");
+    warning("Can't have more ConcGCThreads (" UINT32_FORMAT ") "
+            "than ParallelGCThreads (" UINT32_FORMAT ").",
+            ConcGCThreads, ParallelGCThreads);
+    return;
   }
   if (ParallelGCThreads == 0) {
     // if we are not running with any parallel GC threads we will not
@@ -590,9 +632,86 @@ ConcurrentMark::ConcurrentMark(ReservedSpace rs, uint max_regions) :
     }
   }
 
+  if (FLAG_IS_DEFAULT(MarkStackSize)) {
+    uintx mark_stack_size =
+      MIN2(MarkStackSizeMax,
+          MAX2(MarkStackSize, (uintx) (parallel_marking_threads() * TASKQUEUE_SIZE)));
+    // Verify that the calculated value for MarkStackSize is in range.
+    // It would be nice to use the private utility routine from Arguments.
+    if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
+      warning("Invalid value calculated for MarkStackSize (" UINTX_FORMAT "): "
+              "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
+              mark_stack_size, 1, MarkStackSizeMax);
+      return;
+    }
+    FLAG_SET_ERGO(uintx, MarkStackSize, mark_stack_size);
+  } else {
+    // Verify MarkStackSize is in range.
+    if (FLAG_IS_CMDLINE(MarkStackSize)) {
+      if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
+        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
+          warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT "): "
+                  "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
+                  MarkStackSize, 1, MarkStackSizeMax);
+          return;
+        }
+      } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
+        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
+          warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT ")"
+                  " or for MarkStackSizeMax (" UINTX_FORMAT ")",
+                  MarkStackSize, MarkStackSizeMax);
+          return;
+        }
+      }
+    }
+  }
+
+  if (!_markStack.allocate(MarkStackSize)) {
+    warning("Failed to allocate CM marking stack");
+    return;
+  }
+
+  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
+  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);
+
+  _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap,  _max_worker_id, mtGC);
+  _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);
+
+  BitMap::idx_t card_bm_size = _card_bm.size();
+
+  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
+  _active_tasks = _max_worker_id;
+
+  size_t max_regions = (size_t) _g1h->max_regions();
+  for (uint i = 0; i < _max_worker_id; ++i) {
+    CMTaskQueue* task_queue = new CMTaskQueue();
+    task_queue->initialize();
+    _task_queues->register_queue(i, task_queue);
+
+    _count_card_bitmaps[i] = BitMap(card_bm_size, false);
+    _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);
+
+    _tasks[i] = new CMTask(i, this,
+                           _count_marked_bytes[i],
+                           &_count_card_bitmaps[i],
+                           task_queue, _task_queues);
+
+    _accum_task_vtime[i] = 0.0;
+  }
+
+  // Calculate the card number for the bottom of the heap. Used
+  // in biasing indexes into the accounting card bitmaps.
+  _heap_bottom_card_num =
+    intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
+                                CardTableModRefBS::card_shift);
+
+  // Clear all the liveness counting data
+  clear_all_count_data();
+
   // so that the call below can read a sensible value
-  _heap_start = (HeapWord*) rs.base();
+  _heap_start = (HeapWord*) heap_rs.base();
   set_non_marking_state();
+  _completed_initialization = true;
 }
 
 void ConcurrentMark::update_g1_committed(bool force) {
@@ -1165,6 +1284,11 @@ void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
     assert(!restart_for_overflow(), "sanity");
   }
 
+  // Expand the marking stack, if we have to and if we can.
+  if (_markStack.should_expand()) {
+    _markStack.expand();
+  }
+
   // Reset the marking state if marking completed
   if (!restart_for_overflow()) {
     set_non_marking_state();
@@ -2785,7 +2909,7 @@ void ConcurrentMark::verify_no_cset_oops(bool verify_stacks,
     // Verify entries on the task queues
     for (uint i = 0; i < _max_worker_id; i += 1) {
       cl.set_phase(VerifyNoCSetOopsQueues, i);
-      OopTaskQueue* queue = _task_queues->queue(i);
+      CMTaskQueue* queue = _task_queues->queue(i);
       queue->oops_do(&cl);
     }
   }
@@ -2840,8 +2964,8 @@ void ConcurrentMark::verify_no_cset_oops(bool verify_stacks,
 #endif // PRODUCT
 
 void ConcurrentMark::clear_marking_state(bool clear_overflow) {
-  _markStack.setEmpty();
-  _markStack.clear_overflow();
+  _markStack.set_should_expand();
+  _markStack.setEmpty();        // Also clears the _markStack overflow flag
   if (clear_overflow) {
     clear_has_overflown();
   } else {
@@ -2850,7 +2974,7 @@ void ConcurrentMark::clear_marking_state(bool clear_overflow) {
   _finger = _heap_start;
 
   for (uint i = 0; i < _max_worker_id; ++i) {
-    OopTaskQueue* queue = _task_queues->queue(i);
+    CMTaskQueue* queue = _task_queues->queue(i);
     queue->set_empty();
   }
 }
diff --git a/src/share/vm/gc_implementation/g1/concurrentMark.hpp b/src/share/vm/gc_implementation/g1/concurrentMark.hpp
index 5bac7a6b34d5e40686c5437a545cc8dcfc871e40..8089c98294aa352a2715cc930f3c7ebd609f5eb1 100644
--- a/src/share/vm/gc_implementation/g1/concurrentMark.hpp
+++ b/src/share/vm/gc_implementation/g1/concurrentMark.hpp
@@ -63,7 +63,7 @@ class CMBitMapRO VALUE_OBJ_CLASS_SPEC {
 
  public:
   // constructor
-  CMBitMapRO(ReservedSpace rs, int shifter);
+  CMBitMapRO(int shifter);
 
   enum { do_yield = true };
 
@@ -117,8 +117,11 @@ class CMBitMap : public CMBitMapRO {
 
  public:
   // constructor
-  CMBitMap(ReservedSpace rs, int shifter) :
-    CMBitMapRO(rs, shifter) {}
+  CMBitMap(int shifter) :
+    CMBitMapRO(shifter) {}
+
+  // Allocates the back store for the marking bitmap
+  bool allocate(ReservedSpace heap_rs);
 
   // write marks
   void mark(HeapWord* addr) {
@@ -155,17 +158,18 @@ class CMBitMap : public CMBitMapRO {
   MemRegion getAndClearMarkedRegion(HeapWord* addr, HeapWord* end_addr);
 };
 
-// Represents a marking stack used by the CM collector.
-// Ideally this should be GrowableArray<> just like MSC's marking stack(s).
+// Represents a marking stack used by ConcurrentMarking in the G1 collector.
 class CMMarkStack VALUE_OBJ_CLASS_SPEC {
+  VirtualSpace _virtual_space;   // Underlying backing store for actual stack
   ConcurrentMark* _cm;
   oop*   _base;        // bottom of stack
-  jint   _index;       // one more than last occupied index
-  jint   _capacity;    // max #elements
-  jint   _saved_index; // value of _index saved at start of GC
-  NOT_PRODUCT(jint _max_depth;)  // max depth plumbed during run
+  jint _index;       // one more than last occupied index
+  jint _capacity;    // max #elements
+  jint _saved_index; // value of _index saved at start of GC
+  NOT_PRODUCT(jint _max_depth;)   // max depth plumbed during run
 
-  bool   _overflow;
+  bool  _overflow;
+  bool  _should_expand;
   DEBUG_ONLY(bool _drain_in_progress;)
   DEBUG_ONLY(bool _drain_in_progress_yields;)
 
@@ -173,7 +177,13 @@ class CMMarkStack VALUE_OBJ_CLASS_SPEC {
   CMMarkStack(ConcurrentMark* cm);
   ~CMMarkStack();
 
-  void allocate(size_t size);
+#ifndef PRODUCT
+  jint max_depth() const {
+    return _max_depth;
+  }
+#endif
+
+  bool allocate(size_t capacity);
 
   oop pop() {
     if (!isEmpty()) {
@@ -231,11 +241,17 @@ class CMMarkStack VALUE_OBJ_CLASS_SPEC {
 
   bool isEmpty()    { return _index == 0; }
   bool isFull()     { return _index == _capacity; }
-  int maxElems()    { return _capacity; }
+  int  maxElems()   { return _capacity; }
 
   bool overflow() { return _overflow; }
   void clear_overflow() { _overflow = false; }
 
+  bool should_expand() const { return _should_expand; }
+  void set_should_expand();
+
+  // Expand the stack, typically in response to an overflow condition
+  void expand();
+
   int  size() { return _index; }
 
   void setEmpty()   { _index = 0; clear_overflow(); }
@@ -344,6 +360,7 @@ public:
 class ConcurrentMarkThread;
 
 class ConcurrentMark: public CHeapObj<mtGC> {
+  friend class CMMarkStack;
   friend class ConcurrentMarkThread;
   friend class CMTask;
   friend class CMBitMapClosure;
@@ -577,6 +594,9 @@ protected:
   // the card bitmaps.
   intptr_t _heap_bottom_card_num;
 
+  // Set to true when initialization is complete
+  bool _completed_initialization;
+
 public:
   // Manipulation of the global mark stack.
   // Notice that the first mark_stack_push is CAS-based, whereas the
@@ -636,7 +656,7 @@ public:
     return _task_queues->steal(worker_id, hash_seed, obj);
   }
 
-  ConcurrentMark(ReservedSpace rs, uint max_regions);
+  ConcurrentMark(G1CollectedHeap* g1h, ReservedSpace heap_rs);
   ~ConcurrentMark();
 
   ConcurrentMarkThread* cmThread() { return _cmThread; }
@@ -907,6 +927,11 @@ public:
   // Should *not* be called from parallel code.
   inline bool mark_and_count(oop obj);
 
+  // Returns true if initialization was successfully completed.
+  bool completed_initialization() const {
+    return _completed_initialization;
+  }
+
 protected:
   // Clear all the per-task bitmaps and arrays used to store the
   // counting data.
diff --git a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp
index 70b45fea4f09680cc2d74062f75c6a517022f6e7..448c5707787cad1e789605d3f51c6c44ae0ea2f5 100644
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp
@@ -2079,7 +2079,11 @@ jint G1CollectedHeap::initialize() {
 
   // Create the ConcurrentMark data structure and thread.
   // (Must do this late, so that "max_regions" is defined.)
-  _cm       = new ConcurrentMark(heap_rs, max_regions());
+  _cm = new ConcurrentMark(this, heap_rs);
+  if (_cm == NULL || !_cm->completed_initialization()) {
+    vm_shutdown_during_initialization("Could not create/initialize ConcurrentMark");
+    return JNI_ENOMEM;
+  }
   _cmThread = _cm->cmThread();
 
   // Initialize the from_card cache structure of HeapRegionRemSet.
@@ -2087,7 +2091,7 @@ jint G1CollectedHeap::initialize() {
 
   // Now expand into the initial heap size.
   if (!expand(init_byte_size)) {
-    vm_exit_during_initialization("Failed to allocate initial heap.");
+    vm_shutdown_during_initialization("Failed to allocate initial heap.");
     return JNI_ENOMEM;
   }
 
diff --git a/src/share/vm/gc_implementation/parallelScavenge/adjoiningVirtualSpaces.cpp b/src/share/vm/gc_implementation/parallelScavenge/adjoiningVirtualSpaces.cpp
index e21f7a7a6e443dad2ea0282129170edc82f58d91..e5ca1f5c097bba5582ad020de0d2ed1db7aa448e 100644
--- a/src/share/vm/gc_implementation/parallelScavenge/adjoiningVirtualSpaces.cpp
+++ b/src/share/vm/gc_implementation/parallelScavenge/adjoiningVirtualSpaces.cpp
@@ -24,6 +24,7 @@
 
 #include "precompiled.hpp"
 #include "gc_implementation/parallelScavenge/adjoiningVirtualSpaces.hpp"
+#include "memory/allocation.inline.hpp"
 #include "runtime/java.hpp"
 
 AdjoiningVirtualSpaces::AdjoiningVirtualSpaces(ReservedSpace rs,
diff --git a/src/share/vm/gc_implementation/shared/gcStats.cpp b/src/share/vm/gc_implementation/shared/gcStats.cpp
index faef280f632d4b61d8b2644ce07a2e890a8d10bf..745f8f3ab65eb60ca973d8209715c49d6f6f5e24 100644
--- a/src/share/vm/gc_implementation/shared/gcStats.cpp
+++ b/src/share/vm/gc_implementation/shared/gcStats.cpp
@@ -25,6 +25,7 @@
 #include "precompiled.hpp"
 #include "gc_implementation/shared/gcStats.hpp"
 #include "gc_implementation/shared/gcUtil.hpp"
+#include "memory/allocation.inline.hpp"
 
 GCStats::GCStats() {
     _avg_promoted       = new AdaptivePaddedNoZeroDevAverage(
diff --git a/src/share/vm/memory/allocation.hpp b/src/share/vm/memory/allocation.hpp
index 8bf63ac72f64c17141f39c319b8808b5ff15e67e..3cade72c2603ff3c4c03a980258b138bd9f0b429 100644
--- a/src/share/vm/memory/allocation.hpp
+++ b/src/share/vm/memory/allocation.hpp
@@ -202,7 +202,7 @@ template <MEMFLAGS F> class CHeapObj ALLOCATION_SUPER_CLASS_SPEC {
 // Calling new or delete will result in fatal error.
 
 class StackObj ALLOCATION_SUPER_CLASS_SPEC {
- public:
+ private:
   void* operator new(size_t size);
   void  operator delete(void* p);
 };
@@ -226,7 +226,7 @@ class StackObj ALLOCATION_SUPER_CLASS_SPEC {
 // be defined as a an empty string "".
 //
 class _ValueObj {
- public:
+ private:
   void* operator new(size_t size);
   void operator delete(void* p);
 };
diff --git a/src/share/vm/memory/metaspace.cpp b/src/share/vm/memory/metaspace.cpp
index 42870cd0fda3b8022a59358decd6a226e50f1230..6c0b4210faa55b25ca01aeb20d04b215b2892ba9 100644
--- a/src/share/vm/memory/metaspace.cpp
+++ b/src/share/vm/memory/metaspace.cpp
@@ -2192,11 +2192,6 @@ void SpaceManager::mangle_freed_chunks() {
 
 // MetaspaceAux
 
-size_t MetaspaceAux::used_in_bytes() {
-  return (Metaspace::class_space_list()->used_words_sum() +
-          Metaspace::space_list()->used_words_sum()) * BytesPerWord;
-}
-
 size_t MetaspaceAux::used_in_bytes(Metaspace::MetadataType mdtype) {
   size_t used = 0;
   ClassLoaderDataGraphMetaspaceIterator iter;
@@ -2222,14 +2217,6 @@ size_t MetaspaceAux::free_in_bytes(Metaspace::MetadataType mdtype) {
   return free * BytesPerWord;
 }
 
-// The total words available for metadata allocation.  This
-// uses Metaspace capacity_words() which is the total words
-// in chunks allocated for a Metaspace.
-size_t MetaspaceAux::capacity_in_bytes() {
-  return (Metaspace::class_space_list()->capacity_words_sum() +
-          Metaspace::space_list()->capacity_words_sum()) * BytesPerWord;
-}
-
 size_t MetaspaceAux::capacity_in_bytes(Metaspace::MetadataType mdtype) {
   size_t capacity = free_chunks_total(mdtype);
   ClassLoaderDataGraphMetaspaceIterator iter;
@@ -2242,11 +2229,6 @@ size_t MetaspaceAux::capacity_in_bytes(Metaspace::MetadataType mdtype) {
   return capacity * BytesPerWord;
 }
 
-size_t MetaspaceAux::reserved_in_bytes() {
-  return (Metaspace::class_space_list()->virtual_space_total() +
-          Metaspace::space_list()->virtual_space_total()) * BytesPerWord;
-}
-
 size_t MetaspaceAux::reserved_in_bytes(Metaspace::MetadataType mdtype) {
   size_t reserved = (mdtype == Metaspace::ClassType) ?
                        Metaspace::class_space_list()->virtual_space_total() :
diff --git a/src/share/vm/memory/metaspace.hpp b/src/share/vm/memory/metaspace.hpp
index 993419436bba731c1601f11177e9ba2d8c9d8133..bfc6d90ae6588d5c0467a9917922eca646910752 100644
--- a/src/share/vm/memory/metaspace.hpp
+++ b/src/share/vm/memory/metaspace.hpp
@@ -156,16 +156,25 @@ class MetaspaceAux : AllStatic {
 
  public:
   // Total of space allocated to metadata in all Metaspaces
-  static size_t used_in_bytes();
+  static size_t used_in_bytes() {
+    return used_in_bytes(Metaspace::ClassType) +
+           used_in_bytes(Metaspace::NonClassType);
+  }
 
   // Total of available space in all Metaspaces
   // Total of capacity allocated to all Metaspaces.  This includes
   // space in Metachunks not yet allocated and in the Metachunk
   // freelist.
-  static size_t capacity_in_bytes();
+  static size_t capacity_in_bytes() {
+    return capacity_in_bytes(Metaspace::ClassType) +
+           capacity_in_bytes(Metaspace::NonClassType);
+  }
 
   // Total space reserved in all Metaspaces
-  static size_t reserved_in_bytes();
+  static size_t reserved_in_bytes() {
+    return reserved_in_bytes(Metaspace::ClassType) +
+           reserved_in_bytes(Metaspace::NonClassType);
+  }
 
   static size_t min_chunk_size();
 
diff --git a/src/share/vm/oops/constMethod.hpp b/src/share/vm/oops/constMethod.hpp
index 771bf7b169c89ea3aa89385c09adc4e7c1597067..2819b3e7756fa301472ca4537d178db4d9368696 100644
--- a/src/share/vm/oops/constMethod.hpp
+++ b/src/share/vm/oops/constMethod.hpp
@@ -46,6 +46,7 @@
 // | interp_kind  | flags    | code_size                  |
 // | name index              | signature index            |
 // | method_idnum            | max_stack                  |
+// | max_locals              | size_of_parameters         |
 // |------------------------------------------------------|
 // |                                                      |
 // | byte codes                                           |
@@ -150,7 +151,8 @@ private:
                                                  // initially corresponds to the index into the methods array.
                                                  // but this may change with redefinition
   u2                _max_stack;                  // Maximum number of entries on the expression stack
-
+  u2                _max_locals;                 // Number of local variables used by this method
+  u2                _size_of_parameters;         // size of the parameter block (receiver + arguments) in words
 
   // Constructor
   ConstMethod(int byte_code_size,
@@ -338,6 +340,11 @@ public:
 
   static ByteSize max_stack_offset()
                             { return byte_offset_of(ConstMethod, _max_stack); }
+  static ByteSize size_of_locals_offset()
+                            { return byte_offset_of(ConstMethod, _max_locals); }
+  static ByteSize size_of_parameters_offset()
+                            { return byte_offset_of(ConstMethod, _size_of_parameters); }
+
 
   // Unique id for the method
   static const u2 MAX_IDNUM;
@@ -349,6 +356,14 @@ public:
   int  max_stack() const                         { return _max_stack; }
   void set_max_stack(int size)                   { _max_stack = size; }
 
+  // max locals
+  int  max_locals() const                        { return _max_locals; }
+  void set_max_locals(int size)                  { _max_locals = size; }
+
+  // size of parameters
+  int  size_of_parameters() const                { return _size_of_parameters; }
+  void set_size_of_parameters(int size)          { _size_of_parameters = size; }
+
   // Deallocation for RedefineClasses
   void deallocate_contents(ClassLoaderData* loader_data);
   bool is_klass() const { return false; }
diff --git a/src/share/vm/oops/method.hpp b/src/share/vm/oops/method.hpp
index 5cf153197895ca48b99a94f0805a425c627a537f..50305294ec925d5ef727352cb55b94d00acab801 100644
--- a/src/share/vm/oops/method.hpp
+++ b/src/share/vm/oops/method.hpp
@@ -73,8 +73,7 @@
 // |------------------------------------------------------|
 // | result_index (C++ interpreter only)                  |
 // |------------------------------------------------------|
-// | method_size             |   max_locals               |
-// | size_of_parameters      |   intrinsic_id|   flags    |
+// | method_size             |   intrinsic_id|   flags    |
 // |------------------------------------------------------|
 // | throwout_count          |   num_breakpoints          |
 // |------------------------------------------------------|
@@ -116,8 +115,6 @@ class Method : public Metadata {
   int               _result_index;               // C++ interpreter needs for converting results to/from stack
 #endif
   u2                _method_size;                // size of this object
-  u2                _max_locals;                 // Number of local variables used by this method
-  u2                _size_of_parameters;         // size of the parameter block (receiver + arguments) in words
   u1                _intrinsic_id;               // vmSymbols::intrinsic_id (0 == _none)
   u1                _jfr_towrite  : 1,           // Flags
                     _force_inline : 1,
@@ -299,8 +296,8 @@ class Method : public Metadata {
   void      set_max_stack(int size)              {        constMethod()->set_max_stack(size); }
 
   // max locals
-  int  max_locals() const                        { return _max_locals; }
-  void set_max_locals(int size)                  { _max_locals = size; }
+  int  max_locals() const                        { return constMethod()->max_locals(); }
+  void set_max_locals(int size)                  { constMethod()->set_max_locals(size); }
 
   int highest_comp_level() const;
   void set_highest_comp_level(int level);
@@ -318,7 +315,8 @@ class Method : public Metadata {
   void set_interpreter_throwout_count(int count) { _interpreter_throwout_count = count; }
 
   // size of parameters
-  int  size_of_parameters() const                { return _size_of_parameters; }
+  int  size_of_parameters() const                { return constMethod()->size_of_parameters(); }
+  void set_size_of_parameters(int size)          { constMethod()->set_size_of_parameters(size); }
 
   bool has_stackmap_table() const {
     return constMethod()->has_stackmap_table();
@@ -595,8 +593,6 @@ class Method : public Metadata {
 #ifdef CC_INTERP
   static ByteSize result_index_offset()          { return byte_offset_of(Method, _result_index ); }
 #endif /* CC_INTERP */
-  static ByteSize size_of_locals_offset()        { return byte_offset_of(Method, _max_locals        ); }
-  static ByteSize size_of_parameters_offset()    { return byte_offset_of(Method, _size_of_parameters); }
   static ByteSize from_compiled_offset()         { return byte_offset_of(Method, _from_compiled_entry); }
   static ByteSize code_offset()                  { return byte_offset_of(Method, _code); }
   static ByteSize invocation_counter_offset()    { return byte_offset_of(Method, _invocation_counter); }
@@ -804,9 +800,6 @@ class Method : public Metadata {
                            Array<AnnotationArray*>* methods_type_annotations,
                            bool idempotent = false);
 
-  // size of parameters
-  void set_size_of_parameters(int size)          { _size_of_parameters = size; }
-
   // Deallocation function for redefine classes or if an error occurs
   void deallocate_contents(ClassLoaderData* loader_data);
 
diff --git a/src/share/vm/opto/addnode.cpp b/src/share/vm/opto/addnode.cpp
index 6e54990281beb1f163cf5785316f61b24315a637..045e4f20918779eb4a11f0351b2d0dd1f486907f 100644
--- a/src/share/vm/opto/addnode.cpp
+++ b/src/share/vm/opto/addnode.cpp
@@ -189,6 +189,11 @@ Node *AddNode::Ideal(PhaseGVN *phase, bool can_reshape) {
       set_req(1, addx);
       set_req(2, a22);
       progress = this;
+      PhaseIterGVN *igvn = phase->is_IterGVN();
+      if (add2->outcnt() == 0 && igvn) {
+        // add disconnected.
+        igvn->_worklist.push(add2);
+      }
     }
   }
 
@@ -624,6 +629,11 @@ Node *AddPNode::Ideal(PhaseGVN *phase, bool can_reshape) {
     if( t22->singleton() && (t22 != Type::TOP) ) {  // Right input is an add of a constant?
       set_req(Address, phase->transform(new (phase->C) AddPNode(in(Base),in(Address),add->in(1))));
       set_req(Offset, add->in(2));
+      PhaseIterGVN *igvn = phase->is_IterGVN();
+      if (add->outcnt() == 0 && igvn) {
+        // add disconnected.
+        igvn->_worklist.push((Node*)add);
+      }
       return this;              // Made progress
     }
   }
diff --git a/src/share/vm/opto/bytecodeInfo.cpp b/src/share/vm/opto/bytecodeInfo.cpp
index 80117960dc58cccbe16f66701123200e9c15cda7..7392ee0f614dbd4e0474ea37c6e4a8be8b239c1e 100644
--- a/src/share/vm/opto/bytecodeInfo.cpp
+++ b/src/share/vm/opto/bytecodeInfo.cpp
@@ -403,7 +403,7 @@ const char* InlineTree::check_can_parse(ciMethod* callee) {
 //------------------------------print_inlining---------------------------------
 // Really, the failure_msg can be a success message also.
 void InlineTree::print_inlining(ciMethod* callee_method, int caller_bci, const char* failure_msg) const {
-  CompileTask::print_inlining(callee_method, inline_level(), caller_bci, failure_msg ? failure_msg : "inline");
+  C->print_inlining(callee_method, inline_level(), caller_bci, failure_msg ? failure_msg : "inline");
   if (callee_method == NULL)  tty->print(" callee not monotonic or profiled");
   if (Verbose && callee_method) {
     const InlineTree *top = this;
diff --git a/src/share/vm/opto/callGenerator.cpp b/src/share/vm/opto/callGenerator.cpp
index 33d8c0c6241534d5127d5c0d11bb9bcd134d0400..66b53abf8adb5d1670e68b3f4711b27cb4b1fe13 100644
--- a/src/share/vm/opto/callGenerator.cpp
+++ b/src/share/vm/opto/callGenerator.cpp
@@ -274,6 +274,9 @@ class LateInlineCallGenerator : public DirectCallGenerator {
   virtual void do_late_inline();
 
   virtual JVMState* generate(JVMState* jvms) {
+    Compile *C = Compile::current();
+    C->print_inlining_skip(this);
+
     // Record that this call site should be revisited once the main
     // parse is finished.
     Compile::current()->add_late_inline(this);
@@ -284,7 +287,6 @@ class LateInlineCallGenerator : public DirectCallGenerator {
     // as is done for allocations and macro expansion.
     return DirectCallGenerator::generate(jvms);
   }
-
 };
 
 
@@ -307,7 +309,9 @@ void LateInlineCallGenerator::do_late_inline() {
 
   // Make sure the state is a MergeMem for parsing.
   if (!map->in(TypeFunc::Memory)->is_MergeMem()) {
-    map->set_req(TypeFunc::Memory, MergeMemNode::make(C, map->in(TypeFunc::Memory)));
+    Node* mem = MergeMemNode::make(C, map->in(TypeFunc::Memory));
+    C->initial_gvn()->set_type_bottom(mem);
+    map->set_req(TypeFunc::Memory, mem);
   }
 
   // Make enough space for the expression stack and transfer the incoming arguments
@@ -320,6 +324,8 @@ void LateInlineCallGenerator::do_late_inline() {
     }
   }
 
+  C->print_inlining_insert(this);
+
   CompileLog* log = C->log();
   if (log != NULL) {
     log->head("late_inline method='%d'", log->identify(method()));
@@ -608,7 +614,7 @@ CallGenerator* CallGenerator::for_method_handle_inline(JVMState* jvms, ciMethod*
         if (cg != NULL && cg->is_inline())
           return cg;
       } else {
-        if (PrintInlining)  CompileTask::print_inlining(callee, jvms->depth() - 1, jvms->bci(), "receiver not constant");
+        if (PrintInlining)  C->print_inlining(callee, jvms->depth() - 1, jvms->bci(), "receiver not constant");
       }
     }
     break;
diff --git a/src/share/vm/opto/callGenerator.hpp b/src/share/vm/opto/callGenerator.hpp
index ae59173bf7d40e6585842bdd77a90388ab14c737..8b3bdada39f924a5739ee2bba51219092ddb9697 100644
--- a/src/share/vm/opto/callGenerator.hpp
+++ b/src/share/vm/opto/callGenerator.hpp
@@ -147,9 +147,9 @@ class CallGenerator : public ResourceObj {
                                                 CallGenerator* cg);
   virtual Node* generate_predicate(JVMState* jvms) { return NULL; };
 
-  static void print_inlining(ciMethod* callee, int inline_level, int bci, const char* msg) {
+  static void print_inlining(Compile* C, ciMethod* callee, int inline_level, int bci, const char* msg) {
     if (PrintInlining)
-      CompileTask::print_inlining(callee, inline_level, bci, msg);
+      C->print_inlining(callee, inline_level, bci, msg);
   }
 };
 
diff --git a/src/share/vm/opto/callnode.cpp b/src/share/vm/opto/callnode.cpp
index 067e9197d20ffb18ce0d9045fc924adbb4cc243c..95f1934c6c185709b57f0f5a4c6157eb31d0c040 100644
--- a/src/share/vm/opto/callnode.cpp
+++ b/src/share/vm/opto/callnode.cpp
@@ -751,7 +751,7 @@ void CallNode::extract_projections(CallProjections* projs, bool separate_io_proj
         projs->fallthrough_ioproj = pn;
       for (DUIterator j = pn->outs(); pn->has_out(j); j++) {
         Node* e = pn->out(j);
-        if (e->Opcode() == Op_CreateEx && e->in(0)->is_CatchProj()) {
+        if (e->Opcode() == Op_CreateEx && e->in(0)->is_CatchProj() && e->outcnt() > 0) {
           assert(projs->exobj == NULL, "only one");
           projs->exobj = e;
         }
diff --git a/src/share/vm/opto/cfgnode.cpp b/src/share/vm/opto/cfgnode.cpp
index 24d51afd2fa952929628802afbf12d5a18e0cb7a..3ee9fffe04be4d8c620dc4c2996bef146ce612e9 100644
--- a/src/share/vm/opto/cfgnode.cpp
+++ b/src/share/vm/opto/cfgnode.cpp
@@ -1566,6 +1566,10 @@ Node *PhiNode::Ideal(PhaseGVN *phase, bool can_reshape) {
     Node* n = in(j);            // Get the input
     if (rc == NULL || phase->type(rc) == Type::TOP) {
       if (n != top) {           // Not already top?
+        PhaseIterGVN *igvn = phase->is_IterGVN();
+        if (can_reshape && igvn != NULL) {
+          igvn->_worklist.push(r);
+        }
         set_req(j, top);        // Nuke it down
         progress = this;        // Record progress
       }
diff --git a/src/share/vm/opto/compile.cpp b/src/share/vm/opto/compile.cpp
index ffb7ed7b8d58f057b1f418311d20b6e659d0b0aa..5ea8049c7359042f360e0f8dc5020f7dc79b80cf 100644
--- a/src/share/vm/opto/compile.cpp
+++ b/src/share/vm/opto/compile.cpp
@@ -610,7 +610,9 @@ Compile::Compile( ciEnv* ci_env, C2Compiler* compiler, ciMethod* target, int osr
                   _trace_opto_output(TraceOptoOutput || method()->has_option("TraceOptoOutput")),
                   _printer(IdealGraphPrinter::printer()),
 #endif
-                  _congraph(NULL) {
+                  _congraph(NULL),
+                  _print_inlining_list(NULL),
+                  _print_inlining(0) {
   C = this;
 
   CompileWrapper cw(this);
@@ -666,6 +668,9 @@ Compile::Compile( ciEnv* ci_env, C2Compiler* compiler, ciMethod* target, int osr
   PhaseGVN gvn(node_arena(), estimated_size);
   set_initial_gvn(&gvn);
 
+  if (PrintInlining) {
+    _print_inlining_list = new (comp_arena())GrowableArray<PrintInliningBuffer>(comp_arena(), 1, 1, PrintInliningBuffer());
+  }
   { // Scope for timing the parser
     TracePhase t3("parse", &_t_parser, true);
 
@@ -754,6 +759,7 @@ Compile::Compile( ciEnv* ci_env, C2Compiler* compiler, ciMethod* target, int osr
       }
     }
     assert(_late_inlines.length() == 0, "should have been processed");
+    dump_inlining();
 
     print_method("Before RemoveUseless", 3);
 
@@ -899,7 +905,9 @@ Compile::Compile( ciEnv* ci_env,
 #endif
     _dead_node_list(comp_arena()),
     _dead_node_count(0),
-    _congraph(NULL) {
+    _congraph(NULL),
+    _print_inlining_list(NULL),
+    _print_inlining(0) {
   C = this;
 
 #ifndef PRODUCT
@@ -3351,3 +3359,11 @@ void Compile::ConstantTable::fill_jump_table(CodeBuffer& cb, MachConstantNode* n
     cb.consts()->relocate((address) constant_addr, relocInfo::internal_word_type);
   }
 }
+
+void Compile::dump_inlining() {
+  if (PrintInlining) {
+    for (int i = 0; i < _print_inlining_list->length(); i++) {
+      tty->print(_print_inlining_list->at(i).ss()->as_string());
+    }
+  }
+}
diff --git a/src/share/vm/opto/compile.hpp b/src/share/vm/opto/compile.hpp
index aacb72cbfb45dba47a48fe9e52e017e274dd1b13..32767e2ddd8958d46a9eefb76fa4ab90521c80bd 100644
--- a/src/share/vm/opto/compile.hpp
+++ b/src/share/vm/opto/compile.hpp
@@ -30,6 +30,7 @@
 #include "code/debugInfoRec.hpp"
 #include "code/exceptionHandlerTable.hpp"
 #include "compiler/compilerOracle.hpp"
+#include "compiler/compileBroker.hpp"
 #include "libadt/dict.hpp"
 #include "libadt/port.hpp"
 #include "libadt/vectset.hpp"
@@ -369,6 +370,61 @@ class Compile : public Phase {
   GrowableArray<CallGenerator*> _late_inlines;  // List of CallGenerators to be revisited after
                                                 // main parsing has finished.
 
+  // Inlining may not happen in parse order which would make
+  // PrintInlining output confusing. Keep track of PrintInlining
+  // pieces in order.
+  class PrintInliningBuffer : public ResourceObj {
+   private:
+    CallGenerator* _cg;
+    stringStream* _ss;
+
+   public:
+    PrintInliningBuffer()
+      : _cg(NULL) { _ss = new stringStream(); }
+
+    stringStream* ss() const { return _ss; }
+    CallGenerator* cg() const { return _cg; }
+    void set_cg(CallGenerator* cg) { _cg = cg; }
+  };
+
+  GrowableArray<PrintInliningBuffer>* _print_inlining_list;
+  int _print_inlining;
+
+ public:
+
+  outputStream* print_inlining_stream() const {
+    return _print_inlining_list->at(_print_inlining).ss();
+  }
+
+  void print_inlining_skip(CallGenerator* cg) {
+    if (PrintInlining) {
+      _print_inlining_list->at(_print_inlining).set_cg(cg);
+      _print_inlining++;
+      _print_inlining_list->insert_before(_print_inlining, PrintInliningBuffer());
+    }
+  }
+
+  void print_inlining_insert(CallGenerator* cg) {
+    if (PrintInlining) {
+      for (int i = 0; i < _print_inlining_list->length(); i++) {
+        if (_print_inlining_list->at(i).cg() == cg) {
+          _print_inlining_list->insert_before(i+1, PrintInliningBuffer());
+          _print_inlining = i+1;
+          _print_inlining_list->at(i).set_cg(NULL);
+          return;
+        }
+      }
+      ShouldNotReachHere();
+    }
+  }
+
+  void print_inlining(ciMethod* method, int inline_level, int bci, const char* msg = NULL) {
+    stringStream ss;
+    CompileTask::print_inlining(&ss, method, inline_level, bci, msg);
+    print_inlining_stream()->print(ss.as_string());
+  }
+
+ private:
   // Matching, CFG layout, allocation, code generation
   PhaseCFG*             _cfg;                   // Results of CFG finding
   bool                  _select_24_bit_instr;   // We selected an instruction with a 24-bit result
@@ -591,7 +647,7 @@ class Compile : public Phase {
   void         reset_dead_node_list()      { _dead_node_list.Reset();
                                              _dead_node_count = 0;
                                            }
-  uint          live_nodes()               {
+  uint          live_nodes() const         {
     int  val = _unique - _dead_node_count;
     assert (val >= 0, err_msg_res("number of tracked dead nodes %d more than created nodes %d", _unique, _dead_node_count));
             return (uint) val;
@@ -702,7 +758,7 @@ class Compile : public Phase {
 
   void              identify_useful_nodes(Unique_Node_List &useful);
   void              update_dead_node_list(Unique_Node_List &useful);
-  void              remove_useless_nodes  (Unique_Node_List &useful);
+  void              remove_useless_nodes (Unique_Node_List &useful);
 
   WarmCallInfo*     warm_calls() const          { return _warm_calls; }
   void          set_warm_calls(WarmCallInfo* l) { _warm_calls = l; }
@@ -711,6 +767,8 @@ class Compile : public Phase {
   // Record this CallGenerator for inlining at the end of parsing.
   void              add_late_inline(CallGenerator* cg) { _late_inlines.push(cg); }
 
+  void dump_inlining();
+
   // Matching, CFG layout, allocation, code generation
   PhaseCFG*         cfg()                       { return _cfg; }
   bool              select_24_bit_instr() const { return _select_24_bit_instr; }
diff --git a/src/share/vm/opto/doCall.cpp b/src/share/vm/opto/doCall.cpp
index ca9a0a6d67e476a9d76a004c58bd87ad08f98b9e..1f21043c32ea6780c4acdf43a36d43fdcabf9067 100644
--- a/src/share/vm/opto/doCall.cpp
+++ b/src/share/vm/opto/doCall.cpp
@@ -40,19 +40,24 @@
 #include "prims/nativeLookup.hpp"
 #include "runtime/sharedRuntime.hpp"
 
-void trace_type_profile(ciMethod *method, int depth, int bci, ciMethod *prof_method, ciKlass *prof_klass, int site_count, int receiver_count) {
+void trace_type_profile(Compile* C, ciMethod *method, int depth, int bci, ciMethod *prof_method, ciKlass *prof_klass, int site_count, int receiver_count) {
   if (TraceTypeProfile || PrintInlining NOT_PRODUCT(|| PrintOptoInlining)) {
+    outputStream* out = tty;
     if (!PrintInlining) {
       if (NOT_PRODUCT(!PrintOpto &&) !PrintCompilation) {
         method->print_short_name();
         tty->cr();
       }
       CompileTask::print_inlining(prof_method, depth, bci);
+    } else {
+      out = C->print_inlining_stream();
     }
-    CompileTask::print_inline_indent(depth);
-    tty->print(" \\-> TypeProfile (%d/%d counts) = ", receiver_count, site_count);
-    prof_klass->name()->print_symbol();
-    tty->cr();
+    CompileTask::print_inline_indent(depth, out);
+    out->print(" \\-> TypeProfile (%d/%d counts) = ", receiver_count, site_count);
+    stringStream ss;
+    prof_klass->name()->print_symbol_on(&ss);
+    out->print(ss.as_string());
+    out->cr();
   }
 }
 
@@ -233,13 +238,13 @@ CallGenerator* Compile::call_generator(ciMethod* callee, int vtable_index, bool
           }
           if (miss_cg != NULL) {
             if (next_hit_cg != NULL) {
-              trace_type_profile(jvms->method(), jvms->depth() - 1, jvms->bci(), next_receiver_method, profile.receiver(1), site_count, profile.receiver_count(1));
+              trace_type_profile(C, jvms->method(), jvms->depth() - 1, jvms->bci(), next_receiver_method, profile.receiver(1), site_count, profile.receiver_count(1));
               // We don't need to record dependency on a receiver here and below.
               // Whenever we inline, the dependency is added by Parse::Parse().
               miss_cg = CallGenerator::for_predicted_call(profile.receiver(1), miss_cg, next_hit_cg, PROB_MAX);
             }
             if (miss_cg != NULL) {
-              trace_type_profile(jvms->method(), jvms->depth() - 1, jvms->bci(), receiver_method, profile.receiver(0), site_count, receiver_count);
+              trace_type_profile(C, jvms->method(), jvms->depth() - 1, jvms->bci(), receiver_method, profile.receiver(0), site_count, receiver_count);
               CallGenerator* cg = CallGenerator::for_predicted_call(profile.receiver(0), miss_cg, hit_cg, profile.receiver_prob(0));
               if (cg != NULL)  return cg;
             }
diff --git a/src/share/vm/opto/graphKit.cpp b/src/share/vm/opto/graphKit.cpp
index ba7ec2caf77443eb8dd6a6b9698e9bc7a9370686..115952ad65ce285ac7dd35a89344c4a07e0a500e 100644
--- a/src/share/vm/opto/graphKit.cpp
+++ b/src/share/vm/opto/graphKit.cpp
@@ -1771,11 +1771,21 @@ void GraphKit::replace_call(CallNode* call, Node* result) {
   CallProjections callprojs;
   call->extract_projections(&callprojs, true);
 
-  // Replace all the old call edges with the edges from the inlining result
-  C->gvn_replace_by(callprojs.fallthrough_catchproj, final_state->in(TypeFunc::Control));
-  C->gvn_replace_by(callprojs.fallthrough_memproj,   final_state->in(TypeFunc::Memory));
-  C->gvn_replace_by(callprojs.fallthrough_ioproj,    final_state->in(TypeFunc::I_O));
+  Node* init_mem = call->in(TypeFunc::Memory);
   Node* final_mem = final_state->in(TypeFunc::Memory);
+  Node* final_ctl = final_state->in(TypeFunc::Control);
+  Node* final_io = final_state->in(TypeFunc::I_O);
+
+  // Replace all the old call edges with the edges from the inlining result
+  if (callprojs.fallthrough_catchproj != NULL) {
+    C->gvn_replace_by(callprojs.fallthrough_catchproj, final_ctl);
+  }
+  if (callprojs.fallthrough_memproj != NULL) {
+    C->gvn_replace_by(callprojs.fallthrough_memproj,   final_mem);
+  }
+  if (callprojs.fallthrough_ioproj != NULL) {
+    C->gvn_replace_by(callprojs.fallthrough_ioproj,    final_io);
+  }
 
   // Replace the result with the new result if it exists and is used
   if (callprojs.resproj != NULL && result != NULL) {
@@ -2980,7 +2990,7 @@ Node* GraphKit::set_output_for_allocation(AllocateNode* alloc,
   set_control( _gvn.transform(new (C) ProjNode(allocx, TypeFunc::Control) ) );
   // create memory projection for i_o
   set_memory ( _gvn.transform( new (C) ProjNode(allocx, TypeFunc::Memory, true) ), rawidx );
-  make_slow_call_ex(allocx, env()->OutOfMemoryError_klass(), true);
+  make_slow_call_ex(allocx, env()->Throwable_klass(), true);
 
   // create a memory projection as for the normal control path
   Node* malloc = _gvn.transform(new (C) ProjNode(allocx, TypeFunc::Memory));
diff --git a/src/share/vm/opto/library_call.cpp b/src/share/vm/opto/library_call.cpp
index 53b89e4d71c62324f1cc3f5e06d82a7ddf54ccdf..313339edfded5ce58382654ed6956881f451c5f4 100644
--- a/src/share/vm/opto/library_call.cpp
+++ b/src/share/vm/opto/library_call.cpp
@@ -282,6 +282,7 @@ class LibraryCallKit : public GraphKit {
   typedef enum { LS_xadd, LS_xchg, LS_cmpxchg } LoadStoreKind;
   bool inline_unsafe_load_store(BasicType type,  LoadStoreKind kind);
   bool inline_unsafe_ordered_store(BasicType type);
+  bool inline_unsafe_fence(vmIntrinsics::ID id);
   bool inline_fp_conversions(vmIntrinsics::ID id);
   bool inline_number_methods(vmIntrinsics::ID id);
   bool inline_reference_get();
@@ -334,6 +335,9 @@ CallGenerator* Compile::make_vm_intrinsic(ciMethod* m, bool is_virtual) {
     case vmIntrinsics::_getAndSetInt:
     case vmIntrinsics::_getAndSetLong:
     case vmIntrinsics::_getAndSetObject:
+    case vmIntrinsics::_loadFence:
+    case vmIntrinsics::_storeFence:
+    case vmIntrinsics::_fullFence:
       break;  // InlineNatives does not control String.compareTo
     case vmIntrinsics::_Reference_get:
       break;  // InlineNatives does not control Reference.get
@@ -536,7 +540,7 @@ JVMState* LibraryIntrinsic::generate(JVMState* jvms) {
   // Try to inline the intrinsic.
   if (kit.try_to_inline()) {
     if (PrintIntrinsics || PrintInlining NOT_PRODUCT( || PrintOptoInlining) ) {
-      CompileTask::print_inlining(callee, jvms->depth() - 1, bci, is_virtual() ? "(intrinsic, virtual)" : "(intrinsic)");
+      C->print_inlining(callee, jvms->depth() - 1, bci, is_virtual() ? "(intrinsic, virtual)" : "(intrinsic)");
     }
     C->gather_intrinsic_statistics(intrinsic_id(), is_virtual(), Compile::_intrinsic_worked);
     if (C->log()) {
@@ -555,7 +559,7 @@ JVMState* LibraryIntrinsic::generate(JVMState* jvms) {
     if (jvms->has_method()) {
       // Not a root compile.
       const char* msg = is_virtual() ? "failed to inline (intrinsic, virtual)" : "failed to inline (intrinsic)";
-      CompileTask::print_inlining(callee, jvms->depth() - 1, bci, msg);
+      C->print_inlining(callee, jvms->depth() - 1, bci, msg);
     } else {
       // Root compile
       tty->print("Did not generate intrinsic %s%s at bci:%d in",
@@ -585,7 +589,7 @@ Node* LibraryIntrinsic::generate_predicate(JVMState* jvms) {
   Node* slow_ctl = kit.try_to_predicate();
   if (!kit.failing()) {
     if (PrintIntrinsics || PrintInlining NOT_PRODUCT( || PrintOptoInlining) ) {
-      CompileTask::print_inlining(callee, jvms->depth() - 1, bci, is_virtual() ? "(intrinsic, virtual)" : "(intrinsic)");
+      C->print_inlining(callee, jvms->depth() - 1, bci, is_virtual() ? "(intrinsic, virtual)" : "(intrinsic)");
     }
     C->gather_intrinsic_statistics(intrinsic_id(), is_virtual(), Compile::_intrinsic_worked);
     if (C->log()) {
@@ -602,12 +606,12 @@ Node* LibraryIntrinsic::generate_predicate(JVMState* jvms) {
     if (jvms->has_method()) {
       // Not a root compile.
       const char* msg = "failed to generate predicate for intrinsic";
-      CompileTask::print_inlining(kit.callee(), jvms->depth() - 1, bci, msg);
+      C->print_inlining(kit.callee(), jvms->depth() - 1, bci, msg);
     } else {
       // Root compile
-      tty->print("Did not generate predicate for intrinsic %s%s at bci:%d in",
-               vmIntrinsics::name_at(intrinsic_id()),
-               (is_virtual() ? " (virtual)" : ""), bci);
+      C->print_inlining_stream()->print("Did not generate predicate for intrinsic %s%s at bci:%d in",
+                                        vmIntrinsics::name_at(intrinsic_id()),
+                                        (is_virtual() ? " (virtual)" : ""), bci);
     }
   }
   C->gather_intrinsic_statistics(intrinsic_id(), is_virtual(), Compile::_intrinsic_failed);
@@ -732,6 +736,10 @@ bool LibraryCallKit::try_to_inline() {
   case vmIntrinsics::_getAndSetLong:            return inline_unsafe_load_store(T_LONG,   LS_xchg);
   case vmIntrinsics::_getAndSetObject:          return inline_unsafe_load_store(T_OBJECT, LS_xchg);
 
+  case vmIntrinsics::_loadFence:
+  case vmIntrinsics::_storeFence:
+  case vmIntrinsics::_fullFence:                return inline_unsafe_fence(intrinsic_id());
+
   case vmIntrinsics::_currentThread:            return inline_native_currentThread();
   case vmIntrinsics::_isInterrupted:            return inline_native_isInterrupted();
 
@@ -2840,6 +2848,26 @@ bool LibraryCallKit::inline_unsafe_ordered_store(BasicType type) {
   return true;
 }
 
+bool LibraryCallKit::inline_unsafe_fence(vmIntrinsics::ID id) {
+  // Regardless of form, don't allow previous ld/st to move down,
+  // then issue acquire, release, or volatile mem_bar.
+  insert_mem_bar(Op_MemBarCPUOrder);
+  switch(id) {
+    case vmIntrinsics::_loadFence:
+      insert_mem_bar(Op_MemBarAcquire);
+      return true;
+    case vmIntrinsics::_storeFence:
+      insert_mem_bar(Op_MemBarRelease);
+      return true;
+    case vmIntrinsics::_fullFence:
+      insert_mem_bar(Op_MemBarVolatile);
+      return true;
+    default:
+      fatal_unexpected_iid(id);
+      return false;
+  }
+}
+
 //----------------------------inline_unsafe_allocate---------------------------
 // public native Object sun.mics.Unsafe.allocateInstance(Class<?> cls);
 bool LibraryCallKit::inline_unsafe_allocate() {
@@ -2952,14 +2980,23 @@ bool LibraryCallKit::inline_native_isInterrupted() {
 
   // We only go to the fast case code if we pass two guards.
   // Paths which do not pass are accumulated in the slow_region.
+
+  enum {
+    no_int_result_path   = 1, // t == Thread.current() && !TLS._osthread._interrupted
+    no_clear_result_path = 2, // t == Thread.current() &&  TLS._osthread._interrupted && !clear_int
+    slow_result_path     = 3, // slow path: t.isInterrupted(clear_int)
+    PATH_LIMIT
+  };
+
+  // Ensure that it's not possible to move the load of TLS._osthread._interrupted flag
+  // out of the function.
+  insert_mem_bar(Op_MemBarCPUOrder);
+
+  RegionNode* result_rgn = new (C) RegionNode(PATH_LIMIT);
+  PhiNode*    result_val = new (C) PhiNode(result_rgn, TypeInt::BOOL);
+
   RegionNode* slow_region = new (C) RegionNode(1);
   record_for_igvn(slow_region);
-  RegionNode* result_rgn = new (C) RegionNode(1+3); // fast1, fast2, slow
-  PhiNode*    result_val = new (C) PhiNode(result_rgn, TypeInt::BOOL);
-  enum { no_int_result_path   = 1,
-         no_clear_result_path = 2,
-         slow_result_path     = 3
-  };
 
   // (a) Receiving thread must be the current thread.
   Node* rec_thr = argument(0);
@@ -2968,14 +3005,13 @@ bool LibraryCallKit::inline_native_isInterrupted() {
   Node* cmp_thr = _gvn.transform( new (C) CmpPNode(cur_thr, rec_thr) );
   Node* bol_thr = _gvn.transform( new (C) BoolNode(cmp_thr, BoolTest::ne) );
 
-  bool known_current_thread = (_gvn.type(bol_thr) == TypeInt::ZERO);
-  if (!known_current_thread)
-    generate_slow_guard(bol_thr, slow_region);
+  generate_slow_guard(bol_thr, slow_region);
 
   // (b) Interrupt bit on TLS must be false.
   Node* p = basic_plus_adr(top()/*!oop*/, tls_ptr, in_bytes(JavaThread::osthread_offset()));
   Node* osthread = make_load(NULL, p, TypeRawPtr::NOTNULL, T_ADDRESS);
   p = basic_plus_adr(top()/*!oop*/, osthread, in_bytes(OSThread::interrupted_offset()));
+
   // Set the control input on the field _interrupted read to prevent it floating up.
   Node* int_bit = make_load(control(), p, TypeInt::BOOL, T_INT);
   Node* cmp_bit = _gvn.transform( new (C) CmpINode(int_bit, intcon(0)) );
@@ -3020,22 +3056,20 @@ bool LibraryCallKit::inline_native_isInterrupted() {
     Node* slow_val = set_results_for_java_call(slow_call);
     // this->control() comes from set_results_for_java_call
 
-    // If we know that the result of the slow call will be true, tell the optimizer!
-    if (known_current_thread)  slow_val = intcon(1);
-
     Node* fast_io  = slow_call->in(TypeFunc::I_O);
     Node* fast_mem = slow_call->in(TypeFunc::Memory);
+
     // These two phis are pre-filled with copies of of the fast IO and Memory
-    Node* io_phi   = PhiNode::make(result_rgn, fast_io,  Type::ABIO);
-    Node* mem_phi  = PhiNode::make(result_rgn, fast_mem, Type::MEMORY, TypePtr::BOTTOM);
+    PhiNode* result_mem  = PhiNode::make(result_rgn, fast_mem, Type::MEMORY, TypePtr::BOTTOM);
+    PhiNode* result_io   = PhiNode::make(result_rgn, fast_io,  Type::ABIO);
 
     result_rgn->init_req(slow_result_path, control());
-    io_phi    ->init_req(slow_result_path, i_o());
-    mem_phi   ->init_req(slow_result_path, reset_memory());
+    result_io ->init_req(slow_result_path, i_o());
+    result_mem->init_req(slow_result_path, reset_memory());
     result_val->init_req(slow_result_path, slow_val);
 
-    set_all_memory( _gvn.transform(mem_phi) );
-    set_i_o(        _gvn.transform(io_phi) );
+    set_all_memory(_gvn.transform(result_mem));
+    set_i_o(       _gvn.transform(result_io));
   }
 
   C->set_has_split_ifs(true); // Has chance for split-if optimization
@@ -3319,7 +3353,7 @@ bool LibraryCallKit::inline_native_subtype_check() {
     Node* arg = args[which_arg];
     arg = null_check(arg);
     if (stopped())  break;
-    args[which_arg] = _gvn.transform(arg);
+    args[which_arg] = arg;
 
     Node* p = basic_plus_adr(arg, class_klass_offset);
     Node* kls = LoadKlassNode::make(_gvn, immutable_memory(), p, adr_type, kls_type);
diff --git a/src/share/vm/opto/node.cpp b/src/share/vm/opto/node.cpp
index 4c83d906003ab8225bb336b7a60db97c01dbb1e9..9983c84d8e26d8494e6e3ec84bdcbaa51b3f1cf8 100644
--- a/src/share/vm/opto/node.cpp
+++ b/src/share/vm/opto/node.cpp
@@ -1839,15 +1839,16 @@ uint Node::match_edge(uint idx) const {
   return idx;                   // True for other than index 0 (control)
 }
 
+static RegMask _not_used_at_all;
 // Register classes are defined for specific machines
 const RegMask &Node::out_RegMask() const {
   ShouldNotCallThis();
-  return *(new RegMask());
+  return _not_used_at_all;
 }
 
 const RegMask &Node::in_RegMask(uint) const {
   ShouldNotCallThis();
-  return *(new RegMask());
+  return _not_used_at_all;
 }
 
 //=============================================================================
diff --git a/src/share/vm/opto/parse3.cpp b/src/share/vm/opto/parse3.cpp
index 0f92b53f135ebd44a05cb779948444d0811bc5ec..ac6f4ec8f7662af4c6f4f0d874942858d928f57d 100644
--- a/src/share/vm/opto/parse3.cpp
+++ b/src/share/vm/opto/parse3.cpp
@@ -509,6 +509,7 @@ void Parse::do_multianewarray() {
                           makecon(TypeKlassPtr::make(array_klass)),
                           dims);
   }
+  make_slow_call_ex(c, env()->Throwable_klass(), false);
 
   Node* res = _gvn.transform(new (C) ProjNode(c, TypeFunc::Parms));
 
diff --git a/src/share/vm/opto/runtime.cpp b/src/share/vm/opto/runtime.cpp
index fb90705726ea2ab32b3170242ba828fe8c000562..7edb97e0bbaba8055de57cf124181db76d76263c 100644
--- a/src/share/vm/opto/runtime.cpp
+++ b/src/share/vm/opto/runtime.cpp
@@ -989,7 +989,7 @@ JRT_ENTRY_NO_ASYNC(address, OptoRuntime::handle_exception_C_helper(JavaThread* t
       // since we're notifying the VM on every catch.
       // Force deoptimization and the rest of the lookup
       // will be fine.
-      deoptimize_caller_frame(thread, true);
+      deoptimize_caller_frame(thread);
     }
 
     // Check the stack guard pages.  If enabled, look for handler in this frame;
@@ -1143,19 +1143,24 @@ const TypeFunc *OptoRuntime::rethrow_Type() {
 
 
 void OptoRuntime::deoptimize_caller_frame(JavaThread *thread, bool doit) {
-  // Deoptimize frame
-  if (doit) {
-    // Called from within the owner thread, so no need for safepoint
-    RegisterMap reg_map(thread);
-    frame stub_frame = thread->last_frame();
-    assert(stub_frame.is_runtime_frame() || exception_blob()->contains(stub_frame.pc()), "sanity check");
-    frame caller_frame = stub_frame.sender(&reg_map);
-
-    // Deoptimize the caller frame.
-    Deoptimization::deoptimize_frame(thread, caller_frame.id());
+  // Deoptimize the caller before continuing, as the compiled
+  // exception handler table may not be valid.
+  if (!StressCompiledExceptionHandlers && doit) {
+    deoptimize_caller_frame(thread);
   }
 }
 
+void OptoRuntime::deoptimize_caller_frame(JavaThread *thread) {
+  // Called from within the owner thread, so no need for safepoint
+  RegisterMap reg_map(thread);
+  frame stub_frame = thread->last_frame();
+  assert(stub_frame.is_runtime_frame() || exception_blob()->contains(stub_frame.pc()), "sanity check");
+  frame caller_frame = stub_frame.sender(&reg_map);
+
+  // Deoptimize the caller frame.
+  Deoptimization::deoptimize_frame(thread, caller_frame.id());
+}
+
 
 bool OptoRuntime::is_deoptimized_caller_frame(JavaThread *thread) {
   // Called from within the owner thread, so no need for safepoint
diff --git a/src/share/vm/opto/runtime.hpp b/src/share/vm/opto/runtime.hpp
index 13da255b74299f696b20a2aca3bf050a85f1952a..295b71237577376ef7d1517af6ff6b843d143e80 100644
--- a/src/share/vm/opto/runtime.hpp
+++ b/src/share/vm/opto/runtime.hpp
@@ -174,6 +174,7 @@ private:
   static address handle_exception_C       (JavaThread* thread);
   static address handle_exception_C_helper(JavaThread* thread, nmethod*& nm);
   static address rethrow_C                (oopDesc* exception, JavaThread *thread, address return_pc );
+  static void deoptimize_caller_frame     (JavaThread *thread);
   static void deoptimize_caller_frame     (JavaThread *thread, bool doit);
   static bool is_deoptimized_caller_frame (JavaThread *thread);
 
diff --git a/src/share/vm/opto/stringopts.cpp b/src/share/vm/opto/stringopts.cpp
index c471887d88101b9913936e9fb99cac5aafc92b29..4e00945c73ae009385611d30563917888b9630f7 100644
--- a/src/share/vm/opto/stringopts.cpp
+++ b/src/share/vm/opto/stringopts.cpp
@@ -744,7 +744,9 @@ bool StringConcat::validate_control_flow() {
       ctrl_path.push(cn);
       ctrl_path.push(cn->proj_out(0));
       ctrl_path.push(cn->proj_out(0)->unique_out());
-      ctrl_path.push(cn->proj_out(0)->unique_out()->as_Catch()->proj_out(0));
+      if (cn->proj_out(0)->unique_out()->as_Catch()->proj_out(0) != NULL) {
+        ctrl_path.push(cn->proj_out(0)->unique_out()->as_Catch()->proj_out(0));
+      }
     } else {
       ShouldNotReachHere();
     }
@@ -762,6 +764,12 @@ bool StringConcat::validate_control_flow() {
     } else if (ptr->is_IfTrue()) {
       IfNode* iff = ptr->in(0)->as_If();
       BoolNode* b = iff->in(1)->isa_Bool();
+
+      if (b == NULL) {
+        fail = true;
+        break;
+      }
+
       Node* cmp = b->in(1);
       Node* v1 = cmp->in(1);
       Node* v2 = cmp->in(2);
@@ -1408,71 +1416,76 @@ void PhaseStringOpts::replace_string_concat(StringConcat* sc) {
                       Deoptimization::Action_make_not_entrant);
   }
 
-  // length now contains the number of characters needed for the
-  // char[] so create a new AllocateArray for the char[]
-  Node* char_array = NULL;
-  {
-    PreserveReexecuteState preexecs(&kit);
-    // The original jvms is for an allocation of either a String or
-    // StringBuffer so no stack adjustment is necessary for proper
-    // reexecution.  If we deoptimize in the slow path the bytecode
-    // will be reexecuted and the char[] allocation will be thrown away.
-    kit.jvms()->set_should_reexecute(true);
-    char_array = kit.new_array(__ makecon(TypeKlassPtr::make(ciTypeArrayKlass::make(T_CHAR))),
-                               length, 1);
-  }
-
-  // Mark the allocation so that zeroing is skipped since the code
-  // below will overwrite the entire array
-  AllocateArrayNode* char_alloc = AllocateArrayNode::Ideal_array_allocation(char_array, _gvn);
-  char_alloc->maybe_set_complete(_gvn);
-
-  // Now copy the string representations into the final char[]
-  Node* start = __ intcon(0);
-  for (int argi = 0; argi < sc->num_arguments(); argi++) {
-    Node* arg = sc->argument(argi);
-    switch (sc->mode(argi)) {
-      case StringConcat::IntMode: {
-        Node* end = __ AddI(start, string_sizes->in(argi));
-        // getChars words backwards so pass the ending point as well as the start
-        int_getChars(kit, arg, char_array, start, end);
-        start = end;
-        break;
-      }
-      case StringConcat::StringNullCheckMode:
-      case StringConcat::StringMode: {
-        start = copy_string(kit, arg, char_array, start);
-        break;
-      }
-      case StringConcat::CharMode: {
-        __ store_to_memory(kit.control(), kit.array_element_address(char_array, start, T_CHAR),
-                           arg, T_CHAR, char_adr_idx);
-        start = __ AddI(start, __ intcon(1));
-        break;
+  Node* result;
+  if (!kit.stopped()) {
+
+    // length now contains the number of characters needed for the
+    // char[] so create a new AllocateArray for the char[]
+    Node* char_array = NULL;
+    {
+      PreserveReexecuteState preexecs(&kit);
+      // The original jvms is for an allocation of either a String or
+      // StringBuffer so no stack adjustment is necessary for proper
+      // reexecution.  If we deoptimize in the slow path the bytecode
+      // will be reexecuted and the char[] allocation will be thrown away.
+      kit.jvms()->set_should_reexecute(true);
+      char_array = kit.new_array(__ makecon(TypeKlassPtr::make(ciTypeArrayKlass::make(T_CHAR))),
+                                 length, 1);
+    }
+
+    // Mark the allocation so that zeroing is skipped since the code
+    // below will overwrite the entire array
+    AllocateArrayNode* char_alloc = AllocateArrayNode::Ideal_array_allocation(char_array, _gvn);
+    char_alloc->maybe_set_complete(_gvn);
+
+    // Now copy the string representations into the final char[]
+    Node* start = __ intcon(0);
+    for (int argi = 0; argi < sc->num_arguments(); argi++) {
+      Node* arg = sc->argument(argi);
+      switch (sc->mode(argi)) {
+        case StringConcat::IntMode: {
+          Node* end = __ AddI(start, string_sizes->in(argi));
+          // getChars words backwards so pass the ending point as well as the start
+          int_getChars(kit, arg, char_array, start, end);
+          start = end;
+          break;
+        }
+        case StringConcat::StringNullCheckMode:
+        case StringConcat::StringMode: {
+          start = copy_string(kit, arg, char_array, start);
+          break;
+        }
+        case StringConcat::CharMode: {
+          __ store_to_memory(kit.control(), kit.array_element_address(char_array, start, T_CHAR),
+                             arg, T_CHAR, char_adr_idx);
+          start = __ AddI(start, __ intcon(1));
+          break;
+        }
+        default:
+          ShouldNotReachHere();
       }
-      default:
-        ShouldNotReachHere();
     }
-  }
 
-  // If we're not reusing an existing String allocation then allocate one here.
-  Node* result = sc->string_alloc();
-  if (result == NULL) {
-    PreserveReexecuteState preexecs(&kit);
-    // The original jvms is for an allocation of either a String or
-    // StringBuffer so no stack adjustment is necessary for proper
-    // reexecution.
-    kit.jvms()->set_should_reexecute(true);
-    result = kit.new_instance(__ makecon(TypeKlassPtr::make(C->env()->String_klass())));
-  }
+    // If we're not reusing an existing String allocation then allocate one here.
+    result = sc->string_alloc();
+    if (result == NULL) {
+      PreserveReexecuteState preexecs(&kit);
+      // The original jvms is for an allocation of either a String or
+      // StringBuffer so no stack adjustment is necessary for proper
+      // reexecution.
+      kit.jvms()->set_should_reexecute(true);
+      result = kit.new_instance(__ makecon(TypeKlassPtr::make(C->env()->String_klass())));
+    }
 
-  // Intialize the string
-  if (java_lang_String::has_offset_field()) {
-    kit.store_String_offset(kit.control(), result, __ intcon(0));
-    kit.store_String_length(kit.control(), result, length);
+    // Intialize the string
+    if (java_lang_String::has_offset_field()) {
+      kit.store_String_offset(kit.control(), result, __ intcon(0));
+      kit.store_String_length(kit.control(), result, length);
+    }
+    kit.store_String_value(kit.control(), result, char_array);
+  } else {
+    result = C->top();
   }
-  kit.store_String_value(kit.control(), result, char_array);
-
   // hook up the outgoing control and result
   kit.replace_call(sc->end(), result);
 
diff --git a/src/share/vm/prims/methodHandles.cpp b/src/share/vm/prims/methodHandles.cpp
index 67e6f954c80c2b0d9bb6e57d6e3338e1a1aabdcc..c3b587967238c544b95a485db22d47899a0d0ffe 100644
--- a/src/share/vm/prims/methodHandles.cpp
+++ b/src/share/vm/prims/methodHandles.cpp
@@ -1168,8 +1168,8 @@ JVM_ENTRY(void, MHN_setCallSiteTargetNormal(JNIEnv* env, jobject igcls, jobject
     // Walk all nmethods depending on this call site.
     MutexLocker mu(Compile_lock, thread);
     Universe::flush_dependents_on(call_site, target);
+    java_lang_invoke_CallSite::set_target(call_site(), target());
   }
-  java_lang_invoke_CallSite::set_target(call_site(), target());
 }
 JVM_END
 
@@ -1180,8 +1180,8 @@ JVM_ENTRY(void, MHN_setCallSiteTargetVolatile(JNIEnv* env, jobject igcls, jobjec
     // Walk all nmethods depending on this call site.
     MutexLocker mu(Compile_lock, thread);
     Universe::flush_dependents_on(call_site, target);
+    java_lang_invoke_CallSite::set_target_volatile(call_site(), target());
   }
-  java_lang_invoke_CallSite::set_target_volatile(call_site(), target());
 }
 JVM_END
 
diff --git a/src/share/vm/prims/unsafe.cpp b/src/share/vm/prims/unsafe.cpp
index cb7f0fcf3df353e8bcc8a8d47484aa83fd68dd06..e56277b65d7796a0a3c315c3092ce25ee36bc42e 100644
--- a/src/share/vm/prims/unsafe.cpp
+++ b/src/share/vm/prims/unsafe.cpp
@@ -468,6 +468,21 @@ UNSAFE_ENTRY(void, Unsafe_SetOrderedLong(JNIEnv *env, jobject unsafe, jobject ob
 #endif
 UNSAFE_END
 
+UNSAFE_ENTRY(void, Unsafe_LoadFence(JNIEnv *env, jobject unsafe))
+  UnsafeWrapper("Unsafe_LoadFence");
+  OrderAccess::acquire();
+UNSAFE_END
+
+UNSAFE_ENTRY(void, Unsafe_StoreFence(JNIEnv *env, jobject unsafe))
+  UnsafeWrapper("Unsafe_StoreFence");
+  OrderAccess::release();
+UNSAFE_END
+
+UNSAFE_ENTRY(void, Unsafe_FullFence(JNIEnv *env, jobject unsafe))
+  UnsafeWrapper("Unsafe_FullFence");
+  OrderAccess::fence();
+UNSAFE_END
+
 ////// Data in the C heap.
 
 // Note:  These do not throw NullPointerException for bad pointers.
@@ -1550,6 +1565,9 @@ static JNINativeMethod methods[] = {
     {CC"putOrderedObject",   CC"("OBJ"J"OBJ")V",         FN_PTR(Unsafe_SetOrderedObject)},
     {CC"putOrderedInt",      CC"("OBJ"JI)V",             FN_PTR(Unsafe_SetOrderedInt)},
     {CC"putOrderedLong",     CC"("OBJ"JJ)V",             FN_PTR(Unsafe_SetOrderedLong)},
+    {CC"loadFence",          CC"()V",                    FN_PTR(Unsafe_LoadFence)},
+    {CC"storeFence",         CC"()V",                    FN_PTR(Unsafe_StoreFence)},
+    {CC"fullFence",          CC"()V",                    FN_PTR(Unsafe_FullFence)},
     {CC"park",               CC"(ZJ)V",                  FN_PTR(Unsafe_Park)},
     {CC"unpark",             CC"("OBJ")V",               FN_PTR(Unsafe_Unpark)}
 
diff --git a/src/share/vm/runtime/arguments.cpp b/src/share/vm/runtime/arguments.cpp
index ef8bdab4798e588145de47312c4d32edbea77d46..b42f4ce4891fe26eecb22ed1e571510f2ac722b7 100644
--- a/src/share/vm/runtime/arguments.cpp
+++ b/src/share/vm/runtime/arguments.cpp
@@ -1499,13 +1499,12 @@ void Arguments::set_g1_gc_flags() {
                      Abstract_VM_Version::parallel_worker_threads());
   }
 
-  if (FLAG_IS_DEFAULT(MarkStackSize)) {
-    FLAG_SET_DEFAULT(MarkStackSize, 128 * TASKQUEUE_SIZE);
-  }
-  if (PrintGCDetails && Verbose) {
-    tty->print_cr("MarkStackSize: %uk  MarkStackSizeMax: %uk",
-      MarkStackSize / K, MarkStackSizeMax / K);
-    tty->print_cr("ConcGCThreads: %u", ConcGCThreads);
+  // MarkStackSize will be set (if it hasn't been set by the user)
+  // when concurrent marking is initialized.
+  // Its value will be based upon the number of parallel marking threads.
+  // But we do set the maximum mark stack size here.
+  if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
+    FLAG_SET_DEFAULT(MarkStackSizeMax, 128 * TASKQUEUE_SIZE);
   }
 
   if (FLAG_IS_DEFAULT(GCTimeRatio) || GCTimeRatio == 0) {
@@ -1517,6 +1516,12 @@ void Arguments::set_g1_gc_flags() {
     // is allocation). We might consider increase it further.
     FLAG_SET_DEFAULT(GCTimeRatio, 9);
   }
+
+  if (PrintGCDetails && Verbose) {
+    tty->print_cr("MarkStackSize: %uk  MarkStackSizeMax: %uk",
+      MarkStackSize / K, MarkStackSizeMax / K);
+    tty->print_cr("ConcGCThreads: %u", ConcGCThreads);
+  }
 }
 
 void Arguments::set_heap_size() {
@@ -1980,6 +1985,9 @@ bool Arguments::check_vm_args_consistency() {
   status = status && verify_min_value(ClassMetaspaceSize, 1*M,
                                       "ClassMetaspaceSize");
 
+  status = status && verify_interval(MarkStackSizeMax,
+                                  1, (max_jint - 1), "MarkStackSizeMax");
+
 #ifdef SPARC
   if (UseConcMarkSweepGC || UseG1GC) {
     // Issue a stern warning if the user has explicitly set
diff --git a/src/share/vm/runtime/globals.hpp b/src/share/vm/runtime/globals.hpp
index 7743835dcacf73da11d234b215815350a041f943..dd3750cafb6e22233da4471fc67a22ed07d01772 100644
--- a/src/share/vm/runtime/globals.hpp
+++ b/src/share/vm/runtime/globals.hpp
@@ -922,6 +922,9 @@ class CommandLineFlags {
   develop(bool, PrintExceptionHandlers, false,                              \
           "Print exception handler tables for all nmethods when generated") \
                                                                             \
+  develop(bool, StressCompiledExceptionHandlers, false,                     \
+         "Exercise compiled exception handlers")                            \
+                                                                            \
   develop(bool, InterceptOSException, false,                                \
           "Starts debugger when an implicit OS (e.g., NULL) "               \
           "exception happens")                                              \
diff --git a/src/share/vm/runtime/os.hpp b/src/share/vm/runtime/os.hpp
index ad43ddf83f0f2f414e61c9af1faa60b2e2ca521e..235005f9890c8fdb87edd583ac71f60fbed7725c 100644
--- a/src/share/vm/runtime/os.hpp
+++ b/src/share/vm/runtime/os.hpp
@@ -255,6 +255,7 @@ class os: AllStatic {
   static int    vm_allocation_granularity();
   static char*  reserve_memory(size_t bytes, char* addr = 0,
                                size_t alignment_hint = 0);
+  static char*  reserve_memory_aligned(size_t size, size_t alignment);
   static char*  attempt_reserve_memory_at(size_t bytes, char* addr);
   static void   split_reserved_memory(char *base, size_t size,
                                       size_t split, bool realloc);
diff --git a/src/share/vm/runtime/thread.cpp b/src/share/vm/runtime/thread.cpp
index 9ca52bec2cf389c1dbfa0181641b6a74b724f7b2..db3cace3d64b3456600fef6c381bc237f6ef462c 100644
--- a/src/share/vm/runtime/thread.cpp
+++ b/src/share/vm/runtime/thread.cpp
@@ -2190,7 +2190,7 @@ void JavaThread::send_thread_stop(oop java_throwable)  {
           // BiasedLocking needs an updated RegisterMap for the revoke monitors pass
           RegisterMap reg_map(this, UseBiasedLocking);
           frame compiled_frame = f.sender(&reg_map);
-          if (compiled_frame.can_be_deoptimized()) {
+          if (!StressCompiledExceptionHandlers && compiled_frame.can_be_deoptimized()) {
             Deoptimization::deoptimize(this, compiled_frame, &reg_map);
           }
         }
@@ -3527,11 +3527,12 @@ jint Threads::create_vm(JavaVMInitArgs* args, bool* canTryAgain) {
       java_lang_Thread::set_thread_status(thread_object,
                                           java_lang_Thread::RUNNABLE);
 
-      // The VM preresolve methods to these classes. Make sure that get initialized
-      initialize_class(vmSymbols::java_lang_reflect_Method(), CHECK_0);
-      initialize_class(vmSymbols::java_lang_ref_Finalizer(),  CHECK_0);
       // The VM creates & returns objects of this class. Make sure it's initialized.
       initialize_class(vmSymbols::java_lang_Class(), CHECK_0);
+
+      // The VM preresolves methods to these classes. Make sure that they get initialized
+      initialize_class(vmSymbols::java_lang_reflect_Method(), CHECK_0);
+      initialize_class(vmSymbols::java_lang_ref_Finalizer(),  CHECK_0);
       call_initializeSystemClass(CHECK_0);
 
       // get the Java runtime name after java.lang.System is initialized
diff --git a/src/share/vm/runtime/virtualspace.cpp b/src/share/vm/runtime/virtualspace.cpp
index 18c147a6ad32a8f58da00c2862087854f3164404..f33c1995e342d67849f134bbd87a9c77488b4720 100644
--- a/src/share/vm/runtime/virtualspace.cpp
+++ b/src/share/vm/runtime/virtualspace.cpp
@@ -329,20 +329,9 @@ void ReservedSpace::initialize(size_t size, size_t alignment, bool large,
     if ((((size_t)base + noaccess_prefix) & (alignment - 1)) != 0) {
       // Base not aligned, retry
       if (!os::release_memory(base, size)) fatal("os::release_memory failed");
-      // Reserve size large enough to do manual alignment and
-      // increase size to a multiple of the desired alignment
+      // Make sure that size is aligned
       size = align_size_up(size, alignment);
-      size_t extra_size = size + alignment;
-      do {
-        char* extra_base = os::reserve_memory(extra_size, NULL, alignment);
-        if (extra_base == NULL) return;
-        // Do manual alignement
-        base = (char*) align_size_up((uintptr_t) extra_base, alignment);
-        assert(base >= extra_base, "just checking");
-        // Re-reserve the region at the aligned base address.
-        os::release_memory(extra_base, extra_size);
-        base = os::reserve_memory(size, base);
-      } while (base == NULL);
+      base = os::reserve_memory_aligned(size, alignment);
 
       if (requested_address != 0 &&
           failed_to_reserve_as_requested(base, requested_address, size, false)) {
diff --git a/src/share/vm/runtime/vmStructs.cpp b/src/share/vm/runtime/vmStructs.cpp
index 9346d6c72a72fb0d90c2279fdf9f8c6a62f6307c..9ac10e26d4c043b7cad9dd8818ba6a98ef1c636c 100644
--- a/src/share/vm/runtime/vmStructs.cpp
+++ b/src/share/vm/runtime/vmStructs.cpp
@@ -355,8 +355,6 @@ typedef BinaryTreeDictionary<Metablock, FreeList> MetablockTreeDictionary;
   nonstatic_field(Method,               _access_flags,                                 AccessFlags)                           \
   nonstatic_field(Method,               _vtable_index,                                 int)                                   \
   nonstatic_field(Method,               _method_size,                                  u2)                                    \
-  nonstatic_field(Method,               _max_locals,                                   u2)                                    \
-  nonstatic_field(Method,               _size_of_parameters,                           u2)                                    \
   nonstatic_field(Method,               _interpreter_throwout_count,                   u2)                                    \
   nonstatic_field(Method,               _number_of_breakpoints,                        u2)                                    \
   nonstatic_field(Method,               _invocation_counter,                           InvocationCounter)                     \
@@ -378,6 +376,8 @@ typedef BinaryTreeDictionary<Metablock, FreeList> MetablockTreeDictionary;
   nonstatic_field(ConstMethod,          _signature_index,                              u2)                                    \
   nonstatic_field(ConstMethod,          _method_idnum,                                 u2)                                    \
   nonstatic_field(ConstMethod,          _max_stack,                                    u2)                                    \
+  nonstatic_field(ConstMethod,          _max_locals,                                   u2)                                    \
+  nonstatic_field(ConstMethod,          _size_of_parameters,                           u2)                                    \
   nonstatic_field(ObjArrayKlass,               _element_klass,                                Klass*)                                \
   nonstatic_field(ObjArrayKlass,               _bottom_klass,                                 Klass*)                                \
   volatile_nonstatic_field(Symbol,             _refcount,                                     int)                                   \
diff --git a/src/share/vm/services/memBaseline.hpp b/src/share/vm/services/memBaseline.hpp
index 5f98e30d451d9da3151ee6a64f16e575b4bf738b..c409ec3face44d3f5db3a29ee14c72b0c45e46a2 100644
--- a/src/share/vm/services/memBaseline.hpp
+++ b/src/share/vm/services/memBaseline.hpp
@@ -334,7 +334,7 @@ class MemBaseline : public _ValueObj {
   // create a memory baseline
   MemBaseline();
 
-  virtual ~MemBaseline();
+  ~MemBaseline();
 
   inline bool baselined() const {
     return _baselined;
diff --git a/src/share/vm/utilities/workgroup.hpp b/src/share/vm/utilities/workgroup.hpp
index b82478eac1d55ad3797633bff0d7089e69a8c8ee..6a93536246bc849952dd76ec08510bf8b2f3d11a 100644
--- a/src/share/vm/utilities/workgroup.hpp
+++ b/src/share/vm/utilities/workgroup.hpp
@@ -90,7 +90,7 @@ protected:
     NOT_PRODUCT(_name = name);
     _counter = 0;
   }
-  virtual ~AbstractGangTask() { }
+  ~AbstractGangTask() { }
 
 public:
 };
diff --git a/src/share/vm/utilities/yieldingWorkgroup.hpp b/src/share/vm/utilities/yieldingWorkgroup.hpp
index a39f8227f579e49f2e7b0c24c4638cd506cbbe7a..f9646f91f7fdb993f4053374c055f2eb750ae880 100644
--- a/src/share/vm/utilities/yieldingWorkgroup.hpp
+++ b/src/share/vm/utilities/yieldingWorkgroup.hpp
@@ -106,7 +106,7 @@ protected:
     _status(INACTIVE),
     _gang(NULL) { }
 
-  virtual ~YieldingFlexibleGangTask() { }
+  ~YieldingFlexibleGangTask() { }
 
   friend class YieldingFlexibleWorkGang;
   friend class YieldingFlexibleGangWorker;
diff --git a/test/compiler/7184394/TestAESBase.java b/test/compiler/7184394/TestAESBase.java
index ad6c835cc8458036f7d1679a44765578f442c876..511b97dc6c6b5216fda18172a9e91f15d806a93e 100644
--- a/test/compiler/7184394/TestAESBase.java
+++ b/test/compiler/7184394/TestAESBase.java
@@ -54,7 +54,6 @@ abstract public class TestAESBase {
   String paddingStr = "PKCS5Padding";
   AlgorithmParameters algParams;
   SecretKey key;
-  int ivLen;
 
   static int numThreads = 0;
   int  threadId;
@@ -68,7 +67,7 @@ abstract public class TestAESBase {
 
   public void prepare() {
     try {
-    System.out.println("\nmsgSize=" + msgSize + ", key size=" + keySize + ", reInit=" + !noReinit + ", checkOutput=" + checkOutput);
+    System.out.println("\nalgorithm=" + algorithm + ", mode=" + mode + ", msgSize=" + msgSize + ", keySize=" + keySize + ", noReinit=" + noReinit + ", checkOutput=" + checkOutput);
 
       int keyLenBytes = (keySize == 0 ? 16 : keySize/8);
       byte keyBytes[] = new byte[keyLenBytes];
@@ -90,10 +89,14 @@ abstract public class TestAESBase {
       cipher = Cipher.getInstance(algorithm + "/" + mode + "/" + paddingStr, "SunJCE");
       dCipher = Cipher.getInstance(algorithm + "/" + mode + "/" + paddingStr, "SunJCE");
 
-      ivLen = (algorithm.equals("AES") ? 16 : algorithm.equals("DES") ? 8 : 0);
-      IvParameterSpec initVector = new IvParameterSpec(new byte[ivLen]);
-
-      cipher.init(Cipher.ENCRYPT_MODE, key, initVector);
+      if (mode.equals("CBC")) {
+        int ivLen = (algorithm.equals("AES") ? 16 : algorithm.equals("DES") ? 8 : 0);
+        IvParameterSpec initVector = new IvParameterSpec(new byte[ivLen]);
+        cipher.init(Cipher.ENCRYPT_MODE, key, initVector);
+      } else {
+        algParams = cipher.getParameters();
+        cipher.init(Cipher.ENCRYPT_MODE, key, algParams);
+      }
       algParams = cipher.getParameters();
       dCipher.init(Cipher.DECRYPT_MODE, key, algParams);
       if (threadId == 0) {
diff --git a/test/compiler/7184394/TestAESMain.java b/test/compiler/7184394/TestAESMain.java
index ca2cb43dc8692f0507743cdffce40209414987f0..48baf572b3ecdf1d6f85ffa04e2f6c1d51fa9d79 100644
--- a/test/compiler/7184394/TestAESMain.java
+++ b/test/compiler/7184394/TestAESMain.java
@@ -27,7 +27,8 @@
  * @bug 7184394
  * @summary add intrinsics to use AES instructions
  *
- * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true TestAESMain
+ * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=CBC TestAESMain
+ * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=ECB TestAESMain
  *
  * @author Tom Deneau
  */
diff --git a/test/compiler/8004741/Test8004741.java b/test/compiler/8004741/Test8004741.java
new file mode 100644
index 0000000000000000000000000000000000000000..95e63b9c0c1c1bba875fdfaba67e7cda61b75e65
--- /dev/null
+++ b/test/compiler/8004741/Test8004741.java
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test Test8004741.java
+ * @bug 8004741
+ * @summary Missing compiled exception handle table entry for multidimensional array allocation
+ * @run main/othervm -Xmx64m -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:+StressCompiledExceptionHandlers Test8004741
+ *
+ */
+
+import java.util.*;
+
+public class Test8004741 extends Thread {
+
+  static int[][] test(int a, int b) throws Exception {
+    int[][] ar = null;
+    try {
+      ar = new int[a][b];
+    } catch (Error e) {
+      System.out.println("test got Error");
+      passed = true;
+      throw(e);
+    } catch (Exception e) {
+      System.out.println("test got Exception");
+      throw(e);
+    }
+    return ar;
+  }
+
+  static boolean passed = false;
+
+  public void run() {
+      System.out.println("test started");
+      try {
+        while(true) {
+          test(2,20000);
+        }
+      } catch (ThreadDeath e) {
+        System.out.println("test got ThreadDeath");
+        passed = true;
+      } catch (Error e) {
+        e.printStackTrace();
+        System.out.println("test got Error");
+      } catch (Exception e) {
+        e.printStackTrace();
+        System.out.println("test got Exception");
+      }
+  }
+
+  public static void main(String[] args) throws Exception {
+    for (int n = 0; n < 11000; n++) {
+      test(2, 20);
+    }
+
+    // First test exception catch
+    Test8004741 t = new Test8004741();
+
+    passed = false;
+    t.start();
+    Thread.sleep(1000);
+    t.stop();
+
+    Thread.sleep(5000);
+    t.join();
+    if (passed) {
+      System.out.println("PASSED");
+    } else {
+      System.out.println("FAILED");
+      System.exit(97);
+    }
+  }
+
+};
diff --git a/test/compiler/8005033/Test8005033.java b/test/compiler/8005033/Test8005033.java
new file mode 100644
index 0000000000000000000000000000000000000000..e67063908414e40854f130181538e19b0f3fd2a1
--- /dev/null
+++ b/test/compiler/8005033/Test8005033.java
@@ -0,0 +1,50 @@
+/*
+ * Copyright 2012 SAP AG.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/**
+ * @test
+ * @bug 8005033
+ * @summary On sparcv9, C2's intrinsic for Integer.bitCount(OV) returns wrong result if OV is the result of an operation with int overflow.
+ * @run main/othervm -Xcomp -XX:CompileOnly=Test8005033::testBitCount Test8005033
+ * @author Richard Reingruber richard DOT reingruber AT sap DOT com
+ */
+
+public class Test8005033 {
+    public static int MINUS_ONE = -1;
+
+    public static void main(String[] args) {
+        System.out.println("EXECUTING test.");
+        Integer.bitCount(1);   // load class
+        int expectedBitCount = 0;
+        int calculatedBitCount = testBitCount();
+        if (expectedBitCount != calculatedBitCount) {
+            throw new InternalError("got " + calculatedBitCount + " but expected " + expectedBitCount);
+        }
+        System.out.println("SUCCESSFULLY passed test.");
+    }
+
+    // testBitCount will be compiled using the Integer.bitCount() intrinsic if possible
+    private static int testBitCount() {
+        return Integer.bitCount(MINUS_ONE+1);   // -1 + 1 => int overflow
+    }
+}
diff --git a/test/sanity/ExecuteInternalVMTests.java b/test/sanity/ExecuteInternalVMTests.java
new file mode 100644
index 0000000000000000000000000000000000000000..b3e3724f287523bb656c8d58985f4c0bc960de5e
--- /dev/null
+++ b/test/sanity/ExecuteInternalVMTests.java
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/* @test ExecuteInternalVMTests
+ * @bug 8004691
+ * @summary Add a jtreg test that exercises the ExecuteInternalVMTests flag
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+ExecuteInternalVMTests ExecuteInternalVMTests
+ */
+public class ExecuteInternalVMTests {
+    public static void main(String[] args) throws Exception {
+        // The tests that are run are the HotSpot internal tests which are
+        // executed only when the flag -XX:+ExecuteInternalVMTests is used.
+
+        // The flag -XX:+ExecuteInternalVMTests can only be used for
+        // non-product builds of HotSpot. Therefore, the flag
+        // -XX:+IgnoreUnrecognizedVMOptions is also used, which means that this
+        // test will do nothing on a product build.
+    }
+}