Commit 917a87c4 authored by morris

Merge

......@@ -123,6 +123,7 @@ class Assembler : public AbstractAssembler {
fpop2_op3 = 0x35,
impdep1_op3 = 0x36,
aes3_op3 = 0x36,
sha_op3 = 0x36,
alignaddr_op3 = 0x36,
faligndata_op3 = 0x36,
flog3_op3 = 0x36,
......@@ -223,7 +224,11 @@ class Assembler : public AbstractAssembler {
mwtos_opf = 0x119,
aes_kexpand0_opf = 0x130,
aes_kexpand2_opf = 0x131
aes_kexpand2_opf = 0x131,
sha1_opf = 0x141,
sha256_opf = 0x142,
sha512_opf = 0x143
};
enum op5s {
......@@ -595,6 +600,11 @@ class Assembler : public AbstractAssembler {
// AES crypto instructions supported only on certain processors
static void aes_only() { assert( VM_Version::has_aes(), "This instruction only works on SPARC with AES instructions support"); }
// SHA crypto instructions supported only on certain processors
static void sha1_only() { assert( VM_Version::has_sha1(), "This instruction only works on SPARC with SHA1"); }
static void sha256_only() { assert( VM_Version::has_sha256(), "This instruction only works on SPARC with SHA256"); }
static void sha512_only() { assert( VM_Version::has_sha512(), "This instruction only works on SPARC with SHA512"); }
// instruction only in VIS1
static void vis1_only() { assert( VM_Version::has_vis1(), "This instruction only works on SPARC with VIS1"); }
......@@ -1179,7 +1189,6 @@ public:
u_field(3, 29, 25) | immed(true) | simm(simm13a, 13)); }
inline void wrfprs( Register d) { v9_only(); emit_int32( op(arith_op) | rs1(d) | op3(wrreg_op3) | u_field(6, 29, 25)); }
// VIS1 instructions
void alignaddr( Register s1, Register s2, Register d ) { vis1_only(); emit_int32( op(arith_op) | rd(d) | op3(alignaddr_op3) | rs1(s1) | opf(alignaddr_opf) | rs2(s2)); }
......@@ -1203,6 +1212,12 @@ public:
void movwtos( Register s, FloatRegister d ) { vis3_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::S) | op3(mftoi_op3) | opf(mwtos_opf) | rs2(s)); }
void movxtod( Register s, FloatRegister d ) { vis3_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(mftoi_op3) | opf(mxtod_opf) | rs2(s)); }
// Crypto SHA instructions
void sha1() { sha1_only(); emit_int32( op(arith_op) | op3(sha_op3) | opf(sha1_opf)); }
void sha256() { sha256_only(); emit_int32( op(arith_op) | op3(sha_op3) | opf(sha256_opf)); }
void sha512() { sha512_only(); emit_int32( op(arith_op) | op3(sha_op3) | opf(sha512_opf)); }
// Creation
Assembler(CodeBuffer* code) : AbstractAssembler(code) {
#ifdef CHECK_DELAY
......
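
As a side note, the sha1/sha256/sha512 emitters above pack only opcode fields (no register operands), so the resulting instruction words can be computed by hand. A minimal standalone sketch, assuming the usual SPARC format-3 field positions (op at bits 31:30, op3 at 24:19, opf at 13:5):

#include <cstdint>
#include <cstdio>

// Mirrors emit_int32(op(arith_op) | op3(sha_op3) | opf(shaN_opf)) above;
// rd/rs1/rs2 are all zero for these instructions.
static uint32_t encode_sha(uint32_t op3, uint32_t opf) {
  const uint32_t arith_op = 2;                 // op field for arithmetic ops
  return (arith_op << 30) | (op3 << 19) | (opf << 5);
}

int main() {
  printf("sha1   = 0x%08x\n", encode_sha(0x36, 0x141));  // 0x81b02820
  printf("sha256 = 0x%08x\n", encode_sha(0x36, 0x142));  // 0x81b02840
  printf("sha512 = 0x%08x\n", encode_sha(0x36, 0x143));  // 0x81b02860
}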
......@@ -4575,6 +4575,219 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
address generate_sha1_implCompress(bool multi_block, const char *name) {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", name);
address start = __ pc();
Label L_sha1_loop, L_sha1_unaligned_input, L_sha1_unaligned_input_loop;
int i;
Register buf = O0; // byte[] source+offset
Register state = O1; // int[] SHA.state
Register ofs = O2; // int offset
Register limit = O3; // int limit
// load state into F0-F4
for (i = 0; i < 5; i++) {
__ ldf(FloatRegisterImpl::S, state, i*4, as_FloatRegister(i));
}
__ andcc(buf, 7, G0);
__ br(Assembler::notZero, false, Assembler::pn, L_sha1_unaligned_input);
__ delayed()->nop();
__ BIND(L_sha1_loop);
// load buf into F8-F22
for (i = 0; i < 8; i++) {
__ ldf(FloatRegisterImpl::D, buf, i*8, as_FloatRegister(i*2 + 8));
}
__ sha1();
if (multi_block) {
__ add(ofs, 64, ofs);
__ add(buf, 64, buf);
__ cmp_and_brx_short(ofs, limit, Assembler::lessEqual, Assembler::pt, L_sha1_loop);
__ mov(ofs, O0); // to be returned
}
// store F0-F4 into state and return
for (i = 0; i < 4; i++) {
__ stf(FloatRegisterImpl::S, as_FloatRegister(i), state, i*4);
}
__ retl();
__ delayed()->stf(FloatRegisterImpl::S, F4, state, 0x10);
__ BIND(L_sha1_unaligned_input);
__ alignaddr(buf, G0, buf);
__ BIND(L_sha1_unaligned_input_loop);
// load buf into F8-F24 (one extra doubleword for realignment)
for (i = 0; i < 9; i++) {
__ ldf(FloatRegisterImpl::D, buf, i*8, as_FloatRegister(i*2 + 8));
}
for (i = 0; i < 8; i++) {
__ faligndata(as_FloatRegister(i*2 + 8), as_FloatRegister(i*2 + 10), as_FloatRegister(i*2 + 8));
}
__ sha1();
if (multi_block) {
__ add(ofs, 64, ofs);
__ add(buf, 64, buf);
__ cmp_and_brx_short(ofs, limit, Assembler::lessEqual, Assembler::pt, L_sha1_unaligned_input_loop);
__ mov(ofs, O0); // to be returned
}
// store F0-F4 into state and return
for (i = 0; i < 4; i++) {
__ stf(FloatRegisterImpl::S, as_FloatRegister(i), state, i*4);
}
__ retl();
__ delayed()->stf(FloatRegisterImpl::S, F4, state, 0x10);
return start;
}
address generate_sha256_implCompress(bool multi_block, const char *name) {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", name);
address start = __ pc();
Label L_sha256_loop, L_sha256_unaligned_input, L_sha256_unaligned_input_loop;
int i;
Register buf = O0; // byte[] source+offset
Register state = O1; // int[] SHA2.state
Register ofs = O2; // int offset
Register limit = O3; // int limit
// load state into F0-F7
for (i = 0; i < 8; i++) {
__ ldf(FloatRegisterImpl::S, state, i*4, as_FloatRegister(i));
}
__ andcc(buf, 7, G0);
__ br(Assembler::notZero, false, Assembler::pn, L_sha256_unaligned_input);
__ delayed()->nop();
__ BIND(L_sha256_loop);
// load buf into F8-F22
for (i = 0; i < 8; i++) {
__ ldf(FloatRegisterImpl::D, buf, i*8, as_FloatRegister(i*2 + 8));
}
__ sha256();
if (multi_block) {
__ add(ofs, 64, ofs);
__ add(buf, 64, buf);
__ cmp_and_brx_short(ofs, limit, Assembler::lessEqual, Assembler::pt, L_sha256_loop);
__ mov(ofs, O0); // to be returned
}
// store F0-F7 into state and return
for (i = 0; i < 7; i++) {
__ stf(FloatRegisterImpl::S, as_FloatRegister(i), state, i*4);
}
__ retl();
__ delayed()->stf(FloatRegisterImpl::S, F7, state, 0x1c);
__ BIND(L_sha256_unaligned_input);
__ alignaddr(buf, G0, buf);
__ BIND(L_sha256_unaligned_input_loop);
// load buf into F8-F24 (one extra doubleword for realignment)
for (i = 0; i < 9; i++) {
__ ldf(FloatRegisterImpl::D, buf, i*8, as_FloatRegister(i*2 + 8));
}
for (i = 0; i < 8; i++) {
__ faligndata(as_FloatRegister(i*2 + 8), as_FloatRegister(i*2 + 10), as_FloatRegister(i*2 + 8));
}
__ sha256();
if (multi_block) {
__ add(ofs, 64, ofs);
__ add(buf, 64, buf);
__ cmp_and_brx_short(ofs, limit, Assembler::lessEqual, Assembler::pt, L_sha256_unaligned_input_loop);
__ mov(ofs, O0); // to be returned
}
// store F0-F7 into state and return
for (i = 0; i < 7; i++) {
__ stf(FloatRegisterImpl::S, as_FloatRegister(i), state, i*4);
}
__ retl();
__ delayed()->stf(FloatRegisterImpl::S, F7, state, 0x1c);
return start;
}
address generate_sha512_implCompress(bool multi_block, const char *name) {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", name);
address start = __ pc();
Label L_sha512_loop, L_sha512_unaligned_input, L_sha512_unaligned_input_loop;
int i;
Register buf = O0; // byte[] source+offset
Register state = O1; // long[] SHA5.state
Register ofs = O2; // int offset
Register limit = O3; // int limit
// load state into F0-F14
for (i = 0; i < 8; i++) {
__ ldf(FloatRegisterImpl::D, state, i*8, as_FloatRegister(i*2));
}
__ andcc(buf, 7, G0);
__ br(Assembler::notZero, false, Assembler::pn, L_sha512_unaligned_input);
__ delayed()->nop();
__ BIND(L_sha512_loop);
// load buf into F16-F46
for (i = 0; i < 16; i++) {
__ ldf(FloatRegisterImpl::D, buf, i*8, as_FloatRegister(i*2 + 16));
}
__ sha512();
if (multi_block) {
__ add(ofs, 128, ofs);
__ add(buf, 128, buf);
__ cmp_and_brx_short(ofs, limit, Assembler::lessEqual, Assembler::pt, L_sha512_loop);
__ mov(ofs, O0); // to be returned
}
// store F0-F14 into state and return
for (i = 0; i < 7; i++) {
__ stf(FloatRegisterImpl::D, as_FloatRegister(i*2), state, i*8);
}
__ retl();
__ delayed()->stf(FloatRegisterImpl::D, F14, state, 0x38);
__ BIND(L_sha512_unaligned_input);
__ alignaddr(buf, G0, buf);
__ BIND(L_sha512_unaligned_input_loop);
// load buf into F16-F48 (one extra doubleword for realignment)
for (i = 0; i < 17; i++) {
__ ldf(FloatRegisterImpl::D, buf, i*8, as_FloatRegister(i*2 + 16));
}
for (i = 0; i < 16; i++) {
__ faligndata(as_FloatRegister(i*2 + 16), as_FloatRegister(i*2 + 18), as_FloatRegister(i*2 + 16));
}
__ sha512();
if (multi_block) {
__ add(ofs, 128, ofs);
__ add(buf, 128, buf);
__ cmp_and_brx_short(ofs, limit, Assembler::lessEqual, Assembler::pt, L_sha512_unaligned_input_loop);
__ mov(ofs, O0); // to be returned
}
// store F0-F14 into state and return
for (i = 0; i < 7; i++) {
__ stf(FloatRegisterImpl::D, as_FloatRegister(i*2), state, i*8);
}
__ retl();
__ delayed()->stf(FloatRegisterImpl::D, F14, state, 0x38);
return start;
}
void generate_initial() {
// Generates all stubs and initializes the entry points
......@@ -4647,6 +4860,20 @@ class StubGenerator: public StubCodeGenerator {
StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel();
}
// generate SHA1/SHA256/SHA512 intrinsics code
if (UseSHA1Intrinsics) {
StubRoutines::_sha1_implCompress = generate_sha1_implCompress(false, "sha1_implCompress");
StubRoutines::_sha1_implCompressMB = generate_sha1_implCompress(true, "sha1_implCompressMB");
}
if (UseSHA256Intrinsics) {
StubRoutines::_sha256_implCompress = generate_sha256_implCompress(false, "sha256_implCompress");
StubRoutines::_sha256_implCompressMB = generate_sha256_implCompress(true, "sha256_implCompressMB");
}
if (UseSHA512Intrinsics) {
StubRoutines::_sha512_implCompress = generate_sha512_implCompress(false, "sha512_implCompress");
StubRoutines::_sha512_implCompressMB = generate_sha512_implCompress(true, "sha512_implCompressMB");
}
}
......
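
The unaligned-input paths in the stubs above rely on two VIS1 instructions: alignaddr rounds the buffer pointer down to an 8-byte boundary (latching the misalignment), and faligndata concatenates two adjacent doublewords and extracts the eight bytes starting at that misalignment, which is why those loops load one extra doubleword (9 instead of 8, 17 instead of 16). A byte-level sketch of the same addressing, ignoring endianness and register details (helper name is hypothetical):

#include <cstdint>
#include <cstring>

// Emulate the alignaddr/faligndata sequence for one 64-byte SHA block:
// read the 9 aligned doublewords covering the block, then take the 64
// bytes starting at the original misalignment. Like the hardware path,
// this touches the aligned doublewords that straddle the block.
static void load_block_unaligned(const uint8_t* buf, uint8_t out[64]) {
  uintptr_t mis  = (uintptr_t)buf & 7;    // offset within a doubleword
  const uint8_t* base = buf - mis;        // alignaddr(buf, G0)
  uint8_t window[72];                     // 9 doublewords, like F8..F24
  memcpy(window, base, sizeof(window));
  memcpy(out, window + mis, 64);          // what the faligndata loop yields
}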
......@@ -41,7 +41,7 @@ static bool returns_to_call_stub(address return_pc) {
enum /* platform_dependent_constants */ {
// %%%%%%%% May be able to shrink this a lot
code_size1 = 20000, // simply increase if too small (assembler will crash if too small)
code_size2 = 22000 // simply increase if too small (assembler will crash if too small)
code_size2 = 23000 // simply increase if too small (assembler will crash if too small)
};
class Sparc {
......
......@@ -234,7 +234,7 @@ void VM_Version::initialize() {
assert((OptoLoopAlignment % relocInfo::addr_unit()) == 0, "alignment is not a multiple of NOP size");
char buf[512];
jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
(has_v9() ? ", v9" : (has_v8() ? ", v8" : "")),
(has_hardware_popc() ? ", popc" : ""),
(has_vis1() ? ", vis1" : ""),
......@@ -243,6 +243,9 @@ void VM_Version::initialize() {
(has_blk_init() ? ", blk_init" : ""),
(has_cbcond() ? ", cbcond" : ""),
(has_aes() ? ", aes" : ""),
(has_sha1() ? ", sha1" : ""),
(has_sha256() ? ", sha256" : ""),
(has_sha512() ? ", sha512" : ""),
(is_ultra3() ? ", ultra3" : ""),
(is_sun4v() ? ", sun4v" : ""),
(is_niagara_plus() ? ", niagara_plus" : (is_niagara() ? ", niagara" : "")),
......@@ -301,6 +304,58 @@ void VM_Version::initialize() {
}
}
// SHA1, SHA256, and SHA512 instructions were added to SPARC T-series at different times
if (has_sha1() || has_sha256() || has_sha512()) {
if (UseVIS > 0) { // SHA intrinsics use VIS1 instructions
if (FLAG_IS_DEFAULT(UseSHA)) {
FLAG_SET_DEFAULT(UseSHA, true);
}
} else {
if (UseSHA) {
warning("SPARC SHA intrinsics require VIS1 instruction support. Intrinsics will be disabled.");
FLAG_SET_DEFAULT(UseSHA, false);
}
}
} else if (UseSHA) {
warning("SHA instructions are not available on this CPU");
FLAG_SET_DEFAULT(UseSHA, false);
}
if (!UseSHA) {
FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
} else {
if (has_sha1()) {
if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
FLAG_SET_DEFAULT(UseSHA1Intrinsics, true);
}
} else if (UseSHA1Intrinsics) {
warning("SHA1 instruction is not available on this CPU.");
FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
}
if (has_sha256()) {
if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
}
} else if (UseSHA256Intrinsics) {
warning("SHA256 instruction (for SHA-224 and SHA-256) is not available on this CPU.");
FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
}
if (has_sha512()) {
if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
}
} else if (UseSHA512Intrinsics) {
warning("SHA512 instruction (for SHA-384 and SHA-512) is not available on this CPU.");
FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
}
if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
FLAG_SET_DEFAULT(UseSHA, false);
}
}
if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
(cache_line_size > ContendedPaddingWidth))
ContendedPaddingWidth = cache_line_size;
......
/*
* Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
......@@ -50,7 +50,10 @@ protected:
T_family = 16,
T1_model = 17,
sparc5_instructions = 18,
aes_instructions = 19
aes_instructions = 19,
sha1_instruction = 20,
sha256_instruction = 21,
sha512_instruction = 22
};
enum Feature_Flag_Set {
......@@ -77,6 +80,9 @@ protected:
T1_model_m = 1 << T1_model,
sparc5_instructions_m = 1 << sparc5_instructions,
aes_instructions_m = 1 << aes_instructions,
sha1_instruction_m = 1 << sha1_instruction,
sha256_instruction_m = 1 << sha256_instruction,
sha512_instruction_m = 1 << sha512_instruction,
generic_v8_m = v8_instructions_m | hardware_mul32_m | hardware_div32_m | hardware_fsmuld_m,
generic_v9_m = generic_v8_m | v9_instructions_m,
......@@ -129,6 +135,9 @@ public:
static bool has_cbcond() { return (_features & cbcond_instructions_m) != 0; }
static bool has_sparc5_instr() { return (_features & sparc5_instructions_m) != 0; }
static bool has_aes() { return (_features & aes_instructions_m) != 0; }
static bool has_sha1() { return (_features & sha1_instruction_m) != 0; }
static bool has_sha256() { return (_features & sha256_instruction_m) != 0; }
static bool has_sha512() { return (_features & sha512_instruction_m) != 0; }
static bool supports_compare_and_exchange()
{ return has_v9(); }
......
......@@ -3853,6 +3853,15 @@ void Assembler::vpbroadcastd(XMMRegister dst, XMMRegister src) {
emit_int8((unsigned char)(0xC0 | encode));
}
// Carry-Less Multiplication Quadword
void Assembler::pclmulqdq(XMMRegister dst, XMMRegister src, int mask) {
assert(VM_Version::supports_clmul(), "");
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A);
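// PCLMULQDQ is 66 0F 3A 44 /r ib: simd_prefix_and_encode() above emits the
// 66 prefix and 0F 3A escape, 0x44 below is the opcode byte, and the
// trailing immediate selects which 64-bit halves of dst and src are multiplied.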
emit_int8(0x44);
emit_int8((unsigned char)(0xC0 | encode));
emit_int8((unsigned char)mask);
}
// Carry-Less Multiplication Quadword
void Assembler::vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask) {
assert(VM_Version::supports_avx() && VM_Version::supports_clmul(), "");
......
......@@ -1837,6 +1837,7 @@ private:
void vpbroadcastd(XMMRegister dst, XMMRegister src);
// Carry-Less Multiplication Quadword
void pclmulqdq(XMMRegister dst, XMMRegister src, int mask);
void vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask);
// AVX instruction which is used to clear upper 128 bits of YMM registers and
......
......@@ -7316,17 +7316,34 @@ void MacroAssembler::update_byte_crc32(Register crc, Register val, Register tabl
* Fold 128-bit data chunk
*/
void MacroAssembler::fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, Register buf, int offset) {
vpclmulhdq(xtmp, xK, xcrc); // [123:64]
vpclmulldq(xcrc, xK, xcrc); // [63:0]
vpxor(xcrc, xcrc, Address(buf, offset), false /* vector256 */);
pxor(xcrc, xtmp);
if (UseAVX > 0) {
vpclmulhdq(xtmp, xK, xcrc); // [123:64]
vpclmulldq(xcrc, xK, xcrc); // [63:0]
vpxor(xcrc, xcrc, Address(buf, offset), false /* vector256 */);
pxor(xcrc, xtmp);
} else {
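// SSE-only path below: unlike the three-operand vpclmulqdq used above,
// pclmulqdq is destructive (dst is also a source), so xcrc must be copied
// to xtmp before the high-half multiply.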
movdqa(xtmp, xcrc);
pclmulhdq(xtmp, xK); // [123:64]
pclmulldq(xcrc, xK); // [63:0]
pxor(xcrc, xtmp);
movdqu(xtmp, Address(buf, offset));
pxor(xcrc, xtmp);
}
}
void MacroAssembler::fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, XMMRegister xbuf) {
vpclmulhdq(xtmp, xK, xcrc);
vpclmulldq(xcrc, xK, xcrc);
pxor(xcrc, xbuf);
pxor(xcrc, xtmp);
if (UseAVX > 0) {
vpclmulhdq(xtmp, xK, xcrc);
vpclmulldq(xcrc, xK, xcrc);
pxor(xcrc, xbuf);
pxor(xcrc, xtmp);
} else {
movdqa(xtmp, xcrc);
pclmulhdq(xtmp, xK);
pclmulldq(xcrc, xK);
pxor(xcrc, xbuf);
pxor(xcrc, xtmp);
}
}
/**
......@@ -7444,9 +7461,17 @@ void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len, Regi
// Fold 128 bits in xmm1 down into 32 bits in crc register.
BIND(L_fold_128b);
movdqu(xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_addr()));
vpclmulqdq(xmm2, xmm0, xmm1, 0x1);
vpand(xmm3, xmm0, xmm2, false /* vector256 */);
vpclmulqdq(xmm0, xmm0, xmm3, 0x1);
if (UseAVX > 0) {
vpclmulqdq(xmm2, xmm0, xmm1, 0x1);
vpand(xmm3, xmm0, xmm2, false /* vector256 */);
vpclmulqdq(xmm0, xmm0, xmm3, 0x1);
} else {
movdqa(xmm2, xmm0);
pclmulqdq(xmm2, xmm1, 0x1);
movdqa(xmm3, xmm0);
pand(xmm3, xmm2);
pclmulqdq(xmm0, xmm3, 0x1);
}
psrldq(xmm1, 8);
psrldq(xmm2, 4);
pxor(xmm0, xmm1);
......
......@@ -966,6 +966,16 @@ public:
void mulss(XMMRegister dst, Address src) { Assembler::mulss(dst, src); }
void mulss(XMMRegister dst, AddressLiteral src);
// Carry-Less Multiplication Quadword
void pclmulldq(XMMRegister dst, XMMRegister src) {
// 0x00 - multiply lower 64 bits [0:63]
Assembler::pclmulqdq(dst, src, 0x00);
}
void pclmulhdq(XMMRegister dst, XMMRegister src) {
// 0x11 - multiply upper 64 bits [64:127]
Assembler::pclmulqdq(dst, src, 0x11);
}
void sqrtsd(XMMRegister dst, XMMRegister src) { Assembler::sqrtsd(dst, src); }
void sqrtsd(XMMRegister dst, Address src) { Assembler::sqrtsd(dst, src); }
void sqrtsd(XMMRegister dst, AddressLiteral src);
......
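
For reference, PCLMULQDQ performs a carry-less (GF(2)[x]) multiplication of two 64-bit operands into a 128-bit product; the 0x00 and 0x11 selectors in the wrappers above only choose which quadword of each register feeds it. A minimal scalar sketch of the operation itself:

#include <cstdint>

struct u128 { uint64_t lo, hi; };

// Carry-less 64x64 -> 128-bit multiply: identical to schoolbook
// multiplication except partial products are combined with XOR,
// so no carries propagate between bit positions.
static u128 clmul64(uint64_t a, uint64_t b) {
  u128 r = {0, 0};
  for (int i = 0; i < 64; i++) {
    if ((b >> i) & 1) {
      r.lo ^= a << i;
      if (i != 0) r.hi ^= a >> (64 - i);
    }
  }
  return r;
}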
......@@ -568,7 +568,7 @@ void VM_Version::get_processor_features() {
FLAG_SET_DEFAULT(UseCLMUL, false);
}
if (UseCLMUL && (UseAVX > 0) && (UseSSE > 2)) {
if (UseCLMUL && (UseSSE > 2)) {
if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
UseCRC32Intrinsics = true;
}
......@@ -590,6 +590,17 @@ void VM_Version::get_processor_features() {
FLAG_SET_DEFAULT(UseAESIntrinsics, false);
}
if (UseSHA) {
warning("SHA instructions are not available on this CPU");
FLAG_SET_DEFAULT(UseSHA, false);
}
if (UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics) {
warning("SHA intrinsics are not available on this CPU");
FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
}
// Adjust RTM (Restricted Transactional Memory) flags
if (!supports_rtm() && UseRTMLocking) {
// Can't continue because UseRTMLocking affects UseBiasedLocking flag
......@@ -803,6 +814,21 @@ void VM_Version::get_processor_features() {
}
}
}
if ((cpu_family() == 0x06) &&
((extended_cpu_model() == 0x36) || // Centerton
(extended_cpu_model() == 0x37) || // Silvermont
(extended_cpu_model() == 0x4D))) {
#ifdef COMPILER2
if (FLAG_IS_DEFAULT(OptoScheduling)) {
OptoScheduling = true;
}
#endif
if (supports_sse4_2()) { // Silvermont
if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
}
}
}
}
// Use count leading zeros count instruction if available.
......@@ -890,23 +916,25 @@ void VM_Version::get_processor_features() {
AllocatePrefetchDistance = allocate_prefetch_distance();
AllocatePrefetchStyle = allocate_prefetch_style();
if( is_intel() && cpu_family() == 6 && supports_sse3() ) {
if( AllocatePrefetchStyle == 2 ) { // watermark prefetching on Core
if (is_intel() && cpu_family() == 6 && supports_sse3()) {
if (AllocatePrefetchStyle == 2) { // watermark prefetching on Core
#ifdef _LP64
AllocatePrefetchDistance = 384;
#else
AllocatePrefetchDistance = 320;
#endif
}
if( supports_sse4_2() && supports_ht() ) { // Nehalem based cpus
if (supports_sse4_2() && supports_ht()) { // Nehalem based cpus
AllocatePrefetchDistance = 192;
AllocatePrefetchLines = 4;
}
#ifdef COMPILER2
if (AggressiveOpts && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
if (supports_sse4_2()) {
if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
FLAG_SET_DEFAULT(UseFPUForSpilling, true);
}
#endif
}
#endif
}
assert(AllocatePrefetchDistance % AllocatePrefetchStepSize == 0, "invalid value");
......
/*
* Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2006, 2014, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
......@@ -137,6 +137,21 @@ int VM_Version::platform_features(int features) {
#endif
if (av & AV_SPARC_AES) features |= aes_instructions_m;
#ifndef AV_SPARC_SHA1
#define AV_SPARC_SHA1 0x00400000 /* sha1 instruction supported */
#endif
if (av & AV_SPARC_SHA1) features |= sha1_instruction_m;
#ifndef AV_SPARC_SHA256
#define AV_SPARC_SHA256 0x00800000 /* sha256 instruction supported */
#endif
if (av & AV_SPARC_SHA256) features |= sha256_instruction_m;
#ifndef AV_SPARC_SHA512
#define AV_SPARC_SHA512 0x01000000 /* sha512 instruction supported */
#endif
if (av & AV_SPARC_SHA512) features |= sha512_instruction_m;
} else {
// getisax(2) failed, use the old legacy code.
#ifndef PRODUCT
......
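
The block above extends the Solaris getisax(2) probe. A minimal standalone sketch of the same capability test (Solaris/SPARC only; the AV_SPARC_SHA* values are the ones defined above):

#include <sys/auxv.h>
#include <stdio.h>

int main() {
  uint32_t av[2] = {0, 0};
  (void) getisax(av, 2);
  printf("sha1:   %s\n", (av[0] & 0x00400000) ? "yes" : "no");  // AV_SPARC_SHA1
  printf("sha256: %s\n", (av[0] & 0x00800000) ? "yes" : "no");  // AV_SPARC_SHA256
  printf("sha512: %s\n", (av[0] & 0x01000000) ? "yes" : "no");  // AV_SPARC_SHA512
  return 0;
}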
......@@ -789,6 +789,26 @@
do_name( decrypt_name, "decrypt") \
do_signature(byteArray_int_int_byteArray_int_signature, "([BII[BI)I") \
\
/* support for sun.security.provider.SHA */ \
do_class(sun_security_provider_sha, "sun/security/provider/SHA") \
do_intrinsic(_sha_implCompress, sun_security_provider_sha, implCompress_name, implCompress_signature, F_R) \
do_name( implCompress_name, "implCompress") \
do_signature(implCompress_signature, "([BI)V") \
\
/* support for sun.security.provider.SHA2 */ \
do_class(sun_security_provider_sha2, "sun/security/provider/SHA2") \
do_intrinsic(_sha2_implCompress, sun_security_provider_sha2, implCompress_name, implCompress_signature, F_R) \
\
/* support for sun.security.provider.SHA5 */ \
do_class(sun_security_provider_sha5, "sun/security/provider/SHA5") \
do_intrinsic(_sha5_implCompress, sun_security_provider_sha5, implCompress_name, implCompress_signature, F_R) \
\
/* support for sun.security.provider.DigestBase */ \
do_class(sun_security_provider_digestbase, "sun/security/provider/DigestBase") \
do_intrinsic(_digestBase_implCompressMB, sun_security_provider_digestbase, implCompressMB_name, implCompressMB_signature, F_R) \
do_name( implCompressMB_name, "implCompressMultiBlock") \
do_signature(implCompressMB_signature, "([BII)I") \
\
/* support for java.util.zip */ \
do_class(java_util_zip_CRC32, "java/util/zip/CRC32") \
do_intrinsic(_updateCRC32, java_util_zip_CRC32, update_name, int2_int_signature, F_SN) \
......
......@@ -710,7 +710,15 @@ JVMState* PredictedCallGenerator::generate(JVMState* jvms, Parse* parent_parser)
Node* iophi = PhiNode::make(region, kit.i_o(), Type::ABIO);
iophi->set_req(2, slow_map->i_o());
kit.set_i_o(gvn.transform(iophi));
// Merge memory
kit.merge_memory(slow_map->merged_memory(), region, 2);
// Transform new memory Phis.
for (MergeMemStream mms(kit.merged_memory()); mms.next_non_empty();) {
Node* phi = mms.memory();
if (phi->is_Phi() && phi->in(0) == region) {
mms.set_memory(gvn.transform(phi));
}
}
uint tos = kit.jvms()->stkoff() + kit.sp();
uint limit = slow_map->req();
for (uint i = TypeFunc::Parms; i < limit; i++) {
......@@ -864,15 +872,15 @@ CallGenerator* CallGenerator::for_method_handle_inline(JVMState* jvms, ciMethod*
}
//------------------------PredictedIntrinsicGenerator------------------------------
// Internal class which handles all predicted Intrinsic calls.
class PredictedIntrinsicGenerator : public CallGenerator {
//------------------------PredicatedIntrinsicGenerator------------------------------
// Internal class which handles all predicated Intrinsic calls.
class PredicatedIntrinsicGenerator : public CallGenerator {
CallGenerator* _intrinsic;
CallGenerator* _cg;
public:
PredictedIntrinsicGenerator(CallGenerator* intrinsic,
CallGenerator* cg)
PredicatedIntrinsicGenerator(CallGenerator* intrinsic,
CallGenerator* cg)
: CallGenerator(cg->method())
{
_intrinsic = intrinsic;
......@@ -887,103 +895,182 @@ public:
};
CallGenerator* CallGenerator::for_predicted_intrinsic(CallGenerator* intrinsic,
CallGenerator* cg) {
return new PredictedIntrinsicGenerator(intrinsic, cg);
CallGenerator* CallGenerator::for_predicated_intrinsic(CallGenerator* intrinsic,
CallGenerator* cg) {
return new PredicatedIntrinsicGenerator(intrinsic, cg);
}
JVMState* PredictedIntrinsicGenerator::generate(JVMState* jvms, Parse* parent_parser) {
JVMState* PredicatedIntrinsicGenerator::generate(JVMState* jvms, Parse* parent_parser) {
// The code we want to generate here is:
// if (receiver == NULL)
// uncommon_Trap
// if (predicate(0))
// do_intrinsic(0)
// else
// if (predicate(1))
// do_intrinsic(1)
// ...
// else
// do_java_comp
GraphKit kit(jvms);
PhaseGVN& gvn = kit.gvn();
CompileLog* log = kit.C->log();
if (log != NULL) {
log->elem("predicted_intrinsic bci='%d' method='%d'",
log->elem("predicated_intrinsic bci='%d' method='%d'",
jvms->bci(), log->identify(method()));
}
Node* slow_ctl = _intrinsic->generate_predicate(kit.sync_jvms());
if (kit.failing())
return NULL; // might happen because of NodeCountInliningCutoff
if (!method()->is_static()) {
// We need an explicit receiver null_check before checking its type in predicate.
// We share a map with the caller, so his JVMS gets adjusted.
Node* receiver = kit.null_check_receiver_before_call(method());
if (kit.stopped()) {
return kit.transfer_exceptions_into_jvms();
}
}
SafePointNode* slow_map = NULL;
JVMState* slow_jvms;
if (slow_ctl != NULL) {
PreserveJVMState pjvms(&kit);
kit.set_control(slow_ctl);
int n_predicates = _intrinsic->predicates_count();
assert(n_predicates > 0, "sanity");
JVMState** result_jvms = NEW_RESOURCE_ARRAY(JVMState*, (n_predicates+1));
// Region for normal compilation code if intrinsic failed.
Node* slow_region = new (kit.C) RegionNode(1);
int results = 0;
for (int predicate = 0; (predicate < n_predicates) && !kit.stopped(); predicate++) {
#ifdef ASSERT
JVMState* old_jvms = kit.jvms();
SafePointNode* old_map = kit.map();
Node* old_io = old_map->i_o();
Node* old_mem = old_map->memory();
Node* old_exc = old_map->next_exception();
#endif
Node* else_ctrl = _intrinsic->generate_predicate(kit.sync_jvms(), predicate);
#ifdef ASSERT
// Assert(no_new_memory && no_new_io && no_new_exceptions) after generate_predicate.
assert(old_jvms == kit.jvms(), "generate_predicate should not change jvm state");
SafePointNode* new_map = kit.map();
assert(old_io == new_map->i_o(), "generate_predicate should not change i_o");
assert(old_mem == new_map->memory(), "generate_predicate should not change memory");
assert(old_exc == new_map->next_exception(), "generate_predicate should not add exceptions");
#endif
if (!kit.stopped()) {
slow_jvms = _cg->generate(kit.sync_jvms(), parent_parser);
if (kit.failing())
return NULL; // might happen because of NodeCountInliningCutoff
assert(slow_jvms != NULL, "must be");
kit.add_exception_states_from(slow_jvms);
kit.set_map(slow_jvms->map());
if (!kit.stopped())
slow_map = kit.stop();
PreserveJVMState pjvms(&kit);
// Generate intrinsic code:
JVMState* new_jvms = _intrinsic->generate(kit.sync_jvms(), parent_parser);
if (new_jvms == NULL) {
// Intrinsic failed, use normal compilation path for this predicate.
slow_region->add_req(kit.control());
} else {
kit.add_exception_states_from(new_jvms);
kit.set_jvms(new_jvms);
if (!kit.stopped()) {
result_jvms[results++] = kit.jvms();
}
}
}
if (else_ctrl == NULL) {
else_ctrl = kit.C->top();
}
kit.set_control(else_ctrl);
}
if (kit.stopped()) {
// Predicate is always false.
kit.set_jvms(slow_jvms);
return kit.transfer_exceptions_into_jvms();
if (!kit.stopped()) {
// Final 'else' after predicates.
slow_region->add_req(kit.control());
}
// Generate intrinsic code:
JVMState* new_jvms = _intrinsic->generate(kit.sync_jvms(), parent_parser);
if (new_jvms == NULL) {
// Intrinsic failed, so use slow code or make a direct call.
if (slow_map == NULL) {
CallGenerator* cg = CallGenerator::for_direct_call(method());
new_jvms = cg->generate(kit.sync_jvms(), parent_parser);
} else {
kit.set_jvms(slow_jvms);
return kit.transfer_exceptions_into_jvms();
if (slow_region->req() > 1) {
PreserveJVMState pjvms(&kit);
// Generate normal compilation code:
kit.set_control(gvn.transform(slow_region));
JVMState* new_jvms = _cg->generate(kit.sync_jvms(), parent_parser);
if (kit.failing())
return NULL; // might happen because of NodeCountInliningCutoff
assert(new_jvms != NULL, "must be");
kit.add_exception_states_from(new_jvms);
kit.set_jvms(new_jvms);
if (!kit.stopped()) {
result_jvms[results++] = kit.jvms();
}
}
kit.add_exception_states_from(new_jvms);
kit.set_jvms(new_jvms);
// Need to merge slow and fast?
if (slow_map == NULL) {
// The fast path is the only path remaining.
if (results == 0) {
// All paths ended in uncommon traps.
(void) kit.stop();
return kit.transfer_exceptions_into_jvms();
}
if (kit.stopped()) {
// Intrinsic method threw an exception, so it's just the slow path after all.
kit.set_jvms(slow_jvms);
if (results == 1) { // Only one path
kit.set_jvms(result_jvms[0]);
return kit.transfer_exceptions_into_jvms();
}
// Finish the diamond.
// Merge all paths.
kit.C->set_has_split_ifs(true); // Has chance for split-if optimization
RegionNode* region = new (kit.C) RegionNode(3);
region->init_req(1, kit.control());
region->init_req(2, slow_map->control());
kit.set_control(gvn.transform(region));
RegionNode* region = new (kit.C) RegionNode(results + 1);
Node* iophi = PhiNode::make(region, kit.i_o(), Type::ABIO);
iophi->set_req(2, slow_map->i_o());
for (int i = 0; i < results; i++) {
JVMState* jvms = result_jvms[i];
int path = i + 1;
SafePointNode* map = jvms->map();
region->init_req(path, map->control());
iophi->set_req(path, map->i_o());
if (i == 0) {
kit.set_jvms(jvms);
} else {
kit.merge_memory(map->merged_memory(), region, path);
}
}
kit.set_control(gvn.transform(region));
kit.set_i_o(gvn.transform(iophi));
kit.merge_memory(slow_map->merged_memory(), region, 2);
// Transform new memory Phis.
for (MergeMemStream mms(kit.merged_memory()); mms.next_non_empty();) {
Node* phi = mms.memory();
if (phi->is_Phi() && phi->in(0) == region) {
mms.set_memory(gvn.transform(phi));
}
}
// Merge debug info.
Node** ins = NEW_RESOURCE_ARRAY(Node*, results);
uint tos = kit.jvms()->stkoff() + kit.sp();
uint limit = slow_map->req();
Node* map = kit.map();
uint limit = map->req();
for (uint i = TypeFunc::Parms; i < limit; i++) {
// Skip unused stack slots; fast forward to monoff();
if (i == tos) {
i = kit.jvms()->monoff();
if( i >= limit ) break;
}
Node* m = kit.map()->in(i);
Node* n = slow_map->in(i);
if (m != n) {
const Type* t = gvn.type(m)->meet_speculative(gvn.type(n));
Node* phi = PhiNode::make(region, m, t);
phi->set_req(2, n);
kit.map()->set_req(i, gvn.transform(phi));
Node* n = map->in(i);
ins[0] = n;
const Type* t = gvn.type(n);
bool needs_phi = false;
for (int j = 1; j < results; j++) {
JVMState* jvms = result_jvms[j];
Node* jmap = jvms->map();
Node* m = NULL;
if (jmap->req() > i) {
m = jmap->in(i);
if (m != n) {
needs_phi = true;
t = t->meet_speculative(gvn.type(m));
}
}
ins[j] = m;
}
if (needs_phi) {
Node* phi = PhiNode::make(region, n, t);
for (int j = 1; j < results; j++) {
phi->set_req(j + 1, ins[j]);
}
map->set_req(i, gvn.transform(phi));
}
}
return kit.transfer_exceptions_into_jvms();
}
......
......@@ -63,8 +63,9 @@ class CallGenerator : public ResourceObj {
virtual bool is_virtual() const { return false; }
// is_deferred: The decision whether to inline or not is deferred.
virtual bool is_deferred() const { return false; }
// is_predicted: Uses an explicit check against a predicted type.
virtual bool is_predicted() const { return false; }
// is_predicated: Uses an explicit check (predicate).
virtual bool is_predicated() const { return false; }
virtual int predicates_count() const { return 0; }
// is_trap: Does not return to the caller. (E.g., uncommon trap.)
virtual bool is_trap() const { return false; }
// does_virtual_dispatch: Should try inlining as normal method first.
......@@ -157,9 +158,9 @@ class CallGenerator : public ResourceObj {
// Registry for intrinsics:
static CallGenerator* for_intrinsic(ciMethod* m);
static void register_intrinsic(ciMethod* m, CallGenerator* cg);
static CallGenerator* for_predicted_intrinsic(CallGenerator* intrinsic,
CallGenerator* cg);
virtual Node* generate_predicate(JVMState* jvms) { return NULL; };
static CallGenerator* for_predicated_intrinsic(CallGenerator* intrinsic,
CallGenerator* cg);
virtual Node* generate_predicate(JVMState* jvms, int predicate) { return NULL; };
virtual void print_inlining_late(const char* msg) { ShouldNotReachHere(); }
......
......@@ -115,12 +115,12 @@ CallGenerator* Compile::call_generator(ciMethod* callee, int vtable_index, bool
if (allow_inline && allow_intrinsics) {
CallGenerator* cg = find_intrinsic(callee, call_does_dispatch);
if (cg != NULL) {
if (cg->is_predicted()) {
if (cg->is_predicated()) {
// Code without intrinsic but, hopefully, inlined.
CallGenerator* inline_cg = this->call_generator(callee,
vtable_index, call_does_dispatch, jvms, allow_inline, prof_factor, speculative_receiver_type, false);
if (inline_cg != NULL) {
cg = CallGenerator::for_predicted_intrinsic(cg, inline_cg);
cg = CallGenerator::for_predicated_intrinsic(cg, inline_cg);
}
}
......
/*
* Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2005, 2014, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
......@@ -938,7 +938,13 @@ void ConnectionGraph::process_call_arguments(CallNode *call) {
strcmp(call->as_CallLeaf()->_name, "aescrypt_encryptBlock") == 0 ||
strcmp(call->as_CallLeaf()->_name, "aescrypt_decryptBlock") == 0 ||
strcmp(call->as_CallLeaf()->_name, "cipherBlockChaining_encryptAESCrypt") == 0 ||
strcmp(call->as_CallLeaf()->_name, "cipherBlockChaining_decryptAESCrypt") == 0)
strcmp(call->as_CallLeaf()->_name, "cipherBlockChaining_decryptAESCrypt") == 0 ||
strcmp(call->as_CallLeaf()->_name, "sha1_implCompress") == 0 ||
strcmp(call->as_CallLeaf()->_name, "sha1_implCompressMB") == 0 ||
strcmp(call->as_CallLeaf()->_name, "sha256_implCompress") == 0 ||
strcmp(call->as_CallLeaf()->_name, "sha256_implCompressMB") == 0 ||
strcmp(call->as_CallLeaf()->_name, "sha512_implCompress") == 0 ||
strcmp(call->as_CallLeaf()->_name, "sha512_implCompressMB") == 0)
))) {
call->dump();
fatal(err_msg_res("EA unexpected CallLeaf %s", call->as_CallLeaf()->_name));
......
......@@ -2435,23 +2435,24 @@ void GraphKit::merge_memory(Node* new_mem, Node* region, int new_path) {
Node* new_slice = mms.memory2();
if (old_slice != new_slice) {
PhiNode* phi;
if (new_slice->is_Phi() && new_slice->as_Phi()->region() == region) {
phi = new_slice->as_Phi();
#ifdef ASSERT
if (old_slice->is_Phi() && old_slice->as_Phi()->region() == region)
old_slice = old_slice->in(new_path);
// Caller is responsible for ensuring that any pre-existing
// phis are already aware of old memory.
int old_path = (new_path > 1) ? 1 : 2; // choose old_path != new_path
assert(phi->in(old_path) == old_slice, "pre-existing phis OK");
#endif
mms.set_memory(phi);
if (old_slice->is_Phi() && old_slice->as_Phi()->region() == region) {
if (mms.is_empty()) {
// clone base memory Phi's inputs for this memory slice
assert(old_slice == mms.base_memory(), "sanity");
phi = PhiNode::make(region, NULL, Type::MEMORY, mms.adr_type(C));
_gvn.set_type(phi, Type::MEMORY);
for (uint i = 1; i < phi->req(); i++) {
phi->init_req(i, old_slice->in(i));
}
} else {
phi = old_slice->as_Phi(); // Phi was generated already
}
} else {
phi = PhiNode::make(region, old_slice, Type::MEMORY, mms.adr_type(C));
_gvn.set_type(phi, Type::MEMORY);
phi->set_req(new_path, new_slice);
mms.set_memory(_gvn.transform(phi)); // assume it is complete
}
phi->set_req(new_path, new_slice);
mms.set_memory(phi);
}
}
}
......
......@@ -484,7 +484,9 @@ Node* PhaseCFG::select(Block* block, Node_List &worklist, GrowableArray<int> &re
iop == Op_CreateEx || // Create-exception must start block
iop == Op_CheckCastPP
) {
worklist.map(i,worklist.pop());
// select the node n
// remove n from worklist and retain the order of remaining nodes
worklist.remove((uint)i);
return n;
}
......@@ -570,7 +572,9 @@ Node* PhaseCFG::select(Block* block, Node_List &worklist, GrowableArray<int> &re
assert(idx >= 0, "index should be set");
Node *n = worklist[(uint)idx]; // Get the winner
worklist.map((uint)idx, worklist.pop()); // Compress worklist
// select the node n
// remove n from worklist and retain the order of remaining nodes
worklist.remove((uint)idx);
return n;
}
......
......@@ -46,25 +46,28 @@ class LibraryIntrinsic : public InlineCallGenerator {
public:
private:
bool _is_virtual;
bool _is_predicted;
bool _does_virtual_dispatch;
int8_t _predicates_count; // Intrinsic is predicated by several conditions
int8_t _last_predicate; // Last generated predicate
vmIntrinsics::ID _intrinsic_id;
public:
LibraryIntrinsic(ciMethod* m, bool is_virtual, bool is_predicted, bool does_virtual_dispatch, vmIntrinsics::ID id)
LibraryIntrinsic(ciMethod* m, bool is_virtual, int predicates_count, bool does_virtual_dispatch, vmIntrinsics::ID id)
: InlineCallGenerator(m),
_is_virtual(is_virtual),
_is_predicted(is_predicted),
_does_virtual_dispatch(does_virtual_dispatch),
_predicates_count((int8_t)predicates_count),
_last_predicate((int8_t)-1),
_intrinsic_id(id)
{
}
virtual bool is_intrinsic() const { return true; }
virtual bool is_virtual() const { return _is_virtual; }
virtual bool is_predicted() const { return _is_predicted; }
virtual bool is_predicated() const { return _predicates_count > 0; }
virtual int predicates_count() const { return _predicates_count; }
virtual bool does_virtual_dispatch() const { return _does_virtual_dispatch; }
virtual JVMState* generate(JVMState* jvms, Parse* parent_parser);
virtual Node* generate_predicate(JVMState* jvms);
virtual Node* generate_predicate(JVMState* jvms, int predicate);
vmIntrinsics::ID intrinsic_id() const { return _intrinsic_id; }
};
......@@ -107,8 +110,8 @@ class LibraryCallKit : public GraphKit {
vmIntrinsics::ID intrinsic_id() const { return _intrinsic->intrinsic_id(); }
ciMethod* callee() const { return _intrinsic->method(); }
bool try_to_inline();
Node* try_to_predicate();
bool try_to_inline(int predicate);
Node* try_to_predicate(int predicate);
void push_result() {
// Push the result onto the stack.
......@@ -307,6 +310,14 @@ class LibraryCallKit : public GraphKit {
Node* inline_cipherBlockChaining_AESCrypt_predicate(bool decrypting);
Node* get_key_start_from_aescrypt_object(Node* aescrypt_object);
Node* get_original_key_start_from_aescrypt_object(Node* aescrypt_object);
bool inline_sha_implCompress(vmIntrinsics::ID id);
bool inline_digestBase_implCompressMB(int predicate);
bool inline_sha_implCompressMB(Node* digestBaseObj, ciInstanceKlass* instklass_SHA,
bool long_state, address stubAddr, const char *stubName,
Node* src_start, Node* ofs, Node* limit);
Node* get_state_from_sha_object(Node *sha_object);
Node* get_state_from_sha5_object(Node *sha_object);
Node* inline_digestBase_implCompressMB_predicate(int predicate);
bool inline_encodeISOArray();
bool inline_updateCRC32();
bool inline_updateBytesCRC32();
......@@ -367,7 +378,7 @@ CallGenerator* Compile::make_vm_intrinsic(ciMethod* m, bool is_virtual) {
}
}
bool is_predicted = false;
int predicates = 0;
bool does_virtual_dispatch = false;
switch (id) {
......@@ -508,7 +519,24 @@ CallGenerator* Compile::make_vm_intrinsic(ciMethod* m, bool is_virtual) {
case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt:
if (!UseAESIntrinsics) return NULL;
// these two require the predicated logic
is_predicted = true;
predicates = 1;
break;
case vmIntrinsics::_sha_implCompress:
if (!UseSHA1Intrinsics) return NULL;
break;
case vmIntrinsics::_sha2_implCompress:
if (!UseSHA256Intrinsics) return NULL;
break;
case vmIntrinsics::_sha5_implCompress:
if (!UseSHA512Intrinsics) return NULL;
break;
case vmIntrinsics::_digestBase_implCompressMB:
if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) return NULL;
predicates = 3;
break;
case vmIntrinsics::_updateCRC32:
......@@ -577,7 +605,7 @@ CallGenerator* Compile::make_vm_intrinsic(ciMethod* m, bool is_virtual) {
if (!InlineUnsafeOps) return NULL;
}
return new LibraryIntrinsic(m, is_virtual, is_predicted, does_virtual_dispatch, (vmIntrinsics::ID) id);
return new LibraryIntrinsic(m, is_virtual, predicates, does_virtual_dispatch, (vmIntrinsics::ID) id);
}
//----------------------register_library_intrinsics-----------------------
......@@ -601,7 +629,7 @@ JVMState* LibraryIntrinsic::generate(JVMState* jvms, Parse* parent_parser) {
const int bci = kit.bci();
// Try to inline the intrinsic.
if (kit.try_to_inline()) {
if (kit.try_to_inline(_last_predicate)) {
if (C->print_intrinsics() || C->print_inlining()) {
C->print_inlining(callee, jvms->depth() - 1, bci, is_virtual() ? "(intrinsic, virtual)" : "(intrinsic)");
}
......@@ -634,12 +662,13 @@ JVMState* LibraryIntrinsic::generate(JVMState* jvms, Parse* parent_parser) {
return NULL;
}
Node* LibraryIntrinsic::generate_predicate(JVMState* jvms) {
Node* LibraryIntrinsic::generate_predicate(JVMState* jvms, int predicate) {
LibraryCallKit kit(jvms, this);
Compile* C = kit.C;
int nodes = C->unique();
_last_predicate = predicate;
#ifndef PRODUCT
assert(is_predicted(), "sanity");
assert(is_predicated() && predicate < predicates_count(), "sanity");
if ((C->print_intrinsics() || C->print_inlining()) && Verbose) {
char buf[1000];
const char* str = vmIntrinsics::short_name_as_C_string(intrinsic_id(), buf, sizeof(buf));
......@@ -649,10 +678,10 @@ Node* LibraryIntrinsic::generate_predicate(JVMState* jvms) {
ciMethod* callee = kit.callee();
const int bci = kit.bci();
Node* slow_ctl = kit.try_to_predicate();
Node* slow_ctl = kit.try_to_predicate(predicate);
if (!kit.failing()) {
if (C->print_intrinsics() || C->print_inlining()) {
C->print_inlining(callee, jvms->depth() - 1, bci, is_virtual() ? "(intrinsic, virtual)" : "(intrinsic)");
C->print_inlining(callee, jvms->depth() - 1, bci, is_virtual() ? "(intrinsic, virtual, predicate)" : "(intrinsic, predicate)");
}
C->gather_intrinsic_statistics(intrinsic_id(), is_virtual(), Compile::_intrinsic_worked);
if (C->log()) {
......@@ -681,7 +710,7 @@ Node* LibraryIntrinsic::generate_predicate(JVMState* jvms) {
return NULL;
}
bool LibraryCallKit::try_to_inline() {
bool LibraryCallKit::try_to_inline(int predicate) {
// Handle symbolic names for otherwise undistinguished boolean switches:
const bool is_store = true;
const bool is_native_ptr = true;
......@@ -875,6 +904,14 @@ bool LibraryCallKit::try_to_inline() {
case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt:
return inline_cipherBlockChaining_AESCrypt(intrinsic_id());
case vmIntrinsics::_sha_implCompress:
case vmIntrinsics::_sha2_implCompress:
case vmIntrinsics::_sha5_implCompress:
return inline_sha_implCompress(intrinsic_id());
case vmIntrinsics::_digestBase_implCompressMB:
return inline_digestBase_implCompressMB(predicate);
case vmIntrinsics::_encodeISOArray:
return inline_encodeISOArray();
......@@ -898,7 +935,7 @@ bool LibraryCallKit::try_to_inline() {
}
}
Node* LibraryCallKit::try_to_predicate() {
Node* LibraryCallKit::try_to_predicate(int predicate) {
if (!jvms()->has_method()) {
// Root JVMState has a null method.
assert(map()->memory()->Opcode() == Op_Parm, "");
......@@ -912,6 +949,8 @@ Node* LibraryCallKit::try_to_predicate() {
return inline_cipherBlockChaining_AESCrypt_predicate(false);
case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt:
return inline_cipherBlockChaining_AESCrypt_predicate(true);
case vmIntrinsics::_digestBase_implCompressMB:
return inline_digestBase_implCompressMB_predicate(predicate);
default:
// If you get here, it may be that someone has added a new intrinsic
......@@ -5866,7 +5905,12 @@ Node * LibraryCallKit::load_field_from_object(Node * fromObj, const char * field
BasicType bt = field->layout_type();
// Build the resultant type of the load
const Type *type = TypeOopPtr::make_from_klass(field_klass->as_klass());
const Type *type;
if (bt == T_OBJECT) {
type = TypeOopPtr::make_from_klass(field_klass->as_klass());
} else {
type = Type::get_const_basic_type(bt);
}
// Build the load.
Node* loadedField = make_load(NULL, adr, type, bt, adr_type, MemNode::unordered, is_vol);
......@@ -5996,7 +6040,7 @@ bool LibraryCallKit::inline_cipherBlockChaining_AESCrypt(vmIntrinsics::ID id) {
assert(tinst != NULL, "CBC obj is null");
assert(tinst->klass()->is_loaded(), "CBC obj is not loaded");
ciKlass* klass_AESCrypt = tinst->klass()->as_instance_klass()->find_klass(ciSymbol::make("com/sun/crypto/provider/AESCrypt"));
if (!klass_AESCrypt->is_loaded()) return false;
assert(klass_AESCrypt->is_loaded(), "predicate checks that this class is loaded");
ciInstanceKlass* instklass_AESCrypt = klass_AESCrypt->as_instance_klass();
const TypeKlassPtr* aklass = TypeKlassPtr::make(instklass_AESCrypt);
......@@ -6071,11 +6115,8 @@ Node * LibraryCallKit::get_original_key_start_from_aescrypt_object(Node *aescryp
// note cipher==plain is more conservative than the original java code but that's OK
//
Node* LibraryCallKit::inline_cipherBlockChaining_AESCrypt_predicate(bool decrypting) {
// First, check receiver for NULL since it is virtual method.
// The receiver was checked for NULL already.
Node* objCBC = argument(0);
objCBC = null_check(objCBC);
if (stopped()) return NULL; // Always NULL
// Load embeddedCipher field of CipherBlockChaining object.
Node* embeddedCipherObj = load_field_from_object(objCBC, "embeddedCipher", "Lcom/sun/crypto/provider/SymmetricCipher;", /*is_exact*/ false);
......@@ -6122,3 +6163,258 @@ Node* LibraryCallKit::inline_cipherBlockChaining_AESCrypt_predicate(bool decrypt
record_for_igvn(region);
return _gvn.transform(region);
}
//------------------------------inline_sha_implCompress-----------------------
//
// Calculate SHA (i.e., SHA-1) for single-block byte[] array.
// void sun.security.provider.SHA.implCompress(byte[] buf, int ofs)
//
// Calculate SHA2 (i.e., SHA-224 or SHA-256) for single-block byte[] array.
// void sun.security.provider.SHA2.implCompress(byte[] buf, int ofs)
//
// Calculate SHA5 (i.e., SHA-384 or SHA-512) for single-block byte[] array.
// void sun.security.provider.SHA5.implCompress(byte[] buf, int ofs)
//
bool LibraryCallKit::inline_sha_implCompress(vmIntrinsics::ID id) {
assert(callee()->signature()->size() == 2, "sha_implCompress has 2 parameters");
Node* sha_obj = argument(0);
Node* src = argument(1); // type oop
Node* ofs = argument(2); // type int
const Type* src_type = src->Value(&_gvn);
const TypeAryPtr* top_src = src_type->isa_aryptr();
if (top_src == NULL || top_src->klass() == NULL) {
// failed array check
return false;
}
// Figure out the size and type of the elements we will be copying.
BasicType src_elem = src_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type();
if (src_elem != T_BYTE) {
return false;
}
// 'src_start' points to src array + offset
Node* src_start = array_element_address(src, ofs, src_elem);
Node* state = NULL;
address stubAddr;
const char *stubName;
switch(id) {
case vmIntrinsics::_sha_implCompress:
assert(UseSHA1Intrinsics, "need SHA1 instruction support");
state = get_state_from_sha_object(sha_obj);
stubAddr = StubRoutines::sha1_implCompress();
stubName = "sha1_implCompress";
break;
case vmIntrinsics::_sha2_implCompress:
assert(UseSHA256Intrinsics, "need SHA256 instruction support");
state = get_state_from_sha_object(sha_obj);
stubAddr = StubRoutines::sha256_implCompress();
stubName = "sha256_implCompress";
break;
case vmIntrinsics::_sha5_implCompress:
assert(UseSHA512Intrinsics, "need SHA512 instruction support");
state = get_state_from_sha5_object(sha_obj);
stubAddr = StubRoutines::sha512_implCompress();
stubName = "sha512_implCompress";
break;
default:
fatal_unexpected_iid(id);
return false;
}
if (state == NULL) return false;
// Call the stub.
Node* call = make_runtime_call(RC_LEAF|RC_NO_FP, OptoRuntime::sha_implCompress_Type(),
stubAddr, stubName, TypePtr::BOTTOM,
src_start, state);
return true;
}
//------------------------------inline_digestBase_implCompressMB-----------------------
//
// Calculate SHA/SHA2/SHA5 for multi-block byte[] array.
// int sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit)
//
bool LibraryCallKit::inline_digestBase_implCompressMB(int predicate) {
assert(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics,
"need SHA1/SHA256/SHA512 instruction support");
assert((uint)predicate < 3, "sanity");
assert(callee()->signature()->size() == 3, "digestBase_implCompressMB has 3 parameters");
Node* digestBase_obj = argument(0); // The receiver was checked for NULL already.
Node* src = argument(1); // byte[] array
Node* ofs = argument(2); // type int
Node* limit = argument(3); // type int
const Type* src_type = src->Value(&_gvn);
const TypeAryPtr* top_src = src_type->isa_aryptr();
if (top_src == NULL || top_src->klass() == NULL) {
// failed array check
return false;
}
// Figure out the size and type of the elements we will be copying.
BasicType src_elem = src_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type();
if (src_elem != T_BYTE) {
return false;
}
// 'src_start' points to src array + offset
Node* src_start = array_element_address(src, ofs, src_elem);
const char* klass_SHA_name = NULL;
const char* stub_name = NULL;
address stub_addr = NULL;
bool long_state = false;
switch (predicate) {
case 0:
if (UseSHA1Intrinsics) {
klass_SHA_name = "sun/security/provider/SHA";
stub_name = "sha1_implCompressMB";
stub_addr = StubRoutines::sha1_implCompressMB();
}
break;
case 1:
if (UseSHA256Intrinsics) {
klass_SHA_name = "sun/security/provider/SHA2";
stub_name = "sha256_implCompressMB";
stub_addr = StubRoutines::sha256_implCompressMB();
}
break;
case 2:
if (UseSHA512Intrinsics) {
klass_SHA_name = "sun/security/provider/SHA5";
stub_name = "sha512_implCompressMB";
stub_addr = StubRoutines::sha512_implCompressMB();
long_state = true;
}
break;
default:
fatal(err_msg_res("unknown SHA intrinsic predicate: %d", predicate));
}
if (klass_SHA_name != NULL) {
// get DigestBase klass to lookup for SHA klass
const TypeInstPtr* tinst = _gvn.type(digestBase_obj)->isa_instptr();
assert(tinst != NULL, "digestBase_obj is not instance???");
assert(tinst->klass()->is_loaded(), "DigestBase is not loaded");
ciKlass* klass_SHA = tinst->klass()->as_instance_klass()->find_klass(ciSymbol::make(klass_SHA_name));
assert(klass_SHA->is_loaded(), "predicate checks that this class is loaded");
ciInstanceKlass* instklass_SHA = klass_SHA->as_instance_klass();
return inline_sha_implCompressMB(digestBase_obj, instklass_SHA, long_state, stub_addr, stub_name, src_start, ofs, limit);
}
return false;
}
//------------------------------inline_sha_implCompressMB-----------------------
bool LibraryCallKit::inline_sha_implCompressMB(Node* digestBase_obj, ciInstanceKlass* instklass_SHA,
bool long_state, address stubAddr, const char *stubName,
Node* src_start, Node* ofs, Node* limit) {
const TypeKlassPtr* aklass = TypeKlassPtr::make(instklass_SHA);
const TypeOopPtr* xtype = aklass->as_instance_type();
Node* sha_obj = new (C) CheckCastPPNode(control(), digestBase_obj, xtype);
sha_obj = _gvn.transform(sha_obj);
Node* state;
if (long_state) {
state = get_state_from_sha5_object(sha_obj);
} else {
state = get_state_from_sha_object(sha_obj);
}
if (state == NULL) return false;
// Call the stub.
Node* call = make_runtime_call(RC_LEAF|RC_NO_FP,
OptoRuntime::digestBase_implCompressMB_Type(),
stubAddr, stubName, TypePtr::BOTTOM,
src_start, state, ofs, limit);
// return ofs (int)
Node* result = _gvn.transform(new (C) ProjNode(call, TypeFunc::Parms));
set_result(result);
return true;
}
//------------------------------get_state_from_sha_object-----------------------
Node * LibraryCallKit::get_state_from_sha_object(Node *sha_object) {
Node* sha_state = load_field_from_object(sha_object, "state", "[I", /*is_exact*/ false);
assert (sha_state != NULL, "wrong version of sun.security.provider.SHA/SHA2");
if (sha_state == NULL) return (Node *) NULL;
// now have the array, need to get the start address of the state array
Node* state = array_element_address(sha_state, intcon(0), T_INT);
return state;
}
//------------------------------get_state_from_sha5_object-----------------------
Node * LibraryCallKit::get_state_from_sha5_object(Node *sha_object) {
Node* sha_state = load_field_from_object(sha_object, "state", "[J", /*is_exact*/ false);
assert (sha_state != NULL, "wrong version of sun.security.provider.SHA5");
if (sha_state == NULL) return (Node *) NULL;
// now have the array, need to get the start address of the state array
Node* state = array_element_address(sha_state, intcon(0), T_LONG);
return state;
}
//----------------------------inline_digestBase_implCompressMB_predicate----------------------------
// Return a node representing the slow path of the predicate check.
// The pseudo code we want to emulate with this predicate is:
// if (digestBaseObj instanceof SHA/SHA2/SHA5) do_intrinsic, else do_javapath
//
Node* LibraryCallKit::inline_digestBase_implCompressMB_predicate(int predicate) {
assert(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics,
"need SHA1/SHA256/SHA512 instruction support");
assert((uint)predicate < 3, "sanity");
// The receiver was checked for NULL already.
Node* digestBaseObj = argument(0);
// get DigestBase klass for instanceOf check
const TypeInstPtr* tinst = _gvn.type(digestBaseObj)->isa_instptr();
assert(tinst != NULL, "digestBaseObj is null");
assert(tinst->klass()->is_loaded(), "DigestBase is not loaded");
const char* klass_SHA_name = NULL;
switch (predicate) {
case 0:
if (UseSHA1Intrinsics) {
// we want to do an instanceof comparison against the SHA class
klass_SHA_name = "sun/security/provider/SHA";
}
break;
case 1:
if (UseSHA256Intrinsics) {
// we want to do an instanceof comparison against the SHA2 class
klass_SHA_name = "sun/security/provider/SHA2";
}
break;
case 2:
if (UseSHA512Intrinsics) {
// we want to do an instanceof comparison against the SHA5 class
klass_SHA_name = "sun/security/provider/SHA5";
}
break;
default:
fatal(err_msg_res("unknown SHA intrinsic predicate: %d", predicate));
}
ciKlass* klass_SHA = NULL;
if (klass_SHA_name != NULL) {
klass_SHA = tinst->klass()->as_instance_klass()->find_klass(ciSymbol::make(klass_SHA_name));
}
if ((klass_SHA == NULL) || !klass_SHA->is_loaded()) {
// if none of SHA/SHA2/SHA5 is loaded, we never take the intrinsic fast path
Node* ctrl = control();
set_control(top()); // no intrinsic path
return ctrl;
}
ciInstanceKlass* instklass_SHA = klass_SHA->as_instance_klass();
Node* instofSHA = gen_instanceof(digestBaseObj, makecon(TypeKlassPtr::make(instklass_SHA)));
Node* cmp_instof = _gvn.transform(new (C) CmpINode(instofSHA, intcon(1)));
Node* bool_instof = _gvn.transform(new (C) BoolNode(cmp_instof, BoolTest::ne));
Node* instof_false = generate_guard(bool_instof, NULL, PROB_MIN);
return instof_false; // even if it is NULL
}
......@@ -898,6 +898,50 @@ const TypeFunc* OptoRuntime::cipherBlockChaining_aescrypt_Type() {
return TypeFunc::make(domain, range);
}
/*
* void implCompress(byte[] buf, int ofs)
*/
const TypeFunc* OptoRuntime::sha_implCompress_Type() {
// create input type (domain)
int num_args = 2;
int argcnt = num_args;
const Type** fields = TypeTuple::fields(argcnt);
int argp = TypeFunc::Parms;
fields[argp++] = TypePtr::NOTNULL; // buf
fields[argp++] = TypePtr::NOTNULL; // state
assert(argp == TypeFunc::Parms+argcnt, "correct decoding");
const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields);
// no result type needed
fields = TypeTuple::fields(1);
fields[TypeFunc::Parms+0] = NULL; // void
const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, fields);
return TypeFunc::make(domain, range);
}
/*
* int implCompressMultiBlock(byte[] b, int ofs, int limit)
*/
const TypeFunc* OptoRuntime::digestBase_implCompressMB_Type() {
// create input type (domain)
int num_args = 4;
int argcnt = num_args;
const Type** fields = TypeTuple::fields(argcnt);
int argp = TypeFunc::Parms;
fields[argp++] = TypePtr::NOTNULL; // buf
fields[argp++] = TypePtr::NOTNULL; // state
fields[argp++] = TypeInt::INT; // ofs
fields[argp++] = TypeInt::INT; // limit
assert(argp == TypeFunc::Parms+argcnt, "correct decoding");
const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields);
// returning ofs (int)
fields = TypeTuple::fields(1);
fields[TypeFunc::Parms+0] = TypeInt::INT; // ofs
const TypeTuple* range = TypeTuple::make(TypeFunc::Parms+1, fields);
return TypeFunc::make(domain, range);
}
//------------- Interpreter state access for on stack replacement
const TypeFunc* OptoRuntime::osr_end_Type() {
// create input type (domain)
......
/*
* Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1998, 2014, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
......@@ -300,6 +300,9 @@ private:
static const TypeFunc* aescrypt_block_Type();
static const TypeFunc* cipherBlockChaining_aescrypt_Type();
static const TypeFunc* sha_implCompress_Type();
static const TypeFunc* digestBase_implCompressMB_Type();
static const TypeFunc* updateBytesCRC32_Type();
// leaf on stack replacement interpreter accessor types
......
......@@ -1374,6 +1374,20 @@ void SuperWord::output() {
if (n->is_Load()) {
Node* ctl = n->in(MemNode::Control);
Node* mem = first->in(MemNode::Memory);
SWPointer p1(n->as_Mem(), this);
// Identify the memory dependency for the new loadVector node by
// walking up through the memory chain.
// This is done to give flexibility to the new loadVector node so that
// it can move above independent storeVector nodes.
while (mem->is_StoreVector()) {
SWPointer p2(mem->as_Mem(), this);
int cmp = p1.cmp(p2);
if (SWPointer::not_equal(cmp) || !SWPointer::comparable(cmp)) {
mem = mem->in(MemNode::Memory);
} else {
break; // dependent memory
}
}
Node* adr = low_adr->in(MemNode::Address);
const TypePtr* atyp = n->adr_type();
vn = LoadVectorNode::make(C, opc, ctl, mem, adr, atyp, vlen, velt_basic_type(n));
......
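
A hedged illustration of the loop shape this walk helps (array names are made up): the store to a[] sits on the memory chain of the load from y[], but the two references compare as independent, so the new LoadVector may take its memory input from above the StoreVector.

class SuperWordSketch {
    static void kernel(int[] a, int[] b, int[] x, int[] y) {
        for (int i = 0; i < a.length; i++) {
            a[i] = x[i] + 1; // packed into a StoreVector
            // the LoadVector for y[] finds that StoreVector on its memory
            // chain; the two references are independent (SWPointer compares
            // not_equal), so the walk above re-anchors the load past it
            b[i] = y[i] + 1;
        }
    }
}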
......@@ -597,6 +597,9 @@ class CommandLineFlags {
product(bool, UseAES, false, \
"Control whether AES instructions can be used on x86/x64") \
\
product(bool, UseSHA, false, \
"Control whether SHA instructions can be used on SPARC") \
\
product(uintx, LargePageSizeInBytes, 0, \
"Large page size (0 to let VM choose the page size)") \
\
......@@ -703,6 +706,15 @@ class CommandLineFlags {
product(bool, UseAESIntrinsics, false, \
"Use intrinsics for AES versions of crypto") \
\
product(bool, UseSHA1Intrinsics, false, \
"Use intrinsics for SHA-1 crypto hash function") \
\
product(bool, UseSHA256Intrinsics, false, \
"Use intrinsics for SHA-224 and SHA-256 crypto hash functions") \
\
product(bool, UseSHA512Intrinsics, false, \
"Use intrinsics for SHA-384 and SHA-512 crypto hash functions") \
\
product(bool, UseCRC32Intrinsics, false, \
"use intrinsics for java.util.zip.CRC32") \
\
......
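
A hedged usage note, inferred only from the flag descriptions above: UseSHA gates the SPARC SHA instructions as a whole, while the three per-algorithm flags opt individual intrinsics in, so enabling everything explicitly would look like:

    java -XX:+UseSHA -XX:+UseSHA1Intrinsics -XX:+UseSHA256Intrinsics -XX:+UseSHA512Intrinsics ...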
/*
* Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
......@@ -125,6 +125,13 @@ address StubRoutines::_aescrypt_decryptBlock = NULL;
address StubRoutines::_cipherBlockChaining_encryptAESCrypt = NULL;
address StubRoutines::_cipherBlockChaining_decryptAESCrypt = NULL;
address StubRoutines::_sha1_implCompress = NULL;
address StubRoutines::_sha1_implCompressMB = NULL;
address StubRoutines::_sha256_implCompress = NULL;
address StubRoutines::_sha256_implCompressMB = NULL;
address StubRoutines::_sha512_implCompress = NULL;
address StubRoutines::_sha512_implCompressMB = NULL;
address StubRoutines::_updateBytesCRC32 = NULL;
address StubRoutines::_crc_table_adr = NULL;
......
/*
* Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
......@@ -207,6 +207,13 @@ class StubRoutines: AllStatic {
static address _cipherBlockChaining_encryptAESCrypt;
static address _cipherBlockChaining_decryptAESCrypt;
static address _sha1_implCompress;
static address _sha1_implCompressMB;
static address _sha256_implCompress;
static address _sha256_implCompressMB;
static address _sha512_implCompress;
static address _sha512_implCompressMB;
static address _updateBytesCRC32;
static address _crc_table_adr;
......@@ -356,6 +363,13 @@ class StubRoutines: AllStatic {
static address cipherBlockChaining_encryptAESCrypt() { return _cipherBlockChaining_encryptAESCrypt; }
static address cipherBlockChaining_decryptAESCrypt() { return _cipherBlockChaining_decryptAESCrypt; }
static address sha1_implCompress() { return _sha1_implCompress; }
static address sha1_implCompressMB() { return _sha1_implCompressMB; }
static address sha256_implCompress() { return _sha256_implCompress; }
static address sha256_implCompressMB() { return _sha256_implCompressMB; }
static address sha512_implCompress() { return _sha512_implCompress; }
static address sha512_implCompressMB() { return _sha512_implCompressMB; }
static address updateBytesCRC32() { return _updateBytesCRC32; }
static address crc_table_addr() { return _crc_table_adr; }
......
/*
* Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
/**
* @test
* @bug 8035968
* @summary C2 support for SHA on SPARC
*
* @run main/othervm/timeout=600 -Xbatch -Dalgorithm=SHA-1 TestSHA
* @run main/othervm/timeout=600 -Xbatch -Dalgorithm=SHA-224 TestSHA
* @run main/othervm/timeout=600 -Xbatch -Dalgorithm=SHA-256 TestSHA
* @run main/othervm/timeout=600 -Xbatch -Dalgorithm=SHA-384 TestSHA
* @run main/othervm/timeout=600 -Xbatch -Dalgorithm=SHA-512 TestSHA
*
* @run main/othervm/timeout=600 -Xbatch -Dalgorithm=SHA-1 -Doffset=1 TestSHA
* @run main/othervm/timeout=600 -Xbatch -Dalgorithm=SHA-224 -Doffset=1 TestSHA
* @run main/othervm/timeout=600 -Xbatch -Dalgorithm=SHA-256 -Doffset=1 TestSHA
* @run main/othervm/timeout=600 -Xbatch -Dalgorithm=SHA-384 -Doffset=1 TestSHA
* @run main/othervm/timeout=600 -Xbatch -Dalgorithm=SHA-512 -Doffset=1 TestSHA
*
* @run main/othervm/timeout=600 -Xbatch -Dalgorithm=SHA-1 -Dalgorithm2=SHA-256 TestSHA
* @run main/othervm/timeout=600 -Xbatch -Dalgorithm=SHA-1 -Dalgorithm2=SHA-512 TestSHA
* @run main/othervm/timeout=600 -Xbatch -Dalgorithm=SHA-256 -Dalgorithm2=SHA-512 TestSHA
*
* @run main/othervm/timeout=600 -Xbatch -Dalgorithm=SHA-1 -Dalgorithm2=MD5 TestSHA
* @run main/othervm/timeout=600 -Xbatch -Dalgorithm=MD5 -Dalgorithm2=SHA-1 TestSHA
*/
import java.security.MessageDigest;
import java.util.Arrays;
public class TestSHA {
private static final int HASH_LEN = 64; /* up to 512-bit */
private static final int ALIGN = 8; /* for different data alignments */
public static void main(String[] args) throws Exception {
String provider = System.getProperty("provider", "SUN");
String algorithm = System.getProperty("algorithm", "SHA-1");
String algorithm2 = System.getProperty("algorithm2", "");
int msgSize = Integer.getInteger("msgSize", 1024);
int offset = Integer.getInteger("offset", 0) % ALIGN;
int iters = (args.length > 0 ? Integer.valueOf(args[0]) : 100000);
int warmupIters = (args.length > 1 ? Integer.valueOf(args[1]) : 20000);
testSHA(provider, algorithm, msgSize, offset, iters, warmupIters);
if (!algorithm2.isEmpty()) {
testSHA(provider, algorithm2, msgSize, offset, iters, warmupIters);
}
}
static void testSHA(String provider, String algorithm, int msgSize,
int offset, int iters, int warmupIters) throws Exception {
System.out.println("provider = " + provider);
System.out.println("algorithm = " + algorithm);
System.out.println("msgSize = " + msgSize + " bytes");
System.out.println("offset = " + offset);
System.out.println("iters = " + iters);
byte[] expectedHash = new byte[HASH_LEN];
byte[] hash = new byte[HASH_LEN];
byte[] data = new byte[msgSize + offset];
for (int i = 0; i < (msgSize + offset); i++) {
data[i] = (byte)(i & 0xff);
}
try {
MessageDigest sha = MessageDigest.getInstance(algorithm, provider);
/* do once, which doesn't use intrinsics */
sha.reset();
sha.update(data, offset, msgSize);
expectedHash = sha.digest();
/* warm up */
for (int i = 0; i < warmupIters; i++) {
sha.reset();
sha.update(data, offset, msgSize);
hash = sha.digest();
}
/* check result */
if (!Arrays.equals(hash, expectedHash)) {
System.out.println("TestSHA Error: ");
showArray(expectedHash, "expectedHash");
showArray(hash, "computedHash");
//System.exit(1);
throw new Exception("TestSHA Error");
} else {
showArray(hash, "hash");
}
/* measure performance */
long start = System.nanoTime();
for (int i = 0; i < iters; i++) {
sha.reset();
sha.update(data, offset, msgSize);
hash = sha.digest();
}
long end = System.nanoTime();
double total = (double)(end - start)/1e9; /* in seconds */
double thruput = (double)msgSize*iters/1e6/total; /* in MB/s */
System.out.println("TestSHA runtime = " + total + " seconds");
System.out.println("TestSHA throughput = " + thruput + " MB/s");
System.out.println();
} catch (Exception e) {
System.out.println("Exception: " + e);
//System.exit(1);
throw new Exception(e);
}
}
static void showArray(byte b[], String name) {
System.out.format("%s [%d]: ", name, b.length);
for (int i = 0; i < Math.min(b.length, HASH_LEN); i++) {
System.out.format("%02x ", b[i] & 0xff);
}
System.out.println();
}
}
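
For manual runs outside jtreg: main above takes iters and warmupIters as positional arguments and reads the algorithm from system properties, so an explicit invocation on a SPARC build with the new flags might look like:

    java -XX:+UseSHA -XX:+UseSHA256Intrinsics -Xbatch -Dalgorithm=SHA-256 TestSHA 100000 20000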