diff --git a/src/cpu/sparc/vm/sparc.ad b/src/cpu/sparc/vm/sparc.ad
index f7040f54ea6c92830d919514d07041cd819f6c59..f6e7d03b6eeb3b5fb7e92ae93053245442c1bc3a 100644
--- a/src/cpu/sparc/vm/sparc.ad
+++ b/src/cpu/sparc/vm/sparc.ad
@@ -1870,6 +1870,11 @@ const int Matcher::vector_ideal_reg(int size) {
   return Op_RegD;
 }
 
+const int Matcher::vector_shift_count_ideal_reg(int size) {  // SPARC variant of the vector shift-count register query
+  fatal("vector shift is not supported");  // this port defines no shift-count register, so any call is reported as fatal
+  return Node::NotAMachineReg;  // value returned after fatal(); presumably never reached in practice
+}
+
 // Limits on vector size (number of elements) loaded into vector.
 const int Matcher::max_vector_size(const BasicType bt) {
   assert(is_java_primitive(bt), "only primitive type vectors");
diff --git a/src/cpu/x86/vm/x86.ad b/src/cpu/x86/vm/x86.ad
index c3831006b0b85bd6d82c3657d4207a207daeb44e..a2cf6f7945db2cab12493ed8b9f68ed014ef4343 100644
--- a/src/cpu/x86/vm/x86.ad
+++ b/src/cpu/x86/vm/x86.ad
@@ -571,6 +571,11 @@ const int Matcher::vector_ideal_reg(int size) {
   return 0;
 }
 
+// Only the lowest bits of an xmm register are used for the vector shift count.
+const int Matcher::vector_shift_count_ideal_reg(int size) {
+  return Op_VecS;  // always Op_VecS; the 'size' argument is not consulted
+}
+
 // x86 supports misaligned vectors store/load.
 const bool Matcher::misaligned_vectors_ok() {
   return !AlignVector; // can be changed by flag
@@ -3758,10 +3763,24 @@ instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{
   ins_pipe( pipe_slow );
 %}
 
+// ------------------------------ Shift ---------------------------------------
+
+// Left and right shift count vectors are the same on x86
+// (only lowest bits of xmm reg are used for count).
+instruct vshiftcnt(vecS dst, rRegI cnt) %{
+  match(Set dst (LShiftCntV cnt));  // one instruction serves the left-shift count node ...
+  match(Set dst (RShiftCntV cnt));  // ... and the right-shift count node
+  format %{ "movd    $dst,$cnt\t! load shift count" %}
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $cnt$$Register);  // copy the integer count from the general register into the xmm register
+  %}
+  ins_pipe( pipe_slow );
+%}
+
 // ------------------------------ LeftShift -----------------------------------
 
 // Shorts/Chars vector left shift
-instruct vsll2S(vecS dst, regF shift) %{
+instruct vsll2S(vecS dst, vecS shift) %{
   predicate(n->as_Vector()->length() == 2);
   match(Set dst (LShiftVS dst shift));
   format %{ "psllw   $dst,$shift\t! left shift packed2S" %}
@@ -3781,7 +3800,7 @@ instruct vsll2S_imm(vecS dst, immI8 shift) %{
   ins_pipe( pipe_slow );
 %}
 
-instruct vsll2S_reg(vecS dst, vecS src, regF shift) %{
+instruct vsll2S_reg(vecS dst, vecS src, vecS shift) %{
   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
   match(Set dst (LShiftVS src shift));
   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed2S" %}
@@ -3803,7 +3822,7 @@ instruct vsll2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
   ins_pipe( pipe_slow );
 %}
 
-instruct vsll4S(vecD dst, regF shift) %{
+instruct vsll4S(vecD dst, vecS shift) %{
   predicate(n->as_Vector()->length() == 4);
   match(Set dst (LShiftVS dst shift));
   format %{ "psllw   $dst,$shift\t! left shift packed4S" %}
@@ -3823,7 +3842,7 @@ instruct vsll4S_imm(vecD dst, immI8 shift) %{
   ins_pipe( pipe_slow );
 %}
 
-instruct vsll4S_reg(vecD dst, vecD src, regF shift) %{
+instruct vsll4S_reg(vecD dst, vecD src, vecS shift) %{
   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
   match(Set dst (LShiftVS src shift));
   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed4S" %}
@@ -3845,7 +3864,7 @@ instruct vsll4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
   ins_pipe( pipe_slow );
 %}
 
-instruct vsll8S(vecX dst, regF shift) %{
+instruct vsll8S(vecX dst, vecS shift) %{
   predicate(n->as_Vector()->length() == 8);
   match(Set dst (LShiftVS dst shift));
   format %{ "psllw   $dst,$shift\t! left shift packed8S" %}
@@ -3865,7 +3884,7 @@ instruct vsll8S_imm(vecX dst, immI8 shift) %{
   ins_pipe( pipe_slow );
 %}
 
-instruct vsll8S_reg(vecX dst, vecX src, regF shift) %{
+instruct vsll8S_reg(vecX dst, vecX src, vecS shift) %{
   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
   match(Set dst (LShiftVS src shift));
   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed8S" %}
@@ -3887,7 +3906,7 @@ instruct vsll8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
   ins_pipe( pipe_slow );
 %}
 
-instruct vsll16S_reg(vecY dst, vecY src, regF shift) %{
+instruct vsll16S_reg(vecY dst, vecY src, vecS shift) %{
   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
   match(Set dst (LShiftVS src shift));
   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed16S" %}
@@ -3910,7 +3929,7 @@ instruct vsll16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
 %}
 
 // Integers vector left shift
-instruct vsll2I(vecD dst, regF shift) %{
+instruct vsll2I(vecD dst, vecS shift) %{
   predicate(n->as_Vector()->length() == 2);
   match(Set dst (LShiftVI dst shift));
   format %{ "pslld   $dst,$shift\t! left shift packed2I" %}
@@ -3930,7 +3949,7 @@ instruct vsll2I_imm(vecD dst, immI8 shift) %{
   ins_pipe( pipe_slow );
 %}
 
-instruct vsll2I_reg(vecD dst, vecD src, regF shift) %{
+instruct vsll2I_reg(vecD dst, vecD src, vecS shift) %{
   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
   match(Set dst (LShiftVI src shift));
   format %{ "vpslld  $dst,$src,$shift\t! left shift packed2I" %}
@@ -3952,7 +3971,7 @@ instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
   ins_pipe( pipe_slow );
 %}
 
-instruct vsll4I(vecX dst, regF shift) %{
+instruct vsll4I(vecX dst, vecS shift) %{
   predicate(n->as_Vector()->length() == 4);
   match(Set dst (LShiftVI dst shift));
   format %{ "pslld   $dst,$shift\t! left shift packed4I" %}
@@ -3972,7 +3991,7 @@ instruct vsll4I_imm(vecX dst, immI8 shift) %{
   ins_pipe( pipe_slow );
 %}
 
-instruct vsll4I_reg(vecX dst, vecX src, regF shift) %{
+instruct vsll4I_reg(vecX dst, vecX src, vecS shift) %{
   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
   match(Set dst (LShiftVI src shift));
   format %{ "vpslld  $dst,$src,$shift\t! left shift packed4I" %}
@@ -3994,7 +4013,7 @@ instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
   ins_pipe( pipe_slow );
 %}
 
-instruct vsll8I_reg(vecY dst, vecY src, regF shift) %{
+instruct vsll8I_reg(vecY dst, vecY src, vecS shift) %{
   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
   match(Set dst (LShiftVI src shift));
   format %{ "vpslld  $dst,$src,$shift\t! left shift packed8I" %}
@@ -4017,7 +4036,7 @@ instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
 %}
 
 // Longs vector left shift
-instruct vsll2L(vecX dst, regF shift) %{
+instruct vsll2L(vecX dst, vecS shift) %{
   predicate(n->as_Vector()->length() == 2);
   match(Set dst (LShiftVL dst shift));
   format %{ "psllq   $dst,$shift\t! left shift packed2L" %}
@@ -4037,7 +4056,7 @@ instruct vsll2L_imm(vecX dst, immI8 shift) %{
   ins_pipe( pipe_slow );
 %}
 
-instruct vsll2L_reg(vecX dst, vecX src, regF shift) %{
+instruct vsll2L_reg(vecX dst, vecX src, vecS shift) %{
   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
   match(Set dst (LShiftVL src shift));
   format %{ "vpsllq  $dst,$src,$shift\t! left shift packed2L" %}
@@ -4059,7 +4078,7 @@ instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
   ins_pipe( pipe_slow );
 %}
 
-instruct vsll4L_reg(vecY dst, vecY src, regF shift) %{
+instruct vsll4L_reg(vecY dst, vecY src, vecS shift) %{
   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
   match(Set dst (LShiftVL src shift));
   format %{ "vpsllq  $dst,$src,$shift\t! left shift packed4L" %}
@@ -4088,7 +4107,7 @@ instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
 // sign extension before a shift.
 
 // Integers vector logical right shift
-instruct vsrl2I(vecD dst, regF shift) %{
+instruct vsrl2I(vecD dst, vecS shift) %{
   predicate(n->as_Vector()->length() == 2);
   match(Set dst (URShiftVI dst shift));
   format %{ "psrld   $dst,$shift\t! logical right shift packed2I" %}
@@ -4108,7 +4127,7 @@ instruct vsrl2I_imm(vecD dst, immI8 shift) %{
   ins_pipe( pipe_slow );
 %}
 
-instruct vsrl2I_reg(vecD dst, vecD src, regF shift) %{
+instruct vsrl2I_reg(vecD dst, vecD src, vecS shift) %{
   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
   match(Set dst (URShiftVI src shift));
   format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed2I" %}
@@ -4130,7 +4149,7 @@ instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
   ins_pipe( pipe_slow );
 %}
 
-instruct vsrl4I(vecX dst, regF shift) %{
+instruct vsrl4I(vecX dst, vecS shift) %{
   predicate(n->as_Vector()->length() == 4);
   match(Set dst (URShiftVI dst shift));
   format %{ "psrld   $dst,$shift\t! logical right shift packed4I" %}
@@ -4150,7 +4169,7 @@ instruct vsrl4I_imm(vecX dst, immI8 shift) %{
   ins_pipe( pipe_slow );
 %}
 
-instruct vsrl4I_reg(vecX dst, vecX src, regF shift) %{
+instruct vsrl4I_reg(vecX dst, vecX src, vecS shift) %{
   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
   match(Set dst (URShiftVI src shift));
   format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed4I" %}
@@ -4172,7 +4191,7 @@ instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
   ins_pipe( pipe_slow );
 %}
 
-instruct vsrl8I_reg(vecY dst, vecY src, regF shift) %{
+instruct vsrl8I_reg(vecY dst, vecY src, vecS shift) %{
   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
   match(Set dst (URShiftVI src shift));
   format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed8I" %}
@@ -4195,7 +4214,7 @@ instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
 %}
 
 // Longs vector logical right shift
-instruct vsrl2L(vecX dst, regF shift) %{
+instruct vsrl2L(vecX dst, vecS shift) %{
   predicate(n->as_Vector()->length() == 2);
   match(Set dst (URShiftVL dst shift));
   format %{ "psrlq   $dst,$shift\t! logical right shift packed2L" %}
@@ -4215,7 +4234,7 @@ instruct vsrl2L_imm(vecX dst, immI8 shift) %{
   ins_pipe( pipe_slow );
 %}
 
-instruct vsrl2L_reg(vecX dst, vecX src, regF shift) %{
+instruct vsrl2L_reg(vecX dst, vecX src, vecS shift) %{
   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
   match(Set dst (URShiftVL src shift));
   format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed2L" %}
@@ -4237,7 +4256,7 @@ instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
   ins_pipe( pipe_slow );
 %}
 
-instruct vsrl4L_reg(vecY dst, vecY src, regF shift) %{
+instruct vsrl4L_reg(vecY dst, vecY src, vecS shift) %{
   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
   match(Set dst (URShiftVL src shift));
   format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed4L" %}
@@ -4262,7 +4281,7 @@ instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
 // ------------------- ArithmeticRightShift -----------------------------------
 
 // Shorts/Chars vector arithmetic right shift
-instruct vsra2S(vecS dst, regF shift) %{
+instruct vsra2S(vecS dst, vecS shift) %{
   predicate(n->as_Vector()->length() == 2);
   match(Set dst (RShiftVS dst shift));
   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed2S" %}
@@ -4282,7 +4301,7 @@ instruct vsra2S_imm(vecS dst, immI8 shift) %{
   ins_pipe( pipe_slow );
 %}
 
-instruct vsra2S_reg(vecS dst, vecS src, regF shift) %{
+instruct vsra2S_reg(vecS dst, vecS src, vecS shift) %{
   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
   match(Set dst (RShiftVS src shift));
   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed2S" %}
@@ -4304,7 +4323,7 @@ instruct vsra2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
   ins_pipe( pipe_slow );
 %}
 
-instruct vsra4S(vecD dst, regF shift) %{
+instruct vsra4S(vecD dst, vecS shift) %{
   predicate(n->as_Vector()->length() == 4);
   match(Set dst (RShiftVS dst shift));
   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed4S" %}
@@ -4324,7 +4343,7 @@ instruct vsra4S_imm(vecD dst, immI8 shift) %{
   ins_pipe( pipe_slow );
 %}
 
-instruct vsra4S_reg(vecD dst, vecD src, regF shift) %{
+instruct vsra4S_reg(vecD dst, vecD src, vecS shift) %{
   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
   match(Set dst (RShiftVS src shift));
   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed4S" %}
@@ -4346,7 +4365,7 @@ instruct vsra4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
   ins_pipe( pipe_slow );
 %}
 
-instruct vsra8S(vecX dst, regF shift) %{
+instruct vsra8S(vecX dst, vecS shift) %{
   predicate(n->as_Vector()->length() == 8);
   match(Set dst (RShiftVS dst shift));
   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed8S" %}
@@ -4366,7 +4385,7 @@ instruct vsra8S_imm(vecX dst, immI8 shift) %{
   ins_pipe( pipe_slow );
 %}
 
-instruct vsra8S_reg(vecX dst, vecX src, regF shift) %{
+instruct vsra8S_reg(vecX dst, vecX src, vecS shift) %{
   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
   match(Set dst (RShiftVS src shift));
   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed8S" %}
@@ -4388,7 +4407,7 @@ instruct vsra8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
   ins_pipe( pipe_slow );
 %}
 
-instruct vsra16S_reg(vecY dst, vecY src, regF shift) %{
+instruct vsra16S_reg(vecY dst, vecY src, vecS shift) %{
   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
   match(Set dst (RShiftVS src shift));
   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed16S" %}
@@ -4411,7 +4430,7 @@ instruct vsra16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
 %}
 
 // Integers vector arithmetic right shift
-instruct vsra2I(vecD dst, regF shift) %{
+instruct vsra2I(vecD dst, vecS shift) %{
   predicate(n->as_Vector()->length() == 2);
   match(Set dst (RShiftVI dst shift));
   format %{ "psrad   $dst,$shift\t! arithmetic right shift packed2I" %}
@@ -4431,7 +4450,7 @@ instruct vsra2I_imm(vecD dst, immI8 shift) %{
   ins_pipe( pipe_slow );
 %}
 
-instruct vsra2I_reg(vecD dst, vecD src, regF shift) %{
+instruct vsra2I_reg(vecD dst, vecD src, vecS shift) %{
   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
   match(Set dst (RShiftVI src shift));
   format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed2I" %}
@@ -4453,7 +4472,7 @@ instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
   ins_pipe( pipe_slow );
 %}
 
-instruct vsra4I(vecX dst, regF shift) %{
+instruct vsra4I(vecX dst, vecS shift) %{
   predicate(n->as_Vector()->length() == 4);
   match(Set dst (RShiftVI dst shift));
   format %{ "psrad   $dst,$shift\t! arithmetic right shift packed4I" %}
@@ -4473,7 +4492,7 @@ instruct vsra4I_imm(vecX dst, immI8 shift) %{
   ins_pipe( pipe_slow );
 %}
 
-instruct vsra4I_reg(vecX dst, vecX src, regF shift) %{
+instruct vsra4I_reg(vecX dst, vecX src, vecS shift) %{
   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
   match(Set dst (RShiftVI src shift));
   format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed4I" %}
@@ -4495,7 +4514,7 @@ instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
   ins_pipe( pipe_slow );
 %}
 
-instruct vsra8I_reg(vecY dst, vecY src, regF shift) %{
+instruct vsra8I_reg(vecY dst, vecY src, vecS shift) %{
   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
   match(Set dst (RShiftVI src shift));
   format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed8I" %}
diff --git a/src/share/vm/adlc/formssel.cpp b/src/share/vm/adlc/formssel.cpp
index fa5f91ca8004c71b6e55399e8bb92b67bd1d76bd..539bc3a7b4fcdad03cd8fadb4e9955ab6c9deae1 100644
--- a/src/share/vm/adlc/formssel.cpp
+++ b/src/share/vm/adlc/formssel.cpp
@@ -4049,6 +4049,7 @@ bool MatchRule::is_vector() const {
     "MulVS","MulVI","MulVF","MulVD",
     "DivVF","DivVD",
     "AndV" ,"XorV" ,"OrV",
+    "LShiftCntV","RShiftCntV",
     "LShiftVB","LShiftVS","LShiftVI","LShiftVL",
     "RShiftVB","RShiftVS","RShiftVI","RShiftVL",
     "URShiftVB","URShiftVS","URShiftVI","URShiftVL",
diff --git a/src/share/vm/opto/classes.hpp b/src/share/vm/opto/classes.hpp
index 13165815734f950a347001c113baf0ca73b42934..ea5ea33f4f5a03b5626ea48ac95d23c49240cde4 100644
--- a/src/share/vm/opto/classes.hpp
+++ b/src/share/vm/opto/classes.hpp
@@ -268,6 +268,8 @@ macro(MulVF)
 macro(MulVD)
 macro(DivVF)
 macro(DivVD)
+macro(LShiftCntV)
+macro(RShiftCntV)
 macro(LShiftVB)
 macro(LShiftVS)
 macro(LShiftVI)
diff --git a/src/share/vm/opto/matcher.hpp b/src/share/vm/opto/matcher.hpp
index 51282db370f881f4582ab91d126c2245bb74e6ef..1936ba97a4a289610ec4e3144a792d658afe12f3 100644
--- a/src/share/vm/opto/matcher.hpp
+++ b/src/share/vm/opto/matcher.hpp
@@ -261,6 +261,7 @@ public:
 
   // Vector ideal reg
   static const int vector_ideal_reg(int len);
+  static const int vector_shift_count_ideal_reg(int len);
 
   // CPU supports misaligned vectors store/load.
   static const bool misaligned_vectors_ok();
diff --git a/src/share/vm/opto/superword.cpp b/src/share/vm/opto/superword.cpp
index 20e287dc051749e9e31bbe9ea08ad804a0484002..ab202840ab0c8acdce67b6f4fed81d823c41c66d 100644
--- a/src/share/vm/opto/superword.cpp
+++ b/src/share/vm/opto/superword.cpp
@@ -1436,10 +1436,9 @@ Node* SuperWord::vector_opd(Node_List* p, int opd_idx) {
       return opd; // input is matching vector
     }
     if ((opd_idx == 2) && VectorNode::is_shift(p0)) {
-      // No vector is needed for shift count.
-      // Vector instructions do not mask shift count, do it here.
       Compile* C = _phase->C;
       Node* cnt = opd;
+      // Vector instructions do not mask shift count, do it here.
       juint mask = (p0->bottom_type() == TypeInt::INT) ? (BitsPerInt - 1) : (BitsPerLong - 1);
       const TypeInt* t = opd->find_int_type();
       if (t != NULL && t->is_con()) {
@@ -1456,8 +1455,8 @@ Node* SuperWord::vector_opd(Node_List* p, int opd_idx) {
           _phase->set_ctrl(cnt, _phase->get_ctrl(opd));
         }
         assert(opd->bottom_type()->isa_int(), "int type only");
-        // Move non constant shift count into XMM register.
-        cnt = new (C) MoveI2FNode(cnt);
+        // Move non constant shift count into vector register.
+        cnt = VectorNode::shift_count(C, p0, cnt, vlen, velt_basic_type(p0));
       }
       if (cnt != opd) {
         _igvn.register_new_node_with_optimizer(cnt);
diff --git a/src/share/vm/opto/vectornode.cpp b/src/share/vm/opto/vectornode.cpp
index 180b722770303b019f41bd6052248f273dd6bc4d..d0955a819e1d0c182fa2af15ee30ca638ae39b90 100644
--- a/src/share/vm/opto/vectornode.cpp
+++ b/src/share/vm/opto/vectornode.cpp
@@ -243,6 +243,8 @@ void VectorNode::vector_operands(Node* n, uint* start, uint* end) {
 VectorNode* VectorNode::make(Compile* C, int opc, Node* n1, Node* n2, uint vlen, BasicType bt) {
   const TypeVect* vt = TypeVect::make(bt, vlen);
   int vopc = VectorNode::opcode(opc, bt);
+  // This method should not be called for unimplemented vectors.
+  guarantee(vopc > 0, err_msg_res("Vector for '%s' is not implemented", NodeClassNames[opc]));
 
   switch (vopc) {
   case Op_AddVB: return new (C) AddVBNode(n1, n2, vt);
@@ -286,7 +288,7 @@ VectorNode* VectorNode::make(Compile* C, int opc, Node* n1, Node* n2, uint vlen,
   case Op_OrV:  return new (C) OrVNode (n1, n2, vt);
   case Op_XorV: return new (C) XorVNode(n1, n2, vt);
   }
-  ShouldNotReachHere();
+  fatal(err_msg_res("Missed vector creation for '%s'", NodeClassNames[vopc]));
   return NULL;
 
 }
@@ -312,7 +314,25 @@ VectorNode* VectorNode::scalar2vector(Compile* C, Node* s, uint vlen, const Type
   case T_DOUBLE:
     return new (C) ReplicateDNode(s, vt);
   }
-  ShouldNotReachHere();
+  fatal(err_msg_res("Type '%s' is not supported for vectors", type2name(bt)));
+  return NULL;
+}
+
+VectorNode* VectorNode::shift_count(Compile* C, Node* shift, Node* cnt, uint vlen, BasicType bt) {  // builds the vector node carrying a variable shift count
+  assert(VectorNode::is_shift(shift) && !cnt->is_Con(), "only variable shift count");  // 'shift' must be a shift node and 'cnt' must not be a constant
+  // Give the count node the same vector type (element type bt, vlen elements) as the shift it feeds.
+  const TypeVect* vt = TypeVect::make(bt, vlen);
+  switch (shift->Opcode()) {  // dispatch on the opcode of the 'shift' node passed in
+  case Op_LShiftI:
+  case Op_LShiftL:
+    return new (C) LShiftCntVNode(cnt, vt);  // int and long left shifts get a left-shift count node
+  case Op_RShiftI:
+  case Op_RShiftL:
+  case Op_URShiftI:
+  case Op_URShiftL:
+    return new (C) RShiftCntVNode(cnt, vt);  // arithmetic and logical right shifts share the right-shift count node
+  }
+  fatal(err_msg_res("Missed vector creation for '%s'", NodeClassNames[shift->Opcode()]));  // any other opcode is reported as fatal
   return NULL;
 }
 
@@ -335,7 +355,7 @@ PackNode* PackNode::make(Compile* C, Node* s, uint vlen, BasicType bt) {
   case T_DOUBLE:
     return new (C) PackDNode(s, vt);
   }
-  ShouldNotReachHere();
+  fatal(err_msg_res("Type '%s' is not supported for vectors", type2name(bt)));
   return NULL;
 }
 
@@ -371,7 +391,7 @@ PackNode* PackNode::binary_tree_pack(Compile* C, int lo, int hi) {
     case T_DOUBLE:
       return new (C) Pack2DNode(n1, n2, TypeVect::make(T_DOUBLE, 2));
     }
-    ShouldNotReachHere();
+    fatal(err_msg_res("Type '%s' is not supported for vectors", type2name(bt)));
   }
   return NULL;
 }
@@ -381,7 +401,6 @@ LoadVectorNode* LoadVectorNode::make(Compile* C, int opc, Node* ctl, Node* mem,
                                      Node* adr, const TypePtr* atyp, uint vlen, BasicType bt) {
   const TypeVect* vt = TypeVect::make(bt, vlen);
   return new (C) LoadVectorNode(ctl, mem, adr, atyp, vt);
-  return NULL;
 }
 
 // Return the vector version of a scalar store node.
@@ -413,7 +432,7 @@ Node* ExtractNode::make(Compile* C, Node* v, uint position, BasicType bt) {
   case T_DOUBLE:
     return new (C) ExtractDNode(v, pos);
   }
-  ShouldNotReachHere();
+  fatal(err_msg_res("Type '%s' is not supported for vectors", type2name(bt)));
   return NULL;
 }
 
diff --git a/src/share/vm/opto/vectornode.hpp b/src/share/vm/opto/vectornode.hpp
index 02521bbe1dbf1693cfaedf0ce899604f8ba7c873..ba84406f1d7db77f48a708ebe0e626bcd98d715d 100644
--- a/src/share/vm/opto/vectornode.hpp
+++ b/src/share/vm/opto/vectornode.hpp
@@ -29,7 +29,7 @@
 #include "opto/node.hpp"
 #include "opto/opcodes.hpp"
 
-//------------------------------VectorNode--------------------------------------
+//------------------------------VectorNode-------------------------------------
 // Vector Operation
 class VectorNode : public TypeNode {
  public:
@@ -53,7 +53,7 @@ class VectorNode : public TypeNode {
   virtual uint ideal_reg() const { return Matcher::vector_ideal_reg(vect_type()->length_in_bytes()); }
 
   static VectorNode* scalar2vector(Compile* C, Node* s, uint vlen, const Type* opd_t);
-
+  static VectorNode* shift_count(Compile* C, Node* shift, Node* cnt, uint vlen, BasicType bt);
   static VectorNode* make(Compile* C, int opc, Node* n1, Node* n2, uint vlen, BasicType bt);
 
   static int  opcode(int opc, BasicType bt);
@@ -64,9 +64,9 @@ class VectorNode : public TypeNode {
   static void vector_operands(Node* n, uint* start, uint* end);
 };
 
-//===========================Vector=ALU=Operations====================================
+//===========================Vector=ALU=Operations=============================
 
-//------------------------------AddVBNode---------------------------------------
+//------------------------------AddVBNode--------------------------------------
 // Vector add byte
 class AddVBNode : public VectorNode {
  public:
@@ -74,7 +74,7 @@ class AddVBNode : public VectorNode {
   virtual int Opcode() const;
 };
 
-//------------------------------AddVSNode---------------------------------------
+//------------------------------AddVSNode--------------------------------------
 // Vector add char/short
 class AddVSNode : public VectorNode {
  public:
@@ -82,7 +82,7 @@ class AddVSNode : public VectorNode {
   virtual int Opcode() const;
 };
 
-//------------------------------AddVINode---------------------------------------
+//------------------------------AddVINode--------------------------------------
 // Vector add int
 class AddVINode : public VectorNode {
  public:
@@ -90,7 +90,7 @@ class AddVINode : public VectorNode {
   virtual int Opcode() const;
 };
 
-//------------------------------AddVLNode---------------------------------------
+//------------------------------AddVLNode--------------------------------------
 // Vector add long
 class AddVLNode : public VectorNode {
  public:
@@ -98,7 +98,7 @@ class AddVLNode : public VectorNode {
   virtual int Opcode() const;
 };
 
-//------------------------------AddVFNode---------------------------------------
+//------------------------------AddVFNode--------------------------------------
 // Vector add float
 class AddVFNode : public VectorNode {
  public:
@@ -106,7 +106,7 @@ class AddVFNode : public VectorNode {
   virtual int Opcode() const;
 };
 
-//------------------------------AddVDNode---------------------------------------
+//------------------------------AddVDNode--------------------------------------
 // Vector add double
 class AddVDNode : public VectorNode {
  public:
@@ -114,7 +114,7 @@ class AddVDNode : public VectorNode {
   virtual int Opcode() const;
 };
 
-//------------------------------SubVBNode---------------------------------------
+//------------------------------SubVBNode--------------------------------------
 // Vector subtract byte
 class SubVBNode : public VectorNode {
  public:
@@ -122,7 +122,7 @@ class SubVBNode : public VectorNode {
   virtual int Opcode() const;
 };
 
-//------------------------------SubVSNode---------------------------------------
+//------------------------------SubVSNode--------------------------------------
 // Vector subtract short
 class SubVSNode : public VectorNode {
  public:
@@ -130,7 +130,7 @@ class SubVSNode : public VectorNode {
   virtual int Opcode() const;
 };
 
-//------------------------------SubVINode---------------------------------------
+//------------------------------SubVINode--------------------------------------
 // Vector subtract int
 class SubVINode : public VectorNode {
  public:
@@ -138,7 +138,7 @@ class SubVINode : public VectorNode {
   virtual int Opcode() const;
 };
 
-//------------------------------SubVLNode---------------------------------------
+//------------------------------SubVLNode--------------------------------------
 // Vector subtract long
 class SubVLNode : public VectorNode {
  public:
@@ -146,7 +146,7 @@ class SubVLNode : public VectorNode {
   virtual int Opcode() const;
 };
 
-//------------------------------SubVFNode---------------------------------------
+//------------------------------SubVFNode--------------------------------------
 // Vector subtract float
 class SubVFNode : public VectorNode {
  public:
@@ -154,7 +154,7 @@ class SubVFNode : public VectorNode {
   virtual int Opcode() const;
 };
 
-//------------------------------SubVDNode---------------------------------------
+//------------------------------SubVDNode--------------------------------------
 // Vector subtract double
 class SubVDNode : public VectorNode {
  public:
@@ -162,7 +162,7 @@ class SubVDNode : public VectorNode {
   virtual int Opcode() const;
 };
 
-//------------------------------MulVSNode---------------------------------------
+//------------------------------MulVSNode--------------------------------------
 // Vector multiply short
 class MulVSNode : public VectorNode {
  public:
@@ -170,7 +170,7 @@ class MulVSNode : public VectorNode {
   virtual int Opcode() const;
 };
 
-//------------------------------MulVINode---------------------------------------
+//------------------------------MulVINode--------------------------------------
 // Vector multiply int
 class MulVINode : public VectorNode {
  public:
@@ -178,7 +178,7 @@ class MulVINode : public VectorNode {
   virtual int Opcode() const;
 };
 
-//------------------------------MulVFNode---------------------------------------
+//------------------------------MulVFNode--------------------------------------
 // Vector multiply float
 class MulVFNode : public VectorNode {
  public:
@@ -186,7 +186,7 @@ class MulVFNode : public VectorNode {
   virtual int Opcode() const;
 };
 
-//------------------------------MulVDNode---------------------------------------
+//------------------------------MulVDNode--------------------------------------
 // Vector multiply double
 class MulVDNode : public VectorNode {
  public:
@@ -194,7 +194,7 @@ class MulVDNode : public VectorNode {
   virtual int Opcode() const;
 };
 
-//------------------------------DivVFNode---------------------------------------
+//------------------------------DivVFNode--------------------------------------
 // Vector divide float
 class DivVFNode : public VectorNode {
  public:
@@ -202,7 +202,7 @@ class DivVFNode : public VectorNode {
   virtual int Opcode() const;
 };
 
-//------------------------------DivVDNode---------------------------------------
+//------------------------------DivVDNode--------------------------------------
 // Vector Divide double
 class DivVDNode : public VectorNode {
  public:
@@ -210,7 +210,7 @@ class DivVDNode : public VectorNode {
   virtual int Opcode() const;
 };
 
-//------------------------------LShiftVBNode---------------------------------------
+//------------------------------LShiftVBNode-----------------------------------
 // Vector left shift bytes
 class LShiftVBNode : public VectorNode {
  public:
@@ -218,7 +218,7 @@ class LShiftVBNode : public VectorNode {
   virtual int Opcode() const;
 };
 
-//------------------------------LShiftVSNode---------------------------------------
+//------------------------------LShiftVSNode-----------------------------------
 // Vector left shift shorts
 class LShiftVSNode : public VectorNode {
  public:
@@ -226,7 +226,7 @@ class LShiftVSNode : public VectorNode {
   virtual int Opcode() const;
 };
 
-//------------------------------LShiftVINode---------------------------------------
+//------------------------------LShiftVINode-----------------------------------
 // Vector left shift ints
 class LShiftVINode : public VectorNode {
  public:
@@ -234,7 +234,7 @@ class LShiftVINode : public VectorNode {
   virtual int Opcode() const;
 };
 
-//------------------------------LShiftVLNode---------------------------------------
+//------------------------------LShiftVLNode-----------------------------------
 // Vector left shift longs
 class LShiftVLNode : public VectorNode {
  public:
@@ -242,7 +242,7 @@ class LShiftVLNode : public VectorNode {
   virtual int Opcode() const;
 };
 
-//------------------------------RShiftVBNode---------------------------------------
+//------------------------------RShiftVBNode-----------------------------------
 // Vector right arithmetic (signed) shift bytes
 class RShiftVBNode : public VectorNode {
  public:
@@ -250,7 +250,7 @@ class RShiftVBNode : public VectorNode {
   virtual int Opcode() const;
 };
 
-//------------------------------RShiftVSNode---------------------------------------
+//------------------------------RShiftVSNode-----------------------------------
 // Vector right arithmetic (signed) shift shorts
 class RShiftVSNode : public VectorNode {
  public:
@@ -258,7 +258,7 @@ class RShiftVSNode : public VectorNode {
   virtual int Opcode() const;
 };
 
-//------------------------------RShiftVINode---------------------------------------
+//------------------------------RShiftVINode-----------------------------------
 // Vector right arithmetic (signed) shift ints
 class RShiftVINode : public VectorNode {
  public:
@@ -266,7 +266,7 @@ class RShiftVINode : public VectorNode {
   virtual int Opcode() const;
 };
 
-//------------------------------RShiftVLNode---------------------------------------
+//------------------------------RShiftVLNode-----------------------------------
 // Vector right arithmetic (signed) shift longs
 class RShiftVLNode : public VectorNode {
  public:
@@ -274,7 +274,7 @@ class RShiftVLNode : public VectorNode {
   virtual int Opcode() const;
 };
 
-//------------------------------URShiftVBNode---------------------------------------
+//------------------------------URShiftVBNode----------------------------------
 // Vector right logical (unsigned) shift bytes
 class URShiftVBNode : public VectorNode {
  public:
@@ -282,7 +282,7 @@ class URShiftVBNode : public VectorNode {
   virtual int Opcode() const;
 };
 
-//------------------------------URShiftVSNode---------------------------------------
+//------------------------------URShiftVSNode----------------------------------
 // Vector right logical (unsigned) shift shorts
 class URShiftVSNode : public VectorNode {
  public:
@@ -290,7 +290,7 @@ class URShiftVSNode : public VectorNode {
   virtual int Opcode() const;
 };
 
-//------------------------------URShiftVINode---------------------------------------
+//------------------------------URShiftVINode----------------------------------
 // Vector right logical (unsigned) shift ints
 class URShiftVINode : public VectorNode {
  public:
@@ -298,7 +298,7 @@ class URShiftVINode : public VectorNode {
   virtual int Opcode() const;
 };
 
-//------------------------------URShiftVLNode---------------------------------------
+//------------------------------URShiftVLNode----------------------------------
 // Vector right logical (unsigned) shift longs
 class URShiftVLNode : public VectorNode {
  public:
@@ -306,6 +306,24 @@ class URShiftVLNode : public VectorNode {
   virtual int Opcode() const;
 };
 
+//------------------------------LShiftCntVNode---------------------------------
+// Vector left shift count; its register class is chosen by the Matcher
+class LShiftCntVNode : public VectorNode {
+ public:
+  LShiftCntVNode(Node* cnt, const TypeVect* vt) : VectorNode(cnt,vt) {}
+  virtual int Opcode() const;
+  virtual uint ideal_reg() const { return Matcher::vector_shift_count_ideal_reg(vect_type()->length_in_bytes()); }
+};
+
+//------------------------------RShiftCntVNode---------------------------------
+// Vector right shift count; its register class is chosen by the Matcher
+class RShiftCntVNode : public VectorNode {
+ public:
+  RShiftCntVNode(Node* cnt, const TypeVect* vt) : VectorNode(cnt,vt) {}
+  virtual int Opcode() const;
+  virtual uint ideal_reg() const { return Matcher::vector_shift_count_ideal_reg(vect_type()->length_in_bytes()); }
+};
+
 
 //------------------------------AndVNode---------------------------------------
 // Vector and integer
@@ -452,7 +470,7 @@ class PackNode : public VectorNode {
   static PackNode* make(Compile* C, Node* s, uint vlen, BasicType bt);
 };
 
-//------------------------------PackBNode---------------------------------------
+//------------------------------PackBNode--------------------------------------
 // Pack byte scalars into vector
 class PackBNode : public PackNode {
  public:
@@ -460,7 +478,7 @@ class PackBNode : public PackNode {
   virtual int Opcode() const;
 };
 
-//------------------------------PackSNode---------------------------------------
+//------------------------------PackSNode--------------------------------------
 // Pack short scalars into a vector
 class PackSNode : public PackNode {
  public:
@@ -469,7 +487,7 @@ class PackSNode : public PackNode {
   virtual int Opcode() const;
 };
 
-//------------------------------PackINode---------------------------------------
+//------------------------------PackINode--------------------------------------
 // Pack integer scalars into a vector
 class PackINode : public PackNode {
  public:
@@ -478,7 +496,7 @@ class PackINode : public PackNode {
   virtual int Opcode() const;
 };
 
-//------------------------------PackLNode---------------------------------------
+//------------------------------PackLNode--------------------------------------
 // Pack long scalars into a vector
 class PackLNode : public PackNode {
  public:
@@ -487,7 +505,7 @@ class PackLNode : public PackNode {
   virtual int Opcode() const;
 };
 
-//------------------------------Pack2LNode--------------------------------------
+//------------------------------Pack2LNode-------------------------------------
 // Pack 2 long scalars into a vector
 class Pack2LNode : public PackNode {
  public:
@@ -495,7 +513,7 @@ class Pack2LNode : public PackNode {
   virtual int Opcode() const;
 };
 
-//------------------------------PackFNode---------------------------------------
+//------------------------------PackFNode--------------------------------------
 // Pack float scalars into vector
 class PackFNode : public PackNode {
  public:
@@ -504,7 +522,7 @@ class PackFNode : public PackNode {
   virtual int Opcode() const;
 };
 
-//------------------------------PackDNode---------------------------------------
+//------------------------------PackDNode--------------------------------------
 // Pack double scalars into a vector
 class PackDNode : public PackNode {
  public:
@@ -513,7 +531,7 @@ class PackDNode : public PackNode {
   virtual int Opcode() const;
 };
 
-//------------------------------Pack2DNode--------------------------------------
+//------------------------------Pack2DNode-------------------------------------
 // Pack 2 double scalars into a vector
 class Pack2DNode : public PackNode {
  public:
@@ -522,9 +540,9 @@ class Pack2DNode : public PackNode {
 };
 
 
-//========================Extract_Scalar_from_Vector===============================
+//========================Extract_Scalar_from_Vector===========================
 
-//------------------------------ExtractNode---------------------------------------
+//------------------------------ExtractNode------------------------------------
 // Extract a scalar from a vector at position "pos"
 class ExtractNode : public Node {
  public:
@@ -537,7 +555,7 @@ class ExtractNode : public Node {
   static Node* make(Compile* C, Node* v, uint position, BasicType bt);
 };
 
-//------------------------------ExtractBNode---------------------------------------
+//------------------------------ExtractBNode-----------------------------------
 // Extract a byte from a vector at position "pos"
 class ExtractBNode : public ExtractNode {
  public:
@@ -547,7 +565,7 @@ class ExtractBNode : public ExtractNode {
   virtual uint ideal_reg() const { return Op_RegI; }
 };
 
-//------------------------------ExtractUBNode--------------------------------------
+//------------------------------ExtractUBNode----------------------------------
 // Extract a boolean from a vector at position "pos"
 class ExtractUBNode : public ExtractNode {
  public:
@@ -557,7 +575,7 @@ class ExtractUBNode : public ExtractNode {
   virtual uint ideal_reg() const { return Op_RegI; }
 };
 
-//------------------------------ExtractCNode---------------------------------------
+//------------------------------ExtractCNode-----------------------------------
 // Extract a char from a vector at position "pos"
 class ExtractCNode : public ExtractNode {
  public:
@@ -567,7 +585,7 @@ class ExtractCNode : public ExtractNode {
   virtual uint ideal_reg() const { return Op_RegI; }
 };
 
-//------------------------------ExtractSNode---------------------------------------
+//------------------------------ExtractSNode-----------------------------------
 // Extract a short from a vector at position "pos"
 class ExtractSNode : public ExtractNode {
  public:
@@ -577,7 +595,7 @@ class ExtractSNode : public ExtractNode {
   virtual uint ideal_reg() const { return Op_RegI; }
 };
 
-//------------------------------ExtractINode---------------------------------------
+//------------------------------ExtractINode-----------------------------------
 // Extract an int from a vector at position "pos"
 class ExtractINode : public ExtractNode {
  public:
@@ -587,7 +605,7 @@ class ExtractINode : public ExtractNode {
   virtual uint ideal_reg() const { return Op_RegI; }
 };
 
-//------------------------------ExtractLNode---------------------------------------
+//------------------------------ExtractLNode-----------------------------------
 // Extract a long from a vector at position "pos"
 class ExtractLNode : public ExtractNode {
  public:
@@ -597,7 +615,7 @@ class ExtractLNode : public ExtractNode {
   virtual uint ideal_reg() const { return Op_RegL; }
 };
 
-//------------------------------ExtractFNode---------------------------------------
+//------------------------------ExtractFNode-----------------------------------
 // Extract a float from a vector at position "pos"
 class ExtractFNode : public ExtractNode {
  public:
@@ -607,7 +625,7 @@ class ExtractFNode : public ExtractNode {
   virtual uint ideal_reg() const { return Op_RegF; }
 };
 
-//------------------------------ExtractDNode---------------------------------------
+//------------------------------ExtractDNode-----------------------------------
 // Extract a double from a vector at position "pos"
 class ExtractDNode : public ExtractNode {
  public:
diff --git a/test/compiler/7200264/Test7200264.sh b/test/compiler/7200264/Test7200264.sh
index 71b68d0f6c50ebe3dcaf5f7e8fc9b6075d0c9345..4276a8f134b385f0da60ac62fd094ef8b41bd0af 100644
--- a/test/compiler/7200264/Test7200264.sh
+++ b/test/compiler/7200264/Test7200264.sh
@@ -77,6 +77,16 @@ then
     exit 0
 fi
 
+# Check whether the cpu supports integer multiply vectors (needs SSE4.1)
+${TESTJAVA}${FS}bin${FS}java ${TESTVMOPTS} -XX:+PrintMiscellaneous -XX:+Verbose -version | grep "cores per cpu" | grep "sse4.1"
+
+if [ $? != 0 ]
+then
+    SSE=2
+else
+    SSE=4
+fi
+
 cp ${TESTSRC}${FS}TestIntVect.java .
 ${TESTJAVA}${FS}bin${FS}javac -d . TestIntVect.java
 
@@ -97,6 +107,9 @@ then
     exit 1
 fi
 
+# MulVI is only supported with SSE4.1.
+if [ $SSE -gt 3 ]
+then
 # LShiftVI+SubVI is generated for test_mulc
 COUNT=`grep MulVI test.out | wc -l | awk '{print $1}'`
 if [ $COUNT -lt 2 ]
@@ -104,6 +117,7 @@ then
     echo "Test Failed: MulVI $COUNT < 2"
     exit 1
 fi
+fi
 
 COUNT=`grep AndV test.out | wc -l | awk '{print $1}'`
 if [ $COUNT -lt 3 ]
@@ -126,6 +140,7 @@ then
     exit 1
 fi
 
+# LShiftVI+SubVI is generated for test_mulc
 COUNT=`grep LShiftVI test.out | wc -l | awk '{print $1}'`
 if [ $COUNT -lt 5 ]
 then
@@ -133,11 +148,10 @@ then
     exit 1
 fi
 
-# RShiftVI + URShiftVI
-COUNT=`grep RShiftVI test.out | wc -l | awk '{print $1}'`
-if [ $COUNT -lt 6 ]
+COUNT=`grep RShiftVI test.out | sed '/URShiftVI/d' | wc -l | awk '{print $1}'`
+if [ $COUNT -lt 3 ]
 then
-    echo "Test Failed: RShiftVI $COUNT < 6"
+    echo "Test Failed: RShiftVI $COUNT < 3"
     exit 1
 fi