Commit 0276021b authored by: K kvn

8078113: 8011102 changes may cause incorrect results

Summary: replace Vzeroupper instruction in stubs with zeroing only used ymm registers.
Reviewed-by: kvn
Contributed-by: sandhya.viswanathan@intel.com
Parent c2a5051f
@@ -6690,7 +6690,7 @@ void MacroAssembler::string_compare(Register str1, Register str2,
     subl(cnt2, stride2);
     jccb(Assembler::notZero, COMPARE_WIDE_VECTORS_LOOP);
     // clean upper bits of YMM registers
-    vzeroupper();
+    vpxor(vec1, vec1);
     // compare wide vectors tail
     bind(COMPARE_WIDE_TAIL);
@@ -6705,7 +6705,7 @@ void MacroAssembler::string_compare(Register str1, Register str2,
     // Identifies the mismatching (higher or lower)16-bytes in the 32-byte vectors.
     bind(VECTOR_NOT_EQUAL);
     // clean upper bits of YMM registers
-    vzeroupper();
+    vpxor(vec1, vec1);
     lea(str1, Address(str1, result, scale));
     lea(str2, Address(str2, result, scale));
     jmp(COMPARE_16_CHARS);
@@ -6964,7 +6964,8 @@ void MacroAssembler::char_arrays_equals(bool is_array_equ, Register ary1, Regist
     bind(DONE);
     if (UseAVX >= 2) {
       // clean upper bits of YMM registers
-      vzeroupper();
+      vpxor(vec1, vec1);
+      vpxor(vec2, vec2);
     }
   }
@@ -7098,7 +7099,8 @@ void MacroAssembler::generate_fill(BasicType t, bool aligned,
       BIND(L_check_fill_8_bytes);
       // clean upper bits of YMM registers
-      vzeroupper();
+      movdl(xtmp, value);
+      pshufd(xtmp, xtmp, 0);
     } else {
       // Fill 32-byte chunks
       pshufd(xtmp, xtmp, 0);
@@ -7261,7 +7263,11 @@ void MacroAssembler::encode_iso_array(Register src, Register dst, Register len,
     bind(L_copy_16_chars_exit);
     if (UseAVX >= 2) {
       // clean upper bits of YMM registers
-      vzeroupper();
+      vpxor(tmp2Reg, tmp2Reg);
+      vpxor(tmp3Reg, tmp3Reg);
+      vpxor(tmp4Reg, tmp4Reg);
+      movdl(tmp1Reg, tmp5);
+      pshufd(tmp1Reg, tmp1Reg, 0);
     }
     subptr(len, 8);
     jccb(Assembler::greater, L_copy_8_chars_exit);
......
@@ -837,7 +837,8 @@ class StubGenerator: public StubCodeGenerator {
       if (UseUnalignedLoadStores && (UseAVX >= 2)) {
         // clean upper bits of YMM registers
-        __ vzeroupper();
+        __ vpxor(xmm0, xmm0);
+        __ vpxor(xmm1, xmm1);
       }
       __ addl(qword_count, 8);
       __ jccb(Assembler::zero, L_exit);
......
@@ -1328,7 +1328,8 @@ class StubGenerator: public StubCodeGenerator {
       __ BIND(L_end);
       if (UseAVX >= 2) {
         // clean upper bits of YMM registers
-        __ vzeroupper();
+        __ vpxor(xmm0, xmm0);
+        __ vpxor(xmm1, xmm1);
       }
     } else {
       // Copy 32-bytes per iteration
@@ -1405,7 +1406,8 @@ class StubGenerator: public StubCodeGenerator {
       __ BIND(L_end);
      if (UseAVX >= 2) {
         // clean upper bits of YMM registers
-        __ vzeroupper();
+        __ vpxor(xmm0, xmm0);
+        __ vpxor(xmm1, xmm1);
      }
     } else {
       // Copy 32-bytes per iteration
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register to comment