From 0276021b6b68683afaca255af35b59b8144f88ff Mon Sep 17 00:00:00 2001
From: kvn
Date: Fri, 17 Apr 2015 17:39:19 -0700
Subject: [PATCH] 8078113: 8011102 changes may cause incorrect results

Summary: replace Vzeroupper instruction in stubs with zeroing only used ymm registers.
Reviewed-by: kvn
Contributed-by: sandhya.viswanathan@intel.com
---
 src/cpu/x86/vm/macroAssembler_x86.cpp   | 16 +++++++++++-----
 src/cpu/x86/vm/stubGenerator_x86_32.cpp |  3 ++-
 src/cpu/x86/vm/stubGenerator_x86_64.cpp |  6 ++++--
 3 files changed, 17 insertions(+), 8 deletions(-)

diff --git a/src/cpu/x86/vm/macroAssembler_x86.cpp b/src/cpu/x86/vm/macroAssembler_x86.cpp
index 5857a9350..29115a27c 100644
--- a/src/cpu/x86/vm/macroAssembler_x86.cpp
+++ b/src/cpu/x86/vm/macroAssembler_x86.cpp
@@ -6690,7 +6690,7 @@ void MacroAssembler::string_compare(Register str1, Register str2,
     subl(cnt2, stride2);
     jccb(Assembler::notZero, COMPARE_WIDE_VECTORS_LOOP);
     // clean upper bits of YMM registers
-    vzeroupper();
+    vpxor(vec1, vec1);
 
     // compare wide vectors tail
     bind(COMPARE_WIDE_TAIL);
@@ -6705,7 +6705,7 @@ void MacroAssembler::string_compare(Register str1, Register str2,
     // Identifies the mismatching (higher or lower)16-bytes in the 32-byte vectors.
     bind(VECTOR_NOT_EQUAL);
     // clean upper bits of YMM registers
-    vzeroupper();
+    vpxor(vec1, vec1);
     lea(str1, Address(str1, result, scale));
     lea(str2, Address(str2, result, scale));
     jmp(COMPARE_16_CHARS);
@@ -6964,7 +6964,8 @@ void MacroAssembler::char_arrays_equals(bool is_array_equ, Register ary1, Regist
   bind(DONE);
   if (UseAVX >= 2) {
     // clean upper bits of YMM registers
-    vzeroupper();
+    vpxor(vec1, vec1);
+    vpxor(vec2, vec2);
   }
 }
@@ -7098,7 +7099,8 @@ void MacroAssembler::generate_fill(BasicType t, bool aligned,
 
       BIND(L_check_fill_8_bytes);
       // clean upper bits of YMM registers
-      vzeroupper();
+      movdl(xtmp, value);
+      pshufd(xtmp, xtmp, 0);
     } else {
       // Fill 32-byte chunks
       pshufd(xtmp, xtmp, 0);
@@ -7261,7 +7263,11 @@ void MacroAssembler::encode_iso_array(Register src, Register dst, Register len,
     bind(L_copy_16_chars_exit);
     if (UseAVX >= 2) {
       // clean upper bits of YMM registers
-      vzeroupper();
+      vpxor(tmp2Reg, tmp2Reg);
+      vpxor(tmp3Reg, tmp3Reg);
+      vpxor(tmp4Reg, tmp4Reg);
+      movdl(tmp1Reg, tmp5);
+      pshufd(tmp1Reg, tmp1Reg, 0);
     }
     subptr(len, 8);
     jccb(Assembler::greater, L_copy_8_chars_exit);
diff --git a/src/cpu/x86/vm/stubGenerator_x86_32.cpp b/src/cpu/x86/vm/stubGenerator_x86_32.cpp
index 1622fe5ff..5387ba184 100644
--- a/src/cpu/x86/vm/stubGenerator_x86_32.cpp
+++ b/src/cpu/x86/vm/stubGenerator_x86_32.cpp
@@ -837,7 +837,8 @@ class StubGenerator: public StubCodeGenerator {
 
     if (UseUnalignedLoadStores && (UseAVX >= 2)) {
       // clean upper bits of YMM registers
-      __ vzeroupper();
+      __ vpxor(xmm0, xmm0);
+      __ vpxor(xmm1, xmm1);
     }
     __ addl(qword_count, 8);
     __ jccb(Assembler::zero, L_exit);
diff --git a/src/cpu/x86/vm/stubGenerator_x86_64.cpp b/src/cpu/x86/vm/stubGenerator_x86_64.cpp
index 0000146f5..0bb6118d7 100644
--- a/src/cpu/x86/vm/stubGenerator_x86_64.cpp
+++ b/src/cpu/x86/vm/stubGenerator_x86_64.cpp
@@ -1328,7 +1328,8 @@ class StubGenerator: public StubCodeGenerator {
     __ BIND(L_end);
     if (UseAVX >= 2) {
       // clean upper bits of YMM registers
-      __ vzeroupper();
+      __ vpxor(xmm0, xmm0);
+      __ vpxor(xmm1, xmm1);
     }
   } else {
     // Copy 32-bytes per iteration
@@ -1405,7 +1406,8 @@ class StubGenerator: public StubCodeGenerator {
     __ BIND(L_end);
     if (UseAVX >= 2) {
       // clean upper bits of YMM registers
-      __ vzeroupper();
+      __ vpxor(xmm0, xmm0);
+      __ vpxor(xmm1, xmm1);
     }
   } else {
     // Copy 32-bytes per iteration
--
GitLab