未验证 提交 c7cada85 编写于 作者: J Jack Zhou 提交者: GitHub

Fix gru performance decline in 1.8.5 (#29455)

上级 acce9621
......@@ -276,7 +276,7 @@ inline void forward_reset_output(
// use eigen
forward_reset_outputV2(*context, value, frame_size);
} else {
- if (OpResetOutput::avx && (frame_size & static_cast<int>(8 - 1)) &&
+ if (OpResetOutput::avx && (frame_size > static_cast<int>(8 - 1)) &&
(sizeof(T) == 4)) {
hl_avx_gru_forward_reset_output(
op_reset_output, value.gate_value, value.reset_output_value,
......@@ -329,7 +329,7 @@ inline void forward_final_output(
// eigen
forward_final_outputV2(*context, value, frame_size);
} else {
- if (OpFinalOutput::avx && (frame_size & static_cast<int>(8 - 1)) &&
+ if (OpFinalOutput::avx && (frame_size > static_cast<int>(8 - 1)) &&
(sizeof(T) == 4)) {
hl_avx_gru_forward_final_output(op_final_output, value.gate_value,
value.prev_out_value,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册