未验证 提交 c7cada85 编写于 作者: J Jack Zhou 提交者: GitHub

Fix GRU performance decline in 1.8.5 (#29455)

上级 acce9621
...@@ -276,7 +276,7 @@ inline void forward_reset_output( ...@@ -276,7 +276,7 @@ inline void forward_reset_output(
// use eigen // use eigen
forward_reset_outputV2(*context, value, frame_size); forward_reset_outputV2(*context, value, frame_size);
} else { } else {
if (OpResetOutput::avx && (frame_size & static_cast<int>(8 - 1)) && if (OpResetOutput::avx && (frame_size > static_cast<int>(8 - 1)) &&
(sizeof(T) == 4)) { (sizeof(T) == 4)) {
hl_avx_gru_forward_reset_output( hl_avx_gru_forward_reset_output(
op_reset_output, value.gate_value, value.reset_output_value, op_reset_output, value.gate_value, value.reset_output_value,
...@@ -329,7 +329,7 @@ inline void forward_final_output( ...@@ -329,7 +329,7 @@ inline void forward_final_output(
// eigen // eigen
forward_final_outputV2(*context, value, frame_size); forward_final_outputV2(*context, value, frame_size);
} else { } else {
if (OpFinalOutput::avx && (frame_size & static_cast<int>(8 - 1)) && if (OpFinalOutput::avx && (frame_size > static_cast<int>(8 - 1)) &&
(sizeof(T) == 4)) { (sizeof(T) == 4)) {
hl_avx_gru_forward_final_output(op_final_output, value.gate_value, hl_avx_gru_forward_final_output(op_final_output, value.gate_value,
value.prev_out_value, value.prev_out_value,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册