提交 1f53a72f 编写于 作者: D dangqingqing

Reduce the number of threads in the LSTM backward kernel to fix the error that occurred on the GTX 1080 GPU.

上级 c0005d58
...@@ -227,7 +227,7 @@ void gpu_lstm_backward(const platform::DeviceContext& context, Op op, ...@@ -227,7 +227,7 @@ void gpu_lstm_backward(const platform::DeviceContext& context, Op op,
grid = dim3(frameBlocks, 1); grid = dim3(frameBlocks, 1);
} else { } else {
/* framePerBlock = 32 batchPerBlock = 32 */ /* framePerBlock = 32 batchPerBlock = 32 */
threads = dim3(32, 32); threads = dim3(32, 16);
grid = dim3((frameSize + 32 - 1) / 32, (batchSize + 32 - 1) / 32); grid = dim3((frameSize + 32 - 1) / 32, (batchSize + 32 - 1) / 32);
} }
...@@ -244,6 +244,11 @@ void gpu_lstm_backward(const platform::DeviceContext& context, Op op, ...@@ -244,6 +244,11 @@ void gpu_lstm_backward(const platform::DeviceContext& context, Op op,
op, value, grad, frameSize, batchSize, active_node, active_gate, op, value, grad, frameSize, batchSize, active_node, active_gate,
active_state); active_state);
} }
cudaStreamSynchronize(stream);
// TODO(qingqing): Add cuda error check for each kernel.
cudaError_t err = cudaGetLastError();
PADDLE_ENFORCE_EQ(err, cudaGetErrorString(err));
} }
} // namespace detail } // namespace detail
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册