Reduce the number of threads in the LSTM backward kernel to fix the error that occurred on the GTX 1080 GPU.

1f53a72f · dangqingqing · c0005d58 · 1f53a72f
显示空白变更内容
内联并排

Showing 1 changed file with 6 additions and 1 deletion

paddle/operators/math/detail/lstm_gpu_kernel.h paddle/operators/math/detail/lstm_gpu_kernel.h +6 -1

未找到文件。
--- a/paddle/operators/math/detail/lstm_gpu_kernel.h
+++ b/paddle/operators/math/detail/lstm_gpu_kernel.h
@@ -227,7 +227,7 @@ void gpu_lstm_backward(const platform::DeviceContext& context, Op op,
    grid = dim3(frameBlocks, 1);
  } else {
    /* framePerBlock = 32 batchPerBlock = 32 */
-    threads = dim3(32, 32);
+    threads = dim3(32, 16);
    grid = dim3((frameSize + 32 - 1) / 32, (batchSize + 32 - 1) / 32);
  }
@@ -244,6 +244,11 @@ void gpu_lstm_backward(const platform::DeviceContext& context, Op op,
        op, value, grad, frameSize, batchSize, active_node, active_gate,
        active_state);
  }
+  cudaStreamSynchronize(stream);
+  // TODO(qingqing): Add cuda error check for each kernel.
+  cudaError_t err = cudaGetLastError();
+  PADDLE_ENFORCE_EQ(err, cudaGetErrorString(err));
 }
 }  // namespace detail