From 1f53a72f10c9d4781932d7d4a842a9993106a8d3 Mon Sep 17 00:00:00 2001
From: dangqingqing <dangqingqing@baidu.com>
Date: Thu, 2 Nov 2017 00:21:04 +0800
Subject: [PATCH] Reduce the number of threads in the LSTM backward kernel to
 fix the error that occurred on the GTX 1080 GPU.

---
 paddle/operators/math/detail/lstm_gpu_kernel.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/paddle/operators/math/detail/lstm_gpu_kernel.h b/paddle/operators/math/detail/lstm_gpu_kernel.h
index d3e5e381a5..e07655eaac 100644
--- a/paddle/operators/math/detail/lstm_gpu_kernel.h
+++ b/paddle/operators/math/detail/lstm_gpu_kernel.h
@@ -227,7 +227,7 @@ void gpu_lstm_backward(const platform::DeviceContext& context, Op op,
     grid = dim3(frameBlocks, 1);
   } else {
     /* framePerBlock = 32 batchPerBlock = 32 */
-    threads = dim3(32, 32);
+    threads = dim3(32, 16);
     grid = dim3((frameSize + 32 - 1) / 32, (batchSize + 32 - 1) / 32);
   }
 
@@ -244,6 +244,11 @@ void gpu_lstm_backward(const platform::DeviceContext& context, Op op,
         op, value, grad, frameSize, batchSize, active_node, active_gate,
         active_state);
   }
+
+  cudaStreamSynchronize(stream);
+  // TODO(qingqing): Add cuda error check for each kernel.
+  cudaError_t err = cudaGetLastError();
+  PADDLE_ENFORCE_EQ(err, cudaSuccess, cudaGetErrorString(err));
 }
 
 }  // namespace detail
-- 
GitLab