From 42dd5da0fde79261af3c9bcf4f8fa716d515ef26 Mon Sep 17 00:00:00 2001
From: Markus Kliegl
Date: Tue, 14 Nov 2017 04:23:52 +0000
Subject: [PATCH] conv shift: fix return before syncthreads

---
 paddle/operators/conv_shift_op.cu | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/paddle/operators/conv_shift_op.cu b/paddle/operators/conv_shift_op.cu
index 1db77657a03..2a157f457a6 100644
--- a/paddle/operators/conv_shift_op.cu
+++ b/paddle/operators/conv_shift_op.cu
@@ -62,19 +62,19 @@ __global__ void ConvShiftForward(const T *x, const T *y, T *out, int x_width,
   if (tx < num_x) {
     int load_i = (i - y_half_width + x_width) % x_width;
     sx[tx] = x[k * x_width + load_i];
-  } else {
-    return;
   }
   __syncthreads();
 
-  // Compute dot product of sx[tx:tx + y_width] and sy.
-  T sum = 0;
-  for (int j = 0; j < y_width; ++j) {
-    sum += sx[tx + j] * sy[j];
-  }
+  if (tx < num_x) {
+    // Compute dot product of sx[tx:tx + y_width] and sy.
+    T sum = 0;
+    for (int j = 0; j < y_width; ++j) {
+      sum += sx[tx + j] * sy[j];
+    }
 
-  // Save to out[k, i].
-  out[k * x_width + i] = sum;
+    // Save to out[k, i].
+    out[k * x_width + i] = sum;
+  }
 }
 
 // Compute x gradient - initial naive implementation with atomic add.
-- 
GitLab
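
Note (context, not part of the patch): CUDA requires that __syncthreads() be
reached by every thread of a block; when some threads return early, as the old
"} else { return; }" branch did, the barrier's behavior is undefined and can
deadlock or let other threads read stale shared memory. The fix keeps all
threads alive through the barrier and instead masks the load phase and the
compute phase with "if (tx < num_x)" guards. Below is a minimal, self-contained
CUDA sketch of that guard-then-barrier pattern; the kernel name, sizes, and
data are illustrative, not Paddle's.

    // Sketch of the barrier rule the fix follows: no thread returns before
    // __syncthreads(); idle threads are masked with `if` guards instead.
    // GuardedNeighborSum and all buffer names are hypothetical.
    #include <cstdio>

    __global__ void GuardedNeighborSum(const float *in, float *out, int n) {
      extern __shared__ float buf[];
      int tx = threadIdx.x;

      // Guard the shared-memory load; out-of-range threads simply skip it.
      if (tx < n) {
        buf[tx] = in[tx];
      }
      // Every thread, including the idle ones, reaches the barrier.
      __syncthreads();

      // Guard the compute/store phase the same way instead of returning early.
      if (tx < n) {
        float left = (tx > 0) ? buf[tx - 1] : 0.0f;
        out[tx] = left + buf[tx];
      }
    }

    int main() {
      const int n = 8;
      float h_in[n], h_out[n];
      for (int i = 0; i < n; ++i) h_in[i] = 1.0f;

      float *d_in, *d_out;
      cudaMalloc(&d_in, n * sizeof(float));
      cudaMalloc(&d_out, n * sizeof(float));
      cudaMemcpy(d_in, h_in, n * sizeof(float), cudaMemcpyHostToDevice);

      // Launch more threads than elements so the guards actually matter.
      GuardedNeighborSum<<<1, 32, n * sizeof(float)>>>(d_in, d_out, n);
      cudaMemcpy(h_out, d_out, n * sizeof(float), cudaMemcpyDeviceToHost);

      for (int i = 0; i < n; ++i) printf("%g ", h_out[i]);  // 1 2 2 2 2 2 2 2
      printf("\n");
      cudaFree(d_in);
      cudaFree(d_out);
      return 0;
    }

The same two-guard structure appears in the patched ConvShiftForward: one
guard around the sx[] load before the barrier, one around the dot product and
store after it.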