fix gpu build error

65dbeb6a · qijun · 9e3a9eb2 · 65dbeb6a · 65dbeb6a
显示空白变更内容
内联并排

Showing 2 changed files with 16 additions and 11 deletions

paddle/framework/operator.cc +3 -3

paddle/function/RowConvOpGpu.cu +13 -8

未找到文件。
--- a/paddle/framework/operator.cc
+++ b/paddle/framework/operator.cc
@@ -25,9 +25,9 @@ Eigen::DefaultDevice* OpKernel::KernelContext::get_eigen_device<
 #ifndef PADDLE_ONLY_CPU
 template <>
-DeviceType* OpKernel::KernelContext::get_eigen_device<platform::GPUPlace>()
+Eigen::GpuDevice* OpKernel::KernelContext::get_eigen_device<
-    const {
+    platform::GPUPlace, Eigen::GpuDevice>() const {
-  return device_context_.get_eigen_device<DeviceType>();
+  return device_context_.get_eigen_device<Eigen::GpuDevice>();
 }
 #endif

--- a/paddle/function/RowConvOpGpu.cu
+++ b/paddle/function/RowConvOpGpu.cu
@@ -144,12 +144,15 @@ __global__ void KeRowConvBwWeight(real* dw, const real* x, const real* dy,
      int yoff = start + j;
      // transpose
-      sh_x[tidx][tidy] = (xoff < width && yoff < end) ? x[yoff * width + xoff] : 0.0;
+      sh_x[tidx][tidy] = (xoff < width && yoff < end) ?
-      sh_dy[tidx][tidy + context - 1] = (xoff < width && yoff < end) ? dy[yoff * width + xoff] : 0.0;
+      x[yoff * width + xoff] : 0.0;
+      sh_dy[tidx][tidy + context - 1] = (xoff < width && yoff < end) ?
+      dy[yoff * width + xoff] : 0.0;
      __syncthreads();
      if (tidy < (context - 1)) {
        yoff = yoff - context + 1;
-        sh_dy[tidx][tidy] = (xoff < width && yoff >= start) ? dy[yoff * width + xoff] : 0.0;
+        sh_dy[tidx][tidy] = (xoff < width && yoff >= start) ?
+        dy[yoff * width + xoff] : 0.0;
      }
      __syncthreads();
@@ -199,11 +202,13 @@ __global__ void KeRowConvBwWeight2(real* dw, const real* x, const real* dy,
      int yoff = start + j;
      // transpose
-      sh_x[tidx][tidy] = (xoff < width && yoff < end) ? x[yoff * width + xoff] : 0.0;
+      sh_x[tidx][tidy] = (xoff < width && yoff < end) ?
+      x[yoff * width + xoff] : 0.0;
      __syncthreads();
      for (int t = 0; t < context; t++) {
-        sh_dy[tidx][tidy] = (xoff < width && (yoff - t) >= start && yoff - t < end) ? dy[(yoff - t) * width + xoff] : 0.0;
+        sh_dy[tidx][tidy] = (xoff < width && (yoff - t) >= start &&
+        yoff - t < end) ? dy[(yoff - t) * width + xoff] : 0.0;
        __syncthreads();
        real val = sh_x[tidy][tidx] * sh_dy[tidy][tidx];