提交 65dbeb6a · 作者: qijun

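Fix GPU build error: spell out Eigen::GpuDevice in the get_eigen_device<platform::GPUPlace> specialization, and wrap over-long lines in the KeRowConvBwWeight / KeRowConvBwWeight2 kernels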

上级 9e3a9eb2
...@@ -25,9 +25,9 @@ Eigen::DefaultDevice* OpKernel::KernelContext::get_eigen_device< ...@@ -25,9 +25,9 @@ Eigen::DefaultDevice* OpKernel::KernelContext::get_eigen_device<
#ifndef PADDLE_ONLY_CPU #ifndef PADDLE_ONLY_CPU
template <> template <>
DeviceType* OpKernel::KernelContext::get_eigen_device<platform::GPUPlace>() Eigen::GpuDevice* OpKernel::KernelContext::get_eigen_device<
const { platform::GPUPlace, Eigen::GpuDevice>() const {
return device_context_.get_eigen_device<DeviceType>(); return device_context_.get_eigen_device<Eigen::GpuDevice>();
} }
#endif #endif
......
...@@ -144,12 +144,15 @@ __global__ void KeRowConvBwWeight(real* dw, const real* x, const real* dy, ...@@ -144,12 +144,15 @@ __global__ void KeRowConvBwWeight(real* dw, const real* x, const real* dy,
int yoff = start + j; int yoff = start + j;
// transpose // transpose
sh_x[tidx][tidy] = (xoff < width && yoff < end) ? x[yoff * width + xoff] : 0.0; sh_x[tidx][tidy] = (xoff < width && yoff < end) ?
sh_dy[tidx][tidy + context - 1] = (xoff < width && yoff < end) ? dy[yoff * width + xoff] : 0.0; x[yoff * width + xoff] : 0.0;
sh_dy[tidx][tidy + context - 1] = (xoff < width && yoff < end) ?
dy[yoff * width + xoff] : 0.0;
__syncthreads(); __syncthreads();
if (tidy < (context - 1)) { if (tidy < (context - 1)) {
yoff = yoff - context + 1; yoff = yoff - context + 1;
sh_dy[tidx][tidy] = (xoff < width && yoff >= start) ? dy[yoff * width + xoff] : 0.0; sh_dy[tidx][tidy] = (xoff < width && yoff >= start) ?
dy[yoff * width + xoff] : 0.0;
} }
__syncthreads(); __syncthreads();
...@@ -199,11 +202,13 @@ __global__ void KeRowConvBwWeight2(real* dw, const real* x, const real* dy, ...@@ -199,11 +202,13 @@ __global__ void KeRowConvBwWeight2(real* dw, const real* x, const real* dy,
int yoff = start + j; int yoff = start + j;
// transpose // transpose
sh_x[tidx][tidy] = (xoff < width && yoff < end) ? x[yoff * width + xoff] : 0.0; sh_x[tidx][tidy] = (xoff < width && yoff < end) ?
x[yoff * width + xoff] : 0.0;
__syncthreads(); __syncthreads();
for (int t = 0; t < context; t++) { for (int t = 0; t < context; t++) {
sh_dy[tidx][tidy] = (xoff < width && (yoff - t) >= start && yoff - t < end) ? dy[(yoff - t) * width + xoff] : 0.0; sh_dy[tidx][tidy] = (xoff < width && (yoff - t) >= start &&
yoff - t < end) ? dy[(yoff - t) * width + xoff] : 0.0;
__syncthreads(); __syncthreads();
real val = sh_x[tidy][tidx] * sh_dy[tidy][tidx]; real val = sh_x[tidy][tidx] * sh_dy[tidy][tidx];
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册