From d0a5620575a3ce94e0a7a5a20192e9307b0b9c93 Mon Sep 17 00:00:00 2001
From: wangchaochaohu
Date: Tue, 5 Jan 2021 11:06:13 +0800
Subject: [PATCH] fix the compiler error when gcc4 cuda9.0 (#29997)

---
 paddle/fluid/operators/elementwise/elementwise_add_op.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/paddle/fluid/operators/elementwise/elementwise_add_op.h b/paddle/fluid/operators/elementwise/elementwise_add_op.h
index 731cef3d366..41e97a39466 100644
--- a/paddle/fluid/operators/elementwise/elementwise_add_op.h
+++ b/paddle/fluid/operators/elementwise/elementwise_add_op.h
@@ -179,6 +179,7 @@ __global__ void MatrixColReduce(const T *__restrict__ in, T *__restrict__ out,
   }
 }
 
+#if CUDA_VERSION >= 10000
 template
 __global__ void VecFP16MatrixColReduce(const __half2 *__restrict__ in,
                                        __half2 *__restrict__ out, size_t width,
@@ -199,6 +200,7 @@ __global__ void VecFP16MatrixColReduce(const __half2 *__restrict__ in,
   }
 #endif
 }
+#endif
 
 template
 __global__ void MatrixReduceLongWidth(const T *__restrict__ in, T *out,
@@ -365,6 +367,7 @@ class ElementwiseAddGradKernel : public ElemwiseGradKernel {
       int max_blocks = std::max(max_physical_threads / (block_x * block_y), 1);
       int theory_block = (width + blocks.x - 1) / blocks.x;
       dim3 grids(std::min(theory_block, max_blocks));
+#if CUDA_VERSION >= 10000
       if (std::is_same::value && width < 2048 &&
           width % 2 == 0 && height % 64 == 0) {
         auto &dev_ctx =
@@ -382,6 +385,7 @@ class ElementwiseAddGradKernel : public ElemwiseGradKernel {
             width, height);
         return;
       }
+#endif
 
       if (width / height < 32) {
         MatrixColReduce<<>>(
-- 
GitLab