未验证 提交 d0a56205 编写于 作者: W wangchaochaohu 提交者: GitHub

Fix the compile error when building with GCC 4 and CUDA 9.0 (#29997)

上级 1fa863da
...@@ -179,6 +179,7 @@ __global__ void MatrixColReduce(const T *__restrict__ in, T *__restrict__ out, ...@@ -179,6 +179,7 @@ __global__ void MatrixColReduce(const T *__restrict__ in, T *__restrict__ out,
} }
} }
#if CUDA_VERSION >= 10000
template <int SIZE> template <int SIZE>
__global__ void VecFP16MatrixColReduce(const __half2 *__restrict__ in, __global__ void VecFP16MatrixColReduce(const __half2 *__restrict__ in,
__half2 *__restrict__ out, size_t width, __half2 *__restrict__ out, size_t width,
...@@ -199,6 +200,7 @@ __global__ void VecFP16MatrixColReduce(const __half2 *__restrict__ in, ...@@ -199,6 +200,7 @@ __global__ void VecFP16MatrixColReduce(const __half2 *__restrict__ in,
} }
#endif #endif
} }
#endif
template <typename T> template <typename T>
__global__ void MatrixReduceLongWidth(const T *__restrict__ in, T *out, __global__ void MatrixReduceLongWidth(const T *__restrict__ in, T *out,
...@@ -365,6 +367,7 @@ class ElementwiseAddGradKernel : public ElemwiseGradKernel<T> { ...@@ -365,6 +367,7 @@ class ElementwiseAddGradKernel : public ElemwiseGradKernel<T> {
int max_blocks = std::max(max_physical_threads / (block_x * block_y), 1); int max_blocks = std::max(max_physical_threads / (block_x * block_y), 1);
int theory_block = (width + blocks.x - 1) / blocks.x; int theory_block = (width + blocks.x - 1) / blocks.x;
dim3 grids(std::min(theory_block, max_blocks)); dim3 grids(std::min(theory_block, max_blocks));
#if CUDA_VERSION >= 10000
if (std::is_same<T, paddle::platform::float16>::value && width < 2048 && if (std::is_same<T, paddle::platform::float16>::value && width < 2048 &&
width % 2 == 0 && height % 64 == 0) { width % 2 == 0 && height % 64 == 0) {
auto &dev_ctx = auto &dev_ctx =
...@@ -382,6 +385,7 @@ class ElementwiseAddGradKernel : public ElemwiseGradKernel<T> { ...@@ -382,6 +385,7 @@ class ElementwiseAddGradKernel : public ElemwiseGradKernel<T> {
width, height); width, height);
return; return;
} }
#endif
if (width / height < 32) { if (width / height < 32) {
MatrixColReduce<T, block_x, block_y><<<grids, blocks, 0, stream>>>( MatrixColReduce<T, block_x, block_y><<<grids, blocks, 0, stream>>>(
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册