未验证 提交 f350aa59 编写于 作者: W wangchaochaohu 提交者: GitHub

Fix the compiler error for half type (#29799)

上级 27aa1515
@@ -22,6 +22,8 @@ limitations under the License. */
#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/math_function.h"
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
#ifdef __NVCC__ #ifdef __NVCC__
#include <cuda.h>
#include <cuda_fp16.h>
#include "cub/cub.cuh" #include "cub/cub.cuh"
#endif #endif
#endif #endif
@@ -361,6 +363,7 @@ class ElementwiseAddGradKernel : public ElemwiseGradKernel<T> {
int max_blocks = std::max(max_physical_threads / (block_x * block_y), 1); int max_blocks = std::max(max_physical_threads / (block_x * block_y), 1);
int theory_block = (width + blocks.x - 1) / blocks.x; int theory_block = (width + blocks.x - 1) / blocks.x;
dim3 grids(std::min(theory_block, max_blocks)); dim3 grids(std::min(theory_block, max_blocks));
#if CUDA_ARCH_FP16_SUPPORTED(__CUDA_ARCH__)
if (std::is_same<T, paddle::platform::float16>::value && width < 2048 && if (std::is_same<T, paddle::platform::float16>::value && width < 2048 &&
width % 2 == 0 && height % 64 == 0) { width % 2 == 0 && height % 64 == 0) {
auto &dev_ctx = auto &dev_ctx =
@@ -378,6 +381,7 @@ class ElementwiseAddGradKernel : public ElemwiseGradKernel<T> {
width, height); width, height);
return; return;
} }
#endif
if (width / height < 32) { if (width / height < 32) {
MatrixColReduce<T, block_x, block_y><<<grids, blocks, 0, stream>>>( MatrixColReduce<T, block_x, block_y><<<grids, blocks, 0, stream>>>(
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册