diff --git a/paddle/fluid/operators/elementwise/elementwise_add_op.h b/paddle/fluid/operators/elementwise/elementwise_add_op.h index 0ef79667b8d66df8beaf512a95820e119816cbff..e78b0c03fcc756335a5adadaa08aa541a0b04942 100644 --- a/paddle/fluid/operators/elementwise/elementwise_add_op.h +++ b/paddle/fluid/operators/elementwise/elementwise_add_op.h @@ -22,6 +22,8 @@ limitations under the License. */ #include "paddle/fluid/operators/math/math_function.h" #ifdef PADDLE_WITH_CUDA #ifdef __NVCC__ +#include +#include #include "cub/cub.cuh" #endif #endif @@ -361,6 +363,7 @@ class ElementwiseAddGradKernel : public ElemwiseGradKernel { int max_blocks = std::max(max_physical_threads / (block_x * block_y), 1); int theory_block = (width + blocks.x - 1) / blocks.x; dim3 grids(std::min(theory_block, max_blocks)); +#if CUDA_ARCH_FP16_SUPPORTED(__CUDA_ARCH__) if (std::is_same::value && width < 2048 && width % 2 == 0 && height % 64 == 0) { auto &dev_ctx = @@ -378,6 +381,7 @@ class ElementwiseAddGradKernel : public ElemwiseGradKernel { width, height); return; } +#endif if (width / height < 32) { MatrixColReduce<<>>(