diff --git a/paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op.cu b/paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op.cu
index b9300f1b23b57281fb2392f4e907565039d1207e..8611249a29f636c07f915cfb2eda7069bbc99a38 100644
--- a/paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op.cu
+++ b/paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op.cu
@@ -29,7 +29,11 @@ namespace operators {
 
 using Tensor = framework::Tensor;
 
+#ifdef __HIPCC__
+static constexpr int kNumCUDAThreads = 256;
+#else
 static constexpr int kNumCUDAThreads = 512;
+#endif
 static constexpr int kNumMaxinumNumBlocks = 4096;
 
 static inline int NumBlocks(const int N) {
diff --git a/paddle/fluid/platform/cuda_helper.h b/paddle/fluid/platform/cuda_helper.h
index 30c38236c5244984c75eee9eb88fb452410a20ac..fa4ef3f8c124e407a2494828d390e2c8d6c2c8ca 100644
--- a/paddle/fluid/platform/cuda_helper.h
+++ b/paddle/fluid/platform/cuda_helper.h
@@ -76,10 +76,17 @@ namespace platform {
  *
  */
 
+#ifdef __HIPCC__
+#define CUDA_KERNEL_LOOP_TYPE(i, num, index_type)                      \
+  int64_t __index__ = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;  \
+  for (index_type i = __index__; __index__ < (num);                    \
+       __index__ += hipBlockDim_x * hipGridDim_x, i = __index__)
+#else
 #define CUDA_KERNEL_LOOP_TYPE(i, num, index_type)                \
   int64_t __index__ = blockIdx.x * blockDim.x + threadIdx.x;     \
   for (index_type i = __index__; __index__ < (num);              \
        __index__ += blockDim.x * gridDim.x, i = __index__)
+#endif
 
 #define CUDA_KERNEL_LOOP(i, num) CUDA_KERNEL_LOOP_TYPE(i, num, int)
 
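
For reference, a minimal sketch of how a kernel built on this macro is typically written and launched. The kernel name ScaleKernel, its arguments, and the commented launch line are illustrative assumptions, not part of the patch; kNumCUDAThreads, kNumMaxinumNumBlocks, and NumBlocks are the symbols from the operator file above.

// Illustrative grid-stride kernel using CUDA_KERNEL_LOOP (hypothetical, not from the patch).
// Under __HIPCC__ the macro expands to hipBlockIdx_x/hipBlockDim_x/hipThreadIdx_x;
// under plain CUDA it expands to blockIdx.x/blockDim.x/threadIdx.x.
template <typename T>
__global__ void ScaleKernel(const T* in, T* out, T alpha, int n) {
  CUDA_KERNEL_LOOP(i, n) {  // i advances by blockDim.x * gridDim.x each iteration
    out[i] = alpha * in[i];
  }
}

// Hypothetical launch mirroring the operator's sizing: 256 threads per block on HIP,
// 512 on CUDA, with the block count capped at kNumMaxinumNumBlocks (4096) by NumBlocks(n).
// ScaleKernel<float><<<NumBlocks(n), kNumCUDAThreads, 0, stream>>>(d_in, d_out, 2.0f, n);

The grid-stride form lets a fixed-size grid cover any n, which is why only the thread count per block needs to change between the CUDA and HIP builds.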