diff --git a/paddle/fluid/operators/math/cross_entropy.cu b/paddle/fluid/operators/math/cross_entropy.cu index 84fa0d6af990e22083ec1a0e3993893cefad1ab5..55662e1d0aad7a1edee800bc2875ea5574b45e6e 100644 --- a/paddle/fluid/operators/math/cross_entropy.cu +++ b/paddle/fluid/operators/math/cross_entropy.cu @@ -66,18 +66,23 @@ class CrossEntropyFunctor { int batch_size = prob->dims()[0]; int class_num = prob->dims()[1]; +#ifdef __HIPCC__ + constexpr int kMaxBlockDim = 256; +#else + constexpr int kMaxBlockDim = 512; +#endif if (softLabel) { const T* label_data = labels->data(); - int block = class_num > 512 - ? 512 + int block = class_num > kMaxBlockDim + ? kMaxBlockDim : pow(2, static_cast(std::log2(class_num))); SoftCrossEntropyKernel<<>>( loss_data, prob_data, label_data, class_num); } else { const int64_t* label_data = labels->data(); - int block = 512; + int block = kMaxBlockDim; int grid = (batch_size + block - 1) / block; CrossEntropyKernel<<>>( loss_data, prob_data, label_data, batch_size, class_num, diff --git a/paddle/fluid/operators/math/softmax.cu b/paddle/fluid/operators/math/softmax.cu index 879e367281c0a359288a440c0107a47b1bfda95e..9e9fe5b9c1020db9800b187c7d7fc8b156f66b21 100644 --- a/paddle/fluid/operators/math/softmax.cu +++ b/paddle/fluid/operators/math/softmax.cu @@ -54,10 +54,11 @@ void SoftmaxCUDNNFunctor::operator()( xDesc.descriptor(layout, cudnn_tensor_dims); miopenTensorDescriptor_t cudnn_y_desc = xDesc.descriptor(layout, cudnn_tensor_dims); - PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::miopenSoftmaxForward( + PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::miopenSoftmaxForward_V2( context.cudnn_handle(), CudnnDataType::kOne(), cudnn_x_desc, X->data(), CudnnDataType::kZero(), cudnn_y_desc, - Y->mutable_data(context.GetPlace()))); + Y->mutable_data(context.GetPlace()), MIOPEN_SOFTMAX_ACCURATE, + MIOPEN_SOFTMAX_MODE_INSTANCE)); #else cudnnTensorDescriptor_t cudnn_x_desc = xDesc.descriptor(layout, cudnn_tensor_dims); @@ -96,11 +97,12 @@ void SoftmaxGradCUDNNFunctor::operator()( dxDesc.descriptor(layout, cudnn_tensor_dims); miopenTensorDescriptor_t cudnn_ygrad_desc = dyDesc.descriptor(layout, cudnn_tensor_dims); - PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::miopenSoftmaxBackward( + PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::miopenSoftmaxBackward_V2( context.cudnn_handle(), CudnnDataType::kOne(), cudnn_y_desc, Y->data(), cudnn_ygrad_desc, YGrad->data(), CudnnDataType::kZero(), cudnn_xgrad_desc, - XGrad->mutable_data(context.GetPlace()))); + XGrad->mutable_data(context.GetPlace()), MIOPEN_SOFTMAX_ACCURATE, + MIOPEN_SOFTMAX_MODE_INSTANCE)); #else cudnnTensorDescriptor_t cudnn_y_desc = yDesc.descriptor(layout, cudnn_tensor_dims); diff --git a/paddle/fluid/operators/softmax_with_cross_entropy_op.cu b/paddle/fluid/operators/softmax_with_cross_entropy_op.cu index 2257d816d892186e428690e947e40243b88c7aed..140059256c3cc954a56dbae24804d446e7d46ce9 100644 --- a/paddle/fluid/operators/softmax_with_cross_entropy_op.cu +++ b/paddle/fluid/operators/softmax_with_cross_entropy_op.cu @@ -672,7 +672,11 @@ template static void SoftmaxWithCrossEntropyFusedKernel( const T* logits_data, const T* labels_data, T* softmax_data, T* loss_data, int64_t n, int64_t d, int axis_dim, gpuStream_t stream) { +#ifdef __HIPCC__ + constexpr int kMaxBlockDim = 256; +#else constexpr int kMaxBlockDim = 512; +#endif int64_t block_dim = axis_dim >= kMaxBlockDim ? kMaxBlockDim : (1 << static_cast(std::log2(axis_dim))); diff --git a/paddle/fluid/platform/dynload/miopen.h b/paddle/fluid/platform/dynload/miopen.h index 15de4c64e3e64569e231e5ad301453458645604b..05b1fc891a0bf991aa1f1931313b00d8c42eea24 100644 --- a/paddle/fluid/platform/dynload/miopen.h +++ b/paddle/fluid/platform/dynload/miopen.h @@ -116,7 +116,9 @@ extern void EnforceCUDNNLoaded(const char* fn_name); __macro(miopenPoolingForward); \ __macro(miopenPoolingBackward); \ __macro(miopenSoftmaxBackward); \ + __macro(miopenSoftmaxBackward_V2); \ __macro(miopenSoftmaxForward); \ + __macro(miopenSoftmaxForward_V2); \ __macro(miopenCreateDropoutDescriptor); \ __macro(miopenDestroyDropoutDescriptor); \ __macro(miopenRestoreDropoutDescriptor); \