From 9e06a6414878fdc287472bd8d93a633c58403381 Mon Sep 17 00:00:00 2001 From: ronnywang <524019753@qq.com> Date: Fri, 2 Apr 2021 13:43:27 +0800 Subject: [PATCH] [ROCM] fix softmax_with_cross_entropy_op (#31982) --- paddle/fluid/operators/math/cross_entropy.cu | 11 ++++++++--- paddle/fluid/operators/math/softmax.cu | 10 ++++++---- .../fluid/operators/softmax_with_cross_entropy_op.cu | 4 ++++ paddle/fluid/platform/dynload/miopen.h | 2 ++ 4 files changed, 20 insertions(+), 7 deletions(-) diff --git a/paddle/fluid/operators/math/cross_entropy.cu b/paddle/fluid/operators/math/cross_entropy.cu index 84fa0d6af9..55662e1d0a 100644 --- a/paddle/fluid/operators/math/cross_entropy.cu +++ b/paddle/fluid/operators/math/cross_entropy.cu @@ -66,18 +66,23 @@ class CrossEntropyFunctor { int batch_size = prob->dims()[0]; int class_num = prob->dims()[1]; +#ifdef __HIPCC__ + constexpr int kMaxBlockDim = 256; +#else + constexpr int kMaxBlockDim = 512; +#endif if (softLabel) { const T* label_data = labels->data(); - int block = class_num > 512 - ? 512 + int block = class_num > kMaxBlockDim + ? kMaxBlockDim : pow(2, static_cast(std::log2(class_num))); SoftCrossEntropyKernel<<>>( loss_data, prob_data, label_data, class_num); } else { const int64_t* label_data = labels->data(); - int block = 512; + int block = kMaxBlockDim; int grid = (batch_size + block - 1) / block; CrossEntropyKernel<<>>( loss_data, prob_data, label_data, batch_size, class_num, diff --git a/paddle/fluid/operators/math/softmax.cu b/paddle/fluid/operators/math/softmax.cu index 879e367281..9e9fe5b9c1 100644 --- a/paddle/fluid/operators/math/softmax.cu +++ b/paddle/fluid/operators/math/softmax.cu @@ -54,10 +54,11 @@ void SoftmaxCUDNNFunctor::operator()( xDesc.descriptor(layout, cudnn_tensor_dims); miopenTensorDescriptor_t cudnn_y_desc = xDesc.descriptor(layout, cudnn_tensor_dims); - PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::miopenSoftmaxForward( + PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::miopenSoftmaxForward_V2( context.cudnn_handle(), CudnnDataType::kOne(), cudnn_x_desc, X->data(), CudnnDataType::kZero(), cudnn_y_desc, - Y->mutable_data(context.GetPlace()))); + Y->mutable_data(context.GetPlace()), MIOPEN_SOFTMAX_ACCURATE, + MIOPEN_SOFTMAX_MODE_INSTANCE)); #else cudnnTensorDescriptor_t cudnn_x_desc = xDesc.descriptor(layout, cudnn_tensor_dims); @@ -96,11 +97,12 @@ void SoftmaxGradCUDNNFunctor::operator()( dxDesc.descriptor(layout, cudnn_tensor_dims); miopenTensorDescriptor_t cudnn_ygrad_desc = dyDesc.descriptor(layout, cudnn_tensor_dims); - PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::miopenSoftmaxBackward( + PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::miopenSoftmaxBackward_V2( context.cudnn_handle(), CudnnDataType::kOne(), cudnn_y_desc, Y->data(), cudnn_ygrad_desc, YGrad->data(), CudnnDataType::kZero(), cudnn_xgrad_desc, - XGrad->mutable_data(context.GetPlace()))); + XGrad->mutable_data(context.GetPlace()), MIOPEN_SOFTMAX_ACCURATE, + MIOPEN_SOFTMAX_MODE_INSTANCE)); #else cudnnTensorDescriptor_t cudnn_y_desc = yDesc.descriptor(layout, cudnn_tensor_dims); diff --git a/paddle/fluid/operators/softmax_with_cross_entropy_op.cu b/paddle/fluid/operators/softmax_with_cross_entropy_op.cu index 2257d816d8..140059256c 100644 --- a/paddle/fluid/operators/softmax_with_cross_entropy_op.cu +++ b/paddle/fluid/operators/softmax_with_cross_entropy_op.cu @@ -672,7 +672,11 @@ template static void SoftmaxWithCrossEntropyFusedKernel( const T* logits_data, const T* labels_data, T* softmax_data, T* loss_data, int64_t n, int64_t d, int axis_dim, gpuStream_t stream) { +#ifdef __HIPCC__ + constexpr int kMaxBlockDim = 256; +#else constexpr int kMaxBlockDim = 512; +#endif int64_t block_dim = axis_dim >= kMaxBlockDim ? kMaxBlockDim : (1 << static_cast(std::log2(axis_dim))); diff --git a/paddle/fluid/platform/dynload/miopen.h b/paddle/fluid/platform/dynload/miopen.h index 15de4c64e3..05b1fc891a 100644 --- a/paddle/fluid/platform/dynload/miopen.h +++ b/paddle/fluid/platform/dynload/miopen.h @@ -116,7 +116,9 @@ extern void EnforceCUDNNLoaded(const char* fn_name); __macro(miopenPoolingForward); \ __macro(miopenPoolingBackward); \ __macro(miopenSoftmaxBackward); \ + __macro(miopenSoftmaxBackward_V2); \ __macro(miopenSoftmaxForward); \ + __macro(miopenSoftmaxForward_V2); \ __macro(miopenCreateDropoutDescriptor); \ __macro(miopenDestroyDropoutDescriptor); \ __macro(miopenRestoreDropoutDescriptor); \ -- GitLab