diff --git a/paddle/fluid/operators/dropout_impl.cu.h b/paddle/fluid/operators/dropout_impl.cu.h
index 21fdf69ac570ac6972173d77194275d629ce436f..2fa956a2e6515e8b6a8e1c463c8ab8d1476f8d90 100644
--- a/paddle/fluid/operators/dropout_impl.cu.h
+++ b/paddle/fluid/operators/dropout_impl.cu.h
@@ -36,6 +36,7 @@ limitations under the License. */
 #include "paddle/fluid/operators/elementwise/elementwise_op_impl.cu.h"
 #include "paddle/fluid/platform/aligned_vector.h"
 #include "paddle/fluid/platform/device/gpu/gpu_launch_config.h"
+#include "paddle/phi/kernels/funcs/functors.h"
 
 namespace paddle {
 namespace operators {
@@ -270,32 +271,38 @@ void DropoutGradGPUKernelDriver(const platform::CUDADeviceContext& dev_ctx,
                                 const Tensor& mask, int64_t size,
                                 Tensor* grad_x, bool is_test = false) {
   using MT = typename details::MPTypeTrait<T>::Type;
-  auto dX = EigenVector<T>::Flatten(*grad_x);
-  auto dY = EigenVector<T>::Flatten(grad_y);
-
-  auto& place = *dev_ctx.eigen_device();
+  auto stream = dev_ctx.stream();
+  MT factor;
   if (is_test) {
     if (dropout_implementation == "upscale_in_train") {
-      dX.device(place) = static_cast<T>(1) * dY;
+      factor = static_cast<MT>(1.0f);
     } else {
-      dX.device(place) = dY * static_cast<T>(1.0f - dropout_prob);
+      factor = static_cast<MT>(1.0f - dropout_prob);
     }
+    std::vector<const framework::Tensor*> ins = {&grad_y};
+    std::vector<framework::Tensor*> outs = {grad_x};
+    auto functor = phi::funcs::ScaleFunctor<T>(factor);
+    paddle::operators::LaunchSameDimsElementwiseCudaKernel<T>(dev_ctx, ins,
+                                                              &outs, functor);
   } else {
-    auto M = EigenVector<uint8_t>::Flatten(mask);
+    std::vector<const framework::Tensor*> ins = {&grad_y, &mask};
+    std::vector<framework::Tensor*> outs = {grad_x};
     if (dropout_implementation == "upscale_in_train") {
       if (dropout_prob == 1.0f) {
-        dX.device(place) = static_cast<T>(0) * dY;
+#ifdef PADDLE_WITH_HIP
+        hipMemset(grad_x->data<T>(), 0, size * sizeof(T));
+#else
+        cudaMemset(grad_x->data<T>(), 0, size * sizeof(T));
+#endif
       } else {
-        auto factor = static_cast<MT>(1.0f / (1.0f - dropout_prob));
-        auto stream = dev_ctx.stream();
-        std::vector<const framework::Tensor*> ins = {&grad_y, &mask};
-        std::vector<framework::Tensor*> outs = {grad_x};
-        auto functor = CudaDropoutGradFunctor<T, uint8_t>(factor);
+        factor = static_cast<MT>(1.0f / (1.0f - dropout_prob));
         paddle::operators::LaunchSameDimsElementwiseCudaKernel<T>(
-            dev_ctx, ins, &outs, functor);
+            dev_ctx, ins, &outs, CudaDropoutGradFunctor<T, uint8_t>(factor));
       }
     } else {
-      dX.device(place) = dY * M.cast<T>();
+      factor = static_cast<MT>(1.0f);
+      paddle::operators::LaunchSameDimsElementwiseCudaKernel<T>(
+          dev_ctx, ins, &outs, CudaDropoutGradFunctor<T, uint8_t>(factor));
     }
   }
 }
diff --git a/paddle/phi/kernels/funcs/functors.h b/paddle/phi/kernels/funcs/functors.h
index 5657bb047d7aa3a9b0f65d845d03e04c5b3636ae..d518a877b26f2c3d295eb0ceda8d4b862006e633 100644
--- a/paddle/phi/kernels/funcs/functors.h
+++ b/paddle/phi/kernels/funcs/functors.h
@@ -38,12 +38,15 @@ struct AddGradFunctor {
 
 template <typename T>
 struct ScaleFunctor {
-  explicit ScaleFunctor(const T coeff) : coeff_(coeff) {}
+  using MT = typename paddle::operators::details::MPTypeTrait<T>::Type;
+  explicit ScaleFunctor(const MT coeff) : coeff_(coeff) {}
 
-  inline HOSTDEVICE T operator()(T ele) { return ele * coeff_; }
+  inline HOSTDEVICE T operator()(T ele) {
+    return static_cast<T>(static_cast<MT>(ele) * coeff_);
+  }
 
  private:
-  T coeff_;
+  MT coeff_;
 };
 
 template <typename T>