diff --git a/paddle/phi/kernels/funcs/dropout_impl.cu.h b/paddle/phi/kernels/funcs/dropout_impl.cu.h index 48a7008463c964ecfddae8335c82cea502ed2cea..a1fc2c225ecf2a82ebfcd7669f9973c33fe1b2f2 100644 --- a/paddle/phi/kernels/funcs/dropout_impl.cu.h +++ b/paddle/phi/kernels/funcs/dropout_impl.cu.h @@ -40,7 +40,7 @@ namespace funcs { template struct DstFunctor { - using MT = typename phi::kps::details::MPTypeTrait::Type; + using MT = typename phi::dtype::MPTypeTrait::Type; HOSTDEVICE inline DstFunctor(const float retain_prob, const bool is_upscale_in_train, @@ -90,7 +90,7 @@ struct MaskFunctor { template struct DstMaskFunctor { - using MT = typename phi::kps::details::MPTypeTrait::Type; + using MT = typename phi::dtype::MPTypeTrait::Type; HOSTDEVICE inline DstMaskFunctor(const float retain_prob, const bool is_upscale_in_train) : retain_prob_(retain_prob), is_upscale_in_train_(is_upscale_in_train) { @@ -386,7 +386,7 @@ void DropoutFwGPUKernelDriver( // y = x phi::Copy(dev_ctx, x, dev_ctx.GetPlace(), false, y); } else { - using MT = typename phi::kps::details::MPTypeTrait::Type; + using MT = typename phi::dtype::MPTypeTrait::Type; MT factor = static_cast(1.0f - dropout_prob); // y = factor * x ScaleByDropoutFactor(dev_ctx, x, y, factor); @@ -396,7 +396,7 @@ void DropoutFwGPUKernelDriver( template struct CudaDropoutGradFunctor { - using MT = typename phi::kps::details::MPTypeTrait::Type; + using MT = typename phi::dtype::MPTypeTrait::Type; explicit CudaDropoutGradFunctor(const MT factor) : factor_(factor) {} @@ -419,7 +419,7 @@ void DropoutGradGPUKernelDriver(const phi::GPUContext& dev_ctx, const phi::DenseTensor& mask, phi::DenseTensor* grad_x, bool is_dropout_nd = false) { - using MT = typename phi::kps::details::MPTypeTrait::Type; + using MT = typename phi::dtype::MPTypeTrait::Type; auto stream = dev_ctx.stream(); if (is_test) { diff --git a/paddle/phi/kernels/funcs/reduce_function.h b/paddle/phi/kernels/funcs/reduce_function.h index bf110fcdd9ea4c32a3a3080b44a88da78478f86f..5e738d431dfa6007320342082b8bf8e90ac7a848 100644 --- a/paddle/phi/kernels/funcs/reduce_function.h +++ b/paddle/phi/kernels/funcs/reduce_function.h @@ -1047,7 +1047,7 @@ void ReduceKernel(const KPDevice& dev_ctx, } #endif - using MPType = typename kps::details::MPTypeTrait::Type; + using MPType = typename phi::dtype::MPTypeTrait::Type; auto reducer = ReduceOp(); // launch ReduceHigherDimKernel // when reduce_dim.size() == 1 and reduce_dim[0] != x_dim.size() - 1, this diff --git a/paddle/phi/kernels/fusion/gpu/fused_dropout_add_grad_kernel.cu b/paddle/phi/kernels/fusion/gpu/fused_dropout_add_grad_kernel.cu index 6ea21f3bd48596430c8f721abe2cc916fd701a35..dce2f8e5247e7f6639cd6a8a8ec2d90a206f2fdd 100644 --- a/paddle/phi/kernels/fusion/gpu/fused_dropout_add_grad_kernel.cu +++ b/paddle/phi/kernels/fusion/gpu/fused_dropout_add_grad_kernel.cu @@ -62,7 +62,7 @@ __global__ void FuseScaleAddGradRateZero(const T* grad, template struct NoMaskBwFunctor { const float retain_prob_; - using MT = typename phi::kps::details::MPTypeTrait::Type; + using MT = typename phi::dtype::MPTypeTrait::Type; MT factor_; HOSTDEVICE inline NoMaskBwFunctor(const float retain_prob) : retain_prob_(retain_prob) { @@ -171,7 +171,7 @@ void FusedDropoutAddGradKernel(const Context& dev_ctx, auto* y_grad_data = dev_ctx.template Alloc(y_grad); const auto* out_grad_data = out_grad.data(); - using MT = typename phi::kps::details::MPTypeTrait::Type; + using MT = typename phi::dtype::MPTypeTrait::Type; int blocks = NumBlocks(numel); int threads = kNumCUDAThreads; diff --git a/paddle/phi/kernels/fusion/gpu/fused_dropout_add_kernel.cu b/paddle/phi/kernels/fusion/gpu/fused_dropout_add_kernel.cu index afdef3f4b58fa4917c27c30c087b5e00828c338e..3cb1a6742543a6bbc0292efdc2776495e7f92765 100644 --- a/paddle/phi/kernels/fusion/gpu/fused_dropout_add_kernel.cu +++ b/paddle/phi/kernels/fusion/gpu/fused_dropout_add_kernel.cu @@ -29,7 +29,7 @@ template struct NoMaskFwFunctor { const float retain_prob_; const bool is_upscale_in_train_; - using MT = typename phi::kps::details::MPTypeTrait::Type; + using MT = typename phi::dtype::MPTypeTrait::Type; MT factor; HOSTDEVICE inline NoMaskFwFunctor(const float retain_prob, const bool is_upscale_in_train) @@ -59,7 +59,7 @@ struct NoMaskFwFunctor { template struct ScaleAddFuctor { - using MT = typename phi::kps::details::MPTypeTrait::Type; + using MT = typename phi::dtype::MPTypeTrait::Type; explicit ScaleAddFuctor(const MT factor, bool upscale_in_train) : factor_(factor), upscale_in_train_(upscale_in_train) {} @@ -206,7 +206,7 @@ void FusedDropoutAddKernel(const Context& dev_ctx, dst_functor); #undef PD_DROPOUT_KERNEL_NAME } else { - using MT = typename phi::kps::details::MPTypeTrait::Type; + using MT = typename phi::dtype::MPTypeTrait::Type; MT factor = static_cast(1.0f - dropout_rate); std::vector outs = {out}; std::vector ins = {&x, &y}; diff --git a/paddle/phi/kernels/gpu/exponential_kernel.cu b/paddle/phi/kernels/gpu/exponential_kernel.cu index 7d6e1d54d1e37ab4508b20640c0e17114b66d0c2..3a29e1dd4a2d7a81ff3fb70fb9b376ff2b5ab937 100644 --- a/paddle/phi/kernels/gpu/exponential_kernel.cu +++ b/paddle/phi/kernels/gpu/exponential_kernel.cu @@ -25,7 +25,7 @@ void ExponentialKernel(const Context &dev_ctx, const DenseTensor &x, float lambda, DenseTensor *out) { - using MT = typename kps::details::MPTypeTrait::Type; + using MT = typename phi::dtype::MPTypeTrait::Type; phi::funcs::uniform_distribution dist; phi::funcs::exponential_transform trans(lambda); phi::funcs::distribution_and_transform(dev_ctx, out, dist, trans); diff --git a/paddle/phi/kernels/gpu/group_norm_grad_kernel.cu b/paddle/phi/kernels/gpu/group_norm_grad_kernel.cu index 3cbd1d8191cf51be69766da94e97aeeb5ebfdc89..a9980f805f869144e86a5521d03a32ca665d02cf 100644 --- a/paddle/phi/kernels/gpu/group_norm_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/group_norm_grad_kernel.cu @@ -107,7 +107,7 @@ __global__ void GroupNormBackward(const T* x, int group_size, float epsilon, T* d_x) { - // using AccT = typename kps::details::MPTypeTrait::Type; + // using AccT = typename phi::dtype::MPTypeTrait::Type; int gid = blockIdx.y; int cid = blockIdx.x; @@ -279,7 +279,7 @@ void GroupNormGradKernel(const Context& dev_ctx, DenseTensor* d_x, DenseTensor* d_scale, DenseTensor* d_bias) { - using AccT = typename kps::details::MPTypeTrait::Type; + using AccT = typename phi::dtype::MPTypeTrait::Type; const DataLayout data_layout = phi::StringToDataLayout(data_layout_str); const auto scale_ptr = scale.get_ptr(); const auto bias_ptr = bias.get_ptr(); diff --git a/paddle/phi/kernels/gpu/multinomial_kernel.cu b/paddle/phi/kernels/gpu/multinomial_kernel.cu index 039a5e2c8b9a3cf4cdb528ee90e073ba6c269592..effc963cd0a3d70c6063946a43edc84de213d809 100644 --- a/paddle/phi/kernels/gpu/multinomial_kernel.cu +++ b/paddle/phi/kernels/gpu/multinomial_kernel.cu @@ -132,7 +132,7 @@ void MultinomialKernel(const Context& dev_ctx, const Scalar& num_samples, bool replacement, DenseTensor* out) { - using MT = typename kps::details::MPTypeTrait::Type; + using MT = typename phi::dtype::MPTypeTrait::Type; auto int_num_samples = num_samples.to(); auto* in_data = x.data(); diff --git a/paddle/phi/kernels/gpu/reduce.h b/paddle/phi/kernels/gpu/reduce.h index 5ceb81eabd8cd56a72b9f66785e81e4adffdff9f..cc3cad38f46fbdf9154adea7eb82955600bee230 100644 --- a/paddle/phi/kernels/gpu/reduce.h +++ b/paddle/phi/kernels/gpu/reduce.h @@ -55,7 +55,7 @@ void Reduce(const KPDevice& dev_ctx, out_dtype, "ReduceKernel", ([&] { - using MPType = typename kps::details::MPTypeTrait::Type; + using MPType = typename phi::dtype::MPTypeTrait::Type; phi::funcs::ReduceKernel::Type; + using MPType = typename phi::dtype::MPTypeTrait::Type; phi::funcs::ReduceKernel>( dev_ctx, x, @@ -78,7 +78,7 @@ void Reduce(const KPDevice& dev_ctx, is_mean); } #else - using MPType = typename kps::details::MPTypeTrait::Type; + using MPType = typename phi::dtype::MPTypeTrait::Type; phi::funcs::ReduceKernel>( dev_ctx, x, diff --git a/paddle/phi/kernels/gpu/reduce_amin_amax_common.h b/paddle/phi/kernels/gpu/reduce_amin_amax_common.h index 04befb29b2de11d64754d656473b705e6e2c0470..fb0eace755ed50f8f901c49417ac764376522db8 100644 --- a/paddle/phi/kernels/gpu/reduce_amin_amax_common.h +++ b/paddle/phi/kernels/gpu/reduce_amin_amax_common.h @@ -81,7 +81,7 @@ void ReduceCudaAMaxAMinGrad(const Context& dev_ctx, funcs::BroadcastKernel( dev_ctx, equal_inputs, &equal_outputs, funcs::EqualFunctor(), 0); // 2. equal_count = reduceSum(equal_out) - using MPType = typename kps::details::MPTypeTrait::Type; + using MPType = typename phi::dtype::MPTypeTrait::Type; phi::funcs:: ReduceKernel>( dev_ctx, diff --git a/paddle/phi/kernels/gpu/reduce_mean_grad_kernel.cu b/paddle/phi/kernels/gpu/reduce_mean_grad_kernel.cu index 13683af9cb9c8113a76a23aa70ceff3e9fc71111..ccf95042b402b4e090094b002955c92efcebfde9 100644 --- a/paddle/phi/kernels/gpu/reduce_mean_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/reduce_mean_grad_kernel.cu @@ -52,7 +52,7 @@ void ReduceMeanGradKernel(const Context& dev_ctx, std::vector inputs = {&new_out_grad}; std::vector outputs = {x_grad}; - using MPType = typename kps::details::MPTypeTrait::Type; + using MPType = typename phi::dtype::MPTypeTrait::Type; funcs::BroadcastKernel( dev_ctx, inputs, &outputs, kps::DivideFunctor(reduce_num), 0); } diff --git a/paddle/phi/kernels/gpu/reduce_sum_grad_kernel.cu b/paddle/phi/kernels/gpu/reduce_sum_grad_kernel.cu index 3e88506f723d35d89f5d6231f51ed65c31ce7f4c..8083fb1ab2d0f8b600ea5b1f95177a5133d27800 100644 --- a/paddle/phi/kernels/gpu/reduce_sum_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/reduce_sum_grad_kernel.cu @@ -47,7 +47,7 @@ void ReduceSumGradKernel(const Context& dev_ctx, // call ReduceGrad dev_ctx.Alloc(x_grad, x.dtype()); - using MPType = typename kps::details::MPTypeTrait::Type; + using MPType = typename phi::dtype::MPTypeTrait::Type; phi::ReduceGrad>( dev_ctx, &new_out_grad, diff --git a/paddle/phi/kernels/gpu/rrelu_kernel.cu b/paddle/phi/kernels/gpu/rrelu_kernel.cu index b15e525a3bcce3f37e36201e79a39ef515677c49..78b8696bd107e840fe50f76b6997622da0dffca9 100644 --- a/paddle/phi/kernels/gpu/rrelu_kernel.cu +++ b/paddle/phi/kernels/gpu/rrelu_kernel.cu @@ -93,7 +93,7 @@ void RReluKernel(const Context& ctx, RReluTestCudaFunctor functor(x_data, out_data, noise_data, mid_val); for_range(functor); } else { - using MT = typename kps::details::MPTypeTrait::Type; + using MT = typename phi::dtype::MPTypeTrait::Type; funcs::uniform_distribution dist; funcs::uniform_real_transform trans(lower, upper); funcs::distribution_and_transform(ctx, noise, dist, trans); diff --git a/paddle/phi/kernels/gpu/uniform_inplace_kernel.cu b/paddle/phi/kernels/gpu/uniform_inplace_kernel.cu index 5c3a886ad87e97ad0eb8a2b3568d331f1c1016a7..653a64b127a254596ea9c8da8a29fa6e21e5854b 100644 --- a/paddle/phi/kernels/gpu/uniform_inplace_kernel.cu +++ b/paddle/phi/kernels/gpu/uniform_inplace_kernel.cu @@ -67,7 +67,7 @@ void UniformInplaceKernel(const Context& ctx, ctx.template Alloc(out); if (seed == 0) { // Use global Generator seed - using MT = typename kps::details::MPTypeTrait::Type; + using MT = typename phi::dtype::MPTypeTrait::Type; funcs::uniform_distribution dist; funcs::uniform_real_transform trans(min, max); funcs::distribution_and_transform(ctx, out, dist, trans); diff --git a/paddle/phi/kernels/gpu/uniform_kernel.cu b/paddle/phi/kernels/gpu/uniform_kernel.cu index 1ba5847fa29327190448ae4b1284f15117b967a7..04217db0a74c1a4362eee79b3a2227216c268a42 100644 --- a/paddle/phi/kernels/gpu/uniform_kernel.cu +++ b/paddle/phi/kernels/gpu/uniform_kernel.cu @@ -65,7 +65,7 @@ void UniformKernel(const Context& dev_ctx, dev_ctx.template Alloc(out); if (seed == 0) { // Use global Generator seed - using MT = typename kps::details::MPTypeTrait::Type; + using MT = typename phi::dtype::MPTypeTrait::Type; funcs::uniform_distribution dist; funcs::uniform_real_transform trans(min.to(), max.to()); funcs::distribution_and_transform(dev_ctx, out, dist, trans); diff --git a/paddle/phi/kernels/legacy/gpu/uniform_kernel.cu b/paddle/phi/kernels/legacy/gpu/uniform_kernel.cu index 211c7accf6fc462849b4c5b722abc1d49f270bf7..609238435c96f1e035bf311cf059f0ddae89879d 100644 --- a/paddle/phi/kernels/legacy/gpu/uniform_kernel.cu +++ b/paddle/phi/kernels/legacy/gpu/uniform_kernel.cu @@ -68,7 +68,7 @@ void UniformRawKernel(const Context& dev_ctx, dev_ctx.template Alloc(out); if (seed == 0) { // Use global Generator seed - using MT = typename kps::details::MPTypeTrait::Type; + using MT = typename phi::dtype::MPTypeTrait::Type; funcs::uniform_distribution dist; funcs::uniform_real_transform trans(min.to(), max.to()); funcs::distribution_and_transform(dev_ctx, out, dist, trans); diff --git a/paddle/phi/kernels/primitive/compute_primitives.h b/paddle/phi/kernels/primitive/compute_primitives.h index 24b961abb9ba47290b01f5766b015e178f7b449e..30c2636a2bde9188c31722d5daef16624b560307 100644 --- a/paddle/phi/kernels/primitive/compute_primitives.h +++ b/paddle/phi/kernels/primitive/compute_primitives.h @@ -22,7 +22,7 @@ #endif #include "paddle/phi/backends/gpu/gpu_device_function.h" -#include "paddle/phi/common/float16.h" +#include "paddle/phi/common/amp_type_traits.h" namespace phi { namespace kps { @@ -40,24 +40,6 @@ constexpr int kWarpSize = 32; // kLocalMode: thread reduce, each thread gets an output; enum ReduceMode { kGlobalMode, kLocalMode }; -template -class MPTypeTrait { - public: - using Type = T; -}; - -template <> -class MPTypeTrait { - public: - using Type = float; -}; - -template <> -class MPTypeTrait { - public: - using Type = float; -}; - /** * @brief Will be used in BlockYReduce, get the index of reduce_num in shared * memory.