Remove redundant definition of MPTypeTrait. (#54756)

f469f176 · Yiqun Liu · GitHub · 80975d45 · f469f176 · f469f176
16 changed files
--- a/paddle/phi/kernels/funcs/dropout_impl.cu.h
+++ b/paddle/phi/kernels/funcs/dropout_impl.cu.h
@@ -40,7 +40,7 @@ namespace funcs {

 template <typename T>
 struct DstFunctor {
-  using MT = typename phi::kps::details::MPTypeTrait<T>::Type;
+  using MT = typename phi::dtype::MPTypeTrait<T>::Type;

  HOSTDEVICE inline DstFunctor(const float retain_prob,
                               const bool is_upscale_in_train,
@@ -90,7 +90,7 @@ struct MaskFunctor {

 template <typename T>
 struct DstMaskFunctor {
-  using MT = typename phi::kps::details::MPTypeTrait<T>::Type;
+  using MT = typename phi::dtype::MPTypeTrait<T>::Type;
  HOSTDEVICE inline DstMaskFunctor(const float retain_prob,
                                   const bool is_upscale_in_train)
      : retain_prob_(retain_prob), is_upscale_in_train_(is_upscale_in_train) {
@@ -386,7 +386,7 @@ void DropoutFwGPUKernelDriver(
      // y = x
      phi::Copy(dev_ctx, x, dev_ctx.GetPlace(), false, y);
    } else {
-      using MT = typename phi::kps::details::MPTypeTrait<T>::Type;
+      using MT = typename phi::dtype::MPTypeTrait<T>::Type;
      MT factor = static_cast<MT>(1.0f - dropout_prob);
      // y = factor * x
      ScaleByDropoutFactor<T, MT>(dev_ctx, x, y, factor);
@@ -396,7 +396,7 @@ void DropoutFwGPUKernelDriver(

 template <typename T>
 struct CudaDropoutGradFunctor {
-  using MT = typename phi::kps::details::MPTypeTrait<T>::Type;
+  using MT = typename phi::dtype::MPTypeTrait<T>::Type;

  explicit CudaDropoutGradFunctor(const MT factor) : factor_(factor) {}

@@ -419,7 +419,7 @@ void DropoutGradGPUKernelDriver(const phi::GPUContext& dev_ctx,
                                const phi::DenseTensor& mask,
                                phi::DenseTensor* grad_x,
                                bool is_dropout_nd = false) {
-  using MT = typename phi::kps::details::MPTypeTrait<T>::Type;
+  using MT = typename phi::dtype::MPTypeTrait<T>::Type;

  auto stream = dev_ctx.stream();
  if (is_test) {

--- a/paddle/phi/kernels/funcs/reduce_function.h
+++ b/paddle/phi/kernels/funcs/reduce_function.h
@@ -1047,7 +1047,7 @@ void ReduceKernel(const KPDevice& dev_ctx,
  }
 #endif

-  using MPType = typename kps::details::MPTypeTrait<Ty>::Type;
+  using MPType = typename phi::dtype::MPTypeTrait<Ty>::Type;
  auto reducer = ReduceOp<MPType>();
  // launch ReduceHigherDimKernel
  // when reduce_dim.size() == 1 and reduce_dim[0] != x_dim.size() - 1, this

--- a/paddle/phi/kernels/fusion/gpu/fused_dropout_add_grad_kernel.cu
+++ b/paddle/phi/kernels/fusion/gpu/fused_dropout_add_grad_kernel.cu
@@ -62,7 +62,7 @@ __global__ void FuseScaleAddGradRateZero(const T* grad,
 template <typename T1, typename T2 = T1, typename OutT = T1>
 struct NoMaskBwFunctor {
  const float retain_prob_;
-  using MT = typename phi::kps::details::MPTypeTrait<T1>::Type;
+  using MT = typename phi::dtype::MPTypeTrait<T1>::Type;
  MT factor_;
  HOSTDEVICE inline NoMaskBwFunctor(const float retain_prob)
      : retain_prob_(retain_prob) {
@@ -171,7 +171,7 @@ void FusedDropoutAddGradKernel(const Context& dev_ctx,
  auto* y_grad_data = dev_ctx.template Alloc<T>(y_grad);

  const auto* out_grad_data = out_grad.data<T>();
-  using MT = typename phi::kps::details::MPTypeTrait<T>::Type;
+  using MT = typename phi::dtype::MPTypeTrait<T>::Type;
  int blocks = NumBlocks(numel);
  int threads = kNumCUDAThreads;


--- a/paddle/phi/kernels/fusion/gpu/fused_dropout_add_kernel.cu
+++ b/paddle/phi/kernels/fusion/gpu/fused_dropout_add_kernel.cu
@@ -29,7 +29,7 @@ template <typename T1, typename T2 = T1, typename OutT = T1>
 struct NoMaskFwFunctor {
  const float retain_prob_;
  const bool is_upscale_in_train_;
-  using MT = typename phi::kps::details::MPTypeTrait<T1>::Type;
+  using MT = typename phi::dtype::MPTypeTrait<T1>::Type;
  MT factor;
  HOSTDEVICE inline NoMaskFwFunctor(const float retain_prob,
                                    const bool is_upscale_in_train)
@@ -59,7 +59,7 @@ struct NoMaskFwFunctor {

 template <typename T>
 struct ScaleAddFuctor {
-  using MT = typename phi::kps::details::MPTypeTrait<T>::Type;
+  using MT = typename phi::dtype::MPTypeTrait<T>::Type;
  explicit ScaleAddFuctor(const MT factor, bool upscale_in_train)
      : factor_(factor), upscale_in_train_(upscale_in_train) {}

@@ -206,7 +206,7 @@ void FusedDropoutAddKernel(const Context& dev_ctx,
                                       dst_functor);
 #undef PD_DROPOUT_KERNEL_NAME
  } else {
-    using MT = typename phi::kps::details::MPTypeTrait<T>::Type;
+    using MT = typename phi::dtype::MPTypeTrait<T>::Type;
    MT factor = static_cast<MT>(1.0f - dropout_rate);
    std::vector<phi::DenseTensor*> outs = {out};
    std::vector<const phi::DenseTensor*> ins = {&x, &y};

--- a/paddle/phi/kernels/gpu/exponential_kernel.cu
+++ b/paddle/phi/kernels/gpu/exponential_kernel.cu
@@ -25,7 +25,7 @@ void ExponentialKernel(const Context &dev_ctx,
                       const DenseTensor &x,
                       float lambda,
                       DenseTensor *out) {
-  using MT = typename kps::details::MPTypeTrait<T>::Type;
+  using MT = typename phi::dtype::MPTypeTrait<T>::Type;
  phi::funcs::uniform_distribution<MT> dist;
  phi::funcs::exponential_transform<MT> trans(lambda);
  phi::funcs::distribution_and_transform<T>(dev_ctx, out, dist, trans);

--- a/paddle/phi/kernels/gpu/group_norm_grad_kernel.cu
+++ b/paddle/phi/kernels/gpu/group_norm_grad_kernel.cu
@@ -107,7 +107,7 @@ __global__ void GroupNormBackward(const T* x,
                                  int group_size,
                                  float epsilon,
                                  T* d_x) {
-  // using AccT = typename kps::details::MPTypeTrait<T>::Type;
+  // using AccT = typename phi::dtype::MPTypeTrait<T>::Type;

  int gid = blockIdx.y;
  int cid = blockIdx.x;
@@ -279,7 +279,7 @@ void GroupNormGradKernel(const Context& dev_ctx,
                         DenseTensor* d_x,
                         DenseTensor* d_scale,
                         DenseTensor* d_bias) {
-  using AccT = typename kps::details::MPTypeTrait<T>::Type;
+  using AccT = typename phi::dtype::MPTypeTrait<T>::Type;
  const DataLayout data_layout = phi::StringToDataLayout(data_layout_str);
  const auto scale_ptr = scale.get_ptr();
  const auto bias_ptr = bias.get_ptr();

--- a/paddle/phi/kernels/gpu/multinomial_kernel.cu
+++ b/paddle/phi/kernels/gpu/multinomial_kernel.cu
@@ -132,7 +132,7 @@ void MultinomialKernel(const Context& dev_ctx,
                       const Scalar& num_samples,
                       bool replacement,
                       DenseTensor* out) {
-  using MT = typename kps::details::MPTypeTrait<T>::Type;
+  using MT = typename phi::dtype::MPTypeTrait<T>::Type;

  auto int_num_samples = num_samples.to<int>();
  auto* in_data = x.data<T>();

--- a/paddle/phi/kernels/gpu/reduce.h
+++ b/paddle/phi/kernels/gpu/reduce.h
@@ -55,7 +55,7 @@ void Reduce(const KPDevice& dev_ctx,
        out_dtype,
        "ReduceKernel",
        ([&] {
-          using MPType = typename kps::details::MPTypeTrait<data_t>::Type;
+          using MPType = typename phi::dtype::MPTypeTrait<data_t>::Type;
          phi::funcs::ReduceKernel<data_t,
                                   data_t,
                                   ReduceOp,
@@ -68,7 +68,7 @@ void Reduce(const KPDevice& dev_ctx,
              is_mean);
        }));
  } else {
-    using MPType = typename kps::details::MPTypeTrait<T>::Type;
+    using MPType = typename phi::dtype::MPTypeTrait<T>::Type;
    phi::funcs::ReduceKernel<T, T, ReduceOp, TransformOp<T, MPType>>(
        dev_ctx,
        x,
@@ -78,7 +78,7 @@ void Reduce(const KPDevice& dev_ctx,
        is_mean);
  }
 #else
-  using MPType = typename kps::details::MPTypeTrait<T>::Type;
+  using MPType = typename phi::dtype::MPTypeTrait<T>::Type;
  phi::funcs::ReduceKernel<T, T, ReduceOp, TransformOp<T, MPType>>(
      dev_ctx,
      x,

--- a/paddle/phi/kernels/gpu/reduce_amin_amax_common.h
+++ b/paddle/phi/kernels/gpu/reduce_amin_amax_common.h
@@ -81,7 +81,7 @@ void ReduceCudaAMaxAMinGrad(const Context& dev_ctx,
  funcs::BroadcastKernel<T>(
      dev_ctx, equal_inputs, &equal_outputs, funcs::EqualFunctor<T>(), 0);
  // 2. equal_count = reduceSum(equal_out)
-  using MPType = typename kps::details::MPTypeTrait<T>::Type;
+  using MPType = typename phi::dtype::MPTypeTrait<T>::Type;
  phi::funcs::
      ReduceKernel<T, T, kps::AddFunctor, kps::IdentityFunctor<T, MPType>>(
          dev_ctx,

--- a/paddle/phi/kernels/gpu/reduce_mean_grad_kernel.cu
+++ b/paddle/phi/kernels/gpu/reduce_mean_grad_kernel.cu
@@ -52,7 +52,7 @@ void ReduceMeanGradKernel(const Context& dev_ctx,
  std::vector<const DenseTensor*> inputs = {&new_out_grad};
  std::vector<DenseTensor*> outputs = {x_grad};

-  using MPType = typename kps::details::MPTypeTrait<T>::Type;
+  using MPType = typename phi::dtype::MPTypeTrait<T>::Type;
  funcs::BroadcastKernel<T>(
      dev_ctx, inputs, &outputs, kps::DivideFunctor<T, MPType>(reduce_num), 0);
 }

--- a/paddle/phi/kernels/gpu/reduce_sum_grad_kernel.cu
+++ b/paddle/phi/kernels/gpu/reduce_sum_grad_kernel.cu
@@ -47,7 +47,7 @@ void ReduceSumGradKernel(const Context& dev_ctx,

  // call ReduceGrad
  dev_ctx.Alloc(x_grad, x.dtype());
-  using MPType = typename kps::details::MPTypeTrait<T>::Type;
+  using MPType = typename phi::dtype::MPTypeTrait<T>::Type;
  phi::ReduceGrad<kps::IdentityFunctor<T, MPType>>(
      dev_ctx,
      &new_out_grad,

--- a/paddle/phi/kernels/gpu/rrelu_kernel.cu
+++ b/paddle/phi/kernels/gpu/rrelu_kernel.cu
@@ -93,7 +93,7 @@ void RReluKernel(const Context& ctx,
    RReluTestCudaFunctor<T> functor(x_data, out_data, noise_data, mid_val);
    for_range(functor);
  } else {
-    using MT = typename kps::details::MPTypeTrait<T>::Type;
+    using MT = typename phi::dtype::MPTypeTrait<T>::Type;
    funcs::uniform_distribution<MT> dist;
    funcs::uniform_real_transform<MT> trans(lower, upper);
    funcs::distribution_and_transform<T>(ctx, noise, dist, trans);

--- a/paddle/phi/kernels/gpu/uniform_inplace_kernel.cu
+++ b/paddle/phi/kernels/gpu/uniform_inplace_kernel.cu
@@ -67,7 +67,7 @@ void UniformInplaceKernel(const Context& ctx,
  ctx.template Alloc<T>(out);
  if (seed == 0) {
    // Use global Generator seed
-    using MT = typename kps::details::MPTypeTrait<T>::Type;
+    using MT = typename phi::dtype::MPTypeTrait<T>::Type;
    funcs::uniform_distribution<MT> dist;
    funcs::uniform_real_transform<MT> trans(min, max);
    funcs::distribution_and_transform<T>(ctx, out, dist, trans);

--- a/paddle/phi/kernels/gpu/uniform_kernel.cu
+++ b/paddle/phi/kernels/gpu/uniform_kernel.cu
@@ -65,7 +65,7 @@ void UniformKernel(const Context& dev_ctx,
  dev_ctx.template Alloc<T>(out);
  if (seed == 0) {
    // Use global Generator seed
-    using MT = typename kps::details::MPTypeTrait<T>::Type;
+    using MT = typename phi::dtype::MPTypeTrait<T>::Type;
    funcs::uniform_distribution<MT> dist;
    funcs::uniform_real_transform<MT> trans(min.to<float>(), max.to<float>());
    funcs::distribution_and_transform<T>(dev_ctx, out, dist, trans);

--- a/paddle/phi/kernels/legacy/gpu/uniform_kernel.cu
+++ b/paddle/phi/kernels/legacy/gpu/uniform_kernel.cu
@@ -68,7 +68,7 @@ void UniformRawKernel(const Context& dev_ctx,
  dev_ctx.template Alloc<T>(out);
  if (seed == 0) {
    // Use global Generator seed
-    using MT = typename kps::details::MPTypeTrait<T>::Type;
+    using MT = typename phi::dtype::MPTypeTrait<T>::Type;
    funcs::uniform_distribution<MT> dist;
    funcs::uniform_real_transform<MT> trans(min.to<float>(), max.to<float>());
    funcs::distribution_and_transform<T>(dev_ctx, out, dist, trans);

--- a/paddle/phi/kernels/primitive/compute_primitives.h
+++ b/paddle/phi/kernels/primitive/compute_primitives.h
@@ -22,7 +22,7 @@
 #endif

 #include "paddle/phi/backends/gpu/gpu_device_function.h"
-#include "paddle/phi/common/float16.h"
+#include "paddle/phi/common/amp_type_traits.h"

 namespace phi {
 namespace kps {
@@ -40,24 +40,6 @@ constexpr int kWarpSize = 32;
 // kLocalMode: thread reduce, each thread gets an output;
 enum ReduceMode { kGlobalMode, kLocalMode };

-template <typename T>
-class MPTypeTrait {
- public:
-  using Type = T;
-};
-
-template <>
-class MPTypeTrait<phi::dtype::float16> {
- public:
-  using Type = float;
-};
-
-template <>
-class MPTypeTrait<phi::dtype::bfloat16> {
- public:
-  using Type = float;
-};
-
 /**
 * @brief Will be used in BlockYReduce, get the index of reduce_num in shared
 * memory.