Unverified commit 0d46a108, authored by Feiyu Chan, committed by GitHub

[Pten] move paddle/operators/math/functors.h and compound_functors.h (#39514)

* move paddle/operators/math/functors.h
* move paddle/operators/math/compound_functors.h
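
For downstream code this is a pure header/namespace migration. A minimal before/after sketch (ReluFunctor is taken from this diff; the call site itself is illustrative, assuming the single-argument operator() of the moved ReluFunctor):

    // Before: functors lived in paddle::operators::math
    // #include "paddle/fluid/operators/math/functors.h"
    // paddle::operators::math::ReluFunctor<float> relu;

    // After: the same functors live in pten::funcs
    #include "paddle/pten/kernels/funcs/functors.h"

    pten::funcs::ReluFunctor<float> relu;
    float y = relu(-1.0f);  // relu clamps negatives to zero, so y == 0.0f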
Parent 70714d1b
@@ -20,8 +20,8 @@ limitations under the License. */
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/operators/math/blas.h"
-#include "paddle/fluid/operators/math/functors.h"
 #include "paddle/fluid/platform/transform.h"
+#include "paddle/pten/kernels/funcs/functors.h"

 namespace paddle {
 namespace operators {
......
@@ -20,12 +20,11 @@ limitations under the License. */
 #include "paddle/fluid/operators/amp/fp16_type_traits.h"
 #include "paddle/fluid/operators/fused/fused_dropout_act_bias.h"
 #include "paddle/fluid/operators/fused/fused_dropout_test.h"
-#include "paddle/fluid/operators/math/functors.h"
+#include "paddle/pten/kernels/funcs/functors.h"

 namespace framework = paddle::framework;
 namespace platform = paddle::platform;
 namespace details = paddle::operators::details;
-namespace math = paddle::operators::math;

 /**
  * @brief the unittest of fused_dropout_act_bias
@@ -283,12 +282,14 @@ static void BaseTest(const bool is_fp16 = false) {
 }

 TEST(FusedDropout, GPUFusedDorpoutActBias) {
-  BaseTest<float, math::ReluFunctor<float>, math::ReluGradFunctor<float>>();
+  BaseTest<float, pten::funcs::ReluFunctor<float>,
+           pten::funcs::ReluGradFunctor<float>>();
   BaseTest<float, paddle::operators::GeluFunctor<float>,
            paddle::operators::GeluGradFunctor<float>>();
 }

 TEST(FusedDropout, GPUFusedDropoutActBiasDouble) {
-  BaseTest<double, math::ReluFunctor<double>, math::ReluGradFunctor<double>>();
+  BaseTest<double, pten::funcs::ReluFunctor<double>,
+           pten::funcs::ReluGradFunctor<double>>();
   BaseTest<double, paddle::operators::GeluFunctor<double>,
            paddle::operators::GeluGradFunctor<double>>();
 }
@@ -296,15 +297,16 @@ TEST(FusedDropout, GPUFusedDropoutActBiasDouble) {
 // test fp16, For inference, check_grad is not required. ref: test_dropout_op.py
 TEST(FusedDropout, GPUFusedDropoutActBiasFp16) {
   using fp16 = platform::float16;
-  BaseTest<fp16, math::ReluFunctor<fp16>, math::ReluGradFunctor<fp16>>(true);
+  BaseTest<fp16, pten::funcs::ReluFunctor<fp16>,
+           pten::funcs::ReluGradFunctor<fp16>>(true);
 }

 TEST(FusedDropout, GPUFusedDropoutActBiasIsUpscaleInTrain) {
   const int rows = 16;
   const int cols = 16;
   for (auto is_upscale_in_train : {true, false}) {
-    TestFusedDropoutActBias<float, math::ReluFunctor<float>,
-                            math::ReluGradFunctor<float>>
+    TestFusedDropoutActBias<float, pten::funcs::ReluFunctor<float>,
+                            pten::funcs::ReluGradFunctor<float>>
         test(rows, cols, 0, 1.0, is_upscale_in_train, false);
     test.Run();
     test.CheckOut(static_cast<float>(1e-5));
@@ -315,8 +317,8 @@ TEST(FusedDropout, GPUFusedDropoutActBiasIsUpscaleInTrain) {
 TEST(FusedDropout, GPUFusedDropoutActBiasIsTest) {
   const int rows = 16;
   const int cols = 16;
-  TestFusedDropoutActBias<float, math::ReluFunctor<float>,
-                          math::ReluGradFunctor<float>>
+  TestFusedDropoutActBias<float, pten::funcs::ReluFunctor<float>,
+                          pten::funcs::ReluGradFunctor<float>>
       test(rows, cols, 0, 0.35, true, true);
   test.Run();
   test.CheckOut(static_cast<float>(1e-5));
@@ -326,8 +328,8 @@ TEST(FusedDropout, GPUFusedDropoutActBiasIsTest) {
 TEST(FusedDropout, GPUFusedDropoutActBiasSeed) {
   const int rows = 16;
   const int cols = 16;
-  TestFusedDropoutActBias<float, math::ReluFunctor<float>,
-                          math::ReluGradFunctor<float>>
+  TestFusedDropoutActBias<float, pten::funcs::ReluFunctor<float>,
+                          pten::funcs::ReluGradFunctor<float>>
      test(rows, cols, 125, 0.0, false, false);
   test.Run();
   test.CheckOut(static_cast<float>(1e-5));
@@ -337,8 +339,8 @@ TEST(FusedDropout, GPUFusedDropoutActBiasSeed) {
 TEST(FusedDropout, GPUFusedDropoutActBiasLargeShape) {
   const int rows = 256;
   const int cols = 4096;
-  TestFusedDropoutActBias<float, math::ReluFunctor<float>,
-                          math::ReluGradFunctor<float>>
+  TestFusedDropoutActBias<float, pten::funcs::ReluFunctor<float>,
+                          pten::funcs::ReluGradFunctor<float>>
       test(rows, cols);
   test.Run();
   test.CheckOut(static_cast<float>(1e-5));
......
@@ -21,12 +21,12 @@ limitations under the License. */
 #include "paddle/fluid/memory/memory.h"
 #include "paddle/fluid/operators/amp/fp16_type_traits.h"
 #include "paddle/fluid/operators/layer_norm_kernel.cu.h"
-#include "paddle/fluid/operators/math/functors.h"
 #include "paddle/fluid/platform/aligned_vector.h"
 #include "paddle/fluid/platform/device/gpu/gpu_device_function.h"
 #include "paddle/fluid/platform/device/gpu/gpu_launch_config.h"
 #include "paddle/fluid/platform/device_context.h"
 #include "paddle/fluid/platform/float16.h"
+#include "paddle/pten/kernels/funcs/functors.h"

 namespace paddle {
 namespace operators {
......
@@ -19,7 +19,7 @@ limitations under the License. */
 #include "paddle/fluid/operators/fused/fused_dropout_act_bias.h"
 #include "paddle/fluid/operators/fused/fused_layernorm_residual_dropout_bias.h"
 #include "paddle/fluid/operators/fused/fused_residual_dropout_bias.h"
-#include "paddle/fluid/operators/math/functors.h"
+#include "paddle/pten/kernels/funcs/functors.h"

 namespace paddle {
 namespace operators {
@@ -167,8 +167,8 @@ class FusedDropoutHelper {
           dropout_param_.dropout_prob, dropout_param_.is_upscale_in_train,
           dropout_param_.is_test, src, bias, out, mask, ctx);
     } else if (act_method == "relu") {
-      math::ReluFunctor<T> relu;
-      LaunchDropoutActBias<T, MaskType, math::ReluFunctor<T>>(
+      pten::funcs::ReluFunctor<T> relu;
+      LaunchDropoutActBias<T, MaskType, pten::funcs::ReluFunctor<T>>(
           relu, dropout_param_.seed, rows_, cols_, increment,
           dropout_param_.dropout_prob, dropout_param_.is_upscale_in_train,
           dropout_param_.is_test, src, bias, out, mask, ctx);
@@ -187,8 +187,8 @@ class FusedDropoutHelper {
           gelu_grad, dout, mask, src, bias, dropout_param_.dropout_prob,
           dropout_param_.is_upscale_in_train, rows_, cols_, d_src, d_bias, ctx);
     } else if (act_method == "relu") {
-      math::ReluGradFunctor<T> relu_grad;
-      LaunchDropoutActBiasGrad<T, MaskType, math::ReluGradFunctor<T>>(
+      pten::funcs::ReluGradFunctor<T> relu_grad;
+      LaunchDropoutActBiasGrad<T, MaskType, pten::funcs::ReluGradFunctor<T>>(
           relu_grad, dout, mask, src, bias, dropout_param_.dropout_prob,
           dropout_param_.is_upscale_in_train, rows_, cols_, d_src, d_bias, ctx);
     } else {
......
@@ -122,12 +122,12 @@ __global__ void FusedLayernormResidualDropoutBias(
   __shared__ U shared_mean[32];
   __shared__ U shared_var[32];

-  math::ReluFunctor<T> relu;
+  pten::funcs::ReluFunctor<T> relu;
   U mean_val = 0;
   U var_val = 0;
   for (int i = col_id * VecSize; i < cols; i += blockDim.x * VecSize) {
     FusedResidualDropoutBiasOneThread<T, MaskType, VecSize, true, false,
-                                      math::ReluFunctor<T>>(
+                                      pten::funcs::ReluFunctor<T>>(
         row_id, i, cols, &state, dropout_prob, factor, src, residual, bias, dst,
         mask, is_test, &mean_val, &var_val, relu);
   }
......
@@ -115,12 +115,12 @@ __global__ void FusedResidualDropoutBias(
   curandStatePhilox4_32_10_t state;
   curand_init(seed, idx, increment, &state);
   const T factor = GetFactor<T>(dropout_prob, is_upscale_in_train, is_test);
-  math::ReluFunctor<T> relu;
+  pten::funcs::ReluFunctor<T> relu;
   for (int r = row_id; r < rows; r += blockDim.y * gridDim.y) {
     for (int i = col_id * VecSize; i < cols;
          i += blockDim.x * gridDim.x * VecSize) {
       FusedResidualDropoutBiasOneThread<T, MaskType, VecSize, false, false,
-                                        math::ReluFunctor<T>>(
+                                        pten::funcs::ReluFunctor<T>>(
           r, i, cols, &state, dropout_prob, factor, src, residual, bias, dst,
           mask, is_test, nullptr, nullptr, relu);
     }
......
@@ -15,8 +15,9 @@
 #include <limits>
 #include "paddle/fluid/operators/amp/fp16_type_traits.h"
 #include "paddle/fluid/operators/log_softmax_op.h"
-#include "paddle/fluid/operators/math/functors.h"
 #include "paddle/fluid/platform/device/gpu/gpu_device_function.h"
+#include "paddle/pten/kernels/funcs/elementwise_functor.h"
+#include "paddle/pten/kernels/funcs/functors.h"

 namespace paddle {
 namespace operators {
@@ -213,15 +214,15 @@ __global__ void LogSoftmaxForwardCUDAKernelNotLastAxis(
     for (int d = threadIdx.x; d < dim_size; d += blockDim.x) {
       const AccT value =
           static_cast<AccT>(input[data_offset + d * dim_stride]);
-      max_value = math::MaxFunctor<AccT>()(max_value, value);
+      max_value = pten::funcs::MaxFunctor<AccT>()(max_value, value);
     }
     // If there are more than 1 threads along block x, reduce all max_values
     // and get the global max_value, which is the max value along "axis".
     // If there is only one thread along block x, no need to reduce, as the
     // 'max_value' is the global max_value.
     if (blockDim.x > 1) {
-      max_value =
-          BlockReduceAlongDimX<AccT, math::MaxFunctor>(sdata, max_value);
+      max_value = BlockReduceAlongDimX<AccT, pten::funcs::MaxFunctor>(
+          sdata, max_value);
     }

     // 2. reduce sum
@@ -232,7 +233,7 @@ __global__ void LogSoftmaxForwardCUDAKernelNotLastAxis(
                         max_value);
     }
     if (blockDim.x > 1) {
-      sum = BlockReduceAlongDimX<AccT, math::AddFunctor>(sdata, sum);
+      sum = BlockReduceAlongDimX<AccT, pten::funcs::AddFunctor>(sdata, sum);
     }

     // 3. input-max-log_sum and write to output
......
@@ -18,9 +18,8 @@ limitations under the License. */
 #include <unordered_set>
 #include <vector>

-namespace paddle {
-namespace operators {
-namespace math {
+namespace pten {
+namespace funcs {

 // Z = BinaryFunctor(X, UnaryFunctor(Y))
 template <typename T, typename BinaryFunctor, typename UnaryFunctor>
@@ -69,8 +68,8 @@ struct BinaryCompoundGradDxFunctor {
     return dout * d_binary_fun_.Dx(x, unary_fun_(y));
   }

-  inline HOSTDEVICE T UseIntermediateOut(T x, T y, T intermediate_out, T out,
-                                         T dout) {
+  inline HOSTDEVICE T
+  UseIntermediateOut(T x, T y, T intermediate_out, T out, T dout) {
     return dout * d_binary_fun_.Dx(x, intermediate_out);
   }
@@ -82,8 +81,11 @@ struct BinaryCompoundGradDxFunctor {
 };

 // Z = BinaryFunctor(X, UnaryFunctor(Y))
-template <typename T, typename DBinaryFun, typename UnaryFun,
-          typename DUnaryFun, bool InPlace>
+template <typename T,
+          typename DBinaryFun,
+          typename UnaryFun,
+          typename DUnaryFun,
+          bool InPlace>
 struct BinaryCompoundGradDyFunctor {
   BinaryCompoundGradDyFunctor(const DBinaryFun &d_binary_fun,
                               const UnaryFun &unary_fun,
@@ -96,8 +98,8 @@ struct BinaryCompoundGradDyFunctor {
     return dout * d_binary_fun_.Dy(x, unary_fun_(y)) * d_unary_fun_.UseX(y);
   }

-  inline HOSTDEVICE T UseIntermediateOut(T x, T y, T intermediate_out, T out,
-                                         T dout) {
+  inline HOSTDEVICE T
+  UseIntermediateOut(T x, T y, T intermediate_out, T out, T dout) {
     if (InPlace) {
       return dout * d_binary_fun_.Dy(x, intermediate_out) *
              d_unary_fun_.UseOut(intermediate_out);
@@ -116,8 +118,11 @@ struct BinaryCompoundGradDyFunctor {
 };

 // Z = UnaryFunctor(BinaryFunctor(X, Y))
-template <typename T, typename DUnaryFun, typename BinaryFun,
-          typename DBinaryFun, bool InPlace>
+template <typename T,
+          typename DUnaryFun,
+          typename BinaryFun,
+          typename DBinaryFun,
+          bool InPlace>
 struct UnaryCompoundGradDxFunctor {
   UnaryCompoundGradDxFunctor(const DUnaryFun &d_unary_fun,
                              const BinaryFun &binary_fun,
@@ -136,8 +141,8 @@ struct UnaryCompoundGradDxFunctor {
     return base * d_binary_fun_.Dx(x, y);
   }

-  inline HOSTDEVICE T UseIntermediateOut(T x, T y, T intermediate_out, T out,
-                                         T dout) {
+  inline HOSTDEVICE T
+  UseIntermediateOut(T x, T y, T intermediate_out, T out, T dout) {
     T base;
     if (InPlace) {
       base = dout * d_unary_fun_.UseOut(out);
@@ -156,8 +161,11 @@ struct UnaryCompoundGradDxFunctor {
 };

 // Z = UnaryFunctor(BinaryFunctor(X, Y))
-template <typename T, typename DUnaryFun, typename BinaryFun,
-          typename DBinaryFun, bool InPlace>
+template <typename T,
+          typename DUnaryFun,
+          typename BinaryFun,
+          typename DBinaryFun,
+          bool InPlace>
 struct UnaryCompoundGradDyFunctor {
   UnaryCompoundGradDyFunctor(const DUnaryFun &d_unary_fun,
                              const BinaryFun &binary_fun,
@@ -176,8 +184,8 @@ struct UnaryCompoundGradDyFunctor {
     return base * d_binary_fun_.Dy(x, y);
   }

-  inline HOSTDEVICE T UseIntermediateOut(T x, T y, T intermediate_out, T out,
-                                         T dout) {
+  inline HOSTDEVICE T
+  UseIntermediateOut(T x, T y, T intermediate_out, T out, T dout) {
     T base;
     if (InPlace) {
       base = dout * d_unary_fun_.UseOut(out);
@@ -206,7 +214,9 @@ struct BinaryCompoundGradDIntermedaiteOutFunctor {
     return dout * d_binary_fun_.Dy(x, unary_fun_(y));
   }

-  inline HOSTDEVICE T UseIntermediateOut(T x, T intermediate_out, T out,
+  inline HOSTDEVICE T UseIntermediateOut(T x,
+                                         T intermediate_out,
+                                         T out,
                                          T dout) {
     return dout * d_binary_fun_.Dy(x, intermediate_out);
   }
@@ -233,7 +243,9 @@ struct UnaryCompoundGradDIntermediateFunctor {
     }
   }

-  inline HOSTDEVICE T UseIntermediateOut(T x, T intermediate_out, T out,
+  inline HOSTDEVICE T UseIntermediateOut(T x,
+                                         T intermediate_out,
+                                         T out,
                                          T dout) {
     if (InPlace) {
       return dout * d_unary_fun_.UseOut(out);
@@ -249,6 +261,5 @@ struct UnaryCompoundGradDIntermediateFunctor {
   BinaryFun binary_fun_;
 };

-}  // namespace math
-}  // namespace operators
-}  // namespace paddle
+}  // namespace funcs
+}  // namespace pten
@@ -17,16 +17,17 @@ limitations under the License. */
 #include "paddle/fluid/operators/amp/fp16_type_traits.h"
 #include "paddle/fluid/operators/math.h"

-namespace paddle {
-namespace operators {
-namespace math {
+namespace pten {
+namespace funcs {

-// MulFunctor
-template <typename T>
-struct MulFunctor {
-  // out = x * y;
-  inline HOSTDEVICE T operator()(T x, T y) { return x * y; }
-};
+// // MulFunctor
+// // NOTE(chenfeiyu): IT IS NOLONGER USED, use pten::funcs::MultiplyFunctor
+// instead
+// template <typename T>
+// struct MulFunctor {
+//   // out = x * y;
+//   inline HOSTDEVICE T operator()(T x, T y) { return x * y; }
+// };

 template <typename T>
 struct MulGradFunctor {
@@ -34,12 +35,13 @@ struct MulGradFunctor {
   inline HOSTDEVICE T Dy(T x, T y) { return x; }
 };

-// AddFunctor
-template <typename T>
-struct AddFunctor {
-  // out = x + y;
-  inline HOSTDEVICE T operator()(T x, T y) { return x + y; }
-};
+// // AddFunctor
+// // NOTE(chenfeiyu): IT IS NOLONGER USED, use pten::funcs::AddFunctor instead
+// template <typename T>
+// struct AddFunctor {
+//   // out = x + y;
+//   inline HOSTDEVICE T operator()(T x, T y) { return x + y; }
+// };

 template <typename T>
 struct MaxFunctor {
@@ -102,7 +104,8 @@ struct TanhFunctor {
     // y = 2 / (1 + e^-2x) - 1
     T t0 = static_cast<T>(2) * x;
     T t1 = (t0 < kMin) ? kMin : ((t0 > kMax) ? kMax : t0);
-    return static_cast<T>(2) / (static_cast<T>(1) + real_exp(-t1)) -
+    return static_cast<T>(2) /
+               (static_cast<T>(1) + paddle::operators::real_exp(-t1)) -
            static_cast<T>(1);
   }
 };
@@ -123,7 +126,8 @@ struct SigmoidFunctor {
   inline HOSTDEVICE T operator()(T x) {
     // y = 1 / (1 + e^-x)
     T tmp = (x < kMin) ? kMin : ((x > kMax) ? kMax : x);
-    return static_cast<T>(1) / (static_cast<T>(1) + real_exp(-tmp));
+    return static_cast<T>(1) /
+           (static_cast<T>(1) + paddle::operators::real_exp(-tmp));
   }
 };
@@ -138,7 +142,7 @@ struct SigmoidGradFunctor {

 template <typename T>
 struct GeluFunctor {
-  using MT = typename details::MPTypeTrait<T>::Type;
+  using MT = typename paddle::operators::details::MPTypeTrait<T>::Type;
   inline HOSTDEVICE T operator()(T x) {
     // this function is tanh approximation of gelu
     // actual gelu is:
@@ -154,7 +158,7 @@ struct GeluFunctor {

 template <typename T>
 struct GeluGradFunctor {
-  using MT = typename details::MPTypeTrait<T>::Type;
+  using MT = typename paddle::operators::details::MPTypeTrait<T>::Type;
   inline HOSTDEVICE T UseX(T x) {
     MT mx = static_cast<MT>(x);
     MT tanh_out =
@@ -193,6 +197,5 @@ struct GeluGradFunctor {
   }
 };

-}  // namespace math
-}  // namespace operators
-}  // namespace paddle
+}  // namespace funcs
+}  // namespace pten
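
The NOTE(chenfeiyu) comments above redirect callers of the retired MulFunctor/AddFunctor to the elementwise functors. A minimal sketch of the replacement, assuming pten::funcs::AddFunctor and pten::funcs::MultiplyFunctor are the binary functors provided by paddle/pten/kernels/funcs/elementwise_functor.h (the header this commit adds to log_softmax_op.cu):

    #include "paddle/pten/kernels/funcs/elementwise_functor.h"

    // Both are plain binary functors: operator()(a, b) returns a + b or a * b.
    float s = pten::funcs::AddFunctor<float>()(1.0f, 2.0f);       // s == 3.0f
    float p = pten::funcs::MultiplyFunctor<float>()(2.0f, 4.0f);  // p == 8.0f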