From 0d46a1085f58d20e8b9d3693172d5739e73cc08d Mon Sep 17 00:00:00 2001 From: Feiyu Chan Date: Tue, 15 Feb 2022 15:25:05 +0800 Subject: [PATCH] [Pten] move paddle/operators/math/functors.h and compound_functors.h (#39514) * move paddle/operators/math/functors.h * move paddle/operators/math/compound_functors.h --- paddle/fluid/operators/center_loss_op.h | 2 +- .../fused/fused_dropout_act_bias_test.cu | 28 +- .../operators/fused/fused_dropout_common.h | 2 +- .../operators/fused/fused_dropout_helper.h | 10 +- .../fused/fused_elemwise_activation_op.h | 257 ++++++++---------- .../fused_layernorm_residual_dropout_bias.h | 4 +- .../fused/fused_residual_dropout_bias.h | 4 +- paddle/fluid/operators/log_softmax_op.cu | 11 +- .../kernels/funcs}/compound_functors.h | 55 ++-- .../math => pten/kernels/funcs}/functors.h | 49 ++-- 10 files changed, 208 insertions(+), 214 deletions(-) mode change 100755 => 100644 paddle/fluid/operators/fused/fused_dropout_act_bias_test.cu rename paddle/{fluid/operators/math => pten/kernels/funcs}/compound_functors.h (86%) rename paddle/{fluid/operators/math => pten/kernels/funcs}/functors.h (85%) diff --git a/paddle/fluid/operators/center_loss_op.h b/paddle/fluid/operators/center_loss_op.h index f134bd0cd3c..565b1cee9f7 100644 --- a/paddle/fluid/operators/center_loss_op.h +++ b/paddle/fluid/operators/center_loss_op.h @@ -20,8 +20,8 @@ limitations under the License. */ #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/math/blas.h" -#include "paddle/fluid/operators/math/functors.h" #include "paddle/fluid/platform/transform.h" + namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/fused/fused_dropout_act_bias_test.cu b/paddle/fluid/operators/fused/fused_dropout_act_bias_test.cu old mode 100755 new mode 100644 index 0adbf0be4e2..e34335e8597 --- a/paddle/fluid/operators/fused/fused_dropout_act_bias_test.cu +++ b/paddle/fluid/operators/fused/fused_dropout_act_bias_test.cu @@ -20,12 +20,11 @@ limitations under the License. */ #include "paddle/fluid/operators/amp/fp16_type_traits.h" #include "paddle/fluid/operators/fused/fused_dropout_act_bias.h" #include "paddle/fluid/operators/fused/fused_dropout_test.h" -#include "paddle/fluid/operators/math/functors.h" +#include "paddle/pten/kernels/funcs/functors.h" namespace framework = paddle::framework; namespace platform = paddle::platform; namespace details = paddle::operators::details; -namespace math = paddle::operators::math; /** * @brief the unittest of fused_dropout_act_bias @@ -283,12 +282,14 @@ static void BaseTest(const bool is_fp16 = false) { } TEST(FusedDropout, GPUFusedDorpoutActBias) { - BaseTest, math::ReluGradFunctor>(); + BaseTest, + pten::funcs::ReluGradFunctor>(); BaseTest, paddle::operators::GeluGradFunctor>(); } TEST(FusedDropout, GPUFusedDropoutActBiasDouble) { - BaseTest, math::ReluGradFunctor>(); + BaseTest, + pten::funcs::ReluGradFunctor>(); BaseTest, paddle::operators::GeluGradFunctor>(); } @@ -296,15 +297,16 @@ TEST(FusedDropout, GPUFusedDropoutActBiasDouble) { // test fp16, For inference, check_grad is not required. 
ref: test_dropout_op.py TEST(FusedDropout, GPUFusedDropoutActBiasFp16) { using fp16 = platform::float16; - BaseTest, math::ReluGradFunctor>(true); + BaseTest, + pten::funcs::ReluGradFunctor>(true); } TEST(FusedDropout, GPUFusedDropoutActBiasIsUpscaleInTrain) { const int rows = 16; const int cols = 16; for (auto is_upscale_in_train : {true, false}) { - TestFusedDropoutActBias, - math::ReluGradFunctor> + TestFusedDropoutActBias, + pten::funcs::ReluGradFunctor> test(rows, cols, 0, 1.0, is_upscale_in_train, false); test.Run(); test.CheckOut(static_cast(1e-5)); @@ -315,8 +317,8 @@ TEST(FusedDropout, GPUFusedDropoutActBiasIsUpscaleInTrain) { TEST(FusedDropout, GPUFusedDropoutActBiasIsTest) { const int rows = 16; const int cols = 16; - TestFusedDropoutActBias, - math::ReluGradFunctor> + TestFusedDropoutActBias, + pten::funcs::ReluGradFunctor> test(rows, cols, 0, 0.35, true, true); test.Run(); test.CheckOut(static_cast(1e-5)); @@ -326,8 +328,8 @@ TEST(FusedDropout, GPUFusedDropoutActBiasIsTest) { TEST(FusedDropout, GPUFusedDropoutActBiasSeed) { const int rows = 16; const int cols = 16; - TestFusedDropoutActBias, - math::ReluGradFunctor> + TestFusedDropoutActBias, + pten::funcs::ReluGradFunctor> test(rows, cols, 125, 0.0, false, false); test.Run(); test.CheckOut(static_cast(1e-5)); @@ -337,8 +339,8 @@ TEST(FusedDropout, GPUFusedDropoutActBiasSeed) { TEST(FusedDropout, GPUFusedDropoutActBiasLargeShape) { const int rows = 256; const int cols = 4096; - TestFusedDropoutActBias, - math::ReluGradFunctor> + TestFusedDropoutActBias, + pten::funcs::ReluGradFunctor> test(rows, cols); test.Run(); test.CheckOut(static_cast(1e-5)); diff --git a/paddle/fluid/operators/fused/fused_dropout_common.h b/paddle/fluid/operators/fused/fused_dropout_common.h index eb651e4ea7b..b21a5fb8219 100644 --- a/paddle/fluid/operators/fused/fused_dropout_common.h +++ b/paddle/fluid/operators/fused/fused_dropout_common.h @@ -21,12 +21,12 @@ limitations under the License. */ #include "paddle/fluid/memory/memory.h" #include "paddle/fluid/operators/amp/fp16_type_traits.h" #include "paddle/fluid/operators/layer_norm_kernel.cu.h" -#include "paddle/fluid/operators/math/functors.h" #include "paddle/fluid/platform/aligned_vector.h" #include "paddle/fluid/platform/device/gpu/gpu_device_function.h" #include "paddle/fluid/platform/device/gpu/gpu_launch_config.h" #include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/float16.h" +#include "paddle/pten/kernels/funcs/functors.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/fused/fused_dropout_helper.h b/paddle/fluid/operators/fused/fused_dropout_helper.h index 782c5d70ee0..286f37f4496 100644 --- a/paddle/fluid/operators/fused/fused_dropout_helper.h +++ b/paddle/fluid/operators/fused/fused_dropout_helper.h @@ -19,7 +19,7 @@ limitations under the License. 
*/ #include "paddle/fluid/operators/fused/fused_dropout_act_bias.h" #include "paddle/fluid/operators/fused/fused_layernorm_residual_dropout_bias.h" #include "paddle/fluid/operators/fused/fused_residual_dropout_bias.h" -#include "paddle/fluid/operators/math/functors.h" +#include "paddle/pten/kernels/funcs/functors.h" namespace paddle { namespace operators { @@ -167,8 +167,8 @@ class FusedDropoutHelper { dropout_param_.dropout_prob, dropout_param_.is_upscale_in_train, dropout_param_.is_test, src, bias, out, mask, ctx); } else if (act_method == "relu") { - math::ReluFunctor relu; - LaunchDropoutActBias>( + pten::funcs::ReluFunctor relu; + LaunchDropoutActBias>( relu, dropout_param_.seed, rows_, cols_, increment, dropout_param_.dropout_prob, dropout_param_.is_upscale_in_train, dropout_param_.is_test, src, bias, out, mask, ctx); @@ -187,8 +187,8 @@ class FusedDropoutHelper { gelu_grad, dout, mask, src, bias, dropout_param_.dropout_prob, dropout_param_.is_upscale_in_train, rows_, cols_, d_src, d_bias, ctx); } else if (act_method == "relu") { - math::ReluGradFunctor relu_grad; - LaunchDropoutActBiasGrad>( + pten::funcs::ReluGradFunctor relu_grad; + LaunchDropoutActBiasGrad>( relu_grad, dout, mask, src, bias, dropout_param_.dropout_prob, dropout_param_.is_upscale_in_train, rows_, cols_, d_src, d_bias, ctx); } else { diff --git a/paddle/fluid/operators/fused/fused_elemwise_activation_op.h b/paddle/fluid/operators/fused/fused_elemwise_activation_op.h index b7dd89a8a28..792069652cd 100644 --- a/paddle/fluid/operators/fused/fused_elemwise_activation_op.h +++ b/paddle/fluid/operators/fused/fused_elemwise_activation_op.h @@ -19,8 +19,9 @@ limitations under the License. */ #include "paddle/fluid/framework/op_desc.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/elementwise/elementwise_op_function.h" -#include "paddle/fluid/operators/math/compound_functors.h" -#include "paddle/fluid/operators/math/functors.h" +#include "paddle/pten/kernels/funcs/compound_functors.h" +#include "paddle/pten/kernels/funcs/elementwise_functor.h" +#include "paddle/pten/kernels/funcs/functors.h" namespace paddle { namespace operators { @@ -53,22 +54,22 @@ static void RunBinaryCompoundFunctor( // intermediate_out = Unary(Y) // out = Binary(X, Unary(Y)) // In this case, the shape of intermediate_out and out are different. - paddle::operators::math::BinaryCompoundFunctor + pten::funcs::BinaryCompoundFunctor compound_func(binary_functor, unary_functor); int axis = ctx.Attr("axis"); if (ctx.Attr("save_intermediate_out")) { - FusedElemwiseAndActComputeEx, - true /*KeepIntermediateValue*/, - false /*SameShapeOfIntermediateOutAndOut*/>( + FusedElemwiseAndActComputeEx< + DeviceContext, T, + pten::funcs::BinaryCompoundFunctor, + true /*KeepIntermediateValue*/, + false /*SameShapeOfIntermediateOutAndOut*/>( ctx, in_x, in_y, axis, compound_func, (*outputs)[0], (*outputs)[1]); } else { - FusedElemwiseAndActComputeEx, - false /*KeepIntermediateValue*/, - false /*SameShapeOfIntermediateOutAndOut*/>( + FusedElemwiseAndActComputeEx< + DeviceContext, T, + pten::funcs::BinaryCompoundFunctor, + false /*KeepIntermediateValue*/, + false /*SameShapeOfIntermediateOutAndOut*/>( ctx, in_x, in_y, axis, compound_func, (*outputs)[0], (*outputs)[1]); } } @@ -85,22 +86,22 @@ static void RunUnaryCompoundFunctors( // In this case, the shape of intermediate_out and out are the same. 
int axis = ctx.Attr("axis"); - paddle::operators::math::UnaryCompoundFunctor + pten::funcs::UnaryCompoundFunctor compound_func(unary_functor, binary_functor); if (ctx.Attr("save_intermediate_out")) { - FusedElemwiseAndActComputeEx, - true /*KeepIntermediateValue*/, - true /*SameShapeOfIntermediateOutAndOut*/>( + FusedElemwiseAndActComputeEx< + DeviceContext, T, + pten::funcs::UnaryCompoundFunctor, + true /*KeepIntermediateValue*/, + true /*SameShapeOfIntermediateOutAndOut*/>( ctx, in_x, in_y, axis, compound_func, (*outputs)[0], (*outputs)[1]); } else { - FusedElemwiseAndActComputeEx, - false /*KeepIntermediateValue*/, - true /*SameShapeOfIntermediateOutAndOut*/>( + FusedElemwiseAndActComputeEx< + DeviceContext, T, + pten::funcs::UnaryCompoundFunctor, + false /*KeepIntermediateValue*/, + true /*SameShapeOfIntermediateOutAndOut*/>( ctx, in_x, in_y, axis, compound_func, (*outputs)[0], (*outputs)[1]); } } @@ -120,13 +121,12 @@ static void RunBinaryCompoundGradFunctors( int axis = ctx.Attr("axis"); using BinaryCompoundDxFunctor = - paddle::operators::math::BinaryCompoundGradDxFunctor; - using BinaryCompoundDyFunctor = - paddle::operators::math::BinaryCompoundGradDyFunctor< - T, BinaryGradFunctor, UnaryFunctor, UnaryGradFunctor, InPlace>; + pten::funcs::BinaryCompoundGradDxFunctor; + using BinaryCompoundDyFunctor = pten::funcs::BinaryCompoundGradDyFunctor< + T, BinaryGradFunctor, UnaryFunctor, UnaryGradFunctor, InPlace>; using BinaryCompoundDIntermedaiteOutFunctor = - paddle::operators::math::BinaryCompoundGradDIntermedaiteOutFunctor< + pten::funcs::BinaryCompoundGradDIntermedaiteOutFunctor< T, BinaryGradFunctor, UnaryFunctor>; if (in_intermediate_out) { @@ -170,14 +170,12 @@ static void RunUnaryCompoundGradFunctors( // Z = Unary(Binary(X, Y)) int axis = ctx.Attr("axis"); - using UnaryCompoundDxFunctor = - paddle::operators::math::UnaryCompoundGradDxFunctor< - T, UnaryGradFunctor, BinaryFunctor, BinaryGradFunctor, InPlace>; - using UnaryCompoundDyFunctor = - paddle::operators::math::UnaryCompoundGradDyFunctor< - T, UnaryGradFunctor, BinaryFunctor, BinaryGradFunctor, InPlace>; + using UnaryCompoundDxFunctor = pten::funcs::UnaryCompoundGradDxFunctor< + T, UnaryGradFunctor, BinaryFunctor, BinaryGradFunctor, InPlace>; + using UnaryCompoundDyFunctor = pten::funcs::UnaryCompoundGradDyFunctor< + T, UnaryGradFunctor, BinaryFunctor, BinaryGradFunctor, InPlace>; using UnaryCompoundDIntermediateFunctor = - paddle::operators::math::UnaryCompoundGradDIntermediateFunctor< + pten::funcs::UnaryCompoundGradDIntermediateFunctor< T, UnaryGradFunctor, BinaryFunctor, InPlace>; if (in_intermediate_out) { @@ -219,69 +217,60 @@ static void RunFunctors(const framework::ExecutionContext &ctx, if (funcs_str == "elementwise_add,scale") { // Z = Binary(X, Unary(Y)) T scale = static_cast(ctx.Attr("scale")); - RunBinaryCompoundFunctor, - paddle::operators::math::ScaleFunctor>( - ctx, paddle::operators::math::AddFunctor(), - paddle::operators::math::ScaleFunctor(scale), in_x, in_y, outputs); + RunBinaryCompoundFunctor, + pten::funcs::ScaleFunctor>( + ctx, pten::funcs::AddFunctor(), pten::funcs::ScaleFunctor(scale), + in_x, in_y, outputs); } else if (funcs_str == "scale,elementwise_add") { // Z = Unary(Binary(X, Y)) T scale = static_cast(ctx.Attr("scale")); - RunUnaryCompoundFunctors, - paddle::operators::math::AddFunctor>( - ctx, paddle::operators::math::ScaleFunctor(scale), - paddle::operators::math::AddFunctor(), in_x, in_y, outputs); + RunUnaryCompoundFunctors, + pten::funcs::AddFunctor>( + ctx, 
pten::funcs::ScaleFunctor(scale), pten::funcs::AddFunctor(), + in_x, in_y, outputs); } else if (funcs_str == "elementwise_add,relu") { // Z = Binary(X, Unary(Y)) - RunBinaryCompoundFunctor, - paddle::operators::math::ReluFunctor>( - ctx, paddle::operators::math::AddFunctor(), - paddle::operators::math::ReluFunctor(), in_x, in_y, outputs); + RunBinaryCompoundFunctor, + pten::funcs::ReluFunctor>( + ctx, pten::funcs::AddFunctor(), pten::funcs::ReluFunctor(), in_x, + in_y, outputs); } else if (funcs_str == "relu,elementwise_add") { // Z = Unary(Binary(X, Y)) - RunUnaryCompoundFunctors, - paddle::operators::math::AddFunctor>( - ctx, paddle::operators::math::ReluFunctor(), - paddle::operators::math::AddFunctor(), in_x, in_y, outputs); + RunUnaryCompoundFunctors, + pten::funcs::AddFunctor>( + ctx, pten::funcs::ReluFunctor(), pten::funcs::AddFunctor(), in_x, + in_y, outputs); } else if (funcs_str == "elementwise_mul,scale") { // Z = Binary(X, Unary(Y)) T scale = static_cast(ctx.Attr("scale")); - RunBinaryCompoundFunctor, - paddle::operators::math::ScaleFunctor>( - ctx, paddle::operators::math::MulFunctor(), - paddle::operators::math::ScaleFunctor(scale), in_x, in_y, outputs); + RunBinaryCompoundFunctor, + pten::funcs::ScaleFunctor>( + ctx, pten::funcs::MultiplyFunctor(), + pten::funcs::ScaleFunctor(scale), in_x, in_y, outputs); } else if (funcs_str == "tanh,elementwise_add") { // Z = Unary(Binary(X, Y)) - RunUnaryCompoundFunctors, - paddle::operators::math::AddFunctor>( - ctx, paddle::operators::math::TanhFunctor(), - paddle::operators::math::AddFunctor(), in_x, in_y, outputs); + RunUnaryCompoundFunctors, + pten::funcs::AddFunctor>( + ctx, pten::funcs::TanhFunctor(), pten::funcs::AddFunctor(), in_x, + in_y, outputs); } else if (funcs_str == "elementwise_mul,tanh") { // Z = Binary(X, Unary(Y)) - RunBinaryCompoundFunctor, - paddle::operators::math::TanhFunctor>( - ctx, paddle::operators::math::MulFunctor(), - paddle::operators::math::TanhFunctor(), in_x, in_y, outputs); + RunBinaryCompoundFunctor, + pten::funcs::TanhFunctor>( + ctx, pten::funcs::MultiplyFunctor(), pten::funcs::TanhFunctor(), + in_x, in_y, outputs); } else if (funcs_str == "elementwise_mul,sigmoid") { // Z = Binary(X, Unary(Y)) - RunBinaryCompoundFunctor, - paddle::operators::math::SigmoidFunctor>( - ctx, paddle::operators::math::MulFunctor(), - paddle::operators::math::SigmoidFunctor(), in_x, in_y, outputs); + RunBinaryCompoundFunctor, + pten::funcs::SigmoidFunctor>( + ctx, pten::funcs::MultiplyFunctor(), + pten::funcs::SigmoidFunctor(), in_x, in_y, outputs); } else if (funcs_str == "gelu,elementwise_add") { // Z = Unary(Binary(X, Y)) - RunUnaryCompoundFunctors, - paddle::operators::math::AddFunctor>( - ctx, paddle::operators::math::GeluFunctor(), - paddle::operators::math::AddFunctor(), in_x, in_y, outputs); + RunUnaryCompoundFunctors, + pten::funcs::AddFunctor>( + ctx, pten::funcs::GeluFunctor(), pten::funcs::AddFunctor(), in_x, + in_y, outputs); } else { PADDLE_THROW(platform::errors::InvalidArgument( "%s has not been implemented.", funcs_str)); @@ -301,95 +290,83 @@ static void RunGradFunctors( if (funcs_str == "elementwise_add_grad,scale_grad") { // The backward of Z = Binary(X, Unary(Y)) T scale = static_cast(ctx.Attr("scale")); - RunBinaryCompoundGradFunctors< - DeviceContext, T, paddle::operators::math::AddGradFunctor, - paddle::operators::math::ScaleFunctor, - paddle::operators::math::ScaleGradFunctor, InPlace>( - ctx, paddle::operators::math::AddGradFunctor(), - paddle::operators::math::ScaleFunctor(scale), - 
paddle::operators::math::ScaleGradFunctor(scale), in_x, in_y, in_out, + RunBinaryCompoundGradFunctors, + pten::funcs::ScaleFunctor, + pten::funcs::ScaleGradFunctor, InPlace>( + ctx, pten::funcs::AddGradFunctor(), + pten::funcs::ScaleFunctor(scale), + pten::funcs::ScaleGradFunctor(scale), in_x, in_y, in_out, in_intermediate_out, in_out_grad, x_grad, y_grad, d_intermediate_out); } else if (funcs_str == "scale_grad,elementwise_add_grad") { // The backward of Z = Unary(Binary(X, Y)) T scale = static_cast(ctx.Attr("scale")); RunUnaryCompoundGradFunctors< - DeviceContext, T, paddle::operators::math::ScaleGradFunctor, - paddle::operators::math::AddFunctor, - paddle::operators::math::AddGradFunctor, InPlace>( - ctx, paddle::operators::math::ScaleGradFunctor(scale), - paddle::operators::math::AddFunctor(), - paddle::operators::math::AddGradFunctor(), in_x, in_y, in_out, - in_intermediate_out, in_out_grad, x_grad, y_grad, d_intermediate_out); + DeviceContext, T, pten::funcs::ScaleGradFunctor, + pten::funcs::AddFunctor, pten::funcs::AddGradFunctor, InPlace>( + ctx, pten::funcs::ScaleGradFunctor(scale), + pten::funcs::AddFunctor(), pten::funcs::AddGradFunctor(), in_x, + in_y, in_out, in_intermediate_out, in_out_grad, x_grad, y_grad, + d_intermediate_out); } else if (funcs_str == "elementwise_add_grad,relu_grad") { // The backward of Z = Binary(X, Unary(Y)) RunBinaryCompoundGradFunctors< - DeviceContext, T, paddle::operators::math::AddGradFunctor, - paddle::operators::math::ReluFunctor, - paddle::operators::math::ReluGradFunctor, InPlace>( - ctx, paddle::operators::math::AddGradFunctor(), - paddle::operators::math::ReluFunctor(), - paddle::operators::math::ReluGradFunctor(), in_x, in_y, in_out, + DeviceContext, T, pten::funcs::AddGradFunctor, + pten::funcs::ReluFunctor, pten::funcs::ReluGradFunctor, InPlace>( + ctx, pten::funcs::AddGradFunctor(), pten::funcs::ReluFunctor(), + pten::funcs::ReluGradFunctor(), in_x, in_y, in_out, in_intermediate_out, in_out_grad, x_grad, y_grad, d_intermediate_out); } else if (funcs_str == "relu_grad,elementwise_add_grad") { // The backward of Z = Unary(Binary(X, Y)) RunUnaryCompoundGradFunctors< - DeviceContext, T, paddle::operators::math::ReluGradFunctor, - paddle::operators::math::AddFunctor, - paddle::operators::math::AddGradFunctor, InPlace>( - ctx, paddle::operators::math::ReluGradFunctor(), - paddle::operators::math::AddFunctor(), - paddle::operators::math::AddGradFunctor(), in_x, in_y, in_out, + DeviceContext, T, pten::funcs::ReluGradFunctor, + pten::funcs::AddFunctor, pten::funcs::AddGradFunctor, InPlace>( + ctx, pten::funcs::ReluGradFunctor(), pten::funcs::AddFunctor(), + pten::funcs::AddGradFunctor(), in_x, in_y, in_out, in_intermediate_out, in_out_grad, x_grad, y_grad, d_intermediate_out); } else if (funcs_str == "elementwise_mul_grad,scale_grad") { // The backward of Z = Binary(X, Unary(Y)) T scale = static_cast(ctx.Attr("scale")); - RunBinaryCompoundGradFunctors< - DeviceContext, T, paddle::operators::math::MulGradFunctor, - paddle::operators::math::ScaleFunctor, - paddle::operators::math::ScaleGradFunctor, InPlace>( - ctx, paddle::operators::math::MulGradFunctor(), - paddle::operators::math::ScaleFunctor(scale), - paddle::operators::math::ScaleGradFunctor(scale), in_x, in_y, in_out, + RunBinaryCompoundGradFunctors, + pten::funcs::ScaleFunctor, + pten::funcs::ScaleGradFunctor, InPlace>( + ctx, pten::funcs::MulGradFunctor(), + pten::funcs::ScaleFunctor(scale), + pten::funcs::ScaleGradFunctor(scale), in_x, in_y, in_out, in_intermediate_out, in_out_grad, 
x_grad, y_grad, d_intermediate_out); } else if (funcs_str == "tanh_grad,elementwise_add_grad") { // The backward of Z = Unary(Binary(X, Y)) RunUnaryCompoundGradFunctors< - DeviceContext, T, paddle::operators::math::TanhGradFunctor, - paddle::operators::math::AddFunctor, - paddle::operators::math::AddGradFunctor, InPlace>( - ctx, paddle::operators::math::TanhGradFunctor(), - paddle::operators::math::AddFunctor(), - paddle::operators::math::AddGradFunctor(), in_x, in_y, in_out, + DeviceContext, T, pten::funcs::TanhGradFunctor, + pten::funcs::AddFunctor, pten::funcs::AddGradFunctor, InPlace>( + ctx, pten::funcs::TanhGradFunctor(), pten::funcs::AddFunctor(), + pten::funcs::AddGradFunctor(), in_x, in_y, in_out, in_intermediate_out, in_out_grad, x_grad, y_grad, d_intermediate_out); } else if (funcs_str == "elementwise_mul_grad,tanh_grad") { // The backward of Z = Binary(X, Unary(Y)) RunBinaryCompoundGradFunctors< - DeviceContext, T, paddle::operators::math::MulGradFunctor, - paddle::operators::math::TanhFunctor, - paddle::operators::math::TanhGradFunctor, InPlace>( - ctx, paddle::operators::math::MulGradFunctor(), - paddle::operators::math::TanhFunctor(), - paddle::operators::math::TanhGradFunctor(), in_x, in_y, in_out, + DeviceContext, T, pten::funcs::MulGradFunctor, + pten::funcs::TanhFunctor, pten::funcs::TanhGradFunctor, InPlace>( + ctx, pten::funcs::MulGradFunctor(), pten::funcs::TanhFunctor(), + pten::funcs::TanhGradFunctor(), in_x, in_y, in_out, in_intermediate_out, in_out_grad, x_grad, y_grad, d_intermediate_out); } else if (funcs_str == "elementwise_mul_grad,sigmoid_grad") { // The backward of Z = Binary(X, Unary(Y)) - RunBinaryCompoundGradFunctors< - DeviceContext, T, paddle::operators::math::MulGradFunctor, - paddle::operators::math::SigmoidFunctor, - paddle::operators::math::SigmoidGradFunctor, InPlace>( - ctx, paddle::operators::math::MulGradFunctor(), - paddle::operators::math::SigmoidFunctor(), - paddle::operators::math::SigmoidGradFunctor(), in_x, in_y, in_out, + RunBinaryCompoundGradFunctors, + pten::funcs::SigmoidFunctor, + pten::funcs::SigmoidGradFunctor, InPlace>( + ctx, pten::funcs::MulGradFunctor(), pten::funcs::SigmoidFunctor(), + pten::funcs::SigmoidGradFunctor(), in_x, in_y, in_out, in_intermediate_out, in_out_grad, x_grad, y_grad, d_intermediate_out); } else if (funcs_str == "gelu_grad,elementwise_add_grad") { // The backward of Z = Unary(Binary(X, Y)) RunUnaryCompoundGradFunctors< - DeviceContext, T, paddle::operators::math::GeluGradFunctor, - paddle::operators::math::AddFunctor, - paddle::operators::math::AddGradFunctor, InPlace>( - ctx, paddle::operators::math::GeluGradFunctor(), - paddle::operators::math::AddFunctor(), - paddle::operators::math::AddGradFunctor(), in_x, in_y, in_out, + DeviceContext, T, pten::funcs::GeluGradFunctor, + pten::funcs::AddFunctor, pten::funcs::AddGradFunctor, InPlace>( + ctx, pten::funcs::GeluGradFunctor(), pten::funcs::AddFunctor(), + pten::funcs::AddGradFunctor(), in_x, in_y, in_out, in_intermediate_out, in_out_grad, x_grad, y_grad, d_intermediate_out); } else { PADDLE_THROW(platform::errors::InvalidArgument( diff --git a/paddle/fluid/operators/fused/fused_layernorm_residual_dropout_bias.h b/paddle/fluid/operators/fused/fused_layernorm_residual_dropout_bias.h index 911c2cda575..ef61b78d682 100644 --- a/paddle/fluid/operators/fused/fused_layernorm_residual_dropout_bias.h +++ b/paddle/fluid/operators/fused/fused_layernorm_residual_dropout_bias.h @@ -122,12 +122,12 @@ __global__ void FusedLayernormResidualDropoutBias( __shared__ U 
shared_mean[32]; __shared__ U shared_var[32]; - math::ReluFunctor relu; + pten::funcs::ReluFunctor relu; U mean_val = 0; U var_val = 0; for (int i = col_id * VecSize; i < cols; i += blockDim.x * VecSize) { FusedResidualDropoutBiasOneThread>( + pten::funcs::ReluFunctor>( row_id, i, cols, &state, dropout_prob, factor, src, residual, bias, dst, mask, is_test, &mean_val, &var_val, relu); } diff --git a/paddle/fluid/operators/fused/fused_residual_dropout_bias.h b/paddle/fluid/operators/fused/fused_residual_dropout_bias.h index 2f5ec839fc2..264e2e5f22d 100644 --- a/paddle/fluid/operators/fused/fused_residual_dropout_bias.h +++ b/paddle/fluid/operators/fused/fused_residual_dropout_bias.h @@ -115,12 +115,12 @@ __global__ void FusedResidualDropoutBias( curandStatePhilox4_32_10_t state; curand_init(seed, idx, increment, &state); const T factor = GetFactor(dropout_prob, is_upscale_in_train, is_test); - math::ReluFunctor relu; + pten::funcs::ReluFunctor relu; for (int r = row_id; r < rows; r += blockDim.y * gridDim.y) { for (int i = col_id * VecSize; i < cols; i += blockDim.x * gridDim.x * VecSize) { FusedResidualDropoutBiasOneThread>( + pten::funcs::ReluFunctor>( r, i, cols, &state, dropout_prob, factor, src, residual, bias, dst, mask, is_test, nullptr, nullptr, relu); } diff --git a/paddle/fluid/operators/log_softmax_op.cu b/paddle/fluid/operators/log_softmax_op.cu index 6676cde1caf..c677b4978eb 100644 --- a/paddle/fluid/operators/log_softmax_op.cu +++ b/paddle/fluid/operators/log_softmax_op.cu @@ -15,8 +15,9 @@ #include #include "paddle/fluid/operators/amp/fp16_type_traits.h" #include "paddle/fluid/operators/log_softmax_op.h" -#include "paddle/fluid/operators/math/functors.h" #include "paddle/fluid/platform/device/gpu/gpu_device_function.h" +#include "paddle/pten/kernels/funcs/elementwise_functor.h" +#include "paddle/pten/kernels/funcs/functors.h" namespace paddle { namespace operators { @@ -213,15 +214,15 @@ __global__ void LogSoftmaxForwardCUDAKernelNotLastAxis( for (int d = threadIdx.x; d < dim_size; d += blockDim.x) { const AccT value = static_cast(input[data_offset + d * dim_stride]); - max_value = math::MaxFunctor()(max_value, value); + max_value = pten::funcs::MaxFunctor()(max_value, value); } // If there are more than 1 threads along block x, reduce all max_values // and get the global max_value, which is the max value along "axis". // If there is only one thread along block x, no need to reduce, as the // 'max_value' is the global max_value. if (blockDim.x > 1) { - max_value = - BlockReduceAlongDimX(sdata, max_value); + max_value = BlockReduceAlongDimX( + sdata, max_value); } // 2. reduce sum @@ -232,7 +233,7 @@ __global__ void LogSoftmaxForwardCUDAKernelNotLastAxis( max_value); } if (blockDim.x > 1) { - sum = BlockReduceAlongDimX(sdata, sum); + sum = BlockReduceAlongDimX(sdata, sum); } // 3. input-max-log_sum and write to output diff --git a/paddle/fluid/operators/math/compound_functors.h b/paddle/pten/kernels/funcs/compound_functors.h similarity index 86% rename from paddle/fluid/operators/math/compound_functors.h rename to paddle/pten/kernels/funcs/compound_functors.h index 6a43215bf52..c3d14a50659 100644 --- a/paddle/fluid/operators/math/compound_functors.h +++ b/paddle/pten/kernels/funcs/compound_functors.h @@ -18,9 +18,8 @@ limitations under the License. 
*/ #include #include -namespace paddle { -namespace operators { -namespace math { +namespace pten { +namespace funcs { // Z = BinaryFunctor(X, UnaryFunctor(Y)) template @@ -69,8 +68,8 @@ struct BinaryCompoundGradDxFunctor { return dout * d_binary_fun_.Dx(x, unary_fun_(y)); } - inline HOSTDEVICE T UseIntermediateOut(T x, T y, T intermediate_out, T out, - T dout) { + inline HOSTDEVICE T + UseIntermediateOut(T x, T y, T intermediate_out, T out, T dout) { return dout * d_binary_fun_.Dx(x, intermediate_out); } @@ -82,8 +81,11 @@ struct BinaryCompoundGradDxFunctor { }; // Z = BinaryFunctor(X, UnaryFunctor(Y)) -template +template struct BinaryCompoundGradDyFunctor { BinaryCompoundGradDyFunctor(const DBinaryFun &d_binary_fun, const UnaryFun &unary_fun, @@ -96,8 +98,8 @@ struct BinaryCompoundGradDyFunctor { return dout * d_binary_fun_.Dy(x, unary_fun_(y)) * d_unary_fun_.UseX(y); } - inline HOSTDEVICE T UseIntermediateOut(T x, T y, T intermediate_out, T out, - T dout) { + inline HOSTDEVICE T + UseIntermediateOut(T x, T y, T intermediate_out, T out, T dout) { if (InPlace) { return dout * d_binary_fun_.Dy(x, intermediate_out) * d_unary_fun_.UseOut(intermediate_out); @@ -116,8 +118,11 @@ struct BinaryCompoundGradDyFunctor { }; // Z = UnaryFunctor(BinaryFunctor(X, Y)) -template +template struct UnaryCompoundGradDxFunctor { UnaryCompoundGradDxFunctor(const DUnaryFun &d_unary_fun, const BinaryFun &binary_fun, @@ -136,8 +141,8 @@ struct UnaryCompoundGradDxFunctor { return base * d_binary_fun_.Dx(x, y); } - inline HOSTDEVICE T UseIntermediateOut(T x, T y, T intermediate_out, T out, - T dout) { + inline HOSTDEVICE T + UseIntermediateOut(T x, T y, T intermediate_out, T out, T dout) { T base; if (InPlace) { base = dout * d_unary_fun_.UseOut(out); @@ -156,8 +161,11 @@ struct UnaryCompoundGradDxFunctor { }; // Z = UnaryFunctor(BinaryFunctor(X, Y)) -template +template struct UnaryCompoundGradDyFunctor { UnaryCompoundGradDyFunctor(const DUnaryFun &d_unary_fun, const BinaryFun &binary_fun, @@ -176,8 +184,8 @@ struct UnaryCompoundGradDyFunctor { return base * d_binary_fun_.Dy(x, y); } - inline HOSTDEVICE T UseIntermediateOut(T x, T y, T intermediate_out, T out, - T dout) { + inline HOSTDEVICE T + UseIntermediateOut(T x, T y, T intermediate_out, T out, T dout) { T base; if (InPlace) { base = dout * d_unary_fun_.UseOut(out); @@ -206,7 +214,9 @@ struct BinaryCompoundGradDIntermedaiteOutFunctor { return dout * d_binary_fun_.Dy(x, unary_fun_(y)); } - inline HOSTDEVICE T UseIntermediateOut(T x, T intermediate_out, T out, + inline HOSTDEVICE T UseIntermediateOut(T x, + T intermediate_out, + T out, T dout) { return dout * d_binary_fun_.Dy(x, intermediate_out); } @@ -233,7 +243,9 @@ struct UnaryCompoundGradDIntermediateFunctor { } } - inline HOSTDEVICE T UseIntermediateOut(T x, T intermediate_out, T out, + inline HOSTDEVICE T UseIntermediateOut(T x, + T intermediate_out, + T out, T dout) { if (InPlace) { return dout * d_unary_fun_.UseOut(out); @@ -249,6 +261,5 @@ struct UnaryCompoundGradDIntermediateFunctor { BinaryFun binary_fun_; }; -} // namespace math -} // namespace operators -} // namespace paddle +} // namespace funcs +} // namespace pten diff --git a/paddle/fluid/operators/math/functors.h b/paddle/pten/kernels/funcs/functors.h similarity index 85% rename from paddle/fluid/operators/math/functors.h rename to paddle/pten/kernels/funcs/functors.h index 054018b10e8..8b2bdfd0b1e 100644 --- a/paddle/fluid/operators/math/functors.h +++ b/paddle/pten/kernels/funcs/functors.h @@ -17,16 +17,17 @@ limitations under the License. 
*/ #include "paddle/fluid/operators/amp/fp16_type_traits.h" #include "paddle/fluid/operators/math.h" -namespace paddle { -namespace operators { -namespace math { - -// MulFunctor -template -struct MulFunctor { - // out = x * y; - inline HOSTDEVICE T operator()(T x, T y) { return x * y; } -}; +namespace pten { +namespace funcs { + +// // MulFunctor +// // NOTE(chenfeiyu): IT IS NOLONGER USED, use pten::funcs::MultiplyFunctor +// instead +// template +// struct MulFunctor { +// // out = x * y; +// inline HOSTDEVICE T operator()(T x, T y) { return x * y; } +// }; template struct MulGradFunctor { @@ -34,12 +35,13 @@ struct MulGradFunctor { inline HOSTDEVICE T Dy(T x, T y) { return x; } }; -// AddFunctor -template -struct AddFunctor { - // out = x + y; - inline HOSTDEVICE T operator()(T x, T y) { return x + y; } -}; +// // AddFunctor +// // NOTE(chenfeiyu): IT IS NOLONGER USED, use pten::funcs::AddFunctor instead +// template +// struct AddFunctor { +// // out = x + y; +// inline HOSTDEVICE T operator()(T x, T y) { return x + y; } +// }; template struct MaxFunctor { @@ -102,7 +104,8 @@ struct TanhFunctor { // y = 2 / (1 + e^-2x) - 1 T t0 = static_cast(2) * x; T t1 = (t0 < kMin) ? kMin : ((t0 > kMax) ? kMax : t0); - return static_cast(2) / (static_cast(1) + real_exp(-t1)) - + return static_cast(2) / + (static_cast(1) + paddle::operators::real_exp(-t1)) - static_cast(1); } }; @@ -123,7 +126,8 @@ struct SigmoidFunctor { inline HOSTDEVICE T operator()(T x) { // y = 1 / (1 + e^-x) T tmp = (x < kMin) ? kMin : ((x > kMax) ? kMax : x); - return static_cast(1) / (static_cast(1) + real_exp(-tmp)); + return static_cast(1) / + (static_cast(1) + paddle::operators::real_exp(-tmp)); } }; @@ -138,7 +142,7 @@ struct SigmoidGradFunctor { template struct GeluFunctor { - using MT = typename details::MPTypeTrait::Type; + using MT = typename paddle::operators::details::MPTypeTrait::Type; inline HOSTDEVICE T operator()(T x) { // this function is tanh approximation of gelu // actual gelu is: @@ -154,7 +158,7 @@ struct GeluFunctor { template struct GeluGradFunctor { - using MT = typename details::MPTypeTrait::Type; + using MT = typename paddle::operators::details::MPTypeTrait::Type; inline HOSTDEVICE T UseX(T x) { MT mx = static_cast(x); MT tanh_out = @@ -193,6 +197,5 @@ struct GeluGradFunctor { } }; -} // namespace math -} // namespace operators -} // namespace paddle +} // namespace funcs +} // namespace pten -- GitLab