diff --git a/paddle/fluid/distributed/ps/service/communicator/communicator.h b/paddle/fluid/distributed/ps/service/communicator/communicator.h
index b9cee1d898c8ad5383b2cd0229da7213b55ab66a..e3795d409f3eceb03191717a304af2b96cd988bf 100644
--- a/paddle/fluid/distributed/ps/service/communicator/communicator.h
+++ b/paddle/fluid/distributed/ps/service/communicator/communicator.h
@@ -37,13 +37,13 @@ limitations under the License. */
 #include "paddle/fluid/framework/scope.h"
 #include "paddle/fluid/framework/variable.h"
 #include "paddle/fluid/framework/variable_helper.h"
-#include "paddle/fluid/operators/math/selected_rows_functor.h"
 #include "paddle/fluid/platform/device_context.h"
 #include "paddle/fluid/platform/enforce.h"
 #include "paddle/fluid/platform/place.h"
 #include "paddle/fluid/string/split.h"
 #include "paddle/phi/kernels/funcs/blas/blas.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
+#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
 
 namespace paddle {
 namespace distributed {
@@ -212,11 +212,10 @@ inline void MergeVars(const std::string &var_name,
     }
     phi::CPUContext dev_ctx;
     if (merge_add) {
-      paddle::operators::math::scatter::MergeAdd<phi::CPUContext, T> merge_add;
+      phi::funcs::scatter::MergeAdd<phi::CPUContext, T> merge_add;
       merge_add(dev_ctx, inputs, out_slr);
     } else {
-      paddle::operators::math::scatter::MergeAverage<phi::CPUContext, T>
-          merge_average;
+      phi::funcs::scatter::MergeAverage<phi::CPUContext, T> merge_average;
       merge_average(dev_ctx, inputs, out_slr);
     }
 
diff --git a/paddle/fluid/imperative/gradient_accumulator.cc b/paddle/fluid/imperative/gradient_accumulator.cc
index 199359a960326378f9641c6a5de4c6d3ccfd5303..d0feccc948fceedac2efc05a67fadfa6d4f827f2 100644
--- a/paddle/fluid/imperative/gradient_accumulator.cc
+++ b/paddle/fluid/imperative/gradient_accumulator.cc
@@ -22,7 +22,6 @@
 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/framework/selected_rows_utils.h"
 #include "paddle/fluid/imperative/layer.h"
-#include "paddle/fluid/operators/math/selected_rows_functor.h"
 #include "paddle/fluid/platform/bfloat16.h"
 #include "paddle/fluid/platform/complex.h"
 #include "paddle/fluid/platform/device_context.h"
@@ -30,6 +29,7 @@
 #include "paddle/fluid/platform/profiler.h"
 #include "paddle/phi/kernels/funcs/blas/blas.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
+#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
 #ifdef PADDLE_WITH_XPU
 #include "xpu/refactor/math.h"
 #endif
@@ -354,15 +354,14 @@ void SelectedRowsAddToTensor(const VarType& src, VarType* dst) {
       framework::TransToProtoVarType(src_selected_rows.value().dtype());
   platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
 
-#define PADDLE_SELECTED_ROWS_ADD_TO_TENSOR(dev_ctx_type, cpp_type)           \
-  if (data_type == framework::DataTypeTrait<cpp_type>::DataType()) {         \
-    paddle::platform::DeviceContext* dev_ctx = pool.Get(place);              \
-    paddle::operators::math::SelectedRowsAddToTensor<dev_ctx_type, cpp_type> \
-        functor;                                                             \
-    functor(*(dynamic_cast<dev_ctx_type*>(dev_ctx)),                         \
-            src_selected_rows,                                               \
-            dst_tensor);                                                     \
-    return;                                                                  \
+#define PADDLE_SELECTED_ROWS_ADD_TO_TENSOR(dev_ctx_type, cpp_type)       \
+  if (data_type == framework::DataTypeTrait<cpp_type>::DataType()) {     \
+    paddle::platform::DeviceContext* dev_ctx = pool.Get(place);          \
+    phi::funcs::SelectedRowsAddToTensor<dev_ctx_type, cpp_type> functor; \
+    functor(*(dynamic_cast<dev_ctx_type*>(dev_ctx)),                     \
+            src_selected_rows,                                           \
+            dst_tensor);                                                 \
+    return;                                                              \
   }
 
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
@@ -406,15 +405,14 @@ void SelectedRowsAddTensor(const VarType& src_selected_rows_var,
   dst_tensor->Resize(src_tensor.dims());
   dst_tensor->mutable_data(place, src_tensor.dtype());
 
-#define PADDLE_SELECTED_ROWS_ADD_TENSOR(dev_ctx_type, cpp_type)            \
-  if (data_type == framework::DataTypeTrait<cpp_type>::DataType()) {       \
-    paddle::operators::math::SelectedRowsAddTensor<dev_ctx_type, cpp_type> \
-        functor;                                                           \
-    functor(*(dynamic_cast<dev_ctx_type*>(dev_ctx)),                       \
-            src_selected_rows,                                             \
-            src_tensor,                                                    \
-            dst_tensor);                                                   \
-    return;                                                                \
+#define PADDLE_SELECTED_ROWS_ADD_TENSOR(dev_ctx_type, cpp_type)        \
+  if (data_type == framework::DataTypeTrait<cpp_type>::DataType()) {   \
+    phi::funcs::SelectedRowsAddTensor<dev_ctx_type, cpp_type> functor; \
+    functor(*(dynamic_cast<dev_ctx_type*>(dev_ctx)),                   \
+            src_selected_rows,                                         \
+            src_tensor,                                                \
+            dst_tensor);                                               \
+    return;                                                            \
   }
 
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
@@ -469,15 +467,14 @@ std::shared_ptr<ReturnVarType> SelectedRowsMerge(const VarType& src1,
   phi::SelectedRows* dst_selected_rows =
       GetEmptyInnerTensor<phi::SelectedRows>(dst_var.get());
 
-#define PADDLE_SELECTED_ROWS_ADD(dev_ctx_type, cpp_type)               \
-  if (data_type == framework::DataTypeTrait<cpp_type>::DataType()) {   \
-    paddle::platform::DeviceContext* dev_ctx = pool.Get(place);        \
-    paddle::operators::math::scatter::MergeAdd<dev_ctx_type, cpp_type> \
-        merge_add;                                                     \
-    merge_add(*(dynamic_cast<dev_ctx_type*>(dev_ctx)),                 \
-              src_selected_rows,                                       \
-              dst_selected_rows);                                      \
-    return dst_var;                                                    \
+#define PADDLE_SELECTED_ROWS_ADD(dev_ctx_type, cpp_type)             \
+  if (data_type == framework::DataTypeTrait<cpp_type>::DataType()) { \
+    paddle::platform::DeviceContext* dev_ctx = pool.Get(place);      \
+    phi::funcs::scatter::MergeAdd<dev_ctx_type, cpp_type> merge_add; \
+    merge_add(*(dynamic_cast<dev_ctx_type*>(dev_ctx)),               \
+              src_selected_rows,                                     \
+              dst_selected_rows);                                    \
+    return dst_var;                                                  \
   }
 
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
diff --git a/paddle/fluid/operators/clip_by_norm_op.h b/paddle/fluid/operators/clip_by_norm_op.h
index 6fde5106f10a4ed280ab0c74fdff7a37123b44a0..c84b29211b6616446aac41b0e4d6255150fc5143 100644
--- a/paddle/fluid/operators/clip_by_norm_op.h
+++ b/paddle/fluid/operators/clip_by_norm_op.h
@@ -17,8 +17,8 @@ limitations under the License. */
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/selected_rows_utils.h"
-#include "paddle/fluid/operators/math/selected_rows_functor.h"
 #include "paddle/fluid/platform/transform.h"
+#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
 
 namespace paddle {
 namespace operators {
diff --git a/paddle/fluid/operators/math/CMakeLists.txt b/paddle/fluid/operators/math/CMakeLists.txt
index 3e53904657383a9b495255a8390114dfc3732136..20933a4162fdc65f80e82b7d4bc81d0546a3d4fe 100644
--- a/paddle/fluid/operators/math/CMakeLists.txt
+++ b/paddle/fluid/operators/math/CMakeLists.txt
@@ -31,20 +31,6 @@ math_library(sampler DEPS generator)
 # math_library(math_function DEPS blas dense_tensor tensor)
 math_library(maxouting)
 
-if(WITH_MKLDNN)
-  math_library(
-    selected_rows_functor
-    DEPS
-    selected_rows_utils
-    math_function
-    blas
-    mkldnn_axpy_handler
-    mixed_vector)
-else()
-  math_library(selected_rows_functor DEPS selected_rows_utils math_function
-               blas mixed_vector)
-endif()
-
 math_library(sequence_padding)
 math_library(sequence_pooling DEPS math_function jit_kernel_helper)
 math_library(sequence_scale)
diff --git a/paddle/fluid/operators/math/selected_rows_functor_test.cc b/paddle/fluid/operators/math/selected_rows_functor_test.cc
index ecb8aa7824724b3e20985c8f2a3cdb33819ff0fc..9d6d451969bbecd0399694b8d3e242a838d2a141 100644
--- a/paddle/fluid/operators/math/selected_rows_functor_test.cc
+++ b/paddle/fluid/operators/math/selected_rows_functor_test.cc
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include "paddle/fluid/operators/math/selected_rows_functor.h"
+#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
 
 #include "gtest/gtest.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
@@ -48,7 +48,7 @@ TEST(selected_rows_functor, cpu_add) {
   // simplely concat two SelectedRows
   out_value->mutable_data<float>(phi::make_ddim({7, 10}), cpu_place);
 
-  paddle::operators::math::SelectedRowsAdd<phi::CPUContext, float> add_functor;
+  phi::funcs::SelectedRowsAdd<phi::CPUContext, float> add_functor;
   add_functor(ctx, *selected_rows1, *selected_rows2, output.get());
 
   auto out_height = output->height();
@@ -88,8 +88,7 @@ TEST(selected_rows_functor, cpu_add) {
       new paddle::framework::Tensor()};
   tensor2->mutable_data<float>(phi::make_ddim({height, row_numel}), cpu_place);
 
-  paddle::operators::math::SelectedRowsAddTensor<phi::CPUContext, float>
-      add_tensor_functor;
+  phi::funcs::SelectedRowsAddTensor<phi::CPUContext, float> add_tensor_functor;
   add_tensor_functor(ctx, *output, *tensor1, tensor2.get());
 
   auto* tensor2_data = tensor2->data<float>();
@@ -141,8 +140,7 @@ TEST(selected_rows_functor, cpu_add_to) {
   // simplely concat two SelectedRows
   out_value->mutable_data<float>(phi::make_ddim({7, 10}), cpu_place);
 
-  paddle::operators::math::SelectedRowsAddTo<phi::CPUContext, float>
-      add_to_functor;
+  phi::funcs::SelectedRowsAddTo<phi::CPUContext, float> add_to_functor;
   add_to_functor(ctx, *selected_rows1, 0, output.get());
   add_to_functor(ctx, *selected_rows2, in1_value->numel(), output.get());
 
@@ -179,7 +177,7 @@ TEST(selected_rows_functor, cpu_add_to) {
   tensor1->mutable_data<float>(phi::make_ddim({height, row_numel}), cpu_place);
   functor(ctx, tensor1.get(), 3.0);
 
-  paddle::operators::math::SelectedRowsAddToTensor<phi::CPUContext, float>
+  phi::funcs::SelectedRowsAddToTensor<phi::CPUContext, float>
       add_to_tensor_functor;
   add_to_tensor_functor(ctx, *output, tensor1.get());
 
@@ -216,7 +214,7 @@ TEST(selected_rows_functor, cpu_merge_average_float) {
       cpu_place);
   functor(ctx, in_value, 1.0);
 
-  paddle::operators::math::scatter::MergeAverage<phi::CPUContext, float>
+  phi::funcs::scatter::MergeAverage<phi::CPUContext, float>
       merge_average_functor;
   phi::SelectedRows output = merge_average_functor(ctx, *selected_rows);
 
@@ -253,8 +251,7 @@ TEST(selected_rows_functor, cpu_merge_add_float) {
 
   std::unique_ptr<phi::SelectedRows> output{new phi::SelectedRows()};
 
-  paddle::operators::math::scatter::MergeAdd<phi::CPUContext, float>
-      merge_add_functor;
+  phi::funcs::scatter::MergeAdd<phi::CPUContext, float> merge_add_functor;
   merge_add_functor(ctx, *selected_rows, output.get());
 
   auto out_height = output->height();
@@ -290,8 +287,7 @@ TEST(selected_rows_functor, cpu_merge_add_int) {
 
   std::unique_ptr<phi::SelectedRows> output{new phi::SelectedRows()};
 
-  paddle::operators::math::scatter::MergeAdd<phi::CPUContext, int>
-      merge_add_functor;
+  phi::funcs::scatter::MergeAdd<phi::CPUContext, int> merge_add_functor;
   merge_add_functor(ctx, *selected_rows, output.get());
 
   auto out_height = output->height();
@@ -337,8 +333,7 @@ TEST(selected_rows_functor, cpu_merge_add_multi) {
 
   std::unique_ptr<phi::SelectedRows> output{new phi::SelectedRows()};
   output->set_height(height);
-  paddle::operators::math::scatter::MergeAdd<phi::CPUContext, float>
-      merge_add_functor;
+  phi::funcs::scatter::MergeAdd<phi::CPUContext, float> merge_add_functor;
 
   std::vector<const phi::SelectedRows*> inputs;
   inputs.push_back(selected_rows1.get());
@@ -387,8 +382,7 @@ TEST(selected_rows_functor, cpu_merge_add_multi_noduplicated) {
 
   std::unique_ptr<phi::SelectedRows> output{new phi::SelectedRows()};
   output->set_height(height);
-  paddle::operators::math::scatter::MergeAdd<phi::CPUContext, float>
-      merge_add_functor;
+  phi::funcs::scatter::MergeAdd<phi::CPUContext, float> merge_add_functor;
 
   std::vector<const phi::SelectedRows*> inputs;
   inputs.push_back(selected_rows1.get());
@@ -444,8 +438,7 @@ TEST(selected_rows_functor, cpu_sum_to) {
   auto* out_value = output->mutable_value();
   // simplely concat two SelectedRows
   out_value->mutable_data<float>(phi::make_ddim({7, 10}), cpu_place);
-  paddle::operators::math::SelectedRowsSumTo<phi::CPUContext, float>
-      sum_to_functor;
+  phi::funcs::SelectedRowsSumTo<phi::CPUContext, float> sum_to_functor;
   sum_to_functor(ctx,
                  std::vector<phi::SelectedRows*>(
                      {selected_rows1.get(), selected_rows2.get()}),
@@ -479,7 +472,7 @@ TEST(selected_rows_functor, cpu_sum_to) {
       new paddle::framework::Tensor()};
   tensor1->mutable_data<float>(phi::make_ddim({height, row_numel}), cpu_place);
   functor(ctx, tensor1.get(), 3.0);
-  paddle::operators::math::SelectedRowsAddToTensor<phi::CPUContext, float>
+  phi::funcs::SelectedRowsAddToTensor<phi::CPUContext, float>
       add_to_tensor_functor;
   add_to_tensor_functor(ctx, *output, tensor1.get());
   auto* tensor1_data = tensor1->data<float>();
diff --git a/paddle/fluid/operators/math/selected_rows_functor_test.cu.cc b/paddle/fluid/operators/math/selected_rows_functor_test.cu.cc
index 746a64ff58cde6eb1f0bd63dab9ab04242c5549a..d623357aa34b891ecdae6ac42522011d54db93f6 100644
--- a/paddle/fluid/operators/math/selected_rows_functor_test.cu.cc
+++ b/paddle/fluid/operators/math/selected_rows_functor_test.cu.cc
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include "paddle/fluid/operators/math/selected_rows_functor.h"
+#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
 
 #include "gtest/gtest.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
@@ -61,7 +61,7 @@ TEST(selected_rows_functor, gpu_add) {
   // simply concat two SelectedRows
   out_value->mutable_data<float>(phi::make_ddim({7, 10}), gpu_place);
 
-  paddle::operators::math::SelectedRowsAdd<phi::GPUContext, float> add_functor;
+  phi::funcs::SelectedRowsAdd<phi::GPUContext, float> add_functor;
   add_functor(ctx, *selected_rows1, *selected_rows2, output.get());
 
   auto out_height = output->height();
@@ -105,8 +105,7 @@ TEST(selected_rows_functor, gpu_add) {
       new paddle::framework::Tensor()};
   tensor2->mutable_data<float>(phi::make_ddim({height, row_numel}), gpu_place);
 
-  paddle::operators::math::SelectedRowsAddTensor<phi::GPUContext, float>
-      add_tensor_functor;
+  phi::funcs::SelectedRowsAddTensor<phi::GPUContext, float> add_tensor_functor;
   add_tensor_functor(ctx, *output, *tensor1, tensor2.get());
 
   paddle::framework::Tensor tensor2_cpu;
@@ -164,8 +163,7 @@ TEST(selected_rows_functor, gpu_add_to) {
   // simply concat two SelectedRows
   out_value->mutable_data<float>(phi::make_ddim({7, 10}), gpu_place);
 
-  paddle::operators::math::SelectedRowsAddTo<phi::GPUContext, float>
-      add_to_functor;
+  phi::funcs::SelectedRowsAddTo<phi::GPUContext, float> add_to_functor;
   add_to_functor(ctx, *selected_rows1, 0, output.get());
   add_to_functor(ctx, *selected_rows2, in1_value->numel(), output.get());
 
@@ -206,7 +204,7 @@ TEST(selected_rows_functor, gpu_add_to) {
   tensor1->mutable_data<float>(phi::make_ddim({height, row_numel}), gpu_place);
   functor(ctx, tensor1.get(), 3.0);
 
-  paddle::operators::math::SelectedRowsAddToTensor<phi::GPUContext, float>
+  phi::funcs::SelectedRowsAddToTensor<phi::GPUContext, float>
       add_to_tensor_functor;
   add_to_tensor_functor(ctx, *output, tensor1.get());
 
@@ -261,8 +259,7 @@ TEST(selected_rows_functor, gpu_merge_add) {
 
   std::unique_ptr<phi::SelectedRows> output{new phi::SelectedRows()};
   output->set_height(height);
-  paddle::operators::math::scatter::MergeAdd<phi::GPUContext, float>
-      merge_add_functor;
+  phi::funcs::scatter::MergeAdd<phi::GPUContext, float> merge_add_functor;
 
   std::vector<const phi::SelectedRows*> inputs;
   inputs.push_back(selected_rows1.get());
diff --git a/paddle/fluid/operators/optimizers/adagrad_op.cc b/paddle/fluid/operators/optimizers/adagrad_op.cc
index ae05070692fb06527c0a9daa000ba3a4082d005b..d8f611c67a4f3cb522ad80dfc8eca2c632bfa82b 100644
--- a/paddle/fluid/operators/optimizers/adagrad_op.cc
+++ b/paddle/fluid/operators/optimizers/adagrad_op.cc
@@ -17,10 +17,10 @@ limitations under the License. */
 
 #include "paddle/fluid/framework/infershape_utils.h"
 #include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/operators/math/selected_rows_functor.h"
 #include "paddle/phi/core/infermeta_utils.h"
 #include "paddle/phi/infermeta/multiary.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
+#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
 
 namespace paddle {
 namespace operators {
diff --git a/paddle/fluid/operators/optimizers/adam_op_functor.h b/paddle/fluid/operators/optimizers/adam_op_functor.h
index 15dee861b874e992b1688945565023f4ea8f846a..dd5635011548b00900f1c99f8eb8bf91a6e1ff51 100644
--- a/paddle/fluid/operators/optimizers/adam_op_functor.h
+++ b/paddle/fluid/operators/optimizers/adam_op_functor.h
@@ -16,12 +16,12 @@
 
 #include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/framework/tensor_util.h"
-#include "paddle/fluid/operators/math/selected_rows_functor.h"
+#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
 
 namespace paddle {
 namespace operators {
 
-namespace scatter = paddle::operators::math::scatter;
+namespace scatter = phi::funcs::scatter;
 
 static inline float GetAttrFromTensor(const framework::Tensor* tensor) {
   const float* tensor_data = tensor->data<float>();
diff --git a/paddle/fluid/operators/optimizers/ftrl_op.h b/paddle/fluid/operators/optimizers/ftrl_op.h
index e15233c718a9a9983da689b417bfadb73d6ecac1..022076a178c3c42c08071c24bff498709d231351 100644
--- a/paddle/fluid/operators/optimizers/ftrl_op.h
+++ b/paddle/fluid/operators/optimizers/ftrl_op.h
@@ -15,8 +15,8 @@ limitations under the License. */
 #pragma once
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/operators/math/selected_rows_functor.h"
 #include "paddle/fluid/platform/for_range.h"
+#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
 
 namespace paddle {
 namespace operators {
@@ -193,7 +193,7 @@ class FTRLOpKernel : public framework::OpKernel<T> {
 
       phi::SelectedRows tmp_merged_grad;
       phi::SelectedRows* merged_grad = &tmp_merged_grad;
-      math::scatter::MergeAdd<DeviceContext, T> merge_func;
+      phi::funcs::scatter::MergeAdd<DeviceContext, T> merge_func;
       merge_func(
           ctx.template device_context<DeviceContext>(), *grad, merged_grad);
 
diff --git a/paddle/fluid/operators/optimizers/momentum_op.h b/paddle/fluid/operators/optimizers/momentum_op.h
index 8ec20eb48ced08f2f9bf7a20e2eb62c1542845b5..847601777ebbc31002c8b224e1c823fb09d854ec 100644
--- a/paddle/fluid/operators/optimizers/momentum_op.h
+++ b/paddle/fluid/operators/optimizers/momentum_op.h
@@ -19,7 +19,6 @@ limitations under the License. */
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/operators/amp/fp16_type_traits.h"
-#include "paddle/fluid/operators/math/selected_rows_functor.h"
 #include "paddle/fluid/platform/float16.h"
 #include "paddle/fluid/platform/for_range.h"
 #include "paddle/phi/kernels/funcs/algorithm.h"
diff --git a/paddle/phi/kernels/cpu/adagrad_kernel.cc b/paddle/phi/kernels/cpu/adagrad_kernel.cc
index d6867deff4c1567c42b0750d4586062fa2f16529..fd4c185ab519a050892f038c5a91f57064f74cdb 100644
--- a/paddle/phi/kernels/cpu/adagrad_kernel.cc
+++ b/paddle/phi/kernels/cpu/adagrad_kernel.cc
@@ -14,10 +14,10 @@
 
 #include "paddle/phi/kernels/adagrad_kernel.h"
 
-#include "paddle/fluid/operators/math/selected_rows_functor.h"
 #include "paddle/phi/backends/cpu/cpu_context.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
+#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
 #include "paddle/phi/kernels/impl/adagrad_kernel_impl.h"
 
 namespace phi {
@@ -38,7 +38,7 @@ struct SparseAdagradFunctor<phi::CPUContext, T> {
                   DenseTensor* param) {
     // 1. g_m.rows = set(g.rows)
     auto grad_width = grad.value().dims()[1];
-    paddle::operators::math::scatter::MergeAdd<phi::CPUContext, T> merge_func;
+    phi::funcs::scatter::MergeAdd<phi::CPUContext, T> merge_func;
     auto grad_merge = merge_func(context, grad);
     auto& merge_rows = grad_merge.rows();
     auto* grad_merge_data = grad_merge.mutable_value()->template data<T>();
@@ -47,8 +47,7 @@ struct SparseAdagradFunctor<phi::CPUContext, T> {
     auto grad_square =
         SquareSelectedRows<phi::CPUContext, T>(context, grad_merge);
 
-    paddle::operators::math::SelectedRowsAddToTensor<phi::CPUContext, T>
-        functor;
+    phi::funcs::SelectedRowsAddToTensor<phi::CPUContext, T> functor;
     functor(context, grad_square, moment);
 
     // 3. update parameter
diff --git a/paddle/phi/kernels/cpu/add_n_kernel.cc b/paddle/phi/kernels/cpu/add_n_kernel.cc
index 42532161053c93b532e2aa0567b9772d14c11696..54506ccd54f5b9cb2a45d1ad526f1d92ac06096c 100644
--- a/paddle/phi/kernels/cpu/add_n_kernel.cc
+++ b/paddle/phi/kernels/cpu/add_n_kernel.cc
@@ -53,7 +53,7 @@ void AddNKernel(const Context& dev_ctx,
     }
   }
 
-  paddle::operators::math::SelectedRowsAddToTensor<Context, T> functor;
+  phi::funcs::SelectedRowsAddToTensor<Context, T> functor;
   // If in_place, just skip the first tensor
   for (size_t i = start; i < in_num; i++) {
     if (DenseTensor::classof(x[i])) {
diff --git a/paddle/phi/kernels/funcs/CMakeLists.txt b/paddle/phi/kernels/funcs/CMakeLists.txt
index 122e4ba7feae40a3c371148d93762308c878b566..331d35398fd08ac1896a7c8d7ed2d39dbcce2767 100644
--- a/paddle/phi/kernels/funcs/CMakeLists.txt
+++ b/paddle/phi/kernels/funcs/CMakeLists.txt
@@ -38,3 +38,17 @@ else()
     math_library(fft DEPS dense_tensor pocketfft)
   endif()
 endif()
+
+if(WITH_MKLDNN)
+  math_library(
+    selected_rows_functor
+    DEPS
+    selected_rows_utils
+    math_function
+    blas
+    mkldnn_axpy_handler
+    mixed_vector)
+else()
+  math_library(selected_rows_functor DEPS selected_rows_utils math_function
+               blas mixed_vector)
+endif()
diff --git a/paddle/phi/kernels/funcs/lamb_functors.h b/paddle/phi/kernels/funcs/lamb_functors.h
index 5abc86bfb777c48189813a031880a76c05a8606b..4c7e436eb798a4bde8448ab343272893aec5cb6d 100644
--- a/paddle/phi/kernels/funcs/lamb_functors.h
+++ b/paddle/phi/kernels/funcs/lamb_functors.h
@@ -19,19 +19,19 @@ limitations under the License. */
 #include <vector>
 
 #include "paddle/fluid/memory/buffer.h"
-#include "paddle/fluid/operators/math/selected_rows_functor.h"
 #include "paddle/phi/common/amp_type_traits.h"
 #include "paddle/phi/core/dense_tensor.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/kernels/funcs/algorithm.h"
 #include "paddle/phi/kernels/funcs/eigen/extensions.h"
 #include "paddle/phi/kernels/funcs/for_range.h"
+#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
 #include "paddle/phi/kernels/funcs/squared_l2_norm.h"
 #include "paddle/phi/kernels/funcs/tensor_to_string.h"
 
 namespace phi {
 
-namespace scatter = paddle::operators::math::scatter;
+namespace scatter = phi::funcs::scatter;
 
 template <typename T, bool IsMultiPrecision>
 struct LambMomentREGUpdateFunctor {
diff --git a/paddle/fluid/operators/math/selected_rows_functor.cc b/paddle/phi/kernels/funcs/selected_rows_functor.cc
similarity index 80%
rename from paddle/fluid/operators/math/selected_rows_functor.cc
rename to paddle/phi/kernels/funcs/selected_rows_functor.cc
index 354af32beabee6cfd01d541ee7f351fc87ed85a0..f17ea6d951812bda124a30787020203924358100 100644
--- a/paddle/fluid/operators/math/selected_rows_functor.cc
+++ b/paddle/phi/kernels/funcs/selected_rows_functor.cc
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include "paddle/fluid/operators/math/selected_rows_functor.h"
+#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
 
 #include "paddle/fluid/framework/mixed_vector.h"
 #include "paddle/fluid/platform/device/device_wrapper.h"
@@ -21,9 +21,8 @@ limitations under the License. */
 #include "paddle/fluid/operators/mkldnn/axpy_handler.h"
 #endif
 
-namespace paddle {
-namespace operators {
-namespace math {
+namespace phi {
+namespace funcs {
 template <typename T>
 struct SelectedRowsAdd<phi::CPUContext, T> {
   void operator()(const phi::CPUContext& context,
@@ -34,11 +33,11 @@ struct SelectedRowsAdd<phi::CPUContext, T> {
     PADDLE_ENFORCE_EQ(
         in1_height,
         input2.height(),
-        platform::errors::InvalidArgument("The two inputs height must be equal."
-                                          "But received first input height  = "
-                                          "[%d], second input height = [%d]",
-                                          in1_height,
-                                          input2.height()));
+        phi::errors::InvalidArgument("The two inputs height must be equal."
+                                     "But received first input height  = "
+                                     "[%d], second input height = [%d]",
+                                     in1_height,
+                                     input2.height()));
     output->set_height(in1_height);
 
     auto& in1_rows = input1.rows();
@@ -59,7 +58,7 @@ struct SelectedRowsAdd<phi::CPUContext, T> {
     PADDLE_ENFORCE_EQ(
         in1_row_numel,
         in2_value.numel() / in2_rows.size(),
-        platform::errors::InvalidArgument(
+        phi::errors::InvalidArgument(
             "The two inputs width must be equal."
             "But received first input width = [%d], second input width = [%d]",
             in1_row_numel,
@@ -67,42 +66,42 @@ struct SelectedRowsAdd<phi::CPUContext, T> {
     PADDLE_ENFORCE_EQ(
         in1_row_numel,
         out_value->numel() / out_rows.size(),
-        platform::errors::InvalidArgument(
+        phi::errors::InvalidArgument(
             "The input and oupput width must be equal."
             "But received input width = [%d], output width = [%d]",
             in1_row_numel,
             out_value->numel() / out_rows.size()));
 
     auto in1_place = input1.place();
-    PADDLE_ENFORCE_EQ(platform::is_cpu_place(in1_place),
+    PADDLE_ENFORCE_EQ(paddle::platform::is_cpu_place(in1_place),
                       true,
-                      platform::errors::InvalidArgument(
+                      phi::errors::InvalidArgument(
                           "The running environment is not on the CPU place."));
     auto in2_place = input2.place();
-    PADDLE_ENFORCE_EQ(platform::is_cpu_place(in2_place),
+    PADDLE_ENFORCE_EQ(paddle::platform::is_cpu_place(in2_place),
                       true,
-                      platform::errors::InvalidArgument(
+                      phi::errors::InvalidArgument(
                           "The running environment is not on the CPU place."));
     auto out_place = context.GetPlace();
-    PADDLE_ENFORCE_EQ(platform::is_cpu_place(out_place),
+    PADDLE_ENFORCE_EQ(paddle::platform::is_cpu_place(out_place),
                       true,
-                      platform::errors::InvalidArgument(
+                      phi::errors::InvalidArgument(
                           "The running environment is not on the CPU place."));
 
     auto* out_data = out_value->data<T>();
     auto* in1_data = in1_value.data<T>();
-    memory::Copy(out_place,
-                 out_data,
-                 in1_place,
-                 in1_data,
-                 in1_value.numel() * sizeof(T));
+    paddle::memory::Copy(out_place,
+                         out_data,
+                         in1_place,
+                         in1_data,
+                         in1_value.numel() * sizeof(T));
 
     auto* in2_data = in2_value.data<T>();
-    memory::Copy(out_place,
-                 out_data + in1_value.numel(),
-                 in2_place,
-                 in2_data,
-                 in2_value.numel() * sizeof(T));
+    paddle::memory::Copy(out_place,
+                         out_data + in1_value.numel(),
+                         in2_place,
+                         in2_data,
+                         in2_value.numel() * sizeof(T));
   }
 };
 
@@ -113,23 +112,23 @@ template <typename T>
 struct SelectedRowsAddTensor<phi::CPUContext, T> {
   void operator()(const phi::CPUContext& context,
                   const phi::SelectedRows& input1,
-                  const framework::Tensor& input2,
-                  framework::Tensor* output) {
+                  const phi::DenseTensor& input2,
+                  phi::DenseTensor* output) {
     auto in1_height = input1.height();
     const auto& in2_dims = input2.dims();
     const auto& out_dims = output->dims();
     PADDLE_ENFORCE_EQ(
         in1_height,
         in2_dims[0],
-        platform::errors::InvalidArgument("The two inputs height must be equal."
-                                          "But received first input height = "
-                                          "[%d], second input height = [%d]",
-                                          in1_height,
-                                          in2_dims[0]));
+        phi::errors::InvalidArgument("The two inputs height must be equal."
+                                     "But received first input height = "
+                                     "[%d], second input height = [%d]",
+                                     in1_height,
+                                     in2_dims[0]));
     PADDLE_ENFORCE_EQ(
         in1_height,
         out_dims[0],
-        platform::errors::InvalidArgument(
+        phi::errors::InvalidArgument(
             "The input and output height must be equal."
             "But received input height = [%d], output height = [%d]",
             in1_height,
@@ -142,7 +141,7 @@ struct SelectedRowsAddTensor<phi::CPUContext, T> {
     PADDLE_ENFORCE_EQ(
         in1_row_numel,
         input2.numel() / in1_height,
-        platform::errors::InvalidArgument(
+        phi::errors::InvalidArgument(
             "The two inputs width must be equal."
             "But received first input width = [%d], second input width = [%d]",
             in1_row_numel,
@@ -150,7 +149,7 @@ struct SelectedRowsAddTensor<phi::CPUContext, T> {
     PADDLE_ENFORCE_EQ(
         in1_row_numel,
         output->numel() / in1_height,
-        platform::errors::InvalidArgument(
+        phi::errors::InvalidArgument(
             "The input and output width must be equal."
             "But received input width = [%d], output width = [%d]",
             in1_row_numel,
@@ -169,8 +168,8 @@ struct SelectedRowsAddTensor<phi::CPUContext, T> {
       }
     }
 
-    auto out_eigen = framework::EigenVector<T>::Flatten(*output);
-    auto in2_eigen = framework::EigenVector<T>::Flatten(input2);
+    auto out_eigen = EigenVector<T>::Flatten(*output);
+    auto in2_eigen = EigenVector<T>::Flatten(input2);
     out_eigen.device(*context.eigen_device()) = out_eigen + in2_eigen;
   }
 };
@@ -188,11 +187,11 @@ struct SelectedRowsAddTo<phi::CPUContext, T> {
     PADDLE_ENFORCE_EQ(
         in1_height,
         input2->height(),
-        platform::errors::InvalidArgument("The two inputs height must be equal."
-                                          "But received first input height = "
-                                          "[%d], second input height = [%d]",
-                                          in1_height,
-                                          input2->height()));
+        phi::errors::InvalidArgument("The two inputs height must be equal."
+                                     "But received first input height = "
+                                     "[%d], second input height = [%d]",
+                                     in1_height,
+                                     input2->height()));
 
     auto& in1_rows = input1.rows();
     auto& in2_rows = *(input2->mutable_rows());
@@ -205,23 +204,23 @@ struct SelectedRowsAddTo<phi::CPUContext, T> {
     mixv_in2_rows.Extend(in1_rows.begin(), in1_rows.end());
 
     auto in1_place = input1.place();
-    PADDLE_ENFORCE_EQ(platform::is_cpu_place(in1_place),
+    PADDLE_ENFORCE_EQ(paddle::platform::is_cpu_place(in1_place),
                       true,
-                      platform::errors::InvalidArgument(
+                      phi::errors::InvalidArgument(
                           "The running environment is not on the CPU place."));
     auto in2_place = input2->place();
-    PADDLE_ENFORCE_EQ(platform::is_cpu_place(in2_place),
+    PADDLE_ENFORCE_EQ(paddle::platform::is_cpu_place(in2_place),
                       true,
-                      platform::errors::InvalidArgument(
+                      phi::errors::InvalidArgument(
                           "The running environment is not on the CPU place."));
 
     auto* in1_data = in1_value.data<T>();
     auto* in2_data = in2_value->data<T>();
-    memory::Copy(in2_place,
-                 in2_data + input2_offset,
-                 in1_place,
-                 in1_data,
-                 in1_value.numel() * sizeof(T));
+    paddle::memory::Copy(in2_place,
+                         in2_data + input2_offset,
+                         in1_place,
+                         in1_data,
+                         in1_value.numel() * sizeof(T));
   }
 };
 
@@ -244,7 +243,7 @@ struct SelectedRowsSumTo<phi::CPUContext, T> {
       auto in1_height = (*iter)->height();
       PADDLE_ENFORCE_EQ(in1_height,
                         input2->height(),
-                        platform::errors::InvalidArgument(
+                        phi::errors::InvalidArgument(
                             "The two inputs height must be equal."
                             "But received first input height = [%d], second "
                             "input height = [%d]",
@@ -255,7 +254,7 @@ struct SelectedRowsSumTo<phi::CPUContext, T> {
     std::vector<int64_t> in2_rows;
     in2_rows.reserve(in2_rows.size() + size);
     for (auto iter = input1.begin(); iter != input1.end(); ++iter) {
-      const framework::Vector<int64_t>& in_rows = (*iter)->rows();
+      const paddle::framework::Vector<int64_t>& in_rows = (*iter)->rows();
       in2_rows.insert(in2_rows.end(), in_rows.begin(), in_rows.end());
     }
     input2->set_rows(in2_rows);
@@ -280,7 +279,7 @@ template <typename T>
 struct SelectedRowsAddToTensor<phi::CPUContext, T> {
   void operator()(const phi::CPUContext& context,
                   const phi::SelectedRows& input1,
-                  framework::Tensor* input2) {
+                  phi::DenseTensor* input2) {
     if (UNLIKELY(input1.rows().size() == 0)) {
       LOG(WARNING) << "input selected rows is empty!";
       return;
@@ -290,11 +289,11 @@ struct SelectedRowsAddToTensor<phi::CPUContext, T> {
     PADDLE_ENFORCE_EQ(
         in1_height,
         in2_dims[0],
-        platform::errors::InvalidArgument("The two inputs height must be equal."
-                                          "But received first input height = "
-                                          "[%d], second input height = [%d]",
-                                          in1_height,
-                                          in2_dims[0]));
+        phi::errors::InvalidArgument("The two inputs height must be equal."
+                                     "But received first input height = "
+                                     "[%d], second input height = [%d]",
+                                     in1_height,
+                                     in2_dims[0]));
 
     auto& in1_value = input1.value();
     auto& in1_rows = input1.rows();
@@ -303,7 +302,7 @@ struct SelectedRowsAddToTensor<phi::CPUContext, T> {
     PADDLE_ENFORCE_EQ(
         in1_row_numel,
         input2->numel() / in1_height,
-        platform::errors::InvalidArgument(
+        phi::errors::InvalidArgument(
             "The two inputs width must be equal."
             "But received first input width = [%d], second input width = [%d]",
             in1_row_numel,
@@ -325,7 +324,7 @@ template struct SelectedRowsAddToTensor<phi::CPUContext, float>;
 template struct SelectedRowsAddToTensor<phi::CPUContext, double>;
 template struct SelectedRowsAddToTensor<phi::CPUContext, int>;
 template struct SelectedRowsAddToTensor<phi::CPUContext, int64_t>;
-template struct SelectedRowsAddToTensor<phi::CPUContext, platform::bfloat16>;
+template struct SelectedRowsAddToTensor<phi::CPUContext, phi::dtype::bfloat16>;
 // This is a separated namespace for manipulate SelectedRows typed
 // data. Like merge duplicated rows, adding two SelectedRows etc.
 //
@@ -355,7 +354,7 @@ typename std::enable_if<std::is_integral<T>::value>::type elementwise_add_to(
 }
 
 template <typename T, typename DeviceContext>
-typename std::enable_if<std::is_same<T, platform::bfloat16>::value>::type
+typename std::enable_if<std::is_same<T, phi::dtype::bfloat16>::value>::type
 add_sparse_inputs(const std::vector<const phi::SelectedRows*>& inputs,
                   const std::unordered_map<int64_t, size_t>& rows_to_id,
                   int64_t input_width,
@@ -372,7 +371,7 @@ add_sparse_inputs(const std::vector<const phi::SelectedRows*>& inputs,
     auto& input_rows = input->rows();
 
 #ifdef PADDLE_WITH_MKLDNN
-    OneDNNAXPYHandler<T> axpy_handler(input_width, T(1.f));
+    paddle::operators::OneDNNAXPYHandler<T> axpy_handler(input_width, T(1.f));
     for (size_t i = 0; i < input_rows.size(); i++) {
       size_t out_i = rows_to_id.at(input_rows[i]);
       axpy_handler(&input_data[i * input_width],
@@ -391,7 +390,7 @@ add_sparse_inputs(const std::vector<const phi::SelectedRows*>& inputs,
 }
 
 template <typename T, typename DeviceContext>
-typename std::enable_if<!std::is_same<T, platform::bfloat16>::value>::type
+typename std::enable_if<!std::is_same<T, phi::dtype::bfloat16>::value>::type
 add_sparse_inputs(const std::vector<const phi::SelectedRows*>& inputs,
                   const std::unordered_map<int64_t, size_t>& rows_to_id,
                   int64_t input_width,
@@ -463,15 +462,15 @@ struct MergeAddImpl {
       if (input->rows().size() == 0) {
         continue;
       }
-      PADDLE_ENFORCE_EQ(input_width,
-                        input->value().dims()[1],
-                        platform::errors::InvalidArgument(
-                            "All inputs should have same "
-                            "dimension except for the first one."));
-      PADDLE_ENFORCE_EQ(input_height,
-                        input->height(),
-                        platform::errors::InvalidArgument(
-                            "All inputs should have same height."));
+      PADDLE_ENFORCE_EQ(
+          input_width,
+          input->value().dims()[1],
+          phi::errors::InvalidArgument("All inputs should have same "
+                                       "dimension except for the first one."));
+      PADDLE_ENFORCE_EQ(
+          input_height,
+          input->height(),
+          phi::errors::InvalidArgument("All inputs should have same height."));
       row_num += input->rows().size();
       merged_row_set.insert(input->rows().begin(), input->rows().end());
     }
@@ -499,11 +498,11 @@ struct MergeAddImpl {
       for (auto* in : inputs) {
         auto* in_data = in->value().data<T>();
         auto in_numel = in->rows().size() * input_width;
-        memory::Copy(out_place,
-                     out_data + copied_numel,
-                     in_place,
-                     in_data,
-                     in_numel * sizeof(T));
+        paddle::memory::Copy(out_place,
+                             out_data + copied_numel,
+                             in_place,
+                             in_data,
+                             in_numel * sizeof(T));
         copied_numel += in_numel;
       }
     } else {
@@ -563,9 +562,9 @@ TEMPLATE_SPECIALIZED_FOR_MERGEADD_CPU(float)
 TEMPLATE_SPECIALIZED_FOR_MERGEADD_CPU(double)
 TEMPLATE_SPECIALIZED_FOR_MERGEADD_CPU(int)
 TEMPLATE_SPECIALIZED_FOR_MERGEADD_CPU(int64_t)
-TEMPLATE_SPECIALIZED_FOR_MERGEADD_CPU(platform::bfloat16)
-TEMPLATE_SPECIALIZED_FOR_MERGEADD_CPU(platform::complex<float>)
-TEMPLATE_SPECIALIZED_FOR_MERGEADD_CPU(platform::complex<double>)
+TEMPLATE_SPECIALIZED_FOR_MERGEADD_CPU(phi::dtype::bfloat16)
+TEMPLATE_SPECIALIZED_FOR_MERGEADD_CPU(phi::dtype::complex<float>)
+TEMPLATE_SPECIALIZED_FOR_MERGEADD_CPU(phi::dtype::complex<double>)
 
 #ifdef PADDLE_WITH_XPU
 template <typename T>
@@ -582,7 +581,7 @@ struct MergeAdd<phi::XPUContext, T> {
                   const phi::SelectedRows& input,
                   phi::SelectedRows* output,
                   const bool sorted_result = false) {
-    framework::Vector<int64_t> input_rows(input.rows());
+    paddle::framework::Vector<int64_t> input_rows(input.rows());
     if (input_rows.size() == 0) {
       return;
     }
@@ -612,16 +611,16 @@ struct MergeAdd<phi::XPUContext, T> {
     xpu::ctx_guard RAII_GUARD(context.x_context());
     int64_t* x_rows_data = RAII_GUARD.alloc_l3_or_gm<int64_t>(xm);
     int64_t* y_rows_data = RAII_GUARD.alloc_l3_or_gm<int64_t>(ym);
-    memory::Copy(context.GetPlace(),
-                 y_rows_data,
-                 platform::CPUPlace(),
-                 merge_rows.data(),
-                 ym * sizeof(int64_t));
-    memory::Copy(context.GetPlace(),
-                 x_rows_data,
-                 platform::CPUPlace(),
-                 input_rows.data(),
-                 xm * sizeof(int64_t));
+    paddle::memory::Copy(context.GetPlace(),
+                         y_rows_data,
+                         phi::CPUPlace(),
+                         merge_rows.data(),
+                         ym * sizeof(int64_t));
+    paddle::memory::Copy(context.GetPlace(),
+                         x_rows_data,
+                         phi::CPUPlace(),
+                         input_rows.data(),
+                         xm * sizeof(int64_t));
     int r = xpu::merge_dup_rows<T, int64_t>(context.x_context(),
                                             x_data,
                                             y_data,
@@ -661,15 +660,15 @@ struct MergeAdd<phi::XPUContext, T> {
       if (input->rows().size() == 0) {
         continue;
       }
-      PADDLE_ENFORCE_EQ(input_width,
-                        input->value().dims()[1],
-                        platform::errors::InvalidArgument(
-                            "All inputs should have same "
-                            "dimension except for the first one."));
-      PADDLE_ENFORCE_EQ(input_height,
-                        input->height(),
-                        platform::errors::InvalidArgument(
-                            "All inputs should have same height."));
+      PADDLE_ENFORCE_EQ(
+          input_width,
+          input->value().dims()[1],
+          phi::errors::InvalidArgument("All inputs should have same "
+                                       "dimension except for the first one."));
+      PADDLE_ENFORCE_EQ(
+          input_height,
+          input->height(),
+          phi::errors::InvalidArgument("All inputs should have same height."));
       row_num += input->rows().size();
       merged_row_set.insert(input->rows().begin(), input->rows().end());
     }
@@ -709,16 +708,16 @@ struct MergeAdd<phi::XPUContext, T> {
       xpu::ctx_guard RAII_GUARD(context.x_context());
       int64_t* x_rows_data = RAII_GUARD.alloc_l3_or_gm<int64_t>(xm);
       int64_t* y_rows_data = RAII_GUARD.alloc_l3_or_gm<int64_t>(ym);
-      memory::Copy(context.GetPlace(),
-                   y_rows_data,
-                   platform::CPUPlace(),
-                   merge_rows.data(),
-                   ym * sizeof(int64_t));
-      memory::Copy(context.GetPlace(),
-                   x_rows_data,
-                   platform::CPUPlace(),
-                   input_rows.data(),
-                   xm * sizeof(int64_t));
+      paddle::memory::Copy(context.GetPlace(),
+                           y_rows_data,
+                           phi::CPUPlace(),
+                           merge_rows.data(),
+                           ym * sizeof(int64_t));
+      paddle::memory::Copy(context.GetPlace(),
+                           x_rows_data,
+                           phi::CPUPlace(),
+                           input_rows.data(),
+                           xm * sizeof(int64_t));
       int r = xpu::merge_dup_rows<T, int64_t>(context.x_context(),
                                               x_data,
                                               y_data,
@@ -777,15 +776,15 @@ struct MergeAverage<phi::CPUContext, T> {
       if (input->rows().size() == 0) {
         continue;
       }
-      PADDLE_ENFORCE_EQ(input_width,
-                        input->value().dims()[1],
-                        platform::errors::InvalidArgument(
-                            "All inputs should have same "
-                            "dimension except for the first one."));
-      PADDLE_ENFORCE_EQ(input_height,
-                        input->height(),
-                        platform::errors::InvalidArgument(
-                            "All input should have same height."));
+      PADDLE_ENFORCE_EQ(
+          input_width,
+          input->value().dims()[1],
+          phi::errors::InvalidArgument("All inputs should have same "
+                                       "dimension except for the first one."));
+      PADDLE_ENFORCE_EQ(
+          input_height,
+          input->height(),
+          phi::errors::InvalidArgument("All input should have same height."));
       row_num += input->rows().size();
       merged_row_set.insert(input->rows().begin(), input->rows().end());
     }
@@ -851,17 +850,17 @@ struct UpdateToTensor<phi::CPUContext, T> {
   void operator()(const phi::CPUContext& context,
                   const ScatterOps& op,
                   const phi::SelectedRows& input1,
-                  framework::Tensor* input2) {
+                  phi::DenseTensor* input2) {
     auto in1_height = input1.height();
     const auto& in2_dims = input2->dims();
     PADDLE_ENFORCE_EQ(
         in1_height,
         in2_dims[0],
-        platform::errors::InvalidArgument("The two inputs height must be equal."
-                                          "But received first input height = "
-                                          "[%d], second input height = [%d]",
-                                          in1_height,
-                                          in2_dims[0]));
+        phi::errors::InvalidArgument("The two inputs height must be equal."
+                                     "But received first input height = "
+                                     "[%d], second input height = [%d]",
+                                     in1_height,
+                                     in2_dims[0]));
 
     auto& in1_value = input1.value();
     auto& in1_rows = input1.rows();
@@ -870,7 +869,7 @@ struct UpdateToTensor<phi::CPUContext, T> {
     PADDLE_ENFORCE_EQ(
         in1_row_numel,
         input2->numel() / in1_height,
-        platform::errors::InvalidArgument(
+        phi::errors::InvalidArgument(
             "The two inputs width must be equal."
             "But received first input width = [%d], second input width = [%d]",
             in1_row_numel,
@@ -923,6 +922,5 @@ struct UpdateToTensor<phi::CPUContext, T> {
 };
 
 }  // namespace scatter
-}  // namespace math
-}  // namespace operators
-}  // namespace paddle
+}  // namespace funcs
+}  // namespace phi
diff --git a/paddle/fluid/operators/math/selected_rows_functor.cu b/paddle/phi/kernels/funcs/selected_rows_functor.cu
similarity index 79%
rename from paddle/fluid/operators/math/selected_rows_functor.cu
rename to paddle/phi/kernels/funcs/selected_rows_functor.cu
index 7fa9dc27db9cd920e53dee8863e4730fb79bbc7f..c21402693b9c807903043c4b017e899b18fc5957 100644
--- a/paddle/fluid/operators/math/selected_rows_functor.cu
+++ b/paddle/phi/kernels/funcs/selected_rows_functor.cu
@@ -15,15 +15,14 @@ limitations under the License. */
 #include <set>
 #include <vector>
 
-#include "paddle/fluid/operators/math/selected_rows_functor.h"
-#include "paddle/fluid/platform/bfloat16.h"
 #include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
-#include "paddle/fluid/platform/float16.h"
+#include "paddle/phi/common/bfloat16.h"
+#include "paddle/phi/common/float16.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
+#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
 
-namespace paddle {
-namespace operators {
-namespace math {
+namespace phi {
+namespace funcs {
 template <typename T>
 struct SelectedRowsAdd<phi::GPUContext, T> {
   void operator()(const phi::GPUContext& context,
@@ -34,14 +33,14 @@ struct SelectedRowsAdd<phi::GPUContext, T> {
     PADDLE_ENFORCE_EQ(
         in1_height,
         input2.height(),
-        platform::errors::InvalidArgument("The two inputs height must be equal."
-                                          "But received first input height  = "
-                                          "[%d], second input height = [%d]",
-                                          in1_height,
-                                          input2.height()));
+        phi::errors::InvalidArgument("The two inputs height must be equal."
+                                     "But received first input height  = "
+                                     "[%d], second input height = [%d]",
+                                     in1_height,
+                                     input2.height()));
     output->set_height(in1_height);
 
-    framework::Vector<int64_t> in1_rows(input1.rows());
+    paddle::framework::Vector<int64_t> in1_rows(input1.rows());
     auto& in2_rows = input2.rows();
     std::vector<int64_t> out_rows;
     out_rows.reserve(in1_rows.size() + in2_rows.size());
@@ -59,7 +58,7 @@ struct SelectedRowsAdd<phi::GPUContext, T> {
     PADDLE_ENFORCE_EQ(
         in1_row_numel,
         in2_value.numel() / in2_rows.size(),
-        platform::errors::InvalidArgument(
+        phi::errors::InvalidArgument(
             "The two inputs width must be equal."
             "But received first input width = [%d], second input width = [%d]",
             in1_row_numel,
@@ -67,7 +66,7 @@ struct SelectedRowsAdd<phi::GPUContext, T> {
     PADDLE_ENFORCE_EQ(
         in1_row_numel,
         out_value->numel() / out_rows.size(),
-        platform::errors::InvalidArgument(
+        phi::errors::InvalidArgument(
             "The input and oupput width must be equal."
             "But received input width = [%d], output width = [%d]",
             in1_row_numel,
@@ -77,35 +76,35 @@ struct SelectedRowsAdd<phi::GPUContext, T> {
     auto* in1_data = in1_value.data<T>();
 
     auto in1_place = input1.place();
-    PADDLE_ENFORCE_EQ(platform::is_gpu_place(in1_place),
+    PADDLE_ENFORCE_EQ(paddle::platform::is_gpu_place(in1_place),
                       true,
-                      platform::errors::InvalidArgument(
+                      phi::errors::InvalidArgument(
                           "The running environment is not on the GPU place."));
     auto in2_place = input2.place();
-    PADDLE_ENFORCE_EQ(platform::is_gpu_place(in2_place),
+    PADDLE_ENFORCE_EQ(paddle::platform::is_gpu_place(in2_place),
                       true,
-                      platform::errors::InvalidArgument(
+                      phi::errors::InvalidArgument(
                           "The running environment is not on the GPU place."));
     auto out_place = context.GetPlace();
-    PADDLE_ENFORCE_EQ(platform::is_gpu_place(out_place),
+    PADDLE_ENFORCE_EQ(paddle::platform::is_gpu_place(out_place),
                       true,
-                      platform::errors::InvalidArgument(
+                      phi::errors::InvalidArgument(
                           "The running environment is not on the GPU place."));
 
-    memory::Copy(out_place,
-                 out_data,
-                 in1_place,
-                 in1_data,
-                 in1_value.numel() * sizeof(T),
-                 context.stream());
+    paddle::memory::Copy(out_place,
+                         out_data,
+                         in1_place,
+                         in1_data,
+                         in1_value.numel() * sizeof(T),
+                         context.stream());
 
     auto* in2_data = in2_value.data<T>();
-    memory::Copy(out_place,
-                 out_data + in1_value.numel(),
-                 in2_place,
-                 in2_data,
-                 in2_value.numel() * sizeof(T),
-                 context.stream());
+    paddle::memory::Copy(out_place,
+                         out_data + in1_value.numel(),
+                         in2_place,
+                         in2_data,
+                         in2_value.numel() * sizeof(T),
+                         context.stream());
   }
 };
 
@@ -137,15 +136,15 @@ template <typename T>
 struct SelectedRowsAddTensor<phi::GPUContext, T> {
   void operator()(const phi::GPUContext& context,
                   const phi::SelectedRows& input1,
-                  const framework::Tensor& input2,
-                  framework::Tensor* output) {
+                  const phi::DenseTensor& input2,
+                  phi::DenseTensor* output) {
     auto in1_height = input1.height();
     auto in2_dims = input2.dims();
     auto out_dims = output->dims();
     PADDLE_ENFORCE_EQ(
         in1_height,
         in2_dims[0],
-        platform::errors::InvalidArgument(
+        phi::errors::InvalidArgument(
             "The two inputs height must be equal."
             "But received first input height = [%d], first input height = [%d]",
             in1_height,
@@ -153,7 +152,7 @@ struct SelectedRowsAddTensor<phi::GPUContext, T> {
     PADDLE_ENFORCE_EQ(
         in1_height,
         out_dims[0],
-        platform::errors::InvalidArgument(
+        phi::errors::InvalidArgument(
             "The input and output height must be equal."
             "But received input height = [%d], output height = [%d]",
             in1_height,
@@ -166,7 +165,7 @@ struct SelectedRowsAddTensor<phi::GPUContext, T> {
     PADDLE_ENFORCE_EQ(
         in1_row_numel,
         input2.numel() / in1_height,
-        platform::errors::InvalidArgument(
+        phi::errors::InvalidArgument(
             "The two inputs width must be equal."
             "But received first input width = [%d], second input width = [%d]",
             in1_row_numel,
@@ -174,7 +173,7 @@ struct SelectedRowsAddTensor<phi::GPUContext, T> {
     PADDLE_ENFORCE_EQ(
         in1_row_numel,
         output->numel() / in1_height,
-        platform::errors::InvalidArgument(
+        phi::errors::InvalidArgument(
             "The input and output width must be equal."
             "But received input width = [%d], output width = [%d]",
             in1_row_numel,
@@ -198,16 +197,16 @@ struct SelectedRowsAddTensor<phi::GPUContext, T> {
             out_data,
             in1_row_numel);
 
-    auto out_eigen = framework::EigenVector<T>::Flatten(*output);
-    auto in2_eigen = framework::EigenVector<T>::Flatten(input2);
+    auto out_eigen = EigenVector<T>::Flatten(*output);
+    auto in2_eigen = EigenVector<T>::Flatten(input2);
     out_eigen.device(*context.eigen_device()) = out_eigen + in2_eigen;
   }
 };
 
 template struct SelectedRowsAddTensor<phi::GPUContext, float>;
 template struct SelectedRowsAddTensor<phi::GPUContext, double>;
-template struct SelectedRowsAdd<phi::GPUContext, platform::float16>;
-template struct SelectedRowsAddTensor<phi::GPUContext, platform::float16>;
+template struct SelectedRowsAdd<phi::GPUContext, phi::dtype::float16>;
+template struct SelectedRowsAddTensor<phi::GPUContext, phi::dtype::float16>;
 
 template <typename T>
 struct SelectedRowsAddTo<phi::GPUContext, T> {
@@ -219,11 +218,11 @@ struct SelectedRowsAddTo<phi::GPUContext, T> {
     PADDLE_ENFORCE_EQ(
         in1_height,
         input2->height(),
-        platform::errors::InvalidArgument("The two inputs height must be equal."
-                                          "But received first input height = "
-                                          "[%d], second input height = [%d]",
-                                          in1_height,
-                                          input2->height()));
+        phi::errors::InvalidArgument("The two inputs height must be equal."
+                                     "But received first input height = "
+                                     "[%d], second input height = [%d]",
+                                     in1_height,
+                                     input2->height()));
 
     auto& in1_rows = input1.rows();
     auto& in2_rows = *(input2->mutable_rows());
@@ -238,24 +237,24 @@ struct SelectedRowsAddTo<phi::GPUContext, T> {
     }
 
     auto in1_place = input1.place();
-    PADDLE_ENFORCE_EQ(platform::is_gpu_place(in1_place),
+    PADDLE_ENFORCE_EQ(paddle::platform::is_gpu_place(in1_place),
                       true,
-                      platform::errors::InvalidArgument(
+                      phi::errors::InvalidArgument(
                           "The running environment is not on the GPU place."));
     auto in2_place = input2->place();
-    PADDLE_ENFORCE_EQ(platform::is_gpu_place(in1_place),
+    PADDLE_ENFORCE_EQ(paddle::platform::is_gpu_place(in2_place),
                       true,
-                      platform::errors::InvalidArgument(
+                      phi::errors::InvalidArgument(
                           "The running environment is not on the GPU place."));
 
     auto* in1_data = in1_value.data<T>();
     auto* in2_data = in2_value->data<T>();
-    memory::Copy(in2_place,
-                 in2_data + input2_offset,
-                 in1_place,
-                 in1_data,
-                 in1_value.numel() * sizeof(T),
-                 context.stream());
+    paddle::memory::Copy(in2_place,
+                         in2_data + input2_offset,
+                         in1_place,
+                         in1_data,
+                         in1_value.numel() * sizeof(T),
+                         context.stream());
   }
 };
 
@@ -263,7 +262,7 @@ template struct SelectedRowsAddTo<phi::GPUContext, float>;
 template struct SelectedRowsAddTo<phi::GPUContext, double>;
 template struct SelectedRowsAddTo<phi::GPUContext, int>;
 template struct SelectedRowsAddTo<phi::GPUContext, int64_t>;
-template struct SelectedRowsAddTo<phi::GPUContext, platform::float16>;
+template struct SelectedRowsAddTo<phi::GPUContext, phi::dtype::float16>;
 
 namespace {
 template <typename T, int block_size>
@@ -289,17 +288,17 @@ template <typename T>
 struct SelectedRowsAddToTensor<phi::GPUContext, T> {
   void operator()(const phi::GPUContext& context,
                   const phi::SelectedRows& input1,
-                  framework::Tensor* input2) {
+                  phi::DenseTensor* input2) {
     auto in1_height = input1.height();
     auto in2_dims = input2->dims();
     PADDLE_ENFORCE_EQ(
         in1_height,
         in2_dims[0],
-        platform::errors::InvalidArgument("The two inputs height must be equal."
-                                          "But received first input height = "
-                                          "[%d], second input height = [%d]",
-                                          in1_height,
-                                          in2_dims[0]));
+        phi::errors::InvalidArgument("The two inputs height must be equal."
+                                     "But received first input height = "
+                                     "[%d], second input height = [%d]",
+                                     in1_height,
+                                     in2_dims[0]));
 
     auto& in1_value = input1.value();
     auto& in1_rows = input1.rows();
@@ -308,7 +307,7 @@ struct SelectedRowsAddToTensor<phi::GPUContext, T> {
     PADDLE_ENFORCE_EQ(
         in1_row_numel,
         input2->numel() / in1_height,
-        platform::errors::InvalidArgument(
+        phi::errors::InvalidArgument(
             "The two inputs width must be equal."
             "But received first input width = [%d], second input width = [%d]",
             in1_row_numel,
@@ -333,7 +332,7 @@ template struct SelectedRowsAddToTensor<phi::GPUContext, float>;
 template struct SelectedRowsAddToTensor<phi::GPUContext, double>;
 template struct SelectedRowsAddToTensor<phi::GPUContext, int>;
 template struct SelectedRowsAddToTensor<phi::GPUContext, int64_t>;
-template struct SelectedRowsAddToTensor<phi::GPUContext, platform::float16>;
+template struct SelectedRowsAddToTensor<phi::GPUContext, phi::dtype::float16>;
 
 namespace scatter {
 
@@ -379,7 +378,7 @@ struct MergeAddImpl {
                   const phi::SelectedRows& input,
                   phi::SelectedRows* output,
                   const bool sorted_result = false) {
-    framework::Vector<int64_t> input_rows(input.rows());
+    paddle::framework::Vector<int64_t> input_rows(input.rows());
     if (input_rows.size() == 0) {
       return;
     }
@@ -387,7 +386,7 @@ struct MergeAddImpl {
     phi::SelectedRows& out = *output;
     std::set<int64_t> row_set(input_rows.begin(), input_rows.end());
     std::vector<int64_t> merge_rows_cpu(row_set.begin(), row_set.end());
-    framework::Vector<int64_t> merge_rows(merge_rows_cpu);
+    paddle::framework::Vector<int64_t> merge_rows(merge_rows_cpu);
 
     auto input_width = input.value().dims()[1];
 
@@ -446,20 +445,20 @@ struct MergeAddImpl {
       if (input->rows().size() == 0) {
         continue;
       }
-      PADDLE_ENFORCE_EQ(input_width,
-                        input->value().dims()[1],
-                        platform::errors::InvalidArgument(
-                            "All input should have same "
-                            "dimension except for the first one."));
-      PADDLE_ENFORCE_EQ(input_height,
-                        input->height(),
-                        platform::errors::InvalidArgument(
-                            "All input should have same height."));
+      PADDLE_ENFORCE_EQ(
+          input_width,
+          input->value().dims()[1],
+          phi::errors::InvalidArgument("All input should have same "
+                                       "dimension except for the first one."));
+      PADDLE_ENFORCE_EQ(
+          input_height,
+          input->height(),
+          phi::errors::InvalidArgument("All input should have same height."));
       merged_row_set.insert(input->rows().begin(), input->rows().end());
     }
     std::vector<int64_t> merge_rows_cpu(merged_row_set.begin(),
                                         merged_row_set.end());
-    framework::Vector<int64_t> merge_rows(merge_rows_cpu);
+    paddle::framework::Vector<int64_t> merge_rows(merge_rows_cpu);
 
     out.set_rows(merge_rows);
     out.set_height(input_height);
@@ -530,10 +529,10 @@ TEMPLATE_SPECIALIZED_FOR_MERGEADD(float)
 TEMPLATE_SPECIALIZED_FOR_MERGEADD(double)
 TEMPLATE_SPECIALIZED_FOR_MERGEADD(int)
 TEMPLATE_SPECIALIZED_FOR_MERGEADD(int64_t)
-TEMPLATE_SPECIALIZED_FOR_MERGEADD(platform::float16)
-TEMPLATE_SPECIALIZED_FOR_MERGEADD(platform::bfloat16)
-TEMPLATE_SPECIALIZED_FOR_MERGEADD(platform::complex<float>)
-TEMPLATE_SPECIALIZED_FOR_MERGEADD(platform::complex<double>)
+TEMPLATE_SPECIALIZED_FOR_MERGEADD(phi::dtype::float16)
+TEMPLATE_SPECIALIZED_FOR_MERGEADD(phi::dtype::bfloat16)
+TEMPLATE_SPECIALIZED_FOR_MERGEADD(phi::dtype::complex<float>)
+TEMPLATE_SPECIALIZED_FOR_MERGEADD(phi::dtype::complex<double>)
 
 template <typename T, int block_size>
 __global__ void UpdateToTensorKernel(const T* selected_rows,
@@ -591,7 +590,7 @@ struct UpdateToTensor<phi::GPUContext, T> {
   void operator()(const phi::GPUContext& context,
                   const ScatterOps& op,
                   const phi::SelectedRows& input1,
-                  framework::Tensor* input2) {
+                  DenseTensor* input2) {
     // NOTE: Use SelectedRowsAddToTensor for better performance
     //       no additional MergeAdd called.
     MergeAdd<phi::GPUContext, T> merge_func;
@@ -602,11 +601,11 @@ struct UpdateToTensor<phi::GPUContext, T> {
     PADDLE_ENFORCE_EQ(
         in1_height,
         in2_dims[0],
-        platform::errors::InvalidArgument("The two inputs height must be equal."
-                                          "But received first input height = "
-                                          "[%d], second input height = [%d]",
-                                          in1_height,
-                                          in2_dims[0]));
+        phi::errors::InvalidArgument("The two inputs height must be equal."
+                                     "But received first input height = "
+                                     "[%d], second input height = [%d]",
+                                     in1_height,
+                                     in2_dims[0]));
 
     auto& in1_value = merged_in1.value();
     auto& in1_rows = merged_in1.rows();
@@ -615,7 +614,7 @@ struct UpdateToTensor<phi::GPUContext, T> {
     PADDLE_ENFORCE_EQ(
         in1_row_numel,
         input2->numel() / in1_height,
-        platform::errors::InvalidArgument(
+        phi::errors::InvalidArgument(
             "The two inputs width must be equal."
             "But received first input width = [%d], second input width = [%d]",
             in1_row_numel,
@@ -624,14 +623,13 @@ struct UpdateToTensor<phi::GPUContext, T> {
     auto* in1_data = in1_value.template data<T>();
     auto* in2_data = input2->data<T>();
 
-    dim3 threads(platform::PADDLE_CUDA_NUM_THREADS, 1);
+    dim3 threads(paddle::platform::PADDLE_CUDA_NUM_THREADS, 1);
     dim3 grid(in1_rows.size(), 1);
-    UpdateToTensorKernel<T, platform::PADDLE_CUDA_NUM_THREADS>
+    UpdateToTensorKernel<T, paddle::platform::PADDLE_CUDA_NUM_THREADS>
         <<<grid, threads, 0, context.stream()>>>(
             in1_data, in1_rows.cuda_data(), op, in2_data, in1_row_numel);
   }
 };
 }  // namespace scatter
-}  // namespace math
-}  // namespace operators
-}  // namespace paddle
+}  // namespace funcs
+}  // namespace phi
diff --git a/paddle/fluid/operators/math/selected_rows_functor.h b/paddle/phi/kernels/funcs/selected_rows_functor.h
similarity index 89%
rename from paddle/fluid/operators/math/selected_rows_functor.h
rename to paddle/phi/kernels/funcs/selected_rows_functor.h
index cf64b5d77e5beb31cd115dbdb52c07ede53a501d..8fe55678195c67564bf8df946d13a7ded2aa99ee 100644
--- a/paddle/fluid/operators/math/selected_rows_functor.h
+++ b/paddle/phi/kernels/funcs/selected_rows_functor.h
@@ -16,19 +16,17 @@ limitations under the License. */
 #include <map>
 #include <vector>
 
-#include "paddle/fluid/framework/eigen.h"
-#include "paddle/fluid/framework/selected_rows_utils.h"
-#include "paddle/fluid/platform/device_context.h"
+#include "paddle/phi/backends/all_context.h"
 #include "paddle/phi/kernels/funcs/blas/blas.h"
+#include "paddle/phi/kernels/funcs/eigen/common.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
 
 #define INLINE_FOR2(sizei, sizej)     \
   for (int64_t i = 0; i < sizei; i++) \
     for (int64_t j = 0; j < sizej; j++)
 
-namespace paddle {
-namespace operators {
-namespace math {
+namespace phi {
+namespace funcs {
 
 // SelectedRows + SelectedRows will simplely concat value and rows.
 // The real computation happens in dealing with LoDTensor.
@@ -44,8 +42,8 @@ template <typename DeviceContext, typename T>
 struct SelectedRowsAddTensor {
   void operator()(const DeviceContext& context,
                   const phi::SelectedRows& input1,
-                  const framework::Tensor& input2,
-                  framework::Tensor* output);
+                  const phi::DenseTensor& input2,
+                  phi::DenseTensor* output);
 };
 
 // input2 = input1 + input2
@@ -73,7 +71,7 @@ template <typename DeviceContext, typename T>
 struct SelectedRowsAddToTensor {
   void operator()(const DeviceContext& context,
                   const phi::SelectedRows& input1,
-                  framework::Tensor* input2);
+                  phi::DenseTensor* input2);
 };
 
 namespace scatter {
@@ -115,10 +113,9 @@ struct UpdateToTensor {
   void operator()(const DeviceContext& context,
                   const ScatterOps& op,
                   const phi::SelectedRows& input1,
-                  framework::Tensor* input2);
+                  phi::DenseTensor* input2);
 };
 
 }  // namespace scatter
-}  // namespace math
-}  // namespace operators
-}  // namespace paddle
+}  // namespace funcs
+}  // namespace phi
diff --git a/paddle/phi/kernels/gpu/adagrad_kernel.cu b/paddle/phi/kernels/gpu/adagrad_kernel.cu
index 28db4d418b957868081e092c4fe340187bd7b28d..150b18bdbd60019eef4b46cfcceb44e9b99891c4 100644
--- a/paddle/phi/kernels/gpu/adagrad_kernel.cu
+++ b/paddle/phi/kernels/gpu/adagrad_kernel.cu
@@ -14,11 +14,11 @@
 
 #include "paddle/phi/kernels/adagrad_kernel.h"
 
-#include "paddle/fluid/operators/math/selected_rows_functor.h"
 #include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
 #include "paddle/phi/backends/gpu/gpu_context.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
+#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
 #include "paddle/phi/kernels/impl/adagrad_kernel_impl.h"
 
 namespace phi {
@@ -85,7 +85,7 @@ struct SparseAdagradFunctor<phi::GPUContext, T> {
                   DenseTensor* param) {
     // 1. g_m.rows = set(g.rows)
     auto grad_width = grad.value().dims()[1];
-    paddle::operators::math::scatter::MergeAdd<phi::GPUContext, T> merge_func;
+    phi::funcs::scatter::MergeAdd<phi::GPUContext, T> merge_func;
     auto grad_merge = merge_func(context, grad);
     auto* grad_merge_data = grad_merge.mutable_value()->template data<T>();
     paddle::framework::Vector<int64_t> merge_rows(grad_merge.rows());
@@ -93,8 +93,7 @@ struct SparseAdagradFunctor<phi::GPUContext, T> {
     auto grad_square =
         SquareSelectedRows<phi::GPUContext, T>(context, grad_merge);
 
-    paddle::operators::math::SelectedRowsAddToTensor<phi::GPUContext, T>
-        functor;
+    phi::funcs::SelectedRowsAddToTensor<phi::GPUContext, T> functor;
     functor(context, grad_square, moment);
 
     // 3. update parameter
diff --git a/paddle/phi/kernels/gpu/adam_kernel.cu b/paddle/phi/kernels/gpu/adam_kernel.cu
index b20e8610fefaf22f671d5342b0cffddf507f46bb..d44f6d2800da5f3fa015394ca342a4e14eee1081 100644
--- a/paddle/phi/kernels/gpu/adam_kernel.cu
+++ b/paddle/phi/kernels/gpu/adam_kernel.cu
@@ -19,7 +19,6 @@
 #include <vector>
 
 #include "paddle/fluid/framework/tensor_util.h"
-#include "paddle/fluid/operators/math/selected_rows_functor.h"
 #include "paddle/phi/backends/gpu/gpu_context.h"
 #include "paddle/phi/common/amp_type_traits.h"
 #include "paddle/phi/common/float16.h"
diff --git a/paddle/phi/kernels/gpu/adamw_kernel.cu b/paddle/phi/kernels/gpu/adamw_kernel.cu
index 9ce4d229f10c625337e159b731de248d6943fe8b..9ddaacdd5cc6bb1fe14e7a4a4601ffbc1c944b5e 100644
--- a/paddle/phi/kernels/gpu/adamw_kernel.cu
+++ b/paddle/phi/kernels/gpu/adamw_kernel.cu
@@ -19,7 +19,6 @@
 #include <vector>
 
 #include "paddle/fluid/framework/tensor_util.h"
-#include "paddle/fluid/operators/math/selected_rows_functor.h"
 #include "paddle/phi/backends/gpu/gpu_context.h"
 #include "paddle/phi/common/amp_type_traits.h"
 #include "paddle/phi/common/float16.h"
@@ -27,6 +26,7 @@
 #include "paddle/phi/core/tensor_utils.h"
 #include "paddle/phi/kernels/funcs/adam_functors.h"
 #include "paddle/phi/kernels/funcs/for_range.h"
+#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
 
 namespace phi {
 template <typename T, typename MT>
diff --git a/paddle/phi/kernels/impl/adagrad_kernel_impl.h b/paddle/phi/kernels/impl/adagrad_kernel_impl.h
index 1b64da5283c258e71622e4d458b0d0cf320d340a..ca349e8b426935ddf01fa14394f03b601ccb3e9b 100644
--- a/paddle/phi/kernels/impl/adagrad_kernel_impl.h
+++ b/paddle/phi/kernels/impl/adagrad_kernel_impl.h
@@ -14,7 +14,6 @@
 
 #pragma once
 
-#include "paddle/fluid/operators/math/selected_rows_functor.h"
 #include "paddle/phi/kernels/adagrad_kernel.h"
 #include "paddle/phi/kernels/funcs/eigen/common.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
diff --git a/paddle/phi/kernels/impl/add_n_kernel_impl.h b/paddle/phi/kernels/impl/add_n_kernel_impl.h
index 654ef5efbd945510915db2c5208eebcf73fea065..bd575cb4ab3e885779a87a532a37230804a182cd 100644
--- a/paddle/phi/kernels/impl/add_n_kernel_impl.h
+++ b/paddle/phi/kernels/impl/add_n_kernel_impl.h
@@ -21,7 +21,7 @@
 #include "paddle/phi/kernels/funcs/eigen/common.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
 
-#include "paddle/fluid/operators/math/selected_rows_functor.h"
+#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
 
 namespace phi {
 
diff --git a/paddle/phi/kernels/impl/clip_kernel_impl.h b/paddle/phi/kernels/impl/clip_kernel_impl.h
index dc916eb2af8190f2eb1dde9a357719f24ad4f32e..1b63abae525b69647bc9bdc359203a01d89f323e 100644
--- a/paddle/phi/kernels/impl/clip_kernel_impl.h
+++ b/paddle/phi/kernels/impl/clip_kernel_impl.h
@@ -14,7 +14,6 @@
 
 #pragma once
 
-#include "paddle/fluid/operators/math/selected_rows_functor.h"
 #include "paddle/fluid/platform/transform.h"
 #include "paddle/phi/backends/all_context.h"
 #include "paddle/phi/core/kernel_registry.h"
diff --git a/paddle/phi/kernels/impl/momentum_kernel_impl.h b/paddle/phi/kernels/impl/momentum_kernel_impl.h
index 93e5e957fd4dc65e5480042299a56b8cb9b7e15a..f3e22e5d944651eec57507146b643632adece22c 100644
--- a/paddle/phi/kernels/impl/momentum_kernel_impl.h
+++ b/paddle/phi/kernels/impl/momentum_kernel_impl.h
@@ -14,12 +14,12 @@
 
 #pragma once
 
-#include "paddle/fluid/operators/math/selected_rows_functor.h"
 #include "paddle/phi/common/amp_type_traits.h"
 #include "paddle/phi/common/float16.h"
 #include "paddle/phi/kernels/funcs/algorithm.h"
 #include "paddle/phi/kernels/funcs/eigen/common.h"
 #include "paddle/phi/kernels/funcs/for_range.h"
+#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
 #include "paddle/phi/kernels/momentum_kernel.h"
 
 namespace phi {
@@ -547,7 +547,7 @@ void MomentumSparseImpl(const Context& ctx,
 
   phi::SelectedRows tmp_merged_grad;
   phi::SelectedRows* merged_grad = &tmp_merged_grad;
-  paddle::operators::math::scatter::MergeAdd<Context, T> merge_func;
+  phi::funcs::scatter::MergeAdd<Context, T> merge_func;
   merge_func(ctx, grad, merged_grad);
 
   auto* grad_merge_rows = merged_grad->mutable_rows();
diff --git a/paddle/phi/kernels/impl/rmsprop_kernel_impl.h b/paddle/phi/kernels/impl/rmsprop_kernel_impl.h
index a01d4ba3aea3964c7db244f81c502676600a9311..f2a56ff6b8e08d715da76c85bc4349633933427d 100644
--- a/paddle/phi/kernels/impl/rmsprop_kernel_impl.h
+++ b/paddle/phi/kernels/impl/rmsprop_kernel_impl.h
@@ -16,10 +16,10 @@
 
 #include <math.h>
 
-#include "paddle/fluid/operators/math/selected_rows_functor.h"
 #include "paddle/phi/kernels/funcs/algorithm.h"
 #include "paddle/phi/kernels/funcs/eigen/common.h"
 #include "paddle/phi/kernels/funcs/for_range.h"
+#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
 #include "paddle/phi/kernels/rmsprop_kernel.h"
 
 namespace phi {
@@ -304,7 +304,7 @@ void RmspropSparseKernel(const Context &ctx,
 
   phi::SelectedRows tmp_merged_grad;
   phi::SelectedRows *merged_grad = &tmp_merged_grad;
-  paddle::operators::math::scatter::MergeAdd<Context, T> merge_func;
+  phi::funcs::scatter::MergeAdd<Context, T> merge_func;
   merge_func(ctx, grad, merged_grad);
 
   funcs::ForRange<Context> for_range(ctx, limit);
diff --git a/paddle/phi/kernels/selected_rows/clip_kernel.h b/paddle/phi/kernels/selected_rows/clip_kernel.h
index ec56d92c513ea25897f63dd31a25f574df8c6fbc..45c876e526794e1e2779c7bcea81d56162fae4e5 100644
--- a/paddle/phi/kernels/selected_rows/clip_kernel.h
+++ b/paddle/phi/kernels/selected_rows/clip_kernel.h
@@ -14,7 +14,6 @@
 
 #pragma once
 
-#include "paddle/fluid/operators/math/selected_rows_functor.h"
 #include "paddle/phi/common/scalar.h"
 #include "paddle/phi/core/dense_tensor.h"
 #include "paddle/phi/core/device_context.h"
diff --git a/paddle/phi/kernels/selected_rows/cpu/adam_kernel.cc b/paddle/phi/kernels/selected_rows/cpu/adam_kernel.cc
index ba5d6feb48f58375ca166d3541bafbf0d3b4082e..2e7fe555feffc5deb29d8dcada7e9d4974ee5e12 100644
--- a/paddle/phi/kernels/selected_rows/cpu/adam_kernel.cc
+++ b/paddle/phi/kernels/selected_rows/cpu/adam_kernel.cc
@@ -16,11 +16,11 @@
 
 #include "paddle/fluid/framework/tensor_util.h"
 #include "paddle/fluid/framework/threadpool.h"
-#include "paddle/fluid/operators/math/selected_rows_functor.h"
 #include "paddle/phi/backends/cpu/cpu_context.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/core/tensor_utils.h"
 #include "paddle/phi/kernels/funcs/adam_functors.h"
+#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
 
 namespace phi {
 namespace sr {
@@ -118,7 +118,7 @@ void AdamDenseParamSparseGradKernel(
   } else {
     // merge duplicated rows if any.
     // The rows of grad_merge have been sorted inside MergeAdd functor
-    paddle::operators::math::scatter::MergeAdd<Context, T> merge_func;
+    phi::funcs::scatter::MergeAdd<Context, T> merge_func;
     merge_func(dev_ctx, grad, &tmp_grad_merge, true);
     grad_merge_ptr = &tmp_grad_merge;
   }
diff --git a/paddle/phi/kernels/selected_rows/gpu/adam_kernel.cu b/paddle/phi/kernels/selected_rows/gpu/adam_kernel.cu
index 9aecbb8e99cd81d4ad6612aa0dc02d405bb98c35..9ac82443403679806592b386e63f5b8db5a4b18c 100644
--- a/paddle/phi/kernels/selected_rows/gpu/adam_kernel.cu
+++ b/paddle/phi/kernels/selected_rows/gpu/adam_kernel.cu
@@ -15,7 +15,6 @@
 #include "paddle/phi/kernels/selected_rows/adam_kernel.h"
 
 #include "paddle/fluid/framework/tensor_util.h"
-#include "paddle/fluid/operators/math/selected_rows_functor.h"
 #include "paddle/phi/backends/gpu/gpu_context.h"
 #include "paddle/phi/common/amp_type_traits.h"
 #include "paddle/phi/common/float16.h"
@@ -23,6 +22,7 @@
 #include "paddle/phi/core/tensor_utils.h"
 #include "paddle/phi/kernels/funcs/adam_functors.h"
 #include "paddle/phi/kernels/funcs/for_range.h"
+#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
 
 namespace phi {
 namespace sr {
@@ -191,7 +191,7 @@ void AdamDenseParamSparseGradKernel(
   } else {
     // merge duplicated rows if any.
     // The rows of grad_merge have been sorted inside MergeAdd functor
-    paddle::operators::math::scatter::MergeAdd<Context, T> merge_func;
+    phi::funcs::scatter::MergeAdd<Context, T> merge_func;
     merge_func(dev_ctx, grad, &tmp_grad_merge, true);
     grad_merge_ptr = &tmp_grad_merge;
   }
diff --git a/paddle/phi/kernels/selected_rows/gpu/adamw_kernel.cu b/paddle/phi/kernels/selected_rows/gpu/adamw_kernel.cu
index e04784c2620d5d6367bca649f9b3445edc09a609..6dbea3a7ff3c599768d805fc11e41fb04b8da745 100644
--- a/paddle/phi/kernels/selected_rows/gpu/adamw_kernel.cu
+++ b/paddle/phi/kernels/selected_rows/gpu/adamw_kernel.cu
@@ -19,7 +19,6 @@
 #include <vector>
 
 #include "paddle/fluid/framework/tensor_util.h"
-#include "paddle/fluid/operators/math/selected_rows_functor.h"
 #include "paddle/phi/backends/gpu/gpu_context.h"
 #include "paddle/phi/common/amp_type_traits.h"
 #include "paddle/phi/common/float16.h"
@@ -27,6 +26,7 @@
 #include "paddle/phi/core/tensor_utils.h"
 #include "paddle/phi/kernels/funcs/adam_functors.h"
 #include "paddle/phi/kernels/funcs/for_range.h"
+#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
 
 namespace phi {
 namespace sr {
@@ -214,7 +214,7 @@ void AdamwDenseParamSparseGradKernel(
   } else {
     // merge duplicated rows if any.
     // The rows of grad_merge have been sorted inside MergeAdd functor
-    paddle::operators::math::scatter::MergeAdd<Context, T> merge_func;
+    phi::funcs::scatter::MergeAdd<Context, T> merge_func;
     merge_func(dev_ctx, grad, &tmp_grad_merge, true);
     grad_merge_ptr = &tmp_grad_merge;
   }
diff --git a/paddle/phi/kernels/selected_rows/impl/add_n_kernel_impl.h b/paddle/phi/kernels/selected_rows/impl/add_n_kernel_impl.h
index d5bd9f2b2c7c6e5cd4a79cbf879463a3a4156ad1..3fd42fb53b5f76306b83a7c2a394e5390f0f509f 100644
--- a/paddle/phi/kernels/selected_rows/impl/add_n_kernel_impl.h
+++ b/paddle/phi/kernels/selected_rows/impl/add_n_kernel_impl.h
@@ -16,10 +16,10 @@
 
 #include "paddle/phi/kernels/selected_rows/add_n_kernel.h"
 
-#include "paddle/fluid/operators/math/selected_rows_functor.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/core/tensor_utils.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
+#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
 
 namespace phi {
 namespace sr {
@@ -73,7 +73,7 @@ void AddNKernel(const Context &dev_ctx,
     }
   }
   if (has_data) {
-    paddle::operators::math::scatter::MergeAdd<Context, T> merge_add;
+    phi::funcs::scatter::MergeAdd<Context, T> merge_add;
     merge_add(dev_ctx, inputs, out);
 
     out->SyncIndex();
diff --git a/paddle/phi/kernels/selected_rows/impl/clip_by_norm_kernel_impl.h b/paddle/phi/kernels/selected_rows/impl/clip_by_norm_kernel_impl.h
index 5d79393a32d66df3d44a7d79187bda4288cec1e6..802efd6480dbfbaf8748664f0ac75d2cc96290fc 100644
--- a/paddle/phi/kernels/selected_rows/impl/clip_by_norm_kernel_impl.h
+++ b/paddle/phi/kernels/selected_rows/impl/clip_by_norm_kernel_impl.h
@@ -14,11 +14,11 @@
 
 #pragma once
 
-#include "paddle/fluid/operators/math/selected_rows_functor.h"
 #include "paddle/phi/core/dense_tensor.h"
 #include "paddle/phi/core/device_context.h"
 #include "paddle/phi/core/selected_rows.h"
 #include "paddle/phi/kernels/clip_by_norm_kernel.h"
+#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
 #include "paddle/phi/kernels/selected_rows/clip_by_norm_kernel.h"
 
 namespace phi {
@@ -30,7 +30,7 @@ void ClipByNormKernel(const Context& dev_ctx,
                       float max_norm,
                       SelectedRows* out) {
   phi::SelectedRows merged_input;
-  paddle::operators::math::scatter::MergeAdd<Context, T> merge_func;
+  phi::funcs::scatter::MergeAdd<Context, T> merge_func;
   merge_func(dev_ctx, x, &merged_input);
   auto input = &(merged_input.value());
   out->set_rows(merged_input.rows());
diff --git a/paddle/phi/kernels/selected_rows/impl/clip_kernel_impl.h b/paddle/phi/kernels/selected_rows/impl/clip_kernel_impl.h
index c39d38674677327879b41a0f01838551baa3c291..10471f91b9a27aabe3c958be4017b1c50c003d8f 100644
--- a/paddle/phi/kernels/selected_rows/impl/clip_kernel_impl.h
+++ b/paddle/phi/kernels/selected_rows/impl/clip_kernel_impl.h
@@ -14,11 +14,11 @@
 
 #pragma once
 
-#include "paddle/fluid/operators/math/selected_rows_functor.h"
 #include "paddle/phi/common/scalar.h"
 #include "paddle/phi/core/dense_tensor.h"
 #include "paddle/phi/core/device_context.h"
 #include "paddle/phi/core/selected_rows.h"
+#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
 #include "paddle/phi/kernels/selected_rows/clip_kernel.h"
 
 namespace phi {
@@ -45,7 +45,7 @@ void ClipSparseKernel(const Context& dev_ctx,
                     out,
                     errors::InvalidArgument("Inplace clip is not allowed "
                                             "when x is SelectedRows"));
-  paddle::operators::math::scatter::MergeAdd<Context, T> merge_func;
+  phi::funcs::scatter::MergeAdd<Context, T> merge_func;
   merge_func(dev_ctx, x, out);
   auto* out_tensor = out->mutable_value();
   auto* out_data = out_tensor->data<T>();
diff --git a/paddle/phi/kernels/selected_rows/impl/lamb_kernel_impl.h b/paddle/phi/kernels/selected_rows/impl/lamb_kernel_impl.h
index 5623d0dbdbe6967744124c338303f1e342d3679c..cac9ef797215f44f468ffe566c4d6befa8845e84 100644
--- a/paddle/phi/kernels/selected_rows/impl/lamb_kernel_impl.h
+++ b/paddle/phi/kernels/selected_rows/impl/lamb_kernel_impl.h
@@ -12,10 +12,10 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #pragma once
-#include "paddle/fluid/operators/math/selected_rows_functor.h"
 #include "paddle/phi/core/dense_tensor.h"
 #include "paddle/phi/core/selected_rows.h"
 #include "paddle/phi/kernels/funcs/lamb_functors.h"
+#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
 
 namespace phi {
 namespace sr {
@@ -212,7 +212,7 @@ void ComputeRowImpl(const Context& dev_ctx,
   } else {
     // merge duplicated rows if any.
     // The rows of grad_merge have been sorted inside MergeAdd functor
-    paddle::operators::math::scatter::MergeAdd<Context, T> merge_func;
+    phi::funcs::scatter::MergeAdd<Context, T> merge_func;
     merge_func(dev_ctx, grad, &tmp_grad_merge, true);
     grad_merge_ptr = &tmp_grad_merge;
   }
diff --git a/paddle/phi/kernels/selected_rows/merge_selected_rows_kernel.cc b/paddle/phi/kernels/selected_rows/merge_selected_rows_kernel.cc
index 7bbecdbb4a8aeedba2094a5d87e2e2c3ad5ee1b9..a5d2e6678731686cedbd8fa6d5309bed47eae537 100644
--- a/paddle/phi/kernels/selected_rows/merge_selected_rows_kernel.cc
+++ b/paddle/phi/kernels/selected_rows/merge_selected_rows_kernel.cc
@@ -18,7 +18,7 @@
 #include "paddle/phi/backends/gpu/gpu_context.h"
 #include "paddle/phi/core/kernel_registry.h"
 
-#include "paddle/fluid/operators/math/selected_rows_functor.h"
+#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
 
 namespace phi {
 namespace sr {
@@ -27,7 +27,7 @@ template <typename T, typename Context>
 void MergeSelectedRowsKernel(const Context& dev_ctx,
                              const SelectedRows& x,
                              SelectedRows* out) {
-  paddle::operators::math::scatter::MergeAdd<Context, T> merge_func;
+  phi::funcs::scatter::MergeAdd<Context, T> merge_func;
   merge_func(dev_ctx, x, out);
 }
 
diff --git a/paddle/phi/kernels/selected_rows/xpu/adam_kernel.cc b/paddle/phi/kernels/selected_rows/xpu/adam_kernel.cc
index c9cd5f563fcc51e41af3b89e972c036674870930..d94751f749dac34b131441cec2854d7b9c30f398 100644
--- a/paddle/phi/kernels/selected_rows/xpu/adam_kernel.cc
+++ b/paddle/phi/kernels/selected_rows/xpu/adam_kernel.cc
@@ -19,7 +19,7 @@
 #include "paddle/phi/core/tensor_utils.h"
 #include "paddle/phi/kernels/funcs/adam_functors.h"
 // See Note [ Why still include the fluid headers? ]
-#include "paddle/fluid/operators/math/selected_rows_functor.h"
+#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
 
 namespace phi {
 namespace sr {
@@ -181,7 +181,7 @@ void AdamDenseParamSparseGradKernel(
   if (is_strict_sorted) {
     grad_merge_ptr = &grad;
   } else {
-    paddle::operators::math::scatter::MergeAdd<Context, float> merge_func;
+    phi::funcs::scatter::MergeAdd<Context, float> merge_func;
     merge_func(dev_ctx, grad, &tmp_grad_merge, true);
 
     xpu_wait(dev_ctx.x_context()->xpu_stream);