From 5e222dc27871a8b85247460082e30496dfa8e745 Mon Sep 17 00:00:00 2001
From: huangjiyi <43315610+huangjiyi@users.noreply.github.com>
Date: Mon, 19 Dec 2022 13:46:52 +0800
Subject: [PATCH] [PHI Decoupling] move maxouting and matrix_bit_code from
 fluid to phi (#49131)

* move maxouting from fluid to phi

* move matrix_bit_code from fluid to phi

* replace mutable_data and fix include

* fix include

* move gather_scatter_kernel from fluid to phi

* Revert "move gather_scatter_kernel from fluid to phi"

This reverts commit 3d0b1eaf179656072e8c483dfca688cccccdda01.
---
 paddle/fluid/operators/math/CMakeLists.txt    |  2 --
 paddle/phi/kernels/cpu/hsigmoid_loss_grad.h   | 11 +++++-----
 .../phi/kernels/cpu/hsigmoid_loss_kernel.cc   | 15 ++++++-------
 paddle/phi/kernels/funcs/CMakeLists.txt       |  2 ++
 .../kernels/funcs}/matrix_bit_code.cc         | 18 +++++++++------
 .../kernels/funcs}/matrix_bit_code.h          | 22 ++++++-------------
 .../math => phi/kernels/funcs}/maxouting.cc   | 17 ++++++--------
 .../math => phi/kernels/funcs}/maxouting.cu   | 18 +++++++--------
 .../math => phi/kernels/funcs}/maxouting.h    | 16 +++++---------
 .../kernels/impl/maxout_grad_kernel_impl.h    |  4 ++--
 paddle/phi/kernels/impl/maxout_kernel_impl.h  |  4 ++--
 11 files changed, 58 insertions(+), 71 deletions(-)
 rename paddle/{fluid/operators/math => phi/kernels/funcs}/matrix_bit_code.cc (97%)
 rename paddle/{fluid/operators/math => phi/kernels/funcs}/matrix_bit_code.h (94%)
 rename paddle/{fluid/operators/math => phi/kernels/funcs}/maxouting.cc (93%)
 rename paddle/{fluid/operators/math => phi/kernels/funcs}/maxouting.cu (95%)
 rename paddle/{fluid/operators/math => phi/kernels/funcs}/maxouting.h (81%)

diff --git a/paddle/fluid/operators/math/CMakeLists.txt b/paddle/fluid/operators/math/CMakeLists.txt
index 3d5c7bfb4e..9b1f3cb9b6 100644
--- a/paddle/fluid/operators/math/CMakeLists.txt
+++ b/paddle/fluid/operators/math/CMakeLists.txt
@@ -27,7 +27,6 @@ math_library(sample_prob)
 math_library(sampler DEPS generator)

 # math_library(math_function DEPS blas dense_tensor tensor)
-math_library(maxouting)
 math_library(sequence_padding)
 math_library(sequence_pooling DEPS math_function jit_kernel_helper)

@@ -39,7 +38,6 @@ elseif(WITH_XPU)
 else()
   math_library(beam_search DEPS math_function)
 endif()

-math_library(matrix_bit_code)
 math_library(unpooling)
 math_library(prelu)

diff --git a/paddle/phi/kernels/cpu/hsigmoid_loss_grad.h b/paddle/phi/kernels/cpu/hsigmoid_loss_grad.h
index 12960e305a..8c8b40c8d9 100644
--- a/paddle/phi/kernels/cpu/hsigmoid_loss_grad.h
+++ b/paddle/phi/kernels/cpu/hsigmoid_loss_grad.h
@@ -14,17 +14,16 @@

 #pragma once

-#include "paddle/fluid/operators/math/matrix_bit_code.h"
 #include "paddle/phi/core/dense_tensor.h"
 #include "paddle/phi/core/selected_rows.h"
+#include "paddle/phi/kernels/funcs/blas/blas.h"
 #include "paddle/phi/kernels/funcs/eigen/common.h"
 #include "paddle/phi/kernels/funcs/eigen/eigen_function.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
+#include "paddle/phi/kernels/funcs/matrix_bit_code.h"

 namespace phi {

-namespace math = paddle::operators::math;
-
 template <typename T, typename Context>
 void HSigmoidLossGradKernelImpl(const Context& ctx,
                                 const DenseTensor& x,
@@ -55,12 +54,12 @@ void HSigmoidLossGradKernelImpl(const Context& ctx,
     is_custom = true;
   }

-  std::unique_ptr<math::MatrixBitCodeFunctor<T>> bit_code;
+  std::unique_ptr<phi::funcs::MatrixBitCodeFunctor<T>> bit_code;
   if (!is_custom) {
-    bit_code.reset(new math::MatrixBitCodeFunctor<T>(
+    bit_code.reset(new phi::funcs::MatrixBitCodeFunctor<T>(
         num_classes, label.template data<int64_t>()));
   } else {
-    bit_code.reset(new math::MatrixBitCodeFunctor<T>(
+    bit_code.reset(new phi::funcs::MatrixBitCodeFunctor<T>(
         *(path.get_ptr()), *(code.get_ptr()), label.template data<int64_t>()));
   }

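Note on the "replace mutable_data and fix include" bullet in the commit
message: in phi, kernels allocate outputs through the device context rather
than through the tensor itself. A minimal sketch of the two idioms, using the
names that appear in the maxouting hunks further down (`context` is the
device context, `output` the output DenseTensor; illustrative fragment, not a
drop-in):

    // fluid idiom removed by this patch: the tensor allocates itself.
    T* out_before = output->mutable_data<T>(context.GetPlace());
    // phi idiom used instead: the device context performs the typed alloc.
    T* out_after = context.template Alloc<T>(output);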
diff --git a/paddle/phi/kernels/cpu/hsigmoid_loss_kernel.cc b/paddle/phi/kernels/cpu/hsigmoid_loss_kernel.cc
index a6f10b4ff1..062aa1be24 100644
--- a/paddle/phi/kernels/cpu/hsigmoid_loss_kernel.cc
+++ b/paddle/phi/kernels/cpu/hsigmoid_loss_kernel.cc
@@ -14,19 +14,17 @@

 #include "paddle/phi/kernels/hsigmoid_loss_kernel.h"

-#include "paddle/fluid/operators/math/matrix_bit_code.h"
 #include "paddle/fluid/platform/transform.h"
 #include "paddle/phi/backends/cpu/cpu_context.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/kernels/funcs/eigen/common.h"
 #include "paddle/phi/kernels/funcs/eigen/eigen_function.h"
 #include "paddle/phi/kernels/funcs/math_function_impl.h"
+#include "paddle/phi/kernels/funcs/matrix_bit_code.h"
 #include "paddle/phi/kernels/impl/clip_kernel_impl.h"

 namespace phi {

-namespace math = paddle::operators::math;
-
 template <typename T, typename Context>
 void HSigmoidLossKernel(const Context& ctx,
                         const DenseTensor& x,
@@ -48,8 +46,9 @@ void HSigmoidLossKernel(const Context& ctx,
   if (path.get_ptr()) {
     is_custom = true;
   }
-  int64_t code_length = path.get_ptr() ? path.get_ptr()->dims()[1]
-                                       : math::FindLastSet(num_classes_st - 1);
+  int64_t code_length = path.get_ptr()
+                            ? path.get_ptr()->dims()[1]
+                            : phi::funcs::FindLastSet(num_classes_st - 1);
   int64_t batch_size = x.dims()[0];
   DenseTensor sum;
   pre_out->Resize(phi::make_ddim({batch_size, code_length}));
@@ -63,12 +62,12 @@ void HSigmoidLossKernel(const Context& ctx,
   auto& place = *ctx.eigen_device();
   funcs::RowwiseSum<Context, T> row_sum;

-  std::unique_ptr<math::MatrixBitCodeFunctor<T>> bit_code;
+  std::unique_ptr<phi::funcs::MatrixBitCodeFunctor<T>> bit_code;
   if (!is_custom) {
-    bit_code.reset(new math::MatrixBitCodeFunctor<T>(
+    bit_code.reset(new phi::funcs::MatrixBitCodeFunctor<T>(
         num_classes_st, label.template data<int64_t>()));
   } else {
-    bit_code.reset(new math::MatrixBitCodeFunctor<T>(
+    bit_code.reset(new phi::funcs::MatrixBitCodeFunctor<T>(
         *(path.get_ptr()), *(code.get_ptr()), label.template data<int64_t>()));
   }

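Note: `FindLastSet`, declared in matrix_bit_code.h, returns the 1-based
position of the most significant set bit, so `FindLastSet(num_classes - 1)`
in the hunk above is the depth of the implicit complete binary tree that the
default code table walks. A standalone sketch of the semantics (a plain loop
stands in for the intrinsic-based implementation; illustrative only):

    #include <cstdint>
    #include <cstdio>

    // 1-based index of the highest set bit; FindLastSet(9) == 4.
    int FindLastSet(uint64_t x) {
      int pos = 0;
      while (x) {
        ++pos;
        x >>= 1;
      }
      return pos;
    }

    int main() {
      // With num_classes = 10, each default code is FindLastSet(10 - 1) = 4
      // bits long, so pre_out is resized to [batch_size, 4].
      std::printf("%d\n", FindLastSet(10 - 1));
    }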
diff --git a/paddle/phi/kernels/funcs/CMakeLists.txt b/paddle/phi/kernels/funcs/CMakeLists.txt
index efef150b56..0d2cfa150f 100644
--- a/paddle/phi/kernels/funcs/CMakeLists.txt
+++ b/paddle/phi/kernels/funcs/CMakeLists.txt
@@ -20,6 +20,8 @@ math_library(cross_entropy)
 math_library(im2col)
 math_library(vol2col)
 math_library(softmax DEPS math_function)
+math_library(maxouting)
+math_library(matrix_bit_code)

 cc_library(
   phi_data_layout_transform

diff --git a/paddle/fluid/operators/math/matrix_bit_code.cc b/paddle/phi/kernels/funcs/matrix_bit_code.cc
similarity index 97%
rename from paddle/fluid/operators/math/matrix_bit_code.cc
rename to paddle/phi/kernels/funcs/matrix_bit_code.cc
index aa2779c350..2fecb1c526 100644
--- a/paddle/fluid/operators/math/matrix_bit_code.cc
+++ b/paddle/phi/kernels/funcs/matrix_bit_code.cc
@@ -12,11 +12,16 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

-#include "paddle/fluid/operators/math/matrix_bit_code.h"
+#include "paddle/phi/kernels/funcs/matrix_bit_code.h"

-namespace paddle {
-namespace operators {
-namespace math {
+#include <map>
+#include <unordered_map>
+
+#include "paddle/phi/backends/cpu/cpu_context.h"
+#include "paddle/phi/kernels/funcs/blas/blas.h"
+
+namespace phi {
+namespace funcs {

 template <typename T>
 struct MatrixBitCodeFunctorAdd {
@@ -354,6 +359,5 @@ void MatrixBitCodeFunctor<T>::Sub(phi::DenseTensor *tmat) {
 template class MatrixBitCodeFunctor<float>;
 template class MatrixBitCodeFunctor<double>;

-}  // namespace math
-}  // namespace operators
-}  // namespace paddle
+}  // namespace funcs
+}  // namespace phi

diff --git a/paddle/fluid/operators/math/matrix_bit_code.h b/paddle/phi/kernels/funcs/matrix_bit_code.h
similarity index 94%
rename from paddle/fluid/operators/math/matrix_bit_code.h
rename to paddle/phi/kernels/funcs/matrix_bit_code.h
index eb232940b8..8d3335791e 100644
--- a/paddle/fluid/operators/math/matrix_bit_code.h
+++ b/paddle/phi/kernels/funcs/matrix_bit_code.h
@@ -13,18 +13,12 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #pragma once
-#include <map>
-#include <unordered_map>
 #include <utility>
 #include <vector>

-#include "paddle/fluid/framework/eigen.h"
-#include "paddle/fluid/framework/lod_tensor.h"
-#include "paddle/fluid/framework/selected_rows_utils.h"
-#include "paddle/fluid/framework/tensor.h"
-#include "paddle/fluid/platform/device_context.h"
-
-#include "paddle/phi/kernels/funcs/blas/blas.h"
+#include "paddle/phi/core/dense_tensor.h"
+#include "paddle/phi/core/selected_rows.h"
+#include "paddle/utils/variant.h"

 #if defined(_WIN32)
 #include <intrin.h>
@@ -34,9 +28,8 @@
 #else
 #include <x86intrin.h>
 #endif  // _WIN32

-namespace paddle {
-namespace operators {
-namespace math {
+namespace phi {
+namespace funcs {
 /**
  * SimpleCodeTable class should support 3 functions:
@@ -273,6 +266,5 @@ class MatrixBitCodeFunctor {
   const int64_t* ids_;
   CodeTable code_table_;
 };
-}  // namespace math
-}  // namespace operators
-}  // namespace paddle
+}  // namespace funcs
+}  // namespace phi

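Note: for orientation, a condensed standalone sketch of the SimpleCode scheme
this header declares. Class c maps to node c + num_classes of an implicit
complete binary tree; `calc_index` gives the internal node visited at each
bit and `calc_bit` the branch taken there. Names follow the header, the
loop-based `FindLastSet` stands in for the intrinsic-based one, and the whole
snippet is illustrative rather than a drop-in replacement:

    #include <cstdint>
    #include <cstdio>

    int FindLastSet(uint64_t x) {
      int pos = 0;
      while (x) { ++pos; x >>= 1; }
      return pos;
    }

    struct SimpleCode {
      SimpleCode(uint64_t cls, uint64_t num_classes) : c_(cls + num_classes) {}
      uint64_t calc_index(int bit) const { return (c_ >> (bit + 1)) - 1; }
      bool calc_bit(int bit) const { return (c_ >> bit) & 1; }
      int get_length() const { return FindLastSet(c_) - 1; }
      uint64_t c_;
    };

    int main() {
      SimpleCode code(/*cls=*/3, /*num_classes=*/10);  // node id 13 = 0b1101
      for (int b = 0; b < code.get_length(); ++b)
        std::printf("bit %d -> inner node %llu, branch %d\n", b,
                    static_cast<unsigned long long>(code.calc_index(b)),
                    static_cast<int>(code.calc_bit(b)));
      // bit 0 -> inner node 5, branch 1
      // bit 1 -> inner node 2, branch 0
      // bit 2 -> inner node 0, branch 1
    }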
diff --git a/paddle/fluid/operators/math/maxouting.cc b/paddle/phi/kernels/funcs/maxouting.cc
similarity index 93%
rename from paddle/fluid/operators/math/maxouting.cc
rename to paddle/phi/kernels/funcs/maxouting.cc
index 91ae7d472d..67a924128c 100644
--- a/paddle/fluid/operators/math/maxouting.cc
+++ b/paddle/phi/kernels/funcs/maxouting.cc
@@ -12,13 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

-#include "paddle/fluid/operators/math/maxouting.h"
+#include "paddle/phi/kernels/funcs/maxouting.h"

 #include "paddle/phi/backends/cpu/cpu_context.h"

-namespace paddle {
-namespace operators {
-namespace math {
+namespace phi {
+namespace funcs {

 // All tensors are in NCHW or NHWC format, and the groups must be greater than 1
 template <typename DeviceContext, typename T>
@@ -35,7 +34,7 @@ void MaxOutFunctor<DeviceContext, T>::operator()(const DeviceContext& context,
   // c_size means the output size of each sample
   int c_size = fea_size * output_channels;
   const T* input_data = input.data<T>();
-  T* output_data = output->mutable_data<T>(context.GetPlace());
+  T* output_data = context.template Alloc<T>(output);
   for (int i = 0; i < batch_size; ++i) {
     int new_bindex = c_size * i;
     for (int c = 0; c < output_channels; ++c) {
@@ -80,8 +79,7 @@ void MaxOutGradFunctor<DeviceContext, T>::operator()(
   const T* input_data = input.data<T>();
   const T* output_data = output.data<T>();
   const T* output_grad_data = output_grad.data<T>();
-  T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace());
-
+  T* input_grad_data = context.template Alloc<T>(input_grad);
   for (int i = 0; i < batch_size; ++i) {
     int blen = fea_size * output_channels * i;
     for (int c = 0; c < output_channels; ++c) {
@@ -114,6 +112,5 @@ template class MaxOutGradFunctor<phi::CPUContext, double>;
 template class MaxOutFunctor<phi::CPUContext, float>;
 template class MaxOutFunctor<phi::CPUContext, double>;

-}  // namespace math
-}  // namespace operators
-}  // namespace paddle
+}  // namespace funcs
+}  // namespace phi

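Note: for readers new to this functor, a standalone sketch of the reduction
MaxOutFunctor computes. The indexing mirrors the NCHW branch (axis == 1),
where output channel c takes the element-wise max of input channels
c*groups .. c*groups + groups - 1; the values and sizes are made up for
illustration:

    #include <algorithm>
    #include <cfloat>
    #include <cstdio>
    #include <vector>

    int main() {
      // One sample, NCHW: 4 input channels, groups == 2, 3 spatial
      // positions -> 2 output channels.
      const int in_channels = 4, groups = 2, fea_size = 3;
      const int out_channels = in_channels / groups;
      std::vector<float> in = {1, 5, 2,    // channel 0
                               7, 0, 3,    // channel 1
                               4, 4, 4,    // channel 2
                               2, 9, 1};   // channel 3
      std::vector<float> out(out_channels * fea_size, -FLT_MAX);
      for (int c = 0; c < out_channels; ++c)
        for (int g = 0; g < groups; ++g)          // input channel c*groups + g
          for (int f = 0; f < fea_size; ++f)
            out[c * fea_size + f] = std::max(
                out[c * fea_size + f], in[(c * groups + g) * fea_size + f]);
      for (float v : out) std::printf("%g ", v);  // 7 5 3 4 9 4
      std::printf("\n");
    }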
diff --git a/paddle/fluid/operators/math/maxouting.cu b/paddle/phi/kernels/funcs/maxouting.cu
similarity index 95%
rename from paddle/fluid/operators/math/maxouting.cu
rename to paddle/phi/kernels/funcs/maxouting.cu
index 9f1d228639..89450dbd5c 100644
--- a/paddle/fluid/operators/math/maxouting.cu
+++ b/paddle/phi/kernels/funcs/maxouting.cu
@@ -12,13 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

-#include "paddle/fluid/operators/math/maxouting.h"
+#include "paddle/phi/kernels/funcs/maxouting.h"
 #include "paddle/phi/backends/gpu/gpu_context.h"
 #include "paddle/phi/backends/gpu/gpu_primitives.h"

-namespace paddle {
-namespace operators {
-namespace math {
+namespace phi {
+namespace funcs {

 template <typename T>
 __global__ void KernelMaxOut(const int nthreads,
@@ -57,6 +56,7 @@ __global__ void KernelMaxOut(const int nthreads,
     output_data[i] = ele;
   }
 }
+
 template <typename T>
 __global__ void KernelMaxoutGrad(const int nthreads,
                                  const T* input_data,
@@ -102,6 +102,7 @@ __global__ void KernelMaxoutGrad(const int nthreads,
     }
   }
 }
+
 /*
  * All tensors are in NCHW or NHWC format.
  */
@@ -118,7 +119,7 @@ void MaxOutFunctor<DeviceContext, T>::operator()(const DeviceContext& context,
   const int output_channels = output->dims()[axis];

   const T* input_data = input.data<T>();
-  T* output_data = output->mutable_data<T>(context.GetPlace());
+  T* output_data = context.template Alloc<T>(output);
   int nthreads = output->numel();
   int blocks = (nthreads + 1024 - 1) / 1024;
   dim3 threads(1024, 1);
@@ -155,7 +156,7 @@ void MaxOutGradFunctor<DeviceContext, T>::operator()(
   const T* input_data = input.data<T>();
   const T* output_data = output.data<T>();
   const T* output_grad_data = output_grad.data<T>();
-  T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace());
+  T* input_grad_data = context.template Alloc<T>(input_grad);
   int nthreads = output.numel();
   int blocks = (nthreads + 1024 - 1) / 1024;
   dim3 threads(1024, 1);
@@ -179,6 +180,5 @@ template class MaxOutGradFunctor<phi::GPUContext, double>;
 template class MaxOutFunctor<phi::GPUContext, float>;
 template class MaxOutFunctor<phi::GPUContext, double>;

-}  // namespace math
-}  // namespace operators
-}  // namespace paddle
+}  // namespace funcs
+}  // namespace phi

diff --git a/paddle/fluid/operators/math/maxouting.h b/paddle/phi/kernels/funcs/maxouting.h
similarity index 81%
rename from paddle/fluid/operators/math/maxouting.h
rename to paddle/phi/kernels/funcs/maxouting.h
index f42bbdb0e3..c6242318a3 100644
--- a/paddle/fluid/operators/math/maxouting.h
+++ b/paddle/phi/kernels/funcs/maxouting.h
@@ -13,14 +13,11 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #pragma once
-#include "paddle/fluid/framework/tensor.h"
-#include "paddle/fluid/platform/device_context.h"
-#include "paddle/fluid/platform/macros.h"
-#include "paddle/phi/core/hostdevice.h"
+#include "paddle/phi/core/dense_tensor.h"
+#include "paddle/phi/core/macros.h"

-namespace paddle {
-namespace operators {
-namespace math {
+namespace phi {
+namespace funcs {

 template <typename DeviceContext, typename T>
 class MaxOutFunctor {
@@ -43,6 +40,5 @@ class MaxOutGradFunctor {
                   const int groups,
                   const int axis = 1);
 };
-}  // namespace math
-}  // namespace operators
-}  // namespace paddle
+}  // namespace funcs
+}  // namespace phi

diff --git a/paddle/phi/kernels/impl/maxout_grad_kernel_impl.h b/paddle/phi/kernels/impl/maxout_grad_kernel_impl.h
index 4d551b3d82..f5e3085463 100644
--- a/paddle/phi/kernels/impl/maxout_grad_kernel_impl.h
+++ b/paddle/phi/kernels/impl/maxout_grad_kernel_impl.h
@@ -14,8 +14,8 @@

 #pragma once

-#include "paddle/fluid/operators/math/maxouting.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
+#include "paddle/phi/kernels/funcs/maxouting.h"
 #include "paddle/phi/kernels/maxout_grad_kernel.h"

 namespace phi {
@@ -36,7 +36,7 @@ void MaxOutGradKernel(const Context& dev_ctx,
   if (x_grad) {
     dev_ctx.template Alloc<T>(x_grad);
     zero(dev_ctx, x_grad, static_cast<T>(0.0));
-    paddle::operators::math::MaxOutGradFunctor<Context, T> maxout_backward;
+    phi::funcs::MaxOutGradFunctor<Context, T> maxout_backward;
     maxout_backward(dev_ctx, x, x_grad, out, out_grad, groups, axis);
   }
 }

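Note: the backward functor routes each output gradient to an input element in
the group whose value equals the output. The sketch below assumes
first-match-wins tie handling, which is how the CPU loop breaks out of the
group scan; it reuses the toy data from the forward sketch above
(illustrative only):

    #include <cstdio>
    #include <vector>

    int main() {
      // One output channel of the toy forward pass: input channels {1,5,2}
      // and {7,0,3} max to output {7,5,3}.
      const int groups = 2, fea_size = 3;
      std::vector<float> in = {1, 5, 2, 7, 0, 3};
      std::vector<float> out = {7, 5, 3};
      std::vector<float> out_grad = {0.1f, 0.2f, 0.3f};
      std::vector<float> in_grad(in.size(), 0.f);
      for (int f = 0; f < fea_size; ++f) {
        for (int g = 0; g < groups; ++g) {
          if (in[g * fea_size + f] == out[f]) {  // first match wins
            in_grad[g * fea_size + f] += out_grad[f];
            break;
          }
        }
      }
      for (float v : in_grad) std::printf("%g ", v);  // 0 0.2 0 0.1 0 0.3
      std::printf("\n");
    }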
diff --git a/paddle/phi/kernels/impl/maxout_kernel_impl.h b/paddle/phi/kernels/impl/maxout_kernel_impl.h
index 529534d11c..2b1d044cda 100644
--- a/paddle/phi/kernels/impl/maxout_kernel_impl.h
+++ b/paddle/phi/kernels/impl/maxout_kernel_impl.h
@@ -14,7 +14,7 @@

 #pragma once

-#include "paddle/fluid/operators/math/maxouting.h"
+#include "paddle/phi/kernels/funcs/maxouting.h"
 #include "paddle/phi/kernels/maxout_kernel.h"

 namespace phi {
@@ -29,7 +29,7 @@ void MaxOutKernel(const Context& dev_ctx,
     axis += x.dims().size();
   }

-  paddle::operators::math::MaxOutFunctor<Context, T> maxout_forward;
+  phi::funcs::MaxOutFunctor<Context, T> maxout_forward;
   maxout_forward(dev_ctx, x, out, groups, axis);
 }

--
GitLab