Unverified · Commit 3501ff7d authored by huangjiyi and committed by GitHub

[PHI decoupling] move cross_entropy from fluid to phi (#48160)

* move cross_entropy from fluid to phi

* replace mutable_data with Alloc

* use .template
Parent 88410225
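The "use .template" item in the commit message refers to the allocation change visible later in this diff, where `out->mutable_data<T>(ctx.GetPlace())` becomes `ctx.template Alloc<T>(out)`. Below is a minimal standalone sketch of why the `template` keyword is required when calling a member template through a dependent context object; the `Context` type is a hypothetical stand-in, not the phi API:

```cpp
// Minimal stand-in for a device context exposing a member template Alloc,
// loosely mirroring phi's ctx.template Alloc<T>(out). Hypothetical types,
// for illustration only.
struct Context {
  template <typename T>
  T* Alloc(void* /*tensor*/) {
    return nullptr;  // a real context would allocate device memory here
  }
};

template <typename DeviceContext, typename T>
void Kernel(DeviceContext& ctx, void* out) {
  // `ctx` has a dependent type inside this template, so `.template` is
  // needed for the compiler to parse Alloc<T> as a member template call
  // rather than a less-than comparison.
  T* data = ctx.template Alloc<T>(out);
  (void)data;
}

int main() {
  Context ctx;
  Kernel<Context, float>(ctx, nullptr);
  return 0;
}
```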
@@ -13,13 +13,13 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "paddle/fluid/operators/collective/c_softmax_with_cross_entropy_op.h"
-#include "paddle/fluid/operators/math/cross_entropy.h"
 #include "paddle/fluid/operators/math/softmax_impl.h"
 #include "paddle/fluid/platform/collective_helper.h"
 #include "paddle/fluid/platform/device/gpu/nccl_helper.h"
 #include "paddle/fluid/string/string_helper.h"
 #include "paddle/phi/backends/gpu/gpu_context.h"
 #include "paddle/phi/kernels/funcs/axis_utils.h"
+#include "paddle/phi/kernels/funcs/cross_entropy.h"
 
 namespace paddle {
 namespace operators {
@@ -237,9 +237,9 @@ struct CSoftmaxWithCrossEntropyFunctor<phi::GPUContext, T> {
   auto eigen_predicted_logits = math::EigenMatrix<T>::From(predicted_logits);
 
   eigen_loss.device(*dev_ctx.eigen_device()) =
-      (eigen_sum_exp_logits.log().unaryExpr(math::TolerableValue<T>()) -
+      (eigen_sum_exp_logits.log().unaryExpr(phi::funcs::TolerableValue<T>()) -
        eigen_predicted_logits)
-          .unaryExpr(math::TolerableValue<T>());
+          .unaryExpr(phi::funcs::TolerableValue<T>());
 
   eigen_softmax.device(*dev_ctx.eigen_device()) =
       (eigen_softmax *
@@ -372,9 +372,9 @@ struct CSoftmaxWithCrossEntropyProcessGroupFunctor<phi::GPUContext, T> {
   auto eigen_predicted_logits = math::EigenMatrix<T>::From(predicted_logits);
 
   eigen_loss.device(*dev_ctx.eigen_device()) =
-      (eigen_sum_exp_logits.log().unaryExpr(math::TolerableValue<T>()) -
+      (eigen_sum_exp_logits.log().unaryExpr(phi::funcs::TolerableValue<T>()) -
        eigen_predicted_logits)
-          .unaryExpr(math::TolerableValue<T>());
+          .unaryExpr(phi::funcs::TolerableValue<T>());
 
   eigen_softmax.device(*dev_ctx.eigen_device()) =
       (eigen_softmax *
...
@@ -22,9 +22,9 @@ limitations under the License. */
 #include "paddle/fluid/framework/data_type.h"
 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/operators/math/cross_entropy.h"
 #include "paddle/fluid/operators/math/softmax.h"
 #include "paddle/phi/api/include/tensor.h"
+#include "paddle/phi/kernels/funcs/cross_entropy.h"
 
 namespace paddle {
 namespace operators {
...
@@ -15,8 +15,8 @@ limitations under the License. */
 #pragma once
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/operators/math/cross_entropy.h"
 #include "paddle/fluid/platform/for_range.h"
+#include "paddle/phi/kernels/funcs/cross_entropy.h"
 #include "paddle/phi/kernels/funcs/math.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
@@ -51,7 +51,7 @@ class CrossEntropyOpKernel : public framework::OpKernel<T> {
     }
 
     int axis_dim = x->dims()[rank - 1];
-    math::CrossEntropyFunctor<DeviceContext, T>()(
+    phi::funcs::CrossEntropyFunctor<DeviceContext, T>()(
        ctx.template device_context<DeviceContext>(),
        &y_2d,
        &x_2d,
@@ -190,7 +190,7 @@ struct HardLabelCrossEntropyForwardFunctor {
           label);
       auto match_x = x_[idx * feature_size_ + label];
-      y_[idx] = -math::TolerableValue<T>()(phi::funcs::real_log(match_x));
+      y_[idx] = -phi::funcs::TolerableValue<T>()(phi::funcs::real_log(match_x));
       match_x_[idx] = match_x;
     } else {
       y_[idx] = 0;
...
@@ -21,7 +21,6 @@ else()
   math_library(concat_and_split DEPS concat_and_split_functor)
 endif()
 math_library(context_project DEPS im2col math_function)
-math_library(cross_entropy)
 math_library(cos_sim_functor)
 math_library(depthwise_conv)
 math_library(im2col)
...
@@ -16,10 +16,10 @@ limitations under the License. */
 #include <string>
 
 #include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/operators/math/cross_entropy.h"
 #include "paddle/fluid/operators/math/softmax.h"
 #include "paddle/fluid/platform/device/npu/npu_op_runner.h"
 #include "paddle/phi/kernels/funcs/axis_utils.h"
+#include "paddle/phi/kernels/funcs/cross_entropy.h"
 
 namespace paddle {
 namespace operators {
...
@@ -14,11 +14,11 @@ limitations under the License. */
 
 #include "paddle/phi/kernels/cross_entropy_kernel.h"
 
-#include "paddle/fluid/operators/math/cross_entropy.h"
 #include "paddle/phi/backends/cpu/cpu_context.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/core/tensor_utils.h"
 #include "paddle/phi/kernels/funcs/axis_utils.h"
+#include "paddle/phi/kernels/funcs/cross_entropy.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
 #include "paddle/phi/kernels/softmax_kernel.h"
@@ -64,7 +64,7 @@ void CrossEntropy(const CPUContext& dev_ctx,
   DenseTensor out_2d(*out);
   out_2d.Resize({n, d / axis_dim});
 
-  paddle::operators::math::CrossEntropyFunctor<CPUContext, T>()(
+  phi::funcs::CrossEntropyFunctor<CPUContext, T>()(
       dev_ctx, &out_2d, &x_2d, &label_2d, soft_label, ignore_index, axis_dim);
 }
...
@@ -16,6 +16,7 @@ math_library(pooling DEPS dense_tensor)
 math_library(segment_pooling)
 math_library(sequence2batch)
 math_library(matrix_solve DEPS dense_tensor eigen3 blas math_function)
+math_library(cross_entropy)
 
 cc_library(
   phi_data_layout_transform
...
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -12,20 +12,19 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include "paddle/fluid/operators/math/cross_entropy.h"
+#include "paddle/phi/kernels/funcs/cross_entropy.h"
 
-#include "paddle/fluid/framework/convert_utils.h"
 #include "paddle/phi/backends/cpu/cpu_context.h"
+#include "paddle/phi/core/utils/data_type.h"
 
-namespace paddle {
-namespace operators {
-namespace math {
+namespace phi {
+namespace funcs {
 
 using Tensor = phi::DenseTensor;
 
 template <typename T,
           int MajorType = Eigen::RowMajor,
           typename IndexType = Eigen::DenseIndex>
-using EigenMatrix = framework::EigenMatrix<T, MajorType, IndexType>;
+using EigenMatrix = phi::EigenMatrix<T, MajorType, IndexType>;
 
 template <typename T>
 struct HardLabelCrossEntropyCPUFunctorImpl {
@@ -54,17 +53,17 @@ struct HardLabelCrossEntropyCPUFunctorImpl {
       for (int j = 0; j < num_remain; j++) {
         int lbl = static_cast<int>(label_data[i * num_remain + j]);
         if (lbl != ignore_index_) {
-          PADDLE_ENFORCE_GE(lbl,
-                            0,
-                            platform::errors::OutOfRange(
-                                "label value should >= 0 when label "
-                                "value(%f) not equal to ignore_index(%f)",
-                                lbl,
-                                ignore_index_));
+          PADDLE_ENFORCE_GE(
+              lbl,
+              0,
+              phi::errors::OutOfRange("label value should >= 0 when label "
+                                      "value(%f) not equal to ignore_index(%f)",
+                                      lbl,
+                                      ignore_index_));
           PADDLE_ENFORCE_LT(
               lbl,
               axis_dim_,
-              platform::errors::OutOfRange(
+              phi::errors::OutOfRange(
                   "label value should less than the shape of axis dimension "
                   "when label value(%f) not equal to ignore_index(%f), But "
                   "received label value as %ld and shape of axis dimension "
@@ -79,7 +78,7 @@ struct HardLabelCrossEntropyCPUFunctorImpl {
           loss_data[loss_idx] =
               lbl == ignore_index_
                   ? 0
-                  : -math::TolerableValue<T>()(std::log(prob_data[index]));
+                  : -phi::funcs::TolerableValue<T>()(std::log(prob_data[index]));
         }
       }
     }
@@ -112,19 +111,18 @@ void CrossEntropyFunctor<DeviceContext, T>::operator()(
     auto loss = EigenMatrix<T>::From(*out);
 
     loss.device(*ctx.eigen_device()) =
-        -((lbl * in.log().unaryExpr(math::TolerableValue<T>()))
+        -((lbl * in.log().unaryExpr(phi::funcs::TolerableValue<T>()))
              .reshape(batch_axis_remain)
             .sum(Eigen::DSizes<int, 1>(1)));
   } else {
     HardLabelCrossEntropyCPUFunctorImpl<T> functor_impl(
         out, prob, labels, ignore_index, axis_dim);
-    framework::VisitIntDataType(framework::TransToProtoVarType(labels->dtype()),
-                                functor_impl);
+    phi::VisitDataType(labels->dtype(), functor_impl);
   }
 }
 
 template class CrossEntropyFunctor<phi::CPUContext, float>;
 template class CrossEntropyFunctor<phi::CPUContext, double>;
-}  // namespace math
-}  // namespace operators
-}  // namespace paddle
+}  // namespace funcs
+}  // namespace phi
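The soft-label branch above computes, per sample, loss = -Σ_j label[j] * log(prob[j]) with Eigen. Below is a plain-C++ sketch of the same computation, using hypothetical names and a flat row-major layout rather than the phi tensor API, with the TolerableValue clipping omitted:

```cpp
#include <cmath>
#include <cstdio>

// Plain-C++ sketch of the soft-label branch: for each row i,
// loss[i] = -sum_j label[i][j] * log(prob[i][j]).
void SoftLabelCrossEntropy(const float* prob, const float* label,
                           float* loss, int batch, int class_num) {
  for (int i = 0; i < batch; ++i) {
    float val = 0.f;
    for (int j = 0; j < class_num; ++j) {
      val += label[i * class_num + j] * std::log(prob[i * class_num + j]);
    }
    loss[i] = -val;
  }
}

int main() {
  const float prob[] = {0.7f, 0.3f};
  const float label[] = {1.0f, 0.0f};  // a one-hot vector as a soft label
  float loss[1];
  SoftLabelCrossEntropy(prob, label, loss, 1, 2);
  std::printf("%f\n", loss[0]);  // -log(0.7) ~ 0.3567
  return 0;
}
```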
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -12,15 +12,16 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include "paddle/fluid/operators/math/cross_entropy.h"
+#include "paddle/phi/kernels/funcs/cross_entropy.h"
 
-#include "paddle/fluid/framework/convert_utils.h"
-#include "paddle/fluid/platform/device/gpu/gpu_device_function.h"
 #include "paddle/phi/backends/gpu/gpu_context.h"
+#include "paddle/phi/backends/gpu/gpu_device_function.h"
 #include "paddle/phi/backends/gpu/gpu_primitives.h"
+#include "paddle/phi/core/utils/data_type.h"
 #include "paddle/phi/kernels/funcs/math.h"
 
-namespace paddle {
-namespace operators {
-namespace math {
+namespace phi {
+namespace funcs {
 
 template <typename T, typename LabelT>
 __global__ void CrossEntropyKernel(T* Y,
@@ -38,10 +39,9 @@ __global__ void CrossEntropyKernel(T* Y,
                            D,
                            ignore_index,
                            lbl);
-    Y[i] =
-        ignore_index == lbl
-            ? static_cast<T>(0)
-            : -math::TolerableValue<T>()(phi::funcs::real_log(X[i * D + lbl]));
+    Y[i] = ignore_index == lbl ? static_cast<T>(0)
+                               : -phi::funcs::TolerableValue<T>()(
+                                     phi::funcs::real_log(X[i * D + lbl]));
  }
 }
@@ -56,10 +56,11 @@ __global__ void SoftCrossEntropyKernel(T* Y,
   int idx = blockIdx.x * class_num + tid;
   int end = blockIdx.x * class_num + class_num;
   for (; idx < end; idx += blockDim.x) {
-    val += math::TolerableValue<T>()(phi::funcs::real_log(X[idx])) * label[idx];
+    val += phi::funcs::TolerableValue<T>()(phi::funcs::real_log(X[idx])) *
+           label[idx];
   }
 
-  val = paddle::platform::reduceSum(val, tid, blockDim.x);
+  val = phi::backends::gpu::reduceSum(val, tid, blockDim.x);
   if (threadIdx.x == 0) {
     Y[blockIdx.x] = -val;
   }
@@ -117,8 +118,8 @@ void CrossEntropyFunctor<DeviceContext, T>::operator()(
     const bool softLabel,
     const int ignore_index,
     const int axis_dim) {
+  T* loss_data = ctx.template Alloc<T>(out);
   const T* prob_data = prob->data<T>();
-  T* loss_data = out->mutable_data<T>(ctx.GetPlace());
 
   int batch_size = prob->dims()[0];
   int class_num = prob->dims()[1];
@@ -145,8 +146,7 @@ void CrossEntropyFunctor<DeviceContext, T>::operator()(
         ignore_index,
         kMaxBlockDim,
         ctx.stream());
-    framework::VisitDataType(framework::TransToProtoVarType(labels->dtype()),
-                             functor);
+    phi::VisitDataType(labels->dtype(), functor);
   }
 }
@@ -154,6 +154,5 @@ template class CrossEntropyFunctor<phi::GPUContext, float>;
 template class CrossEntropyFunctor<phi::GPUContext, double>;
 template class CrossEntropyFunctor<phi::GPUContext, phi::dtype::float16>;
-}  // namespace math
-}  // namespace operators
-}  // namespace paddle
+}  // namespace funcs
+}  // namespace phi
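The hard-label `CrossEntropyKernel` above maps one CUDA thread per sample and writes zero loss when the label equals `ignore_index`. As a serial CPU sketch of that logic, with illustrative names and the TolerableValue clipping omitted:

```cpp
#include <cmath>
#include <cstdio>

// Serial sketch of the hard-label kernel: each sample carries one class
// index; entries equal to ignore_index contribute zero loss.
void HardLabelCrossEntropy(const float* prob, const long* label, float* loss,
                           int batch, int class_num, long ignore_index) {
  for (int i = 0; i < batch; ++i) {
    long lbl = label[i];
    loss[i] = (lbl == ignore_index)
                  ? 0.f
                  : -std::log(prob[i * class_num + lbl]);
  }
}

int main() {
  const float prob[] = {0.7f, 0.3f, 0.2f, 0.8f};
  const long label[] = {0, -100};  // second sample is ignored
  float loss[2];
  HardLabelCrossEntropy(prob, label, loss, 2, 2, -100);
  std::printf("%f %f\n", loss[0], loss[1]);  // -log(0.7), 0
  return 0;
}
```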
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -15,14 +15,13 @@ limitations under the License. */
 #pragma once
 #include <limits>
 
-#include "paddle/fluid/framework/eigen.h"
-#include "paddle/fluid/framework/tensor.h"
-#include "paddle/fluid/platform/float16.h"
+#include "paddle/phi/common/float16.h"
+#include "paddle/phi/core/dense_tensor.h"
 #include "paddle/phi/core/hostdevice.h"
+#include "paddle/phi/kernels/funcs/eigen/common.h"
 
-namespace paddle {
-namespace operators {
-namespace math {
+namespace phi {
+namespace funcs {
 
 template <typename T>
 struct TolerableValue {
@@ -46,14 +45,15 @@ struct TolerableValue {
 // Also. In standard implementation of cross entropy, other
 // framework not has the ValueClipping.
 template <>
-struct TolerableValue<platform::float16> {
-  HOSTDEVICE platform::float16 operator()(const platform::float16& x) const {
-    if (platform::isfinite(x))
+struct TolerableValue<phi::dtype::float16> {
+  HOSTDEVICE phi::dtype::float16 operator()(
+      const phi::dtype::float16& x) const {
+    if (phi::dtype::isfinite(x))
       return x;
-    else if (x > static_cast<platform::float16>(0))
-      return std::numeric_limits<platform::float16>::max();
+    else if (x > static_cast<phi::dtype::float16>(0))
+      return std::numeric_limits<phi::dtype::float16>::max();
     else
-      return std::numeric_limits<platform::float16>::min();
+      return std::numeric_limits<phi::dtype::float16>::min();
  }
 };
@@ -68,6 +68,5 @@ class CrossEntropyFunctor {
                   const int ignore_index,
                   const int axis_dim);
 };
-}  // namespace math
-}  // namespace operators
-}  // namespace paddle
+}  // namespace funcs
+}  // namespace phi
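The float16 specialization shown here passes finite values through and clamps everything else to `std::numeric_limits` bounds. The generic template body is elided in this hunk, so the sketch below simply mirrors the float16 rule for `float`; the clipping constants of the real generic implementation may differ:

```cpp
#include <cmath>
#include <cstdio>
#include <limits>

// TolerableValue-style clipping, mirroring the float16 rule above: pass
// finite values through, clamp +inf to max(); -inf and NaN fall through to
// min() (the smallest positive normal, matching the original code).
template <typename T>
struct TolerableValue {
  T operator()(const T& x) const {
    if (std::isfinite(x)) return x;
    if (x > static_cast<T>(0)) return std::numeric_limits<T>::max();
    return std::numeric_limits<T>::min();
  }
};

int main() {
  TolerableValue<float> clip;
  // log(0) is -inf; clipping keeps the loss finite.
  std::printf("%g\n", clip(std::log(0.0f)));  // smallest positive float
  std::printf("%g\n", clip(std::log(2.0f)));  // finite, passes through
  return 0;
}
```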
@@ -22,7 +22,6 @@ limitations under the License. */
 namespace cub = hipcub;
 #endif
 
-#include "paddle/fluid/operators/math/cross_entropy.h"
 #include "paddle/fluid/operators/math/softmax.h"
 #include "paddle/fluid/platform/device/gpu/gpu_dnn.h"
 #include "paddle/phi/backends/gpu/gpu_device_function.h"
...
@@ -22,7 +22,6 @@ limitations under the License. */
 namespace cub = hipcub;
 #endif
 
-#include "paddle/fluid/operators/math/cross_entropy.h"
 #include "paddle/fluid/operators/math/softmax.h"
 #include "paddle/fluid/platform/device/gpu/gpu_dnn.h"
 #include "paddle/phi/backends/gpu/gpu_device_function.h"
@@ -31,6 +30,7 @@ namespace cub = hipcub;
 #include "paddle/phi/core/tensor_utils.h"
 #include "paddle/phi/core/visit_type.h"
 #include "paddle/phi/kernels/funcs/axis_utils.h"
+#include "paddle/phi/kernels/funcs/cross_entropy.h"
 #include "paddle/phi/kernels/funcs/for_range.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
 #include "paddle/phi/kernels/gpudnn/softmax_gpudnn.h"
@@ -46,7 +46,7 @@ template <typename T>
 static __device__ __forceinline__ T Log(T x) {
   using AccT = typename dtype::MPTypeTrait<T>::Type;
   AccT logx = std::log(static_cast<AccT>(x));
-  return paddle::operators::math::TolerableValue<T>()(static_cast<T>(logx));
+  return phi::funcs::TolerableValue<T>()(static_cast<T>(logx));
 }
 
 // Wrapper of exp function. Use exp(float32) for float16
@@ -54,7 +54,7 @@ template <typename T>
 static __device__ __forceinline__ T Exp(T x) {
   using AccT = typename dtype::MPTypeTrait<T>::Type;
   AccT expx = std::exp(static_cast<AccT>(x));
-  return paddle::operators::math::TolerableValue<T>()(static_cast<T>(expx));
+  return phi::funcs::TolerableValue<T>()(static_cast<T>(expx));
 }
 
 template <typename Tx, typename Ty = Tx>
@@ -1285,16 +1285,15 @@ void CrossEntropyWithSoftmaxCUDAKernel(const GPUContext& dev_ctx,
   DenseTensor softmax_out_2d(*softmax_out);
   softmax_out_2d.Resize({n, d});
 
-  // math::CrossEntropyFunctor support axis is the last
+  // phi::funcs::CrossEntropyFunctor support axis is the last
   if (axis_v == -1) {
-    paddle::operators::math::CrossEntropyFunctor<GPUContext, T>()(
-        dev_ctx,
-        &loss_2d,
-        &softmax_2d,
-        &labels_2d,
-        soft_label,
-        ignore_index,
-        axis_dim);
+    phi::funcs::CrossEntropyFunctor<GPUContext, T>()(dev_ctx,
+                                                     &loss_2d,
+                                                     &softmax_2d,
+                                                     &labels_2d,
+                                                     soft_label,
+                                                     ignore_index,
+                                                     axis_dim);
     return;
   }
@@ -1389,14 +1388,13 @@ void CrossEntropyWithSoftmaxCUDAKernel(const GPUContext& dev_ctx,
     loss_2d.Resize({n, 1});
 
     paddle::operators::math::SoftmaxCUDNNFunctor<T, GPUContext>()(
         dev_ctx, &logits_2d, &softmax_2d);
-    paddle::operators::math::CrossEntropyFunctor<GPUContext, T>()(
-        dev_ctx,
-        &loss_2d,
-        &softmax_2d,
-        &labels_2d,
-        false,
-        ignore_index,
-        axis_dim);
+    phi::funcs::CrossEntropyFunctor<GPUContext, T>()(dev_ctx,
+                                                     &loss_2d,
+                                                     &softmax_2d,
+                                                     &labels_2d,
+                                                     false,
+                                                     ignore_index,
+                                                     axis_dim);
   } else {
     auto* logits_data = logits.data<T>();
     auto* labels_data = label.data<LabelT>();
...
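The `Log` and `Exp` wrappers in the last file promote a low-precision value to an accumulation type before calling `std::log`/`std::exp`, then cast back and clip. Below is a standalone sketch of that promotion pattern; `Half` and `MPTypeTrait` here are toy stand-ins for phi::dtype::float16 and phi::dtype::MPTypeTrait, not the real phi definitions:

```cpp
#include <cmath>
#include <cstdio>

// Toy half-precision type; a real float16 packs 16 bits, this stores a
// float purely for illustration.
struct Half {
  float v;
  explicit Half(float f) : v(f) {}
  explicit operator float() const { return v; }
};

// Maps each type to its accumulation type: identity by default, float for
// the low-precision Half.
template <typename T>
struct MPTypeTrait {
  using Type = T;
};
template <>
struct MPTypeTrait<Half> {
  using Type = float;
};

template <typename T>
T Log(T x) {
  using AccT = typename MPTypeTrait<T>::Type;
  AccT logx = std::log(static_cast<AccT>(x));  // computed in float for Half
  return static_cast<T>(logx);                 // TolerableValue clipping omitted
}

int main() {
  Half h(0.5f);
  std::printf("%f\n", static_cast<float>(Log(h)));  // ~ -0.693147
  return 0;
}
```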