Unverified commit 4a25b60d authored by Charles-hit, committed by GitHub

Fix split api bug (#45396)

* fix split bug

* solve function redefine

* fix fluid.layers.split and add unit test

* delete splitInferMeta register in unary.cc

* modify test_split_op GPU unit test

* modify test_split_op GPU unit test place param

* refactor split op and fix infershape bugs

* add () in && and ||

* fix split C++ unit test

* fix split infershape
Parent df7600ab
......@@ -22,16 +22,16 @@ import os
### Global Variables ###
########################
ops_to_fill_zero_for_empty_grads = set([
"split_grad", "rnn_grad", "matmul_double_grad", "matmul_triple_grad",
"sigmoid_double_grad", "sigmoid_triple_grad", "add_double_grad",
"add_triple_grad", "multiply_grad", "multiply_double_grad",
"multiply_triple_grad", "conv2d_grad_grad", "batch_norm_double_grad",
"tanh_double_grad", "tanh_triple_grad", "subtract_double_grad",
"divide_double_grad", "log_double_grad", "elu_double_grad",
"leaky_relu_double_grad", "sqrt_double_grad", "rsqrt_double_grad",
"square_double_grad", "celu_double_grad", "pad_double_grad",
"pad3d_double_grad", "squeeze_double_grad", "unsqueeze_double_grad",
"instance_norm_double_grad", "conv3d_double_grad",
"split_grad", "split_with_num_grad", "rnn_grad", "matmul_double_grad",
"matmul_triple_grad", "sigmoid_double_grad", "sigmoid_triple_grad",
"add_double_grad", "add_triple_grad", "multiply_grad",
"multiply_double_grad", "multiply_triple_grad", "conv2d_grad_grad",
"batch_norm_double_grad", "tanh_double_grad", "tanh_triple_grad",
"subtract_double_grad", "divide_double_grad", "log_double_grad",
"elu_double_grad", "leaky_relu_double_grad", "sqrt_double_grad",
"rsqrt_double_grad", "square_double_grad", "celu_double_grad",
"pad_double_grad", "pad3d_double_grad", "squeeze_double_grad",
"unsqueeze_double_grad", "instance_norm_double_grad", "conv3d_double_grad",
"depthwise_conv2d_grad_grad", "concat_double_grad", "expand_grad",
"argsort_grad"
])
......
......@@ -21,7 +21,9 @@ limitations under the License. */
namespace paddle {
namespace operators {
using framework::LoDTensor;
using framework::Tensor;
using framework::Variable;
class SplitOp : public framework::OperatorWithKernel {
public:
......@@ -36,47 +38,72 @@ class SplitOp : public framework::OperatorWithKernel {
1UL,
platform::errors::InvalidArgument(
"Outputs(Out) of SplitOp should not be empty."));
auto in_dims = ctx->GetInputDim("X");
auto outs_names = ctx->Outputs("Out");
size_t axis = static_cast<size_t>(ctx->Attrs().Get<int>("axis"));
size_t num = static_cast<size_t>(ctx->Attrs().Get<int>("num"));
int axis = static_cast<int>(ctx->Attrs().Get<int>("axis"));
int num = static_cast<int>(ctx->Attrs().Get<int>("num"));
std::vector<int> sections = static_cast<std::vector<int>>(
ctx->Attrs().Get<std::vector<int>>("sections"));
const size_t outs_number = outs_names.size();
if (sections.size() > 0) {
PADDLE_ENFORCE_EQ(
sections.size(),
outs_number,
platform::errors::InvalidArgument("tensor split sections size "
"should be equal to output size."));
// Construct MetaTensor for InferMeta Func
using CompatMetaTensor = framework::CompatMetaTensor;
CompatMetaTensor x(ctx->GetInputVarPtrs("X")[0], ctx->IsRuntime());
std::vector<CompatMetaTensor> out;
size_t out_size = ctx->GetOutputVarPtrs("Out").size();
out.reserve(out_size);
for (size_t i = 0; i < out_size; i++) {
out.emplace_back(
CompatMetaTensor(ctx->GetOutputVarPtrs("Out")[i], ctx->IsRuntime()));
}
std::vector<phi::MetaTensor *> out_ptr(out_size);
for (size_t i = 0; i < out_size; i++) {
out_ptr[i] = &out[i];
}
phi::Scalar axis_final;
phi::IntArray sections_final;
// Construct axis_final
if (ctx->IsRuntime() && ctx->HasInput("AxisTensor")) {
Variable *var =
PADDLE_GET_CONST(Variable *, ctx->GetInputVarPtrs("AxisTensor")[0]);
axis_final = std::move(experimental::MakePhiScalarFromVar(*var));
} else if (!ctx->IsRuntime() && ctx->HasInput("AxisTensor")) {
axis_final = std::move(phi::Scalar(-1));
axis_final.SetFromTensor(true);
} else {
axis_final = std::move(phi::Scalar(axis));
}
if (ctx->HasInput("AxisTensor")) {
auto out_dims = phi::make_ddim(std::vector<int>(in_dims.size(), -1));
std::vector<framework::DDim> outs_dims(outs_number, out_dims);
ctx->SetOutputsDim("Out", outs_dims);
for (size_t i = 0; i < outs_number; ++i) {
ctx->ShareLoD("X", "Out", 0, i);
// Construct sections_final
if (ctx->IsRuntime() && ctx->HasInputs("SectionsTensorList")) {
int sections_tensor_list_size =
ctx->GetInputVarPtrs("SectionsTensorList").size();
const paddle::small_vector<framework::InferShapeVarPtr,
phi::kInputSmallVectorSize>
&sections_varptr_list = ctx->GetInputVarPtrs("SectionsTensorList");
std::vector<LoDTensor> sections_from_tensor;
sections_from_tensor.reserve(sections_tensor_list_size);
for (const auto &section_varptr : sections_varptr_list) {
Variable *var = PADDLE_GET_CONST(Variable *, section_varptr);
sections_from_tensor.emplace_back(var->Get<LoDTensor>());
}
return;
sections_final = std::move(phi::IntArray(sections_from_tensor));
} else if (!ctx->IsRuntime() && ctx->HasInputs("SectionsTensorList")) {
sections_final = std::move(phi::IntArray(std::vector<int>(
ctx->GetInputVarPtrs("SectionsTensorList").size(), -1)));
sections_final.SetFromTensor(true);
} else {
sections_final = std::move(phi::IntArray(sections));
}
bool each_section_is_known =
(sections.size() > 0 && !ctx->HasInputs("SectionsTensorList"));
auto outs_dims = UpdateOutsDims(ctx->IsRuntime(),
each_section_is_known,
in_dims,
num,
sections,
axis,
outs_number);
ctx->SetOutputsDim("Out", outs_dims);
if (axis != 0) {
// Only pass LoD when not splitting along the first dim.
for (size_t i = 0; i < outs_number; ++i) {
ctx->ShareLoD("X", "Out", 0, i);
if (sections.size() > 0) {
if (ctx->IsRuntime()) {
phi::SplitInferMeta(
x, sections_final, axis_final, out_ptr, {true, false});
} else {
phi::SplitInferMeta(
x, sections_final, axis_final, out_ptr, {false, false});
}
} else {
if (ctx->IsRuntime()) {
phi::SplitWithNumInferMeta(x, num, axis_final, out_ptr, {true, false});
} else {
phi::SplitWithNumInferMeta(x, num, axis_final, out_ptr, {false, false});
}
}
}
......
......@@ -2501,11 +2501,23 @@
backward : spectral_norm_grad
- api : split
args : (Tensor x, IntArray num_or_sections, Scalar(int) axis)
output : Tensor[]
invoke : split_impl(x, num_or_sections, axis)
args : (Tensor x, IntArray sections, Scalar(int) axis)
output : Tensor[]{sections.size()}
infer_meta :
func : SplitInferMeta
kernel :
func : split
backward : split_grad
- api : split_with_num
args : (Tensor x, int num, Scalar(int) axis)
output : Tensor[]{num}
infer_meta :
func : SplitWithNumInferMeta
kernel :
func : split_with_num
backward : split_with_num_grad
- api : sqrt
args : (Tensor x)
output : Tensor(out)
......
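For context, a minimal dygraph usage sketch of how these two yaml entries are reached from Python, matching the `_C_ops.split` / `_C_ops.split_with_num` dispatch updated later in this diff; the shapes are illustrative only:

import paddle

x = paddle.rand([4, 6])

# split now takes explicit sections (IntArray); the number of outputs is sections.size()
a, b, c = paddle.split(x, [2, 2, 2], axis=1)   # roughly lowers to _C_ops.split(x, [2, 2, 2], 1)

# split_with_num takes an integer count; the number of outputs is num
p, q, r = paddle.split(x, 3, axis=1)           # roughly lowers to _C_ops.split_with_num(x, 3, 1)

assert a.shape == [4, 2] and p.shape == [4, 2]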
......@@ -2271,6 +2271,12 @@
args : (Tensor[] out_grad, Scalar axis = -1)
output : Tensor(x_grad)
invoke : concat( out_grad, axis)
- backward_api : split_with_num_grad
forward : split_with_num (Tensor x, int num, Scalar axis) -> Tensor[](out)
args : (Tensor[] out_grad, Scalar axis = -1)
output : Tensor(x_grad)
invoke : concat( out_grad, axis)
# TODO(zhangyunfei) The config of double grad and triple grad will be supported in the future.
- backward_api : sqrt_double_grad
......
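The `invoke : concat(out_grad, axis)` rule above is what the new dygraph unit tests later in this diff exercise; a condensed sketch of the resulting gradient, with numbers taken from the tests' [4, 6, 6] case:

import numpy as np
import paddle

x = paddle.ones([4, 6, 6])
x.stop_gradient = False
x0, x1, x2 = paddle.split(x, 3, axis=1)
x0.sum().backward()

# only the slice that produced x0 receives gradient; the other two grads are
# filled with zeros (see ops_to_fill_zero_for_empty_grads) and concatenated back
expected = np.zeros([4, 6, 6])
expected[:, :2, :] = 1.0
np.testing.assert_allclose(x.gradient(), expected)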
......@@ -37,7 +37,6 @@ template <>
IntArrayBase<phi::DenseTensor>::IntArrayBase(
const std::vector<phi::DenseTensor>& tensor_list) {
is_from_tensor_ = true;
for (size_t i = 0; i < tensor_list.size(); ++i) {
DataType data_type = tensor_list[i].dtype();
switch (data_type) {
......
......@@ -3084,81 +3084,122 @@ void SoftmaxInferMeta(const MetaTensor& x, int axis, MetaTensor* out) {
out->share_lod(x);
}
int GetSplitAxisValue(const MetaTensor& x,
const Scalar& axis,
MetaConfig config) {
// The axis tensor has no value at static-graph compile time
if (axis.FromTensor() && !config.is_runtime) {
return -1;
} else {
if (axis.dtype() == DataType::FLOAT32 ||
axis.dtype() == DataType::FLOAT64) {
PADDLE_THROW(
phi::errors::InvalidArgument("%s(): argument (position 3) must be "
"int, but got %s",
"split",
"float")); // NOLINT
}
int axis_value = axis.to<int>();
int rank = x.dims().size();
PADDLE_ENFORCE_EQ(
axis_value >= -rank && axis_value < rank,
true,
phi::errors::InvalidArgument(
"The axis is expected to be in range of [%d, %d), but got %d",
-rank,
rank,
axis_value));
if (axis_value < 0) {
axis_value = axis_value + rank;
}
return axis_value;
}
}
void FillSplitOutDims(const MetaTensor& x,
const int axis_value,
const std::vector<int64_t>& sections_vec,
std::vector<MetaTensor*>* out) {
std::vector<phi::DDim> out_dims(sections_vec.size(), x.dims());
if (x.dims().at(axis_value) > 0) {
for (size_t i = 0; i < sections_vec.size(); ++i) {
out_dims[i][axis_value] = sections_vec[i];
}
} else {
for (size_t i = 0; i < sections_vec.size(); ++i) {
out_dims[i][axis_value] = -1;
}
}
for (size_t i = 0; i < sections_vec.size(); ++i) {
if (axis_value != 0) {
// Only pass LoD when not splitting along the first dim.
(*out)[i]->set_dtype(x.dtype());
(*out)[i]->set_dims(out_dims[i]);
(*out)[i]->set_layout(x.layout());
} else {
(*out)[i]->set_dtype(x.dtype());
(*out)[i]->set_dims(out_dims[i]);
(*out)[i]->set_layout(x.layout());
(*out)[i]->share_lod(x);
}
}
}
void SplitInferMeta(const MetaTensor& x,
const IntArray& num_or_sections,
const IntArray& sections,
const Scalar& axis,
std::vector<MetaTensor*> out,
MetaConfig config) {
if (axis.dtype() == DataType::FLOAT32 || axis.dtype() == DataType::FLOAT64) {
PADDLE_THROW(
phi::errors::InvalidArgument("%s(): argument (position 3) must be "
"int, but got %s",
"split",
"float")); // NOLINT
}
int axis_value = axis.to<int>();
int rank = x.dims().size();
PADDLE_ENFORCE_EQ(
axis_value >= -rank && axis_value < rank,
true,
phi::errors::InvalidArgument(
"The axis is expected to be in range of [%d, %d), but got %d",
-rank,
rank,
axis_value));
if (axis_value < 0) {
axis_value = axis_value + rank;
}
auto input_axis_dim = x.dims().at(axis_value);
auto num_or_sections_data = num_or_sections.GetData();
// step1: get formatted sections
std::vector<int64_t> sections;
// num_or_sections is a number
if (num_or_sections_data.size() == 1 && num_or_sections_data[0] > 0) {
int num = num_or_sections_data.at(0);
PADDLE_ENFORCE_EQ(input_axis_dim % num,
0,
phi::errors::InvalidArgument(
"The input's size along the split dimension "
"must be evenly divisible by Attr(num_or_sections). "
"But received Attr(num_or_sections) "
"= %d, input(X)'s shape = [%s], Attr(dim) = %d.",
num,
x.dims(),
axis_value));
for (int i = 0; i < num; ++i) {
sections.push_back(input_axis_dim / num);
// get axis value
int axis_value = GetSplitAxisValue(x, axis, config);
auto sections_data = sections.GetData();
// fill out dims with -1
if ((sections.FromTensor() && !config.is_runtime) || axis_value == -1 ||
(axis_value >= 0 && x.dims().at(axis_value) <= 0)) {
std::vector<phi::DDim> out_dims(
sections_data.size(),
phi::make_ddim(std::vector<int>(x.dims().size(), -1)));
for (size_t i = 0; i < sections_data.size(); ++i) {
if (axis_value != 0) {
// Only pass LoD when not splitting along the first dim.
out[i]->set_dtype(x.dtype());
out[i]->set_dims(out_dims[i]);
out[i]->set_layout(x.layout());
} else {
out[i]->set_dtype(x.dtype());
out[i]->set_dims(out_dims[i]);
out[i]->set_layout(x.layout());
out[i]->share_lod(x);
}
}
} else {
// num_or_sections is a list of sections
auto input_axis_dim = x.dims().at(axis_value);
std::vector<int64_t> sections_vec;
const int unknow_dim_val = -1;
int unknow_dim_idx = -1;
int num_of_unknow = 0;
int sum_of_section = 0;
for (size_t i = 0; i < num_or_sections_data.size(); ++i) {
sections.push_back(num_or_sections_data[i]);
for (size_t i = 0; i < sections_data.size(); ++i) {
sections_vec.push_back(sections_data[i]);
if (num_or_sections_data[i] == unknow_dim_val) {
if (sections_data[i] == unknow_dim_val) {
num_of_unknow++;
unknow_dim_idx = i;
} else {
sum_of_section += num_or_sections_data[i];
sum_of_section += sections_data[i];
}
}
if (config.is_runtime) {
PADDLE_ENFORCE_LE(num_of_unknow,
1,
phi::errors::InvalidArgument(
"Only one dimension value of Attr(num_or_sections) "
"in SplitOp can be -1. "
"But received Attr(num_or_sections) = [%s].",
phi::make_ddim(num_or_sections_data)));
}
PADDLE_ENFORCE_LE(num_of_unknow,
1,
phi::errors::InvalidArgument(
"Only one dimension value of Attr(num_or_sections) "
"in SplitOp can be -1. "
"But received Attr(num_or_sections) = [%s].",
phi::make_ddim(sections_data)));
if (unknow_dim_idx != -1) {
// for example, input shape = [4 ,5], axis = 1, sections = [2, 3, -1].
......@@ -3173,13 +3214,11 @@ void SplitInferMeta(const MetaTensor& x,
"size "
"along the split dimension. But received Attr(num_or_sections) "
"= [%s], input(X)'s shape = [%s], Attr(dim) = %d.",
phi::make_ddim(num_or_sections_data),
phi::make_ddim(sections_data),
x.dims(),
axis_value));
if (config.is_runtime) {
sections[unknow_dim_idx] = input_axis_dim - sum_of_section;
}
sections_vec[unknow_dim_idx] = input_axis_dim - sum_of_section;
} else {
PADDLE_ENFORCE_EQ(
sum_of_section,
......@@ -3189,36 +3228,59 @@ void SplitInferMeta(const MetaTensor& x,
"size "
"along the split dimension. But received Attr(num_or_sections)"
" = [%s], input(X)'s shape = [%s], Attr(dim) = %d.",
phi::make_ddim(num_or_sections_data),
phi::make_ddim(sections_data),
x.dims(),
axis_value));
}
// fill out dims
FillSplitOutDims(x, axis_value, sections_vec, &out);
}
}
void SplitWithNumInferMeta(const MetaTensor& x,
int num,
const Scalar& axis,
std::vector<MetaTensor*> out,
MetaConfig config) {
int axis_value = GetSplitAxisValue(x, axis, config);
// fill out dims with -1
if (axis_value == -1 || (axis_value >= 0 && x.dims().at(axis_value) <= 0)) {
std::vector<phi::DDim> out_dims(
num, phi::make_ddim(std::vector<int>(x.dims().size(), -1)));
// step2: fill out dims
std::vector<phi::DDim> out_dims(sections.size(), x.dims());
if (config.is_runtime || input_axis_dim > 0) {
for (size_t i = 0; i < sections.size(); ++i) {
out_dims[i][axis_value] = sections[i];
for (int i = 0; i < num; ++i) {
if (axis_value != 0) {
// Only pass LoD when not splitting along the first dim.
out[i]->set_dtype(x.dtype());
out[i]->set_dims(out_dims[i]);
out[i]->set_layout(x.layout());
} else {
out[i]->set_dtype(x.dtype());
out[i]->set_dims(out_dims[i]);
out[i]->set_layout(x.layout());
out[i]->share_lod(x);
}
}
} else {
for (size_t i = 0; i < sections.size(); ++i) {
out_dims[i][axis_value] = -1;
}
}
auto input_axis_dim = x.dims().at(axis_value);
// step1: get formatted sections
std::vector<int64_t> sections_vec;
PADDLE_ENFORCE_EQ(input_axis_dim % num,
0,
phi::errors::InvalidArgument(
"The input's size along the split dimension "
"must be evenly divisible by Attr(num_or_sections). "
"But received Attr(num_or_sections) "
"= %d, input(X)'s shape = [%s], Attr(dim) = %d.",
num,
x.dims(),
axis_value));
for (size_t i = 0; i < sections.size(); ++i) {
if (axis_value != 0) {
// Only pass LoD when not splitting along the first dim.
out[i]->set_dtype(x.dtype());
out[i]->set_dims(out_dims[i]);
out[i]->set_layout(x.layout());
} else {
out[i]->set_dtype(x.dtype());
out[i]->set_dims(out_dims[i]);
out[i]->set_layout(x.layout());
out[i]->share_lod(x);
for (int i = 0; i < num; ++i) {
sections_vec.push_back(input_axis_dim / num);
}
// step2: fill out dims
FillSplitOutDims(x, axis_value, sections_vec, &out);
}
}
......@@ -4623,4 +4685,3 @@ void FoldInferMeta(const MetaTensor& x,
} // namespace phi
PD_REGISTER_INFER_META_FN(flatten, phi::FlattenInferMeta);
PD_REGISTER_INFER_META_FN(split, phi::SplitInferMeta);
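A quick numeric illustration of the `-1` handling in `SplitInferMeta` above (at most one section may be -1; at runtime it is filled with the remaining extent along the split axis, while at compile time it stays unknown):

import paddle

x = paddle.rand([4, 10])
a, b, c = paddle.split(x, [2, 3, -1], axis=1)  # -1 resolves to 10 - 2 - 3 = 5
assert [a.shape[1], b.shape[1], c.shape[1]] == [2, 3, 5]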
......@@ -452,12 +452,27 @@ void SliceRawInferMeta(const MetaTensor& input,
void SoftmaxInferMeta(const MetaTensor& x, int axis, MetaTensor* out);
int GetSplitAxisValue(const MetaTensor& x,
const Scalar& axis,
MetaConfig config);
void FillSplitOutDims(const MetaTensor& x,
const int axis_value,
const std::vector<int64_t>& sections_vec,
std::vector<MetaTensor*>* out);
void SplitInferMeta(const MetaTensor& x_meta,
const IntArray& num_or_sections,
const IntArray& sections,
const Scalar& axis,
std::vector<MetaTensor*> out,
MetaConfig config = MetaConfig());
void SplitWithNumInferMeta(const MetaTensor& x_meta,
int num,
const Scalar& axis,
std::vector<MetaTensor*> out,
MetaConfig config = MetaConfig());
void SquaredL2NormInferMeta(const MetaTensor& x, MetaTensor* out);
void SqueezeInferMeta(const MetaTensor& x,
......
......@@ -14,54 +14,9 @@
#include "paddle/phi/kernels/split_kernel.h"
#include "paddle/fluid/operators/strided_memcpy.h"
#include "paddle/phi/common/float16.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/infermeta/unary.h"
#include "paddle/phi/kernels/funcs/concat_and_split_functor.h"
namespace phi {
template <typename T, typename Context>
void SplitKernel(const Context& dev_ctx,
const DenseTensor& x,
const IntArray& num_or_sections,
const Scalar& axis_scalar,
std::vector<DenseTensor*> outs) {
// need to infershape output
if (num_or_sections.FromTensor() || axis_scalar.FromTensor()) {
std::vector<MetaTensor> out_metas;
out_metas.reserve(outs.size());
std::vector<MetaTensor*> out_metas_ptr;
for (size_t i = 0; i < outs.size(); ++i) {
out_metas.push_back(outs[i]);
out_metas_ptr.push_back(&out_metas.back());
}
phi::SplitInferMeta(x, num_or_sections, axis_scalar, out_metas_ptr);
for (size_t i = 0; i < out_metas.size(); ++i) {
outs[i]->Resize(out_metas[i].dims());
}
}
std::vector<const DenseTensor*> shape_refer;
for (size_t j = 0; j < outs.size(); ++j) {
dev_ctx.template Alloc<T>(outs[j]);
shape_refer.emplace_back(outs[j]);
}
int axis = axis_scalar.to<int>();
// Sometimes direct copies will be faster; this may need deeper analysis.
if (axis == 0 && outs.size() < 10) {
paddle::operators::StridedMemcpyWithAxis0<T>(
dev_ctx, x, shape_refer, &outs);
} else {
phi::funcs::SplitFunctor<Context, T> functor;
functor(dev_ctx, x, shape_refer, axis, &outs);
}
}
} // namespace phi
#include "paddle/phi/kernels/impl/split_kernel_impl.h"
PD_REGISTER_KERNEL(split,
CPU,
......@@ -76,3 +31,17 @@ PD_REGISTER_KERNEL(split,
int8_t,
phi::dtype::float16,
phi::dtype::bfloat16) {}
PD_REGISTER_KERNEL(split_with_num,
CPU,
ALL_LAYOUT,
phi::SplitWithNumKernel,
float,
double,
int64_t,
int,
bool,
uint8_t,
int8_t,
phi::dtype::float16,
phi::dtype::bfloat16) {}
......@@ -14,53 +14,9 @@
#include "paddle/phi/kernels/split_kernel.h"
#include "paddle/fluid/operators/strided_memcpy.h"
#include "paddle/phi/common/float16.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/funcs/concat_and_split_functor.h"
namespace phi {
template <typename T, typename Context>
void SplitKernel(const Context& dev_ctx,
const DenseTensor& x,
const IntArray& num_or_sections,
const Scalar& axis_scalar,
std::vector<DenseTensor*> outs) {
// need to infershape output
if (num_or_sections.FromTensor() || axis_scalar.FromTensor()) {
std::vector<MetaTensor> out_metas;
out_metas.reserve(outs.size());
std::vector<MetaTensor*> out_metas_ptr;
for (size_t i = 0; i < outs.size(); ++i) {
out_metas.push_back(outs[i]);
out_metas_ptr.push_back(&out_metas.back());
}
phi::SplitInferMeta(x, num_or_sections, axis_scalar, out_metas_ptr);
for (size_t i = 0; i < out_metas.size(); ++i) {
outs[i]->Resize(out_metas[i].dims());
}
}
std::vector<const DenseTensor*> shape_refer;
for (size_t j = 0; j < outs.size(); ++j) {
dev_ctx.template Alloc<T>(outs[j]);
shape_refer.emplace_back(outs[j]);
}
int axis = axis_scalar.to<int>();
// Sometimes direct copies will be faster; this may need deeper analysis.
if (axis == 0 && outs.size() < 10) {
paddle::operators::StridedMemcpyWithAxis0<T>(
dev_ctx, x, shape_refer, &outs);
} else {
phi::funcs::SplitFunctor<Context, T> functor;
functor(dev_ctx, x, shape_refer, axis, &outs);
}
}
} // namespace phi
#include "paddle/phi/kernels/impl/split_kernel_impl.h"
PD_REGISTER_KERNEL(split,
GPU,
......@@ -75,3 +31,17 @@ PD_REGISTER_KERNEL(split,
int8_t,
phi::dtype::float16,
phi::dtype::bfloat16) {}
PD_REGISTER_KERNEL(split_with_num,
GPU,
ALL_LAYOUT,
phi::SplitWithNumKernel,
float,
double,
int64_t,
int,
bool,
uint8_t,
int8_t,
phi::dtype::float16,
phi::dtype::bfloat16) {}
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/phi/kernels/split_kernel.h"
#include "paddle/fluid/operators/strided_memcpy.h"
#include "paddle/phi/common/int_array.h"
#include "paddle/phi/common/scalar.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/kernels/funcs/concat_and_split_functor.h"
namespace phi {
template <typename T, typename Context>
void SplitKernel(const Context& dev_ctx,
const DenseTensor& x,
const IntArray& sections,
const Scalar& axis_scalar,
std::vector<DenseTensor*> outs) {
std::vector<const DenseTensor*> shape_refer;
for (size_t j = 0; j < outs.size(); ++j) {
dev_ctx.template Alloc<T>(outs[j]);
shape_refer.emplace_back(outs[j]);
}
int axis = axis_scalar.to<int>();
// Sometimes direct copies will be faster; this may need deeper analysis.
if (axis == 0 && outs.size() < 10) {
paddle::operators::StridedMemcpyWithAxis0<T>(
dev_ctx, x, shape_refer, &outs);
} else {
phi::funcs::SplitFunctor<Context, T> functor;
functor(dev_ctx, x, shape_refer, axis, &outs);
}
}
template <typename T, typename Context>
void SplitWithNumKernel(const Context& dev_ctx,
const DenseTensor& x,
int num,
const Scalar& axis_scalar,
std::vector<DenseTensor*> outs) {
int axis_value = axis_scalar.to<int>();
auto input_axis_dim = x.dims().at(axis_value);
std::vector<int64_t> sections_vec;
for (int i = 0; i < num; ++i) {
sections_vec.push_back(input_axis_dim / num);
}
IntArray sections(sections_vec);
SplitKernel<T, Context>(dev_ctx, x, sections, axis_scalar, outs);
}
} // namespace phi
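`SplitWithNumKernel` above is a thin wrapper: it expands `num` into equal-sized sections and forwards to `SplitKernel`. A small sanity sketch of that equivalence from the Python side (illustrative only):

import numpy as np
import paddle

x = paddle.arange(40, dtype='float32').reshape([4, 10])
by_num = paddle.split(x, 2, axis=0)            # num = 2
by_sections = paddle.split(x, [2, 2], axis=0)  # sections = [dim / num] * num
for t_num, t_sec in zip(by_num, by_sections):
    np.testing.assert_array_equal(t_num.numpy(), t_sec.numpy())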
......@@ -18,42 +18,70 @@
#include "paddle/phi/common/scalar.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/infermeta/unary.h"
#include "paddle/phi/kernels/empty_kernel.h"
namespace phi {
template <typename T, typename Context>
void SplitKernel(const Context& dev_ctx,
const DenseTensor& x,
const IntArray& num_or_sections,
const IntArray& sections,
const Scalar& axis,
std::vector<DenseTensor*> out);
template <typename T, typename Context>
void SplitWithNumKernel(const Context& dev_ctx,
const DenseTensor& x,
int num,
const Scalar& axis,
std::vector<DenseTensor*> out);
template <typename T, typename Context>
std::vector<DenseTensor> Split(const Context& dev_ctx,
const DenseTensor& x,
const IntArray& num_or_sections,
const IntArray& sections,
const Scalar& axis) {
size_t out_number;
if (num_or_sections.GetData().size() == 1) {
out_number = num_or_sections.GetData()[0];
} else {
out_number = num_or_sections.GetData().size();
out_number = sections.GetData().size();
std::vector<MetaTensor> out_meta;
std::vector<MetaTensor*> out_meta_ptr;
out_meta.reserve(out_number);
out_meta_ptr.reserve(out_number);
std::vector<DenseTensor> result(out_number);
for (size_t i = 0; i < out_number; ++i) {
out_meta.emplace_back(&result[i]);
out_meta_ptr.push_back(&out_meta.back());
}
SplitInferMeta(x, sections, axis, out_meta_ptr);
std::vector<DenseTensor*> outs;
outs.reserve(out_meta.size());
for (size_t i = 0; i < out_meta.size(); ++i) {
outs.push_back(&result[i]);
}
SplitKernel<T, Context>(dev_ctx, x, sections, axis, outs);
return result;
}
template <typename T, typename Context>
std::vector<DenseTensor> SplitWithNum(const Context& dev_ctx,
const DenseTensor& x,
int num,
const Scalar& axis) {
size_t out_number = num;
std::vector<MetaTensor> out_meta;
std::vector<MetaTensor*> out_meta_ptr;
out_meta.reserve(out_number);
out_meta_ptr.reserve(out_number);
std::vector<DenseTensor> result;
result.reserve(out_number);
std::vector<DenseTensor> result(out_number);
for (size_t i = 0; i < out_number; ++i) {
result.emplace_back(DenseTensor());
out_meta.emplace_back(&result.back());
out_meta.emplace_back(&result[i]);
out_meta_ptr.push_back(&out_meta.back());
}
SplitInferMeta(x, num_or_sections, axis, out_meta_ptr);
SplitWithNumInferMeta(x, num, axis, out_meta_ptr);
std::vector<DenseTensor*> outs;
outs.reserve(out_meta.size());
......@@ -61,7 +89,7 @@ std::vector<DenseTensor> Split(const Context& dev_ctx,
outs.push_back(&result[i]);
}
SplitKernel<T, Context>(dev_ctx, x, num_or_sections, axis, outs);
SplitWithNumKernel<T, Context>(dev_ctx, x, num, axis, outs);
return result;
}
......
......@@ -21,9 +21,10 @@ KernelSignature SplitOpArgumentMapping(const ArgumentMappingContext& ctx) {
// priority: AxisTensor > axis
if (paddle::any_cast<int>(ctx.Attr("num")) > 0) {
if (ctx.HasInput("AxisTensor")) {
return KernelSignature("split", {"X"}, {"num", "AxisTensor"}, {"Out"});
return KernelSignature(
"split_with_num", {"X"}, {"num", "AxisTensor"}, {"Out"});
} else {
return KernelSignature("split", {"X"}, {"num", "axis"}, {"Out"});
return KernelSignature("split_with_num", {"X"}, {"num", "axis"}, {"Out"});
}
}
......
......@@ -22,6 +22,7 @@ limitations under the License. */
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/split_kernel.h"
namespace phi {
namespace tests {
......@@ -40,14 +41,12 @@ TEST(DEV_API, split) {
dev_ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(paddle::platform::CPUPlace())
.get());
auto* dense_x_data = dev_ctx.Alloc<float>(&dense_x);
for (size_t i = 0; i < 4; ++i) {
for (size_t j = 0; j < 10; ++j) {
dense_x_data[i * 10 + j] = (i * 10 + j) * 1.0;
}
}
// 2. test API
auto out = phi::Split<float>(dev_ctx, dense_x, {2, 2}, 0);
......@@ -76,5 +75,50 @@ TEST(DEV_API, split) {
}
}
TEST(DEV_API, split_with_num) {
// 1. create tensor
const auto alloc =
std::make_unique<paddle::experimental::DefaultAllocator>(phi::CPUPlace());
phi::DenseTensor dense_x(alloc.get(),
phi::DenseTensorMeta(phi::DataType::FLOAT32,
phi::make_ddim({4, 10}),
phi::DataLayout::NCHW));
phi::CPUContext dev_ctx;
dev_ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(paddle::platform::CPUPlace())
.get());
auto* dense_x_data = dev_ctx.Alloc<float>(&dense_x);
for (size_t i = 0; i < 4; ++i) {
for (size_t j = 0; j < 10; ++j) {
dense_x_data[i * 10 + j] = (i * 10 + j) * 1.0;
}
}
// 2. test API
auto out = phi::SplitWithNum<float>(dev_ctx, dense_x, 2, 0);
// 3. check result
ASSERT_EQ(out.size(), static_cast<size_t>(2));
ASSERT_EQ(out[0].dims().size(), 2);
ASSERT_EQ(out[0].dims()[0], 2);
ASSERT_EQ(out[0].dims()[1], 10);
ASSERT_EQ(out[0].meta().dtype, phi::DataType::FLOAT32);
ASSERT_EQ(out[0].meta().layout, phi::DataLayout::NCHW);
ASSERT_EQ(out[1].dims().size(), 2);
ASSERT_EQ(out[1].dims()[0], 2);
ASSERT_EQ(out[1].dims()[1], 10);
ASSERT_EQ(out[1].meta().dtype, phi::DataType::FLOAT32);
ASSERT_EQ(out[1].meta().layout, phi::DataLayout::NCHW);
auto out_data_0 = out[0].data<float>();
auto out_data_1 = out[1].data<float>();
// check all 40 elements: the first 20 map to out[0], the last 20 to out[1]
for (size_t i = 0; i < 40; ++i) {
if (i < 20) {
ASSERT_NEAR(dense_x_data[i], out_data_0[i], 1e-6);
} else {
ASSERT_NEAR(dense_x_data[i], out_data_1[i - 20], 1e-6);
}
}
}
} // namespace tests
} // namespace phi
......@@ -5180,7 +5180,10 @@ def split(input, num_or_sections, dim=-1, name=None):
"The type of 'num_or_sections' in split must be int, list or tuple in imperative mode, but "
"received %s." % (type(num_or_sections)))
if in_dygraph_mode():
return _C_ops.split(input, [num], dim)
if isinstance(num_or_sections, int):
return _C_ops.split_with_num(input, num_or_sections, dim)
else:
return _C_ops.split(input, num_or_sections, dim)
elif _in_legacy_dygraph():
out = [_varbase_creator() for n in range(num)]
_legacy_C_ops.split(input, out, *attrs)
......
......@@ -421,6 +421,95 @@ class API_TestSplit4(unittest.TestCase):
np.testing.assert_allclose(ex_x1, r1, rtol=1e-05)
class API_TestSplit5(unittest.TestCase):
def test_out(self):
for use_cuda in ([False, True]
if core.is_compiled_with_cuda() else [False]):
place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace()
with fluid.program_guard(fluid.Program(), fluid.Program()):
input_1 = np.random.random([5, 4]).astype("int32")
# input is a variable whose shape is [5, 4]
input = paddle.to_tensor(input_1)
n = paddle.full([1], 5, dtype='int32')
out = paddle.split(input, [n])
exe = paddle.static.Executor(place=place)
re = exe.run(fetch_list=[out])
re = re[0]
ex_out = np.split(input_1, [5])
ex_out = ex_out[0]
np.testing.assert_allclose(ex_out, re, rtol=1e-05)
class API_TestDygraphFluidSplit(unittest.TestCase):
def test_out1(self):
with fluid.dygraph.guard():
input_1 = np.random.random([4, 6, 6]).astype("int32")
# input is a variable whose shape is [4, 6, 6]
input = paddle.to_tensor(input_1)
x0, x1, x2 = fluid.layers.split(input, num_or_sections=3, dim=1)
x0_out = x0.numpy()
x1_out = x1.numpy()
x2_out = x2.numpy()
ex_x0, ex_x1, ex_x2 = np.split(input_1, 3, axis=1)
with _test_eager_guard():
# input is a variable whose shape is [4, 6, 6]
input = paddle.to_tensor(input_1)
input.stop_gradient = False
x0, x1, x2 = fluid.layers.split(input, num_or_sections=3, dim=1)
eager_x0_out = x0.numpy()
eager_x1_out = x1.numpy()
eager_x2_out = x2.numpy()
loss = x0.sum()
loss.backward()
manul_grad = np.zeros_like(input_1)
manul_grad[:, :2, :] = 1
np.testing.assert_allclose(input.gradient(),
manul_grad,
rtol=1e-05)
np.testing.assert_allclose(ex_x0, eager_x0_out, rtol=1e-05)
np.testing.assert_allclose(ex_x1, eager_x1_out, rtol=1e-05)
np.testing.assert_allclose(ex_x2, eager_x2_out, rtol=1e-05)
np.testing.assert_allclose(ex_x0, x0_out, rtol=1e-05)
np.testing.assert_allclose(ex_x1, x1_out, rtol=1e-05)
np.testing.assert_allclose(ex_x2, x2_out, rtol=1e-05)
def test_out2(self):
with fluid.dygraph.guard():
input_1 = np.random.random([4, 6, 6]).astype("int32")
# input is a variable whose shape is [4, 6, 6]
input = paddle.to_tensor(input_1)
x0, x1, x2 = fluid.layers.split(input, [2, 2, 2], dim=1)
x0_out = x0.numpy()
x1_out = x1.numpy()
x2_out = x2.numpy()
ex_x0, ex_x1, ex_x2 = np.split(input_1, 3, axis=1)
with _test_eager_guard():
# input is a variable whose shape is [4, 6, 6]
input = paddle.to_tensor(input_1)
input.stop_gradient = False
x0, x1, x2 = fluid.layers.split(input, [2, 2, 2], dim=1)
eager_x0_out = x0.numpy()
eager_x1_out = x1.numpy()
eager_x2_out = x2.numpy()
loss = x0.sum()
loss.backward()
manul_grad = np.zeros_like(input_1)
manul_grad[:, :2, :] = 1
np.testing.assert_allclose(input.gradient(),
manul_grad,
rtol=1e-05)
np.testing.assert_allclose(ex_x0, eager_x0_out, rtol=1e-05)
np.testing.assert_allclose(ex_x1, eager_x1_out, rtol=1e-05)
np.testing.assert_allclose(ex_x2, eager_x2_out, rtol=1e-05)
np.testing.assert_allclose(ex_x0, x0_out, rtol=1e-05)
np.testing.assert_allclose(ex_x1, x1_out, rtol=1e-05)
np.testing.assert_allclose(ex_x2, x2_out, rtol=1e-05)
class API_TestDygraphSplit(unittest.TestCase):
def test_out1(self):
......@@ -471,6 +560,25 @@ class API_TestDygraphSplit(unittest.TestCase):
np.testing.assert_allclose(ex_x1, x1_out, rtol=1e-05)
np.testing.assert_allclose(ex_x2, x2_out, rtol=1e-05)
def test_out3(self):
with fluid.dygraph.guard():
np.random.seed(2021)
input_1 = np.random.random([4, 6, 6]).astype("int32")
# input is a variable whose shape is [4, 6, 6]
input = paddle.to_tensor(input_1)
out_dy = paddle.split(input, [6], axis=1)
out_dy = out_dy[0]
out_dy_np = out_dy.numpy()
ex_out = np.split(input_1, [6], axis=1)
ex_out = ex_out[0]
with _test_eager_guard():
input = paddle.to_tensor(input_1)
out_eager = paddle.split(input, [6], axis=1)
out_eager = out_eager[0]
out_eager_np = out_eager.numpy()
np.testing.assert_allclose(ex_out, out_eager_np, rtol=1e-05)
np.testing.assert_allclose(ex_out, out_dy_np, rtol=1e-05)
def test_out_tensor_input(self):
with fluid.dygraph.guard():
input_1 = np.random.random([4, 6, 6]).astype("int32")
......
......@@ -1839,8 +1839,10 @@ def split(x, num_or_sections, axis=0, name=None):
"The type of 'num_or_sections' in split must be int, list or tuple in imperative mode, but "
"received %s." % (type(num_or_sections)))
if in_dygraph_mode():
return _C_ops.split(input, [num_or_sections] if isinstance(
num_or_sections, int) else num_or_sections, dim)
if isinstance(num_or_sections, int):
return _C_ops.split_with_num(input, num_or_sections, dim)
else:
return _C_ops.split(input, num_or_sections, dim)
elif _in_legacy_dygraph():
out = [_varbase_creator() for n in range(num)]
_legacy_C_ops.split(input, out, *attrs)
......