Commit 33d7aae1 authored by liym27, committed by Aurelius84

Cherry-pick bug fixes for ops: reshape, concat, split and unsqueeze (#20929)

* [cherry-pick] Fix bug in reshape (#20781):

Consider the situation in which the shape of the input can contain more than one -1.

* [cherry-pick] Support Tensor for split and concat, support -1 in num_or_sections, and add checks for num_or_sections (#20780)

* Improve split and concat ops:
1. Support Tensor for argument 'dim' in split op.
2. Support Tensor for argument 'axis' in concat op.
* Redefine function GetDataFromTensor and set unknown output shapes to -1.
* Add check: Attr(sections) must match Input(X).
* Support Tensor for attr(sections); attr(sections) can contain -1.
* Improve error messages, fix a bug in concat, and call Resize only when necessary.
test=release/1.6

* [cherry-pick] Improve unsqueeze op to support int and Tensor for argument axes (#20824)

* Improve unsqueeze op to support int, Tensor, and Tensor list for argument axes.
* Call Resize only when necessary. test=release/1.6

* [cherry-pick] Support both int32 and int64 for Tensor-valued attributes in concat/split/unsqueeze. test=release/1.6 (#20912)
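For a quick picture of the user-facing behavior these changes add, here is a minimal sketch against the release/1.6 `paddle.fluid` API (the variable names are illustrative; the unit tests further below are the authoritative usage):

import numpy as np
import paddle.fluid as fluid

x = fluid.data(name='x', shape=[3, 9, 5], dtype='float32')
# The split axis may now be a 1-element int32/int64 Tensor instead of an int.
axis = fluid.layers.fill_constant(shape=[1], dtype='int32', value=1)
# num_or_sections may contain one -1, inferred from the remaining sections.
s0, s1, s2 = fluid.layers.split(x, num_or_sections=[2, 3, -1], dim=axis)
# concat likewise accepts a Tensor-valued axis.
y = fluid.layers.concat([s0, s1, s2], axis=axis)
# unsqueeze accepts an int, a Tensor, or a list mixing both for axes.
z = fluid.layers.unsqueeze(x, axes=axis)

exe = fluid.Executor(fluid.CPUPlace())
res_y, res_z = exe.run(fluid.default_main_program(),
                       feed={'x': np.random.rand(3, 9, 5).astype('float32')},
                       fetch_list=[y, z])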
Parent de130e95
@@ -32,58 +32,36 @@ class ConcatOp : public framework::OperatorWithKernel {
  void InferShape(framework::InferShapeContext *ctx) const override {
    PADDLE_ENFORCE_GE(ctx->Inputs("X").size(), 1UL,
                      "Inputs(X) of ConcatOp should not be empty.");
    PADDLE_ENFORCE_EQ(ctx->HasOutput("Out"), true,
                      "Output(Out) of ConcatOp should not be null.");

    auto inputs_dims = ctx->GetInputsDim("X");

    const size_t inputs_num = inputs_dims.size();
    PADDLE_ENFORCE_GT(inputs_num, 0,
                      "ShapeError: Input tensors count should > 0. But "
                      "received inputs' length is 0.");
    if (inputs_num == 1) {
      VLOG(3) << "Warning: concat op has only one input, may waste memory";
    }

    if (ctx->HasInput("AxisTensor")) {
      auto out_dims =
          framework::make_ddim(std::vector<int>(inputs_dims[0].size(), -1));
      ctx->SetOutputDim("Out", out_dims);
      ctx->ShareLoD("X", /*->*/ "Out");
    } else {
      size_t axis =
          ComputeAxis(static_cast<int64_t>(ctx->Attrs().Get<int>("axis")),
                      static_cast<int64_t>(inputs_dims[0].size()));
      framework::DDim out_dims =
          ComputeAndCheckShape(ctx->IsRuntime(), inputs_dims, axis);
      if (out_dims[axis] < 0) {
        out_dims[axis] = -1;
      }
      ctx->SetOutputDim("Out", out_dims);
      ctx->ShareLoD("X", /*->*/ "Out");
    }
  }

 protected:
@@ -111,6 +89,16 @@ class ConcatOp : public framework::OperatorWithKernel {
#endif
    return framework::OpKernelType(input_data_type, ctx.GetPlace());
  }

  framework::OpKernelType GetKernelTypeForVar(
      const std::string &var_name, const Tensor &tensor,
      const framework::OpKernelType &expected_kernel_type) const override {
    if (var_name == "AxisTensor") {
      return expected_kernel_type;
    }
    return framework::OpKernelType(expected_kernel_type.data_type_,
                                   tensor.place(), tensor.layout());
  }
};

class ConcatOpMaker : public framework::OpProtoAndCheckerMaker {
@@ -128,6 +116,12 @@ class ConcatOpMaker : public framework::OpProtoAndCheckerMaker {
             "interpreted as counting from the end of the rank."
             "i.e., axis + rank(X) th dimension.")
        .SetDefault(0);
    AddInput("AxisTensor",
             "(Tensor) The axis along which the input tensors will be "
             "concatenated. "
             "It has higher priority than Attr(axis). "
             "The shape of AxisTensor must be [1].")
        .AsDispensable();
    AddAttr<bool>("use_quantizer",
                  "(bool, default false) "
                  "Set to true for operators that should be quantized and use "
@@ -178,6 +172,16 @@ class ConcatOpGrad : public framework::OperatorWithKernel {
                                       ctx, framework::GradVarName("Out")),
                                   ctx.GetPlace());
  }

  framework::OpKernelType GetKernelTypeForVar(
      const std::string &var_name, const Tensor &tensor,
      const framework::OpKernelType &expected_kernel_type) const override {
    if (var_name == "AxisTensor") {
      return expected_kernel_type;
    }
    return framework::OpKernelType(expected_kernel_type.data_type_,
                                   tensor.place(), tensor.layout());
  }
};

DECLARE_NO_NEED_BUFFER_VARS_INFERENCE(ConcatOpGradNoNeedBufferVarInference,
@@ -192,6 +196,7 @@ class ConcatGradOpDescMaker : public framework::SingleGradOpDescMaker {
    std::unique_ptr<framework::OpDesc> op(new framework::OpDesc());
    op->SetType("concat_grad");
    op->SetInput("X", Input("X"));
    op->SetInput("AxisTensor", Input("AxisTensor"));
    op->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
    op->SetOutput(framework::GradVarName("X"), InputGrad("X", false));
    op->SetAttrMap(Attrs());
...
@@ -14,14 +14,51 @@ limitations under the License. */

#pragma once

#include <string>
#include <utility>
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/concat_and_split.h"
#include "paddle/fluid/operators/strided_memcpy.h"
#include "paddle/fluid/operators/utils.h"

namespace paddle {
namespace operators {

static inline framework::DDim ComputeAndCheckShape(
    const bool is_runtime, const std::vector<framework::DDim>& inputs_dims,
    const int axis) {
  const size_t n = inputs_dims.size();
  auto out_dims = inputs_dims[0];
  size_t in_zero_dims_size = out_dims.size();
  for (size_t i = 1; i < n; i++) {
    for (size_t j = 0; j < in_zero_dims_size; j++) {
      if (j == axis) {
        if (is_runtime) {
          out_dims[axis] += inputs_dims[i][j];
        } else {
          if (inputs_dims[i][j] == -1) {
            out_dims[axis] = -1;
          } else {
            out_dims[axis] += inputs_dims[i][j];
          }
        }
      } else {
        bool check_shape =
            is_runtime || (out_dims[j] > 0 && inputs_dims[i][j] > 0);
        if (check_shape) {
          // check that all shapes are equal at run time
          PADDLE_ENFORCE_EQ(
              inputs_dims[0][j], inputs_dims[i][j],
              "ShapeError: Dimension %d in inputs' shapes must be equal. "
              "But received input[0]'s shape = "
              "[%s], input[%d]'s shape = [%s].",
              j, inputs_dims[0], i, inputs_dims[i]);
        }
      }
    }
  }
  return out_dims;
}

static inline int64_t ComputeAxis(int64_t axis, int64_t rank) {
  if (axis < 0) {
@@ -36,9 +73,27 @@ class ConcatKernel : public framework::OpKernel<T> {
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto ins = ctx.MultiInput<framework::Tensor>("X");
    framework::Tensor* out = ctx.Output<framework::Tensor>("Out");
    PADDLE_ENFORCE_EQ(ins[0] != nullptr, true, "The input should not be null.");
    auto axis = ctx.Attr<int>("axis");
    bool need_resize_out_dims = false;
    if (ctx.HasInput("AxisTensor")) {
      auto* axis_tensor = ctx.Input<framework::Tensor>("AxisTensor");
      axis = GetDataFromTensor<int>(axis_tensor)[0];
      need_resize_out_dims = true;
    }
    axis = ComputeAxis(static_cast<int64_t>(axis),
                       static_cast<int64_t>(ins[0]->dims().size()));

    if (need_resize_out_dims) {
      const size_t n = ins.size();
      std::vector<framework::DDim> ins_dims(n);
      for (size_t i = 0; i < n; i++) {
        ins_dims[i] = ins[i]->dims();
      }

      framework::DDim out_dims = ComputeAndCheckShape(true, ins_dims, axis);
      out->Resize(out_dims);
    }
    auto place = ctx.GetPlace();
    out->mutable_data<T>(place);
@@ -92,10 +147,15 @@ class ConcatGradKernel : public framework::OpKernel<T> {
        }
      }
    }
    PADDLE_ENFORCE_EQ(ins[0] != nullptr, true, "The input should not be null.");

    auto axis = ctx.Attr<int>("axis");
    if (ctx.HasInput("AxisTensor")) {
      auto* axis_tensor = ctx.Input<framework::Tensor>("AxisTensor");
      axis = GetDataFromTensor<int>(axis_tensor)[0];
    }
    axis = ComputeAxis(static_cast<int64_t>(axis),
                       static_cast<int64_t>(ins[0]->dims().size()));
    // get output tensors whose names are not kEmptyVarName
    std::vector<framework::Tensor*> outputs;
    for (size_t j = 0; j < outs.size(); ++j) {
...
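To make the compile-time rule in ComputeAndCheckShape concrete, here is a minimal Python sketch of the same inference logic (a hypothetical stand-alone helper, not part of the Paddle API):

def concat_infer_shape(inputs_dims, axis, is_runtime):
    # Mirrors ComputeAndCheckShape: sum the sizes along `axis`; at compile
    # time a -1 (unknown) size makes the output's axis size -1 as well.
    out = list(inputs_dims[0])
    for i, dims in enumerate(inputs_dims[1:], start=1):
        for j, d in enumerate(dims):
            if j == axis:
                if not is_runtime and (d == -1 or out[axis] == -1):
                    out[axis] = -1
                else:
                    out[axis] += d
            elif is_runtime or (out[j] > 0 and d > 0):
                # non-axis dimensions must agree whenever they are known
                assert out[j] == d, "input %d mismatches on dim %d" % (i, j)
    return out

print(concat_infer_shape([[2, 3], [2, -1]], axis=1, is_runtime=False))  # [2, -1]
print(concat_infer_shape([[2, 3], [2, 4]], axis=1, is_runtime=True))    # [2, 7]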
@@ -186,13 +186,16 @@ class ReshapeOp : public framework::OperatorWithKernel {
        output_shape[unk_dim_idx] = -1;
      }
    } else {
      if (all_positive) {
        PADDLE_ENFORCE_EQ(
            capacity, in_size,
            "ShapeError: The 'shape' in ReshapeOp is invalid. "
            "The input tensor X's size must be equal to the capacity of "
            "'shape'. But received X's shape = [%s], X's size = %d, "
            "'shape' is [%s], the capacity of 'shape' is %d.",
            in_dims, in_size, framework::make_ddim(shape), capacity);
      }
    }
    return framework::make_ddim(output_shape);
  }
...
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/split_op.h"
#include <string>

namespace paddle {
namespace operators {
@@ -23,8 +24,8 @@ class SplitOp : public framework::OperatorWithKernel {
  using framework::OperatorWithKernel::OperatorWithKernel;

  void InferShape(framework::InferShapeContext *ctx) const override {
    PADDLE_ENFORCE_EQ(ctx->HasInput("X"), true,
                      "Input(X) of SplitOp should not be null.");
    PADDLE_ENFORCE_GE(ctx->Outputs("Out").size(), 1UL,
                      "Outputs(Out) of SplitOp should not be empty.");
    auto in_dims = ctx->GetInputDim("X");
@@ -34,38 +35,29 @@ class SplitOp : public framework::OperatorWithKernel {
    std::vector<int> sections = static_cast<std::vector<int>>(
        ctx->Attrs().Get<std::vector<int>>("sections"));
    const size_t outs_number = outs_names.size();

    if (sections.size() > 0) {
      PADDLE_ENFORCE_EQ(sections.size(), outs_number,
                        "tensor split sections size "
                        "should be equal to output size.");
    }

    if (ctx->HasInput("AxisTensor")) {
      auto out_dims =
          framework::make_ddim(std::vector<int>(in_dims.size(), -1));
      std::vector<framework::DDim> outs_dims(outs_number, out_dims);
      ctx->SetOutputsDim("Out", outs_dims);
      for (size_t i = 0; i < outs_number; ++i) {
        ctx->ShareLoD("X", "Out", 0, i);
      }
      return;
    }

    bool each_section_is_known =
        (sections.size() > 0 && !ctx->HasInputs("SectionsTensorList"));

    auto outs_dims = UpdateOutsDims(ctx->IsRuntime(), each_section_is_known,
                                    in_dims, num, sections, axis, outs_number);
    ctx->SetOutputsDim("Out", outs_dims);
    if (axis != 0) {
      // Only pass LoD when not splitting along the first dim.
@@ -74,12 +66,41 @@ class SplitOp : public framework::OperatorWithKernel {
      }
    }
  }

 protected:
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext &ctx) const override {
    return framework::OpKernelType(ctx.Input<framework::LoDTensor>("X")->type(),
                                   ctx.device_context());
  }

  framework::OpKernelType GetKernelTypeForVar(
      const std::string &var_name, const Tensor &tensor,
      const framework::OpKernelType &expected_kernel_type) const override {
    if (var_name == "AxisTensor" || var_name == "SectionsTensorList") {
      return expected_kernel_type;
    }
    return framework::OpKernelType(expected_kernel_type.data_type_,
                                   tensor.place(), tensor.layout());
  }
};

class SplitOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput("X", "(Tensor) Input tensor of the split operator.");
    AddInput("AxisTensor",
             "(Tensor) The axis along which the input will be split. "
             "It has higher priority than Attr(axis). "
             "The shape of AxisTensor must be [1].")
        .AsDispensable();
    AddInput("SectionsTensorList",
             "(vector<Tensor<int>>, optional). "
             "The length of each output along the specified axis. "
             "It has a higher priority than Attr(sections). "
             "The shape of the element in vector must be [1].")
        .AsDuplicable()
        .AsDispensable();
    AddOutput("Out", "(Tensor) Output tensors of the split operator.")
        .AsDuplicable();
    AddComment(R"DOC(
...
@@ -15,21 +15,125 @@ limitations under the License. */

#pragma once

#include <chrono>  // NOLINT
#include <memory>
#include <string>
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/concat_and_split.h"
#include "paddle/fluid/operators/strided_memcpy.h"
#include "paddle/fluid/operators/utils.h"

namespace paddle {
namespace operators {

static inline std::vector<framework::DDim> UpdateOutsDims(
    const bool is_runtime, const bool each_section_is_known,
    const framework::DDim in_dims, const size_t num, std::vector<int> sections,
    const size_t axis, const int outs_number) {
  std::vector<framework::DDim> outs_dims(outs_number, in_dims);
  int64_t input_axis_dim = in_dims[axis];
  if (num > 0) {
    if (is_runtime || input_axis_dim > 0) {
      PADDLE_ENFORCE_EQ(input_axis_dim % num, 0,
                        "The input's size along the split dimension "
                        "must be evenly divisible by Attr(num_or_sections). "
                        "But received Attr(num_or_sections) "
                        "= %d, input(X)'s shape = [%s], Attr(dim) = %d.",
                        num, in_dims, axis);
      size_t out_axis_dim = input_axis_dim / num;
      for (auto& out_dim : outs_dims) {
        out_dim[axis] = out_axis_dim;
      }
    } else {
      for (auto& out_dim : outs_dims) {
        out_dim[axis] = -1;
      }
    }
  } else if (sections.size() > 0) {
    if (is_runtime || input_axis_dim > 0) {
      const int unk_dim_val = -1;
      int unk_dim_idx = -1, num_of_unk = 0;
      int sum_of_section = 0;
      for (size_t i = 0; i < sections.size(); ++i) {
        if (sections[i] == unk_dim_val) {
          num_of_unk++;
          unk_dim_idx = i;
        } else {
          sum_of_section += sections[i];
        }
      }

      if (each_section_is_known) {
        PADDLE_ENFORCE_LE(num_of_unk, 1,
                          "Only one dimension value of Attr(num_or_sections) "
                          "in SplitOp can be -1. "
                          "But received Attr(num_or_sections) = [%s].",
                          framework::make_ddim(sections));
      }

      if (unk_dim_idx != -1) {
        // for example: input shape = [4, 5], axis = 1, sections = [2, 3, -1].
        // input_axis_dim = 5, sum_of_section = 5.
        // the following check will fail.
        PADDLE_ENFORCE_LT(
            sum_of_section, input_axis_dim,
            "Sum of Attr(num_or_sections) other than unknown section "
            "must be less than the input's size "
            "along the split dimension. But received Attr(num_or_sections) "
            "= [%s], input(X)'s shape = [%s], Attr(dim) = %d.",
            framework::make_ddim(sections), in_dims, axis);
        if (each_section_is_known) {
          sections[unk_dim_idx] = input_axis_dim - sum_of_section;
        }
      } else {
        PADDLE_ENFORCE_EQ(
            sum_of_section, input_axis_dim,
            "Sum of Attr(num_or_sections) must be equal to the input's size "
            "along the split dimension. But received Attr(num_or_sections)"
            " = [%s], input(X)'s shape = [%s], Attr(dim) = %d.",
            framework::make_ddim(sections), in_dims, axis);
      }
    }
    for (size_t i = 0; i < outs_number; ++i) {
      outs_dims[i][axis] = sections[i];
    }
  }
  return outs_dims;
}

template <typename DeviceContext, typename T>
class SplitOpKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* in = ctx.Input<framework::Tensor>("X");
    auto outs = ctx.MultiOutput<framework::Tensor>("Out");
    int num = ctx.Attr<int>("num");
    std::vector<int> sections = ctx.Attr<std::vector<int>>("sections");
    int axis = ctx.Attr<int>("axis");

    auto in_dims = in->dims();
    auto outs_number = outs.size();
    bool need_resize_outs_dims = false;
    if (ctx.HasInput("AxisTensor")) {
      auto* axis_tensor = ctx.Input<framework::Tensor>("AxisTensor");
      axis = GetDataFromTensor(axis_tensor)[0];
      need_resize_outs_dims = true;
    }
    auto sections_tensor_list =
        ctx.MultiInput<framework::Tensor>("SectionsTensorList");
    if (sections_tensor_list.size() > 0) {
      sections = GetDataFromTensorList(sections_tensor_list);
      need_resize_outs_dims = true;
    }
    if (need_resize_outs_dims) {
      std::vector<framework::DDim> outs_dims =
          UpdateOutsDims(true, true, in_dims, num, sections, axis, outs_number);
      for (size_t j = 0; j < outs.size(); ++j) {
        outs[j]->Resize(outs_dims[j]);
      }
    }

    auto place = ctx.GetPlace();

    std::vector<const framework::Tensor*> shape_refer;
@@ -58,6 +162,7 @@ class SplitGradMaker : public framework::SingleGradOpDescMaker {
    auto op = new framework::OpDesc();
    op->SetType("concat");
    op->SetInput("X", OutputGrad("Out"));
    op->SetInput("AxisTensor", Input("AxisTensor"));
    op->SetOutput("Out", InputGrad("X"));
    op->SetAttrMap(Attrs());
    return std::unique_ptr<framework::OpDesc>(op);
...
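The sections handling in UpdateOutsDims boils down to a small rule: at most one section may be -1, and it absorbs whatever the known sections leave over. A minimal Python sketch under that reading (hypothetical helper, illustration only):

def resolve_sections(sections, axis_dim, each_section_is_known=True):
    # Mirrors the sections path of UpdateOutsDims.
    unknown = [i for i, s in enumerate(sections) if s == -1]
    assert len(unknown) <= 1, "only one section may be -1"
    known_sum = sum(s for s in sections if s != -1)
    if unknown:
        assert known_sum < axis_dim, "known sections already cover the axis"
        if each_section_is_known:
            sections[unknown[0]] = axis_dim - known_sum
    else:
        assert known_sum == axis_dim, "sections must sum to the axis size"
    return sections

print(resolve_sections([2, 3, -1], axis_dim=9))  # [2, 3, 4]
print(resolve_sections([2, 3, 4], axis_dim=9))   # [2, 3, 4]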
@@ -37,12 +37,38 @@ class UnsqueezeOp : public framework::OperatorWithKernel {
    PADDLE_ENFORCE_LE(x_dims.size(), 6,
                      "Invalid dimensions, the rank of Input(X) "
                      "should be in the range of [1, 6] (Eigen limit)");
    if (!axes.empty()) {
      auto out_dims = GetOutputShape(axes, x_dims);
      ctx->SetOutputDim("Out", out_dims);
      if (x_dims[0] == out_dims[0]) {
        // Only pass LoD when the first dimension of output and Input(X)
        // are the same.
        ctx->ShareLoD("X", "Out");
      }
    } else if (ctx->HasInputs("AxesTensorList")) {
      auto AxesTensorList = ctx->Inputs("AxesTensorList");
      int output_size = x_dims.size() + static_cast<int>(AxesTensorList.size());
      PADDLE_ENFORCE_LE(output_size, 6,
                        "The output tensor's rank should not exceed 6.");
      std::vector<int> vec_out_dims(output_size, -1);
      ctx->SetOutputDim("Out", framework::make_ddim(vec_out_dims));
    } else if (ctx->HasInput("AxesTensor")) {
      auto axes_dims = ctx->GetInputDim("AxesTensor");
      PADDLE_ENFORCE_EQ(
          axes_dims.size(), 1,
          "Input(AxesTensor)'s dimension of Op(unsqueeze) must be 1. "
          "But received AxesTensor's shape = [%s], "
          "AxesTensor's dimension = %d.",
          axes_dims, axes_dims.size());
      PADDLE_ENFORCE_GE(axes_dims[0], 0,
                        "Input(AxesTensor)'s shape must be known. But received "
                        "AxesTensor's shape = [%s]",
                        axes_dims);
      int output_size = x_dims.size() + static_cast<int>(axes_dims[0]);
      PADDLE_ENFORCE_LE(output_size, 6,
                        "The output tensor's rank should not exceed 6.");
      std::vector<int> vec_out_dims(output_size, -1);
      ctx->SetOutputDim("Out", framework::make_ddim(vec_out_dims));
    }
  }
@@ -83,19 +109,46 @@ class UnsqueezeOp : public framework::OperatorWithKernel {
    return framework::make_ddim(output_shape);
  }

 protected:
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext &ctx) const override {
    return framework::OpKernelType(ctx.Input<framework::LoDTensor>("X")->type(),
                                   ctx.device_context());
  }

  framework::OpKernelType GetKernelTypeForVar(
      const std::string &var_name, const framework::Tensor &tensor,
      const framework::OpKernelType &expected_kernel_type) const override {
    if (var_name == "AxesTensor" || var_name == "AxesTensorList") {
      return expected_kernel_type;
    }
    return framework::OpKernelType(expected_kernel_type.data_type_,
                                   tensor.place(), tensor.layout());
  }
};

class UnsqueezeOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput("X", "(Tensor). The input tensor of unsqueeze operator.");
    AddInput("AxesTensor",
             "(Tensor<int32>, optional). The dimensions to be inserted. "
             "If it exists, it will replace Attr(axes).")
        .AsDispensable();
    AddInput(
        "AxesTensorList",
        "(vector<Tensor<int32>>, optional). The dimensions to be inserted. "
        "If it exists, it will replace Attr(axes). "
        "The shape of the element in vector must be [1].")
        .AsDuplicable()
        .AsDispensable();
    AddOutput("Out", "(Tensor). The output tensor of unsqueeze operator.");
    AddAttr<std::vector<int>>("axes",
                              "(std::vector<int>). List of integers,"
                              " indicating the dimensions to be inserted")
        .SetDefault({})
        .AddCustomChecker([](const std::vector<int> &axes) {
          PADDLE_ENFORCE_EQ(!axes.empty(), true,
                            "Invalid axes, the unsqueeze axes are empty.");
          // Validity Check: axes dims (<6).
          PADDLE_ENFORCE_LT(static_cast<int>(axes.size()), 6,
                            "Invalid dimensions, dynamic dimensions should be "
@@ -136,28 +189,12 @@ class UnsqueezeGradOp : public framework::OperatorWithKernel {
// will be used in unsqueeze_grad; in this way, the framework can reuse
// the memory of X immediately after the unsqueeze2_op is finished.
// Considering compatibility issues, we could not fix unsqueeze2_op
class Unsqueeze2Op : public UnsqueezeOp {
 public:
  using UnsqueezeOp::UnsqueezeOp;
  void InferShape(framework::InferShapeContext *ctx) const override {
    UnsqueezeOp::InferShape(ctx);
    const auto &x_dims = ctx->GetInputDim("X");

    PADDLE_ENFORCE_EQ(
        ctx->HasOutput("XShape"), true,
@@ -252,12 +289,11 @@ REGISTER_OP_CPU_KERNEL(
    ops::UnsqueezeGradKernel<paddle::platform::CPUDeviceContext, int8_t>,
    ops::UnsqueezeGradKernel<paddle::platform::CPUDeviceContext, int64_t>);
REGISTER_OP_CPU_KERNEL(
    unsqueeze2, ops::UnsqueezeKernel<paddle::platform::CPUDeviceContext, float>,
    ops::UnsqueezeKernel<paddle::platform::CPUDeviceContext, double>,
    ops::UnsqueezeKernel<paddle::platform::CPUDeviceContext, int>,
    ops::UnsqueezeKernel<paddle::platform::CPUDeviceContext, int8_t>,
    ops::UnsqueezeKernel<paddle::platform::CPUDeviceContext, int64_t>);
REGISTER_OP_CPU_KERNEL(
    unsqueeze2_grad,
    ops::Unsqueeze2GradKernel<paddle::platform::CPUDeviceContext, float>,
...
@@ -31,11 +31,11 @@ REGISTER_OP_CUDA_KERNEL(
    ops::UnsqueezeGradKernel<paddle::platform::CUDADeviceContext, int64_t>);
REGISTER_OP_CUDA_KERNEL(
    unsqueeze2,
    ops::UnsqueezeKernel<paddle::platform::CUDADeviceContext, float>,
    ops::UnsqueezeKernel<paddle::platform::CUDADeviceContext, double>,
    ops::UnsqueezeKernel<paddle::platform::CUDADeviceContext, int>,
    ops::UnsqueezeKernel<paddle::platform::CUDADeviceContext, int8_t>,
    ops::UnsqueezeKernel<paddle::platform::CUDADeviceContext, int64_t>);
REGISTER_OP_CUDA_KERNEL(
    unsqueeze2_grad,
    ops::Unsqueeze2GradKernel<paddle::platform::CUDADeviceContext, float>,
...
@@ -19,6 +19,7 @@ limitations under the License. */
#include "paddle/fluid/operators/math/blas.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/operators/math/pooling.h"
#include "paddle/fluid/operators/utils.h"
#include "paddle/fluid/platform/device_context.h"

namespace paddle {
@@ -28,12 +29,28 @@ template <typename DeviceContext, typename T>
class UnsqueezeKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext &context) const override {
    auto axes = context.Attr<std::vector<int>>("axes");
    auto *in = context.Input<framework::LoDTensor>("X");
    auto *out = context.Output<framework::LoDTensor>("Out");
    auto x_dims = in->dims();

    bool need_resize_out_dims = false;
    if (axes.empty()) {
      auto axes_tensor_list =
          context.MultiInput<framework::Tensor>("AxesTensorList");
      if (axes_tensor_list.size() > 0) {
        axes = GetDataFromTensorList<int>(axes_tensor_list);
      } else if (context.HasInput("AxesTensor")) {
        auto *axes_tensor = context.Input<framework::Tensor>("AxesTensor");
        axes = GetDataFromTensor<int>(axes_tensor);
      }
      need_resize_out_dims = true;
    }
    framework::DDim out_dims = out->dims();
    if (need_resize_out_dims) {
      out_dims = GetOutputShape(axes, x_dims);
      out->Resize(out_dims);
    }
    out->mutable_data(context.GetPlace(), in->type());
    framework::TensorCopy(
        *in, context.GetPlace(),
@@ -95,27 +112,6 @@ class UnsqueezeGradKernel : public framework::OpKernel<T> {
  }
};

template <typename DeviceContext, typename T>
class Unsqueeze2GradKernel : public framework::OpKernel<T> {
 public:
...
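For intuition about the target shape the unsqueeze kernels compute, here is a Python sketch of the axis-insertion rule implemented by GetOutputShape (a hypothetical analogue; negative axes count from the end of the grown rank):

def unsqueeze_shape(x_shape, axes):
    # Insert size-1 dims at the (possibly negative) positions in `axes`,
    # resolving each axis against the output rank grown so far.
    out = list(x_shape)
    for a in axes:
        cur = len(out)
        a = a + cur + 1 if a < 0 else a
        assert 0 <= a <= cur, "axis out of range"
        out.insert(a, 1)
    return out

print(unsqueeze_shape([3, 2, 5], [3, 1, 1]))  # [3, 1, 1, 2, 5, 1]
print(unsqueeze_shape([3, 5], [0, -1]))       # [1, 3, 5, 1]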
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <paddle/fluid/framework/operator.h>
#include <string>
#include <vector>
namespace paddle {
namespace operators {
template <typename T = int32_t>
inline std::vector<T> GetDataFromTensor(const framework::Tensor* x) {
std::vector<T> vec_new_data;
if (x->type() == framework::proto::VarType::INT32) {
auto* data = x->data<int>();
if (platform::is_gpu_place(x->place())) {
framework::Tensor cpu_attr_tensor;
TensorCopySync(*x, platform::CPUPlace(), &cpu_attr_tensor);
data = cpu_attr_tensor.data<int>();
}
vec_new_data = std::vector<T>(data, data + x->numel());
} else if (x->type() == framework::proto::VarType::INT64) {
auto* data = x->data<int64_t>();
if (platform::is_gpu_place(x->place())) {
framework::Tensor cpu_attr_tensor;
TensorCopySync(*x, platform::CPUPlace(), &cpu_attr_tensor);
data = cpu_attr_tensor.data<int64_t>();
}
vec_new_data = std::vector<T>(data, data + x->numel());
} else {
PADDLE_THROW("The dtype of Tensor must be int32 or int64.");
}
return vec_new_data;
}
template <typename T = int32_t>
inline std::vector<T> GetDataFromTensorList(
const std::vector<const framework::Tensor*>& list_tensor) {
std::vector<T> vec_new_data;
for (size_t i = 0; i < list_tensor.size(); ++i) {
auto tensor = list_tensor[i];
PADDLE_ENFORCE_EQ(tensor->dims(), framework::make_ddim({1}),
"ShapeError: The shape of Tensor in list must be [1]. "
"But received the shape "
"is [%s]",
tensor->dims());
if (tensor->type() == framework::proto::VarType::INT32) {
if (platform::is_gpu_place(tensor->place())) {
framework::Tensor temp;
TensorCopySync(*tensor, platform::CPUPlace(), &temp);
vec_new_data.push_back(static_cast<T>(*temp.data<int>()));
} else {
vec_new_data.push_back(static_cast<T>(*tensor->data<int>()));
}
} else if (tensor->type() == framework::proto::VarType::INT64) {
if (platform::is_gpu_place(tensor->place())) {
framework::Tensor temp;
TensorCopySync(*tensor, platform::CPUPlace(), &temp);
vec_new_data.push_back(static_cast<T>(*temp.data<int64_t>()));
} else {
vec_new_data.push_back(static_cast<T>(*tensor->data<int64_t>()));
}
} else {
PADDLE_THROW("The dtype of Tensor in list must be int32 or int64.");
}
}
return vec_new_data;
}
} // namespace operators
} // namespace paddle
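The int32/int64 compatibility these helpers provide can be pictured with a small numpy-based analogue (hypothetical, for illustration only):

import numpy as np

def get_data_from_tensor(arr):
    # Python analogue of GetDataFromTensor: accept int32 or int64 data
    # and return plain Python ints.
    if arr.dtype not in (np.int32, np.int64):
        raise TypeError("The dtype of Tensor must be int32 or int64.")
    return [int(v) for v in arr.ravel()]

print(get_data_from_tensor(np.array([2, 3, -1], dtype=np.int32)))  # [2, 3, -1]
print(get_data_from_tensor(np.array([1], dtype=np.int64)))         # [1]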
@@ -6681,62 +6681,117 @@ def split(input, num_or_sections, dim=-1, name=None):
    Args:
        input (Variable): The input variable which is an N-D Tensor or LoDTensor, data type being float32, float64, int32 or int64.
        num_or_sections (int|list|tuple): If :attr:`num_or_sections` is an integer,
            then the integer indicates the number of equal sized sub-Tensors
            that the Tensor will be divided into. If :attr:`num_or_sections`
            is a list or tuple, the length of it indicates the number of
            sub-Tensors and the elements in it indicate the sizes of sub-Tensors'
            :attr:`dim` dimension orderly. The length of the list mustn't be larger than the Tensor's size of :attr:`dim` .
        dim (int32|Variable, optional): A scalar with type ``int32`` or a ``Tensor`` with shape [1] and type ``int32``. The dimension along which to split. If :math:`dim < 0`, the
            dimension to split along is :math:`rank(input) + dim`. Default is -1.
        name (str, optional): The default value is None. Normally there is no need for user to set this property. For more information, please refer to :ref:`api_guide_Name` .

    Returns:
        list(Variable): The list of segmented Tensor variables.

    Raises:
        TypeError: num_or_sections is not int, list or tuple.
        TypeError: dim is not int or Variable.

    Example:
        .. code-block:: python

            import paddle.fluid as fluid

            # input is a variable whose shape is [3, 9, 5]
            input = fluid.data(
                name="input", shape=[3, 9, 5], dtype="float32")

            x0, x1, x2 = fluid.layers.split(input, num_or_sections=3, dim=1)
            # x0.shape [3, 3, 5]
            # x1.shape [3, 3, 5]
            # x2.shape [3, 3, 5]

            x0, x1, x2 = fluid.layers.split(input, num_or_sections=[2, 3, 4], dim=1)
            # x0.shape [3, 2, 5]
            # x1.shape [3, 3, 5]
            # x2.shape [3, 4, 5]

            x0, x1, x2 = fluid.layers.split(input, num_or_sections=[2, 3, -1], dim=1)
            # x0.shape [3, 2, 5]
            # x1.shape [3, 3, 5]
            # x2.shape [3, 4, 5]
    """
    if not isinstance(num_or_sections, (int, list, tuple)):
        raise TypeError(
            "The type of 'num_or_sections' in split must be int, list or "
            "tuple, but received %s." % (type(num_or_sections)))
    if not isinstance(dim, (int, Variable)):
        raise TypeError(
            "The type of 'dim' in split must be int or Variable, but "
            "received %s." % (type(dim)))

    helper = LayerHelper('split', **locals())
    input_shape = input.shape
    inputs = {'X': input}
    attrs = {'num': num_or_sections if isinstance(num_or_sections, int) else 0}

    def _get_SectionsTensorList(one_list):
        tensor_list = []
        unk_dim_idx = -1
        for idx, dim_size in enumerate(one_list):
            if isinstance(dim_size, Variable):
                dim_size.stop_gradient = True
                tensor_list.append(dim_size)
            else:
                assert (isinstance(dim_size, int))
                if dim_size == -1:
                    assert unk_dim_idx == -1, (
                        "Only one value of 'num_or_sections' in split can "
                        "be -1. But received num_or_sections[%d] is also -1." %
                        idx)
                    unk_dim_idx = idx
                temp_out = helper.create_variable_for_type_inference('int32')
                fill_constant(
                    [1], 'int32', dim_size, force_cpu=True, out=temp_out)
                tensor_list.append(temp_out)
        return tensor_list

    if isinstance(dim, Variable):
        dim.stop_gradient = True
        inputs['AxisTensor'] = dim
    else:
        dim = (len(input_shape) + dim) if dim < 0 else dim
        attrs['axis'] = dim

    if isinstance(num_or_sections, int):
        assert num_or_sections > 1, 'num_or_sections must be more than 1.'
        if isinstance(dim, int) and input_shape[dim] > 0:
            assert input_shape[dim] % num_or_sections == 0, \
                "The input's size along the split dimension " \
                "must be evenly divisible by Attr(num_or_sections). " \
                "But %d is not evenly divisible by %d. " % (input_shape[dim], num_or_sections)
        num = num_or_sections
    else:
        if isinstance(dim, int) and input_shape[dim] > 0:
            assert len(num_or_sections) <= input_shape[
                dim], 'len(num_or_sections) must not be more than input.shape[dim].'
        num = len(num_or_sections)
        attrs['sections'] = list(
            map(lambda ele: -1 if isinstance(ele, Variable) else ele,
                num_or_sections))
        contain_var = not all(not isinstance(ele, Variable)
                              for ele in num_or_sections)
        if contain_var:
            inputs['SectionsTensorList'] = _get_SectionsTensorList(
                num_or_sections)

    outs = [
        helper.create_variable_for_type_inference(dtype=helper.input_dtype())
        for i in range(num)
    ]
    helper.append_op(
        type='split', inputs=inputs, outputs={'Out': outs}, attrs=attrs)
    return outs
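A usage sketch of the new calling conventions (mirroring TestSplitAPI below; the tensor names are illustrative):

import numpy as np
import paddle.fluid as fluid

x = fluid.data(shape=[4, 5, 6], dtype='int32', name='x')
dim = fluid.layers.fill_constant([1], 'int64', 1)  # axis as a Tensor
sec = fluid.layers.fill_constant([1], 'int32', 2)  # one section as a Tensor
out0, out1, out2 = fluid.layers.split(
    x, num_or_sections=[sec, 1, -1], dim=dim)      # -1 is inferred as 2

exe = fluid.Executor(fluid.CPUPlace())
res = exe.run(fluid.default_main_program(),
              feed={'x': np.random.randint(0, 9, [4, 5, 6]).astype('int32')},
              fetch_list=[out0, out1, out2])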
@@ -9036,7 +9091,7 @@ def unsqueeze(input, axes, name=None):
    Args:
        input (Variable): The input Tensor to be unsqueezed. It is a N-D Tensor of data types float32, float64, int32.
        axes (int|list|tuple|Variable): Indicates the dimensions to be inserted. The data type is ``int32``. If ``axes`` is a list or tuple, the elements of it should be integers or Tensors with shape [1]. If ``axes`` is a Variable, it should be a 1-D Tensor.
        name (str|None): Name for this layer.

    Returns:
@@ -9050,13 +9105,45 @@ def unsqueeze(input, axes, name=None):
            y = fluid.layers.unsqueeze(input=x, axes=[1])
    """
    if not isinstance(axes, (int, list, tuple, Variable)):
        raise TypeError(
            "The type of 'axes' in unsqueeze must be int, list, tuple or Variable, but "
            "received %s." % (type(axes)))
    helper = LayerHelper("unsqueeze2", **locals())
    inputs = {"X": input}
    attrs = {}

    def _to_Variable_list(one_list):
        Variable_list = []
        for ele in one_list:
            if isinstance(ele, Variable):
                ele.stop_gradient = True
                Variable_list.append(ele)
            else:
                assert (isinstance(ele, int))
                temp_out = helper.create_variable_for_type_inference('int32')
                fill_constant([1], 'int32', ele, force_cpu=True, out=temp_out)
                Variable_list.append(temp_out)
        return Variable_list

    if isinstance(axes, int):
        axes = [axes]
    if isinstance(axes, Variable):
        axes.stop_gradient = True
        inputs["AxesTensor"] = axes
    elif isinstance(axes, (list, tuple)):
        contain_var = not all(not isinstance(ele, Variable) for ele in axes)
        if contain_var:
            inputs["AxesTensorList"] = _to_Variable_list(axes)
        else:
            attrs["axes"] = axes

    out = helper.create_variable_for_type_inference(dtype=input.dtype)
    x_shape = helper.create_variable_for_type_inference(dtype=input.dtype)
    helper.append_op(
        type="unsqueeze2",
        inputs=inputs,
        attrs=attrs,
        outputs={"Out": out,
                 "XShape": x_shape})
...
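A usage sketch of the widened axes argument (names are illustrative; TestUnsqueezeAPI below exercises the same paths):

import paddle.fluid as fluid

x = fluid.data(name='x', shape=[3, 2, 5], dtype='float32')
axis_t = fluid.layers.fill_constant([1], 'int32', 1)

y1 = fluid.layers.unsqueeze(x, axes=1)            # a plain int
y2 = fluid.layers.unsqueeze(x, axes=[0, axis_t])  # a list mixing int and Tensor
y3 = fluid.layers.unsqueeze(x, axes=axis_t)       # a 1-D int32 Tensor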
@@ -222,7 +222,7 @@ def concat(input, axis=0, name=None):
    Args:
        input(list): List of input Tensors with data type float32, float64, int32,
            int64.
        axis(int32|Variable, optional): A scalar with type ``int32`` or a ``Tensor`` with shape [1] and type ``int32``. The axis along which to concatenate. The effective range
            is [-R, R), where R is Rank(x). When axis < 0, it works the same way
            as axis + R. Default is 0.
        name (str, optional): The default value is None. Normally there is no
@@ -280,12 +280,21 @@ def concat(input, axis=0, name=None):
            raise TypeError(
                "The data type of x in 'input' in concat must be float16(only support on GPU), float32, float64, int32, int64, but received %s."
                % (convert_dtype(x.dtype)))

    if not isinstance(axis, (int, Variable)):
        raise TypeError(
            "The type of 'axis' in concat must be int or Variable, but "
            "received %s." % (type(axis)))
    inputs = {'X': input}
    attrs = {}
    if isinstance(axis, Variable):
        axis.stop_gradient = True
        inputs['AxisTensor'] = axis
    else:
        attrs['axis'] = axis

    out = helper.create_variable_for_type_inference(dtype=helper.input_dtype())
    helper.append_op(
        type='concat', inputs=inputs, outputs={'Out': [out]}, attrs=attrs)
    return out
...
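A usage sketch of concat with a Tensor-valued axis (illustrative; see TestConcatAPI below):

import paddle.fluid as fluid

x1 = fluid.data(name='x1', shape=[2, 3], dtype='float32')
x2 = fluid.data(name='x2', shape=[2, 4], dtype='float32')
axis_t = fluid.layers.fill_constant([1], 'int32', 1)  # axis as a Tensor
y = fluid.layers.concat([x1, x2], axis=axis_t)        # y.shape: [2, 7]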
@@ -95,6 +95,41 @@ class TestConcatOp5(TestConcatOp):
        self.axis = -3
def create_test_AxisTensor(parent):
class TestConcatAxisTensor(parent):
def setUp(self):
self.op_type = "concat"
self.dtype = self.get_dtype()
self.init_test_data()
self.inputs = {
'X': [('x0', self.x0), ('x1', self.x1), ('x2', self.x2)],
'AxisTensor': np.array([self.axis]).astype("int32")
}
self.attrs = {}
if self.axis < 0:
self.actual_axis = self.axis + len(self.x0.shape)
self.actual_axis = self.actual_axis if self.actual_axis > 0 else 0
else:
self.actual_axis = self.axis
self.outputs = {
'Out': np.concatenate(
(self.x0, self.x1, self.x2), axis=self.actual_axis)
}
cls_name = "{0}_{1}".format(parent.__name__, "AxisTensor")
TestConcatAxisTensor.__name__ = cls_name
globals()[cls_name] = TestConcatAxisTensor
create_test_AxisTensor(TestConcatOp)
create_test_AxisTensor(TestConcatOp2)
create_test_AxisTensor(TestConcatOp3)
create_test_AxisTensor(TestConcatOp4)
create_test_AxisTensor(TestConcatOp5)
#----------------Concat Fp16----------------
@@ -135,6 +170,39 @@ class TestConcatOpError(OpTest):
            x7 = fluid.layers.data(shape=[4], dtype='float16', name='x7')
            fluid.layers.concat([x6, x7])
# The type of axis in concat_op should be int or Variable.
def test_axis_type():
fluid.layers.concat([x6, x7], 3.2)
self.assertRaises(TypeError, test_axis_type)
class TestConcatAPI(OpTest):
def test_api(self):
x_1 = fluid.data(shape=[None, 1, 4, 5], dtype='int32', name='x_1')
fluid.layers.concat([x_1, x_1], 0)
input_2 = np.random.random([2, 1, 4, 5]).astype("int32")
input_3 = np.random.random([2, 2, 4, 5]).astype("int32")
x_2 = fluid.data(shape=[2, 1, 4, 5], dtype='int32', name='x_2')
x_3 = fluid.data(shape=[2, 2, 4, 5], dtype='int32', name='x_3')
positive_1_int32 = fluid.layers.fill_constant([1], "int32", 1)
positive_1_int64 = fluid.layers.fill_constant([1], "int64", 1)
out_1 = fluid.layers.concat(input=[x_2, x_3], axis=1)
out_2 = fluid.layers.concat(input=[x_2, x_3], axis=positive_1_int32)
out_3 = fluid.layers.concat(input=[x_2, x_3], axis=positive_1_int64)
exe = fluid.Executor(place=fluid.CPUPlace())
[res_1, res_2, res_3] = exe.run(
fluid.default_main_program(),
feed={"x_1": input_2,
"x_2": input_2,
"x_3": input_3},
fetch_list=[out_1, out_2, out_3])
assert np.array_equal(res_1, np.concatenate((input_2, input_3), axis=1))
assert np.array_equal(res_2, np.concatenate((input_2, input_3), axis=1))
assert np.array_equal(res_3, np.concatenate((input_2, input_3), axis=1))
if __name__ == '__main__':
    unittest.main()
@@ -17,6 +17,8 @@ from __future__ import print_function

import unittest
import numpy as np
from op_test import OpTest
import paddle.fluid as fluid
from paddle.fluid import compiler, Program, program_guard
class TestSplitOp(OpTest):
@@ -44,6 +46,161 @@ class TestSplitOp(OpTest):
        self.check_grad(['X'], ['out0', 'out1', 'out2'])
# test with attr(num)
class TestSplitOp_2(OpTest):
def setUp(self):
self._set_op_type()
self.dtype = self.get_dtype()
self.init_data()
self.inputs = {'X': self.x}
self.attrs = {
'axis': self.axis,
'sections': self.sections,
'num': self.num
}
out = np.split(self.x, self.indices_or_sections, self.axis)
self.outputs = {'Out': [('out%d' % i, out[i]) \
for i in range(len(out))]}
def init_data(self):
self.x = np.random.random((4, 5, 6)).astype(self.dtype)
self.axis = 2
self.sections = []
self.num = 3
self.indices_or_sections = 3
def get_dtype(self):
return "float32"
def _set_op_type(self):
self.op_type = "split"
def test_check_output(self):
self.check_output()
def test_check_grad(self):
self.check_grad(['X'], ['out0', 'out1', 'out2'])
# attr(axis) is Tensor
class TestSplitOp_AxisTensor(OpTest):
def setUp(self):
self._set_op_type()
self.dtype = self.get_dtype()
self.init_data()
self.inputs = {
'X': self.x,
'AxisTensor': np.array([self.axis]).astype("int32")
}
self.attrs = {'sections': self.sections, 'num': self.num}
out = np.split(self.x, self.indices_or_sections, self.axis)
self.outputs = {'Out': [('out%d' % i, out[i]) \
for i in range(len(out))]}
def init_data(self):
self.x = np.random.random((4, 5, 6)).astype(self.dtype)
self.axis = 2
self.sections = []
self.num = 3
self.indices_or_sections = 3
def get_dtype(self):
return "float32"
def _set_op_type(self):
self.op_type = "split"
def test_check_output(self):
self.check_output()
def test_check_grad(self):
self.check_grad(['X'], ['out0', 'out1', 'out2'])
# attr(sections) is list containing Tensor
class TestSplitOp_SectionsTensor(OpTest):
def setUp(self):
self._set_op_type()
self.dtype = self.get_dtype()
self.init_data()
self.inputs = {'X': self.x}
sections_tensor = []
for index, ele in enumerate(self.sections):
sections_tensor.append(("x" + str(index), np.ones(
(1)).astype('int32') * ele))
self.inputs['SectionsTensorList'] = sections_tensor
self.attrs = {
'axis': self.axis,
'sections': self.sections_infer,
'num': self.num
}
out = np.split(self.x, self.indices_or_sections, self.axis)
self.outputs = {'Out': [('out%d' % i, out[i]) \
for i in range(len(out))]}
def init_data(self):
self.x = np.random.random((4, 5, 6)).astype(self.dtype)
self.axis = 1
self.sections = [2, 1, 2]
self.sections_infer = [-1, -1, -1]
self.num = 0
self.indices_or_sections = [2, 3]
def get_dtype(self):
return "float32"
def _set_op_type(self):
self.op_type = "split"
def test_check_output(self):
self.check_output()
def test_check_grad(self):
self.check_grad(['X'], ['out0', 'out1', 'out2'])
class TestSplitOp_unk_section(OpTest):
def setUp(self):
self._set_op_type()
self.dtype = self.get_dtype()
self.init_data()
self.inputs = {'X': self.x}
self.attrs = {
'axis': self.axis,
'sections': self.sections,
'num': self.num
}
out = np.split(self.x, self.indices_or_sections, self.axis)
self.outputs = {'Out': [('out%d' % i, out[i]) \
for i in range(len(out))]}
def init_data(self):
self.x = np.random.random((4, 5, 6)).astype(self.dtype)
self.axis = 2
self.sections = [2, 1, -1]
self.num = 0
self.indices_or_sections = [2, 3]
def get_dtype(self):
return "float32"
def _set_op_type(self):
self.op_type = "split"
def test_check_output(self):
self.check_output()
def test_check_grad(self):
self.check_grad(['X'], ['out0', 'out1', 'out2'])
class TestSplitByrefOp(OpTest):
    def _set_op_type(self):
        self.op_type = "split_byref"
@@ -67,5 +224,58 @@ def create_test_fp16(parent):

create_test_fp16(TestSplitOp)
class TestSplitAPI(OpTest):
def test_api(self):
input_1 = np.random.random([4, 5, 6]).astype("int32")
positive_1_int32 = fluid.layers.fill_constant([1], "int32", 1)
positive_1_int64 = fluid.layers.fill_constant([1], "int64", 1)
positive_2_int64 = fluid.layers.fill_constant([1], "int64", 2)
x_1 = fluid.data(shape=[4, 5, 6], dtype='int32', name='x_1')
x_2 = fluid.data(shape=[4, 5, None], dtype='int32', name='x_2')
out_0, out_1, out_2 = fluid.layers.split(
input=x_1,
num_or_sections=[positive_2_int64, positive_1_int32, -1],
dim=positive_1_int64)
out_3, out_4, out_5 = fluid.layers.split(
input=x_1, num_or_sections=[2, 1, 2], dim=positive_1_int32)
fluid.layers.split(input=x_2, num_or_sections=2, dim=2)
exe = fluid.Executor(place=fluid.CPUPlace())
[res_0, res_1, res_2, res_3, res_4, res_5] = exe.run(
fluid.default_main_program(),
feed={"x_1": input_1,
"x_2": input_1},
fetch_list=[out_0, out_1, out_2, out_3, out_4, out_5])
out = np.split(input_1, [2, 3], 1)
assert np.array_equal(res_0, out[0])
assert np.array_equal(res_1, out[1])
assert np.array_equal(res_2, out[2])
assert np.array_equal(res_3, out[0])
assert np.array_equal(res_4, out[1])
assert np.array_equal(res_5, out[2])
class TestSplitOpError(OpTest):
def test_errors(self):
with program_guard(Program(), Program()):
# The type of axis in split_op should be int or Variable.
def test_axis_type():
x6 = fluid.layers.data(shape=[4], dtype='float16', name='x3')
fluid.layers.split(input=x6, num_or_sections=2, dim=3.2)
self.assertRaises(TypeError, test_axis_type)
# The type of num_or_sections in split_op should be int, tuple or list.
def test_num_or_sections_type():
x6 = fluid.layers.data(shape=[4], dtype='float16', name='x4')
fluid.layers.split(input=x6, num_or_sections=2.1, dim=3)
self.assertRaises(TypeError, test_num_or_sections_type)
if __name__ == '__main__':
    unittest.main()
@@ -16,7 +16,7 @@ from __future__ import print_function

import unittest

import numpy as np
import paddle.fluid as fluid
from op_test import OpTest
@@ -79,5 +79,171 @@ class TestUnsqueezeOp4(TestUnsqueezeOp):
        self.new_shape = (3, 1, 1, 2, 5, 1)
# axes is a list(with tensor)
class TestUnsqueezeOp_AxesTensorList(OpTest):
def setUp(self):
self.init_test_case()
self.op_type = "unsqueeze2"
axes_tensor_list = []
for index, ele in enumerate(self.axes):
axes_tensor_list.append(("axes" + str(index), np.ones(
(1)).astype('int32') * ele))
self.inputs = {
"X": np.random.random(self.ori_shape).astype("float32"),
"AxesTensorList": axes_tensor_list
}
self.init_attrs()
self.outputs = {
"Out": self.inputs["X"].reshape(self.new_shape),
"XShape": np.random.random(self.ori_shape).astype("float32")
}
def test_check_output(self):
self.check_output(no_check_set=["XShape"])
def test_check_grad(self):
self.check_grad(["X"], "Out")
def init_test_case(self):
self.ori_shape = (3, 5)
self.axes = (1, 2)
self.new_shape = (3, 1, 1, 5)
def init_attrs(self):
self.attrs = {}
class TestUnsqueezeOp1_AxesTensorList(TestUnsqueezeOp_AxesTensorList):
def init_test_case(self):
self.ori_shape = (3, 5)
self.axes = (-1, )
self.new_shape = (3, 5, 1)
class TestUnsqueezeOp2_AxesTensorList(TestUnsqueezeOp_AxesTensorList):
def init_test_case(self):
self.ori_shape = (3, 5)
self.axes = (0, -1)
self.new_shape = (1, 3, 5, 1)
class TestUnsqueezeOp3_AxesTensorList(TestUnsqueezeOp_AxesTensorList):
def init_test_case(self):
self.ori_shape = (3, 2, 5)
self.axes = (0, 3, 3)
self.new_shape = (1, 3, 2, 1, 1, 5)
class TestUnsqueezeOp4_AxesTensorList(TestUnsqueezeOp_AxesTensorList):
def init_test_case(self):
self.ori_shape = (3, 2, 5)
self.axes = (3, 1, 1)
self.new_shape = (3, 1, 1, 2, 5, 1)
# axes is a Tensor
class TestUnsqueezeOp_AxesTensor(OpTest):
def setUp(self):
self.init_test_case()
self.op_type = "unsqueeze2"
self.inputs = {
"X": np.random.random(self.ori_shape).astype("float32"),
"AxesTensor": np.array(self.axes).astype("int32")
}
self.init_attrs()
self.outputs = {
"Out": self.inputs["X"].reshape(self.new_shape),
"XShape": np.random.random(self.ori_shape).astype("float32")
}
def test_check_output(self):
self.check_output(no_check_set=["XShape"])
def test_check_grad(self):
self.check_grad(["X"], "Out")
def init_test_case(self):
self.ori_shape = (3, 5)
self.axes = (1, 2)
self.new_shape = (3, 1, 1, 5)
def init_attrs(self):
self.attrs = {}
class TestUnsqueezeOp1_AxesTensor(TestUnsqueezeOp_AxesTensor):
def init_test_case(self):
self.ori_shape = (3, 5)
self.axes = (-1, )
self.new_shape = (3, 5, 1)
class TestUnsqueezeOp2_AxesTensor(TestUnsqueezeOp_AxesTensor):
def init_test_case(self):
self.ori_shape = (3, 5)
self.axes = (0, -1)
self.new_shape = (1, 3, 5, 1)
class TestUnsqueezeOp3_AxesTensor(TestUnsqueezeOp_AxesTensor):
def init_test_case(self):
self.ori_shape = (3, 2, 5)
self.axes = (0, 3, 3)
self.new_shape = (1, 3, 2, 1, 1, 5)
class TestUnsqueezeOp4_AxesTensor(TestUnsqueezeOp_AxesTensor):
def init_test_case(self):
self.ori_shape = (3, 2, 5)
self.axes = (3, 1, 1)
self.new_shape = (3, 1, 1, 2, 5, 1)
# test api
class TestUnsqueezeAPI(OpTest):
def test_api(self):
input = np.random.random([3, 2, 5]).astype("float32")
x = fluid.data(name='x', shape=[3, 2, 5], dtype="float32")
positive_3_int32 = fluid.layers.fill_constant([1], "int32", 3)
positive_1_int64 = fluid.layers.fill_constant([1], "int64", 1)
axes_tensor_int32 = fluid.data(
name='axes_tensor_int32', shape=[3], dtype="int32")
axes_tensor_int64 = fluid.data(
name='axes_tensor_int64', shape=[3], dtype="int64")
out_1 = fluid.layers.unsqueeze(x, axes=[3, 1, 1])
out_2 = fluid.layers.unsqueeze(
x, axes=[positive_3_int32, positive_1_int64, 1])
out_3 = fluid.layers.unsqueeze(x, axes=axes_tensor_int32)
out_4 = fluid.layers.unsqueeze(x, axes=3)
out_5 = fluid.layers.unsqueeze(x, axes=axes_tensor_int64)
exe = fluid.Executor(place=fluid.CPUPlace())
res_1, res_2, res_3, res_4, res_5 = exe.run(
fluid.default_main_program(),
feed={
"x": input,
"axes_tensor_int32": np.array([3, 1, 1]).astype("int32"),
"axes_tensor_int64": np.array([3, 1, 1]).astype("int64")
},
fetch_list=[out_1, out_2, out_3, out_4, out_5])
assert np.array_equal(res_1, input.reshape([3, 1, 1, 2, 5, 1]))
assert np.array_equal(res_2, input.reshape([3, 1, 1, 2, 5, 1]))
assert np.array_equal(res_3, input.reshape([3, 1, 1, 2, 5, 1]))
assert np.array_equal(res_4, input.reshape([3, 2, 5, 1]))
assert np.array_equal(res_5, input.reshape([3, 1, 1, 2, 5, 1]))
def test_error(self):
def test_axes_type():
x2 = fluid.data(name="x2", shape=[2, 25], dtype="int32")
fluid.layers.unsqueeze(x2, axes=2.1)
self.assertRaises(TypeError, test_axes_type)
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()