diff --git a/paddle/fluid/operators/concat_op.cc b/paddle/fluid/operators/concat_op.cc
index daef6310ddf5808ab8765362427a005a387096af..820d3e3df700348bdac8368afd17d51e15e5bd92 100644
--- a/paddle/fluid/operators/concat_op.cc
+++ b/paddle/fluid/operators/concat_op.cc
@@ -32,58 +32,36 @@ class ConcatOp : public framework::OperatorWithKernel {
   void InferShape(framework::InferShapeContext *ctx) const override {
     PADDLE_ENFORCE_GE(ctx->Inputs("X").size(), 1UL,
                       "Inputs(X) of ConcatOp should not be empty.");
-    PADDLE_ENFORCE(ctx->HasOutput("Out"),
-                   "Output(Out) of ConcatOp should not be null.");
+    PADDLE_ENFORCE_EQ(ctx->HasOutput("Out"), true,
+                      "Output(Out) of ConcatOp should not be null.");
 
-    auto ins = ctx->GetInputsDim("X");
-    size_t axis =
-        ComputeAxis(static_cast<int64_t>(ctx->Attrs().Get<int>("axis")),
-                    static_cast<int64_t>(ins[0].size()));
+    auto inputs_dims = ctx->GetInputsDim("X");  // dims of every input in X
 
-    const size_t n = ins.size();
-    PADDLE_ENFORCE_GT(n, 0,
+    const size_t inputs_num = inputs_dims.size();
+    PADDLE_ENFORCE_GT(inputs_num, 0,
                       "ShapeError: Input tensors count should > 0. But "
                       "recevied inputs' length is 0.");
-    if (n == 1) {
+    if (inputs_num == 1) {
       VLOG(3) << "Warning: concat op have only one input, may waste memory";
     }
 
-    auto out_dims = ins[0];
-    size_t in_zero_dims_size = out_dims.size();
-    for (size_t i = 1; i < n; i++) {
-      for (size_t j = 0; j < in_zero_dims_size; j++) {
-        if (j == axis) {
-          if (ctx->IsRuntime()) {
-            out_dims[axis] += ins[i][j];
-          } else {
-            if (ins[i][j] == -1) {
-              out_dims[axis] = -1;
-            } else {
-              out_dims[axis] += ins[i][j];
-            }
-          }
-        } else {
-          bool check_shape =
-              ctx->IsRuntime() || (out_dims[j] > 0 && ins[i][j] > 0);
-          if (check_shape) {
-            // check all shape in run time
-            PADDLE_ENFORCE_EQ(
-                out_dims[j], ins[i][j],
-                "ShapeError: Input tensors should have same "
-                "dimensions(or specific dimension = -1) except the axis. "
-                "But recevied axis = %s, input[0]'s shape = "
-                "[%s], input[%s]'s shape = [%s], the \"%s\" "
-                "dimension of input[%s] is unexpected",
-                axis, ins[0], i, ins[j], j, i);
-          }
-        }
+    if (ctx->HasInput("AxisTensor")) {  // axis comes from a tensor input
+      auto out_dims =
+          framework::make_ddim(std::vector<int>(inputs_dims[0].size(), -1));
+      ctx->SetOutputDim("Out", out_dims);  // same rank as X[0], every dim -1
+      ctx->ShareLoD("X", /*->*/ "Out");
+    } else {  // axis comes from Attr(axis): compute and check the shape here
+      size_t axis =
+          ComputeAxis(static_cast<int64_t>(ctx->Attrs().Get<int>("axis")),
+                      static_cast<int64_t>(inputs_dims[0].size()));
+      framework::DDim out_dims =
+          ComputeAndCheckShape(ctx->IsRuntime(), inputs_dims, axis);
+      if (out_dims[axis] < 0) {  // normalize any negative extent to -1
+        out_dims[axis] = -1;
       }
+      ctx->SetOutputDim("Out", out_dims);
+      ctx->ShareLoD("X", /*->*/ "Out");
     }
-    if (out_dims[axis] < 0) {
-      out_dims[axis] = -1;
-    }
-    ctx->SetOutputDim("Out", out_dims);
-    ctx->ShareLoD("X", /*->*/ "Out");
   }
 
  protected:
@@ -111,6 +89,16 @@ class ConcatOp : public framework::OperatorWithKernel {
 #endif
     return framework::OpKernelType(input_data_type, ctx.GetPlace());
   }
+
+  framework::OpKernelType GetKernelTypeForVar(
+      const std::string &var_name, const Tensor &tensor,
+      const framework::OpKernelType &expected_kernel_type) const override {
+    if (var_name == "AxisTensor") {
+      return expected_kernel_type;  // AxisTensor: expected type, unchanged
+    }  // any other input: expected data type, its own place and layout
+    return framework::OpKernelType(expected_kernel_type.data_type_,
+                                   tensor.place(), tensor.layout());
+  }
 };
 
 class ConcatOpMaker : public framework::OpProtoAndCheckerMaker {
@@ -128,6 +116,12 @@ class ConcatOpMaker : public framework::OpProtoAndCheckerMaker {
                  "interpreted as counting from the end of the rank."
                  "i.e., axis + rank(X) th dimension.")
         .SetDefault(0);
+    AddInput("AxisTensor",
+             "(Tensor) The axis along which the input tensors will be "
+             "concatenated.  "
+             "It has higher priority than Attr(axis). "
+             "The shape of AxisTensor must be [1].")
+        .AsDispensable();
     AddAttr<bool>("use_quantizer",
                   "(bool, default false) "
                   "Set to true for operators that should be quantized and use "
@@ -178,6 +172,16 @@ class ConcatOpGrad : public framework::OperatorWithKernel {
                                        ctx, framework::GradVarName("Out")),
                                    ctx.GetPlace());
   }
+
+  framework::OpKernelType GetKernelTypeForVar(
+      const std::string &var_name, const Tensor &tensor,
+      const framework::OpKernelType &expected_kernel_type) const override {
+    if (var_name == "AxisTensor") {
+      return expected_kernel_type;  // AxisTensor: expected type, unchanged
+    }  // any other input: expected data type, its own place and layout
+    return framework::OpKernelType(expected_kernel_type.data_type_,
+                                   tensor.place(), tensor.layout());
+  }
 };
 
 DECLARE_NO_NEED_BUFFER_VARS_INFERENCE(ConcatOpGradNoNeedBufferVarInference,
@@ -192,6 +196,7 @@ class ConcatGradOpDescMaker : public framework::SingleGradOpDescMaker {
     std::unique_ptr<framework::OpDesc> op(new framework::OpDesc());
     op->SetType("concat_grad");
     op->SetInput("X", Input("X"));
+    op->SetInput("AxisTensor", Input("AxisTensor"));
     op->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
     op->SetOutput(framework::GradVarName("X"), InputGrad("X", false));
     op->SetAttrMap(Attrs());
diff --git a/paddle/fluid/operators/concat_op.h b/paddle/fluid/operators/concat_op.h
index 4a371de32354d196492a54dce47bf73bf644bad1..8613b5e3c8d9c5a7c8f00fb195e94ee1b23afb0c 100644
--- a/paddle/fluid/operators/concat_op.h
+++ b/paddle/fluid/operators/concat_op.h
@@ -14,14 +14,51 @@ limitations under the License. */
 
 #pragma once
 
+#include <string>
 #include <utility>
 #include <vector>
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/operators/math/concat_and_split.h"
 #include "paddle/fluid/operators/strided_memcpy.h"
+#include "paddle/fluid/operators/utils.h"
 
 namespace paddle {
 namespace operators {
+// Computes the output shape of concat along `axis` and checks that every
+// other dimension of inputs_dims[i] equals that of inputs_dims[0]. When
+// !is_runtime, an unknown (-1) size on `axis` keeps the result at -1 and other
+// dimensions are only compared when both sizes are known (> 0).
+static inline framework::DDim ComputeAndCheckShape(
+    const bool is_runtime, const std::vector<framework::DDim>& inputs_dims,
+    const int axis) {
+  const size_t n = inputs_dims.size();
+  auto out_dims = inputs_dims[0];
+  size_t in_zero_dims_size = out_dims.size();
+  for (size_t i = 1; i < n; i++) {
+    for (size_t j = 0; j < in_zero_dims_size; j++) {
+      if (j == static_cast<size_t>(axis)) {
+        // An unknown accumulated size must stay unknown: adding a known size
+        // to -1 would produce a bogus extent instead of -1.
+        const bool unknown =
+            !is_runtime && (inputs_dims[i][j] == -1 || out_dims[axis] == -1);
+        out_dims[axis] = unknown ? -1 : out_dims[axis] + inputs_dims[i][j];
+      } else {
+        bool check_shape =
+            is_runtime || (out_dims[j] > 0 && inputs_dims[i][j] > 0);
+        if (check_shape) {
+          // check all shape in run time
+          PADDLE_ENFORCE_EQ(
+              inputs_dims[0][j], inputs_dims[i][j],
+              "ShapeError: Dimension %d in inputs' shapes must be equal. "
+              "But recevied input[0]'s shape = "
+              "[%s], input[%d]'s shape = [%s].",
+              j, inputs_dims[0], i, inputs_dims[i]);
+        }
+      }
+    }
+  }
+  return out_dims;
+}
 
 static inline int64_t ComputeAxis(int64_t axis, int64_t rank) {
   if (axis < 0) {
@@ -36,9 +73,27 @@ class ConcatKernel : public framework::OpKernel<T> {
   void Compute(const framework::ExecutionContext& ctx) const override {
     auto ins = ctx.MultiInput<framework::Tensor>("X");
     framework::Tensor* out = ctx.Output<framework::Tensor>("Out");
-    PADDLE_ENFORCE(ins[0], "The input should not be null.");
-    auto axis = ComputeAxis(static_cast<int64_t>(ctx.Attr<int>("axis")),
-                            static_cast<int64_t>(ins[0]->dims().size()));
+    PADDLE_ENFORCE_EQ(ins[0] != nullptr, true, "The input should not be null.");
+    auto axis = ctx.Attr<int>("axis");
+    bool need_resize_out_dims = false;
+    if (ctx.HasInput("AxisTensor")) {
+      auto* axis_tensor = ctx.Input<framework::Tensor>("AxisTensor");
+      axis = GetDataFromTensor<int>(axis_tensor)[0];
+      need_resize_out_dims = true;
+    }
+    axis = ComputeAxis(static_cast<int64_t>(axis),
+                       static_cast<int64_t>(ins[0]->dims().size()));
+
+    if (need_resize_out_dims) {
+      const size_t n = ins.size();
+      std::vector<framework::DDim> ins_dims(n);
+      for (size_t i = 0; i < n; i++) {
+        ins_dims[i] = ins[i]->dims();
+      }
+
+      framework::DDim out_dims = ComputeAndCheckShape(true, ins_dims, axis);
+      out->Resize(out_dims);
+    }
     auto place = ctx.GetPlace();
     out->mutable_data<T>(place);
 
@@ -92,10 +147,15 @@ class ConcatGradKernel : public framework::OpKernel<T> {
         }
       }
     }
-    PADDLE_ENFORCE(ins[0], "The input should not be null.");
-    auto axis = ComputeAxis(static_cast<int64_t>(ctx.Attr<int>("axis")),
-                            static_cast<int64_t>(ins[0]->dims().size()));
+    PADDLE_ENFORCE_EQ(ins[0] != nullptr, true, "The input should not be null.");
 
+    auto axis = ctx.Attr<int>("axis");
+    if (ctx.HasInput("AxisTensor")) {
+      auto* axis_tensor = ctx.Input<framework::Tensor>("AxisTensor");
+      axis = GetDataFromTensor<int>(axis_tensor)[0];
+    }
+    axis = ComputeAxis(static_cast<int64_t>(axis),
+                       static_cast<int64_t>(ins[0]->dims().size()));
     // get output tensor that the name is not kEmptyVarName
     std::vector<framework::Tensor*> outputs;
     for (size_t j = 0; j < outs.size(); ++j) {
diff --git a/paddle/fluid/operators/split_op.cc b/paddle/fluid/operators/split_op.cc
index f224c807bd6334ab05d7bd3be95382374b9b14a0..5277440f5b622cca074f333071a601d82cc4a58d 100644
--- a/paddle/fluid/operators/split_op.cc
+++ b/paddle/fluid/operators/split_op.cc
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "paddle/fluid/operators/split_op.h"
+#include <string>
 
 namespace paddle {
 namespace operators {
@@ -23,8 +24,8 @@ class SplitOp : public framework::OperatorWithKernel {
   using framework::OperatorWithKernel::OperatorWithKernel;
 
   void InferShape(framework::InferShapeContext *ctx) const override {
-    PADDLE_ENFORCE(ctx->HasInput("X"),
-                   "Input(X) of SplitOp should not be null.");
+    PADDLE_ENFORCE_EQ(ctx->HasInput("X"), true,
+                      "Input(X) of SplitOp should not be null.");
     PADDLE_ENFORCE_GE(ctx->Outputs("Out").size(), 1UL,
                       "Outputs(Out) of SplitOp should not be empty.");
     auto in_dims = ctx->GetInputDim("X");
@@ -34,38 +35,29 @@ class SplitOp : public framework::OperatorWithKernel {
     std::vector<int> sections = static_cast<std::vector<int>>(
         ctx->Attrs().Get<std::vector<int>>("sections"));
     const size_t outs_number = outs_names.size();
-    std::vector<framework::DDim> outs_dims;
-    outs_dims.reserve(outs_number);
-
-    if (num > 0) {
-      int64_t in_axis_dim = in_dims[axis];
-      if (ctx->IsRuntime() || in_axis_dim > 0) {
-        PADDLE_ENFORCE_EQ(in_axis_dim % num, 0,
-                          "tensor split does not result"
-                          " in an equal division");
-        size_t out_axis_dim = in_axis_dim / num;
-        for (size_t i = 0; i < outs_number; ++i) {
-          auto dim = in_dims;
-          dim[axis] = out_axis_dim;
-          outs_dims.push_back(dim);
-        }
-      } else {
-        for (size_t i = 0; i < outs_number; ++i) {
-          auto dim = in_dims;
-          dim[axis] = -1;
-          outs_dims.push_back(dim);
-        }
-      }
-    } else if (sections.size() > 0) {
+
+    if (sections.size() > 0) {
       PADDLE_ENFORCE_EQ(sections.size(), outs_number,
-                        "tensor split sections size"
+                        "tensor split sections size "
                         "should be equal to output size.");
+    }
+
+    if (ctx->HasInput("AxisTensor")) {
+      auto out_dims =
+          framework::make_ddim(std::vector<int>(in_dims.size(), -1));
+      std::vector<framework::DDim> outs_dims(outs_number, out_dims);
+      ctx->SetOutputsDim("Out", outs_dims);
       for (size_t i = 0; i < outs_number; ++i) {
-        auto dim = in_dims;
-        dim[axis] = sections[i];
-        outs_dims.push_back(dim);
+        ctx->ShareLoD("X", "Out", 0, i);
       }
+      return;
     }
+
+    bool each_section_is_known =
+        (sections.size() > 0 && !ctx->HasInputs("SectionsTensorList"));
+
+    auto outs_dims = UpdateOutsDims(ctx->IsRuntime(), each_section_is_known,
+                                    in_dims, num, sections, axis, outs_number);
     ctx->SetOutputsDim("Out", outs_dims);
     if (axis != 0) {
       // Only pass LoD when not spliting along the first dim.
@@ -74,12 +66,41 @@ class SplitOp : public framework::OperatorWithKernel {
       }
     }
   }
+
+ protected:
+  framework::OpKernelType GetExpectedKernelType(
+      const framework::ExecutionContext &ctx) const override {
+    return framework::OpKernelType(ctx.Input<framework::LoDTensor>("X")->type(),
+                                   ctx.device_context());  // X's type + device
+  }
+
+  framework::OpKernelType GetKernelTypeForVar(
+      const std::string &var_name, const Tensor &tensor,
+      const framework::OpKernelType &expected_kernel_type) const override {
+    if (var_name == "AxisTensor" || var_name == "SectionsTensorList") {
+      return expected_kernel_type;  // these inputs: expected type, unchanged
+    }  // any other input: expected data type, its own place and layout
+    return framework::OpKernelType(expected_kernel_type.data_type_,
+                                   tensor.place(), tensor.layout());
+  }
 };
 
 class SplitOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
   void Make() override {
     AddInput("X", "(Tensor) Input tensor of the split operator.");
+    AddInput("AxisTensor",
+             "(Tensor) The axis which the input will be splited on. "
+             "It has higher priority than Attr(axis). "
+             "The shape of AxisTensor must be [1]")
+        .AsDispensable();
+    AddInput("SectionsTensorList",
+             "(vector<Tensor<int>>, optional). "
+             "The length of each output along the specified axis. "
+             "It has a higher priority than Attr(sections)."
+             "The shape of the element in vector must be [1].")
+        .AsDuplicable()
+        .AsDispensable();
     AddOutput("Out", "(Tensor) Output tensors of the split operator.")
         .AsDuplicable();
     AddComment(R"DOC(
diff --git a/paddle/fluid/operators/split_op.h b/paddle/fluid/operators/split_op.h
index 6f4a25ab5ed86937f2f5db532a9eba22b5a2c5be..77d630235f7c9b129da3ac75f85a24f07be1091e 100644
--- a/paddle/fluid/operators/split_op.h
+++ b/paddle/fluid/operators/split_op.h
@@ -15,21 +15,125 @@ limitations under the License. */
 #pragma once
 
 #include <chrono>  // NOLINT
+#include <memory>
+#include <string>
 #include <vector>
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/operators/math/concat_and_split.h"
 #include "paddle/fluid/operators/strided_memcpy.h"
+#include "paddle/fluid/operators/utils.h"
 
 namespace paddle {
 namespace operators {
+// Computes the dims of every output of the split op. Each output starts as a
+// copy of in_dims and only its extent on `axis` changes: in_dims[axis] / num
+// when num > 0, otherwise sections[i]. Checks run only when is_runtime or
+// in_dims[axis] > 0; if they are skipped with num > 0 the extent is set to -1.
+// With each_section_is_known, a single -1 section becomes the leftover extent.
+static inline std::vector<framework::DDim> UpdateOutsDims(
+    const bool is_runtime, const bool each_section_is_known,
+    const framework::DDim in_dims, const size_t num, std::vector<int> sections,
+    const size_t axis, const int outs_number) {
+  std::vector<framework::DDim> outs_dims(outs_number, in_dims);
+  int64_t input_axis_dim = in_dims[axis];
+  if (num > 0) {
+    if (is_runtime || input_axis_dim > 0) {
+      PADDLE_ENFORCE_EQ(input_axis_dim % num, 0,
+                        "The input's size along the split dimension "
+                        "must be evenly divisible by Attr(num_or_sections). "
+                        "But received Attr(num_or_sections) "
+                        "= %d, input(X)'s shape = [%s], Attr(dim) = %d.",
+                        num, in_dims, axis);
+      size_t out_axis_dim = input_axis_dim / num;
 
+      for (auto& out_dim : outs_dims) out_dim[axis] = out_axis_dim;
+    } else {
+      for (auto& out_dim : outs_dims) out_dim[axis] = -1;
+    }
+  } else if (sections.size() > 0) {
+    // Guards the sections[i] reads below against a short sections list.
+    PADDLE_ENFORCE_EQ(sections.size(), static_cast<size_t>(outs_number),
+                      "tensor split sections size "
+                      "should be equal to output size.");
+    if (is_runtime || input_axis_dim > 0) {
+      int unk_dim_idx = -1, num_of_unk = 0, sum_of_section = 0;
+      for (size_t i = 0; i < sections.size(); ++i) {
+        if (sections[i] == -1) {  // -1 marks a section of unknown size
+          num_of_unk++;
+          unk_dim_idx = i;
+        } else {
+          sum_of_section += sections[i];
+        }
+      }
+      if (each_section_is_known) {
+        PADDLE_ENFORCE_LE(num_of_unk, 1,
+                          "Only one dimension value of Attr(num_or_sections) "
+                          "in SplitOp can be -1. "
+                          "But received Attr(num_or_sections) = [%s].",
+                          framework::make_ddim(sections));
+      }
+      if (unk_dim_idx != -1) {
+        // e.g. input shape = [4, 5], axis = 1, sections = [2, 3, -1]: the known
+        // sections already sum to input_axis_dim = 5, so the check below fails.
+        PADDLE_ENFORCE_LT(
+            sum_of_section, input_axis_dim,
+            "Sum of Attr(num_or_sections) other than unknown section "
+            "must be less than the input's size "
+            "along the split dimension. But received Attr(num_or_sections) "
+            "= [%s], input(X)'s shape = [%s], Attr(dim) = %d.",
+            framework::make_ddim(sections), in_dims, axis);
+        if (each_section_is_known) {
+          sections[unk_dim_idx] = input_axis_dim - sum_of_section;
+        }
+      } else {
+        PADDLE_ENFORCE_EQ(
+            sum_of_section, input_axis_dim,
+            "Sum of Attr(num_or_sections) must be equal to the input's size "
+            "along the split dimension. But received Attr(num_or_sections)"
+            " = [%s], input(X)'s shape = [%s], Attr(dim) = %d.",
+            framework::make_ddim(sections), in_dims, axis);
+      }
+    }
+    for (size_t i = 0; i < static_cast<size_t>(outs_number); ++i) {
+      outs_dims[i][axis] = sections[i];
+    }
+  }
+  return outs_dims;
+}
 template <typename DeviceContext, typename T>
 class SplitOpKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
     auto* in = ctx.Input<framework::Tensor>("X");
     auto outs = ctx.MultiOutput<framework::Tensor>("Out");
+    int num = ctx.Attr<int>("num");
+    std::vector<int> sections = ctx.Attr<std::vector<int>>("sections");
     int axis = ctx.Attr<int>("axis");
+
+    auto in_dims = in->dims();
+    auto outs_number = outs.size();
+
+    bool need_resize_outs_dims = false;
+    if (ctx.HasInput("AxisTensor")) {
+      auto* axis_tensor = ctx.Input<framework::Tensor>("AxisTensor");
+      axis = GetDataFromTensor<int>(axis_tensor)[0];
+      need_resize_outs_dims = true;
+    }
+    auto sections_tensor_list =
+        ctx.MultiInput<framework::Tensor>("SectionsTensorList");
+    if (sections_tensor_list.size() > 0) {
+      sections = GetDataFromTensorList<int>(sections_tensor_list);
+      need_resize_outs_dims = true;
+    }
+
+    if (need_resize_outs_dims) {
+      std::vector<framework::DDim> outs_dims =
+          UpdateOutsDims(true, true, in_dims, num, sections, axis, outs_number);
+      for (size_t j = 0; j < outs.size(); ++j) {
+        outs[j]->Resize(outs_dims[j]);
+      }
+    }
+
     auto place = ctx.GetPlace();
 
     std::vector<const framework::Tensor*> shape_refer;
@@ -58,6 +162,7 @@ class SplitGradMaker : public framework::SingleGradOpDescMaker {
     auto op = new framework::OpDesc();
     op->SetType("concat");
     op->SetInput("X", OutputGrad("Out"));
+    op->SetInput("AxisTensor", Input("AxisTensor"));
     op->SetOutput("Out", InputGrad("X"));
     op->SetAttrMap(Attrs());
     return std::unique_ptr<framework::OpDesc>(op);
diff --git a/paddle/fluid/operators/utils.h b/paddle/fluid/operators/utils.h
new file mode 100644
index 0000000000000000000000000000000000000000..fa76335ac5856c1c35ffa0f69cc5e3525df0ccf3
--- /dev/null
+++ b/paddle/fluid/operators/utils.h
@@ -0,0 +1,57 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+#include <paddle/fluid/framework/operator.h>
+#include <string>
+#include <vector>
+
+namespace paddle {
+namespace operators {
+
+// Returns the elements of x as a host vector; a GPU tensor is synced to CPU.
+template <typename T>
+inline std::vector<T> GetDataFromTensor(const framework::Tensor* x) {
+  const T* src = x->data<T>();
+  framework::Tensor host_copy;
+  if (platform::is_gpu_place(x->place())) {
+    TensorCopySync(*x, platform::CPUPlace(), &host_copy);
+    src = host_copy.data<T>();
+  }
+  return std::vector<T>(src, src + x->numel());
+}
+// Collects one value from each tensor of list_tensor, in order. Every tensor
+// must have shape [1] (enforced below); a tensor on a GPU place is copied
+// synchronously to the CPU before its value is read.
+template <typename T>
+inline std::vector<T> GetDataFromTensorList(
+    const std::vector<const framework::Tensor*>& list_tensor) {
+  std::vector<T> vec_new_data;
+  vec_new_data.reserve(list_tensor.size());  // exactly one value per tensor
+  for (const framework::Tensor* tensor : list_tensor) {
+    PADDLE_ENFORCE_EQ(
+        tensor->dims(), framework::make_ddim({1}),
+        "ShapeError: If the element type is Tensor, "
+        "the element's shape must be [1]. But received the element's shape "
+        "is [%s]",
+        tensor->dims());
+    framework::Tensor host;  // filled only when the tensor lives on a GPU
+    const bool on_gpu = platform::is_gpu_place(tensor->place());
+    if (on_gpu) TensorCopySync(*tensor, platform::CPUPlace(), &host);
+    vec_new_data.push_back(*(on_gpu ? &host : tensor)->data<T>());
+  }
+  return vec_new_data;
+}
+}  // namespace operators
+}  // namespace paddle
diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py
index e09a49d8322d8c687ca538993794ac9a62e4eb6b..41f6486ac52c230ebc02ffc53195068780b155f2 100755
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -6669,62 +6669,117 @@ def split(input, num_or_sections, dim=-1, name=None):
 
     Args:
         input (Variable): The input variable which is an N-D Tensor or LoDTensor, data type being float32, float64, int32 or int64.
-        num_or_sections (int|list): Integer or list of Integers. If :attr:`num_or_sections` is an integer,
+        num_or_sections (int|list|tuple): If :attr:`num_or_sections` is an integer,
             then the integer indicates the number of equal sized sub-Tensors
             that the Tensor will be divided into. If :attr:`num_or_sections`
-            is a list of integers, the length of list indicates the number of
-            sub-Tensors and the integers indicate the sizes of sub-Tensors'
-            :attr:`dim` dimension orderly. The the length of the list mustn't be larger than the Tensor's size of :attr:`dim` .
-        dim (int): The dimension along which to split. If :math:`dim < 0`, the
-            dimension to split along is :math:`rank(input) + dim`.
+            is a list or tuple, the length of it indicates the number of
+            sub-Tensors and the elements in it indicate the sizes of sub-Tensors'
+            :attr:`dim` dimension orderly. The length of the list mustn't be larger than the Tensor's size of :attr:`dim` .
+        dim (int32|Variable, optional): A scalar with type ``int32`` or a ``Tensor`` with shape [1] and type ``int32``. The dimension along which to split. If :math:`dim < 0`, the
+            dimension to split along is :math:`rank(input) + dim`. Default is -1.
         name(str, optional): The default value is None.  Normally there is no need for user to set this property.  For more information, please refer to :ref:`api_guide_Name` .
 
     Returns:
         list(Variable): The list of segmented Tensor variables.
 
+    Raises:
+        TypeError: num_or_sections is not int, list or tuple.
+        TypeError: dim is not int or Variable.
+
     Example:
         .. code-block:: python
 
             import paddle.fluid as fluid
 
-            # input is a variable which shape is [-1, 3, 9, 5]
-            input = fluid.layers.data(
+            # input is a variable which shape is [3, 9, 5]
+            input = fluid.data(
                  name="input", shape=[3, 9, 5], dtype="float32")
 
-            x0, x1, x2 = fluid.layers.split(input, num_or_sections=3, dim=2)
-            # x0.shape [-1, 3, 3, 5]
-            # x1.shape [-1, 3, 3, 5]
-            # x2.shape [-1, 3, 3, 5]
+            x0, x1, x2 = fluid.layers.split(input, num_or_sections=3, dim=1)
+            # x0.shape [3, 3, 5]
+            # x1.shape [3, 3, 5]
+            # x2.shape [3, 3, 5]
 
-            x0, x1, x2 = fluid.layers.split(input, num_or_sections=[2, 3, 4], dim=2)
-            # x0.shape [-1, 3, 2, 5]
-            # x1.shape [-1, 3, 3, 5]
-            # x2.shape [-1, 3, 4, 5]
+            x0, x1, x2 = fluid.layers.split(input, num_or_sections=[2, 3, 4], dim=1)
+            # x0.shape [3, 2, 5]
+            # x1.shape [3, 3, 5]
+            # x2.shape [3, 4, 5]
+
+            x0, x1, x2 = fluid.layers.split(input, num_or_sections=[2, 3, -1], dim=1)
+            # x0.shape [3, 2, 5]
+            # x1.shape [3, 3, 5]
+            # x2.shape [3, 4, 5]
     """
+    if not isinstance(num_or_sections, (int, list, tuple)):
+        raise TypeError(
+            "The type of 'num_or_sections' in split must be int, list or "
+            "tuple, but received %s." % (type(num_or_sections)))
+    if not isinstance(dim, (int, Variable)):
+        raise TypeError(
+            "The type of 'dim' in split must be int or Variable, but "
+            "received %s." % (type(dim)))
+
     helper = LayerHelper('split', **locals())
     input_shape = input.shape
-    dim = (len(input_shape) + dim) if dim < 0 else dim
+    inputs = {'X': input}
+    attrs = {'num': num_or_sections if isinstance(num_or_sections, int) else 0}
+
+    def _get_SectionsTensorList(one_list):
+        tensor_list = []
+        unk_dim_idx = -1
+        for idx, dim_size in enumerate(one_list):
+            if isinstance(dim_size, Variable):
+                dim_size.stop_gradient = True
+                tensor_list.append(dim_size)
+            else:
+                assert (isinstance(dim_size, int))
+                if dim_size == -1:
+                    assert unk_dim_idx == -1, (
+                        "Only one value of 'num_or_section' in split can "
+                        "be -1. But received num_or_section[%d] is also -1." %
+                        idx)
+                    unk_dim_idx = idx
+                temp_out = helper.create_variable_for_type_inference('int32')
+                fill_constant(
+                    [1], 'int32', dim_size, force_cpu=True, out=temp_out)
+                tensor_list.append(temp_out)
+        return tensor_list
+
+    if isinstance(dim, Variable):
+        dim.stop_gradient = True
+        inputs['AxisTensor'] = dim
+    else:
+        dim = (len(input_shape) + dim) if dim < 0 else dim
+        attrs['axis'] = dim
+
     if isinstance(num_or_sections, int):
         assert num_or_sections > 1, 'num_or_sections must be more than 1.'
+        if isinstance(dim, int) and input_shape[dim] > 0:  # extent is positive
+            assert input_shape[dim] % num_or_sections == 0, \
+                "The input's size along the split dimension " \
+                "must be evenly divisible by Attr(num_or_sections). " \
+                "But %d is not evenly divisible by %d. " % (input_shape[dim], num_or_sections)
         num = num_or_sections
     else:
-        assert len(num_or_sections) <= input_shape[
-            dim], 'len(num_or_sections) must not be more than input.shape[dim].'
+        if isinstance(dim, int) and input_shape[dim] > 0:
+            assert len(num_or_sections) <= input_shape[
+                dim], 'len(num_or_sections) must not be more than input.shape[dim].'
         num = len(num_or_sections)
+        attrs['sections'] = list(
+            map(lambda ele: -1 if isinstance(ele, Variable) else ele,
+                num_or_sections))
+        contain_var = not all(not isinstance(ele, Variable)
+                              for ele in num_or_sections)
+        if contain_var:
+            inputs['SectionsTensorList'] = _get_SectionsTensorList(
+                num_or_sections)
+
     outs = [
         helper.create_variable_for_type_inference(dtype=helper.input_dtype())
         for i in range(num)
     ]
     helper.append_op(
-        type='split',
-        inputs={'X': input},
-        outputs={'Out': outs},
-        attrs={
-            'num': num_or_sections if isinstance(num_or_sections, int) else 0,
-            'sections': num_or_sections
-            if isinstance(num_or_sections, list) else [],
-            'axis': dim
-        })
+        type='split', inputs=inputs, outputs={'Out': outs}, attrs=attrs)
     return outs
 
 
diff --git a/python/paddle/fluid/layers/tensor.py b/python/paddle/fluid/layers/tensor.py
index fe81da1ea2e1cca11bba6aa2798b7026d787f8b3..0b628461a412b30360c1d640c0c8556ef2c3d118 100644
--- a/python/paddle/fluid/layers/tensor.py
+++ b/python/paddle/fluid/layers/tensor.py
@@ -222,7 +222,7 @@ def concat(input, axis=0, name=None):
     Args:
         input(list): List of input Tensors with data type float32, float64, int32,
             int64.
-        axis(int, optional): Axis to compute indices along. The effective range
+        axis(int32|Variable, optional):  A scalar with type ``int32`` or a ``Tensor`` with shape [1] and type ``int32``. Axis to compute indices along. The effective range
             is [-R, R), where R is Rank(x). when axis<0, it works the same way
             as axis+R. Default is 0.
         name (str, optional): The default value is None. Normally there is no
@@ -280,12 +280,21 @@ def concat(input, axis=0, name=None):
             raise TypeError(
                 "The data type of x in 'input' in concat must be float16(only support on GPU), float32, float64, int32, int64, but received %s."
                 % (convert_dtype(x.dtype)))
+    if not isinstance(axis, (int, Variable)):
+        raise TypeError(
+            "The type of 'axis' in concat must be int or Variable, but "
+            "received %s." % (type(axis)))
+    inputs = {'X': input}
+    attrs = {}
+    if isinstance(axis, Variable):
+        axis.stop_gradient = True
+        inputs['AxisTensor'] = axis
+    else:
+        attrs['axis'] = axis
+
     out = helper.create_variable_for_type_inference(dtype=helper.input_dtype())
     helper.append_op(
-        type='concat',
-        inputs={'X': input},
-        outputs={'Out': [out]},
-        attrs={'axis': axis})
+        type='concat', inputs=inputs, outputs={'Out': [out]}, attrs=attrs)
     return out
 
 
diff --git a/python/paddle/fluid/tests/unittests/test_concat_op.py b/python/paddle/fluid/tests/unittests/test_concat_op.py
index 4e06b40dc932d05f53ab391f6275ae26940d91cf..47fca5144742b3f3b14396ad138611c87f64f664 100644
--- a/python/paddle/fluid/tests/unittests/test_concat_op.py
+++ b/python/paddle/fluid/tests/unittests/test_concat_op.py
@@ -95,6 +95,41 @@ class TestConcatOp5(TestConcatOp):
         self.axis = -3
 
 
+def create_test_AxisTensor(parent):
+    class TestConcatAxisTensor(parent):
+        def setUp(self):
+            self.op_type = "concat"
+            self.dtype = self.get_dtype()
+            self.init_test_data()
+
+            self.inputs = {
+                'X': [('x0', self.x0), ('x1', self.x1), ('x2', self.x2)],
+                'AxisTensor': np.array([self.axis]).astype("int32")
+            }
+            self.attrs = {}
+
+            if self.axis < 0:
+                self.actual_axis = self.axis + len(self.x0.shape)
+                self.actual_axis = self.actual_axis if self.actual_axis > 0 else 0
+            else:
+                self.actual_axis = self.axis
+
+            self.outputs = {
+                'Out': np.concatenate(
+                    (self.x0, self.x1, self.x2), axis=self.actual_axis)
+            }
+
+    cls_name = "{0}_{1}".format(parent.__name__, "AxisTensor")
+    TestConcatAxisTensor.__name__ = cls_name
+    globals()[cls_name] = TestConcatAxisTensor
+
+
+create_test_AxisTensor(TestConcatOp)
+create_test_AxisTensor(TestConcatOp2)
+create_test_AxisTensor(TestConcatOp3)
+create_test_AxisTensor(TestConcatOp4)
+create_test_AxisTensor(TestConcatOp5)
+
 #----------------Concat Fp16----------------
 
 
@@ -135,6 +170,36 @@ class TestConcatOpError(OpTest):
             x7 = fluid.layers.data(shape=[4], dtype='float16', name='x7')
             fluid.layers.concat([x6, x7])
 
+            # The type of axis in concat_op should be int or Variable.
+            def test_axis_type():
+                fluid.layers.concat([x6, x7], 3.2)
+
+            self.assertRaises(TypeError, test_axis_type)
+
+
+class TestConcatAPI(OpTest):
+    def test_api(self):
+        """Concat gives equal results for an int axis and a Tensor axis."""
+        x_1 = fluid.data(shape=[None, 1, 4, 5], dtype='int32', name='x_1')
+        fluid.layers.concat([x_1, x_1], 0)
+
+        # randint, not random(): floats in [0, 1) cast to int32 are all zeros.
+        input_2 = np.random.randint(0, 100, [2, 1, 4, 5]).astype("int32")
+        input_3 = np.random.randint(0, 100, [2, 2, 4, 5]).astype("int32")
+        x_2 = fluid.data(shape=[2, 1, 4, 5], dtype='int32', name='x_2')
+        x_3 = fluid.data(shape=[2, 2, 4, 5], dtype='int32', name='x_3')
+        positive_1 = fluid.layers.fill_constant([1], "int32", 1)
+        out_1 = fluid.layers.concat(input=[x_2, x_3], axis=1)
+        out_2 = fluid.layers.concat(input=[x_2, x_3], axis=positive_1)
+
+        exe = fluid.Executor(place=fluid.CPUPlace())
+        feed = {"x_1": input_2, "x_2": input_2, "x_3": input_3}
+        [res_1, res_2] = exe.run(
+            fluid.default_main_program(), feed=feed, fetch_list=[out_1, out_2])
+        expected = np.concatenate((input_2, input_3), axis=1)
+        assert np.array_equal(res_1, expected)
+        assert np.array_equal(res_2, expected)
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/test_split_op.py b/python/paddle/fluid/tests/unittests/test_split_op.py
index 0374edb9a29558c292adc067b7e16588d25f14a9..369bb3f7cefcbe875bb0f684bf54c6612e82d5db 100644
--- a/python/paddle/fluid/tests/unittests/test_split_op.py
+++ b/python/paddle/fluid/tests/unittests/test_split_op.py
@@ -17,6 +17,8 @@ from __future__ import print_function
 import unittest
 import numpy as np
 from op_test import OpTest
+import paddle.fluid as fluid
+from paddle.fluid import compiler, Program, program_guard
 
 
 class TestSplitOp(OpTest):
@@ -44,6 +46,161 @@ class TestSplitOp(OpTest):
         self.check_grad(['X'], ['out0', 'out1', 'out2'])
 
 
+# test with attr(num)
+class TestSplitOp_2(OpTest):
+    def setUp(self):
+        self._set_op_type()
+        self.dtype = self.get_dtype()
+        self.init_data()
+        self.inputs = {'X': self.x}
+        self.attrs = {
+            'axis': self.axis,
+            'sections': self.sections,
+            'num': self.num
+        }
+
+        out = np.split(self.x, self.indices_or_sections, self.axis)
+        self.outputs = {'Out': [('out%d' % i, out[i]) \
+                                for i in range(len(out))]}
+
+    def init_data(self):
+        self.x = np.random.random((4, 5, 6)).astype(self.dtype)
+        self.axis = 2
+        self.sections = []
+        self.num = 3
+        self.indices_or_sections = 3
+
+    def get_dtype(self):
+        return "float32"
+
+    def _set_op_type(self):
+        self.op_type = "split"
+
+    def test_check_output(self):
+        self.check_output()
+
+    def test_check_grad(self):
+        self.check_grad(['X'], ['out0', 'out1', 'out2'])
+
+
+# attr(axis) is Tensor
+class TestSplitOp_AxisTensor(OpTest):
+    def setUp(self):
+        self._set_op_type()
+        self.dtype = self.get_dtype()
+        self.init_data()
+        self.inputs = {
+            'X': self.x,
+            'AxisTensor': np.array([self.axis]).astype("int32")
+        }
+        self.attrs = {'sections': self.sections, 'num': self.num}
+
+        out = np.split(self.x, self.indices_or_sections, self.axis)
+        self.outputs = {'Out': [('out%d' % i, out[i]) \
+                                for i in range(len(out))]}
+
+    def init_data(self):
+        self.x = np.random.random((4, 5, 6)).astype(self.dtype)
+        self.axis = 2
+        self.sections = []
+        self.num = 3
+        self.indices_or_sections = 3
+
+    def get_dtype(self):
+        return "float32"
+
+    def _set_op_type(self):
+        self.op_type = "split"
+
+    def test_check_output(self):
+        self.check_output()
+
+    def test_check_grad(self):
+        self.check_grad(['X'], ['out0', 'out1', 'out2'])
+
+
+# attr(sections) is list containing Tensor
+class TestSplitOp_SectionsTensor(OpTest):
+    def setUp(self):
+        self._set_op_type()
+        self.dtype = self.get_dtype()
+        self.init_data()
+        self.inputs = {'X': self.x}
+
+        sections_tensor = []
+        for index, ele in enumerate(self.sections):
+            sections_tensor.append(("x" + str(index), np.ones(
+                (1)).astype('int32') * ele))
+
+        self.inputs['SectionsTensorList'] = sections_tensor
+
+        self.attrs = {
+            'axis': self.axis,
+            'sections': self.sections_infer,
+            'num': self.num
+        }
+
+        out = np.split(self.x, self.indices_or_sections, self.axis)
+        self.outputs = {'Out': [('out%d' % i, out[i]) \
+                                for i in range(len(out))]}
+
+    def init_data(self):
+        self.x = np.random.random((4, 5, 6)).astype(self.dtype)
+        self.axis = 1
+        self.sections = [2, 1, 2]
+        self.sections_infer = [-1, -1, -1]
+        self.num = 0
+        self.indices_or_sections = [2, 3]
+
+    def get_dtype(self):
+        return "float32"
+
+    def _set_op_type(self):
+        self.op_type = "split"
+
+    def test_check_output(self):
+        self.check_output()
+
+    def test_check_grad(self):
+        self.check_grad(['X'], ['out0', 'out1', 'out2'])
+
+
+class TestSplitOp_unk_section(OpTest):
+    def setUp(self):
+        self._set_op_type()
+        self.dtype = self.get_dtype()
+        self.init_data()
+        self.inputs = {'X': self.x}
+        self.attrs = {
+            'axis': self.axis,
+            'sections': self.sections,
+            'num': self.num
+        }
+
+        out = np.split(self.x, self.indices_or_sections, self.axis)
+        self.outputs = {'Out': [('out%d' % i, out[i]) \
+                                for i in range(len(out))]}
+
+    def init_data(self):
+        self.x = np.random.random((4, 5, 6)).astype(self.dtype)
+        self.axis = 2
+        self.sections = [2, 1, -1]
+        self.num = 0
+        self.indices_or_sections = [2, 3]
+
+    def get_dtype(self):
+        return "float32"
+
+    def _set_op_type(self):
+        self.op_type = "split"
+
+    def test_check_output(self):
+        self.check_output()
+
+    def test_check_grad(self):
+        self.check_grad(['X'], ['out0', 'out1', 'out2'])
+
+
 class TestSplitByrefOp(OpTest):
     def _set_op_type(self):
         self.op_type = "split_byref"
@@ -67,5 +224,53 @@ def create_test_fp16(parent):
 
 create_test_fp16(TestSplitOp)
 
+
+class TestSplitAPI(OpTest):
+    def test_api(self):
+        """Split agrees with np.split for Tensor-valued sections and dim."""
+        # randint, not random(): floats in [0, 1) cast to int32 are all zeros,
+        # which would make every comparison below pass vacuously.
+        input_1 = np.random.randint(0, 100, [4, 5, 6]).astype("int32")
+        positive_1 = fluid.layers.fill_constant([1], "int32", 1)
+        x_1 = fluid.data(shape=[4, 5, 6], dtype='int32', name='x_1')
+        x_2 = fluid.data(shape=[4, 5, None], dtype='int32', name='x_2')
+
+        out_0, out_1, out_2 = fluid.layers.split(
+            input=x_1, num_or_sections=[2, positive_1, -1], dim=1)
+        out_3, out_4, out_5 = fluid.layers.split(
+            input=x_1, num_or_sections=[2, 1, 2], dim=positive_1)
+        fluid.layers.split(input=x_2, num_or_sections=2, dim=2)
+
+        exe = fluid.Executor(place=fluid.CPUPlace())
+        feed = {"x_1": input_1, "x_2": input_1}
+        results = exe.run(
+            fluid.default_main_program(), feed=feed,
+            fetch_list=[out_0, out_1, out_2, out_3, out_4, out_5])
+
+        # Both split calls must yield the same three pieces as np.split.
+        out = np.split(input_1, [2, 3], 1)
+        assert len(results) == 2 * len(out)
+        for res, expected in zip(results, out + out):
+            assert np.array_equal(res, expected)
+
+
+class TestSplitOpError(OpTest):
+    def test_errors(self):
+        with program_guard(Program(), Program()):
+            # The type of axis in split_op should be int or Variable.
+            def test_axis_type():
+                x6 = fluid.layers.data(shape=[4], dtype='float16', name='x3')
+                fluid.layers.split(input=x6, num_or_sections=2, dim=3.2)
+
+            self.assertRaises(TypeError, test_axis_type)
+
+            # The type of num_or_sections in split_op should be int, tuple or list.
+            def test_num_or_sections_type():
+                x6 = fluid.layers.data(shape=[4], dtype='float16', name='x4')
+                fluid.layers.split(input=x6, num_or_sections=2.1, dim=3)
+
+            self.assertRaises(TypeError, test_num_or_sections_type)
+
+
 if __name__ == '__main__':
     unittest.main()