From 13f440997200cef17a2e7315a31bfb2c4fe9bc11 Mon Sep 17 00:00:00 2001
From: phlrain
Date: Wed, 16 Feb 2022 14:19:29 +0000
Subject: [PATCH] move slice to pten

---
 paddle/fluid/operators/slice_op.h             | 504 +-----------------
 paddle/fluid/operators/slice_utils.h          |   3 +-
 paddle/pten/kernels/cpu/slice_grad_kernel.cc  |  33 ++
 paddle/pten/kernels/cpu/slice_kernel.cc       |  32 ++
 paddle/pten/kernels/funcs/eigen/pad.cc        |   3 +-
 paddle/pten/kernels/gpu/slice_grad_kernel.cu  |  33 ++
 paddle/pten/kernels/gpu/slice_kernel.cu       |  32 ++
 .../kernels/impl/slice_grad_kernel_impl.h     | 342 ++++++++++++
 paddle/pten/kernels/impl/slice_kernel_impl.h  | 152 ++++++
 paddle/pten/kernels/slice_grad_kernel.h       |  31 ++
 paddle/pten/kernels/slice_kernel.h            |  31 ++
 paddle/pten/ops/compat/slice_sig.cc           |  38 ++
 12 files changed, 729 insertions(+), 505 deletions(-)
 create mode 100644 paddle/pten/kernels/cpu/slice_grad_kernel.cc
 create mode 100644 paddle/pten/kernels/cpu/slice_kernel.cc
 create mode 100644 paddle/pten/kernels/gpu/slice_grad_kernel.cu
 create mode 100644 paddle/pten/kernels/gpu/slice_kernel.cu
 create mode 100644 paddle/pten/kernels/impl/slice_grad_kernel_impl.h
 create mode 100644 paddle/pten/kernels/impl/slice_kernel_impl.h
 create mode 100644 paddle/pten/kernels/slice_grad_kernel.h
 create mode 100644 paddle/pten/kernels/slice_kernel.h
 create mode 100644 paddle/pten/ops/compat/slice_sig.cc

diff --git a/paddle/fluid/operators/slice_op.h b/paddle/fluid/operators/slice_op.h
index bf05bbadcbc..7e9767ccef4 100644
--- a/paddle/fluid/operators/slice_op.h
+++ b/paddle/fluid/operators/slice_op.h
@@ -29,520 +29,20 @@
 using Variable = framework::Variable;
 using LoDTensorArray = framework::LoDTensorArray;
 using DDim = framework::DDim;
 
-inline void DealTensorArray(const framework::ExecutionContext& ctx,
-                            const std::vector<int64_t>& starts,
-                            const std::vector<int64_t>& ends,
-                            bool out_is_array) {
-  auto in_array = ctx.Input<LoDTensorArray>("Input");
-  // If the input is LoDTensorArray, the rank of input is 1.
-  int64_t in_size = in_array->size();
-  int64_t start = starts[0] < 0 ? (starts[0] + in_size) : starts[0];
-  int64_t end = ends[0] < 0 ? (ends[0] + in_size) : ends[0];
-
-  start = std::max(start, static_cast<int64_t>(0));
-  end = std::max(end, static_cast<int64_t>(0));
-  end = std::min(end, in_size);
-
-  if (starts[0] == -1 && end == 0) {
-    end = start + 1;
-  }
-
-  PADDLE_ENFORCE_GT(end, start,
-                    platform::errors::InvalidArgument(
-                        "Attr(ends) should be greater than attr(starts) in "
-                        "slice op. But received end = %d, start = %d.",
-                        ends[0], starts[0]));
-  int64_t out_size = end - start;
-
-  if (out_is_array) {
-    auto out_array = ctx.Output<LoDTensorArray>("Out");
-    out_array->resize(out_size);
-
-    for (int i = 0; i < out_size; ++i) {
-      auto* out_tensor = &out_array->at(i);
-      auto in_tensor = in_array->at(i + start);
-      out_tensor->set_lod(in_tensor.lod());
-      if (in_tensor.memory_size() > 0) {
-        paddle::framework::TensorCopy(in_tensor, ctx.GetPlace(), out_tensor);
-      } else {
-        VLOG(10) << "WARNING: The input tensor 'x_tensor' holds no memory, so "
-                    "nothing has been written to output array["
-                 << i << "].";
-      }
-    }
-  } else {
-    auto out = ctx.Output<Tensor>("Out");
-    auto in_tensor = in_array->at(start);
-    paddle::framework::TensorCopy(in_tensor, ctx.GetPlace(), out);
-  }
-}
-
 template <typename DeviceContext, typename T>
 class SliceKernel : public framework::OpKernel<T> {
  public:
-  void Compute(const framework::ExecutionContext& ctx) const override {
-    const Variable* input_var = ctx.InputVar("Input");
-    bool is_tensor_array = input_var->IsType<LoDTensorArray>();
-    int rank = is_tensor_array ? 1 : ctx.Input<Tensor>("Input")->dims().size();
-
-    switch (rank) {
-      case 1:
-        SliceCompute<1>(ctx);
-        break;
-      case 2:
-        SliceCompute<2>(ctx);
-        break;
-      case 3:
-        SliceCompute<3>(ctx);
-        break;
-      case 4:
-        SliceCompute<4>(ctx);
-        break;
-      case 5:
-        SliceCompute<5>(ctx);
-        break;
-      case 6:
-        SliceCompute<6>(ctx);
-        break;
-      default:
-        PADDLE_THROW(platform::errors::InvalidArgument(
-            "The rank of input should be less than 7, but received %d.",
-            rank));
-    }
-  }
+  void Compute(const framework::ExecutionContext& ctx) const override {}
 
  private:
-  template <size_t D>
-  void SliceCompute(const framework::ExecutionContext& ctx) const {
-    const Variable* input_var = ctx.InputVar("Input");
-    Variable* out_var = ctx.OutputVar("Out");
-    bool input_is_array = input_var->IsType<LoDTensorArray>();
-    bool out_is_array = out_var->IsType<LoDTensorArray>();
-
-    auto axes_int = ctx.Attr<std::vector<int>>("axes");
-    auto starts_int = ctx.Attr<std::vector<int>>("starts");
-    auto ends_int = ctx.Attr<std::vector<int>>("ends");
-    std::vector<int64_t> axes(axes_int.begin(), axes_int.end());
-    std::vector<int64_t> starts(starts_int.begin(), starts_int.end());
-    std::vector<int64_t> ends(ends_int.begin(), ends_int.end());
-
-    auto decrease_axis = ctx.Attr<std::vector<int>>("decrease_axis");
-    auto infer_flags = ctx.Attr<std::vector<int>>("infer_flags");
-
-    // Step 1: Get the accurate attribute values of starts and ends
-    auto starts_tensor_list = ctx.MultiInput<Tensor>("StartsTensorList");
-    if (ctx.HasInput("StartsTensor")) {
-      starts = GetDataFromTensor<int64_t>(ctx.Input<Tensor>("StartsTensor"));
-    } else if (starts_tensor_list.size() > 0) {
-      starts = GetDataFromTensorList<int64_t>(starts_tensor_list);
-    }
-
-    auto ends_tensor_list = ctx.MultiInput<Tensor>("EndsTensorList");
-    if (ctx.HasInput("EndsTensor")) {
-      ends = GetDataFromTensor<int64_t>(ctx.Input<Tensor>("EndsTensor"));
-    } else if (ends_tensor_list.size() > 0) {
-      ends = GetDataFromTensorList<int64_t>(ends_tensor_list);
-    }
-
-    PADDLE_ENFORCE_EQ(
-        starts.size(), axes.size(),
-        platform::errors::InvalidArgument(
-            "The size of starts must be equal to the size of axes."));
-    PADDLE_ENFORCE_EQ(
-        ends.size(), axes.size(),
-        platform::errors::InvalidArgument(
-            "The size of ends must be equal to the size of axes."));
-
-    // Step 2: Compute output
-    if (input_is_array) {
-      DealTensorArray(ctx, starts, ends, out_is_array);
-      return;
-    } else {
-      auto in = ctx.Input<Tensor>("Input");
-      auto out = ctx.Output<Tensor>("Out");
-
-      auto in_dims = in->dims();
-      auto out_dims = out->dims();
-      auto slice_dims = out_dims;
-
-      // 2.1 Infer output dims
-      for (size_t i = 0; i < axes.size(); ++i) {
-        // when start == -1 && end == start+1
-        if (starts[i] == -1 && ends[i] == 0 && infer_flags[i] == -1) {
-          auto ret =
-              std::find(decrease_axis.begin(), decrease_axis.end(), axes[i]);
-          if (ret != decrease_axis.end()) {
-            ends[i] = in_dims[axes[i]];
-          }
-        }
-      }
-
-      CheckAndUpdateSliceAttrs(in_dims, axes, &starts, &ends);
-      slice_dims =
-          GetSliceDims<int64_t>(in_dims, axes, starts, ends, nullptr, nullptr);
-      out_dims = GetDecreasedDims(slice_dims, decrease_axis);
-
-      // 2.2 Get output
-      auto offsets = Eigen::DSizes<Eigen::DenseIndex, D>();
-      auto extents = Eigen::DSizes<Eigen::DenseIndex, D>();
-
-      for (size_t i = 0; i < D; ++i) {
-        offsets[i] = 0;
-        extents[i] = slice_dims[i];
-      }
-      for (size_t i = 0; i < axes.size(); ++i) {
-        offsets[axes[i]] = starts[i];
-      }
-
-      out->Resize(slice_dims);
-      out->mutable_data<T>(ctx.GetPlace());
-
-      auto in_t = framework::EigenTensor<T, D, Eigen::RowMajor,
-                                         Eigen::DenseIndex>::From(*in, in_dims);
-      auto out_t = framework::EigenTensor<T, D, Eigen::RowMajor,
-                                          Eigen::DenseIndex>::From(*out,
-                                                                   slice_dims);
-      auto& eigen_place =
-          *ctx.template device_context<DeviceContext>().eigen_device();
-
-      if (in->numel() <= Eigen::NumTraits<int>::highest()) {
-        // similar to tf.slice:
-        // if the element count is less than INT_MAX, switch to 32-bit indices
-        Eigen::DSizes<int, D> offsets_32bit, extents_32bit;
-        for (size_t i = 0; i < D; i++) {
-          offsets_32bit[i] = offsets[i];
-          extents_32bit[i] = extents[i];
-        }
-        EigenSlice<std::decay_t<decltype(eigen_place)>, T, D>::Eval(
-            eigen_place, framework::To32BitIndex(out_t),
-            framework::To32BitIndex(in_t), offsets_32bit, extents_32bit);
-      } else {
-        EigenSlice<std::decay_t<decltype(eigen_place)>, T, D>::Eval(
-            eigen_place, out_t, in_t, offsets, extents);
-      }
-
-      out->Resize(out_dims);
-    }
-  }
 };
 
 template <typename DeviceContext, typename T>
 class SliceGradKernel : public framework::OpKernel<T> {
  public:
-  void Compute(const framework::ExecutionContext& ctx) const override {
-    const Variable* input_var = ctx.InputVar("Input");
-    bool is_array = input_var->IsType<LoDTensorArray>();
-    size_t rank = is_array ? 1 : ctx.Input<Tensor>("Input")->dims().size();
-
-    switch (rank) {
-      case 1:
-        SliceCompute<1>(ctx);
-        break;
-      case 2:
-        SliceCompute<2>(ctx);
-        break;
-      case 3:
-        SliceCompute<3>(ctx);
-        break;
-      case 4:
-        SliceCompute<4>(ctx);
-        break;
-      case 5:
-        SliceCompute<5>(ctx);
-        break;
-      case 6:
-        SliceCompute<6>(ctx);
-        break;
-      default:
-        PADDLE_THROW(platform::errors::InvalidArgument(
-            "The rank of input should be less than 7, but received %d.",
-            rank));
-    }
-  }
+  void Compute(const framework::ExecutionContext& ctx) const override {}
 
  private:
-  template <size_t D>
-  void SliceCompute(const framework::ExecutionContext& ctx) const {
-    auto axes = ctx.Attr<std::vector<int>>("axes");
-    auto starts_int = ctx.Attr<std::vector<int>>("starts");
-    auto ends_int = ctx.Attr<std::vector<int>>("ends");
-    std::vector<int64_t> starts(starts_int.begin(), starts_int.end());
-    std::vector<int64_t> ends(ends_int.begin(), ends_int.end());
-
-    // Get the accurate attribute values of starts and ends
-    auto starts_tensor_list = ctx.MultiInput<Tensor>("StartsTensorList");
-    if (ctx.HasInput("StartsTensor")) {
-      starts = GetDataFromTensor<int64_t>(ctx.Input<Tensor>("StartsTensor"));
-    } else if (starts_tensor_list.size() > 0) {
-      starts = GetDataFromTensorList<int64_t>(starts_tensor_list);
-    }
-
-    auto ends_tensor_list = ctx.MultiInput<Tensor>("EndsTensorList");
-    if (ctx.HasInput("EndsTensor")) {
-      ends = GetDataFromTensor<int64_t>(ctx.Input<Tensor>("EndsTensor"));
-    } else if (ends_tensor_list.size() > 0) {
-      ends = GetDataFromTensorList<int64_t>(ends_tensor_list);
-    }
-
-    Variable* d_input_var = ctx.OutputVar(framework::GradVarName("Input"));
-    const Variable* d_out_var = ctx.InputVar(framework::GradVarName("Out"));
-    bool d_input_is_array = d_input_var->IsType<LoDTensorArray>();
-    bool d_out_is_array = d_out_var->IsType<LoDTensorArray>();
-
-    if (d_input_is_array) {
-      auto* input_array = ctx.Input<LoDTensorArray>("Input");
-      auto* d_in_arr =
-          ctx.Output<LoDTensorArray>(framework::GradVarName("Input"));
-
-      int64_t d_in_size = input_array->size();
-      d_in_arr->resize(d_in_size);
-      // If the input is LoDTensorArray, the rank of input is 1.
-      // So only use the 0th element of starts.
-      int64_t start = starts[0] < 0 ? (starts[0] + d_in_size) : starts[0];
-      start = std::max(start, static_cast<int64_t>(0));
-      // set to zero
-      platform::DeviceContextPool& pool =
-          platform::DeviceContextPool::Instance();
-      auto& dev_ctx = *pool.Get(ctx.GetPlace());
-      pten::funcs::SetConstant<DeviceContext, T> functor;
-      for (int i = 0; i < d_in_size; ++i) {
-        auto dim = input_array->at(i).dims();
-        d_in_arr->at(i).Resize(dim);
-        d_in_arr->at(i).mutable_data<T>(ctx.GetPlace());
-        functor(reinterpret_cast<const DeviceContext&>(dev_ctx),
-                &d_in_arr->at(i), static_cast<T>(0));
-      }
-
-      if (d_out_is_array) {
-        auto* d_out_arr =
-            ctx.Input<LoDTensorArray>(framework::GradVarName("Out"));
-        int d_out_size = d_out_arr->size();
-        for (int i = 0; i < d_out_size; ++i) {
-          paddle::framework::TensorCopy(d_out_arr->at(i), ctx.GetPlace(),
-                                        &(d_in_arr->at(start + i)));
-        }
-      } else {
-        auto* d_out = ctx.Input<Tensor>(framework::GradVarName("Out"));
-        paddle::framework::TensorCopy(*d_out, ctx.GetPlace(),
-                                      &(d_in_arr->at(start)));
-      }
-      return;
-    }
-
-    auto* d_out = ctx.Input<Tensor>(framework::GradVarName("Out"));
-    auto* d_input = ctx.Output<Tensor>(framework::GradVarName("Input"));
-    d_input->mutable_data<T>(ctx.GetPlace());
-
-    auto out_dims = d_out->dims();
-    auto in_dims = d_input->dims();
-
-    auto decrease_axis = ctx.Attr<std::vector<int>>("decrease_axis");
-    auto decrease_size = decrease_axis.size();
-    if (decrease_size > 0) {
-      if (decrease_size == static_cast<size_t>(in_dims.size())) {
-        // all dims decrease
-        std::vector<int64_t> origin_out_shape(decrease_size, 1);
-        out_dims = framework::make_ddim(std::vector<int>(decrease_size, 1));
-      } else {
-        std::vector<int64_t> origin_out_shape(out_dims.size() + decrease_size,
-                                              -1);
-        for (size_t i = 0; i < decrease_size; ++i) {
-          origin_out_shape[decrease_axis[i]] = 1;
-        }
-
-        int index = 0;
-        for (size_t i = 0; i < origin_out_shape.size(); ++i) {
-          if (origin_out_shape[i] == -1) {
-            origin_out_shape[i] = out_dims[index];
-            ++index;
-          }
-        }
-
-        out_dims = framework::make_ddim(origin_out_shape);
-      }
-    }
-
-    auto offsets = Eigen::array<int64_t, D>();
-    auto extents = Eigen::array<int64_t, D>();
-    for (size_t i = 0; i < D; ++i) {
-      offsets[i] = 0;
-      extents[i] = out_dims[i];
-    }
-
-    for (size_t i = 0; i < axes.size(); ++i) {
-      int axis = axes[i];
-      int64_t start = starts[i] < 0 ? (starts[i] + in_dims[axis]) : starts[i];
-      start = std::max(start, static_cast<int64_t>(0));
-      offsets[axis] = start;
-    }
-
-    Eigen::array<std::pair<int64_t, int64_t>, D> paddings;
-    for (size_t i = 0; i < paddings.size(); ++i) {
-      paddings[i].first = offsets[i];
-      paddings[i].second = (in_dims[i] - out_dims[i]) - offsets[i];
-    }
-    EigenPaddingCompute(ctx, d_input, in_dims, d_out, out_dims, paddings);
-  }
-
-  template <size_t D>
-  void EigenPaddingCompute(
-      const framework::ExecutionContext& context, Tensor* d_input,
-      const DDim& in_dims, const Tensor* d_out, const DDim& out_dims,
-      const Eigen::array<std::pair<int64_t, int64_t>, D>& paddings) const {
-    if (D <= 3) {
-      // if the rank is 3 or less, we cannot reduce the rank
-      LaunchEigenPadding(context, d_input, in_dims, d_out, out_dims, paddings);
-    } else {  // otherwise we can try to reduce the rank
-      // count the dimensions that need non-zero padding, and record the axis
-      int need_pad_num = 0, pad_dim = -1;
-      for (size_t i = 0; i < D; i++) {
-        if (paddings[i].first != 0 || paddings[i].second != 0) {
-          need_pad_num++;
-          pad_dim = i;
-        }
-      }
-
-      if (need_pad_num == 1) {
-        // only one dimension needs padding, so we can reduce the rank;
-        // only the padding dimension has to be preserved.
-        //
-        // How to reduce the rank (5 to 3, for example):
-        // before (D=5):
-        // in_dims:                 [x1, x2, x3, x4, x5]
-        // padding.first:           [0,  0,  a,  0,  0]
-        // padding.second:          [0,  0,  b,  0,  0]
-        //                                  |   |
-        //                                  V   V
-        // after (D=3):
-        // reshaped_in_dims:        [x1*x2, x3, x4*x5]
-        // reshaped_padding.first:  [0,     a,  0]
-        // reshaped_padding.second: [0,     b,  0]
-
-        if (pad_dim == D - 1) {
-          // only the last dimension needs padding;
-          // reshape the tensor to rank 2: [preceding, padding]
-          std::vector<int64_t> in_tore_shape(2, 1), out_tore_shape(2, 1);
-          Eigen::array<std::pair<int64_t, int64_t>, 2> reshaped_padding;
-
-          // the first dimension is the product of the preceding dimensions
-          for (int i = 0; i < pad_dim; i++) {
-            in_tore_shape[0] *= in_dims[i];
-            out_tore_shape[0] *= out_dims[i];
-          }
-          // the second dimension is the padding dimension
-          in_tore_shape[1] = in_dims[pad_dim];
-          out_tore_shape[1] = out_dims[pad_dim];
-
-          // convert the shapes from std::vector to DDim
-          DDim reshaped_in_dims = framework::make_ddim(in_tore_shape);
-          DDim reshaped_out_dims = framework::make_ddim(out_tore_shape);
-
-          // after the reshape the first dimension does not need padding,
-          // so set padding[0] to zero
-          reshaped_padding[0].first = reshaped_padding[0].second = 0;
-          // the second dimension keeps the original padding
-          reshaped_padding[1].first = paddings[pad_dim].first;
-          reshaped_padding[1].second = paddings[pad_dim].second;
-
-          LaunchEigenPadding(context, d_input, reshaped_in_dims, d_out,
-                             reshaped_out_dims, reshaped_padding);
-        } else if (pad_dim == 0) {
-          // only the first dimension needs padding;
-          // reshape the tensor to rank 2: [padding, succeeding],
-          // similar to the (D - 1) case
-          std::vector<int64_t> in_tore_shape(2, 1), out_tore_shape(2, 1);
-          Eigen::array<std::pair<int64_t, int64_t>, 2> reshaped_padding;
-
-          // the first dimension is the padding dimension
-          in_tore_shape[0] = in_dims[pad_dim];
-          out_tore_shape[0] = out_dims[pad_dim];
-          // the second dimension is the product of the succeeding dimensions
-          for (size_t i = pad_dim + 1; i < D; i++) {
-            in_tore_shape[1] *= in_dims[i];
-            out_tore_shape[1] *= out_dims[i];
-          }
-
-          // convert the shapes from std::vector to DDim
-          DDim reshaped_in_dims = framework::make_ddim(in_tore_shape);
-          DDim reshaped_out_dims = framework::make_ddim(out_tore_shape);
-
-          // after the reshape:
-          // the first dimension keeps the original padding
-          reshaped_padding[0].first = paddings[pad_dim].first;
-          reshaped_padding[0].second = paddings[pad_dim].second;
-          // the second dimension does not need padding, set padding[1] to zero
-          reshaped_padding[1].first = reshaped_padding[1].second = 0;
-
-          LaunchEigenPadding(context, d_input, reshaped_in_dims, d_out,
-                             reshaped_out_dims, reshaped_padding);
-        } else {
-          // an inner dimension needs padding;
-          // reshape the tensor to rank 3: [preceding, padding, succeeding]
-          std::vector<int64_t> in_tore_shape(3, 1), out_tore_shape(3, 1);
-          Eigen::array<std::pair<int64_t, int64_t>, 3> reshaped_padding;
-
-          // the first dimension is the product of the preceding dimensions
-          for (int i = 0; i < pad_dim; i++) {
-            in_tore_shape[0] *= in_dims[i];
-            out_tore_shape[0] *= out_dims[i];
-          }
-          // the second dimension is the padding dimension
-          in_tore_shape[1] = in_dims[pad_dim];
-          out_tore_shape[1] = out_dims[pad_dim];
-          // the third dimension is the product of the succeeding dimensions
-          for (size_t i = pad_dim + 1; i < D; i++) {
-            in_tore_shape[2] *= in_dims[i];
-            out_tore_shape[2] *= out_dims[i];
-          }
-
-          // convert the shapes from std::vector to DDim
-          DDim reshaped_in_dims = framework::make_ddim(in_tore_shape);
-          DDim reshaped_out_dims = framework::make_ddim(out_tore_shape);
-
-          // after the reshape:
-          // the first dimension does not need padding, set padding[0] to zero
-          reshaped_padding[0].first = reshaped_padding[0].second = 0;
-          // the second dimension keeps the original padding
-          reshaped_padding[1].first = paddings[pad_dim].first;
-          reshaped_padding[1].second = paddings[pad_dim].second;
-          // the third dimension does not need padding, set padding[2] to zero
-          reshaped_padding[2].first = reshaped_padding[2].second = 0;
-
-          LaunchEigenPadding(context, d_input, reshaped_in_dims, d_out,
-                             reshaped_out_dims, reshaped_padding);
-        }
-      } else {
-        // padding is needed on more than one dimension; cannot reduce the rank
-        LaunchEigenPadding(context, d_input, in_dims, d_out, out_dims,
-                           paddings);
-      }
-    }
-  }
-
-  template <size_t D>
-  void LaunchEigenPadding(
-      const framework::ExecutionContext& context, Tensor* d_input,
-      const DDim& in_dims, const Tensor* d_out, const DDim& out_dims,
-      const Eigen::array<std::pair<int64_t, int64_t>, D>& paddings) const {
-    auto& place =
-        *context.template device_context<DeviceContext>().eigen_device();
-    auto d_in_t =
-        framework::EigenTensor<T, D, Eigen::RowMajor, Eigen::DenseIndex>::From(
-            *d_input, in_dims);
-    auto d_out_t =
-        framework::EigenTensor<T, D, Eigen::RowMajor, Eigen::DenseIndex>::From(
-            *d_out, out_dims);
-
-    if (d_input->numel() <= Eigen::NumTraits<int>::highest()) {
-      // similar to tf.pad:
-      // if the element count is less than INT_MAX, switch to 32-bit indices
-      Eigen::array<std::pair<int, int>, D> paddings_32bit;
-      for (size_t i = 0; i < D; i++) {
-        paddings_32bit[i] =
-            std::make_pair(paddings[i].first, paddings[i].second);
-      }
-      EigenPad<std::decay_t<decltype(place)>, T, D>::Eval(
-          place, framework::To32BitIndex(d_in_t),
-          framework::To32BitIndex(d_out_t), paddings_32bit,
-          static_cast<T>(0));
-    } else {
-      EigenPad<std::decay_t<decltype(place)>, T, D>::Eval(
-          place, d_in_t, d_out_t, paddings, static_cast<T>(0));
-    }
-  }
 };
 
 }  // namespace operators
 }  // namespace paddle
diff --git a/paddle/fluid/operators/slice_utils.h b/paddle/fluid/operators/slice_utils.h
index fa36ded24f9..c02e54a8a2c 100644
--- a/paddle/fluid/operators/slice_utils.h
+++ b/paddle/fluid/operators/slice_utils.h
@@ -13,13 +13,12 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #pragma once
-#include
+#include
 #include
 #include
 
 namespace paddle {
 namespace operators {
-using Tensor = framework::Tensor;
 
 template <typename T = int64_t>
 inline void CheckAndUpdateSliceAttrs(const framework::DDim in_dims,
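For a concrete sense of what CheckAndUpdateSliceAttrs normalizes for the kernels above, here is a minimal standalone sketch of the clamping rule slice uses; the helper name NormalizeSliceIndex is illustrative and not part of the patch:

#include <algorithm>
#include <cstdint>

// Maps a possibly-negative slice index into [0, dim_size], the way slice
// does: negative values count from the end, then the result is clamped.
int64_t NormalizeSliceIndex(int64_t idx, int64_t dim_size) {
  if (idx < 0) idx += dim_size;  // e.g. -2 with dim_size 5 becomes 3
  return std::min(std::max(idx, int64_t{0}), dim_size);
}

// NormalizeSliceIndex(-2, 5) == 3, NormalizeSliceIndex(7, 5) == 5,
// NormalizeSliceIndex(-9, 5) == 0.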
diff --git a/paddle/pten/kernels/cpu/slice_grad_kernel.cc b/paddle/pten/kernels/cpu/slice_grad_kernel.cc
new file mode 100644
index 00000000000..67e3b6859ad
--- /dev/null
+++ b/paddle/pten/kernels/cpu/slice_grad_kernel.cc
@@ -0,0 +1,33 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/pten/kernels/slice_grad_kernel.h"
+#include "paddle/pten/kernels/impl/slice_grad_kernel_impl.h"
+
+#include "paddle/pten/backends/cpu/cpu_context.h"
+#include "paddle/pten/core/kernel_registry.h"
+
+PT_REGISTER_KERNEL(slice_grad,
+                   CPU,
+                   ALL_LAYOUT,
+                   pten::SliceGradRawKernel,
+                   bool,
+                   int,
+                   int64_t,
+                   float,
+                   double,
+                   pten::dtype::complex<float>,
+                   pten::dtype::complex<double>,
+                   pten::dtype::bfloat16,
+                   pten::dtype::float16) {}
diff --git a/paddle/pten/kernels/cpu/slice_kernel.cc b/paddle/pten/kernels/cpu/slice_kernel.cc
new file mode 100644
index 00000000000..e49038425b1
--- /dev/null
+++ b/paddle/pten/kernels/cpu/slice_kernel.cc
@@ -0,0 +1,32 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/pten/kernels/slice_kernel.h"
+#include "paddle/pten/kernels/impl/slice_kernel_impl.h"
+
+#include "paddle/pten/backends/cpu/cpu_context.h"
+#include "paddle/pten/core/kernel_registry.h"
+
+PT_REGISTER_KERNEL(slice,
+                   CPU,
+                   ALL_LAYOUT,
+                   pten::SliceRawKernel,
+                   bool,
+                   int,
+                   int64_t,
+                   float,
+                   double,
+                   pten::dtype::complex<float>,
+                   pten::dtype::complex<double>,
+                   pten::dtype::bfloat16) {}
diff --git a/paddle/pten/kernels/funcs/eigen/pad.cc b/paddle/pten/kernels/funcs/eigen/pad.cc
index a051c3d9681..6cf80a1f085 100644
--- a/paddle/pten/kernels/funcs/eigen/pad.cc
+++ b/paddle/pten/kernels/funcs/eigen/pad.cc
@@ -56,7 +56,8 @@ struct EigenPad {
   template struct FUNCTOR<Eigen::DefaultDevice, TYPE, 3>; \
   template struct FUNCTOR<Eigen::DefaultDevice, TYPE, 4>; \
   template struct FUNCTOR<Eigen::DefaultDevice, TYPE, 5>; \
-  template struct FUNCTOR<Eigen::DefaultDevice, TYPE, 6>
+  template struct FUNCTOR<Eigen::DefaultDevice, TYPE, 6>;
+
 INSTANTIATION(EigenPad, bool);
 INSTANTIATION(EigenPad, int);
 INSTANTIATION(EigenPad, int64_t);
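The pad.cc hunk only touches the tail of the INSTANTIATION macro; each INSTANTIATION call stamps out explicit EigenPad instantiations for every rank the macro lists (ranks 1 through 6 following the file's pattern). For example:

// INSTANTIATION(EigenPad, bool); expands to
//   template struct EigenPad<Eigen::DefaultDevice, bool, 1>;
//   ...
//   template struct EigenPad<Eigen::DefaultDevice, bool, 6>;
// so every dtype registered for slice_grad has a pad functor available for
// each rank that SliceGradRawKernel can dispatch to.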
diff --git a/paddle/pten/kernels/gpu/slice_grad_kernel.cu b/paddle/pten/kernels/gpu/slice_grad_kernel.cu
new file mode 100644
index 00000000000..a8902e8900b
--- /dev/null
+++ b/paddle/pten/kernels/gpu/slice_grad_kernel.cu
@@ -0,0 +1,33 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/pten/kernels/impl/slice_grad_kernel_impl.h"
+#include "paddle/pten/kernels/slice_grad_kernel.h"
+
+#include "paddle/pten/backends/gpu/gpu_context.h"
+#include "paddle/pten/core/kernel_registry.h"
+
+PT_REGISTER_KERNEL(slice_grad,
+                   GPU,
+                   ALL_LAYOUT,
+                   pten::SliceGradRawKernel,
+                   bool,
+                   int,
+                   int64_t,
+                   float,
+                   double,
+                   pten::dtype::complex<float>,
+                   pten::dtype::complex<double>,
+                   pten::dtype::bfloat16,
+                   pten::dtype::float16) {}
diff --git a/paddle/pten/kernels/gpu/slice_kernel.cu b/paddle/pten/kernels/gpu/slice_kernel.cu
new file mode 100644
index 00000000000..217dacdafc0
--- /dev/null
+++ b/paddle/pten/kernels/gpu/slice_kernel.cu
@@ -0,0 +1,32 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/pten/kernels/impl/slice_kernel_impl.h"
+#include "paddle/pten/kernels/slice_kernel.h"
+
+#include "paddle/pten/backends/gpu/gpu_context.h"
+#include "paddle/pten/core/kernel_registry.h"
+
+PT_REGISTER_KERNEL(slice,
+                   GPU,
+                   ALL_LAYOUT,
+                   pten::SliceRawKernel,
+                   bool,
+                   int,
+                   int64_t,
+                   float,
+                   double,
+                   pten::dtype::complex<float>,
+                   pten::dtype::complex<double>,
+                   pten::dtype::bfloat16) {}
diff --git a/paddle/pten/kernels/impl/slice_grad_kernel_impl.h b/paddle/pten/kernels/impl/slice_grad_kernel_impl.h
new file mode 100644
index 00000000000..2f442bdaf8e
--- /dev/null
+++ b/paddle/pten/kernels/impl/slice_grad_kernel_impl.h
@@ -0,0 +1,342 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "paddle/fluid/operators/slice_utils.h"
+#include "paddle/pten/kernels/funcs/eigen/common.h"
+#include "paddle/pten/kernels/funcs/eigen/eigen_function.h"
+#include "paddle/pten/kernels/slice_grad_kernel.h"
+
+namespace pten {
+
+template <typename T, typename Context, size_t D>
+void LaunchEigenPadding(
+    const Context& context,
+    DenseTensor* d_input,
+    const DDim& in_dims,
+    const DenseTensor* d_out,
+    const DDim& out_dims,
+    const Eigen::array<std::pair<int64_t, int64_t>, D>& paddings) {
+  auto& place = *context.eigen_device();
+  auto d_in_t = EigenTensor<T, D, Eigen::RowMajor, Eigen::DenseIndex>::From(
+      *d_input, in_dims);
+  auto d_out_t = EigenTensor<T, D, Eigen::RowMajor, Eigen::DenseIndex>::From(
+      *d_out, out_dims);
+
+  if (d_input->numel() <= Eigen::NumTraits<int>::highest()) {
+    // similar to tf.pad:
+    // if the element count is less than INT_MAX, switch to 32-bit indices
+    Eigen::array<std::pair<int, int>, D> paddings_32bit;
+    for (size_t i = 0; i < D; i++) {
+      paddings_32bit[i] = std::make_pair(paddings[i].first, paddings[i].second);
+    }
+    funcs::EigenPad<std::decay_t<decltype(place)>, T, D>::Eval(
+        place,
+        To32BitIndex(d_in_t),
+        To32BitIndex(d_out_t),
+        paddings_32bit,
+        static_cast<T>(0));
+  } else {
+    funcs::EigenPad<std::decay_t<decltype(place)>, T, D>::Eval(
+        place, d_in_t, d_out_t, paddings, static_cast<T>(0));
+  }
+}
+
+template <typename T, typename Context, size_t D>
+void EigenPaddingCompute(
+    const Context& context,
+    DenseTensor* d_input,
+    const DDim& in_dims,
+    const DenseTensor* d_out,
+    const DDim& out_dims,
+    const Eigen::array<std::pair<int64_t, int64_t>, D>& paddings) {
+  if (D <= 3) {
+    // if the rank is 3 or less, we cannot reduce the rank
+    LaunchEigenPadding<T, Context>(
+        context, d_input, in_dims, d_out, out_dims, paddings);
+  } else {  // otherwise we can try to reduce the rank
+    // count the dimensions that need non-zero padding, and record the axis
+    int need_pad_num = 0, pad_dim = -1;
+    for (size_t i = 0; i < D; i++) {
+      if (paddings[i].first != 0 || paddings[i].second != 0) {
+        need_pad_num++;
+        pad_dim = i;
+      }
+    }
+
+    if (need_pad_num == 1) {
+      // only one dimension needs padding, so we can reduce the rank;
+      // only the padding dimension has to be preserved.
+      //
+      // How to reduce the rank (5 to 3, for example):
+      // before (D=5):
+      // in_dims:                 [x1, x2, x3, x4, x5]
+      // padding.first:           [0,  0,  a,  0,  0]
+      // padding.second:          [0,  0,  b,  0,  0]
+      //                                  |   |
+      //                                  V   V
+      // after (D=3):
+      // reshaped_in_dims:        [x1*x2, x3, x4*x5]
+      // reshaped_padding.first:  [0,     a,  0]
+      // reshaped_padding.second: [0,     b,  0]
+
+      if (pad_dim == static_cast<int>(D) - 1) {
+        // only the last dimension needs padding;
+        // reshape the tensor to rank 2: [preceding, padding]
+        std::vector<int64_t> in_tore_shape(2, 1), out_tore_shape(2, 1);
+        Eigen::array<std::pair<int64_t, int64_t>, 2> reshaped_padding;
+
+        // the first dimension is the product of the preceding dimensions
+        for (int i = 0; i < pad_dim; i++) {
+          in_tore_shape[0] *= in_dims[i];
+          out_tore_shape[0] *= out_dims[i];
+        }
+        // the second dimension is the padding dimension
+        in_tore_shape[1] = in_dims[pad_dim];
+        out_tore_shape[1] = out_dims[pad_dim];
+
+        // convert the shapes from std::vector to DDim
+        DDim reshaped_in_dims = framework::make_ddim(in_tore_shape);
+        DDim reshaped_out_dims = framework::make_ddim(out_tore_shape);
+
+        // after the reshape the first dimension does not need padding,
+        // so set padding[0] to zero
+        reshaped_padding[0].first = reshaped_padding[0].second = 0;
+        // the second dimension keeps the original padding
+        reshaped_padding[1].first = paddings[pad_dim].first;
+        reshaped_padding[1].second = paddings[pad_dim].second;
+
+        LaunchEigenPadding<T, Context>(context, d_input, reshaped_in_dims,
+                                       d_out, reshaped_out_dims,
+                                       reshaped_padding);
+      } else if (pad_dim == 0) {
+        // only the first dimension needs padding;
+        // reshape the tensor to rank 2: [padding, succeeding],
+        // similar to the (D - 1) case
+        std::vector<int64_t> in_tore_shape(2, 1), out_tore_shape(2, 1);
+        Eigen::array<std::pair<int64_t, int64_t>, 2> reshaped_padding;
+
+        // the first dimension is the padding dimension
+        in_tore_shape[0] = in_dims[pad_dim];
+        out_tore_shape[0] = out_dims[pad_dim];
+        // the second dimension is the product of the succeeding dimensions
+        for (size_t i = pad_dim + 1; i < D; i++) {
+          in_tore_shape[1] *= in_dims[i];
+          out_tore_shape[1] *= out_dims[i];
+        }
+
+        // convert the shapes from std::vector to DDim
+        DDim reshaped_in_dims = framework::make_ddim(in_tore_shape);
+        DDim reshaped_out_dims = framework::make_ddim(out_tore_shape);
+
+        // after the reshape:
+        // the first dimension keeps the original padding
+        reshaped_padding[0].first = paddings[pad_dim].first;
+        reshaped_padding[0].second = paddings[pad_dim].second;
+        // the second dimension does not need padding, set padding[1] to zero
+        reshaped_padding[1].first = reshaped_padding[1].second = 0;
+
+        LaunchEigenPadding<T, Context>(context, d_input, reshaped_in_dims,
+                                       d_out, reshaped_out_dims,
+                                       reshaped_padding);
+      } else {
+        // an inner dimension needs padding;
+        // reshape the tensor to rank 3: [preceding, padding, succeeding]
+        std::vector<int64_t> in_tore_shape(3, 1), out_tore_shape(3, 1);
+        Eigen::array<std::pair<int64_t, int64_t>, 3> reshaped_padding;
+
+        // the first dimension is the product of the preceding dimensions
+        for (int i = 0; i < pad_dim; i++) {
+          in_tore_shape[0] *= in_dims[i];
+          out_tore_shape[0] *= out_dims[i];
+        }
+        // the second dimension is the padding dimension
+        in_tore_shape[1] = in_dims[pad_dim];
+        out_tore_shape[1] = out_dims[pad_dim];
+        // the third dimension is the product of the succeeding dimensions
+        for (size_t i = pad_dim + 1; i < D; i++) {
+          in_tore_shape[2] *= in_dims[i];
+          out_tore_shape[2] *= out_dims[i];
+        }
+
+        // convert the shapes from std::vector to DDim
+        DDim reshaped_in_dims = framework::make_ddim(in_tore_shape);
+        DDim reshaped_out_dims = framework::make_ddim(out_tore_shape);
+
+        // after the reshape:
+        // the first dimension does not need padding, set padding[0] to zero
+        reshaped_padding[0].first = reshaped_padding[0].second = 0;
+        // the second dimension keeps the original padding
+        reshaped_padding[1].first = paddings[pad_dim].first;
+        reshaped_padding[1].second = paddings[pad_dim].second;
+        // the third dimension does not need padding, set padding[2] to zero
+        reshaped_padding[2].first = reshaped_padding[2].second = 0;
+
+        LaunchEigenPadding<T, Context>(context, d_input, reshaped_in_dims,
+                                       d_out, reshaped_out_dims,
+                                       reshaped_padding);
+      }
+    } else {
+      // padding is needed on more than one dimension; cannot reduce the rank
+      LaunchEigenPadding<T, Context>(
+          context, d_input, in_dims, d_out, out_dims, paddings);
+    }
+  }
+}
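+
+// Worked example of the rank reduction above: for in_dims = [2, 3, 4, 5, 6]
+// with padding only on axis 2, the gradient tensors are reshaped to
+//   reshaped_in_dims = [2*3, 4, 5*6] = [6, 4, 30]
+// and the pad runs over rank 3 instead of rank 5, which keeps the required
+// EigenPad instantiations small and the inner copy loops contiguous.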
+
+template <typename T, typename Context, size_t D>
+void SliceGradCompute(const Context& ctx,
+                      const DenseTensor& out_grad,
+                      const std::vector<int64_t>& axes,
+                      const std::vector<int64_t>& starts,
+                      const std::vector<int64_t>& ends,
+                      const std::vector<int64_t>& infer_flags,
+                      const std::vector<int64_t>& decrease_axis,
+                      DenseTensor* input_grad) {
+  auto* d_out = &out_grad;
+  auto* d_input = input_grad;
+  d_input->mutable_data<T>(ctx.GetPlace());
+
+  auto out_dims = d_out->dims();
+  auto in_dims = d_input->dims();
+
+  auto decrease_size = decrease_axis.size();
+  if (decrease_size > 0) {
+    if (decrease_size == static_cast<size_t>(in_dims.size())) {
+      // all dims decrease
+      std::vector<int64_t> origin_out_shape(decrease_size, 1);
+      out_dims = framework::make_ddim(std::vector<int64_t>(decrease_size, 1));
+    } else {
+      std::vector<int64_t> origin_out_shape(out_dims.size() + decrease_size,
+                                            -1);
+      for (size_t i = 0; i < decrease_size; ++i) {
+        origin_out_shape[decrease_axis[i]] = 1;
+      }
+
+      int index = 0;
+      for (size_t i = 0; i < origin_out_shape.size(); ++i) {
+        if (origin_out_shape[i] == -1) {
+          origin_out_shape[i] = out_dims[index];
+          ++index;
+        }
+      }
+
+      out_dims = framework::make_ddim(origin_out_shape);
+    }
+  }
+
+  auto offsets = Eigen::array<int64_t, D>();
+  auto extents = Eigen::array<int64_t, D>();
+  for (size_t i = 0; i < D; ++i) {
+    offsets[i] = 0;
+    extents[i] = out_dims[i];
+  }
+
+  for (size_t i = 0; i < axes.size(); ++i) {
+    int axis = axes[i];
+    int64_t start = starts[i] < 0 ? (starts[i] + in_dims[axis]) : starts[i];
+    start = std::max(start, static_cast<int64_t>(0));
+    offsets[axis] = start;
+  }
+
+  Eigen::array<std::pair<int64_t, int64_t>, D> paddings;
+  for (size_t i = 0; i < paddings.size(); ++i) {
+    paddings[i].first = offsets[i];
+    paddings[i].second = (in_dims[i] - out_dims[i]) - offsets[i];
+  }
+  EigenPaddingCompute<T, Context, D>(
+      ctx, d_input, in_dims, d_out, out_dims, paddings);
+}
+
+template <typename T, typename Context>
+void SliceGradRawKernel(const Context& ctx,
+                        const DenseTensor& out_grad,
+                        const std::vector<int64_t>& axes,
+                        const std::vector<int64_t>& starts,
+                        const std::vector<int64_t>& ends,
+                        const std::vector<int64_t>& infer_flags,
+                        const std::vector<int64_t>& decrease_axis,
+                        DenseTensor* input_grad) {
+  // Dispatch on the rank of the input gradient: with decrease_axis the
+  // out_grad rank can be smaller, but offsets and paddings are built per
+  // input dimension.
+  size_t rank = input_grad->dims().size();
+
+  switch (rank) {
+    case 1:
+      SliceGradCompute<T, Context, 1>(ctx, out_grad, axes, starts, ends,
+                                      infer_flags, decrease_axis, input_grad);
+      break;
+    case 2:
+      SliceGradCompute<T, Context, 2>(ctx, out_grad, axes, starts, ends,
+                                      infer_flags, decrease_axis, input_grad);
+      break;
+    case 3:
+      SliceGradCompute<T, Context, 3>(ctx, out_grad, axes, starts, ends,
+                                      infer_flags, decrease_axis, input_grad);
+      break;
+    case 4:
+      SliceGradCompute<T, Context, 4>(ctx, out_grad, axes, starts, ends,
+                                      infer_flags, decrease_axis, input_grad);
+      break;
+    case 5:
+      SliceGradCompute<T, Context, 5>(ctx, out_grad, axes, starts, ends,
+                                      infer_flags, decrease_axis, input_grad);
+      break;
+    case 6:
+      SliceGradCompute<T, Context, 6>(ctx, out_grad, axes, starts, ends,
+                                      infer_flags, decrease_axis, input_grad);
+      break;
+    default:
+      PADDLE_THROW(pten::errors::InvalidArgument(
+          "The rank of input should be less than 7, but received %d.", rank));
+  }
+}
+
+}  // namespace pten
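The backward pass above scatters d_out back into a zero tensor of the input's shape; the paddings are exactly the gap on each side of the slice. A minimal standalone 1-D sketch (the helper name SliceGradPad1D is illustrative, not from the patch):

#include <cstdint>
#include <vector>

// For a 1-D slice x[2:6] on an input of length 8, the gradient is the
// upstream gradient padded back to length 8 with zeros on both sides.
std::vector<float> SliceGradPad1D(const std::vector<float>& d_out,
                                  int64_t in_size, int64_t start) {
  std::vector<float> d_in(in_size, 0.0f);  // the "padding" is zeros
  for (size_t i = 0; i < d_out.size(); ++i) d_in[start + i] = d_out[i];
  return d_in;
}

// SliceGradPad1D({g0, g1, g2, g3}, 8, 2) == {0, 0, g0, g1, g2, g3, 0, 0};
// in the kernel's terms paddings[0] = {2, (8 - 4) - 2} = {2, 2}.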
diff --git a/paddle/pten/kernels/impl/slice_kernel_impl.h b/paddle/pten/kernels/impl/slice_kernel_impl.h
new file mode 100644
index 00000000000..b3c4f65de4b
--- /dev/null
+++ b/paddle/pten/kernels/impl/slice_kernel_impl.h
@@ -0,0 +1,152 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "paddle/fluid/operators/slice_utils.h"
+#include "paddle/pten/kernels/funcs/eigen/common.h"
+#include "paddle/pten/kernels/funcs/eigen/eigen_function.h"
+
+namespace pten {
+
+template <typename T, typename Context, size_t D>
+void SliceCompute(const Context& ctx,
+                  const DenseTensor& input,
+                  const std::vector<int64_t>& axes,
+                  const std::vector<int64_t>& starts_t,
+                  const std::vector<int64_t>& ends_t,
+                  const std::vector<int64_t>& infer_flags,
+                  const std::vector<int64_t>& decrease_axis,
+                  DenseTensor* out) {
+  // Step 1: Get the accurate attribute values of starts and ends
+  std::vector<int64_t> starts = starts_t;
+  std::vector<int64_t> ends = ends_t;
+  PADDLE_ENFORCE_EQ(
+      starts.size(),
+      axes.size(),
+      pten::errors::InvalidArgument(
+          "The size of starts must be equal to the size of axes."));
+  PADDLE_ENFORCE_EQ(ends.size(),
+                    axes.size(),
+                    pten::errors::InvalidArgument(
+                        "The size of ends must be equal to the size of axes."));
+
+  // Step 2: Compute output
+  auto in = &input;
+
+  auto in_dims = in->dims();
+  auto out_dims = out->dims();
+  auto slice_dims = out_dims;
+
+  // 2.1 Infer output dims
+  for (size_t i = 0; i < axes.size(); ++i) {
+    // when start == -1 && end == start+1
+    if (starts[i] == -1 && ends[i] == 0 && infer_flags[i] == -1) {
+      auto ret = std::find(decrease_axis.begin(), decrease_axis.end(), axes[i]);
+      if (ret != decrease_axis.end()) {
+        ends[i] = in_dims[axes[i]];
+      }
+    }
+  }
+
+  paddle::operators::CheckAndUpdateSliceAttrs(in_dims, axes, &starts, &ends);
+  slice_dims = paddle::operators::GetSliceDims<int64_t>(
+      in_dims, axes, starts, ends, nullptr, nullptr);
+  out_dims = paddle::operators::GetDecreasedDims(slice_dims, decrease_axis);
+
+  // 2.2 Get output
+  auto offsets = Eigen::DSizes<Eigen::DenseIndex, D>();
+  auto extents = Eigen::DSizes<Eigen::DenseIndex, D>();
+
+  for (size_t i = 0; i < D; ++i) {
+    offsets[i] = 0;
+    extents[i] = slice_dims[i];
+  }
+  for (size_t i = 0; i < axes.size(); ++i) {
+    offsets[axes[i]] = starts[i];
+  }
+
+  out->Resize(slice_dims);
+  out->mutable_data<T>(ctx.GetPlace());
+
+  auto in_t = EigenTensor<T, D, Eigen::RowMajor, Eigen::DenseIndex>::From(
+      *in, in_dims);
+  auto out_t = EigenTensor<T, D, Eigen::RowMajor, Eigen::DenseIndex>::From(
+      *out, slice_dims);
+  auto& eigen_place = *ctx.eigen_device();
+
+  if (in->numel() <= Eigen::NumTraits<int>::highest()) {
+    // similar to tf.slice:
+    // if the element count is less than INT_MAX, switch to 32-bit indices
+    Eigen::DSizes<int, D> offsets_32bit, extents_32bit;
+    for (size_t i = 0; i < D; i++) {
+      offsets_32bit[i] = offsets[i];
+      extents_32bit[i] = extents[i];
+    }
+    funcs::EigenSlice<std::decay_t<decltype(eigen_place)>, T, D>::Eval(
+        eigen_place,
+        To32BitIndex(out_t),
+        To32BitIndex(in_t),
+        offsets_32bit,
+        extents_32bit);
+  } else {
+    funcs::EigenSlice<std::decay_t<decltype(eigen_place)>, T, D>::Eval(
+        eigen_place, out_t, in_t, offsets, extents);
+  }
+
+  out->Resize(out_dims);
+}
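+
+// Note on the 32-bit fast path above: Eigen kernels vectorize better with
+// int indices, so whenever numel() fits in int (that is, it does not exceed
+// Eigen::NumTraits<int>::highest(), i.e. INT_MAX) the offsets and extents
+// are narrowed to Eigen::DSizes<int, D>, mirroring what tf.slice does.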
+
+template <typename T, typename Context>
+void SliceRawKernel(const Context& ctx,
+                    const DenseTensor& input,
+                    const std::vector<int64_t>& axes,
+                    const std::vector<int64_t>& starts,
+                    const std::vector<int64_t>& ends,
+                    const std::vector<int64_t>& infer_flags,
+                    const std::vector<int64_t>& decrease_axis,
+                    DenseTensor* out) {
+  int rank = input.dims().size();
+
+  switch (rank) {
+    case 1:
+      SliceCompute<T, Context, 1>(
+          ctx, input, axes, starts, ends, infer_flags, decrease_axis, out);
+      break;
+    case 2:
+      SliceCompute<T, Context, 2>(
+          ctx, input, axes, starts, ends, infer_flags, decrease_axis, out);
+      break;
+    case 3:
+      SliceCompute<T, Context, 3>(
+          ctx, input, axes, starts, ends, infer_flags, decrease_axis, out);
+      break;
+    case 4:
+      SliceCompute<T, Context, 4>(
+          ctx, input, axes, starts, ends, infer_flags, decrease_axis, out);
+      break;
+    case 5:
+      SliceCompute<T, Context, 5>(
+          ctx, input, axes, starts, ends, infer_flags, decrease_axis, out);
+      break;
+    case 6:
+      SliceCompute<T, Context, 6>(
+          ctx, input, axes, starts, ends, infer_flags, decrease_axis, out);
+      break;
+    default:
+      PADDLE_THROW(pten::errors::InvalidArgument(
+          "The rank of input should be less than 7, but received %d.", rank));
+  }
+}
+
+}  // namespace pten
diff --git a/paddle/pten/kernels/slice_grad_kernel.h b/paddle/pten/kernels/slice_grad_kernel.h
new file mode 100644
index 00000000000..a0c3e3532ac
--- /dev/null
+++ b/paddle/pten/kernels/slice_grad_kernel.h
@@ -0,0 +1,31 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "paddle/pten/core/dense_tensor.h"
+
+namespace pten {
+
+template <typename T, typename Context>
+void SliceGradRawKernel(const Context& ctx,
+                        const DenseTensor& out_grad,
+                        const std::vector<int64_t>& axes,
+                        const std::vector<int64_t>& starts,
+                        const std::vector<int64_t>& ends,
+                        const std::vector<int64_t>& infer_flags,
+                        const std::vector<int64_t>& decrease_axis,
+                        DenseTensor* input_grad);
+
+}  // namespace pten
diff --git a/paddle/pten/kernels/slice_kernel.h b/paddle/pten/kernels/slice_kernel.h
new file mode 100644
index 00000000000..539fa1ab507
--- /dev/null
+++ b/paddle/pten/kernels/slice_kernel.h
@@ -0,0 +1,31 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "paddle/pten/core/dense_tensor.h"
+
+namespace pten {
+
+template <typename T, typename Context>
+void SliceRawKernel(const Context& ctx,
+                    const DenseTensor& input,
+                    const std::vector<int64_t>& axes,
+                    const std::vector<int64_t>& starts,
+                    const std::vector<int64_t>& ends,
+                    const std::vector<int64_t>& infer_flags,
+                    const std::vector<int64_t>& decrease_axis,
+                    DenseTensor* out);
+
+}  // namespace pten
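With the declarations above, the new kernels can be called directly against a DenseTensor. A rough usage sketch under the pten API of this patch; the wrapper name RunSlice is hypothetical, and tensor allocation/meta setup is assumed to have happened elsewhere:

#include <vector>

#include "paddle/pten/backends/cpu/cpu_context.h"
#include "paddle/pten/kernels/slice_kernel.h"

// Slice x[:, 1:3] of a [4, 5] float tensor: axes={1}, starts={1}, ends={3}.
void RunSlice(const pten::CPUContext& dev_ctx,
              const pten::DenseTensor& x,
              pten::DenseTensor* out) {
  std::vector<int64_t> axes{1}, starts{1}, ends{3};
  std::vector<int64_t> infer_flags{1};   // 1: start/end known statically
  std::vector<int64_t> decrease_axis{};  // keep the rank
  pten::SliceRawKernel<float>(dev_ctx, x, axes, starts, ends, infer_flags,
                              decrease_axis, out);
}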
diff --git a/paddle/pten/ops/compat/slice_sig.cc b/paddle/pten/ops/compat/slice_sig.cc
new file mode 100644
index 00000000000..9bfd2ec0a60
--- /dev/null
+++ b/paddle/pten/ops/compat/slice_sig.cc
@@ -0,0 +1,38 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/pten/core/compat/op_utils.h"
+
+namespace pten {
+
+KernelSignature SliceOpArgumentMapping(const ArgumentMappingContext& ctx) {
+  return KernelSignature(
+      "slice",
+      {"Input"},
+      {"axes", "starts", "ends", "infer_flags", "decrease_axis"},
+      {"Out"});
+}
+
+KernelSignature SliceGradOpArgumentMapping(const ArgumentMappingContext& ctx) {
+  return KernelSignature(
+      "slice_grad",
+      {GradVarName("Out")},
+      {"axes", "starts", "ends", "infer_flags", "decrease_axis"},
+      {GradVarName("Input")});
+}
+
+}  // namespace pten
+
+PT_REGISTER_ARG_MAPPING_FN(slice, pten::SliceOpArgumentMapping);
+PT_REGISTER_ARG_MAPPING_FN(slice_grad, pten::SliceGradOpArgumentMapping);
-- 
GitLab
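For reference, GradVarName("Out") resolves to the gradient variable name of "Out", so the grad mapping above binds the fluid op's slots to the new kernel roughly as follows:

// slice:      {"Input"}    + attrs -> pten::SliceRawKernel     -> {"Out"}
// slice_grad: {"Out@GRAD"} + attrs -> pten::SliceGradRawKernel -> {"Input@GRAD"}
// where attrs = {"axes", "starts", "ends", "infer_flags", "decrease_axis"},
// matching the parameter order of the kernel signatures declared earlier.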