From 13f440997200cef17a2e7315a31bfb2c4fe9bc11 Mon Sep 17 00:00:00 2001
From: phlrain
Date: Wed, 16 Feb 2022 14:19:29 +0000
Subject: [PATCH] move slice to pten

---
 paddle/fluid/operators/slice_op.h             | 504 +-----------------
 paddle/fluid/operators/slice_utils.h          |   3 +-
 paddle/pten/kernels/cpu/slice_grad_kernel.cc  |  33 ++
 paddle/pten/kernels/cpu/slice_kernel.cc       |  32 ++
 paddle/pten/kernels/funcs/eigen/pad.cc        |   3 +-
 paddle/pten/kernels/gpu/slice_grad_kernel.cu  |  33 ++
 paddle/pten/kernels/gpu/slice_kernel.cu       |  32 ++
 .../kernels/impl/slice_grad_kernel_impl.h     | 342 ++++++++++++
 paddle/pten/kernels/impl/slice_kernel_impl.h  | 152 ++++++
 paddle/pten/kernels/slice_grad_kernel.h       |  31 ++
 paddle/pten/kernels/slice_kernel.h            |  31 ++
 paddle/pten/ops/compat/slice_sig.cc           |  38 ++
 12 files changed, 729 insertions(+), 505 deletions(-)
 create mode 100644 paddle/pten/kernels/cpu/slice_grad_kernel.cc
 create mode 100644 paddle/pten/kernels/cpu/slice_kernel.cc
 create mode 100644 paddle/pten/kernels/gpu/slice_grad_kernel.cu
 create mode 100644 paddle/pten/kernels/gpu/slice_kernel.cu
 create mode 100644 paddle/pten/kernels/impl/slice_grad_kernel_impl.h
 create mode 100644 paddle/pten/kernels/impl/slice_kernel_impl.h
 create mode 100644 paddle/pten/kernels/slice_grad_kernel.h
 create mode 100644 paddle/pten/kernels/slice_kernel.h
 create mode 100644 paddle/pten/ops/compat/slice_sig.cc

diff --git a/paddle/fluid/operators/slice_op.h b/paddle/fluid/operators/slice_op.h
index bf05bbadcbc..7e9767ccef4 100644
--- a/paddle/fluid/operators/slice_op.h
+++ b/paddle/fluid/operators/slice_op.h
@@ -29,520 +29,20 @@
 using Variable = framework::Variable;
 using LoDTensorArray = framework::LoDTensorArray;
 using DDim = framework::DDim;
 
-inline void DealTensorArray(const framework::ExecutionContext& ctx,
-                            const std::vector<int64_t>& starts,
-                            const std::vector<int64_t>& ends,
-                            bool out_is_array) {
-  auto in_array = ctx.Input<LoDTensorArray>("Input");
-  // If the input is LoDTensorArray, the rank of input is 1.
-  int64_t in_size = in_array->size();
-  int64_t start = starts[0] < 0 ? (starts[0] + in_size) : starts[0];
-  int64_t end = ends[0] < 0 ? (ends[0] + in_size) : ends[0];
-
-  start = std::max(start, static_cast<int64_t>(0));
-  end = std::max(end, static_cast<int64_t>(0));
-  end = std::min(end, in_size);
-
-  if (starts[0] == -1 && end == 0) {
-    end = start + 1;
-  }
-
-  PADDLE_ENFORCE_GT(end, start,
-                    platform::errors::InvalidArgument(
-                        "Attr(ends) should be greater than attr(starts) in "
-                        "slice op. But received end = %d, start = %d.",
-                        ends[0], starts[0]));
-  int64_t out_size = end - start;
-
-  if (out_is_array) {
-    auto out_array = ctx.Output<LoDTensorArray>("Out");
-    out_array->resize(out_size);
-
-    for (int i = 0; i < out_size; ++i) {
-      auto* out_tensor = &out_array->at(i);
-      auto in_tensor = in_array->at(i + start);
-      out_tensor->set_lod(in_tensor.lod());
-      if (in_tensor.memory_size() > 0) {
-        paddle::framework::TensorCopy(in_tensor, ctx.GetPlace(), out_tensor);
-      } else {
-        VLOG(10) << "WARNING: The input tensor 'x_tensor' holds no memory, so "
-                    "nothing has been written to output array["
-                 << i << "].";
-      }
-    }
-  } else {
-    auto out = ctx.Output<Tensor>("Out");
-    auto in_tensor = in_array->at(start);
-    paddle::framework::TensorCopy(in_tensor, ctx.GetPlace(), out);
-  }
-}
-
 template <typename DeviceContext, typename T>
 class SliceKernel : public framework::OpKernel<T> {
  public:
-  void Compute(const framework::ExecutionContext& ctx) const override {
-    const Variable* input_var = ctx.InputVar("Input");
-    bool is_tensor_array = input_var->IsType<LoDTensorArray>();
-    int rank = is_tensor_array ? 1 : ctx.Input<Tensor>("Input")->dims().size();
-
-    switch (rank) {
-      case 1:
-        SliceCompute<1>(ctx);
-        break;
-      case 2:
-        SliceCompute<2>(ctx);
-        break;
-      case 3:
-        SliceCompute<3>(ctx);
-        break;
-      case 4:
-        SliceCompute<4>(ctx);
-        break;
-      case 5:
-        SliceCompute<5>(ctx);
-        break;
-      case 6:
-        SliceCompute<6>(ctx);
-        break;
-      default:
-        PADDLE_THROW(platform::errors::InvalidArgument(
-            "The rank of input should be less than 7, but received %d.",
-            rank));
-    }
-  }
+  void Compute(const framework::ExecutionContext& ctx) const override {}
 
  private:
-  template <size_t D>
-  void SliceCompute(const framework::ExecutionContext& ctx) const {
-    const Variable* input_var = ctx.InputVar("Input");
-    Variable* out_var = ctx.OutputVar("Out");
-    bool input_is_array = input_var->IsType<LoDTensorArray>();
-    bool out_is_array = out_var->IsType<LoDTensorArray>();
-
-    auto axes_int = ctx.Attr<std::vector<int>>("axes");
-    auto starts_int = ctx.Attr<std::vector<int>>("starts");
-    auto ends_int = ctx.Attr<std::vector<int>>("ends");
-    std::vector<int64_t> axes(axes_int.begin(), axes_int.end());
-    std::vector<int64_t> starts(starts_int.begin(), starts_int.end());
-    std::vector<int64_t> ends(ends_int.begin(), ends_int.end());
-
-    auto decrease_axis = ctx.Attr<std::vector<int>>("decrease_axis");
-    auto infer_flags = ctx.Attr<std::vector<int>>("infer_flags");
-
-    // Step 1: Get the accurate attribute values of starts and ends
-    auto starts_tensor_list = ctx.MultiInput<Tensor>("StartsTensorList");
-    if (ctx.HasInput("StartsTensor")) {
-      starts = GetDataFromTensor<int64_t>(ctx.Input<Tensor>("StartsTensor"));
-    } else if (starts_tensor_list.size() > 0) {
-      starts = GetDataFromTensorList<int64_t>(starts_tensor_list);
-    }
-
-    auto ends_tensor_list = ctx.MultiInput<Tensor>("EndsTensorList");
-    if (ctx.HasInput("EndsTensor")) {
-      ends = GetDataFromTensor<int64_t>(ctx.Input<Tensor>("EndsTensor"));
-    } else if (ends_tensor_list.size() > 0) {
-      ends = GetDataFromTensorList<int64_t>(ends_tensor_list);
-    }
-
-    PADDLE_ENFORCE_EQ(
-        starts.size(), axes.size(),
-        platform::errors::InvalidArgument(
-            "The size of starts must be equal to the size of axes."));
-    PADDLE_ENFORCE_EQ(
-        ends.size(), axes.size(),
-        platform::errors::InvalidArgument(
-            "The size of ends must be equal to the size of axes."));
-
-    // Step 2: Compute output
-    if (input_is_array) {
-      DealTensorArray(ctx, starts, ends, out_is_array);
-      return;
-    } else {
-      auto in = ctx.Input<Tensor>("Input");
-      auto out = ctx.Output<Tensor>("Out");
-
-      auto in_dims = in->dims();
-      auto out_dims = out->dims();
-      auto slice_dims = out_dims;
-
-      // 2.1 Infer output dims
-      for (size_t i = 0; i < axes.size(); ++i) {
-        // when start == -1 && end == start+1
-        if (starts[i] == -1 && ends[i] == 0 && infer_flags[i] == -1) {
-          auto ret =
-              std::find(decrease_axis.begin(), decrease_axis.end(), axes[i]);
-          if (ret != decrease_axis.end()) {
-            ends[i] = in_dims[axes[i]];
-          }
-        }
-      }
-
-      CheckAndUpdateSliceAttrs(in_dims, axes, &starts, &ends);
-      slice_dims =
-          GetSliceDims<int64_t>(in_dims, axes, starts, ends, nullptr, nullptr);
-      out_dims = GetDecreasedDims(slice_dims, decrease_axis);
-
-      // 2.2 Get output
-      auto offsets = Eigen::DSizes<Eigen::DenseIndex, D>();
-      auto extents = Eigen::DSizes<Eigen::DenseIndex, D>();
-
-      for (size_t i = 0; i < D; ++i) {
-        offsets[i] = 0;
-        extents[i] = slice_dims[i];
-      }
-      for (size_t i = 0; i < axes.size(); ++i) {
-        offsets[axes[i]] = starts[i];
-      }
-
-      out->Resize(slice_dims);
-      out->mutable_data<T>(ctx.GetPlace());
-
-      auto in_t = framework::EigenTensor<T, D, Eigen::RowMajor,
-                                         Eigen::DenseIndex>::From(*in, in_dims);
-      auto out_t = framework::EigenTensor<T, D, Eigen::RowMajor,
-                                          Eigen::DenseIndex>::From(*out,
-                                                                   slice_dims);
-      auto& eigen_place =
-          *ctx.template device_context<DeviceContext>().eigen_device();
-
-      if (in->numel() <= Eigen::NumTraits<int>::highest()) {
-        // similar to tf.slice:
-        // if the element count is less than INT_MAX, switch to 32-bit indices
-        Eigen::DSizes<int, D> offsets_32bit, extents_32bit;
-        for (size_t i = 0; i < D; i++) {
-          offsets_32bit[i] = offsets[i];
-          extents_32bit[i] = extents[i];
-        }
-        EigenSlice<std::decay_t<decltype(eigen_place)>, T, D>::Eval(
-            eigen_place, framework::To32BitIndex(out_t),
-            framework::To32BitIndex(in_t), offsets_32bit, extents_32bit);
-      } else {
-        EigenSlice<std::decay_t<decltype(eigen_place)>, T, D>::Eval(
-            eigen_place, out_t, in_t, offsets, extents);
-      }
-
-      out->Resize(out_dims);
-    }
-  }
 };
 
 template <typename DeviceContext, typename T>
 class SliceGradKernel : public framework::OpKernel<T> {
  public:
-  void Compute(const framework::ExecutionContext& ctx) const override {
-    const Variable* input_var = ctx.InputVar("Input");
-    bool is_array = input_var->IsType<LoDTensorArray>();
-    size_t rank = is_array ? 1 : ctx.Input<Tensor>("Input")->dims().size();
-
-    switch (rank) {
-      case 1:
-        SliceCompute<1>(ctx);
-        break;
-      case 2:
-        SliceCompute<2>(ctx);
-        break;
-      case 3:
-        SliceCompute<3>(ctx);
-        break;
-      case 4:
-        SliceCompute<4>(ctx);
-        break;
-      case 5:
-        SliceCompute<5>(ctx);
-        break;
-      case 6:
-        SliceCompute<6>(ctx);
-        break;
-      default:
-        PADDLE_THROW(platform::errors::InvalidArgument(
-            "The rank of input should be less than 7, but received %d.",
-            rank));
-    }
-  }
+  void Compute(const framework::ExecutionContext& ctx) const override {}
 
  private:
-  template <size_t D>
-  void SliceCompute(const framework::ExecutionContext& ctx) const {
-    auto axes = ctx.Attr<std::vector<int>>("axes");
-    auto starts_int = ctx.Attr<std::vector<int>>("starts");
-    auto ends_int = ctx.Attr<std::vector<int>>("ends");
-    std::vector<int64_t> starts(starts_int.begin(), starts_int.end());
-    std::vector<int64_t> ends(ends_int.begin(), ends_int.end());
-
-    // Get the accurate attribute values of starts and ends
-    auto starts_tensor_list = ctx.MultiInput<Tensor>("StartsTensorList");
-    if (ctx.HasInput("StartsTensor")) {
-      starts = GetDataFromTensor<int64_t>(ctx.Input<Tensor>("StartsTensor"));
-    } else if (starts_tensor_list.size() > 0) {
-      starts = GetDataFromTensorList<int64_t>(starts_tensor_list);
-    }
-
-    auto ends_tensor_list = ctx.MultiInput<Tensor>("EndsTensorList");
-    if (ctx.HasInput("EndsTensor")) {
-      ends = GetDataFromTensor<int64_t>(ctx.Input<Tensor>("EndsTensor"));
-    } else if (ends_tensor_list.size() > 0) {
-      ends = GetDataFromTensorList<int64_t>(ends_tensor_list);
-    }
-
-    Variable* d_input_var = ctx.OutputVar(framework::GradVarName("Input"));
-    const Variable* d_out_var = ctx.InputVar(framework::GradVarName("Out"));
-    bool d_input_is_array = d_input_var->IsType<LoDTensorArray>();
-    bool d_out_is_array = d_out_var->IsType<LoDTensorArray>();
-
-    if (d_input_is_array) {
-      auto* input_array = ctx.Input<LoDTensorArray>("Input");
-      auto* d_in_arr =
-          ctx.Output<LoDTensorArray>(framework::GradVarName("Input"));
-
-      int64_t d_in_size = input_array->size();
-      d_in_arr->resize(d_in_size);
-      // If the input is LoDTensorArray, the rank of input is 1.
-      // So only use the 0th element of starts.
-      int64_t start = starts[0] < 0 ? (starts[0] + d_in_size) : starts[0];
-      start = std::max(start, static_cast<int64_t>(0));
-      // set to zero
-      platform::DeviceContextPool& pool =
-          platform::DeviceContextPool::Instance();
-      auto& dev_ctx = *pool.Get(ctx.GetPlace());
-      pten::funcs::SetConstant<DeviceContext, T> functor;
-      for (int i = 0; i < d_in_size; ++i) {
-        auto dim = input_array->at(i).dims();
-        d_in_arr->at(i).Resize(dim);
-        d_in_arr->at(i).mutable_data<T>(ctx.GetPlace());
-        functor(reinterpret_cast<const DeviceContext&>(dev_ctx),
-                &d_in_arr->at(i), static_cast<T>(0));
-      }
-
-      if (d_out_is_array) {
-        auto* d_out_arr =
-            ctx.Input<LoDTensorArray>(framework::GradVarName("Out"));
-        int d_out_size = d_out_arr->size();
-        for (int i = 0; i < d_out_size; ++i) {
-          paddle::framework::TensorCopy(d_out_arr->at(i), ctx.GetPlace(),
-                                        &(d_in_arr->at(start + i)));
-        }
-      } else {
-        auto* d_out = ctx.Input<Tensor>(framework::GradVarName("Out"));
-        paddle::framework::TensorCopy(*d_out, ctx.GetPlace(),
-                                      &(d_in_arr->at(start)));
-      }
-      return;
-    }
-
-    auto* d_out = ctx.Input<Tensor>(framework::GradVarName("Out"));
-    auto* d_input = ctx.Output<Tensor>(framework::GradVarName("Input"));
-    d_input->mutable_data<T>(ctx.GetPlace());
-
-    auto out_dims = d_out->dims();
-    auto in_dims = d_input->dims();
-
-    auto decrease_axis = ctx.Attr<std::vector<int>>("decrease_axis");
-    auto decrease_size = decrease_axis.size();
-    if (decrease_size > 0) {
-      if (decrease_size == static_cast<size_t>(in_dims.size())) {
-        // all dims decrease
-        std::vector<int64_t> origin_out_shape(decrease_size, 1);
-        out_dims = framework::make_ddim(std::vector<int>(decrease_size, 1));
-      } else {
-        std::vector<int64_t> origin_out_shape(out_dims.size() + decrease_size,
-                                              -1);
-        for (size_t i = 0; i < decrease_size; ++i) {
-          origin_out_shape[decrease_axis[i]] = 1;
-        }
-
-        int index = 0;
-        for (size_t i = 0; i < origin_out_shape.size(); ++i) {
-          if (origin_out_shape[i] == -1) {
-            origin_out_shape[i] = out_dims[index];
-            ++index;
-          }
-        }
-
-        out_dims = framework::make_ddim(origin_out_shape);
-      }
-    }
-
-    auto offsets = Eigen::array<int64_t, D>();
-    auto extents = Eigen::array<int64_t, D>();
-    for (size_t i = 0; i < D; ++i) {
-      offsets[i] = 0;
-      extents[i] = out_dims[i];
-    }
-
-    for (size_t i = 0; i < axes.size(); ++i) {
-      int axis = axes[i];
-      int64_t start = starts[i] < 0 ? (starts[i] + in_dims[axis]) : starts[i];
-      start = std::max(start, static_cast<int64_t>(0));
-      offsets[axis] = start;
-    }
-
-    Eigen::array<std::pair<int64_t, int64_t>, D> paddings;
-    for (size_t i = 0; i < paddings.size(); ++i) {
-      paddings[i].first = offsets[i];
-      paddings[i].second = (in_dims[i] - out_dims[i]) - offsets[i];
-    }
-    EigenPaddingCompute(ctx, d_input, in_dims, d_out, out_dims, paddings);
-  }
-
-  template <size_t D>
-  void EigenPaddingCompute(
-      const framework::ExecutionContext& context, Tensor* d_input,
-      const DDim& in_dims, const Tensor* d_out, const DDim& out_dims,
-      const Eigen::array<std::pair<int64_t, int64_t>, D>& paddings) const {
-    if (D <= 3) {
-      // if the rank is 3 or less, we cannot reduce the rank
-      LaunchEigenPadding(context, d_input, in_dims, d_out, out_dims, paddings);
-    } else {  // otherwise we can try to reduce the rank
-      // count the dimensions that need non-zero padding, and record the axis
-      int need_pad_num = 0, pad_dim = -1;
-      for (size_t i = 0; i < D; i++) {
-        if (paddings[i].first != 0 || paddings[i].second != 0) {
-          need_pad_num++;
-          pad_dim = i;
-        }
-      }
-
-      if (need_pad_num == 1) {
-        // only one dimension needs padding, so we can reduce the rank;
-        // only the padding dimension has to be preserved.
-        //
-        // How to reduce the rank (5 to 3, for example):
-        // before (D=5):
-        // in_dims:                 [x1, x2, x3, x4, x5]
-        // padding.first:           [0,  0,  a,  0,  0]
-        // padding.second:          [0,  0,  b,  0,  0]
-        //                                  |   |
-        //                                  V   V
-        // after (D=3):
-        // reshaped_in_dims:        [x1*x2, x3, x4*x5]
-        // reshaped_padding.first:  [0,     a,  0]
-        // reshaped_padding.second: [0,     b,  0]
-
-        if (pad_dim == D - 1) {
-          // only the last dimension needs padding;
-          // reshape the tensor to rank 2: [preceding, padding]
-          std::vector<int64_t> in_tore_shape(2, 1), out_tore_shape(2, 1);
-          Eigen::array<std::pair<int64_t, int64_t>, 2> reshaped_padding;
-
-          // the first dimension is the product of the preceding dimensions
-          for (int i = 0; i < pad_dim; i++) {
-            in_tore_shape[0] *= in_dims[i];
-            out_tore_shape[0] *= out_dims[i];
-          }
-          // the second dimension is the padding dimension
-          in_tore_shape[1] = in_dims[pad_dim];
-          out_tore_shape[1] = out_dims[pad_dim];
-
-          // convert the shapes from std::vector to DDim
-          DDim reshaped_in_dims = framework::make_ddim(in_tore_shape);
-          DDim reshaped_out_dims = framework::make_ddim(out_tore_shape);
-
-          // after the reshape the first dimension does not need padding,
-          // so set padding[0] to zero
-          reshaped_padding[0].first = reshaped_padding[0].second = 0;
-          // the second dimension keeps the original padding
-          reshaped_padding[1].first = paddings[pad_dim].first;
-          reshaped_padding[1].second = paddings[pad_dim].second;
-
-          LaunchEigenPadding(context, d_input, reshaped_in_dims, d_out,
-                             reshaped_out_dims, reshaped_padding);
-        } else if (pad_dim == 0) {
-          // only the first dimension needs padding;
-          // reshape the tensor to rank 2: [padding, succeeding],
-          // similar to the (D - 1) case
-          std::vector<int64_t> in_tore_shape(2, 1), out_tore_shape(2, 1);
-          Eigen::array<std::pair<int64_t, int64_t>, 2> reshaped_padding;
-
-          // the first dimension is the padding dimension
-          in_tore_shape[0] = in_dims[pad_dim];
-          out_tore_shape[0] = out_dims[pad_dim];
-          // the second dimension is the product of the succeeding dimensions
-          for (size_t i = pad_dim + 1; i < D; i++) {
-            in_tore_shape[1] *= in_dims[i];
-            out_tore_shape[1] *= out_dims[i];
-          }
-
-          // convert the shapes from std::vector to DDim
-          DDim reshaped_in_dims = framework::make_ddim(in_tore_shape);
-          DDim reshaped_out_dims = framework::make_ddim(out_tore_shape);
-
-          // after the reshape:
-          // the first dimension keeps the original padding
-          reshaped_padding[0].first = paddings[pad_dim].first;
-          reshaped_padding[0].second = paddings[pad_dim].second;
-          // the second dimension does not need padding, set padding[1] to zero
-          reshaped_padding[1].first = reshaped_padding[1].second = 0;
-
-          LaunchEigenPadding(context, d_input, reshaped_in_dims, d_out,
-                             reshaped_out_dims, reshaped_padding);
-        } else {
-          // an inner dimension needs padding;
-          // reshape the tensor to rank 3: [preceding, padding, succeeding]
-          std::vector<int64_t> in_tore_shape(3, 1), out_tore_shape(3, 1);
-          Eigen::array<std::pair<int64_t, int64_t>, 3> reshaped_padding;
-
-          // the first dimension is the product of the preceding dimensions
-          for (int i = 0; i < pad_dim; i++) {
-            in_tore_shape[0] *= in_dims[i];
-            out_tore_shape[0] *= out_dims[i];
-          }
-          // the second dimension is the padding dimension
-          in_tore_shape[1] = in_dims[pad_dim];
-          out_tore_shape[1] = out_dims[pad_dim];
-          // the third dimension is the product of the succeeding dimensions
-          for (size_t i = pad_dim + 1; i < D; i++) {
-            in_tore_shape[2] *= in_dims[i];
-            out_tore_shape[2] *= out_dims[i];
-          }
-
-          // convert the shapes from std::vector to DDim
-          DDim reshaped_in_dims = framework::make_ddim(in_tore_shape);
-          DDim reshaped_out_dims = framework::make_ddim(out_tore_shape);
-
-          // after the reshape:
-          // the first dimension does not need padding, set padding[0] to zero
-          reshaped_padding[0].first = reshaped_padding[0].second = 0;
-          // the second dimension keeps the original padding
-          reshaped_padding[1].first = paddings[pad_dim].first;
-          reshaped_padding[1].second = paddings[pad_dim].second;
-          // the third dimension does not need padding, set padding[2] to zero
-          reshaped_padding[2].first = reshaped_padding[2].second = 0;
-
-          LaunchEigenPadding(context, d_input, reshaped_in_dims, d_out,
-                             reshaped_out_dims, reshaped_padding);
-        }
-      } else {
-        // padding is needed on more than one dimension; cannot reduce the rank
-        LaunchEigenPadding(context, d_input, in_dims, d_out, out_dims,
-                           paddings);
-      }
-    }
-  }
-
-  template <size_t D>
-  void LaunchEigenPadding(
-      const framework::ExecutionContext& context, Tensor* d_input,
-      const DDim& in_dims, const Tensor* d_out, const DDim& out_dims,
-      const Eigen::array<std::pair<int64_t, int64_t>, D>& paddings) const {
-    auto& place =
-        *context.template device_context<DeviceContext>().eigen_device();
-    auto d_in_t =
-        framework::EigenTensor<T, D, Eigen::RowMajor, Eigen::DenseIndex>::From(
-            *d_input, in_dims);
-    auto d_out_t =
-        framework::EigenTensor<T, D, Eigen::RowMajor, Eigen::DenseIndex>::From(
-            *d_out, out_dims);
-
-    if (d_input->numel() <= Eigen::NumTraits<int>::highest()) {
-      // similar to tf.pad:
-      // if the element count is less than INT_MAX, switch to 32-bit indices
-      Eigen::array<std::pair<int, int>, D> paddings_32bit;
-      for (size_t i = 0; i < D; i++) {
-        paddings_32bit[i] =
-            std::make_pair(paddings[i].first, paddings[i].second);
-      }
-      EigenPad<std::decay_t<decltype(place)>, T, D>::Eval(
-          place, framework::To32BitIndex(d_in_t),
-          framework::To32BitIndex(d_out_t), paddings_32bit,
-          static_cast<T>(0));
-    } else {
-      EigenPad<std::decay_t<decltype(place)>, T, D>::Eval(
-          place, d_in_t, d_out_t, paddings, static_cast<T>(0));
-    }
-  }
 };
 
 }  // namespace operators
 }  // namespace paddle
diff --git a/paddle/fluid/operators/slice_utils.h b/paddle/fluid/operators/slice_utils.h
index fa36ded24f9..c02e54a8a2c 100644
--- a/paddle/fluid/operators/slice_utils.h
+++ b/paddle/fluid/operators/slice_utils.h
@@ -13,13 +13,12 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #pragma once
-#include
+#include
 #include
 #include
 
 namespace paddle {
 namespace operators {
-using Tensor = framework::Tensor;
 
 template <typename T = int64_t>
 inline void CheckAndUpdateSliceAttrs(const framework::DDim in_dims,
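For a concrete sense of what CheckAndUpdateSliceAttrs normalizes for the kernels above, here is a minimal standalone sketch of the clamping rule slice uses; the helper name NormalizeSliceIndex is illustrative and not part of the patch:

#include <algorithm>
#include <cstdint>

// Maps a possibly-negative slice index into [0, dim_size], the way slice
// does: negative values count from the end, then the result is clamped.
int64_t NormalizeSliceIndex(int64_t idx, int64_t dim_size) {
  if (idx < 0) idx += dim_size;  // e.g. -2 with dim_size 5 becomes 3
  return std::min(std::max(idx, int64_t{0}), dim_size);
}

// NormalizeSliceIndex(-2, 5) == 3, NormalizeSliceIndex(7, 5) == 5,
// NormalizeSliceIndex(-9, 5) == 0.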
diff --git a/paddle/pten/kernels/cpu/slice_grad_kernel.cc b/paddle/pten/kernels/cpu/slice_grad_kernel.cc
new file mode 100644
index 00000000000..67e3b6859ad
--- /dev/null
+++ b/paddle/pten/kernels/cpu/slice_grad_kernel.cc
@@ -0,0 +1,33 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/pten/kernels/slice_grad_kernel.h"
+#include "paddle/pten/kernels/impl/slice_grad_kernel_impl.h"
+
+#include "paddle/pten/backends/cpu/cpu_context.h"
+#include "paddle/pten/core/kernel_registry.h"
+
+PT_REGISTER_KERNEL(slice_grad,
+                   CPU,
+                   ALL_LAYOUT,
+                   pten::SliceGradRawKernel,
+                   bool,
+                   int,
+                   int64_t,
+                   float,
+                   double,
+                   pten::dtype::complex<float>,
+                   pten::dtype::complex<double>,
+                   pten::dtype::bfloat16,
+                   pten::dtype::float16) {}
diff --git a/paddle/pten/kernels/cpu/slice_kernel.cc b/paddle/pten/kernels/cpu/slice_kernel.cc
new file mode 100644
index 00000000000..e49038425b1
--- /dev/null
+++ b/paddle/pten/kernels/cpu/slice_kernel.cc
@@ -0,0 +1,32 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/pten/kernels/slice_kernel.h"
+#include "paddle/pten/kernels/impl/slice_kernel_impl.h"
+
+#include "paddle/pten/backends/cpu/cpu_context.h"
+#include "paddle/pten/core/kernel_registry.h"
+
+PT_REGISTER_KERNEL(slice,
+                   CPU,
+                   ALL_LAYOUT,
+                   pten::SliceRawKernel,
+                   bool,
+                   int,
+                   int64_t,
+                   float,
+                   double,
+                   pten::dtype::complex<float>,
+                   pten::dtype::complex<double>,
+                   pten::dtype::bfloat16) {}
diff --git a/paddle/pten/kernels/funcs/eigen/pad.cc b/paddle/pten/kernels/funcs/eigen/pad.cc
index a051c3d9681..6cf80a1f085 100644
--- a/paddle/pten/kernels/funcs/eigen/pad.cc
+++ b/paddle/pten/kernels/funcs/eigen/pad.cc
@@ -56,7 +56,8 @@ struct EigenPad {
   template struct FUNCTOR<Eigen::DefaultDevice, TYPE, 3>; \
   template struct FUNCTOR<Eigen::DefaultDevice, TYPE, 4>; \
   template struct FUNCTOR<Eigen::DefaultDevice, TYPE, 5>; \
-  template struct FUNCTOR<Eigen::DefaultDevice, TYPE, 6>
+  template struct FUNCTOR<Eigen::DefaultDevice, TYPE, 6>;
+
 INSTANTIATION(EigenPad, bool);
 INSTANTIATION(EigenPad, int);
 INSTANTIATION(EigenPad, int64_t);
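The pad.cc hunk only touches the tail of the INSTANTIATION macro; each INSTANTIATION call stamps out explicit EigenPad instantiations for every rank the macro lists (ranks 1 through 6 following the file's pattern). For example:

// INSTANTIATION(EigenPad, bool); expands to
//   template struct EigenPad<Eigen::DefaultDevice, bool, 1>;
//   ...
//   template struct EigenPad<Eigen::DefaultDevice, bool, 6>;
// so every dtype registered for slice_grad has a pad functor available for
// each rank that SliceGradRawKernel can dispatch to.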
diff --git a/paddle/pten/kernels/gpu/slice_grad_kernel.cu b/paddle/pten/kernels/gpu/slice_grad_kernel.cu
new file mode 100644
index 00000000000..a8902e8900b
--- /dev/null
+++ b/paddle/pten/kernels/gpu/slice_grad_kernel.cu
@@ -0,0 +1,33 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/pten/kernels/impl/slice_grad_kernel_impl.h"
+#include "paddle/pten/kernels/slice_grad_kernel.h"
+
+#include "paddle/pten/backends/gpu/gpu_context.h"
+#include "paddle/pten/core/kernel_registry.h"
+
+PT_REGISTER_KERNEL(slice_grad,
+                   GPU,
+                   ALL_LAYOUT,
+                   pten::SliceGradRawKernel,
+                   bool,
+                   int,
+                   int64_t,
+                   float,
+                   double,
+                   pten::dtype::complex<float>,
+                   pten::dtype::complex<double>,
+                   pten::dtype::bfloat16,
+                   pten::dtype::float16) {}
diff --git a/paddle/pten/kernels/gpu/slice_kernel.cu b/paddle/pten/kernels/gpu/slice_kernel.cu
new file mode 100644
index 00000000000..217dacdafc0
--- /dev/null
+++ b/paddle/pten/kernels/gpu/slice_kernel.cu
@@ -0,0 +1,32 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/pten/kernels/impl/slice_kernel_impl.h"
+#include "paddle/pten/kernels/slice_kernel.h"
+
+#include "paddle/pten/backends/gpu/gpu_context.h"
+#include "paddle/pten/core/kernel_registry.h"
+
+PT_REGISTER_KERNEL(slice,
+                   GPU,
+                   ALL_LAYOUT,
+                   pten::SliceRawKernel,
+                   bool,
+                   int,
+                   int64_t,
+                   float,
+                   double,
+                   pten::dtype::complex<float>,
+                   pten::dtype::complex<double>,
+                   pten::dtype::bfloat16) {}
diff --git a/paddle/pten/kernels/impl/slice_grad_kernel_impl.h b/paddle/pten/kernels/impl/slice_grad_kernel_impl.h
new file mode 100644
index 00000000000..2f442bdaf8e
--- /dev/null
+++ b/paddle/pten/kernels/impl/slice_grad_kernel_impl.h
@@ -0,0 +1,342 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "paddle/fluid/operators/slice_utils.h"
+#include "paddle/pten/kernels/funcs/eigen/common.h"
+#include "paddle/pten/kernels/funcs/eigen/eigen_function.h"
+#include "paddle/pten/kernels/slice_grad_kernel.h"
+
+namespace pten {
+
+template <typename T, typename Context, size_t D>
+void LaunchEigenPadding(
+    const Context& context,
+    DenseTensor* d_input,
+    const DDim& in_dims,
+    const DenseTensor* d_out,
+    const DDim& out_dims,
+    const Eigen::array<std::pair<int64_t, int64_t>, D>& paddings) {
+  auto& place = *context.eigen_device();
+  auto d_in_t = EigenTensor<T, D, Eigen::RowMajor, Eigen::DenseIndex>::From(
+      *d_input, in_dims);
+  auto d_out_t = EigenTensor<T, D, Eigen::RowMajor, Eigen::DenseIndex>::From(
+      *d_out, out_dims);
+
+  if (d_input->numel() <= Eigen::NumTraits<int>::highest()) {
+    // similar to tf.pad:
+    // if the element count is less than INT_MAX, switch to 32-bit indices
+    Eigen::array<std::pair<int, int>, D> paddings_32bit;
+    for (size_t i = 0; i < D; i++) {
+      paddings_32bit[i] = std::make_pair(paddings[i].first, paddings[i].second);
+    }
+    funcs::EigenPad<std::decay_t<decltype(place)>, T, D>::Eval(
+        place,
+        To32BitIndex(d_in_t),
+        To32BitIndex(d_out_t),
+        paddings_32bit,
+        static_cast<T>(0));
+  } else {
+    funcs::EigenPad<std::decay_t<decltype(place)>, T, D>::Eval(
+        place, d_in_t, d_out_t, paddings, static_cast<T>(0));
+  }
+}
+
+template <typename T, typename Context, size_t D>
+void EigenPaddingCompute(
+    const Context& context,
+    DenseTensor* d_input,
+    const DDim& in_dims,
+    const DenseTensor* d_out,
+    const DDim& out_dims,
+    const Eigen::array<std::pair<int64_t, int64_t>, D>& paddings) {
+  if (D <= 3) {
+    // if the rank is 3 or less, we cannot reduce the rank
+    LaunchEigenPadding<T, Context>(
+        context, d_input, in_dims, d_out, out_dims, paddings);
+  } else {  // otherwise we can try to reduce the rank
+    // count the dimensions that need non-zero padding, and record the axis
+    int need_pad_num = 0, pad_dim = -1;
+    for (size_t i = 0; i < D; i++) {
+      if (paddings[i].first != 0 || paddings[i].second != 0) {
+        need_pad_num++;
+        pad_dim = i;
+      }
+    }
+
+    if (need_pad_num == 1) {
+      // only one dimension needs padding, so we can reduce the rank;
+      // only the padding dimension has to be preserved.
+      //
+      // How to reduce the rank (5 to 3, for example):
+      // before (D=5):
+      // in_dims:                 [x1, x2, x3, x4, x5]
+      // padding.first:           [0,  0,  a,  0,  0]
+      // padding.second:          [0,  0,  b,  0,  0]
+      //                                  |   |
+      //                                  V   V
+      // after (D=3):
+      // reshaped_in_dims:        [x1*x2, x3, x4*x5]
+      // reshaped_padding.first:  [0,     a,  0]
+      // reshaped_padding.second: [0,     b,  0]
+
+      if (pad_dim == static_cast<int>(D) - 1) {
+        // only the last dimension needs padding;
+        // reshape the tensor to rank 2: [preceding, padding]
+        std::vector<int64_t> in_tore_shape(2, 1), out_tore_shape(2, 1);
+        Eigen::array<std::pair<int64_t, int64_t>, 2> reshaped_padding;
+
+        // the first dimension is the product of the preceding dimensions
+        for (int i = 0; i < pad_dim; i++) {
+          in_tore_shape[0] *= in_dims[i];
+          out_tore_shape[0] *= out_dims[i];
+        }
+        // the second dimension is the padding dimension
+        in_tore_shape[1] = in_dims[pad_dim];
+        out_tore_shape[1] = out_dims[pad_dim];
+
+        // convert the shapes from std::vector to DDim
+        DDim reshaped_in_dims = framework::make_ddim(in_tore_shape);
+        DDim reshaped_out_dims = framework::make_ddim(out_tore_shape);
+
+        // after the reshape the first dimension does not need padding,
+        // so set padding[0] to zero
+        reshaped_padding[0].first = reshaped_padding[0].second = 0;
+        // the second dimension keeps the original padding
+        reshaped_padding[1].first = paddings[pad_dim].first;
+        reshaped_padding[1].second = paddings[pad_dim].second;
+
+        LaunchEigenPadding<T, Context>(context, d_input, reshaped_in_dims,
+                                       d_out, reshaped_out_dims,
+                                       reshaped_padding);
+      } else if (pad_dim == 0) {
+        // only the first dimension needs padding;
+        // reshape the tensor to rank 2: [padding, succeeding],
+        // similar to the (D - 1) case
+        std::vector<int64_t> in_tore_shape(2, 1), out_tore_shape(2, 1);
+        Eigen::array<std::pair<int64_t, int64_t>, 2> reshaped_padding;
+
+        // the first dimension is the padding dimension
+        in_tore_shape[0] = in_dims[pad_dim];
+        out_tore_shape[0] = out_dims[pad_dim];
+        // the second dimension is the product of the succeeding dimensions
+        for (size_t i = pad_dim + 1; i < D; i++) {
+          in_tore_shape[1] *= in_dims[i];
+          out_tore_shape[1] *= out_dims[i];
+        }
+
+        // convert the shapes from std::vector to DDim
+        DDim reshaped_in_dims = framework::make_ddim(in_tore_shape);
+        DDim reshaped_out_dims = framework::make_ddim(out_tore_shape);
+
+        // after the reshape:
+        // the first dimension keeps the original padding
+        reshaped_padding[0].first = paddings[pad_dim].first;
+        reshaped_padding[0].second = paddings[pad_dim].second;
+        // the second dimension does not need padding, set padding[1] to zero
+        reshaped_padding[1].first = reshaped_padding[1].second = 0;
+
+        LaunchEigenPadding<T, Context>(context, d_input, reshaped_in_dims,
+                                       d_out, reshaped_out_dims,
+                                       reshaped_padding);
+      } else {
+        // an inner dimension needs padding;
+        // reshape the tensor to rank 3: [preceding, padding, succeeding]
+        std::vector<int64_t> in_tore_shape(3, 1), out_tore_shape(3, 1);
+        Eigen::array<std::pair<int64_t, int64_t>, 3> reshaped_padding;
+
+        // the first dimension is the product of the preceding dimensions
+        for (int i = 0; i < pad_dim; i++) {
+          in_tore_shape[0] *= in_dims[i];
+          out_tore_shape[0] *= out_dims[i];
+        }
+        // the second dimension is the padding dimension
+        in_tore_shape[1] = in_dims[pad_dim];
+        out_tore_shape[1] = out_dims[pad_dim];
+        // the third dimension is the product of the succeeding dimensions
+        for (size_t i = pad_dim + 1; i < D; i++) {
+          in_tore_shape[2] *= in_dims[i];
+          out_tore_shape[2] *= out_dims[i];
+        }
+
+        // convert the shapes from std::vector to DDim
+        DDim reshaped_in_dims = framework::make_ddim(in_tore_shape);
+        DDim reshaped_out_dims = framework::make_ddim(out_tore_shape);
+
+        // after the reshape:
+        // the first dimension does not need padding, set padding[0] to zero
+        reshaped_padding[0].first = reshaped_padding[0].second = 0;
+        // the second dimension keeps the original padding
+        reshaped_padding[1].first = paddings[pad_dim].first;
+        reshaped_padding[1].second = paddings[pad_dim].second;
+        // the third dimension does not need padding, set padding[2] to zero
+        reshaped_padding[2].first = reshaped_padding[2].second = 0;
+
+        LaunchEigenPadding<T, Context>(context, d_input, reshaped_in_dims,
+                                       d_out, reshaped_out_dims,
+                                       reshaped_padding);
+      }
+    } else {
+      // padding is needed on more than one dimension; cannot reduce the rank
+      LaunchEigenPadding<T, Context>(
+          context, d_input, in_dims, d_out, out_dims, paddings);
+    }
+  }
+}
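+
+// Worked example of the rank reduction above: for in_dims = [2, 3, 4, 5, 6]
+// with padding only on axis 2, the gradient tensors are reshaped to
+//   reshaped_in_dims = [2*3, 4, 5*6] = [6, 4, 30]
+// and the pad runs over rank 3 instead of rank 5, which keeps the required
+// EigenPad instantiations small and the inner copy loops contiguous.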
+
+template <typename T, typename Context, size_t D>
+void SliceGradCompute(const Context& ctx,
+                      const DenseTensor& out_grad,
+                      const std::vector<int64_t>& axes,
+                      const std::vector<int64_t>& starts,
+                      const std::vector<int64_t>& ends,
+                      const std::vector<int64_t>& infer_flags,
+                      const std::vector<int64_t>& decrease_axis,
+                      DenseTensor* input_grad) {
+  auto* d_out = &out_grad;
+  auto* d_input = input_grad;
+  d_input->mutable_data<T>(ctx.GetPlace());
+
+  auto out_dims = d_out->dims();
+  auto in_dims = d_input->dims();
+
+  auto decrease_size = decrease_axis.size();
+  if (decrease_size > 0) {
+    if (decrease_size == static_cast<size_t>(in_dims.size())) {
+      // all dims decrease
+      std::vector<int64_t> origin_out_shape(decrease_size, 1);
+      out_dims = framework::make_ddim(std::vector<int64_t>(decrease_size, 1));
+    } else {
+      std::vector<int64_t> origin_out_shape(out_dims.size() + decrease_size,
+                                            -1);
+      for (size_t i = 0; i < decrease_size; ++i) {
+        origin_out_shape[decrease_axis[i]] = 1;
+      }
+
+      int index = 0;
+      for (size_t i = 0; i < origin_out_shape.size(); ++i) {
+        if (origin_out_shape[i] == -1) {
+          origin_out_shape[i] = out_dims[index];
+          ++index;
+        }
+      }
+
+      out_dims = framework::make_ddim(origin_out_shape);
+    }
+  }
+
+  auto offsets = Eigen::array<int64_t, D>();
+  auto extents = Eigen::array<int64_t, D>();
+  for (size_t i = 0; i < D; ++i) {
+    offsets[i] = 0;
+    extents[i] = out_dims[i];
+  }
+
+  for (size_t i = 0; i < axes.size(); ++i) {
+    int axis = axes[i];
+    int64_t start = starts[i] < 0 ? (starts[i] + in_dims[axis]) : starts[i];
+    start = std::max(start, static_cast<int64_t>(0));
+    offsets[axis] = start;
+  }
+
+  Eigen::array<std::pair<int64_t, int64_t>, D> paddings;
+  for (size_t i = 0; i < paddings.size(); ++i) {
+    paddings[i].first = offsets[i];
+    paddings[i].second = (in_dims[i] - out_dims[i]) - offsets[i];
+  }
+  EigenPaddingCompute<T, Context, D>(
+      ctx, d_input, in_dims, d_out, out_dims, paddings);
+}
+
+template <typename T, typename Context>
+void SliceGradRawKernel(const Context& ctx,
+                        const DenseTensor& out_grad,
+                        const std::vector<int64_t>& axes,
+                        const std::vector<int64_t>& starts,
+                        const std::vector<int64_t>& ends,
+                        const std::vector<int64_t>& infer_flags,
+                        const std::vector<int64_t>& decrease_axis,
+                        DenseTensor* input_grad) {
+  // Dispatch on the rank of the input gradient: with decrease_axis the
+  // out_grad rank can be smaller, but offsets and paddings are built per
+  // input dimension.
+  size_t rank = input_grad->dims().size();
+
+  switch (rank) {
+    case 1:
+      SliceGradCompute<T, Context, 1>(ctx, out_grad, axes, starts, ends,
+                                      infer_flags, decrease_axis, input_grad);
+      break;
+    case 2:
+      SliceGradCompute<T, Context, 2>(ctx, out_grad, axes, starts, ends,
+                                      infer_flags, decrease_axis, input_grad);
+      break;
+    case 3:
+      SliceGradCompute<T, Context, 3>(ctx, out_grad, axes, starts, ends,
+                                      infer_flags, decrease_axis, input_grad);
+      break;
+    case 4:
+      SliceGradCompute<T, Context, 4>(ctx, out_grad, axes, starts, ends,
+                                      infer_flags, decrease_axis, input_grad);
+      break;
+    case 5:
+      SliceGradCompute<T, Context, 5>(ctx, out_grad, axes, starts, ends,
+                                      infer_flags, decrease_axis, input_grad);
+      break;
+    case 6:
+      SliceGradCompute<T, Context, 6>(ctx, out_grad, axes, starts, ends,
+                                      infer_flags, decrease_axis, input_grad);
+      break;
+    default:
+      PADDLE_THROW(pten::errors::InvalidArgument(
+          "The rank of input should be less than 7, but received %d.", rank));
+  }
+}
+
+}  // namespace pten
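The backward pass above scatters d_out back into a zero tensor of the input's shape; the paddings are exactly the gap on each side of the slice. A minimal standalone 1-D sketch (the helper name SliceGradPad1D is illustrative, not from the patch):

#include <cstdint>
#include <vector>

// For a 1-D slice x[2:6] on an input of length 8, the gradient is the
// upstream gradient padded back to length 8 with zeros on both sides.
std::vector<float> SliceGradPad1D(const std::vector<float>& d_out,
                                  int64_t in_size, int64_t start) {
  std::vector<float> d_in(in_size, 0.0f);  // the "padding" is zeros
  for (size_t i = 0; i < d_out.size(); ++i) d_in[start + i] = d_out[i];
  return d_in;
}

// SliceGradPad1D({g0, g1, g2, g3}, 8, 2) == {0, 0, g0, g1, g2, g3, 0, 0};
// in the kernel's terms paddings[0] = {2, (8 - 4) - 2} = {2, 2}.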
diff --git a/paddle/pten/kernels/impl/slice_kernel_impl.h b/paddle/pten/kernels/impl/slice_kernel_impl.h
new file mode 100644
index 00000000000..b3c4f65de4b
--- /dev/null
+++ b/paddle/pten/kernels/impl/slice_kernel_impl.h
@@ -0,0 +1,152 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "paddle/fluid/operators/slice_utils.h"
+#include "paddle/pten/kernels/funcs/eigen/common.h"
+#include "paddle/pten/kernels/funcs/eigen/eigen_function.h"
+
+namespace pten {
+
+template <typename T, typename Context, size_t D>
+void SliceCompute(const Context& ctx,
+                  const DenseTensor& input,
+                  const std::vector<int64_t>& axes,
+                  const std::vector<int64_t>& starts_t,
+                  const std::vector<int64_t>& ends_t,
+                  const std::vector<int64_t>& infer_flags,
+                  const std::vector<int64_t>& decrease_axis,
+                  DenseTensor* out) {
+  // Step 1: Get the accurate attribute values of starts and ends
+  std::vector<int64_t> starts = starts_t;
+  std::vector<int64_t> ends = ends_t;
+  PADDLE_ENFORCE_EQ(
+      starts.size(),
+      axes.size(),
+      pten::errors::InvalidArgument(
+          "The size of starts must be equal to the size of axes."));
+  PADDLE_ENFORCE_EQ(ends.size(),
+                    axes.size(),
+                    pten::errors::InvalidArgument(
+                        "The size of ends must be equal to the size of axes."));
+
+  // Step 2: Compute output
+  auto in = &input;
+
+  auto in_dims = in->dims();
+  auto out_dims = out->dims();
+  auto slice_dims = out_dims;
+
+  // 2.1 Infer output dims
+  for (size_t i = 0; i < axes.size(); ++i) {
+    // when start == -1 && end == start+1
+    if (starts[i] == -1 && ends[i] == 0 && infer_flags[i] == -1) {
+      auto ret = std::find(decrease_axis.begin(), decrease_axis.end(), axes[i]);
+      if (ret != decrease_axis.end()) {
+        ends[i] = in_dims[axes[i]];
+      }
+    }
+  }
+
+  paddle::operators::CheckAndUpdateSliceAttrs(in_dims, axes, &starts, &ends);
+  slice_dims = paddle::operators::GetSliceDims<int64_t>(
+      in_dims, axes, starts, ends, nullptr, nullptr);
+  out_dims = paddle::operators::GetDecreasedDims(slice_dims, decrease_axis);
+
+  // 2.2 Get output
+  auto offsets = Eigen::DSizes<Eigen::DenseIndex, D>();
+  auto extents = Eigen::DSizes<Eigen::DenseIndex, D>();
+
+  for (size_t i = 0; i < D; ++i) {
+    offsets[i] = 0;
+    extents[i] = slice_dims[i];
+  }
+  for (size_t i = 0; i < axes.size(); ++i) {
+    offsets[axes[i]] = starts[i];
+  }
+
+  out->Resize(slice_dims);
+  out->mutable_data<T>(ctx.GetPlace());
+
+  auto in_t = EigenTensor<T, D, Eigen::RowMajor, Eigen::DenseIndex>::From(
+      *in, in_dims);
+  auto out_t = EigenTensor<T, D, Eigen::RowMajor, Eigen::DenseIndex>::From(
+      *out, slice_dims);
+  auto& eigen_place = *ctx.eigen_device();
+
+  if (in->numel() <= Eigen::NumTraits<int>::highest()) {
+    // similar to tf.slice:
+    // if the element count is less than INT_MAX, switch to 32-bit indices
+    Eigen::DSizes<int, D> offsets_32bit, extents_32bit;
+    for (size_t i = 0; i < D; i++) {
+      offsets_32bit[i] = offsets[i];
+      extents_32bit[i] = extents[i];
+    }
+    funcs::EigenSlice<std::decay_t<decltype(eigen_place)>, T, D>::Eval(
+        eigen_place,
+        To32BitIndex(out_t),
+        To32BitIndex(in_t),
+        offsets_32bit,
+        extents_32bit);
+  } else {
+    funcs::EigenSlice<std::decay_t<decltype(eigen_place)>, T, D>::Eval(
+        eigen_place, out_t, in_t, offsets, extents);
+  }
+
+  out->Resize(out_dims);
+}
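+
+// Note on the 32-bit fast path above: Eigen kernels vectorize better with
+// int indices, so whenever numel() fits in int (that is, it does not exceed
+// Eigen::NumTraits<int>::highest(), i.e. INT_MAX) the offsets and extents
+// are narrowed to Eigen::DSizes<int, D>, mirroring what tf.slice does.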
+
+template <typename T, typename Context>
+void SliceRawKernel(const Context& ctx,
+                    const DenseTensor& input,
+                    const std::vector<int64_t>& axes,
+                    const std::vector<int64_t>& starts,
+                    const std::vector<int64_t>& ends,
+                    const std::vector<int64_t>& infer_flags,
+                    const std::vector<int64_t>& decrease_axis,
+                    DenseTensor* out) {
+  int rank = input.dims().size();
+
+  switch (rank) {
+    case 1:
+      SliceCompute<T, Context, 1>(
+          ctx, input, axes, starts, ends, infer_flags, decrease_axis, out);
+      break;
+    case 2:
+      SliceCompute<T, Context, 2>(
+          ctx, input, axes, starts, ends, infer_flags, decrease_axis, out);
+      break;
+    case 3:
+      SliceCompute<T, Context, 3>(
+          ctx, input, axes, starts, ends, infer_flags, decrease_axis, out);
+      break;
+    case 4:
+      SliceCompute<T, Context, 4>(
+          ctx, input, axes, starts, ends, infer_flags, decrease_axis, out);
+      break;
+    case 5:
+      SliceCompute<T, Context, 5>(
+          ctx, input, axes, starts, ends, infer_flags, decrease_axis, out);
+      break;
+    case 6:
+      SliceCompute<T, Context, 6>(
+          ctx, input, axes, starts, ends, infer_flags, decrease_axis, out);
+      break;
+    default:
+      PADDLE_THROW(pten::errors::InvalidArgument(
+          "The rank of input should be less than 7, but received %d.", rank));
+  }
+}
+
+}  // namespace pten
diff --git a/paddle/pten/kernels/slice_grad_kernel.h b/paddle/pten/kernels/slice_grad_kernel.h
new file mode 100644
index 00000000000..a0c3e3532ac
--- /dev/null
+++ b/paddle/pten/kernels/slice_grad_kernel.h
@@ -0,0 +1,31 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "paddle/pten/core/dense_tensor.h"
+
+namespace pten {
+
+template <typename T, typename Context>
+void SliceGradRawKernel(const Context& ctx,
+                        const DenseTensor& out_grad,
+                        const std::vector<int64_t>& axes,
+                        const std::vector<int64_t>& starts,
+                        const std::vector<int64_t>& ends,
+                        const std::vector<int64_t>& infer_flags,
+                        const std::vector<int64_t>& decrease_axis,
+                        DenseTensor* input_grad);
+
+}  // namespace pten
diff --git a/paddle/pten/kernels/slice_kernel.h b/paddle/pten/kernels/slice_kernel.h
new file mode 100644
index 00000000000..539fa1ab507
--- /dev/null
+++ b/paddle/pten/kernels/slice_kernel.h
@@ -0,0 +1,31 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "paddle/pten/core/dense_tensor.h"
+
+namespace pten {
+
+template <typename T, typename Context>
+void SliceRawKernel(const Context& ctx,
+                    const DenseTensor& input,
+                    const std::vector<int64_t>& axes,
+                    const std::vector<int64_t>& starts,
+                    const std::vector<int64_t>& ends,
+                    const std::vector<int64_t>& infer_flags,
+                    const std::vector<int64_t>& decrease_axis,
+                    DenseTensor* out);
+
+}  // namespace pten
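With the declarations above, the new kernels can be called directly against a DenseTensor. A rough usage sketch under the pten API of this patch; the wrapper name RunSlice is hypothetical, and tensor allocation/meta setup is assumed to have happened elsewhere:

#include <vector>

#include "paddle/pten/backends/cpu/cpu_context.h"
#include "paddle/pten/kernels/slice_kernel.h"

// Slice x[:, 1:3] of a [4, 5] float tensor: axes={1}, starts={1}, ends={3}.
void RunSlice(const pten::CPUContext& dev_ctx,
              const pten::DenseTensor& x,
              pten::DenseTensor* out) {
  std::vector<int64_t> axes{1}, starts{1}, ends{3};
  std::vector<int64_t> infer_flags{1};   // 1: start/end known statically
  std::vector<int64_t> decrease_axis{};  // keep the rank
  pten::SliceRawKernel<float>(dev_ctx, x, axes, starts, ends, infer_flags,
                              decrease_axis, out);
}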
diff --git a/paddle/pten/ops/compat/slice_sig.cc b/paddle/pten/ops/compat/slice_sig.cc
new file mode 100644
index 00000000000..9bfd2ec0a60
--- /dev/null
+++ b/paddle/pten/ops/compat/slice_sig.cc
@@ -0,0 +1,38 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/pten/core/compat/op_utils.h"
+
+namespace pten {
+
+KernelSignature SliceOpArgumentMapping(const ArgumentMappingContext& ctx) {
+  return KernelSignature(
+      "slice",
+      {"Input"},
+      {"axes", "starts", "ends", "infer_flags", "decrease_axis"},
+      {"Out"});
+}
+
+KernelSignature SliceGradOpArgumentMapping(const ArgumentMappingContext& ctx) {
+  return KernelSignature(
+      "slice_grad",
+      {GradVarName("Out")},
+      {"axes", "starts", "ends", "infer_flags", "decrease_axis"},
+      {GradVarName("Input")});
+}
+
+}  // namespace pten
+
+PT_REGISTER_ARG_MAPPING_FN(slice, pten::SliceOpArgumentMapping);
+PT_REGISTER_ARG_MAPPING_FN(slice_grad, pten::SliceGradOpArgumentMapping);
-- 
GitLab
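For reference, GradVarName("Out") resolves to the gradient variable name of "Out", so the grad mapping above binds the fluid op's slots to the new kernel roughly as follows:

// slice:      {"Input"}    + attrs -> pten::SliceRawKernel     -> {"Out"}
// slice_grad: {"Out@GRAD"} + attrs -> pten::SliceGradRawKernel -> {"Input@GRAD"}
// where attrs = {"axes", "starts", "ends", "infer_flags", "decrease_axis"},
// matching the parameter order of the kernel signatures declared earlier.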