From eb5b83e773f240b61bc747bb8cf0ee0678238dea Mon Sep 17 00:00:00 2001 From: james Date: Thu, 1 Sep 2022 10:49:20 +0800 Subject: [PATCH] migrate slice/slice_grad to phi, test=kunlun (#45606) * migrate slice/slice_grad to phi, test=kunlun * minor, test=kunlun * typo, test=kunlun --- paddle/fluid/operators/slice_op_xpu.cc | 294 -------------------- paddle/phi/kernels/xpu/slice_grad_kernel.cc | 88 ++++++ paddle/phi/kernels/xpu/slice_kernel.cc | 116 ++++++++ 3 files changed, 204 insertions(+), 294 deletions(-) delete mode 100644 paddle/fluid/operators/slice_op_xpu.cc create mode 100644 paddle/phi/kernels/xpu/slice_grad_kernel.cc create mode 100644 paddle/phi/kernels/xpu/slice_kernel.cc diff --git a/paddle/fluid/operators/slice_op_xpu.cc b/paddle/fluid/operators/slice_op_xpu.cc deleted file mode 100644 index f59d363af8b..00000000000 --- a/paddle/fluid/operators/slice_op_xpu.cc +++ /dev/null @@ -1,294 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#ifdef PADDLE_WITH_XPU -#include -#include -#include -#include - -#include "paddle/fluid/operators/slice_op.h" -#include "paddle/fluid/platform/device/device_wrapper.h" -#include "paddle/phi/kernels/funcs/slice_utils.h" -#include "xpu/refactor/math.h" - -namespace paddle { -namespace operators { - -using Tensor = framework::Tensor; - -inline void DealTensorArray(const framework::ExecutionContext& ctx, - const std::vector& starts, - const std::vector& ends, - bool out_is_array) { - auto in_array = ctx.Input("Input"); - // If the input is LoDTensorArray, the rank of input is 1. - int in_size = in_array->size(); - int start = starts[0] < 0 ? (starts[0] + in_size) : starts[0]; - int end = ends[0] < 0 ? (ends[0] + in_size) : ends[0]; - - start = std::max(start, static_cast(0)); - end = std::max(end, static_cast(0)); - end = std::min(end, in_size); - - if (starts[0] == -1 && end == 0) { - end = start + 1; - } - - PADDLE_ENFORCE_GT(end, - start, - platform::errors::InvalidArgument( - "Attr(ends) should be greater than attr(starts) in " - "slice op. 
But received end = %d, start = %d.", - ends[0], - starts[0])); - int out_size = end - start; - - if (out_is_array) { - auto out_array = ctx.Output("Out"); - out_array->resize(out_size); - - for (int i = 0; i < out_size; ++i) { - auto* out_tensor = &out_array->at(i); - auto in_tensor = in_array->at(i + start); - out_tensor->set_lod(in_tensor.lod()); - if (in_tensor.memory_size() > 0) { - paddle::framework::TensorCopy(in_tensor, ctx.GetPlace(), out_tensor); - } else { - VLOG(10) << "WARNING: The input tensor 'x_tensor' holds no memory, so " - "nothing has been written to output array[" - << i << "]."; - } - } - } else { - auto out = ctx.Output("Out"); - auto in_tensor = in_array->at(start); - paddle::framework::TensorCopy(in_tensor, ctx.GetPlace(), out); - } -} -template -class SliceXPUKernel : public framework::OpKernel { - using XPUType = typename XPUTypeTrait::Type; - - public: - void Compute(const framework::ExecutionContext& ctx) const override { - const Variable* input_var = ctx.InputVar("Input"); - Variable* out_var = ctx.OutputVar("Out"); - bool input_is_array = input_var->IsType(); - bool out_is_array = out_var->IsType(); - - auto axes_int = ctx.Attr>("axes"); - auto starts_int = ctx.Attr>("starts"); - auto ends_int = ctx.Attr>("ends"); - std::vector axes(axes_int.begin(), axes_int.end()); - std::vector starts(starts_int.begin(), starts_int.end()); - std::vector ends(ends_int.begin(), ends_int.end()); - - auto decrease_axis = ctx.Attr>("decrease_axis"); - auto infer_flags = ctx.Attr>("infer_flags"); - - // Step 1: Get the accurate attribute value of starts and ends - auto starts_tensor_list = ctx.MultiInput("StartsTensorList"); - if (ctx.HasInput("StartsTensor")) { - starts = GetDataFromTensor(ctx.Input("StartsTensor")); - } else if (starts_tensor_list.size() > 0) { - starts = GetDataFromTensorList(starts_tensor_list); - } - - auto ends_tensor_list = ctx.MultiInput("EndsTensorList"); - if (ctx.HasInput("EndsTensor")) { - ends = 
GetDataFromTensor(ctx.Input("EndsTensor")); - } else if (ends_tensor_list.size() > 0) { - ends = GetDataFromTensorList(ends_tensor_list); - } - - PADDLE_ENFORCE_EQ( - starts.size(), - axes.size(), - platform::errors::InvalidArgument( - "The size of starts must be equal to the size of axes.")); - PADDLE_ENFORCE_EQ( - ends.size(), - axes.size(), - platform::errors::InvalidArgument( - "The size of ends must be equal to the size of axes.")); - - // Step 2: Compute output - if (input_is_array) { - DealTensorArray(ctx, starts, ends, out_is_array); - return; - } else { - auto in = ctx.Input("Input"); - auto out = ctx.Output("Out"); - - auto in_dims = in->dims(); - auto out_dims = out->dims(); - auto slice_dims = out_dims; - - // 2.1 Infer output dims - for (size_t i = 0; i < axes.size(); ++i) { - // when start == -1 && end == start+1 - if (starts[i] == -1 && ends[i] == 0 && infer_flags[i] == -1) { - auto ret = - std::find(decrease_axis.begin(), decrease_axis.end(), axes[i]); - if (ret != decrease_axis.end()) { - ends[i] = in_dims[axes[i]]; - } - } - } - - phi::funcs::CheckAndUpdateSliceAttrs(in_dims, axes, &starts, &ends); - slice_dims = phi::funcs::GetSliceDims( - in_dims, axes, starts, ends, nullptr, nullptr); - out_dims = phi::funcs::GetDecreasedDims(slice_dims, decrease_axis); - - out->Resize(out_dims); - - // 2.2 Get output - size_t shape_size = in_dims.size(); - // the slice XPU kernel require that the length of `start`, `end` must be - // equal - // to the dims size of input tensor, therefore, if shape_size > - // axes.size(), the `starts_extension` and `ends_extension` is necessary. 
- std::vector starts_extension(shape_size, 0); - std::vector ends_extension(shape_size, 0); - if (shape_size > axes.size()) { - for (size_t i = 0; i < shape_size; ++i) { - ends_extension[i] = in_dims[i]; - } - for (size_t i = 0; i < axes.size(); ++i) { - starts_extension[axes[i]] = starts[i]; - ends_extension[axes[i]] = ends[i]; - } - } else { - starts_extension = std::move(starts); - ends_extension = std::move(ends); - } - - // prepare shape on XPU - std::vector shape(shape_size, 0); - for (size_t i = 0; i < shape_size; ++i) { - shape[i] = in_dims[i]; - } - - auto& dev_ctx = ctx.template device_context(); - const XPUType* in_data = reinterpret_cast(in->data()); - XPUType* out_data = - reinterpret_cast(out->mutable_data(ctx.GetPlace())); - int r = xpu::slice(dev_ctx.x_context(), - in_data, - out_data, - shape, - starts_extension, - ends_extension); - PADDLE_ENFORCE_XDNN_SUCCESS(r, "slice"); - } - } -}; - -template -class SliceGradXPUKernel : public framework::OpKernel { - using XPUType = typename XPUTypeTrait::Type; - - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* input = ctx.Input("Input"); - auto* dout = ctx.Input(framework::GradVarName("Out")); - auto* dinput = ctx.Output(framework::GradVarName("Input")); - - auto axes_int = ctx.Attr>("axes"); - auto starts_int = ctx.Attr>("starts"); - auto ends_int = ctx.Attr>("ends"); - std::vector axes(axes_int.begin(), axes_int.end()); - std::vector starts(starts_int.begin(), starts_int.end()); - std::vector ends(ends_int.begin(), ends_int.end()); - - // Get the accurate attribute value of starts and ends - auto starts_tensor_list = ctx.MultiInput("StartsTensorList"); - if (ctx.HasInput("StartsTensor")) { - starts = GetDataFromTensor(ctx.Input("StartsTensor")); - } else if (starts_tensor_list.size() > 0) { - starts = GetDataFromTensorList(starts_tensor_list); - } - - auto ends_tensor_list = ctx.MultiInput("EndsTensorList"); - if (ctx.HasInput("EndsTensor")) { - ends = 
GetDataFromTensor(ctx.Input("EndsTensor")); - } else if (ends_tensor_list.size() > 0) { - ends = GetDataFromTensorList(ends_tensor_list); - } - - const auto& in_dims = input->dims(); - int rank = in_dims.size(); - - std::vector pad_left(rank); - std::vector out_dims(rank); - std::vector pad_right(rank); - int cnt = 0; - for (int i = 0; i < in_dims.size(); ++i) { - int start = 0; - int end = in_dims[i]; - int axis = cnt < static_cast(axes.size()) ? axes[cnt] : -1; - if (axis == i) { - start = starts[cnt]; - if (start < 0) { - start = (start + in_dims[i]); - } - start = std::max(start, static_cast(0)); - end = ends[cnt]; - if (end < 0) { - end = (end + in_dims[i]); - } - end = std::min(end, static_cast(in_dims[i])); - cnt++; - } - - pad_left[i] = start; - out_dims[i] = end - start; - pad_right[i] = in_dims[i] - out_dims[i] - pad_left[i]; - } - - auto& dev_ctx = ctx.template device_context(); - const XPUType* dout_data = - reinterpret_cast(dout->data()); - XPUType* din_data = - reinterpret_cast(dinput->mutable_data(ctx.GetPlace())); - int r = xpu::pad(dev_ctx.x_context(), - dout_data, - din_data, - out_dims, - pad_left, - pad_right, - XPUType(0)); - PADDLE_ENFORCE_XDNN_SUCCESS(r, "pad"); - } -}; -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; - -REGISTER_OP_XPU_KERNEL( - slice, - ops::SliceXPUKernel, - ops::SliceXPUKernel, - ops::SliceXPUKernel); -REGISTER_OP_XPU_KERNEL( - slice_grad, - ops::SliceGradXPUKernel, - ops::SliceGradXPUKernel, - ops::SliceGradXPUKernel); -#endif diff --git a/paddle/phi/kernels/xpu/slice_grad_kernel.cc b/paddle/phi/kernels/xpu/slice_grad_kernel.cc new file mode 100644 index 00000000000..86891776179 --- /dev/null +++ b/paddle/phi/kernels/xpu/slice_grad_kernel.cc @@ -0,0 +1,88 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/phi/kernels/slice_grad_kernel.h"
+
+#include "paddle/phi/backends/xpu/enforce_xpu.h"
+#include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/kernels/funcs/slice_utils.h"
+
+namespace phi {
+
+// Gradient kernel of `slice` on XPU.
+//
+// For every dimension of `input` the kernel works out the sliced range
+// [start, end) and turns it into three per-dimension vectors: the extent
+// covered by the slice (`out_dims`), the number of elements in front of it
+// (`pad_left`) and behind it (`pad_right`). These are handed to `xpu::pad`
+// together with a fill value of zero, reading from `out_grad` and writing to
+// `input_grad`.
+//
+// Parameters:
+//   ctx            device context; used to allocate `input_grad` and to
+//                  reach the XDNN context.
+//   input          forward input; only its dims are read.
+//   out_grad       gradient with respect to the forward output.
+//   axes           dimensions of `input` that were sliced; a negative value
+//                  counts from the last dimension.
+//   starts_t       start index for each entry of `axes`; a negative value
+//                  is relative to the end of that dimension.
+//   ends_t         exclusive end index for each entry of `axes`; same
+//                  convention as `starts_t`.
+//   infer_flags, decrease_axis
+//                  only consulted to recognise the start == -1 / end == 0
+//                  encoding described inside the loop.
+//   input_grad     result tensor; allocated by this kernel.
+//
+// Raises InvalidArgument when `starts`/`ends` do not have one entry per axis
+// or when an axis lies outside the rank of `input`.
+template <typename T, typename Context>
+void SliceGradRawKernel(const Context& ctx,
+                        const DenseTensor& input,
+                        const DenseTensor& out_grad,
+                        const std::vector<int64_t>& axes,
+                        const IntArray& starts_t,
+                        const IntArray& ends_t,
+                        const std::vector<int64_t>& infer_flags,
+                        const std::vector<int64_t>& decrease_axis,
+                        DenseTensor* input_grad) {
+  using XPUType = typename XPUTypeTrait<T>::Type;
+  ctx.template Alloc<T>(input_grad);
+
+  // Get the accurate attribute value of starts and ends
+  const std::vector<int64_t> starts = starts_t.GetData();
+  const std::vector<int64_t> ends = ends_t.GetData();
+  // Same consistency checks as the forward slice kernel, so that
+  // starts[k] / ends[k] below can never index past the end.
+  PADDLE_ENFORCE_EQ(
+      starts.size(),
+      axes.size(),
+      phi::errors::InvalidArgument(
+          "The size of starts must be equal to the size of axes."));
+  PADDLE_ENFORCE_EQ(ends.size(),
+                    axes.size(),
+                    phi::errors::InvalidArgument(
+                        "The size of ends must be equal to the size of axes."));
+
+  const auto& in_dims = input.dims();
+  const int rank = in_dims.size();
+
+  // Start from "dimension not sliced": full extent, no padding on either
+  // side. Only the dimensions listed in `axes` are narrowed afterwards.
+  // NOTE(review): xpu::pad is called with std::vector<int>, so this assumes
+  // every dimension fits into an int - confirm against the XDNN API.
+  std::vector<int> pad_left(rank, 0);
+  std::vector<int> out_dims(rank, 0);
+  std::vector<int> pad_right(rank, 0);
+  for (int i = 0; i < rank; ++i) {
+    out_dims[i] = static_cast<int>(in_dims[i]);
+  }
+
+  // Walk `axes` itself instead of matching it against an ascending cursor,
+  // so unsorted or negative axes are still applied to the right dimension.
+  for (size_t k = 0; k < axes.size(); ++k) {
+    int64_t axis = axes[k];
+    if (axis < 0) {
+      axis += rank;
+    }
+    PADDLE_ENFORCE_EQ(
+        axis >= 0 && axis < rank,
+        true,
+        phi::errors::InvalidArgument(
+            "The axis of slice_grad must be in range [-%d, %d), but "
+            "received %d.",
+            rank,
+            rank,
+            axes[k]));
+
+    // Bounds are kept in 64 bit until they have been clamped; narrowing a
+    // large "until the end" sentinel to int first could flip its sign.
+    const int64_t dim = in_dims[axis];
+    int64_t start = starts[k];
+    int64_t end = ends[k];
+
+    // The forward slice kernel rewrites start == -1 / end == 0 (with
+    // infer_flags == -1 on a decreased axis) to "end = dim", i.e. the last
+    // element of the dimension. Apply the same rewrite here; otherwise the
+    // extent computed below would be 1 - dim.
+    const bool takes_last_element =
+        start == -1 && end == 0 && k < infer_flags.size() &&
+        infer_flags[k] == -1 &&
+        std::find(decrease_axis.begin(), decrease_axis.end(), axes[k]) !=
+            decrease_axis.end();
+    if (takes_last_element) {
+      end = dim;
+    }
+
+    if (start < 0) {
+      start += dim;
+    }
+    start = std::min(std::max(start, static_cast<int64_t>(0)), dim);
+    if (end < 0) {
+      end += dim;
+    }
+    // Clamp to [start, dim] so the sliced extent is never negative.
+    end = std::max(std::min(end, dim), start);
+
+    pad_left[axis] = static_cast<int>(start);
+    out_dims[axis] = static_cast<int>(end - start);
+    pad_right[axis] = static_cast<int>(dim - end);
+  }
+
+  int r =
+      xpu::pad<XPUType>(ctx.x_context(),
+                        reinterpret_cast<const XPUType*>(out_grad.data<T>()),
+                        reinterpret_cast<XPUType*>(input_grad->data<T>()),
+                        out_dims,
+                        pad_left,
+                        pad_right,
+                        XPUType(0));
+  PADDLE_ENFORCE_XDNN_SUCCESS(r, "pad");
+}
+}  // namespace phi
+
+PD_REGISTER_KERNEL(slice_grad,
+                   XPU,
+                   ALL_LAYOUT,
+                   phi::SliceGradRawKernel,
+                   float,
+                   int,
+                   phi::dtype::float16) {}
diff --git a/paddle/phi/kernels/xpu/slice_kernel.cc b/paddle/phi/kernels/xpu/slice_kernel.cc
new file mode 100644
index 00000000000..3d01fae33e1
--- /dev/null
+++ b/paddle/phi/kernels/xpu/slice_kernel.cc
@@ -0,0 +1,116 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/phi/kernels/slice_kernel.h"
+
+#include "paddle/phi/backends/xpu/enforce_xpu.h"
+#include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/kernels/funcs/slice_utils.h"
+
+namespace phi {
+
+// Forward kernel of `slice` on XPU.
+//
+// Resolves the requested starts/ends against the dims of `input`, resizes
+// `out` to the resulting (possibly rank-reduced) shape and copies the
+// selected region with `xpu::slice`.
+//
+// Parameters:
+//   ctx            device context; used to allocate `out` and to reach the
+//                  XDNN context.
+//   input          tensor to slice.
+//   axes           dimensions of `input` to slice.
+//   starts_t       start index for each entry of `axes`.
+//   ends_t         end index for each entry of `axes`.
+//   infer_flags    one flag per axis; a value of -1 takes part in the
+//                  start == -1 / end == 0 rewrite in step 2.1.
+//   decrease_axis  axes passed to `funcs::GetDecreasedDims` to obtain the
+//                  final output dims.
+//   out            result tensor; resized and allocated by this kernel.
+//
+// Raises InvalidArgument when `starts`/`ends` do not have one entry per
+// axis.
+template <typename T, typename Context>
+void SliceRawKernel(const Context& ctx,
+                    const DenseTensor& input,
+                    const std::vector<int64_t>& axes,
+                    const IntArray& starts_t,
+                    const IntArray& ends_t,
+                    const std::vector<int64_t>& infer_flags,
+                    const std::vector<int64_t>& decrease_axis,
+                    DenseTensor* out) {
+  using XPUType = typename XPUTypeTrait<T>::Type;
+
+  // Step 1: Get the accurate attribute value of starts and ends
+  std::vector<int64_t> starts = starts_t.GetData();
+  std::vector<int64_t> ends = ends_t.GetData();
+  PADDLE_ENFORCE_EQ(
+      starts.size(),
+      axes.size(),
+      phi::errors::InvalidArgument(
+          "The size of starts must be equal to the size of axes."));
+  PADDLE_ENFORCE_EQ(ends.size(),
+                    axes.size(),
+                    phi::errors::InvalidArgument(
+                        "The size of ends must be equal to the size of axes."));
+
+  // Step 2: Compute output
+  auto in_dims = input.dims();
+  auto out_dims = out->dims();
+  auto slice_dims = out_dims;
+
+  // 2.1 Infer output dims
+  for (size_t i = 0; i < axes.size(); ++i) {
+    // start == -1 with end == 0 (== start + 1) on a decreased axis whose
+    // infer flag is -1: move `end` to the dimension size so that the range
+    // [-1, dim) is not empty.
+    if (starts[i] == -1 && ends[i] == 0 && infer_flags[i] == -1) {
+      auto ret = std::find(decrease_axis.begin(), decrease_axis.end(), axes[i]);
+      if (ret != decrease_axis.end()) {
+        ends[i] = in_dims[axes[i]];
+      }
+    }
+  }
+
+  phi::funcs::CheckAndUpdateSliceAttrs(in_dims, axes, &starts, &ends);
+  slice_dims = funcs::GetSliceDims<int64_t>(
+      in_dims, axes, starts, ends, nullptr, nullptr);
+  out_dims = funcs::GetDecreasedDims(slice_dims, decrease_axis);
+
+  out->Resize(out_dims);
+
+  // 2.2 Get output
+  const size_t shape_size = in_dims.size();
+  // xpu::slice is given one start and one end per input dimension, so the
+  // per-axis bounds are expanded to full rank: every dimension starts out as
+  // its whole range [0, dim) and the dimensions named in `axes` are then
+  // narrowed. The bounds are always placed at index axes[i], never at the
+  // position i, so they stay attached to the right dimension even when
+  // `axes` is not listed in ascending order.
+  std::vector<int> starts_extension(shape_size, 0);
+  std::vector<int> ends_extension(shape_size, 0);
+  for (size_t i = 0; i < shape_size; ++i) {
+    ends_extension[i] = in_dims[i];
+  }
+  for (size_t i = 0; i < axes.size(); ++i) {
+    starts_extension[axes[i]] = starts[i];
+    ends_extension[axes[i]] = ends[i];
+  }
+
+  // prepare shape on XPU
+  std::vector<int> shape(shape_size, 0);
+  for (size_t i = 0; i < shape_size; ++i) {
+    shape[i] = in_dims[i];
+  }
+
+  // Allocate only after Resize so the buffer matches the final output dims.
+  ctx.template Alloc<T>(out);
+  int r = xpu::slice<XPUType>(ctx.x_context(),
+                              reinterpret_cast<const XPUType*>(input.data<T>()),
+                              reinterpret_cast<XPUType*>(out->data<T>()),
+                              shape,
+                              starts_extension,
+                              ends_extension);
+  PADDLE_ENFORCE_XDNN_SUCCESS(r, "slice");
+}
+}  // namespace phi
+
+PD_REGISTER_KERNEL(slice,
+                   XPU,
+                   ALL_LAYOUT,
+                   phi::SliceRawKernel,
+                   float,
+                   int,
+                   phi::dtype::float16) {}
-- 
GitLab