From 445fce62f27d965a3c0ede195238bb9719ea2921 Mon Sep 17 00:00:00 2001
From: taixiurong
Date: Fri, 2 Sep 2022 15:51:45 +0800
Subject: [PATCH] xpu-paddlepaddle-38 [Task] Migrate bilinear_interp,
 nearest_interp to phi test=kunlun (#45608)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paddle/fluid/operators/interpolate_op_xpu.cc  | 292 ----------------
 .../fluid/operators/interpolate_v2_op_xpu.cc  | 324 ------------------
 .../phi/kernels/funcs/interpolate_function.h  |   8 +
 .../kernels/xpu/interpolate_grad_kernel.cc    | 236 +++++++++++++
 paddle/phi/kernels/xpu/interpolate_kernel.cc  | 234 +++++++++++++
 5 files changed, 478 insertions(+), 616 deletions(-)
 delete mode 100644 paddle/fluid/operators/interpolate_op_xpu.cc
 delete mode 100644 paddle/fluid/operators/interpolate_v2_op_xpu.cc
 create mode 100644 paddle/phi/kernels/xpu/interpolate_grad_kernel.cc
 create mode 100644 paddle/phi/kernels/xpu/interpolate_kernel.cc
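Note for reviewers: both the removed fluid kernels and their phi replacements below fold `align_corners` and `align_mode` into a single integer `trans_mode` argument of `xpu::interpolate2d` / `xpu::interpolate2d_grad`, with a special case for nearest interpolation in the backward pass. A minimal sketch of that selection logic, lifted into a standalone helper for clarity (the helper name is ours, and the meaning of the numeric values is defined by the XDNN library, not by this patch):

#include <string>

// Hypothetical helper mirroring the trans_mode selection in this patch.
// align_corners wins (mode 0); otherwise align_mode picks mode 1 or 2,
// and the nearest backward path never uses mode 1.
inline int GetXPUTransMode(bool align_corners,
                           int align_mode,
                           const std::string& interp_method,
                           bool is_grad) {
  int trans_mode = align_corners ? 0 : ((align_mode == 0) ? 1 : 2);
  if (is_grad && interp_method == "nearest") {
    trans_mode = align_corners ? 0 : 2;
  }
  return trans_mode;
}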
diff --git a/paddle/fluid/operators/interpolate_op_xpu.cc b/paddle/fluid/operators/interpolate_op_xpu.cc
deleted file mode 100644
index 7f8fb5a23d3..00000000000
--- a/paddle/fluid/operators/interpolate_op_xpu.cc
+++ /dev/null
@@ -1,292 +0,0 @@
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
-   Licensed under the Apache License, Version 2.0 (the "License");
-   you may not use this file except in compliance with the License.
-   You may obtain a copy of the License at
-   http://www.apache.org/licenses/LICENSE-2.0
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License. */
-
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/operators/interpolate_op.h"
-
-#ifdef PADDLE_WITH_XPU
-
-namespace paddle {
-namespace operators {
-
-using framework::Tensor;
-using DataLayout = framework::DataLayout;
-
-inline std::vector<int> get_new_shape_xpu(
-    const std::vector<const Tensor*>& list_new_shape_tensor) {
-  // get tensor from
-  std::vector<int> vec_new_shape;
-  for (size_t i = 0; i < list_new_shape_tensor.size(); ++i) {
-    auto tensor = list_new_shape_tensor[i];
-    PADDLE_ENFORCE_EQ(
-        tensor->dims(),
-        phi::make_ddim({1}),
-        platform::errors::InvalidArgument("shape of dim tensor should be [1]"));
-    if (platform::is_xpu_place(tensor->place())) {
-      framework::Tensor temp;
-      paddle::framework::TensorCopySync(*tensor, platform::CPUPlace(), &temp);
-      vec_new_shape.push_back(static_cast<int32_t>(*temp.data<int32_t>()));
-    } else {
-      vec_new_shape.push_back(static_cast<int32_t>(*tensor->data<int32_t>()));
-    }
-  }
-
-  return vec_new_shape;
-}
-
-template <typename T>
-inline std::vector<T> get_new_data_from_tensor_xpu(
-    const Tensor* new_data_tensor) {
-  std::vector<T> vec_new_data;
-  auto* new_data = new_data_tensor->data<T>();
-  framework::Tensor cpu_starts_tensor;
-  if (platform::is_xpu_place(new_data_tensor->place())) {
-    paddle::framework::TensorCopySync(
-        *new_data_tensor, platform::CPUPlace(), &cpu_starts_tensor);
-    new_data = cpu_starts_tensor.data<T>();
-  }
-  vec_new_data = std::vector<T>(new_data, new_data + new_data_tensor->numel());
-  return vec_new_data;
-}
-
-template <typename T>
-class InterpolateXPUKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext& ctx) const override {
-    auto* input = ctx.Input<Tensor>("X");
-    auto* output = ctx.Output<Tensor>("Out");
-
-    auto input_dims = input->dims();
-    PADDLE_ENFORCE_EQ(
-        input_dims.size(),
-        4,
-        platform::errors::External("XPU Interpolate kernel only support 2d"));
-
-    const std::string data_layout_str = ctx.Attr<std::string>("data_layout");
-    const DataLayout data_layout =
-        framework::StringToDataLayout(data_layout_str);
-    int n, c, in_d, in_h, in_w;
-    ExtractNCDWH(input_dims, data_layout, &n, &c, &in_d, &in_h, &in_w);
-
-    auto interp_method = ctx.Attr<std::string>("interp_method");
-    bool align_corners = ctx.Attr<bool>("align_corners");
-    int align_mode = ctx.Attr<int>("align_mode");
-
-    int out_h = ctx.Attr<int>("out_h");
-    int out_w = ctx.Attr<int>("out_w");
-
-    auto list_new_size_tensor = ctx.MultiInput<framework::Tensor>("SizeTensor");
-    if (list_new_size_tensor.size() > 0) {
-      // have size tensor
-      auto new_size = get_new_shape_xpu(list_new_size_tensor);
-      out_h = new_size[0];
-      out_w = new_size[1];
-    } else {
-      float scale;
-      auto scale_tensor = ctx.Input<Tensor>("Scale");
-      if (scale_tensor != nullptr) {
-        auto scale_data = get_new_data_from_tensor_xpu<float>(scale_tensor);
-        scale = scale_data[0];
-      } else {
-        scale = ctx.Attr<float>("scale");
-      }
-      if (scale > 0) {
-        out_h = static_cast<int>(in_h * scale);
-        out_w = static_cast<int>(in_w * scale);
-      }
-      auto out_size = ctx.Input<Tensor>("OutSize");
-      if (out_size != nullptr) {
-        auto out_size_data = get_new_data_from_tensor_xpu<int>(out_size);
-        out_h = out_size_data[0];
-        out_w = out_size_data[1];
-      }
-    }
-    PADDLE_ENFORCE_GT(
-        out_h,
-        0,
-        platform::errors::InvalidArgument("out_h in Attr(out_shape) of "
-                                          "Op(interpolate) "
-                                          "should be greater than 0."));
-    PADDLE_ENFORCE_GT(
-        out_w,
-        0,
-        platform::errors::InvalidArgument("out_w in Attr(out_shape) of "
-                                          "Op(interpolate) "
-                                          "should be greater than 0."));
-    framework::DDim dim_out;
-    if (data_layout == DataLayout::kNCHW) {
-      dim_out = {n, c, out_h, out_w};
-    } else {
-      dim_out = {n, out_h, out_w, c};
-    }
-    output->mutable_data<T>(dim_out, ctx.GetPlace());
-
-    if (in_h == out_h && in_w == out_w) {
-      framework::TensorCopy(*input, ctx.GetPlace(), output);
-      return;
-    }
-    bool nearest = "nearest" == interp_method;
-    int trans_mode = (align_corners) ? (0) : ((align_mode == 0) ? (1) : (2));
-    auto& dev_ctx = ctx.template device_context<platform::XPUDeviceContext>();
-    if (nearest) {
-      PADDLE_ENFORCE_EQ((data_layout == DataLayout::kNCHW),
-                        true,
-                        platform::errors::InvalidArgument(
-                            "XPU nearest is only support NCHW"));
-    }
-    int r = xpu::interpolate2d(dev_ctx.x_context(),
-                               input->data<T>(),
-                               output->data<T>(),
-                               n,
-                               c,
-                               in_h,
-                               in_w,
-                               out_h,
-                               out_w,
-                               nearest,
-                               trans_mode,
-                               (data_layout == DataLayout::kNCHW));
-    PADDLE_ENFORCE_EQ(r,
-                      XPU_SUCCESS,
-                      platform::errors::External("XPU interpolate2d kernel "
-                                                 "return wrong value[%d %s]",
-                                                 r,
-                                                 XPUAPIErrorMsg[r]));
-  }
-};
-
-template <typename T>
-class InterpolateGradXPUKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext& ctx) const override {
-    auto* input_grad = ctx.Output<Tensor>(framework::GradVarName("X"));
-    auto* output_grad = ctx.Input<Tensor>(framework::GradVarName("Out"));
-
-    auto output_grad_dims = output_grad->dims();
-
-    PADDLE_ENFORCE_EQ(output_grad_dims.size(),
-                      4,
-                      platform::errors::External(
-                          "XPU Interpolategrad kernel only support 2d"));
-
-    auto* input = ctx.Input<Tensor>("X");
-    const std::string data_layout_str = ctx.Attr<std::string>("data_layout");
-    const DataLayout data_layout =
-        framework::StringToDataLayout(data_layout_str);
-    int n, c, in_d, in_h, in_w;
-    ExtractNCDWH(input->dims(), data_layout, &n, &c, &in_d, &in_h, &in_w);
-
-    auto interp_method = ctx.Attr<std::string>("interp_method");
-    bool align_corners = ctx.Attr<bool>("align_corners");
-    int align_mode = ctx.Attr<int>("align_mode");
-
-    int out_h = ctx.Attr<int>("out_h");
-    int out_w = ctx.Attr<int>("out_w");
-    float scale;
-    auto scale_tensor = ctx.Input<Tensor>("Scale");
-    if (scale_tensor != nullptr) {
-      auto scale_data = get_new_data_from_tensor_xpu<float>(scale_tensor);
-      scale = scale_data[0];
-    } else {
-      scale = ctx.Attr<float>("scale");
-    }
-    if (scale > 0) {
-      out_h = static_cast<int>(in_h * scale);
-      out_w = static_cast<int>(in_w * scale);
-    }
-    auto out_size = ctx.Input<Tensor>("OutSize");
-    if (out_size != nullptr) {
-      auto out_size_data = get_new_data_from_tensor_xpu<int>(out_size);
-      out_h = out_size_data[0];
-      out_w = out_size_data[1];
-    }
-    auto list_new_size_tensor = ctx.MultiInput<framework::Tensor>("SizeTensor");
-    if (list_new_size_tensor.size() > 0) {
-      // have size tensor
-      auto new_size = get_new_shape_xpu(list_new_size_tensor);
-      out_h = new_size[0];
-      out_w = new_size[1];
-    }
-
-    framework::DDim dim_grad;
-    if (data_layout == DataLayout::kNCHW) {
-      dim_grad = {n, c, in_h, in_w};
-    } else {
-      dim_grad = {n, in_h, in_w, c};
-    }
-    input_grad->mutable_data<T>(dim_grad, ctx.GetPlace());
-
-    auto& dev_ctx = ctx.template device_context<platform::XPUDeviceContext>();
-
-    int r = XPU_SUCCESS;
-    r = xpu::constant(dev_ctx.x_context(),
-                      input_grad->data<T>(),
-                      input_grad->numel(),
-                      static_cast<T>(0.0));
-    PADDLE_ENFORCE_EQ(r,
-                      XPU_SUCCESS,
-                      platform::errors::External(
-                          "XPU constant in interpolate2d_grad kernel return "
-                          "wrong value[%d %s]",
-                          r,
-                          XPUAPIErrorMsg[r]));
-
-    if (in_h == out_h && in_w == out_w) {
-      framework::TensorCopy(*output_grad, ctx.GetPlace(), input_grad);
-      return;
-    }
-
-    bool nearest = "nearest" == interp_method;
-    int trans_mode = (align_corners) ? (0) : ((align_mode == 0) ? (1) : (2));
-
-    if (nearest) {
-      trans_mode = (align_corners) ? (0) : (2);
-    }
-
-    r = xpu::interpolate2d_grad(dev_ctx.x_context(),
-                                output_grad->data<T>(),
-                                input_grad->data<T>(),
-                                n,
-                                c,
-                                in_h,
-                                in_w,
-                                out_h,
-                                out_w,
-                                nearest,
-                                trans_mode,
-                                (data_layout == DataLayout::kNCHW));
-    PADDLE_ENFORCE_EQ(
-        r,
-        XPU_SUCCESS,
-        platform::errors::External("XPU interpolate2d_grad kernel return "
-                                   "wrong value[%d %s]",
-                                   r,
-                                   XPUAPIErrorMsg[r]));
-  }
-};
-
-}  // namespace operators
-}  // namespace paddle
-
-namespace ops = paddle::operators;
-
-REGISTER_OP_XPU_KERNEL(bilinear_interp, ops::InterpolateXPUKernel<float>);
-REGISTER_OP_XPU_KERNEL(nearest_interp, ops::InterpolateXPUKernel<float>);
-
-REGISTER_OP_XPU_KERNEL(bilinear_interp_grad,
-                       ops::InterpolateGradXPUKernel<float>);
-REGISTER_OP_XPU_KERNEL(nearest_interp_grad,
-                       ops::InterpolateGradXPUKernel<float>);
-#endif
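The v1 operator deleted above reads a single scalar `scale` attribute, while the v2 operator deleted below accepts separate height/width factors (a `Scale` tensor or a `std::vector<float>` attribute), with `SizeTensor` taking priority and `OutSize` applied last. A hedged sketch of that v2-style resolution order (the helper and its name are illustrative only, not part of the patch):

#include <vector>

// Illustrative only: how the v2 kernels pick out_h/out_w.
// Priority: SizeTensor (if present) > scale (tensor or attr) > OutSize.
struct OutHW { int h; int w; };

inline OutHW ResolveOutHW(int in_h, int in_w, int attr_out_h, int attr_out_w,
                          const std::vector<int>& size_tensor,   // "SizeTensor"
                          const std::vector<float>& scale,       // tensor or attr
                          const std::vector<int>& out_size) {    // "OutSize"
  if (!size_tensor.empty()) return {size_tensor[0], size_tensor[1]};
  int out_h = attr_out_h, out_w = attr_out_w;
  if (!scale.empty()) {
    float scale_h = scale[0];
    float scale_w = scale.size() > 1 ? scale[1] : scale[0];
    if (scale_h > 0.f && scale_w > 0.f) {
      out_h = static_cast<int>(in_h * scale_h);
      out_w = static_cast<int>(in_w * scale_w);
    }
  }
  if (!out_size.empty()) { out_h = out_size[0]; out_w = out_size[1]; }
  return {out_h, out_w};
}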
diff --git a/paddle/fluid/operators/interpolate_v2_op_xpu.cc b/paddle/fluid/operators/interpolate_v2_op_xpu.cc
deleted file mode 100644
index b2fc5ca48c2..00000000000
--- a/paddle/fluid/operators/interpolate_v2_op_xpu.cc
+++ /dev/null
@@ -1,324 +0,0 @@
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
-   Licensed under the Apache License, Version 2.0 (the "License");
-   you may not use this file except in compliance with the License.
-   You may obtain a copy of the License at
-   http://www.apache.org/licenses/LICENSE-2.0
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License. */
-
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "paddle/fluid/framework/op_registry.h"
-#include "paddle/phi/kernels/funcs/interpolate_function.h"
-#ifdef PADDLE_WITH_XPU
-
-namespace paddle {
-namespace operators {
-
-using framework::Tensor;
-using DataLayout = framework::DataLayout;
-
-inline std::vector<int> get_new_shape_xpu(
-    const std::vector<const Tensor*>& list_new_shape_tensor) {
-  // get tensor from
-  std::vector<int> vec_new_shape;
-  for (size_t i = 0; i < list_new_shape_tensor.size(); ++i) {
-    auto tensor = list_new_shape_tensor[i];
-    PADDLE_ENFORCE_EQ(
-        tensor->dims(),
-        phi::make_ddim({1}),
-        platform::errors::InvalidArgument("shape of dim tensor should be [1]"));
-    framework::Tensor temp;
-    paddle::framework::TensorCopySync(*tensor, platform::CPUPlace(), &temp);
-    vec_new_shape.push_back(static_cast<int32_t>(*temp.data<int32_t>()));
-  }
-
-  return vec_new_shape;
-}
-
-template <typename T>
-class InterpolateV2XPUKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext& ctx) const override {
-    auto* input = ctx.Input<Tensor>("X");
-    auto* output = ctx.Output<Tensor>("Out");
-
-    auto input_dims = input->dims();
-    PADDLE_ENFORCE_EQ(
-        input_dims.size(),
-        4,
-        platform::errors::External("XPU Interpolate kernel only support 2d"));
-
-    const std::string data_layout_str = ctx.Attr<std::string>("data_layout");
-    const DataLayout data_layout =
-        framework::StringToDataLayout(data_layout_str);
-    int n, c, in_d, in_h, in_w;
-    phi::funcs::ExtractNCDWH(
-        input_dims, data_layout, &n, &c, &in_d, &in_h, &in_w);
-
-    auto interp_method = ctx.Attr<std::string>("interp_method");
-    bool align_corners = ctx.Attr<bool>("align_corners");
-    int align_mode = ctx.Attr<int>("align_mode");
-
-    int out_h = ctx.Attr<int>("out_h");
-    int out_w = ctx.Attr<int>("out_w");
-    float scale_h = -1;
-    float scale_w = -1;
-
-    auto list_new_size_tensor = ctx.MultiInput<framework::Tensor>("SizeTensor");
-    if (list_new_size_tensor.size() > 0) {
-      // have size tensor
-      auto new_size = get_new_shape_xpu(list_new_size_tensor);
-      out_h = new_size[0];
-      out_w = new_size[1];
-    } else {
-      auto scale_tensor = ctx.Input<Tensor>("Scale");
-      auto scale = ctx.Attr<std::vector<float>>("scale");
-      if (scale_tensor != nullptr) {
-        auto scale_data =
-            phi::funcs::get_new_data_from_tensor<float>(scale_tensor);
-        if (scale_data.size() > 1) {
-          scale_h = scale_data[0];
-          scale_w = scale_data[1];
-        } else {
-          scale_h = scale_data[0];
-          scale_w = scale_data[0];
-        }
-        PADDLE_ENFORCE_EQ(
-            scale_w > 0 && scale_h > 0,
-            true,
-            platform::errors::InvalidArgument("scale of Op(interpolate) "
-                                              "should be greater than 0."));
-      } else {
-        if (scale.size() > 1) {
-          scale_h = scale[0];
-          scale_w = scale[1];
-
-          PADDLE_ENFORCE_EQ(
-              scale_w > 0 && scale_h > 0,
-              true,
-              platform::errors::InvalidArgument("scale of Op(interpolate) "
                                                "should be greater than 0."));
-        }
-      }
-      if (scale_h > 0. && scale_w > 0.) {
-        out_h = static_cast<int>(in_h * scale_h);
-        out_w = static_cast<int>(in_w * scale_w);
-      }
-      auto out_size = ctx.Input<Tensor>("OutSize");
-      if (out_size != nullptr) {
-        auto out_size_data =
-            phi::funcs::get_new_data_from_tensor<int>(out_size);
-        out_h = out_size_data[0];
-        out_w = out_size_data[1];
-      }
-    }
-    PADDLE_ENFORCE_GT(
-        out_h,
-        0,
-        platform::errors::InvalidArgument("out_h in Attr(out_shape) of "
-                                          "Op(interpolate) "
-                                          "should be greater than 0."));
-    PADDLE_ENFORCE_GT(
-        out_w,
-        0,
-        platform::errors::InvalidArgument("out_w in Attr(out_shape) of "
-                                          "Op(interpolate) "
-                                          "should be greater than 0."));
-    framework::DDim dim_out;
-    if (data_layout == DataLayout::kNCHW) {
-      dim_out = {n, c, out_h, out_w};
-    } else {
-      dim_out = {n, out_h, out_w, c};
-    }
-    output->mutable_data<T>(dim_out, ctx.GetPlace());
-
-    if (in_h == out_h && in_w == out_w) {
-      framework::TensorCopy(*input, ctx.GetPlace(), output);
-      return;
-    }
-    bool nearest = "nearest" == interp_method;
-    int trans_mode = (align_corners) ? (0) : ((align_mode == 0) ? (1) : (2));
-    auto& dev_ctx = ctx.template device_context<platform::XPUDeviceContext>();
-    if (nearest) {
-      PADDLE_ENFORCE_EQ((data_layout == DataLayout::kNCHW),
-                        true,
-                        platform::errors::InvalidArgument(
-                            "XPU nearest is only support NCHW"));
-    }
-    int r = xpu::interpolate2d(dev_ctx.x_context(),
-                               input->data<T>(),
-                               output->data<T>(),
-                               n,
-                               c,
-                               in_h,
-                               in_w,
-                               out_h,
-                               out_w,
-                               nearest,
-                               trans_mode,
-                               (data_layout == DataLayout::kNCHW));
-    PADDLE_ENFORCE_EQ(r,
-                      XPU_SUCCESS,
-                      platform::errors::External("XPU interpolate2d kernel "
-                                                 "return wrong value[%d %s]",
-                                                 r,
-                                                 XPUAPIErrorMsg[r]));
-  }
-};
-
-template <typename T>
-class InterpolateV2GradXPUKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext& ctx) const override {
-    auto* input_grad = ctx.Output<Tensor>(framework::GradVarName("X"));
-    auto* output_grad = ctx.Input<Tensor>(framework::GradVarName("Out"));
-
-    auto output_grad_dims = output_grad->dims();
-
-    PADDLE_ENFORCE_EQ(output_grad_dims.size(),
-                      4,
-                      platform::errors::External(
-                          "XPU Interpolategrad kernel only support 2d"));
-
-    auto* input = ctx.Input<Tensor>("X");
-    const std::string data_layout_str = ctx.Attr<std::string>("data_layout");
-    const DataLayout data_layout =
-        framework::StringToDataLayout(data_layout_str);
-    int n, c, in_d, in_h, in_w;
-    phi::funcs::ExtractNCDWH(
-        input->dims(), data_layout, &n, &c, &in_d, &in_h, &in_w);
-
-    auto interp_method = ctx.Attr<std::string>("interp_method");
-    bool align_corners = ctx.Attr<bool>("align_corners");
-    int align_mode = ctx.Attr<int>("align_mode");
-
-    int out_h = ctx.Attr<int>("out_h");
-    int out_w = ctx.Attr<int>("out_w");
-    float scale_h = -1;
-    float scale_w = -1;
-
-    auto list_new_size_tensor = ctx.MultiInput<framework::Tensor>("SizeTensor");
-    if (list_new_size_tensor.size() > 0) {
-      // have size tensor
-      auto new_size = get_new_shape_xpu(list_new_size_tensor);
-      out_h = new_size[0];
-      out_w = new_size[1];
-    } else {
-      auto scale_tensor = ctx.Input<Tensor>("Scale");
-      auto scale = ctx.Attr<std::vector<float>>("scale");
-      if (scale_tensor != nullptr) {
-        auto scale_data =
-            phi::funcs::get_new_data_from_tensor<float>(scale_tensor);
-        if (scale_data.size() > 1) {
-          scale_h = scale_data[0];
-          scale_w = scale_data[1];
-        } else {
-          scale_h = scale_data[0];
-          scale_w = scale_data[0];
-        }
-        PADDLE_ENFORCE_EQ(
-            scale_w > 0 && scale_h > 0,
-            true,
-            platform::errors::InvalidArgument("scale of Op(interpolate) "
-                                              "should be greater than 0."));
-      } else {
-        if (scale.size() > 1) {
-          scale_h = scale[0];
-          scale_w = scale[1];
-
-          PADDLE_ENFORCE_EQ(
-              scale_w > 0 && scale_h > 0,
-              true,
-              platform::errors::InvalidArgument("scale of Op(interpolate) "
                                                "should be greater than 0."));
-        }
-      }
-      if (scale_h > 0. && scale_w > 0.) {
-        out_h = static_cast<int>(in_h * scale_h);
-        out_w = static_cast<int>(in_w * scale_w);
-      }
-      auto out_size = ctx.Input<Tensor>("OutSize");
-      if (out_size != nullptr) {
-        auto out_size_data =
-            phi::funcs::get_new_data_from_tensor<int>(out_size);
-        out_h = out_size_data[0];
-        out_w = out_size_data[1];
-      }
-    }
-
-    framework::DDim dim_grad;
-    if (data_layout == DataLayout::kNCHW) {
-      dim_grad = {n, c, in_h, in_w};
-    } else {
-      dim_grad = {n, in_h, in_w, c};
-    }
-    input_grad->mutable_data<T>(dim_grad, ctx.GetPlace());
-
-    auto& dev_ctx = ctx.template device_context<platform::XPUDeviceContext>();
-
-    int r = XPU_SUCCESS;
-    r = xpu::constant(dev_ctx.x_context(),
-                      input_grad->data<T>(),
-                      input_grad->numel(),
-                      static_cast<T>(0.0));
-    PADDLE_ENFORCE_EQ(r,
-                      XPU_SUCCESS,
-                      platform::errors::External(
-                          "XPU constant in interpolate2d_grad kernel return "
-                          "wrong value[%d %s]",
-                          r,
-                          XPUAPIErrorMsg[r]));
-
-    if (in_h == out_h && in_w == out_w) {
-      framework::TensorCopy(*output_grad, ctx.GetPlace(), input_grad);
-      return;
-    }
-
-    bool nearest = "nearest" == interp_method;
-    int trans_mode = (align_corners) ? (0) : ((align_mode == 0) ? (1) : (2));
-
-    if (nearest) {
-      trans_mode = (align_corners) ? (0) : (2);
-    }
-
-    r = xpu::interpolate2d_grad(dev_ctx.x_context(),
-                                output_grad->data<T>(),
-                                input_grad->data<T>(),
-                                n,
-                                c,
-                                in_h,
-                                in_w,
-                                out_h,
-                                out_w,
-                                nearest,
-                                trans_mode,
-                                (data_layout == DataLayout::kNCHW));
-    PADDLE_ENFORCE_EQ(
-        r,
-        XPU_SUCCESS,
-        platform::errors::External("XPU interpolate2d_grad kernel return "
-                                   "wrong value[%d %s]",
-                                   r,
-                                   XPUAPIErrorMsg[r]));
-  }
-};
-
-}  // namespace operators
-}  // namespace paddle
-
-namespace ops = paddle::operators;
-
-REGISTER_OP_XPU_KERNEL(bilinear_interp_v2, ops::InterpolateV2XPUKernel<float>);
-REGISTER_OP_XPU_KERNEL(nearest_interp_v2, ops::InterpolateV2XPUKernel<float>);
-
-REGISTER_OP_XPU_KERNEL(bilinear_interp_v2_grad,
-                       ops::InterpolateV2GradXPUKernel<float>);
-REGISTER_OP_XPU_KERNEL(nearest_interp_v2_grad,
-                       ops::InterpolateV2GradXPUKernel<float>);
-#endif
diff --git a/paddle/phi/kernels/funcs/interpolate_function.h b/paddle/phi/kernels/funcs/interpolate_function.h
index ecc2d0f893c..42adc94a642 100644
--- a/paddle/phi/kernels/funcs/interpolate_function.h
+++ b/paddle/phi/kernels/funcs/interpolate_function.h
@@ -91,6 +91,14 @@ inline std::vector<int> get_new_shape(
         errors::InvalidArgument("The shape of dimension tensor should be [1],"
                                 "but received d%.",
                                 tensor->dims()));
+#ifdef PADDLE_WITH_XPU
+    if (tensor->place().GetType() == phi::AllocationType::XPU) {
+      DenseTensor temp;
+      paddle::framework::TensorCopySync(*tensor, phi::CPUPlace(), &temp);
+      vec_new_shape.push_back(static_cast<int32_t>(*temp.data<int32_t>()));
+      continue;
+    }
+#endif
     if (paddle::platform::is_gpu_place(tensor->place())) {
       DenseTensor temp;
       paddle::framework::TensorCopySync(
diff --git a/paddle/phi/kernels/xpu/interpolate_grad_kernel.cc b/paddle/phi/kernels/xpu/interpolate_grad_kernel.cc
new file mode 100644
index 00000000000..33f0e3e948f
--- /dev/null
+++ b/paddle/phi/kernels/xpu/interpolate_grad_kernel.cc
@@ -0,0 +1,236 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/phi/kernels/interpolate_grad_kernel.h"
+#include "paddle/phi/backends/xpu/enforce_xpu.h"
+#include "paddle/phi/backends/xpu/xpu_context.h"
+#include "paddle/phi/common/layout.h"
+#include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/kernels/funcs/interpolate_function.h"
+#include "paddle/phi/kernels/funcs/math_function.h"
+
+namespace phi {
+
+template <typename T, typename Context>
+void InterpolateGradKernel(
+    const Context& dev_ctx,
+    const DenseTensor& x,
+    const paddle::optional<DenseTensor>& out_size,
+    const paddle::optional<std::vector<const DenseTensor*>>& size_tensor,
+    const paddle::optional<DenseTensor>& scale_tensor,
+    const DenseTensor& output_grad,
+    const std::string& data_layout_str,
+    int out_d,
+    int out_h,
+    int out_w,
+    const std::vector<float>& scale,
+    const std::string& interp_method,
+    bool align_corners,
+    int align_mode,
+    DenseTensor* x_grad) {
+  const DataLayout data_layout =
+      paddle::framework::StringToDataLayout(data_layout_str);
+  int n, c, in_d, in_h, in_w;
+  funcs::ExtractNCDWH(x.dims(), data_layout, &n, &c, &in_d, &in_h, &in_w);
+
+  float scale_h = -1;
+  float scale_w = -1;
+  if (scale_tensor) {
+    auto scale_data =
+        funcs::get_new_data_from_tensor<float>(scale_tensor.get_ptr());
+    if (scale_data.size() > 1) {
+      scale_h = scale_data[0];
+      scale_w = scale_data[1];
+    } else {
+      scale_w = scale_data[0];
+      scale_h = scale_data[0];
+    }
+    PADDLE_ENFORCE_EQ(
+        scale_w > 0,
+        true,
+        errors::InvalidArgument(
+            "The scale_w in input 'Scale' Tensor of Operator(interpolate) "
+            "should be greater than 0, but received value is %f.",
+            scale_w));
+    PADDLE_ENFORCE_EQ(
+        scale_h > 0,
+        true,
+        errors::InvalidArgument(
+            "The scale_h in input 'Scale' Tensor of Operator(interpolate) "
+            "should be greater than 0, but received value is %f.",
+            scale_h));
+  } else {
+    if (scale.size() > 1) {
+      scale_h = scale[0];
+      scale_w = scale[1];
+      PADDLE_ENFORCE_EQ(
+          scale_w > 0,
+          true,
+          errors::InvalidArgument(
+              "The scale_w in Attr(scale) of Operator(interpolate) "
+              "should be greater than 0, but received value is %f.",
+              scale_w));
+      PADDLE_ENFORCE_EQ(
+          scale_h > 0,
+          true,
+          errors::InvalidArgument(
+              "The scale_h in Attr(scale) of Operator(interpolate) "
+              "should be greater than 0, but received value is %f.",
+              scale_h));
+    }
+  }
+  if (scale_h > 0. && scale_w > 0.) {
+    out_h = static_cast<int>(in_h * scale_h);
+    out_w = static_cast<int>(in_w * scale_w);
+  }
+  if (out_size) {
+    auto out_size_data =
+        funcs::get_new_data_from_tensor<int>(out_size.get_ptr());
+    out_h = out_size_data[0];
+    out_w = out_size_data[1];
+  }
+  if (size_tensor && size_tensor->size() > 0) {
+    // have size tensor
+    auto new_size = funcs::get_new_shape(size_tensor.get());
+    out_h = new_size[0];
+    out_w = new_size[1];
+  }
+
+  phi::DDim dim_grad;
+  if (data_layout == DataLayout::kNCHW) {
+    dim_grad = {n, c, in_h, in_w};
+  } else {
+    dim_grad = {n, in_h, in_w, c};
+  }
+
+  x_grad->Resize(dim_grad);
+  dev_ctx.template Alloc<T>(x_grad);
+
+  int r = XPU_SUCCESS;
+  r = xpu::constant(dev_ctx.x_context(),
+                    x_grad->data<T>(),
+                    x_grad->numel(),
+                    static_cast<T>(0.0));
+  PADDLE_ENFORCE_XDNN_SUCCESS(r, "constant");
+
+  if (in_h == out_h && in_w == out_w) {
+    phi::Copy(dev_ctx, output_grad, dev_ctx.GetPlace(), false, x_grad);
+    return;
+  }
+
+  bool nearest = "nearest" == interp_method;
+  int trans_mode = (align_corners) ? (0) : ((align_mode == 0) ? (1) : (2));
+
+  if (nearest) {
+    trans_mode = (align_corners) ? (0) : (2);
+  }
+
+  r = xpu::interpolate2d_grad(dev_ctx.x_context(),
+                              output_grad.data<T>(),
+                              x_grad->data<T>(),
+                              n,
+                              c,
+                              in_h,
+                              in_w,
+                              out_h,
+                              out_w,
+                              nearest,
+                              trans_mode,
+                              (data_layout == DataLayout::kNCHW));
+  PADDLE_ENFORCE_XDNN_SUCCESS(r, "interpolate2d_grad");
+}
+
+template <typename T, typename Context>
+void BilinearInterpGradKernel(
+    const Context& dev_ctx,
+    const DenseTensor& x,
+    const paddle::optional<DenseTensor>& out_size,
+    const paddle::optional<std::vector<const DenseTensor*>>& size_tensor,
+    const paddle::optional<DenseTensor>& scale_tensor,
+    const DenseTensor& out_grad,
+    const std::string& data_layout,
+    int out_d,
+    int out_h,
+    int out_w,
+    const std::vector<float>& scale,
+    const std::string& interp_method,
+    bool align_corners,
+    int align_mode,
+    DenseTensor* x_grad) {
+  InterpolateGradKernel<T, Context>(dev_ctx,
+                                    x,
+                                    out_size,
+                                    size_tensor,
+                                    scale_tensor,
+                                    out_grad,
+                                    data_layout,
+                                    out_d,
+                                    out_h,
+                                    out_w,
+                                    scale,
+                                    interp_method,
+                                    align_corners,
+                                    align_mode,
+                                    x_grad);
+}
+
+template <typename T, typename Context>
+void NearestInterpGradKernel(
+    const Context& dev_ctx,
+    const DenseTensor& x,
+    const paddle::optional<DenseTensor>& out_size,
+    const paddle::optional<std::vector<const DenseTensor*>>& size_tensor,
+    const paddle::optional<DenseTensor>& scale_tensor,
+    const DenseTensor& out_grad,
+    const std::string& data_layout,
+    int out_d,
+    int out_h,
+    int out_w,
+    const std::vector<float>& scale,
+    const std::string& interp_method,
+    bool align_corners,
+    int align_mode,
+    DenseTensor* x_grad) {
+  InterpolateGradKernel<T, Context>(dev_ctx,
+                                    x,
+                                    out_size,
+                                    size_tensor,
+                                    scale_tensor,
+                                    out_grad,
+                                    data_layout,
+                                    out_d,
+                                    out_h,
+                                    out_w,
+                                    scale,
+                                    interp_method,
+                                    align_corners,
+                                    align_mode,
+                                    x_grad);
+}
+
+}  // namespace phi
+
+PD_REGISTER_KERNEL(bilinear_interp_grad,
+                   XPU,
+                   ALL_LAYOUT,
+                   phi::BilinearInterpGradKernel,
+                   float) {
+  kernel->InputAt(2).SetBackend(phi::Backend::ALL_BACKEND);
+  kernel->InputAt(3).SetBackend(phi::Backend::ALL_BACKEND);
+}
+PD_REGISTER_KERNEL(
+    nearest_interp_grad, XPU, ALL_LAYOUT, phi::NearestInterpGradKernel, float) {
+  kernel->InputAt(2).SetBackend(phi::Backend::ALL_BACKEND);
+  kernel->InputAt(3).SetBackend(phi::Backend::ALL_BACKEND);
+}
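One readability win of the migration visible in both new files: every XDNN call returns an int status, and the hand-rolled `PADDLE_ENFORCE_EQ(r, XPU_SUCCESS, ...)` blocks of the fluid kernels become one-line `PADDLE_ENFORCE_XDNN_SUCCESS(r, "api_name")` checks. A self-contained mock of the convention (the macro below is a stand-in for illustration, not Paddle's actual implementation):

#include <cstdio>
#include <cstdlib>

// Stand-in for PADDLE_ENFORCE_XDNN_SUCCESS: abort with the failing API's
// name and status code when an XDNN call does not return XPU_SUCCESS (0).
#define CHECK_XDNN(r, api)                                              \
  do {                                                                  \
    if ((r) != 0 /* XPU_SUCCESS */) {                                   \
      std::fprintf(stderr, "XPU %s failed with code %d\n", api, (r));   \
      std::exit(1);                                                     \
    }                                                                   \
  } while (0)

int fake_xdnn_call() { return 0; }  // pretend XDNN API returning XPU_SUCCESS

int main() {
  int r = fake_xdnn_call();
  CHECK_XDNN(r, "interpolate2d_grad");
  return 0;
}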
diff --git a/paddle/phi/kernels/xpu/interpolate_kernel.cc b/paddle/phi/kernels/xpu/interpolate_kernel.cc
new file mode 100644
index 00000000000..e84ede46158
--- /dev/null
+++ b/paddle/phi/kernels/xpu/interpolate_kernel.cc
@@ -0,0 +1,234 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/phi/kernels/interpolate_kernel.h"
+
+#include "paddle/phi/backends/xpu/enforce_xpu.h"
+#include "paddle/phi/backends/xpu/xpu_context.h"
+#include "paddle/phi/common/layout.h"
+#include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/kernels/funcs/interpolate_function.h"
+
+namespace phi {
+
+template <typename T, typename Context>
+void InterpolateKernel(
+    const Context& ctx,
+    const DenseTensor& x,
+    const paddle::optional<DenseTensor>& out_size,
+    const paddle::optional<std::vector<const DenseTensor*>>& size_tensor,
+    const paddle::optional<DenseTensor>& scale_tensor,
+    const std::string& data_layout_str,
+    int out_d,
+    int out_h,
+    int out_w,
+    const std::vector<float>& scale,
+    const std::string& interp_method,
+    bool align_corners,
+    int align_mode,
+    DenseTensor* output) {
+  const DataLayout data_layout =
+      paddle::framework::StringToDataLayout(data_layout_str);
+  int n, c, in_d, in_h, in_w;
+  phi::funcs::ExtractNCDWH(x.dims(), data_layout, &n, &c, &in_d, &in_h, &in_w);
+
+  float scale_h = -1;
+  float scale_w = -1;
+
+  if (size_tensor && size_tensor->size() > 0) {
+    // have size tensor
+    auto new_size = funcs::get_new_shape(size_tensor.get());
+    out_h = new_size[0];
+    out_w = new_size[1];
+  } else {
+    if (scale_tensor) {
+      auto scale_data =
+          funcs::get_new_data_from_tensor<float>(scale_tensor.get_ptr());
+      if (scale_data.size() > 1) {
+        scale_h = scale_data[0];
+        scale_w = scale_data[1];
+      } else {
+        scale_h = scale_data[0];
+        scale_w = scale_data[0];
+      }
+      PADDLE_ENFORCE_EQ(
+          scale_w > 0,
+          true,
+          errors::InvalidArgument(
+              "The scale_w in input 'Scale' Tensor of Operator(interpolate) "
+              "should be greater than 0, but received value is %f.",
+              scale_w));
+      PADDLE_ENFORCE_EQ(
+          scale_h > 0,
+          true,
+          errors::InvalidArgument(
+              "The scale_h in input 'Scale' Tensor of Operator(interpolate) "
+              "should be greater than 0, but received value is %f.",
+              scale_h));
+    } else {
+      if (scale.size() > 1) {
+        scale_h = scale[0];
+        scale_w = scale[1];
+
+        PADDLE_ENFORCE_EQ(
+            scale_w > 0,
+            true,
+            errors::InvalidArgument(
+                "The scale_w in Attr(scale) of Operator(interpolate) "
+                "should be greater than 0, but received value is %f.",
+                scale_w));
+        PADDLE_ENFORCE_EQ(
+            scale_h > 0,
+            true,
+            errors::InvalidArgument(
+                "The scale_h in Attr(scale) of Operator(interpolate) "
+                "should be greater than 0, but received value is %f.",
+                scale_h));
+      }
+    }
+    if (scale_h > 0. && scale_w > 0.) {
+      out_h = static_cast<int>(in_h * scale_h);
+      out_w = static_cast<int>(in_w * scale_w);
+    }
+    if (out_size) {
+      auto out_size_data =
+          funcs::get_new_data_from_tensor<int>(out_size.get_ptr());
+      out_h = out_size_data[0];
+      out_w = out_size_data[1];
+    }
+  }
+  PADDLE_ENFORCE_GT(
+      out_h,
+      0,
+      errors::InvalidArgument("out_h in Attr(out_shape) of Op(interpolate) "
+                              "should be greater than 0."));
+  PADDLE_ENFORCE_GT(
+      out_w,
+      0,
+      errors::InvalidArgument("out_w in Attr(out_shape) of Op(interpolate) "
+                              "should be greater than 0."));
+
+  phi::DDim dim_out;
+  if (data_layout == DataLayout::kNCHW) {
+    dim_out = {n, c, out_h, out_w};
+  } else {
+    dim_out = {n, out_h, out_w, c};
+  }
+  output->Resize(dim_out);
+  ctx.template Alloc<T>(output);
+
+  if (in_h == out_h && in_w == out_w) {
+    phi::Copy(ctx, x, ctx.GetPlace(), false, output);
+    return;
+  }
+  bool nearest = "nearest" == interp_method;
+  int trans_mode = (align_corners) ? (0) : ((align_mode == 0) ? (1) : (2));
+  if (nearest) {
+    PADDLE_ENFORCE_EQ(
+        (data_layout == DataLayout::kNCHW),
+        true,
+        errors::InvalidArgument("XPU nearest interpolation only supports NCHW"));
+  }
+
+  int r = xpu::interpolate2d(ctx.x_context(),
+                             x.data<T>(),
+                             output->data<T>(),
+                             n,
+                             c,
+                             in_h,
+                             in_w,
+                             out_h,
+                             out_w,
+                             nearest,
+                             trans_mode,
+                             (data_layout == DataLayout::kNCHW));
+  PADDLE_ENFORCE_XDNN_SUCCESS(r, "interpolate2d");
+}
+
+template <typename T, typename Context>
+void BilinearInterpKernel(
+    const Context& ctx,
+    const DenseTensor& x,
+    const paddle::optional<DenseTensor>& out_size,
+    const paddle::optional<std::vector<const DenseTensor*>>& size_tensor,
+    const paddle::optional<DenseTensor>& scale_tensor,
+    const std::string& data_layout,
+    int out_d,
+    int out_h,
+    int out_w,
+    const std::vector<float>& scale,
+    const std::string& interp_method,
+    bool align_corners,
+    int align_mode,
+    DenseTensor* output) {
+  InterpolateKernel<T, Context>(ctx,
+                                x,
+                                out_size,
+                                size_tensor,
+                                scale_tensor,
+                                data_layout,
+                                out_d,
+                                out_h,
+                                out_w,
+                                scale,
+                                interp_method,
+                                align_corners,
+                                align_mode,
+                                output);
+}
+
+template <typename T, typename Context>
+void NearestInterpKernel(
+    const Context& ctx,
+    const DenseTensor& x,
+    const paddle::optional<DenseTensor>& out_size,
+    const paddle::optional<std::vector<const DenseTensor*>>& size_tensor,
+    const paddle::optional<DenseTensor>& scale_tensor,
+    const std::string& data_layout,
+    int out_d,
+    int out_h,
+    int out_w,
+    const std::vector<float>& scale,
+    const std::string& interp_method,
+    bool align_corners,
+    int align_mode,
+    DenseTensor* output) {
+  InterpolateKernel<T, Context>(ctx,
+                                x,
+                                out_size,
+                                size_tensor,
+                                scale_tensor,
+                                data_layout,
+                                out_d,
+                                out_h,
+                                out_w,
+                                scale,
+                                interp_method,
+                                align_corners,
+                                align_mode,
+                                output);
+}
+
+}  // namespace phi
+
+PD_REGISTER_KERNEL(
+    bilinear_interp, XPU, ALL_LAYOUT, phi::BilinearInterpKernel, float) {
+  kernel->InputAt(2).SetBackend(phi::Backend::ALL_BACKEND);
+  kernel->InputAt(3).SetBackend(phi::Backend::ALL_BACKEND);
+}
+PD_REGISTER_KERNEL(
+    nearest_interp, XPU, ALL_LAYOUT, phi::NearestInterpKernel, float) {
+  kernel->InputAt(2).SetBackend(phi::Backend::ALL_BACKEND);
+  kernel->InputAt(3).SetBackend(phi::Backend::ALL_BACKEND);
+}
-- 
GitLab