From be2884eb29e52a2277908dd188b4a600d7f22419 Mon Sep 17 00:00:00 2001
From: zhulei <563755780@qq.com>
Date: Thu, 4 Nov 2021 11:52:05 +0800
Subject: [PATCH] [NPU] Add bilinear_interpolate_v2 (#36971)

---
 .../fluid/operators/interpolate_v2_op_npu.cc  | 424 ++++++++++++++++--
 .../fluid/tests/unittests/npu/CMakeLists.txt  |   1 +
 .../npu/test_bilinear_interp_v2_op_npu.py     | 279 ++++++++++++
 3 files changed, 666 insertions(+), 38 deletions(-)
 create mode 100644 python/paddle/fluid/tests/unittests/npu/test_bilinear_interp_v2_op_npu.py

diff --git a/paddle/fluid/operators/interpolate_v2_op_npu.cc b/paddle/fluid/operators/interpolate_v2_op_npu.cc
index b30c7ac810c..24ad6746ced 100644
--- a/paddle/fluid/operators/interpolate_v2_op_npu.cc
+++ b/paddle/fluid/operators/interpolate_v2_op_npu.cc
@@ -20,6 +20,369 @@ namespace operators {
 
 using Tensor = framework::Tensor;
 using DataLayout = framework::DataLayout;
+using DDim = framework::DDim;
+using fp16 = paddle::platform::float16;
+
+template <typename T>
+struct InterpolateFunction {
+ public:
+  explicit InterpolateFunction(const framework::ExecutionContext& ctx)
+      : ctx(ctx) {
+    place = ctx.GetPlace();
+    stream = ctx.template device_context<paddle::platform::NPUDeviceContext>()
+                 .stream();
+    t0.mutable_data<float>({1}, place);
+    t1.mutable_data<float>({1}, place);
+    tn.mutable_data<float>({1}, place);
+    FillNpuTensorWithConstant<float>(&t0, static_cast<float>(0));
+    FillNpuTensorWithConstant<float>(&t1, static_cast<float>(1));
+  }
+  void Arange(int n, Tensor* x) {
+    FillNpuTensorWithConstant<float>(&tn, static_cast<float>(n));
+    const auto& runner = NpuOpRunner("Range", {t0, tn, t1}, {*x}, {});
+    runner.Run(stream);
+  }
+  void ReduceSum(const Tensor* x, Tensor* y, const std::vector<int>& dim,
+                 bool keep_dims = true) {
+    const auto& runner = NpuOpRunner("ReduceSumD", {*x}, {*y},
+                                     {{"axes", dim}, {"keep_dims", keep_dims}});
+    runner.Run(stream);
+  }
+  void Add(const Tensor* x, const Tensor* y, Tensor* z) {
+    const auto& runner = NpuOpRunner("AddV2", {*x, *y}, {*z}, {});
+    runner.Run(stream);
+  }
+  void Adds(const Tensor* x, float scalar, Tensor* y) {
+    const auto& runner = NpuOpRunner("Adds", {*x}, {*y}, {{"value", scalar}});
+    runner.Run(stream);
+  }
+  void Mul(const Tensor* x, const Tensor* y, Tensor* z) {
+    const auto& runner = NpuOpRunner("Mul", {*x, *y}, {*z}, {});
+    runner.Run(stream);
+  }
+  void Sub(const Tensor* x, const Tensor* y, Tensor* z) {
+    const auto& runner = NpuOpRunner("Sub", {*x, *y}, {*z}, {});
+    runner.Run(stream);
+  }
+  void Cast(const Tensor* x, Tensor* y) {
+    auto dst_dtype = ConvertToNpuDtype(y->type());
+    const auto& runner = NpuOpRunner(
+        "Cast", {*x}, {*y}, {{"dst_type", static_cast<int>(dst_dtype)}});
+    runner.Run(stream);
+  }
+  void Gather(const Tensor* x, const Tensor* indices, const int axis,
+              Tensor* y) {
+    const auto& runner =
+        NpuOpRunner("GatherV2D", {*x, *indices}, {*y}, {{"axis", axis}});
+    runner.Run(stream);
+  }
+  void GatherGrad(const Tensor* gy, const Tensor* indices, const int axis,
+                  Tensor* gx) {
+    // 1 gy swapaxis: axis & 0
+    int len = (gy->dims()).size();
+    std::vector<int> axis_swap(len);
+    for (int i = 0; i < len; i++) {
+      axis_swap[i] = i;
+    }
+    axis_swap[0] = axis;
+    axis_swap[axis] = 0;
+    auto y_new_shape = gy->dims();
+    auto yt = y_new_shape[axis];
+    y_new_shape[axis] = y_new_shape[0];
+    y_new_shape[0] = yt;
+    Tensor gy_t;
+    gy_t.mutable_data<T>(y_new_shape, place);
+    Transpose(gy, &gy_t, axis_swap);
+    // 2 scatter
+    auto x_new_shape = gx->dims();
+    auto xt = x_new_shape[axis];
+    x_new_shape[axis] = x_new_shape[0];
+    x_new_shape[0] = xt;
+    Tensor gx_zero, gx_t;
+    gx_zero.mutable_data<T>(x_new_shape, place);
+    gx_t.mutable_data<T>(x_new_shape, place);
+    FillNpuTensorWithConstant<T>(&gx_zero, static_cast<T>(0));
+    gx_zero.Resize(x_new_shape);
+    Scatter(&gx_zero, indices, &gy_t, &gx_t);
+    // 3 gx swapaxis: axis, 0
+    Transpose(&gx_t, gx, axis_swap);
+  }
+  void Scatter(const Tensor* x, const Tensor* index, const Tensor* updates,
+               Tensor* y) {
+    const auto& runner =
+        NpuOpRunner("TensorScatterAdd", {*x, *index, *updates}, {*y}, {});
+    runner.Run(stream);
+  }
+  void Transpose(const Tensor* x, Tensor* y, const std::vector<int>& axis) {
+    const auto& runner =
+        NpuOpRunner("TransposeD", {*x}, {*y}, {{"perm", axis}});
+    runner.Run(stream);
+  }
+  void Muls(const Tensor* x, float scalar, Tensor* y) {
+    const auto& runner = NpuOpRunner("Muls", {*x}, {*y}, {{"value", scalar}});
+    runner.Run(stream);
+  }
+  void Maximum(const Tensor* x, const Tensor* y, Tensor* z) {
+    const auto& runner = NpuOpRunner("Maximum", {*x, *y}, {*z}, {});
+    runner.Run(stream);
+  }
+  void Minimum(const Tensor* x, const Tensor* y, Tensor* z) {
+    const auto& runner = NpuOpRunner("Minimum", {*x, *y}, {*z}, {});
+    runner.Run(stream);
+  }
+  void Floor(const Tensor* x, Tensor* y) {
+    const auto& runner = NpuOpRunner("Floor", {*x}, {*y}, {});
+    runner.Run(stream);
+  }
+
+ private:
+  platform::Place place;
+  aclrtStream stream;
+  const framework::ExecutionContext& ctx;
+  Tensor t0;
+  Tensor t1;
+  Tensor tn;
+};
+
+template <>
+void InterpolateFunction<fp16>::Arange(int n, Tensor* x) {
+  Tensor x_fp32(framework::proto::VarType::FP32);
+  x_fp32.mutable_data<float>(x->dims(), place);
+  FillNpuTensorWithConstant<float>(&tn, static_cast<float>(n));
+  const auto& runner = NpuOpRunner("Range", {t0, tn, t1}, {x_fp32}, {});
+  runner.Run(stream);
+  Cast(&x_fp32, x);
+}
+
+void InterpolateParamCompute(const float scale_h, const float scale_w,
+                             const bool align_corners, const int align_mode,
+                             const DataLayout& data_layout, const DDim& indim,
+                             const DDim& outdim, int* axis_h, int* axis_w,
+                             int* in_h, int* in_w, int* out_h, int* out_w,
+                             float* ratio_h, float* ratio_w) {
+  if (data_layout == DataLayout::kNCHW) {
+    *axis_h = 2;
+    *axis_w = 3;
+  } else {
+    *axis_h = 1;
+    *axis_w = 2;
+  }
+  *out_h = outdim[*axis_h];
+  *out_w = outdim[*axis_w];
+  *in_h = indim[*axis_h];
+  *in_w = indim[*axis_w];
+  *ratio_h = 0.0f;
+  *ratio_w = 0.0f;
+  if (*out_h > 1) {
+    *ratio_h =
+        align_corners
+            ? static_cast<float>(*in_h - 1) / (*out_h - 1)
+            : (scale_h > 0 ? 1 / scale_h : static_cast<float>(*in_h) / *out_h);
+  }
+  if (*out_w > 1) {
+    *ratio_w =
+        align_corners
+            ? static_cast<float>(*in_w - 1) / (*out_w - 1)
+            : (scale_w > 0 ? 1 / scale_w : static_cast<float>(*in_w) / *out_w);
+  }
+}
+
+template <typename T>
+void BilinearParamTensorCompute(const framework::ExecutionContext& ctx,
+                                const DataLayout& data_layout, int in_h,
+                                int in_w, int out_h, int out_w, bool align_cond,
+                                float ratio_h, float ratio_w, Tensor* h0,
+                                Tensor* h1, Tensor* w0, Tensor* w1,
+                                Tensor* coef_h0, Tensor* coef_h1,
+                                Tensor* coef_w0, Tensor* coef_w1) {
+  InterpolateFunction<T> F(ctx);
+  auto place = ctx.GetPlace();
+  Tensor _h0, _w0;
+  _h0.mutable_data<T>({out_h}, place);
+  _w0.mutable_data<T>({out_w}, place);
+  F.Arange(out_h, &_h0);
+  F.Arange(out_w, &_w0);
+  if (align_cond) {
+    F.Adds(&_h0, static_cast<float>(0.5), &_h0);
+    F.Adds(&_w0, static_cast<float>(0.5), &_w0);
+    F.Muls(&_h0, ratio_h, &_h0);
+    F.Muls(&_w0, ratio_w, &_w0);
+    F.Adds(&_h0, static_cast<float>(-0.5), &_h0);
+    F.Adds(&_w0, static_cast<float>(-0.5), &_w0);
+  } else {
+    F.Muls(&_h0, ratio_h, &_h0);
+    F.Muls(&_w0, ratio_w, &_w0);
+  }
+
+  Tensor zero_t;
+  Tensor one_t;
+  zero_t.mutable_data<T>({1}, place);
+  one_t.mutable_data<T>({1}, place);
+  FillNpuTensorWithConstant<T>(&zero_t, static_cast<T>(0));
+  FillNpuTensorWithConstant<T>(&one_t, static_cast<T>(1));
+  F.Maximum(&_h0, &zero_t, &_h0);
+  F.Maximum(&_w0, &zero_t, &_w0);
+
+  Tensor _h0_floor, _w0_floor;
+  _h0_floor.mutable_data<T>({out_h}, place);
+  _w0_floor.mutable_data<T>({out_w}, place);
+  F.Floor(&_h0, &_h0_floor);
+  F.Floor(&_w0, &_w0_floor);
+  F.Cast(&_h0_floor, h0);
+  F.Cast(&_w0_floor, w0);
+
+  Tensor one_int;
+  one_int.mutable_data<int>({1}, place);
+  FillNpuTensorWithConstant<int>(&one_int, static_cast<int>(1));
+  F.Add(h0, &one_int, h1);
+  F.Add(w0, &one_int, w1);
+  Tensor t_max_h, t_max_w;
+  t_max_h.mutable_data<int>({1}, place);
+  t_max_w.mutable_data<int>({1}, place);
+  FillNpuTensorWithConstant<int>(&t_max_h, static_cast<int>(in_h - 1));
+  FillNpuTensorWithConstant<int>(&t_max_w, static_cast<int>(in_w - 1));
+  F.Minimum(h1, &t_max_h, h1);
+  F.Minimum(w1, &t_max_w, w1);
+
+  F.Sub(&_h0, &_h0_floor, coef_h1);
+  F.Sub(&_w0, &_w0_floor, coef_w1);
+  F.Sub(&one_t, coef_h1, coef_h0);
+  F.Sub(&one_t, coef_w1, coef_w0);
+
+  if (data_layout == DataLayout::kNCHW) {
+    coef_h0->Resize({out_h, 1});
+    coef_h1->Resize({out_h, 1});
+  } else {
+    coef_h0->Resize({out_h, 1, 1});
+    coef_h1->Resize({out_h, 1, 1});
+    coef_w0->Resize({out_w, 1});
+    coef_w1->Resize({out_w, 1});
+  }
+}
+
+template <typename T>
+void BilinearFwdNpu(const framework::ExecutionContext& ctx,
+                    const Tensor* input, Tensor* output, const float scale_h,
+                    const float scale_w, const bool align_corners,
+                    const int align_mode, const DataLayout& data_layout) {
+  InterpolateFunction<T> F(ctx);
+  auto place = ctx.GetPlace();
+  auto outdim = output->dims();
+  auto indim = input->dims();
+
+  int axis_h, axis_w;
+  int out_h, out_w, in_h, in_w;
+  float ratio_h, ratio_w;
+  InterpolateParamCompute(scale_h, scale_w, align_corners, align_mode,
+                          data_layout, indim, outdim, &axis_h, &axis_w, &in_h,
+                          &in_w, &out_h, &out_w, &ratio_h, &ratio_w);
+
+  Tensor h0, h1, w0, w1;
+  h0.mutable_data<int>({out_h}, place);
+  h1.mutable_data<int>({out_h}, place);
+  w0.mutable_data<int>({out_w}, place);
+  w1.mutable_data<int>({out_w}, place);
+  Tensor coef_h0, coef_h1, coef_w0, coef_w1;
+  coef_h0.mutable_data<T>({out_h}, place);
+  coef_h1.mutable_data<T>({out_h}, place);
+  coef_w0.mutable_data<T>({out_w}, place);
+  coef_w1.mutable_data<T>({out_w}, place);
+  bool align_cond = align_mode == 0 && !align_corners;
+  BilinearParamTensorCompute<T>(ctx, data_layout, in_h, in_w, out_h, out_w,
+                                align_cond, ratio_h, ratio_w, &h0, &h1, &w0,
+                                &w1, &coef_h0, &coef_h1, &coef_w0, &coef_w1);
+
+  Tensor input_gather_h0, input_gather_h1;
+  auto dim_gather_h = indim;
+  dim_gather_h[axis_h] = out_h;
+  input_gather_h0.mutable_data<T>(dim_gather_h, place);
+  input_gather_h1.mutable_data<T>(dim_gather_h, place);
+
+  F.Gather(input, &h0, axis_h, &input_gather_h0);
+  F.Gather(input, &h1, axis_h, &input_gather_h1);
+
+  F.Mul(&input_gather_h0, &coef_h0, &input_gather_h0);
+  F.Mul(&input_gather_h1, &coef_h1, &input_gather_h1);
+  Tensor out_x4;
+  out_x4.mutable_data<T>({4, outdim[0], outdim[1], outdim[2], outdim[3]},
+                         place);
+  Tensor input_gather_h0_w0 = out_x4.Slice(0, 1);
+  Tensor input_gather_h0_w1 = out_x4.Slice(1, 2);
+  Tensor input_gather_h1_w0 = out_x4.Slice(2, 3);
+  Tensor input_gather_h1_w1 = out_x4.Slice(3, 4);
+  F.Gather(&input_gather_h0, &w0, axis_w, &input_gather_h0_w0);
+  F.Gather(&input_gather_h0, &w1, axis_w, &input_gather_h0_w1);
+  F.Gather(&input_gather_h1, &w0, axis_w, &input_gather_h1_w0);
+  F.Gather(&input_gather_h1, &w1, axis_w, &input_gather_h1_w1);
+  F.Mul(&input_gather_h0_w0, &coef_w0, &input_gather_h0_w0);
+  F.Mul(&input_gather_h0_w1, &coef_w1, &input_gather_h0_w1);
+  F.Mul(&input_gather_h1_w0, &coef_w0, &input_gather_h1_w0);
+  F.Mul(&input_gather_h1_w1, &coef_w1, &input_gather_h1_w1);
+  F.ReduceSum(&out_x4, output, std::vector<int>{0}, false);
+}
+
+template <typename T>
+void BilinearBwdNpu(const framework::ExecutionContext& ctx,
+                    const Tensor* gout, Tensor* gin, const float scale_h,
+                    const float scale_w, const bool align_corners,
+                    const int align_mode, const DataLayout& data_layout) {
+  InterpolateFunction<T> F(ctx);
+  auto place = ctx.GetPlace();
+  auto outdim = gout->dims();
+  auto indim = gin->dims();
+
+  int axis_h, axis_w;
+  int out_h, out_w, in_h, in_w;
+  float ratio_h, ratio_w;
+  InterpolateParamCompute(scale_h, scale_w, align_corners, align_mode,
+                          data_layout, indim, outdim, &axis_h, &axis_w, &in_h,
+                          &in_w, &out_h, &out_w, &ratio_h, &ratio_w);
+
+  Tensor h0, h1, w0, w1;
+  h0.mutable_data<int>({out_h}, place);
+  h1.mutable_data<int>({out_h}, place);
+  w0.mutable_data<int>({out_w}, place);
+  w1.mutable_data<int>({out_w}, place);
+  Tensor coef_h0, coef_h1, coef_w0, coef_w1;
+  coef_h0.mutable_data<T>({out_h}, place);
+  coef_h1.mutable_data<T>({out_h}, place);
+  coef_w0.mutable_data<T>({out_w}, place);
+  coef_w1.mutable_data<T>({out_w}, place);
+  bool align_cond = align_mode == 0 && !align_corners;
+  BilinearParamTensorCompute<T>(ctx, data_layout, in_h, in_w, out_h, out_w,
+                                align_cond, ratio_h, ratio_w, &h0, &h1, &w0,
+                                &w1, &coef_h0, &coef_h1, &coef_w0, &coef_w1);
+
+  Tensor gy_w0, gy_w1;
+  gy_w0.mutable_data<T>(outdim, place);
+  gy_w1.mutable_data<T>(outdim, place);
+  F.Mul(gout, &coef_w0, &gy_w0);
+  F.Mul(gout, &coef_w1, &gy_w1);
+
+  auto dim_gather_h = indim;
+  dim_gather_h[axis_h] = out_h;
+  Tensor g_gather_w0, g_gather_w1;
+  g_gather_w0.mutable_data<T>(dim_gather_h, place);
+  g_gather_w1.mutable_data<T>(dim_gather_h, place);
+  w0.Resize({out_w, 1});
+  w1.Resize({out_w, 1});
+  F.GatherGrad(&gy_w0, &w0, axis_w, &g_gather_w0);
+  F.GatherGrad(&gy_w1, &w1, axis_w, &g_gather_w1);
+
+  F.Add(&g_gather_w0, &g_gather_w1, &g_gather_w0);
+  F.Mul(&g_gather_w0, &coef_h1, &g_gather_w1);
+  F.Mul(&g_gather_w0, &coef_h0, &g_gather_w0);
+
+  Tensor gx_0, gx_1;
+  gx_0.mutable_data<T>(indim, place);
+  gx_1.mutable_data<T>(indim, place);
+  h0.Resize({out_h, 1});
+  h1.Resize({out_h, 1});
+  F.GatherGrad(&g_gather_w0, &h0, axis_h, &gx_0);
+  F.GatherGrad(&g_gather_w1, &h1, axis_h, &gx_1);
+
+  F.Add(&gx_0, &gx_1, gin);
+}
 
 template <typename T>
 class InterpolateV2NPUKernel : public framework::OpKernel<T> {
@@ -39,19 +402,6 @@ class InterpolateV2NPUKernel : public framework::OpKernel<T> {
     int n, c, in_d, in_h, in_w;
     ExtractNCDWH(input_dims, data_layout, &n, &c, &in_d, &in_h, &in_w);
 
-    PADDLE_ENFORCE_EQ(
-        input->layout(), data_layout,
-        platform::errors::InvalidArgument(
-            "Interpolate OP's input tensor layout should equal to attr "
-            "data_layout, but got tensor layout <%s>, attr layout <%s>",
-            framework::DataLayoutToString(input->layout()), data_layout_str));
-    PADDLE_ENFORCE_EQ(
-        output->layout(), data_layout,
-        platform::errors::InvalidArgument(
-            "Interpolate OP's output tensor layout should equal to attr "
-            "data_layout, but got tensor layout <%s>, attr layout <%s>",
-            framework::DataLayoutToString(output->layout()), data_layout_str));
-
     auto interp_method = ctx.Attr<std::string>("interp_method");
     bool align_corners = ctx.Attr<bool>("align_corners");
 
@@ -156,17 +506,22 @@ class InterpolateV2NPUKernel : public framework::OpKernel<T> {
         ctx.template device_context<paddle::platform::NPUDeviceContext>()
             .stream();
 
-    NpuOpRunner runner;
     // To-do(qili93): need to support bilineare, try ResizeD
+    // Add bilinear by zhulei
     if ("nearest" == interp_method) {
+      NpuOpRunner runner;
       runner.SetType("ResizeNearestNeighborV2")
           .AddInput(*input)
           .AddInput(std::vector<int32_t>{out_h, out_w})
          .AddOutput(*output)
          .AddAttr("align_corners", align_corners)
          .AddAttr("half_pixel_centers", false);
+      runner.Run(stream);
+    } else if ("bilinear" == interp_method) {
+      int align_mode = ctx.Attr<int>("align_mode");
+      BilinearFwdNpu<T>(ctx, input, output, scale_h, scale_w, align_corners,
+                        align_mode, data_layout);
     }
-    runner.Run(stream);
   }
 };
 
 template <typename T>
 class InterpolateV2NPUGradKernel : public framework::OpKernel<T> {
@@ -184,27 +539,6 @@ class InterpolateV2NPUGradKernel : public framework::OpKernel<T> {
     int n, c, in_d, in_h, in_w;
     ExtractNCDWH(input->dims(), data_layout, &n, &c, &in_d, &in_h, &in_w);
 
-    PADDLE_ENFORCE_EQ(
-        input->layout(), data_layout,
-        platform::errors::InvalidArgument(
-            "Interpolate OP's input tensor layout should equal to attr "
-            "data_layout, but got tensor layout <%s>, attr layout <%s>",
-            framework::DataLayoutToString(input->layout()), data_layout_str));
-    PADDLE_ENFORCE_EQ(output_grad->layout(), data_layout,
-                      platform::errors::InvalidArgument(
-                          "Interpolate OP's output_grad tensor layout should "
-                          "equal to attr data_layout, but got tensor layout is "
-                          "<%s>, and attr layout is <%s>",
-                          framework::DataLayoutToString(output_grad->layout()),
-                          data_layout_str));
-    PADDLE_ENFORCE_EQ(input_grad->layout(), data_layout,
-                      platform::errors::InvalidArgument(
-                          "Interpolate OP's input_grad tensor layout should "
-                          "equal to attr data_layout, but got tensor layout is "
-                          "<%s>, and attr layout is <%s>",
-                          framework::DataLayoutToString(input_grad->layout()),
-                          data_layout_str));
-
     auto interp_method = ctx.Attr<std::string>("interp_method");
     bool align_corners = ctx.Attr<bool>("align_corners");
 
@@ -301,17 +635,21 @@ class InterpolateV2NPUGradKernel : public framework::OpKernel<T> {
         ctx.template device_context<paddle::platform::NPUDeviceContext>()
             .stream();
 
-    NpuOpRunner runner;
     // To-do(qili93): need to support bilineare, try ResizeGradD
     if ("nearest" == interp_method) {
+      NpuOpRunner runner;
       runner.SetType("ResizeNearestNeighborV2Grad")
          .AddInput(*output_grad)
          .AddInput(std::vector<int32_t>{in_h, in_w})
          .AddOutput(*input_grad)
          .AddAttr("align_corners", align_corners)
          .AddAttr("half_pixel_centers", false);
+      runner.Run(stream);
+    } else if ("bilinear" == interp_method) {
+      int align_mode = ctx.Attr<int>("align_mode");
+      BilinearBwdNpu<T>(ctx, output_grad, input_grad, scale_h, scale_w,
+                        align_corners, align_mode, data_layout);
     }
-    runner.Run(stream);
   }
 };
 
@@ -330,3 +668,13 @@ REGISTER_OP_NPU_KERNEL(
     nearest_interp_v2_grad,
     ops::InterpolateV2NPUGradKernel<float>,
     ops::InterpolateV2NPUGradKernel<plat::float16>);
+
+REGISTER_OP_NPU_KERNEL(
+    bilinear_interp_v2,
+    ops::InterpolateV2NPUKernel<float>,
+    ops::InterpolateV2NPUKernel<plat::float16>);
+
+REGISTER_OP_NPU_KERNEL(
+    bilinear_interp_v2_grad,
+    ops::InterpolateV2NPUGradKernel<float>,
+    ops::InterpolateV2NPUGradKernel<plat::float16>);
diff --git a/python/paddle/fluid/tests/unittests/npu/CMakeLists.txt b/python/paddle/fluid/tests/unittests/npu/CMakeLists.txt
index 4e81bb9544c..8e31d58195b 100644
--- a/python/paddle/fluid/tests/unittests/npu/CMakeLists.txt
+++ b/python/paddle/fluid/tests/unittests/npu/CMakeLists.txt
@@ -17,6 +17,7 @@ if (WITH_ASCEND_CL)
     # Note: the following test cases has running time more than 120s
     set_tests_properties(test_nearest_interp_op_npu PROPERTIES TIMEOUT 200)
     set_tests_properties(test_nearest_interp_v2_op_npu PROPERTIES TIMEOUT 200)
+    set_tests_properties(test_bilinear_interp_v2_op_npu PROPERTIES TIMEOUT 200)
     set_tests_properties(test_stack_op_npu PROPERTIES TIMEOUT 300)
     set_tests_properties(test_conv2d_transpose_op_npu PROPERTIES TIMEOUT 200)
     set_tests_properties(test_conv2d_op_npu PROPERTIES TIMEOUT 300)
diff --git a/python/paddle/fluid/tests/unittests/npu/test_bilinear_interp_v2_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_bilinear_interp_v2_op_npu.py
new file mode 100644
index 00000000000..6da49b8d84d
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/npu/test_bilinear_interp_v2_op_npu.py
@@ -0,0 +1,279 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import unittest
+import numpy as np
+import sys
+sys.path.append("..")
+from op_test import OpTest
+import paddle.fluid.core as core
+import paddle.fluid as fluid
+from paddle.nn.functional import interpolate
+import paddle
+
+from test_bilinear_interp_v2_op import bilinear_interp_np
+
+paddle.enable_static()
+
+
+class TestBilinearInterpOp(OpTest):
+    def set_npu(self):
+        self.__class__.use_npu = True
+        self.place = paddle.NPUPlace(0)
+
+    def setUp(self):
+        self.set_npu()
+        self.out_size = None
+        self.actual_shape = None
+        self.data_layout = 'NCHW'
+        self.init_test_case()
+        self.op_type = "bilinear_interp_v2"
+        input_np = np.random.random(self.input_shape).astype(self.dtype)
+
+        if self.data_layout == "NCHW":
+            in_h = self.input_shape[2]
+            in_w = self.input_shape[3]
+        else:
+            in_h = self.input_shape[1]
+            in_w = self.input_shape[2]
+        scale_h = 0
+        scale_w = 0
+        if self.scale:
+            if isinstance(self.scale, float) or isinstance(self.scale, int):
+                if self.scale > 0.:
+                    scale_h = scale_w = float(self.scale)
+            if isinstance(self.scale, list) and len(self.scale) == 1:
+                scale_w = scale_h = self.scale[0]
+            elif isinstance(self.scale, list) and len(self.scale) > 1:
+                scale_w = self.scale[1]
+                scale_h = self.scale[0]
+            out_h = int(in_h * scale_h)
+            out_w = int(in_w * scale_w)
+        else:
+            out_h = self.out_h
+            out_w = self.out_w
+
+        output_np = bilinear_interp_np(input_np, out_h, out_w, scale_w, scale_h,
+                                       self.out_size, self.actual_shape,
+                                       self.align_corners, self.align_mode,
+                                       self.data_layout)
+
+        self.inputs = {'X': input_np}
+        if self.out_size is not None:
+            self.inputs['OutSize'] = self.out_size
+        if self.actual_shape is not None:
+            self.inputs['OutSize'] = self.actual_shape
+
+        self.attrs = {
+            'out_h': self.out_h,
+            'out_w': self.out_w,
+            'interp_method': self.interp_method,
+            'align_corners': self.align_corners,
+            'align_mode': self.align_mode,
+            'data_layout': self.data_layout
+        }
+        if self.scale:
+            if isinstance(self.scale, float) or isinstance(self.scale, int):
+                if self.scale > 0.:
+                    self.scale = [self.scale]
+            if isinstance(self.scale, list) and len(self.scale) == 1:
+                self.scale = [self.scale[0], self.scale[0]]
+            self.attrs['scale'] = self.scale
+        self.outputs = {'Out': output_np}
+
+    def test_check_output(self):
+        self.check_output_with_place(self.place, atol=self.atol)
+
+    def test_check_grad(self):
+        self.__class__.exist_check_grad = True
+        if self.dtype == 'float16':
+            return
+        self.max_relative_error = 0.005
+        inputs_to_check = ['X']
+        output_names = ['Out']
+        no_grad_set = set()
+        cpu_place = fluid.CPUPlace()
+        cpu_grads = self._get_gradient(inputs_to_check, cpu_place, output_names,
+                                       no_grad_set)
+        npu_grads = self._get_gradient(inputs_to_check, self.place,
+                                       output_names, no_grad_set)
+        self._assert_is_close(cpu_grads, npu_grads, inputs_to_check,
+                              self.max_relative_error,
+                              "Gradient Check between places")
+
+    def init_test_case(self):
+        self.interp_method = 'bilinear'
+        self.input_shape = [2, 3, 5, 7]
+        self.out_h = 60
+        self.out_w = 25
+        self.scale = 1.5
+        self.align_corners = False
+        self.align_mode = 1
+        self.dtype = 'float32'
+        self.atol = 1e-5
+
+
+class TestBilinearInterpCaseFP16(TestBilinearInterpOp):
+    def init_test_case(self):
+        super(TestBilinearInterpCaseFP16, self).init_test_case()
+        self.dtype = 'float16'
+        self.atol = 1e-2
+
+
+class TestBilinearInterpCase1(TestBilinearInterpOp):
+    def init_test_case(self):
+        super(TestBilinearInterpCase1, self).init_test_case()
+        self.input_shape = [4, 1, 7, 8]
+        self.out_h = 1
+        self.out_w = 1
+        self.scale = 0.
+
+
+class TestBilinearInterpCase2(TestBilinearInterpOp):
+    def init_test_case(self):
+        super(TestBilinearInterpCase2, self).init_test_case()
+        self.input_shape = [3, 3, 9, 6]
+        self.out_h = 12
+        self.out_w = 12
+        self.scale = 0.
+
+
+class TestBilinearInterpCase3(TestBilinearInterpOp):
+    def init_test_case(self):
+        super(TestBilinearInterpCase3, self).init_test_case()
+        self.input_shape = [1, 1, 32, 64]
+        self.out_h = 64
+        self.out_w = 32
+        self.scale = 0.
+
+
+class TestBilinearInterpCase4(TestBilinearInterpOp):
+    def init_test_case(self):
+        super(TestBilinearInterpCase4, self).init_test_case()
+        self.input_shape = [4, 1, 7, 8]
+        self.out_h = 1
+        self.out_w = 1
+        self.scale = 0.
+        self.out_size = np.array([2, 2]).astype("int32")
+
+
+class TestBilinearInterpCase5(TestBilinearInterpOp):
+    def init_test_case(self):
+        super(TestBilinearInterpCase5, self).init_test_case()
+        self.input_shape = [3, 3, 9, 6]
+        self.out_h = 12
+        self.out_w = 12
+        self.scale = 0.
+        self.out_size = np.array([11, 11]).astype("int32")
+
+
+class TestBilinearInterpCase6(TestBilinearInterpOp):
+    def init_test_case(self):
+        super(TestBilinearInterpCase6, self).init_test_case()
+        self.input_shape = [1, 1, 32, 64]
+        self.out_h = 64
+        self.out_w = 32
+        self.scale = 0.
+        self.out_size = np.array([65, 33]).astype("int32")
+
+
+class TestBilinearInterpCase7(TestBilinearInterpOp):
+    def init_test_case(self):
+        super(TestBilinearInterpCase7, self).init_test_case()
+        self.input_shape = [1, 1, 32, 64]
+        self.out_h = 64
+        self.out_w = 32
+        self.scale = [2.0, 0.5]
+
+
+class TestBilinearInterpSame(TestBilinearInterpOp):
+    def init_test_case(self):
+        super(TestBilinearInterpSame, self).init_test_case()
+        self.input_shape = [2, 3, 32, 64]
+        self.out_h = 32
+        self.out_w = 64
+        self.scale = 0.
+
+
+class TestBilinearInterpActualShape(TestBilinearInterpOp):
+    def init_test_case(self):
+        super(TestBilinearInterpActualShape, self).init_test_case()
+        self.input_shape = [3, 2, 32, 16]
+        self.out_h = 64
+        self.out_w = 32
+        self.scale = 0.
+        self.out_size = np.array([66, 40]).astype("int32")
+
+
+class TestBilinearInterpDataLayout(TestBilinearInterpOp):
+    def init_test_case(self):
+        super(TestBilinearInterpDataLayout, self).init_test_case()
+        self.input_shape = [2, 5, 5, 3]
+        self.out_h = 2
+        self.out_w = 2
+        self.scale = 0.
+        self.out_size = np.array([3, 3]).astype("int32")
+        self.data_layout = "NHWC"
+
+
+class TestBilinearInterpOtherMethod1(TestBilinearInterpOp):
+    def set_align_mode(self):
+        self.align_corners = False
+        self.align_mode = 1
+
+
+class TestBilinearInterpWithMethod2(TestBilinearInterpOp):
+    def set_align_mode(self):
+        self.align_corners = False
+        self.align_mode = 0
+
+
+class TestBilinearInterpWithMethod3(TestBilinearInterpOp):
+    def set_align_mode(self):
+        self.align_corners = True
+        self.align_mode = 0
+
+
+class TestBilinearInterpScale1(TestBilinearInterpOp):
+    def init_test_case(self):
+        super(TestBilinearInterpScale1, self).init_test_case()
+        self.input_shape = [2, 3, 5, 7]
+        self.out_h = 60
+        self.out_w = 25
+        self.scale = 2.
+
+
+class TestBilinearInterpScale2(TestBilinearInterpOp):
+    def init_test_case(self):
+        super(TestBilinearInterpScale2, self).init_test_case()
+        self.input_shape = [2, 3, 5, 7]
+        self.out_h = 60
+        self.out_w = 25
+        self.scale = 1.
+
+
+class TestBilinearInterpZero(TestBilinearInterpOp):
+    def init_test_case(self):
+        super(TestBilinearInterpZero, self).init_test_case()
+        self.input_shape = [2, 3, 5, 7]
+        self.out_h = 60
+        self.out_w = 25
+        self.scale = 0.2
+        self.align_mode = 0
+
+
+if __name__ == "__main__":
+    unittest.main()
-- 
GitLab
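
Reviewer note, not part of the patch: BilinearFwdNpu avoids per-pixel loops by
precomputing integer neighbour indices (h0/h1, w0/w1) and fractional weights
(coef_*), then blending four GatherV2D results with Mul and ReduceSumD. The
NumPy sketch below mirrors that decomposition for the NCHW case. It is a
minimal reference under simplifying assumptions (it uses in/out for the ratio
and omits the kernel's explicit 1/scale branch), and bilinear_resize_ref is a
hypothetical name, not an API introduced by this patch.

import numpy as np

def bilinear_resize_ref(x, out_h, out_w, align_corners=False, align_mode=1):
    # x: float array of shape [N, C, H, W] (NCHW layout).
    n, c, in_h, in_w = x.shape
    ratio_h = ((in_h - 1) / (out_h - 1) if align_corners else in_h / out_h) \
        if out_h > 1 else 0.0
    ratio_w = ((in_w - 1) / (out_w - 1) if align_corners else in_w / out_w) \
        if out_w > 1 else 0.0

    # Source coordinates. align_mode == 0 with align_corners == False uses
    # half-pixel centers; this corresponds to the align_cond branch of
    # BilinearParamTensorCompute.
    j = np.arange(out_h, dtype=np.float64)
    i = np.arange(out_w, dtype=np.float64)
    if align_mode == 0 and not align_corners:
        h = np.maximum(ratio_h * (j + 0.5) - 0.5, 0)
        w = np.maximum(ratio_w * (i + 0.5) - 0.5, 0)
    else:
        h = ratio_h * j
        w = ratio_w * i

    # Integer neighbours (h0/h1, w0/w1) and weights (coef_*), clamped the
    # same way the kernel clamps with Maximum/Minimum.
    h0 = np.floor(h).astype(np.int64)
    w0 = np.floor(w).astype(np.int64)
    h1 = np.minimum(h0 + 1, in_h - 1)
    w1 = np.minimum(w0 + 1, in_w - 1)
    coef_h1 = (h - h0)[None, None, :, None]  # broadcast over N, C, W
    coef_w1 = (w - w0)[None, None, None, :]  # broadcast over N, C, H
    coef_h0 = 1.0 - coef_h1
    coef_w0 = 1.0 - coef_w1

    # Four gathers along H then W, combined by the bilinear weights; this is
    # the Gather/Mul/ReduceSum pattern of BilinearFwdNpu.
    g_h0, g_h1 = x[:, :, h0, :], x[:, :, h1, :]
    return (g_h0[:, :, :, w0] * coef_h0 * coef_w0 +
            g_h0[:, :, :, w1] * coef_h0 * coef_w1 +
            g_h1[:, :, :, w0] * coef_h1 * coef_w0 +
            g_h1[:, :, :, w1] * coef_h1 * coef_w1)

For float32 inputs this should agree with bilinear_interp_np from the unit
test above up to rounding, e.g. bilinear_resize_ref(x, 12, 12) against the
TestBilinearInterpCase2 configuration.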
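
A second note on the backward pass: there is no gradient op used directly for
GatherV2D here, so InterpolateFunction::GatherGrad builds one from TransposeD
plus TensorScatterAdd, swapping the gathered axis to the front, scatter-adding
the incoming gradient into a zero tensor, and swapping back (the callers
resize the index tensors to {n, 1} because TensorScatterAdd consumes an index
column). A rough NumPy equivalent, with gather_grad_ref as a hypothetical
helper name:

import numpy as np

def gather_grad_ref(gy, indices, axis, x_shape):
    # Gradient of np.take(x, indices, axis=axis) w.r.t. x: scatter-add gy
    # back into a zero tensor of x's shape.
    gx = np.zeros(x_shape, dtype=gy.dtype)
    # View with `axis` moved to the front, like the TransposeD step.
    gx_view = np.moveaxis(gx, axis, 0)
    # Duplicate indices must accumulate, hence an adding scatter (np.add.at,
    # playing the role of TensorScatterAdd), not a plain assignment.
    np.add.at(gx_view, indices, np.moveaxis(gy, axis, 0))
    return gx

Duplicate indices occur whenever two output pixels share a source row or
column (any upscale), which is why an accumulating scatter is required for a
correct gradient.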