From 5209b4b646764ad045cfae6bdd0dc8a152b1e6db Mon Sep 17 00:00:00 2001 From: zhupengyang <1165938320@qq.com> Date: Tue, 14 Jan 2020 17:02:54 +0800 Subject: [PATCH] [NPU] enhance concat, nearest_interp, bilinear_interp ut (#2764) - enhance interp InferShape --- lite/kernels/npu/bridges/concat_op_test.cc | 130 ----- lite/kernels/npu/bridges/interpolate_op.cc | 22 +- .../npu/bridges/interpolate_op_test.cc | 407 --------------- lite/operators/interpolate_op.cc | 50 +- lite/tests/kernels/CMakeLists.txt | 5 +- .../kernels/bilinear_interp_compute_test.cc | 374 -------------- lite/tests/kernels/concat_compute_test.cc | 30 +- lite/tests/kernels/interp_compute_test.cc | 462 ++++++++++++++++++ .../kernels/nearest_interp_compute_test.cc | 260 ---------- 9 files changed, 515 insertions(+), 1225 deletions(-) delete mode 100644 lite/kernels/npu/bridges/concat_op_test.cc delete mode 100644 lite/kernels/npu/bridges/interpolate_op_test.cc delete mode 100644 lite/tests/kernels/bilinear_interp_compute_test.cc create mode 100644 lite/tests/kernels/interp_compute_test.cc delete mode 100644 lite/tests/kernels/nearest_interp_compute_test.cc diff --git a/lite/kernels/npu/bridges/concat_op_test.cc b/lite/kernels/npu/bridges/concat_op_test.cc deleted file mode 100644 index f870bb0e7e..0000000000 --- a/lite/kernels/npu/bridges/concat_op_test.cc +++ /dev/null @@ -1,130 +0,0 @@ -// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "lite/operators/concat_op.h" -#include -#include -#include "lite/core/op_registry.h" -#include "lite/kernels/npu/bridges/registry.h" -#include "lite/kernels/npu/bridges/test_helper.h" - -namespace paddle { -namespace lite { -namespace kernels { -namespace npu { -namespace bridges { - -std::vector stride_numel(const DDim& ddim) { - std::vector strides(ddim.size()); - strides[ddim.size() - 1] = ddim[ddim.size() - 1]; - for (int i = ddim.size() - 2; i >= 0; --i) { - strides[i] = strides[i + 1] * ddim[i]; - } - return strides; -} - -void concat_ref(const std::shared_ptr op) { - Scope* scope = op->scope(); - const OpInfo* op_info = op->op_info(); - auto x = op_info->Input("X"); - std::vector inputs; - for (auto var : x) { - inputs.push_back(scope->FindVar(var)->GetMutable()); - } - auto out = - scope->FindVar(op_info->Output("Out").front())->GetMutable(); - int axis = op_info->GetAttr("axis"); - std::vector inputs_concat(inputs.size()); - for (int j = 0; j < inputs.size(); ++j) { - inputs_concat[j] = inputs[j]; - } - size_t num = inputs.size(); - int rows = 1; - auto dim_0 = inputs[0]->dims(); - for (int i = 0; i < axis; ++i) { - rows *= dim_0[i]; - } - int out_rows = rows, out_cols = 0; - std::vector inputs_cols(inputs.size()); - for (int i = 0; i < num; ++i) { - int t_cols = inputs[i]->numel() / rows; - out_cols += t_cols; - inputs_cols[i] = t_cols; - } - for (int k = 0; k < out_rows; ++k) { - float* dst_ptr = out->mutable_data() + k * out_cols; - int col_idx = 0; - for (int j = 0; j < num; ++j) { - int col_len = inputs_cols[j]; - const float* src_prt = inputs[j]->data() + k * col_len; - std::memcpy(dst_ptr + col_idx, src_prt, sizeof(float) * col_len); - col_idx += col_len; - } - } -} - -void test_concat(std::vector> input, int axis) { - std::string x_var_name = "x"; - std::string y_var_name = "y"; - std::string out_var_name = "out"; - std::string out_ref_var_name = "out_ref"; - - // prepare input&output variables - Scope scope; - auto* x = scope.Var(x_var_name)->GetMutable(); - auto* y = scope.Var(y_var_name)->GetMutable(); - x->Resize(DDim(input[0])); - y->Resize(DDim(input[1])); - auto* out = scope.Var(out_var_name)->GetMutable(); - auto* out_ref = scope.Var(out_ref_var_name)->GetMutable(); - CHECK_EQ(out->dims(), out_ref->dims()); - - // initialize input&output data - FillTensor(x); - FillTensor(y); - - // initialize op desc - cpp::OpDesc opdesc; - opdesc.SetType("concat"); - opdesc.SetInput("X", {x_var_name, y_var_name}); - opdesc.SetOutput("Out", {out_var_name}); - opdesc.SetAttr("axis", axis); - - auto op = CreateOp(opdesc, &scope); - LauchOp(op, {x_var_name, y_var_name}, {out_var_name}); - out_ref->CopyDataFrom(*out); - concat_ref(op); - auto* out_data = out->mutable_data(); - auto* out_ref_data = out_ref->mutable_data(); - for (int i = 0; i < out->dims().production(); i++) { - VLOG(5) << i; - EXPECT_NEAR(out_data[i], out_ref_data[i], 5e-4); - } -} - -TEST(NPUBridges, concat) { - test_concat({{3, 3, 5, 2}, {2, 3, 5, 2}}, 0); - test_concat({{3, 5, 5, 2}, {3, 1, 5, 2}}, 1); - test_concat({{3, 3, 2, 2}, {3, 3, 4, 2}}, 2); - test_concat({{3, 3, 5, 2}, {3, 3, 5, 6}}, 3); -} - -} // namespace bridges -} // namespace npu -} // namespace kernels -} // namespace lite -} // namespace paddle - -USE_LITE_OP(concat); -USE_NPU_BRIDGE(concat); diff --git a/lite/kernels/npu/bridges/interpolate_op.cc b/lite/kernels/npu/bridges/interpolate_op.cc index 238200abf3..d68f63b16e 100644 --- a/lite/kernels/npu/bridges/interpolate_op.cc +++ b/lite/kernels/npu/bridges/interpolate_op.cc @@ -48,11 +48,15 @@ int InterpolateConverter(void* ctx, OpLite* op, KernelBase* kernel) { auto out_w = op_info->GetAttr("out_w"); auto out_h = op_info->GetAttr("out_h"); auto align_corners = op_info->GetAttr("align_corners"); - int align_mode = op_info->GetAttr("align_mode"); + int align_mode = + op_info->HasAttr("align_mode") ? op_info->GetAttr("align_mode") : 1; auto interp_method = op_info->GetAttr("interp_method"); - CHECK(!(align_mode == 0 && !align_corners)) << "[NPU] align_mode = 0 && " - "align_corners = false isn't " - "supported in HiAI DDK"; + if (align_mode == 0 && !align_corners) { + LOG(WARNING) << "[NPU] align_mode = 0 && " + "align_corners = false isn't " + "supported in HiAI DDK"; + return FAILED; + } // X node std::shared_ptr x_node = nullptr; @@ -93,10 +97,12 @@ int InterpolateConverter(void* ctx, OpLite* op, KernelBase* kernel) { if (interp_method == "bilinear") { const float largest_multiple = 7.0f; float multiple = static_cast(x_h * x_w) / (out_h * out_w); - CHECK_LT(multiple, largest_multiple) - << "[NPU] multiple=(ih*iw)/(oh*ow)=" << multiple - << " is too large, should not exceed " << largest_multiple - << " in HiAI DDK"; + if (multiple >= largest_multiple) { + LOG(WARNING) << "[NPU] multiple=(ih*iw)/(oh*ow)=" << multiple + << " is too large, should not exceed " << largest_multiple + << " in HiAI DDK"; + return FAILED; + } } out_size_node = graph->Add(out_name + "/out_size", std::vector({out_h, out_w})); diff --git a/lite/kernels/npu/bridges/interpolate_op_test.cc b/lite/kernels/npu/bridges/interpolate_op_test.cc deleted file mode 100644 index c061fbfe5f..0000000000 --- a/lite/kernels/npu/bridges/interpolate_op_test.cc +++ /dev/null @@ -1,407 +0,0 @@ -// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "lite/operators/interpolate_op.h" -#include -#include -#include "lite/core/op_registry.h" -#include "lite/kernels/npu/bridges/registry.h" -#include "lite/kernels/npu/bridges/test_helper.h" - -namespace paddle { -namespace lite { -namespace kernels { -namespace npu { -namespace bridges { - -template -void bilinear_interp_ref(const std::shared_ptr op) { - auto scope = op->scope(); - auto op_info = op->op_info(); - auto x = scope->FindVar(op_info->Input("X").front())->GetMutable(); - auto out = - scope->FindVar(op_info->Output("Out").front())->GetMutable(); - auto x_dims = x->dims(); - int batch_size = x_dims[0]; - int channel_size = x_dims[1]; - auto x_h = x_dims[2]; - auto x_w = x_dims[3]; - CHECK_EQ(x_dims.size(), 4); - auto scale = op_info->GetAttr("scale"); - auto out_w = op_info->GetAttr("out_w"); - auto out_h = op_info->GetAttr("out_h"); - auto align_corners = op_info->GetAttr("align_corners"); - int align_mode = op_info->GetAttr("align_mode"); - auto interp_method = op_info->GetAttr("interp_method"); - - // calc real out_h and out_w - if (scale > 0) { - out_h = static_cast(x_h * scale); - out_w = static_cast(x_w * scale); - } - if (op_info->HasInput("OutSize")) { - auto out_size_var_names = op_info->Input("OutSize"); - if (out_size_var_names.size() > 0) { - auto out_size_var_name = out_size_var_names.front(); - auto out_size = - scope->FindVar(out_size_var_name)->GetMutable(); - auto out_size_dims = out_size->dims(); - CHECK_EQ(out_size_dims.size(), 1); - CHECK_EQ(out_size_dims.production(), 2); - auto out_size_data = out_size->mutable_data(); - out_h = out_size_data[0]; - out_w = out_size_data[1]; - } - } - CHECK_GT(out_h, 0); - CHECK_GT(out_w, 0); - out->Resize({batch_size, channel_size, out_h, out_w}); - - // copy from x if no change - if (x_h == out_h && x_w == out_w) { - out->CopyDataFrom(*x); - return; - } - - float ratio_h = 0.f; - float ratio_w = 0.f; - if (out_h > 1) { - ratio_h = (align_corners) ? static_cast(x_h - 1) / (out_h - 1) - : static_cast(x_h) / out_h; - } - if (out_w > 1) { - ratio_w = (align_corners) ? static_cast(x_w - 1) / (out_w - 1) - : static_cast(x_w) / out_w; - } - - // naive bilinear interpolation - auto x_data = x->mutable_data(); - auto out_data = out->mutable_data(); - bool align_flag = (align_mode == 0 && !align_corners); - - std::vector vy_n, vy_s; - std::vector vd_n, vd_s; - vy_n.reserve(out_h); - vy_s.reserve(out_h); - vd_n.reserve(out_h); - vd_s.reserve(out_h); - for (int k = 0; k < out_h; k++) { - int yn = align_flag ? static_cast(ratio_h * (k + 0.5) - 0.5) - : static_cast(ratio_h * k); - yn = (yn > 0) ? yn : 0; - int ys = (yn + 1) < (x_h - 1) ? (yn + 1) : (x_h - 1); - float idx_src_y = ratio_h * (k + 0.5) - 0.5; - idx_src_y = (idx_src_y > 0) ? idx_src_y : 0; - float dn = align_flag ? idx_src_y - yn : ratio_h * k - yn; - float ds = 1.f - dn; - { - vy_n[k] = yn; - vy_s[k] = ys; - vd_n[k] = dn; - vd_s[k] = ds; - } - } - - std::vector vx_w, vx_e; - std::vector vd_w, vd_e; - vx_w.reserve(out_w); - vx_e.reserve(out_w); - vd_w.reserve(out_w); - vd_e.reserve(out_w); - for (int l = 0; l < out_w; l++) { - int xw = (align_mode == 0 && !align_corners) - ? static_cast(ratio_w * (l + 0.5) - 0.5) - : static_cast(ratio_w * l); - xw = (xw > 0) ? xw : 0; - int xe = (xw + 1) < (x_w - 1) ? (xw + 1) : (x_w - 1); - float idx_src_x = ratio_w * (l + 0.5) - 0.5; - idx_src_x = (idx_src_x > 0) ? idx_src_x : 0; - float dw = align_flag ? idx_src_x - xw : ratio_w * l - xw; - float de = 1.f - dw; - { - vx_w[l] = xw; - vx_e[l] = xe; - vd_w[l] = dw; - vd_e[l] = de; - } - } - - std::vector x_strides(x_dims.size(), 1); - for (int idx = x_strides.size() - 2; idx >= 0; idx--) { - x_strides[idx] = x_strides[idx + 1] * x_dims[idx + 1]; - } - for (int i = 0; i < batch_size; i++) { - for (int j = 0; j < channel_size; j++) { - for (int k = 0; k < out_h; k++) { - for (int l = 0; l < out_w; l++) { - DType x0 = x_data[i * x_strides[0] + j * x_strides[1] + - vy_n[k] * x_strides[2] + vx_w[l] * x_strides[3]]; - DType x1 = x_data[i * x_strides[0] + j * x_strides[1] + - vy_s[k] * x_strides[2] + vx_w[l] * x_strides[3]]; - DType x2 = x_data[i * x_strides[0] + j * x_strides[1] + - vy_n[k] * x_strides[2] + vx_e[l] * x_strides[3]]; - DType x3 = x_data[i * x_strides[0] + j * x_strides[1] + - vy_s[k] * x_strides[2] + vx_e[l] * x_strides[3]]; - *out_data = x0 * vd_s[k] * vd_e[l] + x1 * vd_n[k] * vd_e[l] + - x2 * vd_s[k] * vd_w[l] + x3 * vd_n[k] * vd_w[l]; - out_data++; - } - } - } - } -} - -template -void nearest_interp_ref(const std::shared_ptr op) { - auto scope = op->scope(); - auto op_info = op->op_info(); - auto x = scope->FindVar(op_info->Input("X").front())->GetMutable(); - auto out = - scope->FindVar(op_info->Output("Out").front())->GetMutable(); - auto x_dims = x->dims(); - CHECK_EQ(x_dims.size(), 4); - auto scale = op_info->GetAttr("scale"); - auto out_w = op_info->GetAttr("out_w"); - auto out_h = op_info->GetAttr("out_h"); - auto align_corners = op_info->GetAttr("align_corners"); - // int align_mode = op_info->GetAttr("align_mode"); - auto interp_method = op_info->GetAttr("interp_method"); - CHECK_EQ(interp_method, "nearest"); - - int x_h = x_dims[2]; - int x_w = x_dims[3]; - if (scale > 0) { - out_h = static_cast(x_h * scale); - out_w = static_cast(x_w * scale); - } - if (op_info->HasInput("OutSize")) { - auto out_size_var_names = op_info->Input("OutSize"); - if (out_size_var_names.size() > 0) { - auto out_size_var_name = out_size_var_names.front(); - auto out_size = - scope->FindVar(out_size_var_name)->GetMutable(); - CHECK_EQ(out_size->numel(), 2); - auto out_size_data = out_size->mutable_data(); - out_h = out_size_data[0]; - out_w = out_size_data[1]; - } - } - CHECK_GT(out_h, 0); - CHECK_GT(out_w, 0); - out->Resize({x_dims[0], x_dims[1], out_h, out_w}); - - float ratio_h = 0.f; - float ratio_w = 0.f; - if (out_h > 1) { - ratio_h = align_corners ? static_cast(x_h - 1.0) / (out_h - 1.0) - : static_cast(x_h) / out_h; - } - if (out_w > 1) { - ratio_w = align_corners ? static_cast(x_w - 1.0) / (out_w - 1.0) - : static_cast(x_w) / out_w; - } - - auto x_data = x->data(); - auto out_data = out->mutable_data(); - auto out_dims = out->dims(); - std::vector x_strides(x_dims.size(), 1); - for (int idx = x_strides.size() - 2; idx >= 0; idx--) { - x_strides[idx] = x_strides[idx + 1] * x_dims[idx + 1]; - } - - for (int n = 0; n < out_dims[0]; n++) { - for (int c = 0; c < out_dims[1]; c++) { - for (int h = 0; h < out_dims[2]; h++) { - for (int w = 0; w < out_dims[3]; w++) { - int in_i = ratio_h * h; - int in_j = ratio_w * w; - if (align_corners) { - in_i = ratio_h * h + 0.5; - in_j = ratio_w * w + 0.5; - } - *out_data = x_data[n * x_strides[0] + c * x_strides[1] + - in_i * x_strides[2] + in_j * x_strides[3]]; - out_data++; - } - } - } - } -} - -void test_interpolate(int bs, - int ic, - int ih, - int iw, - int oh, - int ow, - float scale, - int out_size_h, - int out_size_w, - bool align_corners, - int align_mode, - std::string interp_method) { - // prepare input&output variables - Scope scope; - std::string x_var_name("x"); - std::string out_size_var_name("out_size"); - std::string out_var_name("out"); - std::string out_ref_var_name("out_ref"); - auto x = scope.Var(x_var_name)->GetMutable(); - auto out_size = scope.Var(out_size_var_name)->GetMutable(); - auto out = scope.Var(out_var_name)->GetMutable(); - auto out_ref = scope.Var(out_ref_var_name)->GetMutable(); - x->Resize({bs, ic, ih, iw}); - out_size->Resize({2}); - - // initialize input&output data - FillTensor(x); - - // initialize op desc - cpp::OpDesc opdesc; - opdesc.SetType(interp_method + "_interp"); - opdesc.SetInput("X", {x_var_name}); - opdesc.SetOutput("Out", {out_var_name}); - opdesc.SetAttr("out_h", oh); - opdesc.SetAttr("out_w", ow); - opdesc.SetAttr("scale", scale); - opdesc.SetAttr("align_corners", static_cast(align_corners)); - opdesc.SetAttr("align_mode", static_cast(align_mode)); - opdesc.SetAttr("interp_method", interp_method); - if (out_size_h > 0 && out_size_w > 0) { - auto out_size_dims = out_size->dims(); - CHECK_EQ(out_size_dims.size(), 1); - CHECK_EQ(out_size_dims.production(), 2); - auto out_size_data = out_size->mutable_data(); - out_size_data[0] = out_size_h; - out_size_data[1] = out_size_w; - opdesc.SetInput("OutSize", {out_size_var_name}); - } - - // create op and execute reference implementation - auto op = CreateOp(opdesc, &scope); - if (interp_method == "bilinear") { - bilinear_interp_ref(op); - } else { - nearest_interp_ref(op); - } - out_ref->CopyDataFrom(*out); - - // convert op to NPU model, then run it on NPU - LauchOp(op, {x_var_name}, {out_var_name}); - - // compare results - auto out_dims = out->dims(); - auto out_ref_dims = out_ref->dims(); - CHECK_EQ(out_dims.size(), out_ref_dims.size()); - for (int i = 0; i < out_dims.size(); i++) { - CHECK_EQ(out_dims[i], out_ref_dims[i]); - } - auto* out_data = out->mutable_data(); - auto* out_ref_data = out_ref->mutable_data(); - for (int i = 0; i < out->dims().production(); i++) { - VLOG(5) << i; - EXPECT_NEAR(out_data[i], out_ref_data[i], 1e-2f); - } -} - -TEST(NPUBridges, bilinear_interp) { -#if 1 - for (auto bs : {1, 3}) { - for (auto ic : {3, 4}) { - for (auto ih : {4, 5}) { - for (auto iw : {3, 6}) { - for (auto oh : {0, 3, 8}) { - for (auto ow : {0, 4, 9}) { - for (auto scale : {0.f, 0.5f, 0.6f, 2.0f, 2.2f}) { - for (auto out_size_h : {0, 3, 11}) { - for (auto out_size_w : {0, 2, 12}) { - for (auto align_corners : {true, false}) { - for (auto align_mode : {0, 1}) { - for (auto interp_method : {"bilinear", "nearest"}) { - int act_oh = 0, act_ow = 0; - if (out_size_h > 0 && out_size_w > 0) { - act_oh = out_size_h; - act_ow = out_size_w; - } else if (scale > 1e-5) { - act_oh = static_cast(ih * scale); - act_ow = static_cast(iw * scale); - } else if (oh > 0 && ow > 0) { - act_oh = oh; - act_ow = ow; - } - if (act_oh <= 0 || act_ow <= 0) { - continue; - } - // TODO(hong19860320) multiple=(ih*iw)/(oh*ow) - // should - // not exceed 7.0 in NPU DDK, delete the following - // lines - // if the limination is removed. - const float largest_multiple = 7.0f; - float multiple = - static_cast(ih * iw) / (act_oh * act_ow); - if (multiple > largest_multiple) { - continue; - } - if (align_mode == 0 && !align_corners) { - continue; - } - VLOG(3) << "bs: " << bs << " ic: " << ic - << " ih: " << ih << " iw: " << iw - << " oh: " << oh << " ow: " << ow - << " scale: " << scale - << " out_size: " << out_size_h << "," - << out_size_w - << " align_corners: " << align_corners - << " align_mode: " << align_mode; - test_interpolate(bs, - ic, - ih, - iw, - oh, - ow, - scale, - out_size_h, - out_size_w, - align_corners, - align_mode, - interp_method); - } - } - } - } - } - } - } - } - } - } - } - } -#else - test_interpolate(1, 1, 4, 3, 0, 0, 1.f, 3, 6, false, 1, "nearest"); -#endif -} - -} // namespace bridges -} // namespace npu -} // namespace kernels -} // namespace lite -} // namespace paddle - -USE_LITE_OP(bilinear_interp); -USE_NPU_BRIDGE(bilinear_interp); - -USE_LITE_OP(nearest_interp); -USE_NPU_BRIDGE(nearest_interp); diff --git a/lite/operators/interpolate_op.cc b/lite/operators/interpolate_op.cc index 936da73d89..1bfb20df4e 100644 --- a/lite/operators/interpolate_op.cc +++ b/lite/operators/interpolate_op.cc @@ -35,8 +35,7 @@ bool InterpolateOp::CheckShape() const { } bool InterpolateOp::InferShape() const { - auto* X = param_.X; - auto* OutSize = param_.OutSize; + auto X = param_.X; int n = X->dims()[0]; int c = X->dims()[1]; @@ -46,39 +45,40 @@ bool InterpolateOp::InferShape() const { int out_w; auto SizeTensor = param_.SizeTensor; + auto OutSize = param_.OutSize; + auto Scale = param_.Scale; if (!SizeTensor.empty()) { - CHECK(SizeTensor.size() == 2) + CHECK_EQ(SizeTensor.size(), 2) << "Input(SizeTensor)'size of Op(interpolate) must be 2. " "Attr(out_shape)'s length must be 2 for 4-D input tensor."; + out_h = SizeTensor[0]->data()[0]; + out_w = SizeTensor[1]->data()[0]; + } else if (OutSize) { + auto OutSize_dims = OutSize->dims(); + CHECK_EQ(OutSize_dims.size(), 1) << "Input(OutSize)'s dims size must be 1"; + CHECK_EQ(OutSize_dims[0], 2) << "OutSize's dim[0] must be 2"; + auto OutSize_data = OutSize->data(); + out_h = OutSize_data[0]; + out_w = OutSize_data[1]; + } else if (param_.out_h > 0 && param_.out_w > 0) { out_h = param_.out_h; out_w = param_.out_w; - param_.Out->Resize({n, c, out_h, out_w}); - return true; - } - - auto Scale = param_.Scale; - if (Scale) { - auto scale_dims = Scale->dims(); - CHECK(scale_dims.size() == 1) << "Scale's dimension size must be 1."; - out_h = -1; - out_w = -1; } else { - auto scale = param_.scale; - if (scale > 0) { - out_h = static_cast(h * scale); - out_w = static_cast(w * scale); - out_h = out_h > 0 ? out_h : -1; - out_w = out_w > 0 ? out_w : -1; + float scale = -1.f; + if (Scale) { + auto Scale_dims = Scale->dims(); + CHECK_EQ(Scale_dims.size(), 1) << "Scale's dimension size must be 1."; + scale = Scale->data()[0]; } else { - out_h = param_.out_h; - out_w = param_.out_w; + scale = param_.scale; } + CHECK(scale > 0) << "scale must large than 0."; + out_h = static_cast(h * scale); + out_w = static_cast(w * scale); } - if (OutSize != nullptr) { - auto out_lod = param_.Out->mutable_lod(); - *out_lod = param_.X->lod(); - } + auto out_lod = param_.Out->mutable_lod(); + *out_lod = param_.X->lod(); param_.Out->Resize({n, c, out_h, out_w}); return true; diff --git a/lite/tests/kernels/CMakeLists.txt b/lite/tests/kernels/CMakeLists.txt index 0c1e71cbe3..cc576dcfe5 100644 --- a/lite/tests/kernels/CMakeLists.txt +++ b/lite/tests/kernels/CMakeLists.txt @@ -25,7 +25,7 @@ if((NOT LITE_WITH_OPENCL AND NOT LITE_WITH_FPGA AND NOT LITE_WITH_BM) AND (LITE_ #lite_cc_test(test_kernel_increment_compute SRCS increment_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) #lite_cc_test(test_kernel_write_to_array_compute SRCS write_to_array_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) #lite_cc_test(test_kernel_read_from_array_compute SRCS read_from_array_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_concat_compute SRCS concat_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_concat_compute SRCS concat_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_transpose_compute SRCS transpose_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_reshape_compute SRCS reshape_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_layer_norm_compute SRCS layer_norm_compute_test.cc DEPS arena_framework ${xpu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) @@ -59,8 +59,7 @@ endif() lite_cc_test(test_kernel_pad2d_compute SRCS pad2d_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_prior_box_compute SRCS prior_box_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_negative_compute SRCS negative_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_bilinear_interp_compute SRCS bilinear_interp_compute_test.cc DEPS arena_framework ${xpu_kernels} ${bm_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_nearest_interp_compute SRCS nearest_interp_compute_test.cc DEPS arena_framework ${xpu_kernels} ${bm_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_interp_compute SRCS interp_compute_test.cc DEPS arena_framework ${xpu_kernels} ${bm_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_shape_compute SRCS shape_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_crop_compute SRCS crop_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${bm_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_sequence_expand_compute SRCS sequence_expand_compute_test.cc DEPS arena_framework ${xpu_kernels} ${bm_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) diff --git a/lite/tests/kernels/bilinear_interp_compute_test.cc b/lite/tests/kernels/bilinear_interp_compute_test.cc deleted file mode 100644 index 7ea4293f08..0000000000 --- a/lite/tests/kernels/bilinear_interp_compute_test.cc +++ /dev/null @@ -1,374 +0,0 @@ -// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include -#include "lite/api/paddle_use_kernels.h" -#include "lite/api/paddle_use_ops.h" -#include "lite/core/arena/framework.h" -#include "lite/core/tensor.h" - -namespace paddle { -namespace lite { - -inline std::vector get_new_shape( - std::vector list_new_shape_tensor) { - // get tensor from - std::vector vec_new_shape; - for (size_t i = 0; i < list_new_shape_tensor.size(); ++i) { - auto tensor = list_new_shape_tensor[i]; - vec_new_shape.push_back(static_cast(*(tensor->data()))); - } - return vec_new_shape; -} - -template -inline std::vector get_new_data_from_tensor(const Tensor* new_data_tensor) { - std::vector vec_new_data; - auto* new_data = new_data_tensor->data(); - lite::Tensor cpu_starts_tensor; - vec_new_data = - std::vector(new_data, new_data + new_data_tensor->dims().production()); - return vec_new_data; -} - -template -void resize_bilinear_align(std::vector inputs, - lite::Tensor* output) { - int hin = inputs[0]->dims()[2]; - int win = inputs[0]->dims()[3]; - int channels = inputs[0]->dims()[1]; - int num = inputs[0]->dims()[0]; - int hout = output->dims()[2]; - int wout = output->dims()[3]; - - dtype scale_w = static_cast(win - 1) / (wout - 1); - dtype scale_h = static_cast(hin - 1) / (hout - 1); - const dtype* src = inputs[0]->data(); - dtype* dst = output->mutable_data(); - int dst_stride_w = 1; - int dst_stride_h = wout; - int dst_stride_c = wout * hout; - int dst_stride_batch = wout * hout * channels; - int src_stride_w = 1; - int src_stride_h = win; - int src_stride_c = win * hin; - int src_stride_batch = win * hin * channels; - - for (int n = 0; n < num; ++n) { - for (int c = 0; c < channels; ++c) { - int src_index = n * src_stride_batch + c * src_stride_c; - - for (int h = 0; h < hout; ++h) { - for (int w = 0; w < wout; ++w) { - dtype fw = w * scale_w; - dtype fh = h * scale_h; - int w_start = static_cast(fw); - int w_id = w_start < win - 1 ? 1 : 0; - int w_end = static_cast(fw + w_id); - int h_start = static_cast(fh); - int h_id = h_start < hin - 1 ? 1 : 0; - int h_end = static_cast(fh + h_id); - fw -= w_start; - fh -= h_start; - const dtype w00 = (1.0 - fh) * (1.0 - fw); - const dtype w01 = fw * (1.0 - fh); - const dtype w10 = fh * (1.0 - fw); - const dtype w11 = fw * fh; - dtype tl = - src[src_index + w_start * src_stride_w + h_start * src_stride_h]; - dtype tr = - src[src_index + w_end * src_stride_w + h_start * src_stride_h]; - dtype bl = - src[src_index + w_start * src_stride_w + h_end * src_stride_h]; - dtype br = - src[src_index + w_end * src_stride_w + h_end * src_stride_h]; - int dst_index = n * dst_stride_batch + c * dst_stride_c + - h * dst_stride_h + w * dst_stride_w; - dst[dst_index] = - static_cast(w00 * tl + w01 * tr + w10 * bl + w11 * br); - } - } - } - } -} - -template -void resize_bilinear_no_align(std::vector inputs, - lite::Tensor* output) { - int hin = inputs[0]->dims()[2]; - int win = inputs[0]->dims()[3]; - int channels = inputs[0]->dims()[1]; - int num = inputs[0]->dims()[0]; - int hout = output->dims()[2]; - int wout = output->dims()[3]; - dtype scale_w = static_cast(win) / (wout); - dtype scale_h = static_cast(hin) / (hout); - const dtype* src = inputs[0]->data(); - dtype* dst = output->mutable_data(); - int dst_stride_w = 1; - int dst_stride_h = wout; - int dst_stride_c = wout * hout; - int dst_stride_batch = wout * hout * channels; - int src_stride_w = 1; - int src_stride_h = win; - int src_stride_c = win * hin; - int src_stride_batch = win * hin * channels; - - for (int n = 0; n < num; ++n) { - for (int c = 0; c < channels; ++c) { - int src_index = n * src_stride_batch + c * src_stride_c; - - for (int h = 0; h < hout; ++h) { - for (int w = 0; w < wout; ++w) { - dtype fw = scale_w * (w + 0.5f) - 0.5f; - fw = (fw < 0) ? 0 : fw; - dtype fh = scale_h * (h + 0.5f) - 0.5f; - fh = (fh < 0) ? 0 : fh; - int w_start = static_cast(fw); - int w_id = w_start < win - 1 ? 1 : 0; - int w_end = static_cast(fw + w_id); - int h_start = static_cast(fh); - int h_id = h_start < hin - 1 ? 1 : 0; - int h_end = static_cast(fh + h_id); - fw -= w_start; - fh -= h_start; - const dtype w00 = (1.0 - fh) * (1.0 - fw); - const dtype w01 = fw * (1.0 - fh); - const dtype w10 = fh * (1.0 - fw); - const dtype w11 = fw * fh; - dtype tl = - src[src_index + w_start * src_stride_w + h_start * src_stride_h]; - dtype tr = - src[src_index + w_end * src_stride_w + h_start * src_stride_h]; - dtype bl = - src[src_index + w_start * src_stride_w + h_end * src_stride_h]; - dtype br = - src[src_index + w_end * src_stride_w + h_end * src_stride_h]; - int dst_index = n * dst_stride_batch + c * dst_stride_c + - h * dst_stride_h + w * dst_stride_w; - dst[dst_index] = - static_cast(w00 * tl + w01 * tr + w10 * bl + w11 * br); - } - } - } - } -} - -class BilinearInterpComputeTester : public arena::TestCase { - protected: - // common attributes for this op. - std::string input0_ = "X"; - std::string sizetensor0_ = "SizeTensor0"; - std::string sizetensor1_ = "SizeTensor1"; - std::string input_scale_ = "Scale"; - std::string input1_ = "OutSize"; - std::string output_ = "Out"; - - float height_scale_ = 0.f; - float width_scale_ = 0.f; - int out_height_ = -1; - int out_width_ = -1; - int outsize_height_ = -1; - int outsize_width_ = -1; - bool align_corners_ = true; - std::string interp_method_ = "Bilinear"; - DDim _dims0_{{1, 1, 16, 16}}; - DDim _dims1_{{2}}; - DDim sizetensor_dims_{{1}}; - DDim scale_dims_{{1}}; - - public: - BilinearInterpComputeTester(const Place& place, - const std::string& alias, - float scale, - int out_height, - int out_width, - int outsize_height, - int outsize_width, - bool align_corners, - std::string interp_method) - : TestCase(place, alias), - height_scale_(scale), - width_scale_(scale), - out_height_(out_height), - out_width_(out_width), - outsize_height_(outsize_height), - outsize_width_(outsize_width), - align_corners_(align_corners), - interp_method_(interp_method) {} - - void RunBaseline(Scope* scope) override { - width_scale_ = height_scale_; - std::vector inputs; - inputs.emplace_back(scope->FindTensor(input0_)); - if (outsize_height_ > 0 && outsize_width_ > 0) { - inputs.emplace_back(scope->FindTensor(input1_)); - } - std::vector SizeTensor; - if (outsize_height_ > 0 && outsize_width_ > 0) { - SizeTensor.emplace_back(scope->FindTensor(sizetensor0_)); - SizeTensor.emplace_back(scope->FindTensor(sizetensor1_)); - } - const lite::Tensor* input_scale = scope->FindTensor(input_scale_); - float scale = height_scale_; - int in_h = inputs[0]->dims()[2]; - int in_w = inputs[0]->dims()[3]; - if (SizeTensor.size() > 0) { - auto new_size = get_new_shape(SizeTensor); - out_height_ = new_size[0]; - out_width_ = new_size[1]; - } else { - auto scale_tensor = input_scale; - if (scale_tensor != nullptr) { - auto scale_data = get_new_data_from_tensor(scale_tensor); - scale = scale_data[0]; - } - if (scale > 0) { - out_height_ = static_cast(in_h * scale); - out_width_ = static_cast(in_w * scale); - } - if (inputs.size() > 1) { - auto out_size = inputs[1]; - auto out_size_data = get_new_data_from_tensor(out_size); - out_height_ = out_size_data[0]; - out_width_ = out_size_data[1]; - } - } - height_scale_ = scale; - width_scale_ = scale; - - if (out_width_ != -1 && out_height_ != -1) { - height_scale_ = static_cast(out_height_ / inputs[0]->dims()[2]); - width_scale_ = static_cast(out_width_ / inputs[0]->dims()[3]); - } - auto* outputs = scope->NewTensor(output_); - CHECK(outputs); - int num_cout = inputs[0]->dims()[0]; - int c_cout = inputs[0]->dims()[1]; - outputs->Resize({num_cout, c_cout, out_height_, out_width_}); - if (align_corners_) { - resize_bilinear_align(inputs, outputs); - } else { - resize_bilinear_no_align(inputs, outputs); - } - } - - void PrepareOpDesc(cpp::OpDesc* op_desc) { - op_desc->SetType("bilinear_interp"); - op_desc->SetInput("X", {input0_}); - if (outsize_height_ > 0 && outsize_width_ > 0) { - op_desc->SetInput("OutSize", {input1_}); - op_desc->SetInput("SizeTensor", {sizetensor0_, sizetensor1_}); - } - if (height_scale_ > 0) { - op_desc->SetInput("Scale", {input_scale_}); - } - op_desc->SetOutput("Out", {output_}); - op_desc->SetAttr("scale", height_scale_); - op_desc->SetAttr("out_h", out_height_); - op_desc->SetAttr("out_w", out_width_); - op_desc->SetAttr("align_corners", align_corners_); - op_desc->SetAttr("interp_method", interp_method_); - } - - void PrepareData() override { - std::vector data0(_dims0_.production()); - for (int i = 0; i < _dims0_.production(); i++) { - data0[i] = i * 1.1; - } - SetCommonTensor(input0_, _dims0_, data0.data()); - - if (outsize_height_ > 0 && outsize_width_ > 0) { - std::vector data1(2); - data1[0] = outsize_height_; - data1[1] = outsize_width_; - SetCommonTensor(input1_, _dims1_, data1.data()); - - std::vector sizetensor_data(1); - sizetensor_data[0] = outsize_height_; - SetCommonTensor(sizetensor0_, sizetensor_dims_, sizetensor_data.data()); - - sizetensor_data[0] = outsize_width_; - SetCommonTensor(sizetensor1_, sizetensor_dims_, sizetensor_data.data()); - } - - if (height_scale_ > 0) { - std::vector scale_data(1); - scale_data[0] = height_scale_; - SetCommonTensor(input_scale_, scale_dims_, scale_data.data()); - } - } -}; - -void test_bilinear_interp(Place place) { - std::string interp_method = "Bilinear"; - for (float scale : {2., 1., 0.3}) { - for (bool align_corners : {true, false}) { - std::unique_ptr tester(new BilinearInterpComputeTester( - place, "def", scale, -1, -1, -1, -1, align_corners, interp_method)); - arena::Arena arena(std::move(tester), place, 5e-5); - arena.TestPrecision(); - } - } - for (int out_height : {8, 16, 24}) { - for (int out_width : {8, 16, 24}) { - for (bool align_corners : {true, false}) { - std::unique_ptr tester( - new BilinearInterpComputeTester(place, - "def", - 0, - out_height, - out_width, - -1, - -1, - align_corners, - interp_method)); - arena::Arena arena(std::move(tester), place, 5e-5); - arena.TestPrecision(); - } - } - } - for (int outsize_height : {8, 16, 24}) { - for (int outsize_width : {8, 16, 24}) { - for (bool align_corners : {true, false}) { - std::unique_ptr tester( - new BilinearInterpComputeTester(place, - "def", - 0, - -1, - -1, - outsize_height, - outsize_width, - align_corners, - interp_method)); - arena::Arena arena(std::move(tester), place, 5e-5); - arena.TestPrecision(); - } - } - } -} - -TEST(BilinearInterp, precision) { -// #ifdef LITE_WITH_X86 -// Place place(TARGET(kX86)); -// #endif -#ifdef LITE_WITH_ARM - Place place(TARGET(kARM)); - test_bilinear_interp(place); -#endif -} - -} // namespace lite -} // namespace paddle diff --git a/lite/tests/kernels/concat_compute_test.cc b/lite/tests/kernels/concat_compute_test.cc index e0ae4c2828..3e30035f10 100644 --- a/lite/tests/kernels/concat_compute_test.cc +++ b/lite/tests/kernels/concat_compute_test.cc @@ -142,35 +142,29 @@ class ConcateComputeTester : public arena::TestCase { TEST(Concat, precision) { LOG(INFO) << "test concat op, kARM"; -#ifdef LITE_WITH_ARM - Place place(TARGET(kARM)); - for (int axis : {1, 2}) { - for (bool is_use_axis_tensor : {false, true}) { - LOG(INFO) << "axis:" << axis - << ", is_use_axis_tensor:" << is_use_axis_tensor; - std::unique_ptr tester( - new ConcateComputeTester(place, "def", axis, is_use_axis_tensor)); - arena::Arena arena(std::move(tester), place, 2e-5); - arena.TestPrecision(); - } - } + Place place; + float abs_error = 2e-5; +#if defined(LITE_WITH_NPU) + place = TARGET(kNPU); + abs_error = 1e-2; // use fp16 in npu +#elif defined(LITE_WITH_ARM) + place = TARGET(kARM); +#elif defined(LITE_WITH_X86) + place = TARGET(kX86); +#else + return; #endif -#ifdef LITE_WITH_X86 - Place place(TARGET(kX86)); - LOG(INFO) << "test concate op, x86"; for (int axis : {1, 2}) { for (bool is_use_axis_tensor : {false, true}) { LOG(INFO) << "axis:" << axis << ", is_use_axis_tensor:" << is_use_axis_tensor; std::unique_ptr tester( new ConcateComputeTester(place, "def", axis, is_use_axis_tensor)); - arena::Arena arena(std::move(tester), place, 2e-5); + arena::Arena arena(std::move(tester), place, abs_error); arena.TestPrecision(); } } - -#endif } } // namespace lite diff --git a/lite/tests/kernels/interp_compute_test.cc b/lite/tests/kernels/interp_compute_test.cc new file mode 100644 index 0000000000..34e5f0fc9d --- /dev/null +++ b/lite/tests/kernels/interp_compute_test.cc @@ -0,0 +1,462 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include "lite/api/paddle_use_kernels.h" +#include "lite/api/paddle_use_ops.h" +#include "lite/core/arena/framework.h" +#include "lite/core/tensor.h" +#include "lite/tests/utils/fill_data.h" + +namespace paddle { +namespace lite { + +template +void ResizeNearestAlign(const lite::Tensor* x, + lite::Tensor* out, + bool with_align) { + auto x_dims = x->dims(); + int num = x_dims[0]; + int channels = x_dims[1]; + int hin = x_dims[2]; + int win = x_dims[3]; + int hout = out->dims()[2]; + int wout = out->dims()[3]; + dtype scale_w = (with_align) ? (static_cast(win - 1) / (wout - 1)) + : (static_cast(win) / (wout)); + dtype scale_h = (with_align) ? (static_cast(hin - 1) / (hout - 1)) + : (static_cast(hin) / (hout)); + const dtype* src = x->data(); + dtype* dst = out->mutable_data(); + int dst_stride_w = 1; + int dst_stride_h = wout; + int dst_stride_c = wout * hout; + int dst_stride_batch = wout * hout * channels; + int src_stride_w = 1; + int src_stride_h = win; + int src_stride_c = win * hin; + int src_stride_batch = win * hin * channels; + for (int n = 0; n < num; ++n) { + for (int c = 0; c < channels; ++c) { + int src_index = n * src_stride_batch + c * src_stride_c; + for (int h = 0; h < hout; ++h) { + for (int w = 0; w < wout; ++w) { + int fw = (with_align) ? static_cast(scale_w * w + 0.5) + : static_cast(scale_w * w); + fw = (fw < 0) ? 0 : fw; + int fh = (with_align) ? static_cast(scale_h * h + 0.5) + : static_cast(scale_h * h); + fh = (fh < 0) ? 0 : fh; + int w_start = static_cast(fw); + int h_start = static_cast(fh); + int dst_index = n * dst_stride_batch + c * dst_stride_c + + h * dst_stride_h + w * dst_stride_w; + dst[dst_index] = + src[src_index + w_start * src_stride_w + h_start * src_stride_h]; + } + } + } + } +} + +template +void BilinearInterpRef(const lite::Tensor* x, + lite::Tensor* out, + bool align_corners, + int align_mode) { + auto x_dims = x->dims(); + int batch_size = x_dims[0]; + int channel_size = x_dims[1]; + auto x_h = x_dims[2]; + auto x_w = x_dims[3]; + CHECK_EQ(x_dims.size(), 4); + + auto out_dims = out->dims(); + int out_h = out_dims[2]; + int out_w = out_dims[3]; + + // copy from x if no change + if (x_h == out_h && x_w == out_w) { + out->CopyDataFrom(*x); + return; + } + + float ratio_h = 0.f; + float ratio_w = 0.f; + if (out_h > 1) { + ratio_h = (align_corners) ? static_cast(x_h - 1) / (out_h - 1) + : static_cast(x_h) / out_h; + } + if (out_w > 1) { + ratio_w = (align_corners) ? static_cast(x_w - 1) / (out_w - 1) + : static_cast(x_w) / out_w; + } + + // naive bilinear interpolation + auto x_data = x->data(); + auto out_data = out->mutable_data(); + bool align_flag = (align_mode == 0 && !align_corners); + + std::vector vy_n, vy_s; + std::vector vd_n, vd_s; + vy_n.reserve(out_h); + vy_s.reserve(out_h); + vd_n.reserve(out_h); + vd_s.reserve(out_h); + for (int k = 0; k < out_h; k++) { + int yn = align_flag ? static_cast(ratio_h * (k + 0.5) - 0.5) + : static_cast(ratio_h * k); + yn = (yn > 0) ? yn : 0; + int ys = (yn + 1) < (x_h - 1) ? (yn + 1) : (x_h - 1); + float idx_src_y = ratio_h * (k + 0.5) - 0.5; + idx_src_y = (idx_src_y > 0) ? idx_src_y : 0; + float dn = align_flag ? idx_src_y - yn : ratio_h * k - yn; + float ds = 1.f - dn; + { + vy_n[k] = yn; + vy_s[k] = ys; + vd_n[k] = dn; + vd_s[k] = ds; + } + } + + std::vector vx_w, vx_e; + std::vector vd_w, vd_e; + vx_w.reserve(out_w); + vx_e.reserve(out_w); + vd_w.reserve(out_w); + vd_e.reserve(out_w); + for (int l = 0; l < out_w; l++) { + int xw = align_flag ? static_cast(ratio_w * (l + 0.5) - 0.5) + : static_cast(ratio_w * l); + xw = (xw > 0) ? xw : 0; + int xe = (xw + 1) < (x_w - 1) ? (xw + 1) : (x_w - 1); + float idx_src_x = ratio_w * (l + 0.5) - 0.5; + idx_src_x = (idx_src_x > 0) ? idx_src_x : 0; + float dw = align_flag ? idx_src_x - xw : ratio_w * l - xw; + float de = 1.f - dw; + { + vx_w[l] = xw; + vx_e[l] = xe; + vd_w[l] = dw; + vd_e[l] = de; + } + } + + std::vector x_strides(x_dims.size(), 1); + for (int idx = x_strides.size() - 2; idx >= 0; idx--) { + x_strides[idx] = x_strides[idx + 1] * x_dims[idx + 1]; + } + for (int i = 0; i < batch_size; i++) { + for (int j = 0; j < channel_size; j++) { + for (int k = 0; k < out_h; k++) { + for (int l = 0; l < out_w; l++) { + DType x0 = x_data[i * x_strides[0] + j * x_strides[1] + + vy_n[k] * x_strides[2] + vx_w[l] * x_strides[3]]; + DType x1 = x_data[i * x_strides[0] + j * x_strides[1] + + vy_s[k] * x_strides[2] + vx_w[l] * x_strides[3]]; + DType x2 = x_data[i * x_strides[0] + j * x_strides[1] + + vy_n[k] * x_strides[2] + vx_e[l] * x_strides[3]]; + DType x3 = x_data[i * x_strides[0] + j * x_strides[1] + + vy_s[k] * x_strides[2] + vx_e[l] * x_strides[3]]; + *out_data = x0 * vd_s[k] * vd_e[l] + x1 * vd_n[k] * vd_e[l] + + x2 * vd_s[k] * vd_w[l] + x3 * vd_n[k] * vd_w[l]; + out_data++; + } + } + } + } +} +class NearestInterpComputeTester : public arena::TestCase { + protected: + // common attributes for this op. + std::string x_ = "X"; + std::string sizetensor0_ = "SizeTensor0"; + std::string sizetensor1_ = "SizeTensor1"; + std::string input_scale_ = "Scale"; + std::string outsize_ = "OutSize"; + std::string out_ = "Out"; + DDim dims_{{1, 2, 3, 4}}; + + std::string interp_method_ = "nearest"; + float scale_ = -1.f; + int out_h_ = -1; + int out_w_ = -1; + bool align_corners_ = true; + int align_mode_ = 1; + bool use_sizetensor_ = false; + bool use_input_scale_ = false; + bool use_outsize_ = false; + + public: + NearestInterpComputeTester(const Place& place, + const std::string& alias, + DDim dims, + std::string interp_method = "nearest", + float scale = -1.f, + int out_h = -1, + int out_w = -1, + bool align_corners = true, + int align_mode = 1, + bool use_sizetensor = false, + bool use_input_scale = false, + bool use_outsize = false) + : TestCase(place, alias), + dims_(dims), + interp_method_(interp_method), + scale_(scale), + out_h_(out_h), + out_w_(out_w), + align_corners_(align_corners), + align_mode_(align_mode), + use_sizetensor_(use_sizetensor), + use_input_scale_(use_input_scale), + use_outsize_(use_outsize) {} + + void RunBaseline(Scope* scope) override { + int out_h = out_h_; + int out_w = out_w_; + if (scale_ > 0) { + out_h = dims_[2] * scale_; + out_w = dims_[3] * scale_; + } + + auto input = scope->FindTensor(x_); + auto output = scope->NewTensor(out_); + std::vector out_shape{dims_[0], dims_[1], out_h, out_w}; + output->Resize(out_shape); + if (interp_method_ == "nearest") { + ResizeNearestAlign(input, output, align_corners_); + } else if (interp_method_ == "bilinear") { + BilinearInterpRef(input, output, align_corners_, align_mode_); + } + } + + void PrepareOpDesc(cpp::OpDesc* op_desc) { + if (interp_method_ == "nearest") { + op_desc->SetType("nearest_interp"); + } else if (interp_method_ == "bilinear") { + op_desc->SetType("bilinear_interp"); + } else { + LOG(FATAL) << "unsupport"; + } + op_desc->SetInput("X", {x_}); + if (use_sizetensor_) { + op_desc->SetInput("SizeTensor", {sizetensor0_, sizetensor1_}); + } + if (use_input_scale_) { + op_desc->SetInput("Scale", {input_scale_}); + } + if (use_outsize_) { + op_desc->SetInput("OutSize", {outsize_}); + } + op_desc->SetOutput("Out", {out_}); + op_desc->SetAttr("scale", scale_); + op_desc->SetAttr("out_h", out_h_); + op_desc->SetAttr("out_w", out_w_); + op_desc->SetAttr("align_corners", align_corners_); + op_desc->SetAttr("align_mode", align_mode_); + op_desc->SetAttr("interp_method", interp_method_); + } + + void PrepareData() override { + std::vector din(dims_.production()); + fill_data_rand(din.data(), -1.f, 1.f, dims_.production()); + SetCommonTensor(x_, dims_, din.data()); + + if (use_sizetensor_) { + DDim sizetensor_dims(std::vector{1}); + std::vector dsizetensor0{out_h_}; + std::vector dsizetensor1{out_w_}; + SetCommonTensor( + sizetensor0_, sizetensor_dims, dsizetensor0.data(), {}, true); + SetCommonTensor( + sizetensor1_, sizetensor_dims, dsizetensor1.data(), {}, true); + } + + if (use_input_scale_) { + DDim input_scale_dims(std::vector{1}); + std::vector dinput_scale{scale_}; + SetCommonTensor( + input_scale_, input_scale_dims, dinput_scale.data(), {}, true); + } + + if (use_outsize_) { + DDim outsize_dims(std::vector{2}); + std::vector doutsize{out_h_, out_w_}; + SetCommonTensor(outsize_, outsize_dims, doutsize.data(), {}, true); + } + } +}; + +void TestInterpOuthw(Place place, float abs_error = 2e-5) { + for (auto x_dims : std::vector>{{3, 4, 8, 9}}) { + for (auto interp_method : std::vector{"nearest", "bilinear"}) { + for (int out_h : {6, 8, 12}) { + for (int out_w : {6, 9, 12}) { + std::unique_ptr tester( + new NearestInterpComputeTester(place, + "def", + DDim(x_dims), + interp_method, + -1.f, + out_h, + out_w)); + arena::Arena arena(std::move(tester), place, abs_error); + arena.TestPrecision(); + } + } + } + } +} + +void TestInterpScale(Place place, float abs_error = 2e-5) { + for (auto x_dims : std::vector>{{3, 4, 8, 9}}) { + for (auto interp_method : std::vector{"nearest", "bilinear"}) { + for (float scale : {0.3f, 1.f, 1.7f}) { + std::unique_ptr tester(new NearestInterpComputeTester( + place, "def", DDim(x_dims), interp_method, scale)); + arena::Arena arena(std::move(tester), place, abs_error); + arena.TestPrecision(); + } + } + } +} + +void TestInterpSizetensor(Place place, float abs_error = 2e-5) { + for (auto x_dims : std::vector>{{3, 4, 8, 9}}) { + for (auto interp_method : std::vector{"nearest", "bilinear"}) { + std::unique_ptr tester( + new NearestInterpComputeTester(place, + "def", + DDim(x_dims), + interp_method, + -1.f, + 10, + 12, + true, + 1, + true, + false, + false)); + arena::Arena arena(std::move(tester), place, abs_error); + arena.TestPrecision(); + } + } +} + +void TestInterpInputScale(Place place, float abs_error = 2e-5) { + for (auto x_dims : std::vector>{{3, 4, 8, 9}}) { + for (auto interp_method : std::vector{"nearest", "bilinear"}) { + std::unique_ptr tester( + new NearestInterpComputeTester(place, + "def", + DDim(x_dims), + interp_method, + 0.7, + -1, + -1, + true, + 1, + false, + true, + false)); + arena::Arena arena(std::move(tester), place, abs_error); + arena.TestPrecision(); + } + } +} + +void TestInterpOutsize(Place place, float abs_error = 2e-5) { + for (auto x_dims : std::vector>{{3, 4, 8, 9}}) { + for (auto interp_method : std::vector{"nearest", "bilinear"}) { + std::unique_ptr tester( + new NearestInterpComputeTester(place, + "def", + DDim(x_dims), + interp_method, + -1, + 4, + 4, + true, + 1, + false, + false, + true)); + arena::Arena arena(std::move(tester), place, abs_error); + arena.TestPrecision(); + } + } +} + +void TestInterpAlignCorners(Place place, float abs_error = 2e-5) { + for (auto x_dims : std::vector>{{3, 4, 8, 9}}) { + for (bool align_corners : {true, false}) { + std::unique_ptr tester(new NearestInterpComputeTester( + place, "def", DDim(x_dims), "nearest", 0.4, -1, -1, align_corners)); + arena::Arena arena(std::move(tester), place, abs_error); + arena.TestPrecision(); + } + } +} + +void TestInterpAlignMode(Place place, float abs_error = 2e-5) { + for (auto x_dims : std::vector>{{3, 4, 8, 9}}) { + for (bool align_corners : {true, false}) { + for (int align_mode : {0, 1}) { + // may exist bug in arm kernel + if (place == TARGET(kARM) && align_mode == 1 && !align_corners) { + continue; + } + std::unique_ptr tester( + new NearestInterpComputeTester(place, + "def", + DDim(x_dims), + "bilinear", + 0.7, + -1, + -1, + align_corners, + align_mode)); + arena::Arena arena(std::move(tester), place, abs_error); + arena.TestPrecision(); + } + } + } +} + +TEST(Interp, precision) { + Place place; + float abs_error = 2e-5; +#if defined(LITE_WITH_NPU) + place = TARGET(kNPU); + abs_error = 1e-2; // use fp16 in npu +#elif defined(LITE_WITH_ARM) + place = TARGET(kARM); +#else + return; +#endif + + TestInterpOuthw(place, abs_error); + TestInterpScale(place, abs_error); + TestInterpSizetensor(place, abs_error); + TestInterpInputScale(place, abs_error); + TestInterpOutsize(place, abs_error); + TestInterpAlignCorners(place, abs_error); + TestInterpAlignMode(place, abs_error); +} + +} // namespace lite +} // namespace paddle diff --git a/lite/tests/kernels/nearest_interp_compute_test.cc b/lite/tests/kernels/nearest_interp_compute_test.cc deleted file mode 100644 index 894959f909..0000000000 --- a/lite/tests/kernels/nearest_interp_compute_test.cc +++ /dev/null @@ -1,260 +0,0 @@ -// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include -#include "lite/api/paddle_use_kernels.h" -#include "lite/api/paddle_use_ops.h" -#include "lite/core/arena/framework.h" -#include "lite/core/tensor.h" - -namespace paddle { -namespace lite { - -inline std::vector get_new_shape( - const std::vector& list_new_shape_tensor) { - // get tensor from - std::vector vec_new_shape; - for (size_t i = 0; i < list_new_shape_tensor.size(); ++i) { - auto tensor = list_new_shape_tensor[i]; - vec_new_shape.push_back(static_cast(*tensor->data())); - } - - return vec_new_shape; -} - -template -inline std::vector get_new_data_from_tensor(const Tensor* new_data_tensor) { - std::vector vec_new_data; - auto* new_data = new_data_tensor->data(); - lite::Tensor cpu_starts_tensor; - vec_new_data = - std::vector(new_data, new_data + new_data_tensor->dims().production()); - return vec_new_data; -} - -template -void resize_nearest_align(std::vector inputs, - lite::Tensor* output, - bool with_align) { - int hin = inputs[0]->dims()[2]; - int win = inputs[0]->dims()[3]; - int channels = inputs[0]->dims()[1]; - int num = inputs[0]->dims()[0]; - int hout = output->dims()[2]; - int wout = output->dims()[3]; - dtype scale_w = (with_align) ? (static_cast(win - 1) / (wout - 1)) - : (static_cast(win) / (wout)); - dtype scale_h = (with_align) ? (static_cast(hin - 1) / (hout - 1)) - : (static_cast(hin) / (hout)); - const dtype* src = inputs[0]->data(); - dtype* dst = output->mutable_data(); - int dst_stride_w = 1; - int dst_stride_h = wout; - int dst_stride_c = wout * hout; - int dst_stride_batch = wout * hout * channels; - int src_stride_w = 1; - int src_stride_h = win; - int src_stride_c = win * hin; - int src_stride_batch = win * hin * channels; - for (int n = 0; n < num; ++n) { - for (int c = 0; c < channels; ++c) { - int src_index = n * src_stride_batch + c * src_stride_c; - for (int h = 0; h < hout; ++h) { - for (int w = 0; w < wout; ++w) { - int fw = (with_align) ? static_cast(scale_w * w + 0.5) - : static_cast(scale_w * w); - fw = (fw < 0) ? 0 : fw; - int fh = (with_align) ? static_cast(scale_h * h + 0.5) - : static_cast(scale_h * h); - fh = (fh < 0) ? 0 : fh; - int w_start = static_cast(fw); - int h_start = static_cast(fh); - int dst_index = n * dst_stride_batch + c * dst_stride_c + - h * dst_stride_h + w * dst_stride_w; - dst[dst_index] = - src[src_index + w_start * src_stride_w + h_start * src_stride_h]; - } - } - } - } -} - -class NearestInterpComputeTester : public arena::TestCase { - protected: - // common attributes for this op. - std::string input0_ = "X"; - std::string sizetensor0_ = "SizeTensor0"; - std::string sizetensor1_ = "SizeTensor1"; - std::string input_scale_ = "Scale"; - std::string input1_ = "OutSize"; - std::string output_ = "Out"; - - float height_scale_ = 0.0f; - float width_scale_ = 0.0f; - int out_height_ = -1; - int out_width_ = -1; - bool align_corners_ = true; - std::string interp_method_ = "Nearest"; - DDim dims_{{2, 3}}; - DDim _dims0_{{2, 3, 3, 2}}; - DDim _dims1_{{2}}; - DDim sizetensor_dims_{{1}}; - DDim scale_dims_{{1}}; - - public: - NearestInterpComputeTester(const Place& place, - const std::string& alias, - float height_scale, - float width_scale, - int out_height, - int out_width, - bool align_corners, - std::string interp_method) - : TestCase(place, alias), - height_scale_(height_scale), - width_scale_(width_scale), - out_height_(out_height), - out_width_(out_width), - align_corners_(align_corners), - interp_method_(interp_method) {} - - void RunBaseline(Scope* scope) override { - width_scale_ = height_scale_; - auto* outputs = scope->NewTensor(output_); - CHECK(outputs); - outputs->Resize(dims_); - std::vector inputs; - inputs.emplace_back(scope->FindTensor(input0_)); - inputs.emplace_back(scope->FindTensor(input1_)); - - std::vector SizeTensor(2); - SizeTensor[0] = scope->FindTensor(sizetensor0_); - SizeTensor[1] = scope->FindTensor(sizetensor1_); - const lite::Tensor* input_scale = scope->FindTensor(input_scale_); - - float scale = height_scale_; - int in_h = inputs[0]->dims()[2]; - int in_w = inputs[0]->dims()[3]; - if (SizeTensor.size() > 0) { - auto new_size = get_new_shape(SizeTensor); - out_height_ = new_size[0]; - out_width_ = new_size[1]; - } else { - auto scale_tensor = input_scale; - if (scale_tensor != nullptr) { - auto scale_data = get_new_data_from_tensor(scale_tensor); - scale = scale_data[0]; - } - if (scale > 0) { - out_height_ = static_cast(in_h * scale); - out_width_ = static_cast(in_w * scale); - } - auto out_size = inputs[1]; - if (out_size != nullptr) { - auto out_size_data = get_new_data_from_tensor(out_size); - out_height_ = out_size_data[0]; - out_width_ = out_size_data[1]; - } - } - height_scale_ = scale; - width_scale_ = scale; - - if (out_width_ != -1 && out_height_ != -1) { - height_scale_ = static_cast(out_height_ / inputs[0]->dims()[2]); - width_scale_ = static_cast(out_width_ / inputs[0]->dims()[3]); - } - int num_cout = inputs[0]->dims()[0]; - int c_cout = inputs[0]->dims()[1]; - outputs->Resize({num_cout, c_cout, out_height_, out_width_}); - - resize_nearest_align(inputs, outputs, align_corners_); - } - - void PrepareOpDesc(cpp::OpDesc* op_desc) { - op_desc->SetType("nearest_interp"); - op_desc->SetInput("X", {input0_}); - op_desc->SetInput("SizeTensor", {sizetensor0_, sizetensor1_}); - op_desc->SetInput("Scale", {input_scale_}); - op_desc->SetInput("OutSize", {input1_}); - op_desc->SetOutput("Out", {output_}); - op_desc->SetAttr("scale", height_scale_); - op_desc->SetAttr("out_h", out_height_); - op_desc->SetAttr("out_w", out_width_); - op_desc->SetAttr("align_corners", align_corners_); - op_desc->SetAttr("interp_method", interp_method_); - } - - void PrepareData() override { - std::vector data0(_dims0_.production()); - for (int i = 0; i < _dims0_.production(); i++) { - data0[i] = i * 1.1; - } - - std::vector data1(_dims1_.production()); - for (int i = 0; i < _dims1_.production(); i++) { - data1[i] = (i + 1) * 2; - } - - SetCommonTensor(input0_, _dims0_, data0.data()); - SetCommonTensor(input1_, _dims1_, data1.data()); - - std::vector sizetensor_data(1); - sizetensor_data[0] = out_height_; - SetCommonTensor(sizetensor0_, sizetensor_dims_, sizetensor_data.data()); - - sizetensor_data[0] = out_width_; - SetCommonTensor(sizetensor1_, sizetensor_dims_, sizetensor_data.data()); - - std::vector scale_data(1); - scale_data[0] = height_scale_; - SetCommonTensor(input_scale_, scale_dims_, scale_data.data()); - } -}; - -void test_nearest_interp(Place place) { - std::string interp_method = "Nearest"; - for (float scale : {0.123, 2., 1.2}) { - for (int out_height : {2, 1, 6}) { - for (int out_width : {2, 3, 5}) { - for (bool align_corners : {true, false}) { - std::unique_ptr tester( - new NearestInterpComputeTester(place, - "def", - scale, - scale, - out_height, - out_width, - align_corners, - interp_method)); - arena::Arena arena(std::move(tester), place, 2e-5); - arena.TestPrecision(); - } - } - } - } -} - -TEST(NearestInterp, precision) { -// #ifdef LITE_WITH_X86 -// Place place(TARGET(kX86)); -// #endif -#ifdef LITE_WITH_ARM - Place place(TARGET(kARM)); - test_nearest_interp(place); -#endif -} - -} // namespace lite -} // namespace paddle -- GitLab