Commit 5209b4b6 authored by zhupengyang, committed by GitHub

[NPU] enhance concat, nearest_interp, bilinear_interp ut (#2764)

- enhance interp InferShape
Parent c4a87224
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/operators/concat_op.h"
#include <gtest/gtest.h>
#include <random>
#include "lite/core/op_registry.h"
#include "lite/kernels/npu/bridges/registry.h"
#include "lite/kernels/npu/bridges/test_helper.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace npu {
namespace bridges {
std::vector<size_t> stride_numel(const DDim& ddim) {
std::vector<size_t> strides(ddim.size());
strides[ddim.size() - 1] = ddim[ddim.size() - 1];
for (int i = ddim.size() - 2; i >= 0; --i) {
strides[i] = strides[i + 1] * ddim[i];
}
return strides;
}
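// concat_ref computes the expected output on the host: each input is viewed
// as a (rows x cols) block, where rows is the product of the dimensions in
// front of `axis` and cols = numel / rows, and the blocks are copied row by
// row into the output. For example, concatenating {3, 3, 5, 2} and
// {2, 3, 5, 2} along axis = 0 gives rows = 1 and per-input cols of 90 and 60,
// so each input lands in the 150-column output row as one contiguous block.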
void concat_ref(const std::shared_ptr<operators::ConcatOpLite> op) {
Scope* scope = op->scope();
const OpInfo* op_info = op->op_info();
auto x = op_info->Input("X");
std::vector<lite::Tensor*> inputs;
for (auto var : x) {
inputs.push_back(scope->FindVar(var)->GetMutable<lite::Tensor>());
}
auto out =
scope->FindVar(op_info->Output("Out").front())->GetMutable<Tensor>();
int axis = op_info->GetAttr<int>("axis");
std::vector<lite::Tensor*> inputs_concat(inputs.size());
for (int j = 0; j < inputs.size(); ++j) {
inputs_concat[j] = inputs[j];
}
size_t num = inputs.size();
int rows = 1;
auto dim_0 = inputs[0]->dims();
for (int i = 0; i < axis; ++i) {
rows *= dim_0[i];
}
int out_rows = rows, out_cols = 0;
std::vector<int64_t> inputs_cols(inputs.size());
for (int i = 0; i < num; ++i) {
int t_cols = inputs[i]->numel() / rows;
out_cols += t_cols;
inputs_cols[i] = t_cols;
}
for (int k = 0; k < out_rows; ++k) {
float* dst_ptr = out->mutable_data<float>() + k * out_cols;
int col_idx = 0;
for (int j = 0; j < num; ++j) {
int col_len = inputs_cols[j];
const float* src_prt = inputs[j]->data<float>() + k * col_len;
std::memcpy(dst_ptr + col_idx, src_prt, sizeof(float) * col_len);
col_idx += col_len;
}
}
}
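// test_concat builds a concat op desc over two input tensors, runs it through
// the NPU bridge via LauchOp, and compares the NPU result against concat_ref.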
void test_concat(std::vector<std::vector<int64_t>> input, int axis) {
std::string x_var_name = "x";
std::string y_var_name = "y";
std::string out_var_name = "out";
std::string out_ref_var_name = "out_ref";
// prepare input&output variables
Scope scope;
auto* x = scope.Var(x_var_name)->GetMutable<Tensor>();
auto* y = scope.Var(y_var_name)->GetMutable<Tensor>();
x->Resize(DDim(input[0]));
y->Resize(DDim(input[1]));
auto* out = scope.Var(out_var_name)->GetMutable<Tensor>();
auto* out_ref = scope.Var(out_ref_var_name)->GetMutable<Tensor>();
CHECK_EQ(out->dims(), out_ref->dims());
// initialize input&output data
FillTensor<float>(x);
FillTensor<float>(y);
// initialize op desc
cpp::OpDesc opdesc;
opdesc.SetType("concat");
opdesc.SetInput("X", {x_var_name, y_var_name});
opdesc.SetOutput("Out", {out_var_name});
opdesc.SetAttr("axis", axis);
auto op = CreateOp<operators::ConcatOpLite>(opdesc, &scope);
LauchOp(op, {x_var_name, y_var_name}, {out_var_name});
out_ref->CopyDataFrom(*out);
concat_ref(op);
auto* out_data = out->mutable_data<float>();
auto* out_ref_data = out_ref->mutable_data<float>();
for (int i = 0; i < out->dims().production(); i++) {
VLOG(5) << i;
EXPECT_NEAR(out_data[i], out_ref_data[i], 5e-4);
}
}
TEST(NPUBridges, concat) {
test_concat({{3, 3, 5, 2}, {2, 3, 5, 2}}, 0);
test_concat({{3, 5, 5, 2}, {3, 1, 5, 2}}, 1);
test_concat({{3, 3, 2, 2}, {3, 3, 4, 2}}, 2);
test_concat({{3, 3, 5, 2}, {3, 3, 5, 6}}, 3);
}
} // namespace bridges
} // namespace npu
} // namespace kernels
} // namespace lite
} // namespace paddle
USE_LITE_OP(concat);
USE_NPU_BRIDGE(concat);
@@ -48,11 +48,15 @@ int InterpolateConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   auto out_w = op_info->GetAttr<int>("out_w");
   auto out_h = op_info->GetAttr<int>("out_h");
   auto align_corners = op_info->GetAttr<bool>("align_corners");
-  int align_mode = op_info->GetAttr<int>("align_mode");
+  int align_mode =
+      op_info->HasAttr("align_mode") ? op_info->GetAttr<int>("align_mode") : 1;
   auto interp_method = op_info->GetAttr<std::string>("interp_method");
-  CHECK(!(align_mode == 0 && !align_corners)) << "[NPU] align_mode = 0 && "
-                                                 "align_corners = false isn't "
-                                                 "supported in HiAI DDK";
+  if (align_mode == 0 && !align_corners) {
+    LOG(WARNING) << "[NPU] align_mode = 0 && "
+                    "align_corners = false isn't "
+                    "supported in HiAI DDK";
+    return FAILED;
+  }
 
   // X node
   std::shared_ptr<Node> x_node = nullptr;
@@ -93,10 +97,12 @@ int InterpolateConverter(void* ctx, OpLite* op, KernelBase* kernel) {
     if (interp_method == "bilinear") {
       const float largest_multiple = 7.0f;
       float multiple = static_cast<float>(x_h * x_w) / (out_h * out_w);
-      CHECK_LT(multiple, largest_multiple)
-          << "[NPU] multiple=(ih*iw)/(oh*ow)=" << multiple
-          << " is too large, should not exceed " << largest_multiple
-          << " in HiAI DDK";
+      if (multiple >= largest_multiple) {
+        LOG(WARNING) << "[NPU] multiple=(ih*iw)/(oh*ow)=" << multiple
+                     << " is too large, should not exceed " << largest_multiple
+                     << " in HiAI DDK";
+        return FAILED;
+      }
     }
     out_size_node =
         graph->Add(out_name + "/out_size", std::vector<int>({out_h, out_w}));
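For reference, the configuration the converter now rejects concerns how destination pixels are mapped back to source coordinates. Below is a minimal sketch of that mapping under the same conventions as the reference implementations later in this commit; the helper name is illustrative and not part of the patch.

#include <algorithm>

// Source coordinate for destination index `dst` when resizing a dimension of
// `in` elements to `out` elements. align_corners maps the corner pixels of
// input and output exactly; align_mode == 0 combined with
// align_corners == false means half-pixel-center sampling, which is the case
// the HiAI DDK cannot express.
float SrcIndex(int dst, int in, int out, bool align_corners, int align_mode) {
  float ratio = 0.f;
  if (out > 1) {
    ratio = align_corners ? static_cast<float>(in - 1) / (out - 1)
                          : static_cast<float>(in) / out;
  }
  bool half_pixel = (align_mode == 0 && !align_corners);
  float idx = half_pixel ? ratio * (dst + 0.5f) - 0.5f : ratio * dst;
  return std::max(idx, 0.f);
}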
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/operators/interpolate_op.h"
#include <gtest/gtest.h>
#include <random>
#include "lite/core/op_registry.h"
#include "lite/kernels/npu/bridges/registry.h"
#include "lite/kernels/npu/bridges/test_helper.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace npu {
namespace bridges {
template <typename DType>
void bilinear_interp_ref(const std::shared_ptr<operators::InterpolateOp> op) {
auto scope = op->scope();
auto op_info = op->op_info();
auto x = scope->FindVar(op_info->Input("X").front())->GetMutable<Tensor>();
auto out =
scope->FindVar(op_info->Output("Out").front())->GetMutable<Tensor>();
auto x_dims = x->dims();
int batch_size = x_dims[0];
int channel_size = x_dims[1];
auto x_h = x_dims[2];
auto x_w = x_dims[3];
CHECK_EQ(x_dims.size(), 4);
auto scale = op_info->GetAttr<float>("scale");
auto out_w = op_info->GetAttr<int>("out_w");
auto out_h = op_info->GetAttr<int>("out_h");
auto align_corners = op_info->GetAttr<bool>("align_corners");
int align_mode = op_info->GetAttr<int>("align_mode");
auto interp_method = op_info->GetAttr<std::string>("interp_method");
// calc real out_h and out_w
if (scale > 0) {
out_h = static_cast<int>(x_h * scale);
out_w = static_cast<int>(x_w * scale);
}
if (op_info->HasInput("OutSize")) {
auto out_size_var_names = op_info->Input("OutSize");
if (out_size_var_names.size() > 0) {
auto out_size_var_name = out_size_var_names.front();
auto out_size =
scope->FindVar(out_size_var_name)->GetMutable<lite::Tensor>();
auto out_size_dims = out_size->dims();
CHECK_EQ(out_size_dims.size(), 1);
CHECK_EQ(out_size_dims.production(), 2);
auto out_size_data = out_size->mutable_data<int>();
out_h = out_size_data[0];
out_w = out_size_data[1];
}
}
CHECK_GT(out_h, 0);
CHECK_GT(out_w, 0);
out->Resize({batch_size, channel_size, out_h, out_w});
// copy from x if no change
if (x_h == out_h && x_w == out_w) {
out->CopyDataFrom(*x);
return;
}
float ratio_h = 0.f;
float ratio_w = 0.f;
if (out_h > 1) {
ratio_h = (align_corners) ? static_cast<float>(x_h - 1) / (out_h - 1)
: static_cast<float>(x_h) / out_h;
}
if (out_w > 1) {
ratio_w = (align_corners) ? static_cast<float>(x_w - 1) / (out_w - 1)
: static_cast<float>(x_w) / out_w;
}
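// With align_corners the corner pixels of input and output coincide, so the
// ratio is (in - 1) / (out - 1); otherwise it is the plain size quotient, and
// align_mode == 0 (with align_corners == false) shifts sampling to pixel
// centers in the loops below.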
// naive bilinear interpolation
auto x_data = x->mutable_data<DType>();
auto out_data = out->mutable_data<DType>();
bool align_flag = (align_mode == 0 && !align_corners);
std::vector<int> vy_n, vy_s;
std::vector<float> vd_n, vd_s;
vy_n.reserve(out_h);
vy_s.reserve(out_h);
vd_n.reserve(out_h);
vd_s.reserve(out_h);
for (int k = 0; k < out_h; k++) {
int yn = align_flag ? static_cast<int>(ratio_h * (k + 0.5) - 0.5)
: static_cast<int>(ratio_h * k);
yn = (yn > 0) ? yn : 0;
int ys = (yn + 1) < (x_h - 1) ? (yn + 1) : (x_h - 1);
float idx_src_y = ratio_h * (k + 0.5) - 0.5;
idx_src_y = (idx_src_y > 0) ? idx_src_y : 0;
float dn = align_flag ? idx_src_y - yn : ratio_h * k - yn;
float ds = 1.f - dn;
{
vy_n[k] = yn;
vy_s[k] = ys;
vd_n[k] = dn;
vd_s[k] = ds;
}
}
std::vector<int> vx_w, vx_e;
std::vector<float> vd_w, vd_e;
vx_w.reserve(out_w);
vx_e.reserve(out_w);
vd_w.reserve(out_w);
vd_e.reserve(out_w);
for (int l = 0; l < out_w; l++) {
int xw = (align_mode == 0 && !align_corners)
? static_cast<int>(ratio_w * (l + 0.5) - 0.5)
: static_cast<int>(ratio_w * l);
xw = (xw > 0) ? xw : 0;
int xe = (xw + 1) < (x_w - 1) ? (xw + 1) : (x_w - 1);
float idx_src_x = ratio_w * (l + 0.5) - 0.5;
idx_src_x = (idx_src_x > 0) ? idx_src_x : 0;
float dw = align_flag ? idx_src_x - xw : ratio_w * l - xw;
float de = 1.f - dw;
{
vx_w[l] = xw;
vx_e[l] = xe;
vd_w[l] = dw;
vd_e[l] = de;
}
}
std::vector<int64_t> x_strides(x_dims.size(), 1);
for (int idx = x_strides.size() - 2; idx >= 0; idx--) {
x_strides[idx] = x_strides[idx + 1] * x_dims[idx + 1];
}
for (int i = 0; i < batch_size; i++) {
for (int j = 0; j < channel_size; j++) {
for (int k = 0; k < out_h; k++) {
for (int l = 0; l < out_w; l++) {
DType x0 = x_data[i * x_strides[0] + j * x_strides[1] +
vy_n[k] * x_strides[2] + vx_w[l] * x_strides[3]];
DType x1 = x_data[i * x_strides[0] + j * x_strides[1] +
vy_s[k] * x_strides[2] + vx_w[l] * x_strides[3]];
DType x2 = x_data[i * x_strides[0] + j * x_strides[1] +
vy_n[k] * x_strides[2] + vx_e[l] * x_strides[3]];
DType x3 = x_data[i * x_strides[0] + j * x_strides[1] +
vy_s[k] * x_strides[2] + vx_e[l] * x_strides[3]];
*out_data = x0 * vd_s[k] * vd_e[l] + x1 * vd_n[k] * vd_e[l] +
x2 * vd_s[k] * vd_w[l] + x3 * vd_n[k] * vd_w[l];
out_data++;
}
}
}
}
}
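// nearest_interp_ref: host reference for nearest-neighbor interpolation. The
// output size starts from the out_h/out_w attributes, is overridden by a
// positive scale, and finally by the OutSize input when present; each output
// pixel then copies the nearest source pixel (rounded when align_corners).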
template <typename DType>
void nearest_interp_ref(const std::shared_ptr<operators::InterpolateOp> op) {
auto scope = op->scope();
auto op_info = op->op_info();
auto x = scope->FindVar(op_info->Input("X").front())->GetMutable<Tensor>();
auto out =
scope->FindVar(op_info->Output("Out").front())->GetMutable<Tensor>();
auto x_dims = x->dims();
CHECK_EQ(x_dims.size(), 4);
auto scale = op_info->GetAttr<float>("scale");
auto out_w = op_info->GetAttr<int>("out_w");
auto out_h = op_info->GetAttr<int>("out_h");
auto align_corners = op_info->GetAttr<bool>("align_corners");
// int align_mode = op_info->GetAttr<int>("align_mode");
auto interp_method = op_info->GetAttr<std::string>("interp_method");
CHECK_EQ(interp_method, "nearest");
int x_h = x_dims[2];
int x_w = x_dims[3];
if (scale > 0) {
out_h = static_cast<int>(x_h * scale);
out_w = static_cast<int>(x_w * scale);
}
if (op_info->HasInput("OutSize")) {
auto out_size_var_names = op_info->Input("OutSize");
if (out_size_var_names.size() > 0) {
auto out_size_var_name = out_size_var_names.front();
auto out_size =
scope->FindVar(out_size_var_name)->GetMutable<lite::Tensor>();
CHECK_EQ(out_size->numel(), 2);
auto out_size_data = out_size->mutable_data<int>();
out_h = out_size_data[0];
out_w = out_size_data[1];
}
}
CHECK_GT(out_h, 0);
CHECK_GT(out_w, 0);
out->Resize({x_dims[0], x_dims[1], out_h, out_w});
float ratio_h = 0.f;
float ratio_w = 0.f;
if (out_h > 1) {
ratio_h = align_corners ? static_cast<float>(x_h - 1.0) / (out_h - 1.0)
: static_cast<float>(x_h) / out_h;
}
if (out_w > 1) {
ratio_w = align_corners ? static_cast<float>(x_w - 1.0) / (out_w - 1.0)
: static_cast<float>(x_w) / out_w;
}
auto x_data = x->data<DType>();
auto out_data = out->mutable_data<DType>();
auto out_dims = out->dims();
std::vector<int64_t> x_strides(x_dims.size(), 1);
for (int idx = x_strides.size() - 2; idx >= 0; idx--) {
x_strides[idx] = x_strides[idx + 1] * x_dims[idx + 1];
}
for (int n = 0; n < out_dims[0]; n++) {
for (int c = 0; c < out_dims[1]; c++) {
for (int h = 0; h < out_dims[2]; h++) {
for (int w = 0; w < out_dims[3]; w++) {
int in_i = ratio_h * h;
int in_j = ratio_w * w;
if (align_corners) {
in_i = ratio_h * h + 0.5;
in_j = ratio_w * w + 0.5;
}
*out_data = x_data[n * x_strides[0] + c * x_strides[1] +
in_i * x_strides[2] + in_j * x_strides[3]];
out_data++;
}
}
}
}
}
void test_interpolate(int bs,
int ic,
int ih,
int iw,
int oh,
int ow,
float scale,
int out_size_h,
int out_size_w,
bool align_corners,
int align_mode,
std::string interp_method) {
// prepare input&output variables
Scope scope;
std::string x_var_name("x");
std::string out_size_var_name("out_size");
std::string out_var_name("out");
std::string out_ref_var_name("out_ref");
auto x = scope.Var(x_var_name)->GetMutable<Tensor>();
auto out_size = scope.Var(out_size_var_name)->GetMutable<Tensor>();
auto out = scope.Var(out_var_name)->GetMutable<Tensor>();
auto out_ref = scope.Var(out_ref_var_name)->GetMutable<Tensor>();
x->Resize({bs, ic, ih, iw});
out_size->Resize({2});
// initialize input&output data
FillTensor<float, int>(x);
// initialize op desc
cpp::OpDesc opdesc;
opdesc.SetType(interp_method + "_interp");
opdesc.SetInput("X", {x_var_name});
opdesc.SetOutput("Out", {out_var_name});
opdesc.SetAttr("out_h", oh);
opdesc.SetAttr("out_w", ow);
opdesc.SetAttr("scale", scale);
opdesc.SetAttr("align_corners", static_cast<bool>(align_corners));
opdesc.SetAttr("align_mode", static_cast<int>(align_mode));
opdesc.SetAttr("interp_method", interp_method);
if (out_size_h > 0 && out_size_w > 0) {
auto out_size_dims = out_size->dims();
CHECK_EQ(out_size_dims.size(), 1);
CHECK_EQ(out_size_dims.production(), 2);
auto out_size_data = out_size->mutable_data<int>();
out_size_data[0] = out_size_h;
out_size_data[1] = out_size_w;
opdesc.SetInput("OutSize", {out_size_var_name});
}
// create op and execute reference implementation
auto op = CreateOp<operators::InterpolateOp>(opdesc, &scope);
if (interp_method == "bilinear") {
bilinear_interp_ref<float>(op);
} else {
nearest_interp_ref<float>(op);
}
out_ref->CopyDataFrom(*out);
// convert op to NPU model, then run it on NPU
LauchOp(op, {x_var_name}, {out_var_name});
// compare results
auto out_dims = out->dims();
auto out_ref_dims = out_ref->dims();
CHECK_EQ(out_dims.size(), out_ref_dims.size());
for (int i = 0; i < out_dims.size(); i++) {
CHECK_EQ(out_dims[i], out_ref_dims[i]);
}
auto* out_data = out->mutable_data<float>();
auto* out_ref_data = out_ref->mutable_data<float>();
for (int i = 0; i < out->dims().production(); i++) {
VLOG(5) << i;
EXPECT_NEAR(out_data[i], out_ref_data[i], 1e-2f);
}
}
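// Sweeps batch size, channels, input/output sizes, scale, OutSize, and the
// align flags, and skips the combinations the NPU bridge rejects
// ((ih * iw) / (oh * ow) > 7 and align_mode == 0 with align_corners == false).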
TEST(NPUBridges, bilinear_interp) {
#if 1
for (auto bs : {1, 3}) {
for (auto ic : {3, 4}) {
for (auto ih : {4, 5}) {
for (auto iw : {3, 6}) {
for (auto oh : {0, 3, 8}) {
for (auto ow : {0, 4, 9}) {
for (auto scale : {0.f, 0.5f, 0.6f, 2.0f, 2.2f}) {
for (auto out_size_h : {0, 3, 11}) {
for (auto out_size_w : {0, 2, 12}) {
for (auto align_corners : {true, false}) {
for (auto align_mode : {0, 1}) {
for (auto interp_method : {"bilinear", "nearest"}) {
int act_oh = 0, act_ow = 0;
if (out_size_h > 0 && out_size_w > 0) {
act_oh = out_size_h;
act_ow = out_size_w;
} else if (scale > 1e-5) {
act_oh = static_cast<int>(ih * scale);
act_ow = static_cast<int>(iw * scale);
} else if (oh > 0 && ow > 0) {
act_oh = oh;
act_ow = ow;
}
if (act_oh <= 0 || act_ow <= 0) {
continue;
}
// TODO(hong19860320) multiple=(ih*iw)/(oh*ow) should not exceed 7.0 in
// NPU DDK, delete the following lines if the limitation is removed.
const float largest_multiple = 7.0f;
float multiple =
static_cast<float>(ih * iw) / (act_oh * act_ow);
if (multiple > largest_multiple) {
continue;
}
if (align_mode == 0 && !align_corners) {
continue;
}
VLOG(3) << "bs: " << bs << " ic: " << ic
<< " ih: " << ih << " iw: " << iw
<< " oh: " << oh << " ow: " << ow
<< " scale: " << scale
<< " out_size: " << out_size_h << ","
<< out_size_w
<< " align_corners: " << align_corners
<< " align_mode: " << align_mode;
test_interpolate(bs,
ic,
ih,
iw,
oh,
ow,
scale,
out_size_h,
out_size_w,
align_corners,
align_mode,
interp_method);
}
}
}
}
}
}
}
}
}
}
}
}
#else
test_interpolate(1, 1, 4, 3, 0, 0, 1.f, 3, 6, false, 1, "nearest");
#endif
}
} // namespace bridges
} // namespace npu
} // namespace kernels
} // namespace lite
} // namespace paddle
USE_LITE_OP(bilinear_interp);
USE_NPU_BRIDGE(bilinear_interp);
USE_LITE_OP(nearest_interp);
USE_NPU_BRIDGE(nearest_interp);
@@ -35,8 +35,7 @@ bool InterpolateOp::CheckShape() const {
 }
 
 bool InterpolateOp::InferShape() const {
-  auto* X = param_.X;
-  auto* OutSize = param_.OutSize;
+  auto X = param_.X;
 
   int n = X->dims()[0];
   int c = X->dims()[1];
@@ -46,39 +45,40 @@ bool InterpolateOp::InferShape() const {
   int out_w;
 
   auto SizeTensor = param_.SizeTensor;
+  auto OutSize = param_.OutSize;
+  auto Scale = param_.Scale;
   if (!SizeTensor.empty()) {
-    CHECK(SizeTensor.size() == 2)
+    CHECK_EQ(SizeTensor.size(), 2)
         << "Input(SizeTensor)'size of Op(interpolate) must be 2. "
            "Attr(out_shape)'s length must be 2 for 4-D input tensor.";
-    out_h = SizeTensor[0]->data<int>()[0];
-    out_w = SizeTensor[1]->data<int>()[0];
-  } else if (OutSize) {
-    auto OutSize_dims = OutSize->dims();
-    CHECK_EQ(OutSize_dims.size(), 1) << "Input(OutSize)'s dims size must be 1";
-    CHECK_EQ(OutSize_dims[0], 2) << "OutSize's dim[0] must be 2";
-    auto OutSize_data = OutSize->data<int>();
-    out_h = OutSize_data[0];
-    out_w = OutSize_data[1];
-  } else if (param_.out_h > 0 && param_.out_w > 0) {
     out_h = param_.out_h;
     out_w = param_.out_w;
-    param_.Out->Resize({n, c, out_h, out_w});
-    return true;
-  }
-  auto Scale = param_.Scale;
-  if (Scale) {
-    auto scale_dims = Scale->dims();
-    CHECK(scale_dims.size() == 1) << "Scale's dimension size must be 1.";
-    out_h = -1;
-    out_w = -1;
   } else {
-    auto scale = param_.scale;
-    if (scale > 0) {
-      CHECK(scale > 0) << "scale must large than 0.";
+    float scale = -1.f;
+    if (Scale) {
+      auto Scale_dims = Scale->dims();
+      CHECK_EQ(Scale_dims.size(), 1) << "Scale's dimension size must be 1.";
+      scale = Scale->data<float>()[0];
+    } else {
+      scale = param_.scale;
+    }
+    if (scale > 0) {
       out_h = static_cast<int>(h * scale);
       out_w = static_cast<int>(w * scale);
+      out_h = out_h > 0 ? out_h : -1;
+      out_w = out_w > 0 ? out_w : -1;
+    } else {
+      out_h = param_.out_h;
+      out_w = param_.out_w;
     }
   }
-
-  auto out_lod = param_.Out->mutable_lod();
-  *out_lod = param_.X->lod();
+  if (OutSize != nullptr) {
+    auto out_lod = param_.Out->mutable_lod();
+    *out_lod = param_.X->lod();
+  }
 
   param_.Out->Resize({n, c, out_h, out_w});
   return true;
@@ -25,7 +25,7 @@ if((NOT LITE_WITH_OPENCL AND NOT LITE_WITH_FPGA AND NOT LITE_WITH_BM) AND (LITE_...
 #lite_cc_test(test_kernel_increment_compute SRCS increment_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
 #lite_cc_test(test_kernel_write_to_array_compute SRCS write_to_array_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
 #lite_cc_test(test_kernel_read_from_array_compute SRCS read_from_array_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
-lite_cc_test(test_concat_compute SRCS concat_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
+lite_cc_test(test_kernel_concat_compute SRCS concat_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
 lite_cc_test(test_kernel_transpose_compute SRCS transpose_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
 lite_cc_test(test_kernel_reshape_compute SRCS reshape_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
 lite_cc_test(test_kernel_layer_norm_compute SRCS layer_norm_compute_test.cc DEPS arena_framework ${xpu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
@@ -59,8 +59,7 @@ endif()
 lite_cc_test(test_kernel_pad2d_compute SRCS pad2d_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
 lite_cc_test(test_kernel_prior_box_compute SRCS prior_box_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
 lite_cc_test(test_kernel_negative_compute SRCS negative_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
-lite_cc_test(test_kernel_bilinear_interp_compute SRCS bilinear_interp_compute_test.cc DEPS arena_framework ${xpu_kernels} ${bm_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
-lite_cc_test(test_kernel_nearest_interp_compute SRCS nearest_interp_compute_test.cc DEPS arena_framework ${xpu_kernels} ${bm_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
+lite_cc_test(test_kernel_interp_compute SRCS interp_compute_test.cc DEPS arena_framework ${xpu_kernels} ${bm_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
 lite_cc_test(test_kernel_shape_compute SRCS shape_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
 lite_cc_test(test_kernel_crop_compute SRCS crop_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${bm_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
 lite_cc_test(test_kernel_sequence_expand_compute SRCS sequence_expand_compute_test.cc DEPS arena_framework ${xpu_kernels} ${bm_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gtest/gtest.h>
#include <string>
#include "lite/api/paddle_use_kernels.h"
#include "lite/api/paddle_use_ops.h"
#include "lite/core/arena/framework.h"
#include "lite/core/tensor.h"
namespace paddle {
namespace lite {
inline std::vector<int> get_new_shape(
std::vector<const lite::Tensor*> list_new_shape_tensor) {
// get tensor from
std::vector<int> vec_new_shape;
for (size_t i = 0; i < list_new_shape_tensor.size(); ++i) {
auto tensor = list_new_shape_tensor[i];
vec_new_shape.push_back(static_cast<int32_t>(*(tensor->data<int32_t>())));
}
return vec_new_shape;
}
template <typename T>
inline std::vector<T> get_new_data_from_tensor(const Tensor* new_data_tensor) {
std::vector<T> vec_new_data;
auto* new_data = new_data_tensor->data<T>();
lite::Tensor cpu_starts_tensor;
vec_new_data =
std::vector<T>(new_data, new_data + new_data_tensor->dims().production());
return vec_new_data;
}
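// Host references for bilinear resizing: resize_bilinear_align maps the corner
// pixels exactly (scale = (in - 1) / (out - 1)), while resize_bilinear_no_align
// uses the plain size ratio with half-pixel centers; both blend the four
// neighboring source pixels with the usual bilinear weights.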
template <typename dtype>
void resize_bilinear_align(std::vector<const lite::Tensor*> inputs,
lite::Tensor* output) {
int hin = inputs[0]->dims()[2];
int win = inputs[0]->dims()[3];
int channels = inputs[0]->dims()[1];
int num = inputs[0]->dims()[0];
int hout = output->dims()[2];
int wout = output->dims()[3];
dtype scale_w = static_cast<dtype>(win - 1) / (wout - 1);
dtype scale_h = static_cast<dtype>(hin - 1) / (hout - 1);
const dtype* src = inputs[0]->data<dtype>();
dtype* dst = output->mutable_data<dtype>();
int dst_stride_w = 1;
int dst_stride_h = wout;
int dst_stride_c = wout * hout;
int dst_stride_batch = wout * hout * channels;
int src_stride_w = 1;
int src_stride_h = win;
int src_stride_c = win * hin;
int src_stride_batch = win * hin * channels;
for (int n = 0; n < num; ++n) {
for (int c = 0; c < channels; ++c) {
int src_index = n * src_stride_batch + c * src_stride_c;
for (int h = 0; h < hout; ++h) {
for (int w = 0; w < wout; ++w) {
dtype fw = w * scale_w;
dtype fh = h * scale_h;
int w_start = static_cast<int>(fw);
int w_id = w_start < win - 1 ? 1 : 0;
int w_end = static_cast<int>(fw + w_id);
int h_start = static_cast<int>(fh);
int h_id = h_start < hin - 1 ? 1 : 0;
int h_end = static_cast<int>(fh + h_id);
fw -= w_start;
fh -= h_start;
const dtype w00 = (1.0 - fh) * (1.0 - fw);
const dtype w01 = fw * (1.0 - fh);
const dtype w10 = fh * (1.0 - fw);
const dtype w11 = fw * fh;
dtype tl =
src[src_index + w_start * src_stride_w + h_start * src_stride_h];
dtype tr =
src[src_index + w_end * src_stride_w + h_start * src_stride_h];
dtype bl =
src[src_index + w_start * src_stride_w + h_end * src_stride_h];
dtype br =
src[src_index + w_end * src_stride_w + h_end * src_stride_h];
int dst_index = n * dst_stride_batch + c * dst_stride_c +
h * dst_stride_h + w * dst_stride_w;
dst[dst_index] =
static_cast<dtype>(w00 * tl + w01 * tr + w10 * bl + w11 * br);
}
}
}
}
}
template <typename dtype>
void resize_bilinear_no_align(std::vector<const lite::Tensor*> inputs,
lite::Tensor* output) {
int hin = inputs[0]->dims()[2];
int win = inputs[0]->dims()[3];
int channels = inputs[0]->dims()[1];
int num = inputs[0]->dims()[0];
int hout = output->dims()[2];
int wout = output->dims()[3];
dtype scale_w = static_cast<dtype>(win) / (wout);
dtype scale_h = static_cast<dtype>(hin) / (hout);
const dtype* src = inputs[0]->data<dtype>();
dtype* dst = output->mutable_data<dtype>();
int dst_stride_w = 1;
int dst_stride_h = wout;
int dst_stride_c = wout * hout;
int dst_stride_batch = wout * hout * channels;
int src_stride_w = 1;
int src_stride_h = win;
int src_stride_c = win * hin;
int src_stride_batch = win * hin * channels;
for (int n = 0; n < num; ++n) {
for (int c = 0; c < channels; ++c) {
int src_index = n * src_stride_batch + c * src_stride_c;
for (int h = 0; h < hout; ++h) {
for (int w = 0; w < wout; ++w) {
dtype fw = scale_w * (w + 0.5f) - 0.5f;
fw = (fw < 0) ? 0 : fw;
dtype fh = scale_h * (h + 0.5f) - 0.5f;
fh = (fh < 0) ? 0 : fh;
int w_start = static_cast<int>(fw);
int w_id = w_start < win - 1 ? 1 : 0;
int w_end = static_cast<int>(fw + w_id);
int h_start = static_cast<int>(fh);
int h_id = h_start < hin - 1 ? 1 : 0;
int h_end = static_cast<int>(fh + h_id);
fw -= w_start;
fh -= h_start;
const dtype w00 = (1.0 - fh) * (1.0 - fw);
const dtype w01 = fw * (1.0 - fh);
const dtype w10 = fh * (1.0 - fw);
const dtype w11 = fw * fh;
dtype tl =
src[src_index + w_start * src_stride_w + h_start * src_stride_h];
dtype tr =
src[src_index + w_end * src_stride_w + h_start * src_stride_h];
dtype bl =
src[src_index + w_start * src_stride_w + h_end * src_stride_h];
dtype br =
src[src_index + w_end * src_stride_w + h_end * src_stride_h];
int dst_index = n * dst_stride_batch + c * dst_stride_c +
h * dst_stride_h + w * dst_stride_w;
dst[dst_index] =
static_cast<dtype>(w00 * tl + w01 * tr + w10 * bl + w11 * br);
}
}
}
}
}
class BilinearInterpComputeTester : public arena::TestCase {
protected:
// common attributes for this op.
std::string input0_ = "X";
std::string sizetensor0_ = "SizeTensor0";
std::string sizetensor1_ = "SizeTensor1";
std::string input_scale_ = "Scale";
std::string input1_ = "OutSize";
std::string output_ = "Out";
float height_scale_ = 0.f;
float width_scale_ = 0.f;
int out_height_ = -1;
int out_width_ = -1;
int outsize_height_ = -1;
int outsize_width_ = -1;
bool align_corners_ = true;
std::string interp_method_ = "Bilinear";
DDim _dims0_{{1, 1, 16, 16}};
DDim _dims1_{{2}};
DDim sizetensor_dims_{{1}};
DDim scale_dims_{{1}};
public:
BilinearInterpComputeTester(const Place& place,
const std::string& alias,
float scale,
int out_height,
int out_width,
int outsize_height,
int outsize_width,
bool align_corners,
std::string interp_method)
: TestCase(place, alias),
height_scale_(scale),
width_scale_(scale),
out_height_(out_height),
out_width_(out_width),
outsize_height_(outsize_height),
outsize_width_(outsize_width),
align_corners_(align_corners),
interp_method_(interp_method) {}
void RunBaseline(Scope* scope) override {
width_scale_ = height_scale_;
std::vector<const lite::Tensor*> inputs;
inputs.emplace_back(scope->FindTensor(input0_));
if (outsize_height_ > 0 && outsize_width_ > 0) {
inputs.emplace_back(scope->FindTensor(input1_));
}
std::vector<const lite::Tensor*> SizeTensor;
if (outsize_height_ > 0 && outsize_width_ > 0) {
SizeTensor.emplace_back(scope->FindTensor(sizetensor0_));
SizeTensor.emplace_back(scope->FindTensor(sizetensor1_));
}
const lite::Tensor* input_scale = scope->FindTensor(input_scale_);
float scale = height_scale_;
int in_h = inputs[0]->dims()[2];
int in_w = inputs[0]->dims()[3];
if (SizeTensor.size() > 0) {
auto new_size = get_new_shape(SizeTensor);
out_height_ = new_size[0];
out_width_ = new_size[1];
} else {
auto scale_tensor = input_scale;
if (scale_tensor != nullptr) {
auto scale_data = get_new_data_from_tensor<float>(scale_tensor);
scale = scale_data[0];
}
if (scale > 0) {
out_height_ = static_cast<int>(in_h * scale);
out_width_ = static_cast<int>(in_w * scale);
}
if (inputs.size() > 1) {
auto out_size = inputs[1];
auto out_size_data = get_new_data_from_tensor<int>(out_size);
out_height_ = out_size_data[0];
out_width_ = out_size_data[1];
}
}
height_scale_ = scale;
width_scale_ = scale;
if (out_width_ != -1 && out_height_ != -1) {
height_scale_ = static_cast<float>(out_height_ / inputs[0]->dims()[2]);
width_scale_ = static_cast<float>(out_width_ / inputs[0]->dims()[3]);
}
auto* outputs = scope->NewTensor(output_);
CHECK(outputs);
int num_cout = inputs[0]->dims()[0];
int c_cout = inputs[0]->dims()[1];
outputs->Resize({num_cout, c_cout, out_height_, out_width_});
if (align_corners_) {
resize_bilinear_align<float>(inputs, outputs);
} else {
resize_bilinear_no_align<float>(inputs, outputs);
}
}
void PrepareOpDesc(cpp::OpDesc* op_desc) {
op_desc->SetType("bilinear_interp");
op_desc->SetInput("X", {input0_});
if (outsize_height_ > 0 && outsize_width_ > 0) {
op_desc->SetInput("OutSize", {input1_});
op_desc->SetInput("SizeTensor", {sizetensor0_, sizetensor1_});
}
if (height_scale_ > 0) {
op_desc->SetInput("Scale", {input_scale_});
}
op_desc->SetOutput("Out", {output_});
op_desc->SetAttr("scale", height_scale_);
op_desc->SetAttr("out_h", out_height_);
op_desc->SetAttr("out_w", out_width_);
op_desc->SetAttr("align_corners", align_corners_);
op_desc->SetAttr("interp_method", interp_method_);
}
void PrepareData() override {
std::vector<float> data0(_dims0_.production());
for (int i = 0; i < _dims0_.production(); i++) {
data0[i] = i * 1.1;
}
SetCommonTensor(input0_, _dims0_, data0.data());
if (outsize_height_ > 0 && outsize_width_ > 0) {
std::vector<int> data1(2);
data1[0] = outsize_height_;
data1[1] = outsize_width_;
SetCommonTensor(input1_, _dims1_, data1.data());
std::vector<int> sizetensor_data(1);
sizetensor_data[0] = outsize_height_;
SetCommonTensor(sizetensor0_, sizetensor_dims_, sizetensor_data.data());
sizetensor_data[0] = outsize_width_;
SetCommonTensor(sizetensor1_, sizetensor_dims_, sizetensor_data.data());
}
if (height_scale_ > 0) {
std::vector<float> scale_data(1);
scale_data[0] = height_scale_;
SetCommonTensor(input_scale_, scale_dims_, scale_data.data());
}
}
};
void test_bilinear_interp(Place place) {
std::string interp_method = "Bilinear";
for (float scale : {2., 1., 0.3}) {
for (bool align_corners : {true, false}) {
std::unique_ptr<arena::TestCase> tester(new BilinearInterpComputeTester(
place, "def", scale, -1, -1, -1, -1, align_corners, interp_method));
arena::Arena arena(std::move(tester), place, 5e-5);
arena.TestPrecision();
}
}
for (int out_height : {8, 16, 24}) {
for (int out_width : {8, 16, 24}) {
for (bool align_corners : {true, false}) {
std::unique_ptr<arena::TestCase> tester(
new BilinearInterpComputeTester(place,
"def",
0,
out_height,
out_width,
-1,
-1,
align_corners,
interp_method));
arena::Arena arena(std::move(tester), place, 5e-5);
arena.TestPrecision();
}
}
}
for (int outsize_height : {8, 16, 24}) {
for (int outsize_width : {8, 16, 24}) {
for (bool align_corners : {true, false}) {
std::unique_ptr<arena::TestCase> tester(
new BilinearInterpComputeTester(place,
"def",
0,
-1,
-1,
outsize_height,
outsize_width,
align_corners,
interp_method));
arena::Arena arena(std::move(tester), place, 5e-5);
arena.TestPrecision();
}
}
}
}
TEST(BilinearInterp, precision) {
// #ifdef LITE_WITH_X86
// Place place(TARGET(kX86));
// #endif
#ifdef LITE_WITH_ARM
Place place(TARGET(kARM));
test_bilinear_interp(place);
#endif
}
} // namespace lite
} // namespace paddle
@@ -142,35 +142,29 @@ class ConcateComputeTester : public arena::TestCase {
 TEST(Concat, precision) {
   LOG(INFO) << "test concat op, kARM";
-#ifdef LITE_WITH_ARM
-  Place place(TARGET(kARM));
-  for (int axis : {1, 2}) {
-    for (bool is_use_axis_tensor : {false, true}) {
-      LOG(INFO) << "axis:" << axis
-                << ", is_use_axis_tensor:" << is_use_axis_tensor;
-      std::unique_ptr<arena::TestCase> tester(
-          new ConcateComputeTester(place, "def", axis, is_use_axis_tensor));
-      arena::Arena arena(std::move(tester), place, 2e-5);
-      arena.TestPrecision();
-    }
-  }
+  Place place;
+  float abs_error = 2e-5;
+#if defined(LITE_WITH_NPU)
+  place = TARGET(kNPU);
+  abs_error = 1e-2;  // use fp16 in npu
+#elif defined(LITE_WITH_ARM)
+  place = TARGET(kARM);
+#elif defined(LITE_WITH_X86)
+  place = TARGET(kX86);
+#else
+  return;
 #endif
-#ifdef LITE_WITH_X86
-  Place place(TARGET(kX86));
-  LOG(INFO) << "test concate op, x86";
   for (int axis : {1, 2}) {
     for (bool is_use_axis_tensor : {false, true}) {
       LOG(INFO) << "axis:" << axis
                 << ", is_use_axis_tensor:" << is_use_axis_tensor;
       std::unique_ptr<arena::TestCase> tester(
           new ConcateComputeTester(place, "def", axis, is_use_axis_tensor));
-      arena::Arena arena(std::move(tester), place, 2e-5);
+      arena::Arena arena(std::move(tester), place, abs_error);
       arena.TestPrecision();
     }
   }
-#endif
 }
 
 } // namespace lite
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gtest/gtest.h>
#include <string>
#include "lite/api/paddle_use_kernels.h"
#include "lite/api/paddle_use_ops.h"
#include "lite/core/arena/framework.h"
#include "lite/core/tensor.h"
#include "lite/tests/utils/fill_data.h"
namespace paddle {
namespace lite {
template <typename dtype>
void ResizeNearestAlign(const lite::Tensor* x,
lite::Tensor* out,
bool with_align) {
auto x_dims = x->dims();
int num = x_dims[0];
int channels = x_dims[1];
int hin = x_dims[2];
int win = x_dims[3];
int hout = out->dims()[2];
int wout = out->dims()[3];
dtype scale_w = (with_align) ? (static_cast<float>(win - 1) / (wout - 1))
: (static_cast<float>(win) / (wout));
dtype scale_h = (with_align) ? (static_cast<float>(hin - 1) / (hout - 1))
: (static_cast<float>(hin) / (hout));
const dtype* src = x->data<dtype>();
dtype* dst = out->mutable_data<dtype>();
int dst_stride_w = 1;
int dst_stride_h = wout;
int dst_stride_c = wout * hout;
int dst_stride_batch = wout * hout * channels;
int src_stride_w = 1;
int src_stride_h = win;
int src_stride_c = win * hin;
int src_stride_batch = win * hin * channels;
for (int n = 0; n < num; ++n) {
for (int c = 0; c < channels; ++c) {
int src_index = n * src_stride_batch + c * src_stride_c;
for (int h = 0; h < hout; ++h) {
for (int w = 0; w < wout; ++w) {
int fw = (with_align) ? static_cast<int>(scale_w * w + 0.5)
: static_cast<int>(scale_w * w);
fw = (fw < 0) ? 0 : fw;
int fh = (with_align) ? static_cast<int>(scale_h * h + 0.5)
: static_cast<int>(scale_h * h);
fh = (fh < 0) ? 0 : fh;
int w_start = static_cast<int>(fw);
int h_start = static_cast<int>(fh);
int dst_index = n * dst_stride_batch + c * dst_stride_c +
h * dst_stride_h + w * dst_stride_w;
dst[dst_index] =
src[src_index + w_start * src_stride_w + h_start * src_stride_h];
}
}
}
}
}
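// BilinearInterpRef: host bilinear reference that honors both align_corners
// and align_mode; it degenerates to a plain copy when the output size equals
// the input size.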
template <typename DType>
void BilinearInterpRef(const lite::Tensor* x,
lite::Tensor* out,
bool align_corners,
int align_mode) {
auto x_dims = x->dims();
int batch_size = x_dims[0];
int channel_size = x_dims[1];
auto x_h = x_dims[2];
auto x_w = x_dims[3];
CHECK_EQ(x_dims.size(), 4);
auto out_dims = out->dims();
int out_h = out_dims[2];
int out_w = out_dims[3];
// copy from x if no change
if (x_h == out_h && x_w == out_w) {
out->CopyDataFrom(*x);
return;
}
float ratio_h = 0.f;
float ratio_w = 0.f;
if (out_h > 1) {
ratio_h = (align_corners) ? static_cast<float>(x_h - 1) / (out_h - 1)
: static_cast<float>(x_h) / out_h;
}
if (out_w > 1) {
ratio_w = (align_corners) ? static_cast<float>(x_w - 1) / (out_w - 1)
: static_cast<float>(x_w) / out_w;
}
// naive bilinear interpolation
auto x_data = x->data<DType>();
auto out_data = out->mutable_data<DType>();
bool align_flag = (align_mode == 0 && !align_corners);
std::vector<int> vy_n, vy_s;
std::vector<float> vd_n, vd_s;
vy_n.reserve(out_h);
vy_s.reserve(out_h);
vd_n.reserve(out_h);
vd_s.reserve(out_h);
for (int k = 0; k < out_h; k++) {
int yn = align_flag ? static_cast<int>(ratio_h * (k + 0.5) - 0.5)
: static_cast<int>(ratio_h * k);
yn = (yn > 0) ? yn : 0;
int ys = (yn + 1) < (x_h - 1) ? (yn + 1) : (x_h - 1);
float idx_src_y = ratio_h * (k + 0.5) - 0.5;
idx_src_y = (idx_src_y > 0) ? idx_src_y : 0;
float dn = align_flag ? idx_src_y - yn : ratio_h * k - yn;
float ds = 1.f - dn;
{
vy_n[k] = yn;
vy_s[k] = ys;
vd_n[k] = dn;
vd_s[k] = ds;
}
}
std::vector<int> vx_w, vx_e;
std::vector<float> vd_w, vd_e;
vx_w.reserve(out_w);
vx_e.reserve(out_w);
vd_w.reserve(out_w);
vd_e.reserve(out_w);
for (int l = 0; l < out_w; l++) {
int xw = align_flag ? static_cast<int>(ratio_w * (l + 0.5) - 0.5)
: static_cast<int>(ratio_w * l);
xw = (xw > 0) ? xw : 0;
int xe = (xw + 1) < (x_w - 1) ? (xw + 1) : (x_w - 1);
float idx_src_x = ratio_w * (l + 0.5) - 0.5;
idx_src_x = (idx_src_x > 0) ? idx_src_x : 0;
float dw = align_flag ? idx_src_x - xw : ratio_w * l - xw;
float de = 1.f - dw;
{
vx_w[l] = xw;
vx_e[l] = xe;
vd_w[l] = dw;
vd_e[l] = de;
}
}
std::vector<int64_t> x_strides(x_dims.size(), 1);
for (int idx = x_strides.size() - 2; idx >= 0; idx--) {
x_strides[idx] = x_strides[idx + 1] * x_dims[idx + 1];
}
for (int i = 0; i < batch_size; i++) {
for (int j = 0; j < channel_size; j++) {
for (int k = 0; k < out_h; k++) {
for (int l = 0; l < out_w; l++) {
DType x0 = x_data[i * x_strides[0] + j * x_strides[1] +
vy_n[k] * x_strides[2] + vx_w[l] * x_strides[3]];
DType x1 = x_data[i * x_strides[0] + j * x_strides[1] +
vy_s[k] * x_strides[2] + vx_w[l] * x_strides[3]];
DType x2 = x_data[i * x_strides[0] + j * x_strides[1] +
vy_n[k] * x_strides[2] + vx_e[l] * x_strides[3]];
DType x3 = x_data[i * x_strides[0] + j * x_strides[1] +
vy_s[k] * x_strides[2] + vx_e[l] * x_strides[3]];
*out_data = x0 * vd_s[k] * vd_e[l] + x1 * vd_n[k] * vd_e[l] +
x2 * vd_s[k] * vd_w[l] + x3 * vd_n[k] * vd_w[l];
out_data++;
}
}
}
}
}
class NearestInterpComputeTester : public arena::TestCase {
protected:
// common attributes for this op.
std::string x_ = "X";
std::string sizetensor0_ = "SizeTensor0";
std::string sizetensor1_ = "SizeTensor1";
std::string input_scale_ = "Scale";
std::string outsize_ = "OutSize";
std::string out_ = "Out";
DDim dims_{{1, 2, 3, 4}};
std::string interp_method_ = "nearest";
float scale_ = -1.f;
int out_h_ = -1;
int out_w_ = -1;
bool align_corners_ = true;
int align_mode_ = 1;
bool use_sizetensor_ = false;
bool use_input_scale_ = false;
bool use_outsize_ = false;
public:
NearestInterpComputeTester(const Place& place,
const std::string& alias,
DDim dims,
std::string interp_method = "nearest",
float scale = -1.f,
int out_h = -1,
int out_w = -1,
bool align_corners = true,
int align_mode = 1,
bool use_sizetensor = false,
bool use_input_scale = false,
bool use_outsize = false)
: TestCase(place, alias),
dims_(dims),
interp_method_(interp_method),
scale_(scale),
out_h_(out_h),
out_w_(out_w),
align_corners_(align_corners),
align_mode_(align_mode),
use_sizetensor_(use_sizetensor),
use_input_scale_(use_input_scale),
use_outsize_(use_outsize) {}
void RunBaseline(Scope* scope) override {
int out_h = out_h_;
int out_w = out_w_;
if (scale_ > 0) {
out_h = dims_[2] * scale_;
out_w = dims_[3] * scale_;
}
auto input = scope->FindTensor(x_);
auto output = scope->NewTensor(out_);
std::vector<int64_t> out_shape{dims_[0], dims_[1], out_h, out_w};
output->Resize(out_shape);
if (interp_method_ == "nearest") {
ResizeNearestAlign<float>(input, output, align_corners_);
} else if (interp_method_ == "bilinear") {
BilinearInterpRef<float>(input, output, align_corners_, align_mode_);
}
}
void PrepareOpDesc(cpp::OpDesc* op_desc) {
if (interp_method_ == "nearest") {
op_desc->SetType("nearest_interp");
} else if (interp_method_ == "bilinear") {
op_desc->SetType("bilinear_interp");
} else {
LOG(FATAL) << "unsupport";
}
op_desc->SetInput("X", {x_});
if (use_sizetensor_) {
op_desc->SetInput("SizeTensor", {sizetensor0_, sizetensor1_});
}
if (use_input_scale_) {
op_desc->SetInput("Scale", {input_scale_});
}
if (use_outsize_) {
op_desc->SetInput("OutSize", {outsize_});
}
op_desc->SetOutput("Out", {out_});
op_desc->SetAttr("scale", scale_);
op_desc->SetAttr("out_h", out_h_);
op_desc->SetAttr("out_w", out_w_);
op_desc->SetAttr("align_corners", align_corners_);
op_desc->SetAttr("align_mode", align_mode_);
op_desc->SetAttr("interp_method", interp_method_);
}
void PrepareData() override {
std::vector<float> din(dims_.production());
fill_data_rand(din.data(), -1.f, 1.f, dims_.production());
SetCommonTensor(x_, dims_, din.data());
if (use_sizetensor_) {
DDim sizetensor_dims(std::vector<int64_t>{1});
std::vector<int> dsizetensor0{out_h_};
std::vector<int> dsizetensor1{out_w_};
SetCommonTensor(
sizetensor0_, sizetensor_dims, dsizetensor0.data(), {}, true);
SetCommonTensor(
sizetensor1_, sizetensor_dims, dsizetensor1.data(), {}, true);
}
if (use_input_scale_) {
DDim input_scale_dims(std::vector<int64_t>{1});
std::vector<float> dinput_scale{scale_};
SetCommonTensor(
input_scale_, input_scale_dims, dinput_scale.data(), {}, true);
}
if (use_outsize_) {
DDim outsize_dims(std::vector<int64_t>{2});
std::vector<int> doutsize{out_h_, out_w_};
SetCommonTensor(outsize_, outsize_dims, doutsize.data(), {}, true);
}
}
};
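// The helpers below exercise one way of specifying the output size at a time:
// the out_h/out_w attributes, the scale attribute, the SizeTensor inputs, the
// Scale input tensor, the OutSize input, and finally the align_corners /
// align_mode attributes.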
void TestInterpOuthw(Place place, float abs_error = 2e-5) {
for (auto x_dims : std::vector<std::vector<int64_t>>{{3, 4, 8, 9}}) {
for (auto interp_method : std::vector<std::string>{"nearest", "bilinear"}) {
for (int out_h : {6, 8, 12}) {
for (int out_w : {6, 9, 12}) {
std::unique_ptr<arena::TestCase> tester(
new NearestInterpComputeTester(place,
"def",
DDim(x_dims),
interp_method,
-1.f,
out_h,
out_w));
arena::Arena arena(std::move(tester), place, abs_error);
arena.TestPrecision();
}
}
}
}
}
void TestInterpScale(Place place, float abs_error = 2e-5) {
for (auto x_dims : std::vector<std::vector<int64_t>>{{3, 4, 8, 9}}) {
for (auto interp_method : std::vector<std::string>{"nearest", "bilinear"}) {
for (float scale : {0.3f, 1.f, 1.7f}) {
std::unique_ptr<arena::TestCase> tester(new NearestInterpComputeTester(
place, "def", DDim(x_dims), interp_method, scale));
arena::Arena arena(std::move(tester), place, abs_error);
arena.TestPrecision();
}
}
}
}
void TestInterpSizetensor(Place place, float abs_error = 2e-5) {
for (auto x_dims : std::vector<std::vector<int64_t>>{{3, 4, 8, 9}}) {
for (auto interp_method : std::vector<std::string>{"nearest", "bilinear"}) {
std::unique_ptr<arena::TestCase> tester(
new NearestInterpComputeTester(place,
"def",
DDim(x_dims),
interp_method,
-1.f,
10,
12,
true,
1,
true,
false,
false));
arena::Arena arena(std::move(tester), place, abs_error);
arena.TestPrecision();
}
}
}
void TestInterpInputScale(Place place, float abs_error = 2e-5) {
for (auto x_dims : std::vector<std::vector<int64_t>>{{3, 4, 8, 9}}) {
for (auto interp_method : std::vector<std::string>{"nearest", "bilinear"}) {
std::unique_ptr<arena::TestCase> tester(
new NearestInterpComputeTester(place,
"def",
DDim(x_dims),
interp_method,
0.7,
-1,
-1,
true,
1,
false,
true,
false));
arena::Arena arena(std::move(tester), place, abs_error);
arena.TestPrecision();
}
}
}
void TestInterpOutsize(Place place, float abs_error = 2e-5) {
for (auto x_dims : std::vector<std::vector<int64_t>>{{3, 4, 8, 9}}) {
for (auto interp_method : std::vector<std::string>{"nearest", "bilinear"}) {
std::unique_ptr<arena::TestCase> tester(
new NearestInterpComputeTester(place,
"def",
DDim(x_dims),
interp_method,
-1,
4,
4,
true,
1,
false,
false,
true));
arena::Arena arena(std::move(tester), place, abs_error);
arena.TestPrecision();
}
}
}
void TestInterpAlignCorners(Place place, float abs_error = 2e-5) {
for (auto x_dims : std::vector<std::vector<int64_t>>{{3, 4, 8, 9}}) {
for (bool align_corners : {true, false}) {
std::unique_ptr<arena::TestCase> tester(new NearestInterpComputeTester(
place, "def", DDim(x_dims), "nearest", 0.4, -1, -1, align_corners));
arena::Arena arena(std::move(tester), place, abs_error);
arena.TestPrecision();
}
}
}
void TestInterpAlignMode(Place place, float abs_error = 2e-5) {
for (auto x_dims : std::vector<std::vector<int64_t>>{{3, 4, 8, 9}}) {
for (bool align_corners : {true, false}) {
for (int align_mode : {0, 1}) {
// may exist bug in arm kernel
if (place == TARGET(kARM) && align_mode == 1 && !align_corners) {
continue;
}
std::unique_ptr<arena::TestCase> tester(
new NearestInterpComputeTester(place,
"def",
DDim(x_dims),
"bilinear",
0.7,
-1,
-1,
align_corners,
align_mode));
arena::Arena arena(std::move(tester), place, abs_error);
arena.TestPrecision();
}
}
}
}
TEST(Interp, precision) {
Place place;
float abs_error = 2e-5;
#if defined(LITE_WITH_NPU)
place = TARGET(kNPU);
abs_error = 1e-2; // use fp16 in npu
#elif defined(LITE_WITH_ARM)
place = TARGET(kARM);
#else
return;
#endif
TestInterpOuthw(place, abs_error);
TestInterpScale(place, abs_error);
TestInterpSizetensor(place, abs_error);
TestInterpInputScale(place, abs_error);
TestInterpOutsize(place, abs_error);
TestInterpAlignCorners(place, abs_error);
TestInterpAlignMode(place, abs_error);
}
} // namespace lite
} // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gtest/gtest.h>
#include <string>
#include "lite/api/paddle_use_kernels.h"
#include "lite/api/paddle_use_ops.h"
#include "lite/core/arena/framework.h"
#include "lite/core/tensor.h"
namespace paddle {
namespace lite {
inline std::vector<int> get_new_shape(
const std::vector<const lite::Tensor*>& list_new_shape_tensor) {
// get tensor from
std::vector<int> vec_new_shape;
for (size_t i = 0; i < list_new_shape_tensor.size(); ++i) {
auto tensor = list_new_shape_tensor[i];
vec_new_shape.push_back(static_cast<int32_t>(*tensor->data<int32_t>()));
}
return vec_new_shape;
}
template <typename T>
inline std::vector<T> get_new_data_from_tensor(const Tensor* new_data_tensor) {
std::vector<T> vec_new_data;
auto* new_data = new_data_tensor->data<T>();
lite::Tensor cpu_starts_tensor;
vec_new_data =
std::vector<T>(new_data, new_data + new_data_tensor->dims().production());
return vec_new_data;
}
template <typename dtype>
void resize_nearest_align(std::vector<const lite::Tensor*> inputs,
lite::Tensor* output,
bool with_align) {
int hin = inputs[0]->dims()[2];
int win = inputs[0]->dims()[3];
int channels = inputs[0]->dims()[1];
int num = inputs[0]->dims()[0];
int hout = output->dims()[2];
int wout = output->dims()[3];
dtype scale_w = (with_align) ? (static_cast<float>(win - 1) / (wout - 1))
: (static_cast<float>(win) / (wout));
dtype scale_h = (with_align) ? (static_cast<float>(hin - 1) / (hout - 1))
: (static_cast<float>(hin) / (hout));
const dtype* src = inputs[0]->data<dtype>();
dtype* dst = output->mutable_data<dtype>();
int dst_stride_w = 1;
int dst_stride_h = wout;
int dst_stride_c = wout * hout;
int dst_stride_batch = wout * hout * channels;
int src_stride_w = 1;
int src_stride_h = win;
int src_stride_c = win * hin;
int src_stride_batch = win * hin * channels;
for (int n = 0; n < num; ++n) {
for (int c = 0; c < channels; ++c) {
int src_index = n * src_stride_batch + c * src_stride_c;
for (int h = 0; h < hout; ++h) {
for (int w = 0; w < wout; ++w) {
int fw = (with_align) ? static_cast<int>(scale_w * w + 0.5)
: static_cast<int>(scale_w * w);
fw = (fw < 0) ? 0 : fw;
int fh = (with_align) ? static_cast<int>(scale_h * h + 0.5)
: static_cast<int>(scale_h * h);
fh = (fh < 0) ? 0 : fh;
int w_start = static_cast<int>(fw);
int h_start = static_cast<int>(fh);
int dst_index = n * dst_stride_batch + c * dst_stride_c +
h * dst_stride_h + w * dst_stride_w;
dst[dst_index] =
src[src_index + w_start * src_stride_w + h_start * src_stride_h];
}
}
}
}
}
class NearestInterpComputeTester : public arena::TestCase {
protected:
// common attributes for this op.
std::string input0_ = "X";
std::string sizetensor0_ = "SizeTensor0";
std::string sizetensor1_ = "SizeTensor1";
std::string input_scale_ = "Scale";
std::string input1_ = "OutSize";
std::string output_ = "Out";
float height_scale_ = 0.0f;
float width_scale_ = 0.0f;
int out_height_ = -1;
int out_width_ = -1;
bool align_corners_ = true;
std::string interp_method_ = "Nearest";
DDim dims_{{2, 3}};
DDim _dims0_{{2, 3, 3, 2}};
DDim _dims1_{{2}};
DDim sizetensor_dims_{{1}};
DDim scale_dims_{{1}};
public:
NearestInterpComputeTester(const Place& place,
const std::string& alias,
float height_scale,
float width_scale,
int out_height,
int out_width,
bool align_corners,
std::string interp_method)
: TestCase(place, alias),
height_scale_(height_scale),
width_scale_(width_scale),
out_height_(out_height),
out_width_(out_width),
align_corners_(align_corners),
interp_method_(interp_method) {}
void RunBaseline(Scope* scope) override {
width_scale_ = height_scale_;
auto* outputs = scope->NewTensor(output_);
CHECK(outputs);
outputs->Resize(dims_);
std::vector<const lite::Tensor*> inputs;
inputs.emplace_back(scope->FindTensor(input0_));
inputs.emplace_back(scope->FindTensor(input1_));
std::vector<const lite::Tensor*> SizeTensor(2);
SizeTensor[0] = scope->FindTensor(sizetensor0_);
SizeTensor[1] = scope->FindTensor(sizetensor1_);
const lite::Tensor* input_scale = scope->FindTensor(input_scale_);
float scale = height_scale_;
int in_h = inputs[0]->dims()[2];
int in_w = inputs[0]->dims()[3];
if (SizeTensor.size() > 0) {
auto new_size = get_new_shape(SizeTensor);
out_height_ = new_size[0];
out_width_ = new_size[1];
} else {
auto scale_tensor = input_scale;
if (scale_tensor != nullptr) {
auto scale_data = get_new_data_from_tensor<float>(scale_tensor);
scale = scale_data[0];
}
if (scale > 0) {
out_height_ = static_cast<int>(in_h * scale);
out_width_ = static_cast<int>(in_w * scale);
}
auto out_size = inputs[1];
if (out_size != nullptr) {
auto out_size_data = get_new_data_from_tensor<int>(out_size);
out_height_ = out_size_data[0];
out_width_ = out_size_data[1];
}
}
height_scale_ = scale;
width_scale_ = scale;
if (out_width_ != -1 && out_height_ != -1) {
height_scale_ = static_cast<float>(out_height_ / inputs[0]->dims()[2]);
width_scale_ = static_cast<float>(out_width_ / inputs[0]->dims()[3]);
}
int num_cout = inputs[0]->dims()[0];
int c_cout = inputs[0]->dims()[1];
outputs->Resize({num_cout, c_cout, out_height_, out_width_});
resize_nearest_align<float>(inputs, outputs, align_corners_);
}
void PrepareOpDesc(cpp::OpDesc* op_desc) {
op_desc->SetType("nearest_interp");
op_desc->SetInput("X", {input0_});
op_desc->SetInput("SizeTensor", {sizetensor0_, sizetensor1_});
op_desc->SetInput("Scale", {input_scale_});
op_desc->SetInput("OutSize", {input1_});
op_desc->SetOutput("Out", {output_});
op_desc->SetAttr("scale", height_scale_);
op_desc->SetAttr("out_h", out_height_);
op_desc->SetAttr("out_w", out_width_);
op_desc->SetAttr("align_corners", align_corners_);
op_desc->SetAttr("interp_method", interp_method_);
}
void PrepareData() override {
std::vector<float> data0(_dims0_.production());
for (int i = 0; i < _dims0_.production(); i++) {
data0[i] = i * 1.1;
}
std::vector<int> data1(_dims1_.production());
for (int i = 0; i < _dims1_.production(); i++) {
data1[i] = (i + 1) * 2;
}
SetCommonTensor(input0_, _dims0_, data0.data());
SetCommonTensor(input1_, _dims1_, data1.data());
std::vector<int> sizetensor_data(1);
sizetensor_data[0] = out_height_;
SetCommonTensor(sizetensor0_, sizetensor_dims_, sizetensor_data.data());
sizetensor_data[0] = out_width_;
SetCommonTensor(sizetensor1_, sizetensor_dims_, sizetensor_data.data());
std::vector<float> scale_data(1);
scale_data[0] = height_scale_;
SetCommonTensor(input_scale_, scale_dims_, scale_data.data());
}
};
void test_nearest_interp(Place place) {
std::string interp_method = "Nearest";
for (float scale : {0.123, 2., 1.2}) {
for (int out_height : {2, 1, 6}) {
for (int out_width : {2, 3, 5}) {
for (bool align_corners : {true, false}) {
std::unique_ptr<arena::TestCase> tester(
new NearestInterpComputeTester(place,
"def",
scale,
scale,
out_height,
out_width,
align_corners,
interp_method));
arena::Arena arena(std::move(tester), place, 2e-5);
arena.TestPrecision();
}
}
}
}
}
TEST(NearestInterp, precision) {
// #ifdef LITE_WITH_X86
// Place place(TARGET(kX86));
// #endif
#ifdef LITE_WITH_ARM
Place place(TARGET(kARM));
test_nearest_interp(place);
#endif
}
} // namespace lite
} // namespace paddle