提交 ec025abd 编写于 作者: D dingminghui 提交者: jackzhang235

feat(nearest_interp): add nearest_interp kernel and test

上级 d1a089f5
......@@ -16,6 +16,7 @@ lite_cc_library(subgraph_bridge_pool_op_mlu SRCS pool_op.cc DEPS ${subgraph_brid
lite_cc_library(subgraph_bridge_softmax_op_mlu SRCS softmax_op.cc DEPS ${subgraph_bridge_deps_mlu})
lite_cc_library(subgraph_bridge_fc_op_mlu SRCS fc_op.cc DEPS ${subgraph_bridge_deps_mlu})
lite_cc_library(subgraph_bridge_scale_op_mlu SRCS scale_op.cc DEPS ${subgraph_bridge_deps_mlu})
lite_cc_library(subgraph_bridge_interp_op_mlu SRCS interpolate_op.cc DEPS ${subgraph_bridge_deps_mlu})
set(mlu_subgraph_bridges
subgraph_bridge_registry
subgraph_bridge_utility_mlu
......@@ -28,6 +29,7 @@ set(mlu_subgraph_bridges
subgraph_bridge_fc_op_mlu
subgraph_bridge_batch_norm_op_mlu
subgraph_bridge_scale_op_mlu
subgraph_bridge_interp_op_mlu
CACHE INTERNAL "mlu_subgraph_bridges")
lite_cc_library(subgraph_test_helper_mlu SRCS test_helper.cc DEPS ${mlu_subgraph_bridges})
......@@ -39,5 +41,6 @@ lite_cc_test(test_pool_converter_mlu SRCS pool_op_test.cc DEPS scope optimizer t
lite_cc_test(test_softmax_converter_mlu SRCS softmax_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program ${mlu_subgraph_bridges} subgraph_compute_mlu subgraph_test_helper_mlu)
lite_cc_test(test_fc_converter_mlu SRCS fc_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program ${mlu_subgraph_bridges} subgraph_compute_mlu subgraph_test_helper_mlu)
lite_cc_test(test_scale_converter_mlu SRCS scale_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program ${mlu_subgraph_bridges} subgraph_compute_mlu subgraph_test_helper_mlu)
lite_cc_test(test_interp_converter_mlu SRCS interpolate_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program ${mlu_subgraph_bridges} subgraph_compute_mlu subgraph_test_helper_mlu)
message(STATUS "+++++ mlu_subgraph_bridges: ${mlu_subgraph_bridges}")
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/mlu/bridges/graph.h"
#include "lite/kernels/mlu/bridges/utility.h"
#include "lite/kernels/npu/bridges/registry.h"
namespace paddle {
namespace lite {
namespace subgraph {
namespace mlu {
int InterpolateConverter(void* ctx, OpLite* op, KernelBase* kernel) {
CHECK(ctx != nullptr);
CHECK(op != nullptr);
auto graph = static_cast<Graph*>(ctx);
auto op_info = op->op_info();
auto op_type = op_info->Type();
auto scope = op->scope();
VLOG(3) << "[MLU] Converting " + op_type + "...";
// Get input and output vars and op attributes
auto x_var_name = op_info->Input("X").front();
auto out_var_name = op_info->Output("Out").front();
auto x = scope->FindVar(x_var_name)->GetMutable<Tensor>();
auto x_dims = x->dims();
CHECK_EQ(x_dims.size(), 4);
auto scale = op_info->GetAttr<float>("scale");
auto out_w = op_info->GetAttr<int>("out_w");
auto out_h = op_info->GetAttr<int>("out_h");
auto align_corners = op_info->GetAttr<bool>("align_corners");
/* int align_mode = */
/* op_info->HasAttr("align_mode") ? op_info->GetAttr<int>("align_mode") :
* 1; */
/* auto interp_method = op_info->GetAttr<std::string>("interp_method"); */
/* if (align_mode == 0 && !align_corners) { */
/* LOG(WARNING) << "[NPU] align_mode = 0 && " */
/* "align_corners = false isn't " */
/* "supported in CNML"; */
/* return FAILED; */
/* } */
CHECK(graph->HasNode(x_var_name));
auto input_tensor = graph->GetNode(x_var_name);
auto out = scope->FindVar(out_var_name)->GetMutable<Tensor>();
/* int x_h, x_w; */
/* if (interp_method == "bilinear") { */
/* x_h = x_dims[1]; */
/* x_w = x_dims[2]; */
/* auto output_tensor = graph->AddNode( */
/* out_var_name, out->dims().Vectorize(), CNML_TENSOR, CNML_NHWC,
* graph->FPType()); */
/* } */
auto x_h = x_dims[1];
auto x_w = x_dims[2];
auto output_tensor = graph->AddNode(out_var_name,
out->dims().Vectorize(),
CNML_TENSOR,
CNML_NHWC,
graph->FPType());
// Priority: OutSize > scale > out_h/out_w
if (scale > 0) {
out_h = static_cast<int>(x_h * scale);
out_w = static_cast<int>(x_w * scale);
out_h = out_h > 0 ? out_h : -1;
out_w = out_w > 0 ? out_w : -1;
}
// Update out_h and out_w and create out_size node if has OutSize
if (HasInputArg(op_info, scope, "OutSize")) {
auto out_size_name = op_info->Input("OutSize").front();
auto out_size = scope->FindVar(out_size_name)->GetMutable<Tensor>();
CHECK_EQ(out_size->numel(), 2);
CHECK(out_size->persistable());
auto out_size_data = out_size->mutable_data<int>();
// Update out_h and out_w if has OutSize
out_h = out_size_data[0];
out_w = out_size_data[1];
}
/* std::cout << "@@@scale: " << scale << "; in| w, h: " << x_w << ":" << x_h
* << "; out| w, h: " << out_w << ":" << out_h << std::endl; */
cnmlBaseOp_t interp_op;
/* if (interp_method == "bilinear") { */
/* cnmlInterpOpParam_t interp_param; */
/* CNML_CALL(cnmlCreateInterpOpParam(&interp_param, out_w, out_h,
* align_corners)); */
/* CNML_CALL(cnmlCreateInterpOp(&interp_op, */
/* input_tensor->mlu_tensor(), */
/* output_tensor->mlu_tensor(), */
/* interp_param)); */
/* CNML_CALL(cnmlDestroyInterpOpParam(&interp_param)); */
/* } else if (interp_method == "nearest") { */
cnmlNearestNeighborOpParam_t nn_param;
CNML_CALL(cnmlCreateNearestNeighborOpParam(&nn_param, out_w, out_h));
CNML_CALL(cnmlSetNearestNeighborAlignCorner(&nn_param, align_corners));
CNML_CALL(cnmlCreateNearestNeighborOp(&interp_op,
input_tensor->mlu_tensor(),
output_tensor->mlu_tensor(),
nn_param));
CNML_CALL(cnmlDestroyNearestNeighborOpParam(&nn_param));
/* } else { */
/* LOG(WARNING) << "[MLU] Unsupported interpolate method: " <<
* interp_method; */
/* return FAILED; */
/* } */
graph->FuseOp(interp_op);
return SUCCESS;
}
} // namespace mlu
} // namespace subgraph
} // namespace lite
} // namespace paddle
// Registers InterpolateConverter as the kMLU subgraph bridge for the
// nearest_interp op type.
REGISTER_SUBGRAPH_BRIDGE(nearest_interp,
kMLU,
paddle::lite::subgraph::mlu::InterpolateConverter);
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/operators/interpolate_op.h"
#include <gtest/gtest.h>
#include <string>
#include "lite/core/device_info.h"
#include "lite/core/op_lite.h"
#include "lite/core/op_registry.h"
#include "lite/kernels/mlu/bridges/test_helper.h"
#include "lite/kernels/mlu/bridges/utility.h"
#include "lite/kernels/npu/bridges/registry.h"
namespace paddle {
namespace lite {
namespace subgraph {
namespace mlu {
// Host reference implementation of nearest-neighbor resize, used to check the
// device result.
//
// x:          source tensor, indexed as [num, channels, height, width].
// out:        destination tensor; it must already be resized to the target
//             shape, whose dims 2 and 3 give the output height and width.
// with_align: when true the ratio is (in - 1) / (out - 1) and the source index
//             is rounded (+0.5 then truncated); when false the ratio is
//             in / out and the source index is truncated.
//
// An output height/width of 0 or 1 uses a zero ratio, so every output element
// along that axis reads source index 0. This avoids dividing by
// (out - 1) == 0 and matches how BilinearInterpRef treats such sizes.
template <typename dtype>
void ResizeNearestAlign(const lite::Tensor* x,
                        lite::Tensor* out,
                        bool with_align) {
  auto x_dims = x->dims();
  int num = x_dims[0];
  int channels = x_dims[1];
  int hin = x_dims[2];
  int win = x_dims[3];
  int hout = out->dims()[2];
  int wout = out->dims()[3];

  dtype scale_w = 0;
  if (wout > 1) {
    scale_w = with_align ? (static_cast<float>(win - 1) / (wout - 1))
                         : (static_cast<float>(win) / wout);
  }
  dtype scale_h = 0;
  if (hout > 1) {
    scale_h = with_align ? (static_cast<float>(hin - 1) / (hout - 1))
                         : (static_cast<float>(hin) / hout);
  }

  const dtype* src = x->data<dtype>();
  dtype* dst = out->mutable_data<dtype>();

  int dst_stride_w = 1;
  int dst_stride_h = wout;
  int dst_stride_c = wout * hout;
  int dst_stride_batch = wout * hout * channels;
  int src_stride_w = 1;
  int src_stride_h = win;
  int src_stride_c = win * hin;
  int src_stride_batch = win * hin * channels;

  for (int n = 0; n < num; ++n) {
    for (int c = 0; c < channels; ++c) {
      int src_index = n * src_stride_batch + c * src_stride_c;
      for (int h = 0; h < hout; ++h) {
        // The source row depends only on h, so compute it once per row.
        int fh = with_align ? static_cast<int>(scale_h * h + 0.5)
                            : static_cast<int>(scale_h * h);
        fh = (fh < 0) ? 0 : fh;
        for (int w = 0; w < wout; ++w) {
          int fw = with_align ? static_cast<int>(scale_w * w + 0.5)
                              : static_cast<int>(scale_w * w);
          fw = (fw < 0) ? 0 : fw;
          int dst_index = n * dst_stride_batch + c * dst_stride_c +
                          h * dst_stride_h + w * dst_stride_w;
          dst[dst_index] =
              src[src_index + fw * src_stride_w + fh * src_stride_h];
        }
      }
    }
  }
}
// Host reference implementation of bilinear interpolation.
//
// x:             source tensor; must be 4-D, indexed as
//                [batch, channel, height, width].
// out:           destination tensor; it must already be resized to the target
//                shape, whose dims 2 and 3 give the output height and width.
// align_corners: when true the ratio is (in - 1) / (out - 1), otherwise
//                in / out. An output size <= 1 keeps a zero ratio.
// align_mode:    when 0 and align_corners is false, source coordinates are
//                computed as ratio * (i + 0.5) - 0.5; otherwise as ratio * i.
//
// If the spatial size is unchanged the input is copied to the output as is.
template <typename DType>
void BilinearInterpRef(const lite::Tensor* x,
                       lite::Tensor* out,
                       bool align_corners,
                       int align_mode) {
  auto x_dims = x->dims();
  // Validate the rank before any dimension is indexed.
  CHECK_EQ(x_dims.size(), 4);
  int batch_size = x_dims[0];
  int channel_size = x_dims[1];
  auto x_h = x_dims[2];
  auto x_w = x_dims[3];

  auto out_dims = out->dims();
  int out_h = out_dims[2];
  int out_w = out_dims[3];

  // copy from x if no change
  if (x_h == out_h && x_w == out_w) {
    out->CopyDataFrom(*x);
    return;
  }

  float ratio_h = 0.f;
  float ratio_w = 0.f;
  if (out_h > 1) {
    ratio_h = (align_corners) ? static_cast<float>(x_h - 1) / (out_h - 1)
                              : static_cast<float>(x_h) / out_h;
  }
  if (out_w > 1) {
    ratio_w = (align_corners) ? static_cast<float>(x_w - 1) / (out_w - 1)
                              : static_cast<float>(x_w) / out_w;
  }

  // naive bilinear interpolation
  auto x_data = x->data<DType>();
  auto out_data = out->mutable_data<DType>();
  bool align_flag = (align_mode == 0 && !align_corners);

  // Per-output-row lookup tables: upper/lower source rows and their distances.
  // The vectors are constructed with their final size because they are
  // filled through operator[]; reserve() alone would leave them empty.
  std::vector<int> vy_n(out_h), vy_s(out_h);
  std::vector<float> vd_n(out_h), vd_s(out_h);
  for (int k = 0; k < out_h; k++) {
    int yn = align_flag ? static_cast<int>(ratio_h * (k + 0.5) - 0.5)
                        : static_cast<int>(ratio_h * k);
    yn = (yn > 0) ? yn : 0;
    int ys = (yn + 1) < (x_h - 1) ? (yn + 1) : (x_h - 1);
    float idx_src_y = ratio_h * (k + 0.5) - 0.5;
    idx_src_y = (idx_src_y > 0) ? idx_src_y : 0;
    float dn = align_flag ? idx_src_y - yn : ratio_h * k - yn;
    float ds = 1.f - dn;
    vy_n[k] = yn;
    vy_s[k] = ys;
    vd_n[k] = dn;
    vd_s[k] = ds;
  }

  // Per-output-column lookup tables: left/right source columns and distances.
  std::vector<int> vx_w(out_w), vx_e(out_w);
  std::vector<float> vd_w(out_w), vd_e(out_w);
  for (int l = 0; l < out_w; l++) {
    int xw = align_flag ? static_cast<int>(ratio_w * (l + 0.5) - 0.5)
                        : static_cast<int>(ratio_w * l);
    xw = (xw > 0) ? xw : 0;
    int xe = (xw + 1) < (x_w - 1) ? (xw + 1) : (x_w - 1);
    float idx_src_x = ratio_w * (l + 0.5) - 0.5;
    idx_src_x = (idx_src_x > 0) ? idx_src_x : 0;
    float dw = align_flag ? idx_src_x - xw : ratio_w * l - xw;
    float de = 1.f - dw;
    vx_w[l] = xw;
    vx_e[l] = xe;
    vd_w[l] = dw;
    vd_e[l] = de;
  }

  // Strides of the source tensor, innermost dimension last.
  std::vector<int64_t> x_strides(x_dims.size(), 1);
  for (int idx = x_strides.size() - 2; idx >= 0; idx--) {
    x_strides[idx] = x_strides[idx + 1] * x_dims[idx + 1];
  }

  for (int i = 0; i < batch_size; i++) {
    for (int j = 0; j < channel_size; j++) {
      for (int k = 0; k < out_h; k++) {
        for (int l = 0; l < out_w; l++) {
          // Four neighbours: (upper,left), (lower,left), (upper,right),
          // (lower,right).
          DType x0 = x_data[i * x_strides[0] + j * x_strides[1] +
                            vy_n[k] * x_strides[2] + vx_w[l] * x_strides[3]];
          DType x1 = x_data[i * x_strides[0] + j * x_strides[1] +
                            vy_s[k] * x_strides[2] + vx_w[l] * x_strides[3]];
          DType x2 = x_data[i * x_strides[0] + j * x_strides[1] +
                            vy_n[k] * x_strides[2] + vx_e[l] * x_strides[3]];
          DType x3 = x_data[i * x_strides[0] + j * x_strides[1] +
                            vy_s[k] * x_strides[2] + vx_e[l] * x_strides[3]];
          *out_data = x0 * vd_s[k] * vd_e[l] + x1 * vd_n[k] * vd_e[l] +
                      x2 * vd_s[k] * vd_w[l] + x3 * vd_n[k] * vd_w[l];
          out_data++;
        }
      }
    }
  }
}
class InterpComputeTester {
protected:
// common attributes for this op.
std::string x_var_name = "X";
std::string outsize_var_name = "OutSize";
std::string out_var_name = "Out";
std::string out_ref_var_name = "out_ref";
DDim dims_{{1, 2, 3, 4}};
Scope scope;
std::string interp_method_ = "nearest";
float scale_ = -1.f;
int out_h_ = -1;
int out_w_ = -1;
bool align_corners_ = true;
int align_mode_ = 1;
bool use_outsize_ = false;
public:
InterpComputeTester(const std::string& alias,
DDim dims,
std::string interp_method = "nearest",
float scale = -1.f,
int out_h = -1,
int out_w = -1,
bool align_corners = true,
int align_mode = 1,
bool use_outsize = false)
: dims_(dims),
interp_method_(interp_method),
scale_(scale),
out_h_(out_h),
out_w_(out_w),
align_corners_(align_corners),
align_mode_(align_mode),
use_outsize_(use_outsize) {}
void Execute(float abs_error) {
cpp::OpDesc op_desc;
auto* x = scope.Var(x_var_name)->GetMutable<Tensor>();
auto* out = scope.Var(out_var_name)->GetMutable<Tensor>();
auto* outsize = scope.Var(outsize_var_name)->GetMutable<Tensor>();
auto* outref = scope.Var(out_ref_var_name)->GetMutable<Tensor>();
int out_h = out_h_;
int out_w = out_w_;
if (scale_ > 0) {
out_h = static_cast<int>(dims_[2] * scale_);
out_w = static_cast<int>(dims_[3] * scale_);
}
x->Resize(dims_);
/* printf("----output tensor dims: %ld, %d, %d, %ld\n", dims_[0], out_h,
* out_w, dims_[1]); */
std::vector<int64_t> out_shape_nchw = {dims_[0], dims_[1], out_h, out_w};
out->Resize(DimNCHW2NHWC(out_shape_nchw));
outref->Resize(out_shape_nchw);
outsize->Resize({2});
FillTensor<float, float>(x, -1.f, 1.f);
if (use_outsize_) {
outsize->mutable_data<int>()[0] = out_h;
outsize->mutable_data<int>()[1] = out_w;
outsize->set_persistable(true);
}
if (interp_method_ == "nearest") {
op_desc.SetType("nearest_interp");
} else if (interp_method_ == "bilinear") {
op_desc.SetType("bilinear_interp");
} else {
LOG(FATAL) << "unsupport";
}
op_desc.SetInput("X", {x_var_name});
if (use_outsize_) {
op_desc.SetInput("OutSize", {outsize_var_name});
}
op_desc.SetOutput("Out", {out_var_name});
op_desc.SetAttr("scale", scale_);
op_desc.SetAttr("out_h", out_h_);
op_desc.SetAttr("out_w", out_w_);
op_desc.SetAttr("align_corners", align_corners_);
op_desc.SetAttr("align_mode", align_mode_);
op_desc.SetAttr("interp_method", interp_method_);
auto op = CreateOp<operators::InterpolateOp>(op_desc, &scope);
if (interp_method_ == "nearest") {
ResizeNearestAlign<float>(x, outref, align_corners_);
} else if (interp_method_ == "bilinear") {
BilinearInterpRef<float>(x, outref, align_corners_, align_mode_);
}
int in = dims_[0], ic = dims_[1], ih = dims_[2], iw = dims_[3];
Tensor input_trans;
input_trans.Resize(dims_);
transpose(x->mutable_data<float>(),
input_trans.mutable_data<float>(),
{in, ic, ih, iw},
{0, 2, 3, 1});
x->CopyDataFrom(input_trans);
x->Resize(DimNCHW2NHWC(dims_.Vectorize()));
if (use_outsize_) {
LaunchOp(op, {x_var_name, outsize_var_name}, {out_var_name});
} else {
LaunchOp(op, {x_var_name}, {out_var_name});
}
auto* out_ref_data = outref->mutable_data<float>();
Tensor output_trans;
output_trans.Resize(out_shape_nchw);
transpose(
out->mutable_data<float>(),
output_trans.mutable_data<float>(),
{static_cast<int>(dims_[0]), out_h, out_w, static_cast<int>(dims_[1])},
{0, 3, 1, 2});
auto* out_data = output_trans.mutable_data<float>();
for (int i = 0; i < out->dims().production(); ++i) {
EXPECT_NEAR(out_data[i], out_ref_data[i], abs_error);
}
}
};
// Exercises nearest interpolation with explicit out_h/out_w targets (no
// scale, no OutSize input). Bilinear is not exercised here.
void TestInterpOuthw(float abs_error = 2e-5) {
  const std::vector<std::vector<int64_t>> input_shapes{{3, 4, 8, 9}};
  const std::vector<std::string> methods{"nearest"};
  for (const auto& shape : input_shapes) {
    for (const auto& method : methods) {
      for (const int target_h : {6, 8, 12}) {
        for (const int target_w : {6, 9}) {
          printf("testcase %s: out_w %d, out_h %d\n",
                 method.c_str(),
                 target_w,
                 target_h);
          InterpComputeTester tester(
              "def", DDim(shape), method, -1.f, target_h, target_w);
          tester.Execute(abs_error);
        }
      }
    }
  }
}
// Exercises nearest interpolation where the target size is derived from the
// scale attribute. Bilinear is not exercised here.
void TestInterpScale(float abs_error = 2e-5) {
  const std::vector<std::vector<int64_t>> input_shapes{{3, 4, 8, 9}};
  const std::vector<std::string> methods{"nearest"};
  for (const auto& shape : input_shapes) {
    for (const auto& method : methods) {
      for (const float factor : {0.3f, 1.f, 1.7f}) {
        printf("testcase %s: scale: %f\n", method.c_str(), factor);
        InterpComputeTester tester("def", DDim(shape), method, factor);
        tester.Execute(abs_error);
      }
    }
  }
}
// Exercises nearest interpolation where a 4x4 target size is supplied through
// the OutSize input. Bilinear is not exercised here.
void TestInterpOutsize(float abs_error = 2e-5) {
  const std::vector<std::vector<int64_t>> input_shapes{{3, 4, 8, 9}};
  const std::vector<std::string> methods{"nearest"};
  for (const auto& shape : input_shapes) {
    for (const auto& method : methods) {
      printf("testcase %s: outsize: %d %d\n", method.c_str(), 4, 4);
      InterpComputeTester tester("def",
                                 DDim(shape),
                                 method,
                                 -1,
                                 4,
                                 4,
                                 /*align_corners=*/true,
                                 /*align_mode=*/1,
                                 /*use_outsize=*/true);
      tester.Execute(abs_error);
    }
  }
}
// Exercises nearest interpolation at scale 0.4 with align_corners both on and
// off.
void TestInterpAlignCorners(float abs_error = 2e-5) {
  const std::vector<std::vector<int64_t>> input_shapes{{3, 4, 8, 9}};
  for (const auto& shape : input_shapes) {
    for (const bool aligned : {true, false}) {
      printf(
          "testcase nearest: scale: 0.4, out_w -1 out_h -1, align_corners %d\n",
          aligned);
      InterpComputeTester tester(
          "def", DDim(shape), "nearest", 0.4, -1, -1, aligned);
      tester.Execute(abs_error);
    }
  }
}
// Exercises bilinear interpolation at scale 0.7 over every combination of
// align_corners and align_mode.
void TestInterpAlignMode(float abs_error = 2e-5) {
  const std::vector<std::vector<int64_t>> input_shapes{{3, 4, 8, 9}};
  for (const auto& shape : input_shapes) {
    for (const bool aligned : {true, false}) {
      for (const int mode : {0, 1}) {
        printf(
            "testcase bilinear: scale: 0.7, out_w -1 out_h -1, align_corners "
            "%d, mode %d\n",
            aligned,
            mode);
        InterpComputeTester tester(
            "def", DDim(shape), "bilinear", 0.7, -1, -1, aligned, mode);
        tester.Execute(abs_error);
      }
    }
  }
}
// gtest entry point for the interpolate bridge; runs the nearest-neighbor
// cases with one shared tolerance.
TEST(MLUBridges, interpolate) {
  const float tolerance = 2e-5;
  TestInterpOuthw(tolerance);
  TestInterpScale(tolerance);
  // bug, not usable
  // TestInterpOutsize(tolerance);
  TestInterpAlignCorners(tolerance);
  // only for bilinear interp
  // TestInterpAlignMode(tolerance);
}
} // namespace mlu
} // namespace subgraph
} // namespace lite
} // namespace paddle
// References the nearest_interp bridge for kMLU; presumably this forces the
// registration to be linked into the test binary (confirm against the macro
// definition).
USE_SUBGRAPH_BRIDGE(nearest_interp, kMLU);
......@@ -22,3 +22,5 @@ USE_SUBGRAPH_BRIDGE(pool2d, kMLU);
USE_SUBGRAPH_BRIDGE(softmax, kMLU);
USE_SUBGRAPH_BRIDGE(batch_norm, kMLU);
USE_SUBGRAPH_BRIDGE(fc, kMLU);
USE_SUBGRAPH_BRIDGE(nearest_interp, kMLU);
USE_SUBGRAPH_BRIDGE(leaky_relu, kMLU);
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册