diff --git a/lite/npu/bridge/CMakeLists.txt b/lite/npu/bridge/CMakeLists.txt index 583ea0e2eed4b703a6175916b5386f891e7a496a..e4c1cca5a8e82a3de2fb70676a68f707ae1eba97 100644 --- a/lite/npu/bridge/CMakeLists.txt +++ b/lite/npu/bridge/CMakeLists.txt @@ -15,7 +15,7 @@ lite_cc_library(npu_bridge_batch_norm_op SRCS batch_norm_op.cc DEPS ${npu_bridge lite_cc_library(npu_bridge_elementwise_op SRCS elementwise_ops.cc DEPS ${npu_bridge_deps}) lite_cc_library(npu_bridge_reshape_op SRCS reshape_op.cc DEPS ${npu_bridge_deps}) lite_cc_library(npu_bridge_conv_transpose_op SRCS conv_transpose_op.cc DEPS ${npu_bridge_deps}) -lite_cc_library(npu_bridge_bilinear_interp_op SRCS bilinear_interp_op.cc DEPS ${npu_bridge_deps}) +lite_cc_library(npu_bridge_interpolate_op SRCS interpolate_op.cc DEPS ${npu_bridge_deps}) lite_cc_library(npu_bridge_transpose_op SRCS transpose_op.cc DEPS ${npu_bridge_deps}) lite_cc_library(npu_bridge_split_op SRCS split_op.cc DEPS ${npu_bridge_deps}) lite_cc_library(npu_bridge_concat_op SRCS concat_op.cc DEPS ${npu_bridge_deps}) @@ -35,7 +35,7 @@ set(npu_bridges npu_bridge_elementwise_op npu_bridge_reshape_op npu_bridge_conv_transpose_op - npu_bridge_bilinear_interp_op + npu_bridge_interpolate_op npu_bridge_transpose_op npu_bridge_split_op npu_bridge_concat_op @@ -55,7 +55,7 @@ lite_cc_test(test_npu_bridge_batch_norm_op SRCS batch_norm_op_test.cc DEPS npu_t lite_cc_test(test_npu_bridge_elementwise_op SRCS elementwise_ops_test.cc DEPS npu_test_helper) lite_cc_test(test_npu_bridge_reshape_op SRCS reshape_op_test.cc DEPS npu_test_helper) lite_cc_test(test_npu_bridge_conv_transpose_op SRCS conv_transpose_op_test.cc DEPS npu_test_helper) -lite_cc_test(test_npu_bridge_bilinear_interp_op SRCS bilinear_interp_op_test.cc DEPS npu_test_helper) +lite_cc_test(test_npu_bridge_interpolate_op SRCS interpolate_op_test.cc DEPS npu_test_helper) lite_cc_test(test_npu_bridge_transpose_op SRCS transpose_op_test.cc DEPS npu_test_helper) lite_cc_test(test_npu_bridge_split_op SRCS split_op_test.cc DEPS npu_test_helper) lite_cc_test(test_npu_bridge_concat_op SRCS concat_op_test.cc DEPS npu_test_helper) diff --git a/lite/npu/bridge/bilinear_interp_op.cc b/lite/npu/bridge/interpolate_op.cc similarity index 53% rename from lite/npu/bridge/bilinear_interp_op.cc rename to lite/npu/bridge/interpolate_op.cc index c7f3289af371f44160d1ed8364892c854d3aa254..cb63cca01442ab65fb46c042d1c0dabebff2b9f2 100644 --- a/lite/npu/bridge/bilinear_interp_op.cc +++ b/lite/npu/bridge/interpolate_op.cc @@ -26,17 +26,20 @@ namespace lite { namespace npu { namespace bridge { -node_map_type BilinearInterpConverter( - const std::shared_ptr interp_op, +node_map_type InterpolateConverter( + const std::shared_ptr interpolate_op, const node_map_type& inputs_map) { - auto scope = interp_op->scope(); - auto op_info = interp_op->op_info(); + auto scope = interpolate_op->scope(); + auto op_info = interpolate_op->op_info(); auto op_type = op_info->Type(); auto unique_op_type = UniqueName(op_type); LOG(INFO) << "Converting " + op_type + "..."; // get input, output and attributes from lite op auto x_var_name = op_info->Input("X").front(); + CHECK(inputs_map.count(x_var_name)); + OpList::Global().add(inputs_map.at(x_var_name)); + auto x = scope->FindVar(x_var_name)->GetMutable(); auto x_dims = x->dims(); auto x_h = x_dims[2]; @@ -46,7 +49,6 @@ node_map_type BilinearInterpConverter( auto out_w = op_info->GetAttr("out_w"); auto out_h = op_info->GetAttr("out_h"); auto align_corners = op_info->GetAttr("align_corners"); - auto interp_method = op_info->GetAttr("interp_method"); int align_mode = op_info->GetAttr("align_mode"); CHECK(!(align_mode == 0 && !align_corners)) << "align_mode = 0 && align_corners = false isn't supported in NPU DDK"; @@ -59,56 +61,74 @@ node_map_type BilinearInterpConverter( out_w = out_w > 0 ? out_w : -1; } - // create interp node and set input node from inputs_map - auto interp_node = std::make_shared(unique_op_type); - CHECK(inputs_map.count(x_var_name)); - interp_node->set_input_x(*inputs_map.at(x_var_name)); - OpList::Global().add(inputs_map.at(x_var_name)); - OpList::Global().add(interp_node); - // update out_h and out_w if has OutSize - bool is_dyn_out_size = false; + bool inputs_map_has_w = false; if (HasInputArg(op_info, scope, "OutSize")) { auto out_size_var_name = op_info->Input("OutSize").front(); - if (!inputs_map.count(out_size_var_name)) { + if (inputs_map.count(out_size_var_name)) { + inputs_map_has_w = true; + } else { auto out_size = scope->FindVar(out_size_var_name)->GetMutable(); - auto out_size_dims = out_size->dims(); - CHECK_EQ(out_size_dims.size(), 1); - CHECK_EQ(out_size_dims.production(), 2); + CHECK_EQ(out_size->numel(), 2); auto out_size_data = out_size->mutable_data(); // update out_h and out_w if has OutSize out_h = out_size_data[0]; out_w = out_size_data[1]; - } else { + } + } + + node_map_type outputs_map; + auto interp_method = op_info->GetAttr("interp_method"); + if (interp_method == "bilinear") { + auto interp_node = std::make_shared(unique_op_type); + OpList::Global().add(interp_node); + interp_node->set_input_x(*inputs_map.at(x_var_name)); + if (inputs_map_has_w) { + auto out_size_var_name = op_info->Input("OutSize").front(); interp_node->set_input_w(*inputs_map.at(out_size_var_name)); OpList::Global().add(inputs_map.at(out_size_var_name)); - is_dyn_out_size = true; // using dynamic output size + } else { + const float largest_multiple = 7.0f; + float multiple = static_cast(x_h * x_w) / (out_h * out_w); + CHECK_LT(multiple, largest_multiple) + << "multiple=(ih*iw)/(oh*ow)=" << multiple + << " is too large, should not exceed " << largest_multiple + << " in NPU DDK"; + auto w_const_node = + std::make_shared(unique_op_type + "/w"); + w_const_node->set_attr_value( + CreateTensorAndFillData(std::vector({out_h, out_w}))); + interp_node->set_input_w(*w_const_node); + OpList::Global().add(w_const_node); } - } - if (!is_dyn_out_size) { - CHECK_GT(out_h, 0); - CHECK_GT(out_w, 0); - const float largest_multiple = 7.0f; - float multiple = static_cast(x_h * x_w) / (out_h * out_w); - CHECK_LT(multiple, largest_multiple) - << "multiple=(ih*iw)/(oh*ow)=" << multiple - << " is too large, should not exceed " << largest_multiple - << " in NPU DDK"; - auto w_const_node = std::make_shared(unique_op_type + "/w"); - w_const_node->set_attr_value( - CreateTensorAndFillData(std::vector({out_h, out_w}))); - interp_node->set_input_w(*w_const_node); - OpList::Global().add(w_const_node); + interp_node->set_attr_output_dim_mode( + 2); // 0: zoom_factor, 1: shrink_factor, 2: height/width + interp_node->set_attr_align_corners(align_corners); + outputs_map[op_info->Output("Out").front()] = interp_node; + } else if (interp_method == "nearest") { + auto interp_node = + std::make_shared(unique_op_type); + OpList::Global().add(interp_node); + interp_node->set_input_image(*inputs_map.at(x_var_name)); + if (inputs_map_has_w) { + auto out_size_var_name = op_info->Input("OutSize").front(); + interp_node->set_input_size(*inputs_map.at(out_size_var_name)); + OpList::Global().add(inputs_map.at(out_size_var_name)); + } else { + auto w_const_node = + std::make_shared(unique_op_type + "/w"); + w_const_node->set_attr_value( + CreateTensorAndFillData(std::vector({out_h, out_w}))); + interp_node->set_input_size(*w_const_node); + OpList::Global().add(w_const_node); + } + interp_node->set_attr_align_corners(align_corners); + outputs_map[op_info->Output("Out").front()] = interp_node; + } else { + LOG(FATAL) << "unsupported interpolate method: " << interp_method; } - // set attributes - interp_node->set_attr_output_dim_mode( - 2); // 0: zoom_factor, 1: shrink_factor, 2: height/width - interp_node->set_attr_align_corners(align_corners); - - node_map_type outputs_map; - outputs_map[op_info->Output("Out").front()] = interp_node; return outputs_map; } @@ -118,4 +138,6 @@ node_map_type BilinearInterpConverter( } // namespace paddle REGISTER_NPU_BRIDGE(bilinear_interp, - paddle::lite::npu::bridge::BilinearInterpConverter); + paddle::lite::npu::bridge::InterpolateConverter); +REGISTER_NPU_BRIDGE(nearest_interp, + paddle::lite::npu::bridge::InterpolateConverter); diff --git a/lite/npu/bridge/bilinear_interp_op_test.cc b/lite/npu/bridge/interpolate_op_test.cc similarity index 58% rename from lite/npu/bridge/bilinear_interp_op_test.cc rename to lite/npu/bridge/interpolate_op_test.cc index 402b93909649485ea4157e5650ee80410c7b75b8..30c52530d0351f2124eb8b88541dfed48645b0d5 100644 --- a/lite/npu/bridge/bilinear_interp_op_test.cc +++ b/lite/npu/bridge/interpolate_op_test.cc @@ -12,12 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "lite/operators/interpolate_op.h" #include #include #include "lite/core/op_registry.h" #include "lite/npu/bridge/registry.h" #include "lite/npu/bridge/test_helper.h" -#include "lite/operators/interpolate_op.h" namespace paddle { namespace lite { @@ -161,17 +161,95 @@ void bilinear_interp_ref(const std::shared_ptr op) { } } -void test_bilinear_interp(int bs, - int ic, - int ih, - int iw, - int oh, - int ow, - float scale, - int out_size_h, - int out_size_w, - bool align_corners, - int align_mode) { +template +void nearest_interp_ref(const std::shared_ptr op) { + auto scope = op->scope(); + auto op_info = op->op_info(); + auto x = scope->FindVar(op_info->Input("X").front())->GetMutable(); + auto out = + scope->FindVar(op_info->Output("Out").front())->GetMutable(); + auto x_dims = x->dims(); + CHECK_EQ(x_dims.size(), 4); + auto scale = op_info->GetAttr("scale"); + auto out_w = op_info->GetAttr("out_w"); + auto out_h = op_info->GetAttr("out_h"); + auto align_corners = op_info->GetAttr("align_corners"); + // int align_mode = op_info->GetAttr("align_mode"); + auto interp_method = op_info->GetAttr("interp_method"); + CHECK_EQ(interp_method, "nearest"); + + int x_h = x_dims[2]; + int x_w = x_dims[3]; + if (scale > 0) { + out_h = static_cast(x_h * scale); + out_w = static_cast(x_w * scale); + } + if (op_info->HasInput("OutSize")) { + auto out_size_var_names = op_info->Input("OutSize"); + if (out_size_var_names.size() > 0) { + auto out_size_var_name = out_size_var_names.front(); + auto out_size = + scope->FindVar(out_size_var_name)->GetMutable(); + CHECK_EQ(out_size->numel(), 2); + auto out_size_data = out_size->mutable_data(); + out_h = out_size_data[0]; + out_w = out_size_data[1]; + } + } + CHECK_GT(out_h, 0); + CHECK_GT(out_w, 0); + out->Resize({x_dims[0], x_dims[1], out_h, out_w}); + + float ratio_h = 0.f; + float ratio_w = 0.f; + if (out_h > 1) { + ratio_h = align_corners ? static_cast(x_h - 1.0) / (out_h - 1.0) + : static_cast(x_h) / out_h; + } + if (out_w > 1) { + ratio_w = align_corners ? static_cast(x_w - 1.0) / (out_w - 1.0) + : static_cast(x_w) / out_w; + } + + auto x_data = x->data(); + auto out_data = out->mutable_data(); + auto out_dims = out->dims(); + std::vector x_strides(x_dims.size(), 1); + for (int idx = x_strides.size() - 2; idx >= 0; idx--) { + x_strides[idx] = x_strides[idx + 1] * x_dims[idx + 1]; + } + + for (int n = 0; n < out_dims[0]; n++) { + for (int c = 0; c < out_dims[1]; c++) { + for (int h = 0; h < out_dims[2]; h++) { + for (int w = 0; w < out_dims[3]; w++) { + int in_i = ratio_h * h; + int in_j = ratio_w * w; + if (align_corners) { + in_i = ratio_h * h + 0.5; + in_j = ratio_w * w + 0.5; + } + *out_data = x_data[n * x_strides[0] + c * x_strides[1] + + in_i * x_strides[2] + in_j * x_strides[3]]; + out_data++; + } + } + } + } +} + +void test_interpolate(int bs, + int ic, + int ih, + int iw, + int oh, + int ow, + float scale, + int out_size_h, + int out_size_w, + bool align_corners, + int align_mode, + std::string interp_method) { // prepare input&output variables Scope scope; std::string x_var_name("x"); @@ -190,7 +268,7 @@ void test_bilinear_interp(int bs, // initialize op desc cpp::OpDesc opdesc; - opdesc.SetType("bilinear_interp"); + opdesc.SetType(interp_method + "_interp"); opdesc.SetInput("X", {x_var_name}); opdesc.SetOutput("Out", {out_var_name}); opdesc.SetAttr("out_h", oh); @@ -198,7 +276,7 @@ void test_bilinear_interp(int bs, opdesc.SetAttr("scale", scale); opdesc.SetAttr("align_corners", static_cast(align_corners)); opdesc.SetAttr("align_mode", static_cast(align_mode)); - opdesc.SetAttr("interp_method", std::string("bilinear")); + opdesc.SetAttr("interp_method", interp_method); if (out_size_h > 0 && out_size_w > 0) { auto out_size_dims = out_size->dims(); CHECK_EQ(out_size_dims.size(), 1); @@ -211,7 +289,11 @@ void test_bilinear_interp(int bs, // create op and execute reference implementation auto op = CreateOp(opdesc, &scope); - bilinear_interp_ref(op); + if (interp_method == "bilinear") { + bilinear_interp_ref(op); + } else { + nearest_interp_ref(op); + } out_ref->CopyDataFrom(*out); // convert op to NPU model, then run it on NPU @@ -245,50 +327,56 @@ TEST(NPUBridges, bilinear_interp) { for (auto out_size_w : {0, 2, 12}) { for (auto align_corners : {true, false}) { for (auto align_mode : {0, 1}) { - int act_oh = 0, act_ow = 0; - if (out_size_h > 0 && out_size_w > 0) { - act_oh = out_size_h; - act_ow = out_size_w; - } else if (scale > 1e-5) { - act_oh = static_cast(ih * scale); - act_ow = static_cast(iw * scale); - } else if (oh > 0 && ow > 0) { - act_oh = oh; - act_ow = ow; - } - if (act_oh <= 0 || act_ow <= 0) { - continue; + for (auto interp_method : {"bilinear", "nearest"}) { + int act_oh = 0, act_ow = 0; + if (out_size_h > 0 && out_size_w > 0) { + act_oh = out_size_h; + act_ow = out_size_w; + } else if (scale > 1e-5) { + act_oh = static_cast(ih * scale); + act_ow = static_cast(iw * scale); + } else if (oh > 0 && ow > 0) { + act_oh = oh; + act_ow = ow; + } + if (act_oh <= 0 || act_ow <= 0) { + continue; + } + // TODO(hong19860320) multiple=(ih*iw)/(oh*ow) + // should + // not exceed 7.0 in NPU DDK, delete the following + // lines + // if the limination is removed. + const float largest_multiple = 7.0f; + float multiple = + static_cast(ih * iw) / (act_oh * act_ow); + if (multiple > largest_multiple) { + continue; + } + if (align_mode == 0 && !align_corners) { + continue; + } + VLOG(3) << "bs: " << bs << " ic: " << ic + << " ih: " << ih << " iw: " << iw + << " oh: " << oh << " ow: " << ow + << " scale: " << scale + << " out_size: " << out_size_h << "," + << out_size_w + << " align_corners: " << align_corners + << " align_mode: " << align_mode; + test_interpolate(bs, + ic, + ih, + iw, + oh, + ow, + scale, + out_size_h, + out_size_w, + align_corners, + align_mode, + interp_method); } - // TODO(hong19860320) multiple=(ih*iw)/(oh*ow) should - // not exceed 7.0 in NPU DDK, delete the following lines - // if the limination is removed. - const float largest_multiple = 7.0f; - float multiple = - static_cast(ih * iw) / (act_oh * act_ow); - if (multiple > largest_multiple) { - continue; - } - if (align_mode == 0 && !align_corners) { - continue; - } - VLOG(3) - << "bs: " << bs << " ic: " << ic << " ih: " << ih - << " iw: " << iw << " oh: " << oh << " ow: " << ow - << " scale: " << scale - << " out_size: " << out_size_h << "," << out_size_w - << " align_corners: " << align_corners - << " align_mode: " << align_mode; - test_bilinear_interp(bs, - ic, - ih, - iw, - oh, - ow, - scale, - out_size_h, - out_size_w, - align_corners, - align_mode); } } } @@ -301,7 +389,7 @@ TEST(NPUBridges, bilinear_interp) { } } #else - test_bilinear_interp(3, 4, 5, 3, 8, 4, 0.6f, 3, 0, true, 0); + test_interpolate(1, 1, 4, 3, 0, 0, 1.f, 3, 6, false, 1, "nearest"); #endif } @@ -312,3 +400,6 @@ TEST(NPUBridges, bilinear_interp) { USE_LITE_OP(bilinear_interp); USE_NPU_BRIDGE(bilinear_interp); + +USE_LITE_OP(nearest_interp); +USE_NPU_BRIDGE(nearest_interp);