From fc913904949dbcc15bdcfa983537251183ed877a Mon Sep 17 00:00:00 2001
From: zhupengyang
Date: Wed, 27 Nov 2019 19:48:46 +0800
Subject: [PATCH] [NPU] fix elementwise_add op bridge and unit test (#2503)

add elementwise_sub, mul, div op bridge
test=develop
---
 lite/kernels/npu/bridges/elementwise_ops.cc   | 56 ++++++++----
 .../npu/bridges/elementwise_ops_test.cc       | 88 ++++++++++---------
 2 files changed, 87 insertions(+), 57 deletions(-)

diff --git a/lite/kernels/npu/bridges/elementwise_ops.cc b/lite/kernels/npu/bridges/elementwise_ops.cc
index 2ec757ab14..da56343676 100644
--- a/lite/kernels/npu/bridges/elementwise_ops.cc
+++ b/lite/kernels/npu/bridges/elementwise_ops.cc
@@ -30,34 +30,50 @@ node_map_type ElementwiseConverter(
   auto unique_op_type = lite::npu::UniqueName(op_type);
   LOG(INFO) << "[NPU] Converting " + op_type + "...";
 
-  std::shared_ptr<ge::op::Eltwise> elementwise_node =
-      std::make_shared<ge::op::Eltwise>(unique_op_type);
-
   auto x_var_name = op_info->Input("X").front();
   auto y_var_name = op_info->Input("Y").front();
-
-  CHECK_EQ(op_info->GetAttr<int>("axis"), -1)
-      << "[NPU] elementwise only support inputs with same size";
-
   CHECK(inputs_map.find(x_var_name) != inputs_map.end());
-  elementwise_node->set_input_x1(*inputs_map.at(x_var_name));
-  lite::npu::OpList::Global().add(inputs_map.at(x_var_name));
+  std::shared_ptr<ge::Operator> elementwise_node = nullptr;
+  std::shared_ptr<ge::Operator> x_node = inputs_map.at(x_var_name);
 
+  std::shared_ptr<ge::Operator> y_node = nullptr;
   if (inputs_map.find(y_var_name) != inputs_map.end()) {
-    elementwise_node->set_input_x2(*inputs_map.at(y_var_name));
-    lite::npu::OpList::Global().add(inputs_map.at(y_var_name));
+    y_node = inputs_map.at(y_var_name);
   } else {
     auto y_const_node = std::make_shared<ge::op::Const>(y_var_name);
-    auto* y = scope->FindVar(y_var_name)->GetMutable<lite::Tensor>();
+    auto* y = scope->FindMutableTensor(y_var_name);
     y_const_node->set_attr_value(lite::npu::CvtTensor(y));
-    elementwise_node->set_input_x2(*y_const_node);
-    lite::npu::OpList::Global().add(y_const_node);
+    y_node = y_const_node;
   }
+  lite::npu::OpList::Global().add(x_node);
+  lite::npu::OpList::Global().add(y_node);
 
-  lite::npu::OpList::Global().add(elementwise_node);
+  if (op_type == "elementwise_add" ||
+      op_type == "fusion_elementwise_add_activation") {
+    auto elt_node = std::make_shared<ge::op::Add>(unique_op_type);
+    elt_node->set_input_x1(*x_node);
+    elt_node->set_input_x2(*y_node);
+    elementwise_node = elt_node;
+  } else if (op_type == "elementwise_sub") {
+    auto elt_node = std::make_shared<ge::op::Sub>(unique_op_type);
+    elt_node->set_input_x1(*x_node);
+    elt_node->set_input_x2(*y_node);
+    elementwise_node = elt_node;
+  } else if (op_type == "elementwise_mul") {
+    auto elt_node = std::make_shared<ge::op::Mul>(unique_op_type);
+    elt_node->set_input_x(*x_node);
+    elt_node->set_input_y(*y_node);
+    elementwise_node = elt_node;
+  } else if (op_type == "elementwise_div") {
+    auto elt_node = std::make_shared<ge::op::RealDiv>(unique_op_type);
+    elt_node->set_input_x1(*x_node);
+    elt_node->set_input_x2(*y_node);
+    elementwise_node = elt_node;
+  } else {
+    LOG(FATAL) << "unsupported op type: " << op_type;
+  }
 
-  // paddlelite has sum only
-  elementwise_node->set_attr_mode(1);
+  lite::npu::OpList::Global().add(elementwise_node);
 
   node_map_type outputs_map;
   if (op_type == "fusion_elementwise_add_activation") {
@@ -86,3 +102,9 @@ REGISTER_NPU_BRIDGE(elementwise_add,
                     paddle::lite::kernels::npu::bridges::ElementwiseConverter);
 REGISTER_NPU_BRIDGE(fusion_elementwise_add_activation,
                     paddle::lite::kernels::npu::bridges::ElementwiseConverter);
+REGISTER_NPU_BRIDGE(elementwise_sub,
+                    paddle::lite::kernels::npu::bridges::ElementwiseConverter);
+REGISTER_NPU_BRIDGE(elementwise_mul,
+                    paddle::lite::kernels::npu::bridges::ElementwiseConverter);
+REGISTER_NPU_BRIDGE(elementwise_div,
+                    paddle::lite::kernels::npu::bridges::ElementwiseConverter);
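The converter above reduces to a fixed mapping from Paddle op type to graph node kind, with only the Mul node wired through set_input_x/set_input_y instead of set_input_x1/set_input_x2. A minimal standalone sketch of that dispatch, using a stand-in Node type invented for illustration (the real bridge builds ge::op::* objects as in the diff; this only mirrors its control flow):

```cpp
#include <iostream>
#include <map>
#include <memory>
#include <string>

// Stand-in for a graph operator node; illustrative only.
struct Node {
  explicit Node(const std::string& kind) : kind(kind) {}
  std::string kind;
};

std::shared_ptr<Node> ConvertElementwise(const std::string& op_type) {
  // Mirrors the if/else chain in ElementwiseConverter above.
  static const std::map<std::string, std::string> kNodeKind{
      {"elementwise_add", "Add"},
      {"fusion_elementwise_add_activation", "Add"},
      {"elementwise_sub", "Sub"},
      {"elementwise_mul", "Mul"},
      {"elementwise_div", "RealDiv"},
  };
  auto it = kNodeKind.find(op_type);
  if (it == kNodeKind.end()) return nullptr;  // LOG(FATAL) in the real code
  return std::make_shared<Node>(it->second);
}

int main() {
  for (const char* op : {"elementwise_mul", "relu"}) {
    auto node = ConvertElementwise(op);
    std::cout << op << " -> " << (node ? node->kind : "unsupported") << "\n";
  }
  return 0;
}
```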
diff --git a/lite/kernels/npu/bridges/elementwise_ops_test.cc b/lite/kernels/npu/bridges/elementwise_ops_test.cc
index 0e2fc9f262..68795b0a23 100644
--- a/lite/kernels/npu/bridges/elementwise_ops_test.cc
+++ b/lite/kernels/npu/bridges/elementwise_ops_test.cc
@@ -29,37 +29,28 @@ template <typename dtype>
 void elementwise_add_ref(const std::shared_ptr<operators::ElementwiseOp> op) {
   Scope* scope = op->scope();
   const OpInfo* op_info = op->op_info();
-  auto x = scope->FindVar(op_info->Input("X").front())->GetMutable<Tensor>();
-  auto y = scope->FindVar(op_info->Input("Y").front())->GetMutable<Tensor>();
-  auto out =
-      scope->FindVar(op_info->Output("Out").front())->GetMutable<Tensor>();
+  auto x = scope->FindTensor("x");
+  auto y = scope->FindTensor("y");
+  auto out = scope->FindMutableTensor("out_ref");
+  out->Resize(x->dims());
   auto x_data = x->data<dtype>();
   auto y_data = y->data<dtype>();
-  dtype* out_data = out->mutable_data<dtype>();
+  auto out_data = out->mutable_data<dtype>();
   auto x_dims = x->dims();
   auto y_dims = y->dims();
   int axis = op_info->GetAttr<int>("axis");
 
   if (axis < 0) {
-    axis = x_dims.size() - y_dims.size();
-  }
-  int batch = 1;
-  int channels = 1;
-  int num = 1;
-  for (int i = 0; i < axis; ++i) {
-    batch *= x_dims[i];
-  }
-  for (int i = 0; i < y_dims.size(); ++i) {
-    channels *= y_dims[i];
-  }
-  for (int i = y_dims.size() + axis; i < x_dims.size(); ++i) {
-    num *= x_dims[i];
+    axis += x_dims.size();
   }
+  int batch = x_dims[0] / y_dims[0];
+  int channels = y->numel();
+  int num = x->numel() / channels / batch;
 
   // do elementwise add/sub/max...
-  std::string elt_type = "add";
-  if (elt_type == "add") {
+  std::string op_type = op_info->Type();
+  if (op_type == "elementwise_add") {
     for (int i = 0; i < batch; ++i) {
       for (int j = 0; j < channels; ++j) {
         int offset = (i * channels + j) * num;
@@ -73,7 +64,7 @@ void elementwise_add_ref(const std::shared_ptr<operators::ElementwiseOp> op) {
         }
       }
     }
-  } else if (elt_type == "sub") {
+  } else if (op_type == "elementwise_sub") {
     for (int i = 0; i < batch; ++i) {
       for (int j = 0; j < channels; ++j) {
         int offset = (i * channels + j) * num;
@@ -87,7 +78,7 @@ void elementwise_add_ref(const std::shared_ptr<operators::ElementwiseOp> op) {
         }
       }
     }
-  } else if (elt_type == "mul") {
+  } else if (op_type == "elementwise_mul") {
     for (int i = 0; i < batch; ++i) {
       for (int j = 0; j < channels; ++j) {
         int offset = (i * channels + j) * num;
@@ -101,7 +92,21 @@ void elementwise_add_ref(const std::shared_ptr<operators::ElementwiseOp> op) {
         }
       }
     }
-  } else if (elt_type == "max") {
+  } else if (op_type == "elementwise_div") {
+    for (int i = 0; i < batch; ++i) {
+      for (int j = 0; j < channels; ++j) {
+        int offset = (i * channels + j) * num;
+        const dtype* din_ptr = x_data + offset;
+        const dtype diny_data = y_data[j];
+        dtype* dout_ptr = out_data + offset;
+        for (int k = 0; k < num; ++k) {
+          *dout_ptr = *din_ptr / diny_data;
+          dout_ptr++;
+          din_ptr++;
+        }
+      }
+    }
+  } else if (op_type == "elementwise_max") {
     for (int i = 0; i < batch; ++i) {
       for (int j = 0; j < channels; ++j) {
         int offset = (i * channels + j) * num;
@@ -116,11 +121,14 @@ void elementwise_add_ref(const std::shared_ptr<operators::ElementwiseOp> op) {
       }
     }
   } else {
-    LOG(FATAL) << "unsupported Elementwise type: " << elt_type;
+    LOG(FATAL) << "unsupported Elementwise type: " << op_type;
   }
 }
 
-void test_elementwise_add(int bs, int ic, int ih, int iw, int axis) {
+void test_elementwise_add(const std::vector<int64_t>& x_shape,
+                          const std::vector<int64_t>& y_shape,
+                          int axis,
+                          std::string elt_type) {
   // prepare input&output variables
   Scope scope;
   std::string x_var_name = "x";
@@ -131,16 +139,16 @@ void test_elementwise_add(int bs, int ic, int ih, int iw, int axis) {
   std::string y_var_name = "y";
   std::string out_var_name = "out";
   std::string out_ref_var_name = "out_ref";
   auto* x = scope.Var(x_var_name)->GetMutable<Tensor>();
   auto* y = scope.Var(y_var_name)->GetMutable<Tensor>();
   auto* out = scope.Var(out_var_name)->GetMutable<Tensor>();
   auto* out_ref = scope.Var(out_ref_var_name)->GetMutable<Tensor>();
-  x->Resize({bs, ic, ih, iw});
-  y->Resize({bs, ic, ih, iw});
+  x->Resize(x_shape);
+  y->Resize(y_shape);
 
   // initialize input&output data
-  FillTensor<float>(x);
-  FillTensor<float>(y);
+  FillTensor<float, int>(x, 1, 5);
+  FillTensor<float, int>(y, 1, 5);
 
   // initialize op desc
   cpp::OpDesc opdesc;
-  opdesc.SetType("elementwise_add");
+  opdesc.SetType("elementwise_" + elt_type);
   opdesc.SetInput("X", {x_var_name});
   opdesc.SetInput("Y", {y_var_name});
   opdesc.SetOutput("Out", {out_var_name});
@@ -149,7 +157,6 @@ void test_elementwise_add(int bs, int ic, int ih, int iw, int axis) {
   // create and convert op to NPU model, then run it on NPU
   auto op = CreateOp<operators::ElementwiseOp>(opdesc, &scope);
   LauchOp(op, {x_var_name}, {out_var_name});
-  out_ref->CopyDataFrom(*out);
 
   // execute reference implementation and save to output tensor
   elementwise_add_ref<float>(op);
@@ -158,19 +165,14 @@ void test_elementwise_add(int bs, int ic, int ih, int iw, int axis) {
   // compare results
   auto* out_data = out->mutable_data<float>();
   auto* out_ref_data = out_ref->mutable_data<float>();
   for (int i = 0; i < out->dims().production(); i++) {
-    EXPECT_NEAR(out_data[i], out_ref_data[i], 1e-1);
+    EXPECT_NEAR(out_data[i], out_ref_data[i], 1e-2);
   }
 }
 
 TEST(NPUBridges, elementwise_add) {
-  for (auto bs : {1, 4, 7}) {
-    for (auto ic : {1, 4, 7}) {
-      for (auto ih : {1, 4, 7}) {
-        for (auto iw : {1, 4, 7}) {
-          for (auto axis : {-1}) test_elementwise_add(bs, ic, ih, iw, axis);
-        }
-      }
-    }
+  for (auto elt_type : {"add", "sub", "mul", "div"}) {
+    test_elementwise_add({1, 2, 3, 4}, {1, 2, 1, 1}, 1, elt_type);
+    test_elementwise_add({1, 2, 3, 4}, {1, 2, 3, 4}, 3, elt_type);
   }
 }
 
@@ -182,3 +184,9 @@ TEST(NPUBridges, elementwise_add) {
 
 USE_LITE_OP(elementwise_add);
 USE_NPU_BRIDGE(elementwise_add);
+USE_LITE_OP(elementwise_sub);
+USE_NPU_BRIDGE(elementwise_sub);
+USE_LITE_OP(elementwise_mul);
+USE_NPU_BRIDGE(elementwise_mul);
+USE_LITE_OP(elementwise_div);
+USE_NPU_BRIDGE(elementwise_div);
-- 
GitLab
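The rewritten reference implementation views x as a [batch, channels, num] block with channels = y->numel(), so each y[j] is broadcast across the trailing num elements. Below is a minimal self-contained check of that decomposition on the first shape pair exercised by the test ({1, 2, 3, 4} against {1, 2, 1, 1}); plain standard C++, with all names local to this example:

```cpp
#include <cassert>
#include <numeric>
#include <vector>

int main() {
  // Shapes from TEST(NPUBridges, elementwise_add): x {1, 2, 3, 4}, y {1, 2, 1, 1}.
  const std::vector<int> x_shape{1, 2, 3, 4}, y_shape{1, 2, 1, 1};
  const int x_numel = 1 * 2 * 3 * 4;           // 24
  const int channels = 1 * 2 * 1 * 1;          // y->numel() == 2
  const int batch = x_shape[0] / y_shape[0];   // 1
  const int num = x_numel / channels / batch;  // 12

  std::vector<float> x(x_numel), y{10.f, 20.f}, out(x_numel);
  std::iota(x.begin(), x.end(), 0.f);  // x[i] = i

  // Same triple loop as elementwise_add_ref in the patch.
  for (int i = 0; i < batch; ++i) {
    for (int j = 0; j < channels; ++j) {
      const int offset = (i * channels + j) * num;
      for (int k = 0; k < num; ++k) {
        out[offset + k] = x[offset + k] + y[j];
      }
    }
  }

  // Element (0, 1, 0, 0) lives at flat index 12 and picks up y[1].
  assert(out[12] == x[12] + 20.f);
  return 0;
}
```

The second test case ({1, 2, 3, 4} against itself) degenerates to channels == x_numel and num == 1, i.e. a plain per-element operation, which is why the same loop covers both shapes.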