Unverified commit fc913904, authored by zhupengyang, committed by GitHub

[NPU] fix elementwise_add op bridge and unit test (#2503)

add elementwise_sub, mul, div op bridges

test=develop
Parent e8ea4a56
@@ -30,34 +30,50 @@ node_map_type ElementwiseConverter(
   auto unique_op_type = lite::npu::UniqueName(op_type);
   LOG(INFO) << "[NPU] Converting " + op_type + "...";
 
-  std::shared_ptr<ge::op::Eltwise> elementwise_node =
-      std::make_shared<ge::op::Eltwise>(unique_op_type);
   auto x_var_name = op_info->Input("X").front();
   auto y_var_name = op_info->Input("Y").front();
+  CHECK_EQ(op_info->GetAttr<int>("axis"), -1)
+      << "[NPU] elementwise only support inputs with same size";
   CHECK(inputs_map.find(x_var_name) != inputs_map.end());
-  elementwise_node->set_input_x1(*inputs_map.at(x_var_name));
-  lite::npu::OpList::Global().add(inputs_map.at(x_var_name));
+  std::shared_ptr<ge::Operator> elementwise_node = nullptr;
+  std::shared_ptr<ge::Operator> x_node = inputs_map.at(x_var_name);
+  std::shared_ptr<ge::Operator> y_node = nullptr;
   if (inputs_map.find(y_var_name) != inputs_map.end()) {
-    elementwise_node->set_input_x2(*inputs_map.at(y_var_name));
-    lite::npu::OpList::Global().add(inputs_map.at(y_var_name));
+    y_node = inputs_map.at(y_var_name);
   } else {
     auto y_const_node = std::make_shared<ge::op::Const>(y_var_name);
-    auto* y = scope->FindVar(y_var_name)->GetMutable<Tensor>();
+    auto* y = scope->FindMutableTensor(y_var_name);
     y_const_node->set_attr_value(lite::npu::CvtTensor(y));
-    elementwise_node->set_input_x2(*y_const_node);
-    lite::npu::OpList::Global().add(y_const_node);
+    y_node = y_const_node;
   }
+  lite::npu::OpList::Global().add(x_node);
+  lite::npu::OpList::Global().add(y_node);
 
-  lite::npu::OpList::Global().add(elementwise_node);
-  // paddlelite has sum only
-  elementwise_node->set_attr_mode(1);
+  if (op_type == "elementwise_add" ||
+      op_type == "fusion_elementwise_add_activation") {
+    auto elt_node = std::make_shared<ge::op::Add>(unique_op_type);
+    elt_node->set_input_x1(*x_node);
+    elt_node->set_input_x2(*y_node);
+    elementwise_node = elt_node;
+  } else if (op_type == "elementwise_sub") {
+    auto elt_node = std::make_shared<ge::op::Sub>(unique_op_type);
+    elt_node->set_input_x1(*x_node);
+    elt_node->set_input_x2(*y_node);
+    elementwise_node = elt_node;
+  } else if (op_type == "elementwise_mul") {
+    auto elt_node = std::make_shared<ge::op::Mul>(unique_op_type);
+    elt_node->set_input_x(*x_node);
+    elt_node->set_input_y(*y_node);
+    elementwise_node = elt_node;
+  } else if (op_type == "elementwise_div") {
+    auto elt_node = std::make_shared<ge::op::RealDiv>(unique_op_type);
+    elt_node->set_input_x1(*x_node);
+    elt_node->set_input_x2(*y_node);
+    elementwise_node = elt_node;
+  } else {
+    LOG(FATAL) << "unsupported op type: " << op_type;
+  }
+  lite::npu::OpList::Global().add(elementwise_node);
 
   node_map_type outputs_map;
   if (op_type == "fusion_elementwise_add_activation") {
@@ -86,3 +102,9 @@ REGISTER_NPU_BRIDGE(elementwise_add,
                     paddle::lite::kernels::npu::bridges::ElementwiseConverter);
 REGISTER_NPU_BRIDGE(fusion_elementwise_add_activation,
                     paddle::lite::kernels::npu::bridges::ElementwiseConverter);
+REGISTER_NPU_BRIDGE(elementwise_sub,
+                    paddle::lite::kernels::npu::bridges::ElementwiseConverter);
+REGISTER_NPU_BRIDGE(elementwise_mul,
+                    paddle::lite::kernels::npu::bridges::ElementwiseConverter);
+REGISTER_NPU_BRIDGE(elementwise_div,
+                    paddle::lite::kernels::npu::bridges::ElementwiseConverter);
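The remaining hunks are from the matching unit test. A reminder for consumers: registering a bridge is not enough when linking statically; the test keeps the new ops and bridges alive with USE_ declaration pairs, the same ones this patch appends at the bottom of the diff. Any other target exercising these bridges would presumably need the equivalent block:

// Declaration pairs mirroring what the patch adds to the test file; a target
// that links PaddleLite statically needs these so the registrations survive.
USE_LITE_OP(elementwise_sub);
USE_NPU_BRIDGE(elementwise_sub);
USE_LITE_OP(elementwise_mul);
USE_NPU_BRIDGE(elementwise_mul);
USE_LITE_OP(elementwise_div);
USE_NPU_BRIDGE(elementwise_div);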
@@ -29,37 +29,28 @@ template <typename dtype>
 void elementwise_add_ref(const std::shared_ptr<operators::ElementwiseOp> op) {
   Scope* scope = op->scope();
   const OpInfo* op_info = op->op_info();
-  auto x = scope->FindVar(op_info->Input("X").front())->GetMutable<Tensor>();
-  auto y = scope->FindVar(op_info->Input("Y").front())->GetMutable<Tensor>();
-  auto out =
-      scope->FindVar(op_info->Output("Out").front())->GetMutable<Tensor>();
+  auto x = scope->FindTensor("x");
+  auto y = scope->FindTensor("y");
+  auto out = scope->FindMutableTensor("out_ref");
+  out->Resize(x->dims());
   auto x_data = x->data<dtype>();
   auto y_data = y->data<dtype>();
-  dtype* out_data = out->mutable_data<dtype>();
+  auto out_data = out->mutable_data<dtype>();
   auto x_dims = x->dims();
   auto y_dims = y->dims();
   int axis = op_info->GetAttr<int>("axis");
   if (axis < 0) {
-    axis = x_dims.size() - y_dims.size();
-  }
-  int batch = 1;
-  int channels = 1;
-  int num = 1;
-  for (int i = 0; i < axis; ++i) {
-    batch *= x_dims[i];
-  }
-  for (int i = 0; i < y_dims.size(); ++i) {
-    channels *= y_dims[i];
-  }
-  for (int i = y_dims.size() + axis; i < x_dims.size(); ++i) {
-    num *= x_dims[i];
+    axis += x_dims.size();
   }
+  int batch = x_dims[0] / y_dims[0];
+  int channels = y->numel();
+  int num = x->numel() / channels / batch;
 
   // do elementwise add/sub/max...
-  std::string elt_type = "add";
-  if (elt_type == "add") {
+  std::string op_type = op_info->Type();
+  if (op_type == "elementwise_add") {
     for (int i = 0; i < batch; ++i) {
       for (int j = 0; j < channels; ++j) {
         int offset = (i * channels + j) * num;
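The rewritten reference implementation deserves a worked example: a negative axis is normalized with axis += x_dims.size(), and Y is then applied over X as a [batch, channels, num] decomposition with channels = numel(Y); note the decomposition itself only consumes the shapes, not the normalized axis. A self-contained sketch using the shapes from the first updated test case (plain C++, computed values shown in comments):

#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  // First updated test case below: X {1, 2, 3, 4}, Y {1, 2, 1, 1}.
  std::vector<int64_t> x_dims = {1, 2, 3, 4};
  std::vector<int64_t> y_dims = {1, 2, 1, 1};
  int axis = -1;
  if (axis < 0) axis += static_cast<int>(x_dims.size());  // -1 -> 3

  int64_t x_numel = 1, y_numel = 1;
  for (int64_t d : x_dims) x_numel *= d;
  for (int64_t d : y_dims) y_numel *= d;

  // Same decomposition as elementwise_add_ref: each of the "channels" values
  // of Y is replicated across "num" inner elements in each "batch" block.
  int64_t batch = x_dims[0] / y_dims[0];     // 1 / 1 = 1
  int64_t channels = y_numel;                // 2
  int64_t num = x_numel / channels / batch;  // 24 / 2 / 1 = 12
  std::cout << batch << " " << channels << " " << num << std::endl;  // 1 2 12
  return 0;
}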
@@ -73,7 +64,7 @@ void elementwise_add_ref(const std::shared_ptr<operators::ElementwiseOp> op) {
         }
       }
     }
-  } else if (elt_type == "sub") {
+  } else if (op_type == "elementwise_sub") {
     for (int i = 0; i < batch; ++i) {
       for (int j = 0; j < channels; ++j) {
         int offset = (i * channels + j) * num;
@@ -87,7 +78,7 @@ void elementwise_add_ref(const std::shared_ptr<operators::ElementwiseOp> op) {
         }
       }
     }
-  } else if (elt_type == "mul") {
+  } else if (op_type == "elementwise_mul") {
     for (int i = 0; i < batch; ++i) {
       for (int j = 0; j < channels; ++j) {
         int offset = (i * channels + j) * num;
@@ -101,7 +92,21 @@ void elementwise_add_ref(const std::shared_ptr<operators::ElementwiseOp> op) {
         }
       }
     }
-  } else if (elt_type == "max") {
+  } else if (op_type == "elementwise_div") {
+    for (int i = 0; i < batch; ++i) {
+      for (int j = 0; j < channels; ++j) {
+        int offset = (i * channels + j) * num;
+        const dtype* din_ptr = x_data + offset;
+        const dtype diny_data = y_data[j];
+        dtype* dout_ptr = out_data + offset;
+        for (int k = 0; k < num; ++k) {
+          *dout_ptr = *din_ptr / diny_data;
+          dout_ptr++;
+          din_ptr++;
+        }
+      }
+    }
+  } else if (op_type == "elementwise_max") {
     for (int i = 0; i < batch; ++i) {
       for (int j = 0; j < channels; ++j) {
         int offset = (i * channels + j) * num;
@@ -116,11 +121,14 @@ void elementwise_add_ref(const std::shared_ptr<operators::ElementwiseOp> op) {
       }
     }
   } else {
-    LOG(FATAL) << "unsupported Elementwise type: " << elt_type;
+    LOG(FATAL) << "unsupported Elementwise type: " << op_type;
   }
 }
 
-void test_elementwise_add(int bs, int ic, int ih, int iw, int axis) {
+void test_elementwise_add(const std::vector<int64_t>& x_shape,
+                          const std::vector<int64_t>& y_shape,
+                          int axis,
+                          std::string elt_type) {
   // prepare input&output variables
   Scope scope;
   std::string x_var_name = "x";
@@ -131,16 +139,16 @@ void test_elementwise_add(int bs, int ic, int ih, int iw, int axis) {
   auto* y = scope.Var(y_var_name)->GetMutable<Tensor>();
   auto* out = scope.Var(out_var_name)->GetMutable<Tensor>();
   auto* out_ref = scope.Var(out_ref_var_name)->GetMutable<Tensor>();
-  x->Resize({bs, ic, ih, iw});
-  y->Resize({bs, ic, ih, iw});
+  x->Resize(x_shape);
+  y->Resize(y_shape);
 
   // initialize input&output data
-  FillTensor<float>(x);
-  FillTensor<float>(y);
+  FillTensor<float>(x, 1, 5);
+  FillTensor<float>(y, 1, 5);
 
   // initialize op desc
   cpp::OpDesc opdesc;
-  opdesc.SetType("elementwise_add");
+  opdesc.SetType("elementwise_" + elt_type);
   opdesc.SetInput("X", {x_var_name});
   opdesc.SetInput("Y", {y_var_name});
   opdesc.SetOutput("Out", {out_var_name});
@@ -149,7 +157,6 @@ void test_elementwise_add(int bs, int ic, int ih, int iw, int axis) {
   // create and convert op to NPU model, then run it on NPU
   auto op = CreateOp<operators::ElementwiseOp>(opdesc, &scope);
   LauchOp(op, {x_var_name}, {out_var_name});
-  out_ref->CopyDataFrom(*out);
 
   // execute reference implementation and save to output tensor
   elementwise_add_ref<float>(op);
@@ -158,19 +165,14 @@ void test_elementwise_add(int bs, int ic, int ih, int iw, int axis) {
   auto* out_data = out->mutable_data<float>();
   auto* out_ref_data = out_ref->mutable_data<float>();
   for (int i = 0; i < out->dims().production(); i++) {
-    EXPECT_NEAR(out_data[i], out_ref_data[i], 1e-1);
+    EXPECT_NEAR(out_data[i], out_ref_data[i], 1e-2);
   }
 }
 
 TEST(NPUBridges, elementwise_add) {
-  for (auto bs : {1, 4, 7}) {
-    for (auto ic : {1, 4, 7}) {
-      for (auto ih : {1, 4, 7}) {
-        for (auto iw : {1, 4, 7}) {
-          for (auto axis : {-1}) test_elementwise_add(bs, ic, ih, iw, axis);
-        }
-      }
-    }
+  for (auto elt_type : {"add", "sub", "mul", "div"}) {
+    test_elementwise_add({1, 2, 3, 4}, {1, 2, 1, 1}, 1, elt_type);
+    test_elementwise_add({1, 2, 3, 4}, {1, 2, 3, 4}, 3, elt_type);
   }
 }
@@ -182,3 +184,9 @@ TEST(NPUBridges, elementwise_add) {
 USE_LITE_OP(elementwise_add);
 USE_NPU_BRIDGE(elementwise_add);
+USE_LITE_OP(elementwise_sub);
+USE_NPU_BRIDGE(elementwise_sub);
+USE_LITE_OP(elementwise_mul);
+USE_NPU_BRIDGE(elementwise_mul);
+USE_LITE_OP(elementwise_div);
+USE_NPU_BRIDGE(elementwise_div);