Unverified commit 1ee60474, authored by hong19860320, committed by GitHub

[NPU] fix conv2d npu bridge, supports bias from input map (#1839)

* [NPU] fix conv2d npu bridge, supports bias from input map
test=develop

* [NPU] support more dimensions for the bias of conv2d NPU bridge
test=develop
Parent 1f5ce9c9
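The crux of the change is how the bridge maps the Paddle bias tensor onto a shape the HiAI graph accepts. Below is a minimal standalone sketch of that selection logic; the helper name `ChooseBiasShape` is illustrative and not part of the codebase, while `bs`, `oc`, and `output_dims` follow the converter's local names in the diff that follows.

#include <cstdint>
#include <vector>

// Map the bias element count onto an NPU-friendly 4-D shape.
// Returns true for a per-channel bias ({oc} -> {1, oc, 1, 1}), which the
// Convolution op can take directly via set_input_b; the other supported
// shapes force the bridge to append an elementwise Add node instead.
bool ChooseBiasShape(int64_t bias_size,
                     int64_t bs,
                     int64_t oc,
                     const std::vector<int64_t>& output_dims,  // {n, oc, oh, ow}
                     std::vector<int64_t>* bias_shape) {
  int64_t output_size = 1;
  for (int64_t d : output_dims) output_size *= d;
  if (bias_size == oc) {
    *bias_shape = {1, oc, 1, 1};  // case 0: {oc}
    return true;
  }
  if (bias_size == output_size / bs) {
    // case 1: {1, oc, oh, ow}
    *bias_shape = {1, output_dims[1], output_dims[2], output_dims[3]};
  } else if (bias_size == output_size) {
    *bias_shape = output_dims;  // case 2: {n, oc, oh, ow}
  }
  return false;
}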
@@ -39,11 +39,20 @@ node_map_type ConvConverter(const std::shared_ptr<lite::OpLite> conv_op,
   auto input_var_name = op_info->Input("Input").front();
   auto input = scope->FindVar(input_var_name)->GetMutable<lite::Tensor>();
   auto input_dims = input->dims();
+  auto output_var_name = op_info->Output("Output").front();
+  auto output = scope->FindVar(output_var_name)->GetMutable<lite::Tensor>();
+  auto output_dims = output->dims();
   auto filter_var_name = op_info->Input("Filter").front();
   auto filter = scope->FindVar(filter_var_name)->GetMutable<lite::Tensor>();
   auto filter_dims = filter->dims();
+  auto bs = input_dims[0];
+  auto ic = input_dims[1];
+  auto oc = filter_dims[0];
   CHECK_EQ(input_dims.size(), 4);
+  CHECK_EQ(output_dims.size(), 4);
   CHECK_EQ(filter_dims.size(), 4);
+  CHECK_EQ(output_dims[0], bs);
+  CHECK_EQ(output_dims[1], oc);
   auto strides = op_info->GetAttr<std::vector<int>>("strides");
   auto paddings = op_info->GetAttr<std::vector<int>>("paddings");
   auto groups = op_info->GetAttr<int>("groups");
@@ -56,7 +65,7 @@ node_map_type ConvConverter(const std::shared_ptr<lite::OpLite> conv_op,
   // check depthwise mode, and decide whether use ConvolutionDepthwise Op
   bool use_depthwise_conv =
       false;  // whether use ge::op::ConvolutionDepthwise ?
-  bool is_depthwise_mode = input_dims[1] == groups && filter_dims[0] == groups;
+  bool is_depthwise_mode = ic == groups && oc == groups;
   if (is_depthwise_mode &&
       !((groups == 1 || groups >= 5) && dilations[0] == 1 &&
         dilations[1] == 1)) {
@@ -78,17 +87,44 @@ node_map_type ConvConverter(const std::shared_ptr<lite::OpLite> conv_op,
   OpList::Global().add(filter_const_node);
   // create bias node if has bias
-  std::shared_ptr<ge::op::Const> bias_const_node = nullptr;
+  // supports the bias nodes with the following dimensions
+  // 0: {oc}
+  // 1: {1, oc, oh, ow}
+  // 2: {n, oc, oh, ow}
+  std::shared_ptr<ge::Operator> bias_node = nullptr;
+  bool is_channel_bias = false;
   if (HasInputArg(op_info, scope, "Bias")) {
     auto bias_var_name = op_info->Input("Bias").front();
-    CHECK(!inputs_map.count(bias_var_name));
     auto* bias = scope->FindVar(bias_var_name)->GetMutable<lite::Tensor>();
-    auto channel_size = bias->dims().production();
-    CHECK_EQ(channel_size, filter_dims[0]);
-    bias_const_node = std::make_shared<ge::op::Const>(bias_var_name);
-    bias_const_node->set_attr_value(
-        CvtFromLiteTensor(bias, {1, channel_size, 1, 1}));
-    OpList::Global().add(bias_const_node);
+    auto bias_dims = bias->dims();
+    auto bias_data_size = bias_dims.production();
+    auto output_data_size = output_dims.production();
+    std::vector<int64_t> bias_shape;
+    if (bias_data_size == oc) {
+      // 0: {oc}
+      bias_shape = {1, oc, 1, 1};
+      is_channel_bias = true;
+    } else if (bias_data_size == output_data_size / bs) {
+      // 1: {1, oc, oh, ow}
+      bias_shape = {1, output_dims[1], output_dims[2], output_dims[3]};
+    } else if (bias_data_size == output_data_size) {
+      // 2: {n, oc, oh, ow}
+      bias_shape = output_dims.Vectorize();
+    } else {
+      LOG(ERROR) << "bias dimension " << bias_dims
+                 << " isn't supported in conv2d Op when output dimension is "
+                 << output_dims;
+    }
+    if (inputs_map.count(bias_var_name)) {
+      // bias node from input map
+      bias_node = inputs_map.at(bias_var_name);
+    } else {
+      // bias node with const data
+      auto bias_const_node = std::make_shared<ge::op::Const>(bias_var_name);
+      bias_const_node->set_attr_value(CvtFromLiteTensor(bias, bias_shape));
+      bias_node = bias_const_node;
+    }
+    OpList::Global().add(bias_node);
   }
   // create conv node and set input, filter, bias nodes and attributes
@@ -113,10 +149,12 @@ node_map_type ConvConverter(const std::shared_ptr<lite::OpLite> conv_op,
         ge::AttrValue::LIST_INT({filter_dims[2], filter_dims[3]}));
     OpList::Global().add(depthwise_conv_node);
     conv_node = depthwise_conv_node;
-    if (bias_const_node != nullptr) {
+    // ConvolutionDepthwise Op doesn't support bias, so append Add node to
+    // support bias
+    if (bias_node != nullptr) {
      auto add_node = std::make_shared<ge::op::Add>(unique_op_type + "/add");
      add_node->set_input_x1(*depthwise_conv_node);
-      add_node->set_input_x2(*bias_const_node);
+      add_node->set_input_x2(*bias_node);
      OpList::Global().add(add_node);
      conv_node = add_node;
    }
@@ -136,11 +174,21 @@ node_map_type ConvConverter(const std::shared_ptr<lite::OpLite> conv_op,
         ge::AttrValue::LIST_INT({strides[0], strides[1]}));
     common_conv_node->set_attr_kernel(
         ge::AttrValue::LIST_INT({filter_dims[2], filter_dims[3]}));
-    if (bias_const_node != nullptr) {
-      common_conv_node->set_input_b(*bias_const_node);
-    }
     OpList::Global().add(common_conv_node);
     conv_node = common_conv_node;
+    // Convolution Op only supports bias with dimension {1, oc, 1, 1},
+    // so append Add node if dimension is {1, oc, oh, ow} or {n, oc, oh, ow}
+    if (bias_node != nullptr) {
+      if (is_channel_bias) {
+        common_conv_node->set_input_b(*bias_node);
+      } else {
+        auto add_node = std::make_shared<ge::op::Add>(unique_op_type + "/add");
+        add_node->set_input_x1(*common_conv_node);
+        add_node->set_input_x2(*bias_node);
+        OpList::Global().add(add_node);
+        conv_node = add_node;
+      }
+    }
   }
   CHECK(conv_node);
......
@@ -41,16 +41,6 @@ void conv_ref(const std::shared_ptr<operators::ConvOpLite> op) {
   std::vector<int32_t> dilations =
       op_info->GetAttr<std::vector<int32_t>>("dilations");
   bool fuse_relu = op_info->GetAttr<bool>("fuse_relu");
-  Tensor* bias = nullptr;
-  float* bias_data = nullptr;
-  if (op_info->HasInput("Bias")) {
-    auto bias_var_names = op_info->Input("Bias");
-    if (bias_var_names.size() > 0) {
-      auto bias_var_name = bias_var_names.front();
-      bias = scope->FindVar(bias_var_name)->GetMutable<lite::Tensor>();
-      bias_data = bias->mutable_data<float>();
-    }
-  }
   auto input_dims = input->dims();
   auto filter_dims = filter->dims();
   auto output_dims = output->dims();
@@ -74,6 +64,19 @@ void conv_ref(const std::shared_ptr<operators::ConvOpLite> op) {
   int out_w = output_dims[3];
   int out_c_group = out_ch_size / groups;
   int in_c_group = in_ch_size / groups;
+  Tensor* bias = nullptr;
+  float* bias_data = nullptr;
+  bool is_channel_bias = false;
+  if (op_info->HasInput("Bias")) {
+    auto bias_var_names = op_info->Input("Bias");
+    if (bias_var_names.size() > 0) {
+      auto bias_var_name = bias_var_names.front();
+      bias = scope->FindVar(bias_var_name)->GetMutable<lite::Tensor>();
+      auto bias_dims = bias->dims();
+      is_channel_bias = bias_dims.production() == out_ch_size;
+      bias_data = bias->mutable_data<float>();
+    }
+  }
   for (int n = 0; n < batch_size; ++n) {
     for (int g = 0; g < groups; ++g) {
       for (int oc = 0; oc < out_c_group; ++oc) {
@@ -83,7 +86,10 @@ void conv_ref(const std::shared_ptr<operators::ConvOpLite> op) {
                 g * out_c_group * out_h * out_w + oc * out_h * out_w +
                 oh * out_w + ow;
             float out_value =
-                bias_data != nullptr ? (bias_data[g * out_c_group + oc]) : 0;
+                bias_data != nullptr
+                    ? (is_channel_bias ? bias_data[g * out_c_group + oc]
+                                       : bias_data[out_idx])
+                    : 0;
             // + out_value *= beta;
             for (int ic = 0; ic < in_c_group; ++ic) {
               for (int kh = 0; kh < kernel_h; ++kh) {
@@ -120,6 +126,7 @@ void test_conv(int bs,
                int ih,
                int iw,
                bool has_bias,
+               bool is_channel_bias,
                bool fuse_relu,
                bool depthwise,
                int dilation,
@@ -146,6 +153,12 @@ void test_conv(int bs,
   }
   std::vector<int64_t> input_shape = {bs, ic, ih, iw};
   std::vector<int64_t> filter_shape = {oc, ic / groups, kernel, kernel};
+  std::vector<int64_t> output_shape({bs, oc});
+  for (size_t i = 0; i < 2; i++) {
+    const int dkernel = dilation * (kernel - 1) + 1;
+    int output_size = (input_shape[i + 2] + 2 * padding - dkernel) / stride + 1;
+    output_shape.push_back(output_size);
+  }
   input->Resize(input_shape);
   filter->Resize(filter_shape);
@@ -165,7 +178,11 @@ void test_conv(int bs,
   opdesc.SetAttr("groups", groups);
   opdesc.SetAttr("fuse_relu", static_cast<bool>(fuse_relu));
   if (has_bias) {
-    bias->Resize({1, oc, 1, 1});
+    if (is_channel_bias) {
+      bias->Resize({1, oc, 1, 1});
+    } else {
+      bias->Resize({output_shape});
+    }
     FillTensor<float, int>(bias);
     opdesc.SetInput("Bias", {bias_var_name});
   }
@@ -195,37 +212,42 @@ TEST(NPUBridges, conv) {
       for (auto ih : {14, 28}) {
         for (auto iw : {14, 28}) {
           for (auto has_bias : {false, true}) {
-            for (auto fuse_relu : {false, true}) {
-              for (auto depthwise : {false, true}) {
-                for (auto dilation : {1, 2}) {
-                  for (auto stride : {1, 2}) {
-                    for (auto kernel : {1, 3, 5}) {
-                      std::vector<int> paddings = {kernel / 2};
-                      if (kernel / 2 != 0) {
-                        paddings.push_back(0);
-                      }
-                      for (auto padding : paddings) {
-                        VLOG(3) << "bs: " << bs << " ic: " << ic
-                                << " oc: " << oc << " ih: " << ih
-                                << " iw: " << iw << " has_bias: " << has_bias
-                                << " fuse_relu: " << fuse_relu
-                                << " depthwise: " << depthwise
-                                << " dilation: " << dilation
-                                << " stride: " << stride
-                                << " padding: " << padding
-                                << " kernel: " << kernel;
-                        test_conv(bs,
-                                  ic,
-                                  oc,
-                                  ih,
-                                  iw,
-                                  has_bias,
-                                  fuse_relu,
-                                  depthwise,
-                                  dilation,
-                                  stride,
-                                  padding,
-                                  kernel);
+            for (auto is_channel_bias : {false, true}) {
+              for (auto fuse_relu : {false, true}) {
+                for (auto depthwise : {false, true}) {
+                  for (auto dilation : {1, 2}) {
+                    for (auto stride : {1, 2}) {
+                      for (auto kernel : {1, 3, 5}) {
+                        std::vector<int> paddings = {kernel / 2};
+                        if (kernel / 2 != 0) {
+                          paddings.push_back(0);
+                        }
+                        for (auto padding : paddings) {
+                          VLOG(3) << "bs: " << bs << " ic: " << ic
+                                  << " oc: " << oc << " ih: " << ih
+                                  << " iw: " << iw
+                                  << " has_bias: " << has_bias
+                                  << " is_channel_bias: " << is_channel_bias
+                                  << " fuse_relu: " << fuse_relu
+                                  << " depthwise: " << depthwise
+                                  << " dilation: " << dilation
+                                  << " stride: " << stride
+                                  << " padding: " << padding
+                                  << " kernel: " << kernel;
+                          test_conv(bs,
+                                    ic,
+                                    oc,
+                                    ih,
+                                    iw,
+                                    has_bias,
+                                    is_channel_bias,
+                                    fuse_relu,
+                                    depthwise,
+                                    dilation,
+                                    stride,
+                                    padding,
+                                    kernel);
+                        }
                       }
                     }
                   }
@@ -239,7 +261,10 @@ TEST(NPUBridges, conv) {
     }
   }
 #else
-  test_conv(2, 3, 3, 8, 8, true, true, true, 1, 1, 1, 3);
+  test_conv(1, 3, 6, 14, 14, false, false, false, true, 2, 1, 1, 3);
+  test_conv(1, 3, 6, 14, 14, false, false, false, true, 2, 1, 0, 3);
+  test_conv(1, 3, 6, 14, 14, false, false, false, true, 2, 1, 2, 5);
+  test_conv(1, 3, 6, 14, 14, false, false, false, true, 2, 1, 0, 5);
 #endif
 }
......
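For reference, the output shape added in test_conv above follows the usual dilated-convolution size formula. Here is a small self-contained sketch with a worked example; the function name ConvOutSize is illustrative and not part of the codebase.

#include <cassert>

// Output spatial extent for one dimension of a strided, dilated convolution,
// matching the computation added in test_conv: dkernel is the effective
// kernel extent once dilation is applied.
int ConvOutSize(int in_size, int kernel, int stride, int padding, int dilation) {
  const int dkernel = dilation * (kernel - 1) + 1;
  return (in_size + 2 * padding - dkernel) / stride + 1;
}

int main() {
  // ih = 14, kernel = 3, stride = 2, padding = 1, dilation = 1 -> oh = 7
  assert(ConvOutSize(14, 3, 2, 1, 1) == 7);
  // with dilation = 2 the effective kernel is 5: (14 + 2 - 5) / 2 + 1 = 6
  assert(ConvOutSize(14, 3, 2, 1, 2) == 6);
  return 0;
}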