[ASCEND] fix conv op, test=develop (#4041)

737fedeb · Qi Li · GitHub · 71d9dbd1 · 737fedeb · 737fedeb
5 changed files
--- a/lite/kernels/huawei_ascend_npu/bridges/conv_op.cc
+++ b/lite/kernels/huawei_ascend_npu/bridges/conv_op.cc
@@ -55,9 +55,6 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  auto strides = op_info->GetAttr<std::vector<int>>("strides");
  auto paddings = op_info->GetAttr<std::vector<int>>("paddings");
  auto groups = op_info->GetAttr<int>("groups");
-  // Conv2D: groups must set to 1; DepthwiseConv2D: groups not supported.
-  CHECK_LE(groups, 1)
-      << "[HUAWEI_ASCEND_NPU] groups > 1 NOT supported, groups: " << groups;
  auto dilations = op_info->GetAttr<std::vector<int>>("dilations");
  bool with_act =
      op_info->HasAttr("with_act") && op_info->GetAttr<bool>("with_act");
@@ -114,11 +111,18 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
                                   "filter width after dilation";
  }
+  // Check restriction: oc must be divisible by groups (oc % groups == 0)
+  CHECK_EQ(oc % groups, 0) << "[HUAWEI_ASCEND_NPU] Huawei Ascend NPU DDK "
+                              "restriction: out channel divice groups should "
+                              "equal to 0";
  // Check depthwise mode, and decide whether use DepthwiseConv2D Op
  bool use_depthwise_conv = false;
-  bool is_depthwise_mode = (ic == groups && oc == groups && groups != 1);
+  bool is_depthwise_mode = (ic == groups && oc == groups);
  if (is_depthwise_mode && dilations[0] == 1 && dilations[1] == 1) {
    use_depthwise_conv = true;
+    // Change filter shape {oc, ic/groups = 1, kh, kw} => {K=1, oc, kh, kw}
+    filter->Resize({1L, oc, filter_dims[2], filter_dims[3]});
    LOG(WARNING) << "[HUAWEI_ASCEND_NPU] DepthwiseConv2D op is used.";
  }
@@ -197,11 +201,11 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
    conv_op->set_input_x(*input_node->data());
    conv_op->set_input_filter(*filter_node->data());
    conv_op->set_attr_strides(
-        ge::Operator::OpListInt({bs, ic, strides[0], strides[1]}));
+        ge::Operator::OpListInt({1, 1, strides[0], strides[1]}));
    conv_op->set_attr_pads(ge::Operator::OpListInt(
        {paddings[0], paddings[1], paddings[2], paddings[3]}));
    conv_op->set_attr_dilations(
-        ge::Operator::OpListInt({bs, ic, dilations[0], dilations[1]}));
+        ge::Operator::OpListInt({1, 1, dilations[0], dilations[1]}));
    conv_op->set_attr_groups(groups);
    conv_op->set_attr_data_format("NCHW");
    if (bias_node != nullptr && is_channel_bias) {

--- a/lite/tests/kernels/activation_compute_test.cc
+++ b/lite/tests/kernels/activation_compute_test.cc
@@ -321,7 +321,7 @@ TEST(Activation_relu, precision) {
  place = TARGET(kXPU);
 #elif defined(LITE_WITH_HUAWEI_ASCEND_NPU)
  place = TARGET(kHuaweiAscendNPU);
-  abs_error = 1e-2;  // Using fp16 in NPU
+  abs_error = 1e-2;  // precision_mode default is force_fp16
 #else
  return;
 #endif
@@ -346,7 +346,7 @@ TEST(Activation_leaky_relu, precision) {
  place = TARGET(kARM);
 #elif defined(LITE_WITH_HUAWEI_ASCEND_NPU)
  place = TARGET(kHuaweiAscendNPU);
-  abs_error = 1e-2;  // Using fp16 in NPU
+  abs_error = 1e-2;  // precision_mode default is force_fp16
 #else
  return;
 #endif
@@ -431,7 +431,7 @@ TEST(Activation_sigmoid, precision) {
  place = TARGET(kARM);
 #elif defined(LITE_WITH_HUAWEI_ASCEND_NPU)
  place = TARGET(kHuaweiAscendNPU);
-  abs_error = 1e-2;  // Using fp16 in NPU
+  abs_error = 1e-2;  // precision_mode default is force_fp16
 #else
  return;
 #endif
@@ -467,7 +467,7 @@ TEST(Activation_tanh, precision) {
  place = TARGET(kXPU);
 #elif defined(LITE_WITH_HUAWEI_ASCEND_NPU)
  place = TARGET(kHuaweiAscendNPU);
-  abs_error = 1e-2;  // Using fp16 in NPU
+  abs_error = 1e-2;  // precision_mode default is force_fp16
 #else
  return;
 #endif
@@ -509,7 +509,7 @@ TEST(Activation_relu6, precision) {
  place = TARGET(kARM);
 #elif defined(LITE_WITH_HUAWEI_ASCEND_NPU)
  place = TARGET(kHuaweiAscendNPU);
-  abs_error = 1e-2;  // Using fp16 in NPU
+  abs_error = 1e-2;  // precision_mode default is force_fp16
 #else
  return;
 #endif

--- a/lite/tests/kernels/concat_compute_test.cc
+++ b/lite/tests/kernels/concat_compute_test.cc
@@ -149,6 +149,7 @@ TEST(Concat, precision) {
  abs_error = 1e-2;  // use fp16 in npu
 #elif defined(LITE_WITH_HUAWEI_ASCEND_NPU)
  place = TARGET(kHuaweiAscendNPU);
+  abs_error = 1e-2;  // precision_mode default is force_fp16
 #elif defined(LITE_WITH_ARM)
  place = TARGET(kARM);
 #elif defined(LITE_WITH_X86)

--- a/lite/tests/kernels/conv_compute_test.cc
+++ b/lite/tests/kernels/conv_compute_test.cc
@@ -256,7 +256,7 @@ void TestConvGroups(Place place, float abs_error = 2e-5) {
       std::vector<std::vector<int64_t>>{{1, 6, 3, 4}, {5, 12, 7, 8}}) {
    for (auto out_channels : {2, 3, 6}) {
      for (auto groups : {2, 3, 6}) {
-#ifdef LITE_WITH_NPU
+#if (defined LITE_WITH_NPU) || (defined LITE_WITH_HUAWEI_ASCEND_NPU)
        if (out_channels % groups != 0) continue;
 #endif
        std::unique_ptr<arena::TestCase> tester(new ConvComputeTester(
@@ -420,16 +420,13 @@ TEST(Conv2d, precision) {
  abs_error = 5e-2;  // Using fp16 in NPU
 #elif defined(LITE_WITH_HUAWEI_ASCEND_NPU)
  place = TARGET(kHuaweiAscendNPU);
-  abs_error = 1e-2;  // Using fp16 in NPU
+  abs_error = 1e-2;  // precision_mode default is force_fp16
 #else
  return;
 #endif
  TestConvKsize(place, abs_error);
-// Huawei Ascend NPU DDK not support groups > 1
-#if !defined(LITE_WITH_HUAWEI_ASCEND_NPU)
  TestConvGroups(place, abs_error);
-#endif
  TestConvDilations(place, abs_error);
  TestConvStrides(place, abs_error);
  TestConvPaddings(place, abs_error);

--- a/lite/tests/kernels/interp_compute_test.cc
+++ b/lite/tests/kernels/interp_compute_test.cc
@@ -451,7 +451,7 @@ TEST(Interp, precision) {
  abs_error = 1e-2;  // use fp16 in npu
 #elif defined(LITE_WITH_HUAWEI_ASCEND_NPU)
  place = TARGET(kHuaweiAscendNPU);
-  abs_error = 1e-2;  // use fp16 in npu
+  abs_error = 1e-2;  // precision_mode default is force_fp16
 #elif defined(LITE_WITH_ARM)
  place = TARGET(kARM);
 #else