Commit 620c9c1a authored by Jiansong Wang

1. Change the bridge code to follow the new way (2.6 -> dev) of obtaining input/output scales (see the sketch below the header).
2. Add kImaginationNNA to the BindTargets of 'quantized_op_attributes_inference_pass'.
3. Image classification test with one input passed.
Parent 0633d001
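The core of the change is how the NNA bridges read quantization scales: the attribute-based lookup guarded by `enable_int8` is replaced by the per-tensor scale API on `OpInfo`, keyed by variable names. Below is a minimal before/after sketch of that pattern, assuming the Paddle-Lite headers and namespace qualification; `ReadConvScales` is a hypothetical helper, not part of the codebase.

```cpp
// Sketch only: contrasts the old (2.6) attribute-based scale lookup with
// the new (dev) per-tensor scale API used throughout this commit.
// ReadConvScales is a hypothetical helper, not a Paddle-Lite function.
#include <string>
#include <vector>

void ReadConvScales(const paddle::lite::OpInfo* op_info,
                    const std::string& input_name,
                    const std::string& filter_name,
                    const std::string& output_name) {
  // Old way: scales stored as loose op attributes, read only when the
  // "enable_int8" attribute was set:
  //   float input_scale  = op_info->GetAttr<float>("input_scale");
  //   float output_scale = op_info->GetAttr<float>("output_scale");
  //   auto  weight_scale = op_info->GetAttr<std::vector<float>>("weight_scale");

  // New way: scales are attached to the tensors, so the bridge asserts
  // their presence and queries them by variable name.
  CHECK(op_info->HasInputScale(input_name));
  float input_scale = op_info->GetInputScale(input_name)[0];

  CHECK(op_info->HasInputScale(filter_name));
  std::vector<float> weight_scale = op_info->GetInputScale(filter_name);

  CHECK(op_info->HasOutputScale(output_name));
  float output_scale = op_info->GetOutputScale(output_name)[0];

  (void)input_scale;
  (void)weight_scale;
  (void)output_scale;  // silence unused-variable warnings in this sketch
}
```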
......@@ -97,4 +97,4 @@ void QuantizedOpAttributesInferencePass::Apply(
REGISTER_MIR_PASS(quantized_op_attributes_inference_pass,
paddle::lite::mir::QuantizedOpAttributesInferencePass)
.BindTargets({TARGET(kAPU), TARGET(kRKNPU)});
.BindTargets({TARGET(kAPU), TARGET(kRKNPU), TARGET(kImaginationNNA)});
......@@ -31,6 +31,9 @@ int ConvConverter(void *ctx, OpLite *op, KernelBase *kernel) {
auto scope = op->scope();
VLOG(3) << "[NNA] Converting " << op_type << "... ";
CHECK(op_info->HasAttr("enable_int8") &&
op_info->GetAttr<bool>("enable_int8"));
// Get input and output vars and op attributes
auto input_name = op_info->Input("Input").front();
auto input = scope->FindMutableTensor(input_name);
......@@ -67,19 +70,14 @@ int ConvConverter(void *ctx, OpLite *op, KernelBase *kernel) {
CHECK_EQ(strides.size(), 2L);
CHECK_EQ(dilations.size(), 2L);
// for quantization
bool enable_int8 = false;
float input_scale = 1.0;
float output_scale = 1.0;
std::vector<float> weight_scale;
TensorInfo qnt;
CHECK(op_info->HasInputScale(input_name));
float input_scale = op_info->GetInputScale(input_name)[0];
CHECK(op_info->HasInputScale(filter_name));
std::vector<float> weight_scale = op_info->GetInputScale(filter_name);
CHECK(op_info->HasOutputScale(output_name));
float output_scale = op_info->GetOutputScale(output_name)[0];
if (op_info->HasAttr("enable_int8")) {
enable_int8 = op_info->GetAttr<bool>("enable_int8");
input_scale = op_info->GetAttr<float>("input_scale");
output_scale = op_info->GetAttr<float>("output_scale");
weight_scale = op_info->GetAttr<std::vector<float>>("weight_scale");
}
TensorInfo qnt;
// Input node
std::shared_ptr<Node> input_node = nullptr;
......@@ -89,10 +87,7 @@ int ConvConverter(void *ctx, OpLite *op, KernelBase *kernel) {
in_tensor = input_node->data();
} else {
TensorInfoReset(&qnt);
if (enable_int8)
qnt.type = IMGDNN_TYPE_Q_U8;
else
qnt.type = IMGDNN_TYPE_F32;
qnt.type = IMGDNN_TYPE_Q_U8;
qnt.scales.push_back(input_scale);
qnt.zero_points.push_back(128);
input_node = graph->Add(input_name, *input, qnt, Node::Role::kInput);
......@@ -123,39 +118,32 @@ int ConvConverter(void *ctx, OpLite *op, KernelBase *kernel) {
bool is_depthwise_mode = (ic == groups && oc == groups && groups != 1);
// Filter node
std::shared_ptr<Node> filter_node = nullptr;
imgdnn_tensor filter_tensor;
bool per_channel = isScalesPerChannel(weight_scale);
TensorInfoReset(&qnt);
uint8_t *weights_u8 =
graph->GetBuilder()->GetBufromPool(filter_dims.production());
if (enable_int8) {
char *weight_src = static_cast<char *>(filter->raw_data());
char *weight_src = static_cast<char *>(filter->raw_data());
qnt.type = IMGDNN_TYPE_Q_U8;
if (per_channel) {
qnt.scales.assign(weight_scale.begin(), weight_scale.end());
qnt.zero_points.assign(weight_scale.size(), 128);
qnt.count = oc;
qnt.axis = 1;
} else {
qnt.scales.push_back(weight_scale.at(0));
qnt.zero_points.push_back(128);
}
for (int i = 0; i < filter_dims.production(); i++) {
weights_u8[i] = static_cast<uint8_t>(weight_src[i] + 128);
}
filter_node = graph->Add(filter_name,
weights_u8,
filter_dims.Vectorize(),
qnt,
Node::Role::kConst);
filter_tensor = filter_node->data();
qnt.type = IMGDNN_TYPE_Q_U8;
if (per_channel) {
qnt.scales.assign(weight_scale.begin(), weight_scale.end());
qnt.zero_points.assign(weight_scale.size(), 128);
qnt.count = oc;
qnt.axis = 1;
} else {
qnt.type = IMGDNN_TYPE_F32;
filter_node = graph->Add(filter_name, *filter, qnt, Node::Role::kConst);
qnt.scales.push_back(weight_scale.at(0));
qnt.zero_points.push_back(128);
}
for (int i = 0; i < filter_dims.production(); i++) {
weights_u8[i] = static_cast<uint8_t>(weight_src[i] + 128);
}
std::shared_ptr<Node> filter_node = graph->Add(filter_name,
weights_u8,
filter_dims.Vectorize(),
qnt,
Node::Role::kConst);
imgdnn_tensor filter_tensor = filter_node->data();
// Add bias node if exists bias
// Supports the bias nodes with the following dimensions
......@@ -192,46 +180,38 @@ int ConvConverter(void *ctx, OpLite *op, KernelBase *kernel) {
}
TensorInfoReset(&qnt);
std::vector<int64_t> shapes{1, oc};
auto bias_data = bias->data<float, float>();
if (enable_int8) {
qnt.type = IMGDNN_TYPE_I32;
if (per_channel) {
qnt.scales.resize(bias_data_size);
for (int i = 0; i < bias_data_size; i++)
qnt.scales[i] = input_scale * weight_scale[i];
qnt.zero_points.assign(bias_data_size, 0);
qnt.count = 2;
qnt.axis = 1;
} else {
qnt.scales.push_back(input_scale * weight_scale[0]);
qnt.zero_points.push_back(0);
}
int quant_bits = 32;
auto dtype_max = static_cast<int>((1 << (quant_bits - 1)) - 1);
auto dtype_min = static_cast<int>(0 - dtype_max);
int32_t *bias_qnt_data =
reinterpret_cast<int32_t *>(graph->GetBuilder()->GetBufromPool(
bias_dims.production() * sizeof(int32_t)));
for (int i = 0; i < bias_data_size; i++) {
float current_scale = per_channel ? qnt.scales[i] : qnt.scales[0];
bias_qnt_data[i] =
std::min(std::max(static_cast<int>(bias_data[i] / current_scale),
dtype_min),
dtype_max);
}
bias_node = graph->Add(
bias_name, bias_qnt_data, shapes, qnt, Node::Role::kConst);
qnt.type = IMGDNN_TYPE_I32;
if (per_channel) {
qnt.scales.resize(bias_data_size);
for (int i = 0; i < bias_data_size; i++)
qnt.scales[i] = input_scale * weight_scale[i];
qnt.zero_points.assign(bias_data_size, 0);
qnt.count = 2;
qnt.axis = 1;
} else {
qnt.type = IMGDNN_TYPE_F32;
std::vector<float> bias_float_data(bias_data,
bias_data + bias_data_size);
bias_node = graph->Add(
bias_name, bias_float_data.data(), shapes, qnt, Node::Role::kConst);
qnt.scales.push_back(input_scale * weight_scale[0]);
qnt.zero_points.push_back(0);
}
int quant_bits = 32;
auto dtype_max = static_cast<int>((1 << (quant_bits - 1)) - 1);
auto dtype_min = static_cast<int>(0 - dtype_max);
auto bias_data = bias->data<float, float>();
int32_t *bias_qnt_data =
reinterpret_cast<int32_t *>(graph->GetBuilder()->GetBufromPool(
bias_dims.production() * sizeof(int32_t)));
for (int i = 0; i < bias_data_size; i++) {
float current_scale = per_channel ? qnt.scales[i] : qnt.scales[0];
bias_qnt_data[i] = std::min(
std::max(static_cast<int>(bias_data[i] / current_scale), dtype_min),
dtype_max);
}
std::vector<int64_t> shapes{1, oc};
bias_node =
graph->Add(bias_name, bias_qnt_data, shapes, qnt, Node::Role::kConst);
bias_tensor = bias_node->data();
}
}
......
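Both the conv bridge above and the fc bridge below quantize the float bias into int32 with scale `input_scale * weight_scale`, clamped to the 32-bit range. A standalone sketch of that arithmetic with made-up values follows; note the bridges derive the bounds from `quant_bits` rather than `<limits>`.

```cpp
// Standalone sketch of the int32 bias quantization the conv and fc bridges
// perform: bias_q = clamp(bias_f / (input_scale * weight_scale)).
// The scale and bias values below are made up for illustration.
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <limits>

int main() {
  const float input_scale = 0.5f;    // per-tensor input scale
  const float weight_scale = 0.25f;  // per-layer weight scale (weight_scale[0])
  const float bias_f = 3.0f;         // float bias value to be quantized

  // The bridges compute these bounds from quant_bits = 32.
  const int dtype_max = std::numeric_limits<int32_t>::max();
  const int dtype_min = -dtype_max;

  const float bias_scale = input_scale * weight_scale;  // 0.125
  const int32_t bias_q = std::min(
      std::max(static_cast<int>(bias_f / bias_scale), dtype_min), dtype_max);

  std::cout << "quantized bias: " << bias_q << "\n";  // prints 24
  return 0;
}
```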
......@@ -30,6 +30,9 @@ int FCConverter(void* ctx, OpLite* op, KernelBase* kernel) {
auto scope = op->scope();
VLOG(3) << "[NNA] Converting " + op_type + "...";
CHECK(op_info->HasAttr("enable_int8") &&
op_info->GetAttr<bool>("enable_int8"));
auto input_name = op_info->Input("Input").front();
auto input = scope->FindTensor(input_name);
auto input_dims = input->dims();
......@@ -58,19 +61,12 @@ int FCConverter(void* ctx, OpLite* op, KernelBase* kernel) {
VLOG(3) << "[NNA] input dims: " << input_dims << " w dims: " << w_dims
<< " m: " << m << " k: " << k << " n: " << n;
// for quantization
bool enable_int8 = false;
float input_scale = 1.0;
float output_scale = 1.0;
std::vector<float> weight_scale;
TensorInfo qnt;
if (op_info->HasAttr("enable_int8")) {
enable_int8 = op_info->GetAttr<bool>("enable_int8");
input_scale = op_info->GetAttr<float>("input_scale");
output_scale = op_info->GetAttr<float>("output_scale");
weight_scale = op_info->GetAttr<std::vector<float>>("weight_scale");
}
CHECK(op_info->HasInputScale(input_name));
float input_scale = op_info->GetInputScale(input_name)[0];
CHECK(op_info->HasInputScale(weight_name));
std::vector<float> weight_scale = op_info->GetInputScale(weight_name);
CHECK(op_info->HasOutputScale(out_name));
float output_scale = op_info->GetOutputScale(out_name)[0];
// Create input node and reshape it to (m, k, 1, 1)
std::shared_ptr<Node> input_node = nullptr;
......@@ -84,21 +80,19 @@ int FCConverter(void* ctx, OpLite* op, KernelBase* kernel) {
std::shared_ptr<Node> weight_node = nullptr;
bool per_channel = isScalesPerChannel(weight_scale);
uint8_t* weights_u8 = graph->GetBuilder()->GetBufromPool(w_dims.production());
if (enable_int8) {
qnt.type = IMGDNN_TYPE_Q_U8;
if (per_channel) {
LOG(FATAL)
<< "[NNA] FC per-channel quantization is not supported for Mirage";
} else {
qnt.scales.push_back(weight_scale.at(0));
qnt.zero_points.push_back(128);
}
const char* weight_src = static_cast<const char*>(weights->raw_data());
for (int i = 0; i < w_dims.production(); i++)
weights_u8[i] = static_cast<uint8_t>(weight_src[i] + 128);
TensorInfo qnt;
qnt.type = IMGDNN_TYPE_Q_U8;
if (per_channel) {
LOG(FATAL)
<< "[NNA] FC per-channel quantization is not supported for Mirage";
} else {
LOG(FATAL) << "[NNA] PaddleLite Only 8-bits quantization.";
qnt.scales.push_back(weight_scale.at(0));
qnt.zero_points.push_back(128);
}
const char* weight_src = static_cast<const char*>(weights->raw_data());
for (int i = 0; i < w_dims.production(); i++)
weights_u8[i] = static_cast<uint8_t>(weight_src[i] + 128);
weight_node = graph->Add(
weight_name, weights_u8, w_dims.Vectorize(), qnt, Node::Role::kConst);
......@@ -111,49 +105,43 @@ int FCConverter(void* ctx, OpLite* op, KernelBase* kernel) {
bias_node = graph->Get(bias_name);
} else {
auto bias = scope->FindTensor(bias_name);
// CHECK_EQ(bias->precision(), PRECISION(kFloat));
auto bias_dims = bias->dims();
CHECK_EQ(bias_dims.production(), n);
if (enable_int8 && bias->precision() == PRECISION(kFloat)) {
TensorInfoReset(&qnt);
qnt.type = IMGDNN_TYPE_I32;
if (per_channel) {
qnt.scales.resize(weight_scale.size());
qnt.count = bias_dims.size();
qnt.axis = 0;
for (int i = 0; i < weight_scale.size(); i++) {
qnt.scales[i] = input_scale * weight_scale[i];
}
LOG(FATAL)
<< "[NNA] per-channel quantization is not supported for FC";
} else {
qnt.scales.push_back(weight_scale.at(0) * input_scale);
qnt.zero_points.push_back(0);
}
int quant_bits = 32;
auto dtype_max = static_cast<int>((1 << (quant_bits - 1)) - 1);
auto dtype_min = static_cast<int>(0 - dtype_max);
auto* bias_data = bias->data<float, float>();
int32_t* bias_qnt_data =
reinterpret_cast<int32_t*>(graph->GetBuilder()->GetBufromPool(
bias_dims.production() * sizeof(int32_t)));
for (int i = 0; i < n; i++) {
float current_scale = per_channel ? qnt.scales[i] : qnt.scales[0];
bias_qnt_data[i] =
std::min(std::max(static_cast<int>(bias_data[i] / current_scale),
dtype_min),
dtype_max);
TensorInfoReset(&qnt);
qnt.type = IMGDNN_TYPE_I32;
if (per_channel) {
qnt.scales.resize(weight_scale.size());
qnt.count = bias_dims.size();
qnt.axis = 0;
for (int i = 0; i < weight_scale.size(); i++) {
qnt.scales[i] = input_scale * weight_scale[i];
}
std::vector<int64_t> shapes{1};
bias_node = graph->Add(
bias_name, bias_qnt_data, shapes, qnt, Node::Role::kConst);
LOG(FATAL) << "[NNA] per-channel quantization is not supported for FC";
} else {
qnt.type = IMGDNN_TYPE_F32;
bias_node = graph->Add(bias_name, *bias, qnt, Node::Role::kConst);
qnt.scales.push_back(weight_scale.at(0) * input_scale);
qnt.zero_points.push_back(0);
}
int quant_bits = 32;
auto dtype_max = static_cast<int>((1 << (quant_bits - 1)) - 1);
auto dtype_min = static_cast<int>(0 - dtype_max);
auto* bias_data = bias->data<float, float>();
int32_t* bias_qnt_data =
reinterpret_cast<int32_t*>(graph->GetBuilder()->GetBufromPool(
bias_dims.production() * sizeof(int32_t)));
for (int i = 0; i < n; i++) {
float current_scale = per_channel ? qnt.scales[i] : qnt.scales[0];
bias_qnt_data[i] = std::min(
std::max(static_cast<int>(bias_data[i] / current_scale), dtype_min),
dtype_max);
}
std::vector<int64_t> shapes{1};
bias_node =
graph->Add(bias_name, bias_qnt_data, shapes, qnt, Node::Role::kConst);
}
bias_tensor = bias_node->data();
}
......
......@@ -32,6 +32,9 @@ int PoolConverter(void* ctx, OpLite* op, KernelBase* kernel) {
auto scope = op->scope();
VLOG(3) << "[NNA] Converting " + op_type + "...";
CHECK(op_info->HasAttr("enable_int8") &&
op_info->GetAttr<bool>("enable_int8"));
// Get input and output vars and op attributes
auto x_name = op_info->Input("X").front();
auto x = scope->FindMutableTensor(x_name);
......@@ -43,11 +46,8 @@ int PoolConverter(void* ctx, OpLite* op, KernelBase* kernel) {
auto paddings = op_info->GetAttr<std::vector<int>>("paddings");
// for quantization
float output_scale = 1.0;
if (op_info->HasAttr("enable_int8")) {
output_scale = op_info->GetAttr<float>("output_scale");
}
CHECK(op_info->HasOutputScale(out_name));
float output_scale = op_info->GetOutputScale(out_name)[0];
// X node
std::shared_ptr<Node> x_node = nullptr;
......
......@@ -30,6 +30,9 @@ int SoftmaxConverter(void* ctx, OpLite* op, KernelBase* kernel) {
auto scope = op->scope();
VLOG(3) << "[NNA] Converting " + op_type + "...";
CHECK(op_info->HasAttr("enable_int8") &&
op_info->GetAttr<bool>("enable_int8"));
// Get input and output vars and op attributes
auto x_name = op_info->Input("X").front();
auto x = scope->FindMutableTensor(x_name);
......@@ -41,11 +44,9 @@ int SoftmaxConverter(void* ctx, OpLite* op, KernelBase* kernel) {
axis += x_rank;
}
// for quantization
float output_scale = 1.0;
if (op_info->HasAttr("enable_int8")) {
output_scale = op_info->GetAttr<float>("output_scale");
CHECK(op_info->HasOutputScale(out_name));
float output_scale = op_info->GetOutputScale(out_name)[0];
// X node
std::shared_ptr<Node> x_node = nullptr;
......
......@@ -28,6 +28,7 @@ namespace kernels {
namespace imagination_nna {
bool SubgraphEngine::BuildDeviceProgram() {
device_program_ready = false;
int status = 0;
// Convert all of ops and their input vars and weights and added into the NNA
// IMG IR graph
......@@ -44,7 +45,6 @@ bool SubgraphEngine::BuildDeviceProgram() {
op->InferShape();
std::string op_type = op->op_info()->Type();
if (!bridges.Exists(op_type, TARGET(kImaginationNNA))) {
// return subgraph::FAILED;
return false;
}
auto kernel = inst.kernel();
......@@ -53,7 +53,6 @@ bool SubgraphEngine::BuildDeviceProgram() {
const_cast<OpLite*>(op),
const_cast<KernelBase*>(kernel));
if (subgraph::CHECK_FAILED(status)) {
// return subgraph::FAILED;
return false;
}
}
......@@ -156,11 +155,15 @@ bool SubgraphEngine::BuildDeviceProgram() {
break;
}
}
device_program_ready = true;
return true;
}
bool SubgraphEngine::LaunchDeviceProgram() {
if (!device_program_ready) // build device program fail
LaunchOriginProgram();
// Set input buffer
for (size_t i = 0; i < origin_itensors_.size(); i++) {
// check input shapes
......
......@@ -55,6 +55,7 @@ class SubgraphEngine : public subgraph::Engine {
std::vector<imgdnn_input> device_itensors_;
std::vector<imgdnn_output> device_otensors_;
lite::imagination_nna::ImgdnnManager imgdnn_mgr_;
bool device_program_ready{false};
};
class SubgraphCompute : public KernelLite<TARGET(kImaginationNNA),
......
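The last two hunks add a `device_program_ready` guard to `SubgraphEngine`: the flag is cleared on entry to `BuildDeviceProgram`, set only after the whole NNA graph is built, and checked before launching so that a failed build falls back to the original program. A minimal standalone sketch of the pattern is below; the method bodies are placeholders, and returning the fallback's result directly is an assumption on my part, since the diff only shows the call.

```cpp
// Minimal sketch of the build-guard pattern introduced in SubgraphEngine.
// Names mirror the diff; bodies are placeholders, and returning
// LaunchOriginProgram()'s result is an assumption, not taken from the diff.
#include <iostream>

class SubgraphEngineSketch {
 public:
  bool BuildDeviceProgram() {
    device_program_ready_ = false;
    // ... convert every op into the NNA IMG IR graph, returning false on
    // any unsupported op or failed bridge ...
    device_program_ready_ = true;
    return true;
  }

  bool LaunchDeviceProgram() {
    if (!device_program_ready_)  // building the device program failed
      return LaunchOriginProgram();
    // ... set input buffers and run the compiled NNA program ...
    return true;
  }

 private:
  bool LaunchOriginProgram() {
    std::cout << "falling back to the origin program\n";
    return true;
  }
  bool device_program_ready_{false};
};
```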