Commit 368bfb24, authored by mindspore-ci-bot, committed by Gitee


!5965 benchmark support multi outputs node & detection_post_process use stable sort & add model detect.tflite
Merge pull request !5965 from wangzhe/master
...@@ -37,6 +37,7 @@ typedef struct DetectionPostProcessParameter {
   void *decoded_boxes_;
   void *nms_candidate_;
+  void *indexes_;
   void *selected_;
   void *score_with_class_;
   void *score_with_class_all_;
......
...@@ -27,7 +27,7 @@ int ScoreWithIndexCmp(const void *a, const void *b) {
   } else if (pa->score < pb->score) {
     return 1;
   } else {
-    return 0;
+    return pa->index - pb->index;
   }
 }
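Note on this change: the C standard library's qsort makes no stability guarantee, so entries with equal scores could come back in a platform-dependent order. Tie-breaking on pa->index - pb->index makes the comparator a total order, which pins equal-score entries to their insertion order. A minimal, self-contained sketch of the effect (the struct below mirrors ScoreWithIndex but is illustrative, not the library header):

// Illustrative only: mimics the comparator above on a toy array.
#include <cstdio>
#include <cstdlib>

typedef struct {
  float score;
  int index;  // position assigned at insertion time
} ScoreWithIndex;

static int ScoreWithIndexCmp(const void *a, const void *b) {
  const ScoreWithIndex *pa = (const ScoreWithIndex *)a;
  const ScoreWithIndex *pb = (const ScoreWithIndex *)b;
  if (pa->score > pb->score) {
    return -1;  // higher scores sort first
  } else if (pa->score < pb->score) {
    return 1;
  } else {
    return pa->index - pb->index;  // equal scores: keep insertion order
  }
}

int main() {
  ScoreWithIndex v[] = {{0.5f, 0}, {0.9f, 1}, {0.5f, 2}, {0.9f, 3}};
  qsort(v, 4, sizeof(ScoreWithIndex), ScoreWithIndexCmp);
  for (int i = 0; i < 4; ++i) {
    printf("(%.1f, %d) ", v[i].score, v[i].index);  // (0.9, 1) (0.9, 3) (0.5, 0) (0.5, 2)
  }
  return 0;
}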
...@@ -108,6 +108,7 @@ int NmsMultiClassesRegular(const int num_boxes, const int num_classes_with_bg, c
   int all_classes_sorted_num = 0;
   int all_classes_output_num = 0;
   ScoreWithIndex *score_with_index_all = (ScoreWithIndex *)(param->score_with_class_all_);
+  int *indexes = (int *)(param->indexes_);
   for (int j = first_class_index; j < num_classes_with_bg; ++j) {
     int candidate_num = 0;
     // process single class
...@@ -120,15 +121,23 @@ int NmsMultiClassesRegular(const int num_boxes, const int num_classes_with_bg, c
     }
     int selected_num = NmsSingleClass(candidate_num, decoded_boxes, param->detections_per_class_,
                                       score_with_index_single, selected, param);
+    for (int i = 0; i < all_classes_sorted_num; ++i) {
+      indexes[i] = score_with_index_all[i].index;
+      score_with_index_all[i].index = i;
+    }
     // process all classes
     for (int i = 0; i < selected_num; ++i) {
       // store class to index
-      score_with_index_all[all_classes_sorted_num].index = selected[i] * num_classes_with_bg + j;
+      indexes[all_classes_sorted_num] = selected[i] * num_classes_with_bg + j;
+      score_with_index_all[all_classes_sorted_num].index = all_classes_sorted_num;
       score_with_index_all[all_classes_sorted_num++].score = input_scores[selected[i] * num_classes_with_bg + j];
     }
     all_classes_output_num =
       all_classes_sorted_num < param->max_detections_ ? all_classes_sorted_num : param->max_detections_;
     qsort(score_with_index_all, all_classes_sorted_num, sizeof(ScoreWithIndex), ScoreWithIndexCmp);
+    for (int i = 0; i < all_classes_output_num; ++i) {
+      score_with_index_all[i].index = indexes[score_with_index_all[i].index];
+    }
     all_classes_sorted_num = all_classes_output_num;
   }
   for (int i = 0; i < param->max_detections_ * param->max_classes_per_detection_; ++i) {
......
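Note on the loops added around qsort: the .index field does double duty here. It carries the payload selected[i] * num_classes_with_bg + j, yet the new comparator also reads it as the tie-break key. So before each sort the payloads are stashed in indexes and every entry is renumbered to its array position, which makes ties resolve in insertion order; after the sort the surviving positions are mapped back to payloads. Only the first all_classes_output_num entries need restoring, since the rest are truncated away. A condensed sketch of the pattern, reusing the illustrative types from the previous snippet (not the kernel code itself):

// Sketch of the stash/renumber/sort/restore pattern under those assumptions.
void SortStableByScore(ScoreWithIndex *items, int *saved, int n) {
  for (int i = 0; i < n; ++i) {
    saved[i] = items[i].index;  // stash the payload (box * num_classes + class)
    items[i].index = i;         // tie-break key becomes the array position
  }
  qsort(items, n, sizeof(ScoreWithIndex), ScoreWithIndexCmp);
  for (int i = 0; i < n; ++i) {
    items[i].index = saved[items[i].index];  // map positions back to payloads
  }
}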
...@@ -91,6 +91,7 @@ int DetectionPostProcessCPUKernel::Run() {
   if (parameter->use_regular_nms_) {
     parameter->score_with_class_all_ =
       context_->allocator->Malloc((num_boxes + parameter->max_detections_) * sizeof(ScoreWithIndex));
+    parameter->indexes_ = context_->allocator->Malloc((num_boxes + parameter->max_detections_) * sizeof(int));
   } else {
     parameter->score_with_class_all_ =
       context_->allocator->Malloc((num_boxes * parameter->num_classes_) * sizeof(ScoreWithIndex));
...@@ -102,6 +103,9 @@ int DetectionPostProcessCPUKernel::Run() {
   context_->allocator->Free(parameter->selected_);
   context_->allocator->Free(parameter->score_with_class_);
   context_->allocator->Free(parameter->score_with_class_all_);
+  if (parameter->use_regular_nms_) {
+    context_->allocator->Free(parameter->indexes_);
+  }
   return RET_OK;
 }
......
...@@ -23,3 +23,4 @@ inception_v3_quant.tflite
 inception_v4_299_quant.tflite
 graph_8bit_1021_combine.tflite
 lite-model_object_detection_mobile_object_labeler_v1_1.tflite
+detect.tflite
...@@ -100,7 +100,7 @@ int Benchmark::ReadInputFile() {
   }
   auto tensorDataSize = cur_tensor->Size();
   if (size != tensorDataSize) {
-    std::cerr << "Input binary file size error, required: %zu, in fact: %zu" << tensorDataSize << size << std::endl;
+    std::cerr << "Input binary file size error, required: " << tensorDataSize << ", in fact: " << size << std::endl;
     MS_LOG(ERROR) << "Input binary file size error, required: " << tensorDataSize << ", in fact: " << size;
     delete binBuf;
     return RET_ERROR;
...@@ -166,41 +166,40 @@ int Benchmark::ReadCalibData() {
   return RET_OK;
 }
 
 int Benchmark::CompareOutput() {
   std::cout << "================ Comparing Output data ================" << std::endl;
   float totalBias = 0;
   int totalSize = 0;
   bool hasError = false;
   for (const auto &calibTensor : calibData) {
-    std::string nodeName = calibTensor.first;
-    auto tensors = session->GetOutputsByNodeName(nodeName);
-    if (tensors.empty()) {
-      MS_LOG(ERROR) << "Cannot find output node: " << nodeName.c_str() << " , compare output data fail.";
-      std::cerr << "Cannot find output node: " << nodeName.c_str() << " , compare output data fail." << std::endl;
-      return RET_ERROR;
-    }
-    // make sure tensor size is 1
-    if (tensors.size() != 1) {
-      MS_LOG(ERROR) << "Only support 1 tensor with a name now.";
-      std::cerr << "Only support 1 tensor with a name now." << std::endl;
-      return RET_ERROR;
-    }
-    auto &tensor = tensors.front();
+    std::string nodeOrTensorName = calibTensor.first;
+    auto tensors = session->GetOutputsByNodeName(nodeOrTensorName);
+    const mindspore::tensor::MSTensor *tensor = nullptr;
+    if (tensors.empty() || tensors.size() != 1) {
+      MS_LOG(INFO) << "Cannot find output node: " << nodeOrTensorName
+                   << " or node has more than one output tensor, switch to GetOutputByTensorName";
+      tensor = session->GetOutputByTensorName(nodeOrTensorName);
+      if (tensor == nullptr) {
+        MS_LOG(ERROR) << "Cannot find output tensor " << nodeOrTensorName << ", get model output failed";
+        return RET_ERROR;
+      }
+    } else {
+      tensor = tensors.front();
+    }
     MS_ASSERT(tensor->GetDataType() == DataType_DT_FLOAT);
     MS_ASSERT(tensor->GetData() != nullptr);
     float bias = 0;
     switch (msCalibDataType) {
       case TypeId::kNumberTypeFloat: {
-        bias = CompareData<float>(nodeName, tensor->shape(), static_cast<float *>(tensor->MutableData()));
+        bias = CompareData<float>(nodeOrTensorName, tensor->shape(), static_cast<float *>(tensor->MutableData()));
         break;
       }
       case TypeId::kNumberTypeInt8: {
-        bias = CompareData<int8_t>(nodeName, tensor->shape(), static_cast<int8_t *>(tensor->MutableData()));
+        bias = CompareData<int8_t>(nodeOrTensorName, tensor->shape(), static_cast<int8_t *>(tensor->MutableData()));
         break;
       }
       case TypeId::kNumberTypeInt32: {
-        bias = CompareData<int32_t>(nodeName, tensor->shape(), static_cast<int32_t *>(tensor->MutableData()));
+        bias = CompareData<int32_t>(nodeOrTensorName, tensor->shape(), static_cast<int32_t *>(tensor->MutableData()));
         break;
       }
       default:
...@@ -224,12 +223,12 @@ int Benchmark::CompareOutput() {
     meanBias = 0;
   }
-  std::cout << "Mean bias of all nodes: " << meanBias << "%" << std::endl;
+  std::cout << "Mean bias of all nodes/tensors: " << meanBias << "%" << std::endl;
   std::cout << "=======================================================" << std::endl << std::endl;
   if (meanBias > this->_flags->accuracyThreshold) {
-    MS_LOG(ERROR) << "Mean bias of all nodes is too big: " << meanBias << "%";
-    std::cerr << "Mean bias of all nodes is too big: " << meanBias << "%" << std::endl;
+    MS_LOG(ERROR) << "Mean bias of all nodes/tensors is too big: " << meanBias << "%";
+    std::cerr << "Mean bias of all nodes/tensors is too big: " << meanBias << "%" << std::endl;
     return RET_ERROR;
   } else {
     return RET_OK;
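Note on the lookup change: a calibration entry can now name either a single-output node or an individual output tensor. GetOutputsByNodeName is tried first; if it yields zero or more than one tensor (the empty() test is subsumed by size() != 1), the code falls back to GetOutputByTensorName. A hedged sketch of the two-step lookup, with types and signatures inferred from the calls in the diff above; FindOutput itself is a hypothetical helper, not part of the benchmark:

// Sketch only; assumes the LiteSession API as used in the diff above.
const mindspore::tensor::MSTensor *FindOutput(mindspore::session::LiteSession *session,
                                              const std::string &name) {
  auto tensors = session->GetOutputsByNodeName(name);
  if (tensors.size() == 1) {
    return tensors.front();  // the name resolved to exactly one node output
  }
  // zero or multiple outputs under this node name: treat it as a tensor name
  return session->GetOutputByTensorName(name);  // nullptr if not found either
}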
...@@ -294,26 +293,26 @@ int Benchmark::MarkAccuracy() {
   MS_LOG(INFO) << "MarkAccuracy";
   std::cout << "MarkAccuracy" << std::endl;
   for (size_t i = 0; i < msInputs.size(); i++) {
     switch (msInputs.at(i)->data_type()) {
       case TypeId::kNumberTypeFloat:
         PrintInputData<float>(msInputs.at(i));
         break;
       case TypeId::kNumberTypeFloat32:
         PrintInputData<float>(msInputs.at(i));
         break;
       case TypeId::kNumberTypeInt8:
         PrintInputData<int8_t>(msInputs.at(i));
         break;
       case TypeId::kNumberTypeUInt8:
         PrintInputData<uint8_t>(msInputs.at(i));
         break;
       case TypeId::kNumberTypeInt32:
         PrintInputData<int>(msInputs.at(i));
         break;
       default:
         MS_LOG(ERROR) << "Datatype " << msInputs.at(i)->data_type() << " is not supported.";
         return RET_ERROR;
     }
   }
   auto status = session->RunGraph();
   if (status != RET_OK) {
...@@ -355,7 +354,7 @@ int Benchmark::RunBenchmark(const std::string &deviceType) {
   auto model = lite::Model::Import(graphBuf, size);
   auto model_version = model->version_;
   if (model_version != Version()) {
-    MS_LOG(WARNING) << "model version is "<< model_version << ", inference version is " << Version() << " not equal";
+    MS_LOG(WARNING) << "model version is " << model_version << ", inference version is " << Version() << " not equal";
   }
   if (model == nullptr) {
     MS_LOG(ERROR) << "Import model file failed while running " << modelName.c_str();
......
...@@ -131,7 +131,7 @@ class MS_API Benchmark {
     auto inData = reinterpret_cast<T *>(input->MutableData());
     std::cout << "InData" << i++ << ": ";
     for (size_t j = 0; j < 20; j++) {
-      std::cout << static_cast<float >(inData[j]) << " ";
+      std::cout << static_cast<float>(inData[j]) << " ";
     }
     std::cout << std::endl;
   }
...@@ -192,9 +192,9 @@ class MS_API Benchmark {
     }
     if (meanError <= 0.0000001) {
-      std::cout << "Mean bias of node " << nodeName << " : 0%" << std::endl;
+      std::cout << "Mean bias of node/tensor " << nodeName << " : 0%" << std::endl;
     } else {
-      std::cout << "Mean bias of node " << nodeName << " : " << meanError * 100 << "%" << std::endl;
+      std::cout << "Mean bias of node/tensor " << nodeName << " : " << meanError * 100 << "%" << std::endl;
     }
     return meanError;
   } else {
......