diff --git a/mindspore/lite/nnacl/detection_post_process_parameter.h b/mindspore/lite/nnacl/detection_post_process_parameter.h index e49a8aaee6a243c9c0299b4338c050c86ea32bf0..1ee172f75942e7cbf93acea7cee96a5c0d0c84af 100644 --- a/mindspore/lite/nnacl/detection_post_process_parameter.h +++ b/mindspore/lite/nnacl/detection_post_process_parameter.h @@ -37,6 +37,7 @@ typedef struct DetectionPostProcessParameter { void *decoded_boxes_; void *nms_candidate_; + void *indexes_; void *selected_; void *score_with_class_; void *score_with_class_all_; diff --git a/mindspore/lite/nnacl/fp32/detection_post_process.c b/mindspore/lite/nnacl/fp32/detection_post_process.c index 34361075ad64188a3a25787db311e40a8c2c2337..513da58adad8780974f20c16f9457e3e331e48e4 100644 --- a/mindspore/lite/nnacl/fp32/detection_post_process.c +++ b/mindspore/lite/nnacl/fp32/detection_post_process.c @@ -27,7 +27,7 @@ int ScoreWithIndexCmp(const void *a, const void *b) { } else if (pa->score < pb->score) { return 1; } else { - return 0; + return pa->index - pb->index; } } @@ -108,6 +108,7 @@ int NmsMultiClassesRegular(const int num_boxes, const int num_classes_with_bg, c int all_classes_sorted_num = 0; int all_classes_output_num = 0; ScoreWithIndex *score_with_index_all = (ScoreWithIndex *)(param->score_with_class_all_); + int *indexes = (int *)(param->indexes_); for (int j = first_class_index; j < num_classes_with_bg; ++j) { int candidate_num = 0; // process single class @@ -120,15 +121,23 @@ int NmsMultiClassesRegular(const int num_boxes, const int num_classes_with_bg, c } int selected_num = NmsSingleClass(candidate_num, decoded_boxes, param->detections_per_class_, score_with_index_single, selected, param); + for (int i = 0; i < all_classes_sorted_num; ++i) { + indexes[i] = score_with_index_all[i].index; + score_with_index_all[i].index = i; + } // process all classes for (int i = 0; i < selected_num; ++i) { // store class to index - score_with_index_all[all_classes_sorted_num].index = selected[i] * 
num_classes_with_bg + j; + indexes[all_classes_sorted_num] = selected[i] * num_classes_with_bg + j; + score_with_index_all[all_classes_sorted_num].index = all_classes_sorted_num; score_with_index_all[all_classes_sorted_num++].score = input_scores[selected[i] * num_classes_with_bg + j]; } all_classes_output_num = all_classes_sorted_num < param->max_detections_ ? all_classes_sorted_num : param->max_detections_; qsort(score_with_index_all, all_classes_sorted_num, sizeof(ScoreWithIndex), ScoreWithIndexCmp); + for (int i = 0; i < all_classes_output_num; ++i) { + score_with_index_all[i].index = indexes[score_with_index_all[i].index]; + } all_classes_sorted_num = all_classes_output_num; } for (int i = 0; i < param->max_detections_ * param->max_classes_per_detection_; ++i) { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/detection_post_process.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/detection_post_process.cc index 57ddcbb7bd1db653129596746339be1f1a82825a..a3616eed1940720bee22faa2c377021d819d9bf7 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/detection_post_process.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/detection_post_process.cc @@ -91,6 +91,7 @@ int DetectionPostProcessCPUKernel::Run() { if (parameter->use_regular_nms_) { parameter->score_with_class_all_ = context_->allocator->Malloc((num_boxes + parameter->max_detections_) * sizeof(ScoreWithIndex)); + parameter->indexes_ = context_->allocator->Malloc((num_boxes + parameter->max_detections_) * sizeof(int)); } else { parameter->score_with_class_all_ = context_->allocator->Malloc((num_boxes * parameter->num_classes_) * sizeof(ScoreWithIndex)); @@ -102,6 +103,9 @@ int DetectionPostProcessCPUKernel::Run() { context_->allocator->Free(parameter->selected_); context_->allocator->Free(parameter->score_with_class_); context_->allocator->Free(parameter->score_with_class_all_); + if (parameter->use_regular_nms_) { + context_->allocator->Free(parameter->indexes_); + } return RET_OK; } diff --git 
a/mindspore/lite/test/models_tflite_awaretraining.cfg b/mindspore/lite/test/models_tflite_awaretraining.cfg index 9690f9ebe5c5952e52f91f704b1099890945dae4..eab380f3d85562f20395f8808fb3b00749b9efe4 100644 --- a/mindspore/lite/test/models_tflite_awaretraining.cfg +++ b/mindspore/lite/test/models_tflite_awaretraining.cfg @@ -23,3 +23,4 @@ inception_v3_quant.tflite inception_v4_299_quant.tflite graph_8bit_1021_combine.tflite lite-model_object_detection_mobile_object_labeler_v1_1.tflite +detect.tflite diff --git a/mindspore/lite/tools/benchmark/benchmark.cc b/mindspore/lite/tools/benchmark/benchmark.cc index be5f836bfdde0506dccccc997b2e86e372311638..5870c2588e4b935a30d5900500deb241e7bbbb3d 100644 --- a/mindspore/lite/tools/benchmark/benchmark.cc +++ b/mindspore/lite/tools/benchmark/benchmark.cc @@ -100,7 +100,7 @@ int Benchmark::ReadInputFile() { } auto tensorDataSize = cur_tensor->Size(); if (size != tensorDataSize) { - std::cerr << "Input binary file size error, required: %zu, in fact: %zu" << tensorDataSize << size << std::endl; + std::cerr << "Input binary file size error, required: " << tensorDataSize << ", in fact: " << size << std::endl; MS_LOG(ERROR) << "Input binary file size error, required: " << tensorDataSize << ", in fact: " << size; delete binBuf; return RET_ERROR; @@ -166,41 +166,40 @@ int Benchmark::ReadCalibData() { return RET_OK; } - int Benchmark::CompareOutput() { std::cout << "================ Comparing Output data ================" << std::endl; float totalBias = 0; int totalSize = 0; bool hasError = false; for (const auto &calibTensor : calibData) { - std::string nodeName = calibTensor.first; - auto tensors = session->GetOutputsByNodeName(nodeName); - if (tensors.empty()) { - MS_LOG(ERROR) << "Cannot find output node: " << nodeName.c_str() << " , compare output data fail."; - std::cerr << "Cannot find output node: " << nodeName.c_str() << " , compare output data fail." 
<< std::endl; - return RET_ERROR; - } - // make sure tensor size is 1 - if (tensors.size() != 1) { - MS_LOG(ERROR) << "Only support 1 tensor with a name now."; - std::cerr << "Only support 1 tensor with a name now." << std::endl; - return RET_ERROR; + std::string nodeOrTensorName = calibTensor.first; + auto tensors = session->GetOutputsByNodeName(nodeOrTensorName); + const mindspore::tensor::MSTensor *tensor = nullptr; + if (tensors.empty() || tensors.size() != 1) { + MS_LOG(INFO) << "Cannot find output node: " << nodeOrTensorName + << " or node has more than one output tensor, switch to GetOutputByTensorName"; + tensor = session->GetOutputByTensorName(nodeOrTensorName); + if (tensor == nullptr) { + MS_LOG(ERROR) << "Cannot find output tensor " << nodeOrTensorName << ", get model output failed"; + return RET_ERROR; + } + } else { + tensor = tensors.front(); } - auto &tensor = tensors.front(); MS_ASSERT(tensor->GetDataType() == DataType_DT_FLOAT); MS_ASSERT(tensor->GetData() != nullptr); float bias = 0; switch (msCalibDataType) { case TypeId::kNumberTypeFloat: { - bias = CompareData<float>(nodeName, tensor->shape(), static_cast<float *>(tensor->MutableData())); + bias = CompareData<float>(nodeOrTensorName, tensor->shape(), static_cast<float *>(tensor->MutableData())); break; } case TypeId::kNumberTypeInt8: { - bias = CompareData<int8_t>(nodeName, tensor->shape(), static_cast<int8_t *>(tensor->MutableData())); + bias = CompareData<int8_t>(nodeOrTensorName, tensor->shape(), static_cast<int8_t *>(tensor->MutableData())); break; } case TypeId::kNumberTypeInt32: { - bias = CompareData<int32_t>(nodeName, tensor->shape(), static_cast<int32_t *>(tensor->MutableData())); + bias = CompareData<int32_t>(nodeOrTensorName, tensor->shape(), static_cast<int32_t *>(tensor->MutableData())); break; } default: @@ -224,12 +223,12 @@ int Benchmark::CompareOutput() { meanBias = 0; } - std::cout << "Mean bias of all nodes: " << meanBias << "%" << std::endl; + std::cout << "Mean bias of all nodes/tensors: " << meanBias << "%" << std::endl; std::cout << 
"=======================================================" << std::endl << std::endl; if (meanBias > this->_flags->accuracyThreshold) { - MS_LOG(ERROR) << "Mean bias of all nodes is too big: " << meanBias << "%"; - std::cerr << "Mean bias of all nodes is too big: " << meanBias << "%" << std::endl; + MS_LOG(ERROR) << "Mean bias of all nodes/tensors is too big: " << meanBias << "%"; + std::cerr << "Mean bias of all nodes/tensors is too big: " << meanBias << "%" << std::endl; return RET_ERROR; } else { return RET_OK; } @@ -294,26 +293,26 @@ int Benchmark::MarkAccuracy() { MS_LOG(INFO) << "MarkAccuracy"; std::cout << "MarkAccuracy" << std::endl; for (size_t i = 0; i < msInputs.size(); i++) { - switch (msInputs.at(i)->data_type()) { - case TypeId::kNumberTypeFloat: - PrintInputData<float>(msInputs.at(i)); - break; - case TypeId::kNumberTypeFloat32: - PrintInputData<float>(msInputs.at(i)); - break; - case TypeId::kNumberTypeInt8: - PrintInputData<int8_t>(msInputs.at(i)); - break; - case TypeId::kNumberTypeUInt8: - PrintInputData<uint8_t>(msInputs.at(i)); - break; - case TypeId::kNumberTypeInt32: - PrintInputData<int32_t>(msInputs.at(i)); - break; - default: - MS_LOG(ERROR) << "Datatype " << msInputs.at(i)->data_type() << " is not supported."; - return RET_ERROR; - } + switch (msInputs.at(i)->data_type()) { + case TypeId::kNumberTypeFloat: + PrintInputData<float>(msInputs.at(i)); + break; + case TypeId::kNumberTypeFloat32: + PrintInputData<float>(msInputs.at(i)); + break; + case TypeId::kNumberTypeInt8: + PrintInputData<int8_t>(msInputs.at(i)); + break; + case TypeId::kNumberTypeUInt8: + PrintInputData<uint8_t>(msInputs.at(i)); + break; + case TypeId::kNumberTypeInt32: + PrintInputData<int32_t>(msInputs.at(i)); + break; + default: + MS_LOG(ERROR) << "Datatype " << msInputs.at(i)->data_type() << " is not supported."; + return RET_ERROR; + } } auto status = session->RunGraph(); if (status != RET_OK) { @@ -355,7 +354,7 @@ int Benchmark::RunBenchmark(const std::string &deviceType) { auto model = lite::Model::Import(graphBuf, size); auto model_version = 
model->version_; if (model_version != Version()) { - MS_LOG(WARNING) << "model version is "<< model_version << ", inference version is " << Version() << " not equal"; + MS_LOG(WARNING) << "model version is " << model_version << ", inference version is " << Version() << " not equal"; } if (model == nullptr) { MS_LOG(ERROR) << "Import model file failed while running " << modelName.c_str(); diff --git a/mindspore/lite/tools/benchmark/benchmark.h b/mindspore/lite/tools/benchmark/benchmark.h index 41d6da6bf607df98996020a1b17009af492ceaaa..fc8ac3ec0d191dd1b7bf478b1f39e370b5c4bbd2 100644 --- a/mindspore/lite/tools/benchmark/benchmark.h +++ b/mindspore/lite/tools/benchmark/benchmark.h @@ -131,7 +131,7 @@ class MS_API Benchmark { auto inData = reinterpret_cast(input->MutableData()); std::cout << "InData" << i++ << ": "; for (size_t j = 0; j < 20; j++) { - std::cout << static_cast(inData[j]) << " "; + std::cout << static_cast(inData[j]) << " "; } std::cout << std::endl; } @@ -192,9 +192,9 @@ class MS_API Benchmark { } if (meanError <= 0.0000001) { - std::cout << "Mean bias of node " << nodeName << " : 0%" << std::endl; + std::cout << "Mean bias of node/tensor " << nodeName << " : 0%" << std::endl; } else { - std::cout << "Mean bias of node " << nodeName << " : " << meanError * 100 << "%" << std::endl; + std::cout << "Mean bias of node/tensor " << nodeName << " : " << meanError * 100 << "%" << std::endl; } return meanError; } else {