/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #pragma once #include "paddle/framework/eigen.h" #include "paddle/framework/op_registry.h" namespace paddle { namespace operators { enum APType { kNone = 0, kIntegral, k11point }; APType GetAPType(std::string str) { if (str == "integral") { return APType::kIntegral; } else if (str == "11point") { return APType::k11point; } else { return APType::kNone; } } template inline bool SortScorePairDescend(const std::pair& pair1, const std::pair& pair2) { return pair1.first > pair2.first; } template inline void GetAccumulation(std::vector> in_pairs, std::vector* accu_vec) { std::stable_sort(in_pairs.begin(), in_pairs.end(), SortScorePairDescend); accu_vec->clear(); size_t sum = 0; for (size_t i = 0; i < in_pairs.size(); ++i) { auto count = in_pairs[i].second; sum += count; accu_vec->push_back(sum); } } template class DetectionMAPOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { auto* in_detect = ctx.Input("Detection"); auto* in_label = ctx.Input("Label"); auto* out_map = ctx.Output("MAP"); auto* in_pos_count = ctx.Input("PosCount"); auto* in_true_pos = ctx.Input("TruePos"); auto* in_false_pos = ctx.Input("FalsePos"); auto* out_pos_count = ctx.Output("OutPosCount"); auto* out_true_pos = ctx.Output("OutTruePos"); auto* out_false_pos = ctx.Output("OutFalsePos"); float overlap_threshold = ctx.Attr("overlap_threshold"); float evaluate_difficult = ctx.Attr("evaluate_difficult"); auto ap_type = GetAPType(ctx.Attr("ap_type")); auto label_lod = in_label->lod(); auto detect_lod = in_detect->lod(); PADDLE_ENFORCE_EQ(label_lod.size(), 1UL, "Only support one level sequence now."); PADDLE_ENFORCE_EQ(label_lod[0].size(), detect_lod[0].size(), "The batch_size of input(Label) and input(Detection) " "must be the same."); std::vector>> gt_boxes; std::vector>>> detect_boxes; GetBoxes(*in_label, *in_detect, gt_boxes, detect_boxes); std::map label_pos_count; std::map>> true_pos; std::map>> false_pos; if (in_pos_count != nullptr) { GetInputPos(*in_pos_count, *in_true_pos, *in_false_pos, label_pos_count, true_pos, false_pos); } CalcTrueAndFalsePositive(gt_boxes, detect_boxes, evaluate_difficult, overlap_threshold, label_pos_count, true_pos, false_pos); T map = CalcMAP(ap_type, label_pos_count, true_pos, false_pos); GetOutputPos(ctx, label_pos_count, true_pos, false_pos, *out_pos_count, *out_true_pos, *out_false_pos); T* map_data = out_map->mutable_data(ctx.GetPlace()); map_data[0] = map; } protected: struct Box { Box(T xmin, T ymin, T xmax, T ymax) : xmin(xmin), ymin(ymin), xmax(xmax), ymax(ymax), is_difficult(false) {} T xmin, ymin, xmax, ymax; bool is_difficult; }; inline T JaccardOverlap(const Box& box1, const Box& box2) const { if (box2.xmin > box1.xmax || box2.xmax < box1.xmin || box2.ymin > box1.ymax || box2.ymax < box1.ymin) { return 0.0; } else { T inter_xmin = std::max(box1.xmin, box2.xmin); T inter_ymin = std::max(box1.ymin, box2.ymin); T inter_xmax = std::min(box1.xmax, box2.xmax); T inter_ymax = std::min(box1.ymax, box2.ymax); T inter_width = inter_xmax - inter_xmin; T inter_height = inter_ymax - inter_ymin; T inter_area = inter_width * inter_height; T bbox_area1 = (box1.xmax - box1.xmin) * (box1.ymax - box1.ymin); T bbox_area2 = (box2.xmax - box2.xmin) * (box2.ymax - box2.ymin); return inter_area / (bbox_area1 + bbox_area2 - inter_area); } } void GetBoxes(const framework::LoDTensor& input_label, const framework::LoDTensor& input_detect, std::vector>>& gt_boxes, std::vector>>>& detect_boxes) const { auto labels = framework::EigenTensor::From(input_label); auto detect = framework::EigenTensor::From(input_detect); auto label_lod = input_label.lod(); auto detect_lod = input_detect.lod(); int batch_size = label_lod[0].size() - 1; auto label_index = label_lod[0]; for (int n = 0; n < batch_size; ++n) { std::map> boxes; for (int i = label_index[n]; i < label_index[n + 1]; ++i) { Box box(labels(i, 2), labels(i, 3), labels(i, 4), labels(i, 5)); int label = labels(i, 0); auto is_difficult = labels(i, 1); if (std::abs(is_difficult - 0.0) < 1e-6) box.is_difficult = false; else box.is_difficult = true; boxes[label].push_back(box); } gt_boxes.push_back(boxes); } auto detect_index = detect_lod[0]; for (int n = 0; n < batch_size; ++n) { std::map>> boxes; for (int i = detect_index[n]; i < detect_index[n + 1]; ++i) { Box box(detect(i, 2), detect(i, 3), detect(i, 4), detect(i, 5)); int label = detect(i, 0); auto score = detect(i, 1); boxes[label].push_back(std::make_pair(score, box)); } detect_boxes.push_back(boxes); } } void GetOutputPos( const framework::ExecutionContext& ctx, const std::map& label_pos_count, const std::map>>& true_pos, const std::map>>& false_pos, framework::Tensor& output_pos_count, framework::LoDTensor& output_true_pos, framework::LoDTensor& output_false_pos) const { int max_class_id = 0; int true_pos_count = 0; int false_pos_count = 0; for (auto it = label_pos_count.begin(); it != label_pos_count.end(); ++it) { int label = it->first; if (label > max_class_id) max_class_id = label; int label_num_pos = it->second; if (label_num_pos == 0 || true_pos.find(label) == true_pos.end()) continue; auto label_true_pos = true_pos.find(label)->second; auto label_false_pos = false_pos.find(label)->second; true_pos_count += label_true_pos.size(); false_pos_count += label_false_pos.size(); } int* pos_count_data = output_pos_count.mutable_data( framework::make_ddim({max_class_id + 1, 1}), ctx.GetPlace()); T* true_pos_data = output_true_pos.mutable_data( framework::make_ddim({true_pos_count, 2}), ctx.GetPlace()); T* false_pos_data = output_false_pos.mutable_data( framework::make_ddim({false_pos_count, 2}), ctx.GetPlace()); true_pos_count = 0; false_pos_count = 0; std::vector true_pos_starts = {0}; std::vector false_pos_starts = {0}; for (int i = 0; i <= max_class_id; ++i) { auto it_count = label_pos_count.find(i); pos_count_data[i] = 0; if (it_count != label_pos_count.end()) { pos_count_data[i] = it_count->second; } auto it_true_pos = true_pos.find(i); if (it_true_pos != true_pos.end()) { const std::vector>& true_pos_vec = it_true_pos->second; for (const std::pair& tp : true_pos_vec) { true_pos_data[true_pos_count * 2] = tp.first; true_pos_data[true_pos_count * 2 + 1] = static_cast(tp.second); true_pos_count++; } } true_pos_starts.push_back(true_pos_count); auto it_false_pos = false_pos.find(i); if (it_false_pos != false_pos.end()) { const std::vector>& false_pos_vec = it_false_pos->second; for (const std::pair& fp : false_pos_vec) { false_pos_data[false_pos_count * 2] = fp.first; false_pos_data[false_pos_count * 2 + 1] = static_cast(fp.second); false_pos_count++; } } false_pos_starts.push_back(false_pos_count); } framework::LoD true_pos_lod; true_pos_lod.emplace_back(true_pos_starts); framework::LoD false_pos_lod; false_pos_lod.emplace_back(false_pos_starts); output_true_pos.set_lod(true_pos_lod); output_false_pos.set_lod(false_pos_lod); return; } void GetInputPos( const framework::Tensor& input_pos_count, const framework::LoDTensor& input_true_pos, const framework::LoDTensor& input_false_pos, std::map& label_pos_count, std::map>>& true_pos, std::map>>& false_pos) const { constexpr T kEPS = static_cast(1e-6); int class_number = input_pos_count.dims()[0]; const int* pos_count_data = input_pos_count.data(); for (int i = 0; i < class_number; ++i) { label_pos_count[i] = pos_count_data[i]; } const T* true_pos_data = input_true_pos.data(); auto true_pos_data_lod = input_true_pos.lod(); for (int i = 0; i < true_pos_data_lod.size(); ++i) { for (int j = true_pos_data_lod[0][i]; j < true_pos_data_lod[0][i + 1]; ++j) { T score = true_pos_data[j * 2]; int flag = 1; if (true_pos_data[j * 2 + 1] < kEPS) flag = 0; true_pos[i].push_back(std::make_pair(score, flag)); } } const T* false_pos_data = input_false_pos.data(); auto false_pos_data_lod = input_false_pos.lod(); for (int i = 0; i < false_pos_data_lod.size(); ++i) { for (int j = false_pos_data_lod[0][i]; j < false_pos_data_lod[0][i + 1]; ++j) { T score = false_pos_data[j * 2]; int flag = 1; if (false_pos_data[j * 2 + 1] < kEPS) flag = 0; false_pos[i].push_back(std::make_pair(score, flag)); } } return; } void CalcTrueAndFalsePositive( const std::vector>>& gt_boxes, const std::vector>>>& detect_boxes, bool evaluate_difficult, float overlap_threshold, std::map& label_pos_count, std::map>>& true_pos, std::map>>& false_pos) const { int batch_size = gt_boxes.size(); for (int n = 0; n < batch_size; ++n) { auto image_gt_boxes = gt_boxes[n]; for (auto it = image_gt_boxes.begin(); it != image_gt_boxes.end(); ++it) { size_t count = 0; auto labeled_bboxes = it->second; if (evaluate_difficult) { count = labeled_bboxes.size(); } else { for (size_t i = 0; i < labeled_bboxes.size(); ++i) if (!(labeled_bboxes[i].is_difficult)) ++count; } if (count == 0) { continue; } int label = it->first; if (label_pos_count.find(label) == label_pos_count.end()) { label_pos_count[label] = count; } else { label_pos_count[label] += count; } } } for (size_t n = 0; n < detect_boxes.size(); ++n) { auto image_gt_boxes = gt_boxes[n]; auto detections = detect_boxes[n]; if (image_gt_boxes.size() == 0) { for (auto it = detections.begin(); it != detections.end(); ++it) { auto pred_boxes = it->second; int label = it->first; for (size_t i = 0; i < pred_boxes.size(); ++i) { auto score = pred_boxes[i].first; true_pos[label].push_back(std::make_pair(score, 0)); false_pos[label].push_back(std::make_pair(score, 1)); } } continue; } for (auto it = detections.begin(); it != detections.end(); ++it) { int label = it->first; auto pred_boxes = it->second; if (image_gt_boxes.find(label) == image_gt_boxes.end()) { for (size_t i = 0; i < pred_boxes.size(); ++i) { auto score = pred_boxes[i].first; true_pos[label].push_back(std::make_pair(score, 0)); false_pos[label].push_back(std::make_pair(score, 1)); } continue; } auto matched_bboxes = image_gt_boxes.find(label)->second; std::vector visited(matched_bboxes.size(), false); // Sort detections in descend order based on scores std::sort(pred_boxes.begin(), pred_boxes.end(), SortScorePairDescend); for (size_t i = 0; i < pred_boxes.size(); ++i) { T max_overlap = -1.0; size_t max_idx = 0; auto score = pred_boxes[i].first; for (size_t j = 0; j < matched_bboxes.size(); ++j) { T overlap = JaccardOverlap(pred_boxes[i].second, matched_bboxes[j]); if (overlap > max_overlap) { max_overlap = overlap; max_idx = j; } } if (max_overlap > overlap_threshold) { bool match_evaluate_difficult = evaluate_difficult || (!evaluate_difficult && !matched_bboxes[max_idx].is_difficult); if (match_evaluate_difficult) { if (!visited[max_idx]) { true_pos[label].push_back(std::make_pair(score, 1)); false_pos[label].push_back(std::make_pair(score, 0)); visited[max_idx] = true; } else { true_pos[label].push_back(std::make_pair(score, 0)); false_pos[label].push_back(std::make_pair(score, 1)); } } } else { true_pos[label].push_back(std::make_pair(score, 0)); false_pos[label].push_back(std::make_pair(score, 1)); } } } } } T CalcMAP( APType ap_type, const std::map& label_pos_count, const std::map>>& true_pos, const std::map>>& false_pos) const { T mAP = 0.0; int count = 0; for (auto it = label_pos_count.begin(); it != label_pos_count.end(); ++it) { int label = it->first; int label_num_pos = it->second; if (label_num_pos == 0 || true_pos.find(label) == true_pos.end()) continue; auto label_true_pos = true_pos.find(label)->second; auto label_false_pos = false_pos.find(label)->second; // Compute average precision. std::vector tp_sum; GetAccumulation(label_true_pos, &tp_sum); std::vector fp_sum; GetAccumulation(label_false_pos, &fp_sum); std::vector precision, recall; size_t num = tp_sum.size(); // Compute Precision. for (size_t i = 0; i < num; ++i) { precision.push_back(static_cast(tp_sum[i]) / static_cast(tp_sum[i] + fp_sum[i])); recall.push_back(static_cast(tp_sum[i]) / label_num_pos); } // VOC2007 style if (ap_type == APType::k11point) { std::vector max_precisions(11, 0.0); int start_idx = num - 1; for (int j = 10; j >= 0; --j) for (int i = start_idx; i >= 0; --i) { if (recall[i] < j / 10.) { start_idx = i; if (j > 0) max_precisions[j - 1] = max_precisions[j]; break; } else { if (max_precisions[j] < precision[i]) max_precisions[j] = precision[i]; } } for (int j = 10; j >= 0; --j) mAP += max_precisions[j] / 11; ++count; } else if (ap_type == APType::kIntegral) { // Nature integral float average_precisions = 0.; float prev_recall = 0.; for (size_t i = 0; i < num; ++i) { if (fabs(recall[i] - prev_recall) > 1e-6) average_precisions += precision[i] * fabs(recall[i] - prev_recall); prev_recall = recall[i]; } mAP += average_precisions; ++count; } else { LOG(FATAL) << "Unkown ap version: " << ap_type; } } if (count != 0) mAP /= count; return mAP * 100; } }; // namespace operators } // namespace operators } // namespace paddle