/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #pragma once #include #include "paddle/framework/selected_rows.h" #include "paddle/platform/device_context.h" namespace paddle { namespace operators { namespace math { template struct BBox { BBox(T x_min, T y_min, T x_max, T y_max) : x_min(x_min), y_min(y_min), x_max(x_max), y_max(y_max), is_difficult(false) {} BBox() {} T get_width() const { return x_max - x_min; } T get_height() const { return y_max - y_min; } T get_center_x() const { return (x_min + x_max) / 2; } T get_center_y() const { return (y_min + y_max) / 2; } T get_area() const { return get_width() * get_height(); } // coordinate of bounding box T x_min; T y_min; T x_max; T y_max; // whether difficult object (e.g. object with heavy occlusion is difficult) bool is_difficult; }; // KNCHW ==> NHWC // template template void get_bbox_from_priorData(const T* prior_data, const size_t num_bboxes, std::vector>& bbox_vec); template void get_bbox_var_from_prior_data(const T* prior_data, const size_t num, std::vector>& var_vec); template BBox decode_bbox_with_var(BBox& prior_bbox, const std::vector& prior_bbox_var, const std::vector& loc_pred_data); template bool sort_score_pair_descend(const std::pair& pair1, const std::pair& pair2); template bool sort_score_pair_descend(const std::pair>& pair1, const std::pair>& pair2); template T jaccard_overlap(const BBox& bbox1, const BBox& bbox2); template void apply_nms_fast(const std::vector>& bboxes, const T* conf_score_data, size_t class_idx, size_t top_k, T conf_threshold, T nms_threshold, size_t num_priors, size_t num_classes, std::vector* indices); template int get_detection_indices( const T* conf_data, const size_t num_priors, const size_t num_classes, const size_t background_label_id, const size_t batch_size, const T conf_threshold, const size_t nms_top_k, const T nms_threshold, const size_t top_k, const std::vector>>& all_decoded_bboxes, std::vector>>* all_detection_indices); template BBox clipBBox(const BBox& bbox); template void get_detection_output( const T* conf_data, const size_t num_kept, const size_t num_priors, const size_t num_classes, const size_t batch_size, const std::vector>>& all_indices, const std::vector>>& all_decoded_bboxes, T* out_data); template void get_bbox_from_priorData(const T* prior_data, const size_t num_bboxes, std::vector>& bbox_vec) { size_t out_offset = bbox_vec.size(); bbox_vec.resize(bbox_vec.size() + num_bboxes); for (size_t i = 0; i < num_bboxes; ++i) { BBox bbox; bbox.x_min = *(prior_data + i * 8); bbox.y_min = *(prior_data + i * 8 + 1); bbox.x_max = *(prior_data + i * 8 + 2); bbox.y_max = *(prior_data + i * 8 + 3); bbox_vec[out_offset + i] = bbox; } } template void get_bbox_var_from_prior_data(const T* prior_data, const size_t num, std::vector>& var_vec) { size_t out_offset = var_vec.size(); var_vec.resize(var_vec.size() + num); for (size_t i = 0; i < num; ++i) { std::vector var; var.push_back(*(prior_data + i * 8 + 4)); var.push_back(*(prior_data + i * 8 + 5)); var.push_back(*(prior_data + i * 8 + 6)); var.push_back(*(prior_data + i * 8 + 7)); var_vec[out_offset + i] = var; } } template BBox decode_bbox_with_var(BBox& prior_bbox, const std::vector& prior_bbox_var, const std::vector& loc_pred_data) { T prior_bbox_width = prior_bbox.get_width(); T prior_bbox_height = prior_bbox.get_height(); T prior_bbox_center_x = prior_bbox.get_center_x(); T prior_bbox_center_y = prior_bbox.get_center_y(); T decoded_bbox_center_x = prior_bbox_var[0] * loc_pred_data[0] * prior_bbox_width + prior_bbox_center_x; T decoded_bbox_center_y = prior_bbox_var[1] * loc_pred_data[1] * prior_bbox_height + prior_bbox_center_y; T decoded_bbox_width = std::exp(prior_bbox_var[2] * loc_pred_data[2]) * prior_bbox_width; T decoded_bbox_height = std::exp(prior_bbox_var[3] * loc_pred_data[3]) * prior_bbox_height; BBox decoded_bbox; decoded_bbox.x_min = decoded_bbox_center_x - decoded_bbox_width / 2; decoded_bbox.y_min = decoded_bbox_center_y - decoded_bbox_height / 2; decoded_bbox.x_max = decoded_bbox_center_x + decoded_bbox_width / 2; decoded_bbox.y_max = decoded_bbox_center_y + decoded_bbox_height / 2; return decoded_bbox; } template bool sort_score_pair_descend(const std::pair& pair1, const std::pair& pair2) { return pair1.first > pair2.first; } template T jaccard_overlap(const BBox& bbox1, const BBox& bbox2) { if (bbox2.x_min > bbox1.x_max || bbox2.x_max < bbox1.x_min || bbox2.y_min > bbox1.y_max || bbox2.y_max < bbox1.y_min) { return 0.0; } else { T inter_x_min = std::max(bbox1.x_min, bbox2.x_min); T inter_y_min = std::max(bbox1.y_min, bbox2.y_min); T interX_max = std::min(bbox1.x_max, bbox2.x_max); T interY_max = std::min(bbox1.y_max, bbox2.y_max); T inter_width = interX_max - inter_x_min; T inter_height = interY_max - inter_y_min; T inter_area = inter_width * inter_height; T bbox_area1 = bbox1.get_area(); T bbox_area2 = bbox2.get_area(); return inter_area / (bbox_area1 + bbox_area2 - inter_area); } } template void apply_nms_fast(const std::vector>& bboxes, const T* conf_score_data, size_t class_idx, size_t top_k, T conf_threshold, T nms_threshold, size_t num_priors, size_t num_classes, std::vector* indices) { std::vector> scores; for (size_t i = 0; i < num_priors; ++i) { size_t conf_offset = i * num_classes + class_idx; if (conf_score_data[conf_offset] > conf_threshold) scores.push_back(std::make_pair(conf_score_data[conf_offset], i)); } std::stable_sort(scores.begin(), scores.end(), sort_score_pair_descend); if (top_k > 0 && top_k < scores.size()) scores.resize(top_k); while (scores.size() > 0) { const size_t idx = scores.front().second; bool keep = true; for (size_t i = 0; i < indices->size(); ++i) { if (keep) { const size_t saved_idx = (*indices)[i]; T overlap = jaccard_overlap(bboxes[idx], bboxes[saved_idx]); keep = overlap <= nms_threshold; } else { break; } } if (keep) indices->push_back(idx); scores.erase(scores.begin()); } } template int get_detection_indices( const T* conf_data, const size_t num_priors, const size_t num_classes, const size_t background_label_id, const size_t batch_size, const T conf_threshold, const size_t nms_top_k, const T nms_threshold, const size_t top_k, const std::vector>>& all_decoded_bboxes, std::vector>>* all_detection_indices) { int total_keep_num = 0; for (size_t n = 0; n < batch_size; ++n) { const std::vector>& decoded_bboxes = all_decoded_bboxes[n]; size_t num_detected = 0; std::map> indices; size_t conf_offset = n * num_priors * num_classes; for (size_t c = 0; c < num_classes; ++c) { if (c == background_label_id) continue; apply_nms_fast(decoded_bboxes, conf_data + conf_offset, c, nms_top_k, conf_threshold, nms_threshold, num_priors, num_classes, &(indices[c])); num_detected += indices[c].size(); } if (top_k > 0 && num_detected > top_k) { // std::vector> score_index_pairs; std::vector>> score_index_pairs; for (size_t c = 0; c < num_classes; ++c) { const std::vector& label_indices = indices[c]; for (size_t i = 0; i < label_indices.size(); ++i) { size_t idx = label_indices[i]; score_index_pairs.push_back( std::make_pair((conf_data + conf_offset)[idx * num_classes + c], std::make_pair(c, idx))); } } std::sort(score_index_pairs.begin(), score_index_pairs.end(), sort_score_pair_descend>); score_index_pairs.resize(top_k); std::map> new_indices; for (size_t i = 0; i < score_index_pairs.size(); ++i) { size_t label = score_index_pairs[i].second.first; size_t idx = score_index_pairs[i].second.second; new_indices[label].push_back(idx); } all_detection_indices->push_back(new_indices); total_keep_num += top_k; } else { all_detection_indices->push_back(indices); total_keep_num += num_detected; } } return total_keep_num; } template BBox clipBBox(const BBox& bbox) { T one = static_cast(1.0); T zero = static_cast(0.0); BBox clipped_bbox; clipped_bbox.x_min = std::max(std::min(bbox.x_min, one), zero); clipped_bbox.y_min = std::max(std::min(bbox.y_min, one), zero); clipped_bbox.x_max = std::max(std::min(bbox.x_max, one), zero); clipped_bbox.y_max = std::max(std::min(bbox.y_max, one), zero); return clipped_bbox; } template void get_detection_output( const T* conf_data, const size_t num_kept, const size_t num_priors, const size_t num_classes, const size_t batch_size, const std::vector>>& all_indices, const std::vector>>& all_decoded_bboxes, T* out_data) { size_t count = 0; for (size_t n = 0; n < batch_size; ++n) { for (std::map>::const_iterator it = all_indices[n].begin(); it != all_indices[n].end(); ++it) { size_t label = it->first; const std::vector& indices = it->second; const std::vector>& decoded_bboxes = all_decoded_bboxes[n]; for (size_t i = 0; i < indices.size(); ++i) { size_t idx = indices[i]; size_t conf_offset = n * num_priors * num_classes + idx * num_classes; out_data[count * 7] = n; out_data[count * 7 + 1] = label; out_data[count * 7 + 2] = (conf_data + conf_offset)[label]; BBox clipped_bbox = clipBBox(decoded_bboxes[idx]); out_data[count * 7 + 3] = clipped_bbox.x_min; out_data[count * 7 + 4] = clipped_bbox.y_min; out_data[count * 7 + 5] = clipped_bbox.x_max; out_data[count * 7 + 6] = clipped_bbox.y_max; ++count; } } } } } // namespace math } // namespace operators } // namespace paddle