/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #pragma once #include "paddle/framework/selected_rows.h" #include "paddle/platform/device_context.h" namespace paddle { namespace operators { namespace math { template struct BBox { BBox(T x_min, T y_min, T x_max, T y_max) : x_min(x_min), y_min(y_min), x_max(x_max), y_max(y_max), is_difficult(false) {} BBox() {} T get_width() const { return x_max - x_min; } T get_height() const { return y_max - y_min; } T get_center_x() const { return (x_min + x_max) / 2; } T get_center_y() const { return (y_min + y_max) / 2; } T get_area() const { return get_width() * get_height(); } // coordinate of bounding box T x_min; T y_min; T x_max; T y_max; // whether difficult object (e.g. object with heavy occlusion is difficult) bool is_difficult; }; // KNCHW ==> NHWC template int appendWithPermute(const T* input_data, int input_nums, int batch_size, int channels, int height, int weight, T* output_data) { int image_size = height * weight; int numel = input_nums * batch_size * channels * height * weight; int offset = 0; for (int p = 0; p < input_nums; ++p) { int in_p_offset = p * batch_size * channels * image_size; for (int n = 0; n < batch_size; ++n) { int in_n_offset = n * channels * image_size; int out_n_offset = n * numel / batch_size + offset; int in_stride = image_size; int out_stride = channels; const T* in_data = input_data + in_p_offset + in_n_offset; T* out_data = output_data + out_n_offset; for (int c = 0; c < channels; ++c) { for (int i = 0; i < image_size; ++i) { out_data[out_stride * i + c] = in_data[c * in_stride + i]; } } } offset += image_size * channels; } return 0; } template void getBBoxFromPriorData(const T* prior_data, const size_t num_bboxes, std::vector>& bbox_vec) { size_t out_offset = bbox_vec.size(); bbox_vec.resize(bbox_vec.size() + num_bboxes); for (size_t i = 0; i < num_bboxes; ++i) { BBox bbox; bbox.x_min = *(prior_data + i * 8); bbox.y_min = *(prior_data + i * 8 + 1); bbox.x_max = *(prior_data + i * 8 + 2); bbox.y_max = *(prior_data + i * 8 + 3); bbox_vec[out_offset + i] = bbox; } } template void getBBoxVarFromPriorData(const T* prior_data, const size_t num, std::vector>& var_vec) { size_t out_offset = var_vec.size(); var_vec.resize(var_vec.size() + num); for (size_t i = 0; i < num; ++i) { std::vector var; var.push_back(*(prior_data + i * 8 + 4)); var.push_back(*(prior_data + i * 8 + 5)); var.push_back(*(prior_data + i * 8 + 6)); var.push_back(*(prior_data + i * 8 + 7)); var_vec[out_offset + i] = var; } } template BBox decodeBBoxWithVar(BBox& prior_bbox, const std::vector& prior_bbox_var, const std::vector& loc_pred_data) { T prior_bbox_width = prior_bbox.get_width(); T prior_bbox_height = prior_bbox.get_height(); T prior_bbox_center_x = prior_bbox.get_center_x(); T prior_bbox_center_y = prior_bbox.get_center_y(); T decoded_bbox_center_x = prior_bbox_var[0] * loc_pred_data[0] * prior_bbox_width + prior_bbox_center_x; T decoded_bbox_center_y = prior_bbox_var[1] * loc_pred_data[1] * prior_bbox_height + prior_bbox_center_y; T decoded_bbox_width = std::exp(prior_bbox_var[2] * loc_pred_data[2]) * prior_bbox_width; T decoded_bbox_height = std::exp(prior_bbox_var[3] * loc_pred_data[3]) * prior_bbox_height; BBox decoded_bbox; decoded_bbox.x_min = decoded_bbox_center_x - decoded_bbox_width / 2; decoded_bbox.y_min = decoded_bbox_center_y - decoded_bbox_height / 2; decoded_bbox.x_max = decoded_bbox_center_x + decoded_bbox_width / 2; decoded_bbox.y_max = decoded_bbox_center_y + decoded_bbox_height / 2; return decoded_bbox; } template bool sortScorePairDescend(const std::pair& pair1, const std::pair& pair2) { return pair1.first > pair2.first; } template bool sortScorePairDescend(const std::pair>& pair1, const std::pair>& pair2); template T jaccardOverlap(const BBox& bbox1, const BBox& bbox2) { if (bbox2.x_min > bbox1.x_max || bbox2.x_max < bbox1.x_min || bbox2.y_min > bbox1.y_max || bbox2.y_max < bbox1.y_min) { return 0.0; } else { T inter_x_min = std::max(bbox1.x_min, bbox2.x_min); T inter_y_min = std::max(bbox1.y_min, bbox2.y_min); T interX_max = std::min(bbox1.x_max, bbox2.x_max); T interY_max = std::min(bbox1.y_max, bbox2.y_max); T inter_width = interX_max - inter_x_min; T inter_height = interY_max - inter_y_min; T inter_area = inter_width * inter_height; T bbox_area1 = bbox1.get_area(); T bbox_area2 = bbox2.get_area(); return inter_area / (bbox_area1 + bbox_area2 - inter_area); } } template void applyNMSFast(const std::vector>& bboxes, const T* conf_score_data, size_t class_idx, size_t top_k, T conf_threshold, T nms_threshold, size_t num_priors, size_t num_classes, std::vector* indices) { std::vector> scores; for (size_t i = 0; i < num_priors; ++i) { size_t conf_offset = i * num_classes + class_idx; if (conf_score_data[conf_offset] > conf_threshold) scores.push_back(std::make_pair(conf_score_data[conf_offset], i)); } std::stable_sort(scores.begin(), scores.end(), sortScorePairDescend); if (top_k > 0 && top_k < scores.size()) scores.resize(top_k); while (scores.size() > 0) { const size_t idx = scores.front().second; bool keep = true; for (size_t i = 0; i < indices->size(); ++i) { if (keep) { const size_t saved_idx = (*indices)[i]; T overlap = jaccardOverlap(bboxes[idx], bboxes[saved_idx]); keep = overlap <= nms_threshold; } else { break; } } if (keep) indices->push_back(idx); scores.erase(scores.begin()); } } template int getDetectionIndices( const T* conf_data, const size_t num_priors, const size_t num_classes, const size_t background_label_id, const size_t batch_size, const T conf_threshold, const size_t nms_top_k, const T nms_threshold, const size_t top_k, const std::vector>>& all_decoded_bboxes, std::vector>>* all_detection_indices) { int total_keep_num = 0; for (size_t n = 0; n < batch_size; ++n) { const std::vector>& decoded_bboxes = all_decoded_bboxes[n]; size_t num_detected = 0; std::map> indices; size_t conf_offset = n * num_priors * num_classes; for (size_t c = 0; c < num_classes; ++c) { if (c == background_label_id) continue; applyNMSFast(decoded_bboxes, conf_data + conf_offset, c, nms_top_k, conf_threshold, nms_threshold, num_priors, num_classes, &(indices[c])); num_detected += indices[c].size(); } if (top_k > 0 && num_detected > top_k) { // std::vector> score_index_pairs; std::vector>> score_index_pairs; for (size_t c = 0; c < num_classes; ++c) { const std::vector& label_indices = indices[c]; for (size_t i = 0; i < label_indices.size(); ++i) { size_t idx = label_indices[i]; score_index_pairs.push_back( std::make_pair((conf_data + conf_offset)[idx * num_classes + c], std::make_pair(c, idx))); } } std::sort(score_index_pairs.begin(), score_index_pairs.end(), sortScorePairDescend>); score_index_pairs.resize(top_k); std::map> new_indices; for (size_t i = 0; i < score_index_pairs.size(); ++i) { size_t label = score_index_pairs[i].second.first; size_t idx = score_index_pairs[i].second.second; new_indices[label].push_back(idx); } all_detection_indices->push_back(new_indices); total_keep_num += top_k; } else { all_detection_indices->push_back(indices); total_keep_num += num_detected; } } return total_keep_num; } template BBox clipBBox(const BBox& bbox) { T one = static_cast(1.0); T zero = static_cast(0.0); BBox clipped_bbox; clipped_bbox.x_min = std::max(std::min(bbox.x_min, one), zero); clipped_bbox.y_min = std::max(std::min(bbox.y_min, one), zero); clipped_bbox.x_max = std::max(std::min(bbox.x_max, one), zero); clipped_bbox.y_max = std::max(std::min(bbox.y_max, one), zero); return clipped_bbox; } template void getDetectionOutput( const T* conf_data, const size_t num_kept, const size_t num_priors, const size_t num_classes, const size_t batch_size, const std::vector>>& all_indices, const std::vector>>& all_decoded_bboxes, T* out_data) { size_t count = 0; for (size_t n = 0; n < batch_size; ++n) { for (std::map>::const_iterator it = all_indices[n].begin(); it != all_indices[n].end(); ++it) { size_t label = it->first; const std::vector& indices = it->second; const std::vector>& decoded_bboxes = all_decoded_bboxes[n]; for (size_t i = 0; i < indices.size(); ++i) { size_t idx = indices[i]; size_t conf_offset = n * num_priors * num_classes + idx * num_classes; out_data[count * 7] = n; out_data[count * 7 + 1] = label; out_data[count * 7 + 2] = (conf_data + conf_offset)[label]; BBox clipped_bbox = clipBBox(decoded_bboxes[idx]); out_data[count * 7 + 3] = clipped_bbox.x_min; out_data[count * 7 + 4] = clipped_bbox.y_min; out_data[count * 7 + 5] = clipped_bbox.x_max; out_data[count * 7 + 6] = clipped_bbox.y_max; ++count; } } } // out.copyFrom(out_data, num_kept * 7); } } // namespace math } // namespace operators } // namespace paddle