// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include #include #include #include #include #include #include #include #include #include "include/config_parser.h" #include "include/keypoint_detector.h" #include "include/object_detector.h" #include "include/preprocess_op.h" #include "json/json.h" Json::Value RT_Config; void PrintBenchmarkLog(std::vector det_time, int img_num) { std::cout << "----------------------- Config info -----------------------" << std::endl; std::cout << "num_threads: " << RT_Config["cpu_threads"].as() << std::endl; std::cout << "----------------------- Data info -----------------------" << std::endl; std::cout << "batch_size_det: " << RT_Config["batch_size_det"].as() << std::endl; std::cout << "----------------------- Model info -----------------------" << std::endl; RT_Config["model_dir_det"].as().erase( RT_Config["model_dir_det"].as().find_last_not_of("/") + 1); std::cout << "detection model_name: " << RT_Config["model_dir_det"].as() << std::endl; std::cout << "----------------------- Perf info ------------------------" << std::endl; std::cout << "Total number of predicted data: " << img_num << " and total time spent(ms): " << std::accumulate(det_time.begin(), det_time.end(), 0.) << std::endl; img_num = std::max(1, img_num); std::cout << "preproce_time(ms): " << det_time[0] / img_num << ", inference_time(ms): " << det_time[1] / img_num << ", postprocess_time(ms): " << det_time[2] / img_num << std::endl; } void PrintKptsBenchmarkLog(std::vector det_time, int img_num) { std::cout << "----------------------- Data info -----------------------" << std::endl; std::cout << "batch_size_keypoint: " << RT_Config["batch_size_keypoint"].as() << std::endl; std::cout << "----------------------- Model info -----------------------" << std::endl; RT_Config["model_dir_keypoint"].as().erase( RT_Config["model_dir_keypoint"].as().find_last_not_of("/") + 1); std::cout << "keypoint model_name: " << RT_Config["model_dir_keypoint"].as() << std::endl; std::cout << "----------------------- Perf info ------------------------" << std::endl; std::cout << "Total number of predicted data: " << img_num << " and total time spent(ms): " << std::accumulate(det_time.begin(), det_time.end(), 0.) << std::endl; img_num = std::max(1, img_num); std::cout << "Average time cost per person:" << std::endl << "preproce_time(ms): " << det_time[0] / img_num << ", inference_time(ms): " << det_time[1] / img_num << ", postprocess_time(ms): " << det_time[2] / img_num << std::endl; } void PrintTotalIimeLog(double det_time, double keypoint_time, double crop_time) { std::cout << "----------------------- Time info ------------------------" << std::endl; std::cout << "Total Pipeline time(ms) per image: " << det_time + keypoint_time + crop_time << std::endl; std::cout << "Average det time(ms) per image: " << det_time << ", average keypoint time(ms) per image: " << keypoint_time << ", average crop time(ms) per image: " << crop_time << std::endl; } static std::string DirName(const std::string& filepath) { auto pos = filepath.rfind(OS_PATH_SEP); if (pos == std::string::npos) { return ""; } return filepath.substr(0, pos); } static bool PathExists(const std::string& path) { struct stat buffer; return (stat(path.c_str(), &buffer) == 0); } static void MkDir(const std::string& path) { if (PathExists(path)) return; int ret = 0; ret = mkdir(path.c_str(), 0755); if (ret != 0) { std::string path_error(path); path_error += " mkdir failed!"; throw std::runtime_error(path_error); } } static void MkDirs(const std::string& path) { if (path.empty()) return; if (PathExists(path)) return; MkDirs(DirName(path)); MkDir(path); } void PredictImage(const std::vector all_img_paths, const int batch_size_det, const double threshold_det, const bool run_benchmark, PaddleDetection::ObjectDetector* det, PaddleDetection::KeyPointDetector* keypoint, const std::string& output_dir = "output") { std::vector det_t = {0, 0, 0}; int steps = ceil(static_cast(all_img_paths.size()) / batch_size_det); int kpts_imgs = 0; std::vector keypoint_t = {0, 0, 0}; double midtimecost = 0; for (int idx = 0; idx < steps; idx++) { std::vector batch_imgs; int left_image_cnt = all_img_paths.size() - idx * batch_size_det; if (left_image_cnt > batch_size_det) { left_image_cnt = batch_size_det; } for (int bs = 0; bs < left_image_cnt; bs++) { std::string image_file_path = all_img_paths.at(idx * batch_size_det + bs); cv::Mat im = cv::imread(image_file_path, 1); batch_imgs.insert(batch_imgs.end(), im); } // Store all detected result std::vector result; std::vector bbox_num; std::vector det_times; // Store keypoint results std::vector result_kpts; std::vector imgs_kpts; std::vector> center_bs; std::vector> scale_bs; std::vector colormap_kpts = PaddleDetection::GenerateColorMap(20); bool is_rbox = false; if (run_benchmark) { det->Predict( batch_imgs, threshold_det, 50, 50, &result, &bbox_num, &det_times); } else { det->Predict( batch_imgs, threshold_det, 0, 1, &result, &bbox_num, &det_times); } // get labels and colormap auto labels = det->GetLabelList(); auto colormap = PaddleDetection::GenerateColorMap(labels.size()); int item_start_idx = 0; for (int i = 0; i < left_image_cnt; i++) { cv::Mat im = batch_imgs[i]; std::vector im_result; int detect_num = 0; for (int j = 0; j < bbox_num[i]; j++) { PaddleDetection::ObjectResult item = result[item_start_idx + j]; if (item.confidence < threshold_det || item.class_id == -1) { continue; } detect_num += 1; im_result.push_back(item); if (item.rect.size() > 6) { is_rbox = true; printf("class=%d confidence=%.4f rect=[%d %d %d %d %d %d %d %d]\n", item.class_id, item.confidence, item.rect[0], item.rect[1], item.rect[2], item.rect[3], item.rect[4], item.rect[5], item.rect[6], item.rect[7]); } else { printf("class=%d confidence=%.4f rect=[%d %d %d %d]\n", item.class_id, item.confidence, item.rect[0], item.rect[1], item.rect[2], item.rect[3]); } } std::cout << all_img_paths.at(idx * batch_size_det + i) << " The number of detected box: " << detect_num << std::endl; item_start_idx = item_start_idx + bbox_num[i]; std::vector compression_params; compression_params.push_back(cv::IMWRITE_JPEG_QUALITY); compression_params.push_back(95); std::string output_path(output_dir); if (output_dir.rfind(OS_PATH_SEP) != output_dir.size() - 1) { output_path += OS_PATH_SEP; } std::string image_file_path = all_img_paths.at(idx * batch_size_det + i); if (keypoint) { int imsize = im_result.size(); for (int i = 0; i < imsize; i++) { auto keypoint_start_time = std::chrono::steady_clock::now(); auto item = im_result[i]; cv::Mat crop_img; std::vector keypoint_times; std::vector rect = { item.rect[0], item.rect[1], item.rect[2], item.rect[3]}; std::vector center; std::vector scale; if (item.class_id == 0) { PaddleDetection::CropImg(im, crop_img, rect, center, scale); center_bs.emplace_back(center); scale_bs.emplace_back(scale); imgs_kpts.emplace_back(crop_img); kpts_imgs += 1; } auto keypoint_crop_time = std::chrono::steady_clock::now(); std::chrono::duration midtimediff = keypoint_crop_time - keypoint_start_time; midtimecost += static_cast(midtimediff.count() * 1000); if (imgs_kpts.size() == RT_Config["batch_size_keypoint"].as() || ((i == imsize - 1) && !imgs_kpts.empty())) { if (run_benchmark) { keypoint->Predict(imgs_kpts, center_bs, scale_bs, 10, 10, &result_kpts, &keypoint_times); } else { keypoint->Predict(imgs_kpts, center_bs, scale_bs, 0, 1, &result_kpts, &keypoint_times); } imgs_kpts.clear(); center_bs.clear(); scale_bs.clear(); keypoint_t[0] += keypoint_times[0]; keypoint_t[1] += keypoint_times[1]; keypoint_t[2] += keypoint_times[2]; } } std::string kpts_savepath = output_path + "keypoint_" + image_file_path.substr(image_file_path.find_last_of('/') + 1); cv::Mat kpts_vis_img = VisualizeKptsResult( im, result_kpts, colormap_kpts, keypoint->get_threshold()); cv::imwrite(kpts_savepath, kpts_vis_img, compression_params); printf("Visualized output saved as %s\n", kpts_savepath.c_str()); } else { // Visualization result cv::Mat vis_img = PaddleDetection::VisualizeResult( im, im_result, labels, colormap, is_rbox); std::string det_savepath = output_path + "result_" + image_file_path.substr(image_file_path.find_last_of('/') + 1); cv::imwrite(det_savepath, vis_img, compression_params); printf("Visualized output saved as %s\n", det_savepath.c_str()); } } det_t[0] += det_times[0]; det_t[1] += det_times[1]; det_t[2] += det_times[2]; } PrintBenchmarkLog(det_t, all_img_paths.size()); if (keypoint) { PrintKptsBenchmarkLog(keypoint_t, kpts_imgs); PrintTotalIimeLog( (det_t[0] + det_t[1] + det_t[2]) / all_img_paths.size(), (keypoint_t[0] + keypoint_t[1] + keypoint_t[2]) / all_img_paths.size(), midtimecost / all_img_paths.size()); } } int main(int argc, char** argv) { std::cout << "Usage: " << argv[0] << " [config_path] [image_dir](option)\n"; if (argc < 2) { std::cout << "Usage: ./main det_runtime_config.json" << std::endl; return -1; } std::string config_path = argv[1]; std::string img_path = ""; if (argc >= 3) { img_path = argv[2]; } // Parsing command-line PaddleDetection::load_jsonf(config_path, RT_Config); if (RT_Config["model_dir_det"].as().empty()) { std::cout << "Please set [model_det_dir] in " << config_path << std::endl; return -1; } if (RT_Config["image_file"].as().empty() && RT_Config["image_dir"].as().empty() && img_path.empty()) { std::cout << "Please set [image_file] or [image_dir] in " << config_path << " Or use command: <" << argv[0] << " [image_dir]>" << std::endl; return -1; } if (!img_path.empty()) { std::cout << "Use image_dir in command line overide the path in config file" << std::endl; RT_Config["image_dir"] = img_path; RT_Config["image_file"] = ""; } // Load model and create a object detector PaddleDetection::ObjectDetector det( RT_Config["model_dir_det"].as(), RT_Config["cpu_threads"].as(), RT_Config["batch_size_det"].as()); PaddleDetection::KeyPointDetector* keypoint = nullptr; if (!RT_Config["model_dir_keypoint"].as().empty()) { keypoint = new PaddleDetection::KeyPointDetector( RT_Config["model_dir_keypoint"].as(), RT_Config["cpu_threads"].as(), RT_Config["batch_size_keypoint"].as(), RT_Config["use_dark_decode"].as()); RT_Config["batch_size_det"] = 1; printf( "batchsize of detection forced to be 1 while keypoint model is not " "empty()"); } // Do inference on input image if (!RT_Config["image_file"].as().empty() || !RT_Config["image_dir"].as().empty()) { if (!PathExists(RT_Config["output_dir"].as())) { MkDirs(RT_Config["output_dir"].as()); } std::vector all_img_paths; std::vector cv_all_img_paths; if (!RT_Config["image_file"].as().empty()) { all_img_paths.push_back(RT_Config["image_file"].as()); if (RT_Config["batch_size_det"].as() > 1) { std::cout << "batch_size_det should be 1, when set `image_file`." << std::endl; return -1; } } else { cv::glob(RT_Config["image_dir"].as(), cv_all_img_paths); for (const auto& img_path : cv_all_img_paths) { all_img_paths.push_back(img_path); } } PredictImage(all_img_paths, RT_Config["batch_size_det"].as(), RT_Config["threshold_det"].as(), RT_Config["run_benchmark"].as(), &det, keypoint, RT_Config["output_dir"].as()); } delete keypoint; keypoint = nullptr; return 0; }