diff --git a/deploy/cpp/src/main_keypoint.cc b/deploy/cpp/src/main_keypoint.cc index f903ffbaaa176ca2f8fb3819a75dcbd05e6b6533..968e1b0679dc12527357ca5bafc9090c2bac2992 100644 --- a/deploy/cpp/src/main_keypoint.cc +++ b/deploy/cpp/src/main_keypoint.cc @@ -14,72 +14,96 @@ #include +#include +#include +#include +#include #include +#include #include #include -#include -#include -#include -#include -#include #ifdef _WIN32 #include #include #elif LINUX #include -#include #endif -#include "include/object_detector.h" +#include #include "include/keypoint_detector.h" +#include "include/object_detector.h" #include "include/preprocess_op.h" -#include - DEFINE_string(model_dir, "", "Path of object detector inference model"); -DEFINE_string(model_dir_keypoint, "", "Path of keypoint detector inference model"); +DEFINE_string(model_dir_keypoint, + "", + "Path of keypoint detector inference model"); DEFINE_string(image_file, "", "Path of input image"); -DEFINE_string(image_dir, "", "Dir of input image, `image_file` has a higher priority."); +DEFINE_string(image_dir, + "", + "Dir of input image, `image_file` has a higher priority."); DEFINE_int32(batch_size, 1, "batch_size of object detector"); DEFINE_int32(batch_size_keypoint, 8, "batch_size of keypoint detector"); -DEFINE_string(video_file, "", "Path of input video, `video_file` or `camera_id` has a highest priority."); +DEFINE_string( + video_file, + "", + "Path of input video, `video_file` or `camera_id` has a highest priority."); DEFINE_int32(camera_id, -1, "Device id of camera to predict"); -DEFINE_bool(use_gpu, false, "Deprecated, please use `--device` to set the device you want to run."); -DEFINE_string(device, "CPU", "Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU."); +DEFINE_bool( + use_gpu, + false, + "Deprecated, please use `--device` to set the device you want to run."); +DEFINE_string(device, + "CPU", + "Choose the device you want to run, it can be: CPU/GPU/XPU, " + "default is CPU."); DEFINE_double(threshold, 0.5, "Threshold of score."); DEFINE_double(threshold_keypoint, 0.5, "Threshold of score."); DEFINE_string(output_dir, "output", "Directory of output visualization files."); -DEFINE_string(run_mode, "fluid", "Mode of running(fluid/trt_fp32/trt_fp16/trt_int8)"); +DEFINE_string(run_mode, + "fluid", + "Mode of running(fluid/trt_fp32/trt_fp16/trt_int8)"); DEFINE_int32(gpu_id, 0, "Device id of GPU to execute"); -DEFINE_bool(run_benchmark, false, "Whether to predict a image_file repeatedly for benchmark"); +DEFINE_bool(run_benchmark, + false, + "Whether to predict a image_file repeatedly for benchmark"); DEFINE_bool(use_mkldnn, false, "Whether use mkldnn with CPU"); DEFINE_int32(cpu_threads, 1, "Num of threads with CPU"); DEFINE_int32(trt_min_shape, 1, "Min shape of TRT DynamicShapeI"); DEFINE_int32(trt_max_shape, 1280, "Max shape of TRT DynamicShapeI"); DEFINE_int32(trt_opt_shape, 640, "Opt shape of TRT DynamicShapeI"); -DEFINE_bool(trt_calib_mode, false, "If the model is produced by TRT offline quantitative calibration, trt_calib_mode need to set True"); +DEFINE_bool(trt_calib_mode, + false, + "If the model is produced by TRT offline quantitative calibration, " + "trt_calib_mode need to set True"); DEFINE_bool(use_dark, true, "Whether use dark decode in keypoint postprocess"); -void PrintBenchmarkLog(std::vector det_time, int img_num){ +void PrintBenchmarkLog(std::vector det_time, int img_num) { LOG(INFO) << "----------------------- Config info -----------------------"; LOG(INFO) << "runtime_device: " << FLAGS_device; - LOG(INFO) << "ir_optim: " << "True"; - LOG(INFO) << "enable_memory_optim: " << "True"; + LOG(INFO) << "ir_optim: " + << "True"; + LOG(INFO) << "enable_memory_optim: " + << "True"; int has_trt = FLAGS_run_mode.find("trt"); if (has_trt >= 0) { - LOG(INFO) << "enable_tensorrt: " << "True"; + LOG(INFO) << "enable_tensorrt: " + << "True"; std::string precision = FLAGS_run_mode.substr(4, 8); LOG(INFO) << "precision: " << precision; } else { - LOG(INFO) << "enable_tensorrt: " << "False"; - LOG(INFO) << "precision: " << "fp32"; + LOG(INFO) << "enable_tensorrt: " + << "False"; + LOG(INFO) << "precision: " + << "fp32"; } LOG(INFO) << "enable_mkldnn: " << (FLAGS_use_mkldnn ? "True" : "False"); LOG(INFO) << "cpu_math_library_num_threads: " << FLAGS_cpu_threads; LOG(INFO) << "----------------------- Data info -----------------------"; LOG(INFO) << "batch_size: " << FLAGS_batch_size; - LOG(INFO) << "input_shape: " << "dynamic shape"; + LOG(INFO) << "input_shape: " + << "dynamic shape"; LOG(INFO) << "----------------------- Model info -----------------------"; FLAGS_model_dir.erase(FLAGS_model_dir.find_last_not_of("/") + 1); LOG(INFO) << "model_name: " << FLAGS_model_dir; @@ -93,11 +117,12 @@ void PrintBenchmarkLog(std::vector det_time, int img_num){ << ", postprocess_time(ms): " << det_time[2] / img_num; } -void PrintKptsBenchmarkLog(std::vector det_time, int img_num){ +void PrintKptsBenchmarkLog(std::vector det_time, int img_num) { LOG(INFO) << "----------------------- Data info -----------------------"; LOG(INFO) << "batch_size_keypoint: " << FLAGS_batch_size_keypoint; LOG(INFO) << "----------------------- Model info -----------------------"; - FLAGS_model_dir_keypoint.erase(FLAGS_model_dir_keypoint.find_last_not_of("/") + 1); + FLAGS_model_dir_keypoint.erase( + FLAGS_model_dir_keypoint.find_last_not_of("/") + 1); LOG(INFO) << "keypoint_model_name: " << FLAGS_model_dir_keypoint; LOG(INFO) << "----------------------- Perf info ------------------------"; LOG(INFO) << "Total number of predicted data: " << img_num @@ -110,7 +135,7 @@ void PrintKptsBenchmarkLog(std::vector det_time, int img_num){ << ", postprocess_time(ms): " << det_time[2] / img_num; } -static std::string DirName(const std::string &filepath) { +static std::string DirName(const std::string& filepath) { auto pos = filepath.rfind(OS_PATH_SEP); if (pos == std::string::npos) { return ""; @@ -118,7 +143,7 @@ static std::string DirName(const std::string &filepath) { return filepath.substr(0, pos); } -static bool PathExists(const std::string& path){ +static bool PathExists(const std::string& path) { #ifdef _WIN32 struct _stat buffer; return (_stat(path.c_str(), &buffer) == 0); @@ -158,11 +183,12 @@ void PredictVideo(const std::string& video_path, // Open video cv::VideoCapture capture; std::string video_out_name = "output.mp4"; - if (FLAGS_camera_id != -1){ + if (FLAGS_camera_id != -1) { capture.open(FLAGS_camera_id); - }else{ + } else { capture.open(video_path.c_str()); - video_out_name = video_path.substr(video_path.find_last_of(OS_PATH_SEP) + 1); + video_out_name = + video_path.substr(video_path.find_last_of(OS_PATH_SEP) + 1); } if (!capture.isOpened()) { printf("can not open video : %s\n", video_path.c_str()); @@ -173,7 +199,8 @@ void PredictVideo(const std::string& video_path, int video_width = static_cast(capture.get(CV_CAP_PROP_FRAME_WIDTH)); int video_height = static_cast(capture.get(CV_CAP_PROP_FRAME_HEIGHT)); int video_fps = static_cast(capture.get(CV_CAP_PROP_FPS)); - int video_frame_count = static_cast(capture.get(CV_CAP_PROP_FRAME_COUNT)); + int video_frame_count = + static_cast(capture.get(CV_CAP_PROP_FRAME_COUNT)); printf("fps: %d, frame_count: %d\n", video_fps, video_frame_count); // Create VideoWriter for output @@ -199,7 +226,6 @@ void PredictVideo(const std::string& video_path, auto labels = det->GetLabelList(); auto colormap = PaddleDetection::GenerateColorMap(labels.size()); - // Store keypoint results std::vector result_kpts; std::vector imgs_kpts; @@ -221,55 +247,61 @@ void PredictVideo(const std::string& video_path, std::vector out_result; for (const auto& item : result) { if (item.confidence < FLAGS_threshold || item.class_id == -1) { - continue; + continue; } out_result.push_back(item); - if (item.rect.size() > 6){ - is_rbox = true; - printf("class=%d confidence=%.4f rect=[%d %d %d %d %d %d %d %d]\n", - item.class_id, - item.confidence, - item.rect[0], - item.rect[1], - item.rect[2], - item.rect[3], - item.rect[4], - item.rect[5], - item.rect[6], - item.rect[7]); - } - else{ + if (item.rect.size() > 6) { + is_rbox = true; + printf("class=%d confidence=%.4f rect=[%d %d %d %d %d %d %d %d]\n", + item.class_id, + item.confidence, + item.rect[0], + item.rect[1], + item.rect[2], + item.rect[3], + item.rect[4], + item.rect[5], + item.rect[6], + item.rect[7]); + } else { printf("class=%d confidence=%.4f rect=[%d %d %d %d]\n", - item.class_id, - item.confidence, - item.rect[0], - item.rect[1], - item.rect[2], - item.rect[3]); + item.class_id, + item.confidence, + item.rect[0], + item.rect[1], + item.rect[2], + item.rect[3]); } } - if(keypoint) - { + if (keypoint) { + result_kpts.clear(); int imsize = out_result.size(); - for (int i=0; i keypoint_times; - std::vector rect = {item.rect[0], item.rect[1], item.rect[2], item.rect[3]}; + std::vector rect = { + item.rect[0], item.rect[1], item.rect[2], item.rect[3]}; std::vector center; std::vector scale; - if(item.class_id == 0) - { + if (item.class_id == 0) { PaddleDetection::CropImg(frame, crop_img, rect, center, scale); center_bs.emplace_back(center); scale_bs.emplace_back(scale); imgs_kpts.emplace_back(crop_img); } - if (imgs_kpts.size()==FLAGS_batch_size_keypoint || ((i==imsize-1)&&!imgs_kpts.empty())) - { - keypoint->Predict(imgs_kpts, center_bs, scale_bs, FLAGS_threshold, 0, 1, &result_kpts, &keypoint_times); + if (imgs_kpts.size() == FLAGS_batch_size_keypoint || + ((i == imsize - 1) && !imgs_kpts.empty())) { + keypoint->Predict(imgs_kpts, + center_bs, + scale_bs, + FLAGS_threshold, + 0, + 1, + &result_kpts, + &keypoint_times); imgs_kpts.clear(); center_bs.clear(); scale_bs.clear(); @@ -277,8 +309,7 @@ void PredictVideo(const std::string& video_path, } cv::Mat out_im = VisualizeKptsResult(frame, result_kpts, colormap_kpts); video_out.write(out_im); - } - else{ + } else { // Visualization result cv::Mat out_im = PaddleDetection::VisualizeResult( frame, out_result, labels, colormap, is_rbox); @@ -299,11 +330,13 @@ void PredictImage(const std::vector all_img_paths, PaddleDetection::KeyPointDetector* keypoint, const std::string& output_dir = "output") { std::vector det_t = {0, 0, 0}; - int steps = ceil(float(all_img_paths.size()) / batch_size); + int steps = ceil(static_cast(all_img_paths.size()) / batch_size); int kpts_imgs = 0; std::vector keypoint_t = {0, 0, 0}; printf("total images = %d, batch_size = %d, total steps = %d\n", - all_img_paths.size(), batch_size, steps); + all_img_paths.size(), + batch_size, + steps); for (int idx = 0; idx < steps; idx++) { std::vector batch_imgs; int left_image_cnt = all_img_paths.size() - idx * batch_size; @@ -311,11 +344,11 @@ void PredictImage(const std::vector all_img_paths, left_image_cnt = batch_size; } for (int bs = 0; bs < left_image_cnt; bs++) { - std::string image_file_path = all_img_paths.at(idx * batch_size+bs); + std::string image_file_path = all_img_paths.at(idx * batch_size + bs); cv::Mat im = cv::imread(image_file_path, 1); batch_imgs.insert(batch_imgs.end(), im); } - + // Store all detected result std::vector result; std::vector bbox_num; @@ -330,7 +363,8 @@ void PredictImage(const std::vector all_img_paths, bool is_rbox = false; if (run_benchmark) { - det->Predict(batch_imgs, threshold, 10, 10, &result, &bbox_num, &det_times); + det->Predict( + batch_imgs, threshold, 10, 10, &result, &bbox_num, &det_times); } else { det->Predict(batch_imgs, threshold, 0, 1, &result, &bbox_num, &det_times); } @@ -349,33 +383,33 @@ void PredictImage(const std::vector all_img_paths, } detect_num += 1; im_result.push_back(item); - if (item.rect.size() > 6){ + if (item.rect.size() > 6) { is_rbox = true; printf("class=%d confidence=%.4f rect=[%d %d %d %d %d %d %d %d]\n", - item.class_id, - item.confidence, - item.rect[0], - item.rect[1], - item.rect[2], - item.rect[3], - item.rect[4], - item.rect[5], - item.rect[6], - item.rect[7]); - } - else{ + item.class_id, + item.confidence, + item.rect[0], + item.rect[1], + item.rect[2], + item.rect[3], + item.rect[4], + item.rect[5], + item.rect[6], + item.rect[7]); + } else { printf("class=%d confidence=%.4f rect=[%d %d %d %d]\n", - item.class_id, - item.confidence, - item.rect[0], - item.rect[1], - item.rect[2], - item.rect[3]); + item.class_id, + item.confidence, + item.rect[0], + item.rect[1], + item.rect[2], + item.rect[3]); } } - std::cout << all_img_paths.at(idx * batch_size + i) << " The number of detected box: " << detect_num << std::endl; - item_start_idx = item_start_idx + bbox_num[i]; - + std::cout << all_img_paths.at(idx * batch_size + i) + << " The number of detected box: " << detect_num << std::endl; + item_start_idx = item_start_idx + bbox_num[i]; + std::vector compression_params; compression_params.push_back(CV_IMWRITE_JPEG_QUALITY); compression_params.push_back(95); @@ -384,18 +418,17 @@ void PredictImage(const std::vector all_img_paths, output_path += OS_PATH_SEP; } std::string image_file_path = all_img_paths.at(idx * batch_size + i); - if(keypoint) - { + if (keypoint) { int imsize = im_result.size(); - for (int i=0; i keypoint_times; - std::vector rect = {item.rect[0], item.rect[1], item.rect[2], item.rect[3]}; + std::vector rect = { + item.rect[0], item.rect[1], item.rect[2], item.rect[3]}; std::vector center; std::vector scale; - if(item.class_id == 0) - { + if (item.class_id == 0) { PaddleDetection::CropImg(im, crop_img, rect, center, scale); center_bs.emplace_back(center); scale_bs.emplace_back(scale); @@ -403,13 +436,26 @@ void PredictImage(const std::vector all_img_paths, kpts_imgs += 1; } - if (imgs_kpts.size()==FLAGS_batch_size_keypoint || ((i==imsize-1)&&!imgs_kpts.empty())) - { + if (imgs_kpts.size() == FLAGS_batch_size_keypoint || + ((i == imsize - 1) && !imgs_kpts.empty())) { if (run_benchmark) { - keypoint->Predict(imgs_kpts, center_bs, scale_bs, 0.5, 10, 10, &result_kpts, &keypoint_times); - } - else{ - keypoint->Predict(imgs_kpts, center_bs, scale_bs, 0.5, 0, 1, &result_kpts, &keypoint_times); + keypoint->Predict(imgs_kpts, + center_bs, + scale_bs, + 0.5, + 10, + 10, + &result_kpts, + &keypoint_times); + } else { + keypoint->Predict(imgs_kpts, + center_bs, + scale_bs, + 0.5, + 0, + 1, + &result_kpts, + &keypoint_times); } imgs_kpts.clear(); center_bs.clear(); @@ -419,21 +465,25 @@ void PredictImage(const std::vector all_img_paths, keypoint_t[2] += keypoint_times[2]; } } - std::string kpts_savepath = output_path + "keypoint_" + image_file_path.substr(image_file_path.find_last_of('/') + 1); - cv::Mat kpts_vis_img = VisualizeKptsResult(im, result_kpts, colormap_kpts); + std::string kpts_savepath = + output_path + "keypoint_" + + image_file_path.substr(image_file_path.find_last_of('/') + 1); + cv::Mat kpts_vis_img = + VisualizeKptsResult(im, result_kpts, colormap_kpts); cv::imwrite(kpts_savepath, kpts_vis_img, compression_params); printf("Visualized output saved as %s\n", kpts_savepath.c_str()); - } - else{ + } else { // Visualization result cv::Mat vis_img = PaddleDetection::VisualizeResult( im, im_result, labels, colormap, is_rbox); - std::string det_savepath = output_path + image_file_path.substr(image_file_path.find_last_of('/') + 1); + std::string det_savepath = + output_path + + image_file_path.substr(image_file_path.find_last_of('/') + 1); cv::imwrite(det_savepath, vis_img, compression_params); - printf("Visualized output saved as %s\n", det_savepath.c_str()); + printf("Visualized output saved as %s\n", det_savepath.c_str()); } } - + det_t[0] += det_times[0]; det_t[1] += det_times[1]; det_t[2] += det_times[2]; @@ -447,43 +497,65 @@ void PredictImage(const std::vector all_img_paths, int main(int argc, char** argv) { // Parsing command-line google::ParseCommandLineFlags(&argc, &argv, true); - if (FLAGS_model_dir.empty() - || (FLAGS_image_file.empty() && FLAGS_image_dir.empty() && FLAGS_video_file.empty())) { - std::cout << "Usage: ./main --model_dir=/PATH/TO/INFERENCE_MODEL/ (--model_dir_keypoint=/PATH/TO/INFERENCE_MODEL/)" - << "--image_file=/PATH/TO/INPUT/IMAGE/" << std::endl; + if (FLAGS_model_dir.empty() || + (FLAGS_image_file.empty() && FLAGS_image_dir.empty() && + FLAGS_video_file.empty())) { + std::cout << "Usage: ./main --model_dir=/PATH/TO/INFERENCE_MODEL/ " + "(--model_dir_keypoint=/PATH/TO/INFERENCE_MODEL/)" + << "--image_file=/PATH/TO/INPUT/IMAGE/" << std::endl; return -1; } - if (!(FLAGS_run_mode == "fluid" || FLAGS_run_mode == "trt_fp32" - || FLAGS_run_mode == "trt_fp16" || FLAGS_run_mode == "trt_int8")) { - std::cout << "run_mode should be 'fluid', 'trt_fp32', 'trt_fp16' or 'trt_int8'."; + if (!(FLAGS_run_mode == "fluid" || FLAGS_run_mode == "trt_fp32" || + FLAGS_run_mode == "trt_fp16" || FLAGS_run_mode == "trt_int8")) { + std::cout + << "run_mode should be 'fluid', 'trt_fp32', 'trt_fp16' or 'trt_int8'."; return -1; } - transform(FLAGS_device.begin(),FLAGS_device.end(),FLAGS_device.begin(),::toupper); - if (!(FLAGS_device == "CPU" || FLAGS_device == "GPU" || FLAGS_device == "XPU")) { + transform(FLAGS_device.begin(), + FLAGS_device.end(), + FLAGS_device.begin(), + ::toupper); + if (!(FLAGS_device == "CPU" || FLAGS_device == "GPU" || + FLAGS_device == "XPU")) { std::cout << "device should be 'CPU', 'GPU' or 'XPU'."; return -1; } if (FLAGS_use_gpu) { - std::cout << "Deprecated, please use `--device` to set the device you want to run."; + std::cout << "Deprecated, please use `--device` to set the device you want " + "to run."; return -1; } // Load model and create a object detector - PaddleDetection::ObjectDetector det(FLAGS_model_dir, FLAGS_device, FLAGS_use_mkldnn, - FLAGS_cpu_threads, FLAGS_run_mode, FLAGS_batch_size,FLAGS_gpu_id, - FLAGS_trt_min_shape, FLAGS_trt_max_shape, FLAGS_trt_opt_shape, - FLAGS_trt_calib_mode); + PaddleDetection::ObjectDetector det(FLAGS_model_dir, + FLAGS_device, + FLAGS_use_mkldnn, + FLAGS_cpu_threads, + FLAGS_run_mode, + FLAGS_batch_size, + FLAGS_gpu_id, + FLAGS_trt_min_shape, + FLAGS_trt_max_shape, + FLAGS_trt_opt_shape, + FLAGS_trt_calib_mode); PaddleDetection::KeyPointDetector* keypoint = nullptr; - if (!FLAGS_model_dir_keypoint.empty()) - { - keypoint = new PaddleDetection::KeyPointDetector(FLAGS_model_dir_keypoint, FLAGS_device, FLAGS_use_mkldnn, - FLAGS_cpu_threads, FLAGS_run_mode, FLAGS_batch_size_keypoint, FLAGS_gpu_id, - FLAGS_trt_min_shape, FLAGS_trt_max_shape, FLAGS_trt_opt_shape, - FLAGS_trt_calib_mode, FLAGS_use_dark); + if (!FLAGS_model_dir_keypoint.empty()) { + keypoint = new PaddleDetection::KeyPointDetector(FLAGS_model_dir_keypoint, + FLAGS_device, + FLAGS_use_mkldnn, + FLAGS_cpu_threads, + FLAGS_run_mode, + FLAGS_batch_size_keypoint, + FLAGS_gpu_id, + FLAGS_trt_min_shape, + FLAGS_trt_max_shape, + FLAGS_trt_opt_shape, + FLAGS_trt_calib_mode, + FLAGS_use_dark); } // Do inference on input video or image if (!PathExists(FLAGS_output_dir)) { - MkDirs(FLAGS_output_dir); + MkDirs(FLAGS_output_dir); } if (!FLAGS_video_file.empty() || FLAGS_camera_id != -1) { PredictVideo(FLAGS_video_file, &det, keypoint, FLAGS_output_dir); @@ -493,17 +565,23 @@ int main(int argc, char** argv) { if (!FLAGS_image_file.empty()) { all_img_paths.push_back(FLAGS_image_file); if (FLAGS_batch_size > 1) { - std::cout << "batch_size should be 1, when set `image_file`." << std::endl; - return -1; + std::cout << "batch_size should be 1, when set `image_file`." + << std::endl; + return -1; } } else { - cv::glob(FLAGS_image_dir, cv_all_img_paths); - for (const auto & img_path : cv_all_img_paths) { - all_img_paths.push_back(img_path); - } + cv::glob(FLAGS_image_dir, cv_all_img_paths); + for (const auto& img_path : cv_all_img_paths) { + all_img_paths.push_back(img_path); + } } - PredictImage(all_img_paths, FLAGS_batch_size, FLAGS_threshold, - FLAGS_run_benchmark, &det, keypoint, FLAGS_output_dir); + PredictImage(all_img_paths, + FLAGS_batch_size, + FLAGS_threshold, + FLAGS_run_benchmark, + &det, + keypoint, + FLAGS_output_dir); } delete keypoint; keypoint = nullptr;