Unverified commit d7cb0f91, authored by zhiboniu, committed by GitHub

cherry-pick 2 PRs from develop (#4674)

* add deploy keypoint infer save results (#4480)

* fix cpp infer deploy visualize
Parent f3b4c238
@@ -14,72 +14,96 @@
#include <glog/logging.h>
#include <math.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <algorithm>
#include <iostream>
#include <numeric>
#include <string>
#include <vector>
#ifdef _WIN32
#include <direct.h>
#include <io.h>
#elif LINUX
#include <stdarg.h>
#include <sys/stat.h>
#endif
#include <gflags/gflags.h>
#include "include/keypoint_detector.h"
#include "include/object_detector.h"
#include "include/preprocess_op.h"

DEFINE_string(model_dir, "", "Path of object detector inference model");
DEFINE_string(model_dir_keypoint,
              "",
              "Path of keypoint detector inference model");
DEFINE_string(image_file, "", "Path of input image");
DEFINE_string(image_dir,
              "",
              "Dir of input image, `image_file` has a higher priority.");
DEFINE_int32(batch_size, 1, "batch_size of object detector");
DEFINE_int32(batch_size_keypoint, 8, "batch_size of keypoint detector");
DEFINE_string(
    video_file,
    "",
    "Path of input video, `video_file` or `camera_id` has a highest priority.");
DEFINE_int32(camera_id, -1, "Device id of camera to predict");
DEFINE_bool(
    use_gpu,
    false,
    "Deprecated, please use `--device` to set the device you want to run.");
DEFINE_string(device,
              "CPU",
              "Choose the device you want to run, it can be: CPU/GPU/XPU, "
              "default is CPU.");
DEFINE_double(threshold, 0.5, "Threshold of score.");
DEFINE_double(threshold_keypoint, 0.5, "Threshold of score.");
DEFINE_string(output_dir, "output", "Directory of output visualization files.");
DEFINE_string(run_mode,
              "fluid",
              "Mode of running(fluid/trt_fp32/trt_fp16/trt_int8)");
DEFINE_int32(gpu_id, 0, "Device id of GPU to execute");
DEFINE_bool(run_benchmark,
            false,
            "Whether to predict a image_file repeatedly for benchmark");
DEFINE_bool(use_mkldnn, false, "Whether use mkldnn with CPU");
DEFINE_int32(cpu_threads, 1, "Num of threads with CPU");
DEFINE_int32(trt_min_shape, 1, "Min shape of TRT DynamicShapeI");
DEFINE_int32(trt_max_shape, 1280, "Max shape of TRT DynamicShapeI");
DEFINE_int32(trt_opt_shape, 640, "Opt shape of TRT DynamicShapeI");
DEFINE_bool(trt_calib_mode,
            false,
            "If the model is produced by TRT offline quantitative calibration, "
            "trt_calib_mode need to set True");
DEFINE_bool(use_dark, true, "Whether use dark decode in keypoint postprocess");

void PrintBenchmarkLog(std::vector<double> det_time, int img_num) {
  LOG(INFO) << "----------------------- Config info -----------------------";
  LOG(INFO) << "runtime_device: " << FLAGS_device;
  LOG(INFO) << "ir_optim: "
            << "True";
  LOG(INFO) << "enable_memory_optim: "
            << "True";
  int has_trt = FLAGS_run_mode.find("trt");
  if (has_trt >= 0) {
    LOG(INFO) << "enable_tensorrt: "
              << "True";
    std::string precision = FLAGS_run_mode.substr(4, 8);
    LOG(INFO) << "precision: " << precision;
  } else {
    LOG(INFO) << "enable_tensorrt: "
              << "False";
    LOG(INFO) << "precision: "
              << "fp32";
  }
  LOG(INFO) << "enable_mkldnn: " << (FLAGS_use_mkldnn ? "True" : "False");
  LOG(INFO) << "cpu_math_library_num_threads: " << FLAGS_cpu_threads;
  LOG(INFO) << "----------------------- Data info -----------------------";
  LOG(INFO) << "batch_size: " << FLAGS_batch_size;
  LOG(INFO) << "input_shape: "
            << "dynamic shape";
  LOG(INFO) << "----------------------- Model info -----------------------";
  FLAGS_model_dir.erase(FLAGS_model_dir.find_last_not_of("/") + 1);
  LOG(INFO) << "model_name: " << FLAGS_model_dir;
@@ -93,11 +117,12 @@ void PrintBenchmarkLog(std::vector<double> det_time, int img_num) {
            << ", postprocess_time(ms): " << det_time[2] / img_num;
}

void PrintKptsBenchmarkLog(std::vector<double> det_time, int img_num) {
  LOG(INFO) << "----------------------- Data info -----------------------";
  LOG(INFO) << "batch_size_keypoint: " << FLAGS_batch_size_keypoint;
  LOG(INFO) << "----------------------- Model info -----------------------";
  FLAGS_model_dir_keypoint.erase(
      FLAGS_model_dir_keypoint.find_last_not_of("/") + 1);
  LOG(INFO) << "keypoint_model_name: " << FLAGS_model_dir_keypoint;
  LOG(INFO) << "----------------------- Perf info ------------------------";
  LOG(INFO) << "Total number of predicted data: " << img_num
@@ -110,7 +135,7 @@ void PrintKptsBenchmarkLog(std::vector<double> det_time, int img_num) {
            << ", postprocess_time(ms): " << det_time[2] / img_num;
}

static std::string DirName(const std::string& filepath) {
  auto pos = filepath.rfind(OS_PATH_SEP);
  if (pos == std::string::npos) {
    return "";
@@ -118,7 +143,7 @@ static std::string DirName(const std::string& filepath) {
  return filepath.substr(0, pos);
}

static bool PathExists(const std::string& path) {
#ifdef _WIN32
  struct _stat buffer;
  return (_stat(path.c_str(), &buffer) == 0);
@@ -158,11 +183,12 @@ void PredictVideo(const std::string& video_path,
  // Open video
  cv::VideoCapture capture;
  std::string video_out_name = "output.mp4";
  if (FLAGS_camera_id != -1) {
    capture.open(FLAGS_camera_id);
  } else {
    capture.open(video_path.c_str());
    video_out_name =
        video_path.substr(video_path.find_last_of(OS_PATH_SEP) + 1);
  }
  if (!capture.isOpened()) {
    printf("can not open video : %s\n", video_path.c_str());
@@ -173,7 +199,8 @@ void PredictVideo(const std::string& video_path,
  int video_width = static_cast<int>(capture.get(CV_CAP_PROP_FRAME_WIDTH));
  int video_height = static_cast<int>(capture.get(CV_CAP_PROP_FRAME_HEIGHT));
  int video_fps = static_cast<int>(capture.get(CV_CAP_PROP_FPS));
  int video_frame_count =
      static_cast<int>(capture.get(CV_CAP_PROP_FRAME_COUNT));
  printf("fps: %d, frame_count: %d\n", video_fps, video_frame_count);
  // Create VideoWriter for output
@@ -199,7 +226,6 @@ void PredictVideo(const std::string& video_path,
  auto labels = det->GetLabelList();
  auto colormap = PaddleDetection::GenerateColorMap(labels.size());
  std::vector<PaddleDetection::KeyPointResult> result_kpts;
  std::vector<cv::Mat> imgs_kpts;
@@ -221,55 +247,61 @@ void PredictVideo(const std::string& video_path,
    std::vector<PaddleDetection::ObjectResult> out_result;
    for (const auto& item : result) {
      if (item.confidence < FLAGS_threshold || item.class_id == -1) {
        continue;
      }
      out_result.push_back(item);
      if (item.rect.size() > 6) {
        is_rbox = true;
        printf("class=%d confidence=%.4f rect=[%d %d %d %d %d %d %d %d]\n",
               item.class_id,
               item.confidence,
               item.rect[0],
               item.rect[1],
               item.rect[2],
               item.rect[3],
               item.rect[4],
               item.rect[5],
               item.rect[6],
               item.rect[7]);
      } else {
        printf("class=%d confidence=%.4f rect=[%d %d %d %d]\n",
               item.class_id,
               item.confidence,
               item.rect[0],
               item.rect[1],
               item.rect[2],
               item.rect[3]);
      }
    }
    if (keypoint) {
      result_kpts.clear();
      int imsize = out_result.size();
      for (int i = 0; i < imsize; i++) {
        auto item = out_result[i];
        cv::Mat crop_img;
        std::vector<double> keypoint_times;
        std::vector<int> rect = {
            item.rect[0], item.rect[1], item.rect[2], item.rect[3]};
        std::vector<float> center;
        std::vector<float> scale;
        if (item.class_id == 0) {
          PaddleDetection::CropImg(frame, crop_img, rect, center, scale);
          center_bs.emplace_back(center);
          scale_bs.emplace_back(scale);
          imgs_kpts.emplace_back(crop_img);
        }
        if (imgs_kpts.size() == FLAGS_batch_size_keypoint ||
            ((i == imsize - 1) && !imgs_kpts.empty())) {
          keypoint->Predict(imgs_kpts,
                            center_bs,
                            scale_bs,
                            FLAGS_threshold,
                            0,
                            1,
                            &result_kpts,
                            &keypoint_times);
          imgs_kpts.clear();
          center_bs.clear();
          scale_bs.clear();
@@ -277,8 +309,7 @@ void PredictVideo(const std::string& video_path,
      }
      cv::Mat out_im = VisualizeKptsResult(frame, result_kpts, colormap_kpts);
      video_out.write(out_im);
    } else {
      // Visualization result
      cv::Mat out_im = PaddleDetection::VisualizeResult(
          frame, out_result, labels, colormap, is_rbox);
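The keypoint branch above batches person crops before inference: crops accumulate until FLAGS_batch_size_keypoint is reached, with a final flush on the last detection so leftovers are never dropped. A minimal standalone sketch of that accumulate-and-flush pattern, with a hypothetical Process() standing in for KeyPointDetector::Predict:

#include <cstdio>
#include <vector>

// Stand-in for the batched inference call.
void Process(const std::vector<int>& batch) {
  std::printf("processing batch of %zu items\n", batch.size());
}

void RunBatched(const std::vector<int>& items, size_t batch_size) {
  std::vector<int> buffer;
  for (size_t i = 0; i < items.size(); ++i) {
    buffer.push_back(items[i]);
    // Flush when the buffer is full, or on the last item with leftovers.
    if (buffer.size() == batch_size ||
        (i == items.size() - 1 && !buffer.empty())) {
      Process(buffer);
      buffer.clear();
    }
  }
}

int main() {
  RunBatched({1, 2, 3, 4, 5, 6, 7}, 3);  // batches of 3, 3, then 1
  return 0;
}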
@@ -299,11 +330,13 @@ void PredictImage(const std::vector<std::string> all_img_paths,
                  PaddleDetection::KeyPointDetector* keypoint,
                  const std::string& output_dir = "output") {
  std::vector<double> det_t = {0, 0, 0};
  int steps = ceil(static_cast<float>(all_img_paths.size()) / batch_size);
  int kpts_imgs = 0;
  std::vector<double> keypoint_t = {0, 0, 0};
  printf("total images = %d, batch_size = %d, total steps = %d\n",
         all_img_paths.size(),
         batch_size,
         steps);
  for (int idx = 0; idx < steps; idx++) {
    std::vector<cv::Mat> batch_imgs;
    int left_image_cnt = all_img_paths.size() - idx * batch_size;
@@ -311,11 +344,11 @@ void PredictImage(const std::vector<std::string> all_img_paths,
      left_image_cnt = batch_size;
    }
    for (int bs = 0; bs < left_image_cnt; bs++) {
      std::string image_file_path = all_img_paths.at(idx * batch_size + bs);
      cv::Mat im = cv::imread(image_file_path, 1);
      batch_imgs.insert(batch_imgs.end(), im);
    }
    // Store all detected result
    std::vector<PaddleDetection::ObjectResult> result;
    std::vector<int> bbox_num;
@@ -330,7 +363,8 @@ void PredictImage(const std::vector<std::string> all_img_paths,
    bool is_rbox = false;
    if (run_benchmark) {
      det->Predict(
          batch_imgs, threshold, 10, 10, &result, &bbox_num, &det_times);
    } else {
      det->Predict(batch_imgs, threshold, 0, 1, &result, &bbox_num, &det_times);
    }
@@ -349,33 +383,33 @@ void PredictImage(const std::vector<std::string> all_img_paths,
        }
        detect_num += 1;
        im_result.push_back(item);
        if (item.rect.size() > 6) {
          is_rbox = true;
          printf("class=%d confidence=%.4f rect=[%d %d %d %d %d %d %d %d]\n",
                 item.class_id,
                 item.confidence,
                 item.rect[0],
                 item.rect[1],
                 item.rect[2],
                 item.rect[3],
                 item.rect[4],
                 item.rect[5],
                 item.rect[6],
                 item.rect[7]);
        } else {
          printf("class=%d confidence=%.4f rect=[%d %d %d %d]\n",
                 item.class_id,
                 item.confidence,
                 item.rect[0],
                 item.rect[1],
                 item.rect[2],
                 item.rect[3]);
        }
      }
      std::cout << all_img_paths.at(idx * batch_size + i)
                << " The number of detected box: " << detect_num << std::endl;
      item_start_idx = item_start_idx + bbox_num[i];
      std::vector<int> compression_params;
      compression_params.push_back(CV_IMWRITE_JPEG_QUALITY);
      compression_params.push_back(95);
@@ -384,18 +418,17 @@ void PredictImage(const std::vector<std::string> all_img_paths,
        output_path += OS_PATH_SEP;
      }
      std::string image_file_path = all_img_paths.at(idx * batch_size + i);
      if (keypoint) {
        int imsize = im_result.size();
        for (int i = 0; i < imsize; i++) {
          auto item = im_result[i];
          cv::Mat crop_img;
          std::vector<double> keypoint_times;
          std::vector<int> rect = {
              item.rect[0], item.rect[1], item.rect[2], item.rect[3]};
          std::vector<float> center;
          std::vector<float> scale;
          if (item.class_id == 0) {
            PaddleDetection::CropImg(im, crop_img, rect, center, scale);
            center_bs.emplace_back(center);
            scale_bs.emplace_back(scale);
@@ -403,13 +436,26 @@ void PredictImage(const std::vector<std::string> all_img_paths,
            kpts_imgs += 1;
          }
          if (imgs_kpts.size() == FLAGS_batch_size_keypoint ||
              ((i == imsize - 1) && !imgs_kpts.empty())) {
            if (run_benchmark) {
              keypoint->Predict(imgs_kpts,
                                center_bs,
                                scale_bs,
                                0.5,
                                10,
                                10,
                                &result_kpts,
                                &keypoint_times);
            } else {
              keypoint->Predict(imgs_kpts,
                                center_bs,
                                scale_bs,
                                0.5,
                                0,
                                1,
                                &result_kpts,
                                &keypoint_times);
            }
            imgs_kpts.clear();
            center_bs.clear();
@@ -419,21 +465,25 @@ void PredictImage(const std::vector<std::string> all_img_paths,
            keypoint_t[2] += keypoint_times[2];
          }
        }
        std::string kpts_savepath =
            output_path + "keypoint_" +
            image_file_path.substr(image_file_path.find_last_of('/') + 1);
        cv::Mat kpts_vis_img =
            VisualizeKptsResult(im, result_kpts, colormap_kpts);
        cv::imwrite(kpts_savepath, kpts_vis_img, compression_params);
        printf("Visualized output saved as %s\n", kpts_savepath.c_str());
      } else {
        // Visualization result
        cv::Mat vis_img = PaddleDetection::VisualizeResult(
            im, im_result, labels, colormap, is_rbox);
        std::string det_savepath =
            output_path +
            image_file_path.substr(image_file_path.find_last_of('/') + 1);
        cv::imwrite(det_savepath, vis_img, compression_params);
        printf("Visualized output saved as %s\n", det_savepath.c_str());
      }
    }
    det_t[0] += det_times[0];
    det_t[1] += det_times[1];
    det_t[2] += det_times[2];
@@ -447,43 +497,65 @@ void PredictImage(const std::vector<std::string> all_img_paths,
int main(int argc, char** argv) {
  // Parsing command-line
  google::ParseCommandLineFlags(&argc, &argv, true);
  if (FLAGS_model_dir.empty() ||
      (FLAGS_image_file.empty() && FLAGS_image_dir.empty() &&
       FLAGS_video_file.empty())) {
    std::cout << "Usage: ./main --model_dir=/PATH/TO/INFERENCE_MODEL/ "
                 "(--model_dir_keypoint=/PATH/TO/INFERENCE_MODEL/)"
              << "--image_file=/PATH/TO/INPUT/IMAGE/" << std::endl;
    return -1;
  }
  if (!(FLAGS_run_mode == "fluid" || FLAGS_run_mode == "trt_fp32" ||
        FLAGS_run_mode == "trt_fp16" || FLAGS_run_mode == "trt_int8")) {
    std::cout
        << "run_mode should be 'fluid', 'trt_fp32', 'trt_fp16' or 'trt_int8'.";
    return -1;
  }
  transform(FLAGS_device.begin(),
            FLAGS_device.end(),
            FLAGS_device.begin(),
            ::toupper);
  if (!(FLAGS_device == "CPU" || FLAGS_device == "GPU" ||
        FLAGS_device == "XPU")) {
    std::cout << "device should be 'CPU', 'GPU' or 'XPU'.";
    return -1;
  }
  if (FLAGS_use_gpu) {
    std::cout << "Deprecated, please use `--device` to set the device you want "
                 "to run.";
    return -1;
  }
  // Load model and create a object detector
  PaddleDetection::ObjectDetector det(FLAGS_model_dir,
                                      FLAGS_device,
                                      FLAGS_use_mkldnn,
                                      FLAGS_cpu_threads,
                                      FLAGS_run_mode,
                                      FLAGS_batch_size,
                                      FLAGS_gpu_id,
                                      FLAGS_trt_min_shape,
                                      FLAGS_trt_max_shape,
                                      FLAGS_trt_opt_shape,
                                      FLAGS_trt_calib_mode);
  PaddleDetection::KeyPointDetector* keypoint = nullptr;
  if (!FLAGS_model_dir_keypoint.empty()) {
    keypoint = new PaddleDetection::KeyPointDetector(FLAGS_model_dir_keypoint,
                                                     FLAGS_device,
                                                     FLAGS_use_mkldnn,
                                                     FLAGS_cpu_threads,
                                                     FLAGS_run_mode,
                                                     FLAGS_batch_size_keypoint,
                                                     FLAGS_gpu_id,
                                                     FLAGS_trt_min_shape,
                                                     FLAGS_trt_max_shape,
                                                     FLAGS_trt_opt_shape,
                                                     FLAGS_trt_calib_mode,
                                                     FLAGS_use_dark);
  }
  // Do inference on input video or image
  if (!PathExists(FLAGS_output_dir)) {
    MkDirs(FLAGS_output_dir);
  }
  if (!FLAGS_video_file.empty() || FLAGS_camera_id != -1) {
    PredictVideo(FLAGS_video_file, &det, keypoint, FLAGS_output_dir);
@@ -493,17 +565,23 @@ int main(int argc, char** argv) {
    if (!FLAGS_image_file.empty()) {
      all_img_paths.push_back(FLAGS_image_file);
      if (FLAGS_batch_size > 1) {
        std::cout << "batch_size should be 1, when set `image_file`."
                  << std::endl;
        return -1;
      }
    } else {
      cv::glob(FLAGS_image_dir, cv_all_img_paths);
      for (const auto& img_path : cv_all_img_paths) {
        all_img_paths.push_back(img_path);
      }
    }
    PredictImage(all_img_paths,
                 FLAGS_batch_size,
                 FLAGS_threshold,
                 FLAGS_run_benchmark,
                 &det,
                 keypoint,
                 FLAGS_output_dir);
  }
  delete keypoint;
  keypoint = nullptr;
......
@@ -13,7 +13,7 @@
# limitations under the License.
import os
import json
import cv2
import math
import numpy as np
@@ -80,7 +80,7 @@ def predict_with_given_det(image, det_res, keypoint_detector,
    keypoint_res = {}
    keypoint_res['keypoint'] = [
        np.vstack(keypoint_vector).tolist(), np.vstack(score_vector).tolist()
    ] if len(keypoint_vector) > 0 else [[], []]
    keypoint_res['bbox'] = rect_vector
    return keypoint_res
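The switch from np.vstack(...) to np.vstack(...).tolist() above is what makes the results JSON-serializable for the new save path below: numpy arrays cannot be passed to json.dump directly. A quick illustration:

import json
import numpy as np

kpts = np.zeros((17, 3))
# json.dumps(kpts) would raise TypeError: ndarray is not JSON serializable
print(json.dumps(kpts.tolist())[:24])  # nested plain lists serialize fine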
@@ -89,8 +89,10 @@ def predict_with_given_det(image, det_res, keypoint_detector,
def topdown_unite_predict(detector,
                          topdown_keypoint_detector,
                          image_list,
                          keypoint_batch_size=1,
                          save_res=False):
    det_timer = detector.get_timer()
    store_res = []
    for i, img_file in enumerate(image_list):
        # Decode image in advance in det + pose prediction
        det_timer.preprocess_time_s.start()
@@ -114,6 +116,11 @@ def topdown_unite_predict(detector,
            image, results, topdown_keypoint_detector, keypoint_batch_size,
            FLAGS.det_threshold, FLAGS.keypoint_threshold, FLAGS.run_benchmark)
        if save_res:
            store_res.append([
                i, keypoint_res['bbox'],
                [keypoint_res['keypoint'][0], keypoint_res['keypoint'][1]]
            ])
        if FLAGS.run_benchmark:
            cm, gm, gu = get_current_memory_mb()
            topdown_keypoint_detector.cpu_mem += cm
@@ -127,12 +134,23 @@ def topdown_unite_predict(detector,
                keypoint_res,
                visual_thread=FLAGS.keypoint_threshold,
                save_dir=FLAGS.output_dir)
    if save_res:
        """
        1) store_res: a list of image_data
        2) image_data: [imageid, rects, [keypoints, scores]]
        3) rects: list of rect [xmin, ymin, xmax, ymax]
        4) keypoints: 17(joint numbers)*[x, y, conf], total 51 data in list
        5) scores: mean of all joint conf
        """
        with open("det_keypoint_unite_image_results.json", 'w') as wf:
            json.dump(store_res, wf, indent=4)
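The dumped file mirrors the structure documented in the docstring above; the video variant below writes det_keypoint_unite_video_results.json with the same layout keyed by frame id. A minimal sketch of reading the results back, assuming a prior run with save_res enabled produced the file:

import json
import numpy as np

with open("det_keypoint_unite_image_results.json") as rf:
    store_res = json.load(rf)

for image_id, rects, (keypoints, scores) in store_res:
    # keypoints: one 17x[x, y, conf] list per detected person
    kpts = np.array(keypoints)
    print("image %s: %d boxes, keypoint array %s" %
          (image_id, len(rects), kpts.shape))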
def topdown_unite_predict_video(detector,
                                topdown_keypoint_detector,
                                camera_id,
                                keypoint_batch_size=1,
                                save_res=False):
    video_name = 'output.mp4'
    if camera_id != -1:
        capture = cv2.VideoCapture(camera_id)
@@ -150,9 +168,10 @@ def topdown_unite_predict_video(detector,
    if not os.path.exists(FLAGS.output_dir):
        os.makedirs(FLAGS.output_dir)
    out_path = os.path.join(FLAGS.output_dir, video_name)
    fourcc = cv2.VideoWriter_fourcc(* 'mp4v')
    writer = cv2.VideoWriter(out_path, fourcc, fps, (width, height))
    index = 0
    store_res = []
    while (1):
        ret, frame = capture.read()
        if not ret:
@@ -172,6 +191,11 @@ def topdown_unite_predict_video(detector,
            keypoint_res,
            visual_thread=FLAGS.keypoint_threshold,
            returnimg=True)
        if save_res:
            store_res.append([
                index, keypoint_res['bbox'],
                [keypoint_res['keypoint'][0], keypoint_res['keypoint'][1]]
            ])
        writer.write(im)
        if camera_id != -1:
@@ -179,6 +203,16 @@ def topdown_unite_predict_video(detector,
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    writer.release()
    if save_res:
        """
        1) store_res: a list of frame_data
        2) frame_data: [frameid, rects, [keypoints, scores]]
        3) rects: list of rect [xmin, ymin, xmax, ymax]
        4) keypoints: 17(joint numbers)*[x, y, conf], total 51 data in list
        5) scores: mean of all joint conf
        """
        with open("det_keypoint_unite_video_results.json", 'w') as wf:
            json.dump(store_res, wf, indent=4)
def main():
@@ -219,12 +253,13 @@ def main():
    # predict from video file or camera video stream
    if FLAGS.video_file is not None or FLAGS.camera_id != -1:
        topdown_unite_predict_video(detector, topdown_keypoint_detector,
                                    FLAGS.camera_id, FLAGS.keypoint_batch_size,
                                    FLAGS.save_res)
    else:
        # predict from image
        img_list = get_test_images(FLAGS.image_dir, FLAGS.image_file)
        topdown_unite_predict(detector, topdown_keypoint_detector, img_list,
                              FLAGS.keypoint_batch_size, FLAGS.save_res)
    if not FLAGS.run_benchmark:
        detector.det_times.info(average=True)
        topdown_keypoint_detector.det_times.info(average=True)
......
@@ -115,5 +115,15 @@ def argsparser():
        type=bool,
        default=True,
        help='whether to use darkpose to get better keypoint position predict ')
    parser.add_argument(
        '--save_res',
        type=bool,
        default=False,
        help=(
            "whether to save predict results to json file"
            "1) store_res: a list of image_data"
            "2) image_data: [imageid, rects, [keypoints, scores]]"
            "3) rects: list of rect [xmin, ymin, xmax, ymax]"
            "4) keypoints: 17(joint numbers)*[x, y, conf], total 51 data in list"
            "5) scores: mean of all joint conf"))
    return parser
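One caveat worth noting about the new flag: argparse's type=bool converts via bool(str), and any non-empty string is truthy, so --save_res=False on the command line would still enable saving. A hypothetical alternative (not what this commit does) sidesteps that:

# bool("False") is True, so a type=bool flag cannot be switched off by value.
# A store_true action defaults to False and flips only when the flag is present:
parser.add_argument(
    '--save_res',
    action='store_true',
    help='whether to save predict results to json file')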
@@ -240,6 +240,7 @@ def draw_pose(imgfile,
        raise e
    skeletons, scores = results['keypoint']
    skeletons = np.array(skeletons)
    kpt_nums = 17
    if len(skeletons) > 0:
        kpt_nums = skeletons.shape[1]
......