From e27674fffc912f95a0e7c7230e91348e68f03424 Mon Sep 17 00:00:00 2001
From: wangguanzhong
Date: Sun, 12 Dec 2021 00:37:00 +0800
Subject: [PATCH] support rcnn bs=2 in cpp infer (#4854)

* support rcnn bs=2 in cpp infer

* check dynamic shape in padbatch

* replace fluid by paddle

* fix time display in pptracking cpp

* fix timer in bs=2
---
 deploy/cpp/docs/Jetson_build.md | 2 +-
 deploy/cpp/docs/linux_build.md | 2 +-
 deploy/cpp/docs/windows_vs2019_build.md | 2 +-
 deploy/cpp/include/config_parser.h | 10 +-
 deploy/cpp/include/jde_detector.h | 83 +++---
 deploy/cpp/include/keypoint_detector.h | 4 +-
 deploy/cpp/include/object_detector.h | 82 +++---
 deploy/cpp/include/preprocess_op.h | 40 ++-
 deploy/cpp/src/jde_detector.cc | 153 +++++------
 deploy/cpp/src/keypoint_detector.cc | 37 +--
 deploy/cpp/src/main.cc | 251 +++++++++++-------
 deploy/cpp/src/main_jde.cc | 126 +++++----
 deploy/cpp/src/main_keypoint.cc | 8 +-
 deploy/cpp/src/object_detector.cc | 218 ++++++++-------
 deploy/cpp/src/preprocess_op.cc | 220 ++++++++-------
 deploy/pptracking/cpp/README.md | 2 +-
 deploy/pptracking/cpp/include/config_parser.h | 4 +-
 deploy/pptracking/cpp/include/jde_predictor.h | 4 +-
 deploy/pptracking/cpp/include/pipeline.h | 4 +-
 deploy/pptracking/cpp/include/predictor.h | 2 +-
 deploy/pptracking/cpp/include/sde_predictor.h | 4 +-
 deploy/pptracking/cpp/src/main.cc | 8 +-
 deploy/pptracking/cpp/src/pipeline.cc | 5 +-
 deploy/pptracking/python/README.md | 4 +-
 deploy/pptracking/python/det_infer.py | 16 +-
 deploy/pptracking/python/mot_jde_infer.py | 4 +-
 deploy/pptracking/python/mot_sde_infer.py | 12 +-
 deploy/pptracking/python/utils.py | 4 +-
 deploy/python/README.md | 4 +-
 deploy/python/det_keypoint_unite_utils.py | 4 +-
 deploy/python/infer.py | 18 +-
 deploy/python/keypoint_infer.py | 4 +-
 deploy/python/mot_jde_infer.py | 4 +-
 deploy/python/mot_keypoint_unite_utils.py | 4 +-
 deploy/python/mot_sde_infer.py | 10 +-
 deploy/python/utils.py | 4 +-
 36 files changed, 759 insertions(+), 604 deletions(-)

diff --git a/deploy/cpp/docs/Jetson_build.md b/deploy/cpp/docs/Jetson_build.md
index 4f54738de..ea9699a43 100644
--- a/deploy/cpp/docs/Jetson_build.md
+++ b/deploy/cpp/docs/Jetson_build.md
@@ -169,7 +169,7 @@ WITH_KEYPOINT=ON
 | --camera_id | Option | Camera ID used for prediction, default is -1 (no camera prediction) |
 | --device | Device to run on, choose from `CPU/GPU/XPU`, default is `CPU` |
 | --gpu_id | GPU device id used for inference (default 0) |
-| --run_mode | When using GPU, default is fluid, options (fluid/trt_fp32/trt_fp16/trt_int8) |
+| --run_mode | When using GPU, default is paddle, options (paddle/trt_fp32/trt_fp16/trt_int8) |
 | --batch_size | Batch size for detection model inference, effective when `image_dir` is specified |
 | --batch_size_keypoint | Batch size for keypoint model inference, default is 8 |
 | --run_benchmark | Whether to repeat prediction for benchmark timing |
diff --git a/deploy/cpp/docs/linux_build.md b/deploy/cpp/docs/linux_build.md
index b28736541..32348991b 100755
--- a/deploy/cpp/docs/linux_build.md
+++ b/deploy/cpp/docs/linux_build.md
@@ -108,7 +108,7 @@ make
 | --camera_id | Option | Camera ID used for prediction, default is -1 (no camera prediction) |
 | --device | Device to run on, choose from `CPU/GPU/XPU`, default is `CPU` |
 | --gpu_id | GPU device id used for inference (default 0) |
-| --run_mode | When using GPU, default is fluid, options (fluid/trt_fp32/trt_fp16/trt_int8) |
+| --run_mode | When using GPU, default is paddle, options (paddle/trt_fp32/trt_fp16/trt_int8) |
 | --batch_size | Batch size for detection model inference, effective when `image_dir` is specified |
 | --batch_size_keypoint | Batch size for keypoint model inference, default is 8 |
 | --run_benchmark | Whether to repeat prediction for benchmark timing |
diff --git a/deploy/cpp/docs/windows_vs2019_build.md b/deploy/cpp/docs/windows_vs2019_build.md
index 9c5a2d33d..e1ae374f7 100755
--- a/deploy/cpp/docs/windows_vs2019_build.md
+++ b/deploy/cpp/docs/windows_vs2019_build.md
@@ -107,7 +107,7 @@ cd D:\projects\PaddleDetection\deploy\cpp\out\build\x64-Release
 | --camera_id | Option | Camera ID used for prediction, default is -1 (no camera prediction) |
 | --device | Device to run on, choose from `CPU/GPU/XPU`, default is `CPU` |
 | --gpu_id | GPU device id used for inference (default 0) |
-| --run_mode | When using GPU, default is fluid, options (fluid/trt_fp32/trt_fp16/trt_int8) |
+| --run_mode | When using GPU, default is paddle, options (paddle/trt_fp32/trt_fp16/trt_int8) |
 | --batch_size | Batch size for detection model inference, effective when `image_dir` is specified |
 | --batch_size_keypoint | Batch size for keypoint model inference, default is 8 |
 | --run_benchmark | Whether to repeat prediction for benchmark timing |
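Between the documentation updates above and the header changes below, it helps to make the headline feature concrete. Here is a minimal usage sketch of batch-size-2 inference through the `ObjectDetector` interface whose new signature appears later in this patch; the model directory and image names are illustrative placeholders, not values from the PR:

```cpp
#include <vector>
#include <opencv2/opencv.hpp>
#include "include/object_detector.h"

int main() {
  // "rcnn_export_dir" is a hypothetical exported RCNN model directory;
  // run_mode "paddle" is the new name for the old "fluid" default.
  PaddleDetection::ObjectDetector det("rcnn_export_dir", "GPU",
                                      /*use_mkldnn=*/false, /*cpu_threads=*/1,
                                      /*run_mode=*/"paddle", /*batch_size=*/2);
  std::vector<cv::Mat> batch = {cv::imread("a.jpg"), cv::imread("b.jpg")};
  std::vector<PaddleDetection::ObjectResult> result;
  std::vector<int> bbox_num;   // number of boxes kept for each image
  std::vector<double> times;   // {preprocess, inference, postprocess} in ms
  det.Predict(batch, /*threshold=*/0.5, /*warmup=*/0, /*repeats=*/1,
              &result, &bbox_num, &times);
  return 0;
}
```

When the two images resize to different shapes (the usual case for RCNN-style keep-ratio preprocessing), `Predict` now detects the mismatch and pads the batch to a common shape before packing the input tensor; that is the `CheckDynamicInput`/`PadBatch` path this patch adds.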
diff --git a/deploy/cpp/include/config_parser.h b/deploy/cpp/include/config_parser.h
index 8548bec53..82d103723 100644
--- a/deploy/cpp/include/config_parser.h
+++ b/deploy/cpp/include/config_parser.h
@@ -15,9 +15,9 @@
 #pragma once
 
 #include <iostream>
-#include <vector>
-#include <string>
 #include <map>
+#include <string>
+#include <vector>
 
 #include "yaml-cpp/yaml.h"
 
@@ -42,13 +42,12 @@ class ConfigPaser {
     YAML::Node config;
     config = YAML::LoadFile(model_dir + OS_PATH_SEP + cfg);
 
-    // Get runtime mode : fluid, trt_fp16, trt_fp32
+    // Get runtime mode : paddle, trt_fp16, trt_fp32
     if (config["mode"].IsDefined()) {
      mode_ = config["mode"].as<std::string>();
     } else {
       std::cerr << "Please set mode, "
-                << "support value : fluid/trt_fp16/trt_fp32."
-                << std::endl;
+                << "support value : paddle/trt_fp16/trt_fp32." << std::endl;
       return false;
     }
 
@@ -136,4 +135,3 @@ class ConfigPaser {
 };
 
 }  // namespace PaddleDetection
-
diff --git a/deploy/cpp/include/jde_detector.h b/deploy/cpp/include/jde_detector.h
index 1f0285ed9..959b9b448 100644
--- a/deploy/cpp/include/jde_detector.h
+++ b/deploy/cpp/include/jde_detector.h
@@ -14,39 +14,37 @@
 #pragma once
 
-#include
-#include
 #include
+#include
 #include
-#include
 #include
-#include
 #include
+#include
 
-#include "paddle_inference_api.h"  // NOLINT
+#include "paddle_inference_api.h"  // NOLINT
 
-#include "include/preprocess_op.h"
 #include "include/config_parser.h"
+#include "include/preprocess_op.h"
 #include "include/tracker.h"
 
 using namespace paddle_infer;
 
 namespace PaddleDetection {
 // JDE Detection Result
-struct MOT_Rect
-{
-    float left;
-    float top;
-    float right;
-    float bottom;
+struct MOT_Rect {
+  float left;
+  float top;
+  float right;
+  float bottom;
 };
 
-struct MOT_Track
-{
-    int ids;
-    float score;
-    MOT_Rect rects;
+struct MOT_Track {
+  int ids;
+  float score;
+  MOT_Rect rects;
 };
 
 typedef std::vector<MOT_Track> MOT_Result;
 
@@ -56,24 +54,24 @@ cv::Scalar GetColor(int idx);
 
 // Visualiztion Detection Result
 cv::Mat VisualizeTrackResult(const cv::Mat& img,
-                        const MOT_Result& results,
-                        const float fps, const int frame_id);
-
+                             const MOT_Result& results,
+                             const float fps,
+                             const int frame_id);
 
 class JDEDetector {
  public:
-  explicit JDEDetector(const std::string& model_dir,
-                   const std::string& device="CPU",
-                   bool use_mkldnn=false,
-                   int cpu_threads=1,
-                   const std::string& run_mode="fluid",
-                   const int batch_size=1,
-                   const int gpu_id=0,
-                   const int trt_min_shape=1,
-                   const int trt_max_shape=1280,
-                   const int trt_opt_shape=640,
-                   bool trt_calib_mode=false,
-                   const int min_box_area=200) {
+  explicit JDEDetector(const std::string& model_dir,
+                       const std::string& device = "CPU",
+                       bool use_mkldnn = false,
+                       int cpu_threads = 1,
+                       const std::string& run_mode = "paddle",
+                       const int batch_size = 1,
+                       const int gpu_id = 0,
+                       const int trt_min_shape = 1,
+                       const int trt_max_shape = 1280,
+                       const int trt_opt_shape = 640,
+                       bool trt_calib_mode = false,
+                       const int min_box_area = 200) {
     this->device_ = device;
     this->gpu_id_ = gpu_id;
     this->cpu_math_library_num_threads_ = cpu_threads;
@@ -94,18 +92,17 @@ class JDEDetector {
   }
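Before the method declarations continue, a brief, hedged usage sketch of the tracker interface declared above; the model directory and frame name are placeholders, and the FPS arithmetic follows the accumulate-into-`times` convention this PR adopts (`Predict` does `(*times)[i] += ...`, so the vector is pre-sized):

```cpp
#include <vector>
#include <opencv2/opencv.hpp>
#include "include/jde_detector.h"

int main() {
  // "jde_export_dir" is a hypothetical exported-model directory.
  PaddleDetection::JDEDetector mot("jde_export_dir", "GPU",
                                   /*use_mkldnn=*/false, /*cpu_threads=*/1,
                                   /*run_mode=*/"paddle");
  PaddleDetection::MOT_Result result;
  std::vector<double> times(3, 0.0);  // accumulated {pre, infer, post} in ms
  cv::Mat frame = cv::imread("frame_0000.jpg");
  mot.Predict({frame}, /*threshold=*/0.5, /*warmup=*/0, /*repeats=*/1,
              &result, &times);
  double total_ms = times[0] + times[1] + times[2];
  float fps = total_ms > 0 ? static_cast<float>(1000.0 / total_ms) : 0.0f;
  cv::Mat vis =
      PaddleDetection::VisualizeTrackResult(frame, result, fps, /*frame_id=*/0);
  cv::imwrite("vis_0000.jpg", vis);
  return 0;
}
```

   //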
Load Paddle inference model - void LoadModel( - const std::string& model_dir, - const int batch_size = 1, - const std::string& run_mode = "fluid"); + void LoadModel(const std::string& model_dir, + const int batch_size = 1, + const std::string& run_mode = "paddle"); // Run predictor void Predict(const std::vector imgs, - const double threshold = 0.5, - const int warmup = 0, - const int repeats = 1, - MOT_Result* result = nullptr, - std::vector* times = nullptr); + const double threshold = 0.5, + const int warmup = 0, + const int repeats = 1, + MOT_Result* result = nullptr, + std::vector* times = nullptr); private: std::string device_ = "CPU"; @@ -121,9 +118,7 @@ class JDEDetector { // Preprocess image and copy data to input buffer void Preprocess(const cv::Mat& image_mat); // Postprocess result - void Postprocess( - const cv::Mat dets, const cv::Mat emb, - MOT_Result* result); + void Postprocess(const cv::Mat dets, const cv::Mat emb, MOT_Result* result); std::shared_ptr predictor_; Preprocessor preprocessor_; diff --git a/deploy/cpp/include/keypoint_detector.h b/deploy/cpp/include/keypoint_detector.h index 84924276e..55eed8f91 100644 --- a/deploy/cpp/include/keypoint_detector.h +++ b/deploy/cpp/include/keypoint_detector.h @@ -51,7 +51,7 @@ class KeyPointDetector { const std::string& device = "CPU", bool use_mkldnn = false, int cpu_threads = 1, - const std::string& run_mode = "fluid", + const std::string& run_mode = "paddle", const int batch_size = 1, const int gpu_id = 0, const int trt_min_shape = 1, @@ -80,7 +80,7 @@ class KeyPointDetector { // Load Paddle inference model void LoadModel(const std::string& model_dir, const int batch_size = 1, - const std::string& run_mode = "fluid"); + const std::string& run_mode = "paddle"); // Run predictor void Predict(const std::vector imgs, diff --git a/deploy/cpp/include/object_detector.h b/deploy/cpp/include/object_detector.h index 0e207c119..0a336c334 100644 --- a/deploy/cpp/include/object_detector.h +++ b/deploy/cpp/include/object_detector.h @@ -14,23 +14,23 @@ #pragma once -#include -#include -#include -#include #include +#include #include +#include +#include +#include #include -#include #include +#include -#include "paddle_inference_api.h" // NOLINT +#include "paddle_inference_api.h" // NOLINT -#include "include/preprocess_op.h" #include "include/config_parser.h" -#include "include/utils.h" #include "include/picodet_postprocess.h" +#include "include/preprocess_op.h" +#include "include/utils.h" using namespace paddle_infer; @@ -39,28 +39,27 @@ namespace PaddleDetection { // Generate visualization colormap for each class std::vector GenerateColorMap(int num_class); - // Visualiztion Detection Result -cv::Mat VisualizeResult(const cv::Mat& img, - const std::vector& results, - const std::vector& lables, - const std::vector& colormap, - const bool is_rbox); - +cv::Mat VisualizeResult( + const cv::Mat& img, + const std::vector& results, + const std::vector& lables, + const std::vector& colormap, + const bool is_rbox); class ObjectDetector { public: - explicit ObjectDetector(const std::string& model_dir, - const std::string& device="CPU", - bool use_mkldnn=false, - int cpu_threads=1, - const std::string& run_mode="fluid", - const int batch_size=1, - const int gpu_id=0, - const int trt_min_shape=1, - const int trt_max_shape=1280, - const int trt_opt_shape=640, - bool trt_calib_mode=false) { + explicit ObjectDetector(const std::string& model_dir, + const std::string& device = "CPU", + bool use_mkldnn = false, + int cpu_threads = 1, + const std::string& 
run_mode = "paddle", + const int batch_size = 1, + const int gpu_id = 0, + const int trt_min_shape = 1, + const int trt_max_shape = 1280, + const int trt_opt_shape = 640, + bool trt_calib_mode = false) { this->device_ = device; this->gpu_id_ = gpu_id; this->cpu_math_library_num_threads_ = cpu_threads; @@ -79,19 +78,18 @@ class ObjectDetector { } // Load Paddle inference model - void LoadModel( - const std::string& model_dir, - const int batch_size = 1, - const std::string& run_mode = "fluid"); + void LoadModel(const std::string& model_dir, + const int batch_size = 1, + const std::string& run_mode = "paddle"); // Run predictor void Predict(const std::vector imgs, - const double threshold = 0.5, - const int warmup = 0, - const int repeats = 1, - std::vector* result = nullptr, - std::vector* bbox_num = nullptr, - std::vector* times = nullptr); + const double threshold = 0.5, + const int warmup = 0, + const int repeats = 1, + std::vector* result = nullptr, + std::vector* bbox_num = nullptr, + std::vector* times = nullptr); // Get Model Label list const std::vector& GetLabelList() const { @@ -112,19 +110,17 @@ class ObjectDetector { // Preprocess image and copy data to input buffer void Preprocess(const cv::Mat& image_mat); // Postprocess result - void Postprocess( - const std::vector mats, - std::vector* result, - std::vector bbox_num, - std::vector output_data_, - bool is_rbox); + void Postprocess(const std::vector mats, + std::vector* result, + std::vector bbox_num, + std::vector output_data_, + bool is_rbox); std::shared_ptr predictor_; Preprocessor preprocessor_; ImageBlob inputs_; float threshold_; ConfigPaser config_; - }; } // namespace PaddleDetection diff --git a/deploy/cpp/include/preprocess_op.h b/deploy/cpp/include/preprocess_op.h index 55ed9b7e1..33d7300b8 100644 --- a/deploy/cpp/include/preprocess_op.h +++ b/deploy/cpp/include/preprocess_op.h @@ -17,16 +17,16 @@ #include #include -#include -#include -#include +#include #include +#include #include -#include +#include +#include #include -#include #include +#include namespace PaddleDetection { @@ -40,9 +40,11 @@ class ImageBlob { // in net data shape(after pad) std::vector in_net_shape_; // Evaluation image width and height - //std::vector eval_im_size_f_; + // std::vector eval_im_size_f_; // Scale factor for image size to origin image size std::vector scale_factor_; + // in net image after preprocessing + cv::Mat in_net_im_; }; // Abstraction of preprocessing opration class @@ -52,7 +54,7 @@ class PreprocessOp { virtual void Run(cv::Mat* im, ImageBlob* data) = 0; }; -class InitInfo : public PreprocessOp{ +class InitInfo : public PreprocessOp { public: virtual void Init(const YAML::Node& item) {} virtual void Run(cv::Mat* im, ImageBlob* data); @@ -79,7 +81,6 @@ class Permute : public PreprocessOp { public: virtual void Init(const YAML::Node& item) {} virtual void Run(cv::Mat* im, ImageBlob* data); - }; class Resize : public PreprocessOp { @@ -88,7 +89,7 @@ class Resize : public PreprocessOp { interp_ = item["interp"].as(); keep_ratio_ = item["keep_ratio"].as(); target_size_ = item["target_size"].as>(); - } + } // Compute best resize scale for x-dimension, y-dimension std::pair GenerateScale(const cv::Mat& im); @@ -106,7 +107,7 @@ class LetterBoxResize : public PreprocessOp { public: virtual void Init(const YAML::Node& item) { target_size_ = item["target_size"].as>(); - } + } float GenerateScale(const cv::Mat& im); @@ -133,7 +134,7 @@ class TopDownEvalAffine : public PreprocessOp { public: virtual void Init(const YAML::Node& item) { 
trainsize_ = item["trainsize"].as>(); - } + } virtual void Run(cv::Mat* im, ImageBlob* data); @@ -142,7 +143,18 @@ class TopDownEvalAffine : public PreprocessOp { std::vector trainsize_; }; -void CropImg(cv::Mat &img, cv::Mat &crop_img, std::vector &area, std::vector ¢er, std::vector &scale, float expandratio=0.15); +void CropImg(cv::Mat& img, + cv::Mat& crop_img, + std::vector& area, + std::vector& center, + std::vector& scale, + float expandratio = 0.15); + +// check whether the input size is dynamic +bool CheckDynamicInput(const std::vector& imgs); + +// Pad images in batch +std::vector PadBatch(const std::vector& imgs); class Preprocessor { public: @@ -172,7 +184,8 @@ class Preprocessor { } else if (name == "TopDownEvalAffine") { return std::make_shared(); } - std::cerr << "can not find function of OP: " << name << " and return: nullptr" << std::endl; + std::cerr << "can not find function of OP: " << name + << " and return: nullptr" << std::endl; return nullptr; } @@ -186,4 +199,3 @@ class Preprocessor { }; } // namespace PaddleDetection - diff --git a/deploy/cpp/src/jde_detector.cc b/deploy/cpp/src/jde_detector.cc index 066975d9e..5df8b87a7 100644 --- a/deploy/cpp/src/jde_detector.cc +++ b/deploy/cpp/src/jde_detector.cc @@ -13,19 +13,18 @@ // limitations under the License. #include // for setprecision -#include #include +#include #include "include/jde_detector.h" - using namespace paddle_infer; namespace PaddleDetection { // Load Model and create model predictor void JDEDetector::LoadModel(const std::string& model_dir, - const int batch_size, - const std::string& run_mode) { + const int batch_size, + const std::string& run_mode) { paddle_infer::Config config; std::string prog_file = model_dir + OS_PATH_SEP + "model.pdmodel"; std::string params_file = model_dir + OS_PATH_SEP + "model.pdiparams"; @@ -34,47 +33,51 @@ void JDEDetector::LoadModel(const std::string& model_dir, config.EnableUseGpu(200, this->gpu_id_); config.SwitchIrOptim(true); // use tensorrt - if (run_mode != "fluid") { + if (run_mode != "paddle") { auto precision = paddle_infer::Config::Precision::kFloat32; if (run_mode == "trt_fp32") { precision = paddle_infer::Config::Precision::kFloat32; - } - else if (run_mode == "trt_fp16") { + } else if (run_mode == "trt_fp16") { precision = paddle_infer::Config::Precision::kHalf; - } - else if (run_mode == "trt_int8") { + } else if (run_mode == "trt_int8") { precision = paddle_infer::Config::Precision::kInt8; } else { - printf("run_mode should be 'fluid', 'trt_fp32', 'trt_fp16' or 'trt_int8'"); + printf( + "run_mode should be 'paddle', 'trt_fp32', 'trt_fp16' or " + "'trt_int8'"); } // set tensorrt - config.EnableTensorRtEngine( - 1 << 30, - batch_size, - this->min_subgraph_size_, - precision, - false, - this->trt_calib_mode_); + config.EnableTensorRtEngine(1 << 30, + batch_size, + this->min_subgraph_size_, + precision, + false, + this->trt_calib_mode_); // set use dynamic shape if (this->use_dynamic_shape_) { // set DynamicShsape for image tensor - const std::vector min_input_shape = {1, 3, this->trt_min_shape_, this->trt_min_shape_}; - const std::vector max_input_shape = {1, 3, this->trt_max_shape_, this->trt_max_shape_}; - const std::vector opt_input_shape = {1, 3, this->trt_opt_shape_, this->trt_opt_shape_}; - const std::map> map_min_input_shape = {{"image", min_input_shape}}; - const std::map> map_max_input_shape = {{"image", max_input_shape}}; - const std::map> map_opt_input_shape = {{"image", opt_input_shape}}; - - config.SetTRTDynamicShapeInfo(map_min_input_shape, - 
map_max_input_shape, - map_opt_input_shape); + const std::vector min_input_shape = { + 1, 3, this->trt_min_shape_, this->trt_min_shape_}; + const std::vector max_input_shape = { + 1, 3, this->trt_max_shape_, this->trt_max_shape_}; + const std::vector opt_input_shape = { + 1, 3, this->trt_opt_shape_, this->trt_opt_shape_}; + const std::map> map_min_input_shape = { + {"image", min_input_shape}}; + const std::map> map_max_input_shape = { + {"image", max_input_shape}}; + const std::map> map_opt_input_shape = { + {"image", opt_input_shape}}; + + config.SetTRTDynamicShapeInfo( + map_min_input_shape, map_max_input_shape, map_opt_input_shape); std::cout << "TensorRT dynamic shape enabled" << std::endl; } } - } else if (this->device_ == "XPU"){ - config.EnableXpu(10*1024*1024); + } else if (this->device_ == "XPU") { + config.EnableXpu(10 * 1024 * 1024); } else { config.DisableGpu(); if (this->use_mkldnn_) { @@ -94,8 +97,9 @@ void JDEDetector::LoadModel(const std::string& model_dir, // Visualiztion results cv::Mat VisualizeTrackResult(const cv::Mat& img, - const MOT_Result& results, - const float fps, const int frame_id) { + const MOT_Result& results, + const float fps, + const int frame_id) { cv::Mat vis_img = img.clone(); int im_h = img.rows; int im_w = img.cols; @@ -105,31 +109,34 @@ cv::Mat VisualizeTrackResult(const cv::Mat& img, std::ostringstream oss; oss << std::setiosflags(std::ios::fixed) << std::setprecision(4); - oss << "frame: " << frame_id<<" "; - oss << "fps: " << fps<<" "; + oss << "frame: " << frame_id << " "; + oss << "fps: " << fps << " "; oss << "num: " << results.size(); std::string text = oss.str(); cv::Point origin; origin.x = 0; origin.y = int(15 * text_scale); - cv::putText( - vis_img, - text, - origin, - cv::FONT_HERSHEY_PLAIN, - text_scale, (0, 0, 255), 2); + cv::putText(vis_img, + text, + origin, + cv::FONT_HERSHEY_PLAIN, + text_scale, + (0, 0, 255), + 2); for (int i = 0; i < results.size(); ++i) { const int obj_id = results[i].ids; const float score = results[i].score; - + cv::Scalar color = GetColor(obj_id); cv::Point pt1 = cv::Point(results[i].rects.left, results[i].rects.top); cv::Point pt2 = cv::Point(results[i].rects.right, results[i].rects.bottom); - cv::Point id_pt = cv::Point(results[i].rects.left, results[i].rects.top + 10); - cv::Point score_pt = cv::Point(results[i].rects.left, results[i].rects.top - 10); + cv::Point id_pt = + cv::Point(results[i].rects.left, results[i].rects.top + 10); + cv::Point score_pt = + cv::Point(results[i].rects.left, results[i].rects.top - 10); cv::rectangle(vis_img, pt1, pt2, color, line_thickness); std::ostringstream idoss; @@ -157,13 +164,13 @@ cv::Mat VisualizeTrackResult(const cv::Mat& img, text_scale, cv::Scalar(0, 255, 255), text_thickness); - } return vis_img; } - -void FilterDets(const float conf_thresh, const cv::Mat dets, std::vector* index) { +void FilterDets(const float conf_thresh, + const cv::Mat dets, + std::vector* index) { for (int i = 0; i < dets.rows; ++i) { float score = *dets.ptr(i, 4); if (score > conf_thresh) { @@ -178,9 +185,9 @@ void JDEDetector::Preprocess(const cv::Mat& ori_im) { preprocessor_.Run(&im, &inputs_); } -void JDEDetector::Postprocess( - const cv::Mat dets, const cv::Mat emb, - MOT_Result* result) { +void JDEDetector::Postprocess(const cv::Mat dets, + const cv::Mat emb, + MOT_Result* result) { result->clear(); std::vector tracks; std::vector valid; @@ -193,7 +200,7 @@ void JDEDetector::Postprocess( JDETracker::instance()->update(new_dets, new_emb, tracks); if (tracks.size() == 0) { MOT_Track 
mot_track; - MOT_Rect ret = {*dets.ptr(0, 0), + MOT_Rect ret = {*dets.ptr(0, 0), *dets.ptr(0, 1), *dets.ptr(0, 2), *dets.ptr(0, 3)}; @@ -213,26 +220,24 @@ void JDEDetector::Postprocess( float area = w * h; if (area > min_box_area_ && !vertical) { MOT_Track mot_track; - MOT_Rect ret = {titer->ltrb[0], - titer->ltrb[1], - titer->ltrb[2], - titer->ltrb[3]}; + MOT_Rect ret = { + titer->ltrb[0], titer->ltrb[1], titer->ltrb[2], titer->ltrb[3]}; mot_track.rects = ret; mot_track.score = titer->score; mot_track.ids = titer->id; result->push_back(mot_track); } } - } + } } } void JDEDetector::Predict(const std::vector imgs, - const double threshold, - const int warmup, - const int repeats, - MOT_Result* result, - std::vector* times) { + const double threshold, + const int warmup, + const int repeats, + MOT_Result* result, + std::vector* times) { auto preprocess_start = std::chrono::steady_clock::now(); int batch_size = imgs.size(); @@ -240,7 +245,7 @@ void JDEDetector::Predict(const std::vector imgs, std::vector in_data_all; std::vector im_shape_all(batch_size * 2); std::vector scale_factor_all(batch_size * 2); - + // Preprocess image for (int bs_idx = 0; bs_idx < batch_size; bs_idx++) { cv::Mat im = imgs.at(bs_idx); @@ -252,7 +257,8 @@ void JDEDetector::Predict(const std::vector imgs, scale_factor_all[bs_idx * 2 + 1] = inputs_.scale_factor_[1]; // TODO: reduce cost time - in_data_all.insert(in_data_all.end(), inputs_.im_data_.begin(), inputs_.im_data_.end()); + in_data_all.insert( + in_data_all.end(), inputs_.im_data_.begin(), inputs_.im_data_.end()); } // Prepare input tensor @@ -272,14 +278,13 @@ void JDEDetector::Predict(const std::vector imgs, in_tensor->CopyFromCpu(scale_factor_all.data()); } } - + auto preprocess_end = std::chrono::steady_clock::now(); std::vector bbox_shape; std::vector emb_shape; // Run predictor // warmup - for (int i = 0; i < warmup; i++) - { + for (int i = 0; i < warmup; i++) { predictor_->Run(); // Get output tensor auto output_names = predictor_->GetOutputNames(); @@ -299,15 +304,14 @@ void JDEDetector::Predict(const std::vector imgs, } bbox_data_.resize(bbox_size); - bbox_tensor->CopyToCpu(bbox_data_.data()); + bbox_tensor->CopyToCpu(bbox_data_.data()); emb_data_.resize(emb_size); emb_tensor->CopyToCpu(emb_data_.data()); } - + auto inference_start = std::chrono::steady_clock::now(); - for (int i = 0; i < repeats; i++) - { + for (int i = 0; i < repeats; i++) { predictor_->Run(); // Get output tensor auto output_names = predictor_->GetOutputNames(); @@ -327,7 +331,7 @@ void JDEDetector::Predict(const std::vector imgs, } bbox_data_.resize(bbox_size); - bbox_tensor->CopyToCpu(bbox_data_.data()); + bbox_tensor->CopyToCpu(bbox_data_.data()); emb_data_.resize(emb_size); emb_tensor->CopyToCpu(emb_data_.data()); @@ -344,19 +348,20 @@ void JDEDetector::Predict(const std::vector imgs, auto postprocess_end = std::chrono::steady_clock::now(); - std::chrono::duration preprocess_diff = preprocess_end - preprocess_start; + std::chrono::duration preprocess_diff = + preprocess_end - preprocess_start; (*times)[0] += double(preprocess_diff.count() * 1000); std::chrono::duration inference_diff = inference_end - inference_start; (*times)[1] += double(inference_diff.count() * 1000); - std::chrono::duration postprocess_diff = postprocess_end - postprocess_start; + std::chrono::duration postprocess_diff = + postprocess_end - postprocess_start; (*times)[2] += double(postprocess_diff.count() * 1000); } cv::Scalar GetColor(int idx) { idx = idx * 3; - cv::Scalar color = cv::Scalar((37 * idx) % 
255, - (17 * idx) % 255, - (29 * idx) % 255); + cv::Scalar color = + cv::Scalar((37 * idx) % 255, (17 * idx) % 255, (29 * idx) % 255); return color; } diff --git a/deploy/cpp/src/keypoint_detector.cc b/deploy/cpp/src/keypoint_detector.cc index 788855e2e..18af79e31 100644 --- a/deploy/cpp/src/keypoint_detector.cc +++ b/deploy/cpp/src/keypoint_detector.cc @@ -33,7 +33,7 @@ void KeyPointDetector::LoadModel(const std::string& model_dir, config.EnableUseGpu(200, this->gpu_id_); config.SwitchIrOptim(true); // use tensorrt - if (run_mode != "fluid") { + if (run_mode != "paddle") { auto precision = paddle_infer::Config::Precision::kFloat32; if (run_mode == "trt_fp32") { precision = paddle_infer::Config::Precision::kFloat32; @@ -43,7 +43,8 @@ void KeyPointDetector::LoadModel(const std::string& model_dir, precision = paddle_infer::Config::Precision::kInt8; } else { printf( - "run_mode should be 'fluid', 'trt_fp32', 'trt_fp16' or 'trt_int8'"); + "run_mode should be 'paddle', 'trt_fp32', 'trt_fp16' or " + "'trt_int8'"); } // set tensorrt config.EnableTensorRtEngine(1 << 30, @@ -99,22 +100,22 @@ cv::Mat VisualizeKptsResult(const cv::Mat& img, const std::vector& results, const std::vector& colormap) { const int edge[][2] = {{0, 1}, - {0, 2}, - {1, 3}, - {2, 4}, - {3, 5}, - {4, 6}, - {5, 7}, - {6, 8}, - {7, 9}, - {8, 10}, - {5, 11}, - {6, 12}, - {11, 13}, - {12, 14}, - {13, 15}, - {14, 16}, - {11, 12}}; + {0, 2}, + {1, 3}, + {2, 4}, + {3, 5}, + {4, 6}, + {5, 7}, + {6, 8}, + {7, 9}, + {8, 10}, + {5, 11}, + {6, 12}, + {11, 13}, + {12, 14}, + {13, 15}, + {14, 16}, + {11, 12}}; cv::Mat vis_img = img.clone(); for (int batchid = 0; batchid < results.size(); batchid++) { for (int i = 0; i < results[batchid].num_joints; i++) { diff --git a/deploy/cpp/src/main.cc b/deploy/cpp/src/main.cc index 058d7556d..6912031ba 100644 --- a/deploy/cpp/src/main.cc +++ b/deploy/cpp/src/main.cc @@ -14,14 +14,14 @@ #include +#include +#include +#include +#include #include +#include #include #include -#include -#include -#include -#include -#include #ifdef _WIN32 #include @@ -31,62 +31,86 @@ #include #endif -#include "include/object_detector.h" #include - +#include "include/object_detector.h" DEFINE_string(model_dir, "", "Path of inference model"); DEFINE_string(image_file, "", "Path of input image"); -DEFINE_string(image_dir, "", "Dir of input image, `image_file` has a higher priority."); +DEFINE_string(image_dir, + "", + "Dir of input image, `image_file` has a higher priority."); DEFINE_int32(batch_size, 1, "batch_size"); -DEFINE_string(video_file, "", "Path of input video, `video_file` or `camera_id` has a highest priority."); +DEFINE_string( + video_file, + "", + "Path of input video, `video_file` or `camera_id` has a highest priority."); DEFINE_int32(camera_id, -1, "Device id of camera to predict"); -DEFINE_bool(use_gpu, false, "Deprecated, please use `--device` to set the device you want to run."); -DEFINE_string(device, "CPU", "Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU."); +DEFINE_bool( + use_gpu, + false, + "Deprecated, please use `--device` to set the device you want to run."); +DEFINE_string(device, + "CPU", + "Choose the device you want to run, it can be: CPU/GPU/XPU, " + "default is CPU."); DEFINE_double(threshold, 0.5, "Threshold of score."); DEFINE_string(output_dir, "output", "Directory of output visualization files."); -DEFINE_string(run_mode, "fluid", "Mode of running(fluid/trt_fp32/trt_fp16/trt_int8)"); +DEFINE_string(run_mode, + "paddle", + "Mode of 
running(paddle/trt_fp32/trt_fp16/trt_int8)"); DEFINE_int32(gpu_id, 0, "Device id of GPU to execute"); -DEFINE_bool(run_benchmark, false, "Whether to predict a image_file repeatedly for benchmark"); +DEFINE_bool(run_benchmark, + false, + "Whether to predict a image_file repeatedly for benchmark"); DEFINE_bool(use_mkldnn, false, "Whether use mkldnn with CPU"); DEFINE_int32(cpu_threads, 1, "Num of threads with CPU"); DEFINE_int32(trt_min_shape, 1, "Min shape of TRT DynamicShapeI"); DEFINE_int32(trt_max_shape, 1280, "Max shape of TRT DynamicShapeI"); DEFINE_int32(trt_opt_shape, 640, "Opt shape of TRT DynamicShapeI"); -DEFINE_bool(trt_calib_mode, false, "If the model is produced by TRT offline quantitative calibration, trt_calib_mode need to set True"); +DEFINE_bool(trt_calib_mode, + false, + "If the model is produced by TRT offline quantitative calibration, " + "trt_calib_mode need to set True"); -void PrintBenchmarkLog(std::vector det_time, int img_num){ +void PrintBenchmarkLog(std::vector det_time, int img_num) { LOG(INFO) << "----------------------- Config info -----------------------"; LOG(INFO) << "runtime_device: " << FLAGS_device; - LOG(INFO) << "ir_optim: " << "True"; - LOG(INFO) << "enable_memory_optim: " << "True"; + LOG(INFO) << "ir_optim: " + << "True"; + LOG(INFO) << "enable_memory_optim: " + << "True"; int has_trt = FLAGS_run_mode.find("trt"); if (has_trt >= 0) { - LOG(INFO) << "enable_tensorrt: " << "True"; + LOG(INFO) << "enable_tensorrt: " + << "True"; std::string precision = FLAGS_run_mode.substr(4, 8); LOG(INFO) << "precision: " << precision; } else { - LOG(INFO) << "enable_tensorrt: " << "False"; - LOG(INFO) << "precision: " << "fp32"; + LOG(INFO) << "enable_tensorrt: " + << "False"; + LOG(INFO) << "precision: " + << "fp32"; } LOG(INFO) << "enable_mkldnn: " << (FLAGS_use_mkldnn ? 
"True" : "False"); LOG(INFO) << "cpu_math_library_num_threads: " << FLAGS_cpu_threads; LOG(INFO) << "----------------------- Data info -----------------------"; LOG(INFO) << "batch_size: " << FLAGS_batch_size; - LOG(INFO) << "input_shape: " << "dynamic shape"; + LOG(INFO) << "input_shape: " + << "dynamic shape"; LOG(INFO) << "----------------------- Model info -----------------------"; FLAGS_model_dir.erase(FLAGS_model_dir.find_last_not_of("/") + 1); - LOG(INFO) << "model_name: " << FLAGS_model_dir.substr(FLAGS_model_dir.find_last_of('/') + 1); + LOG(INFO) << "model_name: " + << FLAGS_model_dir.substr(FLAGS_model_dir.find_last_of('/') + 1); LOG(INFO) << "----------------------- Perf info ------------------------"; LOG(INFO) << "Total number of predicted data: " << img_num << " and total time spent(ms): " << std::accumulate(det_time.begin(), det_time.end(), 0); LOG(INFO) << "preproce_time(ms): " << det_time[0] / img_num << ", inference_time(ms): " << det_time[1] / img_num - << ", postprocess_time(ms): " << det_time[2]; + << ", postprocess_time(ms): " << det_time[2] / img_num; } -static std::string DirName(const std::string &filepath) { +static std::string DirName(const std::string& filepath) { auto pos = filepath.rfind(OS_PATH_SEP); if (pos == std::string::npos) { return ""; @@ -94,7 +118,7 @@ static std::string DirName(const std::string &filepath) { return filepath.substr(0, pos); } -static bool PathExists(const std::string& path){ +static bool PathExists(const std::string& path) { #ifdef _WIN32 struct _stat buffer; return (_stat(path.c_str(), &buffer) == 0); @@ -133,11 +157,12 @@ void PredictVideo(const std::string& video_path, // Open video cv::VideoCapture capture; std::string video_out_name = "output.mp4"; - if (FLAGS_camera_id != -1){ + if (FLAGS_camera_id != -1) { capture.open(FLAGS_camera_id); - }else{ + } else { capture.open(video_path.c_str()); - video_out_name = video_path.substr(video_path.find_last_of(OS_PATH_SEP) + 1); + video_out_name = + video_path.substr(video_path.find_last_of(OS_PATH_SEP) + 1); } if (!capture.isOpened()) { printf("can not open video : %s\n", video_path.c_str()); @@ -148,7 +173,8 @@ void PredictVideo(const std::string& video_path, int video_width = static_cast(capture.get(CV_CAP_PROP_FRAME_WIDTH)); int video_height = static_cast(capture.get(CV_CAP_PROP_FRAME_HEIGHT)); int video_fps = static_cast(capture.get(CV_CAP_PROP_FPS)); - int video_frame_count = static_cast(capture.get(CV_CAP_PROP_FRAME_COUNT)); + int video_frame_count = + static_cast(capture.get(CV_CAP_PROP_FRAME_COUNT)); printf("fps: %d, frame_count: %d\n", video_fps, video_frame_count); // Create VideoWriter for output @@ -188,35 +214,34 @@ void PredictVideo(const std::string& video_path, std::vector out_result; for (const auto& item : result) { if (item.confidence < FLAGS_threshold || item.class_id == -1) { - continue; + continue; } out_result.push_back(item); - if (item.rect.size() > 6){ - is_rbox = true; - printf("class=%d confidence=%.4f rect=[%d %d %d %d %d %d %d %d]\n", - item.class_id, - item.confidence, - item.rect[0], - item.rect[1], - item.rect[2], - item.rect[3], - item.rect[4], - item.rect[5], - item.rect[6], - item.rect[7]); - } - else{ + if (item.rect.size() > 6) { + is_rbox = true; + printf("class=%d confidence=%.4f rect=[%d %d %d %d %d %d %d %d]\n", + item.class_id, + item.confidence, + item.rect[0], + item.rect[1], + item.rect[2], + item.rect[3], + item.rect[4], + item.rect[5], + item.rect[6], + item.rect[7]); + } else { printf("class=%d confidence=%.4f rect=[%d %d %d %d]\n", - 
item.class_id, - item.confidence, - item.rect[0], - item.rect[1], - item.rect[2], - item.rect[3]); + item.class_id, + item.confidence, + item.rect[0], + item.rect[1], + item.rect[2], + item.rect[3]); } - } + } - cv::Mat out_im = PaddleDetection::VisualizeResult( + cv::Mat out_im = PaddleDetection::VisualizeResult( frame, out_result, labels, colormap, is_rbox); video_out.write(out_im); @@ -235,7 +260,9 @@ void PredictImage(const std::vector all_img_paths, std::vector det_t = {0, 0, 0}; int steps = ceil(float(all_img_paths.size()) / batch_size); printf("total images = %d, batch_size = %d, total steps = %d\n", - all_img_paths.size(), batch_size, steps); + all_img_paths.size(), + batch_size, + steps); for (int idx = 0; idx < steps; idx++) { std::vector batch_imgs; int left_image_cnt = all_img_paths.size() - idx * batch_size; @@ -243,18 +270,19 @@ void PredictImage(const std::vector all_img_paths, left_image_cnt = batch_size; } for (int bs = 0; bs < left_image_cnt; bs++) { - std::string image_file_path = all_img_paths.at(idx * batch_size+bs); + std::string image_file_path = all_img_paths.at(idx * batch_size + bs); cv::Mat im = cv::imread(image_file_path, 1); batch_imgs.insert(batch_imgs.end(), im); } - + // Store all detected result std::vector result; std::vector bbox_num; std::vector det_times; bool is_rbox = false; if (run_benchmark) { - det->Predict(batch_imgs, threshold, 10, 10, &result, &bbox_num, &det_times); + det->Predict( + batch_imgs, threshold, 10, 10, &result, &bbox_num, &det_times); } else { det->Predict(batch_imgs, threshold, 0, 1, &result, &bbox_num, &det_times); // get labels and colormap @@ -274,31 +302,31 @@ void PredictImage(const std::vector all_img_paths, } detect_num += 1; im_result.push_back(item); - if (item.rect.size() > 6){ + if (item.rect.size() > 6) { is_rbox = true; printf("class=%d confidence=%.4f rect=[%d %d %d %d %d %d %d %d]\n", - item.class_id, - item.confidence, - item.rect[0], - item.rect[1], - item.rect[2], - item.rect[3], - item.rect[4], - item.rect[5], - item.rect[6], - item.rect[7]); - } - else{ + item.class_id, + item.confidence, + item.rect[0], + item.rect[1], + item.rect[2], + item.rect[3], + item.rect[4], + item.rect[5], + item.rect[6], + item.rect[7]); + } else { printf("class=%d confidence=%.4f rect=[%d %d %d %d]\n", - item.class_id, - item.confidence, - item.rect[0], - item.rect[1], - item.rect[2], - item.rect[3]); + item.class_id, + item.confidence, + item.rect[0], + item.rect[1], + item.rect[2], + item.rect[3]); } } - std::cout << all_img_paths.at(idx * batch_size + i) << " The number of detected box: " << detect_num << std::endl; + std::cout << all_img_paths.at(idx * batch_size + i) + << " The number of detected box: " << detect_num << std::endl; item_start_idx = item_start_idx + bbox_num[i]; // Visualization result cv::Mat vis_img = PaddleDetection::VisualizeResult( @@ -311,14 +339,16 @@ void PredictImage(const std::vector all_img_paths, output_path += OS_PATH_SEP; } std::string image_file_path = all_img_paths.at(idx * batch_size + i); - output_path += image_file_path.substr(image_file_path.find_last_of('/') + 1); + output_path += + image_file_path.substr(image_file_path.find_last_of('/') + 1); cv::imwrite(output_path, vis_img, compression_params); - printf("Visualized output saved as %s\n", output_path.c_str()); + printf("Visualized output saved as %s\n", output_path.c_str()); } } det_t[0] += det_times[0]; det_t[1] += det_times[1]; det_t[2] += det_times[2]; + det_times.clear(); } PrintBenchmarkLog(det_t, all_img_paths.size()); } @@ -326,34 
+356,48 @@ void PredictImage(const std::vector all_img_paths, int main(int argc, char** argv) { // Parsing command-line google::ParseCommandLineFlags(&argc, &argv, true); - if (FLAGS_model_dir.empty() - || (FLAGS_image_file.empty() && FLAGS_image_dir.empty() && FLAGS_video_file.empty())) { + if (FLAGS_model_dir.empty() || + (FLAGS_image_file.empty() && FLAGS_image_dir.empty() && + FLAGS_video_file.empty())) { std::cout << "Usage: ./main --model_dir=/PATH/TO/INFERENCE_MODEL/ " - << "--image_file=/PATH/TO/INPUT/IMAGE/" << std::endl; + << "--image_file=/PATH/TO/INPUT/IMAGE/" << std::endl; return -1; } - if (!(FLAGS_run_mode == "fluid" || FLAGS_run_mode == "trt_fp32" - || FLAGS_run_mode == "trt_fp16" || FLAGS_run_mode == "trt_int8")) { - std::cout << "run_mode should be 'fluid', 'trt_fp32', 'trt_fp16' or 'trt_int8'."; + if (!(FLAGS_run_mode == "paddle" || FLAGS_run_mode == "trt_fp32" || + FLAGS_run_mode == "trt_fp16" || FLAGS_run_mode == "trt_int8")) { + std::cout + << "run_mode should be 'paddle', 'trt_fp32', 'trt_fp16' or 'trt_int8'."; return -1; } - transform(FLAGS_device.begin(),FLAGS_device.end(),FLAGS_device.begin(),::toupper); - if (!(FLAGS_device == "CPU" || FLAGS_device == "GPU" || FLAGS_device == "XPU")) { + transform(FLAGS_device.begin(), + FLAGS_device.end(), + FLAGS_device.begin(), + ::toupper); + if (!(FLAGS_device == "CPU" || FLAGS_device == "GPU" || + FLAGS_device == "XPU")) { std::cout << "device should be 'CPU', 'GPU' or 'XPU'."; return -1; } if (FLAGS_use_gpu) { - std::cout << "Deprecated, please use `--device` to set the device you want to run."; + std::cout << "Deprecated, please use `--device` to set the device you want " + "to run."; return -1; } // Load model and create a object detector - PaddleDetection::ObjectDetector det(FLAGS_model_dir, FLAGS_device, FLAGS_use_mkldnn, - FLAGS_cpu_threads, FLAGS_run_mode, FLAGS_batch_size,FLAGS_gpu_id, - FLAGS_trt_min_shape, FLAGS_trt_max_shape, FLAGS_trt_opt_shape, - FLAGS_trt_calib_mode); + PaddleDetection::ObjectDetector det(FLAGS_model_dir, + FLAGS_device, + FLAGS_use_mkldnn, + FLAGS_cpu_threads, + FLAGS_run_mode, + FLAGS_batch_size, + FLAGS_gpu_id, + FLAGS_trt_min_shape, + FLAGS_trt_max_shape, + FLAGS_trt_opt_shape, + FLAGS_trt_calib_mode); // Do inference on input video or image if (!PathExists(FLAGS_output_dir)) { - MkDirs(FLAGS_output_dir); + MkDirs(FLAGS_output_dir); } if (!FLAGS_video_file.empty() || FLAGS_camera_id != -1) { PredictVideo(FLAGS_video_file, &det, FLAGS_output_dir); @@ -363,17 +407,22 @@ int main(int argc, char** argv) { if (!FLAGS_image_file.empty()) { all_img_paths.push_back(FLAGS_image_file); if (FLAGS_batch_size > 1) { - std::cout << "batch_size should be 1, when set `image_file`." << std::endl; - return -1; + std::cout << "batch_size should be 1, when set `image_file`." 
+ << std::endl; + return -1; } } else { - cv::glob(FLAGS_image_dir, cv_all_img_paths); - for (const auto & img_path : cv_all_img_paths) { - all_img_paths.push_back(img_path); - } + cv::glob(FLAGS_image_dir, cv_all_img_paths); + for (const auto& img_path : cv_all_img_paths) { + all_img_paths.push_back(img_path); + } } - PredictImage(all_img_paths, FLAGS_batch_size, FLAGS_threshold, - FLAGS_run_benchmark, &det, FLAGS_output_dir); + PredictImage(all_img_paths, + FLAGS_batch_size, + FLAGS_threshold, + FLAGS_run_benchmark, + &det, + FLAGS_output_dir); } return 0; } diff --git a/deploy/cpp/src/main_jde.cc b/deploy/cpp/src/main_jde.cc index 8010f8086..3bba98dd4 100644 --- a/deploy/cpp/src/main_jde.cc +++ b/deploy/cpp/src/main_jde.cc @@ -14,14 +14,14 @@ #include +#include +#include +#include +#include #include +#include #include #include -#include -#include -#include -#include -#include #ifdef _WIN32 #include @@ -31,52 +31,74 @@ #include #endif -#include "include/object_detector.h" -#include "include/jde_detector.h" #include #include - +#include "include/jde_detector.h" +#include "include/object_detector.h" DEFINE_string(model_dir, "", "Path of inference model"); DEFINE_int32(batch_size, 1, "batch_size"); -DEFINE_string(video_file, "", "Path of input video, `video_file` or `camera_id` has a highest priority."); +DEFINE_string( + video_file, + "", + "Path of input video, `video_file` or `camera_id` has a highest priority."); DEFINE_int32(camera_id, -1, "Device id of camera to predict"); -DEFINE_bool(use_gpu, false, "Deprecated, please use `--device` to set the device you want to run."); -DEFINE_string(device, "CPU", "Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU."); +DEFINE_bool( + use_gpu, + false, + "Deprecated, please use `--device` to set the device you want to run."); +DEFINE_string(device, + "CPU", + "Choose the device you want to run, it can be: CPU/GPU/XPU, " + "default is CPU."); DEFINE_double(threshold, 0.5, "Threshold of score."); DEFINE_string(output_dir, "output", "Directory of output visualization files."); -DEFINE_string(run_mode, "fluid", "Mode of running(fluid/trt_fp32/trt_fp16/trt_int8)"); +DEFINE_string(run_mode, + "paddle", + "Mode of running(paddle/trt_fp32/trt_fp16/trt_int8)"); DEFINE_int32(gpu_id, 0, "Device id of GPU to execute"); -DEFINE_bool(run_benchmark, false, "Whether to predict a image_file repeatedly for benchmark"); +DEFINE_bool(run_benchmark, + false, + "Whether to predict a image_file repeatedly for benchmark"); DEFINE_bool(use_mkldnn, false, "Whether use mkldnn with CPU"); DEFINE_int32(cpu_threads, 1, "Num of threads with CPU"); DEFINE_int32(trt_min_shape, 1, "Min shape of TRT DynamicShapeI"); DEFINE_int32(trt_max_shape, 1280, "Max shape of TRT DynamicShapeI"); DEFINE_int32(trt_opt_shape, 640, "Opt shape of TRT DynamicShapeI"); -DEFINE_bool(trt_calib_mode, false, "If the model is produced by TRT offline quantitative calibration, trt_calib_mode need to set True"); +DEFINE_bool(trt_calib_mode, + false, + "If the model is produced by TRT offline quantitative calibration, " + "trt_calib_mode need to set True"); -void PrintBenchmarkLog(std::vector det_time, int img_num){ +void PrintBenchmarkLog(std::vector det_time, int img_num) { LOG(INFO) << "----------------------- Config info -----------------------"; LOG(INFO) << "runtime_device: " << FLAGS_device; - LOG(INFO) << "ir_optim: " << "True"; - LOG(INFO) << "enable_memory_optim: " << "True"; + LOG(INFO) << "ir_optim: " + << "True"; + LOG(INFO) << "enable_memory_optim: " + << "True"; int 
has_trt = FLAGS_run_mode.find("trt"); if (has_trt >= 0) { - LOG(INFO) << "enable_tensorrt: " << "True"; + LOG(INFO) << "enable_tensorrt: " + << "True"; std::string precision = FLAGS_run_mode.substr(4, 8); LOG(INFO) << "precision: " << precision; } else { - LOG(INFO) << "enable_tensorrt: " << "False"; - LOG(INFO) << "precision: " << "fp32"; + LOG(INFO) << "enable_tensorrt: " + << "False"; + LOG(INFO) << "precision: " + << "fp32"; } LOG(INFO) << "enable_mkldnn: " << (FLAGS_use_mkldnn ? "True" : "False"); LOG(INFO) << "cpu_math_library_num_threads: " << FLAGS_cpu_threads; LOG(INFO) << "----------------------- Data info -----------------------"; LOG(INFO) << "batch_size: " << FLAGS_batch_size; - LOG(INFO) << "input_shape: " << "dynamic shape"; + LOG(INFO) << "input_shape: " + << "dynamic shape"; LOG(INFO) << "----------------------- Model info -----------------------"; FLAGS_model_dir.erase(FLAGS_model_dir.find_last_not_of("/") + 1); - LOG(INFO) << "model_name: " << FLAGS_model_dir.substr(FLAGS_model_dir.find_last_of('/') + 1); + LOG(INFO) << "model_name: " + << FLAGS_model_dir.substr(FLAGS_model_dir.find_last_of('/') + 1); LOG(INFO) << "----------------------- Perf info ------------------------"; LOG(INFO) << "Total number of predicted data: " << img_num << " and total time spent(ms): " @@ -86,7 +108,7 @@ void PrintBenchmarkLog(std::vector det_time, int img_num){ << ", postprocess_time(ms): " << det_time[2] / img_num; } -static std::string DirName(const std::string &filepath) { +static std::string DirName(const std::string& filepath) { auto pos = filepath.rfind(OS_PATH_SEP); if (pos == std::string::npos) { return ""; @@ -94,7 +116,7 @@ static std::string DirName(const std::string &filepath) { return filepath.substr(0, pos); } -static bool PathExists(const std::string& path){ +static bool PathExists(const std::string& path) { #ifdef _WIN32 struct _stat buffer; return (_stat(path.c_str(), &buffer) == 0); @@ -133,11 +155,12 @@ void PredictVideo(const std::string& video_path, // Open video cv::VideoCapture capture; std::string video_out_name = "output.mp4"; - if (FLAGS_camera_id != -1){ + if (FLAGS_camera_id != -1) { capture.open(FLAGS_camera_id); - }else{ + } else { capture.open(video_path.c_str()); - video_out_name = video_path.substr(video_path.find_last_of(OS_PATH_SEP) + 1); + video_out_name = + video_path.substr(video_path.find_last_of(OS_PATH_SEP) + 1); } if (!capture.isOpened()) { printf("can not open video : %s\n", video_path.c_str()); @@ -148,7 +171,8 @@ void PredictVideo(const std::string& video_path, int video_width = static_cast(capture.get(CV_CAP_PROP_FRAME_WIDTH)); int video_height = static_cast(capture.get(CV_CAP_PROP_FRAME_HEIGHT)); int video_fps = static_cast(capture.get(CV_CAP_PROP_FPS)); - int video_frame_count = static_cast(capture.get(CV_CAP_PROP_FRAME_COUNT)); + int video_frame_count = + static_cast(capture.get(CV_CAP_PROP_FRAME_COUNT)); printf("fps: %d, frame_count: %d\n", video_fps, video_frame_count); // Create VideoWriter for output @@ -186,47 +210,59 @@ void PredictVideo(const std::string& video_path, times = std::accumulate(det_times.begin(), det_times.end(), 0) / frame_id; cv::Mat out_im = PaddleDetection::VisualizeTrackResult( - frame, result, 1000./times, frame_id); - + frame, result, 1000. 
/ times, frame_id); + video_out.write(out_im); } capture.release(); video_out.release(); PrintBenchmarkLog(det_times, frame_id); - printf("Visualized output saved as %s\n", video_out_path.c_str()); + printf("Visualized output saved as %s\n", video_out_path.c_str()); } int main(int argc, char** argv) { // Parsing command-line google::ParseCommandLineFlags(&argc, &argv, true); - if (FLAGS_model_dir.empty() - || FLAGS_video_file.empty()) { + if (FLAGS_model_dir.empty() || FLAGS_video_file.empty()) { std::cout << "Usage: ./main --model_dir=/PATH/TO/INFERENCE_MODEL/ " - << "--video_file=/PATH/TO/INPUT/VIDEO/" << std::endl; + << "--video_file=/PATH/TO/INPUT/VIDEO/" << std::endl; return -1; } - if (!(FLAGS_run_mode == "fluid" || FLAGS_run_mode == "trt_fp32" - || FLAGS_run_mode == "trt_fp16" || FLAGS_run_mode == "trt_int8")) { - std::cout << "run_mode should be 'fluid', 'trt_fp32', 'trt_fp16' or 'trt_int8'."; + if (!(FLAGS_run_mode == "paddle" || FLAGS_run_mode == "trt_fp32" || + FLAGS_run_mode == "trt_fp16" || FLAGS_run_mode == "trt_int8")) { + std::cout + << "run_mode should be 'paddle', 'trt_fp32', 'trt_fp16' or 'trt_int8'."; return -1; } - transform(FLAGS_device.begin(),FLAGS_device.end(),FLAGS_device.begin(),::toupper); - if (!(FLAGS_device == "CPU" || FLAGS_device == "GPU" || FLAGS_device == "XPU")) { + transform(FLAGS_device.begin(), + FLAGS_device.end(), + FLAGS_device.begin(), + ::toupper); + if (!(FLAGS_device == "CPU" || FLAGS_device == "GPU" || + FLAGS_device == "XPU")) { std::cout << "device should be 'CPU', 'GPU' or 'XPU'."; return -1; } if (FLAGS_use_gpu) { - std::cout << "Deprecated, please use `--device` to set the device you want to run."; + std::cout << "Deprecated, please use `--device` to set the device you want " + "to run."; return -1; } // Do inference on input video or image - PaddleDetection::JDEDetector mot(FLAGS_model_dir, FLAGS_device, FLAGS_use_mkldnn, - FLAGS_cpu_threads, FLAGS_run_mode, FLAGS_batch_size,FLAGS_gpu_id, - FLAGS_trt_min_shape, FLAGS_trt_max_shape, FLAGS_trt_opt_shape, - FLAGS_trt_calib_mode); + PaddleDetection::JDEDetector mot(FLAGS_model_dir, + FLAGS_device, + FLAGS_use_mkldnn, + FLAGS_cpu_threads, + FLAGS_run_mode, + FLAGS_batch_size, + FLAGS_gpu_id, + FLAGS_trt_min_shape, + FLAGS_trt_max_shape, + FLAGS_trt_opt_shape, + FLAGS_trt_calib_mode); if (!PathExists(FLAGS_output_dir)) { - MkDirs(FLAGS_output_dir); + MkDirs(FLAGS_output_dir); } PredictVideo(FLAGS_video_file, &mot, FLAGS_output_dir); return 0; diff --git a/deploy/cpp/src/main_keypoint.cc b/deploy/cpp/src/main_keypoint.cc index 968e1b067..7701d5ebb 100644 --- a/deploy/cpp/src/main_keypoint.cc +++ b/deploy/cpp/src/main_keypoint.cc @@ -62,8 +62,8 @@ DEFINE_double(threshold, 0.5, "Threshold of score."); DEFINE_double(threshold_keypoint, 0.5, "Threshold of score."); DEFINE_string(output_dir, "output", "Directory of output visualization files."); DEFINE_string(run_mode, - "fluid", - "Mode of running(fluid/trt_fp32/trt_fp16/trt_int8)"); + "paddle", + "Mode of running(paddle/trt_fp32/trt_fp16/trt_int8)"); DEFINE_int32(gpu_id, 0, "Device id of GPU to execute"); DEFINE_bool(run_benchmark, false, @@ -505,10 +505,10 @@ int main(int argc, char** argv) { << "--image_file=/PATH/TO/INPUT/IMAGE/" << std::endl; return -1; } - if (!(FLAGS_run_mode == "fluid" || FLAGS_run_mode == "trt_fp32" || + if (!(FLAGS_run_mode == "paddle" || FLAGS_run_mode == "trt_fp32" || FLAGS_run_mode == "trt_fp16" || FLAGS_run_mode == "trt_int8")) { std::cout - << "run_mode should be 'fluid', 'trt_fp32', 'trt_fp16' or 'trt_int8'."; + << 
"run_mode should be 'paddle', 'trt_fp32', 'trt_fp16' or 'trt_int8'."; return -1; } transform(FLAGS_device.begin(), diff --git a/deploy/cpp/src/object_detector.cc b/deploy/cpp/src/object_detector.cc index 134c01092..a99fcd515 100644 --- a/deploy/cpp/src/object_detector.cc +++ b/deploy/cpp/src/object_detector.cc @@ -13,8 +13,8 @@ // limitations under the License. #include // for setprecision -#include #include +#include #include "include/object_detector.h" using namespace paddle_infer; @@ -33,47 +33,51 @@ void ObjectDetector::LoadModel(const std::string& model_dir, config.EnableUseGpu(200, this->gpu_id_); config.SwitchIrOptim(true); // use tensorrt - if (run_mode != "fluid") { + if (run_mode != "paddle") { auto precision = paddle_infer::Config::Precision::kFloat32; if (run_mode == "trt_fp32") { precision = paddle_infer::Config::Precision::kFloat32; - } - else if (run_mode == "trt_fp16") { + } else if (run_mode == "trt_fp16") { precision = paddle_infer::Config::Precision::kHalf; - } - else if (run_mode == "trt_int8") { + } else if (run_mode == "trt_int8") { precision = paddle_infer::Config::Precision::kInt8; } else { - printf("run_mode should be 'fluid', 'trt_fp32', 'trt_fp16' or 'trt_int8'"); + printf( + "run_mode should be 'paddle', 'trt_fp32', 'trt_fp16' or " + "'trt_int8'"); } // set tensorrt - config.EnableTensorRtEngine( - 1 << 30, - batch_size, - this->min_subgraph_size_, - precision, - false, - this->trt_calib_mode_); + config.EnableTensorRtEngine(1 << 30, + batch_size, + this->min_subgraph_size_, + precision, + false, + this->trt_calib_mode_); // set use dynamic shape if (this->use_dynamic_shape_) { // set DynamicShsape for image tensor - const std::vector min_input_shape = {1, 3, this->trt_min_shape_, this->trt_min_shape_}; - const std::vector max_input_shape = {1, 3, this->trt_max_shape_, this->trt_max_shape_}; - const std::vector opt_input_shape = {1, 3, this->trt_opt_shape_, this->trt_opt_shape_}; - const std::map> map_min_input_shape = {{"image", min_input_shape}}; - const std::map> map_max_input_shape = {{"image", max_input_shape}}; - const std::map> map_opt_input_shape = {{"image", opt_input_shape}}; + const std::vector min_input_shape = { + 1, 3, this->trt_min_shape_, this->trt_min_shape_}; + const std::vector max_input_shape = { + 1, 3, this->trt_max_shape_, this->trt_max_shape_}; + const std::vector opt_input_shape = { + 1, 3, this->trt_opt_shape_, this->trt_opt_shape_}; + const std::map> map_min_input_shape = { + {"image", min_input_shape}}; + const std::map> map_max_input_shape = { + {"image", max_input_shape}}; + const std::map> map_opt_input_shape = { + {"image", opt_input_shape}}; - config.SetTRTDynamicShapeInfo(map_min_input_shape, - map_max_input_shape, - map_opt_input_shape); + config.SetTRTDynamicShapeInfo( + map_min_input_shape, map_max_input_shape, map_opt_input_shape); std::cout << "TensorRT dynamic shape enabled" << std::endl; } } - } else if (this->device_ == "XPU"){ - config.EnableXpu(10*1024*1024); + } else if (this->device_ == "XPU") { + config.EnableXpu(10 * 1024 * 1024); } else { config.DisableGpu(); if (this->use_mkldnn_) { @@ -92,11 +96,12 @@ void ObjectDetector::LoadModel(const std::string& model_dir, } // Visualiztion MaskDetector results -cv::Mat VisualizeResult(const cv::Mat& img, - const std::vector& results, - const std::vector& lables, - const std::vector& colormap, - const bool is_rbox=false) { +cv::Mat VisualizeResult( + const cv::Mat& img, + const std::vector& results, + const std::vector& lables, + const std::vector& colormap, + const bool 
is_rbox = false) { cv::Mat vis_img = img.clone(); for (int i = 0; i < results.size(); ++i) { // Configure color and text size @@ -112,32 +117,25 @@ cv::Mat VisualizeResult(const cv::Mat& img, int font_face = cv::FONT_HERSHEY_COMPLEX_SMALL; double font_scale = 0.5f; float thickness = 0.5; - cv::Size text_size = cv::getTextSize(text, - font_face, - font_scale, - thickness, - nullptr); + cv::Size text_size = + cv::getTextSize(text, font_face, font_scale, thickness, nullptr); cv::Point origin; - if (is_rbox) - { - // Draw object, text, and background - for (int k = 0; k < 4; k++) - { - cv::Point pt1 = cv::Point(results[i].rect[(k * 2) % 8], - results[i].rect[(k * 2 + 1) % 8]); - cv::Point pt2 = cv::Point(results[i].rect[(k * 2 + 2) % 8], - results[i].rect[(k * 2 + 3) % 8]); - cv::line(vis_img, pt1, pt2, roi_color, 2); - } - } - else - { - int w = results[i].rect[2] - results[i].rect[0]; - int h = results[i].rect[3] - results[i].rect[1]; - cv::Rect roi = cv::Rect(results[i].rect[0], results[i].rect[1], w, h); - // Draw roi object, text, and background - cv::rectangle(vis_img, roi, roi_color, 2); + if (is_rbox) { + // Draw object, text, and background + for (int k = 0; k < 4; k++) { + cv::Point pt1 = cv::Point(results[i].rect[(k * 2) % 8], + results[i].rect[(k * 2 + 1) % 8]); + cv::Point pt2 = cv::Point(results[i].rect[(k * 2 + 2) % 8], + results[i].rect[(k * 2 + 3) % 8]); + cv::line(vis_img, pt1, pt2, roi_color, 2); + } + } else { + int w = results[i].rect[2] - results[i].rect[0]; + int h = results[i].rect[3] - results[i].rect[1]; + cv::Rect roi = cv::Rect(results[i].rect[0], results[i].rect[1], w, h); + // Draw roi object, text, and background + cv::rectangle(vis_img, roi, roi_color, 2); } origin.x = results[i].rect[0]; @@ -173,7 +171,7 @@ void ObjectDetector::Postprocess( std::vector* result, std::vector bbox_num, std::vector output_data_, - bool is_rbox=false) { + bool is_rbox = false) { result->clear(); int start_idx = 0; for (int im_id = 0; im_id < mats.size(); im_id++) { @@ -184,7 +182,7 @@ void ObjectDetector::Postprocess( rh = raw_mat.rows; rw = raw_mat.cols; } - for (int j = start_idx; j < start_idx+bbox_num[im_id]; j++) { + for (int j = start_idx; j < start_idx + bbox_num[im_id]; j++) { if (is_rbox) { // Class id int class_id = static_cast(round(output_data_[0 + j * 10])); @@ -198,14 +196,13 @@ void ObjectDetector::Postprocess( int y3 = (output_data_[7 + j * 10] * rh); int x4 = (output_data_[8 + j * 10] * rw); int y4 = (output_data_[9 + j * 10] * rh); - + PaddleDetection::ObjectResult result_item; result_item.rect = {x1, y1, x2, y2, x3, y3, x4, y4}; result_item.class_id = class_id; result_item.confidence = score; result->push_back(result_item); - } - else { + } else { // Class id int class_id = static_cast(round(output_data_[0 + j * 6])); // Confidence score @@ -216,7 +213,7 @@ void ObjectDetector::Postprocess( int ymax = (output_data_[5 + j * 6] * rh); int wd = xmax - xmin; int hd = ymax - ymin; - + PaddleDetection::ObjectResult result_item; result_item.rect = {xmin, ymin, xmax, ymax}; result_item.class_id = class_id; @@ -229,12 +226,12 @@ void ObjectDetector::Postprocess( } void ObjectDetector::Predict(const std::vector imgs, - const double threshold, - const int warmup, - const int repeats, - std::vector* result, - std::vector* bbox_num, - std::vector* times) { + const double threshold, + const int warmup, + const int repeats, + std::vector* result, + std::vector* bbox_num, + std::vector* times) { auto preprocess_start = std::chrono::steady_clock::now(); int batch_size = 
@@ -229,12 +226,12 @@ void ObjectDetector::Postprocess(
 }
 
 void ObjectDetector::Predict(const std::vector<cv::Mat> imgs,
-                      const double threshold,
-                      const int warmup,
-                      const int repeats,
-                      std::vector<PaddleDetection::ObjectResult>* result,
-                      std::vector<int>* bbox_num,
-                      std::vector<double>* times) {
+                             const double threshold,
+                             const int warmup,
+                             const int repeats,
+                             std::vector<PaddleDetection::ObjectResult>* result,
+                             std::vector<int>* bbox_num,
+                             std::vector<double>* times) {
   auto preprocess_start = std::chrono::steady_clock::now();
   int batch_size = imgs.size();
@@ -242,9 +239,12 @@ void ObjectDetector::Predict(const std::vector<cv::Mat> imgs,
   std::vector<float> in_data_all;
   std::vector<float> im_shape_all(batch_size * 2);
   std::vector<float> scale_factor_all(batch_size * 2);
-  std::vector<float *> output_data_list_;
+  std::vector<float*> output_data_list_;
   std::vector<int> out_bbox_num_data_;
-
+
+  // in_net img for each batch
+  std::vector<cv::Mat> in_net_img_all(batch_size);
+
   // Preprocess image
   for (int bs_idx = 0; bs_idx < batch_size; bs_idx++) {
     cv::Mat im = imgs.at(bs_idx);
@@ -256,11 +256,39 @@ void ObjectDetector::Predict(const std::vector<cv::Mat> imgs,
     scale_factor_all[bs_idx * 2 + 1] = inputs_.scale_factor_[1];
 
     // TODO: reduce cost time
-    in_data_all.insert(in_data_all.end(), inputs_.im_data_.begin(), inputs_.im_data_.end());
+    in_data_all.insert(
+        in_data_all.end(), inputs_.im_data_.begin(), inputs_.im_data_.end());
+
+    // collect in_net img
+    in_net_img_all[bs_idx] = inputs_.in_net_im_;
   }
+
+  // Pad Batch if batch size > 1
+  if (batch_size > 1 && CheckDynamicInput(in_net_img_all)) {
+    in_data_all.clear();
+    std::vector<cv::Mat> pad_img_all = PadBatch(in_net_img_all);
+    int rh = pad_img_all[0].rows;
+    int rw = pad_img_all[0].cols;
+    int rc = pad_img_all[0].channels();
+
+    for (int bs_idx = 0; bs_idx < batch_size; bs_idx++) {
+      cv::Mat pad_img = pad_img_all[bs_idx];
+      pad_img.convertTo(pad_img, CV_32FC3);
+      std::vector<float> pad_data;
+      pad_data.resize(rc * rh * rw);
+      float* base = pad_data.data();
+      for (int i = 0; i < rc; ++i) {
+        cv::extractChannel(
+            pad_img, cv::Mat(rh, rw, CV_32FC1, base + i * rh * rw), i);
+      }
+      in_data_all.insert(in_data_all.end(), pad_data.begin(), pad_data.end());
+    }
+    // update in_net_shape
+    inputs_.in_net_shape_ = {static_cast<float>(rh), static_cast<float>(rw)};
+  }
+
   auto preprocess_end = std::chrono::steady_clock::now();
   // Prepare input tensor
-
   auto input_names = predictor_->GetInputNames();
   for (const auto& tensor_name : input_names) {
     auto in_tensor = predictor_->GetInputHandle(tensor_name);
@@ -277,7 +305,7 @@ void ObjectDetector::Predict(const std::vector<cv::Mat> imgs,
       in_tensor->CopyFromCpu(scale_factor_all.data());
     }
   }
-
+
   // Run predictor
   std::vector<std::vector<float>> out_tensor_list;
   std::vector<std::vector<int>> output_shape_list;
@@ -292,8 +320,8 @@ void ObjectDetector::Predict(const std::vector<cv::Mat> imgs,
     for (int j = 0; j < output_names.size(); j++) {
       auto output_tensor = predictor_->GetOutputHandle(output_names[j]);
       std::vector<int> output_shape = output_tensor->shape();
-      int out_num = std::accumulate(output_shape.begin(), output_shape.end(),
-                                    1, std::multiplies<int>());
+      int out_num = std::accumulate(
+          output_shape.begin(), output_shape.end(), 1, std::multiplies<int>());
       if (output_tensor->type() == paddle_infer::DataType::INT32) {
         out_bbox_num_data_.resize(out_num);
         output_tensor->CopyToCpu(out_bbox_num_data_.data());
@@ -316,8 +344,8 @@ void ObjectDetector::Predict(const std::vector<cv::Mat> imgs,
     for (int j = 0; j < output_names.size(); j++) {
       auto output_tensor = predictor_->GetOutputHandle(output_names[j]);
       std::vector<int> output_shape = output_tensor->shape();
-      int out_num = std::accumulate(output_shape.begin(), output_shape.end(),
-                                    1, std::multiplies<int>());
+      int out_num = std::accumulate(
+          output_shape.begin(), output_shape.end(), 1, std::multiplies<int>());
       output_shape_list.push_back(output_shape);
       if (output_tensor->type() == paddle_infer::DataType::INT32) {
        out_bbox_num_data_.resize(out_num);
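
Editor's note: the padded-batch branch above flattens each HWC `cv::Mat` into a planar CHW buffer by wrapping every destination plane in a `cv::Mat` header and letting `cv::extractChannel` write into it directly, avoiding an extra copy. The same idiom in isolation, as a sketch assuming a CV_32FC3 input:

```cpp
#include <vector>

#include <opencv2/core.hpp>

// HWC float image -> planar CHW buffer. Each cv::Mat header below aliases a
// slice of the output vector, so extractChannel fills it in place.
std::vector<float> ToCHW(const cv::Mat& img_f32 /* CV_32FC3 */) {
  const int c = img_f32.channels(), h = img_f32.rows, w = img_f32.cols;
  std::vector<float> chw(static_cast<size_t>(c) * h * w);
  for (int i = 0; i < c; ++i) {
    cv::Mat plane(h, w, CV_32FC1, chw.data() + static_cast<size_t>(i) * h * w);
    cv::extractChannel(img_f32, plane, i);
  }
  return chw;
}
```
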
@@ -343,35 +371,43 @@ void ObjectDetector::Predict(const std::vector<cv::Mat> imgs,
       if (i == config_.fpn_stride_.size()) {
         reg_max = output_shape_list[i][2] / 4 - 1;
       }
-      float *buffer = new float[out_tensor_list[i].size()];
-      memcpy(buffer, &out_tensor_list[i][0],
-             out_tensor_list[i].size()*sizeof(float));
+      float* buffer = new float[out_tensor_list[i].size()];
+      memcpy(buffer,
+             &out_tensor_list[i][0],
+             out_tensor_list[i].size() * sizeof(float));
       output_data_list_.push_back(buffer);
     }
     PaddleDetection::PicoDetPostProcess(
-        result, output_data_list_, config_.fpn_stride_,
-        inputs_.im_shape_, inputs_.scale_factor_,
-        config_.nms_info_["score_threshold"].as<float>(),
-        config_.nms_info_["nms_threshold"].as<float>(), num_class, reg_max);
+        result,
+        output_data_list_,
+        config_.fpn_stride_,
+        inputs_.im_shape_,
+        inputs_.scale_factor_,
+        config_.nms_info_["score_threshold"].as<float>(),
+        config_.nms_info_["nms_threshold"].as<float>(),
+        num_class,
+        reg_max);
     bbox_num->push_back(result->size());
   } else {
-    is_rbox = output_shape_list[0][output_shape_list[0].size()-1] % 10 == 0;
+    is_rbox = output_shape_list[0][output_shape_list[0].size() - 1] % 10 == 0;
     Postprocess(imgs, result, out_bbox_num_data_, out_tensor_list[0], is_rbox);
-    for (int k=0; k < out_bbox_num_data_.size(); k++) {
+    for (int k = 0; k < out_bbox_num_data_.size(); k++) {
       int tmp = out_bbox_num_data_[k];
       bbox_num->push_back(tmp);
     }
   }
-
+
   auto postprocess_end = std::chrono::steady_clock::now();
-  std::chrono::duration<float> preprocess_diff = preprocess_end - preprocess_start;
-  times->push_back(double(preprocess_diff.count() * 1000));
+  std::chrono::duration<float> preprocess_diff =
+      preprocess_end - preprocess_start;
+  times->push_back(static_cast<double>(preprocess_diff.count() * 1000));
   std::chrono::duration<float> inference_diff = inference_end - inference_start;
-  times->push_back(double(inference_diff.count() / repeats * 1000));
-  std::chrono::duration<float> postprocess_diff = postprocess_end - postprocess_start;
-  times->push_back(double(postprocess_diff.count() * 1000));
-
+  times->push_back(
+      static_cast<double>(inference_diff.count() / repeats * 1000));
+  std::chrono::duration<float> postprocess_diff =
+      postprocess_end - postprocess_start;
+  times->push_back(static_cast<double>(postprocess_diff.count() * 1000));
 }
 
 std::vector<int> GenerateColorMap(int num_class) {
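
Editor's note: the timing code above applies one pattern three times -- a steady-clock difference converted to milliseconds, with the inference stage divided by `repeats`. A generic sketch of that pattern; the `Stage` callable is a stand-in for the real preprocess/run/postprocess steps:

```cpp
#include <chrono>

// Measure a stage in milliseconds, averaged over `repeats` runs, exactly as
// the three push_back calls above do for preprocess/inference/postprocess.
template <typename Stage>
double ElapsedMs(Stage&& stage, int repeats = 1) {
  auto start = std::chrono::steady_clock::now();
  for (int i = 0; i < repeats; ++i) stage();
  std::chrono::duration<float> diff = std::chrono::steady_clock::now() - start;
  return static_cast<double>(diff.count() * 1000) / repeats;
}
```
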
diff --git a/deploy/cpp/src/preprocess_op.cc b/deploy/cpp/src/preprocess_op.cc
index e7035d3c1..4ac3daa30 100644
--- a/deploy/cpp/src/preprocess_op.cc
+++ b/deploy/cpp/src/preprocess_op.cc
@@ -12,24 +12,20 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include <vector>
 #include <string>
+#include <vector>
 
 #include "include/preprocess_op.h"
 
 namespace PaddleDetection {
 
 void InitInfo::Run(cv::Mat* im, ImageBlob* data) {
-  data->im_shape_ = {
-      static_cast<float>(im->rows),
-      static_cast<float>(im->cols)
-  };
+  data->im_shape_ = {static_cast<float>(im->rows),
+                     static_cast<float>(im->cols)};
   data->scale_factor_ = {1., 1.};
-  data->in_net_shape_ = {
-      static_cast<float>(im->rows),
-      static_cast<float>(im->cols)
-  };
+  data->in_net_shape_ = {static_cast<float>(im->rows),
+                         static_cast<float>(im->cols)};
 }
 
 void NormalizeImage::Run(cv::Mat* im, ImageBlob* data) {
@@ -41,11 +37,11 @@ void NormalizeImage::Run(cv::Mat* im, ImageBlob* data) {
   for (int h = 0; h < im->rows; h++) {
     for (int w = 0; w < im->cols; w++) {
       im->at<cv::Vec3f>(h, w)[0] =
-          (im->at<cv::Vec3f>(h, w)[0] - mean_[0] ) / scale_[0];
+          (im->at<cv::Vec3f>(h, w)[0] - mean_[0]) / scale_[0];
       im->at<cv::Vec3f>(h, w)[1] =
-          (im->at<cv::Vec3f>(h, w)[1] - mean_[1] ) / scale_[1];
+          (im->at<cv::Vec3f>(h, w)[1] - mean_[1]) / scale_[1];
       im->at<cv::Vec3f>(h, w)[2] =
-          (im->at<cv::Vec3f>(h, w)[2] - mean_[2] ) / scale_[2];
+          (im->at<cv::Vec3f>(h, w)[2] - mean_[2]) / scale_[2];
     }
   }
 }
@@ -64,27 +60,20 @@ void Permute::Run(cv::Mat* im, ImageBlob* data) {
 
 void Resize::Run(cv::Mat* im, ImageBlob* data) {
   auto resize_scale = GenerateScale(*im);
-  data->im_shape_ = {
-      static_cast<float>(im->cols * resize_scale.first),
-      static_cast<float>(im->rows * resize_scale.second)
-  };
-  data->in_net_shape_ = {
-      static_cast<float>(im->cols * resize_scale.first),
-      static_cast<float>(im->rows * resize_scale.second)
-  };
+  data->im_shape_ = {static_cast<float>(im->cols * resize_scale.first),
+                     static_cast<float>(im->rows * resize_scale.second)};
+  data->in_net_shape_ = {static_cast<float>(im->cols * resize_scale.first),
+                         static_cast<float>(im->rows * resize_scale.second)};
   cv::resize(
       *im, *im, cv::Size(), resize_scale.first, resize_scale.second, interp_);
   data->im_shape_ = {
-      static_cast<float>(im->rows),
-      static_cast<float>(im->cols),
+      static_cast<float>(im->rows), static_cast<float>(im->cols),
   };
   data->scale_factor_ = {
-      resize_scale.second,
-      resize_scale.first,
+      resize_scale.second, resize_scale.first,
   };
 }
 
-
 std::pair<float, float> Resize::GenerateScale(const cv::Mat& im) {
   std::pair<float, float> resize_scale;
   int origin_w = im.cols;
@@ -93,8 +82,10 @@ std::pair<float, float> Resize::GenerateScale(const cv::Mat& im) {
   if (keep_ratio_) {
     int im_size_max = std::max(origin_w, origin_h);
     int im_size_min = std::min(origin_w, origin_h);
-    int target_size_max = *std::max_element(target_size_.begin(), target_size_.end());
-    int target_size_min = *std::min_element(target_size_.begin(), target_size_.end());
+    int target_size_max =
+        *std::max_element(target_size_.begin(), target_size_.end());
+    int target_size_min =
+        *std::min_element(target_size_.begin(), target_size_.end());
     float scale_min =
         static_cast<float>(target_size_min) / static_cast<float>(im_size_min);
    float scale_max =
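
Editor's note: `Resize::GenerateScale` above implements the usual keep-ratio rule -- scale the short side toward `target_size_min`, but cap the factor so the long side stays within `target_size_max`. A standalone sketch of just that arithmetic (the function name is illustrative). For example, a 1280x720 frame with targets {800, 1333} gets min(800/720, 1333/1280) = min(1.111, 1.041) = 1.041.

```cpp
#include <algorithm>

// Keep-ratio scale selection, as sketched from Resize::GenerateScale above.
float KeepRatioScale(int origin_w, int origin_h,
                     int target_size_min, int target_size_max) {
  int im_size_max = std::max(origin_w, origin_h);
  int im_size_min = std::min(origin_w, origin_h);
  float scale_min =
      static_cast<float>(target_size_min) / static_cast<float>(im_size_min);
  float scale_max =
      static_cast<float>(target_size_max) / static_cast<float>(im_size_max);
  // Taking the smaller factor guarantees both sides fit the target window.
  return std::min(scale_min, scale_max);
}
```
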
@@ -114,46 +105,38 @@ void LetterBoxResize::Run(cv::Mat* im, ImageBlob* data) {
   float resize_scale = GenerateScale(*im);
   int new_shape_w = std::round(im->cols * resize_scale);
   int new_shape_h = std::round(im->rows * resize_scale);
-  data->im_shape_ = {
-      static_cast<float>(new_shape_h),
-      static_cast<float>(new_shape_w)
-  };
+  data->im_shape_ = {static_cast<float>(new_shape_h),
+                     static_cast<float>(new_shape_w)};
 
   float padw = (target_size_[1] - new_shape_w) / 2.;
   float padh = (target_size_[0] - new_shape_h) / 2.;
-
+
   int top = std::round(padh - 0.1);
   int bottom = std::round(padh + 0.1);
   int left = std::round(padw - 0.1);
   int right = std::round(padw + 0.1);
   cv::resize(
-     *im, *im, cv::Size(new_shape_w, new_shape_h), 0, 0, cv::INTER_AREA);
+      *im, *im, cv::Size(new_shape_w, new_shape_h), 0, 0, cv::INTER_AREA);
 
   data->in_net_shape_ = {
-      static_cast<float>(im->rows),
-      static_cast<float>(im->cols),
+      static_cast<float>(im->rows), static_cast<float>(im->cols),
   };
-  cv::copyMakeBorder(
-      *im,
-      *im,
-      top,
-      bottom,
-      left,
-      right,
-      cv::BORDER_CONSTANT,
-      cv::Scalar(127.5));
+  cv::copyMakeBorder(*im,
+                     *im,
+                     top,
+                     bottom,
+                     left,
+                     right,
+                     cv::BORDER_CONSTANT,
+                     cv::Scalar(127.5));
   data->in_net_shape_ = {
-      static_cast<float>(im->rows),
-      static_cast<float>(im->cols),
+      static_cast<float>(im->rows), static_cast<float>(im->cols),
   };
   data->scale_factor_ = {
-      resize_scale,
-      resize_scale,
+      resize_scale, resize_scale,
   };
-
-
 }
 
 float LetterBoxResize::GenerateScale(const cv::Mat& im) {
@@ -165,7 +148,7 @@ float LetterBoxResize::GenerateScale(const cv::Mat& im) {
   float ratio_h = static_cast<float>(target_h) / static_cast<float>(origin_h);
   float ratio_w = static_cast<float>(target_w) / static_cast<float>(origin_w);
-  float resize_scale = std::min(ratio_h, ratio_w);
+  float resize_scale = std::min(ratio_h, ratio_w);
   return resize_scale;
 }
 
@@ -179,34 +162,29 @@ void PadStride::Run(cv::Mat* im, ImageBlob* data) {
   int nh = (rh / stride_) * stride_ + (rh % stride_ != 0) * stride_;
   int nw = (rw / stride_) * stride_ + (rw % stride_ != 0) * stride_;
   cv::copyMakeBorder(
-      *im,
-      *im,
-      0,
-      nh - rh,
-      0,
-      nw - rw,
-      cv::BORDER_CONSTANT,
-      cv::Scalar(0));
+      *im, *im, 0, nh - rh, 0, nw - rw, cv::BORDER_CONSTANT, cv::Scalar(0));
+  data->in_net_im_ = im->clone();
   data->in_net_shape_ = {
-      static_cast<float>(im->rows),
-      static_cast<float>(im->cols),
+      static_cast<float>(im->rows), static_cast<float>(im->cols),
   };
 }
 
 void TopDownEvalAffine::Run(cv::Mat* im, ImageBlob* data) {
-  cv::resize(
-      *im, *im, cv::Size(trainsize_[0],trainsize_[1]), 0, 0, interp_);
+  cv::resize(*im, *im, cv::Size(trainsize_[0], trainsize_[1]), 0, 0, interp_);
   // todo: Simd::ResizeBilinear();
   data->in_net_shape_ = {
-      static_cast<float>(trainsize_[1]),
-      static_cast<float>(trainsize_[0]),
+      static_cast<float>(trainsize_[1]), static_cast<float>(trainsize_[0]),
   };
 }
 
 // Preprocessor op running order
-const std::vector<std::string> Preprocessor::RUN_ORDER = {
-    "InitInfo", "TopDownEvalAffine", "Resize", "LetterBoxResize", "NormalizeImage", "PadStride", "Permute"
-};
+const std::vector<std::string> Preprocessor::RUN_ORDER = {"InitInfo",
+                                                          "TopDownEvalAffine",
+                                                          "Resize",
+                                                          "LetterBoxResize",
+                                                          "NormalizeImage",
+                                                          "PadStride",
+                                                          "Permute"};
 
 void Preprocessor::Run(cv::Mat* im, ImageBlob* data) {
   for (const auto& name : RUN_ORDER) {
@@ -216,37 +194,87 @@ void Preprocessor::Run(cv::Mat* im, ImageBlob* data) {
   }
 }
 
-void CropImg(cv::Mat &img, cv::Mat &crop_img, std::vector<int> &area, std::vector<float> &center, std::vector<float> &scale, float expandratio) {
-  int crop_x1 = std::max(0, area[0]);
-  int crop_y1 = std::max(0, area[1]);
-  int crop_x2 = std::min(img.cols -1, area[2]);
-  int crop_y2 = std::min(img.rows - 1, area[3]);
-  int center_x = (crop_x1 + crop_x2)/2.;
-  int center_y = (crop_y1 + crop_y2)/2.;
-  int half_h = (crop_y2 - crop_y1)/2.;
-  int half_w = (crop_x2 - crop_x1)/2.;
-
-  //adjust h or w to keep image ratio, expand the shorter edge
-  if (half_h*3 > half_w*4){
-    half_w = static_cast<int>(half_h*0.75);
-  }
-  else{
-    half_h = static_cast<int>(half_w*4/3);
-  }
+void CropImg(cv::Mat& img,
+             cv::Mat& crop_img,
+             std::vector<int>& area,
+             std::vector<float>& center,
+             std::vector<float>& scale,
+             float expandratio) {
+  int crop_x1 = std::max(0, area[0]);
+  int crop_y1 = std::max(0, area[1]);
+  int crop_x2 = std::min(img.cols - 1, area[2]);
+  int crop_y2 = std::min(img.rows - 1, area[3]);
+  int center_x = (crop_x1 + crop_x2) / 2.;
+  int center_y = (crop_y1 + crop_y2) / 2.;
+  int half_h = (crop_y2 - crop_y1) / 2.;
+  int half_w = (crop_x2 - crop_x1) / 2.;
+
+  // adjust h or w to keep image ratio, expand the shorter edge
+  if (half_h * 3 > half_w * 4) {
+    half_w = static_cast<int>(half_h * 0.75);
+  } else {
+    half_h = static_cast<int>(half_w * 4 / 3);
+  }
 
-  crop_x1 = std::max(0, center_x - static_cast<int>(half_w*(1+expandratio)));
-  crop_y1 = std::max(0, center_y - static_cast<int>(half_h*(1+expandratio)));
-  crop_x2 = std::min(img.cols -1, static_cast<int>(center_x + half_w*(1+expandratio)));
-  crop_y2 = std::min(img.rows - 1, static_cast<int>(center_y + half_h*(1+expandratio)));
-  crop_img = img(cv::Range(crop_y1, crop_y2+1), cv::Range(crop_x1, crop_x2 + 1));
+  crop_x1 =
+      std::max(0, center_x - static_cast<int>(half_w * (1 + expandratio)));
+  crop_y1 =
+      std::max(0, center_y - static_cast<int>(half_h * (1 + expandratio)));
+  crop_x2 = std::min(img.cols - 1,
+                     static_cast<int>(center_x + half_w * (1 + expandratio)));
+  crop_y2 = std::min(img.rows - 1,
+                     static_cast<int>(center_y + half_h * (1 + expandratio)));
+  crop_img =
+      img(cv::Range(crop_y1, crop_y2 + 1), cv::Range(crop_x1, crop_x2 + 1));
 
-  center.clear();
-  center.emplace_back((crop_x1+crop_x2)/2);
-  center.emplace_back((crop_y1+crop_y2)/2);
+  center.clear();
+  center.emplace_back((crop_x1 + crop_x2) / 2);
+  center.emplace_back((crop_y1 + crop_y2) / 2);
+
+  scale.clear();
+  scale.emplace_back((crop_x2 - crop_x1));
+  scale.emplace_back((crop_y2 - crop_y1));
+}
 
-  scale.clear();
-  scale.emplace_back((crop_x2-crop_x1));
-  scale.emplace_back((crop_y2-crop_y1));
+bool CheckDynamicInput(const std::vector<cv::Mat>& imgs) {
+  if (imgs.size() == 1) return false;
+
+  int h = imgs.at(0).rows;
+  int w = imgs.at(0).cols;
+  for (int i = 1; i < imgs.size(); ++i) {
+    if (imgs.at(i).rows != h || imgs.at(i).cols != w) {
+      return true;
+    }
+  }
+  return false;
+}
+
+std::vector<cv::Mat> PadBatch(const std::vector<cv::Mat>& imgs) {
+  std::vector<cv::Mat> out_imgs;
+  int max_h = 0;
+  int max_w = 0;
+  int rh = 0;
+  int rw = 0;
+  // find max_h and max_w in batch
+  for (int i = 0; i < imgs.size(); ++i) {
+    rh = imgs.at(i).rows;
+    rw = imgs.at(i).cols;
+    if (rh > max_h) max_h = rh;
+    if (rw > max_w) max_w = rw;
+  }
+  for (int i = 0; i < imgs.size(); ++i) {
+    cv::Mat im = imgs.at(i);
+    cv::copyMakeBorder(im,
+                       im,
+                       0,
+                       max_h - imgs.at(i).rows,
+                       0,
+                       max_w - imgs.at(i).cols,
+                       cv::BORDER_CONSTANT,
+                       cv::Scalar(0));
+    out_imgs.push_back(im);
+  }
+  return out_imgs;
 }
 
 }  // namespace PaddleDetection
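
Editor's note: `CheckDynamicInput` and `PadBatch` above are the core of the bs=2 support -- the first detects mixed H/W inside a batch, the second zero-pads every image to the batch maximum so one contiguous input tensor can be built. A hypothetical driver (not code from this patch) showing how they are meant to combine:

```cpp
#include <vector>

#include <opencv2/core.hpp>

#include "include/preprocess_op.h"  // CheckDynamicInput / PadBatch

// Sketch: normalize a batch's shapes before packing it into one tensor.
std::vector<cv::Mat> PrepareBatch(const std::vector<cv::Mat>& imgs) {
  if (imgs.size() > 1 && PaddleDetection::CheckDynamicInput(imgs)) {
    // Images disagree on rows or cols; pad all of them to the maximum.
    return PaddleDetection::PadBatch(imgs);
  }
  return imgs;  // uniform shapes (or bs=1): nothing to do
}
```
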
diff --git a/deploy/pptracking/cpp/README.md b/deploy/pptracking/cpp/README.md
index a8ae65409..5c3a9faa2 100644
--- a/deploy/pptracking/cpp/README.md
+++ b/deploy/pptracking/cpp/README.md
@@ -112,7 +112,7 @@ python tools/export_model.py -c configs/mot/fairmot/fairmot_hrnetv2_w18_dlafpn_3
 | --video_file | 要预测的视频文件路径 |
 | --device | 运行时的设备,可选择`CPU/GPU/XPU`,默认为`CPU`|
 | --gpu_id | 指定进行推理的GPU device id(默认值为0)|
-| --run_mode | 使用GPU时,默认为fluid, 可选(fluid/trt_fp32/trt_fp16/trt_int8)|
+| --run_mode | 使用GPU时,默认为paddle, 可选(paddle/trt_fp32/trt_fp16/trt_int8)|
 | --output_dir | 输出图片所在的文件夹, 默认为output |
 | --use_mkldnn | CPU预测中是否开启MKLDNN加速 |
 | --cpu_threads | 设置cpu线程数,默认为1 |
diff --git a/deploy/pptracking/cpp/include/config_parser.h b/deploy/pptracking/cpp/include/config_parser.h
index b801d62db..c71b160db 100644
--- a/deploy/pptracking/cpp/include/config_parser.h
+++ b/deploy/pptracking/cpp/include/config_parser.h
@@ -42,12 +42,12 @@ class ConfigPaser {
     YAML::Node config;
     config = YAML::LoadFile(model_dir + OS_PATH_SEP + cfg);
 
-    // Get runtime mode : fluid, trt_fp16, trt_fp32
+    // Get runtime mode : paddle, trt_fp16, trt_fp32
     if (config["mode"].IsDefined()) {
       mode_ = config["mode"].as<std::string>();
     } else {
       std::cerr << "Please set mode, "
-                << "support value : 
fluid/trt_fp16/trt_fp32." << std::endl; + << "support value : paddle/trt_fp16/trt_fp32." << std::endl; return false; } diff --git a/deploy/pptracking/cpp/include/jde_predictor.h b/deploy/pptracking/cpp/include/jde_predictor.h index 53f1fb937..32f592106 100644 --- a/deploy/pptracking/cpp/include/jde_predictor.h +++ b/deploy/pptracking/cpp/include/jde_predictor.h @@ -39,7 +39,7 @@ class JDEPredictor { explicit JDEPredictor(const std::string& device = "CPU", const std::string& model_dir = "", const double threshold = -1., - const std::string& run_mode = "fluid", + const std::string& run_mode = "paddle", const int gpu_id = 0, const bool use_mkldnn = false, const int cpu_threads = 1, @@ -61,7 +61,7 @@ class JDEPredictor { // Load Paddle inference model void LoadModel(const std::string& model_dir, - const std::string& run_mode = "fluid"); + const std::string& run_mode = "paddle"); // Run predictor void Predict(const std::vector imgs, diff --git a/deploy/pptracking/cpp/include/pipeline.h b/deploy/pptracking/cpp/include/pipeline.h index f3e6799b1..d17b4d35a 100644 --- a/deploy/pptracking/cpp/include/pipeline.h +++ b/deploy/pptracking/cpp/include/pipeline.h @@ -43,7 +43,7 @@ class Pipeline { explicit Pipeline(const std::string& device, const double threshold, const std::string& output_dir, - const std::string& run_mode = "fluid", + const std::string& run_mode = "paddle", const int gpu_id = 0, const bool use_mkldnn = false, const int cpu_threads = 1, @@ -127,7 +127,7 @@ class Pipeline { std::string track_model_dir_; std::string det_model_dir_; std::string reid_model_dir_; - std::string run_mode_ = "fluid"; + std::string run_mode_ = "paddle"; int gpu_id_ = 0; bool use_mkldnn_ = false; int cpu_threads_ = 1; diff --git a/deploy/pptracking/cpp/include/predictor.h b/deploy/pptracking/cpp/include/predictor.h index f4c416872..cfb630651 100644 --- a/deploy/pptracking/cpp/include/predictor.h +++ b/deploy/pptracking/cpp/include/predictor.h @@ -42,7 +42,7 @@ class Predictor { const std::string& det_model_dir = "", const std::string& reid_model_dir = "", const double threshold = -1., - const std::string& run_mode = "fluid", + const std::string& run_mode = "paddle", const int gpu_id = 0, const bool use_mkldnn = false, const int cpu_threads = 1, diff --git a/deploy/pptracking/cpp/include/sde_predictor.h b/deploy/pptracking/cpp/include/sde_predictor.h index f05a8644d..3919eb105 100644 --- a/deploy/pptracking/cpp/include/sde_predictor.h +++ b/deploy/pptracking/cpp/include/sde_predictor.h @@ -40,7 +40,7 @@ class SDEPredictor { const std::string& det_model_dir = "", const std::string& reid_model_dir = "", const double threshold = -1., - const std::string& run_mode = "fluid", + const std::string& run_mode = "paddle", const int gpu_id = 0, const bool use_mkldnn = false, const int cpu_threads = 1, @@ -67,7 +67,7 @@ class SDEPredictor { // Load Paddle inference model void LoadModel(const std::string& det_model_dir, const std::string& reid_model_dir, - const std::string& run_mode = "fluid"); + const std::string& run_mode = "paddle"); // Run predictor void Predict(const std::vector imgs, diff --git a/deploy/pptracking/cpp/src/main.cc b/deploy/pptracking/cpp/src/main.cc index 9861eecb9..40ffc0801 100644 --- a/deploy/pptracking/cpp/src/main.cc +++ b/deploy/pptracking/cpp/src/main.cc @@ -44,8 +44,8 @@ DEFINE_string(device, DEFINE_double(threshold, 0.5, "Threshold of score."); DEFINE_string(output_dir, "output", "Directory of output visualization files."); DEFINE_string(run_mode, - "fluid", - "Mode of 
running(fluid/trt_fp32/trt_fp16/trt_int8)"); + "paddle", + "Mode of running(paddle/trt_fp32/trt_fp16/trt_int8)"); DEFINE_int32(gpu_id, 0, "Device id of GPU to execute"); DEFINE_bool(use_mkldnn, false, "Whether use mkldnn with CPU"); DEFINE_int32(cpu_threads, 1, "Num of threads with CPU"); @@ -125,10 +125,10 @@ int main(int argc, char** argv) { return -1; } - if (!(FLAGS_run_mode == "fluid" || FLAGS_run_mode == "trt_fp32" || + if (!(FLAGS_run_mode == "paddle" || FLAGS_run_mode == "trt_fp32" || FLAGS_run_mode == "trt_fp16" || FLAGS_run_mode == "trt_int8")) { LOG(ERROR) - << "run_mode should be 'fluid', 'trt_fp32', 'trt_fp16' or 'trt_int8'."; + << "run_mode should be 'paddle', 'trt_fp32', 'trt_fp16' or 'trt_int8'."; return -1; } transform(FLAGS_device.begin(), diff --git a/deploy/pptracking/cpp/src/pipeline.cc b/deploy/pptracking/cpp/src/pipeline.cc index 9606f65a6..7c22c630f 100644 --- a/deploy/pptracking/cpp/src/pipeline.cc +++ b/deploy/pptracking/cpp/src/pipeline.cc @@ -206,7 +206,7 @@ void Pipeline::PredictMOT(const std::string& video_path) { times = total_time / frame_id; LOG(INFO) << "frame_id: " << frame_id - << " predict time(s): " << total_time / 1000; + << " predict time(s): " << times / 1000; cv::Mat out_img = PaddleDetection::VisualizeTrackResult( frame, result, 1000. / times, frame_id); @@ -301,8 +301,7 @@ void Pipeline::RunMOTStream(const cv::Mat img, total_time = std::accumulate(det_times.begin(), det_times.end(), 0.); times = total_time / frame_id; - LOG(INFO) << "frame_id: " << frame_id - << " predict time(s): " << total_time / 1000; + LOG(INFO) << "frame_id: " << frame_id << " predict time(s): " << times / 1000; out_img = PaddleDetection::VisualizeTrackResult( img, result, 1000. / times, frame_id); diff --git a/deploy/pptracking/python/README.md b/deploy/pptracking/python/README.md index 0dcbf61d9..d68d2c174 100644 --- a/deploy/pptracking/python/README.md +++ b/deploy/pptracking/python/README.md @@ -232,7 +232,7 @@ mot_sde_infer.predict_naive(model_dir, | --video_file | Option | 需要预测的视频 | | --camera_id | Option | 用来预测的摄像头ID,默认为-1(表示不使用摄像头预测,可设置为:0 - (摄像头数目-1) ),预测过程中在可视化界面按`q`退出输出预测结果到:output/output.mp4| | --device | Option | 运行时的设备,可选择`CPU/GPU/XPU`,默认为`CPU`| -| --run_mode | Option |使用GPU时,默认为fluid, 可选(fluid/trt_fp32/trt_fp16/trt_int8)| +| --run_mode | Option |使用GPU时,默认为paddle, 可选(paddle/trt_fp32/trt_fp16/trt_int8)| | --batch_size | Option |预测时的batch size,在指定`image_dir`时有效,默认为1 | | --threshold | Option|预测得分的阈值,默认为0.5| | --output_dir | Option|可视化结果保存的根目录,默认为output/| @@ -248,6 +248,6 @@ mot_sde_infer.predict_naive(model_dir, 说明: - 参数优先级顺序:`camera_id` > `video_file` > `image_dir` > `image_file`。 -- run_mode:fluid代表使用AnalysisPredictor,精度float32来推理,其他参数指用AnalysisPredictor,TensorRT不同精度来推理。 +- run_mode:paddle代表使用AnalysisPredictor,精度float32来推理,其他参数指用AnalysisPredictor,TensorRT不同精度来推理。 - 如果安装的PaddlePaddle不支持基于TensorRT进行预测,需要自行编译,详细可参考[预测库编译教程](https://paddleinference.paddlepaddle.org.cn/user_guides/source_compile.html)。 - --run_benchmark如果设置为True,则需要安装依赖`pip install pynvml psutil GPUtil`。 diff --git a/deploy/pptracking/python/det_infer.py b/deploy/pptracking/python/det_infer.py index c15739233..e40d8d9f1 100644 --- a/deploy/pptracking/python/det_infer.py +++ b/deploy/pptracking/python/det_infer.py @@ -47,7 +47,7 @@ class Detector(object): pred_config (object): config of model, defined by `Config(model_dir)` model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU - run_mode 
(str): mode of running(fluid/trt_fp32/trt_fp16) + run_mode (str): mode of running(paddle/trt_fp32/trt_fp16) batch_size (int): size of pre batch in inference trt_min_shape (int): min shape for dynamic shape in trt trt_max_shape (int): max shape for dynamic shape in trt @@ -62,7 +62,7 @@ class Detector(object): pred_config, model_dir, device='CPU', - run_mode='fluid', + run_mode='paddle', batch_size=1, trt_min_shape=1, trt_max_shape=1280, @@ -180,7 +180,7 @@ class DetectorPicoDet(Detector): config (object): config of model, defined by `Config(model_dir)` model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU - run_mode (str): mode of running(fluid/trt_fp32/trt_fp16) + run_mode (str): mode of running(paddle/trt_fp32/trt_fp16) batch_size (int): size of pre batch in inference trt_min_shape (int): min shape for dynamic shape in trt trt_max_shape (int): max shape for dynamic shape in trt @@ -195,7 +195,7 @@ class DetectorPicoDet(Detector): pred_config, model_dir, device='CPU', - run_mode='fluid', + run_mode='paddle', batch_size=1, trt_min_shape=1, trt_max_shape=1280, @@ -370,7 +370,7 @@ class PredictConfig(): def load_predictor(model_dir, - run_mode='fluid', + run_mode='paddle', batch_size=1, device='CPU', min_subgraph_size=3, @@ -385,7 +385,7 @@ def load_predictor(model_dir, Args: model_dir (str): root path of __model__ and __params__ device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU - run_mode (str): mode of running(fluid/trt_fp32/trt_fp16/trt_int8) + run_mode (str): mode of running(paddle/trt_fp32/trt_fp16/trt_int8) use_dynamic_shape (bool): use dynamic shape or not trt_min_shape (int): min shape for dynamic shape in trt trt_max_shape (int): max shape for dynamic shape in trt @@ -397,7 +397,7 @@ def load_predictor(model_dir, Raises: ValueError: predict by TensorRT need device == 'GPU'. 
""" - if device != 'GPU' and run_mode != 'fluid': + if device != 'GPU' and run_mode != 'paddle': raise ValueError( "Predict by TensorRT mode: {}, expect device=='GPU', but device == {}" .format(run_mode, device)) @@ -570,7 +570,7 @@ def predict_video(detector, camera_id): if not os.path.exists(FLAGS.output_dir): os.makedirs(FLAGS.output_dir) out_path = os.path.join(FLAGS.output_dir, video_out_name) - fourcc = cv2.VideoWriter_fourcc(*'mp4v') + fourcc = cv2.VideoWriter_fourcc(* 'mp4v') writer = cv2.VideoWriter(out_path, fourcc, fps, (width, height)) index = 1 while (1): diff --git a/deploy/pptracking/python/mot_jde_infer.py b/deploy/pptracking/python/mot_jde_infer.py index 73ab25f78..3caa65dff 100644 --- a/deploy/pptracking/python/mot_jde_infer.py +++ b/deploy/pptracking/python/mot_jde_infer.py @@ -44,7 +44,7 @@ class JDE_Detector(Detector): pred_config (object): config of model, defined by `Config(model_dir)` model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU - run_mode (str): mode of running(fluid/trt_fp32/trt_fp16) + run_mode (str): mode of running(paddle/trt_fp32/trt_fp16) batch_size (int): size of per batch in inference, default is 1 in tracking models trt_min_shape (int): min shape for dynamic shape in trt trt_max_shape (int): max shape for dynamic shape in trt @@ -59,7 +59,7 @@ class JDE_Detector(Detector): pred_config, model_dir, device='CPU', - run_mode='fluid', + run_mode='paddle', batch_size=1, trt_min_shape=1, trt_max_shape=1088, diff --git a/deploy/pptracking/python/mot_sde_infer.py b/deploy/pptracking/python/mot_sde_infer.py index bbf05f27a..b699db8ac 100644 --- a/deploy/pptracking/python/mot_sde_infer.py +++ b/deploy/pptracking/python/mot_sde_infer.py @@ -67,7 +67,7 @@ class SDE_Detector(Detector): pred_config (object): config of model, defined by `Config(model_dir)` model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU - run_mode (str): mode of running(fluid/trt_fp32/trt_fp16) + run_mode (str): mode of running(paddle/trt_fp32/trt_fp16) batch_size (int): size of per batch in inference, default is 1 in tracking models trt_min_shape (int): min shape for dynamic shape in trt trt_max_shape (int): max shape for dynamic shape in trt @@ -82,7 +82,7 @@ class SDE_Detector(Detector): pred_config, model_dir, device='CPU', - run_mode='fluid', + run_mode='paddle', batch_size=1, trt_min_shape=1, trt_max_shape=1088, @@ -216,7 +216,7 @@ class SDE_DetectorPicoDet(DetectorPicoDet): pred_config (object): config of model, defined by `Config(model_dir)` model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU - run_mode (str): mode of running(fluid/trt_fp32/trt_fp16) + run_mode (str): mode of running(paddle/trt_fp32/trt_fp16) batch_size (int): size of per batch in inference, default is 1 in tracking models trt_min_shape (int): min shape for dynamic shape in trt trt_max_shape (int): max shape for dynamic shape in trt @@ -231,7 +231,7 @@ class SDE_DetectorPicoDet(DetectorPicoDet): pred_config, model_dir, device='CPU', - run_mode='fluid', + run_mode='paddle', batch_size=1, trt_min_shape=1, trt_max_shape=1088, @@ -367,7 +367,7 @@ class SDE_ReID(object): pred_config (object): config of model, defined by `Config(model_dir)` model_dir (str): root path of model.pdiparams, 
model.pdmodel and infer_cfg.yml device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU - run_mode (str): mode of running(fluid/trt_fp32/trt_fp16) + run_mode (str): mode of running(paddle/trt_fp32/trt_fp16) batch_size (int): size of per batch in inference, default 50 means at most 50 sub images can be made a batch and send into ReID model trt_min_shape (int): min shape for dynamic shape in trt @@ -383,7 +383,7 @@ class SDE_ReID(object): pred_config, model_dir, device='CPU', - run_mode='fluid', + run_mode='paddle', batch_size=50, trt_min_shape=1, trt_max_shape=1088, diff --git a/deploy/pptracking/python/utils.py b/deploy/pptracking/python/utils.py index 241d54164..192b880ca 100644 --- a/deploy/pptracking/python/utils.py +++ b/deploy/pptracking/python/utils.py @@ -58,8 +58,8 @@ def argsparser(): parser.add_argument( "--run_mode", type=str, - default='fluid', - help="mode of running(fluid/trt_fp32/trt_fp16/trt_int8)") + default='paddle', + help="mode of running(paddle/trt_fp32/trt_fp16/trt_int8)") parser.add_argument( "--device", type=str, diff --git a/deploy/python/README.md b/deploy/python/README.md index 09294ae95..8b672c84d 100644 --- a/deploy/python/README.md +++ b/deploy/python/README.md @@ -34,7 +34,7 @@ python deploy/python/infer.py --model_dir=./output_inference/yolov3_mobilenet_v1 | --video_file | Option | 需要预测的视频 | | --camera_id | Option | 用来预测的摄像头ID,默认为-1(表示不使用摄像头预测,可设置为:0 - (摄像头数目-1) ),预测过程中在可视化界面按`q`退出输出预测结果到:output/output.mp4| | --device | Option | 运行时的设备,可选择`CPU/GPU/XPU`,默认为`CPU`| -| --run_mode | Option |使用GPU时,默认为fluid, 可选(fluid/trt_fp32/trt_fp16/trt_int8)| +| --run_mode | Option |使用GPU时,默认为paddle, 可选(paddle/trt_fp32/trt_fp16/trt_int8)| | --batch_size | Option |预测时的batch size,在指定`image_dir`时有效,默认为1 | | --threshold | Option|预测得分的阈值,默认为0.5| | --output_dir | Option|可视化结果保存的根目录,默认为output/| @@ -46,6 +46,6 @@ python deploy/python/infer.py --model_dir=./output_inference/yolov3_mobilenet_v1 说明: - 参数优先级顺序:`camera_id` > `video_file` > `image_dir` > `image_file`。 -- run_mode:fluid代表使用AnalysisPredictor,精度float32来推理,其他参数指用AnalysisPredictor,TensorRT不同精度来推理。 +- run_mode:paddle代表使用AnalysisPredictor,精度float32来推理,其他参数指用AnalysisPredictor,TensorRT不同精度来推理。 - 如果安装的PaddlePaddle不支持基于TensorRT进行预测,需要自行编译,详细可参考[预测库编译教程](https://paddleinference.paddlepaddle.org.cn/user_guides/source_compile.html)。 - --run_benchmark如果设置为True,则需要安装依赖`pip install pynvml psutil GPUtil`。 diff --git a/deploy/python/det_keypoint_unite_utils.py b/deploy/python/det_keypoint_unite_utils.py index f9401749e..cbae04333 100644 --- a/deploy/python/det_keypoint_unite_utils.py +++ b/deploy/python/det_keypoint_unite_utils.py @@ -72,8 +72,8 @@ def argsparser(): parser.add_argument( "--run_mode", type=str, - default='fluid', - help="mode of running(fluid/trt_fp32/trt_fp16/trt_int8)") + default='paddle', + help="mode of running(paddle/trt_fp32/trt_fp16/trt_int8)") parser.add_argument( "--device", type=str, diff --git a/deploy/python/infer.py b/deploy/python/infer.py index c5423af31..17e70de68 100644 --- a/deploy/python/infer.py +++ b/deploy/python/infer.py @@ -56,7 +56,7 @@ class Detector(object): pred_config (object): config of model, defined by `Config(model_dir)` model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU - run_mode (str): mode of running(fluid/trt_fp32/trt_fp16) + run_mode (str): mode of running(paddle/trt_fp32/trt_fp16) batch_size (int): size of pre batch in inference trt_min_shape 
(int): min shape for dynamic shape in trt trt_max_shape (int): max shape for dynamic shape in trt @@ -71,7 +71,7 @@ class Detector(object): pred_config, model_dir, device='CPU', - run_mode='fluid', + run_mode='paddle', batch_size=1, trt_min_shape=1, trt_max_shape=1280, @@ -191,7 +191,7 @@ class DetectorSOLOv2(Detector): config (object): config of model, defined by `Config(model_dir)` model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU - run_mode (str): mode of running(fluid/trt_fp32/trt_fp16) + run_mode (str): mode of running(paddle/trt_fp32/trt_fp16) batch_size (int): size of pre batch in inference trt_min_shape (int): min shape for dynamic shape in trt trt_max_shape (int): max shape for dynamic shape in trt @@ -206,7 +206,7 @@ class DetectorSOLOv2(Detector): pred_config, model_dir, device='CPU', - run_mode='fluid', + run_mode='paddle', batch_size=1, trt_min_shape=1, trt_max_shape=1280, @@ -283,7 +283,7 @@ class DetectorPicoDet(Detector): config (object): config of model, defined by `Config(model_dir)` model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU - run_mode (str): mode of running(fluid/trt_fp32/trt_fp16) + run_mode (str): mode of running(paddle/trt_fp32/trt_fp16) batch_size (int): size of pre batch in inference trt_min_shape (int): min shape for dynamic shape in trt trt_max_shape (int): max shape for dynamic shape in trt @@ -298,7 +298,7 @@ class DetectorPicoDet(Detector): pred_config, model_dir, device='CPU', - run_mode='fluid', + run_mode='paddle', batch_size=1, trt_min_shape=1, trt_max_shape=1280, @@ -471,7 +471,7 @@ class PredictConfig(): def load_predictor(model_dir, - run_mode='fluid', + run_mode='paddle', batch_size=1, device='CPU', min_subgraph_size=3, @@ -486,7 +486,7 @@ def load_predictor(model_dir, Args: model_dir (str): root path of __model__ and __params__ device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU - run_mode (str): mode of running(fluid/trt_fp32/trt_fp16/trt_int8) + run_mode (str): mode of running(paddle/trt_fp32/trt_fp16/trt_int8) use_dynamic_shape (bool): use dynamic shape or not trt_min_shape (int): min shape for dynamic shape in trt trt_max_shape (int): max shape for dynamic shape in trt @@ -498,7 +498,7 @@ def load_predictor(model_dir, Raises: ValueError: predict by TensorRT need device == 'GPU'. 
""" - if device != 'GPU' and run_mode != 'fluid': + if device != 'GPU' and run_mode != 'paddle': raise ValueError( "Predict by TensorRT mode: {}, expect device=='GPU', but device == {}" .format(run_mode, device)) diff --git a/deploy/python/keypoint_infer.py b/deploy/python/keypoint_infer.py index 3f663c81f..c983ff772 100644 --- a/deploy/python/keypoint_infer.py +++ b/deploy/python/keypoint_infer.py @@ -46,7 +46,7 @@ class KeyPoint_Detector(Detector): config (object): config of model, defined by `Config(model_dir)` model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU - run_mode (str): mode of running(fluid/trt_fp32/trt_fp16) + run_mode (str): mode of running(paddle/trt_fp32/trt_fp16) trt_min_shape (int): min shape for dynamic shape in trt trt_max_shape (int): max shape for dynamic shape in trt trt_opt_shape (int): opt shape for dynamic shape in trt @@ -61,7 +61,7 @@ class KeyPoint_Detector(Detector): pred_config, model_dir, device='CPU', - run_mode='fluid', + run_mode='paddle', batch_size=1, trt_min_shape=1, trt_max_shape=1280, diff --git a/deploy/python/mot_jde_infer.py b/deploy/python/mot_jde_infer.py index f646d911c..71b2bcf12 100644 --- a/deploy/python/mot_jde_infer.py +++ b/deploy/python/mot_jde_infer.py @@ -44,7 +44,7 @@ class JDE_Detector(Detector): pred_config (object): config of model, defined by `Config(model_dir)` model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU - run_mode (str): mode of running(fluid/trt_fp32/trt_fp16) + run_mode (str): mode of running(paddle/trt_fp32/trt_fp16) batch_size (int): size of pre batch in inference trt_min_shape (int): min shape for dynamic shape in trt trt_max_shape (int): max shape for dynamic shape in trt @@ -59,7 +59,7 @@ class JDE_Detector(Detector): pred_config, model_dir, device='CPU', - run_mode='fluid', + run_mode='paddle', batch_size=1, trt_min_shape=1, trt_max_shape=1088, diff --git a/deploy/python/mot_keypoint_unite_utils.py b/deploy/python/mot_keypoint_unite_utils.py index 783a27236..91a74638b 100644 --- a/deploy/python/mot_keypoint_unite_utils.py +++ b/deploy/python/mot_keypoint_unite_utils.py @@ -72,8 +72,8 @@ def argsparser(): parser.add_argument( "--run_mode", type=str, - default='fluid', - help="mode of running(fluid/trt_fp32/trt_fp16/trt_int8)") + default='paddle', + help="mode of running(paddle/trt_fp32/trt_fp16/trt_int8)") parser.add_argument( "--device", type=str, diff --git a/deploy/python/mot_sde_infer.py b/deploy/python/mot_sde_infer.py index 16b9a85f0..23744b9cc 100644 --- a/deploy/python/mot_sde_infer.py +++ b/deploy/python/mot_sde_infer.py @@ -104,7 +104,7 @@ class SDE_Detector(Detector): pred_config (object): config of model, defined by `Config(model_dir)` model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU - run_mode (str): mode of running(fluid/trt_fp32/trt_fp16) + run_mode (str): mode of running(paddle/trt_fp32/trt_fp16) trt_min_shape (int): min shape for dynamic shape in trt trt_max_shape (int): max shape for dynamic shape in trt trt_opt_shape (int): opt shape for dynamic shape in trt @@ -118,7 +118,7 @@ class SDE_Detector(Detector): pred_config, model_dir, device='CPU', - run_mode='fluid', + run_mode='paddle', batch_size=1, trt_min_shape=1, trt_max_shape=1088, @@ -238,7 +238,7 
@@ class SDE_DetectorPicoDet(DetectorPicoDet): pred_config (object): config of model, defined by `Config(model_dir)` model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU - run_mode (str): mode of running(fluid/trt_fp32/trt_fp16) + run_mode (str): mode of running(paddle/trt_fp32/trt_fp16) trt_min_shape (int): min shape for dynamic shape in trt trt_max_shape (int): max shape for dynamic shape in trt trt_opt_shape (int): opt shape for dynamic shape in trt @@ -252,7 +252,7 @@ class SDE_DetectorPicoDet(DetectorPicoDet): pred_config, model_dir, device='CPU', - run_mode='fluid', + run_mode='paddle', batch_size=1, trt_min_shape=1, trt_max_shape=1088, @@ -380,7 +380,7 @@ class SDE_ReID(object): pred_config, model_dir, device='CPU', - run_mode='fluid', + run_mode='paddle', batch_size=50, trt_min_shape=1, trt_max_shape=1088, diff --git a/deploy/python/utils.py b/deploy/python/utils.py index d0ad0d544..8227e282f 100644 --- a/deploy/python/utils.py +++ b/deploy/python/utils.py @@ -57,8 +57,8 @@ def argsparser(): parser.add_argument( "--run_mode", type=str, - default='fluid', - help="mode of running(fluid/trt_fp32/trt_fp16/trt_int8)") + default='paddle', + help="mode of running(paddle/trt_fp32/trt_fp16/trt_int8)") parser.add_argument( "--device", type=str, -- GitLab
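
Editor's note: for completeness, a hedged sketch of constructing the pptracking predictor after this patch, using the new `"paddle"` default run mode from the `JDEPredictor` signature shown earlier. The model directory is a placeholder, and the `PaddleDetection` namespace is assumed from the deploy sources rather than confirmed by this diff.

```cpp
#include <string>

#include "include/jde_predictor.h"  // JDEPredictor, per the header above

int main() {
  // Arguments follow the constructor order shown in jde_predictor.h:
  // device, model_dir, threshold, run_mode, gpu_id; the rest keep defaults.
  PaddleDetection::JDEPredictor predictor(
      "GPU",
      "./fairmot_hrnetv2_w18_dlafpn_30e_576x320",  // placeholder model dir
      /*threshold=*/0.5,
      /*run_mode=*/"paddle",  // was "fluid" before this patch
      /*gpu_id=*/0);
  return 0;
}
```
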