diff --git a/deploy/cpp/demo/classifier.cpp b/deploy/cpp/demo/classifier.cpp
index a4b3bfa3c5695f663a49862d49da9848ae86c4b0..57c4f09c3d8fe4c1dd48c6afd350cca581a2d194 100644
--- a/deploy/cpp/demo/classifier.cpp
+++ b/deploy/cpp/demo/classifier.cpp
@@ -34,6 +34,7 @@ DEFINE_string(key, "", "key of encryption");
 DEFINE_string(image, "", "Path of test image file");
 DEFINE_string(image_list, "", "Path of test image list file");
 DEFINE_int32(batch_size, 1, "Batch size of infering");
+DEFINE_int32(thread_num, omp_get_num_procs(), "Number of preprocessing threads");
 
 int main(int argc, char** argv) {
   // Parsing command-line
@@ -75,12 +76,13 @@ int main(int argc, char** argv) {
       int im_vec_size = std::min((int)image_paths.size(), i + FLAGS_batch_size);
       std::vector<cv::Mat> im_vec(im_vec_size - i);
       std::vector<PaddleX::ClsResult> results(im_vec_size - i, PaddleX::ClsResult());
-      #pragma omp parallel for num_threads(im_vec_size - i)
+      int thread_num = std::min(FLAGS_thread_num, im_vec_size - i);
+      #pragma omp parallel for num_threads(thread_num)
       for(int j = i; j < im_vec_size; ++j){
        im_vec[j - i] = std::move(cv::imread(image_paths[j], 1));
       }
       auto imread_end = system_clock::now();
-      model.predict(im_vec, results);
+      model.predict(im_vec, results, thread_num);
       auto imread_duration = duration_cast<microseconds>(imread_end - start);
       total_imread_time_s += double(imread_duration.count()) * microseconds::period::num / microseconds::period::den;
diff --git a/deploy/cpp/demo/detector.cpp b/deploy/cpp/demo/detector.cpp
index 5068a35ad49791564f625fa140a2a156ecf58ed8..956a7181808defbe7df127ea06d298f3757dce4b 100644
--- a/deploy/cpp/demo/detector.cpp
+++ b/deploy/cpp/demo/detector.cpp
@@ -36,12 +36,14 @@ DEFINE_string(key, "", "key of encryption");
 DEFINE_string(image, "", "Path of test image file");
 DEFINE_string(image_list, "", "Path of test image list file");
 DEFINE_string(save_dir, "output", "Path to save visualized image");
-DEFINE_int32(batch_size, 1, "");
+DEFINE_int32(batch_size, 1, "Batch size of infering");
+DEFINE_double(threshold, 0.5, "The minimum scores of target boxes which are shown");
+DEFINE_int32(thread_num, omp_get_num_procs(), "Number of preprocessing threads");
 
 int main(int argc, char** argv) {
   // Parse the command-line arguments
   google::ParseCommandLineFlags(&argc, &argv, true);
-
+
   if (FLAGS_model_dir == "") {
     std::cerr << "--model_dir need to be defined" << std::endl;
     return -1;
@@ -50,7 +52,7 @@ int main(int argc, char** argv) {
     std::cerr << "--image or --image_list need to be defined" << std::endl;
     return -1;
   }
-
+  std::cout << "Thread num: " << FLAGS_thread_num << std::endl;
 
   // Load the model
   PaddleX::Model model;
   model.Init(FLAGS_model_dir, FLAGS_use_gpu, FLAGS_use_trt, FLAGS_gpu_id, FLAGS_key, FLAGS_batch_size);
@@ -78,12 +80,13 @@ int main(int argc, char** argv) {
       int im_vec_size = std::min((int)image_paths.size(), i + FLAGS_batch_size);
       std::vector<cv::Mat> im_vec(im_vec_size - i);
       std::vector<PaddleX::DetResult> results(im_vec_size - i, PaddleX::DetResult());
-      #pragma omp parallel for num_threads(im_vec_size - i)
+      int thread_num = std::min(FLAGS_thread_num, im_vec_size - i);
+      #pragma omp parallel for num_threads(thread_num)
       for(int j = i; j < im_vec_size; ++j){
        im_vec[j - i] = std::move(cv::imread(image_paths[j], 1));
       }
       auto imread_end = system_clock::now();
-      model.predict(im_vec, results);
+      model.predict(im_vec, results, thread_num);
       auto imread_duration = duration_cast<microseconds>(imread_end - start);
       total_imread_time_s += double(imread_duration.count()) * microseconds::period::num / microseconds::period::den;
       auto end = system_clock::now();
@@ -106,7 +109,7 @@ int main(int argc, char** argv) {
       // Visualize the results
       for(int j = 0; j < im_vec_size - i; ++j) {
         cv::Mat vis_img =
-            PaddleX::Visualize(im_vec[j], results[j], model.labels, colormap, 0.5);
+            PaddleX::Visualize(im_vec[j], results[j], model.labels, colormap, FLAGS_threshold);
         std::string save_path =
             PaddleX::generate_save_path(FLAGS_save_dir, image_paths[i + j]);
         cv::imwrite(save_path, vis_img);
@@ -130,7 +133,7 @@ int main(int argc, char** argv) {
     // Visualize the result
     cv::Mat vis_img =
-        PaddleX::Visualize(im, result, model.labels, colormap, 0.5);
+        PaddleX::Visualize(im, result, model.labels, colormap, FLAGS_threshold);
     std::string save_path =
         PaddleX::generate_save_path(FLAGS_save_dir, FLAGS_image);
     cv::imwrite(save_path, vis_img);
diff --git a/deploy/cpp/demo/segmenter.cpp b/deploy/cpp/demo/segmenter.cpp
index 23bdd1f955e089a477d9bc029b292a59d9434475..87e1afdd41c3116b2027c4846d08dcfbb9a7dff6 100644
--- a/deploy/cpp/demo/segmenter.cpp
+++ b/deploy/cpp/demo/segmenter.cpp
@@ -36,6 +36,7 @@ DEFINE_string(image, "", "Path of test image file");
 DEFINE_string(image_list, "", "Path of test image list file");
 DEFINE_string(save_dir, "output", "Path to save visualized image");
 DEFINE_int32(batch_size, 1, "Batch size of infering");
+DEFINE_int32(thread_num, omp_get_num_procs(), "Number of preprocessing threads");
 
 int main(int argc, char** argv) {
   // Parse the command-line arguments
@@ -76,12 +77,13 @@ int main(int argc, char** argv) {
       int im_vec_size = std::min((int)image_paths.size(), i + FLAGS_batch_size);
       std::vector<cv::Mat> im_vec(im_vec_size - i);
       std::vector<PaddleX::SegResult> results(im_vec_size - i, PaddleX::SegResult());
-      #pragma omp parallel for num_threads(im_vec_size - i)
+      int thread_num = std::min(FLAGS_thread_num, im_vec_size - i);
+      #pragma omp parallel for num_threads(thread_num)
       for(int j = i; j < im_vec_size; ++j){
        im_vec[j - i] = std::move(cv::imread(image_paths[j], 1));
       }
       auto imread_end = system_clock::now();
-      model.predict(im_vec, results);
+      model.predict(im_vec, results, thread_num);
       auto imread_duration = duration_cast<microseconds>(imread_end - start);
       total_imread_time_s += double(imread_duration.count()) * microseconds::period::num / microseconds::period::den;
       auto end = system_clock::now();
diff --git a/deploy/cpp/include/paddlex/paddlex.h b/deploy/cpp/include/paddlex/paddlex.h
index bcddd87845f8edff135047a0edbf9706c8579316..c0a48768b334d3a62b22f201989706a393a70a92 100644
--- a/deploy/cpp/include/paddlex/paddlex.h
+++ b/deploy/cpp/include/paddlex/paddlex.h
@@ -61,19 +61,19 @@ class Model {
   bool preprocess(const cv::Mat& input_im, ImageBlob* blob);
 
-  bool preprocess(const std::vector<cv::Mat> &input_im_batch, std::vector<ImageBlob> &blob_batch);
+  bool preprocess(const std::vector<cv::Mat> &input_im_batch, std::vector<ImageBlob> &blob_batch, int thread_num = 1);
 
   bool predict(const cv::Mat& im, ClsResult* result);
 
-  bool predict(const std::vector<cv::Mat> &im_batch, std::vector<ClsResult> &results);
+  bool predict(const std::vector<cv::Mat> &im_batch, std::vector<ClsResult> &results, int thread_num = 1);
 
   bool predict(const cv::Mat& im, DetResult* result);
 
-  bool predict(const std::vector<cv::Mat> &im_batch, std::vector<DetResult> &result);
+  bool predict(const std::vector<cv::Mat> &im_batch, std::vector<DetResult> &result, int thread_num = 1);
 
   bool predict(const cv::Mat& im, SegResult* result);
 
-  bool predict(const std::vector<cv::Mat> &im_batch, std::vector<SegResult> &result);
+  bool predict(const std::vector<cv::Mat> &im_batch, std::vector<SegResult> &result, int thread_num = 1);
 
   std::string type;
   std::string name;
diff --git a/deploy/cpp/src/paddlex.cpp b/deploy/cpp/src/paddlex.cpp
index 5dd1237cc7280fcc6fe6dd010463ae1facb57c2a..f52cf7afe36790e7a83d8e6539a23cf8e9eba1f5 100644
--- a/deploy/cpp/src/paddlex.cpp
+++ b/deploy/cpp/src/paddlex.cpp
@@ -110,12 +110,13 @@ bool Model::preprocess(const cv::Mat& input_im, ImageBlob* blob) {
 }
 
 // use openmp
-bool Model::preprocess(const std::vector<cv::Mat> &input_im_batch, std::vector<ImageBlob> &blob_batch) {
+bool Model::preprocess(const std::vector<cv::Mat> &input_im_batch, std::vector<ImageBlob> &blob_batch, int thread_num) {
   int batch_size = input_im_batch.size();
   bool success = true;
   int max_h = -1;
   int max_w = -1;
-  #pragma omp parallel for num_threads(batch_size)
+  thread_num = std::min(thread_num, batch_size);
+  #pragma omp parallel for num_threads(thread_num)
   for(int i = 0; i < input_im_batch.size(); ++i) {
     cv::Mat im = input_im_batch[i].clone();
     if(!transforms_.Run(&im, &blob_batch[i])){
@@ -164,7 +165,7 @@ bool Model::predict(const cv::Mat& im, ClsResult* result) {
   return true;
 }
 
-bool Model::predict(const std::vector<cv::Mat> &im_batch, std::vector<ClsResult> &results) {
+bool Model::predict(const std::vector<cv::Mat> &im_batch, std::vector<ClsResult> &results, int thread_num) {
   for(auto &inputs: inputs_batch_) {
     inputs.clear();
   }
@@ -180,7 +181,7 @@ bool Model::predict(const std::vector<cv::Mat> &im_batch, std::vector<ClsResult> &results) {
     return false;
   }
   // Preprocess the input images
-  if (!preprocess(im_batch, inputs_batch_)) {
+  if (!preprocess(im_batch, inputs_batch_, thread_num)) {
     std::cerr << "Preprocess failed!" << std::endl;
     return false;
   }
@@ -326,7 +327,7 @@ bool Model::predict(const cv::Mat& im, DetResult* result) {
   return true;
 }
 
-bool Model::predict(const std::vector<cv::Mat> &im_batch, std::vector<DetResult> &result) {
+bool Model::predict(const std::vector<cv::Mat> &im_batch, std::vector<DetResult> &result, int thread_num) {
   if (type == "classifier") {
     std::cerr << "Loading model is a 'classifier', ClsResult should be passed "
                  "to function predict()!"
@@ -341,7 +342,7 @@ bool Model::predict(const std::vector<cv::Mat> &im_batch, std::vector<DetResult> &result) {
 
   int batch_size = im_batch.size();
   // Preprocess the input images
-  if (!preprocess(im_batch, inputs_batch_)) {
+  if (!preprocess(im_batch, inputs_batch_, thread_num)) {
     std::cerr << "Preprocess failed!" << std::endl;
     return false;
   }
@@ -357,7 +358,8 @@ bool Model::predict(const std::vector<cv::Mat> &im_batch, std::vector<DetResult> &result) {
               << ", " << inputs_batch_[i].new_im_size_[1]
               << ")" << std::endl;
   }
-  #pragma omp parallel for num_threads(batch_size)
+  thread_num = std::min(thread_num, batch_size);
+  #pragma omp parallel for num_threads(thread_num)
   for(int i = 0; i < batch_size; ++i) {
     int h = inputs_batch_[i].new_im_size_[0];
     int w = inputs_batch_[i].new_im_size_[1];
@@ -597,7 +599,7 @@ bool Model::predict(const cv::Mat& im, SegResult* result) {
   return true;
 }
 
-bool Model::predict(const std::vector<cv::Mat> &im_batch, std::vector<SegResult> &result) {
+bool Model::predict(const std::vector<cv::Mat> &im_batch, std::vector<SegResult> &result, int thread_num) {
   for(auto &inputs: inputs_batch_) {
     inputs.clear();
   }
@@ -614,7 +616,7 @@ bool Model::predict(const std::vector<cv::Mat> &im_batch, std::vector<SegResult> &result) {
   }
 
   // Preprocess the input images
-  if (!preprocess(im_batch, inputs_batch_)) {
+  if (!preprocess(im_batch, inputs_batch_, thread_num)) {
     std::cerr << "Preprocess failed!" << std::endl;
     return false;
   }
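
The patch applies one pattern in every batch path: instead of launching one OpenMP thread per image (`num_threads(im_vec_size - i)` / `num_threads(batch_size)`), the demos read a `--thread_num` flag (defaulting to `omp_get_num_procs()`), clamp it to the batch size before each parallel region, and pass the same `thread_num` down through `predict()` into `preprocess()`. Below is a minimal standalone sketch of that clamping pattern; it is illustrative only, and `fake_preprocess()` is a hypothetical stand-in for the per-image work done by `transforms_.Run()`:

```cpp
// Build with: g++ -fopenmp sketch.cpp -o sketch
#include <omp.h>

#include <algorithm>
#include <cstdio>
#include <vector>

// Hypothetical stand-in for per-image preprocessing work.
static double fake_preprocess(int image_id) {
  double acc = 0.0;
  for (int k = 0; k < 1000000; ++k) acc += image_id * 1e-6;
  return acc;
}

int main() {
  const int batch_size = 3;              // e.g. im_vec_size - i in the demos
  int thread_num = omp_get_num_procs();  // default of the new --thread_num flag

  // The core of the change: cap the worker count at both the requested
  // thread count and the batch size, so a small batch no longer requests
  // idle threads and a batch of 64 images no longer launches 64 threads.
  thread_num = std::min(thread_num, batch_size);

  std::vector<double> results(batch_size);
  #pragma omp parallel for num_threads(thread_num)
  for (int i = 0; i < batch_size; ++i) {
    results[i] = fake_preprocess(i);  // each image is handled independently
  }

  for (int i = 0; i < batch_size; ++i) {
    std::printf("image %d -> %f\n", i, results[i]);
  }
  return 0;
}
```

With the new flags, a detector run might look like the following (binary location and paths are placeholders): `./detector --model_dir=/path/to/model --image_list=/path/to/list.txt --batch_size=4 --thread_num=4 --threshold=0.6`.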