diff --git a/deploy/cpp/demo/classifier.cpp b/deploy/cpp/demo/classifier.cpp
index 8b78d7e8975642b8d10d6c641c0245cdf661d3d9..6fd354d3f9cb6a366f0efb0b31e7ae073a90b4ad 100644
--- a/deploy/cpp/demo/classifier.cpp
+++ b/deploy/cpp/demo/classifier.cpp
@@ -37,6 +37,7 @@ DEFINE_int32(batch_size, 1, "Batch size of infering");
 DEFINE_int32(thread_num,
              omp_get_num_procs(),
              "Number of preprocessing threads");
+DEFINE_bool(use_ir_optim, true, "use ir optimization");
 
 int main(int argc, char** argv) {
   // Parsing command-line
@@ -57,7 +58,8 @@ int main(int argc, char** argv) {
              FLAGS_use_gpu,
              FLAGS_use_trt,
              FLAGS_gpu_id,
-             FLAGS_key);
+             FLAGS_key,
+             FLAGS_use_ir_optim);
 
   // Run prediction
   double total_running_time_s = 0.0;
diff --git a/deploy/cpp/demo/detector.cpp b/deploy/cpp/demo/detector.cpp
index 5b4e3a2ba9d2c921cf23774a17e34e0c8e26cc2a..54f93d2995fa24af73bba2855b6b26466129fa20 100644
--- a/deploy/cpp/demo/detector.cpp
+++ b/deploy/cpp/demo/detector.cpp
@@ -43,6 +43,7 @@ DEFINE_double(threshold,
 DEFINE_int32(thread_num,
              omp_get_num_procs(),
              "Number of preprocessing threads");
+DEFINE_bool(use_ir_optim, true, "use ir optimization");
 
 int main(int argc, char** argv) {
   // Parse command-line arguments
@@ -62,7 +63,8 @@ int main(int argc, char** argv) {
              FLAGS_use_gpu,
              FLAGS_use_trt,
              FLAGS_gpu_id,
-             FLAGS_key);
+             FLAGS_key,
+             FLAGS_use_ir_optim);
 
   double total_running_time_s = 0.0;
   double total_imread_time_s = 0.0;
diff --git a/deploy/cpp/demo/segmenter.cpp b/deploy/cpp/demo/segmenter.cpp
index 7dd48e551890f3c8e4550694c45bc3f84088ec0a..1ddbb7538cee628ee02568cd9ff11c515f1bd596 100644
--- a/deploy/cpp/demo/segmenter.cpp
+++ b/deploy/cpp/demo/segmenter.cpp
@@ -39,6 +39,7 @@ DEFINE_int32(batch_size, 1, "Batch size of infering");
 DEFINE_int32(thread_num,
              omp_get_num_procs(),
              "Number of preprocessing threads");
+DEFINE_bool(use_ir_optim, true, "use ir optimization");
 
 int main(int argc, char** argv) {
   // Parse command-line arguments
@@ -59,7 +60,8 @@ int main(int argc, char** argv) {
              FLAGS_use_gpu,
              FLAGS_use_trt,
              FLAGS_gpu_id,
-             FLAGS_key);
+             FLAGS_key,
+             FLAGS_use_ir_optim);
 
   double total_running_time_s = 0.0;
   double total_imread_time_s = 0.0;
diff --git a/deploy/cpp/include/paddlex/paddlex.h b/deploy/cpp/include/paddlex/paddlex.h
index af4d8898496fee47ed9b5c74599536ddf1fe9f6c..e0d0569341198d0a0b2a8c6d0637c3f5a61e1f3f 100644
--- a/deploy/cpp/include/paddlex/paddlex.h
+++ b/deploy/cpp/include/paddlex/paddlex.h
@@ -72,20 +72,23 @@ class Model {
    * @param use_trt: use Tensor RT or not when infering
    * @param gpu_id: the id of gpu when infering with using gpu
    * @param key: the key of encryption when using encrypted model
+   * @param use_ir_optim: use ir optimization when infering
    * */
   void Init(const std::string& model_dir,
             bool use_gpu = false,
             bool use_trt = false,
             int gpu_id = 0,
-            std::string key = "") {
-    create_predictor(model_dir, use_gpu, use_trt, gpu_id, key);
+            std::string key = "",
+            bool use_ir_optim = true) {
+    create_predictor(model_dir, use_gpu, use_trt, gpu_id, key, use_ir_optim);
   }
 
   void create_predictor(const std::string& model_dir,
                         bool use_gpu = false,
                         bool use_trt = false,
                         int gpu_id = 0,
-                        std::string key = "");
+                        std::string key = "",
+                        bool use_ir_optim = true);
 
   /*
    * @brief
diff --git a/deploy/cpp/src/paddlex.cpp b/deploy/cpp/src/paddlex.cpp
index bedd83b356baff41d7f9d16ac6de855e982332b2..cf1dfc955c43f9a61539e93a34c77c6ab4b198a9 100644
--- a/deploy/cpp/src/paddlex.cpp
+++ b/deploy/cpp/src/paddlex.cpp
@@ -22,7 +22,8 @@ void Model::create_predictor(const std::string& model_dir,
                              bool use_gpu,
                              bool use_trt,
                              int gpu_id,
-                             std::string key) {
+                             std::string key,
+                             bool use_ir_optim) {
   paddle::AnalysisConfig config;
   std::string model_file = model_dir + OS_PATH_SEP + "__model__";
   std::string params_file = model_dir + OS_PATH_SEP + "__params__";
@@ -63,6 +64,8 @@ void Model::create_predictor(const std::string& model_dir,
   }
   config.SwitchUseFeedFetchOps(false);
   config.SwitchSpecifyInputNames(true);
+  // Enable IR (graph) optimization
+  config.SwitchIrOptim(use_ir_optim);
   // Enable memory optimization
   config.EnableMemoryOptim();
   if (use_trt) {
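With this patch applied, IR (graph) optimization can be turned off either through the new --use_ir_optim gflags flag of the three demos (for example, running the classifier demo with --use_ir_optim=false) or by passing the new trailing argument of Model::Init / create_predictor. Below is a minimal sketch of the latter, not a definitive usage guide: it assumes the include path and the PaddleX namespace used by the demo sources touched above, and the model directory is a placeholder.

    #include "include/paddlex/paddlex.h"

    int main() {
      PaddleX::Model model;
      // The trailing argument is the new use_ir_optim switch. It defaults to
      // true, so passing false disables the IR optimization passes
      // (config.SwitchIrOptim) while leaving the rest of the predictor
      // configuration unchanged.
      model.Init("/path/to/inference_model",  // model_dir (placeholder)
                 false,                       // use_gpu
                 false,                       // use_trt
                 0,                           // gpu_id
                 "",                          // key (unencrypted model)
                 false);                      // use_ir_optim
      return 0;
    }

Because use_ir_optim defaults to true in both the header and the demo flags, existing callers that omit the argument keep the previous behavior.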