Add CPU MKL support

9cc2b119 · syyxsxx · 5c5ff738 · 9cc2b119 · 9cc2b119 · 9cc2b119
8 changed files
--- a/deploy/cpp/demo/classifier.cpp
+++ b/deploy/cpp/demo/classifier.cpp
@@ -29,6 +29,7 @@ using namespace std::chrono;  // NOLINT
 DEFINE_string(model_dir, "", "Path of inference model");
 DEFINE_bool(use_gpu, false, "Infering with GPU or CPU");
 DEFINE_bool(use_trt, false, "Infering with TensorRT");
+DEFINE_bool(use_mkl, true, "Infering with MKL");
 DEFINE_int32(gpu_id, 0, "GPU card id");
 DEFINE_string(key, "", "key of encryption");
 DEFINE_string(image, "", "Path of test image file");
@@ -56,8 +57,10 @@ int main(int argc, char** argv) {
  model.Init(FLAGS_model_dir,
             FLAGS_use_gpu,
             FLAGS_use_trt,
+             FLAGS_use_mkl,
             FLAGS_gpu_id,
-             FLAGS_key);
+             FLAGS_key,
+             FLAGS_thread_num);
  // Predict
  int imgs = 1;

--- a/deploy/cpp/demo/detector.cpp
+++ b/deploy/cpp/demo/detector.cpp
@@ -31,6 +31,7 @@ using namespace std::chrono;  // NOLINT
 DEFINE_string(model_dir, "", "Path of inference model");
 DEFINE_bool(use_gpu, false, "Infering with GPU or CPU");
 DEFINE_bool(use_trt, false, "Infering with TensorRT");
+DEFINE_bool(use_mkl, true, "Infering with MKL");
 DEFINE_int32(gpu_id, 0, "GPU card id");
 DEFINE_string(key, "", "key of encryption");
 DEFINE_string(image, "", "Path of test image file");
@@ -61,8 +62,10 @@ int main(int argc, char** argv) {
  model.Init(FLAGS_model_dir,
             FLAGS_use_gpu,
             FLAGS_use_trt,
+             FLAGS_use_mkl,
             FLAGS_gpu_id,
-             FLAGS_key);
+             FLAGS_key,
+             FLAGS_thread_num);
  int imgs = 1;
  std::string save_dir = "output";
  // Predict

--- a/deploy/cpp/demo/segmenter.cpp
+++ b/deploy/cpp/demo/segmenter.cpp
@@ -30,6 +30,7 @@ using namespace std::chrono;  // NOLINT
 DEFINE_string(model_dir, "", "Path of inference model");
 DEFINE_bool(use_gpu, false, "Infering with GPU or CPU");
 DEFINE_bool(use_trt, false, "Infering with TensorRT");
+DEFINE_bool(use_mkl, true, "Infering with MKL");
 DEFINE_int32(gpu_id, 0, "GPU card id");
 DEFINE_string(key, "", "key of encryption");
 DEFINE_string(image, "", "Path of test image file");
@@ -58,8 +59,10 @@ int main(int argc, char** argv) {
  model.Init(FLAGS_model_dir,
             FLAGS_use_gpu,
             FLAGS_use_trt,
+             FLAGS_use_mkl,
             FLAGS_gpu_id,
-             FLAGS_key);
+             FLAGS_key,
+             FLAGS_thread_num);
  int imgs = 1;
  // Predict
  if (FLAGS_image_list != "") {

--- a/deploy/cpp/demo/video_classifier.cpp
+++ b/deploy/cpp/demo/video_classifier.cpp
@@ -35,8 +35,12 @@ using namespace std::chrono;  // NOLINT
 DEFINE_string(model_dir, "", "Path of inference model");
 DEFINE_bool(use_gpu, false, "Infering with GPU or CPU");
 DEFINE_bool(use_trt, false, "Infering with TensorRT");
+DEFINE_bool(use_mkl, true, "Infering with MKL");
 DEFINE_int32(gpu_id, 0, "GPU card id");
 DEFINE_string(key, "", "key of encryption");
+DEFINE_int32(thread_num,
+             omp_get_num_procs(),
+             "Number of preprocessing threads");
 DEFINE_bool(use_camera, false, "Infering with Camera");
 DEFINE_int32(camera_id, 0, "Camera id");
 DEFINE_string(video_path, "", "Path of input video");
@@ -62,8 +66,10 @@ int main(int argc, char** argv) {
  model.Init(FLAGS_model_dir,
             FLAGS_use_gpu,
             FLAGS_use_trt,
+             FLAGS_use_mkl,
             FLAGS_gpu_id,
-             FLAGS_key);
+             FLAGS_key,
+             FLAGS_thread_num);
  // Open video
  cv::VideoCapture capture;

--- a/deploy/cpp/demo/video_detector.cpp
+++ b/deploy/cpp/demo/video_detector.cpp
@@ -35,6 +35,7 @@ using namespace std::chrono;  // NOLINT
 DEFINE_string(model_dir, "", "Path of inference model");
 DEFINE_bool(use_gpu, false, "Infering with GPU or CPU");
 DEFINE_bool(use_trt, false, "Infering with TensorRT");
+DEFINE_bool(use_mkl, true, "Infering with MKL");
 DEFINE_int32(gpu_id, 0, "GPU card id");
 DEFINE_bool(use_camera, false, "Infering with Camera");
 DEFINE_int32(camera_id, 0, "Camera id");
@@ -42,6 +43,9 @@ DEFINE_string(video_path, "", "Path of input video");
 DEFINE_bool(show_result, false, "show the result of each frame with a window");
 DEFINE_bool(save_result, true, "save the result of each frame to a video");
 DEFINE_string(key, "", "key of encryption");
+DEFINE_int32(thread_num,
+             omp_get_num_procs(),
+             "Number of preprocessing threads");
 DEFINE_string(save_dir, "output", "Path to save visualized image");
 DEFINE_double(threshold,
              0.5,
@@ -64,8 +68,10 @@ int main(int argc, char** argv) {
  model.Init(FLAGS_model_dir,
             FLAGS_use_gpu,
             FLAGS_use_trt,
+             FLAGS_use_mkl,
             FLAGS_gpu_id,
-             FLAGS_key);
+             FLAGS_key,
+             FLAGS_thread_num);
  // Open video
  cv::VideoCapture capture;
  if (FLAGS_use_camera) {

--- a/deploy/cpp/demo/video_segmenter.cpp
+++ b/deploy/cpp/demo/video_segmenter.cpp
@@ -35,8 +35,12 @@ using namespace std::chrono;  // NOLINT
 DEFINE_string(model_dir, "", "Path of inference model");
 DEFINE_bool(use_gpu, false, "Infering with GPU or CPU");
 DEFINE_bool(use_trt, false, "Infering with TensorRT");
+DEFINE_bool(use_mkl, true, "Infering with MKL");
 DEFINE_int32(gpu_id, 0, "GPU card id");
 DEFINE_string(key, "", "key of encryption");
+DEFINE_int32(thread_num,
+             omp_get_num_procs(),
+             "Number of preprocessing threads");
 DEFINE_bool(use_camera, false, "Infering with Camera");
 DEFINE_int32(camera_id, 0, "Camera id");
 DEFINE_string(video_path, "", "Path of input video");
@@ -62,8 +66,10 @@ int main(int argc, char** argv) {
  model.Init(FLAGS_model_dir,
             FLAGS_use_gpu,
             FLAGS_use_trt,
+             FLAGS_use_mkl,
             FLAGS_gpu_id,
-             FLAGS_key);
+             FLAGS_key,
+             FLAGS_thread_num);
  // Open video
  cv::VideoCapture capture;
  if (FLAGS_use_camera) {

--- a/deploy/cpp/include/paddlex/paddlex.h
+++ b/deploy/cpp/include/paddlex/paddlex.h
@@ -77,17 +77,29 @@ class Model {
  void Init(const std::string& model_dir,
            bool use_gpu = false,
            bool use_trt = false,
+            bool use_mkl = true,
            int gpu_id = 0,
            std::string key = "",
+            int thread_num = 1,
            bool use_ir_optim = true) {
-    create_predictor(model_dir, use_gpu, use_trt, gpu_id, key, use_ir_optim);
+    create_predictor(
+                     model_dir,
+                     use_gpu,
+                     use_trt,
+                     use_mkl,
+                     gpu_id,
+                     key,
+                     thread_num,
+                     use_ir_optim);
  }
  void create_predictor(const std::string& model_dir,
                        bool use_gpu = false,
                        bool use_trt = false,
+                        bool use_mkl = true,
                        int gpu_id = 0,
                        std::string key = "",
+                        int thread_num = 1,
                        bool use_ir_optim = true);
  /*

--- a/deploy/cpp/src/paddlex.cpp
+++ b/deploy/cpp/src/paddlex.cpp
@@ -21,8 +21,10 @@ namespace PaddleX {
 void Model::create_predictor(const std::string& model_dir,
                             bool use_gpu,
                             bool use_trt,
+                             bool use_mkl,
                             int gpu_id,
                             std::string key,
+                             int thread_num,
                             bool use_ir_optim) {
  paddle::AnalysisConfig config;
  std::string model_file = model_dir + OS_PATH_SEP + "__model__";
@@ -57,6 +59,10 @@ void Model::create_predictor(const std::string& model_dir,
  if (key == "") {
    config.SetModel(model_file, params_file);
  }
+  if (use_mkl && !use_gpu && name != "HRNet" && name != "DeepLabv3p") {
+    config.EnableMKLDNN();  // CPU-only backend; skipped when use_gpu is set
+    config.SetCpuMathLibraryNumThreads(thread_num);  // was hard-coded to 12
+  }
  if (use_gpu) {
    config.EnableUseGpu(100, gpu_id);
  } else {