Commit 0aee586a authored by syyxsxx

fix mkldnn

Parent ff90e5a0
@@ -30,6 +30,9 @@ DEFINE_string(model_dir, "", "Path of inference model");
 DEFINE_bool(use_gpu, false, "Infering with GPU or CPU");
 DEFINE_bool(use_trt, false, "Infering with TensorRT");
 DEFINE_bool(use_mkl, true, "Infering with MKL");
+DEFINE_int32(mkl_thread_num,
+             omp_get_num_procs(),
+             "Number of mkl threads");
 DEFINE_int32(gpu_id, 0, "GPU card id");
 DEFINE_string(key, "", "key of encryption");
 DEFINE_string(image, "", "Path of test image file");
@@ -60,7 +63,7 @@ int main(int argc, char** argv) {
 FLAGS_use_mkl,
 FLAGS_gpu_id,
 FLAGS_key,
-FLAGS_thread_num);
+FLAGS_mkl_thread_num);
 // Predict
 int imgs = 1;
...
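Each demo adds the flag with the same gflags pattern: an integer flag whose default is resolved once at startup from OpenMP's processor count. A minimal standalone sketch of that pattern (not part of this commit; it reuses the flag name defined above):

```cpp
// Sketch: a gflags integer flag defaulting to the OpenMP processor count.
#include <omp.h>
#include <gflags/gflags.h>
#include <iostream>

DEFINE_int32(mkl_thread_num,
             omp_get_num_procs(),
             "Number of mkl threads");

int main(int argc, char** argv) {
  google::ParseCommandLineFlags(&argc, &argv, true);
  // Without --mkl_thread_num on the command line, the flag keeps its
  // omp_get_num_procs() default; an explicit value overrides it.
  std::cout << "MKL threads: " << FLAGS_mkl_thread_num << std::endl;
  return 0;
}
```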
@@ -32,6 +32,9 @@ DEFINE_string(model_dir, "", "Path of inference model");
 DEFINE_bool(use_gpu, false, "Infering with GPU or CPU");
 DEFINE_bool(use_trt, false, "Infering with TensorRT");
 DEFINE_bool(use_mkl, true, "Infering with MKL");
+DEFINE_int32(mkl_thread_num,
+             omp_get_num_procs(),
+             "Number of mkl threads");
 DEFINE_int32(gpu_id, 0, "GPU card id");
 DEFINE_string(key, "", "key of encryption");
 DEFINE_string(image, "", "Path of test image file");
@@ -65,7 +68,7 @@ int main(int argc, char** argv) {
 FLAGS_use_mkl,
 FLAGS_gpu_id,
 FLAGS_key,
-FLAGS_thread_num);
+FLAGS_mkl_thread_num);
 int imgs = 1;
 std::string save_dir = "output";
 // Predict
...
@@ -31,6 +31,9 @@ DEFINE_string(model_dir, "", "Path of inference model");
 DEFINE_bool(use_gpu, false, "Infering with GPU or CPU");
 DEFINE_bool(use_trt, false, "Infering with TensorRT");
 DEFINE_bool(use_mkl, true, "Infering with MKL");
+DEFINE_int32(mkl_thread_num,
+             omp_get_num_procs(),
+             "Number of mkl threads");
 DEFINE_int32(gpu_id, 0, "GPU card id");
 DEFINE_string(key, "", "key of encryption");
 DEFINE_string(image, "", "Path of test image file");
@@ -62,7 +65,7 @@ int main(int argc, char** argv) {
 FLAGS_use_mkl,
 FLAGS_gpu_id,
 FLAGS_key,
-FLAGS_thread_num);
+FLAGS_mkl_thread_num);
 int imgs = 1;
 // Predict
 if (FLAGS_image_list != "") {
...
@@ -38,9 +38,9 @@ DEFINE_bool(use_trt, false, "Infering with TensorRT");
 DEFINE_bool(use_mkl, true, "Infering with MKL");
 DEFINE_int32(gpu_id, 0, "GPU card id");
 DEFINE_string(key, "", "key of encryption");
-DEFINE_int32(thread_num,
+DEFINE_int32(mkl_thread_num,
              omp_get_num_procs(),
-             "Number of preprocessing threads");
+             "Number of mkl threads");
 DEFINE_bool(use_camera, false, "Infering with Camera");
 DEFINE_int32(camera_id, 0, "Camera id");
 DEFINE_string(video_path, "", "Path of input video");
@@ -69,7 +69,7 @@ int main(int argc, char** argv) {
 FLAGS_use_mkl,
 FLAGS_gpu_id,
 FLAGS_key,
-FLAGS_thread_num);
+FLAGS_mkl_thread_num);
 // Open video
 cv::VideoCapture capture;
...
@@ -43,9 +43,9 @@ DEFINE_string(video_path, "", "Path of input video");
 DEFINE_bool(show_result, false, "show the result of each frame with a window");
 DEFINE_bool(save_result, true, "save the result of each frame to a video");
 DEFINE_string(key, "", "key of encryption");
-DEFINE_int32(thread_num,
+DEFINE_int32(mkl_thread_num,
              omp_get_num_procs(),
-             "Number of preprocessing threads");
+             "Number of mkl threads");
 DEFINE_string(save_dir, "output", "Path to save visualized image");
 DEFINE_double(threshold,
              0.5,
@@ -71,7 +71,7 @@ int main(int argc, char** argv) {
 FLAGS_use_mkl,
 FLAGS_gpu_id,
 FLAGS_key,
-FLAGS_thread_num);
+FLAGS_mkl_thread_num);
 // Open video
 cv::VideoCapture capture;
 if (FLAGS_use_camera) {
...
@@ -38,9 +38,9 @@ DEFINE_bool(use_trt, false, "Infering with TensorRT");
 DEFINE_bool(use_mkl, true, "Infering with MKL");
 DEFINE_int32(gpu_id, 0, "GPU card id");
 DEFINE_string(key, "", "key of encryption");
-DEFINE_int32(thread_num,
+DEFINE_int32(mkl_thread_num,
              omp_get_num_procs(),
-             "Number of preprocessing threads");
+             "Number of mkl threads");
 DEFINE_bool(use_camera, false, "Infering with Camera");
 DEFINE_int32(camera_id, 0, "Camera id");
 DEFINE_string(video_path, "", "Path of input video");
@@ -69,7 +69,7 @@ int main(int argc, char** argv) {
 FLAGS_use_mkl,
 FLAGS_gpu_id,
 FLAGS_key,
-FLAGS_thread_num);
+FLAGS_mkl_thread_num);
 // Open video
 cv::VideoCapture capture;
 if (FLAGS_use_camera) {
...
@@ -80,7 +80,7 @@ class Model {
 bool use_mkl = true,
 int gpu_id = 0,
 std::string key = "",
-int thread_num = 1,
+int mkl_thread_num = 4,
 bool use_ir_optim = true) {
 create_predictor(
 model_dir,
@@ -89,7 +89,7 @@ class Model {
 use_mkl,
 gpu_id,
 key,
-thread_num,
+mkl_thread_num,
 use_ir_optim);
 }
@@ -99,7 +99,7 @@ class Model {
 bool use_mkl = true,
 int gpu_id = 0,
 std::string key = "",
-int thread_num = 1,
+int mkl_thread_num = 4,
 bool use_ir_optim = true);
 /*
...
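With the header change, callers pass the MKL thread count as the seventh argument of `Model::Init`; when omitted it falls back to the new default of 4. A sketch of a call site mirroring the demos above (the `PaddleX` namespace is assumed from this repo's deploy code):

```cpp
PaddleX::Model model;  // namespace assumed, as in the demo sources
model.Init(FLAGS_model_dir,
           FLAGS_use_gpu,
           FLAGS_use_trt,
           FLAGS_use_mkl,
           FLAGS_gpu_id,
           FLAGS_key,
           FLAGS_mkl_thread_num);  // formerly FLAGS_thread_num
```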
@@ -31,7 +31,7 @@ void Model::create_predictor(const std::string& model_dir,
 bool use_mkl,
 int gpu_id,
 std::string key,
-int thread_num,
+int mkl_thread_num,
 bool use_ir_optim) {
 paddle::AnalysisConfig config;
 std::string model_file = model_dir + OS_PATH_SEP + "__model__";
@@ -68,7 +68,7 @@ void Model::create_predictor(const std::string& model_dir,
 }
 if (use_mkl && name != "HRNet" && name != "DeepLabv3p") {
 config.EnableMKLDNN();
-config.SetCpuMathLibraryNumThreads(12);
+config.SetCpuMathLibraryNumThreads(mkl_thread_num);
 }
 if (use_gpu) {
 config.EnableUseGpu(100, gpu_id);
...
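The substantive fix is in the last hunk: the CPU math library thread pool was previously pinned to 12 regardless of user input and now honors the flag. A minimal sketch of the relevant calls, assuming Paddle's `AnalysisConfig` API as used in this file:

```cpp
#include "paddle_inference_api.h"  // provides paddle::AnalysisConfig

// Sketch: CPU-side setup as create_predictor now performs it.
void ConfigureMkl(paddle::AnalysisConfig* config, int mkl_thread_num) {
  config->EnableMKLDNN();  // enable MKL-DNN (oneDNN) operator kernels
  // Size the CPU math library's thread pool from the user flag instead
  // of the previously hardcoded 12 threads.
  config->SetCpuMathLibraryNumThreads(mkl_thread_num);
}
```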
@@ -126,6 +126,7 @@ After downloading yaml-cpp.zip there is no need to unpack it; in cmake/yaml.cmake change the `URL https://
 | use_gpu | Whether to use the GPU for inference; supported values are 0 and 1 (default: 0) |
 | use_trt | Whether to use TensorRT for inference; supported values are 0 and 1 (default: 0) |
 | use_mkl | Whether to use MKL to accelerate CPU inference; supported values are 0 and 1 (default: 1) |
+| mkl_thread_num | Number of threads for MKL inference; defaults to the number of CPU processors |
 | gpu_id | GPU device ID (default: 0) |
 | save_dir | Path for saving visualized results (default: "output"); **classifier does not have this parameter** |
 | key | Key generated during encryption; the default "" means an unencrypted model is loaded |
@@ -143,12 +144,12 @@ After downloading yaml-cpp.zip there is no need to unpack it; in cmake/yaml.cmake change the `URL https://
 | use_gpu | Whether to use the GPU for inference; supported values are 0 and 1 (default: 0) |
 | use_trt | Whether to use TensorRT for inference; supported values are 0 and 1 (default: 0) |
 | use_mkl | Whether to use MKL to accelerate CPU inference; supported values are 0 and 1 (default: 1) |
+| mkl_thread_num | Number of threads for MKL inference; defaults to the number of CPU processors |
 | gpu_id | GPU device ID (default: 0) |
 | show_result | When predicting on a video file, whether to display the visualized result on screen in real time (a delay is added, so the display does not reflect the true frame rate); supported values are 0 and 1 (default: 0) |
 | save_result | Whether to save the visualized prediction of each frame as a video file; supported values are 0 and 1 (default: 1) |
 | save_dir | Path for saving visualized results (default: "output") |
 | key | Key generated during encryption; the default "" means an unencrypted model is loaded |
-| thread_num | Number of threads for prediction; defaults to the number of CPU processors |
 **Note: if the system has no GUI, do not set show_result to 1. When predicting with a camera, press `ESC` to close the camera and exit the program.**
...
@@ -110,6 +110,7 @@ cd D:\projects\PaddleX\deploy\cpp\out\build\x64-Release
 | image_list | A .txt file listing image paths, one per line |
 | use_gpu | Whether to use the GPU for inference; supported values are 0 and 1 (default: 0) |
 | use_mkl | Whether to use MKL to accelerate CPU inference; supported values are 0 and 1 (default: 1) |
+| mkl_thread_num | Number of threads for MKL inference; defaults to the number of CPU processors |
 | gpu_id | GPU device ID (default: 0) |
 | save_dir | Path for saving visualized results (default: "output"); classifier does not have this parameter |
 | key | Key generated during encryption; the default "" means an unencrypted model is loaded |
@@ -126,12 +127,12 @@ cd D:\projects\PaddleX\deploy\cpp\out\build\x64-Release
 | video_path | Path of the input video file |
 | use_gpu | Whether to use the GPU for inference; supported values are 0 and 1 (default: 0) |
 | use_mkl | Whether to use MKL to accelerate CPU inference; supported values are 0 and 1 (default: 1) |
+| mkl_thread_num | Number of threads for MKL inference; defaults to the number of CPU processors |
 | gpu_id | GPU device ID (default: 0) |
 | show_result | When predicting on a video file, whether to display the visualized result on screen in real time (a delay is added, so the display does not reflect the true frame rate); supported values are 0 and 1 (default: 0) |
 | save_result | Whether to save the visualized prediction of each frame as a video file; supported values are 0 and 1 (default: 1) |
 | save_dir | Path for saving visualized results (default: "output") |
 | key | Key generated during encryption; the default "" means an unencrypted model is loaded |
-| thread_num | Number of threads for prediction; defaults to the number of CPU processors |
 **Note: if the system has no GUI, do not set show_result to 1. When predicting with a camera, press `ESC` to close the camera and exit the program.**
...
@@ -13,7 +13,6 @@
 # limitations under the License.
 import os
 import os.path as osp
-import psutil
 import cv2
 import numpy as np
 import yaml
@@ -31,7 +30,7 @@ class Predictor:
 use_gpu=True,
 gpu_id=0,
 use_mkl=False,
-mkl_thread_num=psutil.cpu_count(),
+mkl_thread_num=4,
 use_trt=False,
 use_glog=False,
 memory_optimize=True):
...
@@ -2,7 +2,6 @@ tqdm
 colorama
 sklearn
 cython
-psutil
 pycocotools
 visualdl >= 2.0.0b
 paddleslim == 1.0.1
...