diff --git a/deploy/hubserving/clas/config.json b/deploy/hubserving/clas/config.json
index 25550d68007de5db75b6480684b3e8e2e2c46d36..647036f6b01baa90b4a7e295426c464a1851111d 100644
--- a/deploy/hubserving/clas/config.json
+++ b/deploy/hubserving/clas/config.json
@@ -3,7 +3,8 @@
     "clas_system": {
         "init_args": {
             "version": "1.0.0",
-            "use_gpu": true
+            "use_gpu": true,
+            "enable_mkldnn": false
         },
         "predict_args": {
         }
diff --git a/deploy/hubserving/clas/module.py b/deploy/hubserving/clas/module.py
index 3f8449a07cb0bd8e8079f19e12393fe9540cac2d..49f3b9de78db341d69973dd8163a3852d5891b39 100644
--- a/deploy/hubserving/clas/module.py
+++ b/deploy/hubserving/clas/module.py
@@ -37,13 +37,15 @@ from deploy.hubserving.clas.params import read_params
     author_email="paddle-dev@baidu.com",
     type="cv/class")
 class ClasSystem(hub.Module):
-    def _initialize(self, use_gpu=None):
+    def _initialize(self, use_gpu=None, enable_mkldnn=None):
         """
         initialize with the necessary elements
         """
         cfg = read_params()
         if use_gpu is not None:
             cfg.use_gpu = use_gpu
+        if enable_mkldnn is not None:
+            cfg.enable_mkldnn = enable_mkldnn
         cfg.hubserving = True
         cfg.enable_benchmark = False
         self.args = cfg
@@ -59,6 +61,8 @@ class ClasSystem(hub.Module):
             )
         else:
             print("Use CPU")
+            if enable_mkldnn:
+                print("Enable MKL-DNN")
 
     def read_images(self, paths=[]):
         images = []
diff --git a/deploy/hubserving/clas/params.py b/deploy/hubserving/clas/params.py
index 3f48771503ad117559c99005508c9d0645b20a0f..a7af70d942cd15af29a5ea36cdcea437177a5cb6 100644
--- a/deploy/hubserving/clas/params.py
+++ b/deploy/hubserving/clas/params.py
@@ -28,6 +28,7 @@ def read_params():
     cfg.params_file = "./inference/cls_infer.pdiparams"
     cfg.batch_size = 1
     cfg.use_gpu = False
+    cfg.enable_mkldnn = False
     cfg.ir_optim = True
     cfg.gpu_mem = 8000
     cfg.use_fp16 = False
diff --git a/deploy/hubserving/readme.md b/deploy/hubserving/readme.md
index 9c08af0090833800b32a9cb9cc0f38098ff7e362..ab97083d9060fde3797f4cae764b2f4e9f361bb2 100644
--- a/deploy/hubserving/readme.md
+++ b/deploy/hubserving/readme.md
@@ -76,7 +76,8 @@ $ hub serving start --modules Module1==Version1 \
 "clas_system": {
     "init_args": {
         "version": "1.0.0",
-        "use_gpu": true
+        "use_gpu": true,
+        "enable_mkldnn": false
     },
     "predict_args": {
     }
@@ -88,13 +89,16 @@ $ hub serving start --modules Module1==Version1 \
 }
 ```
 
-- The configurable parameters in `init_args` match the `_initialize` function interface in `module.py`. Among them, **when `use_gpu` is `true`, the service is started on the GPU**.
+- The configurable parameters in `init_args` match the `_initialize` function interface in `module.py`. Among them,
+  - when `use_gpu` is `true`, the service is started on the GPU;
+  - when `enable_mkldnn` is `true`, MKL-DNN is used to accelerate prediction.
 - The configurable parameters in `predict_args` match the `predict` function interface in `module.py`.
 
 **Note:**
 - When the service is started from a configuration file, other parameters are ignored.
 - If you use GPU prediction (that is, `use_gpu` is set to `true`), you need to set the CUDA_VISIBLE_DEVICES environment variable before starting the service, e.g. ```export CUDA_VISIBLE_DEVICES=0```; otherwise it does not need to be set.
 - **`use_gpu` and `use_multiprocess` cannot be `true` at the same time.**
+- **When both `use_gpu` and `enable_mkldnn` are `true`, `enable_mkldnn` is ignored and the GPU is used.**
 
 For example, start the 2-stage series service on GPU card No. 3:
 ```shell
diff --git a/deploy/hubserving/readme_en.md b/deploy/hubserving/readme_en.md
index 0f9e92ad58fa086a7028b863308d80f60e51c880..ded2e9933dacfb06b5b883b61caeceb456fdc97b 100644
--- a/deploy/hubserving/readme_en.md
+++ b/deploy/hubserving/readme_en.md
@@ -78,7 +78,8 @@ Wherein, the format of `config.json` is as follows:
 "clas_system": {
     "init_args": {
         "version": "1.0.0",
-        "use_gpu": true
+        "use_gpu": true,
+        "enable_mkldnn": false
     },
     "predict_args": {
     }
@@ -89,13 +90,16 @@ Wherein, the format of `config.json` is as follows:
     "workers": 2
 }
 ```
-- The configurable parameters in `init_args` are consistent with the `_initialize` function interface in `module.py`. Among them, **when `use_gpu` is `true`, it means that the GPU is used to start the service**.
+- The configurable parameters in `init_args` are consistent with the `_initialize` function interface in `module.py`. Among them,
+  - when `use_gpu` is `true`, it means that the GPU is used to start the service.
+  - when `enable_mkldnn` is `true`, it means that MKL-DNN is used to accelerate prediction.
 - The configurable parameters in `predict_args` are consistent with the `predict` function interface in `module.py`.
 
 **Note:**
 - When using the configuration file to start the service, other parameters will be ignored.
 - If you use GPU prediction (that is, `use_gpu` is set to `true`), you need to set the environment variable CUDA_VISIBLE_DEVICES before starting the service, such as: ```export CUDA_VISIBLE_DEVICES=0```, otherwise you do not need to set it.
 - **`use_gpu` and `use_multiprocess` cannot be `true` at the same time.**
+- **When both `use_gpu` and `enable_mkldnn` are set to `true`, `enable_mkldnn` is ignored and the GPU is used.**
 
 For example, use GPU card No. 3 to start the 2-stage series service:
 ```shell
diff --git a/docs/en/tutorials/getting_started_en.md b/docs/en/tutorials/getting_started_en.md
index 2dcb6ad3ba668bc2e9f29a6cea089ac040b6a041..db780de21b61ca5e4f4ad2e03ca984d9542baa78 100644
--- a/docs/en/tutorials/getting_started_en.md
+++ b/docs/en/tutorials/getting_started_en.md
@@ -258,6 +258,8 @@ Among them:
 + `model_file`: Model file path, such as `./MobileNetV3_large_x1_0/cls_infer.pdmodel`;
 + `params_file`: Weight file path, such as `./MobileNetV3_large_x1_0/cls_infer.pdiparams`;
 + `use_tensorrt`: Whether to use TensorRT, default by `True`;
-+ `use_gpu`: Whether to use the GPU, default by `True`.
++ `use_gpu`: Whether to use the GPU, default by `True`;
++ `enable_mkldnn`: Whether to use `MKL-DNN`, default by `False`. When both `use_gpu` and `enable_mkldnn` are set to `True`, `enable_mkldnn` is ignored and the GPU is used.
 
 + If you want to evaluate the speed of the model, it is recommended to use [predict.py](../../../tools/infer/predict.py), and enable TensorRT to accelerate.
diff --git a/docs/zh_CN/tutorials/getting_started.md b/docs/zh_CN/tutorials/getting_started.md
index 586f2e04af7e7c2e7a2e7c45e446123253bbd961..0b0a62e6f5fb4173bb2967ab3f732480f5aa6092 100644
--- a/docs/zh_CN/tutorials/getting_started.md
+++ b/docs/zh_CN/tutorials/getting_started.md
@@ -269,6 +269,7 @@ python tools/infer/predict.py \
 + `model_file`: model structure file path, e.g. `./inference/cls_infer.pdmodel`
 + `params_file`: model weight file path, e.g. `./inference/cls_infer.pdiparams`
 + `use_tensorrt`: whether to use the TensorRT prediction engine, default: `True`
-+ `use_gpu`: whether to use the GPU for prediction, default: `True`.
++ `use_gpu`: whether to use the GPU for prediction, default: `True`
++ `enable_mkldnn`: whether to enable `MKL-DNN` acceleration, default: `False`. Note that when both `enable_mkldnn` and `use_gpu` are `True`, `enable_mkldnn` is ignored and the GPU is used.
 
 * If you want to benchmark the model's speed, it is recommended to use this script (`tools/infer/predict.py`) and enable TensorRT to accelerate prediction.
diff --git a/tools/infer/predict.py b/tools/infer/predict.py
index 4a2e6d076415ee7b71191d52ade1dc60ce027b6b..849d4064327d005c9593203fdfe956849bf2b949 100644
--- a/tools/infer/predict.py
+++ b/tools/infer/predict.py
@@ -30,6 +30,10 @@ def create_paddle_predictor(args):
         config.enable_use_gpu(args.gpu_mem, 0)
     else:
         config.disable_gpu()
+        if args.enable_mkldnn:
+            # cache 10 different input shapes for MKL-DNN to avoid memory leaks
+            config.set_mkldnn_cache_capacity(10)
+            config.enable_mkldnn()
 
     config.disable_glog_info()
     config.switch_ir_optim(args.ir_optim)  # default true
diff --git a/tools/infer/utils.py b/tools/infer/utils.py
index c078a0640a796c85866220097fbb9ed607b3979a..67e37926973271352313cfbe94454cd45dd5e83e 100644
--- a/tools/infer/utils.py
+++ b/tools/infer/utils.py
@@ -41,6 +41,7 @@ def parse_args():
     parser.add_argument("--gpu_mem", type=int, default=8000)
     parser.add_argument("--enable_benchmark", type=str2bool, default=False)
     parser.add_argument("--top_k", type=int, default=1)
+    parser.add_argument("--enable_mkldnn", type=str2bool, default=False)
     parser.add_argument("--hubserving", type=str2bool, default=False)
 
     # params for infer
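As a quick end-to-end check of the new flag, a minimal CPU-only run is sketched below. The `./test.jpg` path is a placeholder and the `--image_file` option is assumed from the script's other `parse_args` entries; `model_file`, `params_file`, `use_gpu`, and `enable_mkldnn` are the options documented above.

```shell
# Sketch only: ./test.jpg is a placeholder and --image_file is assumed to be
# defined in tools/infer/utils.py. MKL-DNN takes effect only on CPU, so the
# GPU is disabled explicitly (when use_gpu is True, enable_mkldnn is ignored).
python tools/infer/predict.py \
    --image_file=./test.jpg \
    --model_file=./inference/cls_infer.pdmodel \
    --params_file=./inference/cls_infer.pdiparams \
    --use_gpu=False \
    --enable_mkldnn=True

# For hubserving, enable_mkldnn is read from init_args in config.json
# ("enable_mkldnn": true); starting from a config file is assumed to use
# PaddleHub's -c/--config option.
hub serving start -c deploy/hubserving/clas/config.json
```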