From 1d29f00f6d63f9135aefcad25b9c487eec34f1e9 Mon Sep 17 00:00:00 2001
From: ShiningZhang
Date: Thu, 2 Dec 2021 17:37:26 +0800
Subject: [PATCH] add comment for ascend 310/910

---
 doc/Serving_Configure_CN.md                     | 11 ++++++++---
 doc/Serving_Configure_EN.md                     | 11 ++++++++---
 paddle_inference/paddle/include/paddle_engine.h |  4 +++-
 python/paddle_serving_app/local_predict.py      |  2 ++
 4 files changed, 21 insertions(+), 7 deletions(-)

diff --git a/doc/Serving_Configure_CN.md b/doc/Serving_Configure_CN.md
index 84ea0cb5..e613293a 100644
--- a/doc/Serving_Configure_CN.md
+++ b/doc/Serving_Configure_CN.md
@@ -98,6 +98,7 @@ workdir_9393
 | `precision` | str | FP32 | Precision Mode, support FP32, FP16, INT8 |
 | `use_calib` | bool | False | Use TRT int8 calibration |
 | `gpu_multi_stream` | bool | False | EnableGpuMultiStream to get larger QPS |
+| `use_ascend_cl` | bool | False | Enable for Ascend 910; use together with use_lite for Ascend 310 |

 #### When deploying a model on multiple GPU cards.
 ```BASH
@@ -249,6 +250,7 @@ engines {
   use_gpu: false
   combined_model: false
   gpu_multi_stream: false
+  use_ascend_cl: false
   runtime_thread_num: 0
   batch_infer_size: 32
   enable_overrun: false
@@ -286,6 +288,7 @@ gpu_ids: 2
 - use_gpu: whether to use GPU
 - combined_model: whether to use a combined model file
 - gpu_multi_stream: whether to enable GPU multi-stream mode
+- use_ascend_cl: whether to use Ascend; enable alone for Ascend 910, enable together with lite for Ascend 310
 - runtime_thread_num: if greater than 0, Async mode is enabled and the corresponding number of predictor instances is created
 - batch_infer_size: the maximum batch size in Async mode
 - enable_overrun: in Async mode, always put the whole task into the task queue
@@ -357,7 +360,7 @@ op:
     #Fetch result list, subject to the alias_name of fetch_var in client_config
     fetch_list: ["concat_1.tmp_0"]

-    # device_type, 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu
+    # device_type, 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu, 5=arm ascend310, 6=arm ascend910
     device_type: 0

    #Compute device ID. When devices is "" or unset, inference runs on CPU; when devices is "0" or "0,1,2", inference runs on GPU, listing the GPU cards to use
@@ -395,7 +398,7 @@ op:
     #Fetch result list, subject to the alias_name of fetch_var in client_config
     fetch_list: ["ctc_greedy_decoder_0.tmp_0", "softmax_0.tmp_0"]

-    # device_type, 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu
+    # device_type, 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu, 5=arm ascend310, 6=arm ascend910
     device_type: 0

    #Compute device ID. When devices is "" or unset, inference runs on CPU; when devices is "0" or "0,1,2", inference runs on GPU, listing the GPU cards to use
@@ -434,10 +437,12 @@ In addition to CPU and GPU, Python Pipeline also supports deployment on various heterogeneous hardware
 - TensorRT : 2
 - CPU(Arm) : 3
 - XPU : 4
+- Ascend310(Arm) : 5
+- Ascend910(Arm) : 6

 Hardware configuration in config.yml:
 ```YAML
-#Compute device type: if unset, decided by devices (CPU/GPU); 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu
+#Compute device type: if unset, decided by devices (CPU/GPU); 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu, 5=arm ascend310, 6=arm ascend910
 device_type: 0
 #Compute device ID; the hardware type is determined by device_type first. When devices is "" or unset, inference runs on CPU; when "0" or "0,1,2", inference runs on GPU, listing the GPU cards to use
 devices: "" # "0,1"
diff --git a/doc/Serving_Configure_EN.md b/doc/Serving_Configure_EN.md
index 2c4be74a..acb4f99a 100644
--- a/doc/Serving_Configure_EN.md
+++ b/doc/Serving_Configure_EN.md
@@ -98,6 +98,7 @@ More flags:
 | `precision` | str | FP32 | Precision Mode, support FP32, FP16, INT8 |
 | `use_calib` | bool | False | Use TRT int8 calibration |
 | `gpu_multi_stream` | bool | False | EnableGpuMultiStream to get larger QPS |
+| `use_ascend_cl` | bool | False | Enable for Ascend 910; use together with use_lite for Ascend 310 |

 #### Serving model with multiple GPUs.
 ```BASH
@@ -258,6 +259,7 @@ engines {
   use_gpu: false
   combined_model: false
   gpu_multi_stream: false
+  use_ascend_cl: false
   runtime_thread_num: 0
   batch_infer_size: 32
   enable_overrun: false
@@ -293,6 +295,7 @@ gpu_ids: 2
 - use_gpu: Enable GPU.
 - combined_model: Enable combined model.
 - gpu_multi_stream: Enable gpu multiple stream mode.
+- use_ascend_cl: Enable Ascend; use alone for Ascend 910, together with use_lite for Ascend 310.
 - runtime_thread_num: Enable Async mode when greater than 0, creating that number of predictor instances.
 - batch_infer_size: The max batch size of Async mode.
 - enable_overrun: Enable overrun in Async mode, which always puts the whole task into the task queue.
@@ -380,7 +383,7 @@ op:
     #Fetch data list
     fetch_list: ["concat_1.tmp_0"]

-    # device_type, 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu
+    # device_type, 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu, 5=arm ascend310, 6=arm ascend910
     device_type: 0

    #Device ID
@@ -418,7 +421,7 @@ op:
     #Fetch data list
     fetch_list: ["ctc_greedy_decoder_0.tmp_0", "softmax_0.tmp_0"]

-    # device_type, 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu
+    # device_type, 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu, 5=arm ascend310, 6=arm ascend910
     device_type: 0

    #Device ID
@@ -459,10 +462,12 @@ In addition to supporting CPU and GPU, Pipeline also supports the deployment of
 - TensorRT : 2
 - CPU(Arm) : 3
 - XPU : 4
+- Ascend310(Arm) : 5
+- Ascend910(Arm) : 6

 Reference config.yaml:
 ```YAML
-# device_type, 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu
+# device_type, 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu, 5=arm ascend310, 6=arm ascend910
 device_type: 0
 devices: "" # "0,1"
 ```
diff --git a/paddle_inference/paddle/include/paddle_engine.h b/paddle_inference/paddle/include/paddle_engine.h
index 05960b00..95205610 100644
--- a/paddle_inference/paddle/include/paddle_engine.h
+++ b/paddle_inference/paddle/include/paddle_engine.h
@@ -276,6 +276,7 @@ class PaddleInferenceEngine : public EngineCore {

     if (engine_conf.has_use_ascend_cl() && engine_conf.use_ascend_cl()) {
       if (engine_conf.has_use_lite() && engine_conf.use_lite()) {
+        // for ascend 310
         FLAGS_nnadapter_device_names = "huawei_ascend_npu";
         FLAGS_nnadapter_context_properties =
             "HUAWEI_ASCEND_NPU_SELECTED_DEVICE_IDS=" +
@@ -294,7 +295,8 @@
             << ",nnadapter_model_cache_dir="
             << FLAGS_nnadapter_model_cache_dir;
     } else {
-      config.EnableNpu(gpu_id);
+      // for ascend 910
+      config.EnableNpu(gpu_id);
     }
   }
diff --git a/python/paddle_serving_app/local_predict.py b/python/paddle_serving_app/local_predict.py
index e875e8d9..a637b408 100644
--- a/python/paddle_serving_app/local_predict.py
+++ b/python/paddle_serving_app/local_predict.py
@@ -227,6 +227,7 @@ class LocalPredictor(object):
         # set ascend cl
         if use_ascend_cl:
             if use_lite:
+                # for ascend 310
                 nnadapter_device_names = "huawei_ascend_npu"
                 nnadapter_context_properties = \
                     "HUAWEI_ASCEND_NPU_SELECTED_DEVICE_IDS={}".format(gpu_id)
@@ -237,6 +238,7 @@ class LocalPredictor(object):
                     .set_context_properties(nnadapter_context_properties) \
                     .set_model_cache_dir(nnadapter_model_cache_dir)
             else:
+                # for ascend 910
                 config.enable_npu(gpu_id)
         # set cpu low precision
         if not use_gpu and not use_lite:
--
GitLab
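Usage note: the following minimal sketch (not part of the patch) shows how the two Ascend code paths annotated above are selected from Python. Parameter names follow the patched `local_predict.py`; the model directory `serving_server` is a placeholder, and the exact `load_model_config` signature may differ between Serving releases.

```python
# Minimal sketch: selecting the two Ascend paths commented in this patch.
# Assumes an Ascend-enabled build of paddle_serving_app; "serving_server"
# is a placeholder model directory.
from paddle_serving_app.local_predict import LocalPredictor

# Ascend 310 (device_type 5): use_ascend_cl together with use_lite routes
# inference through Paddle-Lite's "huawei_ascend_npu" NNAdapter backend.
ascend310 = LocalPredictor()
ascend310.load_model_config(
    "serving_server", use_lite=True, use_ascend_cl=True, gpu_id=0)

# Ascend 910 (device_type 6): use_ascend_cl alone ends up in
# config.enable_npu(gpu_id), i.e. native NPU inference.
ascend910 = LocalPredictor()
ascend910.load_model_config("serving_server", use_ascend_cl=True, gpu_id=0)
```

In the Python Pipeline route, the same two combinations are expressed declaratively in config.yml as `device_type: 5` and `device_type: 6`, with `devices` carrying the NPU card id.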