Commit 1d29f00f authored by ShiningZhang

add comment for ascend 310/910

Parent 7fd87c76
......@@ -98,6 +98,7 @@ workdir_9393
| `precision` | str | FP32 | Precision mode; supports FP32, FP16, INT8 |
| `use_calib` | bool | False | Use TRT int8 calibration |
| `gpu_multi_stream` | bool | False | EnableGpuMultiStream to get larger QPS |
| `use_ascend_cl` | bool | False | Enable for Ascend 910; use together with use_lite for Ascend 310 |
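A minimal launch sketch for Ascend 910 (the model directory and port are placeholders, not taken from this commit):
```BASH
# Ascend 910: enable use_ascend_cl by itself
python3 -m paddle_serving_server.serve --model uci_housing_model --port 9393 --use_ascend_cl
```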
#### When you want to deploy a model on multiple GPU cards.
```BASH
......@@ -249,6 +250,7 @@ engines {
use_gpu: false
combined_model: false
gpu_multi_stream: false
use_ascend_cl: false
runtime_thread_num: 0
batch_infer_size: 32
enable_overrun: false
......@@ -286,6 +288,7 @@ gpu_ids: 2
- use_gpu: whether to use GPU
- combined_model: whether to use a combined model file
- gpu_multi_stream: whether to enable GPU multi-stream mode
- use_ascend_cl: whether to use Ascend; enabled alone it targets Ascend 910, enabled together with use_lite it targets Ascend 310 (see the sketch after this list)
- runtime_thread_num: if greater than 0, Async mode is enabled and the corresponding number of predictor instances is created
- batch_infer_size: the maximum batch size in Async mode
- enable_overrun: in Async mode, always put the whole task into the task queue
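For illustration only (an assumption based on the fields above, not a snippet from this commit), an Ascend 910 engine keeps the defaults and flips only use_ascend_cl:
```
engines {
  use_gpu: false
  combined_model: false
  gpu_multi_stream: false
  use_ascend_cl: true   # enabled alone: Ascend 910
  runtime_thread_num: 0
  batch_infer_size: 32
  enable_overrun: false
}
```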
......@@ -357,7 +360,7 @@ op:
#Fetch result list, based on the alias_name of fetch_var in client_config
fetch_list: ["concat_1.tmp_0"]
# device_type, 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu
# device_type, 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu, 5=arm ascend310, 6=arm ascend910
device_type: 0
#Computing hardware ID. When devices is "" or unset, prediction runs on CPU; when devices is "0" or "0,1,2", prediction runs on GPU, specifying which GPU cards to use
......@@ -395,7 +398,7 @@ op:
#Fetch result list, based on the alias_name of fetch_var in client_config
fetch_list: ["ctc_greedy_decoder_0.tmp_0", "softmax_0.tmp_0"]
# device_type, 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu
# device_type, 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu, 5=arm ascend310, 6=arm ascend910
device_type: 0
#Computing hardware ID. When devices is "" or unset, prediction runs on CPU; when devices is "0" or "0,1,2", prediction runs on GPU, specifying which GPU cards to use
......@@ -434,10 +437,12 @@ Besides CPU and GPU, the Python Pipeline also supports deployment on a variety of heterogeneous hardware
- TensorRT : 2
- CPU(Arm) : 3
- XPU : 4
- Ascend310(Arm) : 5
- Ascend910(Arm) : 6
Hardware configuration in config.yml:
```YAML
#Computing hardware type: when unset, determined by devices (CPU/GPU); 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu
#Computing hardware type: when unset, determined by devices (CPU/GPU); 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu, 5=arm ascend310, 6=arm ascend910
device_type: 0
#Computing hardware ID; the hardware type is determined first by device_type. When devices is "" or unset, prediction runs on CPU; when it is "0" or "0,1,2", prediction runs on GPU, specifying which GPU cards to use
devices: "" # "0,1"
......
......@@ -98,6 +98,7 @@ More flags:
| `precision` | str | FP32 | Precision mode; supports FP32, FP16, INT8 |
| `use_calib` | bool | False | Use TRT int8 calibration |
| `gpu_multi_stream` | bool | False | EnableGpuMultiStream to get larger QPS |
| `use_ascend_cl` | bool | False | Enable for Ascend 910; use together with use_lite for Ascend 310 |
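A minimal launch sketch for Ascend 310 (the model directory and port are placeholders, not taken from this commit), combining use_lite with use_ascend_cl:
```BASH
# Ascend 310: use_lite and use_ascend_cl together
python3 -m paddle_serving_server.serve --model uci_housing_model --port 9393 --use_lite --use_ascend_cl
```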
#### Serving model with multiple gpus.
```BASH
......@@ -258,6 +259,7 @@ engines {
use_gpu: false
combined_model: false
gpu_multi_stream: false
use_ascend_cl: false
runtime_thread_num: 0
batch_infer_size: 32
enable_overrun: false
......@@ -293,6 +295,7 @@ gpu_ids: 2
- use_gpu: Enable GPU.
- combined_model: Enable the combined model file.
- gpu_multi_stream: Enable GPU multi-stream mode.
- use_ascend_cl: Enable Ascend; enabled alone it targets Ascend 910, enabled together with use_lite it targets Ascend 310 (see the sketch below).
- runtime_thread_num: Enable Async mode when the value is greater than 0, creating that number of predictor instances.
- batch_infer_size: The max batch size in Async mode.
- enable_overrun: Enable overrun in Async mode, which always puts the whole task into the task queue.
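As a hedged sketch (field values are illustrative, not from this commit), an Ascend 310 engine enables use_lite together with use_ascend_cl:
```
engines {
  use_gpu: false
  combined_model: false
  gpu_multi_stream: false
  use_lite: true        # combined with use_ascend_cl: Ascend 310
  use_ascend_cl: true
  runtime_thread_num: 0
  batch_infer_size: 32
  enable_overrun: false
}
```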
......@@ -380,7 +383,7 @@ op:
#Fetch data list
fetch_list: ["concat_1.tmp_0"]
# device_type, 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu
# device_type, 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu, 5=arm ascend310, 6=arm ascend910
device_type: 0
#Device ID
......@@ -418,7 +421,7 @@ op:
#Fetch data list
fetch_list: ["ctc_greedy_decoder_0.tmp_0", "softmax_0.tmp_0"]
# device_type, 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu
# device_type, 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu, 5=arm ascend310, 6=arm ascend910
device_type: 0
#Device ID
......@@ -459,10 +462,12 @@ In addition to supporting CPU and GPU, Pipeline also supports the deployment of
- TensorRT : 2
- CPU(Arm) : 3
- XPU : 4
- Ascend310(Arm) : 5
- Ascend910(Arm) : 6
Reference config.yaml:
```YAML
# device_type, 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu
# device_type, 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu, 5=arm ascend310, 6=arm ascend910
device_type: 0
devices: "" # "0,1"
```
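A hedged example (the device id is a placeholder): targeting Ascend changes only these two keys.
```YAML
# Illustrative Ascend 310 setting; use device_type: 6 for Ascend 910
device_type: 5
devices: "0"
```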
......
......@@ -276,6 +276,7 @@ class PaddleInferenceEngine : public EngineCore {
if (engine_conf.has_use_ascend_cl() &&
    engine_conf.use_ascend_cl()) {
  if (engine_conf.has_use_lite() && engine_conf.use_lite()) {
    // for ascend 310
    FLAGS_nnadapter_device_names = "huawei_ascend_npu";
    FLAGS_nnadapter_context_properties =
        "HUAWEI_ASCEND_NPU_SELECTED_DEVICE_IDS=" +
......@@ -294,7 +295,8 @@ class PaddleInferenceEngine : public EngineCore {
<< ",nnadapter_model_cache_dir="
<< FLAGS_nnadapter_model_cache_dir;
} else {
config.EnableNpu(gpu_id);
// for ascend 910
config.EnableNpu(gpu_id);
}
}
......
......@@ -227,6 +227,7 @@ class LocalPredictor(object):
        # set ascend cl
        if use_ascend_cl:
            if use_lite:
                # for ascend 310
                nnadapter_device_names = "huawei_ascend_npu"
                nnadapter_context_properties = \
                    "HUAWEI_ASCEND_NPU_SELECTED_DEVICE_IDS={}".format(gpu_id)
......@@ -237,6 +238,7 @@ class LocalPredictor(object):
                    .set_context_properties(nnadapter_context_properties) \
                    .set_model_cache_dir(nnadapter_model_cache_dir)
            else:
                # for ascend 910
                config.enable_npu(gpu_id)
        # set cpu low precision
        if not use_gpu and not use_lite:
......