add comment for ascend 310/910

1d29f00f · ShiningZhang · 7fd87c76 · 1d29f00f · 1d29f00f · 1d29f00f
4 changed files
--- a/doc/Serving_Configure_CN.md
+++ b/doc/Serving_Configure_CN.md
@@ -98,6 +98,7 @@ workdir_9393
 | `precision`                                    | str  | FP32    | Precision Mode, support FP32, FP16, INT8              |
 | `use_calib`                                    | bool | False   | Use TRT int8 calibration                              |
 | `gpu_multi_stream`                             | bool | False   | EnableGpuMultiStream to get larger QPS                |
+| `use_ascend_cl`                                | bool | False   | Enable for ascend910; Use with use_lite for ascend310 |
 #### 当您的某个模型想使用多张GPU卡部署时.
 ```BASH
@@ -249,6 +250,7 @@ engines {
  use_gpu: false
  combined_model: false
  gpu_multi_stream: false
+  use_ascend_cl: false
  runtime_thread_num: 0
  batch_infer_size: 32
  enable_overrun: false
@@ -286,6 +288,7 @@ gpu_ids: 2
 - use_gpu:是否使用GPU
 - combined_model: 是否使用组合模型文件
 - gpu_multi_stream: 是否开启gpu多流模式
+- use_ascend_cl: 是否使用昇腾，单独开启时适配昇腾910，与use_lite同时开启时适配昇腾310
 - runtime_thread_num: 若大于0， 则启用Async异步模式，并创建对应数量的predictor实例。
 - batch_infer_size: Async异步模式下的最大batch数
 - enable_overrun: Async异步模式下总是将整个任务放入任务队列
@@ -357,7 +360,7 @@ op:
            #Fetch结果列表，以client_config中fetch_var的alias_name为准
            fetch_list: ["concat_1.tmp_0"]
-            # device_type, 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu
+            # device_type, 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu, 5=arm ascend310, 6=arm ascend910
            device_type: 0
            #计算硬件ID，当devices为""或不写时为CPU预测；当devices为"0", "0,1,2"时为GPU预测，表示使用的GPU卡
@@ -395,7 +398,7 @@ op:
            #Fetch结果列表，以client_config中fetch_var的alias_name为准
            fetch_list: ["ctc_greedy_decoder_0.tmp_0", "softmax_0.tmp_0"]
-            # device_type, 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu
+            # device_type, 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu, 5=arm ascend310, 6=arm ascend910
            device_type: 0
            #计算硬件ID，当devices为""或不写时为CPU预测；当devices为"0", "0,1,2"时为GPU预测，表示使用的GPU卡
@@ -434,10 +437,12 @@ Python Pipeline除了支持CPU、GPU之外，还支持多种异构硬件部署
 - TensorRT : 2
 - CPU(Arm) : 3
 - XPU : 4
+- Ascend310(Arm) : 5
+- Ascend910(Arm) : 6
 config.yml中硬件配置：
 ```YAML
-#计算硬件类型: 空缺时由devices决定(CPU/GPU)，0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu
+#计算硬件类型: 空缺时由devices决定(CPU/GPU)，0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu, 5=arm ascend310, 6=arm ascend910
 device_type: 0
 #计算硬件ID，优先由device_type决定硬件类型。devices为""或空缺时为CPU预测；当为"0", "0,1,2"时为GPU预测，表示使用的GPU卡
 devices: "" # "0,1"

--- a/doc/Serving_Configure_EN.md
+++ b/doc/Serving_Configure_EN.md
@@ -98,6 +98,7 @@ More flags:
 | `precision`                                    | str  | FP32    | Precision Mode, support FP32, FP16, INT8              |
 | `use_calib`                                    | bool | False   | Use TRT int8 calibration                              |
 | `gpu_multi_stream`                             | bool | False   | EnableGpuMultiStream to get larger QPS                |
+| `use_ascend_cl`                                | bool | False   | Enable for ascend910; Use with use_lite for ascend310 |
 #### Serving model with multiple gpus.
 ```BASH
@@ -258,6 +259,7 @@ engines {
  use_gpu: false
  combined_model: false
  gpu_multi_stream: false
+  use_ascend_cl: false
  runtime_thread_num: 0
  batch_infer_size: 32
  enable_overrun: false
@@ -293,6 +295,7 @@ gpu_ids: 2
 - use_gpu: Enable GPU.
 - combined_model: Enable combined model.
 - gpu_multi_stream: Enable gpu multiple stream mode.
+- use_ascend_cl: Enable Ascend. Use it alone for Ascend 910; use it together with use_lite for Ascend 310.
 - runtime_thread_num: Enable Async mode when num greater than 0 and creating predictors.
 - batch_infer_size: The max batch size of Async mode.
 - enable_overrun: Enable over running of Async mode which means putting the whole task into the task queue.
@@ -380,7 +383,7 @@ op:
            #Fetch data list
            fetch_list: ["concat_1.tmp_0"]
-            # device_type, 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu
+            # device_type, 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu, 5=arm ascend310, 6=arm ascend910
            device_type: 0
            #Device ID
@@ -418,7 +421,7 @@ op:
            #Fetch data list
            fetch_list: ["ctc_greedy_decoder_0.tmp_0", "softmax_0.tmp_0"]
-            # device_type, 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu
+            # device_type, 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu, 5=arm ascend310, 6=arm ascend910
            device_type: 0
            #Device ID
@@ -459,10 +462,12 @@ In addition to supporting CPU and GPU, Pipeline also supports the deployment of
 - TensorRT : 2
 - CPU(Arm) : 3
 - XPU : 4
+- Ascend310(Arm) : 5
+- Ascend910(Arm) : 6
 Reference config.yaml:
 ```YAML
-# device_type, 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu
+# device_type, 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu, 5=arm ascend310, 6=arm ascend910
 device_type: 0
 devices: "" # "0,1"
 ```

--- a/paddle_inference/paddle/include/paddle_engine.h
+++ b/paddle_inference/paddle/include/paddle_engine.h
@@ -276,6 +276,7 @@ class PaddleInferenceEngine : public EngineCore {
    if (engine_conf.has_use_ascend_cl() &&
        engine_conf.use_ascend_cl()) {
      if (engine_conf.has_use_lite() && engine_conf.use_lite()) {
+        // for ascend 310 
        FLAGS_nnadapter_device_names = "huawei_ascend_npu";
        FLAGS_nnadapter_context_properties =
                "HUAWEI_ASCEND_NPU_SELECTED_DEVICE_IDS=" +
@@ -294,7 +295,8 @@ class PaddleInferenceEngine : public EngineCore {
                  << ",nnadapter_model_cache_dir="
                  << FLAGS_nnadapter_model_cache_dir;
      } else {
-	config.EnableNpu(gpu_id);
+        // for ascend 910
+	      config.EnableNpu(gpu_id);
      }
    }

--- a/python/paddle_serving_app/local_predict.py
+++ b/python/paddle_serving_app/local_predict.py
@@ -227,6 +227,7 @@ class LocalPredictor(object):
        # set ascend cl
        if use_ascend_cl:
            if use_lite:
+                # for ascend 310
                nnadapter_device_names = "huawei_ascend_npu"
                nnadapter_context_properties = \
                    "HUAWEI_ASCEND_NPU_SELECTED_DEVICE_IDS={}".format(gpu_id)
@@ -237,6 +238,7 @@ class LocalPredictor(object):
                .set_context_properties(nnadapter_context_properties) \
                .set_model_cache_dir(nnadapter_model_cache_dir)
            else:
+                # for ascend 910
                config.enable_npu(gpu_id)
        # set cpu low precision
        if not use_gpu and not use_lite: