From 1d29f00f6d63f9135aefcad25b9c487eec34f1e9 Mon Sep 17 00:00:00 2001
From: ShiningZhang
Date: Thu, 2 Dec 2021 17:37:26 +0800
Subject: [PATCH] add comment for ascend 310/910

---
 doc/Serving_Configure_CN.md                     | 11 ++++++++---
 doc/Serving_Configure_EN.md                     | 11 ++++++++---
 paddle_inference/paddle/include/paddle_engine.h |  4 +++-
 python/paddle_serving_app/local_predict.py      |  2 ++
 4 files changed, 21 insertions(+), 7 deletions(-)

diff --git a/doc/Serving_Configure_CN.md b/doc/Serving_Configure_CN.md
index 84ea0cb5..e613293a 100644
--- a/doc/Serving_Configure_CN.md
+++ b/doc/Serving_Configure_CN.md
@@ -98,6 +98,7 @@ workdir_9393
 | `precision` | str | FP32 | Precision Mode, support FP32, FP16, INT8 |
 | `use_calib` | bool | False | Use TRT int8 calibration |
 | `gpu_multi_stream` | bool | False | EnableGpuMultiStream to get larger QPS |
+| `use_ascend_cl` | bool | False | Enable for Ascend 910; use together with use_lite for Ascend 310 |

 #### When deploying a model on multiple GPU cards.
 ```BASH
@@ -249,6 +250,7 @@ engines {
   use_gpu: false
   combined_model: false
   gpu_multi_stream: false
+  use_ascend_cl: false
   runtime_thread_num: 0
   batch_infer_size: 32
   enable_overrun: false
@@ -286,6 +288,7 @@ gpu_ids: 2
 - use_gpu: whether to use GPU
 - combined_model: whether to use a combined model file
 - gpu_multi_stream: whether to enable GPU multi-stream mode
+- use_ascend_cl: whether to use Ascend; enable alone for Ascend 910, enable together with lite for Ascend 310
 - runtime_thread_num: if greater than 0, Async mode is enabled and the corresponding number of predictor instances is created
 - batch_infer_size: the maximum batch size in Async mode
 - enable_overrun: in Async mode, always put the whole task into the task queue
@@ -357,7 +360,7 @@ op:
     #Fetch result list, subject to the alias_name of fetch_var in client_config
     fetch_list: ["concat_1.tmp_0"]

-    # device_type, 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu
+    # device_type, 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu, 5=arm ascend310, 6=arm ascend910
     device_type: 0

    #Compute device ID. When devices is "" or unset, inference runs on CPU; when devices is "0" or "0,1,2", inference runs on GPU, listing the GPU cards to use
@@ -395,7 +398,7 @@ op:
     #Fetch result list, subject to the alias_name of fetch_var in client_config
     fetch_list: ["ctc_greedy_decoder_0.tmp_0", "softmax_0.tmp_0"]

-    # device_type, 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu
+    # device_type, 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu, 5=arm ascend310, 6=arm ascend910
     device_type: 0

    #Compute device ID. When devices is "" or unset, inference runs on CPU; when devices is "0" or "0,1,2", inference runs on GPU, listing the GPU cards to use
@@ -434,10 +437,12 @@ In addition to CPU and GPU, Python Pipeline also supports deployment on various heterogeneous hardware
 - TensorRT : 2
 - CPU(Arm) : 3
 - XPU : 4
+- Ascend310(Arm) : 5
+- Ascend910(Arm) : 6

 Hardware configuration in config.yml:
 ```YAML
-#Compute device type: if unset, decided by devices (CPU/GPU); 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu
+#Compute device type: if unset, decided by devices (CPU/GPU); 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu, 5=arm ascend310, 6=arm ascend910
 device_type: 0
 #Compute device ID; the hardware type is determined by device_type first. When devices is "" or unset, inference runs on CPU; when "0" or "0,1,2", inference runs on GPU, listing the GPU cards to use
 devices: "" # "0,1"
diff --git a/doc/Serving_Configure_EN.md b/doc/Serving_Configure_EN.md
index 2c4be74a..acb4f99a 100644
--- a/doc/Serving_Configure_EN.md
+++ b/doc/Serving_Configure_EN.md
@@ -98,6 +98,7 @@ More flags:
 | `precision` | str | FP32 | Precision Mode, support FP32, FP16, INT8 |
 | `use_calib` | bool | False | Use TRT int8 calibration |
 | `gpu_multi_stream` | bool | False | EnableGpuMultiStream to get larger QPS |
+| `use_ascend_cl` | bool | False | Enable for Ascend 910; use together with use_lite for Ascend 310 |

 #### Serving model with multiple GPUs.
 ```BASH
@@ -258,6 +259,7 @@ engines {
   use_gpu: false
   combined_model: false
   gpu_multi_stream: false
+  use_ascend_cl: false
   runtime_thread_num: 0
   batch_infer_size: 32
   enable_overrun: false
@@ -293,6 +295,7 @@ gpu_ids: 2
 - use_gpu: Enable GPU.
 - combined_model: Enable combined model.
 - gpu_multi_stream: Enable gpu multiple stream mode.
+- use_ascend_cl: Enable Ascend; use alone for Ascend 910, together with use_lite for Ascend 310.
 - runtime_thread_num: Enable Async mode when greater than 0, creating that number of predictor instances.
 - batch_infer_size: The max batch size of Async mode.
 - enable_overrun: Enable overrun in Async mode, which always puts the whole task into the task queue.
@@ -380,7 +383,7 @@ op:
     #Fetch data list
     fetch_list: ["concat_1.tmp_0"]

-    # device_type, 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu
+    # device_type, 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu, 5=arm ascend310, 6=arm ascend910
     device_type: 0

    #Device ID
@@ -418,7 +421,7 @@ op:
     #Fetch data list
     fetch_list: ["ctc_greedy_decoder_0.tmp_0", "softmax_0.tmp_0"]

-    # device_type, 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu
+    # device_type, 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu, 5=arm ascend310, 6=arm ascend910
     device_type: 0

    #Device ID
@@ -459,10 +462,12 @@ In addition to supporting CPU and GPU, Pipeline also supports the deployment of
 - TensorRT : 2
 - CPU(Arm) : 3
 - XPU : 4
+- Ascend310(Arm) : 5
+- Ascend910(Arm) : 6

 Reference config.yaml:
 ```YAML
-# device_type, 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu
+# device_type, 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu, 5=arm ascend310, 6=arm ascend910
 device_type: 0
 devices: "" # "0,1"
 ```
diff --git a/paddle_inference/paddle/include/paddle_engine.h b/paddle_inference/paddle/include/paddle_engine.h
index 05960b00..95205610 100644
--- a/paddle_inference/paddle/include/paddle_engine.h
+++ b/paddle_inference/paddle/include/paddle_engine.h
@@ -276,6 +276,7 @@ class PaddleInferenceEngine : public EngineCore {

     if (engine_conf.has_use_ascend_cl() && engine_conf.use_ascend_cl()) {
       if (engine_conf.has_use_lite() && engine_conf.use_lite()) {
+        // for ascend 310
         FLAGS_nnadapter_device_names = "huawei_ascend_npu";
         FLAGS_nnadapter_context_properties =
             "HUAWEI_ASCEND_NPU_SELECTED_DEVICE_IDS=" +
@@ -294,7 +295,8 @@
             << ",nnadapter_model_cache_dir="
             << FLAGS_nnadapter_model_cache_dir;
     } else {
-      config.EnableNpu(gpu_id);
+      // for ascend 910
+      config.EnableNpu(gpu_id);
     }
   }
diff --git a/python/paddle_serving_app/local_predict.py b/python/paddle_serving_app/local_predict.py
index e875e8d9..a637b408 100644
--- a/python/paddle_serving_app/local_predict.py
+++ b/python/paddle_serving_app/local_predict.py
@@ -227,6 +227,7 @@ class LocalPredictor(object):
         # set ascend cl
         if use_ascend_cl:
             if use_lite:
+                # for ascend 310
                 nnadapter_device_names = "huawei_ascend_npu"
                 nnadapter_context_properties = \
                     "HUAWEI_ASCEND_NPU_SELECTED_DEVICE_IDS={}".format(gpu_id)
@@ -237,6 +238,7 @@ class LocalPredictor(object):
                     .set_context_properties(nnadapter_context_properties) \
                     .set_model_cache_dir(nnadapter_model_cache_dir)
             else:
+                # for ascend 910
                 config.enable_npu(gpu_id)
         # set cpu low precision
         if not use_gpu and not use_lite:
--
GitLab
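Usage note: the following minimal sketch (not part of the patch) shows how the two Ascend code paths annotated above are selected from Python. Parameter names follow the patched `local_predict.py`; the model directory `serving_server` is a placeholder, and the exact `load_model_config` signature may differ between Serving releases.

```python
# Minimal sketch: selecting the two Ascend paths commented in this patch.
# Assumes an Ascend-enabled build of paddle_serving_app; "serving_server"
# is a placeholder model directory.
from paddle_serving_app.local_predict import LocalPredictor

# Ascend 310 (device_type 5): use_ascend_cl together with use_lite routes
# inference through Paddle-Lite's "huawei_ascend_npu" NNAdapter backend.
ascend310 = LocalPredictor()
ascend310.load_model_config(
    "serving_server", use_lite=True, use_ascend_cl=True, gpu_id=0)

# Ascend 910 (device_type 6): use_ascend_cl alone ends up in
# config.enable_npu(gpu_id), i.e. native NPU inference.
ascend910 = LocalPredictor()
ascend910.load_model_config("serving_server", use_ascend_cl=True, gpu_id=0)
```

In the Python Pipeline route, the same two combinations are expressed declaratively in config.yml as `device_type: 5` and `device_type: 6`, with `devices` carrying the NPU card id.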