+ Check_Env_Shell().cmdloop()
+ File "/usr/local/lib/python3.7/cmd.py", line 138, in cmdloop
+ stop = self.onecmd(line)
+ File "/usr/local/lib/python3.7/cmd.py", line 217, in onecmd
+ return func(arg)
+ File "/usr/local/lib/python3.7/site-packages/paddle_serving_server/serve.py", line 501, in do_check_all
+ check_env("all")
+ File "/usr/local/lib/python3.7/site-packages/paddle_serving_server/env_check/run.py", line 94, in check_env
+ run_test_cases(pipeline_test_cases, "Pipeline", is_open_std)
+ File "/usr/local/lib/python3.7/site-packages/paddle_serving_server/env_check/run.py", line 66, in run_test_cases
+ mv_log_to_new_dir(new_dir_path)
+ File "/usr/local/lib/python3.7/site-packages/paddle_serving_server/env_check/run.py", line 48, in mv_log_to_new_dir
+ shutil.move(file_path, dir_path)
+ File "/usr/local/lib/python3.7/shutil.py", line 555, in move
+ raise Error("Destination path '%s' already exists" % real_dst)
+shutil.Error: Destination path '/home/work/Pipeline_test_cpu/PipelineServingLogs' already exists
+
+```
+
+
+
+## Starting and Stopping the Service
+
+Starting a service requires three kinds of files: the Python program, the model files, and the configuration file. Taking the [Python Pipeline quick deployment example](./3-2_QuickStart_Pipeline_OCR_CN.md) as an example, the directory layout is:
+```
+.
+├── config.yml
+├── imgs
+│ └── ggg.png
+├── ocr_det_client
+│ ├── serving_client_conf.prototxt
+│ └── serving_client_conf.stream.prototxt
+├── ocr_det_model
+│ ├── inference.pdiparams
+│ ├── inference.pdmodel
+│ ├── serving_server_conf.prototxt
+│ └── serving_server_conf.stream.prototxt
+├── ocr_det.tar.gz
+├── ocr_rec_client
+│ ├── serving_client_conf.prototxt
+│ └── serving_client_conf.stream.prototxt
+├── ocr_rec_model
+│ ├── inference.pdiparams
+│ ├── inference.pdmodel
+│ ├── serving_server_conf.prototxt
+│ └── serving_server_conf.stream.prototxt
+├── pipeline_http_client.py
+├── pipeline_rpc_client.py
+├── ppocr_keys_v1.txt
+└── web_service.py
+```
+
+To start the server, run `web_service.py`; to start the client, run `pipeline_http_client.py` or `pipeline_rpc_client.py`. The server writes its startup logs to the `PipelineServingLogs` directory, which can be used for debugging.
+```
+├── PipelineServingLogs
+│ ├── pipeline.log
+│ ├── pipeline.log.wf
+│ └── pipeline.tracer
+```
+
+The service can be stopped in two ways:
+- In the foreground: press `Ctrl+C` to stop the service
+- In the background:
-```python
+```
-class JumpOp(Op):
- ## Overload func JumpOp::preprocess
- def preprocess(self, input_dicts, data_id, log_id):
- (_, input_dict), = input_dicts.items()
- if input_dict.has_key("jump"):
- return input_dict, True, None, ""
- else
- return input_dict, False, None, ""
+python3 -m paddle_serving_server.serve stop # sends SIGINT
+python3 -m paddle_serving_server.serve kill # sends SIGKILL, force shutdown
+```
+
+
+
+## Local and Remote Inference
+
+Local inference means running multi-process inference in the environment of the machine hosting the service; remote inference means the local service sends requests to a remote C++ Serving inference service.
+
+Local inference is simpler to set up and usually has lower latency than remote inference. Remote inference, in turn, enables features that are hard to achieve in Python Pipeline itself, such as deploying encrypted models or serving very large models.
+
+For local inference in Python Pipeline, use a configuration like the following: add a `local_service_conf` section to the `uci` op and set `client_type: local_predictor`.
+```
+op:
+    uci:
+        # concurrency; threads when is_thread_op=True, otherwise processes
+        concurrency: 10
+
+        # when the op has no server_endpoints, the local service configuration is read from local_service_conf
+        local_service_conf:
+
+            # uci model path
+            model_config: uci_housing_model
+
+            # compute hardware type: when absent, decided by devices (CPU/GPU); 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu
+            device_type: 0
+
+            # compute hardware IDs; device_type takes precedence for the hardware type. devices "" or absent means CPU; "0" or "0,1,2" means GPU and lists the cards to use
+            devices: "" # "0,1"
+
+            # client type: brpc, grpc, or local_predictor. local_predictor does not start a Serving service; it predicts in-process
+            client_type: local_predictor
+
+            # fetch list; names follow the alias_name of fetch_var in client_config
+            fetch_list: ["price"]
+```
+
+For remote inference in Python Pipeline, use a configuration like the following: set `client_type: brpc`, `server_endpoints`, `timeout`, and a local `client_config`.
+
+```
+op:
+    bow:
+        # concurrency; threads when is_thread_op=True, otherwise processes
+        concurrency: 1
+
+        # client connection type: brpc
+        client_type: brpc
+
+        # retry count for Serving interaction; no retry by default
+        retry: 1
+
+        # timeout for Serving interaction, in ms
+        timeout: 3000
+
+        # Serving IPs
+        server_endpoints: ["127.0.0.1:9393"]
+
+        # client-side configuration of the bow model
+        client_config: "imdb_bow_client_conf/serving_client_conf.prototxt"
+
+        # fetch list; names follow the alias_name of fetch_var in client_config
+        fetch_list: ["prediction"]
```
+
-**二. 批量推理**
+## Batch Inference
Pipeline supports batch inference; increasing the batch size improves GPU utilization. Python Pipeline supports three batching forms, with applicable scenarios as follows:
- Scenario 1: the client packs data into a batch (Client Batch)
- Scenario 2: the server dynamically merges multiple requests into a batch (Server auto-batching)
-- 场景3:服务端拆分一个批量数据推理请求成为多个小块推理(Server mini-batch)
+- Scenario 3: split one large batched inference request into several smaller mini-batch requests (Server mini-batch)
+
-1. 客户端打包批量数据
+**1. Client packs data into a batch**
When the input is numpy data, e.g. an array of shape [4, 3, 512, 512] (4 images), it can be passed directly as the input.
When the inputs have different shapes, they must be padded to the largest shape before being sent to the server, as sketched below.
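+
+A minimal client-side padding sketch (assuming CHW-layout numpy images; `pad_batch` is a hypothetical helper, not a Serving API):
+
+```python
+import numpy as np
+
+def pad_batch(images):
+    """Zero-pad (C, H, W) images to the max H/W in the batch; returns (N, C, H, W)."""
+    max_h = max(img.shape[1] for img in images)
+    max_w = max(img.shape[2] for img in images)
+    batch = np.zeros((len(images), images[0].shape[0], max_h, max_w),
+                     dtype=images[0].dtype)
+    for i, img in enumerate(images):
+        batch[i, :, :img.shape[1], :img.shape[2]] = img
+    return batch
+```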
-2. 服务端合并多个请求动态合并批量
+
+
+**2. Server auto-batching (merging multiple requests dynamically)**
+
This helps improve throughput and compute-resource utilization; requests with different shapes cannot be merged. Two merging strategies are currently available:
- Waiting time combined with maximum batch size (recommended): `batch_size` and `auto_batching_timeout` work together. When the number of pending requests exceeds `batch_size`, execution starts immediately; otherwise the server waits up to `auto_batching_timeout` before executing.
@@ -119,9 +266,11 @@ op:
```
+
+**3. Mini-Batch**
-3.服务端拆分一个批量数据推理请求成为多个小块推理:会降低批量数据 Padding 对齐的大小,从而提升速度。可参考 [OCR 示例](),核心思路是拆分数据成多个小批量,放入 list 对象 feed_list 并返回
+Splitting one batched inference request into several smaller ones reduces the amount of padding needed for alignment and therefore improves speed. See the [OCR example](); the core idea is to split the data into several mini-batches, put them into the list object feed_list, and return it:
```
def preprocess(self, input_dicts, data_id, log_id):
@@ -181,8 +330,9 @@ def preprocess(self, input_dicts, data_id, log_id):
return feed_list, False, None, ""
```
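+
+The function body above is elided in this hunk; a minimal sketch of the mini-batch idea (the split size and the "x" feed name are assumptions, not the actual OCR code):
+
+```python
+def split_to_mini_batches(batch, mini_batch_size=4):
+    # split one large batch into a list of small feed dicts for feed_list
+    feed_list = []
+    for i in range(0, len(batch), mini_batch_size):
+        feed_list.append({"x": batch[i:i + mini_batch_size]})
+    return feed_list
+```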
+
-**三. 单机多卡推理**
+## Single-Machine Multi-Card Inference
Single-machine multi-card inference is closely tied to four parameters in `config.yml`: `is_thread_op`, `concurrency`, `device_type`, and `devices`. It requires process mode and GPU mode; several processes can be assigned to each card, i.e. M Op processes are bound to N GPU cards.
```
@@ -218,8 +368,9 @@ op:
More flexible process-to-GPU-card binding schemes are under continuous development.
+
-**四. 多种计算芯片上推理**
+## Inference on Multiple Types of Compute Chips
Besides CPU and GPU inference, Python Pipeline supports inference on several other kinds of compute hardware. Set the inference hardware and acceleration library via `device_type` and `devices` in `config.yml`:
- CPU(Intel) : 0
@@ -232,27 +383,99 @@ op:
When `device_type` is not set, the hardware is decided by `devices`: "" or absent means CPU inference; a value such as "0,1,2" means GPU inference on the listed cards.
-以使用 GPU 的编号为0和1号卡并开启 TensorRT 为例,TensorRT 要配合 `ir_optim` 一同开启,`config.yml`详细配置如下:
+Taking XPU card number 0 as an example, enabled together with `ir_optim`, the detailed `config.yml` configuration is:
```
# compute hardware type
-device_type: 2
+device_type: 4
# compute hardware IDs; device_type takes precedence for the hardware type
-devices: "0,1"
+devices: "0"
# enable IR optimization
ir_optim: True
```
-
-**五. 低精度推理**
-Pipeline Serving支持低精度推理,CPU、GPU和TensoRT支持的精度类型如下图所示:
+
+
+## TensorRT Inference Acceleration
+TensorRT is a high-performance deep-learning inference optimizer: an inference framework running on Nvidia GPU hardware that provides low-latency, high-throughput deployment for deep-learning applications.
+TensorRT high-performance inference is enabled through the `device_type`, `devices`, and `ir_optim` fields; `ir_optim: True` must be set at the same time, otherwise TensorRT is not turned on.
+
+```
+op:
+    imagenet:
+        # concurrency; threads when is_thread_op=True, otherwise processes
+        concurrency: 1
+
+        # when the op has no server_endpoints, the local service configuration is read from local_service_conf
+        local_service_conf:
+
+            # model path
+            model_config: serving_server/
+
+            # compute hardware type: when absent, decided by devices (CPU/GPU); 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu
+            device_type: 2
+
+            # compute hardware IDs; devices "" or absent means CPU; "0" or "0,1,2" means GPU and lists the cards to use
+            devices: "1" # "0,1"
+
+            # client type: brpc, grpc, or local_predictor. local_predictor does not start a Serving service; it predicts in-process
+            client_type: local_predictor
+
+            # fetch list; names follow the alias_name of fetch_var in client_config
+            fetch_list: ["score"]
+
+            # enable ir_optim
+            ir_optim: True
+```
+
+
+## MKL-DNN Inference Acceleration
+
+MKL-DNN is a math kernel library for Intel CPUs and GPUs that optimizes the operators and instruction paths of deep-learning networks to speed up execution. The Paddle framework already integrates MKL-DNN.
+
+Currently only Intel CPU acceleration is supported. Enable MKL-DNN via the `device_type`, `devices`, and `use_mkldnn` fields.
+
+```
+op:
+    imagenet:
+        # concurrency; threads when is_thread_op=True, otherwise processes
+        concurrency: 1
+
+        # when the op has no server_endpoints, the local service configuration is read from local_service_conf
+        local_service_conf:
+
+            # model path
+            model_config: serving_server/
+
+            # compute hardware type: when absent, decided by devices (CPU/GPU); 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu
+            device_type: 0
+
+            # compute hardware IDs; devices "" or absent means CPU; "0" or "0,1,2" means GPU and lists the cards to use
+            devices: ""
+
+            # client type: brpc, grpc, or local_predictor. local_predictor does not start a Serving service; it predicts in-process
+            client_type: local_predictor
+
+            # fetch list; names follow the alias_name of fetch_var in client_config
+            fetch_list: ["score"]
+
+            # enable MKLDNN
+            use_mkldnn: True
+```
+
+
+## Low-Precision Inference
+
+Pipeline Serving supports low-precision inference. The precision types supported on CPU, GPU, and TensorRT are shown in the figure below:
Low-precision inference requires a quantized model and is used together with the `config.yml` configuration; see the [low-precision example]() for reference.
-1. CPU 低精度推理配置
+
+
+**1. CPU low-precision inference**
Set the `device_type` and `devices` fields to use CPU inference, and tune low precision and performance via the `precision`, `thread_num`, and `use_mkldnn` parameters.
@@ -290,9 +513,11 @@ op:
use_mkldnn: True
```
-2. GPU + TensorRT 低精度推理
+
-通过设置,`device_type` 和 `devices` 字段使用原生 GPU 或 TensorRT 推理,通过调整`precision`、`ir_optim`和`use_calib`参数选择低精度和性能调优,如开启 TensorRT,必须一同开启`ir_optim`,`use_calib`仅配合 int8 使用。
+**2. GPU and TensorRT low-precision inference**
+
+Set the `device_type` and `devices` fields to use native GPU or TensorRT inference, and tune low precision and performance via the `precision`, `ir_optim`, and `use_calib` parameters. When TensorRT is enabled, `ir_optim` must be enabled as well; `use_calib` is only used together with int8.
```
op:
imagenet:
@@ -327,8 +552,9 @@ op:
ir_optim: True
```
+
-3. 性能测试
+**3. Performance tests**
The test environment is as follows:
- GPU model: A100-40GB
@@ -345,7 +571,6 @@ op:
- GPU + int8 + ir_optim + TensorRT + use_calib : 15.1 ms
- GPU + fp16 + ir_optim + TensorRT : 17.2 ms
-
The better-performing CPU inference configurations are
- CPU + bf16 + MKLDNN : 18.2 ms
- CPU + fp32 + thread_num=10 : 18.4 ms
@@ -354,3 +579,50 @@ CPU 推理性能较好的配置是
+
+## Skipping an Op in a Complex DAG
+
+This applies when the pre/post-processing of an Op contains an if condition and the remaining processing should be skipped when the condition is not met. In practice the Op's process stage is skipped: make the decision in preprocess, skip process, and return directly after postprocess.
+The second element of the tuple returned by preprocess, `is_skip_process=True`, indicates that the current Op's process stage is skipped and execution goes straight to postprocess.
+
+```python
+## default implementation of Op::preprocess()
+def preprocess(self, input_dicts, data_id, log_id):
+ """
+ In preprocess stage, assembling data for process stage. users can
+ override this function for model feed features.
+ Args:
+ input_dicts: input data to be preprocessed
+ data_id: inner unique id
+ log_id: global unique id for RTT
+ Return:
+ input_dict: data for process stage
+ is_skip_process: skip process stage or not, False default
+ prod_errcode: None by default; otherwise a product error occurred.
+ It is handled in the same way as an exception.
+ prod_errinfo: "" default
+ """
+ # multiple previous Op
+ if len(input_dicts) != 1:
+ _LOGGER.critical(
+ self._log(
+ "Failed to run preprocess: this Op has multiple previous "
+ "inputs. Please override this func."))
+ os._exit(-1)
+ (_, input_dict), = input_dicts.items()
+ return input_dict, False, None, ""
+
+```
+The following example, JumpOp::preprocess(), overrides the default implementation and returns True as the skip flag:
+```python
+class JumpOp(Op):
+    ## Overload func JumpOp::preprocess
+    def preprocess(self, input_dicts, data_id, log_id):
+        (_, input_dict), = input_dicts.items()
+        # skip the process stage when the request carries a "jump" key
+        if "jump" in input_dict:
+            return input_dict, True, None, ""
+        else:
+            return input_dict, False, None, ""
+```
diff --git a/doc/Offical_Docs/7-3_Python_Pipeline_Optimize_CN.md b/doc/Offical_Docs/7-3_Python_Pipeline_Optimize_CN.md
index bc49f23cb099da9bbada7e394c0b495ee1e12b36..ef2380193413f564775649e9012a5d642cd044a4 100644
--- a/doc/Offical_Docs/7-3_Python_Pipeline_Optimize_CN.md
+++ b/doc/Offical_Docs/7-3_Python_Pipeline_Optimize_CN.md
@@ -1,24 +1,105 @@
# Python Pipeline Optimization Guide
+- [Optimizing Response Time](#1)
+  - [1.1 Analyzing Response Time](#1.1)
+    - [Pipeline Trace Tool](#1.1.1)
+    - [Pipeline Profile Tool](#1.1.2)
+  - [1.2 Optimization Ideas](#1.2)
+- [Optimizing Service Throughput](#2)
+  - [2.1 Analyzing Throughput Bottlenecks](#2.1)
+  - [2.2 Optimization Ideas](#2.2)
+    - [Increasing Op Concurrency](#2.2.1)
+    - [Dynamic Batching](#2.2.2)
+    - [Separating CPU and GPU Processing](#2.2.3)
-## 如何通过 Timeline 工具进行优化
-为了更好地对性能进行优化,Python Pipeline 提供了 Timeline 工具,对整个服务的各个阶段时间进行打点。
+Service performance tuning usually starts from latency analysis: first measure the per-stage latency of the running service, find the slowest stage as the performance bottleneck, and then optimize it specifically. For model inference services, latency is not the only concern: GPU chips are expensive, so throughput matters as well, in order to raise GPU utilization and reduce cost. Model inference serving can therefore be summarized as:
+- optimizing response time
+- optimizing service throughput
-## 在 Server 端输出 Profile 信息
+After the individual stages are analyzed and tuned, the service as a whole reaches its best performance.
-Server 端用 yaml 中的 `use_profile` 字段进行控制:
+
-```yaml
+## Optimizing Response Time
+
+The main idea for optimizing response time is to first measure the per-stage latency, identify the bottleneck or the stages with the largest share of the total time, and then optimize those stages specifically.
+
+Paddle Serving provides two latency-analysis tools, `Pipeline Trace Tool` and `Pipeline Profile Tool`, with the following characteristics:
+- Pipeline Trace Tool: reports the average per-stage latency of all server processes, including every `Op` and `Channel`, for quantitative analysis.
+- Pipeline Profile Tool: a visual Trace View tool that renders a multi-process concurrency timeline, for qualitative and quantitative analysis of execution and concurrency.
+
+
+
+**1. Latency analysis**
+
+
+
+1. Pipeline Trace Tool
+
+`Pipeline Trace Tool` reports the per-stage processing latency of every `Op` and `Channel`.
+
+To enable it, add a `tracer` field under the `dag` section of the `config.yml` configuration file; the framework then emits trace information every `interval_s` seconds.
+```
dag:
- use_profile: true
+    # Op resource type: True for thread model, False for process model
+    is_thread_op: True
+
+    # tracer: tracks framework throughput and the status of every Op and Channel; no data is generated without it
+    tracer:
+        # trace interval, in seconds
+        interval_s: 10
+```
+
+The generated trace information is stored in the `./PipelineServingLogs/pipeline.tracer` log, as shown below:
```
+==================== TRACER ======================
+  Op(uci):
+      in[8473.507333333333 ms]: # time waiting for data from the preceding Channel; grows large when there are no requests for a long time
+      prep[0.6753333333333333 ms] # preprocess stage latency
+      midp[26.476333333333333 ms] # process (inference) stage latency
+      postp[1.8616666666666666 ms] # postprocess stage latency
+      out[1.3236666666666668 ms] # time to put the postprocessed result into the following Channel
+      idle[0.9965882097324374] # framework self-loop time at a 1 ms interval; a large value means high system load and slow scheduling
+  DAGExecutor:
+      Query count[30] # number of requests within interval_s
+      QPS[27.35 q/s] # service QPS within interval_s
+      Succ[1.0] # request success rate within interval_s
+      Error req[] # information about failed requests
+      Latency:
+          ave[36.55233333333334 ms] # average latency
+          .50[8.702 ms] # 50th-percentile latency
+          .60[8.702 ms] # 60th-percentile latency
+          .70[92.346 ms] # 70th-percentile latency
+          .80[92.346 ms] # 80th-percentile latency
+          .90[92.346 ms] # 90th-percentile latency
+          .95[92.346 ms] # 95th-percentile latency
+          .99[92.346 ms] # 99th-percentile latency
+  Channel (server worker num[1]):
+      chl0(In: ['@DAGExecutor'], Out: ['uci']) size[0/0] # requests queued in the Channel between the framework RequestOp and the uci Op; a large value means the downstream uci Op cannot keep up
+      chl1(In: ['uci'], Out: ['@DAGExecutor']) size[0/0] # requests queued in the Channel between the uci Op and the framework ResponseOp; a large value means the downstream ResponseOp cannot keep up
+ ==================== TRACER ======================
+```
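+
+A minimal sketch for pulling the process-stage (midp) latencies out of this log for offline analysis (the regex is an assumption based on the format shown above):
+
+```python
+import re
+
+# average the midp latencies recorded in pipeline.tracer
+with open("./PipelineServingLogs/pipeline.tracer") as f:
+    midp_ms = [float(m.group(1)) for m in re.finditer(r"midp\[([\d.]+) ms\]", f.read())]
+print("avg midp: %.3f ms" % (sum(midp_ms) / len(midp_ms)))
+```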
+
+
+2. Pipeline Profile Tool
-开启该功能后,Server 端在预测的过程中会将对应的日志信息打印到标准输出,为了更直观地展现各阶段的耗时,提供 Analyst 模块对日志文件做进一步的分析处理。
+```
+dag:
+    # Op resource type: True for thread model, False for process model
+    is_thread_op: True
+
+    # enable profiling; False by default. Generates Timeline performance data and has some performance impact
+    use_profile: True
+```
-使用时先将 Server 的输出保存到文件,以 `profile.txt` 为例,脚本将日志中的时间打点信息转换成 json 格式保存到 `trace` 文件,`trace` 文件可以通过 chrome 浏览器的 tracing 功能进行可视化。
+Once enabled, the server prints profiling information to `standard output` during prediction. To show the per-stage latency more intuitively, start the service with the following command:
+```
+python3.7 web_service.py > profile.txt 2>&1
+```
-```python
+After the service has handled requests, the profile information is written to the `profile.txt` file. Then paste the following code into `trace.py` and use the framework's Analyst module to post-process the log file:
+```
from paddle_serving_server.pipeline import Analyst
import json
import sys
@@ -30,50 +111,104 @@ if __name__ == "__main__":
analyst.save_trace(trace_filename)
```
-具体操作:打开 chrome 浏览器,在地址栏输入 `chrome://tracing/` ,跳转至 tracing 页面,点击 load 按钮,打开保存的 `trace` 文件,即可将预测服务的各阶段时间信息可视化。
+Run the script; it converts the timing information in the log into json format and saves it to a `trace` file.
+```
+python3.7 trace.py
+```
+
+The `trace` file can be visualized with the `tracing` feature of the `chrome` browser.
+```
+Open the chrome browser, enter chrome://tracing/ in the address bar, click the load button, and open the saved trace file; the per-stage timing of the prediction service is then visualized.
+```
+
+From the rendered processing flow of concurrent requests you can observe the pipeline state of the inference stage and the `gaps` between requests in the inference stage, and optimize accordingly.
+
+
+
+**2. Ideas for reducing response time**
+
+Based on the per-stage latencies reported by `Pipeline Trace Tool`, common optimizations for slow stages are:
+- Op inference stage (midp) is slow:
+  - increase Op concurrency
+  - enable auto-batching (provided the shapes of the merged requests are identical)
+  - if one sample in a batch has a very large shape and heavy padding slows inference down, see the mini-batch method in the OCR example
+  - enable TensorRT/MKL-DNN optimization
+  - enable low-precision inference
+- Op preprocess (prep) or postprocess stage is slow:
+  - increase Op concurrency
+  - optimize the pre/post-processing logic
+- in/out time is long (channel backlog > 5):
+  - check the size of the data passed through the channel; large payloads may cause the latency
+  - optimize the input data; avoid passing data, or compress it before passing
+  - increase Op concurrency
+  - decrease the concurrency of upstream Ops
+
+Based on the `Pipeline Profile Tool` output, improve the pipelining and concurrency behavior:
+- increase Op concurrency, or rebalance the concurrency of different Ops
+- enable auto-batching
-## 在 Client 端输出 Profile 信息
+Beyond that, further ideas include moving processing that is slow on the CPU onto the GPU, and, when large payloads travel between client and server, passing host- or device-memory addresses via shared memory instead.
-Client 端在 `predict` 接口设置 `profile=True`,即可开启 Profile 功能。
+
-开启该功能后,Client 端在预测的过程中会将该次预测对应的日志信息打印到标准输出,后续分析处理同 Server。
+## Optimizing Service Throughput
-## 分析方法
-根据 `pipeline.tracer` 日志中的各个阶段耗时,按以下公式逐步分析出主要耗时在哪个阶段。
+
+
+**1. Analyzing throughput bottlenecks**
+
+Service throughput is constrained by many factors, such as Op processing time, data-transfer time, concurrency, and the DAG topology. These factors can be decomposed further; as long as the transferred data is not extremely large, the dominant factors are the `processing time and concurrency of the slowest Op` in the pipeline.
```
-单 OP 耗时:
+Op processing time:
op_cost = pre + mid + post
-OP 期望并发数:
-op_concurrency = 单OP耗时(s) * 期望QPS
-
Service throughput:
-service_throughput = 1 / 最慢OP的耗时 * 并发数
+service_throughput = 1 / slowest op_cost * concurrency
Average service latency:
service_avg_cost = ∑ op_cost (critical path)
-Channel 堆积:
-channel_acc_size = QPS(down - up) * time
-
Average latency of batched prediction:
avg_batch_cost = (N * pre + mid + post) / N
```
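+
+A quick worked example with assumed numbers, to make the formulas above concrete:
+
+```python
+# assumptions: the slowest Op costs 50 ms per request and runs with concurrency 4
+slowest_op_cost = 0.050   # seconds
+concurrency = 4
+service_throughput = 1 / slowest_op_cost * concurrency   # -> 80 requests/s
+
+# batching amortizes the per-sample preprocess: pre=10 ms, mid=30 ms, post=5 ms, N=8
+pre, mid, post, N = 0.010, 0.030, 0.005, 8
+avg_batch_cost = (N * pre + mid + post) / N              # -> ~14.4 ms per sample
+```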
+
-## 优化思路
+**2. Optimization ideas**
-根据长耗时在不同阶段,采用不同的优化方法.
-- OP 推理阶段(mid-process):
- - 增加 OP 并发度
- - 开启 auto-batching (前提是多个请求的 shape 一致)
- - 若批量数据中某条数据的 shape 很大,padding 很大导致推理很慢,可使用 mini-batch
- - 开启 TensorRT/MKL-DNN 优化
- - 开启低精度推理
-- OP 前处理阶段(pre-process):
- - 增加 OP 并发度
- - 优化前处理逻辑
-- in/out 耗时长(channel 堆积>5)
- - 检查 channel 传递的数据大小和延迟
- - 优化传入数据,不传递数据或压缩后再传入
- - 增加 OP 并发度
- - 减少上游 OP 并发度
+The main ways to raise throughput are `increasing Op concurrency`, `auto-batching`, and `separating CPU and GPU processing`.
+
+
+
+1. Increasing Op concurrency
+
+Adjust the number of Op processes by setting `is_thread_op: False` (process-type Ops) together with the `concurrency` field of the `uci` Op:
+```
+dag:
+    # Op resource type: True for thread model, False for process model
+    is_thread_op: False
+op:
+    uci:
+        # concurrency; threads when is_thread_op=True, otherwise processes
+        concurrency: 10
+```
+More Op processes are not always better: they are bounded by the machine's CPU cores, memory, and GPU memory. Keeping the Op concurrency below the number of CPU cores is recommended.
+
+
+
+2. Dynamic batching
+
+Dynamic batching is another way to increase throughput; see [Python Pipeline core features](./7-2_Python_Pipeline_Senior_CN.md#批量推理) for how to enable it.
+
+
+
+3. Separating CPU and GPU processing
+
+In `CV` models, when the pre/post-processing of images or video becomes the main bottleneck, consider this scheme: split the pre/post-processing into a separate Op with its own concurrency setting.
+
+Tune the ratio of CPU pre/post-processing to GPU inference to the service's sweet spot. Taking OCR as an example, the original pipeline was `RequestOp -> DetOp -> RecOp -> ResponseOp`.
+
+Latency analysis showed that the preprocessing of `DetOp` and `RecOp` took very long, so the preprocessing of the two models was split into separate Ops; the new pipeline is:
+
+`RequestOp -> PreDetOp -> DetOp -> PreRecOp -> RecOp -> ResponseOp`, with the concurrency of `PreDetOp` and `PreRecOp` increased, yielding a 20% performance gain.
+
+Because this adds two extra data transfers, the latency of a single request increases.
diff --git a/doc/Offical_Docs/7-4_Python_Pipeline_Benchmark_CN.md b/doc/Offical_Docs/7-4_Python_Pipeline_Benchmark_CN.md
new file mode 100644
index 0000000000000000000000000000000000000000..9d29abf823fd24b98176df093bbb0258456f200f
--- /dev/null
+++ b/doc/Offical_Docs/7-4_Python_Pipeline_Benchmark_CN.md
@@ -0,0 +1,59 @@
+# Python Pipeline Performance Tests
+
+- [Test Environment](#1)
+- [Metrics and Conclusions](#2)
+
+
+
+## Test Environment
+
+The test environment is shown in the table below:
+|        | GPU             | GPU memory | CPU                                                | Memory |
+|--------|-----------------|------------|----------------------------------------------------|--------|
+| Server | 4x Tesla P4-8GB | 7611MiB    | Intel(R) Xeon(R) Gold 5117 CPU @ 2.00GHz, 48 cores | 216G   |
+| Client | 4x Tesla P4-8GB | 7611MiB    | Intel(R) Xeon(R) Gold 5117 CPU @ 2.00GHz, 48 cores | 216G   |
+
+A single GPU card was used, with TensorRT disabled.
+Model: ResNet_v2_50
+
+
+
+## Metrics and Conclusions
+
+The tests show that Python Pipeline mode, using multi-process concurrency, makes full use of the GPU and delivers good throughput.
+
+
+The test data is as follows:
+
+|model_name |thread_num |batch_size |CPU_util(%) |GPU_memory(mb) |GPU_util(%) |qps(samples/s) |total count |mean(ms) |median(ms) |80 percent(ms) |90 percent(ms) |99 percent(ms) |total cost(s) |each cost(s)|
+|:--|:--|:--|:--|:--|:--|:--|:--|:--|:--|:--|:--|:--|:--|:--
+|ResNet_v2_50 |1 |1 |2.2 |3327 |17.25 |17.633658869240787 |355 |56.428481238996476 |38.646728515625 |39.496826171875 |39.98369140625 |1273.1911083984373 |20.131953477859497 |20.033540725708008|
+|ResNet_v2_50 |1 |4 |2.7 |3617 |28.122 |53.50748430453522 |268 |74.71539215543378 |74.6181640625 |75.3138671875 |75.6051025390625 |77.85322998046874 |20.03458046913147 |20.024930953979492|
+|ResNet_v2_50 |1 |8 |1.7 |3877 |25.7869 |59.60582783086999 |150 |133.5897119140625 |132.7503662109375 |134.968310546875 |136.470703125 |140.79039062499996 |20.132259607315063 |20.03933620452881|
+|ResNet_v2_50 |1 |16 |7.0 |4777 |27.0175 |63.2627646819339 |80 |252.30162048339844 |251.8448486328125 |253.046630859375 |253.91142578125 |263.361640625 |20.233070850372314 |20.18476152420044|
+|ResNet_v2_50 |1 |32 |7.5 |6567 |38.532 |62.945314687348024 |40 |506.8969482421875 |507.3531494140625 |510.562353515625 |511.421240234375 |536.8068920898437 |20.335111618041992 |20.276386737823486|
+|ResNet_v2_50 |2 |1 |4.7 |6567 |49.4828 |50.40600094376044 |1010 |39.63352195815285 |39.5345458984375 |40.452880859375 |41.1375 |42.940522460937494 |20.037296772003174 |20.01696753501892|
+|ResNet_v2_50 |2 |4 |2.7 |6567 |44.4744 |83.4255836891382 |420 |95.38548002697172 |95.7069091796875 |97.599951171875 |98.098583984375 |102.39680908203125 |20.137707471847534 |20.03199553489685|
+|ResNet_v2_50 |2 |8 |2.2 |6567 |42.898 |91.3727510505176 |230 |174.89108568274457 |175.0452880859375 |175.82001953125 |176.7634033203125 |178.64064453125002 |20.13729453086853 |20.1132071018219|
+|ResNet_v2_50 |2 |16 |2.2 |6567 |45 |97.5591285698611 |124 |327.16720088835683 |328.6126708984375 |329.75185546875 |330.386962890625 |336.86397460937496 |20.336385011672974 |20.284939169883728|
+|ResNet_v2_50 |2 |32 |3.2 |6567 |59.5714 |100.70765418116333 |64 |633.9812698364258 |637.8568115234375 |648.103515625 |650.7439697265625 |659.2212915039062 |20.336090803146362 |20.28787398338318|
+|ResNet_v2_50 |4 |1 |3.1 |6567 |64.3333 |80.27845081929433 |1617 |49.56464230756223 |49.4873046875 |51.5537109375 |52.693408203125 |55.207568359374996 |20.142391681671143 |20.038144528865814|
+|ResNet_v2_50 |4 |4 |3.3 |6567 |70.4563 |136.62061939701394 |688 |116.51574919944586 |121.8629150390625 |129.8181640625 |133.384423828125 |142.69500732421875 |20.143372297286987 |20.041599333286285|
+|ResNet_v2_50 |4 |8 |3.0 |6567 |70.896 |158.46554975132275 |399 |201.30669079926378 |210.69775390625 |228.51748046875 |236.427294921875 |252.24822753906233 |20.143179416656494 |20.081032752990723|
+|ResNet_v2_50 |4 |16 |3.2 |6567 |66.3832 |156.4935247130092 |197 |407.6668608224937 |423.974609375 |450.368212890625 |464.45986328125 |482.93658203125 |20.141408443450928 |20.078101694583893|
+|ResNet_v2_50 |4 |32 |3.3 |6567 |72.4791 |162.01742190796557 |104 |785.5079204852765 |813.0341796875 |887.107958984375 |909.6556640625 |935.3334838867188 |20.541000843048096 |20.423666059970856|
+|ResNet_v2_50 |8 |1 |3.5 |6567 |93.977 |115.9749228558386 |2337 |68.5580409078145 |65.45849609375 |76.13930664062501 |83.542041015625 |91.45666015624998 |20.15090799331665 |20.028797417879105|
+|ResNet_v2_50 |8 |4 |4.2 |6567 |90.0952 |175.58748591910316 |889 |180.7330482920592 |170.5810546875 |218.99931640625 |240.06337890625002 |254.413759765625 |20.252012729644775 |20.084695398807526|
+|ResNet_v2_50 |8 |8 |2.6 |6567 |93.8693 |206.76595246418208 |526 |306.52158695119414 |303.043212890625 |321.0791015625 |350.5477294921875 |400.32452392578125 |20.351513147354126 |20.15437400341034|
+|ResNet_v2_50 |8 |16 |3.2 |6567 |85.7273 |205.31850043117367 |265 |614.1745522553066 |552.372314453125 |775.89169921875 |802.022607421875 |902.2763183593761 |20.650842428207397 |20.345011442899704|
+|ResNet_v2_50 |8 |32 |5.0 |6567 |89.8717 |219.8410273718835 |146 |1138.4533474020761 |1039.640869140625 |1364.289794921875 |1474.6744384765625 |1788.2614379882834 |21.251720190048218 |20.777225106954575|
+|ResNet_v2_50 |12 |1 |5.0 |6567 |89.4762 |110.00858327847862 |2218 |108.50048552943953 |103.015625 |121.09404296875003 |137.1392333984375 |151.80401123046872 |20.162063121795654 |20.055511037508648|
+|ResNet_v2_50 |12 |4 |4.1 |6567 |77.7619 |153.7824464757549 |779 |309.68895575507463 |285.585205078125 |378.07421875 |413.481640625 |424.70853515625 |20.262390613555908 |20.104551911354065|
+|ResNet_v2_50 |12 |8 |3.6 |6567 |72.6977 |165.36021780846013 |425 |571.1991590073529 |510.995849609375 |731.9383300781251 |747.6568359375 |757.304716796875 |20.56117272377014 |20.230452219645183|
+|ResNet_v2_50 |12 |16 |1.5 |6567 |76.2222 |189.6414991568285 |252 |987.7153136238219 |926.00390625 |1080.99130859375 |1249.4956298828126 |1434.4802392578124 |21.26116919517517 |20.74245794614156|
+|ResNet_v2_50 |12 |32 |2.8 |6567 |84.25 |203.868228281784 |138 |1811.640237559443 |1764.2760009765625 |1855.28046875 |2023.56826171875 |2586.8038134765625 |21.66105055809021 |20.834286351998646|
+|ResNet_v2_50 |16 |1 |4.8 |6567 |94.3333 |116.34927733312234 |2347 |136.7957122373642 |135.959716796875 |144.1568359375 |146.105517578125 |175.05707519531248 |20.172020435333252 |20.067057371139526|
+|ResNet_v2_50 |16 |4 |15.4 |6567 |83.6364 |160.59012047270738 |822 |393.3079394412447 |396.446533203125 |426.272216796875 |429.777734375 |564.1119360351562 |20.47448492050171 |20.206754431128502|
+|ResNet_v2_50 |16 |8 |6.8 |6567 |81.0233 |169.95774070621547 |437 |741.5512622684854 |751.521484375 |763.199169921875 |948.8041992187501 |1001.156142578125 |20.56981921195984 |20.254074171185493|
+|ResNet_v2_50 |16 |16 |3.5 |6567 |77.8706 |186.56600081516 |248 |1332.1007946383568 |1365.2745361328125 |1399.212255859375 |1432.4037353515625 |1771.4374853515626 |21.26861262321472 |20.64799252152443|
+|ResNet_v2_50 |16 |32 |4.3 |6567 |83.6371 |201.1293408638195 |140 |2419.3400198800223 |2561.09228515625 |2616.081103515625 |2642.0835205078124 |2883.8197412109366 |22.274224042892456 |21.169659316539764|
diff --git a/doc/Offical_Docs/8-0_Cube_CN.md b/doc/Offical_Docs/8-0_Cube_CN.md
new file mode 100644
index 0000000000000000000000000000000000000000..88603bad90aaea8d155065086da06b0592c14623
--- /dev/null
+++ b/doc/Offical_Docs/8-0_Cube_CN.md
@@ -0,0 +1,27 @@
+# Cube: Sparse Parameter Indexing Service
+
+Sparse-parameter indexing scenarios such as recommendation and advertising systems commonly use very large Embedding tables. In industrial settings the sparse parameters reach the 10^9 scale, so serving large-scale sparse parameters on a single machine is impractical. We therefore introduce Cube, Baidu's industrial-grade product built over many years in the sparse-parameter indexing field, to provide a distributed sparse-parameter service.
+
+## How Cube Works
+
+This chapter introduces Cube's basic usage and how it works. See [Cube architecture]().
+
+## Compiling and Installing Cube
+
+This chapter introduces how to compile and install each Cube component. See [Compiling and installing Cube]().
+
+## Cube Basics
+
+This chapter introduces Cube's basic features and how to use them. See [Cube basics]().
+
+## Advanced Cube Features
+
+This chapter introduces how to use Cube's advanced features. See [Advanced Cube features]().
+
+## Using Cube on K8S
+
+This chapter introduces how to use Cube on the K8S platform. See [Using Cube on K8S]().
+
+## Cube Deployment Example
+
+This chapter walks through a Cube deployment example. See [Cube deployment example]().
\ No newline at end of file
diff --git a/doc/Offical_Docs/8-1_Cube_Architecture_CN.md b/doc/Offical_Docs/8-1_Cube_Architecture_CN.md
new file mode 100644
index 0000000000000000000000000000000000000000..1f98e0bcb337c374812bef83c0daf44816e90342
--- /dev/null
+++ b/doc/Offical_Docs/8-1_Cube_Architecture_CN.md
@@ -0,0 +1,38 @@
+# Cube: Sparse Parameter Indexing Service
+
+Sparse-parameter indexing scenarios such as recommendation and advertising systems commonly use very large Embedding tables. In industrial settings the sparse parameters reach the 10^9 scale, so serving them on a single machine is impractical. We therefore introduce Cube, Baidu's industrial-grade product built over many years in the sparse-parameter indexing field, to deploy large-scale sparse-parameter models, with support for distributed model management, fast updates, and low-latency batched access from Paddle Serving.
+
+
+
+## Cube Components
+
+**1. cube-builder**
+
+cube-builder is the tool that shards a model into partition files and manages versions. A sparse-parameter file is usually one large file, so a hash function is used to split it into shards, and each node in the distributed deployment loads a different shard; a conceptual sketch follows below. Industrial scenarios also require periodic model delivery and streaming training, which makes model version management essential; this is exactly what is missing when a model is saved during training, so cube-builder can attach user-specified version information while generating the shards.
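+
+A conceptual sketch of hash-based sharding (illustration only, not the actual cube-builder logic; the shard count and routing function are assumptions):
+
+```python
+NUM_SHARDS = 4
+
+def shard_of(key: int) -> int:
+    # cube-builder uses a hash function; plain modulo stands in for it here
+    return key % NUM_SHARDS
+
+# toy embedding table: sparse key -> embedding vector
+table = {10001: [0.1, 0.2], 10002: [0.3, 0.4], 10003: [0.5, 0.6]}
+shards = {i: {} for i in range(NUM_SHARDS)}
+for key, vec in table.items():
+    shards[shard_of(key)][key] = vec   # each node loads one shard
+```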
+
+**2. cube-transfer**
+
+cube-transfer is the scheduling and management service. It monitors the upstream model and downloads it when an update appears; it then calls cube-builder to shard the downloaded model, and finally works with cube-agent to deliver the shard files.
+
+**3. cube-agent**
+
+cube-agent is the scheduling service used in tandem with cube-transfer. It receives the shard files transmitted by cube-transfer and then signals the corresponding cube-server interface to complete the delivery.
+
+**4. cube-server**
+
+cube-server exposes the sparse-parameter service built on Cube's KV capability. It provides a high-performance distributed query service over brpc and also supports a REST API for remote calls.
+
+**5. cube-cli**
+
+cube-cli is the client of cube-server and issues the sparse-parameter queries. This component is already integrated into Paddle Serving: once the cube.conf configuration file is prepared and the kv_infer-related op is specified in the server code, cube-cli is ready on the server side.
+
+## Delivery Process
+
+A complete delivery flow goes as follows:
+
+- Store the trained model on a FileServer and generate a completion flag once the transfer finishes; the FileServer can be any file-transfer service over the http protocol;
+- When cube-transfer detects the completion flag, it downloads the model files from the FileServer;
+- cube-transfer shards the sparse-parameter model with the cube-builder tool;
+- cube-transfer delivers the shard files to cube-agent;
+- cube-agent sends a load command to cube-server, telling it to hot-load the new parameter files;
+- cube-server answers the query requests sent by Paddle Serving.
\ No newline at end of file
diff --git a/doc/Offical_Docs/8-2_Cube_Compile_CN.md b/doc/Offical_Docs/8-2_Cube_Compile_CN.md
new file mode 100644
index 0000000000000000000000000000000000000000..97d5925ed80354f9d45918ad449ad7c3622231f1
--- /dev/null
+++ b/doc/Offical_Docs/8-2_Cube_Compile_CN.md
@@ -0,0 +1,69 @@
+# Compiling Cube
+
+## Build Dependencies
+
+**The main components and their build dependencies are:**
+
+| Component | Notes |
+| :--------------------------: | :-------------------------------: |
+| Cube-Server | C++ program providing an efficient, fast RPC protocol |
+| Cube-Agent | Go program; needs a Go environment |
+| Cube-Transfer | Go program; needs a Go environment |
+| Cube-Builder | C++ program |
+| Cube-Cli | C++ component, already integrated into the C++ server; no separate build needed |
+
+## Build Steps
+
+Building inside Docker is recommended: we provide a build environment with the dependencies above already configured, see [the image list]().
+
+**1. Set the PYTHON environment variables**
+
+Determine the Python version to build against and set the three corresponding environment variables: `PYTHON_INCLUDE_DIR`, `PYTHON_LIBRARIES`, and `PYTHON_EXECUTABLE`. The following uses Python 3.7 as an example.
+
+```
+# adjust these paths to your own environment
+export PYTHON_INCLUDE_DIR=/usr/local/include/python3.7m/
+export PYTHON_LIBRARIES=/usr/local/lib/x86_64-linux-gnu/libpython3.7m.so
+export PYTHON_EXECUTABLE=/usr/local/bin/python3.7
+
+export GOPATH=$HOME/go
+export PATH=$PATH:$GOPATH/bin
+
+python3.7 -m pip install -r python/requirements.txt
+
+go env -w GO111MODULE=on
+go env -w GOPROXY=https://goproxy.cn,direct
+go install github.com/grpc-ecosystem/grpc-gateway/protoc-gen-grpc-gateway@v1.15.2
+go install github.com/grpc-ecosystem/grpc-gateway/protoc-gen-swagger@v1.15.2
+go install github.com/golang/protobuf/protoc-gen-go@v1.4.3
+go install google.golang.org/grpc@v1.33.0
+go env -w GO111MODULE=auto
+```
+
+The meaning of each environment variable is shown in the table below.
+
+| cmake environment variable | Meaning | Notes | Needed in Docker? |
+|-----------------------|-------------------------------------|-------------------------------|--------------------|
+| PYTHON_INCLUDE_DIR | directory of Python.h, usually **/include/python3.7/Python.h | If not found: 1) the development version of Python is not installed and must be reinstalled; 2) insufficient permission to inspect the relevant system directories. | yes (/usr/local/include/python3.7) |
+| PYTHON_LIBRARIES | directory of libpython3.7.so or libpython3.7m.so, usually /usr/local/lib | If not found: 1) the development version of Python is not installed and must be reinstalled; 2) insufficient permission to inspect the relevant system directories. | yes (/usr/local/lib/x86_64-linux-gnu/libpython3.7m.so) |
+| PYTHON_EXECUTABLE | directory of python3.7, usually /usr/local/bin | | yes (/usr/local/bin/python3.7) |
+
+**2. Build**
+
+```
+mkdir build_cube
+cd build_cube
+cmake -DPYTHON_INCLUDE_DIR=$PYTHON_INCLUDE_DIR \
+ -DPYTHON_LIBRARIES=$PYTHON_LIBRARIES \
+ -DPYTHON_EXECUTABLE=$PYTHON_EXECUTABLE \
+ -DSERVER=ON \
+ -DWITH_GPU=OFF ..
+make -j
+cd ..
+```
+
+The compiled Cube components can then be found under `build_cube/core/cube`:
+- Cube-Server:build_cube/core/cube/cube-server/cube
+- Cube-Agent:build_cube/core/cube/cube-agent/src/cube-agent
+- Cube-Transfer:build_cube/core/cube/cube-transfer/src/cube-transfer
+- Cube-Builder:build_cube/core/cube/cube-builder/cube-builder
\ No newline at end of file
diff --git a/doc/Offical_Docs/9-0_Kubernetes_Int_CN.md b/doc/Offical_Docs/9-0_Kubernetes_Int_CN.md
index 16559a58f18c64a2867c072dd892c50f49ddd245..cd06680961ae7272524bbf6852f30b5b45e6f401 100644
--- a/doc/Offical_Docs/9-0_Kubernetes_Int_CN.md
+++ b/doc/Offical_Docs/9-0_Kubernetes_Int_CN.md
@@ -3,6 +3,5 @@ Kubernetes 集群部署
Service deployment has gone through four stages: physical machines, virtual machines, containerization, and cloud native. Cloud native offers a Lego-like ecosystem of composable containers; Docker and Kubernetes have become the infrastructure of the cloud-native era and drive the rapid growth of applications. The scalability and distributed architecture of Kubernetes have always been an excellent fit for AI and machine learning, and as the solutions mature they push machine learning toward large-scale production engineering.
This chapter introduces the clustered deployment of Paddle Serving on Kubernetes, along with an enterprise-grade secure gateway deployment case.
-- [Kubernetes 集群部署方案]()
+- [Kubernetes cluster deployment](./9-1_Kubernetes_CN.md)
- [Kubernetes secure gateway deployment case]()
-
diff --git a/doc/Offical_Docs/Home_Page_CN.md b/doc/Offical_Docs/Home_Page_CN.md
new file mode 100644
index 0000000000000000000000000000000000000000..183e060b326a5d9faffe391b5aa8239b3e07c4b0
--- /dev/null
+++ b/doc/Offical_Docs/Home_Page_CN.md
@@ -0,0 +1,88 @@
+# Paddle Serving: an End-to-End Serving Framework for Inference
+
+## 1. About Paddle Serving
+Paddle Serving is an end-to-end serving framework for model deployment. It serves Paddle models with high performance on many kinds of X86 and ARM hardware, accelerates inference on more than five kinds of GPU and NPU hardware, and additionally provides Docker and Kubernetes cloud deployment solutions.
+
+## 2. Quick Start
+
+Enter the Serving git directory, then go to the [fit_a_line](https://github.com/PaddlePaddle/Serving/tree/v0.8.3/examples/C%2B%2B/fit_a_line) example
+```
+## download the model
+sh get_data.sh
+
+## start the server
+python3 -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393
+
+## HTTP curl
+curl -XPOST http://0.0.0.0:9393/GeneralModelService/inference -d ' {"tensor":[{"float_data":[0.0137,-0.1136,0.2553,-0.0692,0.0582,-0.0727,-0.1583,-0.0584,0.6283,0.4919,0.1856,0.0795,-0.0332],"elem_type":1,"name":"x","alias_name":"x","shape":[1,13]}],"fetch_var_names":["price"],"log_id":0}'
+```
+
+## 3. Deployment Workflow
+
+Development flow: (1) prepare the deployment environment; (2) prepare the model; (3) develop the Serving program; (4) launch and optimize the service
+
+**(1) Prepare the deployment environment**
+Docker is an open-source application container engine that makes applications easier to package and port. Containerized deployment of Paddle Serving inside Docker is recommended; install the Python wheel packages in the Serving Docker environment.
+
+**(2) Prepare the model**
+
+After downloading the inference model, save it in the parameter format used for Serving deployment, which makes serving-oriented deployment easier.
+
+**(3) Develop the Serving program**
+
+Adapt the server and client code to the model's pre/post-processing, and set deployment parameters such as the port, target hardware, and concurrency via configuration or command-line arguments.
+
+**(4) Launch and optimize the service**
+
+Start the server and client from the command line, and tune performance further based on the outputs and performance metrics.
+
+## 4. Demos
+
+See the [Model Zoo](./4-0_ModelZoo_CN.md)
+
+## 5. Core Strengths
+
+Paddle Serving offers industrial-grade features and high performance.
+
+**1. Industrial-grade**
+
+- Supports multiple protocols including HTTP, gRPC, and bRPC; provides C++, Python, and Java SDKs
+- Designs and implements an asynchronous pipelined high-performance inference framework based on a directed acyclic graph (DAG), with model composition, asynchronous scheduling, concurrent inference, dynamic batching, multi-card multi-stream inference, request caching, and more
+- Adapts to many kinds of hardware: x86 (Intel) CPU, ARM CPU, Nvidia GPU, Kunlun XPU, Huawei Ascend 310/910, Hygon DCU, Nvidia Jetson, and others
+- Integrates the Intel MKLDNN and Nvidia TensorRT acceleration libraries, as well as low-precision and quantized inference
+- Provides a complete solution for secure model deployment, including encrypted-model deployment, authentication, and HTTPS secure gateways, applied in real projects
+- Supports cloud deployment, with a Baidu Cloud Kubernetes cluster deployment case for Paddle Serving
+- Ships rich deployment examples for classic models from the PaddleOCR, PaddleClas, PaddleDetection, PaddleSeg, PaddleNLP, and PaddleRec suites: more than 40 pretrained high-quality models in total
+
+**2. High performance**
+
+Test environment and notes:
+1) GPU model: Tesla P4 (7611 MiB)
+2) CUDA version: 11.0
+3) Model: ResNet_v2_50
+4) To measure the effect of asynchronous batch merging, the test data uses batch=1
+5) [Test code and dataset used](../../examples/C++/PaddleClas/resnet_v2_50)
+6) In the figures below, blue is C++ Serving and grey is TF-Serving.
+7) The line chart shows QPS; higher means more requests handled per second, i.e. better performance.
+8) The bar chart shows average latency; higher means a single request takes longer, i.e. worse performance.
+
+With default parameters in synchronous mode, C++ Serving outperforms TF-Serving in both QPS and average latency.
+
+
+
+
+
+
+In asynchronous mode the two are close, but when the client concurrency reaches 70, TF-Serving simply times out while C++ Serving still returns results normally.
+
+
+
+
+
+
+
+## 6. Case Studies
+
+## 7. Resources
+
+## 8. Contributing & Community
diff --git a/doc/Offical_Docs/images/6-1_Cpp_Asynchronous_Framwork_CN_1.png b/doc/Offical_Docs/images/6-1_Cpp_Asynchronous_Framwork_CN_1.png
new file mode 100644
index 0000000000000000000000000000000000000000..88c36f7688719d7081598e5177cd64042ab5c9de
Binary files /dev/null and b/doc/Offical_Docs/images/6-1_Cpp_Asynchronous_Framwork_CN_1.png differ
diff --git a/doc/Offical_Docs/images/6-5_Cpp_ABTest_CN_1.png b/doc/Offical_Docs/images/6-5_Cpp_ABTest_CN_1.png
new file mode 100644
index 0000000000000000000000000000000000000000..5e8f8980dffb46f4960390e6edb281968ae8bd83
Binary files /dev/null and b/doc/Offical_Docs/images/6-5_Cpp_ABTest_CN_1.png differ
diff --git a/doc/Offical_Docs/images/8-1_Cube_Architecture_CN_1.png b/doc/Offical_Docs/images/8-1_Cube_Architecture_CN_1.png
new file mode 100644
index 0000000000000000000000000000000000000000..fbc3c42d4d5b66b185833463664426f702994a48
Binary files /dev/null and b/doc/Offical_Docs/images/8-1_Cube_Architecture_CN_1.png differ
diff --git a/doc/TensorRT_Dynamic_Shape_CN.md b/doc/TensorRT_Dynamic_Shape_CN.md
index 7ffc8693a4b8070c6395ad0c0fe200d646fc1df6..7a9759c33ce6e9b5658af91d0deed3919856d340 100644
--- a/doc/TensorRT_Dynamic_Shape_CN.md
+++ b/doc/TensorRT_Dynamic_Shape_CN.md
@@ -33,6 +33,7 @@ python -m paddle_serving_server.serve \
**2. Setting dynamic shape in C++ Serving**
+1. Method 1:
Modify the following code in `**/paddle_inference/paddle/include/paddle_engine.h`
```
@@ -127,6 +128,55 @@ python -m paddle_serving_server.serve \
}
```
+2. Method 2:
+Generate the configuration information following the code below in `**/python/paddle_serving_server/serve.py`,
+and set it with the `server.set_trt_dynamic_shape_info(info)` method.
+
+```
+def set_ocr_dynamic_shape_info():
+ info = []
+ min_input_shape = {
+ "x": [1, 3, 50, 50],
+ "conv2d_182.tmp_0": [1, 1, 20, 20],
+ "nearest_interp_v2_2.tmp_0": [1, 1, 20, 20],
+ "nearest_interp_v2_3.tmp_0": [1, 1, 20, 20],
+ "nearest_interp_v2_4.tmp_0": [1, 1, 20, 20],
+ "nearest_interp_v2_5.tmp_0": [1, 1, 20, 20]
+ }
+ max_input_shape = {
+ "x": [1, 3, 1536, 1536],
+ "conv2d_182.tmp_0": [20, 200, 960, 960],
+ "nearest_interp_v2_2.tmp_0": [20, 200, 960, 960],
+ "nearest_interp_v2_3.tmp_0": [20, 200, 960, 960],
+ "nearest_interp_v2_4.tmp_0": [20, 200, 960, 960],
+ "nearest_interp_v2_5.tmp_0": [20, 200, 960, 960],
+ }
+ opt_input_shape = {
+ "x": [1, 3, 960, 960],
+ "conv2d_182.tmp_0": [3, 96, 240, 240],
+ "nearest_interp_v2_2.tmp_0": [3, 96, 240, 240],
+ "nearest_interp_v2_3.tmp_0": [3, 24, 240, 240],
+ "nearest_interp_v2_4.tmp_0": [3, 24, 240, 240],
+ "nearest_interp_v2_5.tmp_0": [3, 24, 240, 240],
+ }
+ det_info = {
+ "min_input_shape": min_input_shape,
+ "max_input_shape": max_input_shape,
+ "opt_input_shape": opt_input_shape,
+ }
+ info.append(det_info)
+ min_input_shape = {"x": [1, 3, 32, 10], "lstm_1.tmp_0": [1, 1, 128]}
+ max_input_shape = {"x": [50, 3, 32, 1000], "lstm_1.tmp_0": [500, 50, 128]}
+ opt_input_shape = {"x": [6, 3, 32, 100], "lstm_1.tmp_0": [25, 5, 128]}
+ rec_info = {
+ "min_input_shape": min_input_shape,
+ "max_input_shape": max_input_shape,
+ "opt_input_shape": opt_input_shape,
+ }
+ info.append(rec_info)
+ return info
+```
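+
+A short usage sketch (assuming `server` is the Server instance created in serve.py):
+
+```python
+info = set_ocr_dynamic_shape_info()
+server.set_trt_dynamic_shape_info(info)
+```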
+
## Pipeline Serving
diff --git a/doc/TensorRT_Dynamic_Shape_EN.md b/doc/TensorRT_Dynamic_Shape_EN.md
index c9c9aabf2025028cf0aa22e9e86187cdc8f8cfad..47a6704971eda2172e19b460cdd22eea2d16279f 100644
--- a/doc/TensorRT_Dynamic_Shape_EN.md
+++ b/doc/TensorRT_Dynamic_Shape_EN.md
@@ -16,6 +16,8 @@ The following is the dynamic shape api
For detail, please refer to API doc [C++](https://paddleinference.paddlepaddle.org.cn/api_reference/cxx_api_doc/Config/GPUConfig.html#tensorrt)/[Python](https://paddleinference.paddlepaddle.org.cn/api_reference/python_api_doc/Config/GPUConfig.html#tensorrt)
### C++ Serving
+
+1. Method 1:
Modify the following code in `**/paddle_inference/paddle/include/paddle_engine.h`
```
@@ -110,6 +112,54 @@ Modify the following code in `**/paddle_inference/paddle/include/paddle_engine.h
}
```
+2. Method 2:
+Refer to the code below from `**/python/paddle_serving_server/serve.py` to generate the configuration information,
+and use the method `server.set_trt_dynamic_shape_info(info)` to set it.
+
+```
+def set_ocr_dynamic_shape_info():
+ info = []
+ min_input_shape = {
+ "x": [1, 3, 50, 50],
+ "conv2d_182.tmp_0": [1, 1, 20, 20],
+ "nearest_interp_v2_2.tmp_0": [1, 1, 20, 20],
+ "nearest_interp_v2_3.tmp_0": [1, 1, 20, 20],
+ "nearest_interp_v2_4.tmp_0": [1, 1, 20, 20],
+ "nearest_interp_v2_5.tmp_0": [1, 1, 20, 20]
+ }
+ max_input_shape = {
+ "x": [1, 3, 1536, 1536],
+ "conv2d_182.tmp_0": [20, 200, 960, 960],
+ "nearest_interp_v2_2.tmp_0": [20, 200, 960, 960],
+ "nearest_interp_v2_3.tmp_0": [20, 200, 960, 960],
+ "nearest_interp_v2_4.tmp_0": [20, 200, 960, 960],
+ "nearest_interp_v2_5.tmp_0": [20, 200, 960, 960],
+ }
+ opt_input_shape = {
+ "x": [1, 3, 960, 960],
+ "conv2d_182.tmp_0": [3, 96, 240, 240],
+ "nearest_interp_v2_2.tmp_0": [3, 96, 240, 240],
+ "nearest_interp_v2_3.tmp_0": [3, 24, 240, 240],
+ "nearest_interp_v2_4.tmp_0": [3, 24, 240, 240],
+ "nearest_interp_v2_5.tmp_0": [3, 24, 240, 240],
+ }
+ det_info = {
+ "min_input_shape": min_input_shape,
+ "max_input_shape": max_input_shape,
+ "opt_input_shape": opt_input_shape,
+ }
+ info.append(det_info)
+ min_input_shape = {"x": [1, 3, 32, 10], "lstm_1.tmp_0": [1, 1, 128]}
+ max_input_shape = {"x": [50, 3, 32, 1000], "lstm_1.tmp_0": [500, 50, 128]}
+ opt_input_shape = {"x": [6, 3, 32, 100], "lstm_1.tmp_0": [25, 5, 128]}
+ rec_info = {
+ "min_input_shape": min_input_shape,
+ "max_input_shape": max_input_shape,
+ "opt_input_shape": opt_input_shape,
+ }
+ info.append(rec_info)
+ return info
+```
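+
+A short usage sketch (assuming `server` is the Server instance created in serve.py):
+
+```python
+info = set_ocr_dynamic_shape_info()
+server.set_trt_dynamic_shape_info(info)
+```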
### Pipeline Serving
@@ -151,4 +201,4 @@ if use_trt:
names[3]: [10, head_number, 60, 60]
})
-```
\ No newline at end of file
+```
diff --git a/doc/images/wechat_group_1.jpeg b/doc/images/wechat_group_1.jpeg
index 443a1549d5c79e86b26038d7eb2e704ed5f9213f..80e8acc728faaae7bcb254e4fed93dfaffd84d59 100644
Binary files a/doc/images/wechat_group_1.jpeg and b/doc/images/wechat_group_1.jpeg differ
diff --git a/examples/C++/PaddleClas/resnet_50_vd/README_CN.md b/examples/C++/PaddleClas/resnet_50_vd/README_CN.md
new file mode 100644
index 0000000000000000000000000000000000000000..0034b5b4c03e572b7a27ff6296e185fed713eabb
--- /dev/null
+++ b/examples/C++/PaddleClas/resnet_50_vd/README_CN.md
@@ -0,0 +1,69 @@
+# Image Classification
+
+## 1. Get the Model
+
+```
+wget https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/ResNet50_vd_infer.tar && tar xf ResNet50_vd_infer.tar
+
+```
+
+## 2. Convert the Downloaded Inference Model into Serving Deployment Parameters with paddle_serving_client
+```
+# save the ResNet50_vd model parameters
+python3 -m paddle_serving_client.convert --dirname ./ResNet50_vd_infer/ \
+ --model_filename inference.pdmodel \
+ --params_filename inference.pdiparams \
+ --serving_server ./ResNet50_vd_serving/ \
+ --serving_client ./ResNet50_vd_client/
+```
+
+After saving the parameters, two new folders, `ResNet50_vd_serving` and `ResNet50_vd_client`, appear in the current directory:
+```
+├── daisy.jpg
+├── http_client.py
+├── imagenet.label
+├── ResNet50_vd_client
+│ ├── serving_client_conf.prototxt
+│ └── serving_client_conf.stream.prototxt
+├── ResNet50_vd_infer
+│ ├── inference.pdiparams
+│ ├── inference.pdiparams.info
+│ └── inference.pdmodel
+├── ResNet50_vd_serving
+│ ├── fluid_time_file
+│ ├── inference.pdiparams
+│ ├── inference.pdmodel
+│ ├── serving_server_conf.prototxt
+│ └── serving_server_conf.stream.prototxt
+├── rpc_client.py
+```
+
+## 3. Start the Server
+
+A C++ Serving service can accept HTTP, gRPC, and bRPC requests on a single network port. The `--model` flag sets the model path, `--gpu_ids` selects the GPU cards, and `--port` sets the port.
+
+```
+python3 -m paddle_serving_server.serve --model ResNet50_vd_serving --gpu_ids 0 --port 9394
+```
+
+## 4. Run the Client
+
+1. `http_client.py` wraps the HTTP request client
+
+```
+python3 http_client.py
+```
+
+2. `rpc_client.py` wraps the gRPC request client
+
+```
+python3 rpc_client.py
+```
+
+After a successful run, the model prediction is printed as follows:
+
+```
+prediction: daisy, probability: 0.9341399073600769
+```
diff --git a/examples/C++/PaddleClas/resnet_50_vd/daisy.jpg b/examples/C++/PaddleClas/resnet_50_vd/daisy.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..7edeca63e5f32e68550ef720d81f59df58a8eabc
Binary files /dev/null and b/examples/C++/PaddleClas/resnet_50_vd/daisy.jpg differ
diff --git a/examples/C++/PaddleClas/resnet_50_vd/http_client.py b/examples/C++/PaddleClas/resnet_50_vd/http_client.py
new file mode 100644
index 0000000000000000000000000000000000000000..722f489e30d5e7e98408f287540ff1b6c1cf0cfc
--- /dev/null
+++ b/examples/C++/PaddleClas/resnet_50_vd/http_client.py
@@ -0,0 +1,82 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys
+from paddle_serving_client import HttpClient
+
+#app
+from paddle_serving_app.reader import Sequential, URL2Image, Resize
+from paddle_serving_app.reader import CenterCrop, RGB2BGR, Transpose, Div, Normalize
+import time
+
+client = HttpClient()
+client.load_client_config("./ResNet50_vd_client/serving_client_conf.prototxt")
+'''
+if you want to use the gRPC client, call set_use_grpc_client(True),
+or directly call client.grpc_client_predict(...);
+for the HTTP client, call set_use_grpc_client(False) (the default),
+or directly call client.http_client_predict(...)
+'''
+#client.set_use_grpc_client(True)
+'''
+to enable the Encrypt module, uncomment the following line
+'''
+#client.use_key("./key")
+'''
+to enable compression, uncomment the following lines
+'''
+#client.set_response_compress(True)
+#client.set_request_compress(True)
+'''
+we recommend the Proto data format in the HTTP body: set True (the default);
+for the JSON data format in the HTTP body, set False
+'''
+#client.set_http_proto(True)
+client.connect(["127.0.0.1:9394"])
+
+label_dict = {}
+label_idx = 0
+with open("imagenet.label") as fin:
+ for line in fin:
+ label_dict[label_idx] = line.strip()
+ label_idx += 1
+
+#preprocess
+seq = Sequential([
+ URL2Image(), Resize(256), CenterCrop(224), RGB2BGR(), Transpose((2, 0, 1)),
+ Div(255), Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225], True)
+])
+
+start = time.time()
+image_file = "https://paddle-serving.bj.bcebos.com/imagenet-example/daisy.jpg"
+for i in range(1):
+ img = seq(image_file)
+ res = client.predict(feed={"inputs": img}, fetch=[], batch=False)
+
+ if res is None:
+ raise ValueError("predict error")
+
+ if res.err_no != 0:
+ raise ValueError("predict error. Response : {}".format(res))
+
+ max_val = res.outputs[0].tensor[0].float_data[0]
+ max_idx = 0
+ for one_data in enumerate(res.outputs[0].tensor[0].float_data):
+ if one_data[1] > max_val:
+ max_val = one_data[1]
+ max_idx = one_data[0]
+ label = label_dict[max_idx].strip().replace(",", "")
+ print("prediction: {}, probability: {}".format(label, max_val))
+end = time.time()
+print(end - start)
diff --git a/examples/C++/PaddleClas/resnet_50_vd/imagenet.label b/examples/C++/PaddleClas/resnet_50_vd/imagenet.label
new file mode 100644
index 0000000000000000000000000000000000000000..d7146735146ea1894173d6d0e20fb90af36be849
--- /dev/null
+++ b/examples/C++/PaddleClas/resnet_50_vd/imagenet.label
@@ -0,0 +1,1000 @@
+tench, Tinca tinca,
+goldfish, Carassius auratus,
+great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias,
+tiger shark, Galeocerdo cuvieri,
+hammerhead, hammerhead shark,
+electric ray, crampfish, numbfish, torpedo,
+stingray,
+cock,
+hen,
+ostrich, Struthio camelus,
+brambling, Fringilla montifringilla,
+goldfinch, Carduelis carduelis,
+house finch, linnet, Carpodacus mexicanus,
+junco, snowbird,
+indigo bunting, indigo finch, indigo bird, Passerina cyanea,
+robin, American robin, Turdus migratorius,
+bulbul,
+jay,
+magpie,
+chickadee,
+water ouzel, dipper,
+kite,
+bald eagle, American eagle, Haliaeetus leucocephalus,
+vulture,
+great grey owl, great gray owl, Strix nebulosa,
+European fire salamander, Salamandra salamandra,
+common newt, Triturus vulgaris,
+eft,
+spotted salamander, Ambystoma maculatum,
+axolotl, mud puppy, Ambystoma mexicanum,
+bullfrog, Rana catesbeiana,
+tree frog, tree-frog,
+tailed frog, bell toad, ribbed toad, tailed toad, Ascaphus trui,
+loggerhead, loggerhead turtle, Caretta caretta,
+leatherback turtle, leatherback, leathery turtle, Dermochelys coriacea,
+mud turtle,
+terrapin,
+box turtle, box tortoise,
+banded gecko,
+common iguana, iguana, Iguana iguana,
+American chameleon, anole, Anolis carolinensis,
+whiptail, whiptail lizard,
+agama,
+frilled lizard, Chlamydosaurus kingi,
+alligator lizard,
+Gila monster, Heloderma suspectum,
+green lizard, Lacerta viridis,
+African chameleon, Chamaeleo chamaeleon,
+Komodo dragon, Komodo lizard, dragon lizard, giant lizard, Varanus komodoensis,
+African crocodile, Nile crocodile, Crocodylus niloticus,
+American alligator, Alligator mississipiensis,
+triceratops,
+thunder snake, worm snake, Carphophis amoenus,
+ringneck snake, ring-necked snake, ring snake,
+hognose snake, puff adder, sand viper,
+green snake, grass snake,
+king snake, kingsnake,
+garter snake, grass snake,
+water snake,
+vine snake,
+night snake, Hypsiglena torquata,
+boa constrictor, Constrictor constrictor,
+rock python, rock snake, Python sebae,
+Indian cobra, Naja naja,
+green mamba,
+sea snake,
+horned viper, cerastes, sand viper, horned asp, Cerastes cornutus,
+diamondback, diamondback rattlesnake, Crotalus adamanteus,
+sidewinder, horned rattlesnake, Crotalus cerastes,
+trilobite,
+harvestman, daddy longlegs, Phalangium opilio,
+scorpion,
+black and gold garden spider, Argiope aurantia,
+barn spider, Araneus cavaticus,
+garden spider, Aranea diademata,
+black widow, Latrodectus mactans,
+tarantula,
+wolf spider, hunting spider,
+tick,
+centipede,
+black grouse,
+ptarmigan,
+ruffed grouse, partridge, Bonasa umbellus,
+prairie chicken, prairie grouse, prairie fowl,
+peacock,
+quail,
+partridge,
+African grey, African gray, Psittacus erithacus,
+macaw,
+sulphur-crested cockatoo, Kakatoe galerita, Cacatua galerita,
+lorikeet,
+coucal,
+bee eater,
+hornbill,
+hummingbird,
+jacamar,
+toucan,
+drake,
+red-breasted merganser, Mergus serrator,
+goose,
+black swan, Cygnus atratus,
+tusker,
+echidna, spiny anteater, anteater,
+platypus, duckbill, duckbilled platypus, duck-billed platypus, Ornithorhynchus anatinus,
+wallaby, brush kangaroo,
+koala, koala bear, kangaroo bear, native bear, Phascolarctos cinereus,
+wombat,
+jellyfish,
+sea anemone, anemone,
+brain coral,
+flatworm, platyhelminth,
+nematode, nematode worm, roundworm,
+conch,
+snail,
+slug,
+sea slug, nudibranch,
+chiton, coat-of-mail shell, sea cradle, polyplacophore,
+chambered nautilus, pearly nautilus, nautilus,
+Dungeness crab, Cancer magister,
+rock crab, Cancer irroratus,
+fiddler crab,
+king crab, Alaska crab, Alaskan king crab, Alaska king crab, Paralithodes camtschatica,
+American lobster, Northern lobster, Maine lobster, Homarus americanus,
+spiny lobster, langouste, rock lobster, crawfish, crayfish, sea crawfish,
+crayfish, crawfish, crawdad, crawdaddy,
+hermit crab,
+isopod,
+white stork, Ciconia ciconia,
+black stork, Ciconia nigra,
+spoonbill,
+flamingo,
+little blue heron, Egretta caerulea,
+American egret, great white heron, Egretta albus,
+bittern,
+crane,
+limpkin, Aramus pictus,
+European gallinule, Porphyrio porphyrio,
+American coot, marsh hen, mud hen, water hen, Fulica americana,
+bustard,
+ruddy turnstone, Arenaria interpres,
+red-backed sandpiper, dunlin, Erolia alpina,
+redshank, Tringa totanus,
+dowitcher,
+oystercatcher, oyster catcher,
+pelican,
+king penguin, Aptenodytes patagonica,
+albatross, mollymawk,
+grey whale, gray whale, devilfish, Eschrichtius gibbosus, Eschrichtius robustus,
+killer whale, killer, orca, grampus, sea wolf, Orcinus orca,
+dugong, Dugong dugon,
+sea lion,
+Chihuahua,
+Japanese spaniel,
+Maltese dog, Maltese terrier, Maltese,
+Pekinese, Pekingese, Peke,
+Shih-Tzu,
+Blenheim spaniel,
+papillon,
+toy terrier,
+Rhodesian ridgeback,
+Afghan hound, Afghan,
+basset, basset hound,
+beagle,
+bloodhound, sleuthhound,
+bluetick,
+black-and-tan coonhound,
+Walker hound, Walker foxhound,
+English foxhound,
+redbone,
+borzoi, Russian wolfhound,
+Irish wolfhound,
+Italian greyhound,
+whippet,
+Ibizan hound, Ibizan Podenco,
+Norwegian elkhound, elkhound,
+otterhound, otter hound,
+Saluki, gazelle hound,
+Scottish deerhound, deerhound,
+Weimaraner,
+Staffordshire bullterrier, Staffordshire bull terrier,
+American Staffordshire terrier, Staffordshire terrier, American pit bull terrier, pit bull terrier,
+Bedlington terrier,
+Border terrier,
+Kerry blue terrier,
+Irish terrier,
+Norfolk terrier,
+Norwich terrier,
+Yorkshire terrier,
+wire-haired fox terrier,
+Lakeland terrier,
+Sealyham terrier, Sealyham,
+Airedale, Airedale terrier,
+cairn, cairn terrier,
+Australian terrier,
+Dandie Dinmont, Dandie Dinmont terrier,
+Boston bull, Boston terrier,
+miniature schnauzer,
+giant schnauzer,
+standard schnauzer,
+Scotch terrier, Scottish terrier, Scottie,
+Tibetan terrier, chrysanthemum dog,
+silky terrier, Sydney silky,
+soft-coated wheaten terrier,
+West Highland white terrier,
+Lhasa, Lhasa apso,
+flat-coated retriever,
+curly-coated retriever,
+golden retriever,
+Labrador retriever,
+Chesapeake Bay retriever,
+German short-haired pointer,
+vizsla, Hungarian pointer,
+English setter,
+Irish setter, red setter,
+Gordon setter,
+Brittany spaniel,
+clumber, clumber spaniel,
+English springer, English springer spaniel,
+Welsh springer spaniel,
+cocker spaniel, English cocker spaniel, cocker,
+Sussex spaniel,
+Irish water spaniel,
+kuvasz,
+schipperke,
+groenendael,
+malinois,
+briard,
+kelpie,
+komondor,
+Old English sheepdog, bobtail,
+Shetland sheepdog, Shetland sheep dog, Shetland,
+collie,
+Border collie,
+Bouvier des Flandres, Bouviers des Flandres,
+Rottweiler,
+German shepherd, German shepherd dog, German police dog, alsatian,
+Doberman, Doberman pinscher,
+miniature pinscher,
+Greater Swiss Mountain dog,
+Bernese mountain dog,
+Appenzeller,
+EntleBucher,
+boxer,
+bull mastiff,
+Tibetan mastiff,
+French bulldog,
+Great Dane,
+Saint Bernard, St Bernard,
+Eskimo dog, husky,
+malamute, malemute, Alaskan malamute,
+Siberian husky,
+dalmatian, coach dog, carriage dog,
+affenpinscher, monkey pinscher, monkey dog,
+basenji,
+pug, pug-dog,
+Leonberg,
+Newfoundland, Newfoundland dog,
+Great Pyrenees,
+Samoyed, Samoyede,
+Pomeranian,
+chow, chow chow,
+keeshond,
+Brabancon griffon,
+Pembroke, Pembroke Welsh corgi,
+Cardigan, Cardigan Welsh corgi,
+toy poodle,
+miniature poodle,
+standard poodle,
+Mexican hairless,
+timber wolf, grey wolf, gray wolf, Canis lupus,
+white wolf, Arctic wolf, Canis lupus tundrarum,
+red wolf, maned wolf, Canis rufus, Canis niger,
+coyote, prairie wolf, brush wolf, Canis latrans,
+dingo, warrigal, warragal, Canis dingo,
+dhole, Cuon alpinus,
+African hunting dog, hyena dog, Cape hunting dog, Lycaon pictus,
+hyena, hyaena,
+red fox, Vulpes vulpes,
+kit fox, Vulpes macrotis,
+Arctic fox, white fox, Alopex lagopus,
+grey fox, gray fox, Urocyon cinereoargenteus,
+tabby, tabby cat,
+tiger cat,
+Persian cat,
+Siamese cat, Siamese,
+Egyptian cat,
+cougar, puma, catamount, mountain lion, painter, panther, Felis concolor,
+lynx, catamount,
+leopard, Panthera pardus,
+snow leopard, ounce, Panthera uncia,
+jaguar, panther, Panthera onca, Felis onca,
+lion, king of beasts, Panthera leo,
+tiger, Panthera tigris,
+cheetah, chetah, Acinonyx jubatus,
+brown bear, bruin, Ursus arctos,
+American black bear, black bear, Ursus americanus, Euarctos americanus,
+ice bear, polar bear, Ursus Maritimus, Thalarctos maritimus,
+sloth bear, Melursus ursinus, Ursus ursinus,
+mongoose,
+meerkat, mierkat,
+tiger beetle,
+ladybug, ladybeetle, lady beetle, ladybird, ladybird beetle,
+ground beetle, carabid beetle,
+long-horned beetle, longicorn, longicorn beetle,
+leaf beetle, chrysomelid,
+dung beetle,
+rhinoceros beetle,
+weevil,
+fly,
+bee,
+ant, emmet, pismire,
+grasshopper, hopper,
+cricket,
+walking stick, walkingstick, stick insect,
+cockroach, roach,
+mantis, mantid,
+cicada, cicala,
+leafhopper,
+lacewing, lacewing fly,
+"dragonfly, darning needle, devils darning needle, sewing needle, snake feeder, snake doctor, mosquito hawk, skeeter hawk",
+damselfly,
+admiral,
+ringlet, ringlet butterfly,
+monarch, monarch butterfly, milkweed butterfly, Danaus plexippus,
+cabbage butterfly,
+sulphur butterfly, sulfur butterfly,
+lycaenid, lycaenid butterfly,
+starfish, sea star,
+sea urchin,
+sea cucumber, holothurian,
+wood rabbit, cottontail, cottontail rabbit,
+hare,
+Angora, Angora rabbit,
+hamster,
+porcupine, hedgehog,
+fox squirrel, eastern fox squirrel, Sciurus niger,
+marmot,
+beaver,
+guinea pig, Cavia cobaya,
+sorrel,
+zebra,
+hog, pig, grunter, squealer, Sus scrofa,
+wild boar, boar, Sus scrofa,
+warthog,
+hippopotamus, hippo, river horse, Hippopotamus amphibius,
+ox,
+water buffalo, water ox, Asiatic buffalo, Bubalus bubalis,
+bison,
+ram, tup,
+bighorn, bighorn sheep, cimarron, Rocky Mountain bighorn, Rocky Mountain sheep, Ovis canadensis,
+ibex, Capra ibex,
+hartebeest,
+impala, Aepyceros melampus,
+gazelle,
+Arabian camel, dromedary, Camelus dromedarius,
+llama,
+weasel,
+mink,
+polecat, fitch, foulmart, foumart, Mustela putorius,
+black-footed ferret, ferret, Mustela nigripes,
+otter,
+skunk, polecat, wood pussy,
+badger,
+armadillo,
+three-toed sloth, ai, Bradypus tridactylus,
+orangutan, orang, orangutang, Pongo pygmaeus,
+gorilla, Gorilla gorilla,
+chimpanzee, chimp, Pan troglodytes,
+gibbon, Hylobates lar,
+siamang, Hylobates syndactylus, Symphalangus syndactylus,
+guenon, guenon monkey,
+patas, hussar monkey, Erythrocebus patas,
+baboon,
+macaque,
+langur,
+colobus, colobus monkey,
+proboscis monkey, Nasalis larvatus,
+marmoset,
+capuchin, ringtail, Cebus capucinus,
+howler monkey, howler,
+titi, titi monkey,
+spider monkey, Ateles geoffroyi,
+squirrel monkey, Saimiri sciureus,
+Madagascar cat, ring-tailed lemur, Lemur catta,
+indri, indris, Indri indri, Indri brevicaudatus,
+Indian elephant, Elephas maximus,
+African elephant, Loxodonta africana,
+lesser panda, red panda, panda, bear cat, cat bear, Ailurus fulgens,
+giant panda, panda, panda bear, coon bear, Ailuropoda melanoleuca,
+barracouta, snoek,
+eel,
+coho, cohoe, coho salmon, blue jack, silver salmon, Oncorhynchus kisutch,
+rock beauty, Holocanthus tricolor,
+anemone fish,
+sturgeon,
+gar, garfish, garpike, billfish, Lepisosteus osseus,
+lionfish,
+puffer, pufferfish, blowfish, globefish,
+abacus,
+abaya,
+"academic gown, academic robe, judges robe",
+accordion, piano accordion, squeeze box,
+acoustic guitar,
+aircraft carrier, carrier, flattop, attack aircraft carrier,
+airliner,
+airship, dirigible,
+altar,
+ambulance,
+amphibian, amphibious vehicle,
+analog clock,
+apiary, bee house,
+apron,
+ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, dustbin, trash barrel, trash bin,
+assault rifle, assault gun,
+backpack, back pack, knapsack, packsack, rucksack, haversack,
+bakery, bakeshop, bakehouse,
+balance beam, beam,
+balloon,
+ballpoint, ballpoint pen, ballpen, Biro,
+Band Aid,
+banjo,
+bannister, banister, balustrade, balusters, handrail,
+barbell,
+barber chair,
+barbershop,
+barn,
+barometer,
+barrel, cask,
+barrow, garden cart, lawn cart, wheelbarrow,
+baseball,
+basketball,
+bassinet,
+bassoon,
+bathing cap, swimming cap,
+bath towel,
+bathtub, bathing tub, bath, tub,
+beach wagon, station wagon, wagon, estate car, beach waggon, station waggon, waggon,
+beacon, lighthouse, beacon light, pharos,
+beaker,
+bearskin, busby, shako,
+beer bottle,
+beer glass,
+bell cote, bell cot,
+bib,
+bicycle-built-for-two, tandem bicycle, tandem,
+bikini, two-piece,
+binder, ring-binder,
+binoculars, field glasses, opera glasses,
+birdhouse,
+boathouse,
+bobsled, bobsleigh, bob,
+bolo tie, bolo, bola tie, bola,
+bonnet, poke bonnet,
+bookcase,
+bookshop, bookstore, bookstall,
+bottlecap,
+bow,
+bow tie, bow-tie, bowtie,
+brass, memorial tablet, plaque,
+brassiere, bra, bandeau,
+breakwater, groin, groyne, mole, bulwark, seawall, jetty,
+breastplate, aegis, egis,
+broom,
+bucket, pail,
+buckle,
+bulletproof vest,
+bullet train, bullet,
+butcher shop, meat market,
+cab, hack, taxi, taxicab,
+caldron, cauldron,
+candle, taper, wax light,
+cannon,
+canoe,
+can opener, tin opener,
+cardigan,
+car mirror,
+carousel, carrousel, merry-go-round, roundabout, whirligig,
+"carpenters kit, tool kit",
+carton,
+car wheel,
+cash machine, cash dispenser, automated teller machine, automatic teller machine, automated teller, automatic teller, ATM,
+cassette,
+cassette player,
+castle,
+catamaran,
+CD player,
+cello, violoncello,
+cellular telephone, cellular phone, cellphone, cell, mobile phone,
+chain,
+chainlink fence,
+chain mail, ring mail, mail, chain armor, chain armour, ring armor, ring armour,
+chain saw, chainsaw,
+chest,
+chiffonier, commode,
+chime, bell, gong,
+china cabinet, china closet,
+Christmas stocking,
+church, church building,
+cinema, movie theater, movie theatre, movie house, picture palace,
+cleaver, meat cleaver, chopper,
+cliff dwelling,
+cloak,
+clog, geta, patten, sabot,
+cocktail shaker,
+coffee mug,
+coffeepot,
+coil, spiral, volute, whorl, helix,
+combination lock,
+computer keyboard, keypad,
+confectionery, confectionary, candy store,
+container ship, containership, container vessel,
+convertible,
+corkscrew, bottle screw,
+cornet, horn, trumpet, trump,
+cowboy boot,
+cowboy hat, ten-gallon hat,
+cradle,
+crane,
+crash helmet,
+crate,
+crib, cot,
+Crock Pot,
+croquet ball,
+crutch,
+cuirass,
+dam, dike, dyke,
+desk,
+desktop computer,
+dial telephone, dial phone,
+diaper, nappy, napkin,
+digital clock,
+digital watch,
+dining table, board,
+dishrag, dishcloth,
+dishwasher, dish washer, dishwashing machine,
+disk brake, disc brake,
+dock, dockage, docking facility,
+dogsled, dog sled, dog sleigh,
+dome,
+doormat, welcome mat,
+drilling platform, offshore rig,
+drum, membranophone, tympan,
+drumstick,
+dumbbell,
+Dutch oven,
+electric fan, blower,
+electric guitar,
+electric locomotive,
+entertainment center,
+envelope,
+espresso maker,
+face powder,
+feather boa, boa,
+file, file cabinet, filing cabinet,
+fireboat,
+fire engine, fire truck,
+fire screen, fireguard,
+flagpole, flagstaff,
+flute, transverse flute,
+folding chair,
+football helmet,
+forklift,
+fountain,
+fountain pen,
+four-poster,
+freight car,
+French horn, horn,
+frying pan, frypan, skillet,
+fur coat,
+garbage truck, dustcart,
+gasmask, respirator, gas helmet,
+gas pump, gasoline pump, petrol pump, island dispenser,
+goblet,
+go-kart,
+golf ball,
+golfcart, golf cart,
+gondola,
+gong, tam-tam,
+gown,
+grand piano, grand,
+greenhouse, nursery, glasshouse,
+grille, radiator grille,
+grocery store, grocery, food market, market,
+guillotine,
+hair slide,
+hair spray,
+half track,
+hammer,
+hamper,
+hand blower, blow dryer, blow drier, hair dryer, hair drier,
+hand-held computer, hand-held microcomputer,
+handkerchief, hankie, hanky, hankey,
+hard disc, hard disk, fixed disk,
+harmonica, mouth organ, harp, mouth harp,
+harp,
+harvester, reaper,
+hatchet,
+holster,
+home theater, home theatre,
+honeycomb,
+hook, claw,
+hoopskirt, crinoline,
+horizontal bar, high bar,
+horse cart, horse-cart,
+hourglass,
+iPod,
+iron, smoothing iron,
+"jack-o-lantern",
+jean, blue jean, denim,
+jeep, landrover,
+jersey, T-shirt, tee shirt,
+jigsaw puzzle,
+jinrikisha, ricksha, rickshaw,
+joystick,
+kimono,
+knee pad,
+knot,
+lab coat, laboratory coat,
+ladle,
+lampshade, lamp shade,
+laptop, laptop computer,
+lawn mower, mower,
+lens cap, lens cover,
+letter opener, paper knife, paperknife,
+library,
+lifeboat,
+lighter, light, igniter, ignitor,
+limousine, limo,
+liner, ocean liner,
+lipstick, lip rouge,
+Loafer,
+lotion,
+loudspeaker, speaker, speaker unit, loudspeaker system, speaker system,
+"loupe, jewelers loupe",
+lumbermill, sawmill,
+magnetic compass,
+mailbag, postbag,
+mailbox, letter box,
+maillot,
+maillot, tank suit,
+manhole cover,
+maraca,
+marimba, xylophone,
+mask,
+matchstick,
+maypole,
+maze, labyrinth,
+measuring cup,
+medicine chest, medicine cabinet,
+megalith, megalithic structure,
+microphone, mike,
+microwave, microwave oven,
+military uniform,
+milk can,
+minibus,
+miniskirt, mini,
+minivan,
+missile,
+mitten,
+mixing bowl,
+mobile home, manufactured home,
+Model T,
+modem,
+monastery,
+monitor,
+moped,
+mortar,
+mortarboard,
+mosque,
+mosquito net,
+motor scooter, scooter,
+mountain bike, all-terrain bike, off-roader,
+mountain tent,
+mouse, computer mouse,
+mousetrap,
+moving van,
+muzzle,
+nail,
+neck brace,
+necklace,
+nipple,
+notebook, notebook computer,
+obelisk,
+oboe, hautboy, hautbois,
+ocarina, sweet potato,
+odometer, hodometer, mileometer, milometer,
+oil filter,
+organ, pipe organ,
+oscilloscope, scope, cathode-ray oscilloscope, CRO,
+overskirt,
+oxcart,
+oxygen mask,
+packet,
+paddle, boat paddle,
+paddlewheel, paddle wheel,
+padlock,
+paintbrush,
+"pajama, pyjama, pjs, jammies",
+palace,
+panpipe, pandean pipe, syrinx,
+paper towel,
+parachute, chute,
+parallel bars, bars,
+park bench,
+parking meter,
+passenger car, coach, carriage,
+patio, terrace,
+pay-phone, pay-station,
+pedestal, plinth, footstall,
+pencil box, pencil case,
+pencil sharpener,
+perfume, essence,
+Petri dish,
+photocopier,
+pick, plectrum, plectron,
+pickelhaube,
+picket fence, paling,
+pickup, pickup truck,
+pier,
+piggy bank, penny bank,
+pill bottle,
+pillow,
+ping-pong ball,
+pinwheel,
+pirate, pirate ship,
+pitcher, ewer,
+"plane, carpenters plane, woodworking plane",
+planetarium,
+plastic bag,
+plate rack,
+plow, plough,
+"plunger, plumbers helper",
+Polaroid camera, Polaroid Land camera,
+pole,
+police van, police wagon, paddy wagon, patrol wagon, wagon, black Maria,
+poncho,
+pool table, billiard table, snooker table,
+pop bottle, soda bottle,
+pot, flowerpot,
+"potters wheel",
+power drill,
+prayer rug, prayer mat,
+printer,
+prison, prison house,
+projectile, missile,
+projector,
+puck, hockey puck,
+punching bag, punch bag, punching ball, punchball,
+purse,
+quill, quill pen,
+quilt, comforter, comfort, puff,
+racer, race car, racing car,
+racket, racquet,
+radiator,
+radio, wireless,
+radio telescope, radio reflector,
+rain barrel,
+recreational vehicle, RV, R.V.,
+reel,
+reflex camera,
+refrigerator, icebox,
+remote control, remote,
+restaurant, eating house, eating place, eatery,
+revolver, six-gun, six-shooter,
+rifle,
+rocking chair, rocker,
+rotisserie,
+rubber eraser, rubber, pencil eraser,
+rugby ball,
+rule, ruler,
+running shoe,
+safe,
+safety pin,
+saltshaker, salt shaker,
+sandal,
+sarong,
+sax, saxophone,
+scabbard,
+scale, weighing machine,
+school bus,
+schooner,
+scoreboard,
+screen, CRT screen,
+screw,
+screwdriver,
+seat belt, seatbelt,
+sewing machine,
+shield, buckler,
+shoe shop, shoe-shop, shoe store,
+shoji,
+shopping basket,
+shopping cart,
+shovel,
+shower cap,
+shower curtain,
+ski,
+ski mask,
+sleeping bag,
+slide rule, slipstick,
+sliding door,
+slot, one-armed bandit,
+snorkel,
+snowmobile,
+snowplow, snowplough,
+soap dispenser,
+soccer ball,
+sock,
+solar dish, solar collector, solar furnace,
+sombrero,
+soup bowl,
+space bar,
+space heater,
+space shuttle,
+spatula,
+speedboat,
+"spider web, spiders web",
+spindle,
+sports car, sport car,
+spotlight, spot,
+stage,
+steam locomotive,
+steel arch bridge,
+steel drum,
+stethoscope,
+stole,
+stone wall,
+stopwatch, stop watch,
+stove,
+strainer,
+streetcar, tram, tramcar, trolley, trolley car,
+stretcher,
+studio couch, day bed,
+stupa, tope,
+submarine, pigboat, sub, U-boat,
+suit, suit of clothes,
+sundial,
+sunglass,
+sunglasses, dark glasses, shades,
+sunscreen, sunblock, sun blocker,
+suspension bridge,
+swab, swob, mop,
+sweatshirt,
+swimming trunks, bathing trunks,
+swing,
+switch, electric switch, electrical switch,
+syringe,
+table lamp,
+tank, army tank, armored combat vehicle, armoured combat vehicle,
+tape player,
+teapot,
+teddy, teddy bear,
+television, television system,
+tennis ball,
+thatch, thatched roof,
+theater curtain, theatre curtain,
+thimble,
+thresher, thrasher, threshing machine,
+throne,
+tile roof,
+toaster,
+tobacco shop, tobacconist shop, tobacconist,
+toilet seat,
+torch,
+totem pole,
+tow truck, tow car, wrecker,
+toyshop,
+tractor,
+trailer truck, tractor trailer, trucking rig, rig, articulated lorry, semi,
+tray,
+trench coat,
+tricycle, trike, velocipede,
+trimaran,
+tripod,
+triumphal arch,
+trolleybus, trolley coach, trackless trolley,
+trombone,
+tub, vat,
+turnstile,
+typewriter keyboard,
+umbrella,
+unicycle, monocycle,
+upright, upright piano,
+vacuum, vacuum cleaner,
+vase,
+vault,
+velvet,
+vending machine,
+vestment,
+viaduct,
+violin, fiddle,
+volleyball,
+waffle iron,
+wall clock,
+wallet, billfold, notecase, pocketbook,
+wardrobe, closet, press,
+warplane, military plane,
+washbasin, handbasin, washbowl, lavabo, wash-hand basin,
+washer, automatic washer, washing machine,
+water bottle,
+water jug,
+water tower,
+whiskey jug,
+whistle,
+wig,
+window screen,
+window shade,
+Windsor tie,
+wine bottle,
+wing,
+wok,
+wooden spoon,
+wool, woolen, woollen,
+worm fence, snake fence, snake-rail fence, Virginia fence,
+wreck,
+yawl,
+yurt,
+web site, website, internet site, site,
+comic book,
+crossword puzzle, crossword,
+street sign,
+traffic light, traffic signal, stoplight,
+book jacket, dust cover, dust jacket, dust wrapper,
+menu,
+plate,
+guacamole,
+consomme,
+hot pot, hotpot,
+trifle,
+ice cream, icecream,
+ice lolly, lolly, lollipop, popsicle,
+French loaf,
+bagel, beigel,
+pretzel,
+cheeseburger,
+hotdog, hot dog, red hot,
+mashed potato,
+head cabbage,
+broccoli,
+cauliflower,
+zucchini, courgette,
+spaghetti squash,
+acorn squash,
+butternut squash,
+cucumber, cuke,
+artichoke, globe artichoke,
+bell pepper,
+cardoon,
+mushroom,
+Granny Smith,
+strawberry,
+orange,
+lemon,
+fig,
+pineapple, ananas,
+banana,
+jackfruit, jak, jack,
+custard apple,
+pomegranate,
+hay,
+carbonara,
+chocolate sauce, chocolate syrup,
+dough,
+meat loaf, meatloaf,
+pizza, pizza pie,
+potpie,
+burrito,
+red wine,
+espresso,
+cup,
+eggnog,
+alp,
+bubble,
+cliff, drop, drop-off,
+coral reef,
+geyser,
+lakeside, lakeshore,
+promontory, headland, head, foreland,
+sandbar, sand bar,
+seashore, coast, seacoast, sea-coast,
+valley, vale,
+volcano,
+ballplayer, baseball player,
+groom, bridegroom,
+scuba diver,
+rapeseed,
+daisy,
+"yellow ladys slipper, yellow lady-slipper, Cypripedium calceolus, Cypripedium parviflorum",
+corn,
+acorn,
+hip, rose hip, rosehip,
+buckeye, horse chestnut, conker,
+coral fungus,
+agaric,
+gyromitra,
+stinkhorn, carrion fungus,
+earthstar,
+hen-of-the-woods, hen of the woods, Polyporus frondosus, Grifola frondosa,
+bolete,
+ear, spike, capitulum,
+toilet tissue, toilet paper, bathroom tissue
diff --git a/examples/C++/PaddleClas/resnet_50_vd/rpc_client.py b/examples/C++/PaddleClas/resnet_50_vd/rpc_client.py
new file mode 100644
index 0000000000000000000000000000000000000000..5e4c42841fccc257b057fd09c06ae495b0fa77bc
--- /dev/null
+++ b/examples/C++/PaddleClas/resnet_50_vd/rpc_client.py
@@ -0,0 +1,51 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys
+from paddle_serving_client import Client
+
+# reader ops from paddle_serving_app
+from paddle_serving_app.reader import Sequential, URL2Image, Resize
+from paddle_serving_app.reader import CenterCrop, RGB2BGR, Transpose, Div, Normalize
+import time
+
+client = Client()
+client.load_client_config("./ResNet50_vd_client/serving_client_conf.prototxt")
+client.connect(["127.0.0.1:9394"])
+
+label_dict = {}
+label_idx = 0
+with open("imagenet.label") as fin:
+ for line in fin:
+ label_dict[label_idx] = line.strip()
+ label_idx += 1
+
+# build the image preprocessing pipeline
+seq = Sequential([
+ URL2Image(), Resize(256), CenterCrop(224), RGB2BGR(), Transpose((2, 0, 1)),
+ Div(255), Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225], True)
+])
+
+start = time.time()
+image_file = "https://paddle-serving.bj.bcebos.com/imagenet-example/daisy.jpg"
+for i in range(1):
+ img = seq(image_file)
+ fetch_map = client.predict(feed={"inputs": img}, fetch=[], batch=False)
+
+ prob = max(fetch_map["save_infer_model/scale_0.tmp_1"][0])
+ label = label_dict[fetch_map["save_infer_model/scale_0.tmp_1"][0].tolist()
+ .index(prob)].strip().replace(",", "")
+ print("prediction: {}, probability: {}".format(label, prob))
+end = time.time()
+print(end - start)
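
The client above reports only the top-1 class by taking the arg-max over the output tensor. Where top-k results are useful, a small helper can rank the same scores; a sketch reusing the `fetch_map` layout and the `save_infer_model/scale_0.tmp_1` fetch name from the script above (the helper itself is illustrative, not part of the example):

```python
import numpy as np

def topk_labels(fetch_map, label_dict, k=5):
    # scores come back under the model's fetch var name used above
    scores = np.asarray(fetch_map["save_infer_model/scale_0.tmp_1"][0])
    top = scores.argsort()[::-1][:k]
    return [(label_dict[int(i)].replace(",", ""), float(scores[i])) for i in top]
```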
diff --git a/examples/C++/imdb/README_CN.md b/examples/C++/imdb/README_CN.md
index a1fecc8af35dcd2f5a38f47480b9b80b3cf96054..42841c6b8d0b4d1f064a33b54503193b88eed635 100755
--- a/examples/C++/imdb/README_CN.md
+++ b/examples/C++/imdb/README_CN.md
@@ -1,4 +1,4 @@
-## IMDB Review Sentiment Prediction Service
+## IMDB Review Sentiment Prediction ABTest Service
(Simplified Chinese|[English](./README.md))
@@ -11,16 +11,24 @@ sh get_data.sh
### Start the prediction services (BRPC-Client/GRPC-Client/Http-Client supported)
+```
+## Start the bow model service
+python3 -m paddle_serving_server.serve --model imdb_bow_model/ --port 9297 >/dev/null 2>&1 &
+
+## Start the cnn model service
+python3 -m paddle_serving_server.serve --model imdb_cnn_model/ --port 9298 >/dev/null 2>&1 &
+
+## Start the lstm model service
+python3 -m paddle_serving_server.serve --model imdb_lstm_model/ --port 9299 >/dev/null 2>&1 &
```
-python3 -m paddle_serving_server.serve --model imdb_cnn_model/ --port 9292
-```
-### BRPC-Client prediction
+
+### ABTest prediction
```
-head test_data/part-0 | python3 test_client.py imdb_cnn_client_conf/serving_client_conf.prototxt imdb.vocab
+head test_data/part-0 | python3 abtest_client.py imdb_cnn_client_conf/serving_client_conf.prototxt imdb.vocab
```
Predicts the first ten samples of test_data/part-0.
-### BRPC-Client prediction
+### HTTP prediction
```
head test_data/part-0 | python3 test_http_client.py imdb_cnn_client_conf/serving_client_conf.prototxt imdb.vocab
```
diff --git a/examples/C++/imdb/abtest_client.py b/examples/C++/imdb/abtest_client.py
index 1a14c87c355552248394fa504d37f54a4c58132a..e0f910e37c7cf5a99c0b5ec1249f0ceed68f21ae 100644
--- a/examples/C++/imdb/abtest_client.py
+++ b/examples/C++/imdb/abtest_client.py
@@ -11,35 +11,35 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-
+# pylint: disable=doc-string-missing
from paddle_serving_client import Client
+from paddle_serving_app.reader.imdb_reader import IMDBDataset
+import sys
import numpy as np
client = Client()
-client.load_client_config('imdb_bow_client_conf/serving_client_conf.prototxt')
-client.add_variant("bow", ["127.0.0.1:8000"], 10)
-client.add_variant("lstm", ["127.0.0.1:9000"], 90)
+client.load_client_config(sys.argv[1])
+client.add_variant("bow", ["127.0.0.1:9297"], 10)
+client.add_variant("cnn", ["127.0.0.1:9298"], 30)
+client.add_variant("lstm", ["127.0.0.1:9299"], 60)
client.connect()
-print('please wait for about 10s')
-with open('processed.data') as f:
- cnt = {"bow": {'acc': 0, 'total': 0}, "lstm": {'acc': 0, 'total': 0}}
- for line in f:
- word_ids, label = line.split(';')
- word_ids = [int(x) for x in word_ids.split(',')]
- word_len = len(word_ids)
- feed = {
- "words": np.array(word_ids).reshape(word_len, 1),
- "words.lod": [0, word_len]
- }
- fetch = ["acc", "cost", "prediction"]
- [fetch_map, tag] = client.predict(
- feed=feed, fetch=fetch, need_variant_tag=True, batch=True)
- if (float(fetch_map["prediction"][0][1]) - 0.5) * (float(label[0]) - 0.5
- ) > 0:
- cnt[tag]['acc'] += 1
- cnt[tag]['total'] += 1
+# You can feed any English sentence or dataset here.
+# This example reuses the IMDB reader from training; you
+# can define your own data preprocessing easily.
+imdb_dataset = IMDBDataset()
+imdb_dataset.load_resource(sys.argv[2])
- for tag, data in cnt.items():
- print('[{}](total: {}) acc: {}'.format(tag, data[
- 'total'], float(data['acc']) / float(data['total'])))
+for line in sys.stdin:
+ word_ids, label = imdb_dataset.get_words_and_label(line)
+ word_len = len(word_ids)
+ feed = {
+ "words": np.array(word_ids).reshape(word_len, 1),
+ "words.lod": [0, word_len]
+ }
+ #print(feed)
+ fetch = ["prediction"]
+ fetch_map = client.predict(
+ feed=feed, fetch=fetch, batch=True, need_variant_tag=True)
+ print("server_tag={} prediction={} ".format(fetch_map[1], fetch_map[0][
+ "prediction"][0]))
diff --git a/examples/C++/imdb/test_http_client.py b/examples/C++/imdb/test_http_client.py
index e3cc705150ccc197ab1be24bf11e0a92e1d62380..d22b92d7850097130e24d3cf86857ddcc8caef2f 100755
--- a/examples/C++/imdb/test_http_client.py
+++ b/examples/C++/imdb/test_http_client.py
@@ -40,7 +40,7 @@ we recommend use Proto data format in HTTP-body, set True(which is default)
if you want to use JSON data format in HTTP-body, set False
'''
#client.set_http_proto(True)
-client.connect(["127.0.0.1:9292"])
+client.connect(["127.0.0.1:9297"])
# You can feed any English sentence or dataset here.
# This example reuses the IMDB reader from training; you
diff --git a/examples/Pipeline/PaddleNLP/semantic_indexing/README.md b/examples/Pipeline/PaddleNLP/semantic_indexing/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..20e0bc04a2b0de6c3fb21355b8636de73c625d42
--- /dev/null
+++ b/examples/Pipeline/PaddleNLP/semantic_indexing/README.md
@@ -0,0 +1,201 @@
+# In-batch Negatives
+
+ **Table of Contents**
+
+* [Model Download](#模型下载)
+* [Model Deployment](#模型部署)
+
+
+
+
+## 1. Semantic indexing model
+
+**Download link for the trained semantic indexing model:**
+
+The model below has the following structure parameters: `TransformerLayer:12, Hidden:768, Heads:12, OutputEmbSize: 256`
+
+|Model|Training configuration|Hardware|MD5|
+| ------------ | ------------ | ------------ |-----------|
+|[batch_neg](https://bj.bcebos.com/v1/paddlenlp/models/inbatch_model.zip)|margin:0.2 scale:30 epoch:3 lr:5E-5 bs:64 max_len:64|4x V100-16G|f3e5c7d7b0b718c2530c5e1b136b2d74|
+
+```
+wget https://bj.bcebos.com/v1/paddlenlp/models/inbatch_model.zip
+unzip inbatch_model.zip -d checkpoints
+```
+
+
+
+## 2. Model deployment
+
+### 2.1 Dynamic-to-static export
+
+First, convert the dynamic-graph model to a static graph:
+
+```
+python export_model.py --params_path checkpoints/model_40/model_state.pdparams --output_path=./output
+```
+Alternatively, run the bash script below:
+
+```
+sh scripts/export_model.sh
+```
+
+### 2.2 Prediction with Paddle Inference
+
+Prediction can either extract embedding vectors or compute the similarity between two texts.
+
+Modify the samples in id2corpus:
+
+```
+# extract vectors
+id2corpus={0:'国有企业引入非国有资本对创新绩效的影响——基于制造业国有上市公司的经验证据'}
+# compute similarity
+corpus_list=[['中西方语言与文化的差异','中西方文化差异以及语言体现中西方文化,差异,语言体现'],
+ ['中西方语言与文化的差异','飞桨致力于让深度学习技术的创新与应用更简单']]
+
+```
+
+Then run prediction with Paddle Inference:
+
+```
+python deploy/python/predict.py --model_dir=./output
+```
+Alternatively, run the bash script below:
+
+```
+sh deploy.sh
+```
+The final output is a 256-dimensional feature vector and the predicted probabilities for the sentence pairs:
+
+```
+(1, 256)
+[[-0.0394925 -0.04474756 -0.065534 0.00939134 0.04359895 0.14659195
+ -0.0091779 -0.07303623 0.09413272 -0.01255222 -0.08685658 0.02762237
+ 0.10138468 0.00962821 0.10888419 0.04553023 0.05898942 0.00694253
+ ....
+
+[0.959269642829895, 0.04725276678800583]
+```
+
+### 2.3 Deployment with Paddle Serving
+
+For detailed Paddle Serving documentation, see [Pipeline_Design](https://github.com/PaddlePaddle/Serving/blob/v0.7.0/doc/Python_Pipeline/Pipeline_Design_CN.md) and [Serving_Design](https://github.com/PaddlePaddle/Serving/blob/v0.7.0/doc/Serving_Design_CN.md). First, convert the static-graph model into the Serving format:
+
+```
+python export_to_serving.py \
+ --dirname "output" \
+ --model_filename "inference.get_pooled_embedding.pdmodel" \
+ --params_filename "inference.get_pooled_embedding.pdiparams" \
+ --server_path "./serving_server" \
+ --client_path "./serving_client" \
+ --fetch_alias_names "output_embedding"
+
+```
+
+Parameter descriptions:
+* `dirname`: path of the model files to convert; both the Program structure file and the parameter files are stored in this directory.
+* `model_filename`: name of the file storing the Inference Program structure of the model to convert. If set to None, `__model__` is used as the default filename.
+* `params_filename`: name of the file storing all parameters of the model to convert. It needs to be specified if and only if all parameters are saved in a single binary file; if they are stored in separate files, set it to None.
+* `server_path`: output path for the converted model files and server configuration. Defaults to serving_server.
+* `client_path`: output path for the converted client configuration. Defaults to serving_client.
+* `fetch_alias_names`: alias names for the model outputs; an output such as output_embedding can be exposed under a different name. Unset by default.
+* `feed_alias_names`: alias names for the model inputs; an input such as input_ids can likewise be renamed. Unset by default.
+
+Alternatively, run the bash script below:
+```
+sh scripts/export_to_serving.sh
+```
+
+Paddle Serving offers two deployment modes: the Pipeline mode and the C++ mode. Their usage is described below.
+
+#### 2.3.1 Pipeline mode
+
+Start the Pipeline server:
+
+```
+python web_service.py
+```
+
+Start the client to call the server.
+
+First, modify the samples to predict in rpc_client.py:
+
+```
+list_data = [
+ "国有企业引入非国有资本对创新绩效的影响——基于制造业国有上市公司的经验证据",
+ "试论翻译过程中的文化差异与语言空缺翻译过程,文化差异,语言空缺,文化对比"
+]
+```
+Then run:
+
+```
+python rpc_client.py
+```
+The model output is:
+
+```
+{'0': '国有企业引入非国有资本对创新绩效的影响——基于制造业国有上市公司的经验证据', '1': '试论翻译过程中的文化差异与语言空缺翻译过程,文化差异,语言空缺,文化对比'}
+PipelineClient::predict pack_data time:1641450851.3752182
+PipelineClient::predict before time:1641450851.375738
+['output_embedding']
+(2, 256)
+[[ 0.07830612 -0.14036864 0.03433796 -0.14967982 -0.03386067 0.06630666
+ 0.01357943 0.03531194 0.02411093 0.02000859 0.05724002 -0.08119463
+ ......
+```
+
+The client sent 2 texts and received 2 embedding vectors.
+
+#### 2.3.2 C++ mode
+
+Start the C++ Serving server:
+
+```
+python -m paddle_serving_server.serve --model serving_server --port 9393 --gpu_id 2 --thread 5 --ir_optim True --use_trt --precision FP16
+```
+Or use the script:
+
+```
+sh deploy/C++/start_server.sh
+```
+The client can use either HTTP or RPC; the RPC way is:
+
+```
+python deploy/C++/rpc_client.py
+```
+The output is:
+```
+I0209 20:40:07.978225 20896 general_model.cpp:490] [client]logid=0,client_cost=395.695ms,server_cost=392.559ms.
+time to cost :0.3960278034210205 seconds
+{'output_embedding': array([[ 9.01343748e-02, -1.21870913e-01, 1.32834800e-02,
+ -1.57673359e-01, -2.60387752e-02, 6.98455423e-02,
+ 1.58108603e-02, 3.89952064e-02, 3.22783105e-02,
+ 3.49135026e-02, 7.66086206e-02, -9.12970975e-02,
+ 6.25643134e-02, 7.21886680e-02, 7.03565404e-02,
+ 5.44054210e-02, 3.25332815e-03, 5.01751155e-02,
+......
+```
+The server returns the embedding vectors.
+
+Alternatively, use the HTTP client:
+
+```
+python deploy/C++/http_client.py
+```
+The output is:
+
+```
+(2, 64)
+(2, 64)
+outputs {
+ tensor {
+ float_data: 0.09013437479734421
+ float_data: -0.12187091261148453
+ float_data: 0.01328347995877266
+ float_data: -0.15767335891723633
+......
+```
+The server returns the embedding vectors.
+
+
+
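Since `get_pooled_embedding` L2-normalizes its output (see base_model.py below), the similarity between the two returned vectors can be recovered client-side with a plain dot product; a small numpy sketch over the `(2, 256)` result array shown above:

```python
import numpy as np

def pairwise_cosine(embs):
    # rows are L2-normalized, so cosine similarity reduces to a dot product
    embs = np.asarray(embs)
    return embs @ embs.T

# pairwise_cosine(result)[0, 1] is the similarity between the two input texts
```
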
diff --git a/examples/Pipeline/PaddleNLP/semantic_indexing/base_model.py b/examples/Pipeline/PaddleNLP/semantic_indexing/base_model.py
new file mode 100644
index 0000000000000000000000000000000000000000..c471d126c2649fee7554fa8f026284c7300ada2f
--- /dev/null
+++ b/examples/Pipeline/PaddleNLP/semantic_indexing/base_model.py
@@ -0,0 +1,187 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import abc
+import sys
+
+import numpy as np
+
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+
+
+class SemanticIndexBase(nn.Layer):
+ def __init__(self, pretrained_model, dropout=None, output_emb_size=None):
+ super().__init__()
+ self.ptm = pretrained_model
+ self.dropout = nn.Dropout(dropout if dropout is not None else 0.1)
+
+ # if output_emb_size is not None, add a Linear layer to reduce the embedding size;
+ # we recommend setting output_emb_size = 256 considering the trade-off between
+ # recall performance and efficiency
+
+ self.output_emb_size = output_emb_size
+ if output_emb_size > 0:
+ weight_attr = paddle.ParamAttr(
+ initializer=paddle.nn.initializer.TruncatedNormal(std=0.02))
+ self.emb_reduce_linear = paddle.nn.Linear(
+ 768, output_emb_size, weight_attr=weight_attr)
+
+ @paddle.jit.to_static(input_spec=[
+ paddle.static.InputSpec(
+ shape=[None, None], dtype='int64'), paddle.static.InputSpec(
+ shape=[None, None], dtype='int64')
+ ])
+ def get_pooled_embedding(self,
+ input_ids,
+ token_type_ids=None,
+ position_ids=None,
+ attention_mask=None):
+ _, cls_embedding = self.ptm(input_ids, token_type_ids, position_ids,
+ attention_mask)
+
+ if self.output_emb_size > 0:
+ cls_embedding = self.emb_reduce_linear(cls_embedding)
+ cls_embedding = self.dropout(cls_embedding)
+ cls_embedding = F.normalize(cls_embedding, p=2, axis=-1)
+
+ return cls_embedding
+
+ def get_semantic_embedding(self, data_loader):
+ self.eval()
+ with paddle.no_grad():
+ for batch_data in data_loader:
+ input_ids, token_type_ids = batch_data
+ input_ids = paddle.to_tensor(input_ids)
+ token_type_ids = paddle.to_tensor(token_type_ids)
+
+ text_embeddings = self.get_pooled_embedding(
+ input_ids, token_type_ids=token_type_ids)
+
+ yield text_embeddings
+
+ def cosine_sim(self,
+ query_input_ids,
+ title_input_ids,
+ query_token_type_ids=None,
+ query_position_ids=None,
+ query_attention_mask=None,
+ title_token_type_ids=None,
+ title_position_ids=None,
+ title_attention_mask=None):
+
+ query_cls_embedding = self.get_pooled_embedding(
+ query_input_ids, query_token_type_ids, query_position_ids,
+ query_attention_mask)
+
+ title_cls_embedding = self.get_pooled_embedding(
+ title_input_ids, title_token_type_ids, title_position_ids,
+ title_attention_mask)
+
+ cosine_sim = paddle.sum(query_cls_embedding * title_cls_embedding,
+ axis=-1)
+ return cosine_sim
+
+ @abc.abstractmethod
+ def forward(self):
+ pass
+
+
+class SemanticIndexBaseStatic(nn.Layer):
+ def __init__(self, pretrained_model, dropout=None, output_emb_size=None):
+ super().__init__()
+ self.ptm = pretrained_model
+ self.dropout = nn.Dropout(dropout if dropout is not None else 0.1)
+
+ # if output_emb_size is not None, add a Linear layer to reduce the embedding size;
+ # we recommend setting output_emb_size = 256 considering the trade-off between
+ # recall performance and efficiency
+
+ self.output_emb_size = output_emb_size
+ if output_emb_size > 0:
+ weight_attr = paddle.ParamAttr(
+ initializer=paddle.nn.initializer.TruncatedNormal(std=0.02))
+ self.emb_reduce_linear = paddle.nn.Linear(
+ 768, output_emb_size, weight_attr=weight_attr)
+
+ @paddle.jit.to_static(input_spec=[
+ paddle.static.InputSpec(
+ shape=[None, None], dtype='int64'), paddle.static.InputSpec(
+ shape=[None, None], dtype='int64')
+ ])
+ def get_pooled_embedding(self,
+ input_ids,
+ token_type_ids=None,
+ position_ids=None,
+ attention_mask=None):
+ _, cls_embedding = self.ptm(input_ids, token_type_ids, position_ids,
+ attention_mask)
+
+ if self.output_emb_size > 0:
+ cls_embedding = self.emb_reduce_linear(cls_embedding)
+ cls_embedding = self.dropout(cls_embedding)
+ cls_embedding = F.normalize(cls_embedding, p=2, axis=-1)
+
+ return cls_embedding
+
+ def get_semantic_embedding(self, data_loader):
+ self.eval()
+ with paddle.no_grad():
+ for batch_data in data_loader:
+ input_ids, token_type_ids = batch_data
+ input_ids = paddle.to_tensor(input_ids)
+ token_type_ids = paddle.to_tensor(token_type_ids)
+
+ text_embeddings = self.get_pooled_embedding(
+ input_ids, token_type_ids=token_type_ids)
+
+ yield text_embeddings
+
+ def cosine_sim(self,
+ query_input_ids,
+ title_input_ids,
+ query_token_type_ids=None,
+ query_position_ids=None,
+ query_attention_mask=None,
+ title_token_type_ids=None,
+ title_position_ids=None,
+ title_attention_mask=None):
+
+ query_cls_embedding = self.get_pooled_embedding(
+ query_input_ids, query_token_type_ids, query_position_ids,
+ query_attention_mask)
+
+ title_cls_embedding = self.get_pooled_embedding(
+ title_input_ids, title_token_type_ids, title_position_ids,
+ title_attention_mask)
+
+ cosine_sim = paddle.sum(query_cls_embedding * title_cls_embedding,
+ axis=-1)
+ return cosine_sim
+
+ def forward(self,
+ input_ids,
+ token_type_ids=None,
+ position_ids=None,
+ attention_mask=None):
+ _, cls_embedding = self.ptm(input_ids, token_type_ids, position_ids,
+ attention_mask)
+
+ if self.output_emb_size > 0:
+ cls_embedding = self.emb_reduce_linear(cls_embedding)
+ cls_embedding = self.dropout(cls_embedding)
+ cls_embedding = F.normalize(cls_embedding, p=2, axis=-1)
+
+ return cls_embedding
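
One detail worth noting in both classes above: `get_pooled_embedding` passes the CLS vector through `Dropout`, so calling `.eval()` before export or inference matters, since in eval mode the dropout is a no-op and the embeddings become deterministic. A quick determinism check (a sketch, assuming a constructed `model` and int64 id tensors):

```python
import paddle

def embeddings_deterministic(model, input_ids, token_type_ids):
    # in eval mode Dropout is disabled, so two passes must agree
    model.eval()
    a = model.get_pooled_embedding(input_ids, token_type_ids)
    b = model.get_pooled_embedding(input_ids, token_type_ids)
    return bool(paddle.allclose(a, b))
```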
diff --git a/examples/Pipeline/PaddleNLP/semantic_indexing/deploy/C++/http_client.py b/examples/Pipeline/PaddleNLP/semantic_indexing/deploy/C++/http_client.py
new file mode 100644
index 0000000000000000000000000000000000000000..a976ad9fc33b06ce7148adc7153d4b35183e31c0
--- /dev/null
+++ b/examples/Pipeline/PaddleNLP/semantic_indexing/deploy/C++/http_client.py
@@ -0,0 +1,81 @@
+# coding:utf-8
+# pylint: disable=doc-string-missing
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys
+import time
+import numpy as np
+import requests
+import json
+
+from paddle_serving_client import HttpClient
+import paddlenlp as ppnlp
+
+
+def convert_example(example,
+ tokenizer,
+ max_seq_length=512,
+ pad_to_max_seq_len=True):
+ list_input_ids = []
+ list_token_type_ids = []
+ for text in example:
+ encoded_inputs = tokenizer(
+ text=text,
+ max_seq_len=max_seq_length,
+ pad_to_max_seq_len=pad_to_max_seq_len)
+ input_ids = encoded_inputs["input_ids"]
+ token_type_ids = encoded_inputs["token_type_ids"]
+
+ list_input_ids.append(input_ids)
+ list_token_type_ids.append(token_type_ids)
+ return list_input_ids, list_token_type_ids
+
+
+# Start the Python client
+endpoint_list = ['127.0.0.1:9393']
+client = HttpClient()
+client.load_client_config('serving_client')
+client.connect(endpoint_list)
+feed_names = client.feed_names_
+fetch_names = client.fetch_names_
+print(feed_names)
+print(fetch_names)
+
+# Create the tokenizer
+tokenizer = ppnlp.transformers.ErnieTokenizer.from_pretrained('ernie-1.0')
+max_seq_len = 64
+
+# Data preprocessing
+
+list_data = ['国有企业引入非国有资本对创新绩效的影响——基于制造业国有上市公司的经验证据.', '面向生态系统服务的生态系统分类方案研发与应用']
+# for i in range(5):
+# list_data.extend(list_data)
+# print(len(list_data))
+examples = convert_example(list_data, tokenizer, max_seq_length=max_seq_len)
+print(examples)
+
+feed_dict = {}
+feed_dict['input_ids'] = np.array(examples[0])
+feed_dict['token_type_ids'] = np.array(examples[1])
+
+print(feed_dict['input_ids'].shape)
+print(feed_dict['token_type_ids'].shape)
+
+# batch=True enables batched prediction
+b_start = time.time()
+result = client.predict(feed=feed_dict, fetch=fetch_names, batch=True)
+b_end = time.time()
+print(result)
+print("time to cost :{} seconds".format(b_end - b_start))
diff --git a/examples/Pipeline/PaddleNLP/semantic_indexing/deploy/C++/rpc_client.py b/examples/Pipeline/PaddleNLP/semantic_indexing/deploy/C++/rpc_client.py
new file mode 100644
index 0000000000000000000000000000000000000000..9ea4c245f2a10256166a512f9282282e69d9997b
--- /dev/null
+++ b/examples/Pipeline/PaddleNLP/semantic_indexing/deploy/C++/rpc_client.py
@@ -0,0 +1,77 @@
+# coding:utf-8
+# pylint: disable=doc-string-missing
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys
+import time
+import numpy as np
+
+from paddle_serving_client import Client
+import paddlenlp as ppnlp
+
+
+def convert_example(example,
+ tokenizer,
+ max_seq_length=512,
+ pad_to_max_seq_len=True):
+ list_input_ids = []
+ list_token_type_ids = []
+ for text in example:
+ encoded_inputs = tokenizer(
+ text=text,
+ max_seq_len=max_seq_length,
+ pad_to_max_seq_len=pad_to_max_seq_len)
+ input_ids = encoded_inputs["input_ids"]
+ token_type_ids = encoded_inputs["token_type_ids"]
+ list_input_ids.append(input_ids)
+ list_token_type_ids.append(token_type_ids)
+ return list_input_ids, list_token_type_ids
+
+
+# Start the Python client
+endpoint_list = ['127.0.0.1:9393']
+client = Client()
+client.load_client_config('serving_client')
+client.connect(endpoint_list)
+feed_names = client.feed_names_
+fetch_names = client.fetch_names_
+print(feed_names)
+print(fetch_names)
+
+# Create the tokenizer
+tokenizer = ppnlp.transformers.ErnieTokenizer.from_pretrained('ernie-1.0')
+max_seq_len = 64
+
+# Data preprocessing
+
+list_data = ['国有企业引入非国有资本对创新绩效的影响——基于制造业国有上市公司的经验证据.', '面向生态系统服务的生态系统分类方案研发与应用']
+# for i in range(5):
+# list_data.extend(list_data)
+# print(len(list_data))
+examples = convert_example(list_data, tokenizer, max_seq_length=max_seq_len)
+print(examples)
+
+feed_dict = {}
+feed_dict['input_ids'] = np.array(examples[0])
+feed_dict['token_type_ids'] = np.array(examples[1])
+
+print(feed_dict['input_ids'].shape)
+print(feed_dict['token_type_ids'].shape)
+# batch=True enables batched prediction
+b_start = time.time()
+result = client.predict(feed=feed_dict, fetch=fetch_names, batch=True)
+b_end = time.time()
+print("time to cost :{} seconds".format(b_end - b_start))
+print(result)
diff --git a/examples/Pipeline/PaddleNLP/semantic_indexing/deploy/C++/start_server.sh b/examples/Pipeline/PaddleNLP/semantic_indexing/deploy/C++/start_server.sh
new file mode 100644
index 0000000000000000000000000000000000000000..55d380d6f87396887675a008c54bb8544ce2a793
--- /dev/null
+++ b/examples/Pipeline/PaddleNLP/semantic_indexing/deploy/C++/start_server.sh
@@ -0,0 +1 @@
+python -m paddle_serving_server.serve --model serving_server --port 9393 --gpu_id 2 --thread 5 --ir_optim True --use_trt --precision FP16
\ No newline at end of file
diff --git a/examples/Pipeline/PaddleNLP/semantic_indexing/deploy/python/config_nlp.yml b/examples/Pipeline/PaddleNLP/semantic_indexing/deploy/python/config_nlp.yml
new file mode 100644
index 0000000000000000000000000000000000000000..d896adbfa1f9671cb569137637cf5f3ec169ef69
--- /dev/null
+++ b/examples/Pipeline/PaddleNLP/semantic_indexing/deploy/python/config_nlp.yml
@@ -0,0 +1,34 @@
+# worker_num: maximum concurrency. When build_dag_each_worker=True, the framework creates worker_num processes, each building its own gRPC server and DAG.
+# When build_dag_each_worker=False, the framework sets max_workers=worker_num for the gRPC thread pool of the main thread.
+worker_num: 20
+# build_dag_each_worker: when False, the framework builds a single DAG inside the process; when True, each worker process builds its own independent DAG.
+build_dag_each_worker: false
+
+dag:
+ # op resource type: True for the thread model, False for the process model
+ is_thread_op: False
+ # profiling: True generates Timeline performance data (with some overhead); False disables it
+ tracer:
+ interval_s: 10
+# HTTP port. rpc_port and http_port must not both be empty. When rpc_port is usable and http_port is empty, http_port is not generated automatically.
+http_port: 18082
+# RPC port. rpc_port and http_port must not both be empty. When rpc_port is empty and http_port is not, rpc_port is automatically set to http_port+1.
+rpc_port: 8088
+op:
+ ernie:
+ # concurrency: thread-level when is_thread_op=True, otherwise process-level
+ concurrency: 1
+ # when the op config has no server_endpoints, the local service config is read from local_service_conf
+ local_service_conf:
+ # client type: brpc, grpc, or local_predictor; local_predictor does not start a Serving service and predicts in-process
+ client_type: local_predictor
+ #ir_optim
+ ir_optim: True
+ # device_type, 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu
+ device_type: 1
+ # device IDs: "" or unset means CPU inference; "0" or "0,1,2" means GPU inference on the listed cards
+ devices: '2'
+ # list of fetch results, keyed by the alias_name of fetch_var in client_config; if unset, all outputs are returned
+ fetch_list: ['output_embedding']
+ # model path
+ model_config: ../../serving_server/
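
With `http_port: 18082` and the op named `ernie`, the service also accepts HTTP requests alongside the RPC port used by rpc_client.py. A minimal HTTP call (a sketch: the `/ernie/prediction` URL pattern and the key/value body follow the usual Pipeline WebService convention and are assumed here):

```python
import json
import requests

url = "http://127.0.0.1:18082/ernie/prediction"  # op name from web_service.py
texts = ["国有企业引入非国有资本对创新绩效的影响", "中西方语言与文化的差异"]
data = {"key": [str(i) for i in range(len(texts))], "value": texts}
resp = requests.post(url, data=json.dumps(data))
print(resp.json())
```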
diff --git a/examples/Pipeline/PaddleNLP/semantic_indexing/deploy/python/deploy.sh b/examples/Pipeline/PaddleNLP/semantic_indexing/deploy/python/deploy.sh
new file mode 100644
index 0000000000000000000000000000000000000000..fe8f071e0a47a47f5dc24d84ea4eaaf8e7503c06
--- /dev/null
+++ b/examples/Pipeline/PaddleNLP/semantic_indexing/deploy/python/deploy.sh
@@ -0,0 +1 @@
+python predict.py --model_dir=../../output
\ No newline at end of file
diff --git a/examples/Pipeline/PaddleNLP/semantic_indexing/deploy/python/predict.py b/examples/Pipeline/PaddleNLP/semantic_indexing/deploy/python/predict.py
new file mode 100644
index 0000000000000000000000000000000000000000..0e81dbb5092ce6178587f5aa8f40d758f4446a42
--- /dev/null
+++ b/examples/Pipeline/PaddleNLP/semantic_indexing/deploy/python/predict.py
@@ -0,0 +1,292 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import os
+import sys
+
+import numpy as np
+import paddle
+import paddlenlp as ppnlp
+from scipy.special import softmax
+from scipy import spatial
+from paddle import inference
+from paddlenlp.data import Stack, Tuple, Pad
+from paddlenlp.datasets import load_dataset
+from paddlenlp.utils.log import logger
+
+sys.path.append('.')
+
+# yapf: disable
+parser = argparse.ArgumentParser()
+parser.add_argument("--model_dir", type=str, required=True,
+ help="The directory to static model.")
+
+parser.add_argument("--max_seq_length", default=128, type=int,
+ help="The maximum total input sequence length after tokenization. Sequences "
+ "longer than this will be truncated, sequences shorter will be padded.")
+parser.add_argument("--batch_size", default=15, type=int,
+ help="Batch size per GPU/CPU for training.")
+parser.add_argument('--device', choices=['cpu', 'gpu', 'xpu'], default="gpu",
+ help="Select which device to train model, defaults to gpu.")
+
+parser.add_argument('--use_tensorrt', default=False, type=eval, choices=[True, False],
+ help='Enable to use tensorrt to speed up.')
+parser.add_argument("--precision", default="fp32", type=str, choices=["fp32", "fp16", "int8"],
+ help='The tensorrt precision.')
+
+parser.add_argument('--cpu_threads', default=10, type=int,
+ help='Number of threads to predict when using cpu.')
+parser.add_argument('--enable_mkldnn', default=False, type=eval, choices=[True, False],
+ help='Enable to use mkldnn to speed up when using cpu.')
+
+parser.add_argument("--benchmark", type=eval, default=False,
+ help="To log some information about environment and running.")
+parser.add_argument("--save_log_path", type=str, default="./log_output/",
+ help="The file path to save log.")
+args = parser.parse_args()
+# yapf: enable
+
+
+def convert_example(example,
+ tokenizer,
+ max_seq_length=512,
+ pad_to_max_seq_len=False):
+ """
+ Builds model inputs from a sequence.
+
+ A BERT sequence has the following format:
+
+ - single sequence: ``[CLS] X [SEP]``
+
+ Args:
+ example(obj:`dict`): A dict mapping keys to the texts to be converted to ids.
+ tokenizer(obj:`PretrainedTokenizer`): This tokenizer inherits from :class:`~paddlenlp.transformers.PretrainedTokenizer`
+ which contains most of the methods. Users should refer to the superclass for more information regarding methods.
+ max_seq_len(obj:`int`): The maximum total input sequence length after tokenization.
+ Sequences longer than this will be truncated, sequences shorter will be padded.
+
+ Returns:
+ input_ids(obj:`list[int]`): The list of query token ids.
+ token_type_ids(obj: `list[int]`): List of query sequence pair mask.
+ """
+
+ result = []
+ for key, text in example.items():
+ encoded_inputs = tokenizer(
+ text=text,
+ max_seq_len=max_seq_length,
+ pad_to_max_seq_len=pad_to_max_seq_len)
+ input_ids = encoded_inputs["input_ids"]
+ token_type_ids = encoded_inputs["token_type_ids"]
+ result += [input_ids, token_type_ids]
+ return result
+
+
+class Predictor(object):
+ def __init__(self,
+ model_dir,
+ device="gpu",
+ max_seq_length=128,
+ batch_size=32,
+ use_tensorrt=False,
+ precision="fp32",
+ cpu_threads=10,
+ enable_mkldnn=False):
+ self.max_seq_length = max_seq_length
+ self.batch_size = batch_size
+
+ model_file = model_dir + "/inference.pdmodel"
+ params_file = model_dir + "/inference.pdiparams"
+ if not os.path.exists(model_file):
+ raise ValueError("not find model file path {}".format(model_file))
+ if not os.path.exists(params_file):
+ raise ValueError("not find params file path {}".format(params_file))
+ config = paddle.inference.Config(model_file, params_file)
+
+ if device == "gpu":
+ # set GPU configs accordingly
+ # such as initializing the GPU memory or enabling TensorRT
+ config.enable_use_gpu(100, 0)
+ precision_map = {
+ "fp16": inference.PrecisionType.Half,
+ "fp32": inference.PrecisionType.Float32,
+ "int8": inference.PrecisionType.Int8
+ }
+ precision_mode = precision_map[precision]
+
+ if args.use_tensorrt:
+ config.enable_tensorrt_engine(
+ max_batch_size=batch_size,
+ min_subgraph_size=30,
+ precision_mode=precision_mode)
+ elif device == "cpu":
+ # set CPU configs accordingly,
+ # such as enable_mkldnn, set_cpu_math_library_num_threads
+ config.disable_gpu()
+ if args.enable_mkldnn:
+ # cache 10 different shapes for mkldnn to avoid memory leak
+ config.set_mkldnn_cache_capacity(10)
+ config.enable_mkldnn()
+ config.set_cpu_math_library_num_threads(args.cpu_threads)
+ elif device == "xpu":
+ # set XPU configs accordingly
+ config.enable_xpu(100)
+
+ config.switch_use_feed_fetch_ops(False)
+ self.predictor = paddle.inference.create_predictor(config)
+ self.input_handles = [
+ self.predictor.get_input_handle(name)
+ for name in self.predictor.get_input_names()
+ ]
+ self.output_handle = self.predictor.get_output_handle(
+ self.predictor.get_output_names()[0])
+
+ if args.benchmark:
+ import auto_log
+ pid = os.getpid()
+ self.autolog = auto_log.AutoLogger(
+ model_name="ernie-1.0",
+ model_precision=precision,
+ batch_size=self.batch_size,
+ data_shape="dynamic",
+ save_path=args.save_log_path,
+ inference_config=config,
+ pids=pid,
+ process_name=None,
+ gpu_ids=0,
+ time_keys=[
+ 'preprocess_time', 'inference_time', 'postprocess_time'
+ ],
+ warmup=0,
+ logger=logger)
+
+ def extract_embedding(self, data, tokenizer):
+ """
+ Predicts the data labels.
+
+ Args:
+ data (obj:`List(str)`): The batch data; each element is a raw text.
+ tokenizer(obj:`PretrainedTokenizer`): This tokenizer inherits from :class:`~paddlenlp.transformers.PretrainedTokenizer`
+ which contains most of the methods. Users should refer to the superclass for more information regarding methods.
+
+ Returns:
+ results(obj:`dict`): All the feature vectors.
+ """
+ if args.benchmark:
+ self.autolog.times.start()
+
+ examples = []
+ for text in data:
+ input_ids, segment_ids = convert_example(text, tokenizer)
+ examples.append((input_ids, segment_ids))
+
+ batchify_fn = lambda samples, fn=Tuple(
+ Pad(axis=0, pad_val=tokenizer.pad_token_id), # input
+ Pad(axis=0, pad_val=tokenizer.pad_token_id), # segment
+ ): fn(samples)
+
+ if args.benchmark:
+ self.autolog.times.stamp()
+
+ input_ids, segment_ids = batchify_fn(examples)
+ self.input_handles[0].copy_from_cpu(input_ids)
+ self.input_handles[1].copy_from_cpu(segment_ids)
+ self.predictor.run()
+ logits = self.output_handle.copy_to_cpu()
+ if args.benchmark:
+ self.autolog.times.stamp()
+
+ if args.benchmark:
+ self.autolog.times.end(stamp=True)
+
+ return logits
+
+ def predict(self, data, tokenizer):
+ """
+ Predicts the data labels.
+
+ Args:
+ data (obj:`List(str)`): The batch data; each element is a raw text.
+ tokenizer(obj:`PretrainedTokenizer`): This tokenizer inherits from :class:`~paddlenlp.transformers.PretrainedTokenizer`
+ which contains most of the methods. Users should refer to the superclass for more information regarding methods.
+
+ Returns:
+ results(obj:`dict`): All the predictions probs.
+ """
+ if args.benchmark:
+ self.autolog.times.start()
+
+ examples = []
+ for idx, text in enumerate(data):
+ input_ids, segment_ids = convert_example({idx: text[0]}, tokenizer)
+ title_ids, title_segment_ids = convert_example({
+ idx: text[1]
+ }, tokenizer)
+ examples.append(
+ (input_ids, segment_ids, title_ids, title_segment_ids))
+
+ batchify_fn = lambda samples, fn=Tuple(
+ Pad(axis=0, pad_val=tokenizer.pad_token_id), # query input
+ Pad(axis=0, pad_val=tokenizer.pad_token_id), # query segment
+ Pad(axis=0, pad_val=tokenizer.pad_token_id), # title input
+ Pad(axis=0, pad_val=tokenizer.pad_token_id), # title segment
+ ): fn(samples)
+
+ if args.benchmark:
+ self.autolog.times.stamp()
+
+ query_ids, query_segment_ids, title_ids, title_segment_ids = batchify_fn(
+ examples)
+ self.input_handles[0].copy_from_cpu(query_ids)
+ self.input_handles[1].copy_from_cpu(query_segment_ids)
+ self.predictor.run()
+ query_logits = self.output_handle.copy_to_cpu()
+
+ self.input_handles[0].copy_from_cpu(title_ids)
+ self.input_handles[1].copy_from_cpu(title_segment_ids)
+ self.predictor.run()
+ title_logits = self.output_handle.copy_to_cpu()
+
+ if args.benchmark:
+ self.autolog.times.stamp()
+
+ if args.benchmark:
+ self.autolog.times.end(stamp=True)
+ result = [
+ float(1 - spatial.distance.cosine(arr1, arr2))
+ for arr1, arr2 in zip(query_logits, title_logits)
+ ]
+ return result
+
+
+if __name__ == "__main__":
+ # Define predictor to do prediction.
+ predictor = Predictor(args.model_dir, args.device, args.max_seq_length,
+ args.batch_size, args.use_tensorrt, args.precision,
+ args.cpu_threads, args.enable_mkldnn)
+
+ # Tokenizer matching the ernie-1.0 pretrained model.
+ output_emb_size = 256
+ tokenizer = ppnlp.transformers.ErnieTokenizer.from_pretrained('ernie-1.0')
+ id2corpus = {0: '国有企业引入非国有资本对创新绩效的影响——基于制造业国有上市公司的经验证据'}
+ corpus_list = [{idx: text} for idx, text in id2corpus.items()]
+ res = predictor.extract_embedding(corpus_list, tokenizer)
+ print(res.shape)
+ print(res)
+ corpus_list = [['中西方语言与文化的差异', '中西方文化差异以及语言体现中西方文化,差异,语言体现'],
+ ['中西方语言与文化的差异', '飞桨致力于让深度学习技术的创新与应用更简单']]
+ res = predictor.predict(corpus_list, tokenizer)
+ print(res)
diff --git a/examples/Pipeline/PaddleNLP/semantic_indexing/deploy/python/rpc_client.py b/examples/Pipeline/PaddleNLP/semantic_indexing/deploy/python/rpc_client.py
new file mode 100644
index 0000000000000000000000000000000000000000..03863db6114b7c381dae17ee3bf33f00f15d8f4a
--- /dev/null
+++ b/examples/Pipeline/PaddleNLP/semantic_indexing/deploy/python/rpc_client.py
@@ -0,0 +1,39 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import time
+import numpy as np
+
+from paddle_serving_server.pipeline import PipelineClient
+
+client = PipelineClient()
+client.connect(['127.0.0.1:8088'])
+
+list_data = [
+ "国有企业引入非国有资本对创新绩效的影响——基于制造业国有上市公司的经验证据",
+ "试论翻译过程中的文化差异与语言空缺翻译过程,文化差异,语言空缺,文化对比"
+]
+feed = {}
+for i, item in enumerate(list_data):
+ feed[str(i)] = item
+
+print(feed)
+start_time = time.time()
+ret = client.predict(feed_dict=feed)
+end_time = time.time()
+print("time to cost :{} seconds".format(end_time - start_time))
+
+result = np.array(eval(ret.value[0]))
+print(ret.key)
+print(result.shape)
+print(result)
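
The client above recovers the embedding array by `eval`-ing the stringified list that web_service.py's `postprocess` returns. `ast.literal_eval` is a safer drop-in for that round-trip:

```python
import ast
import numpy as np

# same result as eval(ret.value[0]), but without arbitrary code execution
result = np.array(ast.literal_eval(ret.value[0]))
```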
diff --git a/examples/Pipeline/PaddleNLP/semantic_indexing/deploy/python/web_service.py b/examples/Pipeline/PaddleNLP/semantic_indexing/deploy/python/web_service.py
new file mode 100644
index 0000000000000000000000000000000000000000..1ad12032b3c92d72a5297f15d732b7dfbd19589e
--- /dev/null
+++ b/examples/Pipeline/PaddleNLP/semantic_indexing/deploy/python/web_service.py
@@ -0,0 +1,82 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+import numpy as np
+import sys
+
+from paddle_serving_server.web_service import WebService, Op
+
+_LOGGER = logging.getLogger()
+
+
+def convert_example(example,
+ tokenizer,
+ max_seq_length=512,
+ pad_to_max_seq_len=False):
+ result = []
+ for text in example:
+ encoded_inputs = tokenizer(
+ text=text,
+ max_seq_len=max_seq_length,
+ pad_to_max_seq_len=pad_to_max_seq_len)
+ input_ids = encoded_inputs["input_ids"]
+ token_type_ids = encoded_inputs["token_type_ids"]
+ result += [input_ids, token_type_ids]
+ return result
+
+
+class ErnieOp(Op):
+ def init_op(self):
+ import paddlenlp as ppnlp
+ self.tokenizer = ppnlp.transformers.ErnieTokenizer.from_pretrained(
+ 'ernie-1.0')
+
+ def preprocess(self, input_dicts, data_id, log_id):
+ from paddlenlp.data import Stack, Tuple, Pad
+
+ (_, input_dict), = input_dicts.items()
+ print("input dict", input_dict)
+ batch_size = len(input_dict.keys())
+ examples = []
+ for i in range(batch_size):
+ input_ids, segment_ids = convert_example([input_dict[str(i)]],
+ self.tokenizer)
+ examples.append((input_ids, segment_ids))
+ batchify_fn = lambda samples, fn=Tuple(
+ Pad(axis=0, pad_val=self.tokenizer.pad_token_id), # input
+ Pad(axis=0, pad_val=self.tokenizer.pad_token_id), # segment
+ ): fn(samples)
+ input_ids, segment_ids = batchify_fn(examples)
+ feed_dict = {}
+ feed_dict['input_ids'] = input_ids
+ feed_dict['token_type_ids'] = segment_ids
+ return feed_dict, False, None, ""
+
+ def postprocess(self, input_dicts, fetch_dict, data_id, log_id):
+ new_dict = {}
+ new_dict["output_embedding"] = str(fetch_dict["output_embedding"]
+ .tolist())
+ return new_dict, None, ""
+
+
+class ErnieService(WebService):
+ def get_pipeline_response(self, read_op):
+ ernie_op = ErnieOp(name="ernie", input_ops=[read_op])
+ return ernie_op
+
+
+ernie_service = ErnieService(name="ernie")
+ernie_service.prepare_pipeline_config("config_nlp.yml")
+ernie_service.run_service()
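
`ErnieOp.preprocess` assumes the request keys are the consecutive string indices "0", "1", ..., which is exactly how rpc_client.py builds its feed dict, and it returns the `(feed_dict, is_skip_process, prod_errcode, error_info)` 4-tuple used throughout Python Pipeline. A small check of that key contract (a hypothetical helper mirroring the loop in `preprocess`):

```python
def validate_request(input_dict):
    # keys must be "0".."n-1" so that input_dict[str(i)] never misses
    n = len(input_dict)
    missing = [str(i) for i in range(n) if str(i) not in input_dict]
    if missing:
        raise KeyError("pipeline request missing keys: {}".format(missing))
    return n
```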
diff --git a/examples/Pipeline/PaddleNLP/semantic_indexing/export_model.py b/examples/Pipeline/PaddleNLP/semantic_indexing/export_model.py
new file mode 100644
index 0000000000000000000000000000000000000000..da468ea7b2c3af6eff093eef98a3e4f9393f9b3d
--- /dev/null
+++ b/examples/Pipeline/PaddleNLP/semantic_indexing/export_model.py
@@ -0,0 +1,65 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import os
+from functools import partial
+
+import numpy as np
+import paddle
+import paddle.nn.functional as F
+import paddlenlp as ppnlp
+from paddlenlp.data import Stack, Tuple, Pad
+
+from base_model import SemanticIndexBase, SemanticIndexBaseStatic
+
+# yapf: disable
+parser = argparse.ArgumentParser()
+parser.add_argument("--params_path", type=str, required=True,
+ default='./checkpoint/model_900/model_state.pdparams', help="The path to model parameters to be loaded.")
+parser.add_argument("--output_path", type=str, default='./output',
+ help="The path of model parameter in static graph to be saved.")
+args = parser.parse_args()
+# yapf: enable
+
+if __name__ == "__main__":
+ # Use the ernie-1.0 pretrained model; the output embedding size is 256.
+ output_emb_size = 256
+
+ pretrained_model = ppnlp.transformers.ErnieModel.from_pretrained(
+ "ernie-1.0")
+
+ tokenizer = ppnlp.transformers.ErnieTokenizer.from_pretrained('ernie-1.0')
+ model = SemanticIndexBaseStatic(
+ pretrained_model, output_emb_size=output_emb_size)
+
+ if args.params_path and os.path.isfile(args.params_path):
+ state_dict = paddle.load(args.params_path)
+ model.set_dict(state_dict)
+ print("Loaded parameters from %s" % args.params_path)
+
+ model.eval()
+
+ # Convert to static graph with specific input description
+ model = paddle.jit.to_static(
+ model,
+ input_spec=[
+ paddle.static.InputSpec(
+ shape=[None, None], dtype="int64"), # input_ids
+ paddle.static.InputSpec(
+ shape=[None, None], dtype="int64") # segment_ids
+ ])
+ # Save in static graph model.
+ save_path = os.path.join(args.output_path, "inference")
+ paddle.jit.save(model, save_path)
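
After export, the saved static graph can be smoke-tested without Serving by loading it back with `paddle.jit.load`; a minimal check (a sketch, using dummy ids rather than meaningful tokens):

```python
import numpy as np
import paddle

loaded = paddle.jit.load("./output/inference")
loaded.eval()
ids = paddle.to_tensor(np.ones((1, 8), dtype="int64"))
emb = loaded(ids, paddle.zeros_like(ids))  # expected shape (1, 256), L2-normalized
print(emb.shape)
```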
diff --git a/examples/Pipeline/PaddleNLP/semantic_indexing/export_to_serving.py b/examples/Pipeline/PaddleNLP/semantic_indexing/export_to_serving.py
new file mode 100644
index 0000000000000000000000000000000000000000..c24f931510e5662ae1b824049d1ac35c4ef34076
--- /dev/null
+++ b/examples/Pipeline/PaddleNLP/semantic_indexing/export_to_serving.py
@@ -0,0 +1,47 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import paddle_serving_client.io as serving_io
+# yapf: disable
+parser = argparse.ArgumentParser()
+parser.add_argument("--dirname", type=str, required=True,
+ default='./output', help="Path of saved model files. Program file and parameter files are saved in this directory.")
+parser.add_argument("--model_filename", type=str, required=True,
+ default='inference.get_pooled_embedding.pdmodel', help="The name of file to load the inference program. If it is None, the default filename __model__ will be used.")
+parser.add_argument("--params_filename", type=str, required=True,
+ default='inference.get_pooled_embedding.pdiparams', help="The name of file to load all parameters. It is only used for the case that all parameters were saved in a single binary file. If parameters were saved in separate files, set it as None. Default: None.")
+parser.add_argument("--server_path", type=str, default='./serving_server',
+ help="The path of server parameter in static graph to be saved.")
+parser.add_argument("--client_path", type=str, default='./serving_client',
+ help="The path of client parameter in static graph to be saved.")
+parser.add_argument("--feed_alias_names", type=str, default=None,
+ help='set alias names for feed vars, split by comma \',\', you should run --show_proto to check the number of feed vars')
+parser.add_argument("--fetch_alias_names", type=str, default=None,
+ help='set alias names for fetch vars, split by comma \',\', you should run --show_proto to check the number of fetch vars')
+parser.add_argument("--show_proto", type=bool, default=False,
+ help='If yes, you can preview the proto and then determine your feed var alias name and fetch var alias name.')
+# yapf: enable
+
+if __name__ == "__main__":
+ args = parser.parse_args()
+ serving_io.inference_model_to_serving(
+ dirname=args.dirname,
+ serving_server=args.server_path,
+ serving_client=args.client_path,
+ model_filename=args.model_filename,
+ params_filename=args.params_filename,
+ show_proto=args.show_proto,
+ feed_alias_names=args.feed_alias_names,
+ fetch_alias_names=args.fetch_alias_names)
diff --git a/examples/Pipeline/PaddleNLP/semantic_indexing/scripts/export_model.sh b/examples/Pipeline/PaddleNLP/semantic_indexing/scripts/export_model.sh
new file mode 100644
index 0000000000000000000000000000000000000000..7c79266219cea03e16968ed0d00a3755615c7432
--- /dev/null
+++ b/examples/Pipeline/PaddleNLP/semantic_indexing/scripts/export_model.sh
@@ -0,0 +1 @@
+python export_model.py --params_path checkpoints/model_40/model_state.pdparams --output_path=./output
\ No newline at end of file
diff --git a/examples/Pipeline/PaddleNLP/semantic_indexing/scripts/export_to_serving.sh b/examples/Pipeline/PaddleNLP/semantic_indexing/scripts/export_to_serving.sh
new file mode 100644
index 0000000000000000000000000000000000000000..b0d7a422551fd09eb1a28cfacdf47237a8efc795
--- /dev/null
+++ b/examples/Pipeline/PaddleNLP/semantic_indexing/scripts/export_to_serving.sh
@@ -0,0 +1,7 @@
+python export_to_serving.py \
+ --dirname "output" \
+ --model_filename "inference.get_pooled_embedding.pdmodel" \
+ --params_filename "inference.get_pooled_embedding.pdiparams" \
+ --server_path "serving_server" \
+ --client_path "serving_client" \
+ --fetch_alias_names "output_embedding"
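Before committing to alias names, the exported program's feed/fetch variables can be previewed with `--show_proto`, as the argument help above suggests. A possible first pass, reusing the paths from the script above:

```
python export_to_serving.py \
    --dirname "output" \
    --model_filename "inference.get_pooled_embedding.pdmodel" \
    --params_filename "inference.get_pooled_embedding.pdiparams" \
    --show_proto True
```

Because `--show_proto` is declared with `type=bool`, argparse treats any non-empty value (including the string `False`) as true; omit the flag entirely to perform the actual export.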
diff --git a/java/src/main/proto/general_model_service.proto b/java/src/main/proto/general_model_service.proto
index aa06d388a468d71e968aa53b19f25c55f8c42ee1..6b0fc7e0d585d45ca45f26cf9083a7f116a25d33 100644
--- a/java/src/main/proto/general_model_service.proto
+++ b/java/src/main/proto/general_model_service.proto
@@ -89,11 +89,13 @@ message Request {
message Response {
repeated ModelOutput outputs = 1;
repeated int64 profile_time = 2;
- // Error code
- int32 err_no = 3;
+ bool profile_server = 3;
+ uint64 log_id = 4;
+ // Error code
+ int32 err_no = 5;
// Error messages
- string err_msg = 4;
+ string err_msg = 6;
};
message ModelOutput {
diff --git a/paddle_inference/paddle/include/paddle_engine.h b/paddle_inference/paddle/include/paddle_engine.h
index 1fbb7222c0f32c7598b24c51f076d47e863f25b6..2d76730555acb6ed0408584db1334e842db126c3 100644
--- a/paddle_inference/paddle/include/paddle_engine.h
+++ b/paddle_inference/paddle/include/paddle_engine.h
@@ -241,10 +241,10 @@ class PaddleInferenceEngine : public EngineCore {
}
config.SwitchSpecifyInputNames(true);
- config.SetCpuMathLibraryNumThreads(1);
+ config.SetCpuMathLibraryNumThreads(engine_conf.cpu_math_thread_num());
if (engine_conf.has_use_gpu() && engine_conf.use_gpu()) {
// 2000MB GPU memory
- config.EnableUseGpu(50, gpu_id);
+ config.EnableUseGpu(engine_conf.gpu_memory_mb(), gpu_id);
if (engine_conf.has_gpu_multi_stream() &&
engine_conf.gpu_multi_stream()) {
config.EnableGpuMultiStream();
@@ -267,17 +267,17 @@ class PaddleInferenceEngine : public EngineCore {
if (engine_conf.has_use_trt() && engine_conf.use_trt()) {
config.SwitchIrOptim(true);
if (!engine_conf.has_use_gpu() || !engine_conf.use_gpu()) {
- config.EnableUseGpu(50, gpu_id);
+ config.EnableUseGpu(engine_conf.gpu_memory_mb(), gpu_id);
if (engine_conf.has_gpu_multi_stream() &&
engine_conf.gpu_multi_stream()) {
config.EnableGpuMultiStream();
}
}
- config.EnableTensorRtEngine(1 << 25,
+ config.EnableTensorRtEngine(engine_conf.trt_workspace_size(),
max_batch,
local_min_subgraph_size,
precision_type,
- false,
+ engine_conf.trt_use_static(),
FLAGS_use_calib);
  std::map<std::string, std::vector<int>> min_input_shape;
  std::map<std::string, std::vector<int>> max_input_shape;
@@ -413,7 +413,11 @@ class PaddleInferenceEngine : public EngineCore {
<< ", use_ascend_cl: " << engine_conf.has_use_ascend_cl()
<< ", use_xpu: " << engine_conf.use_xpu()
<< ", enable_memory_optimization: "
- << engine_conf.enable_memory_optimization();
+ << engine_conf.enable_memory_optimization()
+ << ", gpu_memory_mb: " << engine_conf.gpu_memory_mb()
+ << ", cpu_math_thread_num: " << engine_conf.cpu_math_thread_num()
+ << ", trt_workspace_size: " << engine_conf.trt_workspace_size()
+ << ", trt_use_static: " << engine_conf.trt_use_static();
VLOG(2) << "create paddle predictor sucess, path: " << model_path;
return 0;
diff --git a/python/paddle_serving_app/local_predict.py b/python/paddle_serving_app/local_predict.py
index 5f922a28f849866fcd08a29b63c70a986d064c68..7ad11e1c2bf0abc4d447311e3081b434cbb25dc9 100644
--- a/python/paddle_serving_app/local_predict.py
+++ b/python/paddle_serving_app/local_predict.py
@@ -93,7 +93,9 @@ class LocalPredictor(object):
use_ascend_cl=False,
min_subgraph_size=3,
dynamic_shape_info={},
- use_calib=False):
+ use_calib=False,
+ collect_shape_range_info="",
+ tuned_dynamic_shape_info=""):
"""
Load model configs and create the paddle predictor by Paddle Inference API.
@@ -160,12 +162,14 @@ class LocalPredictor(object):
"use_trt:{}, use_lite:{}, use_xpu:{}, precision:{}, use_calib:{}, "
"use_mkldnn:{}, mkldnn_cache_capacity:{}, mkldnn_op_list:{}, "
"mkldnn_bf16_op_list:{}, use_feed_fetch_ops:{}, "
- "use_ascend_cl:{}, min_subgraph_size:{}, dynamic_shape_info:{}".
+ "use_ascend_cl:{}, min_subgraph_size:{}, dynamic_shape_info:{},"
+ "collect_shape_range_info:{},tuned_dynamic_shape_info:{}".
format(model_path, use_gpu, gpu_id, use_profile, thread_num,
mem_optim, ir_optim, use_trt, use_lite, use_xpu, precision,
use_calib, use_mkldnn, mkldnn_cache_capacity, mkldnn_op_list,
mkldnn_bf16_op_list, use_feed_fetch_ops, use_ascend_cl,
- min_subgraph_size, dynamic_shape_info))
+ min_subgraph_size, dynamic_shape_info,
+ collect_shape_range_info,tuned_dynamic_shape_info))
self.feed_names_ = [var.alias_name for var in model_conf.feed_var]
self.fetch_names_ = [var.alias_name for var in model_conf.fetch_var]
@@ -213,6 +217,8 @@ class LocalPredictor(object):
if mkldnn_op_list is not None:
config.set_mkldnn_op(mkldnn_op_list)
# set gpu
+ if collect_shape_range_info != "":
+ config.collect_shape_range_info(collect_shape_range_info)
if not use_gpu:
config.disable_gpu()
else:
@@ -226,6 +232,9 @@ class LocalPredictor(object):
use_static=False,
use_calib_mode=use_calib)
+ if tuned_dynamic_shape_info != "":
+ config.enable_tuned_tensorrt_dynamic_shape(tuned_dynamic_shape_info, True)
+
@ErrorCatch
@ParamChecker
def dynamic_shape_info_helper(dynamic_shape_info:lambda dynamic_shape_info: check_dynamic_shape_info(dynamic_shape_info)):
@@ -235,7 +244,7 @@ class LocalPredictor(object):
print("dynamic_shape_info configure error, it should contain [min_input_shape', 'max_input_shape', 'opt_input_shape' {}".format(resp.err_msg))
kill_stop_process_by_pid("kill", os.getpgid(os.getpid()))
- if len(dynamic_shape_info):
+ if len(dynamic_shape_info) and tuned_dynamic_shape_info == "":
config.set_trt_dynamic_shape_info(
dynamic_shape_info['min_input_shape'],
dynamic_shape_info['max_input_shape'],
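The two new arguments enable Paddle Inference's tuned TensorRT dynamic shapes in two passes: `collect_shape_range_info` passes a file path to `config.collect_shape_range_info()`, which records the real shape range of every input while requests run, and `tuned_dynamic_shape_info` feeds that file back through `config.enable_tuned_tensorrt_dynamic_shape()`, after which a hand-written `dynamic_shape_info` dict is ignored. A minimal sketch, assuming a `det_model` directory and an arbitrary output path:

```python
from paddle_serving_app.local_predict import LocalPredictor

# Pass 1: run representative requests while Paddle Inference records the
# observed min/max shape of every input into shape_range_info.pbtxt.
collector = LocalPredictor()
collector.load_model_config(
    "det_model", use_gpu=True, gpu_id=0,
    collect_shape_range_info="shape_range_info.pbtxt")

# Pass 2: build TensorRT engines from the recorded ranges instead of a
# hand-written dynamic_shape_info dict.
predictor = LocalPredictor()
predictor.load_model_config(
    "det_model", use_gpu=True, gpu_id=0, use_trt=True,
    tuned_dynamic_shape_info="shape_range_info.pbtxt")
```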
diff --git a/python/paddle_serving_server/serve.py b/python/paddle_serving_server/serve.py
index 13f3739d19543486b6079450c7fbd86e14be6554..8ac473d36b11bd41909975692215caa166bc94e0 100755
--- a/python/paddle_serving_server/serve.py
+++ b/python/paddle_serving_server/serve.py
@@ -280,6 +280,27 @@ def serve_args():
default="",
nargs="+",
help="min_subgraph_size")
+ parser.add_argument(
+ "--gpu_memory_mb",
+ type=int,
+ default=50,
+ help="Initially allocate GPU storage size")
+ parser.add_argument(
+ "--cpu_math_thread_num",
+ type=int,
+ default=1,
+ help="Initialize the number of CPU computing threads")
+ parser.add_argument(
+ "--trt_workspace_size",
+ type=int,
+ default=33554432,
+ help="Initialize allocation 1 << 25 GPU storage size")
+ parser.add_argument(
+ "--trt_use_static",
+ default=False,
+ action="store_true",
+ help="Initialize TRT with static data")
+
return parser.parse_args()
@@ -396,10 +417,14 @@ def start_gpu_card_model(gpu_mode, port, args): # pylint: disable=doc-string-mi
server.set_dist_endpoints(args.dist_endpoints.split(","))
server.set_dist_subgraph_index(args.dist_subgraph_index)
server.set_min_subgraph_size(args.min_subgraph_size)
+ server.set_gpu_memory_mb(args.gpu_memory_mb)
+ server.set_cpu_math_thread_num(args.cpu_math_thread_num)
if args.use_trt and device == "gpu":
server.set_trt()
server.set_ir_optimize(True)
+ server.set_trt_workspace_size(args.trt_workspace_size)
+ server.set_trt_use_static(args.trt_use_static)
if is_ocr:
info = set_ocr_dynamic_shape_info()
server.set_trt_dynamic_shape_info(info)
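The four new flags expose values that were hardcoded in `paddle_engine.h` until this change (50 MB of initial GPU memory, one CPU math thread, a 1 << 25 byte TensorRT workspace, `use_static` off). A hypothetical invocation, assuming a `uci_housing_model` directory and the usual `--model`/`--port`/`--gpu_ids` flags:

```
python3 -m paddle_serving_server.serve \
    --model uci_housing_model --port 9393 \
    --gpu_ids 0 --use_trt \
    --gpu_memory_mb 2000 \
    --cpu_math_thread_num 4 \
    --trt_workspace_size 67108864 \
    --trt_use_static
```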
diff --git a/python/paddle_serving_server/server.py b/python/paddle_serving_server/server.py
index 266efc3e2f1ca0b383d14c2d0c1f6236347888d3..4c05b43f1970fc0c690413a8bcae5f50b1acce86 100755
--- a/python/paddle_serving_server/server.py
+++ b/python/paddle_serving_server/server.py
@@ -119,6 +119,10 @@ class Server(object):
self.dist_master_serving = False
self.min_subgraph_size = []
self.trt_dynamic_shape_info = []
+ self.gpu_memory_mb = 50
+ self.cpu_math_thread_num = 1
+ self.trt_workspace_size = 33554432 # 1 << 25
+ self.trt_use_static = False
def get_fetch_list(self, infer_node_idx=-1):
fetch_names = [
@@ -289,6 +293,18 @@ class Server(object):
def set_trt_dynamic_shape_info(self, info):
self.trt_dynamic_shape_info = info
+ def set_gpu_memory_mb(self, gpu_memory_mb):
+ self.gpu_memory_mb = gpu_memory_mb
+
+ def set_cpu_math_thread_num(self, cpu_math_thread_num):
+ self.cpu_math_thread_num = cpu_math_thread_num
+
+ def set_trt_workspace_size(self, trt_workspace_size):
+ self.trt_workspace_size = trt_workspace_size
+
+ def set_trt_use_static(self, trt_use_static):
+ self.trt_use_static = trt_use_static
+
def _prepare_engine(self, model_config_paths, device, use_encryption_model):
self.device = device
if self.model_toolkit_conf == None:
@@ -342,6 +358,10 @@ class Server(object):
engine.use_xpu = self.use_xpu
engine.use_ascend_cl = self.use_ascend_cl
engine.use_gpu = False
+ #engine.gpu_memory_mb = self.gpu_memory_mb
+ #engine.cpu_math_thread_num = self.cpu_math_thread_num
+ #engine.trt_workspace_size = self.trt_workspace_size
+ #engine.trt_use_static = self.trt_use_static
# use distributed model.
if self.dist_subgraph_index >= 0:
diff --git a/python/pipeline/local_service_handler.py b/python/pipeline/local_service_handler.py
index 9535281d6a5470c6da942bec9401869f21c21473..70b82095266e474330d8e7efebd0f2ee9656bf61 100755
--- a/python/pipeline/local_service_handler.py
+++ b/python/pipeline/local_service_handler.py
@@ -53,7 +53,9 @@ class LocalServiceHandler(object):
mkldnn_bf16_op_list=None,
min_subgraph_size=3,
dynamic_shape_info={},
- use_calib=False):
+ use_calib=False,
+ collect_shape_range_info="",
+ tuned_dynamic_shape_info=""):
"""
        Initialization of LocalServiceHandler
@@ -99,6 +101,8 @@ class LocalServiceHandler(object):
self.min_subgraph_size = 3
self.dynamic_shape_info = {}
self._use_calib = False
+ self.collect_shape_range_info = ""
+ self.tuned_dynamic_shape_info = ""
if device_type == -1:
# device_type is not set, determined by `devices`,
@@ -179,6 +183,8 @@ class LocalServiceHandler(object):
self._mkldnn_op_list = mkldnn_op_list
self._mkldnn_bf16_op_list = mkldnn_bf16_op_list
self._use_calib = use_calib
+ self.collect_shape_range_info = collect_shape_range_info
+ self.tuned_dynamic_shape_info = tuned_dynamic_shape_info
_LOGGER.info(
"Models({}) will be launched by device {}. use_gpu:{}, "
@@ -187,14 +193,16 @@ class LocalServiceHandler(object):
"client_type:{}, fetch_names:{}, precision:{}, use_calib:{}, "
"use_mkldnn:{}, mkldnn_cache_capacity:{}, mkldnn_op_list:{}, "
"mkldnn_bf16_op_list:{}, use_ascend_cl:{}, min_subgraph_size:{},"
- "is_set_dynamic_shape_info:{}".format(
+ "is_set_dynamic_shape_info:{},collect_shape_range_info:{},"
+ "tuned_dynamic_shape_info:{}".format(
model_config, self._device_name, self._use_gpu, self._use_trt,
self._use_lite, self._use_xpu, device_type, self._devices, self.
_mem_optim, self._ir_optim, self._use_profile, self._thread_num,
self._client_type, self._fetch_names, self._precision, self.
_use_calib, self._use_mkldnn, self._mkldnn_cache_capacity, self.
_mkldnn_op_list, self._mkldnn_bf16_op_list, self._use_ascend_cl,
- self.min_subgraph_size, bool(len(self.dynamic_shape_info))))
+ self.min_subgraph_size, bool(len(self.dynamic_shape_info)),
+ self.collect_shape_range_info, self.tuned_dynamic_shape_info))
def get_fetch_list(self):
return self._fetch_names
@@ -254,7 +262,9 @@ class LocalServiceHandler(object):
use_ascend_cl=self._use_ascend_cl,
min_subgraph_size=self.min_subgraph_size,
dynamic_shape_info=self.dynamic_shape_info,
- use_calib=self._use_calib)
+ use_calib=self._use_calib,
+ collect_shape_range_info=self.collect_shape_range_info,
+ tuned_dynamic_shape_info=self.tuned_dynamic_shape_info)
return self._local_predictor_client
def get_client_config(self):
diff --git a/python/pipeline/operator.py b/python/pipeline/operator.py
index 9341a851d0365782ea407d8ac461a30b4530b793..84a72d09cf2fee4be844128b31f7e15943b114bc 100644
--- a/python/pipeline/operator.py
+++ b/python/pipeline/operator.py
@@ -121,6 +121,8 @@ class Op(object):
self._succ_close_op = False
self.dynamic_shape_info = {}
self.set_dynamic_shape_info()
+ self.collect_shape_range_info = ""
+ self.tuned_dynamic_shape_info = ""
def set_dynamic_shape_info(self):
"""
@@ -235,6 +237,14 @@ class Op(object):
"mkldnn_bf16_op_list")
self.min_subgraph_size = local_service_conf.get(
"min_subgraph_size")
+ self.collect_shape_range_info = local_service_conf.get(
+ "collect_shape_range_info")
+ self.tuned_dynamic_shape_info = local_service_conf.get(
+ "tuned_dynamic_shape_info")
+ if self.collect_shape_range_info is None:
+ self.collect_shape_range_info = ""
+ if self.tuned_dynamic_shape_info is None:
+ self.tuned_dynamic_shape_info = ""
if self.model_config is None:
self.with_serving = False
@@ -259,7 +269,9 @@ class Op(object):
mkldnn_bf16_op_list=self.mkldnn_bf16_op_list,
min_subgraph_size=self.min_subgraph_size,
dynamic_shape_info=self.dynamic_shape_info,
- use_calib=self.use_calib)
+ use_calib=self.use_calib,
+ collect_shape_range_info=self.collect_shape_range_info,
+ tuned_dynamic_shape_info=self.tuned_dynamic_shape_info)
service_handler.prepare_server() # get fetch_list
serivce_ports = service_handler.get_port_list()
self._server_endpoints = [
@@ -290,7 +302,9 @@ class Op(object):
mkldnn_bf16_op_list=self.mkldnn_bf16_op_list,
min_subgraph_size=self.min_subgraph_size,
dynamic_shape_info=self.dynamic_shape_info,
- use_calib=self.use_calib)
+ use_calib=self.use_calib,
+ collect_shape_range_info=self.collect_shape_range_info,
+ tuned_dynamic_shape_info=self.tuned_dynamic_shape_info)
if self._client_config is None:
self._client_config = service_handler.get_client_config(
)
@@ -1387,7 +1401,9 @@ class Op(object):
mkldnn_bf16_op_list=mkldnn_bf16_op_list,
min_subgraph_size=min_subgraph_size,
dynamic_shape_info=dynamic_shape_info,
- use_calib=use_calib)
+ use_calib=use_calib,
+ collect_shape_range_info=self.collect_shape_range_info,
+ tuned_dynamic_shape_info=self.tuned_dynamic_shape_info)
_LOGGER.info("Init cuda env in process {}".format(
concurrency_idx))
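Note that the op normalizes missing `collect_shape_range_info` / `tuned_dynamic_shape_info` keys to the empty string, so existing `local_service_conf` sections keep working unchanged; both values are forwarded to the `LocalServiceHandler` created for each concurrent worker.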
diff --git a/python/pipeline/pipeline_server.py b/python/pipeline/pipeline_server.py
index 3ff765c4725a31c31a0de4effc01303507f1ebbc..17c408609b2066c47f184474c3b8ee8a6115bd86 100644
--- a/python/pipeline/pipeline_server.py
+++ b/python/pipeline/pipeline_server.py
@@ -261,6 +261,8 @@ class PipelineServer(object):
"use_mkldnn": False,
"mkldnn_cache_capacity": 0,
"min_subgraph_size": 3,
+ "collect_shape_range_info": "",
+ "tuned_dynamic_shape_info": "",
},
}
for op in self._used_op:
@@ -422,6 +424,8 @@ class ServerYamlConfChecker(object):
"use_mkldnn": False,
"mkldnn_cache_capacity": 0,
"min_subgraph_size": 3,
+ "collect_shape_range_info": "",
+ "tuned_dynamic_shape_info": "",
}
conf_type = {
"model_config": str,
@@ -438,6 +442,8 @@ class ServerYamlConfChecker(object):
"mkldnn_op_list": list,
"mkldnn_bf16_op_list": list,
"min_subgraph_size": int,
+ "collect_shape_range_info": str,
+ "tuned_dynamic_shape_info": str,
}
conf_qualification = {"thread_num": (">=", 1), }
ServerYamlConfChecker.check_conf(conf, default_conf, conf_type,
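With the defaults and type checks registered above, both options can be set per op in `config.yml`. A sketch, where the op name, model path, and file name are placeholders:

```
op:
  det:
    local_service_conf:
      model_config: det_model
      device_type: 2          # GPU + TensorRT
      devices: "0"
      client_type: local_predictor
      # pass 1: record the real input shape ranges into this file
      collect_shape_range_info: "shape_range_info.pbtxt"
      # pass 2: comment out the line above and reuse the recorded ranges
      #tuned_dynamic_shape_info: "shape_range_info.pbtxt"
```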
diff --git a/tools/cpp_examples/demo-serving/proto/general_model_service.proto b/tools/cpp_examples/demo-serving/proto/general_model_service.proto
index 8fedb60e97ec5b81263687b47ff0794880da8671..3a1cba2c72fde19c9288dca1e6302b40273aac93 100755
--- a/tools/cpp_examples/demo-serving/proto/general_model_service.proto
+++ b/tools/cpp_examples/demo-serving/proto/general_model_service.proto
@@ -42,6 +42,13 @@ message Request {
message Response {
repeated ModelOutput outputs = 1;
repeated int64 profile_time = 2;
+ bool profile_server = 3;
+ uint64 log_id = 4;
+
+ // Error code
+ int32 err_no = 5;
+ // Error messages
+ string err_msg = 6;
};
message ModelOutput {
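Compatibility note: in both copies of `general_model_service.proto`, `err_no` and `err_msg` move from field numbers 3/4 to 5/6 to make room for `profile_server` and `log_id`. Protobuf field numbers are part of the wire format, so stubs generated from the old proto cannot correctly decode responses from a server built with this change; client code must be regenerated against the updated proto.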