Commit 9a2ffb15 authored by TeslaZhao

Update doc

Parent 4de287c6
@@ -30,7 +30,7 @@ message( "WITH_GPU = ${WITH_GPU}")
 # Paddle Version should be one of:
 # latest: latest develop build
 # version number like 1.5.2
-SET(PADDLE_VERSION "2.3.0")
+SET(PADDLE_VERSION "2.3.0-no-ort")
 if (WITH_GPU)
 message("CUDA: ${CUDA_VERSION}, CUDNN_MAJOR_VERSION: ${CUDNN_MAJOR_VERSION}")
 # cuda 11.0 is not supported, 11.2 would be added.
@@ -171,25 +171,25 @@ LINK_DIRECTORIES(${PADDLE_INSTALL_DIR}/third_party/install/mklml/lib)
 SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${PADDLE_INSTALL_DIR}/third_party/install/mkldnn/lib")
 LINK_DIRECTORIES(${PADDLE_INSTALL_DIR}/third_party/install/mkldnn/lib)
-SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${PADDLE_INSTALL_DIR}/third_party/install/paddle2onnx/lib")
-LINK_DIRECTORIES(${PADDLE_INSTALL_DIR}/third_party/install/paddle2onnx/lib)
-SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${PADDLE_INSTALL_DIR}/third_party/install/onnxruntime/lib")
-LINK_DIRECTORIES(${PADDLE_INSTALL_DIR}/third_party/install/onnxruntime/lib)
+#SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${PADDLE_INSTALL_DIR}/third_party/install/paddle2onnx/lib")
+#LINK_DIRECTORIES(${PADDLE_INSTALL_DIR}/third_party/install/paddle2onnx/lib)
+#SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${PADDLE_INSTALL_DIR}/third_party/install/onnxruntime/lib")
+#LINK_DIRECTORIES(${PADDLE_INSTALL_DIR}/third_party/install/onnxruntime/lib)
 if (NOT WITH_MKLML)
 ADD_LIBRARY(openblas STATIC IMPORTED GLOBAL)
 SET_PROPERTY(TARGET openblas PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/third_party/install/openblas/lib/libopenblas.a)
 endif()
-ADD_LIBRARY(paddle2onnx STATIC IMPORTED GLOBAL)
-SET_PROPERTY(TARGET paddle2onnx PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/third_party/install/paddle2onnx/lib/libpaddle2onnx.so)
-ADD_LIBRARY(onnxruntime STATIC IMPORTED GLOBAL)
-SET_PROPERTY(TARGET onnxruntime PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/third_party/install/onnxruntime/lib/libonnxruntime.so.1.10.0)
+#ADD_LIBRARY(paddle2onnx STATIC IMPORTED GLOBAL)
+#SET_PROPERTY(TARGET paddle2onnx PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/third_party/install/paddle2onnx/lib/libpaddle2onnx.so)
+#ADD_LIBRARY(onnxruntime STATIC IMPORTED GLOBAL)
+#SET_PROPERTY(TARGET onnxruntime PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/third_party/install/onnxruntime/lib/libonnxruntime.so.1.10.0)
 ADD_LIBRARY(paddle_inference STATIC IMPORTED GLOBAL)
-SET_PROPERTY(TARGET paddle_inference PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/lib/libpaddle_inference.so)
+SET_PROPERTY(TARGET paddle_inference PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/lib/libpaddle_inference.a)
 if (WITH_ASCEND_CL)
 SET_PROPERTY(TARGET paddle_inference PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/lib/libpaddle_inference.so)
 endif()
......
@@ -92,7 +92,6 @@ message Response {
   repeated int64 profile_time = 2;
   bool profile_server = 3;
   uint64 log_id = 4;
   // Error code
   int32 err_no = 5;
   // Error messages
......
@@ -51,17 +51,14 @@ message EngineDesc {
 /*
  * "gpu_memory_mb": allocate gpu memory by config.EnableUseGpu()
- * "cpu_math_thread_num": set thread numbers of cpu math by
- * config.SetCpuMathLibraryNumThreads()
- * "trt_workspace_size": set TensorRT workspace size by
- * config.EnableTensorRtEngine(), 1 << 25 default
- * "trt_use_static": If true, save the optimization information of the TRT
- * serialized to the disk, and load from the disk.
+ * "cpu_math_thread_num": set thread numbers of cpu math by config.SetCpuMathLibraryNumThreads()
+ * "trt_workspace_size": set TensorRT workspace size by config.EnableTensorRtEngine(), 1 << 25 default
+ * "trt_use_static": If true, save the optimization information of the TRT serialized to the disk, and load from the disk.
 */
-optional int32 gpu_memory_mb = 22 [ default = 100 ];
-optional int32 cpu_math_thread_num = 23 [ default = 1 ];
-optional int32 trt_workspace_size = 24 [ default = 33554432 ];
-optional bool trt_use_static = 25 [ default = false ];
+optional int32 gpu_memory_mb = 22 [default = 100];
+optional int32 cpu_math_thread_num = 23 [default = 1];
+optional int32 trt_workspace_size = 24 [default = 33554432];
+optional bool trt_use_static = 25 [default = false];
 /*
 * "runtime_thread_num": n == 0 means don`t use Asynchronous task scheduling
......
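For orientation, a sketch of the Paddle Inference (Python) calls that the EngineDesc options in the hunk above correspond to; the model directory is a placeholder and the values shown are the proto defaults:

```python
# A hedged sketch of the config calls named in the EngineDesc comment block;
# the model directory is a placeholder, values are the EngineDesc defaults.
from paddle.inference import Config

config = Config("serving_server_model_dir")    # placeholder model dir
config.enable_use_gpu(100, 0)                  # gpu_memory_mb = 100, GPU id 0
config.set_cpu_math_library_num_threads(1)     # cpu_math_thread_num = 1
config.enable_tensorrt_engine(
    workspace_size=1 << 25,                    # trt_workspace_size = 33554432
    use_static=False)                          # trt_use_static = false
```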
@@ -94,9 +94,9 @@ message Response {
   repeated int64 profile_time = 2;
   bool profile_server = 3;
   uint64 log_id = 4;
   // Error code
   int32 err_no = 5;
   // Error messages
   string err_msg = 6;
 };
......
@@ -94,9 +94,9 @@ message Response {
   repeated int64 profile_time = 2;
   bool profile_server = 3;
   uint64 log_id = 4;
   // Error code
   int32 err_no = 5;
   // Error messages
   string err_msg = 6;
 };
......
@@ -21,6 +21,7 @@ option cc_generic_services = true;
 message RequestAndResponse {
   required int32 a = 1;
   required float b = 2;
+  required uint64 log_id = 3 [ default = 0 ];
 };
 service LoadGeneralModelService {
......
@@ -8,8 +8,8 @@
 - [Enabling synchronous mode](#2.1)
 - [Enabling asynchronous mode](#2.2)
 - [Performance testing](#3)
-  - [Test data](#3.1)
-  - [Test conclusions](#3.2)
+  - [Test results](#3.1)
+  - [Test data](#3.2)
 <a name="1"></a>
@@ -75,7 +75,7 @@ python3 -m paddle_serving_server.serve --model uci_housing_model --thread 16 --p
 **II. Enabling asynchronous mode**
-The launch command uses `--runtime_thread_num 4` and `--batch_infer_size 32` to enable asynchronous mode: the Serving framework starts 8 asynchronous threads, merges at most 32 requests into one batch, and enables dynamic padding automatically.
+The launch command uses `--runtime_thread_num 2` and `--batch_infer_size 32` to enable asynchronous mode: the Serving framework starts 2 asynchronous threads, merges at most 32 requests into one batch, and enables dynamic padding automatically.
 ```
 python3 -m paddle_serving_server.serve --model uci_housing_model --thread 16 --port 9292 --runtime_thread_num 4 --batch_infer_size 32 --ir_optim --gpu_multi_stream --gpu_ids 0
 ```
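For reference, a minimal client sketch that exercises the service launched above; the feed/fetch names (`x`, `price`) follow the uci_housing example configs, and the config path is an assumption:

```python
# A hedged client sketch for the uci_housing service; feed name "x" and fetch
# name "price" come from the uci_housing example, the path is an assumption.
import numpy as np
from paddle_serving_client import Client

client = Client()
client.load_client_config("uci_housing_client/serving_client_conf.prototxt")
client.connect(["127.0.0.1:9292"])

x = np.ones((1, 13), dtype=np.float32)  # all-ones input of shape [1, 13]
fetch_map = client.predict(feed={"x": x}, fetch=["price"], batch=True)
print(fetch_map)
```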
@@ -84,11 +84,12 @@ python3 -m paddle_serving_server.serve --model uci_housing_model --thread 16 --p
 ## Performance testing
-GPU: Tesla P4 7611 MiB
-Cuda: cuda11.2-cudnn8-trt8
-Python: python3.7
-Model: ResNet_v2_50
-Test data: all-ones inputs, 100 requests from a single client, shape range (1, 224 ± 50, 224 ± 50)
+- GPU: Tesla P4 7611 MiB
+- CUDA: cuda11.2-cudnn8-trt8
+- Python version: python3.7
+- Model: ResNet_v2_50
+- Test data: all-ones inputs, 100 requests from a single client, shape range (1, 224 ± 50, 224 ± 50)
 Launch command for synchronous mode:
 ```
@@ -102,7 +103,25 @@ python3 -m paddle_serving_server.serve --model resnet_v2_50_imagenet_model --por
<a name="3.1"></a> <a name="3.1"></a>
**一.测试数据** **一.测试结果**
使用异步模式,并开启动态批量后,并发测试不同 shape 数据时,吞吐性能大幅提升。
<div align=center>
<img src='images/6-1_Cpp_Asynchronous_Framwork_CN_1.png' height = "600" align="middle"/>
</div
由于动态批量导致响应时长增长,经过测试,大多数场景下吞吐增量高于响应时长增长,尤其在高并发场景(client=70时),在响应时长增长 33% 情况下,吞吐增加 105%。
|Client |1 |5 |10 | 20 |30 |40 |50 |70 |
|---|---|---|---|---|---|---|---|---|
|QPS |-2.08% |-7.23% |-1.89% |20.55% |23.02% |23.34% |46.41% |105.27% |
|响应时长 | 2.70% |7.09% |5.24% |13.34% |10.80% |43.60% |8.72% |33.89% |
异步模式可有效提升服务吞吐性能。
<a name="3.2"></a>
**二.测试数据**
 1. Synchronous mode
@@ -147,20 +166,5 @@ python3 -m paddle_serving_server.serve --model resnet_v2_50_imagenet_model --por
 |50 |1 |1.50 |50.60 |7578 |89.04 |121.545 |5000 |411.364 |331.118 |605.809 |874.543 |1285.650 |48.2343 |41.1369 |9350.0000 |2568777.6400 |295.8593|
 |70 |1 |3.80 |83.20 |7602 |89.59 |133.568 |7000 |524.073 |382.653 |799.463 |1202.179 |1576.809 |57.2885 |52.4077 |10761.0000 |3013600.9670 |315.2540|
-<a name="3.2"></a>
-
-**II. Test conclusions**
-
-With asynchronous mode and dynamic batching enabled, throughput improves substantially when testing data of different shapes concurrently.
-
-<div align=center>
-<img src='images/6-1_Cpp_Asynchronous_Framwork_CN_1.png' height = "600" align="middle"/>
-</div>
-
-Dynamic batching does increase response latency, but in most tested scenarios the throughput gain outweighs the latency growth; in the high-concurrency case (client=70), throughput rises 105% while latency grows only 33%.
-
-|Client |1 |5 |10 | 20 |30 |40 |50 |70 |
-|---|---|---|---|---|---|---|---|---|
-|QPS |-2.08% |-7.23% |-1.89% |20.55% |23.02% |23.34% |46.41% |105.27% |
-|Latency | 2.70% |7.09% |5.24% |13.34% |10.80% |43.60% |8.72% |33.89% |
-
-Asynchronous mode effectively improves throughput.
@@ -33,7 +33,7 @@ C++ Serving request and response data are in protobuf format; the key structures are
 ## Tensor
-Tensor can carry data of multiple types and is the basic unit of Request and Response. Tensor is defined as follows:
+[Tensor](https://github.com/PaddlePaddle/Serving/blob/develop/core/general-server/proto/general_model_service.proto#L22) can carry data of multiple types and is the basic unit of Request and Response. Tensor is defined as follows:
 ```protobuf
 message Tensor {
@@ -104,7 +104,7 @@ The key members of the Tensor structure are `elem_type`, `shape`, `lod`, and `name/alias_name`
 - name/alias_name: name and alias, matching the model configuration
 - elem_type: data type; currently FLOAT32, INT64, INT32, UINT8, INT8, and FLOAT16 are supported
 - shape: data dimensions
 - lod: variable-length structure. The LoD (Level-of-Detail) Tensor is an advanced Paddle feature that extends Tensor to support more flexible data input (e.g., lod = [0, 2, 5] packs two sequences of lengths 2 and 3 into a 5-row tensor). See [LOD](../LOD_CN.md)
 |elem_type|Type|
 |---------|----|
@@ -146,7 +146,7 @@ tensor->set_alias_name(alias_name);
 // copy data
 int total_number = float_data.size();
 tensor->mutable_float_data()->Resize(total_number, 0);
-memcpy(tensor->mutable_float_data()->mutable_data(), float_datadata(), total_number * sizeof(float));
+memcpy(tensor->mutable_float_data()->mutable_data(), float_data.data(), total_number * sizeof(float));
 ```
<a name="1.2"></a> <a name="1.2"></a>
...@@ -174,8 +174,7 @@ tensor->set_tensor_content(string_data); ...@@ -174,8 +174,7 @@ tensor->set_tensor_content(string_data);
## Request ## Request
Request 为客户端需要发送的请求数据,其以 Tensor 为基础数据单元,并包含了额外的请求信息。定义如下:
Request为客户端需要发送的请求数据,其以Tensor为基础数据单元,并包含了额外的请求信息。定义如下:
```protobuf ```protobuf
message Request { message Request {
...@@ -186,7 +185,7 @@ message Request { ...@@ -186,7 +185,7 @@ message Request {
}; };
``` ```
- fetch_vat_names: 需要获取的输出数据名称,在GeneralResponseOP会根据该列表进行过滤.请参考模型文件serving_client_conf.prototxt中的`fetch_var`字段下的`alias_name` - fetch_vat_names: 需要获取的输出数据名称,在 `GeneralResponseOP` 会根据该列表进行过滤.请参考模型文件 `serving_client_conf.prototxt` 中的 `fetch_var` 字段下的 `alias_name`
- profile_server: 调试参数,打开时会输出性能信息 - profile_server: 调试参数,打开时会输出性能信息
- log_id: 请求ID - log_id: 请求ID
@@ -211,12 +210,14 @@ Tensor *tensor = req.add_tensor();
 **II. Building a Json Request**
-When using RESTful requests, data can be sent in Json format, structured as follows:
+When using RESTful requests, data can be sent in Json format; an example follows:
 ```JSON
 {"tensor":[{"float_data":[0.0137,-0.1136,0.2553,-0.0692,0.0582,-0.0727,-0.1583,-0.0584,0.6283,0.4919,0.1856,0.0795,-0.0332],"elem_type":1,"name":"x","alias_name":"x","shape":[1,13]}],"fetch_var_names":["price"],"log_id":0}
 ```
+You can follow this example as-is: keep the overall structure and modify only the data types and the data.
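A minimal sketch of posting this Json payload from Python; the endpoint path is an assumption (brpc-style HTTP services expose /&lt;Service&gt;/&lt;Method&gt;, here GeneralModelService/inference), as are the host and port:

```python
# A hedged sketch; the endpoint path, host, and port are assumptions.
import requests

payload = {
    "tensor": [{
        "float_data": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583,
                       -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332],
        "elem_type": 1,  # FLOAT32
        "name": "x",
        "alias_name": "x",
        "shape": [1, 13],
    }],
    "fetch_var_names": ["price"],
    "log_id": 0,
}

resp = requests.post("http://127.0.0.1:9292/GeneralModelService/inference",
                     json=payload, timeout=10)
print(resp.json())  # on success, contains the "price" output tensor
```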
<a name="3"></a> <a name="3"></a>
## Response ## Response
...@@ -242,8 +243,8 @@ message ModelOutput { ...@@ -242,8 +243,8 @@ message ModelOutput {
Response 结构中核心成员: Response 结构中核心成员:
- profile_time:当设置 `request->set_profile_server(true)` 时,会返回性能信息 - profile_time:当设置 `request->set_profile_server(true)` 时,会返回性能信息
- err_no:错误码,详见 `core/predictor/common/constant.h` - err_no:错误码
- err_msg:错误信息,详见 `core/predictor/common/constant.h` - err_msg:错误信息
- engine_name:输出节点名称 - engine_name:输出节点名称
|err_no|err_msg| |err_no|err_msg|
......
@@ -38,7 +38,7 @@ Paddle Serving's ABTest capability is built on the PYTHON SDK and multiple servers
 **I. Installing Paddle Serving Wheels**
-Using the ABTest capability requires the PYTHON SDK, so the `paddle_serving_client` wheel must be installed. The installation method is as follows:
+Using the ABTest capability requires the PYTHON SDK, so the `paddle_serving_client` wheel must be installed. The [installation method](./2-1_Docker_Images_CN.md) is as follows:
 ```
 pip3 install paddle-serving-client==0.8.3 -i https://pypi.tuna.tsinghua.edu.cn/simple
@@ -48,7 +48,7 @@ pip3 install paddle-serving-client==0.8.3 -i https://pypi.tuna.tsinghua.edu.cn/s
 **II. Downloading multiple models and saving model parameters**
-This example provides a one-click download script, `sh get_data.sh`, which downloads models trained in 3 different ways: `bow`, `cnn`, and `lstm`.
+This example provides a one-click download script, `sh get_data.sh`, which downloads self-trained models produced by 3 different training approaches: `bow`, `cnn`, and `lstm`.
 ```
 sh get_data.sh
......
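To round out the ABTest section above, a hedged client-side sketch using the PYTHON SDK's variant routing; the config path, endpoints, weights, and feed/fetch names are assumptions for illustration:

```python
# A hedged ABTest sketch; paths, endpoints, weights, and feed/fetch names
# are illustrative assumptions.
import numpy as np
from paddle_serving_client import Client

client = Client()
client.load_client_config("imdb_bow_client_conf/serving_client_conf.prototxt")
client.add_variant("bow", ["127.0.0.1:9297"], 50)    # ~50% of traffic
client.add_variant("lstm", ["127.0.0.1:9298"], 50)   # ~50% of traffic
client.connect()

words = np.array([[8, 233, 52, 601]])  # dummy word ids
fetch_map = client.predict(feed={"words": words}, fetch=["prediction"],
                           batch=True, need_variant_tag=True)
print(fetch_map)  # includes the tag of the variant that served the request
```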
 # Python Pipeline performance testing
 - [Test environment](#1)
-- [Performance metrics](#2)
+- [Performance metrics and conclusions](#2)
 <a name="1"></a>
@@ -18,7 +18,12 @@
 <a name="2"></a>
-## Performance metrics
+## Performance metrics and conclusions
+
+Testing shows that Python Pipeline mode, using multi-process concurrency, makes full use of the GPU and delivers good throughput.
+
+The test data are as follows:
 |model_name |thread_num |batch_size |CPU_util(%) |GPU_memory(mb) |GPU_util(%) |qps(samples/s) |total count |mean(ms) |median(ms) |80 percent(ms) |90 percent(ms) |99 percent(ms) |total cost(s) |each cost(s)|
 |:--|:--|:--|:--|:--|:--|:--|:--|:--|:--|:--|:--|:--|:--|:--
......
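As a companion to the throughput claim above, a rough multi-threaded probe for a Pipeline HTTP service; the endpoint URL, payload keys, and counts are assumptions following the Pipeline web-service convention:

```python
# A hedged throughput probe; URL, payload, and counts are assumptions.
import json
import time
import threading
import requests

URL = "http://127.0.0.1:18082/uci/prediction"  # assumed Pipeline endpoint
PAYLOAD = {"key": ["x"], "value": ["0.0137, -0.1136, 0.2553, -0.0692, 0.0582, "
                                   "-0.0727, -0.1583, -0.0584, 0.6283, 0.4919, "
                                   "0.1856, 0.0795, -0.0332"]}
N_THREADS, N_REQS = 8, 100  # concurrent clients, requests per client

def worker():
    for _ in range(N_REQS):
        requests.post(URL, data=json.dumps(PAYLOAD), timeout=10)

start = time.time()
threads = [threading.Thread(target=worker) for _ in range(N_THREADS)]
for t in threads:
    t.start()
for t in threads:
    t.join()
print("qps(samples/s): %.2f" % (N_THREADS * N_REQS / (time.time() - start)))
```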
@@ -121,7 +121,7 @@ class Server(object):
         self.trt_dynamic_shape_info = []
         self.gpu_memory_mb = 50
         self.cpu_math_thread_num = 1
         self.trt_workspace_size = 33554432  # 1 << 25
         self.trt_use_static = False
     def get_fetch_list(self, infer_node_idx=-1):
@@ -358,10 +358,10 @@ class Server(object):
             engine.use_xpu = self.use_xpu
             engine.use_ascend_cl = self.use_ascend_cl
             engine.use_gpu = False
-            engine.gpu_memory_mb = self.gpu_memory_mb
-            engine.cpu_math_thread_num = self.cpu_math_thread_num
-            engine.trt_workspace_size = self.trt_workspace_size
-            engine.trt_use_static = self.trt_use_static
+            #engine.gpu_memory_mb = self.gpu_memory_mb
+            #engine.cpu_math_thread_num = self.cpu_math_thread_num
+            #engine.trt_workspace_size = self.trt_workspace_size
+            #engine.trt_use_static = self.trt_use_static
             # use distributed model.
             if self.dist_subgraph_index >= 0:
......
@@ -42,6 +42,13 @@ message Request {
 message Response {
   repeated ModelOutput outputs = 1;
   repeated int64 profile_time = 2;
+  bool profile_server = 3;
+  uint64 log_id = 4;
+
+  // Error code
+  int32 err_no = 5;
+
+  // Error messages
+  string err_msg = 6;
 };
 message ModelOutput {
......