Commit 81593989 authored by: H HexToString

fix_encryption and doc

@@ -31,8 +31,7 @@ message( "WITH_GPU = ${WITH_GPU}")
# Paddle Version should be one of:
# latest: latest develop build
# version number like 1.5.2
-#SET(PADDLE_VERSION "2.0.0-rc1")
-SET(PADDLE_VERSION "latest")
+SET(PADDLE_VERSION "2.0.0")
if (WITH_GPU)
  if (WITH_TRT)
    SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-gpu-cuda10.1-cudnn7-avx-mkl-trt6")
@@ -136,8 +135,8 @@ if (WITH_TRT)
endif()
if (WITH_LITE)
-  ADD_LIBRARY(paddle_api_full_bundled STATIC IMPORTED GLOBAL)
-  SET_PROPERTY(TARGET paddle_api_full_bundled PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/third_party/install/lite/cxx/lib/libpaddle_api_full_bundled.a)
+  ADD_LIBRARY(paddle_full_api_shared STATIC IMPORTED GLOBAL)
+  SET_PROPERTY(TARGET paddle_full_api_shared PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/third_party/install/lite/cxx/lib/libpaddle_full_api_shared.so)
  if (WITH_XPU)
    ADD_LIBRARY(xpuapi SHARED IMPORTED GLOBAL)
@@ -160,7 +159,7 @@ LIST(APPEND paddle_depend_libs
      xxhash cryptopp)
if(WITH_LITE)
-  LIST(APPEND paddle_depend_libs paddle_api_full_bundled)
+  LIST(APPEND paddle_depend_libs paddle_full_api_shared)
  if(WITH_XPU)
    LIST(APPEND paddle_depend_libs xpuapi xpurt)
  endif()
......
@@ -7,6 +7,7 @@ PROTOBUF_GENERATE_CPP(pdcodegen_proto_srcs pdcodegen_proto_hdrs
LIST(APPEND pdcodegen_srcs ${pdcodegen_proto_srcs})
add_executable(pdcodegen ${pdcodegen_srcs})
+add_dependencies(pdcodegen boost)
target_link_libraries(pdcodegen protobuf ${PROTOBUF_PROTOC_LIBRARY})
# install
......
@@ -34,6 +34,42 @@
**A:** http rpc

## Installation issues

#### Q: When installing the whl package with pip install, the following error is reported:
```
Collecting opencv-python
Using cached opencv-python-4.3.0.38.tar.gz (88.0 MB)
Installing build dependencies ... done
Getting requirements to build wheel ... error
ERROR: Command errored out with exit status 1:
command: /home/work/Python-2.7.17/build/bin/python /home/work/Python-2.7.17/build/lib/python2.7/site-packages/pip/_vendor/pep517/_in_process.py get_requires_for_build_wheel /tmp/tmpLiweA9
cwd: /tmp/pip-install-_w6AUI/opencv-python
Complete output (22 lines):
Traceback (most recent call last):
File "/home/work/Python-2.7.17/build/lib/python2.7/site-packages/pip/_vendor/pep517/_in_process.py", line 280, in <module>
main()
File "/home/work/Python-2.7.17/build/lib/python2.7/site-packages/pip/_vendor/pep517/_in_process.py", line 263, in main
json_out['return_val'] = hook(**hook_input['kwargs'])
File "/home/work/Python-2.7.17/build/lib/python2.7/site-packages/pip/_vendor/pep517/_in_process.py", line 114, in get_requires_for_build_wheel
return hook(config_settings)
File "/tmp/pip-build-env-AUCbP4/overlay/lib/python2.7/site-packages/setuptools/build_meta.py", line 146, in get_requires_for_build_wheel
return self._get_build_requires(config_settings, requirements=['wheel'])
File "/tmp/pip-build-env-AUCbP4/overlay/lib/python2.7/site-packages/setuptools/build_meta.py", line 127, in _get_build_requires
self.run_setup()
File "/tmp/pip-build-env-AUCbP4/overlay/lib/python2.7/site-packages/setuptools/build_meta.py", line 243, in run_setup
self).run_setup(setup_script=setup_script)
File "/tmp/pip-build-env-AUCbP4/overlay/lib/python2.7/site-packages/setuptools/build_meta.py", line 142, in run_setup
exec(compile(code, __file__, 'exec'), locals())
File "setup.py", line 448, in <module>
main()
File "setup.py", line 99, in main
% {"ext": re.escape(sysconfig.get_config_var("EXT_SUFFIX"))}
File "/home/work/Python-2.7.17/build/lib/python2.7/re.py", line 210, in escape
s = list(pattern)
TypeError: 'NoneType' object is not iterable
```
**A:** Pin the opencv-python version first, `pip install opencv-python==4.2.0.32`, then install the whl package.
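A minimal sketch of the workaround, assuming the wheel file name below is only a placeholder for the Paddle Serving package actually being installed:

```shell
# install a pre-built, pinned opencv-python first so pip does not build it from source
pip install opencv-python==4.2.0.32
# then install the Paddle Serving wheel (file name is illustrative)
pip install paddle_serving_server-0.0.0-py2-none-any.whl
```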
## Compilation issues
......
@@ -2,35 +2,15 @@
([简体中文](./INFERENCE_TO_SERVING_CN.md)|English)

-We should know something before converting to serving model
-
-**inference_model_dir**:the directory of Paddle inference model
-**serving_client_dir**: the directory of server side configuration
-**serving_client_dir**: the directory of client side configuration
-**model_filename**: this is model description file whose default value is `__model__`, if it's not default name, set `model_filename` explicitly
-**params_filename**: during `save_inference_model` every Variable will be save as a single file. If we have the inference model whose params are compressed into one file, please set `params_filename` explicitly
-
-## Example
-``` python
-from paddle_serving_client.io import inference_model_to_serving
-inference_model_dir = "your_inference_model"
-serving_client_dir = "serving_client_dir"
-serving_server_dir = "serving_server_dir"
-feed_var_names, fetch_var_names = inference_model_to_serving(
-    inference_model_dir, serving_server_dir, serving_client_dir)
-```
-if your model file and params file are both standalone, please use the following api.
-```
-feed_var_names, fetch_var_names = inference_model_to_serving(
-    inference_model_dir, serving_server_dir, serving_client_dir,
-    model_filename="model", params_filename="params")
-```
+You can use a built-in Python module called `paddle_serving_client.convert` to convert an inference model to a serving model.
+```python
+python -m paddle_serving_client.convert --dirname ./your_inference_model_dir
+```
+Arguments are the same as those of the `inference_model_to_serving` API.
+| Argument | Type | Default | Description |
+|--------------|------|-----------|--------------------------------|
+| `dirname` | str | - | Path of saved model files. Program file and parameter files are saved in this directory. |
+| `serving_server` | str | `"serving_server"` | The path of model files and configuration files for server. |
+| `serving_client` | str | `"serving_client"` | The path of configuration files for client. |
+| `model_filename` | str | None | The name of file to load the inference program. If it is None, the default filename `__model__` will be used. |
+| `params_filename` | str | None | The name of file to load all parameters. It is only used for the case that all parameters were saved in a single binary file. If parameters were saved in separate files, set it as None. |
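As a sketch of a fuller invocation, assuming the CLI flags mirror the argument names in the table above and that the paths are illustrative, a model saved as single program and parameter files would be converted like this:

```shell
python -m paddle_serving_client.convert --dirname ./your_inference_model_dir \
    --model_filename model --params_filename params \
    --serving_server ./serving_server --serving_client ./serving_client
```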
@@ -2,32 +2,15 @@
([English](./INFERENCE_TO_SERVING.md)|简体中文)

-## Example
-
-In the following code, we need to know the following information:
-
-**model directory**: the directory where Paddle's inference_model is stored
-**serving_client_dir**: the save path of the server-side configuration after the inference_model is converted to a Serving model
-**serving_client_dir**: the save path of the client-side configuration after the inference_model is converted to a Serving model
-**model description file**: the model description file `model_filename` defaults to `__model__` and is a pb2 text file; if it has a different name, it must be specified explicitly
-**model parameter file**: in the `save_inference_model` stage, each Variable is saved as a separate binary file by default, in which case nothing needs to be specified. If all parameters are compressed into a single file, `params_filename` must be specified explicitly
-
-``` python
-from paddle_serving_client.io import inference_model_to_serving
-inference_model_dir = "your_inference_model"
-serving_client_dir = "serving_client_dir"
-serving_server_dir = "serving_server_dir"
-feed_var_names, fetch_var_names = inference_model_to_serving(
-    inference_model_dir, serving_server_dir, serving_client_dir)
-```
-If the model has a model description file `model_filename` and a model parameter file `params_filename`, use
-```
-feed_var_names, fetch_var_names = inference_model_to_serving(
-    inference_model_dir, serving_server_dir, serving_client_dir,
-    model_filename="model", params_filename="params")
-```
+You can convert a model with the built-in module `paddle_serving_client.convert` provided by Paddle Serving.
+```python
+python -m paddle_serving_client.convert --dirname ./your_inference_model_dir
+```
+The module arguments are the same as those of the `inference_model_to_serving` API.
+| Argument | Type | Default | Description |
+|--------------|------|-----------|--------------------------------|
+| `dirname` | str | - | Path of the model files to be converted. Both the Program file and the parameter files are stored in this directory. |
+| `serving_server` | str | `"serving_server"` | Path where the converted model files and configuration files are stored. Defaults to serving_server. |
+| `serving_client` | str | `"serving_client"` | Path where the converted client configuration files are stored. Defaults to serving_client. |
+| `model_filename` | str | None | Name of the file that stores the Inference Program structure of the model to be converted. If set to None, `__model__` is used as the default file name. |
+| `params_filename` | str | None | Name of the file that stores all parameters of the model to be converted. It needs to be specified if and only if all parameters are saved in a single binary file. If parameters are stored in separate files, set it to None. |
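As a follow-up usage sketch (directory names are illustrative), the converted server-side folder can then be passed to the serving launcher, following the same serve command pattern used elsewhere in this repository:

```shell
python -m paddle_serving_server.serve --model ./serving_server --port 9393
```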
@@ -19,7 +19,9 @@ https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.0.0.post9-p
#cuda 10.0
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.0.0.post10-py3-none-any.whl
#cuda10.1 with TensorRT 6
-https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.0.0.trt-py3-none-any.whl
+https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.0.0.post101-py3-none-any.whl
+#cuda10.2 with TensorRT 7
+https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.0.0.post102-py3-none-any.whl
```
### Python 2
```
@@ -27,8 +29,11 @@ https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.0.0.trt-py3
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.0.0.post9-py2-none-any.whl
#cuda 10.0
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.0.0.post10-py2-none-any.whl
-##cuda10.1 with TensorRT 6
-https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.0.0.trt-py2-none-any.whl
+#cuda10.1 with TensorRT 6
+https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.0.0.post101-py2-none-any.whl
+#cuda10.2 with TensorRT 7
+https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.0.0.post102-py2-none-any.whl
```
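As a hedged usage note, any of the wheel URLs listed above can be installed directly with pip; for example, for Python 3 with CUDA 10.1 and TensorRT 6:

```shell
pip3 install https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.0.0.post101-py3-none-any.whl
```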
## Client
......
@@ -112,10 +112,8 @@ The second is to deploy GPU Serving and Java Client separately. If they are on t
**It should be noted that in the example, all models(not pipeline) need to use `--use_multilang` to start GRPC multi-programming language support, and the port number is 9393. If you need another port, you need to modify it in the java file**
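As a rough illustration of that note (the model directory here is only a placeholder), a non-pipeline GPU server would be started with multi-language support enabled on port 9393 along these lines:

```shell
python -m paddle_serving_server_gpu.serve --model your_serving_server_model --port 9393 --gpu_ids 0 --use_multilang
```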
-**Currently Serving has launched the Pipeline mode (see [Pipeline Serving](../doc/PIPELINE_SERVING.md) for details). Pipeline Serving Client for Java is released, the next version multi-thread java client example will be released**
+**Currently Serving has launched the Pipeline mode (see [Pipeline Serving](../doc/PIPELINE_SERVING.md) for details). Pipeline Serving Client for Java is released.**
-**It should be noted that in the example, Java Pipeline Client code is in path /Java/Examples and /Java/src/main, and the Pipeline server code is in path /python/examples/pipeline/
-The Client IP and Port(which is configured in java/examples/src/main/java/PipelineClientExample.java) should be corresponding to the Pipeline Server IP and Port(which is configured in config.yaml)
-**
+**It should be noted that in the example, Java Pipeline Client code is in path /Java/Examples and /Java/src/main, and the Pipeline server code is in path /python/examples/pipeline/. The Client IP and Port (configured in java/examples/src/main/java/PipelineClientExample.java) should correspond to the Pipeline Server IP and Port (configured in config.yaml).**
@@ -111,11 +111,9 @@ java -cp paddle-serving-sdk-java-examples-0.0.1-jar-with-dependencies.jar Pipeli
The second way is to deploy GPU Serving and the Java Client separately. If they are on the same host, you can find the container's IP address with ifconfig, modify the endpoint passed to client.connect in `examples/src/main/java/PaddleServingClientExample.java`, and compile again. Alternatively, start docker with `--net=host` so that docker shares the host's network devices; then the Java code can run without modification.
**It should be noted that in the examples, all non-Pipeline models need to be started with `--use_multilang` to enable GRPC multi-language support, and the port number is 9393. If you need a different port, modify it in the Java file.**
**Serving has launched the Pipeline mode (see [Pipeline Serving](../doc/PIPELINE_SERVING_CN.md) for details), and the Pipeline Serving Client for Java has been released.**
**Note that the Java Pipeline Client examples are in /Java/Examples and /Java/src/main, and the corresponding Pipeline servers are in /python/examples/pipeline/.
The ip and port in java/examples/src/main/java/PipelineClientExample.java must match the ip and port configured in the config.yaml of the corresponding Pipeline server under /python/examples/pipeline/.**
@@ -128,20 +128,22 @@ class FluidArmAnalysisCore : public FluidFamilyCore {
    config.DisableGpu();
    config.SetCpuMathLibraryNumThreads(1);

-    if (params.enable_memory_optimization()) {
-      config.EnableMemoryOptim();
-    }
-
-    if (params.enable_memory_optimization()) {
-      config.EnableMemoryOptim();
-    }
-
-    if (params.use_lite()) {
-      config.EnableLiteEngine(PrecisionType::kFloat32, true);
-    }
-
-    if (params.use_xpu()) {
-      config.EnableXpu(100);
-    }
+    if (params.use_lite()) {
+      config.EnableLiteEngine(PrecisionType::kFloat32, true);
+    }
+
+    if (params.use_xpu()) {
+      config.EnableXpu(2 * 1024 * 1024);
+    }
+
+    if (params.enable_memory_optimization()) {
+      config.EnableMemoryOptim();
+    }
+
+    if (params.enable_ir_optimization()) {
+      config.SwitchIrOptim(true);
+    } else {
+      config.SwitchIrOptim(false);
+    }

    config.SwitchSpecifyInputNames(true);
@@ -173,6 +175,14 @@ class FluidArmAnalysisDirCore : public FluidFamilyCore {
    config.SwitchSpecifyInputNames(true);
    config.SetCpuMathLibraryNumThreads(1);

+    if (params.use_lite()) {
+      config.EnableLiteEngine(PrecisionType::kFloat32, true);
+    }
+
+    if (params.use_xpu()) {
+      config.EnableXpu(2 * 1024 * 1024);
+    }
+
    if (params.enable_memory_optimization()) {
      config.EnableMemoryOptim();
    }
@@ -183,14 +193,6 @@ class FluidArmAnalysisDirCore : public FluidFamilyCore {
      config.SwitchIrOptim(false);
    }

-    if (params.use_lite()) {
-      config.EnableLiteEngine(PrecisionType::kFloat32, true);
-    }
-
-    if (params.use_xpu()) {
-      config.EnableXpu(100);
-    }
-
    AutoLock lock(GlobalPaddleCreateMutex::instance());
    _core = CreatePredictor(config);
    if (NULL == _core.get()) {
......
@@ -81,25 +81,45 @@ if (SERVER)
      DEPENDS ${SERVING_SERVER_CORE} server_config_py_proto ${PY_FILES})
  add_custom_target(paddle_python ALL DEPENDS ${PADDLE_SERVING_BINARY_DIR}/.timestamp)
elseif(WITH_TRT)
+  if(CUDA_VERSION EQUAL 10.1)
+    set(SUFFIX 101)
+  elseif(CUDA_VERSION EQUAL 10.2)
+    set(SUFFIX 102)
+  elseif(CUDA_VERSION EQUAL 11.0)
+    set(SUFFIX 110)
+  endif()
  add_custom_command(
      OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp
      COMMAND cp -r
      ${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_server_gpu/ ${PADDLE_SERVING_BINARY_DIR}/python/
      COMMAND env ${py_env} ${PYTHON_EXECUTABLE} gen_version.py
-      "server_gpu" trt
+      "server_gpu" ${SUFFIX}
      COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
      DEPENDS ${SERVING_SERVER_CORE} server_config_py_proto ${PY_FILES})
  add_custom_target(paddle_python ALL DEPENDS ${PADDLE_SERVING_BINARY_DIR}/.timestamp)
elseif(WITH_LITE)
-  add_custom_command(
-      OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp
-      COMMAND cp -r
-      ${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_server_gpu/ ${PADDLE_SERVING_BINARY_DIR}/python/
-      COMMAND env ${py_env} ${PYTHON_EXECUTABLE} gen_version.py
-      "server_gpu" arm
-      COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
-      DEPENDS ${SERVING_SERVER_CORE} server_config_py_proto ${PY_FILES})
-  add_custom_target(paddle_python ALL DEPENDS ${PADDLE_SERVING_BINARY_DIR}/.timestamp)
+  if(WITH_XPU)
+    add_custom_command(
+        OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp
+        COMMAND cp -r
+        ${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_server_gpu/ ${PADDLE_SERVING_BINARY_DIR}/python/
+        COMMAND env ${py_env} ${PYTHON_EXECUTABLE} gen_version.py
+        "server_gpu" arm-xpu
+        COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
+        DEPENDS ${SERVING_SERVER_CORE} server_config_py_proto ${PY_FILES})
+    add_custom_target(paddle_python ALL DEPENDS ${PADDLE_SERVING_BINARY_DIR}/.timestamp)
+  else()
+    add_custom_command(
+        OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp
+        COMMAND cp -r
+        ${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_server_gpu/ ${PADDLE_SERVING_BINARY_DIR}/python/
+        COMMAND env ${py_env} ${PYTHON_EXECUTABLE} gen_version.py
+        "server_gpu" arm
+        COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
+        DEPENDS ${SERVING_SERVER_CORE} server_config_py_proto ${PY_FILES})
+    add_custom_target(paddle_python ALL DEPENDS ${PADDLE_SERVING_BINARY_DIR}/.timestamp)
+  endif()
else()
  add_custom_command(
      OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp
......
@@ -3,9 +3,10 @@
([简体中文](./README_CN.md)|English)

In the example, a BERT model is used for semantic understanding prediction, and the text is represented as a vector, which can be used for further analysis and prediction.
+If your Python version is 3.X, replace 'pip' with 'pip3' and 'python' with 'python3' in the following commands.

### Getting Model
+Method 1:
This example use model [BERT Chinese Model](https://www.paddlepaddle.org.cn/hubdetail?name=bert_chinese_L-12_H-768_A-12&en_category=SemanticModel) from [Paddlehub](https://github.com/PaddlePaddle/PaddleHub).

Install paddlehub first
@@ -22,11 +23,13 @@ the 128 in the command above means max_seq_len in BERT model, which is the lengt
the config file and model file for server side are saved in the folder bert_seq128_model.
the config file generated for client side is saved in the folder bert_seq128_client.
+Method 2:
You can also download the above model from BOS(max_seq_len=128). After decompression, the config file and model file for server side are stored in the bert_chinese_L-12_H-768_A-12_model folder, and the config file generated for client side is stored in the bert_chinese_L-12_H-768_A-12_client folder:
```shell
wget https://paddle-serving.bj.bcebos.com/paddle_hub_models/text/SemanticModel/bert_chinese_L-12_H-768_A-12.tar.gz
tar -xzf bert_chinese_L-12_H-768_A-12.tar.gz
```
+If your model is bert_chinese_L-12_H-768_A-12_model, replace the 'bert_seq128_model' field in the following commands with 'bert_chinese_L-12_H-768_A-12_model' and 'bert_seq128_client' with 'bert_chinese_L-12_H-768_A-12_client', as shown in the sketch below.
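For example, with the BOS model from Method 2, the RPC service command from the next section would become (a sketch of the substitution described above):

```shell
python -m paddle_serving_server.serve --model bert_chinese_L-12_H-768_A-12_model/ --port 9292
```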
### Getting Dict and Sample Dataset
@@ -36,11 +39,11 @@ sh get_data.sh
this script will download Chinese Dictionary File vocab.txt and Chinese Sample Data data-c.txt

### RPC Inference Service
-Run
+To start the cpu inference service, run
```
python -m paddle_serving_server.serve --model bert_seq128_model/ --port 9292 #cpu inference service
```
-Or
+Or, to start the gpu inference service, run
```
python -m paddle_serving_server_gpu.serve --model bert_seq128_model/ --port 9292 --gpu_ids 0 #launch gpu inference service at GPU 0
```
@@ -59,12 +62,18 @@ head data-c.txt | python bert_client.py --model bert_seq128_client/serving_clien
the client reads data from data-c.txt and send prediction request, the prediction is given by word vector. (Due to massive data in the word vector, we do not print it).

### HTTP Inference Service
+To start the cpu HTTP inference service, run
+```
+python bert_web_service.py bert_seq128_model/ 9292 #launch cpu inference service
+```
+Or, to start the gpu HTTP inference service, run
```
export CUDA_VISIBLE_DEVICES=0,1
```
set environmental variable to specify which gpus are used, the command above means gpu 0 and gpu 1 is used.
```
-python bert_web_service.py bert_seq128_model/ 9292 #launch gpu inference service
+python bert_web_service_gpu.py bert_seq128_model/ 9292 #launch gpu inference service
```
### HTTP Inference
......
@@ -4,8 +4,9 @@
In this example, a BERT model is used for semantic understanding prediction, representing text as vectors that can be used for further analysis and prediction.
+If your Python version is 3.X, replace 'pip' with 'pip3' and 'python' with 'python3' in the following commands.

### Getting Model
+Method 1:
This example uses the [BERT Chinese Model](https://www.paddlepaddle.org.cn/hubdetail?name=bert_chinese_L-12_H-768_A-12&en_category=SemanticModel) from [Paddlehub](https://github.com/PaddlePaddle/PaddleHub).
Install paddlehub first
```
@@ -19,11 +20,15 @@ python prepare_model.py 128
The server-side config files and model files are generated and stored in the folder bert_seq128_model.
The client-side config files are generated and stored in the folder bert_seq128_client.
+Method 2:
You can also download the above model (max_seq_len=128) directly from BOS. After decompression, the server-side config files and model files are stored in the bert_chinese_L-12_H-768_A-12_model folder, and the client-side config files in the bert_chinese_L-12_H-768_A-12_client folder:
```shell
wget https://paddle-serving.bj.bcebos.com/paddle_hub_models/text/SemanticModel/bert_chinese_L-12_H-768_A-12.tar.gz
tar -xzf bert_chinese_L-12_H-768_A-12.tar.gz
```
+If you use the bert_chinese_L-12_H-768_A-12_model model, replace the bert_seq128_model field in the following commands with bert_chinese_L-12_H-768_A-12_model and the bert_seq128_client field with bert_chinese_L-12_H-768_A-12_client.

### Getting the Dictionary and Sample Data
@@ -33,13 +38,15 @@ sh get_data.sh
The script downloads the Chinese dictionary vocab.txt and the Chinese sample data data-c.txt.

### Starting the RPC Inference Service
-Run
+To start the cpu inference service, run
```
python -m paddle_serving_server.serve --model bert_seq128_model/ --port 9292 #start cpu inference service
```
-Or
+Or, to start the gpu inference service, run
```
python -m paddle_serving_server_gpu.serve --model bert_seq128_model/ --port 9292 --gpu_ids 0 #start gpu inference service on gpu 0
```
### Prediction
@@ -51,17 +58,28 @@ pip install paddle_serving_app
Run
```
head data-c.txt | python bert_client.py --model bert_seq128_client/serving_client_conf.prototxt
```
The client reads data from data-c.txt and sends prediction requests; the prediction result is the vector representation of the text (since the output is large, the script does not print it). The server address can be modified in the script.

### Starting the HTTP Inference Service
+To start the cpu HTTP inference service, run
+```
+python bert_web_service.py bert_seq128_model/ 9292 #start cpu inference service
+```
+Or, to start the gpu HTTP inference service, run
```
export CUDA_VISIBLE_DEVICES=0,1
```
Specify the gpus used by the gpu inference service via this environment variable; the example uses the two gpus with index 0 and 1.
```
-python bert_web_service.py bert_seq128_model/ 9292 #start gpu inference service
+python bert_web_service_gpu.py bert_seq128_model/ 9292 #start gpu inference service
```
### Prediction
```
......
# coding=utf-8
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
from paddle_serving_server_gpu.web_service import WebService
from paddle_serving_app.reader import ChineseBertReader
import sys
import os
import numpy as np
class BertService(WebService):
    def load(self):
        self.reader = ChineseBertReader({
            "vocab_file": "vocab.txt",
            "max_seq_len": 128
        })

    def preprocess(self, feed=[], fetch=[]):
        feed_res = []
        is_batch = False
        for ins in feed:
            feed_dict = self.reader.process(ins["words"].encode("utf-8"))
            for key in feed_dict.keys():
                feed_dict[key] = np.array(feed_dict[key]).reshape(
                    (len(feed_dict[key]), 1))
            feed_res.append(feed_dict)
        return feed_res, fetch, is_batch


bert_service = BertService(name="bert")
bert_service.load()
bert_service.load_model_config(sys.argv[1])
bert_service.prepare_server(
    workdir="workdir", port=int(sys.argv[2]), device="gpu")
bert_service.run_rpc_service()
bert_service.run_web_service()
wget --no-check-certificate https://paddle-serving.bj.bcebos.com/uci_housing_example/encrypt.tar.gz
tar -xzf encrypt.tar.gz
-cp -rvf ../fit_a_line/uci_housing_model .
-cp -rvf ../fit_a_line/uci_housing_client .
+wget --no-check-certificate https://paddle-serving.bj.bcebos.com/uci_housing.tar.gz
+tar -xzf uci_housing.tar.gz
@@ -34,7 +34,7 @@ python test_client.py uci_housing_client/serving_client_conf.prototxt
Start a web service with default web service hosting modules:
``` shell
-python test_server.py
+python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393 --name uci
```
### Client prediction
......
@@ -35,7 +35,7 @@ python test_client.py uci_housing_client/serving_client_conf.prototxt
Start the default web service with the following line:
``` shell
-python test_server.py
+python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393 --name uci
```
### Client prediction
......
@@ -20,6 +20,9 @@ op:
    #uci model path
    model_config: ResNet50_vd_model

+    #computing hardware type: if left empty, it is determined by devices (CPU/GPU); 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu
+    device_type: 1
+
    #computing hardware IDs: when devices is "" or omitted, prediction runs on CPU; when devices is "0" or "0,1,2", prediction runs on GPU and the value lists the GPU cards to use
    devices: "0" # "0,1"
......
@@ -20,7 +20,10 @@ op:
    #uci model path
    model_config: uci_housing_model

-    #computing hardware IDs: when devices is "" or omitted, prediction runs on CPU; when devices is "0" or "0,1,2", prediction runs on GPU and the value lists the GPU cards to use
+    #computing hardware type: if left empty, it is determined by devices (CPU/GPU); 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu
+    device_type: 0
+
+    #computing hardware IDs: the hardware type is determined by device_type first. When devices is "" or omitted, prediction runs on CPU; when it is "0" or "0,1,2", prediction runs on GPU and the value lists the GPU cards to use
    devices: "" # "0,1"

    #client type: brpc, grpc or local_predictor. local_predictor does not start a Serving service and predicts in-process
......
@@ -20,6 +20,7 @@ import google.protobuf.text_format
import numpy as np
import argparse
import paddle.fluid as fluid
+import paddle.inference as inference
from .proto import general_model_config_pb2 as m_config
from paddle.fluid.core import PaddleTensor
from paddle.fluid.core import AnalysisConfig
@@ -125,14 +126,14 @@ class LocalPredictor(object):
        if use_lite:
            config.enable_lite_engine(
-                precision_mode = PrecisionType.Float32,
-                zero_copy = True,
-                passes_filter = [],
-                ops_filter = []
-            )
+                precision_mode=inference.PrecisionType.Float32,
+                zero_copy=True,
+                passes_filter=[],
+                ops_filter=[])

        if use_xpu:
-            config.enable_xpu(100 * 1024 * 1024)  # 2MB l3 cache
+            config.enable_xpu(8 * 1024 * 1024)

        self.predictor = create_paddle_predictor(config)
......
@@ -20,7 +20,7 @@ from paddle_serving_server import OpMaker, OpSeqMaker, Server
from paddle_serving_client import Client
from contextlib import closing
import socket
+import numpy as np

from paddle_serving_server import pipeline
from paddle_serving_server.pipeline import Op
@@ -74,8 +74,8 @@ class WebService(object):
        f = open(client_config, 'r')
        model_conf = google.protobuf.text_format.Merge(
            str(f.read()), model_conf)
-        self.feed_names = [var.alias_name for var in model_conf.feed_var]
-        self.fetch_names = [var.alias_name for var in model_conf.fetch_var]
+        self.feed_vars = {var.name: var for var in model_conf.feed_var}
+        self.fetch_vars = {var.name: var for var in model_conf.fetch_var}

    def _launch_rpc_service(self):
        op_maker = OpMaker()
@@ -211,6 +211,15 @@ class WebService(object):
    def preprocess(self, feed=[], fetch=[]):
        print("This API will be deprecated later. Please do not use it")
        is_batch = True
+        feed_dict = {}
+        for var_name in self.feed_vars.keys():
+            feed_dict[var_name] = []
+        for feed_ins in feed:
+            for key in feed_ins:
+                feed_dict[key].append(np.array(feed_ins[key]).reshape(list(self.feed_vars[key].shape))[np.newaxis, :])
+        feed = {}
+        for key in feed_dict:
+            feed[key] = np.concatenate(feed_dict[key], axis=0)
        return feed, fetch, is_batch

    def postprocess(self, feed=[], fetch=[], fetch_map=None):
......
@@ -217,6 +217,7 @@ class Server(object):
        self.module_path = os.path.dirname(paddle_serving_server.__file__)
        self.cur_path = os.getcwd()
        self.use_local_bin = False
+        self.device = "cpu"
        self.gpuid = 0
        self.use_trt = False
        self.use_lite = False
@@ -284,6 +285,9 @@ class Server(object):
                "GPU not found, please check your environment or use cpu version by \"pip install paddle_serving_server\""
            )

+    def set_device(self, device="cpu"):
+        self.device = device
+
    def set_gpuid(self, gpuid=0):
        self.gpuid = gpuid
@@ -316,24 +320,25 @@ class Server(object):
            engine.static_optimization = False
            engine.force_update_static_cache = False
            engine.use_trt = self.use_trt
-            engine.use_lite = self.use_lite
-            engine.use_xpu = self.use_xpu
+            if os.path.exists('{}/__params__'.format(model_config_path)):
+                suffix = ""
+            else:
+                suffix = "_DIR"
+            if device == "arm":
+                engine.use_lite = self.use_lite
+                engine.use_xpu = self.use_xpu
            if device == "cpu":
                if use_encryption_model:
                    engine.type = "FLUID_CPU_ANALYSIS_ENCRPT"
                else:
-                    engine.type = "FLUID_CPU_ANALYSIS_DIR"
+                    engine.type = "FLUID_CPU_ANALYSIS" + suffix
            elif device == "gpu":
                if use_encryption_model:
                    engine.type = "FLUID_GPU_ANALYSIS_ENCRPT"
                else:
-                    engine.type = "FLUID_GPU_ANALYSIS_DIR"
+                    engine.type = "FLUID_GPU_ANALYSIS" + suffix
            elif device == "arm":
-                engine.type = "FLUID_ARM_ANALYSIS_DIR"
+                engine.type = "FLUID_ARM_ANALYSIS" + suffix
            self.model_toolkit_conf.engines.extend([engine])

    def _prepare_infer_service(self, port):
@@ -434,9 +439,9 @@ class Server(object):
            for line in version_file.readlines():
                if re.match("cuda_version", line):
                    cuda_version = line.split("\"")[1]
-                    if cuda_version == "trt":
+                    if cuda_version == "101" or cuda_version == "102" or cuda_version == "110":
                        device_version = "serving-gpu-" + cuda_version + "-"
-                    elif cuda_version == "arm":
+                    elif cuda_version == "arm" or cuda_version == "arm-xpu":
                        device_version = "serving-" + cuda_version + "-"
                    else:
                        device_version = "serving-gpu-cuda" + cuda_version + "-"
@@ -541,7 +546,8 @@ class Server(object):
        else:
            print("Use local bin : {}".format(self.bin_path))
        #self.check_cuda()
-        if self.use_lite:
+        # Todo: merge CPU and GPU code, remove device to model_toolkit
+        if self.device == "cpu" or self.device == "arm":
            command = "{} " \
                      "-enable_model_toolkit " \
                      "-inferservice_path {} " \
......
@@ -76,6 +76,7 @@ def start_gpu_card_model(index, gpuid, port, args):  # pylint: disable=doc-strin
        server.set_lite()
        device = "arm"
+    server.set_device(device)

    if args.use_xpu:
        server.set_xpu()
......
@@ -81,8 +81,8 @@ class WebService(object):
        f = open(client_config, 'r')
        model_conf = google.protobuf.text_format.Merge(
            str(f.read()), model_conf)
-        self.feed_names = [var.alias_name for var in model_conf.feed_var]
-        self.fetch_names = [var.alias_name for var in model_conf.fetch_var]
+        self.feed_vars = {var.name: var for var in model_conf.feed_var}
+        self.fetch_vars = {var.name: var for var in model_conf.fetch_var}

    def set_gpus(self, gpus):
        print("This API will be deprecated later. Please do not use it")
@@ -118,6 +118,7 @@ class WebService(object):
        server.set_num_threads(thread_num)
        server.set_memory_optimize(mem_optim)
        server.set_ir_optimize(ir_optim)
+        server.set_device(device)
        if use_lite:
            server.set_lite()
@@ -289,6 +290,15 @@ class WebService(object):
    def preprocess(self, feed=[], fetch=[]):
        print("This API will be deprecated later. Please do not use it")
        is_batch = True
+        feed_dict = {}
+        for var_name in self.feed_vars.keys():
+            feed_dict[var_name] = []
+        for feed_ins in feed:
+            for key in feed_ins:
+                feed_dict[key].append(np.array(feed_ins[key]).reshape(list(self.feed_vars[key].shape))[np.newaxis, :])
+        feed = {}
+        for key in feed_dict:
+            feed[key] = np.concatenate(feed_dict[key], axis=0)
        return feed, fetch, is_batch

    def postprocess(self, feed=[], fetch=[], fetch_map=None):
......
@@ -38,14 +38,12 @@ class LocalServiceHandler(object):
                 client_type='local_predictor',
                 workdir="",
                 thread_num=2,
+                 device_type=-1,
                 devices="",
                 fetch_names=None,
                 mem_optim=True,
                 ir_optim=False,
                 available_port_generator=None,
-                 use_trt=False,
-                 use_lite=False,
-                 use_xpu=False,
                 use_profile=False):
        """
        Initialization of localservicehandler
@@ -55,15 +53,14 @@ class LocalServiceHandler(object):
            client_type: brpc, grpc and local_predictor[default]
            workdir: work directory
            thread_num: number of threads, concurrent quantity.
+            device_type: support multiple devices. -1=Not set, determined by
+                `devices`. 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu
            devices: gpu id list[gpu], "" default[cpu]
            fetch_names: get fetch names out of LocalServiceHandler in
                local_predictor mode. fetch_names_ is compatible for Client().
            mem_optim: use memory/graphics memory optimization, True default.
            ir_optim: use calculation chart optimization, False default.
            available_port_generator: generate available ports
-            use_trt: use nvidia tensorRt engine, False default.
-            use_lite: use Paddle-Lite engine, False default.
-            use_xpu: run predict on Baidu Kunlun, False default.
            use_profile: use profiling, False default.

        Returns:
@@ -74,28 +71,61 @@ class LocalServiceHandler(object):
        self._model_config = model_config
        self._port_list = []
-        self._device_type = "cpu"
-        if devices == "":
-            # cpu
-            devices = [-1]
-            if use_lite:
-                self._device_type = "arm"
-                self._port_list.append(available_port_generator.next())
-                _LOGGER.info("Model({}) will be launch in arm device. Port({})"
-                             .format(model_config, self._port_list))
-            else:
-                self._device_type = "cpu"
-                self._port_list.append(available_port_generator.next())
-                _LOGGER.info("Model({}) will be launch in cpu device. Port({})"
-                             .format(model_config, self._port_list))
-        else:
-            # gpu
-            self._device_type = "gpu"
-            devices = [int(x) for x in devices.split(",")]
-            for _ in devices:
-                self._port_list.append(available_port_generator.next())
-            _LOGGER.info("Model({}) will be launch in gpu device: {}. Port({})"
-                         .format(model_config, devices, self._port_list))
+        self._device_name = "cpu"
+        self._use_gpu = False
+        self._use_trt = False
+        self._use_lite = False
+        self._use_xpu = False
+
+        if device_type == -1:
+            # device_type is not set, determined by `devices`,
+            if devices == "":
+                # CPU
+                self._device_name = "cpu"
+                devices = [-1]
+            else:
+                # GPU
+                self._device_name = "gpu"
+                self._use_gpu = True
+                devices = [int(x) for x in devices.split(",")]
+        elif device_type == 0:
+            # CPU
+            self._device_name = "cpu"
+            devices = [-1]
+        elif device_type == 1:
+            # GPU
+            self._device_name = "gpu"
+            self._use_gpu = True
+            devices = [int(x) for x in devices.split(",")]
+        elif device_type == 2:
+            # Nvidia Tensor RT
+            self._device_name = "gpu"
+            self._use_gpu = True
+            devices = [int(x) for x in devices.split(",")]
+            self._use_trt = True
+        elif device_type == 3:
+            # ARM CPU
+            self._device_name = "arm"
+            devices = [-1]
+            self._use_lite = True
+        elif device_type == 4:
+            # Kunlun XPU
+            self._device_name = "arm"
+            devices = [int(x) for x in devices.split(",")]
+            self._use_lite = True
+            self._use_xpu = True
+        else:
+            _LOGGER.error(
+                "LocalServiceHandler initialization fail. device_type={}"
+                .format(device_type))
+
+        if client_type == "brpc" or client_type == "grpc":
+            for _ in devices:
+                self._port_list.append(available_port_generator.next())
+            _LOGGER.info("Create ports for devices:{}. Port:{}"
+                         .format(devices, self._port_list))
        self._client_type = client_type
        self._workdir = workdir
        self._devices = devices
@@ -105,14 +135,21 @@ class LocalServiceHandler(object):
        self._local_predictor_client = None
        self._rpc_service_list = []
        self._server_pros = []
-        self._use_trt = use_trt
-        self._use_lite = use_lite
-        self._use_xpu = use_xpu
        self._use_profile = use_profile
-        self.fetch_names_ = fetch_names
+        self._fetch_names = fetch_names
+
+        _LOGGER.info(
+            "Models({}) will be launched by device {}. use_gpu:{}, "
+            "use_trt:{}, use_lite:{}, use_xpu:{}, device_type:{}, devices:{}, "
+            "mem_optim:{}, ir_optim:{}, use_profile:{}, thread_num:{}, "
+            "client_type:{}, fetch_names:{}".format(
+                model_config, self._device_name, self._use_gpu, self._use_trt,
+                self._use_lite, self._use_xpu, device_type, self._devices,
+                self._mem_optim, self._ir_optim, self._use_profile,
+                self._thread_num, self._client_type, self._fetch_names))

    def get_fetch_list(self):
-        return self.fetch_names_
+        return self._fetch_names

    def get_port_list(self):
        return self._port_list
@@ -149,22 +186,17 @@ class LocalServiceHandler(object):
        from paddle_serving_app.local_predict import LocalPredictor
        if self._local_predictor_client is None:
            self._local_predictor_client = LocalPredictor()
-            use_gpu = False
-            use_lite = False
-            if self._device_type == "gpu":
-                use_gpu = True
-            elif self._device_type == "arm":
-                use_lite = True
            self._local_predictor_client.load_model_config(
                model_path=self._model_config,
-                use_gpu=use_gpu,
+                use_gpu=self._use_gpu,
                gpu_id=self._devices[concurrency_idx],
                use_profile=self._use_profile,
                thread_num=self._thread_num,
                mem_optim=self._mem_optim,
                ir_optim=self._ir_optim,
                use_trt=self._use_trt,
-                use_lite=use_lite,
+                use_lite=self._use_lite,
                use_xpu=self._use_xpu)
        return self._local_predictor_client
@@ -174,7 +206,7 @@ class LocalServiceHandler(object):
    def _prepare_one_server(self, workdir, port, gpuid, thread_num, mem_optim,
                            ir_optim):
        """
-        According to _device_type, generating one CpuServer or GpuServer, and
+        According to self._device_name, generating one Cpu/Gpu/Arm Server, and
        setting the model config amd startup params.

        Args:
@@ -188,7 +220,7 @@ class LocalServiceHandler(object):
        Returns:
            server: CpuServer/GpuServer
        """
-        if self._device_type == "cpu":
+        if self._device_name == "cpu":
            from paddle_serving_server import OpMaker, OpSeqMaker, Server
            op_maker = OpMaker()
            read_op = op_maker.create('general_reader')
@@ -217,6 +249,8 @@ class LocalServiceHandler(object):
            server = Server()
            if gpuid >= 0:
                server.set_gpuid(gpuid)
+            # TODO: support arm or arm + xpu later
+            server.set_device(self._device_name)
            server.set_op_sequence(op_seq_maker.get_op_sequence())
            server.set_num_threads(thread_num)
@@ -225,9 +259,9 @@ class LocalServiceHandler(object):
        server.load_model_config(self._model_config)
        server.prepare_server(
-            workdir=workdir, port=port, device=self._device_type)
-        if self.fetch_names_ is None:
-            self.fetch_names_ = server.get_fetch_list()
+            workdir=workdir, port=port, device=self._device_name)
+        if self._fetch_names is None:
+            self._fetch_names = server.get_fetch_list()
        return server

    def _start_one_server(self, service_idx):
@@ -264,7 +298,7 @@ class LocalServiceHandler(object):
        """
        Start multiple processes and start one server in each process
        """
-        for i, service in enumerate(self._rpc_service_list):
+        for i, _ in enumerate(self._rpc_service_list):
            p = multiprocessing.Process(
                target=self._start_one_server, args=(i, ))
            p.daemon = True
......
@@ -134,6 +134,7 @@ class Op(object):
        self.model_config = None
        self.workdir = None
        self.thread_num = self.concurrency
+        self.device_type = -1
        self.devices = ""
        self.mem_optim = False
        self.ir_optim = False
@@ -153,6 +154,7 @@ class Op(object):
            self.client_type = local_service_conf.get("client_type")
            self.workdir = local_service_conf.get("workdir")
            self.thread_num = local_service_conf.get("thread_num")
+            self.device_type = local_service_conf.get("device_type")
            self.devices = local_service_conf.get("devices")
            self.mem_optim = local_service_conf.get("mem_optim")
            self.ir_optim = local_service_conf.get("ir_optim")
@@ -168,6 +170,7 @@ class Op(object):
                        client_type=self.client_type,
                        workdir=self.workdir,
                        thread_num=self.thread_num,
+                        device_type=self.device_type,
                        devices=self.devices,
                        mem_optim=self.mem_optim,
                        ir_optim=self.ir_optim)
@@ -188,8 +191,11 @@ class Op(object):
                        client_type=self.client_type,
                        workdir=self.workdir,
                        thread_num=self.thread_num,
+                        device_type=self.device_type,
                        devices=self.devices,
-                        fetch_names=self._fetch_names)
+                        fetch_names=self._fetch_names,
+                        mem_optim=self.mem_optim,
+                        ir_optim=self.ir_optim)
                    if self._client_config is None:
                        self._client_config = service_handler.get_client_config(
                        )
@@ -550,7 +556,8 @@ class Op(object):
                args=(concurrency_idx, self._get_input_channel(),
                      self._get_output_channels(), False, trace_buffer,
                      self.model_config, self.workdir, self.thread_num,
-                      self.devices, self.mem_optim, self.ir_optim))
+                      self.device_type, self.devices, self.mem_optim,
+                      self.ir_optim))
            p.daemon = True
            p.start()
            process.append(p)
@@ -583,7 +590,8 @@ class Op(object):
                args=(concurrency_idx, self._get_input_channel(),
                      self._get_output_channels(), True, trace_buffer,
                      self.model_config, self.workdir, self.thread_num,
-                      self.devices, self.mem_optim, self.ir_optim))
+                      self.device_type, self.devices, self.mem_optim,
+                      self.ir_optim))
            # When a process exits, it attempts to terminate
            # all of its daemonic child processes.
            t.daemon = True
@@ -991,7 +999,7 @@ class Op(object):
    def _run(self, concurrency_idx, input_channel, output_channels,
             is_thread_op, trace_buffer, model_config, workdir, thread_num,
-             devices, mem_optim, ir_optim):
+             device_type, devices, mem_optim, ir_optim):
        """
        _run() is the entry function of OP process / thread model.When client
        type is local_predictor in process mode, the CUDA environment needs to
@@ -1009,6 +1017,7 @@ class Op(object):
            model_config: model config path
            workdir: work directory
            thread_num: number of threads, concurrent quantity
+            device_type: support multiple devices
            devices: gpu id list[gpu], "" default[cpu]
            mem_optim: use memory/graphics memory optimization, True default.
            ir_optim: use calculation chart optimization, False default.
@@ -1017,7 +1026,6 @@ class Op(object):
            None
        """
        op_info_prefix = "[{}|{}]".format(self.name, concurrency_idx)
-        tid = threading.current_thread().ident

        # init ops
        profiler = None
@@ -1028,6 +1036,7 @@ class Op(object):
                client_type="local_predictor",
                workdir=workdir,
                thread_num=thread_num,
+                device_type=device_type,
                devices=devices,
                mem_optim=mem_optim,
                ir_optim=ir_optim)
......
@@ -234,6 +234,7 @@ class PipelineServer(object):
            "local_service_conf": {
                "workdir": "",
                "thread_num": 2,
+                "device_type": -1,
                "devices": "",
                "mem_optim": True,
                "ir_optim": False,
@@ -389,6 +390,7 @@ class ServerYamlConfChecker(object):
        default_conf = {
            "workdir": "",
            "thread_num": 2,
+            "device_type": -1,
            "devices": "",
            "mem_optim": True,
            "ir_optim": False,
@@ -397,6 +399,7 @@ class ServerYamlConfChecker(object):
            "model_config": str,
            "workdir": str,
            "thread_num": int,
+            "device_type": int,
            "devices": str,
            "mem_optim": bool,
            "ir_optim": bool,
......