diff --git a/CMakeLists.txt b/CMakeLists.txt index f4e8c64c4ff73d0a417c35159901c2e67d0ca4ef..af065158699199af61aca02f563dda1b1cddf2b1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -49,7 +49,9 @@ set(THIRD_PARTY_BUILD_TYPE Release) option(WITH_AVX "Compile Paddle Serving with AVX intrinsics" OFF) option(WITH_MKL "Compile Paddle Serving with MKL support." OFF) option(WITH_GPU "Compile Paddle Serving with NVIDIA GPU" OFF) -option(CLIENT_ONLY "Compile client libraries and demos only" OFF) +option(CLIENT "Compile Paddle Serving Client" OFF) +option(SERVER "Compile Paddle Serving Server" OFF) +option(APP "Compile Paddle Serving App package" OFF) option(WITH_ELASTIC_CTR "Compile ELASITC-CTR solution" OFF) option(PACK "Compile for whl" OFF) @@ -63,12 +65,12 @@ if (NOT DEFINED WITH_MKLDNN) endif() endif() -if (NOT CLIENT_ONLY) +if (SERVER) include(external/jsoncpp) #include(external/rocksdb) endif() -#include(external/gtest) +if (SERVER OR CLIENT) include(external/snappy) include(external/leveldb) include(external/zlib) @@ -81,8 +83,9 @@ include(external/pybind11) include(external/python) include(generic) include(flags) +endif() -if (NOT CLIENT_ONLY) +if (SERVER) include(external/cudnn) include(paddlepaddle) endif() @@ -91,7 +94,7 @@ message("paddle serving source dir: " ${PADDLE_SERVING_SOURCE_DIR}) include_directories(${PADDLE_SERVING_SOURCE_DIR}) include_directories(${PADDLE_SERVING_BINARY_DIR}) -if(NOT CLIENT_ONLY) +if(SERVER) set(EXTERNAL_LIBS jsoncpp gflags @@ -109,28 +112,27 @@ set(EXTERNAL_LIBS brpc ) -if(NOT CLIENT_ONLY) +if(SERVER) if(WITH_MKLML) list(APPEND EXTERNAL_LIBS ${MKLML_IOMP_LIB}) endif() endif() -if(NOT CLIENT_ONLY) +if(SERVER) if(WITH_MKLDNN) list(APPEND EXTERNAL_LIBS ${MKLDNN_LIB}) endif() endif() -if (NOT CLIENT_ONLY) +if (SERVER) list(APPEND EXTERNAL_LIBS paddlepaddle) endif() add_subdirectory(core) -if(NOT CLIENT_ONLY) +if(SERVER) add_subdirectory(paddle_inference) endif() add_subdirectory(python) -#add_subdirectory(examples) diff --git a/README.md b/README.md index 456ab1c29643c98958105229469b0374af546bd4..70676e69bda97b7de096fa186ccb20d1e5be777e 100644 --- a/README.md +++ b/README.md @@ -18,12 +18,12 @@
+ +### 2.4 微服务插件模式 +由于Paddle Serving底层采用基于C++的通信组件,并且核心框架也是基于C/C++编写,当用户想要在服务端定义复杂的前处理与后处理逻辑时,一种办法是修改Paddle Serving底层框架,重新编译源码。另一种方式可以通过在服务端嵌入轻量级的Web服务,通过在Web服务中实现更复杂的预处理逻辑,从而搭建一套逻辑完整的服务。当访问量超过了Web服务能够接受的范围,开发者有足够的理由开发一些高性能的C++预处理逻辑,并嵌入到Serving的原生服务库中。Web服务和RPC服务的关系以及他们的组合方式可以参考下文`用户类型`中的说明。 + +## 3. 工业级特性 + +### 3.1 分布式稀疏参数索引 + +分布式稀疏参数索引通常在广告推荐中出现,并与分布式训练配合形成完整的离线-在线一体化部署。下图解释了其中的流程,产品的在线服务接受用户请求后将请求发送给预估服务,同时系统会记录用户的请求以进行相应的训练日志处理和拼接。离线分布式训练系统会针对流式产出的训练日志进行模型增量训练,而增量产生的模型会配送至分布式稀疏参数索引服务,同时对应的稠密的模型参数也会配送至在线的预估服务。在线服务由两部分组成,一部分是针对用户的请求提取特征后,将需要进行模型的稀疏参数索引的特征发送请求给分布式稀疏参数索引服务,针对分布式稀疏参数索引服务返回的稀疏参数再进行后续深度学习模型的计算流程,从而完成预估。 + +
+
+为什么要使用Paddle Serving提供的分布式稀疏参数索引服务?1)在一些推荐场景中,模型的输入特征规模通常可以达到上千亿,单台机器无法在内存中保存T级别的模型,因此需要进行分布式存储。2)Paddle Serving提供的分布式稀疏参数索引服务,具有并发请求多个节点的能力,从而以较低的延时完成预估服务。
+
+### 3.2 模型管理、在线A/B流量测试、模型热加载
+
+Paddle Serving的C++引擎支持模型管理、在线A/B流量测试、模型热加载等功能,当前Python API还没有完全开放这部分功能的配置,敬请期待。
+
+## 4. 用户类型
+Paddle Serving面向用户提供RPC和HTTP两种访问协议。对于HTTP协议,我们更倾向于流量中小型、并且对延时没有严格要求的AI服务开发者使用;对于RPC协议,我们面向流量较大、对延时要求更高的用户,此外RPC的客户端可能本身就处在一个大系统的服务中,这种情况下非常适合使用Paddle Serving提供的RPC服务。对于使用分布式稀疏参数索引服务而言,Paddle Serving的用户不需要关心底层的细节,其调用本质也是通过RPC服务再调用RPC服务。下图给出了当前设计中Paddle Serving可能被使用的几种场景。
+
+ +对于普通的模型而言(具体指通过Serving提供的IO保存的模型,并且没有对模型进行后处理),用户使用RPC服务不需要额外的开发即可实现服务启动,但需要开发一些Client端的代码来使用服务。对于Web服务的开发,需要用户现在Paddle Serving提供的Web Service框架中进行前后处理的开发,从而实现整个HTTP服务。 + +### 4.1 Web服务开发 + +Web服务有很多开源的框架,Paddle Serving当前集成了Flask框架,但这部分对用户不可见,在未来可能会提供性能更好的Web框架作为底层HTTP服务集成引擎。用户需要继承WebService,从而实现对rpc服务的输入输出进行加工的目的。 + +``` python +from paddle_serving_server.web_service import WebService +from imdb_reader import IMDBDataset +import sys + + +class IMDBService(WebService): + def prepare_dict(self, args={}): + if len(args) == 0: + exit(-1) + self.dataset = IMDBDataset() + self.dataset.load_resource(args["dict_file_path"]) + + def preprocess(self, feed={}, fetch=[]): + if "words" not in feed: + exit(-1) + res_feed = {} + res_feed["words"] = self.dataset.get_words_only(feed["words"])[0] + return res_feed, fetch + + +imdb_service = IMDBService(name="imdb") +imdb_service.load_model_config(sys.argv[1]) +imdb_service.prepare_server( + workdir=sys.argv[2], port=int(sys.argv[3]), device="cpu") +imdb_service.prepare_dict({"dict_file_path": sys.argv[4]}) +imdb_service.run_server() +``` + +`WebService`作为基类,提供将用户接受的HTTP请求转化为RPC输入的接口`preprocess`,同时提供对RPC请求返回的结果进行后处理的接口`postprocess`,继承`WebService`的子类,可以定义各种类型的成员函数。`WebService`的启动命令和普通RPC服务提供的启动API一致。 + +## 5. 未来计划 + +### 5.1 有向无环图结构定义开放 +当前版本开放的python API仅支持用户定义Sequential类型的执行流,如果想要进行Server进程内复杂的计算,需要增加对应的用户API。 + +### 5.2 云端自动部署能力 +为了方便用户更容易将Paddle的预测模型部署到线上,Paddle Serving在接下来的版本会提供Kubernetes生态下任务编排的工具。 + +### 5.3 向量检索、树结构检索 +在推荐与广告场景的召回系统中,通常需要采用基于向量的快速检索或者基于树结构的快速检索,Paddle Serving会对这方面的检索引擎进行集成或扩展。 diff --git a/doc/DESIGN_DOC_EN.md b/doc/DESIGN_DOC_EN.md new file mode 100644 index 0000000000000000000000000000000000000000..2f8a36ea6686b5add2a7e4e407eabfd14167490d --- /dev/null +++ b/doc/DESIGN_DOC_EN.md @@ -0,0 +1,227 @@ +# Paddle Serving Design Doc + +## 1. Design Objectives + +- Long Term Vision: Online deployment of deep learning models will be a user-facing application in the future. Any AI developer will face the problem of deploying an online service for his or her trained model. +Paddle Serving is the official open source online deployment framework. The long term goal of Paddle Serving is to provide professional, reliable and easy-to-use online service to the last mile of AI application. + +- Easy-To-Use: For algorithmic developers to quickly deploy their models online, Paddle Serving designs APIs that can be used with Paddle's training process seamlessly, most Paddle models can be deployed as a service with one line command. + +- Industrial Oriented: To meet industrial deployment requirements, Paddle Serving supports lots of large-scale deployment functions: 1) Distributed Sparse Embedding Indexing. 2) Highly concurrent underlying communications. 3) Model Management, online A/B test, model online loading. + +- Extensibility: Paddle Serving supports C++, Python and Golang client, and will support more clients with different languages. It is very easy to extend Paddle Serving to support other machine learning inference library, although currently Paddle inference library is the only official supported inference backend. + + +## 2. Module design and implementation + +### 2.1 Python API interface design + +#### 2.1.1 save a servable model +The inference phase of Paddle model focuses on 1) input variables of the model. 2) output variables of the model. 3) model structure and model parameters. Paddle Serving Python API provides a `save_model` interface for trained model, and save necessary information for Paddle Serving to use during deployment phase. 
An example is as follows: + +``` python +import paddle_serving_client.io as serving_io +serving_io.save_model("serving_model", "client_conf", + {"words": data}, {"prediction": prediction}, + fluid.default_main_program()) +``` +In the example, `{"words": data}` and `{"prediction": prediction}` assign the inputs and outputs of a model. `"words"` and `"prediction"` are alias names of inputs and outputs. The design of alias name is to help developers to memorize model inputs and model outputs. `data` and `prediction` are Paddle `[Variable](https://www.paddlepaddle.org.cn/documentation/docs/zh/api_cn/fluid_cn/Variable_cn.html#variable)` in training phase that often represents ([Tensor](https://www.paddlepaddle.org.cn/documentation/docs/zh/api_cn/fluid_cn/Tensor_cn.html#tensor)) or ([LodTensor](https://www.paddlepaddle.org.cn/documentation/docs/zh/beginners_guide/basic_concept/lod_tensor.html#lodtensor)). When the `save_model` API is called, two directories called `"serving_model"` and `"client_conf"` will be generated. The content of the saved model is as follows: + +``` shell +. +├── client_conf +│ ├── serving_client_conf.prototxt +│ └── serving_client_conf.stream.prototxt +└── serving_model + ├── embedding_0.w_0 + ├── fc_0.b_0 + ├── fc_0.w_0 + ├── fc_1.b_0 + ├── fc_1.w_0 + ├── fc_2.b_0 + ├── fc_2.w_0 + ├── lstm_0.b_0 + ├── lstm_0.w_0 + ├── __model__ + ├── serving_server_conf.prototxt + └── serving_server_conf.stream.prototxt +``` +`"serving_client_conf.prototxt"` and `"serving_server_conf.prototxt"` are the client side and the server side configurations of Paddle Serving, and `"serving_client_conf.stream.prototxt"` and `"serving_server_conf.stream.prototxt"` are the corresponding parts. Other contents saved in the directory are the same as Paddle saved inference model. We are considering to support `save_model` interface in Paddle training framework so that a user is not aware of the servable configurations. + +#### 2.1.2 Model loading on the server side + +Prediction logics on the server side can be defined through Paddle Serving Server API with a few lines of code, an example is as follows: +``` python +import paddle_serving_server as serving +op_maker = serving.OpMaker() +read_op = op_maker.create('general_reader') +dist_kv_op = op_maker.create('general_dist_kv') +general_infer_op = op_maker.create('general_infer') +general_response_op = op_maker.create('general_response') + +op_seq_maker = serving.OpSeqMaker() +op_seq_maker.add_op(read_op) +op_seq_maker.add_op(dist_kv_op) +op_seq_maker.add_op(general_infer_op) +op_seq_maker.add_op(general_response_op) +``` +Current Paddle Serving supports operator list on the server side as follows: + +
+*(Table of built-in server-side operators, including `general_reader`, `general_dist_kv`, `general_infer` and `general_response`.)*
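+
+The snippet above only assembles the operator sequence. A rough sketch of how such a sequence is typically turned into a running service is given below; it follows the `Server` usage found in the example servers of this repository, and the model directory, working directory and port are illustrative:
+
+``` python
+import paddle_serving_server as serving
+
+server = serving.Server()
+# op_seq_maker is the OpSeqMaker built in the snippet above
+server.set_op_sequence(op_seq_maker.get_op_sequence())
+# "serving_model" is the directory produced by serving_io.save_model
+server.load_model_config("serving_model")
+server.prepare_server(workdir="workdir", port=9292, device="cpu")
+server.run_server()
+```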
+
+### 2.4 Microservice plugin
+The underlying communication of Paddle Serving is implemented in C++, as is the core framework, so it is hard for users who are not familiar with C++ to implement new Paddle Serving Server operators. Another approach is to use the lightweight Web Service embedded in Paddle Serving Server, which can be viewed as a plugin: a user can implement complex preprocessing and postprocessing logic there to build a complete AI service. When the traffic grows beyond what the Web Service can handle, it is worth implementing that logic as high-performance C++ operators embedded in the native serving library. The relationship between the Web Service and the RPC service is described in the `User Types` section below.
+
+## 3. Industrial Features
+
+### 3.1 Distributed Sparse Parameter Indexing
+
+Distributed sparse parameter indexing is commonly seen in advertising and recommendation scenarios, and is usually coupled with distributed training to form a complete offline-online deployment. The figure below shows a typical architecture for online recommendation. When the recommendation service receives a request from a user, the system automatically collects training logs for offline distributed training. Meanwhile, the request is sent to the Paddle Serving Server. For sparse features, the distributed sparse parameter indexing service is called so that the sparse parameters can be looked up. The dense input features, together with the looked-up sparse parameters, are fed into the Paddle Inference node of the DAG in Paddle Serving Server. The resulting score is then returned through RPC to the product service for item ranking.
+
+
+Why does Paddle Serving need to support distributed sparse parameter indexing? 1) In some recommendation scenarios the number of input features can reach hundreds of billions, so a single node cannot hold the parameters in memory and they have to be stored in a distributed way. 2) The distributed sparse parameter indexing service in Paddle Serving can query multiple nodes concurrently and couples directly with Paddle inference, so users get a low-latency prediction service over hundreds of billions of parameters without extra work.
+
+### 3.2 Model Management, Online A/B Test and Model Online Reloading
+
+Paddle Serving's C++ engine supports model management, online A/B testing and online model reloading. The corresponding Python API has not been released yet; please wait for the next release.
+
+## 4. User Types
+Paddle Serving provides both RPC and HTTP protocols. The HTTP service is recommended for small and medium traffic services without strict latency requirements, while the RPC protocol targets high-traffic, low-latency services; an RPC client may itself be part of a larger system, which is exactly where the RPC interface fits best. Users of the built-in distributed sparse parameter indexing service do not need to care about the underlying communication details, since the call is essentially an RPC service calling another RPC service. The following figure shows several scenarios where a user may want to use Paddle Serving.
+
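+
+As a concrete illustration of the RPC path, a minimal client-side sketch is given below; it reuses the `paddle_serving_client.Client` API that appears in `doc/SAVE.md` and the benchmark scripts of this change, and the endpoint, input values and alias names are illustrative:
+
+``` python
+from paddle_serving_client import Client
+
+client = Client()
+# configuration directory generated by serving_io.save_model on the training side
+client.load_client_config("client_conf/serving_client_conf.prototxt")
+client.connect(["127.0.0.1:9292"])
+
+# toy preprocessed input; "words" / "prediction" are the alias names
+# defined when the model was saved
+word_ids = [8, 233, 52, 601]
+fetch_map = client.predict(feed={"words": word_ids}, fetch=["prediction"])
+```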
+ +For servable models saved from Paddle Serving IO API, users do not need to do extra coding work to startup a service, but may need some coding work on the client side. For development of Web Service plugin, a user needs to provide implementation of Web Service's preprocessing and postprocessing work if needed to get a HTTP service. + +### 4.1 Web Service Development + +Web Service has lots of open sourced framework. Currently Paddle Serving uses Flask as built-in service framework, and users are not aware of this. More efficient web service will be integrated in the furture if needed. + +``` python +from paddle_serving_server.web_service import WebService +from imdb_reader import IMDBDataset +import sys + + +class IMDBService(WebService): + def prepare_dict(self, args={}): + if len(args) == 0: + exit(-1) + self.dataset = IMDBDataset() + self.dataset.load_resource(args["dict_file_path"]) + + def preprocess(self, feed={}, fetch=[]): + if "words" not in feed: + exit(-1) + res_feed = {} + res_feed["words"] = self.dataset.get_words_only(feed["words"])[0] + return res_feed, fetch + + +imdb_service = IMDBService(name="imdb") +imdb_service.load_model_config(sys.argv[1]) +imdb_service.prepare_server( + workdir=sys.argv[2], port=int(sys.argv[3]), device="cpu") +imdb_service.prepare_dict({"dict_file_path": sys.argv[4]}) +imdb_service.run_server() +``` + +`WebService` is a Base Class, providing inheritable interfaces such `preprocess` and `postprocess` for users to implement. In the inherited class of `WebService` class, users can define any functions they want and the startup function interface is the same as RPC service. + +## 5. Future Plan + +### 5.1 Open DAG definition API +Current version of Paddle Serving Server supports sequential type of execution flow. DAG definition API can be more helpful to users on complex tasks. + +### 5.2 Auto Deployment on Cloud +In order to make deployment more easily on public cloud, Paddle Serving considers to provides Operators on Kubernetes in submitting a service job. + +### 5.3 Vector Indexing and Tree based Indexing +In recommendation and advertisement systems, it is commonly seen to use vector based index or tree based indexing service to do candidate retrievals. These retrieval tasks will be built-in services of Paddle Serving. diff --git a/doc/INSTALL.md b/doc/INSTALL.md deleted file mode 100644 index 26d7bfe22772e114d7d8bd50011a98057cdcb395..0000000000000000000000000000000000000000 --- a/doc/INSTALL.md +++ /dev/null @@ -1,117 +0,0 @@ -# Install - -## 系统需求 - -OS: Linux - -CMake: (验证过的版本:3.2/3.5.2) - -C++编译器 (验证过的版本:GCC 4.8.2/5.4.0) - -python (验证过的版本:2.7) - -Go编译器 (>=1.8 验证过的版本:1.9.2/1.12.0) - -openssl & openssl-devel - -curl-devel - -bzip2-devel - -## 编译 - -推荐使用Docker准备Paddle Serving编译环境。[Docker编译使用说明](./DOCKER.md) - -以下命令将会下载Paddle Serving最新代码,并执行编译。 - -```shell -$ git clone https://github.com/PaddlePaddle/Serving.git -$ cd Serving -$ mkdir build -$ cd build -$ cmake .. -$ make -j4 -$ make install -``` - -`make install`将把目标产出放在/path/to/Paddle-Serving/build/output/目录下,目录结构: - -``` -. 
-|-- bin # Paddle Serving工具和protobuf编译插件pdcodegen所在目录 -|-- conf -|-- demo # demo总目录 -| |-- client # Demo client端 -| | |-- bert # bert模型客户端 -| | |-- ctr_prediction # CTR prediction模型客户端 -| | |-- dense_format # dense_format客户端 -| | |-- echo # 最简单的echo service客户端 -| | |-- echo_kvdb # local KV读取demo客户端 -| | |-- image_classification # 图像分类任务客户端 -| | |-- int64tensor_format # int64tensor_format示例客户端 -| | |-- sparse_format # sparse_format示例客户端 -| | `-- text_classification # 文本分类任务示例客户端 -| |-- db_func -| |-- db_thread -| |-- kvdb_test -| `-- serving # Demo serving端;该serving可同时响应所有demo client请求 -|-- include # Paddle Serving发布的头文件 -|-- lib # Paddle Serving发布的libs -`-- tool # Paddle Serving发布的工具目录 - -``` - -如要编写新的预测服务,请参考[从零开始写一个预测服务](CREATING.md) - -# CMake编译选项说明 - -| 编译选项 | 说明 | -|----------|------| -| WITH_AVX | For configuring PaddlePaddle. Compile PaddlePaddle with AVX intrinsics | -| WITH_MKL | For configuring PaddlePaddle. Compile PaddlePaddle with MKLML library | -| WITH_GPU | For configuring PaddlePaddle. Compile PaddlePaddle with NVIDIA GPU | -| CUDNN_ROOT| For configuring PaddlePaddle. Define CuDNN library and header path | -| CLINET_ONLY | Compile client libraries and demos only | - -## WITH_GPU选项 - -Paddle Serving通过PaddlePaddle预测库支持在GPU上做预测。WITH_GPU选项用于检测系统上CUDA/CUDNN等基础库,如检测到合适版本,在编译PaddlePaddle时就会编译出GPU版本的OP Kernel。 - -在裸机上编译Paddle Serving GPU版本,需要安装这些基础库: - -- CUDA -- CuDNN -- NCCL2 - -这里要注意的是: -1) 编译Serving所在的系统上所安装的CUDA/CUDNN等基础库版本,需要兼容实际的GPU设备。例如,Tesla V100卡至少要CUDA 9.0。如果编译时所用CUDA等基础库版本过低,由于生成的GPU代码和实际硬件设备不兼容,会导致Serving进程无法启动,或出现coredump等严重问题。 -2) 运行Paddle Serving的系统上安装与实际GPU设备兼容的CUDA driver,并安装与编译期所用的CUDA/CuDNN等版本兼容的基础库。如运行Paddle Serving的系统上安装的CUDA/CuDNN的版本低于编译时所用版本,可能会导致奇怪的cuda函数调用失败等问题。 - -以下是PaddlePaddle发布版本所使用的基础库版本匹配关系,供参考: - -| | CUDA | CuDNN | NCCL2 | -|-|-------|--------------------------|-------| -| CUDA 8 | 8.0.61 | CuDNN 7.1.2 for CUDA 8.0 | 2.1.4 | -| CUDA 9 | 9.0.176 | CuDNN 7.3.1 for CUDA 9.0| 2.2.12 | - -### 如何让Paddle Serving编译系统探测到CuDNN库 - -从NVIDIA developer官网下载对应版本CuDNN并在本地解压后,在cmake编译命令中增加-DCUDNN_ROOT参数,指定CuDNN库所在路径: - -``` -$ pwd -/path/to/paddle-serving - -$ mkdir build && cd build -$ cmake -DWITH_GPU=ON -DCUDNN_ROOT=/path/to/cudnn/cudnn_v7/cuda .. -``` - -### 如何让Paddle Serving编译系统探测到nccl库 - -从NVIDIA developer官网下载对应版本nccl2库并解压后,增加如下环境变量 (以nccl2.1.4为例): - -``` -$ export C_INCLUDE_PATH=/path/to/nccl2/cuda8/nccl_2.1.4-1+cuda8.0_x86_64/include:$C_INCLUDE_PATH -$ export CPLUS_INCLUDE_PATH=/path/to/nccl2/cuda8/nccl_2.1.4-1+cuda8.0_x86_64/include:$CPLUS_INCLUDE_PATH -$ export LD_LIBRARY_PATH=/path/to/nccl2/cuda8/nccl_2.1.4-1+cuda8.0_x86_64/lib/:$LD_LIBRARY_PATH -``` diff --git a/doc/RUN_IN_DOCKER.md b/doc/RUN_IN_DOCKER.md index 345aabed52cb30282057ea7f5ba4953a9681d6d8..8a932fe1af844d2d5001b24edc0a2a816f5b6994 100644 --- a/doc/RUN_IN_DOCKER.md +++ b/doc/RUN_IN_DOCKER.md @@ -13,7 +13,7 @@ You can get images in two ways: 1. Pull image directly ```bash - docker pull hub.baidubce.com/ctr/paddleserving:0.1.3 + docker pull hub.baidubce.com/paddlepaddle/serving:0.1.3 ``` 2. Building image based on dockerfile @@ -21,13 +21,13 @@ You can get images in two ways: Create a new folder and copy [Dockerfile](../tools/Dockerfile) to this folder, and run the following command: ```bash - docker build -t hub.baidubce.com/ctr/paddleserving:0.1.3 . + docker build -t hub.baidubce.com/paddlepaddle/serving:0.1.3 . 
``` ### Create container ```bash -docker run -p 9292:9292 --name test -dit hub.baidubce.com/ctr/paddleserving:0.1.3 +docker run -p 9292:9292 --name test -dit hub.baidubce.com/paddlepaddle/serving:0.1.3 docker exec -it test bash ``` @@ -99,7 +99,7 @@ You can also get images in two ways: 1. Pull image directly ```bash - nvidia-docker pull hub.baidubce.com/ctr/paddleserving:0.1.3-gpu + nvidia-docker pull hub.baidubce.com/paddlepaddle/serving:0.1.3-gpu ``` 2. Building image based on dockerfile @@ -107,13 +107,13 @@ You can also get images in two ways: Create a new folder and copy [Dockerfile.gpu](../tools/Dockerfile.gpu) to this folder, and run the following command: ```bash - nvidia-docker build -t hub.baidubce.com/ctr/paddleserving:0.1.3-gpu . + nvidia-docker build -t hub.baidubce.com/paddlepaddle/serving:0.1.3-gpu . ``` ### Create container ```bash -nvidia-docker run -p 9292:9292 --name test -dit hub.baidubce.com/ctr/paddleserving:0.1.3-gpu +nvidia-docker run -p 9292:9292 --name test -dit hub.baidubce.com/paddlepaddle/serving:0.1.3-gpu nvidia-docker exec -it test bash ``` diff --git a/doc/RUN_IN_DOCKER_CN.md b/doc/RUN_IN_DOCKER_CN.md index 7e2f28bdbd73c793faee96def6b625e1bbff2ba9..a4676ced427784c2091e07b2690116020ac6657b 100644 --- a/doc/RUN_IN_DOCKER_CN.md +++ b/doc/RUN_IN_DOCKER_CN.md @@ -13,7 +13,7 @@ Docker(GPU版本需要在GPU机器上安装nvidia-docker) 1. 直接拉取镜像 ```bash - docker pull hub.baidubce.com/ctr/paddleserving:0.1.3 + docker pull hub.baidubce.com/paddlepaddle/serving:0.1.3 ``` 2. 基于Dockerfile构建镜像 @@ -21,13 +21,13 @@ Docker(GPU版本需要在GPU机器上安装nvidia-docker) 建立新目录,复制[Dockerfile](../tools/Dockerfile)内容到该目录下Dockerfile文件。执行 ```bash - docker build -t hub.baidubce.com/ctr/paddleserving:0.1.3 . + docker build -t hub.baidubce.com/paddlepaddle/serving:0.1.3 . ``` ### 创建容器并进入 ```bash -docker run -p 9292:9292 --name test -dit hub.baidubce.com/ctr/paddleserving:0.1.3 +docker run -p 9292:9292 --name test -dit hub.baidubce.com/paddlepaddle/serving:0.1.3 docker exec -it test bash ``` @@ -97,7 +97,7 @@ GPU版本与CPU版本基本一致,只有部分接口命名的差别(GPU版 1. 直接拉取镜像 ```bash - nvidia-docker pull hub.baidubce.com/ctr/paddleserving:0.1.3-gpu + nvidia-docker pull hub.baidubce.com/paddlepaddle/serving:0.1.3-gpu ``` 2. 基于Dockerfile构建镜像 @@ -105,13 +105,13 @@ GPU版本与CPU版本基本一致,只有部分接口命名的差别(GPU版 建立新目录,复制[Dockerfile.gpu](../tools/Dockerfile.gpu)内容到该目录下Dockerfile文件。执行 ```bash - nvidia-docker build -t hub.baidubce.com/ctr/paddleserving:0.1.3-gpu . + nvidia-docker build -t hub.baidubce.com/paddlepaddle/serving:0.1.3-gpu . ``` ### 创建容器并进入 ```bash -nvidia-docker run -p 9292:9292 --name test -dit hub.baidubce.com/ctr/paddleserving:0.1.3-gpu +nvidia-docker run -p 9292:9292 --name test -dit hub.baidubce.com/paddlepaddle/serving:0.1.3-gpu nvidia-docker exec -it test bash ``` diff --git a/doc/SAVE.md b/doc/SAVE.md index d2cb82980017e900b6c5a8d0d939abd96281bc1c..59464a4e7c1931291d4a21b8d9d802a07dd22ec6 100644 --- a/doc/SAVE.md +++ b/doc/SAVE.md @@ -7,7 +7,7 @@ serving_io.save_model("imdb_model", "imdb_client_conf", fluid.default_main_program()) ``` `imdb_model` is the server side model with serving configurations. `imdb_client_conf` is the client rpc configurations. Serving has a -dictionary for `Feed` and `Fetch` variables for client to assign. An alias name can be defined for each variable. An example of how to use alias name +dictionary for `Feed` and `Fetch` variables for client to assign. In the example, `{"words": data}` is the feed dict that specify the input of saved inference model. 
`{"prediction": prediction}` is the fetch dic that specify the output of saved inference model. An alias name can be defined for feed and fetch variables. An example of how to use alias name is as follows: ``` python from paddle_serving_client import Client diff --git a/doc/TRAIN_TO_SERVICE.md b/doc/TRAIN_TO_SERVICE.md index cf8ea5e8217078caed0448fd3f735469eaad1cba..11e64eebed84be9889f6e833511bdade897aeb23 100644 --- a/doc/TRAIN_TO_SERVICE.md +++ b/doc/TRAIN_TO_SERVICE.md @@ -1,4 +1,4 @@ -# 使用PaddleServing快速搭建预测服务 +# 端到端完成从训练到部署全流程 Paddle Serving是Paddle的高性能在线预测服务框架,可以灵活支持大多数模型的部署。本文中将以IMDB评论情感分析任务为例通过9步展示从模型的训练到部署预测服务的全流程。 diff --git a/doc/blank.png b/doc/blank.png new file mode 100644 index 0000000000000000000000000000000000000000..d132d5abf7c1298a100ef2f7401d39879e77396d Binary files /dev/null and b/doc/blank.png differ diff --git a/doc/coding_mode.png b/doc/coding_mode.png new file mode 100644 index 0000000000000000000000000000000000000000..617bc14166b60ec81609dd1c596a87d2a24347c9 Binary files /dev/null and b/doc/coding_mode.png differ diff --git a/doc/cube-cli.png b/doc/cube-cli.png new file mode 100644 index 0000000000000000000000000000000000000000..de85c0420fb50acc4c144f62fc96e7d226a8dcfd Binary files /dev/null and b/doc/cube-cli.png differ diff --git a/doc/cube.png b/doc/cube.png new file mode 100644 index 0000000000000000000000000000000000000000..b377d4aba2c57f0f3fd85bb7b6f0368095a4f6f0 Binary files /dev/null and b/doc/cube.png differ diff --git a/doc/cube_eng.png b/doc/cube_eng.png new file mode 100644 index 0000000000000000000000000000000000000000..4ba5d40ce626ecc36e7bd10cda298b6e5ed19131 Binary files /dev/null and b/doc/cube_eng.png differ diff --git a/doc/design_doc.png b/doc/design_doc.png new file mode 100644 index 0000000000000000000000000000000000000000..2fb68e511cf3d91fa291ad2ffc8e54d640772f28 Binary files /dev/null and b/doc/design_doc.png differ diff --git a/doc/user_groups.png b/doc/user_groups.png new file mode 100644 index 0000000000000000000000000000000000000000..455c015e8e4b5503e1f5bd96f67a0977aadf54eb Binary files /dev/null and b/doc/user_groups.png differ diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 855b006949e8b621260e7ed86c4a8c86e2332104..c1590fb1b36de669f89711f95c4d49aedadb0c91 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -1,11 +1,11 @@ -if (CLIENT_ONLY) +if (CLIENT) file(GLOB_RECURSE SERVING_CLIENT_PY_FILES paddle_serving_client/*.py) set(PY_FILES ${SERVING_CLIENT_PY_FILES}) SET(PACKAGE_NAME "serving_client") set(SETUP_LOG_FILE "setup.py.client.log") endif() -if (NOT CLIENT_ONLY) +if (SERVER) if (NOT WITH_GPU) file(GLOB_RECURSE SERVING_SERVER_PY_FILES paddle_serving_server/*.py) else() @@ -16,12 +16,17 @@ if (NOT CLIENT_ONLY) set(SETUP_LOG_FILE "setup.py.server.log") endif() -if (CLIENT_ONLY) +if (CLIENT) configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.client.in ${CMAKE_CURRENT_BINARY_DIR}/setup.py) endif() -if (NOT CLIENT_ONLY) +if (APP) +configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.app.in + ${CMAKE_CURRENT_BINARY_DIR}/setup.py) +endif() + +if (SERVER) if (NOT WITH_GPU) configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.server.in ${CMAKE_CURRENT_BINARY_DIR}/setup.py) @@ -34,7 +39,15 @@ endif() set (SERVING_CLIENT_CORE ${PADDLE_SERVING_BINARY_DIR}/core/general-client/*.so) message("python env: " ${py_env}) -if (CLIENT_ONLY) +if (APP) +add_custom_command( + OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp + COMMAND cp -r ${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_app/ ${PADDLE_SERVING_BINARY_DIR}/python/ + 
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel) +add_custom_target(paddle_python ALL DEPENDS ${PADDLE_SERVING_BINARY_DIR}/.timestamp) +endif() + +if (CLIENT) add_custom_command( OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp COMMAND cp -r ${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_client/ ${PADDLE_SERVING_BINARY_DIR}/python/ @@ -44,7 +57,7 @@ add_custom_command( add_custom_target(paddle_python ALL DEPENDS serving_client ${PADDLE_SERVING_BINARY_DIR}/.timestamp) endif() -if (NOT CLIENT_ONLY) +if (SERVER) if(NOT WITH_GPU) add_custom_command( OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp @@ -66,20 +79,22 @@ endif() set(SERVING_CLIENT_PYTHON_PACKAGE_DIR ${CMAKE_CURRENT_BINARY_DIR}/dist/) set(SERVING_SERVER_PYTHON_PACKAGE_DIR ${CMAKE_CURRENT_BINARY_DIR}/dist/) -if (CLIENT_ONLY) +if (CLIENT) install(DIRECTORY ${SERVING_CLIENT_PYTHON_PACKAGE_DIR} DESTINATION opt/serving_client/share/wheels ) endif() -if (NOT CLIENT_ONLY) +if (SERVER) install(DIRECTORY ${SERVING_SERVER_PYTHON_PACKAGE_DIR} DESTINATION opt/serving_server/share/wheels ) endif() +if (CLIENT OR SERVER) find_program(PATCHELF_EXECUTABLE patchelf) -if(NOT PATCHELF_EXECUTABLE) +if (NOT PATCHELF_EXECUTABLE) message(FATAL_ERROR "patchelf not found, please install it.\n" "For Ubuntu, the command is: apt-get install -y patchelf.") endif() +endif() diff --git a/python/examples/bert/benchmark_batch.py b/python/examples/bert/benchmark_batch.py index b4d13c7db6b3c32c7e8ccd75c33ce25a196e0ea8..e0f677146a47c0366a1bbafe9eff049e2671a617 100644 --- a/python/examples/bert/benchmark_batch.py +++ b/python/examples/bert/benchmark_batch.py @@ -41,13 +41,13 @@ def single_func(idx, resource): client = Client() client.load_client_config(args.model) client.connect([resource["endpoint"][idx % len(resource["endpoint"])]]) + feed_batch = [] + for bi in range(args.batch_size): + feed_batch.append(reader.process(dataset[bi])) start = time.time() for i in range(1000): if args.batch_size >= 1: - feed_batch = [] - for bi in range(args.batch_size): - feed_batch.append(reader.process(dataset[i])) result = client.batch_predict( feed_batch=feed_batch, fetch=fetch) else: @@ -61,7 +61,9 @@ def single_func(idx, resource): if __name__ == '__main__': multi_thread_runner = MultiThreadRunner() - endpoint_list = ["127.0.0.1:9292"] + endpoint_list = [ + "127.0.0.1:9295", "127.0.0.1:9296", "127.0.0.1:9297", "127.0.0.1:9298" + ] result = multi_thread_runner.run(single_func, args.thread, {"endpoint": endpoint_list}) avg_cost = 0 diff --git a/python/examples/bert/benchmark_batch.sh b/python/examples/bert/benchmark_batch.sh index 46ba451d0ade36c24151e260d5c9b3cc3666a548..272923776d6640880175745920a8fad9e84972fd 100644 --- a/python/examples/bert/benchmark_batch.sh +++ b/python/examples/bert/benchmark_batch.sh @@ -1,10 +1,17 @@ rm profile_log +export CUDA_VISIBLE_DEVICES=0,1,2,3 +python -m paddle_serving_server_gpu.serve --model bert_seq20_model/ --port 9295 --thread 4 --gpu_ids 0,1,2,3 2> elog > stdlog & + +sleep 5 + for thread_num in 1 2 4 8 16 do for batch_size in 1 2 4 8 16 32 64 128 256 512 do $PYTHONROOT/bin/python benchmark_batch.py --thread $thread_num --batch_size $batch_size --model serving_client_conf/serving_client_conf.prototxt --request rpc > profile 2>&1 echo "========================================" + echo "thread num: ", $thread_num + echo "batch size: ", $batch_size echo "batch size : $batch_size" >> profile_log $PYTHONROOT/bin/python ../util/show_profile.py profile $thread_num >> profile_log tail -n 1 profile >> profile_log diff --git 
a/python/examples/bert/benchmark_with_profile.sh b/python/examples/bert/benchmark_with_profile.sh new file mode 100644 index 0000000000000000000000000000000000000000..8102e30d5c794d5e21d34e2f4ffd88a1af791b5e --- /dev/null +++ b/python/examples/bert/benchmark_with_profile.sh @@ -0,0 +1,10 @@ +export CUDA_VISIBLE_DEVICES=0,1,2,3 +python -m paddle_serving_server_gpu.serve --model bert_seq20_model/ --port 9295 --thread 4 --gpu_ids 0,1,2,3 2> elog > stdlog & +export FLAGS_profile_client=1 +export FLAGS_profile_server=1 +sleep 5 +thread_num=4 +python benchmark_batch.py --thread ${thread_num} --batch_size 64 --model serving_client_conf/serving_client_conf.prototxt 2> profile + +python show_profile.py profile ${thread_num} +python timeline_trace.py profile trace diff --git a/python/examples/criteo_ctr_with_cube/README.md b/python/examples/criteo_ctr_with_cube/README.md index 9c80f935454ca5b6c5de961c4f06c83ebca5a5b3..eed612f4043ff7fdb41538b2a425d98f0d045718 100755 --- a/python/examples/criteo_ctr_with_cube/README.md +++ b/python/examples/criteo_ctr_with_cube/README.md @@ -1,30 +1,35 @@ -## 带稀疏参数服务器的CTR预测服务 +## Criteo CTR with Sparse Parameter Indexing Service + +([简体中文](./README_CN.md)|English) + +### Get Sample Dataset -### 获取样例数据 ``` sh get_data.sh ``` -### 保存模型和配置文件 +### Train and Save Model ``` python local_train.py ``` -执行脚本后会在当前目录生成ctr_server_model和ctr_client_config文件夹,以及ctr_server_model_kv, ctr_client_conf_kv。 +the trained model will be in ./ctr_server_model and ./ctr_client_config, and ctr_server_model_kv, ctr_client_conf_kv。 -### 启动稀疏参数服务器 +### Start Sparse Parameter Indexing Service ``` cp ../../../build_server/core/predictor/seq_generator seq_generator cp ../../../build_server/output/bin/cube* ./cube/ sh cube_prepare.sh & ``` -### 启动RPC预测服务,服务端线程数为4(可在test_server.py配置) +Here, the sparse parameter is loaded by cube sparse parameter indexing service Cube,for more details please read [Cube: Sparse Parameter Indexing Service (Local Mode)](../../../doc/CUBE_LOCAL.md) + +### Start RPC Predictor, the number of serving thread is 4(configurable in test_server.py) ``` python test_server.py ctr_serving_model_kv ``` -### 执行预测 +### Run Prediction ``` python test_client.py ctr_client_conf/serving_client_conf.prototxt ./raw_data @@ -32,17 +37,17 @@ python test_client.py ctr_client_conf/serving_client_conf.prototxt ./raw_data ### Benchmark -设备 :Intel(R) Xeon(R) CPU 6148 @ 2.40GHz +CPU :Intel(R) Xeon(R) CPU 6148 @ 2.40GHz -模型 :[Criteo CTR](https://github.com/PaddlePaddle/Serving/blob/develop/python/examples/ctr_criteo_with_cube/network_conf.py) +Model :[Criteo CTR](https://github.com/PaddlePaddle/Serving/blob/develop/python/examples/ctr_criteo_with_cube/network_conf.py) server core/thread num : 4/8 -执行 +Run ``` bash benchmark.sh ``` -客户端每个线程会发送1000个batch +1000 batches will be sent by every client | client thread num | prepro | client infer | op0 | op1 | op2 | postpro | avg_latency | qps | | ------------------ | ------ | ------------ | ------ | ----- | ------ | ------- | ----- | ----- | @@ -52,10 +57,10 @@ bash benchmark.sh | 8 | 0.044 | 8.230 | 0.028 | 0.464 | 0.0023 | 0.0034 | 14.191 | 563.8 | | 16 | 0.048 | 21.037 | 0.028 | 0.455 | 0.0025 | 0.0041 | 27.236 | 587.5 | -平均每个线程耗时图如下 +the average latency of threads ![avg cost](../../../doc/criteo-cube-benchmark-avgcost.png) -每个线程QPS耗时如下 +The QPS is ![qps](../../../doc/criteo-cube-benchmark-qps.png) diff --git a/python/examples/criteo_ctr_with_cube/README_CN.md b/python/examples/criteo_ctr_with_cube/README_CN.md new file mode 100644 index 
0000000000000000000000000000000000000000..868e8bce5a624904d532bf956fd5868abc0a1c52 --- /dev/null +++ b/python/examples/criteo_ctr_with_cube/README_CN.md @@ -0,0 +1,64 @@ +## 带稀疏参数索引服务的CTR预测服务 +(简体中文|[English](./README.md)) + +### 获取样例数据 +``` +sh get_data.sh +``` + +### 保存模型和配置文件 +``` +python local_train.py +``` +执行脚本后会在当前目录生成ctr_server_model和ctr_client_config文件夹,以及ctr_server_model_kv, ctr_client_conf_kv。 + +### 启动稀疏参数索引服务 +``` +cp ../../../build_server/core/predictor/seq_generator seq_generator +cp ../../../build_server/output/bin/cube* ./cube/ +sh cube_prepare.sh & +``` + +此处,模型当中的稀疏参数会被存放在稀疏参数索引服务Cube当中,关于稀疏参数索引服务Cube的介绍,请阅读[稀疏参数索引服务Cube单机版使用指南](../../../doc/CUBE_LOCAL_CN.md) + +### 启动RPC预测服务,服务端线程数为4(可在test_server.py配置) + +``` +python test_server.py ctr_serving_model_kv +``` + +### 执行预测 + +``` +python test_client.py ctr_client_conf/serving_client_conf.prototxt ./raw_data +``` + +### Benchmark + +设备 :Intel(R) Xeon(R) CPU 6148 @ 2.40GHz + +模型 :[Criteo CTR](https://github.com/PaddlePaddle/Serving/blob/develop/python/examples/ctr_criteo_with_cube/network_conf.py) + +server core/thread num : 4/8 + +执行 +``` +bash benchmark.sh +``` +客户端每个线程会发送1000个batch + +| client thread num | prepro | client infer | op0 | op1 | op2 | postpro | avg_latency | qps | +| ------------------ | ------ | ------------ | ------ | ----- | ------ | ------- | ----- | ----- | +| 1 | 0.035 | 1.596 | 0.021 | 0.518 | 0.0024 | 0.0025 | 6.774 | 147.7 | +| 2 | 0.034 | 1.780 | 0.027 | 0.463 | 0.0020 | 0.0023 | 6.931 | 288.3 | +| 4 | 0.038 | 2.954 | 0.025 | 0.455 | 0.0019 | 0.0027 | 8.378 | 477.5 | +| 8 | 0.044 | 8.230 | 0.028 | 0.464 | 0.0023 | 0.0034 | 14.191 | 563.8 | +| 16 | 0.048 | 21.037 | 0.028 | 0.455 | 0.0025 | 0.0041 | 27.236 | 587.5 | + +平均每个线程耗时图如下 + +![avg cost](../../../doc/criteo-cube-benchmark-avgcost.png) + +每个线程QPS耗时如下 + +![qps](../../../doc/criteo-cube-benchmark-qps.png) diff --git a/python/examples/imagenet/image_classification_service.py b/python/examples/imagenet/image_classification_service.py index c78ae1d252d8fbc15acf68dbb7e68f443e9b6186..2776eb1bc7126fab32dbb05774fb0060506b61af 100644 --- a/python/examples/imagenet/image_classification_service.py +++ b/python/examples/imagenet/image_classification_service.py @@ -25,11 +25,21 @@ class ImageService(WebService): reader = ImageReader() if "image" not in feed: raise ("feed data error!") - sample = base64.b64decode(feed["image"]) - img = reader.process_image(sample) - res_feed = {} - res_feed["image"] = img.reshape(-1) - return res_feed, fetch + if isinstance(feed["image"], list): + feed_batch = [] + for image in feed["image"]: + sample = base64.b64decode(image) + img = reader.process_image(sample) + res_feed = {} + res_feed["image"] = img.reshape(-1) + feed_batch.append(res_feed) + return feed_batch, fetch + else: + sample = base64.b64decode(feed["image"]) + img = reader.process_image(sample) + res_feed = {} + res_feed["image"] = img.reshape(-1) + return res_feed, fetch image_service = ImageService(name="image") diff --git a/python/examples/imagenet/image_classification_service_gpu.py b/python/examples/imagenet/image_classification_service_gpu.py index 8a0bea938638c57a609a604181420929c4a9ca59..287392e4f3ea922686cb03a032ba0b8e13d39709 100644 --- a/python/examples/imagenet/image_classification_service_gpu.py +++ b/python/examples/imagenet/image_classification_service_gpu.py @@ -25,16 +25,27 @@ class ImageService(WebService): reader = ImageReader() if "image" not in feed: raise ("feed data error!") - sample = base64.b64decode(feed["image"]) - img = 
reader.process_image(sample) - res_feed = {} - res_feed["image"] = img.reshape(-1) - return res_feed, fetch + print(type(feed["image"]), isinstance(feed["image"], list)) + if isinstance(feed["image"], list): + feed_batch = [] + for image in feed["image"]: + sample = base64.b64decode(image) + img = reader.process_image(sample) + res_feed = {} + res_feed["image"] = img.reshape(-1) + feed_batch.append(res_feed) + return feed_batch, fetch + else: + sample = base64.b64decode(feed["image"]) + img = reader.process_image(sample) + res_feed = {} + res_feed["image"] = img.reshape(-1) + return res_feed, fetch image_service = ImageService(name="image") image_service.load_model_config(sys.argv[1]) -image_service.set_gpus("0,1,2,3") +image_service.set_gpus("0,1") image_service.prepare_server( workdir=sys.argv[2], port=int(sys.argv[3]), device="gpu") image_service.run_server() diff --git a/python/examples/imagenet/image_http_client.py b/python/examples/imagenet/image_http_client.py index b61f0dd7d8d5ed25ecc828b5d0882ba11a116019..c567b9003bfe87f9ddd20c3553b9e2d400bce4b9 100644 --- a/python/examples/imagenet/image_http_client.py +++ b/python/examples/imagenet/image_http_client.py @@ -24,17 +24,26 @@ def predict(image_path, server): req = json.dumps({"image": image, "fetch": ["score"]}) r = requests.post( server, data=req, headers={"Content-Type": "application/json"}) + print(r.json()["score"][0]) + return r + + +def batch_predict(image_path, server): + image = base64.b64encode(open(image_path).read()) + req = json.dumps({"image": [image, image], "fetch": ["score"]}) + r = requests.post( + server, data=req, headers={"Content-Type": "application/json"}) + print(r.json()["result"][1]["score"][0]) return r if __name__ == "__main__": - server = "http://127.0.0.1:9295/image/prediction" + server = "http://127.0.0.1:9393/image/prediction" #image_path = "./data/n01440764_10026.JPEG" - image_list = os.listdir("./data/image_data/n01440764/") + image_list = os.listdir("./image_data/n01440764/") start = time.time() for img in image_list: - image_file = "./data/image_data/n01440764/" + img + image_file = "./image_data/n01440764/" + img res = predict(image_file, server) - print(res.json()["score"][0]) end = time.time() print(end - start) diff --git a/python/paddle_serving_app/__init__.py b/python/paddle_serving_app/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..968e5582cc286455d5200e154033087b71ac86de --- /dev/null +++ b/python/paddle_serving_app/__init__.py @@ -0,0 +1,14 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from .reader.chinese_bert_reader import ChineseBertReader diff --git a/python/paddle_serving_app/reader/__init__.py b/python/paddle_serving_app/reader/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..847ddc47ac89114f2012bc6b9990a69abfe39fb3 --- /dev/null +++ b/python/paddle_serving_app/reader/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2020 PaddlePaddle Authors. 
All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/python/paddle_serving_app/reader/batching.py b/python/paddle_serving_app/reader/batching.py new file mode 100644 index 0000000000000000000000000000000000000000..5ec5f320cf5ec7bd0ab4624d9b39ef936553c774 --- /dev/null +++ b/python/paddle_serving_app/reader/batching.py @@ -0,0 +1,126 @@ +#coding:utf-8 +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Mask, padding and batching.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + + +def prepare_batch_data(insts, + total_token_num, + max_seq_len=128, + pad_id=None, + cls_id=None, + sep_id=None, + mask_id=None, + return_input_mask=True, + return_max_len=True, + return_num_token=False): + """ + 1. generate Tensor of data + 2. generate Tensor of position + 3. generate self attention mask, [shape: batch_size * max_len * max_len] + """ + + batch_src_ids = [inst[0] for inst in insts] + batch_sent_ids = [inst[1] for inst in insts] + batch_pos_ids = [inst[2] for inst in insts] + labels_list = [] + # compatible with squad, whose example includes start/end positions, + # or unique id + + for i in range(3, len(insts[0]), 1): + labels = [inst[i] for inst in insts] + labels = np.array(labels).astype("int64").reshape([-1, 1]) + labels_list.append(labels) + + out = batch_src_ids + # Second step: padding + src_id, self_input_mask = pad_batch_data( + out, pad_idx=pad_id, max_seq_len=max_seq_len, return_input_mask=True) + pos_id = pad_batch_data( + batch_pos_ids, + pad_idx=pad_id, + max_seq_len=max_seq_len, + return_pos=False, + return_input_mask=False) + sent_id = pad_batch_data( + batch_sent_ids, + pad_idx=pad_id, + max_seq_len=max_seq_len, + return_pos=False, + return_input_mask=False) + + return_list = [src_id, pos_id, sent_id, self_input_mask] + labels_list + + return return_list if len(return_list) > 1 else return_list[0] + + +def pad_batch_data(insts, + pad_idx=0, + max_seq_len=128, + return_pos=False, + return_input_mask=False, + return_max_len=False, + return_num_token=False, + return_seq_lens=False): + """ + Pad the instances to the max sequence length in batch, and generate the + corresponding position data and input mask. 
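+
+    Illustration (hypothetical values): for a single instance [1, 2, 3] with
+    pad_idx=0, max_seq_len=5 and return_input_mask=True, the call returns an
+    int64 id array of shape [1, 5, 1] (ids padded to [[1, 2, 3, 0, 0]]) and a
+    float32 input mask of shape [1, 5, 1] ([[1, 1, 1, 0, 0]]).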
+ """ + return_list = [] + #max_len = max(len(inst) for inst in insts) + max_len = max_seq_len + # Any token included in dict can be used to pad, since the paddings' loss + # will be masked out by weights and make no effect on parameter gradients. + + inst_data = np.array([ + list(inst) + list([pad_idx] * (max_len - len(inst))) for inst in insts + ]) + return_list += [inst_data.astype("int64").reshape([-1, max_len, 1])] + + # position data + if return_pos: + inst_pos = np.array([ + list(range(0, len(inst))) + [pad_idx] * (max_len - len(inst)) + for inst in insts + ]) + + return_list += [inst_pos.astype("int64").reshape([-1, max_len, 1])] + + if return_input_mask: + # This is used to avoid attention on paddings. + input_mask_data = np.array( + [[1] * len(inst) + [0] * (max_len - len(inst)) for inst in insts]) + input_mask_data = np.expand_dims(input_mask_data, axis=-1) + return_list += [input_mask_data.astype("float32")] + + if return_max_len: + return_list += [max_len] + + if return_num_token: + num_token = 0 + for inst in insts: + num_token += len(inst) + return_list += [num_token] + + if return_seq_lens: + seq_lens = np.array([len(inst) for inst in insts]) + return_list += [seq_lens.astype("int64").reshape([-1, 1])] + + return return_list if len(return_list) > 1 else return_list[0] diff --git a/python/paddle_serving_app/reader/bert_base_reader.py b/python/paddle_serving_app/reader/bert_base_reader.py new file mode 100644 index 0000000000000000000000000000000000000000..9888dbe82675f5353b881ba528acf8b3af504ddb --- /dev/null +++ b/python/paddle_serving_app/reader/bert_base_reader.py @@ -0,0 +1,24 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from .reader import ReaderBase + + +class BertBaseReader(ReaderBase): + def __init__(self): + super(BertBaseReader, self).__init__() + pass + + def process(self, line): + super(BertBaseReader, self).process(line) + pass diff --git a/python/paddle_serving_app/reader/chinese_bert_reader.py b/python/paddle_serving_app/reader/chinese_bert_reader.py new file mode 100644 index 0000000000000000000000000000000000000000..6c884c2aa8f9efa07bb1f13d17709a78921989ca --- /dev/null +++ b/python/paddle_serving_app/reader/chinese_bert_reader.py @@ -0,0 +1,128 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# coding=utf-8 +from .bert_base_reader import BertBaseReader +from .batching import pad_batch_data +from .tokenization import FullTokenizer, convert_to_unicode + + +class ChineseBertReader(BertBaseReader): + """ + ChineseBertReader handles the most traditional Chinese Bert + preprocessing, a user can define the vocab file through initialization + + Examples: + from paddle_serving_app import ChineseBertReader + + line = ["this is China"] + reader = ChineseBertReader() + reader.process(line[0]) + + """ + + def __init__(self, args={}): + super(ChineseBertReader, self).__init__() + vocab_file = "" + if "vocab_file" in args: + vocab_file = args["vocab_file"] + else: + vocab_file = self._download_or_not() + + self.tokenizer = FullTokenizer(vocab_file=vocab_file) + if "max_seq_len" in args: + self.max_seq_len = args["max_seq_len"] + else: + self.max_seq_len = 20 + self.vocab = self.tokenizer.vocab + self.pad_id = self.vocab["[PAD]"] + self.cls_id = self.vocab["[CLS]"] + self.sep_id = self.vocab["[SEP]"] + self.mask_id = self.vocab["[MASK]"] + self.feed_keys = [ + "input_ids", "position_ids", "segment_ids", "input_mask" + ] + + """ + inner function + """ + + def _download_or_not(self): + import os + import paddle_serving_app + module_path = os.path.dirname(paddle_serving_app.__file__) + full_path = "{}/tmp/chinese_bert".format(module_path) + os.system("mkdir -p {}".format(full_path)) + if os.path.exists("{}/vocab.txt".format(full_path)): + pass + else: + url = "https://paddle-serving.bj.bcebos.com/reader/chinese_bert/vocab.txt" + r = os.system("wget --no-check-certificate " + url) + os.system("mv vocab.txt {}".format(full_path)) + if r != 0: + raise SystemExit('Download failed, please check your network') + return "{}/vocab.txt".format(full_path) + + """ + inner function + """ + + def _pad_batch(self, token_ids, text_type_ids, position_ids): + batch_token_ids = [token_ids] + batch_text_type_ids = [text_type_ids] + batch_position_ids = [position_ids] + + padded_token_ids, input_mask = pad_batch_data( + batch_token_ids, + max_seq_len=self.max_seq_len, + pad_idx=self.pad_id, + return_input_mask=True) + padded_text_type_ids = pad_batch_data( + batch_text_type_ids, + max_seq_len=self.max_seq_len, + pad_idx=self.pad_id) + padded_position_ids = pad_batch_data( + batch_position_ids, + max_seq_len=self.max_seq_len, + pad_idx=self.pad_id) + return padded_token_ids, padded_position_ids, padded_text_type_ids, input_mask + + """ + process function deals with a raw Chinese string as a sentence + this funtion returns a feed_dict + default key of the returned feed_dict: input_ids, position_ids, segment_ids, input_mask + """ + + def process(self, line): + text_a = convert_to_unicode(line) + tokens_a = self.tokenizer.tokenize(text_a) + if len(tokens_a) > self.max_seq_len - 2: + tokens_a = tokens_a[0:(self.max_seq_len - 2)] + tokens = [] + text_type_ids = [] + tokens.append("[CLS]") + text_type_ids.append(0) + for token in tokens_a: + tokens.append(token) + text_type_ids.append(0) + token_ids = self.tokenizer.convert_tokens_to_ids(tokens) + position_ids = list(range(len(token_ids))) + p_token_ids, p_pos_ids, p_text_type_ids, input_mask = \ + self._pad_batch(token_ids, text_type_ids, position_ids) + feed_result = { + self.feed_keys[0]: p_token_ids.reshape(-1).tolist(), + self.feed_keys[1]: p_pos_ids.reshape(-1).tolist(), + self.feed_keys[2]: p_text_type_ids.reshape(-1).tolist(), + self.feed_keys[3]: input_mask.reshape(-1).tolist() + } + return feed_result diff --git a/python/paddle_serving_app/reader/reader.py 
b/python/paddle_serving_app/reader/reader.py new file mode 100644 index 0000000000000000000000000000000000000000..0a0fa97b02abd5e8952f8d900e39ca7e30ec5028 --- /dev/null +++ b/python/paddle_serving_app/reader/reader.py @@ -0,0 +1,24 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +class ReaderBase(object): + def __init__(self): + self.feed_keys = [] + + def set_feed_keys(self, keys): + self.feed_keys = keys + + def get_feed_keys(self): + return self.feed_keys diff --git a/python/paddle_serving_app/reader/tokenization.py b/python/paddle_serving_app/reader/tokenization.py new file mode 100644 index 0000000000000000000000000000000000000000..0d84ed38468207e853e5270a59179b4274900cb0 --- /dev/null +++ b/python/paddle_serving_app/reader/tokenization.py @@ -0,0 +1,441 @@ +# coding=utf-8 +# Copyright 2018 The Google AI Language Team Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tokenization classes.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections +import io +import unicodedata +import six +import sentencepiece as spm +import pickle + + +def convert_to_unicode(text): # pylint: disable=doc-string-with-all-args + """Converts `text` to Unicode (if it's not already), assuming utf-8 input.""" + if six.PY3: + if isinstance(text, str): + return text + elif isinstance(text, bytes): + return text.decode("utf-8", "ignore") + else: + raise ValueError("Unsupported string type: %s" % (type(text))) + elif six.PY2: + if isinstance(text, str): + return text.decode("utf-8", "ignore") + elif isinstance(text, unicode): # noqa + return text + else: + raise ValueError("Unsupported string type: %s" % (type(text))) + else: + raise ValueError("Not running on Python2 or Python 3?") + + +def printable_text(text): # pylint: disable=doc-string-with-all-args + """Returns text encoded in a way suitable for print or `tf.logging`.""" + + # These functions want `str` for both Python2 and Python3, but in one case + # it's a Unicode string and in the other it's a byte string. 
+ if six.PY3: + if isinstance(text, str): + return text + elif isinstance(text, bytes): + return text.decode("utf-8", "ignore") + else: + raise ValueError("Unsupported string type: %s" % (type(text))) + elif six.PY2: + if isinstance(text, str): + return text + elif isinstance(text, unicode): # noqa + return text.encode("utf-8") + else: + raise ValueError("Unsupported string type: %s" % (type(text))) + else: + raise ValueError("Not running on Python2 or Python 3?") + + +def load_vocab(vocab_file): # pylint: disable=doc-string-with-all-args, doc-string-with-returns + """Loads a vocabulary file into a dictionary.""" + vocab = collections.OrderedDict() + fin = io.open(vocab_file, "r", encoding="UTF-8") + for num, line in enumerate(fin): + items = convert_to_unicode(line.strip()).split("\t") + if len(items) > 2: + break + token = items[0] + index = items[1] if len(items) == 2 else num + token = token.strip() + vocab[token] = int(index) + fin.close() + return vocab + + +def convert_by_vocab(vocab, items): + """Converts a sequence of [tokens|ids] using the vocab.""" + output = [] + for item in items: + output.append(vocab[item]) + return output + + +def convert_tokens_to_ids(vocab, tokens): + return convert_by_vocab(vocab, tokens) + + +def convert_ids_to_tokens(inv_vocab, ids): + return convert_by_vocab(inv_vocab, ids) + + +def whitespace_tokenize(text): + """Runs basic whitespace cleaning and splitting on a peice of text.""" + text = text.strip() + if not text: + return [] + tokens = text.split() + return tokens + + +class FullTokenizer(object): + """Runs end-to-end tokenziation.""" + + def __init__(self, + vocab_file, + do_lower_case=True, + use_sentence_piece_vocab=False): + self.vocab = load_vocab(vocab_file) + self.inv_vocab = {v: k for k, v in self.vocab.items()} + self.basic_tokenizer = BasicTokenizer(do_lower_case=do_lower_case) + self.use_sentence_piece_vocab = use_sentence_piece_vocab + self.wordpiece_tokenizer = WordpieceTokenizer( + vocab=self.vocab, + use_sentence_piece_vocab=self.use_sentence_piece_vocab) + + def tokenize(self, text): + split_tokens = [] + for token in self.basic_tokenizer.tokenize(text): + for sub_token in self.wordpiece_tokenizer.tokenize(token): + split_tokens.append(sub_token) + + return split_tokens + + def convert_tokens_to_ids(self, tokens): + return convert_by_vocab(self.vocab, tokens) + + def convert_ids_to_tokens(self, ids): + return convert_by_vocab(self.inv_vocab, ids) + + +class CharTokenizer(object): + """Runs end-to-end tokenziation.""" + + def __init__(self, vocab_file, do_lower_case=True): + self.vocab = load_vocab(vocab_file) + self.inv_vocab = {v: k for k, v in self.vocab.items()} + self.wordpiece_tokenizer = WordpieceTokenizer(vocab=self.vocab) + + def tokenize(self, text): + split_tokens = [] + for token in text.lower().split(" "): + for sub_token in self.wordpiece_tokenizer.tokenize(token): + split_tokens.append(sub_token) + + return split_tokens + + def convert_tokens_to_ids(self, tokens): + return convert_by_vocab(self.vocab, tokens) + + def convert_ids_to_tokens(self, ids): + return convert_by_vocab(self.inv_vocab, ids) + + +class WSSPTokenizer(object): # pylint: disable=doc-string-missing + def __init__(self, vocab_file, sp_model_dir, word_dict, ws=True, + lower=True): + self.vocab = load_vocab(vocab_file) + self.inv_vocab = {v: k for k, v in self.vocab.items()} + self.ws = ws + self.lower = lower + self.dict = pickle.load(open(word_dict, 'rb')) + self.sp_model = spm.SentencePieceProcessor() + self.window_size = 5 + 
self.sp_model.Load(sp_model_dir) + + def cut(self, chars): # pylint: disable=doc-string-missing + words = [] + idx = 0 + while idx < len(chars): + matched = False + for i in range(self.window_size, 0, -1): + cand = chars[idx:idx + i] + if cand in self.dict: + words.append(cand) + matched = True + break + if not matched: + i = 1 + words.append(chars[idx]) + idx += i + return words + + def tokenize(self, text, unk_token="[UNK]"): # pylint: disable=doc-string-missing + text = convert_to_unicode(text) + if self.ws: + text = [s for s in self.cut(text) if s != ' '] + else: + text = text.split(' ') + if self.lower: + text = [s.lower() for s in text] + text = ' '.join(text) + tokens = self.sp_model.EncodeAsPieces(text) + in_vocab_tokens = [] + for token in tokens: + if token in self.vocab: + in_vocab_tokens.append(token) + else: + in_vocab_tokens.append(unk_token) + return in_vocab_tokens + + def convert_tokens_to_ids(self, tokens): + return convert_by_vocab(self.vocab, tokens) + + def convert_ids_to_tokens(self, ids): + return convert_by_vocab(self.inv_vocab, ids) + + +class BasicTokenizer(object): + """Runs basic tokenization (punctuation splitting, lower casing, etc.).""" + + def __init__(self, do_lower_case=True): + """Constructs a BasicTokenizer. + + Args: + do_lower_case: Whether to lower case the input. + """ + self.do_lower_case = do_lower_case + + def tokenize(self, text): # pylint: disable=doc-string-with-all-args, doc-string-with-returns + """Tokenizes a piece of text.""" + text = convert_to_unicode(text) + text = self._clean_text(text) + + # This was added on November 1st, 2018 for the multilingual and Chinese + # models. This is also applied to the English models now, but it doesn't + # matter since the English models were not trained on any Chinese data + # and generally don't have any Chinese data in them (there are Chinese + # characters in the vocabulary because Wikipedia does have some Chinese + # words in the English Wikipedia.). 
+        text = self._tokenize_chinese_chars(text)
+
+        orig_tokens = whitespace_tokenize(text)
+        split_tokens = []
+        for token in orig_tokens:
+            if self.do_lower_case:
+                token = token.lower()
+                token = self._run_strip_accents(token)
+            split_tokens.extend(self._run_split_on_punc(token))
+
+        output_tokens = whitespace_tokenize(" ".join(split_tokens))
+        return output_tokens
+
+    def _run_strip_accents(self, text):
+        """Strips accents from a piece of text."""
+        text = unicodedata.normalize("NFD", text)
+        output = []
+        for char in text:
+            cat = unicodedata.category(char)
+            if cat == "Mn":
+                continue
+            output.append(char)
+        return "".join(output)
+
+    def _run_split_on_punc(self, text):
+        """Splits punctuation on a piece of text."""
+        chars = list(text)
+        i = 0
+        start_new_word = True
+        output = []
+        while i < len(chars):
+            char = chars[i]
+            if _is_punctuation(char):
+                output.append([char])
+                start_new_word = True
+            else:
+                if start_new_word:
+                    output.append([])
+                start_new_word = False
+                output[-1].append(char)
+            i += 1
+
+        return ["".join(x) for x in output]
+
+    def _tokenize_chinese_chars(self, text):
+        """Adds whitespace around any CJK character."""
+        output = []
+        for char in text:
+            cp = ord(char)
+            if self._is_chinese_char(cp):
+                output.append(" ")
+                output.append(char)
+                output.append(" ")
+            else:
+                output.append(char)
+        return "".join(output)
+
+    def _is_chinese_char(self, cp):
+        """Checks whether CP is the codepoint of a CJK character."""
+        # This defines a "chinese character" as anything in the CJK Unicode block:
+        #   https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block)
+        #
+        # Note that the CJK Unicode block is NOT all Japanese and Korean characters,
+        # despite its name. The modern Korean Hangul alphabet is a different block,
+        # as is Japanese Hiragana and Katakana. Those alphabets are used to write
+        # space-separated words, so they are not treated specially and handled
+        # like all of the other languages.
+        if ((cp >= 0x4E00 and cp <= 0x9FFF) or  #
+            (cp >= 0x3400 and cp <= 0x4DBF) or  #
+            (cp >= 0x20000 and cp <= 0x2A6DF) or  #
+            (cp >= 0x2A700 and cp <= 0x2B73F) or  #
+            (cp >= 0x2B740 and cp <= 0x2B81F) or  #
+            (cp >= 0x2B820 and cp <= 0x2CEAF) or
+            (cp >= 0xF900 and cp <= 0xFAFF) or  #
+            (cp >= 0x2F800 and cp <= 0x2FA1F)):  #
+            return True
+
+        return False
+
+    def _clean_text(self, text):
+        """Performs invalid character removal and whitespace cleanup on text."""
+        output = []
+        for char in text:
+            cp = ord(char)
+            if cp == 0 or cp == 0xfffd or _is_control(char):
+                continue
+            if _is_whitespace(char):
+                output.append(" ")
+            else:
+                output.append(char)
+        return "".join(output)
+
+
+class WordpieceTokenizer(object):
+    """Runs WordPiece tokenization."""
+
+    def __init__(self,
+                 vocab,
+                 unk_token="[UNK]",
+                 max_input_chars_per_word=100,
+                 use_sentence_piece_vocab=False):
+        self.vocab = vocab
+        self.unk_token = unk_token
+        self.max_input_chars_per_word = max_input_chars_per_word
+        self.use_sentence_piece_vocab = use_sentence_piece_vocab
+
+    def tokenize(self, text):  # pylint: disable=doc-string-with-all-args
+        """Tokenizes a piece of text into its word pieces.
+
+        This uses a greedy longest-match-first algorithm to perform tokenization
+        using the given vocabulary.
+
+        For example:
+            input = "unaffable"
+            output = ["un", "##aff", "##able"]
+
+        Args:
+            text: A single token or whitespace separated tokens. This should have
+                already been passed through `BasicTokenizer`.
+
+        Returns:
+            A list of wordpiece tokens.
+ """ + + text = convert_to_unicode(text) + + output_tokens = [] + for token in whitespace_tokenize(text): + chars = list(token) + if len(chars) > self.max_input_chars_per_word: + output_tokens.append(self.unk_token) + continue + + is_bad = False + start = 0 + sub_tokens = [] + while start < len(chars): + end = len(chars) + cur_substr = None + while start < end: + substr = "".join(chars[start:end]) + if start == 0 and self.use_sentence_piece_vocab: + substr = u'\u2581' + substr + if start > 0 and not self.use_sentence_piece_vocab: + substr = "##" + substr + if substr in self.vocab: + cur_substr = substr + break + end -= 1 + if cur_substr is None: + is_bad = True + break + sub_tokens.append(cur_substr) + start = end + + if is_bad: + output_tokens.append(self.unk_token) + else: + output_tokens.extend(sub_tokens) + return output_tokens + + +def _is_whitespace(char): + """Checks whether `chars` is a whitespace character.""" + # \t, \n, and \r are technically contorl characters but we treat them + # as whitespace since they are generally considered as such. + if char == " " or char == "\t" or char == "\n" or char == "\r": + return True + cat = unicodedata.category(char) + if cat == "Zs": + return True + return False + + +def _is_control(char): + """Checks whether `chars` is a control character.""" + # These are technically control characters but we count them as whitespace + # characters. + if char == "\t" or char == "\n" or char == "\r": + return False + cat = unicodedata.category(char) + if cat.startswith("C"): + return True + return False + + +def _is_punctuation(char): + """Checks whether `chars` is a punctuation character.""" + cp = ord(char) + # We treat all non-letter/number ASCII as punctuation. + # Characters such as "^", "$", and "`" are not in the Unicode + # Punctuation class but we treat them as punctuation anyways, for + # consistency. + if ((cp >= 33 and cp <= 47) or (cp >= 58 and cp <= 64) or + (cp >= 91 and cp <= 96) or (cp >= 123 and cp <= 126)): + return True + cat = unicodedata.category(char) + if cat.startswith("P"): + return True + return False diff --git a/python/paddle_serving_app/version.py b/python/paddle_serving_app/version.py new file mode 100644 index 0000000000000000000000000000000000000000..80f647be56d09740adfb9d68dd47bb0b1fa2c985 --- /dev/null +++ b/python/paddle_serving_app/version.py @@ -0,0 +1,15 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+""" Paddle Serving App version string """ +serving_app_version = "0.0.1" diff --git a/python/paddle_serving_client/__init__.py b/python/paddle_serving_client/__init__.py index ce0eb8c83d1eabb79e0e51608c9b2e906faa4c70..d70e4374c4169d5f34ef4b3565fce2a993290709 100644 --- a/python/paddle_serving_client/__init__.py +++ b/python/paddle_serving_client/__init__.py @@ -79,6 +79,8 @@ class Client(object): self.feed_names_to_idx_ = {} self.rpath() self.pid = os.getpid() + self.producers = [] + self.consumer = None def rpath(self): lib_path = os.path.dirname(paddle_serving_client.__file__) @@ -137,7 +139,6 @@ class Client(object): predictor_sdk = SDKConfig() predictor_sdk.set_server_endpoints(endpoints) sdk_desc = predictor_sdk.gen_desc() - print(sdk_desc) self.client_handle_.create_predictor_by_desc(sdk_desc.SerializeToString( )) @@ -155,44 +156,26 @@ class Client(object): raise SystemExit("The shape of feed tensor {} not match.".format( key)) - def predict(self, feed={}, fetch=[]): - int_slot = [] - float_slot = [] - int_feed_names = [] - float_feed_names = [] - fetch_names = [] - - for key in feed: - self.shape_check(feed, key) - if key not in self.feed_names_: - continue - if self.feed_types_[key] == int_type: - int_feed_names.append(key) - int_slot.append(feed[key]) - elif self.feed_types_[key] == float_type: - float_feed_names.append(key) - float_slot.append(feed[key]) - - for key in fetch: - if key in self.fetch_names_: - fetch_names.append(key) + def predict(self, feed=None, fetch=None): + if feed is None or fetch is None: + raise ValueError("You should specify feed and fetch for prediction") + + fetch_list = [] + if isinstance(fetch, str): + fetch_list = [fetch] + elif isinstance(fetch, list): + fetch_list = fetch + else: + raise ValueError("fetch only accepts string and list of string") + + feed_batch = [] + if isinstance(feed, dict): + feed_batch.append(feed) + elif isinstance(feed, list): + feed_batch = feed + else: + raise ValueError("feed only accepts dict and list of dict") - ret = self.client_handle_.predict(float_slot, float_feed_names, - int_slot, int_feed_names, fetch_names, - self.result_handle_, self.pid) - - result_map = {} - for i, name in enumerate(fetch_names): - if self.fetch_names_to_type_[name] == int_type: - result_map[name] = self.result_handle_.get_int64_by_name(name)[ - 0] - elif self.fetch_names_to_type_[name] == float_type: - result_map[name] = self.result_handle_.get_float_by_name(name)[ - 0] - - return result_map - - def batch_predict(self, feed_batch=[], fetch=[]): int_slot_batch = [] float_slot_batch = [] int_feed_names = [] @@ -200,28 +183,33 @@ class Client(object): fetch_names = [] counter = 0 batch_size = len(feed_batch) - for feed in feed_batch: + + for key in fetch_list: + if key in self.fetch_names_: + fetch_names.append(key) + + if len(fetch_names) == 0: + raise ValueError( + "fetch names should not be empty or out of saved fetch list") + return {} + + for i, feed_i in enumerate(feed_batch): int_slot = [] float_slot = [] - for key in feed: + for key in feed_i: if key not in self.feed_names_: continue if self.feed_types_[key] == int_type: - if counter == 0: + if i == 0: int_feed_names.append(key) int_slot.append(feed[key]) elif self.feed_types_[key] == float_type: - if counter == 0: + if i == 0: float_feed_names.append(key) - float_slot.append(feed[key]) - counter += 1 + float_slot.append(feed_i[key]) int_slot_batch.append(int_slot) float_slot_batch.append(float_slot) - for key in fetch: - if key in self.fetch_names_: - fetch_names.append(key) - 
result_batch = self.result_handle_ res = self.client_handle_.batch_predict( float_slot_batch, float_feed_names, int_slot_batch, int_feed_names, @@ -240,7 +228,10 @@ class Client(object): single_result[key] = result_map[key][i] result_map_batch.append(single_result) - return result_map_batch + if batch_size == 1: + return result_map_batch[0] + else: + return result_map_batch def release(self): self.client_handle_.destroy_predictor() diff --git a/python/paddle_serving_server/web_service.py b/python/paddle_serving_server/web_service.py index 71614129d0b8ae923de71d4aa7c30efbe3a8c86a..298e65e73c50241a20bbc319199afa30ac9c978b 100755 --- a/python/paddle_serving_server/web_service.py +++ b/python/paddle_serving_server/web_service.py @@ -64,12 +64,19 @@ class WebService(object): if "fetch" not in request.json: abort(400) feed, fetch = self.preprocess(request.json, request.json["fetch"]) - if "fetch" in feed: - del feed["fetch"] - fetch_map = client_service.predict(feed=feed, fetch=fetch) - fetch_map = self.postprocess( - feed=request.json, fetch=fetch, fetch_map=fetch_map) - return fetch_map + if isinstance(feed, list): + fetch_map_batch = client_service.batch_predict( + feed_batch=feed, fetch=fetch) + fetch_map_batch = self.postprocess( + feed=request.json, fetch=fetch, fetch_map=fetch_map_batch) + result = {"result": fetch_map_batch} + elif isinstance(feed, dict): + if "fetch" in feed: + del feed["fetch"] + fetch_map = client_service.predict(feed=feed, fetch=fetch) + result = self.postprocess( + feed=request.json, fetch=fetch, fetch_map=fetch_map) + return result app_instance.run(host="0.0.0.0", port=self.port, @@ -92,5 +99,5 @@ class WebService(object): def preprocess(self, feed={}, fetch=[]): return feed, fetch - def postprocess(self, feed={}, fetch=[], fetch_map={}): + def postprocess(self, feed={}, fetch=[], fetch_map=None): return fetch_map diff --git a/python/paddle_serving_server_gpu/serve.py b/python/paddle_serving_server_gpu/serve.py index 5d9d96d517d64b21313fda0b44a83b34142b014b..9c8d10e4b36a7830aed25996a309cb4163ca126c 100644 --- a/python/paddle_serving_server_gpu/serve.py +++ b/python/paddle_serving_server_gpu/serve.py @@ -23,14 +23,14 @@ from multiprocessing import Pool, Process from paddle_serving_server_gpu import serve_args -def start_gpu_card_model(gpuid, args): # pylint: disable=doc-string-missing +def start_gpu_card_model(index, gpuid, args): # pylint: disable=doc-string-missing gpuid = int(gpuid) device = "gpu" port = args.port if gpuid == -1: device = "cpu" elif gpuid >= 0: - port = args.port + gpuid + port = args.port + index thread_num = args.thread model = args.model workdir = "{}_{}".format(args.workdir, gpuid) @@ -78,6 +78,7 @@ def start_multi_card(args): # pylint: disable=doc-string-missing p = Process( target=start_gpu_card_model, args=( i, + gpu_id, args, )) gpu_processes.append(p) for p in gpu_processes: @@ -91,15 +92,15 @@ if __name__ == "__main__": if args.name == "None": start_multi_card(args) else: + from .web_service import WebService web_service = WebService(name=args.name) web_service.load_model_config(args.model) - gpu_ids = [] - if args.gpu_ids == "": + gpu_ids = args.gpu_ids + if gpu_ids == "": if "CUDA_VISIBLE_DEVICES" in os.environ: gpu_ids = os.environ["CUDA_VISIBLE_DEVICES"] if len(gpu_ids) > 0: - gpus = [int(x) for x in gpu_ids.split(",")] - web_service.set_gpus(gpus) + web_service.set_gpus(gpu_ids) web_service.prepare_server( workdir=args.workdir, port=args.port, device=args.device) web_service.run_server() diff --git 
a/python/paddle_serving_server_gpu/web_service.py b/python/paddle_serving_server_gpu/web_service.py index 4d88994cc6094488aaf71ff3e37a74acc93579c4..22b534ddf8b8bc017685f4bf3ac67759d030bafc 100755 --- a/python/paddle_serving_server_gpu/web_service.py +++ b/python/paddle_serving_server_gpu/web_service.py @@ -95,12 +95,20 @@ class WebService(object): while True: request_json = inputqueue.get() feed, fetch = self.preprocess(request_json, request_json["fetch"]) - if "fetch" in feed: - del feed["fetch"] - fetch_map = client.predict(feed=feed, fetch=fetch) - fetch_map = self.postprocess( - feed=request_json, fetch=fetch, fetch_map=fetch_map) - self.output_queue.put(fetch_map) + if isinstance(feed, list): + fetch_map_batch = client.batch_predict( + feed_batch=feed, fetch=fetch) + fetch_map_batch = self.postprocess( + feed=request_json, fetch=fetch, fetch_map=fetch_map_batch) + result = {"result": fetch_map_batch} + elif isinstance(feed, dict): + if "fetch" in feed: + del feed["fetch"] + fetch_map = client.predict(feed=feed, fetch=fetch) + result = self.postprocess( + feed=request_json, fetch=fetch, fetch_map=fetch_map) + + self.output_queue.put(result) def _launch_web_service(self, gpu_num): app_instance = Flask(__name__) @@ -186,5 +194,5 @@ class WebService(object): def preprocess(self, feed={}, fetch=[]): return feed, fetch - def postprocess(self, feed={}, fetch=[], fetch_map={}): + def postprocess(self, feed={}, fetch=[], fetch_map=None): return fetch_map diff --git a/tools/Dockerfile.gpu b/tools/Dockerfile.gpu index 427ae83bcb805ec70c1e6d575e84234f17e9fb30..091f4a546b549a3dd53645e78ab49b1cd46bf5b3 100644 --- a/tools/Dockerfile.gpu +++ b/tools/Dockerfile.gpu @@ -10,6 +10,6 @@ RUN yum -y install wget && \ curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && \ python get-pip.py && rm get-pip.py && \ ln -s /usr/local/cuda-9.0/lib64/libcublas.so.9.0 /usr/local/cuda-9.0/lib64/libcublas.so && \ - echo 'export LD_LIBRARY_PATH=/usr/local/cuda/lib64':$LD_LIBRARY_PATH >> /root/.bashrc && \ + echo 'export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH' >> /root/.bashrc && \ ln -s /usr/local/cuda-9.0/targets/x86_64-linux/lib/libcudnn.so.7 /usr/local/cuda-9.0/targets/x86_64-linux/lib/libcudnn.so && \ echo 'export LD_LIBRARY_PATH=/usr/local/cuda-9.0/targets/x86_64-linux/lib:$LD_LIBRARY_PATH' >> /root/.bashrc diff --git a/tools/Dockerfile.gpu.devel b/tools/Dockerfile.gpu.devel new file mode 100644 index 0000000000000000000000000000000000000000..a2233908dbcff4f2f2bbd3edad24b83cb5252e16 --- /dev/null +++ b/tools/Dockerfile.gpu.devel @@ -0,0 +1,23 @@ +FROM nvidia/cuda:9.0-cudnn7-devel-centos7 + +RUN yum -y install wget >/dev/null \ + && yum -y install gcc gcc-c++ make glibc-static which >/dev/null \ + && yum -y install git openssl-devel curl-devel bzip2-devel python-devel >/dev/null \ + && wget https://cmake.org/files/v3.2/cmake-3.2.0-Linux-x86_64.tar.gz >/dev/null \ + && tar xzf cmake-3.2.0-Linux-x86_64.tar.gz \ + && mv cmake-3.2.0-Linux-x86_64 /usr/local/cmake3.2.0 \ + && echo 'export PATH=/usr/local/cmake3.2.0/bin:$PATH' >> /root/.bashrc \ + && rm cmake-3.2.0-Linux-x86_64.tar.gz \ + && wget https://dl.google.com/go/go1.14.linux-amd64.tar.gz >/dev/null \ + && tar xzf go1.14.linux-amd64.tar.gz \ + && mv go /usr/local/go \ + && echo 'export GOROOT=/usr/local/go' >> /root/.bashrc \ + && echo 'export PATH=/usr/local/go/bin:$PATH' >> /root/.bashrc \ + && rm go1.14.linux-amd64.tar.gz \ + && yum -y install python-devel sqlite-devel >/dev/null \ + && curl https://bootstrap.pypa.io/get-pip.py -o 
get-pip.py >/dev/null \ + && python get-pip.py >/dev/null \ + && pip install google protobuf setuptools wheel flask >/dev/null \ + && rm get-pip.py \ + && yum -y install epel-release && yum -y install patchelf \ + && yum clean all diff --git a/tools/serving_build.sh b/tools/serving_build.sh index dd6a3f6da8b3e40f2e379cb9457c4e5f00bc900c..93c11012108fbc8ed32503e96ff1422e0844c041 100644 --- a/tools/serving_build.sh +++ b/tools/serving_build.sh @@ -1,12 +1,23 @@ #!/usr/bin/env bash +function unsetproxy() { + HTTP_PROXY_TEMP=$http_proxy + HTTPS_PROXY_TEMP=$https_proxy + unset http_proxy + unset https_proxy +} + +function setproxy() { + export http_proxy=$HTTP_PROXY_TEMP + export https_proxy=$HTTPS_PROXY_TEMP +} + function init() { source /root/.bashrc set -v - #export http_proxy=http://172.19.56.199:3128 - #export https_proxy=http://172.19.56.199:3128 export PYTHONROOT=/usr cd Serving + export SERVING_WORKDIR=$PWD } function check_cmd() { @@ -16,18 +27,40 @@ function check_cmd() { fi } +function rerun() { + if [ $# -ne 2 ]; then + echo "usage: rerun command rerun-times" + exit 1 + fi + local command=$1 + local times=$2 + for((i=1;i<=${times};i++)) + do + if [ ${i} != 1 ]; then + echo "${i}-th run command: ${command}..." + fi + eval $command + if [ $? -eq 0 ]; then + return 0 + fi + echo "${i}-th run(command: ${command}) failed." + done + exit 1 +} + function build_client() { local TYPE=$1 local DIRNAME=build-client-$TYPE - mkdir $DIRNAME && cd $DIRNAME + mkdir $DIRNAME # pwd: /Serving + cd $DIRNAME # pwd: /Serving/build-client-$TYPE case $TYPE in CPU|GPU) cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ \ -DPYTHON_LIBRARIES=$PYTHONROOT/lib64/libpython2.7.so \ -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \ - -DCLIENT_ONLY=ON .. - check_cmd "make -j2 >/dev/null" - pip install python/dist/paddle_serving_client* >/dev/null + -DCLIENT=ON .. + rerun "make -j2 >/dev/null" 3 # due to some network reasons, compilation may fail + pip install -U python/dist/paddle_serving_client* >/dev/null ;; *) echo "error type" @@ -35,31 +68,34 @@ function build_client() { ;; esac echo "build client $TYPE part finished as expected." - cd .. - rm -rf $DIRNAME + cd .. # pwd: /Serving + # rm -rf $DIRNAME } function build_server() { local TYPE=$1 local DIRNAME=build-server-$TYPE - mkdir $DIRNAME && cd $DIRNAME + mkdir $DIRNAME # pwd: /Serving + cd $DIRNAME # pwd: /Serving/build-server-$TYPE case $TYPE in CPU) cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ \ -DPYTHON_LIBRARIES=$PYTHONROOT/lib64/libpython2.7.so \ -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \ - -DCLIENT_ONLY=OFF .. - check_cmd "make -j2 >/dev/null && make install -j2 >/dev/null" - pip install python/dist/paddle_serving_server* >/dev/null + -DSERVER=ON .. + rerun "make -j2 >/dev/null" 3 # due to some network reasons, compilation may fail + check_cmd "make install -j2 >/dev/null" + pip install -U python/dist/paddle_serving_server* >/dev/null ;; GPU) cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ \ -DPYTHON_LIBRARIES=$PYTHONROOT/lib64/libpython2.7.so \ -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \ - -DCLIENT_ONLY=OFF \ + -DSERVER=ON \ -DWITH_GPU=ON .. 
- check_cmd "make -j2 >/dev/null && make install -j2 >/dev/null" - pip install python/dist/paddle_serving_server* >/dev/null + rerun "make -j2 >/dev/null" 3 # due to some network reasons, compilation may fail + check_cmd "make install -j2 >/dev/null" + pip install -U python/dist/paddle_serving_server* >/dev/null ;; *) echo "error type" @@ -67,30 +103,63 @@ function build_server() { ;; esac echo "build server $TYPE part finished as expected." - cd .. + cd .. # pwd: /Serving + # rm -rf $DIRNAME for export SERVING_BIN } +function kill_server_process() { + ps -ef | grep "serving" | grep -v serving_build | grep -v grep | awk '{print $2}' | xargs kill +} + + function python_test_fit_a_line() { - cd fit_a_line + # pwd: /Serving/python/examples + cd fit_a_line # pwd: /Serving/python/examples/fit_a_line sh get_data.sh local TYPE=$1 - echo $TYPE + export SERVING_BIN=${SERVING_WORKDIR}/build-server-${TYPE}/core/general-server/serving case $TYPE in CPU) # test rpc - check_cmd "python test_server.py uci_housing_model/ > /dev/null &" - sleep 5 + check_cmd "python -m paddle_serving_server.serve --model uci_housing_model --port 9393 --thread 4 > /dev/null &" + sleep 5 # wait for the server to start check_cmd "python test_client.py uci_housing_client/serving_client_conf.prototxt > /dev/null" - ps -ef | grep "paddle_serving_server" | grep -v grep | awk '{print $2}' | xargs kill + kill_server_process + # test web - check_cmd "python -m paddle_serving_server.serve --model uci_housing_model/ --name uci --port 9399 --name uci > /dev/null &" - sleep 5 - check_cmd "curl -H \"Content-Type:application/json\" -X POST -d '{\"x\": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332], \"fetch\":[\"price\"]}' http://127.0.0.1:9399/uci/prediction" - ps -ef | grep "paddle_serving_server" | grep -v grep | awk '{print $2}' | xargs kill + unsetproxy # maybe the proxy is used on iPipe, which makes web-test failed. + check_cmd "python -m paddle_serving_server.serve --model uci_housing_model --name uci --port 9393 --thread 4 --name uci > /dev/null &" + sleep 5 # wait for the server to start + check_cmd "curl -H \"Content-Type:application/json\" -X POST -d '{\"x\": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332], \"fetch\":[\"price\"]}' http://127.0.0.1:9393/uci/prediction" + # check http code + http_code=`curl -H "Content-Type:application/json" -X POST -d '{"x": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332], "fetch":["price"]}' -s -w "%{http_code}" -o /dev/null http://127.0.0.1:9393/uci/prediction` + setproxy # recover proxy state + kill_server_process + if [ ${http_code} -ne 200 ]; then + echo "HTTP status code -ne 200" + exit 1 + fi ;; GPU) - echo "not support yet" - exit 1 + # test rpc + check_cmd "python -m paddle_serving_server_gpu.serve --model uci_housing_model --port 9393 --thread 4 --gpu_ids 0 > /dev/null &" + sleep 5 # wait for the server to start + check_cmd "python test_client.py uci_housing_client/serving_client_conf.prototxt > /dev/null" + kill_server_process + + # test web + unsetproxy # maybe the proxy is used on iPipe, which makes web-test failed. 
+ check_cmd "python -m paddle_serving_server_gpu.serve --model uci_housing_model --port 9393 --thread 2 --gpu_ids 0 --name uci > /dev/null &" + sleep 5 # wait for the server to start + check_cmd "curl -H \"Content-Type:application/json\" -X POST -d '{\"x\": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332], \"fetch\":[\"price\"]}' http://127.0.0.1:9393/uci/prediction" + # check http code + http_code=`curl -H "Content-Type:application/json" -X POST -d '{"x": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332], "fetch":["price"]}' -s -w "%{http_code}" -o /dev/null http://127.0.0.1:9393/uci/prediction` + setproxy # recover proxy state + kill_server_process + if [ ${http_code} -ne 200 ]; then + echo "HTTP status code -ne 200" + exit 1 + fi ;; *) echo "error type" @@ -99,57 +168,69 @@ function python_test_fit_a_line() { esac echo "test fit_a_line $TYPE part finished as expected." rm -rf image kvdb log uci_housing* work* - cd .. + unset SERVING_BIN + cd .. # pwd: /Serving/python/examples } function python_run_criteo_ctr_with_cube() { + # pwd: /Serving/python/examples local TYPE=$1 yum install -y bc >/dev/null - cd criteo_ctr_with_cube - check_cmd "wget https://paddle-serving.bj.bcebos.com/unittest/ctr_cube_unittest.tar.gz" - check_cmd "tar xf ctr_cube_unittest.tar.gz" - check_cmd "mv models/ctr_client_conf ./" - check_cmd "mv models/ctr_serving_model_kv ./" - check_cmd "mv models/data ./cube/" - check_cmd "mv models/ut_data ./" - cp ../../../build-server-$TYPE/output/bin/cube* ./cube/ - mkdir -p $PYTHONROOT/lib/python2.7/site-packages/paddle_serving_server/serving-cpu-avx-openblas-0.1.3/ - yes | cp ../../../build-server-$TYPE/output/demo/serving/bin/serving $PYTHONROOT/lib/python2.7/site-packages/paddle_serving_server/serving-cpu-avx-openblas-0.1.3/ + cd criteo_ctr_with_cube # pwd: /Serving/python/examples/criteo_ctr_with_cube + case $TYPE in + CPU) + check_cmd "wget https://paddle-serving.bj.bcebos.com/unittest/ctr_cube_unittest.tar.gz" + check_cmd "tar xf ctr_cube_unittest.tar.gz" + check_cmd "mv models/ctr_client_conf ./" + check_cmd "mv models/ctr_serving_model_kv ./" + check_cmd "mv models/data ./cube/" + check_cmd "mv models/ut_data ./" + cp ../../../build-server-$TYPE/output/bin/cube* ./cube/ + mkdir -p $PYTHONROOT/lib/python2.7/site-packages/paddle_serving_server/serving-cpu-avx-openblas-0.1.3/ + yes | cp ../../../build-server-$TYPE/output/demo/serving/bin/serving $PYTHONROOT/lib/python2.7/site-packages/paddle_serving_server/serving-cpu-avx-openblas-0.1.3/ - sh cube_prepare.sh & - check_cmd "mkdir work_dir1 && cp cube/conf/cube.conf ./work_dir1/" - python test_server.py ctr_serving_model_kv & - check_cmd "python test_client.py ctr_client_conf/serving_client_conf.prototxt ./ut_data >score" - AUC=$(tail -n 2 score | awk 'NR==1') - VAR2="0.70" - RES=$( echo "$AUC>$VAR2" | bc ) - if [[ $RES -eq 0 ]]; then - echo "error with criteo_ctr_with_cube inference auc test, auc should > 0.70" - exit 1 - fi - echo "criteo_ctr_with_cube inference auc test success" - ps -ef | grep "paddle_serving_server" | grep -v grep | awk '{print $2}' | xargs kill - ps -ef | grep "cube" | grep -v grep | awk '{print $2}' | xargs kill + sh cube_prepare.sh & + check_cmd "mkdir work_dir1 && cp cube/conf/cube.conf ./work_dir1/" + python test_server.py ctr_serving_model_kv & + check_cmd "python test_client.py ctr_client_conf/serving_client_conf.prototxt ./ut_data >score" + AUC=$(tail -n 2 score | awk 
'NR==1') + VAR2="0.70" + RES=$( echo "$AUC>$VAR2" | bc ) + if [[ $RES -eq 0 ]]; then + echo "error with criteo_ctr_with_cube inference auc test, auc should > 0.70" + exit 1 + fi + echo "criteo_ctr_with_cube inference auc test success" + ps -ef | grep "paddle_serving_server" | grep -v grep | awk '{print $2}' | xargs kill + ps -ef | grep "cube" | grep -v grep | awk '{print $2}' | xargs kill + ;; + GPU) + ;; + *) + echo "error type" + exit 1 + ;; + esac + echo "test criteo_ctr_with_cube $TYPE part finished as expected." + cd .. # pwd: /Serving/python/examples } function python_run_test() { - cd python/examples - local TYPE=$1 - # Frist time run, downloading PaddleServing components ... - python -c "from paddle_serving_server import Server; server = Server(); server.download_bin()" - python_test_fit_a_line $TYPE - python_run_criteo_ctr_with_cube $TYPE + # Using the compiled binary + local TYPE=$1 # pwd: /Serving + cd python/examples # pwd: /Serving/python/examples + python_test_fit_a_line $TYPE # pwd: /Serving/python/examples + python_run_criteo_ctr_with_cube $TYPE # pwd: /Serving/python/examples echo "test python $TYPE part finished as expected." - cd ../.. + cd ../.. # pwd: /Serving } function main() { - local TYPE=$1 - init - build_client $TYPE - build_server $TYPE - cd Serving/ - python_run_test $TYPE + local TYPE=$1 # pwd: / + init # pwd: /Serving + build_client $TYPE # pwd: /Serving + build_server $TYPE # pwd: /Serving + python_run_test $TYPE # pwd: /Serving echo "serving $TYPE part finished as expected." }
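
For reference, the client-side change above (python/paddle_serving_client/__init__.py) folds the old batch path into a single `predict()` call: `feed` may be a dict or a list of dicts, `fetch` may be a string or a list of strings, and a bare result map is returned when the batch size is 1. The snippet below is a minimal usage sketch, not part of the patch: it borrows the uci_housing client config, the 127.0.0.1:9393 endpoint, the `x` feed key and the `price` fetch name from the fit_a_line test in tools/serving_build.sh, and assumes the usual `load_client_config`/`connect` setup; adapt these to your own model.

``` python
from paddle_serving_client import Client

# Sample input copied from the fit_a_line web test above.
x = [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583,
     -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332]

client = Client()
client.load_client_config("uci_housing_client/serving_client_conf.prototxt")
client.connect(["127.0.0.1:9393"])

# feed as a single dict: predict() returns one result map, e.g. {"price": ...}.
print(client.predict(feed={"x": x}, fetch=["price"]))

# feed as a list of dicts: predict() returns a list of result maps;
# fetch may also be given as a plain string.
print(client.predict(feed=[{"x": x}, {"x": x}], fetch="price"))
```

Because a single-sample call returns the bare result map rather than a one-element list, existing single-sample callers keep working unchanged while batch callers receive one result map per input.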