diff --git a/README.md b/README.md index fb537b65db83d013f570c8208f21c219ca5084a3..5305056a737c60dcf83812e45f87d33eeb769155 100644 --- a/README.md +++ b/README.md @@ -55,11 +55,13 @@ You may need to use a domestic mirror source (in China, you can use the Tsinghua If you need install modules compiled with develop branch, please download packages from [latest packages list](./doc/LATEST_PACKAGES.md) and install with `pip install` command. -Packages of paddle-serving-server and paddle-serving-server-gpu support Centos 6/7 and Ubuntu 16/18. +Packages of paddle-serving-server and paddle-serving-server-gpu support CentOS 6/7, Ubuntu 16/18, and Windows 10. Packages of paddle-serving-client and paddle-serving-app support Linux and Windows, but paddle-serving-client only support python2.7/3.6/3.7. -Recommended to install paddle >= 1.8.2. +It is recommended to install paddle >= 1.8.4. + +For **Windows users**, please read the document [Paddle Serving for Windows Users](./doc/WINDOWS_TUTORIAL.md).

Pre-built services with Paddle Serving

diff --git a/README_CN.md b/README_CN.md index 2c37a26681d4291adcf7e8e70d3392772fabbe6b..d1627c23b68e242f0fc79214dff578d47b589cbd 100644 --- a/README_CN.md +++ b/README_CN.md @@ -57,11 +57,13 @@ pip install paddle-serving-server-gpu==0.3.2.post10 # GPU with CUDA10.0 如果需要使用develop分支编译的安装包,请从[最新安装包列表](./doc/LATEST_PACKAGES.md)中获取下载地址进行下载,使用`pip install`命令进行安装。 -paddle-serving-server和paddle-serving-server-gpu安装包支持Centos 6/7和Ubuntu 16/18。 +paddle-serving-server和paddle-serving-server-gpu安装包支持Centos 6/7, Ubuntu 16/18和Windows 10。 paddle-serving-client和paddle-serving-app安装包支持Linux和Windows,其中paddle-serving-client仅支持python2.7/3.5/3.6。 -推荐安装1.8.2及以上版本的paddle +推荐安装1.8.4及以上版本的paddle + +对于**Windows 10 用户**,请参考文档[Windows平台使用Paddle Serving指导](./doc/WINDOWS_TUTORIAL_CN.md)。

Paddle Serving预装的服务

diff --git a/cmake/paddlepaddle.cmake b/cmake/paddlepaddle.cmake index 4b7d3ed1f620bfcd2e1e214c49c57ee3848129e7..15076c15961e96317bf31647b1b64c6fee5ebd7d 100644 --- a/cmake/paddlepaddle.cmake +++ b/cmake/paddlepaddle.cmake @@ -114,7 +114,7 @@ ADD_LIBRARY(openblas STATIC IMPORTED GLOBAL) SET_PROPERTY(TARGET openblas PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/third_party/install/openblas/lib/libopenblas.a) ADD_LIBRARY(paddle_fluid SHARED IMPORTED GLOBAL) -SET_PROPERTY(TARGET paddle_fluid PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/lib/libpaddle_fluid.so) +SET_PROPERTY(TARGET paddle_fluid PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/lib/libpaddle_fluid.a) if (WITH_TRT) ADD_LIBRARY(nvinfer SHARED IMPORTED GLOBAL) @@ -127,12 +127,17 @@ endif() ADD_LIBRARY(xxhash STATIC IMPORTED GLOBAL) SET_PROPERTY(TARGET xxhash PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/third_party/install/xxhash/lib/libxxhash.a) +ADD_LIBRARY(cryptopp STATIC IMPORTED GLOBAL) +SET_PROPERTY(TARGET cryptopp PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/third_party/install/cryptopp/lib/libcryptopp.a) + LIST(APPEND external_project_dependencies paddle) LIST(APPEND paddle_depend_libs - xxhash) + xxhash cryptopp) + if(WITH_TRT) LIST(APPEND paddle_depend_libs nvinfer nvinfer_plugin) endif() + diff --git a/core/predictor/tools/seq_generator.cpp b/core/predictor/tools/seq_generator.cpp index eb7e7ed7f9a609e0c21be9a2c3d686dd7d9a1abd..c667445c7cf380a0f085eaeec24f5201b3445c73 100644 --- a/core/predictor/tools/seq_generator.cpp +++ b/core/predictor/tools/seq_generator.cpp @@ -17,11 +17,11 @@ #include #include #include -#include +#include //NOLINT #include "core/predictor/framework.pb.h" -#include "quant.h" -#include "seq_file.h" +#include "quant.h" // NOLINT +#include "seq_file.h" // NOLINT inline uint64_t time_diff(const struct timeval &start_time, const struct timeval &end_time) { @@ -113,13 +113,15 @@ int dump_parameter(const char *input_file, const char *output_file) { // std::cout << 
"key_len " << key_len << " value_len " << value_buf_len // << std::endl; memcpy(value_buf, tensor_buf + offset, value_buf_len); - seq_file_writer.write((char *)&i, sizeof(i), value_buf, value_buf_len); + seq_file_writer.write( + std::to_string(i).c_str(), sizeof(i), value_buf, value_buf_len); offset += value_buf_len; } return 0; } -float *read_embedding_table(const char *file1, std::vector &dims) { +float *read_embedding_table(const char *file1, + std::vector &dims) { // NOLINT std::ifstream is(file1); // Step 1: is read version, os write version uint32_t version; @@ -242,7 +244,7 @@ int compress_parameter_parallel(const char *file1, float x = *(emb_table + k * emb_size + e); int val = round((x - xmin) / scale); val = std::max(0, val); - val = std::min((int)pow2bits - 1, val); + val = std::min(static_cast(pow2bits) - 1, val); *(tensor_temp + 2 * sizeof(float) + e) = val; } result[k] = tensor_temp; @@ -262,7 +264,8 @@ int compress_parameter_parallel(const char *file1, } SeqFileWriter seq_file_writer(file2); for (int64_t i = 0; i < dict_size; i++) { - seq_file_writer.write((char *)&i, sizeof(i), result[i], per_line_size); + seq_file_writer.write( + std::to_string(i).c_str(), sizeof(i), result[i], per_line_size); } return 0; } diff --git a/doc/WINDOWS_TUTORIAL.md b/doc/WINDOWS_TUTORIAL.md new file mode 100644 index 0000000000000000000000000000000000000000..8d197df3e720495a2e93d21b02c2340126bb2813 --- /dev/null +++ b/doc/WINDOWS_TUTORIAL.md @@ -0,0 +1,126 @@ +## Paddle Serving for Windows Users + +(English|[简体中文](./WINDOWS_TUTORIAL_CN.md)) + +### Summary + +This document guides users how to build Paddle Serving service on the Windows platform. Due to the limited support of third-party libraries, the Windows platform currently only supports the use of web services to build local predictor prediction services. If you want to experience all the services, you need to use Docker for Windows to simulate the operating environment of Linux. 
+ +### Running Paddle Serving on Native Windows System + +**Configure Python environment variables to PATH**: First, you need to add the directory where the Python executable program is located to the PATH. Usually in **System Properties/My Computer Properties**-**Advanced**-**Environment Variables**, click Path and add the path at the beginning. For example, `C:\Users\$USER\AppData\Local\Programs\Python\Python36`, and finally click **OK** continuously. If you enter python on Powershell, you can enter the python interactive interface, indicating that the environment variable configuration is successful. + +**Install wget**: Because all the downloads in the tutorial and the built-in model download function in `paddle_serving_app` all use the wget tool, download the binary package at the [link](http://gnuwin32.sourceforge.net/packages/wget.htm), unzip and copy it to `C:\Windows\System32`, if there is a security prompt, you need to pass it. + +**Install Git**: For details, see [Git official website](https://git-scm.com/downloads) + +**Install the necessary C++ library (optional)**: Some users may encounter the problem that the dll cannot be linked during the `import paddle` stage. It is recommended to [Install Visual Studio Community Edition](https://visualstudio.microsoft.com/), and install the relevant components of C++. 
+ +**Install Paddle and Serving**: In PowerShell, execute + +``` +python -m pip install -U paddle_serving_server paddle_serving_client paddle_serving_app paddlepaddle +``` + +For GPU users, + +``` +python -m pip install -U paddle_serving_server_gpu paddle_serving_client paddle_serving_app paddlepaddle-gpu +``` + +**Git clone Serving Project:** + +``` +git clone https://github.com/paddlepaddle/Serving +``` + +**Run OCR example**: + +``` +cd Serving/python/examples/ocr +python -m paddle_serving_app.package --get_model ocr_rec +tar -xzvf ocr_rec.tar.gz +python -m paddle_serving_app.package --get_model ocr_det +tar -xzvf ocr_det.tar.gz +python ocr_debugger_server.py & +python ocr_web_client.py +``` + +### Create a new Paddle Serving Web Service on Windows + +Currently Windows supports the Local Predictor of the Web Service framework. The server code framework is as follows + +``` +# filename:your_webservice.py +from paddle_serving_server.web_service import WebService +# If it is the GPU version, please use from paddle_serving_server_gpu.web_service import WebService +class YourWebService(WebService): + def preprocess(self, feed=[], fetch=[]): + #Implement pre-processing here + #feed_dict is key: var names, value: numpy array input + #fetch_names is a list of fetch variable names + #is_batch indicates whether the numpy arrays in the values of feed_dict contain the batch dimension + return feed_dict, fetch_names, is_batch + def postprocess(self, feed={}, fetch=[], fetch_map=None): + #fetch_map is the dictionary returned after prediction: the keys are the fetch names returned by preprocess, and the values are the corresponding variable values + #After processing here, the result needs to be converted into a dictionary again, and the type of values should be a list, so that it can be serialized in JSON to facilitate web return + return response + +your_service = YourWebService(name="XXX") +your_service.load_model_config("your_model_path") 
+your_service.prepare_server(workdir="workdir", port=9292) +# If you are a GPU user, you can refer to the python example under python/examples/ocr +your_service.run_debugger_service() +# Windows platform cannot use run_rpc_service() interface +your_service.run_web_service() +``` + +Client code example + +``` +# filename:your_client.py +import requests +import json +import base64 +import os, sys +import time +import cv2 # If you need to upload pictures +# Used for image reading, the principle is to use base64 encoding file content +def cv2_to_base64(image): + return base64.b64encode(image).decode( + 'utf8') #data.tostring()).decode('utf8') + +headers = {"Content-type": "application/json"} +url = "http://127.0.0.1:9292/XXX/prediction" # XXX depends on the initial name parameter of the server YourService +r = requests.post(url=url, headers=headers, data=json.dumps(data)) +print(r.json()) +``` + +The user only needs to follow the above instructions and implement the relevant content in the corresponding function. For more information, please refer to [How to develop a new Web Service? ](./NEW_WEB_SERVICE.md) + +Execute after development + +``` +python your_webservice.py & +python your_client.py +``` + +Because the port needs to be occupied, there may be a security prompt during the startup process. Please click through and an IP address will be generated. It should be noted that when the Windows platform starts the service, the local IP address may not be 127.0.0.1. You need to confirm the IP address and then see how the Client should set the access IP. + +### Docker for Windows User Guide + +The above content is used for native Windows. If users want to experience complete functions, they need to use Docker tools to model Linux systems. + +Please refer to [Docker Desktop](https://www.docker.com/products/docker-desktop) to install Docker + +After installation, start the docker linux engine and download the relevant image. 
In the Serving directory + +``` +docker pull hub.baidubce.com/paddlepaddle/serving:latest-devel +# No port is exposed here; users can set -p to perform port mapping as needed +docker run --rm -dit --name serving_devel -v $PWD:/Serving hub.baidubce.com/paddlepaddle/serving:latest-devel +docker exec -it serving_devel bash +cd /Serving +``` + +The rest of the operations are exactly the same as the Linux version. diff --git a/doc/WINDOWS_TUTORIAL_CN.md b/doc/WINDOWS_TUTORIAL_CN.md new file mode 100644 index 0000000000000000000000000000000000000000..dd1074bdba6a4e00572b010b28f80f3f424787e0 --- /dev/null +++ b/doc/WINDOWS_TUTORIAL_CN.md @@ -0,0 +1,126 @@ +## Windows平台使用Paddle Serving指导 + +([English](./WINDOWS_TUTORIAL.md)|简体中文) + +### 综述 + +本文档指导用户如何在Windows平台手把手搭建Paddle Serving服务。由于受限第三方库的支持,Windows平台目前只支持用web service的方式搭建local predictor预测服务。如果想要体验全部的服务,需要使用Docker for Windows,来模拟Linux的运行环境。 + +### 原生Windows系统运行Paddle Serving + +**配置Python环境变量到PATH**:首先需要将Python的可执行程序所在目录加入到PATH当中。通常在**系统属性/我的电脑属性**-**高级**-**环境变量** ,点选Path,并在开头加上路径。例如`C:\Users\$USER\AppData\Local\Programs\Python\Python36`,最后连续点击**确定** 。在Powershell上如果输入python可以进入python交互界面,说明环境变量配置成功。 + +**安装wget工具**:由于教程当中所有的下载,以及`paddle_serving_app`当中内嵌的模型下载功能,都是用到wget工具,在链接[下载wget](http://gnuwin32.sourceforge.net/packages/wget.htm),解压后复制到`C:\Windows\System32`下,如有安全提示需要通过。 + +**安装Git工具**: 详情参见[Git官网](https://git-scm.com/downloads) + +**安装必要的C++库(可选)**:部分用户可能会在`import paddle`阶段遇见dll无法链接的问题,建议可以[安装Visual Studio社区版本](https://visualstudio.microsoft.com/) ,并且安装C++的相关组件。 + +**安装Paddle和Serving**:在Powershell,执行 + +``` +python -m pip install -U paddle_serving_server paddle_serving_client paddle_serving_app paddlepaddle +``` + +如果是GPU用户 + +``` +python -m pip install -U paddle_serving_server_gpu paddle_serving_client paddle_serving_app paddlepaddle-gpu +``` + +**下载Serving库**: + +``` +git clone https://github.com/paddlepaddle/Serving +``` + +**运行OCR示例**: + +``` +cd Serving/python/examples/ocr +python -m 
paddle_serving_app.package --get_model ocr_rec +tar -xzvf ocr_rec.tar.gz +python -m paddle_serving_app.package --get_model ocr_det +tar -xzvf ocr_det.tar.gz +python ocr_debugger_server.py & +python ocr_web_client.py +``` + +### 创建新的Windows支持的Paddle Serving服务 + +目前Windows支持Web Service框架的Local Predictor。服务端代码框架如下 + +``` +# filename:your_webservice.py +from paddle_serving_server.web_service import WebService +# 如果是GPU版本,请使用 from paddle_serving_server_gpu.web_service import WebService +class YourWebService(WebService): + def preprocess(self, feed=[], fetch=[]): + #在这里实现前处理 + #feed_dict是 key: var names, value: numpy array input + #fetch_names 是fetch变量名列表 + #is_batch的含义是feed_dict的value里的numpy array是否包含了batch维度 + return feed_dict, fetch_names, is_batch + def postprocess(self, feed={}, fetch=[], fetch_map=None): + #fetch map是经过预测之后的返回字典,key是preprocess返回时给定的fetch names,value是对应fetch names的var具体值 + #在这里做处理之后,结果需重新转换成字典,并且values的类型应是列表list,这样可以JSON序列化方便web返回 + return response + +your_service = YourWebService(name="XXX") +your_service.load_model_config("your_model_path") +your_service.prepare_server(workdir="workdir", port=9292) +# 如果是GPU用户,可以参照python/examples/ocr下的python示例 +your_service.run_debugger_service() +# Windows平台不可以使用 run_rpc_service()接口 +your_service.run_web_service() +``` + +客户端代码示例 + +``` +# filename:your_client.py +import requests +import json +import base64 +import os, sys +import time +import cv2 # 如果需要上传图片 +# 用于图片读取,原理是采用base64编码文件内容 +def cv2_to_base64(image): + return base64.b64encode(image).decode( + 'utf8') #data.tostring()).decode('utf8') + +headers = {"Content-type": "application/json"} +url = "http://127.0.0.1:9292/XXX/prediction" # XXX取决于服务端YourWebService的初始化name参数 +r = requests.post(url=url, headers=headers, data=json.dumps(data)) +print(r.json()) +``` + +用户只需要按照如上指示,在对应函数中实现相关内容即可。更多信息请参见[如何开发一个新的Web Service?](./NEW_WEB_SERVICE_CN.md) + +开发完成后执行 + +``` +python your_webservice.py & +python your_client.py +``` + 
+因为需要占用端口,因此启动过程可能会有安全提示,请点选通过,就会有IP地址生成。需要注意的是,Windows平台启动服务时,本地IP地址可能不是127.0.0.1,需要确认好IP地址再看Client应该如何设定访问IP。 + +### Docker for Windows 使用指南 + +以上内容用于原生的Windows,如果用户想要体验完整的功能,需要使用Docker工具,来模拟Linux系统。 + +安装Docker请参考[Docker Desktop](https://www.docker.com/products/docker-desktop) + +安装之后启动docker的linux engine,下载相关镜像。在Serving目录下 + +``` +docker pull hub.baidubce.com/paddlepaddle/serving:latest-devel +# 此处没有expose端口,用户可根据需要设置-p来进行端口映射 +docker run --rm -dit --name serving_devel -v $PWD:/Serving hub.baidubce.com/paddlepaddle/serving:latest-devel +docker exec -it serving_devel bash +cd /Serving +``` + +其余操作与Linux版本完全一致。 diff --git a/paddle_inference/inferencer-fluid-cpu/include/fluid_cpu_engine.h b/paddle_inference/inferencer-fluid-cpu/include/fluid_cpu_engine.h index a4d8dda71a7977185106bb1552cb8f39ef6bc50e..5f54bf3ceb3808eeff7d9d87cb56e3549d9ec44f 100644 --- a/paddle_inference/inferencer-fluid-cpu/include/fluid_cpu_engine.h +++ b/paddle_inference/inferencer-fluid-cpu/include/fluid_cpu_engine.h @@ -13,7 +13,6 @@ // limitations under the License. 
#pragma once - #include #include #include @@ -29,7 +28,6 @@ namespace paddle_serving { namespace fluid_cpu { using configure::SigmoidConf; - class AutoLock { public: explicit AutoLock(pthread_mutex_t& mutex) : _mut(mutex) { @@ -530,7 +528,60 @@ class FluidCpuAnalysisDirWithSigmoidCore : public FluidCpuWithSigmoidCore { return 0; } }; +class FluidCpuAnalysisEncryptCore : public FluidFamilyCore { + public: + void ReadBinaryFile(const std::string& filename, std::string* contents) { + std::ifstream fin(filename, std::ios::in | std::ios::binary); + fin.seekg(0, std::ios::end); + contents->clear(); + contents->resize(fin.tellg()); + fin.seekg(0, std::ios::beg); + fin.read(&(contents->at(0)), contents->size()); + fin.close(); + } + + int create(const predictor::InferEngineCreationParams& params) { + std::string data_path = params.get_path(); + if (access(data_path.c_str(), F_OK) == -1) { + LOG(ERROR) << "create paddle predictor failed, path note exits: " + << data_path; + return -1; + } + + std::string model_buffer, params_buffer, key_buffer; + ReadBinaryFile(data_path + "encrypt_model", &model_buffer); + ReadBinaryFile(data_path + "encrypt_params", &params_buffer); + ReadBinaryFile(data_path + "key", &key_buffer); + VLOG(2) << "prepare for encryption model"; + + auto cipher = paddle::MakeCipher(""); + std::string real_model_buffer = cipher->Decrypt(model_buffer, key_buffer); + std::string real_params_buffer = cipher->Decrypt(params_buffer, key_buffer); + + paddle::AnalysisConfig analysis_config; + analysis_config.SetModelBuffer(&real_model_buffer[0], + real_model_buffer.size(), + &real_params_buffer[0], + real_params_buffer.size()); + analysis_config.DisableGpu(); + analysis_config.SetCpuMathLibraryNumThreads(1); + if (params.enable_memory_optimization()) { + analysis_config.EnableMemoryOptim(); + } + analysis_config.SwitchSpecifyInputNames(true); + AutoLock lock(GlobalPaddleCreateMutex::instance()); + VLOG(2) << "decrypt model file sucess"; + _core = + 
paddle::CreatePaddlePredictor(analysis_config); + if (NULL == _core.get()) { + LOG(ERROR) << "create paddle predictor failed, path: " << data_path; + return -1; + } + VLOG(2) << "create paddle predictor sucess, path: " << data_path; + return 0; + } +}; } // namespace fluid_cpu } // namespace paddle_serving } // namespace baidu diff --git a/paddle_inference/inferencer-fluid-cpu/src/fluid_cpu_engine.cpp b/paddle_inference/inferencer-fluid-cpu/src/fluid_cpu_engine.cpp index af3f93a8129282920f4cb6fd1d074e0c7eb46228..1399f1359013905bff24887587aa671ee5be87b0 100644 --- a/paddle_inference/inferencer-fluid-cpu/src/fluid_cpu_engine.cpp +++ b/paddle_inference/inferencer-fluid-cpu/src/fluid_cpu_engine.cpp @@ -52,6 +52,13 @@ REGIST_FACTORY_OBJECT_IMPL_WITH_NAME( ::baidu::paddle_serving::predictor::InferEngine, "FLUID_CPU_NATIVE_DIR_SIGMOID"); +#if 1 +REGIST_FACTORY_OBJECT_IMPL_WITH_NAME( + ::baidu::paddle_serving::predictor::FluidInferEngine< + FluidCpuAnalysisEncryptCore>, + ::baidu::paddle_serving::predictor::InferEngine, + "FLUID_CPU_ANALYSIS_ENCRYPT"); +#endif } // namespace fluid_cpu } // namespace paddle_serving } // namespace baidu diff --git a/paddle_inference/inferencer-fluid-gpu/include/fluid_gpu_engine.h b/paddle_inference/inferencer-fluid-gpu/include/fluid_gpu_engine.h index 3782c967823d07c23ba02e5ce0f388dc6b46e181..01b343340b31e7b668e8a2db37ef9c5ef24e355a 100644 --- a/paddle_inference/inferencer-fluid-gpu/include/fluid_gpu_engine.h +++ b/paddle_inference/inferencer-fluid-gpu/include/fluid_gpu_engine.h @@ -25,7 +25,6 @@ #include "core/configure/inferencer_configure.pb.h" #include "core/predictor/framework/infer.h" #include "paddle_inference_api.h" // NOLINT - DECLARE_int32(gpuid); namespace baidu { @@ -591,6 +590,60 @@ class FluidGpuAnalysisDirWithSigmoidCore : public FluidGpuWithSigmoidCore { } }; +class FluidGpuAnalysisEncryptCore : public FluidFamilyCore { + public: + void ReadBinaryFile(const std::string& filename, std::string* contents) { + std::ifstream 
fin(filename, std::ios::in | std::ios::binary); + fin.seekg(0, std::ios::end); + contents->clear(); + contents->resize(fin.tellg()); + fin.seekg(0, std::ios::beg); + fin.read(&(contents->at(0)), contents->size()); + fin.close(); + } + + int create(const predictor::InferEngineCreationParams& params) { + std::string data_path = params.get_path(); + if (access(data_path.c_str(), F_OK) == -1) { + LOG(ERROR) << "create paddle predictor failed, path note exits: " + << data_path; + return -1; + } + + std::string model_buffer, params_buffer, key_buffer; + ReadBinaryFile(data_path + "encrypt_model", &model_buffer); + ReadBinaryFile(data_path + "encrypt_params", &params_buffer); + ReadBinaryFile(data_path + "key", &key_buffer); + + VLOG(2) << "prepare for encryption model"; + + auto cipher = paddle::MakeCipher(""); + std::string real_model_buffer = cipher->Decrypt(model_buffer, key_buffer); + std::string real_params_buffer = cipher->Decrypt(params_buffer, key_buffer); + + paddle::AnalysisConfig analysis_config; + analysis_config.SetModelBuffer(&real_model_buffer[0], + real_model_buffer.size(), + &real_params_buffer[0], + real_params_buffer.size()); + analysis_config.EnableUseGpu(100, FLAGS_gpuid); + analysis_config.SetCpuMathLibraryNumThreads(1); + if (params.enable_memory_optimization()) { + analysis_config.EnableMemoryOptim(); + } + analysis_config.SwitchSpecifyInputNames(true); + AutoLock lock(GlobalPaddleCreateMutex::instance()); + VLOG(2) << "decrypt model file sucess"; + _core = + paddle::CreatePaddlePredictor(analysis_config); + if (NULL == _core.get()) { + LOG(ERROR) << "create paddle predictor failed, path: " << data_path; + return -1; + } + VLOG(2) << "create paddle predictor sucess, path: " << data_path; + return 0; + } +}; } // namespace fluid_gpu } // namespace paddle_serving } // namespace baidu diff --git a/paddle_inference/inferencer-fluid-gpu/src/fluid_gpu_engine.cpp b/paddle_inference/inferencer-fluid-gpu/src/fluid_gpu_engine.cpp index 
7447a417338a37716eff025721126e4c817408a6..41e9c8794315911ded9f806855e63e12c38eb939 100644 --- a/paddle_inference/inferencer-fluid-gpu/src/fluid_gpu_engine.cpp +++ b/paddle_inference/inferencer-fluid-gpu/src/fluid_gpu_engine.cpp @@ -54,6 +54,12 @@ REGIST_FACTORY_OBJECT_IMPL_WITH_NAME( ::baidu::paddle_serving::predictor::InferEngine, "FLUID_GPU_NATIVE_DIR_SIGMOID"); +REGIST_FACTORY_OBJECT_IMPL_WITH_NAME( + ::baidu::paddle_serving::predictor::FluidInferEngine< + FluidGpuAnalysisEncryptCore>, + ::baidu::paddle_serving::predictor::InferEngine, + "FLUID_GPU_ANALYSIS_ENCRPT") + } // namespace fluid_gpu } // namespace paddle_serving } // namespace baidu diff --git a/python/examples/encryption/README.md b/python/examples/encryption/README.md new file mode 100644 index 0000000000000000000000000000000000000000..dd662582f6531ca9c8d7419f749f9d02a105bb70 --- /dev/null +++ b/python/examples/encryption/README.md @@ -0,0 +1,34 @@ +# Encryption Model Prediction + +([简体中文](README_CN.md)|English) + +## Get Original Model + +The example uses the model file of the fit_a_line example as the original (unencrypted) model. + +``` +sh get_data.sh +``` + +## Encrypt Model + +``` +python encrypt.py +``` +The key is stored in the `key` file, and the encrypted model file and server-side configuration file are stored in the `encrypt_server` directory. +The client-side configuration file is stored in the `encrypt_client` directory. 
+ +## Start Encryption Service +CPU Service +``` +python -m paddle_serving_server.serve --model encrypt_server/ --port 9300 --use_encryption_model +``` +GPU Service +``` +python -m paddle_serving_server_gpu.serve --model encrypt_server/ --port 9300 --use_encryption_model --gpu_ids 0 +``` + +## Prediction +``` +python test_client.py uci_housing_client/serving_client_conf.prototxt +``` diff --git a/python/examples/encryption/README_CN.md b/python/examples/encryption/README_CN.md new file mode 100644 index 0000000000000000000000000000000000000000..cd690363f92b5ca404faef5a078497aaa5338e36 --- /dev/null +++ b/python/examples/encryption/README_CN.md @@ -0,0 +1,33 @@ +# 加密模型预测 + +(简体中文|[English](README.md)) + +## 获取明文模型 + +示例中使用fit_a_line示例的模型文件作为明文模型 + +``` +sh get_data.sh +``` + +## 模型加密 + +``` +python encrypt.py +``` +密钥保存在`key`文件中,加密模型文件以及server端配置文件保存在`encrypt_server`目录下,client端配置文件保存在`encrypt_client`目录下。 + +## 启动加密预测服务 +CPU预测服务 +``` +python -m paddle_serving_server.serve --model encrypt_server/ --port 9300 --use_encryption_model +``` +GPU预测服务 +``` +python -m paddle_serving_server_gpu.serve --model encrypt_server/ --port 9300 --use_encryption_model --gpu_ids 0 +``` + +## 预测 +``` +python test_client.py uci_housing_client/serving_client_conf.prototxt +``` diff --git a/python/examples/encryption/encrypt.py b/python/examples/encryption/encrypt.py new file mode 100644 index 0000000000000000000000000000000000000000..9e01b5c63c95100c46b91c7f0c9c59191e66ae26 --- /dev/null +++ b/python/examples/encryption/encrypt.py @@ -0,0 +1,27 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from paddle_serving_client.io import inference_model_to_serving + + +def serving_encryption(): + inference_model_to_serving( + dirname="./uci_housing_model", + serving_server="encrypt_server", + serving_client="encrypt_client", + encryption=True) + + +if __name__ == "__main__": + serving_encryption() diff --git a/python/examples/encryption/get_data.sh b/python/examples/encryption/get_data.sh new file mode 100644 index 0000000000000000000000000000000000000000..fd7865f99865504a358c46d39c8962f89b13e475 --- /dev/null +++ b/python/examples/encryption/get_data.sh @@ -0,0 +1,2 @@ +wget --no-check-certificate https://paddle-serving.bj.bcebos.com/uci_housing_example/encrypt.tar.gz +tar -xzf encrypt.tar.gz diff --git a/python/examples/encryption/test_client.py b/python/examples/encryption/test_client.py new file mode 100644 index 0000000000000000000000000000000000000000..4d211a562733d2a2b1e653a7684fdcd6cf0285d1 --- /dev/null +++ b/python/examples/encryption/test_client.py @@ -0,0 +1,32 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# pylint: disable=doc-string-missing + +from paddle_serving_client import Client +import sys + +client = Client() +client.load_client_config(sys.argv[1]) +client.use_key("./key") +client.connect(["127.0.0.1:9300"], encryption=True) + +import paddle +test_reader = paddle.batch( + paddle.reader.shuffle( + paddle.dataset.uci_housing.test(), buf_size=500), + batch_size=1) + +for data in test_reader(): + fetch_map = client.predict(feed={"x": data[0][0]}, fetch=["price"]) + print("{} {}".format(fetch_map["price"][0], data[0][1][0])) diff --git a/python/examples/ocr_detection/7.jpg b/python/examples/ocr_detection/7.jpg deleted file mode 100644 index a9483bb74f66d88699b09545366c32a4fe108e54..0000000000000000000000000000000000000000 Binary files a/python/examples/ocr_detection/7.jpg and /dev/null differ diff --git a/python/examples/ocr_detection/text_det_client.py b/python/examples/ocr_detection/text_det_client.py deleted file mode 100644 index aaa1c5b1179fcbf1d010bb9f6335ef2886435a83..0000000000000000000000000000000000000000 --- a/python/examples/ocr_detection/text_det_client.py +++ /dev/null @@ -1,47 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -from paddle_serving_client import Client -from paddle_serving_app.reader import Sequential, File2Image, ResizeByFactor -from paddle_serving_app.reader import Div, Normalize, Transpose -from paddle_serving_app.reader import DBPostProcess, FilterBoxes - -client = Client() -client.load_client_config("ocr_det_client/serving_client_conf.prototxt") -client.connect(["127.0.0.1:9494"]) - -read_image_file = File2Image() -preprocess = Sequential([ - ResizeByFactor(32, 960), Div(255), - Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), Transpose( - (2, 0, 1)) -]) -post_func = DBPostProcess({ - "thresh": 0.3, - "box_thresh": 0.5, - "max_candidates": 1000, - "unclip_ratio": 1.5, - "min_size": 3 -}) -filter_func = FilterBoxes(10, 10) - -img = read_image_file(name) -ori_h, ori_w, _ = img.shape -img = preprocess(img) -new_h, new_w, _ = img.shape -ratio_list = [float(new_h) / ori_h, float(new_w) / ori_w] -outputs = client.predict(feed={"image": img}, fetch=["concat_1.tmp_0"]) -dt_boxes_list = post_func(outputs["concat_1.tmp_0"], [ratio_list]) -dt_boxes = filter_func(dt_boxes_list[0], [ori_h, ori_w]) diff --git a/python/paddle_serving_client/__init__.py b/python/paddle_serving_client/__init__.py index 6f3908fd6445854f7c398d6b228112b99898028d..9ee2f000f8da141a1f848cbc0feb61811921f788 100644 --- a/python/paddle_serving_client/__init__.py +++ b/python/paddle_serving_client/__init__.py @@ -13,16 +13,19 @@ # limitations under the License. 
# pylint: disable=doc-string-missing -import paddle_serving_client import os -from .proto import sdk_configure_pb2 as sdk -from .proto import general_model_config_pb2 as m_config -import google.protobuf.text_format -import numpy as np import time import sys +import requests +import json +import base64 +import numpy as np +import paddle_serving_client +import google.protobuf.text_format import grpc +from .proto import sdk_configure_pb2 as sdk +from .proto import general_model_config_pb2 as m_config from .proto import multi_lang_general_model_service_pb2 sys.path.append( os.path.join(os.path.abspath(os.path.dirname(__file__)), 'proto')) @@ -161,6 +164,7 @@ class Client(object): self.fetch_names_to_idx_ = {} self.lod_tensor_set = set() self.feed_tensor_len = {} + self.key = None for i, var in enumerate(model_conf.feed_var): self.feed_names_to_idx_[var.alias_name] = i @@ -193,7 +197,28 @@ class Client(object): else: self.rpc_timeout_ms = rpc_timeout - def connect(self, endpoints=None): + def use_key(self, key_filename): + with open(key_filename, "rb") as f:  # read as bytes so base64.b64encode accepts the key on Python 3 + self.key = f.read() + + def get_serving_port(self, endpoints): + if self.key is not None: + req = json.dumps({"key": base64.b64encode(self.key).decode()})  # decode to text: json.dumps cannot serialize bytes + else: + req = json.dumps({}) + r = requests.post("http://" + endpoints[0], req) + result = r.json() + print(result) + if "endpoint_list" not in result: + raise ValueError("server not ready") + else: + endpoints = [ + endpoints[0].split(":")[0] + ":" + + str(result["endpoint_list"][0]) + ] + return endpoints + + def connect(self, endpoints=None, encryption=False): # check whether current endpoint is available # init from client config # create predictor here @@ -203,6 +228,8 @@ class Client(object): "You must set the endpoints parameter or use add_variant function to create a variant." 
) else: + if encryption: + endpoints = self.get_serving_port(endpoints) if self.predictor_sdk_ is None: self.add_variant('default_tag_{}'.format(id(self)), endpoints, 100) diff --git a/python/paddle_serving_client/io/__init__.py b/python/paddle_serving_client/io/__init__.py index 5ffa6262ec9187d649c207bf753f3d051cd48778..40d39af4b59ea2b0d8679adc368fdf4373667bc4 100644 --- a/python/paddle_serving_client/io/__init__.py +++ b/python/paddle_serving_client/io/__init__.py @@ -21,6 +21,9 @@ from paddle.fluid.framework import Program from paddle.fluid import CPUPlace from paddle.fluid.io import save_inference_model import paddle.fluid as fluid +from paddle.fluid.core import CipherUtils +from paddle.fluid.core import CipherFactory +from paddle.fluid.core import Cipher from ..proto import general_model_config_pb2 as model_conf import os @@ -29,7 +32,10 @@ def save_model(server_model_folder, client_config_folder, feed_var_dict, fetch_var_dict, - main_program=None): + main_program=None, + encryption=False, + key_len=128, + encrypt_conf=None): executor = Executor(place=CPUPlace()) feed_var_names = [feed_var_dict[x].name for x in feed_var_dict] @@ -38,14 +44,29 @@ def save_model(server_model_folder, for key in sorted(fetch_var_dict.keys()): target_vars.append(fetch_var_dict[key]) target_var_names.append(key) - - save_inference_model( - server_model_folder, - feed_var_names, - target_vars, - executor, - main_program=main_program) - + if not encryption: + save_inference_model( + server_model_folder, + feed_var_names, + target_vars, + executor, + main_program=main_program) + else: + if encrypt_conf == None: + aes_cipher = CipherFactory.create_cipher() + else: + #todo: more encryption algorithms + pass + key = CipherUtils.gen_key_to_file(128, "key") + params = fluid.io.save_persistables( + executor=executor, dirname=None, main_program=main_program) + model = main_program.desc.serialize_to_string() + if not os.path.exists(server_model_folder): + os.makedirs(server_model_folder) + 
os.chdir(server_model_folder) + aes_cipher.encrypt_to_file(params, key, "encrypt_params") + aes_cipher.encrypt_to_file(model, key, "encrypt_model") + os.chdir("..") config = model_conf.GeneralModelConfig() #int64 = 0; float32 = 1; int32 = 2; @@ -116,7 +137,10 @@ def inference_model_to_serving(dirname, serving_server="serving_server", serving_client="serving_client", model_filename=None, - params_filename=None): + params_filename=None, + encryption=False, + key_len=128, + encrypt_conf=None): place = fluid.CPUPlace() exe = fluid.Executor(place) inference_program, feed_target_names, fetch_targets = \ @@ -127,7 +151,7 @@ def inference_model_to_serving(dirname, } fetch_dict = {x.name: x for x in fetch_targets} save_model(serving_server, serving_client, feed_dict, fetch_dict, - inference_program) + inference_program, encryption, key_len, encrypt_conf) feed_names = feed_dict.keys() fetch_names = fetch_dict.keys() return feed_names, fetch_names diff --git a/python/paddle_serving_server/__init__.py b/python/paddle_serving_server/__init__.py index 30f4583a3b785dfe8824a5c14014c5e816fbc27e..3a314dc5f6690146d472484b5520624074c14ce6 100644 --- a/python/paddle_serving_server/__init__.py +++ b/python/paddle_serving_server/__init__.py @@ -157,6 +157,7 @@ class Server(object): self.cur_path = os.getcwd() self.use_local_bin = False self.mkl_flag = False + self.encryption_model = False self.product_name = None self.container_id = None self.model_config_paths = None # for multi-model in a workflow @@ -197,6 +198,9 @@ class Server(object): def set_ir_optimize(self, flag=False): self.ir_optimization = flag + def use_encryption_model(self, flag=False): + self.encryption_model = flag + def set_product_name(self, product_name=None): if product_name == None: raise ValueError("product_name can't be None.") @@ -232,9 +236,15 @@ class Server(object): engine.force_update_static_cache = False if device == "cpu": - engine.type = "FLUID_CPU_ANALYSIS_DIR" + if self.encryption_model: + engine.type = 
"FLUID_CPU_ANALYSIS_ENCRYPT" + else: + engine.type = "FLUID_CPU_ANALYSIS_DIR" elif device == "gpu": - engine.type = "FLUID_GPU_ANALYSIS_DIR" + if self.encryption_model: + engine.type = "FLUID_GPU_ANALYSIS_ENCRYPT" + else: + engine.type = "FLUID_GPU_ANALYSIS_DIR" self.model_toolkit_conf.engines.extend([engine]) diff --git a/python/paddle_serving_server/serve.py b/python/paddle_serving_server/serve.py index d282ac076e377806e9a3b320b880ffed6300b971..982ff5562be50e5716e91ced058983e724b4155a 100644 --- a/python/paddle_serving_server/serve.py +++ b/python/paddle_serving_server/serve.py @@ -18,8 +18,14 @@ Usage: python -m paddle_serving_server.serve --model ./serving_server_model --port 9292 """ import argparse -from .web_service import WebService +import sys +import json +import base64 +import time +from multiprocessing import Process +from web_service import WebService, port_is_available from flask import Flask, request +from BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer def parse_args(): # pylint: disable=doc-string-missing @@ -53,6 +59,11 @@ def parse_args(): # pylint: disable=doc-string-missing type=int, default=512 * 1024 * 1024, help="Limit sizes of messages") + parser.add_argument( + "--use_encryption_model", + default=False, + action="store_true", + help="Use encryption model") parser.add_argument( "--use_multilang", default=False, @@ -71,17 +82,18 @@ def parse_args(): # pylint: disable=doc-string-missing return parser.parse_args() -def start_standard_model(): # pylint: disable=doc-string-missing +def start_standard_model(serving_port): # pylint: disable=doc-string-missing args = parse_args() thread_num = args.thread model = args.model - port = args.port + port = serving_port workdir = args.workdir device = args.device mem_optim = args.mem_optim_off is False ir_optim = args.ir_optim max_body_size = args.max_body_size use_mkl = args.use_mkl + use_encryption_model = args.use_encryption_model use_multilang = args.use_multilang if model == "": @@ -111,6 
+123,7 @@ def start_standard_model(): # pylint: disable=doc-string-missing server.use_mkl(use_mkl) server.set_max_body_size(max_body_size) server.set_port(port) + server.use_encryption_model(use_encryption_model) if args.product_name != None: server.set_product_name(args.product_name) if args.container_id != None: @@ -121,11 +134,88 @@ def start_standard_model(): # pylint: disable=doc-string-missing server.run_server() -if __name__ == "__main__": +class MainService(BaseHTTPRequestHandler): + def get_available_port(self): + default_port = 12000 + for i in range(1000): + if port_is_available(default_port + i): + return default_port + i + + def start_serving(self): + start_standard_model(serving_port) + + def get_key(self, post_data): + if "key" not in post_data: + return False + else: + key = base64.b64decode(post_data["key"]) + with open(args.model + "/key", "w") as f: + f.write(key) + return True + + def check_key(self, post_data): + if "key" not in post_data: + return False + else: + key = base64.b64decode(post_data["key"]) + with open(args.model + "/key", "r") as f: + cur_key = f.read() + return (key == cur_key) + + def start(self, post_data): + post_data = json.loads(post_data) + global p_flag + if not p_flag: + if args.use_encryption_model: + print("waiting key for model") + if not self.get_key(post_data): + print("not found key in request") + return False + global serving_port + global p + serving_port = self.get_available_port() + p = Process(target=self.start_serving) + p.start() + time.sleep(3) + if p.is_alive(): + p_flag = True + else: + return False + else: + if p.is_alive(): + if not self.check_key(post_data): + return False + else: + return False + return True + + def do_POST(self): + content_length = int(self.headers['Content-Length']) + post_data = self.rfile.read(content_length) + if self.start(post_data): + response = {"endpoint_list": [serving_port]} + else: + response = {"message": "start serving failed"} + self.send_response(200) + 
self.send_header('Content-type', 'application/json') + self.end_headers() + self.wfile.write(json.dumps(response)) + +if __name__ == "__main__": args = parse_args() if args.name == "None": - start_standard_model() + if args.use_encryption_model: + p_flag = False + p = None + serving_port = 0 + server = HTTPServer(('localhost', int(args.port)), MainService) + print( + 'Starting encryption server, waiting for key from client, use to stop' + ) + server.serve_forever() + else: + start_standard_model(args.port) else: service = WebService(name=args.name) service.load_model_config(args.model) diff --git a/python/paddle_serving_server/web_service.py b/python/paddle_serving_server/web_service.py index 539c613d40af7c1f6824fc03ce3733c942816318..9bd559704b47205ab46ba5984986d843ba7e52ca 100644 --- a/python/paddle_serving_server/web_service.py +++ b/python/paddle_serving_server/web_service.py @@ -25,6 +25,16 @@ from paddle_serving_server import pipeline from paddle_serving_server.pipeline import Op +def port_is_available(port): + with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock: + sock.settimeout(2) + result = sock.connect_ex(('0.0.0.0', port)) + if result != 0: + return True + else: + return False + + class WebService(object): def __init__(self, name="default_service"): self.name = name @@ -110,7 +120,7 @@ class WebService(object): self.mem_optim = mem_optim self.ir_optim = ir_optim for i in range(1000): - if self.port_is_available(default_port + i): + if port_is_available(default_port + i): self.port_list.append(default_port + i) break diff --git a/python/paddle_serving_server_gpu/__init__.py b/python/paddle_serving_server_gpu/__init__.py index c7e36aede5dc87141b286eeb589c46663a24ebd1..86f714156fcb60e842275a88418d2fba1b85fa02 100644 --- a/python/paddle_serving_server_gpu/__init__.py +++ b/python/paddle_serving_server_gpu/__init__.py @@ -68,6 +68,11 @@ def serve_args(): type=int, default=512 * 1024 * 1024, help="Limit sizes of messages") + 
parser.add_argument( + "--use_encryption_model", + default=False, + action="store_true", + help="Use encryption model") parser.add_argument( "--use_multilang", default=False, @@ -277,7 +282,8 @@ class Server(object): def set_trt(self): self.use_trt = True - def _prepare_engine(self, model_config_paths, device): + def _prepare_engine(self, model_config_paths, device, use_encryption_model): + if self.model_toolkit_conf == None: self.model_toolkit_conf = server_sdk.ModelToolkitConf() @@ -299,9 +305,15 @@ class Server(object): engine.use_trt = self.use_trt if device == "cpu": - engine.type = "FLUID_CPU_ANALYSIS_DIR" + if use_encryption_model: + engine.type = "FLUID_CPU_ANALYSIS_ENCRPT" + else: + engine.type = "FLUID_CPU_ANALYSIS_DIR" elif device == "gpu": - engine.type = "FLUID_GPU_ANALYSIS_DIR" + if use_encryption_model: + engine.type = "FLUID_GPU_ANALYSIS_ENCRPT" + else: + engine.type = "FLUID_GPU_ANALYSIS_DIR" self.model_toolkit_conf.engines.extend([engine]) @@ -458,6 +470,7 @@ class Server(object): workdir=None, port=9292, device="cpu", + use_encryption_model=False, cube_conf=None): if workdir == None: workdir = "./tmp" @@ -471,7 +484,8 @@ class Server(object): self.set_port(port) self._prepare_resource(workdir, cube_conf) - self._prepare_engine(self.model_config_paths, device) + self._prepare_engine(self.model_config_paths, device, + use_encryption_model) self._prepare_infer_service(port) self.workdir = workdir diff --git a/python/paddle_serving_server_gpu/serve.py b/python/paddle_serving_server_gpu/serve.py index c2b170fbeb3f9ee772e86c216fe3776f34187743..d35e9568660f226aef8eb35ccf10a3f2f93288f0 100644 --- a/python/paddle_serving_server_gpu/serve.py +++ b/python/paddle_serving_server_gpu/serve.py @@ -19,19 +19,21 @@ Usage: """ import argparse import os +import json +import base64 from multiprocessing import Pool, Process from paddle_serving_server_gpu import serve_args from flask import Flask, request +from BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer 
-def start_gpu_card_model(index, gpuid, args): # pylint: disable=doc-string-missing +def start_gpu_card_model(index, gpuid, port, args): # pylint: disable=doc-string-missing gpuid = int(gpuid) device = "gpu" - port = args.port if gpuid == -1: device = "cpu" elif gpuid >= 0: - port = args.port + index + port = port + index thread_num = args.thread model = args.model mem_optim = args.mem_optim_off is False @@ -73,14 +75,20 @@ def start_gpu_card_model(index, gpuid, args): # pylint: disable=doc-string-miss server.set_container_id(args.container_id) server.load_model_config(model) - server.prepare_server(workdir=workdir, port=port, device=device) + server.prepare_server( + workdir=workdir, + port=port, + device=device, + use_encryption_model=args.use_encryption_model) if gpuid >= 0: server.set_gpuid(gpuid) server.run_server() -def start_multi_card(args): # pylint: disable=doc-string-missing +def start_multi_card(args, serving_port=None): # pylint: disable=doc-string-missing gpus = "" + if serving_port == None: + serving_port = args.port if args.gpu_ids == "": gpus = [] else: @@ -97,14 +105,16 @@ def start_multi_card(args): # pylint: disable=doc-string-missing env_gpus = [] if len(gpus) <= 0: print("gpu_ids not set, going to run cpu service.") - start_gpu_card_model(-1, -1, args) + start_gpu_card_model(-1, -1, serving_port, args) else: gpu_processes = [] for i, gpu_id in enumerate(gpus): p = Process( - target=start_gpu_card_model, args=( + target=start_gpu_card_model, + args=( i, gpu_id, + serving_port, args, )) gpu_processes.append(p) for p in gpu_processes: @@ -113,10 +123,89 @@ def start_multi_card(args): # pylint: disable=doc-string-missing p.join() +class MainService(BaseHTTPRequestHandler): + def get_available_port(self): + default_port = 12000 + for i in range(1000): + if port_is_available(default_port + i): + return default_port + i + + def start_serving(self): + start_multi_card(args, serving_port) + + def get_key(self, post_data): + if "key" not in post_data: + 
return False + else: + key = base64.b64decode(post_data["key"]) + with open(args.model + "/key", "w") as f: + f.write(key) + return True + + def check_key(self, post_data): + if "key" not in post_data: + return False + else: + key = base64.b64decode(post_data["key"]) + with open(args.model + "/key", "r") as f: + cur_key = f.read() + return (key == cur_key) + + def start(self, post_data): + post_data = json.loads(post_data) + global p_flag + if not p_flag: + if args.use_encryption_model: + print("waiting key for model") + if not self.get_key(post_data): + print("not found key in request") + return False + global serving_port + global p + serving_port = self.get_available_port() + p = Process(target=self.start_serving) + p.start() + time.sleep(3) + if p.is_alive(): + p_flag = True + else: + return False + else: + if p.is_alive(): + if not self.check_key(post_data): + return False + else: + return False + return True + + def do_POST(self): + content_length = int(self.headers['Content-Length']) + post_data = self.rfile.read(content_length) + if self.start(post_data): + response = {"endpoint_list": [serving_port]} + else: + response = {"message": "start serving failed"} + self.send_response(200) + self.send_header('Content-type', 'application/json') + self.end_headers() + self.wfile.write(json.dumps(response)) + + if __name__ == "__main__": args = serve_args() if args.name == "None": - start_multi_card(args) + from .web_service import port_is_available + if args.use_encryption_model: + p_flag = False + p = None + serving_port = 0 + server = HTTPServer(('localhost', int(args.port)), MainService) + print( + 'Starting encryption server, waiting for key from client, use to stop' + ) + server.serve_forever() + else: + start_multi_card(args) else: from .web_service import WebService web_service = WebService(name=args.name) diff --git a/python/paddle_serving_server_gpu/web_service.py b/python/paddle_serving_server_gpu/web_service.py index 
c086e393e537a0024d0e605c281fb9344a6ee754..e77c3d29a375f71e48b968704945fe97866a6bdc 100644 --- a/python/paddle_serving_server_gpu/web_service.py +++ b/python/paddle_serving_server_gpu/web_service.py @@ -28,6 +28,16 @@ from paddle_serving_server_gpu import pipeline from paddle_serving_server_gpu.pipeline import Op +def port_is_available(port): + with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock: + sock.settimeout(2) + result = sock.connect_ex(('0.0.0.0', port)) + if result != 0: + return True + else: + return False + + class WebService(object): def __init__(self, name="default_service"): self.name = name @@ -136,7 +146,7 @@ class WebService(object): self.port_list = [] default_port = 12000 for i in range(1000): - if self.port_is_available(default_port + i): + if port_is_available(default_port + i): self.port_list.append(default_port + i) if len(self.port_list) > len(self.gpus): break diff --git a/tools/Dockerfile.centos6.cuda9.0-cudnn7.devel b/tools/Dockerfile.centos6.cuda9.0-cudnn7.devel index d871e4e97f6e0201cb8d533ba9ca8e89664c7a18..eddd7e8b912b4cd2bb19f558413ffec1aea58071 100644 --- a/tools/Dockerfile.centos6.cuda9.0-cudnn7.devel +++ b/tools/Dockerfile.centos6.cuda9.0-cudnn7.devel @@ -39,6 +39,8 @@ RUN yum -y install wget && \ make clean && \ echo 'export PATH=/usr/local/python3.6/bin:$PATH' >> /root/.bashrc && \ echo 'export LD_LIBRARY_PATH=/usr/local/python3.6/lib:$LD_LIBRARY_PATH' >> /root/.bashrc && \ + pip install requests && \ + pip3 install requests && \ source /root/.bashrc && \ cd .. && rm -rf Python-3.6.8* && \ wget https://github.com/protocolbuffers/protobuf/releases/download/v3.11.2/protobuf-all-3.11.2.tar.gz && \ diff --git a/tools/Dockerfile.centos6.devel b/tools/Dockerfile.centos6.devel index add3d9245ce3763d5f4ab9e8619a80bf058386c3..d0a4559ca29a22a8eb6627d19eb5e2f641ac37ec 100644 --- a/tools/Dockerfile.centos6.devel +++ b/tools/Dockerfile.centos6.devel @@ -49,6 +49,8 @@ RUN yum -y install wget && \ cd .. 
&& rm -rf protobuf-* && \ yum -y install epel-release && yum -y install patchelf libXext libSM libXrender && \ yum clean all && \ + pip install requests && \ + pip3 install requests && \ localedef -c -i en_US -f UTF-8 en_US.UTF-8 && \ echo "export LANG=en_US.utf8" >> /root/.bashrc && \ echo "export LANGUAGE=en_US.utf8" >> /root/.bashrc diff --git a/tools/Dockerfile.ci b/tools/Dockerfile.ci index 390d67eb955e1fe8d51faa27c06351f38b2d7462..ec50f76ab881c0c19d5cbdcbf5885cd1e33510b9 100644 --- a/tools/Dockerfile.ci +++ b/tools/Dockerfile.ci @@ -23,7 +23,8 @@ RUN wget https://dl.google.com/go/go1.14.linux-amd64.tar.gz >/dev/null \ RUN yum -y install python-devel sqlite-devel >/dev/null \ && curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py >/dev/null \ && python get-pip.py >/dev/null \ - && rm get-pip.py + && rm get-pip.py \ + && pip install requests RUN wget http://nixos.org/releases/patchelf/patchelf-0.10/patchelf-0.10.tar.bz2 \ && yum -y install bzip2 >/dev/null \ @@ -34,6 +35,9 @@ RUN wget http://nixos.org/releases/patchelf/patchelf-0.10/patchelf-0.10.tar.bz2 && cd .. \ && rm -rf patchelf-0.10* +RUN yum install -y python3 python3-devel \ + && pip3 install requests + RUN wget https://github.com/protocolbuffers/protobuf/releases/download/v3.11.2/protobuf-all-3.11.2.tar.gz && \ tar zxf protobuf-all-3.11.2.tar.gz && \ cd protobuf-3.11.2 && \ @@ -41,8 +45,6 @@ RUN wget https://github.com/protocolbuffers/protobuf/releases/download/v3.11.2/p make clean && \ cd .. 
&& rm -rf protobuf-* -RUN yum install -y python3 python3-devel - RUN yum -y update >/dev/null \ && yum -y install dnf >/dev/null \ && yum -y install dnf-plugins-core >/dev/null \ diff --git a/tools/Dockerfile.cuda10.0-cudnn7.devel b/tools/Dockerfile.cuda10.0-cudnn7.devel index c633c593ca5ad13a14b7ebee5edca3caf9882d9f..195c6010c5ca97a0a0760514e53ad387acd7fc7e 100644 --- a/tools/Dockerfile.cuda10.0-cudnn7.devel +++ b/tools/Dockerfile.cuda10.0-cudnn7.devel @@ -30,11 +30,13 @@ RUN wget https://dl.google.com/go/go1.14.linux-amd64.tar.gz >/dev/null \ RUN yum -y install python-devel sqlite-devel \ && curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py >/dev/null \ && python get-pip.py >/dev/null \ - && rm get-pip.py + && rm get-pip.py \ + && pip install requests RUN yum install -y python3 python3-devel \ && yum -y install epel-release && yum -y install patchelf libXext libSM libXrender\ - && yum clean all + && yum clean all \ + && pip3 install requests RUN localedef -c -i en_US -f UTF-8 en_US.UTF-8 \ && echo "export LANG=en_US.utf8" >> /root/.bashrc \ diff --git a/tools/Dockerfile.cuda9.0-cudnn7.devel b/tools/Dockerfile.cuda9.0-cudnn7.devel index 0fe6d69b1f39bb8bbea1008ea74a0c30607c6c73..3331085a2d10f0757daf417740d859cfe54f6452 100644 --- a/tools/Dockerfile.cuda9.0-cudnn7.devel +++ b/tools/Dockerfile.cuda9.0-cudnn7.devel @@ -29,11 +29,13 @@ RUN wget https://dl.google.com/go/go1.14.linux-amd64.tar.gz >/dev/null \ RUN yum -y install python-devel sqlite-devel \ && curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py >/dev/null \ && python get-pip.py >/dev/null \ - && rm get-pip.py + && rm get-pip.py \ + && pip install requests RUN yum install -y python3 python3-devel \ && yum -y install epel-release && yum -y install patchelf libXext libSM libXrender\ - && yum clean all + && yum clean all \ + && pip3 install requests RUN localedef -c -i en_US -f UTF-8 en_US.UTF-8 \ && echo "export LANG=en_US.utf8" >> /root/.bashrc \ diff --git a/tools/Dockerfile.devel 
b/tools/Dockerfile.devel index 83e3b491c30fe99eaa615e836efeef6aad0c0cc4..17ae2110e246861a639f9a4332c6e1dc4af43ace 100644 --- a/tools/Dockerfile.devel +++ b/tools/Dockerfile.devel @@ -19,11 +19,13 @@ RUN wget https://dl.google.com/go/go1.14.linux-amd64.tar.gz >/dev/null \ RUN yum -y install python-devel sqlite-devel \ && curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py >/dev/null \ && python get-pip.py >/dev/null \ - && rm get-pip.py + && rm get-pip.py \ + && pip install requests RUN yum install -y python3 python3-devel \ && yum -y install epel-release && yum -y install patchelf libXext libSM libXrender\ - && yum clean all + && yum clean all \ + && pip3 install requests RUN localedef -c -i en_US -f UTF-8 en_US.UTF-8 \ && echo "export LANG=en_US.utf8" >> /root/.bashrc \ diff --git a/tools/serving_build.sh b/tools/serving_build.sh index 880c509e762131104478ad4b5b39f5e11ded0656..97270a298f9d2856ab1cc859fca53947c918bc1d 100644 --- a/tools/serving_build.sh +++ b/tools/serving_build.sh @@ -514,6 +514,40 @@ function python_test_lac() { cd .. 
} + +function python_test_encryption(){ + #pwd: /Serving/python/examples + cd encryption + sh get_data.sh + local TYPE=$1 + export SERVING_BIN=${SERIVNG_WORKDIR}/build-server-${TYPE}/core/general-server/serving + case $TYPE in + CPU) + #check_cmd "python encrypt.py" + #sleep 5 + check_cmd "python -m paddle_serving_server.serve --model encrypt_server/ --port 9300 --use_encryption_model > /dev/null &" + sleep 5 + check_cmd "python test_client.py encrypt_client/serving_client_conf.prototxt" + kill_server_process + ;; + GPU) + #check_cmd "python encrypt.py" + #sleep 5 + check_cmd "python -m paddle_serving_server_gpu.serve --model encrypt_server/ --port 9300 --use_encryption_model --gpu_ids 0" + sleep 5 + check_cmd "python test_client.py encrypt_client/serving_client_conf.prototxt" + kill_servere_process + ;; + *) + echo "error type" + exit 1 + ;; + esac + echo "encryption $TYPE test finished as expected" + setproxy + unset SERVING_BIN + cd .. +} function java_run_test() { # pwd: /Serving local TYPE=$1 @@ -529,7 +563,7 @@ function java_run_test() { cd examples # pwd: /Serving/java/examples mvn compile > /dev/null mvn install > /dev/null - + # fit_a_line (general, asyn_predict, batch_predict) cd ../../python/examples/grpc_impl_example/fit_a_line # pwd: /Serving/python/examples/grpc_impl_example/fit_a_line sh get_data.sh @@ -786,7 +820,7 @@ function python_test_pipeline(){ python -m paddle_serving_server.serve --model imdb_cnn_model --port 9292 --workdir test9292 &> cnn.log & python -m paddle_serving_server.serve --model imdb_bow_model --port 9393 --workdir test9393 &> bow.log & sleep 5 - + # test: thread servicer & thread op cat << EOF > config.yml rpc_port: 18080 @@ -960,6 +994,7 @@ function python_run_test() { python_test_lac $TYPE # pwd: /Serving/python/examples python_test_multi_process $TYPE # pwd: /Serving/python/examples python_test_multi_fetch $TYPE # pwd: /Serving/python/examples + python_test_encryption $TYPE # pwd: /Serving/python/examples python_test_yolov4 
$TYPE # pwd: /Serving/python/examples python_test_grpc_impl $TYPE # pwd: /Serving/python/examples python_test_resnet50 $TYPE # pwd: /Serving/python/examples