From 058b87929184d03120e2bac8a2557b9385a7dc4c Mon Sep 17 00:00:00 2001 From: Jiawei Wang Date: Thu, 28 Jan 2021 16:28:09 +0800 Subject: [PATCH] Merge pull request #995 from HexToString/encryption_branch use paddle 2.0.0 fix encryption and some doc --- CMakeLists.txt | 5 + cmake/paddlepaddle.cmake | 9 +- doc/COMPILE.md | 4 +- doc/COMPILE_CN.md | 4 +- .../include/fluid_cpu_engine.h | 56 +++++++++ .../src/fluid_cpu_engine.cpp | 7 ++ .../include/fluid_gpu_engine.h | 57 +++++++++ .../src/fluid_gpu_engine.cpp | 5 + python/examples/fit_a_line/README_CN.md | 6 - python/examples/fit_a_line/test_server.py | 2 +- python/paddle_serving_client/__init__.py | 29 ++++- python/paddle_serving_client/io/__init__.py | 49 ++++++-- python/paddle_serving_server/__init__.py | 15 ++- python/paddle_serving_server/serve.py | 97 ++++++++++++++- python/paddle_serving_server/web_service.py | 12 +- python/paddle_serving_server_gpu/__init__.py | 21 +++- python/paddle_serving_server_gpu/serve.py | 114 ++++++++++++++++-- .../paddle_serving_server_gpu/web_service.py | 13 +- tools/Dockerfile.centos6.cuda9.0-cudnn7.devel | 2 + tools/Dockerfile.centos6.devel | 2 + tools/Dockerfile.ci | 8 +- tools/Dockerfile.cuda10.0-cudnn7.devel | 6 +- tools/Dockerfile.cuda9.0-cudnn7.devel | 6 +- tools/Dockerfile.devel | 6 +- tools/serving_build.sh | 37 ++++++ 25 files changed, 514 insertions(+), 58 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f05e52ee..6228877f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -58,7 +58,12 @@ option(APP "Compile Paddle Serving App package" OFF) option(WITH_ELASTIC_CTR "Compile ELASITC-CTR solution" OFF) option(PACK "Compile for whl" OFF) option(WITH_TRT "Compile Paddle Serving with TRT" OFF) +option(PADDLE_ON_INFERENCE "Compile for encryption" ON) +if (PADDLE_ON_INFERENCE) + add_definitions(-DPADDLE_ON_INFERENCE) + message(STATUS "Use PADDLE_ON_INFERENCE") +endif() set(WITH_MKLML ${WITH_MKL}) if (NOT DEFINED WITH_MKLDNN) if (WITH_MKL AND AVX2_FOUND) diff --git a/cmake/paddlepaddle.cmake b/cmake/paddlepaddle.cmake index 5e637b46..f1fdfb6c 100644 --- a/cmake/paddlepaddle.cmake +++ b/cmake/paddlepaddle.cmake @@ -124,8 +124,8 @@ LINK_DIRECTORIES(${PADDLE_INSTALL_DIR}/third_party/install/mkldnn/lib) ADD_LIBRARY(openblas STATIC IMPORTED GLOBAL) SET_PROPERTY(TARGET openblas PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/third_party/install/openblas/lib/libopenblas.a) -ADD_LIBRARY(paddle_fluid SHARED IMPORTED GLOBAL) -SET_PROPERTY(TARGET paddle_fluid PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/lib/libpaddle_fluid.so) +ADD_LIBRARY(paddle_fluid STATIC IMPORTED GLOBAL) +SET_PROPERTY(TARGET paddle_fluid PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/lib/libpaddle_fluid.a) if (WITH_TRT) ADD_LIBRARY(nvinfer SHARED IMPORTED GLOBAL) @@ -151,10 +151,13 @@ endif() ADD_LIBRARY(xxhash STATIC IMPORTED GLOBAL) SET_PROPERTY(TARGET xxhash PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/third_party/install/xxhash/lib/libxxhash.a) +ADD_LIBRARY(cryptopp STATIC IMPORTED GLOBAL) +SET_PROPERTY(TARGET cryptopp PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/third_party/install/cryptopp/lib/libcryptopp.a) + LIST(APPEND external_project_dependencies paddle) LIST(APPEND paddle_depend_libs - xxhash) + xxhash cryptopp) if(WITH_LITE) LIST(APPEND paddle_depend_libs paddle_full_api_shared) diff --git a/doc/COMPILE.md b/doc/COMPILE.md index 84254f4f..03d135e0 100644 --- a/doc/COMPILE.md +++ b/doc/COMPILE.md @@ -104,7 +104,7 @@ you can execute `make install` to put targets under directory `./output`, you ne ### 
CUDNN_LIBRARY && CUDA_CUDART_LIBRARY is the lib path, it should be /usr/local/cuda/lib64/ ``` shell -export CUDA_PATH='/usr/local' +export CUDA_PATH='/usr/local/cuda' export CUDNN_LIBRARY='/usr/local/cuda/lib64/' export CUDA_CUDART_LIBRARY="/usr/local/cuda/lib64/" @@ -123,7 +123,7 @@ make -j10 ### Integrated TRT version paddle inference library ``` -export CUDA_PATH='/usr/local' +export CUDA_PATH='/usr/local/cuda' export CUDNN_LIBRARY='/usr/local/cuda/lib64/' export CUDA_CUDART_LIBRARY="/usr/local/cuda/lib64/" diff --git a/doc/COMPILE_CN.md b/doc/COMPILE_CN.md index 9691808e..e5024b1a 100644 --- a/doc/COMPILE_CN.md +++ b/doc/COMPILE_CN.md @@ -100,7 +100,7 @@ make -j10 ### CUDA_PATH是cuda的安装路径,可以使用命令行whereis cuda命令确认你的cuda安装路径,通常应该是/usr/local/cuda ### CUDNN_LIBRARY CUDA_CUDART_LIBRARY 是cuda库文件的路径,通常应该是/usr/local/cuda/lib64/ ``` shell -export CUDA_PATH='/usr/local' +export CUDA_PATH='/usr/local/cuda' export CUDNN_LIBRARY='/usr/local/cuda/lib64/' export CUDA_CUDART_LIBRARY="/usr/local/cuda/lib64/" @@ -119,7 +119,7 @@ make -j10 ### 集成TensorRT版本Paddle Inference Library ``` -export CUDA_PATH='/usr/local' +export CUDA_PATH='/usr/local/cuda' export CUDNN_LIBRARY='/usr/local/cuda/lib64/' export CUDA_CUDART_LIBRARY="/usr/local/cuda/lib64/" export TENSORRT_LIBRARY_PATH="/usr/local/TensorRT-6.0.1.5/targets/x86_64-linux-gnu/" diff --git a/paddle_inference/inferencer-fluid-cpu/include/fluid_cpu_engine.h b/paddle_inference/inferencer-fluid-cpu/include/fluid_cpu_engine.h index b20a4f4c..10b962fd 100644 --- a/paddle_inference/inferencer-fluid-cpu/include/fluid_cpu_engine.h +++ b/paddle_inference/inferencer-fluid-cpu/include/fluid_cpu_engine.h @@ -263,6 +263,62 @@ class Parameter { float* _params; }; +class FluidCpuAnalysisEncryptCore : public FluidFamilyCore { + public: + void ReadBinaryFile(const std::string& filename, std::string* contents) { + std::ifstream fin(filename, std::ios::in | std::ios::binary); + fin.seekg(0, std::ios::end); + contents->clear(); + contents->resize(fin.tellg()); + fin.seekg(0, std::ios::beg); + fin.read(&(contents->at(0)), contents->size()); + fin.close(); + } + + int create(const predictor::InferEngineCreationParams& params) { + std::string data_path = params.get_path(); + if (access(data_path.c_str(), F_OK) == -1) { + LOG(ERROR) << "create paddle predictor failed, path note exits: " + << data_path; + return -1; + } + + std::string model_buffer, params_buffer, key_buffer; + ReadBinaryFile(data_path + "encrypt_model", &model_buffer); + ReadBinaryFile(data_path + "encrypt_params", ¶ms_buffer); + ReadBinaryFile(data_path + "key", &key_buffer); + + VLOG(2) << "prepare for encryption model"; + + auto cipher = paddle::MakeCipher(""); + std::string real_model_buffer = cipher->Decrypt(model_buffer, key_buffer); + std::string real_params_buffer = cipher->Decrypt(params_buffer, key_buffer); + + Config analysis_config; + //paddle::AnalysisConfig analysis_config; + analysis_config.SetModelBuffer(&real_model_buffer[0], + real_model_buffer.size(), + &real_params_buffer[0], + real_params_buffer.size()); + analysis_config.DisableGpu(); + analysis_config.SetCpuMathLibraryNumThreads(1); + if (params.enable_memory_optimization()) { + analysis_config.EnableMemoryOptim(); + } + analysis_config.SwitchSpecifyInputNames(true); + AutoLock lock(GlobalPaddleCreateMutex::instance()); + VLOG(2) << "decrypt model file sucess"; + _core = + CreatePredictor(analysis_config); + if (NULL == _core.get()) { + LOG(ERROR) << "create paddle predictor failed, path: " << data_path; + return -1; + } + VLOG(2) << "create 
paddle predictor sucess, path: " << data_path; + return 0; + } +}; + } // namespace fluid_cpu } // namespace paddle_serving } // namespace baidu diff --git a/paddle_inference/inferencer-fluid-cpu/src/fluid_cpu_engine.cpp b/paddle_inference/inferencer-fluid-cpu/src/fluid_cpu_engine.cpp index 91cb0bd2..f8cf24ef 100644 --- a/paddle_inference/inferencer-fluid-cpu/src/fluid_cpu_engine.cpp +++ b/paddle_inference/inferencer-fluid-cpu/src/fluid_cpu_engine.cpp @@ -30,6 +30,13 @@ REGIST_FACTORY_OBJECT_IMPL_WITH_NAME( ::baidu::paddle_serving::predictor::InferEngine, "FLUID_CPU_ANALYSIS_DIR"); +#if 1 +REGIST_FACTORY_OBJECT_IMPL_WITH_NAME( + ::baidu::paddle_serving::predictor::FluidInferEngine< + FluidCpuAnalysisEncryptCore>, + ::baidu::paddle_serving::predictor::InferEngine, + "FLUID_CPU_ANALYSIS_ENCRYPT"); +#endif } // namespace fluid_cpu } // namespace paddle_serving } // namespace baidu diff --git a/paddle_inference/inferencer-fluid-gpu/include/fluid_gpu_engine.h b/paddle_inference/inferencer-fluid-gpu/include/fluid_gpu_engine.h index 3d59a500..7cac57a4 100644 --- a/paddle_inference/inferencer-fluid-gpu/include/fluid_gpu_engine.h +++ b/paddle_inference/inferencer-fluid-gpu/include/fluid_gpu_engine.h @@ -283,6 +283,63 @@ class Parameter { float* _params; }; + +class FluidGpuAnalysisEncryptCore : public FluidFamilyCore { + public: + void ReadBinaryFile(const std::string& filename, std::string* contents) { + std::ifstream fin(filename, std::ios::in | std::ios::binary); + fin.seekg(0, std::ios::end); + contents->clear(); + contents->resize(fin.tellg()); + fin.seekg(0, std::ios::beg); + fin.read(&(contents->at(0)), contents->size()); + fin.close(); + } + + int create(const predictor::InferEngineCreationParams& params) { + std::string data_path = params.get_path(); + if (access(data_path.c_str(), F_OK) == -1) { + LOG(ERROR) << "create paddle predictor failed, path note exits: " + << data_path; + return -1; + } + + std::string model_buffer, params_buffer, key_buffer; + ReadBinaryFile(data_path + "encrypt_model", &model_buffer); + ReadBinaryFile(data_path + "encrypt_params", ¶ms_buffer); + ReadBinaryFile(data_path + "key", &key_buffer); + + VLOG(2) << "prepare for encryption model"; + + auto cipher = paddle::MakeCipher(""); + std::string real_model_buffer = cipher->Decrypt(model_buffer, key_buffer); + std::string real_params_buffer = cipher->Decrypt(params_buffer, key_buffer); + + Config analysis_config; + analysis_config.SetModelBuffer(&real_model_buffer[0], + real_model_buffer.size(), + &real_params_buffer[0], + real_params_buffer.size()); + analysis_config.EnableUseGpu(100, FLAGS_gpuid); + analysis_config.SetCpuMathLibraryNumThreads(1); + if (params.enable_memory_optimization()) { + analysis_config.EnableMemoryOptim(); + } + analysis_config.SwitchSpecifyInputNames(true); + AutoLock lock(GlobalPaddleCreateMutex::instance()); + VLOG(2) << "decrypt model file sucess"; + _core = + CreatePredictor(analysis_config); + if (NULL == _core.get()) { + LOG(ERROR) << "create paddle predictor failed, path: " << data_path; + return -1; + } + VLOG(2) << "create paddle predictor sucess, path: " << data_path; + return 0; + } +}; + + } // namespace fluid_gpu } // namespace paddle_serving } // namespace baidu diff --git a/paddle_inference/inferencer-fluid-gpu/src/fluid_gpu_engine.cpp b/paddle_inference/inferencer-fluid-gpu/src/fluid_gpu_engine.cpp index c00ea871..613b8343 100644 --- a/paddle_inference/inferencer-fluid-gpu/src/fluid_gpu_engine.cpp +++ b/paddle_inference/inferencer-fluid-gpu/src/fluid_gpu_engine.cpp @@ -31,6 
+31,11 @@ REGIST_FACTORY_OBJECT_IMPL_WITH_NAME( FluidGpuAnalysisDirCore>, ::baidu::paddle_serving::predictor::InferEngine, "FLUID_GPU_ANALYSIS_DIR"); +REGIST_FACTORY_OBJECT_IMPL_WITH_NAME( + ::baidu::paddle_serving::predictor::FluidInferEngine< + FluidGpuAnalysisEncryptCore>, + ::baidu::paddle_serving::predictor::InferEngine, + "FLUID_GPU_ANALYSIS_ENCRPT") } // namespace fluid_gpu } // namespace paddle_serving diff --git a/python/examples/fit_a_line/README_CN.md b/python/examples/fit_a_line/README_CN.md index b18b7204..9ef55749 100644 --- a/python/examples/fit_a_line/README_CN.md +++ b/python/examples/fit_a_line/README_CN.md @@ -14,12 +14,6 @@ sh get_data.sh ### 开启服务端 -``` shell -python test_server.py uci_housing_model/ -``` - -也可以通过下面的一行代码开启默认RPC服务: - ```shell python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393 ``` diff --git a/python/examples/fit_a_line/test_server.py b/python/examples/fit_a_line/test_server.py index c3f24066..d055b309 100644 --- a/python/examples/fit_a_line/test_server.py +++ b/python/examples/fit_a_line/test_server.py @@ -31,6 +31,6 @@ class UciService(WebService): uci_service = UciService(name="uci") uci_service.load_model_config("uci_housing_model") -uci_service.prepare_server(workdir="workdir", port=9292) +uci_service.prepare_server(workdir="workdir", port=9393) uci_service.run_rpc_service() uci_service.run_web_service() diff --git a/python/paddle_serving_client/__init__.py b/python/paddle_serving_client/__init__.py index b2094b3b..047c97d7 100644 --- a/python/paddle_serving_client/__init__.py +++ b/python/paddle_serving_client/__init__.py @@ -19,6 +19,9 @@ from .proto import sdk_configure_pb2 as sdk from .proto import general_model_config_pb2 as m_config import google.protobuf.text_format import numpy as np +import requests +import json +import base64 import time import sys @@ -161,6 +164,7 @@ class Client(object): self.fetch_names_to_idx_ = {} self.lod_tensor_set = set() self.feed_tensor_len = {} + self.key = None for i, var in enumerate(model_conf.feed_var): self.feed_names_to_idx_[var.alias_name] = i @@ -193,7 +197,28 @@ class Client(object): else: self.rpc_timeout_ms = rpc_timeout - def connect(self, endpoints=None): + def use_key(self, key_filename): + with open(key_filename, "r") as f: + self.key = f.read() + + def get_serving_port(self, endpoints): + if self.key is not None: + req = json.dumps({"key": base64.b64encode(self.key)}) + else: + req = json.dumps({}) + r = requests.post("http://" + endpoints[0], req) + result = r.json() + print(result) + if "endpoint_list" not in result: + raise ValueError("server not ready") + else: + endpoints = [ + endpoints[0].split(":")[0] + ":" + + str(result["endpoint_list"][0]) + ] + return endpoints + + def connect(self, endpoints=None, encryption=False): # check whether current endpoint is available # init from client config # create predictor here @@ -203,6 +228,8 @@ class Client(object): "You must set the endpoints parameter or use add_variant function to create a variant." 
) else: + if encryption: + endpoints = self.get_serving_port(endpoints) if self.predictor_sdk_ is None: self.add_variant('default_tag_{}'.format(id(self)), endpoints, 100) diff --git a/python/paddle_serving_client/io/__init__.py b/python/paddle_serving_client/io/__init__.py index 48e0c8f2..f18d4b2b 100644 --- a/python/paddle_serving_client/io/__init__.py +++ b/python/paddle_serving_client/io/__init__.py @@ -21,10 +21,14 @@ from paddle.fluid.framework import Program from paddle.fluid import CPUPlace from paddle.fluid.io import save_inference_model import paddle.fluid as fluid +from paddle.fluid.core import CipherUtils +from paddle.fluid.core import CipherFactory +from paddle.fluid.core import Cipher from ..proto import general_model_config_pb2 as model_conf import os import paddle import paddle.nn.functional as F +import errno from paddle.jit import to_static def save_dygraph_model(serving_model_folder, client_config_folder, model): @@ -112,7 +116,10 @@ def save_model(server_model_folder, client_config_folder, feed_var_dict, fetch_var_dict, - main_program=None): + main_program=None, + encryption=False, + key_len=128, + encrypt_conf=None): executor = Executor(place=CPUPlace()) feed_var_names = [feed_var_dict[x].name for x in feed_var_dict] @@ -122,14 +129,31 @@ def save_model(server_model_folder, target_vars.append(fetch_var_dict[key]) target_var_names.append(key) - save_inference_model( - server_model_folder, - feed_var_names, - target_vars, - executor, - model_filename="__model__", - params_filename="__params__", - main_program=main_program) + if not encryption: + save_inference_model( + server_model_folder, + feed_var_names, + target_vars, + executor, + model_filename="__model__", + params_filename="__params__", + main_program=main_program) + else: + if encrypt_conf == None: + aes_cipher = CipherFactory.create_cipher() + else: + #todo: more encryption algorithms + pass + key = CipherUtils.gen_key_to_file(128, "key") + params = fluid.io.save_persistables( + executor=executor, dirname=None, main_program=main_program) + model = main_program.desc.serialize_to_string() + if not os.path.exists(server_model_folder): + os.makedirs(server_model_folder) + os.chdir(server_model_folder) + aes_cipher.encrypt_to_file(params, key, "encrypt_params") + aes_cipher.encrypt_to_file(model, key, "encrypt_model") + os.chdir("..") config = model_conf.GeneralModelConfig() @@ -201,7 +225,10 @@ def inference_model_to_serving(dirname, serving_server="serving_server", serving_client="serving_client", model_filename=None, - params_filename=None): + params_filename=None, + encryption=False, + key_len=128, + encrypt_conf=None): paddle.enable_static() place = fluid.CPUPlace() exe = fluid.Executor(place) @@ -213,7 +240,7 @@ def inference_model_to_serving(dirname, } fetch_dict = {x.name: x for x in fetch_targets} save_model(serving_server, serving_client, feed_dict, fetch_dict, - inference_program) + inference_program, encryption, key_len, encrypt_conf) feed_names = feed_dict.keys() fetch_names = fetch_dict.keys() return feed_names, fetch_names diff --git a/python/paddle_serving_server/__init__.py b/python/paddle_serving_server/__init__.py index a46d0f24..ac58ed21 100644 --- a/python/paddle_serving_server/__init__.py +++ b/python/paddle_serving_server/__init__.py @@ -157,7 +157,8 @@ class Server(object): self.cur_path = os.getcwd() self.use_local_bin = False self.mkl_flag = False - self.product_name = None + self.encryption_model = False + self.product_name = None self.container_id = None self.model_config_paths = None # 
for multi-model in a workflow @@ -196,6 +197,8 @@ class Server(object): def set_ir_optimize(self, flag=False): self.ir_optimization = flag + def use_encryption_model(self, flag=False): + self.encryption_model = flag def set_product_name(self, product_name=None): if product_name == None: @@ -236,9 +239,15 @@ class Server(object): suffix = "_DIR" if device == "cpu": - engine.type = "FLUID_CPU_ANALYSIS" + suffix + if self.encryption_model: + engine.type = "FLUID_CPU_ANALYSIS_ENCRYPT" + else: + engine.type = "FLUID_CPU_ANALYSIS" + suffix elif device == "gpu": - engine.type = "FLUID_GPU_ANALYSIS" + suffix + if self.encryption_model: + engine.type = "FLUID_GPU_ANALYSIS_ENCRYPT" + else: + engine.type = "FLUID_GPU_ANALYSIS" + suffix self.model_toolkit_conf.engines.extend([engine]) diff --git a/python/paddle_serving_server/serve.py b/python/paddle_serving_server/serve.py index d282ac07..0fa7984b 100644 --- a/python/paddle_serving_server/serve.py +++ b/python/paddle_serving_server/serve.py @@ -18,8 +18,14 @@ Usage: python -m paddle_serving_server.serve --model ./serving_server_model --port 9292 """ import argparse -from .web_service import WebService +import sys +import json +import base64 +import time +from multiprocessing import Process +from web_service import WebService, port_is_available from flask import Flask, request +from BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer def parse_args(): # pylint: disable=doc-string-missing @@ -53,6 +59,11 @@ def parse_args(): # pylint: disable=doc-string-missing type=int, default=512 * 1024 * 1024, help="Limit sizes of messages") + parser.add_argument( + "--use_encryption_model", + default=False, + action="store_true", + help="Use encryption model") parser.add_argument( "--use_multilang", default=False, @@ -71,17 +82,18 @@ def parse_args(): # pylint: disable=doc-string-missing return parser.parse_args() -def start_standard_model(): # pylint: disable=doc-string-missing +def start_standard_model(serving_port): # pylint: disable=doc-string-missing args = parse_args() thread_num = args.thread model = args.model - port = args.port + port = serving_port workdir = args.workdir device = args.device mem_optim = args.mem_optim_off is False ir_optim = args.ir_optim max_body_size = args.max_body_size use_mkl = args.use_mkl + use_encryption_model = args.use_encryption_model use_multilang = args.use_multilang if model == "": @@ -111,6 +123,7 @@ def start_standard_model(): # pylint: disable=doc-string-missing server.use_mkl(use_mkl) server.set_max_body_size(max_body_size) server.set_port(port) + server.use_encryption_model(use_encryption_model) if args.product_name != None: server.set_product_name(args.product_name) if args.container_id != None: @@ -120,12 +133,88 @@ def start_standard_model(): # pylint: disable=doc-string-missing server.prepare_server(workdir=workdir, port=port, device=device) server.run_server() +class MainService(BaseHTTPRequestHandler): + def get_available_port(self): + default_port = 12000 + for i in range(1000): + if port_is_available(default_port + i): + return default_port + i + + def start_serving(self): + start_standard_model(serving_port) + + def get_key(self, post_data): + if "key" not in post_data: + return False + else: + key = base64.b64decode(post_data["key"]) + with open(args.model + "/key", "w") as f: + f.write(key) + return True + + def check_key(self, post_data): + if "key" not in post_data: + return False + else: + key = base64.b64decode(post_data["key"]) + with open(args.model + "/key", "r") as f: + cur_key = f.read() + 
return (key == cur_key) + + def start(self, post_data): + post_data = json.loads(post_data) + global p_flag + if not p_flag: + if args.use_encryption_model: + print("waiting key for model") + if not self.get_key(post_data): + print("not found key in request") + return False + global serving_port + global p + serving_port = self.get_available_port() + p = Process(target=self.start_serving) + p.start() + time.sleep(3) + if p.is_alive(): + p_flag = True + else: + return False + else: + if p.is_alive(): + if not self.check_key(post_data): + return False + else: + return False + return True + + def do_POST(self): + content_length = int(self.headers['Content-Length']) + post_data = self.rfile.read(content_length) + if self.start(post_data): + response = {"endpoint_list": [serving_port]} + else: + response = {"message": "start serving failed"} + self.send_response(200) + self.send_header('Content-type', 'application/json') + self.end_headers() + self.wfile.write(json.dumps(response)) if __name__ == "__main__": args = parse_args() if args.name == "None": - start_standard_model() + if args.use_encryption_model: + p_flag = False + p = None + serving_port = 0 + server = HTTPServer(('localhost', int(args.port)), MainService) + print( + 'Starting encryption server, waiting for key from client, use to stop' + ) + server.serve_forever() + else: + start_standard_model(args.port) else: service = WebService(name=args.name) service.load_model_config(args.model) diff --git a/python/paddle_serving_server/web_service.py b/python/paddle_serving_server/web_service.py index fbe48180..f1eb8409 100644 --- a/python/paddle_serving_server/web_service.py +++ b/python/paddle_serving_server/web_service.py @@ -25,6 +25,16 @@ from paddle_serving_server import pipeline from paddle_serving_server.pipeline import Op +def port_is_available(port): + with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock: + sock.settimeout(2) + result = sock.connect_ex(('0.0.0.0', port)) + if result != 0: + return True + else: + return False + + class WebService(object): def __init__(self, name="default_service"): self.name = name @@ -110,7 +120,7 @@ class WebService(object): self.mem_optim = mem_optim self.ir_optim = ir_optim for i in range(1000): - if self.port_is_available(default_port + i): + if port_is_available(default_port + i): self.port_list.append(default_port + i) break diff --git a/python/paddle_serving_server_gpu/__init__.py b/python/paddle_serving_server_gpu/__init__.py index 44402e73..f951d321 100644 --- a/python/paddle_serving_server_gpu/__init__.py +++ b/python/paddle_serving_server_gpu/__init__.py @@ -70,6 +70,11 @@ def serve_args(): type=int, default=512 * 1024 * 1024, help="Limit sizes of messages") + parser.add_argument( + "--use_encryption_model", + default=False, + action="store_true", + help="Use encryption model") parser.add_argument( "--use_multilang", default=False, @@ -295,7 +300,7 @@ class Server(object): def set_xpu(self): self.use_xpu = True - def _prepare_engine(self, model_config_paths, device): + def _prepare_engine(self, model_config_paths, device, use_encryption_model): if self.model_toolkit_conf == None: self.model_toolkit_conf = server_sdk.ModelToolkitConf() @@ -323,9 +328,15 @@ class Server(object): engine.use_lite = self.use_lite engine.use_xpu = self.use_xpu if device == "cpu": - engine.type = "FLUID_CPU_ANALYSIS" + suffix + if use_encryption_model: + engine.type = "FLUID_CPU_ANALYSIS_ENCRPT" + else: + engine.type = "FLUID_CPU_ANALYSIS"+suffix elif device == "gpu": - engine.type = 
"FLUID_GPU_ANALYSIS" + suffix + if use_encryption_model: + engine.type = "FLUID_GPU_ANALYSIS_ENCRPT" + else: + engine.type = "FLUID_GPU_ANALYSIS"+suffix elif device == "arm": engine.type = "FLUID_ARM_ANALYSIS" + suffix self.model_toolkit_conf.engines.extend([engine]) @@ -485,6 +496,7 @@ class Server(object): workdir=None, port=9292, device="cpu", + use_encryption_model=False, cube_conf=None): if workdir == None: workdir = "./tmp" @@ -498,7 +510,8 @@ class Server(object): self.set_port(port) self._prepare_resource(workdir, cube_conf) - self._prepare_engine(self.model_config_paths, device) + self._prepare_engine(self.model_config_paths, device, + use_encryption_model) self._prepare_infer_service(port) self.workdir = workdir diff --git a/python/paddle_serving_server_gpu/serve.py b/python/paddle_serving_server_gpu/serve.py index 057a25e4..2bba8a45 100644 --- a/python/paddle_serving_server_gpu/serve.py +++ b/python/paddle_serving_server_gpu/serve.py @@ -19,19 +19,22 @@ Usage: """ import argparse import os +import json +import base64 +import time from multiprocessing import Pool, Process from paddle_serving_server_gpu import serve_args from flask import Flask, request +from BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer -def start_gpu_card_model(index, gpuid, args): # pylint: disable=doc-string-missing +def start_gpu_card_model(index, gpuid, port, args): # pylint: disable=doc-string-missing gpuid = int(gpuid) device = "gpu" - port = args.port if gpuid == -1: device = "cpu" elif gpuid >= 0: - port = args.port + index + port = port + index thread_num = args.thread model = args.model mem_optim = args.mem_optim_off is False @@ -83,14 +86,20 @@ def start_gpu_card_model(index, gpuid, args): # pylint: disable=doc-string-miss server.set_container_id(args.container_id) server.load_model_config(model) - server.prepare_server(workdir=workdir, port=port, device=device) + server.prepare_server( + workdir=workdir, + port=port, + device=device, + use_encryption_model=args.use_encryption_model) if gpuid >= 0: server.set_gpuid(gpuid) server.run_server() -def start_multi_card(args): # pylint: disable=doc-string-missing +def start_multi_card(args, serving_port=None): # pylint: disable=doc-string-missing gpus = "" + if serving_port == None: + serving_port = args.port if args.gpu_ids == "": gpus = [] else: @@ -110,14 +119,16 @@ def start_multi_card(args): # pylint: disable=doc-string-missing start_gpu_card_model(-1, -1, args) elif len(gpus) <= 0: print("gpu_ids not set, going to run cpu service.") - start_gpu_card_model(-1, -1, args) + start_gpu_card_model(-1, -1, serving_port, args) else: gpu_processes = [] for i, gpu_id in enumerate(gpus): p = Process( - target=start_gpu_card_model, args=( + target=start_gpu_card_model, + args=( i, gpu_id, + serving_port, args, )) gpu_processes.append(p) for p in gpu_processes: @@ -126,10 +137,89 @@ def start_multi_card(args): # pylint: disable=doc-string-missing p.join() +class MainService(BaseHTTPRequestHandler): + def get_available_port(self): + default_port = 12000 + for i in range(1000): + if port_is_available(default_port + i): + return default_port + i + + def start_serving(self): + start_multi_card(args, serving_port) + + def get_key(self, post_data): + if "key" not in post_data: + return False + else: + key = base64.b64decode(post_data["key"]) + with open(args.model + "/key", "w") as f: + f.write(key) + return True + + def check_key(self, post_data): + if "key" not in post_data: + return False + else: + key = base64.b64decode(post_data["key"]) + with 
open(args.model + "/key", "r") as f: + cur_key = f.read() + return (key == cur_key) + + def start(self, post_data): + post_data = json.loads(post_data) + global p_flag + if not p_flag: + if args.use_encryption_model: + print("waiting key for model") + if not self.get_key(post_data): + print("not found key in request") + return False + global serving_port + global p + serving_port = self.get_available_port() + p = Process(target=self.start_serving) + p.start() + time.sleep(3) + if p.is_alive(): + p_flag = True + else: + return False + else: + if p.is_alive(): + if not self.check_key(post_data): + return False + else: + return False + return True + + def do_POST(self): + content_length = int(self.headers['Content-Length']) + post_data = self.rfile.read(content_length) + if self.start(post_data): + response = {"endpoint_list": [serving_port]} + else: + response = {"message": "start serving failed"} + self.send_response(200) + self.send_header('Content-type', 'application/json') + self.end_headers() + self.wfile.write(json.dumps(response)) + + if __name__ == "__main__": args = serve_args() if args.name == "None": - start_multi_card(args) + from .web_service import port_is_available + if args.use_encryption_model: + p_flag = False + p = None + serving_port = 0 + server = HTTPServer(('localhost', int(args.port)), MainService) + print( + 'Starting encryption server, waiting for key from client, use to stop' + ) + server.serve_forever() + else: + start_multi_card(args) else: from .web_service import WebService web_service = WebService(name=args.name) @@ -141,8 +231,12 @@ if __name__ == "__main__": if len(gpu_ids) > 0: web_service.set_gpus(gpu_ids) web_service.prepare_server( - workdir=args.workdir, port=args.port, device=args.device, - use_lite=args.use_lite, use_xpu=args.use_xpu, ir_optim=args.ir_optim) + workdir=args.workdir, + port=args.port, + device=args.device, + use_lite=args.use_lite, + use_xpu=args.use_xpu, + ir_optim=args.ir_optim) web_service.run_rpc_service() app_instance = Flask(__name__) diff --git a/python/paddle_serving_server_gpu/web_service.py b/python/paddle_serving_server_gpu/web_service.py index e2c24f40..ec1f93ef 100644 --- a/python/paddle_serving_server_gpu/web_service.py +++ b/python/paddle_serving_server_gpu/web_service.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+#!flask/bin/python # pylint: disable=doc-string-missing from flask import Flask, request, abort @@ -28,6 +29,16 @@ from paddle_serving_server_gpu import pipeline from paddle_serving_server_gpu.pipeline import Op +def port_is_available(port): + with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock: + sock.settimeout(2) + result = sock.connect_ex(('0.0.0.0', port)) + if result != 0: + return True + else: + return False + + class WebService(object): def __init__(self, name="default_service"): self.name = name @@ -149,7 +160,7 @@ class WebService(object): self.port_list = [] default_port = 12000 for i in range(1000): - if self.port_is_available(default_port + i): + if port_is_available(default_port + i): self.port_list.append(default_port + i) if len(self.port_list) > len(self.gpus): break diff --git a/tools/Dockerfile.centos6.cuda9.0-cudnn7.devel b/tools/Dockerfile.centos6.cuda9.0-cudnn7.devel index d871e4e9..eddd7e8b 100644 --- a/tools/Dockerfile.centos6.cuda9.0-cudnn7.devel +++ b/tools/Dockerfile.centos6.cuda9.0-cudnn7.devel @@ -39,6 +39,8 @@ RUN yum -y install wget && \ make clean && \ echo 'export PATH=/usr/local/python3.6/bin:$PATH' >> /root/.bashrc && \ echo 'export LD_LIBRARY_PATH=/usr/local/python3.6/lib:$LD_LIBRARY_PATH' >> /root/.bashrc && \ + pip install requests && \ + pip3 install requests && \ source /root/.bashrc && \ cd .. && rm -rf Python-3.6.8* && \ wget https://github.com/protocolbuffers/protobuf/releases/download/v3.11.2/protobuf-all-3.11.2.tar.gz && \ diff --git a/tools/Dockerfile.centos6.devel b/tools/Dockerfile.centos6.devel index add3d924..d0a4559c 100644 --- a/tools/Dockerfile.centos6.devel +++ b/tools/Dockerfile.centos6.devel @@ -49,6 +49,8 @@ RUN yum -y install wget && \ cd .. && rm -rf protobuf-* && \ yum -y install epel-release && yum -y install patchelf libXext libSM libXrender && \ yum clean all && \ + pip install requests && \ + pip3 install requests && \ localedef -c -i en_US -f UTF-8 en_US.UTF-8 && \ echo "export LANG=en_US.utf8" >> /root/.bashrc && \ echo "export LANGUAGE=en_US.utf8" >> /root/.bashrc diff --git a/tools/Dockerfile.ci b/tools/Dockerfile.ci index 390d67eb..b3da3aaf 100644 --- a/tools/Dockerfile.ci +++ b/tools/Dockerfile.ci @@ -23,7 +23,8 @@ RUN wget https://dl.google.com/go/go1.14.linux-amd64.tar.gz >/dev/null \ RUN yum -y install python-devel sqlite-devel >/dev/null \ && curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py >/dev/null \ && python get-pip.py >/dev/null \ - && rm get-pip.py + && rm get-pip.py \ + && pip install requests RUN wget http://nixos.org/releases/patchelf/patchelf-0.10/patchelf-0.10.tar.bz2 \ && yum -y install bzip2 >/dev/null \ @@ -34,6 +35,9 @@ RUN wget http://nixos.org/releases/patchelf/patchelf-0.10/patchelf-0.10.tar.bz2 && cd .. \ && rm -rf patchelf-0.10* +RUN yum install -y python3 python3-devel \ + && pip3 install requests + RUN wget https://github.com/protocolbuffers/protobuf/releases/download/v3.11.2/protobuf-all-3.11.2.tar.gz && \ tar zxf protobuf-all-3.11.2.tar.gz && \ cd protobuf-3.11.2 && \ @@ -41,8 +45,6 @@ RUN wget https://github.com/protocolbuffers/protobuf/releases/download/v3.11.2/p make clean && \ cd .. 
&& rm -rf protobuf-* -RUN yum install -y python3 python3-devel - RUN yum -y update >/dev/null \ && yum -y install dnf >/dev/null \ && yum -y install dnf-plugins-core >/dev/null \ diff --git a/tools/Dockerfile.cuda10.0-cudnn7.devel b/tools/Dockerfile.cuda10.0-cudnn7.devel index c633c593..3215ee7d 100644 --- a/tools/Dockerfile.cuda10.0-cudnn7.devel +++ b/tools/Dockerfile.cuda10.0-cudnn7.devel @@ -30,11 +30,13 @@ RUN wget https://dl.google.com/go/go1.14.linux-amd64.tar.gz >/dev/null \ RUN yum -y install python-devel sqlite-devel \ && curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py >/dev/null \ && python get-pip.py >/dev/null \ - && rm get-pip.py + && rm get-pip.py \ + && pip install requests RUN yum install -y python3 python3-devel \ && yum -y install epel-release && yum -y install patchelf libXext libSM libXrender\ - && yum clean all + && yum clean all \ + && pip3 install requests RUN localedef -c -i en_US -f UTF-8 en_US.UTF-8 \ && echo "export LANG=en_US.utf8" >> /root/.bashrc \ diff --git a/tools/Dockerfile.cuda9.0-cudnn7.devel b/tools/Dockerfile.cuda9.0-cudnn7.devel index 0fe6d69b..42b2d7eb 100644 --- a/tools/Dockerfile.cuda9.0-cudnn7.devel +++ b/tools/Dockerfile.cuda9.0-cudnn7.devel @@ -29,11 +29,13 @@ RUN wget https://dl.google.com/go/go1.14.linux-amd64.tar.gz >/dev/null \ RUN yum -y install python-devel sqlite-devel \ && curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py >/dev/null \ && python get-pip.py >/dev/null \ - && rm get-pip.py + && rm get-pip.py \ + && pip install requests RUN yum install -y python3 python3-devel \ && yum -y install epel-release && yum -y install patchelf libXext libSM libXrender\ - && yum clean all + && yum clean all \ + && pip3 install requests RUN localedef -c -i en_US -f UTF-8 en_US.UTF-8 \ && echo "export LANG=en_US.utf8" >> /root/.bashrc \ diff --git a/tools/Dockerfile.devel b/tools/Dockerfile.devel index 83e3b491..a0f1d039 100644 --- a/tools/Dockerfile.devel +++ b/tools/Dockerfile.devel @@ -19,11 +19,13 @@ RUN wget https://dl.google.com/go/go1.14.linux-amd64.tar.gz >/dev/null \ RUN yum -y install python-devel sqlite-devel \ && curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py >/dev/null \ && python get-pip.py >/dev/null \ - && rm get-pip.py + && rm get-pip.py \ + && pip install requests RUN yum install -y python3 python3-devel \ && yum -y install epel-release && yum -y install patchelf libXext libSM libXrender\ - && yum clean all + && yum clean all \ + && pip3 install requests RUN localedef -c -i en_US -f UTF-8 en_US.UTF-8 \ && echo "export LANG=en_US.utf8" >> /root/.bashrc \ diff --git a/tools/serving_build.sh b/tools/serving_build.sh index 6bc142c3..5d5abaf6 100644 --- a/tools/serving_build.sh +++ b/tools/serving_build.sh @@ -485,6 +485,42 @@ function python_test_lac() { cd .. 
} + +function python_test_encryption(){ + #pwd: /Serving/python/examples + cd encryption + sh get_data.sh + local TYPE=$1 + export SERVING_BIN=${SERIVNG_WORKDIR}/build-server-${TYPE}/core/general-server/serving + case $TYPE in + CPU) + #check_cmd "python encrypt.py" + #sleep 5 + check_cmd "python -m paddle_serving_server.serve --model encrypt_server/ --port 9300 --use_encryption_model > /dev/null &" + sleep 5 + check_cmd "python test_client.py encrypt_client/serving_client_conf.prototxt" + kill_server_process + ;; + GPU) + #check_cmd "python encrypt.py" + #sleep 5 + check_cmd "python -m paddle_serving_server_gpu.serve --model encrypt_server/ --port 9300 --use_encryption_model --gpu_ids 0" + sleep 5 + check_cmd "python test_client.py encrypt_client/serving_client_conf.prototxt" + kill_servere_process + ;; + *) + echo "error type" + exit 1 + ;; + esac + echo "encryption $TYPE test finished as expected" + setproxy + unset SERVING_BIN + cd .. +} + + function java_run_test() { # pwd: /Serving local TYPE=$1 @@ -921,6 +957,7 @@ function python_run_test() { python_test_lac $TYPE # pwd: /Serving/python/examples python_test_multi_process $TYPE # pwd: /Serving/python/examples python_test_multi_fetch $TYPE # pwd: /Serving/python/examples + python_test_encryption $TYPE # pwd: /Serving/python/examples python_test_yolov4 $TYPE # pwd: /Serving/python/examples python_test_grpc_impl $TYPE # pwd: /Serving/python/examples python_test_resnet50 $TYPE # pwd: /Serving/python/examples -- GitLab
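
---

For readers of this patch, here is a minimal sketch of the end-to-end encrypted-model workflow the changes above enable, based on the APIs the diff adds (`inference_model_to_serving(..., encryption=True)`, `Client.use_key()`, `Client.connect(..., encryption=True)`) and the example layout used in `tools/serving_build.sh` (`encrypt_server/`, `encrypt_client/`, a `key` file). The model directory and the feed/fetch names follow the fit_a_line (uci_housing) example referenced elsewhere in the patch and are assumptions, not part of the diff itself.

```python
# Sketch of the encrypted-model workflow added by this patch.
# Assumes the fit_a_line example model and the encryption example layout
# from tools/serving_build.sh; names marked "assumed" are illustrative.

from paddle_serving_client import Client
from paddle_serving_client.io import inference_model_to_serving

# 1. Export an inference model with encryption enabled.
#    With encryption=True this writes encrypt_model / encrypt_params into
#    serving_server and generates a "key" file in the working directory.
feed_names, fetch_names = inference_model_to_serving(
    dirname="uci_housing_model",      # assumed input inference model dir
    serving_server="encrypt_server",
    serving_client="encrypt_client",
    encryption=True)

# 2. Start the server in a separate shell, e.g.:
#      python -m paddle_serving_server.serve --model encrypt_server/ \
#          --port 9300 --use_encryption_model
#    The process listening on 9300 only performs the key exchange; the
#    real serving port is picked from 12000 upward once a valid key
#    arrives, and is returned to the client as "endpoint_list".

# 3. The client sends the key, learns the real endpoint, then predicts.
client = Client()
client.load_client_config("encrypt_client/serving_client_conf.prototxt")
client.use_key("./key")                               # key produced in step 1
client.connect(["127.0.0.1:9300"], encryption=True)   # key exchange + connect

# Feed/fetch names assumed from the fit_a_line example.
fetch_map = client.predict(feed={"x": [0.0] * 13}, fetch=["price"])
print(fetch_map)
```

Note that the key-exchange endpoint expects the key base64-encoded in a JSON POST body (`{"key": ...}`); `Client.connect(..., encryption=True)` performs that request internally via `get_serving_port()` before opening the RPC connection on the returned port.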