diff --git a/CMakeLists.txt b/CMakeLists.txt
index f05e52ee447e06ba812ce5ac52e238dcebc9bbbc..6228877f582b82e89bd1c73707460a7ce8224b97 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -58,7 +58,12 @@
 option(APP "Compile Paddle Serving App package" OFF)
 option(WITH_ELASTIC_CTR "Compile ELASITC-CTR solution" OFF)
 option(PACK "Compile for whl" OFF)
 option(WITH_TRT "Compile Paddle Serving with TRT" OFF)
+option(PADDLE_ON_INFERENCE "Compile for encryption" ON)
+if (PADDLE_ON_INFERENCE)
+    add_definitions(-DPADDLE_ON_INFERENCE)
+    message(STATUS "Use PADDLE_ON_INFERENCE")
+endif()
 set(WITH_MKLML ${WITH_MKL})
 if (NOT DEFINED WITH_MKLDNN)
 if (WITH_MKL AND AVX2_FOUND)
diff --git a/cmake/paddlepaddle.cmake b/cmake/paddlepaddle.cmake
index 5e637b46e7642a8928bd36318363a099069d2c40..f1fdfb6c2db878bcced73afbc78ab62b496df57a 100644
--- a/cmake/paddlepaddle.cmake
+++ b/cmake/paddlepaddle.cmake
@@ -124,8 +124,8 @@
 LINK_DIRECTORIES(${PADDLE_INSTALL_DIR}/third_party/install/mkldnn/lib)
 ADD_LIBRARY(openblas STATIC IMPORTED GLOBAL)
 SET_PROPERTY(TARGET openblas PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/third_party/install/openblas/lib/libopenblas.a)
-ADD_LIBRARY(paddle_fluid SHARED IMPORTED GLOBAL)
-SET_PROPERTY(TARGET paddle_fluid PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/lib/libpaddle_fluid.so)
+ADD_LIBRARY(paddle_fluid STATIC IMPORTED GLOBAL)
+SET_PROPERTY(TARGET paddle_fluid PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/lib/libpaddle_fluid.a)

 if (WITH_TRT)
 ADD_LIBRARY(nvinfer SHARED IMPORTED GLOBAL)
@@ -151,10 +151,13 @@
 endif()

 ADD_LIBRARY(xxhash STATIC IMPORTED GLOBAL)
 SET_PROPERTY(TARGET xxhash PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/third_party/install/xxhash/lib/libxxhash.a)

+ADD_LIBRARY(cryptopp STATIC IMPORTED GLOBAL)
+SET_PROPERTY(TARGET cryptopp PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/third_party/install/cryptopp/lib/libcryptopp.a)
+
 LIST(APPEND external_project_dependencies paddle)

 LIST(APPEND paddle_depend_libs
-    xxhash)
+    xxhash cryptopp)

 if(WITH_LITE)
     LIST(APPEND paddle_depend_libs paddle_full_api_shared)
diff --git a/doc/COMPILE.md b/doc/COMPILE.md
index 84254f4f46949de8b4b91896eef4e5158155ed48..03d135e006c36e57e52b1d353c79217b53baa5e1 100644
--- a/doc/COMPILE.md
+++ b/doc/COMPILE.md
@@ -104,7 +104,7 @@ you can execute `make install` to put targets under directory `./output`, you ne
 ### CUDNN_LIBRARY && CUDA_CUDART_LIBRARY is the lib path, it should be /usr/local/cuda/lib64/
 ``` shell
-export CUDA_PATH='/usr/local'
+export CUDA_PATH='/usr/local/cuda'
 export CUDNN_LIBRARY='/usr/local/cuda/lib64/'
 export CUDA_CUDART_LIBRARY="/usr/local/cuda/lib64/"

@@ -123,7 +123,7 @@ make -j10

 ### Integrated TRT version paddle inference library

 ```
-export CUDA_PATH='/usr/local'
+export CUDA_PATH='/usr/local/cuda'
 export CUDNN_LIBRARY='/usr/local/cuda/lib64/'
 export CUDA_CUDART_LIBRARY="/usr/local/cuda/lib64/"
diff --git a/doc/COMPILE_CN.md b/doc/COMPILE_CN.md
index 9691808eda61a77808a971cc99648a7212b5747c..e5024b1a11aa871ca404287333ac3ff4ee70e21c 100644
--- a/doc/COMPILE_CN.md
+++ b/doc/COMPILE_CN.md
@@ -100,7 +100,7 @@ make -j10
 ### CUDA_PATH是cuda的安装路径,可以使用命令行whereis cuda命令确认你的cuda安装路径,通常应该是/usr/local/cuda
 ### CUDNN_LIBRARY CUDA_CUDART_LIBRARY 是cuda库文件的路径,通常应该是/usr/local/cuda/lib64/
 ``` shell
-export CUDA_PATH='/usr/local'
+export CUDA_PATH='/usr/local/cuda'
 export CUDNN_LIBRARY='/usr/local/cuda/lib64/'
 export CUDA_CUDART_LIBRARY="/usr/local/cuda/lib64/"

@@ -119,7 +119,7 @@ make -j10

 ### 集成TensorRT版本Paddle Inference Library

 ```
-export CUDA_PATH='/usr/local'
+export CUDA_PATH='/usr/local/cuda'
 export CUDNN_LIBRARY='/usr/local/cuda/lib64/'
 export CUDA_CUDART_LIBRARY="/usr/local/cuda/lib64/"
 export TENSORRT_LIBRARY_PATH="/usr/local/TensorRT-6.0.1.5/targets/x86_64-linux-gnu/"
diff --git a/paddle_inference/inferencer-fluid-cpu/include/fluid_cpu_engine.h b/paddle_inference/inferencer-fluid-cpu/include/fluid_cpu_engine.h
index b20a4f4cf34e2f250788ae84c1b5b681d36cea4f..10b962fde3e366d1c865b0742bf4059bf604c063 100644
--- a/paddle_inference/inferencer-fluid-cpu/include/fluid_cpu_engine.h
+++ b/paddle_inference/inferencer-fluid-cpu/include/fluid_cpu_engine.h
@@ -263,6 +263,62 @@ class Parameter {
   float* _params;
 };

+class FluidCpuAnalysisEncryptCore : public FluidFamilyCore {
+ public:
+  void ReadBinaryFile(const std::string& filename, std::string* contents) {
+    std::ifstream fin(filename, std::ios::in | std::ios::binary);
+    fin.seekg(0, std::ios::end);
+    contents->clear();
+    contents->resize(fin.tellg());
+    fin.seekg(0, std::ios::beg);
+    fin.read(&(contents->at(0)), contents->size());
+    fin.close();
+  }
+
+  int create(const predictor::InferEngineCreationParams& params) {
+    std::string data_path = params.get_path();
+    if (access(data_path.c_str(), F_OK) == -1) {
+      LOG(ERROR) << "create paddle predictor failed, path does not exist: "
+                 << data_path;
+      return -1;
+    }
+
+    std::string model_buffer, params_buffer, key_buffer;
+    ReadBinaryFile(data_path + "encrypt_model", &model_buffer);
+    ReadBinaryFile(data_path + "encrypt_params", &params_buffer);
+    ReadBinaryFile(data_path + "key", &key_buffer);
+
+    VLOG(2) << "prepare for encryption model";
+
+    auto cipher = paddle::MakeCipher("");
+    std::string real_model_buffer = cipher->Decrypt(model_buffer, key_buffer);
+    std::string real_params_buffer = cipher->Decrypt(params_buffer, key_buffer);
+
+    Config analysis_config;
+    // paddle::AnalysisConfig analysis_config;
+    analysis_config.SetModelBuffer(&real_model_buffer[0],
+                                   real_model_buffer.size(),
+                                   &real_params_buffer[0],
+                                   real_params_buffer.size());
+    analysis_config.DisableGpu();
+    analysis_config.SetCpuMathLibraryNumThreads(1);
+    if (params.enable_memory_optimization()) {
+      analysis_config.EnableMemoryOptim();
+    }
+    analysis_config.SwitchSpecifyInputNames(true);
+    AutoLock lock(GlobalPaddleCreateMutex::instance());
+    VLOG(2) << "decrypt model file success";
+    _core =
+        CreatePredictor(analysis_config);
+    if (NULL == _core.get()) {
+      LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
+      return -1;
+    }
+    VLOG(2) << "create paddle predictor success, path: " << data_path;
+    return 0;
+  }
+};
+
 }  // namespace fluid_cpu
 }  // namespace paddle_serving
 }  // namespace baidu
diff --git a/paddle_inference/inferencer-fluid-cpu/src/fluid_cpu_engine.cpp b/paddle_inference/inferencer-fluid-cpu/src/fluid_cpu_engine.cpp
index 91cb0bd20c97e53952f95bb05a25582242793f57..f8cf24ef2218705bef71a05194860565961451a1 100644
--- a/paddle_inference/inferencer-fluid-cpu/src/fluid_cpu_engine.cpp
+++ b/paddle_inference/inferencer-fluid-cpu/src/fluid_cpu_engine.cpp
@@ -30,6 +30,13 @@
 REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
     ::baidu::paddle_serving::predictor::InferEngine,
     "FLUID_CPU_ANALYSIS_DIR");
+#if 1
+REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
+    ::baidu::paddle_serving::predictor::FluidInferEngine<
+        FluidCpuAnalysisEncryptCore>,
+    ::baidu::paddle_serving::predictor::InferEngine,
+    "FLUID_CPU_ANALYSIS_ENCRYPT");
+#endif
 }  // namespace fluid_cpu
 }  // namespace paddle_serving
 }  // namespace baidu
diff --git a/paddle_inference/inferencer-fluid-gpu/include/fluid_gpu_engine.h b/paddle_inference/inferencer-fluid-gpu/include/fluid_gpu_engine.h
index 3d59a5009471ff5c76e037a941a0da87377684ab..7cac57a44ecdb73433f1d9f4860b61c8df85aab2 100644
--- a/paddle_inference/inferencer-fluid-gpu/include/fluid_gpu_engine.h
+++ b/paddle_inference/inferencer-fluid-gpu/include/fluid_gpu_engine.h
@@ -283,6 +283,63 @@ class Parameter {
   float* _params;
 };

+
+class FluidGpuAnalysisEncryptCore : public FluidFamilyCore {
+ public:
+  void ReadBinaryFile(const std::string& filename, std::string* contents) {
+    std::ifstream fin(filename, std::ios::in | std::ios::binary);
+    fin.seekg(0, std::ios::end);
+    contents->clear();
+    contents->resize(fin.tellg());
+    fin.seekg(0, std::ios::beg);
+    fin.read(&(contents->at(0)), contents->size());
+    fin.close();
+  }
+
+  int create(const predictor::InferEngineCreationParams& params) {
+    std::string data_path = params.get_path();
+    if (access(data_path.c_str(), F_OK) == -1) {
+      LOG(ERROR) << "create paddle predictor failed, path does not exist: "
+                 << data_path;
+      return -1;
+    }
+
+    std::string model_buffer, params_buffer, key_buffer;
+    ReadBinaryFile(data_path + "encrypt_model", &model_buffer);
+    ReadBinaryFile(data_path + "encrypt_params", &params_buffer);
+    ReadBinaryFile(data_path + "key", &key_buffer);
+
+    VLOG(2) << "prepare for encryption model";
+
+    auto cipher = paddle::MakeCipher("");
+    std::string real_model_buffer = cipher->Decrypt(model_buffer, key_buffer);
+    std::string real_params_buffer = cipher->Decrypt(params_buffer, key_buffer);
+
+    Config analysis_config;
+    analysis_config.SetModelBuffer(&real_model_buffer[0],
+                                   real_model_buffer.size(),
+                                   &real_params_buffer[0],
+                                   real_params_buffer.size());
+    analysis_config.EnableUseGpu(100, FLAGS_gpuid);
+    analysis_config.SetCpuMathLibraryNumThreads(1);
+    if (params.enable_memory_optimization()) {
+      analysis_config.EnableMemoryOptim();
+    }
+    analysis_config.SwitchSpecifyInputNames(true);
+    AutoLock lock(GlobalPaddleCreateMutex::instance());
+    VLOG(2) << "decrypt model file success";
+    _core =
+        CreatePredictor(analysis_config);
+    if (NULL == _core.get()) {
+      LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
+      return -1;
+    }
+    VLOG(2) << "create paddle predictor success, path: " << data_path;
+    return 0;
+  }
+};
+
+
 }  // namespace fluid_gpu
 }  // namespace paddle_serving
 }  // namespace baidu
diff --git a/paddle_inference/inferencer-fluid-gpu/src/fluid_gpu_engine.cpp b/paddle_inference/inferencer-fluid-gpu/src/fluid_gpu_engine.cpp
index c00ea8719414f5ac324ac62e3e36128ad6035f91..613b83432ba5f8fb6c3217f1ba24162bc33b493d 100644
--- a/paddle_inference/inferencer-fluid-gpu/src/fluid_gpu_engine.cpp
+++ b/paddle_inference/inferencer-fluid-gpu/src/fluid_gpu_engine.cpp
@@ -31,6 +31,11 @@ REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
     FluidGpuAnalysisDirCore>,
     ::baidu::paddle_serving::predictor::InferEngine,
     "FLUID_GPU_ANALYSIS_DIR");
+REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
+    ::baidu::paddle_serving::predictor::FluidInferEngine<
+        FluidGpuAnalysisEncryptCore>,
+    ::baidu::paddle_serving::predictor::InferEngine,
+    "FLUID_GPU_ANALYSIS_ENCRYPT");

 }  // namespace fluid_gpu
 }  // namespace paddle_serving
diff --git a/python/examples/fit_a_line/README_CN.md b/python/examples/fit_a_line/README_CN.md
index b18b7204ef2c678ac2811c2bc78df611e0dc538b..9ef55749b123f00b1e0da4627bdad6de5cea0d98 100644
--- a/python/examples/fit_a_line/README_CN.md
+++ b/python/examples/fit_a_line/README_CN.md
@@ -14,12 +14,6 @@ sh get_data.sh

 ### 开启服务端

-``` shell
-python test_server.py uci_housing_model/
-```
-
-也可以通过下面的一行代码开启默认RPC服务:
-
 ```shell
 python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393
 ```
diff --git a/python/examples/fit_a_line/test_server.py b/python/examples/fit_a_line/test_server.py
index c3f2406640537190ebfc0cae35ecc0297f3aa661..d055b309d7530ccbe928d50e2bcaba23fb1ddaff 100644
--- a/python/examples/fit_a_line/test_server.py
+++ b/python/examples/fit_a_line/test_server.py
@@ -31,6 +31,6 @@ class UciService(WebService):

 uci_service = UciService(name="uci")
 uci_service.load_model_config("uci_housing_model")
-uci_service.prepare_server(workdir="workdir", port=9292)
+uci_service.prepare_server(workdir="workdir", port=9393)
 uci_service.run_rpc_service()
 uci_service.run_web_service()
diff --git a/python/paddle_serving_client/__init__.py b/python/paddle_serving_client/__init__.py
index b2094b3b29b9fedfacd01af179841a135c36f9f9..047c97d7d71e5d524183c71c1c1f8fa4852b652d 100644
--- a/python/paddle_serving_client/__init__.py
+++ b/python/paddle_serving_client/__init__.py
@@ -19,6 +19,9 @@ from .proto import sdk_configure_pb2 as sdk
 from .proto import general_model_config_pb2 as m_config
 import google.protobuf.text_format
 import numpy as np
+import requests
+import json
+import base64
 import time
 import sys

@@ -161,6 +164,7 @@
         self.fetch_names_to_idx_ = {}
         self.lod_tensor_set = set()
         self.feed_tensor_len = {}
+        self.key = None

         for i, var in enumerate(model_conf.feed_var):
             self.feed_names_to_idx_[var.alias_name] = i
@@ -193,7 +197,28 @@
         else:
             self.rpc_timeout_ms = rpc_timeout

-    def connect(self, endpoints=None):
+    def use_key(self, key_filename):
+        with open(key_filename, "r") as f:
+            self.key = f.read()
+
+    def get_serving_port(self, endpoints):
+        if self.key is not None:
+            req = json.dumps({"key": base64.b64encode(self.key)})
+        else:
+            req = json.dumps({})
+        r = requests.post("http://" + endpoints[0], req)
+        result = r.json()
+        print(result)
+        if "endpoint_list" not in result:
+            raise ValueError("server not ready")
+        else:
+            endpoints = [
+                endpoints[0].split(":")[0] + ":" +
+                str(result["endpoint_list"][0])
+            ]
+        return endpoints
+
+    def connect(self, endpoints=None, encryption=False):
         # check whether current endpoint is available
         # init from client config
         # create predictor here
@@ -203,6 +228,8 @@
                 "You must set the endpoints parameter or use add_variant function to create a variant."
             )
         else:
+            if encryption:
+                endpoints = self.get_serving_port(endpoints)
             if self.predictor_sdk_ is None:
                 self.add_variant('default_tag_{}'.format(id(self)),
                                  endpoints, 100)
diff --git a/python/paddle_serving_client/io/__init__.py b/python/paddle_serving_client/io/__init__.py
index 48e0c8f2535db90e741eec4f8326a0b02b04486b..f18d4b2bdd3a2c8786083ad3743c3710cc8671cd 100644
--- a/python/paddle_serving_client/io/__init__.py
+++ b/python/paddle_serving_client/io/__init__.py
@@ -21,10 +21,14 @@
 from paddle.fluid.framework import Program
 from paddle.fluid import CPUPlace
 from paddle.fluid.io import save_inference_model
 import paddle.fluid as fluid
+from paddle.fluid.core import CipherUtils
+from paddle.fluid.core import CipherFactory
+from paddle.fluid.core import Cipher
 from ..proto import general_model_config_pb2 as model_conf
 import os
 import paddle
 import paddle.nn.functional as F
+import errno
 from paddle.jit import to_static

 def save_dygraph_model(serving_model_folder, client_config_folder, model):
@@ -112,7 +116,10 @@ def save_model(server_model_folder,
                client_config_folder,
                feed_var_dict,
                fetch_var_dict,
-               main_program=None):
+               main_program=None,
+               encryption=False,
+               key_len=128,
+               encrypt_conf=None):
     executor = Executor(place=CPUPlace())

     feed_var_names = [feed_var_dict[x].name for x in feed_var_dict]
@@ -122,14 +129,31 @@
         target_vars.append(fetch_var_dict[key])
         target_var_names.append(key)

-    save_inference_model(
-        server_model_folder,
-        feed_var_names,
-        target_vars,
-        executor,
-        model_filename="__model__",
-        params_filename="__params__",
-        main_program=main_program)
+    if not encryption:
+        save_inference_model(
+            server_model_folder,
+            feed_var_names,
+            target_vars,
+            executor,
+            model_filename="__model__",
+            params_filename="__params__",
+            main_program=main_program)
+    else:
+        if encrypt_conf == None:
+            aes_cipher = CipherFactory.create_cipher()
+        else:
+            # todo: more encryption algorithms
+            pass
+        key = CipherUtils.gen_key_to_file(key_len, "key")
+        params = fluid.io.save_persistables(
+            executor=executor, dirname=None, main_program=main_program)
+        model = main_program.desc.serialize_to_string()
+        if not os.path.exists(server_model_folder):
+            os.makedirs(server_model_folder)
+        os.chdir(server_model_folder)
+        aes_cipher.encrypt_to_file(params, key, "encrypt_params")
+        aes_cipher.encrypt_to_file(model, key, "encrypt_model")
+        os.chdir("..")

     config = model_conf.GeneralModelConfig()
@@ -201,7 +225,10 @@ def inference_model_to_serving(dirname,
                                serving_server="serving_server",
                                serving_client="serving_client",
                                model_filename=None,
-                               params_filename=None):
+                               params_filename=None,
+                               encryption=False,
+                               key_len=128,
+                               encrypt_conf=None):
     paddle.enable_static()
     place = fluid.CPUPlace()
     exe = fluid.Executor(place)
@@ -213,7 +240,7 @@
     }
     fetch_dict = {x.name: x for x in fetch_targets}
     save_model(serving_server, serving_client, feed_dict, fetch_dict,
-               inference_program)
+               inference_program, encryption, key_len, encrypt_conf)
     feed_names = feed_dict.keys()
     fetch_names = fetch_dict.keys()
     return feed_names, fetch_names
diff --git a/python/paddle_serving_server/__init__.py b/python/paddle_serving_server/__init__.py
index a46d0f246cc471b7c98f678b3e87d95e601db774..ac58ed21de8148a05d97faccbab18a7702c3d9f2 100644
--- a/python/paddle_serving_server/__init__.py
+++ b/python/paddle_serving_server/__init__.py
@@ -157,7 +157,8 @@
         self.cur_path = os.getcwd()
         self.use_local_bin = False
         self.mkl_flag = False
-        self.product_name = None
+        self.encryption_model = False
+        self.product_name = None
         self.container_id = None
         self.model_config_paths = None  # for multi-model in a workflow
@@ -196,6 +197,8 @@
     def set_ir_optimize(self, flag=False):
         self.ir_optimization = flag

+    def use_encryption_model(self, flag=False):
+        self.encryption_model = flag

     def set_product_name(self, product_name=None):
         if product_name == None:
@@ -236,9 +239,15 @@
             suffix = "_DIR"

         if device == "cpu":
-            engine.type = "FLUID_CPU_ANALYSIS" + suffix
+            if self.encryption_model:
+                engine.type = "FLUID_CPU_ANALYSIS_ENCRYPT"
+            else:
+                engine.type = "FLUID_CPU_ANALYSIS" + suffix
         elif device == "gpu":
-            engine.type = "FLUID_GPU_ANALYSIS" + suffix
+            if self.encryption_model:
+                engine.type = "FLUID_GPU_ANALYSIS_ENCRYPT"
+            else:
+                engine.type = "FLUID_GPU_ANALYSIS" + suffix

         self.model_toolkit_conf.engines.extend([engine])
diff --git a/python/paddle_serving_server/serve.py b/python/paddle_serving_server/serve.py
index d282ac076e377806e9a3b320b880ffed6300b971..0fa7984b394aa16538e8e2735e2927b89b0d1260 100644
--- a/python/paddle_serving_server/serve.py
+++ b/python/paddle_serving_server/serve.py
@@ -18,8 +18,14 @@ Usage:
     python -m paddle_serving_server.serve --model ./serving_server_model --port 9292
 """
 import argparse
-from .web_service import WebService
+import sys
+import json
+import base64
+import time
+from multiprocessing import Process
+from web_service import WebService, port_is_available
 from flask import Flask, request
+from BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer


 def parse_args():  # pylint: disable=doc-string-missing
@@ -53,6 +59,11 @@
         type=int,
         default=512 * 1024 * 1024,
         help="Limit sizes of messages")
+    parser.add_argument(
+        "--use_encryption_model",
+        default=False,
+        action="store_true",
+        help="Use encryption model")
     parser.add_argument(
         "--use_multilang",
         default=False,
@@ -71,17 +82,18 @@
     return parser.parse_args()


-def start_standard_model():  # pylint: disable=doc-string-missing
+def start_standard_model(serving_port):  # pylint: disable=doc-string-missing
     args = parse_args()
     thread_num = args.thread
     model = args.model
-    port = args.port
+    port = serving_port
     workdir = args.workdir
     device = args.device
     mem_optim = args.mem_optim_off is False
     ir_optim = args.ir_optim
     max_body_size = args.max_body_size
     use_mkl = args.use_mkl
+    use_encryption_model = args.use_encryption_model
     use_multilang = args.use_multilang

     if model == "":
@@ -111,6 +123,7 @@
     server.use_mkl(use_mkl)
     server.set_max_body_size(max_body_size)
     server.set_port(port)
+    server.use_encryption_model(use_encryption_model)
     if args.product_name != None:
         server.set_product_name(args.product_name)
     if args.container_id != None:
@@ -120,12 +133,88 @@
     server.prepare_server(workdir=workdir, port=port, device=device)
     server.run_server()

+class MainService(BaseHTTPRequestHandler):
+    def get_available_port(self):
+        default_port = 12000
+        for i in range(1000):
+            if port_is_available(default_port + i):
+                return default_port + i
+
+    def start_serving(self):
+        start_standard_model(serving_port)
+
+    def get_key(self, post_data):
+        if "key" not in post_data:
+            return False
+        else:
+            key = base64.b64decode(post_data["key"])
+            with open(args.model + "/key", "w") as f:
+                f.write(key)
+            return True
+
+    def check_key(self, post_data):
+        if "key" not in post_data:
+            return False
+        else:
+            key = base64.b64decode(post_data["key"])
+            with open(args.model + "/key", "r") as f:
+                cur_key = f.read()
+            return (key == cur_key)
+
+    def start(self, post_data):
+        post_data = json.loads(post_data)
+        global p_flag
+        if not p_flag:
+            if args.use_encryption_model:
+                print("waiting key for model")
+                if not self.get_key(post_data):
+                    print("not found key in request")
+                    return False
+            global serving_port
+            global p
+            serving_port = self.get_available_port()
+            p = Process(target=self.start_serving)
+            p.start()
+            time.sleep(3)
+            if p.is_alive():
+                p_flag = True
+            else:
+                return False
+        else:
+            if p.is_alive():
+                if not self.check_key(post_data):
+                    return False
+            else:
+                return False
+        return True
+
+    def do_POST(self):
+        content_length = int(self.headers['Content-Length'])
+        post_data = self.rfile.read(content_length)
+        if self.start(post_data):
+            response = {"endpoint_list": [serving_port]}
+        else:
+            response = {"message": "start serving failed"}
+        self.send_response(200)
+        self.send_header('Content-type', 'application/json')
+        self.end_headers()
+        self.wfile.write(json.dumps(response))

 if __name__ == "__main__":
     args = parse_args()
     if args.name == "None":
-        start_standard_model()
+        if args.use_encryption_model:
+            p_flag = False
+            p = None
+            serving_port = 0
+            server = HTTPServer(('localhost', int(args.port)), MainService)
+            print(
+                'Starting encryption server, waiting for key from client, use Ctrl+C to stop'
+            )
+            server.serve_forever()
+        else:
+            start_standard_model(args.port)
     else:
         service = WebService(name=args.name)
         service.load_model_config(args.model)
diff --git a/python/paddle_serving_server/web_service.py b/python/paddle_serving_server/web_service.py
index fbe48180867faf9f2baba71fc3c5c8cf6ab771e2..f1eb8409a51974ef382a2b893f1fb16a63ef46cc 100644
--- a/python/paddle_serving_server/web_service.py
+++ b/python/paddle_serving_server/web_service.py
@@ -25,6 +25,16 @@
 from paddle_serving_server import pipeline
 from paddle_serving_server.pipeline import Op


+def port_is_available(port):
+    with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock:
+        sock.settimeout(2)
+        result = sock.connect_ex(('0.0.0.0', port))
+    if result != 0:
+        return True
+    else:
+        return False
+
+
 class WebService(object):
     def __init__(self, name="default_service"):
         self.name = name
@@ -110,7 +120,7 @@
         self.mem_optim = mem_optim
         self.ir_optim = ir_optim
         for i in range(1000):
-            if self.port_is_available(default_port + i):
+            if port_is_available(default_port + i):
                 self.port_list.append(default_port + i)
                 break
diff --git a/python/paddle_serving_server_gpu/__init__.py b/python/paddle_serving_server_gpu/__init__.py
index 44402e734f3b9dd22db4ae674cf85e5cff614f8f..f951d321130c30a513e9c07e3cf2db9169931f3f 100644
--- a/python/paddle_serving_server_gpu/__init__.py
+++ b/python/paddle_serving_server_gpu/__init__.py
@@ -70,6 +70,11 @@
         type=int,
         default=512 * 1024 * 1024,
         help="Limit sizes of messages")
+    parser.add_argument(
+        "--use_encryption_model",
+        default=False,
+        action="store_true",
+        help="Use encryption model")
     parser.add_argument(
         "--use_multilang",
         default=False,
@@ -295,7 +300,7 @@
     def set_xpu(self):
         self.use_xpu = True

-    def _prepare_engine(self, model_config_paths, device):
+    def _prepare_engine(self, model_config_paths, device, use_encryption_model):
         if self.model_toolkit_conf == None:
             self.model_toolkit_conf = server_sdk.ModelToolkitConf()

@@ -323,9 +328,15 @@ class Server(object):
             engine.use_lite = self.use_lite
             engine.use_xpu = self.use_xpu

             if device == "cpu":
-                engine.type = "FLUID_CPU_ANALYSIS" + suffix
+                if use_encryption_model:
+                    engine.type = "FLUID_CPU_ANALYSIS_ENCRYPT"
+                else:
+                    engine.type = "FLUID_CPU_ANALYSIS" + suffix
             elif device == "gpu":
-                engine.type = "FLUID_GPU_ANALYSIS" + suffix
+                if use_encryption_model:
+                    engine.type = "FLUID_GPU_ANALYSIS_ENCRYPT"
+                else:
+                    engine.type = "FLUID_GPU_ANALYSIS" + suffix
             elif device == "arm":
                 engine.type = "FLUID_ARM_ANALYSIS" + suffix
             self.model_toolkit_conf.engines.extend([engine])
@@ -485,6 +496,7 @@
                        workdir=None,
                        port=9292,
                        device="cpu",
+                       use_encryption_model=False,
                        cube_conf=None):
         if workdir == None:
             workdir = "./tmp"
@@ -498,7 +510,8 @@
         self.set_port(port)

         self._prepare_resource(workdir, cube_conf)
-        self._prepare_engine(self.model_config_paths, device)
+        self._prepare_engine(self.model_config_paths, device,
+                             use_encryption_model)
         self._prepare_infer_service(port)
         self.workdir = workdir
diff --git a/python/paddle_serving_server_gpu/serve.py b/python/paddle_serving_server_gpu/serve.py
index 057a25e483cd7c160bc7bbef8b9378f9bf08f32c..2bba8a451090f345b34a48ff58fda7d07b7794a7 100644
--- a/python/paddle_serving_server_gpu/serve.py
+++ b/python/paddle_serving_server_gpu/serve.py
@@ -19,19 +19,22 @@ Usage:
 """
 import argparse
 import os
+import json
+import base64
+import time
 from multiprocessing import Pool, Process
 from paddle_serving_server_gpu import serve_args
 from flask import Flask, request
+from BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer


-def start_gpu_card_model(index, gpuid, args):  # pylint: disable=doc-string-missing
+def start_gpu_card_model(index, gpuid, port, args):  # pylint: disable=doc-string-missing
     gpuid = int(gpuid)
     device = "gpu"
-    port = args.port
     if gpuid == -1:
         device = "cpu"
     elif gpuid >= 0:
-        port = args.port + index
+        port = port + index
     thread_num = args.thread
     model = args.model
     mem_optim = args.mem_optim_off is False
@@ -83,14 +86,20 @@
         server.set_container_id(args.container_id)

     server.load_model_config(model)
-    server.prepare_server(workdir=workdir, port=port, device=device)
+    server.prepare_server(
+        workdir=workdir,
+        port=port,
+        device=device,
+        use_encryption_model=args.use_encryption_model)
     if gpuid >= 0:
         server.set_gpuid(gpuid)
     server.run_server()


-def start_multi_card(args):  # pylint: disable=doc-string-missing
+def start_multi_card(args, serving_port=None):  # pylint: disable=doc-string-missing
     gpus = ""
+    if serving_port == None:
+        serving_port = args.port
     if args.gpu_ids == "":
         gpus = []
     else:
@@ -110,14 +119,16 @@
             start_gpu_card_model(-1, -1, args)
     elif len(gpus) <= 0:
         print("gpu_ids not set, going to run cpu service.")
-        start_gpu_card_model(-1, -1, args)
+        start_gpu_card_model(-1, -1, serving_port, args)
     else:
         gpu_processes = []
         for i, gpu_id in enumerate(gpus):
             p = Process(
-                target=start_gpu_card_model, args=(
+                target=start_gpu_card_model,
+                args=(
                     i,
                     gpu_id,
+                    serving_port,
                     args, ))
             gpu_processes.append(p)
         for p in gpu_processes:
@@ -126,10 +137,89 @@
             p.start()
         for p in gpu_processes:
             p.join()


+class MainService(BaseHTTPRequestHandler):
+    def get_available_port(self):
+        default_port = 12000
+        for i in range(1000):
+            if port_is_available(default_port + i):
+                return default_port + i
+
+    def start_serving(self):
+        start_multi_card(args, serving_port)
+
+    def get_key(self, post_data):
+        if "key" not in post_data:
+            return False
+        else:
+            key = base64.b64decode(post_data["key"])
+            with open(args.model + "/key", "w") as f:
+                f.write(key)
+            return True
+
+    def check_key(self, post_data):
+        if "key" not in post_data:
+            return False
+        else:
+            key = base64.b64decode(post_data["key"])
+            with open(args.model + "/key", "r") as f:
+                cur_key = f.read()
+            return (key == cur_key)
+
+    def start(self, post_data):
+        post_data = json.loads(post_data)
+        global p_flag
+        if not p_flag:
+            if args.use_encryption_model:
+                print("waiting key for model")
+                if not self.get_key(post_data):
+                    print("not found key in request")
+                    return False
+            global serving_port
+            global p
+            serving_port = self.get_available_port()
+            p = Process(target=self.start_serving)
+            p.start()
+            time.sleep(3)
+            if p.is_alive():
+                p_flag = True
+            else:
+                return False
+        else:
+            if p.is_alive():
+                if not self.check_key(post_data):
+                    return False
+            else:
+                return False
+        return True
+
+    def do_POST(self):
+        content_length = int(self.headers['Content-Length'])
+        post_data = self.rfile.read(content_length)
+        if self.start(post_data):
+            response = {"endpoint_list": [serving_port]}
+        else:
+            response = {"message": "start serving failed"}
+        self.send_response(200)
+        self.send_header('Content-type', 'application/json')
+        self.end_headers()
+        self.wfile.write(json.dumps(response))
+
+
 if __name__ == "__main__":
     args = serve_args()
     if args.name == "None":
-        start_multi_card(args)
+        from .web_service import port_is_available
+        if args.use_encryption_model:
+            p_flag = False
+            p = None
+            serving_port = 0
+            server = HTTPServer(('localhost', int(args.port)), MainService)
+            print(
+                'Starting encryption server, waiting for key from client, use Ctrl+C to stop'
+            )
+            server.serve_forever()
+        else:
+            start_multi_card(args)
     else:
         from .web_service import WebService
         web_service = WebService(name=args.name)
@@ -141,8 +231,12 @@
         if len(gpu_ids) > 0:
             web_service.set_gpus(gpu_ids)
     web_service.prepare_server(
-        workdir=args.workdir, port=args.port, device=args.device,
-        use_lite=args.use_lite, use_xpu=args.use_xpu, ir_optim=args.ir_optim)
+        workdir=args.workdir,
+        port=args.port,
+        device=args.device,
+        use_lite=args.use_lite,
+        use_xpu=args.use_xpu,
+        ir_optim=args.ir_optim)
     web_service.run_rpc_service()

     app_instance = Flask(__name__)
diff --git a/python/paddle_serving_server_gpu/web_service.py b/python/paddle_serving_server_gpu/web_service.py
index e2c24f4068da1a6ccccaa789186cab4e2a8fa6d9..ec1f93efbcc044f7e8348a9feda582e484b20081 100644
--- a/python/paddle_serving_server_gpu/web_service.py
+++ b/python/paddle_serving_server_gpu/web_service.py
@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+#!flask/bin/python
 # pylint: disable=doc-string-missing

 from flask import Flask, request, abort
@@ -28,6 +29,16 @@
 from paddle_serving_server_gpu import pipeline
 from paddle_serving_server_gpu.pipeline import Op


+def port_is_available(port):
+    with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock:
+        sock.settimeout(2)
+        result = sock.connect_ex(('0.0.0.0', port))
+    if result != 0:
+        return True
+    else:
+        return False
+
+
 class WebService(object):
     def __init__(self, name="default_service"):
         self.name = name
@@ -149,7 +160,7 @@
         self.port_list = []
         default_port = 12000
         for i in range(1000):
-            if self.port_is_available(default_port + i):
+            if port_is_available(default_port + i):
                 self.port_list.append(default_port + i)
                 if len(self.port_list) > len(self.gpus):
                     break
diff --git a/tools/Dockerfile.centos6.cuda9.0-cudnn7.devel b/tools/Dockerfile.centos6.cuda9.0-cudnn7.devel
index d871e4e97f6e0201cb8d533ba9ca8e89664c7a18..eddd7e8b912b4cd2bb19f558413ffec1aea58071 100644
--- a/tools/Dockerfile.centos6.cuda9.0-cudnn7.devel
+++ b/tools/Dockerfile.centos6.cuda9.0-cudnn7.devel
@@ -39,6 +39,8 @@ RUN yum -y install wget && \
     make clean && \
    echo 'export PATH=/usr/local/python3.6/bin:$PATH' >> /root/.bashrc && \
    echo 'export LD_LIBRARY_PATH=/usr/local/python3.6/lib:$LD_LIBRARY_PATH' >> /root/.bashrc && \
+    pip install requests && \
+    pip3 install requests && \
     source /root/.bashrc && \
     cd .. && rm -rf Python-3.6.8* && \
     wget https://github.com/protocolbuffers/protobuf/releases/download/v3.11.2/protobuf-all-3.11.2.tar.gz && \
diff --git a/tools/Dockerfile.centos6.devel b/tools/Dockerfile.centos6.devel
index add3d9245ce3763d5f4ab9e8619a80bf058386c3..d0a4559ca29a22a8eb6627d19eb5e2f641ac37ec 100644
--- a/tools/Dockerfile.centos6.devel
+++ b/tools/Dockerfile.centos6.devel
@@ -49,6 +49,8 @@ RUN yum -y install wget && \
     cd .. && rm -rf protobuf-* && \
     yum -y install epel-release && yum -y install patchelf libXext libSM libXrender && \
     yum clean all && \
+    pip install requests && \
+    pip3 install requests && \
     localedef -c -i en_US -f UTF-8 en_US.UTF-8 && \
     echo "export LANG=en_US.utf8" >> /root/.bashrc && \
     echo "export LANGUAGE=en_US.utf8" >> /root/.bashrc
diff --git a/tools/Dockerfile.ci b/tools/Dockerfile.ci
index 390d67eb955e1fe8d51faa27c06351f38b2d7462..b3da3aafd041f34436b86323306dc9d4bc82adcf 100644
--- a/tools/Dockerfile.ci
+++ b/tools/Dockerfile.ci
@@ -23,7 +23,8 @@ RUN wget https://dl.google.com/go/go1.14.linux-amd64.tar.gz >/dev/null \
 RUN yum -y install python-devel sqlite-devel >/dev/null \
     && curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py >/dev/null \
     && python get-pip.py >/dev/null \
-    && rm get-pip.py
+    && rm get-pip.py \
+    && pip install requests

 RUN wget http://nixos.org/releases/patchelf/patchelf-0.10/patchelf-0.10.tar.bz2 \
     && yum -y install bzip2 >/dev/null \
@@ -34,6 +35,9 @@
     && cd .. \
     && rm -rf patchelf-0.10*

+RUN yum install -y python3 python3-devel \
+    && pip3 install requests
+
 RUN wget https://github.com/protocolbuffers/protobuf/releases/download/v3.11.2/protobuf-all-3.11.2.tar.gz && \
     tar zxf protobuf-all-3.11.2.tar.gz && \
     cd protobuf-3.11.2 && \
@@ -41,8 +45,6 @@ RUN wget https://github.com/protocolbuffers/protobuf/releases/download/v3.11.2/p
     make clean && \
     cd .. && rm -rf protobuf-*

-RUN yum install -y python3 python3-devel
-
 RUN yum -y update >/dev/null \
     && yum -y install dnf >/dev/null \
     && yum -y install dnf-plugins-core >/dev/null \
diff --git a/tools/Dockerfile.cuda10.0-cudnn7.devel b/tools/Dockerfile.cuda10.0-cudnn7.devel
index c633c593ca5ad13a14b7ebee5edca3caf9882d9f..3215ee7dee0e64a0d5583a7d2024b413d526b000 100644
--- a/tools/Dockerfile.cuda10.0-cudnn7.devel
+++ b/tools/Dockerfile.cuda10.0-cudnn7.devel
@@ -30,11 +30,13 @@ RUN wget https://dl.google.com/go/go1.14.linux-amd64.tar.gz >/dev/null \
 RUN yum -y install python-devel sqlite-devel \
     && curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py >/dev/null \
     && python get-pip.py >/dev/null \
-    && rm get-pip.py
+    && rm get-pip.py \
+    && pip install requests

 RUN yum install -y python3 python3-devel \
     && yum -y install epel-release && yum -y install patchelf libXext libSM libXrender\
-    && yum clean all
+    && yum clean all \
+    && pip3 install requests

 RUN localedef -c -i en_US -f UTF-8 en_US.UTF-8 \
     && echo "export LANG=en_US.utf8" >> /root/.bashrc \
diff --git a/tools/Dockerfile.cuda9.0-cudnn7.devel b/tools/Dockerfile.cuda9.0-cudnn7.devel
index 0fe6d69b1f39bb8bbea1008ea74a0c30607c6c73..42b2d7eb5c0766c8a97130ec93ea5945607cf32b 100644
--- a/tools/Dockerfile.cuda9.0-cudnn7.devel
+++ b/tools/Dockerfile.cuda9.0-cudnn7.devel
@@ -29,11 +29,13 @@ RUN wget https://dl.google.com/go/go1.14.linux-amd64.tar.gz >/dev/null \
 RUN yum -y install python-devel sqlite-devel \
     && curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py >/dev/null \
     && python get-pip.py >/dev/null \
-    && rm get-pip.py
+    && rm get-pip.py \
+    && pip install requests

 RUN yum install -y python3 python3-devel \
     && yum -y install epel-release && yum -y install patchelf libXext libSM libXrender\
-    && yum clean all
+    && yum clean all \
+    && pip3 install requests

 RUN localedef -c -i en_US -f UTF-8 en_US.UTF-8 \
     && echo "export LANG=en_US.utf8" >> /root/.bashrc \
diff --git a/tools/Dockerfile.devel b/tools/Dockerfile.devel
index 83e3b491c30fe99eaa615e836efeef6aad0c0cc4..a0f1d03983466b3ca70ce2d4673ad8874e323a08 100644
--- a/tools/Dockerfile.devel
+++ b/tools/Dockerfile.devel
@@ -19,11 +19,13 @@ RUN wget https://dl.google.com/go/go1.14.linux-amd64.tar.gz >/dev/null \
 RUN yum -y install python-devel sqlite-devel \
     && curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py >/dev/null \
     && python get-pip.py >/dev/null \
-    && rm get-pip.py
+    && rm get-pip.py \
+    && pip install requests

 RUN yum install -y python3 python3-devel \
     && yum -y install epel-release && yum -y install patchelf libXext libSM libXrender\
-    && yum clean all
+    && yum clean all \
+    && pip3 install requests

 RUN localedef -c -i en_US -f UTF-8 en_US.UTF-8 \
     && echo "export LANG=en_US.utf8" >> /root/.bashrc \
diff --git a/tools/serving_build.sh b/tools/serving_build.sh
index 6bc142c36efad60ec26f7dac6200c3127aef8252..5d5abaf64c575d6d3728d1c1fdea281f94e3c2d6 100644
--- a/tools/serving_build.sh
+++ b/tools/serving_build.sh
@@ -485,6 +485,42 @@ function python_test_lac() {
     cd ..
 }

+
+function python_test_encryption(){
+    #pwd: /Serving/python/examples
+    cd encryption
+    sh get_data.sh
+    local TYPE=$1
+    export SERVING_BIN=${SERIVNG_WORKDIR}/build-server-${TYPE}/core/general-server/serving
+    case $TYPE in
+        CPU)
+            #check_cmd "python encrypt.py"
+            #sleep 5
+            check_cmd "python -m paddle_serving_server.serve --model encrypt_server/ --port 9300 --use_encryption_model > /dev/null &"
+            sleep 5
+            check_cmd "python test_client.py encrypt_client/serving_client_conf.prototxt"
+            kill_server_process
+            ;;
+        GPU)
+            #check_cmd "python encrypt.py"
+            #sleep 5
+            check_cmd "python -m paddle_serving_server_gpu.serve --model encrypt_server/ --port 9300 --use_encryption_model --gpu_ids 0 > /dev/null &"
+            sleep 5
+            check_cmd "python test_client.py encrypt_client/serving_client_conf.prototxt"
+            kill_server_process
+            ;;
+        *)
+            echo "error type"
+            exit 1
+            ;;
+    esac
+    echo "encryption $TYPE test finished as expected"
+    setproxy
+    unset SERVING_BIN
+    cd ..
+}
+
+
 function java_run_test() {
     # pwd: /Serving
     local TYPE=$1
@@ -921,6 +957,7 @@ function python_run_test() {
     python_test_lac $TYPE # pwd: /Serving/python/examples
     python_test_multi_process $TYPE # pwd: /Serving/python/examples
     python_test_multi_fetch $TYPE # pwd: /Serving/python/examples
+    python_test_encryption $TYPE # pwd: /Serving/python/examples
     python_test_yolov4 $TYPE # pwd: /Serving/python/examples
     python_test_grpc_impl $TYPE # pwd: /Serving/python/examples
     python_test_resnet50 $TYPE # pwd: /Serving/python/examples
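
Taken together, the changes above add an encrypted-model path through the whole stack: save_model/inference_model_to_serving can emit encrypt_model, encrypt_params and a local "key" file; "serve.py --use_encryption_model" starts a lightweight HTTP front end that waits for the key and only then forks the real serving process on a free port; and the client sends the key via use_key() and connect(..., encryption=True) before predicting. The Python sketch below strings these steps together. It is illustrative only, not part of the patch: the source model directory, the feed/fetch names ("x", "price") and the 13-dimensional input are borrowed from the fit_a_line example, and the port 9300 / encrypt_server / encrypt_client names simply mirror the serving_build.sh test above.

# Minimal end-to-end sketch of the encrypted-model workflow (assumed example names).
# --- 1. Offline: export an encrypted model; also writes a local "key" file ---
from paddle_serving_client.io import inference_model_to_serving

feed_names, fetch_names = inference_model_to_serving(
    dirname="uci_housing_model",        # plain inference model (placeholder)
    serving_server="encrypt_server",    # receives encrypt_model / encrypt_params
    serving_client="encrypt_client",
    encryption=True,
    key_len=128)

# --- 2. Start the key-exchange front end (shell) ---
#   python -m paddle_serving_server.serve \
#       --model encrypt_server --port 9300 --use_encryption_model

# --- 3. Client: send the key, learn the real endpoint, then predict as usual ---
from paddle_serving_client import Client

client = Client()
client.load_client_config("encrypt_client/serving_client_conf.prototxt")
client.use_key("./key")                          # key generated in step 1
client.connect(["127.0.0.1:9300"], encryption=True)
fetch_map = client.predict(feed={"x": [0.0] * 13}, fetch=["price"])
print(fetch_map)

The detour through get_serving_port() is what makes the key exchange work: the published port only runs the MainService HTTP handler, and the actual serving process is started on a free port (12000 and up) once a valid key has been received, whose number is returned to the client in "endpoint_list".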