diff --git a/python/paddle_serving_server/__init__.py b/python/paddle_serving_server/__init__.py
index fd11fc6dd842d147d29cde212d5b9dce6cc9f7df..a73fcdf0b4006962fc3f5c0261c8cbe46ea14aee 100644
--- a/python/paddle_serving_server/__init__.py
+++ b/python/paddle_serving_server/__init__.py
@@ -54,8 +54,8 @@ class OpMaker(object):
 
     def create(self, node_type, engine_name=None, inputs=[], outputs=[]):
         if node_type not in self.op_dict:
-            raise Exception("Op type {} is not supported right now".format(
-                node_type))
+            raise Exception(
+                "Op type {} is not supported right now".format(node_type))
         node = server_sdk.DAGNode()
         # node.name will be used as the infer engine name
         if engine_name:
@@ -103,9 +103,9 @@ class OpSeqMaker(object):
         elif len(node.dependencies) == 1:
             if node.dependencies[0].name != self.workflow.nodes[-1].name:
                 raise Exception(
-                    'You must add op in order in OpSeqMaker. The previous op is {}, but the current op is followed by {}.'.
-                    format(node.dependencies[0].name, self.workflow.nodes[
-                        -1].name))
+                    'You must add op in order in OpSeqMaker. The previous op is {}, but the current op is followed by {}.'
+                    .format(node.dependencies[0].name,
+                            self.workflow.nodes[-1].name))
         self.workflow.nodes.extend([node])
 
     def get_op_sequence(self):
@@ -308,8 +308,8 @@ class Server(object):
                 self.model_config_paths[node.name] = path
             print("You have specified multiple model paths, please ensure "
                   "that the input and output of multiple models are the same.")
-            workflow_oi_config_path = list(self.model_config_paths.items())[0][
-                1]
+            workflow_oi_config_path = list(
+                self.model_config_paths.items())[0][1]
         else:
             raise Exception("The type of model_config_paths must be str or "
                             "dict({op: model_path}), not {}.".format(
@@ -367,8 +367,8 @@ class Server(object):
                 if os.path.exists(tar_name):
                     os.remove(tar_name)
                 raise SystemExit(
-                    'Download failed, please check your network or permission of {}.'.
-                    format(self.module_path))
+                    'Download failed, please check your network or permission of {}.'
+                    .format(self.module_path))
             else:
                 try:
                     print('Decompressing files ..')
@@ -379,8 +379,8 @@ class Server(object):
                     if os.path.exists(exe_path):
                         os.remove(exe_path)
                     raise SystemExit(
-                        'Decompressing failed, please check your permission of {} or disk space left.'.
-                        format(self.module_path))
+                        'Decompressing failed, please check your permission of {} or disk space left.'
+                        .format(self.module_path))
                 finally:
                     os.remove(tar_name)
         #release lock
@@ -569,20 +569,20 @@ class MultiLangServerServiceServicer(multi_lang_general_model_service_pb2_grpc.
                     tensor.data = model_result[name].tobytes()
                 else:
                     if v_type == 0:  # int64
-                        tensor.int64_data.extend(model_result[name].reshape(-1)
-                                                 .tolist())
+                        tensor.int64_data.extend(
+                            model_result[name].reshape(-1).tolist())
                     elif v_type == 1:  # float32
-                        tensor.float_data.extend(model_result[name].reshape(-1)
-                                                 .tolist())
+                        tensor.float_data.extend(
+                            model_result[name].reshape(-1).tolist())
                     elif v_type == 2:  # int32
-                        tensor.int_data.extend(model_result[name].reshape(-1)
-                                               .tolist())
+                        tensor.int_data.extend(
+                            model_result[name].reshape(-1).tolist())
                     else:
                         raise Exception("error type.")
                 tensor.shape.extend(list(model_result[name].shape))
                 if name in self.lod_tensor_set_:
-                    tensor.lod.extend(model_result["{}.lod".format(name)]
-                                      .tolist())
+                    tensor.lod.extend(
+                        model_result["{}.lod".format(name)].tolist())
                 inst.tensor_array.append(tensor)
             model_output.insts.append(inst)
             model_output.engine_name = model_name
@@ -601,11 +601,10 @@ class MultiLangServerServiceServicer(multi_lang_general_model_service_pb2_grpc.
     def Inference(self, request, context):
         feed_dict, fetch_names, is_python, log_id = \
             self._unpack_inference_request(request)
-        ret = self.bclient_.predict(
-            feed=feed_dict,
-            fetch=fetch_names,
-            need_variant_tag=True,
-            log_id=log_id)
+        ret = self.bclient_.predict(feed=feed_dict,
+                                    fetch=fetch_names,
+                                    need_variant_tag=True,
+                                    log_id=log_id)
         return self._pack_inference_response(ret, fetch_names, is_python)
 
     def GetClientConfig(self, request, context):
@@ -685,15 +684,14 @@ class MultiLangServer(object):
         default_port = 12000
         self.port_list_ = []
         for i in range(1000):
-            if default_port + i != port and self._port_is_available(default_port
-                                                                    + i):
+            if default_port + i != port and self._port_is_available(
+                    default_port + i):
                 self.port_list_.append(default_port + i)
                 break
-        self.bserver_.prepare_server(
-            workdir=workdir,
-            port=self.port_list_[0],
-            device=device,
-            cube_conf=cube_conf)
+        self.bserver_.prepare_server(workdir=workdir,
+                                     port=self.port_list_[0],
+                                     device=device,
+                                     cube_conf=cube_conf)
         self.set_port(port)
 
     def _launch_brpc_service(self, bserver):
@@ -706,8 +704,8 @@ class MultiLangServer(object):
         return result != 0
 
     def run_server(self):
-        p_bserver = Process(
-            target=self._launch_brpc_service, args=(self.bserver_, ))
+        p_bserver = Process(target=self._launch_brpc_service,
+                            args=(self.bserver_, ))
         p_bserver.start()
         options = [('grpc.max_send_message_length', self.body_size_),
                    ('grpc.max_receive_message_length', self.body_size_)]
diff --git a/python/paddle_serving_server/serve.py b/python/paddle_serving_server/serve.py
index d282ac076e377806e9a3b320b880ffed6300b971..6dfcd04e64ae39a0b61e8ec05c10e2c72bde4741 100644
--- a/python/paddle_serving_server/serve.py
+++ b/python/paddle_serving_server/serve.py
@@ -24,50 +24,58 @@ from flask import Flask, request
 
 
 def parse_args():  # pylint: disable=doc-string-missing
     parser = argparse.ArgumentParser("serve")
-    parser.add_argument(
-        "--thread", type=int, default=10, help="Concurrency of server")
-    parser.add_argument(
-        "--model", type=str, default="", help="Model for serving")
-    parser.add_argument(
-        "--port", type=int, default=9292, help="Port the server")
-    parser.add_argument(
-        "--name", type=str, default="None", help="Web service name")
-    parser.add_argument(
-        "--workdir",
-        type=str,
-        default="workdir",
-        help="Working dir of current service")
-    parser.add_argument(
-        "--device", type=str, default="cpu", help="Type of device")
-    parser.add_argument(
-        "--mem_optim_off",
-        default=False,
-        action="store_true",
-        help="Memory optimize")
-    parser.add_argument(
-        "--ir_optim", default=False, action="store_true", help="Graph optimize")
-    parser.add_argument(
-        "--use_mkl", default=False, action="store_true", help="Use MKL")
-    parser.add_argument(
-        "--max_body_size",
-        type=int,
-        default=512 * 1024 * 1024,
-        help="Limit sizes of messages")
-    parser.add_argument(
-        "--use_multilang",
-        default=False,
-        action="store_true",
-        help="Use Multi-language-service")
-    parser.add_argument(
-        "--product_name",
-        type=str,
-        default=None,
-        help="product_name for authentication")
-    parser.add_argument(
-        "--container_id",
-        type=str,
-        default=None,
-        help="container_id for authentication")
+    parser.add_argument("--thread",
+                        type=int,
+                        default=10,
+                        help="Concurrency of server")
+    parser.add_argument("--model",
+                        type=str,
+                        default="",
+                        help="Model for serving")
+    parser.add_argument("--port",
+                        type=int,
+                        default=9292,
+                        help="Port the server")
+    parser.add_argument("--name",
+                        type=str,
+                        default="None",
+                        help="Web service name")
+    parser.add_argument("--workdir",
+                        type=str,
+                        default="workdir",
+                        help="Working dir of current service")
+    parser.add_argument("--device",
+                        type=str,
+                        default="cpu",
+                        help="Type of device")
+    parser.add_argument("--mem_optim_off",
+                        default=False,
+                        action="store_true",
+                        help="Memory optimize")
+    parser.add_argument("--ir_optim",
+                        default=False,
+                        action="store_true",
+                        help="Graph optimize")
+    parser.add_argument("--use_mkl",
+                        default=False,
+                        action="store_true",
+                        help="Use MKL")
+    parser.add_argument("--max_body_size",
+                        type=int,
+                        default=512 * 1024 * 1024,
+                        help="Limit sizes of messages")
+    parser.add_argument("--use_multilang",
+                        default=False,
+                        action="store_true",
+                        help="Use Multi-language-service")
+    parser.add_argument("--product_name",
+                        type=str,
+                        default=None,
+                        help="product_name for authentication")
+    parser.add_argument("--container_id",
+                        type=str,
+                        default=None,
+                        help="container_id for authentication")
     return parser.parse_args()
 
 
@@ -129,8 +137,9 @@ if __name__ == "__main__":
     else:
         service = WebService(name=args.name)
         service.load_model_config(args.model)
-        service.prepare_server(
-            workdir=args.workdir, port=args.port, device=args.device)
+        service.prepare_server(workdir=args.workdir,
+                               port=args.port,
+                               device=args.device)
         service.run_rpc_service()
 
         app_instance = Flask(__name__)
diff --git a/python/paddle_serving_server_gpu/__init__.py b/python/paddle_serving_server_gpu/__init__.py
index 90208d2547d4dd51df34c6059b246b8b4f315faa..99ad8dc9d8df9008e9dc9775d56f8df121b006cd 100644
--- a/python/paddle_serving_server_gpu/__init__.py
+++ b/python/paddle_serving_server_gpu/__init__.py
@@ -40,49 +40,55 @@ from concurrent import futures
 
 
 def serve_args():
     parser = argparse.ArgumentParser("serve")
-    parser.add_argument(
-        "--thread", type=int, default=2, help="Concurrency of server")
-    parser.add_argument(
-        "--model", type=str, default="", help="Model for serving")
-    parser.add_argument(
-        "--port", type=int, default=9292, help="Port of the starting gpu")
-    parser.add_argument(
-        "--workdir",
-        type=str,
-        default="workdir",
-        help="Working dir of current service")
-    parser.add_argument(
-        "--device", type=str, default="gpu", help="Type of device")
+    parser.add_argument("--thread",
+                        type=int,
+                        default=2,
+                        help="Concurrency of server")
+    parser.add_argument("--model",
+                        type=str,
+                        default="",
+                        help="Model for serving")
+    parser.add_argument("--port",
+                        type=int,
+                        default=9292,
+                        help="Port of the starting gpu")
+    parser.add_argument("--workdir",
+                        type=str,
+                        default="workdir",
+                        help="Working dir of current service")
+    parser.add_argument("--device",
+                        type=str,
+                        default="gpu",
+                        help="Type of device")
     parser.add_argument("--gpu_ids", type=str, default="", help="gpu ids")
-    parser.add_argument(
-        "--name", type=str, default="None", help="Default service name")
-    parser.add_argument(
-        "--mem_optim_off",
-        default=False,
-        action="store_true",
-        help="Memory optimize")
-    parser.add_argument(
-        "--ir_optim", default=False, action="store_true", help="Graph optimize")
-    parser.add_argument(
-        "--max_body_size",
-        type=int,
-        default=512 * 1024 * 1024,
-        help="Limit sizes of messages")
-    parser.add_argument(
-        "--use_multilang",
-        default=False,
-        action="store_true",
-        help="Use Multi-language-service")
-    parser.add_argument(
-        "--product_name",
-        type=str,
-        default=None,
-        help="product_name for authentication")
-    parser.add_argument(
-        "--container_id",
-        type=str,
-        default=None,
-        help="container_id for authentication")
+    parser.add_argument("--name",
+                        type=str,
+                        default="None",
+                        help="Default service name")
+    parser.add_argument("--mem_optim_off",
+                        default=False,
+                        action="store_true",
+                        help="Memory optimize")
+    parser.add_argument("--ir_optim",
+                        default=False,
+                        action="store_true",
+                        help="Graph optimize")
+    parser.add_argument("--max_body_size",
+                        type=int,
+                        default=512 * 1024 * 1024,
+                        help="Limit sizes of messages")
+    parser.add_argument("--use_multilang",
+                        default=False,
+                        action="store_true",
+                        help="Use Multi-language-service")
+    parser.add_argument("--product_name",
+                        type=str,
+                        default=None,
+                        help="product_name for authentication")
+    parser.add_argument("--container_id",
+                        type=str,
+                        default=None,
+                        help="container_id for authentication")
     return parser.parse_args()
 
 
@@ -102,8 +108,8 @@ class OpMaker(object):
 
     def create(self, node_type, engine_name=None, inputs=[], outputs=[]):
         if node_type not in self.op_dict:
-            raise Exception("Op type {} is not supported right now".format(
-                node_type))
+            raise Exception(
+                "Op type {} is not supported right now".format(node_type))
         node = server_sdk.DAGNode()
         # node.name will be used as the infer engine name
         if engine_name:
@@ -151,9 +157,9 @@ class OpSeqMaker(object):
         elif len(node.dependencies) == 1:
             if node.dependencies[0].name != self.workflow.nodes[-1].name:
                 raise Exception(
-                    'You must add op in order in OpSeqMaker. The previous op is {}, but the current op is followed by {}.'.
-                    format(node.dependencies[0].name, self.workflow.nodes[
-                        -1].name))
+                    'You must add op in order in OpSeqMaker. The previous op is {}, but the current op is followed by {}.'
+                    .format(node.dependencies[0].name,
+                            self.workflow.nodes[-1].name))
         self.workflow.nodes.extend([node])
 
     def get_op_sequence(self):
@@ -366,8 +372,8 @@ class Server(object):
                 self.model_config_paths[node.name] = path
             print("You have specified multiple model paths, please ensure "
                   "that the input and output of multiple models are the same.")
-            workflow_oi_config_path = list(self.model_config_paths.items())[0][
-                1]
+            workflow_oi_config_path = list(
+                self.model_config_paths.items())[0][1]
         else:
             raise Exception("The type of model_config_paths must be str or "
                             "dict({op: model_path}), not {}.".format(
@@ -419,8 +425,8 @@ class Server(object):
                 if os.path.exists(tar_name):
                     os.remove(tar_name)
                 raise SystemExit(
-                    'Download failed, please check your network or permission of {}.'.
-                    format(self.module_path))
+                    'Download failed, please check your network or permission of {}.'
+                    .format(self.module_path))
             else:
                 try:
                     print('Decompressing files ..')
@@ -431,8 +437,8 @@ class Server(object):
                     if os.path.exists(exe_path):
                         os.remove(exe_path)
                     raise SystemExit(
-                        'Decompressing failed, please check your permission of {} or disk space left.'.
-                        format(self.module_path))
+                        'Decompressing failed, please check your permission of {} or disk space left.'
+                        .format(self.module_path))
                 finally:
                     os.remove(tar_name)
         #release lock
@@ -630,20 +636,20 @@ class MultiLangServerServiceServicer(multi_lang_general_model_service_pb2_grpc.
                     tensor.data = model_result[name].tobytes()
                 else:
                     if v_type == 0:  # int64
-                        tensor.int64_data.extend(model_result[name].reshape(-1)
-                                                 .tolist())
+                        tensor.int64_data.extend(
+                            model_result[name].reshape(-1).tolist())
                     elif v_type == 1:  # float32
-                        tensor.float_data.extend(model_result[name].reshape(-1)
-                                                 .tolist())
+                        tensor.float_data.extend(
+                            model_result[name].reshape(-1).tolist())
                     elif v_type == 2:  # int32
-                        tensor.int_data.extend(model_result[name].reshape(-1)
-                                               .tolist())
+                        tensor.int_data.extend(
+                            model_result[name].reshape(-1).tolist())
                     else:
                         raise Exception("error type.")
                 tensor.shape.extend(list(model_result[name].shape))
                 if name in self.lod_tensor_set_:
-                    tensor.lod.extend(model_result["{}.lod".format(name)]
-                                      .tolist())
+                    tensor.lod.extend(
+                        model_result["{}.lod".format(name)].tolist())
                 inst.tensor_array.append(tensor)
             model_output.insts.append(inst)
             model_output.engine_name = model_name
@@ -662,11 +668,10 @@ class MultiLangServerServiceServicer(multi_lang_general_model_service_pb2_grpc.
     def Inference(self, request, context):
         feed_dict, fetch_names, is_python, log_id \
             = self._unpack_inference_request(request)
-        ret = self.bclient_.predict(
-            feed=feed_dict,
-            fetch=fetch_names,
-            need_variant_tag=True,
-            log_id=log_id)
+        ret = self.bclient_.predict(feed=feed_dict,
+                                    fetch=fetch_names,
+                                    need_variant_tag=True,
+                                    log_id=log_id)
         return self._pack_inference_response(ret, fetch_names, is_python)
 
     def GetClientConfig(self, request, context):
@@ -743,15 +748,14 @@ class MultiLangServer(object):
         default_port = 12000
         self.port_list_ = []
         for i in range(1000):
-            if default_port + i != port and self._port_is_available(default_port
-                                                                    + i):
+            if default_port + i != port and self._port_is_available(
+                    default_port + i):
                 self.port_list_.append(default_port + i)
                 break
-        self.bserver_.prepare_server(
-            workdir=workdir,
-            port=self.port_list_[0],
-            device=device,
-            cube_conf=cube_conf)
+        self.bserver_.prepare_server(workdir=workdir,
+                                     port=self.port_list_[0],
+                                     device=device,
+                                     cube_conf=cube_conf)
         self.set_port(port)
 
     def _launch_brpc_service(self, bserver):
@@ -764,8 +768,8 @@ class MultiLangServer(object):
         return result != 0
 
     def run_server(self):
-        p_bserver = Process(
-            target=self._launch_brpc_service, args=(self.bserver_, ))
+        p_bserver = Process(target=self._launch_brpc_service,
+                            args=(self.bserver_, ))
         p_bserver.start()
         options = [('grpc.max_send_message_length', self.body_size_),
                    ('grpc.max_receive_message_length', self.body_size_)]
diff --git a/python/paddle_serving_server_gpu/serve.py b/python/paddle_serving_server_gpu/serve.py
index 9388add0306b71822933c5bb2b87aaa678f0f3f6..79d191f73454b2e9b22fc2185ded8b35cdbf4ae8 100644
--- a/python/paddle_serving_server_gpu/serve.py
+++ b/python/paddle_serving_server_gpu/serve.py
@@ -88,8 +88,8 @@ def start_multi_card(args):  # pylint: disable=doc-string-missing
             for ids in gpus:
                 if int(ids) >= len(env_gpus):
                     print(
-                        " Max index of gpu_ids out of range, the number of CUDA_VISIBLE_DEVICES is {}.".
-                        format(len(env_gpus)))
+                        " Max index of gpu_ids out of range, the number of CUDA_VISIBLE_DEVICES is {}."
+                        .format(len(env_gpus)))
                     exit(-1)
         else:
             env_gpus = []
@@ -99,11 +99,11 @@ def start_multi_card(args):  # pylint: disable=doc-string-missing
     else:
        gpu_processes = []
        for i, gpu_id in enumerate(gpus):
-            p = Process(
-                target=start_gpu_card_model, args=(
-                    i,
-                    gpu_id,
-                    args, ))
+            p = Process(target=start_gpu_card_model, args=(
+                i,
+                gpu_id,
+                args,
+            ))
            gpu_processes.append(p)
        for p in gpu_processes:
            p.start()
@@ -125,8 +125,9 @@ if __name__ == "__main__":
         gpu_ids = os.environ["CUDA_VISIBLE_DEVICES"]
         if len(gpu_ids) > 0:
             web_service.set_gpus(gpu_ids)
-        web_service.prepare_server(
-            workdir=args.workdir, port=args.port, device=args.device)
+        web_service.prepare_server(workdir=args.workdir,
+                                   port=args.port,
+                                   device=args.device)
         web_service.run_rpc_service()
 
         app_instance = Flask(__name__)