diff --git a/python/paddle_serving_server_gpu/serve.py b/python/paddle_serving_server_gpu/serve.py
index 1a88797b285c0b168e52e54755da3b7ea5bad434..12aab2697e0a7966df91518f891e38489d354c76 100644
--- a/python/paddle_serving_server_gpu/serve.py
+++ b/python/paddle_serving_server_gpu/serve.py
@@ -18,6 +18,7 @@ Usage:
     python -m paddle_serving_server.serve --model ./serving_server_model --port 9292
 """
 import argparse
+from multiprocessing import Process
 
 
 def parse_args():
@@ -27,7 +28,7 @@ def parse_args():
     parser.add_argument(
         "--model", type=str, default="", help="Model for serving")
     parser.add_argument(
-        "--port", type=int, default=9292, help="Port the server")
+        "--port", type=int, default=9292, help="Base port of the service")
     parser.add_argument(
         "--workdir",
         type=str,
@@ -35,18 +36,23 @@ def parse_args():
         help="Working dir of current service")
     parser.add_argument(
         "--device", type=str, default="gpu", help="Type of device")
-    parser.add_argument("--gpuid", type=int, default=0, help="Index of GPU")
+    parser.add_argument(
+        "--gpu_ids", type=str, default="", help="Comma-separated GPU ids")
     return parser.parse_args()
 
+args = parse_args()
 
-def start_standard_model():
-    args = parse_args()
+def start_gpu_card_model(gpuid):
+    device = "gpu"
+    port = args.port
+    if gpuid == -1:
+        device = "cpu"
+    else:
+        # each GPU worker binds its own port, offset by the card index
+        port = args.port + gpuid
     thread_num = args.thread
     model = args.model
-    port = args.port
     workdir = args.workdir
-    device = args.device
-    gpuid = args.gpuid
 
     if model == "":
         print("You must specify your serving model")
@@ -69,9 +75,21 @@
     server.load_model_config(model)
     server.prepare_server(workdir=workdir, port=port, device=device)
-    server.set_gpuid(gpuid)
+    if gpuid >= 0:
+        server.set_gpuid(gpuid)
     server.run_server()
 
 if __name__ == "__main__":
-    start_standard_model()
+    gpus = args.gpu_ids.split(",") if args.gpu_ids else []
+    if len(gpus) <= 0:
+        # no GPU ids given: fall back to a single CPU server
+        start_gpu_card_model(-1)
+    else:
+        # one serving process per GPU card
+        gpu_processes = []
+        for i, gpu_id in enumerate(gpus):
+            p = Process(target=start_gpu_card_model, args=(i, ))
+            gpu_processes.append(p)
+        for p in gpu_processes:
+            p.start()
+        for p in gpu_processes:
+            p.join()
diff --git a/python/paddle_serving_server_gpu/web_serve.py b/python/paddle_serving_server_gpu/web_serve.py
index e7b44034797a8de75ca8dc5d97f7dc93c9671954..9f010f06e7100468de187f31b0dafa2a3e254da6 100644
--- a/python/paddle_serving_server_gpu/web_serve.py
+++ b/python/paddle_serving_server_gpu/web_serve.py
@@ -37,6 +37,8 @@ def parse_args():
         help="Working dir of current service")
     parser.add_argument(
         "--device", type=str, default="cpu", help="Type of device")
+    parser.add_argument(
+        "--gpu_ids", type=str, default="", help="GPU ids of current service")
     parser.add_argument(
         "--name", type=str, default="default", help="Default service name")
     return parser.parse_args()
@@ -48,4 +50,4 @@ if __name__ == "__main__":
     service.load_model_config(args.model)
     service.prepare_server(
         workdir=args.workdir, port=args.port, device=args.device)
-    service.run_server()
+    service.run_server(args.gpu_ids)
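Aside: the new __main__ block in serve.py reduces to a plain per-card process fan-out. The sketch below shows just that pattern in isolation, assuming nothing about Paddle Serving itself; serve_on_gpu is a hypothetical stand-in for start_gpu_card_model:

    from multiprocessing import Process

    def serve_on_gpu(index):
        # a real worker would pin its GPU and bind base_port + index here
        print("worker {} starting".format(index))

    if __name__ == "__main__":
        gpu_ids = "0,1"  # what --gpu_ids would carry
        gpus = gpu_ids.split(",") if gpu_ids else []
        if not gpus:
            serve_on_gpu(-1)  # CPU fallback, matching gpuid == -1 above
        else:
            workers = [Process(target=serve_on_gpu, args=(i, ))
                       for i, _ in enumerate(gpus)]
            for w in workers:
                w.start()
            for w in workers:
                w.join()

Note that start_gpu_card_model reads the module-level args, which is why parse_args() now runs at import time rather than inside the function.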
diff --git a/python/paddle_serving_server_gpu/web_service.py b/python/paddle_serving_server_gpu/web_service.py
index 3f129a45853b02711f96953b0b902015d2f2d3e8..b4f80ccd4a59910c88710a827be42da604ec1d80 100755
--- a/python/paddle_serving_server_gpu/web_service.py
+++ b/python/paddle_serving_server_gpu/web_service.py
@@ -25,7 +25,15 @@ class WebService(object):
     def load_model_config(self, model_config):
         self.model_config = model_config
 
-    def _launch_rpc_service(self):
+    def _launch_rpc_service(self, gpuid):
+        if gpuid < 0:
+            device = "cpu"
+            # keep the CPU worker off the web port
+            port = self.port + 1
+        else:
+            device = "gpu"
+            port = self.port + gpuid + 1
+
         op_maker = OpMaker()
         read_op = op_maker.create('general_reader')
         general_infer_op = op_maker.create('general_infer')
@@ -36,11 +44,13 @@ class WebService(object):
         op_seq_maker.add_op(general_response_op)
         server = Server()
         server.set_op_sequence(op_seq_maker.get_op_sequence())
-        server.set_num_threads(16)
-        server.set_gpuid = self.gpuid
+        server.set_num_threads(10)
+        if gpuid >= 0:
+            server.set_gpuid(gpuid)
         server.load_model_config(self.model_config)
         server.prepare_server(
-            workdir=self.workdir, port=self.port + 1, device=self.device)
+            workdir="{}_{}".format(self.workdir, gpuid),
+            port=port, device=device)
         server.run_server()
 
     def prepare_server(self, workdir="", port=9393, device="gpu", gpuid=0):
@@ -74,18 +84,31 @@ class WebService(object):
             threaded=False,
             processes=1)
 
-    def run_server(self):
+    def run_server(self, gpu_ids):
         import socket
         localIP = socket.gethostbyname(socket.gethostname())
         print("web service address:")
         print("http://{}:{}/{}/prediction".format(localIP, self.port,
                                                   self.name))
-        p_rpc = Process(target=self._launch_rpc_service)
-        p_web = Process(target=self._launch_web_service)
-        p_rpc.start()
-        p_web.start()
-        p_web.join()
-        p_rpc.join()
+
+        gpus = gpu_ids.split(",") if gpu_ids else []
+        if len(gpus) <= 0:
+            # no GPU ids: run a single CPU RPC worker
+            gpu_processes = [
+                Process(target=self._launch_rpc_service, args=(-1, ))
+            ]
+        else:
+            gpu_processes = []
+            for i, gpu_id in enumerate(gpus):
+                p = Process(target=self._launch_rpc_service, args=(i, ))
+                gpu_processes.append(p)
+        for p in gpu_processes:
+            p.start()
+        p_web = Process(target=self._launch_web_service)
+        p_web.start()
+        for p in gpu_processes:
+            p.join()
+        p_web.join()
 
     def preprocess(self, feed={}, fetch=[]):
         return feed, fetch
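For clarity, the addressing scheme WebService ends up with: the Flask front-end keeps self.port, and the RPC worker at position i binds self.port + i + 1 with its own working directory (the CPU fallback takes port + 1). A small self-contained sketch of that layout; rpc_endpoints is a hypothetical helper, not part of the patch:

    def rpc_endpoints(port, workdir, gpu_ids):
        # mirrors the port/workdir math in _launch_rpc_service
        gpus = gpu_ids.split(",") if gpu_ids else []
        if not gpus:
            # CPU fallback: one worker right next to the web port
            return [("cpu", port + 1, "{}_{}".format(workdir, -1))]
        return [("gpu:" + g, port + i + 1, "{}_{}".format(workdir, i))
                for i, g in enumerate(gpus)]

    print(rpc_endpoints(9393, "workdir", "0,1"))
    # [('gpu:0', 9394, 'workdir_0'), ('gpu:1', 9395, 'workdir_1')]

One consequence worth noting: ports and workdirs are offset by the position in --gpu_ids, not by the GPU id itself, so --gpu_ids 2,3 still yields consecutive ports starting at port + 1.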