diff --git a/python/paddle_serving_server_gpu/__init__.py b/python/paddle_serving_server_gpu/__init__.py index 5fa4f010f2112bd400b81ba2f616e4ebe963a810..5a06bd712a836617047b0cc947956fc5d2213daa 100644 --- a/python/paddle_serving_server_gpu/__init__.py +++ b/python/paddle_serving_server_gpu/__init__.py @@ -47,6 +47,8 @@ def serve_args(): "--name", type=str, default="None", help="Default service name") parser.add_argument( "--mem_optim", type=bool, default=False, help="Memory optimize") + parser.add_argument( + "--ir_optim", type=bool, default=False, help="Graph optimize") parser.add_argument( "--max_body_size", type=int, @@ -156,6 +158,7 @@ class Server(object): self.model_toolkit_conf = None self.resource_conf = None self.memory_optimization = False + self.ir_optimization = False self.model_conf = None self.workflow_fn = "workflow.prototxt" self.resource_fn = "resource.prototxt" @@ -204,6 +207,9 @@ class Server(object): def set_memory_optimize(self, flag=False): self.memory_optimization = flag + def set_ir_optimize(self, flag=False): + self.ir_optimization = flag + def check_local_bin(self): if "SERVING_BIN" in os.environ: self.use_local_bin = True @@ -240,6 +246,7 @@ class Server(object): engine.enable_batch_align = 0 engine.model_data_path = model_config_path engine.enable_memory_optimization = self.memory_optimization + engine.enable_ir_optimization = self.ir_optimization engine.static_optimization = False engine.force_update_static_cache = False diff --git a/python/paddle_serving_server_gpu/serve.py b/python/paddle_serving_server_gpu/serve.py index 512b5ec0a7d15a030afdcaa5e8daa344b29fb96e..297ff25d2084bead186fa4b9037e5de8282df0fe 100644 --- a/python/paddle_serving_server_gpu/serve.py +++ b/python/paddle_serving_server_gpu/serve.py @@ -35,6 +35,7 @@ def start_gpu_card_model(index, gpuid, args): # pylint: disable=doc-string-miss thread_num = args.thread model = args.model mem_optim = args.mem_optim + ir_optim = args.ir_optim max_body_size = args.max_body_size workdir = "{}_{}".format(args.workdir, gpuid) @@ -57,6 +58,7 @@ def start_gpu_card_model(index, gpuid, args): # pylint: disable=doc-string-miss server.set_op_sequence(op_seq_maker.get_op_sequence()) server.set_num_threads(thread_num) server.set_memory_optimize(mem_optim) + server.set_ir_optimize(ir_optim) server.set_max_body_size(max_body_size) server.load_model_config(model)