diff --git a/python/paddle_serving_server_gpu/__init__.py b/python/paddle_serving_server_gpu/__init__.py index 2fd35c6d66e4bf282224a8775f1a6bf0d1c6a8c5..3dd330b18921c81cf17601ff7e52d860f0322f95 100644 --- a/python/paddle_serving_server_gpu/__init__.py +++ b/python/paddle_serving_server_gpu/__init__.py @@ -43,6 +43,8 @@ def serve_args(): parser.add_argument("--gpu_ids", type=str, default="", help="gpu ids") parser.add_argument( "--name", type=str, default="None", help="Default service name") + parser.add_argument( + "--mem_optim", action="store_true", default=False, help="Memory optimize") return parser.parse_args() diff --git a/python/paddle_serving_server_gpu/serve.py b/python/paddle_serving_server_gpu/serve.py index d09efbfc8e1512ecb75b063ad760ce66e1a3159e..cb82e02cbec83324a6cb6029208325d8ce38e263 100644 --- a/python/paddle_serving_server_gpu/serve.py +++ b/python/paddle_serving_server_gpu/serve.py @@ -33,6 +33,7 @@ def start_gpu_card_model(index, gpuid, args): # pylint: disable=doc-string-miss port = args.port + index thread_num = args.thread model = args.model + mem_optim = args.mem_optim workdir = "{}_{}".format(args.workdir, gpuid) if model == "": @@ -53,6 +54,7 @@ def start_gpu_card_model(index, gpuid, args): # pylint: disable=doc-string-miss server = serving.Server() server.set_op_sequence(op_seq_maker.get_op_sequence()) server.set_num_threads(thread_num) + server.set_memory_optimize(mem_optim) server.load_model_config(model) server.prepare_server(workdir=workdir, port=port, device=device)