diff --git a/README.md b/README.md
index 02187085126e57a98647aedb2e50c2446995a6a5..f20012d0b9c47965e50be0bde6158ac8912419d7 100644
--- a/README.md
+++ b/README.md
@@ -254,7 +254,7 @@ curl -H "Content-Type:application/json" -X POST -d '{"url": "https://paddle-serv
 - [GPU Benchmarks(Chinese)](doc/GPU_BENCHMARKING.md)
 
 ### FAQ
-- [FAQ(Chinese)](doc/FAQ.md)
+- [FAQ(Chinese)](doc/deprecated/FAQ.md)
 
 ### Design
diff --git a/python/paddle_serving_server/serve.py b/python/paddle_serving_server/serve.py
index 279e3a895e975473fc5569c4716368c3dda1d9f1..088e3928f4409eaac4d42d771a72ecc9d13fdbce 100644
--- a/python/paddle_serving_server/serve.py
+++ b/python/paddle_serving_server/serve.py
@@ -38,6 +38,8 @@ def parse_args():  # pylint: disable=doc-string-missing
         help="Working dir of current service")
     parser.add_argument(
         "--device", type=str, default="cpu", help="Type of device")
+    parser.add_argument(
+        "--mem_optim", type=bool, default=False, help="Memory optimize")
     return parser.parse_args()
 
 
@@ -48,6 +50,7 @@ def start_standard_model():  # pylint: disable=doc-string-missing
     port = args.port
     workdir = args.workdir
     device = args.device
+    mem_optim = args.mem_optim
 
     if model == "":
         print("You must specify your serving model")
@@ -67,6 +70,7 @@ def start_standard_model():  # pylint: disable=doc-string-missing
     server = serving.Server()
     server.set_op_sequence(op_seq_maker.get_op_sequence())
     server.set_num_threads(thread_num)
+    server.set_memory_optimize(mem_optim)
     server.load_model_config(model)
     server.prepare_server(workdir=workdir, port=port, device=device)
 
diff --git a/python/paddle_serving_server_gpu/__init__.py b/python/paddle_serving_server_gpu/__init__.py
index 2fd35c6d66e4bf282224a8775f1a6bf0d1c6a8c5..3dd330b18921c81cf17601ff7e52d860f0322f95 100644
--- a/python/paddle_serving_server_gpu/__init__.py
+++ b/python/paddle_serving_server_gpu/__init__.py
@@ -43,6 +43,8 @@ def serve_args():
     parser.add_argument("--gpu_ids", type=str, default="", help="gpu ids")
     parser.add_argument(
         "--name", type=str, default="None", help="Default service name")
+    parser.add_argument(
+        "--mem_optim", type=bool, default=False, help="Memory optimize")
     return parser.parse_args()
 
 
diff --git a/python/paddle_serving_server_gpu/serve.py b/python/paddle_serving_server_gpu/serve.py
index d09efbfc8e1512ecb75b063ad760ce66e1a3159e..cb82e02cbec83324a6cb6029208325d8ce38e263 100644
--- a/python/paddle_serving_server_gpu/serve.py
+++ b/python/paddle_serving_server_gpu/serve.py
@@ -33,6 +33,7 @@ def start_gpu_card_model(index, gpuid, args):  # pylint: disable=doc-string-miss
     port = args.port + index
     thread_num = args.thread
     model = args.model
+    mem_optim = args.mem_optim
     workdir = "{}_{}".format(args.workdir, gpuid)
 
     if model == "":
@@ -53,6 +54,7 @@ def start_gpu_card_model(index, gpuid, args):  # pylint: disable=doc-string-miss
     server = serving.Server()
     server.set_op_sequence(op_seq_maker.get_op_sequence())
     server.set_num_threads(thread_num)
+    server.set_memory_optimize(mem_optim)
     server.load_model_config(model)
     server.prepare_server(workdir=workdir, port=port, device=device)
 
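Note on the new flag: with this patch applied, --mem_optim is read in both the CPU and GPU serve paths and forwarded to server.set_memory_optimize(mem_optim). One caveat worth flagging for review: argparse does not parse string values when type=bool is used, because bool() applied to any non-empty string (including the literal text "False") returns True. In other words, passing --mem_optim False on the command line still enables the optimization; only omitting the flag keeps the default of False. The snippet below is a minimal, self-contained sketch of that behavior plus one common workaround; the str2bool converter and the --mem_optim_checked name are illustrative only and not part of this patch.

import argparse


def str2bool(value):
    # Illustrative converter (not part of the patch): map common
    # true/false spellings to a bool and reject anything unrecognized.
    if value.lower() in ("true", "1", "yes"):
        return True
    if value.lower() in ("false", "0", "no"):
        return False
    raise argparse.ArgumentTypeError("expected a boolean, got %r" % value)


parser = argparse.ArgumentParser()
# As merged: bool() on any non-empty string returns True.
parser.add_argument("--mem_optim", type=bool, default=False)
# Hypothetical alternative using an explicit string-to-bool converter.
parser.add_argument("--mem_optim_checked", type=str2bool, default=False)

args = parser.parse_args(
    ["--mem_optim", "False", "--mem_optim_checked", "False"])
print(args.mem_optim)          # True: bool("False") is True
print(args.mem_optim_checked)  # False: parsed as intended

Given that behavior, an invocation of the patched server would enable memory optimization like this (model directory name is assumed for illustration):

python -m paddle_serving_server.serve --model uci_housing_model --port 9292 --mem_optim True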