diff --git a/doc/FAQ.md b/doc/FAQ.md index 3bdd2dfd4739b54bf39b6b3f561c43bab3edabde..bb567b78dbe0bfa08ac199013a5654458be2101f 100644 --- a/doc/FAQ.md +++ b/doc/FAQ.md @@ -12,4 +12,7 @@ client.load_client_config(sys.argv[1]) client.set_rpc_timeout_ms(100000) client.connect(["127.0.0.1:9393"]) - ``` + ``` + +- Q: 执行GPU预测时出现显存不足的问题应该怎么办? + A: 请使用--thread 4参数限制server端的线程数为4,并使用--mem_optim参数开启显存优化选项。 diff --git a/python/paddle_serving_server_gpu/__init__.py b/python/paddle_serving_server_gpu/__init__.py index 0261003a7863d11fb342d1572b124d1cbb533a2b..ee926221321de1e573d6e0131064b005b54e69cb 100644 --- a/python/paddle_serving_server_gpu/__init__.py +++ b/python/paddle_serving_server_gpu/__init__.py @@ -41,7 +41,7 @@ from concurrent import futures def serve_args(): parser = argparse.ArgumentParser("serve") parser.add_argument( - "--thread", type=int, default=10, help="Concurrency of server") + "--thread", type=int, default=4, help="Concurrency of server") parser.add_argument( "--model", type=str, default="", help="Model for serving") parser.add_argument(