提交 2f3f9bb6 编写于 作者: M MRXLT

optimize memory

上级 ab34adee
...@@ -124,7 +124,7 @@ python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --po ...@@ -124,7 +124,7 @@ python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --po
| `port` | int | `9292` | Exposed port of current service to users| | `port` | int | `9292` | Exposed port of current service to users|
| `name` | str | `""` | Service name, can be used to generate HTTP request url | | `name` | str | `""` | Service name, can be used to generate HTTP request url |
| `model` | str | `""` | Path of paddle model directory to be served | | `model` | str | `""` | Path of paddle model directory to be served |
| `mem_optim` | - | - | Enable memory / graphic memory optimization | | `mem_optim_off` | - | - | Disable memory / graphic memory optimization |
| `ir_optim` | - | - | Enable analysis and optimization of calculation graph | | `ir_optim` | - | - | Enable analysis and optimization of calculation graph |
| `use_mkl` (Only for cpu version) | - | - | Run inference with MKL | | `use_mkl` (Only for cpu version) | - | - | Run inference with MKL |
......
...@@ -120,7 +120,7 @@ python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --po ...@@ -120,7 +120,7 @@ python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --po
| `port` | int | `9292` | Exposed port of current service to users| | `port` | int | `9292` | Exposed port of current service to users|
| `name` | str | `""` | Service name, can be used to generate HTTP request url | | `name` | str | `""` | Service name, can be used to generate HTTP request url |
| `model` | str | `""` | Path of paddle model directory to be served | | `model` | str | `""` | Path of paddle model directory to be served |
| `mem_optim` | - | - | Enable memory optimization | | `mem_optim_off` | - | - | Disable memory optimization |
| `ir_optim` | - | - | Enable analysis and optimization of calculation graph | | `ir_optim` | - | - | Enable analysis and optimization of calculation graph |
| `use_mkl` (Only for cpu version) | - | - | Run inference with MKL | | `use_mkl` (Only for cpu version) | - | - | Run inference with MKL |
......
...@@ -40,8 +40,8 @@ ExternalProject_Add( ...@@ -40,8 +40,8 @@ ExternalProject_Add(
extern_brpc extern_brpc
${EXTERNAL_PROJECT_LOG_ARGS} ${EXTERNAL_PROJECT_LOG_ARGS}
# TODO(gongwb): change to the newest repo when they change it. # TODO(gongwb): change to the newest repo when they change it.
GIT_REPOSITORY "https://github.com/gongweibao/brpc" GIT_REPOSITORY "https://github.com/wangjiawei04/brpc"
GIT_TAG "e9b67ec1b7458f2af5fae76451afe1e27e01b4b4" GIT_TAG "6d79e0b17f25107c35b705ea58d888083f59ff47"
PREFIX ${BRPC_SOURCES_DIR} PREFIX ${BRPC_SOURCES_DIR}
UPDATE_COMMAND "" UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
......
...@@ -13,6 +13,3 @@ ...@@ -13,6 +13,3 @@
client.set_rpc_timeout_ms(100000) client.set_rpc_timeout_ms(100000)
client.connect(["127.0.0.1:9393"]) client.connect(["127.0.0.1:9393"])
``` ```
- Q: 执行GPU预测时出现显存不足的问题应该怎么办?
A: 请使用--thread 4参数限制server端的线程数为4,并使用--mem_optim参数开启显存优化选项。
...@@ -40,7 +40,7 @@ def parse_args(): # pylint: disable=doc-string-missing ...@@ -40,7 +40,7 @@ def parse_args(): # pylint: disable=doc-string-missing
parser.add_argument( parser.add_argument(
"--device", type=str, default="cpu", help="Type of device") "--device", type=str, default="cpu", help="Type of device")
parser.add_argument( parser.add_argument(
"--mem_optim", "--mem_optim_off",
default=False, default=False,
action="store_true", action="store_true",
help="Memory optimize") help="Memory optimize")
...@@ -68,7 +68,7 @@ def start_standard_model(): # pylint: disable=doc-string-missing ...@@ -68,7 +68,7 @@ def start_standard_model(): # pylint: disable=doc-string-missing
port = args.port port = args.port
workdir = args.workdir workdir = args.workdir
device = args.device device = args.device
mem_optim = args.mem_optim mem_optim = args.mem_optim_off is False
ir_optim = args.ir_optim ir_optim = args.ir_optim
max_body_size = args.max_body_size max_body_size = args.max_body_size
use_mkl = args.use_mkl use_mkl = args.use_mkl
......
...@@ -41,6 +41,8 @@ class WebService(object): ...@@ -41,6 +41,8 @@ class WebService(object):
server = Server() server = Server()
server.set_op_sequence(op_seq_maker.get_op_sequence()) server.set_op_sequence(op_seq_maker.get_op_sequence())
server.set_num_threads(16) server.set_num_threads(16)
server.set_memory_optimize(self.mem_optim)
server.set_ir_optimize(self.ir_optim)
server.load_model_config(self.model_config) server.load_model_config(self.model_config)
server.prepare_server( server.prepare_server(
workdir=self.workdir, port=self.port_list[0], device=self.device) workdir=self.workdir, port=self.port_list[0], device=self.device)
...@@ -55,12 +57,19 @@ class WebService(object): ...@@ -55,12 +57,19 @@ class WebService(object):
else: else:
return False return False
def prepare_server(self, workdir="", port=9393, device="cpu"): def prepare_server(self,
workdir="",
port=9393,
device="cpu",
mem_optim=True,
ir_optim=False):
self.workdir = workdir self.workdir = workdir
self.port = port self.port = port
self.device = device self.device = device
default_port = 12000 default_port = 12000
self.port_list = [] self.port_list = []
self.mem_optim = mem_optim
self.ir_optim = ir_optim
for i in range(1000): for i in range(1000):
if self.port_is_available(default_port + i): if self.port_is_available(default_port + i):
self.port_list.append(default_port + i) self.port_list.append(default_port + i)
......
...@@ -41,7 +41,7 @@ from concurrent import futures ...@@ -41,7 +41,7 @@ from concurrent import futures
def serve_args(): def serve_args():
parser = argparse.ArgumentParser("serve") parser = argparse.ArgumentParser("serve")
parser.add_argument( parser.add_argument(
"--thread", type=int, default=4, help="Concurrency of server") "--thread", type=int, default=2, help="Concurrency of server")
parser.add_argument( parser.add_argument(
"--model", type=str, default="", help="Model for serving") "--model", type=str, default="", help="Model for serving")
parser.add_argument( parser.add_argument(
...@@ -57,7 +57,7 @@ def serve_args(): ...@@ -57,7 +57,7 @@ def serve_args():
parser.add_argument( parser.add_argument(
"--name", type=str, default="None", help="Default service name") "--name", type=str, default="None", help="Default service name")
parser.add_argument( parser.add_argument(
"--mem_optim", "--mem_optim_off",
default=False, default=False,
action="store_true", action="store_true",
help="Memory optimize") help="Memory optimize")
...@@ -187,7 +187,7 @@ class Server(object): ...@@ -187,7 +187,7 @@ class Server(object):
self.cube_config_fn = "cube.conf" self.cube_config_fn = "cube.conf"
self.workdir = "" self.workdir = ""
self.max_concurrency = 0 self.max_concurrency = 0
self.num_threads = 4 self.num_threads = 2
self.port = 8080 self.port = 8080
self.reload_interval_s = 10 self.reload_interval_s = 10
self.max_body_size = 64 * 1024 * 1024 self.max_body_size = 64 * 1024 * 1024
......
...@@ -34,7 +34,7 @@ def start_gpu_card_model(index, gpuid, args): # pylint: disable=doc-string-miss ...@@ -34,7 +34,7 @@ def start_gpu_card_model(index, gpuid, args): # pylint: disable=doc-string-miss
port = args.port + index port = args.port + index
thread_num = args.thread thread_num = args.thread
model = args.model model = args.model
mem_optim = args.mem_optim mem_optim = args.mem_optim_off is False
ir_optim = args.ir_optim ir_optim = args.ir_optim
max_body_size = args.max_body_size max_body_size = args.max_body_size
use_multilang = args.use_multilang use_multilang = args.use_multilang
......
...@@ -41,7 +41,9 @@ class WebService(object): ...@@ -41,7 +41,9 @@ class WebService(object):
workdir="conf", workdir="conf",
port=9292, port=9292,
gpuid=0, gpuid=0,
thread_num=10): thread_num=2,
mem_optim=True,
ir_optim=False):
device = "gpu" device = "gpu"
if gpuid == -1: if gpuid == -1:
device = "cpu" device = "cpu"
...@@ -58,6 +60,8 @@ class WebService(object): ...@@ -58,6 +60,8 @@ class WebService(object):
server = Server() server = Server()
server.set_op_sequence(op_seq_maker.get_op_sequence()) server.set_op_sequence(op_seq_maker.get_op_sequence())
server.set_num_threads(thread_num) server.set_num_threads(thread_num)
server.set_mem_optim(mem_optim)
server.set_ir_optim(ir_optim)
server.load_model_config(self.model_config) server.load_model_config(self.model_config)
if gpuid >= 0: if gpuid >= 0:
...@@ -77,7 +81,13 @@ class WebService(object): ...@@ -77,7 +81,13 @@ class WebService(object):
else: else:
return False return False
def prepare_server(self, workdir="", port=9393, device="gpu", gpuid=0): def prepare_server(self,
workdir="",
port=9393,
device="gpu",
gpuid=0,
mem_optim=True,
ir_optim=False):
self.workdir = workdir self.workdir = workdir
self.port = port self.port = port
self.device = device self.device = device
...@@ -94,7 +104,12 @@ class WebService(object): ...@@ -94,7 +104,12 @@ class WebService(object):
# init cpu service # init cpu service
self.rpc_service_list.append( self.rpc_service_list.append(
self.default_rpc_service( self.default_rpc_service(
self.workdir, self.port_list[0], -1, thread_num=10)) self.workdir,
self.port_list[0],
-1,
thread_num=2,
mem_optim,
ir_optim))
else: else:
for i, gpuid in enumerate(self.gpus): for i, gpuid in enumerate(self.gpus):
self.rpc_service_list.append( self.rpc_service_list.append(
...@@ -102,7 +117,9 @@ class WebService(object): ...@@ -102,7 +117,9 @@ class WebService(object):
"{}_{}".format(self.workdir, i), "{}_{}".format(self.workdir, i),
self.port_list[i], self.port_list[i],
gpuid, gpuid,
thread_num=10)) thread_num=2,
mem_optim,
ir_optim))
def _launch_web_service(self): def _launch_web_service(self):
gpu_num = len(self.gpus) gpu_num = len(self.gpus)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册