Unverified commit a3f85163, authored by Dong Daxiang, committed by GitHub

Merge pull request #463 from MRXLT/server-robust

Bug fix && make the message size limit configurable
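This PR makes brpc's maximum message size configurable end to end: the Python client now seeds the FLAGS_max_body_size gflag from the environment (512 MB default), and both the CPU and GPU Server classes gain a set_max_body_size() setter (64 MB default) plus a --max_body_size argument in the serve entry points, forwarded to the serving binary as -max_body_size. A minimal launch sketch using the setter added below; the OpMaker/OpSeqMaker wiring, model path, and port are illustrative placeholders, not part of this diff:

    # Hypothetical launch script; only set_max_body_size() is introduced by this PR.
    from paddle_serving_server import OpMaker, OpSeqMaker, Server

    op_maker = OpMaker()
    op_seq_maker = OpSeqMaker()
    op_seq_maker.add_op(op_maker.create('general_reader'))
    op_seq_maker.add_op(op_maker.create('general_infer'))
    op_seq_maker.add_op(op_maker.create('general_response'))

    server = Server()
    server.set_op_sequence(op_seq_maker.get_op_sequence())
    server.set_max_body_size(512 * 1024 * 1024)  # accept messages up to 512 MB
    server.load_model_config('serving_server_model')  # placeholder path
    server.prepare_server(workdir='workdir', port=9292, device='cpu')
    server.run_server()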
@@ -108,7 +108,6 @@ void PredictorClient::set_predictor_conf(const std::string &conf_path,
   _predictor_path = conf_path;
   _predictor_conf = conf_file;
 }
-
 int PredictorClient::destroy_predictor() {
   _api.thrd_finalize();
   _api.destroy();
@@ -160,6 +159,7 @@ int PredictorClient::batch_predict(
   VLOG(2) << "fetch general model predictor done.";
   VLOG(2) << "float feed name size: " << float_feed_name.size();
   VLOG(2) << "int feed name size: " << int_feed_name.size();
+  VLOG(2) << "max body size : " << brpc::fLU64::FLAGS_max_body_size;
   Request req;
   for (auto &name : fetch_name) {
     req.add_fetch_var_names(name);
@@ -179,12 +179,16 @@ int PredictorClient::batch_predict(
       tensor_vec.push_back(inst->add_tensor_array());
     }
-    VLOG(2) << "batch [" << bi << "] int_feed_name and float_feed_name"
+    VLOG(2) << "batch [" << bi << "] int_feed_name and float_feed_name "
             << "prepared";
     int vec_idx = 0;
+    VLOG(2) << "tensor_vec size " << tensor_vec.size() << " float shape "
+            << float_shape.size();
     for (auto &name : float_feed_name) {
       int idx = _feed_name_to_idx[name];
       Tensor *tensor = tensor_vec[idx];
+      VLOG(2) << "prepare float feed " << name << " shape size "
+              << float_shape[vec_idx].size();
       for (int j = 0; j < float_shape[vec_idx].size(); ++j) {
         tensor->add_shape(float_shape[vec_idx][j]);
       }
@@ -202,6 +206,8 @@ int PredictorClient::batch_predict(
     for (auto &name : int_feed_name) {
       int idx = _feed_name_to_idx[name];
       Tensor *tensor = tensor_vec[idx];
+      VLOG(2) << "prepare int feed " << name << " shape size "
+              << int_shape[vec_idx].size();
       for (int j = 0; j < int_shape[vec_idx].size(); ++j) {
         tensor->add_shape(int_shape[vec_idx][j]);
       }
@@ -243,8 +249,11 @@ int PredictorClient::batch_predict(
     postprocess_start = client_infer_end;
     for (auto &name : fetch_name) {
-      int idx = _fetch_name_to_idx[name];
+      // int idx = _fetch_name_to_idx[name];
+      int idx = 0;
       int shape_size = res.insts(0).tensor_array(idx).shape_size();
+      VLOG(2) << "fetch var " << name << " index " << idx << " shape size "
+              << shape_size;
       predict_res_batch._shape_map[name].resize(shape_size);
       for (int i = 0; i < shape_size; ++i) {
         predict_res_batch._shape_map[name][i] =
@@ -258,11 +267,14 @@ int PredictorClient::batch_predict(
               res.insts(0).tensor_array(idx).lod(i);
         }
       }
+      idx += 1;
     }
     for (auto &name : fetch_name) {
-      int idx = _fetch_name_to_idx[name];
+      // int idx = _fetch_name_to_idx[name];
+      int idx = 0;
       if (_fetch_name_to_type[name] == 0) {
+        VLOG(2) << "fetch var " << name << " type int";
        predict_res_batch._int64_value_map[name].resize(
            res.insts(0).tensor_array(idx).int64_data_size());
        int size = res.insts(0).tensor_array(idx).int64_data_size();
@@ -271,6 +283,7 @@ int PredictorClient::batch_predict(
               res.insts(0).tensor_array(idx).int64_data(i);
        }
       } else {
+        VLOG(2) << "fetch var " << name << " type float";
        predict_res_batch._float_value_map[name].resize(
            res.insts(0).tensor_array(idx).float_data_size());
        int size = res.insts(0).tensor_array(idx).float_data_size();
@@ -279,6 +292,7 @@ int PredictorClient::batch_predict(
               res.insts(0).tensor_array(idx).float_data(i);
        }
       }
+      idx += 1;
     }
     postprocess_end = timeline.TimeStampUS();
   }
...
@@ -64,6 +64,8 @@ int GeneralResponseOp::inference() {
   std::shared_ptr<PaddleGeneralModelConfig> model_config =
       resource.get_general_model_config();
+  VLOG(2) << "max body size : " << brpc::fLU64::FLAGS_max_body_size;
+
   std::vector<int> fetch_index;
   fetch_index.resize(req->fetch_var_names_size());
   for (int i = 0; i < req->fetch_var_names_size(); ++i) {
...
@@ -111,7 +111,9 @@ class Client(object):
         self.result_handle_ = PredictorRes()
         self.client_handle_ = PredictorClient()
         self.client_handle_.init(path)
-        read_env_flags = ["profile_client", "profile_server"]
+        if "FLAGS_max_body_size" not in os.environ:
+            os.environ["FLAGS_max_body_size"] = str(512 * 1024 * 1024)
+        read_env_flags = ["profile_client", "profile_server", "max_body_size"]
         self.client_handle_.init_gflags([sys.argv[
             0]] + ["--tryfromenv=" + ",".join(read_env_flags)])
         self.feed_names_ = [var.alias_name for var in model_conf.feed_var]
@@ -223,8 +225,6 @@ class Client(object):
         for i, feed_i in enumerate(feed_batch):
             int_slot = []
             float_slot = []
-            int_shape = []
-            float_shape = []
             for key in feed_i:
                 if key not in self.feed_names_:
                     raise ValueError("Wrong feed name: {}.".format(key))
...
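On the client side, the limit rides on the gflags --tryfromenv mechanism shown above: Client.__init__ seeds FLAGS_max_body_size in the environment (512 MB) unless the caller already exported it, so it can be raised without code changes. A sketch, assuming the paddle_serving_client package from this repo; the 1 GB value, config path, and endpoint are illustrative:

    import os

    # Must be exported before Client() runs init_gflags(); otherwise the 512 MB default applies.
    os.environ["FLAGS_max_body_size"] = str(1024 * 1024 * 1024)

    from paddle_serving_client import Client

    client = Client()
    client.load_client_config("serving_client_conf.prototxt")  # illustrative path
    client.connect(["127.0.0.1:9292"])  # illustrative endpoint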
@@ -89,6 +89,7 @@ class Server(object):
         self.num_threads = 4
         self.port = 8080
         self.reload_interval_s = 10
+        self.max_body_size = 64 * 1024 * 1024
         self.module_path = os.path.dirname(paddle_serving_server.__file__)
         self.cur_path = os.getcwd()
         self.use_local_bin = False
@@ -100,6 +101,14 @@ class Server(object):
     def set_num_threads(self, threads):
         self.num_threads = threads

+    def set_max_body_size(self, body_size):
+        if body_size >= self.max_body_size:
+            self.max_body_size = body_size
+        else:
+            print(
+                "max_body_size is less than default value, will use default value in service."
+            )
+
     def set_port(self, port):
         self.port = port
@@ -292,7 +301,8 @@ class Server(object):
                      "-resource_file {} " \
                      "-workflow_path {} " \
                      "-workflow_file {} " \
-                     "-bthread_concurrency {} ".format(
+                     "-bthread_concurrency {} " \
+                     "-max_body_size {} ".format(
                          self.bin_path,
                          self.workdir,
                          self.infer_service_fn,
@@ -304,7 +314,8 @@ class Server(object):
                          self.resource_fn,
                          self.workdir,
                          self.workflow_fn,
-                         self.num_threads)
+                         self.num_threads,
+                         self.max_body_size)
         print("Going to Run Command")
         print(command)
         os.system(command)
@@ -41,6 +41,11 @@ def parse_args():  # pylint: disable=doc-string-missing
         "--device", type=str, default="cpu", help="Type of device")
     parser.add_argument(
         "--mem_optim", type=bool, default=False, help="Memory optimize")
+    parser.add_argument(
+        "--max_body_size",
+        type=int,
+        default=512 * 1024 * 1024,
+        help="Limit sizes of messages")
     return parser.parse_args()
@@ -52,6 +57,7 @@ def start_standard_model():  # pylint: disable=doc-string-missing
     workdir = args.workdir
     device = args.device
     mem_optim = args.mem_optim
+    max_body_size = args.max_body_size

     if model == "":
         print("You must specify your serving model")
@@ -72,6 +78,7 @@ def start_standard_model():  # pylint: disable=doc-string-missing
     server.set_op_sequence(op_seq_maker.get_op_sequence())
     server.set_num_threads(thread_num)
     server.set_memory_optimize(mem_optim)
+    server.set_max_body_size(max_body_size)
     server.load_model_config(model)
     server.prepare_server(workdir=workdir, port=port, device=device)
...
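Note that set_max_body_size() only raises the limit: values below the 64 MB default are rejected with a warning and the default stays in effect, so the effective limit is max(64 MB, requested). For example, given a server built as in the sketch near the top of this page:

    server.set_max_body_size(32 * 1024 * 1024)   # below the default: keeps 64 MB, prints a warning
    server.set_max_body_size(256 * 1024 * 1024)  # raises the limit to 256 MB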
@@ -46,6 +46,11 @@ def serve_args():
         "--name", type=str, default="None", help="Default service name")
     parser.add_argument(
         "--mem_optim", type=bool, default=False, help="Memory optimize")
+    parser.add_argument(
+        "--max_body_size",
+        type=int,
+        default=512 * 1024 * 1024,
+        help="Limit sizes of messages")
     return parser.parse_args()
@@ -114,6 +119,7 @@ class Server(object):
         self.num_threads = 4
         self.port = 8080
         self.reload_interval_s = 10
+        self.max_body_size = 64 * 1024 * 1024
         self.module_path = os.path.dirname(paddle_serving_server.__file__)
         self.cur_path = os.getcwd()
         self.check_cuda()
@@ -126,6 +132,14 @@ class Server(object):
     def set_num_threads(self, threads):
         self.num_threads = threads

+    def set_max_body_size(self, body_size):
+        if body_size >= self.max_body_size:
+            self.max_body_size = body_size
+        else:
+            print(
+                "max_body_size is less than default value, will use default value in service."
+            )
+
     def set_port(self, port):
         self.port = port
@@ -324,7 +338,8 @@ class Server(object):
                      "-workflow_path {} " \
                      "-workflow_file {} " \
                      "-bthread_concurrency {} " \
-                     "-gpuid {} ".format(
+                     "-gpuid {} " \
+                     "-max_body_size {} ".format(
                          self.bin_path,
                          self.workdir,
                          self.infer_service_fn,
@@ -337,7 +352,8 @@ class Server(object):
                          self.workdir,
                          self.workflow_fn,
                          self.num_threads,
-                         self.gpuid,)
+                         self.gpuid,
+                         self.max_body_size)
         print("Going to Run Command")
         print(command)
...
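The GPU package mirrors the CPU changes, adding -max_body_size after -gpuid in the launch command. A GPU-side sketch, assuming paddle_serving_server_gpu exposes the same Server interface as its CPU counterpart; the GPU id is illustrative:

    from paddle_serving_server_gpu import Server

    gpu_server = Server()
    gpu_server.set_max_body_size(512 * 1024 * 1024)  # same clamp-to-default behavior as the CPU server
    gpu_server.set_gpuid(0)  # illustrative GPU id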
@@ -35,6 +35,7 @@ def start_gpu_card_model(index, gpuid, args):  # pylint: disable=doc-string-miss
     thread_num = args.thread
     model = args.model
     mem_optim = args.mem_optim
+    max_body_size = args.max_body_size
     workdir = "{}_{}".format(args.workdir, gpuid)

     if model == "":
@@ -56,6 +57,7 @@ def start_gpu_card_model(index, gpuid, args):  # pylint: disable=doc-string-miss
     server.set_op_sequence(op_seq_maker.get_op_sequence())
     server.set_num_threads(thread_num)
     server.set_memory_optimize(mem_optim)
+    server.set_max_body_size(max_body_size)
     server.load_model_config(model)
     server.prepare_server(workdir=workdir, port=port, device=device)
...