diff --git a/core/general-client/src/general_model.cpp b/core/general-client/src/general_model.cpp
index 55a4b508b3f9c7a960bd09ff16355de65d2c4705..0270caf91441745ab02d460de8c6396462a3a3fe 100644
--- a/core/general-client/src/general_model.cpp
+++ b/core/general-client/src/general_model.cpp
@@ -108,7 +108,6 @@ void PredictorClient::set_predictor_conf(const std::string &conf_path,
   _predictor_path = conf_path;
   _predictor_conf = conf_file;
 }
-
 int PredictorClient::destroy_predictor() {
   _api.thrd_finalize();
   _api.destroy();
@@ -160,6 +159,7 @@ int PredictorClient::batch_predict(
   VLOG(2) << "fetch general model predictor done.";
   VLOG(2) << "float feed name size: " << float_feed_name.size();
   VLOG(2) << "int feed name size: " << int_feed_name.size();
+  VLOG(2) << "max body size : " << brpc::fLU64::FLAGS_max_body_size;
   Request req;
   for (auto &name : fetch_name) {
     req.add_fetch_var_names(name);
@@ -179,12 +179,16 @@ int PredictorClient::batch_predict(
       tensor_vec.push_back(inst->add_tensor_array());
     }
 
-    VLOG(2) << "batch [" << bi << "] int_feed_name and float_feed_name"
+    VLOG(2) << "batch [" << bi << "] int_feed_name and float_feed_name "
             << "prepared";
     int vec_idx = 0;
+    VLOG(2) << "tensor_vec size " << tensor_vec.size() << " float shape "
+            << float_shape.size();
     for (auto &name : float_feed_name) {
       int idx = _feed_name_to_idx[name];
       Tensor *tensor = tensor_vec[idx];
+      VLOG(2) << "prepare float feed " << name << " shape size "
+              << float_shape[vec_idx].size();
       for (int j = 0; j < float_shape[vec_idx].size(); ++j) {
         tensor->add_shape(float_shape[vec_idx][j]);
       }
@@ -202,6 +206,8 @@ int PredictorClient::batch_predict(
     for (auto &name : int_feed_name) {
       int idx = _feed_name_to_idx[name];
       Tensor *tensor = tensor_vec[idx];
+      VLOG(2) << "prepare int feed " << name << " shape size "
+              << int_shape[vec_idx].size();
       for (int j = 0; j < int_shape[vec_idx].size(); ++j) {
         tensor->add_shape(int_shape[vec_idx][j]);
       }
@@ -243,8 +249,11 @@ int PredictorClient::batch_predict(
     postprocess_start = client_infer_end;
 
     for (auto &name : fetch_name) {
-      int idx = _fetch_name_to_idx[name];
+      // int idx = _fetch_name_to_idx[name];
+      int idx = 0;
       int shape_size = res.insts(0).tensor_array(idx).shape_size();
+      VLOG(2) << "fetch var " << name << " index " << idx << " shape size "
+              << shape_size;
       predict_res_batch._shape_map[name].resize(shape_size);
       for (int i = 0; i < shape_size; ++i) {
         predict_res_batch._shape_map[name][i] =
@@ -258,11 +267,14 @@ int PredictorClient::batch_predict(
               res.insts(0).tensor_array(idx).lod(i);
         }
       }
+      idx += 1;
     }
 
     for (auto &name : fetch_name) {
-      int idx = _fetch_name_to_idx[name];
+      // int idx = _fetch_name_to_idx[name];
+      int idx = 0;
       if (_fetch_name_to_type[name] == 0) {
+        VLOG(2) << "fetch var " << name << " type int";
         predict_res_batch._int64_value_map[name].resize(
             res.insts(0).tensor_array(idx).int64_data_size());
         int size = res.insts(0).tensor_array(idx).int64_data_size();
@@ -271,6 +283,7 @@ int PredictorClient::batch_predict(
               res.insts(0).tensor_array(idx).int64_data(i);
         }
       } else {
+        VLOG(2) << "fetch var " << name << " type float";
         predict_res_batch._float_value_map[name].resize(
             res.insts(0).tensor_array(idx).float_data_size());
         int size = res.insts(0).tensor_array(idx).float_data_size();
@@ -279,6 +292,7 @@ int PredictorClient::batch_predict(
               res.insts(0).tensor_array(idx).float_data(i);
         }
       }
+      idx += 1;
     }
     postprocess_end = timeline.TimeStampUS();
   }
diff --git a/core/general-server/op/general_response_op.cpp b/core/general-server/op/general_response_op.cpp
index c4d4c253983e0efe4dd75486d052e3e46fe489fe..b84cb199001a864ba55de7fe278262c650e98c22 100644
--- a/core/general-server/op/general_response_op.cpp
+++ b/core/general-server/op/general_response_op.cpp
@@ -64,6 +64,8 @@ int GeneralResponseOp::inference() {
   std::shared_ptr<PaddleGeneralModelConfig> model_config =
       resource.get_general_model_config();
 
+  VLOG(2) << "max body size : " << brpc::fLU64::FLAGS_max_body_size;
+
   std::vector<int> fetch_index;
   fetch_index.resize(req->fetch_var_names_size());
   for (int i = 0; i < req->fetch_var_names_size(); ++i) {
diff --git a/python/paddle_serving_client/__init__.py b/python/paddle_serving_client/__init__.py
index 91a5929916474387338def342e4839f7fd817cb5..98d233f059a1ad0b588bce5bf3ef831d783c3a44 100644
--- a/python/paddle_serving_client/__init__.py
+++ b/python/paddle_serving_client/__init__.py
@@ -111,7 +111,9 @@ class Client(object):
         self.result_handle_ = PredictorRes()
         self.client_handle_ = PredictorClient()
         self.client_handle_.init(path)
-        read_env_flags = ["profile_client", "profile_server"]
+        if "FLAGS_max_body_size" not in os.environ:
+            os.environ["FLAGS_max_body_size"] = str(512 * 1024 * 1024)
+        read_env_flags = ["profile_client", "profile_server", "max_body_size"]
         self.client_handle_.init_gflags([sys.argv[
             0]] + ["--tryfromenv=" + ",".join(read_env_flags)])
         self.feed_names_ = [var.alias_name for var in model_conf.feed_var]
@@ -223,8 +225,6 @@ class Client(object):
         for i, feed_i in enumerate(feed_batch):
             int_slot = []
             float_slot = []
-            int_shape = []
-            float_shape = []
             for key in feed_i:
                 if key not in self.feed_names_:
                     raise ValueError("Wrong feed name: {}.".format(key))
diff --git a/python/paddle_serving_server/__init__.py b/python/paddle_serving_server/__init__.py
index 08f3febc14ecd0d3c3cde7dc54f17bd143f9d5b3..6da0e6597e69d605b7172139d32c00b77d538386 100644
--- a/python/paddle_serving_server/__init__.py
+++ b/python/paddle_serving_server/__init__.py
@@ -89,6 +89,7 @@ class Server(object):
         self.num_threads = 4
         self.port = 8080
         self.reload_interval_s = 10
+        self.max_body_size = 64 * 1024 * 1024
         self.module_path = os.path.dirname(paddle_serving_server.__file__)
         self.cur_path = os.getcwd()
         self.use_local_bin = False
@@ -100,6 +101,14 @@ class Server(object):
     def set_num_threads(self, threads):
         self.num_threads = threads
 
+    def set_max_body_size(self, body_size):
+        if body_size >= self.max_body_size:
+            self.max_body_size = body_size
+        else:
+            print(
+                "max_body_size is less than default value, will use default value in service."
+            )
+
     def set_port(self, port):
         self.port = port
 
@@ -292,7 +301,8 @@ class Server(object):
                   "-resource_file {} " \
                   "-workflow_path {} " \
                   "-workflow_file {} " \
-                  "-bthread_concurrency {} ".format(
+                  "-bthread_concurrency {} " \
+                  "-max_body_size {} ".format(
                       self.bin_path,
                       self.workdir,
                       self.infer_service_fn,
@@ -304,7 +314,8 @@ class Server(object):
                       self.resource_fn,
                       self.workdir,
                       self.workflow_fn,
-                      self.num_threads)
+                      self.num_threads,
+                      self.max_body_size)
         print("Going to Run Command")
         print(command)
         os.system(command)
diff --git a/python/paddle_serving_server/serve.py b/python/paddle_serving_server/serve.py
index b57d2253dbe1f14caff50eb79543f224b8d0ec45..396ed17c2074923003515b26352e87fc8309a252 100644
--- a/python/paddle_serving_server/serve.py
+++ b/python/paddle_serving_server/serve.py
@@ -41,6 +41,11 @@ def parse_args():  # pylint: disable=doc-string-missing
         "--device", type=str, default="cpu", help="Type of device")
     parser.add_argument(
         "--mem_optim", type=bool, default=False, help="Memory optimize")
+    parser.add_argument(
+        "--max_body_size",
+        type=int,
+        default=512 * 1024 * 1024,
+        help="Limit sizes of messages")
     return parser.parse_args()
 
 
@@ -52,6 +57,7 @@ def start_standard_model():  # pylint: disable=doc-string-missing
     workdir = args.workdir
     device = args.device
     mem_optim = args.mem_optim
+    max_body_size = args.max_body_size
 
     if model == "":
         print("You must specify your serving model")
@@ -72,6 +78,7 @@ def start_standard_model():  # pylint: disable=doc-string-missing
     server.set_op_sequence(op_seq_maker.get_op_sequence())
     server.set_num_threads(thread_num)
     server.set_memory_optimize(mem_optim)
+    server.set_max_body_size(max_body_size)
 
     server.load_model_config(model)
     server.prepare_server(workdir=workdir, port=port, device=device)
diff --git a/python/paddle_serving_server_gpu/__init__.py b/python/paddle_serving_server_gpu/__init__.py
index d4984c39df866dcca45daa45d9fc15feaaba8635..0e5a49c4870956557c99fdf8abf08edf47bc4aa6 100644
--- a/python/paddle_serving_server_gpu/__init__.py
+++ b/python/paddle_serving_server_gpu/__init__.py
@@ -46,6 +46,11 @@ def serve_args():
         "--name", type=str, default="None", help="Default service name")
     parser.add_argument(
         "--mem_optim", type=bool, default=False, help="Memory optimize")
+    parser.add_argument(
+        "--max_body_size",
+        type=int,
+        default=512 * 1024 * 1024,
+        help="Limit sizes of messages")
     return parser.parse_args()
 
 
@@ -114,6 +119,7 @@ class Server(object):
         self.num_threads = 4
         self.port = 8080
         self.reload_interval_s = 10
+        self.max_body_size = 64 * 1024 * 1024
         self.module_path = os.path.dirname(paddle_serving_server.__file__)
         self.cur_path = os.getcwd()
         self.check_cuda()
@@ -126,6 +132,14 @@ class Server(object):
     def set_num_threads(self, threads):
         self.num_threads = threads
 
+    def set_max_body_size(self, body_size):
+        if body_size >= self.max_body_size:
+            self.max_body_size = body_size
+        else:
+            print(
+                "max_body_size is less than default value, will use default value in service."
+            )
+
     def set_port(self, port):
         self.port = port
 
@@ -324,7 +338,8 @@ class Server(object):
                   "-workflow_path {} " \
                   "-workflow_file {} " \
                   "-bthread_concurrency {} " \
-                  "-gpuid {} ".format(
+                  "-gpuid {} " \
+                  "-max_body_size {} ".format(
                       self.bin_path,
                       self.workdir,
                       self.infer_service_fn,
@@ -337,7 +352,8 @@ class Server(object):
                       self.workdir,
                       self.workflow_fn,
                       self.num_threads,
-                      self.gpuid,)
+                      self.gpuid,
+                      self.max_body_size)
 
         print("Going to Run Comand")
         print(command)
diff --git a/python/paddle_serving_server_gpu/serve.py b/python/paddle_serving_server_gpu/serve.py
index 916af05ab6c6741b6504ce8f7660f6c7648c50f2..512b5ec0a7d15a030afdcaa5e8daa344b29fb96e 100644
--- a/python/paddle_serving_server_gpu/serve.py
+++ b/python/paddle_serving_server_gpu/serve.py
@@ -35,6 +35,7 @@ def start_gpu_card_model(index, gpuid, args):  # pylint: disable=doc-string-missing
     thread_num = args.thread
     model = args.model
     mem_optim = args.mem_optim
+    max_body_size = args.max_body_size
     workdir = "{}_{}".format(args.workdir, gpuid)
 
     if model == "":
@@ -56,6 +57,7 @@ def start_gpu_card_model(index, gpuid, args):  # pylint: disable=doc-string-missing
     server.set_op_sequence(op_seq_maker.get_op_sequence())
     server.set_num_threads(thread_num)
     server.set_memory_optimize(mem_optim)
+    server.set_max_body_size(max_body_size)
 
     server.load_model_config(model)
     server.prepare_server(workdir=workdir, port=port, device=device)
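
For reference, a minimal usage sketch of the options introduced above (not part of the patch itself): the server limit can be raised either through the new `--max_body_size` flag of the launcher or via `Server.set_max_body_size()`, and the client now reads `FLAGS_max_body_size` from the environment before loading its config. The model/config paths (`uci_housing_model`, `uci_housing_client/serving_client_conf.prototxt`), the port, and the 1 GiB value below are placeholder examples, not values taken from this change.

```python
# Server side (separate process): either pass the new flag to the launcher,
#
#   python -m paddle_serving_server.serve --model uci_housing_model \
#       --port 9292 --max_body_size 1073741824
#
# or call server.set_max_body_size(1024 * 1024 * 1024) when building a Server
# by hand; values below the 64 MiB default are ignored and the default is kept.

import os

from paddle_serving_client import Client

# The brpc body limit is read via --tryfromenv when the client config is
# loaded; Client falls back to 512 MiB if FLAGS_max_body_size is not set.
os.environ["FLAGS_max_body_size"] = str(1024 * 1024 * 1024)

client = Client()
client.load_client_config("uci_housing_client/serving_client_conf.prototxt")
client.connect(["127.0.0.1:9292"])
```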