diff --git a/python/paddle_serving_app/local_predict.py b/python/paddle_serving_app/local_predict.py index 5f922a28f849866fcd08a29b63c70a986d064c68..7ad11e1c2bf0abc4d447311e3081b434cbb25dc9 100644 --- a/python/paddle_serving_app/local_predict.py +++ b/python/paddle_serving_app/local_predict.py @@ -93,7 +93,9 @@ class LocalPredictor(object): use_ascend_cl=False, min_subgraph_size=3, dynamic_shape_info={}, - use_calib=False): + use_calib=False, + collect_shape_range_info="", + tuned_dynamic_shape_info=""): """ Load model configs and create the paddle predictor by Paddle Inference API. @@ -160,12 +162,14 @@ class LocalPredictor(object): "use_trt:{}, use_lite:{}, use_xpu:{}, precision:{}, use_calib:{}, " "use_mkldnn:{}, mkldnn_cache_capacity:{}, mkldnn_op_list:{}, " "mkldnn_bf16_op_list:{}, use_feed_fetch_ops:{}, " - "use_ascend_cl:{}, min_subgraph_size:{}, dynamic_shape_info:{}". + "use_ascend_cl:{}, min_subgraph_size:{}, dynamic_shape_info:{}," + "collect_shape_range_info:{},tuned_dynamic_shape_info:{}". format(model_path, use_gpu, gpu_id, use_profile, thread_num, mem_optim, ir_optim, use_trt, use_lite, use_xpu, precision, use_calib, use_mkldnn, mkldnn_cache_capacity, mkldnn_op_list, mkldnn_bf16_op_list, use_feed_fetch_ops, use_ascend_cl, - min_subgraph_size, dynamic_shape_info)) + min_subgraph_size, dynamic_shape_info, + collect_shape_range_info,tuned_dynamic_shape_info)) self.feed_names_ = [var.alias_name for var in model_conf.feed_var] self.fetch_names_ = [var.alias_name for var in model_conf.fetch_var] @@ -213,6 +217,8 @@ class LocalPredictor(object): if mkldnn_op_list is not None: config.set_mkldnn_op(mkldnn_op_list) # set gpu + if collect_shape_range_info != "": + config.collect_shape_range_info(collect_shape_range_info) if not use_gpu: config.disable_gpu() else: @@ -226,6 +232,9 @@ class LocalPredictor(object): use_static=False, use_calib_mode=use_calib) + if tuned_dynamic_shape_info != "": + config.enable_tuned_tensorrt_dynamic_shape(tuned_dynamic_shape_info, True) + @ErrorCatch @ParamChecker def dynamic_shape_info_helper(dynamic_shape_info:lambda dynamic_shape_info: check_dynamic_shape_info(dynamic_shape_info)): @@ -235,7 +244,7 @@ class LocalPredictor(object): print("dynamic_shape_info configure error, it should contain [min_input_shape', 'max_input_shape', 'opt_input_shape' {}".format(resp.err_msg)) kill_stop_process_by_pid("kill", os.getpgid(os.getpid())) - if len(dynamic_shape_info): + if len(dynamic_shape_info) and tuned_dynamic_shape_info == "": config.set_trt_dynamic_shape_info( dynamic_shape_info['min_input_shape'], dynamic_shape_info['max_input_shape'], diff --git a/python/pipeline/local_service_handler.py b/python/pipeline/local_service_handler.py index 9535281d6a5470c6da942bec9401869f21c21473..70b82095266e474330d8e7efebd0f2ee9656bf61 100755 --- a/python/pipeline/local_service_handler.py +++ b/python/pipeline/local_service_handler.py @@ -53,7 +53,9 @@ class LocalServiceHandler(object): mkldnn_bf16_op_list=None, min_subgraph_size=3, dynamic_shape_info={}, - use_calib=False): + use_calib=False, + collect_shape_range_info="", + tuned_dynamic_shape_info=""): """ Initialization of localservicehandler @@ -99,6 +101,8 @@ class LocalServiceHandler(object): self.min_subgraph_size = 3 self.dynamic_shape_info = {} self._use_calib = False + self.collect_shape_range_info = "" + self.tuned_dynamic_shape_info = "" if device_type == -1: # device_type is not set, determined by `devices`, @@ -179,6 +183,8 @@ class LocalServiceHandler(object): self._mkldnn_op_list = mkldnn_op_list self._mkldnn_bf16_op_list = mkldnn_bf16_op_list self._use_calib = use_calib + self.collect_shape_range_info = collect_shape_range_info + self.tuned_dynamic_shape_info = tuned_dynamic_shape_info _LOGGER.info( "Models({}) will be launched by device {}. use_gpu:{}, " @@ -187,14 +193,16 @@ class LocalServiceHandler(object): "client_type:{}, fetch_names:{}, precision:{}, use_calib:{}, " "use_mkldnn:{}, mkldnn_cache_capacity:{}, mkldnn_op_list:{}, " "mkldnn_bf16_op_list:{}, use_ascend_cl:{}, min_subgraph_size:{}," - "is_set_dynamic_shape_info:{}".format( + "is_set_dynamic_shape_info:{},collect_shape_range_info:{}," + "tuned_dynamic_shape_info:{}".format( model_config, self._device_name, self._use_gpu, self._use_trt, self._use_lite, self._use_xpu, device_type, self._devices, self. _mem_optim, self._ir_optim, self._use_profile, self._thread_num, self._client_type, self._fetch_names, self._precision, self. _use_calib, self._use_mkldnn, self._mkldnn_cache_capacity, self. _mkldnn_op_list, self._mkldnn_bf16_op_list, self._use_ascend_cl, - self.min_subgraph_size, bool(len(self.dynamic_shape_info)))) + self.min_subgraph_size, bool(len(self.dynamic_shape_info)), + self.collect_shape_range_info, self.tuned_dynamic_shape_info)) def get_fetch_list(self): return self._fetch_names @@ -254,7 +262,9 @@ class LocalServiceHandler(object): use_ascend_cl=self._use_ascend_cl, min_subgraph_size=self.min_subgraph_size, dynamic_shape_info=self.dynamic_shape_info, - use_calib=self._use_calib) + use_calib=self._use_calib, + collect_shape_range_info=self.collect_shape_range_info, + tuned_dynamic_shape_info=self.tuned_dynamic_shape_info) return self._local_predictor_client def get_client_config(self): diff --git a/python/pipeline/operator.py b/python/pipeline/operator.py index 9341a851d0365782ea407d8ac461a30b4530b793..84a72d09cf2fee4be844128b31f7e15943b114bc 100644 --- a/python/pipeline/operator.py +++ b/python/pipeline/operator.py @@ -121,6 +121,8 @@ class Op(object): self._succ_close_op = False self.dynamic_shape_info = {} self.set_dynamic_shape_info() + self.collect_shape_range_info = "" + self.tuned_dynamic_shape_info = "" def set_dynamic_shape_info(self): """ @@ -235,6 +237,14 @@ class Op(object): "mkldnn_bf16_op_list") self.min_subgraph_size = local_service_conf.get( "min_subgraph_size") + self.collect_shape_range_info = local_service_conf.get( + "collect_shape_range_info") + self.tuned_dynamic_shape_info = local_service_conf.get( + "tuned_dynamic_shape_info") + if self.collect_shape_range_info is None: + self.collect_shape_range_info = "" + if self.tuned_dynamic_shape_info is None: + self.tuned_dynamic_shape_info = "" if self.model_config is None: self.with_serving = False @@ -259,7 +269,9 @@ class Op(object): mkldnn_bf16_op_list=self.mkldnn_bf16_op_list, min_subgraph_size=self.min_subgraph_size, dynamic_shape_info=self.dynamic_shape_info, - use_calib=self.use_calib) + use_calib=self.use_calib, + collect_shape_range_info=self.collect_shape_range_info, + tuned_dynamic_shape_info=self.tuned_dynamic_shape_info) service_handler.prepare_server() # get fetch_list serivce_ports = service_handler.get_port_list() self._server_endpoints = [ @@ -290,7 +302,9 @@ class Op(object): mkldnn_bf16_op_list=self.mkldnn_bf16_op_list, min_subgraph_size=self.min_subgraph_size, dynamic_shape_info=self.dynamic_shape_info, - use_calib=self.use_calib) + use_calib=self.use_calib, + collect_shape_range_info=self.collect_shape_range_info, + tuned_dynamic_shape_info=self.tuned_dynamic_shape_info) if self._client_config is None: self._client_config = service_handler.get_client_config( ) @@ -1387,7 +1401,9 @@ class Op(object): mkldnn_bf16_op_list=mkldnn_bf16_op_list, min_subgraph_size=min_subgraph_size, dynamic_shape_info=dynamic_shape_info, - use_calib=use_calib) + use_calib=use_calib, + collect_shape_range_info=self.collect_shape_range_info, + tuned_dynamic_shape_info=self.tuned_dynamic_shape_info) _LOGGER.info("Init cuda env in process {}".format( concurrency_idx)) diff --git a/python/pipeline/pipeline_server.py b/python/pipeline/pipeline_server.py index 3ff765c4725a31c31a0de4effc01303507f1ebbc..17c408609b2066c47f184474c3b8ee8a6115bd86 100644 --- a/python/pipeline/pipeline_server.py +++ b/python/pipeline/pipeline_server.py @@ -261,6 +261,8 @@ class PipelineServer(object): "use_mkldnn": False, "mkldnn_cache_capacity": 0, "min_subgraph_size": 3, + "collect_shape_range_info": "", + "tuned_dynamic_shape_info": "", }, } for op in self._used_op: @@ -422,6 +424,8 @@ class ServerYamlConfChecker(object): "use_mkldnn": False, "mkldnn_cache_capacity": 0, "min_subgraph_size": 3, + "collect_shape_range_info": "", + "tuned_dynamic_shape_info": "", } conf_type = { "model_config": str, @@ -438,6 +442,8 @@ class ServerYamlConfChecker(object): "mkldnn_op_list": list, "mkldnn_bf16_op_list": list, "min_subgraph_size": int, + "collect_shape_range_info": str, + "tuned_dynamic_shape_info": str, } conf_qualification = {"thread_num": (">=", 1), } ServerYamlConfChecker.check_conf(conf, default_conf, conf_type,