Commit ccfaad36 authored by F felixhjh

Configure dynamic shape for TensorRT

Parent 27d9e17c
@@ -37,7 +37,7 @@ op:
        model_config: ocr_det_model
        # Fetch list; entries are the alias_name of fetch_var in client_config
-       fetch_list: ["concat_1.tmp_0"]
+       fetch_list: ["save_infer_model/scale_0.tmp_1"]
        # device_type, 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu
        device_type: 0
@@ -53,6 +53,9 @@ op:
        #ir_optim
        ir_optim: True
+
+       # With TensorRT enabled, the minimum node count for a subgraph to be optimized
+       #min_subgraph_size: 13
    rec:
        # Concurrency; with is_thread_op=True this is thread-level, otherwise process-level
        concurrency: 3
@@ -73,7 +76,7 @@ op:
        model_config: ocr_rec_model
        # Fetch list; entries are the alias_name of fetch_var in client_config
-       fetch_list: ["ctc_greedy_decoder_0.tmp_0", "softmax_0.tmp_0"]
+       fetch_list: ["save_infer_model/scale_0.tmp_1"]
        # device_type, 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu
        device_type: 0
@@ -88,3 +91,6 @@ op:
        #ir_optim
        ir_optim: True
+
+       # With TensorRT enabled, the minimum node count for a subgraph to be optimized
+       #min_subgraph_size: 3
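
The two commented-out min_subgraph_size entries above only take effect when device_type is set to 2 (TensorRT). A minimal sketch of how these keys would be read; the op -> det -> local_service_conf nesting and the config.yml file name are assumptions based on typical pipeline configs, not taken from this diff:

```python
# Hedged sketch: reading the keys this commit touches.
import yaml

with open("config.yml") as f:
    conf = yaml.safe_load(f)

det_conf = conf["op"]["det"]["local_service_conf"]        # assumed nesting
use_trt = det_conf["device_type"] == 2                    # 2 = TensorRT per the comment above
min_subgraph_size = det_conf.get("min_subgraph_size", 3)  # server-level default is 3
```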
@@ -40,6 +40,37 @@ class DetOp(Op):
            "min_size": 3
        })
+
+    def set_dynamic_shape_info(self):
+        min_input_shape = {
+            "x": [1, 3, 50, 50],
+            "conv2d_182.tmp_0": [1, 1, 20, 20],
+            "nearest_interp_v2_2.tmp_0": [1, 1, 20, 20],
+            "nearest_interp_v2_3.tmp_0": [1, 1, 20, 20],
+            "nearest_interp_v2_4.tmp_0": [1, 1, 20, 20],
+            "nearest_interp_v2_5.tmp_0": [1, 1, 20, 20]
+        }
+        max_input_shape = {
+            "x": [1, 3, 1536, 1536],
+            "conv2d_182.tmp_0": [20, 200, 960, 960],
+            "nearest_interp_v2_2.tmp_0": [20, 200, 960, 960],
+            "nearest_interp_v2_3.tmp_0": [20, 200, 960, 960],
+            "nearest_interp_v2_4.tmp_0": [20, 200, 960, 960],
+            "nearest_interp_v2_5.tmp_0": [20, 200, 960, 960],
+        }
+        opt_input_shape = {
+            "x": [1, 3, 960, 960],
+            "conv2d_182.tmp_0": [3, 96, 240, 240],
+            "nearest_interp_v2_2.tmp_0": [3, 96, 240, 240],
+            "nearest_interp_v2_3.tmp_0": [3, 24, 240, 240],
+            "nearest_interp_v2_4.tmp_0": [3, 24, 240, 240],
+            "nearest_interp_v2_5.tmp_0": [3, 24, 240, 240],
+        }
+        self.dynamic_shape_info = {
+            "min_input_shape": min_input_shape,
+            "max_input_shape": max_input_shape,
+            "opt_input_shape": opt_input_shape,
+        }

    def preprocess(self, input_dicts, data_id, log_id):
        (_, input_dict), = input_dicts.items()
        imgs = []
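
Note that the shape dictionaries above cover not only the graph input "x" but also intermediate tensors (conv2d_182.tmp_0, nearest_interp_v2_*.tmp_0) that become inputs of TensorRT subgraphs. TensorRT requires min <= opt <= max element-wise for every tensor; a small sanity-check sketch (a hypothetical helper, not part of this commit):

```python
# Hypothetical helper: verify every tensor's range satisfies
# min <= opt <= max element-wise, as TensorRT dynamic shape requires.
def check_dynamic_shape_info(info):
    mins = info["min_input_shape"]
    maxs = info["max_input_shape"]
    opts = info["opt_input_shape"]
    for name in mins:
        for lo, opt, hi in zip(mins[name], opts[name], maxs[name]):
            assert lo <= opt <= hi, "bad shape range for {}".format(name)
```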
@@ -52,11 +83,11 @@ class DetOp(Op):
            det_img = self.det_preprocess(self.im)
            _, self.new_h, self.new_w = det_img.shape
            imgs.append(det_img[np.newaxis, :].copy())
-        return {"image": np.concatenate(imgs, axis=0)}, False, None, ""
+        return {"x": np.concatenate(imgs, axis=0)}, False, None, ""

    def postprocess(self, input_dicts, fetch_dict, data_id, log_id):
        # print(fetch_dict)
-        det_out = fetch_dict["concat_1.tmp_0"]
+        det_out = fetch_dict["save_infer_model/scale_0.tmp_1"]
        ratio_list = [
            float(self.new_h) / self.ori_h, float(self.new_w) / self.ori_w
        ]
@@ -72,6 +103,25 @@ class RecOp(Op):
        self.get_rotate_crop_image = GetRotateCropImage()
        self.sorted_boxes = SortedBoxes()
+
+    def set_dynamic_shape_info(self):
+        min_input_shape = {
+            "x": [1, 3, 32, 10],
+            "lstm_1.tmp_0": [1, 1, 128]
+        }
+        max_input_shape = {
+            "x": [50, 3, 32, 1000],
+            "lstm_1.tmp_0": [500, 50, 128]
+        }
+        opt_input_shape = {
+            "x": [6, 3, 32, 100],
+            "lstm_1.tmp_0": [25, 5, 128]
+        }
+        self.dynamic_shape_info = {
+            "min_input_shape": min_input_shape,
+            "max_input_shape": max_input_shape,
+            "opt_input_shape": opt_input_shape,
+        }

    def preprocess(self, input_dicts, data_id, log_id):
        (_, input_dict), = input_dicts.items()
        raw_im = input_dict["image"]
@@ -143,7 +193,7 @@ class RecOp(Op):
            for id, img in enumerate(img_list):
                norm_img = self.ocr_reader.resize_norm_img(img, max_wh_ratio)
                imgs[id] = norm_img
-            feed = {"image": imgs.copy()}
+            feed = {"x": imgs.copy()}
            feed_list.append(feed)
            #_LOGGER.info("feed_list : {}".format(feed_list))
......
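The feed key ("image" -> "x") and the fetch keys (now "save_infer_model/scale_0.tmp_1") must match the variable names of the exported model. A sketch for listing the alias names of your own model, assuming the standard serving_server_conf.prototxt layout; the path is the det model directory from the config above:

```python
# Hedged sketch: print feed/fetch alias names from an exported serving model.
import re

with open("ocr_det_model/serving_server_conf.prototxt") as f:
    print(re.findall(r'alias_name:\s*"([^"]+)"', f.read()))
```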
@@ -88,7 +88,9 @@ class LocalPredictor(object):
                 mkldnn_op_list=None,
                 mkldnn_bf16_op_list=None,
                 use_feed_fetch_ops=False,
-                use_ascend_cl=False):
+                use_ascend_cl=False,
+                min_subgraph_size=3,
+                dynamic_shape_info={}):
        """
        Load model configs and create the paddle predictor by Paddle Inference API.
@@ -102,6 +104,9 @@ class LocalPredictor(object):
            ir_optim: enable computation graph optimization, False default.
            use_trt: use nvidia TensorRT optimization, False default
            use_lite: use Paddle-Lite Engine, False default
+           min_subgraph_size: minimum node count for a subgraph to be
+               optimized by TensorRT, 3 default
+           dynamic_shape_info: min/max/opt input shapes for TensorRT, {} default
            use_xpu: run predict on Baidu Kunlun, False default
            precision: precision mode, "fp32" default
            use_calib: use TensorRT calibration, False default
@@ -211,9 +216,13 @@ class LocalPredictor(object):
                precision_mode=precision_type,
                workspace_size=1 << 20,
                max_batch_size=32,
-               min_subgraph_size=3,
+               min_subgraph_size=min_subgraph_size,
                use_static=False,
                use_calib_mode=False)
+           if len(dynamic_shape_info):
+               config.set_trt_dynamic_shape_info(
+                   dynamic_shape_info['min_input_shape'],
+                   dynamic_shape_info['max_input_shape'],
+                   dynamic_shape_info['opt_input_shape'])

        # set lite
        if use_lite:
            config.enable_lite_engine(
......
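set_trt_dynamic_shape_info is the Paddle Inference API that the pipeline ultimately calls. A standalone sketch of the same configuration outside Serving; the model file names are placeholders:

```python
# Standalone sketch of what LocalPredictor does above, using the
# Paddle Inference API directly. Model file names are placeholders.
import paddle.inference as paddle_infer

config = paddle_infer.Config("model.pdmodel", "model.pdiparams")
config.enable_use_gpu(100, 0)              # 100 MB initial pool, GPU 0
config.enable_tensorrt_engine(
    workspace_size=1 << 20,
    max_batch_size=32,
    min_subgraph_size=3,
    precision_mode=paddle_infer.PrecisionType.Float32,
    use_static=False,
    use_calib_mode=False)
config.set_trt_dynamic_shape_info(
    {"x": [1, 3, 50, 50]},                 # min_input_shape
    {"x": [1, 3, 1536, 1536]},             # max_input_shape
    {"x": [1, 3, 960, 960]})               # opt_input_shape
predictor = paddle_infer.create_predictor(config)
```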
@@ -50,7 +50,9 @@ class LocalServiceHandler(object):
                 use_mkldnn=False,
                 mkldnn_cache_capacity=0,
                 mkldnn_op_list=None,
-                mkldnn_bf16_op_list=None):
+                mkldnn_bf16_op_list=None,
+                min_subgraph_size=3,
+                dynamic_shape_info={}):
        """
        Initialization of LocalServiceHandler
@@ -92,6 +94,8 @@ class LocalServiceHandler(object):
        self._mkldnn_cache_capacity = 0
        self._mkldnn_op_list = None
        self._mkldnn_bf16_op_list = None
+       self.min_subgraph_size = 3
+       self.dynamic_shape_info = {}

        if device_type == -1:
            # device_type is not set, determined by `devices`,
@@ -120,6 +124,8 @@ class LocalServiceHandler(object):
            self._use_gpu = True
            devices = [int(x) for x in devices.split(",")]
            self._use_trt = True
+           self.min_subgraph_size = min_subgraph_size
+           self.dynamic_shape_info = dynamic_shape_info
        elif device_type == 3:
            # ARM CPU
            self._device_name = "arm"
@@ -176,14 +182,14 @@ class LocalServiceHandler(object):
            "mem_optim:{}, ir_optim:{}, use_profile:{}, thread_num:{}, "
            "client_type:{}, fetch_names:{}, precision:{}, use_mkldnn:{}, "
            "mkldnn_cache_capacity:{}, mkldnn_op_list:{}, "
-           "mkldnn_bf16_op_list:{}, use_ascend_cl:{}".format(
+           "mkldnn_bf16_op_list:{}, use_ascend_cl:{}, min_subgraph_size:{}".format(
                model_config, self._device_name, self._use_gpu, self._use_trt,
                self._use_lite, self._use_xpu, device_type, self._devices,
                self._mem_optim, self._ir_optim, self._use_profile,
                self._thread_num, self._client_type, self._fetch_names,
                self._precision, self._use_mkldnn, self._mkldnn_cache_capacity,
                self._mkldnn_op_list, self._mkldnn_bf16_op_list,
-               self._use_ascend_cl))
+               self._use_ascend_cl, self.min_subgraph_size))

    def get_fetch_list(self):
        return self._fetch_names
@@ -240,7 +246,9 @@ class LocalServiceHandler(object):
                mkldnn_cache_capacity=self._mkldnn_cache_capacity,
                mkldnn_op_list=self._mkldnn_op_list,
                mkldnn_bf16_op_list=self._mkldnn_bf16_op_list,
-               use_ascend_cl=self._use_ascend_cl)
+               use_ascend_cl=self._use_ascend_cl,
+               min_subgraph_size=self.min_subgraph_size,
+               dynamic_shape_info=self.dynamic_shape_info)
        return self._local_predictor_client

    def get_client_config(self):
......
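Note the guard above: the two new arguments are stored on the handler only in the device_type == 2 branch, so they take effect exclusively for GPU + TensorRT and are ignored on CPU, ARM, and XPU. A hedged construction sketch, with other arguments elided and det_shape_info standing for a dict like the one DetOp builds:

```python
# Hedged usage sketch: the new arguments only matter with device_type=2.
handler = LocalServiceHandler(
    model_config="ocr_det_model",
    device_type=2,                        # GPU + TensorRT
    devices="0",
    min_subgraph_size=13,
    dynamic_shape_info=det_shape_info)    # dict of min/max/opt input shapes
```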
@@ -116,6 +116,11 @@ class Op(object):
        self._for_close_op_lock = threading.Lock()
        self._succ_init_op = False
        self._succ_close_op = False
+       self.dynamic_shape_info = {}
+       self.set_dynamic_shape_info()
+
+   def set_dynamic_shape_info(self):
+       pass

    # for feed/fetch dict check
    @staticmethod
@@ -182,6 +187,7 @@ class Op(object):
        self.mkldnn_cache_capacity = 0
        self.mkldnn_op_list = None
        self.mkldnn_bf16_op_list = None
+       self.min_subgraph_size = 3

        if self._server_endpoints is None:
            server_endpoints = conf.get("server_endpoints", [])
@@ -212,6 +218,8 @@ class Op(object):
                    "mkldnn_op_list")
                self.mkldnn_bf16_op_list = local_service_conf.get(
                    "mkldnn_bf16_op_list")
+               self.min_subgraph_size = local_service_conf.get(
+                   "min_subgraph_size")

                if self.model_config is None:
                    self.with_serving = False
@@ -233,7 +241,9 @@ class Op(object):
                        mkldnn_cache_capacity=self.
                        mkldnn_cache_capacity,
                        mkldnn_op_list=self.mkldnn_bf16_op_list,
-                       mkldnn_bf16_op_list=self.mkldnn_bf16_op_list)
+                       mkldnn_bf16_op_list=self.mkldnn_bf16_op_list,
+                       min_subgraph_size=self.min_subgraph_size,
+                       dynamic_shape_info=self.dynamic_shape_info)
                    service_handler.prepare_server()  # get fetch_list
                    serivce_ports = service_handler.get_port_list()
                    self._server_endpoints = [
@@ -261,7 +271,9 @@ class Op(object):
                        mkldnn_cache_capacity=self.
                        mkldnn_cache_capacity,
                        mkldnn_op_list=self.mkldnn_op_list,
-                       mkldnn_bf16_op_list=self.mkldnn_bf16_op_list)
+                       mkldnn_bf16_op_list=self.mkldnn_bf16_op_list,
+                       min_subgraph_size=self.min_subgraph_size,
+                       dynamic_shape_info=self.dynamic_shape_info)
                    if self._client_config is None:
                        self._client_config = service_handler.get_client_config(
                        )
@@ -766,7 +778,9 @@ class Op(object):
                    self.ir_optim, self.precision, self.use_mkldnn,
                    self.mkldnn_cache_capacity, self.mkldnn_op_list,
                    self.mkldnn_bf16_op_list, self.is_jump_op(),
-                   self.get_output_channels_of_jump_ops()))
+                   self.get_output_channels_of_jump_ops(),
+                   self.min_subgraph_size,
+                   self.dynamic_shape_info))
            p.daemon = True
            p.start()
            process.append(p)
@@ -803,7 +817,9 @@ class Op(object):
                    self.ir_optim, self.precision, self.use_mkldnn,
                    self.mkldnn_cache_capacity, self.mkldnn_op_list,
                    self.mkldnn_bf16_op_list, self.is_jump_op(),
-                   self.get_output_channels_of_jump_ops()))
+                   self.get_output_channels_of_jump_ops(),
+                   self.min_subgraph_size,
+                   self.dynamic_shape_info))
            # When a process exits, it attempts to terminate
            # all of its daemonic child processes.
            t.daemon = True
@@ -1264,7 +1280,7 @@ class Op(object):
            is_thread_op, trace_buffer, model_config, workdir, thread_num,
            device_type, devices, mem_optim, ir_optim, precision, use_mkldnn,
            mkldnn_cache_capacity, mkldnn_op_list, mkldnn_bf16_op_list,
-           is_jump_op, output_channels_of_jump_ops):
+           is_jump_op, output_channels_of_jump_ops, min_subgraph_size, dynamic_shape_info):
        """
        _run() is the entry function of OP process / thread model. When client
        type is local_predictor in process mode, the CUDA environment needs to
@@ -1316,7 +1332,9 @@ class Op(object):
                    use_mkldnn=use_mkldnn,
                    mkldnn_cache_capacity=mkldnn_cache_capacity,
                    mkldnn_op_list=mkldnn_op_list,
-                   mkldnn_bf16_op_list=mkldnn_bf16_op_list)
+                   mkldnn_bf16_op_list=mkldnn_bf16_op_list,
+                   min_subgraph_size=min_subgraph_size,
+                   dynamic_shape_info=dynamic_shape_info)
                _LOGGER.info("Init cuda env in process {}".format(
                    concurrency_idx))
......
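The base Op now calls set_dynamic_shape_info() from its constructor with a no-op default, so existing Ops are unaffected and TensorRT users opt in by overriding it, as DetOp and RecOp do above. A minimal sketch of the pattern; the import path follows the OCR example and is an assumption here:

```python
# Minimal sketch: opt into TensorRT dynamic shape by overriding
# set_dynamic_shape_info() in a user-defined Op.
from paddle_serving_server.web_service import Op  # import path assumed

class MyOp(Op):
    def set_dynamic_shape_info(self):
        # Keys are tensor names; values are [N, C, H, W] shape bounds.
        self.dynamic_shape_info = {
            "min_input_shape": {"x": [1, 3, 32, 32]},
            "max_input_shape": {"x": [8, 3, 1024, 1024]},
            "opt_input_shape": {"x": [1, 3, 640, 640]},
        }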
@@ -260,6 +260,7 @@ class PipelineServer(object):
                "use_calib": False,
                "use_mkldnn": False,
                "mkldnn_cache_capacity": 0,
+               "min_subgraph_size": 3,
            },
        }
        for op in self._used_op:
......
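The server-level default above means ops that omit min_subgraph_size fall back to 3, matching the value previously hardcoded in local_predict.py, while an op-level setting such as the det op's commented-out 13 takes precedence. A tiny sketch, assuming standard dict-merge semantics for the default config:

```python
# Hedged sketch of default-vs-override behavior (merge semantics assumed).
server_defaults = {"min_subgraph_size": 3}
op_conf = {"min_subgraph_size": 13}        # e.g. the det op in config.yml above
merged = {**server_defaults, **op_conf}    # the op-level value wins
assert merged["min_subgraph_size"] == 13
```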