未验证 提交 9a0f2887 编写于 作者: W wangguanzhong 提交者: GitHub

Fix timer in deploy (#4817)

* fix timer in deploy

* fix mot_keypoint deploy
上级 8ad63b1a
...@@ -121,32 +121,32 @@ class JDE_Detector(Detector): ...@@ -121,32 +121,32 @@ class JDE_Detector(Detector):
online_scores[cls_id].append(tscore) online_scores[cls_id].append(tscore)
return online_tlwhs, online_scores, online_ids return online_tlwhs, online_scores, online_ids
def predict(self, image_list, threshold=0.5, warmup=0, repeats=1): def predict(self, image_list, threshold=0.5, repeats=1, add_timer=True):
''' '''
Args: Args:
image_list (list[str]): path of images, only support one image path image_list (list[str]): path of images, only support one image path
(batch_size=1) in tracking model (batch_size=1) in tracking model
threshold (float): threshold of predicted box' score threshold (float): threshold of predicted box' score
repeats (int): repeat number for prediction
add_timer (bool): whether add timer during prediction
Returns: Returns:
online_tlwhs, online_scores, online_ids (dict[np.array]) online_tlwhs, online_scores, online_ids (dict[np.array])
''' '''
# preprocess
if add_timer:
self.det_times.preprocess_time_s.start() self.det_times.preprocess_time_s.start()
inputs = self.preprocess(image_list) inputs = self.preprocess(image_list)
self.det_times.preprocess_time_s.end()
pred_dets, pred_embs = None, None pred_dets, pred_embs = None, None
input_names = self.predictor.get_input_names() input_names = self.predictor.get_input_names()
for i in range(len(input_names)): for i in range(len(input_names)):
input_tensor = self.predictor.get_input_handle(input_names[i]) input_tensor = self.predictor.get_input_handle(input_names[i])
input_tensor.copy_from_cpu(inputs[input_names[i]]) input_tensor.copy_from_cpu(inputs[input_names[i]])
if add_timer:
for i in range(warmup): self.det_times.preprocess_time_s.end()
self.predictor.run()
output_names = self.predictor.get_output_names()
boxes_tensor = self.predictor.get_output_handle(output_names[0])
pred_dets = boxes_tensor.copy_to_cpu()
self.det_times.inference_time_s.start() self.det_times.inference_time_s.start()
# model prediction
for i in range(repeats): for i in range(repeats):
self.predictor.run() self.predictor.run()
output_names = self.predictor.get_output_names() output_names = self.predictor.get_output_names()
...@@ -154,14 +154,16 @@ class JDE_Detector(Detector): ...@@ -154,14 +154,16 @@ class JDE_Detector(Detector):
pred_dets = boxes_tensor.copy_to_cpu() pred_dets = boxes_tensor.copy_to_cpu()
embs_tensor = self.predictor.get_output_handle(output_names[1]) embs_tensor = self.predictor.get_output_handle(output_names[1])
pred_embs = embs_tensor.copy_to_cpu() pred_embs = embs_tensor.copy_to_cpu()
if add_timer:
self.det_times.inference_time_s.end(repeats=repeats) self.det_times.inference_time_s.end(repeats=repeats)
self.det_times.postprocess_time_s.start() self.det_times.postprocess_time_s.start()
# postprocess
online_tlwhs, online_scores, online_ids = self.postprocess( online_tlwhs, online_scores, online_ids = self.postprocess(
pred_dets, pred_embs, threshold) pred_dets, pred_embs, threshold)
if add_timer:
self.det_times.postprocess_time_s.end() self.det_times.postprocess_time_s.end()
self.det_times.img_num += 1 self.det_times.img_num += 1
return online_tlwhs, online_scores, online_ids return online_tlwhs, online_scores, online_ids
...@@ -175,7 +177,12 @@ def predict_image(detector, image_list): ...@@ -175,7 +177,12 @@ def predict_image(detector, image_list):
for frame_id, img_file in enumerate(image_list): for frame_id, img_file in enumerate(image_list):
frame = cv2.imread(img_file) frame = cv2.imread(img_file)
if FLAGS.run_benchmark: if FLAGS.run_benchmark:
detector.predict([img_file], FLAGS.threshold, warmup=10, repeats=10) # warmup
detector.predict(
[img_file], FLAGS.threshold, repeats=10, add_timer=False)
# run benchmark
detector.predict(
[img_file], FLAGS.threshold, repeats=10, add_timer=True)
cm, gm, gu = get_current_memory_mb() cm, gm, gu = get_current_memory_mb()
detector.cpu_mem += cm detector.cpu_mem += cm
detector.gpu_mem += gm detector.gpu_mem += gm
......
...@@ -154,8 +154,8 @@ class SDE_Detector(Detector): ...@@ -154,8 +154,8 @@ class SDE_Detector(Detector):
ori_image_shape, ori_image_shape,
threshold=0.5, threshold=0.5,
scaled=False, scaled=False,
warmup=0, repeats=1,
repeats=1): add_timer=True):
''' '''
Args: Args:
image_path (list[str]): path of images, only support one image path image_path (list[str]): path of images, only support one image path
...@@ -164,43 +164,46 @@ class SDE_Detector(Detector): ...@@ -164,43 +164,46 @@ class SDE_Detector(Detector):
threshold (float): threshold of predicted box' score threshold (float): threshold of predicted box' score
scaled (bool): whether the coords after detector outputs are scaled, scaled (bool): whether the coords after detector outputs are scaled,
default False in jde yolov3, set True in general detector. default False in jde yolov3, set True in general detector.
repeats (int): repeat number for prediction
add_timer (bool): whether add timer during prediction
Returns: Returns:
pred_dets (np.ndarray, [N, 6]): 'x,y,w,h,score,cls_id' pred_dets (np.ndarray, [N, 6]): 'x,y,w,h,score,cls_id'
pred_xyxys (np.ndarray, [N, 4]): 'x1,y1,x2,y2' pred_xyxys (np.ndarray, [N, 4]): 'x1,y1,x2,y2'
''' '''
# preprocess
if add_timer:
self.det_times.preprocess_time_s.start() self.det_times.preprocess_time_s.start()
inputs = self.preprocess(image_path) inputs = self.preprocess(image_path)
self.det_times.preprocess_time_s.end()
input_names = self.predictor.get_input_names() input_names = self.predictor.get_input_names()
for i in range(len(input_names)): for i in range(len(input_names)):
input_tensor = self.predictor.get_input_handle(input_names[i]) input_tensor = self.predictor.get_input_handle(input_names[i])
input_tensor.copy_from_cpu(inputs[input_names[i]]) input_tensor.copy_from_cpu(inputs[input_names[i]])
if add_timer:
for i in range(warmup): self.det_times.preprocess_time_s.end()
self.predictor.run()
output_names = self.predictor.get_output_names()
boxes_tensor = self.predictor.get_output_handle(output_names[0])
boxes = boxes_tensor.copy_to_cpu()
self.det_times.inference_time_s.start() self.det_times.inference_time_s.start()
# model prediction
for i in range(repeats): for i in range(repeats):
self.predictor.run() self.predictor.run()
output_names = self.predictor.get_output_names() output_names = self.predictor.get_output_names()
boxes_tensor = self.predictor.get_output_handle(output_names[0]) boxes_tensor = self.predictor.get_output_handle(output_names[0])
boxes = boxes_tensor.copy_to_cpu() boxes = boxes_tensor.copy_to_cpu()
if add_timer:
self.det_times.inference_time_s.end(repeats=repeats) self.det_times.inference_time_s.end(repeats=repeats)
self.det_times.postprocess_time_s.start() self.det_times.postprocess_time_s.start()
# postprocess
if len(boxes) == 0: if len(boxes) == 0:
pred_dets = np.zeros((1, 6), dtype=np.float32) pred_dets = np.zeros((1, 6), dtype=np.float32)
pred_xyxys = np.zeros((1, 4), dtype=np.float32) pred_xyxys = np.zeros((1, 4), dtype=np.float32)
else: else:
pred_dets, pred_xyxys = self.postprocess( pred_dets, pred_xyxys = self.postprocess(
boxes, ori_image_shape, threshold, inputs, scaled=scaled) boxes, ori_image_shape, threshold, inputs, scaled=scaled)
if add_timer:
self.det_times.postprocess_time_s.end() self.det_times.postprocess_time_s.end()
self.det_times.img_num += 1 self.det_times.img_num += 1
return pred_dets, pred_xyxys return pred_dets, pred_xyxys
...@@ -284,8 +287,8 @@ class SDE_DetectorPicoDet(DetectorPicoDet): ...@@ -284,8 +287,8 @@ class SDE_DetectorPicoDet(DetectorPicoDet):
ori_image_shape, ori_image_shape,
threshold=0.5, threshold=0.5,
scaled=False, scaled=False,
warmup=0, repeats=1,
repeats=1): add_timer=True):
''' '''
Args: Args:
image_path (list[str]): path of images, only support one image path image_path (list[str]): path of images, only support one image path
...@@ -294,27 +297,26 @@ class SDE_DetectorPicoDet(DetectorPicoDet): ...@@ -294,27 +297,26 @@ class SDE_DetectorPicoDet(DetectorPicoDet):
threshold (float): threshold of predicted box' score threshold (float): threshold of predicted box' score
scaled (bool): whether the coords after detector outputs are scaled, scaled (bool): whether the coords after detector outputs are scaled,
default False in jde yolov3, set True in general detector. default False in jde yolov3, set True in general detector.
repeats (int): repeat number for prediction
add_timer (bool): whether add timer during prediction
Returns: Returns:
pred_dets (np.ndarray, [N, 6]): 'x,y,w,h,score,cls_id' pred_dets (np.ndarray, [N, 6]): 'x,y,w,h,score,cls_id'
pred_xyxys (np.ndarray, [N, 4]): 'x1,y1,x2,y2' pred_xyxys (np.ndarray, [N, 4]): 'x1,y1,x2,y2'
''' '''
# preprocess
if add_timer:
self.det_times.preprocess_time_s.start() self.det_times.preprocess_time_s.start()
inputs = self.preprocess(image_path) inputs = self.preprocess(image_path)
self.det_times.preprocess_time_s.end()
input_names = self.predictor.get_input_names() input_names = self.predictor.get_input_names()
for i in range(len(input_names)): for i in range(len(input_names)):
input_tensor = self.predictor.get_input_handle(input_names[i]) input_tensor = self.predictor.get_input_handle(input_names[i])
input_tensor.copy_from_cpu(inputs[input_names[i]]) input_tensor.copy_from_cpu(inputs[input_names[i]])
if add_timer:
np_score_list, np_boxes_list = [], [] self.det_times.preprocess_time_s.end()
for i in range(warmup):
self.predictor.run()
output_names = self.predictor.get_output_names()
boxes_tensor = self.predictor.get_output_handle(output_names[0])
boxes = boxes_tensor.copy_to_cpu()
self.det_times.inference_time_s.start() self.det_times.inference_time_s.start()
# model prediction
for i in range(repeats): for i in range(repeats):
self.predictor.run() self.predictor.run()
np_score_list.clear() np_score_list.clear()
...@@ -328,9 +330,11 @@ class SDE_DetectorPicoDet(DetectorPicoDet): ...@@ -328,9 +330,11 @@ class SDE_DetectorPicoDet(DetectorPicoDet):
np_boxes_list.append( np_boxes_list.append(
self.predictor.get_output_handle(output_names[ self.predictor.get_output_handle(output_names[
out_idx + num_outs]).copy_to_cpu()) out_idx + num_outs]).copy_to_cpu())
if add_timer:
self.det_times.inference_time_s.end(repeats=repeats) self.det_times.inference_time_s.end(repeats=repeats)
self.det_times.postprocess_time_s.start() self.det_times.postprocess_time_s.start()
# postprocess
self.picodet_postprocess = PicoDetPostProcess( self.picodet_postprocess = PicoDetPostProcess(
inputs['image'].shape[2:], inputs['image'].shape[2:],
inputs['im_shape'], inputs['im_shape'],
...@@ -346,6 +350,7 @@ class SDE_DetectorPicoDet(DetectorPicoDet): ...@@ -346,6 +350,7 @@ class SDE_DetectorPicoDet(DetectorPicoDet):
else: else:
pred_dets, pred_xyxys = self.postprocess(boxes, ori_image_shape, pred_dets, pred_xyxys = self.postprocess(boxes, ori_image_shape,
threshold) threshold)
if add_timer:
self.det_times.postprocess_time_s.end() self.det_times.postprocess_time_s.end()
self.det_times.img_num += 1 self.det_times.img_num += 1
...@@ -503,40 +508,41 @@ class SDE_ReID(object): ...@@ -503,40 +508,41 @@ class SDE_ReID(object):
def predict(self, def predict(self,
crops, crops,
pred_dets, pred_dets,
warmup=0,
repeats=1, repeats=1,
add_timer=True,
MTMCT=False, MTMCT=False,
frame_id=0, frame_id=0,
seq_name=''): seq_name=''):
# preprocess
if add_timer:
self.det_times.preprocess_time_s.start() self.det_times.preprocess_time_s.start()
inputs = self.preprocess(crops) inputs = self.preprocess(crops)
self.det_times.preprocess_time_s.end()
input_names = self.predictor.get_input_names() input_names = self.predictor.get_input_names()
for i in range(len(input_names)): for i in range(len(input_names)):
input_tensor = self.predictor.get_input_handle(input_names[i]) input_tensor = self.predictor.get_input_handle(input_names[i])
input_tensor.copy_from_cpu(inputs[input_names[i]]) input_tensor.copy_from_cpu(inputs[input_names[i]])
for i in range(warmup): if add_timer:
self.predictor.run() self.det_times.preprocess_time_s.end()
output_names = self.predictor.get_output_names()
feature_tensor = self.predictor.get_output_handle(output_names[0])
pred_embs = feature_tensor.copy_to_cpu()
self.det_times.inference_time_s.start() self.det_times.inference_time_s.start()
# model prediction
for i in range(repeats): for i in range(repeats):
self.predictor.run() self.predictor.run()
output_names = self.predictor.get_output_names() output_names = self.predictor.get_output_names()
feature_tensor = self.predictor.get_output_handle(output_names[0]) feature_tensor = self.predictor.get_output_handle(output_names[0])
pred_embs = feature_tensor.copy_to_cpu() pred_embs = feature_tensor.copy_to_cpu()
if add_timer:
self.det_times.inference_time_s.end(repeats=repeats) self.det_times.inference_time_s.end(repeats=repeats)
self.det_times.postprocess_time_s.start() self.det_times.postprocess_time_s.start()
# postprocess
if MTMCT == False: if MTMCT == False:
tracking_outs = self.postprocess(pred_dets, pred_embs) tracking_outs = self.postprocess(pred_dets, pred_embs)
else: else:
tracking_outs = self.postprocess_mtmct(pred_dets, pred_embs, tracking_outs = self.postprocess_mtmct(pred_dets, pred_embs,
frame_id, seq_name) frame_id, seq_name)
if add_timer:
self.det_times.postprocess_time_s.end() self.det_times.postprocess_time_s.end()
self.det_times.img_num += 1 self.det_times.img_num += 1
...@@ -549,13 +555,23 @@ def predict_image(detector, reid_model, image_list): ...@@ -549,13 +555,23 @@ def predict_image(detector, reid_model, image_list):
frame = cv2.imread(img_file) frame = cv2.imread(img_file)
ori_image_shape = list(frame.shape[:2]) ori_image_shape = list(frame.shape[:2])
if FLAGS.run_benchmark: if FLAGS.run_benchmark:
# warmup
pred_dets, pred_xyxys = detector.predict( pred_dets, pred_xyxys = detector.predict(
[img_file], [img_file],
ori_image_shape, ori_image_shape,
FLAGS.threshold, FLAGS.threshold,
FLAGS.scaled, FLAGS.scaled,
warmup=10, repeats=10,
repeats=10) add_timer=False)
# run benchmark
pred_dets, pred_xyxys = detector.predict(
[img_file],
ori_image_shape,
FLAGS.threshold,
FLAGS.scaled,
repeats=10,
add_timer=True)
cm, gm, gu = get_current_memory_mb() cm, gm, gu = get_current_memory_mb()
detector.cpu_mem += cm detector.cpu_mem += cm
detector.gpu_mem += gm detector.gpu_mem += gm
...@@ -574,8 +590,13 @@ def predict_image(detector, reid_model, image_list): ...@@ -574,8 +590,13 @@ def predict_image(detector, reid_model, image_list):
crops = reid_model.get_crops(pred_xyxys, frame) crops = reid_model.get_crops(pred_xyxys, frame)
if FLAGS.run_benchmark: if FLAGS.run_benchmark:
# warmup
tracking_outs = reid_model.predict( tracking_outs = reid_model.predict(
crops, pred_dets, warmup=10, repeats=10) crops, pred_dets, repeats=10, add_timer=False)
# run benchmark
tracking_outs = reid_model.predict(
crops, pred_dets, repeats=10, add_timer=True)
else: else:
tracking_outs = reid_model.predict(crops, pred_dets) tracking_outs = reid_model.predict(crops, pred_dets)
......
...@@ -68,8 +68,12 @@ def predict_with_given_det(image, det_res, keypoint_detector, ...@@ -68,8 +68,12 @@ def predict_with_given_det(image, det_res, keypoint_detector,
batch_images = rec_images[start_index:end_index] batch_images = rec_images[start_index:end_index]
batch_records = np.array(records[start_index:end_index]) batch_records = np.array(records[start_index:end_index])
if run_benchmark: if run_benchmark:
# warmup
keypoint_result = keypoint_detector.predict( keypoint_result = keypoint_detector.predict(
batch_images, keypoint_threshold, warmup=10, repeats=10) batch_images, keypoint_threshold, repeats=10, add_timer=False)
# run benchmark
keypoint_result = keypoint_detector.predict(
batch_images, keypoint_threshold, repeats=10, add_timer=True)
else: else:
keypoint_result = keypoint_detector.predict(batch_images, keypoint_result = keypoint_detector.predict(batch_images,
keypoint_threshold) keypoint_threshold)
...@@ -100,8 +104,12 @@ def topdown_unite_predict(detector, ...@@ -100,8 +104,12 @@ def topdown_unite_predict(detector,
det_timer.preprocess_time_s.end() det_timer.preprocess_time_s.end()
if FLAGS.run_benchmark: if FLAGS.run_benchmark:
# warmup
results = detector.predict(
[image], FLAGS.det_threshold, repeats=10, add_timer=False)
# run benchmark
results = detector.predict( results = detector.predict(
[image], FLAGS.det_threshold, warmup=10, repeats=10) [image], FLAGS.det_threshold, repeats=10, add_timer=True)
cm, gm, gu = get_current_memory_mb() cm, gm, gu = get_current_memory_mb()
detector.cpu_mem += cm detector.cpu_mem += cm
detector.gpu_mem += gm detector.gpu_mem += gm
......
...@@ -126,35 +126,33 @@ class Detector(object): ...@@ -126,35 +126,33 @@ class Detector(object):
results['masks'] = np_masks results['masks'] = np_masks
return results return results
def predict(self, image_list, threshold=0.5, warmup=0, repeats=1): def predict(self, image_list, threshold=0.5, repeats=1, add_timer=True):
''' '''
Args: Args:
image_list (list): list of image image_list (list): list of image
threshold (float): threshold of predicted box' score threshold (float): threshold of predicted box' score
repeats (int): repeat number for prediction
add_timer (bool): whether add timer during prediction
Returns: Returns:
results (dict): include 'boxes': np.ndarray: shape:[N,6], N: number of box, results (dict): include 'boxes': np.ndarray: shape:[N,6], N: number of box,
matix element:[class, score, x_min, y_min, x_max, y_max] matix element:[class, score, x_min, y_min, x_max, y_max]
MaskRCNN's results include 'masks': np.ndarray: MaskRCNN's results include 'masks': np.ndarray:
shape: [N, im_h, im_w] shape: [N, im_h, im_w]
''' '''
# preprocess
if add_timer:
self.det_times.preprocess_time_s.start() self.det_times.preprocess_time_s.start()
inputs = self.preprocess(image_list) inputs = self.preprocess(image_list)
self.det_times.preprocess_time_s.end()
np_boxes, np_masks = None, None np_boxes, np_masks = None, None
input_names = self.predictor.get_input_names() input_names = self.predictor.get_input_names()
for i in range(len(input_names)): for i in range(len(input_names)):
input_tensor = self.predictor.get_input_handle(input_names[i]) input_tensor = self.predictor.get_input_handle(input_names[i])
input_tensor.copy_from_cpu(inputs[input_names[i]]) input_tensor.copy_from_cpu(inputs[input_names[i]])
for i in range(warmup): if add_timer:
self.predictor.run() self.det_times.preprocess_time_s.end()
output_names = self.predictor.get_output_names()
boxes_tensor = self.predictor.get_output_handle(output_names[0])
np_boxes = boxes_tensor.copy_to_cpu()
if self.pred_config.mask:
masks_tensor = self.predictor.get_output_handle(output_names[2])
np_masks = masks_tensor.copy_to_cpu()
self.det_times.inference_time_s.start() self.det_times.inference_time_s.start()
# model prediction
for i in range(repeats): for i in range(repeats):
self.predictor.run() self.predictor.run()
output_names = self.predictor.get_output_names() output_names = self.predictor.get_output_names()
...@@ -165,9 +163,12 @@ class Detector(object): ...@@ -165,9 +163,12 @@ class Detector(object):
if self.pred_config.mask: if self.pred_config.mask:
masks_tensor = self.predictor.get_output_handle(output_names[2]) masks_tensor = self.predictor.get_output_handle(output_names[2])
np_masks = masks_tensor.copy_to_cpu() np_masks = masks_tensor.copy_to_cpu()
self.det_times.inference_time_s.end(repeats=repeats)
if add_timer:
self.det_times.inference_time_s.end(repeats=repeats)
self.det_times.postprocess_time_s.start() self.det_times.postprocess_time_s.start()
# postprocess
results = [] results = []
if reduce(lambda x, y: x * y, np_boxes.shape) < 6: if reduce(lambda x, y: x * y, np_boxes.shape) < 6:
print('[WARNNING] No object detected.') print('[WARNNING] No object detected.')
...@@ -175,6 +176,7 @@ class Detector(object): ...@@ -175,6 +176,7 @@ class Detector(object):
else: else:
results = self.postprocess( results = self.postprocess(
np_boxes, np_masks, inputs, np_boxes_num, threshold=threshold) np_boxes, np_masks, inputs, np_boxes_num, threshold=threshold)
if add_timer:
self.det_times.postprocess_time_s.end() self.det_times.postprocess_time_s.end()
self.det_times.img_num += len(image_list) self.det_times.img_num += len(image_list)
return results return results
...@@ -229,35 +231,29 @@ class DetectorSOLOv2(Detector): ...@@ -229,35 +231,29 @@ class DetectorSOLOv2(Detector):
self.det_times = Timer() self.det_times = Timer()
self.cpu_mem, self.gpu_mem, self.gpu_util = 0, 0, 0 self.cpu_mem, self.gpu_mem, self.gpu_util = 0, 0, 0
def predict(self, image, threshold=0.5, warmup=0, repeats=1): def predict(self, image, threshold=0.5, repeats=1, add_timer=True):
''' '''
Args: Args:
image (str/np.ndarray): path of image/ np.ndarray read by cv2 image (str/np.ndarray): path of image/ np.ndarray read by cv2
threshold (float): threshold of predicted box' score threshold (float): threshold of predicted box' score
repeats (int): repeat number for prediction
add_timer (bool): whether add timer during prediction
Returns: Returns:
results (dict): 'segm': np.ndarray,shape:[N, im_h, im_w] results (dict): 'segm': np.ndarray,shape:[N, im_h, im_w]
'cate_label': label of segm, shape:[N] 'cate_label': label of segm, shape:[N]
'cate_score': confidence score of segm, shape:[N] 'cate_score': confidence score of segm, shape:[N]
''' '''
# preprocess
if add_timer:
self.det_times.preprocess_time_s.start() self.det_times.preprocess_time_s.start()
inputs = self.preprocess(image) inputs = self.preprocess(image)
self.det_times.preprocess_time_s.end()
np_label, np_score, np_segms = None, None, None np_label, np_score, np_segms = None, None, None
input_names = self.predictor.get_input_names() input_names = self.predictor.get_input_names()
for i in range(len(input_names)): for i in range(len(input_names)):
input_tensor = self.predictor.get_input_handle(input_names[i]) input_tensor = self.predictor.get_input_handle(input_names[i])
input_tensor.copy_from_cpu(inputs[input_names[i]]) input_tensor.copy_from_cpu(inputs[input_names[i]])
for i in range(warmup): if add_timer:
self.predictor.run() self.det_times.preprocess_time_s.end()
output_names = self.predictor.get_output_names()
np_boxes_num = self.predictor.get_output_handle(output_names[
0]).copy_to_cpu()
np_label = self.predictor.get_output_handle(output_names[
1]).copy_to_cpu()
np_score = self.predictor.get_output_handle(output_names[
2]).copy_to_cpu()
np_segms = self.predictor.get_output_handle(output_names[
3]).copy_to_cpu()
self.det_times.inference_time_s.start() self.det_times.inference_time_s.start()
for i in range(repeats): for i in range(repeats):
self.predictor.run() self.predictor.run()
...@@ -270,6 +266,7 @@ class DetectorSOLOv2(Detector): ...@@ -270,6 +266,7 @@ class DetectorSOLOv2(Detector):
2]).copy_to_cpu() 2]).copy_to_cpu()
np_segms = self.predictor.get_output_handle(output_names[ np_segms = self.predictor.get_output_handle(output_names[
3]).copy_to_cpu() 3]).copy_to_cpu()
if add_timer:
self.det_times.inference_time_s.end(repeats=repeats) self.det_times.inference_time_s.end(repeats=repeats)
self.det_times.img_num += 1 self.det_times.img_num += 1
...@@ -326,38 +323,32 @@ class DetectorPicoDet(Detector): ...@@ -326,38 +323,32 @@ class DetectorPicoDet(Detector):
self.det_times = Timer() self.det_times = Timer()
self.cpu_mem, self.gpu_mem, self.gpu_util = 0, 0, 0 self.cpu_mem, self.gpu_mem, self.gpu_util = 0, 0, 0
def predict(self, image, threshold=0.5, warmup=0, repeats=1): def predict(self, image, threshold=0.5, repeats=1, add_timer=True):
''' '''
Args: Args:
image (str/np.ndarray): path of image/ np.ndarray read by cv2 image (str/np.ndarray): path of image/ np.ndarray read by cv2
threshold (float): threshold of predicted box' score threshold (float): threshold of predicted box' score
repeats (int): repeat number for prediction
add_timer (bool): whether add timer during prediction
Returns: Returns:
results (dict): include 'boxes': np.ndarray: shape:[N,6], N: number of box, results (dict): include 'boxes': np.ndarray: shape:[N,6], N: number of box,
matix element:[class, score, x_min, y_min, x_max, y_max] matix element:[class, score, x_min, y_min, x_max, y_max]
''' '''
# preprocess
if add_timer:
self.det_times.preprocess_time_s.start() self.det_times.preprocess_time_s.start()
inputs = self.preprocess(image) inputs = self.preprocess(image)
self.det_times.preprocess_time_s.end()
input_names = self.predictor.get_input_names() input_names = self.predictor.get_input_names()
for i in range(len(input_names)): for i in range(len(input_names)):
input_tensor = self.predictor.get_input_handle(input_names[i]) input_tensor = self.predictor.get_input_handle(input_names[i])
input_tensor.copy_from_cpu(inputs[input_names[i]]) input_tensor.copy_from_cpu(inputs[input_names[i]])
np_score_list, np_boxes_list = [], []
for i in range(warmup):
self.predictor.run()
np_score_list.clear()
np_boxes_list.clear()
output_names = self.predictor.get_output_names()
num_outs = int(len(output_names) / 2)
for out_idx in range(num_outs):
np_score_list.append(
self.predictor.get_output_handle(output_names[out_idx])
.copy_to_cpu())
np_boxes_list.append(
self.predictor.get_output_handle(output_names[
out_idx + num_outs]).copy_to_cpu())
np_score_list, np_boxes_list = [], []
if add_timer:
self.det_times.preprocess_time_s.end()
self.det_times.inference_time_s.start() self.det_times.inference_time_s.start()
# model_prediction
for i in range(repeats): for i in range(repeats):
self.predictor.run() self.predictor.run()
np_score_list.clear() np_score_list.clear()
...@@ -371,9 +362,12 @@ class DetectorPicoDet(Detector): ...@@ -371,9 +362,12 @@ class DetectorPicoDet(Detector):
np_boxes_list.append( np_boxes_list.append(
self.predictor.get_output_handle(output_names[ self.predictor.get_output_handle(output_names[
out_idx + num_outs]).copy_to_cpu()) out_idx + num_outs]).copy_to_cpu())
if add_timer:
self.det_times.inference_time_s.end(repeats=repeats) self.det_times.inference_time_s.end(repeats=repeats)
self.det_times.img_num += 1 self.det_times.img_num += 1
self.det_times.postprocess_time_s.start() self.det_times.postprocess_time_s.start()
# postprocess
self.postprocess = PicoDetPostProcess( self.postprocess = PicoDetPostProcess(
inputs['image'].shape[2:], inputs['image'].shape[2:],
inputs['im_shape'], inputs['im_shape'],
...@@ -381,6 +375,7 @@ class DetectorPicoDet(Detector): ...@@ -381,6 +375,7 @@ class DetectorPicoDet(Detector):
strides=self.pred_config.fpn_stride, strides=self.pred_config.fpn_stride,
nms_threshold=self.pred_config.nms['nms_threshold']) nms_threshold=self.pred_config.nms['nms_threshold'])
np_boxes, np_boxes_num = self.postprocess(np_score_list, np_boxes_list) np_boxes, np_boxes_num = self.postprocess(np_score_list, np_boxes_list)
if add_timer:
self.det_times.postprocess_time_s.end() self.det_times.postprocess_time_s.end()
return dict(boxes=np_boxes, boxes_num=np_boxes_num) return dict(boxes=np_boxes, boxes_num=np_boxes_num)
...@@ -647,8 +642,13 @@ def predict_image(detector, image_list, batch_size=1): ...@@ -647,8 +642,13 @@ def predict_image(detector, image_list, batch_size=1):
end_index = min((i + 1) * batch_size, len(image_list)) end_index = min((i + 1) * batch_size, len(image_list))
batch_image_list = image_list[start_index:end_index] batch_image_list = image_list[start_index:end_index]
if FLAGS.run_benchmark: if FLAGS.run_benchmark:
# warmup
detector.predict( detector.predict(
batch_image_list, FLAGS.threshold, warmup=10, repeats=10) batch_image_list, FLAGS.threshold, repeats=10, add_timer=False)
# run benchmark
detector.predict(
batch_image_list, FLAGS.threshold, repeats=10, add_timer=True)
cm, gm, gu = get_current_memory_mb() cm, gm, gu = get_current_memory_mb()
detector.cpu_mem += cm detector.cpu_mem += cm
detector.gpu_mem += gm detector.gpu_mem += gm
...@@ -681,7 +681,7 @@ def predict_video(detector, camera_id): ...@@ -681,7 +681,7 @@ def predict_video(detector, camera_id):
if not os.path.exists(FLAGS.output_dir): if not os.path.exists(FLAGS.output_dir):
os.makedirs(FLAGS.output_dir) os.makedirs(FLAGS.output_dir)
out_path = os.path.join(FLAGS.output_dir, video_out_name) out_path = os.path.join(FLAGS.output_dir, video_out_name)
fourcc = cv2.VideoWriter_fourcc(*'mp4v') fourcc = cv2.VideoWriter_fourcc(* 'mp4v')
writer = cv2.VideoWriter(out_path, fourcc, fps, (width, height)) writer = cv2.VideoWriter(out_path, fourcc, fps, (width, height))
index = 1 index = 1
while (1): while (1):
......
...@@ -145,41 +145,33 @@ class KeyPoint_Detector(Detector): ...@@ -145,41 +145,33 @@ class KeyPoint_Detector(Detector):
raise ValueError("Unsupported arch: {}, expect {}".format( raise ValueError("Unsupported arch: {}, expect {}".format(
self.pred_config.arch, KEYPOINT_SUPPORT_MODELS)) self.pred_config.arch, KEYPOINT_SUPPORT_MODELS))
def predict(self, image_list, threshold=0.5, warmup=0, repeats=1): def predict(self, image_list, threshold=0.5, repeats=1, add_timer=True):
''' '''
Args: Args:
image_list (list): list of image image_list (list): list of image
threshold (float): threshold of predicted box' score threshold (float): threshold of predicted box' score
repeats (int): repeat number for prediction
add_timer (bool): whether add timer during prediction
Returns: Returns:
results (dict): include 'boxes': np.ndarray: shape:[N,6], N: number of box, results (dict): include 'boxes': np.ndarray: shape:[N,6], N: number of box,
matix element:[class, score, x_min, y_min, x_max, y_max] matix element:[class, score, x_min, y_min, x_max, y_max]
MaskRCNN's results include 'masks': np.ndarray: MaskRCNN's results include 'masks': np.ndarray:
shape: [N, im_h, im_w] shape: [N, im_h, im_w]
''' '''
# preprocess
if add_timer:
self.det_times.preprocess_time_s.start() self.det_times.preprocess_time_s.start()
inputs = self.preprocess(image_list) inputs = self.preprocess(image_list)
np_boxes, np_masks = None, None np_boxes, np_masks = None, None
input_names = self.predictor.get_input_names() input_names = self.predictor.get_input_names()
for i in range(len(input_names)): for i in range(len(input_names)):
input_tensor = self.predictor.get_input_handle(input_names[i]) input_tensor = self.predictor.get_input_handle(input_names[i])
input_tensor.copy_from_cpu(inputs[input_names[i]]) input_tensor.copy_from_cpu(inputs[input_names[i]])
if add_timer:
self.det_times.preprocess_time_s.end() self.det_times.preprocess_time_s.end()
for i in range(warmup):
self.predictor.run()
output_names = self.predictor.get_output_names()
boxes_tensor = self.predictor.get_output_handle(output_names[0])
np_boxes = boxes_tensor.copy_to_cpu()
if self.pred_config.tagmap:
masks_tensor = self.predictor.get_output_handle(output_names[1])
heat_k = self.predictor.get_output_handle(output_names[2])
inds_k = self.predictor.get_output_handle(output_names[3])
np_masks = [
masks_tensor.copy_to_cpu(), heat_k.copy_to_cpu(),
inds_k.copy_to_cpu()
]
self.det_times.inference_time_s.start() self.det_times.inference_time_s.start()
# model prediction
for i in range(repeats): for i in range(repeats):
self.predictor.run() self.predictor.run()
output_names = self.predictor.get_output_names() output_names = self.predictor.get_output_names()
...@@ -193,11 +185,14 @@ class KeyPoint_Detector(Detector): ...@@ -193,11 +185,14 @@ class KeyPoint_Detector(Detector):
masks_tensor.copy_to_cpu(), heat_k.copy_to_cpu(), masks_tensor.copy_to_cpu(), heat_k.copy_to_cpu(),
inds_k.copy_to_cpu() inds_k.copy_to_cpu()
] ]
if add_timer:
self.det_times.inference_time_s.end(repeats=repeats) self.det_times.inference_time_s.end(repeats=repeats)
self.det_times.postprocess_time_s.start() self.det_times.postprocess_time_s.start()
# postprocess
results = self.postprocess( results = self.postprocess(
np_boxes, np_masks, inputs, threshold=threshold) np_boxes, np_masks, inputs, threshold=threshold)
if add_timer:
self.det_times.postprocess_time_s.end() self.det_times.postprocess_time_s.end()
self.det_times.img_num += len(image_list) self.det_times.img_num += len(image_list)
return results return results
...@@ -266,7 +261,12 @@ class PredictConfig_KeyPoint(): ...@@ -266,7 +261,12 @@ class PredictConfig_KeyPoint():
def predict_image(detector, image_list): def predict_image(detector, image_list):
for i, img_file in enumerate(image_list): for i, img_file in enumerate(image_list):
if FLAGS.run_benchmark: if FLAGS.run_benchmark:
detector.predict([img_file], FLAGS.threshold, warmup=10, repeats=10) # warmup
detector.predict(
[img_file], FLAGS.threshold, repeats=10, add_timer=False)
# run benchmark
detector.predict(
[img_file], FLAGS.threshold, repeats=10, add_timer=True)
cm, gm, gu = get_current_memory_mb() cm, gm, gu = get_current_memory_mb()
detector.cpu_mem += cm detector.cpu_mem += cm
detector.gpu_mem += gm detector.gpu_mem += gm
...@@ -300,7 +300,7 @@ def predict_video(detector, camera_id): ...@@ -300,7 +300,7 @@ def predict_video(detector, camera_id):
if not os.path.exists(FLAGS.output_dir): if not os.path.exists(FLAGS.output_dir):
os.makedirs(FLAGS.output_dir) os.makedirs(FLAGS.output_dir)
out_path = os.path.join(FLAGS.output_dir, video_name + '.mp4') out_path = os.path.join(FLAGS.output_dir, video_name + '.mp4')
fourcc = cv2.VideoWriter_fourcc(*'mp4v') fourcc = cv2.VideoWriter_fourcc(* 'mp4v')
writer = cv2.VideoWriter(out_path, fourcc, fps, (width, height)) writer = cv2.VideoWriter(out_path, fourcc, fps, (width, height))
index = 1 index = 1
while (1): while (1):
......
...@@ -120,31 +120,31 @@ class JDE_Detector(Detector): ...@@ -120,31 +120,31 @@ class JDE_Detector(Detector):
online_scores[cls_id].append(tscore) online_scores[cls_id].append(tscore)
return online_tlwhs, online_scores, online_ids return online_tlwhs, online_scores, online_ids
def predict(self, image_list, threshold=0.5, warmup=0, repeats=1): def predict(self, image_list, threshold=0.5, repeats=1, add_timer=True):
''' '''
Args: Args:
image_list (list): list of image image_list (list): list of image
threshold (float): threshold of predicted box' score threshold (float): threshold of predicted box' score
repeats (int): repeat number for prediction
add_timer (bool): whether add timer during prediction
Returns: Returns:
online_tlwhs, online_scores, online_ids (dict[np.array]) online_tlwhs, online_scores, online_ids (dict[np.array])
''' '''
# preprocess
if add_timer:
self.det_times.preprocess_time_s.start() self.det_times.preprocess_time_s.start()
inputs = self.preprocess(image_list) inputs = self.preprocess(image_list)
self.det_times.preprocess_time_s.end()
pred_dets, pred_embs = None, None pred_dets, pred_embs = None, None
input_names = self.predictor.get_input_names() input_names = self.predictor.get_input_names()
for i in range(len(input_names)): for i in range(len(input_names)):
input_tensor = self.predictor.get_input_handle(input_names[i]) input_tensor = self.predictor.get_input_handle(input_names[i])
input_tensor.copy_from_cpu(inputs[input_names[i]]) input_tensor.copy_from_cpu(inputs[input_names[i]])
if add_timer:
for i in range(warmup): self.det_times.preprocess_time_s.end()
self.predictor.run()
output_names = self.predictor.get_output_names()
boxes_tensor = self.predictor.get_output_handle(output_names[0])
pred_dets = boxes_tensor.copy_to_cpu()
self.det_times.inference_time_s.start() self.det_times.inference_time_s.start()
# model prediction
for i in range(repeats): for i in range(repeats):
self.predictor.run() self.predictor.run()
output_names = self.predictor.get_output_names() output_names = self.predictor.get_output_names()
...@@ -152,11 +152,15 @@ class JDE_Detector(Detector): ...@@ -152,11 +152,15 @@ class JDE_Detector(Detector):
pred_dets = boxes_tensor.copy_to_cpu() pred_dets = boxes_tensor.copy_to_cpu()
embs_tensor = self.predictor.get_output_handle(output_names[1]) embs_tensor = self.predictor.get_output_handle(output_names[1])
pred_embs = embs_tensor.copy_to_cpu() pred_embs = embs_tensor.copy_to_cpu()
self.det_times.inference_time_s.end(repeats=repeats)
if add_timer:
self.det_times.inference_time_s.end(repeats=repeats)
self.det_times.postprocess_time_s.start() self.det_times.postprocess_time_s.start()
# postprocess
online_tlwhs, online_scores, online_ids = self.postprocess( online_tlwhs, online_scores, online_ids = self.postprocess(
pred_dets, pred_embs, threshold) pred_dets, pred_embs, threshold)
if add_timer:
self.det_times.postprocess_time_s.end() self.det_times.postprocess_time_s.end()
self.det_times.img_num += 1 self.det_times.img_num += 1
return online_tlwhs, online_scores, online_ids return online_tlwhs, online_scores, online_ids
...@@ -172,7 +176,12 @@ def predict_image(detector, image_list): ...@@ -172,7 +176,12 @@ def predict_image(detector, image_list):
for frame_id, img_file in enumerate(image_list): for frame_id, img_file in enumerate(image_list):
frame = cv2.imread(img_file) frame = cv2.imread(img_file)
if FLAGS.run_benchmark: if FLAGS.run_benchmark:
detector.predict([frame], FLAGS.threshold, warmup=10, repeats=10) # warmup
detector.predict(
[frame], FLAGS.threshold, repeats=10, add_timer=False)
# run benchmark
detector.predict(
[frame], FLAGS.threshold, repeats=10, add_timer=True)
cm, gm, gu = get_current_memory_mb() cm, gm, gu = get_current_memory_mb()
detector.cpu_mem += cm detector.cpu_mem += cm
detector.gpu_mem += gm detector.gpu_mem += gm
...@@ -181,8 +190,13 @@ def predict_image(detector, image_list): ...@@ -181,8 +190,13 @@ def predict_image(detector, image_list):
else: else:
online_tlwhs, online_scores, online_ids = detector.predict( online_tlwhs, online_scores, online_ids = detector.predict(
[frame], FLAGS.threshold) [frame], FLAGS.threshold)
online_im = plot_tracking_dict(frame, num_classes, online_tlwhs, online_im = plot_tracking_dict(
online_ids, online_scores, frame_id, frame,
num_classes,
online_tlwhs,
online_ids,
online_scores,
frame_id,
ids2names=ids2names) ids2names=ids2names)
if FLAGS.save_images: if FLAGS.save_images:
if not os.path.exists(FLAGS.output_dir): if not os.path.exists(FLAGS.output_dir):
...@@ -211,7 +225,7 @@ def predict_video(detector, camera_id): ...@@ -211,7 +225,7 @@ def predict_video(detector, camera_id):
os.makedirs(FLAGS.output_dir) os.makedirs(FLAGS.output_dir)
out_path = os.path.join(FLAGS.output_dir, video_name) out_path = os.path.join(FLAGS.output_dir, video_name)
if not FLAGS.save_images: if not FLAGS.save_images:
fourcc = cv2.VideoWriter_fourcc(*'mp4v') fourcc = cv2.VideoWriter_fourcc(* 'mp4v')
writer = cv2.VideoWriter(out_path, fourcc, fps, (width, height)) writer = cv2.VideoWriter(out_path, fourcc, fps, (width, height))
frame_id = 0 frame_id = 0
timer = MOTTimer() timer = MOTTimer()
......
...@@ -64,8 +64,12 @@ def mot_keypoint_unite_predict_image(mot_model, ...@@ -64,8 +64,12 @@ def mot_keypoint_unite_predict_image(mot_model,
frame = cv2.imread(img_file) frame = cv2.imread(img_file)
if FLAGS.run_benchmark: if FLAGS.run_benchmark:
# warmup
online_tlwhs, online_scores, online_ids = mot_model.predict( online_tlwhs, online_scores, online_ids = mot_model.predict(
[frame], FLAGS.mot_threshold, warmup=10, repeats=10) [frame], FLAGS.mot_threshold, repeats=10, add_timer=False)
# run benchmark
online_tlwhs, online_scores, online_ids = mot_model.predict(
[frame], FLAGS.mot_threshold, repeats=10, add_timer=True)
cm, gm, gu = get_current_memory_mb() cm, gm, gu = get_current_memory_mb()
mot_model.cpu_mem += cm mot_model.cpu_mem += cm
mot_model.gpu_mem += gm mot_model.gpu_mem += gm
...@@ -84,13 +88,16 @@ def mot_keypoint_unite_predict_image(mot_model, ...@@ -84,13 +88,16 @@ def mot_keypoint_unite_predict_image(mot_model,
FLAGS.run_benchmark) FLAGS.run_benchmark)
else: else:
warmup = 10 if FLAGS.run_benchmark else 0 if FLAGS.run_benchmark:
repeats = 10 if FLAGS.run_benchmark else 1
keypoint_results = keypoint_model.predict( keypoint_results = keypoint_model.predict(
[frame], [frame],
FLAGS.keypoint_threshold, FLAGS.keypoint_threshold,
warmup=warmup, repeats=10,
repeats=repeats) add_timer=False)
repeats = 10 if FLAGS.run_benchmark else 1
keypoint_results = keypoint_model.predict(
[frame], FLAGS.keypoint_threshold, repeats=repeats)
if FLAGS.run_benchmark: if FLAGS.run_benchmark:
cm, gm, gu = get_current_memory_mb() cm, gm, gu = get_current_memory_mb()
...@@ -103,7 +110,7 @@ def mot_keypoint_unite_predict_image(mot_model, ...@@ -103,7 +110,7 @@ def mot_keypoint_unite_predict_image(mot_model,
keypoint_results, keypoint_results,
visual_thread=FLAGS.keypoint_threshold, visual_thread=FLAGS.keypoint_threshold,
returnimg=True, returnimg=True,
ids=online_ids ids=online_ids[0]
if KEYPOINT_SUPPORT_MODELS[keypoint_arch] == 'keypoint_topdown' if KEYPOINT_SUPPORT_MODELS[keypoint_arch] == 'keypoint_topdown'
else None) else None)
...@@ -144,7 +151,7 @@ def mot_keypoint_unite_predict_video(mot_model, ...@@ -144,7 +151,7 @@ def mot_keypoint_unite_predict_video(mot_model,
os.makedirs(FLAGS.output_dir) os.makedirs(FLAGS.output_dir)
out_path = os.path.join(FLAGS.output_dir, video_name) out_path = os.path.join(FLAGS.output_dir, video_name)
if not FLAGS.save_images: if not FLAGS.save_images:
fourcc = cv2.VideoWriter_fourcc(*'mp4v') fourcc = cv2.VideoWriter_fourcc(* 'mp4v')
writer = cv2.VideoWriter(out_path, fourcc, fps, (width, height)) writer = cv2.VideoWriter(out_path, fourcc, fps, (width, height))
frame_id = 0 frame_id = 0
timer_mot = FPSTimer() timer_mot = FPSTimer()
...@@ -193,7 +200,7 @@ def mot_keypoint_unite_predict_video(mot_model, ...@@ -193,7 +200,7 @@ def mot_keypoint_unite_predict_video(mot_model,
keypoint_results, keypoint_results,
visual_thread=FLAGS.keypoint_threshold, visual_thread=FLAGS.keypoint_threshold,
returnimg=True, returnimg=True,
ids=online_ids ids=online_ids[0]
if KEYPOINT_SUPPORT_MODELS[keypoint_arch] == 'keypoint_topdown' else if KEYPOINT_SUPPORT_MODELS[keypoint_arch] == 'keypoint_topdown' else
None) None)
......
...@@ -178,40 +178,43 @@ class SDE_Detector(Detector): ...@@ -178,40 +178,43 @@ class SDE_Detector(Detector):
return pred_dets, pred_xyxys return pred_dets, pred_xyxys
def predict(self, image, scaled, threshold=0.5, warmup=0, repeats=1): def predict(self, image, scaled, threshold=0.5, repeats=1, add_timer=True):
''' '''
Args: Args:
image (np.ndarray): image numpy data image (np.ndarray): image numpy data
threshold (float): threshold of predicted box' score
scaled (bool): whether the coords after detector outputs are scaled, scaled (bool): whether the coords after detector outputs are scaled,
default False in jde yolov3, set True in general detector. default False in jde yolov3, set True in general detector.
threshold (float): threshold of predicted box' score
repeats (int): repeat number for prediction
add_timer (bool): whether add timer during prediction
Returns: Returns:
pred_dets (np.ndarray, [N, 6]) pred_dets (np.ndarray, [N, 6])
''' '''
# preprocess
if add_timer:
self.det_times.preprocess_time_s.start() self.det_times.preprocess_time_s.start()
inputs = self.preprocess(image) inputs = self.preprocess(image)
self.det_times.preprocess_time_s.end()
input_names = self.predictor.get_input_names() input_names = self.predictor.get_input_names()
for i in range(len(input_names)): for i in range(len(input_names)):
input_tensor = self.predictor.get_input_handle(input_names[i]) input_tensor = self.predictor.get_input_handle(input_names[i])
input_tensor.copy_from_cpu(inputs[input_names[i]]) input_tensor.copy_from_cpu(inputs[input_names[i]])
for i in range(warmup): if add_timer:
self.predictor.run() self.det_times.preprocess_time_s.end()
output_names = self.predictor.get_output_names()
boxes_tensor = self.predictor.get_output_handle(output_names[0])
boxes = boxes_tensor.copy_to_cpu()
self.det_times.inference_time_s.start() self.det_times.inference_time_s.start()
# model prediction
for i in range(repeats): for i in range(repeats):
self.predictor.run() self.predictor.run()
output_names = self.predictor.get_output_names() output_names = self.predictor.get_output_names()
boxes_tensor = self.predictor.get_output_handle(output_names[0]) boxes_tensor = self.predictor.get_output_handle(output_names[0])
boxes = boxes_tensor.copy_to_cpu() boxes = boxes_tensor.copy_to_cpu()
self.det_times.inference_time_s.end(repeats=repeats)
if add_timer:
self.det_times.inference_time_s.end(repeats=repeats)
self.det_times.postprocess_time_s.start() self.det_times.postprocess_time_s.start()
# postprocess
if len(boxes) == 0: if len(boxes) == 0:
pred_dets = np.zeros((1, 6), dtype=np.float32) pred_dets = np.zeros((1, 6), dtype=np.float32)
pred_xyxys = np.zeros((1, 4), dtype=np.float32) pred_xyxys = np.zeros((1, 4), dtype=np.float32)
...@@ -223,6 +226,7 @@ class SDE_Detector(Detector): ...@@ -223,6 +226,7 @@ class SDE_Detector(Detector):
pred_dets, pred_xyxys = self.postprocess( pred_dets, pred_xyxys = self.postprocess(
boxes, input_shape, im_shape, scale_factor, threshold, scaled) boxes, input_shape, im_shape, scale_factor, threshold, scaled)
if add_timer:
self.det_times.postprocess_time_s.end() self.det_times.postprocess_time_s.end()
self.det_times.img_num += 1 self.det_times.img_num += 1
return pred_dets, pred_xyxys return pred_dets, pred_xyxys
...@@ -271,7 +275,8 @@ class SDE_DetectorPicoDet(DetectorPicoDet): ...@@ -271,7 +275,8 @@ class SDE_DetectorPicoDet(DetectorPicoDet):
assert batch_size == 1, "The JDE Detector only supports batch size=1 now" assert batch_size == 1, "The JDE Detector only supports batch size=1 now"
self.pred_config = pred_config self.pred_config = pred_config
def postprocess_bboxes(self, boxes, input_shape, im_shape, scale_factor, threshold): def postprocess_bboxes(self, boxes, input_shape, im_shape, scale_factor,
threshold):
over_thres_idx = np.nonzero(boxes[:, 1:2] >= threshold)[0] over_thres_idx = np.nonzero(boxes[:, 1:2] >= threshold)[0]
if len(over_thres_idx) == 0: if len(over_thres_idx) == 0:
pred_dets = np.zeros((1, 6), dtype=np.float32) pred_dets = np.zeros((1, 6), dtype=np.float32)
...@@ -299,33 +304,35 @@ class SDE_DetectorPicoDet(DetectorPicoDet): ...@@ -299,33 +304,35 @@ class SDE_DetectorPicoDet(DetectorPicoDet):
(pred_tlwhs, pred_scores, pred_cls_ids), axis=1) (pred_tlwhs, pred_scores, pred_cls_ids), axis=1)
return pred_dets, pred_xyxys return pred_dets, pred_xyxys
def predict(self, image, scaled, threshold=0.5, warmup=0, repeats=1): def predict(self, image, scaled, threshold=0.5, repeats=1, add_timer=True):
''' '''
Args: Args:
image (np.ndarray): image numpy data image (np.ndarray): image numpy data
threshold (float): threshold of predicted box' score
scaled (bool): whether the coords after detector outputs are scaled, scaled (bool): whether the coords after detector outputs are scaled,
default False in jde yolov3, set True in general detector. default False in jde yolov3, set True in general detector.
threshold (float): threshold of predicted box' score
repeats (int): repeat number for prediction
add_timer (bool): whether add timer during prediction
Returns: Returns:
pred_dets (np.ndarray, [N, 6]) pred_dets (np.ndarray, [N, 6])
''' '''
# preprocess
if add_timer:
self.det_times.preprocess_time_s.start() self.det_times.preprocess_time_s.start()
inputs = self.preprocess(image) inputs = self.preprocess(image)
self.det_times.preprocess_time_s.end()
input_names = self.predictor.get_input_names() input_names = self.predictor.get_input_names()
for i in range(len(input_names)): for i in range(len(input_names)):
input_tensor = self.predictor.get_input_handle(input_names[i]) input_tensor = self.predictor.get_input_handle(input_names[i])
input_tensor.copy_from_cpu(inputs[input_names[i]]) input_tensor.copy_from_cpu(inputs[input_names[i]])
np_score_list, np_boxes_list = [], [] if add_timer:
for i in range(warmup): self.det_times.preprocess_time_s.end()
self.predictor.run()
output_names = self.predictor.get_output_names()
boxes_tensor = self.predictor.get_output_handle(output_names[0])
boxes = boxes_tensor.copy_to_cpu()
self.det_times.inference_time_s.start() self.det_times.inference_time_s.start()
# model prediction
np_score_list, np_boxes_list = [], []
for i in range(repeats): for i in range(repeats):
self.predictor.run() self.predictor.run()
np_score_list.clear() np_score_list.clear()
...@@ -340,9 +347,12 @@ class SDE_DetectorPicoDet(DetectorPicoDet): ...@@ -340,9 +347,12 @@ class SDE_DetectorPicoDet(DetectorPicoDet):
self.predictor.get_output_handle(output_names[ self.predictor.get_output_handle(output_names[
out_idx + num_outs]).copy_to_cpu()) out_idx + num_outs]).copy_to_cpu())
if add_timer:
self.det_times.inference_time_s.end(repeats=repeats) self.det_times.inference_time_s.end(repeats=repeats)
self.det_times.img_num += 1 self.det_times.img_num += 1
self.det_times.postprocess_time_s.start() self.det_times.postprocess_time_s.start()
# postprocess
self.postprocess = PicoDetPostProcess( self.postprocess = PicoDetPostProcess(
inputs['image'].shape[2:], inputs['image'].shape[2:],
inputs['im_shape'], inputs['im_shape'],
...@@ -360,7 +370,8 @@ class SDE_DetectorPicoDet(DetectorPicoDet): ...@@ -360,7 +370,8 @@ class SDE_DetectorPicoDet(DetectorPicoDet):
scale_factor = inputs['scale_factor'] scale_factor = inputs['scale_factor']
pred_dets, pred_xyxys = self.postprocess_bboxes( pred_dets, pred_xyxys = self.postprocess_bboxes(
boxes, input_shape, im_shape, scale_factor, threshold) boxes, input_shape, im_shape, scale_factor, threshold)
if add_timer:
self.det_times.postprocess_time_s.end()
return pred_dets, pred_xyxys return pred_dets, pred_xyxys
...@@ -445,33 +456,34 @@ class SDE_ReID(object): ...@@ -445,33 +456,34 @@ class SDE_ReID(object):
return online_tlwhs, online_scores, online_ids return online_tlwhs, online_scores, online_ids
def predict(self, crops, pred_dets, warmup=0, repeats=1): def predict(self, crops, pred_dets, repeats=1, add_timer=True):
# preprocess
if add_timer:
self.det_times.preprocess_time_s.start() self.det_times.preprocess_time_s.start()
inputs = self.preprocess(crops) inputs = self.preprocess(crops)
self.det_times.preprocess_time_s.end()
input_names = self.predictor.get_input_names() input_names = self.predictor.get_input_names()
for i in range(len(input_names)): for i in range(len(input_names)):
input_tensor = self.predictor.get_input_handle(input_names[i]) input_tensor = self.predictor.get_input_handle(input_names[i])
input_tensor.copy_from_cpu(inputs[input_names[i]]) input_tensor.copy_from_cpu(inputs[input_names[i]])
if add_timer:
for i in range(warmup): self.det_times.preprocess_time_s.end()
self.predictor.run()
output_names = self.predictor.get_output_names()
feature_tensor = self.predictor.get_output_handle(output_names[0])
pred_embs = feature_tensor.copy_to_cpu()
self.det_times.inference_time_s.start() self.det_times.inference_time_s.start()
# model prediction
for i in range(repeats): for i in range(repeats):
self.predictor.run() self.predictor.run()
output_names = self.predictor.get_output_names() output_names = self.predictor.get_output_names()
feature_tensor = self.predictor.get_output_handle(output_names[0]) feature_tensor = self.predictor.get_output_handle(output_names[0])
pred_embs = feature_tensor.copy_to_cpu() pred_embs = feature_tensor.copy_to_cpu()
if add_timer:
self.det_times.inference_time_s.end(repeats=repeats) self.det_times.inference_time_s.end(repeats=repeats)
self.det_times.postprocess_time_s.start() self.det_times.postprocess_time_s.start()
# postprocess
online_tlwhs, online_scores, online_ids = self.postprocess(pred_dets, online_tlwhs, online_scores, online_ids = self.postprocess(pred_dets,
pred_embs) pred_embs)
if add_timer:
self.det_times.postprocess_time_s.end() self.det_times.postprocess_time_s.end()
self.det_times.img_num += 1 self.det_times.img_num += 1
...@@ -483,8 +495,20 @@ def predict_image(detector, reid_model, image_list): ...@@ -483,8 +495,20 @@ def predict_image(detector, reid_model, image_list):
for i, img_file in enumerate(image_list): for i, img_file in enumerate(image_list):
frame = cv2.imread(img_file) frame = cv2.imread(img_file)
if FLAGS.run_benchmark: if FLAGS.run_benchmark:
# warmup
pred_dets, pred_xyxys = detector.predict( pred_dets, pred_xyxys = detector.predict(
[frame], FLAGS.scaled, FLAGS.threshold, warmup=10, repeats=10) [frame],
FLAGS.scaled,
FLAGS.threshold,
repeats=10,
add_timer=True)
# run benchmark
pred_dets, pred_xyxys = detector.predict(
[frame],
FLAGS.scaled,
FLAGS.threshold,
repeats=10,
add_timer=True)
cm, gm, gu = get_current_memory_mb() cm, gm, gu = get_current_memory_mb()
detector.cpu_mem += cm detector.cpu_mem += cm
detector.gpu_mem += gm detector.gpu_mem += gm
...@@ -503,8 +527,12 @@ def predict_image(detector, reid_model, image_list): ...@@ -503,8 +527,12 @@ def predict_image(detector, reid_model, image_list):
crops = reid_model.get_crops(pred_xyxys, frame) crops = reid_model.get_crops(pred_xyxys, frame)
if FLAGS.run_benchmark: if FLAGS.run_benchmark:
# warmup
online_tlwhs, online_scores, online_ids = reid_model.predict(
crops, pred_dets, repeats=10, add_timer=False)
# run benchmark
online_tlwhs, online_scores, online_ids = reid_model.predict( online_tlwhs, online_scores, online_ids = reid_model.predict(
crops, pred_dets, warmup=10, repeats=10) crops, pred_dets, repeats=10, add_timer=False)
else: else:
online_tlwhs, online_scores, online_ids = reid_model.predict( online_tlwhs, online_scores, online_ids = reid_model.predict(
crops, pred_dets) crops, pred_dets)
...@@ -538,7 +566,7 @@ def predict_video(detector, reid_model, camera_id): ...@@ -538,7 +566,7 @@ def predict_video(detector, reid_model, camera_id):
os.makedirs(FLAGS.output_dir) os.makedirs(FLAGS.output_dir)
out_path = os.path.join(FLAGS.output_dir, video_name) out_path = os.path.join(FLAGS.output_dir, video_name)
if not FLAGS.save_images: if not FLAGS.save_images:
fourcc = cv2.VideoWriter_fourcc(*'mp4v') fourcc = cv2.VideoWriter_fourcc(* 'mp4v')
writer = cv2.VideoWriter(out_path, fourcc, fps, (width, height)) writer = cv2.VideoWriter(out_path, fourcc, fps, (width, height))
frame_id = 0 frame_id = 0
timer = MOTTimer() timer = MOTTimer()
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册