From cff6841a44d687df5e16d7c95e949e3d4b517ca5 Mon Sep 17 00:00:00 2001
From: wangguanzhong
Date: Mon, 14 Mar 2022 13:55:52 +0800
Subject: [PATCH] refine pipeline visualize (#5347)

* refine pipeline visualize

* add fps vis

* refine vis when box is empty
---
 deploy/pphuman/config/infer_cfg.yml |   2 +-
 deploy/pphuman/pipe_utils.py        |  16 ++--
 deploy/pphuman/pipeline.py          |  43 ++++++++---
 deploy/python/attr_infer.py         | 112 ++++++++++++++++++----------
 deploy/python/visualize.py          |  37 +++++----
 5 files changed, 141 insertions(+), 69 deletions(-)

diff --git a/deploy/pphuman/config/infer_cfg.yml b/deploy/pphuman/config/infer_cfg.yml
index 45db7a9bb..b2c2eb764 100644
--- a/deploy/pphuman/config/infer_cfg.yml
+++ b/deploy/pphuman/config/infer_cfg.yml
@@ -3,7 +3,7 @@ attr_thresh: 0.5
 visual: True
 
 DET:
-  model_dir: output_inference/pedestrian_yolov3_darknet/
+  model_dir: output_inference/mot_ppyolov3/
   batch_size: 1
 
 ATTR:
diff --git a/deploy/pphuman/pipe_utils.py b/deploy/pphuman/pipe_utils.py
index 5cdb3943d..580939e39 100644
--- a/deploy/pphuman/pipe_utils.py
+++ b/deploy/pphuman/pipe_utils.py
@@ -134,9 +134,17 @@ class PipeTimer(Times):
         }
         self.img_num = 0
 
-    def info(self, average=False):
+    def get_total_time(self):
         total_time = self.total_time.value()
         total_time = round(total_time, 4)
+        average_latency = total_time / max(1, self.img_num)
+        qps = 0
+        if total_time > 0:
+            qps = 1 / average_latency
+        return total_time, average_latency, qps
+
+    def info(self):
+        total_time, average_latency, qps = self.get_total_time()
         print("------------------ Inference Time Info ----------------------")
         print("total_time(ms): {}, img_num: {}".format(total_time * 1000,
                                                        self.img_num))
@@ -146,13 +154,9 @@ class PipeTimer(Times):
             if v_time > 0:
                 print("{} time(ms): {}".format(k, v_time * 1000))
 
-        average_latency = total_time / max(1, self.img_num)
-        qps = 0
-        if total_time > 0:
-            qps = 1 / average_latency
-
-        print("average latency time(ms): {:.2f}, QPS: {:2f}".format(
+        print("average latency time(ms): {:.2f}, QPS: {:.2f}".format(
             average_latency * 1000, qps))
+        return qps
 
     def report(self, average=False):
         dic = {}
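Note: the change above splits PipeTimer so measurement (get_total_time) is separate from reporting (info), letting the video loop poll FPS on every frame without printing. Below is a minimal runnable sketch of that pattern; SimpleTimer is illustrative only and merely mirrors the get_total_time()/info() contract, it is not the repo's PipeTimer:

import time


class SimpleTimer(object):
    """Illustrative stand-in for PipeTimer; only the split contract matters."""

    def __init__(self):
        self.total_time = 0.
        self.img_num = 0
        self._tic = 0.

    def tic(self):
        self._tic = time.time()

    def toc(self):
        self.total_time += time.time() - self._tic
        self.img_num += 1

    def get_total_time(self):
        # pure computation, cheap enough to call once per frame
        total_time = round(self.total_time, 4)
        average_latency = total_time / max(1, self.img_num)
        qps = 1 / average_latency if total_time > 0 else 0
        return total_time, average_latency, qps

    def info(self):
        # reporting only; reuses the computation and hands qps back to the caller
        total_time, average_latency, qps = self.get_total_time()
        print("total_time(ms): {}, img_num: {}".format(total_time * 1000,
                                                       self.img_num))
        print("average latency time(ms): {:.2f}, QPS: {:.2f}".format(
            average_latency * 1000, qps))
        return qps


timer = SimpleTimer()
for _ in range(3):
    timer.tic()
    time.sleep(0.01)  # stand-in for one frame of inference
    timer.toc()
_, _, fps = timer.get_total_time()  # polled mid-run for the on-frame overlay
timer.info()                        # printed once at the end

Returning qps from info() also lets a caller reuse the final figure without recomputing it.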
diff --git a/deploy/pphuman/pipeline.py b/deploy/pphuman/pipeline.py
index 9b1b04503..c5790d086 100644
--- a/deploy/pphuman/pipeline.py
+++ b/deploy/pphuman/pipeline.py
@@ -109,6 +109,8 @@ class Pipeline(object):
                 cpu_threads=cpu_threads,
                 enable_mkldnn=enable_mkldnn,
                 output_dir=output_dir)
+            if self.is_video:
+                self.predictor.set_file_name(video_file)
 
     def _parse_input(self, image_file, image_dir, video_file, camera_id):
@@ -234,6 +236,7 @@ class PipePredictor(object):
         self.warmup_frame = 1
         self.pipeline_res = Result()
         self.pipe_timer = PipeTimer()
+        self.file_name = None
 
         if not is_video:
             det_cfg = self.cfg['DET']
@@ -274,6 +277,9 @@ class PipePredictor(object):
             self.kpt_collector = KeyPointCollector()
             self.action_predictor = ActionDetector()
 
+    def set_file_name(self, path):
+        self.file_name = os.path.split(path)[-1]
+
     def get_result(self):
         return self.pipeline_res
 
@@ -282,7 +288,7 @@
             self.predict_video(input)
         else:
             self.predict_image(input)
-        self.pipe_timer.info(True)
+        self.pipe_timer.info()
 
     def predict_image(self, input):
         # det
@@ -334,7 +340,7 @@
         # mot
        # mot -> attr
        # mot -> pose -> action
-        video_out_name = 'output.mp4'
+        video_out_name = 'output.mp4' if self.file_name is None else self.file_name
 
         # Get Video info : resolution, fps, frame count
         width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
@@ -366,6 +372,18 @@
             # mot output format: id, class, score, xmin, ymin, xmax, ymax
             mot_res = parse_mot_res(res)
 
+            # nothing detected
+            if len(mot_res['boxes']) == 0:
+                frame_id += 1
+                self.pipe_timer.img_num += 1
+                self.pipe_timer.total_time.end()
+                if self.cfg['visual']:
+                    _, _, fps = self.pipe_timer.get_total_time()
+                    im = self.visualize_video(frame, mot_res, frame_id,
+                                              fps)  # visualize
+                    writer.write(im)
+                continue
+
             self.pipeline_res.update(mot_res, 'mot')
             if self.with_attr or self.with_action:
                 crop_input = crop_image_with_mot(frame, mot_res)
@@ -403,20 +421,25 @@
                     self.pipeline_res)  # parse output result for multi-camera
 
             if self.cfg['visual']:
-                im = self.visualize_video(frame, self.pipeline_res,
-                                          frame_id)  # visualize
+                _, _, fps = self.pipe_timer.get_total_time()
+                im = self.visualize_video(frame, self.pipeline_res, frame_id,
+                                          fps)  # visualize
                 writer.write(im)
 
         writer.release()
         print('save result to {}'.format(out_path))
 
-    def visualize_video(self, image, result, frame_id):
+    def visualize_video(self, image, result, frame_id, fps):
         mot_res = result.get('mot')
-        ids = mot_res['boxes'][:, 0]
-        boxes = mot_res['boxes'][:, 3:]
-        boxes[:, 2] = boxes[:, 2] - boxes[:, 0]
-        boxes[:, 3] = boxes[:, 3] - boxes[:, 1]
-        image = plot_tracking(image, boxes, ids, frame_id=frame_id)
+        if mot_res is not None:
+            ids = mot_res['boxes'][:, 0]
+            boxes = mot_res['boxes'][:, 3:]
+            boxes[:, 2] = boxes[:, 2] - boxes[:, 0]
+            boxes[:, 3] = boxes[:, 3] - boxes[:, 1]
+        else:
+            boxes = np.zeros([0, 4])
+            ids = np.zeros([0])
+        image = plot_tracking(image, boxes, ids, frame_id=frame_id, fps=fps)
 
         attr_res = result.get('attr')
         if attr_res is not None:
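Note: with the empty-frame guard above, a frame with no detections still passes through visualize_video(), so the writer receives every frame and the FPS overlay stays continuous; the np.zeros([0, 4]) fallback turns each drawing loop into a no-op. A hedged sketch of that degenerate path follows; plot_tracking here is a stand-in for the real helper in deploy/python/visualize.py, reduced to the overlay plus a zero-iteration box loop:

import cv2
import numpy as np


def plot_tracking(image, boxes, ids, frame_id=0, fps=0.):
    # stand-in for the repo's plot_tracking: draw the frame/FPS header,
    # then draw boxes -- the loop simply runs zero times when boxes is empty
    im = np.ascontiguousarray(np.copy(image))
    cv2.putText(im, 'frame: %d fps: %.2f num: %d' % (frame_id, fps, len(boxes)),
                (0, 20), cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 255), thickness=2)
    for box, obj_id in zip(boxes, ids):
        x, y, w, h = box.astype(int)
        cv2.rectangle(im, (x, y), (x + w, y + h), (0, 255, 0), 2)
        cv2.putText(im, str(int(obj_id)), (x, y - 5), cv2.FONT_HERSHEY_PLAIN,
                    1, (0, 255, 0), thickness=2)
    return im


frame = np.zeros((240, 320, 3), dtype=np.uint8)
# same fallback as visualize_video() when result.get('mot') is None
boxes, ids = np.zeros([0, 4]), np.zeros([0])
out = plot_tracking(frame, boxes, ids, frame_id=0, fps=30.0)
assert out.shape == frame.shape  # a full annotated frame is still produced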
diff --git a/deploy/python/attr_infer.py b/deploy/python/attr_infer.py
index 654ec99fb..9e24acb75 100644
--- a/deploy/python/attr_infer.py
+++ b/deploy/python/attr_infer.py
@@ -91,48 +91,82 @@ class AttrDetector(Detector):
     def postprocess(self, inputs, result):
         # postprocess output of predictor
         im_results = result['output']
-        im_results = np.where(im_results < self.threshold, 0, im_results)
-        label_list = [['Head', ['Hat', 'Glasses']], [
-            'Upper', [
-                'ShortSleeve', 'LongSleeve', 'UpperStride', 'UpperLogo',
-                'UpperPlaid', 'UpperSplice'
-            ]
-        ], [
-            'Lower', [
-                'LowerStripe', 'LowerPattern', 'LongCoat', 'Trousers', 'Shorts',
-                'Skirt&Dress'
-            ]
-        ], ['Shoes', ['boots']], [
-            'Accessory',
-            ['HandBag', 'ShoulderBag', 'Backpack', 'HoldObjectsInFront']
-        ], ['Age', ['AgeOver60', 'Age18-60', 'AgeLess18']],
-                      ['Gender', ['Female']],
-                      ['Direction', ['Front', 'Side', 'Back']]]
-
-        attr_type = [name[0] for name in label_list]
+        labels = self.pred_config.labels
+        age_list = ['AgeLess18', 'Age18-60', 'AgeOver60']
+        direct_list = ['Front', 'Side', 'Back']
+        bag_list = ['HandBag', 'ShoulderBag', 'Backpack']
+        upper_list = [
+            'UpperStride', 'UpperLogo', 'UpperPlaid', 'UpperSplice', 'LongCoat'
+        ]
+        lower_list = [
+            'LowerStripe', 'LowerPattern', 'Trousers', 'Shorts', 'Skirt&Dress'
+        ]
         batch_res = []
         for res in im_results:
-            label_res = {}
-            label_res = {t: [] for t in attr_type}
-            num = 0
-            for i in range(len(label_list)):
-                type_name_i = attr_type[i]
-                attr_name_list = label_list[i][1]
-                for attr_name in attr_name_list:
-                    attr_name = labels[num]
-                    output_prob = res[num]
-                    if output_prob != 0:
-                        label_res[type_name_i].append(attr_name)
-                    num += 1
-
-            if len(label_res['Shoes']) == 0:
-                label_res['Shoes'] = ['no boots']
-            if len(label_res['Gender']) == 0:
-                label_res['Gender'] = ['Male']
-            label_res['Age'] = [labels[19 + np.argmax(res[19:22])]]
-            label_res['Direction'] = [labels[23 + np.argmax(res[23:])]]
+            res = res.tolist()
+            label_res = []
+            # gender
+            gender = 'Female' if res[22] > self.threshold else 'Male'
+            label_res.append(gender)
+            # age
+            age = age_list[np.argmax(res[19:22])]
+            label_res.append(age)
+            # direction
+            direction = direct_list[np.argmax(res[23:])]
+            label_res.append(direction)
+            # glasses
+            glasses = 'Glasses: '
+            if res[1] > self.threshold:
+                glasses += 'True'
+            else:
+                glasses += 'False'
+            label_res.append(glasses)
+            # hat
+            hat = 'Hat: '
+            if res[0] > self.threshold:
+                hat += 'True'
+            else:
+                hat += 'False'
+            label_res.append(hat)
+            # hold obj
+            hold_obj = 'HoldObjectsInFront: '
+            if res[18] > self.threshold:
+                hold_obj += 'True'
+            else:
+                hold_obj += 'False'
+            label_res.append(hold_obj)
+            # bag
+            bag = bag_list[np.argmax(res[15:18])]
+            bag_score = res[15 + np.argmax(res[15:18])]
+            bag_label = bag if bag_score > self.threshold else 'No bag'
+            label_res.append(bag_label)
+            # upper
+            upper_res = res[4:8] + res[10:11]
+            upper_label = 'Upper:'
+            sleeve = 'LongSleeve' if res[3] > res[2] else 'ShortSleeve'
+            upper_label += ' {}'.format(sleeve)
+            for i, r in enumerate(upper_res):
+                if r > self.threshold:
+                    upper_label += ' {}'.format(upper_list[i])
+            label_res.append(upper_label)
+            # lower
+            lower_res = res[8:10] + res[11:14]
+            lower_label = 'Lower: '
+            has_lower = False
+            for i, l in enumerate(lower_res):
+                if l > self.threshold:
+                    lower_label += ' {}'.format(lower_list[i])
+                    has_lower = True
+            if not has_lower:
+                lower_label += ' {}'.format(lower_list[np.argmax(lower_res)])
+
+            label_res.append(lower_label)
+            # shoe
+            shoe = 'Boots' if res[14] > self.threshold else 'No boots'
+            label_res.append(shoe)
+
             batch_res.append(label_res)
         result = {'output': batch_res}
         return result
@@ -240,7 +274,7 @@ def visualize(image_list, batch_res, output_dir='output'):
             os.makedirs(output_dir)
         img_name = os.path.split(image_file)[-1]
         out_path = os.path.join(output_dir, img_name)
-        im.save(out_path, quality=95)
+        cv2.imwrite(out_path, im)
         print("save result to: " + out_path)
diff --git a/deploy/python/visualize.py b/deploy/python/visualize.py
index f080d06ff..3671ab847 100644
--- a/deploy/python/visualize.py
+++ b/deploy/python/visualize.py
@@ -331,23 +331,34 @@ def visualize_pose(imgfile,
 
 def visualize_attr(im, results, boxes=None):
     if isinstance(im, str):
-        im = Image.open(im).convert('RGB')
-    elif isinstance(im, np.ndarray):
-        im = Image.fromarray(im)
+        im = Image.open(im)
+        im = np.ascontiguousarray(np.copy(im))
+        im = cv2.cvtColor(im, cv2.COLOR_RGB2BGR)
+    else:
+        im = np.ascontiguousarray(np.copy(im))
 
-    draw = ImageDraw.Draw(im)
+    im_h, im_w = im.shape[:2]
+    text_scale = max(1, int(im.shape[0] / 1600.))
+    text_thickness = 2
+
+    line_inter = im.shape[0] / 50.
     for i, res in enumerate(results):
-        text = ""
-        for k, v in res.items():
-            if len(v) == 0: continue
-            test_line = "{}: {}\n".format(k, *v)
-            text += test_line
         if boxes is None:
-            text_loc = (1, 1)
+            text_w = 1
+            text_h = 1
         else:
             box = boxes[i]
-            text_loc = (box[2], box[3])
-        draw.text(text_loc, text, fill=(0, 0, 255))
+            text_w = int(box[2])
+            text_h = int(box[3])
+        for text in res:
+            text_h += int(line_inter)
+            text_loc = (text_w, text_h)
+            cv2.putText(
+                im,
+                text,
+                text_loc,
+                cv2.FONT_HERSHEY_PLAIN,
+                text_scale, (0, 0, 255),
+                thickness=text_thickness)
     return im
--
GitLab
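Note (kept after the mail signature, outside the patch itself): the rewritten visualize_attr() draws one attribute string per line with cv2.putText, stepping down by im_h / 50 pixels from an anchor taken at box indices 2 and 3. A self-contained sketch of that layout; the canvas, labels, box values, and output name below are all made up for illustration:

import cv2
import numpy as np

im = np.full((800, 600, 3), 255, dtype=np.uint8)       # blank white canvas
res = ['Male', 'Age18-60', 'Front', 'Glasses: False']  # made-up attribute strings
box = [0., 0., 50., 60., 200., 300.]                   # hypothetical box; indices 2, 3 anchor the text

text_scale = max(1, int(im.shape[0] / 1600.))  # 800 px tall -> scale 1
text_thickness = 2
line_inter = im.shape[0] / 50.                 # vertical step per label line

text_w, text_h = int(box[2]), int(box[3])
for text in res:
    text_h += int(line_inter)                  # move down one line per attribute
    cv2.putText(im, text, (text_w, text_h), cv2.FONT_HERSHEY_PLAIN,
                text_scale, (0, 0, 255), thickness=text_thickness)
cv2.imwrite('attr_vis_demo.jpg', im)           # hypothetical output name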