refine pipeline visualize (#5347)

* refine pipeline visualization
* add FPS visualization
* refine visualization when no boxes are detected

refine pipeline visualize (#5347)
* refine pipeline visualization
* add FPS visualization
* refine visualization when no boxes are detected
cff6841a · wangguanzhong · GitHub · 8ffa45af · cff6841a · cff6841a
5 changed files
--- a/deploy/pphuman/config/infer_cfg.yml
+++ b/deploy/pphuman/config/infer_cfg.yml
@@ -3,7 +3,7 @@ attr_thresh: 0.5
 visual: True

 DET:
-  model_dir: output_inference/pedestrian_yolov3_darknet/
+  model_dir: output_inference/mot_ppyolov3//
  batch_size: 1

 ATTR:

--- a/deploy/pphuman/pipe_utils.py
+++ b/deploy/pphuman/pipe_utils.py
@@ -134,9 +134,17 @@ class PipeTimer(Times):
        }
        self.img_num = 0

-    def info(self, average=False):
+    def get_total_time(self):
        total_time = self.total_time.value()
        total_time = round(total_time, 4)
+        average_latency = total_time / max(1, self.img_num)
+        qps = 0
+        if total_time > 0:
+            qps = 1 / average_latency
+        return total_time, average_latency, qps
+
+    def info(self):
+        total_time, average_latency, qps = self.get_total_time()
        print("------------------ Inference Time Info ----------------------")
        print("total_time(ms): {}, img_num: {}".format(total_time * 1000,
                                                       self.img_num))
@@ -146,13 +154,9 @@ class PipeTimer(Times):
            if v_time > 0:
                print("{} time(ms): {}".format(k, v_time * 1000))

-        average_latency = total_time / max(1, self.img_num)
-        qps = 0
-        if total_time > 0:
-            qps = 1 / average_latency
-
        print("average latency time(ms): {:.2f}, QPS: {:2f}".format(
            average_latency * 1000, qps))
+        return qps

    def report(self, average=False):
        dic = {}

--- a/deploy/pphuman/pipeline.py
+++ b/deploy/pphuman/pipeline.py
@@ -109,6 +109,8 @@ class Pipeline(object):
                cpu_threads=cpu_threads,
                enable_mkldnn=enable_mkldnn,
                output_dir=output_dir)
+            if self.is_video:
+                self.predictor.set_file_name(video_file)

    def _parse_input(self, image_file, image_dir, video_file, camera_id):

@@ -234,6 +236,7 @@ class PipePredictor(object):
        self.warmup_frame = 1
        self.pipeline_res = Result()
        self.pipe_timer = PipeTimer()
+        self.file_name = None

        if not is_video:
            det_cfg = self.cfg['DET']
@@ -274,6 +277,9 @@ class PipePredictor(object):
                self.kpt_collector = KeyPointCollector()
                self.action_predictor = ActionDetector()

+    def set_file_name(self, path):
+        self.file_name = os.path.split(path)[-1]
+
    def get_result(self):
        return self.pipeline_res

@@ -282,7 +288,7 @@ class PipePredictor(object):
            self.predict_video(input)
        else:
            self.predict_image(input)
-        self.pipe_timer.info(True)
+        self.pipe_timer.info()

    def predict_image(self, input):
        # det
@@ -334,7 +340,7 @@ class PipePredictor(object):
        # mot
        # mot -> attr
        # mot -> pose -> action
-        video_out_name = 'output.mp4'
+        video_out_name = 'output.mp4' if self.file_name is None else self.file_name

        # Get Video info : resolution, fps, frame count
        width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
@@ -366,6 +372,18 @@ class PipePredictor(object):
            # mot output format: id, class, score, xmin, ymin, xmax, ymax
            mot_res = parse_mot_res(res)

+            # nothing detected
+            if len(mot_res['boxes']) == 0:
+                frame_id += 1
+                self.pipe_timer.img_num += 1
+                self.pipe_timer.total_time.end()
+                if self.cfg['visual']:
+                    _, _, fps = self.pipe_timer.get_total_time()
+                    im = self.visualize_video(frame, mot_res, frame_id,
+                                              fps)  # visualize
+                    writer.write(im)
+                continue
+
            self.pipeline_res.update(mot_res, 'mot')
            if self.with_attr or self.with_action:
                crop_input = crop_image_with_mot(frame, mot_res)
@@ -403,20 +421,25 @@ class PipePredictor(object):
                    self.pipeline_res)  # parse output result for multi-camera

            if self.cfg['visual']:
-                im = self.visualize_video(frame, self.pipeline_res,
-                                          frame_id)  # visualize
+                _, _, fps = self.pipe_timer.get_total_time()
+                im = self.visualize_video(frame, self.pipeline_res, frame_id,
+                                          fps)  # visualize
                writer.write(im)

        writer.release()
        print('save result to {}'.format(out_path))

-    def visualize_video(self, image, result, frame_id):
+    def visualize_video(self, image, result, frame_id, fps):
        mot_res = result.get('mot')
-        ids = mot_res['boxes'][:, 0]
-        boxes = mot_res['boxes'][:, 3:]
-        boxes[:, 2] = boxes[:, 2] - boxes[:, 0]
-        boxes[:, 3] = boxes[:, 3] - boxes[:, 1]
-        image = plot_tracking(image, boxes, ids, frame_id=frame_id)
+        if mot_res is not None:
+            ids = mot_res['boxes'][:, 0]
+            boxes = mot_res['boxes'][:, 3:]
+            boxes[:, 2] = boxes[:, 2] - boxes[:, 0]
+            boxes[:, 3] = boxes[:, 3] - boxes[:, 1]
+        else:
+            boxes = np.zeros([0, 4])
+            ids = np.zeros([0])
+        image = plot_tracking(image, boxes, ids, frame_id=frame_id, fps=fps)

        attr_res = result.get('attr')
        if attr_res is not None:

--- a/deploy/python/attr_infer.py
+++ b/deploy/python/attr_infer.py
@@ -91,48 +91,82 @@ class AttrDetector(Detector):
    def postprocess(self, inputs, result):
        # postprocess output of predictor
        im_results = result['output']
-        im_results = np.where(im_results < self.threshold, 0, im_results)
-        label_list = [['Head', ['Hat', 'Glasses']], [
-            'Upper', [
-                'ShortSleeve', 'LongSleeve', 'UpperStride', 'UpperLogo',
-                'UpperPlaid', 'UpperSplice'
-            ]
-        ], [
-            'Lower', [
-                'LowerStripe', 'LowerPattern', 'LongCoat', 'Trousers', 'Shorts',
-                'Skirt&Dress'
-            ]
-        ], ['Shoes', ['boots']], [
-            'Accessory',
-            ['HandBag', 'ShoulderBag', 'Backpack', 'HoldObjectsInFront']
-        ], ['Age', ['AgeOver60', 'Age18-60', 'AgeLess18']],
-                      ['Gender', ['Female']],
-                      ['Direction', ['Front', 'Side', 'Back']]]
-
-        attr_type = [name[0] for name in label_list]
+
        labels = self.pred_config.labels
+        age_list = ['AgeLess18', 'Age18-60', 'AgeOver60']
+        direct_list = ['Front', 'Side', 'Back']
+        bag_list = ['HandBag', 'ShoulderBag', 'Backpack']
+        upper_list = [
+            'UpperStride', 'UpperLogo', 'UpperPlaid', 'UpperSplice', 'LongCoat'
+        ]
+        lower_list = [
+            'LowerStripe', 'LowerPattern', 'Trousers', 'Shorts', 'Skirt&Dress'
+        ]

        batch_res = []
        for res in im_results:
-            label_res = {}
-            label_res = {t: [] for t in attr_type}
-            num = 0
-            for i in range(len(label_list)):
-                type_name_i = attr_type[i]
-                attr_name_list = label_list[i][1]
-                for attr_name in attr_name_list:
-                    attr_name = labels[num]
-                    output_prob = res[num]
-                    if output_prob != 0:
-                        label_res[type_name_i].append(attr_name)
-                    num += 1
-
-            if len(label_res['Shoes']) == 0:
-                label_res['Shoes'] = ['no boots']
-            if len(label_res['Gender']) == 0:
-                label_res['Gender'] = ['Male']
-            label_res['Age'] = [labels[19 + np.argmax(res[19:22])]]
-            label_res['Direction'] = [labels[23 + np.argmax(res[23:])]]
+            res = res.tolist()
+            label_res = []
+            # gender 
+            gender = 'Female' if res[22] > self.threshold else 'Male'
+            label_res.append(gender)
+            # age
+            age = age_list[np.argmax(res[19:22])]
+            label_res.append(age)
+            # direction 
+            direction = direct_list[np.argmax(res[23:])]
+            label_res.append(direction)
+            # glasses
+            glasses = 'Glasses: '
+            if res[1] > self.threshold:
+                glasses += 'True'
+            else:
+                glasses += 'False'
+            label_res.append(glasses)
+            # hat
+            hat = 'Hat: '
+            if res[0] > self.threshold:
+                hat += 'True'
+            else:
+                hat += 'False'
+            label_res.append(hat)
+            # hold obj
+            hold_obj = 'HoldObjectsInFront: '
+            if res[18] > self.threshold:
+                hold_obj += 'True'
+            else:
+                hold_obj += 'False'
+            label_res.append(hold_obj)
+            # bag
+            bag = bag_list[np.argmax(res[15:18])]
+            bag_score = res[15 + np.argmax(res[15:18])]
+            bag_label = bag if bag_score > self.threshold else 'No bag'
+            label_res.append(bag_label)
+            # upper
+            upper_res = res[4:8] + res[10:11]
+            upper_label = 'Upper:'
+            sleeve = 'LongSleeve' if res[3] > res[2] else 'ShortSleeve'
+            upper_label += ' {}'.format(sleeve)
+            for i, r in enumerate(upper_res):
+                if r > self.threshold:
+                    upper_label += ' {}'.format(upper_list[i])
+            label_res.append(upper_label)
+            # lower
+            lower_res = res[8:10] + res[11:14]
+            lower_label = 'Lower: '
+            has_lower = False
+            for i, l in enumerate(lower_res):
+                if l > self.threshold:
+                    lower_label += ' {}'.format(lower_list[i])
+                    has_lower = True
+            if not has_lower:
+                lower_label += ' {}'.format(lower_list[np.argmax(lower_res)])
+
+            label_res.append(lower_label)
+            # shoe
+            shoe = 'Boots' if res[14] > self.threshold else 'No boots'
+            label_res.append(shoe)
+
            batch_res.append(label_res)
        result = {'output': batch_res}
        return result
@@ -240,7 +274,7 @@ def visualize(image_list, batch_res, output_dir='output'):
            os.makedirs(output_dir)
        img_name = os.path.split(image_file)[-1]
        out_path = os.path.join(output_dir, img_name)
-        im.save(out_path, quality=95)
+        cv2.imwrite(out_path, im)
        print("save result to: " + out_path)



--- a/deploy/python/visualize.py
+++ b/deploy/python/visualize.py
@@ -331,23 +331,34 @@ def visualize_pose(imgfile,


 def visualize_attr(im, results, boxes=None):
-
    if isinstance(im, str):
-        im = Image.open(im).convert('RGB')
-    elif isinstance(im, np.ndarray):
-        im = Image.fromarray(im)
+        im = Image.open(im)
+        im = np.ascontiguousarray(np.copy(im))
+        im = cv2.cvtColor(im, cv2.COLOR_RGB2BGR)
+    else:
+        im = np.ascontiguousarray(np.copy(im))

-    draw = ImageDraw.Draw(im)
+    im_h, im_w = im.shape[:2]
+    text_scale = max(1, int(im.shape[0] / 1600.))
+    text_thickness = 2
+
+    line_inter = im.shape[0] / 50.
    for i, res in enumerate(results):
-        text = ""
-        for k, v in res.items():
-            if len(v) == 0: continue
-            test_line = "{}: {}\n".format(k, *v)
-            text += test_line
        if boxes is None:
-            text_loc = (1, 1)
+            text_w = 1
+            text_h = 1
        else:
            box = boxes[i]
-            text_loc = (box[2], box[3])
-        draw.text(text_loc, text, fill=(0, 0, 255))
+            text_w = int(box[2])
+            text_h = int(box[3])
+        for text in res:
+            text_h += int(line_inter)
+            text_loc = (text_w, text_h)
+            cv2.putText(
+                im,
+                text,
+                text_loc,
+                cv2.FONT_HERSHEY_PLAIN,
+                text_scale, (0, 0, 255),
+                thickness=text_thickness)
    return im