Unverified commit 1c4da10b authored by XYZ_916 and committed by GitHub

Scale frames before fight action recognition (#6170)

* Scale frames before fight action recognition

* move `short_size = self.cfg["VIDEO_ACTION"]["short_size"]` and `scale = Scale(short_size)` out of the while loop

* change class name from Scale to ShortSizeScale
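In short, the commit builds a `ShortSizeScale` transform once from the pipeline config and applies it to every sampled frame before the frame is collected for video action (fight) recognition. Below is a minimal sketch of the resulting collection logic, with dummy frames and placeholder values standing in for `cfg["VIDEO_ACTION"]`; the import assumes the PaddleDetection `deploy/pipeline` environment:

```python
import numpy as np
from python.preprocess import ShortSizeScale  # transform added by this commit

# Placeholder values; the pipeline reads them from cfg["VIDEO_ACTION"].
short_size = 340
sample_freq = 7

scale = ShortSizeScale(short_size)  # built once, outside the frame loop

# Dummy frames standing in for decoded video frames (H x W x 3, uint8 RGB).
frames = [np.zeros((720, 1280, 3), dtype=np.uint8) for _ in range(20)]

video_action_imgs = []
for frame_id, frame in enumerate(frames):
    if frame_id % sample_freq == 0:
        scaled_img = scale(frame)  # short side rescaled before collection
        video_action_imgs.append(scaled_img)
```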
Parent 690a7826
@@ -42,7 +42,7 @@ from python.action_utils import KeyPointBuff, SkeletonActionVisualHelper
from pipe_utils import argsparser, print_arguments, merge_cfg, PipeTimer
from pipe_utils import get_test_images, crop_image_with_det, crop_image_with_mot, parse_mot_res, parse_mot_keypoint
-from python.preprocess import decode_image
+from python.preprocess import decode_image, ShortSizeScale
from python.visualize import visualize_box_mask, visualize_attr, visualize_pose, visualize_action
from pptracking.python.mot_sde_infer import SDE_Detector
@@ -554,6 +554,10 @@ class PipePredictor(object):
        video_action_imgs = []
        if self.with_video_action:
+            short_size = self.cfg["VIDEO_ACTION"]["short_size"]
+            scale = ShortSizeScale(short_size)
        while (1):
            if frame_id % 10 == 0:
                print('frame id: ', frame_id)
@@ -705,7 +709,9 @@
                # collect frames
                if frame_id % sample_freq == 0:
-                    video_action_imgs.append(frame)
+                    # Scale image
+                    scaled_img = scale(frame)
+                    video_action_imgs.append(scaled_img)
                # the number of collected frames is enough to predict video action
                if len(video_action_imgs) == frame_len:
......
@@ -15,6 +15,7 @@
import cv2
import numpy as np
from keypoint_preprocess import get_affine_transform
+from PIL import Image
def decode_image(im_file, im_info):
@@ -106,6 +107,95 @@ class Resize(object):
        return im_scale_y, im_scale_x
+class ShortSizeScale(object):
+    """
+    Scale an image so that its short side matches short_size.
+    Args:
+        short_size (float | int): the short side of the image will be scaled to this value.
+        fixed_ratio (bool): whether to zoom with a fixed 4:3 ratio. default: True
+        keep_ratio (bool): whether to keep the original aspect ratio. default: None
+        do_round (bool): whether to round when computing the scaled size. default: False
+        backend (str): graphics processing backend, 'pillow' or 'cv2'. default: 'pillow'
+    """
+
+    def __init__(self,
+                 short_size,
+                 fixed_ratio=True,
+                 keep_ratio=None,
+                 do_round=False,
+                 backend='pillow'):
+        self.short_size = short_size
+        assert (fixed_ratio and not keep_ratio) or (
+            not fixed_ratio
+        ), "fixed_ratio and keep_ratio cannot be true at the same time"
+        self.fixed_ratio = fixed_ratio
+        self.keep_ratio = keep_ratio
+        self.do_round = do_round
+
+        assert backend in [
+            'pillow', 'cv2'
+        ], f"Scale's backend must be pillow or cv2, but got {backend}"
+        self.backend = backend
+
+    def __call__(self, img):
+        """
+        Performs the resize operation.
+        Args:
+            img (PIL.Image | np.ndarray): the image to scale.
+        return:
+            resized_img: a PIL.Image after scaling.
+        """
+        result_img = None
+
+        if isinstance(img, np.ndarray):
+            h, w, _ = img.shape
+        elif isinstance(img, Image.Image):
+            w, h = img.size
+        else:
+            raise NotImplementedError
+
+        if w <= h:
+            ow = self.short_size
+            if self.fixed_ratio:  # default is True
+                oh = int(self.short_size * 4.0 / 3.0)
+            elif not self.keep_ratio:  # no
+                oh = self.short_size
+            else:
+                scale_factor = self.short_size / w
+                oh = int(h * float(scale_factor) +
+                         0.5) if self.do_round else int(h * self.short_size / w)
+                ow = int(w * float(scale_factor) +
+                         0.5) if self.do_round else int(w * self.short_size / h)
+        else:
+            oh = self.short_size
+            if self.fixed_ratio:
+                ow = int(self.short_size * 4.0 / 3.0)
+            elif not self.keep_ratio:  # no
+                ow = self.short_size
+            else:
+                scale_factor = self.short_size / h
+                oh = int(h * float(scale_factor) +
+                         0.5) if self.do_round else int(h * self.short_size / w)
+                ow = int(w * float(scale_factor) +
+                         0.5) if self.do_round else int(w * self.short_size / h)
+
+        if type(img) == np.ndarray:
+            img = Image.fromarray(img, mode='RGB')
+
+        if self.backend == 'pillow':
+            result_img = img.resize((ow, oh), Image.BILINEAR)
+        elif self.backend == 'cv2' and (self.keep_ratio is not None):
+            result_img = cv2.resize(
+                img, (ow, oh), interpolation=cv2.INTER_LINEAR)
+        else:
+            result_img = Image.fromarray(
+                cv2.resize(
+                    np.asarray(img), (ow, oh), interpolation=cv2.INTER_LINEAR))
+
+        return result_img
class NormalizeImage(object):
    """normalize image
    Args:
......
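A small, hedged usage example of the new `ShortSizeScale` transform in isolation; the 640x360 input is arbitrary, and the output size follows from the default `fixed_ratio=True` branch of the code above:

```python
import numpy as np
from PIL import Image
from python.preprocess import ShortSizeScale  # module shown in the hunk above

scale = ShortSizeScale(short_size=340)  # defaults: fixed_ratio=True, backend='pillow'

# ndarray input (H x W x 3): the short side (360) maps to 340 and the long
# side is fixed to 4/3 * 340 = 453, because fixed_ratio is True by default.
frame = np.zeros((360, 640, 3), dtype=np.uint8)
out = scale(frame)
print(out.size)  # (453, 340) -- PIL reports (width, height)

# A PIL.Image input is accepted as well.
out = scale(Image.fromarray(frame, mode='RGB'))
print(out.size)  # (453, 340)
```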
@@ -197,7 +197,7 @@ class VideoActionRecognizer(object):
        img_mean = [0.485, 0.456, 0.406]
        img_std = [0.229, 0.224, 0.225]
        ops = [
-            Scale(self.short_size), CenterCrop(self.target_size), Image2Array(),
+            CenterCrop(self.target_size), Image2Array(),
            Normalization(img_mean, img_std)
        ]
        for op in ops:
......
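Why `Scale(self.short_size)` disappears from the recognizer's op list: frames are now scaled once in pipeline.py before collection, so `VideoActionRecognizer` only needs to crop, convert, and normalize them. A self-contained sketch of what is left to do to an already-scaled frame; `center_crop` below is a minimal stand-in written for illustration (the real pipeline uses its existing `CenterCrop` op), and 320 is an assumed crop size, not necessarily `self.target_size`:

```python
import numpy as np


def center_crop(img, target_size):
    """Minimal stand-in for the existing CenterCrop op, for illustration only."""
    h, w = img.shape[:2]
    top = (h - target_size) // 2
    left = (w - target_size) // 2
    return img[top:top + target_size, left:left + target_size]


# Represents a frame already scaled by ShortSizeScale(340) upstream
# (453 x 340 with the default fixed_ratio=True), here as an H x W x 3 ndarray.
scaled = np.zeros((340, 453, 3), dtype=np.uint8)

cropped = center_crop(scaled, 320)
print(cropped.shape)  # (320, 320, 3) -- ready for Image2Array / Normalization
```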