diff --git a/deploy/pphuman/pipeline.py b/deploy/pphuman/pipeline.py
index 47928b7e859b5bbbbc0abdd4de60a0c48154a8c5..e40b961829ae85b77ed98f7aeb6a57aa6255a3d0 100644
--- a/deploy/pphuman/pipeline.py
+++ b/deploy/pphuman/pipeline.py
@@ -42,7 +42,7 @@
 from python.action_utils import KeyPointBuff, SkeletonActionVisualHelper
 from pipe_utils import argsparser, print_arguments, merge_cfg, PipeTimer
 from pipe_utils import get_test_images, crop_image_with_det, crop_image_with_mot, parse_mot_res, parse_mot_keypoint
-from python.preprocess import decode_image
+from python.preprocess import decode_image, ShortSizeScale
 from python.visualize import visualize_box_mask, visualize_attr, visualize_pose, visualize_action
 
 from pptracking.python.mot_sde_infer import SDE_Detector
@@ -554,6 +554,10 @@ class PipePredictor(object):
 
         video_action_imgs = []
 
+        if self.with_video_action:
+            short_size = self.cfg["VIDEO_ACTION"]["short_size"]
+            scale = ShortSizeScale(short_size)
+
         while (1):
             if frame_id % 10 == 0:
                 print('frame id: ', frame_id)
@@ -705,7 +709,9 @@ class PipePredictor(object):
 
                     # collect frames
                     if frame_id % sample_freq == 0:
-                        video_action_imgs.append(frame)
+                        # scale the short side of the frame before buffering it
+                        scaled_img = scale(frame)
+                        video_action_imgs.append(scaled_img)
 
                     # the number of collected frames is enough to predict video action
                     if len(video_action_imgs) == frame_len:
diff --git a/deploy/python/preprocess.py b/deploy/python/preprocess.py
index 315364775850dd2e19d59f226dc896cc933a328a..4703243b861d616d440e785ae59f2564086bfb13 100644
--- a/deploy/python/preprocess.py
+++ b/deploy/python/preprocess.py
@@ -15,6 +15,7 @@
 import cv2
 import numpy as np
 from keypoint_preprocess import get_affine_transform
+from PIL import Image
 
 
 def decode_image(im_file, im_info):
@@ -106,6 +107,95 @@ class Resize(object):
         return im_scale_y, im_scale_x
 
 
+class ShortSizeScale(object):
+    """
+    Scale an image so that its short side matches `short_size`.
+    Args:
+        short_size(float | int): The short side of the image is scaled to this value.
+        fixed_ratio(bool): Whether to force a fixed 4:3 output ratio. default: True
+        keep_ratio(bool): Whether to keep the original aspect ratio; cannot be
+            enabled together with fixed_ratio. default: None
+        do_round(bool): Whether to round (rather than truncate) when computing
+            the scaled size. default: False
+        backend(str): Use 'pillow' or 'cv2' as the image processing backend. default: 'pillow'
+    """
+
+    def __init__(self,
+                 short_size,
+                 fixed_ratio=True,
+                 keep_ratio=None,
+                 do_round=False,
+                 backend='pillow'):
+        self.short_size = short_size
+        assert (fixed_ratio and not keep_ratio) or (
+            not fixed_ratio
+        ), "fixed_ratio and keep_ratio cannot be true at the same time"
+        self.fixed_ratio = fixed_ratio
+        self.keep_ratio = keep_ratio
+        self.do_round = do_round
+
+        assert backend in [
+            'pillow', 'cv2'
+        ], f"Scale's backend must be pillow or cv2, but got {backend}"
+
+        self.backend = backend
+
+    def __call__(self, img):
+        """
+        Performs the resize operation.
+        Args:
+            img (PIL.Image | np.ndarray): the image to be scaled.
+        Returns:
+            resized_img: a PIL.Image (np.ndarray for the cv2 backend) after scaling.
+        """
+
+        result_img = None
+
+        if isinstance(img, np.ndarray):
+            h, w, _ = img.shape
+        elif isinstance(img, Image.Image):
+            w, h = img.size
+        else:
+            raise NotImplementedError
+
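+        # Three sizing modes (w <= h case shown; the other branch mirrors it):
+        #   fixed_ratio=True:  short side -> short_size, long side -> short_size * 4/3
+        #   keep_ratio truthy: short side -> short_size, aspect ratio preserved
+        #   neither:           both sides -> short_size (square output)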
+ """ + + result_img = None + + if isinstance(img, np.ndarray): + h, w, _ = img.shape + elif isinstance(img, Image.Image): + w, h = img.size + else: + raise NotImplementedError + + if w <= h: + ow = self.short_size + if self.fixed_ratio: # default is True + oh = int(self.short_size * 4.0 / 3.0) + elif not self.keep_ratio: # no + oh = self.short_size + else: + scale_factor = self.short_size / w + oh = int(h * float(scale_factor) + + 0.5) if self.do_round else int(h * self.short_size / w) + ow = int(w * float(scale_factor) + + 0.5) if self.do_round else int(w * self.short_size / h) + else: + oh = self.short_size + if self.fixed_ratio: + ow = int(self.short_size * 4.0 / 3.0) + elif not self.keep_ratio: # no + ow = self.short_size + else: + scale_factor = self.short_size / h + oh = int(h * float(scale_factor) + + 0.5) if self.do_round else int(h * self.short_size / w) + ow = int(w * float(scale_factor) + + 0.5) if self.do_round else int(w * self.short_size / h) + + if type(img) == np.ndarray: + img = Image.fromarray(img, mode='RGB') + + if self.backend == 'pillow': + result_img = img.resize((ow, oh), Image.BILINEAR) + elif self.backend == 'cv2' and (self.keep_ratio is not None): + result_img = cv2.resize( + img, (ow, oh), interpolation=cv2.INTER_LINEAR) + else: + result_img = Image.fromarray( + cv2.resize( + np.asarray(img), (ow, oh), interpolation=cv2.INTER_LINEAR)) + + return result_img + + class NormalizeImage(object): """normalize image Args: diff --git a/deploy/python/video_action_infer.py b/deploy/python/video_action_infer.py index 34f63a94cb23c142c89cbb777d57a0db68c024f6..865f47d41ac708f667edfb5a022a0315d69392bd 100644 --- a/deploy/python/video_action_infer.py +++ b/deploy/python/video_action_infer.py @@ -197,7 +197,7 @@ class VideoActionRecognizer(object): img_mean = [0.485, 0.456, 0.406] img_std = [0.229, 0.224, 0.225] ops = [ - Scale(self.short_size), CenterCrop(self.target_size), Image2Array(), + CenterCrop(self.target_size), Image2Array(), Normalization(img_mean, img_std) ] for op in ops: