Unverified commit 1c4da10b authored by XYZ_916 and committed by GitHub

Scale frames before fight action recognition (#6170)

* Scale frames before fight action recognition

* move `short_size = self.cfg["VIDEO_ACTION"]["short_size"]` and `scale = Scale(short_size)` out of the while loop

* change class name from Scale to ShortSizeScale
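In short, the commit builds a `ShortSizeScale` transform once from the pipeline config and applies it to every sampled frame before the frame is collected for video action (fight) recognition. Below is a minimal sketch of the resulting collection logic, with dummy frames and placeholder values standing in for `cfg["VIDEO_ACTION"]`; the import assumes the PaddleDetection `deploy/pipeline` environment:

```python
import numpy as np
from python.preprocess import ShortSizeScale  # transform added by this commit

# Placeholder values; the pipeline reads them from cfg["VIDEO_ACTION"].
short_size = 340
sample_freq = 7

scale = ShortSizeScale(short_size)  # built once, outside the frame loop

# Dummy frames standing in for decoded video frames (H x W x 3, uint8 RGB).
frames = [np.zeros((720, 1280, 3), dtype=np.uint8) for _ in range(20)]

video_action_imgs = []
for frame_id, frame in enumerate(frames):
    if frame_id % sample_freq == 0:
        scaled_img = scale(frame)  # short side rescaled before collection
        video_action_imgs.append(scaled_img)
```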
Parent 690a7826
@@ -42,7 +42,7 @@ from python.action_utils import KeyPointBuff, SkeletonActionVisualHelper
from pipe_utils import argsparser, print_arguments, merge_cfg, PipeTimer
from pipe_utils import get_test_images, crop_image_with_det, crop_image_with_mot, parse_mot_res, parse_mot_keypoint
-from python.preprocess import decode_image
+from python.preprocess import decode_image, ShortSizeScale
from python.visualize import visualize_box_mask, visualize_attr, visualize_pose, visualize_action
from pptracking.python.mot_sde_infer import SDE_Detector
@@ -554,6 +554,10 @@ class PipePredictor(object):
        video_action_imgs = []
        if self.with_video_action:
+            short_size = self.cfg["VIDEO_ACTION"]["short_size"]
+            scale = ShortSizeScale(short_size)
        while (1):
            if frame_id % 10 == 0:
                print('frame id: ', frame_id)
@@ -705,7 +709,9 @@
                # collect frames
                if frame_id % sample_freq == 0:
-                    video_action_imgs.append(frame)
+                    # Scale image
+                    scaled_img = scale(frame)
+                    video_action_imgs.append(scaled_img)
                # the number of collected frames is enough to predict video action
                if len(video_action_imgs) == frame_len:
......
@@ -15,6 +15,7 @@
import cv2
import numpy as np
from keypoint_preprocess import get_affine_transform
+from PIL import Image
def decode_image(im_file, im_info):
@@ -106,6 +107,95 @@ class Resize(object):
        return im_scale_y, im_scale_x
+class ShortSizeScale(object):
+    """
+    Scale an image so that its short side matches short_size.
+    Args:
+        short_size (float | int): the short side of the image will be scaled to this value.
+        fixed_ratio (bool): whether to zoom with a fixed 4:3 ratio. default: True
+        keep_ratio (bool): whether to keep the original aspect ratio. default: None
+        do_round (bool): whether to round when computing the scaled size. default: False
+        backend (str): graphics processing backend, 'pillow' or 'cv2'. default: 'pillow'
+    """
+
+    def __init__(self,
+                 short_size,
+                 fixed_ratio=True,
+                 keep_ratio=None,
+                 do_round=False,
+                 backend='pillow'):
+        self.short_size = short_size
+        assert (fixed_ratio and not keep_ratio) or (
+            not fixed_ratio
+        ), "fixed_ratio and keep_ratio cannot be true at the same time"
+        self.fixed_ratio = fixed_ratio
+        self.keep_ratio = keep_ratio
+        self.do_round = do_round
+
+        assert backend in [
+            'pillow', 'cv2'
+        ], f"Scale's backend must be pillow or cv2, but got {backend}"
+        self.backend = backend
+
+    def __call__(self, img):
+        """
+        Performs the resize operation.
+        Args:
+            img (PIL.Image | np.ndarray): the image to scale.
+        return:
+            resized_img: a PIL.Image after scaling.
+        """
+        result_img = None
+
+        if isinstance(img, np.ndarray):
+            h, w, _ = img.shape
+        elif isinstance(img, Image.Image):
+            w, h = img.size
+        else:
+            raise NotImplementedError
+
+        if w <= h:
+            ow = self.short_size
+            if self.fixed_ratio:  # default is True
+                oh = int(self.short_size * 4.0 / 3.0)
+            elif not self.keep_ratio:  # no
+                oh = self.short_size
+            else:
+                scale_factor = self.short_size / w
+                oh = int(h * float(scale_factor) +
+                         0.5) if self.do_round else int(h * self.short_size / w)
+                ow = int(w * float(scale_factor) +
+                         0.5) if self.do_round else int(w * self.short_size / h)
+        else:
+            oh = self.short_size
+            if self.fixed_ratio:
+                ow = int(self.short_size * 4.0 / 3.0)
+            elif not self.keep_ratio:  # no
+                ow = self.short_size
+            else:
+                scale_factor = self.short_size / h
+                oh = int(h * float(scale_factor) +
+                         0.5) if self.do_round else int(h * self.short_size / w)
+                ow = int(w * float(scale_factor) +
+                         0.5) if self.do_round else int(w * self.short_size / h)
+
+        if type(img) == np.ndarray:
+            img = Image.fromarray(img, mode='RGB')
+
+        if self.backend == 'pillow':
+            result_img = img.resize((ow, oh), Image.BILINEAR)
+        elif self.backend == 'cv2' and (self.keep_ratio is not None):
+            result_img = cv2.resize(
+                img, (ow, oh), interpolation=cv2.INTER_LINEAR)
+        else:
+            result_img = Image.fromarray(
+                cv2.resize(
+                    np.asarray(img), (ow, oh), interpolation=cv2.INTER_LINEAR))
+
+        return result_img
class NormalizeImage(object):
    """normalize image
    Args:
......
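A small, hedged usage example of the new `ShortSizeScale` transform in isolation; the 640x360 input is arbitrary, and the output size follows from the default `fixed_ratio=True` branch of the code above:

```python
import numpy as np
from PIL import Image
from python.preprocess import ShortSizeScale  # module shown in the hunk above

scale = ShortSizeScale(short_size=340)  # defaults: fixed_ratio=True, backend='pillow'

# ndarray input (H x W x 3): the short side (360) maps to 340 and the long
# side is fixed to 4/3 * 340 = 453, because fixed_ratio is True by default.
frame = np.zeros((360, 640, 3), dtype=np.uint8)
out = scale(frame)
print(out.size)  # (453, 340) -- PIL reports (width, height)

# A PIL.Image input is accepted as well.
out = scale(Image.fromarray(frame, mode='RGB'))
print(out.size)  # (453, 340)
```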
@@ -197,7 +197,7 @@ class VideoActionRecognizer(object):
        img_mean = [0.485, 0.456, 0.406]
        img_std = [0.229, 0.224, 0.225]
        ops = [
-            Scale(self.short_size), CenterCrop(self.target_size), Image2Array(),
+            CenterCrop(self.target_size), Image2Array(),
            Normalization(img_mean, img_std)
        ]
        for op in ops:
......
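Why `Scale(self.short_size)` disappears from the recognizer's op list: frames are now scaled once in pipeline.py before collection, so `VideoActionRecognizer` only needs to crop, convert, and normalize them. A self-contained sketch of what is left to do to an already-scaled frame; `center_crop` below is a minimal stand-in written for illustration (the real pipeline uses its existing `CenterCrop` op), and 320 is an assumed crop size, not necessarily `self.target_size`:

```python
import numpy as np


def center_crop(img, target_size):
    """Minimal stand-in for the existing CenterCrop op, for illustration only."""
    h, w = img.shape[:2]
    top = (h - target_size) // 2
    left = (w - target_size) // 2
    return img[top:top + target_size, left:left + target_size]


# Represents a frame already scaled by ShortSizeScale(340) upstream
# (453 x 340 with the default fixed_ratio=True), here as an H x W x 3 ndarray.
scaled = np.zeros((340, 453, 3), dtype=np.uint8)

cropped = center_crop(scaled, 320)
print(cropped.shape)  # (320, 320, 3) -- ready for Image2Array / Normalization
```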