From 24c088f6a23c443da198228cfcab10dcf2593b62 Mon Sep 17 00:00:00 2001
From: felixhjh <852142024@qq.com>
Date: Sun, 27 Feb 2022 18:01:59 +0000
Subject: [PATCH] add PaddleVideo demo PPTSN

---
 .../PPTSN_K400/Kinetics-400_label_list.txt    | 400 ++++++++++
 .../PaddleVideo/PPTSN_K400/config.yml         |  55 ++
 .../PPTSN_K400/pipeline_http_client.py        |  23 +
 .../PaddleVideo/PPTSN_K400/web_service.py     | 102 +++
 python/paddle_serving_app/reader/__init__.py  |   1 +
 .../paddle_serving_app/reader/pptsn_reader.py | 693 ++++++++++++++++++
 6 files changed, 1274 insertions(+)
 create mode 100644 examples/Pipeline/PaddleVideo/PPTSN_K400/Kinetics-400_label_list.txt
 create mode 100644 examples/Pipeline/PaddleVideo/PPTSN_K400/config.yml
 create mode 100644 examples/Pipeline/PaddleVideo/PPTSN_K400/pipeline_http_client.py
 create mode 100644 examples/Pipeline/PaddleVideo/PPTSN_K400/web_service.py
 create mode 100644 python/paddle_serving_app/reader/pptsn_reader.py

diff --git a/examples/Pipeline/PaddleVideo/PPTSN_K400/Kinetics-400_label_list.txt b/examples/Pipeline/PaddleVideo/PPTSN_K400/Kinetics-400_label_list.txt
new file mode 100644
index 00000000..8488908b
--- /dev/null
+++ b/examples/Pipeline/PaddleVideo/PPTSN_K400/Kinetics-400_label_list.txt
@@ -0,0 +1,400 @@
+0 abseiling
+1 air_drumming
+2 answering_questions
+3 applauding
+4 applying_cream
+5 archery
+6 arm_wrestling
+7 arranging_flowers
+8 assembling_computer
+9 auctioning
+10 baby_waking_up
+11 baking_cookies
+12 balloon_blowing
+13 bandaging
+14 barbequing
+15 bartending
+16 beatboxing
+17 bee_keeping
+18 belly_dancing
+19 bench_pressing
+20 bending_back
+21 bending_metal
+22 biking_through_snow
+23 blasting_sand
+24 blowing_glass
+25 blowing_leaves
+26 blowing_nose
+27 blowing_out_candles
+28 bobsledding
+29 bookbinding
+30 bouncing_on_trampoline
+31 bowling
+32 braiding_hair
+33 breading_or_breadcrumbing
+34 breakdancing
+35 brush_painting
+36 brushing_hair
+37 brushing_teeth
+38 building_cabinet
+39 building_shed
+40 bungee_jumping
+41 busking
+42 canoeing_or_kayaking
+43 capoeira
+44 carrying_baby
+45 cartwheeling
+46 carving_pumpkin
+47 catching_fish
+48 catching_or_throwing_baseball
+49 catching_or_throwing_frisbee
+50 catching_or_throwing_softball
+51 celebrating
+52 changing_oil
+53 changing_wheel
+54 checking_tires
+55 cheerleading
+56 chopping_wood
+57 clapping
+58 clay_pottery_making
+59 clean_and_jerk
+60 cleaning_floor
+61 cleaning_gutters
+62 cleaning_pool
+63 cleaning_shoes
+64 cleaning_toilet
+65 cleaning_windows
+66 climbing_a_rope
+67 climbing_ladder
+68 climbing_tree
+69 contact_juggling
+70 cooking_chicken
+71 cooking_egg
+72 cooking_on_campfire
+73 cooking_sausages
+74 counting_money
+75 country_line_dancing
+76 cracking_neck
+77 crawling_baby
+78 crossing_river
+79 crying
+80 curling_hair
+81 cutting_nails
+82 cutting_pineapple
+83 cutting_watermelon
+84 dancing_ballet
+85 dancing_charleston
+86 dancing_gangnam_style
+87 dancing_macarena
+88 deadlifting
+89 decorating_the_christmas_tree
+90 digging
+91 dining
+92 disc_golfing
+93 diving_cliff
+94 dodgeball
+95 doing_aerobics
+96 doing_laundry
+97 doing_nails
+98 drawing
+99 dribbling_basketball
+100 drinking
+101 drinking_beer
+102 drinking_shots
+103 driving_car
+104 driving_tractor
+105 drop_kicking
+106 drumming_fingers
+107 dunking_basketball
+108 dying_hair
+109 eating_burger
+110 eating_cake
+111 eating_carrots
+112 eating_chips
+113 eating_doughnuts
+114 eating_hotdog
+115 eating_ice_cream
+116 eating_spaghetti
+117 eating_watermelon
+118 egg_hunting
+119 exercising_arm
+120 exercising_with_an_exercise_ball
+121 extinguishing_fire
+122 faceplanting
+123 feeding_birds
+124 feeding_fish
+125 feeding_goats
+126 filling_eyebrows
+127 finger_snapping
+128 fixing_hair
+129 flipping_pancake
+130 flying_kite
+131 folding_clothes
+132 folding_napkins
+133 folding_paper
+134 front_raises
+135 frying_vegetables
+136 garbage_collecting
+137 gargling
+138 getting_a_haircut
+139 getting_a_tattoo
+140 giving_or_receiving_award
+141 golf_chipping
+142 golf_driving
+143 golf_putting
+144 grinding_meat
+145 grooming_dog
+146 grooming_horse
+147 gymnastics_tumbling
+148 hammer_throw
+149 headbanging
+150 headbutting
+151 high_jump
+152 high_kick
+153 hitting_baseball
+154 hockey_stop
+155 holding_snake
+156 hopscotch
+157 hoverboarding
+158 hugging
+159 hula_hooping
+160 hurdling
+161 hurling_(sport)
+162 ice_climbing
+163 ice_fishing
+164 ice_skating
+165 ironing
+166 javelin_throw
+167 jetskiing
+168 jogging
+169 juggling_balls
+170 juggling_fire
+171 juggling_soccer_ball
+172 jumping_into_pool
+173 jumpstyle_dancing
+174 kicking_field_goal
+175 kicking_soccer_ball
+176 kissing
+177 kitesurfing
+178 knitting
+179 krumping
+180 laughing
+181 laying_bricks
+182 long_jump
+183 lunge
+184 making_a_cake
+185 making_a_sandwich
+186 making_bed
+187 making_jewelry
+188 making_pizza
+189 making_snowman
+190 making_sushi
+191 making_tea
+192 marching
+193 massaging_back
+194 massaging_feet
+195 massaging_legs
+196 massaging_person's_head
+197 milking_cow
+198 mopping_floor
+199 motorcycling
+200 moving_furniture
+201 mowing_lawn
+202 news_anchoring
+203 opening_bottle
+204 opening_present
+205 paragliding
+206 parasailing
+207 parkour
+208 passing_American_football_(in_game)
+209 passing_American_football_(not_in_game)
+210 peeling_apples
+211 peeling_potatoes
+212 petting_animal_(not_cat)
+213 petting_cat
+214 picking_fruit
+215 planting_trees
+216 plastering
+217 playing_accordion
+218 playing_badminton
+219 playing_bagpipes
+220 playing_basketball
+221 playing_bass_guitar
+222 playing_cards
+223 playing_cello
+224 playing_chess
+225 playing_clarinet
+226 playing_controller
+227 playing_cricket
+228 playing_cymbals
+229 playing_didgeridoo
+230 playing_drums
+231 playing_flute
+232 playing_guitar
+233 playing_harmonica
+234 playing_harp
+235 playing_ice_hockey
+236 playing_keyboard
+237 playing_kickball
+238 playing_monopoly
+239 playing_organ
+240 playing_paintball
+241 playing_piano
+242 playing_poker
+243 playing_recorder
+244 playing_saxophone
+245 playing_squash_or_racquetball
+246 playing_tennis
+247 playing_trombone
+248 playing_trumpet
+249 playing_ukulele
+250 playing_violin
+251 playing_volleyball
+252 playing_xylophone
+253 pole_vault
+254 presenting_weather_forecast
+255 pull_ups
+256 pumping_fist
+257 pumping_gas
+258 punching_bag
+259 punching_person_(boxing)
+260 push_up
+261 pushing_car
+262 pushing_cart
+263 pushing_wheelchair
+264 reading_book
+265 reading_newspaper
+266 recording_music
+267 riding_a_bike
+268 riding_camel
+269 riding_elephant
+270 riding_mechanical_bull
+271 riding_mountain_bike
+272 riding_mule
+273 riding_or_walking_with_horse
+274 riding_scooter
+275 riding_unicycle
+276 ripping_paper
+277 robot_dancing
+278 rock_climbing
+279 rock_scissors_paper
+280 roller_skating
+281 running_on_treadmill
+282 sailing
+283 salsa_dancing
+284 sanding_floor
+285 scrambling_eggs
+286 scuba_diving
+287 setting_table
+288 shaking_hands
+289 shaking_head
+290 sharpening_knives
+291 sharpening_pencil
+292 shaving_head
+293 shaving_legs
+294 shearing_sheep
+295 shining_shoes
+296 shooting_basketball
+297 shooting_goal_(soccer)
+298 shot_put
+299 shoveling_snow
+300 shredding_paper
+301 shuffling_cards
+302 side_kick
+303 sign_language_interpreting
+304 singing
+305 situp
+306 skateboarding
+307 ski_jumping
+308 skiing_(not_slalom_or_crosscountry)
+309 skiing_crosscountry
+310 skiing_slalom
+311 skipping_rope
+312 skydiving
+313 slacklining
+314 slapping
+315 sled_dog_racing
+316 smoking
+317 smoking_hookah
+318 snatch_weight_lifting
+319 sneezing
+320 sniffing
+321 snorkeling
+322 snowboarding
+323 snowkiting
+324 snowmobiling
+325 somersaulting
+326 spinning_poi
+327 spray_painting
+328 spraying
+329 springboard_diving
+330 squat
+331 sticking_tongue_out
+332 stomping_grapes
+333 stretching_arm
+334 stretching_leg
+335 strumming_guitar
+336 surfing_crowd
+337 surfing_water
+338 sweeping_floor
+339 swimming_backstroke
+340 swimming_breast_stroke
+341 swimming_butterfly_stroke
+342 swing_dancing
+343 swinging_legs
+344 swinging_on_something
+345 sword_fighting
+346 tai_chi
+347 taking_a_shower
+348 tango_dancing
+349 tap_dancing
+350 tapping_guitar
+351 tapping_pen
+352 tasting_beer
+353 tasting_food
+354 testifying
+355 texting
+356 throwing_axe
+357 throwing_ball
+358 throwing_discus
+359 tickling
+360 tobogganing
+361 tossing_coin
+362 tossing_salad
+363 training_dog
+364 trapezing
+365 trimming_or_shaving_beard
+366 trimming_trees
+367 triple_jump
+368 tying_bow_tie
+369 tying_knot_(not_on_a_tie)
+370 tying_tie
+371 unboxing
+372 unloading_truck
+373 using_computer
+374 using_remote_controller_(not_gaming)
+375 using_segway
+376 vault
+377 waiting_in_line
+378 walking_the_dog
+379 washing_dishes
+380 washing_feet
+381 washing_hair
+382 washing_hands
+383 water_skiing
+384 water_sliding
+385 watering_plants
+386 waxing_back
+387 waxing_chest
+388 waxing_eyebrows
+389 waxing_legs
+390 weaving_basket
+391 welding
+392 whistling
+393 windsurfing
+394 wrapping_present
+395 wrestling
+396 writing
+397 yawning
+398 yoga
+399 zumba
diff --git a/examples/Pipeline/PaddleVideo/PPTSN_K400/config.yml b/examples/Pipeline/PaddleVideo/PPTSN_K400/config.yml
new file mode 100644
index 00000000..44014150
--- /dev/null
+++ b/examples/Pipeline/PaddleVideo/PPTSN_K400/config.yml
@@ -0,0 +1,55 @@
+#rpc port; rpc_port and http_port must not both be empty. When rpc_port is empty and http_port is not, rpc_port is automatically set to http_port + 1
+rpc_port: 18090
+
+#http port; rpc_port and http_port must not both be empty. When rpc_port is set and http_port is empty, no http_port is generated automatically
+http_port: 9999
+
+#worker_num, the maximum concurrency. When build_dag_each_worker=True, the framework creates worker_num processes, each building its own gRPC server and DAG
+##When build_dag_each_worker=False, the framework sets max_workers of the main thread's gRPC thread pool to worker_num
+worker_num: 20
+
+#build_dag_each_worker: False, the framework builds one DAG inside the process; True, the framework builds an independent DAG in each worker process
+build_dag_each_worker: false
+
+dag:
+    #op resource type: True for the thread model, False for the process model
+    is_thread_op: False
+
+    #number of retries
+    retry: 1
+
+    #profiling: True generates Timeline performance data, at some cost to performance; False disables profiling
+    use_profile: false
+    tracer:
+        interval_s: 10
+
+op:
+    ppTSN:
+        #concurrency; thread-level when is_thread_op=True, process-level otherwise
+        concurrency: 1
+
+        #when the op config has no server_endpoints, the local service config is read from local_service_conf
+        local_service_conf:
+            #client type: brpc, grpc, or local_predictor. local_predictor runs inference in-process without starting a Serving service
+            client_type: local_predictor
+
+            #model directory
+            model_config: serving_server
+
+            #fetch list; use the alias_name of fetch_var in client_config
+            fetch_list: ["linear_1.tmp_1"]
+
+            # device_type, 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu
+            device_type: 1
+
+            #compute device IDs: "" or unset runs inference on CPU; "0" or "0,1,2" selects the GPU card(s) to use
+            devices: "0"
+
+            #use_mkldnn
+            #use_mkldnn: True
+
+            #thread_num
+            thread_num: 2
+
+            #ir_optim
+            ir_optim: True
diff --git a/examples/Pipeline/PaddleVideo/PPTSN_K400/pipeline_http_client.py b/examples/Pipeline/PaddleVideo/PPTSN_K400/pipeline_http_client.py
new file mode 100644
index 00000000..e1025598
--- /dev/null
+++ b/examples/Pipeline/PaddleVideo/PPTSN_K400/pipeline_http_client.py
@@ -0,0 +1,23 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import requests
+import json
+
+url = "http://127.0.0.1:9999/ppTSN/prediction"
+video_url = "https://paddle-serving.bj.bcebos.com/huangjianhui04/example.avi"
+for i in range(4):
+    data = {"key": ["filename"], "value": [video_url]}
+    r = requests.post(url=url, data=json.dumps(data))
+    print(r.json())
diff --git a/examples/Pipeline/PaddleVideo/PPTSN_K400/web_service.py b/examples/Pipeline/PaddleVideo/PPTSN_K400/web_service.py
new file mode 100644
index 00000000..1c670926
--- /dev/null
+++ b/examples/Pipeline/PaddleVideo/PPTSN_K400/web_service.py
@@ -0,0 +1,102 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from paddle_serving_server.web_service import WebService, Op
+import logging
+import urllib.request
+
+import numpy as np
+import paddle
+import paddle.nn.functional as F
+from paddle_serving_app.reader import VideoDecoder, Sampler, Scale, TenCrop, Image2Array, Normalization
+
+_LOGGER = logging.getLogger()
+
+
+class ppTSNOp(Op):
+    def init_op(self,
+                num_seg=25,
+                seg_len=1,
+                short_size=256,
+                target_size=224,
+                top_k=1):
+        self.top_k = top_k
+        img_mean = [0.485, 0.456, 0.406]
+        img_std = [0.229, 0.224, 0.225]
+        self.ops = [
+            VideoDecoder(),
+            Sampler(num_seg, seg_len, valid_mode=True, select_left=True),
+            Scale(short_size, fixed_ratio=True, do_round=True, backend='cv2'),
+            TenCrop(target_size),
+            Image2Array(),
+            Normalization(img_mean, img_std)
+        ]
+        self.label_dict = {}
+        with open("Kinetics-400_label_list.txt") as fin:
+            for line in fin:
+                index, label = line.strip().split(" ")
+                self.label_dict[int(index)] = label
+
+    def preprocess(self, input_dicts, data_id, log_id):
+        (_, input_dict), = input_dicts.items()
+        self.input_file = []
+        for key in input_dict.keys():
+            try:
+                filename = urllib.request.urlretrieve(input_dict[key], key)
+                self.input_file.append(filename[0])
+                _LOGGER.info("video downloaded successfully")
+            except Exception as e:
+                _LOGGER.error("failed to download video: {}".format(e))
+        batched_inputs = []
+        for filename in self.input_file:
+            results = {'filename': filename}
+            for op in self.ops:
+                results = op(results)
+            res = np.expand_dims(results['imgs'], axis=0).copy()
+            batched_inputs.append(res)
+        batched_inputs = [
+            np.concatenate([item[i] for item in batched_inputs])
+            for i in range(len(batched_inputs[0]))
+        ]
+        return {"data_batch_0": batched_inputs[0]}, False, None, ""
+
+    def postprocess(self, input_dicts, fetch_dict, data_id, log_id):
+        output = fetch_dict["linear_1.tmp_1"]
+        if not isinstance(self.input_file, list):
+            self.input_file = [self.input_file]
+        N = len(self.input_file)
+        if output.shape[0] != N:
+            output = output.reshape([N] + [output.shape[0] // N] +
+                                    list(output.shape[1:]))  # [N, T, C]
+            output = output.mean(axis=1)  # [N, C]
+        output = F.softmax(paddle.to_tensor(output), axis=-1).numpy()
+        results = []
+        for i in range(N):
+            classes = np.argpartition(output[i], -self.top_k)[-self.top_k:]
+            classes = classes[np.argsort(-output[i, classes])]
+            labels = [self.label_dict[label] for label in classes.tolist()]
+            scores = output[i, classes]
+            results.append("class: {} score: {}".format(labels, scores))
+        return {"res": results}, None, ""
+
+
+class ppTSNService(WebService):
+    def get_pipeline_response(self, read_op):
+        ppTSN_op = ppTSNOp(name="ppTSN", input_ops=[read_op])
+        return ppTSN_op
+
+
+pptsn_service = ppTSNService(name="ppTSN")
+pptsn_service.prepare_pipeline_config("config.yml")
+pptsn_service.run_service()
diff --git a/python/paddle_serving_app/reader/__init__.py b/python/paddle_serving_app/reader/__init__.py
index 185b2099..8d782595 100644
--- a/python/paddle_serving_app/reader/__init__.py
+++ b/python/paddle_serving_app/reader/__init__.py
@@ -21,3 +21,4 @@ from .lac_reader import LACReader
 from .senta_reader import SentaReader
 #from .imdb_reader import IMDBDataset
 from .ocr_reader import OCRReader
+from .pptsn_reader import VideoDecoder, Sampler, Scale, TenCrop, Image2Array, Normalization
diff --git a/python/paddle_serving_app/reader/pptsn_reader.py b/python/paddle_serving_app/reader/pptsn_reader.py
new file mode 100644
index 00000000..c74c9db6
--- /dev/null
+++ b/python/paddle_serving_app/reader/pptsn_reader.py
@@ -0,0 +1,693 @@
+import math
+import os
+import random
+from collections.abc import Sequence
+
+import av
+import cv2
+import decord as de
+import numpy as np
+import SimpleITK as sitk
+from PIL import Image
+
+
+def get_start_end_idx(video_size, clip_size, clip_idx, num_clips):
+    """
+    Compute the start and end frame index of a clip. Used by the pyav
+    backend below; follows the clip-sampling helper in PaddleVideo/SlowFast.
+    """
+    delta = max(video_size - clip_size, 0)
+    if clip_idx == -1:
+        # Random temporal sampling for train/valid mode.
+        start_idx = random.uniform(0, delta)
+    else:
+        # Uniformly spaced clip for test mode.
+        start_idx = delta * clip_idx / num_clips
+    end_idx = start_idx + clip_size - 1
+    return start_idx, end_idx
+
+
+class VideoDecoder(object):
+    """
+    Decode an mp4 file into frames.
+    Args:
+        filepath: the file path of the mp4 file
+    """
+    def __init__(self,
+                 backend='cv2',
+                 mode='train',
+                 sampling_rate=32,
+                 num_seg=8,
+                 num_clips=1,
+                 target_fps=30):
+
+        self.backend = backend
+        # params below are only used by TimeSformer
+        self.mode = mode
+        self.sampling_rate = sampling_rate
+        self.num_seg = num_seg
+        self.num_clips = num_clips
+        self.target_fps = target_fps
+
+    def __call__(self, results):
+        """
+        Perform mp4 decode operations.
+        return:
+            List where each item is a numpy array after decoding.
+        """
+        file_path = results['filename']
+        results['format'] = 'video'
+        results['backend'] = self.backend
+
+        if self.backend == 'cv2':
+            cap = cv2.VideoCapture(file_path)
+            videolen = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+            sampledFrames = []
+            for i in range(videolen):
+                ret, frame = cap.read()
+                # the first frame may be empty
+                if not ret:
+                    continue
+                img = frame[:, :, ::-1]  # BGR -> RGB
+                sampledFrames.append(img)
+            results['frames'] = sampledFrames
+            results['frames_len'] = len(sampledFrames)
+
+        elif self.backend == 'decord':
+            container = de.VideoReader(file_path)
+            frames_len = len(container)
+            results['frames'] = container
+            results['frames_len'] = frames_len
+
+        elif self.backend == 'pyav':  # for TimeSformer
+            if self.mode in ["train", "valid"]:
+                clip_idx = -1
+            elif self.mode in ["test"]:
+                clip_idx = 0
+            else:
+                raise NotImplementedError
+
+            container = av.open(file_path)
+
+            num_clips = 1  # always 1
+
+            # decode process
+            fps = float(container.streams.video[0].average_rate)
+            frames_length = container.streams.video[0].frames
+            duration = container.streams.video[0].duration
+
+            if duration is None:
+                # If the decoding information could not be fetched, decode the entire video.
+                decode_all_video = True
+                video_start_pts, video_end_pts = 0, math.inf
+            else:
+                decode_all_video = False
+                start_idx, end_idx = get_start_end_idx(
+                    frames_length,
+                    self.sampling_rate * self.num_seg / self.target_fps * fps,
+                    clip_idx, num_clips)
+                timebase = duration / frames_length
+                video_start_pts = int(start_idx * timebase)
+                video_end_pts = int(end_idx * timebase)
+
+            frames = None
+            # If a video stream was found, fetch the video frames from it.
+            if container.streams.video:
+                margin = 1024
+                seek_offset = max(video_start_pts - margin, 0)
+
+                container.seek(seek_offset,
+                               any_frame=False,
+                               backward=True,
+                               stream=container.streams.video[0])
+                tmp_frames = {}
+                buffer_count = 0
+                max_pts = 0
+                for frame in container.decode(**{"video": 0}):
+                    max_pts = max(max_pts, frame.pts)
+                    if frame.pts < video_start_pts:
+                        continue
+                    if frame.pts <= video_end_pts:
+                        tmp_frames[frame.pts] = frame
+                    else:
+                        # no extra buffering is needed here, so stop after the
+                        # first frame past video_end_pts
+                        buffer_count += 1
+                        tmp_frames[frame.pts] = frame
+                        if buffer_count >= 0:
+                            break
+                video_frames = [tmp_frames[pts] for pts in sorted(tmp_frames)]
+
+                container.close()
+
+                frames = [frame.to_rgb().to_ndarray() for frame in video_frames]
+                clip_sz = self.sampling_rate * self.num_seg / self.target_fps * fps
+
+                start_idx, end_idx = get_start_end_idx(
+                    len(frames),  # frame_len
+                    clip_sz,
+                    clip_idx if decode_all_video else 0,
+                    # If the entire video was decoded: -1 in train/valid, 0 in test;
+                    # otherwise always 0, since clip_size frames were already
+                    # selected during decoding.
+                    1)
+                results['frames'] = frames
+                results['frames_len'] = len(frames)
+                results['start_idx'] = start_idx
+                results['end_idx'] = end_idx
+        else:
+            raise NotImplementedError
+        return results
+
+
+class Sampler(object):
+    """
+    Sample frame indices.
+    NOTE: PIL is used to read images here, which differs slightly from cv2.
+    Args:
+        num_seg(int): number of segments.
+        seg_len(int): number of sampled frames in each segment.
+        valid_mode(bool): True or False.
+        select_left(bool): whether to select the left of the two middle frames when the sampling interval is even in test mode.
+    Returns:
+        frames_idx: the indices of the sampled frames.
+    """
+    def __init__(self,
+                 num_seg,
+                 seg_len,
+                 frame_interval=None,
+                 valid_mode=False,
+                 select_left=False,
+                 dense_sample=False,
+                 linspace_sample=False,
+                 use_pil=True):
+        self.num_seg = num_seg
+        self.seg_len = seg_len
+        self.frame_interval = frame_interval
+        self.valid_mode = valid_mode
+        self.select_left = select_left
+        self.dense_sample = dense_sample
+        self.linspace_sample = linspace_sample
+        self.use_pil = use_pil
+
+    def _get(self, frames_idx, results):
+        data_format = results['format']
+
+        if data_format == "frame":
+            frame_dir = results['frame_dir']
+            imgs = []
+            for idx in frames_idx:
+                img = Image.open(
+                    os.path.join(frame_dir,
+                                 results['suffix'].format(idx))).convert('RGB')
+                imgs.append(img)
+
+        elif data_format == "MRI":
+            frame_dir = results['frame_dir']
+            imgs = []
+            MRI = sitk.GetArrayFromImage(sitk.ReadImage(frame_dir))
+            for idx in frames_idx:
+                item = MRI[idx]
+                item = cv2.resize(item, (224, 224))
+                imgs.append(item)
+
+        elif data_format == "video":
+            if results['backend'] == 'cv2':
+                frames = np.array(results['frames'])
+                imgs = []
+                for idx in frames_idx:
+                    imgbuf = frames[idx]
+                    img = Image.fromarray(imgbuf, mode='RGB')
+                    imgs.append(img)
+            elif results['backend'] == 'decord':
+                container = results['frames']
+                if self.use_pil:
+                    frames_select = container.get_batch(frames_idx)
+                    # convert the decord NDArray batch to PIL images
+                    np_frames = frames_select.asnumpy()
+                    imgs = []
+                    for i in range(np_frames.shape[0]):
+                        imgbuf = np_frames[i]
+                        imgs.append(Image.fromarray(imgbuf, mode='RGB'))
+                else:
+                    if frames_idx.ndim != 1:
+                        frames_idx = np.squeeze(frames_idx)
+                    frame_dict = {
+                        idx: container[idx].asnumpy()
+                        for idx in np.unique(frames_idx)
+                    }
+                    imgs = [frame_dict[idx] for idx in frames_idx]
+            elif results['backend'] == 'pyav':
+                imgs = []
+                frames = np.array(results['frames'])
+                for idx in frames_idx:
+                    imgbuf = frames[idx]
+                    imgs.append(imgbuf)
+                imgs = np.stack(imgs)  # thwc
+            else:
+                raise NotImplementedError
+        else:
+            raise NotImplementedError
+        results['imgs'] = imgs
+        return results
+
+    def _get_train_clips(self, num_frames):
+        ori_seg_len = self.seg_len * self.frame_interval
+        avg_interval = (num_frames - ori_seg_len + 1) // self.num_seg
+
+        if avg_interval > 0:
+            base_offsets = np.arange(self.num_seg) * avg_interval
+            clip_offsets = base_offsets + np.random.randint(avg_interval,
+                                                            size=self.num_seg)
+        elif num_frames > max(self.num_seg, ori_seg_len):
+            clip_offsets = np.sort(
+                np.random.randint(num_frames - ori_seg_len + 1,
+                                  size=self.num_seg))
+        elif avg_interval == 0:
+            ratio = (num_frames - ori_seg_len + 1.0) / self.num_seg
+            clip_offsets = np.around(np.arange(self.num_seg) * ratio)
+        else:
+            clip_offsets = np.zeros((self.num_seg, ), dtype=np.int64)
+        return clip_offsets
+
+    def _get_test_clips(self, num_frames):
+        ori_seg_len = self.seg_len * self.frame_interval
+        avg_interval = (num_frames - ori_seg_len + 1) / float(self.num_seg)
+        if num_frames > ori_seg_len - 1:
+            base_offsets = np.arange(self.num_seg) * avg_interval
+            clip_offsets = (base_offsets + avg_interval / 2.0).astype(np.int64)
+        else:
+            clip_offsets = np.zeros((self.num_seg, ), dtype=np.int64)
+        return clip_offsets
+
+    def __call__(self, results):
+        """
+        Args:
+            frames_len: length of frames.
+        return:
+            the sampled frame indices.
+        """
+        frames_len = int(results['frames_len'])
+        frames_idx = []
+        if self.frame_interval is not None:
+            assert isinstance(self.frame_interval, int)
+            if not self.valid_mode:
+                offsets = self._get_train_clips(frames_len)
+            else:
+                offsets = self._get_test_clips(frames_len)
+
+            offsets = offsets[:, None] + np.arange(
+                self.seg_len)[None, :] * self.frame_interval
+            offsets = np.concatenate(offsets)
+
+            offsets = offsets.reshape((-1, self.seg_len))
+            offsets = np.mod(offsets, frames_len)
+            offsets = np.concatenate(offsets)
+
+            if results['format'] == 'video':
+                frames_idx = offsets
+            elif results['format'] == 'frame':
+                frames_idx = list(offsets + 1)
+            else:
+                raise NotImplementedError
+
+            return self._get(frames_idx, results)
+
+        if self.linspace_sample:
+            if 'start_idx' in results and 'end_idx' in results:
+                offsets = np.linspace(results['start_idx'], results['end_idx'],
+                                      self.num_seg)
+            else:
+                offsets = np.linspace(0, frames_len - 1, self.num_seg)
+            offsets = np.clip(offsets, 0, frames_len - 1).astype(np.int64)
+            if results['format'] == 'video':
+                frames_idx = list(offsets)
+                frames_idx = [x % frames_len for x in frames_idx]
+            elif results['format'] == 'frame':
+                frames_idx = list(offsets + 1)
+            elif results['format'] == 'MRI':
+                frames_idx = list(offsets)
+            else:
+                raise NotImplementedError
+            return self._get(frames_idx, results)
+
+        average_dur = int(frames_len / self.num_seg)
+        if not self.select_left:
+            if self.dense_sample:  # For ppTSM
+                if not self.valid_mode:  # train
+                    sample_pos = max(1, 1 + frames_len - 64)
+                    t_stride = 64 // self.num_seg
+                    start_idx = 0 if sample_pos == 1 else np.random.randint(
+                        0, sample_pos - 1)
+                    offsets = [(idx * t_stride + start_idx) % frames_len + 1
+                               for idx in range(self.num_seg)]
+                    frames_idx = offsets
+                else:
+                    sample_pos = max(1, 1 + frames_len - 64)
+                    t_stride = 64 // self.num_seg
+                    start_list = np.linspace(0,
+                                             sample_pos - 1,
+                                             num=10,
+                                             dtype=int)
+                    offsets = []
+                    for start_idx in start_list.tolist():
+                        offsets += [
+                            (idx * t_stride + start_idx) % frames_len + 1
+                            for idx in range(self.num_seg)
+                        ]
+                    frames_idx = offsets
+            else:
+                for i in range(self.num_seg):
+                    idx = 0
+                    if not self.valid_mode:
+                        if average_dur >= self.seg_len:
+                            idx = random.randint(0, average_dur - self.seg_len)
+                            idx += i * average_dur
+                        elif average_dur >= 1:
+                            idx += i * average_dur
+                        else:
+                            idx = i
+                    else:
+                        if average_dur >= self.seg_len:
+                            idx = (average_dur - 1) // 2
+                            idx += i * average_dur
+                        elif average_dur >= 1:
+                            idx += i * average_dur
+                        else:
+                            idx = i
+                    for jj in range(idx, idx + self.seg_len):
+                        if results['format'] == 'video':
+                            frames_idx.append(int(jj % frames_len))
+                        elif results['format'] == 'frame':
+                            frames_idx.append(jj + 1)
+                        elif results['format'] == 'MRI':
+                            frames_idx.append(jj)
+                        else:
+                            raise NotImplementedError
+            return self._get(frames_idx, results)
+
+        else:  # for TSM
+            if not self.valid_mode:
+                if average_dur > 0:
+                    offsets = np.multiply(list(range(self.num_seg)),
+                                          average_dur) + np.random.randint(
+                                              average_dur, size=self.num_seg)
+                elif frames_len > self.num_seg:
+                    offsets = np.sort(
+                        np.random.randint(frames_len, size=self.num_seg))
+                else:
+                    offsets = np.zeros(shape=(self.num_seg, ), dtype=np.int64)
+            else:
+                if frames_len > self.num_seg:
+                    average_dur_float = frames_len / self.num_seg
+                    offsets = np.array([
+                        int(average_dur_float / 2.0 + average_dur_float * x)
+                        for x in range(self.num_seg)
+                    ])
+                else:
+                    offsets = np.zeros(shape=(self.num_seg, ), dtype=np.int64)
+
+            if results['format'] == 'video':
+                frames_idx = list(offsets)
+                frames_idx = [x % frames_len for x in frames_idx]
+            elif results['format'] == 'frame':
+                frames_idx = list(offsets + 1)
+            elif results['format'] == 'MRI':
+                frames_idx = list(offsets)
+            else:
+                raise NotImplementedError
+
+            return self._get(frames_idx, results)
+
+
+class CenterCrop(object):
+    """
+    Center-crop images.
+    Args:
+        target_size(int): center-crop a square of size target_size from the image.
+        do_round(bool): whether to round the coordinates of the top-left corner of the crop area. default: True
+    """
+    def __init__(self, target_size, do_round=True, backend='pillow'):
+        self.target_size = target_size
+        self.do_round = do_round
+        self.backend = backend
+
+    def __call__(self, results):
+        """
+        Perform center-crop operations.
+        Args:
+            imgs: List where each item is a PIL.Image.
+                  For example, [PIL.Image0, PIL.Image1, PIL.Image2, ...]
+        return:
+            ccrop_imgs: List where each item is a center-cropped PIL.Image.
+        """
+        import paddle  # imported lazily; only needed for tensor inputs
+        imgs = results['imgs']
+        ccrop_imgs = []
+        th, tw = self.target_size, self.target_size
+        if isinstance(imgs, paddle.Tensor):
+            h, w = imgs.shape[-2:]
+            x1 = int(round((w - tw) / 2.0)) if self.do_round else (w - tw) // 2
+            y1 = int(round((h - th) / 2.0)) if self.do_round else (h - th) // 2
+            ccrop_imgs = imgs[:, :, y1:y1 + th, x1:x1 + tw]
+        else:
+            for img in imgs:
+                if self.backend == 'pillow':
+                    w, h = img.size
+                elif self.backend == 'cv2':
+                    h, w, _ = img.shape
+                else:
+                    raise NotImplementedError
+                assert (w >= self.target_size) and (h >= self.target_size), \
+                    "image width({}) and height({}) should be larger than crop size({})".format(
+                        w, h, self.target_size)
+                x1 = int(round(
+                    (w - tw) / 2.0)) if self.do_round else (w - tw) // 2
+                y1 = int(round(
+                    (h - th) / 2.0)) if self.do_round else (h - th) // 2
+                if self.backend == 'cv2':
+                    ccrop_imgs.append(img[y1:y1 + th, x1:x1 + tw])
+                elif self.backend == 'pillow':
+                    ccrop_imgs.append(img.crop((x1, y1, x1 + tw, y1 + th)))
+        results['imgs'] = ccrop_imgs
+        return results
+
+
+class Scale(object):
+    """
+    Scale images.
+    Args:
+        short_size(float | int): the short side of an image is scaled to short_size.
+        fixed_ratio(bool): whether to zoom according to a fixed ratio. default: True
+        do_round(bool): whether to round when computing the zoom ratio. default: False
+        backend(str): graphics backend, pillow or cv2. default: 'pillow'
+    """
+    def __init__(self,
+                 short_size,
+                 fixed_ratio=True,
+                 keep_ratio=None,
+                 do_round=False,
+                 backend='pillow'):
+        self.short_size = short_size
+        assert (fixed_ratio and not keep_ratio) or (not fixed_ratio), \
+            "fixed_ratio and keep_ratio cannot both be true"
+        self.fixed_ratio = fixed_ratio
+        self.keep_ratio = keep_ratio
+        self.do_round = do_round
+
+        assert backend in [
+            'pillow', 'cv2'
+        ], f"Scale's backend must be pillow or cv2, but got {backend}"
+        self.backend = backend
+
+    def __call__(self, results):
+        """
+        Perform resize operations.
+        Args:
+            imgs (Sequence[PIL.Image]): List where each item is a PIL.Image.
+                For example, [PIL.Image0, PIL.Image1, PIL.Image2, ...]
+        return:
+            resized_imgs: List where each item is a PIL.Image after scaling.
+        """
+        imgs = results['imgs']
+        resized_imgs = []
+        for i in range(len(imgs)):
+            img = imgs[i]
+            if isinstance(img, np.ndarray):
+                h, w, _ = img.shape
+            elif isinstance(img, Image.Image):
+                w, h = img.size
+            else:
+                raise NotImplementedError
+
+            if w <= h:
+                ow = self.short_size
+                if self.fixed_ratio:
+                    oh = int(self.short_size * 4.0 / 3.0)
+                elif not self.keep_ratio:  # no
+                    oh = self.short_size
+                else:
+                    # keep the aspect ratio: the short side (w) maps exactly
+                    # to short_size and the long side scales proportionally
+                    scale_factor = self.short_size / w
+                    oh = int(h * float(scale_factor) +
+                             0.5) if self.do_round else int(
+                                 h * self.short_size / w)
+                    ow = int(w * float(scale_factor) +
+                             0.5) if self.do_round else self.short_size
+            else:
+                oh = self.short_size
+                if self.fixed_ratio:
+                    ow = int(self.short_size * 4.0 / 3.0)
+                elif not self.keep_ratio:  # no
+                    ow = self.short_size
+                else:
+                    # keep the aspect ratio: the short side (h) maps exactly
+                    # to short_size and the long side scales proportionally
+                    scale_factor = self.short_size / h
+                    oh = int(h * float(scale_factor) +
+                             0.5) if self.do_round else self.short_size
+                    ow = int(w * float(scale_factor) +
+                             0.5) if self.do_round else int(
+                                 w * self.short_size / h)
+            if self.backend == 'pillow':
+                resized_imgs.append(img.resize((ow, oh), Image.BILINEAR))
+            elif self.backend == 'cv2' and (self.keep_ratio is not None):
+                resized_imgs.append(
+                    cv2.resize(img, (ow, oh), interpolation=cv2.INTER_LINEAR))
+            else:
+                resized_imgs.append(
+                    Image.fromarray(
+                        cv2.resize(np.asarray(img), (ow, oh),
+                                   interpolation=cv2.INTER_LINEAR)))
+        results['imgs'] = resized_imgs
+        return results
+
+
+class Image2Array(object):
+    """
+    Convert a list of PIL.Images to a numpy array and transpose the
+    dimensions from 'thwc' to 'tchw'.
+    Args:
+        transpose: whether to transpose or not; default True, False for SlowFast.
+    """
+    def __init__(self, transpose=True, data_format='tchw'):
+        assert data_format in [
+            'tchw', 'cthw'
+        ], f"Target format must be in ['tchw', 'cthw'], but got {data_format}"
+        self.transpose = transpose
+        self.data_format = data_format
+
+    def __call__(self, results):
+        """
+        Perform Image-to-NumpyArray operations.
+        Args:
+            imgs: List where each item is a PIL.Image.
+                  For example, [PIL.Image0, PIL.Image1, PIL.Image2, ...]
+        return:
+            np_imgs: Numpy array.
+        """
+        imgs = results['imgs']
+        if 'backend' in results and results[
+                'backend'] == 'pyav':  # [T,H,W,C] in [0, 1]
+            if self.transpose:
+                if self.data_format == 'tchw':
+                    t_imgs = imgs.transpose((0, 3, 1, 2))  # tchw
+                else:
+                    t_imgs = imgs.transpose((3, 0, 1, 2))  # cthw
+            results['imgs'] = t_imgs
+        else:
+            t_imgs = np.stack(imgs).astype('float32')
+            if self.transpose:
+                if self.data_format == 'tchw':
+                    t_imgs = t_imgs.transpose(0, 3, 1, 2)  # tchw
+                else:
+                    t_imgs = t_imgs.transpose(3, 0, 1, 2)  # cthw
+            results['imgs'] = t_imgs
+        return results
+
+
+class Normalization(object):
+    """
+    Normalization.
+    Args:
+        mean(Sequence[float]): mean values of the different channels.
+        std(Sequence[float]): std values of the different channels.
+        tensor_shape(list): shape of the mean, default [3, 1, 1]; for SlowFast, [1, 1, 1, 3]
+    """
+    def __init__(self, mean, std, tensor_shape=[3, 1, 1], inplace=False):
+        if not isinstance(mean, Sequence):
+            raise TypeError(
+                f'Mean must be list or tuple, but got {type(mean)}')
+        if not isinstance(std, Sequence):
+            raise TypeError(
+                f'Std must be list or tuple, but got {type(std)}')
+
+        self.inplace = inplace
+        if not inplace:
+            self.mean = np.array(mean).reshape(tensor_shape).astype(np.float32)
+            self.std = np.array(std).reshape(tensor_shape).astype(np.float32)
+        else:
+            self.mean = np.array(mean, dtype=np.float32)
+            self.std = np.array(std, dtype=np.float32)
+
+    def __call__(self, results):
+        """
+        Perform normalization operations.
+        Args:
+            imgs: Numpy array.
+        return:
+            np_imgs: Numpy array after normalization.
+        """
+        import paddle  # imported lazily; only needed for the pyav backend
+        if self.inplace:
+            n = len(results['imgs'])
+            h, w, c = results['imgs'][0].shape
+            norm_imgs = np.empty((n, h, w, c), dtype=np.float32)
+            for i, img in enumerate(results['imgs']):
+                norm_imgs[i] = img
+
+            for img in norm_imgs:  # [n,h,w,c]
+                mean = np.float64(self.mean.reshape(1, -1))  # [1, 3]
+                stdinv = 1 / np.float64(self.std.reshape(1, -1))  # [1, 3]
+                cv2.subtract(img, mean, img)
+                cv2.multiply(img, stdinv, img)
+        else:
+            imgs = results['imgs']
+            norm_imgs = imgs / 255.0
+            norm_imgs -= self.mean
+            norm_imgs /= self.std
+            if 'backend' in results and results['backend'] == 'pyav':
+                norm_imgs = paddle.to_tensor(norm_imgs, dtype=paddle.float32)
+        results['imgs'] = norm_imgs
+        return results
+
+
+class TenCrop:
+    """
+    Crop out 5 regions (the 4 corners plus the center) from the picture, then
+    flip each crop horizontally to get 10 cropped images, which makes the
+    prediction more robust.
+    Args:
+        target_size(int | tuple[int]): (w, h) of the target size for the crop.
+    """
+    def __init__(self, target_size):
+        self.target_size = (target_size, target_size)
+
+    def __call__(self, results):
+        imgs = results['imgs']
+        img_w, img_h = imgs[0].size
+        crop_w, crop_h = self.target_size
+        w_step = (img_w - crop_w) // 4
+        h_step = (img_h - crop_h) // 4
+        offsets = [
+            (0, 0),
+            (4 * w_step, 0),
+            (0, 4 * h_step),
+            (4 * w_step, 4 * h_step),
+            (2 * w_step, 2 * h_step),
+        ]
+        img_crops = list()
+        for x_offset, y_offset in offsets:
+            crop = [
+                img.crop(
+                    (x_offset, y_offset, x_offset + crop_w, y_offset + crop_h))
+                for img in imgs
+            ]
+            crop_fliped = [
+                timg.transpose(Image.FLIP_LEFT_RIGHT) for timg in crop
+            ]
+            img_crops.extend(crop)
+            img_crops.extend(crop_fliped)
+
+        results['imgs'] = img_crops
+        return results
--
GitLab