From fe44893bdf014a1adc03d16a37445bf7d4ccede7 Mon Sep 17 00:00:00 2001 From: kinghuin Date: Mon, 11 May 2020 23:56:09 +0800 Subject: [PATCH] add videotag_tsn_lstm (#576) * add videotag_tsn_lstm --- .../videotag_tsn_lstm/README.md | 64 + .../videotag_tsn_lstm/__init__.py | 0 .../videotag_tsn_lstm/module.py | 235 ++ .../videotag_tsn_lstm/resource/__init__.py | 0 .../resource/configs/attention_lstm.yaml | 15 + .../resource/configs/tsn.yaml | 22 + .../videotag_tsn_lstm/resource/label_3396.txt | 3396 +++++++++++++++++ .../resource/metrics/__init__.py | 1 + .../resource/metrics/metrics_util.py | 157 + .../resource/metrics/youtube8m/__init__.py | 0 .../youtube8m/average_precision_calculator.py | 275 ++ .../resource/metrics/youtube8m/eval_util.py | 245 ++ .../mean_average_precision_calculator.py | 114 + .../resource/models/__init__.py | 7 + .../models/attention_lstm/__init__.py | 1 + .../models/attention_lstm/attention_lstm.py | 176 + .../models/attention_lstm/lstm_attention.py | 114 + .../resource/models/model.py | 191 + .../resource/models/tsn/__init__.py | 1 + .../resource/models/tsn/name.py | 107 + .../resource/models/tsn/name1 | 521 +++ .../resource/models/tsn/name2 | 521 +++ .../resource/models/tsn/name_map.json | 1 + .../resource/models/tsn/tsn.py | 190 + .../resource/models/tsn/tsn_res_model.py | 149 + .../resource/models/utils.py | 47 + .../resource/reader/__init__.py | 5 + .../resource/reader/kinetics_reader.py | 459 +++ .../resource/reader/reader_utils.py | 81 + .../resource/utils/__init__.py | 0 .../resource/utils/config_utils.py | 75 + .../resource/utils/train_utils.py | 177 + .../resource/utils/utility.py | 71 + .../scripts/configs/videotag_tsn_lstm.yml | 9 + 34 files changed, 7427 insertions(+) create mode 100644 hub_module/modules/video/classification/videotag_tsn_lstm/README.md create mode 100644 hub_module/modules/video/classification/videotag_tsn_lstm/__init__.py create mode 100644 hub_module/modules/video/classification/videotag_tsn_lstm/module.py create mode 100644 hub_module/modules/video/classification/videotag_tsn_lstm/resource/__init__.py create mode 100644 hub_module/modules/video/classification/videotag_tsn_lstm/resource/configs/attention_lstm.yaml create mode 100644 hub_module/modules/video/classification/videotag_tsn_lstm/resource/configs/tsn.yaml create mode 100644 hub_module/modules/video/classification/videotag_tsn_lstm/resource/label_3396.txt create mode 100644 hub_module/modules/video/classification/videotag_tsn_lstm/resource/metrics/__init__.py create mode 100644 hub_module/modules/video/classification/videotag_tsn_lstm/resource/metrics/metrics_util.py create mode 100644 hub_module/modules/video/classification/videotag_tsn_lstm/resource/metrics/youtube8m/__init__.py create mode 100644 hub_module/modules/video/classification/videotag_tsn_lstm/resource/metrics/youtube8m/average_precision_calculator.py create mode 100644 hub_module/modules/video/classification/videotag_tsn_lstm/resource/metrics/youtube8m/eval_util.py create mode 100644 hub_module/modules/video/classification/videotag_tsn_lstm/resource/metrics/youtube8m/mean_average_precision_calculator.py create mode 100644 hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/__init__.py create mode 100644 hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/attention_lstm/__init__.py create mode 100644 hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/attention_lstm/attention_lstm.py create mode 100644 
hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/attention_lstm/lstm_attention.py create mode 100644 hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/model.py create mode 100644 hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/tsn/__init__.py create mode 100644 hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/tsn/name.py create mode 100644 hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/tsn/name1 create mode 100644 hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/tsn/name2 create mode 100644 hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/tsn/name_map.json create mode 100644 hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/tsn/tsn.py create mode 100644 hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/tsn/tsn_res_model.py create mode 100644 hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/utils.py create mode 100644 hub_module/modules/video/classification/videotag_tsn_lstm/resource/reader/__init__.py create mode 100644 hub_module/modules/video/classification/videotag_tsn_lstm/resource/reader/kinetics_reader.py create mode 100644 hub_module/modules/video/classification/videotag_tsn_lstm/resource/reader/reader_utils.py create mode 100644 hub_module/modules/video/classification/videotag_tsn_lstm/resource/utils/__init__.py create mode 100644 hub_module/modules/video/classification/videotag_tsn_lstm/resource/utils/config_utils.py create mode 100644 hub_module/modules/video/classification/videotag_tsn_lstm/resource/utils/train_utils.py create mode 100644 hub_module/modules/video/classification/videotag_tsn_lstm/resource/utils/utility.py create mode 100644 hub_module/scripts/configs/videotag_tsn_lstm.yml diff --git a/hub_module/modules/video/classification/videotag_tsn_lstm/README.md b/hub_module/modules/video/classification/videotag_tsn_lstm/README.md new file mode 100644 index 00000000..2748164a --- /dev/null +++ b/hub_module/modules/video/classification/videotag_tsn_lstm/README.md @@ -0,0 +1,64 @@ +```shell +$ hub install videotag_tsn_lstm==1.0.0 +``` +
+
+For details of the network architectures, see the papers [TSN](https://arxiv.org/abs/1608.00859) and [AttentionLSTM](https://arxiv.org/abs/1503.08909).
+
+## Command-Line Prediction Example
+```shell
+hub run videotag_tsn_lstm --input_path 1.mp4 --use_gpu False
+```
+Sample files for download:
+* [1.mp4](https://paddlehub.bj.bcebos.com/model/video/video_classifcation/1.mp4)
+* [2.mp4](https://paddlehub.bj.bcebos.com/model/video/video_classifcation/2.mp4)
+
+## API
+```python
+def classification(paths,
+                   use_gpu=False,
+                   threshold=0.5,
+                   top_k=10)
+```
+
+Performs video classification prediction.
+
+**Parameters**
+
+* paths(list\[str\]): paths of the mp4 files
+* use_gpu(bool): whether to use the GPU for prediction; defaults to False
+* threshold(float): prediction threshold; only classes whose predicted probability exceeds the threshold are returned; defaults to 0.5
+* top_k(int): return the top k predictions; defaults to 10
+
+**Returns**
+
+* results(list\[dict\]): each element of results is the prediction for the corresponding input; when a single mp4 file is predicted, there is exactly one element. Each prediction is a dict containing the mp4 file path and its classification probabilities. Example:
+```shell
+[{'path': '1.mp4', 'prediction': {'训练': 0.9771281480789185, '蹲': 0.9389840960502625, '杠铃': 0.8554490804672241, '健身房': 0.8479971885681152}}, {'path': '2.mp4', 'prediction': {'舞蹈': 0.8504238724708557}}]
+```
+
+**Code Example**
+
+```python
+import paddlehub as hub
+
+videotag = hub.Module(name="videotag_tsn_lstm")
+
+# execute the prediction and print the result
+results = videotag.classification(paths=["1.mp4", "2.mp4"], use_gpu=True)
+for result in results:
+    print(result)
+```
+
+## Dependencies
+
+paddlepaddle >= 1.6.2
+
+paddlehub >= 1.6.0
+
+## Update History
+
+* 1.0.0
+
+  First release
diff --git a/hub_module/modules/video/classification/videotag_tsn_lstm/__init__.py b/hub_module/modules/video/classification/videotag_tsn_lstm/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/hub_module/modules/video/classification/videotag_tsn_lstm/module.py b/hub_module/modules/video/classification/videotag_tsn_lstm/module.py
new file mode 100644
index 00000000..dc70f8f2
--- /dev/null
+++ b/hub_module/modules/video/classification/videotag_tsn_lstm/module.py
@@ -0,0 +1,235 @@
+# coding:utf-8
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
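+#
+# videotag_tsn_lstm predicts video tags in two stages:
+#   Stage 1: a TSN network (resource/configs/tsn.yaml) decodes the mp4 and
+#            extracts one 2048-d feature per sampled frame (up to 300 frames).
+#   Stage 2: an AttentionLSTM (resource/configs/attention_lstm.yaml) consumes
+#            the frame features and outputs probabilities over the 3396
+#            labels in resource/label_3396.txt.
+# Each stage builds its own fluid.Program inside a dedicated fluid.Scope so
+# the two sets of weights stay isolated while sharing a single Executor.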
+from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse +import ast +import os + +import paddle.fluid as fluid +import paddlehub as hub +from paddlehub.module.module import moduleinfo, runnable +from paddlehub.common.logger import logger + +from videotag_tsn_lstm.resource.utils.config_utils import * +import videotag_tsn_lstm.resource.models as models +from videotag_tsn_lstm.resource.reader import get_reader +from videotag_tsn_lstm.resource.metrics import get_metrics +from videotag_tsn_lstm.resource.utils.utility import check_cuda +from videotag_tsn_lstm.resource.utils.utility import check_version + + +@moduleinfo( + name="videotag_tsn_lstm", + version="1.0.0", + summary= + "videotag_tsn_lstm is a video classification model, using TSN for feature extraction and AttentionLSTM for classification", + author="paddlepaddle", + author_email="paddle-dev@baidu.com", + type="video/classification", +) +class VideoTag(hub.Module): + def _initialize(self): + # add arg parser + self.parser = argparse.ArgumentParser( + description="Run the videotag_tsn_lstm module.", + prog='hub run videotag_tsn_lstm', + usage='%(prog)s', + add_help=True) + self.parser.add_argument( + '--use_gpu', + type=ast.literal_eval, + default=False, + help='default use gpu.') + self.parser.add_argument( + '--input_path', + type=str, + default=None, + help='path of video data, single video') + self._has_load = False + + def _extractor(self, args, exe, place): + extractor_scope = fluid.Scope() + with fluid.scope_guard(extractor_scope): + extractor_startup_prog = fluid.Program() + extractor_main_prog = fluid.Program() + with fluid.program_guard(extractor_main_prog, + extractor_startup_prog): + extractor_config = parse_config(args.extractor_config) + extractor_infer_config = merge_configs(extractor_config, + 'infer', vars(args)) + + # build model + extractor_model = models.get_model( + "TSN", extractor_infer_config, mode='infer') + extractor_model.build_input(use_dataloader=False) + extractor_model.build_model() + extractor_feeds = extractor_model.feeds() + extractor_fetch_list = extractor_model.fetches() + + exe.run(extractor_startup_prog) + + logger.info('load extractor weights from {}'.format( + args.extractor_weights)) + extractor_model.load_test_weights(exe, args.extractor_weights, + extractor_main_prog) + + # get reader and metrics + extractor_reader = get_reader("TSN", 'infer', + extractor_infer_config) + extractor_feeder = fluid.DataFeeder( + place=place, feed_list=extractor_feeds) + return extractor_reader, extractor_main_prog, extractor_fetch_list, extractor_feeder, extractor_scope + + def _predictor(self, args, exe, place): + predictor_scope = fluid.Scope() + with fluid.scope_guard(predictor_scope): + predictor_startup_prog = fluid.default_startup_program() + predictor_main_prog = fluid.default_main_program() + with fluid.program_guard(predictor_main_prog, + predictor_startup_prog): + # parse config + predictor_config = parse_config(args.predictor_config) + predictor_infer_config = merge_configs(predictor_config, + 'infer', vars(args)) + + predictor_model = models.get_model( + "AttentionLSTM", predictor_infer_config, mode='infer') + predictor_model.build_input(use_dataloader=False) + predictor_model.build_model() + predictor_feeds = predictor_model.feeds() + predictor_outputs = predictor_model.outputs() + + exe.run(predictor_startup_prog) + + logger.info('load lstm weights from {}'.format( + args.predictor_weights)) + 
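+                # Map the saved AttentionLSTM parameters into the predictor
+                # program just built above (same pattern as the TSN weight
+                # loading in _extractor).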
predictor_model.load_test_weights(exe, args.predictor_weights, + predictor_main_prog) + + predictor_feeder = fluid.DataFeeder( + place=place, feed_list=predictor_feeds) + predictor_fetch_list = predictor_model.fetches() + return predictor_main_prog, predictor_fetch_list, predictor_feeder, predictor_scope + + @runnable + def run_cmd(self, argsv): + args = self.parser.parse_args(argsv) + results = self.classification( + paths=[args.input_path], use_gpu=args.use_gpu) + return results + + def classification(self, paths, use_gpu=False, threshold=0.5, top_k=10): + """ + API of Classification. + + Args: + paths (list[str]): the path of mp4s. + use_gpu (bool): whether to use gpu or not. + threshold (float): the result value >= threshold will be returned. + top_k (int): the top k result will be returned. + + Returns: + results (list[dict]): every dict includes the mp4 file path and prediction. + """ + args = self.parser.parse_args([]) + # config the args in videotag_tsn_lstm + args.use_gpu = use_gpu + args.filelist = paths + args.topk = top_k + args.threshold = threshold + args.extractor_config = os.path.join(self.directory, 'resource', + 'configs', 'tsn.yaml') + args.predictor_config = os.path.join(self.directory, 'resource', + 'configs', 'attention_lstm.yaml') + args.extractor_weights = os.path.join(self.directory, 'weights', 'tsn') + args.predictor_weights = os.path.join(self.directory, 'weights', + 'attention_lstm') + args.label_file = os.path.join(self.directory, 'resource', + 'label_3396.txt') + + check_cuda(args.use_gpu) + check_version() + + if not self._has_load: + self.place = fluid.CUDAPlace( + 0) if args.use_gpu else fluid.CPUPlace() + self.exe = fluid.Executor(self.place) + self.extractor_reader, self.extractor_main_prog, self.extractor_fetch_list, self.extractor_feeder, self.extractor_scope = self._extractor( + args, self.exe, self.place) + self.predictor_main_prog, self.predictor_fetch_list, self.predictor_feeder, self.predictor_scope = self._predictor( + args, self.exe, self.place) + self._has_load = True + + feature_list = [] + file_list = [] + for idx, data in enumerate(self.extractor_reader()): + file_id = [item[-1] for item in data] + feed_data = [item[:-1] for item in data] + feature_out = self.exe.run( + program=self.extractor_main_prog, + fetch_list=self.extractor_fetch_list, + feed=self.extractor_feeder.feed(feed_data), + scope=self.extractor_scope) + feature_list.append(feature_out) + file_list.append(file_id) + logger.info( + '========[Stage 1 Sample {} ] Tsn feature extractor finished======' + .format(idx)) + + # get AttentionLSTM input from Tsn output + num_frames = 300 + predictor_feed_list = [] + for i in range(len(feature_list)): + feature_out = feature_list[i] + extractor_feature = feature_out[0] + predictor_feed_data = [[ + extractor_feature[0].astype(float)[0:num_frames, :] + ]] + predictor_feed_list.append((predictor_feed_data, file_list[i])) + + metrics_config = parse_config(args.predictor_config) + metrics_config['MODEL']['topk'] = args.topk + metrics_config['MODEL']['threshold'] = args.threshold + predictor_metrics = get_metrics("AttentionLSTM".upper(), 'infer', + metrics_config) + predictor_metrics.reset() + for idx, data in enumerate(predictor_feed_list): + file_id = data[1] + predictor_feed_data = data[0] + final_outs = self.exe.run( + program=self.predictor_main_prog, + fetch_list=self.predictor_fetch_list, + feed=self.predictor_feeder.feed(predictor_feed_data, ), + scope=self.predictor_scope) + logger.info( + '=======[Stage 2 Sample {} ] AttentionLSTM 
predict finished========' + .format(idx)) + final_result_list = [item for item in final_outs] + [file_id] + + predictor_metrics.accumulate(final_result_list) + results = predictor_metrics.finalize_and_log_out( + label_file=args.label_file) + return results + + +if __name__ == '__main__': + test_module = VideoTag() + print( + test_module.run_cmd( + argsv=['--input_path', "1.mp4", '--use_gpu', + str(False)])) diff --git a/hub_module/modules/video/classification/videotag_tsn_lstm/resource/__init__.py b/hub_module/modules/video/classification/videotag_tsn_lstm/resource/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/hub_module/modules/video/classification/videotag_tsn_lstm/resource/configs/attention_lstm.yaml b/hub_module/modules/video/classification/videotag_tsn_lstm/resource/configs/attention_lstm.yaml new file mode 100644 index 00000000..eb60a980 --- /dev/null +++ b/hub_module/modules/video/classification/videotag_tsn_lstm/resource/configs/attention_lstm.yaml @@ -0,0 +1,15 @@ +MODEL: + name: "AttentionLSTM" + dataset: "YouTube-8M" + bone_nework: None + drop_rate: 0.5 + feature_num: 2 + feature_names: ['rgb'] + feature_dims: [2048] + embedding_size: 1024 + lstm_size: 512 + num_classes: 3396 + topk: 10 + +INFER: + batch_size: 1 diff --git a/hub_module/modules/video/classification/videotag_tsn_lstm/resource/configs/tsn.yaml b/hub_module/modules/video/classification/videotag_tsn_lstm/resource/configs/tsn.yaml new file mode 100644 index 00000000..09976851 --- /dev/null +++ b/hub_module/modules/video/classification/videotag_tsn_lstm/resource/configs/tsn.yaml @@ -0,0 +1,22 @@ +MODEL: + name: "TSN" + format: "mp4" + num_classes: 400 + seg_num: 3 + seglen: 1 + image_mean: [0.485, 0.456, 0.406] + image_std: [0.229, 0.224, 0.225] + num_layers: 50 + topk: 5 + +INFER: + seg_num: 300 + short_size: 256 + target_size: 224 + num_reader_threads: 12 + buf_size: 1024 + batch_size: 1 + kinetics_labels: "./data/kinetics_labels.json" + filelist: "./data/tsn.list" + video_path: "./data/mp4/1.mp4" + single_file: True diff --git a/hub_module/modules/video/classification/videotag_tsn_lstm/resource/label_3396.txt b/hub_module/modules/video/classification/videotag_tsn_lstm/resource/label_3396.txt new file mode 100644 index 00000000..bcda50c0 --- /dev/null +++ b/hub_module/modules/video/classification/videotag_tsn_lstm/resource/label_3396.txt @@ -0,0 +1,3396 @@ +胶合板 +坠楼 +空手道 +弹奏 +直升机 +罗盘 +健身 +羽毛球拍 +龙与地下城 +漆 +混合器 +学生 +安全气囊 +法庭 +游泳池 +潜艇 +穆斯林头巾 +奇葩 +绞狐大冒险 +飞行器 +演出 +喷枪 +萝莉 +暗黑血统 +彭彭丁满历险记 +出生 +嫩模 +流星雨 +超市 +StepMania +自动扶梯 +讲座 +缝纫机 +自助餐 +衣服 +翼装飞行 +手语 +可爱颂 +复合弓 +列车 +欧洲模拟卡车 +吃豆人 +队长 +僵尸 +猩红 +战争片 +通关攻略 +横梁 +机场 +引体向上 +暴力片 +橱柜 +卡车 +美人 +仙境传说 +格斗 +奇趣蛋 +健美 +新能源 +佳能 +电视 +喊麦 +信件 +双胞胎 +膳食补充剂 +胸部 +碟子 +女排 +地铁:最后的曙光 +牛肉 +激光照明 +毛巾 +面包店 +时空之轮 +泰迪 +吉他 +绿茶 +自驾游 +签名会 +酱 +抽屉 +山火 +T台 +喝醉 +马桶 +巴松管 +皇帝 +沙丘 +主播 +炖汤 +糖 +球球大作战 +彩票 +中暑 +雷达 +独木舟 +星座 +弓箭 +跑车 +大豆 +妖怪 +激光 +中秋节 +风景 +橡皮筋 +固体 +音乐会 +幽灵 +救生员 +彩虹 +政治 +眼线 +柴 +医疗 +购物中心 +舰载机 +空战 +服装 +钢模 +拖鞋 +教室 +羽毛球 +烤肉 +煎饼 +金星 +火箭 +婴儿车 +黑暗之魂 +夏目友人帐 +图像处理 +恐龙 +柔术 +剪刀 +冒险任务世界 +冰雹 +木工刨 +白金汉宫 +可丽饼 +绅士 +盖瑞模组 +滑板 +游戏网站 +套房 +动作教学 +DOTA +海盗传说 +小马慢跑 +怪物中学 +快闪 +冠军 +手风琴 +工具 +进击的巨人 +怀孕 +停车场 +舌钉 +自行车运动 +飞檐走壁 +滑雪板 +保健 +大蒜 +门 +咏春 +热火吉他手 +筷子 +饮料罐 +拳无虚发 +糗事 +豆腐 +动物园大亨 +佛兰肯斯坦 +动漫 +机长 +脱发 +石英 +医生 +母婴 +数码 +螳螂 +加仑 +核电站 +老鹰 +哑铃 +成语故事 +情景剧 +小提琴 +熊猫 +泥石流 +贴花 +合唱团 +质量效应 +东京食尸鬼 +流行音乐 +犁 +帆 +监拍 +城市 +液氮 +扳手 +卫星 +跳伞 +三维 +美味 +特种部队 +名模 +手帕 +瀑布 +教师 +风铃 +爱丽丝梦游仙境 +风光 +通用电气公司 +逗比 +豹子 +石油 +仙乐传说 +晴天 +皮革 +露台·天井 +实验室 +口琴 +驾车 +枕头 +鸡 +遥控器 +铁路运输 +瓦片 +原子弹 +偶像剧 +闯关 +西游记 +吉他音箱 +车速表 +甜品 +电源供应器 +人行道 +疲劳驾驶 +房车 +量子 +民工 +薄暮传说 +节日 +连连看 +遥控 
+科学探索 +银河 +雨水沟 +小丑 +建造 +鹅 +地毯 +赛车俱乐部 +超级飞侠 +美女与野兽 +克兰娜德 +中央处理器 +儿童故事 +口罩 +警匪片 +美女直播 +海洋 +睡衣 +忍者 +烧伤 +裙子 +剪影 +生活大爆炸 +麦田怪圈 +勺子 +狮子王 +床戏 +导管 +冰雪奇缘 +彩泥 +货物 +驼铃 +牙膏 +高铁 +古风 +新娘 +深空传说 +鹰 +鹿 +铲车 +星际战甲 +怪物猎人 +转蛋 +香奈儿 +醉驾 +坦克世界 +新能源汽车 +幻想传奇 +纺织品 +超级英雄 +谍战片 +起重机 +钥匙·按键 +苹果商店 +河粉 +名侦探柯南 +蜂窝 +演唱会 +喷泉 +比基尼 +面粉 +日本食玩 +王子 +画画 +激情戏 +中国队 +帆船 +电商 +消防员 +美腿 +侏罗纪 +吃饭 +锯木机 +烤面包机 +土星 +珠子 +大头儿子 +穴位 +旅客 +演员 +短信 +擂台 +东方永夜抄 +龙之谷 +马路 +袜子 +神秘岛 +勋章 +斑马 +攻壳特工队 +激流回旋 +路易鬼屋 +飞盘 +汽车 +走秀 +异度之刃 +奥利奥 +相声 +房屋 +三国无双 +猫和老鼠 +高校 +鬼片 +维修 +巢 +煎蛋 +哪吒 +排球 +人体穿孔 +核武器 +明星 +水底 +水库 +海军陆战队 +景区 +陀螺战士 +战斗公主西娜 +教学 +火花塞 +收费站 +风力 +马里奥派对 +操作系统 +灼眼的夏娜 +古罗马 +哈士奇 +气象 +神魔之塔 +锁定:现代空战 +球接头娃娃 +神鬼寓言 +幽灵战车 +战争前线 +骡子 +出游 +早餐 +华为 +房间 +现代片 +海报 +游戏王 +咳嗽 +金丝雀 +音乐剧 +根 +灯泡 +星界边境 +视频教学 +剥 +钢铁 +星之卡比 +试驾 +车技 +剑 +树 +茄子 +轨道 +坠毁 +面团 +玩具屋 +拳击 +音乐中心 +行李 +长江 +花絮 +纯情罗曼史 +地精 +铁铲 +公园 +杠铃 +旅游团 +特斯拉线圈 +喷染术 +电子书 +猪猪侠 +骆驼 +假人挑战 +推杆 +图书馆 +洗澡 +耀西之岛 +武装突袭 +幼儿园 +印刷电路板 +头盔式相机 +金字塔 +双簧管 +养老院 +黎明杀机 +复活节兔子 +马棚 +枪杀 +二维码 +击杀 +刷子 +古筝 +财经 +武术 +影视周边 +游览车 +鳄鱼 +开箱 +水晶 +街头霸王 +恐怖袭击 +过生日 +陶瓷 +健身球 +慢镜头 +贝斯 +异形附身 +风扇 +时装秀 +海底 +奔驰小哥 +弹弓 +生化奇兵 +俱乐部 +人字拖 +推土机 +钞票 +救人 +派对 +土豆 +宿舍 +玉米 +乐动魔方 +国产剧 +柚子 +模子 +细菌 +背包 +婚礼 +菠菜 +遛狗 +东方红魔乡 +山口 +驴友 +偶像大师 +噬神者 +假面骑士 +瑞奇与叮当 +新郎 +坦克在线 +网吧 +酵母 +车手 +枪击 +杂志封面 +孩之宝 +猎人 +夜市 +黑岩射手 +王座 +雕塑粘土 +同人志 +浪客剑心 +车票 +重生娃娃 +驱逐舰 +反叛的鲁路修 +领带 +死亡空间 +幽默 +障碍技巧 +运输机 +铙钹 +条码 +采石场 +排骨 +壁橱 +高尔夫球 +恐怖主义 +圆号 +悠悠球 +科技奇趣 +陶轮 +石头 +枪战 +纸板 +斯诺克 +荒野大镖客 +吉祥物 +满月 +野蛮人柯南 +家电 +电子竞技 +但丁地狱 +天花板 +披萨 +车辆 +巨人 +风车 +高速公路 +婚房 +蛤蜊 +抢救 +兔子 +航展 +火山 +发动机 +装载机 +皮艇 +梳子 +维秘 +星际火狐 +嫦娥 +沼泽 +舞曲 +炒鸡蛋 +心灵杀手 +怪物 +中国风 +理发师 +悬崖 +铅笔 +博士 +海豚 +芥末 +磨刀 +卸妆 +黄牌 +魔法门 +飞行 +游泳 +羚羊 +自动售货机 +优惠券 +银行 +打车 +东北二人转 +演讲 +香槟酒 +油罐车 +海豹 +万智牌 +步枪 +造型师 +空间站 +大风 +鼻子 +外卖 +X战警 +田径 +外星人 +木材 +速度生活 +豪车 +鬼魂 +手榴弹 +海底隧道 +表演者 +木琴 +月饼 +活页乐谱 +红牛 +天才 +南瓜饼 +鸟 +离合器 +精灵复兴 +击倒 +农产品 +轰炸 +商家 +美貌 +狗粮 +绞盘 +虚构人物 +冰川 +怒之铁拳 +车祸 +星火 +陆战队 +太阳 +大学 +录音机 +全职猎人 +内衣 +赛车总动员 +同学会 +四重奏 +桨 +驾驶员 +健身房 +瓷器 +抢劫 +爆米花 +绿色 +蕾丝 +黑熊 +公主抱 +刀剑神域 +馒头 +圣诞礼物 +墙壁 +幼儿 +信用卡 +刀 +狂飙旧金山 +日历 +新生 +婚戒 +雪 +雨 +竹子 +美人鱼 +音乐键盘 +娃娃 +键盘 +动力火车 +骑兵·装甲兵 +立交桥 +散步 +成就 +荣誉勋章 +助攻 +沙滩 +蚯蚓 +动物 +汽车越野赛 +项链 +啤酒 +女装 +和尚 +乳清蛋白 +圣诞树 +手绘 +投篮 +大麦 +光头强 +工作会议 +苍蝇 +宝藏 +射击游戏 +粉笔 +杏仁 +碗 +神舟 +胭脂 +惊天动地 +马 +封面 +小学 +物联网 +沙子 +录音棚 +挖土机 +穿衣 +飞机 +大盘 +内涝 +恶魔 +鳄梨 +飞驰竞速 +西兰花 +实验 +录影机 +气球塔防 +跑酷 +交警 +熊 +桔梗 +解放军 +活动房屋 +相机 +数学 +特斯拉 +太空堡垒 +宅男女神 +安卓 +冰块 +鸡舍 +美妙天堂 +化石 +超时空要塞 +数字 +网球 +神秘海域 +艺考 +艺术节 +编织 +打字 +明星派 +二十一点 +护栏 +大海 +极光 +舞力全开 +广场 +神庙逃亡 +纽扣 +时装周 +西葫芦 +炊具和烤盘 +星巴克 +油炸 +划船 +创世纪 +摩托车越野赛 +星星 +金刚 +弹球 +美女 +三明治 +工艺 +冒险 +垃圾桶 +极限竞速 +加菲猫 +宝宝辅食 +首饰 +场地赛 +球 +幻想水浒 +生活剧 +希曼 +插图 +潜水 +秃鹫 +诺亚方舟 +少女 +比武 +糖果粉碎传奇 +拳皇 +墨水 +校园暴力 +引擎 +脱口秀 +路由·伐木 +牡蛎 +漂移 +熊出没 +校车 +牧羊人 +功夫 +植物大战僵尸 +朗诵 +娇妻 +镜框·画框 +百叶窗 +客流 +咖啡 +塑像 +生物学 +手电筒 +机器 +座位 +沙包·沙袋 +森林 +乐高主题公园 +视频制作 +充电器 +犬夜叉 +超级粉碎兄弟 +交通安全 +躲猫猫 +翼 +粘土动画 +山羊 +海王星 +导弹 +街头表演 +水獭 +访谈节目 +石榴 +讲解教学 +拥堵 +变形 +电饭煲 +星际公民 +猿 +头 +丝路传说 +极品飞车 +皮卡丘 +拍照 +化油器 +肥料 +鲨鱼 +星云 +冬奥会 +模拟器 +CD机 +中国梦 +捕食 +泰坦陨落 +白宫 +饺子 +光环 +火鸡 +男装 +火爆狂飙 +推钱机 +命令与征服 +大金刚国度 +古琴 +食堂 +消防站 +愤怒的小鸟 +护士 +母亲 +暗杀 +美妙旋律 +芦笋 +荷花 +弓猎 +超车 +松下 +宙斯 +生活记录 +公路 +模拟合成器 +时尚 +宾馆 +难民 +立体声扬声器 +旋转 +杯子 +模型 +坦克 +生食 +波西杰克逊 +气球 +峡谷 +锁 +粉蜡笔画 +铅笔盒 +收藏 +激光笔 +智能家居 +翻筋斗 +烤面包 +生化危机 +演奏 +百货公司 +屁股 +锯 +车站 +瓜 +极速前进 +篮子 +蹦极 +纸片马里奥 +秦时明月 +全面战争 +游乐园 +最终幻想 +水手 +水上乐园 +尾巴 +鸡蛋 +相声演员 +坚果 +硬盘驱动器 +吃货 +望远镜 +夹克 +僧侣 +山洪 +打斗 +仓库 +独奏 +毁灭战士 +牵手 +普乐路路轨道 +天鹅 +旅行社 +柔道 +景观 +古墓丽影 +蓝龙 +甜美 +拍手 +酒店 +膝盖 +歌曲 +滑翔伞 +小马宝莉 +修道院 +滑板公园 +旅馆 +云朵 +麦片 +灾区 +水槽 +卧室 +避暑 +小熊维尼 +棒球帽 +拖车 +四大名助 +铜管乐器 +沙画 +外太空 +模拟人生 +健身教练 +数字电子 +公寓 +乐迪 +枪战片 +便秘 +姑娘 +大宅门 +猪蹄 +山峰 +三国志大战 +灯 +锅炉 +火 +气球造型 +面部 +光标 +动作片 +上网本 +汽艇 +棉花 +雪橇 +热泵 +装修 +记者 +女警 +恐怖 +龙 +夜景 +民警 +算命 +手里剑 +夜晚 +笑傲江湖 +精灵 +炮弹 +表情包 +刮刮卡 +三轮车 +护目镜 +墙纸 +洗头 +红包 +星系 +运动鞋 +菌类 +冰 +拔牙 +腿 +肿瘤 +先锋 +开心农场 +迪士尼 +山体滑坡 +表格 +文物 
+眉毛 +刷牙 +绝命毒师 +电子宠物 +咖啡机 +流苏花边 +素描 +超级跑跑 +搏击 +司机 +卡通 +灰姑娘 +晨练 +记号笔 +心脏 +大提琴 +卫生巾 +受灾 +任天堂 +珠宝 +英雄连 +溜冰场 +青岛大姨 +大灰熊 +骑车 +基督 +道具 +料理 +甜菜根 +鱼饵 +车床 +反曲弓 +影视 +网络直播 +车库 +波斯王子 +船厂 +捕食者 +青铜 +橄榄 +污点·着色 +咖啡屋 +水稻 +改装车 +小正太 +烧烤 +卡布奇诺 +蝴蝶结 +桥梁 +邮件 +数码宝贝 +手臂 +炉子 +学校 +霸王龙 +山 +客车 +焊接 +小车 +分裂细胞 +管道 +爱情剧 +摇滚名人堂 +游行 +完美世界 +开枪 +微波炉 +中学 +东方大头条 +香菇 +虾 +双眼皮 +椅子 +格雷少年 +相亲节目 +称重秤 +香精油 +小路 +压力清洗 +木头 +水彩画 +土豆泥 +电脑 +方舟 +乐高好友 +球体 +冷空气 +大闸蟹 +帽子 +涂料 +手提包 +战争 +水球 +汤 +西红柿 +唇妆 +商铺 +王者之剑 +腕表 +藤蔓 +钱包 +刀工 +平衡车 +奥斯卡金像奖 +抗日剧 +导游 +行星边际 +泡沫 +任务栏 +中药 +死侍 +小小大星球 +自行车 +签名 +胸肌 +太极 +儿童安全座椅 +口哨 +罗技 +休闲 +汉堡 +德军司令部 +变压器 +考拉 +动物之森 +手势 +竖琴 +椰子 +大炮 +医保 +杂技 +电影摄像机 +表演艺术 +话剧 +工作室 +黄河 +吸毒 +黄油 +无限试驾 +高空 +冬天 +酒 +洞穴 +甘薯 +流星体 +手表 +救护车 +金牌 +麦迪逊广场花园 +特技演员 +饼干 +垃圾车 +服装搭配 +出租车 +暴力 +女王 +盗墓 +手提箱 +丝巾 +化学反应 +海贼王 +淋浴 +选秀 +成型 +童话故事 +麦克风 +黑客 +无尽传说 +羊 +狙击手 +小轮车 +夺宝奇兵 +美食 +食品 +肥皂泡 +骑牛 +辫子 +重型设备 +战队 +制服诱惑 +法官 +蝎子 +小屋 +酒精灯 +青鬼 +马赛克 +南方公园 +无人机 +调酒师 +万万没想到 +粉底 +捕鱼 +初音未来 +毒贩 +矮人 +好莱坞 +六孔哨 +棺材 +猜拳 +潜水服 +搞笑 +火星 +盗窃 +DJ +沐浴类产品 +长颈鹿 +整蛊 +围攻 +教堂 +黑带 +浮桥 +单眼皮 +陷 +软件 +过山车大亨 +围巾 +幸存者 +情感剧 +洗剂 +拆除 +星际迷航 +浮子 +雪地 +安保 +黄金眼 +追尾 +岩石 +电视广告 +行窃 +会计 +鸭子 +VR显示器 +莱克斯卢瑟 +反恐精英 +蒸汽机 +球场 +游戏动漫 +玉米卷 +漫威传奇 +腾讯 +亚洲 +卫生间 +吸烟 +战争机器 +青蛙 +喜羊羊与灰太狼 +飞艇 +猎犬 +招式 +拉伸 +连帽衫 +欧美音乐 +恶魔岛 +拳击之夜 +车 +大型强子对撞机 +舰艇 +枫之谷 +真功夫 +轴 +飞碟 +生物 +魔兽争霸 +欧巴 +平底锅 +石膏 +钢琴 +海关 +剪纸 +坐垫 +镜子 +夏令营 +战争之人 +简历 +彩排 +船 +真空管 +邮轮 +法制节目 +皇室战争 +小龙斯派罗 +博览会 +舞蹈革命 +生活 +圣诞贺卡 +拥抱 +飞飞全明星 +驾考 +卫生纸 +上市 +果酱 +儿子 +教会 +艺术团 +刷卡 +信封 +军阀 +军队 +黑塔利亚 +玉米饼 +滑雪 +猕猴桃 +提拉米苏 +航天 +芭蕾 +狮子 +跑步机 +杀出重围 +忍者龙剑传 +碰撞 +使命召唤 +自拍 +火柴 +火车站 +枫树 +咖啡师 +解说 +狒狒 +终极格斗冠军 +魔法禁书目录 +消防车 +极限运动 +电脑机箱 +兵 +家畜 +墨镜 +演技派 +大长腿 +功夫片 +梯子 +夏日 +排箫 +法师 +急救 +福尔摩斯 +农场 +发型 +决战之夜 +太子妃 +华夫饼 +刺猬索尼克 +赌博 +磨砂机 +办公室 +器官 +毕业 +军训 +带子 +治愈 +船长 +砂浆 +最游记 +绿野仙踪 +炉石传说 +数字录像机 +清洁 +喷气艇 +刺猬 +恒温器 +透视装 +黑执事 +基金 +守望者 +ATM取款机 +干墙 +曲棍球 +双节棍 +明胶 +锤子 +婚宴 +街道 +甜饼怪 +上帝模式 +狂神国度 +烈火战车 +麻将 +X音素 +液压机 +水杯 +扭曲 +魔界战记 +车评 +独角兽 +特种兵 +诱饵 +活动 +面具 +九阴真经 +实况足球 +护肤品 +游戏工作室 +榴莲 +马戏团 +原油 +蚁类 +分娩 +钓鱼 +游戏手柄 +影评 +虚幻竞技场 +神枪手 +架线工 +无线遥控飞机 +轮滑 +排气系统 +水管 +电源 +星之海洋 +摄像机 +纪录片 +优雅 +闺蜜 +曼妥思 +作曲家 +锡罐 +骑行 +快递 +电影节 +车队 +犀牛 +肌肉 +纽约时代广场 +敌人 +英雄 +八路 +纹身 +留声机唱片 +家常菜 +影视原声 +撞车 +达人秀 +古玩 +吊坠手链 +旅游 +录节目 +竞技 +黄梅戏 +村民 +昆虫 +旅行车 +草原 +毛衣 +叉车 +决斗大师 +灌木 +手工 +神之浩劫 +广场舞 +工厂 +练习室 +智能硬件 +龙珠 +龙梦幻境 +模仿 +枪支 +加速处理单元 +皮卡 +踏板车 +卡丁车 +歹徒 +跳跃 +大屠杀 +阀 +霍比特人 +煤矿 +遥控车 +女仆 +眼镜 +遇难者 +足球 +英雄工厂 +种族 +武打 +皇牌空战 +曲奇饼 +蜡像 +衬衫 +平衡木 +火灾 +水果蜜饯 +孔雀 +头文字D +战国 +正手击打 +港台剧 +空中巴士 +部队 +挡风玻璃刮水器 +楼梯 +无人驾驶 +写作 +塑料袋 +灯塔 +徒步旅行 +埃菲尔铁塔 +快餐 +丛林 +怪兽 +灌篮高手 +导航 +台球 +裤子 +包子 +绘图仪 +宠物 +冲浪板 +厕所 +龙虾 +寿司 +海蜇 +赛车游戏 +下午茶 +跨栏 +图像扫描仪 +王者荣耀 +钢琴弹奏 +润肤膏 +真人快打 +橡皮泥 +二胡 +新封印传说 +衣服熨斗 +红烧肉 +除毛 +变脸 +泡菜 +酸奶 +中文 +甘蔗 +拉丁 +萨克斯 +鼓 +炸弹人 +壁炉 +球员 +角斗士 +轮缘 +病毒 +洛基 +科技数码 +梦想俱乐部 +私房菜 +平板 +灯光 +圆筒 +工人 +音乐 +灯具 +探险 +相亲 +传送门 +互联网 +喝 +鼠 +齿轮 +油脂 +旗 +糖霜酥皮 +光学错觉 +数字音频工作站 +击球 +截拳道 +指环王 +高达 +网球王子 +瘦腿 +神秘博士 +自行火炮 +向日葵 +纤维 +电视台 +羊肉 +飞行员 +电车 +按摩 +射箭 +欧洲杯 +戒指 +英雄传说 +棋牌 +魔术 +电动车 +体操 +毁灭公爵 +T恤 +宗教 +豚鼠 +精彩剪辑 +卡拉OK +护肤 +海盗 +染发 +名人采访 +锐化 +午夜俱乐部 +吃鱼 +飙车 +吸管 +肾脏 +焙烧 +跑步 +紫罗兰 +海岛奇兵 +东京喵喵 +阅兵 +偷窃 +奶茶 +辣条 +特战先锋 +蝙蝠侠 +孤岛危机 +魔法王国 +挖掘机 +U盘 +荧光棒 +图章 +女婴 +光晕 +礼品 +会议 +车展 +电音 +家具 +木雕 +台锯 +终极奇迹 +草坪 +模拟城市 +画眉 +淑女 +酒馆 +唇膏 +手机数码 +橄榄球 +锻造 +水疗 +音悦台 +反导系统 +动感 +第二人生 +星空 +园艺 +稻草人 +无头骑士 +盔甲 +舞会 +蛋 +高空抛物 +无敌浩克 +姜饼 +印刷 +帝国时代 +黄山 +鲁邦三世 +盲人 +蛇 +睡眠 +战舰世界 +蟑螂 +面包车 +缝纫针 +脂肪 +纸模型 +室内装潢 +恐怖分子 +客机 +欧美影视 +便利店 +核弹 +双面人 +厨师 +跑道 +计算机 +灾难片 +飞哥与小佛 +放牧 +文艺演出 +肖像 +红绿灯 +锥体 +喇叭 +赛道狂飙 +全家福 +麻辣烫 +包包 +身体护甲 +航空 +毒品 +天空 +针织 +魔杖 +猪肉 +砖 +松糕 +圣诞装饰 +轰炸机 +无尽的任务 +摇滚史密斯 +网页 +汽车照明系统 +小镇 +巫师 +月球 +硬汉 +机车 +面食 +手术 +海鲜 +玩具熊的五夜后宫 +巧克力 +手机 +Vox +画法 +莫妮卡的团伙 +大米 +全金属狂潮 +随声听 +旋律 +放生 +操场 +窗户 +恐怖喜剧 +大力水手 +惩罚者 +木工 +悬疑 +长方形 +木片 +电子电路 +查理与巧克力工厂 +不锈钢 +苍翼默示录 +盒子 +耐力赛 +保龄球 +海啸 +舰队收藏 +死亡岛 +歌手 +电话 +感染:幸存者故事 +真人秀 +恶魔城 +五佳球 +机械 +马里奥与路易吉 
+饲养员 +滑水 +龙舟 +大理石 +港片 +葫芦娃 +武装分子 +奶油烤菜 +吓人 +斧头 +正义联盟 +超凡双生 +蜜蜂 +游艇 +头骨 +道路 +神奇四侠 +弓道 +呼啦圈 +拍客 +航空母舰 +狂热节拍 +宇宙 +美景 +健身队 +武侠 +武林高手 +测评 +薄樱鬼 +人物专访 +颈椎 +皮带 +少年泰坦 +黑色 +交响乐 +震荡 +火炉 +光盘 +喝水 +守望先锋 +烹饪 +装甲车 +棒球 +网游 +黄蜂 +安全带 +泰坦 +巴掌 +指南 +复活节彩蛋 +餐馆 +樱花 +溜冰鞋 +机甲战士 +耐克 +命运石之门 +装扮 +山水画 +耀斑 +贺卡 +日本团子 +月亮 +黑人 +科普 +钥匙扣 +甜瓜 +垃圾 +美食猎人 +头巾 +无线电遥控船 +骨牌 +单挑 +上古世纪 +覆盆子 +绳子 +海绵 +超模 +香肠 +奇观 +直线加速赛 +菜园 +雨伞 +十二生肖 +奶油 +汽车修理 +大号 +倒霉熊 +音乐节目 +唇彩 +几何冲刺 +视频游戏厅 +射击 +鬼屋 +手套 +驾驶 +青蛙军曹 +鞍 +港口 +彩灯 +广播公司 +摄影 +鞋 +我的世界 +大发 +马甲线 +模式·图案 +干衣机 +机器人战斗 +人工呼吸 +华尔兹 +水族馆 +国庆 +领奖 +巫师之怒 +火影忍者 +马克杯 +战鹰 +年会 +垂钓 +摩天大楼 +炸酱面 +企鹅 +整形 +睫毛 +暴走大事件 +教程 +钢铁侠 +日出 +国家公园 +戏剧 +折纸 +花 +说唱史诗战 +白娘子 +头盔 +威浮球 +热血无赖 +眼球 +香烟 +抗战片 +小鲜肉 +音响 +武功 +场地自行车 +稻田 +真侍魂 +海战英豪 +火焰之纹章 +婚纱摄影 +发布会 +损伤 +下水道 +雕刻 +制服 +延时摄影 +凯蒂猫 +截屏 +奇幻森林 +舞台剧 +雪糕 +飞车手罗德 +我想当爷们 +肉丸 +短号 +炮兵 +孩子 +搞怪 +军事 +对决 +战神 +菜花 +欧冠 +冰壶 +蓝莓 +帐篷 +幸运星 +化妆 +激战 +方便面 +旋转木马 +人物 +磁带 +恐怖片 +梦幻龙族 +牙齿 +海滩 +猛鬼街 +鲸 +唱片公司 +露营 +松饼 +安妮 +百乐门 +圣诞 +扬琴 +棚子 +调解 +发射 +体育 +通心粉 +热可可 +二次元 +迷人 +宇航员 +运钞车 +行车记录仪 +官员 +奥数 +玉米地 +音乐人 +彗星 +颁奖典礼 +表演 +粉丝 +军人 +堂吉诃德 +狙击枪 +减脂 +古装 +游戏机 +饥饿游戏 +撒旦 +邮票 +理发店 +网络主播 +身材火辣 +棒球 +兔八哥 +大巴车 +耳环 +数码产品 +游民星空 +泰拳 +配音秀 +机器人 +盛装舞步 +玩具人 +袋鼠 +酒吧 +蘑菇 +死亡边境 +世界杯 +驾驶舱 +海藻 +乐高 +艺术 +龙之信条 +开关 +武警 +日蚀·月蚀 +手机评测 +诛仙 +行李箱 +恐龙世界 +天宫 +滑板 +青贮饲料 +摄像头 +工程车 +阀门·龙头 +石工 +孤岛惊魂 +胫骨 +砸车 +迷你人形 +超级玛丽 +生活技巧 +武打片 +胡子 +苹果 +橙色 +灾害 +猫 +翅膀 +吵架 +唱诗班 +雷神 +扑克 +史酷比 +魔龙骑士 +人体 +拾音器 +圆圈·循环 +地狱 +运球 +游轮 +疯狂动物城 +战舰 +核反应堆 +雾霾 +版画 +真正的家庭主妇 +海龟 +烘培 +电容器 +核试验 +寒潮 +垂死之光 +橡木 +游乐场 +养生 +杀手 +魔法 +台阶·门廊 +倒塌 +法院 +硬币 +拳击比赛 +弩 +可爱 +笔记本 +花卉设计 +僵尸末日 +闹钟 +调制解调器 +狗窝 +萌妹 +部落战争 +聚会 +乐器 +劫匪 +腹语 +电动工具 +头发 +地下城与勇士 +卡牌 +卡片 +别墅 +地球冒险 +暴风雪 +瑜伽 +海狸 +安检 +绘画 +沙拉 +浴缸 +毛绒玩具 +海狮 +琵琶 +肯得基 +口红 +娱乐 +魔戒 +婴儿 +烫发器 +狂飙 +积水 +机动车 +奖 +椰奶 +芦荟 +刺客 +拖拉机 +蒙娜丽莎 +牛仔 +葡萄酒 +猴子 +潜水员 +盘式制动器 +比赛 +吸尘器 +豌豆 +拍摄现场 +帆布 +喜剧演员 +蜡笔小新 +香蕉 +全民健身 +牛排 +音响系统 +啦啦队 +街头采访 +视觉小说 +弹唱 +飞车 +装甲核心 +罐头 +哈利波特 +沉香 +举重 +纸 +拼图 +电视频道 +防护 +视频游戏 +家居 +平屋顶 +开车 +航拍 +特技 +杂货店 +拍卖 +薯条 +珍珠 +手指 +柔力球 +美少女战士 +游戏公司 +冰球 +天气预报 +充气船 +爆炒 +机油 +眼泪 +西区故事 +镶嵌 +仪表着陆系统 +鱼 +爆炸 +骑马 +礼服 +植物 +战地 +淘宝 +烟花 +求婚 +饮料 +蹲 +喜剧 +猎天使魔女 +潜行者 +船员 +汽油 +低音炮 +美甲 +无花果 +超级大金刚 +猩猩 +带锯 +国旗 +开幕式 +货运工具 +腹部 +泥潭 +秀逗魔导士 +交通 +小米 +钢琴家 +机票 +肉 +姜黄 +龙腾世纪 +杀戮空间 +婴儿吊带 +拿铁 +僵尸片 +孤儿院 +自爆 +马里奥赛车 +火锅 +冬季运动 +女巫 +大厦 +街头赛车 +快板 +驾校 +秀场 +侠盗猎车手 +杂志拍摄 +乌龟 +蜂蜜 +减肥操 +水上艇筏 +象 +播种 +单词 +偷车 +玻璃贴膜 +俄罗斯方块 +惊悚 +火车头托马斯 +净水器 +电影解说 +画家 +谷类 +机枪 +滑翔翼 +瓶子 +合唱 +超胆侠 +轮盘 +电气布线 +考古 +豆类 +集装箱 +异形 +洗碗机 +割草机 +茶 +计算器 +魔方 +宝莱坞 +辣妹 +军官 +牛人 +后备箱 +海边 +电磁线圈 +印度 +红酒 +食谱 +工地 +特技飞行 +家庭剧 +培乐多 +温泉 +钩针 +宫殿 +时装 +鹦鹉 +棕熊 +运动会 +空姐 +球星卡 +葱油饼 +洛奇 +女团 +老虎机 +记者会 +体育场 +票房 +无冬城 +浣熊 +洗衣服 +菜市场 +寂静岭 +肉汁 +大力士 +鼓棒 +金属加工 +壶铃 +德云社 +国际军事 +驾照 +面条 +手枪 +金条 +泰迪熊 +河马 +洗涤 +阁楼 +爆炸袭击 +桑拿 +踢打 +爱探险的朵拉 +葡萄园 +闪光 +妈妈 +骨头 +钓竿 +颜色 +摩托车头盔 +纱线 +驯鹿 +银魂 +独轮车 +虚拟玩家角色 +圣经 +毛笔字 +电影 +音乐影片 +西餐 +菠萝 +西湖 +清洁剂 +斗牛 +小红帽 +餐巾 +单杠 +地球 +爽肤水 +打印机 +吹风机 +记号笔 +小麦 +螺帽 +乐高都市 +白酒 +显卡 +都市 +画展 +光之美少女 +银行卡 +群星 +穿越火线 +古装剧 +单簧管 +网络 +洪水 +美容 +汤姆猫 +讲故事 +海底世界 +操作杆 +赛车方向盘 +倚天 +球赛 +海岸 +空调 +铁路 +怪物卡车大毁灭 +下巴 +票 +复仇者联盟 +新闻 +雪崩 +彩绘 +狂野飙车 +沙雕 +木偶 +轮椅 +文艺 +家电公司 +海岛 +苹果派 +降龙十八掌 +打结 +素食 +深渊传说 +骑士 +视频解说 +活塞 +小猪佩奇 +直播 +蟋蟀 +乘客 +英雄联盟 +大气污染 +硬石餐厅 +晶体管 +宝石 +奶酪 +图表 +鲜花 +背心 +反恐 +科学家 +种子 +喂食 +爪子 +火线精英 +体育用品 +照片 +军事武器 +直线 +电脑硬件 +开锁 +鼓手 +模型车 +航天器 +屏幕 +花生 +直排轮滑鞋 +军舰 +钻石 +橄榄油 +稻草 +蜡笔 +妆容 +杀手本能 +餐厅 +摔跤 +内裤 +蹦床 +樱兰高校男公关部 +跆拳道 +科幻 +豪宅 +停车 +冰淇淋 +钢盘·平底深锅 +大乱斗 +服装店 +千与千寻 +音标 +吉他英雄 +南瓜 +采访 +小吃 +漫画英雄 +最后生还者 +红薯 +镜之边缘 +燃脂 +葫芦丝 +篮球 +组装 +台球杆 +过滤器 +空翻 +壁画 +闪电 +海域 +红唇 +面试 +吊坠 +武侠剧 +睫毛膏 +香水 +舞蹈室 +资讯 +眼影 +军装 +躺骑车 +白色 +英魂之刃 +魔鬼 +饭团 +琴弦 +冰箱 +通灵王 +公交 +魔法之战 +泳装 +文本 +长号 +羊毛 +古诗 +马克思佩恩 +演习 +陀螺仪 +车牌 +静物写生 +木屋 +米饭 +萝卜 +高尔夫球 +散热器 +直播间 +星球大战 +黄金 +果汁 +疯狂橄榄球 +散打 +犰狳 +爱情故事 +决斗 +电动汽车 +缝纫 +餐饮 +魔兽世界 +设计师 +航班 +麻薯 +以撒的结合 +中提琴 +孢子 +说唱 +死神 +迷宫 +战斗 +警长 +手球 +睡袋 +镲片 
+城堡 +性感 +酒精 +生化模式 +湖 +黑暗 +小小世界 +户外休闲 +球技 +同步带 +制动 +剧情片 +球鞋 +清纯 +聚餐 +刺绣 +减肥 +对唱 +睡美人 +儿童 +烤箱 +黄色 +干草 +神灵 +航空公司 +元素周期表 +电影院 +女神转生 +字典 +飞镖 +战锤 +失忆症 +死亡笔记 +亚马逊公司 +虐杀原形 +象棋 +虚幻引擎 +烧烤架 +奶粉 +悉尼歌剧院 +伐木 +草莓 +爆破 +忍者神龟 +银 +四轮车 +鬼泣 +娱乐八卦 +浴室 +鸡肉 +胡萝卜 +胎儿 +液体 +收割机 +铜 +玩具世界 +一字马 +飞船 +修剪器 +煤炭 +简笔图 +网剧 +小品 +洋葱 +便当 +百事 +蜘蛛 +警车 +马车 +尼姑 +河流 +斗牛士 +染色 +黄瓜 +跳水 +音乐大师课 +蜗牛 +钢笔 +故宫 +公益片 +渔船 +蓝色 +卷发器 +超级快递 +鞭炮 +珊瑚 +实战 +跳绳 +滑冰 +小行星 +翻车 +博物馆 +欧元 +哆啦A梦 +乐乐天使娃娃 +空难 +阴阳师 +辣椒 +青之驱魔师 +鸿雁 +SaGa +凝胶 +池塘 +节拍器 +亲子节目 +播放机 +打印 +歌迷 +荒野星球 +农业 +地震 +时政 +吴哥窟 +拉面 +音乐节 +甜甜圈 +藤球 +灾难意外 +骑马与砍杀 +柑橘 +不明飞行物 +软管 +相册 +触摸屏 +飞行表演 +圣杯神器 +紫色 +笛子 +存储卡 +鸽赛 +蔬菜 +山地自行车 +哑剧大师 +双簧 +长椅 +松弛熊 +官兵 +巧克力 +动画 +侦探 +溜冰 +拉链 +警察局 +工程师 +分屏 +牧师 +球拍 +馅饼 +马展 +蜡烛 +游戏 +舌头 +增压器 +泰拉瑞亚 +三国 +污染 +管带夹 +丫鬟 +歌剧魅影 +温室 +八卦 +晚会 +多米诺骨牌 +西瓜 +无主之地 +薯片 +降落伞 +家具装饰 +螃蟹 +模拟山羊 +麦当劳 +传感器 +粉扑 +太阳能 +裁判 +保卫萝卜 +地铁 +松鼠 +猫女 +课堂 +木星 +耳机 +耳朵 +医学 +尼尔机械纪元 +驾驶证 +婚车 +砂锅 +死海 +海绵宝宝 +模拟农场 +警官 +调酒 +龙战士 +动车 +老鼠 +辛普森一家 +蜥蜴 +和服 +女生 +影视混剪 +长毛绒 +广告牌 +撒娇 +炒锅 +萌宝 +自然 +指甲油 +灰泥 +火腿 +桌子 +月姬格斗 +塑料 +大脑 +接线盒 +攀岩 +水果忍者 +货币 +秋千 +销售 +卷轴 +化妆品 +包裹 +斑马线 +面包超人 +蛋糕 +肉桂 +寺庙 +书法 +团队套牛 +仙人掌 +餐饮 +火箭炮 +视频直播 +鬼娃回魂 +画线骑士 +宜家 +春晚 +步行 +日落 +袋子 +击剑 +理发 +地下室 +斗地主 +打针 +喝酒 +喷漆 +柯南时代 +锦鲤 +凝乳 +杀戮地带 +恶霸鲁尼 +奖牌 +猫头鹰 +赛道 +战士 +美照 +购物 +蝴蝶 +字母表 +客厅 +乌鸦 +唢呐 +反串 +潘多拉 +监控 +烤鸭 +明星大乱斗 +葡萄 +飓风 +病人 +吊车 +蝙蝠 +伪装 +益智玩具 +舞蹈 +合金装备 +跳楼 +勇者斗恶龙 +油 +网站 +厨师机 +凯恩的遗产 +钱 +食材 +外交部 +酒厂 +显示器 +主持 +羽绒服 +牛仔布 +车模 +盐 +芝麻 +痘痘 +股票 +微笑 +菜单 +地板 +烤鸡 +自动唱机 +雪貂 +涡轮 +扎染 +歌剧 +变形金刚 +失火 +门票 +雪山 +风筝 +长袍·礼服 +书柜 +家庭教师 +死亡之屋 +DarkOrbit +粮食 +公益活动 +藏獒 +渔民 +下一站巨星 +彩虹手环 +苦瓜 +冲浪 +卷心菜 +珠饰 +西贡小姐 +地铁酷跑 +训练营 +运输 +磁铁 +健康 +床垫 +摇摆 +街头恶搞 +糕点 +拳王 +肋骨 +猫 +曲艺 +加油站 +凉宫春日 +妖怪手表 +动力伞 +墓地 +工程 +民房 +胶片 +色带 +主教 +樱桃小丸子 +鸡翅 +轮子 +牛 +邻里 +萌 +音乐制作 +洛克人 +芒果 +地图 +劈木机 +勇士 +火锅店 +电梯 +吻 +弹球盘 +三角形 +粘土 +鸡尾酒 +慈善 +天天酷跑 +唱片骑师 +结婚 +家庭 +手机壳 +航线 +职业摔跤 +肥皂 +竞技场 +丧钟 +摩天轮 +天使 +台面 +外汇市场 +肉搏 +求生之路 +铜牌 +泡面 +流亡黯道 +灯笼 +谜题 +婴儿室 +捕猎 +尿布袋 +鱼鹰 +雪犁 +方块世界 +斑鸠 +建筑 +电视剧 +堆肥 +细胞 +邪恶力量 +零食 +湾岸竞速 +太鼓达人 +赛车 +金枪鱼 +司令 +皮肤 +马拉松 +末日 +垒球 +涂鸦 +充气城堡 +十字架 +食疗 +早教 +速叠杯 +纸牌 +披肩 +躲避球 +柠檬 +打牌 +抗战 +绕口令 +美容院 +惠普 +情感节目 +永恒之塔 +电脑鼠标 +虚拟现实 +特警 +吊床 +货车 +飞绑 +可乐 +运动 +双重国度 +多功能工具 +妹子 +农村 +眼睛 +干冰 +果冻 +相声小品 +电线杆 +战友 +影视配音 +孤岛生存大乱斗 +奥运 +沃尔玛 +太空 +星际之门 +装饰 +灰色 +樱桃 +电锯 +手铃 +科幻片 +身份证 +古墓 +乒乓 +溪流 +手链 +野外生存 +天线 +玻璃 +营地 +庆典 +玩具 +袭击事件 +美术 +橡皮 +加农 +镜头 +探测器 +洗发精 +彩虹岛 +武器 +装置艺术 +葱 +护理 +命运 +仓鼠 +碎石 +青蛙科密特 +螺旋桨 +七日杀 +整容 +行星 +小宝宝 +科技 +台风 +勇者前线 +皇家国教骑士团 +狂欢节 +热狗 +捉迷藏 +弦乐琴 +叶子 +床 +彼得潘 +写真 +托儿所 +设备 +冰桶挑战 +萌物 +变色龙 +花瓣 +伴郎 +打戏 +画报 +罪恶装备 +漫画 +瘫痪 +飞机失事 +奇闻趣事 +大选 +花瓶 +钢之炼金术师 +杂志 +鼠型车 +教育 +旺达与巨像 +插花 +城堡破坏者 +泵 +混音带 +字体 +超人 +倒计时 +恶作剧 +鹌鹑 +吸血鬼 +小朋友 +颤音琴 +符号 +调音台 +梦幻之星 +橘子 +奶昔 +面糊 +冬不拉 +北斗神拳 +越野 +灭火器 +水果 +婚纱 +上古卷轴 +007 +暮光之城 +蜘蛛侠 +冰沙 +下坡 +毡 +警察 +超市特工 +外套 +汉服 +女童 +筏流 +花园 +布丁 +花圈 +生菜 +新年 +清雪机 +气雾喷雾器 +暮蝉悲鸣时 +公主 +显微镜 +秋天 +模特 +收藏品 +咖喱 +空气净化器 +漫威宇宙 +混凝土 +育儿 +电子琴 +遮瑕膏 +火车 +芭比娃娃 +爵士 +音箱 +黑洞 +积木 +剑球 +奶爸 +监管 +美国队长 +爆笑 +闪电 +降世神通 +祷告 +家禽 +穿越时空 +分裂 +轮胎 +水坝 +索尼 +战斗机 +恶搞路人 +拍戏 +电池 +爆胎 +光棍 +俯卧撑 +摩斯 +饮用水 +狂热 +阅读器 +训练 +奥特曼 +王国之心 +学车 +快递员 +住宅 +袋狼大冒险 +悟空 +面包 +雷曼疯狂兔子 +杀手 +赛马 +啄木鸟伍迪 +国务院 +拖把 +壁虎 +铁拳 +高跟鞋 +动物园 +唱片 +金鹰节 +棒球公园 +宠物小精灵 +手游 +部落冲突 +兽人 +魔术师 +谷仓 +圣剑传说 +商场 +起火 +内饰 +暴龙 +鲸 +上课 +油画 +剧本 +武士 +村庄 +脖子 +卷饼 +蚊子 +狩猎 +保健品 +红毯 +总统 +塔罗牌 +偶像活动 +涂层 +合金弹头 +黑白 +沙漠 +白头鹰 +芝士 +宅男 +战利品 +军营 +围棋 +洗衣店 +教育部 +模糊 +国画 +菲比娃娃 +雕塑 +施工 +书呆子 +冬季 +F-Zero +核桃 +狱警 +游戏人物 +旗袍 +笑话 +衣柜 +综艺 +迫击炮 +梨 +圣斗士 +媒体 +辩论 +健美操 +速降 +男团 +杀人 +圣诞老人 +圆顶 +海豚音 +特技表演 +耙 +探索 +僵尸围城 +银河战士 +长城 +雪人 +作画 +狼 +星际争霸 +立方体 +武装·装备 +被子 +自行车赛 +吃东西 +金属 +交易 +铲屎官 +培根 +档案 +飞去来器 +歌舞表演 +报纸 +仙女 +舞蹈中心 +亚瑟王传奇 +浏览器 +钟 +狗 +露营车 +艺术品 +洗衣机 +睡姿 +打野 +西装 +管风琴 +半机械人 +U型场地 +光 +鸽子 +窗帘 +练习生 +刺客信条 +黑道圣徒 +农民 +煤气灶 +播放器 +塞尔达传说 +消防 +黄铜 +胶带 +挡泥板 +越战越勇 +糖浆 +武装部队 +录像带 +倒车 +牛奶 +冰棍 +阳台 +饮品 +番茄 +灵异事件 +屋顶 +角色扮演 +大富翁 +饿狼传说 
+玫瑰 +猪 +海马 +防汛抗洪 +水井 +书 +土地 +村长 +权力的游戏 +东方妖妖梦 +半条命 +国家队 +木瓜 +绿箭 +滑翔 +视频艺术 +人猿泰山 +国防部 +报警装置 +吉尼斯 +厢型布景 +突袭 +狐狸 +倒立 +搅拌机 +腹肌 +飙酷车神 +电子键盘 +惩罚 +失落的星球 +乐队 +丝绸 +冲突 +豆芽 +交通工具 +滑翔机 +亲子 +拳击手 +少儿 +厨房 +花栗鼠 +楼市 +卡通城 +夜店 +洗车 +广告 +饭店 +合气道 +雪地车 +留声机 +全民枪战 +毛皮 +迷你四驱车 +钻头 +生活常识 +少林 +校园 +拔河 +事故 +菊花 +小蛮腰 +过山车 +鸡腿 +暗黑破坏神 +炸鸡 +排版 +拼贴画 +制造业 +艺人 +选美 +猛兽 +英语 +手 +酥皮 +运动员 +卡士达酱 +内衣秀 +护照 +民航 +土匪 +监狱 +靴子 +积雪草 +沙发 +加勒比海盗 +咱们穿越吧 +极度恐慌 +拉力赛 +背部 +伴娘 +投影机 +面膜 +水 +玉·翡翠 +易拉罐 +度假村 +益智 +吻戏 +丈夫 +吊扇 +模具 +水泥 +火柴人 +公安部 +泥土 +地铁站 +打火机 +小小宠物店 +橙子 +子弹 +猴子岛 +闪电十一人 +雪碧 +指甲 +摩托车 +摄影师 +角色 +电人 +老虎 +音乐合奏 +塑料瓶 +发带 +标签·商标 +肉排 +桃子 +指板 +狼人 +分解动作 +读书 +志愿者 +灵魂能力 +星际宝贝 diff --git a/hub_module/modules/video/classification/videotag_tsn_lstm/resource/metrics/__init__.py b/hub_module/modules/video/classification/videotag_tsn_lstm/resource/metrics/__init__.py new file mode 100644 index 00000000..0d1df762 --- /dev/null +++ b/hub_module/modules/video/classification/videotag_tsn_lstm/resource/metrics/__init__.py @@ -0,0 +1 @@ +from .metrics_util import get_metrics diff --git a/hub_module/modules/video/classification/videotag_tsn_lstm/resource/metrics/metrics_util.py b/hub_module/modules/video/classification/videotag_tsn_lstm/resource/metrics/metrics_util.py new file mode 100644 index 00000000..d85d4fa9 --- /dev/null +++ b/hub_module/modules/video/classification/videotag_tsn_lstm/resource/metrics/metrics_util.py @@ -0,0 +1,157 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. 
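+
+# Metrics registry for videotag_tsn_lstm. In 'infer' mode Youtube8mMetrics
+# accumulates the top-k class probabilities per video and, in
+# finalize_and_log_out, maps class ids to the label names in label_3396.txt,
+# dropping any class whose probability falls below the threshold. The other
+# modes reuse the YouTube-8M Hit@1 / PERR / GAP calculators.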
+ +from __future__ import absolute_import +from __future__ import unicode_literals +from __future__ import print_function +from __future__ import division + +import os +import io +import logging + +import numpy as np +import json +from videotag_tsn_lstm.resource.metrics.youtube8m import eval_util as youtube8m_metrics + +logger = logging.getLogger(__name__) + + +class Metrics(object): + def __init__(self, name, mode, metrics_args): + """Not implemented""" + pass + + def calculate_and_log_out(self, fetch_list, info=''): + """Not implemented""" + pass + + def accumulate(self, fetch_list, info=''): + """Not implemented""" + pass + + def finalize_and_log_out(self, info='', savedir='./'): + """Not implemented""" + pass + + def reset(self): + """Not implemented""" + pass + + +class Youtube8mMetrics(Metrics): + def __init__(self, name, mode, metrics_args): + self.name = name + self.mode = mode + self.num_classes = metrics_args['MODEL']['num_classes'] + self.topk = metrics_args['MODEL']['topk'] + self.threshold = metrics_args['MODEL']['threshold'] + + self.calculator = youtube8m_metrics.EvaluationMetrics( + self.num_classes, self.topk) + if self.mode == 'infer': + self.infer_results = [] + + def calculate_and_log_out(self, fetch_list, info=''): + loss = np.mean(np.array(fetch_list[0])) + pred = np.array(fetch_list[1]) + label = np.array(fetch_list[2]) + hit_at_one = youtube8m_metrics.calculate_hit_at_one(pred, label) + perr = youtube8m_metrics.calculate_precision_at_equal_recall_rate( + pred, label) + gap = youtube8m_metrics.calculate_gap(pred, label) + logger.info(info + ' , loss = {0}, Hit@1 = {1}, PERR = {2}, GAP = {3}'.format(\ + '%.6f' % loss, '%.2f' % hit_at_one, '%.2f' % perr, '%.2f' % gap)) + + def accumulate(self, fetch_list, info=''): + if self.mode == 'infer': + predictions = np.array(fetch_list[0]) + video_id = fetch_list[1] + for i in range(len(predictions)): + topk_inds = predictions[i].argsort()[0 - self.topk:] + topk_inds = topk_inds[::-1] + preds = predictions[i][topk_inds] + self.infer_results.append((video_id[i], topk_inds.tolist(), + preds.tolist())) + else: + loss = np.array(fetch_list[0]) + pred = np.array(fetch_list[1]) + label = np.array(fetch_list[2]) + self.calculator.accumulate(loss, pred, label) + + def finalize_and_log_out(self, info='', label_file='./label_3396.txt'): + if self.mode == 'infer': + all_res_list = [] + for index, item in enumerate(self.infer_results): + video_id = item[0] + f = io.open(label_file, "r", encoding="utf-8") + fl = f.readlines() + res = {} + res["path"] = video_id + res["prediction"] = {} + for i in range(len(item[1])): + class_id = item[1][i] + class_prob = item[2][i] + if class_prob < self.threshold: + continue + class_name = fl[class_id].split('\n')[0] + res["prediction"][class_name] = class_prob + if not res["prediction"]: + logger.warning( + "%s: No prediction exceeds the threshold = %s." 
% + (video_id, self.threshold)) + all_res_list.append(res) + return all_res_list + else: + epoch_info_dict = self.calculator.get() + logger.info(info + '\tavg_hit_at_one: {0},\tavg_perr: {1},\tavg_loss :{2},\taps: {3},\tgap:{4}'\ + .format(epoch_info_dict['avg_hit_at_one'], epoch_info_dict['avg_perr'], \ + epoch_info_dict['avg_loss'], epoch_info_dict['aps'], epoch_info_dict['gap'])) + + def reset(self): + self.calculator.clear() + if self.mode == 'infer': + self.infer_results = [] + + +class MetricsZoo(object): + def __init__(self): + self.metrics_zoo = {} + + def regist(self, name, metrics): + assert metrics.__base__ == Metrics, "Unknow model type {}".format( + type(metrics)) + self.metrics_zoo[name] = metrics + + def get(self, name, mode, cfg): + for k, v in self.metrics_zoo.items(): + if k == name: + return v(name, mode, cfg) + raise KeyError(name, self.metrics_zoo.keys()) + + +# singleton metrics_zoo +metrics_zoo = MetricsZoo() + + +def regist_metrics(name, metrics): + metrics_zoo.regist(name, metrics) + + +def get_metrics(name, mode, cfg): + return metrics_zoo.get(name, mode, cfg) + + +# sort by alphabet +regist_metrics("ATTENTIONLSTM", Youtube8mMetrics) diff --git a/hub_module/modules/video/classification/videotag_tsn_lstm/resource/metrics/youtube8m/__init__.py b/hub_module/modules/video/classification/videotag_tsn_lstm/resource/metrics/youtube8m/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/hub_module/modules/video/classification/videotag_tsn_lstm/resource/metrics/youtube8m/average_precision_calculator.py b/hub_module/modules/video/classification/videotag_tsn_lstm/resource/metrics/youtube8m/average_precision_calculator.py new file mode 100644 index 00000000..f425dd2f --- /dev/null +++ b/hub_module/modules/video/classification/videotag_tsn_lstm/resource/metrics/youtube8m/average_precision_calculator.py @@ -0,0 +1,275 @@ +# Copyright 2016 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Calculate or keep track of the interpolated average precision. + +It provides an interface for calculating interpolated average precision for an +entire list or the top-n ranked items. For the definition of the +(non-)interpolated average precision: +http://trec.nist.gov/pubs/trec15/appendices/CE.MEASURES06.pdf + +Example usages: +1) Use it as a static function call to directly calculate average precision for +a short ranked list in the memory. + +``` +import random + +p = np.array([random.random() for _ in xrange(10)]) +a = np.array([random.choice([0, 1]) for _ in xrange(10)]) + +ap = average_precision_calculator.AveragePrecisionCalculator.ap(p, a) +``` + +2) Use it as an object for long ranked list that cannot be stored in memory or +the case where partial predictions can be observed at a time (Tensorflow +predictions). In this case, we first call the function accumulate many times +to process parts of the ranked list. After processing all the parts, we call +peek_interpolated_ap_at_n. 
+``` +p1 = np.array([random.random() for _ in xrange(5)]) +a1 = np.array([random.choice([0, 1]) for _ in xrange(5)]) +p2 = np.array([random.random() for _ in xrange(5)]) +a2 = np.array([random.choice([0, 1]) for _ in xrange(5)]) + +# interpolated average precision at 10 using 1000 break points +calculator = average_precision_calculator.AveragePrecisionCalculator(10) +calculator.accumulate(p1, a1) +calculator.accumulate(p2, a2) +ap3 = calculator.peek_ap_at_n() +``` +""" + +import heapq +import random +import numbers + +import numpy + + +class AveragePrecisionCalculator(object): + """Calculate the average precision and average precision at n.""" + + def __init__(self, top_n=None): + """Construct an AveragePrecisionCalculator to calculate average precision. + + This class is used to calculate the average precision for a single label. + + Args: + top_n: A positive Integer specifying the average precision at n, or + None to use all provided data points. + + Raises: + ValueError: An error occurred when the top_n is not a positive integer. + """ + if not ((isinstance(top_n, int) and top_n >= 0) or top_n is None): + raise ValueError("top_n must be a positive integer or None.") + + self._top_n = top_n # average precision at n + self._total_positives = 0 # total number of positives have seen + self._heap = [] # max heap of (prediction, actual) + + @property + def heap_size(self): + """Gets the heap size maintained in the class.""" + return len(self._heap) + + @property + def num_accumulated_positives(self): + """Gets the number of positive samples that have been accumulated.""" + return self._total_positives + + def accumulate(self, predictions, actuals, num_positives=None): + """Accumulate the predictions and their ground truth labels. + + After the function call, we may call peek_ap_at_n to actually calculate + the average precision. + Note predictions and actuals must have the same shape. + + Args: + predictions: a list storing the prediction scores. + actuals: a list storing the ground truth labels. Any value + larger than 0 will be treated as positives, otherwise as negatives. + num_positives = If the 'predictions' and 'actuals' inputs aren't complete, + then it's possible some true positives were missed in them. In that case, + you can provide 'num_positives' in order to accurately track recall. + + Raises: + ValueError: An error occurred when the format of the input is not the + numpy 1-D array or the shape of predictions and actuals does not match. + """ + if len(predictions) != len(actuals): + raise ValueError( + "the shape of predictions and actuals does not match.") + + if not num_positives is None: + if not isinstance(num_positives, + numbers.Number) or num_positives < 0: + raise ValueError( + "'num_positives' was provided but it wan't a nonzero number." + ) + + if not num_positives is None: + self._total_positives += num_positives + else: + self._total_positives += numpy.size(numpy.where(actuals > 0)) + topk = self._top_n + heap = self._heap + + for i in range(numpy.size(predictions)): + if topk is None or len(heap) < topk: + heapq.heappush(heap, (predictions[i], actuals[i])) + else: + if predictions[i] > heap[0][0]: # heap[0] is the smallest + heapq.heappop(heap) + heapq.heappush(heap, (predictions[i], actuals[i])) + + def clear(self): + """Clear the accumulated predictions.""" + self._heap = [] + self._total_positives = 0 + + def peek_ap_at_n(self): + """Peek the non-interpolated average precision at n. + + Returns: + The non-interpolated average precision at n (default 0). 
+ If n is larger than the length of the ranked list, + the average precision will be returned. + """ + if self.heap_size <= 0: + return 0 + predlists = numpy.array(list(zip(*self._heap))) + + ap = self.ap_at_n( + predlists[0], + predlists[1], + n=self._top_n, + total_num_positives=self._total_positives) + return ap + + @staticmethod + def ap(predictions, actuals): + """Calculate the non-interpolated average precision. + + Args: + predictions: a numpy 1-D array storing the sparse prediction scores. + actuals: a numpy 1-D array storing the ground truth labels. Any value + larger than 0 will be treated as positives, otherwise as negatives. + + Returns: + The non-interpolated average precision at n. + If n is larger than the length of the ranked list, + the average precision will be returned. + + Raises: + ValueError: An error occurred when the format of the input is not the + numpy 1-D array or the shape of predictions and actuals does not match. + """ + return AveragePrecisionCalculator.ap_at_n(predictions, actuals, n=None) + + @staticmethod + def ap_at_n(predictions, actuals, n=20, total_num_positives=None): + """Calculate the non-interpolated average precision. + + Args: + predictions: a numpy 1-D array storing the sparse prediction scores. + actuals: a numpy 1-D array storing the ground truth labels. Any value + larger than 0 will be treated as positives, otherwise as negatives. + n: the top n items to be considered in ap@n. + total_num_positives : (optionally) you can specify the number of total + positive + in the list. If specified, it will be used in calculation. + + Returns: + The non-interpolated average precision at n. + If n is larger than the length of the ranked list, + the average precision will be returned. + + Raises: + ValueError: An error occurred when + 1) the format of the input is not the numpy 1-D array; + 2) the shape of predictions and actuals does not match; + 3) the input n is not a positive integer. + """ + if len(predictions) != len(actuals): + raise ValueError( + "the shape of predictions and actuals does not match.") + + if n is not None: + if not isinstance(n, int) or n <= 0: + raise ValueError("n must be 'None' or a positive integer." + " It was '%s'." % n) + + ap = 0.0 + + predictions = numpy.array(predictions) + actuals = numpy.array(actuals) + + # add a shuffler to avoid overestimating the ap + predictions, actuals = AveragePrecisionCalculator._shuffle( + predictions, actuals) + sortidx = sorted( + range(len(predictions)), key=lambda k: predictions[k], reverse=True) + + if total_num_positives is None: + numpos = numpy.size(numpy.where(actuals > 0)) + else: + numpos = total_num_positives + + if numpos == 0: + return 0 + + if n is not None: + numpos = min(numpos, n) + delta_recall = 1.0 / numpos + poscount = 0.0 + + # calculate the ap + r = len(sortidx) + if n is not None: + r = min(r, n) + for i in range(r): + if actuals[sortidx[i]] > 0: + poscount += 1 + ap += poscount / (i + 1) * delta_recall + return ap + + @staticmethod + def _shuffle(predictions, actuals): + random.seed(0) + suffidx = random.sample(range(len(predictions)), len(predictions)) + predictions = predictions[suffidx] + actuals = actuals[suffidx] + return predictions, actuals + + @staticmethod + def _zero_one_normalize(predictions, epsilon=1e-7): + """Normalize the predictions to the range between 0.0 and 1.0. + + For some predictions like SVM predictions, we need to normalize them before + calculate the interpolated average precision. 
The normalization will not + change the rank in the original list and thus won't change the average + precision. + + Args: + predictions: a numpy 1-D array storing the sparse prediction scores. + epsilon: a small constant to avoid denominator being zero. + + Returns: + The normalized prediction. + """ + denominator = numpy.max(predictions) - numpy.min(predictions) + ret = (predictions - numpy.min(predictions)) / numpy.max( + denominator, epsilon) + return ret diff --git a/hub_module/modules/video/classification/videotag_tsn_lstm/resource/metrics/youtube8m/eval_util.py b/hub_module/modules/video/classification/videotag_tsn_lstm/resource/metrics/youtube8m/eval_util.py new file mode 100644 index 00000000..712abe1a --- /dev/null +++ b/hub_module/modules/video/classification/videotag_tsn_lstm/resource/metrics/youtube8m/eval_util.py @@ -0,0 +1,245 @@ +# Copyright 2016 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Provides functions to help with evaluating models.""" +import datetime +import numpy + +from . import mean_average_precision_calculator as map_calculator +from . import average_precision_calculator as ap_calculator + + +def flatten(l): + """ Merges a list of lists into a single list. """ + return [item for sublist in l for item in sublist] + + +def calculate_hit_at_one(predictions, actuals): + """Performs a local (numpy) calculation of the hit at one. + + Args: + predictions: Matrix containing the outputs of the model. + Dimensions are 'batch' x 'num_classes'. + actuals: Matrix containing the ground truth labels. + Dimensions are 'batch' x 'num_classes'. + + Returns: + float: The average hit at one across the entire batch. + """ + top_prediction = numpy.argmax(predictions, 1) + hits = actuals[numpy.arange(actuals.shape[0]), top_prediction] + return numpy.average(hits) + + +def calculate_precision_at_equal_recall_rate(predictions, actuals): + """Performs a local (numpy) calculation of the PERR. + + Args: + predictions: Matrix containing the outputs of the model. + Dimensions are 'batch' x 'num_classes'. + actuals: Matrix containing the ground truth labels. + Dimensions are 'batch' x 'num_classes'. + + Returns: + float: The average precision at equal recall rate across the entire batch. + """ + aggregated_precision = 0.0 + num_videos = actuals.shape[0] + for row in numpy.arange(num_videos): + num_labels = int(numpy.sum(actuals[row])) + top_indices = numpy.argpartition(predictions[row], + -num_labels)[-num_labels:] + item_precision = 0.0 + for label_index in top_indices: + if predictions[row][label_index] > 0: + item_precision += actuals[row][label_index] + item_precision /= top_indices.size + aggregated_precision += item_precision + aggregated_precision /= num_videos + return aggregated_precision + + +def calculate_gap(predictions, actuals, top_k=20): + """Performs a local (numpy) calculation of the global average precision. + + Only the top_k predictions are taken for each of the videos. 
+ + Args: + predictions: Matrix containing the outputs of the model. + Dimensions are 'batch' x 'num_classes'. + actuals: Matrix containing the ground truth labels. + Dimensions are 'batch' x 'num_classes'. + top_k: How many predictions to use per video. + + Returns: + float: The global average precision. + """ + gap_calculator = ap_calculator.AveragePrecisionCalculator() + sparse_predictions, sparse_labels, num_positives = top_k_by_class( + predictions, actuals, top_k) + gap_calculator.accumulate( + flatten(sparse_predictions), flatten(sparse_labels), sum(num_positives)) + return gap_calculator.peek_ap_at_n() + + +def top_k_by_class(predictions, labels, k=20): + """Extracts the top k predictions for each video, sorted by class. + + Args: + predictions: A numpy matrix containing the outputs of the model. + Dimensions are 'batch' x 'num_classes'. + k: the top k non-zero entries to preserve in each prediction. + + Returns: + A tuple (predictions,labels, true_positives). 'predictions' and 'labels' + are lists of lists of floats. 'true_positives' is a list of scalars. The + length of the lists are equal to the number of classes. The entries in the + predictions variable are probability predictions, and + the corresponding entries in the labels variable are the ground truth for + those predictions. The entries in 'true_positives' are the number of true + positives for each class in the ground truth. + + Raises: + ValueError: An error occurred when the k is not a positive integer. + """ + if k <= 0: + raise ValueError("k must be a positive integer.") + k = min(k, predictions.shape[1]) + num_classes = predictions.shape[1] + prediction_triplets = [] + for video_index in range(predictions.shape[0]): + prediction_triplets.extend( + top_k_triplets(predictions[video_index], labels[video_index], k)) + out_predictions = [[] for v in range(num_classes)] + out_labels = [[] for v in range(num_classes)] + for triplet in prediction_triplets: + out_predictions[triplet[0]].append(triplet[1]) + out_labels[triplet[0]].append(triplet[2]) + out_true_positives = [numpy.sum(labels[:, i]) for i in range(num_classes)] + + return out_predictions, out_labels, out_true_positives + + +def top_k_triplets(predictions, labels, k=20): + """Get the top_k for a 1-d numpy array. Returns a sparse list of tuples in + (prediction, class) format""" + m = len(predictions) + k = min(k, m) + indices = numpy.argpartition(predictions, -k)[-k:] + return [(index, predictions[index], labels[index]) for index in indices] + + +class EvaluationMetrics(object): + """A class to store the evaluation metrics.""" + + def __init__(self, num_class, top_k): + """Construct an EvaluationMetrics object to store the evaluation metrics. + + Args: + num_class: A positive integer specifying the number of classes. + top_k: A positive integer specifying how many predictions are considered per video. + + Raises: + ValueError: An error occurred when MeanAveragePrecisionCalculator cannot + not be constructed. + """ + self.sum_hit_at_one = 0.0 + self.sum_perr = 0.0 + self.sum_loss = 0.0 + self.map_calculator = map_calculator.MeanAveragePrecisionCalculator( + num_class) + self.global_ap_calculator = ap_calculator.AveragePrecisionCalculator() + self.top_k = top_k + self.num_examples = 0 + + #def accumulate(self, predictions, labels, loss): + def accumulate(self, loss, predictions, labels): + """Accumulate the metrics calculated locally for this mini-batch. + + Args: + predictions: A numpy matrix containing the outputs of the model. 
+ Dimensions are 'batch' x 'num_classes'. + labels: A numpy matrix containing the ground truth labels. + Dimensions are 'batch' x 'num_classes'. + loss: A numpy array containing the loss for each sample. + + Returns: + dictionary: A dictionary storing the metrics for the mini-batch. + + Raises: + ValueError: An error occurred when the shape of predictions and actuals + does not match. + """ + batch_size = labels.shape[0] + mean_hit_at_one = calculate_hit_at_one(predictions, labels) + mean_perr = calculate_precision_at_equal_recall_rate( + predictions, labels) + mean_loss = numpy.mean(loss) + + # Take the top 20 predictions. + sparse_predictions, sparse_labels, num_positives = top_k_by_class( + predictions, labels, self.top_k) + self.map_calculator.accumulate(sparse_predictions, sparse_labels, + num_positives) + self.global_ap_calculator.accumulate( + flatten(sparse_predictions), flatten(sparse_labels), + sum(num_positives)) + + self.num_examples += batch_size + self.sum_hit_at_one += mean_hit_at_one * batch_size + self.sum_perr += mean_perr * batch_size + self.sum_loss += mean_loss * batch_size + + return { + "hit_at_one": mean_hit_at_one, + "perr": mean_perr, + "loss": mean_loss + } + + def get(self): + """Calculate the evaluation metrics for the whole epoch. + + Raises: + ValueError: If no examples were accumulated. + + Returns: + dictionary: a dictionary storing the evaluation metrics for the epoch. The + dictionary has the fields: avg_hit_at_one, avg_perr, avg_loss, and + aps (default nan). + """ + if self.num_examples <= 0: + raise ValueError("total_sample must be positive.") + avg_hit_at_one = self.sum_hit_at_one / self.num_examples + avg_perr = self.sum_perr / self.num_examples + avg_loss = self.sum_loss / self.num_examples + + aps = self.map_calculator.peek_map_at_n() + gap = self.global_ap_calculator.peek_ap_at_n() + + epoch_info_dict = {} + return { + "avg_hit_at_one": avg_hit_at_one, + "avg_perr": avg_perr, + "avg_loss": avg_loss, + "aps": aps, + "gap": gap + } + + def clear(self): + """Clear the evaluation metrics and reset the EvaluationMetrics object.""" + self.sum_hit_at_one = 0.0 + self.sum_perr = 0.0 + self.sum_loss = 0.0 + self.map_calculator.clear() + self.global_ap_calculator.clear() + self.num_examples = 0 diff --git a/hub_module/modules/video/classification/videotag_tsn_lstm/resource/metrics/youtube8m/mean_average_precision_calculator.py b/hub_module/modules/video/classification/videotag_tsn_lstm/resource/metrics/youtube8m/mean_average_precision_calculator.py new file mode 100644 index 00000000..a8415dc5 --- /dev/null +++ b/hub_module/modules/video/classification/videotag_tsn_lstm/resource/metrics/youtube8m/mean_average_precision_calculator.py @@ -0,0 +1,114 @@ +# Copyright 2016 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Calculate the mean average precision. + +It provides an interface for calculating mean average precision +for an entire list or the top-n ranked items. 
diff --git a/hub_module/modules/video/classification/videotag_tsn_lstm/resource/metrics/youtube8m/mean_average_precision_calculator.py b/hub_module/modules/video/classification/videotag_tsn_lstm/resource/metrics/youtube8m/mean_average_precision_calculator.py
new file mode 100644
index 00000000..a8415dc5
--- /dev/null
+++ b/hub_module/modules/video/classification/videotag_tsn_lstm/resource/metrics/youtube8m/mean_average_precision_calculator.py
@@ -0,0 +1,114 @@
+# Copyright 2016 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS-IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Calculate the mean average precision.
+
+It provides an interface for calculating mean average precision
+for an entire list or the top-n ranked items.
+
+Example usages:
+We first call the function accumulate many times to process parts of the
+ranked list. After processing all the parts, we call peek_map_at_n
+to calculate the mean average precision.
+
+```
+import random
+
+import numpy as np
+
+p = np.array([[random.random() for _ in range(50)] for _ in range(1000)])
+a = np.array([[random.choice([0, 1]) for _ in range(50)]
+              for _ in range(1000)])
+
+# mean average precision for 50 classes.
+calculator = mean_average_precision_calculator.MeanAveragePrecisionCalculator(
+    num_class=50)
+calculator.accumulate(p, a)
+aps = calculator.peek_map_at_n()
+```
+"""
+
+import numpy
+from . import average_precision_calculator
+
+
+class MeanAveragePrecisionCalculator(object):
+    """This class is to calculate mean average precision.
+    """
+
+    def __init__(self, num_class):
+        """Construct a calculator to calculate the (macro) average precision.
+
+        Args:
+            num_class: A positive Integer specifying the number of classes.
+
+        Raises:
+            ValueError: An error occurred when num_class is not an integer
+                larger than 1.
+        """
+        if not isinstance(num_class, int) or num_class <= 1:
+            raise ValueError("num_class must be an integer larger than 1.")
+
+        self._ap_calculators = []  # member of AveragePrecisionCalculator
+        self._num_class = num_class  # total number of classes
+        for i in range(num_class):
+            self._ap_calculators.append(
+                average_precision_calculator.AveragePrecisionCalculator())
+
+    def accumulate(self, predictions, actuals, num_positives=None):
+        """Accumulate the predictions and their ground truth labels.
+
+        Args:
+            predictions: A list of lists storing the prediction scores. The
+                outer dimension corresponds to classes.
+            actuals: A list of lists storing the ground truth labels. The
+                dimensions should correspond to the predictions input. Any
+                value larger than 0 will be treated as positives, otherwise
+                as negatives.
+            num_positives: If provided, it is a list of numbers representing
+                the number of true positives for each class. If not provided,
+                the number of true positives will be inferred from the
+                'actuals' array.
+
+        Raises:
+            ValueError: An error occurred when the shape of predictions and
+                actuals does not match.
+        """
+        if not num_positives:
+            num_positives = [None for _ in range(len(predictions))]
+
+        calculators = self._ap_calculators
+        for i in range(len(predictions)):
+            calculators[i].accumulate(predictions[i], actuals[i],
+                                      num_positives[i])
+
+    def clear(self):
+        for calculator in self._ap_calculators:
+            calculator.clear()
+
+    def is_empty(self):
+        return ([calculator.heap_size for calculator in self._ap_calculators]
+                == [0 for _ in range(self._num_class)])
+
+    def peek_map_at_n(self):
+        """Peek the non-interpolated mean average precision at n.
+
+        Returns:
+            An array of non-interpolated average precision at n (default 0)
+            for each class.
+        """
+        aps = [
+            self._ap_calculators[i].peek_ap_at_n()
+            for i in range(self._num_class)
+        ]
+        return aps
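A small deterministic sketch of how `MeanAveragePrecisionCalculator` is fed may help: unlike the batch-level metrics above, its input is organized per class, not per example. The import path below is assumed for illustration:

```python
from resource.metrics.youtube8m import mean_average_precision_calculator  # assumed path

calculator = mean_average_precision_calculator.MeanAveragePrecisionCalculator(
    num_class=2)

# Outer dimension is the class; inner lists hold scores/labels of ranked examples.
predictions = [[0.9, 0.4, 0.1],   # class 0 scores
               [0.2, 0.8, 0.6]]   # class 1 scores
actuals = [[1, 0, 1],             # class 0 ground truth
           [0, 1, 1]]             # class 1 ground truth
calculator.accumulate(predictions, actuals)

aps = calculator.peek_map_at_n()  # one average precision value per class
mean_ap = sum(aps) / len(aps)     # macro mAP over the 2 classes
```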
diff --git a/hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/__init__.py b/hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/__init__.py
new file mode 100644
index 00000000..4a3adbbf
--- /dev/null
+++ b/hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/__init__.py
@@ -0,0 +1,7 @@
+from .model import regist_model, get_model
+from .attention_lstm import AttentionLSTM
+from .tsn import TSN
+
+# register models, sorted alphabetically
+regist_model("AttentionLSTM", AttentionLSTM)
+regist_model("TSN", TSN)
diff --git a/hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/attention_lstm/__init__.py b/hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/attention_lstm/__init__.py
new file mode 100644
index 00000000..cb872f0e
--- /dev/null
+++ b/hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/attention_lstm/__init__.py
@@ -0,0 +1 @@
+from .attention_lstm import *
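The `__init__.py` above wires the concrete models into the registry defined in `model.py` (shown further below), making them retrievable by name. A minimal retrieval sketch, assuming `cfg` is a config object parsed from the YAML files under `resource/configs/` (the import path is likewise assumed):

```python
from resource.models import get_model  # assumed import path

# cfg is assumed to be parsed from resource/configs/attention_lstm.yaml.
model = get_model("attentionlstm", cfg, mode="infer")  # lookup is case-insensitive
model.build_input(use_dataloader=False)
model.build_model()
feeds = model.feeds()         # input variables to feed at run time
fetch_list = model.fetches()  # [sigmoid outputs]
```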
diff --git a/hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/attention_lstm/attention_lstm.py b/hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/attention_lstm/attention_lstm.py
new file mode 100644
index 00000000..fc57e46a
--- /dev/null
+++ b/hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/attention_lstm/attention_lstm.py
@@ -0,0 +1,176 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+
+import numpy as np
+
+import paddle.fluid as fluid
+from paddle.fluid import ParamAttr
+
+from ..model import ModelBase
+from .lstm_attention import LSTMAttentionModel
+
+import logging
+logger = logging.getLogger(__name__)
+
+__all__ = ["AttentionLSTM"]
+
+
+class AttentionLSTM(ModelBase):
+    def __init__(self, name, cfg, mode='train'):
+        super(AttentionLSTM, self).__init__(name, cfg, mode)
+        self.get_config()
+
+    def get_config(self):
+        # get model configs
+        self.feature_num = self.cfg.MODEL.feature_num
+        self.feature_names = self.cfg.MODEL.feature_names
+        self.feature_dims = self.cfg.MODEL.feature_dims
+        self.num_classes = self.cfg.MODEL.num_classes
+        self.embedding_size = self.cfg.MODEL.embedding_size
+        self.lstm_size = self.cfg.MODEL.lstm_size
+        self.drop_rate = self.cfg.MODEL.drop_rate
+
+        # get mode configs
+        self.batch_size = self.get_config_from_sec(self.mode, 'batch_size', 1)
+        self.num_gpus = self.get_config_from_sec(self.mode, 'num_gpus', 1)
+
+    def build_input(self, use_dataloader):
+        self.feature_input = []
+        for name, dim in zip(self.feature_names, self.feature_dims):
+            self.feature_input.append(
+                fluid.data(
+                    shape=[None, dim], lod_level=1, dtype='float32',
+                    name=name))
+        if use_dataloader:
+            assert self.mode != 'infer', \
+                'dataloader is not recommended in infer mode, please set use_dataloader to False.'
+            self.dataloader = fluid.io.DataLoader.from_generator(
+                feed_list=self.feature_input, capacity=8, iterable=True)
+
+    def build_model(self):
+        att_outs = []
+        for i, (input_dim, feature) in enumerate(
+                zip(self.feature_dims, self.feature_input)):
+            att = LSTMAttentionModel(input_dim, self.embedding_size,
+                                     self.lstm_size, self.drop_rate)
+            att_out = att.forward(feature, is_training=(self.mode == 'train'))
+            att_outs.append(att_out)
+        if len(att_outs) > 1:
+            out = fluid.layers.concat(att_outs, axis=1)
+        else:
+            out = att_outs[0]
+
+        fc1 = fluid.layers.fc(
+            input=out,
+            size=8192,
+            act='relu',
+            bias_attr=ParamAttr(
+                regularizer=fluid.regularizer.L2Decay(0.0),
+                initializer=fluid.initializer.NormalInitializer(scale=0.0)),
+            name='fc1')
+        fc2 = fluid.layers.fc(
+            input=fc1,
+            size=4096,
+            act='tanh',
+            bias_attr=ParamAttr(
+                regularizer=fluid.regularizer.L2Decay(0.0),
+                initializer=fluid.initializer.NormalInitializer(scale=0.0)),
+            name='fc2')
+
+        self.logit = fluid.layers.fc(
+            input=fc2,
+            size=self.num_classes,
+            act=None,
+            bias_attr=ParamAttr(
+                regularizer=fluid.regularizer.L2Decay(0.0),
+                initializer=fluid.initializer.NormalInitializer(scale=0.0)),
+            name='output')
+
+        self.output = fluid.layers.sigmoid(self.logit)
+
+    def optimizer(self):
+        assert self.mode == 'train', "optimizer can only be built in train mode"
+        values = [
+            self.learning_rate * (self.decay_gamma**i)
+            for i in range(len(self.decay_epochs) + 1)
+        ]
+        iter_per_epoch = self.num_samples / self.batch_size
+        boundaries = [e * iter_per_epoch for e in self.decay_epochs]
+        return fluid.optimizer.RMSProp(
+            learning_rate=fluid.layers.piecewise_decay(
+                values=values, boundaries=boundaries),
+            centered=True,
+            regularization=fluid.regularizer.L2Decay(self.weight_decay))
+
+    def loss(self):
+        assert self.mode != 'infer', "loss cannot be calculated in infer mode"
+        cost = fluid.layers.sigmoid_cross_entropy_with_logits(
+            x=self.logit, label=self.label_input)
+        cost = fluid.layers.reduce_sum(cost, dim=-1)
+        sum_cost = fluid.layers.reduce_sum(cost)
+        self.loss_ = fluid.layers.scale(
+            sum_cost, scale=self.num_gpus, bias_after_scale=False)
+        return self.loss_
+
+    def outputs(self):
+        return [self.output, self.logit]
+
+    def feeds(self):
+        return self.feature_input
+
+    def fetches(self):
+        fetch_list = [self.output]
+        return fetch_list
+
+    def weights_info(self):
+        return (
+            'AttentionLSTM.pdparams',
+            'https://paddlemodels.bj.bcebos.com/video_classification/AttentionLSTM.pdparams'
+        )
+
+    def load_pretrain_params(self, exe, pretrain, prog, place):
+        logger.info(
+            "Load pretrain weights from {}, exclude fc layer.".format(
+                pretrain))
+
+        state_dict = fluid.load_program_state(pretrain)
+        dict_keys = list(state_dict.keys())
+        for name in dict_keys:
+            if "fc_0" in name:
+                del state_dict[name]
+                logger.info(
+                    'Delete {} from pretrained parameters. Do not load it.'.
+                    format(name))
+        fluid.set_program_state(prog, state_dict)
diff --git a/hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/attention_lstm/lstm_attention.py b/hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/attention_lstm/lstm_attention.py
new file mode 100644
index 00000000..57bf6369
--- /dev/null
+++ b/hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/attention_lstm/lstm_attention.py
@@ -0,0 +1,114 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+
+import paddle.fluid as fluid
+from paddle.fluid import ParamAttr
+import numpy as np
+
+
+class LSTMAttentionModel(object):
+    """LSTM Attention Model"""
+
+    def __init__(self,
+                 input_dim,
+                 embedding_size=512,
+                 lstm_size=1024,
+                 drop_rate=0.5):
+        self.lstm_size = lstm_size
+        self.embedding_size = embedding_size
+        self.drop_rate = drop_rate
+
+    def forward(self, input, is_training):
+        input_fc = fluid.layers.fc(
+            input=input,
+            size=self.embedding_size,
+            act='tanh',
+            bias_attr=ParamAttr(
+                regularizer=fluid.regularizer.L2Decay(0.0),
+                initializer=fluid.initializer.NormalInitializer(scale=0.0)),
+            name='rgb_fc')
+
+        lstm_forward_fc = fluid.layers.fc(
+            input=input_fc,
+            size=self.lstm_size * 4,
+            act=None,
+            bias_attr=False,
+            name='rgb_fc_forward')
+
+        lstm_forward, _ = fluid.layers.dynamic_lstm(
+            input=lstm_forward_fc,
+            size=self.lstm_size * 4,
+            is_reverse=False,
+            name='rgb_lstm_forward')
+
+        lstm_backward_fc = fluid.layers.fc(
+            input=input_fc,
+            size=self.lstm_size * 4,
+            act=None,
+            bias_attr=False,
+            name='rgb_fc_backward')
+
+        lstm_backward, _ = fluid.layers.dynamic_lstm(
+            input=lstm_backward_fc,
+            size=self.lstm_size * 4,
+            is_reverse=True,
+            name='rgb_lstm_backward')
+
+        lstm_concat = fluid.layers.concat(
+            input=[lstm_forward, lstm_backward], axis=1)
+
+        lstm_dropout = fluid.layers.dropout(
+            x=lstm_concat,
+            dropout_prob=self.drop_rate,
+            is_test=(not is_training))
+
+        lstm_weight = fluid.layers.fc(
+            input=lstm_dropout,
+            size=1,
+            act='sequence_softmax',
+            bias_attr=False,
+            name='rgb_weight')
+
+        scaled = fluid.layers.elementwise_mul(
+            x=lstm_dropout, y=lstm_weight, axis=0)
+        lstm_pool = fluid.layers.sequence_pool(input=scaled, pool_type='sum')
+
+        return lstm_pool
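For intuition, the attention pooling at the end of `forward` can be sketched in plain NumPy: each timestep's hidden state gets one scalar score from a size-1 fc layer, the scores are softmax-normalized over the sequence, and the hidden states are summed with those weights. A minimal sketch with illustrative shapes and names (not from the patch):

```python
import numpy as np

# Toy stand-in for the attention pooling in LSTMAttentionModel.forward:
# h is one video's sequence of hidden states, shape (T, D).
# In the fluid graph, T is a LoD (sequence) dimension and D = 2 * lstm_size.
T, D = 5, 8
rng = np.random.default_rng(0)
h = rng.standard_normal((T, D))
w = rng.standard_normal((D, 1))  # weights of the size-1 fc scoring layer

scores = h @ w                                  # (T, 1): one scalar per timestep
alpha = np.exp(scores) / np.exp(scores).sum()   # sequence softmax over T
pooled = (alpha * h).sum(axis=0)                # weighted sum, shape (D,)
```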
diff --git a/hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/model.py b/hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/model.py
new file mode 100644
index 00000000..88a337bd
--- /dev/null
+++ b/hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/model.py
@@ -0,0 +1,191 @@
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+
+import os
+import wget
+import logging
+try:
+    from configparser import ConfigParser
+except ImportError:
+    from ConfigParser import ConfigParser
+
+import paddle.fluid as fluid
+from .utils import download, AttrDict
+
+WEIGHT_DIR = os.path.join(os.path.expanduser('~'), '.paddle', 'weights')
+
+logger = logging.getLogger(__name__)
+
+
+def is_parameter(var):
+    return isinstance(var, fluid.framework.Parameter)
+
+
+class NotImplementError(Exception):
+    "Error: model function not implemented"
+
+    def __init__(self, model, function):
+        super(NotImplementError, self).__init__()
+        self.model = model.__class__.__name__
+        self.function = function.__name__
+
+    def __str__(self):
+        return "Function {}() is not implemented in model {}".format(
+            self.function, self.model)
+
+
+class ModelNotFoundError(Exception):
+    "Error: model not found"
+
+    def __init__(self, model_name, avail_models):
+        super(ModelNotFoundError, self).__init__()
+        self.model_name = model_name
+        self.avail_models = avail_models
+
+    def __str__(self):
+        msg = "Model {} Not Found.\nAvailable models:\n".format(
+            self.model_name)
+        for model in self.avail_models:
+            msg += "  {}\n".format(model)
+        return msg
+
+
+class ModelBase(object):
+    def __init__(self, name, cfg, mode='train'):
+        assert mode in ['train', 'valid', 'test', 'infer'], \
+            "Unknown mode type {}".format(mode)
+        self.name = name
+        self.is_training = (mode == 'train')
+        self.mode = mode
+        self.cfg = cfg
+        self.dataloader = None
+
+    def build_model(self):
+        "build model struct"
+        raise NotImplementError(self, self.build_model)
+
+    def build_input(self, use_dataloader):
+        "build input Variable"
+        raise NotImplementError(self, self.build_input)
+
+    def optimizer(self):
+        "get model optimizer"
+        raise NotImplementError(self, self.optimizer)
+
+    def outputs(self):
+        "get output variable"
+        raise NotImplementError(self, self.outputs)
+
+    def loss(self):
+        "get loss variable"
+        raise NotImplementError(self, self.loss)
+
+    def feeds(self):
+        "get feed inputs list"
+        raise NotImplementError(self, self.feeds)
+
+    def fetches(self):
+        "get fetch list of model"
+        raise NotImplementError(self, self.fetches)
+
+    def weights_info(self):
+        "get model weight default path and download url"
+        raise NotImplementError(self, self.weights_info)
+
+    def get_weights(self):
+        "get model weight file path, download weight from Paddle if not exist"
+        path, url = self.weights_info()
+        path = os.path.join(WEIGHT_DIR, path)
+        if not os.path.isdir(WEIGHT_DIR):
+            logger.info('{} does not exist, will be created automatically.'.
+                        format(WEIGHT_DIR))
+            os.makedirs(WEIGHT_DIR)
+        if os.path.exists(path):
+            return path
+
+        logger.info("Download weights of {} from {}".format(self.name, url))
+        wget.download(url, path)
+        return path
+
+    def epoch_num(self):
+        "get train epoch num"
+        return self.cfg.TRAIN.epoch
+
+    def pretrain_info(self):
+        "get pretrain base model directory"
+        return (None, None)
+
+    def get_pretrain_weights(self):
+        "get pretrain weight file path, download weight from Paddle if not exist"
+        path, url = self.pretrain_info()
+        if not path:
+            return None
+
+        path = os.path.join(WEIGHT_DIR, path)
+        if not os.path.isdir(WEIGHT_DIR):
+            logger.info('{} does not exist, will be created automatically.'.
+                        format(WEIGHT_DIR))
+            os.makedirs(WEIGHT_DIR)
+        if os.path.exists(path):
+            return path
+
+        logger.info("Download pretrain weights of {} from {}".format(
+            self.name, url))
+        download(url, path)
+        return path
+
+    def load_pretrain_params(self, exe, pretrain, prog, place):
+        logger.info("Load pretrain weights from {}".format(pretrain))
+        state_dict = fluid.load_program_state(pretrain)
+        fluid.set_program_state(prog, state_dict)
+
+    def load_test_weights(self, exe, weights, prog):
+        params_list = list(filter(is_parameter, prog.list_vars()))
+        fluid.load(prog, weights, executor=exe, var_list=params_list)
+
+    def get_config_from_sec(self, sec, item, default=None):
+        if sec.upper() not in self.cfg:
+            return default
+        return self.cfg[sec.upper()].get(item, default)
+
+
+class ModelZoo(object):
+    def __init__(self):
+        self.model_zoo = {}
+
+    def regist(self, name, model):
+        assert model.__base__ == ModelBase, "Unknown model type {}".format(
+            type(model))
+        self.model_zoo[name] = model
+
+    def get(self, name, cfg, mode='train'):
+        for k, v in self.model_zoo.items():
+            if k.upper() == name.upper():
+                return v(name, cfg, mode)
+        raise ModelNotFoundError(name, self.model_zoo.keys())
+
+
+# singleton model_zoo
+model_zoo = ModelZoo()
+
+
+def regist_model(name, model):
+    model_zoo.regist(name, model)
+
+
+def get_model(name, cfg, mode='train'):
+    return model_zoo.get(name, cfg, mode)
diff --git a/hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/tsn/__init__.py b/hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/tsn/__init__.py
new file mode 100644
index 00000000..bd57d268
--- /dev/null
+++ b/hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/tsn/__init__.py
@@ -0,0 +1 @@
+from .tsn import *
diff --git a/hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/tsn/name.py b/hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/tsn/name.py
new file mode 100644
index 00000000..1c4e3ebf
--- /dev/null
+++ b/hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/tsn/name.py
@@ -0,0 +1,107 @@
+import json
+
+depth = [3, 4, 23, 3]
+num_filters = [64, 128, 256, 512]
+
+layer_index = 1
+caffe_param_list = []
+
+name_list = ['conv1']
+params_list = []
+name = name_list[0]
+conv_w = name + '_weights'
+caffe_conv_w = 'ConvNdBackward' + str(layer_index) + '_weights'
+params_list.append(conv_w)
+caffe_param_list.append(caffe_conv_w)
+
+layer_index += 1
+
+bn_name = "bn_" + name
+caffe_bn_name = 'BatchNormBackward' + str(layer_index) + '_bn'
+params_list.append(bn_name + '_scale')
+params_list.append(bn_name + '_offset')
+params_list.append(bn_name + '_mean')
+params_list.append(bn_name + '_variance')
+
+caffe_param_list.append(caffe_bn_name + '_scale')
+caffe_param_list.append(caffe_bn_name + '_offset')
+caffe_param_list.append(caffe_bn_name + '_mean')
+caffe_param_list.append(caffe_bn_name + '_variance')
+
+filter_input = 64
+
+layer_index += 3
+
+for block in range(len(depth)):
+    for i in range(depth[block]):
+        if block == 2:
+            if i == 0:
+                name = "res" + str(block + 2) + "a"
+            else:
+                name = "res" + str(block + 2) + "b" + str(i)
+        else:
+            name = "res" + str(block + 2) + chr(97 + i)
+
+        name_list.append(name)
+
+        for item in ['a', 'b', 'c']:
+            name_branch = name + '_branch2' + item
+            bn_name = 'bn' + name_branch[3:]
+            params_list.append(name_branch + '_weights')
+            params_list.append(bn_name + '_scale')
+            params_list.append(bn_name + '_offset')
+            params_list.append(bn_name + '_mean')
+            params_list.append(bn_name + '_variance')
+
+            caffe_name_branch = 'ConvNdBackward' + str(layer_index)
+            caffe_param_list.append(caffe_name_branch + '_weights')
+
+            layer_index += 1
+            caffe_bn_name = 'BatchNormBackward' + str(layer_index) + '_bn'
+            caffe_param_list.append(caffe_bn_name + '_scale')
+            caffe_param_list.append(caffe_bn_name + '_offset')
+            caffe_param_list.append(caffe_bn_name + '_mean')
+            caffe_param_list.append(caffe_bn_name + '_variance')
+
+            layer_index += 2
+
+        stride = 2 if i == 0 and block != 0 else 1
+        filter_num = num_filters[block]
+        filter_output = filter_num * 4
+
+        if (filter_output != filter_input) or (stride != 1):
+            name_branch = name + '_branch1'
+
+            print(
+                'filter_input {}, filter_output {}, stride {}, branch name {}'.
+                format(filter_input, filter_output, stride, name_branch))
+            bn_name = 'bn' + name_branch[3:]
+            params_list.append(name_branch + '_weights')
+            params_list.append(bn_name + '_scale')
+            params_list.append(bn_name + '_offset')
+            params_list.append(bn_name + '_mean')
+            params_list.append(bn_name + '_variance')
+
+            caffe_name_branch = 'ConvNdBackward' + str(layer_index)
+            caffe_param_list.append(caffe_name_branch + '_weights')
+
+            layer_index += 1
+            caffe_bn_name = 'BatchNormBackward' + str(layer_index) + '_bn'
+            caffe_param_list.append(caffe_bn_name + '_scale')
+            caffe_param_list.append(caffe_bn_name + '_offset')
+            caffe_param_list.append(caffe_bn_name + '_mean')
+            caffe_param_list.append(caffe_bn_name + '_variance')
+
+            layer_index += 3
+        else:
+            layer_index += 2
+
+        filter_input = filter_output
+
+map_dict = {}
+
+for i in range(len(params_list)):
+    print(params_list[i], caffe_param_list[i])
+    map_dict[params_list[i]] = caffe_param_list[i]
+
+with open('name_map.json', 'w') as f:
+    json.dump(map_dict, f)
diff --git a/hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/tsn/name1 b/hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/tsn/name1
new file mode 100644
index 00000000..5f67f93b
--- /dev/null
+++ b/hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/tsn/name1
@@ -0,0 +1,521 @@
+conv1_weights
+bn_conv1_scale
+bn_conv1_offset
+bn_conv1_mean
+bn_conv1_variance
+res2a_branch2a_weights
+bn2a_branch2a_scale
+bn2a_branch2a_offset
+bn2a_branch2a_mean
+bn2a_branch2a_variance
+res2a_branch2b_weights
+bn2a_branch2b_scale
+bn2a_branch2b_offset
+bn2a_branch2b_mean
+bn2a_branch2b_variance
+res2a_branch2c_weights
+bn2a_branch2c_scale
+bn2a_branch2c_offset
+bn2a_branch2c_mean
+bn2a_branch2c_variance
+res2a_branch1_weights
+bn2a_branch1_scale
+bn2a_branch1_offset
+bn2a_branch1_mean
+bn2a_branch1_variance
+res2b_branch2a_weights
+bn2b_branch2a_scale
+bn2b_branch2a_offset
+bn2b_branch2a_mean
+bn2b_branch2a_variance
+res2b_branch2b_weights
+bn2b_branch2b_scale
+bn2b_branch2b_offset
+bn2b_branch2b_mean
+bn2b_branch2b_variance
+res2b_branch2c_weights
+bn2b_branch2c_scale
+bn2b_branch2c_offset
+bn2b_branch2c_mean
+bn2b_branch2c_variance
+res2c_branch2a_weights
+bn2c_branch2a_scale
+bn2c_branch2a_offset
+bn2c_branch2a_mean
+bn2c_branch2a_variance
+res2c_branch2b_weights
+bn2c_branch2b_scale
+bn2c_branch2b_offset
+bn2c_branch2b_mean
+bn2c_branch2b_variance
+res2c_branch2c_weights
+bn2c_branch2c_scale
+bn2c_branch2c_offset
+bn2c_branch2c_mean
+bn2c_branch2c_variance
+res3a_branch2a_weights
+bn3a_branch2a_scale
+bn3a_branch2a_offset
+bn3a_branch2a_mean
+bn3a_branch2a_variance
+res3a_branch2b_weights
+bn3a_branch2b_scale
+bn3a_branch2b_offset
+bn3a_branch2b_mean
+bn3a_branch2b_variance
+res3a_branch2c_weights
+bn3a_branch2c_scale
+bn3a_branch2c_offset
+bn3a_branch2c_mean
+bn3a_branch2c_variance
+res3a_branch1_weights
+bn3a_branch1_scale
+bn3a_branch1_offset
+bn3a_branch1_mean
+bn3a_branch1_variance
+res3b_branch2a_weights
+bn3b_branch2a_scale
+bn3b_branch2a_offset
+bn3b_branch2a_mean
+bn3b_branch2a_variance
+res3b_branch2b_weights
+bn3b_branch2b_scale
+bn3b_branch2b_offset
+bn3b_branch2b_mean
+bn3b_branch2b_variance
+res3b_branch2c_weights
+bn3b_branch2c_scale
+bn3b_branch2c_offset
+bn3b_branch2c_mean
+bn3b_branch2c_variance
+res3c_branch2a_weights
+bn3c_branch2a_scale
+bn3c_branch2a_offset
+bn3c_branch2a_mean
+bn3c_branch2a_variance
+res3c_branch2b_weights
+bn3c_branch2b_scale
+bn3c_branch2b_offset
+bn3c_branch2b_mean
+bn3c_branch2b_variance
+res3c_branch2c_weights +bn3c_branch2c_scale +bn3c_branch2c_offset +bn3c_branch2c_mean +bn3c_branch2c_variance +res3d_branch2a_weights +bn3d_branch2a_scale +bn3d_branch2a_offset +bn3d_branch2a_mean +bn3d_branch2a_variance +res3d_branch2b_weights +bn3d_branch2b_scale +bn3d_branch2b_offset +bn3d_branch2b_mean +bn3d_branch2b_variance +res3d_branch2c_weights +bn3d_branch2c_scale +bn3d_branch2c_offset +bn3d_branch2c_mean +bn3d_branch2c_variance +res4a_branch2a_weights +bn4a_branch2a_scale +bn4a_branch2a_offset +bn4a_branch2a_mean +bn4a_branch2a_variance +res4a_branch2b_weights +bn4a_branch2b_scale +bn4a_branch2b_offset +bn4a_branch2b_mean +bn4a_branch2b_variance +res4a_branch2c_weights +bn4a_branch2c_scale +bn4a_branch2c_offset +bn4a_branch2c_mean +bn4a_branch2c_variance +res4a_branch1_weights +bn4a_branch1_scale +bn4a_branch1_offset +bn4a_branch1_mean +bn4a_branch1_variance +res4b1_branch2a_weights +bn4b1_branch2a_scale +bn4b1_branch2a_offset +bn4b1_branch2a_mean +bn4b1_branch2a_variance +res4b1_branch2b_weights +bn4b1_branch2b_scale +bn4b1_branch2b_offset +bn4b1_branch2b_mean +bn4b1_branch2b_variance +res4b1_branch2c_weights +bn4b1_branch2c_scale +bn4b1_branch2c_offset +bn4b1_branch2c_mean +bn4b1_branch2c_variance +res4b2_branch2a_weights +bn4b2_branch2a_scale +bn4b2_branch2a_offset +bn4b2_branch2a_mean +bn4b2_branch2a_variance +res4b2_branch2b_weights +bn4b2_branch2b_scale +bn4b2_branch2b_offset +bn4b2_branch2b_mean +bn4b2_branch2b_variance +res4b2_branch2c_weights +bn4b2_branch2c_scale +bn4b2_branch2c_offset +bn4b2_branch2c_mean +bn4b2_branch2c_variance +res4b3_branch2a_weights +bn4b3_branch2a_scale +bn4b3_branch2a_offset +bn4b3_branch2a_mean +bn4b3_branch2a_variance +res4b3_branch2b_weights +bn4b3_branch2b_scale +bn4b3_branch2b_offset +bn4b3_branch2b_mean +bn4b3_branch2b_variance +res4b3_branch2c_weights +bn4b3_branch2c_scale +bn4b3_branch2c_offset +bn4b3_branch2c_mean +bn4b3_branch2c_variance +res4b4_branch2a_weights +bn4b4_branch2a_scale +bn4b4_branch2a_offset +bn4b4_branch2a_mean +bn4b4_branch2a_variance +res4b4_branch2b_weights +bn4b4_branch2b_scale +bn4b4_branch2b_offset +bn4b4_branch2b_mean +bn4b4_branch2b_variance +res4b4_branch2c_weights +bn4b4_branch2c_scale +bn4b4_branch2c_offset +bn4b4_branch2c_mean +bn4b4_branch2c_variance +res4b5_branch2a_weights +bn4b5_branch2a_scale +bn4b5_branch2a_offset +bn4b5_branch2a_mean +bn4b5_branch2a_variance +res4b5_branch2b_weights +bn4b5_branch2b_scale +bn4b5_branch2b_offset +bn4b5_branch2b_mean +bn4b5_branch2b_variance +res4b5_branch2c_weights +bn4b5_branch2c_scale +bn4b5_branch2c_offset +bn4b5_branch2c_mean +bn4b5_branch2c_variance +res4b6_branch2a_weights +bn4b6_branch2a_scale +bn4b6_branch2a_offset +bn4b6_branch2a_mean +bn4b6_branch2a_variance +res4b6_branch2b_weights +bn4b6_branch2b_scale +bn4b6_branch2b_offset +bn4b6_branch2b_mean +bn4b6_branch2b_variance +res4b6_branch2c_weights +bn4b6_branch2c_scale +bn4b6_branch2c_offset +bn4b6_branch2c_mean +bn4b6_branch2c_variance +res4b7_branch2a_weights +bn4b7_branch2a_scale +bn4b7_branch2a_offset +bn4b7_branch2a_mean +bn4b7_branch2a_variance +res4b7_branch2b_weights +bn4b7_branch2b_scale +bn4b7_branch2b_offset +bn4b7_branch2b_mean +bn4b7_branch2b_variance +res4b7_branch2c_weights +bn4b7_branch2c_scale +bn4b7_branch2c_offset +bn4b7_branch2c_mean +bn4b7_branch2c_variance +res4b8_branch2a_weights +bn4b8_branch2a_scale +bn4b8_branch2a_offset +bn4b8_branch2a_mean +bn4b8_branch2a_variance +res4b8_branch2b_weights +bn4b8_branch2b_scale +bn4b8_branch2b_offset +bn4b8_branch2b_mean +bn4b8_branch2b_variance 
+res4b8_branch2c_weights +bn4b8_branch2c_scale +bn4b8_branch2c_offset +bn4b8_branch2c_mean +bn4b8_branch2c_variance +res4b9_branch2a_weights +bn4b9_branch2a_scale +bn4b9_branch2a_offset +bn4b9_branch2a_mean +bn4b9_branch2a_variance +res4b9_branch2b_weights +bn4b9_branch2b_scale +bn4b9_branch2b_offset +bn4b9_branch2b_mean +bn4b9_branch2b_variance +res4b9_branch2c_weights +bn4b9_branch2c_scale +bn4b9_branch2c_offset +bn4b9_branch2c_mean +bn4b9_branch2c_variance +res4b10_branch2a_weights +bn4b10_branch2a_scale +bn4b10_branch2a_offset +bn4b10_branch2a_mean +bn4b10_branch2a_variance +res4b10_branch2b_weights +bn4b10_branch2b_scale +bn4b10_branch2b_offset +bn4b10_branch2b_mean +bn4b10_branch2b_variance +res4b10_branch2c_weights +bn4b10_branch2c_scale +bn4b10_branch2c_offset +bn4b10_branch2c_mean +bn4b10_branch2c_variance +res4b11_branch2a_weights +bn4b11_branch2a_scale +bn4b11_branch2a_offset +bn4b11_branch2a_mean +bn4b11_branch2a_variance +res4b11_branch2b_weights +bn4b11_branch2b_scale +bn4b11_branch2b_offset +bn4b11_branch2b_mean +bn4b11_branch2b_variance +res4b11_branch2c_weights +bn4b11_branch2c_scale +bn4b11_branch2c_offset +bn4b11_branch2c_mean +bn4b11_branch2c_variance +res4b12_branch2a_weights +bn4b12_branch2a_scale +bn4b12_branch2a_offset +bn4b12_branch2a_mean +bn4b12_branch2a_variance +res4b12_branch2b_weights +bn4b12_branch2b_scale +bn4b12_branch2b_offset +bn4b12_branch2b_mean +bn4b12_branch2b_variance +res4b12_branch2c_weights +bn4b12_branch2c_scale +bn4b12_branch2c_offset +bn4b12_branch2c_mean +bn4b12_branch2c_variance +res4b13_branch2a_weights +bn4b13_branch2a_scale +bn4b13_branch2a_offset +bn4b13_branch2a_mean +bn4b13_branch2a_variance +res4b13_branch2b_weights +bn4b13_branch2b_scale +bn4b13_branch2b_offset +bn4b13_branch2b_mean +bn4b13_branch2b_variance +res4b13_branch2c_weights +bn4b13_branch2c_scale +bn4b13_branch2c_offset +bn4b13_branch2c_mean +bn4b13_branch2c_variance +res4b14_branch2a_weights +bn4b14_branch2a_scale +bn4b14_branch2a_offset +bn4b14_branch2a_mean +bn4b14_branch2a_variance +res4b14_branch2b_weights +bn4b14_branch2b_scale +bn4b14_branch2b_offset +bn4b14_branch2b_mean +bn4b14_branch2b_variance +res4b14_branch2c_weights +bn4b14_branch2c_scale +bn4b14_branch2c_offset +bn4b14_branch2c_mean +bn4b14_branch2c_variance +res4b15_branch2a_weights +bn4b15_branch2a_scale +bn4b15_branch2a_offset +bn4b15_branch2a_mean +bn4b15_branch2a_variance +res4b15_branch2b_weights +bn4b15_branch2b_scale +bn4b15_branch2b_offset +bn4b15_branch2b_mean +bn4b15_branch2b_variance +res4b15_branch2c_weights +bn4b15_branch2c_scale +bn4b15_branch2c_offset +bn4b15_branch2c_mean +bn4b15_branch2c_variance +res4b16_branch2a_weights +bn4b16_branch2a_scale +bn4b16_branch2a_offset +bn4b16_branch2a_mean +bn4b16_branch2a_variance +res4b16_branch2b_weights +bn4b16_branch2b_scale +bn4b16_branch2b_offset +bn4b16_branch2b_mean +bn4b16_branch2b_variance +res4b16_branch2c_weights +bn4b16_branch2c_scale +bn4b16_branch2c_offset +bn4b16_branch2c_mean +bn4b16_branch2c_variance +res4b17_branch2a_weights +bn4b17_branch2a_scale +bn4b17_branch2a_offset +bn4b17_branch2a_mean +bn4b17_branch2a_variance +res4b17_branch2b_weights +bn4b17_branch2b_scale +bn4b17_branch2b_offset +bn4b17_branch2b_mean +bn4b17_branch2b_variance +res4b17_branch2c_weights +bn4b17_branch2c_scale +bn4b17_branch2c_offset +bn4b17_branch2c_mean +bn4b17_branch2c_variance +res4b18_branch2a_weights +bn4b18_branch2a_scale +bn4b18_branch2a_offset +bn4b18_branch2a_mean +bn4b18_branch2a_variance +res4b18_branch2b_weights +bn4b18_branch2b_scale 
+bn4b18_branch2b_offset +bn4b18_branch2b_mean +bn4b18_branch2b_variance +res4b18_branch2c_weights +bn4b18_branch2c_scale +bn4b18_branch2c_offset +bn4b18_branch2c_mean +bn4b18_branch2c_variance +res4b19_branch2a_weights +bn4b19_branch2a_scale +bn4b19_branch2a_offset +bn4b19_branch2a_mean +bn4b19_branch2a_variance +res4b19_branch2b_weights +bn4b19_branch2b_scale +bn4b19_branch2b_offset +bn4b19_branch2b_mean +bn4b19_branch2b_variance +res4b19_branch2c_weights +bn4b19_branch2c_scale +bn4b19_branch2c_offset +bn4b19_branch2c_mean +bn4b19_branch2c_variance +res4b20_branch2a_weights +bn4b20_branch2a_scale +bn4b20_branch2a_offset +bn4b20_branch2a_mean +bn4b20_branch2a_variance +res4b20_branch2b_weights +bn4b20_branch2b_scale +bn4b20_branch2b_offset +bn4b20_branch2b_mean +bn4b20_branch2b_variance +res4b20_branch2c_weights +bn4b20_branch2c_scale +bn4b20_branch2c_offset +bn4b20_branch2c_mean +bn4b20_branch2c_variance +res4b21_branch2a_weights +bn4b21_branch2a_scale +bn4b21_branch2a_offset +bn4b21_branch2a_mean +bn4b21_branch2a_variance +res4b21_branch2b_weights +bn4b21_branch2b_scale +bn4b21_branch2b_offset +bn4b21_branch2b_mean +bn4b21_branch2b_variance +res4b21_branch2c_weights +bn4b21_branch2c_scale +bn4b21_branch2c_offset +bn4b21_branch2c_mean +bn4b21_branch2c_variance +res4b22_branch2a_weights +bn4b22_branch2a_scale +bn4b22_branch2a_offset +bn4b22_branch2a_mean +bn4b22_branch2a_variance +res4b22_branch2b_weights +bn4b22_branch2b_scale +bn4b22_branch2b_offset +bn4b22_branch2b_mean +bn4b22_branch2b_variance +res4b22_branch2c_weights +bn4b22_branch2c_scale +bn4b22_branch2c_offset +bn4b22_branch2c_mean +bn4b22_branch2c_variance +res5a_branch2a_weights +bn5a_branch2a_scale +bn5a_branch2a_offset +bn5a_branch2a_mean +bn5a_branch2a_variance +res5a_branch2b_weights +bn5a_branch2b_scale +bn5a_branch2b_offset +bn5a_branch2b_mean +bn5a_branch2b_variance +res5a_branch2c_weights +bn5a_branch2c_scale +bn5a_branch2c_offset +bn5a_branch2c_mean +bn5a_branch2c_variance +res5a_branch1_weights +bn5a_branch1_scale +bn5a_branch1_offset +bn5a_branch1_mean +bn5a_branch1_variance +res5b_branch2a_weights +bn5b_branch2a_scale +bn5b_branch2a_offset +bn5b_branch2a_mean +bn5b_branch2a_variance +res5b_branch2b_weights +bn5b_branch2b_scale +bn5b_branch2b_offset +bn5b_branch2b_mean +bn5b_branch2b_variance +res5b_branch2c_weights +bn5b_branch2c_scale +bn5b_branch2c_offset +bn5b_branch2c_mean +bn5b_branch2c_variance +res5c_branch2a_weights +bn5c_branch2a_scale +bn5c_branch2a_offset +bn5c_branch2a_mean +bn5c_branch2a_variance +res5c_branch2b_weights +bn5c_branch2b_scale +bn5c_branch2b_offset +bn5c_branch2b_mean +bn5c_branch2b_variance +res5c_branch2c_weights +bn5c_branch2c_scale +bn5c_branch2c_offset +bn5c_branch2c_mean +bn5c_branch2c_variance + diff --git a/hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/tsn/name2 b/hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/tsn/name2 new file mode 100644 index 00000000..5f67f93b --- /dev/null +++ b/hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/tsn/name2 @@ -0,0 +1,521 @@ +conv1_weights +bn_conv1_scale +bn_conv1_offset +bn_conv1_mean +bn_conv1_variance +res2a_branch2a_weights +bn2a_branch2a_scale +bn2a_branch2a_offset +bn2a_branch2a_mean +bn2a_branch2a_variance +res2a_branch2b_weights +bn2a_branch2b_scale +bn2a_branch2b_offset +bn2a_branch2b_mean +bn2a_branch2b_variance +res2a_branch2c_weights +bn2a_branch2c_scale +bn2a_branch2c_offset +bn2a_branch2c_mean +bn2a_branch2c_variance +res2a_branch1_weights 
+bn2a_branch1_scale +bn2a_branch1_offset +bn2a_branch1_mean +bn2a_branch1_variance +res2b_branch2a_weights +bn2b_branch2a_scale +bn2b_branch2a_offset +bn2b_branch2a_mean +bn2b_branch2a_variance +res2b_branch2b_weights +bn2b_branch2b_scale +bn2b_branch2b_offset +bn2b_branch2b_mean +bn2b_branch2b_variance +res2b_branch2c_weights +bn2b_branch2c_scale +bn2b_branch2c_offset +bn2b_branch2c_mean +bn2b_branch2c_variance +res2c_branch2a_weights +bn2c_branch2a_scale +bn2c_branch2a_offset +bn2c_branch2a_mean +bn2c_branch2a_variance +res2c_branch2b_weights +bn2c_branch2b_scale +bn2c_branch2b_offset +bn2c_branch2b_mean +bn2c_branch2b_variance +res2c_branch2c_weights +bn2c_branch2c_scale +bn2c_branch2c_offset +bn2c_branch2c_mean +bn2c_branch2c_variance +res3a_branch2a_weights +bn3a_branch2a_scale +bn3a_branch2a_offset +bn3a_branch2a_mean +bn3a_branch2a_variance +res3a_branch2b_weights +bn3a_branch2b_scale +bn3a_branch2b_offset +bn3a_branch2b_mean +bn3a_branch2b_variance +res3a_branch2c_weights +bn3a_branch2c_scale +bn3a_branch2c_offset +bn3a_branch2c_mean +bn3a_branch2c_variance +res3a_branch1_weights +bn3a_branch1_scale +bn3a_branch1_offset +bn3a_branch1_mean +bn3a_branch1_variance +res3b_branch2a_weights +bn3b_branch2a_scale +bn3b_branch2a_offset +bn3b_branch2a_mean +bn3b_branch2a_variance +res3b_branch2b_weights +bn3b_branch2b_scale +bn3b_branch2b_offset +bn3b_branch2b_mean +bn3b_branch2b_variance +res3b_branch2c_weights +bn3b_branch2c_scale +bn3b_branch2c_offset +bn3b_branch2c_mean +bn3b_branch2c_variance +res3c_branch2a_weights +bn3c_branch2a_scale +bn3c_branch2a_offset +bn3c_branch2a_mean +bn3c_branch2a_variance +res3c_branch2b_weights +bn3c_branch2b_scale +bn3c_branch2b_offset +bn3c_branch2b_mean +bn3c_branch2b_variance +res3c_branch2c_weights +bn3c_branch2c_scale +bn3c_branch2c_offset +bn3c_branch2c_mean +bn3c_branch2c_variance +res3d_branch2a_weights +bn3d_branch2a_scale +bn3d_branch2a_offset +bn3d_branch2a_mean +bn3d_branch2a_variance +res3d_branch2b_weights +bn3d_branch2b_scale +bn3d_branch2b_offset +bn3d_branch2b_mean +bn3d_branch2b_variance +res3d_branch2c_weights +bn3d_branch2c_scale +bn3d_branch2c_offset +bn3d_branch2c_mean +bn3d_branch2c_variance +res4a_branch2a_weights +bn4a_branch2a_scale +bn4a_branch2a_offset +bn4a_branch2a_mean +bn4a_branch2a_variance +res4a_branch2b_weights +bn4a_branch2b_scale +bn4a_branch2b_offset +bn4a_branch2b_mean +bn4a_branch2b_variance +res4a_branch2c_weights +bn4a_branch2c_scale +bn4a_branch2c_offset +bn4a_branch2c_mean +bn4a_branch2c_variance +res4a_branch1_weights +bn4a_branch1_scale +bn4a_branch1_offset +bn4a_branch1_mean +bn4a_branch1_variance +res4b1_branch2a_weights +bn4b1_branch2a_scale +bn4b1_branch2a_offset +bn4b1_branch2a_mean +bn4b1_branch2a_variance +res4b1_branch2b_weights +bn4b1_branch2b_scale +bn4b1_branch2b_offset +bn4b1_branch2b_mean +bn4b1_branch2b_variance +res4b1_branch2c_weights +bn4b1_branch2c_scale +bn4b1_branch2c_offset +bn4b1_branch2c_mean +bn4b1_branch2c_variance +res4b2_branch2a_weights +bn4b2_branch2a_scale +bn4b2_branch2a_offset +bn4b2_branch2a_mean +bn4b2_branch2a_variance +res4b2_branch2b_weights +bn4b2_branch2b_scale +bn4b2_branch2b_offset +bn4b2_branch2b_mean +bn4b2_branch2b_variance +res4b2_branch2c_weights +bn4b2_branch2c_scale +bn4b2_branch2c_offset +bn4b2_branch2c_mean +bn4b2_branch2c_variance +res4b3_branch2a_weights +bn4b3_branch2a_scale +bn4b3_branch2a_offset +bn4b3_branch2a_mean +bn4b3_branch2a_variance +res4b3_branch2b_weights +bn4b3_branch2b_scale +bn4b3_branch2b_offset +bn4b3_branch2b_mean +bn4b3_branch2b_variance 
+res4b3_branch2c_weights +bn4b3_branch2c_scale +bn4b3_branch2c_offset +bn4b3_branch2c_mean +bn4b3_branch2c_variance +res4b4_branch2a_weights +bn4b4_branch2a_scale +bn4b4_branch2a_offset +bn4b4_branch2a_mean +bn4b4_branch2a_variance +res4b4_branch2b_weights +bn4b4_branch2b_scale +bn4b4_branch2b_offset +bn4b4_branch2b_mean +bn4b4_branch2b_variance +res4b4_branch2c_weights +bn4b4_branch2c_scale +bn4b4_branch2c_offset +bn4b4_branch2c_mean +bn4b4_branch2c_variance +res4b5_branch2a_weights +bn4b5_branch2a_scale +bn4b5_branch2a_offset +bn4b5_branch2a_mean +bn4b5_branch2a_variance +res4b5_branch2b_weights +bn4b5_branch2b_scale +bn4b5_branch2b_offset +bn4b5_branch2b_mean +bn4b5_branch2b_variance +res4b5_branch2c_weights +bn4b5_branch2c_scale +bn4b5_branch2c_offset +bn4b5_branch2c_mean +bn4b5_branch2c_variance +res4b6_branch2a_weights +bn4b6_branch2a_scale +bn4b6_branch2a_offset +bn4b6_branch2a_mean +bn4b6_branch2a_variance +res4b6_branch2b_weights +bn4b6_branch2b_scale +bn4b6_branch2b_offset +bn4b6_branch2b_mean +bn4b6_branch2b_variance +res4b6_branch2c_weights +bn4b6_branch2c_scale +bn4b6_branch2c_offset +bn4b6_branch2c_mean +bn4b6_branch2c_variance +res4b7_branch2a_weights +bn4b7_branch2a_scale +bn4b7_branch2a_offset +bn4b7_branch2a_mean +bn4b7_branch2a_variance +res4b7_branch2b_weights +bn4b7_branch2b_scale +bn4b7_branch2b_offset +bn4b7_branch2b_mean +bn4b7_branch2b_variance +res4b7_branch2c_weights +bn4b7_branch2c_scale +bn4b7_branch2c_offset +bn4b7_branch2c_mean +bn4b7_branch2c_variance +res4b8_branch2a_weights +bn4b8_branch2a_scale +bn4b8_branch2a_offset +bn4b8_branch2a_mean +bn4b8_branch2a_variance +res4b8_branch2b_weights +bn4b8_branch2b_scale +bn4b8_branch2b_offset +bn4b8_branch2b_mean +bn4b8_branch2b_variance +res4b8_branch2c_weights +bn4b8_branch2c_scale +bn4b8_branch2c_offset +bn4b8_branch2c_mean +bn4b8_branch2c_variance +res4b9_branch2a_weights +bn4b9_branch2a_scale +bn4b9_branch2a_offset +bn4b9_branch2a_mean +bn4b9_branch2a_variance +res4b9_branch2b_weights +bn4b9_branch2b_scale +bn4b9_branch2b_offset +bn4b9_branch2b_mean +bn4b9_branch2b_variance +res4b9_branch2c_weights +bn4b9_branch2c_scale +bn4b9_branch2c_offset +bn4b9_branch2c_mean +bn4b9_branch2c_variance +res4b10_branch2a_weights +bn4b10_branch2a_scale +bn4b10_branch2a_offset +bn4b10_branch2a_mean +bn4b10_branch2a_variance +res4b10_branch2b_weights +bn4b10_branch2b_scale +bn4b10_branch2b_offset +bn4b10_branch2b_mean +bn4b10_branch2b_variance +res4b10_branch2c_weights +bn4b10_branch2c_scale +bn4b10_branch2c_offset +bn4b10_branch2c_mean +bn4b10_branch2c_variance +res4b11_branch2a_weights +bn4b11_branch2a_scale +bn4b11_branch2a_offset +bn4b11_branch2a_mean +bn4b11_branch2a_variance +res4b11_branch2b_weights +bn4b11_branch2b_scale +bn4b11_branch2b_offset +bn4b11_branch2b_mean +bn4b11_branch2b_variance +res4b11_branch2c_weights +bn4b11_branch2c_scale +bn4b11_branch2c_offset +bn4b11_branch2c_mean +bn4b11_branch2c_variance +res4b12_branch2a_weights +bn4b12_branch2a_scale +bn4b12_branch2a_offset +bn4b12_branch2a_mean +bn4b12_branch2a_variance +res4b12_branch2b_weights +bn4b12_branch2b_scale +bn4b12_branch2b_offset +bn4b12_branch2b_mean +bn4b12_branch2b_variance +res4b12_branch2c_weights +bn4b12_branch2c_scale +bn4b12_branch2c_offset +bn4b12_branch2c_mean +bn4b12_branch2c_variance +res4b13_branch2a_weights +bn4b13_branch2a_scale +bn4b13_branch2a_offset +bn4b13_branch2a_mean +bn4b13_branch2a_variance +res4b13_branch2b_weights +bn4b13_branch2b_scale +bn4b13_branch2b_offset +bn4b13_branch2b_mean +bn4b13_branch2b_variance 
+res4b13_branch2c_weights +bn4b13_branch2c_scale +bn4b13_branch2c_offset +bn4b13_branch2c_mean +bn4b13_branch2c_variance +res4b14_branch2a_weights +bn4b14_branch2a_scale +bn4b14_branch2a_offset +bn4b14_branch2a_mean +bn4b14_branch2a_variance +res4b14_branch2b_weights +bn4b14_branch2b_scale +bn4b14_branch2b_offset +bn4b14_branch2b_mean +bn4b14_branch2b_variance +res4b14_branch2c_weights +bn4b14_branch2c_scale +bn4b14_branch2c_offset +bn4b14_branch2c_mean +bn4b14_branch2c_variance +res4b15_branch2a_weights +bn4b15_branch2a_scale +bn4b15_branch2a_offset +bn4b15_branch2a_mean +bn4b15_branch2a_variance +res4b15_branch2b_weights +bn4b15_branch2b_scale +bn4b15_branch2b_offset +bn4b15_branch2b_mean +bn4b15_branch2b_variance +res4b15_branch2c_weights +bn4b15_branch2c_scale +bn4b15_branch2c_offset +bn4b15_branch2c_mean +bn4b15_branch2c_variance +res4b16_branch2a_weights +bn4b16_branch2a_scale +bn4b16_branch2a_offset +bn4b16_branch2a_mean +bn4b16_branch2a_variance +res4b16_branch2b_weights +bn4b16_branch2b_scale +bn4b16_branch2b_offset +bn4b16_branch2b_mean +bn4b16_branch2b_variance +res4b16_branch2c_weights +bn4b16_branch2c_scale +bn4b16_branch2c_offset +bn4b16_branch2c_mean +bn4b16_branch2c_variance +res4b17_branch2a_weights +bn4b17_branch2a_scale +bn4b17_branch2a_offset +bn4b17_branch2a_mean +bn4b17_branch2a_variance +res4b17_branch2b_weights +bn4b17_branch2b_scale +bn4b17_branch2b_offset +bn4b17_branch2b_mean +bn4b17_branch2b_variance +res4b17_branch2c_weights +bn4b17_branch2c_scale +bn4b17_branch2c_offset +bn4b17_branch2c_mean +bn4b17_branch2c_variance +res4b18_branch2a_weights +bn4b18_branch2a_scale +bn4b18_branch2a_offset +bn4b18_branch2a_mean +bn4b18_branch2a_variance +res4b18_branch2b_weights +bn4b18_branch2b_scale +bn4b18_branch2b_offset +bn4b18_branch2b_mean +bn4b18_branch2b_variance +res4b18_branch2c_weights +bn4b18_branch2c_scale +bn4b18_branch2c_offset +bn4b18_branch2c_mean +bn4b18_branch2c_variance +res4b19_branch2a_weights +bn4b19_branch2a_scale +bn4b19_branch2a_offset +bn4b19_branch2a_mean +bn4b19_branch2a_variance +res4b19_branch2b_weights +bn4b19_branch2b_scale +bn4b19_branch2b_offset +bn4b19_branch2b_mean +bn4b19_branch2b_variance +res4b19_branch2c_weights +bn4b19_branch2c_scale +bn4b19_branch2c_offset +bn4b19_branch2c_mean +bn4b19_branch2c_variance +res4b20_branch2a_weights +bn4b20_branch2a_scale +bn4b20_branch2a_offset +bn4b20_branch2a_mean +bn4b20_branch2a_variance +res4b20_branch2b_weights +bn4b20_branch2b_scale +bn4b20_branch2b_offset +bn4b20_branch2b_mean +bn4b20_branch2b_variance +res4b20_branch2c_weights +bn4b20_branch2c_scale +bn4b20_branch2c_offset +bn4b20_branch2c_mean +bn4b20_branch2c_variance +res4b21_branch2a_weights +bn4b21_branch2a_scale +bn4b21_branch2a_offset +bn4b21_branch2a_mean +bn4b21_branch2a_variance +res4b21_branch2b_weights +bn4b21_branch2b_scale +bn4b21_branch2b_offset +bn4b21_branch2b_mean +bn4b21_branch2b_variance +res4b21_branch2c_weights +bn4b21_branch2c_scale +bn4b21_branch2c_offset +bn4b21_branch2c_mean +bn4b21_branch2c_variance +res4b22_branch2a_weights +bn4b22_branch2a_scale +bn4b22_branch2a_offset +bn4b22_branch2a_mean +bn4b22_branch2a_variance +res4b22_branch2b_weights +bn4b22_branch2b_scale +bn4b22_branch2b_offset +bn4b22_branch2b_mean +bn4b22_branch2b_variance +res4b22_branch2c_weights +bn4b22_branch2c_scale +bn4b22_branch2c_offset +bn4b22_branch2c_mean +bn4b22_branch2c_variance +res5a_branch2a_weights +bn5a_branch2a_scale +bn5a_branch2a_offset +bn5a_branch2a_mean +bn5a_branch2a_variance +res5a_branch2b_weights +bn5a_branch2b_scale 
+bn5a_branch2b_offset +bn5a_branch2b_mean +bn5a_branch2b_variance +res5a_branch2c_weights +bn5a_branch2c_scale +bn5a_branch2c_offset +bn5a_branch2c_mean +bn5a_branch2c_variance +res5a_branch1_weights +bn5a_branch1_scale +bn5a_branch1_offset +bn5a_branch1_mean +bn5a_branch1_variance +res5b_branch2a_weights +bn5b_branch2a_scale +bn5b_branch2a_offset +bn5b_branch2a_mean +bn5b_branch2a_variance +res5b_branch2b_weights +bn5b_branch2b_scale +bn5b_branch2b_offset +bn5b_branch2b_mean +bn5b_branch2b_variance +res5b_branch2c_weights +bn5b_branch2c_scale +bn5b_branch2c_offset +bn5b_branch2c_mean +bn5b_branch2c_variance +res5c_branch2a_weights +bn5c_branch2a_scale +bn5c_branch2a_offset +bn5c_branch2a_mean +bn5c_branch2a_variance +res5c_branch2b_weights +bn5c_branch2b_scale +bn5c_branch2b_offset +bn5c_branch2b_mean +bn5c_branch2b_variance +res5c_branch2c_weights +bn5c_branch2c_scale +bn5c_branch2c_offset +bn5c_branch2c_mean +bn5c_branch2c_variance + diff --git a/hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/tsn/name_map.json b/hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/tsn/name_map.json new file mode 100644 index 00000000..a5c603e8 --- /dev/null +++ b/hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/tsn/name_map.json @@ -0,0 +1 @@ +{"res4a_branch1_weights": "ConvNdBackward95_weights", "bn2a_branch2a_mean": "BatchNormBackward6_bn_mean", "bn4b17_branch2c_offset": "BatchNormBackward282_bn_offset", "bn4b12_branch2a_variance": "BatchNormBackward221_bn_variance", "res4b1_branch2a_weights": "ConvNdBackward99_weights", "bn4b2_branch2a_scale": "BatchNormBackward111_bn_scale", "bn4b18_branch2c_scale": "BatchNormBackward293_bn_scale", "res4b7_branch2c_weights": "ConvNdBackward171_weights", "bn3d_branch2a_scale": "BatchNormBackward76_bn_scale", "res4b13_branch2a_weights": "ConvNdBackward231_weights", "bn2c_branch2c_offset": "BatchNormBackward36_bn_offset", "bn4b18_branch2b_scale": "BatchNormBackward290_bn_scale", "bn4b20_branch2c_scale": "BatchNormBackward315_bn_scale", "bn4b7_branch2a_mean": "BatchNormBackward166_bn_mean", "bn3b_branch2b_mean": "BatchNormBackward57_bn_mean", "bn5a_branch1_variance": "BatchNormBackward351_bn_variance", "bn2b_branch2a_scale": "BatchNormBackward19_bn_scale", "bn4b16_branch2c_variance": "BatchNormBackward271_bn_variance", "bn4b12_branch2b_variance": "BatchNormBackward224_bn_variance", "bn4b19_branch2b_scale": "BatchNormBackward301_bn_scale", "bn4b20_branch2a_offset": "BatchNormBackward309_bn_offset", "bn4b6_branch2c_scale": "BatchNormBackward161_bn_scale", "bn4b9_branch2a_scale": "BatchNormBackward188_bn_scale", "bn3b_branch2b_offset": "BatchNormBackward57_bn_offset", "bn4b16_branch2c_scale": "BatchNormBackward271_bn_scale", "bn4b22_branch2a_offset": "BatchNormBackward331_bn_offset", "bn2c_branch2a_offset": "BatchNormBackward30_bn_offset", "bn4b2_branch2c_scale": "BatchNormBackward117_bn_scale", "bn4b8_branch2a_offset": "BatchNormBackward177_bn_offset", "res4b18_branch2b_weights": "ConvNdBackward289_weights", "res4b3_branch2c_weights": "ConvNdBackward127_weights", "bn3a_branch1_variance": "BatchNormBackward50_bn_variance", "bn4a_branch2a_variance": "BatchNormBackward87_bn_variance", "bn4b22_branch2a_mean": "BatchNormBackward331_bn_mean", "res4b19_branch2c_weights": "ConvNdBackward303_weights", "res5a_branch2a_weights": "ConvNdBackward341_weights", "bn4b14_branch2a_variance": "BatchNormBackward243_bn_variance", "bn4b19_branch2b_mean": "BatchNormBackward301_bn_mean", "bn3a_branch2a_scale": 
"BatchNormBackward41_bn_scale", "bn4b6_branch2c_offset": "BatchNormBackward161_bn_offset", "bn5c_branch2a_scale": "BatchNormBackward366_bn_scale", "bn4b5_branch2a_scale": "BatchNormBackward144_bn_scale", "bn4b17_branch2a_variance": "BatchNormBackward276_bn_variance", "bn4a_branch2b_variance": "BatchNormBackward90_bn_variance", "bn4b3_branch2b_offset": "BatchNormBackward125_bn_offset", "bn2a_branch2a_variance": "BatchNormBackward6_bn_variance", "res4b4_branch2a_weights": "ConvNdBackward132_weights", "res5c_branch2b_weights": "ConvNdBackward368_weights", "bn5b_branch2c_offset": "BatchNormBackward361_bn_offset", "bn2c_branch2b_mean": "BatchNormBackward33_bn_mean", "bn4b10_branch2a_variance": "BatchNormBackward199_bn_variance", "bn5b_branch2b_offset": "BatchNormBackward358_bn_offset", "bn4b17_branch2b_variance": "BatchNormBackward279_bn_variance", "bn4b2_branch2b_scale": "BatchNormBackward114_bn_scale", "bn4b19_branch2a_mean": "BatchNormBackward298_bn_mean", "res5a_branch2c_weights": "ConvNdBackward347_weights", "bn4b22_branch2b_scale": "BatchNormBackward334_bn_scale", "bn4b14_branch2b_scale": "BatchNormBackward246_bn_scale", "bn3c_branch2c_variance": "BatchNormBackward71_bn_variance", "bn4b5_branch2a_variance": "BatchNormBackward144_bn_variance", "bn4b17_branch2c_variance": "BatchNormBackward282_bn_variance", "bn2b_branch2b_mean": "BatchNormBackward22_bn_mean", "bn3a_branch2c_offset": "BatchNormBackward47_bn_offset", "bn4b19_branch2a_variance": "BatchNormBackward298_bn_variance", "bn3a_branch2a_variance": "BatchNormBackward41_bn_variance", "bn4b12_branch2c_offset": "BatchNormBackward227_bn_offset", "res4b20_branch2a_weights": "ConvNdBackward308_weights", "bn2c_branch2c_variance": "BatchNormBackward36_bn_variance", "bn3c_branch2c_scale": "BatchNormBackward71_bn_scale", "bn4b18_branch2b_variance": "BatchNormBackward290_bn_variance", "bn4b12_branch2a_offset": "BatchNormBackward221_bn_offset", "bn5a_branch2b_mean": "BatchNormBackward345_bn_mean", "bn4b20_branch2b_scale": "BatchNormBackward312_bn_scale", "bn5a_branch2c_mean": "BatchNormBackward348_bn_mean", "res2a_branch2a_weights": "ConvNdBackward5_weights", "res4b3_branch2b_weights": "ConvNdBackward124_weights", "bn2c_branch2b_scale": "BatchNormBackward33_bn_scale", "bn5c_branch2a_mean": "BatchNormBackward366_bn_mean", "res4b14_branch2c_weights": "ConvNdBackward248_weights", "bn2b_branch2a_variance": "BatchNormBackward19_bn_variance", "bn4b15_branch2c_mean": "BatchNormBackward260_bn_mean", "bn4b4_branch2b_scale": "BatchNormBackward136_bn_scale", "bn4b12_branch2c_scale": "BatchNormBackward227_bn_scale", "res2a_branch1_weights": "ConvNdBackward14_weights", "bn4b22_branch2c_scale": "BatchNormBackward337_bn_scale", "bn4b5_branch2b_scale": "BatchNormBackward147_bn_scale", "res4b2_branch2a_weights": "ConvNdBackward110_weights", "res4b10_branch2a_weights": "ConvNdBackward198_weights", "bn4b12_branch2b_mean": "BatchNormBackward224_bn_mean", "bn5a_branch1_mean": "BatchNormBackward351_bn_mean", "bn4b11_branch2c_offset": "BatchNormBackward216_bn_offset", "bn3c_branch2b_variance": "BatchNormBackward68_bn_variance", "bn4b20_branch2c_offset": "BatchNormBackward315_bn_offset", "bn4b9_branch2c_scale": "BatchNormBackward194_bn_scale", "bn4b17_branch2b_mean": "BatchNormBackward279_bn_mean", "bn4b16_branch2b_variance": "BatchNormBackward268_bn_variance", "bn4b16_branch2a_mean": "BatchNormBackward265_bn_mean", "bn4b14_branch2a_mean": "BatchNormBackward243_bn_mean", "bn4b9_branch2a_variance": "BatchNormBackward188_bn_variance", "res2c_branch2c_weights": 
"ConvNdBackward35_weights", "bn4b22_branch2c_offset": "BatchNormBackward337_bn_offset", "bn4b4_branch2a_scale": "BatchNormBackward133_bn_scale", "bn4b11_branch2a_offset": "BatchNormBackward210_bn_offset", "res4b20_branch2c_weights": "ConvNdBackward314_weights", "bn2c_branch2a_mean": "BatchNormBackward30_bn_mean", "bn4b10_branch2a_scale": "BatchNormBackward199_bn_scale", "bn4b16_branch2b_mean": "BatchNormBackward268_bn_mean", "bn3a_branch1_offset": "BatchNormBackward50_bn_offset", "bn4b15_branch2c_scale": "BatchNormBackward260_bn_scale", "res4b16_branch2b_weights": "ConvNdBackward267_weights", "bn4b19_branch2c_offset": "BatchNormBackward304_bn_offset", "bn2a_branch2b_scale": "BatchNormBackward9_bn_scale", "bn5b_branch2a_scale": "BatchNormBackward355_bn_scale", "bn4b11_branch2b_variance": "BatchNormBackward213_bn_variance", "bn4b14_branch2b_mean": "BatchNormBackward246_bn_mean", "bn2c_branch2a_variance": "BatchNormBackward30_bn_variance", "bn5c_branch2b_mean": "BatchNormBackward369_bn_mean", "res5c_branch2a_weights": "ConvNdBackward365_weights", "bn4b1_branch2c_scale": "BatchNormBackward106_bn_scale", "bn4b5_branch2c_offset": "BatchNormBackward150_bn_offset", "res4b21_branch2c_weights": "ConvNdBackward325_weights", "bn4b21_branch2a_offset": "BatchNormBackward320_bn_offset", "bn4b12_branch2a_mean": "BatchNormBackward221_bn_mean", "res4b19_branch2a_weights": "ConvNdBackward297_weights", "bn5a_branch2c_scale": "BatchNormBackward348_bn_scale", "res4b11_branch2c_weights": "ConvNdBackward215_weights", "bn3b_branch2c_variance": "BatchNormBackward60_bn_variance", "bn4b17_branch2a_mean": "BatchNormBackward276_bn_mean", "bn4b15_branch2c_offset": "BatchNormBackward260_bn_offset", "bn4b10_branch2a_offset": "BatchNormBackward199_bn_offset", "bn3d_branch2a_mean": "BatchNormBackward76_bn_mean", "bn4b20_branch2c_variance": "BatchNormBackward315_bn_variance", "res5a_branch2b_weights": "ConvNdBackward344_weights", "res4b2_branch2c_weights": "ConvNdBackward116_weights", "bn5c_branch2c_scale": "BatchNormBackward372_bn_scale", "bn4b6_branch2a_offset": "BatchNormBackward155_bn_offset", "bn4b10_branch2c_variance": "BatchNormBackward205_bn_variance", "bn4b1_branch2b_variance": "BatchNormBackward103_bn_variance", "bn5b_branch2a_offset": "BatchNormBackward355_bn_offset", "res4b7_branch2b_weights": "ConvNdBackward168_weights", "bn4b3_branch2c_scale": "BatchNormBackward128_bn_scale", "bn4b15_branch2a_variance": "BatchNormBackward254_bn_variance", "bn4b5_branch2c_mean": "BatchNormBackward150_bn_mean", "res2c_branch2b_weights": "ConvNdBackward32_weights", "bn4b19_branch2c_variance": "BatchNormBackward304_bn_variance", "bn4b5_branch2b_mean": "BatchNormBackward147_bn_mean", "bn3a_branch2a_offset": "BatchNormBackward41_bn_offset", "bn2a_branch2c_offset": "BatchNormBackward12_bn_offset", "bn4b10_branch2b_offset": "BatchNormBackward202_bn_offset", "res4b22_branch2c_weights": "ConvNdBackward336_weights", "bn4b7_branch2c_offset": "BatchNormBackward172_bn_offset", "bn4b14_branch2c_mean": "BatchNormBackward249_bn_mean", "bn4b5_branch2a_mean": "BatchNormBackward144_bn_mean", "bn4b3_branch2c_variance": "BatchNormBackward128_bn_variance", "bn3d_branch2a_variance": "BatchNormBackward76_bn_variance", "bn4b10_branch2b_variance": "BatchNormBackward202_bn_variance", "res2b_branch2a_weights": "ConvNdBackward18_weights", "res4b22_branch2b_weights": "ConvNdBackward333_weights", "bn2b_branch2c_mean": "BatchNormBackward25_bn_mean", "bn4b17_branch2c_mean": "BatchNormBackward282_bn_mean", "bn2a_branch2c_mean": "BatchNormBackward12_bn_mean", 
"bn4a_branch2b_scale": "BatchNormBackward90_bn_scale", "bn3a_branch2b_variance": "BatchNormBackward44_bn_variance", "bn4b3_branch2b_mean": "BatchNormBackward125_bn_mean", "bn2c_branch2b_variance": "BatchNormBackward33_bn_variance", "res4b19_branch2b_weights": "ConvNdBackward300_weights", "res4b16_branch2c_weights": "ConvNdBackward270_weights", "bn4b14_branch2b_offset": "BatchNormBackward246_bn_offset", "bn4b15_branch2b_scale": "BatchNormBackward257_bn_scale", "bn4b13_branch2a_mean": "BatchNormBackward232_bn_mean", "res3a_branch1_weights": "ConvNdBackward49_weights", "bn4b4_branch2c_variance": "BatchNormBackward139_bn_variance", "bn4b2_branch2c_mean": "BatchNormBackward117_bn_mean", "bn3d_branch2b_variance": "BatchNormBackward79_bn_variance", "res4b1_branch2b_weights": "ConvNdBackward102_weights", "bn4b21_branch2a_mean": "BatchNormBackward320_bn_mean", "res3c_branch2a_weights": "ConvNdBackward64_weights", "res4b12_branch2c_weights": "ConvNdBackward226_weights", "bn4b5_branch2a_offset": "BatchNormBackward144_bn_offset", "bn5a_branch2a_offset": "BatchNormBackward342_bn_offset", "bn3b_branch2c_offset": "BatchNormBackward60_bn_offset", "bn4b19_branch2a_offset": "BatchNormBackward298_bn_offset", "bn3c_branch2b_mean": "BatchNormBackward68_bn_mean", "bn3c_branch2c_offset": "BatchNormBackward71_bn_offset", "res4b21_branch2b_weights": "ConvNdBackward322_weights", "bn4b13_branch2a_scale": "BatchNormBackward232_bn_scale", "bn4b13_branch2c_scale": "BatchNormBackward238_bn_scale", "bn4b15_branch2b_variance": "BatchNormBackward257_bn_variance", "bn4b9_branch2c_mean": "BatchNormBackward194_bn_mean", "bn4b19_branch2a_scale": "BatchNormBackward298_bn_scale", "bn4b9_branch2a_mean": "BatchNormBackward188_bn_mean", "bn4a_branch1_variance": "BatchNormBackward96_bn_variance", "bn4b10_branch2a_mean": "BatchNormBackward199_bn_mean", "bn5b_branch2a_variance": "BatchNormBackward355_bn_variance", "bn4b21_branch2a_variance": "BatchNormBackward320_bn_variance", "bn4b9_branch2b_variance": "BatchNormBackward191_bn_variance", "bn5c_branch2c_offset": "BatchNormBackward372_bn_offset", "bn4b6_branch2c_mean": "BatchNormBackward161_bn_mean", "bn5a_branch2b_scale": "BatchNormBackward345_bn_scale", "bn4b11_branch2b_scale": "BatchNormBackward213_bn_scale", "bn4b21_branch2c_scale": "BatchNormBackward326_bn_scale", "bn5c_branch2a_offset": "BatchNormBackward366_bn_offset", "bn3b_branch2a_mean": "BatchNormBackward54_bn_mean", "res2b_branch2c_weights": "ConvNdBackward24_weights", "bn4b5_branch2b_offset": "BatchNormBackward147_bn_offset", "bn3d_branch2c_variance": "BatchNormBackward82_bn_variance", "bn3a_branch1_mean": "BatchNormBackward50_bn_mean", "bn2b_branch2c_offset": "BatchNormBackward25_bn_offset", "bn4b21_branch2c_mean": "BatchNormBackward326_bn_mean", "res4b15_branch2b_weights": "ConvNdBackward256_weights", "bn5b_branch2b_variance": "BatchNormBackward358_bn_variance", "res3d_branch2a_weights": "ConvNdBackward75_weights", "bn4b2_branch2a_offset": "BatchNormBackward111_bn_offset", "bn4b7_branch2a_scale": "BatchNormBackward166_bn_scale", "bn5c_branch2c_variance": "BatchNormBackward372_bn_variance", "bn5c_branch2b_scale": "BatchNormBackward369_bn_scale", "bn3b_branch2b_variance": "BatchNormBackward57_bn_variance", "bn2a_branch1_offset": "BatchNormBackward15_bn_offset", "bn4b8_branch2c_variance": "BatchNormBackward183_bn_variance", "bn4b21_branch2b_offset": "BatchNormBackward323_bn_offset", "bn4b15_branch2a_mean": "BatchNormBackward254_bn_mean", "res4b6_branch2a_weights": "ConvNdBackward154_weights", "bn5a_branch1_offset": 
"BatchNormBackward351_bn_offset", "bn4b5_branch2c_scale": "BatchNormBackward150_bn_scale", "bn3a_branch2c_variance": "BatchNormBackward47_bn_variance", "bn5b_branch2c_variance": "BatchNormBackward361_bn_variance", "bn3a_branch2a_mean": "BatchNormBackward41_bn_mean", "bn4b7_branch2b_scale": "BatchNormBackward169_bn_scale", "bn5a_branch2b_offset": "BatchNormBackward345_bn_offset", "bn4b19_branch2c_scale": "BatchNormBackward304_bn_scale", "res2a_branch2b_weights": "ConvNdBackward8_weights", "bn2c_branch2b_offset": "BatchNormBackward33_bn_offset", "bn3b_branch2c_mean": "BatchNormBackward60_bn_mean", "res4b16_branch2a_weights": "ConvNdBackward264_weights", "bn4b18_branch2b_offset": "BatchNormBackward290_bn_offset", "bn3a_branch2c_mean": "BatchNormBackward47_bn_mean", "bn4a_branch2b_offset": "BatchNormBackward90_bn_offset", "bn4b18_branch2b_mean": "BatchNormBackward290_bn_mean", "bn4b10_branch2c_mean": "BatchNormBackward205_bn_mean", "res3b_branch2c_weights": "ConvNdBackward59_weights", "bn3c_branch2a_scale": "BatchNormBackward65_bn_scale", "bn4b13_branch2b_variance": "BatchNormBackward235_bn_variance", "bn4b8_branch2c_offset": "BatchNormBackward183_bn_offset", "res4b14_branch2b_weights": "ConvNdBackward245_weights", "bn4b19_branch2b_offset": "BatchNormBackward301_bn_offset", "res4b18_branch2a_weights": "ConvNdBackward286_weights", "bn4b3_branch2c_mean": "BatchNormBackward128_bn_mean", "res4b11_branch2a_weights": "ConvNdBackward209_weights", "bn_conv1_variance": "BatchNormBackward2_bn_variance", "bn4b22_branch2b_variance": "BatchNormBackward334_bn_variance", "bn2b_branch2a_offset": "BatchNormBackward19_bn_offset", "bn_conv1_scale": "BatchNormBackward2_bn_scale", "bn5a_branch1_scale": "BatchNormBackward351_bn_scale", "bn4b7_branch2a_offset": "BatchNormBackward166_bn_offset", "bn4b9_branch2c_offset": "BatchNormBackward194_bn_offset", "res4b3_branch2a_weights": "ConvNdBackward121_weights", "bn2a_branch1_variance": "BatchNormBackward15_bn_variance", "bn4b3_branch2a_variance": "BatchNormBackward122_bn_variance", "res4b9_branch2a_weights": "ConvNdBackward187_weights", "bn4b9_branch2a_offset": "BatchNormBackward188_bn_offset", "bn3c_branch2a_mean": "BatchNormBackward65_bn_mean", "bn2b_branch2b_offset": "BatchNormBackward22_bn_offset", "res3a_branch2b_weights": "ConvNdBackward43_weights", "bn4b12_branch2b_offset": "BatchNormBackward224_bn_offset", "bn4a_branch2c_variance": "BatchNormBackward93_bn_variance", "bn4b18_branch2a_mean": "BatchNormBackward287_bn_mean", "bn4b16_branch2c_mean": "BatchNormBackward271_bn_mean", "bn4b20_branch2b_variance": "BatchNormBackward312_bn_variance", "bn4b8_branch2b_mean": "BatchNormBackward180_bn_mean", "bn3c_branch2c_mean": "BatchNormBackward71_bn_mean", "bn4b13_branch2a_variance": "BatchNormBackward232_bn_variance", "bn3d_branch2c_offset": "BatchNormBackward82_bn_offset", "bn4b1_branch2a_scale": "BatchNormBackward100_bn_scale", "bn4b2_branch2a_mean": "BatchNormBackward111_bn_mean", "bn4b8_branch2a_scale": "BatchNormBackward177_bn_scale", "res4b7_branch2a_weights": "ConvNdBackward165_weights", "bn4b20_branch2a_scale": "BatchNormBackward309_bn_scale", "bn4b12_branch2b_scale": "BatchNormBackward224_bn_scale", "bn3d_branch2b_offset": "BatchNormBackward79_bn_offset", "bn4b21_branch2b_mean": "BatchNormBackward323_bn_mean", "bn4b1_branch2b_scale": "BatchNormBackward103_bn_scale", "res3d_branch2c_weights": "ConvNdBackward81_weights", "bn4b20_branch2b_offset": "BatchNormBackward312_bn_offset", "bn4b5_branch2c_variance": "BatchNormBackward150_bn_variance", 
"res4b5_branch2c_weights": "ConvNdBackward149_weights", "bn5c_branch2b_variance": "BatchNormBackward369_bn_variance", "res4b20_branch2b_weights": "ConvNdBackward311_weights", "bn2a_branch2c_scale": "BatchNormBackward12_bn_scale", "res4b15_branch2c_weights": "ConvNdBackward259_weights", "bn3b_branch2a_variance": "BatchNormBackward54_bn_variance", "bn4b18_branch2c_mean": "BatchNormBackward293_bn_mean", "bn4b22_branch2a_scale": "BatchNormBackward331_bn_scale", "res3c_branch2b_weights": "ConvNdBackward67_weights", "bn4b8_branch2a_variance": "BatchNormBackward177_bn_variance", "res5c_branch2c_weights": "ConvNdBackward371_weights", "bn4b3_branch2a_offset": "BatchNormBackward122_bn_offset", "bn5c_branch2b_offset": "BatchNormBackward369_bn_offset", "bn2a_branch2b_mean": "BatchNormBackward9_bn_mean", "bn4b4_branch2b_variance": "BatchNormBackward136_bn_variance", "res3b_branch2b_weights": "ConvNdBackward56_weights", "bn4b10_branch2b_mean": "BatchNormBackward202_bn_mean", "bn4b15_branch2b_mean": "BatchNormBackward257_bn_mean", "bn4b21_branch2c_offset": "BatchNormBackward326_bn_offset", "res4b2_branch2b_weights": "ConvNdBackward113_weights", "bn4b8_branch2c_scale": "BatchNormBackward183_bn_scale", "bn2b_branch2c_variance": "BatchNormBackward25_bn_variance", "bn4b1_branch2c_mean": "BatchNormBackward106_bn_mean", "bn3b_branch2a_scale": "BatchNormBackward54_bn_scale", "bn4b6_branch2c_variance": "BatchNormBackward161_bn_variance", "res2c_branch2a_weights": "ConvNdBackward29_weights", "bn4b4_branch2a_variance": "BatchNormBackward133_bn_variance", "bn3d_branch2c_mean": "BatchNormBackward82_bn_mean", "bn4b4_branch2c_mean": "BatchNormBackward139_bn_mean", "bn4b21_branch2b_scale": "BatchNormBackward323_bn_scale", "bn4b7_branch2b_mean": "BatchNormBackward169_bn_mean", "res4b12_branch2a_weights": "ConvNdBackward220_weights", "bn4b1_branch2a_offset": "BatchNormBackward100_bn_offset", "bn3b_branch2b_scale": "BatchNormBackward57_bn_scale", "res4b9_branch2c_weights": "ConvNdBackward193_weights", "bn4b10_branch2c_scale": "BatchNormBackward205_bn_scale", "bn4b19_branch2c_mean": "BatchNormBackward304_bn_mean", "res4b1_branch2c_weights": "ConvNdBackward105_weights", "bn4b18_branch2a_scale": "BatchNormBackward287_bn_scale", "bn3a_branch2c_scale": "BatchNormBackward47_bn_scale", "bn2a_branch2b_offset": "BatchNormBackward9_bn_offset", "bn4b2_branch2b_mean": "BatchNormBackward114_bn_mean", "bn3d_branch2b_scale": "BatchNormBackward79_bn_scale", "res4a_branch2a_weights": "ConvNdBackward86_weights", "res4b6_branch2b_weights": "ConvNdBackward157_weights", "res4b9_branch2b_weights": "ConvNdBackward190_weights", "bn4b4_branch2a_mean": "BatchNormBackward133_bn_mean", "bn3b_branch2a_offset": "BatchNormBackward54_bn_offset", "res4b11_branch2b_weights": "ConvNdBackward212_weights", "bn4b4_branch2c_scale": "BatchNormBackward139_bn_scale", "bn4b8_branch2b_offset": "BatchNormBackward180_bn_offset", "bn4b2_branch2b_variance": "BatchNormBackward114_bn_variance", "res4b17_branch2b_weights": "ConvNdBackward278_weights", "res4b15_branch2a_weights": "ConvNdBackward253_weights", "bn3d_branch2c_scale": "BatchNormBackward82_bn_scale", "res3d_branch2b_weights": "ConvNdBackward78_weights", "bn4b13_branch2b_offset": "BatchNormBackward235_bn_offset", "bn2c_branch2c_mean": "BatchNormBackward36_bn_mean", "bn4b13_branch2b_mean": "BatchNormBackward235_bn_mean", "res4b17_branch2a_weights": "ConvNdBackward275_weights", "bn4b4_branch2b_offset": "BatchNormBackward136_bn_offset", "res4b18_branch2c_weights": "ConvNdBackward292_weights", 
"bn4b10_branch2c_offset": "BatchNormBackward205_bn_offset", "bn4b10_branch2b_scale": "BatchNormBackward202_bn_scale", "res4b21_branch2a_weights": "ConvNdBackward319_weights", "bn4b6_branch2b_scale": "BatchNormBackward158_bn_scale", "bn5a_branch2c_variance": "BatchNormBackward348_bn_variance", "conv1_weights": "ConvNdBackward1_weights", "bn5b_branch2c_mean": "BatchNormBackward361_bn_mean", "bn4a_branch2c_scale": "BatchNormBackward93_bn_scale", "bn3d_branch2a_offset": "BatchNormBackward76_bn_offset", "res5b_branch2a_weights": "ConvNdBackward354_weights", "bn5a_branch2b_variance": "BatchNormBackward345_bn_variance", "res3a_branch2a_weights": "ConvNdBackward40_weights", "bn4b14_branch2c_scale": "BatchNormBackward249_bn_scale", "bn2a_branch2b_variance": "BatchNormBackward9_bn_variance", "bn4b8_branch2a_mean": "BatchNormBackward177_bn_mean", "bn4b5_branch2b_variance": "BatchNormBackward147_bn_variance", "bn4b7_branch2c_scale": "BatchNormBackward172_bn_scale", "bn4b14_branch2a_scale": "BatchNormBackward243_bn_scale", "bn3c_branch2a_variance": "BatchNormBackward65_bn_variance", "res4b13_branch2c_weights": "ConvNdBackward237_weights", "bn4b3_branch2a_mean": "BatchNormBackward122_bn_mean", "bn3c_branch2a_offset": "BatchNormBackward65_bn_offset", "res4b12_branch2b_weights": "ConvNdBackward223_weights", "res4b10_branch2b_weights": "ConvNdBackward201_weights", "bn4b15_branch2c_variance": "BatchNormBackward260_bn_variance", "res4b10_branch2c_weights": "ConvNdBackward204_weights", "bn4b8_branch2b_scale": "BatchNormBackward180_bn_scale", "bn4a_branch2a_offset": "BatchNormBackward87_bn_offset", "bn4b21_branch2c_variance": "BatchNormBackward326_bn_variance", "res2b_branch2b_weights": "ConvNdBackward21_weights", "res4b8_branch2b_weights": "ConvNdBackward179_weights", "bn4b16_branch2a_offset": "BatchNormBackward265_bn_offset", "bn4b2_branch2a_variance": "BatchNormBackward111_bn_variance", "bn3b_branch2c_scale": "BatchNormBackward60_bn_scale", "bn4b7_branch2c_mean": "BatchNormBackward172_bn_mean", "res4b5_branch2a_weights": "ConvNdBackward143_weights", "bn5c_branch2c_mean": "BatchNormBackward372_bn_mean", "bn2a_branch2a_offset": "BatchNormBackward6_bn_offset", "bn5b_branch2a_mean": "BatchNormBackward355_bn_mean", "bn4b18_branch2a_offset": "BatchNormBackward287_bn_offset", "bn4b17_branch2c_scale": "BatchNormBackward282_bn_scale", "bn4b16_branch2c_offset": "BatchNormBackward271_bn_offset", "res2a_branch2c_weights": "ConvNdBackward11_weights", "res4b5_branch2b_weights": "ConvNdBackward146_weights", "bn4b14_branch2c_offset": "BatchNormBackward249_bn_offset", "res4b13_branch2b_weights": "ConvNdBackward234_weights", "bn3d_branch2b_mean": "BatchNormBackward79_bn_mean", "bn4a_branch2a_mean": "BatchNormBackward87_bn_mean", "bn4b1_branch2a_mean": "BatchNormBackward100_bn_mean", "bn4b8_branch2b_variance": "BatchNormBackward180_bn_variance", "bn4b22_branch2c_variance": "BatchNormBackward337_bn_variance", "bn4b3_branch2b_scale": "BatchNormBackward125_bn_scale", "bn4b22_branch2c_mean": "BatchNormBackward337_bn_mean", "bn4b9_branch2b_mean": "BatchNormBackward191_bn_mean", "bn5a_branch2a_scale": "BatchNormBackward342_bn_scale", "bn4a_branch2c_mean": "BatchNormBackward93_bn_mean", "bn3a_branch2b_mean": "BatchNormBackward44_bn_mean", "bn4b20_branch2b_mean": "BatchNormBackward312_bn_mean", "bn4b11_branch2c_variance": "BatchNormBackward216_bn_variance", "bn3c_branch2b_offset": "BatchNormBackward68_bn_offset", "bn5a_branch2a_mean": "BatchNormBackward342_bn_mean", "bn4a_branch1_offset": "BatchNormBackward96_bn_offset", 
"bn4b7_branch2b_offset": "BatchNormBackward169_bn_offset", "bn4b13_branch2c_offset": "BatchNormBackward238_bn_offset", "bn_conv1_offset": "BatchNormBackward2_bn_offset", "bn4b14_branch2a_offset": "BatchNormBackward243_bn_offset", "bn4b14_branch2b_variance": "BatchNormBackward246_bn_variance", "bn2c_branch2a_scale": "BatchNormBackward30_bn_scale", "bn2a_branch2a_scale": "BatchNormBackward6_bn_scale", "bn4b3_branch2c_offset": "BatchNormBackward128_bn_offset", "res3b_branch2a_weights": "ConvNdBackward53_weights", "bn4b6_branch2b_variance": "BatchNormBackward158_bn_variance", "bn4b6_branch2b_offset": "BatchNormBackward158_bn_offset", "bn3a_branch2b_scale": "BatchNormBackward44_bn_scale", "bn4b4_branch2b_mean": "BatchNormBackward136_bn_mean", "bn4b11_branch2a_variance": "BatchNormBackward210_bn_variance", "bn4b4_branch2a_offset": "BatchNormBackward133_bn_offset", "bn4b6_branch2a_variance": "BatchNormBackward155_bn_variance", "res4b22_branch2a_weights": "ConvNdBackward330_weights", "bn4b19_branch2b_variance": "BatchNormBackward301_bn_variance", "bn2b_branch2a_mean": "BatchNormBackward19_bn_mean", "bn4b11_branch2a_scale": "BatchNormBackward210_bn_scale", "bn4b3_branch2b_variance": "BatchNormBackward125_bn_variance", "res4a_branch2b_weights": "ConvNdBackward89_weights", "bn4a_branch2c_offset": "BatchNormBackward93_bn_offset", "bn4b13_branch2b_scale": "BatchNormBackward235_bn_scale", "bn4a_branch1_scale": "BatchNormBackward96_bn_scale", "bn5b_branch2b_scale": "BatchNormBackward358_bn_scale", "bn4b21_branch2b_variance": "BatchNormBackward323_bn_variance", "bn4b16_branch2a_variance": "BatchNormBackward265_bn_variance", "bn5b_branch2b_mean": "BatchNormBackward358_bn_mean", "bn4b22_branch2a_variance": "BatchNormBackward331_bn_variance", "bn4b20_branch2a_mean": "BatchNormBackward309_bn_mean", "bn4b12_branch2a_scale": "BatchNormBackward221_bn_scale", "bn4b15_branch2a_scale": "BatchNormBackward254_bn_scale", "bn4b16_branch2a_scale": "BatchNormBackward265_bn_scale", "res4b14_branch2a_weights": "ConvNdBackward242_weights", "bn4b15_branch2a_offset": "BatchNormBackward254_bn_offset", "bn4b13_branch2c_mean": "BatchNormBackward238_bn_mean", "bn5b_branch2c_scale": "BatchNormBackward361_bn_scale", "bn5a_branch2c_offset": "BatchNormBackward348_bn_offset", "bn4b9_branch2b_offset": "BatchNormBackward191_bn_offset", "bn4b14_branch2c_variance": "BatchNormBackward249_bn_variance", "bn4b17_branch2b_scale": "BatchNormBackward279_bn_scale", "res4b8_branch2a_weights": "ConvNdBackward176_weights", "bn4b15_branch2b_offset": "BatchNormBackward257_bn_offset", "bn4b11_branch2b_offset": "BatchNormBackward213_bn_offset", "res5b_branch2c_weights": "ConvNdBackward360_weights", "bn4b16_branch2b_offset": "BatchNormBackward268_bn_offset", "bn4a_branch1_mean": "BatchNormBackward96_bn_mean", "bn4b18_branch2a_variance": "BatchNormBackward287_bn_variance", "bn2b_branch2b_scale": "BatchNormBackward22_bn_scale", "bn4b16_branch2b_scale": "BatchNormBackward268_bn_scale", "bn5a_branch2a_variance": "BatchNormBackward342_bn_variance", "bn4b18_branch2c_offset": "BatchNormBackward293_bn_offset", "bn4b6_branch2a_scale": "BatchNormBackward155_bn_scale", "res5a_branch1_weights": "ConvNdBackward350_weights", "bn2a_branch1_scale": "BatchNormBackward15_bn_scale", "bn4b1_branch2b_mean": "BatchNormBackward103_bn_mean", "res4b4_branch2b_weights": "ConvNdBackward135_weights", "res4b4_branch2c_weights": "ConvNdBackward138_weights", "res3a_branch2c_weights": "ConvNdBackward46_weights", "bn3a_branch1_scale": "BatchNormBackward50_bn_scale", 
"bn4b1_branch2c_variance": "BatchNormBackward106_bn_variance", "bn4b18_branch2c_variance": "BatchNormBackward293_bn_variance", "bn4b17_branch2a_scale": "BatchNormBackward276_bn_scale", "bn4b1_branch2b_offset": "BatchNormBackward103_bn_offset", "bn4b22_branch2b_offset": "BatchNormBackward334_bn_offset", "bn4b9_branch2b_scale": "BatchNormBackward191_bn_scale", "bn4b2_branch2c_variance": "BatchNormBackward117_bn_variance", "res4b17_branch2c_weights": "ConvNdBackward281_weights", "bn5c_branch2a_variance": "BatchNormBackward366_bn_variance", "bn4b11_branch2c_mean": "BatchNormBackward216_bn_mean", "bn2b_branch2c_scale": "BatchNormBackward25_bn_scale", "bn4a_branch2b_mean": "BatchNormBackward90_bn_mean", "bn4b7_branch2b_variance": "BatchNormBackward169_bn_variance", "bn4a_branch2a_scale": "BatchNormBackward87_bn_scale", "bn4b1_branch2a_variance": "BatchNormBackward100_bn_variance", "bn2a_branch2c_variance": "BatchNormBackward12_bn_variance", "bn4b3_branch2a_scale": "BatchNormBackward122_bn_scale", "bn4b13_branch2a_offset": "BatchNormBackward232_bn_offset", "res4a_branch2c_weights": "ConvNdBackward92_weights", "res3c_branch2c_weights": "ConvNdBackward70_weights", "bn4b11_branch2b_mean": "BatchNormBackward213_bn_mean", "res4b8_branch2c_weights": "ConvNdBackward182_weights", "bn2c_branch2c_scale": "BatchNormBackward36_bn_scale", "bn4b6_branch2b_mean": "BatchNormBackward158_bn_mean", "bn4b9_branch2c_variance": "BatchNormBackward194_bn_variance", "bn_conv1_mean": "BatchNormBackward2_bn_mean", "bn4b7_branch2a_variance": "BatchNormBackward166_bn_variance", "bn4b4_branch2c_offset": "BatchNormBackward139_bn_offset", "bn3c_branch2b_scale": "BatchNormBackward68_bn_scale", "bn4b20_branch2a_variance": "BatchNormBackward309_bn_variance", "bn2b_branch2b_variance": "BatchNormBackward22_bn_variance", "bn4b17_branch2b_offset": "BatchNormBackward279_bn_offset", "bn4b11_branch2c_scale": "BatchNormBackward216_bn_scale", "res5b_branch2b_weights": "ConvNdBackward357_weights", "bn4b8_branch2c_mean": "BatchNormBackward183_bn_mean", "bn2a_branch1_mean": "BatchNormBackward15_bn_mean", "bn4b20_branch2c_mean": "BatchNormBackward315_bn_mean", "bn4b11_branch2a_mean": "BatchNormBackward210_bn_mean", "bn4b21_branch2a_scale": "BatchNormBackward320_bn_scale", "bn4b7_branch2c_variance": "BatchNormBackward172_bn_variance", "bn4b2_branch2c_offset": "BatchNormBackward117_bn_offset", "bn4b12_branch2c_mean": "BatchNormBackward227_bn_mean", "bn4b17_branch2a_offset": "BatchNormBackward276_bn_offset", "bn4b2_branch2b_offset": "BatchNormBackward114_bn_offset", "bn4b22_branch2b_mean": "BatchNormBackward334_bn_mean", "res4b6_branch2c_weights": "ConvNdBackward160_weights", "bn4b1_branch2c_offset": "BatchNormBackward106_bn_offset", "bn4b12_branch2c_variance": "BatchNormBackward227_bn_variance", "bn4b13_branch2c_variance": "BatchNormBackward238_bn_variance", "bn3a_branch2b_offset": "BatchNormBackward44_bn_offset", "bn4b6_branch2a_mean": "BatchNormBackward155_bn_mean"} diff --git a/hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/tsn/tsn.py b/hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/tsn/tsn.py new file mode 100644 index 00000000..b0ff2727 --- /dev/null +++ b/hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/tsn/tsn.py @@ -0,0 +1,190 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. 
+#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. + +import numpy as np + +import paddle.fluid as fluid +from paddle.fluid import ParamAttr + +from ..model import ModelBase +from .tsn_res_model import TSN_ResNet + +import logging +logger = logging.getLogger(__name__) + +__all__ = ["TSN"] + + +class TSN(ModelBase): + def __init__(self, name, cfg, mode='train'): + super(TSN, self).__init__(name, cfg, mode=mode) + self.get_config() + + def get_config(self): + self.num_classes = self.get_config_from_sec('model', 'num_classes') + self.seg_num = self.get_config_from_sec('model', 'seg_num') + self.seglen = self.get_config_from_sec('model', 'seglen') + self.image_mean = self.get_config_from_sec('model', 'image_mean') + self.image_std = self.get_config_from_sec('model', 'image_std') + self.num_layers = self.get_config_from_sec('model', 'num_layers') + + self.num_epochs = self.get_config_from_sec('train', 'epoch') + self.total_videos = self.get_config_from_sec('train', 'total_videos') + self.base_learning_rate = self.get_config_from_sec( 'train', 'learning_rate') + self.learning_rate_decay = self.get_config_from_sec( 'train', 'learning_rate_decay') + self.l2_weight_decay = self.get_config_from_sec('train', 'l2_weight_decay') + self.momentum = self.get_config_from_sec('train', 'momentum') + + self.seg_num = self.get_config_from_sec(self.mode, 'seg_num', self.seg_num) + self.target_size = self.get_config_from_sec(self.mode, 'target_size') + self.batch_size = self.get_config_from_sec(self.mode, 'batch_size') + + def build_input(self, use_dataloader=True): + image_shape = [3, self.target_size, self.target_size] + image_shape[0] = image_shape[0] * self.seglen + image_shape = [None, self.seg_num] + image_shape + self.use_dataloader = use_dataloader + + image = fluid.data(name='image', shape=image_shape, dtype='float32') + if self.mode != 'infer': + label = fluid.data(name='label', shape=[None, 1], dtype='int64') + else: + label = None + + if use_dataloader: + assert self.mode != 'infer', \ + 'dataloader is not recommended in infer mode, please set use_dataloader to False.'
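+ # NOTE: with use_dataloader=True, every batch read here has shape + # [N, seg_num, seglen * 3, target_size, target_size]; the segment axis + # stays separate at this point and is only folded into the batch + # dimension inside TSN_ResNet.net.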
+ self.dataloader = fluid.io.DataLoader.from_generator( feed_list=[image, label], capacity=4, iterable=True) + + self.feature_input = [image] + self.label_input = label + + def create_model_args(self): + cfg = {} + cfg['layers'] = self.num_layers + cfg['class_dim'] = self.num_classes + cfg['seg_num'] = self.seg_num + return cfg + + def build_model(self): + cfg = self.create_model_args() + videomodel = TSN_ResNet( layers=cfg['layers'], seg_num=cfg['seg_num'], is_training=(self.mode == 'train')) + out = videomodel.net( input=self.feature_input[0], class_dim=cfg['class_dim']) + self.feature_output = out + #self.network_outputs = [out] + + def optimizer(self): + assert self.mode == 'train', "the optimizer can only be built in train mode" + epoch_points = [self.num_epochs / 3, self.num_epochs * 2 / 3] + total_videos = self.total_videos + step = int(total_videos / self.batch_size + 1) + bd = [e * step for e in epoch_points] + base_lr = self.base_learning_rate + lr_decay = self.learning_rate_decay + lr = [base_lr, base_lr * lr_decay, base_lr * lr_decay * lr_decay] + l2_weight_decay = self.l2_weight_decay + momentum = self.momentum + optimizer = fluid.optimizer.Momentum( learning_rate=fluid.layers.piecewise_decay( boundaries=bd, values=lr), momentum=momentum, regularization=fluid.regularizer.L2Decay(l2_weight_decay)) + + return optimizer + + def loss(self): + assert self.mode != 'infer', "loss cannot be calculated in infer mode" + cost = fluid.layers.cross_entropy(input=self.network_outputs[0], \ + label=self.label_input, ignore_index=-1) + self.loss_ = fluid.layers.mean(x=cost) + return self.loss_ + + def outputs(self): + return self.network_outputs + + def feeds(self): + return self.feature_input #if self.mode == 'infer' else self.feature_input + [ +# self.label_input +# ] + + def fetches(self): + if self.mode == 'train' or self.mode == 'valid': + losses = self.loss() + fetch_list = [losses, self.network_outputs[0], self.label_input] + elif self.mode == 'test': + #losses = self.loss() + fetch_list = [self.feature_output, self.label_input] + elif self.mode == 'infer': + fetch_list = self.feature_output + else: + raise NotImplementedError('mode {} not implemented'.format( self.mode)) + + return fetch_list + + def pretrain_info(self): + return ( 'ResNet50_pretrained', 'https://paddlemodels.bj.bcebos.com/video_classification/ResNet50_pretrained.tar.gz' ) + + def weights_info(self): + return ( 'TSN.pdparams', 'https://paddlemodels.bj.bcebos.com/video_classification/TSN.pdparams' ) + + def load_pretrain_params(self, exe, pretrain, prog, place): + def is_parameter(var): + return isinstance(var, fluid.framework.Parameter) + + params_list = list(filter(is_parameter, prog.list_vars())) + for param in params_list: + print(param.name) + + logger.info( "Load pretrain weights from {}, exclude fc layer.".format(pretrain)) + + state_dict = fluid.load_program_state(pretrain) + dict_keys = list(state_dict.keys()) + for name in dict_keys: + if "fc_0" in name: + del state_dict[name] + print('Delete {} from pretrained parameters. Do not load it'.
+ format(name)) + fluid.set_program_state(prog, state_dict) + + +# def load_test_weights(self, exe, weights, prog): +# def is_parameter(var): +# return isinstance(var, fluid.framework.Parameter) +# params_list = list(filter(is_parameter, prog.list_vars())) + +# state_dict = np.load(weights) +# for p in params_list: +# if p.name in state_dict.keys(): +# print('########### load param {} from file'.format(p.name)) +# else: +# print('----------- param {} not in file'.format(p.name)) +# fluid.set_program_state(prog, state_dict) +# fluid.save(prog, './model_weight/tsn') diff --git a/hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/tsn/tsn_res_model.py b/hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/tsn/tsn_res_model.py new file mode 100644 index 00000000..c2e90fe4 --- /dev/null +++ b/hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/tsn/tsn_res_model.py @@ -0,0 +1,149 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. + +import os +import time +import sys +import paddle.fluid as fluid +import math + + +class TSN_ResNet(): + def __init__(self, layers=50, seg_num=7, is_training=True): + self.layers = 101 #layers + self.seg_num = seg_num + self.is_training = is_training + + def conv_bn_layer(self, + input, + num_filters, + filter_size, + stride=1, + groups=1, + act=None, + name=None): + conv = fluid.layers.conv2d( + input=input, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + act=None, + param_attr=fluid.param_attr.ParamAttr(name=name + "_weights"), + bias_attr=False) + if name == "conv1": + bn_name = "bn_" + name + else: + bn_name = "bn" + name[3:] + + return fluid.layers.batch_norm( + input=conv, + act=act, + is_test=(not self.is_training), + param_attr=fluid.param_attr.ParamAttr(name=bn_name + "_scale"), + bias_attr=fluid.param_attr.ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + "_mean", + moving_variance_name=bn_name + '_variance') + + def shortcut(self, input, ch_out, stride, name): + ch_in = input.shape[1] + if ch_in != ch_out or stride != 1: + return self.conv_bn_layer(input, ch_out, 1, stride, name=name) + else: + return input + + def bottleneck_block(self, input, num_filters, stride, name): + conv0 = self.conv_bn_layer( + input=input, + num_filters=num_filters, + filter_size=1, + act='relu', + name=name + "_branch2a") + conv1 = self.conv_bn_layer( + input=conv0, + num_filters=num_filters, + filter_size=3, + stride=stride, + act='relu', + name=name + "_branch2b") + conv2 = self.conv_bn_layer( + input=conv1, + num_filters=num_filters * 4, + filter_size=1, + act=None, + name=name + "_branch2c") + + short = self.shortcut( + input, num_filters * 4, stride, name=name + "_branch1") + + return fluid.layers.elementwise_add(x=short, y=conv2, act='relu') + + def net(self, input, class_dim=101): + layers = self.layers + seg_num = self.seg_num + supported_layers = [50, 101, 152] + assert 
layers in supported_layers, \ + "supported layers are {} but input layer is {}".format(supported_layers, layers) + + # reshape input + channels = input.shape[2] + short_size = input.shape[3] + input = fluid.layers.reshape( + x=input, shape=[-1, channels, short_size, short_size]) + + if layers == 50: + depth = [3, 4, 6, 3] + elif layers == 101: + depth = [3, 4, 23, 3] + elif layers == 152: + depth = [3, 8, 36, 3] + num_filters = [64, 128, 256, 512] + + conv = self.conv_bn_layer( + input=input, + num_filters=64, + filter_size=7, + stride=2, + act='relu', + name='conv1') + conv = fluid.layers.pool2d( + input=conv, + pool_size=3, + pool_stride=2, + pool_padding=1, + pool_type='max') + + for block in range(len(depth)): + for i in range(depth[block]): + if layers in [101, 152] and block == 2: + if i == 0: + conv_name = "res" + str(block + 2) + "a" + else: + conv_name = "res" + str(block + 2) + "b" + str(i) + else: + conv_name = "res" + str(block + 2) + chr(97 + i) + + conv = self.bottleneck_block( + input=conv, + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + name=conv_name) + + pool = fluid.layers.pool2d( + input=conv, pool_size=7, pool_type='avg', global_pooling=True) + + feature = fluid.layers.reshape( + x=pool, shape=[-1, seg_num, pool.shape[1]]) + return feature diff --git a/hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/utils.py b/hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/utils.py new file mode 100644 index 00000000..3eead927 --- /dev/null +++ b/hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/utils.py @@ -0,0 +1,47 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. 
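The reshape pair in `TSN_ResNet.net` above is the core TSN trick: the `[N, seg_num, seglen * 3, H, W]` input is flattened so a plain 2D ResNet processes every segment as an ordinary image, and the pooled features are then unflattened to one vector per segment for the downstream sequence model. A minimal NumPy sketch of that round trip (shapes only; the sizes are hypothetical, with `seglen = 1`):

```python
import numpy as np

N, seg_num, C, H, W = 2, 3, 3, 224, 224           # hypothetical sizes
clips = np.zeros((N, seg_num, C, H, W), np.float32)

# fold segments into the batch axis, as net() does with fluid.layers.reshape
frames = clips.reshape(-1, C, H, W)               # (N * seg_num, C, H, W) -> (6, 3, 224, 224)

# stand-in for the ResNet trunk + global average pooling
feats = np.zeros((frames.shape[0], 2048), np.float32)

# unfold back to per-segment features, matching net()'s final reshape
feats = feats.reshape(N, seg_num, -1)             # (2, 3, 2048)
```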
+ +import os +import wget +import tarfile + +__all__ = ['decompress', 'download', 'AttrDict'] + + +def decompress(path): + t = tarfile.open(path) + t.extractall(path=os.path.split(path)[0]) + t.close() + os.remove(path) + + +def download(url, path): + weight_dir = os.path.split(path)[0] + if not os.path.exists(weight_dir): + os.makedirs(weight_dir) + + path = path + ".tar.gz" + wget.download(url, path) + decompress(path) + + +class AttrDict(dict): + def __getattr__(self, key): + return self[key] + + def __setattr__(self, key, value): + if key in self.__dict__: + self.__dict__[key] = value + else: + self[key] = value diff --git a/hub_module/modules/video/classification/videotag_tsn_lstm/resource/reader/__init__.py b/hub_module/modules/video/classification/videotag_tsn_lstm/resource/reader/__init__.py new file mode 100644 index 00000000..d419ab75 --- /dev/null +++ b/hub_module/modules/video/classification/videotag_tsn_lstm/resource/reader/__init__.py @@ -0,0 +1,5 @@ +from .reader_utils import regist_reader, get_reader +from .kinetics_reader import KineticsReader + +# register readers, sorted alphabetically +regist_reader("TSN", KineticsReader) diff --git a/hub_module/modules/video/classification/videotag_tsn_lstm/resource/reader/kinetics_reader.py b/hub_module/modules/video/classification/videotag_tsn_lstm/resource/reader/kinetics_reader.py new file mode 100644 index 00000000..ccf59ea2 --- /dev/null +++ b/hub_module/modules/video/classification/videotag_tsn_lstm/resource/reader/kinetics_reader.py @@ -0,0 +1,459 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. + +import os +import sys +import cv2 +import math +import random +import functools +try: + import cPickle as pickle + from cStringIO import StringIO +except ImportError: + import pickle + from io import BytesIO +import numpy as np +import paddle +from PIL import Image, ImageEnhance +import logging + +from .reader_utils import DataReader + +logger = logging.getLogger(__name__) +python_ver = sys.version_info + + +class KineticsReader(DataReader): + """ + Data reader for the Kinetics dataset, which comes in two formats: + 1. mp4, the original format of Kinetics-400 + 2. pkl, mp4 videos decoded beforehand and stored as pkl + In both cases, the reader loads the data and returns the frame data as numpy arrays and the label as an integer.
+ dataset cfg: format + num_classes + seg_num + short_size + target_size + num_reader_threads + buf_size + image_mean + image_std + batch_size + list + """ + + def __init__(self, name, mode, cfg): + super(KineticsReader, self).__init__(name, mode, cfg) + self.format = cfg.MODEL.format + self.num_classes = self.get_config_from_sec('model', 'num_classes') + self.seg_num = self.get_config_from_sec('model', 'seg_num') + self.seglen = self.get_config_from_sec('model', 'seglen') + + self.seg_num = self.get_config_from_sec(mode, 'seg_num', self.seg_num) + self.short_size = self.get_config_from_sec(mode, 'short_size') + self.target_size = self.get_config_from_sec(mode, 'target_size') + self.num_reader_threads = self.get_config_from_sec( + mode, 'num_reader_threads') + self.buf_size = self.get_config_from_sec(mode, 'buf_size') + + self.img_mean = np.array(cfg.MODEL.image_mean).reshape( + [3, 1, 1]).astype(np.float32) + self.img_std = np.array(cfg.MODEL.image_std).reshape([3, 1, 1]).astype( + np.float32) + # set batch size and file list + self.batch_size = cfg[mode.upper()]['batch_size'] + self.filelist = cfg[mode.upper()]['filelist'] + + def create_reader(self): + _reader = self._reader_creator(self.filelist, self.mode, seg_num=self.seg_num, seglen = self.seglen, \ + short_size = self.short_size, target_size = self.target_size, \ + img_mean = self.img_mean, img_std = self.img_std, \ + shuffle = (self.mode == 'train'), \ + num_threads = self.num_reader_threads, \ + buf_size = self.buf_size, format = self.format) + + def _batch_reader(): + batch_out = [] + for imgs, label in _reader(): + #for imgs in _reader(): + if imgs is None: + continue + batch_out.append((imgs, label)) + #batch_out.append((imgs,)) + if len(batch_out) == self.batch_size: + yield batch_out + batch_out = [] + + return _batch_reader + + def _inference_reader_creator(self, video_path, mode, seg_num, seglen, + short_size, target_size, img_mean, img_std): + def reader(): + try: + imgs = mp4_loader(video_path, seg_num, seglen, mode) + if len(imgs) < 1: + logger.error('{} frame length {} less than 1.'.format( + video_path, len(imgs))) + yield None, None + except: + logger.error('Error when loading {}'.format(video_path)) + yield None, None + + imgs_ret = imgs_transform(imgs, mode, seg_num, seglen, short_size, + target_size, img_mean, img_std) + label_ret = video_path + + yield imgs_ret, label_ret + + return reader + + def _reader_creator(self, + pickle_list, + mode, + seg_num, + seglen, + short_size, + target_size, + img_mean, + img_std, + shuffle=False, + num_threads=1, + buf_size=1024, + format='pkl'): + def decode_mp4(sample, mode, seg_num, seglen, short_size, target_size, + img_mean, img_std): + sample = sample[0].split(' ') + mp4_path = sample[0] + try: + imgs = mp4_loader(mp4_path, seg_num, seglen, mode) + if len(imgs) < 1: + logger.error('{} frame length {} less than 1.'.format( + mp4_path, len(imgs))) + return None, None + except: + logger.error('Error when loading {}'.format(mp4_path)) + return None, None + + return imgs_transform(imgs, mode, seg_num, seglen, \ + short_size, target_size, img_mean, img_std, name = self.name), mp4_path + + def decode_pickle(sample, mode, seg_num, seglen, short_size, + target_size, img_mean, img_std): + pickle_path = sample[0] + try: + if python_ver < (3, 0): + data_loaded = pickle.load(open(pickle_path, 'rb')) + else: + data_loaded = pickle.load( + open(pickle_path, 'rb'), encoding='bytes') + + vid, label, frames = data_loaded + if len(frames) < 1: + logger.error('{} frame length {} less than 
1.'.format( + pickle_path, len(frames))) + return None, None + except: + logger.info('Error when loading {}'.format(pickle_path)) + return None, None + + if mode == 'train' or mode == 'valid' or mode == 'test': + ret_label = label + elif mode == 'infer': + ret_label = vid + + imgs = video_loader(frames, seg_num, seglen, mode) + return imgs_transform(imgs, mode, seg_num, seglen, \ + short_size, target_size, img_mean, img_std, name = self.name), ret_label + + def reader(): + # with open(pickle_list) as flist: + # lines = [line.strip() for line in flist] + lines = [line.strip() for line in pickle_list] + if shuffle: + random.shuffle(lines) + for line in lines: + pickle_path = line.strip() + yield [pickle_path] + + if format == 'pkl': + decode_func = decode_pickle + elif format == 'mp4': + decode_func = decode_mp4 + else: + raise NotImplementedError("Unsupported format {}".format(format)) + + mapper = functools.partial( decode_func, mode=mode, seg_num=seg_num, seglen=seglen, short_size=short_size, target_size=target_size, img_mean=img_mean, img_std=img_std) + + return paddle.reader.xmap_readers(mapper, reader, num_threads, buf_size) + + +def imgs_transform(imgs, mode, seg_num, seglen, short_size, target_size, img_mean, img_std, name=''): + imgs = group_scale(imgs, short_size) + + if mode == 'train': + if name == "TSM": + imgs = group_multi_scale_crop(imgs, short_size) + imgs = group_random_crop(imgs, target_size) + imgs = group_random_flip(imgs) + else: + imgs = group_center_crop(imgs, target_size) + + np_imgs = (np.array(imgs[0]).astype('float32').transpose( (2, 0, 1))).reshape(1, 3, target_size, target_size) / 255 + for i in range(len(imgs) - 1): + img = (np.array(imgs[i + 1]).astype('float32').transpose( (2, 0, 1))).reshape(1, 3, target_size, target_size) / 255 + np_imgs = np.concatenate((np_imgs, img)) + imgs = np_imgs + imgs -= img_mean + imgs /= img_std + imgs = np.reshape(imgs, (seg_num, seglen * 3, target_size, target_size)) + + return imgs + +def group_multi_scale_crop(img_group, target_size, scales=None, \ + max_distort=1, fix_crop=True, more_fix_crop=True): + scales = scales if scales is not None else [1, .875, .75, .66] + input_size = [target_size, target_size] + + im_size = img_group[0].size + + # get random crop offset + def _sample_crop_size(im_size): + image_w, image_h = im_size[0], im_size[1] + + base_size = min(image_w, image_h) + crop_sizes = [int(base_size * x) for x in scales] + crop_h = [ input_size[1] if abs(x - input_size[1]) < 3 else x for x in crop_sizes ] + crop_w = [ input_size[0] if abs(x - input_size[0]) < 3 else x for x in crop_sizes ] + + pairs = [] + for i, h in enumerate(crop_h): + for j, w in enumerate(crop_w): + if abs(i - j) <= max_distort: + pairs.append((w, h)) + + crop_pair = random.choice(pairs) + if not fix_crop: + w_offset = random.randint(0, image_w - crop_pair[0]) + h_offset = random.randint(0, image_h - crop_pair[1]) + else: + w_step = (image_w - crop_pair[0]) / 4 + h_step = (image_h - crop_pair[1]) / 4 + + ret = list() + ret.append((0, 0)) # upper left + if w_step != 0: + ret.append((4 * w_step, 0)) # upper right + if h_step != 0: + ret.append((0, 4 * h_step)) # lower left + if h_step != 0 and w_step != 0: + ret.append((4 * w_step, 4 * h_step)) # lower right + if h_step != 0 or w_step != 0: + ret.append((2 * w_step, 2 * h_step)) # center + + if more_fix_crop: + ret.append((0, 2 * h_step)) # center left + ret.append((4 * w_step, 2 * h_step)) # center right + ret.append((2 * w_step, 4 * h_step)) # lower center +
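+ # The anchor offsets here are multiples of w_step/h_step, i.e. + # quarters of the slack left after cropping; more_fix_crop extends + # the five base anchors (corners and center) with edge midpoints + # and quarter positions.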
ret.append((2 * w_step, 0 * h_step)) # upper center + + ret.append((1 * w_step, 1 * h_step)) # upper left quarter + ret.append((3 * w_step, 1 * h_step)) # upper right quarter + ret.append((1 * w_step, 3 * h_step)) # lower left quarter + ret.append((3 * w_step, 3 * h_step)) # lower right quarter + + w_offset, h_offset = random.choice(ret) + + return crop_pair[0], crop_pair[1], w_offset, h_offset + + crop_w, crop_h, offset_w, offset_h = _sample_crop_size(im_size) + crop_img_group = [ img.crop((offset_w, offset_h, offset_w + crop_w, offset_h + crop_h)) for img in img_group ] + ret_img_group = [ img.resize((input_size[0], input_size[1]), Image.BILINEAR) for img in crop_img_group ] + + return ret_img_group + + +def group_random_crop(img_group, target_size): + w, h = img_group[0].size + th, tw = target_size, target_size + + assert (w >= target_size) and (h >= target_size), \ + "image width ({}) and height ({}) should be no smaller than crop size ({})".format(w, h, target_size) + + out_images = [] + x1 = random.randint(0, w - tw) + y1 = random.randint(0, h - th) + + for img in img_group: + if w == tw and h == th: + out_images.append(img) + else: + out_images.append(img.crop((x1, y1, x1 + tw, y1 + th))) + + return out_images + + +def group_random_flip(img_group): + v = random.random() + if v < 0.5: + ret = [img.transpose(Image.FLIP_LEFT_RIGHT) for img in img_group] + return ret + else: + return img_group + + +def group_center_crop(img_group, target_size): + img_crop = [] + for img in img_group: + w, h = img.size + th, tw = target_size, target_size + assert (w >= target_size) and (h >= target_size), \ + "image width ({}) and height ({}) should be no smaller than crop size ({})".format(w, h, target_size) + x1 = int(round((w - tw) / 2.)) + y1 = int(round((h - th) / 2.)) + img_crop.append(img.crop((x1, y1, x1 + tw, y1 + th))) + + return img_crop + + +def group_scale(imgs, target_size): + resized_imgs = [] + for i in range(len(imgs)): + img = imgs[i] + w, h = img.size + if (w <= h and w == target_size) or (h <= w and h == target_size): + resized_imgs.append(img) + continue + + if w < h: + ow = target_size + oh = int(target_size * 4.0 / 3.0) + resized_imgs.append(img.resize((ow, oh), Image.BILINEAR)) + else: + oh = target_size + ow = int(target_size * 4.0 / 3.0) + resized_imgs.append(img.resize((ow, oh), Image.BILINEAR)) + + return resized_imgs + + +def imageloader(buf): + if isinstance(buf, str): + img = Image.open(StringIO(buf)) + else: + img = Image.open(BytesIO(buf)) + + return img.convert('RGB') + + +def video_loader(frames, nsample, seglen, mode): + videolen = len(frames) + average_dur = int(videolen / nsample) + + imgs = [] + for i in range(nsample): + idx = 0 + if mode == 'train': + if average_dur >= seglen: + idx = random.randint(0, average_dur - seglen) + idx += i * average_dur + elif average_dur >= 1: + idx += i * average_dur + else: + idx = i + else: + if average_dur >= seglen: + idx = (average_dur - seglen) // 2 + idx += i * average_dur + elif average_dur >= 1: + idx += i * average_dur + else: + idx = i + + for jj in range(idx, idx + seglen): + imgbuf = frames[int(jj % videolen)] + img = imageloader(imgbuf) + imgs.append(img) + + return imgs + + +def mp4_loader(filepath, nsample, seglen, mode): + cap = cv2.VideoCapture(filepath) + videolen = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) + sampledFrames = [] + for i in range(videolen): + ret, frame = cap.read() + # the first frame may be empty + if not ret: + continue + img = frame[:, :, ::-1] + sampledFrames.append(img) + average_dur =
int(len(sampledFrames) / nsample) + imgs = [] + for i in range(nsample): + idx = 0 + if mode == 'train': + if average_dur >= seglen: + idx = random.randint(0, average_dur - seglen) + idx += i * average_dur + elif average_dur >= 1: + idx += i * average_dur + else: + idx = i + else: + if average_dur >= seglen: + idx = (average_dur - 1) // 2 + idx += i * average_dur + elif average_dur >= 1: + idx += i * average_dur + else: + idx = i + + for jj in range(idx, idx + seglen): + imgbuf = sampledFrames[int(jj % len(sampledFrames))] + img = Image.fromarray(imgbuf, mode='RGB') + imgs.append(img) + + return imgs diff --git a/hub_module/modules/video/classification/videotag_tsn_lstm/resource/reader/reader_utils.py b/hub_module/modules/video/classification/videotag_tsn_lstm/resource/reader/reader_utils.py new file mode 100644 index 00000000..b3741188 --- /dev/null +++ b/hub_module/modules/video/classification/videotag_tsn_lstm/resource/reader/reader_utils.py @@ -0,0 +1,81 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. + +import pickle +import cv2 +import numpy as np +import random + + +class ReaderNotFoundError(Exception): + "Error: reader not found" + + def __init__(self, reader_name, avail_readers): + super(ReaderNotFoundError, self).__init__() + self.reader_name = reader_name + self.avail_readers = avail_readers + + def __str__(self): + msg = "Reader {} Not Found.\nAvailable readers:\n".format( self.reader_name) + for reader in self.avail_readers: + msg += " {}\n".format(reader) + return msg + + +class DataReader(object): + """data reader for video input""" + + def __init__(self, model_name, mode, cfg): + self.name = model_name + self.mode = mode + self.cfg = cfg + + def create_reader(self): + """Not implemented""" + pass + + def get_config_from_sec(self, sec, item, default=None): + if sec.upper() not in self.cfg: + return default + return self.cfg[sec.upper()].get(item, default) + + +class ReaderZoo(object): + def __init__(self): + self.reader_zoo = {} + + def regist(self, name, reader): + assert reader.__base__ == DataReader, "Unknown model type {}".format( type(reader)) + self.reader_zoo[name] = reader + + def get(self, name, mode, cfg): + for k, v in self.reader_zoo.items(): + if k == name: + return v(name, mode, cfg) + raise ReaderNotFoundError(name, self.reader_zoo.keys()) + + +# singleton reader_zoo +reader_zoo = ReaderZoo() + + +def regist_reader(name, reader): + reader_zoo.regist(name, reader) + + +def get_reader(name, mode, cfg): + reader_model = reader_zoo.get(name, mode, cfg) + return reader_model.create_reader() diff --git a/hub_module/modules/video/classification/videotag_tsn_lstm/resource/utils/__init__.py b/hub_module/modules/video/classification/videotag_tsn_lstm/resource/utils/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/hub_module/modules/video/classification/videotag_tsn_lstm/resource/utils/config_utils.py b/hub_module/modules/video/classification/videotag_tsn_lstm/resource/utils/config_utils.py
new file mode 100644 index 00000000..7be5ed7d --- /dev/null +++ b/hub_module/modules/video/classification/videotag_tsn_lstm/resource/utils/config_utils.py @@ -0,0 +1,75 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. + +import yaml +from .utility import AttrDict +import logging +logger = logging.getLogger(__name__) + +CONFIG_SECS = [ + 'train', + 'valid', + 'test', + 'infer', +] + + +def parse_config(cfg_file): + """Load a config file into AttrDict""" + import yaml + with open(cfg_file, 'r') as fopen: + yaml_config = AttrDict(yaml.load(fopen, Loader=yaml.Loader)) + create_attr_dict(yaml_config) + return yaml_config + + +def create_attr_dict(yaml_config): + from ast import literal_eval + for key, value in yaml_config.items(): + if type(value) is dict: + yaml_config[key] = value = AttrDict(value) + if isinstance(value, str): + try: + value = literal_eval(value) + except BaseException: + pass + if isinstance(value, AttrDict): + create_attr_dict(yaml_config[key]) + else: + yaml_config[key] = value + return + + +def merge_configs(cfg, sec, args_dict): + assert sec in CONFIG_SECS, "invalid config section {}".format(sec) + sec_dict = getattr(cfg, sec.upper()) + for k, v in args_dict.items(): + if v is None: + continue + try: + if hasattr(sec_dict, k): + setattr(sec_dict, k, v) + except: + pass + return cfg + + +def print_configs(cfg, mode): + logger.info( + "---------------- {:>5} Arguments ----------------".format(mode)) + for sec, sec_items in cfg.items(): + logger.info("{}:".format(sec)) + for k, v in sec_items.items(): + logger.info(" {}:{}".format(k, v)) + logger.info("-------------------------------------------------") diff --git a/hub_module/modules/video/classification/videotag_tsn_lstm/resource/utils/train_utils.py b/hub_module/modules/video/classification/videotag_tsn_lstm/resource/utils/train_utils.py new file mode 100644 index 00000000..d84d80af --- /dev/null +++ b/hub_module/modules/video/classification/videotag_tsn_lstm/resource/utils/train_utils.py @@ -0,0 +1,177 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
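As a quick usage sketch for `parse_config`/`merge_configs` from `config_utils.py` above (the import path and key names are illustrative; the real keys live in `resource/configs/*.yaml`):

```python
from resource.utils.config_utils import parse_config, merge_configs, print_configs

# parse_config returns a nested AttrDict, so sections and keys support
# attribute access; section names are upper-case in the YAML files.
cfg = parse_config('resource/configs/tsn.yaml')
seg_num = cfg.MODEL.seg_num            # same as cfg['MODEL']['seg_num']

# merge_configs copies non-None overrides into one section, e.g. INFER
cfg = merge_configs(cfg, 'infer', {'batch_size': 1, 'filelist': None})
print_configs(cfg, 'Infer')
```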
+ +import os +import sys +import time +import numpy as np +import paddle +import paddle.fluid as fluid +from paddle.fluid import profiler +import logging +import shutil + +logger = logging.getLogger(__name__) + + +def log_lr_and_step(): + try: + # In optimizers, if learning_rate is set as constant, lr_var + # name is 'learning_rate_0', and iteration counter is not + # recorded. If learning_rate is set as decayed values from + # learning_rate_scheduler, lr_var name is 'learning_rate', + # and iteration counter is recorded with name '@LR_DECAY_COUNTER@', + # a better implementation is required here + lr_var = fluid.global_scope().find_var("learning_rate") + if not lr_var: + lr_var = fluid.global_scope().find_var("learning_rate_0") + lr = np.array(lr_var.get_tensor()) + + lr_count = '[-]' + lr_count_var = fluid.global_scope().find_var("@LR_DECAY_COUNTER@") + if lr_count_var: + lr_count = np.array(lr_count_var.get_tensor()) + logger.info( "------- learning rate {}, learning rate counter {} -----".format( np.array(lr), np.array(lr_count))) + except: + logger.warning("Unable to get learning_rate and LR_DECAY_COUNTER.") + + +def test_with_dataloader(exe, + compiled_test_prog, + test_dataloader, + test_fetch_list, + test_metrics, + log_interval=0, + save_model_name=''): + if not test_dataloader: + logger.error("[TEST] get dataloader failed.") + test_metrics.reset() + test_iter = 0 + + for data in test_dataloader(): + test_outs = exe.run( compiled_test_prog, fetch_list=test_fetch_list, feed=data) + test_metrics.accumulate(test_outs) + if log_interval > 0 and test_iter % log_interval == 0: + test_metrics.calculate_and_log_out(test_outs, \ + info = '[TEST] test_iter {} '.format(test_iter)) + test_iter += 1 + test_metrics.finalize_and_log_out("[TEST] Finish") + + +def train_with_dataloader(exe, train_prog, compiled_train_prog, train_dataloader, \ + train_fetch_list, train_metrics, epochs = 10, \ + log_interval = 0, valid_interval = 0, save_dir = './', \ + save_model_name = 'model', fix_random_seed = False, \ + compiled_test_prog = None, test_dataloader = None, \ + test_fetch_list = None, test_metrics = None, \ + is_profiler = None, profiler_path = None): + if not train_dataloader: + logger.error("[TRAIN] get dataloader failed.") + epoch_periods = [] + train_loss = 0 + for epoch in range(epochs): + log_lr_and_step() + + train_iter = 0 + epoch_periods = [] + + for data in train_dataloader(): + cur_time = time.time() + train_outs = exe.run( compiled_train_prog, fetch_list=train_fetch_list, feed=data) + period = time.time() - cur_time + epoch_periods.append(period) + if log_interval > 0 and (train_iter % log_interval == 0): + train_metrics.calculate_and_log_out(train_outs, \ + info = '[TRAIN] Epoch {}, iter {} '.format(epoch, train_iter)) + train_iter += 1 + + # NOTE: profiler tools, used for benchmark + if is_profiler and epoch == 0 and train_iter == log_interval: + profiler.start_profiler("All") + elif is_profiler and epoch == 0 and train_iter == log_interval + 5: + profiler.stop_profiler("total", profiler_path) + return + + if len(epoch_periods) < 1: + logger.info( 'No iteration was executed, please check the data reader') + sys.exit(1) + + logger.info( '[TRAIN] Epoch {} training finished, average time: {}'.format( epoch, np.mean(epoch_periods[1:]))) + save_model( exe, train_prog, save_dir, save_model_name, "_epoch{}".format(epoch), save_type='.pdckpt') + save_model( exe, train_prog, save_dir, save_model_name, "_epoch{}".format(epoch), save_type='.pdparams') + if
compiled_test_prog and valid_interval > 0 and ( + epoch + 1) % valid_interval == 0: + test_with_dataloader(exe, compiled_test_prog, test_dataloader, + test_fetch_list, test_metrics, log_interval, + save_model_name) + + save_model( + exe, + train_prog, + save_dir, + save_model_name, + '_final', + save_type='.pdckpt') + save_model( + exe, + train_prog, + save_dir, + save_model_name, + '_final', + save_type='.pdparams') + #when fix_random seed for debug + if fix_random_seed: + cards = os.environ.get('CUDA_VISIBLE_DEVICES') + gpu_num = len(cards.split(",")) + print("kpis\ttrain_cost_card{}\t{}".format(gpu_num, train_loss)) + print("kpis\ttrain_speed_card{}\t{}".format(gpu_num, + np.mean(epoch_periods))) + + +def save_model(exe, + program, + save_dir, + model_name, + postfix=None, + save_type='.pdckpt'): + """ + save_type: '.pdckpt' or '.pdparams', '.pdckpt' for all persistable variables, + '.pdparams' for parameters only + """ + if not os.path.isdir(save_dir): + os.makedirs(save_dir) + saved_model_name = model_name + postfix + + fluid.save(program, os.path.join(save_dir, saved_model_name)) + + return diff --git a/hub_module/modules/video/classification/videotag_tsn_lstm/resource/utils/utility.py b/hub_module/modules/video/classification/videotag_tsn_lstm/resource/utils/utility.py new file mode 100644 index 00000000..ced1e7d7 --- /dev/null +++ b/hub_module/modules/video/classification/videotag_tsn_lstm/resource/utils/utility.py @@ -0,0 +1,71 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. + +import os +import sys +import signal +import logging +import paddle +import paddle.fluid as fluid + +__all__ = ['AttrDict'] + +logger = logging.getLogger(__name__) + + +def _term(sig_num, addition): + print('current pid is %s, group id is %s' % (os.getpid(), os.getpgrp())) + os.killpg(os.getpgid(os.getpid()), signal.SIGKILL) + + +signal.signal(signal.SIGTERM, _term) +signal.signal(signal.SIGINT, _term) + + +class AttrDict(dict): + def __getattr__(self, key): + return self[key] + + def __setattr__(self, key, value): + if key in self.__dict__: + self.__dict__[key] = value + else: + self[key] = value + +def check_cuda(use_cuda, err = \ + "\nYou can not set use_gpu = True in the model because you are using paddlepaddle-cpu.\n \ + Please: 1. Install paddlepaddle-gpu to run your models on GPU or 2. Set use_gpu = False to run models on CPU.\n" + ): + try: + if use_cuda == True and fluid.is_compiled_with_cuda() == False: + print(err) + sys.exit(1) + except Exception as e: + pass + + +def check_version(): + """ + Log error and exit when the installed version of paddlepaddle is + not satisfied. + """ + err = "PaddlePaddle version 1.6 or higher is required, " \ + "or a suitable develop version is satisfied as well. \n" \ + "Please make sure the version is good with your code." 
\ + + try: + fluid.require_version('1.6.0') + except Exception as e: + logger.error(err) + sys.exit(1) diff --git a/hub_module/scripts/configs/videotag_tsn_lstm.yml b/hub_module/scripts/configs/videotag_tsn_lstm.yml new file mode 100644 index 00000000..b0dca8a0 --- /dev/null +++ b/hub_module/scripts/configs/videotag_tsn_lstm.yml @@ -0,0 +1,9 @@ +name: videotag_tsn_lstm +dir: "modules/video/classification/videotag_tsn_lstm" +exclude: + - README.md +resources: + - + url: https://paddlehub.bj.bcebos.com/model/video/video_classifcation/videotag_tsn_lstm.tar.gz + dest: weights + uncompress: True -- GitLab
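For reference, the sampling rule shared by `video_loader` and `mp4_loader` above splits a clip into `nsample` equal chunks and takes one `seglen`-frame snippet per chunk: a random offset inside the chunk at train time, the chunk center otherwise. A standalone sketch of the index arithmetic (pure Python, no I/O; it follows `video_loader`'s centering, while `mp4_loader` centers with `(average_dur - 1) // 2`, which is identical when `seglen == 1`):

```python
import random

def sample_indices(videolen, nsample, seglen, train=False):
    """Frame indices the readers above would pick: seglen per segment."""
    average_dur = videolen // nsample
    picked = []
    for i in range(nsample):
        if average_dur >= seglen:
            off = random.randint(0, average_dur - seglen) if train \
                else (average_dur - seglen) // 2
            idx = i * average_dur + off
        elif average_dur >= 1:
            idx = i * average_dur
        else:
            idx = i
        # wrap around short clips, as the readers do with jj % videolen
        picked.extend(j % videolen for j in range(idx, idx + seglen))
    return picked

print(sample_indices(300, 3, 1))   # eval mode -> [49, 149, 249]
```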