Commit 90fce5a7 authored by: Y yudongxu (Xu Yudong)

Merge branch 'develop' of https://github.com/PaddlePaddle/models into pr_5291452

@@ -12,7 +12,7 @@
## Installation
Running the sample code in this directory requires the develop (or later) version of PaddlePaddle Fluid. If the PaddlePaddle in your environment is older than this, please update it following the [installation guide](http://www.paddlepaddle.org/).
Running the sample code in this directory requires PaddlePaddle Fluid 1.8.0 or later. If the PaddlePaddle in your environment is older than this, please update it following the [installation guide](http://www.paddlepaddle.org/).
## Introduction
@@ -27,16 +27,23 @@ RRPN is a two-stage object detector extended from Faster RCNN and can be used for
### Compiling the custom OPs
**Note:** PaddlePaddle installed via pip is built with GCC 4.8. Since the C++11 ABI of GCC 4.8 is incompatible with that of GCC 5 and later, custom OPs you write must also be compiled with GCC 4.8. To use custom OPs in a GCC 5+ environment, we recommend installing PaddlePaddle with Docker, so that Paddle and the custom OPs are compiled with the same GCC version.
Compile the custom OPs as follows:
Enter the `models/ext_op/src` directory and run the build script:
```
cd models/ext_op/src
sh make.sh ${cuda_path} ${cudnn_path} ${nccl_path}
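# for example, with hypothetical install locations:
# sh make.sh /usr/local/cuda /usr/local/cudnn /usr/local/nccl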
```
where ${cuda_path}, ${cudnn_path}, and ${nccl_path} are the install paths of CUDA, cuDNN, and NCCL, and must be specified on the command line.
After a successful build, `rrpn_lib.so` is generated in the `ext_op/src` directory
After a successful build, `rrpn_lib.so` is generated in the `ext_op/src` directory.
Add the directory containing `rrpn_lib.so`, together with the directory containing libpaddle_framework.so (i.e. the path returned by paddle.sysconfig.get_lib()), to the LD_LIBRARY_PATH environment variable:
```
# if rrpn_lib.so is located at rrpn/models/ext_op/src/, set on Linux:
export LD_LIBRARY_PATH=rrpn/models/ext_op/src/:$( python -c 'import paddle; print(paddle.sysconfig.get_lib())'):$LD_LIBRARY_PATH
```
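To verify the build, you can try loading the library from Python before training (a minimal sketch; adjust the path to wherever rrpn_lib.so was generated):
```
import paddle.fluid as fluid

# Loading the custom OP library registers the RRPN operators with Paddle.
fluid.load_op_library('models/ext_op/src/rrpn_lib.so')
print('rrpn_lib.so loaded')
```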
## Data preparation
### Public dataset
Training is performed on the [ICDAR2015 dataset](https://rrc.cvc.uab.es/?ch=4&com=downloads); you must register on the official site before downloading it.
@@ -58,8 +65,8 @@ dataset/icdar2015/
│ ├── img_112.jpg
| ...
├── ch4_test_localization_transcription_gt
│ ├── img_111.jpg
│ ├── img_112.jpg
│ ├── img_111.txt
│ ├── img_112.txt
| ...
```
### Custom data
@@ -88,7 +95,7 @@ x1, y1, x2, y2, x3, y3, x4, y4, class_name
python train.py \
--model_save_dir=output/ \
--pretrained_model=${path_to_pretrain_model} \
--data_dir=${path_to_data} \
--data_dir=${path_to_icdar2015} \
```
@@ -126,7 +133,7 @@ x1, y1, x2, y2, x3, y3, x4, y4, class_name
```
python eval.py \
--dataset=icdar2015 \
--data_dir=${path_to_icdar2015} \
--pretrained_model=${path_to_trained_model}
```
@@ -143,10 +150,6 @@ RRPN
| [RRPN](https://paddleseg.bj.bcebos.com/deploy/temp/model_final.tar) |8 | 17500 | 0.8048 |
## Model inference and visualization
Inference returns the objects in an image together with their classes. `infer.py` is the main entry point and is invoked as follows:
......
@@ -41,6 +41,13 @@ def _load_state(path):
return state
def _strip_postfix(path):
path, ext = os.path.splitext(path)
assert ext in ['', '.pdparams', '.pdopt', '.pdmodel'], \
"Unknown postfix {} from weights".format(ext)
return path
def load_params(exe, prog, path):
"""
Load model from the given path.
@@ -50,20 +57,33 @@ def load_params(exe, prog, path):
path (string): URL string or local model path.
"""
if not os.path.exists(path):
path = _strip_postfix(path)
if not (os.path.isdir(path) or os.path.exists(path + '.pdparams')):
raise ValueError("Model pretrain path {} does not "
                 "exist.".format(path))
logger.info('Loading parameters from {}...'.format(path))
def _if_exist(var):
param_exist = os.path.exists(os.path.join(path, var.name))
do_load = param_exist
if do_load:
logger.debug('load weight {}'.format(var.name))
return do_load
ignore_set = set()
state = _load_state(path)
fluid.io.load_vars(exe, path, prog, predicate=_if_exist)
# ignore parameters whose shape mismatches between
# the model and the pretrained weights.
all_var_shape = {}
for block in prog.blocks:
for param in block.all_parameters():
all_var_shape[param.name] = param.shape
ignore_set.update([
name for name, shape in all_var_shape.items()
if name in state and shape != state[name].shape
])
if len(ignore_set) > 0:
for k in ignore_set:
if k in state:
logger.warning('variable {} is not loaded (shape mismatch)'.format(k))
del state[k]
fluid.io.set_program_state(prog, state)
def save(exe, prog, path):
@@ -83,6 +103,7 @@ def save(exe, prog, path):
def load_and_fusebn(exe, prog, path):
"""
Fuse batch norm parameters into scale and bias.
Args:
exe (fluid.Executor): The fluid.Executor object.
prog (fluid.Program): the Program object whose weights will be loaded and fused.
@@ -104,19 +125,12 @@ def load_and_fusebn(exe, prog, path):
# x is any prefix
mean_variances = set()
bn_vars = []
state = None
if os.path.exists(path + '.pdparams'):
state = _load_state(path)
state = _load_state(path)
def check_mean_and_bias(prefix):
m = prefix + 'mean'
v = prefix + 'variance'
if state:
return v in state and m in state
else:
return (os.path.exists(os.path.join(path, m)) and
os.path.exists(os.path.join(path, v)))
return v in state and m in state
has_mean_bias = True
@@ -156,16 +170,14 @@ def load_and_fusebn(exe, prog, path):
bn_vars.append(
[scale_name, bias_name, mean_name, variance_name])
if state:
fluid.io.set_program_state(prog, state)
else:
load_params(exe, prog, path)
if not has_mean_bias:
fluid.io.set_program_state(prog, state)
logger.warning(
"There is no paramters of batch norm in model {}. "
"Skip to fuse batch norm. And load paramters done.".format(path))
return
fluid.load(prog, path, exe)
eps = 1e-5
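# A sketch of the standard BN-fusion identity applied below: for
#   y = scale * (x - mean) / sqrt(var + eps) + bias,
# the statistics fold into an affine transform y = new_scale * x + new_bias with
#   new_scale = scale / sqrt(var + eps)
#   new_bias  = bias - scale * mean / sqrt(var + eps)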
for names in bn_vars:
scale_name, bias_name, mean_name, var_name = names
......
@@ -15,7 +15,7 @@
import paddle.fluid as fluid
from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.framework import Variable
fluid.load_op_library('models/ext_op/src/rrpn_lib.so')
fluid.load_op_library('rrpn_lib.so')
def rrpn_target_assign(bbox_pred,
......
@@ -27,7 +27,7 @@ git clone https://github.com/NVlabs/cub.git
nvcc rrpn_generate_proposals_op.cu -c -o rrpn_generate_proposals_op.cu.o -ccbin cc -DPADDLE_WITH_MKLDNN -DPADDLE_WITH_CUDA -DEIGEN_USE_GPU -DPADDLE_USE_DSO -Xcompiler -fPIC -std=c++11 -Xcompiler -fPIC -w --expt-relaxed-constexpr -O3 -DNVCC \
-I ${include_dir} \
-I ${include_dir}/third_party \
-I ${include_dir}/third_party \
-I ${CUDA}/include \
-I ${CUDNN}/include \
-I ${NCCL}/include \
......
@@ -165,8 +165,7 @@ public:
using framework::SingleGradOpMaker<T>::SingleGradOpMaker;
protected:
std::unique_ptr<T> Apply() const override {
std::unique_ptr<T> op(new T);
void Apply(GradOpPtr<T> op) const override {
op->SetType("rrpn_rotated_roi_align_grad");
op->SetInput("X", this->Input("X"));
op->SetInput("ROIs", this->Input("ROIs"));
@@ -175,12 +174,11 @@ protected:
op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out"));
op->SetOutput(framework::GradVarName("X"), this->InputGrad("X"));
op->SetAttrMap(this->Attrs());
return op;
}
};
DECLARE_NO_NEED_BUFFER_VARS_INFERENCE(
RRPNRotatedRoiAlignGradNoNeedBufVarsInferer, "X");
DECLARE_NO_NEED_BUFFER_VARS_INFERER(RRPNRotatedRoiAlignGradNoNeedBufVarsInferer,
"X");
} // namespace operators
} // namespace paddle
......
@@ -96,7 +96,6 @@ def RRPNData(mode,
continue
batch_out.append(datas)
end = time.time()
#print('reader time:', end - start)
if len(batch_out) == batch_size:
yield batch_out
count += 1
......
@@ -101,7 +101,6 @@ class ICDAR2015Dataset(object):
elif edge2 >= edge1:
width = edge2
height = edge1
# print pt2[0], pt3[0]
if pt2[0] - pt3[0] != 0:
angle = -np.arctan(
float(pt2[1] - pt3[1]) /
@@ -160,7 +159,6 @@ class ICDAR2015Dataset(object):
else:
hard_boxes.append([x_ctr, y_ctr, width, height, angle])
#print(easy_boxes)
if self.mode == 'train':
boxes.extend(easy_boxes)
# hard box only get 1/3 for train
@@ -173,8 +171,6 @@ class ICDAR2015Dataset(object):
is_difficult = [0] * len(easy_boxes)
is_difficult.extend([1] * int(len(hard_boxes)))
len_of_bboxes = len(boxes)
#is_difficult = [0] * len(easy_boxes)
#is_difficult.extend([1] * int(len(hard_boxes)))
is_difficult = np.array(is_difficult).reshape(
1, len_of_bboxes).astype(np.int32)
if self.mode == 'train':
@@ -221,11 +217,9 @@ class ICDAR2017Dataset(object):
def __init__(self, mode):
print('Creating: {}'.format(cfg.dataset))
self.name = cfg.data_dir
#print('**************', self.name)
self.mode = mode
data_path = DatasetPath(mode, self.name)
data_dir = data_path.get_data_dir()
#print("&**************", data_dir)
file_list = data_path.get_file_list()
self.image_dir = data_dir
self.gt_dir = file_list
@@ -245,15 +239,12 @@ class ICDAR2017Dataset(object):
labels_map = get_labels_maps()
for image in image_list:
prefix = image[:-4]
#print(image)
if image.split('.')[-1] not in post_fix:
continue
img_name = os.path.join(self.image_dir, image)
gt_name = os.path.join(self.gt_dir, 'gt_' + prefix + '.txt')
gt_classes = []
#boxes = []
#hard_boxes = []
boxes = []
gt_obj = open(gt_name, 'r', encoding='UTF-8-sig')
gt_txt = gt_obj.read()
@@ -293,7 +284,6 @@ class ICDAR2017Dataset(object):
elif edge2 >= edge1:
width = edge2
height = edge1
# print pt2[0], pt3[0]
if pt2[0] - pt3[0] != 0:
angle = -np.arctan(
float(pt2[1] - pt3[1]) /
@@ -312,7 +302,6 @@ class ICDAR2017Dataset(object):
else:
boxes.append([x_ctr, y_ctr, width, height, angle])
len_of_bboxes = len(boxes)
#print(len_of_bboxes)
is_difficult = np.zeros((len_of_bboxes, 1), dtype=np.int32)
if self.mode == 'train':
gt_boxes = np.zeros((len_of_bboxes, 5), dtype=np.int32)
@@ -332,7 +321,6 @@ class ICDAR2017Dataset(object):
boxes[idx][3], boxes[idx][4], boxes[idx][5],
boxes[idx][6], boxes[idx][7]
]
#gt_classes[idx] = 1
if gt_boxes.shape[0] <= 0:
continue
gt_boxes = gt_boxes.astype(np.float64)
......
@@ -154,7 +154,7 @@ def parse_args():
add_arg('pixel_means', float, [0.485, 0.456, 0.406], "pixel mean")
add_arg('nms_thresh', float, 0.3, "NMS threshold.")
add_arg('score_thresh', float, 0.01, "score threshold for NMS.")
add_arg('snapshot_stride', int, 1000, "save model every snapshot stride.")
add_arg('snapshot_iter', int, 1000, "save model every snapshot iter.")
# SINGLE EVAL AND DRAW
add_arg('draw_threshold', float, 0.8, "Confidence threshold to draw bbox.")
add_arg('image_path', str, 'ICDAR2015/tmp/', "The image path used to inference and visualize.")
......
# VideoTag: PaddlePaddle's Large-Scale Video Classification Model
---
## Contents
- [Model overview](#model-overview)
- [Installation](#installation)
- [Data preparation](#data-preparation)
- [Model inference](#model-inference)
- [Model fine-tuning](#model-fine-tuning)
- [Reference papers](#reference-papers)
## Model overview
VideoTag, PaddlePaddle's large-scale video classification model, is trained on tens of millions of samples from Baidu's short-video business. It supports 3000 practical tags drawn from industrial practice, generalizes well, and suits very large-scale (tens of millions to billions of videos) short-video classification. VideoTag uses two-stage modeling: image modeling and sequence learning. In the first stage, a small set of video samples (on the order of 100K) trains a large-scale video feature extractor (Extractor); in the second stage, tens of millions of samples train a predictor (Predictor), enabling industrial application on very large short-video collections. The pipeline is illustrated below.
<p align="center">
<img src="video_tag.png" height=220 width=800 hspace='10'/> <br />
VideoTag two-stage pipeline
</p>
- Data processing: a video is a set of images (frames) arranged in a particular order. The classification task first decodes a short video and then feeds the resulting frame sequence into VideoTag for training and prediction.
- Image modeling: a small number of samples per class is drawn uniformly from the training data to form a training set of roughly 100K videos. A TSN network is trained on it, and features are extracted for all video frames from the layer just before TSN's classification layer. Each frame thus becomes a feature vector, and a video becomes a feature sequence.
- Sequence learning: Attention clusters, LSTM, and NeXtVLAD model the feature sequences and learn how the features combine, further improving accuracy. Since sequence learning is much cheaper than image modeling, several complementary sequence models can be fused. The sample code uses only the Attention\_LSTM network for sequence prediction.
- Prediction: the results of multiple models are fused to classify the video, further improving accuracy.
## Installation
Running the sample code requires PaddlePaddle >= 1.7.0; see the [installation guide](https://www.paddlepaddle.org.cn/documentation/docs/zh/1.7/install/index_cn.html) to install or update PaddlePaddle.
- Environment dependencies:
```
CUDA >= 9.0
cudnn >= 7.5
OpenCV >= 4.1.0 : pip install opencv-python
```
## Data preparation
- Pretrained weights: we provide pretrained [TSN](https://videotag.bj.bcebos.com/video_tag_tsn.tar) and [AttentionLSTM](https://videotag.bj.bcebos.com/video_tag_lstm.tar) weights. Download and extract them, and place the parameter files under the weights directory, structured as follows:
```
video_tag
├──weights
├── attention_lstm.pdmodel
├── attention_lstm.pdopt
├── attention_lstm.pdparams
├── tsn.pdmodel
├── tsn.pdopt
└── tsn.pdparams
```
- Sample videos: we provide [sample videos](https://videotag.bj.bcebos.com/mp4.tar) for testing. Download and extract them, and place the video files under the video\_tag/data/mp4 directory, structured as follows:
```
video_tag
├──data
├── mp4
├── 1.mp4
└── 2.mp4
```
- Supported input video formats: mp4, mkv, and webm;
- The model uniformly samples 300 frames from the input video for prediction. For long videos, we recommend trimming to the relevant segment first to speed up prediction (see the sampling sketch below).
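For reference, uniform sampling can be sketched with OpenCV as follows (a hypothetical helper for illustration, not part of the VideoTag code; the function name and defaults are assumptions):
```
import cv2
import numpy as np

def sample_frames_uniform(video_path, num_frames=300):
    # Decode the video and keep num_frames frames at evenly spaced indices.
    cap = cv2.VideoCapture(video_path)
    total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    wanted = set(np.linspace(0, max(total - 1, 0), num_frames).astype(int))
    frames, pos = [], 0
    while True:
        ok, frame = cap.read()
        if not ok:
            break
        if pos in wanted:
            frames.append(frame)
        pos += 1
    cap.release()
    return frames
```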
## Model inference
Start inference by running `bash run_TSN_LSTM.sh`.
- You can edit video\_tag/data/tsn.list to specify the list of files to run inference on;
- --filelist specifies the input list file, default video\_tag/data/tsn.list;
- --extractor\_weights specifies the path of the feature-extractor parameters, default video\_tag/weights/tsn;
- --predictor\_weights specifies the path of the predictor parameters, default video\_tag/weights/attention\_lstm;
- --use\_gpu specifies whether to run inference on GPU, which is the default. For a short video of about 10s, GPU inference takes roughly 4s;
- --save\_dir specifies where prediction results are stored, default video\_tag/data/results; results are saved as JSON in the following format (see the parsing sketch after this list):
```
[file_path,
{"class_name": class_name1, "probability": probability1, "class_id": class_id1},
{"class_name": class_name2, "probability": probability2, "class_id": class_id2},
...
]
```
- --label\_file specifies the label file path, default video\_tag/label\_3396.txt;
- Model configuration lives in the yaml files under the video\_tag/configs directory (see the loading sketch after this list).
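For illustration, a saved result file can be read back like this (a minimal sketch; the file name follows the result<index>.json naming used by the metrics code, under the default save\_dir):
```
import io
import json

# Each result file holds [file_path, {...}, {...}, ...] as described above.
with io.open('data/results/result0.json', encoding='utf-8') as f:
    result = json.load(f)

video_path, predictions = result[0], result[1:]
for pred in predictions:
    print(pred['class_name'], pred['probability'])
```
Similarly, a config such as the TSN yaml shown later on this page can be loaded into a plain dict (a sketch assuming PyYAML is installed and a hypothetical file path; the repository wraps configs in AttrDict-style objects, see the AttrDict helper further down):
```
import yaml

with open('configs/tsn.yaml') as f:
    cfg = yaml.safe_load(f)
print(cfg['MODEL']['name'], cfg['INFER']['seg_num'])  # TSN 300
```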
## Model fine-tuning
- The TSN model in VideoTag only outputs video features and does not produce final classification results. To fine-tune it, refer to the [TSN video classification model](../../models/tsn/README.md) in the PaddleCV video library and modify the model file accordingly.
- The attention\_lstm model in VideoTag takes only video features as input and needs no audio features. To fine-tune it, refer to the [AttentionLSTM video classification model](../../models/attention_lstm/README.md) in the PaddleCV video library and modify the model file accordingly.
## Reference papers
- [Temporal Segment Networks: Towards Good Practices for Deep Action Recognition](https://arxiv.org/abs/1608.00859), Limin Wang, Yuanjun Xiong, Zhe Wang, Yu Qiao, Dahua Lin, Xiaoou Tang, Luc Van Gool
- [Beyond Short Snippets: Deep Networks for Video Classification](https://arxiv.org/abs/1503.08909), Joe Yue-Hei Ng, Matthew Hausknecht, Sudheendra Vijayanarasimhan, Oriol Vinyals, Rajat Monga, George Toderici
MODEL:
name: "AttentionLSTM"
dataset: None
bone_nework: None
drop_rate: 0.5
feature_num: 2
feature_names: ['rgb']
feature_dims: [2048]
embedding_size: 1024
lstm_size: 512
num_classes: 3396
topk: 20
INFER:
batch_size: 1
MODEL:
name: "TSN"
format: "mp4"
num_classes: 400
seglen: 1
image_mean: [0.485, 0.456, 0.406]
image_std: [0.229, 0.224, 0.225]
num_layers: 50
topk: 5
INFER:
seg_num: 300
short_size: 256
target_size: 224
num_reader_threads: 1
buf_size: 1024
batch_size: 1
kinetics_labels: None
video_path: ""
filelist: "./data/tsn.list"
This diff is collapsed.
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
from __future__ import absolute_import
from __future__ import unicode_literals
from __future__ import print_function
from __future__ import division
import os
import io
import logging
import numpy as np
import json
from metrics.youtube8m import eval_util as youtube8m_metrics
logger = logging.getLogger(__name__)
class Metrics(object):
def __init__(self, name, mode, metrics_args):
"""Not implemented"""
pass
def calculate_and_log_out(self, fetch_list, info=''):
"""Not implemented"""
pass
def accumulate(self, fetch_list, info=''):
"""Not implemented"""
pass
def finalize_and_log_out(self, info='', savedir='./'):
"""Not implemented"""
pass
def reset(self):
"""Not implemented"""
pass
class Youtube8mMetrics(Metrics):
def __init__(self, name, mode, metrics_args):
self.name = name
self.mode = mode
self.num_classes = metrics_args['MODEL']['num_classes']
self.topk = metrics_args['MODEL']['topk']
self.calculator = youtube8m_metrics.EvaluationMetrics(self.num_classes,
self.topk)
if self.mode == 'infer':
self.infer_results = []
def calculate_and_log_out(self, fetch_list, info=''):
loss = np.mean(np.array(fetch_list[0]))
pred = np.array(fetch_list[1])
label = np.array(fetch_list[2])
hit_at_one = youtube8m_metrics.calculate_hit_at_one(pred, label)
perr = youtube8m_metrics.calculate_precision_at_equal_recall_rate(pred,
label)
gap = youtube8m_metrics.calculate_gap(pred, label)
logger.info(info + ' , loss = {0}, Hit@1 = {1}, PERR = {2}, GAP = {3}'.format(\
'%.6f' % loss, '%.2f' % hit_at_one, '%.2f' % perr, '%.2f' % gap))
def accumulate(self, fetch_list, info=''):
if self.mode == 'infer':
predictions = np.array(fetch_list[0])
video_id = fetch_list[1]
for i in range(len(predictions)):
topk_inds = predictions[i].argsort()[-self.topk:]
topk_inds = topk_inds[::-1]
preds = predictions[i][topk_inds]
self.infer_results.append(
(video_id[i], topk_inds.tolist(), preds.tolist()))
else:
loss = np.array(fetch_list[0])
pred = np.array(fetch_list[1])
label = np.array(fetch_list[2])
self.calculator.accumulate(loss, pred, label)
def finalize_and_log_out(self,
info='',
savedir='./data/results',
label_file='./label_3396.txt'):
if self.mode == 'infer':
for index, item in enumerate(self.infer_results):
video_id = item[0]
logger.info(
'========video_id [ {} ] , topk({}) preds: ========\n'.
format(video_id, self.topk))
f = io.open(label_file, "r", encoding="utf-8")
fl = f.readlines()
f.close()
res_list = []
res_list.append(video_id)
for i in range(len(item[1])):
class_id = item[1][i]
class_prob = item[2][i]
class_name = fl[class_id].split('\n')[0]
print('class_id: {},'.format(class_id), 'class_name:',
class_name,
', probability: {} \n'.format(class_prob))
save_dict = {
"'class_id": class_id,
"class_name": class_name,
"probability": class_prob
}
res_list.append(save_dict)
# save infer result into output dir
with io.open(
os.path.join(savedir, 'result' + str(index) + '.json'),
'w',
encoding='utf-8') as f:
f.write(json.dumps(res_list, ensure_ascii=False))
else:
epoch_info_dict = self.calculator.get()
logger.info(info + '\tavg_hit_at_one: {0},\tavg_perr: {1},\tavg_loss :{2},\taps: {3},\tgap:{4}'\
.format(epoch_info_dict['avg_hit_at_one'], epoch_info_dict['avg_perr'], \
epoch_info_dict['avg_loss'], epoch_info_dict['aps'], epoch_info_dict['gap']))
def reset(self):
self.calculator.clear()
if self.mode == 'infer':
self.infer_results = []
class MetricsZoo(object):
def __init__(self):
self.metrics_zoo = {}
def regist(self, name, metrics):
assert metrics.__base__ == Metrics, "Unknown metrics type {}".format(
    type(metrics))
self.metrics_zoo[name] = metrics
def get(self, name, mode, cfg):
for k, v in self.metrics_zoo.items():
if k == name:
return v(name, mode, cfg)
raise MetricsNotFoundError(name, self.metrics_zoo.keys())
# singleton metrics_zoo
metrics_zoo = MetricsZoo()
def regist_metrics(name, metrics):
metrics_zoo.regist(name, metrics)
def get_metrics(name, mode, cfg):
return metrics_zoo.get(name, mode, cfg)
# sort by alphabet
regist_metrics("ATTENTIONCLUSTER", Youtube8mMetrics)
regist_metrics("ATTENTIONLSTM", Youtube8mMetrics)
regist_metrics("NEXTVLAD", Youtube8mMetrics)
# Copyright 2016 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Calculate or keep track of the interpolated average precision.
It provides an interface for calculating interpolated average precision for an
entire list or the top-n ranked items. For the definition of the
(non-)interpolated average precision:
http://trec.nist.gov/pubs/trec15/appendices/CE.MEASURES06.pdf
Example usages:
1) Use it as a static function call to directly calculate average precision for
a short ranked list in the memory.
```
import random
p = np.array([random.random() for _ in xrange(10)])
a = np.array([random.choice([0, 1]) for _ in xrange(10)])
ap = average_precision_calculator.AveragePrecisionCalculator.ap(p, a)
```
2) Use it as an object for long ranked list that cannot be stored in memory or
the case where partial predictions can be observed at a time (Tensorflow
predictions). In this case, we first call the function accumulate many times
to process parts of the ranked list. After processing all the parts, we call
peek_interpolated_ap_at_n.
```
p1 = np.array([random.random() for _ in xrange(5)])
a1 = np.array([random.choice([0, 1]) for _ in xrange(5)])
p2 = np.array([random.random() for _ in xrange(5)])
a2 = np.array([random.choice([0, 1]) for _ in xrange(5)])
# interpolated average precision at 10 using 1000 break points
calculator = average_precision_calculator.AveragePrecisionCalculator(10)
calculator.accumulate(p1, a1)
calculator.accumulate(p2, a2)
ap3 = calculator.peek_ap_at_n()
```
"""
import heapq
import random
import numbers
import numpy
class AveragePrecisionCalculator(object):
"""Calculate the average precision and average precision at n."""
def __init__(self, top_n=None):
"""Construct an AveragePrecisionCalculator to calculate average precision.
This class is used to calculate the average precision for a single label.
Args:
top_n: A positive Integer specifying the average precision at n, or
None to use all provided data points.
Raises:
ValueError: An error occurred when the top_n is not a positive integer.
"""
if not ((isinstance(top_n, int) and top_n > 0) or top_n is None):
raise ValueError("top_n must be a positive integer or None.")
self._top_n = top_n # average precision at n
self._total_positives = 0  # total number of positives seen so far
self._heap = [] # max heap of (prediction, actual)
@property
def heap_size(self):
"""Gets the heap size maintained in the class."""
return len(self._heap)
@property
def num_accumulated_positives(self):
"""Gets the number of positive samples that have been accumulated."""
return self._total_positives
def accumulate(self, predictions, actuals, num_positives=None):
"""Accumulate the predictions and their ground truth labels.
After the function call, we may call peek_ap_at_n to actually calculate
the average precision.
Note predictions and actuals must have the same shape.
Args:
predictions: a list storing the prediction scores.
actuals: a list storing the ground truth labels. Any value
larger than 0 will be treated as positives, otherwise as negatives.
num_positives: If the 'predictions' and 'actuals' inputs aren't complete,
then it's possible some true positives were missed in them. In that case,
you can provide 'num_positives' in order to accurately track recall.
Raises:
ValueError: An error occurred when the format of the input is not the
numpy 1-D array or the shape of predictions and actuals does not match.
"""
if len(predictions) != len(actuals):
raise ValueError(
"the shape of predictions and actuals does not match.")
if num_positives is not None:
if not isinstance(num_positives,
numbers.Number) or num_positives < 0:
raise ValueError(
"'num_positives' was provided but it wan't a nonzero number."
)
if num_positives is not None:
self._total_positives += num_positives
else:
self._total_positives += numpy.size(numpy.where(actuals > 0))
topk = self._top_n
heap = self._heap
for i in range(numpy.size(predictions)):
if topk is None or len(heap) < topk:
heapq.heappush(heap, (predictions[i], actuals[i]))
else:
if predictions[i] > heap[0][0]: # heap[0] is the smallest
heapq.heappop(heap)
heapq.heappush(heap, (predictions[i], actuals[i]))
def clear(self):
"""Clear the accumulated predictions."""
self._heap = []
self._total_positives = 0
def peek_ap_at_n(self):
"""Peek the non-interpolated average precision at n.
Returns:
The non-interpolated average precision at n (default 0).
If n is larger than the length of the ranked list,
the average precision will be returned.
"""
if self.heap_size <= 0:
return 0
predlists = numpy.array(list(zip(*self._heap)))
ap = self.ap_at_n(
predlists[0],
predlists[1],
n=self._top_n,
total_num_positives=self._total_positives)
return ap
@staticmethod
def ap(predictions, actuals):
"""Calculate the non-interpolated average precision.
Args:
predictions: a numpy 1-D array storing the sparse prediction scores.
actuals: a numpy 1-D array storing the ground truth labels. Any value
larger than 0 will be treated as positives, otherwise as negatives.
Returns:
The non-interpolated average precision at n.
If n is larger than the length of the ranked list,
the average precision will be returned.
Raises:
ValueError: An error occurred when the format of the input is not the
numpy 1-D array or the shape of predictions and actuals does not match.
"""
return AveragePrecisionCalculator.ap_at_n(predictions, actuals, n=None)
@staticmethod
def ap_at_n(predictions, actuals, n=20, total_num_positives=None):
"""Calculate the non-interpolated average precision.
Args:
predictions: a numpy 1-D array storing the sparse prediction scores.
actuals: a numpy 1-D array storing the ground truth labels. Any value
larger than 0 will be treated as positives, otherwise as negatives.
n: the top n items to be considered in ap@n.
total_num_positives : (optionally) you can specify the number of total
positive
in the list. If specified, it will be used in calculation.
Returns:
The non-interpolated average precision at n.
If n is larger than the length of the ranked list,
the average precision will be returned.
Raises:
ValueError: An error occurred when
1) the format of the input is not the numpy 1-D array;
2) the shape of predictions and actuals does not match;
3) the input n is not a positive integer.
"""
if len(predictions) != len(actuals):
raise ValueError(
"the shape of predictions and actuals does not match.")
if n is not None:
if not isinstance(n, int) or n <= 0:
raise ValueError("n must be 'None' or a positive integer."
" It was '%s'." % n)
ap = 0.0
predictions = numpy.array(predictions)
actuals = numpy.array(actuals)
# add a shuffler to avoid overestimating the ap
predictions, actuals = AveragePrecisionCalculator._shuffle(predictions,
actuals)
sortidx = sorted(
range(len(predictions)), key=lambda k: predictions[k], reverse=True)
if total_num_positives is None:
numpos = numpy.size(numpy.where(actuals > 0))
else:
numpos = total_num_positives
if numpos == 0:
return 0
if n is not None:
numpos = min(numpos, n)
delta_recall = 1.0 / numpos
poscount = 0.0
# calculate the ap
r = len(sortidx)
if n is not None:
r = min(r, n)
for i in range(r):
if actuals[sortidx[i]] > 0:
poscount += 1
ap += poscount / (i + 1) * delta_recall
return ap
@staticmethod
def _shuffle(predictions, actuals):
random.seed(0)
suffidx = random.sample(range(len(predictions)), len(predictions))
predictions = predictions[suffidx]
actuals = actuals[suffidx]
return predictions, actuals
@staticmethod
def _zero_one_normalize(predictions, epsilon=1e-7):
"""Normalize the predictions to the range between 0.0 and 1.0.
For some predictions like SVM predictions, we need to normalize them before
calculate the interpolated average precision. The normalization will not
change the rank in the original list and thus won't change the average
precision.
Args:
predictions: a numpy 1-D array storing the sparse prediction scores.
epsilon: a small constant to avoid denominator being zero.
Returns:
The normalized prediction.
"""
denominator = numpy.max(predictions) - numpy.min(predictions)
ret = (predictions - numpy.min(predictions)) / numpy.maximum(denominator,
                                                             epsilon)
return ret
# Copyright 2016 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Provides functions to help with evaluating models."""
import datetime
import numpy
from . import mean_average_precision_calculator as map_calculator
from . import average_precision_calculator as ap_calculator
def flatten(l):
""" Merges a list of lists into a single list. """
return [item for sublist in l for item in sublist]
def calculate_hit_at_one(predictions, actuals):
"""Performs a local (numpy) calculation of the hit at one.
Args:
predictions: Matrix containing the outputs of the model.
Dimensions are 'batch' x 'num_classes'.
actuals: Matrix containing the ground truth labels.
Dimensions are 'batch' x 'num_classes'.
Returns:
float: The average hit at one across the entire batch.
"""
top_prediction = numpy.argmax(predictions, 1)
hits = actuals[numpy.arange(actuals.shape[0]), top_prediction]
return numpy.average(hits)
def calculate_precision_at_equal_recall_rate(predictions, actuals):
"""Performs a local (numpy) calculation of the PERR.
Args:
predictions: Matrix containing the outputs of the model.
Dimensions are 'batch' x 'num_classes'.
actuals: Matrix containing the ground truth labels.
Dimensions are 'batch' x 'num_classes'.
Returns:
float: The average precision at equal recall rate across the entire batch.
"""
aggregated_precision = 0.0
num_videos = actuals.shape[0]
for row in numpy.arange(num_videos):
num_labels = int(numpy.sum(actuals[row]))
top_indices = numpy.argpartition(predictions[row],
-num_labels)[-num_labels:]
item_precision = 0.0
for label_index in top_indices:
if predictions[row][label_index] > 0:
item_precision += actuals[row][label_index]
item_precision /= top_indices.size
aggregated_precision += item_precision
aggregated_precision /= num_videos
return aggregated_precision
def calculate_gap(predictions, actuals, top_k=20):
"""Performs a local (numpy) calculation of the global average precision.
Only the top_k predictions are taken for each of the videos.
Args:
predictions: Matrix containing the outputs of the model.
Dimensions are 'batch' x 'num_classes'.
actuals: Matrix containing the ground truth labels.
Dimensions are 'batch' x 'num_classes'.
top_k: How many predictions to use per video.
Returns:
float: The global average precision.
"""
gap_calculator = ap_calculator.AveragePrecisionCalculator()
sparse_predictions, sparse_labels, num_positives = top_k_by_class(
predictions, actuals, top_k)
gap_calculator.accumulate(
flatten(sparse_predictions), flatten(sparse_labels), sum(num_positives))
return gap_calculator.peek_ap_at_n()
def top_k_by_class(predictions, labels, k=20):
"""Extracts the top k predictions for each video, sorted by class.
Args:
predictions: A numpy matrix containing the outputs of the model.
Dimensions are 'batch' x 'num_classes'.
k: the top k non-zero entries to preserve in each prediction.
Returns:
A tuple (predictions,labels, true_positives). 'predictions' and 'labels'
are lists of lists of floats. 'true_positives' is a list of scalars. The
lengths of the lists are equal to the number of classes. The entries in the
predictions variable are probability predictions, and
the corresponding entries in the labels variable are the ground truth for
those predictions. The entries in 'true_positives' are the number of true
positives for each class in the ground truth.
Raises:
ValueError: An error occurred when k is not a positive integer.
"""
if k <= 0:
raise ValueError("k must be a positive integer.")
k = min(k, predictions.shape[1])
num_classes = predictions.shape[1]
prediction_triplets = []
for video_index in range(predictions.shape[0]):
prediction_triplets.extend(
top_k_triplets(predictions[video_index], labels[video_index], k))
out_predictions = [[] for v in range(num_classes)]
out_labels = [[] for v in range(num_classes)]
for triplet in prediction_triplets:
out_predictions[triplet[0]].append(triplet[1])
out_labels[triplet[0]].append(triplet[2])
out_true_positives = [numpy.sum(labels[:, i]) for i in range(num_classes)]
return out_predictions, out_labels, out_true_positives
def top_k_triplets(predictions, labels, k=20):
"""Get the top_k for a 1-d numpy array. Returns a sparse list of tuples in
(prediction, class) format"""
m = len(predictions)
k = min(k, m)
indices = numpy.argpartition(predictions, -k)[-k:]
return [(index, predictions[index], labels[index]) for index in indices]
class EvaluationMetrics(object):
"""A class to store the evaluation metrics."""
def __init__(self, num_class, top_k):
"""Construct an EvaluationMetrics object to store the evaluation metrics.
Args:
num_class: A positive integer specifying the number of classes.
top_k: A positive integer specifying how many predictions are considered per video.
Raises:
ValueError: An error occurred when MeanAveragePrecisionCalculator cannot
be constructed.
"""
self.sum_hit_at_one = 0.0
self.sum_perr = 0.0
self.sum_loss = 0.0
self.map_calculator = map_calculator.MeanAveragePrecisionCalculator(
num_class)
self.global_ap_calculator = ap_calculator.AveragePrecisionCalculator()
self.top_k = top_k
self.num_examples = 0
def accumulate(self, loss, predictions, labels):
"""Accumulate the metrics calculated locally for this mini-batch.
Args:
predictions: A numpy matrix containing the outputs of the model.
Dimensions are 'batch' x 'num_classes'.
labels: A numpy matrix containing the ground truth labels.
Dimensions are 'batch' x 'num_classes'.
loss: A numpy array containing the loss for each sample.
Returns:
dictionary: A dictionary storing the metrics for the mini-batch.
Raises:
ValueError: An error occurred when the shape of predictions and actuals
does not match.
"""
batch_size = labels.shape[0]
mean_hit_at_one = calculate_hit_at_one(predictions, labels)
mean_perr = calculate_precision_at_equal_recall_rate(predictions,
labels)
mean_loss = numpy.mean(loss)
# Take the top 20 predictions.
sparse_predictions, sparse_labels, num_positives = top_k_by_class(
predictions, labels, self.top_k)
self.map_calculator.accumulate(sparse_predictions, sparse_labels,
num_positives)
self.global_ap_calculator.accumulate(
flatten(sparse_predictions),
flatten(sparse_labels), sum(num_positives))
self.num_examples += batch_size
self.sum_hit_at_one += mean_hit_at_one * batch_size
self.sum_perr += mean_perr * batch_size
self.sum_loss += mean_loss * batch_size
return {
"hit_at_one": mean_hit_at_one,
"perr": mean_perr,
"loss": mean_loss
}
def get(self):
"""Calculate the evaluation metrics for the whole epoch.
Raises:
ValueError: If no examples were accumulated.
Returns:
dictionary: a dictionary storing the evaluation metrics for the epoch. The
dictionary has the fields: avg_hit_at_one, avg_perr, avg_loss, and
aps (default nan).
"""
if self.num_examples <= 0:
raise ValueError("total_sample must be positive.")
avg_hit_at_one = self.sum_hit_at_one / self.num_examples
avg_perr = self.sum_perr / self.num_examples
avg_loss = self.sum_loss / self.num_examples
aps = self.map_calculator.peek_map_at_n()
gap = self.global_ap_calculator.peek_ap_at_n()
return {
"avg_hit_at_one": avg_hit_at_one,
"avg_perr": avg_perr,
"avg_loss": avg_loss,
"aps": aps,
"gap": gap
}
def clear(self):
"""Clear the evaluation metrics and reset the EvaluationMetrics object."""
self.sum_hit_at_one = 0.0
self.sum_perr = 0.0
self.sum_loss = 0.0
self.map_calculator.clear()
self.global_ap_calculator.clear()
self.num_examples = 0
# Copyright 2016 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Calculate the mean average precision.
It provides an interface for calculating mean average precision
for an entire list or the top-n ranked items.
Example usages:
We first call the function accumulate many times to process parts of the ranked
list. After processing all the parts, we call peek_map_at_n
to calculate the mean average precision.
```
import random
p = np.array([[random.random() for _ in xrange(50)] for _ in xrange(1000)])
a = np.array([[random.choice([0, 1]) for _ in xrange(50)]
for _ in xrange(1000)])
# mean average precision for 50 classes.
calculator = mean_average_precision_calculator.MeanAveragePrecisionCalculator(
num_class=50)
calculator.accumulate(p, a)
aps = calculator.peek_map_at_n()
```
"""
import numpy
from . import average_precision_calculator
class MeanAveragePrecisionCalculator(object):
"""This class is to calculate mean average precision.
"""
def __init__(self, num_class):
"""Construct a calculator to calculate the (macro) average precision.
Args:
num_class: A positive Integer specifying the number of classes.
top_n_array: A list of positive integers specifying the top n for each
class. The top n in each class will be used to calculate its average
precision at n.
The size of the array must be num_class.
Raises:
ValueError: An error occurred when num_class is not a positive integer;
or the top_n_array is not a list of positive integers.
"""
if not isinstance(num_class, int) or num_class <= 1:
raise ValueError("num_class must be an integer larger than 1.")
self._ap_calculators = [] # member of AveragePrecisionCalculator
self._num_class = num_class # total number of classes
for i in range(num_class):
self._ap_calculators.append(
average_precision_calculator.AveragePrecisionCalculator())
def accumulate(self, predictions, actuals, num_positives=None):
"""Accumulate the predictions and their ground truth labels.
Args:
predictions: A list of lists storing the prediction scores. The outer
dimension corresponds to classes.
actuals: A list of lists storing the ground truth labels. The dimensions
should correspond to the predictions input. Any value
larger than 0 will be treated as positives, otherwise as negatives.
num_positives: If provided, it is a list of numbers representing the
number of true positives for each class. If not provided, the number of
true positives will be inferred from the 'actuals' array.
Raises:
ValueError: An error occurred when the shape of predictions and actuals
does not match.
"""
if not num_positives:
num_positives = [None] * len(predictions)
calculators = self._ap_calculators
for i in range(len(predictions)):
calculators[i].accumulate(predictions[i], actuals[i],
num_positives[i])
def clear(self):
for calculator in self._ap_calculators:
calculator.clear()
def is_empty(self):
return ([calculator.heap_size for calculator in self._ap_calculators] ==
[0 for _ in range(self._num_class)])
def peek_map_at_n(self):
"""Peek the non-interpolated mean average precision at n.
Returns:
An array of non-interpolated average precision at n (default 0) for each
class.
"""
aps = [
self._ap_calculators[i].peek_ap_at_n()
for i in range(self._num_class)
]
return aps
from .model import regist_model, get_model
from .attention_lstm import AttentionLSTM
from .tsn import TSN
# regist models, sort by alphabet
regist_model("AttentionLSTM", AttentionLSTM)
regist_model("TSN", TSN)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import numpy as np
import paddle.fluid as fluid
from paddle.fluid import ParamAttr
from ..model import ModelBase
from .lstm_attention import LSTMAttentionModel
import logging
logger = logging.getLogger(__name__)
__all__ = ["AttentionLSTM"]
class AttentionLSTM(ModelBase):
def __init__(self, name, cfg, mode='train'):
super(AttentionLSTM, self).__init__(name, cfg, mode)
self.get_config()
def get_config(self):
# get model configs
self.feature_num = self.cfg.MODEL.feature_num
self.feature_names = self.cfg.MODEL.feature_names
self.feature_dims = self.cfg.MODEL.feature_dims
self.num_classes = self.cfg.MODEL.num_classes
self.embedding_size = self.cfg.MODEL.embedding_size
self.lstm_size = self.cfg.MODEL.lstm_size
self.drop_rate = self.cfg.MODEL.drop_rate
# get mode configs
self.batch_size = self.get_config_from_sec(self.mode, 'batch_size', 1)
self.num_gpus = self.get_config_from_sec(self.mode, 'num_gpus', 1)
def build_input(self, use_dataloader):
self.feature_input = []
for name, dim in zip(self.feature_names, self.feature_dims):
self.feature_input.append(
fluid.data(
shape=[None, dim], lod_level=1, dtype='float32', name=name))
#video_tag without label_input
if use_dataloader:
assert self.mode != 'infer', \
'dataloader is not recommended in infer mode, please set use_dataloader to False.'
self.dataloader = fluid.io.DataLoader.from_generator(
feed_list=self.feature_input, #video_tag
capacity=8,
iterable=True)
def build_model(self):
att_outs = []
for i, (input_dim, feature
) in enumerate(zip(self.feature_dims, self.feature_input)):
att = LSTMAttentionModel(input_dim, self.embedding_size,
self.lstm_size, self.drop_rate)
att_out = att.forward(feature, is_training=(self.mode == 'train'))
att_outs.append(att_out)
if len(att_outs) > 1:
out = fluid.layers.concat(att_outs, axis=1)
else:
out = att_outs[0]
fc1 = fluid.layers.fc(
input=out,
size=8192,
act='relu',
bias_attr=ParamAttr(
regularizer=fluid.regularizer.L2Decay(0.0),
initializer=fluid.initializer.NormalInitializer(scale=0.0)),
name='fc1')
fc2 = fluid.layers.fc(
input=fc1,
size=4096,
act='tanh',
bias_attr=ParamAttr(
regularizer=fluid.regularizer.L2Decay(0.0),
initializer=fluid.initializer.NormalInitializer(scale=0.0)),
name='fc2')
self.logit = fluid.layers.fc(input=fc2, size=self.num_classes, act=None, \
bias_attr=ParamAttr(regularizer=fluid.regularizer.L2Decay(0.0),
initializer=fluid.initializer.NormalInitializer(scale=0.0)),
name = 'output')
self.output = fluid.layers.sigmoid(self.logit)
def optimizer(self):
assert self.mode == 'train', "the optimizer can only be built in train mode"
values = [
self.learning_rate * (self.decay_gamma**i)
for i in range(len(self.decay_epochs) + 1)
]
iter_per_epoch = self.num_samples / self.batch_size
boundaries = [e * iter_per_epoch for e in self.decay_epochs]
return fluid.optimizer.RMSProp(
learning_rate=fluid.layers.piecewise_decay(
values=values, boundaries=boundaries),
centered=True,
regularization=fluid.regularizer.L2Decay(self.weight_decay))
def loss(self):
assert self.mode != 'infer', "loss cannot be calculated in infer mode"
cost = fluid.layers.sigmoid_cross_entropy_with_logits(
x=self.logit, label=self.label_input)
cost = fluid.layers.reduce_sum(cost, dim=-1)
sum_cost = fluid.layers.reduce_sum(cost)
self.loss_ = fluid.layers.scale(
sum_cost, scale=self.num_gpus, bias_after_scale=False)
return self.loss_
def outputs(self):
return [self.output, self.logit]
def feeds(self):
return self.feature_input
def fetches(self):
fetch_list = [self.output]
return fetch_list
def weights_info(self):
return None
def load_pretrain_params(self, exe, pretrain, prog, place):
logger.info("Load pretrain weights from {}, exclude fc layer.".format(
pretrain))
state_dict = fluid.load_program_state(pretrain)
dict_keys = list(state_dict.keys())
for name in dict_keys:
if "fc_0" in name:
del state_dict[name]
logger.info(
'Delete {} from pretrained parameters. Do not load it'.
format(name))
fluid.set_program_state(prog, state_dict)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import paddle.fluid as fluid
from paddle.fluid import ParamAttr
import numpy as np
class LSTMAttentionModel(object):
"""LSTM Attention Model"""
def __init__(self,
             input_dim,  # kept for interface compatibility; not used below
             embedding_size=512,
             lstm_size=1024,
             drop_rate=0.5):
self.lstm_size = lstm_size
self.embedding_size = embedding_size
self.drop_rate = drop_rate
def forward(self, input, is_training):
input_fc = fluid.layers.fc(
input=input,
size=self.embedding_size,
act='tanh',
bias_attr=ParamAttr(
regularizer=fluid.regularizer.L2Decay(0.0),
initializer=fluid.initializer.NormalInitializer(scale=0.0)),
name='rgb_fc')
lstm_forward_fc = fluid.layers.fc(
input=input_fc,
size=self.lstm_size * 4,
act=None,
bias_attr=False, # video_tag
name='rgb_fc_forward')
lstm_forward, _ = fluid.layers.dynamic_lstm(
input=lstm_forward_fc,
size=self.lstm_size * 4,
is_reverse=False,
name='rgb_lstm_forward')
lstm_backward_fc = fluid.layers.fc(
input=input_fc,
size=self.lstm_size * 4,
act=None,
bias_attr=False, #video_tag
name='rgb_fc_backward')
lstm_backward, _ = fluid.layers.dynamic_lstm(
input=lstm_backward_fc,
size=self.lstm_size * 4,
is_reverse=True,
name='rgb_lstm_backward')
lstm_concat = fluid.layers.concat(
input=[lstm_forward, lstm_backward], axis=1)
lstm_dropout = fluid.layers.dropout(
x=lstm_concat,
dropout_prob=self.drop_rate,
is_test=(not is_training))
lstm_weight = fluid.layers.fc(
input=lstm_dropout,
size=1,
act='sequence_softmax',
bias_attr=False, #video_tag
name='rgb_weight')
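# Attention pooling (the two ops below): the per-timestep scalar weights
# from the sequence_softmax fc scale each LSTM output, and
# sequence_pool('sum') then forms the weighted sum over time.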
scaled = fluid.layers.elementwise_mul(
x=lstm_dropout, y=lstm_weight, axis=0)
lstm_pool = fluid.layers.sequence_pool(input=scaled, pool_type='sum')
return lstm_pool
This diff is collapsed.
This diff is collapsed.
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import os
import wget
import tarfile
__all__ = ['decompress', 'download', 'AttrDict']
def decompress(path):
t = tarfile.open(path)
t.extractall(path=os.path.split(path)[0])
t.close()
os.remove(path)
def download(url, path):
weight_dir = os.path.split(path)[0]
if not os.path.exists(weight_dir):
os.makedirs(weight_dir)
path = path + ".tar.gz"
wget.download(url, path)
decompress(path)
class AttrDict(dict):
def __getattr__(self, key):
return self[key]
def __setattr__(self, key, value):
if key in self.__dict__:
self.__dict__[key] = value
else:
self[key] = value
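For illustration, AttrDict permits attribute-style access to config entries (a small usage sketch):
```
cfg = AttrDict()
cfg.batch_size = 1        # key not in __dict__, so it is stored as a dict entry
print(cfg['batch_size'])  # 1
print(cfg.batch_size)     # attribute access reads the same entry
```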
from .reader_utils import regist_reader, get_reader
from .kinetics_reader import KineticsReader
# regist reader, sort by alphabet
regist_reader("TSN", KineticsReader)
export CUDA_VISIBLE_DEVICES=0
# TSN + AttentionLSTM
python videotag_main.py
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
@@ -239,7 +239,7 @@ def process_image(sample, settings, mode, color_jitter, rotate):
img /= img_std
if mode == 'train' or mode == 'val':
return (img, sample[1])
return (img, [sample[1]])
elif mode == 'test':
return (img, )
......
@@ -116,10 +116,8 @@ def train_mobilenet():
optimizer.set_dict(opti_dict)
# 3. reader
train_data_loader, train_data = utility.create_data_loader(
is_train=True, args=args)
test_data_loader, test_data = utility.create_data_loader(
is_train=False, args=args)
train_data_loader = utility.create_data_loader(is_train=True, args=args)
test_data_loader = utility.create_data_loader(is_train=False, args=args)
num_trainers = int(os.environ.get('PADDLE_TRAINERS_NUM', 1))
imagenet_reader = reader.ImageNetReader(seed=0, place_num=place_num)
train_reader = imagenet_reader.train(settings=args)
@@ -145,8 +143,6 @@ def train_mobilenet():
t1 = time.time()
if args.max_iter and total_batch_num == args.max_iter:
return
label = to_variable(label.numpy().astype('int64').reshape(
int(args.batch_size // place_num), 1))
t_start = time.time()
# 4.1.1 call net()
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.