Commit a9203822 authored by SunGaofeng

add video classification models in PaddleCV

Parent 747f947e
# VideoClassification
Video Classification
To run training:
bash ./scripts/train/train_${model_name}.sh
To run testing:
bash ./scripts/test/test_${model_name}.sh
[MODEL]
name = "AttentionCluster"
dataset = "YouTube-8M"
bone_nework = None
drop_rate = 0.5
feature_num = 2
feature_names = ['rgb', 'audio']
feature_dims = [1024, 128]
seg_num = 100
cluster_nums = [32, 32]
class_num = 3862
[TRAIN]
epoch = 5
learning_rate = 0.001
pretrain_base = None
batch_size = 160
use_gpu = True
gpu_num = 4
filelist = "data/youtube8m/train.list"
[VALID]
batch_size = 160
filelist = "data/youtube8m/val.list"
[TEST]
batch_size = 40
filelist = "data/youtube8m/test.list"
[INFER]
batch_size = 1
filelist = "data/youtube8m/infer.list"
[MODEL]
name = "AttentionLSTM"
dataset = "YouTube-8M"
bone_nework = None
drop_rate = 0.5
feature_num = 2
feature_names = ['rgb', 'audio']
feature_dims = [1024, 128]
embedding_size = 512
lstm_size = 1024
class_num = 3862
[TRAIN]
epoch = 10
learning_rate = 0.001
decay_epochs = [5]
decay_gamma = 0.1
weight_decay = 0.0008
num_samples = 5000000
pretrain_base = None
batch_size = 160
use_gpu = True
gpu_num = 4
filelist = "data/youtube8m/train.list"
[VALID]
batch_size = 160
filelist = "data/youtube8m/val.list"
[TEST]
batch_size = 40
filelist = "data/youtube8m/test.list"
[INFER]
batch_size = 1
filelist = "data/youtube8m/infer.list"
[MODEL]
name = "NEXTVLAD"
num_classes = 3862
video_feature_size = 1024
audio_feature_size = 128
cluster_size = 128
hidden_size = 2048
groups = 8
expansion = 2
drop_rate = 0.5
gating_reduction = 8
eigen_file = "./data/eigenvals.npy"
[TRAIN]
epoch = 6
learning_rate = 0.0002
lr_boundary_examples = 2000000
max_iter = 700000
learning_rate_decay = 0.8
l2_penalty = 2e-5
gradient_clip_norm = 1.0
use_gpu = True
num_gpus = 4
batch_size = 160
filelist = "./data/youtube8m/train.list"
[VALID]
batch_size = 160
filelist = "./data/youtube8m/val.list"
[TEST]
batch_size = 40
filelist = "./data/youtube8m/test.list"
[INFER]
batch_size = 40
filelist = "./data/youtube8m/infer.list"
[MODEL]
name = "STNET"
format = "pkl"
num_classes = 400
seg_num = 7
seglen = 5
image_mean = [0.485, 0.456, 0.406]
image_std = [0.229, 0.224, 0.225]
num_layers = 50
[TRAIN]
epoch = 60
short_size = 256
target_size = 224
num_reader_threads = 12
buf_size = 1024
batch_size = 128
num_gpus = 8
use_gpu = True
filelist = "./data/kinetics/train.list"
learning_rate = 0.01
learning_rate_decay = 0.1
l2_weight_decay = 1e-4
momentum = 0.9
total_videos = 224684
pretrain_base = "./data/pretrained/ResNet50_pretrained"
[VALID]
short_size = 256
target_size = 224
num_reader_threads = 12
buf_size = 1024
batch_size = 128
filelist = "./data/kinetics/val.list"
[TEST]
short_size = 256
target_size = 224
num_reader_threads = 12
buf_size = 1024
batch_size = 16
filelist = "./data/kinetics/test.list"
[INFER]
short_size = 256
target_size = 224
num_reader_threads = 12
buf_size = 1024
batch_size = 16
filelist = "./data/kinetics/infer.list"
[MODEL]
name = "TSN"
format = "pkl"
num_classes = 400
seg_num = 3
seglen = 1
image_mean = [0.485, 0.456, 0.406]
image_std = [0.229, 0.224, 0.225]
num_layers = 50
[TRAIN]
epoch = 45
short_size = 256
target_size = 224
num_reader_threads = 12
buf_size = 1024
batch_size = 256
use_gpu = True
num_gpus = 8
filelist = "./data/kinetics/train.list"
learning_rate = 0.01
learning_rate_decay = 0.1
l2_weight_decay = 1e-4
momentum = 0.9
total_videos = 224684
[VALID]
short_size = 256
target_size = 224
num_reader_threads = 12
buf_size = 1024
batch_size = 256
filelist = "./data/kinetics/val.list"
[TEST]
short_size = 256
target_size = 224
num_reader_threads = 12
buf_size = 1024
batch_size = 32
filelist = "./data/kinetics/test.list"
[INFER]
short_size = 256
target_size = 224
num_reader_threads = 12
buf_size = 1024
batch_size = 32
filelist = "./data/kinetics/infer.list"
from .reader_utils import regist_reader, get_reader
from .feature_reader import FeatureReader
from .kinetics_reader import KineticsReader
from .nonlocal_reader import NonlocalReader
regist_reader("ATTENTIONCLUSTER", FeatureReader)
regist_reader("NEXTVLAD", FeatureReader)
regist_reader("ATTENTIONLSTM", FeatureReader)
regist_reader("TSN", KineticsReader)
regist_reader("TSM", KineticsReader)
regist_reader("STNET", KineticsReader)
regist_reader("NONLOCAL", NonlocalReader)
import sys
from .reader_utils import DataReader
try:
import cPickle as pickle
from cStringIO import StringIO
except ImportError:
import pickle
from io import BytesIO
import numpy as np
import random
python_ver = sys.version_info
class FeatureReader(DataReader):
"""
    Data reader for the YouTube-8M dataset, which is stored as features extracted by prior networks.
    This reader is shared by three models: AttentionLSTM, AttentionCluster and NextVLAD.
dataset cfg: num_classes
batch_size
list
NextVlad only: eigen_file
"""
def __init__(self, name, phase, cfg):
self.name = name
self.phase = phase
self.num_classes = cfg['num_classes']
# set batch size and file list
self.batch_size = cfg['batch_size']
self.filelist = cfg['list']
if 'eigen_file' in cfg.keys():
self.eigen_file = cfg['eigen_file']
if 'seg_num' in cfg.keys():
self.seg_num = cfg['seg_num']
def create_reader(self):
fl = open(self.filelist).readlines()
fl = [line.strip() for line in fl if line.strip() != '']
if self.phase == 'train':
random.shuffle(fl)
def reader():
batch_out = []
for filepath in fl:
if python_ver < (3, 0):
data = pickle.load(open(filepath, 'rb'))
else:
data = pickle.load(open(filepath, 'rb'), encoding='bytes')
indexes = list(range(len(data)))
if self.phase == 'train':
random.shuffle(indexes)
for i in indexes:
record = data[i]
nframes = record[b'nframes']
rgb = record[b'feature'].astype(float)
audio = record[b'audio'].astype(float)
if self.phase != 'infer':
label = record[b'label']
one_hot_label = make_one_hot(label, self.num_classes)
video = record[b'video']
rgb = rgb[0:nframes, :]
audio = audio[0:nframes, :]
rgb = dequantize(
rgb, max_quantized_value=2., min_quantized_value=-2.)
audio = dequantize(
audio, max_quantized_value=2, min_quantized_value=-2)
if self.name == 'NEXTVLAD':
# add the effect of eigen values
eigen_file = self.eigen_file
eigen_val = np.sqrt(np.load(eigen_file)
[:1024, 0]).astype(np.float32)
eigen_val = eigen_val + 1e-4
rgb = (rgb - 4. / 512) * eigen_val
if self.name == 'ATTENTIONCLUSTER':
sample_inds = generate_random_idx(rgb.shape[0],
self.seg_num)
rgb = rgb[sample_inds]
audio = audio[sample_inds]
if self.phase != 'infer':
batch_out.append((rgb, audio, one_hot_label))
else:
batch_out.append((rgb, audio, video))
if len(batch_out) == self.batch_size:
yield batch_out
batch_out = []
return reader
def dequantize(feat_vector, max_quantized_value=2., min_quantized_value=-2.):
"""
Dequantize the feature from the byte format to the float format
"""
assert max_quantized_value > min_quantized_value
quantized_range = max_quantized_value - min_quantized_value
scalar = quantized_range / 255.0
bias = (quantized_range / 512.0) + min_quantized_value
return feat_vector * scalar + bias
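# Worked example (illustration only) of dequantize(): with the default range
# [-2, 2], scalar = 4/255 and bias = 4/512 - 2, so a quantized byte of 0 maps
# to about -1.992, 128 to about 0.016 and 255 to about 2.008. The helper below
# is hypothetical and only exists to demonstrate that mapping.
def _demo_dequantize():
    raw = np.array([0., 128., 255.])
    return dequantize(raw, max_quantized_value=2., min_quantized_value=-2.)
    # -> approximately [-1.992, 0.016, 2.008]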
def make_one_hot(label, dim=3862):
one_hot_label = np.zeros(dim)
one_hot_label = one_hot_label.astype(float)
for ind in label:
one_hot_label[int(ind)] = 1
return one_hot_label
def generate_random_idx(feature_len, seg_num):
idxs = []
stride = float(feature_len) / seg_num
for i in range(seg_num):
pos = (i + np.random.random()) * stride
idxs.append(min(feature_len - 1, int(pos)))
return idxs
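# Worked example (illustration only) of generate_random_idx(): for a 100-frame
# feature and seg_num = 10 the stride is 10.0, so one index is drawn uniformly
# from each 10-frame segment. AttentionCluster uses these indices to sample a
# fixed seg_num of rgb/audio frames per video. The helper below is hypothetical.
def _demo_generate_random_idx():
    idxs = generate_random_idx(feature_len=100, seg_num=10)
    assert len(idxs) == 10 and all(0 <= i < 100 for i in idxs)
    return idxs  # e.g. [3, 17, 25, 38, 41, 57, 62, 74, 86, 99]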
import os
import sys
import math
import random
import functools
try:
import cPickle as pickle
from cStringIO import StringIO
except ImportError:
import pickle
from io import BytesIO
import numpy as np
import cv2  # used by decode_mp4 and mp4_loader below
import paddle
from PIL import Image, ImageEnhance
import logging
from .reader_utils import DataReader
logger = logging.getLogger(__name__)
python_ver = sys.version_info
class KineticsReader(DataReader):
"""
    Data reader for the Kinetics dataset, which supports two formats: mp4 and pkl.
    1. mp4, the original format of Kinetics-400
    2. pkl, where the mp4 video has been decoded beforehand and stored as a pickle file
    In both cases, the reader loads the data and returns the frame data as numpy arrays and the label as an integer.
dataset cfg: format
num_classes
seg_num
short_size
target_size
num_reader_threads
buf_size
image_mean
image_std
batch_size
list
"""
def __init__(self, name, phase, cfg):
self.name = name
self.phase = phase
self.format = cfg['format']
self.num_classes = cfg['num_classes']
self.seg_num = cfg['seg_num']
self.seglen = cfg['seglen']
self.short_size = cfg['short_size']
self.target_size = cfg['target_size']
self.num_reader_threads = cfg['num_reader_threads']
self.buf_size = cfg['buf_size']
self.img_mean = np.array(cfg['image_mean']).reshape(
[3, 1, 1]).astype(np.float32)
self.img_std = np.array(cfg['image_std']).reshape(
[3, 1, 1]).astype(np.float32)
# set batch size and file list
self.batch_size = cfg['batch_size']
self.filelist = cfg['list']
def create_reader(self):
_reader = _reader_creator(self.filelist, self.phase, seg_num=self.seg_num, seglen = self.seglen, \
short_size = self.short_size, target_size = self.target_size, \
img_mean = self.img_mean, img_std = self.img_std, \
shuffle = (self.phase == 'train'), \
num_threads = self.num_reader_threads, \
buf_size = self.buf_size, format = self.format)
def _batch_reader():
batch_out = []
for imgs, label in _reader():
if imgs is None:
continue
batch_out.append((imgs, label))
if len(batch_out) == self.batch_size:
yield batch_out
batch_out = []
return _batch_reader
def _reader_creator(pickle_list,
phase,
seg_num,
seglen,
short_size,
target_size,
img_mean,
img_std,
shuffle=False,
num_threads=1,
buf_size=1024,
format='pkl'):
def reader():
with open(pickle_list) as flist:
lines = [line.strip() for line in flist]
if shuffle:
random.shuffle(lines)
for line in lines:
pickle_path = line.strip()
yield [pickle_path]
if format == 'pkl':
decode_func = decode_pickle
elif format == 'mp4':
decode_func = decode_mp4
else:
raise "Not implemented format {}".format(format)
mapper = functools.partial(
decode_func,
phase=phase,
seg_num=seg_num,
seglen=seglen,
short_size=short_size,
target_size=target_size,
img_mean=img_mean,
img_std=img_std)
return paddle.reader.xmap_readers(mapper, reader, num_threads, buf_size)
def decode_mp4(sample, phase, seg_num, seglen, short_size, target_size,
img_mean, img_std):
sample = sample[0].split(' ')
mp4_path = sample[0]
# when infer, we store vid as label
label = int(sample[1])
imgs = mp4_loader(mp4_path, seg_num, seglen, phase)
imgs = group_scale(imgs, short_size)
if phase == 'train':
imgs = group_random_crop(imgs, target_size)
imgs = group_random_flip(imgs)
else:
imgs = group_center_crop(imgs, target_size)
np_imgs = (np.array(imgs[0]).astype('float32').transpose(
(2, 0, 1))).reshape(1, 3, target_size, target_size) / 255
for i in range(len(imgs) - 1):
img = (np.array(imgs[i + 1]).astype('float32').transpose(
(2, 0, 1))).reshape(1, 3, target_size, target_size) / 255
np_imgs = np.concatenate((np_imgs, img))
imgs = np_imgs
imgs -= img_mean
imgs /= img_std
imgs = np.reshape(imgs, (seg_num, seglen * 3, target_size, target_size))
return imgs, label
def decode_pickle(sample, phase, seg_num, seglen, short_size, target_size,
img_mean, img_std):
pickle_path = sample[0]
try:
if python_ver < (3, 0):
data_loaded = pickle.load(open(pickle_path, 'rb'))
else:
data_loaded = pickle.load(open(pickle_path, 'rb'), encoding='bytes')
vid, label, frames = data_loaded
if len(frames) < 1:
logger.info('{} frame length {} less than 1.'.format(pickle_path,
len(frames)))
            raise ValueError('{} contains no frames'.format(pickle_path))
except:
logger.info('Error when loading {}'.format(pickle_path))
return None, None
imgs = video_loader(frames, seg_num, seglen, phase)
imgs = group_scale(imgs, short_size)
if phase == 'train':
imgs = group_random_crop(imgs, target_size)
imgs = group_random_flip(imgs)
else:
imgs = group_center_crop(imgs, target_size)
np_imgs = (np.array(imgs[0]).astype('float32').transpose(
(2, 0, 1))).reshape(1, 3, target_size, target_size) / 255
for i in range(len(imgs) - 1):
img = (np.array(imgs[i + 1]).astype('float32').transpose(
(2, 0, 1))).reshape(1, 3, target_size, target_size) / 255
np_imgs = np.concatenate((np_imgs, img))
imgs = np_imgs
imgs -= img_mean
imgs /= img_std
imgs = np.reshape(imgs, (seg_num, seglen * 3, target_size, target_size))
if phase == 'train' or phase == 'valid' or phase == 'test':
return imgs, label
elif phase == 'infer':
return imgs, vid
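# Shape note (illustration only) for decode_pickle()/decode_mp4(): the sampled
# seg_num * seglen frames are stacked and then reshaped so that the seglen
# frames of each segment are folded into the channel axis. With the TSN config
# above (seg_num=3, seglen=1, target_size=224) the result is (3, 3, 224, 224);
# with the StNet config (seg_num=7, seglen=5) it is (7, 15, 224, 224). The
# hypothetical helper below only mimics that final reshape on dummy data.
def _demo_decode_output_shape(seg_num=3, seglen=1, target_size=224):
    dummy = np.zeros((seg_num * seglen, 3, target_size, target_size), 'float32')
    return np.reshape(dummy, (seg_num, seglen * 3, target_size, target_size)).shape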
def group_random_crop(img_group, target_size):
w, h = img_group[0].size
th, tw = target_size, target_size
assert (w >= target_size) and (h >= target_size), \
"image width({}) and height({}) should be larger than crop size".format(w, h, target_size)
out_images = []
x1 = random.randint(0, w - tw)
y1 = random.randint(0, h - th)
for img in img_group:
if w == tw and h == th:
out_images.append(img)
else:
out_images.append(img.crop((x1, y1, x1 + tw, y1 + th)))
return out_images
def group_random_flip(img_group):
v = random.random()
if v < 0.5:
ret = [img.transpose(Image.FLIP_LEFT_RIGHT) for img in img_group]
return ret
else:
return img_group
def group_center_crop(img_group, target_size):
img_crop = []
for img in img_group:
w, h = img.size
th, tw = target_size, target_size
assert (w >= target_size) and (h >= target_size), \
"image width({}) and height({}) should be larger than crop size".format(w, h, target_size)
x1 = int(round((w - tw) / 2.))
y1 = int(round((h - th) / 2.))
img_crop.append(img.crop((x1, y1, x1 + tw, y1 + th)))
return img_crop
def group_scale(imgs, target_size):
resized_imgs = []
for i in range(len(imgs)):
img = imgs[i]
w, h = img.size
if (w <= h and w == target_size) or (h <= w and h == target_size):
resized_imgs.append(img)
continue
if w < h:
ow = target_size
oh = int(target_size * 4.0 / 3.0)
resized_imgs.append(img.resize((ow, oh), Image.BILINEAR))
else:
oh = target_size
ow = int(target_size * 4.0 / 3.0)
resized_imgs.append(img.resize((ow, oh), Image.BILINEAR))
return resized_imgs
def imageloader(buf):
if isinstance(buf, str):
img = Image.open(StringIO(buf))
else:
img = Image.open(BytesIO(buf))
return img.convert('RGB')
def video_loader(frames, nsample, seglen, phase):
videolen = len(frames)
average_dur = int(videolen / nsample)
imgs = []
for i in range(nsample):
idx = 0
if phase == 'train':
if average_dur >= seglen:
idx = random.randint(0, average_dur - seglen)
idx += i * average_dur
elif average_dur >= 1:
idx += i * average_dur
else:
idx = i
else:
if average_dur >= seglen:
idx = (average_dur - seglen) // 2
idx += i * average_dur
elif average_dur >= 1:
idx += i * average_dur
else:
idx = i
for jj in range(idx, idx + seglen):
imgbuf = frames[int(jj % videolen)]
img = imageloader(imgbuf)
imgs.append(img)
return imgs
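# Worked example (illustration only) of the sampling in video_loader(): with
# videolen = 250, nsample = 5 and seglen = 1, average_dur is 50; in valid/test
# mode each segment contributes its centre frame, i.e. indices 24, 74, 124,
# 174 and 224, while in train mode the start index is drawn uniformly inside
# each 50-frame segment. The hypothetical helper re-derives the test-mode case.
def _demo_test_mode_indices(videolen=250, nsample=5, seglen=1):
    average_dur = int(videolen / nsample)
    return [(average_dur - seglen) // 2 + i * average_dur for i in range(nsample)]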
def mp4_loader(filepath, nsample, seglen, phase):
cap = cv2.VideoCapture(filepath)
videolen = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
average_dur = int(videolen / nsample)
sampledFrames = []
for i in range(videolen):
ret, frame = cap.read()
# maybe first frame is empty
if ret == False:
continue
img = frame[:, :, ::-1]
sampledFrames.append(img)
imgs = []
for i in range(nsample):
idx = 0
if phase == 'train':
if average_dur >= seglen:
idx = random.randint(0, average_dur - seglen)
idx += i * average_dur
elif average_dur >= 1:
idx += i * average_dur
else:
idx = i
else:
if average_dur >= seglen:
idx = (average_dur - 1) // 2
idx += i * average_dur
elif average_dur >= 1:
idx += i * average_dur
else:
idx = i
for jj in range(idx, idx + seglen):
imgbuf = sampledFrames[int(jj % videolen)]
img = Image.fromarray(imgbuf, mode='RGB')
imgs.append(img)
return imgs
import os
import random
import time
import multiprocessing
import numpy as np
import cv2
import logging
from . import nonlocal_video_io
from .reader_utils import DataReader
logger = logging.getLogger(__name__)
class NonlocalReader(DataReader):
"""
    Data reader for the Kinetics dataset, which reads mp4 files and decodes them into numpy arrays.
    This reader is used by the Non-local neural network model.
cfg: num_classes
num_reader_threads
image_mean
image_std
batch_size
list
crop_size
sample_rate
video_length
jitter_scales
Test only cfg: num_test_clips
use_multi_crop
"""
def __init__(self, name, phase, cfg):
self.name = name
self.phase = phase
self.cfg = cfg
def create_reader(self):
cfg = self.cfg
assert cfg['num_reader_threads'] >=1, \
"number of reader threads({}) should be a positive integer".format(cfg['num_reader_threads'])
if cfg['num_reader_threads'] == 1:
reader_func = make_reader
else:
reader_func = make_multi_reader
dataset_args = {}
dataset_args['image_mean'] = cfg['image_mean']
dataset_args['image_std'] = cfg['image_std']
dataset_args['crop_size'] = cfg['crop_size']
dataset_args['sample_rate'] = cfg['sample_rate']
dataset_args['video_length'] = cfg['video_length']
dataset_args['min_size'] = cfg['jitter_scales'][0]
dataset_args['max_size'] = cfg['jitter_scales'][1]
dataset_args['num_reader_threads'] = cfg['num_reader_threads']
if self.phase == 'train':
sample_times = 1
return reader_func(cfg['list'], cfg['batch_size'], sample_times,
True, True, **dataset_args)
elif self.phase == 'valid':
sample_times = 1
return reader_func(cfg['list'], cfg['batch_size'], sample_times,
False, False, **dataset_args)
elif self.phase == 'test':
sample_times = cfg['num_test_clips']
if cfg['use_multi_crop'] == 1:
sample_times = int(sample_times / 3)
if cfg['use_multi_crop'] == 2:
sample_times = int(sample_times / 6)
return reader_func(cfg['list'], cfg['batch_size'], sample_times,
False, False, **dataset_args)
else:
            logger.info('phase {} is not implemented'.format(self.phase))
            raise NotImplementedError('phase should be train, valid or test')
def apply_resize(rgbdata, min_size, max_size):
length, height, width, channel = rgbdata.shape
ratio = 1.0
# generate random scale between [min_size, max_size]
if min_size == max_size:
side_length = min_size
else:
side_length = np.random.randint(min_size, max_size)
if height > width:
ratio = float(side_length) / float(width)
else:
ratio = float(side_length) / float(height)
out_height = int(height * ratio)
out_width = int(width * ratio)
outdata = np.zeros(
(length, out_height, out_width, channel), dtype=rgbdata.dtype)
for i in range(length):
outdata[i] = cv2.resize(rgbdata[i], (out_width, out_height))
return outdata
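# Worked example (illustration only) of apply_resize(): the shorter side is
# scaled to a random value in [min_size, max_size] and the longer side keeps
# the aspect ratio. For a dummy 4-frame clip of height 128 and width 160 with
# min_size = max_size = 256, the ratio is 2.0 and the output is 256 x 320.
def _demo_apply_resize():
    dummy = np.zeros((4, 128, 160, 3), dtype=np.uint8)
    return apply_resize(dummy, min_size=256, max_size=256).shape  # (4, 256, 320, 3)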
def crop_mirror_transform(rgbdata,
mean,
std,
cropsize=224,
use_mirror=True,
center_crop=False,
spatial_pos=-1):
channel, length, height, width = rgbdata.shape
assert height >= cropsize, "crop size should not be larger than video height"
assert width >= cropsize, "crop size should not be larger than video width"
# crop to specific scale
if center_crop:
h_off = int((height - cropsize) / 2)
w_off = int((width - cropsize) / 2)
if spatial_pos >= 0:
now_pos = spatial_pos % 3
if h_off > 0:
h_off = h_off * now_pos
else:
w_off = w_off * now_pos
else:
h_off = np.random.randint(0, height - cropsize)
w_off = np.random.randint(0, width - cropsize)
outdata = np.zeros(
(channel, length, cropsize, cropsize), dtype=rgbdata.dtype)
outdata[:, :, :, :] = rgbdata[:, :, h_off:h_off + cropsize, w_off:w_off +
cropsize]
# apply mirror
mirror_indicator = (np.random.rand() > 0.5)
mirror_me = use_mirror and mirror_indicator
if spatial_pos > 0:
mirror_me = (int(spatial_pos / 3) > 0)
if mirror_me:
outdata = outdata[:, :, :, ::-1]
# substract mean and divide std
outdata = outdata.astype(np.float32)
outdata = (outdata - mean) / std
return outdata
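# Worked example (illustration only) of the multi-crop convention used by
# crop_mirror_transform() at test time: with center_crop=True and
# spatial_pos >= 0, spatial_pos % 3 picks one of three crop offsets along the
# dimension with slack (height is checked first) and spatial_pos // 3 decides
# whether the crop is mirrored. The scalar mean/std below are assumptions made
# only so this hypothetical helper is self-contained.
def _demo_multi_crop(spatial_pos=5):
    clip = np.zeros((3, 8, 256, 320), dtype=np.float32)  # (channel, length, h, w)
    out = crop_mirror_transform(clip, mean=0., std=1., cropsize=224,
                                use_mirror=False, center_crop=True,
                                spatial_pos=spatial_pos)
    return out.shape  # (3, 8, 224, 224); spatial_pos=5 also mirrors the crop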
def make_reader(filelist, batch_size, sample_times, is_training, shuffle,
**dataset_args):
    # should add sample_times param
fl = open(filelist).readlines()
fl = [line.strip() for line in fl if line.strip() != '']
if shuffle:
random.shuffle(fl)
def reader():
batch_out = []
for line in fl:
# start_time = time.time()
line_items = line.split(' ')
fn = line_items[0]
label = int(line_items[1])
if len(line_items) > 2:
start_frm = int(line_items[2])
spatial_pos = int(line_items[3])
in_sample_times = sample_times
else:
start_frm = -1
spatial_pos = -1
in_sample_times = 1
# print('label = ', label)
label = np.array([label]).astype(np.int64)
# 1, get rgb data for fixed length of frames
try:
rgbdata = nonlocal_video_io.video_fast_get_frame(fn, \
sampling_rate = dataset_args['sample_rate'], length = dataset_args['video_length'], \
start_frm = start_frm, sample_times = in_sample_times)
except:
logger.info('Error when loading {}, just skip this file'.format(
fn))
continue
            # add preprocessing
            # 2, resize to a random scale in [min_size, max_size] when training, or cfg.TEST.SCALE at inference
min_size = dataset_args['min_size']
max_size = dataset_args['max_size']
rgbdata = apply_resize(rgbdata, min_size, max_size)
# transform [length, height, width, channel] to [channel, length, height, width]
rgbdata = np.transpose(rgbdata, [3, 0, 1, 2])
# 3 crop, mirror and transform
rgbdata = crop_mirror_transform(rgbdata, mean = dataset_args['image_mean'], \
std = dataset_args['image_std'], cropsize = dataset_args['crop_size'], \
use_mirror = is_training, center_crop = (not is_training), \
spatial_pos = spatial_pos)
batch_out.append((rgbdata, label))
#elapsed_time = time.time() - start_time
#print('read item time ', elapsed_time)
if len(batch_out) == batch_size:
yield batch_out
batch_out = []
return reader
def make_multi_reader(filelist, batch_size, sample_times, is_training, shuffle,
**dataset_args):
fl = open(filelist).readlines()
fl = [line.strip() for line in fl if line.strip() != '']
if shuffle:
random.shuffle(fl)
n = dataset_args['num_reader_threads']
queue_size = 20
reader_lists = [None] * n
file_num = int(len(fl) // n)
for i in range(n):
if i < len(reader_lists) - 1:
tmp_list = fl[i * file_num:(i + 1) * file_num]
else:
tmp_list = fl[i * file_num:]
reader_lists[i] = tmp_list
def read_into_queue(flq, queue):
batch_out = []
for line in flq:
line_items = line.split(' ')
fn = line_items[0]
label = int(line_items[1])
if len(line_items) > 2:
start_frm = int(line_items[2])
spatial_pos = int(line_items[3])
in_sample_times = sample_times
else:
start_frm = -1
spatial_pos = -1
in_sample_times = 1
label = np.array([label]).astype(np.int64)
# 1, get rgb data for fixed length of frames
try:
rgbdata = nonlocal_video_io.video_fast_get_frame(fn, \
sampling_rate = dataset_args['sample_rate'], length = dataset_args['video_length'], \
start_frm = start_frm, sample_times = in_sample_times)
except:
logger.info('Error when loading {}, just skip this file'.format(
fn))
continue
            # add preprocessing
            # 2, resize to a random scale in [min_size, max_size] when training, or cfg.TEST.SCALE at inference
min_size = dataset_args['min_size']
max_size = dataset_args['max_size']
rgbdata = apply_resize(rgbdata, min_size, max_size)
# transform [length, height, width, channel] to [channel, length, height, width]
rgbdata = np.transpose(rgbdata, [3, 0, 1, 2])
# 3 crop, mirror and transform
rgbdata = crop_mirror_transform(rgbdata, mean = dataset_args['image_mean'], \
std = dataset_args['image_std'], cropsize = dataset_args['crop_size'], \
use_mirror = is_training, center_crop = (not is_training), \
spatial_pos = spatial_pos)
batch_out.append((rgbdata, label))
if len(batch_out) == batch_size:
queue.put(batch_out)
batch_out = []
queue.put(None)
def queue_reader():
queue = multiprocessing.Queue(queue_size)
p_list = [None] * len(reader_lists)
# for reader_list in reader_lists:
for i in range(len(reader_lists)):
reader_list = reader_lists[i]
p_list[i] = multiprocessing.Process(
target=read_into_queue, args=(reader_list, queue))
p_list[i].start()
reader_num = len(reader_lists)
finish_num = 0
while finish_num < reader_num:
sample = queue.get()
if sample is None:
finish_num += 1
else:
yield sample
for i in range(len(p_list)):
p_list[i].terminate()
p_list[i].join()
return queue_reader
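# Hypothetical usage sketch of make_multi_reader(): the file list path and all
# parameter values below are assumptions, shown only to illustrate the expected
# keyword arguments. Each worker process reads its shard of the file list and
# pushes finished batches into a shared queue; the generator returned by
# queue_reader() yields batches until every worker has put its final None.
def _demo_make_multi_reader(filelist='data/nonlocal/train.list'):
    reader = make_multi_reader(filelist, batch_size=8, sample_times=1,
                               is_training=True, shuffle=True,
                               image_mean=114.75, image_std=57.375,
                               crop_size=224, sample_rate=8, video_length=8,
                               min_size=256, max_size=320, num_reader_threads=4)
    for batch in reader():
        return len(batch)  # each batch is a list of (rgbdata, label) pairs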
import cv2
import numpy as np
import random
def video_fast_get_frame(video_path,
sampling_rate=1,
length=64,
start_frm=-1,
sample_times=1):
cap = cv2.VideoCapture(video_path)
frame_cnt = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
sampledFrames = []
# n_frame < sample area
video_output = np.ndarray(shape=[length, height, width, 3], dtype=np.uint8)
use_start_frm = start_frm
if start_frm < 0:
if (frame_cnt - length * sampling_rate > 0):
use_start_frm = random.randint(0,
frame_cnt - length * sampling_rate)
else:
use_start_frm = 0
else:
frame_gaps = float(frame_cnt) / float(sample_times)
use_start_frm = int(frame_gaps * start_frm) % frame_cnt
for i in range(frame_cnt):
ret, frame = cap.read()
# maybe first frame is empty
if ret == False:
continue
img = frame[:, :, ::-1]
sampledFrames.append(img)
for idx in range(length):
i = use_start_frm + idx * sampling_rate
i = i % len(sampledFrames)
video_output[idx] = sampledFrames[i]
cap.release()
return video_output
if __name__ == '__main__':
video_path = '~/docker/dockermount/data/k400/Kinetics_trimmed_processed_val/dancing_gangnam_style/rC7d3L8nSB4.mp4'
vout = video_fast_get_frame(video_path)
vout2 = video_fast_get_frame(video_path, \
sampling_rate = 2, length = 8, \
start_frm = 3, sample_times = 10)
import pickle
import cv2
import numpy as np
import random
class ReaderNotFoundError(Exception):
"Error: model not found"
def __init__(self, reader_name, avail_readers):
super(ReaderNotFoundError, self).__init__()
self.reader_name = reader_name
self.avail_readers = avail_readers
def __str__(self):
msg = "Reader {} Not Found.\nAvailiable readers:\n".format(
self.reader_name)
for reader in self.avail_readers:
msg += " {}\n".format(reader)
return msg
class DataReader(object):
"""data reader for video input"""
def __init__(self, model_name, phase, cfg):
"""Not implemented"""
pass
def create_reader(self):
"""Not implemented"""
pass
class ReaderZoo(object):
def __init__(self):
self.reader_zoo = {}
def regist(self, name, reader):
        assert reader.__base__ == DataReader, "Unknown reader type {}".format(
type(reader))
self.reader_zoo[name] = reader
def get(self, name, mode, cfg):
for k, v in self.reader_zoo.items():
if k == name:
return v(name, mode, cfg)
raise ReaderNotFoundError(name, self.reader_zoo.keys())
# singleton reader_zoo
reader_zoo = ReaderZoo()
def regist_reader(name, reader):
reader_zoo.regist(name, reader)
def get_reader(name, mode='train', **cfg):
reader_model = reader_zoo.get(name, mode, cfg)
return reader_model.create_reader()
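# Illustration of the registry pattern above: reader classes are registered
# under a model name (normally in reader/__init__.py) and later instantiated by
# that name through get_reader(). The MyReader class and the config values
# below are purely hypothetical.
def _demo_reader_zoo():
    class MyReader(DataReader):
        def __init__(self, name, phase, cfg):
            self.batch_size = cfg['batch_size']
        def create_reader(self):
            return lambda: iter(range(self.batch_size))
    regist_reader("MYREADER", MyReader)
    return get_reader("MYREADER", mode='train', batch_size=4)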
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import os
import sys
import time
import logging
import argparse
import numpy as np
try:
import cPickle as pickle
except:
import pickle
import paddle.fluid as fluid
import models
FORMAT = '[%(levelname)s: %(filename)s: %(lineno)4d]: %(message)s'
logging.basicConfig(level=logging.DEBUG, format=FORMAT, stream=sys.stdout)
logger = logging.getLogger(__name__)
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument(
'--model-name',
type=str,
default='AttentionCluster',
        help='name of model to run inference on.')
parser.add_argument(
'--config',
type=str,
default='configs/attention_cluster.txt',
help='path to config file of model')
parser.add_argument(
'--use-gpu', type=bool, default=True, help='default use gpu.')
parser.add_argument(
'--weights',
type=str,
default=None,
help='weight path, None to use weights from Paddle.')
parser.add_argument(
'--batch-size',
type=int,
default=1,
help='sample number in a batch for inference.')
parser.add_argument(
'--filelist',
type=str,
default=None,
        help='path to the inference data file list.')
parser.add_argument(
'--log-interval',
type=int,
default=1,
help='mini-batch interval to log.')
parser.add_argument(
'--infer-topk',
type=int,
default=20,
help='topk predictions to restore.')
parser.add_argument(
'--save-dir', type=str, default='./', help='directory to store results')
args = parser.parse_args()
return args
def infer(infer_model, args):
infer_model.build_input(use_pyreader=False)
infer_model.build_model()
infer_feeds = infer_model.feeds()
infer_outputs = infer_model.outputs()
place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place)
# get infer reader
if not args.filelist:
logger.error("[INFER] --filelist unset.")
return
assert os.path.exists(args.filelist), "{} not exist.".format(args.filelist)
infer_reader = infer_model.reader()
if args.weights:
assert os.path.exists(
args.weights), "Given weight dir {} not exist.".format(args.weights)
# if no weight files specified, download weights from paddle
weights = args.weights or infer_model.get_weights()
def if_exist(var):
return os.path.exists(os.path.join(weights, var.name))
fluid.io.load_vars(exe, weights, predicate=if_exist)
infer_feeder = fluid.DataFeeder(place=place, feed_list=infer_feeds)
fetch_list = [x.name for x in infer_outputs]
def _infer_loop():
periods = []
results = []
cur_time = time.time()
for infer_iter, data in enumerate(infer_reader()):
data_feed_in = [items[:-1] for items in data]
video_id = [items[-1] for items in data]
infer_outs = exe.run(fetch_list=fetch_list,
feed=infer_feeder.feed(data_feed_in))
predictions = np.array(infer_outs[0])
for i in range(len(predictions)):
topk_inds = predictions[i].argsort()[0 - args.infer_topk:]
topk_inds = topk_inds[::-1]
preds = predictions[i][topk_inds]
results.append(
(video_id[i], preds.tolist(), topk_inds.tolist()))
prev_time = cur_time
cur_time = time.time()
period = cur_time - prev_time
periods.append(period)
logger.info('Processed {} samples'.format((infer_iter) * len(
predictions)))
logger.info('[INFER] infer finished. average time: {}'.format(
np.mean(periods)))
return results
# start infer loop
infer_results = _infer_loop()
if not os.path.isdir(args.save_dir):
os.mkdir(args.save_dir)
result_file_name = os.path.join(args.save_dir,
"{}_infer_result".format(args.model_name))
pickle.dump(infer_results, open(result_file_name, 'wb'))
if __name__ == "__main__":
args = parse_args()
logger.info(args)
infer_model = models.get_model(
args.model_name, args.config, mode='infer', args=vars(args))
infer(infer_model, args)
from .metrics_util import get_metrics
from __future__ import absolute_import
from __future__ import unicode_literals
from __future__ import print_function
from __future__ import division
import numpy as np
import datetime
import logging
logger = logging.getLogger(__name__)
class MetricsCalculator():
def __init__(self, name, split):
self.name = name
self.split = split # 'train', 'val', 'test'
self.reset()
def reset(self):
logger.info('Resetting {} metrics...'.format(self.split))
self.aggr_acc1 = 0.0
self.aggr_acc5 = 0.0
self.aggr_loss = 0.0
self.aggr_batch_size = 0
def finalize_metrics(self):
self.avg_acc1 = self.aggr_acc1 / self.aggr_batch_size
self.avg_acc5 = self.aggr_acc5 / self.aggr_batch_size
self.avg_loss = self.aggr_loss / self.aggr_batch_size
def get_computed_metrics(self):
json_stats = {}
json_stats['avg_loss'] = self.avg_loss
json_stats['avg_acc1'] = self.avg_acc1
json_stats['avg_acc5'] = self.avg_acc5
return json_stats
def calculate_metrics(self, loss, softmax, labels):
accuracy1 = compute_topk_accuracy(softmax, labels, top_k=1) * 100.
accuracy5 = compute_topk_accuracy(softmax, labels, top_k=5) * 100.
return accuracy1, accuracy5
def accumulate(self, loss, softmax, labels):
cur_batch_size = softmax.shape[0]
# if returned loss is None for e.g. test, just set loss to be 0.
if loss is None:
cur_loss = 0.
else:
cur_loss = np.mean(np.array(loss)) #
self.aggr_batch_size += cur_batch_size
self.aggr_loss += cur_loss * cur_batch_size
accuracy1 = compute_topk_accuracy(softmax, labels, top_k=1) * 100.
accuracy5 = compute_topk_accuracy(softmax, labels, top_k=5) * 100.
self.aggr_acc1 += accuracy1 * cur_batch_size
self.aggr_acc5 += accuracy5 * cur_batch_size
return
# ----------------------------------------------
# other utils
# ----------------------------------------------
def compute_topk_correct_hits(top_k, preds, labels):
    '''Compute the number of correct hits'''
batch_size = preds.shape[0]
top_k_preds = np.zeros((batch_size, top_k), dtype=np.float32)
for i in range(batch_size):
top_k_preds[i, :] = np.argsort(-preds[i, :])[:top_k]
correctness = np.zeros(batch_size, dtype=np.int32)
for i in range(batch_size):
if labels[i] in top_k_preds[i, :].astype(np.int32).tolist():
correctness[i] = 1
correct_hits = sum(correctness)
return correct_hits
def compute_topk_accuracy(softmax, labels, top_k):
computed_metrics = {}
assert labels.shape[0] == softmax.shape[0], "Batch size mismatch."
aggr_batch_size = labels.shape[0]
aggr_top_k_correct_hits = compute_topk_correct_hits(top_k, softmax, labels)
# normalize results
computed_metrics = \
float(aggr_top_k_correct_hits) / aggr_batch_size
return computed_metrics
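# Worked example (illustration only) of compute_topk_accuracy(): in the
# 2-sample batch below the top-1 prediction is correct for the first sample
# only, while both labels appear within the top-2 predictions, so top-1
# accuracy is 0.5 and top-2 accuracy is 1.0.
def _demo_topk_accuracy():
    softmax = np.array([[0.7, 0.2, 0.1],
                        [0.1, 0.3, 0.6]])
    labels = np.array([0, 1])
    return (compute_topk_accuracy(softmax, labels, top_k=1),
            compute_topk_accuracy(softmax, labels, top_k=2))  # (0.5, 1.0)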
from __future__ import absolute_import
from __future__ import unicode_literals
from __future__ import print_function
from __future__ import division
import logging
import numpy as np
from metrics.youtube8m import eval_util as youtube8m_metrics
from metrics.kinetics import accuracy_metrics as kinetics_metrics
from metrics.non_local import nonlocal_test_metrics as nonlocal_test_metrics
logger = logging.getLogger(__name__)
class Metrics(object):
def __init__(self, name, phase, **metrics_args):
"""Not implemented"""
pass
def calculate_and_log_out(self, loss, pred, label, info=''):
"""Not implemented"""
pass
def accumulate(self, loss, pred, label, info=''):
"""Not implemented"""
pass
def finalize_and_log_out(self, info=''):
"""Not implemented"""
pass
def reset(self):
"""Not implemented"""
pass
class Youtube8mMetrics(Metrics):
def __init__(self, name, phase, **metrics_args):
self.name = name
self.phase = phase
self.metrics_args = metrics_args
self.num_classes = metrics_args['num_classes']
self.topk = metrics_args['topk']
self.calculator = youtube8m_metrics.EvaluationMetrics(self.num_classes,
self.topk)
def calculate_and_log_out(self, loss, pred, label, info=''):
loss = np.mean(np.array(loss))
hit_at_one = youtube8m_metrics.calculate_hit_at_one(pred, label)
perr = youtube8m_metrics.calculate_precision_at_equal_recall_rate(pred,
label)
gap = youtube8m_metrics.calculate_gap(pred, label)
logger.info(info + ' , loss = {0}, Hit@1 = {1}, PERR = {2}, GAP = {3}'.format(\
'%.6f' % loss, '%.2f' % hit_at_one, '%.2f' % perr, '%.2f' % gap))
def accumulate(self, loss, pred, label, info=''):
self.calculator.accumulate(loss, pred, label)
def finalize_and_log_out(self, info=''):
epoch_info_dict = self.calculator.get()
logger.info(info + '\tavg_hit_at_one: {0},\tavg_perr: {1},\tavg_loss :{2},\taps: {3},\tgap:{4}'\
.format(epoch_info_dict['avg_hit_at_one'], epoch_info_dict['avg_perr'], \
epoch_info_dict['avg_loss'], epoch_info_dict['aps'], epoch_info_dict['gap']))
def reset(self):
self.calculator.clear()
class Kinetics400Metrics(Metrics):
def __init__(self, name, phase, **metrics_args):
self.name = name
self.phase = phase
self.metrics_args = metrics_args
self.calculator = kinetics_metrics.MetricsCalculator(name,
phase.lower())
def calculate_and_log_out(self, loss, pred, label, info=''):
if loss is not None:
loss = np.mean(np.array(loss))
else:
loss = 0.
acc1, acc5 = self.calculator.calculate_metrics(loss, pred, label)
logger.info(info + '\tLoss: {},\ttop1_acc: {}, \ttop5_acc: {}'.format('%.6f' % loss, \
'%.2f' % acc1, '%.2f' % acc5))
def accumulate(self, loss, pred, label, info=''):
self.calculator.accumulate(loss, pred, label)
def finalize_and_log_out(self, info=''):
self.calculator.finalize_metrics()
metrics_dict = self.calculator.get_computed_metrics()
loss = metrics_dict['avg_loss']
acc1 = metrics_dict['avg_acc1']
acc5 = metrics_dict['avg_acc5']
logger.info(info + '\tLoss: {},\ttop1_acc: {}, \ttop5_acc: {}'.format('%.6f' % loss, \
'%.2f' % acc1, '%.2f' % acc5))
def reset(self):
self.calculator.reset()
class NonlocalMetrics(Metrics):
def __init__(self, name, phase, **metrics_args):
self.name = name
self.phase = phase
self.metrics_args = metrics_args
if phase == 'test':
self.calculator = nonlocal_test_metrics.MetricsCalculator(
name, phase.lower(), **metrics_args)
else:
self.calculator = kinetics_metrics.MetricsCalculator(name,
phase.lower())
def calculate_and_log_out(self, loss, pred, label, info=''):
if self.phase == 'test':
pass
else:
if loss is not None:
loss = np.mean(np.array(loss))
else:
loss = 0.
acc1, acc5 = self.calculator.calculate_metrics(loss, pred, label)
logger.info(info + '\tLoss: {},\ttop1_acc: {}, \ttop5_acc: {}'.format('%.6f' % loss, \
'%.2f' % acc1, '%.2f' % acc5))
def accumulate(self, loss, pred, label):
self.calculator.accumulate(loss, pred, label)
def finalize_and_log_out(self, info=''):
if self.phase == 'test':
self.calculator.finalize_metrics()
else:
self.calculator.finalize_metrics()
metrics_dict = self.calculator.get_computed_metrics()
loss = metrics_dict['avg_loss']
acc1 = metrics_dict['avg_acc1']
acc5 = metrics_dict['avg_acc5']
logger.info(info + '\tLoss: {},\ttop1_acc: {}, \ttop5_acc: {}'.format('%.6f' % loss, \
'%.2f' % acc1, '%.2f' % acc5))
def reset(self):
self.calculator.reset()
class MetricsZoo(object):
def __init__(self):
self.metrics_zoo = {}
def regist(self, name, metrics):
        assert metrics.__base__ == Metrics, "Unknown metrics type {}".format(
type(metrics))
self.metrics_zoo[name] = metrics
def get(self, name, mode, **cfg):
for k, v in self.metrics_zoo.items():
if k == name:
return v(name, mode, **cfg)
raise MetricsNotFoundError(name, self.metrics_zoo.keys())
# singleton metrics_zoo
metrics_zoo = MetricsZoo()
def regist_metrics(name, metrics):
metrics_zoo.regist(name, metrics)
def get_metrics(name, mode='train', **cfg):
return metrics_zoo.get(name, mode, **cfg)
regist_metrics("NEXTVLAD", Youtube8mMetrics)
regist_metrics("ATTENTIONLSTM", Youtube8mMetrics)
regist_metrics("ATTENTIONCLUSTER", Youtube8mMetrics)
regist_metrics("TSN", Kinetics400Metrics)
regist_metrics("TSM", Kinetics400Metrics)
regist_metrics("STNET", Kinetics400Metrics)
regist_metrics("NONLOCAL", NonlocalMetrics)
from __future__ import absolute_import
from __future__ import unicode_literals
from __future__ import print_function
from __future__ import division
import sys
import os
import numpy as np
import datetime
import logging
from collections import defaultdict
import pickle
logger = logging.getLogger(__name__)
class MetricsCalculator():
def __init__(self, name, split, **metrics_args):
"""
dataset args:
num_test_clips
dataset_size
filename_gt
checkpoint_dir
num_classes
"""
self.name = name
self.split = split # 'train', 'val', 'test'
self.metrics_args = metrics_args
self.num_test_clips = metrics_args['num_test_clips']
self.dataset_size = metrics_args['dataset_size']
self.filename_gt = metrics_args['filename_gt']
self.checkpoint_dir = metrics_args['checkpoint_dir']
self.num_classes = metrics_args['num_classes']
self.reset()
def reset(self):
logger.info('Resetting {} metrics...'.format(self.split))
self.aggr_acc1 = 0.0
self.aggr_acc5 = 0.0
self.aggr_loss = 0.0
self.aggr_batch_size = 0
self.seen_inds = defaultdict(int)
self.results = []
def calculate_metrics(self, loss, pred, labels):
pass
def accumulate(self, loss, pred, labels):
labels = labels.astype(int)
for i in range(pred.shape[0]):
probs = pred[i, :].tolist()
vid = labels[i]
self.seen_inds[vid] += 1
if self.seen_inds[vid] > self.num_test_clips:
                logger.warning('Video id {} has been seen. Skip.'.format(vid))
continue
save_pairs = [vid, probs]
self.results.append(save_pairs)
logger.info("({0} / {1}) videos".format(\
len(self.seen_inds), self.dataset_size))
def finalize_metrics(self):
if self.filename_gt is not None:
evaluate_results(self.results, self.filename_gt, self.dataset_size, \
self.num_classes, self.num_test_clips)
# save temporary file
pkl_path = os.path.join(self.checkpoint_dir, "results_probs.pkl")
        with open(pkl_path, 'wb') as f:
pickle.dump(self.results, f)
logger.info('Temporary file saved to: {}'.format(pkl_path))
def read_groundtruth(filename_gt):
f = open(filename_gt, 'r')
labels = []
for line in f:
rows = line.split()
labels.append(int(rows[1]))
f.close()
return labels
def evaluate_results(results, filename_gt, test_dataset_size, num_classes,
num_test_clips):
gt_labels = read_groundtruth(filename_gt)
sample_num = test_dataset_size
class_num = num_classes
sample_video_times = num_test_clips
counts = np.zeros(sample_num, dtype=np.int32)
probs = np.zeros((sample_num, class_num))
assert (len(gt_labels) == sample_num)
"""
clip_accuracy: the (e.g.) 10*19761 clips' average accuracy
clip1_accuracy: the 1st clip's accuracy (starting from frame 0)
"""
clip_accuracy = 0
clip1_accuracy = 0
clip1_count = 0
seen_inds = defaultdict(int)
# evaluate
for entry in results:
vid = entry[0]
prob = np.array(entry[1])
probs[vid] += prob[0:class_num]
counts[vid] += 1
idx = prob.argmax()
if idx == gt_labels[vid]:
# clip accuracy
clip_accuracy += 1
# clip1 accuracy
seen_inds[vid] += 1
if seen_inds[vid] == 1:
clip1_count += 1
if idx == gt_labels[vid]:
clip1_accuracy += 1
    # sanity check
max_clips = 0
min_clips = sys.maxsize
count_empty = 0
count_corrupted = 0
for i in range(sample_num):
max_clips = max(max_clips, counts[i])
min_clips = min(min_clips, counts[i])
if counts[i] != sample_video_times:
count_corrupted += 1
logger.warning('Id: {} count: {}'.format(i, counts[i]))
if counts[i] == 0:
count_empty += 1
logger.info('Num of empty videos: {}'.format(count_empty))
logger.info('Num of corrupted videos: {}'.format(count_corrupted))
logger.info('Max num of clips in a video: {}'.format(max_clips))
logger.info('Min num of clips in a video: {}'.format(min_clips))
# clip1 accuracy for sanity (# print clip1 first as it is lowest)
logger.info('Clip1 accuracy: {:.2f} percent ({}/{})'.format(
100. * clip1_accuracy / clip1_count, clip1_accuracy, clip1_count))
# clip accuracy for sanity
logger.info('Clip accuracy: {:.2f} percent ({}/{})'.format(
100. * clip_accuracy / len(results), clip_accuracy, len(results)))
# compute accuracy
accuracy = 0
accuracy_top5 = 0
for i in range(sample_num):
prob = probs[i]
# top-1
idx = prob.argmax()
if idx == gt_labels[i] and counts[i] > 0:
accuracy = accuracy + 1
ids = np.argsort(prob)[::-1]
for j in range(5):
if ids[j] == gt_labels[i] and counts[i] > 0:
accuracy_top5 = accuracy_top5 + 1
break
accuracy = float(accuracy) / float(sample_num)
accuracy_top5 = float(accuracy_top5) / float(sample_num)
logger.info('-' * 80)
logger.info('top-1 accuracy: {:.2f} percent'.format(accuracy * 100))
logger.info('top-5 accuracy: {:.2f} percent'.format(accuracy_top5 * 100))
logger.info('-' * 80)
return
# Copyright 2016 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Calculate or keep track of the interpolated average precision.
It provides an interface for calculating interpolated average precision for an
entire list or the top-n ranked items. For the definition of the
(non-)interpolated average precision:
http://trec.nist.gov/pubs/trec15/appendices/CE.MEASURES06.pdf
Example usages:
1) Use it as a static function call to directly calculate average precision for
a short ranked list in the memory.
```
import random
p = np.array([random.random() for _ in xrange(10)])
a = np.array([random.choice([0, 1]) for _ in xrange(10)])
ap = average_precision_calculator.AveragePrecisionCalculator.ap(p, a)
```
2) Use it as an object for long ranked list that cannot be stored in memory or
the case where partial predictions can be observed at a time (Tensorflow
predictions). In this case, we first call the function accumulate many times
to process parts of the ranked list. After processing all the parts, we call
peek_interpolated_ap_at_n.
```
p1 = np.array([random.random() for _ in xrange(5)])
a1 = np.array([random.choice([0, 1]) for _ in xrange(5)])
p2 = np.array([random.random() for _ in xrange(5)])
a2 = np.array([random.choice([0, 1]) for _ in xrange(5)])
# interpolated average precision at 10 using 1000 break points
calculator = average_precision_calculator.AveragePrecisionCalculator(10)
calculator.accumulate(p1, a1)
calculator.accumulate(p2, a2)
ap3 = calculator.peek_ap_at_n()
```
"""
import heapq
import random
import numbers
import numpy
class AveragePrecisionCalculator(object):
"""Calculate the average precision and average precision at n."""
def __init__(self, top_n=None):
"""Construct an AveragePrecisionCalculator to calculate average precision.
This class is used to calculate the average precision for a single label.
Args:
top_n: A positive Integer specifying the average precision at n, or
None to use all provided data points.
Raises:
ValueError: An error occurred when the top_n is not a positive integer.
"""
if not ((isinstance(top_n, int) and top_n >= 0) or top_n is None):
raise ValueError("top_n must be a positive integer or None.")
self._top_n = top_n # average precision at n
self._total_positives = 0 # total number of positives have seen
self._heap = [] # max heap of (prediction, actual)
@property
def heap_size(self):
"""Gets the heap size maintained in the class."""
return len(self._heap)
@property
def num_accumulated_positives(self):
"""Gets the number of positive samples that have been accumulated."""
return self._total_positives
def accumulate(self, predictions, actuals, num_positives=None):
"""Accumulate the predictions and their ground truth labels.
After the function call, we may call peek_ap_at_n to actually calculate
the average precision.
Note predictions and actuals must have the same shape.
Args:
predictions: a list storing the prediction scores.
actuals: a list storing the ground truth labels. Any value
larger than 0 will be treated as positives, otherwise as negatives.
      num_positives: If the 'predictions' and 'actuals' inputs aren't complete,
then it's possible some true positives were missed in them. In that case,
you can provide 'num_positives' in order to accurately track recall.
Raises:
ValueError: An error occurred when the format of the input is not the
numpy 1-D array or the shape of predictions and actuals does not match.
"""
if len(predictions) != len(actuals):
raise ValueError(
"the shape of predictions and actuals does not match.")
if not num_positives is None:
if not isinstance(num_positives,
numbers.Number) or num_positives < 0:
raise ValueError(
"'num_positives' was provided but it wan't a nonzero number."
)
if not num_positives is None:
self._total_positives += num_positives
else:
self._total_positives += numpy.size(numpy.where(actuals > 0))
topk = self._top_n
heap = self._heap
for i in range(numpy.size(predictions)):
if topk is None or len(heap) < topk:
heapq.heappush(heap, (predictions[i], actuals[i]))
else:
if predictions[i] > heap[0][0]: # heap[0] is the smallest
heapq.heappop(heap)
heapq.heappush(heap, (predictions[i], actuals[i]))
def clear(self):
"""Clear the accumulated predictions."""
self._heap = []
self._total_positives = 0
def peek_ap_at_n(self):
"""Peek the non-interpolated average precision at n.
Returns:
The non-interpolated average precision at n (default 0).
If n is larger than the length of the ranked list,
the average precision will be returned.
"""
if self.heap_size <= 0:
return 0
predlists = numpy.array(list(zip(*self._heap)))
ap = self.ap_at_n(
predlists[0],
predlists[1],
n=self._top_n,
total_num_positives=self._total_positives)
return ap
@staticmethod
def ap(predictions, actuals):
"""Calculate the non-interpolated average precision.
Args:
predictions: a numpy 1-D array storing the sparse prediction scores.
actuals: a numpy 1-D array storing the ground truth labels. Any value
larger than 0 will be treated as positives, otherwise as negatives.
Returns:
The non-interpolated average precision at n.
If n is larger than the length of the ranked list,
the average precision will be returned.
Raises:
ValueError: An error occurred when the format of the input is not the
numpy 1-D array or the shape of predictions and actuals does not match.
"""
return AveragePrecisionCalculator.ap_at_n(predictions, actuals, n=None)
@staticmethod
def ap_at_n(predictions, actuals, n=20, total_num_positives=None):
"""Calculate the non-interpolated average precision.
Args:
predictions: a numpy 1-D array storing the sparse prediction scores.
actuals: a numpy 1-D array storing the ground truth labels. Any value
larger than 0 will be treated as positives, otherwise as negatives.
n: the top n items to be considered in ap@n.
total_num_positives : (optionally) you can specify the number of total
positive
in the list. If specified, it will be used in calculation.
Returns:
The non-interpolated average precision at n.
If n is larger than the length of the ranked list,
the average precision will be returned.
Raises:
ValueError: An error occurred when
1) the format of the input is not the numpy 1-D array;
2) the shape of predictions and actuals does not match;
3) the input n is not a positive integer.
"""
if len(predictions) != len(actuals):
raise ValueError(
"the shape of predictions and actuals does not match.")
if n is not None:
if not isinstance(n, int) or n <= 0:
raise ValueError("n must be 'None' or a positive integer."
" It was '%s'." % n)
ap = 0.0
predictions = numpy.array(predictions)
actuals = numpy.array(actuals)
# add a shuffler to avoid overestimating the ap
predictions, actuals = AveragePrecisionCalculator._shuffle(predictions,
actuals)
sortidx = sorted(
range(len(predictions)), key=lambda k: predictions[k], reverse=True)
if total_num_positives is None:
numpos = numpy.size(numpy.where(actuals > 0))
else:
numpos = total_num_positives
if numpos == 0:
return 0
if n is not None:
numpos = min(numpos, n)
delta_recall = 1.0 / numpos
poscount = 0.0
# calculate the ap
r = len(sortidx)
if n is not None:
r = min(r, n)
for i in range(r):
if actuals[sortidx[i]] > 0:
poscount += 1
ap += poscount / (i + 1) * delta_recall
return ap
@staticmethod
def _shuffle(predictions, actuals):
random.seed(0)
suffidx = random.sample(range(len(predictions)), len(predictions))
predictions = predictions[suffidx]
actuals = actuals[suffidx]
return predictions, actuals
@staticmethod
def _zero_one_normalize(predictions, epsilon=1e-7):
"""Normalize the predictions to the range between 0.0 and 1.0.
For some predictions like SVM predictions, we need to normalize them before
calculate the interpolated average precision. The normalization will not
change the rank in the original list and thus won't change the average
precision.
Args:
predictions: a numpy 1-D array storing the sparse prediction scores.
epsilon: a small constant to avoid denominator being zero.
Returns:
The normalized prediction.
"""
denominator = numpy.max(predictions) - numpy.min(predictions)
        ret = (predictions - numpy.min(predictions)) / max(denominator, epsilon)
return ret
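# Worked example (illustration only) of the average precision computed above:
# three predictions ranked 0.8 > 0.5 > 0.2 with actuals [1, 0, 1] give a
# precision of 1/1 at the first positive and 2/3 at the second, so
# AP = (1/1 + 2/3) / 2 = 0.8333.
def _demo_average_precision():
    predictions = numpy.array([0.8, 0.5, 0.2])
    actuals = numpy.array([1, 0, 1])
    return AveragePrecisionCalculator.ap(predictions, actuals)  # ~0.8333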
# Copyright 2016 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Provides functions to help with evaluating models."""
import datetime
import numpy
from . import mean_average_precision_calculator as map_calculator
from . import average_precision_calculator as ap_calculator
def flatten(l):
""" Merges a list of lists into a single list. """
return [item for sublist in l for item in sublist]
def calculate_hit_at_one(predictions, actuals):
"""Performs a local (numpy) calculation of the hit at one.
Args:
predictions: Matrix containing the outputs of the model.
Dimensions are 'batch' x 'num_classes'.
actuals: Matrix containing the ground truth labels.
Dimensions are 'batch' x 'num_classes'.
Returns:
float: The average hit at one across the entire batch.
"""
top_prediction = numpy.argmax(predictions, 1)
hits = actuals[numpy.arange(actuals.shape[0]), top_prediction]
return numpy.average(hits)
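# Worked example (illustration only) of calculate_hit_at_one(): the highest
# score in the first row belongs to a positive class, while the highest score
# in the second row does not, so Hit@1 = 0.5 for this 2-video batch.
def _demo_hit_at_one():
    predictions = numpy.array([[0.9, 0.1, 0.0],
                               [0.2, 0.3, 0.5]])
    actuals = numpy.array([[1, 0, 1],
                           [0, 1, 0]])
    return calculate_hit_at_one(predictions, actuals)  # 0.5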
def calculate_precision_at_equal_recall_rate(predictions, actuals):
"""Performs a local (numpy) calculation of the PERR.
Args:
predictions: Matrix containing the outputs of the model.
Dimensions are 'batch' x 'num_classes'.
actuals: Matrix containing the ground truth labels.
Dimensions are 'batch' x 'num_classes'.
Returns:
float: The average precision at equal recall rate across the entire batch.
"""
aggregated_precision = 0.0
num_videos = actuals.shape[0]
for row in numpy.arange(num_videos):
num_labels = int(numpy.sum(actuals[row]))
top_indices = numpy.argpartition(predictions[row],
-num_labels)[-num_labels:]
item_precision = 0.0
for label_index in top_indices:
if predictions[row][label_index] > 0:
item_precision += actuals[row][label_index]
item_precision /= top_indices.size
aggregated_precision += item_precision
aggregated_precision /= num_videos
return aggregated_precision
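# Worked example (illustration only) of the PERR metric above: the first video
# has two positive labels and both of its two highest-scoring classes are
# positives (precision 1.0); the second video has one positive label but its
# single highest-scoring class is a negative (precision 0.0), so PERR = 0.5.
def _demo_perr():
    predictions = numpy.array([[0.9, 0.1, 0.8],
                               [0.2, 0.3, 0.5]])
    actuals = numpy.array([[1., 0., 1.],
                           [0., 1., 0.]])
    return calculate_precision_at_equal_recall_rate(predictions, actuals)  # 0.5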
def calculate_gap(predictions, actuals, top_k=20):
"""Performs a local (numpy) calculation of the global average precision.
Only the top_k predictions are taken for each of the videos.
Args:
predictions: Matrix containing the outputs of the model.
Dimensions are 'batch' x 'num_classes'.
actuals: Matrix containing the ground truth labels.
Dimensions are 'batch' x 'num_classes'.
top_k: How many predictions to use per video.
Returns:
float: The global average precision.
"""
gap_calculator = ap_calculator.AveragePrecisionCalculator()
sparse_predictions, sparse_labels, num_positives = top_k_by_class(
predictions, actuals, top_k)
gap_calculator.accumulate(
flatten(sparse_predictions), flatten(sparse_labels), sum(num_positives))
return gap_calculator.peek_ap_at_n()
def top_k_by_class(predictions, labels, k=20):
"""Extracts the top k predictions for each video, sorted by class.
Args:
predictions: A numpy matrix containing the outputs of the model.
Dimensions are 'batch' x 'num_classes'.
k: the top k non-zero entries to preserve in each prediction.
Returns:
A tuple (predictions,labels, true_positives). 'predictions' and 'labels'
are lists of lists of floats. 'true_positives' is a list of scalars. The
length of the lists are equal to the number of classes. The entries in the
predictions variable are probability predictions, and
the corresponding entries in the labels variable are the ground truth for
those predictions. The entries in 'true_positives' are the number of true
positives for each class in the ground truth.
Raises:
ValueError: An error occurred when the k is not a positive integer.
"""
if k <= 0:
raise ValueError("k must be a positive integer.")
k = min(k, predictions.shape[1])
num_classes = predictions.shape[1]
prediction_triplets = []
for video_index in range(predictions.shape[0]):
prediction_triplets.extend(
top_k_triplets(predictions[video_index], labels[video_index], k))
out_predictions = [[] for v in range(num_classes)]
out_labels = [[] for v in range(num_classes)]
for triplet in prediction_triplets:
out_predictions[triplet[0]].append(triplet[1])
out_labels[triplet[0]].append(triplet[2])
out_true_positives = [numpy.sum(labels[:, i]) for i in range(num_classes)]
return out_predictions, out_labels, out_true_positives
def top_k_triplets(predictions, labels, k=20):
"""Get the top_k for a 1-d numpy array. Returns a sparse list of tuples in
(prediction, class) format"""
m = len(predictions)
k = min(k, m)
indices = numpy.argpartition(predictions, -k)[-k:]
return [(index, predictions[index], labels[index]) for index in indices]
class EvaluationMetrics(object):
"""A class to store the evaluation metrics."""
def __init__(self, num_class, top_k):
"""Construct an EvaluationMetrics object to store the evaluation metrics.
Args:
num_class: A positive integer specifying the number of classes.
top_k: A positive integer specifying how many predictions are considered per video.
Raises:
ValueError: An error occurred when MeanAveragePrecisionCalculator cannot
be constructed.
"""
self.sum_hit_at_one = 0.0
self.sum_perr = 0.0
self.sum_loss = 0.0
self.map_calculator = map_calculator.MeanAveragePrecisionCalculator(
num_class)
self.global_ap_calculator = ap_calculator.AveragePrecisionCalculator()
self.top_k = top_k
self.num_examples = 0
#def accumulate(self, predictions, labels, loss):
def accumulate(self, loss, predictions, labels):
"""Accumulate the metrics calculated locally for this mini-batch.
Args:
predictions: A numpy matrix containing the outputs of the model.
Dimensions are 'batch' x 'num_classes'.
labels: A numpy matrix containing the ground truth labels.
Dimensions are 'batch' x 'num_classes'.
loss: A numpy array containing the loss for each sample.
Returns:
dictionary: A dictionary storing the metrics for the mini-batch.
Raises:
ValueError: An error occurred when the shape of predictions and actuals
does not match.
"""
batch_size = labels.shape[0]
mean_hit_at_one = calculate_hit_at_one(predictions, labels)
mean_perr = calculate_precision_at_equal_recall_rate(predictions,
labels)
mean_loss = numpy.mean(loss)
# Take the top 20 predictions.
sparse_predictions, sparse_labels, num_positives = top_k_by_class(
predictions, labels, self.top_k)
self.map_calculator.accumulate(sparse_predictions, sparse_labels,
num_positives)
self.global_ap_calculator.accumulate(
flatten(sparse_predictions),
flatten(sparse_labels), sum(num_positives))
self.num_examples += batch_size
self.sum_hit_at_one += mean_hit_at_one * batch_size
self.sum_perr += mean_perr * batch_size
self.sum_loss += mean_loss * batch_size
return {
"hit_at_one": mean_hit_at_one,
"perr": mean_perr,
"loss": mean_loss
}
def get(self):
"""Calculate the evaluation metrics for the whole epoch.
Raises:
ValueError: If no examples were accumulated.
Returns:
dictionary: a dictionary storing the evaluation metrics for the epoch. The
dictionary has the fields: avg_hit_at_one, avg_perr, avg_loss, aps
(default nan), and gap.
"""
if self.num_examples <= 0:
raise ValueError("total_sample must be positive.")
avg_hit_at_one = self.sum_hit_at_one / self.num_examples
avg_perr = self.sum_perr / self.num_examples
avg_loss = self.sum_loss / self.num_examples
aps = self.map_calculator.peek_map_at_n()
gap = self.global_ap_calculator.peek_ap_at_n()
epoch_info_dict = {}
return {
"avg_hit_at_one": avg_hit_at_one,
"avg_perr": avg_perr,
"avg_loss": avg_loss,
"aps": aps,
"gap": gap
}
def clear(self):
"""Clear the evaluation metrics and reset the EvaluationMetrics object."""
self.sum_hit_at_one = 0.0
self.sum_perr = 0.0
self.sum_loss = 0.0
self.map_calculator.clear()
self.global_ap_calculator.clear()
self.num_examples = 0
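if __name__ == "__main__":
    # Minimal self-check sketch with synthetic data (not part of the original
    # pipeline). Shapes follow the docstrings above; it assumes the imports at
    # the top of this file resolve, e.g. when the file is run as a module of
    # its package.
    batch_size, num_classes = 8, 50
    preds = numpy.random.random_sample((batch_size, num_classes))
    labels = (numpy.random.random_sample(
        (batch_size, num_classes)) > 0.8).astype('float32')
    print("PERR:", calculate_precision_at_equal_recall_rate(preds, labels))
    _, _, num_positives = top_k_by_class(preds, labels, k=20)
    print("positives of the first 5 classes:", num_positives[:5])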
# Copyright 2016 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Calculate the mean average precision.
It provides an interface for calculating mean average precision
for an entire list or the top-n ranked items.
Example usages:
We first call the function accumulate many times to process parts of the ranked
list. After processing all the parts, we call peek_map_at_n
to calculate the mean average precision.
```
import random
# Scores and binary labels for 50 classes with 1000 examples each; the outer
# dimension indexes classes, which is what accumulate() expects.
p = numpy.array([[random.random() for _ in range(1000)] for _ in range(50)])
a = numpy.array([[random.choice([0, 1]) for _ in range(1000)]
                 for _ in range(50)])
# mean average precision for 50 classes.
calculator = mean_average_precision_calculator.MeanAveragePrecisionCalculator(
num_class=50)
calculator.accumulate(p, a)
aps = calculator.peek_map_at_n()
```
"""
import numpy
from . import average_precision_calculator
class MeanAveragePrecisionCalculator(object):
"""This class is to calculate mean average precision.
"""
def __init__(self, num_class):
"""Construct a calculator to calculate the (macro) average precision.
Args:
num_class: An integer larger than 1 specifying the number of classes.
Raises:
ValueError: An error occurred when num_class is not an integer larger
than 1.
"""
if not isinstance(num_class, int) or num_class <= 1:
raise ValueError("num_class must be an integer larger than 1.")
self._ap_calculators = [] # member of AveragePrecisionCalculator
self._num_class = num_class # total number of classes
for i in range(num_class):
self._ap_calculators.append(
average_precision_calculator.AveragePrecisionCalculator())
def accumulate(self, predictions, actuals, num_positives=None):
"""Accumulate the predictions and their ground truth labels.
Args:
predictions: A list of lists storing the prediction scores. The outer
dimension corresponds to classes.
actuals: A list of lists storing the ground truth labels. The dimensions
should correspond to the predictions input. Any value
larger than 0 will be treated as positives, otherwise as negatives.
num_positives: If provided, it is a list of numbers representing the
number of true positives for each class. If not provided, the number of
true positives will be inferred from the 'actuals' array.
Raises:
ValueError: An error occurred when the shape of predictions and actuals
does not match.
"""
if not num_positives:
num_positives = [None for _ in range(len(predictions))]
calculators = self._ap_calculators
for i in range(len(predictions)):
calculators[i].accumulate(predictions[i], actuals[i],
num_positives[i])
def clear(self):
for calculator in self._ap_calculators:
calculator.clear()
def is_empty(self):
return ([calculator.heap_size for calculator in self._ap_calculators] ==
[0 for _ in range(self._num_class)])
def peek_map_at_n(self):
"""Peek the non-interpolated mean average precision at n.
Returns:
An array of non-interpolated average precision at n (default 0) for each
class.
"""
aps = [
self._ap_calculators[i].peek_ap_at_n()
for i in range(self._num_class)
]
return aps
from .model import regist_model, get_model
from .attention_cluster import AttentionCluster
from .nextvlad import NEXTVLAD
from .tsn import TSN
from .stnet import STNET
from .attention_lstm import AttentionLSTM
# regist models
regist_model("AttentionCluster", AttentionCluster)
regist_model("NEXTVLAD", NEXTVLAD)
regist_model("TSN", TSN)
regist_model("STNET", STNET)
regist_model("AttentionLSTM", AttentionLSTM)
from __future__ import absolute_import
from .attention_cluster import *
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import paddle.fluid as fluid
from paddle.fluid import ParamAttr
from ..model import ModelBase
from .shifting_attention import ShiftingAttentionModel
from .logistic_model import LogisticModel
__all__ = ["AttentionCluster"]
class AttentionCluster(ModelBase):
def __init__(self, name, cfg, mode='train', args=None):
super(AttentionCluster, self).__init__(name, cfg, mode, args)
self.get_config()
def get_config(self):
# get model configs
self.feature_num = self.cfg.MODEL.feature_num
self.feature_names = self.cfg.MODEL.feature_names
self.feature_dims = self.cfg.MODEL.feature_dims
self.cluster_nums = self.cfg.MODEL.cluster_nums
self.seg_num = self.cfg.MODEL.seg_num
self.class_num = self.cfg.MODEL.class_num
self.drop_rate = self.cfg.MODEL.drop_rate
# get mode configs
self.batch_size = self.get_config_from_sec(self.mode, 'batch_size', 1)
self.use_gpu = self.get_config_from_sec(self.mode, 'use_gpu', True)
self.gpu_num = self.get_config_from_sec(self.mode, 'gpu_num', 1)
if self.mode == 'train':
self.learning_rate = self.get_config_from_sec('train',
'learning_rate', 1e-3)
def build_input(self, use_pyreader):
if use_pyreader:
assert self.mode != 'infer', \
'pyreader is not recommended for infer mode, please set use_pyreader to False.'
shapes = []
for dim in self.feature_dims:
shapes.append([-1, self.seg_num, dim])
shapes.append([-1, self.class_num]) # label
self.py_reader = fluid.layers.py_reader(
capacity=1024,
shapes=shapes,
lod_levels=[0] * (self.feature_num + 1),
dtypes=['float32'] * (self.feature_num + 1),
name='train_py_reader'
if self.is_training else 'test_py_reader',
use_double_buffer=True)
inputs = fluid.layers.read_file(self.py_reader)
self.feature_input = inputs[:self.feature_num]
self.label_input = inputs[-1]
else:
self.feature_input = []
for name, dim in zip(self.feature_names, self.feature_dims):
self.feature_input.append(
fluid.layers.data(
shape=[self.seg_num, dim], dtype='float32', name=name))
if self.mode == 'infer':
self.label_input = None
else:
self.label_input = fluid.layers.data(
shape=[self.class_num], dtype='float32', name='label')
def build_model(self):
att_outs = []
for i, (input_dim, cluster_num, feature) in enumerate(
zip(self.feature_dims, self.cluster_nums, self.feature_input)):
att = ShiftingAttentionModel(input_dim, self.seg_num, cluster_num,
"satt{}".format(i))
att_out = att.forward(feature)
att_outs.append(att_out)
out = fluid.layers.concat(att_outs, axis=1)
if self.drop_rate > 0.:
out = fluid.layers.dropout(
out, self.drop_rate, is_test=(not self.is_training))
fc1 = fluid.layers.fc(
out,
size=1024,
act='tanh',
param_attr=ParamAttr(
name="fc1.weights",
initializer=fluid.initializer.MSRA(uniform=False)),
bias_attr=ParamAttr(
name="fc1.bias", initializer=fluid.initializer.MSRA()))
fc2 = fluid.layers.fc(
fc1,
size=4096,
act='tanh',
param_attr=ParamAttr(
name="fc2.weights",
initializer=fluid.initializer.MSRA(uniform=False)),
bias_attr=ParamAttr(
name="fc2.bias", initializer=fluid.initializer.MSRA()))
aggregate_model = LogisticModel()
self.output, self.logit = aggregate_model.build_model(
model_input=fc2,
vocab_size=self.class_num,
is_training=self.is_training)
def optimizer(self):
assert self.mode == 'train', "optimizer can only be obtained in train mode"
return fluid.optimizer.AdamOptimizer(self.learning_rate)
def loss(self):
assert self.mode != 'infer', "invalid loss calculation in infer mode"
cost = fluid.layers.sigmoid_cross_entropy_with_logits(
x=self.logit, label=self.label_input)
cost = fluid.layers.reduce_sum(cost, dim=-1)
self.loss_ = fluid.layers.mean(x=cost)
return self.loss_
def outputs(self):
return [self.output, self.logit]
def feeds(self):
return self.feature_input if self.mode == 'infer' else self.feature_input + [
self.label_input
]
def weights_info(self):
return (
"attention_cluster_youtube8m",
"https://paddlemodels.bj.bcebos.com/video_classification/attention_cluster_youtube8m.tar.gz"
)
def create_dataset_args(self):
dataset_args = {}
dataset_args['num_classes'] = self.class_num
dataset_args['seg_num'] = self.seg_num
dataset_args['list'] = self.get_config_from_sec(self.mode, 'filelist')
if self.use_gpu and self.py_reader:
dataset_args['batch_size'] = int(self.batch_size / self.gpu_num)
else:
dataset_args['batch_size'] = self.batch_size
return dataset_args
def create_metrics_args(self):
metrics_args = {}
metrics_args['num_classes'] = self.class_num
metrics_args['topk'] = 20
return metrics_args
import paddle
import paddle.fluid as fluid
class LogisticModel(object):
"""Logistic model with L2 regularization."""
def build_model(self,
model_input,
vocab_size,
l2_penalty=None,
**unused_params):
"""Creates a logistic model.
Args:
model_input: 'batch' x 'num_features' matrix of input features.
vocab_size: The number of classes in the dataset.
Returns:
A tuple (output, logit), where 'output' holds the sigmoid probability
predictions and 'logit' the pre-sigmoid scores; both tensors have
dimensions batch_size x num_classes."""
logit = fluid.layers.fc(
input=model_input,
size=vocab_size,
act=None,
name='logits_clf',
param_attr=fluid.ParamAttr(
name='logistic.weights',
initializer=fluid.initializer.MSRA(uniform=False)),
bias_attr=fluid.ParamAttr(
name='logistic.bias',
initializer=fluid.initializer.MSRA(uniform=False)))
output = fluid.layers.sigmoid(logit)
return output, logit
import paddle.fluid as fluid
from paddle.fluid import ParamAttr
import numpy as np
class ShiftingAttentionModel(object):
"""Shifting Attention Model"""
def __init__(self, input_dim, seg_num, n_att, name):
self.n_att = n_att
self.input_dim = input_dim
self.seg_num = seg_num
self.name = name
self.gnorm = np.sqrt(n_att)
def softmax_m1(self, x):
x_shape = fluid.layers.shape(x)
x_shape.stop_gradient = True
flat_x = fluid.layers.reshape(x, shape=(-1, self.seg_num))
flat_softmax = fluid.layers.softmax(flat_x)
# return fluid.layers.reshape(flat_softmax, shape=(x.shape[0], self.n_att, -1), actual_shape=x_shape)
return fluid.layers.reshape(
flat_softmax, shape=x.shape, actual_shape=x_shape)
def glorot(self, n):
return np.sqrt(1.0 / np.sqrt(n))
def forward(self, x):
"""Forward shifting attention model.
Args:
x: input features in shape of [N, L, F].
Returns:
out: output features in shape of [N, F * C]
"""
trans_x = fluid.layers.transpose(x, perm=[0, 2, 1])
# scores and weight in shape [N, C, L], sum(weights, -1) = 1
trans_x = fluid.layers.unsqueeze(trans_x, [-1])
scores = fluid.layers.conv2d(
trans_x,
self.n_att,
filter_size=1,
param_attr=ParamAttr(
name=self.name + ".conv.weight",
initializer=fluid.initializer.MSRA(uniform=False)),
bias_attr=ParamAttr(
name=self.name + ".conv.bias",
initializer=fluid.initializer.MSRA()))
scores = fluid.layers.squeeze(scores, [-1])
weights = self.softmax_m1(scores)
glrt = self.glorot(self.n_att)
self.w = fluid.layers.create_parameter(
shape=(self.n_att, ),
dtype=x.dtype,
attr=ParamAttr(self.name + ".shift_w"),
default_initializer=fluid.initializer.Normal(0.0, glrt))
self.b = fluid.layers.create_parameter(
shape=(self.n_att, ),
dtype=x.dtype,
attr=ParamAttr(name=self.name + ".shift_b"),
default_initializer=fluid.initializer.Normal(0.0, glrt))
outs = []
for i in range(self.n_att):
# slice weight and expand to shape [N, L, C]
weight = fluid.layers.slice(
weights, axes=[1], starts=[i], ends=[i + 1])
weight = fluid.layers.transpose(weight, perm=[0, 2, 1])
weight = fluid.layers.expand(weight, [1, 1, self.input_dim])
w_i = fluid.layers.slice(self.w, axes=[0], starts=[i], ends=[i + 1])
b_i = fluid.layers.slice(self.b, axes=[0], starts=[i], ends=[i + 1])
shift = fluid.layers.reduce_sum(x * weight, dim=1) * w_i + b_i
l2_norm = fluid.layers.l2_normalize(shift, axis=-1)
outs.append(l2_norm / self.gnorm)
out = fluid.layers.concat(outs, axis=1)
return out
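if __name__ == "__main__":
    # Graph-construction sketch only, no executor is run. The dimensions
    # mirror the rgb branch of attention_cluster.txt (1024-d features,
    # seg_num=100, 32 clusters); the data-layer and parameter names here are
    # illustrative.
    dim, seg_num, n_att = 1024, 100, 32
    rgb = fluid.layers.data(name='rgb', shape=[seg_num, dim], dtype='float32')
    att = ShiftingAttentionModel(dim, seg_num, n_att, 'satt_demo')
    out = att.forward(rgb)
    print(out.shape)  # expected (-1, dim * n_att)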
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import paddle.fluid as fluid
from paddle.fluid import ParamAttr
from ..model import ModelBase
from .lstm_attention import LSTMAttentionModel
__all__ = ["AttentionLSTM"]
class AttentionLSTM(ModelBase):
def __init__(self, name, cfg, mode='train', args=None):
super(AttentionLSTM, self).__init__(name, cfg, mode, args=args)
self.get_config()
def get_config(self):
# get model configs
self.feature_num = self.cfg.MODEL.feature_num
self.feature_names = self.cfg.MODEL.feature_names
self.feature_dims = self.cfg.MODEL.feature_dims
self.class_num = self.cfg.MODEL.class_num
self.embedding_size = self.cfg.MODEL.embedding_size
self.lstm_size = self.cfg.MODEL.lstm_size
# get mode configs
self.batch_size = self.get_config_from_sec(self.mode, 'batch_size', 1)
self.use_gpu = self.get_config_from_sec(self.mode, 'use_gpu', False)
self.gpu_num = self.get_config_from_sec(self.mode, 'gpu_num', 1)
if self.mode == 'train':
self.learning_rate = self.get_config_from_sec('train',
'learning_rate', 1e-3)
self.weight_decay = self.get_config_from_sec('train',
'weight_decay', 8e-4)
self.num_samples = self.get_config_from_sec('train', 'num_samples',
5000000)
self.decay_epochs = self.get_config_from_sec('train',
'decay_epochs', [5])
self.decay_gamma = self.get_config_from_sec('train', 'decay_gamma',
0.1)
def build_input(self, use_pyreader):
if use_pyreader:
assert self.mode != 'infer', \
'pyreader is not recommended for infer mode, please set use_pyreader to False.'
shapes = []
for dim in self.feature_dims:
shapes.append([-1, dim])
shapes.append([-1, self.class_num]) # label
self.py_reader = fluid.layers.py_reader(
capacity=1024,
shapes=shapes,
lod_levels=[1] * self.feature_num + [0],
dtypes=['float32'] * (self.feature_num + 1),
name='train_py_reader'
if self.is_training else 'test_py_reader',
use_double_buffer=True)
inputs = fluid.layers.read_file(self.py_reader)
self.feature_input = inputs[:self.feature_num]
self.label_input = inputs[-1]
else:
self.feature_input = []
for name, dim in zip(self.feature_names, self.feature_dims):
self.feature_input.append(
fluid.layers.data(
shape=[dim], lod_level=1, dtype='float32', name=name))
if self.mode == 'infer':
self.label_input = None
else:
self.label_input = fluid.layers.data(
shape=[self.class_num], dtype='float32', name='label')
def build_model(self):
att_outs = []
for i, (input_dim, feature
) in enumerate(zip(self.feature_dims, self.feature_input)):
att = LSTMAttentionModel(input_dim, self.embedding_size,
self.lstm_size)
att_out = att.forward(feature, is_training=(self.mode == 'train'))
att_outs.append(att_out)
out = fluid.layers.concat(att_outs, axis=1)
fc1 = fluid.layers.fc(
input=out,
size=8192,
act='relu',
bias_attr=ParamAttr(
regularizer=fluid.regularizer.L2Decay(0.0),
initializer=fluid.initializer.NormalInitializer(scale=0.0)))
fc2 = fluid.layers.fc(
input=fc1,
size=4096,
act='tanh',
bias_attr=ParamAttr(
regularizer=fluid.regularizer.L2Decay(0.0),
initializer=fluid.initializer.NormalInitializer(scale=0.0)))
self.logit = fluid.layers.fc(input=fc2, size=self.class_num, act=None, \
bias_attr=ParamAttr(regularizer=fluid.regularizer.L2Decay(0.0),
initializer=fluid.initializer.NormalInitializer(scale=0.0)))
self.output = fluid.layers.sigmoid(self.logit)
def optimizer(self):
assert self.mode == 'train', "optimizer can only be obtained in train mode"
values = [
self.learning_rate * (self.decay_gamma**i)
for i in range(len(self.decay_epochs) + 1)
]
iter_per_epoch = self.num_samples / self.batch_size
boundaries = [e * iter_per_epoch for e in self.decay_epochs]
return fluid.optimizer.RMSProp(
learning_rate=fluid.layers.piecewise_decay(
values=values, boundaries=boundaries),
centered=True,
regularization=fluid.regularizer.L2Decay(self.weight_decay))
def loss(self):
assert self.mode != 'infer', "invalid loss calculation in infer mode"
cost = fluid.layers.sigmoid_cross_entropy_with_logits(
x=self.logit, label=self.label_input)
cost = fluid.layers.reduce_sum(cost, dim=-1)
sum_cost = fluid.layers.reduce_sum(cost)
self.loss_ = fluid.layers.scale(
sum_cost, scale=self.gpu_num, bias_after_scale=False)
return self.loss_
def outputs(self):
return [self.output, self.logit]
def feeds(self):
return self.feature_input if self.mode == 'infer' else self.feature_input + [
self.label_input
]
def weights_info(self):
return (None, None)
def create_dataset_args(self):
dataset_args = {}
dataset_args['num_classes'] = self.class_num
dataset_args['list'] = self.get_config_from_sec(self.mode, 'filelist')
if self.use_gpu and self.py_reader:
dataset_args['batch_size'] = int(self.batch_size / self.gpu_num)
else:
dataset_args['batch_size'] = self.batch_size
return dataset_args
def create_metrics_args(self):
metrics_args = {}
metrics_args['num_classes'] = self.class_num
metrics_args['topk'] = 20
return metrics_args
import paddle.fluid as fluid
from paddle.fluid import ParamAttr
import numpy as np
class LSTMAttentionModel(object):
"""LSTM Attention Model"""
def __init__(self,
bias_attr,
embedding_size=512,
lstm_size=1024,
drop_rate=0.5):
self.lstm_size = lstm_size
self.embedding_size = embedding_size
self.bias_attr = ParamAttr(
regularizer=fluid.regularizer.L2Decay(0.0),
initializer=fluid.initializer.NormalInitializer(scale=0.0))
def forward(self, input, is_training):
input_fc = fluid.layers.fc(
input=input,
size=self.embedding_size,
act='tanh',
bias_attr=ParamAttr(
regularizer=fluid.regularizer.L2Decay(0.0),
initializer=fluid.initializer.NormalInitializer(scale=0.0)))
lstm_forward_fc = fluid.layers.fc(
input=input_fc,
size=self.lstm_size * 4,
act=None,
bias_attr=ParamAttr(
regularizer=fluid.regularizer.L2Decay(0.0),
initializer=fluid.initializer.NormalInitializer(scale=0.0)))
lstm_forward, _ = fluid.layers.dynamic_lstm(
input=lstm_forward_fc, size=self.lstm_size * 4, is_reverse=False)
lstm_backward_fc = fluid.layers.fc(
input=input_fc,
size=self.lstm_size * 4,
act=None,
bias_attr=ParamAttr(
regularizer=fluid.regularizer.L2Decay(0.0),
initializer=fluid.initializer.NormalInitializer(scale=0.0)))
lstm_backward, _ = fluid.layers.dynamic_lstm(
input=lstm_backward_fc, size=self.lstm_size * 4, is_reverse=True)
lstm_concat = fluid.layers.concat(
input=[lstm_forward, lstm_backward], axis=1)
lstm_dropout = fluid.layers.dropout(
x=lstm_concat, dropout_prob=0.5, is_test=(not is_training))
lstm_weight = fluid.layers.fc(
input=lstm_dropout,
size=1,
act='sequence_softmax',
bias_attr=ParamAttr(
regularizer=fluid.regularizer.L2Decay(0.0),
initializer=fluid.initializer.NormalInitializer(scale=0.0)))
scaled = fluid.layers.elementwise_mul(
x=lstm_dropout, y=lstm_weight, axis=0)
lstm_pool = fluid.layers.sequence_pool(input=scaled, pool_type='sum')
return lstm_pool
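if __name__ == "__main__":
    # Graph-construction sketch only, no executor is run. It mirrors how
    # AttentionLSTM wires its rgb branch (1024-d LoD features, 512-d
    # embedding, 1024 LSTM units, values from attention_lstm.txt); the first
    # positional argument is passed the same way AttentionLSTM passes it.
    rgb = fluid.layers.data(
        name='rgb', shape=[1024], lod_level=1, dtype='float32')
    att = LSTMAttentionModel(1024, embedding_size=512, lstm_size=1024)
    out = att.forward(rgb, is_training=False)
    print(out.shape)  # expected (-1, 2 * 1024) after bi-LSTM concat and pooling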
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import os
try:
from configparser import ConfigParser
except:
from ConfigParser import ConfigParser
import paddle.fluid as fluid
from datareader import get_reader
from metrics import get_metrics
from .utils import download, AttrDict
WEIGHT_DIR = os.path.expanduser("~/.paddle/weights")
class NotImplementError(Exception):
"Error: model function not implement"
def __init__(self, model, function):
super(NotImplementError, self).__init__()
self.model = model.__class__.__name__
self.function = function.__name__
def __str__(self):
return "Function {}() is not implemented in model {}".format(
self.function, self.model)
class ModelNotFoundError(Exception):
"Error: model not found"
def __init__(self, model_name, avail_models):
super(ModelNotFoundError, self).__init__()
self.model_name = model_name
self.avail_models = avail_models
def __str__(self):
msg = "Model {} Not Found.\nAvailiable models:\n".format(
self.model_name)
for model in self.avail_models:
msg += " {}\n".format(model)
return msg
class ModelConfig(object):
def __init__(self, cfg_file):
self.cfg_file = cfg_file
self.parser = ConfigParser()
self.cfg = AttrDict()
def parse(self):
self.parser.read(self.cfg_file)
for sec in self.parser.sections():
sec_dict = AttrDict()
for k, v in self.parser.items(sec):
try:
v = eval(v)
except:
pass
setattr(sec_dict, k, v)
setattr(self.cfg, sec.upper(), sec_dict)
def merge_configs(self, sec, cfg_dict):
sec_dict = getattr(self.cfg, sec.upper())
for k, v in cfg_dict.items():
if v is None:
continue
try:
if hasattr(sec_dict, k):
setattr(sec_dict, k, v)
except:
pass
def get_config_from_sec(self, sec, item):
try:
if hasattr(self.cfg, sec):
sec_dict = getattr(self.cfg, sec)
except:
return None
try:
if hasattr(sec_dict, item):
return getattr(sec_dict, item)
except:
return None
def get_configs(self):
return self.cfg
class ModelBase(object):
def __init__(self, name, cfg, mode='train', args=None):
assert mode in ['train', 'valid', 'test', 'infer'], \
"Unknown mode type {}".format(mode)
self.name = name
self.is_training = (mode == 'train')
self.mode = mode
self.py_reader = None
# parse config
assert os.path.exists(cfg), \
"Config file {} not exists".format(cfg)
self._config = ModelConfig(cfg)
self._config.parse()
if args and isinstance(args, dict):
self._config.merge_configs(mode, args)
self.cfg = self._config.get_configs()
def build_model(self):
"build model struct"
raise NotImplementError(self, self.build_model)
def build_input(self, use_pyreader):
"build input Variable"
raise NotImplementError(self, self.build_input)
def optimizer(self):
"get model optimizer"
raise NotImplementError(self, self.optimizer)
def outputs(self):
"get output variable"
raise NotImplementError(self, self.outputs)
def loss(self):
"get loss variable"
raise NotImplementError(self, self.loss)
def feeds(self):
"get feed inputs list"
raise NotImplementError(self, self.feeds)
def create_dataset_args(self):
"get model reader"
raise NotImplementError(self, self.create_dataset_args)
def reader(self):
dataset_args = self.create_dataset_args()
return get_reader(self.name.upper(), self.mode, **dataset_args)
def create_metrics_args(self):
"get model reader"
raise NotImplementError(self, self.create_metrics_args)
def metrics(self):
metrics_args = self.create_metrics_args()
return get_metrics(self.name.upper(), self.mode, **metrics_args)
def weights_info(self):
"get model weight default path and download url"
raise NotImplementError(self, self.weights_info)
def get_weights(self, logger=None):
"get model weight file path, download weight from Paddle if not exist"
path, url = self.weights_info()
path = os.path.join(WEIGHT_DIR, path)
if os.path.exists(path):
return path
if logger:
logger.info("Download weights of {} from {}".format(self.name, url))
download(url, path)
return path
def pyreader(self):
return self.py_reader
def epoch_num(self):
"get train epoch num"
return self.cfg.TRAIN.epoch
def pretrain_info(self):
"get pretrain base model directory"
return (None, None)
def get_pretrain_weights(self, logger=None):
"get model weight file path, download weight from Paddle if not exist"
path, url = self.pretrain_info()
if not path:
return None
path = os.path.join(WEIGHT_DIR, path)
if os.path.exists(path):
return path
if logger:
logger.info("Download pretrain weights of {} from {}".format(
self.name, url))
download(url, path)
return path
def load_pretrained_params(self, exe, pretrained_base, prog, place):
def if_exist(var):
return os.path.exists(os.path.join(pretrained_base, var.name))
inference_program = prog.clone(for_test=True)
fluid.io.load_vars(
exe,
pretrained_base,
predicate=if_exist,
main_program=inference_program)
def get_config_from_sec(self, sec, item, default=None):
cfg_item = self._config.get_config_from_sec(sec.upper(),
item) or default
return cfg_item
class ModelZoo(object):
def __init__(self):
self.model_zoo = {}
def regist(self, name, model):
assert model.__base__ == ModelBase, "Unknown model type {}".format(
type(model))
self.model_zoo[name] = model
def get(self, name, cfg, mode='train', args=None):
for k, v in self.model_zoo.items():
if k == name:
return v(name, cfg, mode, args)
raise ModelNotFoundError(name, self.model_zoo.keys())
# singleton model_zoo
model_zoo = ModelZoo()
def regist_model(name, model):
model_zoo.regist(name, model)
def get_model(name, cfg, mode='train', args=None):
return model_zoo.get(name, cfg, mode, args)
if __name__ == "__main__":
class TestModel(ModelBase):
pass
model_zoo.regist('test', TestModel)
m = model_zoo.get('test', './config.txt')
print(m.get_config_from_sec('train', 'batch_size'))
m.build_model()
m = model_zoo.get('test2', './config.txt')
from __future__ import absolute_import
from .nextvlad import *
import paddle
import paddle.fluid as fluid
class LogisticModel(object):
"""Logistic model with L2 regularization."""
def create_model(self,
model_input,
vocab_size,
l2_penalty=None,
**unused_params):
"""Creates a logistic model.
Args:
model_input: 'batch' x 'num_features' matrix of input features.
vocab_size: The number of classes in the dataset.
Returns:
A dictionary with the probability predictions of the model under the
'predictions' key and the pre-sigmoid scores under the 'logits' key; both
tensors have dimensions batch_size x num_classes."""
logits = fluid.layers.fc(
input=model_input,
size=vocab_size,
act=None,
name='logits_clf',
param_attr=fluid.ParamAttr(
name='logits_clf_weights',
initializer=fluid.initializer.MSRA(uniform=False),
regularizer=fluid.regularizer.L2DecayRegularizer(l2_penalty)),
bias_attr=fluid.ParamAttr(
name='logits_clf_bias',
regularizer=fluid.regularizer.L2DecayRegularizer(l2_penalty)))
output = fluid.layers.sigmoid(logits)
return {'predictions': output, 'logits': logits}
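if __name__ == "__main__":
    # Graph-construction sketch only, no executor is run. The 2048-d input
    # feature size is illustrative; the class count and l2 penalty follow
    # nextvlad.txt.
    feat = fluid.layers.data(
        name='agg_feature', shape=[2048], dtype='float32')
    out = LogisticModel().create_model(
        model_input=feat, vocab_size=3862, l2_penalty=2e-5)
    print(out['predictions'].shape, out['logits'].shape)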
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import paddle.fluid as fluid
from paddle.fluid import ParamAttr
from ..model import ModelBase
from .clf_model import LogisticModel
from . import nextvlad_model
__all__ = ["NEXTVLAD"]
class NEXTVLAD(ModelBase):
def __init__(self, name, cfg, mode='train', args=None):
super(NEXTVLAD, self).__init__(name, cfg, mode=mode, args=args)
self.get_config()
def get_config(self):
# model params
self.num_classes = self.get_config_from_sec('model', 'num_classes')
self.video_feature_size = self.get_config_from_sec('model',
'video_feature_size')
self.audio_feature_size = self.get_config_from_sec('model',
'audio_feature_size')
self.cluster_size = self.get_config_from_sec('model', 'cluster_size')
self.hidden_size = self.get_config_from_sec('model', 'hidden_size')
self.groups = self.get_config_from_sec('model', 'groups')
self.expansion = self.get_config_from_sec('model', 'expansion')
self.drop_rate = self.get_config_from_sec('model', 'drop_rate')
self.gating_reduction = self.get_config_from_sec('model',
'gating_reduction')
self.eigen_file = self.get_config_from_sec('model', 'eigen_file')
# training params
self.base_learning_rate = self.get_config_from_sec('train',
'learning_rate')
self.lr_boundary_examples = self.get_config_from_sec(
'train', 'lr_boundary_examples')
self.max_iter = self.get_config_from_sec('train', 'max_iter')
self.learning_rate_decay = self.get_config_from_sec(
'train', 'learning_rate_decay')
self.l2_penalty = self.get_config_from_sec('train', 'l2_penalty')
self.gradient_clip_norm = self.get_config_from_sec('train',
'gradient_clip_norm')
self.use_gpu = self.get_config_from_sec('train', 'use_gpu')
self.num_gpus = self.get_config_from_sec('train', 'num_gpus')
# other params
self.batch_size = self.get_config_from_sec(self.mode, 'batch_size')
self.list = self.get_config_from_sec(self.mode, 'filelist')
def build_input(self, use_pyreader=True):
rgb_shape = [self.video_feature_size]
audio_shape = [self.audio_feature_size]
label_shape = [self.num_classes]
if use_pyreader:
assert self.mode != 'infer', \
'pyreader is not recommended for infer mode, please set use_pyreader to False.'
py_reader = fluid.layers.py_reader(
capacity=100,
shapes=[[-1] + rgb_shape, [-1] + audio_shape,
[-1] + label_shape],
lod_levels=[1, 1, 0],
dtypes=['float32', 'float32', 'float32'],
name='train_py_reader'
if self.is_training else 'test_py_reader',
use_double_buffer=True)
rgb, audio, label = fluid.layers.read_file(py_reader)
self.py_reader = py_reader
else:
rgb = fluid.layers.data(
name='train_rgb' if self.is_training else 'test_rgb',
shape=rgb_shape,
dtype='float32',
lod_level=1)
audio = fluid.layers.data(
name='train_audio' if self.is_training else 'test_audio',
shape=audio_shape,
dtype='float32',
lod_level=1)
if self.mode == 'infer':
label = None
else:
label = fluid.layers.data(
name='train_label' if self.is_training else 'test_label',
shape=label_shape,
dtype='float32')
self.feature_input = [rgb, audio]
self.label_input = label
def create_model_args(self):
model_args = {}
model_args['class_dim'] = self.num_classes
model_args['cluster_size'] = self.cluster_size
model_args['hidden_size'] = self.hidden_size
model_args['groups'] = self.groups
model_args['expansion'] = self.expansion
model_args['drop_rate'] = self.drop_rate
model_args['gating_reduction'] = self.gating_reduction
model_args['l2_penalty'] = self.l2_penalty
return model_args
def build_model(self):
model_args = self.create_model_args()
videomodel = nextvlad_model.NeXtVLADModel()
rgb = self.feature_input[0]
audio = self.feature_input[1]
out = videomodel.create_model(
rgb, audio, is_training=(self.mode == 'train'), **model_args)
self.logits = out['logits']
self.predictions = out['predictions']
self.network_outputs = [out['predictions']]
def optimizer(self):
assert self.mode == 'train', "optimizer can only be obtained in train mode"
im_per_batch = self.batch_size
lr_bounds, lr_values = get_learning_rate_decay_list(
self.base_learning_rate, self.learning_rate_decay, self.max_iter,
self.lr_boundary_examples, im_per_batch)
return fluid.optimizer.AdamOptimizer(
learning_rate=fluid.layers.piecewise_decay(
boundaries=lr_bounds, values=lr_values))
def loss(self):
assert self.mode != 'infer', "invalid loss calculation in infer mode"
cost = fluid.layers.sigmoid_cross_entropy_with_logits(
x=self.logits, label=self.label_input)
cost = fluid.layers.reduce_sum(cost, dim=-1)
self.loss_ = fluid.layers.mean(x=cost)
return self.loss_
def outputs(self):
return self.network_outputs
def feeds(self):
return self.feature_input if self.mode == 'infer' else self.feature_input + [
self.label_input
]
def create_dataset_args(self):
dataset_args = {}
dataset_args['num_classes'] = self.num_classes
if self.use_gpu and self.py_reader:
dataset_args['batch_size'] = int(self.batch_size / self.num_gpus)
else:
dataset_args['batch_size'] = self.batch_size
dataset_args['list'] = self.list
dataset_args['eigen_file'] = self.eigen_file
return dataset_args
def create_metrics_args(self):
metrics_args = {}
metrics_args['num_classes'] = self.num_classes
metrics_args['topk'] = 20
return metrics_args
def get_learning_rate_decay_list(base_learning_rate, decay, max_iter,
decay_examples, total_batch_size):
decay_step = decay_examples // total_batch_size
lr_bounds = []
lr_values = [base_learning_rate]
i = 1
while True:
if i * decay_step >= max_iter:
break
lr_bounds.append(i * decay_step)
lr_values.append(base_learning_rate * (decay**i))
i += 1
return lr_bounds, lr_values
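if __name__ == "__main__":
    # Worked example of the piecewise learning-rate schedule, using the values
    # from nextvlad.txt (base lr 2e-4, decay 0.8 every 2,000,000 examples,
    # batch size 160, at most 700,000 iterations). Run it inside the package
    # so that the relative imports above resolve.
    bounds, values = get_learning_rate_decay_list(
        base_learning_rate=0.0002,
        decay=0.8,
        max_iter=700000,
        decay_examples=2000000,
        total_batch_size=160)
    print(bounds[:3])  # [12500, 25000, 37500]
    print(values[:4])  # approximately [2e-4, 1.6e-4, 1.28e-4, 1.024e-4]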
import numpy as np
import paddle
import paddle.fluid as fluid
from . import clf_model
class NeXtVLAD(object):
"""
This is a paddlepaddle implementation of the NeXtVLAD model. For more
information, please refer to the paper,
https://static.googleusercontent.com/media/research.google.com/zh-CN//youtube8m/workshop2018/p_c03.pdf
"""
def __init__(self,
feature_size,
cluster_size,
is_training=True,
expansion=2,
groups=None,
inputname='video'):
self.feature_size = feature_size
self.cluster_size = cluster_size
self.is_training = is_training
self.expansion = expansion
self.groups = groups
self.name = inputname + '_'
def forward(self, input):
input = fluid.layers.fc(
input=input,
size=self.expansion * self.feature_size,
act=None,
name=self.name + 'fc_expansion',
param_attr=fluid.ParamAttr(
name=self.name + 'fc_expansion_w',
initializer=fluid.initializer.MSRA(uniform=False)),
bias_attr=fluid.ParamAttr(
name=self.name + 'fc_expansion_b',
initializer=fluid.initializer.Constant(value=0.)))
# attention factor per group
attention = fluid.layers.fc(
input=input,
size=self.groups,
act='sigmoid',
name=self.name + 'fc_group_attention',
param_attr=fluid.ParamAttr(
name=self.name + 'fc_group_attention_w',
initializer=fluid.initializer.MSRA(uniform=False)),
bias_attr=fluid.ParamAttr(
name=self.name + 'fc_group_attention_b',
initializer=fluid.initializer.Constant(value=0.)))
# calculate activation factor per group per cluster
feature_size = self.feature_size * self.expansion // self.groups
cluster_weights = fluid.layers.create_parameter(
shape=[
self.expansion * self.feature_size,
self.groups * self.cluster_size
],
dtype=input.dtype,
attr=fluid.ParamAttr(name=self.name + 'cluster_weights'),
default_initializer=fluid.initializer.MSRA(uniform=False))
activation = fluid.layers.matmul(input, cluster_weights)
activation = fluid.layers.batch_norm(
activation, is_test=(not self.is_training))
# reshape of activation
activation = fluid.layers.reshape(activation,
[-1, self.groups, self.cluster_size])
# softmax on per cluster
activation = fluid.layers.softmax(activation)
activation = fluid.layers.elementwise_mul(activation, attention, axis=0)
a_sum = fluid.layers.sequence_pool(activation, 'sum')
a_sum = fluid.layers.reduce_sum(a_sum, dim=1)
# create cluster_weights2
cluster_weights2 = fluid.layers.create_parameter(
shape=[self.cluster_size, feature_size],
dtype=input.dtype,
attr=fluid.ParamAttr(name=self.name + 'cluster_weights2'),
default_initializer=fluid.initializer.MSRA(uniform=False))
# expand a_sum dimension from [-1, self.cluster_size] to be [-1, self.cluster_size, feature_size]
a_sum = fluid.layers.reshape(a_sum, [-1, self.cluster_size, 1])
a_sum = fluid.layers.expand(a_sum, [1, 1, feature_size])
# element wise multiply a_sum and cluster_weights2
a = fluid.layers.elementwise_mul(
a_sum, cluster_weights2,
axis=1) # output shape [-1, self.cluster_size, feature_size]
# transpose activation from [-1, self.groups, self.cluster_size] to [-1, self.cluster_size, self.groups]
activation2 = fluid.layers.transpose(activation, perm=[0, 2, 1])
# transpose op will clear the lod information, so it should be reset
activation = fluid.layers.lod_reset(activation2, activation)
# reshape input from [-1, self.expansion * self.feature_size] to [-1, self.groups, feature_size]
reshaped_input = fluid.layers.reshape(input,
[-1, self.groups, feature_size])
# mat multiply activation and reshaped_input
vlad = fluid.layers.matmul(
activation,
reshaped_input) # output shape [-1, self.cluster_size, feature_size]
vlad = fluid.layers.sequence_pool(vlad, 'sum')
vlad = fluid.layers.elementwise_sub(vlad, a)
# l2_normalization
vlad = fluid.layers.transpose(vlad, [0, 2, 1])
vlad = fluid.layers.l2_normalize(vlad, axis=1)
# reshape and batch norm
vlad = fluid.layers.reshape(vlad,
[-1, self.cluster_size * feature_size])
vlad = fluid.layers.batch_norm(vlad, is_test=(not self.is_training))
return vlad
class NeXtVLADModel(object):
"""
Creates a NeXtVLAD based model.
Args:
model_input: A LoDTensor of [-1, N] for the input video frames.
vocab_size: The number of classes in the dataset.
"""
def __init__(self):
pass
def create_model(self,
video_input,
audio_input,
is_training=True,
class_dim=None,
cluster_size=None,
hidden_size=None,
groups=None,
expansion=None,
drop_rate=None,
gating_reduction=None,
l2_penalty=None,
**unused_params):
# calculate vlad of video and audio
video_nextvlad = NeXtVLAD(
1024,
cluster_size,
is_training,
expansion=expansion,
groups=groups,
inputname='video')
audio_nextvlad = NeXtVLAD(
128,
cluster_size,
is_training,
expansion=expansion,
groups=groups,
inputname='audio')
vlad_video = video_nextvlad.forward(video_input)
vlad_audio = audio_nextvlad.forward(audio_input)
# concat video and audio
vlad = fluid.layers.concat([vlad_video, vlad_audio], axis=1)
# drop out
if drop_rate > 0.:
vlad = fluid.layers.dropout(
vlad, drop_rate, is_test=(not is_training))
# add fc
activation = fluid.layers.fc(
input=vlad,
size=hidden_size,
act=None,
name='hidden1_fc',
param_attr=fluid.ParamAttr(
name='hidden1_fc_weights',
initializer=fluid.initializer.MSRA(uniform=False)),
bias_attr=False)
activation = fluid.layers.batch_norm(
activation, is_test=(not is_training))
# add fc, gate 1
gates = fluid.layers.fc(
input=activation,
size=hidden_size // gating_reduction,
act=None,
name='gating_fc1',
param_attr=fluid.ParamAttr(
name='gating_fc1_weights',
initializer=fluid.initializer.MSRA(uniform=False)),
bias_attr=False)
gates = fluid.layers.batch_norm(
gates, is_test=(not is_training), act='relu')
# add fc, gate 2
gates = fluid.layers.fc(
input=gates,
size=hidden_size,
act='sigmoid',
name='gating_fc2',
param_attr=fluid.ParamAttr(
name='gating_fc2_weights',
initializer=fluid.initializer.MSRA(uniform=False)),
bias_attr=False)
activation = fluid.layers.elementwise_mul(activation, gates)
aggregate_model = clf_model.LogisticModel # set classification model
return aggregate_model().create_model(
model_input=activation,
vocab_size=class_dim,
l2_penalty=l2_penalty,
is_training=is_training,
**unused_params)
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import numpy as np
import paddle.fluid as fluid
from ..model import ModelBase
from .stnet_res_model import StNet_ResNet
__all__ = ["STNET"]
class STNET(ModelBase):
def __init__(self, name, cfg, mode='train', args=None):
super(STNET, self).__init__(name, cfg, mode=mode, args=args)
self.get_config()
def get_config(self):
self.format = self.get_config_from_sec('model', 'format')
self.num_classes = self.get_config_from_sec('model', 'num_classes')
self.seg_num = self.get_config_from_sec('model', 'seg_num')
self.seglen = self.get_config_from_sec('model', 'seglen')
self.image_mean = self.get_config_from_sec('model', 'image_mean')
self.image_std = self.get_config_from_sec('model', 'image_std')
self.num_layers = self.get_config_from_sec('model', 'num_layers')
self.num_epochs = self.get_config_from_sec('train', 'epoch')
self.total_videos = self.get_config_from_sec('train', 'total_videos')
self.base_learning_rate = self.get_config_from_sec('train',
'learning_rate')
self.learning_rate_decay = self.get_config_from_sec(
'train', 'learning_rate_decay')
self.l2_weight_decay = self.get_config_from_sec('train',
'l2_weight_decay')
self.momentum = self.get_config_from_sec('train', 'momentum')
self.use_gpu = self.get_config_from_sec('train', 'use_gpu')
self.num_gpus = self.get_config_from_sec('train', 'num_gpus')
self.short_size = self.get_config_from_sec(self.mode, 'short_size')
self.target_size = self.get_config_from_sec(self.mode, 'target_size')
self.num_reader_threads = self.get_config_from_sec(self.mode,
'num_reader_threads')
self.buf_size = self.get_config_from_sec(self.mode, 'buf_size')
self.batch_size = self.get_config_from_sec(self.mode, 'batch_size')
self.filelist = self.get_config_from_sec(self.mode, 'filelist')
def build_input(self, use_pyreader=True):
image_shape = [3, self.target_size, self.target_size]
image_shape[0] = image_shape[0] * self.seglen
image_shape = [self.seg_num] + image_shape
self.use_pyreader = use_pyreader
if use_pyreader:
assert self.mode != 'infer', \
'pyreader is not recommended for infer mode, please set use_pyreader to False.'
py_reader = fluid.layers.py_reader(
capacity=100,
shapes=[[-1] + image_shape, [-1] + [1]],
dtypes=['float32', 'int64'],
name='train_py_reader'
if self.is_training else 'test_py_reader',
use_double_buffer=True)
image, label = fluid.layers.read_file(py_reader)
self.py_reader = py_reader
else:
image = fluid.layers.data(
name='image', shape=image_shape, dtype='float32')
if self.mode != 'infer':
label = fluid.layers.data(
name='label', shape=[1], dtype='int64')
else:
label = None
self.feature_input = [image]
self.label_input = label
def create_model_args(self):
cfg = {}
cfg['layers'] = self.num_layers
cfg['class_dim'] = self.num_classes
cfg['seg_num'] = self.seg_num
cfg['seglen'] = self.seglen
return cfg
def build_model(self):
cfg = self.create_model_args()
videomodel = StNet_ResNet(layers = cfg['layers'], seg_num = cfg['seg_num'], \
seglen = cfg['seglen'], is_training = (self.mode == 'train'))
out = videomodel.net(input=self.feature_input[0],
class_dim=cfg['class_dim'])
self.network_outputs = [out]
def optimizer(self):
epoch_points = [self.num_epochs / 3, self.num_epochs * 2 / 3]
total_videos = self.total_videos
step = int(total_videos / self.batch_size + 1)
bd = [e * step for e in epoch_points]
base_lr = self.base_learning_rate
lr_decay = self.learning_rate_decay
lr = [base_lr, base_lr * lr_decay, base_lr * lr_decay * lr_decay]
l2_weight_decay = self.l2_weight_decay
momentum = self.momentum
optimizer = fluid.optimizer.Momentum(
learning_rate=fluid.layers.piecewise_decay(
boundaries=bd, values=lr),
momentum=momentum,
regularization=fluid.regularizer.L2Decay(l2_weight_decay))
return optimizer
def loss(self):
cost = fluid.layers.cross_entropy(input=self.network_outputs[0], \
label=self.label_input, ignore_index=-1)
self.loss_ = fluid.layers.mean(x=cost)
return self.loss_
def outputs(self):
return self.network_outputs
def feeds(self):
return self.feature_input if self.mode == 'infer' else self.feature_input + [
self.label_input
]
def create_dataset_args(self):
cfg = {}
cfg['format'] = self.format
cfg['num_classes'] = self.num_classes
cfg['seg_num'] = self.seg_num
cfg['seglen'] = self.seglen
cfg['short_size'] = self.short_size
cfg['target_size'] = self.target_size
cfg['num_reader_threads'] = self.num_reader_threads
cfg['buf_size'] = self.buf_size
cfg['image_mean'] = self.image_mean
cfg['image_std'] = self.image_std
cfg['list'] = self.filelist
if (self.use_gpu) and (self.py_reader is not None):
cfg['batch_size'] = int(self.batch_size / self.num_gpus)
else:
cfg['batch_size'] = self.batch_size
return cfg
def create_metrics_args(self):
return {}
def load_pretrained_params(self, exe, pretrain_base, prog, place):
def is_parameter(var):
if isinstance(var, fluid.framework.Parameter):
return isinstance(var, fluid.framework.Parameter) and (not ("fc_0" in var.name)) \
and (not ("batch_norm" in var.name)) and (not ("xception" in var.name)) and (not ("conv3d" in var.name))
inference_program = prog.clone(for_test=True)
vars = filter(is_parameter, inference_program.list_vars())
fluid.io.load_vars(exe, pretrain_base, vars=vars)
param_tensor = fluid.global_scope().find_var(
"conv1_weights").get_tensor()
param_numpy = np.array(param_tensor)
param_numpy = np.mean(param_numpy, axis=1, keepdims=True) / self.seglen
param_numpy = np.repeat(param_numpy, 3 * self.seglen, axis=1)
param_tensor.set(param_numpy.astype(np.float32), place)
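if __name__ == "__main__":
    # Numpy sketch of the conv1 weight inflation done in
    # load_pretrained_params: a pretrained 2-D kernel with assumed layout
    # [out_ch, in_ch, kh, kw] is averaged over the channel axis, divided by
    # seglen and repeated 3 * seglen times (seglen=5 as in stnet.txt). Run it
    # inside the package so the relative imports above resolve.
    seglen = 5
    conv1 = np.random.rand(64, 3, 7, 7).astype(np.float32)
    inflated = np.mean(conv1, axis=1, keepdims=True) / seglen
    inflated = np.repeat(inflated, 3 * seglen, axis=1)
    print(conv1.shape, '->', inflated.shape)  # (64, 3, 7, 7) -> (64, 15, 7, 7)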
import os
import time
import sys
import paddle.fluid as fluid
import math
class StNet_ResNet():
def __init__(self, layers=50, seg_num=7, seglen=5, is_training=True):
self.layers = layers
self.seglen = seglen
self.seg_num = seg_num
self.is_training = is_training
def temporal_conv_bn(
self,
input, #(B*seg_num, c, h, w)
num_filters,
filter_size=(3, 1, 1),
padding=(1, 0, 0)):
#(B, seg_num, c, h, w)
in_reshape = fluid.layers.reshape(
x=input,
shape=[
-1, self.seg_num, input.shape[-3], input.shape[-2],
input.shape[-1]
])
in_transpose = fluid.layers.transpose(in_reshape, perm=[0, 2, 1, 3, 4])
conv = fluid.layers.conv3d(
input=in_transpose,
num_filters=num_filters,
filter_size=filter_size,
stride=1,
groups=1,
padding=padding,
act='relu',
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.MSRAInitializer()),
bias_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.ConstantInitializer(value=0.0)))
out = fluid.layers.batch_norm(
input=conv,
act=None,
is_test=(not self.is_training),
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.ConstantInitializer(value=1.0)),
bias_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.ConstantInitializer(value=0.0)))
out = out + in_transpose
out = fluid.layers.transpose(out, perm=[0, 2, 1, 3, 4])
out = fluid.layers.reshape(x=out, shape=input.shape)
return out
def xception(self, input): #(B, C, seg_num,1)
bn = fluid.layers.batch_norm(
input=input,
act=None,
name="xception_bn",
is_test=(not self.is_training),
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.ConstantInitializer(value=1.0)),
bias_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.ConstantInitializer(value=0.0)))
att_conv = fluid.layers.conv2d(
input=bn,
num_filters=2048,
filter_size=[3, 1],
stride=[1, 1],
padding=[1, 0],
groups=2048,
name="xception_att_conv",
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.MSRAInitializer()),
bias_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.ConstantInitializer(value=0)))
att_2 = fluid.layers.conv2d(
input=att_conv,
num_filters=1024,
filter_size=[1, 1],
stride=[1, 1],
name="xception_att_2",
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.MSRAInitializer()),
bias_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.ConstantInitializer(value=0)))
bndw = fluid.layers.batch_norm(
input=att_2,
act="relu",
name="xception_bndw",
is_test=(not self.is_training),
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.ConstantInitializer(value=1.0)),
bias_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.ConstantInitializer(value=0.0)))
att1 = fluid.layers.conv2d(
input=bndw,
num_filters=1024,
filter_size=[3, 1],
stride=[1, 1],
padding=[1, 0],
groups=1024,
name="xception_att1",
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.MSRAInitializer()),
bias_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.ConstantInitializer(value=0)))
att1_2 = fluid.layers.conv2d(
input=att1,
num_filters=1024,
filter_size=[1, 1],
stride=[1, 1],
name="xception_att1_2",
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.MSRAInitializer()),
bias_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.ConstantInitializer(value=0)))
dw = fluid.layers.conv2d(
input=bn,
num_filters=1024,
filter_size=[1, 1],
stride=[1, 1],
name="xception_dw",
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.MSRAInitializer()),
bias_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.ConstantInitializer(value=0)))
add_to = dw + att1_2
bn2 = fluid.layers.batch_norm(
input=add_to,
act=None,
name='xception_bn2',
is_test=(not self.is_training),
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.ConstantInitializer(value=1.0)),
bias_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.ConstantInitializer(value=0.0)))
return fluid.layers.relu(bn2)
def conv_bn_layer(self,
input,
num_filters,
filter_size,
stride=1,
groups=1,
act=None,
name=None):
conv = fluid.layers.conv2d(
input=input,
num_filters=num_filters,
filter_size=filter_size,
stride=stride,
padding=(filter_size - 1) // 2,
groups=groups,
act=None,
param_attr=fluid.param_attr.ParamAttr(name=name + "_weights"),
bias_attr=False,
#name = name+".conv2d.output.1"
)
if name == "conv1":
bn_name = "bn_" + name
else:
bn_name = "bn" + name[3:]
return fluid.layers.batch_norm(
input=conv,
act=act,
is_test=(not self.is_training),
#name=bn_name+'.output.1',
param_attr=fluid.param_attr.ParamAttr(name=bn_name + "_scale"),
bias_attr=fluid.param_attr.ParamAttr(bn_name + '_offset'),
moving_mean_name=bn_name + "_mean",
moving_variance_name=bn_name + '_variance')
def shortcut(self, input, ch_out, stride, name):
ch_in = input.shape[1]
if ch_in != ch_out or stride != 1:
return self.conv_bn_layer(input, ch_out, 1, stride, name=name)
else:
return input
def bottleneck_block(self, input, num_filters, stride, name):
conv0 = self.conv_bn_layer(
input=input,
num_filters=num_filters,
filter_size=1,
act='relu',
name=name + "_branch2a")
conv1 = self.conv_bn_layer(
input=conv0,
num_filters=num_filters,
filter_size=3,
stride=stride,
act='relu',
name=name + "_branch2b")
conv2 = self.conv_bn_layer(
input=conv1,
num_filters=num_filters * 4,
filter_size=1,
act=None,
name=name + "_branch2c")
short = self.shortcut(
input, num_filters * 4, stride, name=name + "_branch1")
return fluid.layers.elementwise_add(
x=short,
y=conv2,
act='relu',
#name=".add.output.5"
)
def net(self, input, class_dim=101):
layers = self.layers
seg_num = self.seg_num
seglen = self.seglen
supported_layers = [50, 101, 152]
if layers not in supported_layers:
print("supported layers are", supported_layers, \
"but input layer is ", layers)
exit()
# reshape input
# [B, seg_num, seglen*c, H, W] --> [B*seg_num, seglen*c, H, W]
channels = input.shape[2]
short_size = input.shape[3]
input = fluid.layers.reshape(
x=input, shape=[-1, channels, short_size, short_size])
if layers == 50:
depth = [3, 4, 6, 3]
elif layers == 101:
depth = [3, 4, 23, 3]
elif layers == 152:
depth = [3, 8, 36, 3]
num_filters = [64, 128, 256, 512]
conv = self.conv_bn_layer(
input=input,
num_filters=64,
filter_size=7,
stride=2,
act='relu',
name='conv1')
conv = fluid.layers.pool2d(
input=conv,
pool_size=3,
pool_stride=2,
pool_padding=1,
pool_type='max')
for block in range(len(depth)):
for i in range(depth[block]):
if layers in [101, 152] and block == 2:
if i == 0:
conv_name = "res" + str(block + 2) + "a"
else:
conv_name = "res" + str(block + 2) + "b" + str(i)
else:
conv_name = "res" + str(block + 2) + chr(97 + i)
conv = self.bottleneck_block(
input=conv,
num_filters=num_filters[block],
stride=2 if i == 0 and block != 0 else 1,
name=conv_name)
if block == 1:
#insert the first temporal modeling block
conv = self.temporal_conv_bn(input=conv, num_filters=512)
if block == 2:
#insert the second temporal modeling block
conv = self.temporal_conv_bn(input=conv, num_filters=1024)
pool = fluid.layers.pool2d(
input=conv, pool_size=7, pool_type='avg', global_pooling=True)
feature = fluid.layers.reshape(
x=pool, shape=[-1, seg_num, pool.shape[1], 1])
feature = fluid.layers.transpose(feature, perm=[0, 2, 1, 3])
#append the temporal Xception block
xfeat = self.xception(feature) #(B, 1024, seg_num, 1)
out = fluid.layers.pool2d(
input=xfeat,
pool_size=(seg_num, 1),
pool_type='max',
global_pooling=True)
out = fluid.layers.reshape(x=out, shape=[-1, 1024])
stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
out = fluid.layers.fc(input=out,
size=class_dim,
act='softmax',
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv,
stdv)))
return out
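if __name__ == "__main__":
    # Graph-construction sketch only, no executor is run; the shapes follow
    # stnet.txt (seg_num=7, seglen=5, target_size=224, 400 classes).
    seg_num, seglen, target_size, num_classes = 7, 5, 224, 400
    image = fluid.layers.data(
        name='image',
        shape=[seg_num, 3 * seglen, target_size, target_size],
        dtype='float32')
    model = StNet_ResNet(
        layers=50, seg_num=seg_num, seglen=seglen, is_training=False)
    out = model.net(input=image, class_dim=num_classes)
    print(out.shape)  # expected (-1, 400)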
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import paddle.fluid as fluid
from paddle.fluid import ParamAttr
from ..model import ModelBase
from .tsn_res_model import TSN_ResNet
__all__ = ["TSN"]
class TSN(ModelBase):
def __init__(self, name, cfg, mode='train', args=None):
super(TSN, self).__init__(name, cfg, mode=mode, args=args)
self.get_config()
def get_config(self):
self.format = self.get_config_from_sec('model', 'format')
self.num_classes = self.get_config_from_sec('model', 'num_classes')
self.seg_num = self.get_config_from_sec('model', 'seg_num')
self.seglen = self.get_config_from_sec('model', 'seglen')
self.image_mean = self.get_config_from_sec('model', 'image_mean')
self.image_std = self.get_config_from_sec('model', 'image_std')
self.num_layers = self.get_config_from_sec('model', 'num_layers')
self.num_epochs = self.get_config_from_sec('train', 'epoch')
self.total_videos = self.get_config_from_sec('train', 'total_videos')
self.base_learning_rate = self.get_config_from_sec('train',
'learning_rate')
self.learning_rate_decay = self.get_config_from_sec(
'train', 'learning_rate_decay')
self.l2_weight_decay = self.get_config_from_sec('train',
'l2_weight_decay')
self.momentum = self.get_config_from_sec('train', 'momentum')
self.use_gpu = self.get_config_from_sec('train', 'use_gpu')
self.num_gpus = self.get_config_from_sec('train', 'num_gpus')
self.short_size = self.get_config_from_sec(self.mode, 'short_size')
self.target_size = self.get_config_from_sec(self.mode, 'target_size')
self.num_reader_threads = self.get_config_from_sec(self.mode,
'num_reader_threads')
self.buf_size = self.get_config_from_sec(self.mode, 'buf_size')
self.batch_size = self.get_config_from_sec(self.mode, 'batch_size')
self.filelist = self.get_config_from_sec(self.mode, 'filelist')
def build_input(self, use_pyreader=True):
image_shape = [3, self.target_size, self.target_size]
image_shape[0] = image_shape[0] * self.seglen
image_shape = [self.seg_num] + image_shape
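        # each sample is [seg_num, 3 * seglen, target_size, target_size]; the backbone
        # later folds seg_num into the batch dimension (see TSN_ResNet.net)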
self.use_pyreader = use_pyreader
if use_pyreader:
            assert self.mode != 'infer', \
                'pyreader is not recommended in infer mode, please set use_pyreader to False.'
py_reader = fluid.layers.py_reader(
capacity=100,
shapes=[[-1] + image_shape, [-1] + [1]],
dtypes=['float32', 'int64'],
name='train_py_reader'
if self.is_training else 'test_py_reader',
use_double_buffer=True)
image, label = fluid.layers.read_file(py_reader)
self.py_reader = py_reader
else:
image = fluid.layers.data(
name='image', shape=image_shape, dtype='float32')
if self.mode != 'infer':
label = fluid.layers.data(
name='label', shape=[1], dtype='int64')
else:
label = None
self.feature_input = [image]
self.label_input = label
def create_model_args(self):
cfg = {}
cfg['layers'] = self.num_layers
cfg['class_dim'] = self.num_classes
cfg['seg_num'] = self.seg_num
return cfg
def build_model(self):
cfg = self.create_model_args()
videomodel = TSN_ResNet(
layers=cfg['layers'],
seg_num=cfg['seg_num'],
is_training=(self.mode == 'train'))
out = videomodel.net(input=self.feature_input[0],
class_dim=cfg['class_dim'])
self.network_outputs = [out]
def optimizer(self):
        assert self.mode == 'train', "optimizer can only be built in train mode"
epoch_points = [self.num_epochs / 3, self.num_epochs * 2 / 3]
total_videos = self.total_videos
step = int(total_videos / self.batch_size + 1)
bd = [e * step for e in epoch_points]
base_lr = self.base_learning_rate
lr_decay = self.learning_rate_decay
lr = [base_lr, base_lr * lr_decay, base_lr * lr_decay * lr_decay]
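        # worked example (hypothetical numbers): total_videos = 240000, batch_size = 256,
        # num_epochs = 45 gives step = 938, epoch_points = [15.0, 30.0] and
        # bd = [14070.0, 28140.0]; with base_lr = 0.01 and lr_decay = 0.1 the schedule
        # is lr = [0.01, 0.001, 0.0001]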
l2_weight_decay = self.l2_weight_decay
momentum = self.momentum
optimizer = fluid.optimizer.Momentum(
learning_rate=fluid.layers.piecewise_decay(
boundaries=bd, values=lr),
momentum=momentum,
regularization=fluid.regularizer.L2Decay(l2_weight_decay))
return optimizer
def loss(self):
        assert self.mode != 'infer', "invalid loss calculation in infer mode"
cost = fluid.layers.cross_entropy(input=self.network_outputs[0], \
label=self.label_input, ignore_index=-1)
self.loss_ = fluid.layers.mean(x=cost)
return self.loss_
def outputs(self):
return self.network_outputs
def feeds(self):
return self.feature_input if self.mode == 'infer' else self.feature_input + [
self.label_input
]
def create_dataset_args(self):
cfg = {}
cfg['format'] = self.format
cfg['num_classes'] = self.num_classes
cfg['seg_num'] = self.seg_num
cfg['seglen'] = self.seglen
cfg['short_size'] = self.short_size
cfg['target_size'] = self.target_size
cfg['num_reader_threads'] = self.num_reader_threads
cfg['buf_size'] = self.buf_size
cfg['image_mean'] = self.image_mean
cfg['image_std'] = self.image_std
cfg['list'] = self.filelist
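        # with GPUs and py_reader, the configured batch_size is the global batch size;
        # each GPU's reader receives batch_size / num_gpus samples per iteration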
if self.use_gpu and (self.py_reader is not None):
cfg['batch_size'] = int(self.batch_size / self.num_gpus)
else:
cfg['batch_size'] = self.batch_size
return cfg
def create_metrics_args(self):
return {}
import os
import time
import sys
import paddle.fluid as fluid
import math
class TSN_ResNet():
def __init__(self, layers=50, seg_num=7, is_training=True):
self.layers = layers
self.seg_num = seg_num
self.is_training = is_training
def conv_bn_layer(self,
input,
num_filters,
filter_size,
stride=1,
groups=1,
act=None,
name=None):
conv = fluid.layers.conv2d(
input=input,
num_filters=num_filters,
filter_size=filter_size,
stride=stride,
padding=(filter_size - 1) // 2,
groups=groups,
act=None,
param_attr=fluid.param_attr.ParamAttr(name=name + "_weights"),
bias_attr=False)
if name == "conv1":
bn_name = "bn_" + name
else:
bn_name = "bn" + name[3:]
return fluid.layers.batch_norm(
input=conv,
act=act,
is_test=(not self.is_training),
param_attr=fluid.param_attr.ParamAttr(name=bn_name + "_scale"),
bias_attr=fluid.param_attr.ParamAttr(bn_name + '_offset'),
moving_mean_name=bn_name + "_mean",
moving_variance_name=bn_name + '_variance')
def shortcut(self, input, ch_out, stride, name):
ch_in = input.shape[1]
if ch_in != ch_out or stride != 1:
return self.conv_bn_layer(input, ch_out, 1, stride, name=name)
else:
return input
def bottleneck_block(self, input, num_filters, stride, name):
conv0 = self.conv_bn_layer(
input=input,
num_filters=num_filters,
filter_size=1,
act='relu',
name=name + "_branch2a")
conv1 = self.conv_bn_layer(
input=conv0,
num_filters=num_filters,
filter_size=3,
stride=stride,
act='relu',
name=name + "_branch2b")
conv2 = self.conv_bn_layer(
input=conv1,
num_filters=num_filters * 4,
filter_size=1,
act=None,
name=name + "_branch2c")
short = self.shortcut(
input, num_filters * 4, stride, name=name + "_branch1")
return fluid.layers.elementwise_add(x=short, y=conv2, act='relu')
def net(self, input, class_dim=101):
layers = self.layers
seg_num = self.seg_num
supported_layers = [50, 101, 152]
assert layers in supported_layers, \
"supported layers are {} but input layer is {}".format(supported_layers, layers)
        # reshape input: [B, seg_num, seglen*c, H, W] --> [B*seg_num, seglen*c, H, W]
channels = input.shape[2]
short_size = input.shape[3]
input = fluid.layers.reshape(
x=input, shape=[-1, channels, short_size, short_size])
if layers == 50:
depth = [3, 4, 6, 3]
elif layers == 101:
depth = [3, 4, 23, 3]
elif layers == 152:
depth = [3, 8, 36, 3]
num_filters = [64, 128, 256, 512]
conv = self.conv_bn_layer(
input=input,
num_filters=64,
filter_size=7,
stride=2,
act='relu',
name='conv1')
conv = fluid.layers.pool2d(
input=conv,
pool_size=3,
pool_stride=2,
pool_padding=1,
pool_type='max')
for block in range(len(depth)):
for i in range(depth[block]):
if layers in [101, 152] and block == 2:
if i == 0:
conv_name = "res" + str(block + 2) + "a"
else:
conv_name = "res" + str(block + 2) + "b" + str(i)
else:
conv_name = "res" + str(block + 2) + chr(97 + i)
conv = self.bottleneck_block(
input=conv,
num_filters=num_filters[block],
stride=2 if i == 0 and block != 0 else 1,
name=conv_name)
pool = fluid.layers.pool2d(
input=conv, pool_size=7, pool_type='avg', global_pooling=True)
feature = fluid.layers.reshape(
x=pool, shape=[-1, seg_num, pool.shape[1]])
out = fluid.layers.reduce_mean(feature, dim=1)
stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
out = fluid.layers.fc(input=out,
size=class_dim,
act='softmax',
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv,
stdv)))
return out
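# head summary: per-segment ResNet features are global-average-pooled, reshaped to
# [N, seg_num, C] and averaged over the seg_num axis (segment consensus) before the
# final softmax fully-connected layer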
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import os
import wget
import tarfile
__all__ = ['decompress', 'download', 'AttrDict']
def decompress(path):
t = tarfile.open(path)
t.extractall(path='/'.join(path.split('/')[:-1]))
t.close()
os.remove(path)
def download(url, path):
weight_dir = '/'.join(path.split('/')[:-1])
if not os.path.exists(weight_dir):
os.makedirs(weight_dir)
path = path + ".tar.gz"
wget.download(url, path)
decompress(path)
class AttrDict(dict):
def __getattr__(self, key):
return self[key]
def __setattr__(self, key, value):
if key in self.__dict__:
self.__dict__[key] = value
else:
self[key] = value
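# minimal usage sketch (hypothetical URL and paths; assumes the wget package is installed)
if __name__ == "__main__":
    # download() appends ".tar.gz" to `path`, fetches the archive with wget,
    # extracts it next to `path` and removes the archive afterwards
    download("https://example.com/ResNet50_pretrained.tar.gz",  # hypothetical URL
             "./data/pretrained/ResNet50_pretrained")
    # AttrDict exposes dict entries as attributes, handy for configuration objects
    cfg = AttrDict(batch_size=256, use_gpu=True)
    print(cfg.batch_size, cfg.use_gpu)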
python infer.py --model-name="AttentionCluster" --config=./configs/attention_cluster.txt \
--filelist=./data/youtube8m/infer.list \
--weights=./checkpoints/AttentionCluster_epoch0 \
--save-dir="./save"
python infer.py --model-name="AttentionLSTM" --config=./configs/attention_lstm.txt \
--filelist=./data/youtube8m/infer.list \
--weights=./checkpoints/AttentionLSTM_epoch0 \
--save-dir="./save"
python infer.py --model-name="NEXTVLAD" --config=./configs/nextvlad.txt --filelist=./data/youtube8m/infer.list \
--weights=./checkpoints/NEXTVLAD_epoch0 \
--save-dir="./save"
python infer.py --model-name="STNET" --config=./configs/stnet.txt --filelist=./data/kinetics/infer.list \
--log-interval=10 --weights=./checkpoints/STNET_epoch0 --save-dir=./save
python infer.py --model-name="TSN" --config=./configs/tsn.txt --filelist=./data/kinetics/infer.list \
--log-interval=10 --weights=./checkpoints/TSN_epoch0 --save-dir=./save
python test.py --model-name="AttentionCluster" --config=./configs/attention_cluster.txt \
--log-interval=5 --weights=./checkpoints/AttentionCluster_epoch0
python test.py --model-name="AttentionLSTM" --config=./configs/attention_lstm.txt \
--log-interval=5 --weights=./checkpoints/AttentionLSTM_epoch0
python test.py --model-name="NEXTVLAD" --config=./configs/nextvlad.txt \
--log-interval=10 --weights=./checkpoints/NEXTVLAD_epoch0
python test.py --model-name="STNET" --config=./configs/stnet.txt \
--log-interval=10 --weights=./checkpoints/STNET_epoch0
python test.py --model-name="TSN" --config=./configs/tsn.txt \
--log-interval=10 --weights=./checkpoints/TSN_epoch0
python train.py --model-name="AttentionCluster" --config=./configs/attention_cluster.txt --epoch-num=5 \
--valid-interval=1 --save-interval=1 --log-interval=10
python train.py --model-name="AttentionLSTM" --config=./configs/attention_lstm.txt --epoch-num=10 \
--valid-interval=1 --save-interval=1 --log-interval=10
export CUDA_VISIBLE_DEVICES=0,1,2,3
python train.py --model-name="NEXTVLAD" --config=./configs/nextvlad.txt --epoch-num=6 \
--valid-interval=1 --save-interval=1 --log-interval=10
python train.py --model-name="STNET" --config=./configs/stnet.txt --epoch-num=6 \
--valid-interval=1 --save-interval=1 --log-interval=10
python train.py --model-name="TSN" --config=./configs/tsn.txt --epoch-num=6 \
--valid-interval=1 --save-interval=1 --log-interval=10
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import os
import sys
import time
import logging
import argparse
import numpy as np
import paddle.fluid as fluid
import models
FORMAT = '[%(levelname)s: %(filename)s: %(lineno)4d]: %(message)s'
logging.basicConfig(level=logging.INFO, format=FORMAT, stream=sys.stdout)
logger = logging.getLogger(__name__)
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument(
'--model-name',
type=str,
default='AttentionCluster',
        help='name of model to test.')
parser.add_argument(
'--config',
type=str,
default='configs/attention_cluster.txt',
help='path to config file of model')
parser.add_argument(
'--batch-size',
type=int,
default=None,
        help='training batch size per GPU. None to use config file setting.')
parser.add_argument(
        '--use-gpu', type=bool, default=True, help='whether to use GPU (default: True).')
parser.add_argument(
'--weights',
type=str,
default=None,
help='weight path, None to use weights from Paddle.')
parser.add_argument(
'--log-interval',
type=int,
default=1,
help='mini-batch interval to log.')
args = parser.parse_args()
return args
def test(test_model, args):
test_model.build_input(use_pyreader=False)
test_model.build_model()
test_feeds = test_model.feeds()
test_outputs = test_model.outputs()
test_reader = test_model.reader()
test_metrics = test_model.metrics()
loss = test_model.loss()
place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place)
if args.weights:
        assert os.path.exists(
            args.weights), "Given weight dir {} does not exist.".format(args.weights)
weights = args.weights or test_model.get_weights()
def if_exist(var):
return os.path.exists(os.path.join(weights, var.name))
fluid.io.load_vars(exe, weights, predicate=if_exist)
test_feeder = fluid.DataFeeder(place=place, feed_list=test_feeds)
fetch_list = [loss.name] + [x.name
for x in test_outputs] + [test_feeds[-1].name]
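    # fetch order is [loss, model outputs..., label]; _test_loop below relies on
    # test_outs[0] being the loss, test_outs[1] the prediction and test_outs[-1] the label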
def _test_loop():
epoch_period = []
for test_iter, data in enumerate(test_reader()):
cur_time = time.time()
test_outs = exe.run(fetch_list=fetch_list,
feed=test_feeder.feed(data))
period = time.time() - cur_time
epoch_period.append(period)
loss = np.array(test_outs[0])
pred = np.array(test_outs[1])
label = np.array(test_outs[-1])
test_metrics.accumulate(loss, pred, label)
# metric here
if args.log_interval > 0 and test_iter % args.log_interval == 0:
info_str = '[EVAL] Batch {}'.format(test_iter)
test_metrics.calculate_and_log_out(loss, pred, label, info_str)
test_metrics.finalize_and_log_out("[EVAL] eval finished. ")
# start eval loop
_test_loop()
if __name__ == "__main__":
args = parse_args()
test_model = models.get_model(
args.model_name, args.config, mode='test', args=vars(args))
test(test_model, args)
import os
import time
import numpy as np
import paddle
import paddle.fluid as fluid
import logging
import shutil
logger = logging.getLogger(__name__)
def test_without_pyreader(test_exe,
test_reader,
test_feeder,
test_fetch_list,
test_metrics,
log_interval=0):
test_metrics.reset()
for test_iter, data in enumerate(test_reader()):
test_outs = test_exe.run(test_fetch_list, feed=test_feeder.feed(data))
loss = np.array(test_outs[0])
pred = np.array(test_outs[1])
label = np.array(test_outs[-1])
test_metrics.accumulate(loss, pred, label)
if log_interval > 0 and test_iter % log_interval == 0:
test_metrics.calculate_and_log_out(loss, pred, label, \
info = '[Test] test_iter {} '.format(test_iter))
test_metrics.finalize_and_log_out("[TEST] Finish")
def test_with_pyreader(test_exe,
test_pyreader,
test_fetch_list,
test_metrics,
log_interval=0):
    if not test_pyreader:
        logger.error("[TEST] get pyreader failed.")
        return
test_pyreader.start()
test_metrics.reset()
test_iter = 0
try:
while True:
test_outs = test_exe.run(fetch_list=test_fetch_list)
loss = np.array(test_outs[0])
pred = np.array(test_outs[1])
label = np.array(test_outs[-1])
test_metrics.accumulate(loss, pred, label)
if log_interval > 0 and test_iter % log_interval == 0:
test_metrics.calculate_and_log_out(loss, pred, label, \
info = '[Test] test_iter {} '.format(test_iter))
test_iter += 1
except fluid.core.EOFException:
test_metrics.finalize_and_log_out("[TEST] Finish")
finally:
test_pyreader.reset()
def train_without_pyreader(exe, train_prog, train_exe, train_reader, train_feeder, \
train_fetch_list, train_metrics, epochs = 10, \
log_interval = 0, valid_interval = 0, save_dir = './', \
save_model_name = 'model', test_exe = None, test_reader = None, \
test_feeder = None, test_fetch_list = None, test_metrics = None):
for epoch in range(epochs):
epoch_periods = []
for train_iter, data in enumerate(train_reader()):
cur_time = time.time()
train_outs = train_exe.run(train_fetch_list,
feed=train_feeder.feed(data))
period = time.time() - cur_time
epoch_periods.append(period)
loss = np.array(train_outs[0])
pred = np.array(train_outs[1])
label = np.array(train_outs[-1])
if log_interval > 0 and (train_iter % log_interval == 0):
# eval here
train_metrics.calculate_and_log_out(loss, pred, label, \
info = '[TRAIN] Epoch {}, iter {} '.format(epoch, train_iter))
train_iter += 1
logger.info('[TRAIN] Epoch {} training finished, average time: {}'.
format(epoch, np.mean(epoch_periods)))
save_model(exe, train_prog, save_dir, save_model_name,
"_epoch{}".format(epoch))
        if test_exe and valid_interval > 0 and (epoch + 1) % valid_interval == 0:
test_without_pyreader(test_exe, test_reader, test_feeder,
test_fetch_list, test_metrics, log_interval)
def train_with_pyreader(exe, train_prog, train_exe, train_pyreader, \
train_fetch_list, train_metrics, epochs = 10, \
log_interval = 0, valid_interval = 0, \
save_dir = './', save_model_name = 'model', \
test_exe = None, test_pyreader = None, \
test_fetch_list = None, test_metrics = None):
    if not train_pyreader:
        logger.error("[TRAIN] get pyreader failed.")
        return
for epoch in range(epochs):
train_pyreader.start()
train_metrics.reset()
try:
train_iter = 0
epoch_periods = []
while True:
cur_time = time.time()
train_outs = train_exe.run(fetch_list=train_fetch_list)
period = time.time() - cur_time
epoch_periods.append(period)
loss = np.array(train_outs[0])
pred = np.array(train_outs[1])
label = np.array(train_outs[-1])
if log_interval > 0 and (train_iter % log_interval == 0):
# eval here
train_metrics.calculate_and_log_out(loss, pred, label, \
info = '[TRAIN] Epoch {}, iter {} '.format(epoch, train_iter))
train_iter += 1
except fluid.core.EOFException:
# eval here
logger.info('[TRAIN] Epoch {} training finished, average time: {}'.
format(epoch, np.mean(epoch_periods)))
save_model(exe, train_prog, save_dir, save_model_name,
"_epoch{}".format(epoch))
if test_exe and valid_interval > 0:
test_with_pyreader(test_exe, test_pyreader, test_fetch_list,
test_metrics, log_interval)
finally:
epoch_period = []
train_pyreader.reset()
def save_model(exe, program, save_dir, model_name, postfix=None):
model_path = os.path.join(save_dir, model_name + postfix)
if os.path.isdir(model_path):
shutil.rmtree(model_path)
fluid.io.save_persistables(exe, model_path, main_program=program)
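# checkpoints are saved as <save_dir>/<model_name><postfix> directories of persistable
# variables (e.g. ./checkpoints/TSN_epoch0); test.py and infer.py reload them through
# the --weights flag using fluid.io.load_vars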
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import os
import sys
import time
import shutil
import argparse
import logging
import numpy as np
import paddle.fluid as fluid
from tools.train_utils import train_with_pyreader, train_without_pyreader
import models
FORMAT = '[%(levelname)s: %(filename)s: %(lineno)4d]: %(message)s'
logging.basicConfig(level=logging.INFO, format=FORMAT, stream=sys.stdout)
logger = logging.getLogger(__name__)
def parse_args():
parser = argparse.ArgumentParser("Paddle Video train script")
parser.add_argument(
'--model-name',
type=str,
default='AttentionCluster',
help='name of model to train.')
parser.add_argument(
'--config',
type=str,
default='configs/attention_cluster.txt',
help='path to config file of model')
parser.add_argument(
'--batch-size',
type=int,
default=None,
help='training batch size. None to use config file setting.')
parser.add_argument(
'--learning-rate',
type=float,
default=None,
        help='learning rate used for training. None to use config file setting.')
parser.add_argument(
'--pretrain',
type=str,
default=None,
        help='path to pretrained weights. None to use the default weights path in ~/.paddle/weights.'
)
parser.add_argument(
        '--use-gpu', type=bool, default=True, help='whether to use GPU (default: True).')
parser.add_argument(
'--no-parallel',
action='store_true',
default=False,
        help='do not use the parallel executor')
parser.add_argument(
'--no-use-pyreader',
action='store_true',
default=False,
        help='do not use pyreader for data feeding')
parser.add_argument(
'--no-memory-optimize',
action='store_true',
default=False,
        help='do not use memory optimization in training')
parser.add_argument(
'--epoch-num',
type=int,
default=0,
help='epoch number, 0 for read from config file')
parser.add_argument(
'--valid-interval',
type=int,
default=1,
help='validation epoch interval, 0 for no validation.')
parser.add_argument(
'--save-interval',
type=int,
default=1,
help='save checkpoints epoch interval.')
parser.add_argument(
'--save-dir',
type=str,
default='checkpoints',
        help='directory name to save training snapshots')
parser.add_argument(
'--log-interval',
type=int,
default=10,
help='mini-batch interval to log.')
args = parser.parse_args()
return args
def train(train_model, valid_model, args):
train_prog = fluid.Program()
train_startup = fluid.Program()
with fluid.program_guard(train_prog, train_startup):
with fluid.unique_name.guard():
train_model.build_input(not args.no_use_pyreader)
train_model.build_model()
# for the input, has the form [data1, data2,..., label], so train_feeds[-1] is label
train_feeds = train_model.feeds()
train_feeds[-1].persistable = True
# for the output of classification model, has the form [pred]
train_outputs = train_model.outputs()
for output in train_outputs:
output.persistable = True
train_loss = train_model.loss()
train_loss.persistable = True
# outputs, loss, label should be fetched, so set persistable to be true
optimizer = train_model.optimizer()
optimizer.minimize(train_loss)
train_reader = train_model.reader()
train_metrics = train_model.metrics()
train_pyreader = train_model.pyreader()
if not args.no_memory_optimize:
fluid.memory_optimize(train_prog)
valid_prog = fluid.Program()
valid_startup = fluid.Program()
with fluid.program_guard(valid_prog, valid_startup):
with fluid.unique_name.guard():
valid_model.build_input(not args.no_use_pyreader)
valid_model.build_model()
valid_feeds = valid_model.feeds()
valid_outputs = valid_model.outputs()
valid_loss = valid_model.loss()
valid_reader = valid_model.reader()
valid_metrics = valid_model.metrics()
valid_pyreader = valid_model.pyreader()
place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(train_startup)
exe.run(valid_startup)
if args.pretrain:
        assert os.path.exists(args.pretrain), \
            "Given pretrained weight dir {} does not exist.".format(args.pretrain)
pretrain = args.pretrain or train_model.get_pretrain_weights()
if pretrain:
train_model.load_pretrained_params(exe, pretrain, train_prog, place)
if args.no_parallel:
train_exe = exe
valid_exe = exe
else:
train_exe = fluid.ParallelExecutor(
use_cuda=args.use_gpu,
loss_name=train_loss.name,
main_program=train_prog)
valid_exe = fluid.ParallelExecutor(
use_cuda=args.use_gpu,
share_vars_from=train_exe,
main_program=valid_prog)
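        # valid_exe shares its parameter variables with train_exe (share_vars_from),
        # so validation runs on the parameters currently being trained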
train_fetch_list = [train_loss.name] + [x.name for x in train_outputs
] + [train_feeds[-1].name]
valid_fetch_list = [valid_loss.name] + [x.name for x in valid_outputs
] + [valid_feeds[-1].name]
epochs = args.epoch_num or train_model.epoch_num()
if args.no_use_pyreader:
train_feeder = fluid.DataFeeder(place=place, feed_list=train_feeds)
valid_feeder = fluid.DataFeeder(place=place, feed_list=valid_feeds)
train_without_pyreader(exe, train_prog, train_exe, train_reader, train_feeder, \
train_fetch_list, train_metrics, epochs = epochs, \
log_interval = args.log_interval, valid_interval = args.valid_interval, \
save_dir = args.save_dir, save_model_name = args.model_name, \
test_exe = valid_exe, test_reader = valid_reader, test_feeder = valid_feeder, \
test_fetch_list = valid_fetch_list, test_metrics = valid_metrics)
else:
train_pyreader.decorate_paddle_reader(train_reader)
valid_pyreader.decorate_paddle_reader(valid_reader)
train_with_pyreader(exe, train_prog, train_exe, train_pyreader, train_fetch_list, train_metrics, \
epochs = epochs, log_interval = args.log_interval, \
valid_interval = args.valid_interval, \
save_dir = args.save_dir, save_model_name = args.model_name, \
test_exe = valid_exe, test_pyreader = valid_pyreader, \
test_fetch_list = valid_fetch_list, test_metrics = valid_metrics)
if __name__ == "__main__":
args = parse_args()
logger.info(args)
if not os.path.exists(args.save_dir):
os.makedirs(args.save_dir)
train_model = models.get_model(
args.model_name, args.config, mode='train', args=vars(args))
valid_model = models.get_model(
args.model_name, args.config, mode='valid', args=vars(args))
train(train_model, valid_model, args)