提交 527c90ba 编写于 作者: D dengkaipeng

refine comment

上级 3de4bd26
......@@ -20,18 +20,15 @@ import argparse
import numpy as np
from paddle import fluid
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear
from paddle.fluid.dygraph.parallel import ParallelEnv
from model import Model, CrossEntropy, Input, set_device
from metrics import Accuracy
from tsm import *
NUM_CLASSES = 10
def make_optimizer(num_samples, parameter_list=None):
step = int(num_samples / FLAGS.batch_size / FLAGS.num_devices)
boundaries = [e * step for e in [40, 60]]
def make_optimizer(step_per_epoch, parameter_list=None):
boundaries = [e * step_per_epoch for e in [40, 60]]
values = [FLAGS.lr * (0.1 ** i) for i in range(len(boundaries) + 1)]
learning_rate = fluid.layers.piecewise_decay(
......@@ -56,20 +53,26 @@ def main():
GroupRandomFlip(),
NormalizeImage()])
train_dataset = KineticsDataset(
filelist=os.path.join(FLAGS.data, 'train_10.list'),
file_list=os.path.join(FLAGS.data, 'train_10.list'),
pickle_dir=os.path.join(FLAGS.data, 'train_10'),
label_list=os.path.join(FLAGS.data, 'label_list'),
transform=train_transform)
val_transform = Compose([GroupScale(),
GroupCenterCrop(),
NormalizeImage()])
val_dataset = KineticsDataset(
filelist=os.path.join(FLAGS.data, 'val_10.list'),
file_list=os.path.join(FLAGS.data, 'val_10.list'),
pickle_dir=os.path.join(FLAGS.data, 'val_10'),
label_list=os.path.join(FLAGS.data, 'label_list'),
mode='val',
transform=val_transform)
pretrained = FLAGS.eval_only and FLAGS.weights is None
model = tsm_resnet50(num_classes=NUM_CLASSES, pretrained=pretrained)
model = tsm_resnet50(num_classes=train_dataset.num_classes,
pretrained=pretrained)
step_per_epoch = int(len(train_dataset) / FLAGS.batch_size \
/ ParallelEnv().nranks)
optim = make_optimizer(len(train_dataset), model.parameters())
inputs = [Input([None, 8, 3, 224, 224], 'float32', name='image')]
......@@ -101,7 +104,7 @@ def main():
epochs=FLAGS.epoch,
batch_size=FLAGS.batch_size,
save_dir='tsm_checkpoint',
num_workers=4,
num_workers=FLAGS.num_workers,
drop_last=True,
shuffle=True)
......@@ -128,8 +131,6 @@ if __name__ == '__main__':
help='initial learning rate')
parser.add_argument(
"-b", "--batch_size", default=16, type=int, help="batch size")
parser.add_argument(
"-n", "--num_devices", default=1, type=int, help="number of devices")
parser.add_argument(
"-r",
"--resume",
......
......@@ -33,32 +33,52 @@ logger = logging.getLogger(__name__)
__all__ = ['KineticsDataset']
KINETICS_CLASS_NUM = 400
class KineticsDataset(Dataset):
"""
Kinetics dataset
Args:
filelist (str): path to file list, default None.
num_classes (int): class number
file_list (str): path to file list
pickle_dir (str): path to pickle file directory
label_list (str): path to label_list file, if set None, the
default class number 400 of kinetics dataset will be
used. Default None
mode (str): 'train' or 'val' mode, segmentation methods will
be different in these 2 modes. Default 'train'
seg_num (int): segment number to sample from each video.
Default 8
seg_len (int): frame number of each segment. Default 1
transform (callable): transforms to perform on video samples,
None for no transforms. Default None.
"""
def __init__(self,
filelist,
file_list,
pickle_dir,
label_list=None,
mode='train',
seg_num=8,
seg_len=1,
transform=None):
assert os.path.isfile(filelist), \
"filelist {} not a file".format(filelist)
with open(filelist) as f:
assert os.path.isfile(file_list), \
"file_list {} not a file".format(file_list)
with open(file_list) as f:
self.pickle_paths = [l.strip() for l in f]
assert os.path.isdir(pickle_dir), \
"pickle_dir {} not a directory".format(pickle_dir)
self.pickle_dir = pickle_dir
self.label_list = label_list
if self.label_list is not None:
assert os.path.isfile(self.label_list), \
"label_list {} not a file".format(self.label_list)
with open(self.label_list) as f:
self.label_list = [int(l.strip()) for l in f]
assert mode in ['train', 'val'], \
"mode can only be 'train' or 'val'"
self.mode = mode
......@@ -87,14 +107,19 @@ class KineticsDataset(Dataset):
logger.error("Load {} failed: {}".format(pickle_path, e))
sys.exit(-1)
label_list = [0, 2, 3, 4, 6, 7, 9, 12, 14, 15]
label = label_list.index(label)
if self.label_list is not None:
label = self.label_list.index(label)
imgs = self._video_loader(frames)
if self.transform:
imgs, label = self.transform(imgs, label)
return imgs, np.array([label])
@property
def num_classes(self):
    """
    Number of classes exposed by this dataset.

    Returns the length of ``self.label_list`` when a label list is set;
    otherwise falls back to ``KINETICS_CLASS_NUM``.
    """
    if self.label_list is not None:
        return len(self.label_list)
    return KINETICS_CLASS_NUM
def _video_loader(self, frames):
videolen = len(frames)
average_dur = int(videolen / self.seg_num)
......@@ -134,9 +159,3 @@ class KineticsDataset(Dataset):
return img.convert('RGB')
if __name__ == "__main__":
kd = KineticsDataset('/paddle/ssd3/kineteics_mini/val_10.list', '/paddle/ssd3/kineteics_mini/val_10')
print("KineticsDataset length", len(kd))
for d in kd:
print(len(d[0]), d[0][0].size, d[1])
......@@ -113,6 +113,15 @@ class BottleneckBlock(fluid.dygraph.Layer):
class TSM_ResNet(Model):
"""
TSM network with ResNet as backbone
Args:
num_layers (int): ResNet layer number, only support 50 currently.
Default 50.
seg_num (int): segment number of each video sample. Default 8.
num_classes (int): video class number. Default 400.
"""
def __init__(self, num_layers=50, seg_num=8, num_classes=400):
super(TSM_ResNet, self).__init__()
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册