提交 527c90ba 编写于 作者: D dengkaipeng

refine comment

上级 3de4bd26
...@@ -20,18 +20,15 @@ import argparse ...@@ -20,18 +20,15 @@ import argparse
import numpy as np import numpy as np
from paddle import fluid from paddle import fluid
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear from paddle.fluid.dygraph.parallel import ParallelEnv
from model import Model, CrossEntropy, Input, set_device from model import Model, CrossEntropy, Input, set_device
from metrics import Accuracy from metrics import Accuracy
from tsm import * from tsm import *
NUM_CLASSES = 10
def make_optimizer(step_per_epoch, parameter_list=None):
def make_optimizer(num_samples, parameter_list=None): boundaries = [e * step_per_epoch for e in [40, 60]]
step = int(num_samples / FLAGS.batch_size / FLAGS.num_devices)
boundaries = [e * step for e in [40, 60]]
values = [FLAGS.lr * (0.1 ** i) for i in range(len(boundaries) + 1)] values = [FLAGS.lr * (0.1 ** i) for i in range(len(boundaries) + 1)]
learning_rate = fluid.layers.piecewise_decay( learning_rate = fluid.layers.piecewise_decay(
...@@ -56,20 +53,26 @@ def main(): ...@@ -56,20 +53,26 @@ def main():
GroupRandomFlip(), GroupRandomFlip(),
NormalizeImage()]) NormalizeImage()])
train_dataset = KineticsDataset( train_dataset = KineticsDataset(
filelist=os.path.join(FLAGS.data, 'train_10.list'), file_list=os.path.join(FLAGS.data, 'train_10.list'),
pickle_dir=os.path.join(FLAGS.data, 'train_10'), pickle_dir=os.path.join(FLAGS.data, 'train_10'),
label_list=os.path.join(FLAGS.data, 'label_list'),
transform=train_transform) transform=train_transform)
val_transform = Compose([GroupScale(), val_transform = Compose([GroupScale(),
GroupCenterCrop(), GroupCenterCrop(),
NormalizeImage()]) NormalizeImage()])
val_dataset = KineticsDataset( val_dataset = KineticsDataset(
filelist=os.path.join(FLAGS.data, 'val_10.list'), file_list=os.path.join(FLAGS.data, 'val_10.list'),
pickle_dir=os.path.join(FLAGS.data, 'val_10'), pickle_dir=os.path.join(FLAGS.data, 'val_10'),
label_list=os.path.join(FLAGS.data, 'label_list'),
mode='val', mode='val',
transform=val_transform) transform=val_transform)
pretrained = FLAGS.eval_only and FLAGS.weights is None pretrained = FLAGS.eval_only and FLAGS.weights is None
model = tsm_resnet50(num_classes=NUM_CLASSES, pretrained=pretrained) model = tsm_resnet50(num_classes=train_dataset.num_classes,
pretrained=pretrained)
step_per_epoch = int(len(train_dataset) / FLAGS.batch_size \
/ ParallelEnv().nranks)
optim = make_optimizer(len(train_dataset), model.parameters()) optim = make_optimizer(len(train_dataset), model.parameters())
inputs = [Input([None, 8, 3, 224, 224], 'float32', name='image')] inputs = [Input([None, 8, 3, 224, 224], 'float32', name='image')]
...@@ -101,7 +104,7 @@ def main(): ...@@ -101,7 +104,7 @@ def main():
epochs=FLAGS.epoch, epochs=FLAGS.epoch,
batch_size=FLAGS.batch_size, batch_size=FLAGS.batch_size,
save_dir='tsm_checkpoint', save_dir='tsm_checkpoint',
num_workers=4, num_workers=FLAGS.num_workers,
drop_last=True, drop_last=True,
shuffle=True) shuffle=True)
...@@ -128,8 +131,6 @@ if __name__ == '__main__': ...@@ -128,8 +131,6 @@ if __name__ == '__main__':
help='initial learning rate') help='initial learning rate')
parser.add_argument( parser.add_argument(
"-b", "--batch_size", default=16, type=int, help="batch size") "-b", "--batch_size", default=16, type=int, help="batch size")
parser.add_argument(
"-n", "--num_devices", default=1, type=int, help="number of devices")
parser.add_argument( parser.add_argument(
"-r", "-r",
"--resume", "--resume",
......
...@@ -33,32 +33,52 @@ logger = logging.getLogger(__name__) ...@@ -33,32 +33,52 @@ logger = logging.getLogger(__name__)
__all__ = ['KineticsDataset'] __all__ = ['KineticsDataset']
KINETICS_CLASS_NUM = 400
class KineticsDataset(Dataset): class KineticsDataset(Dataset):
""" """
Kinetics dataset Kinetics dataset
Args: Args:
filelist (str): path to file list, default None. file_list (str): path to file list
num_classes (int): class number pickle_dir (str): path to pickle file directory
label_list (str): path to label_list file, if set None, the
default class number 400 of kinetics dataset will be
used. Default None
mode (str): 'train' or 'val' mode, segmentation methods will
be different in these 2 modes. Default 'train'
seg_num (int): segment number to sample from each video.
Default 8
seg_len (int): frame number of each segment. Default 1
transform (callable): transforms to perform on video samples,
None for no transforms. Default None.
""" """
def __init__(self, def __init__(self,
filelist, file_list,
pickle_dir, pickle_dir,
label_list=None,
mode='train', mode='train',
seg_num=8, seg_num=8,
seg_len=1, seg_len=1,
transform=None): transform=None):
assert os.path.isfile(filelist), \ assert os.path.isfile(file_list), \
"filelist {} not a file".format(filelist) "file_list {} not a file".format(file_list)
with open(filelist) as f: with open(file_list) as f:
self.pickle_paths = [l.strip() for l in f] self.pickle_paths = [l.strip() for l in f]
assert os.path.isdir(pickle_dir), \ assert os.path.isdir(pickle_dir), \
"pickle_dir {} not a directory".format(pickle_dir) "pickle_dir {} not a directory".format(pickle_dir)
self.pickle_dir = pickle_dir self.pickle_dir = pickle_dir
self.label_list = label_list
if self.label_list is not None:
assert os.path.isfile(self.label_list), \
"label_list {} not a file".format(self.label_list)
with open(self.label_list) as f:
self.label_list = [int(l.strip()) for l in f]
assert mode in ['train', 'val'], \ assert mode in ['train', 'val'], \
"mode can only be 'train' or 'val'" "mode can only be 'train' or 'val'"
self.mode = mode self.mode = mode
...@@ -87,14 +107,19 @@ class KineticsDataset(Dataset): ...@@ -87,14 +107,19 @@ class KineticsDataset(Dataset):
logger.error("Load {} failed: {}".format(pickle_path, e)) logger.error("Load {} failed: {}".format(pickle_path, e))
sys.exit(-1) sys.exit(-1)
label_list = [0, 2, 3, 4, 6, 7, 9, 12, 14, 15] if self.label_list is not None:
label = label_list.index(label) label = self.label_list.index(label)
imgs = self._video_loader(frames) imgs = self._video_loader(frames)
if self.transform: if self.transform:
imgs, label = self.transform(imgs, label) imgs, label = self.transform(imgs, label)
return imgs, np.array([label]) return imgs, np.array([label])
@property
def num_classes(self):
return KINETICS_CLASS_NUM if self.label_list is None \
else len(self.label_list)
def _video_loader(self, frames): def _video_loader(self, frames):
videolen = len(frames) videolen = len(frames)
average_dur = int(videolen / self.seg_num) average_dur = int(videolen / self.seg_num)
...@@ -134,9 +159,3 @@ class KineticsDataset(Dataset): ...@@ -134,9 +159,3 @@ class KineticsDataset(Dataset):
return img.convert('RGB') return img.convert('RGB')
if __name__ == "__main__":
kd = KineticsDataset('/paddle/ssd3/kineteics_mini/val_10.list', '/paddle/ssd3/kineteics_mini/val_10')
print("KineticsDataset length", len(kd))
for d in kd:
print(len(d[0]), d[0][0].size, d[1])
...@@ -113,6 +113,15 @@ class BottleneckBlock(fluid.dygraph.Layer): ...@@ -113,6 +113,15 @@ class BottleneckBlock(fluid.dygraph.Layer):
class TSM_ResNet(Model): class TSM_ResNet(Model):
"""
TSM network with ResNet as backbone
Args:
num_layers (int): ResNet layer number, only support 50 currently.
Default 50.
seg_num (int): segment number of each video sample. Default 8.
num_classes (int): video class number. Default 400.
"""
def __init__(self, num_layers=50, seg_num=8, num_classes=400): def __init__(self, num_layers=50, seg_num=8, num_classes=400):
super(TSM_ResNet, self).__init__() super(TSM_ResNet, self).__init__()
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册