# slowfast_r50_video_4x16x1_256e_kinetics400_rgb.py
# Model settings: a `Recognizer3D` composed of a two-pathway
# `ResNet3dSlowFast` backbone and a `SlowFastHead` classifier with 400
# output classes. No pretrained weights are specified for the backbone or
# for either pathway (`pretrained=None`).
model = dict(
    type='Recognizer3D',
    backbone=dict(
        type='ResNet3dSlowFast',
        pretrained=None,
        resample_rate=8,  # tau
        speed_ratio=8,  # alpha
        channel_ratio=8,  # beta_inv
        # Slow pathway: depth-50 resnet3d with lateral connections enabled
        # and a (1, 7, 7) first convolution kernel.
        slow_pathway=dict(
            type='resnet3d',
            depth=50,
            pretrained=None,
            lateral=True,
            conv1_kernel=(1, 7, 7),
            dilations=(1, 1, 1, 1),
            conv1_stride_t=1,
            pool1_stride_t=1,
            inflate=(0, 0, 1, 1),
            norm_eval=False),
        # Fast pathway: depth-50 resnet3d without lateral connections,
        # base_channels=8 and a (5, 7, 7) first convolution kernel.
        fast_pathway=dict(
            type='resnet3d',
            depth=50,
            pretrained=None,
            lateral=False,
            base_channels=8,
            conv1_kernel=(5, 7, 7),
            conv1_stride_t=1,
            pool1_stride_t=1,
            norm_eval=False)),
    cls_head=dict(
        type='SlowFastHead',
        in_channels=2304,  # 2048+256
        num_classes=400,
        spatial_type='avg',
        dropout_ratio=0.5))
# Train/test settings handed to the recognizer. No train-time options are
# set. At test time `average_clips` is explicitly None.
# NOTE(review): presumably None means per-clip scores are not averaged —
# confirm against the recognizer implementation.
train_cfg = None
test_cfg = dict(average_clips=None)
# Dataset settings: dataset class name, video directories and annotation
# list files for Kinetics-400.
dataset_type = 'VideoDataset'
data_root = 'data/kinetics400/videos_train'
data_root_val = 'data/kinetics400/videos_val'
ann_file_train = 'data/kinetics400/kinetics400_train_list_videos.txt'
ann_file_val = 'data/kinetics400/kinetics400_val_list_videos.txt'
# The test split points at the same annotation list as validation.
ann_file_test = 'data/kinetics400/kinetics400_val_list_videos.txt'
# Keyword arguments for the `Normalize` pipeline step: per-channel mean and
# std, plus the `to_bgr` flag (disabled here).
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    to_bgr=False,
)
# Training pipeline, applied in list order: open the video with Decord,
# sample one clip of 32 frames at frame interval 2, decode, then apply
# resize / random-resized-crop / resize-to-224x224 / random flip
# augmentation before normalizing and packing `imgs` and `label`.
train_pipeline = [
    dict(type='DecordInit'),
    dict(type='SampleFrames', clip_len=32, frame_interval=2, num_clips=1),
    dict(type='DecordDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='RandomResizedCrop'),
    # Final spatial size is forced to 224x224 (aspect ratio not kept).
    dict(type='Resize', scale=(224, 224), keep_ratio=False),
    dict(type='Flip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCTHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs', 'label'])
]
# Validation pipeline: one 32-frame clip (frame interval 2) sampled with
# `test_mode=True`, resized, center-cropped to 224 and never flipped
# (`flip_ratio=0`).
val_pipeline = [
    dict(type='DecordInit'),
    dict(
        type='SampleFrames',
        clip_len=32,
        frame_interval=2,
        num_clips=1,
        test_mode=True),
    dict(type='DecordDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=224),
    dict(type='Flip', flip_ratio=0),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCTHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    # Only `imgs` goes through ToTensor here; `label` is collected above
    # but, unlike in the training pipeline, is not listed in these keys.
    dict(type='ToTensor', keys=['imgs'])
]
# Test pipeline: 10 clips of 32 frames (frame interval 2) per video,
# sampled with `test_mode=True`, resized, then passed through `ThreeCrop`
# with crop size 256 and never flipped (`flip_ratio=0`).
test_pipeline = [
    dict(type='DecordInit'),
    dict(
        type='SampleFrames',
        clip_len=32,
        frame_interval=2,
        num_clips=10,
        test_mode=True),
    dict(type='DecordDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='ThreeCrop', crop_size=256),
    dict(type='Flip', flip_ratio=0),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCTHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    # Only `imgs` goes through ToTensor here; `label` is collected above
    # but is not listed in these keys.
    dict(type='ToTensor', keys=['imgs'])
]
# Data loading settings: 8 videos and 4 workers per GPU, with one dataset
# entry per split. Validation and test both read videos from
# `data_root_val`; they differ in annotation variable and pipeline.
data = dict(
    videos_per_gpu=8,
    workers_per_gpu=4,
    train=dict(
        type=dataset_type,
        ann_file=ann_file_train,
        data_prefix=data_root,
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=ann_file_val,
        data_prefix=data_root_val,
        pipeline=val_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=ann_file_test,
        data_prefix=data_root_val,
        pipeline=test_pipeline))
# optimizer
# SGD with momentum 0.9 and weight decay 1e-4.
optimizer = dict(
    type='SGD', lr=0.1, momentum=0.9,
    weight_decay=0.0001)  # this lr is used for 8 gpus
# Optimizer hook settings: gradient clipping with max_norm=40 and
# norm_type=2.
optimizer_config = dict(
    grad_clip=dict(
        max_norm=40,
        norm_type=2,
    ),
)
# learning policy
# Cosine annealing towards min_lr=0, preceded by a linear warmup.
# NOTE(review): with warmup_by_epoch=True the warmup length of 34 is
# presumably counted in epochs rather than iterations — confirm.
lr_config = dict(
    policy='CosineAnnealing',
    min_lr=0,
    warmup='linear',
    warmup_by_epoch=True,
    warmup_iters=34)
# Schedule: 256 training epochs, checkpoint interval of 4, and a workflow
# consisting of a single ('train', 1) stage.
total_epochs = 256
checkpoint_config = dict(interval=4)
workflow = [('train', 1)]
# Evaluation settings: run every `interval` (5) and report top-1/top-5
# accuracy together with mean class accuracy.
evaluation = dict(
    interval=5,
    metrics=['top_k_accuracy', 'mean_class_accuracy'],
    topk=(1, 5),
)
# Logging settings: log interval of 20 with the text logger hook only.
log_config = dict(
    interval=20,
    hooks=[
        dict(type='TextLoggerHook'),
        # Uncomment the next line to also enable the TensorBoard logger.
        #    dict(type='TensorboardLoggerHook'),
    ])
# Runtime settings: distributed backend, log level, output directory and
# checkpoint loading/resuming (both disabled here).
dist_params = dict(backend='nccl')
log_level = 'INFO'
# NOTE(review): this directory name contains `_3d_`, which does not appear
# in this config's file name — confirm that is intended.
work_dir = './work_dirs/slowfast_r50_video_3d_4x16x1_256e_kinetics400_rgb'
load_from = None
resume_from = None
find_unused_parameters = False