From 0fb191ec6cac66c91b21a8bf132cea17cfdfd00a Mon Sep 17 00:00:00 2001
From: JoannaLXY
Date: Fri, 10 Jul 2020 16:52:46 +0800
Subject: [PATCH] fix config struct

---
 docs/config.md              | 366 +++++++++++++++++++++++++++++++++++-
 docs/config_localization.md | 146 --------------
 docs/config_recognition.md  | 203 --------------------
 docs/index.rst              |   2 -
 4 files changed, 364 insertions(+), 353 deletions(-)
 delete mode 100644 docs/config_localization.md
 delete mode 100644 docs/config_recognition.md

diff --git a/docs/config.md b/docs/config.md
index 56d4587..3503304 100644
--- a/docs/config.md
+++ b/docs/config.md
@@ -1,4 +1,18 @@
# Config System

- [Config System](#config-system)
  - [Config File Naming Convention](#config-file-naming-convention)
  - [Config File Structure](#config-file-structure)
    - [Config System for Action Localization](#config-system-for-action-localization)
      - [An Example of BMN](#an-example-of-bmn)
    - [Config System for Action Recognition](#config-system-for-action-recognition)
      - [An Example of TSN](#an-example-of-tsn)
  - [FAQ](#faq)
    - [Use intermediate variables in configs](#use-intermediate-variables-in-configs)

We use Python files as our config system. You can find all the provided configs under `$MMAction/configs`.

## Config File Naming Convention

@@ -25,9 +39,357 @@ We follow the style below to name config files. Contributors are advised to foll

Please refer to the corresponding pages for config file structure for different tasks.

-[Localization](config_localization.md)

### Config System for Action Localization

We incorporate modular design into our config system,
which makes it convenient to conduct various experiments.

#### An Example of BMN

To help users get a basic idea of a complete config structure and the modules in an action localization system,
we make brief comments on the config of BMN below.
For more detailed usage and alternatives for each parameter in each module, please refer to the API documentation.
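Because configs are plain Python files, they can also be loaded and inspected programmatically with mmcv's `Config` utility before being handed to the train/test tools. The snippet below is a minimal sketch, not part of the BMN config itself; the config path is illustrative.

```python
from mmcv import Config

# Parse a config file into an addressable Config object.
# The path is illustrative; use any config under $MMAction/configs.
cfg = Config.fromfile('configs/localization/bmn_400x100_2x8_9e_activitynet_feature.py')

# Fields are accessed as attributes, mirroring the dicts defined in the file.
print(cfg.model.type)           # -> 'BMN'
print(cfg.data.videos_per_gpu)  # -> 8

# Values may be overridden in code before the config is passed on.
cfg.total_epochs = 12
```

The full BMN config follows.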
```python
# model settings
model = dict( # Config of the model
    type='BMN', # Type of the localizer
    temporal_dim=100, # Total frames selected for each video
    boundary_ratio=0.5, # Ratio for determining video boundaries
    num_samples=32, # Number of samples for each proposal
    num_samples_per_bin=3, # Number of bin samples for each sample
    feat_dim=400, # Dimension of feature
    soft_nms_alpha=0.4, # Soft NMS alpha
    soft_nms_low_threshold=0.5, # Soft NMS low threshold
    soft_nms_high_threshold=0.9, # Soft NMS high threshold
    post_process_top_k=100) # Top k proposals kept in post-processing
# model training and testing settings
train_cfg = None # Config of training hyperparameters for BMN
test_cfg = dict(average_clips='score') # Config of testing hyperparameters for BMN

# dataset settings
dataset_type = 'ActivityNetDataset' # Type of dataset for training, validation and testing
data_root = 'data/activitynet_feature_cuhk/csv_mean_100/' # Root path to data for training
data_root_val = 'data/activitynet_feature_cuhk/csv_mean_100/' # Root path to data for validation and testing
ann_file_train = 'data/ActivityNet/anet_anno_train.json' # Path to the annotation file for training
ann_file_val = 'data/ActivityNet/anet_anno_val.json' # Path to the annotation file for validation
ann_file_test = 'data/ActivityNet/anet_anno_test.json' # Path to the annotation file for testing

train_pipeline = [ # List of training pipeline steps
    dict(type='LoadLocalizationFeature'), # Load localization feature pipeline
    dict(type='GenerateLocalizationLabels'), # Generate localization labels pipeline
    dict( # Config of Collect
        type='Collect', # Collect pipeline that decides which keys in the data should be passed to the localizer
        keys=['raw_feature', 'gt_bbox'], # Keys of input
        meta_name='video_meta', # Meta name
        meta_keys=['video_name']), # Meta keys of input
    dict( # Config of ToTensor
        type='ToTensor', # Convert other types to tensor type pipeline
        keys=['raw_feature']), # Keys to be converted from image to tensor
    dict( # Config of ToDataContainer
        type='ToDataContainer', # Pipeline to convert the data to DataContainer
        fields=[dict(key='gt_bbox', stack=False, cpu_only=True)]) # Required fields to be converted with keys and attributes
]
val_pipeline = [ # List of validation pipeline steps
    dict(type='LoadLocalizationFeature'), # Load localization feature pipeline
    dict(type='GenerateLocalizationLabels'), # Generate localization labels pipeline
    dict( # Config of Collect
        type='Collect', # Collect pipeline that decides which keys in the data should be passed to the localizer
        keys=['raw_feature', 'gt_bbox'], # Keys of input
        meta_name='video_meta', # Meta name
        meta_keys=[
            'video_name', 'duration_second', 'duration_frame', 'annotations',
            'feature_frame'
        ]), # Meta keys of input
    dict( # Config of ToTensor
        type='ToTensor', # Convert other types to tensor type pipeline
        keys=['raw_feature']), # Keys to be converted from image to tensor
    dict( # Config of ToDataContainer
        type='ToDataContainer', # Pipeline to convert the data to DataContainer
        fields=[dict(key='gt_bbox', stack=False, cpu_only=True)]) # Required fields to be converted with keys and attributes
]
test_pipeline = [ # List of testing pipeline steps
    dict(type='LoadLocalizationFeature'), # Load localization feature pipeline
    dict( # Config of Collect
        type='Collect', # Collect pipeline that decides which keys in the data should be passed to the localizer
        keys=['raw_feature'], # Keys of input
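        # NOTE: unlike the train/val pipelines above, neither
        # GenerateLocalizationLabels nor the 'gt_bbox' key appears here,
        # since ground-truth labels are not needed at test time.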
        meta_name='video_meta', # Meta name
        meta_keys=[
            'video_name', 'duration_second', 'duration_frame', 'annotations',
            'feature_frame'
        ]), # Meta keys of input
    dict( # Config of ToTensor
        type='ToTensor', # Convert other types to tensor type pipeline
        keys=['raw_feature']), # Keys to be converted from image to tensor
]
data = dict( # Config of data
    videos_per_gpu=8, # Batch size of each single GPU
    workers_per_gpu=8, # Workers to pre-fetch data for each single GPU
    train_dataloader=dict( # Additional config of train dataloader
        drop_last=True), # Whether to drop the last batch of data in training
    val_dataloader=dict( # Additional config of validation dataloader
        videos_per_gpu=1), # Batch size of each single GPU during evaluation
    test=dict( # Testing dataset config
        type=dataset_type,
        ann_file=ann_file_test,
        pipeline=test_pipeline,
        data_prefix=data_root_val),
    val=dict( # Validation dataset config
        type=dataset_type,
        ann_file=ann_file_val,
        pipeline=val_pipeline,
        data_prefix=data_root_val),
    train=dict( # Training dataset config
        type=dataset_type,
        ann_file=ann_file_train,
        pipeline=train_pipeline,
        data_prefix=data_root))

# optimizer
optimizer = dict(
    # Config used to build the optimizer. It supports (1) all the optimizers
    # in PyTorch, with the same arguments as in PyTorch, and (2) custom
    # optimizers built via `constructor`; see "tutorials/new_modules.md"
    # for implementation.
    type='Adam', # Type of optimizer, refer to https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/optimizer/default_constructor.py#L13 for more details
    lr=0.001, # Learning rate, see detailed usage of the parameters in the PyTorch documentation
    weight_decay=0.0001) # Weight decay of Adam
optimizer_config = dict( # Config used to build the optimizer hook
    grad_clip=None) # Most of the methods do not use gradient clipping
# learning policy
lr_config = dict( # Learning rate scheduler config used to register LrUpdater hook
    policy='step', # Policy of scheduler, also supports CosineAnnealing, Cyclic, etc. Refer to details of supported LrUpdater from https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/hooks/lr_updater.py#L9
    step=7) # Steps to decay the learning rate

total_epochs = 9 # Total epochs to train the model
checkpoint_config = dict( # Config to set the checkpoint hook, refer to https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/hooks/checkpoint.py for implementation
    interval=1) # Interval to save checkpoints
evaluation = dict( # Config of evaluation during training
    interval=1, # Interval to perform evaluation
    metrics=['AR@AN']) # Metrics to be performed
log_config = dict( # Config to register logger hook
    interval=50, # Interval to print the log
    hooks=[ # Hooks to be implemented during training
        dict(type='TextLoggerHook'), # The logger used to record the training process
        # dict(type='TensorboardLoggerHook'), # The Tensorboard logger is also supported
    ])

# runtime settings
dist_params = dict(backend='nccl') # Parameters to set up distributed training, the port can also be set
log_level = 'INFO' # The level of logging
work_dir = './work_dirs/bmn_400x100_2x8_9e_activitynet_feature/' # Directory to save the model checkpoints and logs for the current experiment
load_from = None # Load the model as a pre-trained model from a given path. This will not resume training
resume_from = None # Resume checkpoints from a given path, the training will be resumed from the epoch when the checkpoint was saved
workflow = [('train', 1)] # Workflow for runner. [('train', 1)] means there is only one workflow and the workflow named 'train' is executed once
output_config = dict( # Config of localization output
    out=f'{work_dir}/results.json', # Path to the output file
    output_format='json') # File format of the output file
```

### Config System for Action Recognition

We incorporate modular design into our config system,
which makes it convenient to conduct various experiments.

#### An Example of TSN

To help users get a basic idea of a complete config structure and the modules in an action recognition system,
we make brief comments on the config of TSN below.
For more detailed usage and alternatives for each parameter in each module, please refer to the API documentation.

```python
# model settings
model = dict( # Config of the model
    type='Recognizer2D', # Type of the recognizer
    backbone=dict( # Dict for backbone
        type='ResNet', # Name of the backbone
        pretrained='torchvision://resnet50', # The url/site of the pretrained model
        depth=50, # Depth of ResNet model
        norm_eval=False), # Whether to set BN layers to eval mode when training
    cls_head=dict( # Dict for classification head
        type='TSNHead', # Name of classification head
        num_classes=400, # Number of classes to be classified
        in_channels=2048, # The input channels of classification head
        spatial_type='avg', # Type of pooling in spatial dimension
        consensus=dict(type='AvgConsensus', dim=1), # Config of consensus module
        dropout_ratio=0.4, # Probability in dropout layer
        init_std=0.01)) # Std value for linear layer initialization
# model training and testing settings
train_cfg = None # Config of training hyperparameters for TSN
test_cfg = dict(average_clips=None) # Config of testing hyperparameters for TSN. Here we define the clip averaging method

# dataset settings
dataset_type = 'RawframeDataset' # Type of dataset for training, validation and testing
data_root = 'data/kinetics400/rawframes_train/' # Root path to data for training
data_root_val = 'data/kinetics400/rawframes_val/' # Root path to data for validation and testing
ann_file_train = 'data/kinetics400/kinetics400_train_list_rawframes.txt' # Path to the annotation file for training
ann_file_val = 'data/kinetics400/kinetics400_val_list_rawframes.txt' # Path to the annotation file for validation
ann_file_test = 'data/kinetics400/kinetics400_val_list_rawframes.txt' # Path to the annotation file for testing
img_norm_cfg = dict( # Config of image normalization used in data pipeline
    mean=[123.675, 116.28, 103.53], # Mean values of different channels to normalize
    std=[58.395, 57.12, 57.375], # Std values of different channels to normalize
    to_bgr=False) # Whether to convert channels from RGB to BGR

train_pipeline = [ # List of training pipeline steps
    dict( # Config of SampleFrames
        type='SampleFrames', # Sample frames pipeline, sampling frames from video
        clip_len=1, # Frames of each sampled output clip
        frame_interval=1, # Temporal interval of adjacent sampled frames
        num_clips=3), # Number of clips to be sampled
    dict( # Config of FrameSelector
        type='FrameSelector'), # Frame selector pipeline, selecting raw frames with given indices
    dict( # Config of Resize
        type='Resize', # Resize pipeline
        scale=(-1, 256)), # The scale to resize images
    dict( # Config of MultiScaleCrop
        type='MultiScaleCrop', # Multi scale crop pipeline, cropping images with a list of randomly selected scales
        input_size=224, # Input size of the network
        scales=(1, 0.875, 0.75, 0.66), # Scales of width and height to be selected
        random_crop=False, # Whether to randomly sample cropping bbox
        max_wh_scale_gap=1), # Maximum gap of w and h scale levels
    dict( # Config of Resize
        type='Resize', # Resize pipeline
        scale=(224, 224), # The scale to resize images
        keep_ratio=False), # Whether to keep the aspect ratio when resizing
    dict( # Config of Flip
        type='Flip', # Flip pipeline
        flip_ratio=0.5), # Probability of implementing flip
    dict( # Config of Normalize
        type='Normalize', # Normalize pipeline
        **img_norm_cfg), # Config of image normalization
    dict( # Config of FormatShape
        type='FormatShape', # Format shape pipeline, formatting the final image shape to the given input_format
        input_format='NCHW'), # Final image shape format
    dict( # Config of Collect
        type='Collect', # Collect pipeline that decides which keys in the data should be passed to the recognizer
        keys=['imgs', 'label'], # Keys of input
        meta_keys=[]), # Meta keys of input
    dict( # Config of ToTensor
        type='ToTensor', # Convert other types to tensor type pipeline
        keys=['imgs', 'label']) # Keys to be converted from image to tensor
]
val_pipeline = [ # List of validation pipeline steps
    dict( # Config of SampleFrames
        type='SampleFrames', # Sample frames pipeline, sampling frames from video
        clip_len=1, # Frames of each sampled output clip
        frame_interval=1, # Temporal interval of adjacent sampled frames
        num_clips=3, # Number of clips to be sampled
        test_mode=True), # Whether to set test mode in sampling
    dict( # Config of FrameSelector
        type='FrameSelector'), # Frame selector pipeline, selecting raw frames with given indices
    dict( # Config of Resize
        type='Resize', # Resize pipeline
        scale=(-1, 256)), # The scale to resize images
    dict( # Config of CenterCrop
        type='CenterCrop', # Center crop pipeline, cropping the center area from images
        crop_size=224), # The size to crop images
    dict( # Config of Flip
        type='Flip', # Flip pipeline
        flip_ratio=0), # Probability of implementing flip
    dict( # Config of Normalize
        type='Normalize', # Normalize pipeline
        **img_norm_cfg), # Config of image normalization
    dict( # Config of FormatShape
        type='FormatShape', # Format shape pipeline, formatting the final image shape to the given input_format
        input_format='NCHW'), # Final image shape format
    dict( # Config of Collect
        type='Collect', # Collect pipeline that decides which keys in the data should be passed to the recognizer
        keys=['imgs', 'label'], # Keys of input
        meta_keys=[]), # Meta keys of input
    dict( # Config of ToTensor
        type='ToTensor', # Convert other types to tensor type pipeline
        keys=['imgs']) # Keys to be converted from image to tensor
]
test_pipeline = [ # List of testing pipeline steps
    dict( # Config of SampleFrames
        type='SampleFrames', # Sample frames pipeline, sampling frames from video
        clip_len=1, # Frames of each sampled output clip
        frame_interval=1, # Temporal interval of adjacent sampled frames
        num_clips=25, # Number of clips to be sampled
        test_mode=True), # Whether to set test mode in sampling
    dict( # Config of FrameSelector
        type='FrameSelector'), # Frame selector pipeline, selecting raw frames with given indices
    dict( # Config of Resize
        type='Resize', # Resize pipeline
        scale=(-1, 256)), # The scale to resize images
    dict( # Config of TenCrop
        type='TenCrop', # Ten crop pipeline, cropping 10 areas (corners and center, plus their flips) from images
        crop_size=224), # The size to crop images
    dict( # Config of Flip
        type='Flip', # Flip pipeline
        flip_ratio=0), # Probability of implementing flip
    dict( # Config of Normalize
        type='Normalize', # Normalize pipeline
        **img_norm_cfg), # Config of image normalization
    dict( # Config of FormatShape
        type='FormatShape', # Format shape pipeline, formatting the final image shape to the given input_format
        input_format='NCHW'), # Final image shape format
    dict( # Config of Collect
        type='Collect', # Collect pipeline that decides which keys in the data should be passed to the recognizer
        keys=['imgs', 'label'], # Keys of input
        meta_keys=[]), # Meta keys of input
    dict( # Config of ToTensor
        type='ToTensor', # Convert other types to tensor type pipeline
        keys=['imgs']) # Keys to be converted from image to tensor
]
data = dict( # Config of data
    videos_per_gpu=32, # Batch size of each single GPU
    workers_per_gpu=4, # Workers to pre-fetch data for each single GPU
    train=dict( # Training dataset config
        type=dataset_type,
        ann_file=ann_file_train,
        data_prefix=data_root,
        pipeline=train_pipeline),
    val=dict( # Validation dataset config
        type=dataset_type,
        ann_file=ann_file_val,
        data_prefix=data_root_val,
        pipeline=val_pipeline),
    test=dict( # Testing dataset config
        type=dataset_type,
        ann_file=ann_file_test,
        data_prefix=data_root_val,
        pipeline=test_pipeline))
# optimizer
optimizer = dict(
    # Config used to build the optimizer. It supports (1) all the optimizers
    # in PyTorch, with the same arguments as in PyTorch, and (2) custom
    # optimizers built via `constructor`; see "tutorials/new_modules.md"
    # for implementation.
    type='SGD', # Type of optimizer, refer to https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/optimizer/default_constructor.py#L13 for more details
    lr=0.01, # Learning rate, see detailed usage of the parameters in the PyTorch documentation
    momentum=0.9, # Momentum
    weight_decay=0.0001) # Weight decay of SGD
optimizer_config = dict( # Config used to build the optimizer hook
    grad_clip=dict(max_norm=40, norm_type=2)) # Use gradient clipping
# learning policy
lr_config = dict( # Learning rate scheduler config used to register LrUpdater hook
    policy='step', # Policy of scheduler, also supports CosineAnnealing, Cyclic, etc. Refer to details of supported LrUpdater from https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/hooks/lr_updater.py#L9
    step=[40, 80]) # Steps to decay the learning rate
total_epochs = 100 # Total epochs to train the model
checkpoint_config = dict( # Config to set the checkpoint hook, refer to https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/hooks/checkpoint.py for implementation
    interval=5) # Interval to save checkpoints
evaluation = dict( # Config of evaluation during training
    interval=5, # Interval to perform evaluation
    metrics=['top_k_accuracy', 'mean_class_accuracy'], # Metrics to be performed
    topk=(1, 5)) # K value for `top_k_accuracy` metric
log_config = dict( # Config to register logger hook
    interval=20, # Interval to print the log
    hooks=[ # Hooks to be implemented during training
        dict(type='TextLoggerHook'), # The logger used to record the training process
        # dict(type='TensorboardLoggerHook'), # The Tensorboard logger is also supported
    ])

# runtime settings
dist_params = dict(backend='nccl') # Parameters to set up distributed training, the port can also be set
log_level = 'INFO' # The level of logging
work_dir = './work_dirs/tsn_r50_1x1x3_100e_kinetics400_rgb/' # Directory to save the model checkpoints and logs for the current experiment
load_from = None # Load the model as a pre-trained model from a given path. This will not resume training
resume_from = None # Resume checkpoints from a given path, the training will be resumed from the epoch when the checkpoint was saved
workflow = [('train', 1)] # Workflow for runner. [('train', 1)] means there is only one workflow and the workflow named 'train' is executed once
```

-[Recognition](config_recognition.md)

## FAQ
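The body of the FAQ falls outside this hunk's context, but its TOC entry, "Use intermediate variables in configs", refers to a pattern already visible in both examples above: values needed in several places are bound to module-level Python variables once and then referenced. A minimal illustrative fragment, distilled from the TSN example (not a complete config):

```python
# Intermediate variables: define shared values once at module level...
dataset_type = 'RawframeDataset'
data_root = 'data/kinetics400/rawframes_train/'
train_pipeline = [
    dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=3),
    dict(type='FrameSelector'),
]

# ...then reference them wherever needed, instead of repeating the literals.
data = dict(
    videos_per_gpu=32,
    workers_per_gpu=4,
    train=dict(
        type=dataset_type,          # reuses the variable defined above
        ann_file='data/kinetics400/kinetics400_train_list_rawframes.txt',
        data_prefix=data_root,
        pipeline=train_pipeline))
```

This keeps the training, validation and testing entries consistent: changing the dataset type or a pipeline step in one place updates every split that references it.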
diff --git a/docs/config_localization.md b/docs/config_localization.md
deleted file mode 100644
index 2ed3390..0000000
diff --git a/docs/config_recognition.md b/docs/config_recognition.md
deleted file mode 100644
index 79d3429..0000000
diff --git a/docs/index.rst b/docs/index.rst
index b4030e0..4fc7a32 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -9,8 +9,6 @@ Welcome to MMAction's documentation!

   install.md
   data_preparation.md
   config.md
-   config_localization.md
-   config_recognition.md
   api.rst
--
GitLab