diff --git a/configs/localization/bmn/README.md b/configs/localization/bmn/README.md
index c3d685d06889c69c31df17a71d38165fc9e7e316..fa14d153714c2ba7ce04b6e5f4e06b48b4133ca4 100644
--- a/configs/localization/bmn/README.md
+++ b/configs/localization/bmn/README.md
@@ -1,7 +1,7 @@
 # BMN config
 | pretrain | AR@100| AUC | gpu_mem(M) | iter time(s) | ckpt | log
 |-|-|-|-|-|- | -
-bmn_400x100_9e_activitynet_feature | None |-|-|-|-|[ckpt]()| [log]()
+bmn_400x100_9e_2x8_activitynet_feature | None |75.28|67.22|5420|3.27|[ckpt]()| [log]()
 ### Data
 1. Put the rescaled feature data folder `csv_mean_100` under `$MMACTION/data/activitynet_feature_cuhk/`
@@ -10,12 +10,12 @@ bmn_400x100_9e_activitynet_feature | None |-|-|-|-|[ckpt]()| [log]()
 2. Put the annotaion files under `$MMACTION/data/ActivityNet`
-   The annotation files could be found at [here]() (TODO)
+   The annotation files could be found at [here]()
 ### Checkpoint
 1. Put the `tem_best.pth.tar` and `pem_best.pth.tar` under `checkpoints/`
-   The ckpts could be found at [here]() (TODO)
+   The ckpts could be found at [here]()
 ## Train
 You can use the following command to train a model.
@@ -23,7 +23,7 @@ You can use the following command to train a model.
 python tools/train.py ${CONFIG_FILE} [optional arguments]
 # Example: train BMN on ActivityNet features dataset
-python tools/train.py config/localization/bmn_feature_100_activitynet.py
+python tools/train.py config/localization/bmn/bmn_400x100_2x8_9e_activitynet_feature.py
 ```
 ## Test
@@ -33,5 +33,5 @@ python tools/test.py ${CONFIG_FILE} ${CHECKPOINT_FILE} [optional arguments]
 # Example: test BMN on ActivityNet feature dataset
 # Note: If evaluated, then please make sure the annotation file for test data contains groundtruth.
-python tools/test.py config/localization/bmn_feature_100_activitynet.py checkpoints/SOME_CHECKPOINT.pth --eval AR@AN --out results.json
+python tools/test.py config/localization/bmn/bmn_400x100_2x8_9e_activitynet_feature.py checkpoints/SOME_CHECKPOINT.pth --eval AR@AN --out results.json
 ```
diff --git a/configs/localization/bmn/bmn_400x100_9e_activitynet_feature.py b/configs/localization/bmn/bmn_400x100_2x8_9e_activitynet_feature.py
similarity index 84%
rename from configs/localization/bmn/bmn_400x100_9e_activitynet_feature.py
rename to configs/localization/bmn/bmn_400x100_2x8_9e_activitynet_feature.py
index 344dc4634d6e6e702659170aaf82c4c65ad398d8..682b26c87fe673a412e621d16fa04e2b623e2ca9 100644
--- a/configs/localization/bmn/bmn_400x100_9e_activitynet_feature.py
+++ b/configs/localization/bmn/bmn_400x100_2x8_9e_activitynet_feature.py
@@ -19,7 +19,7 @@ data_root = 'data/activitynet_feature_cuhk/csv_mean_100/'
 data_root_val = 'data/activitynet_feature_cuhk/csv_mean_100/'
 ann_file_train = 'data/ActivityNet/anet_anno_train.json'
 ann_file_val = 'data/ActivityNet/anet_anno_val.json'
-ann_file_test = 'data/ActivityNet/anet_anno_test.json'
+ann_file_test = 'data/ActivityNet/anet_anno_val.json'
 test_pipeline = [
     dict(type='LoadLocalizationFeature'),
@@ -41,8 +41,10 @@ train_pipeline = [
         keys=['raw_feature', 'gt_bbox'],
         meta_name='video_meta',
         meta_keys=['video_name']),
-    dict(type='ToTensor', keys=['raw_feature', 'gt_bbox']),
-    dict(type='ToDataContainer', fields=[dict(key='gt_bbox', stack=False)])
+    dict(type='ToTensor', keys=['raw_feature']),
+    dict(
+        type='ToDataContainer',
+        fields=[dict(key='gt_bbox', stack=False, cpu_only=True)])
 ]
 val_pipeline = [
     dict(type='LoadLocalizationFeature'),
@@ -55,12 +57,14 @@ val_pipeline = [
             'video_name', 'duration_second', 'duration_frame',
             'annotations', 'feature_frame'
         ]),
-    dict(type='ToTensor', keys=['raw_feature', 'gt_bbox']),
-    dict(type='ToDataContainer', fields=[dict(key='gt_bbox', stack=False)])
+    dict(type='ToTensor', keys=['raw_feature']),
+    dict(
+        type='ToDataContainer',
+        fields=[dict(key='gt_bbox', stack=False, cpu_only=True)])
 ]
 data = dict(
     videos_per_gpu=8,
-    workers_per_gpu=4,
+    workers_per_gpu=8,
     train_dataloader=dict(drop_last=True),
     test=dict(
         type=dataset_type,
@@ -84,14 +88,14 @@ optimizer_config = dict(grad_clip=None)
 # learning policy
 lr_config = dict(policy='step', step=7)
-total_epochs = 9
+total_epochs = 20
 checkpoint_config = dict(interval=1)
 evaluation = dict(interval=1, metrics=['AR@AN'])
 log_config = dict(interval=50, hooks=[dict(type='TextLoggerHook')])
 # runtime settings
 dist_params = dict(backend='nccl')
 log_level = 'INFO'
-work_dir = './work_dirs/bmn_400x100_9e_activitynet_feature/'
+work_dir = './work_dirs/bmn_400x100_2x8_9e_activitynet_feature/'
 load_from = None
 resume_from = None
 workflow = [('train', 1)]
diff --git a/configs/localization/bsn/README.md b/configs/localization/bsn/README.md
index c2439a0af59414ca4315af867b30f5a215298ea6..51bf7702b2bbd5ad398531d174d61157ce1f8575 100644
--- a/configs/localization/bsn/README.md
+++ b/configs/localization/bsn/README.md
@@ -1,7 +1,7 @@
 # BSN config
 | pretrain | AR@100| AUC | gpu_mem(M) | iter time(s) | ckpt | log
 |-|-|-|-|-|- | -
-bsn_400x100_20e_activitynet_feature | None |74.91|66.31|41(TEM)+25(PEM)|0.303(TEM)+0.119(PEM)|[ckpt_tem]() [ckpt_pem]| [log_tem]() [log_pem]()
+bsn_400x100_1x16_20e_activitynet_feature | None |74.91|66.31|41(TEM)+25(PEM)|0.074(TEM)+0.036(PEM)|[ckpt_tem]() [ckpt_pem]| [log_tem]() [log_pem]()
 ## Preparation
 ### Data
@@ -11,7 +11,7 @@ bsn_400x100_20e_activitynet_feature | None |74.91|66.31|41(TEM)+25(PEM)|0.303(TE
 2. Put the annotaion files under `$MMACTION/data/ActivityNet`
-   The annotation files could be found at [here]() (TODO)
+   The annotation files could be found at [here]()
 ### Checkpoint
 1. Put the `tem_best.pth.tar` and `pem_best.pth.tar` under `checkpoints/`
@@ -24,29 +24,29 @@ You can use the following commands to train a model.
 # TEM Train
 python tools/train.py ${CONFIG_FILE} [optional arguments]
 # Example: train BSN(TEM) on ActivityNet features dataset
-python tools/train.py config/localization/bsn_feature_100_activitynet_step1_tem.py
+python tools/train.py config/localization/bsn/bsn_400x100_1x16_20e_activitynet_feature.py
 # TEM Inference
 # Note: This could not be evaluated.
 python tools/test.py ${CONFIG_FILE} ${CHECKPOINT_FILE} [optional arguments]
 # Example: Inference BSN(TEM) with trained model.
-python tools/test.py config/localization/bsn_feature_100_activitynet_step1_tem.py checkpoints/SOME_CHECKPOINT.pth
+python tools/test.py config/localization/bsn/bsn_tem_400x100_1x16_20e_activitynet_feature.py checkpoints/SOME_CHECKPOINT.pth
 # PGM
 python tools/bsn_proposal_generation.py ${CONFIG_FILE} [--mode ${MODE}]
 # Example: Inference BSN(PGM)
-python tools/bsn_proposal_generation.py config/localization/bsn_feature_100_activitynet_step2_pgm.py --mode train
+python tools/bsn_proposal_generation.py config/localization/bsn/bsn_pgm_400x100_activitynet_feature.py --mode train
 # PEM Train
 python tools/train.py ${CONFIG_FILE} [optional arguments]
 # Example: train BSN(PEM) on PGM results.
-python tools/train.py config/localization/bsn_feature_100_activitynet_step3_pem.py
+python tools/train.py config/localization/bsn/bsn_pem_400x100_1x16_20e_activitynet_feature.py
 # PEM Inference
 python tools/test.py ${CONFIG_FILE} ${CHECKPOINT_FILE} [optional arguments]
 # Example: Inference BSN(PEM) with evaluation metric 'AR@AN' and output the results.
 # Note: If evaluated, then please make sure the annotation file for test data contains groundtruth.
-python tools/test.py config/localization/bsn_feature_100_activitynet_step3_pem.py checkpoints/SOME_CHECKPOINT.pth --eval AR@AN --out results.json
+python tools/test.py config/localization/bsn/bsn_400x100_1x16_20e_activitynet_feature.py checkpoints/SOME_CHECKPOINT.pth --eval AR@AN --out results.json
 ```
 ## Test
@@ -55,16 +55,16 @@ python tools/test.py config/localization/bsn_feature_100_activitynet_step3_pem.p
 # Note: This could not be evaluated.
 python tools/test.py ${CONFIG_FILE} ${CHECKPOINT_FILE} [optional arguments]
 # Example: Test a TEM model on ActivityNet dataset.
-python tools/test.py config/localization/bsn_feature_100_activitynet_step1_tem.py checkpoints/SOME_CHECKPOINT.pth
+python tools/test.py config/localization/bsn/bsn_tem_400x100_1x16_20e_activitynet_feature.py checkpoints/SOME_CHECKPOINT.pth
 # PGM
 python tools/bsn_proposal_generation.py ${CONFIG_FILE} [--mode ${MODE}]
 # Example:
-python tools/bsn_proposal_generation.py config/localization/bsn_feature_100_activitynet_step2_pgm.py --mode test
+python tools/bsn_proposal_generation.py config/localization/bsn/bsn_pgm_400x100_activitynet_feature.py --mode test
 # PEM
 python tools/test.py ${CONFIG_FILE} ${CHECKPOINT_FILE} [optional arguments]
 # Example: Test a PEM model with with evaluation metric 'AR@AN' and output the results.
 # Note: If evaluated, then please make sure the annotation file for test data contains groundtruth.
-python tools/test.py config/localization/bsn_feature_100_activitynet_step3_pem.py checkpoints/SOME_CHECKPOINT.pth --eval AR@AN --out results.json
+python tools/test.py config/localization/bsn/bsn_400x100_1x16_20e_activitynet_feature.py checkpoints/SOME_CHECKPOINT.pth --eval AR@AN --out results.json
 ```
diff --git a/configs/localization/bsn/bsn_pem_400x100_20e_activitynet_feature.py b/configs/localization/bsn/bsn_pem_400x100_1x16_20e_activitynet_feature.py
similarity index 97%
rename from configs/localization/bsn/bsn_pem_400x100_20e_activitynet_feature.py
rename to configs/localization/bsn/bsn_pem_400x100_1x16_20e_activitynet_feature.py
index 6999e125e318d7e345b0f174680198100e31a911..d55eae39e7bc7f9a336408296b479aa46488ea94 100644
--- a/configs/localization/bsn/bsn_pem_400x100_20e_activitynet_feature.py
+++ b/configs/localization/bsn/bsn_pem_400x100_1x16_20e_activitynet_feature.py
@@ -22,7 +22,7 @@ ann_file_train = 'data/ActivityNet/anet_anno_train.json'
 ann_file_val = 'data/ActivityNet/anet_anno_val.json'
 ann_file_test = 'data/ActivityNet/anet_anno_test.json'
-work_dir = 'work_dirs/bsn_400x100_20e_activitynet_feature/'
+work_dir = 'work_dirs/bsn_feature_100/'
 pgm_proposals_dir = f'{work_dir}/pgm_proposals/'
 pgm_features_dir = f'{work_dir}/pgm_features/'
@@ -79,7 +79,7 @@ val_pipeline = [
 ]
 data = dict(
     videos_per_gpu=16,
-    workers_per_gpu=2,
+    workers_per_gpu=8,
     train_dataloader=dict(drop_last=True),
     val_dataloader=dict(videos_per_gpu=1),
     test=dict(
diff --git a/configs/localization/bsn/bsn_pgm_400x100_activitynet_feature.py b/configs/localization/bsn/bsn_pgm_400x100_activitynet_feature.py
index beff45ed0853742caf5deba0f7f613a52b4813bc..ce510a5ac5079958adb064aba31e6ecd4e6aec9e 100644
--- a/configs/localization/bsn/bsn_pgm_400x100_activitynet_feature.py
+++ b/configs/localization/bsn/bsn_pgm_400x100_activitynet_feature.py
@@ -6,7 +6,7 @@ ann_file_train = 'data/ActivityNet/anet_anno_train.json'
 ann_file_val = 'data/ActivityNet/anet_anno_val.json'
 ann_file_test = 'data/ActivityNet/anet_anno_test.json'
-work_dir = 'work_dirs/bsn_400x100_20e_activitynet_feature/'
+work_dir = 'work_dirs/bsn_400x100_20e_1x16_activitynet_feature/'
 tem_results_dir = f'{work_dir}/tem_results/'
 pgm_proposals_dir = f'{work_dir}/pgm_proposals/'
 pgm_features_dir = f'{work_dir}/pgm_features/'
diff --git a/configs/localization/bsn/bsn_tem_400x100_20e_activitynet_feature.py b/configs/localization/bsn/bsn_tem_400x100_1x16_20e_activitynet_feature.py
similarity index 96%
rename from configs/localization/bsn/bsn_tem_400x100_20e_activitynet_feature.py
rename to configs/localization/bsn/bsn_tem_400x100_1x16_20e_activitynet_feature.py
index 861eefdc766451d70e09bb5445d0979f7d70c22c..397aa333500f69b3f735b6659cc332b39944bbd7 100644
--- a/configs/localization/bsn/bsn_tem_400x100_20e_activitynet_feature.py
+++ b/configs/localization/bsn/bsn_tem_400x100_1x16_20e_activitynet_feature.py
@@ -17,7 +17,7 @@ ann_file_train = 'data/ActivityNet/anet_anno_train.json'
 ann_file_val = 'data/ActivityNet/anet_anno_val.json'
 ann_file_test = 'data/ActivityNet/anet_anno_full.json'
-work_dir = 'work_dirs/bsn_400x100_20e_activitynet_feature/'
+work_dir = 'work_dirs/bsn_400x100_20e_1x16_activitynet_feature/'
 tem_results_dir = f'{work_dir}/tem_results/'
 test_pipeline = [
@@ -54,7 +54,7 @@ val_pipeline = [
 data = dict(
     videos_per_gpu=16,
-    workers_per_gpu=2,
+    workers_per_gpu=8,
     train_dataloader=dict(drop_last=True),
     test=dict(
         type=dataset_type,
diff --git a/mmaction/models/localizers/bmn.py b/mmaction/models/localizers/bmn.py
index 286441addd6fceb2e2851334eb7903b89311767b..59d259da8a170fed204caefba65fa0bb0d8ddc49 100644
--- a/mmaction/models/localizers/bmn.py
+++ b/mmaction/models/localizers/bmn.py
@@ -128,6 +128,7 @@ class BMN(nn.Module):
         self.anchors_tmins, self.anchors_tmaxs = self._temporal_anchors(
             -0.5, 1.5)
         self.match_map = self._match_map()
+        self.bm_mask = self._get_bm_mask()
     def _match_map(self):
         temporal_gap = 1. / self.tscale
@@ -245,9 +246,9 @@ class BMN(nn.Module):
     def forward_train(self, raw_feature, label_confidence, label_start,
                       label_end):
         confidence_map, start, end = self._forward(raw_feature)
-        bm_mask = self._get_bm_mask().to(raw_feature.device)
         loss = self.loss_cls(confidence_map, start, end, label_confidence,
-                             label_start, label_end, bm_mask)
+                             label_start, label_end,
+                             self.bm_mask.to(raw_feature.device))
         loss_dict = dict(loss=loss[0])
         return loss_dict
@@ -256,7 +257,6 @@ class BMN(nn.Module):
         match_score_start_list = []
         match_score_end_list = []
         for every_gt_bbox in gt_bbox:
-            every_gt_bbox = every_gt_bbox.cpu().numpy()
             gt_iou_map = []
             for start, end in every_gt_bbox:
                 current_gt_iou_map = temporal_iou(self.match_map[:, 0],