update bsn/bmn modelzoo and improve bmn speed

Signed-off-by: N lixuanyi <lixuanyi@sensetime.com>

update bsn/bmn modelzoo and improve bmn speed
Signed-off-by: N lixuanyi <lixuanyi@sensetime.com>
f3530dbc · lixuanyi · 0b670873 · f3530dbc · f3530dbc · f3530dbc
7 changed files
--- a/configs/localization/bmn/README.md
+++ b/configs/localization/bmn/README.md
 # BMN
 config | pretrain | AR@100| AUC | gpu_mem(M) | iter time(s) | ckpt | log
 -|-|-|-|-|-|- | -
-bmn_400x100_9e_activitynet_feature | None |-|-|-|-|[ckpt]()| [log]()
+bmn_400x100_2x8_9e_activitynet_feature | None |75.28|67.22|5420|3.27|[ckpt]()| [log]()

 ### Data
 1. Put the rescaled feature data folder `csv_mean_100` under `$MMACTION/data/activitynet_feature_cuhk/`
@@ -10,12 +10,12 @@ bmn_400x100_9e_activitynet_feature | None |-|-|-|-|[ckpt]()| [log]()

 2. Put the annotation files under `$MMACTION/data/ActivityNet`

-    The annotation files could be found at [here]() (TODO)
+    The annotation files could be found at [here]()

 ### Checkpoint
 1. Put the `tem_best.pth.tar` and `pem_best.pth.tar` under `checkpoints/`

-    The ckpts could be found at [here]() (TODO)
+    The ckpts could be found at [here]()

 ## Train
 You can use the following command to train a model.
@@ -23,7 +23,7 @@ You can use the following command to train a model.
 python tools/train.py ${CONFIG_FILE} [optional arguments]

 # Example: train BMN on ActivityNet features dataset
-python tools/train.py config/localization/bmn_feature_100_activitynet.py
+python tools/train.py configs/localization/bmn/bmn_400x100_2x8_9e_activitynet_feature.py
 ```

 ## Test
@@ -33,5 +33,5 @@ python tools/test.py ${CONFIG_FILE} ${CHECKPOINT_FILE} [optional arguments]

 # Example: test BMN on ActivityNet feature dataset
 # Note: If evaluated, then please make sure the annotation file for test data contains groundtruth.
-python tools/test.py  config/localization/bmn_feature_100_activitynet.py checkpoints/SOME_CHECKPOINT.pth --eval AR@AN --out results.json
+python tools/test.py  configs/localization/bmn/bmn_400x100_2x8_9e_activitynet_feature.py checkpoints/SOME_CHECKPOINT.pth --eval AR@AN --out results.json
 ```
--- a/configs/localization/bmn/bmn_400x100_9e_activitynet_feature.py
+++ b/configs/localization/bmn/bmn_400x100_9e_activitynet_feature.py
@@ -19,7 +19,7 @@ data_root = 'data/activitynet_feature_cuhk/csv_mean_100/'
 data_root_val = 'data/activitynet_feature_cuhk/csv_mean_100/'
 ann_file_train = 'data/ActivityNet/anet_anno_train.json'
 ann_file_val = 'data/ActivityNet/anet_anno_val.json'
-ann_file_test = 'data/ActivityNet/anet_anno_test.json'
+ann_file_test = 'data/ActivityNet/anet_anno_val.json'

 test_pipeline = [
    dict(type='LoadLocalizationFeature'),
@@ -41,8 +41,10 @@ train_pipeline = [
        keys=['raw_feature', 'gt_bbox'],
        meta_name='video_meta',
        meta_keys=['video_name']),
-    dict(type='ToTensor', keys=['raw_feature', 'gt_bbox']),
-    dict(type='ToDataContainer', fields=[dict(key='gt_bbox', stack=False)])
+    dict(type='ToTensor', keys=['raw_feature']),
+    dict(
+        type='ToDataContainer',
+        fields=[dict(key='gt_bbox', stack=False, cpu_only=True)])
 ]
 val_pipeline = [
    dict(type='LoadLocalizationFeature'),
@@ -55,12 +57,14 @@ val_pipeline = [
            'video_name', 'duration_second', 'duration_frame', 'annotations',
            'feature_frame'
        ]),
-    dict(type='ToTensor', keys=['raw_feature', 'gt_bbox']),
-    dict(type='ToDataContainer', fields=[dict(key='gt_bbox', stack=False)])
+    dict(type='ToTensor', keys=['raw_feature']),
+    dict(
+        type='ToDataContainer',
+        fields=[dict(key='gt_bbox', stack=False, cpu_only=True)])
 ]
 data = dict(
    videos_per_gpu=8,
-    workers_per_gpu=4,
+    workers_per_gpu=8,
    train_dataloader=dict(drop_last=True),
    test=dict(
        type=dataset_type,
@@ -84,14 +88,14 @@ optimizer_config = dict(grad_clip=None)
 # learning policy
 lr_config = dict(policy='step', step=7)

-total_epochs = 9
+total_epochs = 20
 checkpoint_config = dict(interval=1)
 evaluation = dict(interval=1, metrics=['AR@AN'])
 log_config = dict(interval=50, hooks=[dict(type='TextLoggerHook')])
 # runtime settings
 dist_params = dict(backend='nccl')
 log_level = 'INFO'
-work_dir = './work_dirs/bmn_400x100_9e_activitynet_feature/'
+work_dir = './work_dirs/bmn_400x100_2x8_9e_activitynet_feature/'
 load_from = None
 resume_from = None
 workflow = [('train', 1)]

--- a/configs/localization/bsn/README.md
+++ b/configs/localization/bsn/README.md
 # BSN
 config | pretrain | AR@100| AUC | gpu_mem(M) | iter time(s) | ckpt | log
 -|-|-|-|-|-|- | -
-bsn_400x100_20e_activitynet_feature | None |74.91|66.31|41(TEM)+25(PEM)|0.303(TEM)+0.119(PEM)|[ckpt_tem]() [ckpt_pem]| [log_tem]() [log_pem]()
+bsn_400x100_1x16_20e_activitynet_feature | None |74.91|66.31|41(TEM)+25(PEM)|0.074(TEM)+0.036(PEM)|[ckpt_tem]() [ckpt_pem]()| [log_tem]() [log_pem]()

 ## Preparation
 ### Data
@@ -11,7 +11,7 @@ bsn_400x100_20e_activitynet_feature | None |74.91|66.31|41(TEM)+25(PEM)|0.303(TE

 2. Put the annotation files under `$MMACTION/data/ActivityNet`

-    The annotation files could be found at [here]() (TODO)
+    The annotation files could be found at [here]()

 ### Checkpoint
 1. Put the `tem_best.pth.tar` and `pem_best.pth.tar` under `checkpoints/`
@@ -24,29 +24,29 @@ You can use the following commands to train a model.
 # TEM Train
 python tools/train.py ${CONFIG_FILE} [optional arguments]
 # Example: train BSN(TEM) on ActivityNet features dataset
-python tools/train.py config/localization/bsn_feature_100_activitynet_step1_tem.py
+python tools/train.py configs/localization/bsn/bsn_tem_400x100_1x16_20e_activitynet_feature.py

 # TEM Inference
 # Note: This could not be evaluated.
 python tools/test.py ${CONFIG_FILE} ${CHECKPOINT_FILE} [optional arguments]
 # Example: Inference BSN(TEM) with trained model.
-python tools/test.py config/localization/bsn_feature_100_activitynet_step1_tem.py checkpoints/SOME_CHECKPOINT.pth
+python tools/test.py configs/localization/bsn/bsn_tem_400x100_1x16_20e_activitynet_feature.py checkpoints/SOME_CHECKPOINT.pth

 # PGM
 python tools/bsn_proposal_generation.py ${CONFIG_FILE} [--mode ${MODE}]
 # Example: Inference BSN(PGM)
-python tools/bsn_proposal_generation.py config/localization/bsn_feature_100_activitynet_step2_pgm.py --mode train
+python tools/bsn_proposal_generation.py configs/localization/bsn/bsn_pgm_400x100_activitynet_feature.py --mode train

 # PEM Train
 python tools/train.py ${CONFIG_FILE} [optional arguments]
 # Example: train BSN(PEM) on PGM results.
-python tools/train.py config/localization/bsn_feature_100_activitynet_step3_pem.py
+python tools/train.py configs/localization/bsn/bsn_pem_400x100_1x16_20e_activitynet_feature.py

 # PEM Inference
 python tools/test.py ${CONFIG_FILE} ${CHECKPOINT_FILE} [optional arguments]
 # Example: Inference BSN(PEM) with evaluation metric 'AR@AN' and output the results.
 # Note: If evaluated, then please make sure the annotation file for test data contains groundtruth.
-python tools/test.py config/localization/bsn_feature_100_activitynet_step3_pem.py  checkpoints/SOME_CHECKPOINT.pth  --eval AR@AN --out results.json
+python tools/test.py configs/localization/bsn/bsn_pem_400x100_1x16_20e_activitynet_feature.py  checkpoints/SOME_CHECKPOINT.pth  --eval AR@AN --out results.json
 ```

 ## Test
@@ -55,16 +55,16 @@ python tools/test.py config/localization/bsn_feature_100_activitynet_step3_pem.p
 # Note: This could not be evaluated.
 python tools/test.py ${CONFIG_FILE} ${CHECKPOINT_FILE} [optional arguments]
 # Example: Test a TEM model on ActivityNet dataset.
-python tools/test.py config/localization/bsn_feature_100_activitynet_step1_tem.py checkpoints/SOME_CHECKPOINT.pth
+python tools/test.py configs/localization/bsn/bsn_tem_400x100_1x16_20e_activitynet_feature.py checkpoints/SOME_CHECKPOINT.pth

 # PGM
 python tools/bsn_proposal_generation.py ${CONFIG_FILE} [--mode ${MODE}]
 # Example:
-python tools/bsn_proposal_generation.py config/localization/bsn_feature_100_activitynet_step2_pgm.py --mode test
+python tools/bsn_proposal_generation.py configs/localization/bsn/bsn_pgm_400x100_activitynet_feature.py --mode test

 # PEM
 python tools/test.py ${CONFIG_FILE} ${CHECKPOINT_FILE} [optional arguments]
 # Example: Test a PEM model with evaluation metric 'AR@AN' and output the results.
 # Note: If evaluated, then please make sure the annotation file for test data contains groundtruth.
-python tools/test.py config/localization/bsn_feature_100_activitynet_step3_pem.py checkpoints/SOME_CHECKPOINT.pth --eval AR@AN --out results.json
+python tools/test.py configs/localization/bsn/bsn_pem_400x100_1x16_20e_activitynet_feature.py checkpoints/SOME_CHECKPOINT.pth --eval AR@AN --out results.json
 ```
--- a/configs/localization/bsn/bsn_pem_400x100_20e_activitynet_feature.py
+++ b/configs/localization/bsn/bsn_pem_400x100_20e_activitynet_feature.py
@@ -22,7 +22,7 @@ ann_file_train = 'data/ActivityNet/anet_anno_train.json'
 ann_file_val = 'data/ActivityNet/anet_anno_val.json'
 ann_file_test = 'data/ActivityNet/anet_anno_test.json'

-work_dir = 'work_dirs/bsn_400x100_20e_activitynet_feature/'
+work_dir = 'work_dirs/bsn_feature_100/'
 pgm_proposals_dir = f'{work_dir}/pgm_proposals/'
 pgm_features_dir = f'{work_dir}/pgm_features/'

@@ -79,7 +79,7 @@ val_pipeline = [
 ]
 data = dict(
    videos_per_gpu=16,
-    workers_per_gpu=2,
+    workers_per_gpu=8,
    train_dataloader=dict(drop_last=True),
    val_dataloader=dict(videos_per_gpu=1),
    test=dict(

--- a/configs/localization/bsn/bsn_pgm_400x100_activitynet_feature.py
+++ b/configs/localization/bsn/bsn_pgm_400x100_activitynet_feature.py
@@ -6,7 +6,7 @@ ann_file_train = 'data/ActivityNet/anet_anno_train.json'
 ann_file_val = 'data/ActivityNet/anet_anno_val.json'
 ann_file_test = 'data/ActivityNet/anet_anno_test.json'

-work_dir = 'work_dirs/bsn_400x100_20e_activitynet_feature/'
+work_dir = 'work_dirs/bsn_400x100_20e_1x16_activitynet_feature/'
 tem_results_dir = f'{work_dir}/tem_results/'
 pgm_proposals_dir = f'{work_dir}/pgm_proposals/'
 pgm_features_dir = f'{work_dir}/pgm_features/'

--- a/configs/localization/bsn/bsn_tem_400x100_20e_activitynet_feature.py
+++ b/configs/localization/bsn/bsn_tem_400x100_20e_activitynet_feature.py
@@ -17,7 +17,7 @@ ann_file_train = 'data/ActivityNet/anet_anno_train.json'
 ann_file_val = 'data/ActivityNet/anet_anno_val.json'
 ann_file_test = 'data/ActivityNet/anet_anno_full.json'

-work_dir = 'work_dirs/bsn_400x100_20e_activitynet_feature/'
+work_dir = 'work_dirs/bsn_400x100_20e_1x16_activitynet_feature/'
 tem_results_dir = f'{work_dir}/tem_results/'

 test_pipeline = [
@@ -54,7 +54,7 @@ val_pipeline = [

 data = dict(
    videos_per_gpu=16,
-    workers_per_gpu=2,
+    workers_per_gpu=8,
    train_dataloader=dict(drop_last=True),
    test=dict(
        type=dataset_type,

--- a/mmaction/models/localizers/bmn.py
+++ b/mmaction/models/localizers/bmn.py
@@ -128,6 +128,7 @@ class BMN(nn.Module):
        self.anchors_tmins, self.anchors_tmaxs = self._temporal_anchors(
            -0.5, 1.5)
        self.match_map = self._match_map()
+        self.bm_mask = self._get_bm_mask()

    def _match_map(self):
        temporal_gap = 1. / self.tscale
@@ -245,9 +246,9 @@ class BMN(nn.Module):
    def forward_train(self, raw_feature, label_confidence, label_start,
                      label_end):
        confidence_map, start, end = self._forward(raw_feature)
-        bm_mask = self._get_bm_mask().to(raw_feature.device)
        loss = self.loss_cls(confidence_map, start, end, label_confidence,
-                             label_start, label_end, bm_mask)
+                             label_start, label_end,
+                             self.bm_mask.to(raw_feature.device))
        loss_dict = dict(loss=loss[0])
        return loss_dict

@@ -256,7 +257,6 @@ class BMN(nn.Module):
        match_score_start_list = []
        match_score_end_list = []
        for every_gt_bbox in gt_bbox:
-            every_gt_bbox = every_gt_bbox.cpu().numpy()
            gt_iou_map = []
            for start, end in every_gt_bbox:
                current_gt_iou_map = temporal_iou(self.match_map[:, 0],