diff --git a/configs/localization/bmn/README.md b/configs/localization/bmn/README.md
index c3d685d06889c69c31df17a71d38165fc9e7e316..fa14d153714c2ba7ce04b6e5f4e06b48b4133ca4 100644
--- a/configs/localization/bmn/README.md
+++ b/configs/localization/bmn/README.md
@@ -1,7 +1,7 @@
 # BMN config
 | pretrain | AR@100| AUC | gpu_mem(M) | iter time(s) | ckpt | log
 |-|-|-|-|-|- | -
-bmn_400x100_9e_activitynet_feature | None |-|-|-|-|[ckpt]()| [log]()
+bmn_400x100_9e_2x8_activitynet_feature | None |75.28|67.22|5420|3.27|[ckpt]()| [log]()
 ### Data
 1. Put the rescaled feature data folder `csv_mean_100` under `$MMACTION/data/activitynet_feature_cuhk/`
@@ -10,12 +10,12 @@ bmn_400x100_9e_activitynet_feature | None |-|-|-|-|[ckpt]()| [log]()
 2. Put the annotaion files under `$MMACTION/data/ActivityNet`
-   The annotation files could be found at [here]() (TODO)
+   The annotation files could be found at [here]()
 ### Checkpoint
 1. Put the `tem_best.pth.tar` and `pem_best.pth.tar` under `checkpoints/`
-   The ckpts could be found at [here]() (TODO)
+   The ckpts could be found at [here]()
 ## Train
 You can use the following command to train a model.
@@ -23,7 +23,7 @@ You can use the following command to train a model.
 python tools/train.py ${CONFIG_FILE} [optional arguments]
 # Example: train BMN on ActivityNet features dataset
-python tools/train.py config/localization/bmn_feature_100_activitynet.py
+python tools/train.py config/localization/bmn/bmn_400x100_2x8_9e_activitynet_feature.py
 ```
 ## Test
@@ -33,5 +33,5 @@ python tools/test.py ${CONFIG_FILE} ${CHECKPOINT_FILE} [optional arguments]
 # Example: test BMN on ActivityNet feature dataset
 # Note: If evaluated, then please make sure the annotation file for test data contains groundtruth.
-python tools/test.py config/localization/bmn_feature_100_activitynet.py checkpoints/SOME_CHECKPOINT.pth --eval AR@AN --out results.json
+python tools/test.py config/localization/bmn/bmn_400x100_2x8_9e_activitynet_feature.py checkpoints/SOME_CHECKPOINT.pth --eval AR@AN --out results.json
 ```
diff --git a/configs/localization/bmn/bmn_400x100_9e_activitynet_feature.py b/configs/localization/bmn/bmn_400x100_2x8_9e_activitynet_feature.py
similarity index 84%
rename from configs/localization/bmn/bmn_400x100_9e_activitynet_feature.py
rename to configs/localization/bmn/bmn_400x100_2x8_9e_activitynet_feature.py
index 344dc4634d6e6e702659170aaf82c4c65ad398d8..682b26c87fe673a412e621d16fa04e2b623e2ca9 100644
--- a/configs/localization/bmn/bmn_400x100_9e_activitynet_feature.py
+++ b/configs/localization/bmn/bmn_400x100_2x8_9e_activitynet_feature.py
@@ -19,7 +19,7 @@ data_root = 'data/activitynet_feature_cuhk/csv_mean_100/'
 data_root_val = 'data/activitynet_feature_cuhk/csv_mean_100/'
 ann_file_train = 'data/ActivityNet/anet_anno_train.json'
 ann_file_val = 'data/ActivityNet/anet_anno_val.json'
-ann_file_test = 'data/ActivityNet/anet_anno_test.json'
+ann_file_test = 'data/ActivityNet/anet_anno_val.json'
 test_pipeline = [
     dict(type='LoadLocalizationFeature'),
@@ -41,8 +41,10 @@ train_pipeline = [
         keys=['raw_feature', 'gt_bbox'],
         meta_name='video_meta',
         meta_keys=['video_name']),
-    dict(type='ToTensor', keys=['raw_feature', 'gt_bbox']),
-    dict(type='ToDataContainer', fields=[dict(key='gt_bbox', stack=False)])
+    dict(type='ToTensor', keys=['raw_feature']),
+    dict(
+        type='ToDataContainer',
+        fields=[dict(key='gt_bbox', stack=False, cpu_only=True)])
 ]
 val_pipeline = [
     dict(type='LoadLocalizationFeature'),
@@ -55,12 +57,14 @@ val_pipeline = [
             'video_name', 'duration_second', 'duration_frame',
             'annotations', 'feature_frame'
         ]),
-    dict(type='ToTensor', keys=['raw_feature', 'gt_bbox']),
-    dict(type='ToDataContainer', fields=[dict(key='gt_bbox', stack=False)])
+    dict(type='ToTensor', keys=['raw_feature']),
+    dict(
+        type='ToDataContainer',
+        fields=[dict(key='gt_bbox', stack=False, cpu_only=True)])
 ]
 data = dict(
     videos_per_gpu=8,
-    workers_per_gpu=4,
+    workers_per_gpu=8,
     train_dataloader=dict(drop_last=True),
     test=dict(
         type=dataset_type,
@@ -84,14 +88,14 @@ optimizer_config = dict(grad_clip=None)
 # learning policy
 lr_config = dict(policy='step', step=7)
-total_epochs = 9
+total_epochs = 20
 checkpoint_config = dict(interval=1)
 evaluation = dict(interval=1, metrics=['AR@AN'])
 log_config = dict(interval=50, hooks=[dict(type='TextLoggerHook')])
 # runtime settings
 dist_params = dict(backend='nccl')
 log_level = 'INFO'
-work_dir = './work_dirs/bmn_400x100_9e_activitynet_feature/'
+work_dir = './work_dirs/bmn_400x100_2x8_9e_activitynet_feature/'
 load_from = None
 resume_from = None
 workflow = [('train', 1)]
diff --git a/configs/localization/bsn/README.md b/configs/localization/bsn/README.md
index c2439a0af59414ca4315af867b30f5a215298ea6..51bf7702b2bbd5ad398531d174d61157ce1f8575 100644
--- a/configs/localization/bsn/README.md
+++ b/configs/localization/bsn/README.md
@@ -1,7 +1,7 @@
 # BSN config
 | pretrain | AR@100| AUC | gpu_mem(M) | iter time(s) | ckpt | log
 |-|-|-|-|-|- | -
-bsn_400x100_20e_activitynet_feature | None |74.91|66.31|41(TEM)+25(PEM)|0.303(TEM)+0.119(PEM)|[ckpt_tem]() [ckpt_pem]| [log_tem]() [log_pem]()
+bsn_400x100_1x16_20e_activitynet_feature | None |74.91|66.31|41(TEM)+25(PEM)|0.074(TEM)+0.036(PEM)|[ckpt_tem]() [ckpt_pem]| [log_tem]() [log_pem]()
 ## Preparation
 ### Data
@@ -11,7 +11,7 @@ bsn_400x100_20e_activitynet_feature | None |74.91|66.31|41(TEM)+25(PEM)|0.303(TE
 2. Put the annotaion files under `$MMACTION/data/ActivityNet`
-   The annotation files could be found at [here]() (TODO)
+   The annotation files could be found at [here]()
 ### Checkpoint
 1. Put the `tem_best.pth.tar` and `pem_best.pth.tar` under `checkpoints/`
@@ -24,29 +24,29 @@ You can use the following commands to train a model.
 # TEM Train
 python tools/train.py ${CONFIG_FILE} [optional arguments]
 # Example: train BSN(TEM) on ActivityNet features dataset
-python tools/train.py config/localization/bsn_feature_100_activitynet_step1_tem.py
+python tools/train.py config/localization/bsn/bsn_400x100_1x16_20e_activitynet_feature.py
 # TEM Inference
 # Note: This could not be evaluated.
 python tools/test.py ${CONFIG_FILE} ${CHECKPOINT_FILE} [optional arguments]
 # Example: Inference BSN(TEM) with trained model.
-python tools/test.py config/localization/bsn_feature_100_activitynet_step1_tem.py checkpoints/SOME_CHECKPOINT.pth
+python tools/test.py config/localization/bsn/bsn_tem_400x100_1x16_20e_activitynet_feature.py checkpoints/SOME_CHECKPOINT.pth
 # PGM
 python tools/bsn_proposal_generation.py ${CONFIG_FILE} [--mode ${MODE}]
 # Example: Inference BSN(PGM)
-python tools/bsn_proposal_generation.py config/localization/bsn_feature_100_activitynet_step2_pgm.py --mode train
+python tools/bsn_proposal_generation.py config/localization/bsn/bsn_pgm_400x100_activitynet_feature.py --mode train
 # PEM Train
 python tools/train.py ${CONFIG_FILE} [optional arguments]
 # Example: train BSN(PEM) on PGM results.
-python tools/train.py config/localization/bsn_feature_100_activitynet_step3_pem.py
+python tools/train.py config/localization/bsn/bsn_pem_400x100_1x16_20e_activitynet_feature.py
 # PEM Inference
 python tools/test.py ${CONFIG_FILE} ${CHECKPOINT_FILE} [optional arguments]
 # Example: Inference BSN(PEM) with evaluation metric 'AR@AN' and output the results.
 # Note: If evaluated, then please make sure the annotation file for test data contains groundtruth.
-python tools/test.py config/localization/bsn_feature_100_activitynet_step3_pem.py checkpoints/SOME_CHECKPOINT.pth --eval AR@AN --out results.json
+python tools/test.py config/localization/bsn/bsn_400x100_1x16_20e_activitynet_feature.py checkpoints/SOME_CHECKPOINT.pth --eval AR@AN --out results.json
 ```
 ## Test
@@ -55,16 +55,16 @@ python tools/test.py config/localization/bsn_feature_100_activitynet_step3_pem.p
 # Note: This could not be evaluated.
 python tools/test.py ${CONFIG_FILE} ${CHECKPOINT_FILE} [optional arguments]
 # Example: Test a TEM model on ActivityNet dataset.
-python tools/test.py config/localization/bsn_feature_100_activitynet_step1_tem.py checkpoints/SOME_CHECKPOINT.pth
+python tools/test.py config/localization/bsn/bsn_tem_400x100_1x16_20e_activitynet_feature.py checkpoints/SOME_CHECKPOINT.pth
 # PGM
 python tools/bsn_proposal_generation.py ${CONFIG_FILE} [--mode ${MODE}]
 # Example:
-python tools/bsn_proposal_generation.py config/localization/bsn_feature_100_activitynet_step2_pgm.py --mode test
+python tools/bsn_proposal_generation.py config/localization/bsn/bsn_pgm_400x100_activitynet_feature.py --mode test
 # PEM
 python tools/test.py ${CONFIG_FILE} ${CHECKPOINT_FILE} [optional arguments]
 # Example: Test a PEM model with with evaluation metric 'AR@AN' and output the results.
 # Note: If evaluated, then please make sure the annotation file for test data contains groundtruth.
-python tools/test.py config/localization/bsn_feature_100_activitynet_step3_pem.py checkpoints/SOME_CHECKPOINT.pth --eval AR@AN --out results.json
+python tools/test.py config/localization/bsn/bsn_400x100_1x16_20e_activitynet_feature.py checkpoints/SOME_CHECKPOINT.pth --eval AR@AN --out results.json
 ```
diff --git a/configs/localization/bsn/bsn_pem_400x100_20e_activitynet_feature.py b/configs/localization/bsn/bsn_pem_400x100_1x16_20e_activitynet_feature.py
similarity index 97%
rename from configs/localization/bsn/bsn_pem_400x100_20e_activitynet_feature.py
rename to configs/localization/bsn/bsn_pem_400x100_1x16_20e_activitynet_feature.py
index 6999e125e318d7e345b0f174680198100e31a911..d55eae39e7bc7f9a336408296b479aa46488ea94 100644
--- a/configs/localization/bsn/bsn_pem_400x100_20e_activitynet_feature.py
+++ b/configs/localization/bsn/bsn_pem_400x100_1x16_20e_activitynet_feature.py
@@ -22,7 +22,7 @@ ann_file_train = 'data/ActivityNet/anet_anno_train.json'
 ann_file_val = 'data/ActivityNet/anet_anno_val.json'
 ann_file_test = 'data/ActivityNet/anet_anno_test.json'
-work_dir = 'work_dirs/bsn_400x100_20e_activitynet_feature/'
+work_dir = 'work_dirs/bsn_feature_100/'
 pgm_proposals_dir = f'{work_dir}/pgm_proposals/'
 pgm_features_dir = f'{work_dir}/pgm_features/'
@@ -79,7 +79,7 @@ val_pipeline = [
 ]
 data = dict(
     videos_per_gpu=16,
-    workers_per_gpu=2,
+    workers_per_gpu=8,
     train_dataloader=dict(drop_last=True),
     val_dataloader=dict(videos_per_gpu=1),
     test=dict(
diff --git a/configs/localization/bsn/bsn_pgm_400x100_activitynet_feature.py b/configs/localization/bsn/bsn_pgm_400x100_activitynet_feature.py
index beff45ed0853742caf5deba0f7f613a52b4813bc..ce510a5ac5079958adb064aba31e6ecd4e6aec9e 100644
--- a/configs/localization/bsn/bsn_pgm_400x100_activitynet_feature.py
+++ b/configs/localization/bsn/bsn_pgm_400x100_activitynet_feature.py
@@ -6,7 +6,7 @@ ann_file_train = 'data/ActivityNet/anet_anno_train.json'
 ann_file_val = 'data/ActivityNet/anet_anno_val.json'
 ann_file_test = 'data/ActivityNet/anet_anno_test.json'
-work_dir = 'work_dirs/bsn_400x100_20e_activitynet_feature/'
+work_dir = 'work_dirs/bsn_400x100_20e_1x16_activitynet_feature/'
 tem_results_dir = f'{work_dir}/tem_results/'
 pgm_proposals_dir = f'{work_dir}/pgm_proposals/'
 pgm_features_dir = f'{work_dir}/pgm_features/'
diff --git a/configs/localization/bsn/bsn_tem_400x100_20e_activitynet_feature.py b/configs/localization/bsn/bsn_tem_400x100_1x16_20e_activitynet_feature.py
similarity index 96%
rename from configs/localization/bsn/bsn_tem_400x100_20e_activitynet_feature.py
rename to configs/localization/bsn/bsn_tem_400x100_1x16_20e_activitynet_feature.py
index 861eefdc766451d70e09bb5445d0979f7d70c22c..397aa333500f69b3f735b6659cc332b39944bbd7 100644
--- a/configs/localization/bsn/bsn_tem_400x100_20e_activitynet_feature.py
+++ b/configs/localization/bsn/bsn_tem_400x100_1x16_20e_activitynet_feature.py
@@ -17,7 +17,7 @@ ann_file_train = 'data/ActivityNet/anet_anno_train.json'
 ann_file_val = 'data/ActivityNet/anet_anno_val.json'
 ann_file_test = 'data/ActivityNet/anet_anno_full.json'
-work_dir = 'work_dirs/bsn_400x100_20e_activitynet_feature/'
+work_dir = 'work_dirs/bsn_400x100_20e_1x16_activitynet_feature/'
 tem_results_dir = f'{work_dir}/tem_results/'
 test_pipeline = [
@@ -54,7 +54,7 @@ val_pipeline = [
 data = dict(
     videos_per_gpu=16,
-    workers_per_gpu=2,
+    workers_per_gpu=8,
     train_dataloader=dict(drop_last=True),
     test=dict(
         type=dataset_type,
diff --git a/mmaction/models/localizers/bmn.py b/mmaction/models/localizers/bmn.py
index 286441addd6fceb2e2851334eb7903b89311767b..59d259da8a170fed204caefba65fa0bb0d8ddc49 100644
--- a/mmaction/models/localizers/bmn.py
+++ b/mmaction/models/localizers/bmn.py
@@ -128,6 +128,7 @@ class BMN(nn.Module):
         self.anchors_tmins, self.anchors_tmaxs = self._temporal_anchors(
             -0.5, 1.5)
         self.match_map = self._match_map()
+        self.bm_mask = self._get_bm_mask()
     def _match_map(self):
         temporal_gap = 1. / self.tscale
@@ -245,9 +246,9 @@ class BMN(nn.Module):
     def forward_train(self, raw_feature, label_confidence, label_start,
                       label_end):
         confidence_map, start, end = self._forward(raw_feature)
-        bm_mask = self._get_bm_mask().to(raw_feature.device)
         loss = self.loss_cls(confidence_map, start, end, label_confidence,
-                             label_start, label_end, bm_mask)
+                             label_start, label_end,
+                             self.bm_mask.to(raw_feature.device))
         loss_dict = dict(loss=loss[0])
         return loss_dict
@@ -256,7 +257,6 @@ class BMN(nn.Module):
         match_score_start_list = []
         match_score_end_list = []
         for every_gt_bbox in gt_bbox:
-            every_gt_bbox = every_gt_bbox.cpu().numpy()
             gt_iou_map = []
             for start, end in every_gt_bbox:
                 current_gt_iou_map = temporal_iou(self.match_map[:, 0],