diff --git a/examples/bmn/README.md b/examples/bmn/README.md index dbf593512c07da9645418b92b2a6819c9be13fa5..4ff05519010cf002ba6fc1bf145113bde7aa239a 100644 --- a/examples/bmn/README.md +++ b/examples/bmn/README.md @@ -29,7 +29,6 @@ BMN Overview ├── train.py # 训练代码,训练网络 ├── eval.py # 评估代码,评估网络性能 ├── predict.py # 预测代码,针对任意输入预测结果 -├── bmn_model.py # 网络结构与损失函数定义 ├── bmn_metric.py # 精度评估方法定义 ├── reader.py # 数据reader,构造Dataset和Dataloader ├── bmn_utils.py # 模型细节相关代码 @@ -41,7 +40,7 @@ BMN Overview ## 数据准备 -BMN的训练数据采用ActivityNet1.3提供的数据集,我们提供了处理好的视频特征,请下载[bmn\_feat](https://paddlemodels.bj.bcebos.com/video_detection/bmn_feat.tar.gz)数据后解压,同时相应的修改bmn.yaml中的特征路径feat\_path。对应的标签文件请下载[label](https://paddlemodels.bj.bcebos.com/video_detection/activitynet_1.3_annotations.json)并修改bmn.yaml中的标签文件路径anno\_file。 +BMN的训练数据采用ActivityNet1.3提供的数据集,我们提供了处理好的视频特征和对应的标签文件,请下载特征数据[bmn\_feat](https://paddlemodels.bj.bcebos.com/video_detection/bmn_feat.tar.gz)和标签数据[label](https://paddlemodels.bj.bcebos.com/video_detection/activitynet_1.3_annotations.json),并相应地修改配置文件bmn.yaml中的特征文件路径feat\_path和标签文件路径anno\_file。 ## 模型训练 @@ -52,22 +51,17 @@ BMN的训练数据采用ActivityNet1.3提供的数据集,我们提供了处理 bash run.sh -若使用单卡训练,启动方式如下: +若使用单卡训练,请将配置文件bmn.yaml中的batch\_size调整为16,启动方式如下: - export CUDA_VISIBLE_DEVICES=0 python train.py -- 代码运行需要先安装pandas - -- 从头开始训练,使用上述启动命令行或者脚本程序即可启动训练,不需要用到预训练模型 +默认使用静态图训练,若使用动态图训练只需要在运行脚本添加`-d`参数即可,如: -- 单卡训练时,请将配置文件中的batch_size调整为16 + python train.py -d -**训练策略:** +- 代码运行需要先安装pandas -* 采用Adam优化器,初始learning\_rate=0.001 -* 权重衰减系数为1e-4 -* 学习率在迭代次数达到4200的时候做一次衰减,衰减系数为0.1 +- 从头开始训练,使用上述启动命令行或者脚本程序即可启动训练,不需要用到预训练模型 ## 模型评估 @@ -76,9 +70,9 @@ BMN的训练数据采用ActivityNet1.3提供的数据集,我们提供了处理 python eval.py --weights=$PATH_TO_WEIGHTS -- 进行评估时,可修改命令行中的`weights`参数指定需要评估的权重,如果不设置,将使用默认参数文件checkpoint/final.pdparams。 +- 进行评估时,可修改命令行中的`weights`参数指定需要评估的权重,若未指定,脚本会下载已发布的模型[model](https://paddlemodels.bj.bcebos.com/hapi/bmn.pdparams)进行评估。 -- 
上述程序会将运行结果保存在output/EVAL/BMN\_results文件夹下,测试结果保存在evaluate\_results/bmn\_results\_validation.json文件中。 +- 上述程序会将运行结果保存在`--output_path`参数指定的文件夹下,默认为output/EVAL/BMN\_results;测试结果保存在`--result_path`参数指定的文件夹下,默认为evaluate\_results。 - 注:评估时可能会出现loss为nan的情况。这是由于评估时用的是单个样本,可能存在没有iou>0.6的样本,所以为nan,对最终的评估结果没有影响。 @@ -87,9 +81,9 @@ BMN的训练数据采用ActivityNet1.3提供的数据集,我们提供了处理 - ActivityNet数据集的具体使用说明可以参考其[官方网站](http://activity-net.org) -- 下载指标评估代码,请从[ActivityNet Gitub repository](https://github.com/activitynet/ActivityNet.git)下载,将Evaluation文件夹拷贝至models/dygraph/bmn目录下。(注:由于第三方评估代码不支持python3,此处建议使用python2进行评估;若使用python3,print函数需要添加括号,请对Evaluation目录下的.py文件做相应修改。) +- 下载指标评估代码,请从[ActivityNet GitHub repository](https://github.com/activitynet/ActivityNet.git)下载,将Evaluation文件夹拷贝至hapi/examples/bmn目录下。(注:由于第三方评估代码不支持python3,此处建议使用python2进行评估;若使用python3,print函数需要添加括号,请对Evaluation目录下的.py文件做相应修改。) -- 请下载[activity\_net\_1\_3\_new.json](https://paddlemodels.bj.bcebos.com/video_detection/activity_net_1_3_new.json)文件,并将其放置在models/dygraph/bmn/Evaluation/data目录下,相较于原始的activity\_net.v1-3.min.json文件,我们过滤了其中一些失效的视频条目。 +- 请下载[activity\_net\_1\_3\_new.json](https://paddlemodels.bj.bcebos.com/video_detection/activity_net_1_3_new.json)文件,并将其放置在hapi/examples/bmn/Evaluation/data目录下,相较于原始的activity\_net.v1-3.min.json文件,我们过滤了其中一些失效的视频条目。 - 计算精度指标 @@ -100,7 +94,7 @@ BMN的训练数据采用ActivityNet1.3提供的数据集,我们提供了处理 | AR@1 | AR@5 | AR@10 | AR@100 | AUC | | :---: | :---: | :---: | :---: | :---: | -| 33.46 | 49.25 | 56.25 | 75.40 | 67.16% | +| 33.10 | 49.18 | 56.54 | 75.12 | 67.16% | ## 模型推断 @@ -110,9 +104,9 @@ BMN的训练数据采用ActivityNet1.3提供的数据集,我们提供了处理 python predict.py --weights=$PATH_TO_WEIGHTS \ --filelist=$FILELIST -- 使用python命令行启动程序时,`--filelist`参数指定待推断的文件列表,如果不设置,默认为./infer.list。`--weights`参数为训练好的权重参数,如果不设置,将使用默认参数文件checkpoint/final.pdparams。 +- 使用python命令行启动程序时,`--filelist`参数指定待推断的文件列表,如果不设置,默认为./infer.list。`--weights`参数为训练好的权重参数,若未指定,脚本会下载已发布的模型[model](https://paddlemodels.bj.bcebos.com/hapi/bmn.pdparams)进行预测。 -- 
上述程序会将运行结果保存在output/INFER/BMN\_results文件夹下,测试结果保存在predict\_results/bmn\_results\_test.json文件中。 +- 上述程序会将运行结果保存在`--output_path`参数指定的文件夹下,默认为output/INFER/BMN\_results;测试结果保存在`--result_path`参数指定的文件夹下,默认为predict\_results。 ## 参考论文 diff --git a/examples/bmn/bmn.yaml b/examples/bmn/bmn.yaml index da50ea4f7c654d40fbf2498863cb7e87664fe55a..7964f92161f8a5f55289e21a465494c32e99fcbf 100644 --- a/examples/bmn/bmn.yaml +++ b/examples/bmn/bmn.yaml @@ -12,11 +12,10 @@ MODEL: TRAIN: subset: "train" epoch: 9 - batch_size: 4 + batch_size: 4 num_workers: 4 use_shuffle: True device: "gpu" - num_gpus: 4 learning_rate: 0.001 learning_rate_decay: 0.1 lr_decay_iter: 4200 @@ -29,10 +28,6 @@ TEST: subset: "validation" batch_size: 1 num_workers: 1 - use_buffer: False - snms_alpha: 0.001 - snms_t1: 0.5 - snms_t2: 0.9 output_path: "output/EVAL/BMN_results" result_path: "evaluate_results" @@ -40,10 +35,6 @@ INFER: subset: "test" batch_size: 1 num_workers: 1 - use_buffer: False - snms_alpha: 0.4 - snms_t1: 0.5 - snms_t2: 0.9 filelist: './infer.list' output_path: "output/INFER/BMN_results" result_path: "predict_results" diff --git a/examples/bmn/bmn_metric.py b/examples/bmn/bmn_metric.py index d8e0d3e3ae307c9fa61000e336b4ef6864f956f2..22adab5644975d9762e80ad320dbbb17318654a9 100644 --- a/examples/bmn/bmn_metric.py +++ b/examples/bmn/bmn_metric.py @@ -36,8 +36,21 @@ class BmnMetric(Metric): #get video_dict and video_list if self.mode == 'test': self.get_test_dataset_dict() + if not os.path.isdir(self.cfg.TEST.output_path): + os.makedirs(self.cfg.TEST.output_path) + if not os.path.isdir(self.cfg.TEST.result_path): + os.makedirs(self.cfg.TEST.result_path) elif self.mode == 'infer': self.get_infer_dataset_dict() + if not os.path.isdir(self.cfg.INFER.output_path): + os.makedirs(self.cfg.INFER.output_path) + if not os.path.isdir(self.cfg.INFER.result_path): + os.makedirs(self.cfg.INFER.result_path) + + def add_metric_op(self, preds, label): + pred_bm, pred_start, pred_en = preds + video_index = 
label[-1] + return [pred_bm, pred_start, pred_en, video_index] #return list def update(self, pred_bm, pred_start, pred_end, fid): # generate proposals diff --git a/examples/bmn/eval.py b/examples/bmn/eval.py index 52f4091f7541e40190ffa2c29c653626b6844515..071f5d13401a9218ee2749a67edb70148ec973a4 100644 --- a/examples/bmn/eval.py +++ b/examples/bmn/eval.py @@ -37,7 +37,6 @@ def parse_args(): parser.add_argument( "-d", "--dynamic", - default=True, action='store_true', help="enable dygraph mode, only support dynamic mode at present time") parser.add_argument( @@ -56,6 +55,17 @@ def parse_args(): default=None, help='weight path, None to automatically download weights provided by Paddle.' ) + parser.add_argument( + '--output_path', + type=str, + default="output/EVAL/BMN_results", + help='output dir path, default to use output/EVAL/BMN_results') + parser.add_argument( + '--result_path', + type=str, + default="evaluate_results/", + help='output dir path after post processing, default to use ./evaluate_results/' + ) parser.add_argument( '--log_interval', type=int, @@ -67,17 +77,21 @@ def parse_args(): # Performance Evaluation def test_bmn(args): - # only support dynamic mode at present time device = set_device(args.device) fluid.enable_dygraph(device) if args.dynamic else None + #config setting config = parse_config(args.config_file) eval_cfg = merge_configs(config, 'test', vars(args)) - if not os.path.isdir(config.TEST.output_path): - os.makedirs(config.TEST.output_path) - if not os.path.isdir(config.TEST.result_path): - os.makedirs(config.TEST.result_path) + feat_dim = config.MODEL.feat_dim + tscale = config.MODEL.tscale + dscale = config.MODEL.dscale + prop_boundary_ratio = config.MODEL.prop_boundary_ratio + num_sample = config.MODEL.num_sample + num_sample_perbin = config.MODEL.num_sample_perbin + + #input and video index inputs = [ Input( [None, config.MODEL.feat_dim, config.MODEL.tscale], @@ -97,9 +111,14 @@ def test_bmn(args): eval_dataset = BmnDataset(eval_cfg, 
'test') #model - model = bmn(config, pretrained=args.weights is None) + model = bmn(tscale, + dscale, + prop_boundary_ratio, + num_sample, + num_sample_perbin, + pretrained=args.weights is None) model.prepare( - loss_function=BmnLoss(config), + loss_function=BmnLoss(tscale, dscale), metrics=BmnMetric( config, mode='test'), inputs=inputs, diff --git a/examples/bmn/predict.py b/examples/bmn/predict.py index 45733bc40213528d7398707ef0ec1e3f1b2c48be..fb1b10452fb34a3380d3e027df5383d2a85fb0c0 100644 --- a/examples/bmn/predict.py +++ b/examples/bmn/predict.py @@ -37,7 +37,6 @@ def parse_args(): parser.add_argument( "-d", "--dynamic", - default=True, action='store_true', help="enable dygraph mode, only support dynamic mode at present time") parser.add_argument( @@ -54,10 +53,21 @@ def parse_args(): help='weight path, None to automatically download weights provided by Paddle.' ) parser.add_argument( - '--save_dir', + '--filelist', + type=str, + default="infer.list", + help='infer file list, default to use ./infer.list') + parser.add_argument( + '--output_path', + type=str, + default="output/INFER/BMN_results", + help='output dir path, default to use output/INFER/BMN_results') + parser.add_argument( + '--result_path', type=str, default="predict_results/", - help='output dir path, default to use ./predict_results/') + help='output dir path after post processing, default to use ./predict_results/' + ) parser.add_argument( '--log_interval', type=int, @@ -69,18 +79,21 @@ def parse_args(): # Prediction def infer_bmn(args): - # only support dynamic mode at present time device = set_device(args.device) fluid.enable_dygraph(device) if args.dynamic else None + #config setting config = parse_config(args.config_file) infer_cfg = merge_configs(config, 'infer', vars(args)) - if not os.path.isdir(config.INFER.output_path): - os.makedirs(config.INFER.output_path) - if not os.path.isdir(config.INFER.result_path): - os.makedirs(config.INFER.result_path) + feat_dim = config.MODEL.feat_dim + 
tscale = config.MODEL.tscale + dscale = config.MODEL.dscale + prop_boundary_ratio = config.MODEL.prop_boundary_ratio + num_sample = config.MODEL.num_sample + num_sample_perbin = config.MODEL.num_sample_perbin + #input and video index inputs = [ Input( [None, config.MODEL.feat_dim, config.MODEL.tscale], @@ -92,7 +105,13 @@ def infer_bmn(args): #data infer_dataset = BmnDataset(infer_cfg, 'infer') - model = bmn(config, pretrained=args.weights is None) + #model + model = bmn(tscale, + dscale, + prop_boundary_ratio, + num_sample, + num_sample_perbin, + pretrained=args.weights is None) model.prepare( metrics=BmnMetric( config, mode='infer'), diff --git a/examples/bmn/run.sh b/examples/bmn/run.sh index 24fd8e3da991c74628f6d345badf7bfe2e67c35d..979a7301705e6a02c59907221dbaa7a152d9dc47 100644 --- a/examples/bmn/run.sh +++ b/examples/bmn/run.sh @@ -1,3 +1,2 @@ export CUDA_VISIBLE_DEVICES=0,1,2,3 - python -m paddle.distributed.launch train.py diff --git a/examples/bmn/train.py b/examples/bmn/train.py index 7bb4affdf288f6c51771ebcef47cf9aaabb14b22..178085ae7eb1275d43df65e3ce0d96b37a25c882 100644 --- a/examples/bmn/train.py +++ b/examples/bmn/train.py @@ -34,11 +34,7 @@ logger = logging.getLogger(__name__) def parse_args(): parser = argparse.ArgumentParser("Paddle high level api of BMN.") parser.add_argument( - "-d", - "--dynamic", - default=True, - action='store_true', - help="enable dygraph mode") + "-d", "--dynamic", action='store_true', help="enable dygraph mode") parser.add_argument( '--config_file', type=str, @@ -48,7 +44,7 @@ def parse_args(): '--batch_size', type=int, default=None, - help='training batch size. None to use config file setting.') + help='training batch size. 
None for read from config file.') parser.add_argument( '--learning_rate', type=float, @@ -68,8 +64,8 @@ def parse_args(): parser.add_argument( '--epoch', type=int, - default=9, - help='epoch number, 0 for read from config file') + default=None, + help='epoch number, None for read from config file') parser.add_argument( '--valid_interval', type=int, @@ -113,22 +109,23 @@ def train_bmn(args): if not os.path.isdir(args.save_dir): os.makedirs(args.save_dir) + #config setting config = parse_config(args.config_file) train_cfg = merge_configs(config, 'train', vars(args)) val_cfg = merge_configs(config, 'valid', vars(args)) - inputs = [ - Input( - [None, config.MODEL.feat_dim, config.MODEL.tscale], - 'float32', - name='feat_input') - ] - gt_iou_map = Input( - [None, config.MODEL.dscale, config.MODEL.tscale], - 'float32', - name='gt_iou_map') - gt_start = Input([None, config.MODEL.tscale], 'float32', name='gt_start') - gt_end = Input([None, config.MODEL.tscale], 'float32', name='gt_end') + feat_dim = config.MODEL.feat_dim + tscale = config.MODEL.tscale + dscale = config.MODEL.dscale + prop_boundary_ratio = config.MODEL.prop_boundary_ratio + num_sample = config.MODEL.num_sample + num_sample_perbin = config.MODEL.num_sample_perbin + + # input and label list + inputs = [Input([None, feat_dim, tscale], 'float32', name='feat_input')] + gt_iou_map = Input([None, dscale, tscale], 'float32', name='gt_iou_map') + gt_start = Input([None, tscale], 'float32', name='gt_start') + gt_end = Input([None, tscale], 'float32', name='gt_end') labels = [gt_iou_map, gt_start, gt_end] # data @@ -136,11 +133,16 @@ def train_bmn(args): val_dataset = BmnDataset(val_cfg, 'valid') # model - model = bmn(config, pretrained=False) + model = bmn(tscale, + dscale, + prop_boundary_ratio, + num_sample, + num_sample_perbin, + pretrained=False) optim = optimizer(config, parameter_list=model.parameters()) model.prepare( optimizer=optim, - loss_function=BmnLoss(config), + loss_function=BmnLoss(tscale, dscale), 
inputs=inputs, labels=labels, device=device) @@ -148,11 +150,10 @@ def train_bmn(args): # if resume weights is given, load resume weights directly if args.resume is not None: model.load(args.resume) - model.fit(train_data=train_dataset, eval_data=val_dataset, batch_size=train_cfg.TRAIN.batch_size, - epochs=args.epoch, + epochs=train_cfg.TRAIN.epoch, eval_freq=args.valid_interval, log_freq=args.log_interval, save_dir=args.save_dir, diff --git a/hapi/vision/models/bmn_model.py b/hapi/vision/models/bmn_model.py index 4f503f9d55ad13875d0a80b674e1a4599155f24e..e15555debd2cf0a07299d414310487b1c8bfa905 100644 --- a/hapi/vision/models/bmn_model.py +++ b/hapi/vision/models/bmn_model.py @@ -14,7 +14,6 @@ import paddle.fluid as fluid from paddle.fluid import ParamAttr -from paddle.fluid.framework import in_dygraph_mode import numpy as np import math @@ -27,7 +26,7 @@ DATATYPE = 'float32' pretrain_infos = { 'bmn': ('https://paddlemodels.bj.bcebos.com/hapi/bmn.pdparams', - '9286c821acc4cad46d6613b931ba468c') + '9286c821acc4cad46d6613b931ba468c') } @@ -131,17 +130,23 @@ class BMN(Model): `"BMN: Boundary-Matching Network for Temporal Action Proposal Generation" `_ Args: - cfg (AttrDict): configs for BMN model + tscale (int): sequence length, default 100. + dscale (int): max duration length, default 100. + prop_boundary_ratio (float): ratio of expanded temporal region in proposal boundary, default 0.5. + num_sample (int): number of samples between starting boundary and ending boundary of each proposal, default 32. + num_sample_perbin (int): number of selected points in each sample, default 3. 
""" - def __init__(self, cfg): + + def __init__(self, tscale, dscale, prop_boundary_ratio, num_sample, + num_sample_perbin): super(BMN, self).__init__() #init config - self.tscale = cfg.MODEL.tscale - self.dscale = cfg.MODEL.dscale - self.prop_boundary_ratio = cfg.MODEL.prop_boundary_ratio - self.num_sample = cfg.MODEL.num_sample - self.num_sample_perbin = cfg.MODEL.num_sample_perbin + self.tscale = tscale + self.dscale = dscale + self.prop_boundary_ratio = prop_boundary_ratio + self.num_sample = num_sample + self.num_sample_perbin = num_sample_perbin self.hidden_dim_1d = 256 self.hidden_dim_2d = 128 @@ -192,23 +197,17 @@ class BMN(Model): padding=1, act="relu") - # init to speed up + # get sample mask sample_mask_array = get_interp1d_mask( self.tscale, self.dscale, self.prop_boundary_ratio, self.num_sample, self.num_sample_perbin) - if in_dygraph_mode(): - self.sample_mask = fluid.dygraph.base.to_variable( - sample_mask_array) - else: # static - self.sample_mask = fluid.layers.create_parameter( - shape=[ - self.tscale, self.num_sample * self.dscale * self.tscale - ], - dtype=DATATYPE, - attr=fluid.ParamAttr( - name="sample_mask", trainable=False), - default_initializer=fluid.initializer.NumpyArrayInitializer( - sample_mask_array)) + self.sample_mask = fluid.layers.create_parameter( + shape=[self.tscale, self.num_sample * self.dscale * self.tscale], + dtype=DATATYPE, + attr=fluid.ParamAttr( + name="sample_mask", trainable=False), + default_initializer=fluid.initializer.NumpyArrayInitializer( + sample_mask_array)) self.sample_mask.stop_gradient = True @@ -292,23 +291,27 @@ class BmnLoss(Loss): """Loss for BMN model Args: - cfg (AttrDict): configs for BMN model + tscale (int): sequence length, default 100. + dscale (int): max duration length, default 100. 
""" - def __init__(self, cfg): + + def __init__(self, tscale, dscale): super(BmnLoss, self).__init__() - self.cfg = cfg + self.tscale = tscale + self.dscale = dscale def _get_mask(self): - dscale = self.cfg.MODEL.dscale - tscale = self.cfg.MODEL.tscale bm_mask = [] - for idx in range(dscale): - mask_vector = [1 for i in range(tscale - idx) + for idx in range(self.dscale): + mask_vector = [1 for i in range(self.tscale - idx) ] + [0 for i in range(idx)] bm_mask.append(mask_vector) bm_mask = np.array(bm_mask, dtype=np.float32) self_bm_mask = fluid.layers.create_global_var( - shape=[dscale, tscale], value=0, dtype=DATATYPE, persistable=True) + shape=[self.dscale, self.tscale], + value=0, + dtype=DATATYPE, + persistable=True) fluid.layers.assign(bm_mask, self_bm_mask) self_bm_mask.stop_gradient = True return self_bm_mask @@ -437,15 +440,24 @@ class BmnLoss(Loss): return loss -def bmn(cfg, pretrained=True): +def bmn(tscale, + dscale, + prop_boundary_ratio, + num_sample, + num_sample_perbin, + pretrained=True): """BMN model Args: - cfg (AttrDict): configs for BMN model - pretrained (bool): If True, returns a model with pre-trained model - on COCO, default True + tscale (int): sequence length, default 100. + dscale (int): max duration length, default 100. + prop_boundary_ratio (float): ratio of expanded temporal region in proposal boundary, default 0.5. + num_sample (int): number of samples between starting boundary and ending boundary of each proposal, default 32. + num_sample_perbin (int): number of selected points in each sample, default 3. + pretrained (bool): If True, returns a model with pre-trained model, default True. """ - model = BMN(cfg) + model = BMN(tscale, dscale, prop_boundary_ratio, num_sample, + num_sample_perbin) if pretrained: weight_path = get_weights_path(*(pretrain_infos['bmn'])) assert weight_path.endswith('.pdparams'), \