From 96c57e63bd6012cef3793096f0d374495a642c7b Mon Sep 17 00:00:00 2001 From: SunGaofeng Date: Mon, 2 Sep 2019 03:12:28 -0500 Subject: [PATCH] modifications on PaddleVideo to enable universality of codes (#2872) * fix reader, metrics, frameworks to increase universality of PaddleVideo * change config files from .txt to .yaml * fix input to use PyReader and modify on readme to fit new codes * adjust code structure and modify configs and readme to fit standard * modify links of pdparams in readme and scripts * fix pickle.dump in python3 and modify on readme of attention_lstm * add description of weights argument in eval/predict/inference_model.py --- PaddleCV/PaddleVideo/.gitignore | 1 - PaddleCV/PaddleVideo/README.md | 119 ++++++--- .../PaddleVideo/configs/attention_cluster.txt | 33 --- .../configs/attention_cluster.yaml | 36 +++ .../PaddleVideo/configs/attention_lstm.txt | 37 --- .../PaddleVideo/configs/attention_lstm.yaml | 37 +++ PaddleCV/PaddleVideo/configs/ctcn.txt | 53 ---- PaddleCV/PaddleVideo/configs/ctcn.yaml | 53 ++++ PaddleCV/PaddleVideo/configs/nextvlad.txt | 39 --- PaddleCV/PaddleVideo/configs/nextvlad.yaml | 39 +++ PaddleCV/PaddleVideo/configs/nonlocal.txt | 93 ------- PaddleCV/PaddleVideo/configs/nonlocal.yaml | 94 +++++++ PaddleCV/PaddleVideo/configs/stnet.txt | 53 ---- PaddleCV/PaddleVideo/configs/stnet.yaml | 55 +++++ PaddleCV/PaddleVideo/configs/tsm.txt | 52 ---- PaddleCV/PaddleVideo/configs/tsm.yaml | 54 ++++ PaddleCV/PaddleVideo/configs/tsn.txt | 51 ---- PaddleCV/PaddleVideo/configs/tsn.yaml | 53 ++++ PaddleCV/PaddleVideo/data/dataset/README.md | 164 +++++++++++++ .../{ => data}/dataset/ctcn/README.md | 2 +- .../dataset/kinetics/generate_label.py | 0 .../{ => data}/dataset/kinetics/video2pkl.py | 9 +- .../data/dataset/kinetics_labels.json | 1 + .../{ => data}/dataset/nonlocal/README.md | 8 +- .../dataset/nonlocal/change_filelist.py | 0 .../dataset/nonlocal/generate_filelist.py | 0 .../dataset/nonlocal/generate_list.sh | 0 .../nonlocal/generate_testlist_multicrop.py | 0 .../{ => data}/dataset/youtube8m/tf2pkl.py | 7 +- .../dataset/youtube8m/yt8m_pca/eigenvals.npy | Bin PaddleCV/PaddleVideo/dataset/README.md | 128 ---------- PaddleCV/PaddleVideo/{test.py => eval.py} | 60 ++--- PaddleCV/PaddleVideo/inference_model.py | 123 ++++++++++ .../metrics/detections/detection_metrics.py | 70 +++++- PaddleCV/PaddleVideo/metrics/metrics_util.py | 199 +++++++++++---- .../multicrop_test/multicrop_test_metrics.py | 57 ++++- .../models/attention_cluster/README.md | 65 +++-- .../attention_cluster/attention_cluster.py | 63 ++--- .../attention_cluster/logistic_model.py | 0 .../attention_cluster/shifting_attention.py | 0 .../models/attention_lstm/README.md | 69 ++++-- .../models/attention_lstm/attention_lstm.py | 61 ++--- .../models/attention_lstm/lstm_attention.py | 0 PaddleCV/PaddleVideo/models/ctcn/README.md | 75 ++++-- PaddleCV/PaddleVideo/models/ctcn/ctcn.py | 104 ++++---- PaddleCV/PaddleVideo/models/model.py | 17 +- .../PaddleVideo/models/nextvlad/README.md | 63 ++++- .../PaddleVideo/models/nextvlad/nextvlad.py | 71 +++--- .../models/nonlocal_model/README.md | 69 ++++-- .../models/nonlocal_model/nonlocal_model.py | 57 +++-- PaddleCV/PaddleVideo/models/stnet/README.md | 77 +++--- PaddleCV/PaddleVideo/models/stnet/stnet.py | 46 ++-- PaddleCV/PaddleVideo/models/tsm/README.md | 69 ++++-- PaddleCV/PaddleVideo/models/tsm/tsm.py | 46 ++-- PaddleCV/PaddleVideo/models/tsn/README.md | 74 ++++-- PaddleCV/PaddleVideo/models/tsn/tsn.py | 46 ++-- PaddleCV/PaddleVideo/{infer.py => predict.py} | 59 
++--- .../{datareader => reader}/__init__.py | 0 .../{datareader => reader}/ctcn_reader.py | 4 +- .../{datareader => reader}/feature_reader.py | 0 .../{datareader => reader}/kinetics_reader.py | 111 ++++++--- .../{datareader => reader}/nonlocal_reader.py | 13 + .../{datareader => reader}/reader_utils.py | 0 PaddleCV/PaddleVideo/run.sh | 107 ++++++++ .../scripts/infer/infer_attention_cluster.sh | 4 - .../scripts/infer/infer_attention_lstm.sh | 4 - .../PaddleVideo/scripts/infer/infer_ctcn.sh | 2 - .../scripts/infer/infer_nextvlad.sh | 3 - .../scripts/infer/infer_nonlocal.sh | 2 - .../PaddleVideo/scripts/infer/infer_stnet.sh | 2 - .../PaddleVideo/scripts/infer/infer_tsm.sh | 2 - .../PaddleVideo/scripts/infer/infer_tsn.sh | 2 - .../scripts/test/test_attention_cluster.sh | 2 - .../scripts/test/test_attention_lstm.sh | 2 - .../PaddleVideo/scripts/test/test_ctcn.sh | 3 - .../PaddleVideo/scripts/test/test_nextvlad.sh | 2 - .../PaddleVideo/scripts/test/test_nonlocal.sh | 2 - .../PaddleVideo/scripts/test/test_stnet.sh | 2 - PaddleCV/PaddleVideo/scripts/test/test_tsm.sh | 2 - PaddleCV/PaddleVideo/scripts/test/test_tsn.sh | 2 - .../scripts/train/train_attention_cluster.sh | 3 - .../scripts/train/train_attention_lstm.sh | 3 - .../PaddleVideo/scripts/train/train_ctcn.sh | 8 - .../scripts/train/train_nextvlad.sh | 9 - .../scripts/train/train_nonlocal.sh | 3 - .../PaddleVideo/scripts/train/train_stnet.sh | 3 - .../PaddleVideo/scripts/train/train_tsm.sh | 11 - .../PaddleVideo/scripts/train/train_tsn.sh | 3 - PaddleCV/PaddleVideo/tools/train_utils.py | 232 ------------------ PaddleCV/PaddleVideo/train.py | 169 +++++-------- .../PaddleVideo/{tools => utils}/__init__.py | 0 .../{config.py => utils/config_utils.py} | 42 ++-- PaddleCV/PaddleVideo/utils/train_utils.py | 176 +++++++++++++ .../{utils.py => utils/utility.py} | 0 94 files changed, 2239 insertions(+), 1607 deletions(-) delete mode 100644 PaddleCV/PaddleVideo/configs/attention_cluster.txt create mode 100644 PaddleCV/PaddleVideo/configs/attention_cluster.yaml delete mode 100644 PaddleCV/PaddleVideo/configs/attention_lstm.txt create mode 100644 PaddleCV/PaddleVideo/configs/attention_lstm.yaml delete mode 100644 PaddleCV/PaddleVideo/configs/ctcn.txt create mode 100644 PaddleCV/PaddleVideo/configs/ctcn.yaml delete mode 100644 PaddleCV/PaddleVideo/configs/nextvlad.txt create mode 100644 PaddleCV/PaddleVideo/configs/nextvlad.yaml delete mode 100644 PaddleCV/PaddleVideo/configs/nonlocal.txt create mode 100644 PaddleCV/PaddleVideo/configs/nonlocal.yaml delete mode 100644 PaddleCV/PaddleVideo/configs/stnet.txt create mode 100644 PaddleCV/PaddleVideo/configs/stnet.yaml delete mode 100755 PaddleCV/PaddleVideo/configs/tsm.txt create mode 100644 PaddleCV/PaddleVideo/configs/tsm.yaml delete mode 100644 PaddleCV/PaddleVideo/configs/tsn.txt create mode 100644 PaddleCV/PaddleVideo/configs/tsn.yaml create mode 100644 PaddleCV/PaddleVideo/data/dataset/README.md rename PaddleCV/PaddleVideo/{ => data}/dataset/ctcn/README.md (83%) rename PaddleCV/PaddleVideo/{ => data}/dataset/kinetics/generate_label.py (100%) rename PaddleCV/PaddleVideo/{ => data}/dataset/kinetics/video2pkl.py (94%) create mode 100644 PaddleCV/PaddleVideo/data/dataset/kinetics_labels.json rename PaddleCV/PaddleVideo/{ => data}/dataset/nonlocal/README.md (52%) rename PaddleCV/PaddleVideo/{ => data}/dataset/nonlocal/change_filelist.py (100%) rename PaddleCV/PaddleVideo/{ => data}/dataset/nonlocal/generate_filelist.py (100%) rename PaddleCV/PaddleVideo/{ => data}/dataset/nonlocal/generate_list.sh (100%) 
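The conversion from the legacy INI-style `.txt` configs to the new `.yaml` files listed above is mechanical: each `[SECTION]` becomes a top-level YAML key and each `key = value` pair becomes `key: value`. A hypothetical one-off migration script along these lines could reproduce it; this sketch is not part of the patch, and it assumes the old files parse with `configparser`, that list/number values are Python literals, and that PyYAML is installed:

```python
# Hypothetical migration sketch: old INI-style config -> new YAML layout.
import ast
import configparser

import yaml


def txt_config_to_yaml(txt_path, yaml_path):
    parser = configparser.ConfigParser()
    parser.optionxform = str  # preserve key case (e.g. learning_rate)
    parser.read(txt_path)

    config = {}
    for section in parser.sections():
        config[section] = {}
        for key, value in parser.items(section):
            try:
                # numbers, booleans and lists were stored as Python literals
                config[section][key] = ast.literal_eval(value)
            except (ValueError, SyntaxError):
                config[section][key] = value  # plain strings such as paths

    with open(yaml_path, 'w') as f:
        yaml.dump(config, f, default_flow_style=False)


if __name__ == '__main__':
    txt_config_to_yaml('configs/stnet.txt', 'configs/stnet.yaml')
```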
rename PaddleCV/PaddleVideo/{ => data}/dataset/nonlocal/generate_testlist_multicrop.py (100%) rename PaddleCV/PaddleVideo/{ => data}/dataset/youtube8m/tf2pkl.py (98%) rename PaddleCV/PaddleVideo/{ => data}/dataset/youtube8m/yt8m_pca/eigenvals.npy (100%) delete mode 100644 PaddleCV/PaddleVideo/dataset/README.md rename PaddleCV/PaddleVideo/{test.py => eval.py} (66%) create mode 100644 PaddleCV/PaddleVideo/inference_model.py mode change 100755 => 100644 PaddleCV/PaddleVideo/models/attention_cluster/attention_cluster.py mode change 100755 => 100644 PaddleCV/PaddleVideo/models/attention_cluster/logistic_model.py mode change 100755 => 100644 PaddleCV/PaddleVideo/models/attention_cluster/shifting_attention.py mode change 100755 => 100644 PaddleCV/PaddleVideo/models/attention_lstm/attention_lstm.py mode change 100755 => 100644 PaddleCV/PaddleVideo/models/attention_lstm/lstm_attention.py rename PaddleCV/PaddleVideo/{infer.py => predict.py} (75%) rename PaddleCV/PaddleVideo/{datareader => reader}/__init__.py (100%) rename PaddleCV/PaddleVideo/{datareader => reader}/ctcn_reader.py (99%) rename PaddleCV/PaddleVideo/{datareader => reader}/feature_reader.py (100%) rename PaddleCV/PaddleVideo/{datareader => reader}/kinetics_reader.py (81%) rename PaddleCV/PaddleVideo/{datareader => reader}/nonlocal_reader.py (96%) rename PaddleCV/PaddleVideo/{datareader => reader}/reader_utils.py (100%) create mode 100644 PaddleCV/PaddleVideo/run.sh delete mode 100644 PaddleCV/PaddleVideo/scripts/infer/infer_attention_cluster.sh delete mode 100644 PaddleCV/PaddleVideo/scripts/infer/infer_attention_lstm.sh delete mode 100644 PaddleCV/PaddleVideo/scripts/infer/infer_ctcn.sh delete mode 100644 PaddleCV/PaddleVideo/scripts/infer/infer_nextvlad.sh delete mode 100644 PaddleCV/PaddleVideo/scripts/infer/infer_nonlocal.sh delete mode 100644 PaddleCV/PaddleVideo/scripts/infer/infer_stnet.sh delete mode 100644 PaddleCV/PaddleVideo/scripts/infer/infer_tsm.sh delete mode 100644 PaddleCV/PaddleVideo/scripts/infer/infer_tsn.sh delete mode 100644 PaddleCV/PaddleVideo/scripts/test/test_attention_cluster.sh delete mode 100644 PaddleCV/PaddleVideo/scripts/test/test_attention_lstm.sh delete mode 100644 PaddleCV/PaddleVideo/scripts/test/test_ctcn.sh delete mode 100644 PaddleCV/PaddleVideo/scripts/test/test_nextvlad.sh delete mode 100644 PaddleCV/PaddleVideo/scripts/test/test_nonlocal.sh delete mode 100644 PaddleCV/PaddleVideo/scripts/test/test_stnet.sh delete mode 100644 PaddleCV/PaddleVideo/scripts/test/test_tsm.sh delete mode 100644 PaddleCV/PaddleVideo/scripts/test/test_tsn.sh delete mode 100644 PaddleCV/PaddleVideo/scripts/train/train_attention_cluster.sh delete mode 100644 PaddleCV/PaddleVideo/scripts/train/train_attention_lstm.sh delete mode 100644 PaddleCV/PaddleVideo/scripts/train/train_ctcn.sh delete mode 100644 PaddleCV/PaddleVideo/scripts/train/train_nextvlad.sh delete mode 100644 PaddleCV/PaddleVideo/scripts/train/train_nonlocal.sh delete mode 100644 PaddleCV/PaddleVideo/scripts/train/train_stnet.sh delete mode 100644 PaddleCV/PaddleVideo/scripts/train/train_tsm.sh delete mode 100644 PaddleCV/PaddleVideo/scripts/train/train_tsn.sh delete mode 100644 PaddleCV/PaddleVideo/tools/train_utils.py rename PaddleCV/PaddleVideo/{tools => utils}/__init__.py (100%) rename PaddleCV/PaddleVideo/{config.py => utils/config_utils.py} (67%) create mode 100644 PaddleCV/PaddleVideo/utils/train_utils.py rename PaddleCV/PaddleVideo/{utils.py => utils/utility.py} (100%) diff --git a/PaddleCV/PaddleVideo/.gitignore b/PaddleCV/PaddleVideo/.gitignore 
index 7052bdda..ca78c9ac 100644 --- a/PaddleCV/PaddleVideo/.gitignore +++ b/PaddleCV/PaddleVideo/.gitignore @@ -1,4 +1,3 @@ -checkpoints output* *.pyc *.swp diff --git a/PaddleCV/PaddleVideo/README.md b/PaddleCV/PaddleVideo/README.md index 907e74f6..84e50c6f 100644 --- a/PaddleCV/PaddleVideo/README.md +++ b/PaddleCV/PaddleVideo/README.md @@ -17,21 +17,33 @@ ### 主要特点 -- 包含视频分类和动作定位方向的多个主流领先模型,其中Attention LSTM,Attention Cluster和NeXtVLAD是比较流行的特征序列模型,Non-local, TSN, TSM和StNet是End-to-End的视频分类模型。Attention LSTM模型速度快精度高,NeXtVLAD是2nd-Youtube-8M比赛中最好的单模型, TSN是基于2D-CNN的经典解决方案,TSM是基于时序移位的简单高效视频时空建模方法,Non-local模型提出了视频非局部关联建模方法。Attention Cluster和StNet是百度自研模型,分别发表于CVPR2018和AAAI2019,是Kinetics600比赛第一名中使用到的模型。C-TCN也是百度自研模型,2018年ActivityNet比赛的夺冠方案。 +- 包含视频分类和动作定位方向的多个主流领先模型,其中Attention LSTM,Attention Cluster和NeXtVLAD是比较流行的特征序列模型,Non-local, TSN, TSM和StNet是End-to-End的视频分类模型。Attention LSTM模型速度快精度高,NeXtVLAD是2nd-Youtube-8M比赛中最好的单模型, TSN是基于2D-CNN的经典解决方案,TSM是基于时序移位的简单高效视频时空建模方法,Non-local模型提出了视频非局部关联建模方法。Attention Cluster和StNet是百度自研模型,分别发表于CVPR2018和AAAI2019,是Kinetics600比赛第一名中使用到的模型。C-TCN动作定位模型也是百度自研,2018年ActivityNet比赛的夺冠方案。 - 提供了适合视频分类和动作定位任务的通用骨架代码,用户可一键式高效配置模型完成训练和评测。 ## 安装 -在当前模型库运行样例代码需要PadddlePaddle Fluid v.1.5.0或以上的版本。如果你的运行环境中的PaddlePaddle低于此版本,请根据[安装文档](http://www.paddlepaddle.org/documentation/docs/zh/1.5/beginners_guide/install/index_cn.html)中的说明来更新PaddlePaddle。 +在当前模型库运行样例代码需要PaddlePaddle Fluid v.1.5.0或以上的版本。如果你的运行环境中的PaddlePaddle低于此版本,请根据[安装文档](http://www.paddlepaddle.org/documentation/docs/zh/1.5/beginners_guide/install/index_cn.html)中的说明来更新PaddlePaddle。 + +### 其他环境依赖 + +- Python >= 2.7 + +- CUDA >= 8.0 + +- CUDNN >= 7.0 + +- 使用Youtube-8M数据集时,需要将tfrecord数据转化成pickle格式,需要用到Tensorflow,详见[数据说明](./data/dataset/README.md)中Youtube-8M部分。与此相关的模型是Attention Cluster, Attention LSTM, NeXtVLAD,使用其他模型请忽略此项。 + +- 使用Kinetics数据集时,如果需要将mp4文件提前解码并保存成pickle格式,需要用到ffmpeg,详见[数据说明](./data/dataset/README.md)中Kinetics部分。需要说明的是Nonlocal模型虽然也使用Kinetics数据集,但输入数据是视频源文件,不需要提前解码,不涉及此项。与此相关的模型是TSN, TSM, StNet,使用其他模型请忽略此项。 ## 数据准备 -视频模型库使用Youtube-8M和Kinetics数据集, 具体使用方法请参考[数据说明](./dataset/README.md) +视频模型库使用Youtube-8M和Kinetics数据集, 具体使用方法请参考[数据说明](./data/dataset/README.md) ## 快速使用 -视频模型库提供通用的train/test/infer框架,通过`train.py/test.py/infer.py`指定模型名、模型配置参数等可一键式进行训练和预测。 +视频模型库提供通用的train/evaluate/predict框架,通过`train.py/eval.py/predict.py`指定任务类型、模型名、模型配置参数等可一键式进行训练和预测。 以StNet模型为例: @@ -39,31 +51,52 @@ ``` bash export CUDA_VISIBLE_DEVICES=0 -python train.py --model_name=STNET - --config=./configs/stnet.txt - --save_dir=checkpoints +python train.py --model_name=STNET \ + --config=./configs/stnet.yaml \ + --log_interval=10 \ + --valid_interval=1 \ + --use_gpu=True \ + --save_dir=./data/checkpoints \ + --fix_random_seed=False ``` 多卡训练: ``` bash export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 -python train.py --model_name=STNET - --config=./configs/stnet.txt - --save_dir=checkpoints +python train.py --model_name=STNET \ + --config=./configs/stnet.yaml \ + --log_interval=10 \ + --valid_interval=1 \ + --use_gpu=True \ + --save_dir=./data/checkpoints \ + --fix_random_seed=False ``` -视频模型库同时提供了快速训练脚本,脚本位于`scripts/train`目录下,可通过如下命令启动训练: +CPU训练: ``` bash -bash scripts/train/train_stnet.sh +python train.py --model_name=STNET \ + --config=./configs/stnet.yaml \ + --log_interval=10 \ + --valid_interval=1 \ + --use_gpu=False \ + --save_dir=./data/checkpoints \ + --fix_random_seed=False +``` + +视频模型库同时提供了快速训练脚本,run.sh,可通过如下命令启动训练: + +``` bash +bash run.sh train STNET ./configs/stnet.yaml ``` - 请根据`CUDA_VISIBLE_DEVICES`指定卡数修改`config`文件中的`num_gpus`和`batch_size`配置。 -### 
注意
+- 使用CPU训练时请在run.sh中设置use\_gpu=False,使用GPU训练时则设置use\_gpu=True
+
+- 上述启动脚本run.sh运行时需要指定任务类型、模型名、配置文件。训练、评估和预测对应的任务类型分别是train,eval和predict。模型名称则是[AttentionCluster, AttentionLSTM, NEXTVLAD, NONLOCAL, STNET, TSN, TSM, CTCN]中的任何一个。配置文件全部在PaddleVideo/configs目录下,根据模型名称选择对应的配置文件即可。具体使用请参见各模型的说明文档。
 
-使用Windows GPU环境的用户,需要将示例代码中的[fluid.ParallelExecutor](http://paddlepaddle.org/documentation/docs/zh/1.4/api_cn/fluid_cn.html#parallelexecutor)替换为[fluid.Executor](http://paddlepaddle.org/documentation/docs/zh/1.4/api_cn/fluid_cn.html#executor)。
 
 ## 模型库结构
@@ -71,13 +104,23 @@ bash scripts/train/train_stnet.sh
 
 ```
 configs/
-  stnet.txt
-  tsn.txt
+  stnet.yaml
+  tsn.yaml
   ...
-dataset/
-  youtube/
-  kinetics/
-datareader/
+data/
+  dataset/
+    youtube/
+    kinetics/
+    ...
+  checkpoints/
+    ...
+  evaluate_results/
+    ...
+  predict_results/
+    ...
+  inference_model/
+    ...
+reader/
   feature_reader.py
   kinetics_reader.py
   ...
@@ -89,22 +132,22 @@ models/
   stnet/
   tsn/
   ...
-scripts/
-  train/
-  test/
+utils/
+  ...
 train.py
-test.py
-infer.py
+eval.py
+predict.py
+run.sh
 ```
 
 - `configs`: 各模型配置文件模板
-- `datareader`: 提供Youtube-8M,Kinetics数据集reader
-- `metrics`: Youtube-8,Kinetics数据集评估脚本
+- `reader`: 提供Youtube-8M,Kinetics数据集通用reader,以及模型自定义reader,如nonlocal、ctcn等
+- `metrics`: Youtube-8M,Kinetics数据集评估脚本,以及模型自定义评估方法
 - `models`: 各模型网络结构构建脚本
-- `scripts`: 各模型快速训练评估脚本
 - `train.py`: 一键式训练脚本,可通过指定模型名,配置文件等一键式启动训练
-- `test.py`: 一键式评估脚本,可通过指定模型名,配置文件,模型权重等一键式启动评估
-- `infer.py`: 一键式推断脚本,可通过指定模型名,配置文件,模型权重,待推断文件列表等一键式启动推断
+- `eval.py`: 一键式评估脚本,可通过指定模型名,配置文件,模型权重等一键式启动评估
+- `predict.py`: 一键式推断脚本,可通过指定模型名,配置文件,模型权重,待推断文件列表等一键式启动推断
+- `run.sh`: 各模型快速训练评估脚本
 
 ## Model Zoo
@@ -112,24 +155,24 @@ infer.py
 
 | 模型 | Batch Size | 环境配置 | cuDNN版本 | GAP | 下载链接 |
 | :-------: | :---: | :---------: | :-----: | :----: | :----------: |
-| Attention Cluster | 2048 | 8卡P40 | 7.1 | 0.84 | [model](https://paddlemodels.bj.bcebos.com/video_classification/attention_cluster_youtube8m.tar.gz) |
-| Attention LSTM | 1024 | 8卡P40 | 7.1 | 0.86 | [model](https://paddlemodels.bj.bcebos.com/video_classification/attention_lstm_youtube8m.tar.gz) |
-| NeXtVLAD | 160 | 4卡P40 | 7.1 | 0.87 | [model](https://paddlemodels.bj.bcebos.com/video_classification/nextvlad_youtube8m.tar.gz) |
+| Attention Cluster | 2048 | 8卡P40 | 7.1 | 0.84 | [model](https://paddlemodels.bj.bcebos.com/video_classification/AttentionCluster_final.pdparams) |
+| Attention LSTM | 1024 | 8卡P40 | 7.1 | 0.86 | [model](https://paddlemodels.bj.bcebos.com/video_classification/AttentionLSTM_final.pdparams) |
+| NeXtVLAD | 160 | 4卡P40 | 7.1 | 0.87 | [model](https://paddlemodels.bj.bcebos.com/video_classification/NEXTVLAD_final.pdparams) |
 
 - 基于Kinetics数据集模型:
 
 | 模型 | Batch Size | 环境配置 | cuDNN版本 | Top-1 | 下载链接 |
 | :-------: | :---: | :---------: | :----: | :----: | :----------: |
-| StNet | 128 | 8卡P40 | 7.1 | 0.69 | [model](https://paddlemodels.bj.bcebos.com/video_classification/stnet_kinetics.tar.gz) |
-| TSN | 256 | 8卡P40 | 7.1 | 0.67 | [model](https://paddlemodels.bj.bcebos.com/video_classification/tsn_kinetics.tar.gz) |
-| TSM | 128 | 8卡P40 | 7.1 | 0.70 | [model](https://paddlemodels.bj.bcebos.com/video_classification/tsm_kinetics.tar.gz) |
-| Non-local | 64 | 8卡P40 | 7.1 | 0.74 | [model](https://paddlemodels.bj.bcebos.com/video_classification/nonlocal_kinetics.tar.gz) |
+| StNet | 128 | 8卡P40 | 7.1 | 0.69 | [model](https://paddlemodels.bj.bcebos.com/video_classification/STNET_final.pdparams) |
+| TSN | 256 | 8卡P40 | 7.1 | 0.67 | [model](https://paddlemodels.bj.bcebos.com/video_classification/TSN_final.pdparams) |
+| TSM | 128 | 8卡P40 | 7.1 | 0.70 | [model](https://paddlemodels.bj.bcebos.com/video_classification/TSM_final.pdparams) |
+| Non-local | 64 | 8卡P40 | 7.1 | 0.74 | [model](https://paddlemodels.bj.bcebos.com/video_classification/NONLOCAL_final.pdparams) |
 
 - 基于ActivityNet的动作定位模型:
 
 | 模型 | Batch Size | 环境配置 | cuDNN版本 | MAP | 下载链接 |
 | :-------: | :---: | :---------: | :----: | :----: | :----------: |
-| C-TCN | 16 | 8卡P40 | 7.1 | 0.31| [model](https://paddlemodels.bj.bcebos.com/video_detection/ctcn.tar.gz) |
+| C-TCN | 16 | 8卡P40 | 7.1 | 0.31 | [model](https://paddlemodels.bj.bcebos.com/video_detection/CTCN_final.pdparams) |
 
 ## 参考文献
diff --git a/PaddleCV/PaddleVideo/configs/attention_cluster.txt b/PaddleCV/PaddleVideo/configs/attention_cluster.txt
deleted file mode 100644
index 0ce7c4b2..00000000
--- a/PaddleCV/PaddleVideo/configs/attention_cluster.txt
+++ /dev/null
@@ -1,33 +0,0 @@
-[MODEL]
-name = "AttentionCluster"
-dataset = "YouTube-8M"
-bone_network = None
-drop_rate = 0.5
-feature_num = 2
-feature_names = ['rgb', 'audio']
-feature_dims = [1024, 128]
-seg_num = 100
-cluster_nums = [32, 32]
-num_classes = 3862
-topk = 20
-
-[TRAIN]
-epoch = 5
-learning_rate = 0.001
-pretrain_base = None
-batch_size = 2048
-use_gpu = True
-num_gpus = 8
-filelist = "dataset/youtube8m/train.list"
-
-[VALID]
-batch_size = 2048
-filelist = "dataset/youtube8m/val.list"
-
-[TEST]
-batch_size = 256
-filelist = "dataset/youtube8m/test.list"
-
-[INFER]
-batch_size = 1
-filelist = "dataset/youtube8m/infer.list"
diff --git a/PaddleCV/PaddleVideo/configs/attention_cluster.yaml b/PaddleCV/PaddleVideo/configs/attention_cluster.yaml
new file mode 100644
index 00000000..bbf9108a
--- /dev/null
+++ b/PaddleCV/PaddleVideo/configs/attention_cluster.yaml
@@ -0,0 +1,33 @@
+MODEL:
+    name: "AttentionCluster"
+    dataset: "YouTube-8M"
+    bone_network: None
+    drop_rate: 0.5
+    feature_num: 2
+    feature_names: ['rgb', 'audio']
+    feature_dims: [1024, 128]
+    seg_num: 100
+    cluster_nums: [32, 32]
+    num_classes: 3862
+    topk: 20
+
+TRAIN:
+    epoch: 5
+    learning_rate: 0.001
+    pretrain_base: None
+    batch_size: 2048
+    use_gpu: True
+    num_gpus: 8
+    filelist: "data/dataset/youtube8m/train.list"
+
+VALID:
+    batch_size: 2048
+    filelist: "data/dataset/youtube8m/val.list"
+
+TEST:
+    batch_size: 256
+    filelist: "data/dataset/youtube8m/test.list"
+
+INFER:
+    batch_size: 1
+    filelist: "data/dataset/youtube8m/infer.list"
diff --git a/PaddleCV/PaddleVideo/configs/attention_lstm.txt b/PaddleCV/PaddleVideo/configs/attention_lstm.txt
deleted file mode 100644
index 9154fe2c..00000000
--- a/PaddleCV/PaddleVideo/configs/attention_lstm.txt
+++ /dev/null
@@ -1,37 +0,0 @@
-[MODEL]
-name = "AttentionLSTM"
-dataset = "YouTube-8M"
-bone_nework = None
-drop_rate = 0.5
-feature_num = 2
-feature_names = ['rgb', 'audio']
-feature_dims = [1024, 128]
-embedding_size = 512
-lstm_size = 1024
-num_classes = 3862
-topk = 20
-
-[TRAIN]
-epoch = 10
-learning_rate = 0.001
-decay_epochs = [5]
-decay_gamma = 0.1
-weight_decay = 0.0008
-num_samples = 5000000
-pretrain_base = None
-batch_size = 1024
-use_gpu = True
-num_gpus = 8
-filelist = "dataset/youtube8m/train.list"
-
-[VALID]
-batch_size = 1024
-filelist = "dataset/youtube8m/val.list"
-
-[TEST]
-batch_size = 128
-filelist = "dataset/youtube8m/test.list"
-
-[INFER]
-batch_size = 1
-filelist = "dataset/youtube8m/infer.list"
diff --git a/PaddleCV/PaddleVideo/configs/attention_lstm.yaml b/PaddleCV/PaddleVideo/configs/attention_lstm.yaml
new file mode 100644
index 00000000..660bf8fc
--- /dev/null
+++ 
b/PaddleCV/PaddleVideo/configs/attention_lstm.yaml @@ -0,0 +1,37 @@ +MODEL: + name: "AttentionLSTM" + dataset: "YouTube-8M" + bone_nework: None + drop_rate: 0.5 + feature_num: 2 + feature_names: ['rgb', 'audio'] + feature_dims: [1024, 128] + embedding_size: 512 + lstm_size: 1024 + num_classes: 3862 + topk: 20 + +TRAIN: + epoch: 10 + learning_rate: 0.001 + decay_epochs: [5] + decay_gamma: 0.1 + weight_decay: 0.0008 + num_samples: 5000000 + pretrain_base: None + batch_size: 1024 + use_gpu: True + num_gpus: 8 + filelist: "data/dataset/youtube8m/train.list" + +VALID: + batch_size: 1024 + filelist: "data/dataset/youtube8m/val.list" + +TEST: + batch_size: 128 + filelist: "data/dataset/youtube8m/test.list" + +INFER: + batch_size: 1 + filelist: "data/dataset/youtube8m/infer.list" diff --git a/PaddleCV/PaddleVideo/configs/ctcn.txt b/PaddleCV/PaddleVideo/configs/ctcn.txt deleted file mode 100644 index 6c8837ae..00000000 --- a/PaddleCV/PaddleVideo/configs/ctcn.txt +++ /dev/null @@ -1,53 +0,0 @@ -[MODEL] -name = "CTCN" -num_classes = 201 -img_size = 512 -concept_size = 402 -num_anchors = 7 -total_num_anchors = 1785 -snippet_length = 1 -root = '/ssd3/huangjun/Paddle/feats' - -[TRAIN] -epoch = 35 -filelist = 'dataset/ctcn/Activity1.3_train_rgb.listformat' -rgb = 'senet152-201cls-rgb-70.3-5seg-331data_331img_train' -flow = 'senet152-201cls-flow-60.9-5seg-331data_train' -batch_size = 16 -num_threads = 8 -use_gpu = True -num_gpus = 8 -learning_rate = 0.0005 -learning_rate_decay = 0.1 -lr_decay_iter = 9000 -l2_weight_decay = 1e-4 -momentum = 0.9 - -[VALID] -filelist = 'dataset/ctcn/Activity1.3_val_rgb.listformat' -rgb = 'senet152-201cls-rgb-70.3-5seg-331data_331img_val' -flow = 'senet152-201cls-flow-60.9-5seg-331data_val' -batch_size = 16 -num_threads = 8 -use_gpu = True -num_gpus = 8 - -[TEST] -filelist = 'dataset/ctcn/Activity1.3_val_rgb.listformat' -rgb = 'senet152-201cls-rgb-70.3-5seg-331data_331img_val' -flow = 'senet152-201cls-flow-60.9-5seg-331data_val' -class_label_file = 'dataset/ctcn/labels.txt' -video_duration_file = 'dataset/ctcn/val_duration_frame.list' -batch_size = 1 -num_threads = 1 -score_thresh = 0.001 -nms_thresh = 0.8 -sigma_thresh = 0.9 -soft_thresh = 0.004 - -[INFER] -filelist = 'dataset/ctcn/infer.list' -rgb = 'senet152-201cls-rgb-70.3-5seg-331data_331img_val' -flow = 'senet152-201cls-flow-60.9-5seg-331data_val' -batch_size = 1 -num_threads = 1 diff --git a/PaddleCV/PaddleVideo/configs/ctcn.yaml b/PaddleCV/PaddleVideo/configs/ctcn.yaml new file mode 100644 index 00000000..9c2ffce2 --- /dev/null +++ b/PaddleCV/PaddleVideo/configs/ctcn.yaml @@ -0,0 +1,53 @@ +MODEL: + name: "CTCN" + num_classes: 201 + img_size: 512 + concept_size: 402 + num_anchors: 7 + total_num_anchors: 1785 + snippet_length: 1 + root: './data/dataset/ctcn/feats' + +TRAIN: + epoch: 35 + filelist: './data/dataset/ctcn/Activity1.3_train_rgb.listformat' + rgb: 'senet152-201cls-rgb-70.3-5seg-331data_331img_train' + flow: 'senet152-201cls-flow-60.9-5seg-331data_train' + batch_size: 16 + num_threads: 8 + use_gpu: True + num_gpus: 8 + learning_rate: 0.0005 + learning_rate_decay: 0.1 + lr_decay_iter: 9000 + l2_weight_decay: 1e-4 + momentum: 0.9 + +VALID: + filelist: './data/dataset/ctcn/Activity1.3_val_rgb.listformat' + rgb: 'senet152-201cls-rgb-70.3-5seg-331data_331img_val' + flow: 'senet152-201cls-flow-60.9-5seg-331data_val' + batch_size: 16 + num_threads: 8 + use_gpu: True + num_gpus: 8 + +TEST: + filelist: './data/dataset/ctcn/Activity1.3_val_rgb.listformat' + rgb: 'senet152-201cls-rgb-70.3-5seg-331data_331img_val' + flow: 
'senet152-201cls-flow-60.9-5seg-331data_val' + class_label_file: './data/dataset/ctcn/labels.txt' + video_duration_file: './data/dataset/ctcn/val_duration_frame.list' + batch_size: 1 + num_threads: 1 + score_thresh: 0.001 + nms_thresh: 0.8 + sigma_thresh: 0.9 + soft_thresh: 0.004 + +INFER: + filelist: './data/dataset/ctcn/infer.list' + rgb: 'senet152-201cls-rgb-70.3-5seg-331data_331img_val' + flow: 'senet152-201cls-flow-60.9-5seg-331data_val' + batch_size: 1 + num_threads: 1 diff --git a/PaddleCV/PaddleVideo/configs/nextvlad.txt b/PaddleCV/PaddleVideo/configs/nextvlad.txt deleted file mode 100644 index 18779b1f..00000000 --- a/PaddleCV/PaddleVideo/configs/nextvlad.txt +++ /dev/null @@ -1,39 +0,0 @@ -[MODEL] -name = "NEXTVLAD" -num_classes = 3862 -topk = 20 -video_feature_size = 1024 -audio_feature_size = 128 -cluster_size = 128 -hidden_size = 2048 -groups = 8 -expansion = 2 -drop_rate = 0.5 -gating_reduction = 8 -eigen_file = "./dataset/youtube8m/yt8m_pca/eigenvals.npy" - -[TRAIN] -epoch = 6 -learning_rate = 0.0002 -lr_boundary_examples = 2000000 -max_iter = 700000 -learning_rate_decay = 0.8 -l2_penalty = 1e-5 -gradient_clip_norm = 1.0 -use_gpu = True -num_gpus = 4 -batch_size = 160 -filelist = "./dataset/youtube8m/train.list" - -[VALID] -batch_size = 160 -filelist = "./dataset/youtube8m/val.list" - -[TEST] -batch_size = 40 -filelist = "./dataset/youtube8m/test.list" - -[INFER] -batch_size = 1 -filelist = "./dataset/youtube8m/infer.list" - diff --git a/PaddleCV/PaddleVideo/configs/nextvlad.yaml b/PaddleCV/PaddleVideo/configs/nextvlad.yaml new file mode 100644 index 00000000..03ce8ff1 --- /dev/null +++ b/PaddleCV/PaddleVideo/configs/nextvlad.yaml @@ -0,0 +1,39 @@ +MODEL: + name: "NEXTVLAD" + num_classes: 3862 + topk: 20 + video_feature_size: 1024 + audio_feature_size: 128 + cluster_size: 128 + hidden_size: 2048 + groups: 8 + expansion: 2 + drop_rate: 0.5 + gating_reduction: 8 + eigen_file: "./data/dataset/youtube8m/yt8m_pca/eigenvals.npy" + +TRAIN: + epoch: 6 + learning_rate: 0.0002 + lr_boundary_examples: 2000000 + max_iter: 700000 + learning_rate_decay: 0.8 + l2_penalty: 1e-5 + gradient_clip_norm: 1.0 + use_gpu: True + num_gpus: 4 + batch_size: 160 + filelist: "./data/dataset/youtube8m/train.list" + +VALID: + batch_size: 160 + filelist: "./data/dataset/youtube8m/val.list" + +TEST: + batch_size: 40 + filelist: "./data/dataset/youtube8m/test.list" + +INFER: + batch_size: 1 + filelist: "./data/dataset/youtube8m/infer.list" + diff --git a/PaddleCV/PaddleVideo/configs/nonlocal.txt b/PaddleCV/PaddleVideo/configs/nonlocal.txt deleted file mode 100644 index 955ea402..00000000 --- a/PaddleCV/PaddleVideo/configs/nonlocal.txt +++ /dev/null @@ -1,93 +0,0 @@ -[MODEL] -name = "NONLOCAL" -num_classes = 400 -image_mean = 114.75 -image_std = 57.375 -depth = 50 -dataset = 'kinetics400' -video_arc_choice = 1 -use_affine = False -fc_init_std = 0.01 -bn_momentum = 0.9 -bn_epsilon = 1.0e-5 -bn_init_gamma = 0. 
- -[RESNETS] -num_groups = 1 -width_per_group = 64 -trans_func = bottleneck_transformation_3d - -[NONLOCAL] -bn_momentum = 0.9 -bn_epsilon = 1.0e-5 -bn_init_gamma = 0.0 -layer_mod = 2 -conv3_nonlocal = True -conv4_nonlocal = True -conv_init_std = 0.01 -no_bias = 0 -use_maxpool = True -use_softmax = True -use_scale = True -use_zero_init_conv = False -use_bn = True -use_affine = False - -[TRAIN] -epoch = 120 -num_reader_threads = 8 -batch_size = 64 -num_gpus = 8 -filelist = './dataset/nonlocal/trainlist.txt' -crop_size = 224 -sample_rate = 8 -video_length = 8 -jitter_scales = [256, 320] - -dropout_rate = 0.5 - -learning_rate = 0.01 -learning_rate_decay = 0.1 -step_sizes = [150000, 150000, 100000] -max_iter = 400000 - -weight_decay = 0.0001 -weight_decay_bn = 0.0 -momentum = 0.9 -nesterov = True -scale_momentum = True - -[VALID] -num_reader_threads = 8 -batch_size = 64 -filelist = './dataset/nonlocal/vallist.txt' -crop_size = 224 -sample_rate = 8 -video_length = 8 -jitter_scales = [256, 320] - -[TEST] -num_reader_threads = 8 -batch_size = 4 -filelist = 'dataset/nonlocal/testlist.txt' -filename_gt = 'dataset/nonlocal/vallist.txt' -checkpoint_dir = './output' -crop_size = 256 -sample_rate = 8 -video_length = 8 -jitter_scales = [256, 256] -num_test_clips = 30 -dataset_size = 19761 -use_multi_crop = 1 - -[INFER] -num_reader_threads = 8 -batch_size = 1 -filelist = 'dataset/nonlocal/inferlist.txt' -crop_size = 256 -sample_rate = 8 -video_length = 8 -jitter_scales = [256, 256] -num_test_clips = 30 -use_multi_crop = 1 - diff --git a/PaddleCV/PaddleVideo/configs/nonlocal.yaml b/PaddleCV/PaddleVideo/configs/nonlocal.yaml new file mode 100644 index 00000000..ea9af894 --- /dev/null +++ b/PaddleCV/PaddleVideo/configs/nonlocal.yaml @@ -0,0 +1,94 @@ +MODEL: + name: "NONLOCAL" + num_classes: 400 + image_mean: 114.75 + image_std: 57.375 + depth: 50 + dataset: 'kinetics400' + video_arc_choice: 1 + use_affine: False + fc_init_std: 0.01 + bn_momentum: 0.9 + bn_epsilon: 1.0e-5 + bn_init_gamma: 0. 
+ +RESNETS: + num_groups: 1 + width_per_group: 64 + trans_func: bottleneck_transformation_3d + +NONLOCAL: + bn_momentum: 0.9 + bn_epsilon: 1.0e-5 + bn_init_gamma: 0.0 + layer_mod: 2 + conv3_nonlocal: True + conv4_nonlocal: True + conv_init_std: 0.01 + no_bias: 0 + use_maxpool: True + use_softmax: True + use_scale: True + use_zero_init_conv: False + use_bn: True + use_affine: False + +TRAIN: + epoch: 120 + num_reader_threads: 8 + batch_size: 64 + num_gpus: 8 + filelist: './data/dataset/nonlocal/trainlist.txt' + crop_size: 224 + sample_rate: 8 + video_length: 8 + jitter_scales: [256, 320] + + dropout_rate: 0.5 + learning_rate: 0.01 + learning_rate_decay: 0.1 + step_sizes: [150000, 150000, 100000] + max_iter: 400000 + + weight_decay: 0.0001 + weight_decay_bn: 0.0 + momentum: 0.9 + nesterov: True + scale_momentum: True + +VALID: + num_reader_threads: 8 + batch_size: 64 + filelist: './data/dataset/nonlocal/vallist.txt' + crop_size: 224 + sample_rate: 8 + video_length: 8 + jitter_scales: [256, 320] + +TEST: + num_reader_threads: 8 + batch_size: 4 + filelist: './data/dataset/nonlocal/testlist.txt' + filename_gt: './data/dataset/nonlocal/vallist.txt' + checkpoint_dir: './output' + crop_size: 256 + sample_rate: 8 + video_length: 8 + jitter_scales: [256, 256] + num_test_clips: 30 + dataset_size: 19761 + use_multi_crop: 1 + +INFER: + num_reader_threads: 1 + batch_size: 1 + filelist: './data/dataset/nonlocal/inferlist.txt' + checkpoint_dir: './output' + crop_size: 256 + sample_rate: 8 + video_length: 8 + jitter_scales: [256, 256] + num_test_clips: 30 + use_multi_crop: 1 + video_path: "" + kinetics_labels: "./data/dataset/kinetics_labels.json" diff --git a/PaddleCV/PaddleVideo/configs/stnet.txt b/PaddleCV/PaddleVideo/configs/stnet.txt deleted file mode 100644 index 7be17834..00000000 --- a/PaddleCV/PaddleVideo/configs/stnet.txt +++ /dev/null @@ -1,53 +0,0 @@ -[MODEL] -name = "STNET" -format = "pkl" -num_classes = 400 -seg_num = 7 -seglen = 5 -image_mean = [0.485, 0.456, 0.406] -image_std = [0.229, 0.224, 0.225] -num_layers = 50 - -[TRAIN] -epoch = 60 -short_size = 256 -target_size = 224 -num_reader_threads = 12 -buf_size = 1024 -batch_size = 128 -num_gpus = 8 -use_gpu = True -filelist = "./dataset/kinetics/train.list" -learning_rate = 0.01 -learning_rate_decay = 0.1 -l2_weight_decay = 1e-4 -momentum = 0.9 -total_videos = 224684 -pretrain_base = "./dataset/pretrained/ResNet50_pretrained" - -[VALID] -short_size = 256 -target_size = 224 -num_reader_threads = 12 -buf_size = 1024 -batch_size = 128 -filelist = "./dataset/kinetics/val.list" - -[TEST] -seg_num = 25 -short_size = 256 -target_size = 256 -num_reader_threads = 12 -buf_size = 1024 -batch_size = 4 -filelist = "./dataset/kinetics/test.list" - -[INFER] -seg_num = 25 -short_size = 256 -target_size = 256 -num_reader_threads = 12 -buf_size = 1024 -batch_size = 1 -filelist = "./dataset/kinetics/infer.list" - diff --git a/PaddleCV/PaddleVideo/configs/stnet.yaml b/PaddleCV/PaddleVideo/configs/stnet.yaml new file mode 100644 index 00000000..04bcbe66 --- /dev/null +++ b/PaddleCV/PaddleVideo/configs/stnet.yaml @@ -0,0 +1,55 @@ +MODEL: + name: "STNET" + format: "pkl" + num_classes: 400 + seg_num: 7 + seglen: 5 + image_mean: [0.485, 0.456, 0.406] + image_std: [0.229, 0.224, 0.225] + num_layers: 50 + topk: 5 + +TRAIN: + epoch: 60 + short_size: 256 + target_size: 224 + num_reader_threads: 12 + buf_size: 1024 + batch_size: 128 + num_gpus: 8 + use_gpu: True + filelist: "./data/dataset/kinetics/train.list" + learning_rate: 0.01 + learning_rate_decay: 0.1 + 
l2_weight_decay: 1e-4 + momentum: 0.9 + total_videos: 224684 + pretrain_base: "./data/dataset/pretrained/ResNet50_pretrained" + +VALID: + short_size: 256 + target_size: 224 + num_reader_threads: 12 + buf_size: 1024 + batch_size: 128 + filelist: "./data/dataset/kinetics/val.list" + +TEST: + seg_num: 25 + short_size: 256 + target_size: 256 + num_reader_threads: 12 + buf_size: 1024 + batch_size: 4 + filelist: "./data/dataset/kinetics/test.list" + +INFER: + seg_num: 25 + short_size: 256 + target_size: 256 + num_reader_threads: 12 + buf_size: 1024 + batch_size: 1 + filelist: "./data/dataset/kinetics/infer.list" + video_path: "" + kinetics_labels: "./data/dataset/kinetics_labels.json" diff --git a/PaddleCV/PaddleVideo/configs/tsm.txt b/PaddleCV/PaddleVideo/configs/tsm.txt deleted file mode 100755 index 28cad997..00000000 --- a/PaddleCV/PaddleVideo/configs/tsm.txt +++ /dev/null @@ -1,52 +0,0 @@ -[MODEL] -name = "TSM" -format = "pkl" -num_classes = 400 -seg_num = 8 -seglen = 1 -image_mean = [0.485, 0.456, 0.406] -image_std = [0.229, 0.224, 0.225] -num_layers = 50 - -[TRAIN] -epoch = 65 -short_size = 256 -target_size = 224 -num_reader_threads = 12 -buf_size = 1024 -batch_size = 128 -use_gpu = True -num_gpus = 8 -filelist = "./dataset/kinetics/train.list" -learning_rate = 0.01 -learning_rate_decay = 0.1 -decay_epochs = [40, 60] -l2_weight_decay = 1e-4 -momentum = 0.9 -total_videos = 239781 -enable_ce = False - -[VALID] -short_size = 256 -target_size = 224 -num_reader_threads = 12 -buf_size = 1024 -batch_size = 128 -filelist = "./dataset/kinetics/val.list" - -[TEST] -short_size = 256 -target_size = 224 -num_reader_threads = 12 -buf_size = 1024 -batch_size = 16 -filelist = "./dataset/kinetics/test.list" - -[INFER] -short_size = 256 -target_size = 224 -num_reader_threads = 12 -buf_size = 1024 -batch_size = 1 -filelist = "./dataset/kinetics/infer.list" - diff --git a/PaddleCV/PaddleVideo/configs/tsm.yaml b/PaddleCV/PaddleVideo/configs/tsm.yaml new file mode 100644 index 00000000..10b23e66 --- /dev/null +++ b/PaddleCV/PaddleVideo/configs/tsm.yaml @@ -0,0 +1,54 @@ +MODEL: + name: "TSM" + format: "pkl" + num_classes: 400 + seg_num: 8 + seglen: 1 + image_mean: [0.485, 0.456, 0.406] + image_std: [0.229, 0.224, 0.225] + num_layers: 50 + topk: 5 + +TRAIN: + epoch: 65 + short_size: 256 + target_size: 224 + num_reader_threads: 12 + buf_size: 1024 + batch_size: 128 + use_gpu: True + num_gpus: 8 + filelist: "./data/dataset/kinetics/train.list" + learning_rate: 0.01 + learning_rate_decay: 0.1 + decay_epochs: [40, 60] + l2_weight_decay: 1e-4 + momentum: 0.9 + total_videos: 239781 + fix_random_seed: False + +VALID: + short_size: 256 + target_size: 224 + num_reader_threads: 12 + buf_size: 1024 + batch_size: 128 + filelist: "./data/dataset/kinetics/val.list" + +TEST: + short_size: 256 + target_size: 224 + num_reader_threads: 12 + buf_size: 1024 + batch_size: 16 + filelist: "./data/dataset/kinetics/test.list" + +INFER: + short_size: 256 + target_size: 224 + num_reader_threads: 12 + buf_size: 1024 + batch_size: 1 + filelist: "./data/dataset/kinetics/infer.list" + video_path: "" + kinetics_labels: "./data/dataset/kinetics_labels.json" diff --git a/PaddleCV/PaddleVideo/configs/tsn.txt b/PaddleCV/PaddleVideo/configs/tsn.txt deleted file mode 100644 index d1935322..00000000 --- a/PaddleCV/PaddleVideo/configs/tsn.txt +++ /dev/null @@ -1,51 +0,0 @@ -[MODEL] -name = "TSN" -format = "pkl" -num_classes = 400 -seg_num = 3 -seglen = 1 -image_mean = [0.485, 0.456, 0.406] -image_std = [0.229, 0.224, 0.225] -num_layers = 50 - -[TRAIN] 
-epoch = 45 -short_size = 256 -target_size = 224 -num_reader_threads = 12 -buf_size = 1024 -batch_size = 256 -use_gpu = True -num_gpus = 8 -filelist = "./dataset/kinetics/train.list" -learning_rate = 0.01 -learning_rate_decay = 0.1 -l2_weight_decay = 1e-4 -momentum = 0.9 -total_videos = 224684 - -[VALID] -short_size = 256 -target_size = 224 -num_reader_threads = 12 -buf_size = 1024 -batch_size = 256 -filelist = "./dataset/kinetics/val.list" - -[TEST] -seg_num = 7 -short_size = 256 -target_size = 224 -num_reader_threads = 12 -buf_size = 1024 -batch_size = 16 -filelist = "./dataset/kinetics/test.list" - -[INFER] -short_size = 256 -target_size = 224 -num_reader_threads = 12 -buf_size = 1024 -batch_size = 1 -filelist = "./dataset/kinetics/infer.list" - diff --git a/PaddleCV/PaddleVideo/configs/tsn.yaml b/PaddleCV/PaddleVideo/configs/tsn.yaml new file mode 100644 index 00000000..1354ab04 --- /dev/null +++ b/PaddleCV/PaddleVideo/configs/tsn.yaml @@ -0,0 +1,53 @@ +MODEL: + name: "TSN" + format: "pkl" + num_classes: 400 + seg_num: 3 + seglen: 1 + image_mean: [0.485, 0.456, 0.406] + image_std: [0.229, 0.224, 0.225] + num_layers: 50 + topk: 5 + +TRAIN: + epoch: 45 + short_size: 256 + target_size: 224 + num_reader_threads: 12 + buf_size: 1024 + batch_size: 256 + use_gpu: True + num_gpus: 8 + filelist: "./data/dataset/kinetics/train.list" + learning_rate: 0.01 + learning_rate_decay: 0.1 + l2_weight_decay: 1e-4 + momentum: 0.9 + total_videos: 224684 + +VALID: + short_size: 256 + target_size: 224 + num_reader_threads: 12 + buf_size: 1024 + batch_size: 256 + filelist: "./data/dataset/kinetics/val.list" + +TEST: + seg_num: 7 + short_size: 256 + target_size: 224 + num_reader_threads: 12 + buf_size: 1024 + batch_size: 16 + filelist: "./data/dataset/kinetics/test.list" + +INFER: + short_size: 256 + target_size: 224 + num_reader_threads: 12 + buf_size: 1024 + batch_size: 1 + filelist: "./data/dataset/kinetics/infer.list" + video_path: "" + kinetics_labels: "./data/dataset/kinetics_labels.json" diff --git a/PaddleCV/PaddleVideo/data/dataset/README.md b/PaddleCV/PaddleVideo/data/dataset/README.md new file mode 100644 index 00000000..ef0de54a --- /dev/null +++ b/PaddleCV/PaddleVideo/data/dataset/README.md @@ -0,0 +1,164 @@ +# 数据使用说明 + +- [Youtube-8M](#Youtube-8M数据集) +- [Kinetics](#Kinetics数据集) +- [Non-local](#Non-local) +- [C-TCN](#C-TCN) + +## Youtube-8M数据集 +这里用到的是YouTube-8M 2018年更新之后的数据集。使用官方数据集,并将TFRecord文件转化为pickle文件以便PaddlePaddle使用。Youtube-8M数据集官方提供了frame-level和video-level的特征,这里只需使用到frame-level的特征。 + +### 数据下载 +请使用Youtube-8M官方链接分别下载[训练集](http://us.data.yt8m.org/2/frame/train/index.html)和[验证集](http://us.data.yt8m.org/2/frame/validate/index.html)。每个链接里各提供了3844个文件的下载地址,用户也可以使用官方提供的[下载脚本](https://research.google.com/youtube8m/download.html)下载数据。数据下载完成后,将会得到3844个训练数据文件和3844个验证数据文件(TFRecord格式)。 +假设存放视频模型代码库PaddleVideo的主目录为: Code\_Root,进入data/dataset/youtube8m目录 + + cd data/dataset/youtube8m + +在youtube8m下新建目录tf/train和tf/val + + mkdir tf && cd tf + + mkdir train && mkdir val + +并分别将下载的train和validate数据存放在其中。 + +### 数据格式转化 + +为了适用于PaddlePaddle训练,需要离线将下载好的TFRecord文件格式转成了pickle格式,转换脚本请使用[dataset/youtube8m/tf2pkl.py](./youtube8m/tf2pkl.py)。 + +在data/dataset/youtube8m 目录下新建目录pkl/train和pkl/val + + cd data/dataset/youtube8m + + mkdir pkl && cd pkl + + mkdir train && mkdir val + + +转化文件格式(TFRecord -> pkl),进入data/dataset/youtube8m目录,运行脚本 + + python tf2pkl.py ./tf/train ./pkl/train + +和 + + python tf2pkl.py ./tf/val ./pkl/val + +分别将train和validate数据集转化为pkl文件。tf2pkl.py文件运行时需要两个参数,分别是数据源tf文件存放路径和转化后的pkl文件存放路径。 + 
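A quick way to sanity-check the conversion, assuming the commands above were run from `data/dataset/youtube8m`: the record layout inside each `.pkl` is whatever `tf2pkl.py` wrote, so this sketch only verifies that the files unpickle cleanly under both Python 2 and Python 3 (matching the `cPickle`/`pickle` fallback this PR introduces).

```python
# Minimal sketch: confirm a few converted files load with protocol-2 pickle.
import glob

try:
    import cPickle as pickle  # Python 2
except ImportError:
    import pickle  # Python 3

for path in sorted(glob.glob('./pkl/train/*.pkl'))[:3]:
    with open(path, 'rb') as f:
        # Files written under Python 2 may need
        # pickle.load(f, encoding='bytes') on Python 3.
        records = pickle.load(f)
    print(path, type(records))
```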
+备注:由于TFRecord文件的读取需要用到Tensorflow,用户要先安装Tensorflow,或者在安装有Tensorflow的环境中转化完数据,再拷贝到data/dataset/youtube8m/pkl目录下。为了避免和PaddlePaddle环境冲突,建议先在其他地方转化完成再将数据拷贝过来。 + +### 生成文件列表 + +进入data/dataset/youtube8m目录 + + cd $Code_Root/data/dataset/youtube8m + + ls $Code_Root/data/dataset/youtube8m/pkl/train/* > train.list + + ls $Code_Root/data/dataset/youtube8m/pkl/val/* > val.list + + ls $Code_Root/data/dataset/youtube8m/pkl/val/* > test.list + + ls $Code_Root/data/dataset/youtube8m/pkl/val/* > infer.list + +此处Code\_Root是PaddleVideo目录所在的绝对路径,比如/ssd1/user/models/PaddleCV/PaddleVideo,在data/dataset/youtube8m目录下将生成四个文件,train.list,val.list,test.list和infer.list,每一行分别保存了一个pkl文件的绝对路径,示例如下: + + /ssd1/user/models/PaddleCV/PaddleVideo/data/dataset/youtube8m/pkl/train/train0471.pkl + /ssd1/user/models/PaddleCV/PaddleVideo/data/dataset/youtube8m/pkl/train/train0472.pkl + /ssd1/user/models/PaddleCV/PaddleVideo/data/dataset/youtube8m/pkl/train/train0473.pkl + ... + +或者 + + /ssd1/user/models/PaddleCV/PaddleVideo/data/dataset/youtube8m/pkl/val/validate3666.pkl + /ssd1/user/models/PaddleCV/PaddleVideo/data/dataset/youtube8m/pkl/val/validate3666.pkl + /ssd1/user/models/PaddleCV/PaddleVideo/data/dataset/youtube8m/pkl/val/validate3666.pkl + ... + +- 备注:由于Youtube-8M数据集中test部分的数据没有标签,所以此处使用validate数据做模型评估。 + +## Kinetics数据集 + +Kinetics数据集是DeepMind公开的大规模视频动作识别数据集,有Kinetics400与Kinetics600两个版本。这里使用Kinetics400数据集,具体的数据预处理过程如下。 + +### mp4视频下载 +在Code\_Root目录下创建文件夹 + + cd $Code_Root/data/dataset && mkdir kinetics + + cd kinetics && mkdir data_k400 && cd data_k400 + + mkdir train_mp4 && mkdir val_mp4 + +ActivityNet官方提供了Kinetics的下载工具,具体参考其[官方repo ](https://github.com/activitynet/ActivityNet/tree/master/Crawler/Kinetics)即可下载Kinetics400的mp4视频集合。将kinetics400的训练与验证集合分别下载到data/dataset/kinetics/data\_k400/train\_mp4与data/dataset/kinetics/data\_k400/val\_mp4。 + +### mp4文件预处理 + +为提高数据读取速度,提前将mp4文件解帧并打pickle包,dataloader从视频的pkl文件中读取数据(该方法耗费更多存储空间)。pkl文件里打包的内容为(video-id,[frame1, frame2,...,frameN],label)。 + +在 data/dataset/kinetics/data\_k400目录下创建目录train\_pkl和val\_pkl + + cd $Code_Root/data/dataset/kinetics/data_k400 + + mkdir train_pkl && mkdir val_pkl + +进入$Code\_Root/data/dataset/kinetics目录,使用video2pkl.py脚本进行数据转化。首先需要下载[train](https://github.com/activitynet/ActivityNet/tree/master/Crawler/Kinetics/data/kinetics-400_train.csv)和[validation](https://github.com/activitynet/ActivityNet/tree/master/Crawler/Kinetics/data/kinetics-400_val.csv)数据集的文件列表。 + +首先生成预处理需要的数据集标签文件 + + python generate_label.py kinetics-400_train.csv kinetics400_label.txt + +然后执行如下程序: + + python video2pkl.py kinetics-400_train.csv $Source_dir $Target_dir 8 #以8个进程为例 + +- 该脚本依赖`ffmpeg`库,请预先安装`ffmpeg` + +对于train数据, + + Source_dir = $Code_Root/data/dataset/kinetics/data_k400/train_mp4 + + Target_dir = $Code_Root/data/dataset/kinetics/data_k400/train_pkl + +对于val数据, + + Source_dir = $Code_Root/data/dataset/kinetics/data_k400/val_mp4 + + Target_dir = $Code_Root/data/dataset/kinetics/data_k400/val_pkl + +这样即可将mp4文件解码并保存为pkl文件。 + +### 生成训练和验证集list + + cd $Code_Root/data/dataset/kinetics + + ls $Code_Root/data/dataset/kinetics/data_k400/train_pkl/* > train.list + + ls $Code_Root/data/dataset/kinetics/data_k400/val_pkl/* > val.list + + ls $Code_Root/data/dataset/kinetics/data_k400/val_pkl/* > test.list + + ls $Code_Root/data/dataset/kinetics/data_k400/val_pkl/* > infer.list + +即可生成相应的文件列表,train.list和val.list的每一行表示一个pkl文件的绝对路径,示例如下: + + /ssd1/user/models/PaddleCV/PaddleVideo/data/dataset/kinetics/data_k400/train_pkl/data_batch_100-097 + 
/ssd1/user/models/PaddleCV/PaddleVideo/data/dataset/kinetics/data_k400/train_pkl/data_batch_100-114 + /ssd1/user/models/PaddleCV/PaddleVideo/data/dataset/kinetics/data_k400/train_pkl/data_batch_100-118 + ... + +或者 + + /ssd1/user/models/PaddleCV/PaddleVideo/data/dataset/kinetics/data_k400/val_pkl/data_batch_102-085 + /ssd1/user/models/PaddleCV/PaddleVideo/data/dataset/kinetics/data_k400/val_pkl/data_batch_102-086 + /ssd1/user/models/PaddleCV/PaddleVideo/data/dataset/kinetics/data_k400/val_pkl/data_batch_102-090 + ... + + +## Non-local + +Non-local模型也使用kinetics数据集,不过其数据处理方式和其他模型不一样,详细内容见[Non-local数据说明](./nonlocal/README.md) + +## C-TCN + +C-TCN模型使用ActivityNet 1.3数据集,具体使用方法见[C-TCN数据说明](./ctcn/README.md) diff --git a/PaddleCV/PaddleVideo/dataset/ctcn/README.md b/PaddleCV/PaddleVideo/data/dataset/ctcn/README.md similarity index 83% rename from PaddleCV/PaddleVideo/dataset/ctcn/README.md rename to PaddleCV/PaddleVideo/data/dataset/ctcn/README.md index f7aad11b..f834cea0 100644 --- a/PaddleCV/PaddleVideo/dataset/ctcn/README.md +++ b/PaddleCV/PaddleVideo/data/dataset/ctcn/README.md @@ -11,4 +11,4 @@ data |----senet152-201cls-rgb-70.3-5seg-331data\_331img\_val ``` -同时需要下载如下几个数据文件Activity1.3\_train\_rgb.listformat, Activity1.3\_val\_rgb.listformat, labels.txt, test\_val\_label.list, val\_duration\_frame.list,并放到dataset/ctcn目录下。 +同时需要下载如下几个数据文件Activity1.3\_train\_rgb.listformat, Activity1.3\_val\_rgb.listformat, labels.txt, val\_duration\_frame.list,并放到dataset/ctcn目录下。 diff --git a/PaddleCV/PaddleVideo/dataset/kinetics/generate_label.py b/PaddleCV/PaddleVideo/data/dataset/kinetics/generate_label.py similarity index 100% rename from PaddleCV/PaddleVideo/dataset/kinetics/generate_label.py rename to PaddleCV/PaddleVideo/data/dataset/kinetics/generate_label.py diff --git a/PaddleCV/PaddleVideo/dataset/kinetics/video2pkl.py b/PaddleCV/PaddleVideo/data/dataset/kinetics/video2pkl.py similarity index 94% rename from PaddleCV/PaddleVideo/dataset/kinetics/video2pkl.py rename to PaddleCV/PaddleVideo/data/dataset/kinetics/video2pkl.py index 881857c4..0950b01b 100644 --- a/PaddleCV/PaddleVideo/dataset/kinetics/video2pkl.py +++ b/PaddleCV/PaddleVideo/data/dataset/kinetics/video2pkl.py @@ -15,7 +15,10 @@ import os import sys import glob -import cPickle +try: + import cPickle as pickle +except: + import pickle from multiprocessing import Pool # example command line: python generate_k400_pkl.py kinetics-400_train.csv 8 @@ -71,8 +74,8 @@ def generate_pkl(entry): output_pkl = vid + ".pkl" output_pkl = os.path.join(target_dir, output_pkl) - f = open(output_pkl, 'w') - cPickle.dump((vid, label, ims), f, -1) + f = open(output_pkl, 'wb') + pickle.dump((vid, label, ims), f, protocol=2) f.close() os.system('rm -rf %s' % vid) diff --git a/PaddleCV/PaddleVideo/data/dataset/kinetics_labels.json b/PaddleCV/PaddleVideo/data/dataset/kinetics_labels.json new file mode 100644 index 00000000..f7fc2640 --- /dev/null +++ b/PaddleCV/PaddleVideo/data/dataset/kinetics_labels.json @@ -0,0 +1 @@ +["abseiling", "air_drumming", "answering_questions", "applauding", "applying_cream", "archery", "arm_wrestling", "arranging_flowers", "assembling_computer", "auctioning", "baby_waking_up", "baking_cookies", "balloon_blowing", "bandaging", "barbequing", "bartending", "beatboxing", "bee_keeping", "belly_dancing", "bench_pressing", "bending_back", "bending_metal", "biking_through_snow", "blasting_sand", "blowing_glass", "blowing_leaves", "blowing_nose", "blowing_out_candles", "bobsledding", "bookbinding", "bouncing_on_trampoline", "bowling", 
"braiding_hair", "breading_or_breadcrumbing", "breakdancing", "brush_painting", "brushing_hair", "brushing_teeth", "building_cabinet", "building_shed", "bungee_jumping", "busking", "canoeing_or_kayaking", "capoeira", "carrying_baby", "cartwheeling", "carving_pumpkin", "catching_fish", "catching_or_throwing_baseball", "catching_or_throwing_frisbee", "catching_or_throwing_softball", "celebrating", "changing_oil", "changing_wheel", "checking_tires", "cheerleading", "chopping_wood", "clapping", "clay_pottery_making", "clean_and_jerk", "cleaning_floor", "cleaning_gutters", "cleaning_pool", "cleaning_shoes", "cleaning_toilet", "cleaning_windows", "climbing_a_rope", "climbing_ladder", "climbing_tree", "contact_juggling", "cooking_chicken", "cooking_egg", "cooking_on_campfire", "cooking_sausages", "counting_money", "country_line_dancing", "cracking_neck", "crawling_baby", "crossing_river", "crying", "curling_hair", "cutting_nails", "cutting_pineapple", "cutting_watermelon", "dancing_ballet", "dancing_charleston", "dancing_gangnam_style", "dancing_macarena", "deadlifting", "decorating_the_christmas_tree", "digging", "dining", "disc_golfing", "diving_cliff", "dodgeball", "doing_aerobics", "doing_laundry", "doing_nails", "drawing", "dribbling_basketball", "drinking", "drinking_beer", "drinking_shots", "driving_car", "driving_tractor", "drop_kicking", "drumming_fingers", "dunking_basketball", "dying_hair", "eating_burger", "eating_cake", "eating_carrots", "eating_chips", "eating_doughnuts", "eating_hotdog", "eating_ice_cream", "eating_spaghetti", "eating_watermelon", "egg_hunting", "exercising_arm", "exercising_with_an_exercise_ball", "extinguishing_fire", "faceplanting", "feeding_birds", "feeding_fish", "feeding_goats", "filling_eyebrows", "finger_snapping", "fixing_hair", "flipping_pancake", "flying_kite", "folding_clothes", "folding_napkins", "folding_paper", "front_raises", "frying_vegetables", "garbage_collecting", "gargling", "getting_a_haircut", "getting_a_tattoo", "giving_or_receiving_award", "golf_chipping", "golf_driving", "golf_putting", "grinding_meat", "grooming_dog", "grooming_horse", "gymnastics_tumbling", "hammer_throw", "headbanging", "headbutting", "high_jump", "high_kick", "hitting_baseball", "hockey_stop", "holding_snake", "hopscotch", "hoverboarding", "hugging", "hula_hooping", "hurdling", "hurling_(sport)", "ice_climbing", "ice_fishing", "ice_skating", "ironing", "javelin_throw", "jetskiing", "jogging", "juggling_balls", "juggling_fire", "juggling_soccer_ball", "jumping_into_pool", "jumpstyle_dancing", "kicking_field_goal", "kicking_soccer_ball", "kissing", "kitesurfing", "knitting", "krumping", "laughing", "laying_bricks", "long_jump", "lunge", "making_a_cake", "making_a_sandwich", "making_bed", "making_jewelry", "making_pizza", "making_snowman", "making_sushi", "making_tea", "marching", "massaging_back", "massaging_feet", "massaging_legs", "massaging_person's_head", "milking_cow", "mopping_floor", "motorcycling", "moving_furniture", "mowing_lawn", "news_anchoring", "opening_bottle", "opening_present", "paragliding", "parasailing", "parkour", "passing_American_football_(in_game)", "passing_American_football_(not_in_game)", "peeling_apples", "peeling_potatoes", "petting_animal_(not_cat)", "petting_cat", "picking_fruit", "planting_trees", "plastering", "playing_accordion", "playing_badminton", "playing_bagpipes", "playing_basketball", "playing_bass_guitar", "playing_cards", "playing_cello", "playing_chess", "playing_clarinet", "playing_controller", "playing_cricket", 
"playing_cymbals", "playing_didgeridoo", "playing_drums", "playing_flute", "playing_guitar", "playing_harmonica", "playing_harp", "playing_ice_hockey", "playing_keyboard", "playing_kickball", "playing_monopoly", "playing_organ", "playing_paintball", "playing_piano", "playing_poker", "playing_recorder", "playing_saxophone", "playing_squash_or_racquetball", "playing_tennis", "playing_trombone", "playing_trumpet", "playing_ukulele", "playing_violin", "playing_volleyball", "playing_xylophone", "pole_vault", "presenting_weather_forecast", "pull_ups", "pumping_fist", "pumping_gas", "punching_bag", "punching_person_(boxing)", "push_up", "pushing_car", "pushing_cart", "pushing_wheelchair", "reading_book", "reading_newspaper", "recording_music", "riding_a_bike", "riding_camel", "riding_elephant", "riding_mechanical_bull", "riding_mountain_bike", "riding_mule", "riding_or_walking_with_horse", "riding_scooter", "riding_unicycle", "ripping_paper", "robot_dancing", "rock_climbing", "rock_scissors_paper", "roller_skating", "running_on_treadmill", "sailing", "salsa_dancing", "sanding_floor", "scrambling_eggs", "scuba_diving", "setting_table", "shaking_hands", "shaking_head", "sharpening_knives", "sharpening_pencil", "shaving_head", "shaving_legs", "shearing_sheep", "shining_shoes", "shooting_basketball", "shooting_goal_(soccer)", "shot_put", "shoveling_snow", "shredding_paper", "shuffling_cards", "side_kick", "sign_language_interpreting", "singing", "situp", "skateboarding", "ski_jumping", "skiing_(not_slalom_or_crosscountry)", "skiing_crosscountry", "skiing_slalom", "skipping_rope", "skydiving", "slacklining", "slapping", "sled_dog_racing", "smoking", "smoking_hookah", "snatch_weight_lifting", "sneezing", "sniffing", "snorkeling", "snowboarding", "snowkiting", "snowmobiling", "somersaulting", "spinning_poi", "spray_painting", "spraying", "springboard_diving", "squat", "sticking_tongue_out", "stomping_grapes", "stretching_arm", "stretching_leg", "strumming_guitar", "surfing_crowd", "surfing_water", "sweeping_floor", "swimming_backstroke", "swimming_breast_stroke", "swimming_butterfly_stroke", "swing_dancing", "swinging_legs", "swinging_on_something", "sword_fighting", "tai_chi", "taking_a_shower", "tango_dancing", "tap_dancing", "tapping_guitar", "tapping_pen", "tasting_beer", "tasting_food", "testifying", "texting", "throwing_axe", "throwing_ball", "throwing_discus", "tickling", "tobogganing", "tossing_coin", "tossing_salad", "training_dog", "trapezing", "trimming_or_shaving_beard", "trimming_trees", "triple_jump", "tying_bow_tie", "tying_knot_(not_on_a_tie)", "tying_tie", "unboxing", "unloading_truck", "using_computer", "using_remote_controller_(not_gaming)", "using_segway", "vault", "waiting_in_line", "walking_the_dog", "washing_dishes", "washing_feet", "washing_hair", "washing_hands", "water_skiing", "water_sliding", "watering_plants", "waxing_back", "waxing_chest", "waxing_eyebrows", "waxing_legs", "weaving_basket", "welding", "whistling", "windsurfing", "wrapping_present", "wrestling", "writing", "yawning", "yoga", "zumba"] \ No newline at end of file diff --git a/PaddleCV/PaddleVideo/dataset/nonlocal/README.md b/PaddleCV/PaddleVideo/data/dataset/nonlocal/README.md similarity index 52% rename from PaddleCV/PaddleVideo/dataset/nonlocal/README.md rename to PaddleCV/PaddleVideo/data/dataset/nonlocal/README.md index 52b5d73a..f0c10b0a 100644 --- a/PaddleCV/PaddleVideo/dataset/nonlocal/README.md +++ b/PaddleCV/PaddleVideo/data/dataset/nonlocal/README.md @@ -1,6 +1,6 @@ # Non-local模型数据说明 
-在Non-local模型中,输入数据是mp4文件,在datareader部分的代码中,使用opencv读取mp4文件对视频进行解码和采样。train和valid数据随机选取起始帧的位置,对每帧图像做随机增强,短边缩放至[256, 320]之间的某个随机数,长边根据长宽比计算出来,截取出224x224大小的区域。test时每条视频会选取10个不同的位置作为起始帧,同时会选取三个不同的空间位置作为crop区域的起始点,这样每个视频会进行10x3次采样,对这30个样本的预测概率求和,选取概率最大的分类作为最终的预测结果。 +在Non-local模型中,输入数据是mp4文件,在reader部分的代码中,使用opencv读取mp4文件对视频进行解码和采样。train和valid数据随机选取起始帧的位置,对每帧图像做随机增强,短边缩放至[256, 320]之间的某个随机数,长边根据长宽比计算出来,截取出224x224大小的区域。test时每条视频会选取10个不同的位置作为起始帧,同时会选取三个不同的空间位置作为crop区域的起始点,这样每个视频会进行10x3次采样,对这30个样本的预测概率求和,选取概率最大的分类作为最终的预测结果。 ## 数据下载 @@ -17,3 +17,9 @@ bash generate_list.sh 即可生成trainlist.txt、vallist.txt和testlist.txt。 + +另外,如果要观察模型推断的效果,可以复制testlist.txt生成inferlist.txt, + + cp testlist.txt inferlist.txt + +生成inferlist.txt。也可以在predict的时候指定`video_path`对单个视频文件进行预测。 diff --git a/PaddleCV/PaddleVideo/dataset/nonlocal/change_filelist.py b/PaddleCV/PaddleVideo/data/dataset/nonlocal/change_filelist.py similarity index 100% rename from PaddleCV/PaddleVideo/dataset/nonlocal/change_filelist.py rename to PaddleCV/PaddleVideo/data/dataset/nonlocal/change_filelist.py diff --git a/PaddleCV/PaddleVideo/dataset/nonlocal/generate_filelist.py b/PaddleCV/PaddleVideo/data/dataset/nonlocal/generate_filelist.py similarity index 100% rename from PaddleCV/PaddleVideo/dataset/nonlocal/generate_filelist.py rename to PaddleCV/PaddleVideo/data/dataset/nonlocal/generate_filelist.py diff --git a/PaddleCV/PaddleVideo/dataset/nonlocal/generate_list.sh b/PaddleCV/PaddleVideo/data/dataset/nonlocal/generate_list.sh similarity index 100% rename from PaddleCV/PaddleVideo/dataset/nonlocal/generate_list.sh rename to PaddleCV/PaddleVideo/data/dataset/nonlocal/generate_list.sh diff --git a/PaddleCV/PaddleVideo/dataset/nonlocal/generate_testlist_multicrop.py b/PaddleCV/PaddleVideo/data/dataset/nonlocal/generate_testlist_multicrop.py similarity index 100% rename from PaddleCV/PaddleVideo/dataset/nonlocal/generate_testlist_multicrop.py rename to PaddleCV/PaddleVideo/data/dataset/nonlocal/generate_testlist_multicrop.py diff --git a/PaddleCV/PaddleVideo/dataset/youtube8m/tf2pkl.py b/PaddleCV/PaddleVideo/data/dataset/youtube8m/tf2pkl.py similarity index 98% rename from PaddleCV/PaddleVideo/dataset/youtube8m/tf2pkl.py rename to PaddleCV/PaddleVideo/data/dataset/youtube8m/tf2pkl.py index 3b32e3b4..7184850b 100644 --- a/PaddleCV/PaddleVideo/dataset/youtube8m/tf2pkl.py +++ b/PaddleCV/PaddleVideo/data/dataset/youtube8m/tf2pkl.py @@ -16,7 +16,10 @@ import os, sys import numpy as np import tensorflow as tf from tensorflow import logging -import cPickle +try: + import cPickle as pickle +except: + import pickle from tensorflow.python.platform import gfile @@ -266,7 +269,7 @@ def main(files_pattern): outputdir = target_dir fn = '%s.pkl' % record_name outp = open(os.path.join(outputdir, fn), 'wb') - cPickle.dump(all_data, outp, protocol=cPickle.HIGHEST_PROTOCOL) + pickle.dump(all_data, outp, protocol=2) outp.close() diff --git a/PaddleCV/PaddleVideo/dataset/youtube8m/yt8m_pca/eigenvals.npy b/PaddleCV/PaddleVideo/data/dataset/youtube8m/yt8m_pca/eigenvals.npy similarity index 100% rename from PaddleCV/PaddleVideo/dataset/youtube8m/yt8m_pca/eigenvals.npy rename to PaddleCV/PaddleVideo/data/dataset/youtube8m/yt8m_pca/eigenvals.npy diff --git a/PaddleCV/PaddleVideo/dataset/README.md b/PaddleCV/PaddleVideo/dataset/README.md deleted file mode 100644 index b0436978..00000000 --- a/PaddleCV/PaddleVideo/dataset/README.md +++ /dev/null @@ -1,128 +0,0 @@ -# 数据使用说明 - -- [Youtube-8M](#Youtube-8M数据集) -- [Kinetics](#Kinetics数据集) -- [Non-local](#Non-local) -- [C-TCN](#C-TCN) - 
diff --git a/PaddleCV/PaddleVideo/dataset/nonlocal/change_filelist.py b/PaddleCV/PaddleVideo/data/dataset/nonlocal/change_filelist.py
similarity index 100%
rename from PaddleCV/PaddleVideo/dataset/nonlocal/change_filelist.py
rename to PaddleCV/PaddleVideo/data/dataset/nonlocal/change_filelist.py
diff --git a/PaddleCV/PaddleVideo/dataset/nonlocal/generate_filelist.py b/PaddleCV/PaddleVideo/data/dataset/nonlocal/generate_filelist.py
similarity index 100%
rename from PaddleCV/PaddleVideo/dataset/nonlocal/generate_filelist.py
rename to PaddleCV/PaddleVideo/data/dataset/nonlocal/generate_filelist.py
diff --git a/PaddleCV/PaddleVideo/dataset/nonlocal/generate_list.sh b/PaddleCV/PaddleVideo/data/dataset/nonlocal/generate_list.sh
similarity index 100%
rename from PaddleCV/PaddleVideo/dataset/nonlocal/generate_list.sh
rename to PaddleCV/PaddleVideo/data/dataset/nonlocal/generate_list.sh
diff --git a/PaddleCV/PaddleVideo/dataset/nonlocal/generate_testlist_multicrop.py b/PaddleCV/PaddleVideo/data/dataset/nonlocal/generate_testlist_multicrop.py
similarity index 100%
rename from PaddleCV/PaddleVideo/dataset/nonlocal/generate_testlist_multicrop.py
rename to PaddleCV/PaddleVideo/data/dataset/nonlocal/generate_testlist_multicrop.py
diff --git a/PaddleCV/PaddleVideo/dataset/youtube8m/tf2pkl.py b/PaddleCV/PaddleVideo/data/dataset/youtube8m/tf2pkl.py
similarity index 98%
rename from PaddleCV/PaddleVideo/dataset/youtube8m/tf2pkl.py
rename to PaddleCV/PaddleVideo/data/dataset/youtube8m/tf2pkl.py
index 3b32e3b4..7184850b 100644
--- a/PaddleCV/PaddleVideo/dataset/youtube8m/tf2pkl.py
+++ b/PaddleCV/PaddleVideo/data/dataset/youtube8m/tf2pkl.py
@@ -16,7 +16,10 @@ import os, sys
 import numpy as np
 import tensorflow as tf
 from tensorflow import logging
-import cPickle
+try:
+    import cPickle as pickle
+except:
+    import pickle
 from tensorflow.python.platform import gfile
@@ -266,7 +269,7 @@ def main(files_pattern):
     outputdir = target_dir
     fn = '%s.pkl' % record_name
     outp = open(os.path.join(outputdir, fn), 'wb')
-    cPickle.dump(all_data, outp, protocol=cPickle.HIGHEST_PROTOCOL)
+    pickle.dump(all_data, outp, protocol=2)  # protocol 2 keeps the files loadable from both Python 2 and 3
     outp.close()
diff --git a/PaddleCV/PaddleVideo/dataset/youtube8m/yt8m_pca/eigenvals.npy b/PaddleCV/PaddleVideo/data/dataset/youtube8m/yt8m_pca/eigenvals.npy
similarity index 100%
rename from PaddleCV/PaddleVideo/dataset/youtube8m/yt8m_pca/eigenvals.npy
rename to PaddleCV/PaddleVideo/data/dataset/youtube8m/yt8m_pca/eigenvals.npy
diff --git a/PaddleCV/PaddleVideo/dataset/README.md b/PaddleCV/PaddleVideo/dataset/README.md
deleted file mode 100644
index b0436978..00000000
--- a/PaddleCV/PaddleVideo/dataset/README.md
+++ /dev/null
@@ -1,128 +0,0 @@
-# Data usage notes
-
-- [Youtube-8M](#Youtube-8M数据集)
-- [Kinetics](#Kinetics数据集)
-- [Non-local](#Non-local)
-- [C-TCN](#C-TCN)
-
-## Youtube-8M dataset
-This uses the YouTube-8M dataset after its 2018 update. We use the official dataset and convert the TFRecord files into pickle files for PaddlePaddle. The official Youtube-8M release provides both frame-level and video-level features; only the frame-level features are needed here.
-
-### Data download
-Please use the official Youtube-8M links to download the [training set](http://us.data.yt8m.org/2/frame/train/index.html) and the [validation set](http://us.data.yt8m.org/2/frame/validate/index.html). Each link lists download addresses for 3844 files; you can also use the official [download script](https://research.google.com/youtube8m/download.html). After downloading, you will have 3844 training files and 3844 validation files (TFRecord format).
-Let Code_Root be the root directory of the video model code base, and enter dataset/youtube8m
-
-    cd dataset/youtube8m
-
-Create tf/train and tf/val under youtube8m
-
-    mkdir tf && cd tf
-
-    mkdir train && mkdir val
-
-and store the downloaded train and validate data there, respectively.
-
-### Data format conversion
-
-To be usable for PaddlePaddle training, the downloaded TFRecord files need to be converted offline to pickle format; use [dataset/youtube8m/tf2pkl.py](./youtube8m/tf2pkl.py) for the conversion.
-
-Create pkl/train and pkl/val under dataset/youtube8m
-
-    cd dataset/youtube8m
-
-    mkdir pkl && cd pkl
-
-    mkdir train && mkdir val
-
-
-To convert the file format (TFRecord -> pkl), enter dataset/youtube8m and run
-
-    python tf2pkl.py ./tf/train ./pkl/train
-
-and
-
-    python tf2pkl.py ./tf/val ./pkl/val
-
-to convert the train and validate sets, respectively. tf2pkl.py takes two arguments: the directory of the source tf files and the directory for the converted pkl files.
-
-Note: reading TFRecord files requires Tensorflow, so install Tensorflow first, or run the conversion in an environment that has Tensorflow and then copy the data to dataset/youtube8m/pkl. To avoid conflicts with the PaddlePaddle environment, it is recommended to convert elsewhere and copy the data over afterwards.
-
-### Generating file lists
-
-Enter dataset/youtube8m
-
-    ls $Code_Root/dataset/youtube8m/pkl/train/* > train.list
-
-    ls $Code_Root/dataset/youtube8m/pkl/val/* > val.list
-
-This creates two files under dataset/youtube8m, train.list and val.list, each line of which holds the absolute path of one pkl file.
-
-## Kinetics dataset
-
-Kinetics is a large-scale video action recognition dataset released by DeepMind, with two versions, Kinetics400 and Kinetics600. Kinetics400 is used here; the preprocessing steps are as follows.
-
-### Downloading the mp4 videos
-Create the directories under Code_Root
-
-    cd $Code_Root/dataset && mkdir kinetics
-
-    cd kinetics && mkdir data_k400 && cd data_k400
-
-    mkdir train_mp4 && mkdir val_mp4
-
-ActivityNet provides an official download tool for Kinetics; see its [official repo](https://github.com/activitynet/ActivityNet/tree/master/Crawler/Kinetics) to download the Kinetics400 mp4 videos. Download the training and validation sets to dataset/kinetics/data_k400/train_mp4 and dataset/kinetics/data_k400/val_mp4, respectively.
-
-### Preprocessing the mp4 files
-
-To speed up data reading, the mp4 files are decoded into frames ahead of time and packed as pickle files; the dataloader then reads from each video's pkl file (this method uses more storage). Each pkl file packs (video-id, [frame1, frame2, ..., frameN], label).
-
-Create train_pkl and val_pkl under dataset/kinetics/data_k400
-
-    cd $Code_Root/dataset/kinetics/data_k400
-
-    mkdir train_pkl && mkdir val_pkl
-
-Enter $Code_Root/dataset/kinetics and use the video2pkl.py script for the conversion. First download the file lists of the [train](https://github.com/activitynet/ActivityNet/tree/master/Crawler/Kinetics/data/kinetics-400_train.csv) and [validation](https://github.com/activitynet/ActivityNet/tree/master/Crawler/Kinetics/data/kinetics-400_val.csv) sets.
-
-First generate the dataset label file needed for preprocessing
-
-    python generate_label.py kinetics-400_train.csv kinetics400_label.txt
-
-Then run:
-
-    python video2pkl.py kinetics-400_train.csv $Source_dir $Target_dir 8  # e.g. with 8 processes
-
-- This script depends on `ffmpeg`; please install `ffmpeg` beforehand
-
-For the train data,
-
-    Source_dir = $Code_Root/dataset/kinetics/data_k400/train_mp4
-
-    Target_dir = $Code_Root/dataset/kinetics/data_k400/train_pkl
-
-For the val data,
-
-    Source_dir = $Code_Root/dataset/kinetics/data_k400/val_mp4
-
-    Target_dir = $Code_Root/dataset/kinetics/data_k400/val_pkl
-
-This decodes the mp4 files and saves them as pkl files.
-
-### Generating the train and validation lists
-
-    cd $Code_Root/dataset/kinetics
-
-    ls $Code_Root/dataset/kinetics/data_k400/train_pkl/* > train.list
-
-    ls $Code_Root/dataset/kinetics/data_k400/val_pkl/* > val.list
-
-This generates the file lists; each line of train.list and val.list is the absolute path of one pkl file.
-
-## Non-local
-
-The Non-local model also uses the Kinetics dataset, but its data processing differs from the other models; see the [Non-local data notes](./nonlocal/README.md) for details
-
-## C-TCN
-
-The C-TCN model uses the ActivityNet 1.3 dataset; see the [C-TCN data notes](./ctcn/README.md) for usage
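The (video-id, [frame1, ..., frameN], label) packing format described above makes the converted files easy to sanity-check. A quick, hypothetical inspection of one converted Kinetics pkl (the path is illustrative; the field layout is assumed from that description):

```python
import pickle

# pick any file produced by video2pkl.py under data_k400/train_pkl
with open('data_k400/train_pkl/some_video.pkl', 'rb') as f:
    vid, frames, label = pickle.load(f)

print(vid, len(frames), label)  # video id, number of stored frames, class label
```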
diff --git a/PaddleCV/PaddleVideo/test.py b/PaddleCV/PaddleVideo/eval.py
similarity index 66%
rename from PaddleCV/PaddleVideo/test.py
rename to PaddleCV/PaddleVideo/eval.py
index 10ef5355..3c2641d0 100644
--- a/PaddleCV/PaddleVideo/test.py
+++ b/PaddleCV/PaddleVideo/eval.py
@@ -21,11 +21,11 @@ import ast
 import numpy as np
 
 import paddle.fluid as fluid
-from config import *
+from utils.config_utils import *
 import models
-from datareader import get_reader
+from reader import get_reader
 from metrics import get_metrics
-from utils import check_cuda
+from utils.utility import check_cuda
 
 logging.root.handlers = []
 FORMAT = '[%(levelname)s: %(filename)s: %(lineno)4d]: %(message)s'
@@ -59,7 +59,13 @@ def parse_args():
         '--weights',
         type=str,
         default=None,
-        help='weight path, None to use weights from Paddle.')
+        help='weight path, None to automatically download weights provided by Paddle.'
+    )
+    parser.add_argument(
+        '--save_dir',
+        type=str,
+        default=os.path.join('data', 'evaluate_results'),
+        help='output dir path, default to use ./data/evaluate_results')
     parser.add_argument(
         '--log_interval',
         type=int,
@@ -80,8 +86,7 @@ def test(args):
     test_model.build_input(use_pyreader=False)
     test_model.build_model()
     test_feeds = test_model.feeds()
-    test_outputs = test_model.outputs()
-    test_loss = test_model.loss()
+    test_fetch_list = test_model.fetches()
 
     place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
     exe = fluid.Executor(place)
@@ -91,6 +96,8 @@
         args.weights), "Given weight dir {} not exist.".format(args.weights)
 
     weights = args.weights or test_model.get_weights()
+    logger.info('load test weights from {}'.format(weights))
+
     test_model.load_test_weights(exe, weights,
                                  fluid.default_main_program(), place)
 
@@ -99,49 +106,24 @@
     test_metrics = get_metrics(args.model_name.upper(), 'test', test_config)
     test_feeder = fluid.DataFeeder(place=place, feed_list=test_feeds)
-    if args.model_name.upper() in ['CTCN']:
-        fetch_list = [x.name for x in test_loss] + \
-                     [x.name for x in test_outputs] + \
-                     [test_feeds[-1].name]
-    else:
-        if test_loss is None:
-            fetch_list = [x.name for x in test_outputs] + [test_feeds[-1].name]
-        else:
-            fetch_list = [test_loss.name] + [x.name for x in test_outputs
-                                             ] + [test_feeds[-1].name]
 
     epoch_period = []
     for test_iter, data in enumerate(test_reader()):
         cur_time = time.time()
-        test_outs = exe.run(fetch_list=fetch_list, feed=test_feeder.feed(data))
+        test_outs = exe.run(fetch_list=test_fetch_list,
+                            feed=test_feeder.feed(data))
         period = time.time() - cur_time
         epoch_period.append(period)
-        if args.model_name.upper() in ['CTCN']:
-            total_loss = test_outs[0]
-            loc_loss = test_outs[1]
-            cls_loss = test_outs[2]
-            loc_preds = test_outs[3]
-            cls_preds = test_outs[4]
-            fid = test_outs[-1]
-            loss = [total_loss, loc_loss, cls_loss]
-            pred = [loc_preds, cls_preds]
-            label = fid
-        else:
-            if test_loss is None:
-                loss = np.zeros(1, ).astype('float32')
-                pred = np.array(test_outs[0])
-                label = np.array(test_outs[-1])
-            else:
-                loss = np.array(test_outs[0])
-                pred = np.array(test_outs[1])
-                label = np.array(test_outs[-1])
-        test_metrics.accumulate(loss, pred, label)
+        test_metrics.accumulate(test_outs)
 
         # metric here
         if args.log_interval > 0 and test_iter % args.log_interval == 0:
             info_str = '[EVAL] Batch {}'.format(test_iter)
-            test_metrics.calculate_and_log_out(loss, pred, label, info_str)
-    test_metrics.finalize_and_log_out("[EVAL] eval finished. ")
+            test_metrics.calculate_and_log_out(test_outs, info_str)
+
+    if not os.path.isdir(args.save_dir):
+        os.makedirs(args.save_dir)
+    test_metrics.finalize_and_log_out("[EVAL] eval finished. ", args.save_dir)
 
 
 if __name__ == "__main__":
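The refactor above replaces per-model unpacking in eval.py with a single contract: each model declares what it fetches via `fetches()`, and the metrics object consumes the raw fetch list. A self-contained toy illustration of that contract (not the repository's classes; names and fetch order assumed):

```python
import numpy as np

class ToyMetrics(object):
    """Toy stand-in showing the accumulate(fetch_list) contract."""
    def __init__(self):
        self.losses = []

    def accumulate(self, fetch_list):
        # the order is defined by the model's fetches(): [loss, pred, label]
        loss, pred, label = (np.array(x) for x in fetch_list)
        self.losses.append(float(loss.mean()))

metrics = ToyMetrics()
metrics.accumulate([np.array([0.7]), np.random.rand(4, 101), np.zeros((4, 1))])
print(np.mean(metrics.losses))  # running average of the accumulated losses
```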
") + test_metrics.calculate_and_log_out(test_outs, info_str) + + if not os.path.isdir(args.save_dir): + os.makedirs(args.save_dir) + test_metrics.finalize_and_log_out("[EVAL] eval finished. ", args.save_dir) if __name__ == "__main__": diff --git a/PaddleCV/PaddleVideo/inference_model.py b/PaddleCV/PaddleVideo/inference_model.py new file mode 100644 index 00000000..5fe8ec14 --- /dev/null +++ b/PaddleCV/PaddleVideo/inference_model.py @@ -0,0 +1,123 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. + +import os +import sys +import time +import logging +import argparse +import ast +import numpy as np +try: + import cPickle as pickle +except: + import pickle +import paddle.fluid as fluid + +from utils.config_utils import * +import models +from reader import get_reader +from metrics import get_metrics +from utils.utility import check_cuda + +logging.root.handlers = [] +FORMAT = '[%(levelname)s: %(filename)s: %(lineno)4d]: %(message)s' +logging.basicConfig(level=logging.DEBUG, format=FORMAT, stream=sys.stdout) +logger = logging.getLogger(__name__) + + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument( + '--model_name', + type=str, + default='AttentionCluster', + help='name of model to train.') + parser.add_argument( + '--config', + type=str, + default='configs/attention_cluster.txt', + help='path to config file of model') + parser.add_argument( + '--use_gpu', + type=ast.literal_eval, + default=True, + help='default use gpu.') + parser.add_argument( + '--weights', + type=str, + default=None, + help='weight path, None to automatically download weights provided by Paddle.' 
diff --git a/PaddleCV/PaddleVideo/metrics/detections/detection_metrics.py b/PaddleCV/PaddleVideo/metrics/detections/detection_metrics.py
index 266aa3e9..7ba83545 100644
--- a/PaddleCV/PaddleVideo/metrics/detections/detection_metrics.py
+++ b/PaddleCV/PaddleVideo/metrics/detections/detection_metrics.py
@@ -15,6 +15,7 @@ import numpy as np
 import datetime
 import logging
 import json
+import os
 
 from models.ctcn.ctcn_utils import BoxCoder
 
@@ -47,14 +48,14 @@
                  class_label_file='',
                  video_duration_file=''):
         self.name = name
-        self.mode = mode  # 'train', 'val', 'test'
+        self.mode = mode  # 'train', 'valid', 'test', 'infer'
         self.score_thresh = score_thresh
         self.nms_thresh = nms_thresh
         self.sigma_thresh = sigma_thresh
         self.soft_thresh = soft_thresh
         self.class_label_file = class_label_file
         self.video_duration_file = video_duration_file
-        if mode == 'test':
+        if mode == 'test' or mode == 'infer':
             lines = open(gt_label_file).readlines()
             self.gt_labels = [item.split(' ')[0] for item in lines]
             self.box_coder = BoxCoder()
@@ -69,7 +70,7 @@
         self.aggr_loc_loss = 0.0
         self.aggr_cls_loss = 0.0
         self.aggr_batch_size = 0
-        if self.mode == 'test':
+        if self.mode == 'test' or self.mode == 'infer':
             self.class_label = get_class_label(self.class_label_file)
             self.video_time_dict = get_video_time_dict(self.video_duration_file)
             self.res_detect = dict()
@@ -86,19 +87,28 @@
             'sigma_thresh': self.sigma_thresh,
             'soft_thresh': self.soft_thresh
         }
-        self.out_file = 'res_decode_' + str(self.score_thresh) + '_' + \
+        self.out_file = self.name + '_' + self.mode + \
+                        '_res_decode_' + str(self.score_thresh) + '_' + \
                         str(self.nms_thresh) + '_' + str(self.sigma_thresh) + \
                         '_' + str(self.soft_thresh) + '.json'
 
-    def accumulate(self, loss, pred, label):
+    def accumulate(self, fetch_list):
         cur_batch_size = 1  # iteration counter
-        self.aggr_loss += np.mean(np.array(loss[0]))
-        self.aggr_loc_loss += np.mean(np.array(loss[1]))
-        self.aggr_cls_loss += np.mean(np.array(loss[2]))
+        total_loss = fetch_list[0]
+        loc_loss = fetch_list[1]
+        cls_loss = fetch_list[2]
+
+        self.aggr_loss += np.mean(np.array(total_loss))
+        self.aggr_loc_loss += np.mean(np.array(loc_loss))
+        self.aggr_cls_loss += np.mean(np.array(cls_loss))
         self.aggr_batch_size += cur_batch_size
         if self.mode == 'test':
+            loc_pred = np.array(fetch_list[3])
+            cls_pred = np.array(fetch_list[4])
+            label = np.array(fetch_list[5])
             box_preds, label_preds, score_preds = self.box_coder.decode(
-                pred[0].squeeze(), pred[1].squeeze(), **self.box_decode_params)
+                loc_pred.squeeze(),
+                cls_pred.squeeze(), **self.box_decode_params)
             fid = label.squeeze()
             fname = self.gt_labels[fid]
             logger.info("id {}, file {}, num of box preds {}:".format(
@@ -116,16 +126,50 @@
                 ]
             })
 
-    def finalize_metrics(self):
+    def accumulate_infer_results(self, fetch_list):
+        fname = fetch_list[2][0]
+        loc_pred = np.array(fetch_list[0])
+        cls_pred = np.array(fetch_list[1])
+        assert len(loc_pred) == 1, "please set batch size to 1 when inferring"
+        box_preds, label_preds, score_preds = self.box_coder.decode(
+            loc_pred.squeeze(), cls_pred.squeeze(), **self.box_decode_params)
+        self.results_detect[fname] = []
+        log_info = 'name: {} \n'.format(fname)
+        for j in range(len(label_preds)):
+            score = score_preds[j]
+            label = self.class_label[label_preds[j]].strip()
+            segment_start = max(0, self.video_time_dict[fname] *
+                                box_preds[j][0] / 512.0)
+            segment_end = min(self.video_time_dict[fname],
+                              self.video_time_dict[fname] * box_preds[j][1] /
+                              512.0)
+            self.results_detect[fname].append({
+                "score": score,
+                "label": label,
+                "segment": [segment_start, segment_end]
+            })
+            log_info += 'score: {}, \tlabel: {}, \tsegment: [{}, {}] \n'.format(
+                score, label, segment_start, segment_end)
+        logger.info(log_info)
+
+    def finalize_metrics(self, savedir):
         self.avg_loss = self.aggr_loss / self.aggr_batch_size
         self.avg_loc_loss = self.aggr_loc_loss / self.aggr_batch_size
         self.avg_cls_loss = self.aggr_cls_loss / self.aggr_batch_size
+        filepath = os.path.join(savedir, self.out_file)
         if self.mode == 'test':
             self.res_detect['results'] = self.results_detect
-            with open(self.out_file, 'w') as f:
+            with open(filepath, 'w') as f:
                 json.dump(self.res_detect, f)
-            logger.info('results has been saved into file: {}'.format(
-                self.out_file))
+            logger.info('results have been saved into file: {}'.format(filepath))
+
+    def finalize_infer_metrics(self, savedir):
+        self.res_detect['results'] = self.results_detect
+        filepath = os.path.join(savedir, self.out_file)
+        with open(filepath, 'w') as f:
+            json.dump(self.res_detect, f)
+        logger.info('results have been saved into file: {}'.format(filepath))
 
     def get_computed_metrics(self):
         json_stats = {}
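The `segment` arithmetic above maps predicted box coordinates from the model's 512-unit time axis back to seconds, clamped to the video duration. In isolation, with values assumed purely for illustration:

```python
duration = 120.0     # video length in seconds, from video_duration_file
box = (48.0, 176.0)  # predicted (start, end) on the 512-unit axis

segment_start = max(0.0, duration * box[0] / 512.0)
segment_end = min(duration, duration * box[1] / 512.0)
print(segment_start, segment_end)  # 11.25 41.25
```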
diff --git a/PaddleCV/PaddleVideo/metrics/metrics_util.py b/PaddleCV/PaddleVideo/metrics/metrics_util.py
index 416016cc..64246906 100644
--- a/PaddleCV/PaddleVideo/metrics/metrics_util.py
+++ b/PaddleCV/PaddleVideo/metrics/metrics_util.py
@@ -20,6 +20,7 @@ from __future__ import division
 
 import logging
 import numpy as np
+import json
 
 from metrics.youtube8m import eval_util as youtube8m_metrics
 from metrics.kinetics import accuracy_metrics as kinetics_metrics
 from metrics.multicrop_test import multicrop_test_metrics as multicrop_test_metrics
@@ -33,15 +34,15 @@ class Metrics(object):
         """Not implemented"""
         pass
 
-    def calculate_and_log_out(self, loss, pred, label, info=''):
+    def calculate_and_log_out(self, fetch_list, info=''):
         """Not implemented"""
         pass
 
-    def accumulate(self, loss, pred, label, info=''):
+    def accumulate(self, fetch_list, info=''):
         """Not implemented"""
         pass
 
-    def finalize_and_log_out(self, info=''):
+    def finalize_and_log_out(self, info='', savedir='./'):
         """Not implemented"""
         pass
 
@@ -58,9 +59,13 @@ class Youtube8mMetrics(Metrics):
         self.topk = metrics_args['MODEL']['topk']
         self.calculator = youtube8m_metrics.EvaluationMetrics(self.num_classes,
                                                               self.topk)
+        if self.mode == 'infer':
+            self.infer_results = []
 
-    def calculate_and_log_out(self, loss, pred, label, info=''):
-        loss = np.mean(np.array(loss))
+    def calculate_and_log_out(self, fetch_list, info=''):
+        loss = np.mean(np.array(fetch_list[0]))
+        pred = np.array(fetch_list[1])
+        label = np.array(fetch_list[2])
         hit_at_one = youtube8m_metrics.calculate_hit_at_one(pred, label)
         perr = youtube8m_metrics.calculate_precision_at_equal_recall_rate(pred,
                                                                           label)
@@ -68,86 +73,174 @@
         logger.info(info + ' , loss = {0}, Hit@1 = {1}, PERR = {2}, GAP = {3}'.format(\
                     '%.6f' % loss, '%.2f' % hit_at_one, '%.2f' % perr, '%.2f' % gap))
 
-    def accumulate(self, loss, pred, label, info=''):
-        self.calculator.accumulate(loss, pred, label)
+    def accumulate(self, fetch_list, info=''):
+        if self.mode == 'infer':
+            predictions = np.array(fetch_list[0])
+            video_id = fetch_list[1]
+            for i in range(len(predictions)):
+                topk_inds = predictions[i].argsort()[0 - self.topk:]
+                topk_inds = topk_inds[::-1]
+                preds = predictions[i][topk_inds]
+                self.infer_results.append(
+                    (video_id[i], topk_inds.tolist(), preds.tolist()))
+        else:
+            loss = np.array(fetch_list[0])
+            pred = np.array(fetch_list[1])
+            label = np.array(fetch_list[2])
+            self.calculator.accumulate(loss, pred, label)
+
+    def finalize_and_log_out(self, info='', savedir='./'):
+        if self.mode == 'infer':
+            for item in self.infer_results:
+                logger.info('video_id {} , topk({}) preds: \n'.format(item[
+                    0], self.topk))
+                for i in range(len(item[1])):
+                    logger.info('\t class: {}, probability {} \n'.format(
+                        item[1][i], item[2][i]))
+            # save infer result into output dir
+            #json.dump(self.infer_results, xxxx)
-    def finalize_and_log_out(self, info=''):
-        epoch_info_dict = self.calculator.get()
-        logger.info(info + '\tavg_hit_at_one: {0},\tavg_perr: {1},\tavg_loss :{2},\taps: {3},\tgap:{4}'\
+        else:
+            epoch_info_dict = self.calculator.get()
+            logger.info(info + '\tavg_hit_at_one: {0},\tavg_perr: {1},\tavg_loss :{2},\taps: {3},\tgap:{4}'\
                     .format(epoch_info_dict['avg_hit_at_one'], epoch_info_dict['avg_perr'], \
                            epoch_info_dict['avg_loss'], epoch_info_dict['aps'], epoch_info_dict['gap']))
 
     def reset(self):
         self.calculator.clear()
+        if self.mode == 'infer':
+            self.infer_results = []
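The top-k selection in `accumulate` relies on the fact that `argsort()[0 - topk:]` is simply `[-topk:]`: the indices of the k largest scores in ascending order, which are then reversed. In isolation, with toy numbers:

```python
import numpy as np

probs = np.array([0.05, 0.7, 0.1, 0.15])
topk = 2
topk_inds = probs.argsort()[-topk:][::-1]  # indices of the k largest scores
print(topk_inds, probs[topk_inds])         # [1 3] [0.7  0.15]
```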
 
 
 class Kinetics400Metrics(Metrics):
     def __init__(self, name, mode, metrics_args):
         self.name = name
         self.mode = mode
+        self.topk = metrics_args['MODEL']['topk']
         self.calculator = kinetics_metrics.MetricsCalculator(name, mode.lower())
-
-    def calculate_and_log_out(self, loss, pred, label, info=''):
-        if loss is not None:
+        if self.mode == 'infer':
+            self.infer_results = []
+            self.kinetics_labels = metrics_args['INFER']['kinetics_labels']
+            self.labels_list = json.load(open(self.kinetics_labels))
+
+    def calculate_and_log_out(self, fetch_list, info=''):
+        if len(fetch_list) == 3:
+            loss = fetch_list[0]
             loss = np.mean(np.array(loss))
+            pred = np.array(fetch_list[1])
+            label = np.array(fetch_list[2])
         else:
             loss = 0.
+            pred = np.array(fetch_list[0])
+            label = np.array(fetch_list[1])
         acc1, acc5 = self.calculator.calculate_metrics(loss, pred, label)
         logger.info(info + '\tLoss: {},\ttop1_acc: {}, \ttop5_acc: {}'.format('%.6f' % loss, \
                        '%.2f' % acc1, '%.2f' % acc5))
         return loss
 
-    def accumulate(self, loss, pred, label, info=''):
-        self.calculator.accumulate(loss, pred, label)
-
-    def finalize_and_log_out(self, info=''):
-        self.calculator.finalize_metrics()
-        metrics_dict = self.calculator.get_computed_metrics()
-        loss = metrics_dict['avg_loss']
-        acc1 = metrics_dict['avg_acc1']
-        acc5 = metrics_dict['avg_acc5']
-        logger.info(info + '\tLoss: {},\ttop1_acc: {}, \ttop5_acc: {}'.format('%.6f' % loss, \
+    def accumulate(self, fetch_list, info=''):
+        if self.mode == 'infer':
+            predictions = np.array(fetch_list[0])
+            video_id = fetch_list[1]
+            for i in range(len(predictions)):
+                topk_inds = predictions[i].argsort()[0 - self.topk:]
+                topk_inds = topk_inds[::-1]
+                preds = predictions[i][topk_inds]
+                self.infer_results.append(
+                    (video_id[i], topk_inds.tolist(), preds.tolist()))
+        else:
+            if len(fetch_list) == 3:
+                loss = fetch_list[0]
+                loss = np.mean(np.array(loss))
+                pred = np.array(fetch_list[1])
+                label = np.array(fetch_list[2])
+            else:
+                loss = 0.
+                pred = np.array(fetch_list[0])
+                label = np.array(fetch_list[1])
+            self.calculator.accumulate(loss, pred, label)
+
+    def finalize_and_log_out(self, info='', savedir='./'):
+        if self.mode == 'infer':
+            for item in self.infer_results:
+                logger.info('video_id {} , topk({}) preds: \n'.format(item[
+                    0], self.topk))
+                for i in range(len(item[1])):
+                    logger.info('\t class: {}, probability: {} \n'.format(
+                        self.labels_list[item[1][i]], item[2][i]))
+            # save infer results
+        else:
+            self.calculator.finalize_metrics()
+            metrics_dict = self.calculator.get_computed_metrics()
+            loss = metrics_dict['avg_loss']
+            acc1 = metrics_dict['avg_acc1']
+            acc5 = metrics_dict['avg_acc5']
+            logger.info(info + '\tLoss: {},\ttop1_acc: {}, \ttop5_acc: {}'.format('%.6f' % loss, \
                        '%.2f' % acc1, '%.2f' % acc5))
 
     def reset(self):
         self.calculator.reset()
+        if self.mode == 'infer':
+            self.infer_results = []
 
 
 class MulticropMetrics(Metrics):
     def __init__(self, name, mode, metrics_args):
         self.name = name
         self.mode = mode
-        if mode == 'test':
+        if (mode == 'test') or (mode == 'infer'):
             args = {}
-            args['num_test_clips'] = metrics_args.TEST.num_test_clips
+            args['num_test_clips'] = metrics_args[mode.upper()][
+                'num_test_clips']
             args['dataset_size'] = metrics_args.TEST.dataset_size
             args['filename_gt'] = metrics_args.TEST.filename_gt
-            args['checkpoint_dir'] = metrics_args.TEST.checkpoint_dir
+            args['checkpoint_dir'] = metrics_args[mode.upper()][
+                'checkpoint_dir']
             args['num_classes'] = metrics_args.MODEL.num_classes
+            args['labels_list'] = metrics_args.INFER.kinetics_labels
             self.calculator = multicrop_test_metrics.MetricsCalculator(
                 name, mode.lower(), **args)
         else:
             self.calculator = kinetics_metrics.MetricsCalculator(name,
                                                                  mode.lower())
 
-    def calculate_and_log_out(self, loss, pred, label, info=''):
-        if self.mode == 'test':
+    def calculate_and_log_out(self, fetch_list, info=''):
+        if (self.mode == 'test') or (self.mode == 'infer'):
             pass
         else:
-            if loss is not None:
+            if len(fetch_list) == 3:
+                loss = fetch_list[0]
                 loss = np.mean(np.array(loss))
+                pred = fetch_list[1]
+                label = fetch_list[2]
             else:
                 loss = 0.
+                pred = fetch_list[0]
+                label = fetch_list[1]
             acc1, acc5 = self.calculator.calculate_metrics(loss, pred, label)
             logger.info(info + '\tLoss: {},\ttop1_acc: {}, \ttop5_acc: {}'.format('%.6f' % loss, \
                        '%.2f' % acc1, '%.2f' % acc5))
 
-    def accumulate(self, loss, pred, label):
-        self.calculator.accumulate(loss, pred, label)
+    def accumulate(self, fetch_list):
+        if self.mode == 'test':
+            pred = fetch_list[0]
+            label = fetch_list[1]
+            self.calculator.accumulate(pred, label)
+        elif self.mode == 'infer':
+            pred = fetch_list[0]
+            video_id = fetch_list[1]
+            self.calculator.accumulate_infer_results(pred, video_id)
+        else:
+            loss = fetch_list[0]
+            pred = fetch_list[1]
+            label = fetch_list[2]
+            self.calculator.accumulate(loss, pred, label)
 
-    def finalize_and_log_out(self, info=''):
+    def finalize_and_log_out(self, info='', savedir='./'):
         if self.mode == 'test':
             self.calculator.finalize_metrics()
+        elif self.mode == 'infer':
+            self.calculator.finalize_infer_metrics()
         else:
             self.calculator.finalize_metrics()
             metrics_dict = self.calculator.get_computed_metrics()
@@ -177,22 +270,32 @@ class DetectionMetrics(Metrics):
         args['name'] = name
         self.calculator = detection_metrics.MetricsCalculator(**args)
 
-    def calculate_and_log_out(self, loss, pred, label, info=''):
-        logger.info(info +
-                    '\tLoss = {}, \tloc_loss = {}, \tcls_loss = {}'.format(
-                        np.mean(loss[0]), np.mean(loss[1]), np.mean(loss[2])))
-
-    def accumulate(self, loss, pred, label):
-        self.calculator.accumulate(loss, pred, label)
-
-    def finalize_and_log_out(self, info=''):
-        self.calculator.finalize_metrics()
-        metrics_dict = self.calculator.get_computed_metrics()
-        loss = metrics_dict['avg_loss']
-        loc_loss = metrics_dict['avg_loc_loss']
-        cls_loss = metrics_dict['avg_cls_loss']
-        logger.info(info + '\tLoss: {},\tloc_loss: {}, \tcls_loss: {}'.format('%.6f' % loss, \
-                   '%.6f' % loc_loss, '%.6f' % cls_loss))
+    def calculate_and_log_out(self, fetch_list, info=''):
+        total_loss = np.array(fetch_list[0])
+        loc_loss = np.array(fetch_list[1])
+        cls_loss = np.array(fetch_list[2])
+        logger.info(
+            info + '\tLoss = {}, \tloc_loss = {}, \tcls_loss = {}'.format(
+                np.mean(total_loss), np.mean(loc_loss), np.mean(cls_loss)))
+
+    def accumulate(self, fetch_list):
+        if self.mode == 'infer':
+            self.calculator.accumulate_infer_results(fetch_list)
+        else:
+            self.calculator.accumulate(fetch_list)
+
+    def finalize_and_log_out(self, info='', savedir='./'):
+        if self.mode == 'infer':
+            self.calculator.finalize_infer_metrics(savedir)
+        else:
+            self.calculator.finalize_metrics(savedir)
+            metrics_dict = self.calculator.get_computed_metrics()
+            loss = metrics_dict['avg_loss']
+            loc_loss = metrics_dict['avg_loc_loss']
+            cls_loss = metrics_dict['avg_cls_loss']
+            logger.info(info + '\tLoss: {},\tloc_loss: {}, \tcls_loss: {}'.format('%.6f' % loss, \
+                       '%.6f' % loc_loss, '%.6f' % cls_loss))
 
     def reset(self):
         self.calculator.reset()
diff --git a/PaddleCV/PaddleVideo/metrics/multicrop_test/multicrop_test_metrics.py b/PaddleCV/PaddleVideo/metrics/multicrop_test/multicrop_test_metrics.py
index 278df696..6a9e9b0f 100644
--- a/PaddleCV/PaddleVideo/metrics/multicrop_test/multicrop_test_metrics.py
+++ b/PaddleCV/PaddleVideo/metrics/multicrop_test/multicrop_test_metrics.py
@@ -24,6 +24,7 @@ import datetime
 import logging
 from collections import defaultdict
 import pickle
+import json
 
 logger = logging.getLogger(__name__)
 
@@ -47,6 +48,7 @@
         self.filename_gt = metrics_args['filename_gt']
         self.checkpoint_dir = metrics_args['checkpoint_dir']
         self.num_classes = metrics_args['num_classes']
+        self.labels_list = json.load(open(metrics_args['labels_list']))
         self.reset()
 
     def reset(self):
@@ -61,7 +63,7 @@
     def calculate_metrics(self, loss, pred, labels):
         pass
 
-    def accumulate(self, loss, pred, labels):
+    def accumulate(self, pred, labels):
         labels = labels.astype(int)
         labels = labels[:, 0]
         for i in range(pred.shape[0]):
@@ -77,18 +79,26 @@
             logger.info("({0} / {1}) videos".format(\
                 len(self.seen_inds), self.dataset_size))
 
+    def accumulate_infer_results(self, pred, labels):
+        for i in range(pred.shape[0]):
+            vid = labels[i][0]
+            probs = pred[i, :].tolist()
+            self.seen_inds[vid] += 1
+            if self.seen_inds[vid] > self.num_test_clips:
+                logger.warning('Video id {} has been seen. Skip.'.format(vid))
+                continue
+            save_pairs = [vid, probs]
+            self.results.append(save_pairs)
+
     def finalize_metrics(self):
         if self.filename_gt is not None:
             evaluate_results(self.results, self.filename_gt, self.dataset_size, \
                          self.num_classes, self.num_test_clips)
-        # save temporary file
-        if not os.path.isdir(self.checkpoint_dir):
-            os.makedirs(self.checkpoint_dir)
-        pkl_path = os.path.join(self.checkpoint_dir, "results_probs.pkl")
-        with open(pkl_path, 'wb') as f:
-            pickle.dump(self.results, f, protocol=0)
-        logger.info('Temporary file saved to: {}'.format(pkl_path))
+    def finalize_infer_metrics(self):
+        evaluate_infer_results(self.results, self.num_classes,
+                               self.num_test_clips, self.labels_list)
 
 
 def read_groundtruth(filename_gt):
@@ -110,7 +120,9 @@
     counts = np.zeros(sample_num, dtype=np.int32)
     probs = np.zeros((sample_num, class_num))
 
-    assert (len(gt_labels) == sample_num)
+    assert (len(gt_labels) == sample_num), \
+        "the number of gt_labels({}) should be the same as sample_num({})".format(
+        len(gt_labels), sample_num)
     """
     clip_accuracy: the (e.g.) 10*19761 clips' average accuracy
     clip1_accuracy: the 1st clip's accuracy (starting from frame 0)
@@ -192,3 +204,30 @@
         logger.info('-' * 80)
 
     return
+
+
+def evaluate_infer_results(results, num_classes, num_test_clips, labels_list):
+    probs = {}
+    counts = {}
+    for entry in results:
+        vid = entry[0]
+        pred = entry[1]
+        if vid in probs.keys():
+            assert vid in counts.keys(
+            ), "If vid in probs, it should be in counts"
+            probs[vid] = (probs[vid] * counts[vid] + pred) / (counts[vid] + 1)
+            counts[vid] += 1
+        else:
+            probs[vid] = np.copy(pred)
+            counts[vid] = 1
+
+    topk = 20
+
+    for vid in probs.keys():
+        pred = probs[vid]
+        sorted_inds = np.argsort(pred)[::-1]
+        topk_inds = sorted_inds[:topk]
+        logger.info('video {}, topk({}) preds: \n'.format(vid, topk))
+        for ind in topk_inds:
+            logger.info('\t class: {}, probability {} \n'.format(
+                labels_list[ind], pred[ind]))
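`evaluate_infer_results` merges the clip-level scores of each video with an incremental mean, mean_{n+1} = (n * mean_n + x) / (n + 1), so the full set of clip predictions never has to be held per video. A self-contained illustration with toy numbers:

```python
import numpy as np

mean, n = np.zeros(3), 0
for clip_probs in [np.array([0.2, 0.5, 0.3]), np.array([0.4, 0.3, 0.3])]:
    mean = (mean * n + clip_probs) / (n + 1)  # same update as above
    n += 1
print(mean)  # [0.3 0.4 0.3]
```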
diff --git a/PaddleCV/PaddleVideo/models/attention_cluster/README.md b/PaddleCV/PaddleVideo/models/attention_cluster/README.md
index 6314f2d0..9b8a0ddf 100644
--- a/PaddleCV/PaddleVideo/models/attention_cluster/README.md
+++ b/PaddleCV/PaddleVideo/models/attention_cluster/README.md
@@ -26,21 +26,24 @@ Shifting Operation adds an independent learnable linear transform to each attention unit's output
 
 ## Data preparation
 
-Attention Cluster uses the 2nd-Youtube-8M dataset; for data download and preparation see the [data notes](../../dataset/README.md)
+Attention Cluster uses the 2nd-Youtube-8M dataset; for data download and preparation see the [data notes](../../data/dataset/README.md)
 
 ## Model training
 
 Once the data is ready, training can be started in either of the following two ways:
 
-    python train.py --model_name=AttentionCluster
-                    --config=./configs/attention_cluster.txt
-                    --save_dir=checkpoints
-                    --log_interval=10
-                    --valid_interval=1
+    export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
+    python train.py --model_name=AttentionCluster \
+                    --config=./configs/attention_cluster.yaml \
+                    --log_interval=10 \
+                    --valid_interval=1 \
+                    --use_gpu=True \
+                    --save_dir=./data/checkpoints \
+                    --fix_random_seed=False
 
-    bash scripts/train/train_attention_cluster.sh
+    bash run.sh train AttentionCluster ./configs/attention_cluster.yaml
 
-- You can download the released [model](https://paddlemodels.bj.bcebos.com/video_classification/attention_cluster_youtube8m.tar.gz) and pass its path via `--resume` for finetuning and further development
+- You can download the released [model](https://paddlemodels.bj.bcebos.com/video_classification/AttentionCluster_final.pdparams) and pass its path via `--resume` for finetuning and further development, or set `resume` in the run.sh script to the path holding the unpacked weights.
 
 **Data reader notes:** the model reads the pre-extracted `rgb` and `audio` features of the Youtube-8M dataset and uniformly samples 100 frames per video; this value is set by the `seg_num` parameter in the config file.
 
@@ -56,16 +59,21 @@
 
 The model can be evaluated in either of the following two ways:
 
-    python test.py --model_name=AttentionCluster
-                   --config=configs/attention_cluster.txt
-                   --log_interval=1
-                   --weights=$PATH_TO_WEIGHTS
+    python eval.py --model_name=AttentionCluster \
+                   --config=./configs/attention_cluster.yaml \
+                   --log_interval=1 \
+                   --weights=$PATH_TO_WEIGHTS \
+                   --use_gpu=True
 
-    bash scripts/test/test_attention_cluster.sh
+    bash run.sh eval AttentionCluster ./configs/attention_cluster.yaml
 
-- When evaluating with `scripts/test/test_attention_cluster.sh`, modify the `--weights` parameter in the script to point to the weights to be evaluated.
+- When evaluating with `run.sh`, modify the `weights` parameter in the script to point to the weights to be evaluated.
 
-- If `--weights` is not specified, the script downloads the released [model](https://paddlemodels.bj.bcebos.com/video_classification/attention_cluster_youtube8m.tar.gz) for evaluation
+- If `--weights` is not specified, the script downloads the released [model](https://paddlemodels.bj.bcebos.com/video_classification/AttentionCluster_final.pdparams) for evaluation
+
+- Evaluation results (GAP, Hit@1 and other accuracy metrics) are printed directly to the log
+
+- To evaluate on CPU, set `use_gpu` to False
 
 With the following parameters:
 
 | cluster\_nums | 32 |
 | seg\_num | 100 |
 | batch\_size | 2048 |
-| nums\_gpu | 7 |
+| num\_gpus | 8 |
 
 Evaluation accuracy on the 2nd-YouTube-8M dataset is as follows:
 
@@ -87,17 +95,26 @@
 
 ## Model inference
 
-Model inference can be run with the following command:
+Model inference can be started in either of the following two ways:
+
+    python predict.py --model_name=AttentionCluster \
+                      --config=configs/attention_cluster.yaml \
+                      --log_interval=1 \
+                      --weights=$PATH_TO_WEIGHTS \
+                      --filelist=$FILELIST \
+                      --use_gpu=True
+
+    bash run.sh predict AttentionCluster ./configs/attention_cluster.yaml
+
+- When launching from the python command line, `--filelist` specifies the list of files to run inference on; if unset, it defaults to data/dataset/youtube8m/infer.list. `--weights` is the path to trained weights; if unset, the program automatically downloads the released weights. If you do not want to set these two arguments, leave them off the command line and the defaults will be used.
+
+- When predicting with `run.sh`, modify the `weights` parameter in the script to point to the weights to use.
 
-    python infer.py --model_name=attention_cluster
-                    --config=configs/attention_cluster.txt
-                    --log_interval=1
-                    --weights=$PATH_TO_WEIGHTS
-                    --filelist=$FILELIST
+- If `--weights` is not specified, the script downloads the released [model](https://paddlemodels.bj.bcebos.com/video_classification/AttentionCluster_final.pdparams) for inference
 
-- Inference results are stored in `AttentionCluster_infer_result` in `pickle` format.
+- Inference results are printed directly to the log, showing the predicted class probabilities for each test sample.
 
-- If `--weights` is not specified, the script downloads the released [model](https://paddlemodels.bj.bcebos.com/video_classification/attention_cluster_youtube8m.tar.gz) for inference
+- To predict on CPU, set `use_gpu` to False
 
 
 ## References
diff --git a/PaddleCV/PaddleVideo/models/attention_cluster/attention_cluster.py b/PaddleCV/PaddleVideo/models/attention_cluster/attention_cluster.py
old mode 100755
new mode 100644
index 84282544..555008af
--- a/PaddleCV/PaddleVideo/models/attention_cluster/attention_cluster.py
+++ b/PaddleCV/PaddleVideo/models/attention_cluster/attention_cluster.py
@@ -41,36 +41,24 @@ class AttentionCluster(ModelBase):
         self.learning_rate = self.get_config_from_sec('train', 'learning_rate',
                                                       1e-3)
 
-    def build_input(self, use_pyreader):
+    def build_input(self, use_pyreader=True):
+        self.feature_input = []
+        for name, dim in zip(self.feature_names, self.feature_dims):
+            self.feature_input.append(
+                fluid.layers.data(
+                    shape=[self.seg_num, dim], dtype='float32', name=name))
+        if self.mode != 'infer':
+            self.label_input = fluid.layers.data(
+                shape=[self.class_num], dtype='float32', name='label')
+        else:
+            self.label_input = None
         if use_pyreader:
             assert self.mode != 'infer', \
-                'pyreader is not recommendated when infer, please set use_pyreader to be false.'
-            shapes = []
-            for dim in self.feature_dims:
-                shapes.append([-1, self.seg_num, dim])
-            shapes.append([-1, self.class_num])  # label
-            self.py_reader = fluid.layers.py_reader(
-                capacity=1024,
-                shapes=shapes,
-                lod_levels=[0] * (self.feature_num + 1),
-                dtypes=['float32'] * (self.feature_num + 1),
-                name='train_py_reader'
-                if self.is_training else 'test_py_reader',
-                use_double_buffer=True)
-            inputs = fluid.layers.read_file(self.py_reader)
-            self.feature_input = inputs[:self.feature_num]
-            self.label_input = inputs[-1]
-        else:
-            self.feature_input = []
-            for name, dim in zip(self.feature_names, self.feature_dims):
-                self.feature_input.append(
-                    fluid.layers.data(
-                        shape=[self.seg_num, dim], dtype='float32', name=name))
-            if self.mode == 'infer':
-                self.label_input = None
-            else:
-                self.label_input = fluid.layers.data(
-                    shape=[self.class_num], dtype='float32', name='label')
+                'pyreader is not recommended when infer, please set use_pyreader to be false.'
+            self.py_reader = fluid.io.PyReader(
+                feed_list=self.feature_input + [self.label_input],
+                capacity=8,
+                iterable=True)
 
     def build_model(self):
         att_outs = []
@@ -132,8 +120,23 @@
             self.label_input
         ]
 
+    def fetches(self):
+        if self.mode == 'train' or self.mode == 'valid':
+            losses = self.loss()
+            fetch_list = [losses, self.output, self.label_input]
+        elif self.mode == 'test':
+            losses = self.loss()
+            fetch_list = [losses, self.output, self.label_input]
+        elif self.mode == 'infer':
+            fetch_list = [self.output]
+        else:
+            raise NotImplementedError('mode {} not implemented'.format(
+                self.mode))
+
+        return fetch_list
+
     def weights_info(self):
         return (
-            "attention_cluster_youtube8m",
-            "https://paddlemodels.bj.bcebos.com/video_classification/attention_cluster_youtube8m.tar.gz"
+            "AttentionCluster_final.pdparams",
+            "https://paddlemodels.bj.bcebos.com/video_classification/AttentionCluster_final.pdparams"
         )
diff --git a/PaddleCV/PaddleVideo/models/attention_cluster/logistic_model.py b/PaddleCV/PaddleVideo/models/attention_cluster/logistic_model.py
old mode 100755
new mode 100644
diff --git a/PaddleCV/PaddleVideo/models/attention_cluster/shifting_attention.py b/PaddleCV/PaddleVideo/models/attention_cluster/shifting_attention.py
old mode 100755
new mode 100644
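The pattern above, repeated across the models in this PR, swaps the graph-side `fluid.layers.py_reader` + `read_file` pair for a `fluid.io.PyReader` bound to already-declared `fluid.layers.data` variables. A minimal, self-contained sketch of how such an iterable reader is typically fed and drained (a toy network, not any of this repository's models):

```python
import numpy as np
import paddle
import paddle.fluid as fluid

x = fluid.layers.data(name='x', shape=[4], dtype='float32')
y = fluid.layers.data(name='y', shape=[1], dtype='float32')
reader = fluid.io.PyReader(feed_list=[x, y], capacity=8, iterable=True)

def sample_generator():
    for _ in range(16):
        yield np.random.rand(4).astype('float32'), np.random.rand(1).astype('float32')

place = fluid.CPUPlace()
reader.decorate_sample_list_generator(
    paddle.batch(sample_generator, batch_size=4), places=[place])

loss = fluid.layers.reduce_mean(
    fluid.layers.square(fluid.layers.fc(input=x, size=1) - y))
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
for data in reader():  # iterable=True: each item is ready-to-use feed data
    print(exe.run(feed=data, fetch_list=[loss.name]))
```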
diff --git a/PaddleCV/PaddleVideo/models/attention_lstm/README.md b/PaddleCV/PaddleVideo/models/attention_lstm/README.md
index 50a835b7..ffd10bd8 100644
--- a/PaddleCV/PaddleVideo/models/attention_lstm/README.md
+++ b/PaddleCV/PaddleVideo/models/attention_lstm/README.md
@@ -12,13 +12,13 @@
 
 ## Model overview
 
-Recursive neural networks (RNN) are commonly used for sequence data; they model the temporal information of consecutive video frames and are a basic, common approach in video classification. This model uses a bidirectional long short memory network (LSTM) to encode all frame features of a video in order. Unlike traditional methods that directly take the LSTM output at the last time step, this model adds an Attention layer: the hidden state at every time step gets an adaptive weight, and the final feature vector is their linear weighted sum. The paper implements a two-layer LSTM, while this code implements a bidirectional LSTM with Attention; for the Attention layer see [AttentionCluster](https://arxiv.org/abs/1711.09550).
+Recurrent neural networks (RNN) are commonly used for sequence data; they model the temporal information of consecutive video frames and are a basic, common approach in video classification. This model uses a bidirectional long short-term memory network (LSTM) to encode all frame features of a video in order. Unlike traditional methods that directly take the LSTM output at the last time step, this model adds an Attention layer: the hidden state at every time step gets an adaptive weight, and the final feature vector is their linear weighted sum. The reference paper implements a two-layer LSTM, while this code implements a bidirectional LSTM with Attention; for the Attention layer see [AttentionCluster](https://arxiv.org/abs/1711.09550).
 
 Refer to [Beyond Short Snippets: Deep Networks for Video Classification](https://arxiv.org/abs/1503.08909) for details.
 
 ## Data preparation
 
-AttentionLSTM uses the 2nd-Youtube-8M dataset; see the [data notes](../../dataset/README.md) for the data part
+AttentionLSTM uses the 2nd-Youtube-8M dataset; see the [data notes](../../data/dataset/README.md) for the data part
 
 ## Model training
 
@@ -26,32 +26,42 @@
 
 Once the data is ready, training can be started in either of the following two ways:
 
-    python train.py --model_name=AttentionLSTM
-                    --config=./configs/attention_lstm.txt
-                    --save_dir=checkpoints
-                    --log_interval=10
-                    --valid_interval=1
+    export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
+    python train.py --model_name=AttentionLSTM \
+                    --config=./configs/attention_lstm.yaml \
+                    --log_interval=10 \
+                    --valid_interval=1 \
+                    --use_gpu=True \
+                    --save_dir=./data/checkpoints \
+                    --fix_random_seed=False
 
-    bash scripts/train/train_attention_lstm.sh
+    bash run.sh train AttentionLSTM ./configs/attention_lstm.yaml
 
 - AttentionLSTM is trained with 8 Nvidia Tesla P40 cards and a total batch size of 1024.
 
 ### Finetuning from a pretrained model
 
-Please download the provided [model](https://paddlemodels.bj.bcebos.com/video_classification/attention_lstm_youtube8m.tar.gz) first, and add `--resume` in the script above, set to the path of the saved pretrained model.
+Please download the provided [model](https://paddlemodels.bj.bcebos.com/video_classification/AttentionLSTM_final.pdparams) first, and add `--resume` in the script above, set to the path of the saved pretrained model.
 
 ## Model evaluation
 
 The model can be evaluated in either of the following two ways:
 
-    python test.py --model_name=AttentionLSTM
-                   --config=configs/attention_lstm.txt
-                   --log_interval=1
-                   --weights=$PATH_TO_WEIGHTS
+    python eval.py --model_name=AttentionLSTM \
+                   --config=./configs/attention_lstm.yaml \
+                   --log_interval=1 \
+                   --weights=$PATH_TO_WEIGHTS \
+                   --use_gpu=True
 
-    bash scripts/test/test_attention_lstm.sh
+    bash run.sh eval AttentionLSTM ./configs/attention_lstm.yaml
 
-- When evaluating with `scripts/test/test_attention_LSTM.sh`, modify the `--weights` parameter in the script to point to the weights to be evaluated.
-- If `--weights` is not specified, the script downloads the released [model](https://paddlemodels.bj.bcebos.com/video_classification/attention_lstm_youtube8m.tar.gz) for evaluation
+- When evaluating with `run.sh`, modify the `weights` parameter in the script to point to the weights to be evaluated.
+
+- If `weights` is not specified, the script downloads the released [model](https://paddlemodels.bj.bcebos.com/video_classification/AttentionLSTM_final.pdparams) for evaluation
+
+- Evaluation results (GAP, Hit@1 and other accuracy metrics) are printed directly to the log
+
+- To evaluate on CPU, set `use_gpu` to False
 
 
 The model parameters are listed below:
 
@@ -73,17 +83,28 @@
 
 ## Model inference
 
-Model inference can be run with the following command:
+Model inference can be started in either of the following two ways:
+
+    python predict.py --model_name=AttentionLSTM \
+                      --config=configs/attention_lstm.yaml \
+                      --log_interval=1 \
+                      --weights=$PATH_TO_WEIGHTS \
+                      --filelist=$FILELIST \
+                      --use_gpu=True
+
+    bash run.sh predict AttentionLSTM ./configs/attention_lstm.yaml
+
+- When launching from the python command line, `--filelist` specifies the list of files to run inference on; if unset, it defaults to data/dataset/youtube8m/infer.list. `--weights` is the path to trained weights; if unset, the program automatically downloads the released weights. If you do not want to set these two arguments, leave them off the command line and the defaults will be used.
+
+- When predicting with `run.sh`, modify the `weights` parameter in the script to point to the weights to use.
+
+- If `--weights` is not specified, the script downloads the released [model](https://paddlemodels.bj.bcebos.com/video_classification/AttentionLSTM_final.pdparams) for inference
 
-    python infer.py --model_name=attention_lstm
-                    --config=configs/attention_lstm.txt
-                    --log_interval=1
-                    --weights=$PATH_TO_WEIGHTS
-                    --filelist=$FILELIST
+- Inference results are printed directly to the log, showing the predicted class probabilities for each test sample.
 
-- Inference results are stored in `AttentionLSTM_infer_result` in `pickle` format.
+- To predict on CPU, set `use_gpu` to False
 
-- If `--weights` is not specified, the script downloads the released [model](https://paddlemodels.bj.bcebos.com/video_classification/attention_lstm_youtube8m.tar.gz) for inference
 
 ## References
diff --git a/PaddleCV/PaddleVideo/models/attention_lstm/attention_lstm.py b/PaddleCV/PaddleVideo/models/attention_lstm/attention_lstm.py
old mode 100755
new mode 100644
index febd558b..ede38b8e
--- a/PaddleCV/PaddleVideo/models/attention_lstm/attention_lstm.py
+++ b/PaddleCV/PaddleVideo/models/attention_lstm/attention_lstm.py
@@ -53,35 +53,23 @@
                                                        0.1)
 
     def build_input(self, use_pyreader):
+        self.feature_input = []
+        for name, dim in zip(self.feature_names, self.feature_dims):
+            self.feature_input.append(
+                fluid.layers.data(
+                    shape=[dim], lod_level=1, dtype='float32', name=name))
+        if self.mode != 'infer':
+            self.label_input = fluid.layers.data(
+                shape=[self.num_classes], dtype='float32', name='label')
+        else:
+            self.label_input = None
         if use_pyreader:
             assert self.mode != 'infer', \
-                'pyreader is not recommendated when infer, please set use_pyreader to be false.'
-            shapes = []
-            for dim in self.feature_dims:
-                shapes.append([-1, dim])
-            shapes.append([-1, self.num_classes])  # label
-            self.py_reader = fluid.layers.py_reader(
-                capacity=1024,
-                shapes=shapes,
-                lod_levels=[1] * self.feature_num + [0],
-                dtypes=['float32'] * (self.feature_num + 1),
-                name='train_py_reader'
-                if self.is_training else 'test_py_reader',
-                use_double_buffer=True)
-            inputs = fluid.layers.read_file(self.py_reader)
-            self.feature_input = inputs[:self.feature_num]
-            self.label_input = inputs[-1]
-        else:
-            self.feature_input = []
-            for name, dim in zip(self.feature_names, self.feature_dims):
-                self.feature_input.append(
-                    fluid.layers.data(
-                        shape=[dim], lod_level=1, dtype='float32', name=name))
-            if self.mode == 'infer':
-                self.label_input = None
-            else:
-                self.label_input = fluid.layers.data(
-                    shape=[self.num_classes], dtype='float32', name='label')
+                'pyreader is not recommended when infer, please set use_pyreader to be false.'
+            self.py_reader = fluid.io.PyReader(
+                feed_list=self.feature_input + [self.label_input],
+                capacity=8,
+                iterable=True)
 
     def build_model(self):
         att_outs = []
@@ -146,8 +134,23 @@
             self.label_input
         ]
 
+    def fetches(self):
+        if self.mode == 'train' or self.mode == 'valid':
+            losses = self.loss()
+            fetch_list = [losses, self.output, self.label_input]
+        elif self.mode == 'test':
+            losses = self.loss()
+            fetch_list = [losses, self.output, self.label_input]
+        elif self.mode == 'infer':
+            fetch_list = [self.output]
+        else:
+            raise NotImplementedError('mode {} not implemented'.format(
+                self.mode))
+
+        return fetch_list
+
     def weights_info(self):
         return (
-            'attention_lstm_youtube8m',
-            'https://paddlemodels.bj.bcebos.com/video_classification/attention_lstm_youtube8m.tar.gz'
+            'AttentionLSTM_final.pdparams',
+            'https://paddlemodels.bj.bcebos.com/video_classification/AttentionLSTM_final.pdparams'
         )
diff --git a/PaddleCV/PaddleVideo/models/attention_lstm/lstm_attention.py b/PaddleCV/PaddleVideo/models/attention_lstm/lstm_attention.py
old mode 100755
new mode 100644
diff --git a/PaddleCV/PaddleVideo/models/ctcn/README.md b/PaddleCV/PaddleVideo/models/ctcn/README.md
index 4d247d76..bded2862 100644
--- a/PaddleCV/PaddleVideo/models/ctcn/README.md
+++ b/PaddleCV/PaddleVideo/models/ctcn/README.md
@@ -13,29 +13,35 @@
 
 ## Model overview
 
-The C-TCN action localization model was developed in-house at Baidu and won ActivityNet 2018; open-sourced for the first time on Paddle, it gives developers a solution for video action localization. The model introduces a concept-wise temporal convolutional network: for each concept, a convolutional network first extracts temporal information separately, and the per-concept information is then combined. The backbone is a residual network plus FPN, and an SSD-like single-stage detection algorithm predicts and classifies anchor boxes along the temporal dimension.
+The C-TCN action localization model was developed in-house at Baidu and won ActivityNet 2018; open-sourced for the first time on PaddlePaddle, it gives developers a solution for video action localization. The model introduces a concept-wise temporal convolutional network: for each concept, a convolutional network first extracts temporal information separately, and the per-concept information is then combined. The backbone is a residual network plus FPN, and an SSD-like single-stage detection algorithm predicts and classifies anchor boxes along the temporal dimension.
 
 ## Data preparation
 
-C-TCN is trained on the dataset provided by ActivityNet 1.3; for data download and preparation see the [data notes](../../dataset/ctcn/README.md)
+C-TCN is trained on the dataset provided by ActivityNet 1.3; for data download and preparation see the [data notes](../../data/dataset/ctcn/README.md)
 
 ## Model training
 
 Once the data is ready, training can be started in either of the following two ways:
 
-    python train.py --model_name=CTCN
-                    --config=./configs/ctcn.txt
-                    --save_dir=checkpoints
-                    --log_interval=10
-                    --valid_interval=1
-                    --pretrain=${path_to_pretrain_model}
+    export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
+    export FLAGS_fast_eager_deletion_mode=1
+    export FLAGS_eager_delete_tensor_gb=0.0
+    export FLAGS_fraction_of_gpu_memory_to_use=0.98
+    python train.py --model_name=CTCN \
+                    --config=./configs/ctcn.yaml \
+                    --log_interval=10 \
+                    --valid_interval=1 \
+                    --use_gpu=True \
+                    --save_dir=./data/checkpoints \
+                    --fix_random_seed=False \
+                    --pretrain=$PATH_TO_PRETRAIN_MODEL
 
-    bash scripts/train/train_ctcn.sh
+    bash run.sh train CTCN ./configs/ctcn.yaml
 
-- To train from scratch, just use the launch script above; no pretrained model is needed
+- To train from scratch, just use the launch command line or script above; no pretrained model is needed
 
-- You can download the released [model](https://paddlemodels.bj.bcebos.com/video_detection/ctcn.tar.gz) and pass its path via `--resume` for finetuning and further development
+- You can download the released [model](https://paddlemodels.bj.bcebos.com/video_detection/CTCN_final.pdparams) and pass its path via `--resume` for finetuning and further development
 
 **Training strategy:**
 
@@ -48,18 +54,22 @@
 
 The model can be evaluated in either of the following two ways:
 
-    python test.py --model_name=CTCN
-                   --config=configs/ctcn.txt
-                   --log_interval=1
-                   --weights=$PATH_TO_WEIGHTS
+    python eval.py --model_name=CTCN \
+                   --config=./configs/ctcn.yaml \
+                   --log_interval=1 \
+                   --weights=$PATH_TO_WEIGHTS \
+                   --use_gpu=True
 
-    bash scripts/test/test_ctcn.sh
+    bash run.sh eval CTCN ./configs/ctcn.yaml
 
-- When evaluating with `scripts/test/test_ctcn.sh`, modify the `--weights` parameter in the script to point to the weights to be evaluated.
+- When evaluating with `run.sh`, modify the `weights` parameter in the script to point to the weights to be evaluated.
 
-- If `--weights` is not specified, the script downloads the released [model](https://paddlemodels.bj.bcebos.com/video_detection/ctcn.tar.gz) for evaluation
+- If `--weights` is not specified, the script downloads the released [model](https://paddlemodels.bj.bcebos.com/video_detection/CTCN_final.pdparams) for evaluation
 
-- Running the above saves the test results to a json file; use the official ActivityNet test script to compute MAP. See [metrics computation](../../metrics/detections/README.md) for the procedure
+- Running the above saves the test results to a json file, by default under data/evaluate\_results; the file name is generated automatically from the hyperparameters used, e.g. CTCN\_test\_res\_decode\_0.001\_0.8\_0.9\_0.004.json. Use the official ActivityNet test script to compute MAP; see [metrics computation](../../metrics/detections/README.md) for the procedure
+
+- To evaluate on CPU, set `use_gpu` to False in the command line above or in the run.sh script
 
 
 With the following parameters, the evaluation accuracy on ActivityNet 1.3 is:
 
@@ -70,17 +80,28 @@
 
 ## Model inference
 
-Model inference can be run with the following command:
+Model inference can be started in either of the following two ways:
+
+    python predict.py --model_name=CTCN \
+                      --config=./configs/ctcn.yaml \
+                      --log_interval=1 \
+                      --weights=$PATH_TO_WEIGHTS \
+                      --filelist=$FILELIST \
+                      --use_gpu=True
+
+    bash run.sh predict CTCN ./configs/ctcn.yaml
+
+- When launching from the python command line, `--filelist` specifies the list of files to run inference on; if unset, it defaults to data/dataset/youtube8m/infer.list. `--weights` is the path to trained weights; if unset, the program automatically downloads the released weights. If you do not want to set these two arguments, leave them off the command line and the defaults will be used.
+
+- When predicting with `run.sh`, modify the `weights` parameter in the script to point to the weights to use.
+
+- If `--weights` is not specified, the script downloads the released [model](https://paddlemodels.bj.bcebos.com/video_detection/CTCN_final.pdparams) for inference
 
-    python infer.py --model_name=CTCN
-                    --config=configs/ctcn.txt
-                    --log_interval=1
-                    --weights=$PATH_TO_WEIGHTS
-                    --filelist=$FILELIST
 
-- Inference results are stored in `CTCN_infer_result.pkl` in `pickle` format.
+- Inference results are stored in a json file, by default under `data/dataset/inference_results`; the file name is generated automatically from the hyperparameters used, e.g. CTCN\_infer\_res\_decode\_0.001\_0.8\_0.9\_0.004.json. Results are also printed to the log, showing each video's predicted segment start/end times and classes
 
-- If `--weights` is not specified, the script downloads the released [model](https://paddlemodels.bj.bcebos.com/video_detection/ctcn.tar.gz) for inference
+- To run inference on CPU, set `use_gpu` to False in the command line or run.sh script
 
 ## References
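The saved json follows the schema written by the detection metrics above: a `results` dict mapping each video name to a list of `{score, label, segment}` entries. A minimal sketch of reading it back, assuming the default output directory and the example file name from the README:

```python
import json

with open('data/evaluate_results/CTCN_test_res_decode_0.001_0.8_0.9_0.004.json') as f:
    res = json.load(f)

for fname, dets in list(res['results'].items())[:1]:  # first video only
    for d in dets:
        print(fname, d['label'], d['score'], d['segment'])  # [start_sec, end_sec]
```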
diff --git a/PaddleCV/PaddleVideo/models/ctcn/ctcn.py b/PaddleCV/PaddleVideo/models/ctcn/ctcn.py
index 7c618660..3fa6e55a 100644
--- a/PaddleCV/PaddleVideo/models/ctcn/ctcn.py
+++ b/PaddleCV/PaddleVideo/models/ctcn/ctcn.py
@@ -63,54 +63,42 @@
         loc_targets = None
         cls_targets = None
         fileid = None
+
+        image = fluid.layers.data(
+            name='image', shape=image_shape, dtype='float32')
+
+        feed_list = []
+        feed_list.append(image)
+        if (self.mode == 'train') or (self.mode == 'valid'):
+            loc_targets = fluid.layers.data(
+                name='loc_targets', shape=loc_shape, dtype='float32')
+            cls_targets = fluid.layers.data(
+                name='cls_targets', shape=cls_shape, dtype='int64')
+            feed_list.append(loc_targets)
+            feed_list.append(cls_targets)
+        elif self.mode == 'test':
+            loc_targets = fluid.layers.data(
+                name='loc_targets', shape=loc_shape, dtype='float32')
+            cls_targets = fluid.layers.data(
+                name='cls_targets', shape=cls_shape, dtype='int64')
+            fileid = fluid.layers.data(
+                name='fileid', shape=fileid_shape, dtype='int64')
+            feed_list.append(loc_targets)
+            feed_list.append(cls_targets)
+            feed_list.append(fileid)
+        elif self.mode == 'infer':
+            # only image feature input when inference
+            pass
+        else:
+            raise NotImplementedError('mode {} not implemented'.format(
+                self.mode))
+
         if use_pyreader:
             assert self.mode != 'infer', \
                 'pyreader is not recommendated when infer, please set use_pyreader to be false.'
-            if (self.mode == 'train') or (self.mode == 'valid'):
-                py_reader = fluid.layers.py_reader(
-                    capacity=100,
-                    shapes=[[-1] + image_shape, [-1] + loc_shape,
-                            [-1] + cls_shape],
-                    dtypes=['float32', 'float32', 'int64'],
-                    name='train_py_reader'
-                    if self.is_training else 'test_py_reader',
-                    use_double_buffer=True)
-                image, loc_targets, cls_targets = fluid.layers.read_file(
-                    py_reader)
-            elif self.mode == 'test':
-                py_reader = fluid.layers.py_reader(
-                    capacity=100,
-                    shapes=[[-1] + image_shape, [-1] + loc_shape, [-1] +
-                            cls_shape] + [-1, 1],
-                    dtypes=['float32', 'float32', 'int64', 'int64'],
-                    use_double_buffer=True)
-                image, loc_targets, cls_targets, fileid = fluid.layers.read_file(
-                    pyreader)
-            else:
-                raise NotImplementedError('mode {} not implemented'.format(
-                    self.mode))
-            self.py_reader = py_reader
-        else:
-            image = fluid.layers.data(
-                name='image', shape=image_shape, dtype='float32')
-            if (self.mode == 'train') or (self.mode == 'valid'):
-                loc_targets = fluid.layers.data(
-                    name='loc_targets', shape=loc_shape, dtype='float32')
-                cls_targets = fluid.layers.data(
-                    name='cls_targets', shape=cls_shape, dtype='int64')
-            elif self.mode == 'test':
-                loc_targets = fluid.layers.data(
-                    name='loc_targets', shape=loc_shape, dtype='float32')
-                cls_targets = fluid.layers.data(
-                    name='cls_targets', shape=cls_shape, dtype='int64')
-                fileid = fluid.layers.data(
-                    name='fileid', shape=fileid_shape, dtype='int64')
-            elif self.mode == 'infer':
-                # only image feature input when inference
-                pass
-            else:
-                raise NotImplementedError('mode {} not implemented'.format(
-                    self.mode))
+            self.py_reader = fluid.io.PyReader(
+                feed_list=feed_list, capacity=4, iterable=True)
+
         self.feature_input = [image]
         self.cls_targets = cls_targets
         self.loc_targets = loc_targets
@@ -170,10 +158,32 @@
         elif self.mode == 'infer':
             return self.feature_input
         else:
-            raise NotImplemented
+            raise NotImplementedError('mode {} not implemented'.format(
+                self.mode))
+
+    def fetches(self):
+        if (self.mode == 'train') or (self.mode == 'valid'):
+            losses = self.loss()
+            fetch_list = [item for item in losses]
+        elif self.mode == 'test':
+            losses = self.loss()
+            preds = self.outputs()
+            fetch_list = [item for item in losses] + \
+                         [item for item in preds] + \
+                         [self.fileid]
+        elif self.mode == 'infer':
+            preds = self.outputs()
+            fetch_list = [item for item in preds]
+        else:
+            raise NotImplementedError('mode {} not implemented'.format(
+                self.mode))
+        return fetch_list
 
     def pretrain_info(self):
         return (None, None)
 
     def weights_info(self):
-        return (None, None)
+        return (
+            'CTCN_final.pdparams',
+            'https://paddlemodels.bj.bcebos.com/video_detection/CTCN_final.pdparams'
+        )
diff --git a/PaddleCV/PaddleVideo/models/model.py b/PaddleCV/PaddleVideo/models/model.py
index 53a45940..ddce3968 100644
--- a/PaddleCV/PaddleVideo/models/model.py
+++ b/PaddleCV/PaddleVideo/models/model.py
@@ -13,6 +13,7 @@
 #limitations under the License.
 
 import os
+import wget
 import logging
 try:
     from configparser import ConfigParser
@@ -90,6 +91,10 @@
         "get feed inputs list"
         raise NotImplementError(self, self.feeds)
 
+    def fetches(self):
+        "get fetch list of model"
+        raise NotImplementedError(self, self.fetches)
+
     def weights_info(self):
         "get model weight default path and download url"
         raise NotImplementError(self, self.weights_info)
@@ -98,11 +103,15 @@
         "get model weight file path, download weight from Paddle if not exist"
         path, url = self.weights_info()
         path = os.path.join(WEIGHT_DIR, path)
+        if not os.path.isdir(WEIGHT_DIR):
+            logger.info('{} does not exist, will be created automatically.'.format(
+                WEIGHT_DIR))
+            os.makedirs(WEIGHT_DIR)
         if os.path.exists(path):
             return path
 
         logger.info("Download weights of {} from {}".format(self.name, url))
-        download(url, path)
+        wget.download(url, path)
         return path
 
     def pyreader(self):
@@ -123,6 +132,10 @@
             return None
 
         path = os.path.join(WEIGHT_DIR, path)
+        if not os.path.isdir(WEIGHT_DIR):
+            logger.info('{} does not exist, will be created automatically.'.format(
+                WEIGHT_DIR))
+            os.makedirs(WEIGHT_DIR)
         if os.path.exists(path):
             return path
 
@@ -136,7 +149,7 @@
         fluid.io.load_params(exe, pretrain, main_program=prog)
 
     def load_test_weights(self, exe, weights, prog, place):
-        fluid.io.load_params(exe, weights, main_program=prog)
+        fluid.io.load_params(exe, '', main_program=prog, filename=weights)
 
     def get_config_from_sec(self, sec, item, default=None):
         if sec.upper() not in self.cfg:
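The `load_test_weights` change above reflects the new weight format: the released `.pdparams` checkpoints are a single consolidated file rather than a directory with one file per variable, hence the `filename=` form of `fluid.io.load_params`. A minimal sketch, with paths assumed for illustration:

```python
import paddle.fluid as fluid

place = fluid.CPUPlace()
exe = fluid.Executor(place)
prog = fluid.default_main_program()  # program whose parameters are to be filled

# old layout: a directory with one file per parameter
# fluid.io.load_params(exe, 'checkpoints/', main_program=prog)

# new layout: one consolidated .pdparams file, selected via filename=
fluid.io.load_params(exe, '', main_program=prog,
                     filename='AttentionCluster_final.pdparams')
```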
diff --git a/PaddleCV/PaddleVideo/models/nextvlad/README.md b/PaddleCV/PaddleVideo/models/nextvlad/README.md
index cef445b2..bde66d01 100644
--- a/PaddleCV/PaddleVideo/models/nextvlad/README.md
+++ b/PaddleCV/PaddleVideo/models/nextvlad/README.md
@@ -18,18 +18,30 @@ NeXtVLAD was the best-performing single model in the 2nd Youtube-8M video understanding challenge
 
 ## Data preparation
 
-NeXtVLAD uses the 2nd-Youtube-8M dataset; for data download and preparation see the [data notes](../../dataset/README.md)
+NeXtVLAD uses the 2nd-Youtube-8M dataset; for data download and preparation see the [data notes](../../data/dataset/README.md)
 
 ## Model training
 
 ### Training from random initialization
 
-Run the following script in the video directory
-    bash ./scripts/train/train_nextvlad.sh
+In the video directory, training can be started in either of the following two ways:
+
+    export CUDA_VISIBLE_DEVICES=0,1,2,3
+    python train.py --model_name=NEXTVLAD \
+                    --config=./configs/nextvlad.yaml \
+                    --log_interval=10 \
+                    --valid_interval=1 \
+                    --use_gpu=True \
+                    --save_dir=./data/checkpoints \
+                    --fix_random_seed=False
+
+    bash run.sh train NEXTVLAD ./configs/nextvlad.yaml
+
+- NeXtVLAD is trained with 4 cards; please set CUDA\_VISIBLE\_DEVICES=0,1,2,3 in run.sh
 
 ### Finetuning from a pretrained model
 
-Download the provided pretrained [model](https://paddlemodels.bj.bcebos.com/video_classification/nextvlad_youtube8m.tar.gz) first, and add --resume in the script above, set to the path of the saved pretrained model.
+Download the provided pretrained [model](https://paddlemodels.bj.bcebos.com/video_classification/NEXTVLAD_final.pdparams) first, and add --resume in the script above, set to the path of the saved model parameters.
 
 4 Nvidia Tesla P40 cards are used, with a total batch size of 160.
 
@@ -41,10 +53,23 @@
 
 ## Model evaluation
 
-Download the released model parameters, or use your own trained ones; set the --weights parameter in the
-./scripts/test/test\_nextvald.sh file to the directory holding the saved parameters. Run
+The model can be evaluated in either of the following two ways:
+
+    python eval.py --model_name=NEXTVLAD \
+                   --config=./configs/nextvlad.yaml \
+                   --log_interval=1 \
+                   --weights=$PATH_TO_WEIGHTS \
+                   --use_gpu=True
+
+    bash run.sh eval NEXTVLAD ./configs/nextvlad.yaml
+
+- When evaluating with `run.sh`, modify the `weights` parameter in the script to point to the weights to be evaluated.
+
+- If `--weights` is not specified, the script downloads the released [model](https://paddlemodels.bj.bcebos.com/video_classification/NEXTVLAD_final.pdparams) for evaluation
 
-    bash ./scripts/test/test_nextvlad.sh
+- Evaluation results (GAP, Hit@1 and other accuracy metrics) are printed directly to the log
+
+- To evaluate on CPU, set `use_gpu` to False
 
 Since the test split of the youtube-8m release has no ground truth labels, the validation set is used for testing here.
 
@@ -69,12 +94,28 @@
 
 ## Model inference
 
-Download the released model parameters, or use your own trained ones; set the --weights parameter in the
-./scripts/infer/infer\_nextvald.sh file to the directory holding the saved parameters, then run the following script
+Model inference can be started in either of the following two ways:
+
+    python predict.py --model_name=NEXTVLAD \
+                      --config=configs/nextvlad.yaml \
+                      --log_interval=1 \
+                      --weights=$PATH_TO_WEIGHTS \
+                      --filelist=$FILELIST \
+                      --use_gpu=True
+
+    bash run.sh predict NEXTVLAD ./configs/nextvlad.yaml
+
+- When launching from the python command line, `--filelist` specifies the list of files to run inference on; if unset, it defaults to data/dataset/youtube8m/infer.list. `--weights` is the path to trained weights; if unset, the program automatically downloads the released weights. If you do not want to set these two arguments, leave them off the command line and the defaults will be used.
+
+- When predicting with `run.sh`, modify the `weights` parameter in the script to point to the weights to use.
+
+- If `--weights` is not specified, the script downloads the released [model](https://paddlemodels.bj.bcebos.com/video_classification/NEXTVLAD_final.pdparams) for inference
+
+- Inference results are printed directly to the log, showing the predicted class probabilities for each test sample.
 
-    bash ./scripts/infer/infer_nextvald.sh
+- To predict on CPU, set `use_gpu` to False
 
-Inference results are saved in the NEXTVLAD\_infer\_result file in pickle format.
 
 ## References
+ self.py_reader = fluid.io.PyReader( + feed_list=[rgb, audio, label], capacity=8, iterable=True) self.feature_input = [rgb, audio] self.label_input = label @@ -174,10 +166,25 @@ class NEXTVLAD(ModelBase): self.label_input ] + def fetches(self): + if self.mode == 'train' or self.mode == 'valid': + losses = self.loss() + fetch_list = [losses, self.predictions, self.label_input] + elif self.mode == 'test': + losses = self.loss() + fetch_list = [losses, self.predictions, self.label_input] + elif self.mode == 'infer': + fetch_list = [self.predictions] + else: + raise NotImplementedError('mode {} not implemented'.format( + self.mode)) + + return fetch_list + def weights_info(self): return ( - 'nextvlad_youtube8m', - 'https://paddlemodels.bj.bcebos.com/video_classification/nextvlad_youtube8m.tar.gz' + 'NEXTVLAD_final.pdparams', + 'https://paddlemodels.bj.bcebos.com/video_classification/NEXTVLAD_final.pdparams' ) diff --git a/PaddleCV/PaddleVideo/models/nonlocal_model/README.md b/PaddleCV/PaddleVideo/models/nonlocal_model/README.md index a886d11a..c7eb2624 100644 --- a/PaddleCV/PaddleVideo/models/nonlocal_model/README.md +++ b/PaddleCV/PaddleVideo/models/nonlocal_model/README.md @@ -82,23 +82,27 @@ g(Xj)是对输入feature map做一个线性变换,使用1x1x1的卷积;theta ## 数据准备 -Non-local模型的训练数据采用由DeepMind公布的Kinetics-400动作识别数据集。数据下载及准备请参考Non-local模型的[数据说明](../../dataset/nonlocal/README.md) +Non-local模型的训练数据采用由DeepMind公布的Kinetics-400动作识别数据集。数据下载及准备请参考Non-local模型的[数据说明](../../data/dataset/nonlocal/README.md) ## 模型训练 数据准备完毕后,可以通过如下两种方式启动训练: - python train.py --model_name=NONLOCAL - --config=./configs/nonlocal.txt - --save_dir=checkpoints - --log_interval=10 - --valid_interval=1 - --pretrain=${path_to_pretrain_model} - bash scripts/train/train_nonlocal.sh + export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 + python train.py --model_name=NONLOCAL \ + --config=./configs/nonlocal.yaml \ + --log_interval=10 \ + --valid_interval=1 \ + --use_gpu=True \ + --save_dir=./data/checkpoints \ + --fix_random_seed=False \ + --pretrain=$PATH_TO_PRETRAIN_MODEL -- 从头开始训练,需要加载在ImageNet上训练的ResNet50权重作为初始化参数(该模型参数转自Caffe2)。请下载此[模型参数](https://paddlemodels.bj.bcebos.com/video_classification/Nonlocal_ResNet50_pretrained.tar.gz)并解压,将上面启动脚本中的path\_to\_pretrain\_model设置为解压之后的模型参数存放路径。如果没有手动下载并设置path\_to\_pretrain\_model,则程序会自动下载并将参数保存在~/.paddle/weights/Nonlocal\_ResNet50\_pretrained目录下面 + bash run.sh train NONLOCAL ./configs/nonlocal.yaml -- 可下载已发布模型[model](https://paddlemodels.bj.bcebos.com/video_classification/nonlocal_kinetics.tar.gz)通过`--resume`指定权重存放路径进行finetune等开发 +- 从头开始训练,需要加载在ImageNet上训练的ResNet50权重作为初始化参数(该模型参数转自Caffe2)。请下载此[模型参数](https://paddlemodels.bj.bcebos.com/video_classification/Nonlocal_ResNet50_pretrained.tar.gz)并解压,将上面启动命令行或者run.sh脚本中的`pretrain`参数设置为解压之后的模型参数存放路径。如果没有手动下载并设置`pretrain`参数,则程序会自动下载并将参数保存在~/.paddle/weights/Nonlocal\_ResNet50\_pretrained目录下面 + +- 可下载已发布模型[model](https://paddlemodels.bj.bcebos.com/video_classification/NONLOCAL_final.pdparams)通过`--resume`指定权重存放路径进行finetune等开发 **数据读取器说明:** 模型读取Kinetics-400数据集中的`mp4`数据,根据视频长度和采样频率随机选取起始帧的位置,每个视频抽取`video_length`帧图像,对每帧图像做随机增强,短边缩放至[256, 320]之间的某个随机数,长边根据长宽比计算出来,然后再截取出224x224的区域作为训练数据输入网络。 @@ -115,16 +119,22 @@ Non-local模型的训练数据采用由DeepMind公布的Kinetics-400动作识别 可通过如下两种方式进行模型评估: - python test.py --model_name=NONLOCAL - --config=configs/nonlocal.txt - --log_interval=1 - --weights=$PATH_TO_WEIGHTS + python eval.py --model_name=NONLOCAL \ + --config=./configs/nonlocal.yaml \ + --log_interval=1 \ + --weights=$PATH_TO_WEIGHTS \ + --use_gpu=True + + bash run.sh eval NONLOCAL 
./configs/nonlocal.yaml
+
+- 使用`run.sh`进行评估时,需要修改脚本中的`weights`参数指定需要评估的权重。
+
+- 若未指定`--weights`参数,脚本会下载已发布模型[model](https://paddlemodels.bj.bcebos.com/video_classification/NONLOCAL_final.pdparams)进行评估

-    bash scripts/test/test_nonlocal.sh
+- 评估结果以log的形式直接打印输出TOP1\_ACC、TOP5\_ACC等精度指标

-- 使用`scripts/test/test_nonlocal.sh`进行评估时,需要修改脚本中的`--weights`参数指定需要评估的权重。
+- 使用CPU进行评估时,请将上面的命令行或者run.sh脚本中的`use_gpu`设置为False

-- 若未指定`--weights`参数,脚本会下载已发布模型[model](https://paddlemodels.bj.bcebos.com/video_classification/nonlocal_kinetics.tar.gz)进行评估

实现了C2D-ResNet50, C2D-ResNet101, I3D-ResNet50三种网络结构,在Kinetics400的validation数据集下评估精度如下:

@@ -141,18 +151,27 @@ Non-local模型的训练数据采用由DeepMind公布的Kinetics-400动作识别

## 模型推断

-可通过如下命令进行模型推断:
+可通过如下两种方式启动模型推断:
+
+    python predict.py --model_name=NONLOCAL \
+                      --config=./configs/nonlocal.yaml \
+                      --log_interval=1 \
+                      --weights=$PATH_TO_WEIGHTS \
+                      --filelist=$FILELIST \
+                      --use_gpu=True \
+                      --video_path=$VIDEO_PATH
+
+    bash run.sh predict NONLOCAL ./configs/nonlocal.yaml
+
+- 使用`run.sh`进行推断时,需要修改脚本中的`weights`参数指定需要用到的权重。

-    python infer.py --model_name=NONLOCAL
-                    --config=configs/nonlocal.txt
-                    --log_interval=1
-                    --weights=$PATH_TO_WEIGHTS
-                    --filelist=$FILELIST
+- 如果`video_path`为空字符串,则忽略此参数,程序按`filelist`中的列表进行预测;如果`video_path`不为空,则程序只对其指定的视频文件进行预测,忽略`filelist`的值,预测结果为该视频的分类概率。

-- 模型推断结果存储于`NONLOCAL_infer_result`中,通过`pickle`格式存储。
+- 若未指定`--weights`参数,脚本会下载已发布模型[model](https://paddlemodels.bj.bcebos.com/video_classification/NONLOCAL_final.pdparams)进行推断

-- 若未指定`--weights`参数,脚本会下载已发布模型[model](https://paddlemodels.bj.bcebos.com/video_classification/nonlocal_kinetics.tar.gz)进行推断
+- 模型推断结果以log的形式直接打印输出,可以看到测试样本的分类预测概率。

+- 使用CPU进行推断时,请将命令行或者run.sh脚本中的`use_gpu`设置为False

## 参考论文

diff --git a/PaddleCV/PaddleVideo/models/nonlocal_model/nonlocal_model.py b/PaddleCV/PaddleVideo/models/nonlocal_model/nonlocal_model.py
index cdb366eb..5555ee46 100644
--- a/PaddleCV/PaddleVideo/models/nonlocal_model/nonlocal_model.py
+++ b/PaddleCV/PaddleVideo/models/nonlocal_model/nonlocal_model.py
@@ -42,31 +42,25 @@ class NonLocal(ModelBase):
    def build_input(self, use_pyreader=True):
        input_shape = [3, self.video_length, self.crop_size, self.crop_size]
        label_shape = [1]
-        py_reader = None
+
+        data = fluid.layers.data(
+            name='train_data' if self.is_training else 'test_data',
+            shape=input_shape,
+            dtype='float32')
+        if self.mode != 'infer':
+            label = fluid.layers.data(
+                name='train_label' if self.is_training else 'test_label',
+                shape=label_shape,
+                dtype='int64')
+        else:
+            label = None
+
        if use_pyreader:
            assert self.mode != 'infer', \
                    'pyreader is not recommendated when infer, please set use_pyreader to be false.'
-            py_reader = fluid.layers.py_reader(
-                capacity=20,
-                shapes=[[-1] + input_shape, [-1] + label_shape],
-                dtypes=['float32', 'int64'],
-                name='train_py_reader'
-                if self.is_training else 'test_py_reader',
-                use_double_buffer=True)
-            data, label = fluid.layers.read_file(py_reader)
-            self.py_reader = py_reader
-        else:
-            data = fluid.layers.data(
-                name='train_data' if self.is_training else 'test_data',
-                shape=input_shape,
-                dtype='float32')
-            if self.mode != 'infer':
-                label = fluid.layers.data(
-                    name='train_label' if self.is_training else 'test_label',
-                    shape=label_shape,
-                    dtype='int64')
-            else:
-                label = None
+            self.py_reader = fluid.io.PyReader(
+                feed_list=[data, label], capacity=4, iterable=True)
+
        self.feature_input = [data]
        self.label_input = label
@@ -119,6 +113,20 @@ class NonLocal(ModelBase):
        return self.feature_input if self.mode == 'infer' else \
            self.feature_input + [self.label_input]

+    def fetches(self):
+        if self.mode == 'train' or self.mode == 'valid':
+            losses = self.loss()
+            fetch_list = [losses, self.network_outputs[0], self.label_input]
+        elif self.mode == 'test':
+            fetch_list = [self.network_outputs[0], self.label_input]
+        elif self.mode == 'infer':
+            fetch_list = self.network_outputs
+        else:
+            raise NotImplementedError('mode {} not implemented'.format(
+                self.mode))
+
+        return fetch_list
+
    def pretrain_info(self):
        return (
            'Nonlocal_ResNet50_pretrained',
@@ -126,7 +134,10 @@ class NonLocal(ModelBase):
        )

    def weights_info(self):
-        pass
+        return (
+            'NONLOCAL_final.pdparams',
+            'https://paddlemodels.bj.bcebos.com/video_classification/NONLOCAL_final.pdparams'
+        )

    def load_pretrain_params(self, exe, pretrain, prog, place):
        load_params_from_file(exe, prog, pretrain, place)
diff --git a/PaddleCV/PaddleVideo/models/stnet/README.md b/PaddleCV/PaddleVideo/models/stnet/README.md
index 6771cde1..d8faecc1 100644
--- a/PaddleCV/PaddleVideo/models/stnet/README.md
+++ b/PaddleCV/PaddleVideo/models/stnet/README.md
@@ -24,24 +24,28 @@ StNet Framework Overview

## 数据准备

-StNet的训练数据采用由DeepMind公布的Kinetics-400动作识别数据集。数据下载及准备请参考[数据说明](../../dataset/README.md)
+StNet的训练数据采用由DeepMind公布的Kinetics-400动作识别数据集。数据下载及准备请参考[数据说明](../../data/dataset/README.md)

## 模型训练

数据准备完毕后,可以通过如下两种方式启动训练:

-    python train.py --model_name=STNET
-                    --config=./configs/stnet.txt
-                    --save_dir=checkpoints
-                    --log_interval=10
-                    --valid_interval=1
-                    --pretrain=${path_to_pretrain_model}
+    export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
+    python train.py --model_name=STNET \
+                    --config=./configs/stnet.yaml \
+                    --log_interval=10 \
+                    --valid_interval=1 \
+                    --use_gpu=True \
+                    --save_dir=./data/checkpoints \
+                    --fix_random_seed=False \
+                    --pretrain=$PATH_TO_PRETRAIN_MODEL

-    bash scripts/train/train_stnet.sh
+    bash run.sh train STNET ./configs/stnet.yaml

-- 从头开始训练,需要加载在ImageNet上训练的ResNet50权重作为初始化参数,请下载此[模型参数](https://paddlemodels.bj.bcebos.com/video_classification/ResNet50_pretrained.tar.gz)并解压,将上面启动脚本中的path\_to\_pretrain\_model设置为解压之后的模型参数存放路径。如果没有手动下载并设置path\_to\_pretrain\_model,则程序会自动下载并将参数保存在~/.paddle/weights/ResNet50\_pretrained目录下面
+- 从头开始训练,需要加载在ImageNet上训练的ResNet50权重作为初始化参数,请下载此[模型参数](https://paddlemodels.bj.bcebos.com/video_classification/ResNet50_pretrained.tar.gz)并解压,将上面启动命令行或者run.sh脚本中的`pretrain`参数设置为解压之后的模型参数存放路径。如果没有手动下载并设置`pretrain`参数,则程序会自动下载并将参数保存在~/.paddle/weights/ResNet50\_pretrained目录下面
+
+- 可下载已发布模型[model](https://paddlemodels.bj.bcebos.com/video_classification/STNET_final.pdparams)通过`--resume`指定权重存放路径进行finetune等开发

-- 可下载已发布模型[model](https://paddlemodels.bj.bcebos.com/video_classification/stnet_kinetics.tar.gz)通过`--resume`指定权重存放路径进行finetune等开发

**数据读取器说明:** 模型读取Kinetics-400数据集中的`mp4`数据,每条数据抽取`seg_num`段,每段抽取`seg_len`帧图像,对每帧图像做随机增强后,缩放至`target_size`。

@@ -51,30 +55,27 @@ StNet的训练数据采用由DeepMind公布的Kinetics-400动作识别数据集

*  权重衰减系数为1e-4
*  学习率在训练的总epoch数的1/3和2/3时分别做0.1的衰减

-**备注:**
-
-* 在训练StNet模型时使用PaddlePaddle Fluid 1.3 + cudnn5.1。使用cudnn7.0以上版本时batchnorm计算moving mean和moving average会出现异常,此问题还在修复中。建议用户安装PaddlePaddle时指定cudnn版本,

-    pip install paddlepaddle\_gpu==1.3.0.post85
+## 模型评估

-或者在PaddlePaddle的whl包[下载页面](http://paddlepaddle.org/documentation/docs/zh/1.3/beginners_guide/install/Tables.html/#permalink-4--whl-release)选择下载cuda8.0\_cudnn5\_avx\_mkl对应的whl包安装。
-关于安装PaddlePaddle的详细操作请参考[安装文档](http://www.paddlepaddle.org/documentation/docs/zh/1.3/beginners_guide/install/index_cn.html)。
+可通过如下两种方式进行模型评估:

+    python eval.py --model_name=STNET \
+                   --config=./configs/stnet.yaml \
+                   --log_interval=1 \
+                   --weights=$PATH_TO_WEIGHTS \
+                   --use_gpu=True

-## 模型评估
+    bash run.sh eval STNET ./configs/stnet.yaml

-可通过如下两种方式进行模型评估:
+- 使用`run.sh`进行评估时,需要修改脚本中的`weights`参数指定需要评估的权重。

-    python test.py --model_name=STNET
-                   --config=configs/stnet.txt
-                   --log_interval=1
-                   --weights=$PATH_TO_WEIGHTS
+- 若未指定`--weights`参数,脚本会下载已发布模型[model](https://paddlemodels.bj.bcebos.com/video_classification/STNET_final.pdparams)进行评估

-    bash scripts/test/test__stnet.sh
+- 评估结果以log的形式直接打印输出TOP1\_ACC、TOP5\_ACC等精度指标

-- 使用`scripts/test/test_stnet.sh`进行评估时,需要修改脚本中的`--weights`参数指定需要评估的权重。
+- 使用CPU进行评估时,请将上面的命令行或者run.sh脚本中的`use_gpu`设置为False

-- 若未指定`--weights`参数,脚本会下载已发布模型[model](https://paddlemodels.bj.bcebos.com/video_classification/stnet_kinetics.tar.gz)进行评估

当取如下参数时:

@@ -93,17 +94,27 @@ StNet的训练数据采用由DeepMind公布的Kinetics-400动作识别数据集

## 模型推断

-可通过如下命令进行模型推断:
+可通过如下两种方式启动模型推断:
+
+    python predict.py --model_name=STNET \
+                      --config=./configs/stnet.yaml \
+                      --log_interval=1 \
+                      --weights=$PATH_TO_WEIGHTS \
+                      --filelist=$FILELIST \
+                      --use_gpu=True \
+                      --video_path=$VIDEO_PATH
+
+    bash run.sh predict STNET ./configs/stnet.yaml
+
+- 使用`run.sh`进行推断时,需要修改脚本中的`weights`参数指定需要用到的权重。
+
+- 如果`video_path`为空字符串,则忽略此参数,程序按`filelist`中的列表进行预测;如果`video_path`不为空,则程序只对其指定的视频文件进行预测,忽略`filelist`的值,预测结果为该视频的分类概率。

-    python infer.py --model_name=stnet
-                    --config=configs/stnet.txt
-                    --log_interval=1
-                    --weights=$PATH_TO_WEIGHTS
-                    --filelist=$FILELIST
+- 若未指定`--weights`参数,脚本会下载已发布模型[model](https://paddlemodels.bj.bcebos.com/video_classification/STNET_final.pdparams)进行推断

-- 模型推断结果存储于`STNET_infer_result`中,通过`pickle`格式存储。
+- 模型推断结果以log的形式直接打印输出,可以看到测试样本的分类预测概率。

-- 若未指定`--weights`参数,脚本会下载已发布模型[model](https://paddlemodels.bj.bcebos.com/video_classification/stnet_kinetics.tar.gz)进行推断
+- 使用CPU进行推断时,请将命令行或者run.sh脚本中的`use_gpu`设置为False

## 参考论文

diff --git a/PaddleCV/PaddleVideo/models/stnet/stnet.py b/PaddleCV/PaddleVideo/models/stnet/stnet.py
index 8e93aa4b..c257df1f 100644
--- a/PaddleCV/PaddleVideo/models/stnet/stnet.py
+++ b/PaddleCV/PaddleVideo/models/stnet/stnet.py
@@ -56,26 +56,21 @@ class STNET(ModelBase):
        image_shape[0] = image_shape[0] * self.seglen
        image_shape = [self.seg_num] + image_shape
        self.use_pyreader = use_pyreader
+
+        image = fluid.layers.data(
+            name='image', shape=image_shape, dtype='float32')
+        if self.mode != 'infer':
+            label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+        else:
+            label = None
+
        if use_pyreader:
            assert self.mode != 'infer', \
                    'pyreader is not recommendated when infer, please set use_pyreader to be false.'
- py_reader = fluid.layers.py_reader( - capacity=100, - shapes=[[-1] + image_shape, [-1] + [1]], - dtypes=['float32', 'int64'], - name='train_py_reader' - if self.is_training else 'test_py_reader', - use_double_buffer=True) - image, label = fluid.layers.read_file(py_reader) + py_reader = fluid.io.PyReader( + feed_list=[image, label], capacity=4, iterable=True) self.py_reader = py_reader - else: - image = fluid.layers.data( - name='image', shape=image_shape, dtype='float32') - if self.mode != 'infer': - label = fluid.layers.data( - name='label', shape=[1], dtype='int64') - else: - label = None + self.feature_input = [image] self.label_input = label @@ -127,6 +122,21 @@ class STNET(ModelBase): self.label_input ] + def fetches(self): + if self.mode == 'train' or self.mode == 'valid': + losses = self.loss() + fetch_list = [losses, self.network_outputs[0], self.label_input] + elif self.mode == 'test': + losses = self.loss() + fetch_list = [losses, self.network_outputs[0], self.label_input] + elif self.mode == 'infer': + fetch_list = self.network_outputs + else: + raise NotImplementedError('mode {} not implemented'.format( + self.mode)) + + return fetch_list + def pretrain_info(self): return ( 'ResNet50_pretrained', @@ -135,8 +145,8 @@ class STNET(ModelBase): def weights_info(self): return ( - 'stnet_kinetics', - 'https://paddlemodels.bj.bcebos.com/video_classification/stnet_kinetics.tar.gz' + 'STNET_final.pdparams', + 'https://paddlemodels.bj.bcebos.com/video_classification/STNET_final.pdparams' ) def load_pretrain_params(self, exe, pretrain, prog, place): diff --git a/PaddleCV/PaddleVideo/models/tsm/README.md b/PaddleCV/PaddleVideo/models/tsm/README.md index 1e35d29b..0364799e 100644 --- a/PaddleCV/PaddleVideo/models/tsm/README.md +++ b/PaddleCV/PaddleVideo/models/tsm/README.md @@ -28,27 +28,30 @@ TSM模型是将Temporal Shift Module插入到ResNet网络中构建的视频分 ## 数据准备 -TSM的训练数据采用由DeepMind公布的Kinetics-400动作识别数据集。数据下载及准备请参考[数据说明](../../dataset/README.md) +TSM的训练数据采用由DeepMind公布的Kinetics-400动作识别数据集。数据下载及准备请参考[数据说明](../../data/dataset/README.md) ## 模型训练 数据准备完毕后,可以通过如下两种方式启动训练: + export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 export FLAGS_fast_eager_deletion_mode=1 export FLAGS_eager_delete_tensor_gb=0.0 export FLAGS_fraction_of_gpu_memory_to_use=0.98 - python train.py --model_name=TSM - --config=./configs/tsm.txt - --save_dir=checkpoints - --log_interval=10 - --valid_interval=1 - --pretrain=${path_to_pretrain_model} + python train.py --model_name=TSM \ + --config=./configs/tsm.yaml \ + --log_interval=10 \ + --valid_interval=1 \ + --use_gpu=True \ + --save_dir=./data/checkpoints \ + --fix_random_seed=False \ + --pretrain=$PATH_TO_PRETRAIN_MODEL - bash scripts/train/train_tsm.sh + bash run.sh train TSM ./configs/tsm.yaml -- 从头开始训练,需要加载在ImageNet上训练的ResNet50权重作为初始化参数,请下载此[模型参数](https://paddlemodels.bj.bcebos.com/video_classification/ResNet50_pretrained.tar.gz)并解压,将上面启动脚本中的path\_to\_pretrain\_model设置为解压之后的模型参数存放路径。如果没有手动下载并设置path\_to\_pretrain\_model,则程序会自动下载并将参数保存在~/.paddle/weights/ResNet50\_pretrained目录下面 +- 从头开始训练,需要加载在ImageNet上训练的ResNet50权重作为初始化参数,请下载此[模型参数](https://paddlemodels.bj.bcebos.com/video_classification/ResNet50_pretrained.tar.gz)并解压,将上面启动命令行或者run.sh脚本中的`pretrain`参数设置为解压之后的模型参数存放路径。如果没有手动下载并设置`pretrain`参数,则程序会自动下载并将参数保存在~/.paddle/weights/ResNet50\_pretrained目录下面 -- 可下载已发布模型[model](https://paddlemodels.bj.bcebos.com/video_classification/tsm_kinetics.tar.gz)通过`--resume`指定权重存放路径进行finetune等开发 +- 
可下载已发布模型[model](https://paddlemodels.bj.bcebos.com/video_classification/TSM_final.pdparams)通过`--resume`指定权重存放路径进行finetune等开发

**数据读取器说明:** 模型读取Kinetics-400数据集中的`mp4`数据,每条数据抽取`seg_num`段,每段抽取1帧图像,对每帧图像做随机增强后,缩放至`target_size`。

@@ -61,16 +64,21 @@ TSM的训练数据采用由DeepMind公布的Kinetics-400动作识别数据集。

可通过如下两种方式进行模型评估:

-    python test.py --model_name=TSM
-                   --config=configs/tsm.txt
-                   --log_interval=1
-                   --weights=$PATH_TO_WEIGHTS
+    python eval.py --model_name=TSM \
+                   --config=./configs/tsm.yaml \
+                   --log_interval=1 \
+                   --weights=$PATH_TO_WEIGHTS \
+                   --use_gpu=True

-    bash scripts/test/test_tsm.sh
+    bash run.sh eval TSM ./configs/tsm.yaml

-- 使用`scripts/test/test_tsm.sh`进行评估时,需要修改脚本中的`--weights`参数指定需要评估的权重。
+- 使用`run.sh`进行评估时,需要修改脚本中的`weights`参数指定需要评估的权重。

-- 若未指定`--weights`参数,脚本会下载已发布模型[model](https://paddlemodels.bj.bcebos.com/video_classification/tsm_kinetics.tar.gz)进行评估
+- 若未指定`--weights`参数,脚本会下载已发布模型[model](https://paddlemodels.bj.bcebos.com/video_classification/TSM_final.pdparams)进行评估
+
+- 评估结果以log的形式直接打印输出TOP1\_ACC、TOP5\_ACC等精度指标
+
+- 使用CPU进行评估时,请将上面的命令行或者run.sh脚本中的`use_gpu`设置为False

当取如下参数时,在Kinetics400的validation数据集下评估精度如下:

@@ -80,17 +88,28 @@ TSM的训练数据采用由DeepMind公布的Kinetics-400动作识别数据集。

## 模型推断

-可通过如下命令进行模型推断:
+可通过如下两种方式启动模型推断:
+
+    python predict.py --model_name=TSM \
+                      --config=./configs/tsm.yaml \
+                      --log_interval=1 \
+                      --weights=$PATH_TO_WEIGHTS \
+                      --filelist=$FILELIST \
+                      --use_gpu=True \
+                      --video_path=$VIDEO_PATH
+
+    bash run.sh predict TSM ./configs/tsm.yaml
+
+- 使用`run.sh`进行推断时,需要修改脚本中的`weights`参数指定需要用到的权重。
+
+- 如果`video_path`为空字符串,则忽略此参数,程序按`filelist`中的列表进行预测;如果`video_path`不为空,则程序只对其指定的视频文件进行预测,忽略`filelist`的值,预测结果为该视频的分类概率。
+
+- 若未指定`--weights`参数,脚本会下载已发布模型[model](https://paddlemodels.bj.bcebos.com/video_classification/TSM_final.pdparams)进行推断

-    python infer.py --model_name=TSM
-                    --config=configs/tsm.txt
-                    --log_interval=1
-                    --weights=$PATH_TO_WEIGHTS
-                    --filelist=$FILELIST
+- 模型推断结果以log的形式直接打印输出,可以看到测试样本的分类预测概率。

-- 模型推断结果存储于`TSM_infer_result`中,通过`pickle`格式存储。
+- 使用CPU进行推断时,请将命令行或者run.sh脚本中的`use_gpu`设置为False

-- 若未指定`--weights`参数,脚本会下载已发布模型[model](https://paddlemodels.bj.bcebos.com/video_classification/tsm_kinetics.tar.gz)进行推断

## 参考论文

diff --git a/PaddleCV/PaddleVideo/models/tsm/tsm.py b/PaddleCV/PaddleVideo/models/tsm/tsm.py
index 6e854c8c..bd69d0ef 100644
--- a/PaddleCV/PaddleVideo/models/tsm/tsm.py
+++ b/PaddleCV/PaddleVideo/models/tsm/tsm.py
@@ -56,26 +56,21 @@ class TSM(ModelBase):
        image_shape[0] = image_shape[0] * self.seglen
        image_shape = [self.seg_num] + image_shape
        self.use_pyreader = use_pyreader
+
+        image = fluid.layers.data(
+            name='image', shape=image_shape, dtype='float32')
+        if self.mode != 'infer':
+            label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+        else:
+            label = None
+
        if use_pyreader:
            assert self.mode != 'infer', \
                    'pyreader is not recommendated when infer, please set use_pyreader to be false.'
- py_reader = fluid.layers.py_reader( - capacity=100, - shapes=[[-1] + image_shape, [-1] + [1]], - dtypes=['float32', 'int64'], - name='train_py_reader' - if self.is_training else 'test_py_reader', - use_double_buffer=True) - image, label = fluid.layers.read_file(py_reader) + py_reader = fluid.io.PyReader( + feed_list=[image, label], capacity=4, iterable=True) self.py_reader = py_reader - else: - image = fluid.layers.data( - name='image', shape=image_shape, dtype='float32') - if self.mode != 'infer': - label = fluid.layers.data( - name='label', shape=[1], dtype='int64') - else: - label = None + self.feature_input = [image] self.label_input = label @@ -121,6 +116,21 @@ class TSM(ModelBase): self.label_input ] + def fetches(self): + if self.mode == 'train' or self.mode == 'valid': + losses = self.loss() + fetch_list = [losses, self.network_outputs[0], self.label_input] + elif self.mode == 'test': + losses = self.loss() + fetch_list = [losses, self.network_outputs[0], self.label_input] + elif self.mode == 'infer': + fetch_list = self.network_outputs + else: + raise NotImplementedError('mode {} not implemented'.format( + self.mode)) + + return fetch_list + def pretrain_info(self): return ( 'ResNet50_pretrained', @@ -129,8 +139,8 @@ class TSM(ModelBase): def weights_info(self): return ( - 'tsm_kinetics', - 'https://paddlemodels.bj.bcebos.com/video_classification/tsm_kinetics.tar.gz' + 'TSM_final.pdparams', + 'https://paddlemodels.bj.bcebos.com/video_classification/TSM_final.pdparams' ) def load_pretrain_params(self, exe, pretrain, prog, place): diff --git a/PaddleCV/PaddleVideo/models/tsn/README.md b/PaddleCV/PaddleVideo/models/tsn/README.md index ebb40ebb..80ca3268 100644 --- a/PaddleCV/PaddleVideo/models/tsn/README.md +++ b/PaddleCV/PaddleVideo/models/tsn/README.md @@ -19,24 +19,32 @@ Temporal Segment Network (TSN) 是视频分类领域经典的基于2D-CNN的解 ## 数据准备 -TSN的训练数据采用由DeepMind公布的Kinetics-400动作识别数据集。数据下载及准备请参考[数据说明](../../dataset/README.md) +TSN的训练数据采用由DeepMind公布的Kinetics-400动作识别数据集。数据下载及准备请参考[数据说明](../../data/dataset/README.md) ## 模型训练 数据准备完毕后,可以通过如下两种方式启动训练: - python train.py --model_name=TSN - --config=./configs/tsn.txt - --save_dir=checkpoints - --log_interval=10 - --valid_interval=1 - --pretrain=${path_to_pretrain_model} + export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 + export FLAGS_fast_eager_deletion_mode=1 + export FLAGS_eager_delete_tensor_gb=0.0 + export FLAGS_fraction_of_gpu_memory_to_use=0.98 + python train.py --model_name=TSN \ + --config=./configs/tsn.yaml \ + --log_interval=10 \ + --valid_interval=1 \ + --use_gpu=True \ + --save_dir=./data/checkpoints \ + --fix_random_seed=False \ + --pretrain=$PATH_TO_PRETRAIN_MODEL - bash scripts/train/train_tsn.sh + bash run.sh train TSN ./configs/tsn.yaml -- 从头开始训练,需要加载在ImageNet上训练的ResNet50权重作为初始化参数,请下载此[模型参数](https://paddlemodels.bj.bcebos.com/video_classification/ResNet50_pretrained.tar.gz)并解压,将上面启动脚本中的path\_to\_pretrain\_model设置为解压之后的模型参数存放路径。如果没有手动下载并设置path\_to\_pretrain\_model,则程序会自动下载并将参数保存在~/.paddle/weights/ResNet50\_pretrained目录下面 +- 从头开始训练,需要加载在ImageNet上训练的ResNet50权重作为初始化参数,请下载此[模型参数](https://paddlemodels.bj.bcebos.com/video_classification/ResNet50_pretrained.tar.gz)并解压,将上面启动命令行或者run.sh脚本中的`pretrain`参数设置为解压之后的模型参数 +存放路径。如果没有手动下载并设置`pretrain`参数,则程序会自动下载并将参数保存在~/.paddle/weights/ResNet50\_pretrained目录下面 -- 可下载已发布模型[model](https://paddlemodels.bj.bcebos.com/video_classification/tsn_kinetics.tar.gz)通过`--resume`指定权重存放路径进行finetune等开发 +- 可下载已发布模型[model](https://paddlemodels.bj.bcebos.com/video_classification/TSN_final.pdparams)通过`--resume`指定权重存 
+放路径进行finetune等开发

**数据读取器说明:** 模型读取Kinetics-400数据集中的`mp4`数据,每条数据抽取`seg_num`段,每段抽取1帧图像,对每帧图像做随机增强后,缩放至`target_size`。

@@ -50,16 +58,22 @@ TSN的训练数据采用由DeepMind公布的Kinetics-400动作识别数据集。

可通过如下两种方式进行模型评估:

-    python test.py --model_name=TSN
-                   --config=configs/tsn.txt
-                   --log_interval=1
-                   --weights=$PATH_TO_WEIGHTS
+    python eval.py --model_name=TSN \
+                   --config=./configs/tsn.yaml \
+                   --log_interval=1 \
+                   --weights=$PATH_TO_WEIGHTS \
+                   --use_gpu=True

-    bash scripts/test/test_tsn.sh
+    bash run.sh eval TSN ./configs/tsn.yaml

-- 使用`scripts/test/test_tsn.sh`进行评估时,需要修改脚本中的`--weights`参数指定需要评估的权重。
+- 使用`run.sh`进行评估时,需要修改脚本中的`weights`参数指定需要评估的权重。
+
+- 若未指定`--weights`参数,脚本会下载已发布模型[model](https://paddlemodels.bj.bcebos.com/video_classification/TSN_final.pdparams)进行评估
+
+- 评估结果以log的形式直接打印输出TOP1\_ACC、TOP5\_ACC等精度指标
+
+- 使用CPU进行评估时,请将上面的命令行或者run.sh脚本中的`use_gpu`设置为False

-- 若未指定`--weights`参数,脚本会下载已发布模型[model](https://paddlemodels.bj.bcebos.com/video_classification/tsn_kinetics.tar.gz)进行评估

当取如下参数时,在Kinetics400的validation数据集下评估精度如下:

@@ -70,17 +84,27 @@ TSN的训练数据采用由DeepMind公布的Kinetics-400动作识别数据集。

## 模型推断

-可通过如下命令进行模型推断:
+可通过如下两种方式启动模型推断:
+
+    python predict.py --model_name=TSN \
+                      --config=./configs/tsn.yaml \
+                      --log_interval=1 \
+                      --weights=$PATH_TO_WEIGHTS \
+                      --filelist=$FILELIST \
+                      --use_gpu=True \
+                      --video_path=$VIDEO_PATH
+
+    bash run.sh predict TSN ./configs/tsn.yaml
+
+- 使用`run.sh`进行推断时,需要修改脚本中的`weights`参数指定需要用到的权重。
+
+- 如果`video_path`为空字符串,则忽略此参数,程序按`filelist`中的列表进行预测;如果`video_path`不为空,则程序只对其指定的视频文件进行预测,忽略`filelist`的值,预测结果为该视频的分类概率。

-    python infer.py --model_name=TSN
-                    --config=configs/tsn.txt
-                    --log_interval=1
-                    --weights=$PATH_TO_WEIGHTS
-                    --filelist=$FILELIST
+- 若未指定`--weights`参数,脚本会下载已发布模型[model](https://paddlemodels.bj.bcebos.com/video_classification/TSN_final.pdparams)进行推断

-- 模型推断结果存储于`TSN_infer_result`中,通过`pickle`格式存储。
+- 模型推断结果以log的形式直接打印输出,可以看到测试样本的分类预测概率。

-- 若未指定`--weights`参数,脚本会下载已发布模型[model](https://paddlemodels.bj.bcebos.com/video_classification/tsn_kinetics.tar.gz)进行推断
+- 使用CPU进行推断时,请将命令行或者run.sh脚本中的`use_gpu`设置为False

## 参考论文

diff --git a/PaddleCV/PaddleVideo/models/tsn/tsn.py b/PaddleCV/PaddleVideo/models/tsn/tsn.py
index ebd64d59..4d3cdc89 100644
--- a/PaddleCV/PaddleVideo/models/tsn/tsn.py
+++ b/PaddleCV/PaddleVideo/models/tsn/tsn.py
@@ -57,26 +57,21 @@ class TSN(ModelBase):
        image_shape[0] = image_shape[0] * self.seglen
        image_shape = [self.seg_num] + image_shape
        self.use_pyreader = use_pyreader
+
+        image = fluid.layers.data(
+            name='image', shape=image_shape, dtype='float32')
+        if self.mode != 'infer':
+            label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+        else:
+            label = None
+
        if use_pyreader:
            assert self.mode != 'infer', \
                    'pyreader is not recommendated when infer, please set use_pyreader to be false.'
- py_reader = fluid.layers.py_reader( - capacity=100, - shapes=[[-1] + image_shape, [-1] + [1]], - dtypes=['float32', 'int64'], - name='train_py_reader' - if self.is_training else 'test_py_reader', - use_double_buffer=True) - image, label = fluid.layers.read_file(py_reader) + py_reader = fluid.io.PyReader( + feed_list=[image, label], capacity=4, iterable=True) self.py_reader = py_reader - else: - image = fluid.layers.data( - name='image', shape=image_shape, dtype='float32') - if self.mode != 'infer': - label = fluid.layers.data( - name='label', shape=[1], dtype='int64') - else: - label = None + self.feature_input = [image] self.label_input = label @@ -131,6 +126,21 @@ class TSN(ModelBase): self.label_input ] + def fetches(self): + if self.mode == 'train' or self.mode == 'valid': + losses = self.loss() + fetch_list = [losses, self.network_outputs[0], self.label_input] + elif self.mode == 'test': + losses = self.loss() + fetch_list = [losses, self.network_outputs[0], self.label_input] + elif self.mode == 'infer': + fetch_list = self.network_outputs + else: + raise NotImplementedError('mode {} not implemented'.format( + self.mode)) + + return fetch_list + def pretrain_info(self): return ( 'ResNet50_pretrained', @@ -139,8 +149,8 @@ class TSN(ModelBase): def weights_info(self): return ( - 'tsn_kinetics', - 'https://paddlemodels.bj.bcebos.com/video_classification/tsn_kinetics.tar.gz' + 'TSN_final.pdparams', + 'https://paddlemodels.bj.bcebos.com/video_classification/TSN_final.pdparams' ) def load_pretrain_params(self, exe, pretrain, prog, place): diff --git a/PaddleCV/PaddleVideo/infer.py b/PaddleCV/PaddleVideo/predict.py similarity index 75% rename from PaddleCV/PaddleVideo/infer.py rename to PaddleCV/PaddleVideo/predict.py index 41b20f14..8eb58fab 100644 --- a/PaddleCV/PaddleVideo/infer.py +++ b/PaddleCV/PaddleVideo/predict.py @@ -25,10 +25,11 @@ except: import pickle import paddle.fluid as fluid -from config import * +from utils.config_utils import * import models -from datareader import get_reader -from utils import check_cuda +from reader import get_reader +from metrics import get_metrics +from utils.utility import check_cuda logging.root.handlers = [] FORMAT = '[%(levelname)s: %(filename)s: %(lineno)4d]: %(message)s' @@ -57,7 +58,8 @@ def parse_args(): '--weights', type=str, default=None, - help='weight path, None to use weights from Paddle.') + help='weight path, None to automatically download weights provided by Paddle.' 
+    )
    parser.add_argument(
        '--batch_size',
        type=int,
@@ -79,7 +81,15 @@ def parse_args():
        default=20,
        help='topk predictions to restore.')
    parser.add_argument(
-        '--save_dir', type=str, default='./', help='directory to store results')
+        '--save_dir',
+        type=str,
+        default=os.path.join('data', 'predict_results'),
+        help='directory to store results')
+    parser.add_argument(
+        '--video_path',
+        type=str,
+        default=None,
+        help='single video file path to predict on; if set, it takes precedence over filelist')
    args = parser.parse_args()
    return args
@@ -99,7 +109,11 @@ def infer(args):
    exe = fluid.Executor(place)

    filelist = args.filelist or infer_config.INFER.filelist
-    assert os.path.exists(filelist), "{} not exist.".format(args.filelist)
+    filepath = args.video_path or infer_config.INFER.get('filepath', '')
+    if filepath != '':
+        assert os.path.exists(filepath), "{} does not exist.".format(filepath)
+    else:
+        assert os.path.exists(filelist), "{} does not exist.".format(filelist)

    # get infer reader
    infer_reader = get_reader(args.model_name.upper(), 'infer', infer_config)
@@ -114,10 +128,12 @@ def infer(args):
        fluid.default_main_program(), place)
    infer_feeder = fluid.DataFeeder(place=place, feed_list=infer_feeds)

-    fetch_list = [x.name for x in infer_outputs]
+    fetch_list = infer_model.fetches()
+
+    infer_metrics = get_metrics(args.model_name.upper(), 'infer', infer_config)
+    infer_metrics.reset()

    periods = []
-    results = []
    cur_time = time.time()
    for infer_iter, data in enumerate(infer_reader()):
        data_feed_in = [items[:-1] for items in data]
@@ -128,22 +144,10 @@ def infer(args):
        cur_time = time.time()
        period = cur_time - prev_time
        periods.append(period)
-        if args.model_name in ['CTCN']:
-            # For detection model
-            loc_predictions = np.array(infer_outs[0])
-            cls_predictions = np.array(infer_outs[1])
-            for i in range(len(video_id)):
-                results.append((video_id[i], loc_predictions[i].tolist(),
-                                cls_predictions[i].tolist()))
-        else:
-            # For classification model
-            predictions = np.array(infer_outs[0])
-            for i in range(len(predictions)):
-                topk_inds = predictions[i].argsort()[0 - args.infer_topk:]
-                topk_inds = topk_inds[::-1]
-                preds = predictions[i][topk_inds]
-                results.append(
-                    (video_id[i], preds.tolist(), topk_inds.tolist()))
+
+        infer_result_list = [item for item in infer_outs] + [video_id]
+        infer_metrics.accumulate(infer_result_list)
+
        if args.log_interval > 0 and infer_iter % args.log_interval == 0:
            logger.info('Processed {} samples'.format((infer_iter + 1) * len(
                video_id)))
@@ -152,10 +156,9 @@ def infer(args):
            np.mean(periods)))

    if not os.path.isdir(args.save_dir):
-        os.mkdir(args.save_dir)
-    result_file_name = os.path.join(
-        args.save_dir, "{}_infer_result.pkl".format(args.model_name))
-    pickle.dump(results, open(result_file_name, 'wb'), protocol=0)
+        os.makedirs(args.save_dir)
+
+    infer_metrics.finalize_and_log_out(savedir=args.save_dir)

if __name__ == "__main__":
diff --git a/PaddleCV/PaddleVideo/datareader/__init__.py b/PaddleCV/PaddleVideo/reader/__init__.py
similarity index 100%
rename from PaddleCV/PaddleVideo/datareader/__init__.py
rename to PaddleCV/PaddleVideo/reader/__init__.py
diff --git a/PaddleCV/PaddleVideo/datareader/ctcn_reader.py b/PaddleCV/PaddleVideo/reader/ctcn_reader.py
similarity index 99%
rename from PaddleCV/PaddleVideo/datareader/ctcn_reader.py
rename to PaddleCV/PaddleVideo/reader/ctcn_reader.py
index e5fe8ee7..35ceca6e 100644
--- a/PaddleCV/PaddleVideo/datareader/ctcn_reader.py
+++ b/PaddleCV/PaddleVideo/reader/ctcn_reader.py
@@ -284,7 +284,7 @@ class CTCNReader(DataReader):
        flow_exist = os.path.exists(
            os.path.join(self.root, self.flow, splited[0] + '.pkl'))
        if not (rgb_exist and flow_exist):
-            # logger.info('file not exist', splited[0])
+            logger.info('file not exist {}'.format(splited[0]))
            continue
        fnames.append(splited[0])
        frames_num = int(splited[1]) // self.snippet_length
@@ -354,7 +354,7 @@ class CTCNReader(DataReader):
        flow_exist = os.path.exists(
            os.path.join(self.root, self.flow, splited[0] + '.pkl'))
        if not (rgb_exist and flow_exist):
-            # logger.info('file not exist {}'.format(splited[0]))
+            logger.info('file not exist {}'.format(splited[0]))
            continue
        fnames.append(splited[0])
        frames_num = int(splited[1]) // self.snippet_length
diff --git a/PaddleCV/PaddleVideo/datareader/feature_reader.py b/PaddleCV/PaddleVideo/reader/feature_reader.py
similarity index 100%
rename from PaddleCV/PaddleVideo/datareader/feature_reader.py
rename to PaddleCV/PaddleVideo/reader/feature_reader.py
diff --git a/PaddleCV/PaddleVideo/datareader/kinetics_reader.py b/PaddleCV/PaddleVideo/reader/kinetics_reader.py
similarity index 81%
rename from PaddleCV/PaddleVideo/datareader/kinetics_reader.py
rename to PaddleCV/PaddleVideo/reader/kinetics_reader.py
index 10f9dae8..67d54d6c 100644
--- a/PaddleCV/PaddleVideo/datareader/kinetics_reader.py
+++ b/PaddleCV/PaddleVideo/reader/kinetics_reader.py
@@ -67,7 +67,7 @@ class KineticsReader(DataReader):
        self.num_reader_threads = self.get_config_from_sec(mode, 'num_reader_threads')
        self.buf_size = self.get_config_from_sec(mode, 'buf_size')
-        self.enable_ce = self.get_config_from_sec(mode, 'enable_ce')
+        self.fix_random_seed = self.get_config_from_sec(mode, 'fix_random_seed')

        self.img_mean = np.array(cfg.MODEL.image_mean).reshape(
            [3, 1, 1]).astype(np.float32)
@@ -76,12 +76,30 @@ class KineticsReader(DataReader):
        # set batch size and file list
        self.batch_size = cfg[mode.upper()]['batch_size']
        self.filelist = cfg[mode.upper()]['filelist']
-        if self.enable_ce:
+        if self.mode == 'infer':
+            self.video_path = cfg[mode.upper()]['video_path']
+        else:
+            self.video_path = ''
+        if self.fix_random_seed:
            random.seed(0)
            np.random.seed(0)
+            self.num_reader_threads = 1

    def create_reader(self):
-        _reader = self._reader_creator(self.filelist, self.mode, seg_num=self.seg_num, seglen = self.seglen, \
+        # if set video_path for inference mode, just load this single video
+        if (self.mode == 'infer') and (self.video_path != ''):
+            # load video from file stored at video_path
+            _reader = self._inference_reader_creator(
+                self.video_path,
+                self.mode,
+                seg_num=self.seg_num,
+                seglen=self.seglen,
+                short_size=self.short_size,
+                target_size=self.target_size,
+                img_mean=self.img_mean,
+                img_std=self.img_std)
+        else:
+            _reader = self._reader_creator(self.filelist, self.mode, seg_num=self.seg_num, seglen = self.seglen, \
                        short_size = self.short_size, target_size = self.target_size, \
                        img_mean = self.img_mean, img_std = self.img_std, \
                        shuffle = (self.mode == 'train'), \
@@ -100,6 +118,27 @@ class KineticsReader(DataReader):

        return _batch_reader

+    def _inference_reader_creator(self, video_path, mode, seg_num, seglen,
+                                  short_size, target_size, img_mean, img_std):
+        def reader():
+            try:
+                imgs = mp4_loader(video_path, seg_num, seglen, mode)
+                if len(imgs) < 1:
+                    logger.error('{} frame length {} less than 1.'.format(
+                        video_path, len(imgs)))
+                    yield None, None
+                    return
+            except:
+                logger.error('Error when loading {}'.format(video_path))
+                yield None, None
+                return
+
+            imgs_ret = imgs_transform(imgs, mode, seg_num, seglen, short_size,
+                                      target_size, img_mean, img_std)
+            label_ret = video_path
+
+            yield imgs_ret, label_ret
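+            # the label slot carries the video path, so the prediction can be
+            # matched back to the input file when metrics are accumulated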
+ + return reader + def _reader_creator(self, pickle_list, mode, @@ -129,8 +168,8 @@ class KineticsReader(DataReader): logger.error('Error when loading {}'.format(mp4_path)) return None, None - return imgs_transform(imgs, label, mode, seg_num, seglen, \ - short_size, target_size, img_mean, img_std) + return imgs_transform(imgs, mode, seg_num, seglen, \ + short_size, target_size, img_mean, img_std, name = self.name), label def decode_pickle(sample, mode, seg_num, seglen, short_size, target_size, img_mean, img_std): @@ -157,34 +196,8 @@ class KineticsReader(DataReader): ret_label = vid imgs = video_loader(frames, seg_num, seglen, mode) - return imgs_transform(imgs, ret_label, mode, seg_num, seglen, \ - short_size, target_size, img_mean, img_std) - - def imgs_transform(imgs, label, mode, seg_num, seglen, short_size, - target_size, img_mean, img_std): - imgs = group_scale(imgs, short_size) - - if mode == 'train': - if self.name == "TSM": - imgs = group_multi_scale_crop(imgs, short_size) - imgs = group_random_crop(imgs, target_size) - imgs = group_random_flip(imgs) - else: - imgs = group_center_crop(imgs, target_size) - - np_imgs = (np.array(imgs[0]).astype('float32').transpose( - (2, 0, 1))).reshape(1, 3, target_size, target_size) / 255 - for i in range(len(imgs) - 1): - img = (np.array(imgs[i + 1]).astype('float32').transpose( - (2, 0, 1))).reshape(1, 3, target_size, target_size) / 255 - np_imgs = np.concatenate((np_imgs, img)) - imgs = np_imgs - imgs -= img_mean - imgs /= img_std - imgs = np.reshape(imgs, - (seg_num, seglen * 3, target_size, target_size)) - - return imgs, label + return imgs_transform(imgs, mode, seg_num, seglen, \ + short_size, target_size, img_mean, img_std, name = self.name), ret_label def reader(): with open(pickle_list) as flist: @@ -215,6 +228,38 @@ class KineticsReader(DataReader): return paddle.reader.xmap_readers(mapper, reader, num_threads, buf_size) +def imgs_transform(imgs, + mode, + seg_num, + seglen, + short_size, + target_size, + img_mean, + img_std, + name=''): + imgs = group_scale(imgs, short_size) + + if mode == 'train': + if name == "TSM": + imgs = group_multi_scale_crop(imgs, short_size) + imgs = group_random_crop(imgs, target_size) + imgs = group_random_flip(imgs) + else: + imgs = group_center_crop(imgs, target_size) + + np_imgs = (np.array(imgs[0]).astype('float32').transpose( + (2, 0, 1))).reshape(1, 3, target_size, target_size) / 255 + for i in range(len(imgs) - 1): + img = (np.array(imgs[i + 1]).astype('float32').transpose( + (2, 0, 1))).reshape(1, 3, target_size, target_size) / 255 + np_imgs = np.concatenate((np_imgs, img)) + imgs = np_imgs + imgs -= img_mean + imgs /= img_std + imgs = np.reshape(imgs, (seg_num, seglen * 3, target_size, target_size)) + + return imgs + def group_multi_scale_crop(img_group, target_size, scales=None, \ max_distort=1, fix_crop=True, more_fix_crop=True): scales = scales if scales is not None else [1, .875, .75, .66] diff --git a/PaddleCV/PaddleVideo/datareader/nonlocal_reader.py b/PaddleCV/PaddleVideo/reader/nonlocal_reader.py similarity index 96% rename from PaddleCV/PaddleVideo/datareader/nonlocal_reader.py rename to PaddleCV/PaddleVideo/reader/nonlocal_reader.py index 15c0a849..9bf9ec85 100644 --- a/PaddleCV/PaddleVideo/datareader/nonlocal_reader.py +++ b/PaddleCV/PaddleVideo/reader/nonlocal_reader.py @@ -71,6 +71,9 @@ class NonlocalReader(DataReader): filelist = cfg[mode.upper()]['filelist'] batch_size = cfg[mode.upper()]['batch_size'] + if (self.mode == 'infer') and (cfg['INFER']['video_path'] != ''): + filelist = 
create_tmp_inference_file(cfg['INFER']['video_path'])
+
        if self.mode == 'train':
            sample_times = 1
            return reader_func(filelist, batch_size, sample_times, True, True,
@@ -92,6 +95,16 @@ class NonlocalReader(DataReader):
        raise NotImplementedError

+def create_tmp_inference_file(video_path,
+                              file_path='temp_nonlocal_inference_list'):
+    tmp_file = open(file_path, 'w')
+    for i in range(10):
+        for j in range(3):
+            tmp_file.write('{} {} {} {}\n'.format(video_path, 0, i, j))
+    tmp_file.close()
+    return file_path
+
+
def video_fast_get_frame(video_path,
                         sampling_rate=1,
                         length=64,
diff --git a/PaddleCV/PaddleVideo/datareader/reader_utils.py b/PaddleCV/PaddleVideo/reader/reader_utils.py
similarity index 100%
rename from PaddleCV/PaddleVideo/datareader/reader_utils.py
rename to PaddleCV/PaddleVideo/reader/reader_utils.py
diff --git a/PaddleCV/PaddleVideo/run.sh b/PaddleCV/PaddleVideo/run.sh
new file mode 100644
index 00000000..d0fa9e48
--- /dev/null
+++ b/PaddleCV/PaddleVideo/run.sh
@@ -0,0 +1,107 @@
+# examples of running programs:
+#  bash ./run.sh train CTCN ./configs/ctcn.yaml
+#  bash ./run.sh eval NEXTVLAD ./configs/nextvlad.yaml
+#  bash ./run.sh predict NONLOCAL ./configs/nonlocal.yaml
+
+# mode should be one of [train, eval, predict, inference]
+# name should be one of [AttentionCluster, AttentionLSTM, NEXTVLAD, NONLOCAL, TSN, TSM, STNET, CTCN]
+# configs should be ./configs/xxx.yaml
+
+mode=$1
+name=$2
+configs=$3
+
+pretrain="" # set pretrain model path if needed
+resume="" # set checkpoint path to resume training from if needed
+save_dir="./data/checkpoints"
+save_inference_dir="./data/inference_model"
+use_gpu=True
+fix_random_seed=False
+log_interval=1
+valid_interval=1
+
+weights="" # set the path of weights to enable eval and predict, just ignore this when training
+
+export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
+#export CUDA_VISIBLE_DEVICES=0,1,2,3
+export FLAGS_fast_eager_deletion_mode=1
+export FLAGS_eager_delete_tensor_gb=0.0
+export FLAGS_fraction_of_gpu_memory_to_use=0.98
+
+if [ "$mode"x == "train"x ]; then
+    echo $mode $name $configs $resume $pretrain
+    if [ "$resume"x != ""x ]; then
+        python train.py --model_name=$name \
+                        --config=$configs \
+                        --resume=$resume \
+                        --log_interval=$log_interval \
+                        --valid_interval=$valid_interval \
+                        --use_gpu=$use_gpu \
+                        --save_dir=$save_dir \
+                        --fix_random_seed=$fix_random_seed
+    elif [ "$pretrain"x != ""x ]; then
+        python train.py --model_name=$name \
+                        --config=$configs \
+                        --pretrain=$pretrain \
+                        --log_interval=$log_interval \
+                        --valid_interval=$valid_interval \
+                        --use_gpu=$use_gpu \
+                        --save_dir=$save_dir \
+                        --fix_random_seed=$fix_random_seed
+    else
+        python train.py --model_name=$name \
+                        --config=$configs \
+                        --log_interval=$log_interval \
+                        --valid_interval=$valid_interval \
+                        --use_gpu=$use_gpu \
+                        --save_dir=$save_dir \
+                        --fix_random_seed=$fix_random_seed
+    fi
+elif [ "$mode"x == "eval"x ]; then
+    echo $mode $name $configs $weights
+    if [ "$weights"x != ""x ]; then
+        python eval.py --model_name=$name \
+                       --config=$configs \
+                       --log_interval=$log_interval \
+                       --weights=$weights \
+                       --use_gpu=$use_gpu
+    else
+        python eval.py --model_name=$name \
+                       --config=$configs \
+                       --log_interval=$log_interval \
+                       --use_gpu=$use_gpu
+    fi
+elif [ "$mode"x == "predict"x ]; then
+    echo $mode $name $configs $weights
+    if [ "$weights"x != ""x ]; then
+        python predict.py --model_name=$name \
+                          --config=$configs \
+                          --log_interval=$log_interval \
+                          --weights=$weights \
+                          --video_path='' \
+                          --use_gpu=$use_gpu
+    else
+        python predict.py --model_name=$name \
--config=$configs \ + --log_interval=$log_interval \ + --use_gpu=$use_gpu \ + --video_path='' + fi +elif [ "$mode"x == "inference"x ]; then + echo $mode $name $configs $weights + if [ "$weights"x != ""x ]; then + python inference_model.py --model_name=$name \ + --config=$configs \ + --weights=$weights \ + --use_gpu=$use_gpu \ + --save_dir=$save_inference_dir + else + python inference_model.py --model_name=$name \ + --config=$configs \ + --use_gpu=$use_gpu \ + --save_dir=$save_inference_dir + fi +else + echo "Not implemented mode " $mode +fi + diff --git a/PaddleCV/PaddleVideo/scripts/infer/infer_attention_cluster.sh b/PaddleCV/PaddleVideo/scripts/infer/infer_attention_cluster.sh deleted file mode 100644 index 6a122dc2..00000000 --- a/PaddleCV/PaddleVideo/scripts/infer/infer_attention_cluster.sh +++ /dev/null @@ -1,4 +0,0 @@ -python infer.py --model_name="AttentionCluster" --config=./configs/attention_cluster.txt \ - --filelist=./dataset/youtube8m/infer.list \ - --weights=./checkpoints/AttentionCluster_epoch0 \ - --save_dir="./save" diff --git a/PaddleCV/PaddleVideo/scripts/infer/infer_attention_lstm.sh b/PaddleCV/PaddleVideo/scripts/infer/infer_attention_lstm.sh deleted file mode 100644 index 659ca891..00000000 --- a/PaddleCV/PaddleVideo/scripts/infer/infer_attention_lstm.sh +++ /dev/null @@ -1,4 +0,0 @@ -python infer.py --model_name="AttentionLSTM" --config=./configs/attention_lstm.txt \ - --filelist=./dataset/youtube8m/infer.list \ - --weights=./checkpoints/AttentionLSTM_epoch0 \ - --save_dir="./save" diff --git a/PaddleCV/PaddleVideo/scripts/infer/infer_ctcn.sh b/PaddleCV/PaddleVideo/scripts/infer/infer_ctcn.sh deleted file mode 100644 index ad7476bd..00000000 --- a/PaddleCV/PaddleVideo/scripts/infer/infer_ctcn.sh +++ /dev/null @@ -1,2 +0,0 @@ -python infer.py --model_name="CTCN" --config=./configs/ctcn.txt --filelist=./dataset/ctcn/infer.list \ - --log_interval=1 --weights=./checkpoints/CTCN_epoch0 --save_dir=./save diff --git a/PaddleCV/PaddleVideo/scripts/infer/infer_nextvlad.sh b/PaddleCV/PaddleVideo/scripts/infer/infer_nextvlad.sh deleted file mode 100644 index db383b81..00000000 --- a/PaddleCV/PaddleVideo/scripts/infer/infer_nextvlad.sh +++ /dev/null @@ -1,3 +0,0 @@ -python infer.py --model_name="NEXTVLAD" --config=./configs/nextvlad.txt --filelist=./dataset/youtube8m/infer.list \ - --weights=./checkpoints/NEXTVLAD_epoch0 \ - --save_dir="./save" diff --git a/PaddleCV/PaddleVideo/scripts/infer/infer_nonlocal.sh b/PaddleCV/PaddleVideo/scripts/infer/infer_nonlocal.sh deleted file mode 100644 index 73ed47de..00000000 --- a/PaddleCV/PaddleVideo/scripts/infer/infer_nonlocal.sh +++ /dev/null @@ -1,2 +0,0 @@ -python infer.py --model_name="NONLOCAL" --config=./configs/nonlocal.txt --filelist=./dataset/nonlocal/inferlist.txt \ - --log_interval=10 --weights=./checkpoints/NONLOCAL_epoch0 --save_dir=./save diff --git a/PaddleCV/PaddleVideo/scripts/infer/infer_stnet.sh b/PaddleCV/PaddleVideo/scripts/infer/infer_stnet.sh deleted file mode 100644 index fb598fde..00000000 --- a/PaddleCV/PaddleVideo/scripts/infer/infer_stnet.sh +++ /dev/null @@ -1,2 +0,0 @@ -python infer.py --model_name="STNET" --config=./configs/stnet.txt --filelist=./dataset/kinetics/infer.list \ - --log_interval=10 --weights=./checkpoints/STNET_epoch0 --save_dir=./save diff --git a/PaddleCV/PaddleVideo/scripts/infer/infer_tsm.sh b/PaddleCV/PaddleVideo/scripts/infer/infer_tsm.sh deleted file mode 100644 index cdcc30df..00000000 --- a/PaddleCV/PaddleVideo/scripts/infer/infer_tsm.sh +++ /dev/null @@ -1,2 +0,0 @@ -python infer.py 
--model_name="TSM" --config=./configs/tsm.txt --filelist=./dataset/kinetics/infer.list \ - --log_interval=10 --weights=./checkpoints/TSM_epoch0 --save_dir=./save diff --git a/PaddleCV/PaddleVideo/scripts/infer/infer_tsn.sh b/PaddleCV/PaddleVideo/scripts/infer/infer_tsn.sh deleted file mode 100644 index 03dd1f0a..00000000 --- a/PaddleCV/PaddleVideo/scripts/infer/infer_tsn.sh +++ /dev/null @@ -1,2 +0,0 @@ -python infer.py --model_name="TSN" --config=./configs/tsn.txt --filelist=./dataset/kinetics/infer.list \ - --log_interval=10 --weights=./checkpoints/TSN_epoch0 --save_dir=./save diff --git a/PaddleCV/PaddleVideo/scripts/test/test_attention_cluster.sh b/PaddleCV/PaddleVideo/scripts/test/test_attention_cluster.sh deleted file mode 100644 index 1bdc5acf..00000000 --- a/PaddleCV/PaddleVideo/scripts/test/test_attention_cluster.sh +++ /dev/null @@ -1,2 +0,0 @@ -python test.py --model_name="AttentionCluster" --config=./configs/attention_cluster.txt \ - --log_interval=5 --weights=./checkpoints/AttentionCluster_epoch0 diff --git a/PaddleCV/PaddleVideo/scripts/test/test_attention_lstm.sh b/PaddleCV/PaddleVideo/scripts/test/test_attention_lstm.sh deleted file mode 100644 index 27bff350..00000000 --- a/PaddleCV/PaddleVideo/scripts/test/test_attention_lstm.sh +++ /dev/null @@ -1,2 +0,0 @@ -python test.py --model_name="AttentionLSTM" --config=./configs/attention_lstm.txt \ - --log_interval=5 --weights=./checkpoints/AttentionLSTM_epoch0 diff --git a/PaddleCV/PaddleVideo/scripts/test/test_ctcn.sh b/PaddleCV/PaddleVideo/scripts/test/test_ctcn.sh deleted file mode 100644 index 3447b579..00000000 --- a/PaddleCV/PaddleVideo/scripts/test/test_ctcn.sh +++ /dev/null @@ -1,3 +0,0 @@ -export CUDA_VISIBLE_DEVICES=0 -python test.py --model_name="CTCN" --config=./configs/ctcn.txt \ - --log_interval=1 --weights=./checkpoints/CTCN_epoch0 diff --git a/PaddleCV/PaddleVideo/scripts/test/test_nextvlad.sh b/PaddleCV/PaddleVideo/scripts/test/test_nextvlad.sh deleted file mode 100644 index 4d390a0b..00000000 --- a/PaddleCV/PaddleVideo/scripts/test/test_nextvlad.sh +++ /dev/null @@ -1,2 +0,0 @@ -python test.py --model_name="NEXTVLAD" --config=./configs/nextvlad.txt \ - --log_interval=10 --weights=./checkpoints/NEXTVLAD_epoch0 diff --git a/PaddleCV/PaddleVideo/scripts/test/test_nonlocal.sh b/PaddleCV/PaddleVideo/scripts/test/test_nonlocal.sh deleted file mode 100644 index 7a42bb05..00000000 --- a/PaddleCV/PaddleVideo/scripts/test/test_nonlocal.sh +++ /dev/null @@ -1,2 +0,0 @@ -python -i test.py --model_name="NONLOCAL" --config=./configs/nonlocal.txt \ - --log_interval=1 --weights=./checkpoints/NONLOCAL_epoch0 diff --git a/PaddleCV/PaddleVideo/scripts/test/test_stnet.sh b/PaddleCV/PaddleVideo/scripts/test/test_stnet.sh deleted file mode 100644 index 0b471ed9..00000000 --- a/PaddleCV/PaddleVideo/scripts/test/test_stnet.sh +++ /dev/null @@ -1,2 +0,0 @@ -python test.py --model_name="STNET" --config=./configs/stnet.txt \ - --log_interval=10 --weights=./checkpoints/STNET_epoch0 diff --git a/PaddleCV/PaddleVideo/scripts/test/test_tsm.sh b/PaddleCV/PaddleVideo/scripts/test/test_tsm.sh deleted file mode 100644 index ffebc477..00000000 --- a/PaddleCV/PaddleVideo/scripts/test/test_tsm.sh +++ /dev/null @@ -1,2 +0,0 @@ -python test.py --model_name="TSM" --config=./configs/tsm.txt \ - --log_interval=10 --weights=./checkpoints/TSM_epoch0 diff --git a/PaddleCV/PaddleVideo/scripts/test/test_tsn.sh b/PaddleCV/PaddleVideo/scripts/test/test_tsn.sh deleted file mode 100644 index ffe0ff51..00000000 --- a/PaddleCV/PaddleVideo/scripts/test/test_tsn.sh 
+++ /dev/null @@ -1,2 +0,0 @@ -python test.py --model_name="TSN" --config=./configs/tsn.txt \ - --log_interval=10 --weights=./checkpoints/TSN_epoch0 diff --git a/PaddleCV/PaddleVideo/scripts/train/train_attention_cluster.sh b/PaddleCV/PaddleVideo/scripts/train/train_attention_cluster.sh deleted file mode 100644 index d5a88aaa..00000000 --- a/PaddleCV/PaddleVideo/scripts/train/train_attention_cluster.sh +++ /dev/null @@ -1,3 +0,0 @@ -export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 -python train.py --model_name="AttentionCluster" --config=./configs/attention_cluster.txt --epoch=5 \ - --valid_interval=1 --log_interval=10 diff --git a/PaddleCV/PaddleVideo/scripts/train/train_attention_lstm.sh b/PaddleCV/PaddleVideo/scripts/train/train_attention_lstm.sh deleted file mode 100644 index aed607c3..00000000 --- a/PaddleCV/PaddleVideo/scripts/train/train_attention_lstm.sh +++ /dev/null @@ -1,3 +0,0 @@ -export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 -python train.py --model_name="AttentionLSTM" --config=./configs/attention_lstm.txt --epoch=10 \ - --valid_interval=1 --log_interval=10 diff --git a/PaddleCV/PaddleVideo/scripts/train/train_ctcn.sh b/PaddleCV/PaddleVideo/scripts/train/train_ctcn.sh deleted file mode 100644 index a15b5c38..00000000 --- a/PaddleCV/PaddleVideo/scripts/train/train_ctcn.sh +++ /dev/null @@ -1,8 +0,0 @@ -export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 - -export FLAGS_fast_eager_deletion_mode=1 -export FLAGS_eager_delete_tensor_gb=0.0 -export FLAGS_fraction_of_gpu_memory_to_use=1.0 - -python train.py --model_name="CTCN" --config=./configs/ctcn.txt --epoch=35 \ - --valid_interval=1 --log_interval=1 diff --git a/PaddleCV/PaddleVideo/scripts/train/train_nextvlad.sh b/PaddleCV/PaddleVideo/scripts/train/train_nextvlad.sh deleted file mode 100644 index 21ba7888..00000000 --- a/PaddleCV/PaddleVideo/scripts/train/train_nextvlad.sh +++ /dev/null @@ -1,9 +0,0 @@ -# activate eager gc to reduce memory use -#export FLAGS_fraction_of_gpu_memory_to_use=1.0 -#export FLAGS_fast_eager_deletion_mode=1 -#export FLAGS_eager_delete_tensor_gb=0.0 -#export FLAGS_limit_of_tmp_allocation=0 - -export CUDA_VISIBLE_DEVICES=0,1,2,3 -python train.py --model_name="NEXTVLAD" --config=./configs/nextvlad.txt --epoch=6 \ - --valid_interval=1 --log_interval=10 diff --git a/PaddleCV/PaddleVideo/scripts/train/train_nonlocal.sh b/PaddleCV/PaddleVideo/scripts/train/train_nonlocal.sh deleted file mode 100644 index 2b89d170..00000000 --- a/PaddleCV/PaddleVideo/scripts/train/train_nonlocal.sh +++ /dev/null @@ -1,3 +0,0 @@ -python train.py --model_name="NONLOCAL" --config=./configs/nonlocal.txt --epoch=120 \ - --valid_interval=1 --log_interval=1 \ - --pretrain=./pretrained/ResNet50_pretrained diff --git a/PaddleCV/PaddleVideo/scripts/train/train_stnet.sh b/PaddleCV/PaddleVideo/scripts/train/train_stnet.sh deleted file mode 100644 index 6d2a3acf..00000000 --- a/PaddleCV/PaddleVideo/scripts/train/train_stnet.sh +++ /dev/null @@ -1,3 +0,0 @@ -export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 -python train.py --model_name="STNET" --config=./configs/stnet.txt --epoch=60 \ - --valid_interval=1 --log_interval=10 diff --git a/PaddleCV/PaddleVideo/scripts/train/train_tsm.sh b/PaddleCV/PaddleVideo/scripts/train/train_tsm.sh deleted file mode 100644 index 5d73a42b..00000000 --- a/PaddleCV/PaddleVideo/scripts/train/train_tsm.sh +++ /dev/null @@ -1,11 +0,0 @@ -export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 - -# activate eager gc to reduce memory use -export FLAGS_fast_eager_deletion_mode=1 -export FLAGS_eager_delete_tensor_gb=0.0 -export 
FLAGS_fraction_of_gpu_memory_to_use=0.98 -#export FLAGS_limit_of_tmp_allocation=0 -#export FLAGS_conv_workspace_size_limit=1024 - -python train.py --model_name="TSM" --config=./configs/tsm.txt --epoch=65 \ - --valid_interval=1 --log_interval=10 diff --git a/PaddleCV/PaddleVideo/scripts/train/train_tsn.sh b/PaddleCV/PaddleVideo/scripts/train/train_tsn.sh deleted file mode 100644 index be5a25f3..00000000 --- a/PaddleCV/PaddleVideo/scripts/train/train_tsn.sh +++ /dev/null @@ -1,3 +0,0 @@ -export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 -python train.py --model_name="TSN" --config=./configs/tsn.txt --epoch=45 \ - --valid_interval=1 --log_interval=10 diff --git a/PaddleCV/PaddleVideo/tools/train_utils.py b/PaddleCV/PaddleVideo/tools/train_utils.py deleted file mode 100644 index a1199dd1..00000000 --- a/PaddleCV/PaddleVideo/tools/train_utils.py +++ /dev/null @@ -1,232 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -import time -import numpy as np -import paddle -import paddle.fluid as fluid -import logging -import shutil - -logger = logging.getLogger(__name__) - - -def log_lr_and_step(): - try: - # In optimizers, if learning_rate is set as constant, lr_var - # name is 'learning_rate_0', and iteration counter is not - # recorded. 
If learning_rate is set as decayed values from - # learning_rate_scheduler, lr_var name is 'learning_rate', - # and iteration counter is recorded with name '@LR_DECAY_COUNTER@', - # better impliment is required here - lr_var = fluid.global_scope().find_var("learning_rate") - if not lr_var: - lr_var = fluid.global_scope().find_var("learning_rate_0") - lr = np.array(lr_var.get_tensor()) - - lr_count = '[-]' - lr_count_var = fluid.global_scope().find_var("@LR_DECAY_COUNTER@") - if lr_count_var: - lr_count = np.array(lr_count_var.get_tensor()) - logger.info("------- learning rate {}, learning rate counter {} -----" - .format(np.array(lr), np.array(lr_count))) - except: - logger.warn("Unable to get learning_rate and LR_DECAY_COUNTER.") - - -def test_without_pyreader(test_exe, - test_reader, - test_feeder, - test_fetch_list, - test_metrics, - log_interval=0, - save_model_name=''): - test_metrics.reset() - for test_iter, data in enumerate(test_reader()): - test_outs = test_exe.run(test_fetch_list, feed=test_feeder.feed(data)) - if save_model_name in ['CTCN']: - # for detection - total_loss = np.array(test_outs[0]) - loc_loss = np.array(test_outs[1]) - cls_loss = np.array(test_outs[2]) - loc_preds = np.array(test_outs[3]) - cls_preds = np.array(test_outs[4]) - label = np.array(test_outs[-1]) - loss = [total_loss, loc_loss, cls_loss] - pred = [loc_preds, cls_preds] - else: - # for classification - loss = np.array(test_outs[0]) - pred = np.array(test_outs[1]) - label = np.array(test_outs[-1]) - test_metrics.accumulate(loss, pred, label) - if log_interval > 0 and test_iter % log_interval == 0: - test_metrics.calculate_and_log_out(loss, pred, label, \ - info = '[TEST] test_iter {} '.format(test_iter)) - test_metrics.finalize_and_log_out("[TEST] Finish") - - -def test_with_pyreader(test_exe, - test_pyreader, - test_fetch_list, - test_metrics, - log_interval=0, - save_model_name=''): - if not test_pyreader: - logger.error("[TEST] get pyreader failed.") - test_pyreader.start() - test_metrics.reset() - test_iter = 0 - try: - while True: - test_outs = test_exe.run(fetch_list=test_fetch_list) - if save_model_name in ['CTCN']: - # for detection - total_loss = np.array(test_outs[0]) - loc_loss = np.array(test_outs[1]) - cls_loss = np.array(test_outs[2]) - loc_preds = np.array(test_outs[3]) - cls_preds = np.array(test_outs[4]) - label = np.array(test_outs[-1]) - loss = [total_loss, loc_loss, cls_loss] - pred = [loc_preds, cls_preds] - else: - loss = np.array(test_outs[0]) - pred = np.array(test_outs[1]) - label = np.array(test_outs[-1]) - test_metrics.accumulate(loss, pred, label) - if log_interval > 0 and test_iter % log_interval == 0: - test_metrics.calculate_and_log_out(loss, pred, label, \ - info = '[TEST] test_iter {} '.format(test_iter)) - test_iter += 1 - except fluid.core.EOFException: - test_metrics.finalize_and_log_out("[TEST] Finish") - finally: - test_pyreader.reset() - - -def train_without_pyreader(exe, train_prog, train_exe, train_reader, train_feeder, \ - train_fetch_list, train_metrics, epochs = 10, \ - log_interval = 0, valid_interval = 0, save_dir = './', \ - save_model_name = 'model', test_exe = None, test_reader = None, \ - test_feeder = None, test_fetch_list = None, test_metrics = None): - for epoch in range(epochs): - log_lr_and_step() - epoch_periods = [] - for train_iter, data in enumerate(train_reader()): - cur_time = time.time() - train_outs = train_exe.run(train_fetch_list, - feed=train_feeder.feed(data)) - period = time.time() - cur_time - epoch_periods.append(period) - if 
-                # detection model
-                total_loss = np.array(train_outs[0])
-                loc_loss = np.array(train_outs[1])
-                cls_loss = np.array(train_outs[2])
-                loc_preds = np.array(train_outs[3])
-                cls_preds = np.array(train_outs[4])
-                label = np.array(train_outs[-1])
-                loss = [total_loss, loc_loss, cls_loss]
-                pred = [loc_preds, cls_preds]
-            else:
-                # classification model
-                loss = np.array(train_outs[0])
-                pred = np.array(train_outs[1])
-                label = np.array(train_outs[-1])
-            if log_interval > 0 and (train_iter % log_interval == 0):
-                # eval here
-                train_metrics.calculate_and_log_out(loss, pred, label, \
-                        info = '[TRAIN] Epoch {}, iter {} '.format(epoch, train_iter))
-            train_iter += 1
-        logger.info('[TRAIN] Epoch {} training finished, average time: {}'.
-                    format(epoch, np.mean(epoch_periods[1:])))
-        save_model(exe, train_prog, save_dir, save_model_name,
-                   "_epoch{}".format(epoch))
-        if test_exe and valid_interval > 0 and (epoch + 1
-                                                ) % valid_interval == 0:
-            test_without_pyreader(test_exe, test_reader, test_feeder,
-                                  test_fetch_list, test_metrics, log_interval,
-                                  save_model_name)
-
-
-def train_with_pyreader(exe, train_prog, train_exe, train_pyreader, \
-                        train_fetch_list, train_metrics, epochs = 10, \
-                        log_interval = 0, valid_interval = 0, save_dir = './', \
-                        save_model_name = 'model', enable_ce = False, \
-                        test_exe = None, test_pyreader = None, \
-                        test_fetch_list = None, test_metrics = None):
-    if not train_pyreader:
-        logger.error("[TRAIN] get pyreader failed.")
-    epoch_periods = []
-    train_loss = 0
-    for epoch in range(epochs):
-        log_lr_and_step()
-        train_pyreader.start()
-        train_metrics.reset()
-        try:
-            train_iter = 0
-            epoch_periods = []
-            while True:
-                cur_time = time.time()
-                train_outs = train_exe.run(fetch_list=train_fetch_list)
-                period = time.time() - cur_time
-                epoch_periods.append(period)
-                if save_model_name in ['CTCN']:
-                    # for detection
-                    total_loss = np.array(train_outs[0])
-                    loc_loss = np.array(train_outs[1])
-                    cls_loss = np.array(train_outs[2])
-                    loc_preds = np.array(train_outs[3])
-                    cls_preds = np.array(train_outs[4])
-                    label = np.array(train_outs[-1])
-                    loss = [total_loss, loc_loss, cls_loss]
-                    pred = [loc_preds, cls_preds]
-                else:
-                    # for classification
-                    loss = np.array(train_outs[0])
-                    pred = np.array(train_outs[1])
-                    label = np.array(train_outs[-1])
-                if log_interval > 0 and (train_iter % log_interval == 0):
-                    # eval here
-                    train_loss = train_metrics.calculate_and_log_out(loss, pred, label, \
-                            info = '[TRAIN] Epoch {}, iter {} '.format(epoch, train_iter))
-                train_iter += 1
-        except fluid.core.EOFException:
-            # eval here
-            logger.info('[TRAIN] Epoch {} training finished, average time: {}'.
-                        format(epoch, np.mean(epoch_periods[1:])))
-            save_model(exe, train_prog, save_dir, save_model_name,
-                       "_epoch{}".format(epoch))
-            if test_exe and valid_interval > 0 and (epoch + 1
-                                                    ) % valid_interval == 0:
-                test_with_pyreader(test_exe, test_pyreader, test_fetch_list,
-                                   test_metrics, log_interval, save_model_name)
-        finally:
-            epoch_period = []
-            train_pyreader.reset()
-    #only for ce
-    if enable_ce:
-        cards = os.environ.get('CUDA_VISIBLE_DEVICES')
-        gpu_num = len(cards.split(","))
-        print("kpis\ttrain_cost_card{}\t{}".format(gpu_num, train_loss))
-        print("kpis\ttrain_speed_card{}\t{}".format(gpu_num,
-                                                    np.mean(epoch_periods)))
-
-
-def save_model(exe, program, save_dir, model_name, postfix=None):
-    model_path = os.path.join(save_dir, model_name + postfix)
-    if os.path.isdir(model_path):
-        shutil.rmtree(model_path)
-    fluid.io.save_persistables(exe, model_path, main_program=program)
diff --git a/PaddleCV/PaddleVideo/train.py b/PaddleCV/PaddleVideo/train.py
index ac42f361..7880d9e3 100644
--- a/PaddleCV/PaddleVideo/train.py
+++ b/PaddleCV/PaddleVideo/train.py
@@ -21,12 +21,12 @@ import logging
 import numpy as np
 import paddle.fluid as fluid
 
-from tools.train_utils import train_with_pyreader, train_without_pyreader
+from utils.train_utils import train_with_pyreader
 import models
-from config import *
-from datareader import get_reader
+from utils.config_utils import *
+from reader import get_reader
 from metrics import get_metrics
-from utils import check_cuda
+from utils.utility import check_cuda
 
 logging.root.handlers = []
 FORMAT = '[%(levelname)s: %(filename)s: %(lineno)4d]: %(message)s'
@@ -73,11 +73,6 @@ def parse_args():
         type=ast.literal_eval,
         default=True,
         help='default use gpu.')
-    parser.add_argument(
-        '--no_use_pyreader',
-        action='store_true',
-        default=False,
-        help='whether to use pyreader')
     parser.add_argument(
         '--no_memory_optimize',
         action='store_true',
@@ -86,7 +81,7 @@ def parse_args():
     parser.add_argument(
         '--epoch',
         type=int,
-        default=0,
-        help='epoch number, 0 for read from config file')
+        default=None,
+        help='epoch number, None to read from config file')
     parser.add_argument(
         '--valid_interval',
@@ -96,7 +91,7 @@ def parse_args():
     parser.add_argument(
         '--save_dir',
         type=str,
-        default='checkpoints',
-        help='directory name to save train snapshoot')
+        default=os.path.join('data', 'checkpoints'),
+        help='directory name to save train snapshot')
     parser.add_argument(
         '--log_interval',
         type=int,
@@ -104,7 +99,7 @@ def parse_args():
         default=10,
         help='mini-batch interval to log.')
     parser.add_argument(
-        '--enable_ce',
+        '--fix_random_seed',
         type=ast.literal_eval,
         default=False,
-        help='If set True, enable continuous evaluation job.')
+        help='If set True, fix random seed to reproduce training results.')
@@ -124,32 +119,19 @@ def train(args):
     # build model
     startup = fluid.Program()
     train_prog = fluid.Program()
-    if args.enable_ce:
+    if args.fix_random_seed:
         startup.random_seed = 1000
         train_prog.random_seed = 1000
     with fluid.program_guard(train_prog, startup):
         with fluid.unique_name.guard():
-            train_model.build_input(not args.no_use_pyreader)
+            train_model.build_input(use_pyreader=True)
             train_model.build_model()
             # for the input, has the form [data1, data2,..., label], so train_feeds[-1] is label
             train_feeds = train_model.feeds()
-            train_feeds[-1].persistable = True
-            # for the output of classification model, has the form [pred]
-            # for the output of detection model, has the form [loc_pred, cls_pred]
-            train_outputs = train_model.outputs()
-            for output in train_outputs:
-                output.persistable = True
-            train_losses = train_model.loss()
-            if isinstance(train_losses, list) or isinstance(train_losses,
-                                                            tuple):
-                # for detection model, train_losses has the form [total_loss, loc_loss, cls_loss]
-                train_loss = train_losses[0]
-                for item in train_losses:
-                    item.persistable = True
-            else:
-                train_loss = train_losses
-                train_loss.persistable = True
-            # outputs, loss, label should be fetched, so set persistable to be true
+            train_fetch_list = train_model.fetches()
+            train_loss = train_fetch_list[0]
+            for item in train_fetch_list:
+                item.persistable = True
             optimizer = train_model.optimizer()
             optimizer.minimize(train_loss)
             train_pyreader = train_model.pyreader()
@@ -157,14 +139,13 @@ def train(args):
     valid_prog = fluid.Program()
     with fluid.program_guard(valid_prog, startup):
         with fluid.unique_name.guard():
-            valid_model.build_input(not args.no_use_pyreader)
+            valid_model.build_input(use_pyreader=True)
             valid_model.build_model()
             valid_feeds = valid_model.feeds()
-            # for the output of classification model, has the form [pred]
-            # for the output of detection model, has the form [loc_pred, cls_pred]
-            valid_outputs = valid_model.outputs()
-            valid_losses = valid_model.loss()
+            valid_fetch_list = valid_model.fetches()
             valid_pyreader = valid_model.pyreader()
+            for item in valid_fetch_list:
+                item.persistable = True
 
     place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
     exe = fluid.Executor(place)
@@ -175,11 +156,8 @@ def train(args):
         assert os.path.exists(args.resume), \
                 "Given resume weight dir {} not exist.".format(args.resume)
-
-        def if_exist(var):
-            return os.path.exists(os.path.join(args.resume, var.name))
-
-        fluid.io.load_vars(
-            exe, args.resume, predicate=if_exist, main_program=train_prog)
+        fluid.io.load_persistables(
+            exe, '', main_program=train_prog, filename=args.resume)
     else:
         # if not in resume mode, load pretrain weights
         if args.pretrain:
@@ -195,20 +173,30 @@ def train(args):
     build_strategy.enable_sequential_execution = True
     #build_strategy.memory_optimize = True
 
-    train_exe = fluid.ParallelExecutor(
-        use_cuda=args.use_gpu,
-        loss_name=train_loss.name,
-        main_program=train_prog,
-        build_strategy=build_strategy)
-    valid_exe = fluid.ParallelExecutor(
-        use_cuda=args.use_gpu,
-        share_vars_from=train_exe,
-        main_program=valid_prog)
+    compiled_train_prog = fluid.compiler.CompiledProgram(
+        train_prog).with_data_parallel(
+            loss_name=train_loss.name, build_strategy=build_strategy)
+    compiled_valid_prog = fluid.compiler.CompiledProgram(
+        valid_prog).with_data_parallel(
+            share_vars_from=compiled_train_prog, build_strategy=build_strategy)
 
     # get reader
     bs_denominator = 1
-    if (not args.no_use_pyreader) and args.use_gpu:
+    if args.use_gpu:
+        # check number of GPUs
+        gpus = os.getenv("CUDA_VISIBLE_DEVICES", "")
+        if gpus == "":
+            pass
+        else:
+            gpus = gpus.split(",")
+            num_gpus = len(gpus)
+            assert num_gpus == train_config.TRAIN.num_gpus, \
+                   "num_gpus({}) set by CUDA_VISIBLE_DEVICES " \
+                   "should be the same as that " \
+                   "set in {}({})".format(
+                       num_gpus, args.config, train_config.TRAIN.num_gpus)
         bs_denominator = train_config.TRAIN.num_gpus
+
     train_config.TRAIN.batch_size = int(train_config.TRAIN.batch_size /
                                         bs_denominator)
     valid_config.VALID.batch_size = int(valid_config.VALID.batch_size /
                                         bs_denominator)
@@ -220,64 +208,31 @@ def train(args):
     train_metrics = get_metrics(args.model_name.upper(), 'train', train_config)
     valid_metrics = get_metrics(args.model_name.upper(), 'valid', valid_config)
 
-    if isinstance(train_losses, tuple) or isinstance(train_losses, list):
-        # for detection
-        train_fetch_list = [item.name for item in train_losses] + \
-                           [x.name for x in train_outputs] + [train_feeds[-1].name]
-        valid_fetch_list = [item.name for item in valid_losses] + \
-                           [x.name for x in valid_outputs] + [valid_feeds[-1].name]
-    else:
-        # for classification
-        train_fetch_list = [train_losses.name] + [
-            x.name for x in train_outputs
-        ] + [train_feeds[-1].name]
-        valid_fetch_list = [valid_losses.name] + [
-            x.name for x in valid_outputs
-        ] + [valid_feeds[-1].name]
-
     epochs = args.epoch or train_model.epoch_num()
 
-    if args.no_use_pyreader:
-        train_feeder = fluid.DataFeeder(place=place, feed_list=train_feeds)
-        valid_feeder = fluid.DataFeeder(place=place, feed_list=valid_feeds)
-        train_without_pyreader(
-            exe,
-            train_prog,
-            train_exe,
-            train_reader,
-            train_feeder,
-            train_fetch_list,
-            train_metrics,
-            epochs=epochs,
-            log_interval=args.log_interval,
-            valid_interval=args.valid_interval,
-            save_dir=args.save_dir,
-            save_model_name=args.model_name,
-            test_exe=valid_exe,
-            test_reader=valid_reader,
-            test_feeder=valid_feeder,
-            test_fetch_list=valid_fetch_list,
-            test_metrics=valid_metrics)
-    else:
-        train_pyreader.decorate_paddle_reader(train_reader)
-        valid_pyreader.decorate_paddle_reader(valid_reader)
-        train_with_pyreader(
-            exe,
-            train_prog,
-            train_exe,
-            train_pyreader,
-            train_fetch_list,
-            train_metrics,
-            epochs=epochs,
-            log_interval=args.log_interval,
-            valid_interval=args.valid_interval,
-            save_dir=args.save_dir,
-            save_model_name=args.model_name,
-            enable_ce=args.enable_ce,
-            test_exe=valid_exe,
-            test_pyreader=valid_pyreader,
-            test_fetch_list=valid_fetch_list,
-            test_metrics=valid_metrics)
+    exe_places = fluid.cuda_places() if args.use_gpu else fluid.cpu_places()
+    train_pyreader.decorate_sample_list_generator(
+        train_reader, places=exe_places)
+    valid_pyreader.decorate_sample_list_generator(
+        valid_reader, places=exe_places)
+
+    train_with_pyreader(
+        exe,
+        train_prog,
+        compiled_train_prog,  #train_exe,
+        train_pyreader,
+        train_fetch_list,
+        train_metrics,
+        epochs=epochs,
+        log_interval=args.log_interval,
+        valid_interval=args.valid_interval,
+        save_dir=args.save_dir,
+        save_model_name=args.model_name,
+        fix_random_seed=args.fix_random_seed,
+        compiled_test_prog=compiled_valid_prog,  #test_exe=valid_exe,
+        test_pyreader=valid_pyreader,
+        test_fetch_list=valid_fetch_list,
+        test_metrics=valid_metrics)
 
 
 if __name__ == "__main__":
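Note on the GPU-count check added in train() above: the per-card batch size is derived by dividing TRAIN.batch_size by TRAIN.num_gpus, so the number of visible devices must match the config. A minimal standalone sketch of that behavior (the helper name and config path are hypothetical; the real check lives inline in train()):

    # Sketch of the CUDA_VISIBLE_DEVICES vs. TRAIN.num_gpus consistency check.
    import os

    def check_gpu_count(config_num_gpus, config_path='configs/tsn.yaml'):
        gpus = os.getenv("CUDA_VISIBLE_DEVICES", "")
        if gpus == "":
            # nothing exported: trust the value from the config file
            return config_num_gpus
        num_gpus = len(gpus.split(","))
        assert num_gpus == config_num_gpus, \
            "num_gpus({}) set by CUDA_VISIBLE_DEVICES should be the same " \
            "as that set in {}({})".format(num_gpus, config_path,
                                           config_num_gpus)
        return num_gpus

    # e.g. CUDA_VISIBLE_DEVICES=0,1,2,3 with "num_gpus: 4" in the config
    # passes, and each card then sees TRAIN.batch_size / 4 samples per batch.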
diff --git a/PaddleCV/PaddleVideo/tools/__init__.py b/PaddleCV/PaddleVideo/utils/__init__.py
similarity index 100%
rename from PaddleCV/PaddleVideo/tools/__init__.py
rename to PaddleCV/PaddleVideo/utils/__init__.py
diff --git a/PaddleCV/PaddleVideo/config.py b/PaddleCV/PaddleVideo/utils/config_utils.py
similarity index 67%
rename from PaddleCV/PaddleVideo/config.py
rename to PaddleCV/PaddleVideo/utils/config_utils.py
index 80a26dc2..1acb9d28 100644
--- a/PaddleCV/PaddleVideo/config.py
+++ b/PaddleCV/PaddleVideo/utils/config_utils.py
@@ -12,13 +12,8 @@
 #See the License for the specific language governing permissions and
 #limitations under the License.
 
-try:
-    from configparser import ConfigParser
-except:
-    from ConfigParser import ConfigParser
-
-from utils import AttrDict
-
+import yaml
+from .utility import AttrDict
 
 import logging
 logger = logging.getLogger(__name__)
 
@@ -31,20 +26,29 @@ CONFIG_SECS = [
 
 
 def parse_config(cfg_file):
-    parser = ConfigParser()
-    cfg = AttrDict()
-    parser.read(cfg_file)
-    for sec in parser.sections():
-        sec_dict = AttrDict()
-        for k, v in parser.items(sec):
+    """Load a config file into AttrDict"""
+    with open(cfg_file, 'r') as fopen:
+        yaml_config = AttrDict(yaml.load(fopen, Loader=yaml.Loader))
+    create_attr_dict(yaml_config)
+    return yaml_config
+
+
+def create_attr_dict(yaml_config):
+    from ast import literal_eval
+    for key, value in yaml_config.items():
+        if type(value) is dict:
+            yaml_config[key] = value = AttrDict(value)
+        if isinstance(value, str):
             try:
-                v = eval(v)
-            except:
+                value = literal_eval(value)
+            except BaseException:
                 pass
-            setattr(sec_dict, k, v)
-        setattr(cfg, sec.upper(), sec_dict)
-
-    return cfg
+        if isinstance(value, AttrDict):
+            create_attr_dict(yaml_config[key])
+        else:
+            yaml_config[key] = value
+    return
 
 
 def merge_configs(cfg, sec, args_dict):
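To make the .txt-to-.yaml config migration above concrete, here is a small self-contained sketch of how a section-structured YAML file ends up as nested AttrDicts. The AttrDict below is a simplified stand-in for the one in utils/utility.py, and the section values are invented for the example:

    import yaml

    # stand-in for utils/utility.AttrDict: a dict with attribute access
    class AttrDict(dict):
        def __getattr__(self, key):
            return self[key]

        def __setattr__(self, key, value):
            self[key] = value

    example_yaml = ("MODEL:\n"
                    "  name: TSN\n"
                    "  num_classes: 400\n"
                    "TRAIN:\n"
                    "  num_gpus: 4\n"
                    "  batch_size: 256\n")

    cfg = AttrDict(yaml.load(example_yaml, Loader=yaml.Loader))
    for sec, sec_dict in cfg.items():
        cfg[sec] = AttrDict(sec_dict)   # one-level version of create_attr_dict
    print(cfg.TRAIN.batch_size)         # -> 256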
diff --git a/PaddleCV/PaddleVideo/utils/train_utils.py b/PaddleCV/PaddleVideo/utils/train_utils.py
new file mode 100644
index 00000000..abec586f
--- /dev/null
+++ b/PaddleCV/PaddleVideo/utils/train_utils.py
@@ -0,0 +1,176 @@
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import sys
+import time
+import numpy as np
+import paddle
+import paddle.fluid as fluid
+import logging
+import shutil
+
+logger = logging.getLogger(__name__)
+
+
+def log_lr_and_step():
+    try:
+        # In optimizers, if learning_rate is set as constant, lr_var
+        # name is 'learning_rate_0', and iteration counter is not
+        # recorded. If learning_rate is set as decayed values from
+        # learning_rate_scheduler, lr_var name is 'learning_rate',
+        # and iteration counter is recorded with name '@LR_DECAY_COUNTER@',
+        # a better implementation is required here
+        lr_var = fluid.global_scope().find_var("learning_rate")
+        if not lr_var:
+            lr_var = fluid.global_scope().find_var("learning_rate_0")
+        lr = np.array(lr_var.get_tensor())
+
+        lr_count = '[-]'
+        lr_count_var = fluid.global_scope().find_var("@LR_DECAY_COUNTER@")
+        if lr_count_var:
+            lr_count = np.array(lr_count_var.get_tensor())
+        logger.info("------- learning rate {}, learning rate counter {} -----"
+                    .format(np.array(lr), np.array(lr_count)))
+    except Exception:
+        logger.warning("Unable to get learning_rate and LR_DECAY_COUNTER.")
+
+
+def test_with_pyreader(exe,
+                       compiled_test_prog,
+                       test_pyreader,
+                       test_fetch_list,
+                       test_metrics,
+                       log_interval=0,
+                       save_model_name=''):
+    if not test_pyreader:
+        logger.error("[TEST] get pyreader failed.")
+    test_metrics.reset()
+    test_iter = 0
+
+    for data in test_pyreader():
+        test_outs = exe.run(compiled_test_prog,
+                            fetch_list=test_fetch_list,
+                            feed=data)
+        test_metrics.accumulate(test_outs)
+        if log_interval > 0 and test_iter % log_interval == 0:
+            test_metrics.calculate_and_log_out(test_outs, \
+                            info = '[TEST] test_iter {} '.format(test_iter))
+        test_iter += 1
+    test_metrics.finalize_and_log_out("[TEST] Finish")
+
+
+def train_with_pyreader(exe, train_prog, compiled_train_prog, train_pyreader, \
+                        train_fetch_list, train_metrics, epochs = 10, \
+                        log_interval = 0, valid_interval = 0, save_dir = './', \
+                        save_model_name = 'model', fix_random_seed = False, \
+                        compiled_test_prog = None, test_pyreader = None, \
+                        test_fetch_list = None, test_metrics = None):
+    if not train_pyreader:
+        logger.error("[TRAIN] get pyreader failed.")
+    epoch_periods = []
+    train_loss = 0
+    for epoch in range(epochs):
+        log_lr_and_step()
+
+        train_iter = 0
+        epoch_periods = []
+
+        for data in train_pyreader():
+            cur_time = time.time()
+            train_outs = exe.run(compiled_train_prog,
+                                 fetch_list=train_fetch_list,
+                                 feed=data)
+            period = time.time() - cur_time
+            epoch_periods.append(period)
+            if log_interval > 0 and (train_iter % log_interval == 0):
+                train_metrics.calculate_and_log_out(train_outs, \
+                    info = '[TRAIN] Epoch {}, iter {} '.format(epoch, train_iter))
+            train_iter += 1
+
+        if len(epoch_periods) < 1:
+            logger.info(
+                'No iteration was executed, please check the data reader')
+            sys.exit(1)
+
+        logger.info('[TRAIN] Epoch {} training finished, average time: {}'.
+                    format(epoch, np.mean(epoch_periods[1:])))
+        save_model(
+            exe,
+            train_prog,
+            save_dir,
+            save_model_name,
+            "_epoch{}".format(epoch),
+            save_type='.pdckpt')
+        save_model(
+            exe,
+            train_prog,
+            save_dir,
+            save_model_name,
+            "_epoch{}".format(epoch),
+            save_type='.pdparams')
+        if compiled_test_prog and valid_interval > 0 and (
+                epoch + 1) % valid_interval == 0:
+            test_with_pyreader(exe, compiled_test_prog, test_pyreader,
+                               test_fetch_list, test_metrics, log_interval,
+                               save_model_name)
+
+    save_model(
+        exe,
+        train_prog,
+        save_dir,
+        save_model_name,
+        '_final',
+        save_type='.pdckpt')
+    save_model(
+        exe,
+        train_prog,
+        save_dir,
+        save_model_name,
+        '_final',
+        save_type='.pdparams')
+    # when fix_random_seed is set for debugging (CE)
+    if fix_random_seed:
+        cards = os.environ.get('CUDA_VISIBLE_DEVICES')
+        gpu_num = len(cards.split(","))
+        print("kpis\ttrain_cost_card{}\t{}".format(gpu_num, train_loss))
+        print("kpis\ttrain_speed_card{}\t{}".format(gpu_num,
+                                                    np.mean(epoch_periods)))
+
+
+def save_model(exe,
+               program,
+               save_dir,
+               model_name,
+               postfix=None,
+               save_type='.pdckpt'):
+    """
+    save_type: '.pdckpt' or '.pdparams', '.pdckpt' for all persistable variables,
+               '.pdparams' for parameters only
+    """
+    if not os.path.isdir(save_dir):
+        os.makedirs(save_dir)
+    saved_model_name = model_name + postfix + save_type
+
+    if save_type == '.pdckpt':
+        fluid.io.save_persistables(
+            exe, save_dir, main_program=program, filename=saved_model_name)
+    elif save_type == '.pdparams':
+        fluid.io.save_params(
+            exe, save_dir, main_program=program, filename=saved_model_name)
+    else:
+        raise NotImplementedError(
+            'save_type {} not implemented, it should be either {} or {}'
+            .format(save_type, '.pdckpt', '.pdparams'))
+    return
diff --git a/PaddleCV/PaddleVideo/utils.py b/PaddleCV/PaddleVideo/utils/utility.py
similarity index 100%
rename from PaddleCV/PaddleVideo/utils.py
rename to PaddleCV/PaddleVideo/utils/utility.py
-- 
GitLab
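For reference, a usage sketch of the save_model helper introduced in utils/train_utils.py: each epoch is now saved twice, once as a '.pdckpt' checkpoint holding every persistable variable (suitable for --resume) and once as a '.pdparams' file holding parameters only (suitable for the --weights argument of eval.py/predict.py). The toy program below is invented so the sketch is self-contained, assuming the PaddlePaddle 1.x fluid API used throughout this patch:

    import paddle.fluid as fluid
    from utils.train_utils import save_model

    # build a trivial program with one learnable layer
    startup = fluid.Program()
    main_prog = fluid.Program()
    with fluid.program_guard(main_prog, startup):
        data = fluid.layers.data(name='x', shape=[4], dtype='float32')
        fc = fluid.layers.fc(input=data, size=2)

    exe = fluid.Executor(fluid.CPUPlace())
    exe.run(startup)

    # mirrors the two calls made per epoch in train_with_pyreader
    save_model(exe, main_prog, 'data/checkpoints', 'TSN', '_epoch0',
               save_type='.pdckpt')    # -> data/checkpoints/TSN_epoch0.pdckpt
    save_model(exe, main_prog, 'data/checkpoints', 'TSN', '_epoch0',
               save_type='.pdparams')  # -> data/checkpoints/TSN_epoch0.pdparams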