未验证 提交 1bff563e 编写于 作者: H huangjun12 提交者: GitHub

Api update of video model (#4591)

* API update of video model
* update api and refine bmn dygraph implementation
* refine Readme and config of bmn dygraph
* fix readme and reader
* fix details
上级 a8a58316
...@@ -25,6 +25,13 @@ ...@@ -25,6 +25,13 @@
- 提供了适合视频分类和动作定位任务的通用骨架代码,用户可一键式高效配置模型完成训练和评测。 - 提供了适合视频分类和动作定位任务的通用骨架代码,用户可一键式高效配置模型完成训练和评测。
### 推荐用法
- 视频分类共开源7个模型,可分为:端到端模型、序列模型。端到端模型:TSN推荐在时序不敏感视频场景(比如互联网视频场景)使用;TSM、StNet推荐在时序敏感视频场景(比如Kinetics数据集)使用;Non-local模型计算量较大,在科研场景推荐。序列模型:Attention LSTM,Attention Cluster和NeXtVLAD 整体性能接近,但是网络结构不同,推荐集成多个模型使用。
- 视频动作定位共开源3个模型,视频动作定位推荐使用CTCN模型,时序提名生成推荐使用BMN模型。
## 安装 ## 安装
在当前模型库运行样例代码需要PaddlePaddle Fluid v.1.6.0或以上的版本。如果你的运行环境中的PaddlePaddle低于此版本,请根据[安装文档](http://www.paddlepaddle.org/documentation/docs/zh/1.6/beginners_guide/install/index_cn.html)中的说明来更新PaddlePaddle。 在当前模型库运行样例代码需要PaddlePaddle Fluid v.1.6.0或以上的版本。如果你的运行环境中的PaddlePaddle低于此版本,请根据[安装文档](http://www.paddlepaddle.org/documentation/docs/zh/1.6/beginners_guide/install/index_cn.html)中的说明来更新PaddlePaddle。
......
...@@ -37,7 +37,7 @@ def calculate_hit_at_one(predictions, actuals): ...@@ -37,7 +37,7 @@ def calculate_hit_at_one(predictions, actuals):
float: The average hit at one across the entire batch. float: The average hit at one across the entire batch.
""" """
top_prediction = numpy.argmax(predictions, 1) top_prediction = numpy.argmax(predictions, 1)
hits = actuals[numpy.arange(actuals.shape[0]), top_prediction] hits = actuals[:, top_prediction]
return numpy.average(hits) return numpy.average(hits)
......
...@@ -32,8 +32,7 @@ class ShiftingAttentionModel(object): ...@@ -32,8 +32,7 @@ class ShiftingAttentionModel(object):
x_shape.stop_gradient = True x_shape.stop_gradient = True
flat_x = fluid.layers.reshape(x, shape=(-1, self.seg_num)) flat_x = fluid.layers.reshape(x, shape=(-1, self.seg_num))
flat_softmax = fluid.layers.softmax(flat_x) flat_softmax = fluid.layers.softmax(flat_x)
return fluid.layers.reshape( return fluid.layers.reshape(flat_softmax, shape=x_shape)
flat_softmax, shape=x.shape, actual_shape=x_shape)
def glorot(self, n): def glorot(self, n):
return np.sqrt(1.0 / np.sqrt(n)) return np.sqrt(1.0 / np.sqrt(n))
......
...@@ -21,6 +21,7 @@ import json ...@@ -21,6 +21,7 @@ import json
import logging import logging
import functools import functools
import paddle import paddle
import paddle.fluid as fluid
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
...@@ -228,8 +229,8 @@ class BMNReader(DataReader): ...@@ -228,8 +229,8 @@ class BMNReader(DataReader):
mapper = functools.partial(process_data, mode=self.mode) mapper = functools.partial(process_data, mode=self.mode)
def batch_reader(): def batch_reader():
xreader = paddle.reader.xmap_readers(mapper, reader, xreader = fluid.io.xmap_readers(mapper, reader, self.num_threads,
self.num_threads, 1024) 1024)
batch = [] batch = []
for item in xreader(): for item in xreader():
batch.append(item) batch.append(item)
......
...@@ -22,6 +22,7 @@ import json ...@@ -22,6 +22,7 @@ import json
import logging import logging
import functools import functools
import paddle import paddle
import paddle.fluid as fluid
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
from .reader_utils import DataReader from .reader_utils import DataReader
...@@ -214,8 +215,8 @@ class BSNVideoReader(DataReader): ...@@ -214,8 +215,8 @@ class BSNVideoReader(DataReader):
mapper = functools.partial(process_data, mode=self.mode) mapper = functools.partial(process_data, mode=self.mode)
def batch_reader(): def batch_reader():
xreader = paddle.reader.xmap_readers(mapper, reader, xreader = fluid.io.xmap_readers(mapper, reader, self.num_threads,
self.num_threads, 1024) 1024)
batch = [] batch = []
for item in xreader(): for item in xreader():
batch.append(item) batch.append(item)
...@@ -444,8 +445,8 @@ class BSNProposalReader(DataReader): ...@@ -444,8 +445,8 @@ class BSNProposalReader(DataReader):
mapper = functools.partial(process_data, mode=self.mode) mapper = functools.partial(process_data, mode=self.mode)
def batch_reader(): def batch_reader():
xreader = paddle.reader.xmap_readers(mapper, reader, xreader = fluid.io.xmap_readers(mapper, reader, self.num_threads,
self.num_threads, 1024) 1024)
batch = [] batch = []
for item in xreader(): for item in xreader():
batch.append(item) batch.append(item)
......
...@@ -18,6 +18,7 @@ import sys ...@@ -18,6 +18,7 @@ import sys
import numpy as np import numpy as np
import functools import functools
import paddle import paddle
import paddle.fluid as fluid
import logging import logging
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
...@@ -154,7 +155,7 @@ class ETSReader(DataReader): ...@@ -154,7 +155,7 @@ class ETSReader(DataReader):
mapper = functools.partial(process_data) mapper = functools.partial(process_data)
return paddle.reader.xmap_readers(mapper, reader, self.num_threads, return fluid.io.xmap_readers(mapper, reader, self.num_threads,
self.buffer_size) self.buffer_size)
def batch_reader(): def batch_reader():
......
...@@ -26,7 +26,7 @@ except ImportError: ...@@ -26,7 +26,7 @@ except ImportError:
from io import BytesIO from io import BytesIO
import numpy as np import numpy as np
import paddle import paddle
import paddle.fluid as fluid
try: try:
from nvidia.dali.pipeline import Pipeline from nvidia.dali.pipeline import Pipeline
import nvidia.dali.ops as ops import nvidia.dali.ops as ops
...@@ -34,6 +34,7 @@ try: ...@@ -34,6 +34,7 @@ try:
import tempfile import tempfile
from nvidia.dali.plugin.paddle import DALIGenericIterator from nvidia.dali.plugin.paddle import DALIGenericIterator
except: except:
Pipeline = object
print("DALI is not installed, you can improve performance if use DALI") print("DALI is not installed, you can improve performance if use DALI")
from PIL import Image, ImageEnhance from PIL import Image, ImageEnhance
...@@ -272,8 +273,7 @@ class KineticsReader(DataReader): ...@@ -272,8 +273,7 @@ class KineticsReader(DataReader):
img_mean=img_mean, img_mean=img_mean,
img_std=img_std) img_std=img_std)
return paddle.reader.xmap_readers(mapper, reader_, num_threads, return fluid.io.xmap_readers(mapper, reader, num_threads, buf_size)
buf_size)
def build_dali_reader(self): def build_dali_reader(self):
""" """
......
...@@ -17,7 +17,7 @@ ...@@ -17,7 +17,7 @@
BMN模型是百度自研,2019年ActivityNet夺冠方案,为视频动作定位问题中proposal的生成提供高效的解决方案,在PaddlePaddle上首次开源。此模型引入边界匹配(Boundary-Matching, BM)机制来评估proposal的置信度,按照proposal开始边界的位置及其长度将所有可能存在的proposal组合成一个二维的BM置信度图,图中每个点的数值代表其所对应的proposal的置信度分数。网络由三个模块组成,基础模块作为主干网络处理输入的特征序列,TEM模块预测每一个时序位置属于动作开始、动作结束的概率,PEM模块生成BM置信度图。 BMN模型是百度自研,2019年ActivityNet夺冠方案,为视频动作定位问题中proposal的生成提供高效的解决方案,在PaddlePaddle上首次开源。此模型引入边界匹配(Boundary-Matching, BM)机制来评估proposal的置信度,按照proposal开始边界的位置及其长度将所有可能存在的proposal组合成一个二维的BM置信度图,图中每个点的数值代表其所对应的proposal的置信度分数。网络由三个模块组成,基础模块作为主干网络处理输入的特征序列,TEM模块预测每一个时序位置属于动作开始、动作结束的概率,PEM模块生成BM置信度图。
<p align="center"> <p align="center">
<img src="../../PaddleCV/PaddleVideo/images/BMN.png" height=300 width=500 hspace='10'/> <br /> <img src="./BMN.png" height=300 width=500 hspace='10'/> <br />
BMN Overview BMN Overview
</p> </p>
...@@ -44,7 +44,7 @@ BMN模型的静态图实现请参考[PaddleVideo](../../PaddleCV/PaddleVideo) ...@@ -44,7 +44,7 @@ BMN模型的静态图实现请参考[PaddleVideo](../../PaddleCV/PaddleVideo)
## 数据准备 ## 数据准备
BMN的训练数据采用ActivityNet1.3提供的数据集,我们提供了处理好的视频特征,请下载[bmn\_feat](https://paddlemodels.bj.bcebos.com/video_detection/bmn_feat.tar.gz)数据后解压,同时相应的修改bmn.yaml中的特征路径feat\_path。 BMN的训练数据采用ActivityNet1.3提供的数据集,我们提供了处理好的视频特征和对应的标签文件,请下载特征数据[bmn\_feat](https://paddlemodels.bj.bcebos.com/video_detection/bmn_feat.tar.gz)和标签数据[label](https://paddlemodels.bj.bcebos.com/video_detection/activitynet_1.3_annotations.json),并相应地修改配置文件bmn.yaml中的特征文件路径feat\_path和标签文件路径anno\_file。
## 模型训练 ## 模型训练
...@@ -55,7 +55,7 @@ BMN的训练数据采用ActivityNet1.3提供的数据集,我们提供了处理 ...@@ -55,7 +55,7 @@ BMN的训练数据采用ActivityNet1.3提供的数据集,我们提供了处理
bash run.sh bash run.sh
若使用单卡训练,启动方式如下: 若使用单卡训练,请将配置文件bmn.yaml中`TRAIN`和`VALID`对应的batch\_size调整为16,启动方式如下:
export CUDA_VISIBLE_DEVICES=0 export CUDA_VISIBLE_DEVICES=0
python train.py python train.py
......
...@@ -6,13 +6,13 @@ MODEL: ...@@ -6,13 +6,13 @@ MODEL:
prop_boundary_ratio: 0.5 prop_boundary_ratio: 0.5
num_sample: 32 num_sample: 32
num_sample_perbin: 3 num_sample_perbin: 3
anno_file: "../../PaddleCV/video/data/dataset/bmn/activitynet_1.3_annotations.json" anno_file: "./activitynet_1.3_annotations.json"
feat_path: './fix_feat_100' feat_path: "./fix_feat_100"
TRAIN: TRAIN:
subset: "train" subset: "train"
epoch: 9 epoch: 9
batch_size: 16 batch_size: 4
num_threads: 8 num_threads: 8
use_gpu: True use_gpu: True
num_gpus: 4 num_gpus: 4
...@@ -23,7 +23,7 @@ TRAIN: ...@@ -23,7 +23,7 @@ TRAIN:
VALID: VALID:
subset: "validation" subset: "validation"
batch_size: 16 batch_size: 4
num_threads: 8 num_threads: 8
use_gpu: True use_gpu: True
num_gpus: 4 num_gpus: 4
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册