Unverified commit 9bbb9542
Authored on Jun 03, 2020 by Aurelius84; committed via GitHub on Jun 03, 2020.
Fork: Crayon鑫 / Paddle (forked from PaddlePaddle / Paddle)
[Dy2stat] Add BMN model for unittest (#24839)

* add test_bmn_model test=develop
* remove random test=develop
Parent: 40a5f3fd

1 changed file with 739 additions and 0 deletions:

python/paddle/fluid/tests/unittests/dygraph_to_static/test_bmn.py (new file, mode 100644, +739 -0)
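What this commit's test exercises, in miniature: a `fluid.dygraph.Layer` whose `forward` is decorated with `@declarative` can run either as a translated static program or as plain imperative dygraph, toggled through `ProgramTranslator().enable(...)`; the test below trains BMN both ways and asserts the losses match. A minimal sketch of that toggle (the `AddOne` layer is a hypothetical example for illustration, not part of this commit):

import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph import declarative, ProgramTranslator, to_variable

class AddOne(fluid.dygraph.Layer):
    @declarative  # mark forward for dynamic-to-static translation
    def forward(self, x):
        return x + 1

with fluid.dygraph.guard():
    net = AddOne()
    ProgramTranslator().enable(True)   # run the translated static program
    static_out = net(to_variable(np.zeros([2], dtype='float32'))).numpy()
    ProgramTranslator().enable(False)  # fall back to imperative dygraph
    dygraph_out = net(to_variable(np.zeros([2], dtype='float32'))).numpy()
    assert np.allclose(static_out, dygraph_out)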
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import math
import numpy as np
import unittest

import paddle.fluid as fluid
from paddle.fluid import ParamAttr
from paddle.fluid.dygraph import to_variable
from paddle.fluid.dygraph import declarative, ProgramTranslator

SEED = 2020
DATATYPE = 'float32'
program_translator = ProgramTranslator()

# Note: Set True to eliminate randomness.
#     1. For one operation, cuDNN has several algorithms,
#        some algorithm results are non-deterministic, like convolution algorithms.
if fluid.is_compiled_with_cuda():
    fluid.set_flags({'FLAGS_cudnn_deterministic': True})
def get_interp1d_mask(tscale, dscale, prop_boundary_ratio, num_sample,
                      num_sample_perbin):
    """ generate sample mask for each point in Boundary-Matching Map """
    mask_mat = []
    for start_index in range(tscale):
        mask_mat_vector = []
        for duration_index in range(dscale):
            if start_index + duration_index < tscale:
                p_xmin = start_index
                p_xmax = start_index + duration_index
                center_len = float(p_xmax - p_xmin) + 1
                sample_xmin = p_xmin - center_len * prop_boundary_ratio
                sample_xmax = p_xmax + center_len * prop_boundary_ratio
                p_mask = _get_interp1d_bin_mask(sample_xmin, sample_xmax,
                                                tscale, num_sample,
                                                num_sample_perbin)
            else:
                p_mask = np.zeros([tscale, num_sample])
            mask_mat_vector.append(p_mask)
        mask_mat_vector = np.stack(mask_mat_vector, axis=2)
        mask_mat.append(mask_mat_vector)
    mask_mat = np.stack(mask_mat, axis=3)
    mask_mat = mask_mat.astype(np.float32)

    sample_mask = np.reshape(mask_mat, [tscale, -1])
    return sample_mask
def _get_interp1d_bin_mask(seg_xmin, seg_xmax, tscale, num_sample,
                           num_sample_perbin):
    """ generate sample mask for a boundary-matching pair """
    plen = float(seg_xmax - seg_xmin)
    plen_sample = plen / (num_sample * num_sample_perbin - 1.0)
    total_samples = [
        seg_xmin + plen_sample * ii
        for ii in range(num_sample * num_sample_perbin)
    ]
    p_mask = []
    for idx in range(num_sample):
        bin_samples = total_samples[idx * num_sample_perbin:(idx + 1) *
                                    num_sample_perbin]
        bin_vector = np.zeros([tscale])
        for sample in bin_samples:
            sample_upper = math.ceil(sample)
            sample_decimal, sample_down = math.modf(sample)
            if int(sample_down) <= (tscale - 1) and int(sample_down) >= 0:
                bin_vector[int(sample_down)] += 1 - sample_decimal
            if int(sample_upper) <= (tscale - 1) and int(sample_upper) >= 0:
                bin_vector[int(sample_upper)] += sample_decimal
        bin_vector = 1.0 / num_sample_perbin * bin_vector
        p_mask.append(bin_vector)
    p_mask = np.stack(p_mask, axis=1)
    return p_mask
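# Annotation (illustrative, not part of the committed file): each fractional
# sample position s above splits unit weight linearly between its two
# neighbouring temporal indices, (1 - frac(s)) to floor(s) and frac(s) to
# ceil(s), and the num_sample_perbin samples of each bin are then averaged,
# i.e. plain 1-D linear interpolation along the temporal axis.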
class Conv1D(fluid.dygraph.Layer):
    def __init__(self,
                 prefix,
                 num_channels=256,
                 num_filters=256,
                 size_k=3,
                 padding=1,
                 groups=1,
                 act="relu"):
        super(Conv1D, self).__init__()
        fan_in = num_channels * size_k * 1
        k = 1. / math.sqrt(fan_in)
        param_attr = ParamAttr(
            name=prefix + "_w",
            initializer=fluid.initializer.Uniform(
                low=-k, high=k))
        bias_attr = ParamAttr(
            name=prefix + "_b",
            initializer=fluid.initializer.Uniform(
                low=-k, high=k))

        self._conv2d = fluid.dygraph.Conv2D(
            num_channels=num_channels,
            num_filters=num_filters,
            filter_size=(1, size_k),
            stride=1,
            padding=(0, padding),
            groups=groups,
            act=act,
            param_attr=param_attr,
            bias_attr=bias_attr)

    def forward(self, x):
        x = fluid.layers.unsqueeze(input=x, axes=[2])
        x = self._conv2d(x)
        x = fluid.layers.squeeze(input=x, axes=[2])
        return x
class BMN(fluid.dygraph.Layer):
    def __init__(self, cfg):
        super(BMN, self).__init__()

        self.tscale = cfg.tscale
        self.dscale = cfg.dscale
        self.prop_boundary_ratio = cfg.prop_boundary_ratio
        self.num_sample = cfg.num_sample
        self.num_sample_perbin = cfg.num_sample_perbin

        self.hidden_dim_1d = 256
        self.hidden_dim_2d = 128
        self.hidden_dim_3d = 512

        # Base Module
        self.b_conv1 = Conv1D(
            prefix="Base_1",
            num_channels=cfg.feat_dim,
            num_filters=self.hidden_dim_1d,
            size_k=3,
            padding=1,
            groups=4,
            act="relu")
        self.b_conv2 = Conv1D(
            prefix="Base_2",
            num_filters=self.hidden_dim_1d,
            size_k=3,
            padding=1,
            groups=4,
            act="relu")

        # Temporal Evaluation Module
        self.ts_conv1 = Conv1D(
            prefix="TEM_s1",
            num_filters=self.hidden_dim_1d,
            size_k=3,
            padding=1,
            groups=4,
            act="relu")
        self.ts_conv2 = Conv1D(
            prefix="TEM_s2", num_filters=1, size_k=1, padding=0, act="sigmoid")
        self.te_conv1 = Conv1D(
            prefix="TEM_e1",
            num_filters=self.hidden_dim_1d,
            size_k=3,
            padding=1,
            groups=4,
            act="relu")
        self.te_conv2 = Conv1D(
            prefix="TEM_e2", num_filters=1, size_k=1, padding=0, act="sigmoid")

        # Proposal Evaluation Module
        self.p_conv1 = Conv1D(
            prefix="PEM_1d",
            num_filters=self.hidden_dim_2d,
            size_k=3,
            padding=1,
            act="relu")

        # init to speed up
        self.sample_mask = get_interp1d_mask(
            self.tscale, self.dscale, self.prop_boundary_ratio,
            self.num_sample, self.num_sample_perbin)
        # self.sample_mask = fluid.dygraph.base.to_variable(sample_mask)
        # self.sample_mask.stop_gradient = True

        self.p_conv3d1 = fluid.dygraph.Conv3D(
            num_channels=128,
            num_filters=self.hidden_dim_3d,
            filter_size=(self.num_sample, 1, 1),
            stride=(self.num_sample, 1, 1),
            padding=0,
            act="relu",
            param_attr=ParamAttr(name="PEM_3d1_w"),
            bias_attr=ParamAttr(name="PEM_3d1_b"))

        self.p_conv2d1 = fluid.dygraph.Conv2D(
            num_channels=512,
            num_filters=self.hidden_dim_2d,
            filter_size=1,
            stride=1,
            padding=0,
            act="relu",
            param_attr=ParamAttr(name="PEM_2d1_w"),
            bias_attr=ParamAttr(name="PEM_2d1_b"))
        self.p_conv2d2 = fluid.dygraph.Conv2D(
            num_channels=128,
            num_filters=self.hidden_dim_2d,
            filter_size=3,
            stride=1,
            padding=1,
            act="relu",
            param_attr=ParamAttr(name="PEM_2d2_w"),
            bias_attr=ParamAttr(name="PEM_2d2_b"))
        self.p_conv2d3 = fluid.dygraph.Conv2D(
            num_channels=128,
            num_filters=self.hidden_dim_2d,
            filter_size=3,
            stride=1,
            padding=1,
            act="relu",
            param_attr=ParamAttr(name="PEM_2d3_w"),
            bias_attr=ParamAttr(name="PEM_2d3_b"))
        self.p_conv2d4 = fluid.dygraph.Conv2D(
            num_channels=128,
            num_filters=2,
            filter_size=1,
            stride=1,
            padding=0,
            act="sigmoid",
            param_attr=ParamAttr(name="PEM_2d4_w"),
            bias_attr=ParamAttr(name="PEM_2d4_b"))

    @declarative
    def forward(self, x):
        # TODO(Aurelius84): sample_mask is created in `__init__`, but currently
        # we don't support that. These two lines will be removed once creating
        # variables outside of forward is supported.
        sample_mask = to_variable(self.sample_mask)
        sample_mask.stop_gradient = True

        # Base Module
        x = self.b_conv1(x)
        x = self.b_conv2(x)

        # TEM
        xs = self.ts_conv1(x)
        xs = self.ts_conv2(xs)
        xs = fluid.layers.squeeze(xs, axes=[1])
        xe = self.te_conv1(x)
        xe = self.te_conv2(xe)
        xe = fluid.layers.squeeze(xe, axes=[1])

        # PEM
        xp = self.p_conv1(x)
        # BM layer
        xp = fluid.layers.matmul(xp, sample_mask)
        xp = fluid.layers.reshape(
            xp, shape=[0, 0, -1, self.dscale, self.tscale])

        xp = self.p_conv3d1(xp)
        xp = fluid.layers.squeeze(xp, axes=[2])
        xp = self.p_conv2d1(xp)
        xp = self.p_conv2d2(xp)
        xp = self.p_conv2d3(xp)
        xp = self.p_conv2d4(xp)
        return xp, xs, xe
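# Annotation (illustrative shape walk-through, not part of the committed
# file), assuming the test's Args (feat_dim=100, tscale=dscale=50,
# num_sample=2) and batch size B:
#   x:  [B, 100, 50] -> base convs -> [B, 256, 50]
#   xs, xe: [B, 1, 50] -> squeeze -> [B, 50]
#   xp: [B, 128, 50] -> matmul(sample_mask [50, 5000]) -> [B, 128, 5000]
#       -> reshape -> [B, 128, 2, 50, 50] -> Conv3D -> [B, 512, 1, 50, 50]
#       -> squeeze -> [B, 512, 50, 50] -> 2-D convs -> xp [B, 2, 50, 50]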
def bmn_loss_func(pred_bm, pred_start, pred_end, gt_iou_map, gt_start,
                  gt_end, cfg):
    def _get_mask(cfg):
        dscale = cfg.dscale
        tscale = cfg.tscale
        bm_mask = []
        for idx in range(dscale):
            mask_vector = [1 for i in range(tscale - idx)
                           ] + [0 for i in range(idx)]
            bm_mask.append(mask_vector)
        bm_mask = np.array(bm_mask, dtype=np.float32)
        self_bm_mask = fluid.layers.create_global_var(
            shape=[dscale, tscale], value=0, dtype=DATATYPE, persistable=True)
        fluid.layers.assign(bm_mask, self_bm_mask)
        self_bm_mask.stop_gradient = True
        return self_bm_mask

    def tem_loss_func(pred_start, pred_end, gt_start, gt_end):
        def bi_loss(pred_score, gt_label):
            pred_score = fluid.layers.reshape(
                x=pred_score, shape=[-1], inplace=False)
            gt_label = fluid.layers.reshape(
                x=gt_label, shape=[-1], inplace=False)
            gt_label.stop_gradient = True
            pmask = fluid.layers.cast(x=(gt_label > 0.5), dtype=DATATYPE)
            num_entries = fluid.layers.cast(
                fluid.layers.shape(pmask), dtype=DATATYPE)
            num_positive = fluid.layers.cast(
                fluid.layers.reduce_sum(pmask), dtype=DATATYPE)
            ratio = num_entries / num_positive
            coef_0 = 0.5 * ratio / (ratio - 1)
            coef_1 = 0.5 * ratio
            epsilon = 0.000001
            # temp = fluid.layers.log(pred_score + epsilon)
            loss_pos = fluid.layers.elementwise_mul(
                fluid.layers.log(pred_score + epsilon), pmask)
            loss_pos = coef_1 * fluid.layers.reduce_mean(loss_pos)
            loss_neg = fluid.layers.elementwise_mul(
                fluid.layers.log(1.0 - pred_score + epsilon), (1.0 - pmask))
            loss_neg = coef_0 * fluid.layers.reduce_mean(loss_neg)
            loss = -1 * (loss_pos + loss_neg)
            return loss

        loss_start = bi_loss(pred_start, gt_start)
        loss_end = bi_loss(pred_end, gt_end)
        loss = loss_start + loss_end
        return loss

    def pem_reg_loss_func(pred_score, gt_iou_map, mask):
        gt_iou_map = fluid.layers.elementwise_mul(gt_iou_map, mask)

        u_hmask = fluid.layers.cast(x=gt_iou_map > 0.7, dtype=DATATYPE)
        u_mmask = fluid.layers.logical_and(gt_iou_map <= 0.7,
                                           gt_iou_map > 0.3)
        u_mmask = fluid.layers.cast(x=u_mmask, dtype=DATATYPE)
        u_lmask = fluid.layers.logical_and(gt_iou_map <= 0.3,
                                           gt_iou_map >= 0.)
        u_lmask = fluid.layers.cast(x=u_lmask, dtype=DATATYPE)
        u_lmask = fluid.layers.elementwise_mul(u_lmask, mask)

        num_h = fluid.layers.cast(
            fluid.layers.reduce_sum(u_hmask), dtype=DATATYPE)
        num_m = fluid.layers.cast(
            fluid.layers.reduce_sum(u_mmask), dtype=DATATYPE)
        num_l = fluid.layers.cast(
            fluid.layers.reduce_sum(u_lmask), dtype=DATATYPE)

        r_m = num_h / num_m
        u_smmask = fluid.layers.assign(
            local_random.uniform(
                0., 1.,
                [gt_iou_map.shape[1], gt_iou_map.shape[2]]).astype(DATATYPE))
        u_smmask = fluid.layers.elementwise_mul(u_mmask, u_smmask)
        u_smmask = fluid.layers.cast(
            x=(u_smmask > (1. - r_m)), dtype=DATATYPE)

        r_l = num_h / num_l
        u_slmask = fluid.layers.assign(
            local_random.uniform(
                0., 1.,
                [gt_iou_map.shape[1], gt_iou_map.shape[2]]).astype(DATATYPE))
        u_slmask = fluid.layers.elementwise_mul(u_lmask, u_slmask)
        u_slmask = fluid.layers.cast(
            x=(u_slmask > (1. - r_l)), dtype=DATATYPE)

        weights = u_hmask + u_smmask + u_slmask
        weights.stop_gradient = True
        loss = fluid.layers.square_error_cost(pred_score, gt_iou_map)
        loss = fluid.layers.elementwise_mul(loss, weights)
        loss = 0.5 * fluid.layers.reduce_sum(loss) / fluid.layers.reduce_sum(
            weights)
        return loss

    def pem_cls_loss_func(pred_score, gt_iou_map, mask):
        gt_iou_map = fluid.layers.elementwise_mul(gt_iou_map, mask)
        gt_iou_map.stop_gradient = True
        pmask = fluid.layers.cast(x=(gt_iou_map > 0.9), dtype=DATATYPE)
        nmask = fluid.layers.cast(x=(gt_iou_map <= 0.9), dtype=DATATYPE)
        nmask = fluid.layers.elementwise_mul(nmask, mask)

        num_positive = fluid.layers.reduce_sum(pmask)
        num_entries = num_positive + fluid.layers.reduce_sum(nmask)
        ratio = num_entries / num_positive
        coef_0 = 0.5 * ratio / (ratio - 1)
        coef_1 = 0.5 * ratio
        epsilon = 0.000001
        loss_pos = fluid.layers.elementwise_mul(
            fluid.layers.log(pred_score + epsilon), pmask)
        loss_pos = coef_1 * fluid.layers.reduce_sum(loss_pos)
        loss_neg = fluid.layers.elementwise_mul(
            fluid.layers.log(1.0 - pred_score + epsilon), nmask)
        loss_neg = coef_0 * fluid.layers.reduce_sum(loss_neg)
        loss = -1 * (loss_pos + loss_neg) / num_entries
        return loss

    pred_bm_reg = fluid.layers.squeeze(
        fluid.layers.slice(
            pred_bm, axes=[1], starts=[0], ends=[1]), axes=[1])
    pred_bm_cls = fluid.layers.squeeze(
        fluid.layers.slice(
            pred_bm, axes=[1], starts=[1], ends=[2]), axes=[1])

    bm_mask = _get_mask(cfg)

    pem_reg_loss = pem_reg_loss_func(pred_bm_reg, gt_iou_map, bm_mask)
    pem_cls_loss = pem_cls_loss_func(pred_bm_cls, gt_iou_map, bm_mask)
    tem_loss = tem_loss_func(pred_start, pred_end, gt_start, gt_end)

    loss = tem_loss + 10 * pem_reg_loss + pem_cls_loss
    return loss, tem_loss, pem_reg_loss, pem_cls_loss
class Args(object):
    epoch = 1
    batch_size = 4
    learning_rate = 0.1
    learning_rate_decay = 0.1
    lr_decay_iter = 4200
    l2_weight_decay = 1e-4
    valid_interval = 20
    log_interval = 5
    train_batch_num = valid_interval
    valid_batch_num = 5

    tscale = 50
    dscale = 50
    feat_dim = 100
    prop_boundary_ratio = 0.5
    num_sample = 2
    num_sample_perbin = 2

    infer_dir = './bmn_infer_model'
    dy_param_path = './bmn_dy_param'
def optimizer(cfg, parameter_list):
    bd = [cfg.lr_decay_iter]
    base_lr = cfg.learning_rate
    lr_decay = cfg.learning_rate_decay
    l2_weight_decay = cfg.l2_weight_decay
    lr = [base_lr, base_lr * lr_decay]
    optimizer = fluid.optimizer.Adam(
        fluid.layers.piecewise_decay(
            boundaries=bd, values=lr),
        parameter_list=parameter_list,
        regularization=fluid.regularizer.L2DecayRegularizer(
            regularization_coeff=l2_weight_decay))
    return optimizer
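# Annotation (illustrative, not part of the committed file): with the Args
# settings, piecewise_decay keeps the learning rate at 0.1 for the first
# 4200 steps and drops it to 0.01 afterwards. The boundary/value pairing
# in plain Python:
#   bd = [4200]            # Args.lr_decay_iter
#   lr = [0.1, 0.1 * 0.1]  # [base_lr, base_lr * learning_rate_decay]
#   step_lr = lambda step: lr[sum(step >= b for b in bd)]
#   assert step_lr(0) == 0.1 and step_lr(4200) == 0.01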
def fake_data_reader(args, mode='train'):
    def iou_with_anchors(anchors_min, anchors_max, box_min, box_max):
        """Compute jaccard score between a box and the anchors.
        """
        len_anchors = anchors_max - anchors_min
        int_xmin = np.maximum(anchors_min, box_min)
        int_xmax = np.minimum(anchors_max, box_max)
        inter_len = np.maximum(int_xmax - int_xmin, 0.)
        union_len = len_anchors - inter_len + box_max - box_min
        jaccard = np.divide(inter_len, union_len)
        return jaccard

    def ioa_with_anchors(anchors_min, anchors_max, box_min, box_max):
        """Compute intersection-over-anchor score between a box and the anchors.
        """
        len_anchors = anchors_max - anchors_min
        int_xmin = np.maximum(anchors_min, box_min)
        int_xmax = np.minimum(anchors_max, box_max)
        inter_len = np.maximum(int_xmax - int_xmin, 0.)
        scores = np.divide(inter_len, len_anchors)
        return scores

    def get_match_map(tscale):
        match_map = []
        tgap = 1. / tscale
        for idx in range(tscale):
            tmp_match_window = []
            xmin = tgap * idx
            for jdx in range(1, tscale + 1):
                xmax = xmin + tgap * jdx
                tmp_match_window.append([xmin, xmax])
            match_map.append(tmp_match_window)
        match_map = np.array(match_map)
        match_map = np.transpose(match_map, [1, 0, 2])
        match_map = np.reshape(match_map, [-1, 2])
        anchor_xmin = [tgap * i for i in range(tscale)]
        anchor_xmax = [tgap * i for i in range(1, tscale + 1)]
        return match_map, anchor_xmin, anchor_xmax

    def get_video_label(match_map, anchor_xmin, anchor_xmax):
        video_second = local_random.randint(75, 90)
        label_num = local_random.randint(1, 3)

        gt_bbox = []
        gt_iou_map = []
        for idx in range(label_num):
            duration = local_random.uniform(video_second * 0.4,
                                            video_second * 0.8)
            start_t = local_random.uniform(0.1 * video_second,
                                           video_second - duration)
            tmp_start = max(min(1, start_t / video_second), 0)
            tmp_end = max(min(1, (start_t + duration) / video_second), 0)
            gt_bbox.append([tmp_start, tmp_end])
            tmp_gt_iou_map = iou_with_anchors(
                match_map[:, 0], match_map[:, 1], tmp_start, tmp_end)
            tmp_gt_iou_map = np.reshape(tmp_gt_iou_map,
                                        [args.dscale, args.tscale])
            gt_iou_map.append(tmp_gt_iou_map)
        gt_iou_map = np.array(gt_iou_map)
        gt_iou_map = np.max(gt_iou_map, axis=0)

        gt_bbox = np.array(gt_bbox)
        gt_xmins = gt_bbox[:, 0]
        gt_xmaxs = gt_bbox[:, 1]
        gt_len_small = 3. / args.tscale
        gt_start_bboxs = np.stack(
            (gt_xmins - gt_len_small / 2, gt_xmins + gt_len_small / 2),
            axis=1)
        gt_end_bboxs = np.stack(
            (gt_xmaxs - gt_len_small / 2, gt_xmaxs + gt_len_small / 2),
            axis=1)

        match_score_start = []
        for jdx in range(len(anchor_xmin)):
            match_score_start.append(
                np.max(
                    ioa_with_anchors(anchor_xmin[jdx], anchor_xmax[jdx],
                                     gt_start_bboxs[:, 0],
                                     gt_start_bboxs[:, 1])))
        match_score_end = []
        for jdx in range(len(anchor_xmin)):
            match_score_end.append(
                np.max(
                    ioa_with_anchors(anchor_xmin[jdx], anchor_xmax[jdx],
                                     gt_end_bboxs[:, 0], gt_end_bboxs[:, 1])))
        gt_start = np.array(match_score_start)
        gt_end = np.array(match_score_end)
        return gt_iou_map, gt_start, gt_end

    def reader():
        batch_out = []
        iter_num = args.batch_size * 100
        match_map, anchor_xmin, anchor_xmax = get_match_map(args.tscale)

        for video_idx in range(iter_num):
            video_feat = local_random.random_sample(
                [args.feat_dim, args.tscale]).astype('float32')
            gt_iou_map, gt_start, gt_end = get_video_label(
                match_map, anchor_xmin, anchor_xmax)

            if mode == 'train' or mode == 'valid':
                batch_out.append((video_feat, gt_iou_map, gt_start, gt_end))
            elif mode == 'test':
                batch_out.append(
                    (video_feat, gt_iou_map, gt_start, gt_end, video_idx))
            else:
                raise NotImplementedError('mode {} not implemented'.format(
                    mode))
            if len(batch_out) == args.batch_size:
                yield batch_out
                batch_out = []

    return reader
def train_bmn(args, place, to_static):
    program_translator.enable(to_static)
    loss_data = []

    with fluid.dygraph.guard(place):
        fluid.default_main_program().random_seed = SEED
        fluid.default_startup_program().random_seed = SEED
        global local_random
        local_random = np.random.RandomState(SEED)

        bmn = BMN(args)
        adam = optimizer(args, parameter_list=bmn.parameters())

        train_reader = fake_data_reader(args, 'train')

        for epoch in range(args.epoch):
            for batch_id, data in enumerate(train_reader()):
                video_feat = np.array(
                    [item[0] for item in data]).astype(DATATYPE)
                gt_iou_map = np.array(
                    [item[1] for item in data]).astype(DATATYPE)
                gt_start = np.array(
                    [item[2] for item in data]).astype(DATATYPE)
                gt_end = np.array([item[3] for item in data]).astype(DATATYPE)

                x_data = to_variable(video_feat)
                gt_iou_map = to_variable(gt_iou_map)
                gt_start = to_variable(gt_start)
                gt_end = to_variable(gt_end)
                gt_iou_map.stop_gradient = True
                gt_start.stop_gradient = True
                gt_end.stop_gradient = True

                pred_bm, pred_start, pred_end = bmn(x_data)

                loss, tem_loss, pem_reg_loss, pem_cls_loss = bmn_loss_func(
                    pred_bm, pred_start, pred_end, gt_iou_map, gt_start,
                    gt_end, args)
                avg_loss = fluid.layers.mean(loss)

                avg_loss.backward()
                adam.minimize(avg_loss)
                bmn.clear_gradients()
                # log loss data to verify correctness
                loss_data += [
                    avg_loss.numpy()[0], tem_loss.numpy()[0],
                    pem_reg_loss.numpy()[0], pem_cls_loss.numpy()[0]
                ]

                if args.log_interval > 0 and (
                        batch_id % args.log_interval == 0):
                    print('[TRAIN] Epoch {}, iter {} '.format(epoch, batch_id)
                          + '\tLoss = {}, \ttem_loss = {}, \tpem_reg_loss = {}, \tpem_cls_loss = {}'.
                          format('%f' % avg_loss.numpy()[0],
                                 '%f' % tem_loss.numpy()[0],
                                 '%f' % pem_reg_loss.numpy()[0],
                                 '%f' % pem_cls_loss.numpy()[0]))

                # validation
                if batch_id % args.valid_interval == 0 and batch_id > 0:
                    bmn.eval()
                    val_loss_data = val_bmn(bmn, args)
                    bmn.train()
                    loss_data += val_loss_data

                if batch_id == args.train_batch_num:
                    if to_static:
                        program_translator.save_inference_model(
                            args.infer_dir)
                    else:
                        fluid.dygraph.save_dygraph(bmn.state_dict(),
                                                   args.dy_param_path)
                    break
        return np.array(loss_data)
# Validation
def val_bmn(model, args):
    val_reader = fake_data_reader(args, 'valid')
    loss_data = []
    for batch_id, data in enumerate(val_reader()):
        video_feat = np.array([item[0] for item in data]).astype(DATATYPE)
        gt_iou_map = np.array([item[1] for item in data]).astype(DATATYPE)
        gt_start = np.array([item[2] for item in data]).astype(DATATYPE)
        gt_end = np.array([item[3] for item in data]).astype(DATATYPE)

        x_data = to_variable(video_feat)
        gt_iou_map = to_variable(gt_iou_map)
        gt_start = to_variable(gt_start)
        gt_end = to_variable(gt_end)
        gt_iou_map.stop_gradient = True
        gt_start.stop_gradient = True
        gt_end.stop_gradient = True

        pred_bm, pred_start, pred_end = model(x_data)

        loss, tem_loss, pem_reg_loss, pem_cls_loss = bmn_loss_func(
            pred_bm, pred_start, pred_end, gt_iou_map, gt_start, gt_end,
            args)
        avg_loss = fluid.layers.mean(loss)

        loss_data += [
            avg_loss.numpy()[0], tem_loss.numpy()[0],
            pem_reg_loss.numpy()[0], pem_cls_loss.numpy()[0]
        ]

        print('[VALID] iter {} '.format(batch_id) +
              '\tLoss = {}, \ttem_loss = {}, \tpem_reg_loss = {}, \tpem_cls_loss = {}'.
              format('%f' % avg_loss.numpy()[0],
                     '%f' % tem_loss.numpy()[0],
                     '%f' % pem_reg_loss.numpy()[0],
                     '%f' % pem_cls_loss.numpy()[0]))

        if batch_id == args.valid_batch_num:
            break
    return loss_data
class TestTrain(unittest.TestCase):
    def setUp(self):
        self.args = Args()
        self.place = fluid.CPUPlace() if not fluid.is_compiled_with_cuda() \
            else fluid.CUDAPlace(0)

    def test_train(self):
        static_res = train_bmn(self.args, self.place, to_static=True)
        dygraph_res = train_bmn(self.args, self.place, to_static=False)
        self.assertTrue(
            np.allclose(dygraph_res, static_res),
            "dygraph_res: {},\n static_res: {}".format(
                dygraph_res[~np.isclose(dygraph_res, static_res)],
                static_res[~np.isclose(dygraph_res, static_res)]))

        # Prediction needs trained models, so `verify_predict` is placed at
        # the end of `test_train`.
        self.verify_predict()

    def verify_predict(self):
        args = Args()
        args.batch_size = 1  # change batch_size
        test_reader = fake_data_reader(args, 'test')
        for batch_id, data in enumerate(test_reader()):
            video_data = np.array([item[0] for item in data]).astype(DATATYPE)
            static_pred_res = self.predict_static(video_data)
            dygraph_pred_res = self.predict_dygraph(video_data)

            for dy_res, st_res in zip(dygraph_pred_res, static_pred_res):
                self.assertTrue(
                    np.allclose(st_res, dy_res),
                    "dygraph_res: {},\n static_res: {}".format(
                        dy_res[~np.isclose(st_res, dy_res)],
                        st_res[~np.isclose(st_res, dy_res)]))
            break

    def predict_dygraph(self, data):
        program_translator.enable(False)
        with fluid.dygraph.guard(self.place):
            bmn = BMN(self.args)
            # load dygraph trained parameters
            model_dict, _ = fluid.load_dygraph(self.args.dy_param_path +
                                               ".pdparams")
            bmn.set_dict(model_dict)
            bmn.eval()

            x = to_variable(data)
            pred_res = bmn(x)
            pred_res = [var.numpy() for var in pred_res]

            return pred_res

    def predict_static(self, data):
        exe = fluid.Executor(self.place)
        # load inference model
        [inference_program, feed_target_names,
         fetch_targets] = fluid.io.load_inference_model(
             self.args.infer_dir, executor=exe)
        pred_res = exe.run(inference_program,
                           feed={feed_target_names[0]: data},
                           fetch_list=fetch_targets)

        return pred_res


if __name__ == "__main__":
    unittest.main()
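The module runs standalone (`python test_bmn.py` invokes `unittest.main()`). As a quick self-contained sanity check of the mask construction, a sketch using only the numpy helpers defined above: with the test's `Args`, `get_interp1d_mask` flattens a `[tscale, num_sample, dscale, tscale]` tensor into the `[tscale, num_sample * dscale * tscale]` matrix that the BM layer's `matmul` in `BMN.forward` consumes.

mask = get_interp1d_mask(
    tscale=50, dscale=50, prop_boundary_ratio=0.5,
    num_sample=2, num_sample_perbin=2)
print(mask.shape)  # (50, 5000) == (tscale, num_sample * dscale * tscale)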