Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
magicwindyyd
mindspore
提交
84dd46a7
M
mindspore
项目概览
magicwindyyd
/
mindspore
与 Fork 源项目一致
Fork自
MindSpore / mindspore
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
mindspore
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
84dd46a7
编写于
6月 16, 2020
作者:
M
mindspore-ci-bot
提交者:
Gitee
6月 16, 2020
浏览文件
操作
浏览文件
下载
差异文件
!2141 add ci test cast for yolov3
Merge pull request !2141 from chengxb7532/cxb_st
上级
105dcb1c
0f5fcfee
变更
4
隐藏空白更改
内联
并排
Showing
4 changed file
with
1272 addition
and
0 deletion
+1272
-0
tests/st/model_zoo_tests/yolov3/src/config.py
tests/st/model_zoo_tests/yolov3/src/config.py
+49
-0
tests/st/model_zoo_tests/yolov3/src/dataset.py
tests/st/model_zoo_tests/yolov3/src/dataset.py
+318
-0
tests/st/model_zoo_tests/yolov3/src/yolov3.py
tests/st/model_zoo_tests/yolov3/src/yolov3.py
+748
-0
tests/st/model_zoo_tests/yolov3/test_yolov3.py
tests/st/model_zoo_tests/yolov3/test_yolov3.py
+157
-0
未找到文件。
tests/st/model_zoo_tests/yolov3/src/config.py
0 → 100644
浏览文件 @
84dd46a7
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Config parameters for YOLOv3 models."""
class
ConfigYOLOV3ResNet18
:
"""
Config parameters for YOLOv3.
Examples:
ConfigYoloV3ResNet18.
"""
img_shape
=
[
352
,
640
]
feature_shape
=
[
32
,
3
,
352
,
640
]
num_classes
=
2
nms_max_num
=
50
backbone_input_shape
=
[
64
,
64
,
128
,
256
]
backbone_shape
=
[
64
,
128
,
256
,
512
]
backbone_layers
=
[
2
,
2
,
2
,
2
]
backbone_stride
=
[
1
,
2
,
2
,
2
]
ignore_threshold
=
0.5
obj_threshold
=
0.3
nms_threshold
=
0.4
anchor_scales
=
[(
10
,
13
),
(
16
,
30
),
(
33
,
23
),
(
30
,
61
),
(
62
,
45
),
(
59
,
119
),
(
116
,
90
),
(
156
,
198
),
(
163
,
326
)]
out_channel
=
int
(
len
(
anchor_scales
)
/
3
*
(
num_classes
+
5
))
tests/st/model_zoo_tests/yolov3/src/dataset.py
0 → 100644
浏览文件 @
84dd46a7
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""YOLOv3 dataset"""
from
__future__
import
division
import
os
import
numpy
as
np
from
matplotlib.colors
import
rgb_to_hsv
,
hsv_to_rgb
from
PIL
import
Image
import
mindspore.dataset
as
de
from
mindspore.mindrecord
import
FileWriter
import
mindspore.dataset.transforms.vision.c_transforms
as
C
from
src.config
import
ConfigYOLOV3ResNet18
iter_cnt
=
0
_NUM_BOXES
=
50
np
.
random
.
seed
(
1
)
de
.
config
.
set_seed
(
1
)
def
preprocess_fn
(
image
,
box
,
is_training
):
"""Preprocess function for dataset."""
config_anchors
=
[
10
,
13
,
16
,
30
,
33
,
23
,
30
,
61
,
62
,
45
,
59
,
119
,
116
,
90
,
156
,
198
,
163
,
326
]
anchors
=
np
.
array
([
float
(
x
)
for
x
in
config_anchors
]).
reshape
(
-
1
,
2
)
do_hsv
=
False
max_boxes
=
20
num_classes
=
ConfigYOLOV3ResNet18
.
num_classes
def
_rand
(
a
=
0.
,
b
=
1.
):
return
np
.
random
.
rand
()
*
(
b
-
a
)
+
a
def
_preprocess_true_boxes
(
true_boxes
,
anchors
,
in_shape
=
None
):
"""Get true boxes."""
num_layers
=
anchors
.
shape
[
0
]
//
3
anchor_mask
=
[[
6
,
7
,
8
],
[
3
,
4
,
5
],
[
0
,
1
,
2
]]
true_boxes
=
np
.
array
(
true_boxes
,
dtype
=
'float32'
)
# input_shape = np.array([in_shape, in_shape], dtype='int32')
input_shape
=
np
.
array
(
in_shape
,
dtype
=
'int32'
)
boxes_xy
=
(
true_boxes
[...,
0
:
2
]
+
true_boxes
[...,
2
:
4
])
//
2.
boxes_wh
=
true_boxes
[...,
2
:
4
]
-
true_boxes
[...,
0
:
2
]
true_boxes
[...,
0
:
2
]
=
boxes_xy
/
input_shape
[::
-
1
]
true_boxes
[...,
2
:
4
]
=
boxes_wh
/
input_shape
[::
-
1
]
grid_shapes
=
[
input_shape
//
32
,
input_shape
//
16
,
input_shape
//
8
]
y_true
=
[
np
.
zeros
((
grid_shapes
[
l
][
0
],
grid_shapes
[
l
][
1
],
len
(
anchor_mask
[
l
]),
5
+
num_classes
),
dtype
=
'float32'
)
for
l
in
range
(
num_layers
)]
anchors
=
np
.
expand_dims
(
anchors
,
0
)
anchors_max
=
anchors
/
2.
anchors_min
=
-
anchors_max
valid_mask
=
boxes_wh
[...,
0
]
>=
1
wh
=
boxes_wh
[
valid_mask
]
if
len
(
wh
)
>=
1
:
wh
=
np
.
expand_dims
(
wh
,
-
2
)
boxes_max
=
wh
/
2.
boxes_min
=
-
boxes_max
intersect_min
=
np
.
maximum
(
boxes_min
,
anchors_min
)
intersect_max
=
np
.
minimum
(
boxes_max
,
anchors_max
)
intersect_wh
=
np
.
maximum
(
intersect_max
-
intersect_min
,
0.
)
intersect_area
=
intersect_wh
[...,
0
]
*
intersect_wh
[...,
1
]
box_area
=
wh
[...,
0
]
*
wh
[...,
1
]
anchor_area
=
anchors
[...,
0
]
*
anchors
[...,
1
]
iou
=
intersect_area
/
(
box_area
+
anchor_area
-
intersect_area
)
best_anchor
=
np
.
argmax
(
iou
,
axis
=-
1
)
for
t
,
n
in
enumerate
(
best_anchor
):
for
l
in
range
(
num_layers
):
if
n
in
anchor_mask
[
l
]:
i
=
np
.
floor
(
true_boxes
[
t
,
0
]
*
grid_shapes
[
l
][
1
]).
astype
(
'int32'
)
j
=
np
.
floor
(
true_boxes
[
t
,
1
]
*
grid_shapes
[
l
][
0
]).
astype
(
'int32'
)
k
=
anchor_mask
[
l
].
index
(
n
)
c
=
true_boxes
[
t
,
4
].
astype
(
'int32'
)
y_true
[
l
][
j
,
i
,
k
,
0
:
4
]
=
true_boxes
[
t
,
0
:
4
]
y_true
[
l
][
j
,
i
,
k
,
4
]
=
1.
y_true
[
l
][
j
,
i
,
k
,
5
+
c
]
=
1.
pad_gt_box0
=
np
.
zeros
(
shape
=
[
50
,
4
],
dtype
=
np
.
float32
)
pad_gt_box1
=
np
.
zeros
(
shape
=
[
50
,
4
],
dtype
=
np
.
float32
)
pad_gt_box2
=
np
.
zeros
(
shape
=
[
50
,
4
],
dtype
=
np
.
float32
)
mask0
=
np
.
reshape
(
y_true
[
0
][...,
4
:
5
],
[
-
1
])
gt_box0
=
np
.
reshape
(
y_true
[
0
][...,
0
:
4
],
[
-
1
,
4
])
gt_box0
=
gt_box0
[
mask0
==
1
]
pad_gt_box0
[:
gt_box0
.
shape
[
0
]]
=
gt_box0
mask1
=
np
.
reshape
(
y_true
[
1
][...,
4
:
5
],
[
-
1
])
gt_box1
=
np
.
reshape
(
y_true
[
1
][...,
0
:
4
],
[
-
1
,
4
])
gt_box1
=
gt_box1
[
mask1
==
1
]
pad_gt_box1
[:
gt_box1
.
shape
[
0
]]
=
gt_box1
mask2
=
np
.
reshape
(
y_true
[
2
][...,
4
:
5
],
[
-
1
])
gt_box2
=
np
.
reshape
(
y_true
[
2
][...,
0
:
4
],
[
-
1
,
4
])
gt_box2
=
gt_box2
[
mask2
==
1
]
pad_gt_box2
[:
gt_box2
.
shape
[
0
]]
=
gt_box2
return
y_true
[
0
],
y_true
[
1
],
y_true
[
2
],
pad_gt_box0
,
pad_gt_box1
,
pad_gt_box2
def
_infer_data
(
img_data
,
input_shape
,
box
):
w
,
h
=
img_data
.
size
input_h
,
input_w
=
input_shape
scale
=
min
(
float
(
input_w
)
/
float
(
w
),
float
(
input_h
)
/
float
(
h
))
nw
=
int
(
w
*
scale
)
nh
=
int
(
h
*
scale
)
img_data
=
img_data
.
resize
((
nw
,
nh
),
Image
.
BICUBIC
)
new_image
=
np
.
zeros
((
input_h
,
input_w
,
3
),
np
.
float32
)
new_image
.
fill
(
128
)
img_data
=
np
.
array
(
img_data
)
if
len
(
img_data
.
shape
)
==
2
:
img_data
=
np
.
expand_dims
(
img_data
,
axis
=-
1
)
img_data
=
np
.
concatenate
([
img_data
,
img_data
,
img_data
],
axis
=-
1
)
dh
=
int
((
input_h
-
nh
)
/
2
)
dw
=
int
((
input_w
-
nw
)
/
2
)
new_image
[
dh
:(
nh
+
dh
),
dw
:(
nw
+
dw
),
:]
=
img_data
new_image
/=
255.
new_image
=
np
.
transpose
(
new_image
,
(
2
,
0
,
1
))
new_image
=
np
.
expand_dims
(
new_image
,
0
)
return
new_image
,
np
.
array
([
h
,
w
],
np
.
float32
),
box
def
_data_aug
(
image
,
box
,
is_training
,
jitter
=
0.3
,
hue
=
0.1
,
sat
=
1.5
,
val
=
1.5
,
image_size
=
(
352
,
640
)):
"""Data augmentation function."""
if
not
isinstance
(
image
,
Image
.
Image
):
image
=
Image
.
fromarray
(
image
)
iw
,
ih
=
image
.
size
ori_image_shape
=
np
.
array
([
ih
,
iw
],
np
.
int32
)
h
,
w
=
image_size
if
not
is_training
:
return
_infer_data
(
image
,
image_size
,
box
)
flip
=
_rand
()
<
.
5
# correct boxes
box_data
=
np
.
zeros
((
max_boxes
,
5
))
while
True
:
# Prevent the situation that all boxes are eliminated
new_ar
=
float
(
w
)
/
float
(
h
)
*
_rand
(
1
-
jitter
,
1
+
jitter
)
/
\
_rand
(
1
-
jitter
,
1
+
jitter
)
scale
=
_rand
(
0.25
,
2
)
if
new_ar
<
1
:
nh
=
int
(
scale
*
h
)
nw
=
int
(
nh
*
new_ar
)
else
:
nw
=
int
(
scale
*
w
)
nh
=
int
(
nw
/
new_ar
)
dx
=
int
(
_rand
(
0
,
w
-
nw
))
dy
=
int
(
_rand
(
0
,
h
-
nh
))
if
len
(
box
)
>=
1
:
t_box
=
box
.
copy
()
np
.
random
.
shuffle
(
t_box
)
t_box
[:,
[
0
,
2
]]
=
t_box
[:,
[
0
,
2
]]
*
float
(
nw
)
/
float
(
iw
)
+
dx
t_box
[:,
[
1
,
3
]]
=
t_box
[:,
[
1
,
3
]]
*
float
(
nh
)
/
float
(
ih
)
+
dy
if
flip
:
t_box
[:,
[
0
,
2
]]
=
w
-
t_box
[:,
[
2
,
0
]]
t_box
[:,
0
:
2
][
t_box
[:,
0
:
2
]
<
0
]
=
0
t_box
[:,
2
][
t_box
[:,
2
]
>
w
]
=
w
t_box
[:,
3
][
t_box
[:,
3
]
>
h
]
=
h
box_w
=
t_box
[:,
2
]
-
t_box
[:,
0
]
box_h
=
t_box
[:,
3
]
-
t_box
[:,
1
]
t_box
=
t_box
[
np
.
logical_and
(
box_w
>
1
,
box_h
>
1
)]
# discard invalid box
if
len
(
t_box
)
>=
1
:
box
=
t_box
break
box_data
[:
len
(
box
)]
=
box
# resize image
image
=
image
.
resize
((
nw
,
nh
),
Image
.
BICUBIC
)
# place image
new_image
=
Image
.
new
(
'RGB'
,
(
w
,
h
),
(
128
,
128
,
128
))
new_image
.
paste
(
image
,
(
dx
,
dy
))
image
=
new_image
# flip image or not
if
flip
:
image
=
image
.
transpose
(
Image
.
FLIP_LEFT_RIGHT
)
# convert image to gray or not
gray
=
_rand
()
<
.
25
if
gray
:
image
=
image
.
convert
(
'L'
).
convert
(
'RGB'
)
# when the channels of image is 1
image
=
np
.
array
(
image
)
if
len
(
image
.
shape
)
==
2
:
image
=
np
.
expand_dims
(
image
,
axis
=-
1
)
image
=
np
.
concatenate
([
image
,
image
,
image
],
axis
=-
1
)
# distort image
hue
=
_rand
(
-
hue
,
hue
)
sat
=
_rand
(
1
,
sat
)
if
_rand
()
<
.
5
else
1
/
_rand
(
1
,
sat
)
val
=
_rand
(
1
,
val
)
if
_rand
()
<
.
5
else
1
/
_rand
(
1
,
val
)
image_data
=
image
/
255.
if
do_hsv
:
x
=
rgb_to_hsv
(
image_data
)
x
[...,
0
]
+=
hue
x
[...,
0
][
x
[...,
0
]
>
1
]
-=
1
x
[...,
0
][
x
[...,
0
]
<
0
]
+=
1
x
[...,
1
]
*=
sat
x
[...,
2
]
*=
val
x
[
x
>
1
]
=
1
x
[
x
<
0
]
=
0
image_data
=
hsv_to_rgb
(
x
)
# numpy array, 0 to 1
image_data
=
image_data
.
astype
(
np
.
float32
)
# preprocess bounding boxes
bbox_true_1
,
bbox_true_2
,
bbox_true_3
,
gt_box1
,
gt_box2
,
gt_box3
=
\
_preprocess_true_boxes
(
box_data
,
anchors
,
image_size
)
return
image_data
,
bbox_true_1
,
bbox_true_2
,
bbox_true_3
,
\
ori_image_shape
,
gt_box1
,
gt_box2
,
gt_box3
if
is_training
:
images
,
bbox_1
,
bbox_2
,
bbox_3
,
_
,
gt_box1
,
gt_box2
,
gt_box3
=
_data_aug
(
image
,
box
,
is_training
)
return
images
,
bbox_1
,
bbox_2
,
bbox_3
,
gt_box1
,
gt_box2
,
gt_box3
images
,
shape
,
anno
=
_data_aug
(
image
,
box
,
is_training
)
return
images
,
shape
,
anno
def
anno_parser
(
annos_str
):
"""Parse annotation from string to list."""
annos
=
[]
for
anno_str
in
annos_str
:
anno
=
list
(
map
(
int
,
anno_str
.
strip
().
split
(
','
)))
annos
.
append
(
anno
)
return
annos
def
filter_valid_data
(
image_dir
,
anno_path
):
"""Filter valid image file, which both in image_dir and anno_path."""
image_files
=
[]
image_anno_dict
=
{}
if
not
os
.
path
.
isdir
(
image_dir
):
raise
RuntimeError
(
"Path given is not valid."
)
if
not
os
.
path
.
isfile
(
anno_path
):
raise
RuntimeError
(
"Annotation file is not valid."
)
with
open
(
anno_path
,
"rb"
)
as
f
:
lines
=
f
.
readlines
()
for
line
in
lines
:
line_str
=
line
.
decode
(
"utf-8"
).
strip
()
line_split
=
str
(
line_str
).
split
(
' '
)
file_name
=
line_split
[
0
]
if
os
.
path
.
isfile
(
os
.
path
.
join
(
image_dir
,
file_name
)):
image_anno_dict
[
file_name
]
=
anno_parser
(
line_split
[
1
:])
image_files
.
append
(
file_name
)
return
image_files
,
image_anno_dict
def
data_to_mindrecord_byte_image
(
image_dir
,
anno_path
,
mindrecord_dir
,
prefix
=
"yolo.mindrecord"
,
file_num
=
8
):
"""Create MindRecord file by image_dir and anno_path."""
mindrecord_path
=
os
.
path
.
join
(
mindrecord_dir
,
prefix
)
writer
=
FileWriter
(
mindrecord_path
,
file_num
)
image_files
,
image_anno_dict
=
filter_valid_data
(
image_dir
,
anno_path
)
yolo_json
=
{
"image"
:
{
"type"
:
"bytes"
},
"annotation"
:
{
"type"
:
"int64"
,
"shape"
:
[
-
1
,
5
]},
}
writer
.
add_schema
(
yolo_json
,
"yolo_json"
)
for
image_name
in
image_files
:
image_path
=
os
.
path
.
join
(
image_dir
,
image_name
)
with
open
(
image_path
,
'rb'
)
as
f
:
img
=
f
.
read
()
annos
=
np
.
array
(
image_anno_dict
[
image_name
])
row
=
{
"image"
:
img
,
"annotation"
:
annos
}
writer
.
write_raw_data
([
row
])
writer
.
commit
()
def
create_yolo_dataset
(
mindrecord_dir
,
batch_size
=
32
,
repeat_num
=
10
,
device_num
=
1
,
rank
=
0
,
is_training
=
True
,
num_parallel_workers
=
8
):
"""Creatr YOLOv3 dataset with MindDataset."""
ds
=
de
.
MindDataset
(
mindrecord_dir
,
columns_list
=
[
"image"
,
"annotation"
],
num_shards
=
device_num
,
shard_id
=
rank
,
num_parallel_workers
=
num_parallel_workers
,
shuffle
=
False
)
decode
=
C
.
Decode
()
ds
=
ds
.
map
(
input_columns
=
[
"image"
],
operations
=
decode
)
compose_map_func
=
(
lambda
image
,
annotation
:
preprocess_fn
(
image
,
annotation
,
is_training
))
if
is_training
:
hwc_to_chw
=
C
.
HWC2CHW
()
ds
=
ds
.
map
(
input_columns
=
[
"image"
,
"annotation"
],
output_columns
=
[
"image"
,
"bbox_1"
,
"bbox_2"
,
"bbox_3"
,
"gt_box1"
,
"gt_box2"
,
"gt_box3"
],
columns_order
=
[
"image"
,
"bbox_1"
,
"bbox_2"
,
"bbox_3"
,
"gt_box1"
,
"gt_box2"
,
"gt_box3"
],
operations
=
compose_map_func
,
num_parallel_workers
=
num_parallel_workers
)
ds
=
ds
.
map
(
input_columns
=
[
"image"
],
operations
=
hwc_to_chw
,
num_parallel_workers
=
num_parallel_workers
)
ds
=
ds
.
batch
(
batch_size
,
drop_remainder
=
True
)
ds
=
ds
.
repeat
(
repeat_num
)
else
:
ds
=
ds
.
map
(
input_columns
=
[
"image"
,
"annotation"
],
output_columns
=
[
"image"
,
"image_shape"
,
"annotation"
],
columns_order
=
[
"image"
,
"image_shape"
,
"annotation"
],
operations
=
compose_map_func
,
num_parallel_workers
=
num_parallel_workers
)
return
ds
tests/st/model_zoo_tests/yolov3/src/yolov3.py
0 → 100644
浏览文件 @
84dd46a7
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""YOLOv3 based on ResNet18."""
import
numpy
as
np
import
mindspore
as
ms
import
mindspore.nn
as
nn
from
mindspore
import
context
,
Tensor
from
mindspore.parallel._auto_parallel_context
import
auto_parallel_context
from
mindspore.communication.management
import
get_group_size
from
mindspore.common.initializer
import
TruncatedNormal
from
mindspore.ops
import
operations
as
P
from
mindspore.ops
import
functional
as
F
from
mindspore.ops
import
composite
as
C
def
weight_variable
():
"""Weight variable."""
return
TruncatedNormal
(
0.02
)
class
_conv2d
(
nn
.
Cell
):
"""Create Conv2D with padding."""
def
__init__
(
self
,
in_channels
,
out_channels
,
kernel_size
,
stride
=
1
):
super
(
_conv2d
,
self
).
__init__
()
self
.
conv
=
nn
.
Conv2d
(
in_channels
,
out_channels
,
kernel_size
=
kernel_size
,
stride
=
stride
,
padding
=
0
,
pad_mode
=
'same'
,
weight_init
=
weight_variable
())
def
construct
(
self
,
x
):
x
=
self
.
conv
(
x
)
return
x
def
_fused_bn
(
channels
,
momentum
=
0.99
):
"""Get a fused batchnorm."""
return
nn
.
BatchNorm2d
(
channels
,
momentum
=
momentum
)
def
_conv_bn_relu
(
in_channel
,
out_channel
,
ksize
,
stride
=
1
,
padding
=
0
,
dilation
=
1
,
alpha
=
0.1
,
momentum
=
0.99
,
pad_mode
=
"same"
):
"""Get a conv2d batchnorm and relu layer."""
return
nn
.
SequentialCell
(
[
nn
.
Conv2d
(
in_channel
,
out_channel
,
kernel_size
=
ksize
,
stride
=
stride
,
padding
=
padding
,
dilation
=
dilation
,
pad_mode
=
pad_mode
),
nn
.
BatchNorm2d
(
out_channel
,
momentum
=
momentum
),
nn
.
LeakyReLU
(
alpha
)]
)
class
BasicBlock
(
nn
.
Cell
):
"""
ResNet basic block.
Args:
in_channels (int): Input channel.
out_channels (int): Output channel.
stride (int): Stride size for the initial convolutional layer. Default:1.
momentum (float): Momentum for batchnorm layer. Default:0.1.
Returns:
Tensor, output tensor.
Examples:
BasicBlock(3,256,stride=2,down_sample=True).
"""
expansion
=
1
def
__init__
(
self
,
in_channels
,
out_channels
,
stride
=
1
,
momentum
=
0.99
):
super
(
BasicBlock
,
self
).
__init__
()
self
.
conv1
=
_conv2d
(
in_channels
,
out_channels
,
3
,
stride
=
stride
)
self
.
bn1
=
_fused_bn
(
out_channels
,
momentum
=
momentum
)
self
.
conv2
=
_conv2d
(
out_channels
,
out_channels
,
3
)
self
.
bn2
=
_fused_bn
(
out_channels
,
momentum
=
momentum
)
self
.
relu
=
P
.
ReLU
()
self
.
down_sample_layer
=
None
self
.
downsample
=
(
in_channels
!=
out_channels
)
if
self
.
downsample
:
self
.
down_sample_layer
=
_conv2d
(
in_channels
,
out_channels
,
1
,
stride
=
stride
)
self
.
add
=
P
.
TensorAdd
()
def
construct
(
self
,
x
):
identity
=
x
x
=
self
.
conv1
(
x
)
x
=
self
.
bn1
(
x
)
x
=
self
.
relu
(
x
)
x
=
self
.
conv2
(
x
)
x
=
self
.
bn2
(
x
)
if
self
.
downsample
:
identity
=
self
.
down_sample_layer
(
identity
)
out
=
self
.
add
(
x
,
identity
)
out
=
self
.
relu
(
out
)
return
out
class
ResNet
(
nn
.
Cell
):
"""
ResNet network.
Args:
block (Cell): Block for network.
layer_nums (list): Numbers of different layers.
in_channels (int): Input channel.
out_channels (int): Output channel.
num_classes (int): Class number. Default:100.
Returns:
Tensor, output tensor.
Examples:
ResNet(ResidualBlock,
[3, 4, 6, 3],
[64, 256, 512, 1024],
[256, 512, 1024, 2048],
100).
"""
def
__init__
(
self
,
block
,
layer_nums
,
in_channels
,
out_channels
,
strides
=
None
,
num_classes
=
80
):
super
(
ResNet
,
self
).
__init__
()
if
not
len
(
layer_nums
)
==
len
(
in_channels
)
==
len
(
out_channels
)
==
4
:
raise
ValueError
(
"the length of "
"layer_num, inchannel, outchannel list must be 4!"
)
self
.
conv1
=
_conv2d
(
3
,
64
,
7
,
stride
=
2
)
self
.
bn1
=
_fused_bn
(
64
)
self
.
relu
=
P
.
ReLU
()
self
.
maxpool
=
nn
.
MaxPool2d
(
kernel_size
=
3
,
stride
=
2
,
pad_mode
=
'same'
)
self
.
layer1
=
self
.
_make_layer
(
block
,
layer_nums
[
0
],
in_channel
=
in_channels
[
0
],
out_channel
=
out_channels
[
0
],
stride
=
strides
[
0
])
self
.
layer2
=
self
.
_make_layer
(
block
,
layer_nums
[
1
],
in_channel
=
in_channels
[
1
],
out_channel
=
out_channels
[
1
],
stride
=
strides
[
1
])
self
.
layer3
=
self
.
_make_layer
(
block
,
layer_nums
[
2
],
in_channel
=
in_channels
[
2
],
out_channel
=
out_channels
[
2
],
stride
=
strides
[
2
])
self
.
layer4
=
self
.
_make_layer
(
block
,
layer_nums
[
3
],
in_channel
=
in_channels
[
3
],
out_channel
=
out_channels
[
3
],
stride
=
strides
[
3
])
self
.
num_classes
=
num_classes
if
num_classes
:
self
.
reduce_mean
=
P
.
ReduceMean
(
keep_dims
=
True
)
self
.
end_point
=
nn
.
Dense
(
out_channels
[
3
],
num_classes
,
has_bias
=
True
,
weight_init
=
weight_variable
(),
bias_init
=
weight_variable
())
self
.
squeeze
=
P
.
Squeeze
(
axis
=
(
2
,
3
))
def
_make_layer
(
self
,
block
,
layer_num
,
in_channel
,
out_channel
,
stride
):
"""
Make Layer for ResNet.
Args:
block (Cell): Resnet block.
layer_num (int): Layer number.
in_channel (int): Input channel.
out_channel (int): Output channel.
stride (int): Stride size for the initial convolutional layer.
Returns:
SequentialCell, the output layer.
Examples:
_make_layer(BasicBlock, 3, 128, 256, 2).
"""
layers
=
[]
resblk
=
block
(
in_channel
,
out_channel
,
stride
=
stride
)
layers
.
append
(
resblk
)
for
_
in
range
(
1
,
layer_num
-
1
):
resblk
=
block
(
out_channel
,
out_channel
,
stride
=
1
)
layers
.
append
(
resblk
)
resblk
=
block
(
out_channel
,
out_channel
,
stride
=
1
)
layers
.
append
(
resblk
)
return
nn
.
SequentialCell
(
layers
)
def
construct
(
self
,
x
):
x
=
self
.
conv1
(
x
)
x
=
self
.
bn1
(
x
)
x
=
self
.
relu
(
x
)
c1
=
self
.
maxpool
(
x
)
c2
=
self
.
layer1
(
c1
)
c3
=
self
.
layer2
(
c2
)
c4
=
self
.
layer3
(
c3
)
c5
=
self
.
layer4
(
c4
)
out
=
c5
if
self
.
num_classes
:
out
=
self
.
reduce_mean
(
c5
,
(
2
,
3
))
out
=
self
.
squeeze
(
out
)
out
=
self
.
end_point
(
out
)
return
c3
,
c4
,
out
def
resnet18
(
class_num
=
10
):
"""
Get ResNet18 neural network.
Args:
class_num (int): Class number.
Returns:
Cell, cell instance of ResNet18 neural network.
Examples:
resnet18(100).
"""
return
ResNet
(
BasicBlock
,
[
2
,
2
,
2
,
2
],
[
64
,
64
,
128
,
256
],
[
64
,
128
,
256
,
512
],
[
1
,
2
,
2
,
2
],
num_classes
=
class_num
)
class
YoloBlock
(
nn
.
Cell
):
"""
YoloBlock for YOLOv3.
Args:
in_channels (int): Input channel.
out_chls (int): Middle channel.
out_channels (int): Output channel.
Returns:
Tuple, tuple of output tensor,(f1,f2,f3).
Examples:
YoloBlock(1024, 512, 255).
"""
def
__init__
(
self
,
in_channels
,
out_chls
,
out_channels
):
super
(
YoloBlock
,
self
).
__init__
()
out_chls_2
=
out_chls
*
2
self
.
conv0
=
_conv_bn_relu
(
in_channels
,
out_chls
,
ksize
=
1
)
self
.
conv1
=
_conv_bn_relu
(
out_chls
,
out_chls_2
,
ksize
=
3
)
self
.
conv2
=
_conv_bn_relu
(
out_chls_2
,
out_chls
,
ksize
=
1
)
self
.
conv3
=
_conv_bn_relu
(
out_chls
,
out_chls_2
,
ksize
=
3
)
self
.
conv4
=
_conv_bn_relu
(
out_chls_2
,
out_chls
,
ksize
=
1
)
self
.
conv5
=
_conv_bn_relu
(
out_chls
,
out_chls_2
,
ksize
=
3
)
self
.
conv6
=
nn
.
Conv2d
(
out_chls_2
,
out_channels
,
kernel_size
=
1
,
stride
=
1
,
has_bias
=
True
)
def
construct
(
self
,
x
):
c1
=
self
.
conv0
(
x
)
c2
=
self
.
conv1
(
c1
)
c3
=
self
.
conv2
(
c2
)
c4
=
self
.
conv3
(
c3
)
c5
=
self
.
conv4
(
c4
)
c6
=
self
.
conv5
(
c5
)
out
=
self
.
conv6
(
c6
)
return
c5
,
out
class
YOLOv3
(
nn
.
Cell
):
"""
YOLOv3 Network.
Note:
backbone = resnet18.
Args:
feature_shape (list): Input image shape, [N,C,H,W].
backbone_shape (list): resnet18 output channels shape.
backbone (Cell): Backbone Network.
out_channel (int): Output channel.
Returns:
Tensor, output tensor.
Examples:
YOLOv3(feature_shape=[1,3,416,416],
backbone_shape=[64, 128, 256, 512, 1024]
backbone=darknet53(),
out_channel=255).
"""
def
__init__
(
self
,
feature_shape
,
backbone_shape
,
backbone
,
out_channel
):
super
(
YOLOv3
,
self
).
__init__
()
self
.
out_channel
=
out_channel
self
.
net
=
backbone
self
.
backblock0
=
YoloBlock
(
backbone_shape
[
-
1
],
out_chls
=
backbone_shape
[
-
2
],
out_channels
=
out_channel
)
self
.
conv1
=
_conv_bn_relu
(
in_channel
=
backbone_shape
[
-
2
],
out_channel
=
backbone_shape
[
-
2
]
//
2
,
ksize
=
1
)
self
.
upsample1
=
P
.
ResizeNearestNeighbor
((
feature_shape
[
2
]
//
16
,
feature_shape
[
3
]
//
16
))
self
.
backblock1
=
YoloBlock
(
in_channels
=
backbone_shape
[
-
2
]
+
backbone_shape
[
-
3
],
out_chls
=
backbone_shape
[
-
3
],
out_channels
=
out_channel
)
self
.
conv2
=
_conv_bn_relu
(
in_channel
=
backbone_shape
[
-
3
],
out_channel
=
backbone_shape
[
-
3
]
//
2
,
ksize
=
1
)
self
.
upsample2
=
P
.
ResizeNearestNeighbor
((
feature_shape
[
2
]
//
8
,
feature_shape
[
3
]
//
8
))
self
.
backblock2
=
YoloBlock
(
in_channels
=
backbone_shape
[
-
3
]
+
backbone_shape
[
-
4
],
out_chls
=
backbone_shape
[
-
4
],
out_channels
=
out_channel
)
self
.
concat
=
P
.
Concat
(
axis
=
1
)
def
construct
(
self
,
x
):
# input_shape of x is (batch_size, 3, h, w)
# feature_map1 is (batch_size, backbone_shape[2], h/8, w/8)
# feature_map2 is (batch_size, backbone_shape[3], h/16, w/16)
# feature_map3 is (batch_size, backbone_shape[4], h/32, w/32)
feature_map1
,
feature_map2
,
feature_map3
=
self
.
net
(
x
)
con1
,
big_object_output
=
self
.
backblock0
(
feature_map3
)
con1
=
self
.
conv1
(
con1
)
ups1
=
self
.
upsample1
(
con1
)
con1
=
self
.
concat
((
ups1
,
feature_map2
))
con2
,
medium_object_output
=
self
.
backblock1
(
con1
)
con2
=
self
.
conv2
(
con2
)
ups2
=
self
.
upsample2
(
con2
)
con3
=
self
.
concat
((
ups2
,
feature_map1
))
_
,
small_object_output
=
self
.
backblock2
(
con3
)
return
big_object_output
,
medium_object_output
,
small_object_output
class
DetectionBlock
(
nn
.
Cell
):
"""
YOLOv3 detection Network. It will finally output the detection result.
Args:
scale (str): Character, scale.
config (Class): YOLOv3 config.
Returns:
Tuple, tuple of output tensor,(f1,f2,f3).
Examples:
DetectionBlock(scale='l',stride=32).
"""
def
__init__
(
self
,
scale
,
config
):
super
(
DetectionBlock
,
self
).
__init__
()
self
.
config
=
config
if
scale
==
's'
:
idx
=
(
0
,
1
,
2
)
elif
scale
==
'm'
:
idx
=
(
3
,
4
,
5
)
elif
scale
==
'l'
:
idx
=
(
6
,
7
,
8
)
else
:
raise
KeyError
(
"Invalid scale value for DetectionBlock"
)
self
.
anchors
=
Tensor
([
self
.
config
.
anchor_scales
[
i
]
for
i
in
idx
],
ms
.
float32
)
self
.
num_anchors_per_scale
=
3
self
.
num_attrib
=
4
+
1
+
self
.
config
.
num_classes
self
.
ignore_threshold
=
0.5
self
.
lambda_coord
=
1
self
.
sigmoid
=
nn
.
Sigmoid
()
self
.
reshape
=
P
.
Reshape
()
self
.
tile
=
P
.
Tile
()
self
.
concat
=
P
.
Concat
(
axis
=-
1
)
self
.
input_shape
=
Tensor
(
tuple
(
config
.
img_shape
[::
-
1
]),
ms
.
float32
)
def
construct
(
self
,
x
):
num_batch
=
P
.
Shape
()(
x
)[
0
]
grid_size
=
P
.
Shape
()(
x
)[
2
:
4
]
# Reshape and transpose the feature to [n, 3, grid_size[0], grid_size[1], num_attrib]
prediction
=
P
.
Reshape
()(
x
,
(
num_batch
,
self
.
num_anchors_per_scale
,
self
.
num_attrib
,
grid_size
[
0
],
grid_size
[
1
]))
prediction
=
P
.
Transpose
()(
prediction
,
(
0
,
3
,
4
,
1
,
2
))
range_x
=
range
(
grid_size
[
1
])
range_y
=
range
(
grid_size
[
0
])
grid_x
=
P
.
Cast
()(
F
.
tuple_to_array
(
range_x
),
ms
.
float32
)
grid_y
=
P
.
Cast
()(
F
.
tuple_to_array
(
range_y
),
ms
.
float32
)
# Tensor of shape [grid_size[0], grid_size[1], 1, 1] representing the coordinate of x/y axis for each grid
grid_x
=
self
.
tile
(
self
.
reshape
(
grid_x
,
(
1
,
1
,
-
1
,
1
,
1
)),
(
1
,
grid_size
[
0
],
1
,
1
,
1
))
grid_y
=
self
.
tile
(
self
.
reshape
(
grid_y
,
(
1
,
-
1
,
1
,
1
,
1
)),
(
1
,
1
,
grid_size
[
1
],
1
,
1
))
# Shape is [grid_size[0], grid_size[1], 1, 2]
grid
=
self
.
concat
((
grid_x
,
grid_y
))
box_xy
=
prediction
[:,
:,
:,
:,
:
2
]
box_wh
=
prediction
[:,
:,
:,
:,
2
:
4
]
box_confidence
=
prediction
[:,
:,
:,
:,
4
:
5
]
box_probs
=
prediction
[:,
:,
:,
:,
5
:]
box_xy
=
(
self
.
sigmoid
(
box_xy
)
+
grid
)
/
P
.
Cast
()(
F
.
tuple_to_array
((
grid_size
[
1
],
grid_size
[
0
])),
ms
.
float32
)
box_wh
=
P
.
Exp
()(
box_wh
)
*
self
.
anchors
/
self
.
input_shape
box_confidence
=
self
.
sigmoid
(
box_confidence
)
box_probs
=
self
.
sigmoid
(
box_probs
)
if
self
.
training
:
return
grid
,
prediction
,
box_xy
,
box_wh
return
box_xy
,
box_wh
,
box_confidence
,
box_probs
class
Iou
(
nn
.
Cell
):
"""Calculate the iou of boxes."""
def
__init__
(
self
):
super
(
Iou
,
self
).
__init__
()
self
.
min
=
P
.
Minimum
()
self
.
max
=
P
.
Maximum
()
def
construct
(
self
,
box1
,
box2
):
box1_xy
=
box1
[:,
:,
:,
:,
:,
:
2
]
box1_wh
=
box1
[:,
:,
:,
:,
:,
2
:
4
]
box1_mins
=
box1_xy
-
box1_wh
/
F
.
scalar_to_array
(
2.0
)
box1_maxs
=
box1_xy
+
box1_wh
/
F
.
scalar_to_array
(
2.0
)
box2_xy
=
box2
[:,
:,
:,
:,
:,
:
2
]
box2_wh
=
box2
[:,
:,
:,
:,
:,
2
:
4
]
box2_mins
=
box2_xy
-
box2_wh
/
F
.
scalar_to_array
(
2.0
)
box2_maxs
=
box2_xy
+
box2_wh
/
F
.
scalar_to_array
(
2.0
)
intersect_mins
=
self
.
max
(
box1_mins
,
box2_mins
)
intersect_maxs
=
self
.
min
(
box1_maxs
,
box2_maxs
)
intersect_wh
=
self
.
max
(
intersect_maxs
-
intersect_mins
,
F
.
scalar_to_array
(
0.0
))
intersect_area
=
P
.
Squeeze
(
-
1
)(
intersect_wh
[:,
:,
:,
:,
:,
0
:
1
])
*
\
P
.
Squeeze
(
-
1
)(
intersect_wh
[:,
:,
:,
:,
:,
1
:
2
])
box1_area
=
P
.
Squeeze
(
-
1
)(
box1_wh
[:,
:,
:,
:,
:,
0
:
1
])
*
P
.
Squeeze
(
-
1
)(
box1_wh
[:,
:,
:,
:,
:,
1
:
2
])
box2_area
=
P
.
Squeeze
(
-
1
)(
box2_wh
[:,
:,
:,
:,
:,
0
:
1
])
*
P
.
Squeeze
(
-
1
)(
box2_wh
[:,
:,
:,
:,
:,
1
:
2
])
iou
=
intersect_area
/
(
box1_area
+
box2_area
-
intersect_area
)
return
iou
class
YoloLossBlock
(
nn
.
Cell
):
"""
YOLOv3 Loss block cell. It will finally output loss of the scale.
Args:
scale (str): Three scale here, 's', 'm' and 'l'.
config (Class): The default config of YOLOv3.
Returns:
Tensor, loss of the scale.
Examples:
YoloLossBlock('l', ConfigYOLOV3ResNet18()).
"""
def
__init__
(
self
,
scale
,
config
):
super
(
YoloLossBlock
,
self
).
__init__
()
self
.
config
=
config
if
scale
==
's'
:
idx
=
(
0
,
1
,
2
)
elif
scale
==
'm'
:
idx
=
(
3
,
4
,
5
)
elif
scale
==
'l'
:
idx
=
(
6
,
7
,
8
)
else
:
raise
KeyError
(
"Invalid scale value for DetectionBlock"
)
self
.
anchors
=
Tensor
([
self
.
config
.
anchor_scales
[
i
]
for
i
in
idx
],
ms
.
float32
)
self
.
ignore_threshold
=
Tensor
(
self
.
config
.
ignore_threshold
,
ms
.
float32
)
self
.
concat
=
P
.
Concat
(
axis
=-
1
)
self
.
iou
=
Iou
()
self
.
cross_entropy
=
P
.
SigmoidCrossEntropyWithLogits
()
self
.
reduce_sum
=
P
.
ReduceSum
()
self
.
reduce_max
=
P
.
ReduceMax
(
keep_dims
=
False
)
self
.
input_shape
=
Tensor
(
tuple
(
config
.
img_shape
[::
-
1
]),
ms
.
float32
)
def
construct
(
self
,
grid
,
prediction
,
pred_xy
,
pred_wh
,
y_true
,
gt_box
):
object_mask
=
y_true
[:,
:,
:,
:,
4
:
5
]
class_probs
=
y_true
[:,
:,
:,
:,
5
:]
grid_shape
=
P
.
Shape
()(
prediction
)[
1
:
3
]
grid_shape
=
P
.
Cast
()(
F
.
tuple_to_array
(
grid_shape
[::
-
1
]),
ms
.
float32
)
pred_boxes
=
self
.
concat
((
pred_xy
,
pred_wh
))
true_xy
=
y_true
[:,
:,
:,
:,
:
2
]
*
grid_shape
-
grid
true_wh
=
y_true
[:,
:,
:,
:,
2
:
4
]
true_wh
=
P
.
Select
()(
P
.
Equal
()(
true_wh
,
0.0
),
P
.
Fill
()(
P
.
DType
()(
true_wh
),
P
.
Shape
()(
true_wh
),
1.0
),
true_wh
)
true_wh
=
P
.
Log
()(
true_wh
/
self
.
anchors
*
self
.
input_shape
)
box_loss_scale
=
2
-
y_true
[:,
:,
:,
:,
2
:
3
]
*
y_true
[:,
:,
:,
:,
3
:
4
]
gt_shape
=
P
.
Shape
()(
gt_box
)
gt_box
=
P
.
Reshape
()(
gt_box
,
(
gt_shape
[
0
],
1
,
1
,
1
,
gt_shape
[
1
],
gt_shape
[
2
]))
iou
=
self
.
iou
(
P
.
ExpandDims
()(
pred_boxes
,
-
2
),
gt_box
)
# [batch, grid[0], grid[1], num_anchor, num_gt]
best_iou
=
self
.
reduce_max
(
iou
,
-
1
)
# [batch, grid[0], grid[1], num_anchor]
ignore_mask
=
best_iou
<
self
.
ignore_threshold
ignore_mask
=
P
.
Cast
()(
ignore_mask
,
ms
.
float32
)
ignore_mask
=
P
.
ExpandDims
()(
ignore_mask
,
-
1
)
ignore_mask
=
F
.
stop_gradient
(
ignore_mask
)
xy_loss
=
object_mask
*
box_loss_scale
*
self
.
cross_entropy
(
prediction
[:,
:,
:,
:,
:
2
],
true_xy
)
wh_loss
=
object_mask
*
box_loss_scale
*
0.5
*
P
.
Square
()(
true_wh
-
prediction
[:,
:,
:,
:,
2
:
4
])
confidence_loss
=
self
.
cross_entropy
(
prediction
[:,
:,
:,
:,
4
:
5
],
object_mask
)
confidence_loss
=
object_mask
*
confidence_loss
+
(
1
-
object_mask
)
*
confidence_loss
*
ignore_mask
class_loss
=
object_mask
*
self
.
cross_entropy
(
prediction
[:,
:,
:,
:,
5
:],
class_probs
)
# Get smooth loss
xy_loss
=
self
.
reduce_sum
(
xy_loss
,
())
wh_loss
=
self
.
reduce_sum
(
wh_loss
,
())
confidence_loss
=
self
.
reduce_sum
(
confidence_loss
,
())
class_loss
=
self
.
reduce_sum
(
class_loss
,
())
loss
=
xy_loss
+
wh_loss
+
confidence_loss
+
class_loss
return
loss
/
P
.
Shape
()(
prediction
)[
0
]
class
yolov3_resnet18
(
nn
.
Cell
):
"""
ResNet based YOLOv3 network.
Args:
config (Class): YOLOv3 config.
Returns:
Cell, cell instance of ResNet based YOLOv3 neural network.
Examples:
yolov3_resnet18(80, [1,3,416,416]).
"""
def
__init__
(
self
,
config
):
super
(
yolov3_resnet18
,
self
).
__init__
()
self
.
config
=
config
# YOLOv3 network
self
.
feature_map
=
YOLOv3
(
feature_shape
=
self
.
config
.
feature_shape
,
backbone
=
ResNet
(
BasicBlock
,
self
.
config
.
backbone_layers
,
self
.
config
.
backbone_input_shape
,
self
.
config
.
backbone_shape
,
self
.
config
.
backbone_stride
,
num_classes
=
None
),
backbone_shape
=
self
.
config
.
backbone_shape
,
out_channel
=
self
.
config
.
out_channel
)
# prediction on the default anchor boxes
self
.
detect_1
=
DetectionBlock
(
'l'
,
self
.
config
)
self
.
detect_2
=
DetectionBlock
(
'm'
,
self
.
config
)
self
.
detect_3
=
DetectionBlock
(
's'
,
self
.
config
)
def
construct
(
self
,
x
):
big_object_output
,
medium_object_output
,
small_object_output
=
self
.
feature_map
(
x
)
output_big
=
self
.
detect_1
(
big_object_output
)
output_me
=
self
.
detect_2
(
medium_object_output
)
output_small
=
self
.
detect_3
(
small_object_output
)
return
output_big
,
output_me
,
output_small
class
YoloWithLossCell
(
nn
.
Cell
):
""""
Provide YOLOv3 training loss through network.
Args:
network (Cell): The training network.
config (Class): YOLOv3 config.
Returns:
Tensor, the loss of the network.
"""
def
__init__
(
self
,
network
,
config
):
super
(
YoloWithLossCell
,
self
).
__init__
()
self
.
yolo_network
=
network
self
.
config
=
config
self
.
loss_big
=
YoloLossBlock
(
'l'
,
self
.
config
)
self
.
loss_me
=
YoloLossBlock
(
'm'
,
self
.
config
)
self
.
loss_small
=
YoloLossBlock
(
's'
,
self
.
config
)
def
construct
(
self
,
x
,
y_true_0
,
y_true_1
,
y_true_2
,
gt_0
,
gt_1
,
gt_2
):
yolo_out
=
self
.
yolo_network
(
x
)
loss_l
=
self
.
loss_big
(
yolo_out
[
0
][
0
],
yolo_out
[
0
][
1
],
yolo_out
[
0
][
2
],
yolo_out
[
0
][
3
],
y_true_0
,
gt_0
)
loss_m
=
self
.
loss_me
(
yolo_out
[
1
][
0
],
yolo_out
[
1
][
1
],
yolo_out
[
1
][
2
],
yolo_out
[
1
][
3
],
y_true_1
,
gt_1
)
loss_s
=
self
.
loss_small
(
yolo_out
[
2
][
0
],
yolo_out
[
2
][
1
],
yolo_out
[
2
][
2
],
yolo_out
[
2
][
3
],
y_true_2
,
gt_2
)
return
loss_l
+
loss_m
+
loss_s
class
TrainingWrapper
(
nn
.
Cell
):
"""
Encapsulation class of YOLOv3 network training.
Append an optimizer to the training network after that the construct
function can be called to create the backward graph.
Args:
network (Cell): The training network. Note that loss function should have been added.
optimizer (Optimizer): Optimizer for updating the weights.
sens (Number): The adjust parameter. Default: 1.0.
"""
def
__init__
(
self
,
network
,
optimizer
,
sens
=
1.0
):
super
(
TrainingWrapper
,
self
).
__init__
(
auto_prefix
=
False
)
self
.
network
=
network
self
.
weights
=
ms
.
ParameterTuple
(
network
.
trainable_params
())
self
.
optimizer
=
optimizer
self
.
grad
=
C
.
GradOperation
(
'grad'
,
get_by_list
=
True
,
sens_param
=
True
)
self
.
sens
=
sens
self
.
reducer_flag
=
False
self
.
grad_reducer
=
None
self
.
parallel_mode
=
context
.
get_auto_parallel_context
(
"parallel_mode"
)
if
self
.
parallel_mode
in
[
ms
.
ParallelMode
.
DATA_PARALLEL
,
ms
.
ParallelMode
.
HYBRID_PARALLEL
]:
self
.
reducer_flag
=
True
if
self
.
reducer_flag
:
mean
=
context
.
get_auto_parallel_context
(
"mirror_mean"
)
if
auto_parallel_context
().
get_device_num_is_set
():
degree
=
context
.
get_auto_parallel_context
(
"device_num"
)
else
:
degree
=
get_group_size
()
self
.
grad_reducer
=
nn
.
DistributedGradReducer
(
optimizer
.
parameters
,
mean
,
degree
)
def
construct
(
self
,
*
args
):
weights
=
self
.
weights
loss
=
self
.
network
(
*
args
)
sens
=
P
.
Fill
()(
P
.
DType
()(
loss
),
P
.
Shape
()(
loss
),
self
.
sens
)
grads
=
self
.
grad
(
self
.
network
,
weights
)(
*
args
,
sens
)
if
self
.
reducer_flag
:
# apply grad reducer on grads
grads
=
self
.
grad_reducer
(
grads
)
return
F
.
depend
(
loss
,
self
.
optimizer
(
grads
))
class
YoloBoxScores
(
nn
.
Cell
):
"""
Calculate the boxes of the original picture size and the score of each box.
Args:
config (Class): YOLOv3 config.
Returns:
Tensor, the boxes of the original picture size.
Tensor, the score of each box.
"""
def
__init__
(
self
,
config
):
super
(
YoloBoxScores
,
self
).
__init__
()
self
.
input_shape
=
Tensor
(
np
.
array
(
config
.
img_shape
),
ms
.
float32
)
self
.
num_classes
=
config
.
num_classes
def
construct
(
self
,
box_xy
,
box_wh
,
box_confidence
,
box_probs
,
image_shape
):
batch_size
=
F
.
shape
(
box_xy
)[
0
]
x
=
box_xy
[:,
:,
:,
:,
0
:
1
]
y
=
box_xy
[:,
:,
:,
:,
1
:
2
]
box_yx
=
P
.
Concat
(
-
1
)((
y
,
x
))
w
=
box_wh
[:,
:,
:,
:,
0
:
1
]
h
=
box_wh
[:,
:,
:,
:,
1
:
2
]
box_hw
=
P
.
Concat
(
-
1
)((
h
,
w
))
new_shape
=
P
.
Round
()(
image_shape
*
P
.
ReduceMin
()(
self
.
input_shape
/
image_shape
))
offset
=
(
self
.
input_shape
-
new_shape
)
/
2.0
/
self
.
input_shape
scale
=
self
.
input_shape
/
new_shape
box_yx
=
(
box_yx
-
offset
)
*
scale
box_hw
=
box_hw
*
scale
box_min
=
box_yx
-
box_hw
/
2.0
box_max
=
box_yx
+
box_hw
/
2.0
boxes
=
P
.
Concat
(
-
1
)((
box_min
[:,
:,
:,
:,
0
:
1
],
box_min
[:,
:,
:,
:,
1
:
2
],
box_max
[:,
:,
:,
:,
0
:
1
],
box_max
[:,
:,
:,
:,
1
:
2
]))
image_scale
=
P
.
Tile
()(
image_shape
,
(
1
,
2
))
boxes
=
boxes
*
image_scale
boxes
=
F
.
reshape
(
boxes
,
(
batch_size
,
-
1
,
4
))
boxes_scores
=
box_confidence
*
box_probs
boxes_scores
=
F
.
reshape
(
boxes_scores
,
(
batch_size
,
-
1
,
self
.
num_classes
))
return
boxes
,
boxes_scores
class
YoloWithEval
(
nn
.
Cell
):
"""
Encapsulation class of YOLOv3 evaluation.
Args:
network (Cell): The training network. Note that loss function and optimizer must not be added.
config (Class): YOLOv3 config.
Returns:
Tensor, the boxes of the original picture size.
Tensor, the score of each box.
Tensor, the original picture size.
"""
def
__init__
(
self
,
network
,
config
):
super
(
YoloWithEval
,
self
).
__init__
()
self
.
yolo_network
=
network
self
.
box_score_0
=
YoloBoxScores
(
config
)
self
.
box_score_1
=
YoloBoxScores
(
config
)
self
.
box_score_2
=
YoloBoxScores
(
config
)
def
construct
(
self
,
x
,
image_shape
):
yolo_output
=
self
.
yolo_network
(
x
)
boxes_0
,
boxes_scores_0
=
self
.
box_score_0
(
*
yolo_output
[
0
],
image_shape
)
boxes_1
,
boxes_scores_1
=
self
.
box_score_1
(
*
yolo_output
[
1
],
image_shape
)
boxes_2
,
boxes_scores_2
=
self
.
box_score_2
(
*
yolo_output
[
2
],
image_shape
)
boxes
=
P
.
Concat
(
1
)((
boxes_0
,
boxes_1
,
boxes_2
))
boxes_scores
=
P
.
Concat
(
1
)((
boxes_scores_0
,
boxes_scores_1
,
boxes_scores_2
))
return
boxes
,
boxes_scores
,
image_shape
tests/st/model_zoo_tests/yolov3/test_yolov3.py
0 → 100644
浏览文件 @
84dd46a7
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# less required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
######################## train YOLOv3 example ########################
train YOLOv3 and get network model files(.ckpt) :
python train.py --image_dir /data --anno_path /data/coco/train_coco.txt --mindrecord_dir=/data/Mindrecord_train
If the mindrecord_dir is empty, it wil generate mindrecord file by image_dir and anno_path.
Note if mindrecord_dir isn't empty, it will use mindrecord_dir rather than image_dir and anno_path.
"""
import
os
import
time
import
pytest
import
numpy
as
np
import
mindspore.nn
as
nn
from
mindspore
import
context
,
Tensor
from
mindspore.train
import
Model
from
mindspore.common.initializer
import
initializer
from
mindspore.train.callback
import
Callback
from
src.yolov3
import
yolov3_resnet18
,
YoloWithLossCell
,
TrainingWrapper
from
src.dataset
import
create_yolo_dataset
from
src.config
import
ConfigYOLOV3ResNet18
np
.
random
.
seed
(
1
)
def
get_lr
(
learning_rate
,
start_step
,
global_step
,
decay_step
,
decay_rate
,
steps
=
False
):
"""Set learning rate."""
lr_each_step
=
[]
for
i
in
range
(
global_step
):
if
steps
:
lr_each_step
.
append
(
learning_rate
*
(
decay_rate
**
(
i
//
decay_step
)))
else
:
lr_each_step
.
append
(
learning_rate
*
(
decay_rate
**
(
i
/
decay_step
)))
lr_each_step
=
np
.
array
(
lr_each_step
).
astype
(
np
.
float32
)
lr_each_step
=
lr_each_step
[
start_step
:]
return
lr_each_step
def
init_net_param
(
network
,
init_value
=
'ones'
):
"""Init:wq the parameters in network."""
params
=
network
.
trainable_params
()
for
p
in
params
:
if
isinstance
(
p
.
data
,
Tensor
)
and
'beta'
not
in
p
.
name
and
'gamma'
not
in
p
.
name
and
'bias'
not
in
p
.
name
:
p
.
set_parameter_data
(
initializer
(
init_value
,
p
.
data
.
shape
(),
p
.
data
.
dtype
()))
class
ModelCallback
(
Callback
):
def
__init__
(
self
):
super
(
ModelCallback
,
self
).
__init__
()
self
.
loss_list
=
[]
def
step_end
(
self
,
run_context
):
cb_params
=
run_context
.
original_args
()
self
.
loss_list
.
append
(
cb_params
.
net_outputs
.
asnumpy
())
print
(
"epoch: {}, outputs are: {}"
.
format
(
cb_params
.
cur_epoch_num
,
str
(
cb_params
.
net_outputs
)))
class
TimeMonitor
(
Callback
):
"""Time Monitor."""
def
__init__
(
self
,
data_size
):
super
(
TimeMonitor
,
self
).
__init__
()
self
.
data_size
=
data_size
self
.
epoch_mseconds_list
=
[]
self
.
per_step_mseconds_list
=
[]
def
epoch_begin
(
self
,
run_context
):
self
.
epoch_time
=
time
.
time
()
def
epoch_end
(
self
,
run_context
):
epoch_mseconds
=
(
time
.
time
()
-
self
.
epoch_time
)
*
1000
self
.
epoch_mseconds_list
.
append
(
epoch_mseconds
)
self
.
per_step_mseconds_list
.
append
(
epoch_mseconds
/
self
.
data_size
)
DATA_DIR
=
"/home/workspace/mindspore_dataset/coco/coco2017/mindrecord_train/yolov3"
@
pytest
.
mark
.
level0
@
pytest
.
mark
.
platform_arm_ascend_training
@
pytest
.
mark
.
platform_x86_ascend_training
@
pytest
.
mark
.
env_onecard
def
test_yolov3
():
context
.
set_context
(
mode
=
context
.
GRAPH_MODE
,
device_target
=
"Ascend"
)
rank
=
0
device_num
=
1
lr_init
=
0.001
epoch_size
=
3
batch_size
=
32
loss_scale
=
1024
mindrecord_dir
=
DATA_DIR
# It will generate mindrecord file in args_opt.mindrecord_dir,
# and the file name is yolo.mindrecord0, 1, ... file_num.
if
not
os
.
path
.
isdir
(
mindrecord_dir
):
raise
KeyError
(
"mindrecord path is not exist."
)
prefix
=
"yolo.mindrecord"
mindrecord_file
=
os
.
path
.
join
(
mindrecord_dir
,
prefix
+
"0"
)
print
(
"yolov3 mindrecord is "
,
mindrecord_file
)
if
not
os
.
path
.
exists
(
mindrecord_file
):
print
(
"mindrecord file is not exist."
)
assert
False
else
:
loss_scale
=
float
(
loss_scale
)
# When create MindDataset, using the fitst mindrecord file, such as yolo.mindrecord0.
dataset
=
create_yolo_dataset
(
mindrecord_file
,
repeat_num
=
epoch_size
,
batch_size
=
batch_size
,
device_num
=
device_num
,
rank
=
rank
)
dataset_size
=
dataset
.
get_dataset_size
()
print
(
"Create dataset done!"
)
net
=
yolov3_resnet18
(
ConfigYOLOV3ResNet18
())
net
=
YoloWithLossCell
(
net
,
ConfigYOLOV3ResNet18
())
init_net_param
(
net
)
total_epoch_size
=
60
lr
=
Tensor
(
get_lr
(
learning_rate
=
lr_init
,
start_step
=
0
,
global_step
=
total_epoch_size
*
dataset_size
,
decay_step
=
1000
,
decay_rate
=
0.95
,
steps
=
True
))
opt
=
nn
.
Adam
(
filter
(
lambda
x
:
x
.
requires_grad
,
net
.
get_parameters
()),
lr
,
loss_scale
=
loss_scale
)
net
=
TrainingWrapper
(
net
,
opt
,
loss_scale
)
model_callback
=
ModelCallback
()
time_monitor_callback
=
TimeMonitor
(
data_size
=
dataset_size
)
callback
=
[
model_callback
,
time_monitor_callback
]
model
=
Model
(
net
)
print
(
"Start train YOLOv3, the first epoch will be slower because of the graph compilation."
)
model
.
train
(
epoch_size
,
dataset
,
callbacks
=
callback
,
dataset_sink_mode
=
True
)
# assertion occurs while the loss value, overflow state or loss_scale value is wrong
loss_value
=
np
.
array
(
model_callback
.
loss_list
)
expect_loss_value
=
[
6600
,
4200
,
2700
]
print
(
"loss value: {}"
.
format
(
loss_value
))
assert
loss_value
[
0
]
<
expect_loss_value
[
0
]
assert
loss_value
[
1
]
<
expect_loss_value
[
1
]
assert
loss_value
[
2
]
<
expect_loss_value
[
2
]
epoch_mseconds
=
np
.
array
(
time_monitor_callback
.
epoch_mseconds_list
)[
2
]
expect_epoch_mseconds
=
950
print
(
"epoch mseconds: {}"
.
format
(
epoch_mseconds
))
assert
epoch_mseconds
<=
expect_epoch_mseconds
per_step_mseconds
=
np
.
array
(
time_monitor_callback
.
per_step_mseconds_list
)[
2
]
expect_per_step_mseconds
=
110
print
(
"per step mseconds: {}"
.
format
(
per_step_mseconds
))
assert
per_step_mseconds
<=
expect_per_step_mseconds
print
(
"yolov3 test case passed."
)
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录