Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
陈小光丶
yolov4-keras
提交
5b6f4c01
Y
yolov4-keras
项目概览
陈小光丶
/
yolov4-keras
与 Fork 源项目一致
从无法访问的项目Fork
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Y
yolov4-keras
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
未验证
提交
5b6f4c01
编写于
1月 14, 2021
作者:
B
Bubbliiiing
提交者:
GitHub
1月 14, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add files via upload
上级
1efeefe3
变更
15
隐藏空白更改
内联
并排
Showing
15 changed file
with
632 addition
and
265 deletion
+632
-265
VOCdevkit/VOC2007/voc2yolo4.py
VOCdevkit/VOC2007/voc2yolo4.py
+10
-1
get_dr_txt.py
get_dr_txt.py
+35
-18
kmeans_for_anchors.py
kmeans_for_anchors.py
+8
-3
nets/CSPdarknet53.py
nets/CSPdarknet53.py
+30
-13
nets/ious.py
nets/ious.py
+25
-8
nets/loss.py
nets/loss.py
+127
-55
nets/yolo4.py
nets/yolo4.py
+126
-42
predict.py
predict.py
+11
-1
test.py
test.py
+9
-6
train.py
train.py
+143
-74
utils/utils.py
utils/utils.py
+39
-11
video.py
video.py
+18
-10
vision_for_anchors.py
vision_for_anchors.py
+4
-2
voc_annotation.py
voc_annotation.py
+5
-0
yolo.py
yolo.py
+42
-21
未找到文件。
VOCdevkit/VOC2007/voc2yolo4.py
浏览文件 @
5b6f4c01
#----------------------------------------------------------------------#
# 验证集的划分在train.py代码里面进行
# test.txt和val.txt里面没有内容是正常的。训练不会使用到。
#----------------------------------------------------------------------#
import
os
import
random
random
.
seed
(
0
)
xmlfilepath
=
r
'./VOCdevkit/VOC2007/Annotations'
saveBasePath
=
r
"./VOCdevkit/VOC2007/ImageSets/Main/"
#----------------------------------------------------------------------#
# 想要增加测试集修改trainval_percent
# train_percent不需要修改
#----------------------------------------------------------------------#
trainval_percent
=
1
train_percent
=
1
...
...
get_dr_txt.py
浏览文件 @
5b6f4c01
...
...
@@ -3,18 +3,21 @@
# 具体视频教程可查看
# https://www.bilibili.com/video/BV1zE411u7Vw
#----------------------------------------------------#
from
yolo
import
YOLO
from
PIL
import
Image
from
keras.layers
import
Input
from
keras.applications.imagenet_utils
import
preprocess_input
from
keras
import
backend
as
K
from
utils.utils
import
letterbox_image
from
nets.yolo4
import
yolo_body
,
yolo_eval
from
tqdm
import
tqdm
import
colorsys
import
numpy
as
np
import
os
import
numpy
as
np
from
keras
import
backend
as
K
from
keras.applications.imagenet_utils
import
preprocess_input
from
keras.layers
import
Input
from
PIL
import
Image
from
tqdm
import
tqdm
from
nets.yolo4
import
yolo_body
,
yolo_eval
from
utils.utils
import
letterbox_image
from
yolo
import
YOLO
class
mAP_YOLO
(
YOLO
):
#---------------------------------------------------#
# 获得所有的分类
...
...
@@ -25,12 +28,16 @@ class mAP_YOLO(YOLO):
model_path
=
os
.
path
.
expanduser
(
self
.
model_path
)
assert
model_path
.
endswith
(
'.h5'
),
'Keras model or weights must be a .h5 file.'
# 计算anchor数量
#---------------------------------------------------#
# 计算先验框的数量和种类的数量
#---------------------------------------------------#
num_anchors
=
len
(
self
.
anchors
)
num_classes
=
len
(
self
.
class_names
)
# 载入模型,如果原来的模型里已经包括了模型结构则直接载入。
# 否则先构建模型再载入
#---------------------------------------------------------#
# 载入模型,如果原来的模型里已经包括了模型结构则直接载入。
# 否则先构建模型再载入
#---------------------------------------------------------#
try
:
self
.
yolo_model
=
load_model
(
model_path
,
compile
=
False
)
except
:
...
...
@@ -58,6 +65,10 @@ class mAP_YOLO(YOLO):
self
.
input_image_shape
=
K
.
placeholder
(
shape
=
(
2
,
))
#---------------------------------------------------------#
# 在yolo_eval函数中,我们会对预测结果进行后处理
# 后处理的内容包括,解码、非极大抑制、门限筛选等
#---------------------------------------------------------#
boxes
,
scores
,
classes
=
yolo_eval
(
self
.
yolo_model
.
output
,
self
.
anchors
,
num_classes
,
self
.
input_image_shape
,
max_boxes
=
self
.
max_boxes
,
score_threshold
=
self
.
score
,
iou_threshold
=
self
.
iou
)
...
...
@@ -68,21 +79,27 @@ class mAP_YOLO(YOLO):
#---------------------------------------------------#
def
detect_image
(
self
,
image_id
,
image
):
f
=
open
(
"./input/detection-results/"
+
image_id
+
".txt"
,
"w"
)
# 调整图片使其符合输入要求
#---------------------------------------------------------#
# 给图像增加灰条,实现不失真的resize
#---------------------------------------------------------#
new_image_size
=
(
self
.
model_image_size
[
1
],
self
.
model_image_size
[
0
])
boxed_image
=
letterbox_image
(
image
,
new_image_size
)
image_data
=
np
.
array
(
boxed_image
,
dtype
=
'float32'
)
image_data
/=
255.
image_data
=
np
.
expand_dims
(
image_data
,
0
)
# Add batch dimension.
# 预测结果
#---------------------------------------------------------#
# 添加上batch_size维度
#---------------------------------------------------------#
image_data
=
np
.
expand_dims
(
image_data
,
0
)
#---------------------------------------------------------#
# 将图像输入网络当中进行预测!
#---------------------------------------------------------#
out_boxes
,
out_scores
,
out_classes
=
self
.
sess
.
run
(
[
self
.
boxes
,
self
.
scores
,
self
.
classes
],
feed_dict
=
{
self
.
yolo_model
.
input
:
image_data
,
self
.
input_image_shape
:
[
image
.
size
[
1
],
image
.
size
[
0
]],
K
.
learning_phase
():
0
})
K
.
learning_phase
():
0
})
for
i
,
c
in
enumerate
(
out_classes
):
predicted_class
=
self
.
class_names
[
int
(
c
)]
...
...
kmeans_for_anchors.py
浏览文件 @
5b6f4c01
import
numpy
as
np
import
xml.etree.ElementTree
as
ET
import
glob
import
random
import
xml.etree.ElementTree
as
ET
import
numpy
as
np
def
cas_iou
(
box
,
cluster
):
x
=
np
.
minimum
(
cluster
[:,
0
],
box
[
0
])
...
...
@@ -61,6 +63,9 @@ def load_data(path):
tree
=
ET
.
parse
(
xml_file
)
height
=
int
(
tree
.
findtext
(
'./size/height'
))
width
=
int
(
tree
.
findtext
(
'./size/width'
))
if
height
<=
0
or
width
<=
0
:
continue
# 对于每一个目标都获得它的宽高
for
obj
in
tree
.
iter
(
'object'
):
xmin
=
int
(
float
(
obj
.
findtext
(
'bndbox/xmin'
)))
/
width
...
...
@@ -103,4 +108,4 @@ if __name__ == '__main__':
else
:
x_y
=
", %d,%d"
%
(
data
[
i
][
0
],
data
[
i
][
1
])
f
.
write
(
x_y
)
f
.
close
()
\ No newline at end of file
f
.
close
()
nets/CSPdarknet53.py
浏览文件 @
5b6f4c01
from
functools
import
wraps
from
keras
import
backend
as
K
from
keras.layers
import
Conv2D
,
Add
,
ZeroPadding2D
,
UpSampling2D
,
Concatenate
,
MaxPooling2D
,
Layer
from
keras.layers
import
(
Add
,
Concatenate
,
Conv2D
,
Layer
,
MaxPooling2D
,
UpSampling2D
,
ZeroPadding2D
)
from
keras.layers.advanced_activations
import
LeakyReLU
from
keras.layers.normalization
import
BatchNormalization
from
keras.regularizers
import
l2
...
...
@@ -21,8 +23,11 @@ class Mish(Layer):
def
compute_output_shape
(
self
,
input_shape
):
return
input_shape
#--------------------------------------------------#
# 单次卷积
# 单次卷积DarknetConv2D
# 正则化系数为5e-4
# 如果步长为2则自己设定padding方式。
#--------------------------------------------------#
@
wraps
(
Conv2D
)
def
DarknetConv2D
(
*
args
,
**
kwargs
):
...
...
@@ -32,7 +37,7 @@ def DarknetConv2D(*args, **kwargs):
return
Conv2D
(
*
args
,
**
darknet_conv_kwargs
)
#---------------------------------------------------#
# 卷积块
# 卷积块
-> 卷积 + 标准化 + 激活函数
# DarknetConv2D + BatchNormalization + Mish
#---------------------------------------------------#
def
DarknetConv2D_BN_Mish
(
*
args
,
**
kwargs
):
...
...
@@ -43,36 +48,48 @@ def DarknetConv2D_BN_Mish(*args, **kwargs):
BatchNormalization
(),
Mish
())
#---------------------------------------------------#
#---------------------------------------------------
-----------------
#
# CSPdarknet的结构块
# 存在一个大残差边
# 这个大残差边绕过了很多的残差结构
#---------------------------------------------------#
# 首先利用ZeroPadding2D和一个步长为2x2的卷积块进行高和宽的压缩
# 然后建立一个大的残差边shortconv、这个大残差边绕过了很多的残差结构
# 主干部分会对num_blocks进行循环,循环内部是残差结构。
# 对于整个CSPdarknet的结构块,就是一个大残差块+内部多个小残差块
#--------------------------------------------------------------------#
def
resblock_body
(
x
,
num_filters
,
num_blocks
,
all_narrow
=
True
):
# 进行长和宽的压缩
#----------------------------------------------------------------#
# 利用ZeroPadding2D和一个步长为2x2的卷积块进行高和宽的压缩
#----------------------------------------------------------------#
preconv1
=
ZeroPadding2D
(((
1
,
0
),(
1
,
0
)))(
x
)
preconv1
=
DarknetConv2D_BN_Mish
(
num_filters
,
(
3
,
3
),
strides
=
(
2
,
2
))(
preconv1
)
# 生成一个大的残差边
#--------------------------------------------------------------------#
# 然后建立一个大的残差边shortconv、这个大残差边绕过了很多的残差结构
#--------------------------------------------------------------------#
shortconv
=
DarknetConv2D_BN_Mish
(
num_filters
//
2
if
all_narrow
else
num_filters
,
(
1
,
1
))(
preconv1
)
# 主干部分的卷积
#----------------------------------------------------------------#
# 主干部分会对num_blocks进行循环,循环内部是残差结构。
#----------------------------------------------------------------#
mainconv
=
DarknetConv2D_BN_Mish
(
num_filters
//
2
if
all_narrow
else
num_filters
,
(
1
,
1
))(
preconv1
)
# 1x1卷积对通道数进行整合->3x3卷积提取特征,使用残差结构
for
i
in
range
(
num_blocks
):
y
=
compose
(
DarknetConv2D_BN_Mish
(
num_filters
//
2
,
(
1
,
1
)),
DarknetConv2D_BN_Mish
(
num_filters
//
2
if
all_narrow
else
num_filters
,
(
3
,
3
)))(
mainconv
)
mainconv
=
Add
()([
mainconv
,
y
])
# 1x1卷积后和残差边堆叠
postconv
=
DarknetConv2D_BN_Mish
(
num_filters
//
2
if
all_narrow
else
num_filters
,
(
1
,
1
))(
mainconv
)
#----------------------------------------------------------------#
# 将大残差边再堆叠回来
#----------------------------------------------------------------#
route
=
Concatenate
()([
postconv
,
shortconv
])
# 最后对通道数进行整合
return
DarknetConv2D_BN_Mish
(
num_filters
,
(
1
,
1
))(
route
)
#---------------------------------------------------#
# darknet53 的主体部分
# CSPdarknet53 的主体部分
# 输入为一张416x416x3的图片
# 输出为三个有效特征层
#---------------------------------------------------#
def
darknet_body
(
x
):
x
=
DarknetConv2D_BN_Mish
(
32
,
(
3
,
3
))(
x
)
...
...
nets/ious.py
浏览文件 @
5b6f4c01
...
...
@@ -12,20 +12,31 @@ def box_ciou(b1, b2):
-------
ciou: tensor, shape=(batch, feat_w, feat_h, anchor_num, 1)
"""
# 求出预测框左上角右下角
#-----------------------------------------------------------#
# 求出预测框左上角右下角
# b1_mins (batch, feat_w, feat_h, anchor_num, 2)
# b1_maxes (batch, feat_w, feat_h, anchor_num, 2)
#-----------------------------------------------------------#
b1_xy
=
b1
[...,
:
2
]
b1_wh
=
b1
[...,
2
:
4
]
b1_wh_half
=
b1_wh
/
2.
b1_mins
=
b1_xy
-
b1_wh_half
b1_maxes
=
b1_xy
+
b1_wh_half
# 求出真实框左上角右下角
#-----------------------------------------------------------#
# 求出真实框左上角右下角
# b2_mins (batch, feat_w, feat_h, anchor_num, 2)
# b2_maxes (batch, feat_w, feat_h, anchor_num, 2)
#-----------------------------------------------------------#
b2_xy
=
b2
[...,
:
2
]
b2_wh
=
b2
[...,
2
:
4
]
b2_wh_half
=
b2_wh
/
2.
b2_mins
=
b2_xy
-
b2_wh_half
b2_maxes
=
b2_xy
+
b2_wh_half
# 求真实框和预测框所有的iou
#-----------------------------------------------------------#
# 求真实框和预测框所有的iou
# iou (batch, feat_w, feat_h, anchor_num)
#-----------------------------------------------------------#
intersect_mins
=
K
.
maximum
(
b1_mins
,
b2_mins
)
intersect_maxes
=
K
.
minimum
(
b1_maxes
,
b2_maxes
)
intersect_wh
=
K
.
maximum
(
intersect_maxes
-
intersect_mins
,
0.
)
...
...
@@ -33,21 +44,27 @@ def box_ciou(b1, b2):
b1_area
=
b1_wh
[...,
0
]
*
b1_wh
[...,
1
]
b2_area
=
b2_wh
[...,
0
]
*
b2_wh
[...,
1
]
union_area
=
b1_area
+
b2_area
-
intersect_area
iou
=
intersect_area
/
K
.
maximum
(
union_area
,
K
.
epsilon
())
iou
=
intersect_area
/
K
.
maximum
(
union_area
,
K
.
epsilon
())
# 计算中心的差距
#-----------------------------------------------------------#
# 计算中心的差距
# center_distance (batch, feat_w, feat_h, anchor_num)
#-----------------------------------------------------------#
center_distance
=
K
.
sum
(
K
.
square
(
b1_xy
-
b2_xy
),
axis
=-
1
)
# 找到包裹两个框的最小框的左上角和右下角
enclose_mins
=
K
.
minimum
(
b1_mins
,
b2_mins
)
enclose_maxes
=
K
.
maximum
(
b1_maxes
,
b2_maxes
)
enclose_wh
=
K
.
maximum
(
enclose_maxes
-
enclose_mins
,
0.0
)
# 计算对角线距离
#-----------------------------------------------------------#
# 计算对角线距离
# enclose_diagonal (batch, feat_w, feat_h, anchor_num)
#-----------------------------------------------------------#
enclose_diagonal
=
K
.
sum
(
K
.
square
(
enclose_wh
),
axis
=-
1
)
ciou
=
iou
-
1.0
*
(
center_distance
)
/
K
.
maximum
(
enclose_diagonal
,
K
.
epsilon
())
v
=
4
*
K
.
square
(
tf
.
math
.
atan2
(
b1_wh
[...,
0
],
K
.
maximum
(
b1_wh
[...,
1
],
K
.
epsilon
()))
-
tf
.
math
.
atan2
(
b2_wh
[...,
0
],
K
.
maximum
(
b2_wh
[...,
1
],
K
.
epsilon
())))
/
(
math
.
pi
*
math
.
pi
)
v
=
4
*
K
.
square
(
tf
.
math
.
atan2
(
b1_wh
[...,
0
],
K
.
maximum
(
b1_wh
[...,
1
],
K
.
epsilon
()))
-
tf
.
math
.
atan2
(
b2_wh
[...,
0
],
K
.
maximum
(
b2_wh
[...,
1
],
K
.
epsilon
())))
/
(
math
.
pi
*
math
.
pi
)
alpha
=
v
/
K
.
maximum
((
1.0
-
iou
+
v
),
K
.
epsilon
())
ciou
=
ciou
-
alpha
*
v
ciou
=
K
.
expand_dims
(
ciou
,
-
1
)
ciou
=
tf
.
where
(
tf
.
is_nan
(
ciou
),
tf
.
zeros_like
(
ciou
),
ciou
)
return
ciou
nets/loss.py
浏览文件 @
5b6f4c01
import
numpy
as
np
import
tensorflow
as
tf
from
keras
import
backend
as
K
from
nets.ious
import
box_ciou
#---------------------------------------------------#
...
...
@@ -10,17 +11,22 @@ def _smooth_labels(y_true, label_smoothing):
num_classes
=
tf
.
cast
(
K
.
shape
(
y_true
)[
-
1
],
dtype
=
K
.
floatx
())
label_smoothing
=
K
.
constant
(
label_smoothing
,
dtype
=
K
.
floatx
())
return
y_true
*
(
1.0
-
label_smoothing
)
+
label_smoothing
/
num_classes
#---------------------------------------------------#
# 将预测值的每个特征层调成真实值
#---------------------------------------------------#
def
yolo_head
(
feats
,
anchors
,
num_classes
,
input_shape
,
calc_loss
=
False
):
num_anchors
=
len
(
anchors
)
# [1, 1, 1, num_anchors, 2]
#---------------------------------------------------#
# [1, 1, 1, num_anchors, 2]
#---------------------------------------------------#
anchors_tensor
=
K
.
reshape
(
K
.
constant
(
anchors
),
[
1
,
1
,
1
,
num_anchors
,
2
])
# 获得x,y的网格
# (13, 13, 1, 2)
grid_shape
=
K
.
shape
(
feats
)[
1
:
3
]
# height, width
#---------------------------------------------------#
# 获得x,y的网格
# (13, 13, 1, 2)
#---------------------------------------------------#
grid_shape
=
K
.
shape
(
feats
)[
1
:
3
]
grid_y
=
K
.
tile
(
K
.
reshape
(
K
.
arange
(
0
,
stop
=
grid_shape
[
0
]),
[
-
1
,
1
,
1
,
1
]),
[
1
,
grid_shape
[
1
],
1
,
1
])
grid_x
=
K
.
tile
(
K
.
reshape
(
K
.
arange
(
0
,
stop
=
grid_shape
[
1
]),
[
1
,
-
1
,
1
,
1
]),
...
...
@@ -28,22 +34,34 @@ def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
grid
=
K
.
concatenate
([
grid_x
,
grid_y
])
grid
=
K
.
cast
(
grid
,
K
.
dtype
(
feats
))
# (batch_size,13,13,3,85)
#---------------------------------------------------#
# 将预测结果调整成(batch_size,13,13,3,85)
# 85可拆分成4 + 1 + 80
# 4代表的是中心宽高的调整参数
# 1代表的是框的置信度
# 80代表的是种类的置信度
#---------------------------------------------------#
feats
=
K
.
reshape
(
feats
,
[
-
1
,
grid_shape
[
0
],
grid_shape
[
1
],
num_anchors
,
num_classes
+
5
])
# 将预测值调成真实值
# box_xy对应框的中心点
# box_wh对应框的宽和高
#---------------------------------------------------#
# 将预测值调成真实值
# box_xy对应框的中心点
# box_wh对应框的宽和高
#---------------------------------------------------#
box_xy
=
(
K
.
sigmoid
(
feats
[...,
:
2
])
+
grid
)
/
K
.
cast
(
grid_shape
[::
-
1
],
K
.
dtype
(
feats
))
box_wh
=
K
.
exp
(
feats
[...,
2
:
4
])
*
anchors_tensor
/
K
.
cast
(
input_shape
[::
-
1
],
K
.
dtype
(
feats
))
box_confidence
=
K
.
sigmoid
(
feats
[...,
4
:
5
])
box_class_probs
=
K
.
sigmoid
(
feats
[...,
5
:])
# 在计算loss的时候返回如下参数
#---------------------------------------------------------------------#
# 在计算loss的时候返回grid, feats, box_xy, box_wh
# 在预测的时候返回box_xy, box_wh, box_confidence, box_class_probs
#---------------------------------------------------------------------#
if
calc_loss
==
True
:
return
grid
,
feats
,
box_xy
,
box_wh
return
box_xy
,
box_wh
,
box_confidence
,
box_class_probs
#---------------------------------------------------#
# 用于计算每个预测框与真实框的iou
#---------------------------------------------------#
...
...
@@ -77,108 +95,162 @@ def box_iou(b1, b2):
return
iou
#---------------------------------------------------#
# loss值计算
#---------------------------------------------------#
def
yolo_loss
(
args
,
anchors
,
num_classes
,
ignore_thresh
=
.
5
,
label_smoothing
=
0.1
,
print_loss
=
False
):
def
yolo_loss
(
args
,
anchors
,
num_classes
,
ignore_thresh
=
.
5
,
label_smoothing
=
0.1
,
print_loss
=
False
,
normalize
=
True
):
# 一共有三层
num_layers
=
len
(
anchors
)
//
3
# 将预测结果和实际ground truth分开,args是[*model_body.output, *y_true]
# y_true是一个列表,包含三个特征层,shape分别为(m,13,13,3,85),(m,26,26,3,85),(m,52,52,3,85)。
# yolo_outputs是一个列表,包含三个特征层,shape分别为(m,13,13,255),(m,26,26,255),(m,52,52,255)。
#---------------------------------------------------------------------------------------------------#
# 将预测结果和实际ground truth分开,args是[*model_body.output, *y_true]
# y_true是一个列表,包含三个特征层,shape分别为(m,13,13,3,85),(m,26,26,3,85),(m,52,52,3,85)。
# yolo_outputs是一个列表,包含三个特征层,shape分别为(m,13,13,3,85),(m,26,26,3,85),(m,52,52,3,85)。
#---------------------------------------------------------------------------------------------------#
y_true
=
args
[
num_layers
:]
yolo_outputs
=
args
[:
num_layers
]
# 先验框
# 678为142,110, 192,243, 459,401
# 345为36,75, 76,55, 72,146
# 012为12,16, 19,36, 40,28
#-----------------------------------------------------------#
# 13x13的特征层对应的anchor是[142, 110], [192, 243], [459, 401]
# 26x26的特征层对应的anchor是[36, 75], [76, 55], [72, 146]
# 52x52的特征层对应的anchor是[12, 16], [19, 36], [40, 28]
#-----------------------------------------------------------#
anchor_mask
=
[[
6
,
7
,
8
],
[
3
,
4
,
5
],
[
0
,
1
,
2
]]
if
num_layers
==
3
else
[[
3
,
4
,
5
],
[
1
,
2
,
3
]]
# 得到input_shpae为
608,608
# 得到input_shpae为
416,416
input_shape
=
K
.
cast
(
K
.
shape
(
yolo_outputs
[
0
])[
1
:
3
]
*
32
,
K
.
dtype
(
y_true
[
0
]))
loss
=
0
# 取出每一张图片
# m的值就是batch_size
num_pos
=
0
#-----------------------------------------------------------#
# 取出每一张图片
# m的值就是batch_size
#-----------------------------------------------------------#
m
=
K
.
shape
(
yolo_outputs
[
0
])[
0
]
mf
=
K
.
cast
(
m
,
K
.
dtype
(
yolo_outputs
[
0
]))
# y_true是一个列表,包含三个特征层,shape分别为(m,13,13,3,85),(m,26,26,3,85),(m,52,52,3,85)。
# yolo_outputs是一个列表,包含三个特征层,shape分别为(m,13,13,255),(m,26,26,255),(m,52,52,255)。
#---------------------------------------------------------------------------------------------------#
# y_true是一个列表,包含三个特征层,shape分别为(m,13,13,3,85),(m,26,26,3,85),(m,52,52,3,85)。
# yolo_outputs是一个列表,包含三个特征层,shape分别为(m,13,13,3,85),(m,26,26,3,85),(m,52,52,3,85)。
#---------------------------------------------------------------------------------------------------#
for
l
in
range
(
num_layers
):
# 以第一个特征层(m,13,13,3,85)为例子
# 取出该特征层中存在目标的点的位置。(m,13,13,3,1)
#-----------------------------------------------------------#
# 以第一个特征层(m,13,13,3,85)为例子
# 取出该特征层中存在目标的点的位置。(m,13,13,3,1)
#-----------------------------------------------------------#
object_mask
=
y_true
[
l
][...,
4
:
5
]
# 取出其对应的种类(m,13,13,3,80)
#-----------------------------------------------------------#
# 取出其对应的种类(m,13,13,3,80)
#-----------------------------------------------------------#
true_class_probs
=
y_true
[
l
][...,
5
:]
if
label_smoothing
:
true_class_probs
=
_smooth_labels
(
true_class_probs
,
label_smoothing
)
# 将yolo_outputs的特征层输出进行处理
# grid为网格结构(13,13,1,2),raw_pred为尚未处理的预测结果(m,13,13,3,85)
# 还有解码后的xy,wh,(m,13,13,3,2)
#-----------------------------------------------------------#
# 将yolo_outputs的特征层输出进行处理、获得四个返回值
# 其中:
# grid (13,13,1,2) 网格坐标
# raw_pred (m,13,13,3,85) 尚未处理的预测结果
# pred_xy (m,13,13,3,2) 解码后的中心坐标
# pred_wh (m,13,13,3,2) 解码后的宽高坐标
#-----------------------------------------------------------#
grid
,
raw_pred
,
pred_xy
,
pred_wh
=
yolo_head
(
yolo_outputs
[
l
],
anchors
[
anchor_mask
[
l
]],
num_classes
,
input_shape
,
calc_loss
=
True
)
# 这个是解码后的预测的box的位置
# (m,13,13,3,4)
#-----------------------------------------------------------#
# pred_box是解码后的预测的box的位置
# (m,13,13,3,4)
#-----------------------------------------------------------#
pred_box
=
K
.
concatenate
([
pred_xy
,
pred_wh
])
# 找到负样本群组,第一步是创建一个数组,[]
#-----------------------------------------------------------#
# 找到负样本群组,第一步是创建一个数组,[]
#-----------------------------------------------------------#
ignore_mask
=
tf
.
TensorArray
(
K
.
dtype
(
y_true
[
0
]),
size
=
1
,
dynamic_size
=
True
)
object_mask_bool
=
K
.
cast
(
object_mask
,
'bool'
)
# 对每一张图片计算ignore_mask
#-----------------------------------------------------------#
# 对每一张图片计算ignore_mask
#-----------------------------------------------------------#
def
loop_body
(
b
,
ignore_mask
):
# 取出第b副图内,真实存在的所有的box的参数
# n,4
#-----------------------------------------------------------#
# 取出n个真实框:n,4
#-----------------------------------------------------------#
true_box
=
tf
.
boolean_mask
(
y_true
[
l
][
b
,...,
0
:
4
],
object_mask_bool
[
b
,...,
0
])
# 计算预测结果与真实情况的iou
# pred_box为13,13,3,4
# 计算的结果是每个pred_box和其它所有真实框的iou
# 13,13,3,n
#-----------------------------------------------------------#
# 计算预测框与真实框的iou
# pred_box 13,13,3,4 预测框的坐标
# true_box n,4 真实框的坐标
# iou 13,13,3,n 预测框和真实框的iou
#-----------------------------------------------------------#
iou
=
box_iou
(
pred_box
[
b
],
true_box
)
# 13,13,3
#-----------------------------------------------------------#
# best_iou 13,13,3 每个特征点与真实框的最大重合程度
#-----------------------------------------------------------#
best_iou
=
K
.
max
(
iou
,
axis
=-
1
)
# 如果某些预测框和真实框的重合程度大于0.5,则忽略。
#-----------------------------------------------------------#
# 判断预测框和真实框的最大iou小于ignore_thresh
# 则认为该预测框没有与之对应的真实框
# 该操作的目的是:
# 忽略预测结果与真实框非常对应特征点,因为这些框已经比较准了
# 不适合当作负样本,所以忽略掉。
#-----------------------------------------------------------#
ignore_mask
=
ignore_mask
.
write
(
b
,
K
.
cast
(
best_iou
<
ignore_thresh
,
K
.
dtype
(
true_box
)))
return
b
+
1
,
ignore_mask
# 遍历所有的图片
#-----------------------------------------------------------#
# 在这个地方进行一个循环、循环是对每一张图片进行的
#-----------------------------------------------------------#
_
,
ignore_mask
=
K
.
control_flow_ops
.
while_loop
(
lambda
b
,
*
args
:
b
<
m
,
loop_body
,
[
0
,
ignore_mask
])
# 将每幅图的内容压缩,进行处理
#-----------------------------------------------------------#
# ignore_mask用于提取出作为负样本的特征点
# (m,13,13,3)
#-----------------------------------------------------------#
ignore_mask
=
ignore_mask
.
stack
()
#(m,13,13,3,1)
#
(m,13,13,3,1)
ignore_mask
=
K
.
expand_dims
(
ignore_mask
,
-
1
)
#-----------------------------------------------------------#
# 真实框越大,比重越小,小框的比重更大。
#-----------------------------------------------------------#
box_loss_scale
=
2
-
y_true
[
l
][...,
2
:
3
]
*
y_true
[
l
][...,
3
:
4
]
# Calculate ciou loss as location loss
#-----------------------------------------------------------#
# 计算Ciou loss
#-----------------------------------------------------------#
raw_true_box
=
y_true
[
l
][...,
0
:
4
]
ciou
=
box_ciou
(
pred_box
,
raw_true_box
)
ciou_loss
=
object_mask
*
box_loss_scale
*
(
1
-
ciou
)
ciou_loss
=
K
.
sum
(
ciou_loss
)
/
mf
location_loss
=
ciou_loss
# 如果该位置本来有框,那么计算1与置信度的交叉熵
# 如果该位置本来没有框,而且满足best_iou<ignore_thresh,则被认定为负样本
# best_iou<ignore_thresh用于限制负样本数量
#------------------------------------------------------------------------------#
# 如果该位置本来有框,那么计算1与置信度的交叉熵
# 如果该位置本来没有框,那么计算0与置信度的交叉熵
# 在这其中会忽略一部分样本,这些被忽略的样本满足条件best_iou<ignore_thresh
# 该操作的目的是:
# 忽略预测结果与真实框非常对应特征点,因为这些框已经比较准了
# 不适合当作负样本,所以忽略掉。
#------------------------------------------------------------------------------#
confidence_loss
=
object_mask
*
K
.
binary_crossentropy
(
object_mask
,
raw_pred
[...,
4
:
5
],
from_logits
=
True
)
+
\
(
1
-
object_mask
)
*
K
.
binary_crossentropy
(
object_mask
,
raw_pred
[...,
4
:
5
],
from_logits
=
True
)
*
ignore_mask
class_loss
=
object_mask
*
K
.
binary_crossentropy
(
true_class_probs
,
raw_pred
[...,
5
:],
from_logits
=
True
)
confidence_loss
=
K
.
sum
(
confidence_loss
)
/
mf
class_loss
=
K
.
sum
(
class_loss
)
/
mf
location_loss
=
K
.
sum
(
tf
.
where
(
tf
.
is_nan
(
ciou_loss
),
tf
.
zeros_like
(
ciou_loss
),
ciou_loss
))
confidence_loss
=
K
.
sum
(
tf
.
where
(
tf
.
is_nan
(
confidence_loss
),
tf
.
zeros_like
(
confidence_loss
),
confidence_loss
))
class_loss
=
K
.
sum
(
tf
.
where
(
tf
.
is_nan
(
class_loss
),
tf
.
zeros_like
(
class_loss
),
class_loss
))
#-----------------------------------------------------------#
# 计算正样本数量
#-----------------------------------------------------------#
num_pos
+=
tf
.
maximum
(
K
.
sum
(
K
.
cast
(
object_mask
,
tf
.
float32
)),
1
)
loss
+=
location_loss
+
confidence_loss
+
class_loss
# if print_loss:
#loss = tf.Print(loss, [loss, location_loss, confidence_loss, class_loss, K.sum(ignore_mask)], message='loss: ')
# loss = tf.Print(loss, [loss, location_loss, confidence_loss, class_loss, K.sum(ignore_mask)], message='loss: ')
if
normalize
:
loss
=
loss
/
num_pos
else
:
loss
=
loss
/
mf
return
loss
nets/yolo4.py
浏览文件 @
5b6f4c01
...
...
@@ -3,17 +3,21 @@ from functools import wraps
import
numpy
as
np
import
tensorflow
as
tf
from
keras
import
backend
as
K
from
keras.layers
import
Conv2D
,
Add
,
ZeroPadding2D
,
UpSampling2D
,
Concatenate
,
MaxPooling2D
from
keras.layers
import
(
Add
,
Concatenate
,
Conv2D
,
MaxPooling2D
,
UpSampling2D
,
ZeroPadding2D
)
from
keras.layers.advanced_activations
import
LeakyReLU
from
keras.layers.normalization
import
BatchNormalization
from
keras.models
import
Model
from
keras.regularizers
import
l2
from
nets.CSPdarknet53
import
darknet_body
from
utils.utils
import
compose
from
nets.CSPdarknet53
import
darknet_body
#--------------------------------------------------#
# 单次卷积
# 单次卷积DarknetConv2D
# 正则化系数为5e-4
# 如果步长为2则自己设定padding方式。
#--------------------------------------------------#
@
wraps
(
Conv2D
)
def
DarknetConv2D
(
*
args
,
**
kwargs
):
...
...
@@ -23,7 +27,7 @@ def DarknetConv2D(*args, **kwargs):
return
Conv2D
(
*
args
,
**
darknet_conv_kwargs
)
#---------------------------------------------------#
# 卷积块
# 卷积块
-> 卷积 + 标准化 + 激活函数
# DarknetConv2D + BatchNormalization + LeakyReLU
#---------------------------------------------------#
def
DarknetConv2D_BN_Leaky
(
*
args
,
**
kwargs
):
...
...
@@ -35,7 +39,7 @@ def DarknetConv2D_BN_Leaky(*args, **kwargs):
LeakyReLU
(
alpha
=
0.1
))
#---------------------------------------------------#
#
特征层->最后的输出
#
进行五次卷积
#---------------------------------------------------#
def
make_five_convs
(
x
,
num_filters
):
# 五次卷积
...
...
@@ -47,14 +51,19 @@ def make_five_convs(x, num_filters):
return
x
#---------------------------------------------------#
#
特征层->最后的输出
#
Panet网络的构建,并且获得预测结果
#---------------------------------------------------#
def
yolo_body
(
inputs
,
num_anchors
,
num_classes
):
# 生成darknet53的主干模型
#---------------------------------------------------#
# 生成CSPdarknet53的主干模型
# 获得三个有效特征层,他们的shape分别是:
# 52,52,256
# 26,26,512
# 13,13,1024
#---------------------------------------------------#
feat1
,
feat2
,
feat3
=
darknet_body
(
inputs
)
# 第一个特征层
# y1=(batch_size,13,13,3,85)
# 13,13,1024 -> 13,13,512 -> 13,13,1024 -> 13,13,512 -> 13,13,2048 -> 13,13,512 -> 13,13,1024 -> 13,13,512
P5
=
DarknetConv2D_BN_Leaky
(
512
,
(
1
,
1
))(
feat3
)
P5
=
DarknetConv2D_BN_Leaky
(
1024
,
(
3
,
3
))(
P5
)
P5
=
DarknetConv2D_BN_Leaky
(
512
,
(
1
,
1
))(
P5
)
...
...
@@ -67,38 +76,60 @@ def yolo_body(inputs, num_anchors, num_classes):
P5
=
DarknetConv2D_BN_Leaky
(
1024
,
(
3
,
3
))(
P5
)
P5
=
DarknetConv2D_BN_Leaky
(
512
,
(
1
,
1
))(
P5
)
# 13,13,512 -> 13,13,256 -> 26,26,256
P5_upsample
=
compose
(
DarknetConv2D_BN_Leaky
(
256
,
(
1
,
1
)),
UpSampling2D
(
2
))(
P5
)
# 26,26,512 -> 26,26,256
P4
=
DarknetConv2D_BN_Leaky
(
256
,
(
1
,
1
))(
feat2
)
# 26,26,256 + 26,26,256 -> 26,26,512
P4
=
Concatenate
()([
P4
,
P5_upsample
])
# 26,26,512 -> 26,26,256 -> 26,26,512 -> 26,26,256 -> 26,26,512 -> 26,26,256
P4
=
make_five_convs
(
P4
,
256
)
# 26,26,256 -> 26,26,128 -> 52,52,128
P4_upsample
=
compose
(
DarknetConv2D_BN_Leaky
(
128
,
(
1
,
1
)),
UpSampling2D
(
2
))(
P4
)
# 52,52,256 -> 52,52,128
P3
=
DarknetConv2D_BN_Leaky
(
128
,
(
1
,
1
))(
feat1
)
# 52,52,128 + 52,52,128 -> 52,52,256
P3
=
Concatenate
()([
P3
,
P4_upsample
])
P3
=
make_five_convs
(
P3
,
128
)
# 52,52,256 -> 52,52,128 -> 52,52,256 -> 52,52,128 -> 52,52,256 -> 52,52,128
P3
=
make_five_convs
(
P3
,
128
)
#---------------------------------------------------#
# 第三个特征层
# y3=(batch_size,52,52,3,85)
#---------------------------------------------------#
P3_output
=
DarknetConv2D_BN_Leaky
(
256
,
(
3
,
3
))(
P3
)
P3_output
=
DarknetConv2D
(
num_anchors
*
(
num_classes
+
5
),
(
1
,
1
))(
P3_output
)
#
26,26 output
#
52,52,128 -> 26,26,256
P3_downsample
=
ZeroPadding2D
(((
1
,
0
),(
1
,
0
)))(
P3
)
P3_downsample
=
DarknetConv2D_BN_Leaky
(
256
,
(
3
,
3
),
strides
=
(
2
,
2
))(
P3_downsample
)
# 26,26,256 + 26,26,256 -> 26,26,512
P4
=
Concatenate
()([
P3_downsample
,
P4
])
# 26,26,512 -> 26,26,256 -> 26,26,512 -> 26,26,256 -> 26,26,512 -> 26,26,256
P4
=
make_five_convs
(
P4
,
256
)
#---------------------------------------------------#
# 第二个特征层
# y2=(batch_size,26,26,3,85)
#---------------------------------------------------#
P4_output
=
DarknetConv2D_BN_Leaky
(
512
,
(
3
,
3
))(
P4
)
P4_output
=
DarknetConv2D
(
num_anchors
*
(
num_classes
+
5
),
(
1
,
1
))(
P4_output
)
#13,13 output
# 26,26,256 -> 13,13,512
P4_downsample
=
ZeroPadding2D
(((
1
,
0
),(
1
,
0
)))(
P4
)
P4_downsample
=
DarknetConv2D_BN_Leaky
(
512
,
(
3
,
3
),
strides
=
(
2
,
2
))(
P4_downsample
)
# 13,13,512 + 13,13,512 -> 13,13,1024
P5
=
Concatenate
()([
P4_downsample
,
P5
])
# 13,13,1024 -> 13,13,512 -> 13,13,1024 -> 13,13,512 -> 13,13,1024 -> 13,13,512
P5
=
make_five_convs
(
P5
,
512
)
#---------------------------------------------------#
# 第一个特征层
# y1=(batch_size,13,13,3,85)
#---------------------------------------------------#
P5_output
=
DarknetConv2D_BN_Leaky
(
1024
,
(
3
,
3
))(
P5
)
P5_output
=
DarknetConv2D
(
num_anchors
*
(
num_classes
+
5
),
(
1
,
1
))(
P5_output
)
...
...
@@ -109,12 +140,16 @@ def yolo_body(inputs, num_anchors, num_classes):
#---------------------------------------------------#
def
yolo_head
(
feats
,
anchors
,
num_classes
,
input_shape
,
calc_loss
=
False
):
num_anchors
=
len
(
anchors
)
# [1, 1, 1, num_anchors, 2]
#---------------------------------------------------#
# [1, 1, 1, num_anchors, 2]
#---------------------------------------------------#
anchors_tensor
=
K
.
reshape
(
K
.
constant
(
anchors
),
[
1
,
1
,
1
,
num_anchors
,
2
])
# 获得x,y的网格
# (13,13, 1, 2)
grid_shape
=
K
.
shape
(
feats
)[
1
:
3
]
# height, width
#---------------------------------------------------#
# 获得x,y的网格
# (13, 13, 1, 2)
#---------------------------------------------------#
grid_shape
=
K
.
shape
(
feats
)[
1
:
3
]
grid_y
=
K
.
tile
(
K
.
reshape
(
K
.
arange
(
0
,
stop
=
grid_shape
[
0
]),
[
-
1
,
1
,
1
,
1
]),
[
1
,
grid_shape
[
1
],
1
,
1
])
grid_x
=
K
.
tile
(
K
.
reshape
(
K
.
arange
(
0
,
stop
=
grid_shape
[
1
]),
[
1
,
-
1
,
1
,
1
]),
...
...
@@ -122,18 +157,29 @@ def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
grid
=
K
.
concatenate
([
grid_x
,
grid_y
])
grid
=
K
.
cast
(
grid
,
K
.
dtype
(
feats
))
# (batch_size,13,13,3,85)
#---------------------------------------------------#
# 将预测结果调整成(batch_size,13,13,3,85)
# 85可拆分成4 + 1 + 80
# 4代表的是中心宽高的调整参数
# 1代表的是框的置信度
# 80代表的是种类的置信度
#---------------------------------------------------#
feats
=
K
.
reshape
(
feats
,
[
-
1
,
grid_shape
[
0
],
grid_shape
[
1
],
num_anchors
,
num_classes
+
5
])
# 将预测值调成真实值
# box_xy对应框的中心点
# box_wh对应框的宽和高
#---------------------------------------------------#
# 将预测值调成真实值
# box_xy对应框的中心点
# box_wh对应框的宽和高
#---------------------------------------------------#
box_xy
=
(
K
.
sigmoid
(
feats
[...,
:
2
])
+
grid
)
/
K
.
cast
(
grid_shape
[::
-
1
],
K
.
dtype
(
feats
))
box_wh
=
K
.
exp
(
feats
[...,
2
:
4
])
*
anchors_tensor
/
K
.
cast
(
input_shape
[::
-
1
],
K
.
dtype
(
feats
))
box_confidence
=
K
.
sigmoid
(
feats
[...,
4
:
5
])
box_class_probs
=
K
.
sigmoid
(
feats
[...,
5
:])
# 在计算loss的时候返回如下参数
#---------------------------------------------------------------------#
# 在计算loss的时候返回grid, feats, box_xy, box_wh
# 在预测的时候返回box_xy, box_wh, box_confidence, box_class_probs
#---------------------------------------------------------------------#
if
calc_loss
==
True
:
return
grid
,
feats
,
box_xy
,
box_wh
return
box_xy
,
box_wh
,
box_confidence
,
box_class_probs
...
...
@@ -142,6 +188,9 @@ def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
# 对box进行调整,使其符合真实图片的样子
#---------------------------------------------------#
def
yolo_correct_boxes
(
box_xy
,
box_wh
,
input_shape
,
image_shape
):
#-----------------------------------------------------------------#
# 把y轴放前面是因为方便预测框和图像的宽高进行相乘
#-----------------------------------------------------------------#
box_yx
=
box_xy
[...,
::
-
1
]
box_hw
=
box_wh
[...,
::
-
1
]
...
...
@@ -149,6 +198,10 @@ def yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape):
image_shape
=
K
.
cast
(
image_shape
,
K
.
dtype
(
box_yx
))
new_shape
=
K
.
round
(
image_shape
*
K
.
min
(
input_shape
/
image_shape
))
#-----------------------------------------------------------------#
# 这里求出来的offset是图像有效区域相对于图像左上角的偏移情况
# new_shape指的是宽高缩放情况
#-----------------------------------------------------------------#
offset
=
(
input_shape
-
new_shape
)
/
2.
/
input_shape
scale
=
input_shape
/
new_shape
...
...
@@ -171,14 +224,24 @@ def yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape):
# 获取每个box和它的得分
#---------------------------------------------------#
def
yolo_boxes_and_scores
(
feats
,
anchors
,
num_classes
,
input_shape
,
image_shape
):
# 将预测值调成真实值
# box_xy对应框的中心点
# box_wh对应框的宽和高
# -1,13,13,3,2; -1,13,13,3,2; -1,13,13,3,1; -1,13,13,3,80
#-----------------------------------------------------------------#
# 将预测值调成真实值
# box_xy : -1,13,13,3,2;
# box_wh : -1,13,13,3,2;
# box_confidence : -1,13,13,3,1;
# box_class_probs : -1,13,13,3,80;
#-----------------------------------------------------------------#
box_xy
,
box_wh
,
box_confidence
,
box_class_probs
=
yolo_head
(
feats
,
anchors
,
num_classes
,
input_shape
)
# 将box_xy、和box_wh调节成y_min,y_max,xmin,xmax
#-----------------------------------------------------------------#
# 在图像传入网络预测前会进行letterbox_image给图像周围添加灰条
# 因此生成的box_xy, box_wh是相对于有灰条的图像的
# 我们需要对齐进行修改,去除灰条的部分。
# 将box_xy、和box_wh调节成y_min,y_max,xmin,xmax
#-----------------------------------------------------------------#
boxes
=
yolo_correct_boxes
(
box_xy
,
box_wh
,
input_shape
,
image_shape
)
# 获得得分和box
#-----------------------------------------------------------------#
# 获得最终得分和框的位置
#-----------------------------------------------------------------#
boxes
=
K
.
reshape
(
boxes
,
[
-
1
,
4
])
box_scores
=
box_confidence
*
box_class_probs
box_scores
=
K
.
reshape
(
box_scores
,
[
-
1
,
num_classes
])
...
...
@@ -194,42 +257,63 @@ def yolo_eval(yolo_outputs,
max_boxes
=
20
,
score_threshold
=
.
6
,
iou_threshold
=
.
5
):
# 获得特征层的数量
#---------------------------------------------------#
# 获得特征层的数量,有效特征层的数量为3
#---------------------------------------------------#
num_layers
=
len
(
yolo_outputs
)
# 特征层1对应的anchor是678
# 特征层2对应的anchor是345
# 特征层3对应的anchor是012
#-----------------------------------------------------------#
# 13x13的特征层对应的anchor是[142, 110], [192, 243], [459, 401]
# 26x26的特征层对应的anchor是[36, 75], [76, 55], [72, 146]
# 52x52的特征层对应的anchor是[12, 16], [19, 36], [40, 28]
#-----------------------------------------------------------#
anchor_mask
=
[[
6
,
7
,
8
],
[
3
,
4
,
5
],
[
0
,
1
,
2
]]
#-----------------------------------------------------------#
# 这里获得的是输入图片的大小,一般是416x416
#-----------------------------------------------------------#
input_shape
=
K
.
shape
(
yolo_outputs
[
0
])[
1
:
3
]
*
32
boxes
=
[]
box_scores
=
[]
# 对每个特征层进行处理
#-----------------------------------------------------------#
# 对每个特征层进行处理
#-----------------------------------------------------------#
for
l
in
range
(
num_layers
):
_boxes
,
_box_scores
=
yolo_boxes_and_scores
(
yolo_outputs
[
l
],
anchors
[
anchor_mask
[
l
]],
num_classes
,
input_shape
,
image_shape
)
boxes
.
append
(
_boxes
)
box_scores
.
append
(
_box_scores
)
# 将每个特征层的结果进行堆叠
#-----------------------------------------------------------#
# 将每个特征层的结果进行堆叠
#-----------------------------------------------------------#
boxes
=
K
.
concatenate
(
boxes
,
axis
=
0
)
box_scores
=
K
.
concatenate
(
box_scores
,
axis
=
0
)
#-----------------------------------------------------------#
# 判断得分是否大于score_threshold
#-----------------------------------------------------------#
mask
=
box_scores
>=
score_threshold
max_boxes_tensor
=
K
.
constant
(
max_boxes
,
dtype
=
'int32'
)
boxes_
=
[]
scores_
=
[]
classes_
=
[]
for
c
in
range
(
num_classes
):
# 取出所有box_scores >= score_threshold的框,和成绩
#-----------------------------------------------------------#
# 取出所有box_scores >= score_threshold的框,和成绩
#-----------------------------------------------------------#
class_boxes
=
tf
.
boolean_mask
(
boxes
,
mask
[:,
c
])
class_box_scores
=
tf
.
boolean_mask
(
box_scores
[:,
c
],
mask
[:,
c
])
# 非极大抑制,去掉box重合程度高的那一些
#-----------------------------------------------------------#
# 非极大抑制
# 保留一定区域内得分最大的框
#-----------------------------------------------------------#
nms_index
=
tf
.
image
.
non_max_suppression
(
class_boxes
,
class_box_scores
,
max_boxes_tensor
,
iou_threshold
=
iou_threshold
)
# 获取非极大抑制后的结果
# 下列三个分别是
# 框的位置,得分与种类
#-----------------------------------------------------------#
# 获取非极大抑制后的结果
# 下列三个分别是
# 框的位置,得分与种类
#-----------------------------------------------------------#
class_boxes
=
K
.
gather
(
class_boxes
,
nms_index
)
class_box_scores
=
K
.
gather
(
class_box_scores
,
nms_index
)
classes
=
K
.
ones_like
(
class_box_scores
,
'int32'
)
*
c
...
...
predict.py
浏览文件 @
5b6f4c01
from
yolo
import
YOLO
'''
predict.py有几个注意点
1、无法进行批量预测,如果想要批量预测,可以利用os.listdir()遍历文件夹,利用Image.open打开图片文件进行预测。
2、如果想要保存,利用r_image.save("img.jpg")即可保存。
3、如果想要获得框的坐标,可以进入detect_image函数,读取top,left,bottom,right这四个值。
4、如果想要截取下目标,可以利用获取到的top,left,bottom,right这四个值在原图上利用矩阵的方式进行截取。
'''
from
keras.layers
import
Input
from
PIL
import
Image
from
nets.yolo4
import
yolo_body
from
yolo
import
YOLO
yolo
=
YOLO
()
while
True
:
...
...
test.py
浏览文件 @
5b6f4c01
...
...
@@ -3,11 +3,14 @@
# map测试请看get_dr_txt.py、get_gt_txt.py
# 和get_map.py
#--------------------------------------------#
from
nets.yolo4
import
yolo_body
from
keras.layers
import
Input
inputs
=
Input
([
416
,
416
,
3
])
model
=
yolo_body
(
inputs
,
3
,
80
)
model
.
summary
()
for
i
,
layer
in
enumerate
(
model
.
layers
):
print
(
i
,
layer
.
name
)
from
nets.yolo4
import
yolo_body
if
__name__
==
"__main__"
:
inputs
=
Input
([
416
,
416
,
3
])
model
=
yolo_body
(
inputs
,
3
,
80
)
model
.
summary
()
# for i,layer in enumerate(model.layers):
# print(i,layer.name)
train.py
浏览文件 @
5b6f4c01
import
keras.backend
as
K
import
numpy
as
np
import
tensorflow
as
tf
import
keras.backend
as
K
from
keras.backend.tensorflow_backend
import
set_session
from
keras.callbacks
import
(
EarlyStopping
,
ModelCheckpoint
,
ReduceLROnPlateau
,
TensorBoard
)
from
keras.layers
import
Input
,
Lambda
from
keras.models
import
Model
from
keras.optimizers
import
Adam
from
keras.callbacks
import
TensorBoard
,
ModelCheckpoint
,
ReduceLROnPlateau
,
EarlyStopping
from
nets.yolo4
import
yolo_body
from
nets.loss
import
yolo_loss
from
keras.backend.tensorflow_backend
import
set_session
from
utils.utils
import
get_random_data
,
get_random_data_with_Mosaic
,
rand
,
WarmUpCosineDecayScheduler
from
nets.yolo4
import
yolo_body
from
utils.utils
import
(
WarmUpCosineDecayScheduler
,
get_random_data
,
get_random_data_with_Mosaic
,
rand
)
#---------------------------------------------------#
...
...
@@ -31,8 +34,7 @@ def get_anchors(anchors_path):
#---------------------------------------------------#
# 训练数据生成器
#---------------------------------------------------#
def
data_generator
(
annotation_lines
,
batch_size
,
input_shape
,
anchors
,
num_classes
,
mosaic
=
False
):
'''data generator for fit_generator'''
def
data_generator
(
annotation_lines
,
batch_size
,
input_shape
,
anchors
,
num_classes
,
mosaic
=
False
,
random
=
True
):
n
=
len
(
annotation_lines
)
i
=
0
flag
=
True
...
...
@@ -47,11 +49,11 @@ def data_generator(annotation_lines, batch_size, input_shape, anchors, num_class
image
,
box
=
get_random_data_with_Mosaic
(
annotation_lines
[
i
:
i
+
4
],
input_shape
)
i
=
(
i
+
1
)
%
n
else
:
image
,
box
=
get_random_data
(
annotation_lines
[
i
],
input_shape
)
image
,
box
=
get_random_data
(
annotation_lines
[
i
],
input_shape
,
random
=
random
)
i
=
(
i
+
1
)
%
n
flag
=
bool
(
1
-
flag
)
else
:
image
,
box
=
get_random_data
(
annotation_lines
[
i
],
input_shape
)
image
,
box
=
get_random_data
(
annotation_lines
[
i
],
input_shape
,
random
=
random
)
i
=
(
i
+
1
)
%
n
image_data
.
append
(
image
)
box_data
.
append
(
box
)
...
...
@@ -60,7 +62,6 @@ def data_generator(annotation_lines, batch_size, input_shape, anchors, num_class
y_true
=
preprocess_true_boxes
(
box_data
,
input_shape
,
anchors
,
num_classes
)
yield
[
image_data
,
*
y_true
],
np
.
zeros
(
batch_size
)
#---------------------------------------------------#
# 读入xml文件,并输出y_true
#---------------------------------------------------#
...
...
@@ -68,80 +69,130 @@ def preprocess_true_boxes(true_boxes, input_shape, anchors, num_classes):
assert
(
true_boxes
[...,
4
]
<
num_classes
).
all
(),
'class id must be less than num_classes'
# 一共有三个特征层数
num_layers
=
len
(
anchors
)
//
3
# 先验框
# 678为 142,110, 192,243, 459,401
# 345为 36,75, 76,55, 72,146
# 012为 12,16, 19,36, 40,28
anchor_mask
=
[[
6
,
7
,
8
],
[
3
,
4
,
5
],
[
0
,
1
,
2
]]
if
num_layers
==
3
else
[[
3
,
4
,
5
],
[
1
,
2
,
3
]]
#-----------------------------------------------------------#
# 13x13的特征层对应的anchor是[142, 110], [192, 243], [459, 401]
# 26x26的特征层对应的anchor是[36, 75], [76, 55], [72, 146]
# 52x52的特征层对应的anchor是[12, 16], [19, 36], [40, 28]
#-----------------------------------------------------------#
anchor_mask
=
[[
6
,
7
,
8
],
[
3
,
4
,
5
],
[
0
,
1
,
2
]]
#-----------------------------------------------------------#
# 获得框的坐标和图片的大小
#-----------------------------------------------------------#
true_boxes
=
np
.
array
(
true_boxes
,
dtype
=
'float32'
)
input_shape
=
np
.
array
(
input_shape
,
dtype
=
'int32'
)
# 416,416
# 读出xy轴,读出长宽
# 中心点(m,n,2)
input_shape
=
np
.
array
(
input_shape
,
dtype
=
'int32'
)
#-----------------------------------------------------------#
# 通过计算获得真实框的中心和宽高
# 中心点(m,n,2) 宽高(m,n,2)
#-----------------------------------------------------------#
boxes_xy
=
(
true_boxes
[...,
0
:
2
]
+
true_boxes
[...,
2
:
4
])
//
2
boxes_wh
=
true_boxes
[...,
2
:
4
]
-
true_boxes
[...,
0
:
2
]
# 计算比例
#-----------------------------------------------------------#
# 将真实框归一化到小数形式
#-----------------------------------------------------------#
true_boxes
[...,
0
:
2
]
=
boxes_xy
/
input_shape
[::
-
1
]
true_boxes
[...,
2
:
4
]
=
boxes_wh
/
input_shape
[::
-
1
]
# m
张图
# m
为图片数量,grid_shapes为网格的shape
m
=
true_boxes
.
shape
[
0
]
# 得到网格的shape为13,13;26,26;52,52
grid_shapes
=
[
input_shape
//
{
0
:
32
,
1
:
16
,
2
:
8
}[
l
]
for
l
in
range
(
num_layers
)]
# y_true的格式为(m,13,13,3,85)(m,26,26,3,85)(m,52,52,3,85)
#-----------------------------------------------------------#
# y_true的格式为(m,13,13,3,85)(m,26,26,3,85)(m,52,52,3,85)
#-----------------------------------------------------------#
y_true
=
[
np
.
zeros
((
m
,
grid_shapes
[
l
][
0
],
grid_shapes
[
l
][
1
],
len
(
anchor_mask
[
l
]),
5
+
num_classes
),
dtype
=
'float32'
)
for
l
in
range
(
num_layers
)]
# [1,9,2]
#-----------------------------------------------------------#
# [9,2] -> [1,9,2]
#-----------------------------------------------------------#
anchors
=
np
.
expand_dims
(
anchors
,
0
)
anchor_maxes
=
anchors
/
2.
anchor_mins
=
-
anchor_maxes
# 长宽要大于0才有效
#-----------------------------------------------------------#
# 长宽要大于0才有效
#-----------------------------------------------------------#
valid_mask
=
boxes_wh
[...,
0
]
>
0
for
b
in
range
(
m
):
# 对每一张图进行处理
wh
=
boxes_wh
[
b
,
valid_mask
[
b
]]
if
len
(
wh
)
==
0
:
continue
# [n,1,2]
#-----------------------------------------------------------#
# [n,2] -> [n,1,2]
#-----------------------------------------------------------#
wh
=
np
.
expand_dims
(
wh
,
-
2
)
box_maxes
=
wh
/
2.
box_mins
=
-
box_maxes
# 计算真实框和哪个先验框最契合
#-----------------------------------------------------------#
# 计算所有真实框和先验框的交并比
# intersect_area [n,9]
# box_area [n,1]
# anchor_area [1,9]
# iou [n,9]
#-----------------------------------------------------------#
intersect_mins
=
np
.
maximum
(
box_mins
,
anchor_mins
)
intersect_maxes
=
np
.
minimum
(
box_maxes
,
anchor_maxes
)
intersect_wh
=
np
.
maximum
(
intersect_maxes
-
intersect_mins
,
0.
)
intersect_area
=
intersect_wh
[...,
0
]
*
intersect_wh
[...,
1
]
box_area
=
wh
[...,
0
]
*
wh
[...,
1
]
anchor_area
=
anchors
[...,
0
]
*
anchors
[...,
1
]
iou
=
intersect_area
/
(
box_area
+
anchor_area
-
intersect_area
)
# 维度是(n) 感谢 消尽不死鸟 的提醒
#-----------------------------------------------------------#
# 维度是[n,] 感谢 消尽不死鸟 的提醒
#-----------------------------------------------------------#
best_anchor
=
np
.
argmax
(
iou
,
axis
=-
1
)
for
t
,
n
in
enumerate
(
best_anchor
):
#-----------------------------------------------------------#
# 找到每个真实框所属的特征层
#-----------------------------------------------------------#
for
l
in
range
(
num_layers
):
if
n
in
anchor_mask
[
l
]:
# floor用于向下取整
i
=
np
.
floor
(
true_boxes
[
b
,
t
,
0
]
*
grid_shapes
[
l
][
1
]).
astype
(
'int32'
)
j
=
np
.
floor
(
true_boxes
[
b
,
t
,
1
]
*
grid_shapes
[
l
][
0
]).
astype
(
'int32'
)
# 找到真实框在特征层l中第b副图像对应的位置
#-----------------------------------------------------------#
# floor用于向下取整,找到真实框所属的特征层对应的x、y轴坐标
#-----------------------------------------------------------#
i
=
np
.
floor
(
true_boxes
[
b
,
t
,
0
]
*
grid_shapes
[
l
][
1
]).
astype
(
'int32'
)
j
=
np
.
floor
(
true_boxes
[
b
,
t
,
1
]
*
grid_shapes
[
l
][
0
]).
astype
(
'int32'
)
#-----------------------------------------------------------#
# k指的的当前这个特征点的第k个先验框
#-----------------------------------------------------------#
k
=
anchor_mask
[
l
].
index
(
n
)
c
=
true_boxes
[
b
,
t
,
4
].
astype
(
'int32'
)
y_true
[
l
][
b
,
j
,
i
,
k
,
0
:
4
]
=
true_boxes
[
b
,
t
,
0
:
4
]
#-----------------------------------------------------------#
# c指的是当前这个真实框的种类
#-----------------------------------------------------------#
c
=
true_boxes
[
b
,
t
,
4
].
astype
(
'int32'
)
#-----------------------------------------------------------#
# y_true的shape为(m,13,13,3,85)(m,26,26,3,85)(m,52,52,3,85)
# 最后的85可以拆分成4+1+80,4代表的是框的中心与宽高、
# 1代表的是置信度、80代表的是种类
#-----------------------------------------------------------#
y_true
[
l
][
b
,
j
,
i
,
k
,
0
:
4
]
=
true_boxes
[
b
,
t
,
0
:
4
]
y_true
[
l
][
b
,
j
,
i
,
k
,
4
]
=
1
y_true
[
l
][
b
,
j
,
i
,
k
,
5
+
c
]
=
1
return
y_true
#----------------------------------------------------#
# 检测精度mAP和pr曲线计算参考视频
# https://www.bilibili.com/video/BV1zE411u7Vw
#----------------------------------------------------#
if
__name__
==
"__main__"
:
# 标签的位置
#----------------------------------------------------#
# 获得图片路径和标签
#----------------------------------------------------#
annotation_path
=
'2007_train.txt'
# 获取classes和anchor的位置
#------------------------------------------------------#
# 训练后的模型保存的位置,保存在logs文件夹里面
#------------------------------------------------------#
log_dir
=
'logs/'
#----------------------------------------------------#
# classes和anchor的路径,非常重要
# 训练前一定要修改classes_path,使其对应自己的数据集
#----------------------------------------------------#
classes_path
=
'model_data/voc_classes.txt'
anchors_path
=
'model_data/yolo_anchors.txt'
#------------------------------------------------------#
...
...
@@ -150,58 +201,81 @@ if __name__ == "__main__":
# 预测的东西都不一样了自然维度不匹配
#------------------------------------------------------#
weights_path
=
'model_data/yolo4_weight.h5'
# 获得classes和anchor
#------------------------------------------------------#
# 训练用图片大小
# 一般在416x416和608x608选择
#------------------------------------------------------#
input_shape
=
(
416
,
416
)
#------------------------------------------------------#
# 是否对损失进行归一化
#------------------------------------------------------#
normalize
=
True
#----------------------------------------------------#
# 获取classes和anchor
#----------------------------------------------------#
class_names
=
get_classes
(
classes_path
)
anchors
=
get_anchors
(
anchors_path
)
# 一共有多少类
#------------------------------------------------------#
# 一共有多少类和多少先验框
#------------------------------------------------------#
num_classes
=
len
(
class_names
)
num_anchors
=
len
(
anchors
)
#
训练后的模型保存的位置
log_dir
=
'logs/'
#
输入的shape大小
#
显存比较小可以使用416x416
#
现存比较大可以使用608x608
input_shape
=
(
416
,
416
)
#
------------------------------------------------------#
# Yolov4的tricks应用
#
mosaic 马赛克数据增强 True or False
#
Cosine_scheduler 余弦退火学习率 True or False
#
label_smoothing 标签平滑 0.01以下一般 如0.01、0.005
#------------------------------------------------------#
mosaic
=
True
Cosine_scheduler
=
False
label_smoothing
=
0
# 清除session
K
.
clear_session
()
# 输入的图像为
#------------------------------------------------------#
# 创建yolo模型
#------------------------------------------------------#
image_input
=
Input
(
shape
=
(
None
,
None
,
3
))
h
,
w
=
input_shape
# 创建yolo模型
print
(
'Create YOLOv4 model with {} anchors and {} classes.'
.
format
(
num_anchors
,
num_classes
))
model_body
=
yolo_body
(
image_input
,
num_anchors
//
3
,
num_classes
)
# 载入预训练权重
#------------------------------------------------------#
# 载入预训练权重
#------------------------------------------------------#
print
(
'Load weights {}.'
.
format
(
weights_path
))
model_body
.
load_weights
(
weights_path
,
by_name
=
True
,
skip_mismatch
=
True
)
# y_true为13,13,3,85
# 26,26,3,85
# 52,52,3,85
#------------------------------------------------------#
# 在这个地方设置损失,将网络的输出结果传入loss函数
# 把整个模型的输出作为loss
#------------------------------------------------------#
y_true
=
[
Input
(
shape
=
(
h
//
{
0
:
32
,
1
:
16
,
2
:
8
}[
l
],
w
//
{
0
:
32
,
1
:
16
,
2
:
8
}[
l
],
\
num_anchors
//
3
,
num_classes
+
5
))
for
l
in
range
(
3
)]
# 输入为*model_body.input, *y_true
# 输出为model_loss
loss_input
=
[
*
model_body
.
output
,
*
y_true
]
model_loss
=
Lambda
(
yolo_loss
,
output_shape
=
(
1
,),
name
=
'yolo_loss'
,
arguments
=
{
'anchors'
:
anchors
,
'num_classes'
:
num_classes
,
'ignore_thresh'
:
0.5
,
'label_smoothing'
:
label_smoothing
})(
loss_input
)
arguments
=
{
'anchors'
:
anchors
,
'num_classes'
:
num_classes
,
'ignore_thresh'
:
0.5
,
'label_smoothing'
:
label_smoothing
,
'normalize'
:
normalize
})(
loss_input
)
model
=
Model
([
model_body
.
input
,
*
y_true
],
model_loss
)
# 训练参数设置
#-------------------------------------------------------------------------------#
# 训练参数的设置
# logging表示tensorboard的保存地址
# checkpoint用于设置权值保存的细节,period用于修改多少epoch保存一次
# reduce_lr用于设置学习率下降的方式
# early_stopping用于设定早停,val_loss多次不下降自动结束训练,表示模型基本收敛
#-------------------------------------------------------------------------------#
logging
=
TensorBoard
(
log_dir
=
log_dir
)
checkpoint
=
ModelCheckpoint
(
log_dir
+
'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5'
,
monitor
=
'val_loss'
,
save_weights_only
=
True
,
save_best_only
=
False
,
period
=
1
)
early_stopping
=
EarlyStopping
(
monitor
=
'val_loss'
,
min_delta
=
0
,
patience
=
10
,
verbose
=
1
)
# 0.1用于验证,0.9用于训练
#----------------------------------------------------------------------#
# 验证集的划分在train.py代码里面进行
# 2007_test.txt和2007_val.txt里面没有内容是正常的。训练不会使用到。
# 当前划分方式下,验证集和训练集的比例为1:9
#----------------------------------------------------------------------#
val_split
=
0.1
with
open
(
annotation_path
)
as
f
:
lines
=
f
.
readlines
()
...
...
@@ -211,6 +285,10 @@ if __name__ == "__main__":
num_val
=
int
(
len
(
lines
)
*
val_split
)
num_train
=
len
(
lines
)
-
num_val
freeze_layers
=
249
for
i
in
range
(
freeze_layers
):
model_body
.
layers
[
i
].
trainable
=
False
print
(
'Freeze the first {} layers of total {} layers.'
.
format
(
freeze_layers
,
len
(
model_body
.
layers
)))
#------------------------------------------------------#
# 主干特征提取网络特征通用,冻结训练可以加快训练速度
# 也可以在训练初期防止权值被破坏。
...
...
@@ -219,18 +297,12 @@ if __name__ == "__main__":
# Epoch总训练世代
# 提示OOM或者显存不足请调小Batch_size
#------------------------------------------------------#
freeze_layers
=
249
for
i
in
range
(
freeze_layers
):
model_body
.
layers
[
i
].
trainable
=
False
print
(
'Freeze the first {} layers of total {} layers.'
.
format
(
freeze_layers
,
len
(
model_body
.
layers
)))
# 调整非主干模型first
if
True
:
Init_epoch
=
0
Freeze_epoch
=
50
# batch_size大小,每次喂入多少数据
batch_size
=
8
# 最大学习率
learning_rate_base
=
1e-3
if
Cosine_scheduler
:
# 预热期
warmup_epoch
=
int
((
Freeze_epoch
-
Init_epoch
)
*
0.2
)
...
...
@@ -252,9 +324,9 @@ if __name__ == "__main__":
model
.
compile
(
optimizer
=
Adam
(
learning_rate_base
),
loss
=
{
'yolo_loss'
:
lambda
y_true
,
y_pred
:
y_pred
})
print
(
'Train on {} samples, val on {} samples, with batch size {}.'
.
format
(
num_train
,
num_val
,
batch_size
))
model
.
fit_generator
(
data_generator
(
lines
[:
num_train
],
batch_size
,
input_shape
,
anchors
,
num_classes
,
mosaic
=
mosaic
),
model
.
fit_generator
(
data_generator
(
lines
[:
num_train
],
batch_size
,
input_shape
,
anchors
,
num_classes
,
mosaic
=
mosaic
,
random
=
True
),
steps_per_epoch
=
max
(
1
,
num_train
//
batch_size
),
validation_data
=
data_generator
(
lines
[
num_train
:],
batch_size
,
input_shape
,
anchors
,
num_classes
,
mosaic
=
False
),
validation_data
=
data_generator
(
lines
[
num_train
:],
batch_size
,
input_shape
,
anchors
,
num_classes
,
mosaic
=
False
,
random
=
False
),
validation_steps
=
max
(
1
,
num_val
//
batch_size
),
epochs
=
Freeze_epoch
,
initial_epoch
=
Init_epoch
,
...
...
@@ -263,15 +335,12 @@ if __name__ == "__main__":
for
i
in
range
(
freeze_layers
):
model_body
.
layers
[
i
].
trainable
=
True
# 解冻后训练
if
True
:
Freeze_epoch
=
50
Epoch
=
100
# batch_size大小,每次喂入多少数据
batch_size
=
2
# 最大学习率
learning_rate_base
=
1e-4
if
Cosine_scheduler
:
# 预热期
warmup_epoch
=
int
((
Epoch
-
Freeze_epoch
)
*
0.2
)
...
...
@@ -293,9 +362,9 @@ if __name__ == "__main__":
model
.
compile
(
optimizer
=
Adam
(
learning_rate_base
),
loss
=
{
'yolo_loss'
:
lambda
y_true
,
y_pred
:
y_pred
})
print
(
'Train on {} samples, val on {} samples, with batch size {}.'
.
format
(
num_train
,
num_val
,
batch_size
))
model
.
fit_generator
(
data_generator
(
lines
[:
num_train
],
batch_size
,
input_shape
,
anchors
,
num_classes
,
mosaic
=
mosaic
),
model
.
fit_generator
(
data_generator
(
lines
[:
num_train
],
batch_size
,
input_shape
,
anchors
,
num_classes
,
mosaic
=
mosaic
,
random
=
True
),
steps_per_epoch
=
max
(
1
,
num_train
//
batch_size
),
validation_data
=
data_generator
(
lines
[
num_train
:],
batch_size
,
input_shape
,
anchors
,
num_classes
,
mosaic
=
False
),
validation_data
=
data_generator
(
lines
[
num_train
:],
batch_size
,
input_shape
,
anchors
,
num_classes
,
mosaic
=
False
,
random
=
False
),
validation_steps
=
max
(
1
,
num_val
//
batch_size
),
epochs
=
Epoch
,
initial_epoch
=
Freeze_epoch
,
...
...
utils/utils.py
浏览文件 @
5b6f4c01
"""Miscellaneous utility functions."""
from
functools
import
reduce
import
numpy
as
np
import
cv2
import
keras
import
keras.backend
as
K
from
functools
import
reduce
import
numpy
as
np
from
matplotlib.colors
import
hsv_to_rgb
,
rgb_to_hsv
from
PIL
import
Image
from
matplotlib.colors
import
rgb_to_hsv
,
hsv_to_rgb
import
cv2
def
compose
(
*
funcs
):
if
funcs
:
...
...
@@ -101,8 +101,8 @@ def merge_bboxes(bboxes, cutx, cuty):
def
get_random_data_with_Mosaic
(
annotation_line
,
input_shape
,
max_boxes
=
100
,
hue
=
.
1
,
sat
=
1.5
,
val
=
1.5
):
'''random preprocessing for real-time data augmentation'''
h
,
w
=
input_shape
min_offset_x
=
0.
4
min_offset_y
=
0.
4
min_offset_x
=
0.
3
min_offset_y
=
0.
3
scale_low
=
1
-
min
(
min_offset_x
,
min_offset_y
)
scale_high
=
scale_low
+
0.2
...
...
@@ -112,6 +112,7 @@ def get_random_data_with_Mosaic(annotation_line, input_shape, max_boxes=100, hue
place_x
=
[
0
,
0
,
int
(
w
*
min_offset_x
),
int
(
w
*
min_offset_x
)]
place_y
=
[
0
,
int
(
h
*
min_offset_y
),
int
(
h
*
min_offset_y
),
0
]
for
line
in
annotation_line
:
# 每一行进行分割
line_content
=
line
.
split
()
...
...
@@ -163,7 +164,6 @@ def get_random_data_with_Mosaic(annotation_line, input_shape, max_boxes=100, hue
new_image
.
paste
(
image
,
(
dx
,
dy
))
image_data
=
np
.
array
(
new_image
)
/
255
index
=
index
+
1
box_data
=
[]
# 对box进行重新处理
...
...
@@ -183,8 +183,6 @@ def get_random_data_with_Mosaic(annotation_line, input_shape, max_boxes=100, hue
image_datas
.
append
(
image_data
)
box_datas
.
append
(
box_data
)
# 将图片分割,放在一起
cutx
=
np
.
random
.
randint
(
int
(
w
*
min_offset_x
),
int
(
w
*
(
1
-
min_offset_x
)))
cuty
=
np
.
random
.
randint
(
int
(
h
*
min_offset_y
),
int
(
h
*
(
1
-
min_offset_y
)))
...
...
@@ -206,7 +204,7 @@ def get_random_data_with_Mosaic(annotation_line, input_shape, max_boxes=100, hue
return
new_image
,
box_data
def
get_random_data
(
annotation_line
,
input_shape
,
max_boxes
=
100
,
jitter
=
.
3
,
hue
=
.
1
,
sat
=
1.5
,
val
=
1.5
):
def
get_random_data
(
annotation_line
,
input_shape
,
max_boxes
=
100
,
jitter
=
.
3
,
hue
=
.
1
,
sat
=
1.5
,
val
=
1.5
,
random
=
True
):
'''random preprocessing for real-time data augmentation'''
line
=
annotation_line
.
split
()
image
=
Image
.
open
(
line
[
0
])
...
...
@@ -214,6 +212,36 @@ def get_random_data(annotation_line, input_shape, max_boxes=100, jitter=.3, hue=
h
,
w
=
input_shape
box
=
np
.
array
([
np
.
array
(
list
(
map
(
int
,
box
.
split
(
','
))))
for
box
in
line
[
1
:]])
if
not
random
:
# resize image
scale
=
min
(
w
/
iw
,
h
/
ih
)
nw
=
int
(
iw
*
scale
)
nh
=
int
(
ih
*
scale
)
dx
=
(
w
-
nw
)
//
2
dy
=
(
h
-
nh
)
//
2
image
=
image
.
resize
((
nw
,
nh
),
Image
.
BICUBIC
)
new_image
=
Image
.
new
(
'RGB'
,
(
w
,
h
),
(
128
,
128
,
128
))
new_image
.
paste
(
image
,
(
dx
,
dy
))
image_data
=
np
.
array
(
new_image
,
np
.
float32
)
/
255
# correct boxes
box_data
=
np
.
zeros
((
max_boxes
,
5
))
if
len
(
box
)
>
0
:
np
.
random
.
shuffle
(
box
)
box
[:,
[
0
,
2
]]
=
box
[:,
[
0
,
2
]]
*
nw
/
iw
+
dx
box
[:,
[
1
,
3
]]
=
box
[:,
[
1
,
3
]]
*
nh
/
ih
+
dy
box
[:,
0
:
2
][
box
[:,
0
:
2
]
<
0
]
=
0
box
[:,
2
][
box
[:,
2
]
>
w
]
=
w
box
[:,
3
][
box
[:,
3
]
>
h
]
=
h
box_w
=
box
[:,
2
]
-
box
[:,
0
]
box_h
=
box
[:,
3
]
-
box
[:,
1
]
box
=
box
[
np
.
logical_and
(
box_w
>
1
,
box_h
>
1
)]
# discard invalid box
if
len
(
box
)
>
max_boxes
:
box
=
box
[:
max_boxes
]
box_data
[:
len
(
box
)]
=
box
return
image_data
,
box_data
# 对图像进行缩放并且进行长和宽的扭曲
new_ar
=
w
/
h
*
rand
(
1
-
jitter
,
1
+
jitter
)
/
rand
(
1
-
jitter
,
1
+
jitter
)
scale
=
rand
(.
25
,
2
)
...
...
video.py
浏览文件 @
5b6f4c01
#-------------------------------------#
# 调用摄像头检测
# 调用摄像头或者视频进行检测
# 调用摄像头直接运行即可
# 调用视频可以将cv2.VideoCapture()指定路径
# 视频的保存并不难,可以百度一下看看
#-------------------------------------#
import
time
import
cv2
import
numpy
as
np
from
keras.layers
import
Input
from
yolo
import
YOLO
from
PIL
import
Image
import
numpy
as
np
import
cv2
import
time
from
yolo
import
YOLO
yolo
=
YOLO
()
# 调用摄像头
capture
=
cv2
.
VideoCapture
(
0
)
# capture=cv2.VideoCapture("1.mp4")
#-------------------------------------#
# 调用摄像头
# capture=cv2.VideoCapture("1.mp4")
#-------------------------------------#
capture
=
cv2
.
VideoCapture
(
0
)
fps
=
0.0
while
(
True
):
...
...
@@ -20,10 +29,8 @@ while(True):
frame
=
cv2
.
cvtColor
(
frame
,
cv2
.
COLOR_BGR2RGB
)
# 转变成Image
frame
=
Image
.
fromarray
(
np
.
uint8
(
frame
))
# 进行检测
frame
=
np
.
array
(
yolo
.
detect_image
(
frame
))
# RGBtoBGR满足opencv显示格式
frame
=
cv2
.
cvtColor
(
frame
,
cv2
.
COLOR_RGB2BGR
)
...
...
@@ -37,4 +44,5 @@ while(True):
capture
.
release
()
break
yolo
.
close_session
()
yolo
.
close_session
()
vision_for_anchors.py
浏览文件 @
5b6f4c01
import
numpy
as
np
import
matplotlib.pyplot
as
plt
import
numpy
as
np
def
sigmoid
(
x
):
s
=
1
/
(
1
+
np
.
exp
(
-
x
))
return
s
...
...
@@ -78,4 +80,4 @@ def yolo_head(feats, anchors, num_classes):
#
feat
=
np
.
random
.
normal
(
0
,
0.5
,[
4
,
13
,
13
,
75
])
anchors
=
[[
142
,
110
],[
192
,
243
],[
459
,
401
]]
yolo_head
(
feat
,
anchors
,
20
)
\ No newline at end of file
yolo_head
(
feat
,
anchors
,
20
)
voc_annotation.py
浏览文件 @
5b6f4c01
#---------------------------------------------#
# 运行前一定要修改classes
# 如果生成的2007_train.txt里面没有目标信息
# 那么就是因为classes没有设定正确
#---------------------------------------------#
import
xml.etree.ElementTree
as
ET
from
os
import
getcwd
...
...
yolo.py
浏览文件 @
5b6f4c01
import
os
import
numpy
as
np
import
copy
import
colorsys
import
copy
import
os
from
timeit
import
default_timer
as
timer
import
numpy
as
np
from
keras
import
backend
as
K
from
keras.models
import
load_model
from
keras.layers
import
Input
from
PIL
import
Image
,
ImageFont
,
ImageDraw
from
nets.yolo4
import
yolo_body
,
yolo_eval
from
keras.models
import
load_model
from
PIL
import
Image
,
ImageDraw
,
ImageFont
from
nets.yolo4
import
yolo_body
,
yolo_eval
from
utils.utils
import
letterbox_image
#--------------------------------------------#
# 使用自己训练好的模型预测需要修改2个参数
# model_path和classes_path都需要修改!
# 如果出现shape不匹配,一定要注意
# 训练时的model_path和classes_path参数的修改
#--------------------------------------------#
class
YOLO
(
object
):
_defaults
=
{
...
...
@@ -64,18 +70,22 @@ class YOLO(object):
return
np
.
array
(
anchors
).
reshape
(
-
1
,
2
)
#---------------------------------------------------#
#
获得所有的分类
#
载入模型
#---------------------------------------------------#
def
generate
(
self
):
model_path
=
os
.
path
.
expanduser
(
self
.
model_path
)
assert
model_path
.
endswith
(
'.h5'
),
'Keras model or weights must be a .h5 file.'
# 计算anchor数量
#---------------------------------------------------#
# 计算先验框的数量和种类的数量
#---------------------------------------------------#
num_anchors
=
len
(
self
.
anchors
)
num_classes
=
len
(
self
.
class_names
)
# 载入模型,如果原来的模型里已经包括了模型结构则直接载入。
# 否则先构建模型再载入
#---------------------------------------------------------#
# 载入模型,如果原来的模型里已经包括了模型结构则直接载入。
# 否则先构建模型再载入
#---------------------------------------------------------#
try
:
self
.
yolo_model
=
load_model
(
model_path
,
compile
=
False
)
except
:
...
...
@@ -103,6 +113,10 @@ class YOLO(object):
self
.
input_image_shape
=
K
.
placeholder
(
shape
=
(
2
,
))
#---------------------------------------------------------#
# 在yolo_eval函数中,我们会对预测结果进行后处理
# 后处理的内容包括,解码、非极大抑制、门限筛选等
#---------------------------------------------------------#
boxes
,
scores
,
classes
=
yolo_eval
(
self
.
yolo_model
.
output
,
self
.
anchors
,
num_classes
,
self
.
input_image_shape
,
max_boxes
=
self
.
max_boxes
,
score_threshold
=
self
.
score
,
iou_threshold
=
self
.
iou
)
...
...
@@ -113,30 +127,37 @@ class YOLO(object):
#---------------------------------------------------#
def
detect_image
(
self
,
image
):
start
=
timer
()
# 调整图片使其符合输入要求
#---------------------------------------------------------#
# 给图像增加灰条,实现不失真的resize
#---------------------------------------------------------#
new_image_size
=
(
self
.
model_image_size
[
1
],
self
.
model_image_size
[
0
])
boxed_image
=
letterbox_image
(
image
,
new_image_size
)
image_data
=
np
.
array
(
boxed_image
,
dtype
=
'float32'
)
image_data
/=
255.
image_data
=
np
.
expand_dims
(
image_data
,
0
)
# Add batch dimension.
# 预测结果
#---------------------------------------------------------#
# 添加上batch_size维度
#---------------------------------------------------------#
image_data
=
np
.
expand_dims
(
image_data
,
0
)
#---------------------------------------------------------#
# 将图像输入网络当中进行预测!
#---------------------------------------------------------#
out_boxes
,
out_scores
,
out_classes
=
self
.
sess
.
run
(
[
self
.
boxes
,
self
.
scores
,
self
.
classes
],
feed_dict
=
{
self
.
yolo_model
.
input
:
image_data
,
self
.
input_image_shape
:
[
image
.
size
[
1
],
image
.
size
[
0
]],
K
.
learning_phase
():
0
})
K
.
learning_phase
():
0
})
print
(
'Found {} boxes for {}'
.
format
(
len
(
out_boxes
),
'img'
))
# 设置字体
#---------------------------------------------------------#
# 设置字体
#---------------------------------------------------------#
font
=
ImageFont
.
truetype
(
font
=
'font/simhei.ttf'
,
size
=
np
.
floor
(
3e-2
*
image
.
size
[
1
]
+
0.5
).
astype
(
'int32'
))
thickness
=
(
image
.
size
[
0
]
+
image
.
size
[
1
])
//
300
small_pic
=
[]
thickness
=
max
((
image
.
size
[
0
]
+
image
.
size
[
1
])
//
300
,
1
)
for
i
,
c
in
list
(
enumerate
(
out_classes
)):
predicted_class
=
self
.
class_names
[
c
]
...
...
@@ -159,7 +180,7 @@ class YOLO(object):
draw
=
ImageDraw
.
Draw
(
image
)
label_size
=
draw
.
textsize
(
label
,
font
)
label
=
label
.
encode
(
'utf-8'
)
print
(
label
)
print
(
label
,
top
,
left
,
bottom
,
right
)
if
top
-
label_size
[
1
]
>=
0
:
text_origin
=
np
.
array
([
left
,
top
-
label_size
[
1
]])
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录