Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
谢爱卿
yolov4-pytorch
提交
a60e3df5
Y
yolov4-pytorch
项目概览
谢爱卿
/
yolov4-pytorch
与 Fork 源项目一致
从无法访问的项目Fork
通知
3
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Y
yolov4-pytorch
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
未验证
提交
a60e3df5
编写于
1月 14, 2021
作者:
B
Bubbliiiing
提交者:
GitHub
1月 14, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add files via upload
上级
49081f53
变更
19
展开全部
隐藏空白更改
内联
并排
Showing
19 changed file
with
953 addition
and
344 deletion
+953
-344
FPS_test.py
FPS_test.py
+9
-7
VOCdevkit/VOC2007/voc2yolo4.py
VOCdevkit/VOC2007/voc2yolo4.py
+10
-1
ciou_test.py
ciou_test.py
+5
-2
eval_coco.py
eval_coco.py
+130
-0
get_dr_txt.py
get_dr_txt.py
+59
-35
get_map.py
get_map.py
+4
-3
kmeans_for_anchors.py
kmeans_for_anchors.py
+8
-3
nets/CSPdarknet.py
nets/CSPdarknet.py
+52
-15
nets/yolo4.py
nets/yolo4.py
+44
-6
nets/yolo_training.py
nets/yolo_training.py
+197
-53
predict.py
predict.py
+9
-4
test.py
test.py
+1
-0
train.py
train.py
+97
-53
train_with_tensorboard.py
train_with_tensorboard.py
+110
-53
utils/dataloader.py
utils/dataloader.py
+36
-16
utils/utils.py
utils/utils.py
+79
-33
video.py
video.py
+16
-11
voc_annotation.py
voc_annotation.py
+5
-0
yolo.py
yolo.py
+82
-49
未找到文件。
FPS_test.py
浏览文件 @
a60e3df5
...
...
@@ -28,17 +28,19 @@ class FPS_YOLO(YOLO):
# 调整图片使其符合输入要求
image_shape
=
np
.
array
(
np
.
shape
(
image
)[
0
:
2
])
#---------------------------------------------------------#
# 给图像增加灰条,实现不失真的resize
#---------------------------------------------------------#
crop_img
=
np
.
array
(
letterbox_image
(
image
,
(
self
.
model_image_size
[
1
],
self
.
model_image_size
[
0
])))
photo
=
np
.
array
(
crop_img
,
dtype
=
np
.
float32
)
photo
/=
255.0
photo
=
np
.
array
(
crop_img
,
dtype
=
np
.
float32
)
/
255.0
photo
=
np
.
transpose
(
photo
,
(
2
,
0
,
1
))
photo
=
photo
.
astype
(
np
.
float32
)
images
=
[]
images
.
append
(
photo
)
images
=
np
.
asarray
(
images
)
#---------------------------------------------------------#
# 添加上batch_size维度
#---------------------------------------------------------#
images
=
[
photo
]
with
torch
.
no_grad
():
images
=
torch
.
from_numpy
(
images
)
images
=
torch
.
from_numpy
(
np
.
asarray
(
images
)
)
if
self
.
cuda
:
images
=
images
.
cuda
()
outputs
=
self
.
net
(
images
)
...
...
VOCdevkit/VOC2007/voc2yolo4.py
浏览文件 @
a60e3df5
#----------------------------------------------------------------------#
# 验证集的划分在train.py代码里面进行
# test.txt和val.txt里面没有内容是正常的。训练不会使用到。
#----------------------------------------------------------------------#
import
os
import
random
random
.
seed
(
0
)
xmlfilepath
=
r
'./VOCdevkit/VOC2007/Annotations'
saveBasePath
=
r
"./VOCdevkit/VOC2007/ImageSets/Main/"
#----------------------------------------------------------------------#
# 想要增加测试集修改trainval_percent
# train_percent不需要修改
#----------------------------------------------------------------------#
trainval_percent
=
1
train_percent
=
1
...
...
ciou_test.py
浏览文件 @
a60e3df5
import
torch
import
math
import
numpy
as
np
import
torch
def
box_ciou
(
b1
,
b2
):
"""
输入为:
...
...
@@ -53,4 +56,4 @@ def box_ciou(b1, b2):
box1
=
torch
.
from_numpy
(
np
.
array
([[
25
,
25
,
40
,
40
]])).
type
(
torch
.
FloatTensor
)
box2
=
torch
.
from_numpy
(
np
.
array
([[
25
,
25
,
30
,
40
]])).
type
(
torch
.
FloatTensor
)
print
(
box_ciou
(
box1
,
box2
))
\ No newline at end of file
print
(
box_ciou
(
box1
,
box2
))
eval_coco.py
0 → 100644
浏览文件 @
a60e3df5
import
colorsys
import
json
import
os
import
cv2
import
numpy
as
np
import
torch
import
torch.backends.cudnn
as
cudnn
import
torch.nn
as
nn
from
PIL
import
Image
,
ImageDraw
,
ImageFont
from
torch.autograd
import
Variable
from
tqdm
import
tqdm
from
nets.yolo4
import
YoloBody
from
utils.utils
import
(
DecodeBox
,
bbox_iou
,
diou_non_max_suppression
,
letterbox_image
,
non_max_suppression
,
yolo_correct_boxes
)
from
yolo
import
YOLO
#---------------------------------------------------#
#   Maps the class names used by the detector to the
#   integer category ids written into the COCO-style
#   result entries ("category_id").
#   The ids are not contiguous (e.g. 26, 29, 30 are
#   absent), so the position in the class list cannot
#   be used as the category id.
#   NOTE(review): the empty-string key mapped to 83
#   looks like a placeholder entry; it is kept as-is,
#   confirm whether it is intentional.
#---------------------------------------------------#
coco_classes = {
    'person': 1,
    'bicycle': 2,
    'car': 3,
    'motorbike': 4,
    'aeroplane': 5,
    'bus': 6,
    'train': 7,
    'truck': 8,
    'boat': 9,
    'traffic light': 10,
    'fire hydrant': 11,
    '': 83,
    'stop sign': 13,
    'parking meter': 14,
    'bench': 15,
    'bird': 16,
    'cat': 17,
    'dog': 18,
    'horse': 19,
    'sheep': 20,
    'cow': 21,
    'elephant': 22,
    'bear': 23,
    'zebra': 24,
    'giraffe': 25,
    'backpack': 27,
    'umbrella': 28,
    'handbag': 31,
    'tie': 32,
    'suitcase': 33,
    'frisbee': 34,
    'skis': 35,
    'snowboard': 36,
    'sports ball': 37,
    'kite': 38,
    'baseball bat': 39,
    'baseball glove': 40,
    'skateboard': 41,
    'surfboard': 42,
    'tennis racket': 43,
    'bottle': 44,
    'wine glass': 46,
    'cup': 47,
    'fork': 48,
    'knife': 49,
    'spoon': 50,
    'bowl': 51,
    'banana': 52,
    'apple': 53,
    'sandwich': 54,
    'orange': 55,
    'broccoli': 56,
    'carrot': 57,
    'hot dog': 58,
    'pizza': 59,
    'donut': 60,
    'cake': 61,
    'chair': 62,
    'sofa': 63,
    'pottedplant': 64,
    'bed': 65,
    'diningtable': 67,
    'toilet': 70,
    'tvmonitor': 72,
    'laptop': 73,
    'mouse': 74,
    'remote': 75,
    'keyboard': 76,
    'cell phone': 77,
    'microwave': 78,
    'oven': 79,
    'toaster': 80,
    'sink': 81,
    'refrigerator': 82,
    'book': 84,
    'clock': 85,
    'vase': 86,
    'scissors': 87,
    'teddy bear': 88,
    'hair drier': 89,
    'toothbrush': 90,
}
class mAP_YOLO(YOLO):
    """YOLO subclass whose detect_image emits COCO-style result dicts
    instead of drawing boxes on the image."""

    #---------------------------------------------------#
    #   Run detection on a single image
    #---------------------------------------------------#
    def detect_image(self, image_id, image, results):
        """Detect objects in ``image`` and append one dict per box to ``results``.

        Args:
            image_id: Identifier of the image; it is converted with ``int()``
                and stored under ``"image_id"``.
            image: Image handed to ``letterbox_image``; its ``size`` attribute
                is read as ``(width, height)`` when clipping boxes.
            results: List that receives the result dicts. It is mutated in
                place.

        Returns:
            The same ``results`` list, also when nothing was detected, so a
            caller that rebinds its accumulator to the return value keeps
            the entries collected so far.
        """
        # Score threshold used for NMS and filtering below; overwritten on
        # every call.
        self.confidence = 0.001
        image_shape = np.array(np.shape(image)[0:2])

        #---------------------------------------------------------#
        #   Add gray bars around the image so it can be resized
        #   to the network input size without distortion
        #---------------------------------------------------------#
        crop_img = np.array(letterbox_image(
            image, (self.model_image_size[1], self.model_image_size[0])))
        photo = np.array(crop_img, dtype=np.float32) / 255.0
        # Move the last axis to the front for the network input.
        photo = np.transpose(photo, (2, 0, 1))
        #---------------------------------------------------------#
        #   Add the batch dimension
        #---------------------------------------------------------#
        images = [photo]

        with torch.no_grad():
            images = torch.from_numpy(np.asarray(images))
            if self.cuda:
                images = images.cuda()

            #---------------------------------------------------------#
            #   Feed the image through the network
            #---------------------------------------------------------#
            outputs = self.net(images)
            output_list = [self.yolo_decodes[i](outputs[i]) for i in range(3)]

            #---------------------------------------------------------#
            #   Stack the decoded predictions, then run
            #   non-maximum suppression
            #---------------------------------------------------------#
            output = torch.cat(output_list, 1)
            batch_detections = non_max_suppression(
                output, len(self.class_names),
                conf_thres=self.confidence,
                nms_thres=self.iou)

        #---------------------------------------------------------#
        #   Nothing detected: hand back the untouched accumulator.
        #   NOTE(review): presumably non_max_suppression yields None
        #   (or an empty list) for an image without detections;
        #   confirm against utils.utils.
        #---------------------------------------------------------#
        try:
            batch_detections = batch_detections[0].cpu().numpy()
        except (AttributeError, IndexError, TypeError):
            return results

        #---------------------------------------------------------#
        #   Filter the boxes by score (column 4 * column 5)
        #---------------------------------------------------------#
        top_index = batch_detections[:, 4] * batch_detections[:, 5] > self.confidence
        top_conf = batch_detections[top_index, 4] * batch_detections[top_index, 5]
        top_label = np.array(batch_detections[top_index, -1], np.int32)
        top_bboxes = np.array(batch_detections[top_index, :4])
        top_xmin = np.expand_dims(top_bboxes[:, 0], -1)
        top_ymin = np.expand_dims(top_bboxes[:, 1], -1)
        top_xmax = np.expand_dims(top_bboxes[:, 2], -1)
        top_ymax = np.expand_dims(top_bboxes[:, 3], -1)

        #-----------------------------------------------------------------#
        #   letterbox_image added gray bars before prediction, so
        #   top_bboxes are relative to the padded image. Correct them
        #   to remove the gray-bar offset.
        #-----------------------------------------------------------------#
        boxes = yolo_correct_boxes(
            top_ymin, top_xmin, top_ymax, top_xmax,
            np.array([self.model_image_size[0], self.model_image_size[1]]),
            image_shape)

        for i, c in enumerate(top_label):
            predicted_class = self.class_names[c]
            top, left, bottom, right = boxes[i]

            # Round to the nearest pixel and clip to the image bounds.
            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32'))
            right = min(image.size[0], np.floor(right + 0.5).astype('int32'))

            result = {}
            result["image_id"] = int(image_id)
            result["category_id"] = coco_classes[predicted_class]
            # Box is stored as [left, top, width, height].
            result["bbox"] = [float(left), float(top),
                              float(right - left), float(bottom - top)]
            result["score"] = float(top_conf[i])
            results.append(result)

        return results
#---------------------------------------------------------#
#   Run the detector over every .jpg file in the val2017
#   folder and dump all detections to one JSON file.
#---------------------------------------------------------#
yolo = mAP_YOLO()

jpg_names = os.listdir("./coco_dataset/val2017")

results = []
for jpg_name in tqdm(jpg_names):
    if not jpg_name.endswith("jpg"):
        continue

    image_path = os.path.join("./coco_dataset/val2017", jpg_name)
    # The file name without extension is used as the image id.
    image_id = jpg_name.split(".")[0]
    with Image.open(image_path) as image:
        # detect_image appends to `results` in place. The return value is
        # deliberately not assigned back, so an image without detections
        # can never replace the accumulated list.
        yolo.detect_image(image_id, image, results)

# Open the output only after inference has finished, so a failure during
# detection does not leave an empty or truncated results file behind.
with open("./coco_dataset/eval_results.json", "w") as f:
    json.dump(results, f)
get_dr_txt.py
浏览文件 @
a60e3df5
...
...
@@ -3,19 +3,24 @@
# 具体教程请查看Bilibili
# Bubbliiiing
#-------------------------------------#
import
cv2
import
numpy
as
np
import
colorsys
import
os
import
cv2
import
numpy
as
np
import
torch
import
torch.nn
as
nn
import
torch.backends.cudnn
as
cudnn
import
torch.nn
as
nn
from
PIL
import
Image
,
ImageDraw
,
ImageFont
from
torch.autograd
import
Variable
from
yolo
import
YOLO
from
nets.yolo4
import
YoloBody
from
PIL
import
Image
,
ImageFont
,
ImageDraw
from
utils.utils
import
non_max_suppression
,
bbox_iou
,
DecodeBox
,
letterbox_image
,
yolo_correct_boxes
from
tqdm
import
tqdm
from
nets.yolo4
import
YoloBody
from
utils.utils
import
(
DecodeBox
,
bbox_iou
,
letterbox_image
,
non_max_suppression
,
yolo_correct_boxes
)
from
yolo
import
YOLO
class
mAP_Yolo
(
YOLO
):
#---------------------------------------------------#
# 检测图片
...
...
@@ -26,42 +31,61 @@ class mAP_Yolo(YOLO):
f
=
open
(
"./input/detection-results/"
+
image_id
+
".txt"
,
"w"
)
image_shape
=
np
.
array
(
np
.
shape
(
image
)[
0
:
2
])
#---------------------------------------------------------#
# 给图像增加灰条,实现不失真的resize
#---------------------------------------------------------#
crop_img
=
np
.
array
(
letterbox_image
(
image
,
(
self
.
model_image_size
[
1
],
self
.
model_image_size
[
0
])))
photo
=
np
.
array
(
crop_img
,
dtype
=
np
.
float32
)
photo
/=
255.0
photo
=
np
.
array
(
crop_img
,
dtype
=
np
.
float32
)
/
255.0
photo
=
np
.
transpose
(
photo
,
(
2
,
0
,
1
))
photo
=
photo
.
astype
(
np
.
float32
)
images
=
[]
images
.
append
(
photo
)
images
=
np
.
asarray
(
images
)
#---------------------------------------------------------#
# 添加上batch_size维度
#---------------------------------------------------------#
images
=
[
photo
]
with
torch
.
no_grad
():
images
=
torch
.
from_numpy
(
images
)
images
=
torch
.
from_numpy
(
np
.
asarray
(
images
)
)
if
self
.
cuda
:
images
=
images
.
cuda
()
#---------------------------------------------------------#
# 将图像输入网络当中进行预测!
#---------------------------------------------------------#
outputs
=
self
.
net
(
images
)
output_list
=
[]
for
i
in
range
(
3
):
output_list
.
append
(
self
.
yolo_decodes
[
i
](
outputs
[
i
]))
#---------------------------------------------------------#
# 将预测框进行堆叠,然后进行非极大抑制
#---------------------------------------------------------#
output
=
torch
.
cat
(
output_list
,
1
)
batch_detections
=
non_max_suppression
(
output
,
len
(
self
.
class_names
),
conf_thres
=
self
.
confidence
,
nms_thres
=
self
.
iou
)
#---------------------------------------------------------#
# 如果没有检测出物体,返回原图
#---------------------------------------------------------#
try
:
batch_detections
=
batch_detections
[
0
].
cpu
().
numpy
()
except
:
return
image
output_list
=
[]
for
i
in
range
(
3
):
output_list
.
append
(
self
.
yolo_decodes
[
i
](
outputs
[
i
]))
output
=
torch
.
cat
(
output_list
,
1
)
batch_detections
=
non_max_suppression
(
output
,
len
(
self
.
class_names
),
conf_thres
=
self
.
confidence
,
nms_thres
=
self
.
iou
)
try
:
batch_detections
=
batch_detections
[
0
].
cpu
().
numpy
()
except
:
return
image
top_index
=
batch_detections
[:,
4
]
*
batch_detections
[:,
5
]
>
self
.
confidence
top_conf
=
batch_detections
[
top_index
,
4
]
*
batch_detections
[
top_index
,
5
]
top_label
=
np
.
array
(
batch_detections
[
top_index
,
-
1
],
np
.
int32
)
top_bboxes
=
np
.
array
(
batch_detections
[
top_index
,:
4
])
top_xmin
,
top_ymin
,
top_xmax
,
top_ymax
=
np
.
expand_dims
(
top_bboxes
[:,
0
],
-
1
),
np
.
expand_dims
(
top_bboxes
[:,
1
],
-
1
),
np
.
expand_dims
(
top_bboxes
[:,
2
],
-
1
),
np
.
expand_dims
(
top_bboxes
[:,
3
],
-
1
)
# 去掉灰条
boxes
=
yolo_correct_boxes
(
top_ymin
,
top_xmin
,
top_ymax
,
top_xmax
,
np
.
array
([
self
.
model_image_size
[
0
],
self
.
model_image_size
[
1
]]),
image_shape
)
#---------------------------------------------------------#
# 对预测框进行得分筛选
#---------------------------------------------------------#
top_index
=
batch_detections
[:,
4
]
*
batch_detections
[:,
5
]
>
self
.
confidence
top_conf
=
batch_detections
[
top_index
,
4
]
*
batch_detections
[
top_index
,
5
]
top_label
=
np
.
array
(
batch_detections
[
top_index
,
-
1
],
np
.
int32
)
top_bboxes
=
np
.
array
(
batch_detections
[
top_index
,:
4
])
top_xmin
,
top_ymin
,
top_xmax
,
top_ymax
=
np
.
expand_dims
(
top_bboxes
[:,
0
],
-
1
),
np
.
expand_dims
(
top_bboxes
[:,
1
],
-
1
),
np
.
expand_dims
(
top_bboxes
[:,
2
],
-
1
),
np
.
expand_dims
(
top_bboxes
[:,
3
],
-
1
)
#-----------------------------------------------------------------#
# 在图像传入网络预测前会进行letterbox_image给图像周围添加灰条
# 因此生成的top_bboxes是相对于有灰条的图像的
# 我们需要对其进行修改,去除灰条的部分。
#-----------------------------------------------------------------#
boxes
=
yolo_correct_boxes
(
top_ymin
,
top_xmin
,
top_ymax
,
top_xmax
,
np
.
array
([
self
.
model_image_size
[
0
],
self
.
model_image_size
[
1
]]),
image_shape
)
for
i
,
c
in
enumerate
(
top_label
):
predicted_class
=
self
.
class_names
[
c
]
...
...
get_map.py
浏览文件 @
a60e3df5
import
argparse
import
glob
import
json
import
math
import
operator
import
os
import
shutil
import
operator
import
sys
import
argparse
import
math
import
numpy
as
np
#----------------------------------------------------#
# 用于计算mAP
# 代码克隆自https://github.com/Cartucho/mAP
...
...
kmeans_for_anchors.py
浏览文件 @
a60e3df5
import
numpy
as
np
import
xml.etree.ElementTree
as
ET
import
glob
import
random
import
xml.etree.ElementTree
as
ET
import
numpy
as
np
def
cas_iou
(
box
,
cluster
):
x
=
np
.
minimum
(
cluster
[:,
0
],
box
[
0
])
...
...
@@ -61,6 +63,9 @@ def load_data(path):
tree
=
ET
.
parse
(
xml_file
)
height
=
int
(
tree
.
findtext
(
'./size/height'
))
width
=
int
(
tree
.
findtext
(
'./size/width'
))
if
height
<=
0
or
width
<=
0
:
continue
# 对于每一个目标都获得它的宽高
for
obj
in
tree
.
iter
(
'object'
):
xmin
=
int
(
float
(
obj
.
findtext
(
'bndbox/xmin'
)))
/
width
...
...
@@ -103,4 +108,4 @@ if __name__ == '__main__':
else
:
x_y
=
", %d,%d"
%
(
data
[
i
][
0
],
data
[
i
][
1
])
f
.
write
(
x_y
)
f
.
close
()
\ No newline at end of file
f
.
close
()
nets/CSPdarknet.py
浏览文件 @
a60e3df5
import
torch
import
torch.nn.functional
as
F
import
torch.nn
as
nn
import
math
from
collections
import
OrderedDict
import
torch
import
torch.nn
as
nn
import
torch.nn.functional
as
F
#-------------------------------------------------#
# MISH激活函数
#-------------------------------------------------#
...
...
@@ -14,10 +16,10 @@ class Mish(nn.Module):
def
forward
(
self
,
x
):
return
x
*
torch
.
tanh
(
F
.
softplus
(
x
))
#-------------------------------------------------#
# 卷积块
# C
ONV+BATCHNORM+MISH
#-------------------------------------------------#
#-------------------------------------------------
--
#
# 卷积块
-> 卷积 + 标准化 + 激活函数
# C
onv2d + BatchNormalization + Mish
#-------------------------------------------------
--
#
class
BasicConv
(
nn
.
Module
):
def
__init__
(
self
,
in_channels
,
out_channels
,
kernel_size
,
stride
=
1
):
super
(
BasicConv
,
self
).
__init__
()
...
...
@@ -37,7 +39,7 @@ class BasicConv(nn.Module):
# 内部堆叠的残差块
#---------------------------------------------------#
class
Resblock
(
nn
.
Module
):
def
__init__
(
self
,
channels
,
hidden_channels
=
None
,
residual_activation
=
nn
.
Identity
()
):
def
__init__
(
self
,
channels
,
hidden_channels
=
None
):
super
(
Resblock
,
self
).
__init__
()
if
hidden_channels
is
None
:
...
...
@@ -51,33 +53,52 @@ class Resblock(nn.Module):
def
forward
(
self
,
x
):
return
x
+
self
.
block
(
x
)
#---------------------------------------------------#
#---------------------------------------------------
-----------------
#
# CSPdarknet的结构块
# 存在一个大残差边
# 这个大残差边绕过了很多的残差结构
#---------------------------------------------------#
# 首先利用ZeroPadding2D和一个步长为2x2的卷积块进行高和宽的压缩
# 然后建立一个大的残差边shortconv、这个大残差边绕过了很多的残差结构
# 主干部分会对num_blocks进行循环,循环内部是残差结构。
# 对于整个CSPdarknet的结构块,就是一个大残差块+内部多个小残差块
#--------------------------------------------------------------------#
class
Resblock_body
(
nn
.
Module
):
def
__init__
(
self
,
in_channels
,
out_channels
,
num_blocks
,
first
):
super
(
Resblock_body
,
self
).
__init__
()
#----------------------------------------------------------------#
# 利用一个步长为2x2的卷积块进行高和宽的压缩
#----------------------------------------------------------------#
self
.
downsample_conv
=
BasicConv
(
in_channels
,
out_channels
,
3
,
stride
=
2
)
if
first
:
#--------------------------------------------------------------------------#
# 然后建立一个大的残差边self.split_conv0、这个大残差边绕过了很多的残差结构
#--------------------------------------------------------------------------#
self
.
split_conv0
=
BasicConv
(
out_channels
,
out_channels
,
1
)
#----------------------------------------------------------------#
# 主干部分会对num_blocks进行循环,循环内部是残差结构。
#----------------------------------------------------------------#
self
.
split_conv1
=
BasicConv
(
out_channels
,
out_channels
,
1
)
self
.
blocks_conv
=
nn
.
Sequential
(
Resblock
(
channels
=
out_channels
,
hidden_channels
=
out_channels
//
2
),
BasicConv
(
out_channels
,
out_channels
,
1
)
)
self
.
concat_conv
=
BasicConv
(
out_channels
*
2
,
out_channels
,
1
)
else
:
#--------------------------------------------------------------------------#
# 然后建立一个大的残差边self.split_conv0、这个大残差边绕过了很多的残差结构
#--------------------------------------------------------------------------#
self
.
split_conv0
=
BasicConv
(
out_channels
,
out_channels
//
2
,
1
)
self
.
split_conv1
=
BasicConv
(
out_channels
,
out_channels
//
2
,
1
)
#----------------------------------------------------------------#
# 主干部分会对num_blocks进行循环,循环内部是残差结构。
#----------------------------------------------------------------#
self
.
split_conv1
=
BasicConv
(
out_channels
,
out_channels
//
2
,
1
)
self
.
blocks_conv
=
nn
.
Sequential
(
*
[
Resblock
(
out_channels
//
2
)
for
_
in
range
(
num_blocks
)],
BasicConv
(
out_channels
//
2
,
out_channels
//
2
,
1
)
)
self
.
concat_conv
=
BasicConv
(
out_channels
,
out_channels
,
1
)
def
forward
(
self
,
x
):
...
...
@@ -88,28 +109,44 @@ class Resblock_body(nn.Module):
x1
=
self
.
split_conv1
(
x
)
x1
=
self
.
blocks_conv
(
x1
)
#------------------------------------#
# 将大残差边再堆叠回来
#------------------------------------#
x
=
torch
.
cat
([
x1
,
x0
],
dim
=
1
)
#------------------------------------#
# 最后对通道数进行整合
#------------------------------------#
x
=
self
.
concat_conv
(
x
)
return
x
#---------------------------------------------------#
# CSPdarknet53 的主体部分
# 输入为一张416x416x3的图片
# 输出为三个有效特征层
#---------------------------------------------------#
class
CSPDarkNet
(
nn
.
Module
):
def
__init__
(
self
,
layers
):
super
(
CSPDarkNet
,
self
).
__init__
()
self
.
inplanes
=
32
# 416,416,3 -> 416,416,32
self
.
conv1
=
BasicConv
(
3
,
self
.
inplanes
,
kernel_size
=
3
,
stride
=
1
)
self
.
feature_channels
=
[
64
,
128
,
256
,
512
,
1024
]
self
.
stages
=
nn
.
ModuleList
([
# 416,416,32 -> 208,208,64
Resblock_body
(
self
.
inplanes
,
self
.
feature_channels
[
0
],
layers
[
0
],
first
=
True
),
# 208,208,64 -> 104,104,128
Resblock_body
(
self
.
feature_channels
[
0
],
self
.
feature_channels
[
1
],
layers
[
1
],
first
=
False
),
# 104,104,128 -> 52,52,256
Resblock_body
(
self
.
feature_channels
[
1
],
self
.
feature_channels
[
2
],
layers
[
2
],
first
=
False
),
# 52,52,256 -> 26,26,512
Resblock_body
(
self
.
feature_channels
[
2
],
self
.
feature_channels
[
3
],
layers
[
3
],
first
=
False
),
# 26,26,512 -> 13,13,1024
Resblock_body
(
self
.
feature_channels
[
3
],
self
.
feature_channels
[
4
],
layers
[
4
],
first
=
False
)
])
self
.
num_features
=
1
# 进行权值初始化
for
m
in
self
.
modules
():
if
isinstance
(
m
,
nn
.
Conv2d
):
n
=
m
.
kernel_size
[
0
]
*
m
.
kernel_size
[
1
]
*
m
.
out_channels
...
...
nets/yolo4.py
浏览文件 @
a60e3df5
from
collections
import
OrderedDict
import
torch
import
torch.nn
as
nn
from
collections
import
OrderedDict
from
nets.CSPdarknet
import
darknet53
def
conv2d
(
filter_in
,
filter_out
,
kernel_size
,
stride
=
1
):
pad
=
(
kernel_size
-
1
)
//
2
if
kernel_size
else
0
return
nn
.
Sequential
(
OrderedDict
([
...
...
@@ -83,7 +86,13 @@ def yolo_head(filters_list, in_filters):
class
YoloBody
(
nn
.
Module
):
def
__init__
(
self
,
num_anchors
,
num_classes
):
super
(
YoloBody
,
self
).
__init__
()
# backbone
#---------------------------------------------------#
# 生成CSPdarknet53的主干模型
# 获得三个有效特征层,他们的shape分别是:
# 52,52,256
# 26,26,512
# 13,13,1024
#---------------------------------------------------#
self
.
backbone
=
darknet53
(
None
)
self
.
conv1
=
make_three_conv
([
512
,
1024
],
1024
)
...
...
@@ -97,20 +106,21 @@ class YoloBody(nn.Module):
self
.
upsample2
=
Upsample
(
256
,
128
)
self
.
conv_for_P3
=
conv2d
(
256
,
128
,
1
)
self
.
make_five_conv2
=
make_five_conv
([
128
,
256
],
256
)
# 3*(5+num_classes)=3*(5+20)=3*(4+1+20)=75
#
4+1+num_classes
#
3*(5+num_classes) = 3*(5+20) = 3*(4+1+20)=75
final_out_filter2
=
num_anchors
*
(
5
+
num_classes
)
self
.
yolo_head3
=
yolo_head
([
256
,
final_out_filter2
],
128
)
self
.
down_sample1
=
conv2d
(
128
,
256
,
3
,
stride
=
2
)
self
.
make_five_conv3
=
make_five_conv
([
256
,
512
],
512
)
# 3*(5+num_classes)=3*(5+20)=3*(4+1+20)=75
# 3*(5+num_classes) = 3*(5+20) = 3*(4+1+20)=75
final_out_filter1
=
num_anchors
*
(
5
+
num_classes
)
self
.
yolo_head2
=
yolo_head
([
512
,
final_out_filter1
],
256
)
self
.
down_sample2
=
conv2d
(
256
,
512
,
3
,
stride
=
2
)
self
.
make_five_conv4
=
make_five_conv
([
512
,
1024
],
1024
)
# 3*(5+num_classes)=3*(5+20)=3*(4+1+20)=75
final_out_filter0
=
num_anchors
*
(
5
+
num_classes
)
self
.
yolo_head1
=
yolo_head
([
1024
,
final_out_filter0
],
512
)
...
...
@@ -120,30 +130,58 @@ class YoloBody(nn.Module):
# backbone
x2
,
x1
,
x0
=
self
.
backbone
(
x
)
# 13,13,1024 -> 13,13,512 -> 13,13,1024 -> 13,13,512 -> 13,13,2048
P5
=
self
.
conv1
(
x0
)
P5
=
self
.
SPP
(
P5
)
# 13,13,2048 -> 13,13,512 -> 13,13,1024 -> 13,13,512
P5
=
self
.
conv2
(
P5
)
# 13,13,512 -> 13,13,256 -> 26,26,256
P5_upsample
=
self
.
upsample1
(
P5
)
# 26,26,512 -> 26,26,256
P4
=
self
.
conv_for_P4
(
x1
)
# 26,26,256 + 26,26,256 -> 26,26,512
P4
=
torch
.
cat
([
P4
,
P5_upsample
],
axis
=
1
)
# 26,26,512 -> 26,26,256 -> 26,26,512 -> 26,26,256 -> 26,26,512 -> 26,26,256
P4
=
self
.
make_five_conv1
(
P4
)
# 26,26,256 -> 26,26,128 -> 52,52,128
P4_upsample
=
self
.
upsample2
(
P4
)
# 52,52,256 -> 52,52,128
P3
=
self
.
conv_for_P3
(
x2
)
# 52,52,128 + 52,52,128 -> 52,52,256
P3
=
torch
.
cat
([
P3
,
P4_upsample
],
axis
=
1
)
# 52,52,256 -> 52,52,128 -> 52,52,256 -> 52,52,128 -> 52,52,256 -> 52,52,128
P3
=
self
.
make_five_conv2
(
P3
)
# 52,52,128 -> 26,26,256
P3_downsample
=
self
.
down_sample1
(
P3
)
# 26,26,256 + 26,26,256 -> 26,26,512
P4
=
torch
.
cat
([
P3_downsample
,
P4
],
axis
=
1
)
# 26,26,512 -> 26,26,256 -> 26,26,512 -> 26,26,256 -> 26,26,512 -> 26,26,256
P4
=
self
.
make_five_conv3
(
P4
)
# 26,26,256 -> 13,13,512
P4_downsample
=
self
.
down_sample2
(
P4
)
# 13,13,512 + 13,13,512 -> 13,13,1024
P5
=
torch
.
cat
([
P4_downsample
,
P5
],
axis
=
1
)
# 13,13,1024 -> 13,13,512 -> 13,13,1024 -> 13,13,512 -> 13,13,1024 -> 13,13,512
P5
=
self
.
make_five_conv4
(
P5
)
#---------------------------------------------------#
# 第三个特征层
# y3=(batch_size,75,52,52)
#---------------------------------------------------#
out2
=
self
.
yolo_head3
(
P3
)
#---------------------------------------------------#
# 第二个特征层
# y2=(batch_size,75,26,26)
#---------------------------------------------------#
out1
=
self
.
yolo_head2
(
P4
)
#---------------------------------------------------#
# 第一个特征层
# y1=(batch_size,75,13,13)
#---------------------------------------------------#
out0
=
self
.
yolo_head1
(
P5
)
return
out0
,
out1
,
out2
...
...
nets/yolo_training.py
浏览文件 @
a60e3df5
此差异已折叠。
点击以展开。
predict.py
浏览文件 @
a60e3df5
#-------------------------------------#
# 对单张图片进行预测
#-------------------------------------#
from
yolo
import
YOLO
'''
predict.py有几个注意点
1、无法进行批量预测,如果想要批量预测,可以利用os.listdir()遍历文件夹,利用Image.open打开图片文件进行预测。
2、如果想要保存,利用r_image.save("img.jpg")即可保存。
3、如果想要获得框的坐标,可以进入detect_image函数,读取top,left,bottom,right这四个值。
4、如果想要截取下目标,可以利用获取到的top,left,bottom,right这四个值在原图上利用矩阵的方式进行截取。
'''
from
PIL
import
Image
from
yolo
import
YOLO
yolo
=
YOLO
()
while
True
:
...
...
test.py
浏览文件 @
a60e3df5
...
...
@@ -5,6 +5,7 @@
#--------------------------------------------#
import
torch
from
torchsummary
import
summary
from
nets.CSPdarknet
import
darknet53
from
nets.yolo4
import
YoloBody
...
...
train.py
浏览文件 @
a60e3df5
...
...
@@ -2,20 +2,23 @@
# 对数据集进行训练
#-------------------------------------#
import
os
import
numpy
as
np
import
time
import
numpy
as
np
import
torch
from
torch.autograd
import
Variable
import
torch.backends.cudnn
as
cudnn
import
torch.nn
as
nn
import
torch.optim
as
optim
import
torch.nn.functional
as
F
import
torch.backends.cudnn
as
cudnn
import
torch.optim
as
optim
from
torch.autograd
import
Variable
from
torch.utils.data
import
DataLoader
from
utils.dataloader
import
yolo_dataset_collate
,
YoloDataset
from
nets.yolo_training
import
YOLOLoss
,
Generator
from
nets.yolo4
import
YoloBody
from
tqdm
import
tqdm
from
nets.yolo4
import
YoloBody
from
nets.yolo_training
import
Generator
,
YOLOLoss
from
utils.dataloader
import
YoloDataset
,
yolo_dataset_collate
#---------------------------------------------------#
# 获得类和先验框
#---------------------------------------------------#
...
...
@@ -37,10 +40,12 @@ def get_lr(optimizer):
for
param_group
in
optimizer
.
param_groups
:
return
param_group
[
'lr'
]
def
fit_one_epoch
(
net
,
yolo_losses
,
epoch
,
epoch_size
,
epoch_size_val
,
gen
,
genval
,
Epoch
,
cuda
):
total_loss
=
0
val_loss
=
0
start_time
=
time
.
time
()
net
.
train
()
with
tqdm
(
total
=
epoch_size
,
desc
=
f
'Epoch
{
epoch
+
1
}
/
{
Epoch
}
'
,
postfix
=
dict
,
mininterval
=
0.3
)
as
pbar
:
for
iteration
,
batch
in
enumerate
(
gen
):
if
iteration
>=
epoch_size
:
...
...
@@ -53,25 +58,38 @@ def fit_one_epoch(net,yolo_losses,epoch,epoch_size,epoch_size_val,gen,genval,Epo
else
:
images
=
Variable
(
torch
.
from_numpy
(
images
).
type
(
torch
.
FloatTensor
))
targets
=
[
Variable
(
torch
.
from_numpy
(
ann
).
type
(
torch
.
FloatTensor
))
for
ann
in
targets
]
#----------------------#
# 清零梯度
#----------------------#
optimizer
.
zero_grad
()
#----------------------#
# 前向传播
#----------------------#
outputs
=
net
(
images
)
losses
=
[]
num_pos_all
=
0
#----------------------#
# 计算损失
#----------------------#
for
i
in
range
(
3
):
loss_item
=
yolo_losses
[
i
](
outputs
[
i
],
targets
)
losses
.
append
(
loss_item
[
0
])
loss
=
sum
(
losses
)
loss_item
,
num_pos
=
yolo_losses
[
i
](
outputs
[
i
],
targets
)
losses
.
append
(
loss_item
)
num_pos_all
+=
num_pos
loss
=
sum
(
losses
)
/
num_pos_all
#----------------------#
# 反向传播
#----------------------#
loss
.
backward
()
optimizer
.
step
()
total_loss
+=
loss
waste_time
=
time
.
time
()
-
start_time
total_loss
+=
loss
.
item
()
pbar
.
set_postfix
(
**
{
'total_loss'
:
total_loss
.
item
()
/
(
iteration
+
1
),
'lr'
:
get_lr
(
optimizer
),
'step/s'
:
waste_time
})
pbar
.
set_postfix
(
**
{
'total_loss'
:
total_loss
/
(
iteration
+
1
),
'lr'
:
get_lr
(
optimizer
)})
pbar
.
update
(
1
)
start_time
=
time
.
time
()
net
.
eval
()
print
(
'Start Validation'
)
with
tqdm
(
total
=
epoch_size_val
,
desc
=
f
'Epoch
{
epoch
+
1
}
/
{
Epoch
}
'
,
postfix
=
dict
,
mininterval
=
0.3
)
as
pbar
:
...
...
@@ -90,14 +108,15 @@ def fit_one_epoch(net,yolo_losses,epoch,epoch_size,epoch_size_val,gen,genval,Epo
optimizer
.
zero_grad
()
outputs
=
net
(
images_val
)
losses
=
[]
num_pos_all
=
0
for
i
in
range
(
3
):
loss_item
=
yolo_losses
[
i
](
outputs
[
i
],
targets_val
)
losses
.
append
(
loss_item
[
0
])
loss
=
sum
(
losses
)
val_loss
+=
loss
pbar
.
set_postfix
(
**
{
'total_loss'
:
val_loss
.
item
()
/
(
iteration
+
1
)})
loss_item
,
num_pos
=
yolo_losses
[
i
](
outputs
[
i
],
targets_val
)
losses
.
append
(
loss_item
)
num_pos_all
+=
num_pos
loss
=
sum
(
losses
)
/
num_pos_all
val_loss
+=
loss
.
item
()
pbar
.
set_postfix
(
**
{
'total_loss'
:
val_loss
/
(
iteration
+
1
)})
pbar
.
update
(
1
)
net
.
train
()
print
(
'Finish Validation'
)
print
(
'Epoch:'
+
str
(
epoch
+
1
)
+
'/'
+
str
(
Epoch
))
print
(
'Total Loss: %.4f || Val Loss: %.4f '
%
(
total_loss
/
(
epoch_size
+
1
),
val_loss
/
(
epoch_size_val
+
1
)))
...
...
@@ -111,41 +130,58 @@ def fit_one_epoch(net,yolo_losses,epoch,epoch_size,epoch_size_val,gen,genval,Epo
#----------------------------------------------------#
if
__name__
==
"__main__"
:
#-------------------------------#
# 输入的shape大小
# 显存比较小可以使用416x416
# 显存比较大可以使用608x608
#-------------------------------#
input_shape
=
(
416
,
416
)
#-------------------------------#
# tricks的使用设置
# 是否使用Cuda
# 没有GPU可以设置成False
#-------------------------------#
Cosine_lr
=
False
mosaic
=
True
# 用于设定是否使用cuda
Cuda
=
True
smoooth_label
=
0
#-------------------------------#
# Dataloder的使用
#-------------------------------#
Use_Data_Loader
=
True
annotation_path
=
'2007_train.txt'
#------------------------------------------------------#
# 是否对损失进行归一化
#------------------------------------------------------#
normalize
=
True
#-------------------------------#
# 获得先验框和类
# 输入的shape大小
# 显存比较小可以使用416x416
# 显存比较大可以使用608x608
#-------------------------------#
input_shape
=
(
416
,
416
)
#----------------------------------------------------#
# classes和anchor的路径,非常重要
# 训练前一定要修改classes_path,使其对应自己的数据集
#----------------------------------------------------#
anchors_path
=
'model_data/yolo_anchors.txt'
classes_path
=
'model_data/voc_classes.txt'
#----------------------------------------------------#
# 获取classes和anchor
#----------------------------------------------------#
class_names
=
get_classes
(
classes_path
)
anchors
=
get_anchors
(
anchors_path
)
num_classes
=
len
(
class_names
)
# 创建模型
model
=
YoloBody
(
len
(
anchors
[
0
]),
num_classes
)
#-------------------------------------------#
# 权值文件的下载请看README
#-------------------------------------------#
#------------------------------------------------------#
# Yolov4的tricks应用
# mosaic 马赛克数据增强 True or False
# Cosine_scheduler 余弦退火学习率 True or False
# label_smoothing 标签平滑 0.01以下一般 如0.01、0.005
#------------------------------------------------------#
mosaic
=
True
Cosine_lr
=
False
smoooth_label
=
0
#------------------------------------------------------#
# 创建yolo模型
# 训练前一定要修改classes_path和对应的txt文件
#------------------------------------------------------#
model
=
YoloBody
(
len
(
anchors
[
0
]),
num_classes
)
#------------------------------------------------------#
# 权值文件请看README,百度网盘下载
#------------------------------------------------------#
model_path
=
"model_data/yolo4_weights.pth"
# 加快模型训练的效率
print
(
'Loading weights into state dict...'
)
device
=
torch
.
device
(
'cuda'
if
torch
.
cuda
.
is_available
()
else
'cpu'
)
model_dict
=
model
.
state_dict
()
...
...
@@ -166,9 +202,17 @@ if __name__ == "__main__":
yolo_losses
=
[]
for
i
in
range
(
3
):
yolo_losses
.
append
(
YOLOLoss
(
np
.
reshape
(
anchors
,[
-
1
,
2
]),
num_classes
,
\
(
input_shape
[
1
],
input_shape
[
0
]),
smoooth_label
,
Cuda
))
(
input_shape
[
1
],
input_shape
[
0
]),
smoooth_label
,
Cuda
,
normalize
))
# 0.1用于验证,0.9用于训练
#----------------------------------------------------#
# 获得图片路径和标签
#----------------------------------------------------#
annotation_path
=
'2007_train.txt'
#----------------------------------------------------------------------#
# 验证集的划分在train.py代码里面进行
# 2007_test.txt和2007_val.txt里面没有内容是正常的。训练不会使用到。
# 当前划分方式下,验证集和训练集的比例为1:9
#----------------------------------------------------------------------#
val_split
=
0.1
with
open
(
annotation_path
)
as
f
:
lines
=
f
.
readlines
()
...
...
@@ -199,17 +243,17 @@ if __name__ == "__main__":
lr_scheduler
=
optim
.
lr_scheduler
.
StepLR
(
optimizer
,
step_size
=
1
,
gamma
=
0.95
)
if
Use_Data_Loader
:
train_dataset
=
YoloDataset
(
lines
[:
num_train
],
(
input_shape
[
0
],
input_shape
[
1
]),
mosaic
=
mosaic
)
val_dataset
=
YoloDataset
(
lines
[
num_train
:],
(
input_shape
[
0
],
input_shape
[
1
]),
mosaic
=
False
)
train_dataset
=
YoloDataset
(
lines
[:
num_train
],
(
input_shape
[
0
],
input_shape
[
1
]),
mosaic
=
mosaic
,
is_train
=
True
)
val_dataset
=
YoloDataset
(
lines
[
num_train
:],
(
input_shape
[
0
],
input_shape
[
1
]),
mosaic
=
False
,
is_train
=
False
)
gen
=
DataLoader
(
train_dataset
,
shuffle
=
True
,
batch_size
=
Batch_size
,
num_workers
=
4
,
pin_memory
=
True
,
drop_last
=
True
,
collate_fn
=
yolo_dataset_collate
)
gen_val
=
DataLoader
(
val_dataset
,
shuffle
=
True
,
batch_size
=
Batch_size
,
num_workers
=
4
,
pin_memory
=
True
,
drop_last
=
True
,
collate_fn
=
yolo_dataset_collate
)
else
:
gen
=
Generator
(
Batch_size
,
lines
[:
num_train
],
(
input_shape
[
0
],
input_shape
[
1
])).
generate
(
mosaic
=
mosaic
)
(
input_shape
[
0
],
input_shape
[
1
])).
generate
(
train
=
True
,
mosaic
=
mosaic
)
gen_val
=
Generator
(
Batch_size
,
lines
[
num_train
:],
(
input_shape
[
0
],
input_shape
[
1
])).
generate
(
mosaic
=
False
)
(
input_shape
[
0
],
input_shape
[
1
])).
generate
(
train
=
False
,
mosaic
=
mosaic
)
epoch_size
=
max
(
1
,
num_train
//
Batch_size
)
epoch_size_val
=
num_val
//
Batch_size
...
...
@@ -236,17 +280,17 @@ if __name__ == "__main__":
lr_scheduler
=
optim
.
lr_scheduler
.
StepLR
(
optimizer
,
step_size
=
1
,
gamma
=
0.95
)
if
Use_Data_Loader
:
train_dataset
=
YoloDataset
(
lines
[:
num_train
],
(
input_shape
[
0
],
input_shape
[
1
]),
mosaic
=
mosaic
)
val_dataset
=
YoloDataset
(
lines
[
num_train
:],
(
input_shape
[
0
],
input_shape
[
1
]),
mosaic
=
False
)
train_dataset
=
YoloDataset
(
lines
[:
num_train
],
(
input_shape
[
0
],
input_shape
[
1
]),
mosaic
=
mosaic
,
is_train
=
True
)
val_dataset
=
YoloDataset
(
lines
[
num_train
:],
(
input_shape
[
0
],
input_shape
[
1
]),
mosaic
=
False
,
is_train
=
False
)
gen
=
DataLoader
(
train_dataset
,
shuffle
=
True
,
batch_size
=
Batch_size
,
num_workers
=
4
,
pin_memory
=
True
,
drop_last
=
True
,
collate_fn
=
yolo_dataset_collate
)
gen_val
=
DataLoader
(
val_dataset
,
shuffle
=
True
,
batch_size
=
Batch_size
,
num_workers
=
4
,
pin_memory
=
True
,
drop_last
=
True
,
collate_fn
=
yolo_dataset_collate
)
else
:
gen
=
Generator
(
Batch_size
,
lines
[:
num_train
],
(
input_shape
[
0
],
input_shape
[
1
])).
generate
(
mosaic
=
mosaic
)
(
input_shape
[
0
],
input_shape
[
1
])).
generate
(
train
=
True
,
mosaic
=
mosaic
)
gen_val
=
Generator
(
Batch_size
,
lines
[
num_train
:],
(
input_shape
[
0
],
input_shape
[
1
])).
generate
(
mosaic
=
False
)
(
input_shape
[
0
],
input_shape
[
1
])).
generate
(
train
=
False
,
mosaic
=
mosaic
)
epoch_size
=
max
(
1
,
num_train
//
Batch_size
)
epoch_size_val
=
num_val
//
Batch_size
...
...
train_with_tensorboard.py
浏览文件 @
a60e3df5
...
...
@@ -39,9 +39,11 @@ def get_lr(optimizer):
return
param_group
[
'lr'
]
def
fit_ont_epoch
(
net
,
yolo_losses
,
epoch
,
epoch_size
,
epoch_size_val
,
gen
,
genval
,
Epoch
,
cuda
,
writer
):
global
train_tensorboard_step
,
val_tensorboard_step
total_loss
=
0
val_loss
=
0
start_time
=
time
.
time
()
net
.
train
()
with
tqdm
(
total
=
epoch_size
,
desc
=
f
'Epoch
{
epoch
+
1
}
/
{
Epoch
}
'
,
postfix
=
dict
,
mininterval
=
0.3
)
as
pbar
:
for
iteration
,
batch
in
enumerate
(
gen
):
if
iteration
>=
epoch_size
:
...
...
@@ -54,28 +56,41 @@ def fit_ont_epoch(net,yolo_losses,epoch,epoch_size,epoch_size_val,gen,genval,Epo
else
:
images
=
Variable
(
torch
.
from_numpy
(
images
).
type
(
torch
.
FloatTensor
))
targets
=
[
Variable
(
torch
.
from_numpy
(
ann
).
type
(
torch
.
FloatTensor
))
for
ann
in
targets
]
#----------------------#
# 清零梯度
#----------------------#
optimizer
.
zero_grad
()
#----------------------#
# 前向传播
#----------------------#
outputs
=
net
(
images
)
losses
=
[]
num_pos_all
=
0
#----------------------#
# 计算损失
#----------------------#
for
i
in
range
(
3
):
loss_item
=
yolo_losses
[
i
](
outputs
[
i
],
targets
)
losses
.
append
(
loss_item
[
0
])
loss
=
sum
(
losses
)
loss_item
,
num_pos
=
yolo_losses
[
i
](
outputs
[
i
],
targets
)
losses
.
append
(
loss_item
)
num_pos_all
+=
num_pos
loss
=
sum
(
losses
)
/
num_pos_all
total_loss
+=
loss
.
item
()
#----------------------#
# 反向传播
#----------------------#
loss
.
backward
()
optimizer
.
step
()
# 将loss写入tensorboard,每一步都写
writer
.
add_scalar
(
'Train_loss'
,
loss
,
(
epoch
*
epoch_size
+
iteration
))
total_loss
+=
loss
waste_time
=
time
.
time
()
-
start_time
pbar
.
set_postfix
(
**
{
'total_loss'
:
total_loss
.
item
()
/
(
iteration
+
1
),
'lr'
:
get_lr
(
optimizer
),
'step/s'
:
waste_time
})
pbar
.
update
(
1
)
# 将loss写入tensorboard,每一步都写
writer
.
add_scalar
(
'Train_loss'
,
loss
,
train_tensorboard_step
)
train_tensorboard_step
+=
1
start_time
=
time
.
time
()
pbar
.
set_postfix
(
**
{
'total_loss'
:
total_loss
/
(
iteration
+
1
),
'lr'
:
get_lr
(
optimizer
)})
pbar
.
update
(
1
)
# 将loss写入tensorboard,下面注释的是每个世代保存一次
# writer.add_scalar('Train_loss', total_loss/(iteration+1), epoch)
...
...
@@ -97,20 +112,24 @@ def fit_ont_epoch(net,yolo_losses,epoch,epoch_size,epoch_size_val,gen,genval,Epo
optimizer
.
zero_grad
()
outputs
=
net
(
images_val
)
losses
=
[]
num_pos_all
=
0
for
i
in
range
(
3
):
loss_item
=
yolo_losses
[
i
](
outputs
[
i
],
targets_val
)
losses
.
append
(
loss_item
[
0
])
loss
=
sum
(
losses
)
val_loss
+=
loss
loss_item
,
num_pos
=
yolo_losses
[
i
](
outputs
[
i
],
targets_val
)
losses
.
append
(
loss_item
)
num_pos_all
+=
num_pos
loss
=
sum
(
losses
)
/
num_pos_all
val_loss
+=
loss
.
item
()
# 将loss写入tensorboard, 下面注释的是每一步都写
# writer.add_scalar('Val_loss',val_loss/(epoch_size_val+1), (epoch*epoch_size_val + iteration))
# writer.add_scalar('Val_loss', loss, val_tensorboard_step)
# val_tensorboard_step += 1
pbar
.
set_postfix
(
**
{
'total_loss'
:
val_loss
/
(
iteration
+
1
)})
pbar
.
update
(
1
)
pbar
.
set_postfix
(
**
{
'total_loss'
:
val_loss
.
item
()
/
(
iteration
+
1
)})
pbar
.
update
(
1
)
net
.
train
()
# 将loss写入tensorboard,每个世代保存一次
writer
.
add_scalar
(
'Val_loss'
,
val_loss
/
(
epoch_size_val
+
1
),
epoch
)
writer
.
add_scalar
(
'Val_loss'
,
val_loss
/
(
epoch_size_val
+
1
),
epoch
)
print
(
'Finish Validation'
)
print
(
'Epoch:'
+
str
(
epoch
+
1
)
+
'/'
+
str
(
Epoch
))
print
(
'Total Loss: %.4f || Val Loss: %.4f '
%
(
total_loss
/
(
epoch_size
+
1
),
val_loss
/
(
epoch_size_val
+
1
)))
...
...
@@ -121,38 +140,58 @@ def fit_ont_epoch(net,yolo_losses,epoch,epoch_size,epoch_size_val,gen,genval,Epo
if
__name__
==
"__main__"
:
#-------------------------------#
# 输入的shape大小
# 显存比较小可以使用416x416
# 显存比较大可以使用608x608
#-------------------------------#
input_shape
=
(
416
,
416
)
# 是否使用Cuda
# 没有GPU可以设置成False
#-------------------------------#
# tricks的使用设置
#-------------------------------#
Cosine_lr
=
False
mosaic
=
True
# 用于设定是否使用cuda
Cuda
=
True
smoooth_label
=
0
#-------------------------------#
# Dataloder的使用
#-------------------------------#
Use_Data_Loader
=
True
annotation_path
=
'2007_train.txt'
#------------------------------------------------------#
# 是否对损失进行归一化
#------------------------------------------------------#
normalize
=
True
#-------------------------------#
# 获得先验框和类
# 输入的shape大小
# 显存比较小可以使用416x416
# 显存比较大可以使用608x608
#-------------------------------#
input_shape
=
(
416
,
416
)
#----------------------------------------------------#
# classes和anchor的路径,非常重要
# 训练前一定要修改classes_path,使其对应自己的数据集
#----------------------------------------------------#
anchors_path
=
'model_data/yolo_anchors.txt'
classes_path
=
'model_data/voc_classes.txt'
#----------------------------------------------------#
# 获取classes和anchor
#----------------------------------------------------#
class_names
=
get_classes
(
classes_path
)
anchors
=
get_anchors
(
anchors_path
)
num_classes
=
len
(
class_names
)
# 创建模型
model
=
YoloBody
(
len
(
anchors
[
0
]),
num_classes
)
#------------------------------------------------------#
# Yolov4的tricks应用
# mosaic 马赛克数据增强 True or False
# Cosine_scheduler 余弦退火学习率 True or False
# label_smoothing 标签平滑 0.01以下一般 如0.01、0.005
#------------------------------------------------------#
mosaic
=
True
Cosine_lr
=
False
smoooth_label
=
0
#------------------------------------------------------#
# 创建yolo模型
# 训练前一定要修改classes_path和对应的txt文件
#------------------------------------------------------#
model
=
YoloBody
(
len
(
anchors
[
0
]),
num_classes
)
#------------------------------------------------------#
# 权值文件请看README,百度网盘下载
#------------------------------------------------------#
model_path
=
"model_data/yolo4_weights.pth"
# 加快模型训练的效率
print
(
'Loading weights into state dict...'
)
device
=
torch
.
device
(
'cuda'
if
torch
.
cuda
.
is_available
()
else
'cpu'
)
model_dict
=
model
.
state_dict
()
...
...
@@ -173,9 +212,17 @@ if __name__ == "__main__":
yolo_losses
=
[]
for
i
in
range
(
3
):
yolo_losses
.
append
(
YOLOLoss
(
np
.
reshape
(
anchors
,[
-
1
,
2
]),
num_classes
,
\
(
input_shape
[
1
],
input_shape
[
0
]),
smoooth_label
,
Cuda
))
(
input_shape
[
1
],
input_shape
[
0
]),
smoooth_label
,
Cuda
,
normalize
))
# 0.1用于验证,0.9用于训练
#----------------------------------------------------#
# 获得图片路径和标签
#----------------------------------------------------#
annotation_path
=
'2007_train.txt'
#----------------------------------------------------------------------#
# 验证集的划分在train.py代码里面进行
# 2007_test.txt和2007_val.txt里面没有内容是正常的。训练不会使用到。
# 当前划分方式下,验证集和训练集的比例为1:9
#----------------------------------------------------------------------#
val_split
=
0.1
with
open
(
annotation_path
)
as
f
:
lines
=
f
.
readlines
()
...
...
@@ -184,7 +231,7 @@ if __name__ == "__main__":
np
.
random
.
seed
(
None
)
num_val
=
int
(
len
(
lines
)
*
val_split
)
num_train
=
len
(
lines
)
-
num_val
writer
=
SummaryWriter
(
log_dir
=
'logs'
,
flush_secs
=
60
)
if
Cuda
:
graph_inputs
=
torch
.
from_numpy
(
np
.
random
.
rand
(
1
,
3
,
input_shape
[
0
],
input_shape
[
1
])).
type
(
torch
.
FloatTensor
).
cuda
()
...
...
@@ -192,6 +239,16 @@ if __name__ == "__main__":
graph_inputs
=
torch
.
from_numpy
(
np
.
random
.
rand
(
1
,
3
,
input_shape
[
0
],
input_shape
[
1
])).
type
(
torch
.
FloatTensor
)
writer
.
add_graph
(
model
,
(
graph_inputs
,))
#------------------------------------------------------#
# 主干特征提取网络特征通用,冻结训练可以加快训练速度
# 也可以在训练初期防止权值被破坏。
# Init_Epoch为起始世代
# Freeze_Epoch为冻结训练的世代
# Epoch总训练世代
# 提示OOM或者显存不足请调小Batch_size
#------------------------------------------------------#
train_tensorboard_step
=
1
val_tensorboard_step
=
1
if
True
:
lr
=
1e-3
Batch_size
=
4
...
...
@@ -205,17 +262,17 @@ if __name__ == "__main__":
lr_scheduler
=
optim
.
lr_scheduler
.
StepLR
(
optimizer
,
step_size
=
1
,
gamma
=
0.95
)
if
Use_Data_Loader
:
train_dataset
=
YoloDataset
(
lines
[:
num_train
],
(
input_shape
[
0
],
input_shape
[
1
]),
mosaic
=
mosaic
)
val_dataset
=
YoloDataset
(
lines
[
num_train
:],
(
input_shape
[
0
],
input_shape
[
1
]),
mosaic
=
False
)
train_dataset
=
YoloDataset
(
lines
[:
num_train
],
(
input_shape
[
0
],
input_shape
[
1
]),
mosaic
=
mosaic
,
is_train
=
True
)
val_dataset
=
YoloDataset
(
lines
[
num_train
:],
(
input_shape
[
0
],
input_shape
[
1
]),
mosaic
=
False
,
is_train
=
False
)
gen
=
DataLoader
(
train_dataset
,
shuffle
=
True
,
batch_size
=
Batch_size
,
num_workers
=
4
,
pin_memory
=
True
,
drop_last
=
True
,
collate_fn
=
yolo_dataset_collate
)
gen_val
=
DataLoader
(
val_dataset
,
shuffle
=
True
,
batch_size
=
Batch_size
,
num_workers
=
4
,
pin_memory
=
True
,
drop_last
=
True
,
collate_fn
=
yolo_dataset_collate
)
else
:
gen
=
Generator
(
Batch_size
,
lines
[:
num_train
],
(
input_shape
[
0
],
input_shape
[
1
])).
generate
(
mosaic
=
mosaic
)
(
input_shape
[
0
],
input_shape
[
1
])).
generate
(
train
=
True
,
mosaic
=
mosaic
)
gen_val
=
Generator
(
Batch_size
,
lines
[
num_train
:],
(
input_shape
[
0
],
input_shape
[
1
])).
generate
(
mosaic
=
False
)
(
input_shape
[
0
],
input_shape
[
1
])).
generate
(
train
=
False
,
mosaic
=
mosaic
)
epoch_size
=
max
(
1
,
num_train
//
Batch_size
)
epoch_size_val
=
num_val
//
Batch_size
...
...
@@ -242,17 +299,17 @@ if __name__ == "__main__":
lr_scheduler
=
optim
.
lr_scheduler
.
StepLR
(
optimizer
,
step_size
=
1
,
gamma
=
0.95
)
if
Use_Data_Loader
:
train_dataset
=
YoloDataset
(
lines
[:
num_train
],
(
input_shape
[
0
],
input_shape
[
1
]),
mosaic
=
mosaic
)
val_dataset
=
YoloDataset
(
lines
[
num_train
:],
(
input_shape
[
0
],
input_shape
[
1
]),
mosaic
=
False
)
train_dataset
=
YoloDataset
(
lines
[:
num_train
],
(
input_shape
[
0
],
input_shape
[
1
]),
mosaic
=
mosaic
,
is_train
=
True
)
val_dataset
=
YoloDataset
(
lines
[
num_train
:],
(
input_shape
[
0
],
input_shape
[
1
]),
mosaic
=
False
,
is_train
=
False
)
gen
=
DataLoader
(
train_dataset
,
shuffle
=
True
,
batch_size
=
Batch_size
,
num_workers
=
4
,
pin_memory
=
True
,
drop_last
=
True
,
collate_fn
=
yolo_dataset_collate
)
gen_val
=
DataLoader
(
val_dataset
,
shuffle
=
True
,
batch_size
=
Batch_size
,
num_workers
=
4
,
pin_memory
=
True
,
drop_last
=
True
,
collate_fn
=
yolo_dataset_collate
)
else
:
gen
=
Generator
(
Batch_size
,
lines
[:
num_train
],
(
input_shape
[
0
],
input_shape
[
1
])).
generate
(
mosaic
=
mosaic
)
(
input_shape
[
0
],
input_shape
[
1
])).
generate
(
train
=
True
,
mosaic
=
mosaic
)
gen_val
=
Generator
(
Batch_size
,
lines
[
num_train
:],
(
input_shape
[
0
],
input_shape
[
1
])).
generate
(
mosaic
=
False
)
(
input_shape
[
0
],
input_shape
[
1
])).
generate
(
train
=
False
,
mosaic
=
mosaic
)
epoch_size
=
max
(
1
,
num_train
//
Batch_size
)
epoch_size_val
=
num_val
//
Batch_size
...
...
utils/dataloader.py
浏览文件 @
a60e3df5
...
...
@@ -14,7 +14,7 @@ from nets.yolo_training import Generator
import
cv2
class
YoloDataset
(
Dataset
):
def
__init__
(
self
,
train_lines
,
image_size
,
mosaic
=
True
):
def
__init__
(
self
,
train_lines
,
image_size
,
mosaic
=
True
,
is_train
=
True
):
super
(
YoloDataset
,
self
).
__init__
()
self
.
train_lines
=
train_lines
...
...
@@ -22,6 +22,7 @@ class YoloDataset(Dataset):
self
.
image_size
=
image_size
self
.
mosaic
=
mosaic
self
.
flag
=
True
self
.
is_train
=
is_train
def
__len__
(
self
):
return
self
.
train_batches
...
...
@@ -29,7 +30,7 @@ class YoloDataset(Dataset):
def
rand
(
self
,
a
=
0
,
b
=
1
):
return
np
.
random
.
rand
()
*
(
b
-
a
)
+
a
def
get_random_data
(
self
,
annotation_line
,
input_shape
,
jitter
=
.
3
,
hue
=
.
1
,
sat
=
1.5
,
val
=
1.5
):
def
get_random_data
(
self
,
annotation_line
,
input_shape
,
jitter
=
.
3
,
hue
=
.
1
,
sat
=
1.5
,
val
=
1.5
,
random
=
True
):
"""实时数据增强的随机预处理"""
line
=
annotation_line
.
split
()
image
=
Image
.
open
(
line
[
0
])
...
...
@@ -37,6 +38,35 @@ class YoloDataset(Dataset):
h
,
w
=
input_shape
box
=
np
.
array
([
np
.
array
(
list
(
map
(
int
,
box
.
split
(
','
))))
for
box
in
line
[
1
:]])
if
not
random
:
scale
=
min
(
w
/
iw
,
h
/
ih
)
nw
=
int
(
iw
*
scale
)
nh
=
int
(
ih
*
scale
)
dx
=
(
w
-
nw
)
//
2
dy
=
(
h
-
nh
)
//
2
image
=
image
.
resize
((
nw
,
nh
),
Image
.
BICUBIC
)
new_image
=
Image
.
new
(
'RGB'
,
(
w
,
h
),
(
128
,
128
,
128
))
new_image
.
paste
(
image
,
(
dx
,
dy
))
image_data
=
np
.
array
(
new_image
,
np
.
float32
)
# 调整目标框坐标
box_data
=
np
.
zeros
((
len
(
box
),
5
))
if
len
(
box
)
>
0
:
np
.
random
.
shuffle
(
box
)
box
[:,
[
0
,
2
]]
=
box
[:,
[
0
,
2
]]
*
nw
/
iw
+
dx
box
[:,
[
1
,
3
]]
=
box
[:,
[
1
,
3
]]
*
nh
/
ih
+
dy
box
[:,
0
:
2
][
box
[:,
0
:
2
]
<
0
]
=
0
box
[:,
2
][
box
[:,
2
]
>
w
]
=
w
box
[:,
3
][
box
[:,
3
]
>
h
]
=
h
box_w
=
box
[:,
2
]
-
box
[:,
0
]
box_h
=
box
[:,
3
]
-
box
[:,
1
]
box
=
box
[
np
.
logical_and
(
box_w
>
1
,
box_h
>
1
)]
# 保留有效框
box_data
=
np
.
zeros
((
len
(
box
),
5
))
box_data
[:
len
(
box
)]
=
box
return
image_data
,
box_data
# 调整图片大小
new_ar
=
w
/
h
*
self
.
rand
(
1
-
jitter
,
1
+
jitter
)
/
self
.
rand
(
1
-
jitter
,
1
+
jitter
)
scale
=
self
.
rand
(.
25
,
2
)
...
...
@@ -92,13 +122,8 @@ class YoloDataset(Dataset):
box
=
box
[
np
.
logical_and
(
box_w
>
1
,
box_h
>
1
)]
# 保留有效框
box_data
=
np
.
zeros
((
len
(
box
),
5
))
box_data
[:
len
(
box
)]
=
box
if
len
(
box
)
==
0
:
return
image_data
,
[]
if
(
box_data
[:,
:
4
]
>
0
).
any
():
return
image_data
,
box_data
else
:
return
image_data
,
[]
return
image_data
,
box_data
def
get_random_data_with_Mosaic
(
self
,
annotation_line
,
input_shape
,
hue
=
.
1
,
sat
=
1.5
,
val
=
1.5
):
h
,
w
=
input_shape
...
...
@@ -197,12 +222,7 @@ class YoloDataset(Dataset):
# 对框进行进一步的处理
new_boxes
=
np
.
array
(
merge_bboxes
(
box_datas
,
cutx
,
cuty
))
if
len
(
new_boxes
)
==
0
:
return
new_image
,
[]
if
(
new_boxes
[:,
:
4
]
>
0
).
any
():
return
new_image
,
new_boxes
else
:
return
new_image
,
[]
return
new_image
,
new_boxes
def
__getitem__
(
self
,
index
):
lines
=
self
.
train_lines
...
...
@@ -212,10 +232,10 @@ class YoloDataset(Dataset):
if
self
.
flag
and
(
index
+
4
)
<
n
:
img
,
y
=
self
.
get_random_data_with_Mosaic
(
lines
[
index
:
index
+
4
],
self
.
image_size
[
0
:
2
])
else
:
img
,
y
=
self
.
get_random_data
(
lines
[
index
],
self
.
image_size
[
0
:
2
])
img
,
y
=
self
.
get_random_data
(
lines
[
index
],
self
.
image_size
[
0
:
2
]
,
random
=
self
.
is_train
)
self
.
flag
=
bool
(
1
-
self
.
flag
)
else
:
img
,
y
=
self
.
get_random_data
(
lines
[
index
],
self
.
image_size
[
0
:
2
])
img
,
y
=
self
.
get_random_data
(
lines
[
index
],
self
.
image_size
[
0
:
2
]
,
random
=
self
.
is_train
)
if
len
(
y
)
!=
0
:
# 从坐标转换成0~1的百分比
...
...
utils/utils.py
浏览文件 @
a60e3df5
from
__future__
import
division
import
os
import
math
import
os
import
time
import
numpy
as
np
import
torch
import
torch.nn
as
nn
import
torch.nn.functional
as
F
import
numpy
as
np
import
matplotlib.pyplot
as
plt
from
torch.autograd
import
Variable
from
PIL
import
Image
,
ImageDraw
,
ImageFont
from
torch.autograd
import
Variable
from
torchvision.ops
import
nms
class
DecodeBox
(
nn
.
Module
):
def
__init__
(
self
,
anchors
,
num_classes
,
img_size
):
super
(
DecodeBox
,
self
).
__init__
()
#-----------------------------------------------------------#
# 13x13的特征层对应的anchor是[142, 110], [192, 243], [459, 401]
# 26x26的特征层对应的anchor是[36, 75], [76, 55], [72, 146]
# 52x52的特征层对应的anchor是[12, 16], [19, 36], [40, 28]
#-----------------------------------------------------------#
self
.
anchors
=
anchors
self
.
num_anchors
=
len
(
anchors
)
self
.
num_classes
=
num_classes
...
...
@@ -21,26 +28,33 @@ class DecodeBox(nn.Module):
self
.
img_size
=
img_size
def
forward
(
self
,
input
):
# input为bs,3*(1+4+num_classes),13,13
# 一共多少张图片
#-----------------------------------------------#
# 输入的input一共有三个,他们的shape分别是
# batch_size, 255, 13, 13
# batch_size, 255, 26, 26
# batch_size, 255, 52, 52
#-----------------------------------------------#
batch_size
=
input
.
size
(
0
)
# 13,13
input_height
=
input
.
size
(
2
)
input_width
=
input
.
size
(
3
)
#
计算步长
#
每一个特征点对应原来的图片上多少个像素点
#
如果特征层为13x13的话,一个特征点就对应原来的图片上的32个像素点
#
416/13 = 32
#
-----------------------------------------------#
#
输入为416x416时
#
stride_h = stride_w = 32、16、8
#
-----------------------------------------------#
stride_h
=
self
.
img_size
[
1
]
/
input_height
stride_w
=
self
.
img_size
[
0
]
/
input_width
#
把先验框的尺寸调整成特征层大小的形式
#
计算出先验框在特征层上对应的宽高
#-------------------------------------------------#
#
此时获得的scaled_anchors大小是相对于特征层的
#
-------------------------------------------------#
scaled_anchors
=
[(
anchor_width
/
stride_w
,
anchor_height
/
stride_h
)
for
anchor_width
,
anchor_height
in
self
.
anchors
]
# bs,3*(5+num_classes),13,13 -> bs,3,13,13,(5+num_classes)
#-----------------------------------------------#
# 输入的input一共有三个,他们的shape分别是
# batch_size, 3, 13, 13, 85
# batch_size, 3, 26, 26, 85
# batch_size, 3, 52, 52, 85
#-----------------------------------------------#
prediction
=
input
.
view
(
batch_size
,
self
.
num_anchors
,
self
.
bbox_attrs
,
input_height
,
input_width
).
permute
(
0
,
1
,
3
,
4
,
2
).
contiguous
()
...
...
@@ -48,30 +62,39 @@ class DecodeBox(nn.Module):
x
=
torch
.
sigmoid
(
prediction
[...,
0
])
y
=
torch
.
sigmoid
(
prediction
[...,
1
])
# 先验框的宽高调整参数
w
=
prediction
[...,
2
]
# Width
h
=
prediction
[...,
3
]
# Height
w
=
prediction
[...,
2
]
h
=
prediction
[...,
3
]
# 获得置信度,是否有物体
conf
=
torch
.
sigmoid
(
prediction
[...,
4
])
# 种类置信度
pred_cls
=
torch
.
sigmoid
(
prediction
[...,
5
:])
# Cls pred.
pred_cls
=
torch
.
sigmoid
(
prediction
[...,
5
:])
FloatTensor
=
torch
.
cuda
.
FloatTensor
if
x
.
is_cuda
else
torch
.
FloatTensor
LongTensor
=
torch
.
cuda
.
LongTensor
if
x
.
is_cuda
else
torch
.
LongTensor
# 生成网格,先验框中心,网格左上角 batch_size,3,13,13
#----------------------------------------------------------#
# 生成网格,先验框中心,网格左上角
# batch_size,3,13,13
#----------------------------------------------------------#
grid_x
=
torch
.
linspace
(
0
,
input_width
-
1
,
input_width
).
repeat
(
input_height
,
1
).
repeat
(
batch_size
*
self
.
num_anchors
,
1
,
1
).
view
(
x
.
shape
).
type
(
FloatTensor
)
grid_y
=
torch
.
linspace
(
0
,
input_height
-
1
,
input_height
).
repeat
(
input_width
,
1
).
t
().
repeat
(
batch_size
*
self
.
num_anchors
,
1
,
1
).
view
(
y
.
shape
).
type
(
FloatTensor
)
# 生成先验框的宽高
#----------------------------------------------------------#
# 按照网格格式生成先验框的宽高
# batch_size,3,13,13
#----------------------------------------------------------#
anchor_w
=
FloatTensor
(
scaled_anchors
).
index_select
(
1
,
LongTensor
([
0
]))
anchor_h
=
FloatTensor
(
scaled_anchors
).
index_select
(
1
,
LongTensor
([
1
]))
anchor_w
=
anchor_w
.
repeat
(
batch_size
,
1
).
repeat
(
1
,
1
,
input_height
*
input_width
).
view
(
w
.
shape
)
anchor_h
=
anchor_h
.
repeat
(
batch_size
,
1
).
repeat
(
1
,
1
,
input_height
*
input_width
).
view
(
h
.
shape
)
# 计算调整后的先验框中心与宽高
#----------------------------------------------------------#
# 利用预测结果对先验框进行调整
# 首先调整先验框的中心,从先验框中心向右下角偏移
# 再调整先验框的宽高。
#----------------------------------------------------------#
pred_boxes
=
FloatTensor
(
prediction
[...,
:
4
].
shape
)
pred_boxes
[...,
0
]
=
x
.
data
+
grid_x
pred_boxes
[...,
1
]
=
y
.
data
+
grid_y
...
...
@@ -127,7 +150,10 @@ class DecodeBox(nn.Module):
# ax.add_patch(rect3)
# plt.show()
# 用于将输出调整为相对于416x416的大小
#----------------------------------------------------------#
# 将输出结果调整成相对于输入图像大小
#----------------------------------------------------------#
_scale
=
torch
.
Tensor
([
stride_w
,
stride_h
]
*
2
).
type
(
FloatTensor
)
output
=
torch
.
cat
((
pred_boxes
.
view
(
batch_size
,
-
1
,
4
)
*
_scale
,
conf
.
view
(
batch_size
,
-
1
,
1
),
pred_cls
.
view
(
batch_size
,
-
1
,
self
.
num_classes
)),
-
1
)
...
...
@@ -198,7 +224,10 @@ def bbox_iou(box1, box2, x1y1x2y2=True):
def
non_max_suppression
(
prediction
,
num_classes
,
conf_thres
=
0.5
,
nms_thres
=
0.4
):
# 求左上角和右下角
#----------------------------------------------------------#
# 将预测结果的格式转换成左上角右下角的格式。
# prediction [batch_size, num_anchors, 85]
#----------------------------------------------------------#
box_corner
=
prediction
.
new
(
prediction
.
shape
)
box_corner
[:,
:,
0
]
=
prediction
[:,
:,
0
]
-
prediction
[:,
:,
2
]
/
2
box_corner
[:,
:,
1
]
=
prediction
[:,
:,
1
]
-
prediction
[:,
:,
3
]
/
2
...
...
@@ -208,21 +237,35 @@ def non_max_suppression(prediction, num_classes, conf_thres=0.5, nms_thres=0.4):
output
=
[
None
for
_
in
range
(
len
(
prediction
))]
for
image_i
,
image_pred
in
enumerate
(
prediction
):
# 获得种类及其置信度
#----------------------------------------------------------#
# 对种类预测部分取max。
# class_conf [batch_size, num_anchors, 1] 种类置信度
# class_pred [batch_size, num_anchors, 1] 种类
#----------------------------------------------------------#
class_conf
,
class_pred
=
torch
.
max
(
image_pred
[:,
5
:
5
+
num_classes
],
1
,
keepdim
=
True
)
# 利用置信度进行第一轮筛选
conf_mask
=
(
image_pred
[:,
4
]
*
class_conf
[:,
0
]
>=
conf_thres
).
squeeze
()
#----------------------------------------------------------#
# 利用置信度进行第一轮筛选
#----------------------------------------------------------#
conf_mask
=
(
image_pred
[:,
4
]
*
class_conf
[:,
0
]
>=
conf_thres
).
squeeze
()
#----------------------------------------------------------#
# 根据置信度进行预测结果的筛选
#----------------------------------------------------------#
image_pred
=
image_pred
[
conf_mask
]
class_conf
=
class_conf
[
conf_mask
]
class_pred
=
class_pred
[
conf_mask
]
if
not
image_pred
.
size
(
0
):
continue
# 获得的内容为(x1, y1, x2, y2, obj_conf, class_conf, class_pred)
#-------------------------------------------------------------------------#
# detections [batch_size, num_anchors, 7]
# 7的内容为:x1, y1, x2, y2, obj_conf, class_conf, class_pred
#-------------------------------------------------------------------------#
detections
=
torch
.
cat
((
image_pred
[:,
:
5
],
class_conf
.
float
(),
class_pred
.
float
()),
1
)
# 获得种类
#------------------------------------------#
# 获得预测结果中包含的所有种类
#------------------------------------------#
unique_labels
=
detections
[:,
-
1
].
cpu
().
unique
()
if
prediction
.
is_cuda
:
...
...
@@ -230,7 +273,9 @@ def non_max_suppression(prediction, num_classes, conf_thres=0.5, nms_thres=0.4):
detections
=
detections
.
cuda
()
for
c
in
unique_labels
:
# 获得某一类初步筛选后全部的预测结果
#------------------------------------------#
# 获得某一类得分筛选后全部的预测结果
#------------------------------------------#
detections_class
=
detections
[
detections
[:,
-
1
]
==
c
]
#------------------------------------------#
...
...
@@ -238,7 +283,7 @@ def non_max_suppression(prediction, num_classes, conf_thres=0.5, nms_thres=0.4):
#------------------------------------------#
keep
=
nms
(
detections_class
[:,
:
4
],
detections_class
[:,
4
]
*
detections_class
[:,
5
],
detections_class
[:,
4
]
*
detections_class
[:,
5
],
nms_thres
)
max_detections
=
detections_class
[
keep
]
...
...
@@ -264,6 +309,7 @@ def non_max_suppression(prediction, num_classes, conf_thres=0.5, nms_thres=0.4):
return
output
def
merge_bboxes
(
bboxes
,
cutx
,
cuty
):
merge_bbox
=
[]
for
i
in
range
(
len
(
bboxes
)):
...
...
video.py
浏览文件 @
a60e3df5
#-------------------------------------#
# 调用摄像头检测
# 调用摄像头或者视频进行检测
# 调用摄像头直接运行即可
# 调用视频可以将cv2.VideoCapture()指定路径
# 视频的保存并不难,可以百度一下看看
#-------------------------------------#
from
yolo
import
YOLO
from
PIL
import
Image
import
numpy
as
np
import
cv2
import
time
yolo
=
YOLO
()
# 调用摄像头
capture
=
cv2
.
VideoCapture
(
0
)
# capture=cv2.VideoCapture("1.mp4")
import
cv2
import
numpy
as
np
from
PIL
import
Image
from
yolo
import
YOLO
yolo
=
YOLO
()
#-------------------------------------#
# 调用摄像头
# capture=cv2.VideoCapture("1.mp4")
#-------------------------------------#
capture
=
cv2
.
VideoCapture
(
0
)
fps
=
0.0
while
(
True
):
t1
=
time
.
time
()
...
...
@@ -19,10 +27,8 @@ while(True):
frame
=
cv2
.
cvtColor
(
frame
,
cv2
.
COLOR_BGR2RGB
)
# 转变成Image
frame
=
Image
.
fromarray
(
np
.
uint8
(
frame
))
# 进行检测
frame
=
np
.
array
(
yolo
.
detect_image
(
frame
))
# RGBtoBGR满足opencv显示格式
frame
=
cv2
.
cvtColor
(
frame
,
cv2
.
COLOR_RGB2BGR
)
...
...
@@ -32,7 +38,6 @@ while(True):
cv2
.
imshow
(
"video"
,
frame
)
c
=
cv2
.
waitKey
(
1
)
&
0xff
if
c
==
27
:
capture
.
release
()
...
...
voc_annotation.py
浏览文件 @
a60e3df5
#---------------------------------------------#
# 运行前一定要修改classes
# 如果生成的2007_train.txt里面没有目标信息
# 那么就是因为classes没有设定正确
#---------------------------------------------#
import
xml.etree.ElementTree
as
ET
from
os
import
getcwd
...
...
yolo.py
浏览文件 @
a60e3df5
#-------------------------------------#
# 创建YOLO类
#-------------------------------------#
import
cv2
import
numpy
as
np
import
colorsys
import
os
import
cv2
import
numpy
as
np
import
torch
import
torch.nn
as
nn
from
nets.yolo4
import
YoloBody
import
torch.backends.cudnn
as
cudnn
from
PIL
import
Image
,
ImageFont
,
ImageDraw
import
torch.nn
as
nn
from
PIL
import
Image
,
ImageDraw
,
ImageFont
from
torch.autograd
import
Variable
from
utils.utils
import
non_max_suppression
,
bbox_iou
,
DecodeBox
,
letterbox_image
,
yolo_correct_boxes
from
nets.yolo4
import
YoloBody
from
utils.utils
import
(
DecodeBox
,
bbox_iou
,
letterbox_image
,
non_max_suppression
,
yolo_correct_boxes
)
#--------------------------------------------#
# 使用自己训练好的模型预测需要修改2个参数
# model_path和classes_path都需要修改!
# 如果出现shape不匹配,一定要注意
# 训练时的model_path和classes_path参数的修改
#--------------------------------------------#
class
YOLO
(
object
):
_defaults
=
{
"model_path"
:
'model_data/yolo4_weights.pth'
,
"anchors_path"
:
'model_data/yolo_anchors.txt'
,
"classes_path"
:
'model_data/coco_classes.txt'
,
"model_image_size"
:
(
416
,
416
,
3
),
"confidence"
:
0.5
,
"iou"
:
0.3
,
"cuda"
:
True
"model_path"
:
'model_data/yolo4_weights.pth'
,
"anchors_path"
:
'model_data/yolo_anchors.txt'
,
"classes_path"
:
'model_data/coco_classes.txt'
,
"model_image_size"
:
(
416
,
416
,
3
),
"confidence"
:
0.5
,
"iou"
:
0.3
,
"cuda"
:
True
}
@
classmethod
...
...
@@ -43,6 +49,7 @@ class YOLO(object):
self
.
class_names
=
self
.
_get_class
()
self
.
anchors
=
self
.
_get_anchors
()
self
.
generate
()
#---------------------------------------------------#
# 获得所有的分类
#---------------------------------------------------#
...
...
@@ -64,25 +71,31 @@ class YOLO(object):
return
np
.
array
(
anchors
).
reshape
([
-
1
,
3
,
2
])[::
-
1
,:,:]
#---------------------------------------------------#
#
获得所有的分类
#
生成模型
#---------------------------------------------------#
def
generate
(
self
):
self
.
net
=
YoloBody
(
len
(
self
.
anchors
[
0
]),
len
(
self
.
class_names
)).
eval
()
# 加快模型训练的效率
#---------------------------------------------------#
# 建立yolov4模型
#---------------------------------------------------#
self
.
net
=
YoloBody
(
len
(
self
.
anchors
[
0
]),
len
(
self
.
class_names
)).
eval
()
#---------------------------------------------------#
# 载入yolov4模型的权重
#---------------------------------------------------#
print
(
'Loading weights into state dict...'
)
device
=
torch
.
device
(
'cuda'
if
torch
.
cuda
.
is_available
()
else
'cpu'
)
state_dict
=
torch
.
load
(
self
.
model_path
,
map_location
=
device
)
self
.
net
.
load_state_dict
(
state_dict
)
print
(
'Finished!'
)
if
self
.
cuda
:
os
.
environ
[
"CUDA_VISIBLE_DEVICES"
]
=
'0'
self
.
net
=
nn
.
DataParallel
(
self
.
net
)
self
.
net
=
self
.
net
.
cuda
()
print
(
'Finished!'
)
#---------------------------------------------------#
# 建立三个特征层解码用的工具
#---------------------------------------------------#
self
.
yolo_decodes
=
[]
for
i
in
range
(
3
):
self
.
yolo_decodes
.
append
(
DecodeBox
(
self
.
anchors
[
i
],
len
(
self
.
class_names
),
(
self
.
model_image_size
[
1
],
self
.
model_image_size
[
0
])))
...
...
@@ -103,45 +116,65 @@ class YOLO(object):
def
detect_image
(
self
,
image
):
image_shape
=
np
.
array
(
np
.
shape
(
image
)[
0
:
2
])
#---------------------------------------------------------#
# 给图像增加灰条,实现不失真的resize
#---------------------------------------------------------#
crop_img
=
np
.
array
(
letterbox_image
(
image
,
(
self
.
model_image_size
[
1
],
self
.
model_image_size
[
0
])))
photo
=
np
.
array
(
crop_img
,
dtype
=
np
.
float32
)
photo
/=
255.0
photo
=
np
.
array
(
crop_img
,
dtype
=
np
.
float32
)
/
255.0
photo
=
np
.
transpose
(
photo
,
(
2
,
0
,
1
))
photo
=
photo
.
astype
(
np
.
float32
)
images
=
[]
images
.
append
(
photo
)
images
=
np
.
asarray
(
images
)
#---------------------------------------------------------#
# 添加上batch_size维度
#---------------------------------------------------------#
images
=
[
photo
]
with
torch
.
no_grad
():
images
=
torch
.
from_numpy
(
images
)
images
=
torch
.
from_numpy
(
np
.
asarray
(
images
)
)
if
self
.
cuda
:
images
=
images
.
cuda
()
#---------------------------------------------------------#
# 将图像输入网络当中进行预测!
#---------------------------------------------------------#
outputs
=
self
.
net
(
images
)
output_list
=
[]
for
i
in
range
(
3
):
output_list
.
append
(
self
.
yolo_decodes
[
i
](
outputs
[
i
]))
#---------------------------------------------------------#
# 将预测框进行堆叠,然后进行非极大抑制
#---------------------------------------------------------#
output
=
torch
.
cat
(
output_list
,
1
)
batch_detections
=
non_max_suppression
(
output
,
len
(
self
.
class_names
),
conf_thres
=
self
.
confidence
,
nms_thres
=
self
.
iou
)
#---------------------------------------------------------#
# 如果没有检测出物体,返回原图
#---------------------------------------------------------#
try
:
batch_detections
=
batch_detections
[
0
].
cpu
().
numpy
()
except
:
return
image
output_list
=
[]
for
i
in
range
(
3
):
output_list
.
append
(
self
.
yolo_decodes
[
i
](
outputs
[
i
]))
output
=
torch
.
cat
(
output_list
,
1
)
batch_detections
=
non_max_suppression
(
output
,
len
(
self
.
class_names
),
conf_thres
=
self
.
confidence
,
nms_thres
=
self
.
iou
)
try
:
batch_detections
=
batch_detections
[
0
].
cpu
().
numpy
()
except
:
return
image
top_index
=
batch_detections
[:,
4
]
*
batch_detections
[:,
5
]
>
self
.
confidence
top_conf
=
batch_detections
[
top_index
,
4
]
*
batch_detections
[
top_index
,
5
]
top_label
=
np
.
array
(
batch_detections
[
top_index
,
-
1
],
np
.
int32
)
top_bboxes
=
np
.
array
(
batch_detections
[
top_index
,:
4
])
top_xmin
,
top_ymin
,
top_xmax
,
top_ymax
=
np
.
expand_dims
(
top_bboxes
[:,
0
],
-
1
),
np
.
expand_dims
(
top_bboxes
[:,
1
],
-
1
),
np
.
expand_dims
(
top_bboxes
[:,
2
],
-
1
),
np
.
expand_dims
(
top_bboxes
[:,
3
],
-
1
)
# 去掉灰条
boxes
=
yolo_correct_boxes
(
top_ymin
,
top_xmin
,
top_ymax
,
top_xmax
,
np
.
array
([
self
.
model_image_size
[
0
],
self
.
model_image_size
[
1
]]),
image_shape
)
#---------------------------------------------------------#
# 对预测框进行得分筛选
#---------------------------------------------------------#
top_index
=
batch_detections
[:,
4
]
*
batch_detections
[:,
5
]
>
self
.
confidence
top_conf
=
batch_detections
[
top_index
,
4
]
*
batch_detections
[
top_index
,
5
]
top_label
=
np
.
array
(
batch_detections
[
top_index
,
-
1
],
np
.
int32
)
top_bboxes
=
np
.
array
(
batch_detections
[
top_index
,:
4
])
top_xmin
,
top_ymin
,
top_xmax
,
top_ymax
=
np
.
expand_dims
(
top_bboxes
[:,
0
],
-
1
),
np
.
expand_dims
(
top_bboxes
[:,
1
],
-
1
),
np
.
expand_dims
(
top_bboxes
[:,
2
],
-
1
),
np
.
expand_dims
(
top_bboxes
[:,
3
],
-
1
)
#-----------------------------------------------------------------#
# 在图像传入网络预测前会进行letterbox_image给图像周围添加灰条
# 因此生成的top_bboxes是相对于有灰条的图像的
# 我们需要对其进行修改,去除灰条的部分。
#-----------------------------------------------------------------#
boxes
=
yolo_correct_boxes
(
top_ymin
,
top_xmin
,
top_ymax
,
top_xmax
,
np
.
array
([
self
.
model_image_size
[
0
],
self
.
model_image_size
[
1
]]),
image_shape
)
font
=
ImageFont
.
truetype
(
font
=
'model_data/simhei.ttf'
,
size
=
np
.
floor
(
3e-2
*
np
.
shape
(
image
)[
1
]
+
0.5
).
astype
(
'int32'
))
thickness
=
(
np
.
shape
(
image
)[
0
]
+
np
.
shape
(
image
)[
1
])
//
self
.
model_image_size
[
0
]
thickness
=
max
((
np
.
shape
(
image
)[
0
]
+
np
.
shape
(
image
)[
1
])
//
self
.
model_image_size
[
0
],
1
)
for
i
,
c
in
enumerate
(
top_label
):
predicted_class
=
self
.
class_names
[
c
]
...
...
@@ -163,7 +196,7 @@ class YOLO(object):
draw
=
ImageDraw
.
Draw
(
image
)
label_size
=
draw
.
textsize
(
label
,
font
)
label
=
label
.
encode
(
'utf-8'
)
print
(
label
)
print
(
label
,
top
,
left
,
bottom
,
right
)
if
top
-
label_size
[
1
]
>=
0
:
text_origin
=
np
.
array
([
left
,
top
-
label_size
[
1
]])
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录