Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
码农StayUp
yolov7-obb
提交
cdc66baa
Y
yolov7-obb
项目概览
码农StayUp
/
yolov7-obb
与 Fork 源项目一致
从无法访问的项目Fork
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Y
yolov7-obb
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
提交
cdc66baa
编写于
7月 09, 2022
作者:
Bubbliiiing
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
update a lot
上级
0b930c81
变更
9
显示空白变更内容
内联
并排
Showing
9 changed file
with
523 addition
and
343 deletion
+523
-343
nets/backbone.py
nets/backbone.py
+25
-23
nets/yolo.py
nets/yolo.py
+55
-48
nets/yolo_training.py
nets/yolo_training.py
+411
-260
summary.py
summary.py
+2
-1
train.py
train.py
+9
-3
utils/dataloader.py
utils/dataloader.py
+8
-1
utils/utils.py
utils/utils.py
+3
-3
utils/utils_bbox.py
utils/utils_bbox.py
+3
-3
yolo.py
yolo.py
+7
-1
未找到文件。
nets/
CSPdarknet
.py
→
nets/
backbone
.py
浏览文件 @
cdc66baa
...
...
@@ -25,10 +25,10 @@ class Conv(nn.Module):
def
fuseforward
(
self
,
x
):
return
self
.
act
(
self
.
conv
(
x
))
class
RCSPDark_
Block
(
nn
.
Module
):
def
__init__
(
self
,
c1
,
c2
,
c3
,
n
=
4
,
e
=
0.5
,
ids
=
[
0
]):
super
(
RCSPDark_
Block
,
self
).
__init__
()
c_
=
int
(
c
1
*
e
)
class
Block
(
nn
.
Module
):
def
__init__
(
self
,
c1
,
c2
,
c3
,
n
=
4
,
e
=
1
,
ids
=
[
0
]):
super
(
Block
,
self
).
__init__
()
c_
=
int
(
c
2
*
e
)
self
.
ids
=
ids
self
.
cv1
=
Conv
(
c1
,
c_
,
1
,
1
)
...
...
@@ -58,9 +58,9 @@ class MP(nn.Module):
def
forward
(
self
,
x
):
return
self
.
m
(
x
)
class
RCSPDark_
Transition
(
nn
.
Module
):
class
Transition
(
nn
.
Module
):
def
__init__
(
self
,
c1
,
c2
):
super
(
RCSPDark_
Transition
,
self
).
__init__
()
super
(
Transition
,
self
).
__init__
()
self
.
cv1
=
Conv
(
c1
,
c2
,
1
,
1
)
self
.
cv2
=
Conv
(
c1
,
c2
,
1
,
1
)
self
.
cv3
=
Conv
(
c2
,
c2
,
3
,
2
)
...
...
@@ -76,40 +76,42 @@ class RCSPDark_Transition(nn.Module):
return
torch
.
cat
([
x_2
,
x_1
],
1
)
class
CSPDarknet
(
nn
.
Module
):
def
__init__
(
self
,
base_channels
,
pretrained
=
False
):
class
Backbone
(
nn
.
Module
):
def
__init__
(
self
,
transition_channels
,
block_channels
,
n
,
phi
,
pretrained
=
False
):
super
().
__init__
()
#-----------------------------------------------#
# 输入图片是640, 640, 3
# 初始的基本通道是64
#-----------------------------------------------#
ids
=
{
'l'
:
[
-
1
,
-
3
,
-
5
,
-
6
],
'x'
:
[
-
1
,
-
3
,
-
5
,
-
7
,
-
8
],
}[
phi
]
self
.
stem
=
nn
.
Sequential
(
Conv
(
3
,
base
_channels
,
3
,
1
),
Conv
(
base_channels
,
base
_channels
*
2
,
3
,
2
),
Conv
(
base_channels
*
2
,
base
_channels
*
2
,
3
,
1
),
Conv
(
3
,
transition
_channels
,
3
,
1
),
Conv
(
transition_channels
,
transition
_channels
*
2
,
3
,
2
),
Conv
(
transition_channels
*
2
,
transition
_channels
*
2
,
3
,
1
),
)
self
.
dark2
=
nn
.
Sequential
(
Conv
(
base_channels
*
2
,
base
_channels
*
4
,
3
,
2
),
RCSPDark_Block
(
base_channels
*
4
,
base_channels
*
2
,
base_channels
*
8
,
ids
=
[
-
1
,
-
3
,
-
5
,
-
6
]
),
Conv
(
transition_channels
*
2
,
transition
_channels
*
4
,
3
,
2
),
Block
(
transition_channels
*
4
,
block_channels
*
2
,
transition_channels
*
8
,
n
=
n
,
ids
=
ids
),
)
self
.
dark3
=
nn
.
Sequential
(
RCSPDark_Transition
(
base_channels
*
8
,
base
_channels
*
4
),
RCSPDark_Block
(
base_channels
*
8
,
base_channels
*
4
,
base_channels
*
16
,
ids
=
[
-
1
,
-
3
,
-
5
,
-
6
]
),
Transition
(
transition_channels
*
8
,
transition
_channels
*
4
),
Block
(
transition_channels
*
8
,
block_channels
*
4
,
transition_channels
*
16
,
n
=
n
,
ids
=
ids
),
)
self
.
dark4
=
nn
.
Sequential
(
RCSPDark_Transition
(
base_channels
*
16
,
base
_channels
*
8
),
RCSPDark_Block
(
base_channels
*
16
,
base_channels
*
8
,
base_channels
*
32
,
ids
=
[
-
1
,
-
3
,
-
5
,
-
6
]
),
Transition
(
transition_channels
*
16
,
transition
_channels
*
8
),
Block
(
transition_channels
*
16
,
block_channels
*
8
,
transition_channels
*
32
,
n
=
n
,
ids
=
ids
),
)
self
.
dark5
=
nn
.
Sequential
(
RCSPDark_Transition
(
base_channels
*
32
,
base
_channels
*
16
),
RCSPDark_Block
(
base_channels
*
32
,
base_channels
*
8
,
base_channels
*
32
,
e
=
1
/
4
,
ids
=
[
-
1
,
-
3
,
-
5
,
-
6
]
),
Transition
(
transition_channels
*
32
,
transition
_channels
*
16
),
Block
(
transition_channels
*
32
,
block_channels
*
8
,
transition_channels
*
32
,
n
=
n
,
ids
=
ids
),
)
if
pretrained
:
phi
=
'l'
url
=
{
"l"
:
'https://github.com/bubbliiiing/yolov7-pytorch/releases/download/v1.0/cspdarknet_backbone.pth'
,
"l"
:
'https://github.com/bubbliiiing/yolov7-pytorch/releases/download/v1.0/yolov7_backbone.pth'
,
"x"
:
'https://github.com/bubbliiiing/yolov7-pytorch/releases/download/v1.0/yolov7_x_backbone.pth'
,
}[
phi
]
checkpoint
=
torch
.
hub
.
load_state_dict_from_url
(
url
=
url
,
map_location
=
"cpu"
,
model_dir
=
"./model_data"
)
self
.
load_state_dict
(
checkpoint
,
strict
=
False
)
...
...
nets/yolo.py
浏览文件 @
cdc66baa
...
...
@@ -2,8 +2,7 @@ import numpy as np
import
torch
import
torch.nn
as
nn
from
nets.CSPdarknet
import
(
Conv
,
CSPDarknet
,
RCSPDark_Block
,
RCSPDark_Transition
,
SiLU
,
autopad
)
from
nets.backbone
import
Backbone
,
Block
,
Conv
,
SiLU
,
Transition
,
autopad
class
SPPCSPC
(
nn
.
Module
):
...
...
@@ -211,47 +210,55 @@ def fuse_conv_and_bn(conv, bn):
# yolo_body
#---------------------------------------------------#
class
YoloBody
(
nn
.
Module
):
def
__init__
(
self
,
anchors_mask
,
num_classes
,
pretrained
=
False
):
def
__init__
(
self
,
anchors_mask
,
num_classes
,
p
hi
,
p
retrained
=
False
):
super
(
YoloBody
,
self
).
__init__
()
base_channels
=
32
#-----------------------------------------------#
# 定义了不同yolov7版本的参数
#-----------------------------------------------#
transition_channels
=
{
'l'
:
32
,
'x'
:
40
}[
phi
]
block_channels
=
32
panet_channels
=
{
'l'
:
32
,
'x'
:
64
}[
phi
]
e
=
{
'l'
:
2
,
'x'
:
1
}[
phi
]
n
=
{
'l'
:
4
,
'x'
:
6
}[
phi
]
ids
=
{
'l'
:
[
-
1
,
-
2
,
-
3
,
-
4
,
-
5
,
-
6
],
'x'
:
[
-
1
,
-
3
,
-
5
,
-
7
,
-
8
]}[
phi
]
conv
=
{
'l'
:
RepConv
,
'x'
:
Conv
}[
phi
]
#-----------------------------------------------#
# 输入图片是640, 640, 3
# 初始的基本通道是64
#-----------------------------------------------#
#---------------------------------------------------#
# 生成主干模型
# 获得三个有效特征层,他们的shape分别是:
# 80,
80,
512
# 40,
40,
1024
# 20,
20,
1024
# 80,
80,
512
# 40,
40,
1024
# 20,
20,
1024
#---------------------------------------------------#
self
.
backbone
=
CSPDarknet
(
base_channels
,
pretrained
=
pretrained
)
self
.
backbone
=
Backbone
(
transition_channels
,
block_channels
,
n
,
phi
,
pretrained
=
pretrained
)
self
.
upsample
=
nn
.
Upsample
(
scale_factor
=
2
,
mode
=
"nearest"
)
self
.
sppcspc
=
SPPCSPC
(
base_channels
*
32
,
base
_channels
*
16
)
self
.
conv_for_P5
=
Conv
(
base_channels
*
16
,
base
_channels
*
8
)
self
.
conv_for_feat2
=
Conv
(
base_channels
*
32
,
base
_channels
*
8
)
self
.
conv3_for_upsample1
=
RCSPDark_Block
(
base_channels
*
16
,
base_channels
*
4
,
base_channels
*
8
,
ids
=
[
-
1
,
-
2
,
-
3
,
-
4
,
-
5
,
-
6
]
)
self
.
sppcspc
=
SPPCSPC
(
transition_channels
*
32
,
transition
_channels
*
16
)
self
.
conv_for_P5
=
Conv
(
transition_channels
*
16
,
transition
_channels
*
8
)
self
.
conv_for_feat2
=
Conv
(
transition_channels
*
32
,
transition
_channels
*
8
)
self
.
conv3_for_upsample1
=
Block
(
transition_channels
*
16
,
panet_channels
*
4
,
transition_channels
*
8
,
e
=
e
,
n
=
n
,
ids
=
ids
)
self
.
conv_for_P4
=
Conv
(
base_channels
*
8
,
base
_channels
*
4
)
self
.
conv_for_feat1
=
Conv
(
base_channels
*
16
,
base
_channels
*
4
)
self
.
conv3_for_upsample2
=
RCSPDark_Block
(
base_channels
*
8
,
base_channels
*
2
,
base_channels
*
4
,
ids
=
[
-
1
,
-
2
,
-
3
,
-
4
,
-
5
,
-
6
]
)
self
.
conv_for_P4
=
Conv
(
transition_channels
*
8
,
transition
_channels
*
4
)
self
.
conv_for_feat1
=
Conv
(
transition_channels
*
16
,
transition
_channels
*
4
)
self
.
conv3_for_upsample2
=
Block
(
transition_channels
*
8
,
panet_channels
*
2
,
transition_channels
*
4
,
e
=
e
,
n
=
n
,
ids
=
ids
)
self
.
down_sample1
=
RCSPDark_Transition
(
base_channels
*
4
,
base
_channels
*
4
)
self
.
conv3_for_downsample1
=
RCSPDark_Block
(
base_channels
*
16
,
base_channels
*
4
,
base_channels
*
8
,
ids
=
[
-
1
,
-
2
,
-
3
,
-
4
,
-
5
,
-
6
]
)
self
.
down_sample1
=
Transition
(
transition_channels
*
4
,
transition
_channels
*
4
)
self
.
conv3_for_downsample1
=
Block
(
transition_channels
*
16
,
panet_channels
*
4
,
transition_channels
*
8
,
e
=
e
,
n
=
n
,
ids
=
ids
)
self
.
down_sample2
=
RCSPDark_Transition
(
base_channels
*
8
,
base
_channels
*
8
)
self
.
conv3_for_downsample2
=
RCSPDark_Block
(
base_channels
*
32
,
base_channels
*
8
,
base_channels
*
16
,
ids
=
[
-
1
,
-
2
,
-
3
,
-
4
,
-
5
,
-
6
]
)
self
.
down_sample2
=
Transition
(
transition_channels
*
8
,
transition
_channels
*
8
)
self
.
conv3_for_downsample2
=
Block
(
transition_channels
*
32
,
panet_channels
*
8
,
transition_channels
*
16
,
e
=
e
,
n
=
n
,
ids
=
ids
)
self
.
rep_conv_1
=
RepConv
(
base_channels
*
4
,
base
_channels
*
8
,
3
,
1
)
self
.
rep_conv_2
=
RepConv
(
base_channels
*
8
,
base
_channels
*
16
,
3
,
1
)
self
.
rep_conv_3
=
RepConv
(
base_channels
*
16
,
base
_channels
*
32
,
3
,
1
)
self
.
rep_conv_1
=
conv
(
transition_channels
*
4
,
transition
_channels
*
8
,
3
,
1
)
self
.
rep_conv_2
=
conv
(
transition_channels
*
8
,
transition
_channels
*
16
,
3
,
1
)
self
.
rep_conv_3
=
conv
(
transition_channels
*
16
,
transition
_channels
*
32
,
3
,
1
)
self
.
yolo_head_P3
=
nn
.
Conv2d
(
base
_channels
*
8
,
len
(
anchors_mask
[
2
])
*
(
5
+
num_classes
),
1
)
self
.
yolo_head_P4
=
nn
.
Conv2d
(
base
_channels
*
16
,
len
(
anchors_mask
[
1
])
*
(
5
+
num_classes
),
1
)
self
.
yolo_head_P5
=
nn
.
Conv2d
(
base
_channels
*
32
,
len
(
anchors_mask
[
0
])
*
(
5
+
num_classes
),
1
)
self
.
yolo_head_P3
=
nn
.
Conv2d
(
transition
_channels
*
8
,
len
(
anchors_mask
[
2
])
*
(
5
+
num_classes
),
1
)
self
.
yolo_head_P4
=
nn
.
Conv2d
(
transition
_channels
*
16
,
len
(
anchors_mask
[
1
])
*
(
5
+
num_classes
),
1
)
self
.
yolo_head_P5
=
nn
.
Conv2d
(
transition
_channels
*
32
,
len
(
anchors_mask
[
0
])
*
(
5
+
num_classes
),
1
)
def
fuse
(
self
):
print
(
'Fusing layers... '
)
...
...
nets/yolo_training.py
浏览文件 @
cdc66baa
...
...
@@ -8,48 +8,50 @@ import torch.nn as nn
import
torch.nn.functional
as
F
def
box_iou
(
box1
,
box2
):
# https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py
"""
Return intersection-over-union (Jaccard index) of boxes.
Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
Arguments:
box1 (Tensor[N, 4])
box2 (Tensor[M, 4])
Returns:
iou (Tensor[N, M]): the NxM matrix containing the pairwise
IoU values for every element in boxes1 and boxes2
"""
def
box_area
(
box
):
# box = 4xn
return
(
box
[
2
]
-
box
[
0
])
*
(
box
[
3
]
-
box
[
1
])
area1
=
box_area
(
box1
.
T
)
area2
=
box_area
(
box2
.
T
)
def
smooth_BCE
(
eps
=
0.1
):
# https://github.com/ultralytics/yolov3/issues/238#issuecomment-598028441
# return positive, negative label smoothing BCE targets
return
1.0
-
0.5
*
eps
,
0.5
*
eps
# inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2)
inter
=
(
torch
.
min
(
box1
[:,
None
,
2
:],
box2
[:,
2
:])
-
torch
.
max
(
box1
[:,
None
,
:
2
],
box2
[:,
:
2
])).
clamp
(
0
).
prod
(
2
)
return
inter
/
(
area1
[:,
None
]
+
area2
-
inter
)
# iou = inter / (area1 + area2 - inter)
class
YOLOLoss
(
nn
.
Module
):
def
__init__
(
self
,
anchors
,
num_classes
,
input_shape
,
anchors_mask
=
[[
6
,
7
,
8
],
[
3
,
4
,
5
],
[
0
,
1
,
2
]],
label_smoothing
=
0
):
super
(
YOLOLoss
,
self
).
__init__
()
#-----------------------------------------------------------#
# 13x13的特征层对应的anchor是[142, 110],[192, 243],[459, 401]
# 26x26的特征层对应的anchor是[36, 75],[76, 55],[72, 146]
# 52x52的特征层对应的anchor是[12, 16],[19, 36],[40, 28]
#-----------------------------------------------------------#
self
.
anchors
=
[
anchors
[
mask
]
for
mask
in
anchors_mask
]
self
.
num_classes
=
num_classes
self
.
input_shape
=
input_shape
self
.
anchors_mask
=
anchors_mask
def
bbox_iou
(
box1
,
box2
,
x1y1x2y2
=
True
,
GIoU
=
False
,
DIoU
=
False
,
CIoU
=
False
,
eps
=
1e-7
):
# Returns the IoU of box1 to box2. box1 is 4, box2 is nx4
self
.
balance
=
[
0.4
,
1.0
,
4
]
self
.
stride
=
[
32
,
16
,
8
]
self
.
box_ratio
=
0.05
self
.
obj_ratio
=
1
*
(
input_shape
[
0
]
*
input_shape
[
1
])
/
(
640
**
2
)
self
.
cls_ratio
=
0.5
*
(
num_classes
/
80
)
self
.
threshold
=
4
self
.
cp
,
self
.
cn
=
smooth_BCE
(
eps
=
label_smoothing
)
self
.
BCEcls
,
self
.
BCEobj
,
self
.
gr
=
nn
.
BCEWithLogitsLoss
(),
nn
.
BCEWithLogitsLoss
(),
1
def
bbox_iou
(
self
,
box1
,
box2
,
x1y1x2y2
=
True
,
GIoU
=
False
,
DIoU
=
False
,
CIoU
=
False
,
eps
=
1e-7
):
box2
=
box2
.
T
# Get the coordinates of bounding boxes
if
x1y1x2y2
:
# x1, y1, x2, y2 = box1
if
x1y1x2y2
:
b1_x1
,
b1_y1
,
b1_x2
,
b1_y2
=
box1
[
0
],
box1
[
1
],
box1
[
2
],
box1
[
3
]
b2_x1
,
b2_y1
,
b2_x2
,
b2_y2
=
box2
[
0
],
box2
[
1
],
box2
[
2
],
box2
[
3
]
else
:
# transform from xywh to xyxy
else
:
b1_x1
,
b1_x2
=
box1
[
0
]
-
box1
[
2
]
/
2
,
box1
[
0
]
+
box1
[
2
]
/
2
b1_y1
,
b1_y2
=
box1
[
1
]
-
box1
[
3
]
/
2
,
box1
[
1
]
+
box1
[
3
]
/
2
b2_x1
,
b2_x2
=
box2
[
0
]
-
box2
[
2
]
/
2
,
box2
[
0
]
+
box2
[
2
]
/
2
b2_y1
,
b2_y2
=
box2
[
1
]
-
box2
[
3
]
/
2
,
box2
[
1
]
+
box2
[
3
]
/
2
# Intersection area
inter
=
(
torch
.
min
(
b1_x2
,
b2_x2
)
-
torch
.
max
(
b1_x1
,
b2_x1
)).
clamp
(
0
)
*
\
(
torch
.
min
(
b1_y2
,
b2_y2
)
-
torch
.
max
(
b1_y1
,
b2_y1
)).
clamp
(
0
)
# Union Area
w1
,
h1
=
b1_x2
-
b1_x1
,
b1_y2
-
b1_y1
+
eps
w2
,
h2
=
b2_x2
-
b2_x1
,
b2_y2
-
b2_y1
+
eps
union
=
w1
*
h1
+
w2
*
h2
-
inter
+
eps
...
...
@@ -76,118 +78,180 @@ def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps=
else
:
return
iou
# IoU
def
xywh2xyxy
(
x
):
# Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
y
=
x
.
clone
()
if
isinstance
(
x
,
torch
.
Tensor
)
else
np
.
copy
(
x
)
y
[:,
0
]
=
x
[:,
0
]
-
x
[:,
2
]
/
2
# top left x
y
[:,
1
]
=
x
[:,
1
]
-
x
[:,
3
]
/
2
# top left y
y
[:,
2
]
=
x
[:,
0
]
+
x
[:,
2
]
/
2
# bottom right x
y
[:,
3
]
=
x
[:,
1
]
+
x
[:,
3
]
/
2
# bottom right y
return
y
def
smooth_BCE
(
eps
=
0.1
):
# https://github.com/ultralytics/yolov3/issues/238#issuecomment-598028441
# return positive, negative label smoothing BCE targets
return
1.0
-
0.5
*
eps
,
0.5
*
eps
class
YOLOLoss
(
nn
.
Module
):
def
__init__
(
self
,
anchors
,
num_classes
,
input_shape
,
anchors_mask
=
[[
6
,
7
,
8
],
[
3
,
4
,
5
],
[
0
,
1
,
2
]],
label_smoothing
=
0
):
super
(
YOLOLoss
,
self
).
__init__
()
#-----------------------------------------------------------#
# 20x20的特征层对应的anchor是[116,90],[156,198],[373,326]
# 40x40的特征层对应的anchor是[30,61],[62,45],[59,119]
# 80x80的特征层对应的anchor是[10,13],[16,30],[33,23]
#-----------------------------------------------------------#
self
.
anchors
=
[
anchors
[
mask
]
for
mask
in
anchors_mask
]
self
.
num_classes
=
num_classes
self
.
input_shape
=
input_shape
self
.
anchors_mask
=
anchors_mask
self
.
balance
=
[
0.4
,
1.0
,
4
]
self
.
stride
=
[
32
,
16
,
8
]
self
.
box_ratio
=
0.05
self
.
obj_ratio
=
1
*
(
input_shape
[
0
]
*
input_shape
[
1
])
/
(
640
**
2
)
self
.
cls_ratio
=
0.5
*
(
num_classes
/
80
)
self
.
threshold
=
4
self
.
cp
,
self
.
cn
=
smooth_BCE
(
eps
=
label_smoothing
)
self
.
BCEcls
,
self
.
BCEobj
,
self
.
gr
=
nn
.
BCEWithLogitsLoss
(),
nn
.
BCEWithLogitsLoss
(),
1
def
__call__
(
self
,
p
,
targets
,
imgs
):
# predictions, targets, model
for
i
in
range
(
len
(
p
)):
bs
,
_
,
h
,
w
=
p
[
i
].
size
()
p
[
i
]
=
p
[
i
].
view
(
bs
,
len
(
self
.
anchors_mask
[
i
]),
-
1
,
h
,
w
).
permute
(
0
,
1
,
3
,
4
,
2
).
contiguous
()
def
__call__
(
self
,
predictions
,
targets
,
imgs
):
#-------------------------------------------#
# 对输入进来的预测结果进行reshape
# bs, 255, 20, 20 => bs, 3, 20, 20, 85
# bs, 255, 40, 40 => bs, 3, 40, 40, 85
# bs, 255, 80, 80 => bs, 3, 80, 80, 85
#-------------------------------------------#
for
i
in
range
(
len
(
predictions
)):
bs
,
_
,
h
,
w
=
predictions
[
i
].
size
()
predictions
[
i
]
=
predictions
[
i
].
view
(
bs
,
len
(
self
.
anchors_mask
[
i
]),
-
1
,
h
,
w
).
permute
(
0
,
1
,
3
,
4
,
2
).
contiguous
()
#-------------------------------------------#
# 获得工作的设备
#-------------------------------------------#
device
=
targets
.
device
lcls
,
lbox
,
lobj
=
torch
.
zeros
(
1
,
device
=
device
),
torch
.
zeros
(
1
,
device
=
device
),
torch
.
zeros
(
1
,
device
=
device
)
bs
,
as_
,
gjs
,
gis
,
targets
,
anchors
=
self
.
build_targets
(
p
,
targets
,
imgs
)
pre_gen_gains
=
[
torch
.
tensor
(
pp
.
shape
,
device
=
device
)[[
3
,
2
,
3
,
2
]].
type_as
(
pp
)
for
pp
in
p
]
# Losses
for
i
,
pi
in
enumerate
(
p
):
# layer index, layer predictions
b
,
a
,
gj
,
gi
=
bs
[
i
],
as_
[
i
],
gjs
[
i
],
gis
[
i
]
# image, anchor, gridy, gridx
tobj
=
torch
.
zeros_like
(
pi
[...,
0
],
device
=
device
)
# target obj
n
=
b
.
shape
[
0
]
# number of targets
#-------------------------------------------#
# 初始化三个部分的损失
#-------------------------------------------#
cls_loss
,
box_loss
,
obj_loss
=
torch
.
zeros
(
1
,
device
=
device
),
torch
.
zeros
(
1
,
device
=
device
),
torch
.
zeros
(
1
,
device
=
device
)
#-------------------------------------------#
# 进行正样本的匹配
#-------------------------------------------#
bs
,
as_
,
gjs
,
gis
,
targets
,
anchors
=
self
.
build_targets
(
predictions
,
targets
,
imgs
)
#-------------------------------------------#
# 计算获得对应特征层的高宽
#-------------------------------------------#
feature_map_sizes
=
[
torch
.
tensor
(
prediction
.
shape
,
device
=
device
)[[
3
,
2
,
3
,
2
]].
type_as
(
prediction
)
for
prediction
in
predictions
]
#-------------------------------------------#
# 计算损失,对三个特征层各自进行处理
#-------------------------------------------#
for
i
,
prediction
in
enumerate
(
predictions
):
#-------------------------------------------#
# image, anchor, gridy, gridx
#-------------------------------------------#
b
,
a
,
gj
,
gi
=
bs
[
i
],
as_
[
i
],
gjs
[
i
],
gis
[
i
]
tobj
=
torch
.
zeros_like
(
prediction
[...,
0
],
device
=
device
)
# target obj
#-------------------------------------------#
# 获得目标数量,如果目标大于0
# 则开始计算种类损失和回归损失
#-------------------------------------------#
n
=
b
.
shape
[
0
]
if
n
:
ps
=
pi
[
b
,
a
,
gj
,
gi
]
# prediction subset corresponding to targets
# Regression
prediction_pos
=
prediction
[
b
,
a
,
gj
,
gi
]
# prediction subset corresponding to targets
#-------------------------------------------#
# 计算匹配上的正样本的回归损失
#-------------------------------------------#
#-------------------------------------------#
# grid 获得正样本的x、y轴坐标
#-------------------------------------------#
grid
=
torch
.
stack
([
gi
,
gj
],
dim
=
1
)
pxy
=
ps
[:,
:
2
].
sigmoid
()
*
2.
-
0.5
#pxy = ps[:, :2].sigmoid() * 3. - 1.
pwh
=
(
ps
[:,
2
:
4
].
sigmoid
()
*
2
)
**
2
*
anchors
[
i
]
pbox
=
torch
.
cat
((
pxy
,
pwh
),
1
)
# predicted box
selected_tbox
=
targets
[
i
][:,
2
:
6
]
*
pre_gen_gains
[
i
]
selected_tbox
[:,
:
2
]
-=
grid
.
type_as
(
pi
)
iou
=
bbox_iou
(
pbox
.
T
,
selected_tbox
,
x1y1x2y2
=
False
,
CIoU
=
True
)
# iou(prediction, target)
lbox
+=
(
1.0
-
iou
).
mean
()
# iou loss
# Objectness
#-------------------------------------------#
# 进行解码,获得预测结果
#-------------------------------------------#
xy
=
prediction_pos
[:,
:
2
].
sigmoid
()
*
2.
-
0.5
wh
=
(
prediction_pos
[:,
2
:
4
].
sigmoid
()
*
2
)
**
2
*
anchors
[
i
]
box
=
torch
.
cat
((
xy
,
wh
),
1
)
#-------------------------------------------#
# 对真实框进行处理,映射到特征层上
#-------------------------------------------#
selected_tbox
=
targets
[
i
][:,
2
:
6
]
*
feature_map_sizes
[
i
]
selected_tbox
[:,
:
2
]
-=
grid
.
type_as
(
prediction
)
#-------------------------------------------#
# 计算预测框和真实框的回归损失
#-------------------------------------------#
iou
=
self
.
bbox_iou
(
box
.
T
,
selected_tbox
,
x1y1x2y2
=
False
,
CIoU
=
True
)
box_loss
+=
(
1.0
-
iou
).
mean
()
#-------------------------------------------#
# 根据预测结果的iou获得置信度损失的gt
#-------------------------------------------#
tobj
[
b
,
a
,
gj
,
gi
]
=
(
1.0
-
self
.
gr
)
+
self
.
gr
*
iou
.
detach
().
clamp
(
0
).
type
(
tobj
.
dtype
)
# iou ratio
# Classification
#-------------------------------------------#
# 计算匹配上的正样本的分类损失
#-------------------------------------------#
selected_tcls
=
targets
[
i
][:,
1
].
long
()
if
self
.
num_classes
>
1
:
# cls loss (only if multiple classes)
t
=
torch
.
full_like
(
ps
[:,
5
:],
self
.
cn
,
device
=
device
)
# targets
t
=
torch
.
full_like
(
prediction_pos
[:,
5
:],
self
.
cn
,
device
=
device
)
# targets
t
[
range
(
n
),
selected_tcls
]
=
self
.
cp
lcls
+=
self
.
BCEcls
(
ps
[:,
5
:],
t
)
# BCE
# Append targets to text file
# with open('targets.txt', 'a') as file:
# [file.write('%11.5g ' * 4 % tuple(x) + '\n') for x in torch.cat((txy[i], twh[i]), 1)]
obji
=
self
.
BCEobj
(
pi
[...,
4
],
tobj
)
lobj
+=
obji
*
self
.
balance
[
i
]
# obj loss
lbox
*=
self
.
box_ratio
lobj
*=
self
.
obj_ratio
lcls
*=
self
.
cls_ratio
bs
=
tobj
.
shape
[
0
]
# batch size
loss
=
lbox
+
lobj
+
lcls
cls_loss
+=
self
.
BCEcls
(
prediction_pos
[:,
5
:],
t
)
# BCE
#-------------------------------------------#
# 计算目标是否存在的置信度损失
# 并且乘上每个特征层的比例
#-------------------------------------------#
obj_loss
+=
self
.
BCEobj
(
prediction
[...,
4
],
tobj
)
*
self
.
balance
[
i
]
# obj loss
#-------------------------------------------#
# 将各个部分的损失乘上比例
# 全加起来后,乘上batch_size
#-------------------------------------------#
box_loss
*=
self
.
box_ratio
obj_loss
*=
self
.
obj_ratio
cls_loss
*=
self
.
cls_ratio
bs
=
tobj
.
shape
[
0
]
loss
=
box_loss
+
obj_loss
+
cls_loss
return
loss
*
bs
def
build_targets
(
self
,
p
,
targets
,
imgs
):
indices
,
anch
=
self
.
find_3_positive
(
p
,
targets
)
def
xywh2xyxy
(
self
,
x
):
# Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2]
y
=
x
.
clone
()
if
isinstance
(
x
,
torch
.
Tensor
)
else
np
.
copy
(
x
)
y
[:,
0
]
=
x
[:,
0
]
-
x
[:,
2
]
/
2
# top left x
y
[:,
1
]
=
x
[:,
1
]
-
x
[:,
3
]
/
2
# top left y
y
[:,
2
]
=
x
[:,
0
]
+
x
[:,
2
]
/
2
# bottom right x
y
[:,
3
]
=
x
[:,
1
]
+
x
[:,
3
]
/
2
# bottom right y
return
y
matching_bs
=
[[]
for
pp
in
p
]
matching_as
=
[[]
for
pp
in
p
]
matching_gjs
=
[[]
for
pp
in
p
]
matching_gis
=
[[]
for
pp
in
p
]
matching_targets
=
[[]
for
pp
in
p
]
matching_anchs
=
[[]
for
pp
in
p
]
def
box_iou
(
self
,
box1
,
box2
):
# https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py
"""
Return intersection-over-union (Jaccard index) of boxes.
Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
Arguments:
box1 (Tensor[N, 4])
box2 (Tensor[M, 4])
Returns:
iou (Tensor[N, M]): the NxM matrix containing the pairwise
IoU values for every element in boxes1 and boxes2
"""
def
box_area
(
box
):
# box = 4xn
return
(
box
[
2
]
-
box
[
0
])
*
(
box
[
3
]
-
box
[
1
])
nl
=
len
(
p
)
area1
=
box_area
(
box1
.
T
)
area2
=
box_area
(
box2
.
T
)
for
batch_idx
in
range
(
p
[
0
].
shape
[
0
]):
# inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2)
inter
=
(
torch
.
min
(
box1
[:,
None
,
2
:],
box2
[:,
2
:])
-
torch
.
max
(
box1
[:,
None
,
:
2
],
box2
[:,
:
2
])).
clamp
(
0
).
prod
(
2
)
return
inter
/
(
area1
[:,
None
]
+
area2
-
inter
)
# iou = inter / (area1 + area2 - inter)
def
build_targets
(
self
,
predictions
,
targets
,
imgs
):
#-------------------------------------------#
# 匹配正样本
#-------------------------------------------#
indices
,
anch
=
self
.
find_3_positive
(
predictions
,
targets
)
matching_bs
=
[[]
for
_
in
predictions
]
matching_as
=
[[]
for
_
in
predictions
]
matching_gjs
=
[[]
for
_
in
predictions
]
matching_gis
=
[[]
for
_
in
predictions
]
matching_targets
=
[[]
for
_
in
predictions
]
matching_anchs
=
[[]
for
_
in
predictions
]
#-------------------------------------------#
# 一共三层
#-------------------------------------------#
num_layer
=
len
(
predictions
)
#-------------------------------------------#
# 对batch_size进行循环,进行OTA匹配
# 在batch_size循环中对layer进行循环
#-------------------------------------------#
for
batch_idx
in
range
(
predictions
[
0
].
shape
[
0
]):
#-------------------------------------------#
# 先判断匹配上的真实框哪些属于该图片
#-------------------------------------------#
b_idx
=
targets
[:,
0
]
==
batch_idx
this_target
=
targets
[
b_idx
]
#-------------------------------------------#
# 如果没有真实框属于该图片则continue
#-------------------------------------------#
if
this_target
.
shape
[
0
]
==
0
:
continue
#-------------------------------------------#
# 真实框的坐标进行缩放
#-------------------------------------------#
txywh
=
this_target
[:,
2
:
6
]
*
imgs
[
batch_idx
].
shape
[
1
]
txyxy
=
xywh2xyxy
(
txywh
)
#-------------------------------------------#
# 从中心宽高到左上角右下角
#-------------------------------------------#
txyxy
=
self
.
xywh2xyxy
(
txywh
)
pxyxys
=
[]
p_cls
=
[]
...
...
@@ -199,11 +263,18 @@ class YOLOLoss(nn.Module):
all_gi
=
[]
all_anch
=
[]
for
i
,
pi
in
enumerate
(
p
):
#-------------------------------------------#
# 对三个layer进行循环
#-------------------------------------------#
for
i
,
prediction
in
enumerate
(
predictions
):
#-------------------------------------------#
# b代表第几张图片 a代表第几个先验框
# gj代表y轴,gi代表x轴
#-------------------------------------------#
b
,
a
,
gj
,
gi
=
indices
[
i
]
idx
=
(
b
==
batch_idx
)
b
,
a
,
gj
,
gi
=
b
[
idx
],
a
[
idx
],
gj
[
idx
],
gi
[
idx
]
all_b
.
append
(
b
)
all_a
.
append
(
a
)
all_gj
.
append
(
gj
)
...
...
@@ -211,21 +282,33 @@ class YOLOLoss(nn.Module):
all_anch
.
append
(
anch
[
i
][
idx
])
from_which_layer
.
append
(
torch
.
ones
(
size
=
(
len
(
b
),))
*
i
)
fg_pred
=
pi
[
b
,
a
,
gj
,
gi
]
#-------------------------------------------#
# 取出这个真实框对应的预测结果
#-------------------------------------------#
fg_pred
=
prediction
[
b
,
a
,
gj
,
gi
]
p_obj
.
append
(
fg_pred
[:,
4
:
5
])
p_cls
.
append
(
fg_pred
[:,
5
:])
#-------------------------------------------#
# 获得网格后,进行解码
#-------------------------------------------#
grid
=
torch
.
stack
([
gi
,
gj
],
dim
=
1
).
type_as
(
fg_pred
)
pxy
=
(
fg_pred
[:,
:
2
].
sigmoid
()
*
2.
-
0.5
+
grid
)
*
self
.
stride
[
i
]
#/ 8.
#pxy = (fg_pred[:, :2].sigmoid() * 3. - 1. + grid) * self.stride[i]
pwh
=
(
fg_pred
[:,
2
:
4
].
sigmoid
()
*
2
)
**
2
*
anch
[
i
][
idx
]
*
self
.
stride
[
i
]
#/ 8.
pxy
=
(
fg_pred
[:,
:
2
].
sigmoid
()
*
2.
-
0.5
+
grid
)
*
self
.
stride
[
i
]
pwh
=
(
fg_pred
[:,
2
:
4
].
sigmoid
()
*
2
)
**
2
*
anch
[
i
][
idx
]
*
self
.
stride
[
i
]
pxywh
=
torch
.
cat
([
pxy
,
pwh
],
dim
=-
1
)
pxyxy
=
xywh2xyxy
(
pxywh
)
pxyxy
=
self
.
xywh2xyxy
(
pxywh
)
pxyxys
.
append
(
pxyxy
)
#-------------------------------------------#
# 判断是否存在对应的预测框,不存在则跳过
#-------------------------------------------#
pxyxys
=
torch
.
cat
(
pxyxys
,
dim
=
0
)
if
pxyxys
.
shape
[
0
]
==
0
:
continue
#-------------------------------------------#
# 进行堆叠
#-------------------------------------------#
p_obj
=
torch
.
cat
(
p_obj
,
dim
=
0
)
p_cls
=
torch
.
cat
(
p_cls
,
dim
=
0
)
from_which_layer
=
torch
.
cat
(
from_which_layer
,
dim
=
0
)
...
...
@@ -235,46 +318,62 @@ class YOLOLoss(nn.Module):
all_gi
=
torch
.
cat
(
all_gi
,
dim
=
0
)
all_anch
=
torch
.
cat
(
all_anch
,
dim
=
0
)
pair_wise_iou
=
box_iou
(
txyxy
,
pxyxys
)
#-------------------------------------------------------------#
# 计算当前图片中,真实框与预测框的重合程度
# iou的范围为0-1,取-log后为0~inf
# 重合程度越大,取-log后越小
# 因此,真实框与预测框重合度越大,pair_wise_iou_loss越小
#-------------------------------------------------------------#
pair_wise_iou
=
self
.
box_iou
(
txyxy
,
pxyxys
)
pair_wise_iou_loss
=
-
torch
.
log
(
pair_wise_iou
+
1e-8
)
#-------------------------------------------#
# 最多二十个预测框与真实框的重合程度
# 然后求和,找到每个真实框对应几个预测框
#-------------------------------------------#
top_k
,
_
=
torch
.
topk
(
pair_wise_iou
,
min
(
20
,
pair_wise_iou
.
shape
[
1
]),
dim
=
1
)
dynamic_ks
=
torch
.
clamp
(
top_k
.
sum
(
1
).
int
(),
min
=
1
)
gt_cls_per_image
=
(
F
.
one_hot
(
this_target
[:,
1
].
to
(
torch
.
int64
),
self
.
num_classes
)
.
float
()
.
unsqueeze
(
1
)
.
repeat
(
1
,
pxyxys
.
shape
[
0
],
1
)
)
#-------------------------------------------#
# gt_cls_per_image 种类的真实信息
#-------------------------------------------#
gt_cls_per_image
=
F
.
one_hot
(
this_target
[:,
1
].
to
(
torch
.
int64
),
self
.
num_classes
).
float
().
unsqueeze
(
1
).
repeat
(
1
,
pxyxys
.
shape
[
0
],
1
)
#-------------------------------------------#
# cls_preds_ 种类置信度的预测信息
# cls_preds_越接近于1,y越接近于1
# y / (1 - y)越接近于无穷大
# 也就是种类置信度预测的越准
# pair_wise_cls_loss越小
#-------------------------------------------#
num_gt
=
this_target
.
shape
[
0
]
cls_preds_
=
(
p_cls
.
float
().
unsqueeze
(
0
).
repeat
(
num_gt
,
1
,
1
).
sigmoid_
()
*
p_obj
.
unsqueeze
(
0
).
repeat
(
num_gt
,
1
,
1
).
sigmoid_
()
)
cls_preds_
=
p_cls
.
float
().
unsqueeze
(
0
).
repeat
(
num_gt
,
1
,
1
).
sigmoid_
()
*
p_obj
.
unsqueeze
(
0
).
repeat
(
num_gt
,
1
,
1
).
sigmoid_
()
y
=
cls_preds_
.
sqrt_
()
pair_wise_cls_loss
=
F
.
binary_cross_entropy_with_logits
(
torch
.
log
(
y
/
(
1
-
y
))
,
gt_cls_per_image
,
reduction
=
"none"
).
sum
(
-
1
)
pair_wise_cls_loss
=
F
.
binary_cross_entropy_with_logits
(
torch
.
log
(
y
/
(
1
-
y
)),
gt_cls_per_image
,
reduction
=
"none"
).
sum
(
-
1
)
del
cls_preds_
#-------------------------------------------#
# 求cost的总和
#-------------------------------------------#
cost
=
(
pair_wise_cls_loss
+
3.0
*
pair_wise_iou_loss
)
#-------------------------------------------#
# 求cost最小的k个预测框
#-------------------------------------------#
matching_matrix
=
torch
.
zeros_like
(
cost
)
for
gt_idx
in
range
(
num_gt
):
_
,
pos_idx
=
torch
.
topk
(
cost
[
gt_idx
],
k
=
dynamic_ks
[
gt_idx
].
item
(),
largest
=
False
)
_
,
pos_idx
=
torch
.
topk
(
cost
[
gt_idx
],
k
=
dynamic_ks
[
gt_idx
].
item
(),
largest
=
False
)
matching_matrix
[
gt_idx
][
pos_idx
]
=
1.0
del
top_k
,
dynamic_ks
#-------------------------------------------#
# 如果一个预测框对应多个真实框
# 只使用这个预测框最对应的真实框
#-------------------------------------------#
anchor_matching_gt
=
matching_matrix
.
sum
(
0
)
if
(
anchor_matching_gt
>
1
).
sum
()
>
0
:
_
,
cost_argmin
=
torch
.
min
(
cost
[:,
anchor_matching_gt
>
1
],
dim
=
0
)
...
...
@@ -283,16 +382,18 @@ class YOLOLoss(nn.Module):
fg_mask_inboxes
=
matching_matrix
.
sum
(
0
)
>
0.0
matched_gt_inds
=
matching_matrix
[:,
fg_mask_inboxes
].
argmax
(
0
)
#-------------------------------------------#
# 取出符合条件的框
#-------------------------------------------#
from_which_layer
=
from_which_layer
[
fg_mask_inboxes
]
all_b
=
all_b
[
fg_mask_inboxes
]
all_a
=
all_a
[
fg_mask_inboxes
]
all_gj
=
all_gj
[
fg_mask_inboxes
]
all_gi
=
all_gi
[
fg_mask_inboxes
]
all_anch
=
all_anch
[
fg_mask_inboxes
]
this_target
=
this_target
[
matched_gt_inds
]
for
i
in
range
(
n
l
):
for
i
in
range
(
n
um_layer
):
layer_idx
=
from_which_layer
==
i
matching_bs
[
i
].
append
(
all_b
[
layer_idx
])
matching_as
[
i
].
append
(
all_a
[
layer_idx
])
...
...
@@ -301,7 +402,7 @@ class YOLOLoss(nn.Module):
matching_targets
[
i
].
append
(
this_target
[
layer_idx
])
matching_anchs
[
i
].
append
(
all_anch
[
layer_idx
])
for
i
in
range
(
n
l
):
for
i
in
range
(
n
um_layer
):
matching_bs
[
i
]
=
torch
.
cat
(
matching_bs
[
i
],
dim
=
0
)
matching_as
[
i
]
=
torch
.
cat
(
matching_as
[
i
],
dim
=
0
)
matching_gjs
[
i
]
=
torch
.
cat
(
matching_gjs
[
i
],
dim
=
0
)
...
...
@@ -311,58 +412,108 @@ class YOLOLoss(nn.Module):
return
matching_bs
,
matching_as
,
matching_gjs
,
matching_gis
,
matching_targets
,
matching_anchs
def
find_3_positive
(
self
,
p
,
targets
):
# Build targets for compute_loss(), input targets(image,class,x,y,w,h)
na
,
nt
=
len
(
self
.
anchors_mask
[
0
]),
targets
.
shape
[
0
]
# number of anchors, targets
indices
,
anch
=
[],
[]
gain
=
torch
.
ones
(
7
,
device
=
targets
.
device
)
# normalized to gridspace gain
ai
=
torch
.
arange
(
na
,
device
=
targets
.
device
).
float
().
view
(
na
,
1
).
repeat
(
1
,
nt
)
# same as .repeat_interleave(nt)
targets
=
torch
.
cat
((
targets
.
repeat
(
na
,
1
,
1
),
ai
[:,
:,
None
]),
2
)
# append anchor indices
g
=
0.5
# bias
off
=
torch
.
tensor
([[
0
,
0
],
def
find_3_positive
(
self
,
predictions
,
targets
):
#------------------------------------#
# 获得每个特征层先验框的数量
# 与真实框的数量
#------------------------------------#
num_anchor
,
num_gt
=
len
(
self
.
anchors_mask
[
0
]),
targets
.
shape
[
0
]
#------------------------------------#
# 创建空列表存放indices和anchors
#------------------------------------#
indices
,
anchors
=
[],
[]
#------------------------------------#
# 创建7个1
# 序号0,1为1
# 序号2:6为特征层的宽高(依次为 w, h, w, h)
# 序号6为1
#------------------------------------#
gain
=
torch
.
ones
(
7
,
device
=
targets
.
device
)
#------------------------------------#
# ai [num_anchor, num_gt]
# targets [num_gt, 6] => [num_anchor, num_gt, 7]
#------------------------------------#
ai
=
torch
.
arange
(
num_anchor
,
device
=
targets
.
device
).
float
().
view
(
num_anchor
,
1
).
repeat
(
1
,
num_gt
)
targets
=
torch
.
cat
((
targets
.
repeat
(
num_anchor
,
1
,
1
),
ai
[:,
:,
None
]),
2
)
# append anchor indices
g
=
0.5
# offsets
off
=
torch
.
tensor
([
[
0
,
0
],
[
1
,
0
],
[
0
,
1
],
[
-
1
,
0
],
[
0
,
-
1
],
# j,k,l,m
# [1, 1], [1, -1], [-1, 1], [-1, -1], # jk,jm,lk,lm
],
device
=
targets
.
device
).
float
()
*
g
# offsets
for
i
in
range
(
len
(
p
)):
anchors
=
torch
.
from_numpy
(
self
.
anchors
[
i
]).
type_as
(
p
[
i
])
gain
[
2
:
6
]
=
torch
.
tensor
(
p
[
i
].
shape
)[[
3
,
2
,
3
,
2
]]
# xyxy gain
# Match targets to anchors
],
device
=
targets
.
device
).
float
()
*
g
for
i
in
range
(
len
(
predictions
)):
#----------------------------------------------------#
# 将先验框除以stride,获得相对于特征层的先验框。
# anchors_i [num_anchor, 2]
#----------------------------------------------------#
anchors_i
=
torch
.
from_numpy
(
self
.
anchors
[
i
]
/
self
.
stride
[
i
]).
type_as
(
predictions
[
i
])
#-------------------------------------------#
# 计算获得对应特征层的高宽
#-------------------------------------------#
gain
[
2
:
6
]
=
torch
.
tensor
(
predictions
[
i
].
shape
)[[
3
,
2
,
3
,
2
]]
#-------------------------------------------#
# 将真实框乘上gain,
# 其实就是将真实框映射到特征层上
#-------------------------------------------#
t
=
targets
*
gain
if
nt
:
# Matches
r
=
t
[:,
:,
4
:
6
]
/
anchors
[:,
None
]
# wh ratio
j
=
torch
.
max
(
r
,
1.
/
r
).
max
(
2
)[
0
]
<
self
.
threshold
# compare
# j = wh_iou(anchors, t[:, 4:6]) > model.hyp['iou_t'] # iou(3,n)=wh_iou(anchors(3,2), gwh(n,2))
if
num_gt
:
#-------------------------------------------#
# 计算真实框与先验框高宽的比值
# 然后根据比值大小进行判断,
# 判断结果用于取出,获得所有先验框对应的真实框
# r [num_anchor, num_gt, 2]
# t [num_anchor, num_gt, 7] => [num_matched_anchor, 7]
#-------------------------------------------#
r
=
t
[:,
:,
4
:
6
]
/
anchors_i
[:,
None
]
j
=
torch
.
max
(
r
,
1.
/
r
).
max
(
2
)[
0
]
<
self
.
threshold
t
=
t
[
j
]
# filter
# Offsets
#-------------------------------------------#
# gxy 获得所有先验框对应的真实框的x轴y轴坐标
# gxi 取相对于该特征层的右下角的坐标
#-------------------------------------------#
gxy
=
t
[:,
2
:
4
]
# grid xy
gxi
=
gain
[[
2
,
3
]]
-
gxy
# inverse
j
,
k
=
((
gxy
%
1.
<
g
)
&
(
gxy
>
1.
)).
T
l
,
m
=
((
gxi
%
1.
<
g
)
&
(
gxi
>
1.
)).
T
j
=
torch
.
stack
((
torch
.
ones_like
(
j
),
j
,
k
,
l
,
m
))
#-------------------------------------------#
# t 重复5次,使用满足条件的j进行框的提取
# j 一共五行,代表当前特征点在五个
# [0, 0], [1, 0], [0, 1], [-1, 0], [0, -1]
# 方向是否存在
#-------------------------------------------#
t
=
t
.
repeat
((
5
,
1
,
1
))[
j
]
offsets
=
(
torch
.
zeros_like
(
gxy
)[
None
]
+
off
[:,
None
])[
j
]
else
:
t
=
targets
[
0
]
offsets
=
0
# Define
#-------------------------------------------#
# b 代表属于第几个图片
# gxy 代表该真实框所处的x、y中心坐标
# gwh 代表该真实框的宽高
# gij 代表真实框所属的特征点坐标
#-------------------------------------------#
b
,
c
=
t
[:,
:
2
].
long
().
T
# image, class
gxy
=
t
[:,
2
:
4
]
# grid xy
gwh
=
t
[:,
4
:
6
]
# grid wh
gij
=
(
gxy
-
offsets
).
long
()
gi
,
gj
=
gij
.
T
# grid xy indices
# Append
#-------------------------------------------#
# gj、gi不能超出特征层范围
# a代表属于该特征点的第几个先验框
#-------------------------------------------#
a
=
t
[:,
6
].
long
()
# anchor indices
indices
.
append
((
b
,
a
,
gj
.
clamp_
(
0
,
gain
[
3
]
-
1
),
gi
.
clamp_
(
0
,
gain
[
2
]
-
1
)))
# image, anchor, grid indices
anch
.
append
(
anchors
[
a
])
# anchors
anch
ors
.
append
(
anchors_i
[
a
])
# anchors
return
indices
,
anch
return
indices
,
anch
ors
def
is_parallel
(
model
):
# Returns True if model is of type DP or DDP
...
...
summary.py
浏览文件 @
cdc66baa
...
...
@@ -11,9 +11,10 @@ if __name__ == "__main__":
input_shape
=
[
640
,
640
]
anchors_mask
=
[[
6
,
7
,
8
],
[
3
,
4
,
5
],
[
0
,
1
,
2
]]
num_classes
=
80
phi
=
'yolov7'
device
=
torch
.
device
(
"cuda"
if
torch
.
cuda
.
is_available
()
else
"cpu"
)
m
=
YoloBody
(
anchors_mask
,
num_classes
,
False
).
to
(
device
)
m
=
YoloBody
(
anchors_mask
,
num_classes
,
phi
,
False
).
to
(
device
)
summary
(
m
,
(
3
,
input_shape
[
0
],
input_shape
[
1
]))
dummy_input
=
torch
.
randn
(
1
,
3
,
input_shape
[
0
],
input_shape
[
1
]).
to
(
device
)
...
...
train.py
浏览文件 @
cdc66baa
...
...
@@ -98,6 +98,12 @@ if __name__ == "__main__":
# input_shape 输入的shape大小,一定要是32的倍数
#------------------------------------------------------#
input_shape
=
[
640
,
640
]
#------------------------------------------------------#
# phi 所使用到的yolov7的版本,本仓库一共提供两个:
# l : 对应yolov7
# x : 对应yolov7_x
#------------------------------------------------------#
phi
=
'l'
#----------------------------------------------------------------------------------------------------------------------------#
# pretrained 是否使用主干网络的预训练权重,此处使用的是主干的权重,因此是在模型构建的时候进行加载的。
# 如果设置了model_path,则主干的权值无需加载,pretrained的值无意义。
...
...
@@ -268,15 +274,15 @@ if __name__ == "__main__":
if
pretrained
:
if
distributed
:
if
local_rank
==
0
:
download_weights
()
download_weights
(
phi
)
dist
.
barrier
()
else
:
download_weights
()
download_weights
(
phi
)
#------------------------------------------------------#
# 创建yolo模型
#------------------------------------------------------#
model
=
YoloBody
(
anchors_mask
,
num_classes
,
pretrained
=
pretrained
)
model
=
YoloBody
(
anchors_mask
,
num_classes
,
p
hi
,
p
retrained
=
pretrained
)
if
not
pretrained
:
weights_init
(
model
)
if
model_path
!=
''
:
...
...
utils/dataloader.py
浏览文件 @
cdc66baa
...
...
@@ -57,7 +57,10 @@ class YoloDataset(Dataset):
image
=
np
.
transpose
(
preprocess_input
(
np
.
array
(
image
,
dtype
=
np
.
float32
)),
(
2
,
0
,
1
))
box
=
np
.
array
(
box
,
dtype
=
np
.
float32
)
nL
=
len
(
box
)
# number of labels
#---------------------------------------------------#
# 对真实框进行预处理
#---------------------------------------------------#
nL
=
len
(
box
)
labels_out
=
np
.
zeros
((
nL
,
6
))
if
nL
:
#---------------------------------------------------#
...
...
@@ -73,6 +76,10 @@ class YoloDataset(Dataset):
box
[:,
2
:
4
]
=
box
[:,
2
:
4
]
-
box
[:,
0
:
2
]
box
[:,
0
:
2
]
=
box
[:,
0
:
2
]
+
box
[:,
2
:
4
]
/
2
#---------------------------------------------------#
# 调整顺序,符合训练的格式
# labels_out中序号为0的部分在collate时处理
#---------------------------------------------------#
labels_out
[:,
1
]
=
box
[:,
-
1
]
labels_out
[:,
2
:]
=
box
[:,
:
4
]
...
...
utils/utils.py
浏览文件 @
cdc66baa
...
...
@@ -71,13 +71,13 @@ def show_config(**kwargs):
print
(
'|%25s | %40s|'
%
(
str
(
key
),
str
(
value
)))
print
(
'-'
*
70
)
def
download_weights
(
model_dir
=
"./model_data"
):
def
download_weights
(
phi
,
model_dir
=
"./model_data"
):
import
os
from
torch.hub
import
load_state_dict_from_url
phi
=
"l"
download_urls
=
{
"l"
:
'https://github.com/bubbliiiing/yolov7-pytorch/releases/download/v1.0/cspdarknet_backbone.pth'
,
"l"
:
'https://github.com/bubbliiiing/yolov7-pytorch/releases/download/v1.0/yolov7_backbone.pth'
,
"x"
:
'https://github.com/bubbliiiing/yolov7-pytorch/releases/download/v1.0/yolov7_x_backbone.pth'
,
}
url
=
download_urls
[
phi
]
...
...
utils/utils_bbox.py
浏览文件 @
cdc66baa
...
...
@@ -11,9 +11,9 @@ class DecodeBox():
self
.
bbox_attrs
=
5
+
num_classes
self
.
input_shape
=
input_shape
#-----------------------------------------------------------#
#
20x20的特征层对应的anchor是[116,90],[156,198],[373,326
]
#
40x40的特征层对应的anchor是[30,61],[62,45],[59,119
]
#
80x80的特征层对应的anchor是[10,13],[16,30],[33,23
]
#
13x13的特征层对应的anchor是[142, 110],[192, 243],[459, 401
]
#
26x26的特征层对应的anchor是[36, 75],[76, 55],[72, 146
]
#
52x52的特征层对应的anchor是[12, 16],[19, 36],[40, 28
]
#-----------------------------------------------------------#
self
.
anchors_mask
=
anchors_mask
...
...
yolo.py
浏览文件 @
cdc66baa
...
...
@@ -37,6 +37,12 @@ class YOLO(object):
# 输入图片的大小,必须为32的倍数。
#---------------------------------------------------------------------#
"input_shape"
:
[
640
,
640
],
#------------------------------------------------------#
# 所使用到的yolov7的版本,本仓库一共提供两个:
# l : 对应yolov7
# x : 对应yolov7_x
#------------------------------------------------------#
"phi"
:
'l'
,
#---------------------------------------------------------------------#
# 只有得分大于置信度的预测框会被保留下来
#---------------------------------------------------------------------#
...
...
@@ -97,7 +103,7 @@ class YOLO(object):
#---------------------------------------------------#
# 建立yolo模型,载入yolo模型的权重
#---------------------------------------------------#
self
.
net
=
YoloBody
(
self
.
anchors_mask
,
self
.
num_classes
)
self
.
net
=
YoloBody
(
self
.
anchors_mask
,
self
.
num_classes
,
self
.
phi
)
device
=
torch
.
device
(
'cuda'
if
torch
.
cuda
.
is_available
()
else
'cpu'
)
self
.
net
.
load_state_dict
(
torch
.
load
(
self
.
model_path
,
map_location
=
device
))
self
.
net
=
self
.
net
.
fuse
().
eval
()
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录