Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
+速度
yolo_v3
提交
6c4b51d5
yolo_v3
项目概览
+速度
/
yolo_v3
与 Fork 源项目一致
Fork自
DataBall / yolo_v3
通知
2
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
yolo_v3
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
6c4b51d5
编写于
6月 08, 2021
作者:
DataBall
🚴🏻
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
增加了支持 voc 格式数据集的训练模式,目前验证通过的是单类数据集
上级
7cdca4fa
变更
5
隐藏空白更改
内联
并排
Showing
5 changed files
with
608 additions
and
8 deletions
+608
-8
cfg/hand.data
cfg/hand.data
+8
-7
cfg/hand.names
cfg/hand.names
+1
-1
show_yolo_anno_voc.py
show_yolo_anno_voc.py
+55
-0
train_voc.py
train_voc.py
+215
-0
utils/datasets_voc.py
utils/datasets_voc.py
+329
-0
未找到文件。
cfg/hand.data
浏览文件 @
6c4b51d5
cfg_model=yolo
cfg_model=yolo
classes=1
classes=1
gpus = 0
gpus = 0
num_workers =
12
num_workers =
6
batch_size =
8
batch_size =
9
img_size = 416
img_size = 416
multi_scale = True
multi_scale = True
epochs =
10
0
epochs =
32
0
train=D:/m_cc/yolov3_pytorch/datasets_fusion_hand_train/anno/train.txt
train=D:/m_cc/yolov3_pytorch/datasets_fusion_hand_train/anno/train.txt
valid=D:/m_cc/yolov3_pytorch/datasets_fusion_hand_train/anno/train.txt
valid=D:/m_cc/yolov3_pytorch/datasets_fusion_hand_train/anno/train.txt
names=./cfg/hand.names
names=./cfg/hand.names
#finetune_model=./finetune_model/yolov3_coco.pt
#finetune_model=./finetune_model/yolov3_coco.pt
#finetune_model = ./weights-yolov3/latest.pt
#finetune_model = ./weights-yolov3/latest.pt
finetune_model = ./weights-yolov3-hand/latest_416.pt
finetune_model = ./weights-yolov3-hand/yoloV3_416_epoch_12.pt
#finetune_model = ./weights-yolov3-face-tiny/latest_416.pt
#finetune_model = ./weights-yolov3-face-tiny/hand_416-2021-01-29.pt
lr_step = 20,50,80
lr_step = 120,220,280
lr0 = 0.0001
lr0 = 0.00002
train_voc=G:/tvcoco-hand/
cfg/hand.names
浏览文件 @
6c4b51d5
H
and
h
and
show_yolo_anno_voc.py
0 → 100644
浏览文件 @
6c4b51d5
#-*-coding:utf-8-*-
# date:2021-05
# Author: Eric.Lee
# function: show yolo data of voc format anno
import os
import xml.etree.ElementTree as et  # xml.etree.cElementTree was removed in Python 3.9

import cv2
import numpy as np

if __name__ == "__main__":
    # Directory that holds the .jpg images next to their same-named VOC .xml files.
    path = 'G:/hand_detect_datasets-0/'
    path_voc_names = './cfg/hand.names'

    # Build and print the class-name -> index mapping (used for inspection only).
    with open(path_voc_names, 'r') as f:
        label_map = [line.strip() for line in f]
    label_dict = {}
    for i, label in enumerate(label_map):
        print(i, ') ', label)
        label_dict[label] = i
    print("label_dict : {}".format(label_dict))

    for file_name in os.listdir(path):
        if not file_name.endswith(".jpg"):
            continue
        # os.path.join works whether or not `path` ends with a separator.
        path_img = os.path.join(path, file_name)
        path_label = os.path.splitext(path_img)[0] + ".xml"
        if not os.access(path_label, os.F_OK):
            continue  # image without annotation file

        img = cv2.imread(path_img)
        if img is None:
            # cv2.imread returns None instead of raising on unreadable files.
            print("skip unreadable image : {}".format(path_img))
            continue

        root = et.parse(path_label).getroot()
        for obj in root.findall('object'):
            name = obj.find('name').text
            bndbox = obj.find('bndbox')

            x1 = np.float32(bndbox.find('xmin').text)
            y1 = np.float32(bndbox.find('ymin').text)
            x2 = np.float32(bndbox.find('xmax').text)
            y2 = np.float32(bndbox.find('ymax').text)

            cv2.rectangle(img, (int(x1), int(y1)), (int(x2), int(y2)), (255, 100, 100), 2)
            # The label is drawn twice (thick, then thin) to get an outlined text.
            cv2.putText(img, "{}".format(name), (int(x1), int(y1)),
                        cv2.FONT_HERSHEY_PLAIN, 2.5, (0, 55, 255), 6)
            cv2.putText(img, "{}".format(name), (int(x1), int(y1)),
                        cv2.FONT_HERSHEY_PLAIN, 2.5, (0, 155, 255), 2)

        cv2.namedWindow('image', 0)
        cv2.imshow('image', img)
        if cv2.waitKey(30) == 27:  # ESC stops the preview
            break

    cv2.destroyAllWindows()
train_voc.py
0 → 100644
浏览文件 @
6c4b51d5
#coding:utf-8
# date:2021-06
# Author: Eric.Lee
# function: train data of voc format
import
os
from
yolov3
import
Yolov3
,
Yolov3Tiny
from
utils.parse_config
import
parse_data_cfg
from
utils.torch_utils
import
select_device
import
torch
from
torch.utils.data
import
DataLoader
from
utils.datasets_voc
import
LoadImagesAndLabels
from
utils.utils
import
*
import
numpy
as
np
def set_learning_rate(optimizer, lr):
    """Overwrite the ``'lr'`` entry of every parameter group of ``optimizer``.

    Args:
        optimizer: Object exposing ``param_groups``, an iterable of dicts.
        lr: Learning rate stored in each group.
    """
    for group in optimizer.param_groups:
        group.update(lr=lr)
def train(data_cfg='cfg/voc.data', accumulate=1):
    """Train a YOLOv3 / YOLOv3-tiny model on a VOC-format dataset.

    All run settings come from the ``.data`` file parsed by ``parse_data_cfg``.
    The keys read are ``gpus``, ``names``, ``num_workers``, ``cfg_model``,
    ``train_voc``, ``classes``, ``finetune_model``, ``batch_size``,
    ``img_size``, ``multi_scale``, ``epochs``, ``lr_step`` and ``lr0``.

    After every epoch ``latest_<img_size>.pt`` and
    ``yoloV3_<img_size>_epoch_<n>.pt`` are written to
    ``./weights-yolov3-<cfg name>[-tiny]/``; ``best_<img_size>.pt`` is written
    on every fifth epoch (see the note at the save site).

    Args:
        data_cfg: Path of the ``.data`` configuration file. Its base name
            without ``.data`` is used in the weights directory name.
        accumulate: Number of batches whose gradients are accumulated before
            each optimizer step.
    """
    # Configure run
    get_data_cfg = parse_data_cfg(data_cfg)  # training settings as a dict

    gpus = get_data_cfg['gpus']
    voc_names = get_data_cfg['names']
    num_workers = int(get_data_cfg['num_workers'])
    cfg_model = get_data_cfg['cfg_model']
    train_path = get_data_cfg['train_voc']
    num_classes = int(get_data_cfg['classes'])
    finetune_model = get_data_cfg['finetune_model']
    batch_size = int(get_data_cfg['batch_size'])
    img_size = int(get_data_cfg['img_size'])
    # The flag is compared as text: only the exact string 'True' enables it.
    multi_scale = get_data_cfg['multi_scale'] == 'True'
    epochs = int(get_data_cfg['epochs'])
    lr_step = str(get_data_cfg['lr_step'])
    lr0 = float(get_data_cfg['lr0'])

    os.environ['CUDA_VISIBLE_DEVICES'] = gpus
    device = select_device()

    print('data_cfg : ', data_cfg)
    print('voc.data config len : ', len(get_data_cfg))
    print('gpus : ', gpus)
    print('num_workers : ', num_workers)
    print('model : ', cfg_model)
    print('finetune_model : ', finetune_model)
    print('train_voc_path : ', train_path)
    print('num_classes : ', num_classes)
    print('batch_size : ', batch_size)
    print('img_size : ', img_size)
    print('multi_scale : ', multi_scale)
    print('lr_step : ', lr_step)
    print('lr0 : ', lr0)

    # load model
    pattern_data_ = data_cfg.split("/")[-1].replace(".data", "")
    # The anchors below are defined for a 416 px input and rescaled to img_size.
    a_scale = 416. / img_size
    if "-tiny" in cfg_model:
        anchors = [(10, 14), (23, 27), (37, 58), (81, 82), (135, 169), (344, 319)]
        model_cls = Yolov3Tiny
        weights = './weights-yolov3-{}-tiny/'.format(pattern_data_)
    else:
        anchors = [(10, 13), (16, 30), (33, 23), (30, 61), (62, 45), (59, 119),
                   (116, 90), (156, 198), (373, 326)]
        model_cls = Yolov3
        weights = './weights-yolov3-{}/'.format(pattern_data_)
    anchors_new = [(int(aw / a_scale), int(ah / a_scale)) for aw, ah in anchors]
    model = model_cls(num_classes, anchors=anchors_new)

    # Directory the checkpoints are saved to.
    os.makedirs(weights, exist_ok=True)

    model = model.to(device)
    latest = weights + 'latest_{}.pt'.format(img_size)
    best = weights + 'best_{}.pt'.format(img_size)

    # Optimizer
    optimizer = torch.optim.SGD(model.parameters(), lr=lr0, momentum=0.9, weight_decay=0.0005)

    start_epoch = 0

    if os.access(finetune_model, os.F_OK):
        # load retrain/finetune_model
        print('loading yolo-v3 finetune_model ~~~~~~', finetune_model)
        not_load_filters = 3 * (80 + 5)  # voc: 3*(20+5), coco: 3*(80+5)=255
        chkpt = torch.load(finetune_model, map_location=device)
        # Skip scalar entries and tensors whose first dimension equals the
        # COCO head size (255); strict=False tolerates the missing keys.
        model.load_state_dict(
            {k: v for k, v in chkpt['model'].items()
             if v.numel() > 1 and v.shape[0] != not_load_filters},
            strict=False)
        if 'coco' not in finetune_model:
            start_epoch = chkpt['epoch']
            if chkpt['optimizer'] is not None:
                optimizer.load_state_dict(chkpt['optimizer'])
                # NOTE(review): this value is overwritten with inf before the
                # training loop, so the restored best_loss is never used.
                best_loss = chkpt['best_loss']

    # Set scheduler: the learning rate is multiplied by gamma at each milestone epoch.
    milestones = [int(i) for i in lr_step.split(",")]
    print('milestones : ', milestones)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=milestones, gamma=0.1, last_epoch=start_epoch - 1)

    # Dataset
    print('multi_scale : ', multi_scale)
    dataset = LoadImagesAndLabels(train_path, voc_names=voc_names, batch_size=batch_size,
                                  img_size=img_size, augment=True, multi_scale=multi_scale)
    print('--------------->>> imge num : ', len(dataset))

    # Dataloader
    dataloader = DataLoader(dataset,
                            batch_size=batch_size,
                            num_workers=num_workers,
                            shuffle=True,
                            pin_memory=False,
                            drop_last=False,
                            collate_fn=dataset.collate_fn)

    # Start training
    t = time.time()
    nB = len(dataloader)
    n_burnin = min(round(nB / 5 + 3), 1000)  # burn-in batches

    best_loss = float('inf')
    test_loss = float('inf')

    flag_start = False

    # NOTE(review): the loop always starts at epoch 0 even when start_epoch was
    # restored from a checkpoint; only the scheduler uses start_epoch. Confirm
    # this is intended.
    for epoch in range(0, epochs):
        print(' ~~~~')
        model.train()

        # Step the scheduler once per epoch, but not before the first one.
        if flag_start:
            scheduler.step()
        flag_start = True

        mloss = defaultdict(float)  # mean loss
        for i, (imgs, targets, img_path_, _) in enumerate(dataloader):
            multi_size = imgs.size()
            imgs = imgs.to(device)
            targets = targets.to(device)

            nt = len(targets)
            if nt == 0:  # if no targets continue
                continue

            # SGD burn-in: ramp the learning rate up during the first batches of epoch 0.
            if epoch == 0 and i <= n_burnin:
                lr = lr0 * (i / n_burnin) ** 4
                for x in optimizer.param_groups:
                    x['lr'] = lr

            # Run model
            pred = model(imgs)

            # Build targets
            target_list = build_targets(model, targets)

            # Compute loss
            loss, loss_dict = compute_loss(pred, target_list)

            # Compute gradient
            loss.backward()

            # Accumulate gradient for x batches before optimizing
            if (i + 1) % accumulate == 0 or (i + 1) == nB:
                optimizer.step()
                optimizer.zero_grad()

            # Running epoch-means of tracked metrics
            for key, val in loss_dict.items():
                mloss[key] = (mloss[key] * i + val) / (i + 1)

            # Report the learning rate the optimizer is really using: it also
            # reflects the burn-in override, and scheduler.get_lr() is not
            # meant to be called outside scheduler.step().
            current_lr = optimizer.param_groups[0]['lr']
            print(' Epoch {:3d}/{:3d}, Batch {:6d}/{:6d}, Img_size {}x{}, nTargets {}, lr {:.6f}, loss: xy {:.3f}, wh {:.3f}, '
                  'conf {:.3f}, cls {:.3f}, total {:.3f}, time {:.3f}s'.format(
                      epoch, epochs - 1, i, nB - 1, multi_size[2], multi_size[3],
                      nt, current_lr,
                      mloss['xy'], mloss['wh'], mloss['conf'], mloss['cls'], mloss['total'],
                      time.time() - t),
                  end='\r')

            t = time.time()
        print()

        # Create checkpoint
        chkpt = {'epoch': epoch,
                 'best_loss': best_loss,
                 'model': model.module.state_dict()
                 if isinstance(model, nn.parallel.DistributedDataParallel)
                 else model.state_dict(),
                 'optimizer': optimizer.state_dict()}

        # Save latest checkpoint
        torch.save(chkpt, latest)

        # Save best checkpoint.
        # NOTE(review): best_loss and test_loss are never updated in this
        # function, so this fires on every fifth epoch.
        if best_loss == test_loss and epoch % 5 == 0:
            torch.save(chkpt, best)

        # Save a backup after every epoch.
        torch.save(chkpt, weights + 'yoloV3_{}_epoch_{}.pt'.format(img_size, epoch + 1))

        # Delete checkpoint
        del chkpt
#-------------------------------------------------------------------------------
if __name__ == '__main__':
    # Script entry point: train with the hand-detection configuration.
    train(data_cfg="cfg/hand.data")
    # Other dataset configurations, left here for quick switching:
    # train(data_cfg = "cfg/face.data")
    # train(data_cfg = "cfg/person.data")
    # train(data_cfg = "cfg/helmet.data")
    # train(data_cfg = "cfg/transport.data")
    print('well done ~ ')
utils/datasets_voc.py
0 → 100644
浏览文件 @
6c4b51d5
# date:2021-06
# Author: Eric.Lee
# function: dataloader data of voc format
import glob
import math
import os
import random
import shutil
from pathlib import Path

try:
    import xml.etree.cElementTree as et
except ImportError:  # xml.etree.cElementTree was removed in Python 3.9
    import xml.etree.ElementTree as et

import cv2
import numpy as np
import torch
from PIL import Image
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
from tqdm import tqdm
def xyxy2xywh(x):
    """Convert boxes from corner form to centre form.

    Args:
        x: 2-D ``torch.Tensor`` or NumPy array whose first four columns are
            ``[x1, y1, x2, y2]``.

    Returns:
        A new container of the same kind, shape and dtype as ``x`` whose first
        four columns are ``[x_centre, y_centre, width, height]``; any further
        columns are zero.
    """
    if isinstance(x, torch.Tensor):
        out = torch.zeros_like(x)
    else:
        out = np.zeros_like(x)

    left, top, right, bottom = x[:, 0], x[:, 1], x[:, 2], x[:, 3]
    out[:, 0] = (left + right) / 2
    out[:, 1] = (top + bottom) / 2
    out[:, 2] = right - left
    out[:, 3] = bottom - top
    return out
def xywh2xyxy(x):
    """Convert boxes from centre form to corner form.

    Args:
        x: 2-D ``torch.Tensor`` or NumPy array whose first four columns are
            ``[x_centre, y_centre, width, height]``.

    Returns:
        A new container of the same kind, shape and dtype as ``x`` whose first
        four columns are ``[x1, y1, x2, y2]``; any further columns are zero.
    """
    if isinstance(x, torch.Tensor):
        out = torch.zeros_like(x)
    else:
        out = np.zeros_like(x)

    cx, cy = x[:, 0], x[:, 1]
    half_w = x[:, 2] / 2
    half_h = x[:, 3] / 2
    out[:, 0] = cx - half_w
    out[:, 1] = cy - half_h
    out[:, 2] = cx + half_w
    out[:, 3] = cy + half_h
    return out
def read_label_xml(path_label):
    """Return the number of ``<object>`` elements directly under the XML root.

    The ``<name>`` text of every object is read as well, so an object without
    a ``<name>`` child raises ``AttributeError``.

    Args:
        path_label: Path of the VOC annotation file.
    """
    root = et.parse(path_label).getroot()
    object_names = [node.find('name').text for node in root.findall('object')]
    return len(object_names)
class LoadImagesAndLabels(Dataset):
    """Dataset of ``.jpg`` images with same-named VOC ``.xml`` annotations.

    Every ``.jpg`` file in ``path`` that has a sibling ``.xml`` file containing
    at least one ``<object>`` becomes one sample. Used for training/testing.
    """

    def __init__(self, path, voc_names, batch_size, img_size=416, augment=True, multi_scale=False):
        """Index the image/annotation pairs found in ``path``.

        Args:
            path: Directory that holds the images and annotation files.
            voc_names: Text file with one class name per line; the line index
                becomes the numeric class label.
            batch_size: Batch size, used to decide when the multi-scale image
                size is re-drawn.
            img_size: Side length of the square network input.
            augment: Enable HSV, affine and flip augmentation.
            multi_scale: Re-draw ``img_size`` at random during iteration.

        Raises:
            AssertionError: If no usable image is found in ``path``.
        """
        print('LoadImagesAndLabels init : ', path)

        # Parse the class names: name -> numeric label.
        with open(voc_names, 'r') as f:
            label_map = [line.strip() for line in f]
        label_voc_dict = {}
        for i, label in enumerate(label_map):
            print(i, ') ', label)
            label_voc_dict[label] = i
        print("label_voc_dict : {}".format(label_voc_dict))

        obj_num_sum = 0  # total number of annotated objects
        img_files = []
        label_files = []
        for file_name in os.listdir(path):
            if not file_name.endswith(".jpg"):
                continue
            # os.path.join works whether or not `path` ends with a separator;
            # splitext only swaps the real extension.
            path_img = os.path.join(path, file_name)
            path_label = os.path.splitext(path_img)[0] + ".xml"
            if not os.access(path_label, os.F_OK):
                continue
            obj_num = read_label_xml(path_label)
            if obj_num == 0:  # skip annotation files without any object
                continue
            obj_num_sum += obj_num
            img_files.append(path_img)
            label_files.append(path_label)

        self.label_voc_dict = label_voc_dict
        self.img_files = img_files
        assert len(self.img_files) > 0, 'No images found in %s' % path
        self.img_size = img_size
        self.batch_size = batch_size
        self.multi_scale = multi_scale
        self.augment = augment
        self.scale_index = 0  # samples served since the counter was last reset
        if self.multi_scale:
            print("Multi scale images training, init img_size", self.img_size)
        else:
            print("Fixed scale images, img_size", self.img_size)
        self.label_files = label_files
        print("init voc data_iter done ~")
        print("obj_num_sum : {}".format(obj_num_sum))

    def __len__(self):
        """Return the number of indexed images."""
        return len(self.img_files)

    def __getitem__(self, index):
        """Load, augment and encode one sample.

        Returns:
            Tuple ``(img, labels_out, img_path, (h, w))``: ``img`` is a float32
            tensor in channel-first RGB order scaled to ``[0, 1]``;
            ``labels_out`` has shape ``(nL, 6)`` with column 0 left at zero
            (filled in by ``collate_fn``) followed by the class label and the
            box as ``x, y, w, h`` divided by ``img_size``; ``(h, w)`` is the
            image shape as read from disk.

        Raises:
            AssertionError: If the image file cannot be read.
            KeyError: If an object name is not listed in the names file.
        """
        # Re-draw the input size each time a multiple of batch_size samples has been served.
        if self.multi_scale and (self.scale_index % self.batch_size == 0) and self.scale_index != 0:
            self.img_size = random.choice(range(12, 15)) * 32  # 384, 416 or 448
        if self.multi_scale:
            self.scale_index += 1
            if self.scale_index >= (100 * self.batch_size):
                self.scale_index = 0

        img_path = self.img_files[index]
        label_path = self.label_files[index]

        img = cv2.imread(img_path)  # BGR
        assert img is not None, 'File Not Found ' + img_path

        augment_hsv = random.random() < 0.5  # hsv_aug prob = 0.5
        if self.augment and augment_hsv:
            # Scale the S and V channels independently by a random factor.
            fraction = 0.50  # must be < 1.0
            img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
            S = img_hsv[:, :, 1].astype(np.float32)
            V = img_hsv[:, :, 2].astype(np.float32)

            a = (random.random() * 2 - 1) * fraction + 1  # a in [0.5, 1.5]
            S *= a
            if a > 1:
                np.clip(S, None, 255, out=S)

            a = (random.random() * 2 - 1) * fraction + 1
            V *= a
            if a > 1:
                np.clip(V, None, 255, out=V)

            img_hsv[:, :, 1] = S
            img_hsv[:, :, 2] = V
            cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)

        h, w, _ = img.shape
        img, ratio, padw, padh = letterbox(img, height=self.img_size, augment=self.augment)

        # Load labels: VOC pixel corners -> normalised xywh of the original image.
        root = et.parse(label_path).getroot()
        labels = []
        x = []
        for obj in root.findall('object'):
            name_ = obj.find('name').text
            bndbox = obj.find('bndbox')

            xmin = np.float32(bndbox.find('xmin').text)
            ymin = np.float32(bndbox.find('ymin').text)
            xmax = np.float32(bndbox.find('xmax').text)
            ymax = np.float32(bndbox.find('ymax').text)

            # Keep the box inside the image.
            xmin = np.clip(xmin, 0, w - 1)
            ymin = np.clip(ymin, 0, h - 1)
            xmax = np.clip(xmax, 0, w - 1)
            ymax = np.clip(ymax, 0, h - 1)

            x_mid = (xmax + xmin) / 2. / float(w)
            y_mid = (ymax + ymin) / 2. / float(h)
            w_box = (xmax - xmin) / float(w)
            h_box = (ymax - ymin) / float(h)

            x.append((self.label_voc_dict[name_], x_mid, y_mid, w_box, h_box))

        x = np.array(x, dtype=np.float32)
        if x.size > 0:
            # Normalised xywh -> pixel xyxy in the letterboxed image.
            labels = x.copy()
            labels[:, 1] = ratio * w * (x[:, 1] - x[:, 3] / 2) + padw
            labels[:, 2] = ratio * h * (x[:, 2] - x[:, 4] / 2) + padh
            labels[:, 3] = ratio * w * (x[:, 1] + x[:, 3] / 2) + padw
            labels[:, 4] = ratio * h * (x[:, 2] + x[:, 4] / 2) + padh

        # Augment image and labels
        if self.augment:
            img, labels = random_affine(img, labels, degrees=(-30, 30),
                                        translate=(0.10, 0.10), scale=(0.9, 1.1))

        nL = len(labels)  # number of labels
        if nL:
            # Convert xyxy to xywh and normalise by the network input size.
            labels[:, 1:5] = xyxy2xywh(labels[:, 1:5]) / self.img_size

        if self.augment:
            # random left-right flip
            lr_flip = True
            if lr_flip and random.random() > 0.5:
                img = np.fliplr(img)
                if nL:
                    labels[:, 1] = 1 - labels[:, 1]

            # random up-down flip
            ud_flip = True
            if ud_flip and random.random() > 0.5:
                img = np.flipud(img)
                if nL:
                    labels[:, 2] = 1 - labels[:, 2]

        # Column 0 is reserved for the index of the image inside the batch.
        labels_out = torch.zeros((nL, 6))
        if nL:
            labels_out[:, 1:] = torch.from_numpy(labels)

        # Normalize
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, HWC to CHW
        img = np.ascontiguousarray(img, dtype=np.float32)  # uint8 to float32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0

        return torch.from_numpy(img), labels_out, img_path, (h, w)

    @staticmethod
    def collate_fn(batch):
        """Merge samples into a batch.

        Column 0 of every label tensor is set to the position of its image in
        the batch; images are stacked and labels concatenated.

        Returns:
            Tuple ``(images, labels, paths, hw)``.
        """
        img, label, path, hw = list(zip(*batch))  # transposed
        for i, l in enumerate(label):
            l[:, 0] = i  # index of the image each object belongs to
        return torch.stack(img, 0), torch.cat(label, 0), path, hw
def letterbox(img, height=416, augment=False, color=(127.5, 127.5, 127.5)):
    """Resize an image to fit a ``height`` x ``height`` square and pad the rest.

    The longer image side is scaled to ``height``, the aspect ratio is kept
    and the remaining area is filled with ``color``.

    Args:
        img: Image array whose first two dimensions are (rows, columns).
        height: Side length of the padded square.
        augment: If true, the interpolation is drawn at random (``None``
            means the ``cv2.resize`` default); otherwise nearest-neighbour
            interpolation is used.
        color: Border fill value.

    Returns:
        Tuple ``(img, ratio, dw, dh)``: the padded image, the resize factor
        (``height`` divided by the longer input side) and the unrounded
        per-side horizontal and vertical padding.
    """
    src_h, src_w = img.shape[:2]
    ratio = float(height) / max(src_h, src_w)  # ratio = new / old
    new_shape = (round(src_w * ratio), round(src_h * ratio))  # (width, height)

    dw = (height - new_shape[0]) / 2  # width padding
    dh = (height - new_shape[1]) / 2  # height padding
    # The -0.1 / +0.1 offsets split an odd padding total between the two sides.
    top, bottom = round(dh - 0.1), round(dh + 0.1)
    left, right = round(dw - 0.1), round(dw + 0.1)

    if not augment:
        img = cv2.resize(img, new_shape, interpolation=cv2.INTER_NEAREST)
    else:
        candidates = [None, cv2.INTER_NEAREST, cv2.INTER_LINEAR,
                      None, cv2.INTER_NEAREST, cv2.INTER_LINEAR,
                      cv2.INTER_AREA, cv2.INTER_CUBIC, cv2.INTER_LANCZOS4]
        interpolation = np.random.choice(candidates)
        resize_kwargs = {} if interpolation is None else {'interpolation': interpolation}
        img = cv2.resize(img, new_shape, **resize_kwargs)

    # Pad to the final square.
    img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)
    return img, ratio, dw, dh
def random_affine(img, targets=(), degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-2, 2),
                  borderValue=(127.5, 127.5, 127.5)):
    """Apply a random rotation/scale/translation/shear to an image and its boxes.

    Similar in spirit to torchvision.transforms.RandomAffine; see also
    https://medium.com/uruvideo/dataset-augmentation-with-random-homographies-a8f4b44830d4

    Args:
        img: Image array (rows, columns, channels).
        targets: Array with one row per box, columns 1..4 holding pixel
            ``x1, y1, x2, y2``; may be empty or ``None``.
        degrees: (min, max) rotation angle in degrees.
        translate: (x, y) maximum translation as a fraction of the image
            width and height respectively.
        scale: (min, max) scale factor.
        shear: (min, max) shear angle in degrees, drawn separately for x and y.
        borderValue: Fill value for areas outside the source image.

    Returns:
        Tuple ``(warped_image, targets)``. The output image is a square whose
        side is the longer side of ``img``. Boxes that end up too small, too
        elongated or mostly outside the image are dropped.
    """
    if targets is None:
        targets = []
    border = 0  # width of added border (optional)
    height = max(img.shape[0], img.shape[1]) + border * 2

    # Rotation and Scale
    R = np.eye(3)
    a = random.random() * (degrees[1] - degrees[0]) + degrees[0]
    s = random.random() * (scale[1] - scale[0]) + scale[0]
    R[:2] = cv2.getRotationMatrix2D(angle=a, center=(img.shape[1] / 2, img.shape[0] / 2), scale=s)

    # Translation. x moves along the width (shape[1]), y along the height
    # (shape[0]); identical to the previous behaviour for square images.
    T = np.eye(3)
    T[0, 2] = (random.random() * 2 - 1) * translate[0] * img.shape[1] + border  # x translation (pixels)
    T[1, 2] = (random.random() * 2 - 1) * translate[1] * img.shape[0] + border  # y translation (pixels)

    # Shear
    S = np.eye(3)
    S[0, 1] = math.tan((random.random() * (shear[1] - shear[0]) + shear[0]) * math.pi / 180)  # x shear (deg)
    S[1, 0] = math.tan((random.random() * (shear[1] - shear[0]) + shear[0]) * math.pi / 180)  # y shear (deg)

    M = S @ T @ R  # Combined matrix. ORDER IS IMPORTANT HERE!!
    imw = cv2.warpPerspective(img, M, dsize=(height, height), flags=cv2.INTER_LINEAR,
                              borderValue=borderValue)  # BGR order borderValue

    # Warp the boxes with the same matrix.
    if len(targets) > 0:
        n = targets.shape[0]
        points = targets[:, 1:5].copy()
        area0 = (points[:, 2] - points[:, 0]) * (points[:, 3] - points[:, 1])

        # Transform the four corners of every box.
        xy = np.ones((n * 4, 3))
        xy[:, :2] = points[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(n * 4, 2)  # x1y1, x2y2, x1y2, x2y1
        xy = (xy @ M.T)[:, :2].reshape(n, 8)

        # Axis-aligned box around the warped corners.
        x = xy[:, [0, 2, 4, 6]]
        y = xy[:, [1, 3, 5, 7]]
        xy = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T

        # Shrink the enclosing box depending on the rotation angle.
        radians = a * math.pi / 180
        reduction = max(abs(math.sin(radians)), abs(math.cos(radians))) ** 0.5
        x = (xy[:, 2] + xy[:, 0]) / 2
        y = (xy[:, 3] + xy[:, 1]) / 2
        w = (xy[:, 2] - xy[:, 0]) * reduction
        h = (xy[:, 3] - xy[:, 1]) * reduction
        xy = np.concatenate((x - w / 2, y - h / 2, x + w / 2, y + h / 2)).reshape(4, n).T

        # Clip to the output image and reject degenerate boxes.
        np.clip(xy, 0, height, out=xy)
        w = xy[:, 2] - xy[:, 0]
        h = xy[:, 3] - xy[:, 1]
        area = w * h
        ar = np.maximum(w / (h + 1e-16), h / (w + 1e-16))
        i = (w > 4) & (h > 4) & (area / (area0 + 1e-16) > 0.1) & (ar < 10)

        targets = targets[i]
        targets[:, 1:5] = xy[i]

    return imw, targets
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录