Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
郑州小白
dpcas
提交
54a7374d
dpcas
项目概览
郑州小白
/
dpcas
与 Fork 源项目一致
从无法访问的项目Fork
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
dpcas
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
54a7374d
编写于
4月 22, 2021
作者:
DataBall
🚴🏻
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
update wyw2s
上级
13e46365
变更
6
隐藏空白更改
内联
并排
Showing
6 changed files
with
62 additions
and
1496 deletions
+62
-1496
components/face_detect/utils/common_utils.py
components/face_detect/utils/common_utils.py
+0
-656
components/face_detect/utils/datasets.py
components/face_detect/utils/datasets.py
+0
-395
components/face_detect/utils/utils.py
components/face_detect/utils/utils.py
+0
-438
lib/wyw2s_lib/cfg/wyw2s.cfg
lib/wyw2s_lib/cfg/wyw2s.cfg
+15
-0
lib/wyw2s_lib/doc/README.md
lib/wyw2s_lib/doc/README.md
+42
-4
main.py
main.py
+5
-3
未找到文件。
components/face_detect/utils/common_utils.py
已删除
100644 → 0
浏览文件 @
13e46365
#-*-coding:utf-8-*-
# date:2020-04-11
# Author: Eric.Lee
import
os
import
shutil
import
cv2
import
numpy
as
np
import
json
import
torch
from
dp_models.faceboxes.config
import
cfg
from
dp_models.faceboxes.layers.functions.prior_box
import
PriorBox
from
dp_models.faceboxes.utils.box_utils
import
decode
from
dp_models.faceboxes.headpose.pose
import
*
import
torch.nn.functional
as
F
def mkdir_(path, flag_rm=False):
    """Make sure the directory ``path`` exists.

    Args:
        path: Directory to create. Only the last path component is created
            (``os.mkdir``), so the parent must already exist.
        flag_rm: When ``True`` and ``path`` already exists, the directory is
            deleted with all of its contents and created again empty.
            When ``False`` an existing directory is left untouched.
    """
    if not os.path.exists(path):
        os.mkdir(path)
        return

    # The path already exists: only wipe it when explicitly requested.
    if flag_rm == True:
        shutil.rmtree(path)
        os.mkdir(path)
        print('remove {} done ~ '.format(path))
def plot_box(bbox, img, color=None, label=None, line_thickness=None):
    """Draw a bounding box, and optionally a filled text tag, on ``img`` in place.

    Args:
        bbox: Indexable with at least four entries ``x1, y1, x2, y2``; each is
            converted with ``int``.
        img: Image array that is drawn on directly.
        color: Box colour. A random 3-component colour is picked when falsy.
        label: Optional text rendered in a filled tag above the top-left corner.
        line_thickness: Line width; when falsy it is derived from the image size.
    """
    thickness = line_thickness or round(0.002 * max(img.shape[0:2])) + 1
    if not color:
        color = [random.randint(0, 255) for _ in range(3)]

    top_left = (int(bbox[0]), int(bbox[1]))
    bottom_right = (int(bbox[2]), int(bbox[3]))
    # Bounding box of the target.
    cv2.rectangle(img, top_left, bottom_right, color, thickness=thickness)

    if not label:
        return

    font_thickness = max(thickness - 2, 1)
    # Size of the rendered label text.
    text_size = cv2.getTextSize(label, 0, fontScale=thickness / 4, thickness=font_thickness)[0]
    # Opposite corner of the tag that sits on top of the box.
    tag_corner = top_left[0] + text_size[0], top_left[1] - text_size[1] - 3
    # Filled rectangle behind the label.
    cv2.rectangle(img, top_left, tag_corner, color, -1)
    # Label text.
    cv2.putText(
        img,
        label,
        (top_left[0], top_left[1] - 2),
        0,
        thickness / 4,
        [225, 255, 255],
        thickness=font_thickness,
        lineType=cv2.LINE_AA,
    )
class JSON_Encoder(json.JSONEncoder):
    """JSON encoder that turns NumPy scalars and arrays into plain Python values."""

    def default(self, obj):
        """Convert NumPy integers, floats and arrays; defer everything else to the base class."""
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        # Unknown type: the base implementation raises TypeError.
        return super().default(obj)
def draw_landmarks(img, output, r_bboxes, draw_circle):
    """Group predicted face landmarks by facial part and optionally draw them.

    ``output`` is read as a flat array of interleaved ``x, y`` values. Every
    ``x`` is multiplied by the width of ``img`` and every ``y`` by its height,
    which gives coordinates local to ``img``; adding ``r_bboxes[0]`` /
    ``r_bboxes[1]`` gives the "global" coordinates.

    Args:
        img: Image whose ``shape[0]`` / ``shape[1]`` are used as height / width.
            Circles are drawn on it in place.
        output: 1-D array of landmark values (``output.shape[0] / 2`` points).
        r_bboxes: Indexable whose first two entries are the x / y offset added
            to obtain global coordinates.
        draw_circle: When truthy, a small circle is drawn for every grouped
            landmark. The two ``eye_center`` points (indices 96 and 97) are
            drawn regardless of this flag, as in the original implementation.

    Returns:
        A 4-tuple ``(dict_landmarks, faceswap_list_e, global_dict_landmarks, face_pts)``:

        * ``dict_landmarks``: group name -> list of ``[x, y, color]`` in local
          integer coordinates.
        * ``faceswap_list_e``: ten values, the global x of the first five
          key points (indices 33, 46, 54, 76, 82) followed by their global y.
        * ``global_dict_landmarks``: group name -> list of ``[x, y]`` in global
          integer coordinates; additionally holds ``'eye_center'``.
        * ``face_pts``: ``[x, y]`` global (non-truncated) coordinates of every point.

    Raises:
        IndexError: if fewer than five of the key-point indices are present.
    """
    # (first index, last index, group name, colour stored with the point, colour drawn)
    landmark_groups = (
        (0, 32, 'basin', (255, 30, 30), (255, 30, 30)),
        (33, 41, 'left_eyebrow', (0, 255, 0), (0, 255, 0)),
        (42, 50, 'right_eyebrow', (0, 255, 0), (0, 255, 0)),
        (51, 54, 'bridge_nose', (0, 170, 255), (0, 170, 255)),
        (55, 59, 'wing_nose', (0, 255, 255), (0, 255, 255)),
        (60, 67, 'left_eye', (255, 55, 255), (255, 0, 255)),
        (68, 75, 'right_eye', (255, 55, 255), (255, 0, 255)),
        (76, 87, 'out_lip', (255, 255, 0), (255, 255, 0)),
        (88, 95, 'in_lip', (50, 220, 255), (50, 220, 255)),
    )
    faceswap_indices = (33, 46, 54, 76, 82, 96, 97)

    img_width = img.shape[1]
    img_height = img.shape[0]
    x0, y0 = r_bboxes[0], r_bboxes[1]

    dict_landmarks = {}
    global_dict_landmarks = {}  # coordinates in the global (full image) frame
    faceswap_list = []
    face_pts = []

    for i in range(int(output.shape[0] / 2)):
        x = output[i * 2 + 0] * float(img_width)
        y = output[i * 2 + 1] * float(img_height)

        face_pts.append([x + x0, y + y0])
        if i in faceswap_indices:
            faceswap_list.append((x + x0, y + y0))

        if 96 <= i <= 97:
            # Fix: the list used to be re-created on every hit (the membership
            # test looked at dict_landmarks, which never receives this key),
            # so the point for index 96 was dropped.
            global_dict_landmarks.setdefault('eye_center', []).append([int(x + x0), int(y + y0)])
            # NOTE(review): drawn even when draw_circle is falsy, as before.
            cv2.circle(img, (int(x), int(y)), 2, (0, 0, 255), -1)
            continue

        for first, last, name, stored_color, drawn_color in landmark_groups:
            if first <= i <= last:
                dict_landmarks.setdefault(name, []).append([int(x), int(y), stored_color])
                global_dict_landmarks.setdefault(name, []).append([int(x + x0), int(y + y0)])
                if draw_circle:
                    cv2.circle(img, (int(x), int(y)), 2, drawn_color, -1)
                break

    # First the five x values, then the five y values.
    faceswap_list_e = [faceswap_list[k][0] for k in range(5)]
    faceswap_list_e += [faceswap_list[k][1] for k in range(5)]

    return dict_landmarks, faceswap_list_e, global_dict_landmarks, face_pts
def draw_contour(image, dict, r_bbox, face_pts):
    """Draw the grouped landmark outlines on ``image`` and return the head pose.

    Args:
        image: Image that is drawn on in place and passed to ``get_head_pose``.
        dict: Group name -> list of ``[x, y, color]`` entries in coordinates
            relative to ``r_bbox``; the colour of the first entry is used for
            the whole group.
        r_bbox: Indexable whose first two entries are the x / y offset added to
            every point before drawing.
        face_pts: Indexable of landmark points; 13 fixed indices are picked
            from it and handed to ``get_head_pose``.

    Returns:
        ``(pitch, yaw, roll)`` as unpacked from the euler angle returned by
        ``get_head_pose``.
    """
    # Offset of the crop inside the full image.
    x0 = r_bbox[0]
    y0 = r_bbox[1]

    # ------------------------------------------ head pose
    pose_indices = (33, 38, 50, 46, 60, 64, 68, 72, 51, 55, 59, 53, 57)
    face_ola_pts = [face_pts[k] for k in pose_indices]
    pts_num = len(face_ola_pts)
    reprojectdst, euler_angle = get_head_pose(
        np.array(face_ola_pts).reshape((pts_num, 2)), image, vis=False
    )
    pitch, yaw, roll = euler_angle

    # Colour of the dot drawn at the mean position of each eye group.
    eye_marker = {'left_eye': (255, 255, 55), 'right_eye': (255, 215, 25)}

    for key in dict.keys():
        group = dict[key]
        _, _, color = group[0]

        if key in eye_marker:
            eye_x = np.mean([point[0] + x0 for point in group])
            eye_y = np.mean([point[1] + y0 for point in group])
            cv2.circle(image, (int(eye_x), int(eye_y)), 3, eye_marker[key], -1)

        if key in ('basin', 'wing_nose'):
            # These two groups are drawn as open polylines.
            pts = np.array([[point[0] + x0, point[1] + y0] for point in group], np.int32)
            cv2.polylines(image, [pts], False, color, thickness=2)
        else:
            # Every other group is drawn as a contour.
            points_array = np.zeros((1, len(group), 2), dtype=np.int32)
            for i, (x, y, _) in enumerate(group):
                points_array[0, i, 0] = x + x0
                points_array[0, i, 1] = y + y0
            cv2.drawContours(image, points_array, -1, color, thickness=2)

    return (pitch, yaw, roll)
import random

# Palette of 100 random colours, built once when the module is imported.
# draw_global_contour picks its per-group colour from this list.
rgbs = [
    (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
    for _ in range(100)
]
def draw_global_contour(image, dict):
    """Draw landmark groups given in full-image coordinates onto ``image``.

    Args:
        image: Image that is drawn on in place.
        dict: Group name -> list of ``[x, y]`` points. The n-th group
            (counting from 1) is drawn with the colour ``rgbs[n]``.
    """
    # Points are already global, so no offset is applied.
    x0, y0 = 0, 0

    # Colour of the dot drawn at the mean position of each eye group.
    eye_marker = {'left_eye': (255, 255, 55), 'right_eye': (255, 215, 25)}

    for idx, key in enumerate(dict.keys(), start=1):
        group = dict[key]

        if key in eye_marker:
            eye_x = np.mean([point[0] + x0 for point in group])
            eye_y = np.mean([point[1] + y0 for point in group])
            cv2.circle(image, (int(eye_x), int(eye_y)), 3, eye_marker[key], -1)

        if key in ('basin', 'wing_nose'):
            # These two groups are drawn as open polylines.
            pts = np.array([[point[0] + x0, point[1] + y0] for point in group], np.int32)
            cv2.polylines(image, [pts], False, rgbs[idx], thickness=2)
        else:
            # Every other group is drawn as a contour.
            points_array = np.zeros((1, len(group), 2), dtype=np.int32)
            for i, (x, y) in enumerate(group):
                points_array[0, i, 0] = x + x0
                points_array[0, i, 1] = y + y0
            cv2.drawContours(image, points_array, -1, rgbs[idx], thickness=2)
def refine_face_bbox(bbox, img_shape):
    """Adjust a face box and clamp it to the image.

    The box is widened by 6% of its width on both sides, its top edge is moved
    down by 15% of its height and its bottom edge is moved down by 3% of its
    height. The result is truncated to integers and clamped to
    ``[0, width - 1]`` / ``[0, height - 1]``.

    Args:
        bbox: ``(x1, y1, x2, y2)``.
        img_shape: ``(height, width, channels)``.

    Returns:
        ``(x1, y1, x2, y2)`` as integers.
    """
    height, width, _ = img_shape
    left, top, right, bottom = bbox

    box_w = (right - left)
    box_h = (bottom - top)

    left = int(left - box_w * 0.06)
    top = int(top + box_h * 0.15)
    right = int(right + box_w * 0.06)
    bottom = int(bottom + box_h * 0.03)

    # Keep the box inside the image.
    return (
        int(max(0, left)),
        int(max(0, top)),
        int(min(right, width - 1)),
        int(min(bottom, height - 1)),
    )
def py_cpu_nms(dets, thresh):
    """Pure Python NMS baseline.

    Args:
        dets: 2-D array whose columns 0-3 are ``x1, y1, x2, y2`` and whose
            column 4 is the score.
        thresh: Boxes whose overlap with an already kept box is greater than
            this value are discarded.

    Returns:
        List of row indices into ``dets`` that are kept, highest score first.
    """
    left, top, right, bottom, scores = (dets[:, col] for col in range(5))

    # Box areas use the inclusive-pixel convention (+1).
    areas = (right - left + 1) * (bottom - top + 1)
    remaining = scores.argsort()[::-1]

    keep = []
    while remaining.size > 0:
        best = remaining[0]
        rest = remaining[1:]
        keep.append(best)

        # Intersection of the best box with every other remaining box.
        inter_left = np.maximum(left[best], left[rest])
        inter_top = np.maximum(top[best], top[rest])
        inter_right = np.minimum(right[best], right[rest])
        inter_bottom = np.minimum(bottom[best], bottom[rest])

        inter_w = np.maximum(0.0, inter_right - inter_left + 1)
        inter_h = np.maximum(0.0, inter_bottom - inter_top + 1)
        inter = inter_w * inter_h
        overlap = inter / (areas[best] + areas[rest] - inter)

        # Only boxes that do not overlap too much survive to the next round.
        remaining = rest[overlap <= thresh]

    return keep
def check_keys(model, pretrained_state_dict):
    """Assert that a checkpoint shares at least one parameter name with ``model``.

    Args:
        model: Object exposing ``state_dict()``.
        pretrained_state_dict: Mapping of parameter names loaded from a checkpoint.

    Returns:
        ``True`` when at least one key is shared.

    Raises:
        AssertionError: when no key of the checkpoint matches the model.
    """
    checkpoint_names = set(pretrained_state_dict.keys())
    model_names = set(model.state_dict().keys())
    shared_names = model_names & checkpoint_names
    assert len(shared_names) > 0, 'load NONE from pretrained checkpoint'
    return True
def remove_prefix(state_dict, prefix):
    """Return a copy of ``state_dict`` with a leading ``prefix`` stripped from its keys.

    Old style model is stored with all names of parameters sharing common
    prefix 'module.'. Keys that do not start with ``prefix`` are kept as they
    are, and only the first occurrence of the prefix is removed.
    """
    def strip(name):
        if name.startswith(prefix):
            return name.split(prefix, 1)[-1]
        return name

    return {strip(key): value for key, value in state_dict.items()}
def load_model(model, pretrained_path, load_to_cpu):
    """Load checkpoint weights from ``pretrained_path`` into ``model``.

    Args:
        model: Module that receives the weights (``load_state_dict`` is called
            with ``strict=False``).
        pretrained_path: Path handed to ``torch.load``.
        load_to_cpu: When truthy the storages are left where ``torch.load``
            deserialised them; otherwise they are moved to the current CUDA
            device.

    Returns:
        The same ``model`` object.

    Raises:
        AssertionError: from ``check_keys`` when the checkpoint shares no key
            with the model.
    """
    # NOTE(review): torch.load unpickles the file; only load trusted checkpoints.
    if load_to_cpu:
        def relocate(storage, loc):
            return storage
    else:
        device = torch.cuda.current_device()

        def relocate(storage, loc):
            return storage.cuda(device)

    pretrained_dict = torch.load(pretrained_path, map_location=relocate)

    # Some checkpoints wrap the weights in a 'state_dict' entry.
    if "state_dict" in pretrained_dict.keys():
        weights = pretrained_dict['state_dict']
    else:
        weights = pretrained_dict
    pretrained_dict = remove_prefix(weights, 'module.')

    check_keys(model, pretrained_dict)
    model.load_state_dict(pretrained_dict, strict=False)
    return model
def detect_faces(ops, detect_model, img_raw, device):
    """Run the face detector on one image and return the detections after NMS.

    Args:
        ops: Options object; ``confidence_threshold``, ``top_k``,
            ``nms_threshold`` and ``keep_top_k`` are read from it.
        detect_model: Callable returning ``(loc, conf)`` for an image batch.
        img_raw: Input image array (height, width, channels).
        device: Device the image, scale and priors are moved to.

    Returns:
        ``float32`` array with one row per kept detection: four box
        coordinates followed by the score.
    """
    resize = 1

    img = np.float32(img_raw)
    if resize != 1:
        img = cv2.resize(img, None, None, fx=resize, fy=resize, interpolation=cv2.INTER_LINEAR)
    im_height, im_width, _ = img.shape

    # Factor that maps the decoded boxes to pixel coordinates (w, h, w, h).
    scale = torch.Tensor([img.shape[1], img.shape[0], img.shape[1], img.shape[0]])

    # Subtract the per-channel mean, then go from HWC to a 1xCxHxW tensor.
    img -= (104, 117, 123)
    img = torch.from_numpy(img.transpose(2, 0, 1)).unsqueeze(0)
    img = img.to(device)
    scale = scale.to(device)

    loc, conf = detect_model(img)  # forward pass

    priors = PriorBox(cfg, image_size=(im_height, im_width)).forward()
    prior_data = priors.to(device).data
    boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
    boxes = (boxes * scale / resize).cpu().numpy()
    scores = conf.squeeze(0).data.cpu().numpy()[:, 1]

    # ignore low scores
    confident = np.where(scores > ops.confidence_threshold)[0]
    boxes, scores = boxes[confident], scores[confident]

    # keep top-K before NMS
    top = scores.argsort()[::-1][:ops.top_k]
    boxes, scores = boxes[top], scores[top]

    # do NMS
    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
    keep = py_cpu_nms(dets, ops.nms_threshold)
    dets = dets[keep, :]

    # keep top-K faster NMS
    return dets[:ops.keep_top_k, :]
def get_faces_batch_landmarks(ops, landmarks_model, express_model, dets, img_raw, use_cuda, draw_bbox=True):
    """Run expression and landmark models on every detected face and annotate ``img_raw``.

    Each detection is refined with ``refine_face_bbox``, cropped from
    ``img_raw``, resized to 256x256 and normalised as ``(pixel - 128) / 256``.
    The crops are run as one batch through ``express_model`` (softmax over
    dim 1) and ``landmarks_model``.

    Args:
        ops: Options object (not read by this function).
        landmarks_model: Callable returning one landmark vector per crop.
        express_model: Callable returning expression logits per crop.
        dets: Sequence of detections; entries 0-3 are the box, entry 4 the score.
        img_raw: Image that is cropped and drawn on in place.
        use_cuda: When truthy the batch is moved to CUDA before inference.
        draw_bbox: When truthy, detection boxes, scores, five key points, the
            expression label and the face count are drawn. The magenta box
            around each detection and the landmark contours are drawn
            regardless of this flag.

    Returns:
        One dict per detection with the keys ``"xyxy"``, ``"score"``,
        ``"landmarks"``, ``"euler_angle"`` and ``"express"``. An empty list is
        returned when ``dets`` is empty.
    """
    def put_outlined(text, org, scale, back_color, front_color, back_width, front_width):
        # Thick pass first, thin pass on top, so the text gets an outline.
        cv2.putText(img_raw, text, org, cv2.FONT_HERSHEY_DUPLEX, scale, back_color, back_width)
        cv2.putText(img_raw, text, org, cv2.FONT_HERSHEY_DUPLEX, scale, front_color, front_width)

    def draw_face_count():
        put_outlined('face:' + str(len(dets)), (3, 35), 1.45, (55, 255, 255), (135, 135, 5), 5, 2)

    # Fix: without detections there is no batch to build; the original went on
    # to call torch.from_numpy(None) and raised.
    if len(dets) == 0:
        if draw_bbox:
            draw_face_count()
        return []

    # ----------------- crop and normalise every face
    r_bboxes = []
    imgs_crop = []
    batch = []
    for det in dets:
        b = list(map(int, det))
        r_bbox = refine_face_bbox((b[0], b[1], b[2], b[3]), img_raw.shape)
        r_bboxes.append(r_bbox)
        img_crop = img_raw[r_bbox[1]:r_bbox[3], r_bbox[0]:r_bbox[2]]
        imgs_crop.append(img_crop)

        img_ = cv2.resize(img_crop, (256, 256), interpolation=cv2.INTER_LINEAR)  # INTER_LINEAR INTER_CUBIC
        img_ = (img_.astype(np.float32) - 128.) / 256.
        batch.append(img_.transpose(2, 0, 1))
    # One stack instead of repeated concatenation. (bs, 3, h, w)
    image_batch = np.stack(batch, axis=0)

    # Drawing happens only after all crops were taken, so annotations of one
    # face never end up in the network input of another.
    for det in dets:
        text = "{:.4f}".format(det[4])
        b = list(map(int, det))
        if draw_bbox:
            cv2.rectangle(img_raw, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 2)
            put_outlined(text, (b[0], b[1] - 3), 0.6, (155, 155, 255), (155, 10, 10), 3, 1)

    image_batch = torch.from_numpy(image_batch).float()
    if use_cuda:
        image_batch = image_batch.cuda()

    # ----------------- express
    pre_e = express_model(image_batch.float())
    outputs_e = F.softmax(pre_e, dim=1)
    outputs_e = np.array(outputs_e.cpu().detach().numpy())
    max_index_e = np.argmax(outputs_e, axis=1)

    express_dict = {
        0: "001.anger",
        1: "002.disgust",
        2: "003.fear",
        3: "004.happy",
        4: "005.normal",
        5: "006.sad",
        6: "007.surprised",
    }
    # (class index, class name, probability) per face.
    express_list = []
    for kk in range(max_index_e.shape[0]):
        max_index_ = max_index_e[kk]
        score_ = outputs_e[kk][max_index_]
        express_list.append((max_index_, express_dict[max_index_], score_))

    # ----------------- landmarks
    pre_ = landmarks_model(image_batch.float())
    output = pre_.cpu().detach().numpy()

    faceswap_landmarks = []
    output_dict_ = []
    for i in range(len(dets)):
        dict_landmarks, list_e, global_dict_landmarks, face_pts = draw_landmarks(
            imgs_crop[i], output[i], r_bboxes[i], draw_circle=False
        )
        faceswap_landmarks.append(list_e)
        pitch, yaw, roll = draw_contour(img_raw, dict_landmarks, r_bboxes[i], face_pts)

        output_dict_.append({
            "xyxy": (r_bboxes[i][0], r_bboxes[i][1], r_bboxes[i][2], r_bboxes[i][3]),
            "score": str(dets[i][4]),
            "landmarks": global_dict_landmarks,
            "euler_angle": (int(pitch[0]), int(yaw[0]), int(roll[0])),
            "express": (float(express_list[i][0]), float(express_list[i][2])),
        })

    # ----------------- final annotations
    for i in range(len(dets)):
        bbox = dets[i]
        min_x = int(bbox[0])
        min_y = int(bbox[1])
        max_x = int(bbox[2])
        max_y = int(bbox[3])
        cv2.rectangle(img_raw, (min_x, min_y), (max_x, max_y), (255, 0, 255), thickness=4)

        # faceswap_landmarks[i] holds five x values followed by five y values.
        for k in range(5):
            x = int(faceswap_landmarks[i][k + 0])
            y = int(faceswap_landmarks[i][k + 5])
            if draw_bbox:
                cv2.circle(img_raw, (x, y), 2, (0, 0, 255), -1)

        if draw_bbox:
            label = "express:{},{:.2f}".format(express_list[i][1], express_list[i][2])
            put_outlined(label, (min_x, min_y - 20), 0.6, (155, 155, 255), (155, 10, 10), 3, 1)

    if draw_bbox:
        draw_face_count()

    return output_dict_
def get_faces_batch_landmarks_plfd(ops, landmarks_model, express_model, dets, img_raw, use_cuda, draw_bbox=True):
    """Variant of the batch landmark pipeline for a landmark model with two outputs.

    Each detection is refined with ``refine_face_bbox``, cropped from
    ``img_raw``, resized to 112x112 and normalised as ``pixel / 256``. The
    crops are run as one batch through ``express_model`` (softmax over dim 1)
    and ``landmarks_model``; the second value returned by ``landmarks_model``
    is used as the landmark prediction.

    Args:
        ops: Options object (not read by this function).
        landmarks_model: Callable returning a pair whose second element holds
            one landmark vector per crop.
        express_model: Callable returning expression logits per crop.
        dets: Sequence of detections; entries 0-3 are the box, entry 4 the score.
        img_raw: Image that is cropped and drawn on in place.
        use_cuda: When truthy the batch is moved to CUDA before inference.
        draw_bbox: When truthy, detection boxes, scores, five key points, the
            expression label and the face count are drawn. The magenta box
            around each detection and the landmark contours are drawn
            regardless of this flag.

    Returns:
        One dict per detection with the keys ``"xyxy"``, ``"score"``,
        ``"landmarks"``, ``"euler_angle"`` and ``"express"``. An empty list is
        returned when ``dets`` is empty.
    """
    def put_outlined(text, org, scale, back_color, front_color, back_width, front_width):
        # Thick pass first, thin pass on top, so the text gets an outline.
        cv2.putText(img_raw, text, org, cv2.FONT_HERSHEY_DUPLEX, scale, back_color, back_width)
        cv2.putText(img_raw, text, org, cv2.FONT_HERSHEY_DUPLEX, scale, front_color, front_width)

    def draw_face_count():
        put_outlined('face:' + str(len(dets)), (3, 35), 1.45, (55, 255, 255), (135, 135, 5), 5, 2)

    # Fix: without detections there is no batch to build; the original went on
    # to call torch.from_numpy(None) and raised.
    if len(dets) == 0:
        if draw_bbox:
            draw_face_count()
        return []

    # ----------------- crop and normalise every face
    r_bboxes = []
    imgs_crop = []
    batch = []
    for det in dets:
        b = list(map(int, det))
        r_bbox = refine_face_bbox((b[0], b[1], b[2], b[3]), img_raw.shape)
        r_bboxes.append(r_bbox)
        img_crop = img_raw[r_bbox[1]:r_bbox[3], r_bbox[0]:r_bbox[2]]
        imgs_crop.append(img_crop)

        img_ = cv2.resize(img_crop, (112, 112), interpolation=cv2.INTER_LINEAR)  # INTER_LINEAR INTER_CUBIC
        img_ = img_.astype(np.float32) / 256.
        batch.append(img_.transpose(2, 0, 1))
    # One stack instead of repeated concatenation. (bs, 3, h, w)
    image_batch = np.stack(batch, axis=0)

    # Drawing happens only after all crops were taken, so annotations of one
    # face never end up in the network input of another.
    for det in dets:
        text = "{:.4f}".format(det[4])
        b = list(map(int, det))
        if draw_bbox:
            cv2.rectangle(img_raw, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 2)
            put_outlined(text, (b[0], b[1] - 3), 0.6, (155, 155, 255), (155, 10, 10), 3, 1)

    image_batch = torch.from_numpy(image_batch).float()
    if use_cuda:
        image_batch = image_batch.cuda()

    # ----------------- express
    pre_e = express_model(image_batch.float())
    outputs_e = F.softmax(pre_e, dim=1)
    outputs_e = np.array(outputs_e.cpu().detach().numpy())
    max_index_e = np.argmax(outputs_e, axis=1)

    express_dict = {
        0: "001.anger",
        1: "002.disgust",
        2: "003.fear",
        3: "004.happy",
        4: "005.normal",
        5: "006.sad",
        6: "007.surprised",
    }
    # (class index, class name, probability) per face.
    express_list = []
    for kk in range(max_index_e.shape[0]):
        max_index_ = max_index_e[kk]
        score_ = outputs_e[kk][max_index_]
        express_list.append((max_index_, express_dict[max_index_], score_))

    # ----------------- landmarks (first model output is ignored)
    _, pre_ = landmarks_model(image_batch.float())
    output = pre_.cpu().detach().numpy()

    faceswap_landmarks = []
    output_dict_ = []
    for i in range(len(dets)):
        dict_landmarks, list_e, global_dict_landmarks, face_pts = draw_landmarks(
            imgs_crop[i], output[i], r_bboxes[i], draw_circle=False
        )
        faceswap_landmarks.append(list_e)
        pitch, yaw, roll = draw_contour(img_raw, dict_landmarks, r_bboxes[i], face_pts)

        output_dict_.append({
            "xyxy": (r_bboxes[i][0], r_bboxes[i][1], r_bboxes[i][2], r_bboxes[i][3]),
            "score": str(dets[i][4]),
            "landmarks": global_dict_landmarks,
            "euler_angle": (int(pitch[0]), int(yaw[0]), int(roll[0])),
            "express": (float(express_list[i][0]), float(express_list[i][2])),
        })

    # ----------------- final annotations
    for i in range(len(dets)):
        bbox = dets[i]
        min_x = int(bbox[0])
        min_y = int(bbox[1])
        max_x = int(bbox[2])
        max_y = int(bbox[3])
        cv2.rectangle(img_raw, (min_x, min_y), (max_x, max_y), (255, 0, 255), thickness=2)

        # faceswap_landmarks[i] holds five x values followed by five y values.
        for k in range(5):
            x = int(faceswap_landmarks[i][k + 0])
            y = int(faceswap_landmarks[i][k + 5])
            if draw_bbox:
                cv2.circle(img_raw, (x, y), 2, (0, 0, 255), -1)

        if draw_bbox:
            label = "express:{},{:.2f}".format(express_list[i][1], express_list[i][2])
            put_outlined(label, (min_x, min_y - 20), 0.6, (155, 155, 255), (155, 10, 10), 3, 1)

    if draw_bbox:
        draw_face_count()

    return output_dict_
components/face_detect/utils/datasets.py
已删除
100644 → 0
浏览文件 @
13e46365
import
glob
import
math
import
os
import
random
import
shutil
from
pathlib
import
Path
from
PIL
import
Image
from
tqdm
import
tqdm
import
cv2
import
numpy
as
np
import
torch
from
torch.utils.data
import
Dataset
from
torch.utils.data
import
DataLoader
def xyxy2xywh(x):
    """Convert boxes from ``[x1, y1, x2, y2]`` to ``[x_center, y_center, w, h]``.

    Args:
        x: 2-D ``torch.Tensor`` or array-like accepted by ``np.zeros_like``;
            columns 0-3 are read.

    Returns:
        A new tensor/array of the same shape and dtype as ``x``.
    """
    if isinstance(x, torch.Tensor):
        boxes = torch.zeros_like(x)
    else:
        boxes = np.zeros_like(x)

    x_min, y_min, x_max, y_max = x[:, 0], x[:, 1], x[:, 2], x[:, 3]
    boxes[:, 2] = x_max - x_min          # width
    boxes[:, 3] = y_max - y_min          # height
    boxes[:, 0] = (x_min + x_max) / 2    # centre x
    boxes[:, 1] = (y_min + y_max) / 2    # centre y
    return boxes
def xywh2xyxy(x):
    """Convert boxes from ``[x_center, y_center, w, h]`` to ``[x1, y1, x2, y2]``.

    Args:
        x: 2-D ``torch.Tensor`` or array-like accepted by ``np.zeros_like``;
            columns 0-3 are read.

    Returns:
        A new tensor/array of the same shape and dtype as ``x``.
    """
    if isinstance(x, torch.Tensor):
        boxes = torch.zeros_like(x)
    else:
        boxes = np.zeros_like(x)

    center_x, center_y, width, height = x[:, 0], x[:, 1], x[:, 2], x[:, 3]
    boxes[:, 0] = center_x - width / 2    # left
    boxes[:, 2] = center_x + width / 2    # right
    boxes[:, 1] = center_y - height / 2   # top
    boxes[:, 3] = center_y + height / 2   # bottom
    return boxes
class LoadImages:
    """Iterator over the images and videos found at a path (used for inference).

    Every iteration yields ``(path, img, img0, cap)`` where ``img0`` is the
    frame as read, ``img`` is its letterboxed, RGB, channel-first ``float32``
    version scaled to ``[0, 1]`` and ``cap`` is the current video capture
    (``None`` when no video was found).
    """

    def __init__(self, path, img_size=416):
        """Collect image and video files from a directory or a single file.

        Raises:
            AssertionError: if no supported image or video is found.
        """
        self.height = img_size
        img_formats = ['.jpg', '.jpeg', '.png', '.tif']
        vid_formats = ['.mov', '.avi', '.mp4']

        if os.path.isdir(path):
            files = sorted(glob.glob('%s/*.*' % path))
        elif os.path.isfile(path):
            files = [path]
        else:
            files = []

        def extension(name):
            return os.path.splitext(name)[-1].lower()

        images = [name for name in files if extension(name) in img_formats]
        videos = [name for name in files if extension(name) in vid_formats]
        nI, nV = len(images), len(videos)

        # Images come first, videos after them.
        self.files = images + videos
        self.nF = nI + nV  # number of files
        self.video_flag = [False] * nI + [True] * nV
        self.mode = 'images'
        if any(videos):
            self.new_video(videos[0])  # new video
        else:
            self.cap = None
        assert self.nF > 0, 'No images or videos found in ' + path

    def __iter__(self):
        """Restart the iteration from the first file."""
        self.count = 0
        return self

    def __next__(self):
        """Return the next image, or the next frame of the current video."""
        if self.count == self.nF:
            raise StopIteration
        path = self.files[self.count]

        if not self.video_flag[self.count]:
            # Read image
            self.count += 1
            img0 = cv2.imread(path)  # BGR
            assert img0 is not None, 'File Not Found ' + path
            print('image %g/%g %s: ' % (self.count, self.nF, path), end='')
        else:
            # Read video
            self.mode = 'video'
            ret_val, img0 = self.cap.read()
            if not ret_val:
                # Current video is exhausted: move on to the next file.
                self.count += 1
                self.cap.release()
                if self.count == self.nF:  # last video
                    raise StopIteration
                path = self.files[self.count]
                self.new_video(path)
                ret_val, img0 = self.cap.read()

            self.frame += 1
            print('video %g/%g (%g/%g) %s: ' % (self.count + 1, self.nF, self.frame, self.nframes, path), end='')

        # Padded resize
        img, _, _, _ = letterbox(img0, height=self.height)

        # Normalize RGB
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB
        img = np.ascontiguousarray(img, dtype=np.float32)  # uint8 to float32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0

        return path, img, img0, self.cap

    def new_video(self, path):
        """Open ``path`` as the current video and reset the frame counter."""
        self.frame = 0
        self.cap = cv2.VideoCapture(path)
        self.nframes = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))

    def __len__(self):
        return self.nF  # number of files
class LoadWebcam:
    """Endless iterator over frames of camera 0 (used for inference).

    Every iteration yields ``(img_path, img, img0, cam)`` where ``img0`` is the
    horizontally flipped frame, ``img`` its letterboxed, RGB, channel-first
    ``float32`` version scaled to ``[0, 1]``. Iteration stops when ESC is
    pressed.
    """

    def __init__(self, img_size=416):
        self.cam = cv2.VideoCapture(0)
        self.height = img_size

    def __iter__(self):
        """Reset the frame counter."""
        self.count = -1
        return self

    def __next__(self):
        """Grab one frame from the camera and preprocess it."""
        self.count += 1
        if cv2.waitKey(1) == 27:  # esc to quit
            cv2.destroyAllWindows()
            raise StopIteration

        # Read image
        ret_val, img0 = self.cam.read()
        assert ret_val, 'Webcam Error'
        img_path = 'webcam_%g.jpg' % self.count
        img0 = cv2.flip(img0, 1)  # flip left-right

        # Padded resize
        img, _, _, _ = letterbox(img0, height=self.height)

        # Normalize RGB
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB
        img = np.ascontiguousarray(img, dtype=np.float32)  # uint8 to float32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0

        return img_path, img, img0, self.cam

    def __len__(self):
        # The stream has no known length.
        return 0
class
LoadImagesAndLabels
(
Dataset
):
# for training/testing
def
__init__
(
self
,
path
,
batch_size
,
img_size
=
416
,
augment
=
True
,
multi_scale
=
False
):
print
(
'LoadImagesAndLabels init : '
,
path
)
with
open
(
path
,
'r'
)
as
file
:
img_files
=
file
.
read
().
splitlines
()
img_files
=
list
(
filter
(
lambda
x
:
len
(
x
)
>
0
,
img_files
))
np
.
random
.
shuffle
(
img_files
)
# shuffle img_list
print
(
"shuffle image..."
)
self
.
img_files
=
img_files
assert
len
(
self
.
img_files
)
>
0
,
'No images found in %s'
%
path
self
.
img_size
=
img_size
self
.
batch_size
=
batch_size
self
.
multi_scale
=
multi_scale
self
.
augment
=
augment
self
.
scale_index
=
0
if
self
.
multi_scale
:
self
.
img_size
=
img_size
# initiate with maximum multi_scale size, in case of out of memory
print
(
"Multi scale images training, init img_size"
,
self
.
img_size
)
else
:
print
(
"Fixed scale images, img_size"
,
self
.
img_size
)
self
.
label_files
=
[
x
.
replace
(
'images'
,
'labels'
).
replace
(
"JPEGImages"
,
'labels'
).
replace
(
'.bmp'
,
'.txt'
).
replace
(
'.jpg'
,
'.txt'
).
replace
(
'.png'
,
'.txt'
)
for
x
in
self
.
img_files
]
# print('self.img_files : ',self.img_files[1])
# print('self.label_files : ',self.label_files[1])
def
__len__
(
self
):
return
len
(
self
.
img_files
)
def
__getitem__
(
self
,
index
):
# if self.multi_scale and (index % self.batch_size == 0) and index != 0:
if
self
.
multi_scale
and
(
self
.
scale_index
%
self
.
batch_size
==
0
)
and
self
.
scale_index
!=
0
:
self
.
img_size
=
random
.
choice
(
range
(
11
,
18
))
*
32
# print("++++++ change img_size, index:", self.img_size, index)
if
self
.
multi_scale
:
self
.
scale_index
+=
1
if
self
.
scale_index
>=
(
100
*
self
.
batch_size
):
self
.
scale_index
=
0
img_path
=
self
.
img_files
[
index
]
label_path
=
self
.
label_files
[
index
]
img
=
cv2
.
imread
(
img_path
)
# BGR
assert
img
is
not
None
,
'File Not Found '
+
img_path
augment_hsv
=
random
.
random
()
<
0.5
# hsv_aug prob = 0.5
if
self
.
augment
and
augment_hsv
:
# SV augmentation by 50%
fraction
=
0.50
# must be < 1.0
img_hsv
=
cv2
.
cvtColor
(
img
,
cv2
.
COLOR_BGR2HSV
)
S
=
img_hsv
[:,
:,
1
].
astype
(
np
.
float32
)
V
=
img_hsv
[:,
:,
2
].
astype
(
np
.
float32
)
a
=
(
random
.
random
()
*
2
-
1
)
*
fraction
+
1
# a in [-0,5, 1.5]
S
*=
a
if
a
>
1
:
np
.
clip
(
S
,
None
,
255
,
out
=
S
)
a
=
(
random
.
random
()
*
2
-
1
)
*
fraction
+
1
V
*=
a
if
a
>
1
:
np
.
clip
(
V
,
None
,
255
,
out
=
V
)
img_hsv
[:,
:,
1
]
=
S
# .astype(np.uint8)
img_hsv
[:,
:,
2
]
=
V
# .astype(np.uint8)
cv2
.
cvtColor
(
img_hsv
,
cv2
.
COLOR_HSV2BGR
,
dst
=
img
)
h
,
w
,
_
=
img
.
shape
img
,
ratio
,
padw
,
padh
=
letterbox
(
img
,
height
=
self
.
img_size
,
augment
=
self
.
augment
)
# Load labels
labels
=
[]
if
os
.
path
.
isfile
(
label_path
):
with
open
(
label_path
,
'r'
)
as
file
:
lines
=
file
.
read
().
splitlines
()
x
=
np
.
array
([
x
.
split
()
for
x
in
lines
],
dtype
=
np
.
float32
)
if
x
.
size
>
0
:
# Normalized xywh to pixel xyxy format
labels
=
x
.
copy
()
labels
[:,
1
]
=
ratio
*
w
*
(
x
[:,
1
]
-
x
[:,
3
]
/
2
)
+
padw
labels
[:,
2
]
=
ratio
*
h
*
(
x
[:,
2
]
-
x
[:,
4
]
/
2
)
+
padh
labels
[:,
3
]
=
ratio
*
w
*
(
x
[:,
1
]
+
x
[:,
3
]
/
2
)
+
padw
labels
[:,
4
]
=
ratio
*
h
*
(
x
[:,
2
]
+
x
[:,
4
]
/
2
)
+
padh
# Augment image and labels
if
self
.
augment
:
img
,
labels
=
random_affine
(
img
,
labels
,
degrees
=
(
-
10
,
10
),
translate
=
(
0.10
,
0.10
),
scale
=
(
0.9
,
1.1
))
nL
=
len
(
labels
)
# number of labels
if
nL
:
# convert xyxy to xywh
labels
[:,
1
:
5
]
=
xyxy2xywh
(
labels
[:,
1
:
5
])
/
self
.
img_size
# 转化 格式 ,且 归一化
if
self
.
augment
:
# random left-right flip
lr_flip
=
True
if
lr_flip
and
random
.
random
()
>
0.5
:
img
=
np
.
fliplr
(
img
)
if
nL
:
labels
[:,
1
]
=
1
-
labels
[:,
1
]
# random up-down flip
ud_flip
=
False
if
ud_flip
and
random
.
random
()
>
0.5
:
img
=
np
.
flipud
(
img
)
if
nL
:
labels
[:,
2
]
=
1
-
labels
[:,
2
]
labels_out
=
torch
.
zeros
((
nL
,
6
))
# 加了 一个 batch size
if
nL
:
labels_out
[:,
1
:]
=
torch
.
from_numpy
(
labels
)
# Normalize
img
=
img
[:,
:,
::
-
1
].
transpose
(
2
,
0
,
1
)
# BGR to RGB, to 3x416x416
img
=
np
.
ascontiguousarray
(
img
,
dtype
=
np
.
float32
)
# uint8 to float32
img
/=
255.0
# 0 - 255 to 0.0 - 1.0
return
torch
.
from_numpy
(
img
),
labels_out
,
img_path
,
(
h
,
w
)
@staticmethod
def collate_fn(batch):
    """Merge a list of dataset samples into a single batch.

    Each sample is a 4-tuple ``(image, labels, path, hw)``. Column 0 of every
    label tensor is overwritten IN PLACE with the position of its sample in
    the batch, so that rows can be traced back to their image once all label
    tensors are concatenated.

    Returns:
        ``(images, labels, paths, hws)`` where ``images`` is the samples'
        image tensors stacked along a new dim 0, ``labels`` is all label
        tensors concatenated along dim 0, and ``paths`` / ``hws`` are tuples
        in batch order.
    """
    images, targets, paths, shapes = tuple(zip(*batch))  # transpose samples -> fields
    for sample_idx, target in enumerate(targets):
        # Tag every label row with the index of the image it belongs to.
        target[:, 0] = sample_idx
    stacked_images = torch.stack(images, 0)
    merged_targets = torch.cat(targets, 0)
    return stacked_images, merged_targets, paths, shapes
def letterbox(img, height=416, augment=False, color=(127.5, 127.5, 127.5)):
    """Scale an image to fit a ``height`` x ``height`` square and pad the rest.

    The longer side of ``img`` is scaled to ``height`` (aspect ratio kept) and
    the shorter side is padded symmetrically with a constant border.

    Args:
        img: image array; only ``img.shape[:2]`` (rows, columns) is read here.
        height: side length of the square output, in pixels.
        augment: when true, the resize interpolation is drawn at random with
            ``np.random.choice`` (``None`` means "use cv2.resize's default");
            otherwise nearest-neighbour interpolation is used.
        color: constant value used for the border.

    Returns:
        ``(padded_img, ratio, dw, dh)`` where ``ratio`` is the scale factor
        applied to the original image (``height / longest_side``) and
        ``dw`` / ``dh`` are the unrounded per-side paddings along width and
        height (they may end in ``.5``).
    """
    rows, cols = img.shape[:2]
    ratio = float(height) / max(rows, cols)  # new size / old size
    new_shape = (round(cols * ratio), round(rows * ratio))  # (width, height), cv2 order

    dw = (height - new_shape[0]) / 2  # padding per side along the width
    dh = (height - new_shape[1]) / 2  # padding per side along the height
    # The -0.1 / +0.1 nudges split an odd total padding into two integers that
    # differ by one and still sum to the full amount.
    left, right = round(dw - 0.1), round(dw + 0.1)
    top, bottom = round(dh - 0.1), round(dh + 0.1)

    if augment:
        # Duplicated entries are deliberate: they weight the random draw.
        interpolation = np.random.choice([None, cv2.INTER_NEAREST, cv2.INTER_LINEAR,
                                          None, cv2.INTER_NEAREST, cv2.INTER_LINEAR,
                                          cv2.INTER_AREA, cv2.INTER_CUBIC, cv2.INTER_LANCZOS4])
        resize_kwargs = {} if interpolation is None else {'interpolation': interpolation}
    else:
        resize_kwargs = {'interpolation': cv2.INTER_NEAREST}
    img = cv2.resize(img, new_shape, **resize_kwargs)

    # Pad the resized image out to the full square.
    img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)
    return img, ratio, dw, dh
def random_affine(img, targets=(), degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-2, 2),
                  borderValue=(127.5, 127.5, 127.5)):
    """Apply a random rotation / scale / translation / shear to an image and its boxes.

    Comparable in spirit to
    ``torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))``.
    Background: https://medium.com/uruvideo/dataset-augmentation-with-random-homographies-a8f4b44830d4

    Args:
        img: image array; ``img.shape[0]`` is its height and ``img.shape[1]`` its width.
        targets: ``None``, an empty sequence, or a NumPy array whose columns
            ``1:5`` hold pixel ``x1, y1, x2, y2`` box corners.
        degrees: ``(min, max)`` rotation angle in degrees.
        translate: ``(x, y)`` maximum translation as a fraction of the image
            width and height respectively.
        scale: ``(min, max)`` isotropic scale factor.
        shear: ``(min, max)`` shear angle in degrees, sampled independently for x and y.
        borderValue: fill value for pixels exposed by the warp (BGR order).

    Returns:
        ``(warped_img, targets)``. The warped image is a square whose side is
        the longer side of ``img``. When boxes were supplied, only those that
        remain reasonably sized after the warp are returned, with columns
        ``1:5`` replaced by the warped, clipped corners.
    """
    if targets is None:
        targets = []
    border = 0  # width of added border (optional)
    height = max(img.shape[0], img.shape[1]) + border * 2

    # NOTE: the order of the random.random() calls below defines the sequence
    # of samples drawn for a given seed; keep it stable.

    # Rotation and Scale (about the image centre)
    R = np.eye(3)
    a = random.random() * (degrees[1] - degrees[0]) + degrees[0]
    # a += random.choice([-180, -90, 0, 90])  # 90deg rotations added to small rotations
    s = random.random() * (scale[1] - scale[0]) + scale[0]
    R[:2] = cv2.getRotationMatrix2D(angle=a, center=(img.shape[1] / 2, img.shape[0] / 2), scale=s)

    # Translation. The x shift is proportional to the image WIDTH (shape[1])
    # and the y shift to the image HEIGHT (shape[0]); the two were previously
    # swapped, which only went unnoticed because inputs are usually square.
    T = np.eye(3)
    T[0, 2] = (random.random() * 2 - 1) * translate[0] * img.shape[1] + border  # x translation (pixels)
    T[1, 2] = (random.random() * 2 - 1) * translate[1] * img.shape[0] + border  # y translation (pixels)

    # Shear
    S = np.eye(3)
    S[0, 1] = math.tan((random.random() * (shear[1] - shear[0]) + shear[0]) * math.pi / 180)  # x shear (deg)
    S[1, 0] = math.tan((random.random() * (shear[1] - shear[0]) + shear[0]) * math.pi / 180)  # y shear (deg)

    M = S @ T @ R  # Combined transform. ORDER IS IMPORTANT HERE!!
    imw = cv2.warpPerspective(img, M, dsize=(height, height), flags=cv2.INTER_LINEAR,
                              borderValue=borderValue)  # BGR order borderValue

    # Warp the boxes with the same matrix.
    if len(targets) > 0:
        n = targets.shape[0]
        points = targets[:, 1:5].copy()
        area0 = (points[:, 2] - points[:, 0]) * (points[:, 3] - points[:, 1])  # pre-warp box areas

        # Transform all four corners of every box in homogeneous coordinates.
        xy = np.ones((n * 4, 3))
        xy[:, :2] = points[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(n * 4, 2)  # x1y1, x2y2, x1y2, x2y1
        xy = (xy @ M.T)[:, :2].reshape(n, 8)

        # Axis-aligned box enclosing the four warped corners.
        x = xy[:, [0, 2, 4, 6]]
        y = xy[:, [1, 3, 5, 7]]
        xy = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T

        # The enclosing box of a rotated rectangle is larger than the object;
        # shrink it about its centre by an angle-dependent factor.
        radians = a * math.pi / 180
        reduction = max(abs(math.sin(radians)), abs(math.cos(radians))) ** 0.5
        x = (xy[:, 2] + xy[:, 0]) / 2
        y = (xy[:, 3] + xy[:, 1]) / 2
        w = (xy[:, 2] - xy[:, 0]) * reduction
        h = (xy[:, 3] - xy[:, 1]) * reduction
        xy = np.concatenate((x - w / 2, y - h / 2, x + w / 2, y + h / 2)).reshape(4, n).T

        # Clip to the output canvas, then drop boxes that became too small,
        # lost more than 90% of their area, or turned extremely elongated.
        np.clip(xy, 0, height, out=xy)
        w = xy[:, 2] - xy[:, 0]
        h = xy[:, 3] - xy[:, 1]
        area = w * h
        ar = np.maximum(w / (h + 1e-16), h / (w + 1e-16))
        i = (w > 4) & (h > 4) & (area / (area0 + 1e-16) > 0.1) & (ar < 10)

        targets = targets[i]  # boolean indexing copies, so the caller's array is untouched
        targets[:, 1:5] = xy[i]

    return imw, targets
def convert_images2bmp():
    """Convert the COCO 2014 JPEG folders to BMP and rewrite the image lists.

    For each hard-coded image directory a sibling ``<name>bmp`` directory is
    recreated from scratch (an existing one is deleted first) and every
    ``*.jpg`` is re-saved there as ``.bmp``. The two list files are then
    copied to ``*5k_bmp.txt`` with their paths rewritten to the BMP copies.

    Author's timing note: cv2.imread() reads jpg at 230 img/s, *.bmp at 400 img/s.
    """
    image_dirs = ('../coco/images/val2014/', '../coco/images/train2014/')
    for path in image_dirs:
        folder = os.sep + Path(path).name
        bmp_folder = folder + 'bmp'
        output = path.replace(folder, bmp_folder)

        # Start from an empty output folder.
        if os.path.exists(output):
            shutil.rmtree(output)
        os.makedirs(output)

        jpg_files = glob.glob('%s*.jpg' % path)
        for jpg_file in tqdm(jpg_files):
            bmp_file = jpg_file.replace('.jpg', '.bmp')
            bmp_file = bmp_file.replace(folder, bmp_folder)
            cv2.imwrite(bmp_file, cv2.imread(jpg_file))

    # Rewrite the image-list files so they point at the BMP copies.
    substitutions = (
        ('2014/', '2014bmp/'),
        ('.jpg', '.bmp'),
        ('/Users/glennjocher/PycharmProjects/', '../'),
    )
    for label_path in ('../coco/trainvalno5k.txt', '../coco/5k.txt'):
        with open(label_path, 'r') as src:
            text = src.read()
        for old, new in substitutions:
            text = text.replace(old, new)
        with open(label_path.replace('5k', '5k_bmp'), 'w') as dst:
            dst.write(text)
components/face_detect/utils/utils.py
已删除
100644 → 0
浏览文件 @
13e46365
import
glob
import
random
import
time
from
collections
import
defaultdict
import
cv2
import
numpy
as
np
import
torch
import
torch.nn
as
nn
from
dp_models.light_pose.modules.keypoints
import
BODY_PARTS_KPT_IDS
,
BODY_PARTS_PAF_IDS
# Module-level side effects, applied once on import.

# Widen the console output of tensors and arrays so large values print on one
# line; NumPy floats use a fixed 11-character, 5-significant-digit format.
torch.set_printoptions(linewidth=1320, precision=5, profile='long')
np.set_printoptions(linewidth=320, formatter={'float_kind': '{:11.5g}'.format})  # format short g, %precision=5

# Prevent OpenCV from multithreading (to use PyTorch DataLoader)
cv2.setNumThreads(0)
def float3(x):
    """Return ``x`` rounded to three decimal places, as a float.

    The value is formatted with the ``.3f`` format spec and parsed back.
    """
    rounded_text = f'{x:.3f}'
    return float(rounded_text)
def init_seeds(seed=0):
    """Seed Python's, NumPy's and PyTorch's random number generators.

    Args:
        seed: integer seed applied to ``random``, ``numpy.random`` and
            ``torch``; when CUDA is available the CUDA generators are seeded
            as well.

    The previous CPU-only branch called ``torch.manual_seed_all``, which is
    not part of the torch API and raised ``AttributeError`` on every machine
    without CUDA. ``torch.manual_seed`` above already covers the CPU case, so
    that branch has been removed.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)  # every visible GPU, not only the current one
def load_classes(path):
    """Load class names from a text file, one name per line.

    Args:
        path: path of the text file to read.

    Returns:
        List of the non-empty lines of the file, in order.
    """
    # Use a context manager so the handle is closed even if reading fails
    # (the file was previously left open).
    with open(path, 'r') as fp:
        names = fp.read().split('\n')
    return list(filter(None, names))  # filter removes empty strings (such as last line)
def model_info(model):
    """Print a line-by-line description of a PyTorch model's parameters.

    One row is printed per entry of ``model.named_parameters()`` (index, name,
    ``requires_grad``, element count, shape, mean and standard deviation),
    followed by a one-line summary with the totals.

    Args:
        model: object exposing ``parameters()`` and ``named_parameters()``.
    """
    n_p = sum(x.numel() for x in model.parameters())  # number parameters
    n_g = sum(x.numel() for x in model.parameters() if x.requires_grad)  # number gradients
    print('\n%5s %60s %9s %12s %20s %10s %10s' % ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', 'sigma'))
    # Track the row count explicitly: reading the loop variable after the loop
    # raised NameError for a model without any parameters.
    n_layers = 0
    for i, (name, p) in enumerate(model.named_parameters()):
        print('%5g %60s %9s %12g %20s %10.3g %10.3g' % (
            i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std()))
        n_layers = i + 1
    print('Model Summary: %g layers, %g parameters, %g gradients' % (n_layers, n_p, n_g))
def weights_init_normal(m):
    """Initialise a module's parameters according to its class name.

    * class name contains ``'Conv'``: weights drawn from N(0.0, 0.03)
    * class name contains ``'BatchNorm2d'``: weights drawn from N(1.0, 0.03),
      bias set to 0
    * anything else: left untouched

    Operates in place on ``m.weight.data`` / ``m.bias.data``.
    """
    layer_type = type(m).__name__
    if 'Conv' in layer_type:
        torch.nn.init.normal_(m.weight.data, 0.0, 0.03)
        return
    if 'BatchNorm2d' in layer_type:
        torch.nn.init.normal_(m.weight.data, 1.0, 0.03)
        torch.nn.init.constant_(m.bias.data, 0.0)
def xyxy2xywh(x):
    """Convert boxes from corner form to centre form.

    Args:
        x: 2-D ``torch.Tensor`` or NumPy array whose first four columns are
            ``[x1, y1, x2, y2]``.

    Returns:
        A new container of the same kind, shape and dtype as ``x`` whose first
        four columns are ``[center_x, center_y, width, height]``; any further
        columns are zero. ``x`` itself is not modified.
    """
    if isinstance(x, torch.Tensor):
        boxes = torch.zeros_like(x)
    else:
        boxes = np.zeros_like(x)
    left, top, right, bottom = x[:, 0], x[:, 1], x[:, 2], x[:, 3]
    boxes[:, 0] = (left + right) / 2   # centre x
    boxes[:, 1] = (top + bottom) / 2   # centre y
    boxes[:, 2] = right - left         # width
    boxes[:, 3] = bottom - top         # height
    return boxes
def xywh2xyxy(x):
    """Convert boxes from centre form to corner form.

    Args:
        x: 2-D ``torch.Tensor`` or NumPy array whose first four columns are
            ``[center_x, center_y, width, height]``.

    Returns:
        A new container of the same kind, shape and dtype as ``x`` whose first
        four columns are ``[x1, y1, x2, y2]``; any further columns are zero.
        ``x`` itself is not modified.
    """
    if isinstance(x, torch.Tensor):
        corners = torch.zeros_like(x)
    else:
        corners = np.zeros_like(x)
    cx, cy, width, height = x[:, 0], x[:, 1], x[:, 2], x[:, 3]
    corners[:, 0] = cx - width / 2    # x1
    corners[:, 1] = cy - height / 2   # y1
    corners[:, 2] = cx + width / 2    # x2
    corners[:, 3] = cy + height / 2   # y2
    return corners
def scale_coords(img_size, coords, img0_shape):
    """Map box corners from the padded square network input back to the original image.

    Undoes a letterbox transform: the symmetric padding is subtracted, the
    result is divided by the resize gain, and negative values are clamped to 0
    (there is no upper clamp).

    Args:
        img_size: side length of the square network input (e.g. 416).
        coords: ``torch.Tensor`` whose first four columns are
            ``x1, y1, x2, y2``; MODIFIED IN PLACE.
        img0_shape: shape of the original image, ``(height, width, ...)``.

    Returns:
        The same ``coords`` tensor, now in original-image pixels.
    """
    gain = float(img_size) / max(img0_shape)  # resize factor applied to the original image
    orig_h, orig_w = img0_shape[0], img0_shape[1]
    pad_x = (img_size - orig_w * gain) / 2  # padding added on each side along the width
    pad_y = (img_size - orig_h * gain) / 2  # padding added on each side along the height

    for col in (0, 2):
        coords[:, col] -= pad_x
    for col in (1, 3):
        coords[:, col] -= pad_y
    coords[:, :4] /= gain
    coords[:, :4] = coords[:, :4].clamp(min=0)  # coordinates must not be negative
    return coords
def ap_per_class(tp, conf, pred_cls, target_cls):
    """Compute per-class precision, recall, average precision and F1.

    Source: https://github.com/rafaelpadilla/Object-Detection-Metrics.

    # Arguments
        tp:         True-positive flag per prediction (NumPy array of 0/1).
        conf:       Confidence per prediction, 0-1 (NumPy array).
        pred_cls:   Predicted class per prediction (NumPy array).
        target_cls: Class of every ground-truth object (NumPy array).
    # Returns
        ``(p, r, ap, f1, classes)``: arrays with one entry per class present
        in ``target_cls`` (final precision, final recall, average precision as
        computed in py-faster-rcnn, F1) and the class ids as ``int32``.
    """
    # Rank all predictions by descending confidence.
    order = np.argsort(-conf)
    tp, conf, pred_cls = tp[order], conf[order], pred_cls[order]

    # Only classes that actually occur in the ground truth are evaluated.
    unique_classes = np.unique(target_cls)

    ap, p, r = [], [], []
    for cls in unique_classes:
        mask = pred_cls == cls
        n_gt = (target_cls == cls).sum()  # number of ground-truth objects
        n_p = mask.sum()                  # number of predicted objects

        if n_p == 0 and n_gt == 0:
            continue
        if n_p == 0 or n_gt == 0:
            # Nothing predicted (or nothing to find): every metric is zero.
            ap.append(0)
            r.append(0)
            p.append(0)
            continue

        # Running counts of true and false positives down the ranked list.
        hits = tp[mask]
        tpc = hits.cumsum()
        fpc = (1 - hits).cumsum()

        recall_curve = tpc / (n_gt + 1e-16)
        precision_curve = tpc / (tpc + fpc)
        r.append(recall_curve[-1])
        p.append(precision_curve[-1])

        # AP is the area under the recall-precision curve.
        ap.append(compute_ap(recall_curve, precision_curve))

    # F1 is the harmonic mean of precision and recall.
    p, r, ap = np.array(p), np.array(r), np.array(ap)
    f1 = 2 * p * r / (p + r + 1e-16)

    return p, r, ap, f1, unique_classes.astype('int32')
def compute_ap(recall, precision):
    """Compute the average precision from recall and precision curves.

    Source: https://github.com/rbgirshick/py-faster-rcnn.

    # Arguments
        recall:    The recall curve (sequence of floats).
        precision: The precision curve (sequence of floats).
    # Returns
        The average precision as computed in py-faster-rcnn: the area under
        the monotonically non-increasing envelope of the precision curve.
    """
    # Add sentinel points so the curve spans recall 0..1.
    mrec = np.concatenate(([0.], recall, [1.]))
    mpre = np.concatenate(([0.], precision, [0.]))

    # Precision envelope: each point becomes the maximum precision at any
    # recall to its right (a running maximum taken from the end).
    mpre = np.maximum.accumulate(mpre[::-1])[::-1]

    # Only the positions where recall changes contribute area.
    steps = np.flatnonzero(mrec[1:] != mrec[:-1])

    # Sum of (delta recall) * precision.
    return np.sum((mrec[steps + 1] - mrec[steps]) * mpre[steps + 1])
def bbox_iou(box1, box2, x1y1x2y2=True):
    """Return the IoU of one box against a set of boxes.

    Args:
        box1: tensor with 4 values describing a single box.
        box2: tensor of shape (n, 4) describing n boxes; it is transposed
            internally so each coordinate becomes a length-n vector.
        x1y1x2y2: when true the boxes are ``x1, y1, x2, y2`` corners,
            otherwise ``center_x, center_y, width, height``.

    Returns:
        Tensor with the intersection-over-union of ``box1`` with every box in
        ``box2``.
    """
    box2 = box2.t()

    if x1y1x2y2:
        left1, top1, right1, bottom1 = box1[0], box1[1], box1[2], box1[3]
        left2, top2, right2, bottom2 = box2[0], box2[1], box2[2], box2[3]
    else:
        # Centre form -> corners.
        left1, right1 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2
        top1, bottom1 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2
        left2, right2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2
        top2, bottom2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2

    # Overlap extent along each axis, floored at zero for disjoint boxes.
    overlap_w = (torch.min(right1, right2) - torch.max(left1, left2)).clamp(0)
    overlap_h = (torch.min(bottom1, bottom2) - torch.max(top1, top2)).clamp(0)
    inter_area = overlap_w * overlap_h

    # The small epsilon keeps the denominator non-zero for degenerate boxes.
    area1 = (right1 - left1) * (bottom1 - top1)
    area2 = (right2 - left2) * (bottom2 - top2)
    union_area = (area1 + 1e-16) + area2 - inter_area

    return inter_area / union_area
def wh_iou(box1, box2):
    """Return the IoU of a (width, height) pair against n such pairs.

    Only sizes are compared: the overlap is computed as if all boxes shared
    the same corner.

    Args:
        box1: tensor holding one ``(w, h)`` pair.
        box2: tensor of shape (n, 2) holding n ``(w, h)`` pairs; it is
            transposed internally.

    Returns:
        Tensor with the IoU of ``box1`` with every pair in ``box2``.
    """
    box2 = box2.t()

    ref_w, ref_h = box1[0], box1[1]
    other_w, other_h = box2[0], box2[1]

    inter_area = torch.min(ref_w, other_w) * torch.min(ref_h, other_h)

    # The small epsilon keeps the denominator non-zero for zero-sized boxes.
    ref_area = ref_w * ref_h
    other_area = other_w * other_h
    union_area = (ref_area + 1e-16) + other_area - inter_area

    return inter_area / union_area
def compute_loss(p, targets):  # predictions, targets
    """Compute the detection loss over all prediction layers.

    Args:
        p: sequence of per-layer prediction tensors. Each is indexed below as
            ``pi0[image, anchor, gj, gi]``; along the last dimension,
            channels 0:2 are used as xy, 2:4 as wh, 4 as objectness and 5:
            as class scores.
        targets: 4-tuple ``(txy, twh, tcls, indices)`` with one entry per
            layer in each element (presumably the output of
            ``build_targets`` - confirm against the caller).

    Returns:
        ``(loss, d)`` where ``loss`` is the summed loss tensor and ``d`` is a
        ``defaultdict(float)`` holding the Python-float values of the
        'total', 'xy', 'wh', 'conf' and 'cls' terms.
    """
    # Build the accumulators on the same device type as the predictions.
    FT = torch.cuda.FloatTensor if p[0].is_cuda else torch.FloatTensor
    lxy, lwh, lcls, lconf = FT([0]), FT([0]), FT([0]), FT([0])  # all losses start at 0
    txy, twh, tcls, indices = targets
    MSE = nn.MSELoss()
    CE = nn.CrossEntropyLoss()
    BCE = nn.BCEWithLogitsLoss()  # suited to multi-label targets such as [1, 1, 0]

    # Compute losses
    for i, pi0 in enumerate(p):  # layer i predictions, i
        b, a, gj, gi = indices[i]  # image index, anchor index and the two grid indices of each target
        tconf = torch.zeros_like(pi0[..., 0])  # objectness target: 0 everywhere unless a target is assigned

        # Compute losses
        k = 1  # nT / bs
        if len(b) > 0:
            pi = pi0[b, a, gj, gi]  # predictions closest to anchors
            tconf[b, a, gj, gi] = 1  # these cells are responsible for an object

            # Fixed per-term weights: xy x8, wh x4, class x1.
            lxy += (k * 8) * MSE(torch.sigmoid(pi[..., 0:2]), txy[i])  # xy loss
            lwh += (k * 4) * MSE(pi[..., 2:4], twh[i])  # wh loss
            lcls += (k * 1) * CE(pi[..., 5:], tcls[i])  # class_conf loss

        # Objectness is supervised on every cell, including layers that
        # received no targets, so this term sits outside the `if` above.
        lconf += (k * 64) * BCE(pi0[..., 4], tconf)  # obj_conf loss
    loss = lxy + lwh + lconf + lcls

    # Add to dictionary
    d = defaultdict(float)
    losses = [loss.item(), lxy.item(), lwh.item(), lconf.item(), lcls.item()]
    for name, x in zip(['total', 'xy', 'wh', 'conf', 'cls'], losses):
        d[name] = x

    return loss, d
def build_targets(model, targets):
    """Assign ground-truth boxes to anchors and grid cells for every YOLO layer.

    Args:
        model: network exposing ``module_list``; a
            ``DistributedDataParallel`` wrapper is unwrapped first.
        targets: tensor with rows ``[image, class, x, y, w, h]``. The box
            values are multiplied by the layer's ``nG`` below, so they are
            presumably normalised to 0-1 - confirm against the data loader.

    Returns:
        ``(txy, twh, tcls, indices)``: four lists with one entry per YOLO
        layer - xy offsets inside the grid cell, log-space wh relative to the
        matched anchor, class indices, and ``(image, anchor, gj, gi)`` index
        tuples.
    """
    # targets = [image, class, x, y, w, h]
    if isinstance(model, nn.parallel.DistributedDataParallel):
        model = model.module

    txy, twh, tcls, indices = [], [], [], []
    for i, layer in enumerate(get_yolo_layers(model)):  # iterate over the YOLO layers (three, per the original note)
        layer = model.module_list[layer][0]

        # iou of targets-anchors
        gwh = targets[:, 4:6] * layer.nG  # target wh expressed in grid units
        iou = [wh_iou(x, gwh) for x in layer.anchor_vec]
        iou, a = torch.stack(iou, 0).max(0)  # best iou and anchor

        # reject below threshold ious (OPTIONAL, increases P, lowers R)
        reject = True
        if reject:
            j = iou > 0.10
            t, a, gwh = targets[j], a[j], gwh[j]
        else:
            t = targets

        # Indices
        b, c = t[:, :2].long().t()  # target image, class
        gxy = t[:, 2:4] * layer.nG
        gi, gj = gxy.long().t()  # gi comes from the x column, gj from the y column
        indices.append((b, a, gj, gi))  # image index, anchor index, grid y index, grid x index

        # XY coordinates
        txy.append(gxy - gxy.floor())  # offset of the box centre inside its grid cell

        # Width and height
        twh.append(torch.log(gwh / layer.anchor_vec[a]))  # YOLO method: log of the ratio to the anchor size
        # twh.append(torch.sqrt(gwh / layer.anchor_vec[a]) / 2)  # power method

        # Class
        tcls.append(c)
        if c.shape[0]:
            # NOTE(review): if class indices are 0-based (0 .. nC-1) this bound
            # should probably be `<` rather than `<=` - confirm before changing.
            assert c.max().item() <= layer.nC, 'Target classes exceed model classes'

    return txy, twh, tcls, indices
# @profile
def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4):
    """
    Removes detections with lower object confidence score than 'conf_thres'
    Non-Maximum Suppression to further filter detections.

    Args:
        prediction: per-image predictions; each ``pred`` is indexed as rows of
            ``(center x, center y, width, height, obj_conf, class scores...)``.
            NOTE: column 4 of each ``pred`` is multiplied by the best class
            score IN PLACE, so the caller's tensor is modified.
        conf_thres: minimum ``obj_conf * class_conf`` for a detection to be kept.
        nms_thres: IoU threshold used by the suppression / merging step.

    Returns detections with shape:
        (x1, y1, x2, y2, object_conf, class_conf, class)
    as a list with one entry per image; the entry stays ``None`` for images
    with no surviving detection. ``object_conf`` here is the product of the
    objectness and the best class score.
    """

    min_wh = 2  # (pixels) minimum box width and height

    output = [None] * len(prediction)
    for image_i, pred in enumerate(prediction):
        # (An experimental "prior class size rejection" based on per-class
        # log-width/height likelihoods used to be sketched here.)

        # Filter out confidence scores below threshold
        class_conf, class_pred = pred[:, 5:].max(1)  # max class_conf, index
        pred[:, 4] *= class_conf  # final conf = obj_conf * class_conf

        # Keep confident boxes that are wider and taller than min_wh.
        i = (pred[:, 4] > conf_thres) & (pred[:, 2] > min_wh) & (pred[:, 3] > min_wh)
        pred2 = pred[i]

        # If none are remaining => process next image
        if len(pred2) == 0:
            continue

        # Select predicted classes
        class_conf = class_conf[i]
        class_pred = class_pred[i].unsqueeze(1).float()

        # Box (center x, center y, width, height) to (x1, y1, x2, y2)
        pred2[:, :4] = xywh2xyxy(pred2[:, :4])
        # pred[:, 4] *= class_conf  # improves mAP from 0.549 to 0.551

        # Detections ordered as (x1y1x2y2, obj_conf, class_conf, class_pred)
        pred2 = torch.cat((pred2[:, :5], class_conf.unsqueeze(1), class_pred), 1)

        # Get detections sorted by decreasing confidence scores
        pred2 = pred2[(-pred2[:, 4]).argsort()]

        det_max = []
        nms_style = 'MERGE'  # 'OR' (default), 'AND', 'MERGE' (experimental)
        for c in pred2[:, -1].unique():
            dc = pred2[pred2[:, -1] == c]  # select class c
            dc = dc[:min(len(dc), 100)]  # limit to first 100 boxes

            # Non-maximum suppression
            if nms_style == 'OR':  # default
                # Greedy NMS: keep the best box, drop everything overlapping it, repeat.
                while dc.shape[0]:
                    det_max.append(dc[:1])  # save highest conf detection
                    if len(dc) == 1:  # Stop if we're at the last detection
                        break
                    iou = bbox_iou(dc[0], dc[1:])  # iou with other boxes
                    dc = dc[1:][iou < nms_thres]  # remove ious > threshold

            elif nms_style == 'AND':  # requires overlap, single boxes erased
                while len(dc) > 1:
                    iou = bbox_iou(dc[0], dc[1:])  # iou with other boxes
                    if iou.max() > 0.5:
                        det_max.append(dc[:1])
                    dc = dc[1:][iou < nms_thres]  # remove ious > threshold

            elif nms_style == 'MERGE':  # weighted mixture box
                while len(dc):
                    i = bbox_iou(dc[0], dc) > nms_thres  # boxes overlapping the current best (includes itself)
                    weights = dc[i, 4:5]
                    # Replace the best box by the confidence-weighted mean of its overlap group.
                    dc[0, :4] = (weights * dc[i, :4]).sum(0) / weights.sum()
                    det_max.append(dc[:1])
                    dc = dc[i == 0]  # continue with the boxes outside the group

        if len(det_max):
            det_max = torch.cat(det_max)  # concatenate
            output[image_i] = det_max[(-det_max[:, 4]).argsort()]  # sort

    return output
def get_yolo_layers(model):
    """Return the indices of the YOLO layers inside ``model.module_list``.

    An entry counts as a YOLO layer when its first element exposes
    ``img_size`` and - if that value is truthy - also ``nG``. The probe
    expression is kept exactly as before, including its short-circuit: when
    ``img_size`` is falsy, ``nG`` is not looked up at all.

    Args:
        model: object exposing an iterable ``module_list``.

    Returns:
        List of integer positions into ``model.module_list``.

    Raises:
        AssertionError: if no YOLO layer is found.
    """
    yolo_layer_index = []
    for index, block in enumerate(model.module_list):
        try:
            # Evaluated only for the attribute lookups; the value is discarded.
            _ = block[0].img_size and block[0].nG  # only yolo layer need img_size and nG
        except (AttributeError, IndexError, KeyError, TypeError):
            # Not a YOLO layer (missing attribute, empty or non-indexable
            # entry). A bare `except` used to hide KeyboardInterrupt and
            # SystemExit here as well.
            continue
        yolo_layer_index.append(index)
    assert len(yolo_layer_index) > 0, "can not find yolo layer"
    return yolo_layer_index
lib/wyw2s_lib/cfg/wyw2s.cfg
浏览文件 @
54a7374d
YouWantToSee=BradPitt
detect_model_path=./wyw2s_models/face_yolo_416-20210418.pt
detect_model_arch=yolo
detect_input_size = 416
yolo_anchor_scale=1.
detect_conf_thres=0.4
detect_nms_thres=0.45
face_verify_backbone_path=./wyw2s_models/face_verify-model_ir_se-50.pth
facebank_path=./wyw2s_models/facebank
face_verify_threshold=1.2
face_multitask_model_path=./wyw2s_models/face_multitask-resnet_50_imgsize-256-20210411.pth
face_euler_model_path=./wyw2s_models/euler_angle-resnet_18_imgsize_256.pth
lib/wyw2s_lib/doc/README.md
浏览文件 @
54a7374d
...
...
@@ -6,11 +6,12 @@
*
场景:将视频中目标人物的相关视频进行裁剪。
## 项目配置
### 1、软件
### 1、软件
*
Python 3.7
*
PyTorch >= 1.5.1
*
opencv-python
*
moviepy
*
shutil
## 相关项目
### 1、脸部检测项目(yolo_v3)
...
...
@@ -29,17 +30,54 @@ euler_angle-resnet_18_imgsize_256.pth # 人脸姿态角 pitch yaw roll 模型
face_multitask-resnet_50_imgsize-256-20210411.pth # 性别、年龄、关键点 模型
face_verify-model_ir_se-50.pth # 人脸识别特征抽取模型
facebank/facebank.pth # 人脸匹配资源库特征向量
facebank/names.npy # 人脸匹配资源库 face id,示例中的face id为人名字
```
*
目前示例提供的人脸资源库的具体face id 如下:
```
['AngelinaJolie' 'AnneHathaway' 'BradPitt' 'JenniferAniston'
'JohnnyDepp' 'JudeLaw' 'NicoleKidman' 'ScarlettJohansson' 'TomCruise']
```
## 项目使用方法
### 1、下载项目预训练模型 package 。
### 2、构建人脸匹配资源库,相关脚本 [make_facebank.py](https://codechina.csdn.net/EricLee/dpcas/-/blob/master/lib/wyw2s_lib/make_facebank_tools/make_facebank.py)
### 2、构建人脸匹配资源库
(项目中已经生成了示例匹配库,如果不需要建立自己的人脸资源库此步骤可以跳过)
,相关脚本 [make_facebank.py](https://codechina.csdn.net/EricLee/dpcas/-/blob/master/lib/wyw2s_lib/make_facebank_tools/make_facebank.py)
### 3、打开配置文件 lib/wyw2s_lib/cfg/[wyw2s.cfg](https://codechina.csdn.net/EricLee/dpcas/-/blob/master/lib/wyw2s_lib/cfg/wyw2s.cfg) 进行相关参数配置,具体配置参数如下,请仔细阅读。
```
YouWantToSee=BradPitt
YouWantToSee=BradPitt # 你需要裁剪的 face id ,示例为人名字,需要与facebank/names.npy 和 facebank/facebank.pth 信息匹配
detect_model_path=./wyw2s_models/face_yolo_416-20210418.pt # 人脸检测模型
detect_model_arch=yolo # 模型类型
detect_input_size = 416 # 模型的图片输入尺寸
yolo_anchor_scale=1. # anchor 的缩放系数,默认 1
detect_conf_thres=0.4 # 人脸检测置信度,高于该置信度进行输出
detect_nms_thres=0.45 # 检测的nms阈值
face_verify_backbone_path=./wyw2s_models/face_verify-model_ir_se-50.pth # 人脸识别特征抽取模型地址
facebank_path=./wyw2s_models/facebank # 人脸资源库地址
face_verify_threshold=1.2 # 人脸匹配阈值设定,低于该设定阈值认为匹配成功
face_multitask_model_path=./wyw2s_models/face_multitask-resnet_50_imgsize-256-20210411.pth # 人脸多任务(性别、年龄、关键点)模型地址
face_euler_model_path=./wyw2s_models/euler_angle-resnet_18_imgsize_256.pth # 人脸姿态角(航向角、俯仰角、翻滚角)回归模型地址
```
### 4、下载示例视频
*
[
示例视频 下载地址(百度网盘 Password: jaqh )
](
https://pan.baidu.com/s/1CSbfA1nHDhfCyt4_2NSRQg
)
*
或是用同学自己的示例视频
### 5、运行 "Who You Want To See" 项目
*
打开main.py,做如下相关参数设置:
```
APP_P = "wyw2s" # 选择不同项目
cfg_file = "./lib/wyw2s_lib/cfg/wyw2s.cfg" # 选择配置文件
main_wyw2s(video_path = "./video/f1.mp4",cfg_file = cfg_file)# 设置视频路径,加载 who you want 2 see 应用
```
### 4、
根目录下运行命令: python main.py
*
根目录下运行命令: python main.py
## 联系方式 (Contact)
*
E-mails: 305141918@qq.com
main.py
浏览文件 @
54a7374d
...
...
@@ -22,8 +22,8 @@ import sys
sys
.
path
.
append
(
"./components/"
)
# 添加模型组件路径
from
applications.handpose_local_app
import
main_handpose_x
#加载 handpose 应用
from
applications.wyw2s_local_app
import
main_wyw2s
#加载 who
youwant2
see 应用
from
applications.wyw2s_local_app
import
main_wyw2s
#加载 who
you want 2
see 应用
# from applications.video_analysis_app import main_video_analysis #加载 video_analysis 应用
def
demo_logo
():
print
(
"
\n
/*********************************/"
)
print
(
"/---------------------------------/
\n
"
)
...
...
@@ -45,6 +45,8 @@ if __name__ == '__main__':
elif
APP_P
==
"wyw2s"
:
# 基于人脸识别的视频剪辑
cfg_file
=
"./lib/wyw2s_lib/cfg/wyw2s.cfg"
main_wyw2s
(
video_path
=
"./video/f1.mp4"
,
cfg_file
=
cfg_file
)
#加载 who you want 2 see 应用
main_wyw2s
(
cfg_file
,
video_path
=
"./video/f1.mp4"
)
#加载 who you want 2 see 应用
# elif APP_P == "video_ana": # 基于人脸识别的视频剪辑
# main_video_analysis(video_path = "./video/f3.mp4")#加载 who you want 2 see 应用
print
(
" well done ~"
)
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录