e3d_handpose_x · Commit 4b6c8429

Author: Eric.Lee2021 🚴🏻
Authored: Jun 19, 2021
Parent: 52688ae0

    add inference components
Showing 16 changed files with 2,816 additions and 0 deletions (+2816 −0)
components/hand_detect/acc_model.py                 +243  −0
components/hand_detect/utils/__init__.py              +0  −0
components/hand_detect/utils/torch_utils.py          +24  −0
components/hand_detect/yolo_v3_hand.py              +329  −0
components/hand_detect/yolov3.py                    +505  −0
components/hand_keypoints/handpose_x.py             +146  −0
components/hand_keypoints/models/mobilenetv2.py     +105  −0
components/hand_keypoints/models/my_model.py         +67  −0
components/hand_keypoints/models/resnet.py          +263  −0
components/hand_keypoints/models/resnet_50.py       +194  −0
components/hand_keypoints/models/rexnetv1.py        +183  −0
components/hand_keypoints/models/shufflenet.py      +254  −0
components/hand_keypoints/models/shufflenetv2.py    +157  −0
components/hand_keypoints/models/squeezenet.py      +153  −0
components/hand_keypoints/utils/common_utils.py     +132  −0
components/hand_keypoints/utils/model_utils.py       +61  −0
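Note: the commit adds two inference components — a YOLO v3 hand detector (`hand_detect`) and a 21-keypoint 2D regressor (`hand_keypoints`). Below is a minimal sketch of how they are meant to chain, not part of the commit; it assumes `components/` is on `sys.path`, that the default weight files exist, and `sample.jpg` is a hypothetical input image.

```python
import cv2
from hand_detect.yolo_v3_hand import yolo_v3_hand_model
from hand_keypoints.handpose_x import handpose_x_model

detector = yolo_v3_hand_model()    # hand detector (default weights assumed present)
keypoints = handpose_x_model()     # 21-keypoint regressor (default weights assumed present)

img = cv2.imread("sample.jpg")     # hypothetical input, BGR
for x1, y1, x2, y2, conf in detector.predict(img, vis=False):
    crop = img[int(y1):int(y2), int(x1):int(x2)]  # hand region
    pts = keypoints.predict(crop)                 # 42 values = 21 (x, y) pairs
    print(conf, pts.shape)
```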
components/hand_detect/acc_model.py  (new file, mode 100644)

```python
import torch
import torch.nn as nn
import torchvision
import time
import numpy as np
import sys


def get_model_op(model_, print_flag=False):
    """Count the op types (Conv2d / BatchNorm2d / Linear / ReLU6) in a model."""
    op_dict = {}

    def _count(key):
        op_dict[key] = op_dict.get(key, 0) + 1

    idx = 0
    for m in model_.modules():
        idx += 1
        if isinstance(m, nn.Conv2d):
            _count('Conv2d')
            if print_flag:
                print('{}) {}'.format(idx, m))
        elif isinstance(m, nn.BatchNorm2d):
            _count('BatchNorm2d')
            if print_flag:
                print('{}) {}'.format(idx, m))
        elif isinstance(m, nn.Linear):
            _count('Linear')
            if print_flag:
                print('{}) {}'.format(idx, m))
        elif isinstance(m, nn.Sequential):
            if print_flag:
                print('*******************{}) {}'.format(idx, m))
            for n in m:  # every child of a Sequential bumps all four counters
                if print_flag:
                    print('{}) {}'.format(idx, n))
                for key in ('Conv2d', 'BatchNorm2d', 'Linear', 'ReLU6'):
                    _count(key)
        elif isinstance(m, nn.ReLU6):
            if print_flag:
                print('{}) {}'.format(idx, m))
            _count('ReLU6')
        elif isinstance(m, nn.Module):
            if print_flag:
                print('{}) {}'.format(idx, m))
            for n in m.modules():
                if isinstance(n, nn.Conv2d):  # every Conv2d child bumps all four counters
                    if print_flag:
                        print('{}) {}'.format(idx, n))
                    for key in ('Conv2d', 'BatchNorm2d', 'Linear', 'ReLU6'):
                        _count(key)
        else:
            if print_flag:
                print('{}) {}'.format(idx, m))

    for key in op_dict.keys():
        if print_flag:
            print(' operation - {} : {}'.format(key, op_dict[key]))


class DummyModule(nn.Module):
    """Identity placeholder that replaces a BatchNorm2d after fusion."""
    def __init__(self):
        super(DummyModule, self).__init__()

    def forward(self, x):
        return x


def fuse(conv, bn):
    # https://tehnokv.com/posts/fusing-batchnorm-and-conv/
    with torch.no_grad():
        # init
        if isinstance(conv, nn.Conv2d):
            fusedconv = torch.nn.Conv2d(conv.in_channels,
                                        conv.out_channels,
                                        kernel_size=conv.kernel_size,
                                        stride=conv.stride,
                                        padding=conv.padding,
                                        bias=True)
        elif isinstance(conv, nn.ConvTranspose2d):  # does not support nn.ConvTranspose2d
            fusedconv = nn.ConvTranspose2d(conv.in_channels,
                                           conv.out_channels,
                                           kernel_size=conv.kernel_size,
                                           stride=conv.stride,
                                           padding=conv.padding,
                                           output_padding=conv.output_padding,
                                           bias=True)
        else:
            print("error")
            exit()

        # prepare filters
        w_conv = conv.weight.clone().view(conv.out_channels, -1)
        w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var)))
        fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.size()))

        # prepare spatial bias
        if conv.bias is not None:
            b_conv = conv.bias
            # b_conv = conv.bias.mul(bn.weight.div(torch.sqrt(bn.running_var + bn.eps)))  # maybe you should use this one?
        else:
            b_conv = torch.zeros(conv.weight.size(0))
        b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps))
        fusedconv.bias.copy_(b_conv + b_bn)

        return fusedconv


# idxx = 0
def fuse_module(m):
    """Recursively replace each (Conv2d, BatchNorm2d) pair with one fused Conv2d."""
    # global idxx
    children = list(m.named_children())
    c = None
    cn = None

    for name, child in children:
        # idxx += 1
        # print('-------------->>', idxx)
        # if idxx % 10 == 0:
        #     continue
        # print("name {}, child {}".format(name, child))
        if isinstance(child, nn.BatchNorm2d) and c is not None:
            bc = fuse(c, child)
            m._modules[cn] = bc
            # print('DummyModule() : ', DummyModule())
            m._modules[name] = DummyModule()
            c = None
        elif isinstance(child, nn.Conv2d):
            c = child
            cn = name
        else:
            fuse_module(child)


def test_net(ops, m):
    use_cuda = torch.cuda.is_available()
    use_cpu = False
    if ops.force_cpu or use_cuda == False:
        p = torch.randn([1, 3, 256, 256])
        device = torch.device("cpu")
        use_cpu = True
    else:
        p = torch.randn([1, 3, 256, 256]).cuda()
        device = torch.device("cuda:0")

    count = 50
    time_org = []
    m_o = m.to(device)
    get_model_op(m_o)
    # print(m)
    for i in range(count):
        s1 = time.time()
        if use_cpu:
            o_output = m_o(p)
        else:
            o_output = m_o(p).cpu()
        s2 = time.time()
        time_org.append(s2 - s1)
        print("Original time: ", s2 - s1)
    print('------------------------------------>>>>')

    fuse_module(m.to(torch.device("cpu")))
    # print(m)
    m_f = m.to(device)
    get_model_op(m_f)

    time_fuse = []
    for i in range(count):
        s1 = time.time()
        if use_cpu:
            f_output = m_f(p)
        else:
            f_output = m_f(p).cpu()
        s2 = time.time()
        time_fuse.append(s2 - s1)
        print("Fused time: ", s2 - s1)

    print("-" * 50)
    print("org time:", np.mean(time_org))
    print("fuse time:", np.mean(time_fuse))
    for o in o_output:
        print("org size:", o.size())
    for o in f_output:
        print("fuse size:", o.size())
    for i in range(len(o_output)):
        assert o_output[i].size() == f_output[i].size()
        print("output[{}] max abs diff: {}".format(
            i, (o_output[i] - f_output[i]).abs().max().item()))
        print("output[{}] MSE diff: {}".format(
            i, nn.MSELoss()(o_output[i], f_output[i]).item()))


def acc_model(ops, m):
    # print('\n-------------------------------->>> before acc model')
    get_model_op(m)
    fuse_module(m)
    # print('\n-------------------------------->>> after acc model')
    get_model_op(m)
    return m
```
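Note: a minimal usage sketch, not part of the commit. It fuses the Conv2d+BatchNorm2d pairs of a torchvision ResNet-18 (an assumed stand-in model) and checks that the fused network reproduces the original outputs. The first argument of `acc_model` is unused by the function body, so an empty string is passed, mirroring how `yolo_v3_hand.py` calls it below.

```python
import torch
import torchvision

m = torchvision.models.resnet18(pretrained=False).eval()  # eval mode so BN uses running stats
x = torch.randn(1, 3, 224, 224)
with torch.no_grad():
    y_ref = m(x)
    m = acc_model('', m)   # fuses every Conv2d + BatchNorm2d pair in place
    y_fused = m(x)
print((y_ref - y_fused).abs().max().item())  # expected: ~1e-6 (numerical noise only)
```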
components/hand_detect/utils/__init__.py  (new file, mode 100644, empty)
components/hand_detect/utils/torch_utils.py  (new file, mode 100644)

```python
import torch


def init_seeds(seed=0):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)


def select_device(force_cpu=False):
    if force_cpu:
        cuda = False
        device = torch.device('cpu')
    else:
        cuda = torch.cuda.is_available()
        device = torch.device('cuda:0' if cuda else 'cpu')

        if torch.cuda.device_count() > 1:
            device = torch.device('cuda' if cuda else 'cpu')
            # print('Found %g GPUs' % torch.cuda.device_count())
            # print('Multi-GPU Issue: https://github.com/ultralytics/yolov3/issues/21')
            # torch.cuda.set_device(0)  # OPTIONAL: Set your GPU if multiple available
            # print('Using ', torch.cuda.device_count(), ' GPUs')

    # print('Using %s %s\n' % (device.type, torch.cuda.get_device_properties(0) if cuda else ''))
    return device
```
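Note: a small usage sketch, not part of the commit, assuming `components/` is on `sys.path` so the module imports as `hand_detect.utils.torch_utils`. `init_seeds` makes runs repeatable; `select_device` picks `cuda:0` when a GPU is visible.

```python
from hand_detect.utils.torch_utils import init_seeds, select_device

init_seeds(42)                           # seed CPU and (if present) GPU RNGs
device = select_device(force_cpu=False)  # torch.device('cuda:0') or torch.device('cpu')
print(device)
```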
components/hand_detect/yolo_v3_hand.py  (new file, mode 100644)

```python
#-*-coding:utf-8-*-
# date:2021-03-09
# Author: Eric.Lee
# function: yolo v3 hand detect

import os
import cv2
import numpy as np
import time
import torch

from hand_detect.yolov3 import Yolov3, Yolov3Tiny
from hand_detect.utils.torch_utils import select_device
from hand_detect.acc_model import acc_model

import torch.backends.cudnn as cudnn
import torch.nn.functional as F
import random


def show_model_param(model):
    params = list(model.parameters())
    k = 0
    for i in params:
        l = 1
        for j in i.size():
            l *= j
        print("该层的结构: {}, 参数和: {}".format(str(list(i.size())), str(l)))
        k = k + l
    print("----------------------")
    print("总参数数量和: " + str(k))


def process_data(img, img_size=416):
    # image preprocessing
    img, _, _, _ = letterbox(img, height=img_size)
    # Normalize RGB
    img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB
    img = np.ascontiguousarray(img, dtype=np.float32)  # uint8 to float32
    img /= 255.0  # 0 - 255 to 0.0 - 1.0
    return img


def plot_one_box(x, img, color=None, label=None, line_thickness=None):
    # Plots one bounding box on image img
    tl = line_thickness or round(0.002 * max(img.shape[0:2])) + 1  # line thickness
    color = color or [random.randint(0, 255) for _ in range(3)]
    c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
    cv2.rectangle(img, c1, c2, color, thickness=tl)
    if label:
        tf = max(tl - 1, 1)  # font thickness
        t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
        c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
        cv2.rectangle(img, c1, c2, color, -1)  # filled
        cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 3, [255, 55, 90],
                    thickness=tf, lineType=cv2.LINE_AA)


def bbox_iou(box1, box2, x1y1x2y2=True):
    # Returns the IoU of box1 to box2. box1 is 4, box2 is nx4
    box2 = box2.t()

    # Get the coordinates of bounding boxes
    if x1y1x2y2:
        # x1, y1, x2, y2 = box1
        b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
        b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]
    else:
        # x, y, w, h = box1
        b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2
        b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2
        b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2
        b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2

    # Intersection area
    inter_area = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * \
                 (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0)

    # Union Area
    union_area = ((b1_x2 - b1_x1) * (b1_y2 - b1_y1) + 1e-16) + \
                 (b2_x2 - b2_x1) * (b2_y2 - b2_y1) - inter_area

    return inter_area / union_area  # iou


def xywh2xyxy(x):
    # Convert bounding box format from [x, y, w, h] to [x1, y1, x2, y2]
    y = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x)
    y[:, 0] = x[:, 0] - x[:, 2] / 2
    y[:, 1] = x[:, 1] - x[:, 3] / 2
    y[:, 2] = x[:, 0] + x[:, 2] / 2
    y[:, 3] = x[:, 1] + x[:, 3] / 2
    return y


def scale_coords(img_size, coords, img0_shape):
    # rescale x1, y1, x2, y2 from the network input size back to the original image size
    # print('coords : ', coords)
    # print('img0_shape : ', img0_shape)
    gain = float(img_size) / max(img0_shape)  # gain = old / new
    # print('gain : ', gain)
    pad_x = (img_size - img0_shape[1] * gain) / 2  # width padding
    pad_y = (img_size - img0_shape[0] * gain) / 2  # height padding
    # print('pad_x pad_y : ', pad_x, pad_y)
    coords[:, [0, 2]] -= pad_x
    coords[:, [1, 3]] -= pad_y
    coords[:, :4] /= gain
    coords[:, :4] = torch.clamp(coords[:, :4], min=0)  # clamp so coordinates are not negative
    return coords


def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4):
    """
    Removes detections with lower object confidence score than 'conf_thres'
    Non-Maximum Suppression to further filter detections.
    Returns detections with shape:
        (x1, y1, x2, y2, object_conf, class_conf, class)
    """
    min_wh = 2  # (pixels) minimum box width and height

    output = [None] * len(prediction)
    for image_i, pred in enumerate(prediction):
        # Experiment: Prior class size rejection
        # x, y, w, h = pred[:, 0], pred[:, 1], pred[:, 2], pred[:, 3]
        # a = w * h  # area
        # ar = w / (h + 1e-16)  # aspect ratio
        # n = len(w)
        # log_w, log_h, log_a, log_ar = torch.log(w), torch.log(h), torch.log(a), torch.log(ar)
        # shape_likelihood = np.zeros((n, 60), dtype=np.float32)
        # x = np.concatenate((log_w.reshape(-1, 1), log_h.reshape(-1, 1)), 1)
        # from scipy.stats import multivariate_normal
        # for c in range(60):
        #     shape_likelihood[:, c] = multivariate_normal.pdf(
        #         x, mean=mat['class_mu'][c, :2], cov=mat['class_cov'][c, :2, :2])

        # Filter out confidence scores below threshold
        class_conf, class_pred = pred[:, 5:].max(1)  # max class_conf, index
        pred[:, 4] *= class_conf  # final conf = obj_conf * class_conf

        i = (pred[:, 4] > conf_thres) & (pred[:, 2] > min_wh) & (pred[:, 3] > min_wh)
        # s2 = time.time()
        pred2 = pred[i]
        # print("++++++pred2 = pred[i]", time.time() - s2, pred2)

        # If none are remaining => process next image
        if len(pred2) == 0:
            continue

        # Select predicted classes
        class_conf = class_conf[i]
        class_pred = class_pred[i].unsqueeze(1).float()

        # Box (center x, center y, width, height) to (x1, y1, x2, y2)
        pred2[:, :4] = xywh2xyxy(pred2[:, :4])
        # pred[:, 4] *= class_conf  # improves mAP from 0.549 to 0.551

        # Detections ordered as (x1y1x2y2, obj_conf, class_conf, class_pred)
        pred2 = torch.cat((pred2[:, :5], class_conf.unsqueeze(1), class_pred), 1)

        # Get detections sorted by decreasing confidence scores
        pred2 = pred2[(-pred2[:, 4]).argsort()]

        det_max = []
        nms_style = 'MERGE'  # 'OR' (default), 'AND', 'MERGE' (experimental)
        for c in pred2[:, -1].unique():
            dc = pred2[pred2[:, -1] == c]  # select class c
            dc = dc[:min(len(dc), 100)]  # limit to first 100 boxes

            # Non-maximum suppression
            if nms_style == 'OR':  # default
                # METHOD1
                # ind = list(range(len(dc)))
                # while len(ind):
                #     j = ind[0]
                #     det_max.append(dc[j:j + 1])  # save highest conf detection
                #     reject = (bbox_iou(dc[j], dc[ind]) > nms_thres).nonzero()
                #     [ind.pop(i) for i in reversed(reject)]

                # METHOD2
                while dc.shape[0]:
                    det_max.append(dc[:1])  # save highest conf detection
                    if len(dc) == 1:  # Stop if we're at the last detection
                        break
                    iou = bbox_iou(dc[0], dc[1:])  # iou with other boxes
                    dc = dc[1:][iou < nms_thres]  # remove ious > threshold

            elif nms_style == 'AND':  # requires overlap, single boxes erased
                while len(dc) > 1:
                    iou = bbox_iou(dc[0], dc[1:])  # iou with other boxes
                    if iou.max() > 0.5:
                        det_max.append(dc[:1])
                    dc = dc[1:][iou < nms_thres]  # remove ious > threshold

            elif nms_style == 'MERGE':  # weighted mixture box
                while len(dc):
                    i = bbox_iou(dc[0], dc) > nms_thres  # iou with other boxes
                    weights = dc[i, 4:5]
                    dc[0, :4] = (weights * dc[i, :4]).sum(0) / weights.sum()
                    det_max.append(dc[:1])
                    dc = dc[i == 0]

        if len(det_max):
            det_max = torch.cat(det_max)  # concatenate
            output[image_i] = det_max[(-det_max[:, 4]).argsort()]  # sort

    return output


def letterbox(img, height=416, augment=False, color=(127.5, 127.5, 127.5)):
    # Resize a rectangular image to a padded square
    shape = img.shape[:2]  # shape = [height, width]
    ratio = float(height) / max(shape)  # ratio = old / new
    new_shape = (round(shape[1] * ratio), round(shape[0] * ratio))
    dw = (height - new_shape[0]) / 2  # width padding
    dh = (height - new_shape[1]) / 2  # height padding
    top, bottom = round(dh - 0.1), round(dh + 0.1)
    left, right = round(dw - 0.1), round(dw + 0.1)
    # resize img
    if augment:
        interpolation = np.random.choice([None, cv2.INTER_NEAREST, cv2.INTER_LINEAR,
                                          None, cv2.INTER_NEAREST, cv2.INTER_LINEAR,
                                          cv2.INTER_AREA, cv2.INTER_CUBIC, cv2.INTER_LANCZOS4])
        if interpolation is None:
            img = cv2.resize(img, new_shape)
        else:
            img = cv2.resize(img, new_shape, interpolation=interpolation)
    else:
        img = cv2.resize(img, new_shape, interpolation=cv2.INTER_NEAREST)
    # print("resize time:", time.time() - s1)
    img = cv2.copyMakeBorder(img, top, bottom, left, right,
                             cv2.BORDER_CONSTANT, value=color)  # padded square
    return img, ratio, dw, dh


#---------------------------------------------------------
# model_path = './coco_model/yolov3_coco.pt'  # detection model path
# root_path = './test_images/'  # test image folder
# model_arch = 'yolov3'  # model type
# voc_config = 'cfg/voc.data'  # model config file
# img_size = 416  # image size
# conf_thres = 0.35  # detection confidence
# nms_thres = 0.5  # nms threshold
class yolo_v3_hand_model(object):
    def __init__(self,
                 model_path='./components/hand_detect/weights/hand_416-20210606.pt',
                 model_arch='yolov3',
                 yolo_anchor_scale=1.,
                 img_size=416,
                 conf_thres=0.55,
                 nms_thres=0.4,
                 model_half=False,
                 ):
        print("yolo v3 hand_model loading : {}".format(model_path))
        self.use_cuda = torch.cuda.is_available()
        self.device = torch.device("cuda:0" if self.use_cuda else "cpu")
        self.img_size = img_size
        self.classes = ["Hand"]
        self.num_classes = len(self.classes)
        self.conf_thres = conf_thres
        self.nms_thres = nms_thres
        self.model_half = model_half
        #-----------------------------------------------------------------------
        weights = model_path
        if "tiny" in model_arch:
            a_scalse = 416. / img_size * yolo_anchor_scale
            anchors = [(10, 14), (23, 27), (37, 58), (81, 82), (135, 169), (344, 319)]
            anchors_new = [(int(anchors[j][0] / a_scalse), int(anchors[j][1] / a_scalse))
                           for j in range(len(anchors))]
            model = Yolov3Tiny(self.num_classes, anchors=anchors_new)
        else:
            a_scalse = 416. / img_size
            anchors = [(10, 13), (16, 30), (33, 23), (30, 61), (62, 45), (59, 119),
                       (116, 90), (156, 198), (373, 326)]
            anchors_new = [(int(anchors[j][0] / a_scalse), int(anchors[j][1] / a_scalse))
                           for j in range(len(anchors))]
            model = Yolov3(self.num_classes, anchors=anchors_new)
        #-----------------------------------------------------------------------
        self.model = model
        # show_model_param(self.model)  # show model parameters
        # print('num_classes : ', self.num_classes)

        self.device = select_device()  # select the device
        self.use_cuda = torch.cuda.is_available()
        # Load weights
        if os.access(weights, os.F_OK):  # check that the model file exists
            self.model.load_state_dict(
                torch.load(weights, map_location=lambda storage, loc: storage)['model'])
        else:
            print('------- >>> error : model not exists')
            return  # bail out; __init__ must not return a value
        #
        self.model.eval()  # set model to eval mode
        acc_model('', self.model)
        self.model = self.model.to(self.device)
        if model_half:
            self.model = self.model.to(self.device).half()
        if self.use_cuda:
            self.model = self.model.cuda()

    def predict(self, img_, vis):
        with torch.no_grad():
            t = time.time()
            img = process_data(img_, self.img_size)
            t1 = time.time()
            img = torch.from_numpy(img).unsqueeze(0).to(self.device)
            if self.model_half:
                img = img.type(torch.HalfTensor)
            if self.use_cuda:
                img = img.cuda()
            pred, _ = self.model(img)  # run detection
            t2 = time.time()
            # detections = non_max_suppression(pred, self.conf_thres, self.nms_thres)[0]  # nms
            detections = non_max_suppression(pred.float(), self.conf_thres, self.nms_thres)[0]  # nms
            t3 = time.time()
            # print("t3 time:", t3)

            if (detections is None) or len(detections) == 0:
                return []
            # Rescale boxes from 416 to true image size
            detections[:, :4] = scale_coords(self.img_size, detections[:, :4], img_.shape).round()
            # draw detection results
            dets_for_landmarks = []
            colors = [(v // 32 * 64 + 64, (v // 8) % 4 * 64, v % 8 * 32)
                      for v in range(1, 10 + 1)][::-1]

            output_dict_ = []
            for *xyxy, conf, cls_conf, cls in detections:
                label = '%s %.2f' % (self.classes[0], conf)
                x1, y1, x2, y2 = xyxy
                output_dict_.append((float(x1), float(y1), float(x2), float(y2),
                                     float(conf.item())))
                if vis:
                    plot_one_box(xyxy, img_, label=label, color=(0, 175, 255), line_thickness=2)
            if vis:
                cv2.namedWindow("yoloV3", 0)
                cv2.imshow("yoloV3", img_)
        return output_dict_
```
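Note: a minimal inference sketch, not part of the commit. It assumes the default weight file shipped with the project exists and that `components/` is on `sys.path`; `test.jpg` is a hypothetical input. `predict` returns `(x1, y1, x2, y2, conf)` tuples in original-image coordinates.

```python
import cv2
from hand_detect.yolo_v3_hand import yolo_v3_hand_model

detector = yolo_v3_hand_model(conf_thres=0.55, nms_thres=0.4)
img = cv2.imread("test.jpg")            # hypothetical BGR image, any size
dets = detector.predict(img, vis=False)
for x1, y1, x2, y2, conf in dets:
    print("hand @ ({:.0f},{:.0f})-({:.0f},{:.0f}) conf={:.2f}".format(x1, y1, x2, y2, conf))
```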
components/hand_detect/yolov3.py  (new file, mode 100644)

This diff is collapsed in the source view; the file contents (+505 lines) are not shown here.
components/hand_keypoints/handpose_x.py  (new file, mode 100644)

```python
#-*-coding:utf-8-*-
# date:2021-03-09
# Author: Eric.Lee
# function: handpose_x 21 keypoints 2D

import os
import json
import time
import math
from datetime import datetime

import cv2
import numpy as np
import torch
import torch.nn as nn

from hand_keypoints.models.resnet import resnet18, resnet34, resnet50, resnet101
from hand_keypoints.models.squeezenet import squeezenet1_1, squeezenet1_0
from hand_keypoints.models.shufflenetv2 import ShuffleNetV2
from hand_keypoints.models.shufflenet import ShuffleNet
from hand_keypoints.models.mobilenetv2 import MobileNetV2
from torchvision.models import shufflenet_v2_x1_5, shufflenet_v2_x1_0, shufflenet_v2_x2_0
from hand_keypoints.models.rexnetv1 import ReXNetV1
from hand_keypoints.utils.common_utils import *


def draw_bd_handpose_c(img_, hand_, x, y, thick=3):
    # thick = 2
    # one color per finger chain: thumb, index, middle, ring, little
    colors = [(0, 215, 255), (255, 115, 55), (5, 255, 55), (25, 15, 255), (225, 15, 55)]
    # bone connections as (start keypoint, end keypoint, color index)
    bones = [
        ('0', '1', 0), ('1', '2', 0), ('2', '3', 0), ('3', '4', 0),
        ('0', '5', 1), ('5', '6', 1), ('6', '7', 1), ('7', '8', 1),
        ('0', '9', 2), ('9', '10', 2), ('10', '11', 2), ('11', '12', 2),
        ('0', '13', 3), ('13', '14', 3), ('14', '15', 3), ('15', '16', 3),
        ('0', '17', 4), ('17', '18', 4), ('18', '19', 4), ('19', '20', 4),
    ]
    for a, b, c in bones:
        cv2.line(img_,
                 (int(hand_[a]['x'] + x), int(hand_[a]['y'] + y)),
                 (int(hand_[b]['x'] + x), int(hand_[b]['y'] + y)),
                 colors[c], thick)


#
class handpose_x_model(object):
    def __init__(self,
                 # model_path='./components/hand_keypoints/weights/ReXNetV1-size-256-wingloss102-0.1063.pth',
                 model_path='./components/hand_keypoints/weights/resnet_50-size-256-wingloss102-0.119.pth',
                 img_size=256,
                 num_classes=42,  # number of hand keypoints * 2 : 21 * 2
                 # model_arch="rexnetv1",
                 model_arch="resnet_50",
                 model_half=False,
                 ):
        # print("handpose_x loading : ", model_path)
        self.use_cuda = torch.cuda.is_available()
        self.device = torch.device("cuda:0" if self.use_cuda else "cpu")  # device type and index
        self.img_size = img_size
        self.model_half = model_half
        #-----------------------------------------------------------------------
        if model_arch == 'resnet_50':
            model_ = resnet50(num_classes=num_classes, img_size=self.img_size)
        elif model_arch == 'resnet_18':
            model_ = resnet18(num_classes=num_classes, img_size=self.img_size)
        elif model_arch == 'resnet_34':
            model_ = resnet34(num_classes=num_classes, img_size=self.img_size)
        elif model_arch == 'resnet_101':
            model_ = resnet101(num_classes=num_classes, img_size=self.img_size)
        elif model_arch == "squeezenet1_0":
            model_ = squeezenet1_0(pretrained=True, num_classes=num_classes)
        elif model_arch == "squeezenet1_1":
            model_ = squeezenet1_1(pretrained=True, num_classes=num_classes)
        elif model_arch == "shufflenetv2":
            model_ = ShuffleNetV2(ratio=1., num_classes=num_classes)
        elif model_arch == "shufflenet_v2_x1_5":
            model_ = shufflenet_v2_x1_5(pretrained=False, num_classes=num_classes)
        elif model_arch == "shufflenet_v2_x1_0":
            model_ = shufflenet_v2_x1_0(pretrained=False, num_classes=num_classes)
        elif model_arch == "shufflenet_v2_x2_0":
            model_ = shufflenet_v2_x2_0(pretrained=False, num_classes=num_classes)
        elif model_arch == "shufflenet":
            model_ = ShuffleNet(num_blocks=[2, 4, 2], num_classes=num_classes, groups=3)
        elif model_arch == "mobilenetv2":
            model_ = MobileNetV2(num_classes=num_classes)
        elif model_arch == "rexnetv1":
            model_ = ReXNetV1(num_classes=num_classes, width_mult=1., depth_mult=1.)
        else:
            print(" no support the model")
        #-----------------------------------------------------------------------
        model_ = model_.to(self.device)
        model_.eval()  # set to inference mode

        # load the test model
        if os.access(model_path, os.F_OK):  # checkpoint
            chkpt = torch.load(model_path, map_location=self.device)
            model_.load_state_dict(chkpt)
            print('handpose_x model loading : {}'.format(model_path))

        self.model_handpose = model_
        if model_half:
            self.model_handpose = self.model_handpose.half()

    def predict(self, img, vis=False):
        with torch.no_grad():
            if not ((img.shape[0] == self.img_size) and (img.shape[1] == self.img_size)):
                img = cv2.resize(img, (self.img_size, self.img_size),
                                 interpolation=cv2.INTER_CUBIC)

            img_ = img.astype(np.float32)
            img_ = (img_ - 128.) / 256.

            img_ = img_.transpose(2, 0, 1)
            img_ = torch.from_numpy(img_)
            img_ = img_.unsqueeze_(0).float()
            if self.model_half:
                img_ = img_.type(torch.HalfTensor)

            if self.use_cuda:
                img_ = img_.cuda()  # (bs, 3, h, w)

            pre_ = self.model_handpose(img_)
            output = pre_.cpu().detach().numpy()
            output = np.squeeze(output)

        return output
```
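Note: a minimal usage sketch, not part of the commit. It assumes, as in the parent handpose_x project, that the 42 outputs are 21 `(x, y)` pairs normalized by the crop size; `hand_crop.jpg` is a hypothetical input crop.

```python
import cv2
from hand_keypoints.handpose_x import handpose_x_model

model = handpose_x_model()               # resnet_50 weights by default
crop = cv2.imread("hand_crop.jpg")       # hypothetical hand crop, BGR
output = model.predict(crop)             # shape: (42,)
for i in range(int(output.shape[0] / 2)):
    x = output[i * 2 + 0] * float(crop.shape[1])  # scale back to crop pixels
    y = output[i * 2 + 1] * float(crop.shape[0])
    print("keypoint {}: ({:.1f}, {:.1f})".format(i, x, y))
```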
components/hand_keypoints/models/mobilenetv2.py  (new file, mode 100644)

```python
"""mobilenetv2 in pytorch

[1] Mark Sandler, Andrew Howard, Menglong Zhu, Andrey Zhmoginov, Liang-Chieh Chen
    MobileNetV2: Inverted Residuals and Linear Bottlenecks
    https://arxiv.org/abs/1801.04381
"""

import torch
import torch.nn as nn
import torch.nn.functional as F


class LinearBottleNeck(nn.Module):

    def __init__(self, in_channels, out_channels, stride, t=6, class_num=100):
        super().__init__()

        self.residual = nn.Sequential(
            nn.Conv2d(in_channels, in_channels * t, 1),
            nn.BatchNorm2d(in_channels * t),
            nn.ReLU6(inplace=True),

            nn.Conv2d(in_channels * t, in_channels * t, 3,
                      stride=stride, padding=1, groups=in_channels * t),
            nn.BatchNorm2d(in_channels * t),
            nn.ReLU6(inplace=True),

            nn.Conv2d(in_channels * t, out_channels, 1),
            nn.BatchNorm2d(out_channels)
        )
        self.stride = stride
        self.in_channels = in_channels
        self.out_channels = out_channels

    def forward(self, x):
        residual = self.residual(x)
        if self.stride == 1 and self.in_channels == self.out_channels:
            residual += x
        return residual


class MobileNetV2(nn.Module):

    def __init__(self, num_classes=100, dropout_factor=1.0):
        super().__init__()

        self.pre = nn.Sequential(
            nn.Conv2d(3, 32, 1, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU6(inplace=True)
        )

        self.stage1 = LinearBottleNeck(32, 16, 1, 1)
        self.stage2 = self._make_stage(2, 16, 24, 2, 6)
        self.stage3 = self._make_stage(3, 24, 32, 2, 6)
        self.stage4 = self._make_stage(4, 32, 64, 2, 6)
        self.stage5 = self._make_stage(3, 64, 96, 1, 6)
        self.stage6 = self._make_stage(3, 96, 160, 1, 6)
        self.stage7 = LinearBottleNeck(160, 320, 1, 6)

        self.conv1 = nn.Sequential(
            nn.Conv2d(320, 1280, 1),
            nn.BatchNorm2d(1280),
            nn.ReLU6(inplace=True)
        )
        self.conv2 = nn.Conv2d(1280, num_classes, 1)
        self.dropout = nn.Dropout(dropout_factor)

    def forward(self, x):
        x = self.pre(x)
        x = self.stage1(x)
        x = self.stage2(x)
        x = self.stage3(x)
        x = self.stage4(x)
        x = self.stage5(x)
        x = self.stage6(x)
        x = self.stage7(x)
        x = self.conv1(x)
        x = F.adaptive_avg_pool2d(x, 1)
        x = self.dropout(x)
        x = self.conv2(x)
        x = x.view(x.size(0), -1)
        return x

    def _make_stage(self, repeat, in_channels, out_channels, stride, t):
        layers = []
        layers.append(LinearBottleNeck(in_channels, out_channels, stride, t))
        while repeat - 1:
            layers.append(LinearBottleNeck(out_channels, out_channels, 1, t))
            repeat -= 1
        return nn.Sequential(*layers)


def mobilenetv2():
    return MobileNetV2()
```
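Note: a quick shape check, not part of the commit. With `num_classes=42` the head matches the 21 × 2 keypoint regression target that `handpose_x_model` expects.

```python
import torch
from hand_keypoints.models.mobilenetv2 import MobileNetV2

net = MobileNetV2(num_classes=42, dropout_factor=0.5)
out = net(torch.randn(2, 3, 256, 256))  # a dummy batch of two 256x256 crops
print(out.size())                       # expected: torch.Size([2, 42])
```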
components/hand_keypoints/models/my_model.py  (new file, mode 100644)

```python
#-*-coding:utf-8-*-
# date:2020-08-08
# Author: X.L.Eric
# function: my model
import torch
import torch.nn as nn
import torch.nn.functional as F


class MY_Net(nn.Module):
    def __init__(self, num_classes):  # op initialization
        super(MY_Net, self).__init__()
        self.cov = nn.Conv2d(3, 32, 3)
        self.relu = nn.ReLU(inplace=True)

        layers1 = []
        # Conv2d : in_channels, out_channels, kernel_size, stride, padding
        layers1.append(nn.Conv2d(in_channels=32, out_channels=64,
                                 kernel_size=3, stride=1, padding=0))
        layers1.append(nn.BatchNorm2d(64, affine=True))
        layers1.append(nn.ReLU(inplace=True))
        layers1.append(nn.AvgPool2d(kernel_size=3, stride=2, padding=1))
        self.layers1 = nn.Sequential(*layers1)

        layers2 = []
        layers2.append(nn.Conv2d(64, 128, 3))
        layers2.append(nn.BatchNorm2d(128, affine=True))
        layers2.append(nn.ReLU(inplace=True))
        layers2.append(nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
        self.layers2 = nn.Sequential(*layers2)

        layers3 = []
        layers3.append(nn.Conv2d(128, 256, 3, stride=2))
        layers3.append(nn.BatchNorm2d(256, affine=True))
        layers3.append(nn.ReLU(inplace=True))
        layers3.append(nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
        self.layers3 = nn.Sequential(*layers3)

        layers4 = []
        layers4.append(nn.Conv2d(256, 512, 3, stride=2))
        layers4.append(nn.BatchNorm2d(512, affine=True))
        layers4.append(nn.ReLU(inplace=True))
        layers4.append(nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
        layers4.append(nn.Conv2d(512, 512, 1, stride=1))
        self.layers4 = nn.Sequential(*layers4)

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))  # adaptive average pooling
        self.fc = nn.Linear(in_features=512, out_features=num_classes)  # fully connected layer

    def forward(self, x):  # forward pass
        x = self.cov(x)
        x = self.relu(x)
        x = self.layers1(x)
        x = self.layers2(x)
        x = self.layers3(x)
        x = self.layers4(x)
        x = self.avgpool(x)
        x = x.reshape(x.size(0), -1)
        x = self.fc(x)
        return x


if __name__ == "__main__":
    # input image batch (batchsize, channel, height, width): 8, 3*256*256
    dummy_input = torch.randn([8, 3, 256, 256])
    model = MY_Net(num_classes=100)  # 100 classes
    print('model: \n', model)  # print the model ops
    output = model(dummy_input)  # model forward pass
    # feature size of the forward output
    print('model inference feature size: ', output.size())
    print(output)
    output_ = F.softmax(output, dim=1)
    #
    print(output_)
```
components/hand_keypoints/models/resnet.py  (new file, mode 100644)

```python
import torch
import torch.nn as nn
import math
import torch.utils.model_zoo as model_zoo

__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', 'resnet152']

model_urls = {
    'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
    'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
    'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
    'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
    'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
}


def conv3x3(in_planes, out_planes, stride=1):
    """3x3 convolution with padding"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=1, bias=False)


class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)
        return out


class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * 4)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)
        return out


class ResNet(nn.Module):

    def __init__(self, block, layers, num_classes=1000, img_size=224, dropout_factor=1.):
        self.inplanes = 64
        self.dropout_factor = dropout_factor
        super(ResNet, self).__init__()
        # 26
        # 586 train_sequence
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        # see this issue: https://github.com/xxradon/PytorchToCaffe/issues/16
        # self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

        assert img_size % 32 == 0
        pool_kernel = int(img_size / 32)
        self.avgpool = nn.AvgPool2d(pool_kernel, stride=1, ceil_mode=True)
        self.dropout = nn.Dropout(self.dropout_factor)
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.dropout(x)
        x = self.fc(x)
        return x


def load_model(model, pretrained_state_dict):
    model_dict = model.state_dict()
    pretrained_dict = {k: v for k, v in pretrained_state_dict.items()
                       if k in model_dict and model_dict[k].size() == pretrained_state_dict[k].size()}
    model.load_state_dict(pretrained_dict, strict=False)
    if len(pretrained_dict) == 0:
        print("[INFO] No params were loaded ...")
    else:
        for k, v in pretrained_state_dict.items():
            if k in pretrained_dict:
                print("==>> Load {} {}".format(k, v.size()))
            else:
                print("[INFO] Skip {} {}".format(k, v.size()))
    return model


def resnet18(pretrained=False, **kwargs):
    """Constructs a ResNet-18 model.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
    if pretrained:
        # model.load_state_dict(model_zoo.load_url(model_urls['resnet18']))
        print("Load pretrained model from {}".format(model_urls['resnet18']))
        pretrained_state_dict = model_zoo.load_url(model_urls['resnet18'])
        model = load_model(model, pretrained_state_dict)
    return model


def resnet34(pretrained=False, **kwargs):
    """Constructs a ResNet-34 model.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs)
    if pretrained:
        # model.load_state_dict(model_zoo.load_url(model_urls['resnet34']))
        print("Load pretrained model from {}".format(model_urls['resnet34']))
        pretrained_state_dict = model_zoo.load_url(model_urls['resnet34'])
        model = load_model(model, pretrained_state_dict)
    return model


def resnet50(pretrained=False, **kwargs):
    """Constructs a ResNet-50 model.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
    if pretrained:
        # model.load_state_dict(model_zoo.load_url(model_urls['resnet50']))
        print("Load pretrained model from {}".format(model_urls['resnet50']))
        pretrained_state_dict = model_zoo.load_url(model_urls['resnet50'])
        model = load_model(model, pretrained_state_dict)
    return model


def resnet101(pretrained=False, **kwargs):
    """Constructs a ResNet-101 model.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs)
    if pretrained:
        # model.load_state_dict(model_zoo.load_url(model_urls['resnet101']))
        print("Load pretrained model from {}".format(model_urls['resnet101']))
        pretrained_state_dict = model_zoo.load_url(model_urls['resnet101'])
        model = load_model(model, pretrained_state_dict)
    return model


def resnet152(pretrained=False, **kwargs):
    """Constructs a ResNet-152 model.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs)
    if pretrained:
        # model.load_state_dict(model_zoo.load_url(model_urls['resnet152']))
        print("Load pretrained model from {}".format(model_urls['resnet152']))
        pretrained_state_dict = model_zoo.load_url(model_urls['resnet152'])
        model = load_model(model, pretrained_state_dict)
    return model


if __name__ == "__main__":
    input = torch.randn([32, 3, 256, 256])
    model = resnet34(False, num_classes=2, img_size=256)
    output = model(input)
    print(output.size())
```
components/hand_keypoints/models/resnet_50.py  (new file, mode 100644)

```python
import torch
import torch.nn as nn

__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', 'resnet152']

model_urls = {
    'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
    'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
    'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
    'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
    'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
}


def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
    """3x3 convolution with padding"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=dilation, groups=groups, bias=False, dilation=dilation)


def conv1x1(in_planes, out_planes, stride=1):
    """1x1 convolution"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)


class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
                 base_width=64, dilation=1, norm_layer=None):
        super(Bottleneck, self).__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        width = int(planes * (base_width / 64.)) * groups
        # Both self.conv2 and self.downsample layers downsample the input when stride != 1
        self.conv1 = conv1x1(inplanes, width)
        self.bn1 = norm_layer(width)
        self.conv2 = conv3x3(width, width, stride, groups, dilation)
        self.bn2 = norm_layer(width)
        self.conv3 = conv1x1(width, planes * self.expansion)
        self.bn3 = norm_layer(planes * self.expansion)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            identity = self.downsample(x)

        out += identity
        out = self.relu(out)
        return out


class ResNet(nn.Module):

    def __init__(self, block, layers, num_classes=1000, dropout_factor=1.,
                 zero_init_residual=False, groups=1, width_per_group=64,
                 replace_stride_with_dilation=None, norm_layer=nn.BatchNorm2d):
        super(ResNet, self).__init__()
        if norm_layer is None:
            print('BatchNorm2d')
            norm_layer = nn.BatchNorm2d
        self._norm_layer = norm_layer

        self.inplanes = 64
        self.dilation = 1
        if replace_stride_with_dilation is None:
            # each element in the tuple indicates if we should replace
            # the 2x2 stride with a dilated convolution instead
            replace_stride_with_dilation = [False, False, False]
        if len(replace_stride_with_dilation) != 3:
            raise ValueError("replace_stride_with_dilation should be None "
                             "or a 3-element tuple, got {}".format(replace_stride_with_dilation))
        self.groups = groups
        self.base_width = width_per_group
        self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = norm_layer(self.inplanes)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2,
                                       dilate=replace_stride_with_dilation[0])
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2,
                                       dilate=replace_stride_with_dilation[1])
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2,
                                       dilate=replace_stride_with_dilation[2])
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.dropout = nn.Dropout(dropout_factor)
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        # ----------------------------------------------------------------------------------
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

        # Zero-initialize the last BN in each residual branch,
        # so that the residual branch starts with zeros, and each residual block behaves like an identity.
        # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
        if zero_init_residual:
            for m in self.modules():
                if isinstance(m, Bottleneck):
                    nn.init.constant_(m.bn3.weight, 0)
                elif isinstance(m, BasicBlock):  # note: BasicBlock is not defined in this file
                    nn.init.constant_(m.bn2.weight, 0)

    def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
        norm_layer = self._norm_layer
        downsample = None
        previous_dilation = self.dilation
        if dilate:
            self.dilation *= stride
            stride = 1
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                conv1x1(self.inplanes, planes * block.expansion, stride),
                norm_layer(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample, self.groups,
                            self.base_width, previous_dilation, norm_layer))
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes, groups=self.groups,
                                base_width=self.base_width, dilation=self.dilation,
                                norm_layer=norm_layer))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = x.reshape(x.size(0), -1)
        x = self.dropout(x)
        x = self.fc(x)
        return x


def _resnet(arch, block, layers, **kwargs):
    model = ResNet(block, layers, **kwargs)
    return model


def resnet50(**kwargs):
    r"""ResNet-50 model from
    `"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_
    """
    print('Bottleneck:{}'.format(Bottleneck))
    return _resnet('resnet50', Bottleneck, [3, 4, 6, 3], **kwargs)


if __name__ == "__main__":
    dummy_input = torch.randn([32, 3, 128, 128])
    num_classes = 100
    model = resnet50(num_classes=num_classes, dropout_factor=0.5)
    print(model)
    output = model(dummy_input)
    print(output.size())
```
components/hand_keypoints/models/rexnetv1.py  (new file, mode 100644)

```python
"""
ReXNet
Copyright (c) 2020-present NAVER Corp.
MIT license
"""

import torch
import torch.nn as nn
from math import ceil

# Memory-efficient Swish using torch.jit.script borrowed from the code in
# (https://twitter.com/jeremyphoward/status/1188251041835315200)
# Currently use memory-efficient Swish as default:
USE_MEMORY_EFFICIENT_SWISH = True

if USE_MEMORY_EFFICIENT_SWISH:
    @torch.jit.script
    def swish_fwd(x):
        return x.mul(torch.sigmoid(x))

    @torch.jit.script
    def swish_bwd(x, grad_output):
        x_sigmoid = torch.sigmoid(x)
        return grad_output * (x_sigmoid * (1. + x * (1. - x_sigmoid)))

    class SwishJitImplementation(torch.autograd.Function):
        @staticmethod
        def forward(ctx, x):
            ctx.save_for_backward(x)
            return swish_fwd(x)

        @staticmethod
        def backward(ctx, grad_output):
            x = ctx.saved_tensors[0]
            return swish_bwd(x, grad_output)

    def swish(x, inplace=False):
        return SwishJitImplementation.apply(x)
else:
    def swish(x, inplace=False):
        return x.mul_(x.sigmoid()) if inplace else x.mul(x.sigmoid())


class Swish(nn.Module):
    def __init__(self, inplace=True):
        super(Swish, self).__init__()
        self.inplace = inplace

    def forward(self, x):
        return swish(x, self.inplace)


def ConvBNAct(out, in_channels, channels, kernel=1, stride=1, pad=0,
              num_group=1, active=True, relu6=False):
    out.append(nn.Conv2d(in_channels, channels, kernel, stride, pad,
                         groups=num_group, bias=False))
    out.append(nn.BatchNorm2d(channels))
    if active:
        out.append(nn.ReLU6(inplace=True) if relu6 else nn.ReLU(inplace=True))


def ConvBNSwish(out, in_channels, channels, kernel=1, stride=1, pad=0, num_group=1):
    out.append(nn.Conv2d(in_channels, channels, kernel, stride, pad,
                         groups=num_group, bias=False))
    out.append(nn.BatchNorm2d(channels))
    out.append(Swish())


class SE(nn.Module):
    def __init__(self, in_channels, channels, se_ratio=12):
        super(SE, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Sequential(
            nn.Conv2d(in_channels, channels // se_ratio, kernel_size=1, padding=0),
            nn.BatchNorm2d(channels // se_ratio),
            nn.ReLU(inplace=True),
            nn.Conv2d(channels // se_ratio, channels, kernel_size=1, padding=0),
            nn.Sigmoid()
        )

    def forward(self, x):
        y = self.avg_pool(x)
        y = self.fc(y)
        return x * y


class LinearBottleneck(nn.Module):
    def __init__(self, in_channels, channels, t, stride, use_se=True, se_ratio=12,
                 **kwargs):
        super(LinearBottleneck, self).__init__(**kwargs)
        self.use_shortcut = stride == 1 and in_channels <= channels
        self.in_channels = in_channels
        self.out_channels = channels

        out = []
        if t != 1:
            dw_channels = in_channels * t
            ConvBNSwish(out, in_channels=in_channels, channels=dw_channels)
        else:
            dw_channels = in_channels

        ConvBNAct(out, in_channels=dw_channels, channels=dw_channels, kernel=3,
                  stride=stride, pad=1, num_group=dw_channels, active=False)

        if use_se:
            out.append(SE(dw_channels, dw_channels, se_ratio))

        out.append(nn.ReLU6())
        ConvBNAct(out, in_channels=dw_channels, channels=channels, active=False,
                  relu6=True)
        self.out = nn.Sequential(*out)

    def forward(self, x):
        out = self.out(x)
        if self.use_shortcut:
            out[:, 0:self.in_channels] += x
        return out


class ReXNetV1(nn.Module):
    def __init__(self, input_ch=16, final_ch=180, width_mult=1.0, depth_mult=1.0,
                 num_classes=1000, use_se=True, se_ratio=12, dropout_factor=0.2,
                 bn_momentum=0.9):
        super(ReXNetV1, self).__init__()

        layers = [1, 2, 2, 3, 3, 5]
        strides = [1, 2, 2, 2, 1, 2]
        use_ses = [False, False, True, True, True, True]

        layers = [ceil(element * depth_mult) for element in layers]
        strides = sum([[element] + [1] * (layers[idx] - 1)
                       for idx, element in enumerate(strides)], [])
        if use_se:
            use_ses = sum([[element] * layers[idx]
                           for idx, element in enumerate(use_ses)], [])
        else:
            use_ses = [False] * sum(layers[:])
        ts = [1] * layers[0] + [6] * sum(layers[1:])
        self.depth = sum(layers[:]) * 3

        stem_channel = 32 / width_mult if width_mult < 1.0 else 32
        inplanes = input_ch / width_mult if width_mult < 1.0 else input_ch

        features = []
        in_channels_group = []
        channels_group = []

        # The following channel configuration is a simple instance to make each layer become an expand layer.
        for i in range(self.depth // 3):
            if i == 0:
                in_channels_group.append(int(round(stem_channel * width_mult)))
                channels_group.append(int(round(inplanes * width_mult)))
            else:
                in_channels_group.append(int(round(inplanes * width_mult)))
                inplanes += final_ch / (self.depth // 3 * 1.0)
                channels_group.append(int(round(inplanes * width_mult)))

        ConvBNSwish(features, 3, int(round(stem_channel * width_mult)), kernel=3,
                    stride=2, pad=1)

        for block_idx, (in_c, c, t, s, se) in enumerate(
                zip(in_channels_group, channels_group, ts, strides, use_ses)):
            features.append(LinearBottleneck(in_channels=in_c, channels=c, t=t,
                                             stride=s, use_se=se, se_ratio=se_ratio))

        pen_channels = int(1280 * width_mult)
        ConvBNSwish(features, c, pen_channels)

        features.append(nn.AdaptiveAvgPool2d(1))
        self.features = nn.Sequential(*features)
        self.output = nn.Sequential(
            nn.Dropout(dropout_factor),
            nn.Conv2d(pen_channels, num_classes, 1, bias=True))

    def forward(self, x):
        x = self.features(x)
        x = self.output(x).squeeze()
        return x
```
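Note: a quick shape check, not part of the commit, matching how `handpose_x_model` instantiates this backbone for the 21 × 2 keypoint target.

```python
import torch
from hand_keypoints.models.rexnetv1 import ReXNetV1

net = ReXNetV1(num_classes=42, width_mult=1.0, depth_mult=1.0)
out = net(torch.randn(2, 3, 256, 256))  # dummy batch of two 256x256 crops
print(out.size())                       # expected: torch.Size([2, 42])
```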
components/hand_keypoints/models/shufflenet.py
0 → 100644
"""shufflenet in pytorch
[1] Xiangyu Zhang, Xinyu Zhou, Mengxiao Lin, Jian Sun.
ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices
https://arxiv.org/abs/1707.01083v2
"""
from
functools
import
partial
import
torch
import
torch.nn
as
nn
class
BasicConv2d
(
nn
.
Module
):
def
__init__
(
self
,
input_channels
,
output_channels
,
kernel_size
,
**
kwargs
):
super
().
__init__
()
self
.
conv
=
nn
.
Conv2d
(
input_channels
,
output_channels
,
kernel_size
,
**
kwargs
)
self
.
bn
=
nn
.
BatchNorm2d
(
output_channels
)
self
.
relu
=
nn
.
ReLU
(
inplace
=
True
)
def
forward
(
self
,
x
):
x
=
self
.
conv
(
x
)
x
=
self
.
bn
(
x
)
x
=
self
.
relu
(
x
)
return
x
class
ChannelShuffle
(
nn
.
Module
):
def
__init__
(
self
,
groups
):
super
().
__init__
()
self
.
groups
=
groups
def
forward
(
self
,
x
):
batchsize
,
channels
,
height
,
width
=
x
.
data
.
size
()
channels_per_group
=
int
(
channels
/
self
.
groups
)
#"""suppose a convolutional layer with g groups whose output has
#g x n channels; we first reshape the output channel dimension
#into (g, n)"""
x
=
x
.
view
(
batchsize
,
self
.
groups
,
channels_per_group
,
height
,
width
)
#"""transposing and then flattening it back as the input of next layer."""
x
=
x
.
transpose
(
1
,
2
).
contiguous
()
x
=
x
.
view
(
batchsize
,
-
1
,
height
,
width
)
return
x
class
DepthwiseConv2d
(
nn
.
Module
):
def
__init__
(
self
,
input_channels
,
output_channels
,
kernel_size
,
**
kwargs
):
super
().
__init__
()
self
.
depthwise
=
nn
.
Sequential
(
nn
.
Conv2d
(
input_channels
,
output_channels
,
kernel_size
,
**
kwargs
),
nn
.
BatchNorm2d
(
output_channels
)
)
def
forward
(
self
,
x
):
return
self
.
depthwise
(
x
)
class
PointwiseConv2d
(
nn
.
Module
):
def
__init__
(
self
,
input_channels
,
output_channels
,
**
kwargs
):
super
().
__init__
()
self
.
pointwise
=
nn
.
Sequential
(
nn
.
Conv2d
(
input_channels
,
output_channels
,
1
,
**
kwargs
),
nn
.
BatchNorm2d
(
output_channels
)
)
def
forward
(
self
,
x
):
return
self
.
pointwise
(
x
)
class
ShuffleNetUnit
(
nn
.
Module
):
def
__init__
(
self
,
input_channels
,
output_channels
,
stage
,
stride
,
groups
):
super
().
__init__
()
#"""Similar to [9], we set the number of bottleneck channels to 1/4
#of the output channels for each ShuffleNet unit."""
self
.
bottlneck
=
nn
.
Sequential
(
PointwiseConv2d
(
input_channels
,
int
(
output_channels
/
4
),
groups
=
groups
),
nn
.
ReLU
(
inplace
=
True
)
)
#"""Note that for Stage 2, we do not apply group convolution on the first pointwise
#layer because the number of input channels is relatively small."""
if
stage
==
2
:
self
.
bottlneck
=
nn
.
Sequential
(
PointwiseConv2d
(
input_channels
,
int
(
output_channels
/
4
),
groups
=
groups
),
nn
.
ReLU
(
inplace
=
True
)
)
self
.
channel_shuffle
=
ChannelShuffle
(
groups
)
self
.
depthwise
=
DepthwiseConv2d
(
int
(
output_channels
/
4
),
int
(
output_channels
/
4
),
3
,
groups
=
int
(
output_channels
/
4
),
stride
=
stride
,
padding
=
1
)
self
.
expand
=
PointwiseConv2d
(
int
(
output_channels
/
4
),
output_channels
,
groups
=
groups
)
self
.
relu
=
nn
.
ReLU
(
inplace
=
True
)
self
.
fusion
=
self
.
_add
self
.
shortcut
=
nn
.
Sequential
()
#"""As for the case where ShuffleNet is applied with stride,
#we simply make two modifications (see Fig 2 (c)):
#(i) add a 3 × 3 average pooling on the shortcut path;
#(ii) replace the element-wise addition with channel concatenation,
#which makes it easy to enlarge channel dimension with little extra
#computation cost.
if
stride
!=
1
or
input_channels
!=
output_channels
:
self
.
shortcut
=
nn
.
AvgPool2d
(
3
,
stride
=
2
,
padding
=
1
)
self
.
expand
=
PointwiseConv2d
(
int
(
output_channels
/
4
),
output_channels
-
input_channels
,
groups
=
groups
)
self
.
fusion
=
self
.
_cat
def
_add
(
self
,
x
,
y
):
return
torch
.
add
(
x
,
y
)
def
_cat
(
self
,
x
,
y
):
return
torch
.
cat
([
x
,
y
],
dim
=
1
)
def
forward
(
self
,
x
):
shortcut
=
self
.
shortcut
(
x
)
shuffled
=
self
.
bottlneck
(
x
)
shuffled
=
self
.
channel_shuffle
(
shuffled
)
shuffled
=
self
.
depthwise
(
shuffled
)
shuffled
=
self
.
expand
(
shuffled
)
output
=
self
.
fusion
(
shortcut
,
shuffled
)
output
=
self
.
relu
(
output
)
return
output
class
ShuffleNet
(
nn
.
Module
):
def
__init__
(
self
,
num_blocks
=
[
2
,
4
,
2
],
num_classes
=
100
,
groups
=
3
,
dropout_factor
=
1.0
):
super
().
__init__
()
if
groups
==
1
:
out_channels
=
[
24
,
144
,
288
,
567
]
elif
groups
==
2
:
out_channels
=
[
24
,
200
,
400
,
800
]
elif
groups
==
3
:
out_channels
=
[
24
,
240
,
480
,
960
]
elif
groups
==
4
:
out_channels
=
[
24
,
272
,
544
,
1088
]
elif
groups
==
8
:
out_channels
=
[
24
,
384
,
768
,
1536
]
self
.
conv1
=
BasicConv2d
(
3
,
out_channels
[
0
],
3
,
padding
=
1
,
stride
=
1
)
self
.
input_channels
=
out_channels
[
0
]
self
.
stage2
=
self
.
_make_stage
(
ShuffleNetUnit
,
num_blocks
[
0
],
out_channels
[
1
],
stride
=
2
,
stage
=
2
,
groups
=
groups
)
self
.
stage3
=
self
.
_make_stage
(
ShuffleNetUnit
,
num_blocks
[
1
],
out_channels
[
2
],
stride
=
2
,
stage
=
3
,
groups
=
groups
)
self
.
stage4
=
self
.
_make_stage
(
ShuffleNetUnit
,
num_blocks
[
2
],
out_channels
[
3
],
stride
=
2
,
stage
=
4
,
groups
=
groups
)
self
.
avg
=
nn
.
AdaptiveAvgPool2d
((
1
,
1
))
self
.
fc
=
nn
.
Linear
(
out_channels
[
3
],
num_classes
)
self
.
dropout
=
nn
.
Dropout
(
dropout_factor
)
def
forward
(
self
,
x
):
x
=
self
.
conv1
(
x
)
x
=
self
.
stage2
(
x
)
x
=
self
.
stage3
(
x
)
x
=
self
.
stage4
(
x
)
x
=
self
.
avg
(
x
)
x
=
x
.
view
(
x
.
size
(
0
),
-
1
)
x
=
self
.
dropout
(
x
)
x
=
self
.
fc
(
x
)
return
x
def
_make_stage
(
self
,
block
,
num_blocks
,
output_channels
,
stride
,
stage
,
groups
):
"""make shufflenet stage
Args:
block: block type, shuffle unit
out_channels: output depth channel number of this stage
num_blocks: how many blocks per stage
stride: the stride of the first block of this stage
stage: stage index
groups: group number of group convolution
Return:
return a shuffle net stage
"""
strides
=
[
stride
]
+
[
1
]
*
(
num_blocks
-
1
)
stage
=
[]
for
stride
in
strides
:
stage
.
append
(
block
(
self
.
input_channels
,
output_channels
,
stride
=
stride
,
stage
=
stage
,
groups
=
groups
)
)
self
.
input_channels
=
output_channels
return
nn
.
Sequential
(
*
stage
)
def
shufflenet
():
return
ShuffleNet
([
4
,
8
,
4
])
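A matching smoke test for the factory above, as a sketch assuming CIFAR-style 32x32 inputs (which the 100-class default of this file suggests, not something the commit states):

import torch

if __name__ == "__main__":
    net = shufflenet()                # ShuffleNet([4, 8, 4]) with groups=3, num_classes=100
    x = torch.randn(1, 3, 32, 32)     # assumed input resolution
    print(net(x).size())              # expected: torch.Size([1, 100])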
components/hand_keypoints/models/shufflenetv2.py
0 → 100644
"""shufflenetv2 in pytorch
[1] Ningning Ma, Xiangyu Zhang, Hai-Tao Zheng, Jian Sun
ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design
https://arxiv.org/abs/1807.11164
"""
import
torch
import
torch.nn
as
nn
import
torch.nn.functional
as
F
def
channel_split
(
x
,
split
):
"""split a tensor into two pieces along channel dimension
Args:
x: input tensor
split:(int) channel size for each pieces
"""
assert
x
.
size
(
1
)
==
split
*
2
return
torch
.
split
(
x
,
split
,
dim
=
1
)
def
channel_shuffle
(
x
,
groups
):
"""channel shuffle operation
Args:
x: input tensor
groups: input branch number
"""
batch_size
,
channels
,
height
,
width
=
x
.
size
()
channels_per_group
=
int
(
channels
//
groups
)
x
=
x
.
view
(
batch_size
,
groups
,
channels_per_group
,
height
,
width
)
x
=
x
.
transpose
(
1
,
2
).
contiguous
()
x
=
x
.
view
(
batch_size
,
-
1
,
height
,
width
)
return
x
class
ShuffleUnit
(
nn
.
Module
):
def
__init__
(
self
,
in_channels
,
out_channels
,
stride
):
super
().
__init__
()
self
.
stride
=
stride
self
.
in_channels
=
in_channels
self
.
out_channels
=
out_channels
if
stride
!=
1
or
in_channels
!=
out_channels
:
self
.
residual
=
nn
.
Sequential
(
nn
.
Conv2d
(
in_channels
,
in_channels
,
1
),
nn
.
BatchNorm2d
(
in_channels
),
nn
.
ReLU
(
inplace
=
True
),
nn
.
Conv2d
(
in_channels
,
in_channels
,
3
,
stride
=
stride
,
padding
=
1
,
groups
=
in_channels
),
nn
.
BatchNorm2d
(
in_channels
),
nn
.
Conv2d
(
in_channels
,
int
(
out_channels
/
2
),
1
),
nn
.
BatchNorm2d
(
int
(
out_channels
/
2
)),
nn
.
ReLU
(
inplace
=
True
)
)
self
.
shortcut
=
nn
.
Sequential
(
nn
.
Conv2d
(
in_channels
,
in_channels
,
3
,
stride
=
stride
,
padding
=
1
,
groups
=
in_channels
),
nn
.
BatchNorm2d
(
in_channels
),
nn
.
Conv2d
(
in_channels
,
int
(
out_channels
/
2
),
1
),
nn
.
BatchNorm2d
(
int
(
out_channels
/
2
)),
nn
.
ReLU
(
inplace
=
True
)
)
else
:
self
.
shortcut
=
nn
.
Sequential
()
in_channels
=
int
(
in_channels
/
2
)
self
.
residual
=
nn
.
Sequential
(
nn
.
Conv2d
(
in_channels
,
in_channels
,
1
),
nn
.
BatchNorm2d
(
in_channels
),
nn
.
ReLU
(
inplace
=
True
),
nn
.
Conv2d
(
in_channels
,
in_channels
,
3
,
stride
=
stride
,
padding
=
1
,
groups
=
in_channels
),
nn
.
BatchNorm2d
(
in_channels
),
nn
.
Conv2d
(
in_channels
,
in_channels
,
1
),
nn
.
BatchNorm2d
(
in_channels
),
nn
.
ReLU
(
inplace
=
True
)
)
def
forward
(
self
,
x
):
if
self
.
stride
==
1
and
self
.
out_channels
==
self
.
in_channels
:
shortcut
,
residual
=
channel_split
(
x
,
int
(
self
.
in_channels
/
2
))
else
:
shortcut
=
x
residual
=
x
shortcut
=
self
.
shortcut
(
shortcut
)
residual
=
self
.
residual
(
residual
)
x
=
torch
.
cat
([
shortcut
,
residual
],
dim
=
1
)
x
=
channel_shuffle
(
x
,
2
)
return
x
class
ShuffleNetV2
(
nn
.
Module
):
def
__init__
(
self
,
ratio
=
1.
,
num_classes
=
100
,
dropout_factor
=
1.0
):
super
().
__init__
()
if
ratio
==
0.5
:
out_channels
=
[
48
,
96
,
192
,
1024
]
elif
ratio
==
1
:
out_channels
=
[
116
,
232
,
464
,
1024
]
elif
ratio
==
1.5
:
out_channels
=
[
176
,
352
,
704
,
1024
]
elif
ratio
==
2
:
out_channels
=
[
244
,
488
,
976
,
2048
]
else
:
ValueError
(
'unsupported ratio number'
)
self
.
pre
=
nn
.
Sequential
(
nn
.
Conv2d
(
3
,
24
,
3
,
padding
=
1
),
nn
.
BatchNorm2d
(
24
)
)
self
.
stage2
=
self
.
_make_stage
(
24
,
out_channels
[
0
],
3
)
self
.
stage3
=
self
.
_make_stage
(
out_channels
[
0
],
out_channels
[
1
],
7
)
self
.
stage4
=
self
.
_make_stage
(
out_channels
[
1
],
out_channels
[
2
],
3
)
self
.
conv5
=
nn
.
Sequential
(
nn
.
Conv2d
(
out_channels
[
2
],
out_channels
[
3
],
1
),
nn
.
BatchNorm2d
(
out_channels
[
3
]),
nn
.
ReLU
(
inplace
=
True
)
)
self
.
fc
=
nn
.
Linear
(
out_channels
[
3
],
num_classes
)
self
.
dropout
=
nn
.
Dropout
(
dropout_factor
)
def
forward
(
self
,
x
):
x
=
self
.
pre
(
x
)
x
=
self
.
stage2
(
x
)
x
=
self
.
stage3
(
x
)
x
=
self
.
stage4
(
x
)
x
=
self
.
conv5
(
x
)
x
=
F
.
adaptive_avg_pool2d
(
x
,
1
)
x
=
x
.
view
(
x
.
size
(
0
),
-
1
)
x
=
self
.
dropout
(
x
)
x
=
self
.
fc
(
x
)
return
x
def
_make_stage
(
self
,
in_channels
,
out_channels
,
repeat
):
layers
=
[]
layers
.
append
(
ShuffleUnit
(
in_channels
,
out_channels
,
2
))
while
repeat
:
layers
.
append
(
ShuffleUnit
(
out_channels
,
out_channels
,
1
))
repeat
-=
1
return
nn
.
Sequential
(
*
layers
)
def
shufflenetv2
():
return
ShuffleNetV2
()
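The same kind of sketch works for the V2 factory; ratio=1.0 selects the [116, 232, 464, 1024] channel table, and the input resolution below is again an assumption:

import torch

if __name__ == "__main__":
    net = shufflenetv2()              # ratio=1.0, num_classes=100
    x = torch.randn(1, 3, 64, 64)     # assumed input resolution
    print(net(x).size())              # expected: torch.Size([1, 100])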
components/hand_keypoints/models/squeezenet.py
0 → 100644
import math

import numpy as np
import torch
import torch.nn as nn
import torch.nn.init as init
import torch.utils.model_zoo as model_zoo

__all__ = ['SqueezeNet', 'squeezenet1_0', 'squeezenet1_1']

model_urls = {
    'squeezenet1_0': 'https://download.pytorch.org/models/squeezenet1_0-a815701f.pth',
    'squeezenet1_1': 'https://download.pytorch.org/models/squeezenet1_1-f364aa15.pth',
}


class Fire(nn.Module):

    def __init__(self, inplanes, squeeze_planes, expand1x1_planes, expand3x3_planes):
        super(Fire, self).__init__()
        self.inplanes = inplanes
        self.squeeze = nn.Conv2d(inplanes, squeeze_planes, kernel_size=1)
        self.squeeze_activation = nn.ReLU(inplace=True)
        self.expand1x1 = nn.Conv2d(squeeze_planes, expand1x1_planes, kernel_size=1)
        self.expand1x1_activation = nn.ReLU(inplace=True)
        self.expand3x3 = nn.Conv2d(squeeze_planes, expand3x3_planes, kernel_size=3, padding=1)
        self.expand3x3_activation = nn.ReLU(inplace=True)

    def forward(self, x):
        x = self.squeeze_activation(self.squeeze(x))
        return torch.cat([
            self.expand1x1_activation(self.expand1x1(x)),
            self.expand3x3_activation(self.expand3x3(x))
        ], 1)


class SqueezeNet(nn.Module):

    def __init__(self, version=1.0, num_classes=1000, dropout_factor=1.):
        super(SqueezeNet, self).__init__()
        if version not in [1.0, 1.1]:
            raise ValueError("Unsupported SqueezeNet version {version}: "
                             "1.0 or 1.1 expected".format(version=version))
        self.num_classes = num_classes
        if version == 1.0:
            self.features = nn.Sequential(
                nn.Conv2d(3, 96, kernel_size=7, stride=2),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
                Fire(96, 16, 64, 64),
                Fire(128, 16, 64, 64),
                Fire(128, 32, 128, 128),
                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
                Fire(256, 32, 128, 128),
                Fire(256, 48, 192, 192),
                Fire(384, 48, 192, 192),
                Fire(384, 64, 256, 256),
                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
                Fire(512, 64, 256, 256),
            )
        else:
            self.features = nn.Sequential(
                nn.Conv2d(3, 64, kernel_size=3, stride=2),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
                Fire(64, 16, 64, 64),
                Fire(128, 16, 64, 64),
                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
                Fire(128, 32, 128, 128),
                Fire(256, 32, 128, 128),
                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
                Fire(256, 48, 192, 192),
                Fire(384, 48, 192, 192),
                Fire(384, 64, 256, 256),
                Fire(512, 64, 256, 256),
            )
        # Final convolution is initialized differently from the rest
        final_conv = nn.Conv2d(512, self.num_classes, kernel_size=1)
        self.classifier = nn.Sequential(
            nn.Dropout(p=dropout_factor),
            final_conv,
            nn.ReLU(inplace=True),
            nn.AdaptiveAvgPool2d(1)
        )

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                if m is final_conv:
                    init.normal_(m.weight.data, mean=0.0, std=0.01)
                else:
                    init.kaiming_uniform_(m.weight.data)
                if m.bias is not None:
                    m.bias.data.zero_()

    def forward(self, x):
        x = self.features(x)
        # print("features(x):", x.size())
        x = self.classifier(x)
        # print("classifier(x):", x.size())
        return x.view(x.size(0), self.num_classes)


def squeezenet1_0(pretrained=False, **kwargs):
    r"""SqueezeNet model architecture from the `"SqueezeNet: AlexNet-level
    accuracy with 50x fewer parameters and <0.5MB model size"
    <https://arxiv.org/abs/1602.07360>`_ paper.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = SqueezeNet(version=1.0, **kwargs)
    model_dict = model.state_dict()
    if pretrained:
        pretrained_state_dict = model_zoo.load_url(model_urls['squeezenet1_0'])
        pretrained_dict = {k: v for k, v in pretrained_state_dict.items()
                           if k in model_dict and model_dict[k].size() == pretrained_state_dict[k].size()}
        model.load_state_dict(pretrained_dict, strict=False)
    return model


def squeezenet1_1(pretrained=False, **kwargs):
    r"""SqueezeNet 1.1 model from the `official SqueezeNet repo
    <https://github.com/DeepScale/SqueezeNet/tree/master/SqueezeNet_v1.1>`_.
    SqueezeNet 1.1 has 2.4x less computation and slightly fewer parameters
    than SqueezeNet 1.0, without sacrificing accuracy.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = SqueezeNet(version=1.1, **kwargs)
    model_dict = model.state_dict()
    if pretrained:
        pretrained_state_dict = model_zoo.load_url(model_urls['squeezenet1_1'])
        pretrained_dict = {k: v for k, v in pretrained_state_dict.items()
                           if k in model_dict and model_dict[k].size() == pretrained_state_dict[k].size()}
        model.load_state_dict(pretrained_dict, strict=False)
    return model


if __name__ == "__main__":
    from thop import profile

    dummy = torch.from_numpy(np.random.random([16, 3, 256, 256]).astype(np.float32))
    model = squeezenet1_0(pretrained=True, num_classes=42, dropout_factor=0.5)
    print(model)
    flops, params = profile(model, inputs=(dummy, ))
    model.eval()
    output = model(dummy)
    print(output.size())
    print("flops: {}, params: {}".format(flops, params))
components/hand_keypoints/utils/common_utils.py
0 → 100644
#-*-coding:utf-8-*-
# date:2020-04-11
# Author: Eric.Lee
# function: common utils
import os
import shutil
import cv2
import numpy as np
import json
import random  # needed by plot_box's random color fallback


def mkdir_(path, flag_rm=False):
    if os.path.exists(path):
        if flag_rm == True:
            shutil.rmtree(path)
            os.mkdir(path)
            print('remove {} done ~ '.format(path))
    else:
        os.mkdir(path)


def plot_box(bbox, img, color=None, label=None, line_thickness=None):
    tl = line_thickness or round(0.002 * max(img.shape[0:2])) + 1
    color = color or [random.randint(0, 255) for _ in range(3)]
    c1, c2 = (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3]))
    cv2.rectangle(img, c1, c2, color, thickness=tl)  # target bounding box
    if label:
        tf = max(tl - 2, 1)
        t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]  # label size
        c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3  # text bounding box
        cv2.rectangle(img, c1, c2, color, -1)  # filled label background
        # draw the label text
        cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 4, [225, 255, 255],
                    thickness=tf, lineType=cv2.LINE_AA)


class JSON_Encoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, np.integer):
            return int(obj)
        elif isinstance(obj, np.floating):
            return float(obj)
        elif isinstance(obj, np.ndarray):
            return obj.tolist()
        else:
            return super(JSON_Encoder, self).default(obj)


def draw_landmarks(img, output, draw_circle):
    # groups normalized landmark coordinates by region and optionally draws them
    img_width = img.shape[1]
    img_height = img.shape[0]
    dict_landmarks = {}
    for i in range(int(output.shape[0] / 2)):
        x = output[i * 2 + 0] * float(img_width)
        y = output[i * 2 + 1] * float(img_height)
        if 41 >= i >= 33:
            if 'left_eyebrow' not in dict_landmarks.keys():
                dict_landmarks['left_eyebrow'] = []
            dict_landmarks['left_eyebrow'].append([int(x), int(y), (0, 255, 0)])
            if draw_circle:
                cv2.circle(img, (int(x), int(y)), 2, (0, 255, 0), -1)
        elif 50 >= i >= 42:
            if 'right_eyebrow' not in dict_landmarks.keys():
                dict_landmarks['right_eyebrow'] = []
            dict_landmarks['right_eyebrow'].append([int(x), int(y), (0, 255, 0)])
            if draw_circle:
                cv2.circle(img, (int(x), int(y)), 2, (0, 255, 0), -1)
        elif 67 >= i >= 60:
            if 'left_eye' not in dict_landmarks.keys():
                dict_landmarks['left_eye'] = []
            dict_landmarks['left_eye'].append([int(x), int(y), (255, 0, 255)])
            if draw_circle:
                cv2.circle(img, (int(x), int(y)), 2, (255, 0, 255), -1)
        elif 75 >= i >= 68:
            if 'right_eye' not in dict_landmarks.keys():
                dict_landmarks['right_eye'] = []
            dict_landmarks['right_eye'].append([int(x), int(y), (255, 0, 255)])
            if draw_circle:
                cv2.circle(img, (int(x), int(y)), 2, (255, 0, 255), -1)
        elif 97 >= i >= 96:
            cv2.circle(img, (int(x), int(y)), 2, (0, 0, 255), -1)
        elif 54 >= i >= 51:
            if 'bridge_nose' not in dict_landmarks.keys():
                dict_landmarks['bridge_nose'] = []
            dict_landmarks['bridge_nose'].append([int(x), int(y), (0, 170, 255)])
            if draw_circle:
                cv2.circle(img, (int(x), int(y)), 2, (0, 170, 255), -1)
        elif 32 >= i >= 0:
            if 'basin' not in dict_landmarks.keys():
                dict_landmarks['basin'] = []
            dict_landmarks['basin'].append([int(x), int(y), (255, 30, 30)])
            if draw_circle:
                cv2.circle(img, (int(x), int(y)), 2, (255, 30, 30), -1)
        elif 59 >= i >= 55:
            if 'wing_nose' not in dict_landmarks.keys():
                dict_landmarks['wing_nose'] = []
            dict_landmarks['wing_nose'].append([int(x), int(y), (0, 255, 255)])
            if draw_circle:
                cv2.circle(img, (int(x), int(y)), 2, (0, 255, 255), -1)
        elif 87 >= i >= 76:
            if 'out_lip' not in dict_landmarks.keys():
                dict_landmarks['out_lip'] = []
            dict_landmarks['out_lip'].append([int(x), int(y), (255, 255, 0)])
            if draw_circle:
                cv2.circle(img, (int(x), int(y)), 2, (255, 255, 0), -1)
        elif 95 >= i >= 88:
            if 'in_lip' not in dict_landmarks.keys():
                dict_landmarks['in_lip'] = []
            dict_landmarks['in_lip'].append([int(x), int(y), (50, 220, 255)])
            if draw_circle:
                cv2.circle(img, (int(x), int(y)), 2, (50, 220, 255), -1)
        else:
            if draw_circle:
                cv2.circle(img, (int(x), int(y)), 2, (255, 0, 255), -1)

    return dict_landmarks


def draw_contour(image, dict):
    for key in dict.keys():
        # print(key)
        _, _, color = dict[key][0]

        if 'basin' == key or 'wing_nose' == key:
            pts = np.array([[dict[key][i][0], dict[key][i][1]] for i in range(len(dict[key]))], np.int32)
            # print(pts)
            cv2.polylines(image, [pts], False, color)
        else:
            points_array = np.zeros((1, len(dict[key]), 2), dtype=np.int32)
            for i in range(len(dict[key])):
                x, y, _ = dict[key][i]
                points_array[0, i, 0] = x
                points_array[0, i, 1] = y
            # cv2.fillPoly(image, points_array, color)
            cv2.drawContours(image, points_array, -1, color, thickness=1)
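plot_box draws in place on a BGR image array; a minimal sketch (the canvas and output file name are invented for illustration):

import cv2
import numpy as np

if __name__ == "__main__":
    img = np.zeros((480, 640, 3), dtype=np.uint8)    # blank canvas standing in for a video frame
    plot_box((100, 80, 300, 260), img, color=(0, 255, 0), label='hand 0.97')
    cv2.imwrite('plot_box_demo.jpg', img)            # output name is illustrative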
components/hand_keypoints/utils/model_utils.py
0 → 100644
#-*-coding:utf-8-*-
# date:2020-04-11
# Author: Eric.Lee
# function: model utils
import os
import numpy as np
import torch
import torch.backends.cudnn as cudnn
import random


def get_acc(output, label):
    total = output.shape[0]
    _, pred_label = output.max(1)
    num_correct = (pred_label == label).sum().item()
    return num_correct / float(total)


def set_learning_rate(optimizer, lr):
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr


def set_seed(seed=666):
    np.random.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
        cudnn.deterministic = True


def split_trainval_datasets(ops):
    print(' --------------->>> split_trainval_datasets ')
    train_split_datasets = []
    train_split_datasets_label = []

    val_split_datasets = []
    val_split_datasets_label = []
    for idx, doc in enumerate(sorted(os.listdir(ops.train_path),
                                     key=lambda x: int(x.split('.')[0]), reverse=False)):
        # print(' %s label is %s \n'%(doc,idx))
        data_list = os.listdir(ops.train_path + doc)
        random.shuffle(data_list)

        cal_split_num = int(len(data_list) * ops.val_factor)

        for i, file in enumerate(data_list):
            if '.jpg' in file:
                if i < cal_split_num:
                    val_split_datasets.append(ops.train_path + doc + '/' + file)
                    val_split_datasets_label.append(idx)
                else:
                    train_split_datasets.append(ops.train_path + doc + '/' + file)
                    train_split_datasets_label.append(idx)
                print(ops.train_path + doc + '/' + file, idx)

    print('\n')
    print('train_split_datasets len {}'.format(len(train_split_datasets)))
    print('val_split_datasets len {}'.format(len(val_split_datasets)))

    return train_split_datasets, train_split_datasets_label, val_split_datasets, val_split_datasets_label
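get_acc expects raw logits of shape (N, num_classes) and integer class labels; a small worked example:

import torch

if __name__ == "__main__":
    logits = torch.tensor([[2.0, 0.1], [0.3, 1.5], [0.9, 0.2]])
    labels = torch.tensor([0, 1, 1])
    print(get_acc(logits, labels))    # 2 of 3 predictions correct -> 0.666...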