Commit bc20c001 authored by Eric.Lee2021

create first dpcas demo

Parent 6415fc2f
# DpCas-Light
### dpcas (Deep Learning Componentized Application System): a deep-learning componentized application system, built to integrate existing models quickly into working applications.
### This is the first complete-pipeline demo: a local gesture-interaction application. A web-based gesture-interaction version will follow.
## Project Overview
### Project 1: Gesture interaction (local version)
* Implemented with Python multiprocessing, 100% Python code.
* 1. Single-hand click: a click is registered when the thumb and index finger pinch together (a minimal sketch of this idea follows the list).
* 2. Two-hand clicks select a target region.
* 3. Building on point 2, the recognition architecture is extensible (no object-recognition model is included yet; one will be added later).
* 4. IOU-based hand tracking.
* 5. Audio extensions are supported.
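The pinch click in item 1 can be illustrated with a short sketch (an illustration only, not the repository's implementation, which lives under lib/hand_lib/cores): treat the gesture as a click when the thumb tip and index-finger tip are close relative to the overall hand size. The keypoint indices (4 = thumb tip, 8 = index-finger tip) follow the common 21-keypoint hand convention and are assumptions here.
```
import math

def is_pinch_click(pts, ratio_thres=0.25):
    # pts: list of 21 (x, y) hand keypoints in pixels
    thumb_tip, index_tip = pts[4], pts[8]
    dist = math.hypot(thumb_tip[0] - index_tip[0], thumb_tip[1] - index_tip[1])
    xs = [p[0] for p in pts]
    ys = [p[1] for p in pts]
    hand_size = max(max(xs) - min(xs), max(ys) - min(ys)) + 1e-6
    return (dist / hand_size) < ratio_thres
```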
## Requirements
### 1. Software
* Python 3.7
* PyTorch >= 1.5.1
* opencv-python
* playsound
### 2. Hardware
* An ordinary USB color (RGB) webcam
## Related Projects
### 1. Hand detection (yolo_v3)
* Project: https://codechina.csdn.net/EricLee/yolo_v3
* [Pretrained model download (Baidu Netdisk, password: 7mk0)](https://pan.baidu.com/s/1hqzvz0MeFX0EdpWXUV6aFg)
### 2. 21-keypoint hand regression (handpose_x)
* https://codechina.csdn.net/EricLee/handpose_x
* [Pretrained model download (Baidu Netdisk, password: 99f3)](https://pan.baidu.com/s/1Ur6Ikp31XGEuA3hQjYzwIw)
## Usage
### Project 1: Gesture interaction (local version)
### 1. Download the hand-detection model and the 21-keypoint regression model.
### 2. Make sure the camera is connected and working.
### 3. Open the configuration file lib/hand_lib/cfg/handpose.cfg and set the parameters listed below; please read them carefully (normally only the model paths and model architectures need changing). A minimal sketch of how such a file can be parsed follows the block.
```
detect_model_path=./components/hand_detect/weights/latest_416.pt # path to the hand detection model
detect_model_arch=yolo_v3 # detection model type: yolo or yolo-tiny
detect_conf_thres=0.5 # detection confidence threshold
detect_nms_thres=0.45 # detection NMS threshold
handpose_x_model_path=./components/hand_keypoints/weights/ReXNetV1-size-256-wingloss102-0.1063.pth # path to the 21-keypoint hand regression model
handpose_x_model_arch=rexnetv1 # regression model architecture
camera_id = 0 # camera ID, usually 0; change it if your camera differs
vis_gesture_lines = True # True: visualize the click trajectory, False: do not visualize it
charge_cycle_step = 32 # click-stability counter (the "charge ring" shown while a click stabilizes)
```
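The configuration file is a plain key=value list with trailing `#` comments; `parse_data_cfg` in lib/hand_lib/utils/utils.py reads it into a dict of strings. A minimal parser with the assumed behavior (an illustrative sketch, not the project's exact code) could look like this:
```
def parse_cfg(path):
    config = {}
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.split("#", 1)[0].strip()  # drop trailing comments
            if not line or "=" not in line:
                continue
            key, value = line.split("=", 1)
            config[key.strip()] = value.strip()   # values stay strings
    return config

# cfg = parse_cfg("./lib/hand_lib/cfg/handpose.cfg")
# conf_thres = float(cfg["detect_conf_thres"])
# camera_id = int(cfg["camera_id"])
```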
### 4. From the repository root, run: python main.py
## Contact
* E-mails: 305141918@qq.com
#-*-coding:utf-8-*-
'''
DpCas-Light
|||| ||||| |||| || |||||||
|| || || || || || |||| || ||
|| || || || || || || || ||
|| || || || || ||====|| ||||||
|| || ||||| || || ||======|| ||
|| || || || || || || || ||
|||| || |||| || || |||||||
/--------------------- HandPose_X ---------------------/
'''
# date:2021-03-12
# Author: Eric.Lee
# function: handpose demo
import os
import time
import random
from multiprocessing import Process
from multiprocessing import Manager
import cv2
import numpy as np
# load model components
from hand_detect.yolo_v3_hand import yolo_v3_hand_model
from hand_keypoints.handpose_x import handpose_x_model
# load utility libraries
import sys
sys.path.append("./lib/hand_lib/")
from cores.handpose_fuction import handpose_track_keypoints21_pipeline
from cores.handpose_fuction import hand_tracking,audio_recognize,judge_click_stabel,draw_click_lines
from utils.utils import parse_data_cfg
from playsound import playsound
def audio_process_dw_edge_cnt(info_dict):
while (info_dict["handpose_procss_ready"] == False): # wait for the models to load
time.sleep(2)
gesture_names = ["click"]
gesture_dict = {}
for k_ in gesture_names:
gesture_dict[k_] = None
# time.sleep(1)
# playsound("./materials/audio/sentences/WelcomeAR.mp3")
# time.sleep(0.01)
# playsound("./materials/audio/sentences/MorningEric.mp3")
# time.sleep(1)
reg_cnt = 0
while True:
time.sleep(0.01)
try:
reg_cnt = info_dict["click_dw_cnt"]
for i in range(reg_cnt):
# playsound("./materials/audio/cue/winwin-1.mp3")
playsound("./materials/audio/sentences/welldone.mp3")
info_dict["click_dw_cnt"] = info_dict["click_dw_cnt"] - reg_cnt
except Exception as inst:
print(type(inst),inst) # exception instance
if info_dict["break"] == True:
break
def audio_process_up_edge_cnt(info_dict):
while (info_dict["handpose_procss_ready"] == False): # wait for the models to load
time.sleep(2)
gesture_names = ["click"]
gesture_dict = {}
for k_ in gesture_names:
gesture_dict[k_] = None
reg_cnt = 0
while True:
time.sleep(0.01)
# print(" --->>> audio_process")
try:
reg_cnt = info_dict["click_up_cnt"]
for i in range(reg_cnt):
# playsound("./materials/audio/cue/m2-0.mp3")
playsound("./materials/audio/sentences/Click.mp3")
info_dict["click_up_cnt"] = info_dict["click_up_cnt"] - reg_cnt
except Exception as inst:
print(type(inst),inst) # the exception instance
if info_dict["break"] == True:
break
def audio_process_dw_edge(info_dict):
while (info_dict["handpose_procss_ready"] == False): # wait for the models to load
time.sleep(2)
gesture_names = ["click"]
gesture_dict = {}
for k_ in gesture_names:
gesture_dict[k_] = None
while True:
time.sleep(0.01)
# print(" --->>> audio_process")
try:
for g_ in gesture_names:
if gesture_dict[g_] is None:
gesture_dict[g_] = info_dict[g_]
else:
if ("click"==g_):
if (info_dict[g_]^gesture_dict[g_]) and info_dict[g_]==False:# falling edge of the Click signal: the click action has ended
playsound("./materials/audio/cue/winwin.mp3")
# playsound("./materials/audio/sentences/welldone.mp3")
gesture_dict[g_] = info_dict[g_]
except Exception as inst:
print(type(inst),inst) # the exception instance
if info_dict["break"] == True:
break
def audio_process_up_edge(info_dict):
while (info_dict["handpose_procss_ready"] == False): # wait for the models to load
time.sleep(2)
gesture_names = ["click"]
gesture_dict = {}
for k_ in gesture_names:
gesture_dict[k_] = None
while True:
time.sleep(0.01)
# print(" --->>> audio_process")
try:
for g_ in gesture_names:
if gesture_dict[g_] is None:
gesture_dict[g_] = info_dict[g_]
else:
if ("click"==g_):
if (info_dict[g_]^gesture_dict[g_]) and info_dict[g_]==True:# rising edge of the Click signal: the click action has started
playsound("./materials/audio/cue/m2.mp3")
# playsound("./materials/audio/sentences/clik_quick.mp3")
gesture_dict[g_] = info_dict[g_]
except Exception as inst:
print(type(inst),inst) # the exception instance
if info_dict["break"] == True:
break
'''
Start the recognition-audio process
'''
def audio_process_recognize_up_edge(info_dict):
while (info_dict["handpose_procss_ready"] == False): # wait for the models to load
time.sleep(2)
gesture_names = ["double_en_pts"]
gesture_dict = {}
for k_ in gesture_names:
gesture_dict[k_] = None
while True:
time.sleep(0.01)
# print(" --->>> audio_process")
try:
for g_ in gesture_names:
if gesture_dict[g_] is None:
gesture_dict[g_] = info_dict[g_]
else:
if ("double_en_pts"==g_):
if (info_dict[g_]^gesture_dict[g_]) and info_dict[g_]==True:# rising edge of the double_en_pts signal: start recognizing the selected target
playsound("./materials/audio/sentences/IdentifyingObjectsWait.mp3")
playsound("./materials/audio/sentences/ObjectMayBeIdentified.mp3")
gesture_dict[g_] = info_dict[g_]
except Exception as inst:
print(type(inst),inst) # exception instance
if info_dict["break"] == True:
break
'''
/*****************************************/
   Algorithm pipeline
/*****************************************/
'''
def handpose_x_process(info_dict,config):
# model initialization
print("load model component ...")
# initialize the yolo v3 hand detection model
hand_detect_model = yolo_v3_hand_model(conf_thres=float(config["detect_conf_thres"]),nms_thres=float(config["detect_nms_thres"]),
model_arch = config["detect_model_arch"],model_path = config["detect_model_path"])
# initialize the handpose_x 21-keypoint regression model
handpose_model = handpose_x_model(model_arch = config["handpose_x_model_arch"],model_path = config["handpose_x_model_path"])
#
gesture_model = None # not used yet
#
object_recognize_model = None # object classification model, not used yet
#
img_reco_crop = None
cap = cv2.VideoCapture(int(config["camera_id"])) # open the camera
cap.set(cv2.CAP_PROP_EXPOSURE, -8) # set camera exposure (note: not effective on all cameras)
# url="http://admin:admin@192.168.43.1:8081"
# cap=cv2.VideoCapture(url)
print("start handpose process ~")
info_dict["handpose_procss_ready"] = True # start-up synchronization signal between processes
gesture_lines_dict = {} # trajectory points while the click gesture is active
hands_dict = {} # per-hand information
hands_click_dict = {} # per-hand click counters
track_index = 0 # global tracking index
while True:
ret, img = cap.read()# read a camera frame
if ret:# frame read successfully
# img = cv2.flip(img,-1)
algo_img = img.copy()
st_ = time.time()
#------
hand_bbox =hand_detect_model.predict(img,vis = True) # detect hands and get their bounding boxes
hands_dict,track_index = hand_tracking(data = hand_bbox,hands_dict = hands_dict,track_index = track_index) # hand tracking, currently IOU-based
# estimate the 21 keypoints and related information for each hand
handpose_list = handpose_track_keypoints21_pipeline(img,hands_dict = hands_dict,hands_click_dict = hands_click_dict,track_index = track_index,algo_img = algo_img,
handpose_model = handpose_model,gesture_model = gesture_model,
icon = None,vis = True)
et_ = time.time()
fps_ = 1./(et_-st_+1e-8)
#------------------------------------------ maintain information for tracked hands
#------------------ collect the IDs of the tracked hands
id_list = []
for i in range(len(handpose_list)):
_,_,_,dict_ = handpose_list[i]
id_list.append(dict_["id"])
# print(id_list)
#----------------- collect the hand IDs that should be removed
id_del_list = []
for k_ in gesture_lines_dict.keys():
if k_ not in id_list:# drop trajectories of hands whose tracking has already been lost
id_del_list.append(k_)
#----------------- delete information for hands that can no longer be tracked
for k_ in id_del_list:
del gesture_lines_dict[k_]
del hands_click_dict[k_]
#----------------- update hand trajectories and the rising/falling edge signals of the click gesture
double_en_pts = []
for i in range(len(handpose_list)):
_,_,_,dict_ = handpose_list[i]
id_ = dict_["id"]
if dict_["click"]:
if id_ not in gesture_lines_dict.keys():
gesture_lines_dict[id_] = {}
gesture_lines_dict[id_]["pts"]=[]
gesture_lines_dict[id_]["line_color"] = (random.randint(100,255),random.randint(100,255),random.randint(100,255))
gesture_lines_dict[id_]["click"] = None
# check for a rising edge
if gesture_lines_dict[id_]["click"] is not None:
if gesture_lines_dict[id_]["click"] == False:# rising-edge counter
info_dict["click_up_cnt"] += 1
# record the click state
gesture_lines_dict[id_]["click"] = True
#--- record the coordinate
gesture_lines_dict[id_]["pts"].append(dict_["choose_pt"])
double_en_pts.append(dict_["choose_pt"])
else:
if id_ not in gesture_lines_dict.keys():
gesture_lines_dict[id_] = {}
gesture_lines_dict[id_]["pts"]=[]
gesture_lines_dict[id_]["line_color"] = (random.randint(100,255),random.randint(100,255),random.randint(100,255))
gesture_lines_dict[id_]["click"] = None
elif id_ in gesture_lines_dict.keys():
gesture_lines_dict[id_]["pts"]=[]# clear the trajectory
# check for a falling edge
if gesture_lines_dict[id_]["click"] == True:# falling-edge counter
info_dict["click_dw_cnt"] += 1
# update the click state
gesture_lines_dict[id_]["click"] = False
# draw the trajectory of the thumb/index midpoint while a hand is in the click state
draw_click_lines(img,gesture_lines_dict,vis = bool(config["vis_gesture_lines"]))
# check whether each hand's click state is stable and has reached the configured charge threshold
flag_click_stable = judge_click_stabel(img,handpose_list,int(config["charge_cycle_step"]))
# decide whether to trigger the recognition audio and recognize the selected target region
img_reco_crop = audio_recognize(img,algo_img,img_reco_crop,object_recognize_model,info_dict,double_en_pts,flag_click_stable)
cv2.putText(img, 'HandNum:[{}]'.format(len(hand_bbox)), (5,25),cv2.FONT_HERSHEY_COMPLEX, 0.7, (255, 0, 0),5)
cv2.putText(img, 'HandNum:[{}]'.format(len(hand_bbox)), (5,25),cv2.FONT_HERSHEY_COMPLEX, 0.7, (0, 0, 255))
cv2.namedWindow("image",0)
cv2.imshow("image",img)
if cv2.waitKey(1) == 27:
info_dict["break"] = True
break
else:
break
cap.release()
cv2.destroyAllWindows()
def main_handpose_x(cfg_file):
config = parse_data_cfg(cfg_file)
print("\n/---------------------- main_handpose_x config ------------------------/\n")
for k_ in config.keys():
print("{} : {}".format(k_,config[k_]))
print("\n/------------------------------------------------------------------------/\n")
print(" loading handpose_x local demo ...")
g_info_dict = Manager().dict()# shared dict for key:value communication between processes
g_info_dict["handpose_procss_ready"] = False # start-up synchronization signal between processes
g_info_dict["break"] = False # exit synchronization signal between processes
g_info_dict["double_en_pts"] = False # enable signal for the two-hand selection gesture
g_info_dict["click_up_cnt"] = 0
g_info_dict["click_dw_cnt"] = 0
print(" multiprocessing dict key:\n")
for key_ in g_info_dict.keys():
print( " -> ",key_)
print()
#-------------------------------------------------- initialize the processes
process_list = []
t = Process(target=handpose_x_process,args=(g_info_dict,config,))
process_list.append(t)
t = Process(target=audio_process_recognize_up_edge,args=(g_info_dict,)) # play audio on the rising edge
process_list.append(t)
# t = Process(target=audio_process_dw_edge_cnt,args=(g_info_dict,)) # play audio on the falling edge
# process_list.append(t)
# t = Process(target=audio_process_up_edge_cnt,args=(g_info_dict,)) # play audio on the rising edge
# process_list.append(t)
for i in range(len(process_list)):
process_list[i].start()
for i in range(len(process_list)):
process_list[i].join()# the main process waits for the child processes to finish
del process_list
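# --- Hedged sketch (not part of the original commit): a minimal entry point in the
# spirit of the README's "python main.py"; it assumes this module is what main.py
# wraps and that the cfg path below matches your checkout. ---
if __name__ == "__main__":
    main_handpose_x(cfg_file="./lib/hand_lib/cfg/handpose.cfg")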
import torch
import torch.nn as nn
import torchvision
import time
import numpy as np
import sys
def get_model_op(model_,print_flag = False):
# print('/********************* modules *******************/')
op_dict = {}
idx = 0
for m in model_.modules():
idx += 1
if isinstance(m, nn.Conv2d):
if 'Conv2d' not in op_dict.keys():
op_dict['Conv2d'] = 1
else:
op_dict['Conv2d'] += 1
if print_flag:
print('{}) {}'.format(idx,m))
pass
elif isinstance(m, nn.BatchNorm2d):
if 'BatchNorm2d' not in op_dict.keys():
op_dict['BatchNorm2d'] = 1
else:
op_dict['BatchNorm2d'] += 1
if print_flag:
print('{}) {}'.format(idx,m))
pass
elif isinstance(m, nn.Linear):
if 'Linear' not in op_dict.keys():
op_dict['Linear'] = 1
else:
op_dict['Linear'] += 1
if print_flag:
print('{}) {}'.format(idx,m))
pass
elif isinstance(m, nn.Sequential):
if print_flag:
print('*******************{}) {}'.format(idx,m))
for n in m:
if print_flag:
print('{}) {}'.format(idx,n))
if 'Conv2d' not in op_dict.keys():
op_dict['Conv2d'] = 1
else:
op_dict['Conv2d'] += 1
if 'BatchNorm2d' not in op_dict.keys():
op_dict['BatchNorm2d'] = 1
else:
op_dict['BatchNorm2d'] += 1
if 'Linear' not in op_dict.keys():
op_dict['Linear'] = 1
else:
op_dict['Linear'] += 1
if 'ReLU6' not in op_dict.keys():
op_dict['ReLU6'] = 1
else:
op_dict['ReLU6'] += 1
pass
elif isinstance(m, nn.ReLU6):
if print_flag:
print('{}) {}'.format(idx,m))
if 'ReLU6' not in op_dict.keys():
op_dict['ReLU6'] = 1
else:
op_dict['ReLU6'] += 1
pass
elif isinstance(m, nn.Module):
if print_flag:
print('{}) {}'.format(idx,m))
for n in m.modules():
if isinstance(n, nn.Conv2d):
if print_flag:
print('{}) {}'.format(idx,n))
if 'Conv2d' not in op_dict.keys():
op_dict['Conv2d'] = 1
else:
op_dict['Conv2d'] += 1
if 'BatchNorm2d' not in op_dict.keys():
op_dict['BatchNorm2d'] = 1
else:
op_dict['BatchNorm2d'] += 1
if 'Linear' not in op_dict.keys():
op_dict['Linear'] = 1
else:
op_dict['Linear'] += 1
if 'ReLU6' not in op_dict.keys():
op_dict['ReLU6'] = 1
else:
op_dict['ReLU6'] += 1
pass
pass
else:
if print_flag:
print('{}) {}'.format(idx,m))
pass
# print('\n/********************** {} ********************/\n'.format(ops.network))
for key in op_dict.keys():
if print_flag:
print(' operation - {} : {}'.format(key,op_dict[key]))
class DummyModule(nn.Module):
def __init__(self):
super(DummyModule, self).__init__()
def forward(self, x):
return x
def fuse(conv, bn):
# https://tehnokv.com/posts/fusing-batchnorm-and-conv/
with torch.no_grad():
# init
if isinstance(conv, nn.Conv2d):
fusedconv = torch.nn.Conv2d(conv.in_channels,
conv.out_channels,
kernel_size=conv.kernel_size,
stride=conv.stride,
padding=conv.padding,
bias=True)
elif isinstance(conv, nn.ConvTranspose2d): # note: fusing nn.ConvTranspose2d is not fully supported
fusedconv = nn.ConvTranspose2d(
conv.in_channels,
conv.out_channels,
kernel_size=conv.kernel_size,
stride=conv.stride,
padding=conv.padding,
output_padding=conv.output_padding,
bias=True)
else:
print("error")
exit()
# prepare filters
w_conv = conv.weight.clone().view(conv.out_channels, -1)
w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var)))
fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.size()))
# prepare spatial bias
if conv.bias is not None:
b_conv = conv.bias
#b_conv = conv.bias.mul(bn.weight.div(torch.sqrt(bn.running_var + bn.eps))) # arguably the conv bias should be scaled like this instead
else:
b_conv = torch.zeros(conv.weight.size(0))
b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps))
fusedconv.bias.copy_(b_conv + b_bn)
return fusedconv
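# Math behind fuse(): for BatchNorm parameters (gamma, beta, running_mean,
# running_var, eps) that follow a convolution with weight W and bias b, the
# equivalent single convolution is
#     W_fused = diag(gamma / sqrt(running_var + eps)) @ W
#     b_fused = beta + gamma * (b - running_mean) / sqrt(running_var + eps)
# Note the code above adds the conv bias b unscaled (b_conv + b_bn), which is exact
# only when the conv has no bias (the usual Conv+BN case); the commented-out line
# hints at the fully scaled alternative.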
# idxx = 0
def fuse_module(m):
# global idxx
children = list(m.named_children())
c = None
cn = None
for name, child in children:
# idxx += 1
# print('-------------->>',idxx)
# if idxx%10==0:
# continue
# print("name {}, child {}".format(name, child))
if isinstance(child, nn.BatchNorm2d) and c is not None:
bc = fuse(c, child)
m._modules[cn] = bc
# print('DummyModule() : ',DummyModule())
m._modules[name] = DummyModule()
c = None
elif isinstance(child, nn.Conv2d):
c = child
cn = name
else:
fuse_module(child)
def test_net(ops,m):
use_cuda = torch.cuda.is_available()
use_cpu = False
if ops.force_cpu or use_cuda == False:
p = torch.randn([1, 3, 256, 256])
device = torch.device("cpu")
use_cpu = True
else:
p = torch.randn([1, 3, 256, 256]).cuda()
device = torch.device("cuda:0")
count = 50
time_org = []
m_o = m.to(device)
get_model_op(m_o)
# print(m)
for i in range(count):
s1 = time.time()
if use_cpu:
o_output = m_o(p)
else:
o_output = m_o(p).cpu()
s2 = time.time()
time_org.append(s2 - s1)
print("Original time: ", s2 - s1)
print('------------------------------------>>>>')
fuse_module(m.to(torch.device("cpu")))
# print(m)
m_f = m.to(device)
get_model_op(m_f)
time_fuse = []
for i in range(count):
s1 = time.time()
if use_cpu:
f_output = m_f(p)
else:
f_output = m_f(p).cpu()
s2 = time.time()
time_fuse.append(s2 - s1)
print("Fused time: ", s2 - s1)
print("-" * 50)
print("org time:", np.mean(time_org))
print("fuse time:", np.mean(time_fuse))
for o in o_output:
print("org size:", o.size())
for o in f_output:
print("fuse size:", o.size())
for i in range(len(o_output)):
assert o_output[i].size()==f_output[i].size()
print("output[{}] max abs diff: {}".format(i, (o_output[i] - f_output[i]).abs().max().item()))
print("output[{}] MSE diff: {}".format(i, nn.MSELoss()(o_output[i], f_output[i]).item()))
def acc_model(ops,m):
# print('\n-------------------------------->>> before acc model')
get_model_op(m)
fuse_module(m)
# print('\n-------------------------------->>> after acc model')
get_model_op(m)
return m
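# --- Hedged usage sketch (not part of the original file): fuse the Conv+BN pairs of a
# torchvision ResNet-18 in eval mode and check that the outputs barely change. ---
if __name__ == "__main__":
    model = torchvision.models.resnet18().eval()
    x = torch.randn(1, 3, 224, 224)
    with torch.no_grad():
        y_ref = model(x)
        fused = acc_model('', model)  # the first argument (ops) is unused by acc_model
        y_fused = fused(x)
    print("max abs diff after Conv+BN fusion:", (y_ref - y_fused).abs().max().item())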
(This diff has been collapsed.)
import glob
import math
import os
import random
import shutil
from pathlib import Path
from PIL import Image
from tqdm import tqdm
import cv2
import numpy as np
import torch
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
def xyxy2xywh(x):
# Convert bounding box format from [x1, y1, x2, y2] to [x, y, w, h]
y = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x)
y[:, 0] = (x[:, 0] + x[:, 2]) / 2
y[:, 1] = (x[:, 1] + x[:, 3]) / 2
y[:, 2] = x[:, 2] - x[:, 0]
y[:, 3] = x[:, 3] - x[:, 1]
return y
def xywh2xyxy(x):
# Convert bounding box format from [x, y, w, h] to [x1, y1, x2, y2]
y = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x)
y[:, 0] = x[:, 0] - x[:, 2] / 2
y[:, 1] = x[:, 1] - x[:, 3] / 2
y[:, 2] = x[:, 0] + x[:, 2] / 2
y[:, 3] = x[:, 1] + x[:, 3] / 2
return y
class LoadImages: # for inference
def __init__(self, path, img_size=416):
self.height = img_size
img_formats = ['.jpg', '.jpeg', '.png', '.tif']
vid_formats = ['.mov', '.avi', '.mp4']
files = []
if os.path.isdir(path):
files = sorted(glob.glob('%s/*.*' % path))
elif os.path.isfile(path):
files = [path]
images = [x for x in files if os.path.splitext(x)[-1].lower() in img_formats]
videos = [x for x in files if os.path.splitext(x)[-1].lower() in vid_formats]
nI, nV = len(images), len(videos)
self.files = images + videos
self.nF = nI + nV # number of files
self.video_flag = [False] * nI + [True] * nV
self.mode = 'images'
if any(videos):
self.new_video(videos[0]) # new video
else:
self.cap = None
assert self.nF > 0, 'No images or videos found in ' + path
def __iter__(self):
self.count = 0
return self
def __next__(self):
if self.count == self.nF:
raise StopIteration
path = self.files[self.count]
if self.video_flag[self.count]:
# Read video
self.mode = 'video'
ret_val, img0 = self.cap.read()
if not ret_val:
self.count += 1
self.cap.release()
if self.count == self.nF: # last video
raise StopIteration
else:
path = self.files[self.count]
self.new_video(path)
ret_val, img0 = self.cap.read()
self.frame += 1
print('video %g/%g (%g/%g) %s: ' % (self.count + 1, self.nF, self.frame, self.nframes, path), end='')
else:
# Read image
self.count += 1
img0 = cv2.imread(path) # BGR
assert img0 is not None, 'File Not Found ' + path
print('image %g/%g %s: ' % (self.count, self.nF, path), end='')
# Padded resize
img, _, _, _ = letterbox(img0, height=self.height)
# Normalize RGB
img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB
img = np.ascontiguousarray(img, dtype=np.float32) # uint8 to float32
img /= 255.0 # 0 - 255 to 0.0 - 1.0
# cv2.imwrite(path + '.letterbox.jpg', 255 * img.transpose((1, 2, 0))[:, :, ::-1]) # save letterbox image
return path, img, img0, self.cap
def new_video(self, path):
self.frame = 0
self.cap = cv2.VideoCapture(path)
self.nframes = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
def __len__(self):
return self.nF # number of files
class LoadWebcam: # for inference
def __init__(self, img_size=416):
self.cam = cv2.VideoCapture(0)
self.height = img_size
def __iter__(self):
self.count = -1
return self
def __next__(self):
self.count += 1
if cv2.waitKey(1) == 27: # esc to quit
cv2.destroyAllWindows()
raise StopIteration
# Read image
ret_val, img0 = self.cam.read()
assert ret_val, 'Webcam Error'
img_path = 'webcam_%g.jpg' % self.count
img0 = cv2.flip(img0, 1) # flip left-right
# Padded resize
img, _, _, _ = letterbox(img0, height=self.height)
# Normalize RGB
img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB
img = np.ascontiguousarray(img, dtype=np.float32) # uint8 to float32
img /= 255.0 # 0 - 255 to 0.0 - 1.0
return img_path, img, img0, self.cam
def __len__(self):
return 0
class LoadImagesAndLabels(Dataset): # for training/testing
def __init__(self, path, batch_size, img_size=416, augment=True, multi_scale=False):
print('LoadImagesAndLabels init : ',path)
with open(path, 'r') as file:
img_files = file.read().splitlines()
img_files = list(filter(lambda x: len(x) > 0, img_files))
np.random.shuffle(img_files) # shuffle img_list
print("shuffle image...")
self.img_files = img_files
assert len(self.img_files) > 0, 'No images found in %s' % path
self.img_size = img_size
self.batch_size = batch_size
self.multi_scale = multi_scale
self.augment = augment
self.scale_index = 0
if self.multi_scale:
self.img_size = img_size # initiate with maximum multi_scale size, in case of out of memory
print("Multi scale images training, init img_size", self.img_size)
else:
print("Fixed scale images, img_size", self.img_size)
self.label_files = [
x.replace('images', 'labels').replace("JPEGImages", 'labels').replace('.bmp', '.txt').replace('.jpg', '.txt').replace('.png', '.txt')
for x in self.img_files]
# print('self.img_files : ',self.img_files[1])
# print('self.label_files : ',self.label_files[1])
def __len__(self):
return len(self.img_files)
def __getitem__(self, index):
# if self.multi_scale and (index % self.batch_size == 0) and index != 0:
if self.multi_scale and (self.scale_index % self.batch_size == 0)and self.scale_index != 0:
self.img_size = random.choice(range(11, 18)) * 32
# print("++++++ change img_size, index:", self.img_size, index)
if self.multi_scale:
self.scale_index += 1
if self.scale_index >= (100*self.batch_size):
self.scale_index = 0
img_path = self.img_files[index]
label_path = self.label_files[index]
img = cv2.imread(img_path) # BGR
assert img is not None, 'File Not Found ' + img_path
augment_hsv = random.random() < 0.5 # hsv_aug prob = 0.5
if self.augment and augment_hsv:
# SV augmentation by 50%
fraction = 0.50 # must be < 1.0
img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
S = img_hsv[:, :, 1].astype(np.float32)
V = img_hsv[:, :, 2].astype(np.float32)
a = (random.random() * 2 - 1) * fraction + 1 # a in [0.5, 1.5]
S *= a
if a > 1:
np.clip(S, None, 255, out=S)
a = (random.random() * 2 - 1) * fraction + 1
V *= a
if a > 1:
np.clip(V, None, 255, out=V)
img_hsv[:, :, 1] = S # .astype(np.uint8)
img_hsv[:, :, 2] = V # .astype(np.uint8)
cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)
h, w, _ = img.shape
img, ratio, padw, padh = letterbox(img, height=self.img_size, augment=self.augment)
# Load labels
labels = []
if os.path.isfile(label_path):
with open(label_path, 'r') as file:
lines = file.read().splitlines()
x = np.array([x.split() for x in lines], dtype=np.float32)
if x.size > 0:
# Normalized xywh to pixel xyxy format
labels = x.copy()
labels[:, 1] = ratio * w * (x[:, 1] - x[:, 3] / 2) + padw
labels[:, 2] = ratio * h * (x[:, 2] - x[:, 4] / 2) + padh
labels[:, 3] = ratio * w * (x[:, 1] + x[:, 3] / 2) + padw
labels[:, 4] = ratio * h * (x[:, 2] + x[:, 4] / 2) + padh
# Augment image and labels
if self.augment:
img, labels = random_affine(img, labels, degrees=(-10, 10), translate=(0.10, 0.10), scale=(0.9, 1.1))
nL = len(labels) # number of labels
if nL:
# convert xyxy to xywh
labels[:, 1:5] = xyxy2xywh(labels[:, 1:5]) / self.img_size # convert format and normalize
if self.augment:
# random left-right flip
lr_flip = True
if lr_flip and random.random() > 0.5:
img = np.fliplr(img)
if nL:
labels[:, 1] = 1 - labels[:, 1]
# random up-down flip
ud_flip = False
if ud_flip and random.random() > 0.5:
img = np.flipud(img)
if nL:
labels[:, 2] = 1 - labels[:, 2]
labels_out = torch.zeros((nL, 6))# one extra column for the batch-image index
if nL:
labels_out[:, 1:] = torch.from_numpy(labels)
# Normalize
img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416
img = np.ascontiguousarray(img, dtype=np.float32) # uint8 to float32
img /= 255.0 # 0 - 255 to 0.0 - 1.0
return torch.from_numpy(img), labels_out, img_path, (h, w)
@staticmethod
def collate_fn(batch):
img, label, path, hw = list(zip(*batch)) # transposed
for i, l in enumerate(label):
l[:, 0] = i # index of the image each object belongs to within the batch
return torch.stack(img, 0), torch.cat(label, 0), path, hw
def letterbox(img, height=416, augment=False, color=(127.5, 127.5, 127.5)):
# Resize a rectangular image to a padded square
shape = img.shape[:2] # shape = [height, width]
ratio = float(height) / max(shape) # ratio = old / new
new_shape = (round(shape[1] * ratio), round(shape[0] * ratio))
dw = (height - new_shape[0]) / 2 # width padding
dh = (height - new_shape[1]) / 2 # height padding
top, bottom = round(dh - 0.1), round(dh + 0.1)
left, right = round(dw - 0.1), round(dw + 0.1)
# resize img
if augment:
interpolation = np.random.choice([None, cv2.INTER_NEAREST, cv2.INTER_LINEAR,
None, cv2.INTER_NEAREST, cv2.INTER_LINEAR,
cv2.INTER_AREA, cv2.INTER_CUBIC, cv2.INTER_LANCZOS4])
if interpolation is None:
img = cv2.resize(img, new_shape)
else:
img = cv2.resize(img, new_shape, interpolation=interpolation)
else:
img = cv2.resize(img, new_shape, interpolation=cv2.INTER_NEAREST)
# print("resize time:",time.time()-s1)
img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # padded square
return img, ratio, dw, dh
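# Worked example for letterbox(): a 720x1280 BGR frame with height=416 gives
# ratio = 416/1280 = 0.325, is resized to 416x234, and is padded with 91 gray
# (127.5) rows on top and bottom to reach 416x416; the returned (ratio, dw, dh)
# can later be used to map detections back to the original image.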
def random_affine(img, targets=(), degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-2, 2),
borderValue=(127.5, 127.5, 127.5)):
# torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))
# https://medium.com/uruvideo/dataset-augmentation-with-random-homographies-a8f4b44830d4
if targets is None:
targets = []
border = 0 # width of added border (optional)
height = max(img.shape[0], img.shape[1]) + border * 2
# Rotation and Scale
R = np.eye(3)
a = random.random() * (degrees[1] - degrees[0]) + degrees[0]
# a += random.choice([-180, -90, 0, 90]) # 90deg rotations added to small rotations
s = random.random() * (scale[1] - scale[0]) + scale[0]
R[:2] = cv2.getRotationMatrix2D(angle=a, center=(img.shape[1] / 2, img.shape[0] / 2), scale=s)
# Translation
T = np.eye(3)
T[0, 2] = (random.random() * 2 - 1) * translate[0] * img.shape[0] + border # x translation (pixels)
T[1, 2] = (random.random() * 2 - 1) * translate[1] * img.shape[1] + border # y translation (pixels)
# Shear
S = np.eye(3)
S[0, 1] = math.tan((random.random() * (shear[1] - shear[0]) + shear[0]) * math.pi / 180) # x shear (deg)
S[1, 0] = math.tan((random.random() * (shear[1] - shear[0]) + shear[0]) * math.pi / 180) # y shear (deg)
M = S @ T @ R # Combined rotation matrix. ORDER IS IMPORTANT HERE!!
imw = cv2.warpPerspective(img, M, dsize=(height, height), flags=cv2.INTER_LINEAR,
borderValue=borderValue) # BGR order borderValue
# Return warped points also
if len(targets) > 0:
n = targets.shape[0]
points = targets[:, 1:5].copy()
area0 = (points[:, 2] - points[:, 0]) * (points[:, 3] - points[:, 1])
# warp points
xy = np.ones((n * 4, 3))
xy[:, :2] = points[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(n * 4, 2) # x1y1, x2y2, x1y2, x2y1
xy = (xy @ M.T)[:, :2].reshape(n, 8)
# create new boxes
x = xy[:, [0, 2, 4, 6]]
y = xy[:, [1, 3, 5, 7]]
xy = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T
# apply angle-based reduction of bounding boxes
radians = a * math.pi / 180
reduction = max(abs(math.sin(radians)), abs(math.cos(radians))) ** 0.5
x = (xy[:, 2] + xy[:, 0]) / 2
y = (xy[:, 3] + xy[:, 1]) / 2
w = (xy[:, 2] - xy[:, 0]) * reduction
h = (xy[:, 3] - xy[:, 1]) * reduction
xy = np.concatenate((x - w / 2, y - h / 2, x + w / 2, y + h / 2)).reshape(4, n).T
# reject warped points outside of image
np.clip(xy, 0, height, out=xy)
w = xy[:, 2] - xy[:, 0]
h = xy[:, 3] - xy[:, 1]
area = w * h
ar = np.maximum(w / (h + 1e-16), h / (w + 1e-16))
i = (w > 4) & (h > 4) & (area / (area0 + 1e-16) > 0.1) & (ar < 10)
targets = targets[i]
targets[:, 1:5] = xy[i]
return imw, targets
def convert_images2bmp():
# cv2.imread() jpg at 230 img/s, *.bmp at 400 img/s
for path in ['../coco/images/val2014/', '../coco/images/train2014/']:
folder = os.sep + Path(path).name
output = path.replace(folder, folder + 'bmp')
if os.path.exists(output):
shutil.rmtree(output) # delete output folder
os.makedirs(output) # make new output folder
for f in tqdm(glob.glob('%s*.jpg' % path)):
save_name = f.replace('.jpg', '.bmp').replace(folder, folder + 'bmp')
cv2.imwrite(save_name, cv2.imread(f))
for label_path in ['../coco/trainvalno5k.txt', '../coco/5k.txt']:
with open(label_path, 'r') as file:
lines = file.read()
lines = lines.replace('2014/', '2014bmp/').replace('.jpg', '.bmp').replace(
'/Users/glennjocher/PycharmProjects/', '../')
with open(label_path.replace('5k', '5k_bmp'), 'w') as file:
file.write(lines)
import torch
def init_seeds(seed=0):
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
def select_device(force_cpu=False):
if force_cpu:
cuda = False
device = torch.device('cpu')
else:
cuda = torch.cuda.is_available()
device = torch.device('cuda:0' if cuda else 'cpu')
if torch.cuda.device_count() > 1:
device = torch.device('cuda' if cuda else 'cpu')
# print('Found %g GPUs' % torch.cuda.device_count())
# print('Multi-GPU Issue: https://github.com/ultralytics/yolov3/issues/21')
# torch.cuda.set_device(0) # OPTIONAL: Set your GPU if multiple available
# print('Using ', torch.cuda.device_count(), ' GPUs')
# print('Using %s %s\n' % (device.type, torch.cuda.get_device_properties(0) if cuda else ''))
return device
import glob
import random
import time
from collections import defaultdict
import cv2
import numpy as np
import torch
import torch.nn as nn
from dp_models.light_pose.modules.keypoints import BODY_PARTS_KPT_IDS, BODY_PARTS_PAF_IDS
# Set printoptions
torch.set_printoptions(linewidth=1320, precision=5, profile='long')
np.set_printoptions(linewidth=320, formatter={'float_kind': '{:11.5g}'.format}) # format short g, %precision=5
# Prevent OpenCV from multithreading (to use PyTorch DataLoader)
cv2.setNumThreads(0)
def float3(x): # format floats to 3 decimals
return float(format(x, '.3f'))
def init_seeds(seed=0):
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
if torch.cuda.is_available():
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
def load_classes(path):
# Loads class labels at 'path'
fp = open(path, 'r')
names = fp.read().split('\n')
return list(filter(None, names)) # filter removes empty strings (such as last line)
def model_info(model):
# Plots a line-by-line description of a PyTorch model
n_p = sum(x.numel() for x in model.parameters()) # number parameters
n_g = sum(x.numel() for x in model.parameters() if x.requires_grad) # number gradients
print('\n%5s %60s %9s %12s %20s %10s %10s' % ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', 'sigma'))
for i, (name, p) in enumerate(model.named_parameters()):
# name = name.replace('module_list.', '')
print('%5g %60s %9s %12g %20s %10.3g %10.3g' % (
i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std()))
print('Model Summary: %g layers, %g parameters, %g gradients' % (i + 1, n_p, n_g))
def weights_init_normal(m):
classname = m.__class__.__name__
if classname.find('Conv') != -1:
torch.nn.init.normal_(m.weight.data, 0.0, 0.03)
elif classname.find('BatchNorm2d') != -1:
torch.nn.init.normal_(m.weight.data, 1.0, 0.03)
torch.nn.init.constant_(m.bias.data, 0.0)
def xyxy2xywh(x):
# Convert bounding box format from [x1, y1, x2, y2] to [x, y, w, h]
y = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x)
y[:, 0] = (x[:, 0] + x[:, 2]) / 2
y[:, 1] = (x[:, 1] + x[:, 3]) / 2
y[:, 2] = x[:, 2] - x[:, 0]
y[:, 3] = x[:, 3] - x[:, 1]
return y
def xywh2xyxy(x):
# Convert bounding box format from [x, y, w, h] to [x1, y1, x2, y2]
y = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x)
y[:, 0] = x[:, 0] - x[:, 2] / 2
y[:, 1] = x[:, 1] - x[:, 3] / 2
y[:, 2] = x[:, 0] + x[:, 2] / 2
y[:, 3] = x[:, 1] + x[:, 3] / 2
return y
def scale_coords(img_size, coords, img0_shape):# rescale from the network input size back to the original image size
# Rescale x1, y1, x2, y2 from 416 to image size
# print('coords : ',coords)
# print('img0_shape : ',img0_shape)
gain = float(img_size) / max(img0_shape) # gain = old / new
# print('gain : ',gain)
pad_x = (img_size - img0_shape[1] * gain) / 2 # width padding
pad_y = (img_size - img0_shape[0] * gain) / 2 # height padding
# print('pad_xpad_y : ',pad_x,pad_y)
coords[:, [0, 2]] -= pad_x
coords[:, [1, 3]] -= pad_y
coords[:, :4] /= gain
coords[:, :4] = torch.clamp(coords[:, :4], min=0)# clamp so coordinates are not negative
return coords
def ap_per_class(tp, conf, pred_cls, target_cls):
""" Compute the average precision, given the recall and precision curves.
Source: https://github.com/rafaelpadilla/Object-Detection-Metrics.
# Arguments
tp: True positives (list).
conf: Objectness value from 0-1 (list).
pred_cls: Predicted object classes (list).
target_cls: True object classes (list).
# Returns
The average precision as computed in py-faster-rcnn.
"""
# Sort by objectness
i = np.argsort(-conf)
tp, conf, pred_cls = tp[i], conf[i], pred_cls[i]
# Find unique classes
unique_classes = np.unique(target_cls)
# Create Precision-Recall curve and compute AP for each class
ap, p, r = [], [], []
for c in unique_classes:
i = pred_cls == c
n_gt = (target_cls == c).sum() # Number of ground truth objects
n_p = i.sum() # Number of predicted objects
if n_p == 0 and n_gt == 0:
continue
elif n_p == 0 or n_gt == 0:
ap.append(0)
r.append(0)
p.append(0)
else:
# Accumulate FPs and TPs
fpc = (1 - tp[i]).cumsum()
tpc = (tp[i]).cumsum()
# Recall
recall_curve = tpc / (n_gt + 1e-16)
r.append(recall_curve[-1])
# Precision
precision_curve = tpc / (tpc + fpc)
p.append(precision_curve[-1])
# AP from recall-precision curve
ap.append(compute_ap(recall_curve, precision_curve))
# Plot
# plt.plot(recall_curve, precision_curve)
# Compute F1 score (harmonic mean of precision and recall)
p, r, ap = np.array(p), np.array(r), np.array(ap)
f1 = 2 * p * r / (p + r + 1e-16)
return p, r, ap, f1, unique_classes.astype('int32')
def compute_ap(recall, precision):
""" Compute the average precision, given the recall and precision curves.
Source: https://github.com/rbgirshick/py-faster-rcnn.
# Arguments
recall: The recall curve (list).
precision: The precision curve (list).
# Returns
The average precision as computed in py-faster-rcnn.
"""
# correct AP calculation
# first append sentinel values at the end
mrec = np.concatenate(([0.], recall, [1.]))
mpre = np.concatenate(([0.], precision, [0.]))
# compute the precision envelope
for i in range(mpre.size - 1, 0, -1):
mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
# to calculate area under PR curve, look for points
# where X axis (recall) changes value
i = np.where(mrec[1:] != mrec[:-1])[0]
# and sum (\Delta recall) * prec
ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
return ap
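# Tiny worked example for compute_ap(): recall = [0.5, 1.0], precision = [1.0, 0.5]
# gives mrec = [0, 0.5, 1, 1] and mpre = [0, 1, 0.5, 0]; the envelope pass turns
# mpre into [1, 1, 0.5, 0], and summing (delta recall) * precision where recall
# changes yields AP = 0.5 * 1.0 + 0.5 * 0.5 = 0.75.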
def bbox_iou(box1, box2, x1y1x2y2=True):
# Returns the IoU of box1 to box2. box1 is 4, box2 is nx4
box2 = box2.t()
# Get the coordinates of bounding boxes
if x1y1x2y2:
# x1, y1, x2, y2 = box1
b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]
else:
# x, y, w, h = box1
b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2
b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2
b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2
b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2
# Intersection area
inter_area = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * \
(torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0)
# Union Area
union_area = ((b1_x2 - b1_x1) * (b1_y2 - b1_y1) + 1e-16) + \
(b2_x2 - b2_x1) * (b2_y2 - b2_y1) - inter_area
return inter_area / union_area # iou
def wh_iou(box1, box2):
box2 = box2.t()
# w, h = box1
w1, h1 = box1[0], box1[1]
w2, h2 = box2[0], box2[1]
# Intersection area
inter_area = torch.min(w1, w2) * torch.min(h1, h2)
# Union Area
union_area = (w1 * h1 + 1e-16) + w2 * h2 - inter_area
return inter_area / union_area # iou
def compute_loss(p, targets): # predictions, targets
FT = torch.cuda.FloatTensor if p[0].is_cuda else torch.FloatTensor
lxy, lwh, lcls, lconf = FT([0]), FT([0]), FT([0]), FT([0]) # initialize losses to 0
txy, twh, tcls, indices = targets
MSE = nn.MSELoss()
CE = nn.CrossEntropyLoss()
BCE = nn.BCEWithLogitsLoss()# used for multi-label targets, e.g. [1,1,0]
# Compute losses
for i, pi0 in enumerate(p): # layer i predictions, i
b, a, gj, gi = indices[i] # image_idx, anchor_idx, gridx, gridy
# print(i,') b, a, gj, gi : ')
# print('b', b)
# print('a', a)
# print('gj', gj)
# print('gi', gi)
tconf = torch.zeros_like(pi0[..., 0]) # conf
# print('tconf: ',tconf.size())
# Compute losses
k = 1 # nT / bs
if len(b) > 0:
pi = pi0[b, a, gj, gi] # predictions closest to anchors
tconf[b, a, gj, gi] = 1 # conf
lxy += (k * 8) * MSE(torch.sigmoid(pi[..., 0:2]), txy[i]) # xy loss
lwh += (k * 4) * MSE(pi[..., 2:4], twh[i]) # wh loss
lcls += (k * 1) * CE(pi[..., 5:], tcls[i]) # class_conf loss
lconf += (k * 64) * BCE(pi0[..., 4], tconf) # obj_conf loss
loss = lxy + lwh + lconf + lcls
# Add to dictionary
d = defaultdict(float)
losses = [loss.item(), lxy.item(), lwh.item(), lconf.item(), lcls.item()]
for name, x in zip(['total', 'xy', 'wh', 'conf', 'cls'], losses):
d[name] = x
return loss, d
def build_targets(model, targets):
# targets = [image, class, x, y, w, h]
if isinstance(model, nn.parallel.DistributedDataParallel):
model = model.module
txy, twh, tcls, indices = [], [], [], []
for i, layer in enumerate(get_yolo_layers(model)):# iterate over the 3 yolo layers
# print(i,'layer ',model.module_list[layer])
layer = model.module_list[layer][0]
# iou of targets-anchors
gwh = targets[:, 4:6] * layer.nG # wh in grid units
iou = [wh_iou(x, gwh) for x in layer.anchor_vec]
iou, a = torch.stack(iou, 0).max(0) # best iou and anchor
# reject below threshold ious (OPTIONAL, increases P, lowers R)
reject = True
if reject:
j = iou > 0.10
t, a, gwh = targets[j], a[j], gwh[j]
else:
t = targets
# Indices
b, c = t[:, :2].long().t() # target image, class
gxy = t[:, 2:4] * layer.nG
gi, gj = gxy.long().t() # grid_i, grid_j
indices.append((b, a, gj, gi)) # img_index , anchor_index , grid_x , grid_y
# print('b, a, gj, gi : ')
# print('b', b)
# print('a', a)
# print('gj', gj)
# print('gi', gi)
# print('class c',c)
# XY coordinates
txy.append(gxy - gxy.floor())# convert to grid-relative coordinates
# Width and height
twh.append(torch.log(gwh / layer.anchor_vec[a])) # yolo method (log)
# twh.append(torch.sqrt(gwh / layer.anchor_vec[a]) / 2) # power method
# Class
tcls.append(c)
# try:
# print('c.max,layer.nC: ',c.max().item() ,layer.nC)
# except:
# pass
if c.shape[0]:
assert c.max().item() <= layer.nC, 'Target classes exceed model classes'
return txy, twh, tcls, indices
# @profile
def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4):
"""
Removes detections with lower object confidence score than 'conf_thres'
Non-Maximum Suppression to further filter detections.
Returns detections with shape:
(x1, y1, x2, y2, object_conf, class_conf, class)
"""
min_wh = 2 # (pixels) minimum box width and height
output = [None] * len(prediction)
for image_i, pred in enumerate(prediction):
# Experiment: Prior class size rejection
# x, y, w, h = pred[:, 0], pred[:, 1], pred[:, 2], pred[:, 3]
# a = w * h # area
# ar = w / (h + 1e-16) # aspect ratio
# n = len(w)
# log_w, log_h, log_a, log_ar = torch.log(w), torch.log(h), torch.log(a), torch.log(ar)
# shape_likelihood = np.zeros((n, 60), dtype=np.float32)
# x = np.concatenate((log_w.reshape(-1, 1), log_h.reshape(-1, 1)), 1)
# from scipy.stats import multivariate_normal
# for c in range(60):
# shape_likelihood[:, c] =
# multivariate_normal.pdf(x, mean=mat['class_mu'][c, :2], cov=mat['class_cov'][c, :2, :2])
# Filter out confidence scores below threshold
class_conf, class_pred = pred[:, 5:].max(1) # max class_conf, index
pred[:, 4] *= class_conf # final conf = obj_conf * class_conf
i = (pred[:, 4] > conf_thres) & (pred[:, 2] > min_wh) & (pred[:, 3] > min_wh)
# s2=time.time()
pred2 = pred[i]
# print("++++++pred2 = pred[i]",time.time()-s2, pred2)
# If none are remaining => process next image
if len(pred2) == 0:
continue
# Select predicted classes
class_conf = class_conf[i]
class_pred = class_pred[i].unsqueeze(1).float()
# Box (center x, center y, width, height) to (x1, y1, x2, y2)
pred2[:, :4] = xywh2xyxy(pred2[:, :4])
# pred[:, 4] *= class_conf # improves mAP from 0.549 to 0.551
# Detections ordered as (x1y1x2y2, obj_conf, class_conf, class_pred)
pred2 = torch.cat((pred2[:, :5], class_conf.unsqueeze(1), class_pred), 1)
# Get detections sorted by decreasing confidence scores
pred2 = pred2[(-pred2[:, 4]).argsort()]
det_max = []
nms_style = 'MERGE' # 'OR' (default), 'AND', 'MERGE' (experimental)
for c in pred2[:, -1].unique():
dc = pred2[pred2[:, -1] == c] # select class c
dc = dc[:min(len(dc), 100)] # limit to first 100 boxes
# Non-maximum suppression
if nms_style == 'OR': # default
# METHOD1
# ind = list(range(len(dc)))
# while len(ind):
# j = ind[0]
# det_max.append(dc[j:j + 1]) # save highest conf detection
# reject = (bbox_iou(dc[j], dc[ind]) > nms_thres).nonzero()
# [ind.pop(i) for i in reversed(reject)]
# METHOD2
while dc.shape[0]:
det_max.append(dc[:1]) # save highest conf detection
if len(dc) == 1: # Stop if we're at the last detection
break
iou = bbox_iou(dc[0], dc[1:]) # iou with other boxes
dc = dc[1:][iou < nms_thres] # remove ious > threshold
elif nms_style == 'AND': # requires overlap, single boxes erased
while len(dc) > 1:
iou = bbox_iou(dc[0], dc[1:]) # iou with other boxes
if iou.max() > 0.5:
det_max.append(dc[:1])
dc = dc[1:][iou < nms_thres] # remove ious > threshold
elif nms_style == 'MERGE': # weighted mixture box
while len(dc):
i = bbox_iou(dc[0], dc) > nms_thres # iou with other boxes
weights = dc[i, 4:5]
dc[0, :4] = (weights * dc[i, :4]).sum(0) / weights.sum()
det_max.append(dc[:1])
dc = dc[i == 0]
if len(det_max):
det_max = torch.cat(det_max) # concatenate
output[image_i] = det_max[(-det_max[:, 4]).argsort()] # sort
return output
def get_yolo_layers(model):
yolo_layer_index = []
for index, l in enumerate(model.module_list):
try:
a = l[0].img_size and l[0].nG # only yolo layer need img_size and nG
# print("---"*50)
# print(l, index)
yolo_layer_index.append(index)
except:
pass
assert len(yolo_layer_index) > 0, "can not find yolo layer"
return yolo_layer_index
#-*-coding:utf-8-*-
# date:2021-03-09
# Author: Eric.Lee
# function: yolo v3 hand detect
import os
import cv2
import numpy as np
import time
import torch
from hand_detect.yolov3 import Yolov3, Yolov3Tiny
from hand_detect.utils.torch_utils import select_device
from hand_detect.acc_model import acc_model
import torch.backends.cudnn as cudnn
import torch.nn.functional as F
import random
def show_model_param(model):
params = list(model.parameters())
k = 0
for i in params:
l = 1
for j in i.size():
l *= j
print("layer shape: {}, parameter count: {}".format(str(list(i.size())), str(l)))
k = k + l
print("----------------------")
print("total number of parameters: " + str(k))
def process_data(img, img_size=416):# image preprocessing
img, _, _, _ = letterbox(img, height=img_size)
# Normalize RGB
img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB
img = np.ascontiguousarray(img, dtype=np.float32) # uint8 to float32
img /= 255.0 # 0 - 255 to 0.0 - 1.0
return img
def plot_one_box(x, img, color=None, label=None, line_thickness=None):
# Plots one bounding box on image img
tl = line_thickness or round(0.002 * max(img.shape[0:2])) + 1 # line thickness
color = color or [random.randint(0, 255) for _ in range(3)]
c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
cv2.rectangle(img, c1, c2, color, thickness=tl)
if label:
tf = max(tl - 1, 1) # font thickness
t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
cv2.rectangle(img, c1, c2, color, -1) # filled
cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 3, [255, 55,90], thickness=tf, lineType=cv2.LINE_AA)
def bbox_iou(box1, box2, x1y1x2y2=True):
# Returns the IoU of box1 to box2. box1 is 4, box2 is nx4
box2 = box2.t()
# Get the coordinates of bounding boxes
if x1y1x2y2:
# x1, y1, x2, y2 = box1
b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]
else:
# x, y, w, h = box1
b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2
b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2
b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2
b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2
# Intersection area
inter_area = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * \
(torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0)
# Union Area
union_area = ((b1_x2 - b1_x1) * (b1_y2 - b1_y1) + 1e-16) + \
(b2_x2 - b2_x1) * (b2_y2 - b2_y1) - inter_area
return inter_area / union_area # iou
def xywh2xyxy(x):
# Convert bounding box format from [x, y, w, h] to [x1, y1, x2, y2]
y = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x)
y[:, 0] = x[:, 0] - x[:, 2] / 2
y[:, 1] = x[:, 1] - x[:, 3] / 2
y[:, 2] = x[:, 0] + x[:, 2] / 2
y[:, 3] = x[:, 1] + x[:, 3] / 2
return y
def scale_coords(img_size, coords, img0_shape):# rescale from the network input size back to the original image size
# Rescale x1, y1, x2, y2 from 416 to image size
# print('coords : ',coords)
# print('img0_shape : ',img0_shape)
gain = float(img_size) / max(img0_shape) # gain = old / new
# print('gain : ',gain)
pad_x = (img_size - img0_shape[1] * gain) / 2 # width padding
pad_y = (img_size - img0_shape[0] * gain) / 2 # height padding
# print('pad_xpad_y : ',pad_x,pad_y)
coords[:, [0, 2]] -= pad_x
coords[:, [1, 3]] -= pad_y
coords[:, :4] /= gain
coords[:, :4] = torch.clamp(coords[:, :4], min=0)# clamp so coordinates are not negative
return coords
def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4):
"""
Removes detections with lower object confidence score than 'conf_thres'
Non-Maximum Suppression to further filter detections.
Returns detections with shape:
(x1, y1, x2, y2, object_conf, class_conf, class)
"""
min_wh = 2 # (pixels) minimum box width and height
output = [None] * len(prediction)
for image_i, pred in enumerate(prediction):
# Experiment: Prior class size rejection
# x, y, w, h = pred[:, 0], pred[:, 1], pred[:, 2], pred[:, 3]
# a = w * h # area
# ar = w / (h + 1e-16) # aspect ratio
# n = len(w)
# log_w, log_h, log_a, log_ar = torch.log(w), torch.log(h), torch.log(a), torch.log(ar)
# shape_likelihood = np.zeros((n, 60), dtype=np.float32)
# x = np.concatenate((log_w.reshape(-1, 1), log_h.reshape(-1, 1)), 1)
# from scipy.stats import multivariate_normal
# for c in range(60):
# shape_likelihood[:, c] =
# multivariate_normal.pdf(x, mean=mat['class_mu'][c, :2], cov=mat['class_cov'][c, :2, :2])
# Filter out confidence scores below threshold
class_conf, class_pred = pred[:, 5:].max(1) # max class_conf, index
pred[:, 4] *= class_conf # final conf = obj_conf * class_conf
i = (pred[:, 4] > conf_thres) & (pred[:, 2] > min_wh) & (pred[:, 3] > min_wh)
# s2=time.time()
pred2 = pred[i]
# print("++++++pred2 = pred[i]",time.time()-s2, pred2)
# If none are remaining => process next image
if len(pred2) == 0:
continue
# Select predicted classes
class_conf = class_conf[i]
class_pred = class_pred[i].unsqueeze(1).float()
# Box (center x, center y, width, height) to (x1, y1, x2, y2)
pred2[:, :4] = xywh2xyxy(pred2[:, :4])
# pred[:, 4] *= class_conf # improves mAP from 0.549 to 0.551
# Detections ordered as (x1y1x2y2, obj_conf, class_conf, class_pred)
pred2 = torch.cat((pred2[:, :5], class_conf.unsqueeze(1), class_pred), 1)
# Get detections sorted by decreasing confidence scores
pred2 = pred2[(-pred2[:, 4]).argsort()]
det_max = []
nms_style = 'MERGE' # 'OR' (default), 'AND', 'MERGE' (experimental)
for c in pred2[:, -1].unique():
dc = pred2[pred2[:, -1] == c] # select class c
dc = dc[:min(len(dc), 100)] # limit to first 100 boxes
# Non-maximum suppression
if nms_style == 'OR': # default
# METHOD1
# ind = list(range(len(dc)))
# while len(ind):
# j = ind[0]
# det_max.append(dc[j:j + 1]) # save highest conf detection
# reject = (bbox_iou(dc[j], dc[ind]) > nms_thres).nonzero()
# [ind.pop(i) for i in reversed(reject)]
# METHOD2
while dc.shape[0]:
det_max.append(dc[:1]) # save highest conf detection
if len(dc) == 1: # Stop if we're at the last detection
break
iou = bbox_iou(dc[0], dc[1:]) # iou with other boxes
dc = dc[1:][iou < nms_thres] # remove ious > threshold
elif nms_style == 'AND': # requires overlap, single boxes erased
while len(dc) > 1:
iou = bbox_iou(dc[0], dc[1:]) # iou with other boxes
if iou.max() > 0.5:
det_max.append(dc[:1])
dc = dc[1:][iou < nms_thres] # remove ious > threshold
elif nms_style == 'MERGE': # weighted mixture box
while len(dc):
i = bbox_iou(dc[0], dc) > nms_thres # iou with other boxes
weights = dc[i, 4:5]
dc[0, :4] = (weights * dc[i, :4]).sum(0) / weights.sum()
det_max.append(dc[:1])
dc = dc[i == 0]
if len(det_max):
det_max = torch.cat(det_max) # concatenate
output[image_i] = det_max[(-det_max[:, 4]).argsort()] # sort
return output
def letterbox(img, height=416, augment=False, color=(127.5, 127.5, 127.5)):
# Resize a rectangular image to a padded square
shape = img.shape[:2] # shape = [height, width]
ratio = float(height) / max(shape) # ratio = old / new
new_shape = (round(shape[1] * ratio), round(shape[0] * ratio))
dw = (height - new_shape[0]) / 2 # width padding
dh = (height - new_shape[1]) / 2 # height padding
top, bottom = round(dh - 0.1), round(dh + 0.1)
left, right = round(dw - 0.1), round(dw + 0.1)
# resize img
if augment:
interpolation = np.random.choice([None, cv2.INTER_NEAREST, cv2.INTER_LINEAR,
None, cv2.INTER_NEAREST, cv2.INTER_LINEAR,
cv2.INTER_AREA, cv2.INTER_CUBIC, cv2.INTER_LANCZOS4])
if interpolation is None:
img = cv2.resize(img, new_shape)
else:
img = cv2.resize(img, new_shape, interpolation=interpolation)
else:
img = cv2.resize(img, new_shape, interpolation=cv2.INTER_NEAREST)
# print("resize time:",time.time()-s1)
img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # padded square
return img, ratio, dw, dh
#---------------------------------------------------------
# model_path = './coco_model/yolov3_coco.pt' # detection model path
# root_path = './test_images/'# test image folder
# model_arch = 'yolov3' # model type
# voc_config = 'cfg/voc.data' # model config file
# img_size = 416 # image size
# conf_thres = 0.35# detection confidence threshold
# nms_thres = 0.5 # nms threshold
class yolo_v3_hand_model(object):
def __init__(self,
model_path = './components/hand_detect/weights/latest_416-2021-02-19.pt',
model_arch = 'yolov3',
img_size=416,
conf_thres=0.16,
nms_thres=0.4,):
print("yolo v3 hand_model loading : {}".format(model_path))
self.use_cuda = torch.cuda.is_available()
self.device = torch.device("cuda:0" if self.use_cuda else "cpu")
self.img_size = img_size
self.classes = ["Hand"]
self.num_classes = len(self.classes)
self.conf_thres = conf_thres
self.nms_thres = nms_thres
#-----------------------------------------------------------------------
weights = model_path
if "-tiny" in model_arch:
a_scalse = 416./img_size
anchors=[(10, 14), (23, 27), (37, 58), (81, 82), (135, 169), (344, 319)]
anchors_new = [ (int(anchors[j][0]/a_scalse),int(anchors[j][1]/a_scalse)) for j in range(len(anchors)) ]
model = Yolov3Tiny(self.num_classes,anchors = anchors_new)
else:
a_scalse = 416./img_size
anchors=[(10,13), (16,30), (33,23), (30,61), (62,45), (59,119), (116,90), (156,198), (373,326)]
anchors_new = [ (int(anchors[j][0]/a_scalse),int(anchors[j][1]/a_scalse)) for j in range(len(anchors)) ]
model = Yolov3(self.num_classes,anchors = anchors_new)
#-----------------------------------------------------------------------
self.model = model
# show_model_param(self.model)# print the model parameters
# print('num_classes : ',self.num_classes)
self.device = select_device() # choose the device to run on
self.use_cuda = torch.cuda.is_available()
# Load weights
if os.access(weights,os.F_OK):# check whether the model file exists
self.model.load_state_dict(torch.load(weights, map_location=lambda storage, loc: storage)['model'])
else:
print('------- >>> error : model file does not exist')
return False
#
self.model.eval()# set the model to eval mode
acc_model('',self.model)
self.model = self.model.to(self.device)
def predict(self, img_,vis):
with torch.no_grad():
t = time.time()
img = process_data(img_, self.img_size)
t1 = time.time()
img = torch.from_numpy(img).unsqueeze(0).to(self.device)
pred, _ = self.model(img)# run detection on the image
t2 = time.time()
detections = non_max_suppression(pred, self.conf_thres, self.nms_thres)[0] # nms
t3 = time.time()
# print("t3 time:", t3)
if (detections is None) or len(detections) == 0:
return []
# Rescale boxes from 416 to true image size
detections[:, :4] = scale_coords(self.img_size, detections[:, :4], img_.shape).round()
# draw detection results
dets_for_landmarks = []
colors = [(v // 32 * 64 + 64, (v // 8) % 4 * 64, v % 8 * 32) for v in range(1, 10 + 1)][::-1]
output_dict_ = []
for *xyxy, conf, cls_conf, cls in detections:
label = '%s %.2f' % (self.classes[0], conf)
x1,y1,x2,y2 = xyxy
output_dict_.append((float(x1),float(y1),float(x2),float(y2),float(conf.item())))
if vis:
plot_one_box(xyxy, img_, label=label, color=(0,175,255), line_thickness = 2)
return output_dict_
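# --- Hedged usage sketch (not part of the original file): run hand detection on a
# single image; the weight path follows the README and the test image is hypothetical. ---
if __name__ == "__main__":
    detector = yolo_v3_hand_model(
        model_path="./components/hand_detect/weights/latest_416.pt",
        model_arch="yolov3", img_size=416, conf_thres=0.5, nms_thres=0.45)
    image = cv2.imread("./test.jpg")  # hypothetical test image
    if image is not None:
        boxes = detector.predict(image, vis=True)  # [(x1, y1, x2, y2, conf), ...]
        print("hands:", boxes)
        cv2.imwrite("./test_result.jpg", image)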
(This diff has been collapsed.)
#-*-coding:utf-8-*-
# date:2021-03-09
# Author: Eric.Lee
# function: handpose_x 21 keypoints 2D
import os
import torch
import cv2
import numpy as np
import json
import torch.nn as nn
import time
import math
from datetime import datetime
from hand_keypoints.models.resnet import resnet18,resnet34,resnet50,resnet101
from hand_keypoints.models.squeezenet import squeezenet1_1,squeezenet1_0
from hand_keypoints.models.shufflenetv2 import ShuffleNetV2
from hand_keypoints.models.shufflenet import ShuffleNet
from hand_keypoints.models.mobilenetv2 import MobileNetV2
from torchvision.models import shufflenet_v2_x1_5 ,shufflenet_v2_x1_0 , shufflenet_v2_x2_0
from hand_keypoints.models.rexnetv1 import ReXNetV1
from hand_keypoints.utils.common_utils import *
def draw_bd_handpose_c(img_,hand_,x,y,thick = 3):
# thick = 2
colors = [(0,215,255),(255,115,55),(5,255,55),(25,15,255),(225,15,55)]
#
cv2.line(img_, (int(hand_['0']['x']+x), int(hand_['0']['y']+y)),(int(hand_['1']['x']+x), int(hand_['1']['y']+y)), colors[0], thick)
cv2.line(img_, (int(hand_['1']['x']+x), int(hand_['1']['y']+y)),(int(hand_['2']['x']+x), int(hand_['2']['y']+y)), colors[0], thick)
cv2.line(img_, (int(hand_['2']['x']+x), int(hand_['2']['y']+y)),(int(hand_['3']['x']+x), int(hand_['3']['y']+y)), colors[0], thick)
cv2.line(img_, (int(hand_['3']['x']+x), int(hand_['3']['y']+y)),(int(hand_['4']['x']+x), int(hand_['4']['y']+y)), colors[0], thick)
cv2.line(img_, (int(hand_['0']['x']+x), int(hand_['0']['y']+y)),(int(hand_['5']['x']+x), int(hand_['5']['y']+y)), colors[1], thick)
cv2.line(img_, (int(hand_['5']['x']+x), int(hand_['5']['y']+y)),(int(hand_['6']['x']+x), int(hand_['6']['y']+y)), colors[1], thick)
cv2.line(img_, (int(hand_['6']['x']+x), int(hand_['6']['y']+y)),(int(hand_['7']['x']+x), int(hand_['7']['y']+y)), colors[1], thick)
cv2.line(img_, (int(hand_['7']['x']+x), int(hand_['7']['y']+y)),(int(hand_['8']['x']+x), int(hand_['8']['y']+y)), colors[1], thick)
cv2.line(img_, (int(hand_['0']['x']+x), int(hand_['0']['y']+y)),(int(hand_['9']['x']+x), int(hand_['9']['y']+y)), colors[2], thick)
cv2.line(img_, (int(hand_['9']['x']+x), int(hand_['9']['y']+y)),(int(hand_['10']['x']+x), int(hand_['10']['y']+y)), colors[2], thick)
cv2.line(img_, (int(hand_['10']['x']+x), int(hand_['10']['y']+y)),(int(hand_['11']['x']+x), int(hand_['11']['y']+y)), colors[2], thick)
cv2.line(img_, (int(hand_['11']['x']+x), int(hand_['11']['y']+y)),(int(hand_['12']['x']+x), int(hand_['12']['y']+y)), colors[2], thick)
cv2.line(img_, (int(hand_['0']['x']+x), int(hand_['0']['y']+y)),(int(hand_['13']['x']+x), int(hand_['13']['y']+y)), colors[3], thick)
cv2.line(img_, (int(hand_['13']['x']+x), int(hand_['13']['y']+y)),(int(hand_['14']['x']+x), int(hand_['14']['y']+y)), colors[3], thick)
cv2.line(img_, (int(hand_['14']['x']+x), int(hand_['14']['y']+y)),(int(hand_['15']['x']+x), int(hand_['15']['y']+y)), colors[3], thick)
cv2.line(img_, (int(hand_['15']['x']+x), int(hand_['15']['y']+y)),(int(hand_['16']['x']+x), int(hand_['16']['y']+y)), colors[3], thick)
cv2.line(img_, (int(hand_['0']['x']+x), int(hand_['0']['y']+y)),(int(hand_['17']['x']+x), int(hand_['17']['y']+y)), colors[4], thick)
cv2.line(img_, (int(hand_['17']['x']+x), int(hand_['17']['y']+y)),(int(hand_['18']['x']+x), int(hand_['18']['y']+y)), colors[4], thick)
cv2.line(img_, (int(hand_['18']['x']+x), int(hand_['18']['y']+y)),(int(hand_['19']['x']+x), int(hand_['19']['y']+y)), colors[4], thick)
cv2.line(img_, (int(hand_['19']['x']+x), int(hand_['19']['y']+y)),(int(hand_['20']['x']+x), int(hand_['20']['y']+y)), colors[4], thick)
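A hedged usage sketch for draw_bd_handpose_c, assuming it runs in this module: the hand dict maps keypoint indices '0'..'20' to crop-relative pixel coordinates, and (x, y) is the crop's top-left offset in the full frame. The keypoints below are fabricated, purely for illustration.
```
import numpy as np

# fabricated 21 keypoints laid out on a diagonal
hand = {str(i): {"x": 10.0 * i, "y": 8.0 * i} for i in range(21)}
canvas = np.zeros((256, 256, 3), dtype=np.uint8)
draw_bd_handpose_c(canvas, hand, x=0, y=0, thick=2)
```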
#
class handpose_x_model(object):
def __init__(self,
model_path = './components/hand_keypoints/weights/ReXNetV1-size-256-wingloss102-0.1063.pth',
img_size= 256,
num_classes = 42,# 手部关键点个数 * 2 : 21*2
model_arch = "rexnetv1",
):
# print("handpose_x loading : ",model_path)
self.use_cuda = torch.cuda.is_available()
self.device = torch.device("cuda:0" if self.use_cuda else "cpu") # 可选的设备类型及序号
self.img_size = img_size
#-----------------------------------------------------------------------
if model_arch == 'resnet_50':
model_ = resnet50(num_classes = num_classes,img_size = self.img_size)
elif model_arch == 'resnet_18':
model_ = resnet18(num_classes = num_classes,img_size = self.img_size)
elif model_arch == 'resnet_34':
model_ = resnet34(num_classes = num_classes,img_size = self.img_size)
elif model_arch == 'resnet_101':
model_ = resnet101(num_classes = num_classes,img_size = self.img_size)
elif model_arch == "squeezenet1_0":
model_ = squeezenet1_0(pretrained=True, num_classes=num_classes)
elif model_arch == "squeezenet1_1":
model_ = squeezenet1_1(pretrained=True, num_classes=num_classes)
elif model_arch == "shufflenetv2":
model_ = ShuffleNetV2(ratio=1., num_classes=num_classes)
elif model_arch == "shufflenet_v2_x1_5":
model_ = shufflenet_v2_x1_5(pretrained=False,num_classes=num_classes)
elif model_arch == "shufflenet_v2_x1_0":
model_ = shufflenet_v2_x1_0(pretrained=False,num_classes=num_classes)
elif model_arch == "shufflenet_v2_x2_0":
model_ = shufflenet_v2_x2_0(pretrained=False,num_classes=num_classes)
elif model_arch == "shufflenet":
model_ = ShuffleNet(num_blocks = [2,4,2], num_classes=num_classes, groups=3)
elif model_arch == "mobilenetv2":
model_ = MobileNetV2(num_classes=num_classes)
elif model_arch == "rexnetv1":
model_ = ReXNetV1(num_classes=num_classes)
else:
raise ValueError("unsupported model_arch : {}".format(model_arch))
#-----------------------------------------------------------------------
model_ = model_.to(self.device)
model_.eval() # 设置为前向推断模式
# 加载测试模型
if os.access(model_path,os.F_OK):# load checkpoint if it exists
chkpt = torch.load(model_path, map_location=self.device)
model_.load_state_dict(chkpt)
print('handpose_x model loading : {}'.format(model_path))
else:
print('handpose_x model not found, using random weights : {}'.format(model_path))
self.model_handpose = model_
def predict(self, img, vis = False):
with torch.no_grad():
if not((img.shape[0] == self.img_size) and (img.shape[1] == self.img_size)):
img = cv2.resize(img, (self.img_size,self.img_size), interpolation = cv2.INTER_CUBIC)
img_ = img.astype(np.float32)
img_ = (img_-128.)/256.
img_ = img_.transpose(2, 0, 1)
img_ = torch.from_numpy(img_)
img_ = img_.unsqueeze_(0)
if self.use_cuda:
img_ = img_.cuda() # (bs, 3, h, w)
pre_ = self.model_handpose(img_.float())
output = pre_.cpu().detach().numpy()
output = np.squeeze(output)
return output
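A decoding sketch (an assumption, not code from this file): predict() returns 42 values, and downstream code in this project treats them as 21 (x, y) pairs normalised to [0, 1] relative to the input crop, so mapping them back to pixels looks roughly like this; random values stand in for a real prediction.
```
import numpy as np

output = np.random.rand(42).astype(np.float32)  # stand-in for handpose_x_model().predict(crop)
crop_w, crop_h = 256, 256                       # assumed crop size fed to the model
pts = [(float(output[2 * i + 0]) * crop_w,
        float(output[2 * i + 1]) * crop_h) for i in range(21)]
print(pts[0])  # keypoint 0 (wrist) in crop pixel coordinates
```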
"""mobilenetv2 in pytorch
[1] Mark Sandler, Andrew Howard, Menglong Zhu, Andrey Zhmoginov, Liang-Chieh Chen
MobileNetV2: Inverted Residuals and Linear Bottlenecks
https://arxiv.org/abs/1801.04381
"""
import torch
import torch.nn as nn
import torch.nn.functional as F
class LinearBottleNeck(nn.Module):
def __init__(self, in_channels, out_channels, stride, t=6, class_num=100):
super().__init__()
self.residual = nn.Sequential(
nn.Conv2d(in_channels, in_channels * t, 1),
nn.BatchNorm2d(in_channels * t),
nn.ReLU6(inplace=True),
nn.Conv2d(in_channels * t, in_channels * t, 3, stride=stride, padding=1, groups=in_channels * t),
nn.BatchNorm2d(in_channels * t),
nn.ReLU6(inplace=True),
nn.Conv2d(in_channels * t, out_channels, 1),
nn.BatchNorm2d(out_channels)
)
self.stride = stride
self.in_channels = in_channels
self.out_channels = out_channels
def forward(self, x):
residual = self.residual(x)
if self.stride == 1 and self.in_channels == self.out_channels:
residual += x
return residual
class MobileNetV2(nn.Module):
def __init__(self, num_classes=100,dropout_factor = 1.0):
super().__init__()
self.pre = nn.Sequential(
nn.Conv2d(3, 32, 1, padding=1),
nn.BatchNorm2d(32),
nn.ReLU6(inplace=True)
)
self.stage1 = LinearBottleNeck(32, 16, 1, 1)
self.stage2 = self._make_stage(2, 16, 24, 2, 6)
self.stage3 = self._make_stage(3, 24, 32, 2, 6)
self.stage4 = self._make_stage(4, 32, 64, 2, 6)
self.stage5 = self._make_stage(3, 64, 96, 1, 6)
self.stage6 = self._make_stage(3, 96, 160, 1, 6)
self.stage7 = LinearBottleNeck(160, 320, 1, 6)
self.conv1 = nn.Sequential(
nn.Conv2d(320, 1280, 1),
nn.BatchNorm2d(1280),
nn.ReLU6(inplace=True)
)
self.conv2 = nn.Conv2d(1280, num_classes, 1)
self.dropout = nn.Dropout(dropout_factor)
def forward(self, x):
x = self.pre(x)
x = self.stage1(x)
x = self.stage2(x)
x = self.stage3(x)
x = self.stage4(x)
x = self.stage5(x)
x = self.stage6(x)
x = self.stage7(x)
x = self.conv1(x)
x = F.adaptive_avg_pool2d(x, 1)
x = self.dropout(x)
x = self.conv2(x)
x = x.view(x.size(0), -1)
return x
def _make_stage(self, repeat, in_channels, out_channels, stride, t):
layers = []
layers.append(LinearBottleNeck(in_channels, out_channels, stride, t))
while repeat - 1:
layers.append(LinearBottleNeck(out_channels, out_channels, 1, t))
repeat -= 1
return nn.Sequential(*layers)
def mobilenetv2():
return MobileNetV2()
#-*-coding:utf-8-*-
# date:2020-08-08
# Author: X.L.Eric
# function: my model
import torch
import torch.nn as nn
import torch.nn.functional as F
class MY_Net(nn.Module):
def __init__(self,num_classes):# op 初始化
super(MY_Net, self).__init__()
self.cov = nn.Conv2d(3, 32, 3)
self.relu = nn.ReLU(inplace=True)
layers1 = []
# Conv2d : in_channels, out_channels, kernel_size, stride, padding
layers1.append(nn.Conv2d(in_channels = 32, out_channels = 64, kernel_size = 3,stride=1,padding = 0))
layers1.append(nn.BatchNorm2d(64,affine=True))
layers1.append(nn.ReLU(inplace=True))
layers1.append(nn.AvgPool2d(kernel_size=3, stride=2, padding=1))
self.layers1 = nn.Sequential(*layers1)
layers2 = []
layers2.append(nn.Conv2d(64, 128, 3))
layers2.append(nn.BatchNorm2d(128,affine=True))
layers2.append(nn.ReLU(inplace=True))
layers2.append(nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
self.layers2 = nn.Sequential(*layers2)
layers3 = []
layers3.append(nn.Conv2d(128, 256, 3,stride=2))
layers3.append(nn.BatchNorm2d(256,affine=True))
layers3.append(nn.ReLU(inplace=True))
layers3.append(nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
self.layers3 = nn.Sequential(*layers3)
layers4 = []
layers4.append(nn.Conv2d(256, 512, 3,stride=2))
layers4.append(nn.BatchNorm2d(512,affine=True))
layers4.append(nn.ReLU(inplace=True))
layers4.append(nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
layers4.append(nn.Conv2d(512, 512, 1,stride=1))
self.layers4 = nn.Sequential(*layers4)
self.avgpool = nn.AdaptiveAvgPool2d((1, 1))# 自适应均值池化
self.fc = nn.Linear(in_features = 512 , out_features = num_classes)# 全连接 fc
def forward(self, x):# 模型前向推断
x = self.cov(x)
x = self.relu(x)
x = self.layers1(x)
x = self.layers2(x)
x = self.layers3(x)
x = self.layers4(x)
x = self.avgpool(x)
x = x.reshape(x.size(0), -1)
x = self.fc(x)
return x
if __name__ == "__main__":
#输入批次图片(batchsize,channel,height,width):8 ,3*256*256
dummy_input = torch.randn([8, 3, 256,256])
model = MY_Net(num_classes = 100)# 分类数为 100 类
print('model:\n',model)# 打印模型op
output = model(dummy_input)# 模型前向推断
# 模型前向推断输出特征尺寸
print('model inference feature size: ',output.size())
print(output)
output_ = F.softmax(output,dim = 1)
#
print(output_)
import torch
import torch.nn as nn
import math
import torch.utils.model_zoo as model_zoo
__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
'resnet152']
model_urls = {
'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
}
def conv3x3(in_planes, out_planes, stride=1):
"""3x3 convolution with padding"""
return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
padding=1, bias=False)
class BasicBlock(nn.Module):
expansion = 1
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(BasicBlock, self).__init__()
self.conv1 = conv3x3(inplanes, planes, stride)
self.bn1 = nn.BatchNorm2d(planes)
self.relu = nn.ReLU(inplace=True)
self.conv2 = conv3x3(planes, planes)
self.bn2 = nn.BatchNorm2d(planes)
self.downsample = downsample
self.stride = stride
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = self.relu(out)
return out
class Bottleneck(nn.Module):
expansion = 4
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(Bottleneck, self).__init__()
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(planes)
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(planes)
self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
self.bn3 = nn.BatchNorm2d(planes * 4)
self.relu = nn.ReLU(inplace=True)
self.downsample = downsample
self.stride = stride
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = self.relu(out)
return out
class ResNet(nn.Module):
def __init__(self, block, layers, num_classes=1000, img_size=224,dropout_factor = 1.):
self.inplanes = 64
self.dropout_factor = dropout_factor
super(ResNet, self).__init__()
# 26
# 586 train_sequence
self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
bias=False)
self.bn1 = nn.BatchNorm2d(64)
self.relu = nn.ReLU(inplace=True)
# see this issue: https://github.com/xxradon/PytorchToCaffe/issues/16
# self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)
self.layer1 = self._make_layer(block, 64, layers[0])
self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
assert img_size % 32 == 0
pool_kernel = int(img_size / 32)
self.avgpool = nn.AvgPool2d(pool_kernel, stride=1, ceil_mode=True)
self.dropout = nn.Dropout(self.dropout_factor)
self.fc = nn.Linear(512 * block.expansion, num_classes)
for m in self.modules():
if isinstance(m, nn.Conv2d):
n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
m.weight.data.normal_(0, math.sqrt(2. / n))
elif isinstance(m, nn.BatchNorm2d):
m.weight.data.fill_(1)
m.bias.data.zero_()
def _make_layer(self, block, planes, blocks, stride=1):
downsample = None
if stride != 1 or self.inplanes != planes * block.expansion:
downsample = nn.Sequential(
nn.Conv2d(self.inplanes, planes * block.expansion,
kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(planes * block.expansion),
)
layers = []
layers.append(block(self.inplanes, planes, stride, downsample))
self.inplanes = planes * block.expansion
for i in range(1, blocks):
layers.append(block(self.inplanes, planes))
return nn.Sequential(*layers)
def forward(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
x = self.avgpool(x)
x = x.view(x.size(0), -1)
x = self.dropout(x)
x = self.fc(x)
return x
def load_model(model, pretrained_state_dict):
model_dict = model.state_dict()
pretrained_dict = {k: v for k, v in pretrained_state_dict.items() if
k in model_dict and model_dict[k].size() == pretrained_state_dict[k].size()}
model.load_state_dict(pretrained_dict, strict=False)
if len(pretrained_dict) == 0:
print("[INFO] No params were loaded ...")
else:
for k, v in pretrained_state_dict.items():
if k in pretrained_dict:
print("==>> Load {} {}".format(k, v.size()))
else:
print("[INFO] Skip {} {}".format(k, v.size()))
return model
def resnet18(pretrained=False, **kwargs):
"""Constructs a ResNet-18 model.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
"""
model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
if pretrained:
# model.load_state_dict(model_zoo.load_url(model_urls['resnet18']))
print("Load pretrained model from {}".format(model_urls['resnet18']))
pretrained_state_dict = model_zoo.load_url(model_urls['resnet18'])
model = load_model(model, pretrained_state_dict)
return model
def resnet34(pretrained=False, **kwargs):
"""Constructs a ResNet-34 model.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
"""
model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs)
if pretrained:
# model.load_state_dict(model_zoo.load_url(model_urls['resnet34']))
print("Load pretrained model from {}".format(model_urls['resnet34']))
pretrained_state_dict = model_zoo.load_url(model_urls['resnet34'])
model = load_model(model, pretrained_state_dict)
return model
def resnet50(pretrained=False, **kwargs):
"""Constructs a ResNet-50 model.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
"""
model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
if pretrained:
# model.load_state_dict(model_zoo.load_url(model_urls['resnet50']))
print("Load pretrained model from {}".format(model_urls['resnet50']))
pretrained_state_dict = model_zoo.load_url(model_urls['resnet50'])
model = load_model(model, pretrained_state_dict)
return model
def resnet101(pretrained=False, **kwargs):
"""Constructs a ResNet-101 model.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
"""
model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs)
if pretrained:
# model.load_state_dict(model_zoo.load_url(model_urls['resnet101']))
print("Load pretrained model from {}".format(model_urls['resnet101']))
pretrained_state_dict = model_zoo.load_url(model_urls['resnet101'])
model = load_model(model, pretrained_state_dict)
return model
def resnet152(pretrained=False, **kwargs):
"""Constructs a ResNet-152 model.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
"""
model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs)
if pretrained:
# model.load_state_dict(model_zoo.load_url(model_urls['resnet152']))
print("Load pretrained model from {}".format(model_urls['resnet152']))
pretrained_state_dict = model_zoo.load_url(model_urls['resnet152'])
model = load_model(model, pretrained_state_dict)
return model
if __name__ == "__main__":
input = torch.randn([32, 3, 256,256])
model = resnet34(False, num_classes=2, img_size=256)
output = model(input)
print(output.size())
import torch
import torch.nn as nn
__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
'resnet152']
model_urls = {
'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
}
def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
"""3x3 convolution with padding"""
return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
padding=dilation, groups=groups, bias=False, dilation=dilation)
def conv1x1(in_planes, out_planes, stride=1):
"""1x1 convolution"""
return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
class Bottleneck(nn.Module):
expansion = 4
def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
base_width=64, dilation=1, norm_layer=None):
super(Bottleneck, self).__init__()
if norm_layer is None:
norm_layer = nn.BatchNorm2d
width = int(planes * (base_width / 64.)) * groups
# Both self.conv2 and self.downsample layers downsample the input when stride != 1
self.conv1 = conv1x1(inplanes, width)
self.bn1 = norm_layer(width)
self.conv2 = conv3x3(width, width, stride, groups, dilation)
self.bn2 = norm_layer(width)
self.conv3 = conv1x1(width, planes * self.expansion)
self.bn3 = norm_layer(planes * self.expansion)
self.relu = nn.ReLU(inplace=True)
self.downsample = downsample
self.stride = stride
def forward(self, x):
identity = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
if self.downsample is not None:
identity = self.downsample(x)
out += identity
out = self.relu(out)
return out
class ResNet(nn.Module):
def __init__(self, block, layers, num_classes=1000,dropout_factor = 1., zero_init_residual=False,
groups=1, width_per_group=64, replace_stride_with_dilation=None,
norm_layer=nn.BatchNorm2d):
super(ResNet, self).__init__()
if norm_layer is None:
print('BatchNorm2d')
norm_layer = nn.BatchNorm2d
self._norm_layer = norm_layer
self.inplanes = 64
self.dilation = 1
if replace_stride_with_dilation is None:
# each element in the tuple indicates if we should replace
# the 2x2 stride with a dilated convolution instead
replace_stride_with_dilation = [False, False, False]
if len(replace_stride_with_dilation) != 3:
raise ValueError("replace_stride_with_dilation should be None "
"or a 3-element tuple, got {}".format(replace_stride_with_dilation))
self.groups = groups
self.base_width = width_per_group
self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3,
bias=False)
self.bn1 = norm_layer(self.inplanes)
self.relu = nn.ReLU(inplace=True)
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
self.layer1 = self._make_layer(block, 64, layers[0])
self.layer2 = self._make_layer(block, 128, layers[1], stride=2,
dilate=replace_stride_with_dilation[0])
self.layer3 = self._make_layer(block, 256, layers[2], stride=2,
dilate=replace_stride_with_dilation[1])
self.layer4 = self._make_layer(block, 512, layers[3], stride=2,
dilate=replace_stride_with_dilation[2])
self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
self.dropout = nn.Dropout(dropout_factor)
self.fc = nn.Linear(512 * block.expansion, num_classes)
# ----------------------------------------------------------------------------------
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
# Zero-initialize the last BN in each residual branch,
# so that the residual branch starts with zeros, and each residual block behaves like an identity.
# This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
if zero_init_residual:
for m in self.modules():
if isinstance(m, Bottleneck):
nn.init.constant_(m.bn3.weight, 0)
elif isinstance(m, BasicBlock):
nn.init.constant_(m.bn2.weight, 0)
def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
norm_layer = self._norm_layer
downsample = None
previous_dilation = self.dilation
if dilate:
self.dilation *= stride
stride = 1
if stride != 1 or self.inplanes != planes * block.expansion:
downsample = nn.Sequential(
conv1x1(self.inplanes, planes * block.expansion, stride),
norm_layer(planes * block.expansion),
)
layers = []
layers.append(block(self.inplanes, planes, stride, downsample, self.groups,
self.base_width, previous_dilation, norm_layer))
self.inplanes = planes * block.expansion
for _ in range(1, blocks):
layers.append(block(self.inplanes, planes, groups=self.groups,
base_width=self.base_width, dilation=self.dilation,
norm_layer=norm_layer))
return nn.Sequential(*layers)
def forward(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
x = self.avgpool(x)
x = x.reshape(x.size(0), -1)
x = self.dropout(x)
x = self.fc(x)
return x
def _resnet(arch, block, layers, **kwargs):
model = ResNet(block, layers, **kwargs)
return model
def resnet50(**kwargs):
r"""ResNet-50 model from
`"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_
"""
print('Bottleneck:{}'.format(Bottleneck))
return _resnet('resnet50', Bottleneck, [3, 4, 6, 3],**kwargs)
if __name__ == "__main__":
dummy_input = torch.randn([32, 3, 128,128])
num_classes = 100
model = resnet50(num_classes = num_classes,dropout_factor=0.5)
print(model)
output = model(dummy_input)
print(output.size())
"""
ReXNet
Copyright (c) 2020-present NAVER Corp.
MIT license
"""
import torch
import torch.nn as nn
from math import ceil
# Memory-efficient Swish using torch.jit.script borrowed from the code in (https://twitter.com/jeremyphoward/status/1188251041835315200)
# Currently use memory-efficient Swish as default:
USE_MEMORY_EFFICIENT_SWISH = True
if USE_MEMORY_EFFICIENT_SWISH:
@torch.jit.script
def swish_fwd(x):
return x.mul(torch.sigmoid(x))
@torch.jit.script
def swish_bwd(x, grad_output):
x_sigmoid = torch.sigmoid(x)
return grad_output * (x_sigmoid * (1. + x * (1. - x_sigmoid)))
class SwishJitImplementation(torch.autograd.Function):
@staticmethod
def forward(ctx, x):
ctx.save_for_backward(x)
return swish_fwd(x)
@staticmethod
def backward(ctx, grad_output):
x = ctx.saved_tensors[0]
return swish_bwd(x, grad_output)
def swish(x, inplace=False):
return SwishJitImplementation.apply(x)
else:
def swish(x, inplace=False):
return x.mul_(x.sigmoid()) if inplace else x.mul(x.sigmoid())
class Swish(nn.Module):
def __init__(self, inplace=True):
super(Swish, self).__init__()
self.inplace = inplace
def forward(self, x):
return swish(x, self.inplace)
def ConvBNAct(out, in_channels, channels, kernel=1, stride=1, pad=0,
num_group=1, active=True, relu6=False):
out.append(nn.Conv2d(in_channels, channels, kernel,
stride, pad, groups=num_group, bias=False))
out.append(nn.BatchNorm2d(channels))
if active:
out.append(nn.ReLU6(inplace=True) if relu6 else nn.ReLU(inplace=True))
def ConvBNSwish(out, in_channels, channels, kernel=1, stride=1, pad=0, num_group=1):
out.append(nn.Conv2d(in_channels, channels, kernel,
stride, pad, groups=num_group, bias=False))
out.append(nn.BatchNorm2d(channels))
out.append(Swish())
class SE(nn.Module):
def __init__(self, in_channels, channels, se_ratio=12):
super(SE, self).__init__()
self.avg_pool = nn.AdaptiveAvgPool2d(1)
self.fc = nn.Sequential(
nn.Conv2d(in_channels, channels // se_ratio, kernel_size=1, padding=0),
nn.BatchNorm2d(channels // se_ratio),
nn.ReLU(inplace=True),
nn.Conv2d(channels // se_ratio, channels, kernel_size=1, padding=0),
nn.Sigmoid()
)
def forward(self, x):
y = self.avg_pool(x)
y = self.fc(y)
return x * y
class LinearBottleneck(nn.Module):
def __init__(self, in_channels, channels, t, stride, use_se=True, se_ratio=12,
**kwargs):
super(LinearBottleneck, self).__init__(**kwargs)
self.use_shortcut = stride == 1 and in_channels <= channels
self.in_channels = in_channels
self.out_channels = channels
out = []
if t != 1:
dw_channels = in_channels * t
ConvBNSwish(out, in_channels=in_channels, channels=dw_channels)
else:
dw_channels = in_channels
ConvBNAct(out, in_channels=dw_channels, channels=dw_channels, kernel=3, stride=stride, pad=1,
num_group=dw_channels, active=False)
if use_se:
out.append(SE(dw_channels, dw_channels, se_ratio))
out.append(nn.ReLU6())
ConvBNAct(out, in_channels=dw_channels, channels=channels, active=False, relu6=True)
self.out = nn.Sequential(*out)
def forward(self, x):
out = self.out(x)
if self.use_shortcut:
out[:, 0:self.in_channels] += x
return out
class ReXNetV1(nn.Module):
def __init__(self, input_ch=16, final_ch=180, width_mult=1.0, depth_mult=1.0, num_classes=1000,
use_se=True,
se_ratio=12,
dropout_factor=0.2,
bn_momentum=0.9):
super(ReXNetV1, self).__init__()
layers = [1, 2, 2, 3, 3, 5]
strides = [1, 2, 2, 2, 1, 2]
use_ses = [False, False, True, True, True, True]
layers = [ceil(element * depth_mult) for element in layers]
strides = sum([[element] + [1] * (layers[idx] - 1)
for idx, element in enumerate(strides)], [])
if use_se:
use_ses = sum([[element] * layers[idx] for idx, element in enumerate(use_ses)], [])
else:
use_ses = [False] * sum(layers[:])
ts = [1] * layers[0] + [6] * sum(layers[1:])
self.depth = sum(layers[:]) * 3
stem_channel = 32 / width_mult if width_mult < 1.0 else 32
inplanes = input_ch / width_mult if width_mult < 1.0 else input_ch
features = []
in_channels_group = []
channels_group = []
# The following channel configuration is a simple instance to make each layer become an expand layer.
for i in range(self.depth // 3):
if i == 0:
in_channels_group.append(int(round(stem_channel * width_mult)))
channels_group.append(int(round(inplanes * width_mult)))
else:
in_channels_group.append(int(round(inplanes * width_mult)))
inplanes += final_ch / (self.depth // 3 * 1.0)
channels_group.append(int(round(inplanes * width_mult)))
ConvBNSwish(features, 3, int(round(stem_channel * width_mult)), kernel=3, stride=2, pad=1)
for block_idx, (in_c, c, t, s, se) in enumerate(zip(in_channels_group, channels_group, ts, strides, use_ses)):
features.append(LinearBottleneck(in_channels=in_c,
channels=c,
t=t,
stride=s,
use_se=se, se_ratio=se_ratio))
pen_channels = int(1280 * width_mult)
ConvBNSwish(features, c, pen_channels)
features.append(nn.AdaptiveAvgPool2d(1))
self.features = nn.Sequential(*features)
self.output = nn.Sequential(
nn.Dropout(dropout_factor),
nn.Conv2d(pen_channels, num_classes, 1, bias=True))
def forward(self, x):
x = self.features(x)
x = self.output(x).squeeze()
return x
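A quick shape check for the ReXNetV1 regressor as configured in this project (num_classes=42, 256x256 input); note that forward() ends with squeeze(), so a batch of one collapses to a flat 42-dim vector. A sketch assuming it runs in this module.
```
import torch

model = ReXNetV1(num_classes=42)
model.eval()
with torch.no_grad():
    out = model(torch.randn(2, 3, 256, 256))
print(out.shape)  # torch.Size([2, 42]); with batch size 1 it would be [42]
```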
"""shufflenet in pytorch
[1] Xiangyu Zhang, Xinyu Zhou, Mengxiao Lin, Jian Sun.
ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices
https://arxiv.org/abs/1707.01083v2
"""
from functools import partial
import torch
import torch.nn as nn
class BasicConv2d(nn.Module):
def __init__(self, input_channels, output_channels, kernel_size, **kwargs):
super().__init__()
self.conv = nn.Conv2d(input_channels, output_channels, kernel_size, **kwargs)
self.bn = nn.BatchNorm2d(output_channels)
self.relu = nn.ReLU(inplace=True)
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
x = self.relu(x)
return x
class ChannelShuffle(nn.Module):
def __init__(self, groups):
super().__init__()
self.groups = groups
def forward(self, x):
batchsize, channels, height, width = x.data.size()
channels_per_group = int(channels / self.groups)
#"""suppose a convolutional layer with g groups whose output has
#g x n channels; we first reshape the output channel dimension
#into (g, n)"""
x = x.view(batchsize, self.groups, channels_per_group, height, width)
#"""transposing and then flattening it back as the input of next layer."""
x = x.transpose(1, 2).contiguous()
x = x.view(batchsize, -1, height, width)
return x
class DepthwiseConv2d(nn.Module):
def __init__(self, input_channels, output_channels, kernel_size, **kwargs):
super().__init__()
self.depthwise = nn.Sequential(
nn.Conv2d(input_channels, output_channels, kernel_size, **kwargs),
nn.BatchNorm2d(output_channels)
)
def forward(self, x):
return self.depthwise(x)
class PointwiseConv2d(nn.Module):
def __init__(self, input_channels, output_channels, **kwargs):
super().__init__()
self.pointwise = nn.Sequential(
nn.Conv2d(input_channels, output_channels, 1, **kwargs),
nn.BatchNorm2d(output_channels)
)
def forward(self, x):
return self.pointwise(x)
class ShuffleNetUnit(nn.Module):
def __init__(self, input_channels, output_channels, stage, stride, groups):
super().__init__()
#"""Similar to [9], we set the number of bottleneck channels to 1/4
#of the output channels for each ShuffleNet unit."""
self.bottlneck = nn.Sequential(
PointwiseConv2d(
input_channels,
int(output_channels / 4),
groups=groups
),
nn.ReLU(inplace=True)
)
#"""Note that for Stage 2, we do not apply group convolution on the first pointwise
#layer because the number of input channels is relatively small."""
if stage == 2:
self.bottlneck = nn.Sequential(
PointwiseConv2d(
input_channels,
int(output_channels / 4),
groups=groups
),
nn.ReLU(inplace=True)
)
self.channel_shuffle = ChannelShuffle(groups)
self.depthwise = DepthwiseConv2d(
int(output_channels / 4),
int(output_channels / 4),
3,
groups=int(output_channels / 4),
stride=stride,
padding=1
)
self.expand = PointwiseConv2d(
int(output_channels / 4),
output_channels,
groups=groups
)
self.relu = nn.ReLU(inplace=True)
self.fusion = self._add
self.shortcut = nn.Sequential()
#"""As for the case where ShuffleNet is applied with stride,
#we simply make two modifications (see Fig 2 (c)):
#(i) add a 3 × 3 average pooling on the shortcut path;
#(ii) replace the element-wise addition with channel concatenation,
#which makes it easy to enlarge channel dimension with little extra
#computation cost.
if stride != 1 or input_channels != output_channels:
self.shortcut = nn.AvgPool2d(3, stride=2, padding=1)
self.expand = PointwiseConv2d(
int(output_channels / 4),
output_channels - input_channels,
groups=groups
)
self.fusion = self._cat
def _add(self, x, y):
return torch.add(x, y)
def _cat(self, x, y):
return torch.cat([x, y], dim=1)
def forward(self, x):
shortcut = self.shortcut(x)
shuffled = self.bottlneck(x)
shuffled = self.channel_shuffle(shuffled)
shuffled = self.depthwise(shuffled)
shuffled = self.expand(shuffled)
output = self.fusion(shortcut, shuffled)
output = self.relu(output)
return output
class ShuffleNet(nn.Module):
def __init__(self, num_blocks = [2,4,2], num_classes=100, groups=3, dropout_factor = 1.0):
super().__init__()
if groups == 1:
out_channels = [24, 144, 288, 567]
elif groups == 2:
out_channels = [24, 200, 400, 800]
elif groups == 3:
out_channels = [24, 240, 480, 960]
elif groups == 4:
out_channels = [24, 272, 544, 1088]
elif groups == 8:
out_channels = [24, 384, 768, 1536]
self.conv1 = BasicConv2d(3, out_channels[0], 3, padding=1, stride=1)
self.input_channels = out_channels[0]
self.stage2 = self._make_stage(
ShuffleNetUnit,
num_blocks[0],
out_channels[1],
stride=2,
stage=2,
groups=groups
)
self.stage3 = self._make_stage(
ShuffleNetUnit,
num_blocks[1],
out_channels[2],
stride=2,
stage=3,
groups=groups
)
self.stage4 = self._make_stage(
ShuffleNetUnit,
num_blocks[2],
out_channels[3],
stride=2,
stage=4,
groups=groups
)
self.avg = nn.AdaptiveAvgPool2d((1, 1))
self.fc = nn.Linear(out_channels[3], num_classes)
self.dropout = nn.Dropout(dropout_factor)
def forward(self, x):
x = self.conv1(x)
x = self.stage2(x)
x = self.stage3(x)
x = self.stage4(x)
x = self.avg(x)
x = x.view(x.size(0), -1)
x = self.dropout(x)
x = self.fc(x)
return x
def _make_stage(self, block, num_blocks, output_channels, stride, stage, groups):
"""make shufflenet stage
Args:
block: block type, shuffle unit
out_channels: output depth channel number of this stage
num_blocks: how many blocks per stage
stride: the stride of the first block of this stage
stage: stage index
groups: group number of group convolution
Return:
return a shuffle net stage
"""
strides = [stride] + [1] * (num_blocks - 1)
units = []  # use a separate name so the integer stage index is not shadowed by this list
for s in strides:
units.append(
block(
self.input_channels,
output_channels,
stride=s,
stage=stage,
groups=groups
)
)
self.input_channels = output_channels
return nn.Sequential(*units)
def shufflenet():
return ShuffleNet([4, 8, 4])
"""shufflenetv2 in pytorch
[1] Ningning Ma, Xiangyu Zhang, Hai-Tao Zheng, Jian Sun
ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design
https://arxiv.org/abs/1807.11164
"""
import torch
import torch.nn as nn
import torch.nn.functional as F
def channel_split(x, split):
"""split a tensor into two pieces along channel dimension
Args:
x: input tensor
split:(int) channel size for each pieces
"""
assert x.size(1) == split * 2
return torch.split(x, split, dim=1)
def channel_shuffle(x, groups):
"""channel shuffle operation
Args:
x: input tensor
groups: input branch number
"""
batch_size, channels, height, width = x.size()
channels_per_group = int(channels // groups)
x = x.view(batch_size, groups, channels_per_group, height, width)
x = x.transpose(1, 2).contiguous()
x = x.view(batch_size, -1, height, width)
return x
class ShuffleUnit(nn.Module):
def __init__(self, in_channels, out_channels, stride):
super().__init__()
self.stride = stride
self.in_channels = in_channels
self.out_channels = out_channels
if stride != 1 or in_channels != out_channels:
self.residual = nn.Sequential(
nn.Conv2d(in_channels, in_channels, 1),
nn.BatchNorm2d(in_channels),
nn.ReLU(inplace=True),
nn.Conv2d(in_channels, in_channels, 3, stride=stride, padding=1, groups=in_channels),
nn.BatchNorm2d(in_channels),
nn.Conv2d(in_channels, int(out_channels / 2), 1),
nn.BatchNorm2d(int(out_channels / 2)),
nn.ReLU(inplace=True)
)
self.shortcut = nn.Sequential(
nn.Conv2d(in_channels, in_channels, 3, stride=stride, padding=1, groups=in_channels),
nn.BatchNorm2d(in_channels),
nn.Conv2d(in_channels, int(out_channels / 2), 1),
nn.BatchNorm2d(int(out_channels / 2)),
nn.ReLU(inplace=True)
)
else:
self.shortcut = nn.Sequential()
in_channels = int(in_channels / 2)
self.residual = nn.Sequential(
nn.Conv2d(in_channels, in_channels, 1),
nn.BatchNorm2d(in_channels),
nn.ReLU(inplace=True),
nn.Conv2d(in_channels, in_channels, 3, stride=stride, padding=1, groups=in_channels),
nn.BatchNorm2d(in_channels),
nn.Conv2d(in_channels, in_channels, 1),
nn.BatchNorm2d(in_channels),
nn.ReLU(inplace=True)
)
def forward(self, x):
if self.stride == 1 and self.out_channels == self.in_channels:
shortcut, residual = channel_split(x, int(self.in_channels / 2))
else:
shortcut = x
residual = x
shortcut = self.shortcut(shortcut)
residual = self.residual(residual)
x = torch.cat([shortcut, residual], dim=1)
x = channel_shuffle(x, 2)
return x
class ShuffleNetV2(nn.Module):
def __init__(self, ratio=1., num_classes=100, dropout_factor = 1.0):
super().__init__()
if ratio == 0.5:
out_channels = [48, 96, 192, 1024]
elif ratio == 1:
out_channels = [116, 232, 464, 1024]
elif ratio == 1.5:
out_channels = [176, 352, 704, 1024]
elif ratio == 2:
out_channels = [244, 488, 976, 2048]
else:
raise ValueError('unsupported ratio number')
self.pre = nn.Sequential(
nn.Conv2d(3, 24, 3, padding=1),
nn.BatchNorm2d(24)
)
self.stage2 = self._make_stage(24, out_channels[0], 3)
self.stage3 = self._make_stage(out_channels[0], out_channels[1], 7)
self.stage4 = self._make_stage(out_channels[1], out_channels[2], 3)
self.conv5 = nn.Sequential(
nn.Conv2d(out_channels[2], out_channels[3], 1),
nn.BatchNorm2d(out_channels[3]),
nn.ReLU(inplace=True)
)
self.fc = nn.Linear(out_channels[3], num_classes)
self.dropout = nn.Dropout(dropout_factor)
def forward(self, x):
x = self.pre(x)
x = self.stage2(x)
x = self.stage3(x)
x = self.stage4(x)
x = self.conv5(x)
x = F.adaptive_avg_pool2d(x, 1)
x = x.view(x.size(0), -1)
x = self.dropout(x)
x = self.fc(x)
return x
def _make_stage(self, in_channels, out_channels, repeat):
layers = []
layers.append(ShuffleUnit(in_channels, out_channels, 2))
while repeat:
layers.append(ShuffleUnit(out_channels, out_channels, 1))
repeat -= 1
return nn.Sequential(*layers)
def shufflenetv2():
return ShuffleNetV2()
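A similar sanity check for ShuffleNetV2; dropout_factor defaults to 1.0 here, so the sketch passes a smaller factor and puts the model in eval mode before the forward pass. Assumed to run in this module.
```
import torch

model = ShuffleNetV2(ratio=1., num_classes=42, dropout_factor=0.5)
model.eval()
with torch.no_grad():
    out = model(torch.randn(2, 3, 256, 256))
print(out.shape)  # torch.Size([2, 42])
```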
import math
import numpy as np
import torch
import torch.nn as nn
import torch.nn.init as init
import torch.utils.model_zoo as model_zoo
__all__ = ['SqueezeNet', 'squeezenet1_0', 'squeezenet1_1']
model_urls = {
'squeezenet1_0': 'https://download.pytorch.org/models/squeezenet1_0-a815701f.pth',
'squeezenet1_1': 'https://download.pytorch.org/models/squeezenet1_1-f364aa15.pth',
}
class Fire(nn.Module):
def __init__(self, inplanes, squeeze_planes,
expand1x1_planes, expand3x3_planes):
super(Fire, self).__init__()
self.inplanes = inplanes
self.squeeze = nn.Conv2d(inplanes, squeeze_planes, kernel_size=1)
self.squeeze_activation = nn.ReLU(inplace=True)
self.expand1x1 = nn.Conv2d(squeeze_planes, expand1x1_planes,
kernel_size=1)
self.expand1x1_activation = nn.ReLU(inplace=True)
self.expand3x3 = nn.Conv2d(squeeze_planes, expand3x3_planes,
kernel_size=3, padding=1)
self.expand3x3_activation = nn.ReLU(inplace=True)
def forward(self, x):
x = self.squeeze_activation(self.squeeze(x))
return torch.cat([
self.expand1x1_activation(self.expand1x1(x)),
self.expand3x3_activation(self.expand3x3(x))
], 1)
class SqueezeNet(nn.Module):
def __init__(self, version=1.0, num_classes=1000,dropout_factor = 1.):
super(SqueezeNet, self).__init__()
if version not in [1.0, 1.1]:
raise ValueError("Unsupported SqueezeNet version {version}:"
"1.0 or 1.1 expected".format(version=version))
self.num_classes = num_classes
if version == 1.0:
self.features = nn.Sequential(
nn.Conv2d(3, 96, kernel_size=7, stride=2),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
Fire(96, 16, 64, 64),
Fire(128, 16, 64, 64),
Fire(128, 32, 128, 128),
nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
Fire(256, 32, 128, 128),
Fire(256, 48, 192, 192),
Fire(384, 48, 192, 192),
Fire(384, 64, 256, 256),
nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
Fire(512, 64, 256, 256),
)
else:
self.features = nn.Sequential(
nn.Conv2d(3, 64, kernel_size=3, stride=2),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
Fire(64, 16, 64, 64),
Fire(128, 16, 64, 64),
nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
Fire(128, 32, 128, 128),
Fire(256, 32, 128, 128),
nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
Fire(256, 48, 192, 192),
Fire(384, 48, 192, 192),
Fire(384, 64, 256, 256),
Fire(512, 64, 256, 256),
)
# Final convolution is initialized differently from the rest
final_conv = nn.Conv2d(512, self.num_classes, kernel_size=1)
self.classifier = nn.Sequential(
nn.Dropout(p=dropout_factor),
final_conv,
nn.ReLU(inplace=True),
nn.AdaptiveAvgPool2d(1)
)
for m in self.modules():
if isinstance(m, nn.Conv2d):
if m is final_conv:
init.normal_(m.weight.data, mean=0.0, std=0.01)
else:
init.kaiming_uniform_(m.weight.data)
if m.bias is not None:
m.bias.data.zero_()
def forward(self, x):
x = self.features(x)
# print("features(x):", x.size())
x = self.classifier(x)
# print("features(x):", x.size())
return x.view(x.size(0), self.num_classes)
def squeezenet1_0(pretrained=False, **kwargs):
r"""SqueezeNet model architecture from the `"SqueezeNet: AlexNet-level
accuracy with 50x fewer parameters and <0.5MB model size"
<https://arxiv.org/abs/1602.07360>`_ paper.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
"""
model = SqueezeNet(version=1.0, **kwargs)
model_dict = model.state_dict()
if pretrained:
pretrained_state_dict = model_zoo.load_url(model_urls['squeezenet1_0'])
pretrained_dict = {k: v for k, v in pretrained_state_dict.items() if
k in model_dict and model_dict[k].size() == pretrained_state_dict[k].size()}
model.load_state_dict(pretrained_dict,strict=False)
return model
def squeezenet1_1(pretrained=False, **kwargs):
r"""SqueezeNet 1.1 model from the `official SqueezeNet repo
<https://github.com/DeepScale/SqueezeNet/tree/master/SqueezeNet_v1.1>`_.
SqueezeNet 1.1 has 2.4x less computation and slightly fewer parameters
than SqueezeNet 1.0, without sacrificing accuracy.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
"""
model = SqueezeNet(version=1.1, **kwargs)
model_dict = model.state_dict()
if pretrained:
pretrained_state_dict = model_zoo.load_url(model_urls['squeezenet1_1'])
pretrained_dict = {k: v for k, v in pretrained_state_dict.items() if
k in model_dict and model_dict[k].size() == pretrained_state_dict[k].size()}
model.load_state_dict(pretrained_dict,strict=False)
return model
if __name__ == "__main__":
from thop import profile
dummy = torch.from_numpy(np.random.random([16, 3, 256, 256]).astype(np.float32))
model = squeezenet1_0(pretrained=True, num_classes=42,dropout_factor = 0.5)
print(model)
flops, params = profile(model, inputs=(dummy, ))
model.eval()
output = model(dummy)
print(output.size())
print("flops: {}, params: {}".format(flops, params))
#-*-coding:utf-8-*-
# date:2020-04-11
# Author: Eric.Lee
# function: common utils
import os
import shutil
import cv2
import numpy as np
import json
import random
def mkdir_(path, flag_rm=False):
if os.path.exists(path):
if flag_rm == True:
shutil.rmtree(path)
os.mkdir(path)
print('remove {} done ~ '.format(path))
else:
os.mkdir(path)
def plot_box(bbox, img, color=None, label=None, line_thickness=None):
tl = line_thickness or round(0.002 * max(img.shape[0:2])) + 1
color = color or [random.randint(0, 255) for _ in range(3)]
c1, c2 = (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3]))
cv2.rectangle(img, c1, c2, color, thickness=tl)# 目标的bbox
if label:
tf = max(tl - 2, 1)
t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0] # label size
c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3 # 字体的bbox
cv2.rectangle(img, c1, c2, color, -1) # label 矩形填充
# 文本绘制
cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 4, [225, 255, 255],thickness=tf, lineType=cv2.LINE_AA)
class JSON_Encoder(json.JSONEncoder):
def default(self, obj):
if isinstance(obj, np.integer):
return int(obj)
elif isinstance(obj, np.floating):
return float(obj)
elif isinstance(obj, np.ndarray):
return obj.tolist()
else:
return super(JSON_Encoder, self).default(obj)
def draw_landmarks(img,output,draw_circle):
img_width = img.shape[1]
img_height = img.shape[0]
dict_landmarks = {}
for i in range(int(output.shape[0]/2)):
x = output[i*2+0]*float(img_width)
y = output[i*2+1]*float(img_height)
if 41>= i >=33:
if 'left_eyebrow' not in dict_landmarks.keys():
dict_landmarks['left_eyebrow'] = []
dict_landmarks['left_eyebrow'].append([int(x),int(y),(0,255,0)])
if draw_circle:
cv2.circle(img, (int(x),int(y)), 2, (0,255,0),-1)
elif 50>= i >=42:
if 'right_eyebrow' not in dict_landmarks.keys():
dict_landmarks['right_eyebrow'] = []
dict_landmarks['right_eyebrow'].append([int(x),int(y),(0,255,0)])
if draw_circle:
cv2.circle(img, (int(x),int(y)), 2, (0,255,0),-1)
elif 67>= i >=60:
if 'left_eye' not in dict_landmarks.keys():
dict_landmarks['left_eye'] = []
dict_landmarks['left_eye'].append([int(x),int(y),(255,0,255)])
if draw_circle:
cv2.circle(img, (int(x),int(y)), 2, (255,0,255),-1)
elif 75>= i >=68:
if 'right_eye' not in dict_landmarks.keys():
dict_landmarks['right_eye'] = []
dict_landmarks['right_eye'].append([int(x),int(y),(255,0,255)])
if draw_circle:
cv2.circle(img, (int(x),int(y)), 2, (255,0,255),-1)
elif 97>= i >=96:
cv2.circle(img, (int(x),int(y)), 2, (0,0,255),-1)
elif 54>= i >=51:
if 'bridge_nose' not in dict_landmarks.keys():
dict_landmarks['bridge_nose'] = []
dict_landmarks['bridge_nose'].append([int(x),int(y),(0,170,255)])
if draw_circle:
cv2.circle(img, (int(x),int(y)), 2, (0,170,255),-1)
elif 32>= i >=0:
if 'basin' not in dict_landmarks.keys():
dict_landmarks['basin'] = []
dict_landmarks['basin'].append([int(x),int(y),(255,30,30)])
if draw_circle:
cv2.circle(img, (int(x),int(y)), 2, (255,30,30),-1)
elif 59>= i >=55:
if 'wing_nose' not in dict_landmarks.keys():
dict_landmarks['wing_nose'] = []
dict_landmarks['wing_nose'].append([int(x),int(y),(0,255,255)])
if draw_circle:
cv2.circle(img, (int(x),int(y)), 2, (0,255,255),-1)
elif 87>= i >=76:
if 'out_lip' not in dict_landmarks.keys():
dict_landmarks['out_lip'] = []
dict_landmarks['out_lip'].append([int(x),int(y),(255,255,0)])
if draw_circle:
cv2.circle(img, (int(x),int(y)), 2, (255,255,0),-1)
elif 95>= i >=88:
if 'in_lip' not in dict_landmarks.keys():
dict_landmarks['in_lip'] = []
dict_landmarks['in_lip'].append([int(x),int(y),(50,220,255)])
if draw_circle:
cv2.circle(img, (int(x),int(y)), 2, (50,220,255),-1)
else:
if draw_circle:
cv2.circle(img, (int(x),int(y)), 2, (255,0,255),-1)
return dict_landmarks
def draw_contour(image,dict):
for key in dict.keys():
# print(key)
_,_,color = dict[key][0]
if 'basin' == key or 'wing_nose' == key:
pts = np.array([[dict[key][i][0],dict[key][i][1]] for i in range(len(dict[key]))],np.int32)
# print(pts)
cv2.polylines(image,[pts],False,color)
else:
points_array = np.zeros((1,len(dict[key]),2),dtype = np.int32)
for i in range(len(dict[key])):
x,y,_ = dict[key][i]
points_array[0,i,0] = x
points_array[0,i,1] = y
# cv2.fillPoly(image, points_array, color)
cv2.drawContours(image,points_array,-1,color,thickness=1)
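A hedged usage sketch for draw_landmarks/draw_contour: these helpers expect a flattened face-landmark vector normalised to [0, 1]; the 196 random values below simply stand in for a 98-point prediction. Assumed to run in this module.
```
import numpy as np

img = np.zeros((256, 256, 3), dtype=np.uint8)
output = np.random.rand(196).astype(np.float32)   # fabricated 98 normalised landmarks
landmarks = draw_landmarks(img, output, draw_circle=True)
draw_contour(img, landmarks)
```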
#-*-coding:utf-8-*-
# date:2020-04-11
# Author: Eric.Lee
# function: model utils
import os
import numpy as np
import torch
import torch.backends.cudnn as cudnn
import random
def get_acc(output, label):
total = output.shape[0]
_, pred_label = output.max(1)
num_correct = (pred_label == label).sum().item()
return num_correct / float(total)
def set_learning_rate(optimizer, lr):
for param_group in optimizer.param_groups:
param_group['lr'] = lr
def set_seed(seed = 666):
np.random.seed(seed)
random.seed(seed)
torch.manual_seed(seed)
if torch.cuda.is_available():
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
cudnn.deterministic = True
def split_trainval_datasets(ops):
print(' --------------->>> split_trainval_datasets ')
train_split_datasets = []
train_split_datasets_label = []
val_split_datasets = []
val_split_datasets_label = []
for idx,doc in enumerate(sorted(os.listdir(ops.train_path), key=lambda x:int(x.split('.')[0]), reverse=False)):
# print(' %s label is %s \n'%(doc,idx))
data_list = os.listdir(ops.train_path+doc)
random.shuffle(data_list)
cal_split_num = int(len(data_list)*ops.val_factor)
for i,file in enumerate(data_list):
if '.jpg' in file:
if i < cal_split_num:
val_split_datasets.append(ops.train_path+doc + '/' + file)
val_split_datasets_label.append(idx)
else:
train_split_datasets.append(ops.train_path+doc + '/' + file)
train_split_datasets_label.append(idx)
print(ops.train_path+doc + '/' + file,idx)
print('\n')
print('train_split_datasets len {}'.format(len(train_split_datasets)))
print('val_split_datasets len {}'.format(len(val_split_datasets)))
return train_split_datasets,train_split_datasets_label,val_split_datasets,val_split_datasets_label
detect_model_path=./components/hand_detect/weights/latest_416-2021-02-19.pt
detect_model_arch=yolo_v3
detect_conf_thres=0.5
detect_nms_thres=0.45
handpose_x_model_path=./components/hand_keypoints/weights/ReXNetV1-size-256-wingloss102-0.1063.pth
handpose_x_model_arch=rexnetv1
camera_id = 0
vis_gesture_lines = True
charge_cycle_step = 32
#-*-coding:utf-8-*-
'''
DpCas-Light
|||| ||||| |||| || |||||||
|| || || || || || |||| || ||
|| || || || || || || || ||
|| || || || || ||====|| ||||||
|| || ||||| || || ||======|| ||
|| || || || || || || || ||
|||| || |||| || || |||||||
/--------------------- HandPose_X ---------------------/
'''
# date:2019-12-10
# Author: Eric.Lee
# function: handpose :rotation & translation
import cv2
import numpy as np
# 人脸外轮廓
def get_face_outline(img_crop,face_crop_region,obj_crop_points,face_w,face_h):
face_mask = np.zeros((1,27,2),dtype = np.int32)
for m in range(obj_crop_points.shape[0]):
if m <=16:
x = int(face_crop_region[0]+obj_crop_points[m][0]*face_w)
y = int(face_crop_region[1]+obj_crop_points[m][1]*face_h)
# face_mask.append((x,y))
face_mask[0,m,0]=x
face_mask[0,m,1]=y
for k in range(16,26):
m = 42-k
x = int(face_crop_region[0]+obj_crop_points[m][0]*face_w)
y = int(face_crop_region[1]+obj_crop_points[m][1]*face_h)
# face_mask.append((x,y))
face_mask[0,k+1,0]=x
face_mask[0,k+1,1]=y
# print(x,y)
return face_mask
# 3D reference coordinates of a generic hand model (palm centre, wrist, finger roots)
object_pts = np.float32([
[0., 0.4,0.],#掌心
[0., 5.,0.],#hand 根部
# [-2, 2.5,0.],#thumb 第一指节
# [-4, 0.5,0.],#thumb 第二指节
[-2.7, -4.5, 0.],# index 根部
[0., -5., 0.],# middle 根部
[2.6, -4., 0.], # ring 根部
[5.2, -3., 0.],# pink 根部
]
)
# object_pts = np.float32([[-2.5, -7.45, 0.5],# pink 根部
#
# [-1.2, -7.45, 0.5], # ring 根部
#
#
# [1.2, -7.5, 0.5],# middle 根部
#
# [2.5, -7.45, 0.5],# index 根部
# [4.2, -3.45, 0.5],# thumb 第二指节
# [2.5, -2.0, 0.5],# thumb 根部
# [0.00, -0.0,0.5],#hand 根部
# ]
# )
# xyz 立体矩形框
# reprojectsrc = np.float32([[3.0, 11.0, 2.0],
# [3.0, 11.0, -4.0],
# [3.0, -7.0, -4.0],
# [3.0, -7.0, 2.0],
# [-3.0, 11.0, 2.0],
# [-3.0, 11.0, -4.0],
# [-3.0, -7.0, -4.0],
# [-3.0, -7.0, 2.0]])
reprojectsrc = np.float32([[5.0, 8.0, 2.0],
[5.0, 8.0, -2.0],
[5.0, -8.0, -2.0],
[5.0, -8.0, 2.0],
[-5.0, 8.0, 2.0],
[-5.0, 8.0, -2.0],
[-5.0, -8.0, -2.0],
[-5.0, -8.0, 2.0]])
# reprojectsrc = np.float32([[6.0, 4.0, 2.0],
# [6.0, 4.0, -4.0],
# [6.0, -3.0, -4.0],
# [6.0, -3.0, 2.0],
# [-6.0, 4.0, 2.0],
# [-6.0, 4.0, -4.0],
# [-6.0, -3.0, -4.0],
# [-6.0, -3.0, 2.0]])
# reprojectsrc = np.float32([[6.0, 6.0, 6.0],
# [6.0, 6.0, -6.0],
# [6.0, -6.0, -6.0],
# [6.0, -6.0, 6.0],
# [-6.0, 6.0, 6.0],
# [-6.0, 6.0, -6.0],
# [-6.0, -6.0, -6.0],
# [-6.0, -6.0, 6.0]])
# 立体矩形框连线,连接组合
line_pairs = [[0, 1], [1, 2], [2, 3], [3, 0],
[4, 5], [5, 6], [6, 7], [7, 4],
[0, 4], [1, 5], [2, 6], [3, 7]]
def get_hand_pose(shape,img,vis = True):
h,w,_=img.shape
K = [w, 0.0, w//2,
0.0, w, h//2,
0.0, 0.0, 1.0]
# Assuming no lens distortion
D = [0, 0, 0.0, 0.0, 0]
cam_matrix = np.array(K).reshape(3, 3).astype(np.float32)# 相机矩阵
# dist_coeffs = np.array(D).reshape(5, 1).astype(np.float32)#相机畸变矩阵,默认无畸变
dist_coeffs = np.float32([0.0, 0.0, 0.0, 0.0, 0.0])
# 2D image coordinates of the selected hand keypoints
# image_pts = np.float32([shape[17], shape[21], shape[22], shape[26], shape[36],
# shape[39], shape[42], shape[45],
# shape[27],shape[31], shape[35],shape[30],shape[33]])
image_pts = np.float32([shape[0], shape[1], shape[2], shape[3], shape[4], shape[5]
]
)
# PNP 计算图像二维和三维实际关系,获得旋转和偏移矩阵
_, rotation_vec, translation_vec = cv2.solvePnP(object_pts, image_pts, cam_matrix, dist_coeffs)
# _, rotation_vec, translation_vec = cv2.solvePnPRansac(object_pts, image_pts, cam_matrix, dist_coeffs)
# print("translation_vec:",translation_vec)
#print('translation_vec : {}'.format(translation_vec))
# 映射矩形框
reprojectdst, _ = cv2.projectPoints(reprojectsrc, rotation_vec, translation_vec, cam_matrix,dist_coeffs)
reprojectdst = tuple(map(tuple, reprojectdst.reshape(8, 2)))
# calc euler angle
rotation_mat, _ = cv2.Rodrigues(rotation_vec)#旋转向量转为旋转矩阵
pose_mat = cv2.hconcat((rotation_mat, translation_vec))# 拼接操作 旋转 + 偏移
_, _, _, _, _, _, euler_angle = cv2.decomposeProjectionMatrix(pose_mat)#欧拉角估计
if vis:
for i,line_pair in enumerate(line_pairs):# 显示立体矩形框
x1 = int(reprojectdst[line_pair[0]][0])
y1 = int(reprojectdst[line_pair[0]][1])
x2 = int(reprojectdst[line_pair[1]][0])
y2 = int(reprojectdst[line_pair[1]][1])
if line_pair[0] in [0,3,4,7] and line_pair[1] in [0,3,4,7]:
cv2.line(img,(x1,y1),(x2,y2),(255,0,0),2)
elif line_pair[0] in [1,2,5,6] and line_pair[1] in [1,2,5,6]:
cv2.line(img,(x1,y1),(x2,y2),(250,150,0),2)
else:
cv2.line(img,(x1,y1),(x2,y2),(0,90,255),2)
return reprojectdst, euler_angle,translation_vec
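A hypothetical call sketch for get_hand_pose: the six fabricated 2D points below follow the order of object_pts above (palm centre, wrist, then index/middle/ring/pinky roots) on a 640x480 frame; in the pipeline these coordinates would come from the 21-keypoint model. Assumed to run in this module.
```
import numpy as np

frame = np.zeros((480, 640, 3), dtype=np.uint8)
shape = np.float32([[320, 250],    # palm centre
                    [320, 380],    # wrist / hand root
                    [250, 160],    # index finger root
                    [320, 140],    # middle finger root
                    [380, 150],    # ring finger root
                    [430, 175]])   # pinky finger root
reprojectdst, euler_angle, translation_vec = get_hand_pose(shape, frame, vis=False)
print(euler_angle.flatten())       # rough pitch / yaw / roll estimate
```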
#-*-coding:utf-8-*-
'''
DpCas-Light
|||| ||||| |||| || |||||||
|| || || || || || |||| || ||
|| || || || || || || || ||
|| || || || || ||====|| ||||||
|| || ||||| || || ||======|| ||
|| || || || || || || || ||
|||| || |||| || || |||||||
/--------------------- HandPose_X ---------------------/
'''
import copy
def compute_iou_tk(rec1, rec2):
"""
computing IoU
:param rec1: (y0, x0, y1, x1), which reflects
(top, left, bottom, right)
:param rec2: (y0, x0, y1, x1)
:return: scala value of IoU
"""
# computing area of each rectangles
S_rec1 = (rec1[2] - rec1[0]) * (rec1[3] - rec1[1])
S_rec2 = (rec2[2] - rec2[0]) * (rec2[3] - rec2[1])
# computing the sum_area
sum_area = S_rec1 + S_rec2
# find the each edge of intersect rectangle
left_line = max(rec1[1], rec2[1])
right_line = min(rec1[3], rec2[3])
top_line = max(rec1[0], rec2[0])
bottom_line = min(rec1[2], rec2[2])
# judge if there is an intersect
if left_line >= right_line or top_line >= bottom_line:
return 0.
else:
intersect = (right_line - left_line) * (bottom_line - top_line)
return (intersect / (sum_area - intersect)) * 1.0
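A worked example: rectangles are given in (y0, x0, y1, x1) order, so two 10x10 boxes offset by 5 pixels in each direction overlap in a 5x5 patch and the IoU is 25 / (100 + 100 - 25) ≈ 0.143.
```
rec_a = (0, 0, 10, 10)
rec_b = (5, 5, 15, 15)
print(compute_iou_tk(rec_a, rec_b))   # ≈ 0.1428
```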
def tracking_bbox(data,hand_dict,index,iou_thr = 0.5):
track_index = index
reg_dict = {}
Flag_ = True if hand_dict else False
if Flag_ == False:
# print("------------------->>. False")
for bbox in data:
x_min,y_min,x_max,y_max,score = bbox
reg_dict[track_index] = (x_min,y_min,x_max,y_max,score,0.,1,1)
track_index += 1
if track_index >= 65535:
track_index = 0
else:
# print("------------------->>. True ")
for bbox in data:
xa0,ya0,xa1,ya1,score = bbox
is_track = False
for k_ in hand_dict.keys():
xb0,yb0,xb1,yb1,_,_,cnt_,bbox_stanbel_cnt = hand_dict[k_]
iou_ = compute_iou_tk((ya0,xa0,ya1,xa1),(yb0,xb0,yb1,xb1))
# print((ya0,xa0,ya1,xa1),(yb0,xb0,yb1,xb1))
# print("iou : ",iou_)
if iou_ > iou_thr: # 跟踪成功目标
UI_CNT = 1
if iou_ > 0.888:
UI_CNT = bbox_stanbel_cnt + 1
reg_dict[k_] = (xa0,ya0,xa1,ya1,score,iou_,cnt_ + 1,UI_CNT)
is_track = True
# print("is_track : " ,cnt_ + 1)
if is_track == False: # 新目标
reg_dict[track_index] = (xa0,ya0,xa1,ya1,score,0.,1,1)
track_index += 1
if track_index >=65535: #索引越界归零
track_index = 0
if track_index>=100:
track_index = 0
hand_dict = copy.deepcopy(reg_dict)
# print("a:",hand_dict)
return hand_dict,track_index
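A two-frame sketch of the IoU tracker with fabricated boxes: the second detection overlaps the first well above iou_thr, so it keeps track id 0 and its hit counter increases instead of being assigned a new id. Assumed to run in this module.
```
hand_dict, next_id = tracking_bbox([(100, 100, 200, 200, 0.90)], {}, 0)
hand_dict, next_id = tracking_bbox([(105, 102, 205, 203, 0.88)], hand_dict, next_id)
print(hand_dict)   # {0: (105, 102, 205, 203, 0.88, <iou>, 2, 1)}
```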
import os
"""Parses the data configuration file"""
def parse_data_cfg(path):
print('data_cfg : ',path)
options = dict()
with open(path, 'r') as fp:
lines = fp.readlines()
for line in lines:
line = line.strip()
if line == '' or line.startswith('#'):
continue
key, value = line.split('=')
options[key.strip()] = value.strip()
return options
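A minimal usage sketch, assuming a cfg file with the key=value lines shown earlier in this listing is saved at a hypothetical path; parse_data_cfg returns every value as a string, so numeric and boolean options still need explicit conversion.
```
cfg = parse_data_cfg("handpose.cfg")                 # hypothetical path
detect_conf_thres = float(cfg["detect_conf_thres"])  # values come back as strings
camera_id = int(cfg["camera_id"])
vis_gesture_lines = cfg["vis_gesture_lines"] == "True"
print(cfg["detect_model_arch"], detect_conf_thres, camera_id, vis_gesture_lines)
```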