Commit a412396d, authored by Eric.Lee2021

add DpCas sample: who you want to see

Parent 6fccd866
#-*-coding:utf-8-*-
'''
DpCas-Light
|||| ||||| |||| || |||||||
|| || || || || || |||| || ||
|| || || || || || || || ||
|| || || || || ||====|| ||||||
|| || ||||| || || ||======|| ||
|| || || || || || || || ||
|||| || |||| || || |||||||
/--------------------- Who You Want To See ---------------------/
'''
# date:2021-04-18
# Author: Eric.Lee
# function: who you want to see
import os
import cv2
import time
import random
from multiprocessing import Process
from multiprocessing import Manager
import numpy as np
# load the model components
from face_detect.yolo_v3_face import yolo_v3_face_model
from insight_face.face_verify import insight_face_model
from face_multi_task.face_multi_task_component import FaceMuitiTask_Model
from face_euler_angle.face_euler_angle_component import FaceAngle_Model
# load the utility libraries
import sys
sys.path.append("./lib/wyw2s_lib/")
from cores.wyw2s_fuction import get_faces_batch_attribute
from utils.utils import parse_data_cfg
from utils.show_videos_thread import run_show
from moviepy.editor import *
def main_wyw2s(cfg_file,video_path = None):
config = parse_data_cfg(cfg_file)
face_detect_model = yolo_v3_face_model(conf_thres = 0.43)
face_verify_model = insight_face_model(threshold = 1.2)
face_multitask_model = FaceMuitiTask_Model()
face_euler_model = FaceAngle_Model()
print("\n/------------------------------------------------------------------------/\n")
YouWantToSee = config["YouWantToSee"]
YouWantToSee_=[name_ for name_ in YouWantToSee.split(",")]
print(" YouWantToSee : {}".format(YouWantToSee_))
print("\n/------------------------------------------------------------------------/\n")
p_colors = []
for i in range(len(face_verify_model.face_names)):
if i == 0 :
p_colors.append((100,155,100))
elif i == 1 :
p_colors.append((0,255,0))
elif i == 2:
p_colors.append((255,0,0))
elif i == 3:
p_colors.append((0,255,255))
elif i == 4:
p_colors.append((0,185,255))
elif i == 5:
p_colors.append((255,185,55))
else:
p_colors.append((random.randint(60,255),random.randint(70,255),random.randint(130,255)))
cap = cv2.VideoCapture(video_path)
frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
# timeline
time_map = np.zeros([200,frame_count,3]).astype(np.uint8)
time_map[:,:,0].fill(105)
time_map[:,:,1].fill(105)
time_map[:,:,2].fill(105)
pts_last = None
frame_idx = 0
Flag_Last,Flag_Now = False,False
start_time = 0.
end_time = 0.
YouWantToSee_time_list = []
while cap.isOpened():
ret,img = cap.read()
if ret:
frame_idx += 1
video_time = cap.get(cv2.CAP_PROP_POS_MSEC)
algo_image = img.copy()
faces_bbox = face_detect_model.predict(img,vis = True) # detect faces and get the face bounding boxes
if len(faces_bbox) > 0:
faces_identify,faces_identify_bboxes,bboxes,face_map = get_faces_batch_attribute(face_multitask_model,face_euler_model,faces_bbox,algo_image,use_cuda = True,vis = True)
YouHaveSeen_list = []
if len(faces_identify) > 0:
results, face_dst = face_verify_model.predict(faces_identify)
face_dst = list(face_dst.cpu().detach().numpy())
# print("face_dst : ",face_dst)
for idx,bbox_ in enumerate(faces_identify_bboxes):
cv2.putText(algo_image, "{}: {:.2f}".format(face_verify_model.face_names[results[idx] + 1],face_dst[idx]), (bbox_[0],bbox_[1]+23),cv2.FONT_HERSHEY_DUPLEX, 0.7, (255, 95,220), 5)
cv2.putText(algo_image, "{}: {:.2f}".format(face_verify_model.face_names[results[idx] + 1],face_dst[idx]), (bbox_[0],bbox_[1]+23),cv2.FONT_HERSHEY_DUPLEX, 0.7, p_colors[results[idx] + 1], 2)
if face_verify_model.face_names[results[idx] + 1] in YouWantToSee_:
YouHaveSeen_list.append(face_verify_model.face_names[results[idx] + 1])
# draw the timeline
if len(YouHaveSeen_list)>0:
cv2.rectangle(time_map, (frame_idx-1,0), (frame_idx,100), (0,255,0), -1) # draw the timeline
print(" YouHaveSeen : {}".format(YouHaveSeen_list))
Flag_Now = True
else:
cv2.rectangle(time_map, (frame_idx-1,100), (frame_idx,200), (255,0,0), -1) # draw the timeline
print(" ------ ")
Flag_Now = False
else:
face_map = np.zeros([112*3,112*3,3]).astype(np.uint8)
face_map[:,:,0].fill(205)
face_map[:,:,1].fill(205)
face_map[:,:,2].fill(205)
print(" ------ ")
Flag_Now = False
cv2.rectangle(time_map, (frame_idx-1,100), (frame_idx,200), (255,0,0), -1) # draw the timeline
cv2.line(time_map, (frame_idx,100),(frame_idx,100), (0,80,255), 8) # draw the timeline mid-line
#-------------
if Flag_Now == True and Flag_Last == False:
start_time = video_time
elif Flag_Now == False and Flag_Last == True:
YouWantToSee_time_list.append((start_time/1000.,video_time/1000.))
Flag_Last = Flag_Now
#
cv2.putText(algo_image, "WhoYouWant 2 See", (algo_image.shape[1]-420,45),cv2.FONT_HERSHEY_DUPLEX, 1.2, (205, 95,250), 7)
cv2.putText(algo_image, "WhoYouWant 2 See", (algo_image.shape[1]-420,45),cv2.FONT_HERSHEY_DUPLEX, 1.2, (12, 255,12), 2)
cv2.putText(algo_image, "DpCas -", (algo_image.shape[1]-620,45),cv2.FONT_HERSHEY_DUPLEX, 1.2, (255, 95,210), 7)
cv2.putText(algo_image, "DpCas -", (algo_image.shape[1]-620,45),cv2.FONT_HERSHEY_DUPLEX, 1.2, (12, 255,12), 2)
cv2.rectangle(algo_image, (algo_image.shape[1]-640,5), (algo_image.shape[1]-30,65), (0,185,255), 6)
cv2.rectangle(algo_image, (algo_image.shape[1]-640,5), (algo_image.shape[1]-30,65), (255,100,100), 2)
cv2.putText(algo_image, "[{}/{}]".format(frame_idx,frame_count), (5,30),cv2.FONT_HERSHEY_DUPLEX, 1.0, (255, 95,220), 5)
cv2.putText(algo_image, "[{}/{}]".format(frame_idx,frame_count), (5,30),cv2.FONT_HERSHEY_DUPLEX, 1.0, (12, 255,12), 2)
cv2.putText(algo_image, "[{:.2f} sec]".format(video_time/1000.), (5,70),cv2.FONT_HERSHEY_DUPLEX, 1.0, (15, 185,255), 7)
cv2.putText(algo_image, "[{:.2f} sec]".format(video_time/1000.), (5,70),cv2.FONT_HERSHEY_DUPLEX, 1.0, (255, 112,112), 2)
#
cv2.putText(face_map, "YouWantToSee {}".format(YouWantToSee_), (5,face_map.shape[0]-3),cv2.FONT_HERSHEY_DUPLEX, 0.55, (255, 95,220), 5)
cv2.putText(face_map, "YouWantToSee {}".format(YouWantToSee_), (5,face_map.shape[0]-3),cv2.FONT_HERSHEY_DUPLEX, 0.55, (12, 215,12), 1)
face_map = cv2.resize(face_map,(algo_image.shape[0],algo_image.shape[0]))
algo_image = np.hstack((algo_image,face_map)) # stack for combined display
time_map_r = cv2.resize(time_map,(algo_image.shape[1],200))
algo_image = np.vstack((algo_image,time_map_r))
cv2.namedWindow('WhoYouWant2See', 0)
cv2.imshow('WhoYouWant2See', algo_image)
key_id = cv2.waitKey(1)
if key_id == 27:
break
else:
break
#-------------
print("\n ----->>> YouWantToSee_Time_list : \n")
movie = VideoFileClip(video_path)
video_s = "./clip_wyw2s/"
if not os.path.exists(video_s): # if the output folder does not exist
os.mkdir(video_s) # create it
seg_idx = 0
for seg_ in YouWantToSee_time_list:
seg_idx += 1
print(" Seg {} : {}".format(seg_idx,seg_))
print(" 开始剪切目标人物视频 第 {} 段 \n".format(seg_idx))
movie_clip = movie.subclip(seg_[0],seg_[1])# 将剪切的片段保存
movie_clip.write_videofile("{}clip_{}.mp4".format(video_s,seg_idx))
run_show(path = video_s , vis = True)
cv2.destroyAllWindows()
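# Minimal usage sketch (illustrative, not from the original sample): launching
# main_wyw2s directly. The config and video paths below are assumptions made
# for demonstration only; point them at your own wyw2s .cfg and source video.
if __name__ == "__main__":
    demo_cfg_file = "./lib/wyw2s_lib/cfg/wyw2s.cfg"  # hypothetical config path
    demo_video_path = "./video/demo.mp4"             # hypothetical input video
    main_wyw2s(cfg_file=demo_cfg_file, video_path=demo_video_path)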
import torch
import torch.nn as nn
import torchvision
import time
import numpy as np
import sys
def get_model_op(model_,print_flag = False):
# print('/********************* modules *******************/')
op_dict = {}
idx = 0
for m in model_.modules():
idx += 1
if isinstance(m, nn.Conv2d):
if 'Conv2d' not in op_dict.keys():
op_dict['Conv2d'] = 1
else:
op_dict['Conv2d'] += 1
if print_flag:
print('{}) {}'.format(idx,m))
pass
elif isinstance(m, nn.BatchNorm2d):
if 'BatchNorm2d' not in op_dict.keys():
op_dict['BatchNorm2d'] = 1
else:
op_dict['BatchNorm2d'] += 1
if print_flag:
print('{}) {}'.format(idx,m))
pass
elif isinstance(m, nn.Linear):
if 'Linear' not in op_dict.keys():
op_dict['Linear'] = 1
else:
op_dict['Linear'] += 1
if print_flag:
print('{}) {}'.format(idx,m))
pass
elif isinstance(m, nn.Sequential):
if print_flag:
print('*******************{}) {}'.format(idx,m))
for n in m:
if print_flag:
print('{}) {}'.format(idx,n))
if 'Conv2d' not in op_dict.keys():
op_dict['Conv2d'] = 1
else:
op_dict['Conv2d'] += 1
if 'BatchNorm2d' not in op_dict.keys():
op_dict['BatchNorm2d'] = 1
else:
op_dict['BatchNorm2d'] += 1
if 'Linear' not in op_dict.keys():
op_dict['Linear'] = 1
else:
op_dict['Linear'] += 1
if 'ReLU6' not in op_dict.keys():
op_dict['ReLU6'] = 1
else:
op_dict['ReLU6'] += 1
pass
elif isinstance(m, nn.ReLU6):
if print_flag:
print('{}) {}'.format(idx,m))
if 'ReLU6' not in op_dict.keys():
op_dict['ReLU6'] = 1
else:
op_dict['ReLU6'] += 1
pass
elif isinstance(m, nn.Module):
if print_flag:
print('{}) {}'.format(idx,m))
for n in m.modules():
if isinstance(n, nn.Conv2d):
if print_flag:
print('{}) {}'.format(idx,n))
if 'Conv2d' not in op_dict.keys():
op_dict['Conv2d'] = 1
else:
op_dict['Conv2d'] += 1
if 'BatchNorm2d' not in op_dict.keys():
op_dict['BatchNorm2d'] = 1
else:
op_dict['BatchNorm2d'] += 1
if 'Linear' not in op_dict.keys():
op_dict['Linear'] = 1
else:
op_dict['Linear'] += 1
if 'ReLU6' not in op_dict.keys():
op_dict['ReLU6'] = 1
else:
op_dict['ReLU6'] += 1
pass
pass
else:
if print_flag:
print('{}) {}'.format(idx,m))
pass
# print('\n/********************** {} ********************/\n'.format(ops.network))
for key in op_dict.keys():
if print_flag:
print(' operation - {} : {}'.format(key,op_dict[key]))
class DummyModule(nn.Module):
def __init__(self):
super(DummyModule, self).__init__()
def forward(self, x):
return x
def fuse(conv, bn):
# https://tehnokv.com/posts/fusing-batchnorm-and-conv/
with torch.no_grad():
# init
if isinstance(conv, nn.Conv2d):
fusedconv = torch.nn.Conv2d(conv.in_channels,
conv.out_channels,
kernel_size=conv.kernel_size,
stride=conv.stride,
padding=conv.padding,
bias=True)
elif isinstance(conv, nn.ConvTranspose2d): # note: fusion of nn.ConvTranspose2d is not fully supported
fusedconv = nn.ConvTranspose2d(
conv.in_channels,
conv.out_channels,
kernel_size=conv.kernel_size,
stride=conv.stride,
padding=conv.padding,
output_padding=conv.output_padding,
bias=True)
else:
print("error")
exit()
# prepare filters
w_conv = conv.weight.clone().view(conv.out_channels, -1)
w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var)))
fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.size()))
# prepare spatial bias
if conv.bias is not None:
b_conv = conv.bias
#b_conv = conv.bias.mul(bn.weight.div(torch.sqrt(bn.running_var + bn.eps))) # maybe you should use this one?
else:
b_conv = torch.zeros(conv.weight.size(0))
b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps))
fusedconv.bias.copy_(b_conv + b_bn)
return fusedconv
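# Sanity-check sketch (added for illustration): in eval mode a fused Conv2d
# should reproduce Conv2d -> BatchNorm2d up to floating-point error. The layer
# sizes here are arbitrary assumptions.
def _demo_fuse_equivalence():
    conv = nn.Conv2d(3, 8, kernel_size=3, padding=1, bias=True).eval()
    bn = nn.BatchNorm2d(8).eval()  # eval mode uses the running statistics assumed by fuse()
    fused = fuse(conv, bn)
    x = torch.randn(1, 3, 16, 16)
    with torch.no_grad():
        ref = bn(conv(x))
        out = fused(x)
    print("max abs diff after fusing:", (ref - out).abs().max().item())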
# idxx = 0
def fuse_module(m):
# global idxx
children = list(m.named_children())
c = None
cn = None
for name, child in children:
# idxx += 1
# print('-------------->>',idxx)
# if idxx%10==0:
# continue
# print("name {}, child {}".format(name, child))
if isinstance(child, nn.BatchNorm2d) and c is not None:
bc = fuse(c, child)
m._modules[cn] = bc
# print('DummyModule() : ',DummyModule())
m._modules[name] = DummyModule()
c = None
elif isinstance(child, nn.Conv2d):
c = child
cn = name
else:
fuse_module(child)
def test_net(ops,m):
use_cuda = torch.cuda.is_available()
use_cpu = False
if ops.force_cpu or use_cuda == False:
p = torch.randn([1, 3, 256, 256])
device = torch.device("cpu")
use_cpu = True
else:
p = torch.randn([1, 3, 256, 256]).cuda()
device = torch.device("cuda:0")
count = 50
time_org = []
m_o = m.to(device)
get_model_op(m_o)
# print(m)
for i in range(count):
s1 = time.time()
if use_cpu:
o_output = m_o(p)
else:
o_output = m_o(p).cpu()
s2 = time.time()
time_org.append(s2 - s1)
print("Original time: ", s2 - s1)
print('------------------------------------>>>>')
fuse_module(m.to(torch.device("cpu")))
# print(m)
m_f = m.to(device)
get_model_op(m_f)
time_fuse = []
for i in range(count):
s1 = time.time()
if use_cpu:
f_output = m_f(p)
else:
f_output = m_f(p).cpu()
s2 = time.time()
time_fuse.append(s2 - s1)
print("Fused time: ", s2 - s1)
print("-" * 50)
print("org time:", np.mean(time_org))
print("fuse time:", np.mean(time_fuse))
for o in o_output:
print("org size:", o.size())
for o in f_output:
print("fuse size:", o.size())
for i in range(len(o_output)):
assert o_output[i].size()==f_output[i].size()
print("output[{}] max abs diff: {}".format(i, (o_output[i] - f_output[i]).abs().max().item()))
print("output[{}] MSE diff: {}".format(i, nn.MSELoss()(o_output[i], f_output[i]).item()))
def acc_model(ops,m):
# print('\n-------------------------------->>> before acc model')
get_model_op(m)
fuse_module(m)
# print('\n-------------------------------->>> after acc model')
get_model_op(m)
return m
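# Usage sketch (illustrative): running acc_model on a torchvision ResNet-18 and
# checking that fusing the Conv/BN pairs leaves the outputs unchanged.
# torchvision.models.resnet18 is only used here as a convenient test model.
def _demo_acc_model_resnet18():
    model = torchvision.models.resnet18().eval()
    x = torch.randn(1, 3, 224, 224)
    with torch.no_grad():
        before = model(x)
        model = acc_model(None, model).eval()  # the ops argument is unused inside acc_model
        after = model(x)
    print("resnet18 max abs diff after fusion:", (before - after).abs().max().item())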
This diff has been collapsed.
import glob
import math
import os
import random
import shutil
from pathlib import Path
from PIL import Image
from tqdm import tqdm
import cv2
import numpy as np
import torch
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
def xyxy2xywh(x):
# Convert bounding box format from [x1, y1, x2, y2] to [x, y, w, h]
y = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x)
y[:, 0] = (x[:, 0] + x[:, 2]) / 2
y[:, 1] = (x[:, 1] + x[:, 3]) / 2
y[:, 2] = x[:, 2] - x[:, 0]
y[:, 3] = x[:, 3] - x[:, 1]
return y
def xywh2xyxy(x):
# Convert bounding box format from [x, y, w, h] to [x1, y1, x2, y2]
y = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x)
y[:, 0] = x[:, 0] - x[:, 2] / 2
y[:, 1] = x[:, 1] - x[:, 3] / 2
y[:, 2] = x[:, 0] + x[:, 2] / 2
y[:, 3] = x[:, 1] + x[:, 3] / 2
return y
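# Quick illustration (added): xyxy2xywh and xywh2xyxy are inverses of each
# other, e.g. a 20 x 10 box centred at (50, 40).
def _demo_box_conversion():
    boxes_xyxy = np.array([[40., 35., 60., 45.]])  # x1, y1, x2, y2
    boxes_xywh = xyxy2xywh(boxes_xyxy)             # [[50., 40., 20., 10.]]
    print(boxes_xywh, xywh2xyxy(boxes_xywh))       # round-trips back to the input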
class LoadImages: # for inference
def __init__(self, path, img_size=416):
self.height = img_size
img_formats = ['.jpg', '.jpeg', '.png', '.tif']
vid_formats = ['.mov', '.avi', '.mp4']
files = []
if os.path.isdir(path):
files = sorted(glob.glob('%s/*.*' % path))
elif os.path.isfile(path):
files = [path]
images = [x for x in files if os.path.splitext(x)[-1].lower() in img_formats]
videos = [x for x in files if os.path.splitext(x)[-1].lower() in vid_formats]
nI, nV = len(images), len(videos)
self.files = images + videos
self.nF = nI + nV # number of files
self.video_flag = [False] * nI + [True] * nV
self.mode = 'images'
if any(videos):
self.new_video(videos[0]) # new video
else:
self.cap = None
assert self.nF > 0, 'No images or videos found in ' + path
def __iter__(self):
self.count = 0
return self
def __next__(self):
if self.count == self.nF:
raise StopIteration
path = self.files[self.count]
if self.video_flag[self.count]:
# Read video
self.mode = 'video'
ret_val, img0 = self.cap.read()
if not ret_val:
self.count += 1
self.cap.release()
if self.count == self.nF: # last video
raise StopIteration
else:
path = self.files[self.count]
self.new_video(path)
ret_val, img0 = self.cap.read()
self.frame += 1
print('video %g/%g (%g/%g) %s: ' % (self.count + 1, self.nF, self.frame, self.nframes, path), end='')
else:
# Read image
self.count += 1
img0 = cv2.imread(path) # BGR
assert img0 is not None, 'File Not Found ' + path
print('image %g/%g %s: ' % (self.count, self.nF, path), end='')
# Padded resize
img, _, _, _ = letterbox(img0, height=self.height)
# Normalize RGB
img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB
img = np.ascontiguousarray(img, dtype=np.float32) # uint8 to float32
img /= 255.0 # 0 - 255 to 0.0 - 1.0
# cv2.imwrite(path + '.letterbox.jpg', 255 * img.transpose((1, 2, 0))[:, :, ::-1]) # save letterbox image
return path, img, img0, self.cap
def new_video(self, path):
self.frame = 0
self.cap = cv2.VideoCapture(path)
self.nframes = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
def __len__(self):
return self.nF # number of files
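# Usage sketch (illustrative): iterating a folder of images with LoadImages.
# The folder path is an assumption; point it at a real image directory.
def _demo_load_images(folder="./samples"):
    dataset = LoadImages(folder, img_size=416)
    for path, img, img0, _ in dataset:
        print(path, img.shape, img0.shape)  # img: letterboxed CHW float32, img0: original BGR
        break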
class LoadWebcam: # for inference
def __init__(self, img_size=416):
self.cam = cv2.VideoCapture(0)
self.height = img_size
def __iter__(self):
self.count = -1
return self
def __next__(self):
self.count += 1
if cv2.waitKey(1) == 27: # esc to quit
cv2.destroyAllWindows()
raise StopIteration
# Read image
ret_val, img0 = self.cam.read()
assert ret_val, 'Webcam Error'
img_path = 'webcam_%g.jpg' % self.count
img0 = cv2.flip(img0, 1) # flip left-right
# Padded resize
img, _, _, _ = letterbox(img0, height=self.height)
# Normalize RGB
img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB
img = np.ascontiguousarray(img, dtype=np.float32) # uint8 to float32
img /= 255.0 # 0 - 255 to 0.0 - 1.0
return img_path, img, img0, self.cam
def __len__(self):
return 0
class LoadImagesAndLabels(Dataset): # for training/testing
def __init__(self, path, batch_size, img_size=416, augment=True, multi_scale=False):
print('LoadImagesAndLabels init : ',path)
with open(path, 'r') as file:
img_files = file.read().splitlines()
img_files = list(filter(lambda x: len(x) > 0, img_files))
np.random.shuffle(img_files) # shuffle img_list
print("shuffle image...")
self.img_files = img_files
assert len(self.img_files) > 0, 'No images found in %s' % path
self.img_size = img_size
self.batch_size = batch_size
self.multi_scale = multi_scale
self.augment = augment
self.scale_index = 0
if self.multi_scale:
self.img_size = img_size # initiate with maximum multi_scale size, in case of out of memory
print("Multi scale images training, init img_size", self.img_size)
else:
print("Fixed scale images, img_size", self.img_size)
self.label_files = [
x.replace('images', 'labels').replace("JPEGImages", 'labels').replace('.bmp', '.txt').replace('.jpg', '.txt').replace('.png', '.txt')
for x in self.img_files]
# print('self.img_files : ',self.img_files[1])
# print('self.label_files : ',self.label_files[1])
def __len__(self):
return len(self.img_files)
def __getitem__(self, index):
# if self.multi_scale and (index % self.batch_size == 0) and index != 0:
if self.multi_scale and (self.scale_index % self.batch_size == 0)and self.scale_index != 0:
self.img_size = random.choice(range(11, 18)) * 32
# print("++++++ change img_size, index:", self.img_size, index)
if self.multi_scale:
self.scale_index += 1
if self.scale_index >= (100*self.batch_size):
self.scale_index = 0
img_path = self.img_files[index]
label_path = self.label_files[index]
img = cv2.imread(img_path) # BGR
assert img is not None, 'File Not Found ' + img_path
augment_hsv = random.random() < 0.5 # hsv_aug prob = 0.5
if self.augment and augment_hsv:
# SV augmentation by 50%
fraction = 0.50 # must be < 1.0
img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
S = img_hsv[:, :, 1].astype(np.float32)
V = img_hsv[:, :, 2].astype(np.float32)
a = (random.random() * 2 - 1) * fraction + 1 # a in [0.5, 1.5]
S *= a
if a > 1:
np.clip(S, None, 255, out=S)
a = (random.random() * 2 - 1) * fraction + 1
V *= a
if a > 1:
np.clip(V, None, 255, out=V)
img_hsv[:, :, 1] = S # .astype(np.uint8)
img_hsv[:, :, 2] = V # .astype(np.uint8)
cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)
h, w, _ = img.shape
img, ratio, padw, padh = letterbox(img, height=self.img_size, augment=self.augment)
# Load labels
labels = []
if os.path.isfile(label_path):
with open(label_path, 'r') as file:
lines = file.read().splitlines()
x = np.array([x.split() for x in lines], dtype=np.float32)
if x.size > 0:
# Normalized xywh to pixel xyxy format
labels = x.copy()
labels[:, 1] = ratio * w * (x[:, 1] - x[:, 3] / 2) + padw
labels[:, 2] = ratio * h * (x[:, 2] - x[:, 4] / 2) + padh
labels[:, 3] = ratio * w * (x[:, 1] + x[:, 3] / 2) + padw
labels[:, 4] = ratio * h * (x[:, 2] + x[:, 4] / 2) + padh
# Augment image and labels
if self.augment:
img, labels = random_affine(img, labels, degrees=(-10, 10), translate=(0.10, 0.10), scale=(0.9, 1.1))
nL = len(labels) # number of labels
if nL:
# convert xyxy to xywh
labels[:, 1:5] = xyxy2xywh(labels[:, 1:5]) / self.img_size # convert the format and normalize
if self.augment:
# random left-right flip
lr_flip = True
if lr_flip and random.random() > 0.5:
img = np.fliplr(img)
if nL:
labels[:, 1] = 1 - labels[:, 1]
# random up-down flip
ud_flip = False
if ud_flip and random.random() > 0.5:
img = np.flipud(img)
if nL:
labels[:, 2] = 1 - labels[:, 2]
labels_out = torch.zeros((nL, 6))# one extra column for the batch image index
if nL:
labels_out[:, 1:] = torch.from_numpy(labels)
# Normalize
img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416
img = np.ascontiguousarray(img, dtype=np.float32) # uint8 to float32
img /= 255.0 # 0 - 255 to 0.0 - 1.0
return torch.from_numpy(img), labels_out, img_path, (h, w)
@staticmethod
def collate_fn(batch):
img, label, path, hw = list(zip(*batch)) # transposed
for i, l in enumerate(label):
l[:, 0] = i # record the index of the image each object belongs to
return torch.stack(img, 0), torch.cat(label, 0), path, hw
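# Training-side usage sketch (illustrative): wiring LoadImagesAndLabels into a
# PyTorch DataLoader with the custom collate_fn. The train list path is an
# assumption for demonstration only.
def _demo_build_dataloader(train_list="./train.txt", batch_size=8, img_size=416):
    dataset = LoadImagesAndLabels(train_list, batch_size=batch_size,
                                  img_size=img_size, augment=True, multi_scale=False)
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=True,
                        num_workers=0, collate_fn=dataset.collate_fn)
    for imgs, targets, paths, hw in loader:
        print(imgs.shape, targets.shape)  # imgs: (bs, 3, H, W), targets: (n, 6)
        break
    return loader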
def letterbox(img, height=416, augment=False, color=(127.5, 127.5, 127.5)):
# Resize a rectangular image to a padded square
shape = img.shape[:2] # shape = [height, width]
ratio = float(height) / max(shape) # ratio = old / new
new_shape = (round(shape[1] * ratio), round(shape[0] * ratio))
dw = (height - new_shape[0]) / 2 # width padding
dh = (height - new_shape[1]) / 2 # height padding
top, bottom = round(dh - 0.1), round(dh + 0.1)
left, right = round(dw - 0.1), round(dw + 0.1)
# resize img
if augment:
interpolation = np.random.choice([None, cv2.INTER_NEAREST, cv2.INTER_LINEAR,
None, cv2.INTER_NEAREST, cv2.INTER_LINEAR,
cv2.INTER_AREA, cv2.INTER_CUBIC, cv2.INTER_LANCZOS4])
if interpolation is None:
img = cv2.resize(img, new_shape)
else:
img = cv2.resize(img, new_shape, interpolation=interpolation)
else:
img = cv2.resize(img, new_shape, interpolation=cv2.INTER_NEAREST)
# print("resize time:",time.time()-s1)
img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # padded square
return img, ratio, dw, dh
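# Worked example (added): letterbox pads a rectangular image into a square and
# returns (ratio, dw, dh) so labels can be mapped the same way,
# e.g. x_padded = ratio * x_orig + dw.
def _demo_letterbox():
    img0 = np.zeros((360, 640, 3), dtype=np.uint8)  # H x W x 3
    img, ratio, dw, dh = letterbox(img0, height=416)
    print(img.shape, ratio, dw, dh)  # (416, 416, 3) 0.65 0.0 91.0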
def random_affine(img, targets=(), degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-2, 2),
borderValue=(127.5, 127.5, 127.5)):
# torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))
# https://medium.com/uruvideo/dataset-augmentation-with-random-homographies-a8f4b44830d4
if targets is None:
targets = []
border = 0 # width of added border (optional)
height = max(img.shape[0], img.shape[1]) + border * 2
# Rotation and Scale
R = np.eye(3)
a = random.random() * (degrees[1] - degrees[0]) + degrees[0]
# a += random.choice([-180, -90, 0, 90]) # 90deg rotations added to small rotations
s = random.random() * (scale[1] - scale[0]) + scale[0]
R[:2] = cv2.getRotationMatrix2D(angle=a, center=(img.shape[1] / 2, img.shape[0] / 2), scale=s)
# Translation
T = np.eye(3)
T[0, 2] = (random.random() * 2 - 1) * translate[0] * img.shape[0] + border # x translation (pixels)
T[1, 2] = (random.random() * 2 - 1) * translate[1] * img.shape[1] + border # y translation (pixels)
# Shear
S = np.eye(3)
S[0, 1] = math.tan((random.random() * (shear[1] - shear[0]) + shear[0]) * math.pi / 180) # x shear (deg)
S[1, 0] = math.tan((random.random() * (shear[1] - shear[0]) + shear[0]) * math.pi / 180) # y shear (deg)
M = S @ T @ R # Combined rotation matrix. ORDER IS IMPORTANT HERE!!
imw = cv2.warpPerspective(img, M, dsize=(height, height), flags=cv2.INTER_LINEAR,
borderValue=borderValue) # BGR order borderValue
# Return warped points also
if len(targets) > 0:
n = targets.shape[0]
points = targets[:, 1:5].copy()
area0 = (points[:, 2] - points[:, 0]) * (points[:, 3] - points[:, 1])
# warp points
xy = np.ones((n * 4, 3))
xy[:, :2] = points[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(n * 4, 2) # x1y1, x2y2, x1y2, x2y1
xy = (xy @ M.T)[:, :2].reshape(n, 8)
# create new boxes
x = xy[:, [0, 2, 4, 6]]
y = xy[:, [1, 3, 5, 7]]
xy = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T
# apply angle-based reduction of bounding boxes
radians = a * math.pi / 180
reduction = max(abs(math.sin(radians)), abs(math.cos(radians))) ** 0.5
x = (xy[:, 2] + xy[:, 0]) / 2
y = (xy[:, 3] + xy[:, 1]) / 2
w = (xy[:, 2] - xy[:, 0]) * reduction
h = (xy[:, 3] - xy[:, 1]) * reduction
xy = np.concatenate((x - w / 2, y - h / 2, x + w / 2, y + h / 2)).reshape(4, n).T
# reject warped points outside of image
np.clip(xy, 0, height, out=xy)
w = xy[:, 2] - xy[:, 0]
h = xy[:, 3] - xy[:, 1]
area = w * h
ar = np.maximum(w / (h + 1e-16), h / (w + 1e-16))
i = (w > 4) & (h > 4) & (area / (area0 + 1e-16) > 0.1) & (ar < 10)
targets = targets[i]
targets[:, 1:5] = xy[i]
return imw, targets
def convert_images2bmp():
# cv2.imread() jpg at 230 img/s, *.bmp at 400 img/s
for path in ['../coco/images/val2014/', '../coco/images/train2014/']:
folder = os.sep + Path(path).name
output = path.replace(folder, folder + 'bmp')
if os.path.exists(output):
shutil.rmtree(output) # delete output folder
os.makedirs(output) # make new output folder
for f in tqdm(glob.glob('%s*.jpg' % path)):
save_name = f.replace('.jpg', '.bmp').replace(folder, folder + 'bmp')
cv2.imwrite(save_name, cv2.imread(f))
for label_path in ['../coco/trainvalno5k.txt', '../coco/5k.txt']:
with open(label_path, 'r') as file:
lines = file.read()
lines = lines.replace('2014/', '2014bmp/').replace('.jpg', '.bmp').replace(
'/Users/glennjocher/PycharmProjects/', '../')
with open(label_path.replace('5k', '5k_bmp'), 'w') as file:
file.write(lines)
import torch
def init_seeds(seed=0):
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
def select_device(force_cpu=False):
if force_cpu:
cuda = False
device = torch.device('cpu')
else:
cuda = torch.cuda.is_available()
device = torch.device('cuda:0' if cuda else 'cpu')
if torch.cuda.device_count() > 1:
device = torch.device('cuda' if cuda else 'cpu')
# print('Found %g GPUs' % torch.cuda.device_count())
# print('Multi-GPU Issue: https://github.com/ultralytics/yolov3/issues/21')
# torch.cuda.set_device(0) # OPTIONAL: Set your GPU if multiple available
# print('Using ', torch.cuda.device_count(), ' GPUs')
# print('Using %s %s\n' % (device.type, torch.cuda.get_device_properties(0) if cuda else ''))
return device
import glob
import random
import time
from collections import defaultdict
import cv2
import numpy as np
import torch
import torch.nn as nn
from dp_models.light_pose.modules.keypoints import BODY_PARTS_KPT_IDS, BODY_PARTS_PAF_IDS
# Set printoptions
torch.set_printoptions(linewidth=1320, precision=5, profile='long')
np.set_printoptions(linewidth=320, formatter={'float_kind': '{:11.5g}'.format}) # format short g, %precision=5
# Prevent OpenCV from multithreading (to use PyTorch DataLoader)
cv2.setNumThreads(0)
def float3(x): # format floats to 3 decimals
return float(format(x, '.3f'))
def init_seeds(seed=0):
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
if torch.cuda.is_available():
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
def load_classes(path):
# Loads class labels at 'path'
fp = open(path, 'r')
names = fp.read().split('\n')
return list(filter(None, names)) # filter removes empty strings (such as last line)
def model_info(model):
# Plots a line-by-line description of a PyTorch model
n_p = sum(x.numel() for x in model.parameters()) # number parameters
n_g = sum(x.numel() for x in model.parameters() if x.requires_grad) # number gradients
print('\n%5s %60s %9s %12s %20s %10s %10s' % ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', 'sigma'))
for i, (name, p) in enumerate(model.named_parameters()):
# name = name.replace('module_list.', '')
print('%5g %60s %9s %12g %20s %10.3g %10.3g' % (
i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std()))
print('Model Summary: %g layers, %g parameters, %g gradients' % (i + 1, n_p, n_g))
def weights_init_normal(m):
classname = m.__class__.__name__
if classname.find('Conv') != -1:
torch.nn.init.normal_(m.weight.data, 0.0, 0.03)
elif classname.find('BatchNorm2d') != -1:
torch.nn.init.normal_(m.weight.data, 1.0, 0.03)
torch.nn.init.constant_(m.bias.data, 0.0)
def xyxy2xywh(x):
# Convert bounding box format from [x1, y1, x2, y2] to [x, y, w, h]
y = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x)
y[:, 0] = (x[:, 0] + x[:, 2]) / 2
y[:, 1] = (x[:, 1] + x[:, 3]) / 2
y[:, 2] = x[:, 2] - x[:, 0]
y[:, 3] = x[:, 3] - x[:, 1]
return y
def xywh2xyxy(x):
# Convert bounding box format from [x, y, w, h] to [x1, y1, x2, y2]
y = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x)
y[:, 0] = x[:, 0] - x[:, 2] / 2
y[:, 1] = x[:, 1] - x[:, 3] / 2
y[:, 2] = x[:, 0] + x[:, 2] / 2
y[:, 3] = x[:, 1] + x[:, 3] / 2
return y
def scale_coords(img_size, coords, img0_shape):# rescale from network input size back to the original image size
# Rescale x1, y1, x2, y2 from 416 to image size
# print('coords : ',coords)
# print('img0_shape : ',img0_shape)
gain = float(img_size) / max(img0_shape) # gain = old / new
# print('gain : ',gain)
pad_x = (img_size - img0_shape[1] * gain) / 2 # width padding
pad_y = (img_size - img0_shape[0] * gain) / 2 # height padding
# print('pad_xpad_y : ',pad_x,pad_y)
coords[:, [0, 2]] -= pad_x
coords[:, [1, 3]] -= pad_y
coords[:, :4] /= gain
coords[:, :4] = torch.clamp(coords[:, :4], min=0)# clamp so coordinates are not negative
return coords
def ap_per_class(tp, conf, pred_cls, target_cls):
""" Compute the average precision, given the recall and precision curves.
Source: https://github.com/rafaelpadilla/Object-Detection-Metrics.
# Arguments
tp: True positives (list).
conf: Objectness value from 0-1 (list).
pred_cls: Predicted object classes (list).
target_cls: True object classes (list).
# Returns
The average precision as computed in py-faster-rcnn.
"""
# Sort by objectness
i = np.argsort(-conf)
tp, conf, pred_cls = tp[i], conf[i], pred_cls[i]
# Find unique classes
unique_classes = np.unique(target_cls)
# Create Precision-Recall curve and compute AP for each class
ap, p, r = [], [], []
for c in unique_classes:
i = pred_cls == c
n_gt = (target_cls == c).sum() # Number of ground truth objects
n_p = i.sum() # Number of predicted objects
if n_p == 0 and n_gt == 0:
continue
elif n_p == 0 or n_gt == 0:
ap.append(0)
r.append(0)
p.append(0)
else:
# Accumulate FPs and TPs
fpc = (1 - tp[i]).cumsum()
tpc = (tp[i]).cumsum()
# Recall
recall_curve = tpc / (n_gt + 1e-16)
r.append(recall_curve[-1])
# Precision
precision_curve = tpc / (tpc + fpc)
p.append(precision_curve[-1])
# AP from recall-precision curve
ap.append(compute_ap(recall_curve, precision_curve))
# Plot
# plt.plot(recall_curve, precision_curve)
# Compute F1 score (harmonic mean of precision and recall)
p, r, ap = np.array(p), np.array(r), np.array(ap)
f1 = 2 * p * r / (p + r + 1e-16)
return p, r, ap, f1, unique_classes.astype('int32')
def compute_ap(recall, precision):
""" Compute the average precision, given the recall and precision curves.
Source: https://github.com/rbgirshick/py-faster-rcnn.
# Arguments
recall: The recall curve (list).
precision: The precision curve (list).
# Returns
The average precision as computed in py-faster-rcnn.
"""
# correct AP calculation
# first append sentinel values at the end
mrec = np.concatenate(([0.], recall, [1.]))
mpre = np.concatenate(([0.], precision, [0.]))
# compute the precision envelope
for i in range(mpre.size - 1, 0, -1):
mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
# to calculate area under PR curve, look for points
# where X axis (recall) changes value
i = np.where(mrec[1:] != mrec[:-1])[0]
# and sum (\Delta recall) * prec
ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
return ap
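# Worked example (added): for recall [0.5, 1.0] with precision [1.0, 0.5], the
# precision envelope is 1.0 up to recall 0.5 and 0.5 afterwards, so
# AP = 0.5 * 1.0 + 0.5 * 0.5 = 0.75.
def _demo_compute_ap():
    print("AP:", compute_ap(recall=[0.5, 1.0], precision=[1.0, 0.5]))  # 0.75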
def bbox_iou(box1, box2, x1y1x2y2=True):
# Returns the IoU of box1 to box2. box1 is 4, box2 is nx4
box2 = box2.t()
# Get the coordinates of bounding boxes
if x1y1x2y2:
# x1, y1, x2, y2 = box1
b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]
else:
# x, y, w, h = box1
b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2
b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2
b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2
b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2
# Intersection area
inter_area = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * \
(torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0)
# Union Area
union_area = ((b1_x2 - b1_x1) * (b1_y2 - b1_y1) + 1e-16) + \
(b2_x2 - b2_x1) * (b2_y2 - b2_y1) - inter_area
return inter_area / union_area # iou
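# Quick check (added): two 10 x 10 boxes shifted by half their width overlap in
# a 5 x 10 region, so IoU = 50 / 150 = 1/3.
def _demo_bbox_iou():
    box1 = torch.tensor([0., 0., 10., 10.])
    others = torch.tensor([[5., 0., 15., 10.]])
    print(bbox_iou(box1, others))  # ~0.3333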
def wh_iou(box1, box2):
box2 = box2.t()
# w, h = box1
w1, h1 = box1[0], box1[1]
w2, h2 = box2[0], box2[1]
# Intersection area
inter_area = torch.min(w1, w2) * torch.min(h1, h2)
# Union Area
union_area = (w1 * h1 + 1e-16) + w2 * h2 - inter_area
return inter_area / union_area # iou
def compute_loss(p, targets): # predictions, targets
FT = torch.cuda.FloatTensor if p[0].is_cuda else torch.FloatTensor
lxy, lwh, lcls, lconf = FT([0]), FT([0]), FT([0]), FT([0]) # initialise the losses to 0
txy, twh, tcls, indices = targets
MSE = nn.MSELoss()
CE = nn.CrossEntropyLoss()
BCE = nn.BCEWithLogitsLoss()# used for multi-label targets such as [1,1,0]
# Compute losses
for i, pi0 in enumerate(p): # layer i predictions, i
b, a, gj, gi = indices[i] # image_idx, anchor_idx, gridx, gridy
# print(i,') b, a, gj, gi : ')
# print('b', b)
# print('a', a)
# print('gj', gj)
# print('gi', gi)
tconf = torch.zeros_like(pi0[..., 0]) # conf
# print('tconf: ',tconf.size())
# Compute losses
k = 1 # nT / bs
if len(b) > 0:
pi = pi0[b, a, gj, gi] # predictions closest to anchors
tconf[b, a, gj, gi] = 1 # conf
lxy += (k * 8) * MSE(torch.sigmoid(pi[..., 0:2]), txy[i]) # xy loss
lwh += (k * 4) * MSE(pi[..., 2:4], twh[i]) # wh loss
lcls += (k * 1) * CE(pi[..., 5:], tcls[i]) # class_conf loss
lconf += (k * 64) * BCE(pi0[..., 4], tconf) # obj_conf loss
loss = lxy + lwh + lconf + lcls
# Add to dictionary
d = defaultdict(float)
losses = [loss.item(), lxy.item(), lwh.item(), lconf.item(), lcls.item()]
for name, x in zip(['total', 'xy', 'wh', 'conf', 'cls'], losses):
d[name] = x
return loss, d
def build_targets(model, targets):
# targets = [image, class, x, y, w, h]
if isinstance(model, nn.parallel.DistributedDataParallel):
model = model.module
txy, twh, tcls, indices = [], [], [], []
for i, layer in enumerate(get_yolo_layers(model)):# iterate over the 3 yolo layers
# print(i,'layer ',model.module_list[layer])
layer = model.module_list[layer][0]
# iou of targets-anchors
gwh = targets[:, 4:6] * layer.nG # wh measured in grid units
iou = [wh_iou(x, gwh) for x in layer.anchor_vec]
iou, a = torch.stack(iou, 0).max(0) # best iou and anchor
# reject below threshold ious (OPTIONAL, increases P, lowers R)
reject = True
if reject:
j = iou > 0.10
t, a, gwh = targets[j], a[j], gwh[j]
else:
t = targets
# Indices
b, c = t[:, :2].long().t() # target image, class
gxy = t[:, 2:4] * layer.nG
gi, gj = gxy.long().t() # grid_i, grid_j
indices.append((b, a, gj, gi)) # img_index , anchor_index , grid_x , grid_y
# print('b, a, gj, gi : ')
# print('b', b)
# print('a', a)
# print('gj', gj)
# print('gi', gi)
# print('class c',c)
# XY coordinates
txy.append(gxy - gxy.floor())# convert to offsets relative to the grid cell
# Width and height
twh.append(torch.log(gwh / layer.anchor_vec[a])) # yolo method (log-space wh)
# twh.append(torch.sqrt(gwh / layer.anchor_vec[a]) / 2) # power method
# Class
tcls.append(c)
# try:
# print('c.max,layer.nC: ',c.max().item() ,layer.nC)
# except:
# pass
if c.shape[0]:
assert c.max().item() <= layer.nC, 'Target classes exceed model classes'
return txy, twh, tcls, indices
# @profile
def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4):
"""
Removes detections with lower object confidence score than 'conf_thres'
Non-Maximum Suppression to further filter detections.
Returns detections with shape:
(x1, y1, x2, y2, object_conf, class_conf, class)
"""
min_wh = 2 # (pixels) minimum box width and height
output = [None] * len(prediction)
for image_i, pred in enumerate(prediction):
# Experiment: Prior class size rejection
# x, y, w, h = pred[:, 0], pred[:, 1], pred[:, 2], pred[:, 3]
# a = w * h # area
# ar = w / (h + 1e-16) # aspect ratio
# n = len(w)
# log_w, log_h, log_a, log_ar = torch.log(w), torch.log(h), torch.log(a), torch.log(ar)
# shape_likelihood = np.zeros((n, 60), dtype=np.float32)
# x = np.concatenate((log_w.reshape(-1, 1), log_h.reshape(-1, 1)), 1)
# from scipy.stats import multivariate_normal
# for c in range(60):
# shape_likelihood[:, c] =
# multivariate_normal.pdf(x, mean=mat['class_mu'][c, :2], cov=mat['class_cov'][c, :2, :2])
# Filter out confidence scores below threshold
class_conf, class_pred = pred[:, 5:].max(1) # max class_conf, index
pred[:, 4] *= class_conf # final conf = obj_conf * class_conf
i = (pred[:, 4] > conf_thres) & (pred[:, 2] > min_wh) & (pred[:, 3] > min_wh)
# s2=time.time()
pred2 = pred[i]
# print("++++++pred2 = pred[i]",time.time()-s2, pred2)
# If none are remaining => process next image
if len(pred2) == 0:
continue
# Select predicted classes
class_conf = class_conf[i]
class_pred = class_pred[i].unsqueeze(1).float()
# Box (center x, center y, width, height) to (x1, y1, x2, y2)
pred2[:, :4] = xywh2xyxy(pred2[:, :4])
# pred[:, 4] *= class_conf # improves mAP from 0.549 to 0.551
# Detections ordered as (x1y1x2y2, obj_conf, class_conf, class_pred)
pred2 = torch.cat((pred2[:, :5], class_conf.unsqueeze(1), class_pred), 1)
# Get detections sorted by decreasing confidence scores
pred2 = pred2[(-pred2[:, 4]).argsort()]
det_max = []
nms_style = 'MERGE' # 'OR' (default), 'AND', 'MERGE' (experimental)
for c in pred2[:, -1].unique():
dc = pred2[pred2[:, -1] == c] # select class c
dc = dc[:min(len(dc), 100)] # limit to first 100 boxes
# Non-maximum suppression
if nms_style == 'OR': # default
# METHOD1
# ind = list(range(len(dc)))
# while len(ind):
# j = ind[0]
# det_max.append(dc[j:j + 1]) # save highest conf detection
# reject = (bbox_iou(dc[j], dc[ind]) > nms_thres).nonzero()
# [ind.pop(i) for i in reversed(reject)]
# METHOD2
while dc.shape[0]:
det_max.append(dc[:1]) # save highest conf detection
if len(dc) == 1: # Stop if we're at the last detection
break
iou = bbox_iou(dc[0], dc[1:]) # iou with other boxes
dc = dc[1:][iou < nms_thres] # remove ious > threshold
elif nms_style == 'AND': # requires overlap, single boxes erased
while len(dc) > 1:
iou = bbox_iou(dc[0], dc[1:]) # iou with other boxes
if iou.max() > 0.5:
det_max.append(dc[:1])
dc = dc[1:][iou < nms_thres] # remove ious > threshold
elif nms_style == 'MERGE': # weighted mixture box
while len(dc):
i = bbox_iou(dc[0], dc) > nms_thres # iou with other boxes
weights = dc[i, 4:5]
dc[0, :4] = (weights * dc[i, :4]).sum(0) / weights.sum()
det_max.append(dc[:1])
dc = dc[i == 0]
if len(det_max):
det_max = torch.cat(det_max) # concatenate
output[image_i] = det_max[(-det_max[:, 4]).argsort()] # sort
return output
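# Small synthetic example (added): three single-class predictions laid out as
# (cx, cy, w, h, obj_conf, class_conf). The two overlapping boxes are merged by
# the 'MERGE' style and the low-confidence box is dropped by conf_thres.
def _demo_non_max_suppression():
    pred = torch.tensor([[100., 100., 50., 60., 0.9, 0.8],
                         [102., 101., 48., 58., 0.7, 0.6],
                         [300., 200., 40., 40., 0.2, 0.9]])
    dets = non_max_suppression([pred], conf_thres=0.3, nms_thres=0.5)[0]
    print(dets)  # rows of (x1, y1, x2, y2, obj_conf * class_conf, class_conf, class)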
def get_yolo_layers(model):
yolo_layer_index = []
for index, l in enumerate(model.module_list):
try:
a = l[0].img_size and l[0].nG # only yolo layer need img_size and nG
# print("---"*50)
# print(l, index)
yolo_layer_index.append(index)
except:
pass
assert len(yolo_layer_index) > 0, "can not find yolo layer"
return yolo_layer_index
#-*-coding:utf-8-*-
# date:2021-04-16
# Author: Eric.Lee
# function: yolo v3 face detect
import os
import cv2
import numpy as np
import time
import torch
from hand_detect.yolov3 import Yolov3, Yolov3Tiny
from hand_detect.utils.torch_utils import select_device
from hand_detect.acc_model import acc_model
import torch.backends.cudnn as cudnn
import torch.nn.functional as F
import random
def show_model_param(model):
params = list(model.parameters())
k = 0
for i in params:
l = 1
for j in i.size():
l *= j
print("该层的结构: {}, 参数和: {}".format(str(list(i.size())), str(l)))
k = k + l
print("----------------------")
print("总参数数量和: " + str(k))
def process_data(img, img_size=416):# image preprocessing
img, _, _, _ = letterbox(img, height=img_size)
# Normalize RGB
img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB
img = np.ascontiguousarray(img, dtype=np.float32) # uint8 to float32
img /= 255.0 # 0 - 255 to 0.0 - 1.0
return img
def plot_one_box(x, img, color=None, label=None, line_thickness=None):
# Plots one bounding box on image img
tl = line_thickness or round(0.002 * max(img.shape[0:2])) + 1 # line thickness
color = color or [random.randint(0, 255) for _ in range(3)]
c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
cv2.rectangle(img, c1, c2, color, thickness=tl)
if label:
tf = max(tl - 1, 1) # font thickness
t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
cv2.rectangle(img, c1, c2, color, -1) # filled
cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 3, [255, 55,90], thickness=tf, lineType=cv2.LINE_AA)
def bbox_iou(box1, box2, x1y1x2y2=True):
# Returns the IoU of box1 to box2. box1 is 4, box2 is nx4
box2 = box2.t()
# Get the coordinates of bounding boxes
if x1y1x2y2:
# x1, y1, x2, y2 = box1
b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]
else:
# x, y, w, h = box1
b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2
b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2
b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2
b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2
# Intersection area
inter_area = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * \
(torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0)
# Union Area
union_area = ((b1_x2 - b1_x1) * (b1_y2 - b1_y1) + 1e-16) + \
(b2_x2 - b2_x1) * (b2_y2 - b2_y1) - inter_area
return inter_area / union_area # iou
def xywh2xyxy(x):
# Convert bounding box format from [x, y, w, h] to [x1, y1, x2, y2]
y = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x)
y[:, 0] = x[:, 0] - x[:, 2] / 2
y[:, 1] = x[:, 1] - x[:, 3] / 2
y[:, 2] = x[:, 0] + x[:, 2] / 2
y[:, 3] = x[:, 1] + x[:, 3] / 2
return y
def scale_coords(img_size, coords, img0_shape):# rescale from network input size back to the original image size
# Rescale x1, y1, x2, y2 from 416 to image size
# print('coords : ',coords)
# print('img0_shape : ',img0_shape)
gain = float(img_size) / max(img0_shape) # gain = old / new
# print('gain : ',gain)
pad_x = (img_size - img0_shape[1] * gain) / 2 # width padding
pad_y = (img_size - img0_shape[0] * gain) / 2 # height padding
# print('pad_xpad_y : ',pad_x,pad_y)
coords[:, [0, 2]] -= pad_x
coords[:, [1, 3]] -= pad_y
coords[:, :4] /= gain
coords[:, :4] = torch.clamp(coords[:, :4], min=0)# clamp so coordinates are not negative
return coords
def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4):
"""
Removes detections with lower object confidence score than 'conf_thres'
Non-Maximum Suppression to further filter detections.
Returns detections with shape:
(x1, y1, x2, y2, object_conf, class_conf, class)
"""
min_wh = 2 # (pixels) minimum box width and height
output = [None] * len(prediction)
for image_i, pred in enumerate(prediction):
# Experiment: Prior class size rejection
# x, y, w, h = pred[:, 0], pred[:, 1], pred[:, 2], pred[:, 3]
# a = w * h # area
# ar = w / (h + 1e-16) # aspect ratio
# n = len(w)
# log_w, log_h, log_a, log_ar = torch.log(w), torch.log(h), torch.log(a), torch.log(ar)
# shape_likelihood = np.zeros((n, 60), dtype=np.float32)
# x = np.concatenate((log_w.reshape(-1, 1), log_h.reshape(-1, 1)), 1)
# from scipy.stats import multivariate_normal
# for c in range(60):
# shape_likelihood[:, c] =
# multivariate_normal.pdf(x, mean=mat['class_mu'][c, :2], cov=mat['class_cov'][c, :2, :2])
# Filter out confidence scores below threshold
class_conf, class_pred = pred[:, 5:].max(1) # max class_conf, index
pred[:, 4] *= class_conf # final conf = obj_conf * class_conf
i = (pred[:, 4] > conf_thres) & (pred[:, 2] > min_wh) & (pred[:, 3] > min_wh)
# s2=time.time()
pred2 = pred[i]
# print("++++++pred2 = pred[i]",time.time()-s2, pred2)
# If none are remaining => process next image
if len(pred2) == 0:
continue
# Select predicted classes
class_conf = class_conf[i]
class_pred = class_pred[i].unsqueeze(1).float()
# Box (center x, center y, width, height) to (x1, y1, x2, y2)
pred2[:, :4] = xywh2xyxy(pred2[:, :4])
# pred[:, 4] *= class_conf # improves mAP from 0.549 to 0.551
# Detections ordered as (x1y1x2y2, obj_conf, class_conf, class_pred)
pred2 = torch.cat((pred2[:, :5], class_conf.unsqueeze(1), class_pred), 1)
# Get detections sorted by decreasing confidence scores
pred2 = pred2[(-pred2[:, 4]).argsort()]
det_max = []
nms_style = 'MERGE' # 'OR' (default), 'AND', 'MERGE' (experimental)
for c in pred2[:, -1].unique():
dc = pred2[pred2[:, -1] == c] # select class c
dc = dc[:min(len(dc), 100)] # limit to first 100 boxes
# Non-maximum suppression
if nms_style == 'OR': # default
# METHOD1
# ind = list(range(len(dc)))
# while len(ind):
# j = ind[0]
# det_max.append(dc[j:j + 1]) # save highest conf detection
# reject = (bbox_iou(dc[j], dc[ind]) > nms_thres).nonzero()
# [ind.pop(i) for i in reversed(reject)]
# METHOD2
while dc.shape[0]:
det_max.append(dc[:1]) # save highest conf detection
if len(dc) == 1: # Stop if we're at the last detection
break
iou = bbox_iou(dc[0], dc[1:]) # iou with other boxes
dc = dc[1:][iou < nms_thres] # remove ious > threshold
elif nms_style == 'AND': # requires overlap, single boxes erased
while len(dc) > 1:
iou = bbox_iou(dc[0], dc[1:]) # iou with other boxes
if iou.max() > 0.5:
det_max.append(dc[:1])
dc = dc[1:][iou < nms_thres] # remove ious > threshold
elif nms_style == 'MERGE': # weighted mixture box
while len(dc):
i = bbox_iou(dc[0], dc) > nms_thres # iou with other boxes
weights = dc[i, 4:5]
dc[0, :4] = (weights * dc[i, :4]).sum(0) / weights.sum()
det_max.append(dc[:1])
dc = dc[i == 0]
if len(det_max):
det_max = torch.cat(det_max) # concatenate
output[image_i] = det_max[(-det_max[:, 4]).argsort()] # sort
return output
def letterbox(img, height=416, augment=False, color=(127.5, 127.5, 127.5)):
# Resize a rectangular image to a padded square
shape = img.shape[:2] # shape = [height, width]
ratio = float(height) / max(shape) # ratio = old / new
new_shape = (round(shape[1] * ratio), round(shape[0] * ratio))
dw = (height - new_shape[0]) / 2 # width padding
dh = (height - new_shape[1]) / 2 # height padding
top, bottom = round(dh - 0.1), round(dh + 0.1)
left, right = round(dw - 0.1), round(dw + 0.1)
# resize img
if augment:
interpolation = np.random.choice([None, cv2.INTER_NEAREST, cv2.INTER_LINEAR,
None, cv2.INTER_NEAREST, cv2.INTER_LINEAR,
cv2.INTER_AREA, cv2.INTER_CUBIC, cv2.INTER_LANCZOS4])
if interpolation is None:
img = cv2.resize(img, new_shape)
else:
img = cv2.resize(img, new_shape, interpolation=interpolation)
else:
img = cv2.resize(img, new_shape, interpolation=cv2.INTER_NEAREST)
# print("resize time:",time.time()-s1)
img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # padded square
return img, ratio, dw, dh
#---------------------------------------------------------
# model_path = './coco_model/yolov3_coco.pt' # detection model path
# model_arch = 'yolov3' # model type
# img_size = 416 # input image size
# conf_thres = 0.35 # detection confidence threshold
# nms_thres = 0.5 # nms threshold
class yolo_v3_face_model(object):
def __init__(self,
model_path = './components/face_detect/weights/face_yolo_416-20210418.pt',
model_arch = 'yolov3',
yolo_anchor_scale = 1.,
img_size=416,
conf_thres=0.4,
nms_thres=0.4,):
print("yolo v3 face_model loading : {}".format(model_path))
self.use_cuda = torch.cuda.is_available()
self.device = torch.device("cuda:0" if self.use_cuda else "cpu")
self.img_size = img_size
self.classes = ["Face"]
self.num_classes = len(self.classes)
self.conf_thres = conf_thres
self.nms_thres = nms_thres
#-----------------------------------------------------------------------
weights = model_path
if "tiny" in model_arch:
a_scalse = 416./img_size*yolo_anchor_scale
anchors=[(10, 14), (23, 27), (37, 58), (81, 82), (135, 169), (344, 319)]
anchors_new = [ (int(anchors[j][0]/a_scalse),int(anchors[j][1]/a_scalse)) for j in range(len(anchors)) ]
model = Yolov3Tiny(self.num_classes,anchors = anchors_new)
else:
a_scalse = 416./img_size
anchors=[(10,13), (16,30), (33,23), (30,61), (62,45), (59,119), (116,90), (156,198), (373,326)]
anchors_new = [ (int(anchors[j][0]/a_scalse),int(anchors[j][1]/a_scalse)) for j in range(len(anchors)) ]
model = Yolov3(self.num_classes,anchors = anchors_new)
#-----------------------------------------------------------------------
self.model = model
# show_model_param(self.model) # print the model parameters
# print('num_classes : ',self.num_classes)
self.device = select_device() # choose the device to run on
self.use_cuda = torch.cuda.is_available()
# Load weights
if os.access(weights,os.F_OK):# check whether the model file exists
self.model.load_state_dict(torch.load(weights, map_location=lambda storage, loc: storage)['model'])
else:
print('------- >>> error : model not exists')
raise FileNotFoundError(weights) # __init__ must not return a value
#
self.model.eval() # set the model to eval mode
acc_model('',self.model)
self.model = self.model.to(self.device)
def predict(self, img_,vis):
with torch.no_grad():
t = time.time()
img = process_data(img_, self.img_size)
t1 = time.time()
img = torch.from_numpy(img).unsqueeze(0).to(self.device)
pred, _ = self.model(img)# run detection on the image
t2 = time.time()
detections = non_max_suppression(pred, self.conf_thres, self.nms_thres)[0] # nms
t3 = time.time()
# print("t3 time:", t3)
if (detections is None) or len(detections) == 0:
return []
# Rescale boxes from 416 to true image size
detections[:, :4] = scale_coords(self.img_size, detections[:, :4], img_.shape).round()
# draw the detection results
dets_for_landmarks = []
colors = [(v // 32 * 64 + 64, (v // 8) % 4 * 64, v % 8 * 32) for v in range(1, 10 + 1)][::-1]
output_dict_ = []
for *xyxy, conf, cls_conf, cls in detections:
label = '%s %.2f' % (self.classes[0], conf)
x1,y1,x2,y2 = xyxy
output_dict_.append((float(x1),float(y1),float(x2),float(y2),float(conf.item())))
if vis:
plot_one_box(xyxy, img_, label=label, color=(0,175,255), line_thickness = 2)
return output_dict_
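# Usage sketch (illustrative): running the face detector on a single image.
# The image path is an assumption; the weights path defaults to the one above
# and must exist on disk.
def _demo_face_detect(img_path="./samples/test.jpg"):
    detector = yolo_v3_face_model(conf_thres=0.4)
    img = cv2.imread(img_path)
    dets = detector.predict(img, vis=True)  # list of (x1, y1, x2, y2, conf); boxes drawn in place
    print("detected {} face(s)".format(len(dets)))
    cv2.imwrite("./face_detect_result.jpg", img)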
This diff has been collapsed.
#-*-coding:utf-8-*-
# date:2020-04-11
# author: Eric.Lee
# function : face_euler_angle - yaw,pitch,roll
import os
import torch
import cv2
import torch.nn.functional as F
from face_euler_angle.network.resnet import resnet18
from face_euler_angle.utils.common_utils import *
import numpy as np
class FaceAngle_Model(object):
def __init__(self,
model_path = './components/face_euler_angle/weights_euler_angle/resnet_18_imgsize_256-epoch-225.pth',
img_size=256,
num_classes = 3,# yaw,pitch,roll
):
use_cuda = torch.cuda.is_available()
self.use_cuda = use_cuda
self.device = torch.device("cuda:0" if use_cuda else "cpu") # 可选的设备类型及序号
self.img_size = img_size
#-----------------------------------------------------------------------
model_ = resnet18(num_classes=num_classes, img_size=img_size)
chkpt = torch.load(model_path, map_location=lambda storage, loc: storage)
model_.load_state_dict(chkpt)
model_.eval()
print("load face euler angle model : {}".format(model_path))
self.model_ = model_.to(self.device)
def predict(self, img,vis = False):# img is the aligned face image
with torch.no_grad():
img_ = torch.from_numpy(img)
# img_ = img_.unsqueeze_(0)
if self.use_cuda:
img_ = img_.cuda() # (bs, 3, h, w)
output_ = self.model_(img_.float())
# print(pre_.size())
output_ = output_.cpu().detach().numpy()
output_ = output_*90.
return output_
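# Usage sketch (illustrative): predict() above expects an already-batched face
# crop as a numpy array in (batch, 3, H, W) layout. The resize/normalisation
# below are assumptions made only for this sketch; the real pipeline prepares
# the crops in its own face components.
def _demo_face_euler_angle(face_bgr):
    model = FaceAngle_Model()
    crop = cv2.resize(face_bgr, (256, 256)).astype(np.float32)
    crop = (crop - 128.) / 256.                    # assumed normalisation
    crop = crop.transpose(2, 0, 1)[np.newaxis, :]  # (1, 3, 256, 256)
    yaw, pitch, roll = model.predict(crop)[0]
    print("yaw/pitch/roll (deg):", yaw, pitch, roll)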
import torch
import torch.nn as nn
import math
import torch.utils.model_zoo as model_zoo
__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
'resnet152']
model_urls = {
'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
}
def conv3x3(in_planes, out_planes, stride=1):
"""3x3 convolution with padding"""
return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
padding=1, bias=False)
class BasicBlock(nn.Module):
expansion = 1
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(BasicBlock, self).__init__()
self.conv1 = conv3x3(inplanes, planes, stride)
self.bn1 = nn.BatchNorm2d(planes)
self.relu = nn.ReLU(inplace=True)
self.conv2 = conv3x3(planes, planes)
self.bn2 = nn.BatchNorm2d(planes)
self.downsample = downsample
self.stride = stride
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = self.relu(out)
return out
class Bottleneck(nn.Module):
expansion = 4
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(Bottleneck, self).__init__()
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(planes)
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(planes)
self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
self.bn3 = nn.BatchNorm2d(planes * 4)
self.relu = nn.ReLU(inplace=True)
self.downsample = downsample
self.stride = stride
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = self.relu(out)
return out
class ResNet(nn.Module):
def __init__(self, block, layers, num_classes=1000, img_size=224,dropout_factor = 1.):
self.inplanes = 64
self.dropout_factor = dropout_factor
super(ResNet, self).__init__()
self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
bias=False)
self.bn1 = nn.BatchNorm2d(64)
self.relu = nn.ReLU(inplace=True)
# see this issue: https://github.com/xxradon/PytorchToCaffe/issues/16
# self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)
self.layer1 = self._make_layer(block, 64, layers[0])
self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
assert img_size % 32 == 0
pool_kernel = int(img_size / 32)
self.avgpool = nn.AvgPool2d(pool_kernel, stride=1, ceil_mode=True)
self.dropout = nn.Dropout(self.dropout_factor)
self.fc = nn.Linear(512 * block.expansion, num_classes)
for m in self.modules():
if isinstance(m, nn.Conv2d):
n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
m.weight.data.normal_(0, math.sqrt(2. / n))
elif isinstance(m, nn.BatchNorm2d):
m.weight.data.fill_(1)
m.bias.data.zero_()
def _make_layer(self, block, planes, blocks, stride=1):
downsample = None
if stride != 1 or self.inplanes != planes * block.expansion:
downsample = nn.Sequential(
nn.Conv2d(self.inplanes, planes * block.expansion,
kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(planes * block.expansion),
)
layers = []
layers.append(block(self.inplanes, planes, stride, downsample))
self.inplanes = planes * block.expansion
for i in range(1, blocks):
layers.append(block(self.inplanes, planes))
return nn.Sequential(*layers)
def forward(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
x = self.avgpool(x)
x = x.view(x.size(0), -1)
x = self.dropout(x)
x = self.fc(x)
return x
def load_model(model, pretrained_state_dict):
model_dict = model.state_dict()
pretrained_dict = {k: v for k, v in pretrained_state_dict.items() if
k in model_dict and model_dict[k].size() == pretrained_state_dict[k].size()}
model.load_state_dict(pretrained_dict, strict=False)
if len(pretrained_dict) == 0:
print("[INFO] No params were loaded ...")
else:
for k, v in pretrained_state_dict.items():
if k in pretrained_dict:
print("==>> Load {} {}".format(k, v.size()))
else:
print("[INFO] Skip {} {}".format(k, v.size()))
return model
def resnet18(pretrained=False, **kwargs):
"""Constructs a ResNet-18 model.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
"""
model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
if pretrained:
# model.load_state_dict(model_zoo.load_url(model_urls['resnet18']))
print("Load pretrained model from {}".format(model_urls['resnet18']))
pretrained_state_dict = model_zoo.load_url(model_urls['resnet18'])
model = load_model(model, pretrained_state_dict)
return model
def resnet34(pretrained=False, **kwargs):
"""Constructs a ResNet-34 model.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
"""
model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs)
if pretrained:
# model.load_state_dict(model_zoo.load_url(model_urls['resnet34']))
print("Load pretrained model from {}".format(model_urls['resnet34']))
pretrained_state_dict = model_zoo.load_url(model_urls['resnet34'])
model = load_model(model, pretrained_state_dict)
return model
def resnet50(pretrained=False, **kwargs):
"""Constructs a ResNet-50 model.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
"""
model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
if pretrained:
# model.load_state_dict(model_zoo.load_url(model_urls['resnet50']))
print("Load pretrained model from {}".format(model_urls['resnet50']))
pretrained_state_dict = model_zoo.load_url(model_urls['resnet50'])
model = load_model(model, pretrained_state_dict)
return model
def resnet101(pretrained=False, **kwargs):
"""Constructs a ResNet-101 model.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
"""
model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs)
if pretrained:
# model.load_state_dict(model_zoo.load_url(model_urls['resnet101']))
print("Load pretrained model from {}".format(model_urls['resnet101']))
pretrained_state_dict = model_zoo.load_url(model_urls['resnet101'])
model = load_model(model, pretrained_state_dict)
return model
def resnet152(pretrained=False, **kwargs):
"""Constructs a ResNet-152 model.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
"""
model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs)
if pretrained:
# model.load_state_dict(model_zoo.load_url(model_urls['resnet152']))
print("Load pretrained model from {}".format(model_urls['resnet152']))
pretrained_state_dict = model_zoo.load_url(model_urls['resnet152'])
model = load_model(model, pretrained_state_dict)
return model
if __name__ == "__main__":
input = torch.randn([32, 3, 256,256])
model = resnet34(False, num_classes=2, img_size=256)
output = model(input)
print(output.size())
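A minimal usage sketch (not part of the original file), assuming it runs in the same module context and that the torchvision model_zoo URLs above are reachable: load_model() only copies pretrained tensors whose name and shape both match, so an ImageNet checkpoint can seed a backbone whose final fc layer has a different class count, while the mismatched fc weights stay randomly initialised.

model = resnet18(pretrained=True, num_classes=2, img_size=256)  # mismatched fc.weight / fc.bias are skipped by load_model()
model.eval()
with torch.no_grad():
    logits = model(torch.randn(1, 3, 256, 256))
print(logits.size())  # expected: torch.Size([1, 2])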
#-*-coding:utf-8-*-
# date:2020-04-11
# Author: Eric.Lee
# function: common utils
import os
import shutil
import cv2
import numpy as np
import json
import random  # needed by plot_box below
def mkdir_(path, flag_rm=False):
if os.path.exists(path):
if flag_rm == True:
shutil.rmtree(path)
os.mkdir(path)
print('remove {} done ~ '.format(path))
else:
os.mkdir(path)
def plot_box(bbox, img, color=None, label=None, line_thickness=None):
tl = line_thickness or round(0.002 * max(img.shape[0:2])) + 1
color = color or [random.randint(0, 255) for _ in range(3)]
c1, c2 = (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3]))
    cv2.rectangle(img, c1, c2, color, thickness=tl)  # target bbox
if label:
tf = max(tl - 2, 1)
t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0] # label size
        c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3  # text box for the label
        cv2.rectangle(img, c1, c2, color, -1)  # filled label background
        # draw the label text
cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 4, [225, 255, 255],thickness=tf, lineType=cv2.LINE_AA)
class JSON_Encoder(json.JSONEncoder):
def default(self, obj):
if isinstance(obj, np.integer):
return int(obj)
elif isinstance(obj, np.floating):
return float(obj)
elif isinstance(obj, np.ndarray):
return obj.tolist()
else:
return super(JSON_Encoder, self).default(obj)
def draw_landmarks(img,output,draw_circle):
img_width = img.shape[1]
img_height = img.shape[0]
dict_landmarks = {}
for i in range(int(output.shape[0]/2)):
x = output[i*2+0]*float(img_width)
y = output[i*2+1]*float(img_height)
if 41>= i >=33:
if 'left_eyebrow' not in dict_landmarks.keys():
dict_landmarks['left_eyebrow'] = []
dict_landmarks['left_eyebrow'].append([int(x),int(y),(0,255,0)])
if draw_circle:
cv2.circle(img, (int(x),int(y)), 2, (0,255,0),-1)
elif 50>= i >=42:
if 'right_eyebrow' not in dict_landmarks.keys():
dict_landmarks['right_eyebrow'] = []
dict_landmarks['right_eyebrow'].append([int(x),int(y),(0,255,0)])
if draw_circle:
cv2.circle(img, (int(x),int(y)), 2, (0,255,0),-1)
elif 67>= i >=60:
if 'left_eye' not in dict_landmarks.keys():
dict_landmarks['left_eye'] = []
dict_landmarks['left_eye'].append([int(x),int(y),(255,0,255)])
if draw_circle:
cv2.circle(img, (int(x),int(y)), 2, (255,0,255),-1)
elif 75>= i >=68:
if 'right_eye' not in dict_landmarks.keys():
dict_landmarks['right_eye'] = []
dict_landmarks['right_eye'].append([int(x),int(y),(255,0,255)])
if draw_circle:
cv2.circle(img, (int(x),int(y)), 2, (255,0,255),-1)
elif 97>= i >=96:
cv2.circle(img, (int(x),int(y)), 2, (0,0,255),-1)
elif 54>= i >=51:
if 'bridge_nose' not in dict_landmarks.keys():
dict_landmarks['bridge_nose'] = []
dict_landmarks['bridge_nose'].append([int(x),int(y),(0,170,255)])
if draw_circle:
cv2.circle(img, (int(x),int(y)), 2, (0,170,255),-1)
elif 32>= i >=0:
if 'basin' not in dict_landmarks.keys():
dict_landmarks['basin'] = []
dict_landmarks['basin'].append([int(x),int(y),(255,30,30)])
if draw_circle:
cv2.circle(img, (int(x),int(y)), 2, (255,30,30),-1)
elif 59>= i >=55:
if 'wing_nose' not in dict_landmarks.keys():
dict_landmarks['wing_nose'] = []
dict_landmarks['wing_nose'].append([int(x),int(y),(0,255,255)])
if draw_circle:
cv2.circle(img, (int(x),int(y)), 2, (0,255,255),-1)
elif 87>= i >=76:
if 'out_lip' not in dict_landmarks.keys():
dict_landmarks['out_lip'] = []
dict_landmarks['out_lip'].append([int(x),int(y),(255,255,0)])
if draw_circle:
cv2.circle(img, (int(x),int(y)), 2, (255,255,0),-1)
elif 95>= i >=88:
if 'in_lip' not in dict_landmarks.keys():
dict_landmarks['in_lip'] = []
dict_landmarks['in_lip'].append([int(x),int(y),(50,220,255)])
if draw_circle:
cv2.circle(img, (int(x),int(y)), 2, (50,220,255),-1)
else:
if draw_circle:
cv2.circle(img, (int(x),int(y)), 2, (255,0,255),-1)
return dict_landmarks
def draw_contour(image,dict):
for key in dict.keys():
# print(key)
_,_,color = dict[key][0]
if 'basin' == key or 'wing_nose' == key:
pts = np.array([[dict[key][i][0],dict[key][i][1]] for i in range(len(dict[key]))],np.int32)
# print(pts)
cv2.polylines(image,[pts],False,color)
else:
points_array = np.zeros((1,len(dict[key]),2),dtype = np.int32)
for i in range(len(dict[key])):
x,y,_ = dict[key][i]
points_array[0,i,0] = x
points_array[0,i,1] = y
# cv2.fillPoly(image, points_array, color)
cv2.drawContours(image,points_array,-1,color,thickness=1)
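A hedged usage sketch for the two helpers above (synthetic data only, assuming both functions are in scope as in this module): draw_landmarks() expects a flat array of 98 normalised (x, y) pairs, groups them into named facial regions and optionally draws the points, and draw_contour() then renders each region as a polyline or contour.

blank = np.full((256, 256, 3), 255, dtype=np.uint8)   # white canvas
fake_landmarks = np.random.rand(98 * 2)               # 98 synthetic normalised (x, y) points
regions = draw_landmarks(blank, fake_landmarks, draw_circle=True)
draw_contour(blank, regions)
print(sorted(regions.keys()))                         # ['basin', 'bridge_nose', 'in_lip', ...]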
#-*-coding:utf-8-*-
# date:2020-04-11
# Author: Eric.Lee
# function: model utils
import os
import numpy as np
import torch
import torch.backends.cudnn as cudnn
import random
def get_acc(output, label):
total = output.shape[0]
_, pred_label = output.max(1)
num_correct = (pred_label == label).sum().item()
return num_correct / float(total)
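An illustrative call to get_acc() (the values are made up): it takes raw class scores of shape (batch, num_classes) plus integer labels and returns the top-1 accuracy as a float.

scores = torch.tensor([[0.9, 0.1], [0.2, 0.8], [0.6, 0.4]])
labels = torch.tensor([0, 1, 1])
print(get_acc(scores, labels))  # 2 of 3 correct -> 0.666...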
def set_learning_rate(optimizer, lr):
for param_group in optimizer.param_groups:
param_group['lr'] = lr
def set_seed(seed = 666):
np.random.seed(seed)
random.seed(seed)
torch.manual_seed(seed)
if torch.cuda.is_available():
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
cudnn.deterministic = True
def split_trainval_datasets(ops):
print(' --------------->>> split_trainval_datasets ')
train_split_datasets = []
train_split_datasets_label = []
val_split_datasets = []
val_split_datasets_label = []
for idx,doc in enumerate(sorted(os.listdir(ops.train_path), key=lambda x:int(x.split('.')[0]), reverse=False)):
# print(' %s label is %s \n'%(doc,idx))
data_list = os.listdir(ops.train_path+doc)
random.shuffle(data_list)
cal_split_num = int(len(data_list)*ops.val_factor)
for i,file in enumerate(data_list):
if '.jpg' in file:
if i < cal_split_num:
val_split_datasets.append(ops.train_path+doc + '/' + file)
val_split_datasets_label.append(idx)
else:
train_split_datasets.append(ops.train_path+doc + '/' + file)
train_split_datasets_label.append(idx)
print(ops.train_path+doc + '/' + file,idx)
print('\n')
print('train_split_datasets len {}'.format(len(train_split_datasets)))
print('val_split_datasets len {}'.format(len(val_split_datasets)))
return train_split_datasets,train_split_datasets_label,val_split_datasets,val_split_datasets_label
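A small sketch of the ops object split_trainval_datasets() expects (paths are placeholders): it only reads ops.train_path, a directory whose class sub-folders are named so the part before the first dot is an integer (e.g. "0.xxx", "1.xxx"), and ops.val_factor, the fraction of each class held out for validation. Note that train_path must end with a slash, because paths are built by plain string concatenation.

from types import SimpleNamespace

ops = SimpleNamespace(train_path="./datasets/train/", val_factor=0.1)  # hypothetical layout
train_x, train_y, val_x, val_y = split_trainval_datasets(ops)
print(len(train_x), len(val_x))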
#-*-coding:utf-8-*-
# date:2020-04-11
# author: Eric.Lee
# function : face_multi_task - landmarks & age & gender
import os
import torch
import cv2
import torch.nn.functional as F
from face_multi_task.network.resnet import resnet50
from face_multi_task.utils.common_utils import *
import numpy as np
class FaceMuitiTask_Model(object):
def __init__(self,
model_path = './components/face_multi_task/weights_multask/resnet_50_imgsize-256-20210411.pth',
img_size=256,
                 num_classes = 196,  # face landmarks (98 points); age and gender heads are built inside the network
):
use_cuda = torch.cuda.is_available()
self.use_cuda = use_cuda
        self.device = torch.device("cuda:0" if use_cuda else "cpu")  # selectable device type and index
self.img_size = img_size
#-----------------------------------------------------------------------
face_multi_model = resnet50(landmarks_num=num_classes, img_size=img_size)
chkpt = torch.load(model_path, map_location=lambda storage, loc: storage)
face_multi_model.load_state_dict(chkpt)
face_multi_model.eval()
print("load face multi task model : {}".format(model_path))
self.face_multi_model = face_multi_model.to(self.device)
    def predict(self, img, vis=False):  # img is a preprocessed, aligned face batch of shape (N, 3, H, W)
with torch.no_grad():
# img = cv2.resize(img, (self.img_size,self.img_size), interpolation = cv2.INTER_LINEAR)
# #-------------------- inferece face
#
# img_ = img.astype(np.float32)
# img_ = (img_-128.)/256.
#
# img_ = img_.transpose(2, 0, 1)
img_ = torch.from_numpy(img)
# img_ = img_.unsqueeze_(0)
#
if self.use_cuda:
img_ = img_.cuda() # (bs, 3, h, w)
output_landmarks,output_gender,output_age = self.face_multi_model(img_.float())
# print(pre_.size())
output_landmarks = output_landmarks.cpu().detach().numpy()
            output_gender = output_gender.cpu().detach().numpy()
            output_gender = np.array(output_gender)
output_age = output_age.cpu().detach().numpy()
output_age = (output_age*100.+50.)
return output_landmarks,output_gender,output_age
if __name__ == '__main__':
    au_model = FaceMuitiTask_Model()
    path = "./samples/"
    for img_name in os.listdir(path):
        img_path = path + img_name
        img = cv2.imread(img_path)
        # preprocess: resize to the model input size, normalise and build a (1, 3, H, W) batch
        img_ = cv2.resize(img, (au_model.img_size, au_model.img_size), interpolation=cv2.INTER_LINEAR)
        img_ = ((img_.astype(np.float32) - 128.) / 256.).transpose(2, 0, 1)
        img_ = np.expand_dims(img_, 0)
        output_landmarks, output_gender, output_age = au_model.predict(img_, vis=False)
        dict_landmarks = draw_landmarks(img, output_landmarks[0], draw_circle=False)
        draw_contour(img, dict_landmarks)
        gender_str = "male" if np.argmax(output_gender[0]) == 1 else "female"
        cv2.putText(img, 'gender:{}'.format(gender_str), (2, 20),
            cv2.FONT_HERSHEY_COMPLEX, 0.8, (0, 255, 0), 2)
        cv2.putText(img, 'gender:{}'.format(gender_str), (2, 20),
            cv2.FONT_HERSHEY_COMPLEX, 0.8, (255, 20, 0), 1)
        cv2.putText(img, 'age:{:.2f}'.format(output_age[0][0]), (2, 50),
            cv2.FONT_HERSHEY_COMPLEX, 0.8, (0, 255, 0), 2)
        cv2.putText(img, 'age:{:.2f}'.format(output_age[0][0]), (2, 50),
            cv2.FONT_HERSHEY_COMPLEX, 0.8, (255, 20, 0), 1)
        cv2.namedWindow('image', 0)
        cv2.imshow('image', img)
        cv2.waitKey(500)
    cv2.destroyAllWindows()
import torch
import torch.nn as nn
import math
import torch.utils.model_zoo as model_zoo
__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
'resnet152']
model_urls = {
'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
}
def conv3x3(in_planes, out_planes, stride=1):
"""3x3 convolution with padding"""
return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
padding=1, bias=False)
class BasicBlock(nn.Module):
expansion = 1
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(BasicBlock, self).__init__()
self.conv1 = conv3x3(inplanes, planes, stride)
self.bn1 = nn.BatchNorm2d(planes)
self.relu = nn.ReLU(inplace=True)
self.conv2 = conv3x3(planes, planes)
self.bn2 = nn.BatchNorm2d(planes)
self.downsample = downsample
self.stride = stride
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = self.relu(out)
return out
class Bottleneck(nn.Module):
expansion = 4
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(Bottleneck, self).__init__()
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(planes)
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(planes)
self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
self.bn3 = nn.BatchNorm2d(planes * 4)
self.relu = nn.ReLU(inplace=True)
self.downsample = downsample
self.stride = stride
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = self.relu(out)
return out
class ResNet(nn.Module):
def __init__(self, block, layers, landmarks_num=1000, img_size=224,dropout_factor = 1.):
self.inplanes = 64
self.dropout_factor = dropout_factor
super(ResNet, self).__init__()
# 26
# 586 train_sequence
self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
bias=False)
self.bn1 = nn.BatchNorm2d(64)
self.relu = nn.ReLU(inplace=True)
# see this issue: https://github.com/xxradon/PytorchToCaffe/issues/16
# self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)
self.layer1 = self._make_layer(block, 64, layers[0])
self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
assert img_size % 32 == 0
pool_kernel = int(img_size / 32)
self.avgpool = nn.AvgPool2d(pool_kernel, stride=1, ceil_mode=True)
self.dropout1 = nn.Dropout(self.dropout_factor)
self.dropout2 = nn.Dropout(0.8)
self.dropout3 = nn.Dropout(0.8)
self.dropout = nn.Dropout(self.dropout_factor)
self.fc_landmarks_1 = nn.Linear(512 * block.expansion, 1024)
self.fc_landmarks_2 = nn.Linear(1024, landmarks_num)
self.fc_gender_1 = nn.Linear(512 * block.expansion, 64)
self.fc_gender_2 = nn.Linear(64, 2)
self.fc_age_1 = nn.Linear(512 * block.expansion, 64)
self.fc_age_2 = nn.Linear(64, 1)
for m in self.modules():
if isinstance(m, nn.Conv2d):
n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
m.weight.data.normal_(0, math.sqrt(2. / n))
elif isinstance(m, nn.BatchNorm2d):
m.weight.data.fill_(1)
m.bias.data.zero_()
def _make_layer(self, block, planes, blocks, stride=1):
downsample = None
if stride != 1 or self.inplanes != planes * block.expansion:
downsample = nn.Sequential(
nn.Conv2d(self.inplanes, planes * block.expansion,
kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(planes * block.expansion),
)
layers = []
layers.append(block(self.inplanes, planes, stride, downsample))
self.inplanes = planes * block.expansion
for i in range(1, blocks):
layers.append(block(self.inplanes, planes))
return nn.Sequential(*layers)
def forward(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
x = self.avgpool(x)
x = x.view(x.size(0), -1)
# x = self.dropout(x)
landmarks = self.fc_landmarks_1(x)
landmarks = self.dropout1(landmarks)
landmarks = self.fc_landmarks_2(landmarks)
gender = self.fc_gender_1(x)
gender = self.dropout2(gender)
gender = self.fc_gender_2(gender)
age = self.fc_age_1(x)
age = self.dropout3(age)
age = self.fc_age_2(age)
return landmarks,gender,age
def load_model(model, pretrained_state_dict):
model_dict = model.state_dict()
pretrained_dict = {k: v for k, v in pretrained_state_dict.items() if
k in model_dict and model_dict[k].size() == pretrained_state_dict[k].size()}
model.load_state_dict(pretrained_dict, strict=False)
if len(pretrained_dict) == 0:
print("[INFO] No params were loaded ...")
else:
for k, v in pretrained_state_dict.items():
if k in pretrained_dict:
print("==>> Load {} {}".format(k, v.size()))
else:
print("[INFO] Skip {} {}".format(k, v.size()))
return model
def resnet18(pretrained=False, **kwargs):
"""Constructs a ResNet-18 model.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
"""
model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
if pretrained:
# model.load_state_dict(model_zoo.load_url(model_urls['resnet18']))
print("Load pretrained model from {}".format(model_urls['resnet18']))
pretrained_state_dict = model_zoo.load_url(model_urls['resnet18'])
model = load_model(model, pretrained_state_dict)
return model
def resnet34(pretrained=False, **kwargs):
"""Constructs a ResNet-34 model.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
"""
model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs)
if pretrained:
# model.load_state_dict(model_zoo.load_url(model_urls['resnet34']))
print("Load pretrained model from {}".format(model_urls['resnet34']))
pretrained_state_dict = model_zoo.load_url(model_urls['resnet34'])
model = load_model(model, pretrained_state_dict)
return model
def resnet50(pretrained=False, **kwargs):
"""Constructs a ResNet-50 model.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
"""
model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
if pretrained:
# model.load_state_dict(model_zoo.load_url(model_urls['resnet50']))
print("Load pretrained model from {}".format(model_urls['resnet50']))
pretrained_state_dict = model_zoo.load_url(model_urls['resnet50'])
model = load_model(model, pretrained_state_dict)
return model
def resnet101(pretrained=False, **kwargs):
"""Constructs a ResNet-101 model.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
"""
model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs)
if pretrained:
# model.load_state_dict(model_zoo.load_url(model_urls['resnet101']))
print("Load pretrained model from {}".format(model_urls['resnet101']))
pretrained_state_dict = model_zoo.load_url(model_urls['resnet101'])
model = load_model(model, pretrained_state_dict)
return model
def resnet152(pretrained=False, **kwargs):
"""Constructs a ResNet-152 model.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
"""
model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs)
if pretrained:
# model.load_state_dict(model_zoo.load_url(model_urls['resnet152']))
print("Load pretrained model from {}".format(model_urls['resnet152']))
pretrained_state_dict = model_zoo.load_url(model_urls['resnet152'])
model = load_model(model, pretrained_state_dict)
return model
if __name__ == "__main__":
input = torch.randn([32, 3, 256,256])
model = resnet50(pretrained=False, landmarks_num=196, img_size=256)
landmarks,gender,age = model(input)
print(landmarks.size(),gender.size(),age.size())
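A hedged post-processing sketch for the three heads, reusing the model built in the __main__ block above: the gender head returns raw 2-way scores (the wyw2s pipeline later in this commit treats argmax index 1 as "male", which is an assumption about the training label order), and the age head is rescaled with *100 + 50, mirroring what FaceMuitiTask_Model.predict() does.

import torch.nn.functional as F

model.eval()
with torch.no_grad():
    landmarks, gender, age = model(torch.randn(2, 3, 256, 256))
gender_prob = F.softmax(gender, dim=1)   # per-face class probabilities
age_years = age * 100. + 50.             # same rescaling as FaceMuitiTask_Model.predict()
print(gender_prob.size(), age_years.size())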
#-*-coding:utf-8-*-
# date:2020-04-11
# Author: Eric.Lee
# function: common utils
import os
import shutil
import cv2
import numpy as np
import json
import random  # needed by plot_box below
def mkdir_(path, flag_rm=False):
if os.path.exists(path):
if flag_rm == True:
shutil.rmtree(path)
os.mkdir(path)
print('remove {} done ~ '.format(path))
else:
os.mkdir(path)
def plot_box(bbox, img, color=None, label=None, line_thickness=None):
tl = line_thickness or round(0.002 * max(img.shape[0:2])) + 1
color = color or [random.randint(0, 255) for _ in range(3)]
c1, c2 = (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3]))
    cv2.rectangle(img, c1, c2, color, thickness=tl)  # target bbox
if label:
tf = max(tl - 2, 1)
t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0] # label size
        c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3  # text box for the label
        cv2.rectangle(img, c1, c2, color, -1)  # filled label background
        # draw the label text
cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 4, [225, 255, 255],thickness=tf, lineType=cv2.LINE_AA)
class JSON_Encoder(json.JSONEncoder):
def default(self, obj):
if isinstance(obj, np.integer):
return int(obj)
elif isinstance(obj, np.floating):
return float(obj)
elif isinstance(obj, np.ndarray):
return obj.tolist()
else:
return super(JSON_Encoder, self).default(obj)
def draw_landmarks(img,output,draw_circle):
img_width = img.shape[1]
img_height = img.shape[0]
dict_landmarks = {}
for i in range(int(output.shape[0]/2)):
x = output[i*2+0]*float(img_width)
y = output[i*2+1]*float(img_height)
if 41>= i >=33:
if 'left_eyebrow' not in dict_landmarks.keys():
dict_landmarks['left_eyebrow'] = []
dict_landmarks['left_eyebrow'].append([int(x),int(y),(0,255,0)])
if draw_circle:
cv2.circle(img, (int(x),int(y)), 2, (0,255,0),-1)
elif 50>= i >=42:
if 'right_eyebrow' not in dict_landmarks.keys():
dict_landmarks['right_eyebrow'] = []
dict_landmarks['right_eyebrow'].append([int(x),int(y),(0,255,0)])
if draw_circle:
cv2.circle(img, (int(x),int(y)), 2, (0,255,0),-1)
elif 67>= i >=60:
if 'left_eye' not in dict_landmarks.keys():
dict_landmarks['left_eye'] = []
dict_landmarks['left_eye'].append([int(x),int(y),(255,0,255)])
if draw_circle:
cv2.circle(img, (int(x),int(y)), 2, (255,0,255),-1)
elif 75>= i >=68:
if 'right_eye' not in dict_landmarks.keys():
dict_landmarks['right_eye'] = []
dict_landmarks['right_eye'].append([int(x),int(y),(255,0,255)])
if draw_circle:
cv2.circle(img, (int(x),int(y)), 2, (255,0,255),-1)
elif 97>= i >=96:
cv2.circle(img, (int(x),int(y)), 2, (0,0,255),-1)
elif 54>= i >=51:
if 'bridge_nose' not in dict_landmarks.keys():
dict_landmarks['bridge_nose'] = []
dict_landmarks['bridge_nose'].append([int(x),int(y),(0,170,255)])
if draw_circle:
cv2.circle(img, (int(x),int(y)), 2, (0,170,255),-1)
elif 32>= i >=0:
if 'basin' not in dict_landmarks.keys():
dict_landmarks['basin'] = []
dict_landmarks['basin'].append([int(x),int(y),(255,30,30)])
if draw_circle:
cv2.circle(img, (int(x),int(y)), 2, (255,30,30),-1)
elif 59>= i >=55:
if 'wing_nose' not in dict_landmarks.keys():
dict_landmarks['wing_nose'] = []
dict_landmarks['wing_nose'].append([int(x),int(y),(0,255,255)])
if draw_circle:
cv2.circle(img, (int(x),int(y)), 2, (0,255,255),-1)
elif 87>= i >=76:
if 'out_lip' not in dict_landmarks.keys():
dict_landmarks['out_lip'] = []
dict_landmarks['out_lip'].append([int(x),int(y),(255,255,0)])
if draw_circle:
cv2.circle(img, (int(x),int(y)), 2, (255,255,0),-1)
elif 95>= i >=88:
if 'in_lip' not in dict_landmarks.keys():
dict_landmarks['in_lip'] = []
dict_landmarks['in_lip'].append([int(x),int(y),(50,220,255)])
if draw_circle:
cv2.circle(img, (int(x),int(y)), 2, (50,220,255),-1)
else:
if draw_circle:
cv2.circle(img, (int(x),int(y)), 2, (255,0,255),-1)
return dict_landmarks
def draw_contour(image,dict):
for key in dict.keys():
# print(key)
_,_,color = dict[key][0]
if 'basin' == key or 'wing_nose' == key:
pts = np.array([[dict[key][i][0],dict[key][i][1]] for i in range(len(dict[key]))],np.int32)
# print(pts)
cv2.polylines(image,[pts],False,color)
else:
points_array = np.zeros((1,len(dict[key]),2),dtype = np.int32)
for i in range(len(dict[key])):
x,y,_ = dict[key][i]
points_array[0,i,0] = x
points_array[0,i,1] = y
# cv2.fillPoly(image, points_array, color)
cv2.drawContours(image,points_array,-1,color,thickness=1)
#-*-coding:utf-8-*-
# date:2020-04-11
# Author: Eric.Lee
# function: model utils
import os
import numpy as np
import torch
import torch.backends.cudnn as cudnn
import random
def get_acc(output, label):
total = output.shape[0]
_, pred_label = output.max(1)
num_correct = (pred_label == label).sum().item()
return num_correct / float(total)
def set_learning_rate(optimizer, lr):
for param_group in optimizer.param_groups:
param_group['lr'] = lr
def set_seed(seed = 666):
np.random.seed(seed)
random.seed(seed)
torch.manual_seed(seed)
if torch.cuda.is_available():
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
cudnn.deterministic = True
def split_trainval_datasets(ops):
print(' --------------->>> split_trainval_datasets ')
train_split_datasets = []
train_split_datasets_label = []
val_split_datasets = []
val_split_datasets_label = []
for idx,doc in enumerate(sorted(os.listdir(ops.train_path), key=lambda x:int(x.split('.')[0]), reverse=False)):
# print(' %s label is %s \n'%(doc,idx))
data_list = os.listdir(ops.train_path+doc)
random.shuffle(data_list)
cal_split_num = int(len(data_list)*ops.val_factor)
for i,file in enumerate(data_list):
if '.jpg' in file:
if i < cal_split_num:
val_split_datasets.append(ops.train_path+doc + '/' + file)
val_split_datasets_label.append(idx)
else:
train_split_datasets.append(ops.train_path+doc + '/' + file)
train_split_datasets_label.append(idx)
print(ops.train_path+doc + '/' + file,idx)
print('\n')
print('train_split_datasets len {}'.format(len(train_split_datasets)))
print('val_split_datasets len {}'.format(len(val_split_datasets)))
return train_split_datasets,train_split_datasets_label,val_split_datasets,val_split_datasets_label
#-*-coding:utf-8-*-
'''
DpCas-Light
|||| |||| |||| || ||||
|| || || || || || |||| || ||
|| || || || || || || || ||
|| || || || || ||====|| ||||
|| || |||| || || ||======|| ||
|| || || || || || || || ||
|||| || |||| || || ||||
/------------------ Who You Want 2 See ------------------/
'''
# date:2021-04-17
# Author: Eric.Lee
# function: pipline
import os
import numpy as np
import cv2
import torch
from PIL import Image
def compute_iou(rec1, rec2):
"""
computing IoU
:param rec1: (y0, x0, y1, x1), which reflects
(top, left, bottom, right)
:param rec2: (y0, x0, y1, x1)
    :return: scalar overlap ratio (intersection divided by the area of rec1)
"""
# computing area of each rectangles
S_rec1 = (rec1[2] - rec1[0]) * (rec1[3] - rec1[1])
S_rec2 = (rec2[2] - rec2[0]) * (rec2[3] - rec2[1])
# computing the sum_area
sum_area = S_rec1 + S_rec2
# find the each edge of intersect rectangle
left_line = max(rec1[1], rec2[1])
right_line = min(rec1[3], rec2[3])
top_line = max(rec1[0], rec2[0])
bottom_line = min(rec1[2], rec2[2])
# judge if there is an intersect
if left_line >= right_line or top_line >= bottom_line:
return 0
else:
intersect = (right_line - left_line) * (bottom_line - top_line)
#return (intersect / (sum_area - intersect))*1.0
return (intersect / (S_rec1 + 1e-6))*1.0
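A quick illustrative check of the normalisation: compute_iou() divides the intersection by the area of rec1 rather than by the union (the standard IoU is the commented-out line above), so the result reads as "how much of rec1 is covered by rec2" and is not symmetric.

r1 = (0, 0, 10, 10)   # (top, left, bottom, right)
r2 = (0, 0, 10, 20)
print(compute_iou(r1, r2))  # ~1.0 : rec1 is fully covered by rec2
print(compute_iou(r2, r1))  # ~0.5 : only half of rec2 overlaps rec1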
def draw_landmarks(img,output,face_w,face_h,x0,y0,vis = False):
img_width = img.shape[1]
img_height = img.shape[0]
dict_landmarks = {}
eyes_center = []
x_list = []
y_list = []
for i in range(int(output.shape[0]/2)):
x = output[i*2+0]*float(face_w) + x0
y = output[i*2+1]*float(face_h) + y0
x_list.append(x)
y_list.append(y)
if 41>= i >=33:
if 'left_eyebrow' not in dict_landmarks.keys():
dict_landmarks['left_eyebrow'] = []
dict_landmarks['left_eyebrow'].append([int(x),int(y),(0,255,0)])
if vis:
cv2.circle(img, (int(x),int(y)), 2, (0,255,0),-1)
elif 50>= i >=42:
if 'right_eyebrow' not in dict_landmarks.keys():
dict_landmarks['right_eyebrow'] = []
dict_landmarks['right_eyebrow'].append([int(x),int(y),(0,255,0)])
if vis:
cv2.circle(img, (int(x),int(y)), 2, (0,255,0),-1)
elif 67>= i >=60:
if 'left_eye' not in dict_landmarks.keys():
dict_landmarks['left_eye'] = []
dict_landmarks['left_eye'].append([int(x),int(y),(255,55,255)])
if vis:
cv2.circle(img, (int(x),int(y)), 2, (255,0,255),-1)
elif 75>= i >=68:
if 'right_eye' not in dict_landmarks.keys():
dict_landmarks['right_eye'] = []
dict_landmarks['right_eye'].append([int(x),int(y),(255,55,255)])
if vis:
cv2.circle(img, (int(x),int(y)), 2, (255,0,255),-1)
elif 97>= i >=96:
eyes_center.append((x,y))
if vis:
cv2.circle(img, (int(x),int(y)), 2, (0,0,255),-1)
elif 54>= i >=51:
if 'bridge_nose' not in dict_landmarks.keys():
dict_landmarks['bridge_nose'] = []
dict_landmarks['bridge_nose'].append([int(x),int(y),(0,170,255)])
if vis:
cv2.circle(img, (int(x),int(y)), 2, (0,170,255),-1)
elif 32>= i >=0:
if 'basin' not in dict_landmarks.keys():
dict_landmarks['basin'] = []
dict_landmarks['basin'].append([int(x),int(y),(255,30,30)])
if vis:
cv2.circle(img, (int(x),int(y)), 2, (255,30,30),-1)
elif 59>= i >=55:
if 'wing_nose' not in dict_landmarks.keys():
dict_landmarks['wing_nose'] = []
dict_landmarks['wing_nose'].append([int(x),int(y),(0,255,255)])
if vis:
cv2.circle(img, (int(x),int(y)), 2, (0,255,255),-1)
elif 87>= i >=76:
if 'out_lip' not in dict_landmarks.keys():
dict_landmarks['out_lip'] = []
dict_landmarks['out_lip'].append([int(x),int(y),(255,255,0)])
if vis:
cv2.circle(img, (int(x),int(y)), 2, (255,255,0),-1)
elif 95>= i >=88:
if 'in_lip' not in dict_landmarks.keys():
dict_landmarks['in_lip'] = []
dict_landmarks['in_lip'].append([int(x),int(y),(50,220,255)])
if vis:
cv2.circle(img, (int(x),int(y)), 2, (50,220,255),-1)
else:
if vis:
cv2.circle(img, (int(x),int(y)), 2, (255,0,255),-1)
face_area = (max(x_list) - min(x_list))*(max(y_list) - min(y_list))
return dict_landmarks,eyes_center,face_area
def draw_contour(image,dict,vis = False):
    x0 = 0  # offset
y0 = 0
for key in dict.keys():
# print(key)
_,_,color = dict[key][0]
if 'left_eye' == key:
eye_x = np.mean([dict[key][i][0]+x0 for i in range(len(dict[key]))])
eye_y = np.mean([dict[key][i][1]+y0 for i in range(len(dict[key]))])
if vis:
cv2.circle(image, (int(eye_x),int(eye_y)), 3, (255,255,55),-1)
if 'right_eye' == key:
eye_x = np.mean([dict[key][i][0]+x0 for i in range(len(dict[key]))])
eye_y = np.mean([dict[key][i][1]+y0 for i in range(len(dict[key]))])
if vis:
cv2.circle(image, (int(eye_x),int(eye_y)), 3, (255,215,25),-1)
if 'basin' == key or 'wing_nose' == key:
pts = np.array([[dict[key][i][0]+x0,dict[key][i][1]+y0] for i in range(len(dict[key]))],np.int32)
if vis:
cv2.polylines(image,[pts],False,color,thickness = 2)
else:
points_array = np.zeros((1,len(dict[key]),2),dtype = np.int32)
for i in range(len(dict[key])):
x,y,_ = dict[key][i]
points_array[0,i,0] = x+x0
points_array[0,i,1] = y+y0
# cv2.fillPoly(image, points_array, color)
if vis:
cv2.drawContours(image,points_array,-1,color,thickness=2)
def plot_box(x, img, color=None, label=None, line_thickness=None):
# Plots one bounding box on image img
tl = line_thickness or round(0.002 * max(img.shape[0:2])) + 1 # line thickness
color = color or [random.randint(0, 255) for _ in range(3)]
c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
cv2.rectangle(img, c1, c2, color, thickness=tl)
if label:
tf = max(tl - 1, 2) # font thickness
t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
cv2.rectangle(img, c1, c2, color, -1) # filled
cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 3, [185, 195,190], thickness=tf, lineType=cv2.LINE_AA)
#-------------------------------------------------------------------------------
def face_alignment(imgn,eye_left_n,eye_right_n,\
desiredLeftEye=(0.34, 0.42),desiredFaceWidth=256, desiredFaceHeight=None):
if desiredFaceHeight is None:
desiredFaceHeight = desiredFaceWidth
leftEyeCenter = eye_left_n
rightEyeCenter = eye_right_n
# compute the angle between the eye centroids
dY = rightEyeCenter[1] - leftEyeCenter[1]
dX = rightEyeCenter[0] - leftEyeCenter[0]
angle = np.degrees(np.arctan2(dY, dX))
# compute the desired right eye x-coordinate based on the
# desired x-coordinate of the left eye
desiredRightEyeX = 1.0 - desiredLeftEye[0]
# determine the scale of the new resulting image by taking
# the ratio of the distance between eyes in the *current*
# image to the ratio of distance between eyes in the
# *desired* image
dist = np.sqrt((dX ** 2) + (dY ** 2))
desiredDist = (desiredRightEyeX - desiredLeftEye[0])
desiredDist *= desiredFaceWidth
scale = desiredDist / dist
# compute center (x, y)-coordinates (i.e., the median point)
# between the two eyes in the input image
eyesCenter = ((leftEyeCenter[0] + rightEyeCenter[0]) / 2,(leftEyeCenter[1] + rightEyeCenter[1]) / 2)
# grab the rotation matrix for rotating and scaling the face
M = cv2.getRotationMatrix2D(eyesCenter, angle, scale)
# update the translation component of the matrix
tX = desiredFaceWidth * 0.5
tY = desiredFaceHeight * desiredLeftEye[1]
M[0, 2] += (tX - eyesCenter[0])
M[1, 2] += (tY - eyesCenter[1])
M_reg = np.zeros((3,3),dtype = np.float32)
M_reg[0,:] = M[0,:]
M_reg[1,:] = M[1,:]
M_reg[2,:] = (0,0,1.)
# print(M_reg)
    M_I = np.linalg.inv(M_reg)  # invert the matrix to map points from the aligned output image back to the original image
# print(M_I)
# apply the affine transformation
(w, h) = (desiredFaceWidth, desiredFaceHeight)
# cv_resize_model = [cv2.INTER_LINEAR,cv2.INTER_CUBIC,cv2.INTER_NEAREST,cv2.INTER_AREA]
output = cv2.warpAffine(imgn, M, (w, h),flags=cv2.INTER_LINEAR,borderMode=cv2.BORDER_CONSTANT)#
#---------------------------------------------------------------------------------------
# ptx1 = int(eye_left_gt_n[0]*M[0][0] + eye_left_gt_n[1]*M[0][1] + M[0][2])
# pty1 = int(eye_left_gt_n[0]*M[1][0] + eye_left_gt_n[1]*M[1][1] + M[1][2])
#
# ptx2 = int(eye_right_gt_n[0]*M[0][0] + eye_right_gt_n[1]*M[0][1] + M[0][2])
# pty2 = int(eye_right_gt_n[0]*M[1][0] + eye_right_gt_n[1]*M[1][1] + M[1][2])
return output
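A hedged example of calling face_alignment() (the image path and eye coordinates are placeholders): given a frame and the two pupil centres that draw_landmarks() collects into eyes_center (landmark indices 96 and 97), it rotates and scales the frame so the eyes land on the requested normalised positions and returns a fixed-size crop.

frame = cv2.imread("sample.jpg")                      # hypothetical input frame
left_eye, right_eye = (120.0, 150.0), (180.0, 148.0)  # pixel coordinates in the frame
aligned = face_alignment(frame, left_eye, right_eye,
                         desiredLeftEye=(0.365, 0.38), desiredFaceWidth=112)
print(aligned.shape)                                  # (112, 112, 3)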
def refine_face_bbox(bbox,img_shape):
height,width,_ = img_shape
x1,y1,x2,y2 = bbox
expand_w = (x2-x1)
expand_h = (y2-y1)
x1 -= expand_w*0.12
y1 -= expand_h*0.12
x2 += expand_w*0.12
y2 += expand_h*0.08
x1,y1,x2,y2 = int(x1),int(y1),int(x2),int(y2)
x1 = np.clip(x1,0,width-1)
y1 = np.clip(y1,0,height-1)
x2 = np.clip(x2,0,width-1)
y2 = np.clip(y2,0,height-1)
return (x1,y1,x2,y2)
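An illustrative call to refine_face_bbox(): the detector box is padded by 12% of its size on the left, right and top and 8% on the bottom, then clipped to the image, so downstream cropping never indexes outside the frame.

img_shape = (480, 640, 3)                                  # (height, width, channels)
print(refine_face_bbox((100, 100, 200, 220), img_shape))   # padded: (88, 85, 212, 229)
print(refine_face_bbox((0, 0, 639, 479), img_shape))       # already at the border -> unchanged after clipping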
def get_faces_batch_attribute(face_multitask_model,face_euler_model,dets,img_raw,use_cuda,face_size = 256,vis = False):
face_map = np.zeros([112*3,112*3,3]).astype(np.uint8)
face_map[:,:,0].fill(205)
face_map[:,:,1].fill(205)
face_map[:,:,2].fill(205)
if len(dets) == 0:
return [],[],[],face_map
img_align = img_raw.copy()
    # build the batch of face crops for inference
image_batch = None
r_bboxes = []
imgs_crop = []
for b in dets:
b = list(map(int, b))
r_bbox = refine_face_bbox((b[0],b[1],b[2],b[3]),img_raw.shape)
r_bboxes.append(r_bbox)
img_crop = img_raw[r_bbox[1]:r_bbox[3],r_bbox[0]:r_bbox[2]]
imgs_crop.append(img_crop)
img_ = cv2.resize(img_crop, (face_size,face_size), interpolation = cv2.INTER_LINEAR) # INTER_LINEAR INTER_CUBIC
img_ = img_.astype(np.float32)
img_ = (img_-128.)/256.
img_ = img_.transpose(2, 0, 1)
img_ = np.expand_dims(img_,0)
if image_batch is None:
image_batch = img_
else:
image_batch = np.concatenate((image_batch,img_),axis=0)
    # # pad the landmark batch up to the maximum batch size
# if len(dets) < ops.max_batch_size:
# im_mask = np.zeros([1,3,ops.landmarks_img_size[0],ops.landmarks_img_size[1]], dtype = np.float32)
# for i in range(ops.max_batch_size-len(dets)):
# if image_batch is None:
# image_batch = im_mask
# else:
# image_batch = np.concatenate((image_batch,im_mask),axis=0)
#
# print("image_batch shape:",image_batch.shape)
# image_batch = torch.from_numpy(image_batch).float()
# #
# if use_cuda:
# image_batch = image_batch.cuda() # (bs, 3, h, w)
landmarks_pre,gender_pre,age_pre = face_multitask_model.predict(image_batch)
euler_angles = face_euler_model.predict(image_batch)
# print(" -------->>> euler_angles : ",euler_angles)
# print("landmarks_pre,gender_pre,age_pre :",landmarks_pre.shape,gender_pre.shape,age_pre.shape)
    faces_identify = []  # aligned face crops that qualify for identification
    faces_identify_bboxes = []  # bounding boxes of the faces that qualify for identification
    faceid_idx = 0  # index counter for the faces that qualify for identification
for i in range(len(dets)):
x0,y0 = r_bboxes[i][0],r_bboxes[i][1]
face_w = r_bboxes[i][2]-r_bboxes[i][0]
face_h = r_bboxes[i][3]-r_bboxes[i][1]
dict_landmarks,eyes_center,face_area = draw_landmarks(img_raw,landmarks_pre[i],face_w,face_h,x0,y0,vis = False)
gray_ = cv2.cvtColor(img_align[r_bboxes[i][1]:r_bboxes[i][3],r_bboxes[i][0]:r_bboxes[i][2],:], cv2.COLOR_BGR2GRAY)
blur_ = cv2.Laplacian(gray_, cv2.CV_64F).var()
        gender_max_index = np.argmax(gender_pre[i])  # index of the most probable class
        score_gender = gender_pre[i][gender_max_index]  # highest class probability
yaw,pitch,roll = euler_angles[i]
cv2.putText(img_raw, "yaw:{:.1f},pitch:{:.1f},roll:{:.1f}".format(yaw,pitch,roll),(int(r_bboxes[i][0]-20),int(r_bboxes[i][1]-30)),cv2.FONT_HERSHEY_DUPLEX, 0.65, (253,139,54), 5)
cv2.putText(img_raw, "yaw:{:.1f},pitch:{:.1f},roll:{:.1f}".format(yaw,pitch,roll),(int(r_bboxes[i][0]-20),int(r_bboxes[i][1]-30)),cv2.FONT_HERSHEY_DUPLEX, 0.65, (20,185,255), 1)
cv2.putText(img_raw, "{}".format(int(face_area)),(int(r_bboxes[i][0]-1),int(r_bboxes[i][3]-3)),cv2.FONT_HERSHEY_DUPLEX, 0.65, (253,39,54), 5) # face_area
cv2.putText(img_raw, "{}".format(int(face_area)),(int(r_bboxes[i][0]-1),int(r_bboxes[i][3]-3)),cv2.FONT_HERSHEY_DUPLEX, 0.65, (20,185,255), 1)
if gender_max_index == 1.:
gender_str = "male"
else:
gender_str = "female"
if abs(yaw)<45.:
face_align_output = face_alignment(img_align,eyes_center[0],eyes_center[1],
desiredLeftEye=(0.365, 0.38),desiredFaceWidth=112, desiredFaceHeight=None)
else:
face_align_output = face_alignment(img_align,eyes_center[0],eyes_center[1],
desiredLeftEye=(0.38, 0.40),desiredFaceWidth=112, desiredFaceHeight=None)
# plot_box(r_bboxes[i][0:4], img_raw,label="{}, age: {:.1f}, unblur:{}".format(gender_str,age_pre[i][0],int(blur_)), color=(255,90,90), line_thickness=2)
plot_box(r_bboxes[i][0:4], img_raw,label="{}, age: {:.1f}".format(gender_str,age_pre[i][0]), color=(255,90,90), line_thickness=2)
# print("face_area:",face_area)
# if (blur_>35) and abs(yaw)<36. and abs(pitch)<30. and (face_area>(60*60)):
if abs(yaw)<36. and abs(pitch)<36. and (face_area>(60*60)):
if vis :
draw_contour(img_raw,dict_landmarks,vis = True)
faces_identify.append(Image.fromarray(face_align_output))
faces_identify_bboxes.append(r_bboxes[i][0:4])
if faceid_idx<9:
y1_map,y2_map = int(faceid_idx/3)*112,(int(faceid_idx/3)+1)*112
x1_map,x2_map = int(faceid_idx%3)*112,(int(faceid_idx%3)+1)*112
face_map[y1_map:y2_map,x1_map:x2_map,:] = face_align_output
cv2.rectangle(face_map, (int(x1_map),int(y1_map)), (int(x2_map),int(y2_map)), (55,255,255), 2)
faceid_idx += 1
else:
cv2.putText(img_raw, "bad for face reco",(int(r_bboxes[i][0]-1),int(r_bboxes[i][3]+20)),cv2.FONT_HERSHEY_DUPLEX, 0.65, (20,15,255), 4)
cv2.putText(img_raw, "bad for face reco",(int(r_bboxes[i][0]-1),int(r_bboxes[i][3]+20)),cv2.FONT_HERSHEY_DUPLEX, 0.65, (220,185,25), 1)
return faces_identify,faces_identify_bboxes,r_bboxes,face_map
* make_facebank.py # builds the face gallery (facebank)
* By default, gallery faces must be aligned and resized to 112*112 (a rough sketch follows below).
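A rough sketch of gallery preparation, not the actual make_facebank.py (which is not shown in this commit): each person's photos should be face-aligned (e.g. with face_alignment() above, given detected eye centres) and saved at 112*112, one folder per identity, before the verification model builds its embeddings. All paths and the folder layout below are assumptions.

import os
import cv2

def prepare_facebank_images(src_dir, dst_dir, size=112):
    # Copy every photo into the gallery after resizing to size x size.
    # A real pipeline would detect and align the face first instead of resizing the whole image.
    for person in os.listdir(src_dir):
        os.makedirs(os.path.join(dst_dir, person), exist_ok=True)
        for name in os.listdir(os.path.join(src_dir, person)):
            img = cv2.imread(os.path.join(src_dir, person, name))
            if img is None:
                continue
            cv2.imwrite(os.path.join(dst_dir, person, name), cv2.resize(img, (size, size)))

prepare_facebank_images("./gallery_raw/", "./facebank/")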