diff --git a/components/face_detect/utils/common_utils.py b/components/face_detect/utils/common_utils.py deleted file mode 100644 index 6e8f105c9850fc59d0bfebbd0bcef1b746887da9..0000000000000000000000000000000000000000 --- a/components/face_detect/utils/common_utils.py +++ /dev/null @@ -1,656 +0,0 @@ -#-*-coding:utf-8-*- -# date:2020-04-11 -# Author: Eric.Lee - -import os -import shutil -import cv2 -import numpy as np -import json -import torch -from dp_models.faceboxes.config import cfg -from dp_models.faceboxes.layers.functions.prior_box import PriorBox -from dp_models.faceboxes.utils.box_utils import decode -from dp_models.faceboxes.headpose.pose import * -import torch.nn.functional as F - -def mkdir_(path, flag_rm=False): - if os.path.exists(path): - if flag_rm == True: - shutil.rmtree(path) - os.mkdir(path) - print('remove {} done ~ '.format(path)) - else: - os.mkdir(path) - -def plot_box(bbox, img, color=None, label=None, line_thickness=None): - tl = line_thickness or round(0.002 * max(img.shape[0:2])) + 1 - color = color or [random.randint(0, 255) for _ in range(3)] - c1, c2 = (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])) - cv2.rectangle(img, c1, c2, color, thickness=tl)# 目标的bbox - if label: - tf = max(tl - 2, 1) - t_size = cv2.getTextSize(label, 0, fontScale=tl / 4, thickness=tf)[0] # label size - c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3 # 字体的bbox - cv2.rectangle(img, c1, c2, color, -1) # label 矩形填充 - # 文本绘制 - cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 4, [225, 255, 255],thickness=tf, lineType=cv2.LINE_AA) - -class JSON_Encoder(json.JSONEncoder): - def default(self, obj): - if isinstance(obj, np.integer): - return int(obj) - elif isinstance(obj, np.floating): - return float(obj) - elif isinstance(obj, np.ndarray): - return obj.tolist() - else: - return super(JSON_Encoder, self).default(obj) - -def draw_landmarks(img,output,r_bboxes,draw_circle): - img_width = img.shape[1] - img_height = img.shape[0] - dict_landmarks = {} - global_dict_landmarks = {} # 全局坐标系坐标 - faceswap_list = [] - - face_pts = [] - - for i in range(int(output.shape[0]/2)): - x = output[i*2+0]*float(img_width) - y = output[i*2+1]*float(img_height) - - face_pts .append([x+r_bboxes[0],y+r_bboxes[1]]) - - if i ==33 or i == 46 or i == 96 or i == 97 or i == 54 or i == 76 or i == 82: - faceswap_list.append((x+r_bboxes[0],y+r_bboxes[1])) - # cv2.circle(img, (int(x),int(y)), 8, (0,255,255),-1) - # - if 41>= i >=33: - if 'left_eyebrow' not in dict_landmarks.keys(): - dict_landmarks['left_eyebrow'] = [] - global_dict_landmarks['left_eyebrow'] = [] - dict_landmarks['left_eyebrow'].append([int(x),int(y),(0,255,0)]) - global_dict_landmarks['left_eyebrow'].append([int(x+r_bboxes[0]),int(y+r_bboxes[1])]) - - - if draw_circle: - cv2.circle(img, (int(x),int(y)), 2, (0,255,0),-1) - elif 50>= i >=42: - if 'right_eyebrow' not in dict_landmarks.keys(): - dict_landmarks['right_eyebrow'] = [] - global_dict_landmarks['right_eyebrow'] = [] - dict_landmarks['right_eyebrow'].append([int(x),int(y),(0,255,0)]) - global_dict_landmarks['right_eyebrow'].append([int(x+r_bboxes[0]),int(y+r_bboxes[1])]) - if draw_circle: - cv2.circle(img, (int(x),int(y)), 2, (0,255,0),-1) - elif 67>= i >=60: - if 'left_eye' not in dict_landmarks.keys(): - dict_landmarks['left_eye'] = [] - global_dict_landmarks['left_eye'] = [] - dict_landmarks['left_eye'].append([int(x),int(y),(255,55,255)]) - global_dict_landmarks['left_eye'].append([int(x+r_bboxes[0]),int(y+r_bboxes[1])]) - if draw_circle: - cv2.circle(img, (int(x),int(y)), 2, 
(255,0,255),-1) - elif 75>= i >=68: - if 'right_eye' not in dict_landmarks.keys(): - dict_landmarks['right_eye'] = [] - global_dict_landmarks['right_eye'] = [] - dict_landmarks['right_eye'].append([int(x),int(y),(255,55,255)]) - global_dict_landmarks['right_eye'].append([int(x+r_bboxes[0]),int(y+r_bboxes[1])]) - if draw_circle: - cv2.circle(img, (int(x),int(y)), 2, (255,0,255),-1) - elif 97>= i >=96: - if 'eye_center' not in dict_landmarks.keys(): - global_dict_landmarks['eye_center'] = [] - global_dict_landmarks['eye_center'].append([int(x+r_bboxes[0]),int(y+r_bboxes[1])]) - - cv2.circle(img, (int(x),int(y)), 2, (0,0,255),-1) - elif 54>= i >=51: - if 'bridge_nose' not in dict_landmarks.keys(): - dict_landmarks['bridge_nose'] = [] - global_dict_landmarks['bridge_nose'] = [] - dict_landmarks['bridge_nose'].append([int(x),int(y),(0,170,255)]) - global_dict_landmarks['bridge_nose'].append([int(x+r_bboxes[0]),int(y+r_bboxes[1])]) - if draw_circle: - cv2.circle(img, (int(x),int(y)), 2, (0,170,255),-1) - elif 32>= i >=0: - if 'basin' not in dict_landmarks.keys(): - dict_landmarks['basin'] = [] - global_dict_landmarks['basin'] = [] - dict_landmarks['basin'].append([int(x),int(y),(255,30,30)]) - global_dict_landmarks['basin'].append([int(x+r_bboxes[0]),int(y+r_bboxes[1])]) - if draw_circle: - cv2.circle(img, (int(x),int(y)), 2, (255,30,30),-1) - elif 59>= i >=55: - if 'wing_nose' not in dict_landmarks.keys(): - dict_landmarks['wing_nose'] = [] - global_dict_landmarks['wing_nose'] = [] - dict_landmarks['wing_nose'].append([int(x),int(y),(0,255,255)]) - global_dict_landmarks['wing_nose'].append([int(x+r_bboxes[0]),int(y+r_bboxes[1])]) - if draw_circle: - cv2.circle(img, (int(x),int(y)), 2, (0,255,255),-1) - elif 87>= i >=76: - if 'out_lip' not in dict_landmarks.keys(): - dict_landmarks['out_lip'] = [] - global_dict_landmarks['out_lip'] = [] - dict_landmarks['out_lip'].append([int(x),int(y),(255,255,0)]) - global_dict_landmarks['out_lip'].append([int(x+r_bboxes[0]),int(y+r_bboxes[1])]) - if draw_circle: - cv2.circle(img, (int(x),int(y)), 2, (255,255,0),-1) - elif 95>= i >=88: - if 'in_lip' not in dict_landmarks.keys(): - dict_landmarks['in_lip'] = [] - global_dict_landmarks['in_lip'] = [] - dict_landmarks['in_lip'].append([int(x),int(y),(50,220,255)]) - global_dict_landmarks['in_lip'].append([int(x+r_bboxes[0]),int(y+r_bboxes[1])]) - if draw_circle: - cv2.circle(img, (int(x),int(y)), 2, (50,220,255),-1) - # else: - # if draw_circle: - # cv2.circle(img, (int(x),int(y)), 2, (255,0,255),-1) - - faceswap_list_e = [] - - for i in range(5): - faceswap_list_e.append(faceswap_list[i][0]) - for i in range(5): - faceswap_list_e.append(faceswap_list[i][1]) - - - return dict_landmarks,faceswap_list_e,global_dict_landmarks,face_pts - -def draw_contour(image,dict,r_bbox,face_pts): - x0 = r_bbox[0]# 全图偏置 - y0 = r_bbox[1] - - #------------------------------------------ - face_ola_pts = [] - face_ola_pts.append(face_pts[33]) - face_ola_pts.append(face_pts[38]) - face_ola_pts.append(face_pts[50]) - face_ola_pts.append(face_pts[46]) - - face_ola_pts.append(face_pts[60]) - face_ola_pts.append(face_pts[64]) - face_ola_pts.append(face_pts[68]) - face_ola_pts.append(face_pts[72]) - - face_ola_pts.append(face_pts[51]) - face_ola_pts.append(face_pts[55]) - face_ola_pts.append(face_pts[59]) - - face_ola_pts.append(face_pts[53]) - face_ola_pts.append(face_pts[57]) - - pts_num = len(face_ola_pts) - reprojectdst, euler_angle = get_head_pose(np.array(face_ola_pts).reshape((pts_num,2)),image,vis = False) - pitch, yaw, roll = 
euler_angle - - for key in dict.keys(): - # print(key) - _,_,color = dict[key][0] - - if 'left_eye' == key: - eye_x = np.mean([dict[key][i][0]+x0 for i in range(len(dict[key]))]) - eye_y = np.mean([dict[key][i][1]+y0 for i in range(len(dict[key]))]) - cv2.circle(image, (int(eye_x),int(eye_y)), 3, (255,255,55),-1) - if 'right_eye' == key: - eye_x = np.mean([dict[key][i][0]+x0 for i in range(len(dict[key]))]) - eye_y = np.mean([dict[key][i][1]+y0 for i in range(len(dict[key]))]) - cv2.circle(image, (int(eye_x),int(eye_y)), 3, (255,215,25),-1) - - if 'basin' == key or 'wing_nose' == key: - pts = np.array([[dict[key][i][0]+x0,dict[key][i][1]+y0] for i in range(len(dict[key]))],np.int32) - # print(pts) - cv2.polylines(image,[pts],False,color,thickness = 2) - - else: - points_array = np.zeros((1,len(dict[key]),2),dtype = np.int32) - for i in range(len(dict[key])): - x,y,_ = dict[key][i] - points_array[0,i,0] = x+x0 - points_array[0,i,1] = y+y0 - - # cv2.fillPoly(image, points_array, color) - cv2.drawContours(image,points_array,-1,color,thickness=2) - return (pitch, yaw, roll) - -import random -rgbs = [] -for j in range(100): - rgb = (random.randint(0,255),random.randint(0,255),random.randint(0,255)) - rgbs.append(rgb) - -def draw_global_contour(image,dict): - - - x0,y0 = 0,0 - idx = 0 - for key in dict.keys(): - idx += 1 - # print(key) - # _,_ = dict[key][0] - - if 'left_eye' == key: - eye_x = np.mean([dict[key][i][0]+x0 for i in range(len(dict[key]))]) - eye_y = np.mean([dict[key][i][1]+y0 for i in range(len(dict[key]))]) - cv2.circle(image, (int(eye_x),int(eye_y)), 3, (255,255,55),-1) - if 'right_eye' == key: - eye_x = np.mean([dict[key][i][0]+x0 for i in range(len(dict[key]))]) - eye_y = np.mean([dict[key][i][1]+y0 for i in range(len(dict[key]))]) - cv2.circle(image, (int(eye_x),int(eye_y)), 3, (255,215,25),-1) - - if 'basin' == key or 'wing_nose' == key: - pts = np.array([[dict[key][i][0]+x0,dict[key][i][1]+y0] for i in range(len(dict[key]))],np.int32) - # print(pts) - cv2.polylines(image,[pts],False,rgbs[idx],thickness = 2) - - else: - points_array = np.zeros((1,len(dict[key]),2),dtype = np.int32) - for i in range(len(dict[key])): - x,y = dict[key][i] - points_array[0,i,0] = x+x0 - points_array[0,i,1] = y+y0 - - # cv2.fillPoly(image, points_array, color) - cv2.drawContours(image,points_array,-1,rgbs[idx],thickness=2) - -def refine_face_bbox(bbox,img_shape): - height,width,_ = img_shape - - x1,y1,x2,y2 = bbox - - expand_w = (x2-x1) - expand_h = (y2-y1) - - x1 -= expand_w*0.06 - y1 += expand_h*0.15 - x2 += expand_w*0.06 - y2 += expand_h*0.03 - - x1,y1,x2,y2 = int(x1),int(y1),int(x2),int(y2) - - x1 = int(max(0,x1)) - y1 = int(max(0,y1)) - x2 = int(min(x2,width-1)) - y2 = int(min(y2,height-1)) - - return (x1,y1,x2,y2) -def py_cpu_nms(dets, thresh): - """Pure Python NMS baseline.""" - x1 = dets[:, 0] - y1 = dets[:, 1] - x2 = dets[:, 2] - y2 = dets[:, 3] - scores = dets[:, 4] - - areas = (x2 - x1 + 1) * (y2 - y1 + 1) - order = scores.argsort()[::-1] - - keep = [] - while order.size > 0: - i = order[0] - keep.append(i) - xx1 = np.maximum(x1[i], x1[order[1:]]) - yy1 = np.maximum(y1[i], y1[order[1:]]) - xx2 = np.minimum(x2[i], x2[order[1:]]) - yy2 = np.minimum(y2[i], y2[order[1:]]) - - w = np.maximum(0.0, xx2 - xx1 + 1) - h = np.maximum(0.0, yy2 - yy1 + 1) - inter = w * h - ovr = inter / (areas[i] + areas[order[1:]] - inter) - - inds = np.where(ovr <= thresh)[0] - order = order[inds + 1] - - return keep - -def check_keys(model, pretrained_state_dict): - ckpt_keys = set(pretrained_state_dict.keys()) 
- model_keys = set(model.state_dict().keys()) - used_pretrained_keys = model_keys & ckpt_keys - unused_pretrained_keys = ckpt_keys - model_keys - missing_keys = model_keys - ckpt_keys - # print('Missing keys:{}'.format(len(missing_keys))) - # print('Unused checkpoint keys:{}'.format(len(unused_pretrained_keys))) - # print('Used keys:{}'.format(len(used_pretrained_keys))) - assert len(used_pretrained_keys) > 0, 'load NONE from pretrained checkpoint' - return True - -def remove_prefix(state_dict, prefix): - ''' Old style model is stored with all names of parameters sharing common prefix 'module.' ''' - # print('remove prefix \'{}\''.format(prefix)) - f = lambda x: x.split(prefix, 1)[-1] if x.startswith(prefix) else x - return {f(key): value for key, value in state_dict.items()} - - -def load_model(model, pretrained_path, load_to_cpu): - # print('Loading pretrained model from {}'.format(pretrained_path)) - if load_to_cpu: - pretrained_dict = torch.load(pretrained_path, map_location=lambda storage, loc: storage) - else: - device = torch.cuda.current_device() - pretrained_dict = torch.load(pretrained_path, map_location=lambda storage, loc: storage.cuda(device)) - if "state_dict" in pretrained_dict.keys(): - pretrained_dict = remove_prefix(pretrained_dict['state_dict'], 'module.') - else: - pretrained_dict = remove_prefix(pretrained_dict, 'module.') - check_keys(model, pretrained_dict) - model.load_state_dict(pretrained_dict, strict=False) - return model - - -def detect_faces(ops,detect_model,img_raw,device): - resize = 1 - img = np.float32(img_raw) - if resize != 1: - img = cv2.resize(img, None, None, fx=resize, fy=resize, interpolation=cv2.INTER_LINEAR) - im_height, im_width, _ = img.shape - scale = torch.Tensor([img.shape[1], img.shape[0], img.shape[1], img.shape[0]]) - img -= (104, 117, 123) - img = img.transpose(2, 0, 1) - img = torch.from_numpy(img).unsqueeze(0) - img = img.to(device) - scale = scale.to(device) - - - loc, conf = detect_model(img) # forward pass - - priorbox = PriorBox(cfg, image_size=(im_height, im_width)) - priors = priorbox.forward() - priors = priors.to(device) - prior_data = priors.data - boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance']) - boxes = boxes * scale / resize - boxes = boxes.cpu().numpy() - scores = conf.squeeze(0).data.cpu().numpy()[:, 1] - - # ignore low scores - inds = np.where(scores > ops.confidence_threshold)[0] - boxes = boxes[inds] - scores = scores[inds] - - # keep top-K before NMS - order = scores.argsort()[::-1][:ops.top_k] - boxes = boxes[order] - scores = scores[order] - - # do NMS - dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False) - #keep = py_cpu_nms(dets, ops.nms_threshold) - # keep = nms(dets, ops.nms_threshold,force_cpu=True) - keep = py_cpu_nms(dets, ops.nms_threshold) - dets = dets[keep, :] - - # keep top-K faster NMS - dets = dets[:ops.keep_top_k, :] - - return dets - - - - -def get_faces_batch_landmarks(ops,landmarks_model,express_model,dets,img_raw,use_cuda,draw_bbox = True): - # 绘制图像 - image_batch = None - r_bboxes = [] - imgs_crop = [] - for b in dets: - - text = "{:.4f}".format(b[4]) - b = list(map(int, b)) - - r_bbox = refine_face_bbox((b[0],b[1],b[2],b[3]),img_raw.shape) - r_bboxes.append(r_bbox) - img_crop = img_raw[r_bbox[1]:r_bbox[3],r_bbox[0]:r_bbox[2]] - imgs_crop.append(img_crop) - img_ = cv2.resize(img_crop, (256,256), interpolation = cv2.INTER_LINEAR) # INTER_LINEAR INTER_CUBIC - - img_ = img_.astype(np.float32) - img_ = (img_-128.)/256. 
- - img_ = img_.transpose(2, 0, 1) - img_ = np.expand_dims(img_,0) - - if image_batch is None: - image_batch = img_ - else: - image_batch = np.concatenate((image_batch,img_),axis=0) - for b in dets: - - text = "{:.4f}".format(b[4]) - b = list(map(int, b)) - if draw_bbox: - cv2.rectangle(img_raw, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 2) - cx = b[0] - cy = b[1] - 3 - if draw_bbox: - cv2.putText(img_raw, text, (cx, cy),cv2.FONT_HERSHEY_DUPLEX, 0.6, (155, 155, 255),3) - cv2.putText(img_raw, text, (cx, cy),cv2.FONT_HERSHEY_DUPLEX, 0.6, (155, 10, 10),1) - - # 填充最大 关键点 批次数据 - # if len(dets) < 5: - # im_mask = np.zeros([1,3,ops.landmarks_img_size[0],ops.landmarks_img_size[1]], dtype = np.float32) - # for i in range(ops.max_batch_size-len(dets)): - # if image_batch is None: - # image_batch = im_mask - # else: - # image_batch = np.concatenate((image_batch,im_mask),axis=0) - - image_batch = torch.from_numpy(image_batch).float() - - if use_cuda: - image_batch = image_batch.cuda() # (bs, 3, h, w) - #----------------- express - pre_e = express_model(image_batch.float()) - - outputs_e = F.softmax(pre_e,dim = 1) - - # print("outputs_e size : ",outputs_e.size()) - - outputs_e = outputs_e.cpu().detach().numpy() - outputs_e = np.array(outputs_e) - # - max_index_e = np.argmax(outputs_e,axis = 1) - # print("max_index_e shape :",max_index_e.shape) - # print("max_index_e:",max_index_e) - # print("outputs_e .shape:",outputs_e.shape) - express_dict = { - 0:"001.anger", - 1:"002.disgust", - 2:"003.fear", - 3:"004.happy", - 4:"005.normal", - 5:"006.sad", - 6:"007.surprised", - } - express_list = [] - for kk in range(max_index_e.shape[0]): - max_index_ = max_index_e[kk] - score_ = outputs_e[kk][max_index_] - express_list.append((max_index_,express_dict[max_index_],score_)) - # print("max_index : {}, score : {:.3f}, express : {}".format(max_index_,score_,express_dict[max_index_])) - # score_e = outputs_e[max_index_e] - # print("score_e : ",score_e) - #----------------- landmarks - pre_ = landmarks_model(image_batch.float()) - - # print(pre_.size()) - output = pre_.cpu().detach().numpy() - # print('output shape : ',output.shape) - # n_array = np.zeros([ops.landmarks_img_size[0],ops.landmarks_img_size[1],3], dtype = np.float) - faceswap_landmarks = [] - output_dict_ = [] - for i in range(len(dets)): - - dict_landmarks,list_e,global_dict_landmarks,face_pts = draw_landmarks(imgs_crop[i],output[i],r_bboxes[i],draw_circle = False) - faceswap_landmarks.append(list_e) - pitch, yaw, roll = draw_contour(img_raw,dict_landmarks,r_bboxes[i],face_pts) - - output_dict_.append({ - "xyxy":(r_bboxes[i][0],r_bboxes[i][1],r_bboxes[i][2],r_bboxes[i][3]), - "score":str(dets[i][4]), - "landmarks":global_dict_landmarks, - "euler_angle":(int(pitch[0]), int(yaw[0]), int(roll[0])), - "express":(float(express_list[i][0]),float(express_list[i][2])), - }) - - - # print('dets :',dets) - #----------------------------------------------------------------------------------- - for i in range(len(dets)): - bbox = dets[i] - min_x = int(bbox[0]) - min_y = int(bbox[1]) - max_x = int(bbox[2]) - max_y = int(bbox[3]) - cv2.rectangle(img_raw, (min_x, min_y), (max_x, max_y), (255, 0, 255), thickness=4) - for k in range(5): - x = int(faceswap_landmarks[i][k+0]) - y = int(faceswap_landmarks[i][k+5]) - # cv2.circle(img_raw,(x,y),5+k*2,(0,0,255),-1) - if draw_bbox: - cv2.circle(img_raw,(x,y),2,(0,0,255),-1) - if draw_bbox: - - cv2.putText(img_raw, "express:{},{:.2f}".format(express_list[i][1],express_list[i][2]), (min_x, min_y-20),cv2.FONT_HERSHEY_DUPLEX, 0.6, 
(155, 155, 255),3) - cv2.putText(img_raw, "express:{},{:.2f}".format(express_list[i][1],express_list[i][2]), (min_x, min_y-20),cv2.FONT_HERSHEY_DUPLEX, 0.6, (155, 10, 10),1) - if draw_bbox: - cv2.putText(img_raw, 'face:'+str(len(dets)), (3,35),cv2.FONT_HERSHEY_DUPLEX, 1.45, (55, 255, 255),5) - cv2.putText(img_raw, 'face:'+str(len(dets)), (3,35),cv2.FONT_HERSHEY_DUPLEX, 1.45, (135, 135, 5),2) - - return output_dict_ -def get_faces_batch_landmarks_plfd(ops,landmarks_model,express_model,dets,img_raw,use_cuda,draw_bbox = True): - # 绘制图像 - image_batch = None - r_bboxes = [] - imgs_crop = [] - for b in dets: - - text = "{:.4f}".format(b[4]) - b = list(map(int, b)) - - r_bbox = refine_face_bbox((b[0],b[1],b[2],b[3]),img_raw.shape) - r_bboxes.append(r_bbox) - img_crop = img_raw[r_bbox[1]:r_bbox[3],r_bbox[0]:r_bbox[2]] - imgs_crop.append(img_crop) - img_ = cv2.resize(img_crop, (112,112), interpolation = cv2.INTER_LINEAR) # INTER_LINEAR INTER_CUBIC - - img_ = img_.astype(np.float32) - img_ = img_/256. - - img_ = img_.transpose(2, 0, 1) - img_ = np.expand_dims(img_,0) - - if image_batch is None: - image_batch = img_ - else: - image_batch = np.concatenate((image_batch,img_),axis=0) - for b in dets: - - text = "{:.4f}".format(b[4]) - b = list(map(int, b)) - if draw_bbox: - cv2.rectangle(img_raw, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 2) - cx = b[0] - cy = b[1] - 3 - if draw_bbox: - cv2.putText(img_raw, text, (cx, cy),cv2.FONT_HERSHEY_DUPLEX, 0.6, (155, 155, 255),3) - cv2.putText(img_raw, text, (cx, cy),cv2.FONT_HERSHEY_DUPLEX, 0.6, (155, 10, 10),1) - - # 填充最大 关键点 批次数据 - # if len(dets) < 5: - # im_mask = np.zeros([1,3,ops.landmarks_img_size[0],ops.landmarks_img_size[1]], dtype = np.float32) - # for i in range(ops.max_batch_size-len(dets)): - # if image_batch is None: - # image_batch = im_mask - # else: - # image_batch = np.concatenate((image_batch,im_mask),axis=0) - - image_batch = torch.from_numpy(image_batch).float() - - if use_cuda: - image_batch = image_batch.cuda() # (bs, 3, h, w) - - #----------------- express - pre_e = express_model(image_batch.float()) - - outputs_e = F.softmax(pre_e,dim = 1) - - # print("outputs_e size : ",outputs_e.size()) - - outputs_e = outputs_e.cpu().detach().numpy() - outputs_e = np.array(outputs_e) - # - max_index_e = np.argmax(outputs_e,axis = 1) - # print("max_index_e shape :",max_index_e.shape) - # print("max_index_e:",max_index_e) - # print("outputs_e .shape:",outputs_e.shape) - express_dict = { - 0:"001.anger", - 1:"002.disgust", - 2:"003.fear", - 3:"004.happy", - 4:"005.normal", - 5:"006.sad", - 6:"007.surprised", - } - express_list = [] - for kk in range(max_index_e.shape[0]): - max_index_ = max_index_e[kk] - score_ = outputs_e[kk][max_index_] - express_list.append((max_index_,express_dict[max_index_],score_)) - # print("max_index : {}, score : {:.3f}, express : {}".format(max_index_,score_,express_dict[max_index_])) - # score_e = outputs_e[max_index_e] - # print("score_e : ",score_e) - #----------------------------------------- - _,pre_ = landmarks_model(image_batch.float()) - # print("pre_ : ",pre_) - # print(pre_.size()) - output = pre_.cpu().detach().numpy() - # print('output shape : ',output.shape) - # n_array = np.zeros([ops.landmarks_img_size[0],ops.landmarks_img_size[1],3], dtype = np.float) - faceswap_landmarks = [] - output_dict_ = [] - for i in range(len(dets)): - - dict_landmarks,list_e,global_dict_landmarks,face_pts = draw_landmarks(imgs_crop[i],output[i],r_bboxes[i],draw_circle = False) - faceswap_landmarks.append(list_e) - pitch, yaw, roll = 
draw_contour(img_raw,dict_landmarks,r_bboxes[i],face_pts) - - output_dict_.append({ - "xyxy":(r_bboxes[i][0],r_bboxes[i][1],r_bboxes[i][2],r_bboxes[i][3]), - "score":str(dets[i][4]), - "landmarks":global_dict_landmarks, - "euler_angle":(int(pitch[0]), int(yaw[0]), int(roll[0])), - "express":(float(express_list[i][0]),float(express_list[i][2])), - }) - - - # print('dets :',dets) - #----------------------------------------------------------------------------------- - for i in range(len(dets)): - bbox = dets[i] - min_x = int(bbox[0]) - min_y = int(bbox[1]) - max_x = int(bbox[2]) - max_y = int(bbox[3]) - cv2.rectangle(img_raw, (min_x, min_y), (max_x, max_y), (255, 0, 255), thickness=2) - for k in range(5): - x = int(faceswap_landmarks[i][k+0]) - y = int(faceswap_landmarks[i][k+5]) - # cv2.circle(img_raw,(x,y),5+k*2,(0,0,255),-1) - if draw_bbox: - cv2.circle(img_raw,(x,y),2,(0,0,255),-1) - if draw_bbox: - - cv2.putText(img_raw, "express:{},{:.2f}".format(express_list[i][1],express_list[i][2]), (min_x, min_y-20),cv2.FONT_HERSHEY_DUPLEX, 0.6, (155, 155, 255),3) - cv2.putText(img_raw, "express:{},{:.2f}".format(express_list[i][1],express_list[i][2]), (min_x, min_y-20),cv2.FONT_HERSHEY_DUPLEX, 0.6, (155, 10, 10),1) - - if draw_bbox: - cv2.putText(img_raw, 'face:'+str(len(dets)), (3,35),cv2.FONT_HERSHEY_DUPLEX, 1.45, (55, 255, 255),5) - cv2.putText(img_raw, 'face:'+str(len(dets)), (3,35),cv2.FONT_HERSHEY_DUPLEX, 1.45, (135, 135, 5),2) - - return output_dict_ diff --git a/components/face_detect/utils/datasets.py b/components/face_detect/utils/datasets.py deleted file mode 100644 index 02ebd47c2cb05043fe27f0912c2fc61e1282ea83..0000000000000000000000000000000000000000 --- a/components/face_detect/utils/datasets.py +++ /dev/null @@ -1,395 +0,0 @@ -import glob -import math -import os -import random -import shutil -from pathlib import Path -from PIL import Image -from tqdm import tqdm -import cv2 -import numpy as np -import torch -from torch.utils.data import Dataset -from torch.utils.data import DataLoader - -def xyxy2xywh(x): - # Convert bounding box format from [x1, y1, x2, y2] to [x, y, w, h] - y = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x) - y[:, 0] = (x[:, 0] + x[:, 2]) / 2 - y[:, 1] = (x[:, 1] + x[:, 3]) / 2 - y[:, 2] = x[:, 2] - x[:, 0] - y[:, 3] = x[:, 3] - x[:, 1] - return y - - -def xywh2xyxy(x): - # Convert bounding box format from [x, y, w, h] to [x1, y1, x2, y2] - y = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x) - y[:, 0] = x[:, 0] - x[:, 2] / 2 - y[:, 1] = x[:, 1] - x[:, 3] / 2 - y[:, 2] = x[:, 0] + x[:, 2] / 2 - y[:, 3] = x[:, 1] + x[:, 3] / 2 - return y - - -class LoadImages: # for inference - def __init__(self, path, img_size=416): - self.height = img_size - img_formats = ['.jpg', '.jpeg', '.png', '.tif'] - vid_formats = ['.mov', '.avi', '.mp4'] - - files = [] - if os.path.isdir(path): - files = sorted(glob.glob('%s/*.*' % path)) - elif os.path.isfile(path): - files = [path] - - images = [x for x in files if os.path.splitext(x)[-1].lower() in img_formats] - videos = [x for x in files if os.path.splitext(x)[-1].lower() in vid_formats] - nI, nV = len(images), len(videos) - - self.files = images + videos - self.nF = nI + nV # number of files - self.video_flag = [False] * nI + [True] * nV - self.mode = 'images' - if any(videos): - self.new_video(videos[0]) # new video - else: - self.cap = None - assert self.nF > 0, 'No images or videos found in ' + path - - def __iter__(self): - self.count = 0 - return self - - def __next__(self): - 
if self.count == self.nF: - raise StopIteration - path = self.files[self.count] - - if self.video_flag[self.count]: - # Read video - self.mode = 'video' - ret_val, img0 = self.cap.read() - if not ret_val: - self.count += 1 - self.cap.release() - if self.count == self.nF: # last video - raise StopIteration - else: - path = self.files[self.count] - self.new_video(path) - ret_val, img0 = self.cap.read() - - self.frame += 1 - print('video %g/%g (%g/%g) %s: ' % (self.count + 1, self.nF, self.frame, self.nframes, path), end='') - - else: - # Read image - self.count += 1 - img0 = cv2.imread(path) # BGR - assert img0 is not None, 'File Not Found ' + path - print('image %g/%g %s: ' % (self.count, self.nF, path), end='') - - # Padded resize - img, _, _, _ = letterbox(img0, height=self.height) - - # Normalize RGB - img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB - img = np.ascontiguousarray(img, dtype=np.float32) # uint8 to float32 - img /= 255.0 # 0 - 255 to 0.0 - 1.0 - - # cv2.imwrite(path + '.letterbox.jpg', 255 * img.transpose((1, 2, 0))[:, :, ::-1]) # save letterbox image - return path, img, img0, self.cap - - def new_video(self, path): - self.frame = 0 - self.cap = cv2.VideoCapture(path) - self.nframes = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT)) - - def __len__(self): - return self.nF # number of files - - -class LoadWebcam: # for inference - def __init__(self, img_size=416): - self.cam = cv2.VideoCapture(0) - self.height = img_size - - def __iter__(self): - self.count = -1 - return self - - def __next__(self): - self.count += 1 - if cv2.waitKey(1) == 27: # esc to quit - cv2.destroyAllWindows() - raise StopIteration - - # Read image - ret_val, img0 = self.cam.read() - assert ret_val, 'Webcam Error' - img_path = 'webcam_%g.jpg' % self.count - img0 = cv2.flip(img0, 1) # flip left-right - - # Padded resize - img, _, _, _ = letterbox(img0, height=self.height) - - # Normalize RGB - img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB - img = np.ascontiguousarray(img, dtype=np.float32) # uint8 to float32 - img /= 255.0 # 0 - 255 to 0.0 - 1.0 - - return img_path, img, img0, self.cam - - def __len__(self): - return 0 - - -class LoadImagesAndLabels(Dataset): # for training/testing - def __init__(self, path, batch_size, img_size=416, augment=True, multi_scale=False): - print('LoadImagesAndLabels init : ',path) - with open(path, 'r') as file: - img_files = file.read().splitlines() - img_files = list(filter(lambda x: len(x) > 0, img_files)) - np.random.shuffle(img_files) # shuffle img_list - print("shuffle image...") - self.img_files = img_files - assert len(self.img_files) > 0, 'No images found in %s' % path - self.img_size = img_size - self.batch_size = batch_size - self.multi_scale = multi_scale - self.augment = augment - self.scale_index = 0 - if self.multi_scale: - self.img_size = img_size # initiate with maximum multi_scale size, in case of out of memory - print("Multi scale images training, init img_size", self.img_size) - else: - print("Fixed scale images, img_size", self.img_size) - self.label_files = [ - x.replace('images', 'labels').replace("JPEGImages", 'labels').replace('.bmp', '.txt').replace('.jpg', '.txt').replace('.png', '.txt') - for x in self.img_files] - - # print('self.img_files : ',self.img_files[1]) - # print('self.label_files : ',self.label_files[1]) - - def __len__(self): - return len(self.img_files) - - def __getitem__(self, index): - - # if self.multi_scale and (index % self.batch_size == 0) and index != 0: - if self.multi_scale and (self.scale_index % self.batch_size == 
0)and self.scale_index != 0: - self.img_size = random.choice(range(11, 18)) * 32 - # print("++++++ change img_size, index:", self.img_size, index) - if self.multi_scale: - self.scale_index += 1 - if self.scale_index >= (100*self.batch_size): - self.scale_index = 0 - - - img_path = self.img_files[index] - label_path = self.label_files[index] - - img = cv2.imread(img_path) # BGR - assert img is not None, 'File Not Found ' + img_path - - augment_hsv = random.random() < 0.5 # hsv_aug prob = 0.5 - if self.augment and augment_hsv: - # SV augmentation by 50% - fraction = 0.50 # must be < 1.0 - img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV) - S = img_hsv[:, :, 1].astype(np.float32) - V = img_hsv[:, :, 2].astype(np.float32) - - a = (random.random() * 2 - 1) * fraction + 1 # a in [-0,5, 1.5] - S *= a - if a > 1: - np.clip(S, None, 255, out=S) - - a = (random.random() * 2 - 1) * fraction + 1 - V *= a - if a > 1: - np.clip(V, None, 255, out=V) - - img_hsv[:, :, 1] = S # .astype(np.uint8) - img_hsv[:, :, 2] = V # .astype(np.uint8) - cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img) - - h, w, _ = img.shape - img, ratio, padw, padh = letterbox(img, height=self.img_size, augment=self.augment) - - # Load labels - labels = [] - if os.path.isfile(label_path): - with open(label_path, 'r') as file: - lines = file.read().splitlines() - - x = np.array([x.split() for x in lines], dtype=np.float32) - if x.size > 0: - # Normalized xywh to pixel xyxy format - labels = x.copy() - labels[:, 1] = ratio * w * (x[:, 1] - x[:, 3] / 2) + padw - labels[:, 2] = ratio * h * (x[:, 2] - x[:, 4] / 2) + padh - labels[:, 3] = ratio * w * (x[:, 1] + x[:, 3] / 2) + padw - labels[:, 4] = ratio * h * (x[:, 2] + x[:, 4] / 2) + padh - - # Augment image and labels - if self.augment: - img, labels = random_affine(img, labels, degrees=(-10, 10), translate=(0.10, 0.10), scale=(0.9, 1.1)) - - nL = len(labels) # number of labels - if nL: - # convert xyxy to xywh - labels[:, 1:5] = xyxy2xywh(labels[:, 1:5]) / self.img_size # 转化 格式 ,且 归一化 - - if self.augment: - # random left-right flip - lr_flip = True - if lr_flip and random.random() > 0.5: - img = np.fliplr(img) - if nL: - labels[:, 1] = 1 - labels[:, 1] - - # random up-down flip - ud_flip = False - if ud_flip and random.random() > 0.5: - img = np.flipud(img) - if nL: - labels[:, 2] = 1 - labels[:, 2] - - labels_out = torch.zeros((nL, 6))# 加了 一个 batch size - if nL: - labels_out[:, 1:] = torch.from_numpy(labels) - - # Normalize - img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416 - img = np.ascontiguousarray(img, dtype=np.float32) # uint8 to float32 - img /= 255.0 # 0 - 255 to 0.0 - 1.0 - - return torch.from_numpy(img), labels_out, img_path, (h, w) - - @staticmethod - def collate_fn(batch): - img, label, path, hw = list(zip(*batch)) # transposed - for i, l in enumerate(label): - l[:, 0] = i # 获取 物体的 归属于 图片 的 index - return torch.stack(img, 0), torch.cat(label, 0), path, hw - - -def letterbox(img, height=416, augment=False, color=(127.5, 127.5, 127.5)): - # Resize a rectangular image to a padded square - shape = img.shape[:2] # shape = [height, width] - ratio = float(height) / max(shape) # ratio = old / new - new_shape = (round(shape[1] * ratio), round(shape[0] * ratio)) - dw = (height - new_shape[0]) / 2 # width padding - dh = (height - new_shape[1]) / 2 # height padding - top, bottom = round(dh - 0.1), round(dh + 0.1) - left, right = round(dw - 0.1), round(dw + 0.1) - # resize img - if augment: - interpolation = np.random.choice([None, cv2.INTER_NEAREST, cv2.INTER_LINEAR, - 
None, cv2.INTER_NEAREST, cv2.INTER_LINEAR, - cv2.INTER_AREA, cv2.INTER_CUBIC, cv2.INTER_LANCZOS4]) - if interpolation is None: - img = cv2.resize(img, new_shape) - else: - img = cv2.resize(img, new_shape, interpolation=interpolation) - else: - img = cv2.resize(img, new_shape, interpolation=cv2.INTER_NEAREST) - # print("resize time:",time.time()-s1) - - img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # padded square - return img, ratio, dw, dh - - -def random_affine(img, targets=(), degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-2, 2), - borderValue=(127.5, 127.5, 127.5)): - # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10)) - # https://medium.com/uruvideo/dataset-augmentation-with-random-homographies-a8f4b44830d4 - - if targets is None: - targets = [] - border = 0 # width of added border (optional) - height = max(img.shape[0], img.shape[1]) + border * 2 - - # Rotation and Scale - R = np.eye(3) - a = random.random() * (degrees[1] - degrees[0]) + degrees[0] - # a += random.choice([-180, -90, 0, 90]) # 90deg rotations added to small rotations - s = random.random() * (scale[1] - scale[0]) + scale[0] - R[:2] = cv2.getRotationMatrix2D(angle=a, center=(img.shape[1] / 2, img.shape[0] / 2), scale=s) - - # Translation - T = np.eye(3) - T[0, 2] = (random.random() * 2 - 1) * translate[0] * img.shape[0] + border # x translation (pixels) - T[1, 2] = (random.random() * 2 - 1) * translate[1] * img.shape[1] + border # y translation (pixels) - - # Shear - S = np.eye(3) - S[0, 1] = math.tan((random.random() * (shear[1] - shear[0]) + shear[0]) * math.pi / 180) # x shear (deg) - S[1, 0] = math.tan((random.random() * (shear[1] - shear[0]) + shear[0]) * math.pi / 180) # y shear (deg) - - M = S @ T @ R # Combined rotation matrix. ORDER IS IMPORTANT HERE!! 
- imw = cv2.warpPerspective(img, M, dsize=(height, height), flags=cv2.INTER_LINEAR, - borderValue=borderValue) # BGR order borderValue - - # Return warped points also - if len(targets) > 0: - n = targets.shape[0] - points = targets[:, 1:5].copy() - area0 = (points[:, 2] - points[:, 0]) * (points[:, 3] - points[:, 1]) - - # warp points - xy = np.ones((n * 4, 3)) - xy[:, :2] = points[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(n * 4, 2) # x1y1, x2y2, x1y2, x2y1 - xy = (xy @ M.T)[:, :2].reshape(n, 8) - - # create new boxes - x = xy[:, [0, 2, 4, 6]] - y = xy[:, [1, 3, 5, 7]] - xy = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T - - # apply angle-based reduction of bounding boxes - radians = a * math.pi / 180 - reduction = max(abs(math.sin(radians)), abs(math.cos(radians))) ** 0.5 - x = (xy[:, 2] + xy[:, 0]) / 2 - y = (xy[:, 3] + xy[:, 1]) / 2 - w = (xy[:, 2] - xy[:, 0]) * reduction - h = (xy[:, 3] - xy[:, 1]) * reduction - xy = np.concatenate((x - w / 2, y - h / 2, x + w / 2, y + h / 2)).reshape(4, n).T - - # reject warped points outside of image - np.clip(xy, 0, height, out=xy) - w = xy[:, 2] - xy[:, 0] - h = xy[:, 3] - xy[:, 1] - area = w * h - ar = np.maximum(w / (h + 1e-16), h / (w + 1e-16)) - i = (w > 4) & (h > 4) & (area / (area0 + 1e-16) > 0.1) & (ar < 10) - - targets = targets[i] - targets[:, 1:5] = xy[i] - - return imw, targets - - -def convert_images2bmp(): - # cv2.imread() jpg at 230 img/s, *.bmp at 400 img/s - for path in ['../coco/images/val2014/', '../coco/images/train2014/']: - folder = os.sep + Path(path).name - output = path.replace(folder, folder + 'bmp') - if os.path.exists(output): - shutil.rmtree(output) # delete output folder - os.makedirs(output) # make new output folder - - for f in tqdm(glob.glob('%s*.jpg' % path)): - save_name = f.replace('.jpg', '.bmp').replace(folder, folder + 'bmp') - cv2.imwrite(save_name, cv2.imread(f)) - - for label_path in ['../coco/trainvalno5k.txt', '../coco/5k.txt']: - with open(label_path, 'r') as file: - lines = file.read() - lines = lines.replace('2014/', '2014bmp/').replace('.jpg', '.bmp').replace( - '/Users/glennjocher/PycharmProjects/', '../') - with open(label_path.replace('5k', '5k_bmp'), 'w') as file: - file.write(lines) diff --git a/components/face_detect/utils/utils.py b/components/face_detect/utils/utils.py deleted file mode 100644 index fd529c54a7e3205b2fa42cee288bc7b295af17de..0000000000000000000000000000000000000000 --- a/components/face_detect/utils/utils.py +++ /dev/null @@ -1,438 +0,0 @@ -import glob -import random -import time -from collections import defaultdict - -import cv2 -import numpy as np -import torch -import torch.nn as nn -from dp_models.light_pose.modules.keypoints import BODY_PARTS_KPT_IDS, BODY_PARTS_PAF_IDS - -# Set printoptions -torch.set_printoptions(linewidth=1320, precision=5, profile='long') -np.set_printoptions(linewidth=320, formatter={'float_kind': '{:11.5g}'.format}) # format short g, %precision=5 - -# Prevent OpenCV from multithreading (to use PyTorch DataLoader) -cv2.setNumThreads(0) - -def float3(x): # format floats to 3 decimals - return float(format(x, '.3f')) - -def init_seeds(seed=0): - random.seed(seed) - np.random.seed(seed) - torch.manual_seed(seed) - - if torch.cuda.is_available(): - torch.cuda.manual_seed(seed) - torch.cuda.manual_seed_all(seed) - else: - torch.manual_seed(seed) - torch.manual_seed_all(seed) - - -def load_classes(path): - # Loads class labels at 'path' - fp = open(path, 'r') - names = fp.read().split('\n') - return list(filter(None, names)) # filter 
removes empty strings (such as last line) - - -def model_info(model): - # Plots a line-by-line description of a PyTorch model - n_p = sum(x.numel() for x in model.parameters()) # number parameters - n_g = sum(x.numel() for x in model.parameters() if x.requires_grad) # number gradients - print('\n%5s %60s %9s %12s %20s %10s %10s' % ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', 'sigma')) - for i, (name, p) in enumerate(model.named_parameters()): - # name = name.replace('module_list.', '') - print('%5g %60s %9s %12g %20s %10.3g %10.3g' % ( - i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std())) - print('Model Summary: %g layers, %g parameters, %g gradients' % (i + 1, n_p, n_g)) - - - - - -def weights_init_normal(m): - classname = m.__class__.__name__ - if classname.find('Conv') != -1: - torch.nn.init.normal_(m.weight.data, 0.0, 0.03) - elif classname.find('BatchNorm2d') != -1: - torch.nn.init.normal_(m.weight.data, 1.0, 0.03) - torch.nn.init.constant_(m.bias.data, 0.0) - - -def xyxy2xywh(x): - # Convert bounding box format from [x1, y1, x2, y2] to [x, y, w, h] - y = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x) - y[:, 0] = (x[:, 0] + x[:, 2]) / 2 - y[:, 1] = (x[:, 1] + x[:, 3]) / 2 - y[:, 2] = x[:, 2] - x[:, 0] - y[:, 3] = x[:, 3] - x[:, 1] - return y - - -def xywh2xyxy(x): - # Convert bounding box format from [x, y, w, h] to [x1, y1, x2, y2] - y = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x) - y[:, 0] = x[:, 0] - x[:, 2] / 2 - y[:, 1] = x[:, 1] - x[:, 3] / 2 - y[:, 2] = x[:, 0] + x[:, 2] / 2 - y[:, 3] = x[:, 1] + x[:, 3] / 2 - return y - -def scale_coords(img_size, coords, img0_shape):# image size 转为 原图尺寸 - # Rescale x1, y1, x2, y2 from 416 to image size - # print('coords : ',coords) - # print('img0_shape : ',img0_shape) - gain = float(img_size) / max(img0_shape) # gain = old / new - # print('gain : ',gain) - pad_x = (img_size - img0_shape[1] * gain) / 2 # width padding - pad_y = (img_size - img0_shape[0] * gain) / 2 # height padding - # print('pad_xpad_y : ',pad_x,pad_y) - coords[:, [0, 2]] -= pad_x - coords[:, [1, 3]] -= pad_y - coords[:, :4] /= gain - coords[:, :4] = torch.clamp(coords[:, :4], min=0)# 夹紧区间最小值不为负数 - return coords - - -def ap_per_class(tp, conf, pred_cls, target_cls): - """ Compute the average precision, given the recall and precision curves. - Source: https://github.com/rafaelpadilla/Object-Detection-Metrics. - # Arguments - tp: True positives (list). - conf: Objectness value from 0-1 (list). - pred_cls: Predicted object classes (list). - target_cls: True object classes (list). - # Returns - The average precision as computed in py-faster-rcnn. 
- """ - - # Sort by objectness - i = np.argsort(-conf) - tp, conf, pred_cls = tp[i], conf[i], pred_cls[i] - - # Find unique classes - unique_classes = np.unique(target_cls) - - # Create Precision-Recall curve and compute AP for each class - ap, p, r = [], [], [] - for c in unique_classes: - i = pred_cls == c - n_gt = (target_cls == c).sum() # Number of ground truth objects - n_p = i.sum() # Number of predicted objects - - if n_p == 0 and n_gt == 0: - continue - elif n_p == 0 or n_gt == 0: - ap.append(0) - r.append(0) - p.append(0) - else: - # Accumulate FPs and TPs - fpc = (1 - tp[i]).cumsum() - tpc = (tp[i]).cumsum() - - # Recall - recall_curve = tpc / (n_gt + 1e-16) - r.append(recall_curve[-1]) - - # Precision - precision_curve = tpc / (tpc + fpc) - p.append(precision_curve[-1]) - - # AP from recall-precision curve - ap.append(compute_ap(recall_curve, precision_curve)) - - # Plot - # plt.plot(recall_curve, precision_curve) - - # Compute F1 score (harmonic mean of precision and recall) - p, r, ap = np.array(p), np.array(r), np.array(ap) - f1 = 2 * p * r / (p + r + 1e-16) - - return p, r, ap, f1, unique_classes.astype('int32') - - -def compute_ap(recall, precision): - """ Compute the average precision, given the recall and precision curves. - Source: https://github.com/rbgirshick/py-faster-rcnn. - # Arguments - recall: The recall curve (list). - precision: The precision curve (list). - # Returns - The average precision as computed in py-faster-rcnn. - """ - # correct AP calculation - # first append sentinel values at the end - - mrec = np.concatenate(([0.], recall, [1.])) - mpre = np.concatenate(([0.], precision, [0.])) - - # compute the precision envelope - for i in range(mpre.size - 1, 0, -1): - mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) - - # to calculate area under PR curve, look for points - # where X axis (recall) changes value - i = np.where(mrec[1:] != mrec[:-1])[0] - - # and sum (\Delta recall) * prec - ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) - return ap - - -def bbox_iou(box1, box2, x1y1x2y2=True): - # Returns the IoU of box1 to box2. 
box1 is 4, box2 is nx4 - box2 = box2.t() - - # Get the coordinates of bounding boxes - if x1y1x2y2: - # x1, y1, x2, y2 = box1 - b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3] - b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3] - else: - # x, y, w, h = box1 - b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2 - b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2 - b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2 - b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2 - - # Intersection area - inter_area = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * \ - (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0) - - # Union Area - union_area = ((b1_x2 - b1_x1) * (b1_y2 - b1_y1) + 1e-16) + \ - (b2_x2 - b2_x1) * (b2_y2 - b2_y1) - inter_area - - return inter_area / union_area # iou - - -def wh_iou(box1, box2): - - box2 = box2.t() - - # w, h = box1 - w1, h1 = box1[0], box1[1] - w2, h2 = box2[0], box2[1] - - # Intersection area - inter_area = torch.min(w1, w2) * torch.min(h1, h2) - - # Union Area - union_area = (w1 * h1 + 1e-16) + w2 * h2 - inter_area - - return inter_area / union_area # iou - - -def compute_loss(p, targets): # predictions, targets - FT = torch.cuda.FloatTensor if p[0].is_cuda else torch.FloatTensor - lxy, lwh, lcls, lconf = FT([0]), FT([0]), FT([0]), FT([0]) # losses 初始化 为 0 - txy, twh, tcls, indices = targets - MSE = nn.MSELoss() - CE = nn.CrossEntropyLoss() - BCE = nn.BCEWithLogitsLoss()# 多标签分类时 使用 如 [1,1,0], - - # Compute losses - for i, pi0 in enumerate(p): # layer i predictions, i - b, a, gj, gi = indices[i] # image_idx, anchor_idx, gridx, gridy - - # print(i,') b, a, gj, gi : ') - # print('b', b) - # print('a', a) - # print('gj', gj) - # print('gi', gi) - - tconf = torch.zeros_like(pi0[..., 0]) # conf - - # print('tconf: ',tconf.size()) - # Compute losses - k = 1 # nT / bs - if len(b) > 0: - pi = pi0[b, a, gj, gi] # predictions closest to anchors - tconf[b, a, gj, gi] = 1 # conf - - lxy += (k * 8) * MSE(torch.sigmoid(pi[..., 0:2]), txy[i]) # xy loss - lwh += (k * 4) * MSE(pi[..., 2:4], twh[i]) # wh loss - lcls += (k * 1) * CE(pi[..., 5:], tcls[i]) # class_conf loss - - lconf += (k * 64) * BCE(pi0[..., 4], tconf) # obj_conf loss - loss = lxy + lwh + lconf + lcls - - # Add to dictionary - d = defaultdict(float) - losses = [loss.item(), lxy.item(), lwh.item(), lconf.item(), lcls.item()] - for name, x in zip(['total', 'xy', 'wh', 'conf', 'cls'], losses): - d[name] = x - - return loss, d - - -def build_targets(model, targets): - # targets = [image, class, x, y, w, h] - if isinstance(model, nn.parallel.DistributedDataParallel): - model = model.module - - txy, twh, tcls, indices = [], [], [], [] - for i, layer in enumerate(get_yolo_layers(model)):# 遍历 3 个 yolo layer - # print(i,'layer ',model.module_list[layer]) - layer = model.module_list[layer][0] - - # iou of targets-anchors - gwh = targets[:, 4:6] * layer.nG # 以 grid 为单位的 wh - iou = [wh_iou(x, gwh) for x in layer.anchor_vec] - iou, a = torch.stack(iou, 0).max(0) # best iou and anchor - - # reject below threshold ious (OPTIONAL, increases P, lowers R) - reject = True - if reject: - j = iou > 0.10 - t, a, gwh = targets[j], a[j], gwh[j] - else: - t = targets - - # Indices - b, c = t[:, :2].long().t() # target image, class - gxy = t[:, 2:4] * layer.nG - gi, gj = gxy.long().t() # grid_i, grid_j - indices.append((b, a, gj, gi)) # img_index , anchor_index , grid_x , grid_y - - # print('b, a, gj, gi : ') - # print('b', b) - # print('a', a) - # 
print('gj', gj) - # print('gi', gi) - # print('class c',c) - - # XY coordinates - txy.append(gxy - gxy.floor())#转化为grid相对坐标 - - # Width and height - twh.append(torch.log(gwh / layer.anchor_vec[a])) # yolo method 对数 - # twh.append(torch.sqrt(gwh / layer.anchor_vec[a]) / 2) # power method - - # Class - tcls.append(c) - # try: - # print('c.max,layer.nC: ',c.max().item() ,layer.nC) - # except: - # pass - if c.shape[0]: - assert c.max().item() <= layer.nC, 'Target classes exceed model classes' - - return txy, twh, tcls, indices - - -# @profile -def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4): - """ - Removes detections with lower object confidence score than 'conf_thres' - Non-Maximum Suppression to further filter detections. - Returns detections with shape: - (x1, y1, x2, y2, object_conf, class_conf, class) - """ - - min_wh = 2 # (pixels) minimum box width and height - - output = [None] * len(prediction) - for image_i, pred in enumerate(prediction): - # Experiment: Prior class size rejection - # x, y, w, h = pred[:, 0], pred[:, 1], pred[:, 2], pred[:, 3] - # a = w * h # area - # ar = w / (h + 1e-16) # aspect ratio - # n = len(w) - # log_w, log_h, log_a, log_ar = torch.log(w), torch.log(h), torch.log(a), torch.log(ar) - # shape_likelihood = np.zeros((n, 60), dtype=np.float32) - # x = np.concatenate((log_w.reshape(-1, 1), log_h.reshape(-1, 1)), 1) - # from scipy.stats import multivariate_normal - # for c in range(60): - # shape_likelihood[:, c] = - # multivariate_normal.pdf(x, mean=mat['class_mu'][c, :2], cov=mat['class_cov'][c, :2, :2]) - - # Filter out confidence scores below threshold - class_conf, class_pred = pred[:, 5:].max(1) # max class_conf, index - pred[:, 4] *= class_conf # finall conf = obj_conf * class_conf - - i = (pred[:, 4] > conf_thres) & (pred[:, 2] > min_wh) & (pred[:, 3] > min_wh) - # s2=time.time() - pred2 = pred[i] - # print("++++++pred2 = pred[i]",time.time()-s2, pred2) - - # If none are remaining => process next image - if len(pred2) == 0: - continue - - # Select predicted classes - class_conf = class_conf[i] - class_pred = class_pred[i].unsqueeze(1).float() - - # Box (center x, center y, width, height) to (x1, y1, x2, y2) - pred2[:, :4] = xywh2xyxy(pred2[:, :4]) - # pred[:, 4] *= class_conf # improves mAP from 0.549 to 0.551 - - # Detections ordered as (x1y1x2y2, obj_conf, class_conf, class_pred) - pred2 = torch.cat((pred2[:, :5], class_conf.unsqueeze(1), class_pred), 1) - - # Get detections sorted by decreasing confidence scores - pred2 = pred2[(-pred2[:, 4]).argsort()] - - det_max = [] - nms_style = 'MERGE' # 'OR' (default), 'AND', 'MERGE' (experimental) - for c in pred2[:, -1].unique(): - dc = pred2[pred2[:, -1] == c] # select class c - dc = dc[:min(len(dc), 100)] # limit to first 100 boxes - - # Non-maximum suppression - if nms_style == 'OR': # default - # METHOD1 - # ind = list(range(len(dc))) - # while len(ind): - # j = ind[0] - # det_max.append(dc[j:j + 1]) # save highest conf detection - # reject = (bbox_iou(dc[j], dc[ind]) > nms_thres).nonzero() - # [ind.pop(i) for i in reversed(reject)] - - # METHOD2 - while dc.shape[0]: - det_max.append(dc[:1]) # save highest conf detection - if len(dc) == 1: # Stop if we're at the last detection - break - iou = bbox_iou(dc[0], dc[1:]) # iou with other boxes - dc = dc[1:][iou < nms_thres] # remove ious > threshold - - elif nms_style == 'AND': # requires overlap, single boxes erased - while len(dc) > 1: - iou = bbox_iou(dc[0], dc[1:]) # iou with other boxes - if iou.max() > 0.5: - det_max.append(dc[:1]) - dc 
= dc[1:][iou < nms_thres]  # remove ious > threshold
-
-            elif nms_style == 'MERGE':  # weighted mixture box
-                while len(dc):
-                    i = bbox_iou(dc[0], dc) > nms_thres  # iou with other boxes
-                    weights = dc[i, 4:5]
-                    dc[0, :4] = (weights * dc[i, :4]).sum(0) / weights.sum()
-                    det_max.append(dc[:1])
-                    dc = dc[i == 0]
-
-        if len(det_max):
-            det_max = torch.cat(det_max)  # concatenate
-            output[image_i] = det_max[(-det_max[:, 4]).argsort()]  # sort
-    return output
-
-
-def get_yolo_layers(model):
-    yolo_layer_index = []
-    for index, l in enumerate(model.module_list):
-        try:
-            a = l[0].img_size and l[0].nG  # only yolo layer need img_size and nG
-            # print("---"*50)
-            # print(l, index)
-            yolo_layer_index.append(index)
-        except:
-            pass
-    assert len(yolo_layer_index) > 0, "can not find yolo layer"
-    return yolo_layer_index
diff --git a/lib/wyw2s_lib/cfg/wyw2s.cfg b/lib/wyw2s_lib/cfg/wyw2s.cfg
index 869466b16601275b58329db725fb4b4f22e652ee..ce7b5ef6c7c91959cc0055d698607f0f1ff6f326 100644
--- a/lib/wyw2s_lib/cfg/wyw2s.cfg
+++ b/lib/wyw2s_lib/cfg/wyw2s.cfg
@@ -1 +1,16 @@
 YouWantToSee=BradPitt
+
+detect_model_path=./wyw2s_models/face_yolo_416-20210418.pt
+detect_model_arch=yolo
+detect_input_size = 416
+yolo_anchor_scale=1.
+detect_conf_thres=0.4
+detect_nms_thres=0.45
+
+face_verify_backbone_path=./wyw2s_models/face_verify-model_ir_se-50.pth
+facebank_path=./wyw2s_models/facebank
+face_verify_threshold=1.2
+
+face_multitask_model_path=./wyw2s_models/face_multitask-resnet_50_imgsize-256-20210411.pth
+
+face_euler_model_path=./wyw2s_models/euler_angle-resnet_18_imgsize_256.pth
diff --git a/lib/wyw2s_lib/doc/README.md b/lib/wyw2s_lib/doc/README.md
index 5a705262a83e10ea63ceffae581d0a46bc820828..c2db3c06aff0c997bcf2d06ee9687fb665d14519 100644
--- a/lib/wyw2s_lib/doc/README.md
+++ b/lib/wyw2s_lib/doc/README.md
@@ -6,11 +6,12 @@
 * Scenario: crop the segments of a video in which the target person appears.
 
 ## Project Setup
-### 1. Software
+### 1. Software
 * Python 3.7
 * PyTorch >= 1.5.1
 * opencv-python
 * moviepy
+* shutil (part of the Python standard library, no separate install required)
 
 ## Related Projects
 ### 1. Face detection project (yolo_v3)
@@ -29,17 +30,54 @@
 euler_angle-resnet_18_imgsize_256.pth  # face pose-angle (pitch, yaw, roll) model
 face_multitask-resnet_50_imgsize-256-20210411.pth  # gender, age and landmarks model
 face_verify-model_ir_se-50.pth  # face-recognition feature-extraction model
+
+facebank/facebank.pth  # feature vectors of the face-matching gallery
+
+facebank/names.npy  # face ids of the face-matching gallery; in this example the face ids are person names
+```
+
+* The face ids currently provided in the example gallery are:
+```
+['AngelinaJolie' 'AnneHathaway' 'BradPitt' 'JenniferAniston'
+ 'JohnnyDepp' 'JudeLaw' 'NicoleKidman' 'ScarlettJohansson' 'TomCruise']
 ```
 
 ## How to Use
 ### 1. Download the project's pretrained-model package.
-### 2. Build the face-matching gallery; see the script [make_facebank.py](https://codechina.csdn.net/EricLee/dpcas/-/blob/master/lib/wyw2s_lib/make_facebank_tools/make_facebank.py)
+### 2. Build the face-matching gallery (an example gallery already ships with the project; skip this step if you do not need your own); see the script [make_facebank.py](https://codechina.csdn.net/EricLee/dpcas/-/blob/master/lib/wyw2s_lib/make_facebank_tools/make_facebank.py)
 ### 3. Open the config file lib/wyw2s_lib/cfg/[wyw2s.cfg](https://codechina.csdn.net/EricLee/dpcas/-/blob/master/lib/wyw2s_lib/cfg/wyw2s.cfg) and set the parameters described below; please read them carefully.
 ```
-YouWantToSee=BradPitt
+YouWantToSee=BradPitt  # the face id to crop for; a person name in this example, it must match the entries in facebank/names.npy and facebank/facebank.pth
+
+detect_model_path=./wyw2s_models/face_yolo_416-20210418.pt  # face detection model
+detect_model_arch=yolo  # model architecture
+detect_input_size = 416  # input image size of the model
+yolo_anchor_scale=1.  # anchor scaling factor, default 1
+detect_conf_thres=0.4  # face-detection confidence threshold; detections above it are kept
+detect_nms_thres=0.45  # NMS threshold for detection
+
+face_verify_backbone_path=./wyw2s_models/face_verify-model_ir_se-50.pth  # path of the face-recognition feature-extraction model
+facebank_path=./wyw2s_models/facebank  # path of the face gallery
+face_verify_threshold=1.2  # face-matching threshold; distances below it count as a match
+
+face_multitask_model_path=./wyw2s_models/face_multitask-resnet_50_imgsize-256-20210411.pth  # path of the face multi-task (gender, age, landmarks) model
+
+face_euler_model_path=./wyw2s_models/euler_angle-resnet_18_imgsize_256.pth  # path of the face pose-angle (yaw, pitch, roll) regression model
+```
+
+### 4. Download the sample video
+* [Sample video download (Baidu Netdisk, password: jaqh)](https://pan.baidu.com/s/1CSbfA1nHDhfCyt4_2NSRQg)
+* or use a sample video of your own
+
+### 5. Run the "Who You Want To See" application
+* Open main.py and set the following parameters:
+```
+APP_P = "wyw2s"  # select the application
+cfg_file = "./lib/wyw2s_lib/cfg/wyw2s.cfg"  # select the config file
+main_wyw2s(video_path = "./video/f1.mp4",cfg_file = cfg_file)  # set the video path and load the who you want 2 see application
 ```
-### 4. From the project root run: python main.py
+* From the project root run: python main.py
 
 ## Contact
 * E-mails: 305141918@qq.com
diff --git a/main.py b/main.py
index c466e39e33427794cb9725c219540833aa9c4e3f..9fafd5e321b33cbc56b0bcbf965747b176d38d5d 100644
--- a/main.py
+++ b/main.py
@@ -22,8 +22,8 @@ import sys
 sys.path.append("./components/")  # add the model-components path
 
 from applications.handpose_local_app import main_handpose_x  # load the handpose application
-from applications.wyw2s_local_app import main_wyw2s  # load the whoyouwant2see application
-
+from applications.wyw2s_local_app import main_wyw2s  # load the who you want 2 see application
+# from applications.video_analysis_app import main_video_analysis  # load the video_analysis application
 def demo_logo():
     print("\n/*********************************/")
     print("/---------------------------------/\n")
@@ -45,6 +45,8 @@ if __name__ == '__main__':
 
     elif APP_P == "wyw2s":  # face-recognition-based video clipping
         cfg_file = "./lib/wyw2s_lib/cfg/wyw2s.cfg"
+        main_wyw2s(video_path = "./video/f1.mp4",cfg_file = cfg_file)  # load the who you want 2 see application
-        main_wyw2s(cfg_file,video_path = "./video/f1.mp4")  # load the handpose application
+    # elif APP_P == "video_ana":  # video-analysis application
+    #     main_video_analysis(video_path = "./video/f3.mp4")  # load the video_analysis application
 
     print(" well done ~")
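The wyw2s.cfg format documented above is a flat list of `key=value` lines with `#` inline comments. For reference, below is a minimal sketch of a loader for that format; the function name `parse_cfg` and the comment-stripping behavior are illustrative assumptions, not code taken from this repository.

```python
# Minimal sketch of a loader for the wyw2s.cfg format shown above.
# Assumptions (not from the repo): '#' starts an inline comment,
# blank lines are ignored, and every value is kept as a raw string.
def parse_cfg(path):
    cfg = {}
    with open(path, "r", encoding="utf-8") as f:
        for raw in f:
            line = raw.split("#", 1)[0].strip()  # drop inline comments
            if not line or "=" not in line:      # skip blank/malformed lines
                continue
            key, value = line.split("=", 1)
            cfg[key.strip()] = value.strip()
    return cfg

if __name__ == "__main__":
    cfg = parse_cfg("./lib/wyw2s_lib/cfg/wyw2s.cfg")
    print(cfg["YouWantToSee"])              # e.g. "BradPitt"
    print(float(cfg["detect_conf_thres"]))  # e.g. 0.4
```

Note that values such as `detect_input_size = 416` carry spaces around `=`, so the keys and values are stripped after the split; numeric settings are converted by the caller as needed.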