update wyw2s

54a7374d · DataBall · 13e46365 · 13e46365 · 13e46365 · 13e46365
6 changed files
--- a/components/face_detect/utils/common_utils.py
+++ b/components/face_detect/utils/common_utils.py
-#-*-coding:utf-8-*-
-# date:2020-04-11
-# Author: Eric.Lee
-
-import os
-import shutil
-import cv2
-import numpy as np
-import json
-import torch
-from dp_models.faceboxes.config import cfg
-from dp_models.faceboxes.layers.functions.prior_box import PriorBox
-from dp_models.faceboxes.utils.box_utils import decode
-from dp_models.faceboxes.headpose.pose import *
-import torch.nn.functional as F
-
-def mkdir_(path, flag_rm=False):
-    """Make sure the directory ``path`` exists, optionally emptying it first.
-
-    Args:
-        path: Directory to create.
-        flag_rm: When truthy and ``path`` already exists, the whole tree is
-            deleted and an empty directory is recreated in its place.
-
-    An existing directory is left untouched unless ``flag_rm`` is set.
-    """
-    if os.path.exists(path):
-        if flag_rm:
-            shutil.rmtree(path)
-            os.makedirs(path)
-            print('remove {} done ~ '.format(path))
-    else:
-        # makedirs rather than mkdir: a nested path whose parents do not exist
-        # yet used to fail with FileNotFoundError.
-        os.makedirs(path)
-
-def plot_box(bbox, img, color=None, label=None, line_thickness=None):
-    """Draw one bounding box, and optionally a filled caption, onto ``img``.
-
-    Args:
-        bbox: Sequence whose first four items are x1, y1, x2, y2.
-        img: Image array that is drawn on in place.
-        color: Box colour; a random colour is picked when omitted.
-        label: Optional caption drawn above the top-left corner.
-        line_thickness: Line width; derived from the image size when omitted.
-    """
-    thickness = line_thickness or round(0.002 * max(img.shape[0:2])) + 1
-    if not color:
-        color = [random.randint(0, 255) for _ in range(3)]
-    top_left = (int(bbox[0]), int(bbox[1]))
-    bottom_right = (int(bbox[2]), int(bbox[3]))
-    cv2.rectangle(img, top_left, bottom_right, color, thickness=thickness)  # bbox of the target
-    if not label:
-        return
-    font_thickness = max(thickness - 2, 1)
-    font_scale = thickness / 4
-    text_w, text_h = cv2.getTextSize(label, 0, fontScale=font_scale, thickness=font_thickness)[0]  # label size
-    caption_corner = top_left[0] + text_w, top_left[1] - text_h - 3  # bbox of the caption
-    cv2.rectangle(img, top_left, caption_corner, color, -1)  # filled caption background
-    # caption text
-    cv2.putText(img, label, (top_left[0], top_left[1] - 2), 0, font_scale, [225, 255, 255], thickness=font_thickness, lineType=cv2.LINE_AA)
-
-class JSON_Encoder(json.JSONEncoder):
-    """JSON encoder that also accepts NumPy scalars and arrays."""
-
-    def default(self, obj):
-        """Convert NumPy values to plain Python; defer everything else to the base class."""
-        if isinstance(obj, np.ndarray):
-            return obj.tolist()
-        if isinstance(obj, np.integer):
-            return int(obj)
-        if isinstance(obj, np.floating):
-            return float(obj)
-        return super().default(obj)
-
-def draw_landmarks(img,output,r_bboxes,draw_circle):
-    """Group the predicted landmarks of one face by facial part.
-
-    Args:
-        img: Face crop; landmark dots are drawn on it in place.
-        output: Flat sequence ``[x0, y0, x1, y1, ...]`` with coordinates
-            expressed as fractions of the crop width/height.
-        r_bboxes: Box of this single crop in the full image; only items 0 and 1
-            (x and y offset) are read.
-        draw_circle: Whether to draw a dot for every grouped landmark.
-
-    Returns:
-        A tuple ``(dict_landmarks, faceswap_list_e, global_dict_landmarks, face_pts)``:
-        ``dict_landmarks`` maps part name to ``[x, y, colour]`` entries in crop
-        coordinates; ``faceswap_list_e`` holds the x values of the first five
-        anchor landmarks followed by their y values (full-image coordinates);
-        ``global_dict_landmarks`` maps part name to ``[x, y]`` in full-image
-        coordinates; ``face_pts`` lists every landmark in full-image coordinates.
-
-    Raises:
-        IndexError: If fewer than five anchor landmarks are present in ``output``.
-    """
-    # (first index, last index, part name, stored colour, dot colour).
-    # The stored and the drawn colour differ for the eyes, as in the original code.
-    groups = (
-        (0, 32, 'basin', (255,30,30), (255,30,30)),
-        (33, 41, 'left_eyebrow', (0,255,0), (0,255,0)),
-        (42, 50, 'right_eyebrow', (0,255,0), (0,255,0)),
-        (51, 54, 'bridge_nose', (0,170,255), (0,170,255)),
-        (55, 59, 'wing_nose', (0,255,255), (0,255,255)),
-        (60, 67, 'left_eye', (255,55,255), (255,0,255)),
-        (68, 75, 'right_eye', (255,55,255), (255,0,255)),
-        (76, 87, 'out_lip', (255,255,0), (255,255,0)),
-        (88, 95, 'in_lip', (50,220,255), (50,220,255)),
-    )
-    faceswap_ids = (33, 46, 96, 97, 54, 76, 82)
-
-    img_width = img.shape[1]
-    img_height = img.shape[0]
-    off_x, off_y = r_bboxes[0], r_bboxes[1]
-
-    dict_landmarks = {}
-    global_dict_landmarks = {}  # coordinates in the full-image frame
-    faceswap_list = []
-    face_pts = []
-
-    for i in range(int(output.shape[0]/2)):
-        x = output[i*2+0]*float(img_width)
-        y = output[i*2+1]*float(img_height)
-
-        face_pts.append([x+off_x,y+off_y])
-
-        if i in faceswap_ids:
-            faceswap_list.append((x+off_x,y+off_y))
-
-        if 96 <= i <= 97:
-            # Eye centres are only reported in full-image coordinates.
-            # The original membership test looked at dict_landmarks, which never
-            # holds this key, so the list was reset for every point and only the
-            # last one survived; setdefault keeps both.
-            global_dict_landmarks.setdefault('eye_center', []).append([int(x+off_x),int(y+off_y)])
-            # This dot is drawn regardless of draw_circle, as before.
-            cv2.circle(img, (int(x),int(y)), 2, (0,0,255),-1)
-            continue
-
-        for first, last, name, color, dot_color in groups:
-            if first <= i <= last:
-                dict_landmarks.setdefault(name, []).append([int(x),int(y),color])
-                global_dict_landmarks.setdefault(name, []).append([int(x+off_x),int(y+off_y)])
-                if draw_circle:
-                    cv2.circle(img, (int(x),int(y)), 2, dot_color,-1)
-                break
-
-    # First five anchors in index order: all x values, then all y values.
-    faceswap_list_e = [faceswap_list[k][0] for k in range(5)]
-    faceswap_list_e += [faceswap_list[k][1] for k in range(5)]
-
-    return dict_landmarks,faceswap_list_e,global_dict_landmarks,face_pts
-
-def draw_contour(image,dict,r_bbox,face_pts):
-    """Draw the outline of every landmark group on ``image`` and return the head pose.
-
-    Args:
-        image: Full image, drawn on in place.
-        dict: Part name -> list of ``[x, y, colour]`` entries in crop coordinates.
-        r_bbox: Box of the crop in the full image; items 0 and 1 are the offset.
-        face_pts: All landmarks in full-image coordinates.
-
-    Returns:
-        ``(pitch, yaw, roll)`` as unpacked from ``get_head_pose``.
-    """
-    x0, y0 = r_bbox[0], r_bbox[1]  # offset of the crop inside the full image
-
-    # Landmarks handed to the head-pose estimator, in this exact order.
-    pose_ids = (33, 38, 50, 46, 60, 64, 68, 72, 51, 55, 59, 53, 57)
-    face_ola_pts = [face_pts[k] for k in pose_ids]
-    pts_num = len(face_ola_pts)
-    reprojectdst, euler_angle = get_head_pose(np.array(face_ola_pts).reshape((pts_num,2)),image,vis = False)
-    pitch, yaw, roll = euler_angle
-
-    eye_dot_colors = {'left_eye': (255,255,55), 'right_eye': (255,215,25)}
-
-    for key in dict:
-        group = dict[key]
-        _, _, color = group[0]
-
-        if key in eye_dot_colors:
-            # Mark the mean position of the eye landmarks.
-            eye_x = np.mean([p[0]+x0 for p in group])
-            eye_y = np.mean([p[1]+y0 for p in group])
-            cv2.circle(image, (int(eye_x),int(eye_y)), 3, eye_dot_colors[key],-1)
-
-        if key in ('basin', 'wing_nose'):
-            # Open polyline.
-            pts = np.array([[p[0]+x0,p[1]+y0] for p in group],np.int32)
-            cv2.polylines(image,[pts],False,color,thickness = 2)
-        else:
-            # Every other part (eyes included) is drawn as a contour.
-            points_array = np.zeros((1,len(group),2),dtype = np.int32)
-            for n, (x, y, _) in enumerate(group):
-                points_array[0,n,0] = x+x0
-                points_array[0,n,1] = y+y0
-            cv2.drawContours(image,points_array,-1,color,thickness=2)
-    return (pitch, yaw, roll)
-
-import random
-# Palette of 100 random colour triples, generated once at import time.
-rgbs = []
-for j in range(100):
-    rgb = tuple(random.randint(0,255) for _ in range(3))
-    rgbs.append(rgb)
-
-def draw_global_contour(image,dict):
-    """Draw landmark groups given in full-image coordinates onto ``image``.
-
-    Args:
-        image: Image that is drawn on in place.
-        dict: Part name -> list of ``[x, y]`` points.
-
-    Each group is coloured with ``rgbs[n]`` where ``n`` is its 1-based
-    position in ``dict``.
-    """
-    eye_dot_colors = {'left_eye': (255,255,55), 'right_eye': (255,215,25)}
-
-    for idx, key in enumerate(dict, start=1):
-        group = dict[key]
-
-        if key in eye_dot_colors:
-            # Mark the mean position of the eye landmarks.
-            eye_x = np.mean([p[0] for p in group])
-            eye_y = np.mean([p[1] for p in group])
-            cv2.circle(image, (int(eye_x),int(eye_y)), 3, eye_dot_colors[key],-1)
-
-        if key in ('basin', 'wing_nose'):
-            # Open polyline.
-            pts = np.array([[p[0],p[1]] for p in group],np.int32)
-            cv2.polylines(image,[pts],False,rgbs[idx],thickness = 2)
-        else:
-            # Every other part (eyes included) is drawn as a contour.
-            points_array = np.zeros((1,len(group),2),dtype = np.int32)
-            for n, (x, y) in enumerate(group):
-                points_array[0,n,0] = x
-                points_array[0,n,1] = y
-            cv2.drawContours(image,points_array,-1,rgbs[idx],thickness=2)
-
-def refine_face_bbox(bbox,img_shape):
-    """Adjust a face box by fixed fractions of its size and clamp it to the image.
-
-    Args:
-        bbox: ``(x1, y1, x2, y2)``.
-        img_shape: ``(height, width, channels)`` of the image.
-
-    Returns:
-        The adjusted ``(x1, y1, x2, y2)`` as ints inside the image bounds.
-    """
-    height, width, _ = img_shape
-    left, top, right, bottom = bbox
-
-    box_w = right - left
-    box_h = bottom - top
-
-    # Widen by 6% per side, move the top edge down by 15% and the bottom edge
-    # down by 3% of the box height, then truncate to int.
-    left = int(left - box_w*0.06)
-    top = int(top + box_h*0.15)
-    right = int(right + box_w*0.06)
-    bottom = int(bottom + box_h*0.03)
-
-    return (
-        int(max(0,left)),
-        int(max(0,top)),
-        int(min(right,width-1)),
-        int(min(bottom,height-1)),
-    )
-def py_cpu_nms(dets, thresh):
-    """Pure Python NMS baseline.
-
-    Args:
-        dets: 2-D array whose columns 0-4 are x1, y1, x2, y2, score.
-        thresh: Boxes overlapping a kept box by more than this IoU are dropped.
-
-    Returns:
-        List of row indices of the kept boxes, highest score first.
-    """
-    left, top, right, bottom, scores = (dets[:, c] for c in range(5))
-
-    areas = (right - left + 1) * (bottom - top + 1)
-    order = scores.argsort()[::-1]
-
-    keep = []
-    while order.size > 0:
-        best, rest = order[0], order[1:]
-        keep.append(best)
-
-        # Intersection of the best box with every remaining candidate.
-        ov_w = np.maximum(0.0, np.minimum(right[best], right[rest]) - np.maximum(left[best], left[rest]) + 1)
-        ov_h = np.maximum(0.0, np.minimum(bottom[best], bottom[rest]) - np.maximum(top[best], top[rest]) + 1)
-        inter = ov_w * ov_h
-        iou = inter / (areas[best] + areas[rest] - inter)
-
-        # Continue with the candidates that do not overlap too much.
-        order = rest[np.where(iou <= thresh)[0]]
-
-    return keep
-
-def check_keys(model, pretrained_state_dict):
-    """Verify that a checkpoint shares at least one parameter name with ``model``.
-
-    Args:
-        model: Object exposing ``state_dict()``.
-        pretrained_state_dict: Mapping of parameter name to value.
-
-    Returns:
-        ``True`` when at least one key is shared.
-
-    Raises:
-        AssertionError: If no checkpoint key matches a model key.
-    """
-    ckpt_keys = set(pretrained_state_dict.keys())
-    model_keys = set(model.state_dict().keys())
-    # Raised explicitly instead of via ``assert`` so the check also runs under
-    # ``python -O``; the exception type is unchanged for callers.
-    if not (model_keys & ckpt_keys):
-        raise AssertionError('load NONE from pretrained checkpoint')
-    return True
-
-def remove_prefix(state_dict, prefix):
-    ''' Return a copy of ``state_dict`` whose keys have one leading ``prefix`` removed.
-
-    Old style model is stored with all names of parameters sharing common prefix 'module.'
-    '''
-    def strip(name):
-        if not name.startswith(prefix):
-            return name
-        # name starts with prefix, so the tail of the partition is the remainder
-        return name.partition(prefix)[2]
-
-    return {strip(key): value for key, value in state_dict.items()}
-
-
-def load_model(model, pretrained_path, load_to_cpu):
-    """Load checkpoint weights from ``pretrained_path`` into ``model`` and return it.
-
-    Args:
-        model: Network receiving the weights (loaded with ``strict=False``).
-        pretrained_path: Checkpoint file passed to ``torch.load``.
-        load_to_cpu: Keep tensors on the CPU when true, otherwise move them to
-            the current CUDA device.
-    """
-    if load_to_cpu:
-        def map_location(storage, loc):
-            return storage
-    else:
-        device = torch.cuda.current_device()
-
-        def map_location(storage, loc):
-            return storage.cuda(device)
-
-    pretrained_dict = torch.load(pretrained_path, map_location=map_location)
-    # The weights may be wrapped under a "state_dict" entry.
-    weights = pretrained_dict['state_dict'] if "state_dict" in pretrained_dict.keys() else pretrained_dict
-    pretrained_dict = remove_prefix(weights, 'module.')
-    check_keys(model, pretrained_dict)
-    model.load_state_dict(pretrained_dict, strict=False)
-    return model
-
-
-def detect_faces(ops,detect_model,img_raw,device):
-    """Run the face detector on one image and return the detections after NMS.
-
-    Args:
-        ops: Options object; ``confidence_threshold``, ``top_k``,
-            ``nms_threshold`` and ``keep_top_k`` are read.
-        detect_model: Detector called with the preprocessed image; returns
-            ``(loc, conf)``.
-        img_raw: Input image array (height, width, channels).
-        device: Torch device the tensors are moved to.
-
-    Returns:
-        float32 array with one row ``x1, y1, x2, y2, score`` per kept detection.
-    """
-    resize = 1
-    img = np.float32(img_raw)
-    if resize != 1:
-        img = cv2.resize(img, None, None, fx=resize, fy=resize, interpolation=cv2.INTER_LINEAR)
-    im_height, im_width, _ = img.shape
-    scale = torch.Tensor([im_width, im_height, im_width, im_height])
-
-    # Subtract the per-channel mean, then HWC -> 1xCxHxW.
-    img -= (104, 117, 123)
-    net_input = torch.from_numpy(img.transpose(2, 0, 1)).unsqueeze(0).to(device)
-    scale = scale.to(device)
-
-    loc, conf = detect_model(net_input)  # forward pass
-
-    priors = PriorBox(cfg, image_size=(im_height, im_width)).forward().to(device)
-    boxes = decode(loc.data.squeeze(0), priors.data, cfg['variance'])
-    boxes = (boxes * scale / resize).cpu().numpy()
-    scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
-
-    # ignore low scores
-    confident = np.where(scores > ops.confidence_threshold)[0]
-    boxes, scores = boxes[confident], scores[confident]
-
-    # keep top-K before NMS
-    ranked = scores.argsort()[::-1][:ops.top_k]
-    boxes, scores = boxes[ranked], scores[ranked]
-
-    # do NMS
-    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
-    dets = dets[py_cpu_nms(dets, ops.nms_threshold), :]
-
-    # keep top-K after NMS
-    return dets[:ops.keep_top_k, :]
-
-
-
-
-def get_faces_batch_landmarks(ops,landmarks_model,express_model,dets,img_raw,use_cuda,draw_bbox = True):
-    """Predict expression, landmarks and head pose for every detected face.
-
-    Args:
-        ops: Options object (not read by this function).
-        landmarks_model: Model returning the landmark tensor for a batch.
-        express_model: Model returning expression logits for a batch.
-        dets: Detections, one row ``x1, y1, x2, y2, score`` per face.
-        img_raw: Full image; annotations are drawn on it in place.
-        use_cuda: Move the batch to the GPU before inference.
-        draw_bbox: Draw scores, boxes, anchor points, expression text and the
-            face counter. Contours and the final magenta box are drawn
-            regardless of this flag.
-
-    Returns:
-        One dict per face with the keys ``xyxy``, ``score``, ``landmarks``,
-        ``euler_angle`` and ``express``; an empty list when ``dets`` is empty.
-    """
-    face_count_text = 'face:'+str(len(dets))
-
-    if len(dets) == 0:
-        # No faces: there is nothing to crop or infer. Without this guard the
-        # batch stayed None and torch.from_numpy raised TypeError.
-        if draw_bbox:
-            cv2.putText(img_raw, face_count_text, (3,35),cv2.FONT_HERSHEY_DUPLEX, 1.45, (55, 255, 255),5)
-            cv2.putText(img_raw, face_count_text, (3,35),cv2.FONT_HERSHEY_DUPLEX, 1.45, (135, 135, 5),2)
-        return []
-
-    # ---- crop every face and build the network batch
-    r_bboxes = []
-    imgs_crop = []
-    batch = []
-    for b in dets:
-        b = list(map(int, b))
-        r_bbox = refine_face_bbox((b[0],b[1],b[2],b[3]),img_raw.shape)
-        r_bboxes.append(r_bbox)
-        img_crop = img_raw[r_bbox[1]:r_bbox[3],r_bbox[0]:r_bbox[2]]
-        imgs_crop.append(img_crop)
-        img_ = cv2.resize(img_crop, (256,256), interpolation = cv2.INTER_LINEAR)
-        img_ = (img_.astype(np.float32)-128.)/256.
-        batch.append(img_.transpose(2, 0, 1))
-
-    # ---- detector box and score (drawn after cropping so the network input is clean)
-    if draw_bbox:
-        for b in dets:
-            text = "{:.4f}".format(b[4])
-            b = list(map(int, b))
-            cv2.rectangle(img_raw, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 2)
-            cx = b[0]
-            cy = b[1] - 3
-            cv2.putText(img_raw, text, (cx, cy),cv2.FONT_HERSHEY_DUPLEX, 0.6, (155, 155, 255),3)
-            cv2.putText(img_raw, text, (cx, cy),cv2.FONT_HERSHEY_DUPLEX, 0.6, (155, 10, 10),1)
-
-    # One stack instead of a concatenate per face.
-    image_batch = torch.from_numpy(np.stack(batch, axis=0)).float()
-    if use_cuda:
-        image_batch = image_batch.cuda()  # (bs, 3, h, w)
-
-    #----------------- express
-    outputs_e = F.softmax(express_model(image_batch.float()),dim = 1).cpu().detach().numpy()
-    max_index_e = np.argmax(outputs_e,axis = 1)
-    express_dict = {
-        0:"001.anger",
-        1:"002.disgust",
-        2:"003.fear",
-        3:"004.happy",
-        4:"005.normal",
-        5:"006.sad",
-        6:"007.surprised",
-        }
-    # (class index, class name, softmax score) per face
-    express_list = [
-        (idx, express_dict[idx], outputs_e[row][idx])
-        for row, idx in enumerate(max_index_e)
-    ]
-
-    #----------------- landmarks
-    output = landmarks_model(image_batch.float()).cpu().detach().numpy()
-
-    faceswap_landmarks = []
-    output_dict_ = []
-    for i in range(len(dets)):
-        dict_landmarks,list_e,global_dict_landmarks,face_pts = draw_landmarks(imgs_crop[i],output[i],r_bboxes[i],draw_circle = False)
-        faceswap_landmarks.append(list_e)
-        pitch, yaw, roll = draw_contour(img_raw,dict_landmarks,r_bboxes[i],face_pts)
-
-        output_dict_.append({
-            "xyxy":(r_bboxes[i][0],r_bboxes[i][1],r_bboxes[i][2],r_bboxes[i][3]),
-            "score":str(dets[i][4]),
-            "landmarks":global_dict_landmarks,
-            "euler_angle":(int(pitch[0]), int(yaw[0]), int(roll[0])),
-            "express":(float(express_list[i][0]),float(express_list[i][2])),
-            })
-
-    # ---- final overlay: box, five anchor points, expression text
-    for i in range(len(dets)):
-        bbox = dets[i]
-        min_x = int(bbox[0])
-        min_y = int(bbox[1])
-        max_x = int(bbox[2])
-        max_y = int(bbox[3])
-        cv2.rectangle(img_raw, (min_x, min_y), (max_x, max_y), (255, 0, 255), thickness=4)
-        if draw_bbox:
-            for k in range(5):
-                # anchors are stored as five x values followed by five y values
-                x = int(faceswap_landmarks[i][k+0])
-                y = int(faceswap_landmarks[i][k+5])
-                cv2.circle(img_raw,(x,y),2,(0,0,255),-1)
-            express_text = "express:{},{:.2f}".format(express_list[i][1],express_list[i][2])
-            cv2.putText(img_raw, express_text, (min_x, min_y-20),cv2.FONT_HERSHEY_DUPLEX, 0.6, (155, 155, 255),3)
-            cv2.putText(img_raw, express_text, (min_x, min_y-20),cv2.FONT_HERSHEY_DUPLEX, 0.6, (155, 10, 10),1)
-    if draw_bbox:
-        cv2.putText(img_raw, face_count_text, (3,35),cv2.FONT_HERSHEY_DUPLEX, 1.45, (55, 255, 255),5)
-        cv2.putText(img_raw, face_count_text, (3,35),cv2.FONT_HERSHEY_DUPLEX, 1.45, (135, 135, 5),2)
-
-    return output_dict_
-def get_faces_batch_landmarks_plfd(ops,landmarks_model,express_model,dets,img_raw,use_cuda,draw_bbox = True):
-    """Predict expression, landmarks and head pose per face with a two-output landmark model.
-
-    Compared with ``get_faces_batch_landmarks`` the crops are resized to
-    112x112, scaled by 1/256 without mean subtraction, and the landmark model
-    is expected to return a pair whose second item holds the landmarks.
-
-    Args:
-        ops: Options object (not read by this function).
-        landmarks_model: Model returning ``(_, landmarks)`` for a batch.
-        express_model: Model returning expression logits for a batch.
-        dets: Detections, one row ``x1, y1, x2, y2, score`` per face.
-        img_raw: Full image; annotations are drawn on it in place.
-        use_cuda: Move the batch to the GPU before inference.
-        draw_bbox: Draw scores, boxes, anchor points, expression text and the
-            face counter. Contours and the final magenta box are drawn
-            regardless of this flag.
-
-    Returns:
-        One dict per face with the keys ``xyxy``, ``score``, ``landmarks``,
-        ``euler_angle`` and ``express``; an empty list when ``dets`` is empty.
-    """
-    face_count_text = 'face:'+str(len(dets))
-
-    if len(dets) == 0:
-        # No faces: there is nothing to crop or infer. Without this guard the
-        # batch stayed None and torch.from_numpy raised TypeError.
-        if draw_bbox:
-            cv2.putText(img_raw, face_count_text, (3,35),cv2.FONT_HERSHEY_DUPLEX, 1.45, (55, 255, 255),5)
-            cv2.putText(img_raw, face_count_text, (3,35),cv2.FONT_HERSHEY_DUPLEX, 1.45, (135, 135, 5),2)
-        return []
-
-    # ---- crop every face and build the network batch
-    r_bboxes = []
-    imgs_crop = []
-    batch = []
-    for b in dets:
-        b = list(map(int, b))
-        r_bbox = refine_face_bbox((b[0],b[1],b[2],b[3]),img_raw.shape)
-        r_bboxes.append(r_bbox)
-        img_crop = img_raw[r_bbox[1]:r_bbox[3],r_bbox[0]:r_bbox[2]]
-        imgs_crop.append(img_crop)
-        img_ = cv2.resize(img_crop, (112,112), interpolation = cv2.INTER_LINEAR)
-        img_ = img_.astype(np.float32)/256.
-        batch.append(img_.transpose(2, 0, 1))
-
-    # ---- detector box and score (drawn after cropping so the network input is clean)
-    if draw_bbox:
-        for b in dets:
-            text = "{:.4f}".format(b[4])
-            b = list(map(int, b))
-            cv2.rectangle(img_raw, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 2)
-            cx = b[0]
-            cy = b[1] - 3
-            cv2.putText(img_raw, text, (cx, cy),cv2.FONT_HERSHEY_DUPLEX, 0.6, (155, 155, 255),3)
-            cv2.putText(img_raw, text, (cx, cy),cv2.FONT_HERSHEY_DUPLEX, 0.6, (155, 10, 10),1)
-
-    # One stack instead of a concatenate per face.
-    image_batch = torch.from_numpy(np.stack(batch, axis=0)).float()
-    if use_cuda:
-        image_batch = image_batch.cuda()  # (bs, 3, h, w)
-
-    #----------------- express
-    outputs_e = F.softmax(express_model(image_batch.float()),dim = 1).cpu().detach().numpy()
-    max_index_e = np.argmax(outputs_e,axis = 1)
-    express_dict = {
-        0:"001.anger",
-        1:"002.disgust",
-        2:"003.fear",
-        3:"004.happy",
-        4:"005.normal",
-        5:"006.sad",
-        6:"007.surprised",
-        }
-    # (class index, class name, softmax score) per face
-    express_list = [
-        (idx, express_dict[idx], outputs_e[row][idx])
-        for row, idx in enumerate(max_index_e)
-    ]
-
-    #----------------- landmarks (second output of the model)
-    _,pre_ = landmarks_model(image_batch.float())
-    output = pre_.cpu().detach().numpy()
-
-    faceswap_landmarks = []
-    output_dict_ = []
-    for i in range(len(dets)):
-        dict_landmarks,list_e,global_dict_landmarks,face_pts = draw_landmarks(imgs_crop[i],output[i],r_bboxes[i],draw_circle = False)
-        faceswap_landmarks.append(list_e)
-        pitch, yaw, roll = draw_contour(img_raw,dict_landmarks,r_bboxes[i],face_pts)
-
-        output_dict_.append({
-            "xyxy":(r_bboxes[i][0],r_bboxes[i][1],r_bboxes[i][2],r_bboxes[i][3]),
-            "score":str(dets[i][4]),
-            "landmarks":global_dict_landmarks,
-            "euler_angle":(int(pitch[0]), int(yaw[0]), int(roll[0])),
-            "express":(float(express_list[i][0]),float(express_list[i][2])),
-            })
-
-    # ---- final overlay: box, five anchor points, expression text
-    for i in range(len(dets)):
-        bbox = dets[i]
-        min_x = int(bbox[0])
-        min_y = int(bbox[1])
-        max_x = int(bbox[2])
-        max_y = int(bbox[3])
-        cv2.rectangle(img_raw, (min_x, min_y), (max_x, max_y), (255, 0, 255), thickness=2)
-        if draw_bbox:
-            for k in range(5):
-                # anchors are stored as five x values followed by five y values
-                x = int(faceswap_landmarks[i][k+0])
-                y = int(faceswap_landmarks[i][k+5])
-                cv2.circle(img_raw,(x,y),2,(0,0,255),-1)
-            express_text = "express:{},{:.2f}".format(express_list[i][1],express_list[i][2])
-            cv2.putText(img_raw, express_text, (min_x, min_y-20),cv2.FONT_HERSHEY_DUPLEX, 0.6, (155, 155, 255),3)
-            cv2.putText(img_raw, express_text, (min_x, min_y-20),cv2.FONT_HERSHEY_DUPLEX, 0.6, (155, 10, 10),1)
-
-    if draw_bbox:
-        cv2.putText(img_raw, face_count_text, (3,35),cv2.FONT_HERSHEY_DUPLEX, 1.45, (55, 255, 255),5)
-        cv2.putText(img_raw, face_count_text, (3,35),cv2.FONT_HERSHEY_DUPLEX, 1.45, (135, 135, 5),2)
-
-    return output_dict_
--- a/components/face_detect/utils/datasets.py
+++ b/components/face_detect/utils/datasets.py
-import glob
-import math
-import os
-import random
-import shutil
-from pathlib import Path
-from PIL import Image
-from tqdm import tqdm
-import cv2
-import numpy as np
-import torch
-from torch.utils.data import Dataset
-from torch.utils.data import DataLoader
-
-def xyxy2xywh(x):
-    """Convert boxes from corner form [x1, y1, x2, y2] to centre form [x, y, w, h].
-
-    ``x`` is a 2-D torch tensor or NumPy array; the result has the same type,
-    shape and dtype.
-    """
-    make_zeros = torch.zeros_like if isinstance(x, torch.Tensor) else np.zeros_like
-    out = make_zeros(x)
-    x1, y1, x2, y2 = x[:, 0], x[:, 1], x[:, 2], x[:, 3]
-    out[:, 0] = (x1 + x2) / 2  # centre x
-    out[:, 1] = (y1 + y2) / 2  # centre y
-    out[:, 2] = x2 - x1  # width
-    out[:, 3] = y2 - y1  # height
-    return out
-
-
-def xywh2xyxy(x):
-    """Convert boxes from centre form [x, y, w, h] to corner form [x1, y1, x2, y2].
-
-    ``x`` is a 2-D torch tensor or NumPy array; the result has the same type,
-    shape and dtype.
-    """
-    make_zeros = torch.zeros_like if isinstance(x, torch.Tensor) else np.zeros_like
-    out = make_zeros(x)
-    cx, cy, w, h = x[:, 0], x[:, 1], x[:, 2], x[:, 3]
-    out[:, 0] = cx - w / 2  # left
-    out[:, 1] = cy - h / 2  # top
-    out[:, 2] = cx + w / 2  # right
-    out[:, 3] = cy + h / 2  # bottom
-    return out
-
-
-class LoadImages:  # for inference
-    """Iterate over the images and videos found at a path.
-
-    Each step yields ``(path, img, img0, cap)``: the source path, the
-    letterboxed RGB float32 CHW image scaled to 0-1, the original BGR frame
-    and the current video capture (``None`` when there are no videos).
-    """
-
-    def __init__(self, path, img_size=416):
-        """Collect the supported image and video files at ``path`` (a directory or one file)."""
-        self.height = img_size
-        img_formats = ['.jpg', '.jpeg', '.png', '.tif']
-        vid_formats = ['.mov', '.avi', '.mp4']
-
-        if os.path.isdir(path):
-            files = sorted(glob.glob('%s/*.*' % path))
-        elif os.path.isfile(path):
-            files = [path]
-        else:
-            files = []
-
-        # Split by (lower-cased) extension; anything else is ignored.
-        images, videos = [], []
-        for name in files:
-            ext = os.path.splitext(name)[-1].lower()
-            if ext in img_formats:
-                images.append(name)
-            if ext in vid_formats:
-                videos.append(name)
-
-        # Images are served first, then videos.
-        self.files = images + videos
-        self.nF = len(images) + len(videos)  # number of files
-        self.video_flag = [False] * len(images) + [True] * len(videos)
-        self.mode = 'images'
-        if any(videos):
-            self.new_video(videos[0])  # new video
-        else:
-            self.cap = None
-        assert self.nF > 0, 'No images or videos found in ' + path
-
-    def __iter__(self):
-        """Restart from the first file."""
-        self.count = 0
-        return self
-
-    def __next__(self):
-        """Return the next image, or the next frame of the current video."""
-        if self.count == self.nF:
-            raise StopIteration
-        path = self.files[self.count]
-
-        if not self.video_flag[self.count]:
-            # Read image
-            self.count += 1
-            img0 = cv2.imread(path)  # BGR
-            assert img0 is not None, 'File Not Found ' + path
-            print('image %g/%g %s: ' % (self.count, self.nF, path), end='')
-        else:
-            # Read video
-            self.mode = 'video'
-            ret_val, img0 = self.cap.read()
-            if not ret_val:
-                # Current video is exhausted: move on to the next file, if any.
-                self.count += 1
-                self.cap.release()
-                if self.count == self.nF:  # last video
-                    raise StopIteration
-                path = self.files[self.count]
-                self.new_video(path)
-                ret_val, img0 = self.cap.read()
-
-            self.frame += 1
-            print('video %g/%g (%g/%g) %s: ' % (self.count + 1, self.nF, self.frame, self.nframes, path), end='')
-
-        # Padded resize
-        img, _, _, _ = letterbox(img0, height=self.height)
-
-        # Normalize RGB
-        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB
-        img = np.ascontiguousarray(img, dtype=np.float32)  # uint8 to float32
-        img /= 255.0  # 0 - 255 to 0.0 - 1.0
-
-        return path, img, img0, self.cap
-
-    def new_video(self, path):
-        """Open the video at ``path`` and reset the frame counter."""
-        self.frame = 0
-        self.cap = cv2.VideoCapture(path)
-        self.nframes = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
-
-    def __len__(self):
-        """Return the number of files (not frames)."""
-        return self.nF  # number of files
-
-
-class LoadWebcam:  # for inference
-    """Endless iterator over frames of capture device 0; pressing Esc stops it.
-
-    Each step yields ``(img_path, img, img0, cam)``: a synthetic file name, the
-    letterboxed RGB float32 CHW image scaled to 0-1, the mirrored BGR frame
-    and the capture object.
-    """
-
-    def __init__(self, img_size=416):
-        """Open capture device 0 and remember the letterbox height."""
-        self.height = img_size
-        self.cam = cv2.VideoCapture(0)
-
-    def __iter__(self):
-        """Reset the frame counter (the first frame is numbered 0)."""
-        self.count = -1
-        return self
-
-    def __next__(self):
-        """Grab, mirror and preprocess the next webcam frame."""
-        self.count += 1
-        esc_pressed = cv2.waitKey(1) == 27  # esc to quit
-        if esc_pressed:
-            cv2.destroyAllWindows()
-            raise StopIteration
-
-        # Read image
-        ret_val, img0 = self.cam.read()
-        assert ret_val, 'Webcam Error'
-        img_path = 'webcam_%g.jpg' % self.count
-        img0 = cv2.flip(img0, 1)  # flip left-right
-
-        # Padded resize
-        img, _, _, _ = letterbox(img0, height=self.height)
-
-        # Normalize RGB
-        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB
-        img = np.ascontiguousarray(img, dtype=np.float32)  # uint8 to float32
-        img /= 255.0  # 0 - 255 to 0.0 - 1.0
-
-        return img_path, img, img0, self.cam
-
-    def __len__(self):
-        """Always 0: the stream has no known length."""
-        return 0
-
-
-class LoadImagesAndLabels(Dataset):  # for training/testing
-    def __init__(self, path, batch_size, img_size=416, augment=True, multi_scale=False):
-        """Read the image list at ``path`` and derive the matching label paths.
-
-        Args:
-            path: Text file with one image path per line (empty lines are skipped).
-            batch_size: Stored for use by the multi-scale logic.
-            img_size: Image size stored on the instance.
-            augment: Stored augmentation flag.
-            multi_scale: Stored multi-scale flag.
-
-        The image list is shuffled in place with ``np.random.shuffle``.
-        """
-        print('LoadImagesAndLabels init : ',path)
-        with open(path, 'r') as file:
-            img_files = [line for line in file.read().splitlines() if len(line) > 0]
-        np.random.shuffle(img_files)  # shuffle img_list
-        print("shuffle image...")
-        self.img_files = img_files
-        assert len(self.img_files) > 0, 'No images found in %s' % path
-
-        self.img_size = img_size  # with multi_scale this is the initial size
-        self.batch_size = batch_size
-        self.multi_scale = multi_scale
-        self.augment = augment
-        self.scale_index = 0
-        if self.multi_scale:
-            print("Multi scale images training, init img_size", self.img_size)
-        else:
-            print("Fixed scale images, img_size", self.img_size)
-
-        # Label path = image path with these substitutions applied in order.
-        substitutions = (
-            ('images', 'labels'),
-            ("JPEGImages", 'labels'),
-            ('.bmp', '.txt'),
-            ('.jpg', '.txt'),
-            ('.png', '.txt'),
-        )
-
-        def to_label_path(img_path):
-            for old, new in substitutions:
-                img_path = img_path.replace(old, new)
-            return img_path
-
-        self.label_files = [to_label_path(x) for x in self.img_files]
-
-        # print('self.img_files   : ',self.img_files[1])
-        # print('self.label_files : ',self.label_files[1])
-
-    def __len__(self):
-        return len(self.img_files)
-
-    def __getitem__(self, index):
-
-        # if self.multi_scale and (index % self.batch_size == 0) and index != 0:
-        if self.multi_scale and (self.scale_index % self.batch_size == 0)and self.scale_index != 0:
-            self.img_size = random.choice(range(11, 18)) * 32
-            # print("++++++ change img_size, index:", self.img_size, index)
-        if self.multi_scale:
-            self.scale_index += 1
-            if self.scale_index >= (100*self.batch_size):
-                self.scale_index = 0
-
-
-        img_path = self.img_files[index]
-        label_path = self.label_files[index]
-
-        img = cv2.imread(img_path)  # BGR
-        assert img is not None, 'File Not Found ' + img_path
-
-        augment_hsv = random.random() < 0.5  # hsv_aug prob = 0.5
-        if self.augment and augment_hsv:
-            # SV augmentation by 50%
-            fraction = 0.50  # must be < 1.0
-            img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
-            S = img_hsv[:, :, 1].astype(np.float32)
-            V = img_hsv[:, :, 2].astype(np.float32)
-
-            a = (random.random() * 2 - 1) * fraction + 1  # a in [-0,5, 1.5]
-            S *= a
-            if a > 1:
-                np.clip(S, None, 255, out=S)
-
-            a = (random.random() * 2 - 1) * fraction + 1
-            V *= a
-            if a > 1:
-                np.clip(V, None, 255, out=V)
-
-            img_hsv[:, :, 1] = S  # .astype(np.uint8)
-            img_hsv[:, :, 2] = V  # .astype(np.uint8)
-            cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)
-
-        h, w, _ = img.shape
-        img, ratio, padw, padh = letterbox(img, height=self.img_size, augment=self.augment)
-
-        # Load labels
-        labels = []
-        if os.path.isfile(label_path):
-            with open(label_path, 'r') as file:
-                lines = file.read().splitlines()
-
-            x = np.array([x.split() for x in lines], dtype=np.float32)
-            if x.size > 0:
-                # Normalized xywh to pixel xyxy format
-                labels = x.copy()
-                labels[:, 1] = ratio * w * (x[:, 1] - x[:, 3] / 2) + padw
-                labels[:, 2] = ratio * h * (x[:, 2] - x[:, 4] / 2) + padh
-                labels[:, 3] = ratio * w * (x[:, 1] + x[:, 3] / 2) + padw
-                labels[:, 4] = ratio * h * (x[:, 2] + x[:, 4] / 2) + padh
-
-        # Augment image and labels
-        if self.augment:
-            img, labels = random_affine(img, labels, degrees=(-10, 10), translate=(0.10, 0.10), scale=(0.9, 1.1))
-
-        nL = len(labels)  # number of labels
-        if nL:
-            # convert xyxy to xywh
-            labels[:, 1:5] = xyxy2xywh(labels[:, 1:5]) / self.img_size # 转化 格式 ，且 归一化
-
-        if self.augment:
-            # random left-right flip
-            lr_flip = True
-            if lr_flip and random.random() > 0.5:
-                img = np.fliplr(img)
-                if nL:
-                    labels[:, 1] = 1 - labels[:, 1]
-
-            # random up-down flip
-            ud_flip = False
-            if ud_flip and random.random() > 0.5:
-                img = np.flipud(img)
-                if nL:
-                    labels[:, 2] = 1 - labels[:, 2]
-
-        labels_out = torch.zeros((nL, 6))# 加了 一个 batch size
-        if nL:
-            labels_out[:, 1:] = torch.from_numpy(labels)
-
-        # Normalize
-        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
-        img = np.ascontiguousarray(img, dtype=np.float32)  # uint8 to float32
-        img /= 255.0  # 0 - 255 to 0.0 - 1.0
-
-        return torch.from_numpy(img), labels_out, img_path, (h, w)
-
-    @staticmethod
-    def collate_fn(batch):
-        img, label, path, hw = list(zip(*batch))  # transposed
-        for i, l in enumerate(label):
-            l[:, 0] = i  # 获取 物体的 归属于 图片 的 index
-        return torch.stack(img, 0), torch.cat(label, 0), path, hw
-
-
-def letterbox(img, height=416, augment=False, color=(127.5, 127.5, 127.5)):
-    # Resize a rectangular image to a padded square
-    shape = img.shape[:2]  # shape = [height, width]
-    ratio = float(height) / max(shape)  # ratio  = old / new
-    new_shape = (round(shape[1] * ratio), round(shape[0] * ratio))
-    dw = (height - new_shape[0]) / 2  # width padding
-    dh = (height - new_shape[1]) / 2  # height padding
-    top, bottom = round(dh - 0.1), round(dh + 0.1)
-    left, right = round(dw - 0.1), round(dw + 0.1)
-    # resize img
-    if augment:
-        interpolation = np.random.choice([None, cv2.INTER_NEAREST, cv2.INTER_LINEAR,
-                                          None, cv2.INTER_NEAREST, cv2.INTER_LINEAR,
-                                          cv2.INTER_AREA, cv2.INTER_CUBIC, cv2.INTER_LANCZOS4])
-        if interpolation is None:
-            img = cv2.resize(img, new_shape)
-        else:
-            img = cv2.resize(img, new_shape, interpolation=interpolation)
-    else:
-        img = cv2.resize(img, new_shape, interpolation=cv2.INTER_NEAREST)
-    # print("resize time:",time.time()-s1)
-
-    img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # padded square
-    return img, ratio, dw, dh
-
-
-def random_affine(img, targets=(), degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-2, 2),
-                  borderValue=(127.5, 127.5, 127.5)):
-    # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))
-    # https://medium.com/uruvideo/dataset-augmentation-with-random-homographies-a8f4b44830d4
-
-    if targets is None:
-        targets = []
-    border = 0  # width of added border (optional)
-    height = max(img.shape[0], img.shape[1]) + border * 2
-
-    # Rotation and Scale
-    R = np.eye(3)
-    a = random.random() * (degrees[1] - degrees[0]) + degrees[0]
-    # a += random.choice([-180, -90, 0, 90])  # 90deg rotations added to small rotations
-    s = random.random() * (scale[1] - scale[0]) + scale[0]
-    R[:2] = cv2.getRotationMatrix2D(angle=a, center=(img.shape[1] / 2, img.shape[0] / 2), scale=s)
-
-    # Translation
-    T = np.eye(3)
-    T[0, 2] = (random.random() * 2 - 1) * translate[0] * img.shape[0] + border  # x translation (pixels)
-    T[1, 2] = (random.random() * 2 - 1) * translate[1] * img.shape[1] + border  # y translation (pixels)
-
-    # Shear
-    S = np.eye(3)
-    S[0, 1] = math.tan((random.random() * (shear[1] - shear[0]) + shear[0]) * math.pi / 180)  # x shear (deg)
-    S[1, 0] = math.tan((random.random() * (shear[1] - shear[0]) + shear[0]) * math.pi / 180)  # y shear (deg)
-
-    M = S @ T @ R  # Combined rotation matrix. ORDER IS IMPORTANT HERE!!
-    imw = cv2.warpPerspective(img, M, dsize=(height, height), flags=cv2.INTER_LINEAR,
-                              borderValue=borderValue)  # BGR order borderValue
-
-    # Return warped points also
-    if len(targets) > 0:
-        n = targets.shape[0]
-        points = targets[:, 1:5].copy()
-        area0 = (points[:, 2] - points[:, 0]) * (points[:, 3] - points[:, 1])
-
-        # warp points
-        xy = np.ones((n * 4, 3))
-        xy[:, :2] = points[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(n * 4, 2)  # x1y1, x2y2, x1y2, x2y1
-        xy = (xy @ M.T)[:, :2].reshape(n, 8)
-
-        # create new boxes
-        x = xy[:, [0, 2, 4, 6]]
-        y = xy[:, [1, 3, 5, 7]]
-        xy = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T
-
-        # apply angle-based reduction of bounding boxes
-        radians = a * math.pi / 180
-        reduction = max(abs(math.sin(radians)), abs(math.cos(radians))) ** 0.5
-        x = (xy[:, 2] + xy[:, 0]) / 2
-        y = (xy[:, 3] + xy[:, 1]) / 2
-        w = (xy[:, 2] - xy[:, 0]) * reduction
-        h = (xy[:, 3] - xy[:, 1]) * reduction
-        xy = np.concatenate((x - w / 2, y - h / 2, x + w / 2, y + h / 2)).reshape(4, n).T
-
-        # reject warped points outside of image
-        np.clip(xy, 0, height, out=xy)
-        w = xy[:, 2] - xy[:, 0]
-        h = xy[:, 3] - xy[:, 1]
-        area = w * h
-        ar = np.maximum(w / (h + 1e-16), h / (w + 1e-16))
-        i = (w > 4) & (h > 4) & (area / (area0 + 1e-16) > 0.1) & (ar < 10)
-
-        targets = targets[i]
-        targets[:, 1:5] = xy[i]
-
-    return imw, targets
-
-
-def convert_images2bmp():
-    # cv2.imread() jpg at 230 img/s, *.bmp at 400 img/s
-    for path in ['../coco/images/val2014/', '../coco/images/train2014/']:
-        folder = os.sep + Path(path).name
-        output = path.replace(folder, folder + 'bmp')
-        if os.path.exists(output):
-            shutil.rmtree(output)  # delete output folder
-        os.makedirs(output)  # make new output folder
-
-        for f in tqdm(glob.glob('%s*.jpg' % path)):
-            save_name = f.replace('.jpg', '.bmp').replace(folder, folder + 'bmp')
-            cv2.imwrite(save_name, cv2.imread(f))
-
-    for label_path in ['../coco/trainvalno5k.txt', '../coco/5k.txt']:
-        with open(label_path, 'r') as file:
-            lines = file.read()
-        lines = lines.replace('2014/', '2014bmp/').replace('.jpg', '.bmp').replace(
-            '/Users/glennjocher/PycharmProjects/', '../')
-        with open(label_path.replace('5k', '5k_bmp'), 'w') as file:
-            file.write(lines)
--- a/components/face_detect/utils/utils.py
+++ b/components/face_detect/utils/utils.py
-import glob
-import random
-import time
-from collections import defaultdict
-
-import cv2
-import numpy as np
-import torch
-import torch.nn as nn
-from dp_models.light_pose.modules.keypoints import BODY_PARTS_KPT_IDS, BODY_PARTS_PAF_IDS
-
-# Set printoptions
-torch.set_printoptions(linewidth=1320, precision=5, profile='long')
-np.set_printoptions(linewidth=320, formatter={'float_kind': '{:11.5g}'.format})  # format short g, %precision=5
-
-# Prevent OpenCV from multithreading (to use PyTorch DataLoader)
-cv2.setNumThreads(0)
-
-def float3(x):  # format floats to 3 decimals
-    return float(format(x, '.3f'))
-
-def init_seeds(seed=0):
-    random.seed(seed)
-    np.random.seed(seed)
-    torch.manual_seed(seed)
-
-    if torch.cuda.is_available():
-        torch.cuda.manual_seed(seed)
-        torch.cuda.manual_seed_all(seed)
-    else:
-        torch.manual_seed(seed)
-        torch.manual_seed_all(seed)
-
-
-def load_classes(path):
-    # Loads class labels at 'path'
-    fp = open(path, 'r')
-    names = fp.read().split('\n')
-    return list(filter(None, names))  # filter removes empty strings (such as last line)
-
-
-def model_info(model):
-    # Plots a line-by-line description of a PyTorch model
-    n_p = sum(x.numel() for x in model.parameters())  # number parameters
-    n_g = sum(x.numel() for x in model.parameters() if x.requires_grad)  # number gradients
-    print('\n%5s %60s %9s %12s %20s %10s %10s' % ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', 'sigma'))
-    for i, (name, p) in enumerate(model.named_parameters()):
-        # name = name.replace('module_list.', '')
-        print('%5g %60s %9s %12g %20s %10.3g %10.3g' % (
-            i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std()))
-    print('Model Summary: %g layers, %g parameters, %g gradients' % (i + 1, n_p, n_g))
-
-
-
-
-
-def weights_init_normal(m):
-    classname = m.__class__.__name__
-    if classname.find('Conv') != -1:
-        torch.nn.init.normal_(m.weight.data, 0.0, 0.03)
-    elif classname.find('BatchNorm2d') != -1:
-        torch.nn.init.normal_(m.weight.data, 1.0, 0.03)
-        torch.nn.init.constant_(m.bias.data, 0.0)
-
-
-def xyxy2xywh(x):
-    # Convert bounding box format from [x1, y1, x2, y2] to [x, y, w, h]
-    y = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x)
-    y[:, 0] = (x[:, 0] + x[:, 2]) / 2
-    y[:, 1] = (x[:, 1] + x[:, 3]) / 2
-    y[:, 2] = x[:, 2] - x[:, 0]
-    y[:, 3] = x[:, 3] - x[:, 1]
-    return y
-
-
-def xywh2xyxy(x):
-    # Convert bounding box format from [x, y, w, h] to [x1, y1, x2, y2]
-    y = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x)
-    y[:, 0] = x[:, 0] - x[:, 2] / 2
-    y[:, 1] = x[:, 1] - x[:, 3] / 2
-    y[:, 2] = x[:, 0] + x[:, 2] / 2
-    y[:, 3] = x[:, 1] + x[:, 3] / 2
-    return y
-
-def scale_coords(img_size, coords, img0_shape):# image size 转为 原图尺寸
-    # Rescale x1, y1, x2, y2 from 416 to image size
-    # print('coords     : ',coords)
-    # print('img0_shape : ',img0_shape)
-    gain = float(img_size) / max(img0_shape)  # gain  = old / new
-    # print('gain       : ',gain)
-    pad_x = (img_size - img0_shape[1] * gain) / 2  # width padding
-    pad_y = (img_size - img0_shape[0] * gain) / 2  # height padding
-    # print('pad_xpad_y : ',pad_x,pad_y)
-    coords[:, [0, 2]] -= pad_x
-    coords[:, [1, 3]] -= pad_y
-    coords[:, :4] /= gain
-    coords[:, :4] = torch.clamp(coords[:, :4], min=0)# 夹紧区间最小值不为负数
-    return coords
-
-
-def ap_per_class(tp, conf, pred_cls, target_cls):
-    """ Compute the average precision, given the recall and precision curves.
-    Source: https://github.com/rafaelpadilla/Object-Detection-Metrics.
-    # Arguments
-        tp:    True positives (list).
-        conf:  Objectness value from 0-1 (list).
-        pred_cls: Predicted object classes (list).
-        target_cls: True object classes (list).
-    # Returns
-        The average precision as computed in py-faster-rcnn.
-    """
-
-    # Sort by objectness
-    i = np.argsort(-conf)
-    tp, conf, pred_cls = tp[i], conf[i], pred_cls[i]
-
-    # Find unique classes
-    unique_classes = np.unique(target_cls)
-
-    # Create Precision-Recall curve and compute AP for each class
-    ap, p, r = [], [], []
-    for c in unique_classes:
-        i = pred_cls == c
-        n_gt = (target_cls == c).sum()  # Number of ground truth objects
-        n_p = i.sum()  # Number of predicted objects
-
-        if n_p == 0 and n_gt == 0:
-            continue
-        elif n_p == 0 or n_gt == 0:
-            ap.append(0)
-            r.append(0)
-            p.append(0)
-        else:
-            # Accumulate FPs and TPs
-            fpc = (1 - tp[i]).cumsum()
-            tpc = (tp[i]).cumsum()
-
-            # Recall
-            recall_curve = tpc / (n_gt + 1e-16)
-            r.append(recall_curve[-1])
-
-            # Precision
-            precision_curve = tpc / (tpc + fpc)
-            p.append(precision_curve[-1])
-
-            # AP from recall-precision curve
-            ap.append(compute_ap(recall_curve, precision_curve))
-
-            # Plot
-            # plt.plot(recall_curve, precision_curve)
-
-    # Compute F1 score (harmonic mean of precision and recall)
-    p, r, ap = np.array(p), np.array(r), np.array(ap)
-    f1 = 2 * p * r / (p + r + 1e-16)
-
-    return p, r, ap, f1, unique_classes.astype('int32')
-
-
-def compute_ap(recall, precision):
-    """ Compute the average precision, given the recall and precision curves.
-    Source: https://github.com/rbgirshick/py-faster-rcnn.
-    # Arguments
-        recall:    The recall curve (list).
-        precision: The precision curve (list).
-    # Returns
-        The average precision as computed in py-faster-rcnn.
-    """
-    # correct AP calculation
-    # first append sentinel values at the end
-
-    mrec = np.concatenate(([0.], recall, [1.]))
-    mpre = np.concatenate(([0.], precision, [0.]))
-
-    # compute the precision envelope
-    for i in range(mpre.size - 1, 0, -1):
-        mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
-
-    # to calculate area under PR curve, look for points
-    # where X axis (recall) changes value
-    i = np.where(mrec[1:] != mrec[:-1])[0]
-
-    # and sum (\Delta recall) * prec
-    ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
-    return ap
-
-
-def bbox_iou(box1, box2, x1y1x2y2=True):
-    # Returns the IoU of box1 to box2. box1 is 4, box2 is nx4
-    box2 = box2.t()
-
-    # Get the coordinates of bounding boxes
-    if x1y1x2y2:
-        # x1, y1, x2, y2 = box1
-        b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
-        b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]
-    else:
-        # x, y, w, h = box1
-        b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2
-        b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2
-        b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2
-        b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2
-
-    # Intersection area
-    inter_area = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * \
-                 (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0)
-
-    # Union Area
-    union_area = ((b1_x2 - b1_x1) * (b1_y2 - b1_y1) + 1e-16) + \
-                 (b2_x2 - b2_x1) * (b2_y2 - b2_y1) - inter_area
-
-    return inter_area / union_area  # iou
-
-
-def wh_iou(box1, box2):
-
-    box2 = box2.t()
-
-    # w, h = box1
-    w1, h1 = box1[0], box1[1]
-    w2, h2 = box2[0], box2[1]
-
-    # Intersection area
-    inter_area = torch.min(w1, w2) * torch.min(h1, h2)
-
-    # Union Area
-    union_area = (w1 * h1 + 1e-16) + w2 * h2 - inter_area
-
-    return inter_area / union_area  # iou
-
-
-def compute_loss(p, targets):  # predictions, targets
-    FT = torch.cuda.FloatTensor if p[0].is_cuda else torch.FloatTensor
-    lxy, lwh, lcls, lconf = FT([0]), FT([0]), FT([0]), FT([0]) # losses 初始化 为 0
-    txy, twh, tcls, indices = targets
-    MSE = nn.MSELoss()
-    CE = nn.CrossEntropyLoss()
-    BCE = nn.BCEWithLogitsLoss()# 多标签分类时 使用 如 [1,1,0],
-
-    # Compute losses
-    for i, pi0 in enumerate(p):  # layer i predictions, i
-        b, a, gj, gi = indices[i]  # image_idx, anchor_idx, gridx, gridy
-
-        # print(i,') b, a, gj, gi : ')
-        # print('b', b)
-        # print('a', a)
-        # print('gj', gj)
-        # print('gi', gi)
-
-        tconf = torch.zeros_like(pi0[..., 0])  # conf
-
-        # print('tconf: ',tconf.size())
-        # Compute losses
-        k = 1  # nT / bs
-        if len(b) > 0:
-            pi = pi0[b, a, gj, gi]  # predictions closest to anchors
-            tconf[b, a, gj, gi] = 1  # conf
-
-            lxy += (k * 8) * MSE(torch.sigmoid(pi[..., 0:2]), txy[i])  # xy loss
-            lwh += (k * 4) * MSE(pi[..., 2:4], twh[i])  # wh loss
-            lcls += (k * 1) * CE(pi[..., 5:], tcls[i])  # class_conf loss
-
-        lconf += (k * 64) * BCE(pi0[..., 4], tconf)  # obj_conf loss
-    loss = lxy + lwh + lconf + lcls
-
-    # Add to dictionary
-    d = defaultdict(float)
-    losses = [loss.item(), lxy.item(), lwh.item(), lconf.item(), lcls.item()]
-    for name, x in zip(['total', 'xy', 'wh', 'conf', 'cls'], losses):
-        d[name] = x
-
-    return loss, d
-
-
-def build_targets(model, targets):
-    # targets = [image, class, x, y, w, h]
-    if isinstance(model, nn.parallel.DistributedDataParallel):
-        model = model.module
-
-    txy, twh, tcls, indices = [], [], [], []
-    for i, layer in enumerate(get_yolo_layers(model)):# 遍历 3 个 yolo layer
-        # print(i,'layer ',model.module_list[layer])
-        layer = model.module_list[layer][0]
-
-        # iou of targets-anchors
-        gwh = targets[:, 4:6] * layer.nG # 以 grid 为单位的 wh
-        iou = [wh_iou(x, gwh) for x in layer.anchor_vec]
-        iou, a = torch.stack(iou, 0).max(0)  # best iou and anchor
-
-        # reject below threshold ious (OPTIONAL, increases P, lowers R)
-        reject = True
-        if reject:
-            j = iou > 0.10
-            t, a, gwh = targets[j], a[j], gwh[j]
-        else:
-            t = targets
-
-        # Indices
-        b, c = t[:, :2].long().t()  # target image, class
-        gxy = t[:, 2:4] * layer.nG
-        gi, gj = gxy.long().t()  # grid_i, grid_j
-        indices.append((b, a, gj, gi)) # img_index , anchor_index , grid_x , grid_y
-
-        # print('b, a, gj, gi : ')
-        # print('b', b)
-        # print('a', a)
-        # print('gj', gj)
-        # print('gi', gi)
-        # print('class c',c)
-
-        # XY coordinates
-        txy.append(gxy - gxy.floor())#转化为grid相对坐标
-
-        # Width and height
-        twh.append(torch.log(gwh / layer.anchor_vec[a]))  # yolo method 对数
-        # twh.append(torch.sqrt(gwh / layer.anchor_vec[a]) / 2)  # power method
-
-        # Class
-        tcls.append(c)
-        # try:
-        #     print('c.max,layer.nC: ',c.max().item() ,layer.nC)
-        # except:
-        #     pass
-        if c.shape[0]:
-            assert c.max().item() <= layer.nC, 'Target classes exceed model classes'
-
-    return txy, twh, tcls, indices
-
-
-# @profile
-def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4):
-    """
-    Removes detections with lower object confidence score than 'conf_thres'
-    Non-Maximum Suppression to further filter detections.
-    Returns detections with shape:
-        (x1, y1, x2, y2, object_conf, class_conf, class)
-    """
-
-    min_wh = 2  # (pixels) minimum box width and height
-
-    output = [None] * len(prediction)
-    for image_i, pred in enumerate(prediction):
-        # Experiment: Prior class size rejection
-        # x, y, w, h = pred[:, 0], pred[:, 1], pred[:, 2], pred[:, 3]
-        # a = w * h  # area
-        # ar = w / (h + 1e-16)  # aspect ratio
-        # n = len(w)
-        # log_w, log_h, log_a, log_ar = torch.log(w), torch.log(h), torch.log(a), torch.log(ar)
-        # shape_likelihood = np.zeros((n, 60), dtype=np.float32)
-        # x = np.concatenate((log_w.reshape(-1, 1), log_h.reshape(-1, 1)), 1)
-        # from scipy.stats import multivariate_normal
-        # for c in range(60):
-        # shape_likelihood[:, c] =
-        #   multivariate_normal.pdf(x, mean=mat['class_mu'][c, :2], cov=mat['class_cov'][c, :2, :2])
-
-        # Filter out confidence scores below threshold
-        class_conf, class_pred = pred[:, 5:].max(1)  # max class_conf, index
-        pred[:, 4] *= class_conf  # finall conf = obj_conf * class_conf
-
-        i = (pred[:, 4] > conf_thres) & (pred[:, 2] > min_wh) & (pred[:, 3] > min_wh)
-        # s2=time.time()
-        pred2 = pred[i]
-        # print("++++++pred2 = pred[i]",time.time()-s2, pred2)
-
-        # If none are remaining => process next image
-        if len(pred2) == 0:
-            continue
-
-        # Select predicted classes
-        class_conf = class_conf[i]
-        class_pred = class_pred[i].unsqueeze(1).float()
-
-        # Box (center x, center y, width, height) to (x1, y1, x2, y2)
-        pred2[:, :4] = xywh2xyxy(pred2[:, :4])
-        # pred[:, 4] *= class_conf  # improves mAP from 0.549 to 0.551
-
-        # Detections ordered as (x1y1x2y2, obj_conf, class_conf, class_pred)
-        pred2 = torch.cat((pred2[:, :5], class_conf.unsqueeze(1), class_pred), 1)
-
-        # Get detections sorted by decreasing confidence scores
-        pred2 = pred2[(-pred2[:, 4]).argsort()]
-
-        det_max = []
-        nms_style = 'MERGE'  # 'OR' (default), 'AND', 'MERGE' (experimental)
-        for c in pred2[:, -1].unique():
-            dc = pred2[pred2[:, -1] == c]  # select class c
-            dc = dc[:min(len(dc), 100)]  # limit to first 100 boxes
-
-            # Non-maximum suppression
-            if nms_style == 'OR':  # default
-                # METHOD1
-                # ind = list(range(len(dc)))
-                # while len(ind):
-                # j = ind[0]
-                # det_max.append(dc[j:j + 1])  # save highest conf detection
-                # reject = (bbox_iou(dc[j], dc[ind]) > nms_thres).nonzero()
-                # [ind.pop(i) for i in reversed(reject)]
-
-                # METHOD2
-                while dc.shape[0]:
-                    det_max.append(dc[:1])  # save highest conf detection
-                    if len(dc) == 1:  # Stop if we're at the last detection
-                        break
-                    iou = bbox_iou(dc[0], dc[1:])  # iou with other boxes
-                    dc = dc[1:][iou < nms_thres]  # remove ious > threshold
-
-            elif nms_style == 'AND':  # requires overlap, single boxes erased
-                while len(dc) > 1:
-                    iou = bbox_iou(dc[0], dc[1:])  # iou with other boxes
-                    if iou.max() > 0.5:
-                        det_max.append(dc[:1])
-                    dc = dc[1:][iou < nms_thres]  # remove ious > threshold
-
-            elif nms_style == 'MERGE':  # weighted mixture box
-                while len(dc):
-                    i = bbox_iou(dc[0], dc) > nms_thres  # iou with other boxes
-                    weights = dc[i, 4:5]
-                    dc[0, :4] = (weights * dc[i, :4]).sum(0) / weights.sum()
-                    det_max.append(dc[:1])
-                    dc = dc[i == 0]
-
-        if len(det_max):
-            det_max = torch.cat(det_max)  # concatenate
-            output[image_i] = det_max[(-det_max[:, 4]).argsort()]  # sort
-    return output
-
-
-def get_yolo_layers(model):
-    yolo_layer_index = []
-    for index, l in enumerate(model.module_list):
-        try:
-            a = l[0].img_size and l[0].nG  # only yolo layer need img_size and nG
-            # print("---"*50)
-            # print(l, index)
-            yolo_layer_index.append(index)
-        except:
-            pass
-    assert len(yolo_layer_index) > 0, "can not find yolo layer"
-    return yolo_layer_index
--- a/lib/wyw2s_lib/cfg/wyw2s.cfg
+++ b/lib/wyw2s_lib/cfg/wyw2s.cfg
 YouWantToSee=BradPitt
+
+detect_model_path=./wyw2s_models/face_yolo_416-20210418.pt
+detect_model_arch=yolo
+detect_input_size = 416
+yolo_anchor_scale=1.
+detect_conf_thres=0.4
+detect_nms_thres=0.45
+
+face_verify_backbone_path=./wyw2s_models/face_verify-model_ir_se-50.pth
+facebank_path=./wyw2s_models/facebank
+face_verify_threshold=1.2
+
+face_multitask_model_path=./wyw2s_models/face_multitask-resnet_50_imgsize-256-20210411.pth
+
+face_euler_model_path=./wyw2s_models/euler_angle-resnet_18_imgsize_256.pth
--- a/lib/wyw2s_lib/doc/README.md
+++ b/lib/wyw2s_lib/doc/README.md
@@ -6,11 +6,12 @@
 *  场景：将视频中目标人物的相关视频进行裁剪。

 ## 项目配置  
-### 1、软件  
+### 1、软件
 * Python 3.7  
 * PyTorch >= 1.5.1  
 * opencv-python
 * moviepy
+* shutil（Python 标准库，无需单独安装）

 ## 相关项目
 ### 1、脸部检测项目（yolo_v3）
@@ -29,17 +30,54 @@ euler_angle-resnet_18_imgsize_256.pth # 人脸姿态角 pitch yaw roll 模型
 face_multitask-resnet_50_imgsize-256-20210411.pth # 性别、年龄、关键点 模型

 face_verify-model_ir_se-50.pth  # 人脸识别特征抽取模型
+
+facebank/facebank.pth # 人脸匹配资源库特征向量
+
+facebank/names.npy # 人脸匹配资源库 face id，示例中的face id为人名字
+```
+
+* 目前示例提供的人脸资源库的具体face id 如下：
+```
+['AngelinaJolie' 'AnneHathaway' 'BradPitt' 'JenniferAniston'
+ 'JohnnyDepp' 'JudeLaw' 'NicoleKidman' 'ScarlettJohansson' 'TomCruise']
 ```

 ## 项目使用方法  

 ### 1、下载项目预训练模型 package 。
-### 2、构建人脸匹配资源库，相关脚本 [make_facebank.py](https://codechina.csdn.net/EricLee/dpcas/-/blob/master/lib/wyw2s_lib/make_facebank_tools/make_facebank.py)
+### 2、构建人脸匹配资源库（项目中已经生成了示例匹配库，如果不需要建立自己的人脸资源库此步骤可以跳过），相关脚本 [make_facebank.py](https://codechina.csdn.net/EricLee/dpcas/-/blob/master/lib/wyw2s_lib/make_facebank_tools/make_facebank.py)
 ### 3、打开配置文件 lib/wyw2s_lib/cfg/[wyw2s.cfg](https://codechina.csdn.net/EricLee/dpcas/-/blob/master/lib/wyw2s_lib/cfg/wyw2s.cfg) 进行相关参数配置，具体配置参数如下，请仔细阅读。
 ```
-YouWantToSee=BradPitt
+YouWantToSee=BradPitt # 你需要裁剪的 face id ，示例为人名字，需要与facebank/names.npy 和 facebank/facebank.pth 信息匹配
+
+detect_model_path=./wyw2s_models/face_yolo_416-20210418.pt # 人脸检测模型
+detect_model_arch=yolo # 模型类型
+detect_input_size = 416 # 模型的图片输入尺寸
+yolo_anchor_scale=1. # anchor 的缩放系数，默认 1
+detect_conf_thres=0.4 # 人脸检测置信度，高于该置信度进行输出
+detect_nms_thres=0.45 # 检测的nms阈值
+
+face_verify_backbone_path=./wyw2s_models/face_verify-model_ir_se-50.pth # 人脸识别特征抽取模型地址
+facebank_path=./wyw2s_models/facebank  # 人脸资源库地址
+face_verify_threshold=1.2 # 人脸匹配阈值设定，低于该设定阈值认为匹配成功
+
+face_multitask_model_path=./wyw2s_models/face_multitask-resnet_50_imgsize-256-20210411.pth # 人脸多任务（性别、年龄、关键点）模型地址
+
+face_euler_model_path=./wyw2s_models/euler_angle-resnet_18_imgsize_256.pth # 人脸姿态角（航向角、俯仰角、翻滚角）回归模型地址
+```
+
+### 4、下载示例视频
+* [示例视频 下载地址(百度网盘 Password: jaqh )](https://pan.baidu.com/s/1CSbfA1nHDhfCyt4_2NSRQg)
+* 或者使用自己准备的视频
+
+### 5、运行 "Who You Want To See" 项目
+* 打开main.py，做如下相关参数设置：
+```
+APP_P = "wyw2s" # 选择不同项目
+cfg_file = "./lib/wyw2s_lib/cfg/wyw2s.cfg" # 选择配置文件
+main_wyw2s(video_path = "./video/f1.mp4",cfg_file = cfg_file)# 设置视频路径，加载 who you want 2 see  应用
 ```
-### 4、根目录下运行命令： python main.py
+* 根目录下运行命令： python main.py

 ## 联系方式 （Contact）  
 * E-mails: 305141918@qq.com   
--- a/main.py
+++ b/main.py
@@ -22,8 +22,8 @@ import sys
 sys.path.append("./components/") # 添加模型组件路径

 from applications.handpose_local_app import main_handpose_x #加载 handpose 应用
-from applications.wyw2s_local_app import main_wyw2s #加载 whoyouwant2see 应用
-
+from applications.wyw2s_local_app import main_wyw2s #加载 who you want 2 see 应用
+# from applications.video_analysis_app import main_video_analysis #加载 video_analysis 应用
 def demo_logo():
    print("\n/*********************************/")
    print("/---------------------------------/\n")
@@ -45,6 +45,8 @@ if __name__ == '__main__':
        
    elif APP_P == "wyw2s": # 基于人脸识别的视频剪辑
        cfg_file = "./lib/wyw2s_lib/cfg/wyw2s.cfg"
+        main_wyw2s(video_path = "./video/f1.mp4",cfg_file = cfg_file)#加载 who you want 2 see  应用

-        main_wyw2s(cfg_file,video_path = "./video/f1.mp4")#加载 handpose 应用
+    # elif APP_P == "video_ana": # 视频分析
+    #     main_video_analysis(video_path = "./video/f3.mp4")#加载 video_analysis 应用
    print(" well done ~")