yolo_inference.py

#-*-coding:utf-8-*-
# date:2021-06-15
# Author: Eric.Lee
# function: handpose 3D Yolo_v3 Detect Inference

import os
import argparse
import torch
import torch.nn as nn
import numpy as np

import time
import datetime
import os
import math
from datetime import datetime
import cv2
import torch.nn.functional as F

from models.resnet import resnet18,resnet34,resnet50,resnet101
from e3d_data_iter.datasets import letterbox,get_heatmap
import sys
sys.path.append("./components/") # 添加模型组件路径
from hand_keypoints.handpose_x import handpose_x_model,draw_bd_handpose_c
from hand_detect.yolo_v3_hand import yolo_v3_hand_model

from utils.common_utils import *
import copy

from utils import func, bone, AIK, smoother
from utils.LM_new import LM_Solver
from op_pso import PSO
import open3d
from mpl_toolkits.mplot3d import Axes3D
from manopth import manolayer
if __name__ == "__main__":

    parser = argparse.ArgumentParser(description=' Project Hand Pose 3D Inference')
    parser.add_argument('--model_path', type=str, default = './if_package/e3d_handposex-resnet_50-size-128-loss-wing_loss-20210619.pth',
        help = 'model_path') # e3d handpose 模型路径
    parser.add_argument('--detect_model_path', type=str, default = './if_package/hand_detect_416-20210606.pt',
        help = 'model_path') # detect 模型路径
    parser.add_argument('--handpose_x2d_model_path', type=str, default = './if_package/handposex_2d_resnet_50-size-256-wingloss102-0.119.pth',
        help = 'model_path') # 手2维关键点 模型路径
    parser.add_argument('--model', type=str, default = 'resnet_50',
        help = '''model : resnet_18,resnet_34,resnet_50,resnet_101''') # 模型类型
    parser.add_argument('--num_classes', type=int , default = 63,
        help = 'num_classes') #  手部21关键点， (x,y)*2 = 42
    parser.add_argument('--GPUS', type=str, default = '0',
        help = 'GPUS') # GPU选择
    parser.add_argument('--test_path', type=str, default = './image/',
        help = 'test_path') # 测试图片路径
    parser.add_argument('--img_size', type=tuple , default = (128,128),
        help = 'img_size') # 输入模型图片尺寸
    parser.add_argument('--vis', type=bool , default = True,
        help = 'vis') # 是否可视化图片

    print('\n/******************* {} ******************/\n'.format(parser.description))
    #--------------------------------------------------------------------------
    ops = parser.parse_args()# 解析添加参数
    #--------------------------------------------------------------------------
    print('----------------------------------')

    unparsed = vars(ops) # parse_args()方法的返回值为namespace，用vars()内建函数化为字典
    for key in unparsed.keys():
        print('{} : {}'.format(key,unparsed[key]))

    #---------------------------------------------------------------------------
    os.environ['CUDA_VISIBLE_DEVICES'] = ops.GPUS

    test_path =  ops.test_path # 测试图片文件夹路径
    #---------------------------------------------------------------- 构建模型
    print('use model : %s'%(ops.model))

    if ops.model == 'resnet_50':
        model_ = resnet50(num_classes = ops.num_classes,img_size=ops.img_size[0])
    elif ops.model == 'resnet_18':
        model_ = resnet18(num_classes = ops.num_classes,img_size=ops.img_size[0])
    elif ops.model == 'resnet_34':
        model_ = resnet34(num_classes = ops.num_classes,img_size=ops.img_size[0])
    elif ops.model == 'resnet_101':
        model_ = resnet101(num_classes = ops.num_classes,img_size=ops.img_size[0])

    use_cuda = torch.cuda.is_available()

    device = torch.device("cuda:0" if use_cuda else "cpu")
    model_ = model_.to(device)
    model_.eval() # 设置为前向推断模式
    # print(model_)# 打印模型结构
    # 加载测试模型
    if os.access(ops.model_path,os.F_OK):# checkpoint
        chkpt = torch.load(ops.model_path, map_location=device)
        model_.load_state_dict(chkpt)
        print('load test model : {}'.format(ops.model_path))

    #----------------- 构建 handpose_x 2D关键点检测模型
    handpose_2d_model = handpose_x_model(model_path = ops.handpose_x2d_model_path)
    #----------------- 构建 yolo 检测模型
    hand_detect_model = yolo_v3_hand_model(model_path = ops.detect_model_path,model_arch = "yolo",conf_thres = 0.3)
    # hand_detect_model = yolo_v3_hand_model()

    #----------------- 构建 manopth
    g_side = "right"
    print('load model finished')
    pose, shape = func.initiate("zero")
    pre_useful_bone_len = np.zeros((1, 15)) # 骨架点信息
    solver = LM_Solver(num_Iter=99, th_beta=shape.cpu(), th_pose=pose.cpu(), lb_target=pre_useful_bone_len,
                       weight=1e-5)
    pose0 = torch.eye(3).repeat(1, 16, 1, 1)

    mano = manolayer.ManoLayer(flat_hand_mean=True,
                               side=g_side,
                               mano_root='./mano/models',
                               use_pca=False,
                               root_rot_mode='rotmat',
                               joint_rot_mode='rotmat')
    print('start ~')
    point_fliter = smoother.OneEuroFilter(23.0, 0.0)
    mesh_fliter = smoother.OneEuroFilter(23.0, 0.0)
    shape_fliter = smoother.OneEuroFilter(1.5, 0.0)
    #--------------------------- 配置点云
    view_mat = np.array([[1.0, 0.0, 0.0],
                         [0.0, -1.0, 0],
                         [0.0, 0, -1.0]])
    mesh = open3d.geometry.TriangleMesh()
    hand_verts, j3d_recon = mano(pose0, shape.float())
    mesh.triangles = open3d.utility.Vector3iVector(mano.th_faces)
    hand_verts = hand_verts.clone().detach().cpu().numpy()[0]
    mesh.vertices = open3d.utility.Vector3dVector(hand_verts)
    viewer = open3d.visualization.Visualizer()
    viewer.create_window(width=800, height=800, window_name='HandPose3d_Mesh')
    viewer.add_geometry(mesh)
    viewer.update_renderer()
    renderOptions = viewer.get_render_option ()
    renderOptions.background_color = np.asarray([120/255,120/255,120/255]) # 设置背景颜色
    # axis_pcd = open3d.create_mesh_coordinate_frame(size=0.5, origin=[0, 0, 0])

    # vis.add_geometry(axis_pcd)
    pts_flag = False
    if pts_flag:
        test_pcd = open3d.geometry.PointCloud()  # 定义点云
        viewer.add_geometry(test_pcd)

    print('start pose estimate')

    pre_uv = None
    shape_time = 0
    opt_shape = None
    shape_flag = True
    #---------------------------------------------------------------- 预测图片

    with torch.no_grad():
        idx = 0
        cap = cv2.VideoCapture(0) #一般usb默认相机号为 0，如果没有相机无法启动，如果相机不为0需要自行确定其编号。

        while True:
            ret, img_o = cap.read()# 获取相机图像
            if ret == True:# 如果 ret 返回值为 True，显示图片
                img_yolo_x = img_o.copy()
                hand_bbox =hand_detect_model.predict(img_yolo_x,vis = False) # 检测手，获取手的边界框
                if len(hand_bbox) == 1:
                    x_min,y_min,x_max,y_max,_ = hand_bbox[0]

                    w_ = max(abs(x_max-x_min),abs(y_max-y_min))
                    w_ = w_*1.6
                    x_mid = (x_max+x_min)/2
                    y_mid = (y_max+y_min)/2
                    #
                    x1,y1,x2,y2 = int(x_mid-w_/2),int(y_mid-w_/2),int(x_mid+w_/2),int(y_mid+w_/2)
                    #
                    x1 = int(np.clip(x1,0,img_o.shape[1]-1))
                    y1 = int(np.clip(y1,0,img_o.shape[0]-1))
                    x2 = int(np.clip(x2,0,img_o.shape[1]-1))
                    y2 = int(np.clip(y2,0,img_o.shape[0]-1))

                    img = img_o[y1:y2,x1:x2]
                else:
                    continue
                #--------------------------------
                img_show = img.copy() # 用于显示使用
                pts_2d_ = handpose_2d_model.predict(img.copy()) # handpose_2d predict
                pts_2d_hand = {}
                for ptk in range(int(pts_2d_.shape[0]/2)):

                    xh = pts_2d_[ptk*2+0]*float(img.shape[1])
                    yh = pts_2d_[ptk*2+1]*float(img.shape[0])
                    pts_2d_hand[str(ptk)] = {
                        "x":xh,
                        "y":yh,
                        }
                    if ops.vis:
                        cv2.circle(img_show, (int(xh),int(yh)), 4, (255,50,60),-1)
                        cv2.circle(img_show, (int(xh),int(yh)), 3, (25,160,255),-1)
                if ops.vis:
                    draw_bd_handpose_c(img_show,pts_2d_hand,0,0,2)
                    cv2.namedWindow("handpose_2d",0)
                    cv2.imshow("handpose_2d",img_show)

                #--------------------------------
                img_lbox,ratio, dw, dh = letterbox(img.copy(), height=ops.img_size[0], color=(0,0,0))
                # if ops.vis:
                #     cv2.namedWindow("letterbox",0)
                #     cv2.imshow("letterbox",img_lbox)

                #-------------------------------- get heatmap
                x1y1x2y2 = 0,0,0,0
                offset_x1,offset_y1 = 0,0
                hm,hm_w = get_heatmap(img_lbox.copy(),x1y1x2y2,pts_2d_hand,ratio, dw, dh,offset_x1,offset_y1,vis=False)
                if ops.vis:
                    cv2.namedWindow("hm_w",0)
                    cv2.imshow("hm_w",hm_w)

                #--------------------------------
                img_fix_size = img_lbox.astype(np.float32)

                img_fix_size_r = img_fix_size.astype(np.float32)
                img_fix_size_r = (img_fix_size_r-128.)/256.
                #--------------------------------------------------
                image_fusion = np.concatenate((img_fix_size_r,hm),axis=2)
                image_fusion = image_fusion.transpose(2, 0, 1)
                image_fusion = torch.from_numpy(image_fusion)
                image_fusion = image_fusion.unsqueeze_(0)
                if use_cuda:
                    image_fusion = image_fusion.cuda()  # (bs, channel, h, w)
                # print("image_fusion size : {}".format(image_fusion.size()))

                #--------------------------------  # handpose_3d predict
                pre_ = model_(image_fusion.float()) # 模型推理
                output = pre_.cpu().detach().numpy()
                output = np.squeeze(output)
                # print("handpose_3d output shape : {}".format(output.shape))

                pre_3d_joints = output.reshape((21,3))
                # print("pre_3d_joints shape : {}".format(pre_3d_joints.shape))

                if g_side == "left":
                    print("------------------->>. left")
                    pre_3d_joints[:,0] *=(-1.)
                pre_3d_joints = torch.tensor(pre_3d_joints).squeeze(0)
                pre_3d_joints= pre_3d_joints.cuda()
                # print(pre_3d_joints.size())
                #--------------------------------------------------------------------
                # now_uv = result['uv'].clone().detach().cpu().numpy()[0, 0]
                # now_uv = now_uv.astype(np.float)
                trans = np.zeros((1, 3))
                # trans[0, 0:2] = now_uv - 16.0
                trans = trans / 16.0
                new_tran = np.array([[trans[0, 1], trans[0, 0], trans[0, 2]]])
                pre_joints = pre_3d_joints.clone().detach().cpu().numpy()

                flited_joints = point_fliter.process(pre_joints)

                # fliter_ax.cla()
                #
                # filted_ax = vis.plot3d(flited_joints + new_tran, fliter_ax)
                # pre_useful_bone_len = bone.caculate_length(pre_joints, label="useful")

                pre_useful_bone_len = bone.caculate_length(pre_joints, label="useful")

                NGEN = 0 # PSO 迭代次数
                popsize = 100
                low = np.zeros((1, 10)) - 3.0
                up = np.zeros((1, 10)) - 2.0
                parameters = [NGEN, popsize, low, up]
                pso = PSO(parameters, pre_useful_bone_len.reshape((1, 15)),g_side)
                pso.main(solver)
                if True:#opt_shape is None:
                    opt_shape = pso.ng_best
                    opt_shape = shape_fliter.process(opt_shape)

                opt_tensor_shape = torch.tensor(opt_shape, dtype=torch.float)
                _, j3d_p0_ops = mano(pose0, opt_tensor_shape)
                template = j3d_p0_ops.cpu().numpy().squeeze(0) / 1000.0  # template, m 21*3
                ratio = np.linalg.norm(template[9] - template[0]) / np.linalg.norm(pre_joints[9] - pre_joints[0])
                j3d_pre_process = pre_joints * ratio  # template, m
                j3d_pre_process = j3d_pre_process - j3d_pre_process[0] + template[0]
                pose_R = AIK.adaptive_IK(template, j3d_pre_process)
                pose_R = torch.from_numpy(pose_R).float()
                #  reconstruction
                hand_verts, j3d_recon = mano(pose_R, opt_tensor_shape.float())
                hand_verts[:,:,:] = hand_verts[:,:,:]*(0.573)
                # print(j3d_recon.size())

                mesh.triangles = open3d.utility.Vector3iVector(mano.th_faces)
                hand_verts = hand_verts.clone().detach().cpu().numpy()[0]
                hand_verts = mesh_fliter.process(hand_verts)
                hand_verts = np.matmul(view_mat, hand_verts.T).T
                if g_side == "right":
                    hand_verts[:, 0] = hand_verts[:, 0] - 40
                else:
                    hand_verts[:, 0] = hand_verts[:, 0] + 40
                hand_verts[:, 1] = hand_verts[:, 1] - 0
                mesh_tran = np.array([[-new_tran[0, 0], new_tran[0, 1], new_tran[0, 2]]])
                hand_verts = hand_verts - 100 * mesh_tran

                mesh.vertices = open3d.utility.Vector3dVector(hand_verts)
                # mesh.paint_uniform_color([252 / 255, 224 / 255, 203 / 255])
                # mesh.paint_uniform_color([238 / 255, 188 / 255, 158 / 255])
                mesh.paint_uniform_color([87 / 255, 131 / 255, 235 / 255])
                mesh.compute_triangle_normals()
                mesh.compute_vertex_normals()
                #-----------
                if pts_flag:
                    if False:
                        j3d_ = j3d_recon.detach().cpu().numpy()
                        j3d_[0][:,1] *=(-1.)
                        # j3d_[0][:,0] +=trans[0,0]
                        j3d_[0] = j3d_[0] - 100 * mesh_tran
                        j3d_[0][:,0] -=50
                        j3d_[0][:,1] -=30
                        # print(j3d_.shape,j3d_)
                        test_pcd.points = open3d.utility.Vector3dVector(j3d_[0])  # 定义点云坐标位置
                    else:
                        # test_pcd.points = open3d.utility.Vector3dVector(hand_verts)
                        pre_joints[:,1] *=-1.
                        pre_joints = pre_joints*70
                        pre_joints[:,1] -= 40
                        pre_joints[:,0] -= 0
                        # print("pre_joints",pre_joints.shape)
                        test_pcd.points = open3d.utility.Vector3dVector(pre_joints)
                        # test_pcd.points = open3d.utility.Vector3dVector(pre_joints[1,:].reshape(1,3))
                        # rgb = np.asarray([250,0,250])
                        # rgb_t = np.transpose(rgb)
                        # test_pcd.colors = open3d.utility.Vector3dVector(rgb_t.astype(np.float) / 255.0)
                # print("hand_verts shape",hand_verts)
                #-----------
                viewer.update_geometry(mesh)
                if pts_flag:
                    viewer.update_geometry(test_pcd)
                viewer.poll_events()
                viewer.update_renderer()
                #---------------------------------------------------------------
                image_open3d = viewer.capture_screen_float_buffer(False)
                # viewer.capture_screen_image("open3d.jpg", False)
                # depth = vis.capture_depth_float_buffer(False)
                image_3d = viewer.capture_screen_float_buffer(False)
                image_3d = np.asarray(image_3d)
                image_3d = image_3d*255
                image_3d = np.clip(image_3d,0,255)
                image_3d = image_3d.astype(np.uint8)
                image_3d = cv2.cvtColor(image_3d, cv2.COLOR_RGB2BGR)

                # print(image_3d.shape)
                mask_0 = np.where(image_3d[:,:,0]!=120,1,0)
                mask_1 = np.where(image_3d[:,:,1]!=120,1,0)
                mask_2 = np.where(image_3d[:,:,2]!=120,1,0)
                img_mask = np.logical_or(mask_0,mask_1)
                img_mask = np.logical_or(img_mask,mask_2)
                # cv2.namedWindow("img_mask",0)
                # cv2.imshow("img_mask",img_mask.astype(np.float))

                locs = np.where(img_mask != 0)
                xx1 = np.min(locs[1])
                xx2 = np.max(locs[1])
                yy1 = np.min(locs[0])
                yy2 = np.max(locs[0])
                # cv2.rectangle(image_3d, (xx1,yy1), (xx2,yy2), (255,0,255), 5) # 绘制image_3d
                model_hand_w = (xx2-xx1)
                model_hand_h = (yy2-yy1)
                #----------
                cv2.namedWindow("image_3d",0)
                cv2.imshow("image_3d",image_3d)
                scale_ = ((x_max-x_min)/(xx2-xx1) + (y_max-y_min)/(yy2-yy1))/2.*1.01
                w_3d_ = (xx2-xx1)*scale_
                h_3d_ = (yy2-yy1)*scale_
                x_mid_3d = (xx1+xx2)/2.
                y_mid_3d = (yy1+yy2)/2.

                x_mid,y_mid = int(x_mid),int(y_mid)
                x1,y1,x2,y2 = int(x_mid-w_3d_/2.),int(y_mid-h_3d_/2.),int(x_mid+w_3d_/2.),int(y_mid+h_3d_/2.)
                crop_ = image_3d[yy1:yy2,xx1:xx2]
                crop_mask = (img_mask[yy1:yy2,xx1:xx2].astype(np.float)*255).astype(np.uint8)
                w_r,h_r = int(crop_.shape[1]*scale_/2),int(crop_.shape[0]*scale_/2)
                crop_ = cv2.resize(crop_, (w_r*2, h_r*2))
                crop_mask = cv2.resize(crop_mask, (w_r*2, h_r*2))
                crop_mask = np.where(crop_mask[:,:]>0.,1.,0.)
                crop_mask = np.expand_dims(crop_mask, 2)

                try:
                    img_ff = img_yolo_x[int(y_mid - h_r ):int(y_mid + h_r ),int(x_mid - w_r ):int(x_mid + w_r ),:]*(1.-crop_mask) + crop_*crop_mask
                    img_yolo_x[int(y_mid - h_r ):int(y_mid + h_r ),int(x_mid - w_r ):int(x_mid + w_r ),:] = img_ff.astype(np.uint8)
                except:
                    continue

                real_hand_w = w_r*2
                real_hand_h = h_r*2
                depth_z = (model_hand_h/real_hand_h  + model_hand_w/real_hand_w)/2.# 相对深度 z
                #
                cv2.putText(img_yolo_x, " Relative Depth_Z :{:.3f} ".format(depth_z), (4,42),cv2.FONT_HERSHEY_DUPLEX, 1.1, (55, 0, 220),7)
                cv2.putText(img_yolo_x, " Relative Depth_Z :{:.3f} ".format(depth_z), (4,42),cv2.FONT_HERSHEY_DUPLEX, 1.1, (25, 180, 250),2)


                cv2.namedWindow("img_yolo_x",0)
                cv2.imshow("img_yolo_x",img_yolo_x)

                # x_mid = (x_max+x_min)/2
                # y_mid = (y_max+y_min)/2

                if cv2.waitKey(1) == 27:
                    break
            else:
                break

    cv2.destroyAllWindows()

    print('well done ')