Commit 987841dd authored by Eric.Lee2021

add yolo detect 3d handpose pipeline

Parent 4fc60d7c
# -*- coding: utf-8 -*-
# date: 2021-06-15
# Author: Eric.Lee
# function: handpose 3D inference
import os
import argparse
import torch
import torch.nn as nn
import numpy as np
import time
import math
from datetime import datetime
import cv2
import torch.nn.functional as F
from models.resnet import resnet18,resnet34,resnet50,resnet101
from e3d_data_iter.datasets import letterbox,get_heatmap
import sys
sys.path.append("./components/") # add the model components path
from hand_keypoints.handpose_x import handpose_x_model,draw_bd_handpose_c
from hand_detect.yolo_v3_hand import yolo_v3_hand_model
from utils.common_utils import *
import copy
from utils import func, bone, AIK, smoother
from utils.LM_new import LM_Solver
from op_pso import PSO
import open3d
from mpl_toolkits.mplot3d import Axes3D
from manopth import manolayer
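# Pipeline: yolo_v3 detects the hand box, handpose_x regresses 2D keypoints on the
# crop, and a resnet regressor consumes the image fused with keypoint heatmaps to
# predict 21 3D joints; MANO shape is fitted with PSO, pose with adaptive IK, and
# the reconstructed mesh is rendered via Open3D and composited onto the camera frame.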
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description=' Project Hand Pose 3D Inference')
    parser.add_argument('--model_path', type=str, default = './model_exp/2021-06-16_10-53-50/resnet_50-size-256-loss-wing_loss-model_epoch-225.pth',
        help = 'model_path') # model path
    parser.add_argument('--model', type=str, default = 'resnet_50',
        help = '''model : resnet_18,resnet_34,resnet_50,resnet_101''') # model type
    parser.add_argument('--num_classes', type=int , default = 63,
        help = 'num_classes') # 21 hand keypoints, (x,y,z)*21 = 63
    parser.add_argument('--GPUS', type=str, default = '0',
        help = 'GPUS') # GPU selection
    parser.add_argument('--test_path', type=str, default = './image/',
        help = 'test_path') # test images path
    parser.add_argument('--img_size', type=tuple , default = (256,256),
        help = 'img_size') # model input image size
    parser.add_argument('--vis', type=bool , default = True,
        help = 'vis') # whether to visualize intermediate images

    print('\n/******************* {} ******************/\n'.format(parser.description))
    #--------------------------------------------------------------------------
    ops = parser.parse_args() # parse the command-line arguments
    #--------------------------------------------------------------------------
    print('----------------------------------')
    unparsed = vars(ops) # parse_args() returns a Namespace; vars() converts it to a dict
    for key in unparsed.keys():
        print('{} : {}'.format(key,unparsed[key]))
    #---------------------------------------------------------------------------
    os.environ['CUDA_VISIBLE_DEVICES'] = ops.GPUS
    test_path = ops.test_path # test images folder path
    #---------------------------------------------------------------- build model
    print('use model : %s'%(ops.model))

    if ops.model == 'resnet_50':
        model_ = resnet50(num_classes = ops.num_classes,img_size=ops.img_size[0])
    elif ops.model == 'resnet_18':
        model_ = resnet18(num_classes = ops.num_classes,img_size=ops.img_size[0])
    elif ops.model == 'resnet_34':
        model_ = resnet34(num_classes = ops.num_classes,img_size=ops.img_size[0])
    elif ops.model == 'resnet_101':
        model_ = resnet101(num_classes = ops.num_classes,img_size=ops.img_size[0])
    else:
        print('unsupported model : %s'%(ops.model))
        sys.exit(1)

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda:0" if use_cuda else "cpu")
    model_ = model_.to(device)
    model_.eval() # set forward-inference mode
    # print(model_) # print the model structure

    # load the checkpoint for testing
    if os.access(ops.model_path,os.F_OK): # checkpoint
        chkpt = torch.load(ops.model_path, map_location=device)
        model_.load_state_dict(chkpt)
        print('load test model : {}'.format(ops.model_path))
    #----------------- build the handpose_x 2D keypoint model
    handpose_2d_model = handpose_x_model()
    #----------------- build the yolo hand detector
    hand_detect_model = yolo_v3_hand_model()
    #----------------- build manopth
    g_side = "right"
    print('load model finished')
    pose, shape = func.initiate("zero")
    pre_useful_bone_len = np.zeros((1, 15)) # skeleton bone-length info
    solver = LM_Solver(num_Iter=666, th_beta=shape.cpu(), th_pose=pose.cpu(), lb_target=pre_useful_bone_len,
                       weight=1e-5)
    pose0 = torch.eye(3).repeat(1, 16, 1, 1)
    mano = manolayer.ManoLayer(flat_hand_mean=True,
                               side=g_side,
                               mano_root='./mano/models',
                               use_pca=False,
                               root_rot_mode='rotmat',
                               joint_rot_mode='rotmat')
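    # note: with root_rot_mode/joint_rot_mode set to 'rotmat', ManoLayer takes the
    # pose as a (batch, 16, 3, 3) stack of per-joint rotation matrices, which is why
    # pose0 above is built from identity matrices; flat_hand_mean=True means the
    # zero pose corresponds to a flat hand.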
    print('start ~')
    point_fliter = smoother.OneEuroFilter(4.0, 0.0)
    mesh_fliter = smoother.OneEuroFilter(4.0, 0.0)
    shape_fliter = smoother.OneEuroFilter(1.5, 0.0)
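    # These One-Euro filters smooth the per-frame joint, mesh and shape estimates.
    # Assuming the usual OneEuroFilter(mincutoff, beta) signature, a larger first
    # argument tracks motion faster at the cost of more jitter, so the shape
    # estimate (1.5) is smoothed harder than the points and mesh (4.0).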
    #--------------------------- configure the point cloud / mesh viewer
    view_mat = np.array([[1.0, 0.0, 0.0],
                         [0.0, -1.0, 0],
                         [0.0, 0, -1.0]]) # flips the y and z axes for display
    mesh = open3d.geometry.TriangleMesh()
    hand_verts, j3d_recon = mano(pose0, shape.float())
    mesh.triangles = open3d.utility.Vector3iVector(mano.th_faces)
    hand_verts = hand_verts.clone().detach().cpu().numpy()[0]
    mesh.vertices = open3d.utility.Vector3dVector(hand_verts)
    viewer = open3d.visualization.Visualizer()
    viewer.create_window(width=800, height=800, window_name='HandPose3d_Mesh')
    viewer.add_geometry(mesh)
    viewer.update_renderer()

    renderOptions = viewer.get_render_option()
    renderOptions.background_color = np.asarray([120/255,120/255,120/255]) # set the background color
    # axis_pcd = open3d.create_mesh_coordinate_frame(size=0.5, origin=[0, 0, 0])
    # vis.add_geometry(axis_pcd)

    pts_flag = False
    if pts_flag:
        test_pcd = open3d.geometry.PointCloud() # define the point cloud
        viewer.add_geometry(test_pcd)

    print('start pose estimate')
    pre_uv = None
    shape_time = 0
    opt_shape = None
    shape_flag = True
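    # The loop below only processes frames with exactly one detected hand; several
    # intermediate results are shown in their own OpenCV windows along the way.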
    #---------------------------------------------------------------- run prediction
    with torch.no_grad():
        idx = 0
        cap = cv2.VideoCapture(0) # USB cameras usually default to index 0; without a camera this will not start, and a non-zero index must be determined manually.
        while True:
            ret, img_o = cap.read() # grab a camera frame
            if ret == True: # if ret is True, process the frame
                img_yolo_x = img_o.copy()
                hand_bbox = hand_detect_model.predict(img_yolo_x,vis = False) # detect hands and get their bounding boxes
                if len(hand_bbox) == 1:
                    x_min,y_min,x_max,y_max,_ = hand_bbox[0]
                    w_ = max(abs(x_max-x_min),abs(y_max-y_min))
                    w_ = w_*1.5 # expand the box to a square with a 1.5x margin
                    x_mid = (x_max+x_min)/2
                    y_mid = (y_max+y_min)/2
                    #
                    x1,y1,x2,y2 = int(x_mid-w_/2),int(y_mid-w_/2),int(x_mid+w_/2),int(y_mid+w_/2)
                    #
                    x1 = int(np.clip(x1,0,img_o.shape[1]-1))
                    y1 = int(np.clip(y1,0,img_o.shape[0]-1))
                    x2 = int(np.clip(x2,0,img_o.shape[1]-1))
                    y2 = int(np.clip(y2,0,img_o.shape[0]-1))
                    img = img_o[y1:y2,x1:x2]
                else:
                    continue
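                # the square hand crop is fed to the 2D keypoint model, which returns
                # 21 keypoints with coordinates normalized to the crop size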
                #--------------------------------
                img_show = img.copy() # copy used for display
                pts_2d_ = handpose_2d_model.predict(img.copy()) # handpose_2d predict
                pts_2d_hand = {}
                for ptk in range(int(pts_2d_.shape[0]/2)):
                    xh = pts_2d_[ptk*2+0]*float(img.shape[1])
                    yh = pts_2d_[ptk*2+1]*float(img.shape[0])
                    pts_2d_hand[str(ptk)] = {
                        "x":xh,
                        "y":yh,
                        }
                    if ops.vis:
                        cv2.circle(img_show, (int(xh),int(yh)), 4, (255,50,60),-1)
                        cv2.circle(img_show, (int(xh),int(yh)), 3, (25,160,255),-1)
                if ops.vis:
                    draw_bd_handpose_c(img_show,pts_2d_hand,0,0,2)
                    cv2.namedWindow("handpose_2d",0)
                    cv2.imshow("handpose_2d",img_show)
                #--------------------------------
                img_lbox,ratio, dw, dh = letterbox(img.copy(), height=ops.img_size[0], color=(0,0,0))
                if ops.vis:
                    cv2.namedWindow("letterbox",0)
                    cv2.imshow("letterbox",img_lbox)
                #-------------------------------- get heatmap
                x1y1x2y2 = 0,0,0,0
                offset_x1,offset_y1 = 0,0
                hm,hm_w = get_heatmap(img_lbox.copy(),x1y1x2y2,pts_2d_hand,ratio, dw, dh,offset_x1,offset_y1,vis=False)
                if ops.vis:
                    cv2.namedWindow("hm_w",0)
                    cv2.imshow("hm_w",hm_w)
                #--------------------------------
                img_fix_size = img_lbox.astype(np.float32)
                img_fix_size_r = img_fix_size.astype(np.float32)
                img_fix_size_r = (img_fix_size_r-128.)/256. # normalize pixels to roughly [-0.5, 0.5]
                #--------------------------------------------------
                image_fusion = np.concatenate((img_fix_size_r,hm),axis=2)
                image_fusion = image_fusion.transpose(2, 0, 1)
                image_fusion = torch.from_numpy(image_fusion)
                image_fusion = image_fusion.unsqueeze_(0)
                if use_cuda:
                    image_fusion = image_fusion.cuda() # (bs, channel, h, w)
                print("image_fusion size : {}".format(image_fusion.size()))
                #-------------------------------- handpose_3d predict
                pre_ = model_(image_fusion.float()) # model inference
                output = pre_.cpu().detach().numpy()
                output = np.squeeze(output)
                print("handpose_3d output shape : {}".format(output.shape))
                pre_3d_joints = output.reshape((21,3)) # 63 values -> 21 joints x (x,y,z)
                print("pre_3d_joints shape : {}".format(pre_3d_joints.shape))

                if g_side == "left":
                    print("------------------->>. left")
                    pre_3d_joints[:,0] *=(-1.) # mirror x for a left hand
                pre_3d_joints = torch.tensor(pre_3d_joints).squeeze(0)
                if use_cuda:
                    pre_3d_joints = pre_3d_joints.cuda()
                print(pre_3d_joints.size())
                #--------------------------------------------------------------------
                # uv-based translation is disabled here, so trans / new_tran stay zero
                # now_uv = result['uv'].clone().detach().cpu().numpy()[0, 0]
                # now_uv = now_uv.astype(np.float32)
                trans = np.zeros((1, 3))
                # trans[0, 0:2] = now_uv - 16.0
                trans = trans / 16.0
                new_tran = np.array([[trans[0, 1], trans[0, 0], trans[0, 2]]])
                pre_joints = pre_3d_joints.clone().detach().cpu().numpy()
                flited_joints = point_fliter.process(pre_joints)
                # fliter_ax.cla()
                #
                # filted_ax = vis.plot3d(flited_joints + new_tran, fliter_ax)
                pre_useful_bone_len = bone.caculate_length(pre_joints, label="useful")
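                # the 15 "useful" bone lengths of the predicted skeleton become the
                # fitness target for the PSO below, which searches the 10 MANO shape
                # coefficients (betas) whose template best matches those lengths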
                NGEN = 0 # number of PSO iterations
                popsize = 100
                low = np.zeros((1, 10)) - 3.0
                up = np.zeros((1, 10)) - 2.0
                parameters = [NGEN, popsize, low, up]
                pso = PSO(parameters, pre_useful_bone_len.reshape((1, 15)),g_side)
                pso.main(solver)

                if True: # opt_shape is None:
                    opt_shape = pso.ng_best
                    opt_shape = shape_fliter.process(opt_shape)

                opt_tensor_shape = torch.tensor(opt_shape, dtype=torch.float)
                _, j3d_p0_ops = mano(pose0, opt_tensor_shape)
                template = j3d_p0_ops.cpu().numpy().squeeze(0) / 1000.0 # template, m 21*3
                # scale the prediction so the wrist->middle-MCP length matches the template
                ratio = np.linalg.norm(template[9] - template[0]) / np.linalg.norm(pre_joints[9] - pre_joints[0])
                j3d_pre_process = pre_joints * ratio # template, m
                j3d_pre_process = j3d_pre_process - j3d_pre_process[0] + template[0]
                pose_R = AIK.adaptive_IK(template, j3d_pre_process)
                pose_R = torch.from_numpy(pose_R).float()
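                # AIK.adaptive_IK aligns the template skeleton to the scaled prediction
                # and returns per-joint rotations; given the 'rotmat' ManoLayer setup,
                # pose_R is expected to be a (1, 16, 3, 3) rotation-matrix stack like pose0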
                # reconstruction
                hand_verts, j3d_recon = mano(pose_R, opt_tensor_shape.float())
                hand_verts[:,:,:] = hand_verts[:,:,:]*(0.55)
                # print(j3d_recon.size())
                mesh.triangles = open3d.utility.Vector3iVector(mano.th_faces)
                hand_verts = hand_verts.clone().detach().cpu().numpy()[0]
                hand_verts = mesh_fliter.process(hand_verts)
                hand_verts = np.matmul(view_mat, hand_verts.T).T
                if g_side == "right":
                    hand_verts[:, 0] = hand_verts[:, 0] - 40
                else:
                    hand_verts[:, 0] = hand_verts[:, 0] + 40
                hand_verts[:, 1] = hand_verts[:, 1] - 0
                mesh_tran = np.array([[-new_tran[0, 0], new_tran[0, 1], new_tran[0, 2]]])
                hand_verts = hand_verts - 100 * mesh_tran
                mesh.vertices = open3d.utility.Vector3dVector(hand_verts)
                # mesh.paint_uniform_color([252 / 255, 224 / 255, 203 / 255])
                # mesh.paint_uniform_color([238 / 255, 188 / 255, 158 / 255])
                mesh.paint_uniform_color([87 / 255, 131 / 255, 235 / 255])
                mesh.compute_triangle_normals()
                mesh.compute_vertex_normals()
                #-----------
                if pts_flag:
                    if False:
                        j3d_ = j3d_recon.detach().cpu().numpy()
                        j3d_[0][:,1] *=(-1.)
                        # j3d_[0][:,0] +=trans[0,0]
                        j3d_[0] = j3d_[0] - 100 * mesh_tran
                        j3d_[0][:,0] -=50
                        j3d_[0][:,1] -=30
                        # print(j3d_.shape,j3d_)
                        test_pcd.points = open3d.utility.Vector3dVector(j3d_[0]) # set the point cloud coordinates
                    else:
                        # test_pcd.points = open3d.utility.Vector3dVector(hand_verts)
                        pre_joints[:,1] *=-1.
                        pre_joints = pre_joints*70
                        pre_joints[:,1] -= 40
                        pre_joints[:,0] -= 0
                        print("pre_joints",pre_joints.shape)
                        test_pcd.points = open3d.utility.Vector3dVector(pre_joints)
                        # test_pcd.points = open3d.utility.Vector3dVector(pre_joints[1,:].reshape(1,3))
                        # rgb = np.asarray([250,0,250])
                        # rgb_t = np.transpose(rgb)
                        # test_pcd.colors = open3d.utility.Vector3dVector(rgb_t.astype(np.float32) / 255.0)
                # print("hand_verts shape",hand_verts)
                #-----------
                viewer.update_geometry(mesh)
                if pts_flag:
                    viewer.update_geometry(test_pcd)
                viewer.poll_events()
                viewer.update_renderer()
                #---------------------------------------------------------------
                # render the mesh off-screen and read the frame back as an image
                # viewer.capture_screen_image("open3d.jpg", False)
                # depth = vis.capture_depth_float_buffer(False)
                image_3d = viewer.capture_screen_float_buffer(False)
                image_3d = np.asarray(image_3d)
                image_3d = image_3d*255
                image_3d = np.clip(image_3d,0,255)
                image_3d = image_3d.astype(np.uint8)
                image_3d = cv2.cvtColor(image_3d, cv2.COLOR_RGB2BGR)
                # print(image_3d.shape)
                mask_0 = np.where(image_3d[:,:,0]!=120,1,0)
                mask_1 = np.where(image_3d[:,:,1]!=120,1,0)
                mask_2 = np.where(image_3d[:,:,2]!=120,1,0)
                img_mask = np.logical_or(mask_0,mask_1)
                img_mask = np.logical_or(img_mask,mask_2)
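                # the viewer background was set to RGB(120,120,120) above, so any pixel
                # that differs from 120 in any channel belongs to the rendered hand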
cv2.namedWindow("img_mask",0)
cv2.imshow("img_mask",img_mask.astype(np.float))
locs = np.where(img_mask != 0)
xx1 = np.min(locs[1])
xx2 = np.max(locs[1])
yy1 = np.min(locs[0])
yy2 = np.max(locs[0])
# cv2.rectangle(image_3d, (xx1,yy1), (xx2,yy2), (255,0,255), 5) # 绘制image_3d
model_hand_w = (xx2-xx1)
model_hand_h = (yy2-yy1)
                #----------
                cv2.namedWindow("image_3d",0)
                cv2.imshow("image_3d",image_3d)
                scale_ = ((x_max-x_min)/(xx2-xx1) + (y_max-y_min)/(yy2-yy1))/2. # detected-hand size / rendered-hand size
                w_3d_ = (xx2-xx1)*scale_
                h_3d_ = (yy2-yy1)*scale_
                x_mid_3d = (xx1+xx2)/2.
                y_mid_3d = (yy1+yy2)/2.
                x_mid,y_mid = int(x_mid),int(y_mid)
                x1,y1,x2,y2 = int(x_mid-w_3d_/2.),int(y_mid-h_3d_/2.),int(x_mid+w_3d_/2.),int(y_mid+h_3d_/2.)
                crop_ = image_3d[yy1:yy2,xx1:xx2]
                crop_mask = (img_mask[yy1:yy2,xx1:xx2].astype(np.float32)*255).astype(np.uint8)
                w_r,h_r = int(crop_.shape[1]*scale_/2),int(crop_.shape[0]*scale_/2)
                crop_ = cv2.resize(crop_, (w_r*2, h_r*2))
                crop_mask = cv2.resize(crop_mask, (w_r*2, h_r*2))
                crop_mask = np.where(crop_mask[:,:]>0.,1.,0.)
                crop_mask = np.expand_dims(crop_mask, 2)
                try:
                    # alpha-blend the rendered hand onto the camera frame at the detected hand position
                    img_ff = img_yolo_x[int(y_mid - h_r ):int(y_mid + h_r ),int(x_mid - w_r ):int(x_mid + w_r ),:]*(1.-crop_mask) + crop_*crop_mask
                    img_yolo_x[int(y_mid - h_r ):int(y_mid + h_r ),int(x_mid - w_r ):int(x_mid + w_r ),:] = img_ff.astype(np.uint8)
                except:
                    continue
                real_hand_w = w_r*2
                real_hand_h = h_r*2
                depth_z = (model_hand_h/real_hand_h + model_hand_w/real_hand_w)/2. # relative depth z
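                # depth_z is a crude monocular depth cue: the rendered mesh footprint
                # divided by the detected hand's rescaled footprint, so it should grow
                # as the hand moves away from the camera and shrink as it approaches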
                # cv2.namedWindow("crop_mask",0)
                # cv2.imshow("crop_mask",crop_mask)
                # cv2.rectangle(img_yolo_x, (x1,y1),(x2,y2), (0,255,0), 5) # draw a hollow rectangle
                # print("scale : {}".format(scale_))
                # cv2.rectangle(image_3d, (xx1,yy1), (xx2,yy2), (255,0,255), 5) # draw a hollow rectangle
                cv2.putText(img_yolo_x, " Relative Depth_Z :{:.3f} ".format(depth_z), (4,42),cv2.FONT_HERSHEY_DUPLEX, 1.1, (55, 0, 220),7)
                cv2.putText(img_yolo_x, " Relative Depth_Z :{:.3f} ".format(depth_z), (4,42),cv2.FONT_HERSHEY_DUPLEX, 1.1, (25, 180, 250),2)
                cv2.namedWindow("img_yolo_x",0)
                cv2.imshow("img_yolo_x",img_yolo_x)
                # x_mid = (x_max+x_min)/2
                # y_mid = (y_max+y_min)/2
                if cv2.waitKey(1) == 27: # exit on ESC
                    break
            else:
                break

    cap.release()
    cv2.destroyAllWindows()
    print('well done ')