diff --git a/config.py b/config.py new file mode 100644 index 0000000000000000000000000000000000000000..f4f0ce168e2196f69cd1d2995a1d6105e9efbb4e --- /dev/null +++ b/config.py @@ -0,0 +1,132 @@ +DEPTH_RANGE = 3.0 +DEPTH_MIN = -1.5 + +stb_joints = [ + 'loc_bn_palm_L', + 'loc_bn_pinky_L_01', + 'loc_bn_pinky_L_02', + 'loc_bn_pinky_L_03', + 'loc_bn_pinky_L_04', + 'loc_bn_ring_L_01', + 'loc_bn_ring_L_02', + 'loc_bn_ring_L_03', + 'loc_bn_ring_L_04', + 'loc_bn_mid_L_01', + 'loc_bn_mid_L_02', + 'loc_bn_mid_L_03', + 'loc_bn_mid_L_04', + 'loc_bn_index_L_01', + 'loc_bn_index_L_02', + 'loc_bn_index_L_03', + 'loc_bn_index_L_04', + 'loc_bn_thumb_L_01', + 'loc_bn_thumb_L_02', + 'loc_bn_thumb_L_03', + 'loc_bn_thumb_L_04', +] + +rhd_joints = [ + 'loc_bn_palm_L', + 'loc_bn_thumb_L_04', + 'loc_bn_thumb_L_03', + 'loc_bn_thumb_L_02', + 'loc_bn_thumb_L_01', + 'loc_bn_index_L_04', + 'loc_bn_index_L_03', + 'loc_bn_index_L_02', + 'loc_bn_index_L_01', + 'loc_bn_mid_L_04', + 'loc_bn_mid_L_03', + 'loc_bn_mid_L_02', + 'loc_bn_mid_L_01', + 'loc_bn_ring_L_04', + 'loc_bn_ring_L_03', + 'loc_bn_ring_L_02', + 'loc_bn_ring_L_01', + 'loc_bn_pinky_L_04', + 'loc_bn_pinky_L_03', + 'loc_bn_pinky_L_02', + 'loc_bn_pinky_L_01' +] + +snap_joint_names = [ + 'loc_bn_palm_L', + 'loc_bn_thumb_L_01', + 'loc_bn_thumb_L_02', + 'loc_bn_thumb_L_03', + 'loc_bn_thumb_L_04', + 'loc_bn_index_L_01', + 'loc_bn_index_L_02', + 'loc_bn_index_L_03', + 'loc_bn_index_L_04', + 'loc_bn_mid_L_01', + 'loc_bn_mid_L_02', + 'loc_bn_mid_L_03', + 'loc_bn_mid_L_04', + 'loc_bn_ring_L_01', + 'loc_bn_ring_L_02', + 'loc_bn_ring_L_03', + 'loc_bn_ring_L_04', + 'loc_bn_pinky_L_01', + 'loc_bn_pinky_L_02', + 'loc_bn_pinky_L_03', + 'loc_bn_pinky_L_04' +] + +SNAP_BONES = [ + (0, 1, 2, 3, 4), + (0, 5, 6, 7, 8), + (0, 9, 10, 11, 12), + (0, 13, 14, 15, 16), + (0, 17, 18, 19, 20) +] + +SNAP_PARENT = [ + 0, # 0's parent + 0, # 1's parent + 1, + 2, + 3, + 0, # 5's parent + 5, + 6, + 7, + 0, # 9's parent + 9, + 10, + 11, + 0, # 13's parent + 13, + 14, + 15, + 0, # 17's parent + 17, + 18, + 19, +] + +JOINT_COLORS = ( + (216, 31, 53), + (214, 208, 0), + (136, 72, 152), + (126, 199, 216), + (0, 0, 230), +) + +DEFAULT_CACHE_DIR = 'datasets/data/.cache' + +USEFUL_BONE = [1, 2, 3, + 5, 6, 7, + 9, 10, 11, + 13, 14, 15, + 17, 18, 19] + +kinematic_tree = [2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16, 18, 19, 20] + +ID2ROT = { + 2: 13, 3: 14, 4: 15, + 6: 1, 7: 2, 8: 3, + 10: 4, 11: 5, 12: 6, + 14: 10, 15: 11, 16: 12, + 18: 7, 19: 8, 20: 9, + } \ No newline at end of file diff --git a/e3d_data_iter/datasets.py b/e3d_data_iter/datasets.py new file mode 100644 index 0000000000000000000000000000000000000000..f9296a598073bebe7e15253225e24fd2d25b4a31 --- /dev/null +++ b/e3d_data_iter/datasets.py @@ -0,0 +1,553 @@ +#-*-coding:utf-8-*- +# date:2021-06-15 +# Author: Eric.Lee +# function: easy 3d handpose data iter +import glob +import math +import os +import random + +from tqdm import tqdm +import cv2 +import numpy as np +import torch +from torch.utils.data import Dataset +from torch.utils.data import DataLoader +import json +#---------------------- +import torch +from manopth import manolayer +# from model.detnet import detnet +from utils import func, bone, AIK, smoother +from utils.LM_new import LM_Solver +import numpy as np +import matplotlib.pyplot as plt +from utils import vis +from op_pso import PSO +import open3d +from mpl_toolkits.mplot3d import Axes3D +import time +#---------------------- + +def draw_handpose_2d(img_,hand_,x,y,thick = 3): + # thick = 2 + colors = 
[(0,215,255),(255,115,55),(5,255,55),(25,15,255),(225,15,55)]
+    # Each finger is a chain of joints rooted at the wrist (joint 0);
+    # draw every bone of each chain in that finger's color.
+    finger_chains = [(0, 1, 2, 3, 4), (0, 5, 6, 7, 8), (0, 9, 10, 11, 12),
+                     (0, 13, 14, 15, 16), (0, 17, 18, 19, 20)]
+    for color, chain in zip(colors, finger_chains):
+        for a, b in zip(chain[:-1], chain[1:]):
+            cv2.line(img_,
+                     (int(hand_[str(a)]['x'] + x), int(hand_[str(a)]['y'] + y)),
+                     (int(hand_[str(b)]['x'] + x), int(hand_[str(b)]['y'] + y)),
+                     color, thick)
+
+def img_agu_channel_same(img_):
+    # Augmentation: replicate the grayscale image into all three channels.
+    img_a = np.zeros(img_.shape, dtype=np.uint8)
+    gray = cv2.cvtColor(img_, cv2.COLOR_RGB2GRAY)
+    img_a[:,:,0] = gray
+    img_a[:,:,1] = gray
+    img_a[:,:,2] = gray
+
+    return img_a
+# Image whitening: zero mean, unit variance.
+def prewhiten(x):
+    mean = np.mean(x)
+    std = np.std(x)
+    std_adj = np.maximum(std, 1.0 / np.sqrt(x.size))
+    y = np.multiply(np.subtract(x, mean), 1 / std_adj)
+    return y
+
+# Brightness / contrast augmentation: scale pixel values by c, add bias b to every channel.
+def contrast_img(img, c, b):
+    rows, cols, channels = img.shape
+    # Blend with an all-zero (black) image: dst = c*img + (1-c)*blank + b.
+    blank = np.zeros([rows, cols, channels], img.dtype)
+    dst = cv2.addWeighted(img, c, blank, 1-c, b)
+    return dst
+
+def letterbox(img, height=416, augment=False, color=(127.5, 127.5, 127.5)):
+    # Resize a rectangular
image to a padded square + shape = img.shape[:2] # shape = [height, width] + ratio = float(height) / max(shape) # ratio = old / new + new_shape = (round(shape[1] * ratio), round(shape[0] * ratio)) + dw = (height - new_shape[0]) / 2 # width padding + dh = (height - new_shape[1]) / 2 # height padding + top, bottom = round(dh - 0.1), round(dh + 0.1) + left, right = round(dw - 0.1), round(dw + 0.1) + # resize img + if augment: + interpolation = np.random.choice([None, cv2.INTER_NEAREST, cv2.INTER_LINEAR, + None, cv2.INTER_NEAREST, cv2.INTER_LINEAR, + cv2.INTER_AREA, cv2.INTER_CUBIC, cv2.INTER_LANCZOS4]) + if interpolation is None: + img = cv2.resize(img, new_shape) + else: + img = cv2.resize(img, new_shape, interpolation=interpolation) + else: + img = cv2.resize(img, new_shape, interpolation=cv2.INTER_NEAREST) + # print("resize time:",time.time()-s1) + + img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # padded square + return img, ratio, dw, dh + +def draw_umich_gaussian(heatmap, center, radius, k=1): + diameter = 2 * radius + 1 + gaussian = gaussian2D((diameter, diameter), sigma=diameter / 6) + + x, y = int(center[0]), int(center[1]) + + height, width = heatmap.shape[0:2] + + left, right = min(x, radius), min(width - x, radius + 1) + top, bottom = min(y, radius), min(height - y, radius + 1) + + masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right] + masked_gaussian = gaussian[radius - top:radius + bottom, radius - left:radius + right] + if min(masked_gaussian.shape) > 0 and min(masked_heatmap.shape) > 0: # TODO debug + np.maximum(masked_heatmap, masked_gaussian * k, out=masked_heatmap) + return heatmap + +def gaussian2D(shape, sigma=1): + m, n = [(ss - 1.) / 2. for ss in shape] + y, x = np.ogrid[-m:m+1,-n:n+1] + + h = np.exp(-(x * x + y * y) / (2 * sigma * sigma)) + h[h < np.finfo(h.dtype).eps * h.max()] = 0 + return h + +def draw_msra_gaussian(heatmap, center, sigma): + tmp_size = sigma * 3 + mu_x = int(center[0] + 0.5) + mu_y = int(center[1] + 0.5) + w, h = heatmap.shape[0], heatmap.shape[1] + ul = [int(mu_x - tmp_size), int(mu_y - tmp_size)] + br = [int(mu_x + tmp_size + 1), int(mu_y + tmp_size + 1)] + if ul[0] >= h or ul[1] >= w or br[0] < 0 or br[1] < 0: + return heatmap + size = 2 * tmp_size + 1 + x = np.arange(0, size, 1, np.float32) + y = x[:, np.newaxis] + x0 = y0 = size // 2 + g = np.exp(- ((x - x0) ** 2 + (y - y0) ** 2) / (2 * sigma ** 2)) + g_x = max(0, -ul[0]), min(br[0], h) - ul[0] + g_y = max(0, -ul[1]), min(br[1], w) - ul[1] + img_x = max(0, ul[0]), min(br[0], h) + img_y = max(0, ul[1]), min(br[1], w) + heatmap[img_y[0]:img_y[1], img_x[0]:img_x[1]] = np.maximum( + heatmap[img_y[0]:img_y[1], img_x[0]:img_x[1]], + g[g_y[0]:g_y[1], g_x[0]:g_x[1]]) + return heatmap +def get_heatmap(img_fix_size,x1y1x2y2,handpose_2d,ratio, dw, dh,offset_x1=0,offset_y1=0,radius=20,vis = False): + num_objs = 21 + hm = np.zeros((256,256,num_objs), dtype=np.float32) + draw_gaussian = draw_msra_gaussian if False else draw_umich_gaussian + + for k in range(num_objs): + x,y = (handpose_2d[str(k)]["x"]-offset_x1)*ratio+round(dw - 0.1),(handpose_2d[str(k)]["y"]-offset_y1)*ratio+round(dh - 0.1) + + draw_gaussian(hm[:,:,k], (x,y), radius) + + if vis: + # print("x,y : ",x,y) + cv2.namedWindow("hm",0) + cv2.imshow("hm",hm[:,:,k]) + cv2.circle(img_fix_size, (int(x),int(y)), 3, (250,60,255),-1) + cv2.namedWindow("fix_size",0) + cv2.imshow("fix_size",img_fix_size) + cv2.waitKey(1) + # print("------------------------>>>") + hm_w = hm.max(axis=2) + if vis: + 
cv2.namedWindow("hm_w",0) + cv2.imshow("hm_w",hm_w) + # cv2.waitKey(1) + # print(hm_w.size) + return hm,hm_w +class LoadImagesAndLabels(Dataset): + def __init__(self, ops, img_size=(256,256), flag_agu = False,vis = False): + print('img_size (height,width) : ',img_size[0],img_size[1]) + print("train_path : {}".format(ops.train_path)) + g_side = "right" + path = ops.train_path + + #----------------------- + if vis: + pose, shape = func.initiate("zero") + pre_useful_bone_len = np.zeros((1, 15)) # 骨架点信息 + solver = LM_Solver(num_Iter=666, th_beta=shape.cpu(), th_pose=pose.cpu(), lb_target=pre_useful_bone_len, + weight=1e-5) + pose0 = torch.eye(3).repeat(1, 16, 1, 1) + + mano = manolayer.ManoLayer(flat_hand_mean=True, + side=g_side, + mano_root='./mano/models', + use_pca=False, + root_rot_mode='rotmat', + joint_rot_mode='rotmat') + print('start ~') + point_fliter = smoother.OneEuroFilter(4.0, 0.0) + mesh_fliter = smoother.OneEuroFilter(4.0, 0.0) + shape_fliter = smoother.OneEuroFilter(1.5, 0.0) + #--------------------------- 配置点云 + view_mat = np.array([[1.0, 0.0, 0.0], + [0.0, -1.0, 0], + [0.0, 0, -1.0]]) + mesh = open3d.geometry.TriangleMesh() + hand_verts, j3d_recon = mano(pose0, shape.float()) + mesh.triangles = open3d.utility.Vector3iVector(mano.th_faces) + hand_verts = hand_verts.clone().detach().cpu().numpy()[0] + mesh.vertices = open3d.utility.Vector3dVector(hand_verts) + viewer = open3d.visualization.Visualizer() + viewer.create_window(width=640, height=640, window_name='HandPose3d_Mesh') + viewer.add_geometry(mesh) + viewer.update_renderer() + renderOptions = viewer.get_render_option () + renderOptions.background_color = np.asarray([120/255,120/255,120/255]) # 设置背景颜色 + # axis_pcd = open3d.create_mesh_coordinate_frame(size=0.5, origin=[0, 0, 0]) + + # vis.add_geometry(axis_pcd) + pts_flag = True + if pts_flag: + test_pcd = open3d.geometry.PointCloud() # 定义点云 + viewer.add_geometry(test_pcd) + + + print('start pose estimate') + + pre_uv = None + shape_time = 0 + opt_shape = None + shape_flag = True + #----------------------------------------------------------------------- + file_list = [] + label_list = [] + bbox_list = [] + handpose_2d_x1y1x2y2_list = [] + handpose_2d_pts_hand_list = [] + handpose_3d_xyz_list = [] + idx = 0 + for f_ in os.listdir(path): + if ".jpg" in f_: + thr = 0 + num_ = int(f_.split("_")[-1].replace(".jpg","")) + file_img = path + f_ + file_json = file_img.replace("_{}.jpg".format(num_),"_{}.json".format(num_+thr)) + if not os.access(file_json,os.F_OK): + continue + #----------------------------- + file_list.append(file_img) + label_list.append(file_json) + #----------------------------- + # print(file_json) + f = open(file_json, encoding='utf-8')#读取 json文件 + dict_x = json.load(f) + f.close() + # print(dict_x) + #-------------------- + if vis: + img = cv2.imread(file_img) + if g_side == "left": + img = cv2.flip(img,1) + bbox = dict_x["bbox"] + handpose_2d = dict_x["handpose_2d"] + #----------------- + x1_,y1_,x2_,y2_ = handpose_2d["x1y1x2y2"] + x1_,y1_,x2_,y2_ = int(x1_),int(y1_),int(x2_),int(y2_) + gt_3d_joints = dict_x["handpose_3d_xyz"] + # + handpose_2d_x1y1x2y2_list.append((x1_,y1_,x2_,y2_)) + handpose_2d_pts_hand_list.append(handpose_2d["pts_hand"]) + handpose_3d_xyz_list.append(gt_3d_joints) + if vis: + img_fix_size,ratio, dw, dh = letterbox(img[y1_:y2_,x1_:x2_], height=img_size[0], color=(0,0,0)) + + hm,hm_w = get_heatmap(img_fix_size,handpose_2d["x1y1x2y2"],handpose_2d["pts_hand"],ratio, dw, dh,vis=False) + + cv2.namedWindow("fix_size",0) + 
cv2.imshow("fix_size",img_fix_size) + + hm_w = np.expand_dims(hm_w,2) + + print("hm.shape : {}".format(hm.shape)) + print("hm_w.shape : {}".format(hm_w.shape)) + print("img_fix_size.shape : {}".format(img_fix_size.shape)) + img_fix_size_r = img_fix_size.astype(np.float32) + img_fix_size_r = (img_fix_size_r-128.)/256. + #-------------------------------------------------- + image_fusion = np.concatenate((img_fix_size_r,hm),axis=2) + print(" A image_fusion.shape : {}".format(image_fusion.shape)) + + image_fusion = image_fusion.transpose(2, 0, 1) + print(" B image_fusion.shape : {}".format(image_fusion.shape)) + + image_fusion = np.expand_dims(image_fusion,0) + print(" C image_fusion.shape : {}".format(image_fusion.shape)) + + # img_fix_size_r = np.expand_dims(img_fix_size_r,0) + # print(hm.shape ," 《《-------------》》",img_fix_size_r.shape) + # #-------------------------------------------------- + # image_fusion = np.concatenate((img_fix_size_r,hm_w),axis=0) + + + #----------------- + cv2.rectangle(img, (int(bbox[0]),int(bbox[1])), (int(bbox[2]),int(bbox[3])), (255,0,255), 5) # 绘制空心矩形 + + pts_hand2d = handpose_2d["pts_hand"] + draw_handpose_2d(img,pts_hand2d,x1_,y1_,2) + + #--------------------------------- + gt_3d_joints= np.array(gt_3d_joints) + print(gt_3d_joints.shape) + if g_side == "left": + print("------------------->>. left") + gt_3d_joints[:,0] *=(-1.) + gt_3d_joints = torch.tensor(gt_3d_joints).squeeze(0) + gt_3d_joints= gt_3d_joints.cuda() + print(gt_3d_joints.size()) + #------------------------------ + # now_uv = result['uv'].clone().detach().cpu().numpy()[0, 0] + # now_uv = now_uv.astype(np.float) + trans = np.zeros((1, 3)) + # trans[0, 0:2] = now_uv - 16.0 + trans = trans / 16.0 + new_tran = np.array([[trans[0, 1], trans[0, 0], trans[0, 2]]]) + gt_3d_joints = gt_3d_joints.clone().detach().cpu().numpy() + + flited_joints = point_fliter.process(gt_3d_joints) + + # fliter_ax.cla() + # + # filted_ax = vis.plot3d(flited_joints + new_tran, fliter_ax) + pre_useful_bone_len = bone.caculate_length(gt_3d_joints, label="useful") + + NGEN = 0 # PSO 迭代次数 + popsize = 100 + low = np.zeros((1, 10)) - 3.0 + up = np.zeros((1, 10)) - 2.0 + parameters = [NGEN, popsize, low, up] + pso = PSO(parameters, pre_useful_bone_len.reshape((1, 15)),g_side) + pso.main(solver) + if True:#opt_shape is None: + opt_shape = pso.ng_best + opt_shape = shape_fliter.process(opt_shape) + + opt_tensor_shape = torch.tensor(opt_shape, dtype=torch.float) + _, j3d_p0_ops = mano(pose0, opt_tensor_shape) + template = j3d_p0_ops.cpu().numpy().squeeze(0) / 1000.0 # template, m 21*3 + ratio = np.linalg.norm(template[9] - template[0]) / np.linalg.norm(gt_3d_joints[9] - gt_3d_joints[0]) + j3d_pre_process = gt_3d_joints * ratio # template, m + j3d_pre_process = j3d_pre_process - j3d_pre_process[0] + template[0] + pose_R = AIK.adaptive_IK(template, j3d_pre_process) + pose_R = torch.from_numpy(pose_R).float() + # reconstruction + hand_verts, j3d_recon = mano(pose_R, opt_tensor_shape.float()) + hand_verts[:,:,:] = hand_verts[:,:,:]*(0.85) + # print(j3d_recon.size()) + + mesh.triangles = open3d.utility.Vector3iVector(mano.th_faces) + hand_verts = hand_verts.clone().detach().cpu().numpy()[0] + hand_verts = mesh_fliter.process(hand_verts) + hand_verts = np.matmul(view_mat, hand_verts.T).T + if g_side == "right": + hand_verts[:, 0] = hand_verts[:, 0] - 80 + else: + hand_verts[:, 0] = hand_verts[:, 0] + 80 + hand_verts[:, 1] = hand_verts[:, 1] - 0 + mesh_tran = np.array([[-new_tran[0, 0], new_tran[0, 1], new_tran[0, 2]]]) + hand_verts = 
hand_verts - 100 * mesh_tran
+
+                mesh.vertices = open3d.utility.Vector3dVector(hand_verts)
+                # mesh.paint_uniform_color([252 / 255, 224 / 255, 203 / 255])
+                mesh.paint_uniform_color([238 / 255, 188 / 255, 158 / 255])
+                mesh.compute_triangle_normals()
+                mesh.compute_vertex_normals()
+                #-----------
+                if pts_flag:
+                    if False:
+                        j3d_ = j3d_recon.detach().cpu().numpy()
+                        j3d_[0][:,1] *= (-1.)
+                        # j3d_[0][:,0] += trans[0,0]
+                        j3d_[0] = j3d_[0] - 100 * mesh_tran
+                        j3d_[0][:,0] -= 50
+                        j3d_[0][:,1] -= 30
+                        # print(j3d_.shape,j3d_)
+                        test_pcd.points = open3d.utility.Vector3dVector(j3d_[0])  # set the point-cloud coordinates
+                    else:
+                        # test_pcd.points = open3d.utility.Vector3dVector(hand_verts)
+                        gt_3d_joints[:,1] *= -1.
+                        gt_3d_joints = gt_3d_joints*70
+                        gt_3d_joints[:,1] -= 40
+                        gt_3d_joints[:,0] -= 0
+                        print("gt_3d_joints",gt_3d_joints.shape)
+                        test_pcd.points = open3d.utility.Vector3dVector(gt_3d_joints)
+                        # test_pcd.points = open3d.utility.Vector3dVector(gt_3d_joints[1,:].reshape(1,3))
+                        # rgb = np.asarray([250,0,250])
+                        # rgb_t = np.transpose(rgb)
+                        # test_pcd.colors = open3d.utility.Vector3dVector(rgb_t.astype(np.float) / 255.0)
+                    # print("hand_verts shape",hand_verts)
+                #-----------
+                viewer.update_geometry(mesh)
+                if pts_flag:
+                    viewer.update_geometry(test_pcd)
+                viewer.poll_events()
+                viewer.update_renderer()
+
+                cv2.namedWindow("img",0)
+                cv2.imshow("img",img)
+                cv2.waitKey(1)
+
+        #-----------------------------------------------------------------------
+        if vis:
+            cv2.destroyAllWindows()
+
+        print()
+        self.files = file_list
+        self.img_size = img_size
+        self.flag_agu = flag_agu
+        self.vis = vis
+
+        self.bbox_list = bbox_list
+        self.x1y1x2y2_2d_list = handpose_2d_x1y1x2y2_list
+        self.pts_hand_2d_list = handpose_2d_pts_hand_list
+        self.xyz_3d_list = handpose_3d_xyz_list
+
+    def __len__(self):
+        return len(self.files)
+
+    def __getitem__(self, index):
+        img_path = self.files[index]
+        x1y1x2y2 = self.x1y1x2y2_2d_list[index]
+        pts_hand = self.pts_hand_2d_list[index]
+        gt_3d_joints = self.xyz_3d_list[index]
+        img = cv2.imread(img_path)  # BGR
+
+        x1_,y1_,x2_,y2_ = x1y1x2y2
+
+        # Randomly jitter the hand crop by up to 1/8 of its size on each side.
+        hand_w = int((x2_-x1_)/8)
+        hand_h = int((y2_-y1_)/8)
+        offset_x1 = random.randint(-hand_w,int(hand_w/2))
+        offset_y1 = random.randint(-hand_h,int(hand_h/2))
+        offset_x2 = random.randint(-int(hand_w/2),hand_w)
+        offset_y2 = random.randint(-int(hand_h/2),hand_h)
+        x1_new = x1_+offset_x1
+        y1_new = y1_+offset_y1
+        x2_new = x2_+offset_x2
+        y2_new = y2_+offset_y2
+
+        # Clip the jittered corners (not the original box) to the image bounds.
+        x1_new = np.clip(x1_new,0,img.shape[1]-1)
+        x2_new = np.clip(x2_new,0,img.shape[1]-1)
+        y1_new = np.clip(y1_new,0,img.shape[0]-1)
+        y2_new = np.clip(y2_new,0,img.shape[0]-1)
+
+        # Recompute the effective offsets so the 2D keypoints can be shifted to match.
+        offset_x1 = x1_new - x1_
+        offset_y1 = y1_new - y1_
+        offset_x2 = x2_new - x2_
+        offset_y2 = y2_new - y2_
+        x1_ = x1_new
+        y1_ = y1_new
+        x2_ = x2_new
+        y2_ = y2_new
+        #-------------------------------------
+        # if self.vis:
+        #     aa = img[y1_:y2_,x1_:x2_]
+        #     for k in range(21):
+        #         x,y = (pts_hand[str(k)]["x"]-offset_x1),(pts_hand[str(k)]["y"]-offset_y1)
+        #         cv2.circle(aa, (int(x),int(y)), 3, (250,60,255),-1)
+        #     cv2.namedWindow("fix_size_a",0)
+        #     cv2.imshow("fix_size_a",aa)
+        #-------------------------------------
+        img_,ratio, dw, dh
= letterbox(img[y1_:y2_,x1_:x2_], height=self.img_size[0], color=(0,0,0)) + + hm,hm_w = get_heatmap(img_,x1y1x2y2,pts_hand,ratio, dw, dh,offset_x1,offset_y1,vis=self.vis) + if self.vis: + cv2.namedWindow("fix_size",0) + cv2.imshow("fix_size",img_) + + hm_w = np.expand_dims(hm_w,2) + if self.vis: + print("hm.shape : {}".format(hm.shape)) + print("hm_w.shape : {}".format(hm_w.shape)) + print("img_fix_size.shape : {}".format(img_.shape)) + + #------------------------------------- + #------------------------------------- + if self.flag_agu == True: + if random.random() > 0.5: + c = float(random.randint(80,120))/100. + b = random.randint(-10,10) + img_ = contrast_img(img_, c, b) + if self.flag_agu == True: + if random.random() > 0.9: + # print('agu hue ') + img_hsv=cv2.cvtColor(img_,cv2.COLOR_BGR2HSV) + hue_x = random.randint(-10,10) + # print(cc) + img_hsv[:,:,0]=(img_hsv[:,:,0]+hue_x) + img_hsv[:,:,0] =np.maximum(img_hsv[:,:,0],0) + img_hsv[:,:,0] =np.minimum(img_hsv[:,:,0],180)#范围 0 ~180 + img_=cv2.cvtColor(img_hsv,cv2.COLOR_HSV2BGR) + if self.flag_agu == True: + if random.random() > 0.95: + img_ = img_agu_channel_same(img_) + + if self.vis: + cv2.namedWindow("fix_size_agu",0) + cv2.imshow("fix_size_agu",img_) + cv2.waitKey(1) + + img_fix_size = img_.astype(np.float32) + + + img_fix_size_r = img_fix_size.astype(np.float32) + img_fix_size_r = (img_fix_size_r-128.)/256. + #-------------------------------------------------- + image_fusion = np.concatenate((img_fix_size_r,hm),axis=2) + if self.vis: + print(" A image_fusion.shape : {}".format(image_fusion.shape)) + + image_fusion = image_fusion.transpose(2, 0, 1) + if self.vis: + print(" B image_fusion.shape : {}".format(image_fusion.shape)) + + # image_fusion = np.expand_dims(image_fusion,0) + # if self.vis: + # print(" C image_fusion.shape : {}".format(image_fusion.shape)) + # cv2.waitKey(0) + + + gt_3d_joints = np.array(gt_3d_joints).ravel() + if self.vis: + print(gt_3d_joints.shape) + print(image_fusion.shape) + return image_fusion,gt_3d_joints diff --git a/inference.py b/inference.py new file mode 100644 index 0000000000000000000000000000000000000000..3a75559c95457a1c68943f942714a45ca7add161 --- /dev/null +++ b/inference.py @@ -0,0 +1,321 @@ +#-*-coding:utf-8-*- +# date:2021-06-15 +# Author: Eric.Lee +# function: handpose 3D Inference + +import os +import argparse +import torch +import torch.nn as nn +import numpy as np + +import time +import datetime +import os +import math +from datetime import datetime +import cv2 +import torch.nn.functional as F + +from models.resnet import resnet18,resnet34,resnet50,resnet101 +from e3d_data_iter.datasets import letterbox,get_heatmap +import sys +sys.path.append("./components/") # 添加模型组件路径 +from hand_keypoints.handpose_x import handpose_x_model,draw_bd_handpose_c + +from utils.common_utils import * +import copy + +from utils import func, bone, AIK, smoother +from utils.LM_new import LM_Solver +from op_pso import PSO +import open3d +from mpl_toolkits.mplot3d import Axes3D +from manopth import manolayer +if __name__ == "__main__": + + parser = argparse.ArgumentParser(description=' Project Hand Pose 3D Inference') + parser.add_argument('--model_path', type=str, default = './model_exp/2021-06-16_02-09-37/resnet_50-size-256-loss-wing_loss-model_epoch-732.pth', + help = 'model_path') # 模型路径 + parser.add_argument('--model', type=str, default = 'resnet_50', + help = '''model : resnet_18,resnet_34,resnet_50,resnet_101''') # 模型类型 + parser.add_argument('--num_classes', type=int , default = 63, + help = 
'num_classes')  # 21 hand keypoints, (x, y, z) * 21 = 63
+    parser.add_argument('--GPUS', type=str, default = '0',
+        help = 'GPUS')  # GPU selection
+    parser.add_argument('--test_path', type=str, default = './image/',
+        help = 'test_path')  # test image path
+    parser.add_argument('--img_size', type=tuple , default = (256,256),
+        help = 'img_size')  # model input image size
+    parser.add_argument('--vis', type=bool , default = True,
+        help = 'vis')  # whether to visualize images
+
+    print('\n/******************* {} ******************/\n'.format(parser.description))
+    #--------------------------------------------------------------------------
+    ops = parser.parse_args()  # parse the arguments added above
+    #--------------------------------------------------------------------------
+    print('----------------------------------')
+
+    unparsed = vars(ops)  # parse_args() returns a namespace; vars() turns it into a dict
+    for key in unparsed.keys():
+        print('{} : {}'.format(key,unparsed[key]))
+
+    #---------------------------------------------------------------------------
+    os.environ['CUDA_VISIBLE_DEVICES'] = ops.GPUS
+
+    test_path = ops.test_path  # folder of test images
+    #---------------------------------------------------------------- build the model
+    print('use model : %s'%(ops.model))
+
+    if ops.model == 'resnet_50':
+        model_ = resnet50(num_classes = ops.num_classes,img_size=ops.img_size[0])
+    elif ops.model == 'resnet_18':
+        model_ = resnet18(num_classes = ops.num_classes,img_size=ops.img_size[0])
+    elif ops.model == 'resnet_34':
+        model_ = resnet34(num_classes = ops.num_classes,img_size=ops.img_size[0])
+    elif ops.model == 'resnet_101':
+        model_ = resnet101(num_classes = ops.num_classes,img_size=ops.img_size[0])
+
+    use_cuda = torch.cuda.is_available()
+
+    device = torch.device("cuda:0" if use_cuda else "cpu")
+    model_ = model_.to(device)
+    model_.eval()  # set forward-inference mode
+    # print(model_)  # print the model structure
+    # load the test checkpoint
+    if os.access(ops.model_path,os.F_OK):  # checkpoint
+        chkpt = torch.load(ops.model_path, map_location=device)
+        model_.load_state_dict(chkpt)
+        print('load test model : {}'.format(ops.model_path))
+
+    #----------------- build the handpose_x 2D keypoint detection model
+    handpose_2d_model = handpose_x_model()
+
+    #----------------- build manopth
+    g_side = "right"
+    print('load model finished')
+    pose, shape = func.initiate("zero")
+    pre_useful_bone_len = np.zeros((1, 15))  # skeleton bone-length info
+    solver = LM_Solver(num_Iter=666, th_beta=shape.cpu(), th_pose=pose.cpu(), lb_target=pre_useful_bone_len,
+                       weight=1e-5)
+    pose0 = torch.eye(3).repeat(1, 16, 1, 1)
+
+    mano = manolayer.ManoLayer(flat_hand_mean=True,
+                               side=g_side,
+                               mano_root='./mano/models',
+                               use_pca=False,
+                               root_rot_mode='rotmat',
+                               joint_rot_mode='rotmat')
+    print('start ~')
+    point_fliter = smoother.OneEuroFilter(4.0, 0.0)
+    mesh_fliter = smoother.OneEuroFilter(4.0, 0.0)
+    shape_fliter = smoother.OneEuroFilter(1.5, 0.0)
+    #--------------------------- set up the point cloud / mesh viewer
+    view_mat = np.array([[1.0, 0.0, 0.0],
+                         [0.0, -1.0, 0],
+                         [0.0, 0, -1.0]])
+    mesh = open3d.geometry.TriangleMesh()
+    hand_verts, j3d_recon = mano(pose0, shape.float())
+    mesh.triangles = open3d.utility.Vector3iVector(mano.th_faces)
+    hand_verts = hand_verts.clone().detach().cpu().numpy()[0]
+    mesh.vertices = open3d.utility.Vector3dVector(hand_verts)
+    viewer = open3d.visualization.Visualizer()
+    viewer.create_window(width=640, height=640, window_name='HandPose3d_Mesh')
+    viewer.add_geometry(mesh)
+    viewer.update_renderer()
+    renderOptions = viewer.get_render_option()
+    renderOptions.background_color = np.asarray([120/255,120/255,120/255])  # set background color
+    # axis_pcd = open3d.create_mesh_coordinate_frame(size=0.5, origin=[0, 0, 0])
+
+    #
vis.add_geometry(axis_pcd) + pts_flag = True + if pts_flag: + test_pcd = open3d.geometry.PointCloud() # 定义点云 + viewer.add_geometry(test_pcd) + + + print('start pose estimate') + + pre_uv = None + shape_time = 0 + opt_shape = None + shape_flag = True + #---------------------------------------------------------------- 预测图片 + + with torch.no_grad(): + idx = 0 + for file in os.listdir(ops.test_path): + if '.jpg' not in file: + continue + idx += 1 + print('{}) image : {}'.format(idx,file)) + img = cv2.imread(ops.test_path + file) + #-------------------------------- + img_show = img.copy() # 用于显示使用 + pts_2d_ = handpose_2d_model.predict(img.copy()) # handpose_2d predict + pts_2d_hand = {} + for ptk in range(int(pts_2d_.shape[0]/2)): + + xh = pts_2d_[ptk*2+0]*float(img.shape[1]) + yh = pts_2d_[ptk*2+1]*float(img.shape[0]) + pts_2d_hand[str(ptk)] = { + "x":xh, + "y":yh, + } + if ops.vis: + cv2.circle(img_show, (int(xh),int(yh)), 4, (255,50,60),-1) + cv2.circle(img_show, (int(xh),int(yh)), 3, (25,160,255),-1) + if ops.vis: + draw_bd_handpose_c(img_show,pts_2d_hand,0,0,2) + cv2.namedWindow("handpose_2d",0) + cv2.imshow("handpose_2d",img_show) + + #-------------------------------- + img_lbox,ratio, dw, dh = letterbox(img.copy(), height=ops.img_size[0], color=(0,0,0)) + if ops.vis: + cv2.namedWindow("letterbox",0) + cv2.imshow("letterbox",img_lbox) + + #-------------------------------- get heatmap + x1y1x2y2 = 0,0,0,0 + offset_x1,offset_y1 = 0,0 + hm,hm_w = get_heatmap(img_lbox.copy(),x1y1x2y2,pts_2d_hand,ratio, dw, dh,offset_x1,offset_y1,vis=False) + if ops.vis: + cv2.namedWindow("hm_w",0) + cv2.imshow("hm_w",hm_w) + + #-------------------------------- + img_fix_size = img_lbox.astype(np.float32) + + img_fix_size_r = img_fix_size.astype(np.float32) + img_fix_size_r = (img_fix_size_r-128.)/256. + #-------------------------------------------------- + image_fusion = np.concatenate((img_fix_size_r,hm),axis=2) + image_fusion = image_fusion.transpose(2, 0, 1) + image_fusion = torch.from_numpy(image_fusion) + image_fusion = image_fusion.unsqueeze_(0) + if use_cuda: + image_fusion = image_fusion.cuda() # (bs, channel, h, w) + print("image_fusion size : {}".format(image_fusion.size())) + + + #-------------------------------- # handpose_3d predict + pre_ = model_(image_fusion.float()) # 模型推理 + output = pre_.cpu().detach().numpy() + output = np.squeeze(output) + print("handpose_3d output shape : {}".format(output.shape)) + + pre_3d_joints = output.reshape((21,3)) + print("pre_3d_joints shape : {}".format(pre_3d_joints.shape)) + + if g_side == "left": + print("------------------->>. left") + pre_3d_joints[:,0] *=(-1.) 
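+            # Negating x mirrors the joints for the left-hand case (presumably so the
+            # downstream MANO fitting can operate in a single hand convention).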
+ pre_3d_joints = torch.tensor(pre_3d_joints).squeeze(0) + pre_3d_joints= pre_3d_joints.cuda() + print(pre_3d_joints.size()) + #-------------------------------------------------------------------- + # now_uv = result['uv'].clone().detach().cpu().numpy()[0, 0] + # now_uv = now_uv.astype(np.float) + trans = np.zeros((1, 3)) + # trans[0, 0:2] = now_uv - 16.0 + trans = trans / 16.0 + new_tran = np.array([[trans[0, 1], trans[0, 0], trans[0, 2]]]) + pre_joints = pre_3d_joints.clone().detach().cpu().numpy() + + flited_joints = point_fliter.process(pre_joints) + + # fliter_ax.cla() + # + # filted_ax = vis.plot3d(flited_joints + new_tran, fliter_ax) + pre_useful_bone_len = bone.caculate_length(pre_joints, label="useful") + + NGEN = 0 # PSO 迭代次数 + popsize = 100 + low = np.zeros((1, 10)) - 3.0 + up = np.zeros((1, 10)) - 2.0 + parameters = [NGEN, popsize, low, up] + pso = PSO(parameters, pre_useful_bone_len.reshape((1, 15)),g_side) + pso.main(solver) + if True:#opt_shape is None: + opt_shape = pso.ng_best + opt_shape = shape_fliter.process(opt_shape) + + opt_tensor_shape = torch.tensor(opt_shape, dtype=torch.float) + _, j3d_p0_ops = mano(pose0, opt_tensor_shape) + template = j3d_p0_ops.cpu().numpy().squeeze(0) / 1000.0 # template, m 21*3 + ratio = np.linalg.norm(template[9] - template[0]) / np.linalg.norm(pre_joints[9] - pre_joints[0]) + j3d_pre_process = pre_joints * ratio # template, m + j3d_pre_process = j3d_pre_process - j3d_pre_process[0] + template[0] + pose_R = AIK.adaptive_IK(template, j3d_pre_process) + pose_R = torch.from_numpy(pose_R).float() + # reconstruction + hand_verts, j3d_recon = mano(pose_R, opt_tensor_shape.float()) + hand_verts[:,:,:] = hand_verts[:,:,:]*(0.85) + # print(j3d_recon.size()) + + mesh.triangles = open3d.utility.Vector3iVector(mano.th_faces) + hand_verts = hand_verts.clone().detach().cpu().numpy()[0] + hand_verts = mesh_fliter.process(hand_verts) + hand_verts = np.matmul(view_mat, hand_verts.T).T + if g_side == "right": + hand_verts[:, 0] = hand_verts[:, 0] - 80 + else: + hand_verts[:, 0] = hand_verts[:, 0] + 80 + hand_verts[:, 1] = hand_verts[:, 1] - 0 + mesh_tran = np.array([[-new_tran[0, 0], new_tran[0, 1], new_tran[0, 2]]]) + hand_verts = hand_verts - 100 * mesh_tran + + mesh.vertices = open3d.utility.Vector3dVector(hand_verts) + # mesh.paint_uniform_color([252 / 255, 224 / 255, 203 / 255]) + mesh.paint_uniform_color([238 / 255, 188 / 255, 158 / 255]) + mesh.compute_triangle_normals() + mesh.compute_vertex_normals() + #----------- + if pts_flag: + if False: + j3d_ = j3d_recon.detach().cpu().numpy() + j3d_[0][:,1] *=(-1.) + # j3d_[0][:,0] +=trans[0,0] + j3d_[0] = j3d_[0] - 100 * mesh_tran + j3d_[0][:,0] -=50 + j3d_[0][:,1] -=30 + # print(j3d_.shape,j3d_) + test_pcd.points = open3d.utility.Vector3dVector(j3d_[0]) # 定义点云坐标位置 + else: + # test_pcd.points = open3d.utility.Vector3dVector(hand_verts) + pre_joints[:,1] *=-1. 
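+            # The flip, scale and offsets applied to pre_joints from here on only place
+            # the point cloud in the Open3D viewer's frame for display; they do not
+            # feed back into the regressed output.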
+ pre_joints = pre_joints*70 + pre_joints[:,1] -= 40 + pre_joints[:,0] -= 0 + print("pre_joints",pre_joints.shape) + test_pcd.points = open3d.utility.Vector3dVector(pre_joints) + # test_pcd.points = open3d.utility.Vector3dVector(pre_joints[1,:].reshape(1,3)) + # rgb = np.asarray([250,0,250]) + # rgb_t = np.transpose(rgb) + # test_pcd.colors = open3d.utility.Vector3dVector(rgb_t.astype(np.float) / 255.0) + # print("hand_verts shape",hand_verts) + # x_min,y_min,x_max,y_max = 65535,65535,0,0 + # for i in range(hand_verts.shape[0]): + # x_,y_,z_ = hand_verts[i] + # x_min = x_ if x_min>x_ else x_min + # y_min = y_ if y_min>y_ else y_min + # x_max = x_ if x_maxabsolute_x),\ + w * torch.log(1.0 + absolute_x / epsilon),\ + absolute_x - c) + + + # loss = tf.reduce_mean(tf.reduce_mean(losses, axis=[1]), axis=0) + losses = torch.mean(losses,dim=1,keepdim=True) + loss = torch.mean(losses) + return loss + +def got_total_wing_loss(output,crop_landmarks): + loss = wing_loss(output, crop_landmarks) + return loss diff --git a/manopth/__init__.py b/manopth/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e27cf8699e86b13d9d4cb28da10d54c405effe96 --- /dev/null +++ b/manopth/__init__.py @@ -0,0 +1 @@ +name = 'manopth' diff --git a/manopth/argutils.py b/manopth/argutils.py new file mode 100644 index 0000000000000000000000000000000000000000..7e86eb025ad0618e63d730b4f59ee3615118d197 --- /dev/null +++ b/manopth/argutils.py @@ -0,0 +1,51 @@ +import datetime +import os +import pickle +import subprocess +import sys + + +def print_args(args): + opts = vars(args) + print('======= Options ========') + for k, v in sorted(opts.items()): + print('{}: {}'.format(k, v)) + print('========================') + + +def save_args(args, save_folder, opt_prefix='opt', verbose=True): + opts = vars(args) + # Create checkpoint folder + if not os.path.exists(save_folder): + os.makedirs(save_folder, exist_ok=True) + + # Save options + opt_filename = '{}.txt'.format(opt_prefix) + opt_path = os.path.join(save_folder, opt_filename) + with open(opt_path, 'a') as opt_file: + opt_file.write('====== Options ======\n') + for k, v in sorted(opts.items()): + opt_file.write( + '{option}: {value}\n'.format(option=str(k), value=str(v))) + opt_file.write('=====================\n') + opt_file.write('launched {} at {}\n'.format( + str(sys.argv[0]), str(datetime.datetime.now()))) + + # Add git info + label = subprocess.check_output(["git", "describe", + "--always"]).strip() + if subprocess.call( + ["git", "branch"], + stderr=subprocess.STDOUT, + stdout=open(os.devnull, 'w')) == 0: + opt_file.write('=== Git info ====\n') + opt_file.write('{}\n'.format(label)) + commit = subprocess.check_output(['git', 'rev-parse', 'HEAD']) + opt_file.write('commit : {}\n'.format(commit.strip())) + + opt_picklename = '{}.pkl'.format(opt_prefix) + opt_picklepath = os.path.join(save_folder, opt_picklename) + with open(opt_picklepath, 'wb') as opt_file: + pickle.dump(opts, opt_file) + if verbose: + print('Saved options to {}'.format(opt_path)) diff --git a/manopth/demo.py b/manopth/demo.py new file mode 100644 index 0000000000000000000000000000000000000000..0bca468f297d02c3713bfe8ea15d0f62d4dd60cc --- /dev/null +++ b/manopth/demo.py @@ -0,0 +1,59 @@ +from matplotlib import pyplot as plt +from mpl_toolkits.mplot3d import Axes3D +from mpl_toolkits.mplot3d.art3d import Poly3DCollection +import numpy as np +import torch + +from manopth.manolayer import ManoLayer + + +def generate_random_hand(batch_size=1, ncomps=6, mano_root='mano/models'): + 
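+    # The pose vector is 3 global-orientation (axis-angle) values followed by
+    # ncomps PCA coefficients, hence the "+ 3" below.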
nfull_comps = ncomps + 3 # Add global orientation dims to PCA + random_pcapose = torch.rand(batch_size, nfull_comps) + mano_layer = ManoLayer(mano_root=mano_root) + verts, joints = mano_layer(random_pcapose) + return {'verts': verts, 'joints': joints, 'faces': mano_layer.th_faces} + + +def display_hand(hand_info, mano_faces=None, ax=None, alpha=0.2, batch_idx=0, show=True): + """ + Displays hand batch_idx in batch of hand_info, hand_info as returned by + generate_random_hand + """ + if ax is None: + fig = plt.figure() + ax = fig.add_subplot(111, projection='3d') + verts, joints = hand_info['verts'][batch_idx], hand_info['joints'][ + batch_idx] + if mano_faces is None: + ax.scatter(verts[:, 0], verts[:, 1], verts[:, 2], alpha=0.1) + else: + mesh = Poly3DCollection(verts[mano_faces], alpha=alpha) + face_color = (141 / 255, 184 / 255, 226 / 255) + edge_color = (50 / 255, 50 / 255, 50 / 255) + mesh.set_edgecolor(edge_color) + mesh.set_facecolor(face_color) + ax.add_collection3d(mesh) + ax.scatter(joints[:, 0], joints[:, 1], joints[:, 2], color='r') + cam_equal_aspect_3d(ax, verts.numpy()) + if show: + plt.show() + + +def cam_equal_aspect_3d(ax, verts, flip_x=False): + """ + Centers view on cuboid containing hand and flips y and z axis + and fixes azimuth + """ + extents = np.stack([verts.min(0), verts.max(0)], axis=1) + sz = extents[:, 1] - extents[:, 0] + centers = np.mean(extents, axis=1) + maxsize = max(abs(sz)) + r = maxsize / 2 + if flip_x: + ax.set_xlim(centers[0] + r, centers[0] - r) + else: + ax.set_xlim(centers[0] - r, centers[0] + r) + # Invert y and z axis + ax.set_ylim(centers[1] + r, centers[1] - r) + ax.set_zlim(centers[2] + r, centers[2] - r) diff --git a/manopth/manolayer.py b/manopth/manolayer.py new file mode 100644 index 0000000000000000000000000000000000000000..493dc045ad34d44547cefdce422c9a1c51890863 --- /dev/null +++ b/manopth/manolayer.py @@ -0,0 +1,275 @@ +import os + +import numpy as np +import torch +from torch.nn import Module + +from mano.webuser.smpl_handpca_wrapper_HAND_only import ready_arguments +from manopth import rodrigues_layer, rotproj, rot6d +from manopth.tensutils import (th_posemap_axisang, th_with_zeros, th_pack, + subtract_flat_id, make_list) + + +class ManoLayer(Module): + __constants__ = [ + 'use_pca', 'rot', 'ncomps', 'ncomps', 'kintree_parents', 'check', + 'side', 'center_idx', 'joint_rot_mode' + ] + + def __init__(self, + center_idx=None, + flat_hand_mean=True, + ncomps=6, + side='right', + mano_root='mano/models', + use_pca=True, + root_rot_mode='axisang', + joint_rot_mode='axisang', + robust_rot=False): + """ + Args: + center_idx: index of center joint in our computations, + if -1 centers on estimate of palm as middle of base + of middle finger and wrist + flat_hand_mean: if True, (0, 0, 0, ...) pose coefficients match + flat hand, else match average hand pose + mano_root: path to MANO pkl files for left and right hand + ncomps: number of PCA components form pose space (<45) + side: 'right' or 'left' + use_pca: Use PCA decomposition for pose space. 
+ joint_rot_mode: 'axisang' or 'rotmat', ignored if use_pca + """ + super().__init__() + + self.center_idx = center_idx + self.robust_rot = robust_rot + if root_rot_mode == 'axisang': + self.rot = 3 + else: + self.rot = 6 + self.flat_hand_mean = flat_hand_mean + self.side = side + self.use_pca = use_pca + self.joint_rot_mode = joint_rot_mode + self.root_rot_mode = root_rot_mode + if use_pca: + self.ncomps = ncomps + else: + self.ncomps = 45 + + if side == 'right': + self.mano_path = os.path.join(mano_root, 'MANO_RIGHT.pkl') + elif side == 'left': + self.mano_path = os.path.join(mano_root, 'MANO_LEFT.pkl') + + smpl_data = ready_arguments(self.mano_path,side) + + hands_components = smpl_data['hands_components'] + + self.smpl_data = smpl_data + + self.register_buffer('th_betas', + torch.Tensor(smpl_data['betas'].r).unsqueeze(0)) + self.register_buffer('th_shapedirs', + torch.Tensor(smpl_data['shapedirs'].r)) + self.register_buffer('th_posedirs', + torch.Tensor(smpl_data['posedirs'].r)) + self.register_buffer( + 'th_v_template', + torch.Tensor(smpl_data['v_template'].r).unsqueeze(0)) + self.register_buffer( + 'th_J_regressor', + torch.Tensor(np.array(smpl_data['J_regressor'].toarray()))) + self.register_buffer('th_weights', + torch.Tensor(smpl_data['weights'].r)) + self.register_buffer('th_faces', + torch.Tensor(smpl_data['f'].astype(np.int32)).long()) + + # Get hand mean + hands_mean = np.zeros(hands_components.shape[1] + ) if flat_hand_mean else smpl_data['hands_mean'] + hands_mean = hands_mean.copy() + th_hands_mean = torch.Tensor(hands_mean).unsqueeze(0) + if self.use_pca or self.joint_rot_mode == 'axisang': + # Save as axis-angle + self.register_buffer('th_hands_mean', th_hands_mean) + selected_components = hands_components[:ncomps] + self.register_buffer('th_comps', torch.Tensor(hands_components)) + self.register_buffer('th_selected_comps', + torch.Tensor(selected_components)) + else: + th_hands_mean_rotmat = rodrigues_layer.batch_rodrigues( + th_hands_mean.view(15, 3)).reshape(15, 3, 3) + self.register_buffer('th_hands_mean_rotmat', th_hands_mean_rotmat) + + # Kinematic chain params + self.kintree_table = smpl_data['kintree_table'] + parents = list(self.kintree_table[0].tolist()) + self.kintree_parents = parents + + def forward(self, + th_pose_coeffs, + th_betas=torch.zeros(1), + th_trans=torch.zeros(1), + root_palm=torch.Tensor([0]), + share_betas=torch.Tensor([0]), + ): + """ + Args: + th_trans (Tensor (batch_size x ncomps)): if provided, applies trans to joints and vertices + th_betas (Tensor (batch_size x 10)): if provided, uses given shape parameters for hand shape + else centers on root joint (9th joint) + root_palm: return palm as hand root instead of wrist + """ + # if len(th_pose_coeffs) == 0: + # return th_pose_coeffs.new_empty(0), th_pose_coeffs.new_empty(0) + + batch_size = th_pose_coeffs.shape[0] + # Get axis angle from PCA components and coefficients + if self.use_pca or self.joint_rot_mode == 'axisang': + # Remove global rot coeffs + th_hand_pose_coeffs = th_pose_coeffs[:, self.rot:self.rot + + self.ncomps] + if self.use_pca: + # PCA components --> axis angles + th_full_hand_pose = th_hand_pose_coeffs.mm(self.th_selected_comps) + else: + th_full_hand_pose = th_hand_pose_coeffs + + # Concatenate back global rot + th_full_pose = torch.cat([ + th_pose_coeffs[:, :self.rot], + self.th_hands_mean + th_full_hand_pose + ], 1) + if self.root_rot_mode == 'axisang': + # compute rotation matrixes from axis-angle while skipping global rotation + th_pose_map, th_rot_map = 
th_posemap_axisang(th_full_pose) + root_rot = th_rot_map[:, :9].view(batch_size, 3, 3) + th_rot_map = th_rot_map[:, 9:] + th_pose_map = th_pose_map[:, 9:] + else: + # th_posemap offsets by 3, so add offset or 3 to get to self.rot=6 + th_pose_map, th_rot_map = th_posemap_axisang(th_full_pose[:, 6:]) + if self.robust_rot: + root_rot = rot6d.robust_compute_rotation_matrix_from_ortho6d(th_full_pose[:, :6]) + else: + root_rot = rot6d.compute_rotation_matrix_from_ortho6d(th_full_pose[:, :6]) + else: + assert th_pose_coeffs.dim() == 4, ( + 'When not self.use_pca, ' + 'th_pose_coeffs should have 4 dims, got {}'.format( + th_pose_coeffs.dim())) + assert th_pose_coeffs.shape[2:4] == (3, 3), ( + 'When not self.use_pca, th_pose_coeffs have 3x3 matrix for two' + 'last dims, got {}'.format(th_pose_coeffs.shape[2:4])) + th_pose_rots = rotproj.batch_rotprojs(th_pose_coeffs) + th_rot_map = th_pose_rots[:, 1:].view(batch_size, -1) + th_pose_map = subtract_flat_id(th_rot_map) + root_rot = th_pose_rots[:, 0] + + # Full axis angle representation with root joint + if th_betas is None or th_betas.numel() == 1: + th_v_shaped = torch.matmul(self.th_shapedirs, + self.th_betas.transpose(1, 0)).permute( + 2, 0, 1) + self.th_v_template + th_j = torch.matmul(self.th_J_regressor, th_v_shaped).repeat( + batch_size, 1, 1) + + else: + if share_betas: + th_betas = th_betas.mean(0, keepdim=True).expand(th_betas.shape[0], 10) + th_v_shaped = torch.matmul(self.th_shapedirs, + th_betas.transpose(1, 0)).permute( + 2, 0, 1) + self.th_v_template + th_j = torch.matmul(self.th_J_regressor, th_v_shaped) + # th_pose_map should have shape 20x135 + + th_v_posed = th_v_shaped.cuda() + torch.matmul( + self.th_posedirs.cuda(), th_pose_map.cuda().transpose(0, 1)).permute(2, 0, 1) + # Final T pose with transformation done ! 
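+        # The kinematic chain below is posed level by level: the wrist (root) first,
+        # then the five proximal joints (lev1), middle joints (lev2) and distal
+        # joints (lev3), each expressed as a 4x4 rigid transform composed with its
+        # parent's transform.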
+ + # Global rigid transformation + + root_j = th_j[:, 0, :].contiguous().view(batch_size, 3, 1) + root_trans = th_with_zeros(torch.cat([root_rot.cuda(), root_j.cuda()], 2)) + + all_rots = th_rot_map.view(th_rot_map.shape[0], 15, 3, 3) + lev1_idxs = [1, 4, 7, 10, 13] + lev2_idxs = [2, 5, 8, 11, 14] + lev3_idxs = [3, 6, 9, 12, 15] + lev1_rots = all_rots[:, [idx - 1 for idx in lev1_idxs]] + lev2_rots = all_rots[:, [idx - 1 for idx in lev2_idxs]] + lev3_rots = all_rots[:, [idx - 1 for idx in lev3_idxs]] + lev1_j = th_j[:, lev1_idxs] + lev2_j = th_j[:, lev2_idxs] + lev3_j = th_j[:, lev3_idxs] + + # From base to tips + # Get lev1 results + all_transforms = [root_trans.unsqueeze(1)] + lev1_j_rel = lev1_j - root_j.transpose(1, 2) + lev1_rel_transform_flt = th_with_zeros(torch.cat([lev1_rots.cuda(), lev1_j_rel.cuda().unsqueeze(3)], 3).view(-1, 3, 4)) + root_trans_flt = root_trans.unsqueeze(1).repeat(1, 5, 1, 1).view(root_trans.shape[0] * 5, 4, 4) + lev1_flt = torch.matmul(root_trans_flt.cuda(), lev1_rel_transform_flt.cuda()) + all_transforms.append(lev1_flt.view(all_rots.shape[0], 5, 4, 4)) + + # Get lev2 results + lev2_j_rel = lev2_j - lev1_j + lev2_rel_transform_flt = th_with_zeros(torch.cat([lev2_rots.cuda(), lev2_j_rel.cuda().unsqueeze(3)], 3).view(-1, 3, 4)) + lev2_flt = torch.matmul(lev1_flt.cuda(), lev2_rel_transform_flt.cuda()) + all_transforms.append(lev2_flt.view(all_rots.shape[0], 5, 4, 4)) + + # Get lev3 results + lev3_j_rel = lev3_j - lev2_j + lev3_rel_transform_flt = th_with_zeros(torch.cat([lev3_rots.cuda(), lev3_j_rel.cuda().unsqueeze(3)], 3).view(-1, 3, 4)) + lev3_flt = torch.matmul(lev2_flt.cuda(), lev3_rel_transform_flt.cuda()) + all_transforms.append(lev3_flt.view(all_rots.shape[0], 5, 4, 4)) + + reorder_idxs = [0, 1, 6, 11, 2, 7, 12, 3, 8, 13, 4, 9, 14, 5, 10, 15] + th_results = torch.cat(all_transforms, 1)[:, reorder_idxs] + th_results_global = th_results + + joint_js = torch.cat([th_j, th_j.new_zeros(th_j.shape[0], 16, 1)], 2) + tmp2 = torch.matmul(th_results.cuda(), joint_js.cuda().unsqueeze(3)) + th_results2 = (th_results - torch.cat([tmp2.new_zeros(*tmp2.shape[:2], 4, 3), tmp2], 3)).permute(0, 2, 3, 1) + + th_T = torch.matmul(th_results2.cuda(), self.th_weights.cuda().transpose(0, 1)).cuda() + # print("th_T.device",th_T.device) + # print("th_v_posed.device",th_v_posed.device) + th_rest_shape_h = torch.cat([ + th_v_posed.cuda().transpose(2, 1), + torch.ones((batch_size, 1, th_v_posed.shape[1]), + dtype=th_T.dtype, + device=th_T.device), + ], 1) + + th_verts = (th_T * th_rest_shape_h.unsqueeze(1)).sum(2).transpose(2, 1) + th_verts = th_verts[:, :, :3] + th_jtr = th_results_global[:, :, :3, 3] + # In addition to MANO reference joints we sample vertices on each finger + # to serve as finger tips + if self.side == 'right': + tips = th_verts[:, [745, 317, 444, 556, 673]] + else: + tips = th_verts[:, [745, 317, 445, 556, 673]] + if bool(root_palm): + palm = (th_verts[:, 95] + th_verts[:, 22]).unsqueeze(1) / 2 + th_jtr = torch.cat([palm, th_jtr[:, 1:]], 1) + th_jtr = torch.cat([th_jtr.cuda(), tips.cuda()], 1) + + # Reorder joints to match visualization utilities + th_jtr = th_jtr[:, [0, 13, 14, 15, 16, 1, 2, 3, 17, 4, 5, 6, 18, 10, 11, 12, 19, 7, 8, 9, 20]] + + if th_trans is None or bool(torch.norm(th_trans) == 0): + if self.center_idx is not None: + center_joint = th_jtr[:, self.center_idx].unsqueeze(1) + th_jtr = th_jtr - center_joint + th_verts = th_verts - center_joint + else: + th_jtr = th_jtr + th_trans.unsqueeze(1) + th_verts = th_verts + th_trans.unsqueeze(1) + + # Scale 
to milimeters + th_verts = th_verts * 1000 + th_jtr = th_jtr * 1000 + return th_verts, th_jtr diff --git a/manopth/rodrigues_layer.py b/manopth/rodrigues_layer.py new file mode 100644 index 0000000000000000000000000000000000000000..bb5ac1e9780a91342a76e9c48bb5c15814197893 --- /dev/null +++ b/manopth/rodrigues_layer.py @@ -0,0 +1,89 @@ +""" +This part reuses code from https://github.com/MandyMo/pytorch_HMR/blob/master/src/util.py +which is part of a PyTorch port of SMPL. +Thanks to Zhang Xiong (MandyMo) for making this great code available on github ! +""" + +import argparse +from torch.autograd import gradcheck +import torch +from torch.autograd import Variable + +from manopth import argutils + + +def quat2mat(quat): + """Convert quaternion coefficients to rotation matrix. + Args: + quat: size = [batch_size, 4] 4 <===>(w, x, y, z) + Returns: + Rotation matrix corresponding to the quaternion -- size = [batch_size, 3, 3] + """ + norm_quat = quat + norm_quat = norm_quat / norm_quat.norm(p=2, dim=1, keepdim=True) + w, x, y, z = norm_quat[:, 0], norm_quat[:, 1], norm_quat[:, + 2], norm_quat[:, + 3] + + batch_size = quat.size(0) + + w2, x2, y2, z2 = w.pow(2), x.pow(2), y.pow(2), z.pow(2) + wx, wy, wz = w * x, w * y, w * z + xy, xz, yz = x * y, x * z, y * z + + rotMat = torch.stack([ + w2 + x2 - y2 - z2, 2 * xy - 2 * wz, 2 * wy + 2 * xz, 2 * wz + 2 * xy, + w2 - x2 + y2 - z2, 2 * yz - 2 * wx, 2 * xz - 2 * wy, 2 * wx + 2 * yz, + w2 - x2 - y2 + z2 + ], + dim=1).view(batch_size, 3, 3) + return rotMat + + +def batch_rodrigues(axisang): + #axisang N x 3 + axisang_norm = torch.norm(axisang + 1e-8, p=2, dim=1) + angle = torch.unsqueeze(axisang_norm, -1) + axisang_normalized = torch.div(axisang, angle) + angle = angle * 0.5 + v_cos = torch.cos(angle) + v_sin = torch.sin(angle) + quat = torch.cat([v_cos, v_sin * axisang_normalized], dim=1) + rot_mat = quat2mat(quat) + rot_mat = rot_mat.view(rot_mat.shape[0], 9) + return rot_mat + + +def th_get_axis_angle(vector): + angle = torch.norm(vector, 2, 1) + axes = vector / angle.unsqueeze(1) + return axes, angle + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--batch_size', default=1, type=int) + parser.add_argument('--cuda', action='store_true') + args = parser.parse_args() + + argutils.print_args(args) + + n_components = 6 + rot = 3 + inputs = torch.rand(args.batch_size, rot) + inputs_var = Variable(inputs.double(), requires_grad=True) + if args.cuda: + inputs = inputs.cuda() + # outputs = batch_rodrigues(inputs) + test_function = gradcheck(batch_rodrigues, (inputs_var, )) + print('batch test passed !') + + inputs = torch.rand(rot) + inputs_var = Variable(inputs.double(), requires_grad=True) + test_function = gradcheck(th_cv2_rod_sub_id.apply, (inputs_var, )) + print('th_cv2_rod test passed') + + inputs = torch.rand(rot) + inputs_var = Variable(inputs.double(), requires_grad=True) + test_th = gradcheck(th_cv2_rod.apply, (inputs_var, )) + print('th_cv2_rod_id test passed !') diff --git a/manopth/rot6d.py b/manopth/rot6d.py new file mode 100644 index 0000000000000000000000000000000000000000..c1d60efbcfadda5f216c0eb9a60b348e248435a0 --- /dev/null +++ b/manopth/rot6d.py @@ -0,0 +1,71 @@ +import torch + + +def compute_rotation_matrix_from_ortho6d(poses): + """ + Code from + https://github.com/papagina/RotationContinuity + On the Continuity of Rotation Representations in Neural Networks + Zhou et al. 
CVPR19 + https://zhouyisjtu.github.io/project_rotation/rotation.html + """ + x_raw = poses[:, 0:3] # batch*3 + y_raw = poses[:, 3:6] # batch*3 + + x = normalize_vector(x_raw) # batch*3 + z = cross_product(x, y_raw) # batch*3 + z = normalize_vector(z) # batch*3 + y = cross_product(z, x) # batch*3 + + x = x.view(-1, 3, 1) + y = y.view(-1, 3, 1) + z = z.view(-1, 3, 1) + matrix = torch.cat((x, y, z), 2) # batch*3*3 + return matrix + +def robust_compute_rotation_matrix_from_ortho6d(poses): + """ + Instead of making 2nd vector orthogonal to first + create a base that takes into account the two predicted + directions equally + """ + x_raw = poses[:, 0:3] # batch*3 + y_raw = poses[:, 3:6] # batch*3 + + x = normalize_vector(x_raw) # batch*3 + y = normalize_vector(y_raw) # batch*3 + middle = normalize_vector(x + y) + orthmid = normalize_vector(x - y) + x = normalize_vector(middle + orthmid) + y = normalize_vector(middle - orthmid) + # Their scalar product should be small ! + # assert torch.einsum("ij,ij->i", [x, y]).abs().max() < 0.00001 + z = normalize_vector(cross_product(x, y)) + + x = x.view(-1, 3, 1) + y = y.view(-1, 3, 1) + z = z.view(-1, 3, 1) + matrix = torch.cat((x, y, z), 2) # batch*3*3 + # Check for reflection in matrix ! If found, flip last vector TODO + assert (torch.stack([torch.det(mat) for mat in matrix ])< 0).sum() == 0 + return matrix + + +def normalize_vector(v): + batch = v.shape[0] + v_mag = torch.sqrt(v.pow(2).sum(1)) # batch + v_mag = torch.max(v_mag, v.new([1e-8])) + v_mag = v_mag.view(batch, 1).expand(batch, v.shape[1]) + v = v/v_mag + return v + + +def cross_product(u, v): + batch = u.shape[0] + i = u[:, 1] * v[:, 2] - u[:, 2] * v[:, 1] + j = u[:, 2] * v[:, 0] - u[:, 0] * v[:, 2] + k = u[:, 0] * v[:, 1] - u[:, 1] * v[:, 0] + + out = torch.cat((i.view(batch, 1), j.view(batch, 1), k.view(batch, 1)), 1) + + return out diff --git a/manopth/rotproj.py b/manopth/rotproj.py new file mode 100644 index 0000000000000000000000000000000000000000..91a601d5de8117ed9fe1708c2d11e8713dc18011 --- /dev/null +++ b/manopth/rotproj.py @@ -0,0 +1,21 @@ +import torch + + +def batch_rotprojs(batches_rotmats): + proj_rotmats = [] + for batch_idx, batch_rotmats in enumerate(batches_rotmats): + proj_batch_rotmats = [] + for rot_idx, rotmat in enumerate(batch_rotmats): + # GPU implementation of svd is VERY slow + # ~ 2 10^-3 per hit vs 5 10^-5 on cpu + U, S, V = rotmat.cpu().svd() + rotmat = torch.matmul(U, V.transpose(0, 1)) + orth_det = rotmat.det() + # Remove reflection + if orth_det < 0: + rotmat[:, 2] = -1 * rotmat[:, 2] + + rotmat = rotmat.cuda() + proj_batch_rotmats.append(rotmat) + proj_rotmats.append(torch.stack(proj_batch_rotmats)) + return torch.stack(proj_rotmats) diff --git a/manopth/tensutils.py b/manopth/tensutils.py new file mode 100644 index 0000000000000000000000000000000000000000..0c64c78d4d89e25fd1fff5edee9431cb3a98df1e --- /dev/null +++ b/manopth/tensutils.py @@ -0,0 +1,47 @@ +import torch + +from manopth import rodrigues_layer + + +def th_posemap_axisang(pose_vectors): + rot_nb = int(pose_vectors.shape[1] / 3) + pose_vec_reshaped = pose_vectors.contiguous().view(-1, 3) + rot_mats = rodrigues_layer.batch_rodrigues(pose_vec_reshaped) + rot_mats = rot_mats.view(pose_vectors.shape[0], rot_nb * 9) + pose_maps = subtract_flat_id(rot_mats) + return pose_maps, rot_mats + + +def th_with_zeros(tensor): + batch_size = tensor.shape[0] + padding = tensor.new([0.0, 0.0, 0.0, 1.0]) + padding.requires_grad = False + + concat_list = [tensor, padding.view(1, 1, 4).repeat(batch_size, 1, 1)] + 
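+    # Appending the constant row [0, 0, 0, 1] turns each batch of (3, 4) [R|t]
+    # matrices into (4, 4) homogeneous transforms, e.g.
+    #   th_with_zeros(torch.rand(2, 3, 4)).shape -> torch.Size([2, 4, 4])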
cat_res = torch.cat(concat_list, 1) + return cat_res + + +def th_pack(tensor): + batch_size = tensor.shape[0] + padding = tensor.new_zeros((batch_size, 4, 3)) + padding.requires_grad = False + pack_list = [padding, tensor] + pack_res = torch.cat(pack_list, 2) + return pack_res + + +def subtract_flat_id(rot_mats): + # Subtracts identity as a flattened tensor + rot_nb = int(rot_mats.shape[1] / 9) + id_flat = torch.eye( + 3, dtype=rot_mats.dtype, device=rot_mats.device).view(1, 9).repeat( + rot_mats.shape[0], rot_nb) + # id_flat.requires_grad = False + results = rot_mats - id_flat + return results + + +def make_list(tensor): + # type: (List[int]) -> List[int] + return tensor diff --git a/models/resnet.py b/models/resnet.py new file mode 100644 index 0000000000000000000000000000000000000000..12c195927283598072f29019816fb1f00a399f00 --- /dev/null +++ b/models/resnet.py @@ -0,0 +1,262 @@ +import torch +import torch.nn as nn +import math +import torch.utils.model_zoo as model_zoo + +__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', + 'resnet152'] + + +model_urls = { + 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', + 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', + 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', + 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', + 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', +} + + +def conv3x3(in_planes, out_planes, stride=1): + """3x3 convolution with padding""" + return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, + padding=1, bias=False) + + +class BasicBlock(nn.Module): + expansion = 1 + + def __init__(self, inplanes, planes, stride=1, downsample=None): + super(BasicBlock, self).__init__() + self.conv1 = conv3x3(inplanes, planes, stride) + self.bn1 = nn.BatchNorm2d(planes) + self.relu = nn.ReLU(inplace=True) + self.conv2 = conv3x3(planes, planes) + self.bn2 = nn.BatchNorm2d(planes) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu(out) + + return out + +class Bottleneck(nn.Module): + expansion = 4 + + def __init__(self, inplanes, planes, stride=1, downsample=None): + super(Bottleneck, self).__init__() + self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) + self.bn1 = nn.BatchNorm2d(planes) + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, + padding=1, bias=False) + self.bn2 = nn.BatchNorm2d(planes) + self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) + self.bn3 = nn.BatchNorm2d(planes * 4) + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu(out) + + return out + + +class ResNet(nn.Module): + + def __init__(self, block, layers, num_classes=1000, img_size=224,dropout_factor = 1.): + self.inplanes = 64 + self.dropout_factor = dropout_factor + super(ResNet, self).__init__() + + self.conv1 = nn.Conv2d(24, 64, 
kernel_size=7, stride=2, padding=3, + bias=False) + self.bn1 = nn.BatchNorm2d(64) + self.relu = nn.ReLU(inplace=True) + # see this issue: https://github.com/xxradon/PytorchToCaffe/issues/16 + # self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True) + self.layer1 = self._make_layer(block, 64, layers[0]) + self.layer2 = self._make_layer(block, 128, layers[1], stride=2) + self.layer3 = self._make_layer(block, 256, layers[2], stride=2) + self.layer4 = self._make_layer(block, 512, layers[3], stride=2) + + assert img_size % 32 == 0 + pool_kernel = int(img_size / 32) + self.avgpool = nn.AvgPool2d(pool_kernel, stride=1, ceil_mode=True) + + self.dropout = nn.Dropout(self.dropout_factor) + + self.fc = nn.Linear(512 * block.expansion, num_classes) + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + m.weight.data.normal_(0, math.sqrt(2. / n)) + elif isinstance(m, nn.BatchNorm2d): + m.weight.data.fill_(1) + m.bias.data.zero_() + + def _make_layer(self, block, planes, blocks, stride=1): + downsample = None + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = nn.Sequential( + nn.Conv2d(self.inplanes, planes * block.expansion, + kernel_size=1, stride=stride, bias=False), + nn.BatchNorm2d(planes * block.expansion), + ) + + layers = [] + layers.append(block(self.inplanes, planes, stride, downsample)) + self.inplanes = planes * block.expansion + for i in range(1, blocks): + layers.append(block(self.inplanes, planes)) + + return nn.Sequential(*layers) + + def forward(self, x): + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.maxpool(x) + + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + + x = self.avgpool(x) + x = x.view(x.size(0), -1) + + x = self.dropout(x) + + x = self.fc(x) + + return x + + +def load_model(model, pretrained_state_dict): + model_dict = model.state_dict() + pretrained_dict = {k: v for k, v in pretrained_state_dict.items() if + k in model_dict and model_dict[k].size() == pretrained_state_dict[k].size()} + model.load_state_dict(pretrained_dict, strict=False) + if len(pretrained_dict) == 0: + print("[INFO] No params were loaded ...") + else: + for k, v in pretrained_state_dict.items(): + if k in pretrained_dict: + print("==>> Load {} {}".format(k, v.size())) + else: + print("[INFO] Skip {} {}".format(k, v.size())) + return model + + +def resnet18(pretrained=False, **kwargs): + """Constructs a ResNet-18 model. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs) + if pretrained: + # model.load_state_dict(model_zoo.load_url(model_urls['resnet18'])) + print("Load pretrained model from {}".format(model_urls['resnet18'])) + pretrained_state_dict = model_zoo.load_url(model_urls['resnet18']) + model = load_model(model, pretrained_state_dict) + return model + + +def resnet34(pretrained=False, **kwargs): + """Constructs a ResNet-34 model. 
+ + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs) + if pretrained: + # model.load_state_dict(model_zoo.load_url(model_urls['resnet34'])) + print("Load pretrained model from {}".format(model_urls['resnet34'])) + pretrained_state_dict = model_zoo.load_url(model_urls['resnet34']) + model = load_model(model, pretrained_state_dict) + return model + + +def resnet50(pretrained=False, **kwargs): + """Constructs a ResNet-50 model. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) + if pretrained: + # model.load_state_dict(model_zoo.load_url(model_urls['resnet50'])) + print("Load pretrained model from {}".format(model_urls['resnet50'])) + pretrained_state_dict = model_zoo.load_url(model_urls['resnet50']) + model = load_model(model, pretrained_state_dict) + return model + + +def resnet101(pretrained=False, **kwargs): + """Constructs a ResNet-101 model. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) + if pretrained: + # model.load_state_dict(model_zoo.load_url(model_urls['resnet101'])) + print("Load pretrained model from {}".format(model_urls['resnet101'])) + pretrained_state_dict = model_zoo.load_url(model_urls['resnet101']) + model = load_model(model, pretrained_state_dict) + return model + + +def resnet152(pretrained=False, **kwargs): + """Constructs a ResNet-152 model. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs) + if pretrained: + # model.load_state_dict(model_zoo.load_url(model_urls['resnet152'])) + print("Load pretrained model from {}".format(model_urls['resnet152'])) + pretrained_state_dict = model_zoo.load_url(model_urls['resnet152']) + model = load_model(model, pretrained_state_dict) + return model + +if __name__ == "__main__": + input = torch.randn([1, 24, 256,256]) + model = resnet34(True, num_classes=63, img_size=256) + output = model(input) + print(output.size()) diff --git a/train.py b/train.py new file mode 100644 index 0000000000000000000000000000000000000000..6fc022d1bb6037122f884e978b96cce2d0a6f3a8 --- /dev/null +++ b/train.py @@ -0,0 +1,229 @@ +#-*-coding:utf-8-*- +# date:2021-06-15 +# Author: Eric.Lee +## function: train + +import os +import argparse +import torch +import torch.nn as nn +import torch.optim as optim +import sys + +from utils.model_utils import * +from utils.common_utils import * +from e3d_data_iter.datasets import * + +from models.resnet import resnet18,resnet34,resnet50,resnet101 + +from torchvision.models import shufflenet_v2_x1_5 ,shufflenet_v2_x1_0 , shufflenet_v2_x2_0 + +from loss.loss import * +import cv2 +import time +import json +from datetime import datetime +import random +def trainer(ops,f_log): + if 1: + os.environ['CUDA_VISIBLE_DEVICES'] = ops.GPUS + + if ops.log_flag: + sys.stdout = f_log + + set_seed(ops.seed) + #---------------------------------------------------------------- 构建模型 + + if ops.model == 'resnet_50': + model_ = resnet50(pretrained = True,num_classes = ops.num_classes,img_size = ops.img_size[0],dropout_factor=ops.dropout) + elif ops.model == 'resnet_18': + model_ = resnet18(pretrained = True,num_classes = ops.num_classes,img_size = ops.img_size[0],dropout_factor=ops.dropout) + elif ops.model == 'resnet_34': + model_ = resnet34(pretrained = True,num_classes = 
ops.num_classes,img_size = ops.img_size[0],dropout_factor=ops.dropout)
+        elif ops.model == 'resnet_101':
+            model_ = resnet101(pretrained = True,num_classes = ops.num_classes,img_size = ops.img_size[0],dropout_factor=ops.dropout)
+        else:
+            print("unsupported model : {}".format(ops.model))
+            return  # bail out instead of crashing later on an undefined model_
+
+        use_cuda = torch.cuda.is_available()
+
+        device = torch.device("cuda:0" if use_cuda else "cpu")
+        model_ = model_.to(device)
+
+        # print(model_) # print the model structure
+        # Dataset
+        dataset = LoadImagesAndLabels(ops= ops,img_size=ops.img_size,flag_agu=ops.flag_agu,vis = False)
+        print("handpose done")
+
+        print('len train datasets : %s'%(dataset.__len__()))
+        # Dataloader
+        dataloader = DataLoader(dataset,
+                                batch_size=ops.batch_size,
+                                num_workers=ops.num_workers,
+                                shuffle=True,
+                                pin_memory=False,
+                                drop_last = True)
+        # optimizer setup
+        optimizer_Adam = torch.optim.Adam(model_.parameters(), lr=ops.init_lr, betas=(0.9, 0.99),weight_decay=1e-6)
+        # optimizer_SGD = optim.SGD(model_.parameters(), lr=ops.init_lr, momentum=ops.momentum, weight_decay=ops.weight_decay) # optimizer init
+        optimizer = optimizer_Adam
+        # load a finetune checkpoint if one exists
+        if os.access(ops.fintune_model,os.F_OK):# checkpoint
+            chkpt = torch.load(ops.fintune_model, map_location=device)
+            model_.load_state_dict(chkpt)
+            print('load fintune model : {}'.format(ops.fintune_model))
+
+        print('/**********************************************/')
+        # loss function ('reduce' is deprecated; reduction='mean' is equivalent)
+        if ops.loss_define == 'mse_loss':
+            criterion = nn.MSELoss(reduction='mean')
+
+        step = 0
+        idx = 0
+
+        # variable init
+        best_loss = np.inf
+        loss_mean = 0. # running mean of the loss
+        loss_idx = 0. # loss sample counter
+        flag_change_lr_cnt = 0 # learning-rate update counter
+        init_lr = ops.init_lr # learning rate
+
+        epochs_loss_dict = {}
+
+        for epoch in range(0, ops.epochs):
+            if ops.log_flag:
+                sys.stdout = f_log
+            print('\nepoch %d ------>>>'%epoch)
+            model_.train()
+            # learning-rate schedule: decay once the mean loss has not improved for 50 checks
+            if loss_mean!=0.:
+                if best_loss > (loss_mean/loss_idx):
+                    flag_change_lr_cnt = 0
+                    best_loss = (loss_mean/loss_idx)
+                else:
+                    flag_change_lr_cnt += 1
+
+                    if flag_change_lr_cnt > 50:
+                        init_lr = init_lr*ops.lr_decay
+                        set_learning_rate(optimizer, init_lr)
+                        flag_change_lr_cnt = 0
+
+            loss_mean = 0. # running mean of the loss
+            loss_idx = 0. # loss sample counter
+
+            for i, (imgs_, pts_) in enumerate(dataloader):
+                # print('imgs_, pts_',imgs_.size(), pts_.size())
+                if use_cuda:
+                    imgs_ = imgs_.cuda() # pytorch input layout : (batch, channel, height, width)
+                    pts_ = pts_.cuda()
+
+                output = model_(imgs_.float())
+                if ops.loss_define == 'wing_loss':
+                    loss = got_total_wing_loss(output, pts_.float())
+                else:
+                    loss = criterion(output, pts_.float())
+                loss_mean += loss.item()
+                loss_idx += 1.
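+                # Every 10 iterations the running mean loss, current lr and batch
+                # settings are printed; the update below runs loss.backward() ->
+                # optimizer.step() -> optimizer.zero_grad(), so the gradient of a
+                # single batch is applied and then cleared (no accumulation).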
+                if i%10 == 0:
+                    loc_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
+                    print('  %s - %s - epoch [%s/%s] (%s/%s):'%(loc_time,ops.model,epoch,ops.epochs,i,int(dataset.__len__()/ops.batch_size)),\
+                    'Mean Loss : %.6f - Loss: %.6f'%(loss_mean/loss_idx,loss.item()),\
+                    ' lr : %.8f'%init_lr,' bs :',ops.batch_size,\
+                    ' img_size: %s x %s'%(ops.img_size[0],ops.img_size[1]),' best_loss: %.6f'%best_loss, " {}".format(ops.loss_define))
+                # compute gradients
+                loss.backward()
+                # apply the parameter update
+                optimizer.step()
+                # clear the accumulated gradients
+                optimizer.zero_grad()
+                step += 1
+
+            set_seed(random.randint(0,65535))
+            torch.save(model_.state_dict(), ops.model_exp + '{}-size-{}-loss-{}-model_epoch-{}.pth'.format(ops.model,ops.img_size[0],ops.loss_define,epoch))
+            torch.save(model_.state_dict(), ops.model_exp + '{}-size-{}-latest.pth'.format(ops.model,ops.img_size[0]))
+
+    # except Exception as e:
+    #     print('Exception : ',e) # print the exception
+    #     print('Exception file : ', e.__traceback__.tb_frame.f_globals['__file__']) # file where the exception occurred
+    #     print('Exception line : ', e.__traceback__.tb_lineno) # line where the exception was raised
+
+if __name__ == "__main__":
+
+    parser = argparse.ArgumentParser(description=' Project Hand Train')
+    parser.add_argument('--seed', type=int, default = 126673,
+                        help = 'seed') # random seed
+    parser.add_argument('--model_exp', type=str, default = './model_exp',
+                        help = 'model_exp') # model output directory
+    parser.add_argument('--model', type=str, default = 'resnet_50',
+                        help = '''model : resnet_18,resnet_34,resnet_50,resnet_101''') # model type
+    parser.add_argument('--num_classes', type=int , default = 63,
+                        help = 'num_classes') # number of landmarks * 3 (21 joints x 3 coords)
+    parser.add_argument('--GPUS', type=str, default = '0',
+                        help = 'GPUS') # GPU selection
+
+    parser.add_argument('--train_path', type=str,
+                        default = "../Minimal-Hand-pytorch-20210609/ehandpose_3d/",
+                        help = 'datasets') # training-set annotation path
+
+    parser.add_argument('--pretrained', type=bool, default = True,
+                        help = 'imageNet_Pretrain') # initialize from ImageNet-pretrained weights
+    parser.add_argument('--fintune_model', type=str, default = 'model_exp/2021-06-15_23-29-22/resnet_50-size-256-loss-wing_loss-model_epoch-262.pth',
+                        help = 'fintune_model') # finetune checkpoint
+    parser.add_argument('--loss_define', type=str, default = 'wing_loss',
+                        help = 'define_loss : wing_loss, mse_loss ') # loss function choice
+    parser.add_argument('--init_lr', type=float, default = 1e-4,
+                        help = 'init learning Rate') # initial learning rate
+    parser.add_argument('--lr_decay', type=float, default = 0.1,
+                        help = 'learningRate_decay') # learning-rate decay factor
+    parser.add_argument('--weight_decay', type=float, default = 1e-6,
+                        help = 'weight_decay') # optimizer weight decay (L2 regularization)
+    parser.add_argument('--momentum', type=float, default = 0.9,
+                        help = 'momentum') # optimizer momentum
+    parser.add_argument('--batch_size', type=int, default = 16,
+                        help = 'batch_size') # images per training batch
+    parser.add_argument('--dropout', type=float, default = 0.5,
+                        help = 'dropout') # dropout factor
+    parser.add_argument('--epochs', type=int, default = 3000,
+                        help = 'epochs') # number of training epochs
+    parser.add_argument('--num_workers', type=int, default = 4,
+                        help = 'num_workers') # dataloader worker threads
+    parser.add_argument('--img_size', type=tuple , default = (256,256),
+                        help = 'img_size') # model input image size
+    parser.add_argument('--flag_agu', type=bool , default = True,
+                        help = 'data_augmentation') # enable data augmentation in the data iterator
+    parser.add_argument('--clear_model_exp', type=bool, default = False,
+                        help = 'clear_model_exp') # clear the model output directory first
+    parser.add_argument('--log_flag', type=bool, default = False,
+                        help = 'log flag') # write the training log to a file
+
+    #--------------------------------------------------------------------------
+    args = parser.parse_args() # parse the arguments defined above
+    #--------------------------------------------------------------------------
+    mkdir_(args.model_exp, flag_rm=args.clear_model_exp)
+    loc_time = time.localtime()
+    args.model_exp = args.model_exp + '/' + time.strftime("%Y-%m-%d_%H-%M-%S", loc_time)+'/'
+    mkdir_(args.model_exp, flag_rm=args.clear_model_exp)
+
+    f_log = None
+    if args.log_flag:
+        f_log = open(args.model_exp+'/train_{}.log'.format(time.strftime("%Y-%m-%d_%H-%M-%S",loc_time)), 'a+')
+        sys.stdout = f_log
+
+    print('---------------------------------- log : {}'.format(time.strftime("%Y-%m-%d %H:%M:%S", loc_time)))
+    print('\n/******************* {} ******************/\n'.format(parser.description))
+
+    unparsed = vars(args) # parse_args() returns a namespace; vars() turns it into a dict
+    for key in unparsed.keys():
+        print('{} : {}'.format(key,unparsed[key]))
+
+    unparsed['time'] = time.strftime("%Y-%m-%d %H:%M:%S", loc_time)
+
+    fs = open(args.model_exp+'train_ops.json',"w",encoding='utf-8')
+    json.dump(unparsed,fs,ensure_ascii=False,indent = 1)
+    fs.close()
+
+    trainer(ops = args,f_log = f_log) # train the model
+
+    if args.log_flag:
+        sys.stdout = f_log
+    print('well done : {}'.format(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())))
diff --git a/utils/AIK.py b/utils/AIK.py
new file mode 100644
index 0000000000000000000000000000000000000000..99270438fe0b5f9057f2d059e85ec1143e0fdb6b
--- /dev/null
+++ b/utils/AIK.py
@@ -0,0 +1,106 @@
+# Copyright (c) Hao Meng. All Rights Reserved.
+import numpy as np
+import transforms3d
+
+import config as cfg
+
+angels0 = np.zeros((1, 21))
+
+
+def to_dict(joints):
+    temp_dict = dict()
+    for i in range(21):
+        temp_dict[i] = joints[:, [i]]
+    return temp_dict
+
+
+def adaptive_IK(T_, P_):
+    '''
+    Computes pose parameters given a template and predictions.
+    We assume the twist along each hand bone can be omitted.
+
+    :param T_: template, 21*3
+    :param P_: target, 21*3
+    :return: pose params.
+    '''
+
+    T = T_.copy().astype(np.float64)
+    P = P_.copy().astype(np.float64)
+
+    P = P.transpose(1, 0)
+    T = T.transpose(1, 0)
+
+    # to dict
+    P = to_dict(P)
+    T = to_dict(T)
+
+    # some globals
+    R = {}
+    R_pa_k = {}
+    q = {}
+
+    q[0] = T[0]  # in fact, q[0] = P[0] = T[0].
+
+    # compute R0; here R0 must be not only an orthogonal matrix but a proper rotation matrix.
+    # See the paper "Least-Squares Fitting of Two 3-D Point Sets. K. S. Arun; T. S. Huang; S. D. Blostein".
+    # This differs slightly from https://github.com/Jeff-sjtu/HybrIK/blob/main/hybrik/utils/pose_utils.py#L4, in which R0 is regarded as an orthogonal matrix only.
+    # Using their method might further boost accuracy.
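+    # A short sketch of the step below (Arun et al.): the five MCP directions of
+    # the prediction and the template are stacked into 3x5 matrices P_0 and T_0;
+    # with H = T_0 @ P_0.T and U, S, V^T = svd(H), the wrist rotation
+    # R0 = V @ U.T minimizes ||P_0 - R0 @ T_0||_F, and the det(R0) check that
+    # follows guards against the reflection case (det = -1) by flipping a column of V.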
+ P_0 = np.concatenate([P[1] - P[0], P[5] - P[0], + P[9] - P[0], P[13] - P[0], + P[17] - P[0]], axis=-1) + T_0 = np.concatenate([T[1] - T[0], T[5] - T[0], + T[9] - T[0], T[13] - T[0], + T[17] - T[0]], axis=-1) + H = np.matmul(T_0, P_0.T) + + U, S, V_T = np.linalg.svd(H) + V = V_T.T + R0 = np.matmul(V, U.T) + + det0 = np.linalg.det(R0) + + if abs(det0 + 1) < 1e-6: + V_ = V.copy() + + if (abs(S) < 1e-4).sum(): + V_[:, 2] = -V_[:, 2] + R0 = np.matmul(V_, U.T) + + R[0] = R0 + + # the bone from 1,5,9,13,17 to 0 has same rotations + R[1] = R[0].copy() + R[5] = R[0].copy() + R[9] = R[0].copy() + R[13] = R[0].copy() + R[17] = R[0].copy() + + # compute rotation along kinematics + for k in cfg.kinematic_tree: + pa = cfg.SNAP_PARENT[k] + pa_pa = cfg.SNAP_PARENT[pa] + q[pa] = np.matmul(R[pa], (T[pa] - T[pa_pa])) + q[pa_pa] + delta_p_k = np.matmul(np.linalg.inv(R[pa]), P[k] - q[pa]) + delta_p_k = delta_p_k.reshape((3,)) + delta_t_k = T[k] - T[pa] + delta_t_k = delta_t_k.reshape((3,)) + temp_axis = np.cross(delta_t_k, delta_p_k) + axis = temp_axis / (np.linalg.norm(temp_axis, axis=-1) + 1e-8) + temp = (np.linalg.norm(delta_t_k, axis=0) + 1e-8) * (np.linalg.norm(delta_p_k, axis=0) + 1e-8) + cos_alpha = np.dot(delta_t_k, delta_p_k) / temp + + alpha = np.arccos(cos_alpha) + + twist = delta_t_k + D_sw = transforms3d.axangles.axangle2mat(axis=axis, angle=alpha, is_normalized=False) + D_tw = transforms3d.axangles.axangle2mat(axis=twist, angle=angels0[:, k], is_normalized=False) + R_pa_k[k] = np.matmul(D_sw, D_tw) + R[k] = np.matmul(R[pa], R_pa_k[k]) + + pose_R = np.zeros((1, 16, 3, 3)) + pose_R[0, 0] = R[0] + for key in cfg.ID2ROT.keys(): + value = cfg.ID2ROT[key] + pose_R[0, value] = R_pa_k[key] + + return pose_R diff --git a/utils/LM.py b/utils/LM.py new file mode 100644 index 0000000000000000000000000000000000000000..ddabaf5bf7a84dcbda14e6308975017ac58cdf38 --- /dev/null +++ b/utils/LM.py @@ -0,0 +1,182 @@ +# Copyright (c) Hao Meng. All Rights Reserved. 
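+# Levenberg-Marquardt solver for the 10 MANO shape coefficients (beta): it seeks
+# the beta whose normalized bone lengths match a 15x1 target (lb_target), using
+# the damped update beta <- beta - (J^T J + u*I)^(-1) J^T * loss, where the
+# damping u is divided by v after an improving step and multiplied by v otherwise.
+#
+# A hypothetical usage sketch (the target bone ratios are an assumption here,
+# not values shipped with this repo):
+#     pose, shape = torch.zeros((1, 48)), torch.zeros((1, 10))
+#     solver = LM_Solver(num_Iter=500, th_beta=shape, th_pose=pose,
+#                        lb_target=my_bone_ratios)  # my_bone_ratios: (15, 1) array
+#     beta_opt = solver.LM()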
+# import time + +import numpy as np +import torch +from manopth.manolayer import ManoLayer + +from utils import bone + + +class LM_Solver(): + def __init__(self, num_Iter=500, th_beta=None, th_pose=None, lb_target=None, + weight=0.01): + self.count = 0 + # self.time_start = time.time() + # self.time_in_mano = 0 + self.minimal_loss = 9999 + self.best_beta = np.zeros([10, 1]) + self.num_Iter = num_Iter + + self.th_beta = th_beta + self.th_pose = th_pose + + self.beta = th_beta.numpy() + self.pose = th_pose.numpy() + + self.mano_layer = ManoLayer(side="right", + mano_root='mano/models', use_pca=False, flat_hand_mean=True) + + self.threshold_stop = 10 ** -13 + self.weight = weight + self.residual_memory = [] + + self.lb = np.zeros(21) + + _, self.joints = self.mano_layer(self.th_pose, self.th_beta) + self.joints = self.joints.cpu().numpy().reshape(21, 3) + + self.lb_target = lb_target.reshape(15, 1) + # self.test_time = 0 + + def update(self, beta_): + beta = beta_.copy() + self.count += 1 + # now = time.time() + my_th_beta = torch.from_numpy(beta).float().reshape(1, 10) + _, joints = self.mano_layer(self.th_pose, my_th_beta) + # self.time_in_mano = time.time() - now + + useful_lb = bone.caculate_length(joints, label="useful") + lb_ref = useful_lb[6] + return useful_lb, lb_ref + + def new_cal_ref_bone(self, _shape): + # now = time.time() + parent_index = [0, + 0, 1, 2, + 0, 4, 5, + 0, 7, 8, + 0, 10, 11, + 0, 13, 14 + ] + # index = [0, + # 1, 2, 3, # index + # 4, 5, 6, # middle + # 7, 8, 9, # pinky + # 10, 11, 12, # ring + # 13, 14, 15] # thumb + reoder_index = [ + 13, 14, 15, + 1, 2, 3, + 4, 5, 6, + 10, 11, 12, + 7, 8, 9] + shape = torch.Tensor(_shape.reshape((-1, 10))) + th_v_shaped = torch.matmul(self.mano_layer.th_shapedirs, + shape.transpose(1, 0)).permute(2, 0, 1) \ + + self.mano_layer.th_v_template + th_j = torch.matmul(self.mano_layer.th_J_regressor, th_v_shaped) + temp1 = th_j.clone().detach() + temp2 = th_j.clone().detach()[:, parent_index, :] + result = temp1 - temp2 + result = torch.norm(result, dim=-1, keepdim=True) + ref_len = result[:, [4]] + result = result / ref_len + # self.time_in_mano = time.time() - now + return torch.squeeze(result, dim=-1)[:, reoder_index].cpu().numpy() + + def get_residual(self, beta_): + beta = beta_.copy() + lb, lb_ref = self.update(beta) + lb = lb.reshape(45, 1) + return lb / lb_ref - self.lb_target + + def get_count(self): + return self.count + + def get_bones(self, beta_): + beta = beta_.copy() + lb, _ = self.update(beta) + lb = lb.reshape(15, 1) + + return lb + + # Vectorization implementation + def batch_get_l2_loss(self, beta_): + weight = 1e-5 + beta = beta_.copy() + temp = self.new_cal_ref_bone(beta) + loss = np.transpose(temp) + loss = np.linalg.norm(loss - self.lb_target, axis=0) ** 2 + \ + weight * np.linalg.norm(beta, axis=-1) + return loss + + def new_get_derivative(self, beta_): + # params: beta_ 10*1 + # return: 1*10 + beta = beta_.copy().reshape((1, 10)) + temp_shape = np.zeros((20, beta.shape[1])) # 20*10 + step = 0.01 + for t2 in range(10): # 位置 + t3 = 10 + t2 + temp_shape[t2] = beta.copy() + temp_shape[t3] = beta.copy() + temp_shape[t2, t2] += step + temp_shape[t3, t2] -= step + + res = self.batch_get_l2_loss(temp_shape) + d = res[0:10] - res[10:20] # 10*1 + d = d.reshape((1, 10)) / (2 * step) + return d + + # LM algorithm + def LM(self): + u = 1e-2 + v = 1.5 + beta = self.beta.reshape(10, 1) + + out_n = 1 + # num_beta = np.shape(beta)[0] # the number of beta + # calculating the init Jocobian matrix + Jacobian = np.zeros([out_n, 
beta.shape[0]]) + + last_update = 0 + last_loss = 0 + # self.test_time = 0 + for i in range(self.num_Iter): + # loss = self.new_get_loss(beta) + loss = self.batch_get_l2_loss(beta) + loss = loss[0] + if loss < self.minimal_loss: + self.minimal_loss = loss + self.best_beta = beta + + if abs(loss - last_loss) < self.threshold_stop: + # self.time_total = time.time() - self.time_start + return beta + + # for k in range(num_beta): + # Jacobian[:, k] = self.get_derivative(beta, k) + Jacobian = self.new_get_derivative(beta) + jtj = np.matmul(Jacobian.T, Jacobian) + jtj = jtj + u * np.eye(jtj.shape[0]) + + update = last_loss - loss + delta = (np.matmul(np.linalg.inv(jtj), Jacobian.T) * loss) + + beta -= delta + + if update > last_update and update > 0: + u /= v + else: + u *= v + + last_update = update + last_loss = loss + self.residual_memory.append(loss) + + return beta + + def get_result(self): + return self.residual_memory diff --git a/utils/LM_new.py b/utils/LM_new.py new file mode 100644 index 0000000000000000000000000000000000000000..995fa039e855bf7543de0d78e1e15cff1843a452 --- /dev/null +++ b/utils/LM_new.py @@ -0,0 +1,217 @@ +# Copyright (c) Hao Meng. All Rights Reserved. +import time + +import numpy as np +import torch +from manopth.manolayer import ManoLayer + +from utils import bone + + +class LM_Solver(): + def __init__(self, side = "right",num_Iter=500, th_beta=None, th_pose=None, lb_target=None, + weight=0.01): + self.count = 0 + self.time_start = time.time() + self.time_in_mano = 0 + self.minimal_loss = 9999 + self.best_beta = np.zeros([10, 1]) + self.num_Iter = num_Iter + + self.th_beta = th_beta + self.th_pose = th_pose + + self.beta = th_beta.numpy() + self.pose = th_pose.numpy() + + self.mano_layer = ManoLayer(side=side, + mano_root='D:/code/manopth/mano/models', use_pca=False, flat_hand_mean=True) + + self.threshold_stop = 10 ** -13 + self.weight = weight + self.residual_memory = [] + + self.lb = np.zeros(21) + + _, self.joints = self.mano_layer(self.th_pose, self.th_beta) + self.joints = self.joints.cpu().numpy().reshape(21, 3) + + self.lb_target = lb_target.reshape(15, 1) + self.test_time = 0 + + def update_target(self, target): + self.lb_target = target.copy().reshape(15, 1) + + def update(self, beta_): + beta = beta_.copy() + self.count += 1 + now = time.time() + my_th_beta = torch.from_numpy(beta).float().reshape(1, 10) + _, joints = self.mano_layer(self.th_pose, my_th_beta) + self.time_in_mano = time.time() - now + + useful_lb = bone.caculate_length(joints, label="useful") + lb_ref = useful_lb[6] + return useful_lb, lb_ref + + def new_cal_ref_bone(self, _shape): + now = time.time() + parent_index = [0, + 0, 1, 2, + 0, 4, 5, + 0, 7, 8, + 0, 10, 11, + 0, 13, 14 + ] + index = [0, + 1, 2, 3, # index + 4, 5, 6, # middle + 7, 8, 9, # pinky + 10, 11, 12, # ring + 13, 14, 15] # thumb + reoder_index = [ + 13, 14, 15, + 1, 2, 3, + 4, 5, 6, + 10, 11, 12, + 7, 8, 9] + shape = torch.Tensor(_shape.reshape((-1, 10))) + th_v_shaped = torch.matmul(self.mano_layer.th_shapedirs, + shape.transpose(1, 0)).permute(2, 0, 1) \ + + self.mano_layer.th_v_template + th_j = torch.matmul(self.mano_layer.th_J_regressor, th_v_shaped) + temp1 = th_j.clone().detach() + temp2 = th_j.clone().detach()[:, parent_index, :] + result = temp1 - temp2 + result = torch.norm(result, dim=-1, keepdim=True) + ref_len = result[:, [4]] + result = result / ref_len + self.time_in_mano = time.time() - now + return torch.squeeze(result, dim=-1)[:, reoder_index].cpu().numpy() + + def get_residual(self, beta_): + beta = 
beta_.copy() + lb, lb_ref = self.update(beta) + lb = lb.reshape(45, 1) + return lb / lb_ref - self.lb_target + + def get_count(self): + return self.count + + def get_bones(self, beta_): + beta = beta_.copy() + lb, _ = self.update(beta) + lb = lb.reshape(15, 1) + return lb + + def get_loss(self, beta_): + + beta = beta_.copy() + + lb, lb_ref = self.update(beta) + lb = lb.reshape(15, 1) + + loss = np.linalg.norm(lb / lb_ref - self.lb_target) ** 2 + \ + self.weight * np.linalg.norm(beta) ** 2 + + return loss + + def new_get_loss(self, beta_): + beta = beta_.copy() + temp = self.new_cal_ref_bone(beta_) + loss = temp.reshape((15, 1)) + loss = np.linalg.norm(loss - self.lb_target) ** 2 + \ + self.weight * np.linalg.norm(beta_) + return loss + + def get_derivative(self, beta_, n): + + beta = beta_.copy() + params1 = np.array(beta) + params2 = np.array(beta) + step = 0.01 + params1[n] += step + params2[n] -= step + + res1 = self.new_get_loss(params1) + res2 = self.new_get_loss(params2) + + d = (res1 - res2) / (2 * step) + + return d.ravel() + + def batch_new_get_loss(self, beta_): + weight = 1e-5 + beta = beta_.copy() + temp = self.new_cal_ref_bone(beta) + loss = np.transpose(temp) + loss = np.linalg.norm(loss - self.lb_target, axis=0) ** 2 + \ + weight * np.linalg.norm(beta, axis=-1) + return loss + + def new_get_derivative(self, beta_): + # params: beta_ 10*1 + # return: 1*10 + beta = beta_.copy().reshape((1, 10)) + temp_shape = np.zeros((20, beta.shape[1])) # 20*10 + step = 0.01 + for t2 in range(10): # 位置 + t3 = 10 + t2 + temp_shape[t2] = beta.copy() + temp_shape[t3] = beta.copy() + temp_shape[t2, t2] += step + temp_shape[t3, t2] -= step + + res = self.batch_new_get_loss(temp_shape) + d = res[0:10] - res[10:20] # 10*1 + d = d.reshape((1, 10)) / (2 * step) + return d + + # LM algorithm + def LM(self): + u = 1e-2 + v = 1.5 + beta = self.beta.reshape(10, 1) + + out_n = 1 + num_beta = np.shape(beta)[0] # the number of beta + # calculating the init Jocobian matrix + Jacobian = np.zeros([out_n, beta.shape[0]]) + + last_update = 0 + last_loss = 0 + self.test_time = 0 + for i in range(self.num_Iter): + loss = self.new_get_loss(beta) + if loss < self.minimal_loss: + self.minimal_loss = loss + self.best_beta = beta + + if abs(loss - last_loss) < self.threshold_stop: + self.time_total = time.time() - self.time_start + return beta + + # for k in range(num_beta): + # Jacobian[:, k] = self.get_derivative(beta, k) + Jacobian = self.new_get_derivative(beta) + jtj = np.matmul(Jacobian.T, Jacobian) + jtj = jtj + u * np.eye(jtj.shape[0]) + + update = last_loss - loss + delta = (np.matmul(np.linalg.inv(jtj), Jacobian.T) * loss) + + beta -= delta + + if update > last_update and update > 0: + u /= v + else: + u *= v + + last_update = update + last_loss = loss + self.residual_memory.append(loss) + + return beta + + def get_result(self): + return self.residual_memory diff --git a/utils/__init__.py b/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/utils/align.py b/utils/align.py new file mode 100644 index 0000000000000000000000000000000000000000..e26511db1c385b46c94458dc26fd2d56705d5a6a --- /dev/null +++ b/utils/align.py @@ -0,0 +1,53 @@ +import numpy as np +def global_align(gtj0, prj0, key): + gtj = gtj0.copy() + prj = prj0.copy() + + if key in ["stb", "rhd"]: + # gtj :B*21*3 + # prj :B*21*3 + root_idx = 9 # root + ref_bone_link = [0, 9] # mid mcp + pred_align = prj.copy() + for i in range(prj.shape[0]): + + pred_ref_bone_len 
= np.linalg.norm(prj[i][ref_bone_link[0]] - prj[i][ref_bone_link[1]])
+            gt_ref_bone_len = np.linalg.norm(gtj[i][ref_bone_link[0]] - gtj[i][ref_bone_link[1]])
+            scale = gt_ref_bone_len / pred_ref_bone_len
+
+            for j in range(21):
+                pred_align[i][j] = gtj[i][root_idx] + scale * (prj[i][j] - prj[i][root_idx])
+
+        return gtj, pred_align
+
+    if key in ["do", "eo"]:
+        # gtj :B*5*3
+        # prj :B*5*3
+
+        prj_ = prj.copy()[:, [4, 8, 12, 16, 20], :]  # B*5*3
+
+        gtj_valid = []
+        prj_valid_align = []
+
+        for i in range(prj_.shape[0]):
+            # 5*3
+            mask = ~(np.isnan(gtj[i][:, 0]))
+            if mask.sum() < 2:
+                continue
+
+            prj_mask = prj_[i][mask]  # m*3
+            gtj_mask = gtj[i][mask]  # m*3
+
+            gtj_valid_center = np.mean(gtj_mask, 0)
+            prj_valid_center = np.mean(prj_mask, 0)
+
+            gtj_center_length = np.linalg.norm(gtj_mask - gtj_valid_center, axis=1).mean()
+            prj_center_length = np.linalg.norm(prj_mask - prj_valid_center, axis=1).mean()
+            scale = gtj_center_length / prj_center_length
+
+            prj_valid_align_i = gtj_valid_center + scale * (prj_[i][mask] - prj_valid_center)
+
+            gtj_valid.append(gtj_mask)
+            prj_valid_align.append(prj_valid_align_i)
+
+        return np.array(gtj_valid), np.array(prj_valid_align)
\ No newline at end of file
diff --git a/utils/bone.py b/utils/bone.py
new file mode 100644
index 0000000000000000000000000000000000000000..10dfa2cec975e1756e24a194bc50489458317023
--- /dev/null
+++ b/utils/bone.py
@@ -0,0 +1,30 @@
+import config as cfg
+import numpy as np
+import torch
+
+
+def caculate_length(j3d_, label=None):
+    if isinstance(j3d_, torch.Tensor):
+        j3d = j3d_.clone()
+        j3d = j3d.detach().cpu()
+        j3d = j3d.numpy()
+    else:
+        j3d = j3d_.copy()
+
+    if len(j3d.shape) != 2:
+        j3d = j3d.squeeze()
+
+    bone = [
+        j3d[i] - j3d[cfg.SNAP_PARENT[i]]
+        for i in range(21)
+    ]
+    bone_len = np.linalg.norm(
+        bone, ord=2, axis=-1, keepdims=True  # 21*1
+    )
+
+    if label == "full":
+        return bone_len
+    elif label == "useful":
+        return bone_len[cfg.USEFUL_BONE]
+    else:
+        raise ValueError("{} not in ['full'|'useful']".format(label))
diff --git a/utils/common_utils.py b/utils/common_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..d2e67fd19c9100f123715bd947877e8a4b333773
--- /dev/null
+++ b/utils/common_utils.py
@@ -0,0 +1,44 @@
+#-*-coding:utf-8-*-
+# date:2020-04-11
+# Author: Eric.Lee
+# function: common utils
+
+import os
+import shutil
+import random
+import cv2
+import numpy as np
+import json
+
+def mkdir_(path, flag_rm=False):
+    if os.path.exists(path):
+        if flag_rm == True:
+            shutil.rmtree(path)
+            os.mkdir(path)
+            print('remove {} done ~ '.format(path))
+    else:
+        os.mkdir(path)
+
+def plot_box(bbox, img, color=None, label=None, line_thickness=None):
+    tl = line_thickness or round(0.002 * max(img.shape[0:2])) + 1
+    color = color or [random.randint(0, 255) for _ in range(3)]
+    c1, c2 = (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3]))
+    cv2.rectangle(img, c1, c2, color, thickness=tl)  # target bbox
+    if label:
+        tf = max(tl - 2, 1)
+        t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]  # label size
+        c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3  # text bbox
+        cv2.rectangle(img, c1, c2, color, -1)  # filled label rectangle
+        # draw the label text
+        cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 4, [225, 255, 255],thickness=tf, lineType=cv2.LINE_AA)
+
+class JSON_Encoder(json.JSONEncoder):
+    def default(self, obj):
+        if isinstance(obj, np.integer):
+            return int(obj)
+        elif isinstance(obj, np.floating):
+            return float(obj)
+        elif isinstance(obj, np.ndarray):
+            return obj.tolist()
+        else:
+            return super(JSON_Encoder, 
self).default(obj) diff --git a/utils/func.py b/utils/func.py new file mode 100644 index 0000000000000000000000000000000000000000..d9d35840f920f215a59fe25f037aa82c37ce6a79 --- /dev/null +++ b/utils/func.py @@ -0,0 +1,75 @@ +from torchvision.transforms.functional import * + + +def batch_denormalize(tensor, mean, std, inplace=False): + """Normalize a tensor image with mean and standard deviation. + + .. note:: + This transform acts out_testset of place by default, i.e., it does not mutates the input tensor. + + See :class:`~torchvision.transforms.Normalize` for more details. + + Args: + tensor (Tensor): Tensor image of size (B, C, H, W) to be normalized. + mean (sequence): Sequence of means for each channel. + std (sequence): Sequence of standard deviations for each channel. + inplace(bool,optional): Bool to make this operation inplace. + + Returns: + Tensor: Normalized Tensor image. + """ + if not torch.is_tensor(tensor) or tensor.ndimension() != 4: + raise TypeError('invalid tensor or tensor channel is not BCHW') + + if not inplace: + tensor = tensor.clone() + + dtype = tensor.dtype + mean = torch.as_tensor(mean, dtype=dtype, device=tensor.device) + std = torch.as_tensor(std, dtype=dtype, device=tensor.device) + tensor.mul_(std[None, :, None, None]).sub_(-1 * mean[None, :, None, None]) + return tensor + + +def to_numpy(tensor): + if torch.is_tensor(tensor): + return tensor.detach().cpu().numpy() + elif type(tensor).__module__ != 'numpy': + raise ValueError("Cannot convert {} to numpy array" + .format(type(tensor))) + else: + return tensor + + +def bhwc_2_bchw(tensor): + """ + :param x: torch tensor, B x H x W x C + :return: torch tensor, B x C x H x W + """ + if not torch.is_tensor(tensor) or tensor.ndimension() != 4: + raise TypeError('invalid tensor or tensor channel is not BCHW') + return tensor.unsqueeze(1).transpose(1, -1).squeeze(-1) + + +def bchw_2_bhwc(tensor): + """ + :param x: torch tensor, B x C x H x W + :return: torch tensor, B x H x W x C + """ + if not torch.is_tensor(tensor) or tensor.ndimension() != 4: + raise TypeError('invalid tensor or tensor channel is not BCHW') + return tensor.unsqueeze(-1).transpose(1, -1).squeeze(1) + +def initiate(label=None): + if label == "zero": + shape = torch.zeros(10).unsqueeze(0) + pose = torch.zeros(48).unsqueeze(0) + elif label == "uniform": + shape = torch.from_numpy(np.random.normal(size=[1, 10])).float() + pose = torch.from_numpy(np.random.normal(size=[1, 48])).float() + elif label == "01": + shape = torch.rand(1, 10) + pose = torch.rand(1, 48) + else: + raise ValueError("{} not in ['zero'|'uniform'|'01']".format(label)) + return pose, shape diff --git a/utils/handutils.py b/utils/handutils.py new file mode 100644 index 0000000000000000000000000000000000000000..b73b9b19702c2b04bd0c67caafea562953c75b31 --- /dev/null +++ b/utils/handutils.py @@ -0,0 +1,452 @@ +import numpy as np +import torch + +try: + from PIL import Image +except ImportError: + print('Could not import PIL in handutils') +import config as cfg + + +def get_joint_bone(joint, ref_bone_link=None): + if ref_bone_link is None: + ref_bone_link = (0, 9) + + if ( + not torch.is_tensor(joint) + and not isinstance(joint, np.ndarray) + ): + raise TypeError('joint should be ndarray or torch tensor. 
Got {}'.format(type(joint))) + if ( + len(joint.shape) != 3 + or joint.shape[1] != 21 + or joint.shape[2] != 3 + ): + raise TypeError('joint should have shape (B, njoint, 3), Got {}'.format(joint.shape)) + + batch_size = joint.shape[0] + bone = 0 + if torch.is_tensor(joint): + bone = torch.zeros((batch_size, 1)).to(joint.device) + for jid, nextjid in zip( + ref_bone_link[:-1], ref_bone_link[1:] + ): + bone += torch.norm( + joint[:, jid, :] - joint[:, nextjid, :], + dim=1, keepdim=True + ) # (B, 1) + elif isinstance(joint, np.ndarray): + bone = np.zeros((batch_size, 1)) + for jid, nextjid in zip( + ref_bone_link[:-1], ref_bone_link[1:] + ): + bone += np.linalg.norm( + (joint[:, jid, :] - joint[:, nextjid, :]), + ord=2, axis=1, keepdims=True + ) # (B, 1) + return bone + + +def uvd2xyz( + uvd, + joint_root, + joint_bone, + intr=None, + trans=None, + scale=None, + inp_res=256, + mode='persp' +): + bs = uvd.shape[0] + if mode in ['persp', 'perspective']: + if intr is None: + raise Exception("No intr found in perspective") + '''1. denormalized uvd''' + uv = uvd[:, :, :2] * inp_res # 0~256 + depth = (uvd[:, :, 2] * cfg.DEPTH_RANGE) + cfg.DEPTH_MIN + root_depth = joint_root[:, -1].unsqueeze(-1) # (B, 1) + z = depth * joint_bone.expand_as(uvd[:, :, 2]) + \ + root_depth.expand_as(uvd[:, :, 2]) # B x M + + '''2. uvd->xyz''' + camparam = torch.zeros((bs, 4)).float().to(intr.device) # (B, 4) + camparam[:, 0] = intr[:, 0, 0] # fx + camparam[:, 1] = intr[:, 1, 1] # fx + camparam[:, 2] = intr[:, 0, 2] # cx + camparam[:, 3] = intr[:, 1, 2] # cy + camparam = camparam.unsqueeze(1).expand(-1, uvd.size(1), -1) # B x M x 4 + xy = ((uv - camparam[:, :, 2:4]) / camparam[:, :, :2]) * \ + z.unsqueeze(-1).expand_as(uv) # B x M x 2 + return torch.cat((xy, z.unsqueeze(-1)), -1) # B x M x 3 + elif mode in ['ortho', 'orthogonal']: + if trans is None or scale is None: + raise Exception("No trans or scale found in orthorgnal") + raise Exception("orth Unimplement !") + else: + raise Exception("Unkonwn mode type. should in ['persp', 'ortho']") + + +def xyz2uvd( + xyz, + joint_root, + joint_bone, + intr=None, + trans=None, + scale=None, + inp_res=256, + mode='persp' +): + bs = xyz.shape[0] + if mode in ['persp', 'perspective']: + if intr is None: + raise Exception("No intr found in perspective") + z = xyz[:, :, 2] + xy = xyz[:, :, :2] + xy = xy / z.unsqueeze(-1).expand_as(xy) + + ''' 1. normalize depth : root_relative, scale_invariant ''' + root_depth = joint_root[:, -1].unsqueeze(-1) # (B, 1) + depth = (z - root_depth.expand_as(z)) / joint_bone.expand_as(z) + + '''2. xy->uv''' + camparam = torch.zeros((bs, 4)).float().to(intr.device) # (B, 4) + camparam[:, 0] = intr[:, 0, 0] # fx + camparam[:, 1] = intr[:, 1, 1] # fx + camparam[:, 2] = intr[:, 0, 2] # cx + camparam[:, 3] = intr[:, 1, 2] # cy + camparam = camparam.unsqueeze(1).expand(-1, xyz.size(1), -1) # B x M x 4 + uv = (xy * camparam[:, :, :2]) + camparam[:, :, 2:4] + + '''3. normalize uvd to 0~1''' + uv = uv / inp_res + depth = (depth - cfg.DEPTH_MIN) / cfg.DEPTH_RANGE + + return torch.cat((uv, depth.unsqueeze(-1)), -1) + elif mode in ['ortho', 'orthogonal']: + if trans is None or scale is None: + raise Exception("No trans or scale found in orthorgnal") + raise Exception("orth Unimplement !") + else: + raise Exception("Unkonwn proj type. 
should in ['persp', 'ortho']") + + +def persp_joint2kp(joint, intr): + joint_homo = torch.matmul(joint, intr.transpose(1, 2)) + kp2d = joint_homo / joint_homo[:, :, 2:] + kp2d = kp2d[:, :, :2] + return kp2d + + +def rot_kp2d(kp2d, rot): + kp2d = np.concatenate((kp2d, np.ones((kp2d.shape[0], 1))), axis=1) + new_kp2d = np.matmul(kp2d, rot.transpose()) + return new_kp2d + + +def get_annot_scale(annots, visibility=None, scale_factor=2.0): + """ + Retreives the size of the square we want to crop by taking the + maximum of vertical and horizontal span of the hand and multiplying + it by the scale_factor to add some padding around the hand + """ + if visibility is not None: + annots = annots[visibility] + min_x, min_y = annots.min(0) + max_x, max_y = annots.max(0) + delta_x = max_x - min_x + delta_y = max_y - min_y + max_delta = max(delta_x, delta_y) + s = max_delta * scale_factor + return s + + +def get_mask_mini_scale(mask_, side): + """ + Retreives the size of the square... + """ + # mask = np.array(mask_.copy())[:, :, 2:].squeeze() + mask = mask_.copy().squeeze() + mask_scale = 0 + # print(mask.shape) + if side == "l": + id_left = [i for i in range(2, 18)] + np.putmask(mask, np.logical_and(mask >= id_left[0], mask <= id_left[-1]), 128) + seg = np.argwhere(mask == 128) + # print("seg.shape=",seg.shape) + seg_rmin, seg_cmin = np.min(seg, axis=0) + seg_rmax, seg_cmax = np.max(seg, axis=0) + mask_scale = max(seg_rmax - seg_rmin + 1, seg_cmax - seg_cmin + 1) + + elif side == "r": + id_right = [i for i in range(18, 34)] + np.putmask(mask, np.logical_and(mask >= id_right[0], mask <= id_right[-1]), 255) + + seg = np.argwhere(mask == 255) + seg_rmin, seg_cmin = np.min(seg, axis=0) + seg_rmax, seg_cmax = np.max(seg, axis=0) + mask_scale = max(seg_rmax - seg_rmin + 1, seg_cmax - seg_cmin + 1) + elif side == 0: + rmin, cmin = mask.min(0) + rmax, cmax = mask.max(0) + mask_scale = max(rmax - rmin + 1, cmax - cmin + 1) + + if not mask_scale: + raise ValueError("mask_scale is 0!") + + return mask_scale + + +def get_kp2d_mini_scale(annots): + """ + get mini square to include kp2d + """ + # print("annots=",annots) + min_x, min_y = annots.min(0) # opencv convention + max_x, max_y = annots.max(0) + # delta_x = int(max_x - min_x) + # delta_y = int(max_y - min_y) + + delta_x = max_x - min_x + delta_y = max_y - min_y + + max_delta = max(delta_x, delta_y) + + # return delta_x + 1 if delta_x > delta_y else delta_y + 1 + return max_delta + + +# def get_ori_crop_scale(mask, side, kp2d, scale_factor=2.0): +# mask_mini_scale = get_mask_mini_scale(mask, side) +# kp2d_mini_scale = get_kp2d_mini_scale(kp2d) +# ori_crop_scale = max(mask_mini_scale, kp2d_mini_scale) +# +# # if ori_crop_scale % 2 == 0: +# # ori_crop_scale += 2 +# # else: +# # ori_crop_scale += 3 +# +# return ori_crop_scale * scale_factor + +def get_ori_crop_scale(mask, side, kp2d, mask_flag=True,scale_factor=2.0): + kp2d_mini_scale = get_kp2d_mini_scale(kp2d) + + ori_crop_scale =kp2d_mini_scale + + # if mask.any()!=None: + if mask_flag: + # print("HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH") + mask_mini_scale = get_mask_mini_scale(mask, side) + ori_crop_scale = max(mask_mini_scale, kp2d_mini_scale) + + # if ori_crop_scale % 2 == 0: + # ori_crop_scale += 2 + # else: + # ori_crop_scale += 3 + + return ori_crop_scale * scale_factor + +def get_annot_center(annots, visibility=None): + # Get scale + if visibility is not None: + annots = annots[visibility] + min_x, min_y = annots.min(0) + max_x, max_y = annots.max(0) + c_x = int((max_x + min_x) / 2) + c_y 
= int((max_y + min_y) / 2) + return np.asarray([c_x, c_y]) + + +def transform_coords(pts, affine_trans, invert=False): + """ + Args: + pts(np.ndarray): (point_nb, 2) + """ + if invert: + affine_trans = np.linalg.inv(affine_trans) + hom2d = np.concatenate([pts, np.ones([np.array(pts).shape[0], 1])], 1) + transformed_rows = affine_trans.dot(hom2d.transpose()).transpose()[:, :2] + return transformed_rows.astype(int) + + +def transform_img(img, affine_trans, res): + """ + Args: + center (tuple): crop center coordinates + scale (int): size in pixels of the final crop + res (tuple): final image size + """ + trans = np.linalg.inv(affine_trans) + + img = img.transform( + tuple(res), Image.AFFINE, (trans[0, 0], trans[0, 1], trans[0, 2], + trans[1, 0], trans[1, 1], trans[1, 2]) + ) + return img + + +##### Original from Obman (buggy) ##### +# def get_affine_transform(center, scale, res, rot=0): +# rot_mat = np.zeros((3, 3)) +# sn, cs = np.sin(rot), np.cos(rot) +# rot_mat[0, :2] = [cs, -sn] +# rot_mat[1, :2] = [sn, cs] +# rot_mat[2, 2] = 1 +# # Rotate center to obtain coordinate of center in rotated image +# origin_rot_center = rot_mat.dot(center.tolist() + [ +# 1, +# ])[:2] +# # Get center for transform with verts rotated around optical axis +# # (through pixel center, smthg like 128, 128 in pixels and 0,0 in 3d world) +# # For this, rotate the center but around center of image (vs 0,0 in pixel space) +# t_mat = np.eye(3) +# t_mat[0, 2] = -res[1] / 2 +# t_mat[1, 2] = -res[0] / 2 +# t_inv = t_mat.copy() +# t_inv[:2, 2] *= -1 +# transformed_center = t_inv.dot(rot_mat).dot(t_mat).dot(center.tolist() + [ +# 1, +# ]) +# post_rot_trans = get_affine_trans_no_rot(origin_rot_center, scale, res) +# total_trans = post_rot_trans.dot(rot_mat) +# # check_t = get_affine_transform_bak(center, scale, res, rot) +# # print(total_trans, check_t) +# affinetrans_post_rot = get_affine_trans_no_rot(transformed_center[:2], +# scale, res) +# return total_trans.astype(np.float32), affinetrans_post_rot.astype( +# np.float32) + + +def get_affine_transform(center, scale, optical_center, out_res, rot=0): + rot_mat = np.zeros((3, 3)) + sn, cs = np.sin(rot), np.cos(rot) + rot_mat[0, :2] = [cs, -sn] + rot_mat[1, :2] = [sn, cs] + rot_mat[2, 2] = 1 + # Rotate center to obtain coordinate of center in rotated image + origin_rot_center = rot_mat.dot(center.tolist() + [1])[:2] + # Get center for transform with verts rotated around optical axis + # (through pixel center, smthg like 128, 128 in pixels and 0,0 in 3d world) + # For this, rotate the center but around center of image (vs 0,0 in pixel space) + t_mat = np.eye(3) + t_mat[0, 2] = - optical_center[0] + t_mat[1, 2] = - optical_center[1] + t_inv = t_mat.copy() + t_inv[:2, 2] *= -1 + transformed_center = ( + t_inv.dot(rot_mat).dot(t_mat).dot(center.tolist() + [1]) + ) + post_rot_trans = get_affine_trans_no_rot(origin_rot_center, scale, out_res) + total_trans = post_rot_trans.dot(rot_mat) + # check_t = get_affine_transform_bak(center, scale, res, rot) + # print(total_trans, check_t) + affinetrans_post_rot = get_affine_trans_no_rot( + transformed_center[:2], scale, out_res + ) + return ( + total_trans.astype(np.float32), + affinetrans_post_rot.astype(np.float32), + ) + + + +###################################### +def get_affine_transform_test(center, scale, res, rot=0): + rot_mat = np.zeros((3, 3)) + sn, cs = np.sin(rot), np.cos(rot) + rot_mat[0, :2] = [cs, -sn] + rot_mat[1, :2] = [sn, cs] + rot_mat[2, 2] = 1 + # Rotate center to obtain coordinate of center in rotated image + 
origin_rot_center = rot_mat.dot(center.tolist() + [ + 1, + ])[:2] + # Get center for transform with verts rotated around optical axis + # (through pixel center, smthg like 128, 128 in pixels and 0,0 in 3d world) + # For this, rotate the center but around center of image (vs 0,0 in pixel space) + t_mat = np.eye(3) + t_mat[0, 2] = -res[1] / 2 + t_mat[1, 2] = -res[0] / 2 + t_inv = t_mat.copy() + t_inv[:2, 2] *= -1 + transformed_center = t_inv.dot(rot_mat).dot(t_mat).dot(center.tolist() + [ + 1, + ]) + post_rot_trans = get_affine_trans_no_rot(origin_rot_center, scale, res) + total_trans = post_rot_trans.dot(rot_mat) + # check_t = get_affine_transform_bak(center, scale, res, rot) + # print(total_trans, check_t) + affinetrans_post_rot = get_affine_trans_no_rot(transformed_center[:2], + scale, res) + return total_trans.astype(np.float32), affinetrans_post_rot.astype( + np.float32) + +def get_affine_trans_no_rot(center, scale, res): + affinet = np.zeros((3, 3)) + affinet[0, 0] = float(res[1]) / scale + affinet[1, 1] = float(res[0]) / scale + affinet[0, 2] = res[1] * (-float(center[0]) / scale + .5) + affinet[1, 2] = res[0] * (-float(center[1]) / scale + .5) + affinet[2, 2] = 1 + return affinet + + +def get_affine_transform_bak(center, scale, res, rot): + t = np.zeros((3, 3)) + t[0, 0] = float(res[1]) / scale + t[1, 1] = float(res[0]) / scale + t[0, 2] = res[1] * (-float(center[0]) / scale + .5) + t[1, 2] = res[0] * (-float(center[1]) / scale + .5) + t[2, 2] = 1 + if rot != 0: + rot_mat = np.zeros((3, 3)) + sn, cs = np.sin(rot), np.cos(rot) + rot_mat[0, :2] = [cs, -sn] + rot_mat[1, :2] = [sn, cs] + rot_mat[2, 2] = 1 + t_mat = np.eye(3) + t_mat[0, 2] = -res[1] / 2 + t_mat[1, 2] = -res[0] / 2 + t_inv = t_mat.copy() + t_inv[:2, 2] *= -1 + t = np.dot(t_inv, np.dot(rot_mat, np.dot(t_mat, t))).astype(np.float32) + return t, t + + +def gen_cam_param(joint, kp2d, mode='ortho'): + if mode in ['persp', 'perspective']: + kp2d = kp2d.reshape(-1)[:, np.newaxis] # (42, 1) + joint = joint / joint[:, 2:] + joint = joint[:, :2] + jM = np.zeros((42, 2), dtype="float32") + for i in range(joint.shape[0]): # 21 + jM[2 * i][0] = joint[i][0] + jM[2 * i + 1][1] = joint[i][1] + pad2 = np.array(range(42)) + pad2 = (pad2 % 2)[:, np.newaxis] + pad1 = (1 - pad2) + + jM = np.concatenate([jM, pad1, pad2], axis=1) # (42, 4) + jMT = jM.transpose() # (4, 42)print + jMTjM = np.matmul(jMT, jM) # (4,4) + jMTb = np.matmul(jMT, kp2d) + cam_param = np.matmul(np.linalg.inv(jMTjM), jMTb) + cam_param = cam_param.reshape(-1) + return cam_param + elif mode in ['ortho', 'orthogonal']: + # ortho only when + assert np.sum(np.abs(joint[0, :])) == 0 + joint = joint[:, :2] # (21, 2) + joint = joint.reshape(-1)[:, np.newaxis] + kp2d = kp2d.reshape(-1)[:, np.newaxis] + pad2 = np.array(range(42)) + pad2 = (pad2 % 2)[:, np.newaxis] + pad1 = (1 - pad2) + jM = np.concatenate([joint, pad1, pad2], axis=1) # (42, 3) + jMT = jM.transpose() # (3, 42) + jMTjM = np.matmul(jMT, jM) + jMTb = np.matmul(jMT, kp2d) + cam_param = np.matmul(np.linalg.inv(jMTjM), jMTb) + cam_param = cam_param.reshape(-1) + return cam_param + else: + raise Exception("Unkonwn mode type. should in ['persp', 'orth']") diff --git a/utils/heatmaputils.py b/utils/heatmaputils.py new file mode 100644 index 0000000000000000000000000000000000000000..7147fd5284dba2af22811f4d8b703b3a2e47c026 --- /dev/null +++ b/utils/heatmaputils.py @@ -0,0 +1,69 @@ +# Copyright (c) Lixin YANG, Jiasen Li. All Rights Reserved. 
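+# Helpers for 2D joint heatmaps: gen_heatmap() renders an unnormalized Gaussian
+# (peak 1.0, radius 3*sigma) around an integer keypoint and returns a validity
+# flag, while get_heatmap_pred() argmaxes each (B, nJoints, H, W) score map back
+# to per-joint (u, v) pixel coordinates.
+#
+# A minimal usage sketch (sizes assumed for illustration):
+#     hm = np.zeros((64, 64), dtype=np.float32)
+#     hm, ok = gen_heatmap(hm, np.array([32, 20]), sigma=2.0)  # ok == 1 if in bounds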
+import torch +import numpy as np + + +def gen_heatmap(img, pt, sigma): + """generate heatmap based on pt coord. + + :param img: original heatmap, zeros + :type img: np (H,W) float32 + :param pt: keypoint coord. + :type pt: np (2,) int32 + :param sigma: guassian sigma + :type sigma: float + :return + - generated heatmap, np (H, W) each pixel values id a probability + - flag 0 or 1: indicate wheather this heatmap is valid(1) + + """ + + pt = pt.astype(np.int32) + # Check that any part of the gaussian is in-bounds + ul = [int(pt[0] - 3 * sigma), int(pt[1] - 3 * sigma)] + br = [int(pt[0] + 3 * sigma + 1), int(pt[1] + 3 * sigma + 1)] + if ( + ul[0] >= img.shape[1] + or ul[1] >= img.shape[0] + or br[0] < 0 + or br[1] < 0 + ): + # If not, just return the image as is + print("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!") + return img, 0 + + # Generate gaussian + size = 6 * sigma + 1 + x = np.arange(0, size, 1, float) + y = x[:, np.newaxis] + x0 = y0 = size // 2 + g = np.exp(- ((x - x0) ** 2 + (y - y0) ** 2) / (2 * sigma ** 2)) + # Usable gaussian range + g_x = max(0, -ul[0]), min(br[0], img.shape[1]) - ul[0] + g_y = max(0, -ul[1]), min(br[1], img.shape[0]) - ul[1] + # Image range + img_x = max(0, ul[0]), min(br[0], img.shape[1]) + img_y = max(0, ul[1]), min(br[1], img.shape[0]) + + img[img_y[0]:img_y[1], img_x[0]:img_x[1]] = g[g_y[0]:g_y[1], g_x[0]:g_x[1]] + return img, 1 + + +def get_heatmap_pred(heatmaps): + """ get predictions from heatmaps in torch Tensor + return type: torch.LongTensor + """ + assert heatmaps.dim() == 4, 'Score maps should be 4-dim (B, nJoints, H, W)' + maxval, idx = torch.max(heatmaps.view(heatmaps.size(0), heatmaps.size(1), -1), 2) + + maxval = maxval.view(heatmaps.size(0), heatmaps.size(1), 1) + idx = idx.view(heatmaps.size(0), heatmaps.size(1), 1) + + preds = idx.repeat(1, 1, 2).float() # (B, njoint, 2) + + preds[:, :, 0] = (preds[:, :, 0]) % heatmaps.size(3) # + 1 + preds[:, :, 1] = torch.floor((preds[:, :, 1]) / heatmaps.size(3)) # + 1 + + pred_mask = maxval.gt(0).repeat(1, 1, 2).float() + preds *= pred_mask + return preds diff --git a/utils/imgutils.py b/utils/imgutils.py new file mode 100644 index 0000000000000000000000000000000000000000..8f1b5a9600d1059c3f5a163425eb3727cfe592c2 --- /dev/null +++ b/utils/imgutils.py @@ -0,0 +1,195 @@ +import torch +import cv2 +import numpy as np +import random +import torchvision +import utils.func as func +import config as cfg + + +def get_color_params(brightness=0, contrast=0, saturation=0, hue=0): + if brightness > 0: + brightness_factor = random.uniform( + max(0, 1 - brightness), 1 + brightness) + else: + brightness_factor = None + + if contrast > 0: + contrast_factor = random.uniform(max(0, 1 - contrast), 1 + contrast) + else: + contrast_factor = None + + if saturation > 0: + saturation_factor = random.uniform( + max(0, 1 - saturation), 1 + saturation) + else: + saturation_factor = None + + if hue > 0: + hue_factor = random.uniform(-hue, hue) + else: + hue_factor = None + return brightness_factor, contrast_factor, saturation_factor, hue_factor + + +def color_jitter(img, brightness=0, contrast=0, saturation=0, hue=0): + brightness, contrast, saturation, hue = get_color_params( + brightness=brightness, + contrast=contrast, + saturation=saturation, + hue=hue) + + # Create img transform function sequence + img_transforms = [] + if brightness is not None: + img_transforms.append(lambda img: torchvision.transforms.functional.adjust_brightness(img, brightness)) + if saturation is not None: + img_transforms.append(lambda 
img: torchvision.transforms.functional.adjust_saturation(img, saturation)) + if hue is not None: + img_transforms.append( + lambda img: torchvision.transforms.functional.adjust_hue(img, hue)) + if contrast is not None: + img_transforms.append(lambda img: torchvision.transforms.functional.adjust_contrast(img, contrast)) + random.shuffle(img_transforms) + + jittered_img = img + for func in img_transforms: + jittered_img = func(jittered_img) + return jittered_img + + +def batch_with_dep(clrs, deps): + clrs = func.to_numpy(clrs) + if clrs.dtype is not np.uint8: + clrs = (clrs * 255).astype(np.uint8) + assert len(deps.shape) == 4, "deps should have shape (B, 1, H, W)" + deps = func.to_numpy(deps) + deps = deps.swapaxes(1, 2).swapaxes(2, 3) + deps = deps.repeat(3, axis=3) + if deps.dtype is not np.uint8: + deps = (deps * 255).astype(np.uint8) + + batch_size = clrs.shape[0] + + alpha = 0.6 + beta = 0.9 + gamma = 0 + + batch = [] + for i in range(16): + if i >= batch_size: + batch.append(np.zeros((64, 64, 3)).astype(np.uint8)) + continue + clr = clrs[i] + clr = cv2.resize(clr, (64, 64)) + dep = deps[i] + dep_img = cv2.addWeighted(clr, alpha, dep, beta, gamma) + batch.append(dep_img) + + resu = [] + for i in range(4): + resu.append(np.concatenate(batch[i * 4: i * 4 + 4], axis=1)) + resu = np.concatenate(resu) + return resu + + +def batch_with_joint(clrs, uvds): + clrs = func.to_numpy(clrs) + if clrs.dtype is not np.uint8: + clrs = (clrs * 255).astype(np.uint8) + uvds = func.to_numpy(uvds) + + batch_size = clrs.shape[0] + + batch = [] + for i in range(16): + if i >= batch_size: + batch.append(np.zeros((256, 256, 3)).astype(np.uint8)) + continue + clr = clrs[i] + uv = (np.array(uvds[i][:, :2]) * clr.shape[0]).astype(np.uint8) # (256) + clr = draw_hand_skeloten(clr, uv, cfg.SNAP_BONES) + batch.append(clr) + + resu = [] + for i in range(4): + resu.append(np.concatenate(batch[i * 4: i * 4 + 4], axis=1)) + resu = np.concatenate(resu) + return resu + + +def draw_hand_skeloten(clr, uv, bone_links, colors=cfg.JOINT_COLORS): + for i in range(len(bone_links)): + bone = bone_links[i] + for j in bone: + cv2.circle(clr, tuple(uv[j]), 4, colors[i], -1) + for j, nj in zip(bone[:-1], bone[1:]): + cv2.line(clr, tuple(uv[j]), tuple(uv[nj]), colors[i], 2) + return clr + + +def batch_with_heatmap( + inputs, + heatmaps, + num_rows=2, + parts_to_show=None, + n_in_batch=1, +): + # inputs = func.to_numpy(inputs * 255) # 0~1 -> 0 ~255 + heatmaps = func.to_numpy(heatmaps) + batch_img = [] + for n in range(min(inputs.shape[0], n_in_batch)): + inp = inputs[n] + batch_img.append( + sample_with_heatmap( + inp, + heatmaps[n], + num_rows=num_rows, + parts_to_show=parts_to_show + ) + ) + resu = np.concatenate(batch_img) + return resu + + +def sample_with_heatmap(img, heatmap, num_rows=2, parts_to_show=None): + if parts_to_show is None: + parts_to_show = np.arange(heatmap.shape[0]) # 21 + + # Generate a single image to display input/output pair + num_cols = int(np.ceil(float(len(parts_to_show)) / num_rows)) + size = img.shape[0] // num_rows + + full_img = np.zeros((img.shape[0], size * (num_cols + num_rows), 3), np.uint8) + full_img[:img.shape[0], :img.shape[1]] = img + + inp_small = cv2.resize(img, (size, size)) + + # Set up heatmap display for each part + for i, part in enumerate(parts_to_show): + part_idx = part + out_resized = cv2.resize(heatmap[part_idx], (size, size)) + out_resized = out_resized.astype(float) + out_img = inp_small.copy() * .4 + color_hm = color_heatmap(out_resized) + out_img += color_hm * .6 + + col_offset = 
(i % num_cols + num_rows) * size + row_offset = (i // num_cols) * size + full_img[row_offset:row_offset + size, col_offset:col_offset + size] = out_img + + return full_img + + +def color_heatmap(x): + color = np.zeros((x.shape[0], x.shape[1], 3)) + color[:, :, 0] = gauss(x, .5, .6, .2) + gauss(x, 1, .8, .3) + color[:, :, 1] = gauss(x, 1, .5, .3) + color[:, :, 2] = gauss(x, 1, .2, .3) + color[color > 1] = 1 + color = (color * 255).astype(np.uint8) + return color + + +def gauss(x, a, b, c, d=0): + return a * np.exp(-(x - b) ** 2 / (2 * c ** 2)) + d diff --git a/utils/misc.py b/utils/misc.py new file mode 100644 index 0000000000000000000000000000000000000000..1a93629509b3563916e5f5191744fb6eb8c41ff4 --- /dev/null +++ b/utils/misc.py @@ -0,0 +1,187 @@ +import os +import shutil + +import numpy as np +import scipy.io +import torch +from termcolor import colored, cprint + +import utils.func as func +import copy + + +def print_args(args): + opts = vars(args) + cprint("{:>30} Options {}".format("=" * 15, "=" * 15), 'yellow') + for k, v in sorted(opts.items()): + print("{:>30} : {}".format(k, v)) + cprint("{:>30} Options {}".format("=" * 15, "=" * 15), 'yellow') + + +def param_count(net): + return sum(p.numel() for p in net.parameters()) / 1e6 + + + + +def out_loss_auc( + loss_all_, auc_all_, acc_hm_all_, outpath +): + loss_all = copy.deepcopy(loss_all_) + acc_hm_all = copy.deepcopy(acc_hm_all_) + auc_all = copy.deepcopy(auc_all_) + + for k, l in zip(loss_all.keys(), loss_all.values()): + np.save(os.path.join(outpath, "{}.npy".format(k)), np.vstack((np.arange(1, len(l) + 1), np.array(l))).T) + + if len(acc_hm_all): + for key ,value in acc_hm_all.items(): + acc_hm_all[key]=np.array(value) + np.save(os.path.join(outpath, "acc_hm_all.npy"), acc_hm_all) + + + if len(auc_all): + for key ,value in auc_all.items(): + auc_all[key]=np.array(value) + np.save(os.path.join(outpath, "auc_all.npy"), np.array(auc_all)) + + +def saveloss(d): + for k, v in zip(d.keys(), d.values()): + mat = np.array(v) + np.save(os.path.join("losses", "{}.npy".format(k)), mat) + + +def save_checkpoint( + state, + checkpoint='checkpoint', + filename='checkpoint.pth', + snapshot=None, + # is_best=False + is_best=None +): + # preds = to_numpy(preds) + filepath = os.path.join(checkpoint, filename) + fileprefix = filename.split('.')[0] + # torch.save(state, filepath) + torch.save(state['model'].state_dict(), filepath) + + if snapshot and state['epoch'] % snapshot == 0: + shutil.copyfile( + filepath, + os.path.join( + checkpoint, + '{}_{}.pth'.format(fileprefix, state['epoch']) + ) + ) + + [auc, best_acc] = is_best + + for key in auc.keys(): + if auc[key] > best_acc[key]: + shutil.copyfile( + filepath, + os.path.join( + checkpoint, + '{}_{}best.pth'.format(fileprefix, key) + ) + ) + + +# def load_checkpoint(model, checkpoint): +# name = checkpoint +# checkpoint = torch.load(name) +# pretrain_dict = clean_state_dict(checkpoint['state_dict']) +# model_state = model.state_dict() +# state = {} +# for k, v in pretrain_dict.items(): +# if k in model_state: +# state[k] = v +# else: +# print(k, ' is NOT in current model') +# model_state.update(state) +# model.load_state_dict(model_state) +# print(colored('loaded {}'.format(name), 'cyan')) + +def load_checkpoint(model, checkpoint): + name = checkpoint + checkpoint = torch.load(name) + pretrain_dict = clean_state_dict(checkpoint['state_dict']) + model_state = model.state_dict() + state = {} + for k, v in pretrain_dict.items(): + if k in model_state: + state[k] = v + else: + print(k, ' is NOT in 
diff --git a/utils/model_utils.py b/utils/model_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..c0752637f5e07702e0d4e7536216e3ddabd3f044
--- /dev/null
+++ b/utils/model_utils.py
@@ -0,0 +1,29 @@
+#-*-coding:utf-8-*-
+# date:2020-04-11
+# Author: Eric.Lee
+# function: model utils
+
+import os
+import numpy as np
+import torch
+import torch.backends.cudnn as cudnn
+import random
+
+def get_acc(output, label):
+    total = output.shape[0]
+    _, pred_label = output.max(1)
+    num_correct = (pred_label == label).sum().item()
+    return num_correct / float(total)
+
+def set_learning_rate(optimizer, lr):
+    for param_group in optimizer.param_groups:
+        param_group['lr'] = lr
+
+def set_seed(seed=666):
+    np.random.seed(seed)
+    random.seed(seed)
+    torch.manual_seed(seed)
+    if torch.cuda.is_available():
+        torch.cuda.manual_seed(seed)
+        torch.cuda.manual_seed_all(seed)
+        cudnn.deterministic = True
diff --git a/utils/smoother.py b/utils/smoother.py
new file mode 100644
index 0000000000000000000000000000000000000000..838e25b3afab7ff03e7e73f3416f44c8e8da9283
--- /dev/null
+++ b/utils/smoother.py
@@ -0,0 +1,55 @@
+import numpy as np
+
+
+class LowPassFilter:
+    def __init__(self):
+        self.prev_raw_value = None
+        self.prev_filtered_value = None
+
+    def process(self, value, alpha):
+        if self.prev_raw_value is None:
+            s = value
+        else:
+            s = alpha * value + (1.0 - alpha) * self.prev_filtered_value
+        self.prev_raw_value = value
+        self.prev_filtered_value = s
+        return s
+
+
+class OneEuroFilter:
+    def __init__(self, mincutoff=1.0, beta=0.0, dcutoff=1.0, freq=30):
+        self.freq = freq
+        self.mincutoff = mincutoff
+        self.beta = beta
+        self.dcutoff = dcutoff
+        self.x_filter = LowPassFilter()
+        self.dx_filter = LowPassFilter()
+
+    def compute_alpha(self, cutoff):
+        te = 1.0 / self.freq
+        tau = 1.0 / (2 * np.pi * cutoff)
+        return 1.0 / (1.0 + tau / te)
+
+    def process(self, x):
+        prev_x = self.x_filter.prev_raw_value
+        dx = 0.0 if prev_x is None else (x - prev_x) * self.freq
+        edx = self.dx_filter.process(dx, self.compute_alpha(self.dcutoff))
+        cutoff = self.mincutoff + self.beta * np.abs(edx)
+        return self.x_filter.process(x, self.compute_alpha(cutoff))
+
+
+if __name__ == '__main__':
+    euro_filter = OneEuroFilter(4.0, 0.0)  # mincutoff=4.0, beta=0.0
+    noise = 0.01 * np.random.rand(1000)
+    x = np.linspace(0, 1, 1000)
+    X = x + noise
+    import matplotlib.pyplot as plt
+
+    plt.plot(x)
+    plt.plot(X)
+    y = np.zeros((1000,))
+    for i in range(1000):
+        y[i] = euro_filter.process(X[i])  # smooth the noisy signal
+    plt.plot(y)
+    plt.draw()
+    plt.show()
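Note: `compute_alpha` converts a cutoff frequency into the low-pass blend factor, alpha = 1 / (1 + tau / te) with tau = 1 / (2*pi*cutoff) and te = 1 / freq, and `process` raises the cutoff with the (filtered) signal speed. Since everything in `process` is vectorized numpy, a single filter instance can smooth an entire (21, 3) joint array frame by frame. A usage sketch with fake joints; the parameter values are illustrative:

import numpy as np

# one filter instance for the whole hand; 30 Hz matches a typical webcam
joint_filter = OneEuroFilter(mincutoff=1.0, beta=0.01, dcutoff=1.0, freq=30)

smoothed = []
for t in range(100):                      # fake 100-frame (21, 3) joint stream
    joints = t / 100.0 + 0.01 * np.random.randn(21, 3)
    smoothed.append(joint_filter.process(joints))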
diff --git a/utils/vis.py b/utils/vis.py
new file mode 100644
index 0000000000000000000000000000000000000000..342c05450c45db80ae913918fe35a210c520888f
--- /dev/null
+++ b/utils/vis.py
@@ -0,0 +1,131 @@
+import matplotlib.pyplot as plt
+
+
+def plot3d(joints_, ax, title=None):
+    # snap convention: joint 0 is the palm root, then 4 joints per finger
+    joints = joints_.copy()
+    ax.plot(joints[:, 0], joints[:, 1], joints[:, 2], 'yo', label='keypoint')
+
+    ax.plot(joints[:5, 0], joints[:5, 1], joints[:5, 2],
+            'r', label='thumb')
+    ax.plot(joints[[0, 5, 6, 7, 8], 0], joints[[0, 5, 6, 7, 8], 1],
+            joints[[0, 5, 6, 7, 8], 2], 'b', label='index')
+    ax.plot(joints[[0, 9, 10, 11, 12], 0], joints[[0, 9, 10, 11, 12], 1],
+            joints[[0, 9, 10, 11, 12], 2], 'b', label='middle')
+    ax.plot(joints[[0, 13, 14, 15, 16], 0], joints[[0, 13, 14, 15, 16], 1],
+            joints[[0, 13, 14, 15, 16], 2], 'b', label='ring')
+    ax.plot(joints[[0, 17, 18, 19, 20], 0], joints[[0, 17, 18, 19, 20], 1],
+            joints[[0, 17, 18, 19, 20], 2], 'b', label='pinky')
+
+    ax.set_xlabel('x')
+    ax.set_ylabel('y')
+    ax.set_zlabel('z')
+    ax.set_xlim(xmin=-10.0, xmax=10.0)
+    ax.set_ylim(ymin=-10.0, ymax=10.0)
+    ax.set_zlim(zmin=-10.0, zmax=10.0)
+    if title is not None:
+        ax.set_title(title)
+    # ax.view_init(330, 110)
+    ax.view_init(-190, -190)
+    return ax
+
+
+def multi_plot3d(jointss_, title=None):
+    jointss = jointss_.copy()
+    fig = plt.figure(figsize=[50, 50])
+    ax = fig.add_subplot(111, projection='3d')
+
+    colors = ['b', 'r', 'g']
+
+    for i in range(len(jointss)):
+        joints = jointss[i]
+
+        plt.plot(joints[:, 0], joints[:, 1], joints[:, 2], 'yo')
+        plt.plot(joints[:5, 0], joints[:5, 1], joints[:5, 2],
+                 colors[i])
+        plt.plot(joints[[0, 5, 6, 7, 8], 0], joints[[0, 5, 6, 7, 8], 1],
+                 joints[[0, 5, 6, 7, 8], 2], colors[i])
+        plt.plot(joints[[0, 9, 10, 11, 12], 0], joints[[0, 9, 10, 11, 12], 1],
+                 joints[[0, 9, 10, 11, 12], 2], colors[i])
+        plt.plot(joints[[0, 13, 14, 15, 16], 0], joints[[0, 13, 14, 15, 16], 1],
+                 joints[[0, 13, 14, 15, 16], 2], colors[i])
+        plt.plot(joints[[0, 17, 18, 19, 20], 0], joints[[0, 17, 18, 19, 20], 1],
+                 joints[[0, 17, 18, 19, 20], 2], colors[i])
+
+        # snap convention: mark the five fingertips
+        plt.plot(joints[4][0], joints[4][1], joints[4][2], 'rD')
+        plt.plot(joints[8][0], joints[8][1], joints[8][2], 'ro')
+        plt.plot(joints[12][0], joints[12][1], joints[12][2], 'ro')
+        plt.plot(joints[16][0], joints[16][1], joints[16][2], 'ro')
+        plt.plot(joints[20][0], joints[20][1], joints[20][2], 'ro')
+
+    ax.set_xlabel('x')
+    ax.set_ylabel('y')
+    ax.set_zlabel('z')
+    # ax.view_init(330, 110)
+    ax.view_init(-90, -90)
+
+    if title:
+        title_ = ""
+        for i in range(len(title)):
+            title_ += "{}: {} ".format(colors[i], title[i])
+        ax.set_title(title_, fontsize=12, color='black')
+    else:
+        ax.set_title("None", fontsize=12, color='black')
+    plt.show()
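A usage sketch for the two plotting helpers, with random stand-in joints in the 21-joint snap convention (root plus four joints per finger); the data here is fabricated for illustration only:

import numpy as np
import matplotlib.pyplot as plt

pred = np.random.uniform(-5, 5, (21, 3))         # fake prediction
gt = pred + np.random.normal(0, 0.2, (21, 3))    # fake ground truth

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
plot3d(pred, ax, title='pred')
plt.show()

# overlay both skeletons; title entries pair with the colors ['b', 'r', 'g']
multi_plot3d(np.stack([pred, gt]), title=['pred', 'gt'])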