提交 4fc60d7c 编写于 作者: Eric.Lee2021's avatar Eric.Lee2021 🚴🏻

create e3d handpose

上级 022b233b
DEPTH_MIN = -1.5
stb_joints = [
rhd_joints = [
snap_joint_names = [
(0, 1, 2, 3, 4),
(0, 5, 6, 7, 8),
(0, 9, 10, 11, 12),
(0, 13, 14, 15, 16),
(0, 17, 18, 19, 20)
0, # 0's parent
0, # 1's parent
0, # 5's parent
0, # 9's parent
0, # 13's parent
0, # 17's parent
(216, 31, 53),
(214, 208, 0),
(136, 72, 152),
(126, 199, 216),
(0, 0, 230),
DEFAULT_CACHE_DIR = 'datasets/data/.cache'
USEFUL_BONE = [1, 2, 3,
5, 6, 7,
9, 10, 11,
13, 14, 15,
17, 18, 19]
kinematic_tree = [2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16, 18, 19, 20]
ID2ROT = {
2: 13, 3: 14, 4: 15,
6: 1, 7: 2, 8: 3,
10: 4, 11: 5, 12: 6,
14: 10, 15: 11, 16: 12,
18: 7, 19: 8, 20: 9,
\ No newline at end of file
# date:2021-06-15
# Author: Eric.Lee
# function: handpose 3D Inference
import os
import argparse
import torch
import torch.nn as nn
import numpy as np
import time
import datetime
import os
import math
from datetime import datetime
import cv2
import torch.nn.functional as F
from models.resnet import resnet18,resnet34,resnet50,resnet101
from e3d_data_iter.datasets import letterbox,get_heatmap
import sys
sys.path.append("./components/") # 添加模型组件路径
from hand_keypoints.handpose_x import handpose_x_model,draw_bd_handpose_c
from utils.common_utils import *
import copy
from utils import func, bone, AIK, smoother
from utils.LM_new import LM_Solver
from op_pso import PSO
import open3d
from mpl_toolkits.mplot3d import Axes3D
from manopth import manolayer
# Script entry point: parses command-line options, builds the 3D hand-pose
# network plus the 2D keypoint model and MANO layer, then loops over the images
# in the test folder and updates an Open3D mesh / point cloud per image.
# NOTE(review): this listing has lost its indentation and appears to be missing
# some lines (several calls and `if` bodies are left open) — compare against the
# repository before editing.
if __name__ == "__main__":
parser = argparse.ArgumentParser(description=' Project Hand Pose 3D Inference')
parser.add_argument('--model_path', type=str, default = './model_exp/2021-06-16_02-09-37/resnet_50-size-256-loss-wing_loss-model_epoch-732.pth',
help = 'model_path') # model checkpoint path
parser.add_argument('--model', type=str, default = 'resnet_50',
help = '''model : resnet_18,resnet_34,resnet_50,resnet_101''') # model type
parser.add_argument('--num_classes', type=int , default = 63,
help = 'num_classes') # original note: 21 hand keypoints, (x,y)*2 = 42; NOTE(review): the default is 63 and the output is reshaped to (21,3) below
parser.add_argument('--GPUS', type=str, default = '0',
help = 'GPUS') # GPU selection
parser.add_argument('--test_path', type=str, default = './image/',
help = 'test_path') # test image path
parser.add_argument('--img_size', type=tuple , default = (256,256),
help = 'img_size') # model input image size
parser.add_argument('--vis', type=bool , default = True,
help = 'vis') # whether to visualize images
print('\n/******************* {} ******************/\n'.format(parser.description))
ops = parser.parse_args()# parse the added arguments
unparsed = vars(ops) # parse_args() returns a namespace; the vars() builtin converts it to a dict
for key in unparsed.keys():
print('{} : {}'.format(key,unparsed[key]))
os.environ['CUDA_VISIBLE_DEVICES'] = ops.GPUS
test_path = ops.test_path # test image folder path
#---------------------------------------------------------------- build model
print('use model : %s'%(ops.model))
if ops.model == 'resnet_50':
model_ = resnet50(num_classes = ops.num_classes,img_size=ops.img_size[0])
elif ops.model == 'resnet_18':
model_ = resnet18(num_classes = ops.num_classes,img_size=ops.img_size[0])
elif ops.model == 'resnet_34':
model_ = resnet34(num_classes = ops.num_classes,img_size=ops.img_size[0])
elif ops.model == 'resnet_101':
model_ = resnet101(num_classes = ops.num_classes,img_size=ops.img_size[0])
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if use_cuda else "cpu")
model_ = model_.to(device)
model_.eval() # switch to inference (eval) mode
# print(model_)# print the model structure
# load the test model
# NOTE(review): in the lines visible here the loaded `chkpt` is never applied
# to `model_` — confirm that a load_state_dict call was not lost.
if os.access(ops.model_path,os.F_OK):# checkpoint
chkpt = torch.load(ops.model_path, map_location=device)
print('load test model : {}'.format(ops.model_path))
#----------------- build the handpose_x 2D keypoint detection model
handpose_2d_model = handpose_x_model()
#----------------- build manopth
g_side = "right"
print('load model finished')
pose, shape = func.initiate("zero")
pre_useful_bone_len = np.zeros((1, 15)) # skeleton (bone) information
solver = LM_Solver(num_Iter=666, th_beta=shape.cpu(), th_pose=pose.cpu(), lb_target=pre_useful_bone_len,
pose0 = torch.eye(3).repeat(1, 16, 1, 1)
mano = manolayer.ManoLayer(flat_hand_mean=True,
print('start ~')
# One-euro smoothing filters for the joints, the mesh vertices and the shape.
point_fliter = smoother.OneEuroFilter(4.0, 0.0)
mesh_fliter = smoother.OneEuroFilter(4.0, 0.0)
shape_fliter = smoother.OneEuroFilter(1.5, 0.0)
#--------------------------- configure point cloud
# view_mat negates the y and z axes of the vertices before display.
view_mat = np.array([[1.0, 0.0, 0.0],
[0.0, -1.0, 0],
[0.0, 0, -1.0]])
mesh = open3d.geometry.TriangleMesh()
hand_verts, j3d_recon = mano(pose0, shape.float())
mesh.triangles = open3d.utility.Vector3iVector(mano.th_faces)
hand_verts = hand_verts.clone().detach().cpu().numpy()[0]
mesh.vertices = open3d.utility.Vector3dVector(hand_verts)
viewer = open3d.visualization.Visualizer()
viewer.create_window(width=640, height=640, window_name='HandPose3d_Mesh')
renderOptions = viewer.get_render_option ()
renderOptions.background_color = np.asarray([120/255,120/255,120/255]) # set background color
# axis_pcd = open3d.create_mesh_coordinate_frame(size=0.5, origin=[0, 0, 0])
# vis.add_geometry(axis_pcd)
pts_flag = True
if pts_flag:
test_pcd = open3d.geometry.PointCloud() # define point cloud
print('start pose estimate')
pre_uv = None
shape_time = 0
opt_shape = None
shape_flag = True
#---------------------------------------------------------------- predict images
with torch.no_grad():
idx = 0
for file in os.listdir(ops.test_path):
# NOTE(review): presumably non-.jpg entries are skipped here; the statement
# under this `if` is not visible in this listing — confirm.
if '.jpg' not in file:
idx += 1
print('{}) image : {}'.format(idx,file))
img = cv2.imread(ops.test_path + file)
img_show = img.copy() # copy used for display
pts_2d_ = handpose_2d_model.predict(img.copy()) # handpose_2d predict
pts_2d_hand = {}
# The 2D prediction is read as interleaved x, y values and scaled by the
# image width and height.
for ptk in range(int(pts_2d_.shape[0]/2)):
xh = pts_2d_[ptk*2+0]*float(img.shape[1])
yh = pts_2d_[ptk*2+1]*float(img.shape[0])
pts_2d_hand[str(ptk)] = {
if ops.vis:
cv2.circle(img_show, (int(xh),int(yh)), 4, (255,50,60),-1)
cv2.circle(img_show, (int(xh),int(yh)), 3, (25,160,255),-1)
if ops.vis:
img_lbox,ratio, dw, dh = letterbox(img.copy(), height=ops.img_size[0], color=(0,0,0))
if ops.vis:
#-------------------------------- get heatmap
x1y1x2y2 = 0,0,0,0
offset_x1,offset_y1 = 0,0
hm,hm_w = get_heatmap(img_lbox.copy(),x1y1x2y2,pts_2d_hand,ratio, dw, dh,offset_x1,offset_y1,vis=False)
if ops.vis:
img_fix_size = img_lbox.astype(np.float32)
img_fix_size_r = img_fix_size.astype(np.float32)
img_fix_size_r = (img_fix_size_r-128.)/256.
# Image and heatmap are concatenated along the channel axis, then moved to
# channel-first layout with a leading batch dimension.
image_fusion = np.concatenate((img_fix_size_r,hm),axis=2)
image_fusion = image_fusion.transpose(2, 0, 1)
image_fusion = torch.from_numpy(image_fusion)
image_fusion = image_fusion.unsqueeze_(0)
if use_cuda:
image_fusion = image_fusion.cuda() # (bs, channel, h, w)
print("image_fusion size : {}".format(image_fusion.size()))
#-------------------------------- # handpose_3d predict
pre_ = model_(image_fusion.float()) # model inference
output = pre_.cpu().detach().numpy()
output = np.squeeze(output)
print("handpose_3d output shape : {}".format(output.shape))
pre_3d_joints = output.reshape((21,3))
print("pre_3d_joints shape : {}".format(pre_3d_joints.shape))
if g_side == "left":
print("------------------->>. left")
pre_3d_joints[:,0] *=(-1.)
pre_3d_joints = torch.tensor(pre_3d_joints).squeeze(0)
# NOTE(review): unconditional .cuda() here, although `use_cuda` guards the
# transfer above — presumably fails without a CUDA device; confirm.
pre_3d_joints= pre_3d_joints.cuda()
# now_uv = result['uv'].clone().detach().cpu().numpy()[0, 0]
# now_uv = now_uv.astype(np.float)
trans = np.zeros((1, 3))
# trans[0, 0:2] = now_uv - 16.0
trans = trans / 16.0
new_tran = np.array([[trans[0, 1], trans[0, 0], trans[0, 2]]])
pre_joints = pre_3d_joints.clone().detach().cpu().numpy()
flited_joints = point_fliter.process(pre_joints)
# fliter_ax.cla()
# filted_ax = vis.plot3d(flited_joints + new_tran, fliter_ax)
pre_useful_bone_len = bone.caculate_length(pre_joints, label="useful")
NGEN = 0 # number of PSO iterations
popsize = 100
low = np.zeros((1, 10)) - 3.0
up = np.zeros((1, 10)) - 2.0
parameters = [NGEN, popsize, low, up]
pso = PSO(parameters, pre_useful_bone_len.reshape((1, 15)),g_side)
if True:#opt_shape is None:
opt_shape = pso.ng_best
opt_shape = shape_fliter.process(opt_shape)
opt_tensor_shape = torch.tensor(opt_shape, dtype=torch.float)
_, j3d_p0_ops = mano(pose0, opt_tensor_shape)
template = j3d_p0_ops.cpu().numpy().squeeze(0) / 1000.0 # template, m 21*3
# Rescale the predicted joints so the joint 0 -> joint 9 distance matches the
# template, then translate them so joint 0 coincides with the template's.
ratio = np.linalg.norm(template[9] - template[0]) / np.linalg.norm(pre_joints[9] - pre_joints[0])
j3d_pre_process = pre_joints * ratio # template, m
j3d_pre_process = j3d_pre_process - j3d_pre_process[0] + template[0]
pose_R = AIK.adaptive_IK(template, j3d_pre_process)
pose_R = torch.from_numpy(pose_R).float()
# reconstruction
hand_verts, j3d_recon = mano(pose_R, opt_tensor_shape.float())
hand_verts[:,:,:] = hand_verts[:,:,:]*(0.85)
# print(j3d_recon.size())
mesh.triangles = open3d.utility.Vector3iVector(mano.th_faces)
hand_verts = hand_verts.clone().detach().cpu().numpy()[0]
hand_verts = mesh_fliter.process(hand_verts)
hand_verts = np.matmul(view_mat, hand_verts.T).T
# NOTE(review): as listed, the -80 and +80 offsets both run for the right hand;
# presumably an `else:` separated them — confirm.
if g_side == "right":
hand_verts[:, 0] = hand_verts[:, 0] - 80
hand_verts[:, 0] = hand_verts[:, 0] + 80
hand_verts[:, 1] = hand_verts[:, 1] - 0
mesh_tran = np.array([[-new_tran[0, 0], new_tran[0, 1], new_tran[0, 2]]])
hand_verts = hand_verts - 100 * mesh_tran
mesh.vertices = open3d.utility.Vector3dVector(hand_verts)
# mesh.paint_uniform_color([252 / 255, 224 / 255, 203 / 255])
mesh.paint_uniform_color([238 / 255, 188 / 255, 158 / 255])
if pts_flag:
if False:
j3d_ = j3d_recon.detach().cpu().numpy()
j3d_[0][:,1] *=(-1.)
# j3d_[0][:,0] +=trans[0,0]
j3d_[0] = j3d_[0] - 100 * mesh_tran
j3d_[0][:,0] -=50
j3d_[0][:,1] -=30
# print(j3d_.shape,j3d_)
test_pcd.points = open3d.utility.Vector3dVector(j3d_[0]) # set point cloud coordinates
# test_pcd.points = open3d.utility.Vector3dVector(hand_verts)
# Flip y, scale and shift the predicted joints before showing them as points.
pre_joints[:,1] *=-1.
pre_joints = pre_joints*70
pre_joints[:,1] -= 40
pre_joints[:,0] -= 0
test_pcd.points = open3d.utility.Vector3dVector(pre_joints)
# test_pcd.points = open3d.utility.Vector3dVector(pre_joints[1,:].reshape(1,3))
# rgb = np.asarray([250,0,250])
# rgb_t = np.transpose(rgb)
# test_pcd.colors = open3d.utility.Vector3dVector(rgb_t.astype(np.float) / 255.0)
# print("hand_verts shape",hand_verts)
# x_min,y_min,x_max,y_max = 65535,65535,0,0
# for i in range(hand_verts.shape[0]):
# x_,y_,z_ = hand_verts[i]
# x_min = x_ if x_min>x_ else x_min
# y_min = y_ if y_min>y_ else y_min
# x_max = x_ if x_max<x_ else x_max
# y_max = y_ if y_max<y_ else y_max
# print("x_min,y_min,x_max,y_max : ",x_min,y_min,x_max,y_max)
if pts_flag:
# Key code 27 is compared against cv2.waitKey's return value (ESC).
if cv2.waitKey(1) == 27:
print('well done ')
# date:2019-05-20
# function: wing loss & adaptive wing loss
import torch
import torch.nn as nn
import torch.optim as optim
import os
import math
# def wing_loss(landmarks, labels, w=0.06, epsilon=0.01):
def wing_loss(landmarks, labels, w=10., epsilon=2.):
    """Compute the wing loss between predicted and target landmarks.

    Absolute errors below ``w`` are penalised with ``w * log(1 + |x| / epsilon)``;
    larger errors are penalised linearly as ``|x| - c``, where ``c`` is chosen
    so that the two pieces meet at ``|x| == w``.

    Arguments:
        landmarks, labels: float tensors with shape [batch_size, landmarks].
            landmarks means x1,x2,x3,x4...y1,y2,y3,y4 (1-D per sample).
        w, epsilon: float numbers.
    Returns:
        a float tensor with shape [].
    """
    x = landmarks - labels
    # Offset that makes the linear branch continuous with the log branch at |x| == w.
    c = w * (1.0 - math.log(1.0 + w / epsilon))
    absolute_x = torch.abs(x)
    # torch.where needs an explicit condition: the log branch applies to small errors.
    losses = torch.where(
        w > absolute_x,
        w * torch.log(1.0 + absolute_x / epsilon),
        absolute_x - c)
    # Mean over the landmarks of each sample, then mean over the batch.
    losses = torch.mean(losses, dim=1, keepdim=True)
    loss = torch.mean(losses)
    return loss
def got_total_wing_loss(output, crop_landmarks):
    """Return the wing loss of ``output`` against ``crop_landmarks``.

    Thin wrapper that calls ``wing_loss`` with its default ``w`` and
    ``epsilon`` values.
    """
    return wing_loss(output, crop_landmarks)
name = 'manopth'
import datetime
import os
import pickle
import subprocess
import sys
def print_args(args):
    """Print every attribute of ``args`` as ``name: value``, sorted by name.

    Args:
        args: an object accepted by ``vars()`` (e.g. an argparse namespace).
    """
    option_table = vars(args)
    print('======= Options ========')
    for name in sorted(option_table):
        print('{}: {}'.format(name, option_table[name]))
def _read_git_info():
    """Return ``(describe_label, head_commit)`` as text, or None if git is unusable."""
    try:
        # Probe quietly first: outside a repository (or without git installed)
        # there is nothing to record and the run must not abort.
        probe = subprocess.call(
            ["git", "branch"],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL)
        if probe != 0:
            return None
        # NOTE(review): the argument after "describe" is truncated in the
        # reviewed listing; "--always" is assumed — confirm.
        label = subprocess.check_output(["git", "describe", "--always"])
        commit = subprocess.check_output(['git', 'rev-parse', 'HEAD'])
    except (OSError, subprocess.CalledProcessError):
        return None
    # Decode so the log shows plain text rather than a bytes repr (b'...').
    return (label.decode(errors='replace').strip(),
            commit.decode(errors='replace').strip())


def save_args(args, save_folder, opt_prefix='opt', verbose=True):
    """Save the options in ``args`` to ``save_folder`` as text and as a pickle.

    Appends a human-readable dump (options, launch command and time, and git
    information when available) to ``<opt_prefix>.txt`` and writes the options
    dictionary to ``<opt_prefix>.pkl``.

    Args:
        args: object accepted by ``vars()`` (e.g. an argparse namespace).
        save_folder: destination directory; created if missing.
        opt_prefix: base name of the two files written.
        verbose: if True, print the path of the text file when done.
    """
    opts = vars(args)
    # Create checkpoint folder
    os.makedirs(save_folder, exist_ok=True)

    # Save options
    opt_filename = '{}.txt'.format(opt_prefix)
    opt_path = os.path.join(save_folder, opt_filename)
    with open(opt_path, 'a') as opt_file:
        opt_file.write('====== Options ======\n')
        for k, v in sorted(opts.items()):
            opt_file.write(
                '{option}: {value}\n'.format(option=str(k), value=str(v)))
        opt_file.write('launched {} at {}\n'.format(
            str(sys.argv[0]), str(datetime.datetime.now())))

        # Add git info
        git_info = _read_git_info()
        if git_info is not None:
            label, commit = git_info
            opt_file.write('=== Git info ====\n')
            opt_file.write('{}\n'.format(label))
            opt_file.write('commit : {}\n'.format(commit))

    opt_picklename = '{}.pkl'.format(opt_prefix)
    opt_picklepath = os.path.join(save_folder, opt_picklename)
    with open(opt_picklepath, 'wb') as opt_file:
        pickle.dump(opts, opt_file)
    if verbose:
        print('Saved options to {}'.format(opt_path))
from matplotlib import pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from mpl_toolkits.mplot3d.art3d import Poly3DCollection
import numpy as np
import torch
from manopth.manolayer import ManoLayer
def generate_random_hand(batch_size=1, ncomps=6, mano_root='mano/models'):
    """Run a ``ManoLayer`` on uniformly random pose coefficients.

    Args:
        batch_size: number of rows of random coefficients to draw.
        ncomps: number of pose components; three more columns are drawn for
            the global orientation.
        mano_root: forwarded to ``ManoLayer`` as ``mano_root``.

    Returns:
        dict with the layer outputs under 'verts' and 'joints' and the layer's
        ``th_faces`` under 'faces'.
    """
    # Add global orientation dims to PCA
    coeff_count = ncomps + 3
    pose_coeffs = torch.rand(batch_size, coeff_count)
    # NOTE(review): ``ncomps`` is not forwarded to ManoLayer — presumably this
    # only lines up with the layer's own default; confirm.
    layer = ManoLayer(mano_root=mano_root)
    hand_verts, hand_joints = layer(pose_coeffs)
    result = {}
    result['verts'] = hand_verts
    result['joints'] = hand_joints
    result['faces'] = layer.th_faces
    return result
def display_hand(hand_info, mano_faces=None, ax=None, alpha=0.2, batch_idx=0, show=True):
    """
    Displays hand batch_idx in batch of hand_info, hand_info as returned by
    generate_random_hand

    Args:
        hand_info: mapping with 'verts' and 'joints' entries indexable by batch.
        mano_faces: face indices used to build a mesh; when None the vertices
            are drawn as a scatter plot instead.
        ax: existing 3D axes to draw on; a new figure is created when None.
        alpha: opacity of the mesh faces.
        batch_idx: which element of the batch to draw.
        show: if True, call ``plt.show()`` at the end.
    """
    if ax is None:
        fig = plt.figure()
        ax = fig.add_subplot(111, projection='3d')
    verts = hand_info['verts'][batch_idx]
    joints = hand_info['joints'][batch_idx]
    if mano_faces is None:
        ax.scatter(verts[:, 0], verts[:, 1], verts[:, 2], alpha=0.1)
    else:
        mesh = Poly3DCollection(verts[mano_faces], alpha=alpha)
        face_color = (141 / 255, 184 / 255, 226 / 255)
        edge_color = (50 / 255, 50 / 255, 50 / 255)
        # NOTE(review): the three calls below are missing from the reviewed
        # listing; they are what makes the colours and the mesh take effect.
        mesh.set_edgecolor(edge_color)
        mesh.set_facecolor(face_color)
        ax.add_collection3d(mesh)
    ax.scatter(joints[:, 0], joints[:, 1], joints[:, 2], color='r')
    cam_equal_aspect_3d(ax, verts.numpy())
    if show:
        plt.show()
def cam_equal_aspect_3d(ax, verts, flip_x=False):
    """
    Centers view on cuboid containing hand and flips y and z axis

    Args:
        ax: axes object exposing ``set_xlim``, ``set_ylim`` and ``set_zlim``.
        verts: array with one row per vertex and three columns.
        flip_x: if True, the x axis is inverted as well.
    """
    extents = np.stack([verts.min(0), verts.max(0)], axis=1)
    sz = extents[:, 1] - extents[:, 0]
    centers = np.mean(extents, axis=1)
    # Use the largest extent for every axis so the view is a cube.
    maxsize = max(abs(sz))
    r = maxsize / 2
    if flip_x:
        ax.set_xlim(centers[0] + r, centers[0] - r)
    else:
        ax.set_xlim(centers[0] - r, centers[0] + r)
    # Invert y and z axis
    ax.set_ylim(centers[1] + r, centers[1] - r)
    ax.set_zlim(centers[2] + r, centers[2] - r)
import os
import numpy as np
import torch
from torch.nn import Module
from mano.webuser.smpl_handpca_wrapper_HAND_only import ready_arguments
from manopth import rodrigues_layer, rotproj, rot6d
from manopth.tensutils import (th_posemap_axisang, th_with_zeros, th_pack,
subtract_flat_id, make_list)
# ManoLayer: torch Module whose forward(), per the body below, turns pose
# coefficients (and optional shape parameters / translation) into hand
# vertices and 21 joints, both scaled by 1000 before being returned.
# NOTE(review): this listing has lost its indentation and several lines
# (the __init__/forward signatures, docstring quotes, `else:` branches) —
# compare against the repository before editing.
class ManoLayer(Module):
# NOTE(review): 'ncomps' is listed twice below.
__constants__ = [
'use_pca', 'rot', 'ncomps', 'ncomps', 'kintree_parents', 'check',
'side', 'center_idx', 'joint_rot_mode'
def __init__(self,
center_idx: index of center joint in our computations,
if -1 centers on estimate of palm as middle of base
of middle finger and wrist
flat_hand_mean: if True, (0, 0, 0, ...) pose coefficients match
flat hand, else match average hand pose
mano_root: path to MANO pkl files for left and right hand
ncomps: number of PCA components form pose space (<45)
side: 'right' or 'left'
use_pca: Use PCA decomposition for pose space.
joint_rot_mode: 'axisang' or 'rotmat', ignored if use_pca
self.center_idx = center_idx
self.robust_rot = robust_rot
# self.rot is the number of leading pose columns used for the root rotation:
# 3 for axis-angle; the 6 below presumably belongs to a missing `else:`.
if root_rot_mode == 'axisang':
self.rot = 3
self.rot = 6
self.flat_hand_mean = flat_hand_mean
self.side = side
self.use_pca = use_pca
self.joint_rot_mode = joint_rot_mode
self.root_rot_mode = root_rot_mode
if use_pca:
self.ncomps = ncomps
self.ncomps = 45
if side == 'right':
self.mano_path = os.path.join(mano_root, 'MANO_RIGHT.pkl')
elif side == 'left':
self.mano_path = os.path.join(mano_root, 'MANO_LEFT.pkl')
smpl_data = ready_arguments(self.mano_path,side)
hands_components = smpl_data['hands_components']
self.smpl_data = smpl_data
# Get hand mean
hands_mean = np.zeros(hands_components.shape[1]
) if flat_hand_mean else smpl_data['hands_mean']
hands_mean = hands_mean.copy()
th_hands_mean = torch.Tensor(hands_mean).unsqueeze(0)
if self.use_pca or self.joint_rot_mode == 'axisang':
# Save as axis-angle
self.register_buffer('th_hands_mean', th_hands_mean)
# NOTE(review): `selected_components` is not registered in the visible lines,
# while forward() reads self.th_selected_comps — a line may be missing; confirm.
selected_components = hands_components[:ncomps]
self.register_buffer('th_comps', torch.Tensor(hands_components))
th_hands_mean_rotmat = rodrigues_layer.batch_rodrigues(
th_hands_mean.view(15, 3)).reshape(15, 3, 3)
self.register_buffer('th_hands_mean_rotmat', th_hands_mean_rotmat)
# Kinematic chain params
self.kintree_table = smpl_data['kintree_table']
parents = list(self.kintree_table[0].tolist())
self.kintree_parents = parents
def forward(self,
th_trans (Tensor (batch_size x ncomps)): if provided, applies trans to joints and vertices
th_betas (Tensor (batch_size x 10)): if provided, uses given shape parameters for hand shape
else centers on root joint (9th joint)
root_palm: return palm as hand root instead of wrist
# if len(th_pose_coeffs) == 0:
# return th_pose_coeffs.new_empty(0), th_pose_coeffs.new_empty(0)
batch_size = th_pose_coeffs.shape[0]
# Get axis angle from PCA components and coefficients
if self.use_pca or self.joint_rot_mode == 'axisang':
# Remove global rot coeffs
th_hand_pose_coeffs = th_pose_coeffs[:, self.rot:self.rot +
if self.use_pca:
# PCA components --> axis angles
th_full_hand_pose = th_hand_pose_coeffs.mm(self.th_selected_comps)
th_full_hand_pose = th_hand_pose_coeffs
# Concatenate back global rot
th_full_pose = torch.cat([
th_pose_coeffs[:, :self.rot],
self.th_hands_mean + th_full_hand_pose
], 1)
if self.root_rot_mode == 'axisang':
# compute rotation matrixes from axis-angle while skipping global rotation
th_pose_map, th_rot_map = th_posemap_axisang(th_full_pose)
root_rot = th_rot_map[:, :9].view(batch_size, 3, 3)
th_rot_map = th_rot_map[:, 9:]
th_pose_map = th_pose_map[:, 9:]
# th_posemap offsets by 3, so add offset or 3 to get to self.rot=6
th_pose_map, th_rot_map = th_posemap_axisang(th_full_pose[:, 6:])
if self.robust_rot:
root_rot = rot6d.robust_compute_rotation_matrix_from_ortho6d(th_full_pose[:, :6])
root_rot = rot6d.compute_rotation_matrix_from_ortho6d(th_full_pose[:, :6])
# Rotation-matrix input path: the pose must be 4-D with trailing 3x3 matrices,
# which are projected by rotproj.batch_rotprojs before use.
assert th_pose_coeffs.dim() == 4, (
'When not self.use_pca, '
'th_pose_coeffs should have 4 dims, got {}'.format(
assert th_pose_coeffs.shape[2:4] == (3, 3), (
'When not self.use_pca, th_pose_coeffs have 3x3 matrix for two'
'last dims, got {}'.format(th_pose_coeffs.shape[2:4]))
th_pose_rots = rotproj.batch_rotprojs(th_pose_coeffs)
th_rot_map = th_pose_rots[:, 1:].view(batch_size, -1)
th_pose_map = subtract_flat_id(th_rot_map)
root_rot = th_pose_rots[:, 0]
# Full axis angle representation with root joint
if th_betas is None or th_betas.numel() == 1:
th_v_shaped = torch.matmul(self.th_shapedirs,
self.th_betas.transpose(1, 0)).permute(
2, 0, 1) + self.th_v_template
th_j = torch.matmul(self.th_J_regressor, th_v_shaped).repeat(
batch_size, 1, 1)
if share_betas:
th_betas = th_betas.mean(0, keepdim=True).expand(th_betas.shape[0], 10)
th_v_shaped = torch.matmul(self.th_shapedirs,
th_betas.transpose(1, 0)).permute(
2, 0, 1) + self.th_v_template
th_j = torch.matmul(self.th_J_regressor, th_v_shaped)
# th_pose_map should have shape 20x135
# NOTE(review): from here on tensors are moved with hard-coded .cuda() calls —
# presumably forward() requires a CUDA device; confirm.
th_v_posed = th_v_shaped.cuda() + torch.matmul(
self.th_posedirs.cuda(), th_pose_map.cuda().transpose(0, 1)).permute(2, 0, 1)
# Final T pose with transformation done !
# Global rigid transformation
root_j = th_j[:, 0, :].contiguous().view(batch_size, 3, 1)
root_trans = th_with_zeros(torch.cat([root_rot.cuda(), root_j.cuda()], 2))
all_rots = th_rot_map.view(th_rot_map.shape[0], 15, 3, 3)
# The 15 non-root joints are handled as three levels of five joints each; the
# level-k transform is the level-(k-1) transform times the relative transform.
lev1_idxs = [1, 4, 7, 10, 13]
lev2_idxs = [2, 5, 8, 11, 14]
lev3_idxs = [3, 6, 9, 12, 15]
lev1_rots = all_rots[:, [idx - 1 for idx in lev1_idxs]]
lev2_rots = all_rots[:, [idx - 1 for idx in lev2_idxs]]
lev3_rots = all_rots[:, [idx - 1 for idx in lev3_idxs]]
lev1_j = th_j[:, lev1_idxs]
lev2_j = th_j[:, lev2_idxs]
lev3_j = th_j[:, lev3_idxs]
# From base to tips
# Get lev1 results
all_transforms = [root_trans.unsqueeze(1)]
lev1_j_rel = lev1_j - root_j.transpose(1, 2)
lev1_rel_transform_flt = th_with_zeros(torch.cat([lev1_rots.cuda(), lev1_j_rel.cuda().unsqueeze(3)], 3).view(-1, 3, 4))
root_trans_flt = root_trans.unsqueeze(1).repeat(1, 5, 1, 1).view(root_trans.shape[0] * 5, 4, 4)
lev1_flt = torch.matmul(root_trans_flt.cuda(), lev1_rel_transform_flt.cuda())
all_transforms.append(lev1_flt.view(all_rots.shape[0], 5, 4, 4))
# Get lev2 results
lev2_j_rel = lev2_j - lev1_j
lev2_rel_transform_flt = th_with_zeros(torch.cat([lev2_rots.cuda(), lev2_j_rel.cuda().unsqueeze(3)], 3).view(-1, 3, 4))
lev2_flt = torch.matmul(lev1_flt.cuda(), lev2_rel_transform_flt.cuda())
all_transforms.append(lev2_flt.view(all_rots.shape[0], 5, 4, 4))
# Get lev3 results
lev3_j_rel = lev3_j - lev2_j
lev3_rel_transform_flt = th_with_zeros(torch.cat([lev3_rots.cuda(), lev3_j_rel.cuda().unsqueeze(3)], 3).view(-1, 3, 4))
lev3_flt = torch.matmul(lev2_flt.cuda(), lev3_rel_transform_flt.cuda())
all_transforms.append(lev3_flt.view(all_rots.shape[0], 5, 4, 4))
# The concatenation is ordered root, level 1, level 2, level 3; reorder_idxs
# interleaves the levels back into joint-index order.
reorder_idxs = [0, 1, 6, 11, 2, 7, 12, 3, 8, 13, 4, 9, 14, 5, 10, 15]
th_results = torch.cat(all_transforms, 1)[:, reorder_idxs]
th_results_global = th_results
joint_js = torch.cat([th_j, th_j.new_zeros(th_j.shape[0], 16, 1)], 2)
tmp2 = torch.matmul(th_results.cuda(), joint_js.cuda().unsqueeze(3))
th_results2 = (th_results - torch.cat([tmp2.new_zeros(*tmp2.shape[:2], 4, 3), tmp2], 3)).permute(0, 2, 3, 1)
th_T = torch.matmul(th_results2.cuda(), self.th_weights.cuda().transpose(0, 1)).cuda()
# print("th_T.device",th_T.device)
# print("th_v_posed.device",th_v_posed.device)
th_rest_shape_h = torch.cat([
th_v_posed.cuda().transpose(2, 1),
torch.ones((batch_size, 1, th_v_posed.shape[1]),
], 1)
th_verts = (th_T * th_rest_shape_h.unsqueeze(1)).sum(2).transpose(2, 1)
th_verts = th_verts[:, :, :3]
th_jtr = th_results_global[:, :, :3, 3]
# In addition to MANO reference joints we sample vertices on each finger
# to serve as finger tips
# NOTE(review): the two index lists differ only in the third entry (444 vs
# 445); the second presumably belongs to a missing `else:` — confirm.
if self.side == 'right':
tips = th_verts[:, [745, 317, 444, 556, 673]]
tips = th_verts[:, [745, 317, 445, 556, 673]]
if bool(root_palm):
palm = (th_verts[:, 95] + th_verts[:, 22]).unsqueeze(1) / 2
th_jtr = torch.cat([palm, th_jtr[:, 1:]], 1)
th_jtr = torch.cat([th_jtr.cuda(), tips.cuda()], 1)
# Reorder joints to match visualization utilities
th_jtr = th_jtr[:, [0, 13, 14, 15, 16, 1, 2, 3, 17, 4, 5, 6, 18, 10, 11, 12, 19, 7, 8, 9, 20]]
if th_trans is None or bool(torch.norm(th_trans) == 0):
if self.center_idx is not None:
center_joint = th_jtr[:, self.center_idx].unsqueeze(1)
th_jtr = th_jtr - center_joint
th_verts = th_verts - center_joint
th_jtr = th_jtr + th_trans.unsqueeze(1)
th_verts = th_verts + th_trans.unsqueeze(1)
# Scale to millimeters
th_verts = th_verts * 1000
th_jtr = th_jtr * 1000
return th_verts, th_jtr
"""
This part reuses code from https://github.com/MandyMo/pytorch_HMR/blob/master/src/util.py
which is part of a PyTorch port of SMPL.
Thanks to Zhang Xiong (MandyMo) for making this great code available on GitHub!
"""
import argparse
from torch.autograd import gradcheck
import torch
from torch.autograd import Variable
from manopth import argutils
def quat2mat(quat):
    """Convert quaternion coefficients to rotation matrix.

    The input is normalised to unit length first, so any non-zero
    quaternion is accepted.

    Args:
        quat: size = [batch_size, 4] 4 <===>(w, x, y, z)
    Returns:
        Rotation matrix corresponding to the quaternion -- size = [batch_size, 3, 3]
    """
    norm_quat = quat
    norm_quat = norm_quat / norm_quat.norm(p=2, dim=1, keepdim=True)
    w, x, y, z = (norm_quat[:, 0], norm_quat[:, 1],
                  norm_quat[:, 2], norm_quat[:, 3])

    batch_size = quat.size(0)

    w2, x2, y2, z2 = w.pow(2), x.pow(2), y.pow(2), z.pow(2)
    wx, wy, wz = w * x, w * y, w * z
    xy, xz, yz = x * y, x * z, y * z

    # Nine entries in row-major order, stacked per sample and reshaped to 3x3.
    rotMat = torch.stack([
        w2 + x2 - y2 - z2, 2 * xy - 2 * wz, 2 * wy + 2 * xz,
        2 * wz + 2 * xy, w2 - x2 + y2 - z2, 2 * yz - 2 * wx,
        2 * xz - 2 * wy, 2 * wx + 2 * yz, w2 - x2 - y2 + z2
    ], dim=1).view(batch_size, 3, 3)
    return rotMat
def batch_rodrigues(axisang):
    """Convert a batch of axis-angle vectors to flattened rotation matrices.

    Each row is turned into a quaternion (half-angle cosine, half-angle sine
    times the normalised axis) and passed through ``quat2mat``.

    Args:
        axisang: tensor with one axis-angle vector per row (N x 3).
    Returns:
        Tensor with the nine rotation-matrix entries per row (N x 9).
    """
    # 1e-8 is added to every component before taking the norm, which keeps the
    # division below away from an exact zero for a null rotation.
    theta = torch.norm(axisang + 1e-8, p=2, dim=1).unsqueeze(-1)
    unit_axis = axisang / theta
    half_theta = theta * 0.5
    quat = torch.cat(
        [torch.cos(half_theta), torch.sin(half_theta) * unit_axis], dim=1)
    matrices = quat2mat(quat)
    return matrices.view(matrices.shape[0], 9)
def th_get_axis_angle(vector):
    """Split axis-angle vectors into unit axes and rotation angles.

    Args:
        vector: tensor with one axis-angle vector per row.
    Returns:
        (axes, angle): the rows divided by their L2 norm, and those norms.
    """
    angle = vector.norm(p=2, dim=1)
    axes = vector / angle[:, None]
    return axes, angle
# Self-test entry point: runs torch.autograd.gradcheck on batch_rodrigues
# using random double-precision inputs.
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--batch_size', default=1, type=int)
parser.add_argument('--cuda', action='store_true')
args = parser.parse_args()
n_components = 6
rot = 3
inputs = torch.rand(args.batch_size, rot)
inputs_var = Variable(inputs.double(), requires_grad=True)
# NOTE(review): `inputs` is moved to CUDA only after `inputs_var` was built
# from it, and gradcheck below receives `inputs_var` — so --cuda does not
# appear to affect the tensor that is actually checked; confirm.
if args.cuda:
inputs = inputs.cuda()
# outputs = batch_rodrigues(inputs)
test_function = gradcheck(batch_rodrigues, (inputs_var, ))
print('batch test passed !')
# NOTE(review): th_cv2_rod_sub_id and th_cv2_rod are not defined or imported
# in the lines visible here — presumably these two checks raise NameError
# unless the names exist elsewhere; confirm. The two success messages also
# look swapped relative to the function each check names.
inputs = torch.rand(rot)
inputs_var = Variable(inputs.double(), requires_grad=True)
test_function = gradcheck(th_cv2_rod_sub_id.apply, (inputs_var, ))
print('th_cv2_rod test passed')
inputs = torch.rand(rot)
inputs_var = Variable(inputs.double(), requires_grad=True)
test_th = gradcheck(th_cv2_rod.apply, (inputs_var, ))
print('th_cv2_rod_id test passed !')
import torch
def compute_rotation_matrix_from_ortho6d(poses):
    """
    Code from
    On the Continuity of Rotation Representations in Neural Networks
    Zhou et al. CVPR19

    Builds a 3x3 matrix per row of ``poses``: the first three values give the
    first column (after normalisation), and the next three are used only to
    derive the remaining two columns through cross products.

    Args:
        poses: tensor whose first six columns hold the two raw 3-vectors.
    Returns:
        Tensor of shape batch*3*3 with the three vectors as columns.
    """
    x_raw = poses[:, 0:3]  # batch*3
    y_raw = poses[:, 3:6]  # batch*3

    x = normalize_vector(x_raw)  # batch*3
    z = cross_product(x, y_raw)  # batch*3
    z = normalize_vector(z)  # batch*3
    # y is recomputed from z and x so the three columns are mutually orthogonal.
    y = cross_product(z, x)  # batch*3

    x = x.view(-1, 3, 1)
    y = y.view(-1, 3, 1)
    z = z.view(-1, 3, 1)
    matrix = torch.cat((x, y, z), 2)  # batch*3*3
    return matrix
def robust_compute_rotation_matrix_from_ortho6d(poses):
    """
    Instead of making 2nd vector orthogonal to first
    create a base that takes into account the two predicted
    directions equally

    Args:
        poses: tensor whose first six columns hold the two raw 3-vectors.
    Returns:
        Tensor of shape batch*3*3 with the three basis vectors as columns.
    Raises:
        AssertionError: if any resulting matrix has a negative determinant.
    """
    x_raw = poses[:, 0:3]  # batch*3
    y_raw = poses[:, 3:6]  # batch*3

    x = normalize_vector(x_raw)  # batch*3
    y = normalize_vector(y_raw)  # batch*3
    # The normalised sum and difference of the two directions are combined
    # symmetrically to produce the new x and y.
    middle = normalize_vector(x + y)
    orthmid = normalize_vector(x - y)
    x = normalize_vector(middle + orthmid)
    y = normalize_vector(middle - orthmid)
    # Their scalar product should be small !
    # assert torch.einsum("ij,ij->i", [x, y]).abs().max() < 0.00001
    z = normalize_vector(cross_product(x, y))

    x = x.view(-1, 3, 1)
    y = y.view(-1, 3, 1)
    z = z.view(-1, 3, 1)
    matrix = torch.cat((x, y, z), 2)  # batch*3*3
    # Check for reflection in matrix ! If found, flip last vector TODO
    # Batched determinant: one call for the whole batch, and unlike stacking
    # per-matrix results it also works when the batch is empty.
    assert (torch.det(matrix) < 0).sum() == 0
    return matrix
def normalize_vector(v):
    """Scale each row of ``v`` to unit L2 norm.

    The norm is clamped from below at 1e-8, so an all-zero row is returned
    as zeros instead of producing NaN.

    Args:
        v: 2-D tensor, one vector per row.
    Returns:
        Tensor of the same shape with normalised rows.
    """
    n_rows, n_cols = v.shape[0], v.shape[1]
    magnitude = v.pow(2).sum(1).sqrt()  # batch
    magnitude = torch.max(magnitude, v.new([1e-8]))
    return v / magnitude.view(n_rows, 1).expand(n_rows, n_cols)
def cross_product(u, v):
    """Row-wise cross product of two batches of 3-vectors.

    Args:
        u, v: tensors with one 3-vector per row.
    Returns:
        Tensor with one row per input row holding ``u x v``.
    """
    n_rows = u.shape[0]
    ux, uy, uz = u[:, 0], u[:, 1], u[:, 2]
    vx, vy, vz = v[:, 0], v[:, 1], v[:, 2]
    components = (
        uy * vz - uz * vy,
        uz * vx - ux * vz,
        ux * vy - uy * vx,
    )
    return torch.cat([comp.view(n_rows, 1) for comp in components], 1)
import torch
def batch_rotprojs(batches_rotmats):
    """Project every 3x3 matrix of a batch onto a rotation matrix.

    Each matrix is replaced by ``U @ V^T`` from its SVD; if that product has
    a negative determinant, its last column is negated to remove the
    reflection.

    Args:
        batches_rotmats: tensor iterated as batch -> joints -> 3x3 matrix.
    Returns:
        Tensor with the same batch/joint layout holding the projected
        matrices, each on the device of the matrix it was computed from.
    """
    proj_rotmats = []
    for batch_rotmats in batches_rotmats:
        proj_batch_rotmats = []
        for rotmat in batch_rotmats:
            # GPU implementation of svd is VERY slow
            # ~ 2 10^-3 per hit vs 5 10^-5 on cpu
            U, _, V = rotmat.cpu().svd()
            projected = torch.matmul(U, V.transpose(0, 1))
            # Remove reflection
            if projected.det() < 0:
                projected[:, 2] = -1 * projected[:, 2]
            # Return to the input's own device instead of a hard-coded
            # .cuda(), so the function also works on CPU-only machines.
            proj_batch_rotmats.append(projected.to(rotmat.device))
        proj_rotmats.append(torch.stack(proj_batch_rotmats))
    return torch.stack(proj_rotmats)
import torch
from manopth import rodrigues_layer
def th_posemap_axisang(pose_vectors):
    """Convert flattened axis-angle poses to rotation matrices and pose maps.

    Args:
        pose_vectors: tensor with ``3 * k`` axis-angle values per row.
    Returns:
        (pose_maps, rot_mats): ``rot_mats`` holds ``9 * k`` rotation-matrix
        entries per row; ``pose_maps`` is ``rot_mats`` with the flattened
        identity subtracted from every rotation.
    """
    n_batch = pose_vectors.shape[0]
    n_rot = int(pose_vectors.shape[1] / 3)
    flat_axisang = pose_vectors.contiguous().view(-1, 3)
    rot_mats = rodrigues_layer.batch_rodrigues(flat_axisang).view(
        n_batch, n_rot * 9)
    return subtract_flat_id(rot_mats), rot_mats
def th_with_zeros(tensor):
    """Append the row ``[0, 0, 0, 1]`` to every matrix of a batch.

    Args:
        tensor: batch of matrices with four columns.
    Returns:
        Tensor with one extra row per matrix (concatenated along dim 1).
    """
    n_batch = tensor.shape[0]
    bottom_row = tensor.new([0.0, 0.0, 0.0, 1.0])
    bottom_row.requires_grad = False
    tiled_rows = bottom_row.view(1, 1, 4).repeat(n_batch, 1, 1)
    return torch.cat([tensor, tiled_rows], 1)
def th_pack(tensor):
    """Prepend three zero columns to every matrix of a batch.

    Args:
        tensor: batch of matrices with four rows.
    Returns:
        Tensor with a ``batch x 4 x 3`` block of zeros concatenated in front
        of ``tensor`` along dim 2.
    """
    n_batch = tensor.shape[0]
    zero_block = tensor.new_zeros((n_batch, 4, 3))
    zero_block.requires_grad = False
    return torch.cat([zero_block, tensor], 2)
def subtract_flat_id(rot_mats):
    """Subtract the flattened 3x3 identity from every rotation in each row.

    Args:
        rot_mats: tensor with ``9 * k`` values per row (k flattened matrices).
    Returns:
        Tensor of the same shape: ``rot_mats`` minus a tiled flat identity.
    """
    n_rot = int(rot_mats.shape[1] / 9)
    flat_eye = torch.eye(
        3, dtype=rot_mats.dtype, device=rot_mats.device).view(1, 9)
    tiled_eye = flat_eye.repeat(rot_mats.shape[0], n_rot)
    return rot_mats - tiled_eye
def make_list(tensor):
    # type: (List[int]) -> List[int]
    """Return ``tensor`` unchanged; the type comment above declares it a list of ints."""
    result = tensor
    return result
import torch
import torch.nn as nn
import math
import torch.utils.model_zoo as model_zoo
__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
model_urls = {
'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
def conv3x3(in_planes, out_planes, stride=1):
    """3x3 convolution with padding 1 and no bias term."""
    conv_options = dict(kernel_size=3, stride=stride, padding=1, bias=False)
    return nn.Conv2d(in_planes, out_planes, **conv_options)
class BasicBlock(nn.Module):
    """Residual block made of two 3x3 convolutions, each followed by batch norm.

    Args:
        inplanes: number of input channels.
        planes: number of output channels.
        stride: stride of the first convolution.
        downsample: optional module applied to the input before it is added
            back as the residual.
    """

    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super().__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Return ``relu(bn2(conv2(relu(bn1(conv1(x))))) + shortcut(x))``."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        shortcut = x
        if self.downsample is not None:
            shortcut = self.downsample(x)

        out += shortcut
        return self.relu(out)
class Bottleneck(nn.Module):
    """Residual block with a 1x1 -> 3x3 -> 1x1 convolution stack.

    The last convolution outputs ``planes * 4`` channels.

    Args:
        inplanes: number of input channels.
        planes: channel width of the first two convolutions.
        stride: stride of the 3x3 convolution.
        downsample: optional module applied to the input before it is added
            back as the residual.
    """

    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super().__init__()
        wide_planes = planes * 4
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(
            planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, wide_planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(wide_planes)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Apply the three conv/bn stages, add the shortcut, then ReLU."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        shortcut = x
        if self.downsample is not None:
            shortcut = self.downsample(x)

        out += shortcut
        return self.relu(out)
class ResNet(nn.Module):
    """ResNet backbone with a 24-channel input stem and a linear output head.

    Args:
        block: residual block class exposing an ``expansion`` class attribute
            and an ``(inplanes, planes, stride, downsample)`` constructor.
        layers: number of blocks in each of the four stages.
        num_classes: output size of the final linear layer.
        img_size: input side length; must be a multiple of 32 because it
            determines the kernel of the final average pooling.
        dropout_factor: passed to ``nn.Dropout`` as its ``p`` argument.
            NOTE(review): in ``nn.Dropout`` ``p`` is the probability of
            zeroing an element, so the default 1.0 presumably zeroes all
            features in training mode — confirm the intended meaning.
    """

    def __init__(self, block, layers, num_classes=1000, img_size=224,dropout_factor = 1.):
        self.inplanes = 64
        self.dropout_factor = dropout_factor
        super(ResNet, self).__init__()
        # NOTE(review): the tail of this call is truncated in the reviewed
        # listing; ``bias=False`` is assumed, matching the other convolutions.
        self.conv1 = nn.Conv2d(24, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        # see this issue: https://github.com/xxradon/PytorchToCaffe/issues/16
        # self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

        assert img_size % 32 == 0
        pool_kernel = int(img_size / 32)
        self.avgpool = nn.AvgPool2d(pool_kernel, stride=1, ceil_mode=True)

        self.dropout = nn.Dropout(self.dropout_factor)

        self.fc = nn.Linear(512 * block.expansion, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                # Normal init with variance 2 / (kernel area * out channels).
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                # NOTE(review): this branch body is missing from the reviewed
                # listing; the usual unit-scale / zero-shift init is assumed.
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1):
        """Build one stage of ``blocks`` residual blocks.

        A 1x1 conv + batch-norm shortcut is created for the first block when
        the stride or the channel count changes.
        """
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def forward(self, x):
        """Run the stem, the four stages, pooling, dropout and the linear head."""
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.dropout(x)
        x = self.fc(x)
        return x
def load_model(model, pretrained_state_dict):
    """Copy every compatible tensor of a pretrained state dict into ``model``.

    A pretrained entry is used only when ``model`` has a parameter with the
    same key and the same size; everything else is left at the model's
    current value. One line is printed per pretrained key, saying whether it
    was loaded or skipped.

    Args:
        model: module exposing ``state_dict()`` and ``load_state_dict()``.
        pretrained_state_dict: mapping of parameter name to tensor.

    Returns:
        The same ``model`` object, updated in place.
    """
    model_dict = model.state_dict()
    pretrained_dict = {
        k: v
        for k, v in pretrained_state_dict.items()
        if k in model_dict and model_dict[k].size() == v.size()
    }
    # strict=False: keys absent from the filtered dict keep their current values.
    model.load_state_dict(pretrained_dict, strict=False)

    if not pretrained_dict:
        print("[INFO] No params were loaded ...")

    for k, v in pretrained_state_dict.items():
        # Report each key exactly once: either as loaded or as skipped.
        if k in pretrained_dict:
            print("==>> Load {} {}".format(k, v.size()))
        else:
            print("[INFO] Skip {} {}".format(k, v.size()))
    return model
def _build_resnet(arch, block, layers, pretrained, **kwargs):
    """Instantiate a ResNet variant and optionally load its pretrained weights.

    Args:
        arch: key into ``model_urls`` (e.g. ``'resnet50'``).
        block: residual block class handed to ``ResNet``.
        layers: number of blocks in each of the four stages.
        pretrained: when true, download the weights for ``arch`` and copy the
            compatible tensors into the model through ``load_model``.
        **kwargs: forwarded unchanged to ``ResNet``.

    Returns:
        The constructed model.
    """
    model = ResNet(block, layers, **kwargs)
    if pretrained:
        url = model_urls[arch]
        print("Load pretrained model from {}".format(url))
        pretrained_state_dict = model_zoo.load_url(url)
        model = load_model(model, pretrained_state_dict)
    return model


def resnet18(pretrained=False, **kwargs):
    """Constructs a ResNet-18 model.

    Args:
        pretrained (bool): If True, loads the weights published under
            ``model_urls['resnet18']`` where shapes match.
    """
    return _build_resnet('resnet18', BasicBlock, [2, 2, 2, 2], pretrained, **kwargs)


def resnet34(pretrained=False, **kwargs):
    """Constructs a ResNet-34 model.

    Args:
        pretrained (bool): If True, loads the weights published under
            ``model_urls['resnet34']`` where shapes match.
    """
    return _build_resnet('resnet34', BasicBlock, [3, 4, 6, 3], pretrained, **kwargs)


def resnet50(pretrained=False, **kwargs):
    """Constructs a ResNet-50 model.

    Args:
        pretrained (bool): If True, loads the weights published under
            ``model_urls['resnet50']`` where shapes match.
    """
    return _build_resnet('resnet50', Bottleneck, [3, 4, 6, 3], pretrained, **kwargs)


def resnet101(pretrained=False, **kwargs):
    """Constructs a ResNet-101 model.

    Args:
        pretrained (bool): If True, loads the weights published under
            ``model_urls['resnet101']`` where shapes match.
    """
    return _build_resnet('resnet101', Bottleneck, [3, 4, 23, 3], pretrained, **kwargs)


def resnet152(pretrained=False, **kwargs):
    """Constructs a ResNet-152 model.

    Args:
        pretrained (bool): If True, loads the weights published under
            ``model_urls['resnet152']`` where shapes match.
    """
    return _build_resnet('resnet152', Bottleneck, [3, 8, 36, 3], pretrained, **kwargs)
if __name__ == "__main__":
    # Smoke test: push one random 24-channel 256x256 batch through ResNet-34.
    # The tensor is not called `input` so the builtin of that name stays usable.
    dummy_batch = torch.randn([1, 24, 256, 256])
    model = resnet34(True, num_classes=63, img_size=256)
    output = model(dummy_batch)
# date:2021-06-15
# Author: Eric.Lee
## function: train
import os
import argparse
import torch
import torch.nn as nn
import torch.optim as optim
import sys
from utils.model_utils import *
from utils.common_utils import *
from e3d_data_iter.datasets import *
from models.resnet import resnet18,resnet34,resnet50,resnet101
from torchvision.models import shufflenet_v2_x1_5 ,shufflenet_v2_x1_0 , shufflenet_v2_x2_0
from loss.loss import *
import cv2
import time
import json
from datetime import datetime
import random
def trainer(ops,f_log):
# Train the selected ResNet on the hand-pose dataset described by `ops`.
# ops: parsed command-line namespace (model, img_size, loss_define, epochs, ...).
# f_log: open log file that replaces sys.stdout when ops.log_flag is set, else None.
# A checkpoint is written after the epoch loop body via torch.save (see the end).
# NOTE(review): several lines of this function are not visible in this view
# (else branches, DataLoader arguments, the backward/step calls); the notes
# below mark each gap.
if 1:
os.environ['CUDA_VISIBLE_DEVICES'] = ops.GPUS
if ops.log_flag:
sys.stdout = f_log
#---------------------------------------------------------------- build the model
if ops.model == 'resnet_50':
model_ = resnet50(pretrained = True,num_classes = ops.num_classes,img_size = ops.img_size[0],dropout_factor=ops.dropout)
elif ops.model == 'resnet_18':
model_ = resnet18(pretrained = True,num_classes = ops.num_classes,img_size = ops.img_size[0],dropout_factor=ops.dropout)
elif ops.model == 'resnet_34':
model_ = resnet34(pretrained = True,num_classes = ops.num_classes,img_size = ops.img_size[0],dropout_factor=ops.dropout)
elif ops.model == 'resnet_101':
model_ = resnet101(pretrained = True,num_classes = ops.num_classes,img_size = ops.img_size[0],dropout_factor=ops.dropout)
# NOTE(review): presumably the fallback branch for an unknown ops.model; the
# branch keyword is not visible here.
print(" no support the model")
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if use_cuda else "cpu")
model_ = model_.to(device)
# print(model_)# print the model structure
# Dataset
dataset = LoadImagesAndLabels(ops= ops,img_size=ops.img_size,flag_agu=ops.flag_agu,vis = False)
print("handpose done")
print('len train datasets : %s'%(dataset.__len__()))
# Dataloader
# NOTE(review): only the first and last arguments of this call are visible.
dataloader = DataLoader(dataset,
drop_last = True)
# Optimizer setup
optimizer_Adam = torch.optim.Adam(model_.parameters(), lr=ops.init_lr, betas=(0.9, 0.99),weight_decay=1e-6)
# optimizer_SGD = optim.SGD(model_.parameters(), lr=ops.init_lr, momentum=ops.momentum, weight_decay=ops.weight_decay)# optimizer initialisation
optimizer = optimizer_Adam
# Load the fine-tune model
# NOTE(review): the checkpoint is read into `chkpt`, but no call applying it to
# model_ is visible in this view.
if os.access(ops.fintune_model,os.F_OK):# checkpoint
chkpt = torch.load(ops.fintune_model, map_location=device)
print('load fintune model : {}'.format(ops.fintune_model))
# Loss function
if ops.loss_define == 'mse_loss':
criterion = nn.MSELoss(reduce=True, reduction='mean')
step = 0
idx = 0
# Variable initialisation
best_loss = np.inf
loss_mean = 0. # running sum of the batch losses
loss_idx = 0. # number of batches accumulated in loss_mean
flag_change_lr_cnt = 0 # epochs counted towards the next learning-rate decay
init_lr = ops.init_lr # current learning rate
epochs_loss_dict = {}
for epoch in range(0, ops.epochs):
if ops.log_flag:
sys.stdout = f_log
print('\nepoch %d ------>>>'%epoch)
# Learning-rate update policy: track the best mean epoch loss and multiply the
# learning rate by ops.lr_decay once the counter exceeds 50.
# NOTE(review): the branch structure around the counter increment is not fully
# visible here.
if loss_mean!=0.:
if best_loss > (loss_mean/loss_idx):
flag_change_lr_cnt = 0
best_loss = (loss_mean/loss_idx)
flag_change_lr_cnt += 1
if flag_change_lr_cnt > 50:
init_lr = init_lr*ops.lr_decay
set_learning_rate(optimizer, init_lr)
flag_change_lr_cnt = 0
loss_mean = 0. # running sum of the batch losses
loss_idx = 0. # number of batches accumulated in loss_mean
for i, (imgs_, pts_) in enumerate(dataloader):
# print('imgs_, pts_',imgs_.size(), pts_.size())
if use_cuda:
imgs_ = imgs_.cuda() # PyTorch input layout: (batch, channel, height, width)
pts_ = pts_.cuda()
output = model_(imgs_.float())
# NOTE(review): the keyword separating the wing-loss and criterion branches is
# not visible; as shown, the second assignment overwrites the first.
if ops.loss_define == 'wing_loss':
loss = got_total_wing_loss(output, pts_.float())
loss = criterion(output, pts_.float())
loss_mean += loss.item()
loss_idx += 1.
# Progress line every 10 batches.
if i%10 == 0:
loc_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
print(' %s - %s - epoch [%s/%s] (%s/%s):'%(loc_time,ops.model,epoch,ops.epochs,i,int(dataset.__len__()/ops.batch_size)),\
'Mean Loss : %.6f - Loss: %.6f'%(loss_mean/loss_idx,loss.item()),\
' lr : %.8f'%init_lr,' bs :',ops.batch_size,\
' img_size: %s x %s'%(ops.img_size[0],ops.img_size[1]),' best_loss: %.6f'%best_loss, " {}".format(ops.loss_define))
# NOTE(review): the three comments below have no accompanying statements in this
# view (no backward / optimizer step / zero_grad call is visible).
# Compute gradients
# Optimizer updates the model parameters
# Zero the optimizer gradients
step += 1
# Save a per-epoch checkpoint and overwrite the "latest" checkpoint.
torch.save(model_.state_dict(), ops.model_exp + '{}-size-{}-loss-{}-model_epoch-{}.pth'.format(ops.model,ops.img_size[0],ops.loss_define,epoch))
torch.save(model_.state_dict(), ops.model_exp + '{}-size-{}-latest.pth'.format(ops.model,ops.img_size[0]))
# except Exception as e:
# print('Exception : ',e) # print the exception
# print('Exception file : ', e.__traceback__.tb_frame.f_globals['__file__'])# file in which the exception occurred
# print('Exception line : ', e.__traceback__.tb_lineno)# line on which the exception occurred
if __name__ == "__main__":

    def _str2bool(value):
        """Parse a command-line boolean.

        argparse's ``type=bool`` calls ``bool()`` on the raw string, so every
        non-empty value (including "False") becomes True. The flags below are
        therefore parsed explicitly.
        """
        if isinstance(value, bool):
            return value
        text = value.strip().lower()
        if text in ('true', 't', 'yes', 'y', '1'):
            return True
        if text in ('false', 'f', 'no', 'n', '0', ''):
            return False
        raise argparse.ArgumentTypeError('boolean value expected, got {!r}'.format(value))

    parser = argparse.ArgumentParser(description=' Project Hand Train')
    parser.add_argument('--seed', type=int, default = 126673,
        help = 'seed') # random seed
    parser.add_argument('--model_exp', type=str, default = './model_exp',
        help = 'model_exp') # model output folder
    parser.add_argument('--model', type=str, default = 'resnet_50',
        help = '''model : resnet_18,resnet_34,resnet_50,resnet_101''') # model type
    parser.add_argument('--num_classes', type=int , default = 63,
        help = 'num_classes') # size of the regression output vector
    parser.add_argument('--GPUS', type=str, default = '0',
        help = 'GPUS') # GPU selection
    parser.add_argument('--train_path', type=str,
        default = "../Minimal-Hand-pytorch-20210609/ehandpose_3d/",
        help = 'datasets') # training-set annotation data
    parser.add_argument('--pretrained', type=_str2bool, default = True,
        help = 'imageNet_Pretrain') # whether to start from ImageNet weights
    parser.add_argument('--fintune_model', type=str, default = 'model_exp/2021-06-15_23-29-22/resnet_50-size-256-loss-wing_loss-model_epoch-262.pth',
        help = 'fintune_model') # fine-tune checkpoint
    parser.add_argument('--loss_define', type=str, default = 'wing_loss',
        help = 'define_loss : wing_loss, mse_loss ') # loss function selection
    parser.add_argument('--init_lr', type=float, default = 1e-4,
        help = 'init learning Rate') # initial learning rate
    parser.add_argument('--lr_decay', type=float, default = 0.1,
        help = 'learningRate_decay') # learning-rate decay factor
    parser.add_argument('--weight_decay', type=float, default = 1e-6,
        help = 'weight_decay') # optimizer regularisation weight
    parser.add_argument('--momentum', type=float, default = 0.9,
        help = 'momentum') # optimizer momentum
    parser.add_argument('--batch_size', type=int, default = 16,
        help = 'batch_size') # images per training batch
    parser.add_argument('--dropout', type=float, default = 0.5,
        help = 'dropout') # dropout
    parser.add_argument('--epochs', type=int, default = 3000,
        help = 'epochs') # training epochs
    parser.add_argument('--num_workers', type=int, default = 4,
        help = 'num_workers') # number of data-loader workers
    # NOTE(review): type=tuple splits a command-line string into characters, so
    # only the default value is usable here; left unchanged to keep the CLI as is.
    parser.add_argument('--img_size', type=tuple , default = (256,256),
        help = 'img_size') # model input image size
    parser.add_argument('--flag_agu', type=_str2bool , default = True,
        help = 'data_augmentation') # whether the data generator augments samples
    parser.add_argument('--clear_model_exp', type=_str2bool, default = False,
        help = 'clear_model_exp') # whether to clear the model output folder
    parser.add_argument('--log_flag', type=_str2bool, default = False,
        help = 'log flag') # whether to save the training log

    args = parser.parse_args() # parse the registered arguments

    # Every run gets its own timestamped sub-folder under model_exp.
    mkdir_(args.model_exp, flag_rm=args.clear_model_exp)
    loc_time = time.localtime()
    args.model_exp = args.model_exp + '/' + time.strftime("%Y-%m-%d_%H-%M-%S", loc_time)+'/'
    mkdir_(args.model_exp, flag_rm=args.clear_model_exp)

    f_log = None
    if args.log_flag:
        f_log = open(args.model_exp+'/train_{}.log'.format(time.strftime("%Y-%m-%d_%H-%M-%S",loc_time)), 'a+')
        sys.stdout = f_log

    print('---------------------------------- log : {}'.format(time.strftime("%Y-%m-%d %H:%M:%S", loc_time)))
    print('\n/******************* {} ******************/\n'.format(parser.description))

    unparsed = vars(args) # parse_args() returns a namespace; vars() exposes it as a dict
    for key in unparsed.keys():
        print('{} : {}'.format(key,unparsed[key]))

    unparsed['time'] = time.strftime("%Y-%m-%d %H:%M:%S", loc_time)

    # The context manager closes the options file even if json.dump raises.
    with open(args.model_exp+'train_ops.json',"w",encoding='utf-8') as fs:
        json.dump(unparsed,fs,ensure_ascii=False,indent = 1)

    trainer(ops = args,f_log = f_log) # run the training

    if args.log_flag:
        sys.stdout = f_log
    print('well done : {}'.format(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())))
# Copyright (c) Hao Meng. All Rights Reserved.
import numpy as np
import transforms3d
import config as cfg
# One twist angle per joint, all zero; adaptive_IK reads column k as the twist
# angle of joint k.
angels0 = np.zeros((1, 21))
def to_dict(joints):
    """Split a joint matrix into a dict of single-column slices.

    :param joints: 2-D array with at least 21 columns (one column per joint).
    :return: dict mapping joint index 0..20 to ``joints[:, [i]]``, i.e. each
        value keeps its column dimension.
    """
    return {idx: joints[:, [idx]] for idx in range(21)}
def adaptive_IK(T_, P_):
"""
Computes pose parameters given template and predictions.
We think the twist of hand bone could be omitted.
:param T: template ,21*3
:param P: target, 21*3
:return: pose params.
"""
# Work on float64 copies so the caller's arrays are not modified.
T = T_.copy().astype(np.float64)
P = P_.copy().astype(np.float64)
# Transpose to 3*21 so that each joint is one column.
P = P.transpose(1, 0)
T = T.transpose(1, 0)
# to dict
# (joint index -> 3*1 column vector)
P = to_dict(P)
T = to_dict(T)
# some globals
# R: accumulated rotation per joint; R_pa_k: rotation of joint k relative to its
# parent; q: joint positions of the template after the rotations found so far.
R = {}
R_pa_k = {}
q = {}
q[0] = T[0] # in fact, q[0] = P[0] = T[0].
# compute R0, here we think R0 is not only a Orthogonal matrix, but also a Rotation matrix.
# you can refer to paper "Least-Squares Fitting of Two 3-D Point Sets. K. S. Arun; T. S. Huang; S. D. Blostein"
# It is slightly different from https://github.com/Jeff-sjtu/HybrIK/blob/main/hybrik/utils/pose_utils.py#L4, in which R0 is regard as orthogonal matrix only.
# Using their method might further boost accuracy.
# P_0 / T_0: the five bones from joint 0 to joints 1, 5, 9, 13, 17, stacked as 3*5.
P_0 = np.concatenate([P[1] - P[0], P[5] - P[0],
P[9] - P[0], P[13] - P[0],
P[17] - P[0]], axis=-1)
T_0 = np.concatenate([T[1] - T[0], T[5] - T[0],
T[9] - T[0], T[13] - T[0],
T[17] - T[0]], axis=-1)
# Rotation from the SVD of the 3*3 correlation matrix: R0 = V * U^T.
H = np.matmul(T_0, P_0.T)
U, S, V_T = np.linalg.svd(H)
V = V_T.T
R0 = np.matmul(V, U.T)
det0 = np.linalg.det(R0)
# det(R0) close to -1 means the SVD solution is a reflection; the third column of
# V is flipped and R0 recomputed.
# NOTE(review): nesting is not recoverable from this view — it looks like the
# flip only happens when some singular value is below 1e-4; confirm.
if abs(det0 + 1) < 1e-6:
V_ = V.copy()
if (abs(S) < 1e-4).sum():
V_[:, 2] = -V_[:, 2]
R0 = np.matmul(V_, U.T)
R[0] = R0
# the bone from 1,5,9,13,17 to 0 has same rotations
R[1] = R[0].copy()
R[5] = R[0].copy()
R[9] = R[0].copy()
R[13] = R[0].copy()
R[17] = R[0].copy()
# compute rotation along kinematics
for k in cfg.kinematic_tree:
pa = cfg.SNAP_PARENT[k]
pa_pa = cfg.SNAP_PARENT[pa]
# Position of the parent joint after applying the rotations solved so far.
q[pa] = np.matmul(R[pa], (T[pa] - T[pa_pa])) + q[pa_pa]
# Target bone expressed in the parent's rotated frame, and the template bone.
delta_p_k = np.matmul(np.linalg.inv(R[pa]), P[k] - q[pa])
delta_p_k = delta_p_k.reshape((3,))
delta_t_k = T[k] - T[pa]
delta_t_k = delta_t_k.reshape((3,))
# Swing: rotate about the axis perpendicular to both bones by the angle between
# them; the 1e-8 terms guard against division by zero.
temp_axis = np.cross(delta_t_k, delta_p_k)
axis = temp_axis / (np.linalg.norm(temp_axis, axis=-1) + 1e-8)
temp = (np.linalg.norm(delta_t_k, axis=0) + 1e-8) * (np.linalg.norm(delta_p_k, axis=0) + 1e-8)
cos_alpha = np.dot(delta_t_k, delta_p_k) / temp
alpha = np.arccos(cos_alpha)
# Twist: rotation about the template bone itself by angels0[:, k] (all zeros).
twist = delta_t_k
D_sw = transforms3d.axangles.axangle2mat(axis=axis, angle=alpha, is_normalized=False)
D_tw = transforms3d.axangles.axangle2mat(axis=twist, angle=angels0[:, k], is_normalized=False)
R_pa_k[k] = np.matmul(D_sw, D_tw)
R[k] = np.matmul(R[pa], R_pa_k[k])
# Pack the result: slot 0 holds the root rotation, the other slots hold the
# parent-relative rotations at the indices given by cfg.ID2ROT.
pose_R = np.zeros((1, 16, 3, 3))
pose_R[0, 0] = R[0]
for key in cfg.ID2ROT.keys():
value = cfg.ID2ROT[key]
pose_R[0, value] = R_pa_k[key]
return pose_R
# Copyright (c) Hao Meng. All Rights Reserved.
# import time
import numpy as np
import torch
from manopth.manolayer import ManoLayer
from utils import bone
class LM_Solver():
# Iteratively fits the 10 MANO shape parameters (beta) so that the hand's
# reference-normalised bone lengths match `lb_target` (reshaped to 15*1).
# NOTE(review): the __init__ signature is truncated in this view; `weight` is
# used below but its parameter declaration is not visible.
def __init__(self, num_Iter=500, th_beta=None, th_pose=None, lb_target=None,
self.count = 0
# self.time_start = time.time()
# self.time_in_mano = 0
self.minimal_loss = 9999
self.best_beta = np.zeros([10, 1])
self.num_Iter = num_Iter
self.th_beta = th_beta
self.th_pose = th_pose
# th_beta / th_pose must be tensors: .numpy() is called on both.
self.beta = th_beta.numpy()
self.pose = th_pose.numpy()
self.mano_layer = ManoLayer(side="right",
mano_root='mano/models', use_pca=False, flat_hand_mean=True)
# LM() stops once the loss changes by less than this between iterations.
self.threshold_stop = 10 ** -13
self.weight = weight
self.residual_memory = []
self.lb = np.zeros(21)
_, self.joints = self.mano_layer(self.th_pose, self.th_beta)
self.joints = self.joints.cpu().numpy().reshape(21, 3)
self.lb_target = lb_target.reshape(15, 1)
# self.test_time = 0
# Run the MANO layer with the stored pose and the given beta; returns the
# "useful" bone lengths and the entry at index 6, which callers use as the
# reference length.
def update(self, beta_):
beta = beta_.copy()
self.count += 1
# now = time.time()
my_th_beta = torch.from_numpy(beta).float().reshape(1, 10)
_, joints = self.mano_layer(self.th_pose, my_th_beta)
# self.time_in_mano = time.time() - now
useful_lb = bone.caculate_length(joints, label="useful")
lb_ref = useful_lb[6]
return useful_lb, lb_ref
# Batched bone lengths computed directly from the MANO shape blend shapes and
# joint regressor (no posing). _shape is reshaped to (-1, 10); the result has one
# row per shape, with 15 bone lengths divided by the length at index 4 and
# reordered by `reoder_index`.
# NOTE(review): the closing bracket of parent_index is not visible in this view.
def new_cal_ref_bone(self, _shape):
# now = time.time()
parent_index = [0,
0, 1, 2,
0, 4, 5,
0, 7, 8,
0, 10, 11,
0, 13, 14
# index = [0,
# 1, 2, 3, # index
# 4, 5, 6, # middle
# 7, 8, 9, # pinky
# 10, 11, 12, # ring
# 13, 14, 15] # thumb
reoder_index = [
13, 14, 15,
1, 2, 3,
4, 5, 6,
10, 11, 12,
7, 8, 9]
shape = torch.Tensor(_shape.reshape((-1, 10)))
th_v_shaped = torch.matmul(self.mano_layer.th_shapedirs,
shape.transpose(1, 0)).permute(2, 0, 1) \
+ self.mano_layer.th_v_template
th_j = torch.matmul(self.mano_layer.th_J_regressor, th_v_shaped)
# Bone vector = joint minus its parent joint; then take the length.
temp1 = th_j.clone().detach()
temp2 = th_j.clone().detach()[:, parent_index, :]
result = temp1 - temp2
result = torch.norm(result, dim=-1, keepdim=True)
ref_len = result[:, [4]]
result = result / ref_len
# self.time_in_mano = time.time() - now
return torch.squeeze(result, dim=-1)[:, reoder_index].cpu().numpy()
# Residual between normalised bone lengths and the target.
# NOTE(review): lb is reshaped to (45, 1) here while get_bones reshapes the same
# quantity to (15, 1) and lb_target is (15, 1) — looks inconsistent; confirm.
def get_residual(self, beta_):
beta = beta_.copy()
lb, lb_ref = self.update(beta)
lb = lb.reshape(45, 1)
return lb / lb_ref - self.lb_target
# Number of times update() has been called.
def get_count(self):
return self.count
# Un-normalised useful bone lengths for the given beta, as a 15*1 array.
def get_bones(self, beta_):
beta = beta_.copy()
lb, _ = self.update(beta)
lb = lb.reshape(15, 1)
return lb
# Vectorization implementation
# Loss per row of beta_: squared distance between normalised bone lengths and the
# target plus a small penalty on the norm of beta. The penalty uses the local
# constant 1e-5, not self.weight.
def batch_get_l2_loss(self, beta_):
weight = 1e-5
beta = beta_.copy()
temp = self.new_cal_ref_bone(beta)
loss = np.transpose(temp)
loss = np.linalg.norm(loss - self.lb_target, axis=0) ** 2 + \
weight * np.linalg.norm(beta, axis=-1)
return loss
# Central-difference gradient of the loss with respect to the 10 beta entries:
# rows 0-9 hold beta with +step on component t2, rows 10-19 with -step.
def new_get_derivative(self, beta_):
# params: beta_ 10*1
# return: 1*10
beta = beta_.copy().reshape((1, 10))
temp_shape = np.zeros((20, beta.shape[1])) # 20*10
step = 0.01
for t2 in range(10): # index of the beta component being perturbed
t3 = 10 + t2
temp_shape[t2] = beta.copy()
temp_shape[t3] = beta.copy()
temp_shape[t2, t2] += step
temp_shape[t3, t2] -= step
res = self.batch_get_l2_loss(temp_shape)
d = res[0:10] - res[10:20] # 10*1
d = d.reshape((1, 10)) / (2 * step)
return d
# LM algorithm
# Damped Gauss-Newton style iteration on beta; returns beta when the loss change
# drops below threshold_stop or after num_Iter iterations.
# NOTE(review): `beta` is a reshaped view of self.beta and is updated in place,
# and self.best_beta is assigned the same array object, so it follows later
# updates — confirm this is intended.
# NOTE(review): as shown, `u /= v` is immediately followed by `u *= v`; the
# keyword separating the two branches is not visible in this view.
def LM(self):
u = 1e-2
v = 1.5
beta = self.beta.reshape(10, 1)
out_n = 1
# num_beta = np.shape(beta)[0] # the number of beta
# calculating the init Jocobian matrix
Jacobian = np.zeros([out_n, beta.shape[0]])
last_update = 0
last_loss = 0
# self.test_time = 0
for i in range(self.num_Iter):
# loss = self.new_get_loss(beta)
loss = self.batch_get_l2_loss(beta)
loss = loss[0]
if loss < self.minimal_loss:
self.minimal_loss = loss
self.best_beta = beta
if abs(loss - last_loss) < self.threshold_stop:
# self.time_total = time.time() - self.time_start
return beta
# for k in range(num_beta):
# Jacobian[:, k] = self.get_derivative(beta, k)
Jacobian = self.new_get_derivative(beta)
# Damped normal equations: (J^T J + u * I)^-1 J^T * loss.
jtj = np.matmul(Jacobian.T, Jacobian)
jtj = jtj + u * np.eye(jtj.shape[0])
update = last_loss - loss
delta = (np.matmul(np.linalg.inv(jtj), Jacobian.T) * loss)
beta -= delta
if update > last_update and update > 0:
u /= v
u *= v
last_update = update
last_loss = loss
return beta
# Returns residual_memory; nothing in the visible code appends to it.
def get_result(self):
return self.residual_memory
# Copyright (c) Hao Meng. All Rights Reserved.
import time
import numpy as np
import torch
from manopth.manolayer import ManoLayer
from utils import bone
class LM_Solver():
# Iteratively fits the 10 MANO shape parameters (beta) so that the hand's
# reference-normalised bone lengths match `lb_target` (reshaped to 15*1).
# This variant takes the hand `side` as an argument and records timing.
# NOTE(review): the __init__ signature is truncated in this view; `weight` is
# used below but its parameter declaration is not visible.
# NOTE(review): mano_root is a hard-coded absolute Windows path.
def __init__(self, side = "right",num_Iter=500, th_beta=None, th_pose=None, lb_target=None,
self.count = 0
self.time_start = time.time()
self.time_in_mano = 0
self.minimal_loss = 9999
self.best_beta = np.zeros([10, 1])
self.num_Iter = num_Iter
self.th_beta = th_beta
self.th_pose = th_pose
# th_beta / th_pose must be tensors: .numpy() is called on both.
self.beta = th_beta.numpy()
self.pose = th_pose.numpy()
self.mano_layer = ManoLayer(side=side,
mano_root='D:/code/manopth/mano/models', use_pca=False, flat_hand_mean=True)
# LM() stops once the loss changes by less than this between iterations.
self.threshold_stop = 10 ** -13
self.weight = weight
self.residual_memory = []
self.lb = np.zeros(21)
_, self.joints = self.mano_layer(self.th_pose, self.th_beta)
self.joints = self.joints.cpu().numpy().reshape(21, 3)
self.lb_target = lb_target.reshape(15, 1)
self.test_time = 0
# Replace the target bone lengths with a copy of `target`, reshaped to 15*1.
def update_target(self, target):
self.lb_target = target.copy().reshape(15, 1)
# Run the MANO layer with the stored pose and the given beta; returns the
# "useful" bone lengths and the entry at index 6, which callers use as the
# reference length. Also records the time spent in the MANO call.
def update(self, beta_):
beta = beta_.copy()
self.count += 1
now = time.time()
my_th_beta = torch.from_numpy(beta).float().reshape(1, 10)
_, joints = self.mano_layer(self.th_pose, my_th_beta)
self.time_in_mano = time.time() - now
useful_lb = bone.caculate_length(joints, label="useful")
lb_ref = useful_lb[6]
return useful_lb, lb_ref
# Batched bone lengths computed directly from the MANO shape blend shapes and
# joint regressor (no posing). _shape is reshaped to (-1, 10); the result has one
# row per shape, with 15 bone lengths divided by the length at index 4 and
# reordered by `reoder_index`.
# NOTE(review): the closing bracket of parent_index is not visible in this view.
def new_cal_ref_bone(self, _shape):
now = time.time()
parent_index = [0,
0, 1, 2,
0, 4, 5,
0, 7, 8,
0, 10, 11,
0, 13, 14
# `index` is not referenced again in the visible code.
index = [0,
1, 2, 3, # index
4, 5, 6, # middle
7, 8, 9, # pinky
10, 11, 12, # ring
13, 14, 15] # thumb
reoder_index = [
13, 14, 15,
1, 2, 3,
4, 5, 6,
10, 11, 12,
7, 8, 9]
shape = torch.Tensor(_shape.reshape((-1, 10)))
th_v_shaped = torch.matmul(self.mano_layer.th_shapedirs,
shape.transpose(1, 0)).permute(2, 0, 1) \
+ self.mano_layer.th_v_template
th_j = torch.matmul(self.mano_layer.th_J_regressor, th_v_shaped)
# Bone vector = joint minus its parent joint; then take the length.
temp1 = th_j.clone().detach()
temp2 = th_j.clone().detach()[:, parent_index, :]
result = temp1 - temp2
result = torch.norm(result, dim=-1, keepdim=True)
ref_len = result[:, [4]]
result = result / ref_len
self.time_in_mano = time.time() - now
return torch.squeeze(result, dim=-1)[:, reoder_index].cpu().numpy()
# Residual between normalised bone lengths and the target.
# NOTE(review): lb is reshaped to (45, 1) here while get_bones reshapes the same
# quantity to (15, 1) and lb_target is (15, 1) — looks inconsistent; confirm.
def get_residual(self, beta_):
beta = beta_.copy()
lb, lb_ref = self.update(beta)
lb = lb.reshape(45, 1)
return lb / lb_ref - self.lb_target
# Number of times update() has been called.
def get_count(self):
return self.count
# Un-normalised useful bone lengths for the given beta, as a 15*1 array.
def get_bones(self, beta_):
beta = beta_.copy()
lb, _ = self.update(beta)
lb = lb.reshape(15, 1)
return lb
# Loss through the full MANO forward pass: squared bone-length error plus
# self.weight times the squared norm of beta.
def get_loss(self, beta_):
beta = beta_.copy()
lb, lb_ref = self.update(beta)
lb = lb.reshape(15, 1)
loss = np.linalg.norm(lb / lb_ref - self.lb_target) ** 2 + \
self.weight * np.linalg.norm(beta) ** 2
return loss
# Loss through new_cal_ref_bone for a single beta. Unlike get_loss, the penalty
# term here is the (unsquared) norm of beta.
def new_get_loss(self, beta_):
beta = beta_.copy()
temp = self.new_cal_ref_bone(beta_)
loss = temp.reshape((15, 1))
loss = np.linalg.norm(loss - self.lb_target) ** 2 + \
self.weight * np.linalg.norm(beta_)
return loss
# Central-difference derivative of new_get_loss with respect to component n.
def get_derivative(self, beta_, n):
beta = beta_.copy()
params1 = np.array(beta)
params2 = np.array(beta)
step = 0.01
params1[n] += step
params2[n] -= step
res1 = self.new_get_loss(params1)
res2 = self.new_get_loss(params2)
d = (res1 - res2) / (2 * step)
return d.ravel()
# Batched loss, one value per row of beta_. The penalty uses the local constant
# 1e-5, not self.weight.
def batch_new_get_loss(self, beta_):
weight = 1e-5
beta = beta_.copy()
temp = self.new_cal_ref_bone(beta)
loss = np.transpose(temp)
loss = np.linalg.norm(loss - self.lb_target, axis=0) ** 2 + \
weight * np.linalg.norm(beta, axis=-1)
return loss
# Central-difference gradient of the batched loss with respect to the 10 beta
# entries: rows 0-9 hold beta with +step on component t2, rows 10-19 with -step.
def new_get_derivative(self, beta_):
# params: beta_ 10*1
# return: 1*10
beta = beta_.copy().reshape((1, 10))
temp_shape = np.zeros((20, beta.shape[1])) # 20*10
step = 0.01
for t2 in range(10): # index of the beta component being perturbed
t3 = 10 + t2
temp_shape[t2] = beta.copy()
temp_shape[t3] = beta.copy()
temp_shape[t2, t2] += step
temp_shape[t3, t2] -= step
res = self.batch_new_get_loss(temp_shape)
d = res[0:10] - res[10:20] # 10*1
d = d.reshape((1, 10)) / (2 * step)
return d
# LM algorithm
# Damped Gauss-Newton style iteration on beta; returns beta when the loss change
# drops below threshold_stop or after num_Iter iterations.
# NOTE(review): the loss is evaluated with new_get_loss (self.weight) while the
# gradient comes from batch_new_get_loss (fixed 1e-5 weight) — confirm intended.
# NOTE(review): `beta` is a reshaped view of self.beta and is updated in place,
# and self.best_beta is assigned the same array object, so it follows later
# updates — confirm this is intended.
# NOTE(review): as shown, `u /= v` is immediately followed by `u *= v`; the
# keyword separating the two branches is not visible in this view.
def LM(self):
u = 1e-2
v = 1.5
beta = self.beta.reshape(10, 1)
out_n = 1
num_beta = np.shape(beta)[0] # the number of beta
# calculating the init Jocobian matrix
Jacobian = np.zeros([out_n, beta.shape[0]])
last_update = 0
last_loss = 0
self.test_time = 0
for i in range(self.num_Iter):
loss = self.new_get_loss(beta)
if loss < self.minimal_loss:
self.minimal_loss = loss
self.best_beta = beta
if abs(loss - last_loss) < self.threshold_stop:
self.time_total = time.time() - self.time_start
return beta
# for k in range(num_beta):
# Jacobian[:, k] = self.get_derivative(beta, k)
Jacobian = self.new_get_derivative(beta)
# Damped normal equations: (J^T J + u * I)^-1 J^T * loss.
jtj = np.matmul(Jacobian.T, Jacobian)
jtj = jtj + u * np.eye(jtj.shape[0])
update = last_loss - loss
delta = (np.matmul(np.linalg.inv(jtj), Jacobian.T) * loss)
beta -= delta
if update > last_update and update > 0:
u /= v
u *= v
last_update = update
last_loss = loss
return beta
# Returns residual_memory; nothing in the visible code appends to it.
def get_result(self):
return self.residual_memory
import numpy as np
def global_align(gtj0, prj0, key):
# Align predicted joints to the ground truth by translation and uniform scale.
# gtj0 / prj0: ground-truth and predicted joints (copied, never modified).
# key: dataset tag; "stb"/"rhd" align all 21 joints, "do"/"eo" align the five
# joints at indices 4, 8, 12, 16, 20 of the prediction.
# Returns (ground truth, aligned prediction).
# NOTE(review): no return is visible for any other key.
gtj = gtj0.copy()
prj = prj0.copy()
if key in ["stb", "rhd"]:
# gtj :B*21*3
# prj :B*21*3
root_idx = 9 # root
ref_bone_link = [0, 9] # mid mcp
pred_align = prj.copy()
for i in range(prj.shape[0]):
# Scale = ground-truth reference bone length / predicted reference bone length.
pred_ref_bone_len = np.linalg.norm(prj[i][ref_bone_link[0]] - prj[i][ref_bone_link[1]])
gt_ref_bone_len = np.linalg.norm(gtj[i][ref_bone_link[0]] - gtj[i][ref_bone_link[1]])
scale = gt_ref_bone_len / pred_ref_bone_len
# Re-express every predicted joint relative to the root, rescale it, and anchor
# it at the ground-truth root.
for j in range(21):
pred_align[i][j] = gtj[i][root_idx] + scale * (prj[i][j] - prj[i][root_idx])
return gtj, pred_align
if key in ["do", "eo"]:
# gtj :B*5*3
# prj :B*5*3
prj_ = prj.copy()[:, [4, 8, 12, 16, 20], :] # B*5*3
gtj_valid = []
prj_valid_align = []
for i in range(prj_.shape[0]):
# 5*3
# Only ground-truth joints whose first coordinate is not NaN are used.
mask = ~(np.isnan(gtj[i][:, 0]))
# NOTE(review): the statement guarded by this check is not visible in this view.
if mask.sum() < 2:
prj_mask = prj_[i][mask] # m*3
gtj_mask = gtj[i][mask] # m*3
# Align by matching centroids and the mean distance to the centroid.
gtj_valid_center = np.mean(gtj_mask, 0)
prj_valid_center = np.mean(prj_mask, 0)
gtj_center_length = np.linalg.norm(gtj_mask - gtj_valid_center, axis=1).mean()
prj_center_length = np.linalg.norm(prj_mask - prj_valid_center, axis=1).mean()
scale = gtj_center_length / prj_center_length
prj_valid_align_i = gtj_valid_center + scale * (prj_[i][mask] - prj_valid_center)
# NOTE(review): nothing visible appends to gtj_valid / prj_valid_align before
# they are returned.
return np.array(gtj_valid), np.array(prj_valid_align)
\ No newline at end of file
import config as cfg
import numpy as np
import torch
def caculate_length(j3d_, label=None):
# Compute bone lengths (distance from each of the 21 joints to its parent in
# cfg.SNAP_PARENT).
# j3d_: torch tensor or array of joints; tensors are detached and moved to CPU,
#       and the input is copied rather than modified.
# label: "full" returns all 21 lengths, "useful" returns the rows selected by
#        cfg.USEFUL_BONE; any other value raises ValueError.
# NOTE(review): the keyword separating the tensor branch from the plain copy is
# not visible in this view.
if isinstance(j3d_, torch.Tensor):
j3d = j3d_.clone()
j3d = j3d.detach().cpu()
j3d = j3d.numpy()
j3d = j3d_.copy()
# Drop singleton dimensions (e.g. a leading batch dimension of 1).
if len(j3d.shape) != 2:
j3d = j3d.squeeze()
# NOTE(review): the closing brackets of the list and of the norm call are not
# visible in this view.
bone = [
j3d[i] - j3d[cfg.SNAP_PARENT[i]]
for i in range(21)
bone_len = np.linalg.norm(
bone, ord=2, axis=-1, keepdims=True # 21*1
if label == "full":
return bone_len
elif label == "useful":
return bone_len[cfg.USEFUL_BONE]
raise ValueError("{} not in ['full'|'useful']".format(label))
# date:2020-04-11
# Author: Eric.Lee
# function: common utils
import os
import shutil
import cv2
import numpy as np
import json
def mkdir_(path, flag_rm=False):
# Directory helper keyed on `path`; when the path exists and flag_rm is true, a
# "remove ... done" message is printed.
# NOTE(review): the calls that actually remove or create the directory are not
# visible in this view.
if os.path.exists(path):
if flag_rm == True:
print('remove {} done ~ '.format(path))
def plot_box(bbox, img, color=None, label=None, line_thickness=None):
    """Draw a bounding box, and optionally a filled label tag, onto ``img``.

    Args:
        bbox: sequence whose first four entries are x1, y1, x2, y2.
        img: image array, modified in place.
        color: box colour; a random colour is drawn when omitted.
        label: optional text placed on a filled rectangle above the box.
        line_thickness: line width; derived from the image size when omitted.
    """
    # Imported locally so the random-colour default works even though the
    # imports at the top of this module do not include `random`.
    import random

    tl = line_thickness or round(0.002 * max(img.shape[0:2])) + 1
    color = color or [random.randint(0, 255) for _ in range(3)]
    c1, c2 = (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3]))
    cv2.rectangle(img, c1, c2, color, thickness=tl)  # bounding box of the target
    if label:
        tf = max(tl - 2, 1)
        t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]  # label size
        c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3  # bounding box of the text
        cv2.rectangle(img, c1, c2, color, -1)  # filled label rectangle
        # Draw the text
        cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 4, [225, 255, 255],thickness=tf, lineType=cv2.LINE_AA)
class JSON_Encoder(json.JSONEncoder):
    """JSON encoder that also accepts NumPy integers, floats and arrays."""

    def default(self, obj):
        """Convert NumPy values to plain Python; defer everything else to the base class."""
        converters = (
            (np.integer, int),
            (np.floating, float),
            (np.ndarray, lambda arr: arr.tolist()),
        )
        for numpy_type, convert in converters:
            if isinstance(obj, numpy_type):
                return convert(obj)
        return super(JSON_Encoder, self).default(obj)
from torchvision.transforms.functional import *
def batch_denormalize(tensor, mean, std, inplace=False):
    """Undo a per-channel mean/std normalisation on a batch of images.

    Computes ``tensor * std + mean`` channel-wise.

    .. note::
        The operation is out of place by default, i.e. the input tensor is
        not mutated unless ``inplace`` is true.

    Args:
        tensor (Tensor): 4-D tensor of size (B, C, H, W).
        mean (sequence): per-channel means.
        std (sequence): per-channel standard deviations.
        inplace (bool, optional): modify ``tensor`` itself instead of a clone.

    Returns:
        Tensor: the denormalised tensor.

    Raises:
        TypeError: if ``tensor`` is not a 4-D torch tensor.
    """
    is_batch = torch.is_tensor(tensor) and tensor.ndimension() == 4
    if not is_batch:
        raise TypeError('invalid tensor or tensor channel is not BCHW')

    target = tensor if inplace else tensor.clone()

    # Broadcast the per-channel statistics over (B, C, H, W).
    mean_b = torch.as_tensor(mean, dtype=target.dtype, device=target.device)[None, :, None, None]
    std_b = torch.as_tensor(std, dtype=target.dtype, device=target.device)[None, :, None, None]

    target.mul_(std_b)
    target.sub_(-1 * mean_b)
    return target
def to_numpy(tensor):
    """Return ``tensor`` as a NumPy value.

    Torch tensors are detached, moved to the CPU and converted. Objects whose
    type already lives in the ``numpy`` module are returned unchanged.

    Raises:
        ValueError: for any other type; the message names the offending type.
    """
    if torch.is_tensor(tensor):
        return tensor.detach().cpu().numpy()
    if type(tensor).__module__ != 'numpy':
        raise ValueError("Cannot convert {} to numpy array".format(type(tensor)))
    return tensor
def bhwc_2_bchw(tensor):
    """Reorder a channels-last batch to channels-first.

    :param tensor: torch tensor, B x H x W x C
    :return: torch tensor, B x C x H x W (a view of the input)
    :raises TypeError: if ``tensor`` is not a 4-D torch tensor
    """
    if not (torch.is_tensor(tensor) and tensor.ndimension() == 4):
        raise TypeError('invalid tensor or tensor channel is not BCHW')
    # Moving the last axis to position 1 is a single axis permutation.
    return tensor.permute(0, 3, 1, 2)
def bchw_2_bhwc(tensor):
    """Reorder a channels-first batch to channels-last.

    :param tensor: torch tensor, B x C x H x W
    :return: torch tensor, B x H x W x C (a view of the input)
    :raises TypeError: if ``tensor`` is not a 4-D torch tensor
    """
    if not (torch.is_tensor(tensor) and tensor.ndimension() == 4):
        raise TypeError('invalid tensor or tensor channel is not BCHW')
    # Moving axis 1 to the end is a single axis permutation.
    return tensor.permute(0, 2, 3, 1)
def initiate(label=None):
    """Create initial pose and shape parameter tensors.

    :param label: ``"zero"`` for all-zero tensors, ``"uniform"`` for samples
        drawn with ``np.random.normal``, ``"01"`` for samples from ``torch.rand``.
    :return: ``(pose, shape)`` with sizes (1, 48) and (1, 10).
    :raises ValueError: for any other label.
    """
    if label not in ("zero", "uniform", "01"):
        raise ValueError("{} not in ['zero'|'uniform'|'01']".format(label))

    # Shape is always sampled before pose so the random streams are consumed
    # in the same order for every mode.
    if label == "zero":
        shape = torch.zeros(10).unsqueeze(0)
        pose = torch.zeros(48).unsqueeze(0)
    elif label == "uniform":
        shape = torch.from_numpy(np.random.normal(size=[1, 10])).float()
        pose = torch.from_numpy(np.random.normal(size=[1, 48])).float()
    else:
        shape = torch.rand(1, 10)
        pose = torch.rand(1, 48)
    return pose, shape
import numpy as np
import torch
from PIL import Image
except ImportError:
print('Could not import PIL in handutils')
import config as cfg
def get_joint_bone(joint, ref_bone_link=None):
    """Sum the lengths of the bones along a chain of joints, per sample.

    Args:
        joint: ndarray or torch tensor of shape (B, 21, 3).
        ref_bone_link: sequence of joint indices; consecutive pairs define the
            bones whose lengths are summed. Defaults to ``(0, 9)``.

    Returns:
        (B, 1) array or tensor (same family as the input) of summed lengths.

    Raises:
        TypeError: if ``joint`` has the wrong type or the wrong shape.
    """
    if ref_bone_link is None:
        ref_bone_link = (0, 9)

    is_tensor = torch.is_tensor(joint)
    if not (is_tensor or isinstance(joint, np.ndarray)):
        raise TypeError('joint should be ndarray or torch tensor. Got {}'.format(type(joint)))

    if len(joint.shape) != 3 or joint.shape[1] != 21 or joint.shape[2] != 3:
        raise TypeError('joint should have shape (B, njoint, 3), Got {}'.format(joint.shape))

    batch_size = joint.shape[0]
    links = list(zip(ref_bone_link[:-1], ref_bone_link[1:]))

    if is_tensor:
        bone = torch.zeros((batch_size, 1)).to(joint.device)
        for start, end in links:
            segment = joint[:, start, :] - joint[:, end, :]
            bone += torch.norm(segment, dim=1, keepdim=True)  # (B, 1)
    else:
        bone = np.zeros((batch_size, 1))
        for start, end in links:
            segment = joint[:, start, :] - joint[:, end, :]
            bone += np.linalg.norm(segment, ord=2, axis=1, keepdims=True)  # (B, 1)
    return bone
def uvd2xyz(
# Convert normalised (u, v, depth) joints back to camera-space xyz.
# NOTE(review): the parameter list is not visible in this view; the body reads
# uvd, mode, intr, inp_res, joint_root, joint_bone, trans and scale.
# Only the perspective mode is implemented; the orthographic branch raises.
bs = uvd.shape[0]
if mode in ['persp', 'perspective']:
if intr is None:
raise Exception("No intr found in perspective")
'''1. denormalized uvd'''
uv = uvd[:, :, :2] * inp_res # 0~256
# Depth is mapped back using cfg.DEPTH_RANGE / cfg.DEPTH_MIN, scaled by the
# reference bone length, then offset by the root joint's depth.
depth = (uvd[:, :, 2] * cfg.DEPTH_RANGE) + cfg.DEPTH_MIN
root_depth = joint_root[:, -1].unsqueeze(-1) # (B, 1)
z = depth * joint_bone.expand_as(uvd[:, :, 2]) + \
root_depth.expand_as(uvd[:, :, 2]) # B x M
'''2. uvd->xyz'''
camparam = torch.zeros((bs, 4)).float().to(intr.device) # (B, 4)
camparam[:, 0] = intr[:, 0, 0] # fx
camparam[:, 1] = intr[:, 1, 1] # fy
camparam[:, 2] = intr[:, 0, 2] # cx
camparam[:, 3] = intr[:, 1, 2] # cy
camparam = camparam.unsqueeze(1).expand(-1, uvd.size(1), -1) # B x M x 4
# Pinhole back-projection: (uv - principal point) / focal length * z.
xy = ((uv - camparam[:, :, 2:4]) / camparam[:, :, :2]) * \
z.unsqueeze(-1).expand_as(uv) # B x M x 2
return torch.cat((xy, z.unsqueeze(-1)), -1) # B x M x 3
elif mode in ['ortho', 'orthogonal']:
if trans is None or scale is None:
raise Exception("No trans or scale found in orthorgnal")
raise Exception("orth Unimplement !")
# NOTE(review): presumably the fallback for an unknown mode; the branch keyword
# is not visible in this view.
raise Exception("Unkonwn mode type. should in ['persp', 'ortho']")
def xyz2uvd(
# Convert camera-space xyz joints to normalised (u, v, depth).
# NOTE(review): the parameter list is not visible in this view; the body reads
# xyz, mode, intr, inp_res, joint_root, joint_bone, trans and scale.
# Only the perspective mode is implemented; the orthographic branch raises.
bs = xyz.shape[0]
if mode in ['persp', 'perspective']:
if intr is None:
raise Exception("No intr found in perspective")
z = xyz[:, :, 2]
xy = xyz[:, :, :2]
# Perspective divide.
xy = xy / z.unsqueeze(-1).expand_as(xy)
''' 1. normalize depth : root_relative, scale_invariant '''
root_depth = joint_root[:, -1].unsqueeze(-1) # (B, 1)
depth = (z - root_depth.expand_as(z)) / joint_bone.expand_as(z)
'''2. xy->uv'''
camparam = torch.zeros((bs, 4)).float().to(intr.device) # (B, 4)
camparam[:, 0] = intr[:, 0, 0] # fx
camparam[:, 1] = intr[:, 1, 1] # fy
camparam[:, 2] = intr[:, 0, 2] # cx
camparam[:, 3] = intr[:, 1, 2] # cy
camparam = camparam.unsqueeze(1).expand(-1, xyz.size(1), -1) # B x M x 4
# Pinhole projection: xy * focal length + principal point.
uv = (xy * camparam[:, :, :2]) + camparam[:, :, 2:4]
'''3. normalize uvd to 0~1'''
uv = uv / inp_res
depth = (depth - cfg.DEPTH_MIN) / cfg.DEPTH_RANGE
return torch.cat((uv, depth.unsqueeze(-1)), -1)
elif mode in ['ortho', 'orthogonal']:
if trans is None or scale is None:
raise Exception("No trans or scale found in orthorgnal")
raise Exception("orth Unimplement !")
# NOTE(review): presumably the fallback for an unknown mode; the branch keyword
# is not visible in this view.
raise Exception("Unkonwn proj type. should in ['persp', 'ortho']")
def persp_joint2kp(joint, intr):
    """Project 3-D joints to 2-D key points with a batch of intrinsic matrices.

    :param joint: tensor whose last dimension holds (x, y, z); multiplied by
        the transposed intrinsics, so typically B x M x 3.
    :param intr: batch of 3 x 3 intrinsic matrices (B x 3 x 3).
    :return: tensor with the first two homogeneous coordinates divided by the
        third (B x M x 2).
    """
    projected = torch.matmul(joint, intr.transpose(1, 2))
    depth = projected[:, :, 2:]
    return (projected / depth)[:, :, :2]
def rot_kp2d(kp2d, rot):
    """Apply an affine transform to 2-D key points.

    :param kp2d: N x 2 array of points.
    :param rot: matrix with three columns; each point is extended with a
        trailing 1 and multiplied by ``rot`` transposed.
    :return: transformed points, one row per input point.
    """
    ones = np.ones((kp2d.shape[0], 1))
    homogeneous = np.concatenate((kp2d, ones), axis=1)
    return np.matmul(homogeneous, rot.transpose())
def get_annot_scale(annots, visibility=None, scale_factor=2.0):
    """Return the side length of the square crop around a set of 2D points.

    The side is the larger of the horizontal and vertical extents of the
    points, multiplied by ``scale_factor`` to leave padding around the hand.

    Args:
        annots: array (n_points, 2) of 2D annotations.
        visibility: optional index or mask applied to ``annots`` first.
        scale_factor: padding multiplier.

    Returns:
        The padded side length.
    """
    points = annots if visibility is None else annots[visibility]
    lower = points.min(0)
    upper = points.max(0)
    extent_x, extent_y = upper - lower
    return max(extent_x, extent_y) * scale_factor
def get_mask_mini_scale(mask_, side):
    """Return the side of the smallest square covering one hand in a mask.

    Args:
        mask_: label array; it is copied and squeezed, never modified.
        side: ``"l"`` selects labels 2..17, ``"r"`` selects labels 18..33.
            With ``side == 0`` the array is treated as two columns and the
            extent of its values is used instead.

    Returns:
        The larger of the row and column extents (inclusive, hence the +1).

    Raises:
        ValueError: if the resulting scale is 0 (also for an unknown side).
    """
    mask = mask_.copy().squeeze()
    mask_scale = 0

    if side == "l":
        first_id, last_id, marker = 2, 17, 128
    elif side == "r":
        first_id, last_id, marker = 18, 33, 255
    else:
        marker = None

    if marker is not None:
        # Paint every pixel of the requested hand with one marker value, then
        # take the bounding box of all pixels carrying that value.
        selected = np.logical_and(mask >= first_id, mask <= last_id)
        np.putmask(mask, selected, marker)
        seg = np.argwhere(mask == marker)
        row_min, col_min = np.min(seg, axis=0)
        row_max, col_max = np.max(seg, axis=0)
        mask_scale = max(row_max - row_min + 1, col_max - col_min + 1)
    elif side == 0:
        row_min, col_min = mask.min(0)
        row_max, col_max = mask.max(0)
        mask_scale = max(row_max - row_min + 1, col_max - col_min + 1)

    if not mask_scale:
        raise ValueError("mask_scale is 0!")
    return mask_scale
def get_kp2d_mini_scale(annots):
    """Return the side of the smallest square that contains all 2D keypoints.

    Args:
        annots: array (n_points, 2) in (x, y) order (OpenCV convention).

    Returns:
        The larger of the x extent and the y extent.
    """
    lower = annots.min(0)
    upper = annots.max(0)
    extent_x, extent_y = upper - lower
    return max(extent_x, extent_y)
# def get_ori_crop_scale(mask, side, kp2d, scale_factor=2.0):
# mask_mini_scale = get_mask_mini_scale(mask, side)
# kp2d_mini_scale = get_kp2d_mini_scale(kp2d)
# ori_crop_scale = max(mask_mini_scale, kp2d_mini_scale)
# # if ori_crop_scale % 2 == 0:
# # ori_crop_scale += 2
# # else:
# # ori_crop_scale += 3
# return ori_crop_scale * scale_factor
def get_ori_crop_scale(mask, side, kp2d, mask_flag=True, scale_factor=2.0):
    """Return the padded crop size that covers both keypoints and hand mask.

    Args:
        mask: segmentation mask, only read when ``mask_flag`` is true.
        side: hand side selector forwarded to ``get_mask_mini_scale``.
        kp2d: 2D keypoints forwarded to ``get_kp2d_mini_scale``.
        mask_flag: when false the mask is ignored and only keypoints count.
        scale_factor: padding multiplier applied to the final size.

    Returns:
        The larger of the two minimal scales, times ``scale_factor``.
    """
    crop_scale = get_kp2d_mini_scale(kp2d)
    if mask_flag:
        crop_scale = max(get_mask_mini_scale(mask, side), crop_scale)
    return crop_scale * scale_factor
def get_annot_center(annots, visibility=None):
    """Return the integer centre of the bounding box of 2D annotations.

    Args:
        annots: array (n_points, 2).
        visibility: optional index or mask applied to ``annots`` first.

    Returns:
        ``np.ndarray`` ``[c_x, c_y]``; each midpoint is truncated with ``int``.
    """
    points = annots if visibility is None else annots[visibility]
    min_x, min_y = points.min(0)
    max_x, max_y = points.max(0)
    center = [int((max_x + min_x) / 2), int((max_y + min_y) / 2)]
    return np.asarray(center)
def transform_coords(pts, affine_trans, invert=False):
    """Apply a 3x3 affine transform to 2D points.

    Args:
        pts (np.ndarray): (point_nb, 2)
        affine_trans: 3x3 homogeneous transform.
        invert: when true, the inverse of ``affine_trans`` is applied.

    Returns:
        Integer array (point_nb, 2); coordinates are truncated by
        ``astype(int)``.
    """
    matrix = np.linalg.inv(affine_trans) if invert else affine_trans
    point_nb = np.array(pts).shape[0]
    hom2d = np.concatenate([pts, np.ones([point_nb, 1])], 1)
    mapped = matrix.dot(hom2d.transpose()).transpose()
    return mapped[:, :2].astype(int)
def transform_img(img, affine_trans, res):
    """Warp ``img`` with an affine transform into an image of size ``res``.

    Args:
        img: image object exposing ``transform(size, method, data)``
            (presumably a ``PIL.Image.Image`` -- confirm against callers).
        affine_trans: 3x3 affine matrix; its inverse is handed to
            ``img.transform``.
        res (tuple): final image size.

    Returns:
        The transformed image returned by ``img.transform``.
    """
    # Pillow's AFFINE mode takes the output->input mapping, so the transform
    # is inverted first (assuming ``Image`` is ``PIL.Image``).
    trans = np.linalg.inv(affine_trans)
    img = img.transform(
        tuple(res),
        Image.AFFINE,
        (trans[0, 0], trans[0, 1], trans[0, 2],
         trans[1, 0], trans[1, 1], trans[1, 2]),
    )
    return img
##### Original from Obman (buggy) #####
# def get_affine_transform(center, scale, res, rot=0):
# rot_mat = np.zeros((3, 3))
# sn, cs = np.sin(rot), np.cos(rot)
# rot_mat[0, :2] = [cs, -sn]
# rot_mat[1, :2] = [sn, cs]
# rot_mat[2, 2] = 1
# # Rotate center to obtain coordinate of center in rotated image
# origin_rot_center = rot_mat.dot(center.tolist() + [
# 1,
# ])[:2]
# # Get center for transform with verts rotated around optical axis
# # (through pixel center, smthg like 128, 128 in pixels and 0,0 in 3d world)
# # For this, rotate the center but around center of image (vs 0,0 in pixel space)
# t_mat = np.eye(3)
# t_mat[0, 2] = -res[1] / 2
# t_mat[1, 2] = -res[0] / 2
# t_inv = t_mat.copy()
# t_inv[:2, 2] *= -1
# transformed_center = t_inv.dot(rot_mat).dot(t_mat).dot(center.tolist() + [
# 1,
# ])
# post_rot_trans = get_affine_trans_no_rot(origin_rot_center, scale, res)
# total_trans = post_rot_trans.dot(rot_mat)
# # check_t = get_affine_transform_bak(center, scale, res, rot)
# # print(total_trans, check_t)
# affinetrans_post_rot = get_affine_trans_no_rot(transformed_center[:2],
# scale, res)
# return total_trans.astype(np.float32), affinetrans_post_rot.astype(
# np.float32)
def get_affine_transform(center, scale, optical_center, out_res, rot=0):
    """Build the crop/rotation affine transforms for an image patch.

    Args:
        center (np.ndarray): 2D crop centre in pixels (``.tolist()`` is used).
        scale: side length, in source pixels, of the square crop.
        optical_center: (x, y) point the second transform rotates around.
        out_res: output resolution forwarded to ``get_affine_trans_no_rot``.
        rot: rotation angle in radians.

    Returns:
        Tuple of two float32 3x3 matrices:
        ``total_trans`` -- the crop transform composed with a rotation about
        the pixel origin;
        ``affinetrans_post_rot`` -- the un-rotated crop transform centred on
        ``center`` after rotating it about ``optical_center``.
    """
    rot_mat = np.zeros((3, 3))
    sn, cs = np.sin(rot), np.cos(rot)
    rot_mat[0, :2] = [cs, -sn]
    rot_mat[1, :2] = [sn, cs]
    rot_mat[2, 2] = 1

    center_hom = center.tolist() + [1]
    # Rotate center to obtain coordinate of center in rotated image
    origin_rot_center = rot_mat.dot(center_hom)[:2]

    # Rotate the centre around the optical centre instead of (0, 0): shift the
    # optical centre to the origin, rotate, then shift back.
    t_mat = np.eye(3)
    t_mat[0, 2] = -optical_center[0]
    t_mat[1, 2] = -optical_center[1]
    t_inv = t_mat.copy()
    t_inv[:2, 2] *= -1
    transformed_center = t_inv.dot(rot_mat).dot(t_mat).dot(center_hom)

    post_rot_trans = get_affine_trans_no_rot(origin_rot_center, scale, out_res)
    total_trans = post_rot_trans.dot(rot_mat)
    affinetrans_post_rot = get_affine_trans_no_rot(
        transformed_center[:2], scale, out_res
    )
    return (
        total_trans.astype(np.float32),
        affinetrans_post_rot.astype(np.float32),
    )
def get_affine_transform_test(center, scale, res, rot=0):
    """Build the crop/rotation affine transforms, rotating about ``res / 2``.

    Same construction as ``get_affine_transform`` but the rotation pivot of
    the second transform is the middle of the output resolution
    (``res[1] / 2``, ``res[0] / 2``) rather than an explicit optical centre.

    Args:
        center (np.ndarray): 2D crop centre in pixels (``.tolist()`` is used).
        scale: side length, in source pixels, of the square crop.
        res: output resolution, indexed as ``res[0]`` / ``res[1]``.
        rot: rotation angle in radians.

    Returns:
        Tuple ``(total_trans, affinetrans_post_rot)`` of float32 3x3 matrices.
    """
    rot_mat = np.zeros((3, 3))
    sn, cs = np.sin(rot), np.cos(rot)
    rot_mat[0, :2] = [cs, -sn]
    rot_mat[1, :2] = [sn, cs]
    rot_mat[2, 2] = 1

    center_hom = center.tolist() + [1]
    # Rotate center to obtain coordinate of center in rotated image
    origin_rot_center = rot_mat.dot(center_hom)[:2]

    # Rotate the centre around the middle of the image instead of (0, 0).
    t_mat = np.eye(3)
    t_mat[0, 2] = -res[1] / 2
    t_mat[1, 2] = -res[0] / 2
    t_inv = t_mat.copy()
    t_inv[:2, 2] *= -1
    transformed_center = t_inv.dot(rot_mat).dot(t_mat).dot(center_hom)

    post_rot_trans = get_affine_trans_no_rot(origin_rot_center, scale, res)
    total_trans = post_rot_trans.dot(rot_mat)
    affinetrans_post_rot = get_affine_trans_no_rot(
        transformed_center[:2], scale, res
    )
    return (
        total_trans.astype(np.float32),
        affinetrans_post_rot.astype(np.float32),
    )
def get_affine_trans_no_rot(center, scale, res):
    """Return the 3x3 scale-and-translate matrix of a square crop.

    The matrix maps a ``scale``-sized square centred on ``center`` onto an
    output of resolution ``res``; x uses ``res[1]`` and y uses ``res[0]``.

    Args:
        center: (x, y) crop centre.
        scale: side length of the crop in source pixels.
        res: output resolution, indexed as ``res[0]`` / ``res[1]``.

    Returns:
        float64 array (3, 3).
    """
    scale_x = float(res[1]) / scale
    scale_y = float(res[0]) / scale
    shift_x = res[1] * (-float(center[0]) / scale + .5)
    shift_y = res[0] * (-float(center[1]) / scale + .5)
    return np.array(
        [[scale_x, 0, shift_x],
         [0, scale_y, shift_y],
         [0, 0, 1]],
        dtype=float,
    )
def get_affine_transform_bak(center, scale, res, rot):
    """Legacy crop transform, optionally rotated about the output centre.

    Args:
        center: (x, y) crop centre.
        scale: side length of the crop in source pixels.
        res: output resolution, indexed as ``res[0]`` / ``res[1]``.
        rot: rotation angle in radians; 0 skips the rotation.

    Returns:
        The same 3x3 matrix twice, as a tuple. It is float64 when ``rot`` is
        0 and float32 otherwise.
    """
    t = np.array(
        [[float(res[1]) / scale, 0, res[1] * (-float(center[0]) / scale + .5)],
         [0, float(res[0]) / scale, res[0] * (-float(center[1]) / scale + .5)],
         [0, 0, 1]],
        dtype=float,
    )
    if rot != 0:
        sn, cs = np.sin(rot), np.cos(rot)
        rot_mat = np.array([[cs, -sn, 0], [sn, cs, 0], [0, 0, 1]], dtype=float)
        # Shift the output centre to the origin, rotate, then shift back.
        to_origin = np.eye(3)
        to_origin[0, 2] = -res[1] / 2
        to_origin[1, 2] = -res[0] / 2
        from_origin = to_origin.copy()
        from_origin[:2, 2] *= -1
        rotated = np.dot(rot_mat, np.dot(to_origin, t))
        t = np.dot(from_origin, rotated).astype(np.float32)
    return t, t
def gen_cam_param(joint, kp2d, mode='ortho'):
    """Fit camera parameters mapping 3D joints to 2D keypoints.

    A linear least-squares system is solved through the normal equations.
    The number of joints is taken from ``joint`` instead of being fixed at 21.

    Args:
        joint: array (n_joints, 3) of 3D joints.
        kp2d: array (n_joints, 2) of matching 2D keypoints.
        mode: ``'persp'``/``'perspective'`` fits ``[fx, fy, cx, cy]`` on the
            depth-normalized joints; ``'ortho'``/``'orthogonal'`` fits
            ``[scale, tx, ty]`` on the raw x/y coordinates.

    Returns:
        1-D array with 4 (perspective) or 3 (orthographic) parameters.

    Raises:
        AssertionError: in orthographic mode when the first joint is not at
            the origin.
        Exception: when ``mode`` is not recognised.
    """
    if mode in ['persp', 'perspective']:
        perspective = True
    elif mode in ['ortho', 'orthogonal']:
        perspective = False
    else:
        raise Exception("Unkonwn mode type. should in ['persp', 'orth']")

    # Rows alternate u, v, u, v, ... to match kp2d flattened row by row.
    n_rows = 2 * joint.shape[0]
    v_rows = (np.arange(n_rows) % 2)[:, np.newaxis]
    u_rows = 1 - v_rows
    target = kp2d.reshape(-1)[:, np.newaxis]

    if perspective:
        plane = joint / joint[:, 2:]
        # float32 storage is kept from the original implementation.
        coeffs = np.zeros((n_rows, 2), dtype="float32")
        coeffs[0::2, 0] = plane[:, 0]  # u rows carry x / z
        coeffs[1::2, 1] = plane[:, 1]  # v rows carry y / z
    else:
        # Orthographic fitting is only valid for root-relative joints. Raised
        # explicitly so the check survives ``python -O``.
        if np.sum(np.abs(joint[0, :])) != 0:
            raise AssertionError("ortho mode expects the first joint at the origin")
        coeffs = joint[:, :2].reshape(-1)[:, np.newaxis]

    jM = np.concatenate([coeffs, u_rows, v_rows], axis=1)
    jMT = jM.transpose()
    jMTjM = np.matmul(jMT, jM)
    jMTb = np.matmul(jMT, target)
    cam_param = np.matmul(np.linalg.inv(jMTjM), jMTb)
    return cam_param.reshape(-1)
# Copyright (c) Lixin YANG, Jiasen Li. All Rights Reserved.
import torch
import numpy as np
def gen_heatmap(img, pt, sigma):
    """Draw a Gaussian blob centred on ``pt`` into ``img`` (in place).

    :param img: original heatmap, zeros
    :type img: np (H,W) float32
    :param pt: keypoint coord, (x, y); cast to int32
    :type pt: np (2,) int32
    :param sigma: gaussian sigma
    :type sigma: float
    :return:
        - generated heatmap, np (H, W); the same array object as ``img``
        - flag 0 or 1: 0 when the blob lies entirely outside the image and
          ``img`` is returned untouched, 1 otherwise
    """
    pt = pt.astype(np.int32)

    # The blob spans +-3 sigma around the point: ul is its upper-left corner,
    # br its (exclusive) bottom-right corner, both in (x, y) order.
    ul = [int(pt[0] - 3 * sigma), int(pt[1] - 3 * sigma)]
    br = [int(pt[0] + 3 * sigma + 1), int(pt[1] + 3 * sigma + 1)]
    if (
        ul[0] >= img.shape[1]
        or ul[1] >= img.shape[0]
        or br[0] < 0
        or br[1] < 0
    ):
        # No part of the gaussian is in-bounds: return the image as is.
        return img, 0

    # Generate gaussian
    size = 6 * sigma + 1
    x = np.arange(0, size, 1, float)
    y = x[:, np.newaxis]
    x0 = y0 = size // 2
    g = np.exp(- ((x - x0) ** 2 + (y - y0) ** 2) / (2 * sigma ** 2))

    # Usable gaussian range (the part of the patch that falls inside img)
    g_x = max(0, -ul[0]), min(br[0], img.shape[1]) - ul[0]
    g_y = max(0, -ul[1]), min(br[1], img.shape[0]) - ul[1]
    # Image range
    img_x = max(0, ul[0]), min(br[0], img.shape[1])
    img_y = max(0, ul[1]), min(br[1], img.shape[0])

    img[img_y[0]:img_y[1], img_x[0]:img_x[1]] = g[g_y[0]:g_y[1], g_x[0]:g_x[1]]
    return img, 1
def get_heatmap_pred(heatmaps):
    """Return the arg-max location of every heatmap as (x, y) coordinates.

    Args:
        heatmaps: tensor (B, nJoints, H, W).

    Returns:
        Float tensor (B, nJoints, 2). The last axis is (column, row) of the
        maximum; both are zeroed for maps whose maximum is not positive.
    """
    assert heatmaps.dim() == 4, 'Score maps should be 4-dim (B, nJoints, H, W)'
    batch, n_joints = heatmaps.size(0), heatmaps.size(1)
    width = heatmaps.size(3)

    flat = heatmaps.view(batch, n_joints, -1)
    maxval, idx = torch.max(flat, 2)
    maxval = maxval.view(batch, n_joints, 1)
    idx = idx.view(batch, n_joints, 1)

    # Decode the flat index: column = idx % W, row = floor(idx / W).
    preds = idx.repeat(1, 1, 2).float()  # (B, njoint, 2)
    preds[:, :, 0] = preds[:, :, 0] % width
    preds[:, :, 1] = torch.floor(preds[:, :, 1] / width)

    preds *= maxval.gt(0).repeat(1, 1, 2).float()
    return preds
import torch
import cv2
import numpy as np
import random
import torchvision
import utils.func as func
import config as cfg
def get_color_params(brightness=0, contrast=0, saturation=0, hue=0):
    """Sample random colour-jitter factors.

    Each argument is a jitter strength. A strength of 0 (or less) disables
    that jitter and yields ``None`` for its factor.

    Args:
        brightness: factor drawn from ``[max(0, 1 - b), 1 + b]``.
        contrast: factor drawn from ``[max(0, 1 - c), 1 + c]``.
        saturation: factor drawn from ``[max(0, 1 - s), 1 + s]``.
        hue: factor drawn from ``[-h, h]``.

    Returns:
        Tuple ``(brightness_factor, contrast_factor, saturation_factor,
        hue_factor)``.
    """
    if brightness > 0:
        brightness_factor = random.uniform(
            max(0, 1 - brightness), 1 + brightness)
    else:
        brightness_factor = None

    if contrast > 0:
        contrast_factor = random.uniform(max(0, 1 - contrast), 1 + contrast)
    else:
        contrast_factor = None

    if saturation > 0:
        saturation_factor = random.uniform(
            max(0, 1 - saturation), 1 + saturation)
    else:
        saturation_factor = None

    if hue > 0:
        hue_factor = random.uniform(-hue, hue)
    else:
        hue_factor = None
    return brightness_factor, contrast_factor, saturation_factor, hue_factor
def color_jitter(img, brightness=0, contrast=0, saturation=0, hue=0):
    """Apply random brightness/saturation/hue/contrast jitter to ``img``.

    Args:
        img: image accepted by ``torchvision.transforms.functional.adjust_*``.
        brightness, contrast, saturation, hue: jitter strengths forwarded to
            ``get_color_params``; a strength that yields ``None`` skips that
            adjustment.

    Returns:
        The jittered image.
    """
    brightness, contrast, saturation, hue = get_color_params(
        brightness=brightness,
        contrast=contrast,
        saturation=saturation,
        hue=hue)

    # Create img transform function sequence
    # NOTE(review): transforms run in this fixed order; no shuffling is
    # visible in the source -- confirm that is intended.
    img_transforms = []
    if brightness is not None:
        img_transforms.append(lambda img: torchvision.transforms.functional.adjust_brightness(img, brightness))
    if saturation is not None:
        img_transforms.append(lambda img: torchvision.transforms.functional.adjust_saturation(img, saturation))
    if hue is not None:
        img_transforms.append(
            lambda img: torchvision.transforms.functional.adjust_hue(img, hue))
    if contrast is not None:
        img_transforms.append(lambda img: torchvision.transforms.functional.adjust_contrast(img, contrast))

    jittered_img = img
    # Loop variable deliberately not called ``func``: that name is the
    # ``utils.func`` module imported at file level.
    for transform in img_transforms:
        jittered_img = transform(jittered_img)
    return jittered_img
def batch_with_dep(clrs, deps):
    """Tile up to 16 colour/depth overlays into one 4x4 preview image.

    Args:
        clrs: batch of colour images; converted with ``func.to_numpy`` and
            scaled by 255 unless already uint8 (assumes channel-last images,
            since each one is passed to ``cv2.resize`` -- TODO confirm).
        deps: batch of depth maps with shape (B, 1, H, W).

    Returns:
        uint8 image made of 4 rows of 4 tiles; slots beyond the batch size
        are black 64x64 tiles.
    """
    clrs = func.to_numpy(clrs)
    # ``dtype is not np.uint8`` is always true (a dtype instance is never the
    # scalar type object), which rescaled uint8 input too; compare by value.
    if clrs.dtype != np.uint8:
        clrs = (clrs * 255).astype(np.uint8)
    assert len(deps.shape) == 4, "deps should have shape (B, 1, H, W)"
    deps = func.to_numpy(deps)
    deps = deps.swapaxes(1, 2).swapaxes(2, 3)  # (B, 1, H, W) -> (B, H, W, 1)
    deps = deps.repeat(3, axis=3)  # replicate depth into 3 channels
    if deps.dtype != np.uint8:
        deps = (deps * 255).astype(np.uint8)

    batch_size = clrs.shape[0]

    alpha = 0.6
    beta = 0.9
    gamma = 0

    batch = []
    for i in range(16):
        if i >= batch_size:
            # Pad the grid with black tiles when the batch is short.
            batch.append(np.zeros((64, 64, 3)).astype(np.uint8))
            continue
        clr = cv2.resize(clrs[i], (64, 64))
        dep = deps[i]
        dep_img = cv2.addWeighted(clr, alpha, dep, beta, gamma)
        batch.append(dep_img)

    rows = [np.concatenate(batch[r * 4: r * 4 + 4], axis=1) for r in range(4)]
    return np.concatenate(rows)
def batch_with_joint(clrs, uvds):
    """Tile up to 16 images with their hand skeleton drawn into a 4x4 grid.

    Args:
        clrs: batch of colour images; converted with ``func.to_numpy`` and
            scaled by 255 unless already uint8.
        uvds: per-image joints whose first two columns are normalized (u, v);
            they are multiplied by the image's first dimension to get pixels.

    Returns:
        uint8 image made of 4 rows of 4 tiles; slots beyond the batch size
        are black 256x256 tiles.
    """
    clrs = func.to_numpy(clrs)
    # ``dtype is not np.uint8`` is always true; compare by value instead.
    if clrs.dtype != np.uint8:
        clrs = (clrs * 255).astype(np.uint8)
    uvds = func.to_numpy(uvds)

    batch_size = clrs.shape[0]

    batch = []
    for i in range(16):
        if i >= batch_size:
            # Pad the grid with black tiles when the batch is short.
            batch.append(np.zeros((256, 256, 3)).astype(np.uint8))
            continue
        clr = clrs[i]
        # int32 rather than uint8: pixel coordinates of 256 or more (or
        # negative ones) would otherwise wrap around.
        uv = (np.array(uvds[i][:, :2]) * clr.shape[0]).astype(np.int32)
        clr = draw_hand_skeloten(clr, uv, cfg.SNAP_BONES)
        batch.append(clr)

    rows = [np.concatenate(batch[r * 4: r * 4 + 4], axis=1) for r in range(4)]
    return np.concatenate(rows)
def draw_hand_skeloten(clr, uv, bone_links, colors=cfg.JOINT_COLORS):
    """Draw joints and bones of a hand skeleton onto ``clr`` (in place).

    Args:
        clr: image drawn on with ``cv2.circle`` / ``cv2.line``.
        uv: per-joint pixel coordinates, indexed as ``uv[joint] -> (u, v)``.
        bone_links: sequence of joint-index chains, one per finger.
        colors: one colour per chain, indexed in step with ``bone_links``.

    Returns:
        ``clr``, the same image object.
    """
    for i, bone in enumerate(bone_links):
        color = colors[i]
        # Hand OpenCV plain Python ints instead of numpy scalars.
        points = {j: (int(uv[j][0]), int(uv[j][1])) for j in bone}
        for j in bone:
            cv2.circle(clr, points[j], 4, color, -1)
        for j, nj in zip(bone[:-1], bone[1:]):
            cv2.line(clr, points[j], points[nj], color, 2)
    return clr
def batch_with_heatmap(
# NOTE(review): the parameter list and closing "):" are missing from this
# extract, and indentation has been stripped. The body reads inputs, heatmaps
# and n_in_batch, so those are presumably among the parameters.
#
# Purpose (from the visible body): build one preview image from at most
# n_in_batch samples by concatenating per-sample images.
# inputs = func.to_numpy(inputs * 255) # 0~1 -> 0 ~255
heatmaps = func.to_numpy(heatmaps)
batch_img = []
for n in range(min(inputs.shape[0], n_in_batch)):
inp = inputs[n]
# NOTE(review): the statement that appends to batch_img is not visible here
# (presumably a sample_with_heatmap call -- confirm); as shown, batch_img
# stays empty and the concatenate below would fail.
resu = np.concatenate(batch_img)
return resu
def sample_with_heatmap(img, heatmap, num_rows=2, parts_to_show=None):
    """Lay out an image next to a grid of its heatmaps blended over it.

    Args:
        img: input image; it is pasted at the top-left of the canvas.
        heatmap: array indexed as ``heatmap[part]`` -> 2D map.
        num_rows: number of rows in the heatmap grid.
        parts_to_show: indices of the parts to render; all parts by default.

    Returns:
        uint8 canvas: the image on the left, and to its right one tile per
        part (40% shrunken image + 60% colourised heatmap).
    """
    if parts_to_show is None:
        parts_to_show = np.arange(heatmap.shape[0])  # 21

    # Generate a single image to display input/output pair
    num_cols = int(np.ceil(float(len(parts_to_show)) / num_rows))
    size = img.shape[0] // num_rows

    canvas = np.zeros((img.shape[0], size * (num_cols + num_rows), 3), np.uint8)
    canvas[:img.shape[0], :img.shape[1]] = img

    thumbnail = cv2.resize(img, (size, size))

    # Set up heatmap display for each part
    for i, part in enumerate(parts_to_show):
        resized = cv2.resize(heatmap[part], (size, size)).astype(float)
        tile = thumbnail.copy() * .4
        tile += color_heatmap(resized) * .6

        grid_row, grid_col = divmod(i, num_cols)
        top = grid_row * size
        # The first ``num_rows`` tile columns are taken by the input image.
        left = (grid_col + num_rows) * size
        canvas[top:top + size, left:left + size] = tile
    return canvas
def color_heatmap(x):
    """Map a 2D heatmap to a 3-channel uint8 colour image.

    Each channel is a Gaussian bump (via ``gauss``) over the heatmap value;
    channel 0 is the sum of two bumps. Values above 1 are clipped before
    scaling to 0..255.

    Args:
        x: 2D array of heatmap values.

    Returns:
        uint8 array (H, W, 3).
    """
    channels = (
        gauss(x, .5, .6, .2) + gauss(x, 1, .8, .3),
        gauss(x, 1, .5, .3),
        gauss(x, 1, .2, .3),
    )
    color = np.zeros((x.shape[0], x.shape[1], 3))
    for index, values in enumerate(channels):
        color[:, :, index] = values
    color[color > 1] = 1
    return (color * 255).astype(np.uint8)
def gauss(x, a, b, c, d=0):
    """Evaluate ``a * exp(-(x - b)^2 / (2 c^2)) + d``.

    Args:
        x: scalar or array of inputs.
        a: amplitude.
        b: centre.
        c: width.
        d: constant offset.
    """
    exponent = -(x - b) ** 2 / (2 * c ** 2)
    return a * np.exp(exponent) + d
import os
import shutil
import numpy as np
import scipy.io
import torch
from termcolor import colored, cprint
import utils.func as func
import copy
def print_args(args):
    """Print every attribute of ``args`` as an aligned, key-sorted table.

    Args:
        args: object whose ``vars()`` are the options (e.g. an argparse
            namespace).
    """
    banner = "{:>30} Options {}".format("=" * 15, "=" * 15)
    options = vars(args)
    cprint(banner, 'yellow')
    for key in sorted(options):
        print("{:>30} : {}".format(key, options[key]))
    cprint(banner, 'yellow')
def param_count(net):
    """Return the number of parameters of ``net`` in millions.

    Args:
        net: object exposing ``parameters()`` whose items have ``numel()``.
    """
    total = 0
    for param in net.parameters():
        total += param.numel()
    return total / 1e6
def out_loss_auc(
loss_all_, auc_all_, acc_hm_all_, outpath
# NOTE(review): the closing "):" of the signature and the bodies of the two
# "for key ,value" loops below are missing from this extract; indentation has
# been stripped as well.
#
# Purpose (from the visible body): write training curves as .npy files under
# outpath. The inputs are deep-copied first, so the caller's containers are
# not modified here.
loss_all = copy.deepcopy(loss_all_)
acc_hm_all = copy.deepcopy(acc_hm_all_)
auc_all = copy.deepcopy(auc_all_)
# One file per loss name: column 0 is a 1-based step index, column 1 the value.
for k, l in zip(loss_all.keys(), loss_all.values()):
np.save(os.path.join(outpath, "{}.npy".format(k)), np.vstack((np.arange(1, len(l) + 1), np.array(l))).T)
if len(acc_hm_all):
# NOTE(review): loop body not visible in this extract.
for key ,value in acc_hm_all.items():
np.save(os.path.join(outpath, "acc_hm_all.npy"), acc_hm_all)
if len(auc_all):
# NOTE(review): loop body not visible in this extract.
for key ,value in auc_all.items():
np.save(os.path.join(outpath, "auc_all.npy"), np.array(auc_all))
def saveloss(d):
    """Save every entry of ``d`` as ``losses/<key>.npy``.

    Args:
        d: mapping from a name to an array-like of values. The ``losses``
            directory is expected to exist already.
    """
    for name, values in d.items():
        target = os.path.join("losses", "{}.npy".format(name))
        np.save(target, np.array(values))
def save_checkpoint(
# NOTE(review): the parameter list is missing from this extract and
# indentation has been stripped. The body reads state, checkpoint, filename,
# snapshot and is_best, so those are presumably the parameters.
#
# Purpose (from the visible body): save the weights of state['model'] to
# <checkpoint>/<filename>.
# is_best=False
# preds = to_numpy(preds)
filepath = os.path.join(checkpoint, filename)
fileprefix = filename.split('.')[0]
# torch.save(state, filepath)
# Only the model's state_dict is written, not the whole state object.
torch.save(state['model'].state_dict(), filepath)
# NOTE(review): the two bare format expressions below are presumably the
# surviving argument lines of file-copy calls (periodic snapshot and per-key
# "best" copies) whose enclosing statements are not visible -- confirm against
# the original file.
if snapshot and state['epoch'] % snapshot == 0:
'{}_{}.pth'.format(fileprefix, state['epoch'])
# is_best is unpacked as a pair of mappings (auc, best_acc) compared key by key.
[auc, best_acc] = is_best
for key in auc.keys():
if auc[key] > best_acc[key]:
'{}_{}best.pth'.format(fileprefix, key)
# def load_checkpoint(model, checkpoint):
# name = checkpoint
# checkpoint = torch.load(name)
# pretrain_dict = clean_state_dict(checkpoint['state_dict'])
# model_state = model.state_dict()
# state = {}
# for k, v in pretrain_dict.items():
# if k in model_state:
# state[k] = v
# else:
# print(k, ' is NOT in current model')
# model_state.update(state)
# model.load_state_dict(model_state)
# print(colored('loaded {}'.format(name), 'cyan'))
def load_checkpoint(model, checkpoint):
    """Load matching weights from a checkpoint file into ``model``.

    Only entries whose key exists in the model's own state dict are copied.
    Other keys are reported and skipped, so partially matching checkpoints
    can still be loaded.

    Args:
        model: module exposing ``state_dict()`` / ``load_state_dict()``.
        checkpoint: path of a file readable by ``torch.load`` that contains a
            ``'state_dict'`` entry.
    """
    name = checkpoint
    checkpoint = torch.load(name)
    pretrain_dict = clean_state_dict(checkpoint['state_dict'])
    model_state = model.state_dict()
    state = {}
    for k, v in pretrain_dict.items():
        if k in model_state:
            state[k] = v
        else:
            print(k, ' is NOT in current model')
    # Overlay the matching weights on the model's current state and load the
    # merged result; without this step nothing reaches the model.
    model_state.update(state)
    model.load_state_dict(model_state)
    print(colored('loaded {}'.format(name), 'cyan'))
def clean_state_dict(state_dict):
    """Return ``state_dict`` without the ``module.`` prefix of DataParallel.

    Only keys that really start with ``module.`` are renamed. Other keys are
    kept as they are, so a dict that mixes prefixed and unprefixed keys (or
    has names such as ``module_list...``) is not corrupted.

    Arguments:
        state_dict {collections.OrderedDict} -- mapping from name to tensor

    Returns:
        clean_model {collections.OrderedDict} -- cleaned mapping; the input
        object itself is returned when no key carries the prefix.
    """
    from collections import OrderedDict

    prefix = 'module.'
    if not any(key.startswith(prefix) for key in state_dict):
        return state_dict

    # create new OrderedDict that does not contain `module.`
    clean_model = OrderedDict()
    for key, value in state_dict.items():
        name = key[len(prefix):] if key.startswith(prefix) else key
        clean_model[name] = value
    return clean_model
def save_pred(preds, checkpoint='checkpoint', filename='preds_valid.mat'):
    """Write predictions to a MATLAB ``.mat`` file under the key ``preds``.

    Args:
        preds: predictions; converted with ``func.to_numpy`` before saving.
        checkpoint: directory of the output file.
        filename: name of the output file.
    """
    target = os.path.join(checkpoint, filename)
    scipy.io.savemat(target, mdict={'preds': func.to_numpy(preds)})
def adjust_learning_rate(optimizer, epoch, lr, schedule, gamma):
    """Decay the learning rate by ``gamma`` when ``epoch`` is in ``schedule``.

    Args:
        optimizer: object exposing ``param_groups`` (a list of dicts).
        epoch: current epoch.
        lr: current learning rate.
        schedule: container of epochs at which to decay.
        gamma: multiplicative decay factor.

    Returns:
        The (possibly decayed) learning rate.
    """
    if epoch not in schedule:
        return lr
    lr *= gamma
    print("adjust learning rate to: %.3e" % lr)
    for group in optimizer.param_groups:
        group['lr'] = lr
    return lr
def adjust_learning_rate_in_group(optimizer, group_id, epoch, lr, schedule, gamma):
    """Decay the learning rate of one parameter group on scheduled epochs.

    Args:
        optimizer: object exposing ``param_groups`` (a list of dicts).
        group_id: index of the parameter group to update.
        epoch: current epoch.
        lr: current learning rate of that group.
        schedule: container of epochs at which to decay.
        gamma: multiplicative decay factor.

    Returns:
        The (possibly decayed) learning rate.
    """
    if epoch not in schedule:
        return lr
    lr *= gamma
    print("adjust learning rate of group %d to: %.3e" % (group_id, lr))
    optimizer.param_groups[group_id]['lr'] = lr
    return lr
def resume_learning_rate(optimizer, epoch, lr, schedule, gamma):
    """Recompute the learning rate when resuming training at ``epoch``.

    ``lr`` is decayed by ``gamma`` once for every scheduled epoch strictly
    before ``epoch``, then written to every parameter group.

    Args:
        optimizer: object exposing ``param_groups`` (a list of dicts).
        epoch: epoch training resumes from.
        lr: initial learning rate.
        schedule: iterable of decay epochs.
        gamma: multiplicative decay factor.

    Returns:
        The resulting learning rate.
    """
    for milestone in schedule:
        if milestone < epoch:
            lr *= gamma
    print("adjust learning rate to: %.3e" % lr)
    for group in optimizer.param_groups:
        group['lr'] = lr
    return lr
def resume_learning_rate_in_group(optimizer, group_id, epoch, lr, schedule, gamma):
    """Recompute the learning rate of one parameter group when resuming.

    ``lr`` is decayed by ``gamma`` once for every scheduled epoch strictly
    before ``epoch``, then written to ``param_groups[group_id]``.

    Args:
        optimizer: object exposing ``param_groups`` (a list of dicts).
        group_id: index of the parameter group to update.
        epoch: epoch training resumes from.
        lr: initial learning rate of that group.
        schedule: iterable of decay epochs.
        gamma: multiplicative decay factor.

    Returns:
        The resulting learning rate.
    """
    for milestone in schedule:
        if milestone < epoch:
            lr *= gamma
    print("adjust learning rate of group %d to: %.3e" % (group_id, lr))
    optimizer.param_groups[group_id]['lr'] = lr
    return lr
# date:2020-04-11
# Author: Eric.Lee
# function: model utils
import os
import numpy as np
import torch
import torch.backends.cudnn as cudnn
import random
def get_acc(output, label):
    """Return the classification accuracy of a batch.

    Args:
        output: score tensor (batch, n_classes); the prediction is the
            arg-max over axis 1.
        label: tensor of target class indices.

    Returns:
        Fraction of correct predictions as a Python float.
    """
    pred_label = output.max(1)[1]
    num_correct = (pred_label == label).sum().item()
    return num_correct / float(output.shape[0])
def set_learning_rate(optimizer, lr):
    """Set ``lr`` on every parameter group of ``optimizer``.

    Args:
        optimizer: object exposing ``param_groups`` (a list of dicts).
        lr: learning rate to assign.
    """
    for group in optimizer.param_groups:
        group['lr'] = lr
def set_seed(seed = 666):
    """Seed the Python, NumPy and PyTorch random generators.

    The visible body never used ``seed``; it now seeds every generator this
    module imports so that runs are repeatable.

    Args:
        seed: integer seed shared by all generators.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
        # Ask cuDNN for deterministic algorithms.
        cudnn.deterministic = True
import numpy as np
class LowPassFilter:
    """First-order exponential smoothing filter.

    The previous raw and filtered samples are kept on the instance. The first
    sample passes through unchanged.
    """

    def __init__(self):
        self.prev_raw_value = None
        self.prev_filtered_value = None

    def process(self, value, alpha):
        """Filter one sample.

        Args:
            value: new raw sample.
            alpha: smoothing weight of the new sample (1.0 = no smoothing).

        Returns:
            ``alpha * value + (1 - alpha) * previous_filtered``, or ``value``
            itself on the first call.
        """
        if self.prev_raw_value is None:
            # Nothing to blend with yet.
            s = value
        else:
            s = alpha * value + (1.0 - alpha) * self.prev_filtered_value
        self.prev_raw_value = value
        self.prev_filtered_value = s
        return s
class OneEuroFilter:
    """Low-pass filter whose cutoff grows with the speed of the signal.

    Two ``LowPassFilter`` instances are used: one smooths the signal and one
    smooths its derivative. The smoothed derivative raises the signal cutoff
    by ``beta * |derivative|``.
    """

    def __init__(self, mincutoff=1.0, beta=0.0, dcutoff=1.0, freq=30):
        """
        Args:
            mincutoff: cutoff frequency used when the signal is not moving.
            beta: gain of the speed term added to the cutoff.
            dcutoff: cutoff frequency of the derivative filter.
            freq: sampling frequency of the signal.
        """
        self.freq = freq
        self.mincutoff = mincutoff
        self.beta = beta
        self.dcutoff = dcutoff
        self.x_filter = LowPassFilter()
        self.dx_filter = LowPassFilter()

    def compute_alpha(self, cutoff):
        """Return the smoothing weight for a cutoff at the sampling rate."""
        sample_period = 1.0 / self.freq
        time_constant = 1.0 / (2 * np.pi * cutoff)
        return 1.0 / (1.0 + time_constant / sample_period)

    def process(self, x):
        """Filter one sample and return the smoothed value."""
        last_raw = self.x_filter.prev_raw_value
        if last_raw is None:
            dx = 0.0
        else:
            # Finite-difference derivative scaled to units per second.
            dx = (x - last_raw) * self.freq
        edx = self.dx_filter.process(dx, self.compute_alpha(self.dcutoff))
        cutoff = self.mincutoff + self.beta * np.abs(edx)
        return self.x_filter.process(x, self.compute_alpha(cutoff))
if __name__ == '__main__':
    # Small demo: smooth a noisy ramp with the one-euro filter.
    one_euro_filter = OneEuroFilter(4.0, 0.0)
    noise = 0.01 * np.random.rand(1000)
    x = np.linspace(0, 1, 1000)
    X = x + noise  # noisy observation of the clean ramp x
    import matplotlib.pyplot as plt

    y = np.zeros((1000,))
    for i in range(1000):
        # Filter the noisy signal: the visible code fed the clean ramp
        # ``x[i]`` and left ``X`` unused.
        y[i] = one_euro_filter.process(X[i])
    # NOTE(review): no plotting calls are visible in this extract even though
    # matplotlib is imported -- confirm against the original file.
import matplotlib.pyplot as plt
def plot3d(joints_,ax, title=None):
# Draw a 21-joint hand skeleton on the given 3D axes and return the axes.
#
# joints_ is copied, so the caller's array is not modified; it is indexed as
# joints[joint_index, axis] with three axes used.
#
# NOTE(review): indentation has been stripped, and every ax.plot call that
# draws a finger chain below is cut off after its third coordinate argument
# (style arguments and the closing parenthesis are not visible). `title` is
# not used in the visible lines.
joints = joints_.copy()
# All joints are plotted with the 'yo' format string.
ax.plot(joints[:, 0], joints[:, 1], joints[:, 2], 'yo', label='keypoint')
# One polyline per finger, each starting at joint 0.
ax.plot(joints[:5, 0], joints[:5, 1],
joints[:5, 2],
ax.plot(joints[[0, 5, 6, 7, 8, ], 0], joints[[0, 5, 6, 7, 8, ], 1],
joints[[0, 5, 6, 7, 8, ], 2],
ax.plot(joints[[0, 9, 10, 11, 12, ], 0], joints[[0, 9, 10, 11, 12], 1],
joints[[0, 9, 10, 11, 12], 2],
ax.plot(joints[[0, 13, 14, 15, 16], 0], joints[[0, 13, 14, 15, 16], 1],
joints[[0, 13, 14, 15, 16], 2],
ax.plot(joints[[0, 17, 18, 19, 20], 0], joints[[0, 17, 18, 19, 20], 1],
joints[[0, 17, 18, 19, 20], 2],
# snap convention
# print(joints)
# ax.plot(joints[4][0], joints[4][1], joints[4][2], 'rD', label='thumb')
# ax.plot(joints[8][0], joints[8][1], joints[8][2], 'ro', label='index')
# ax.plot(joints[12][0], joints[12][1], joints[12][2], 'ro', label='middle')
# ax.plot(joints[16][0], joints[16][1], joints[16][2], 'ro', label='ring')
# ax.plot(joints[20][0], joints[20][1], joints[20][2], 'ro', label='pinky')
# plt.plot(joints [1:, 0], joints [1:, 1], joints [1:, 2], 'o')
# plt.legend()
# ax.view_init(330, 110)
# Fixed camera orientation for the 3D view.
ax.view_init(-190, -190)
return ax
def multi_plot3d(jointss_, title=None):
# Plot several 21-joint hand skeletons in one new 3D figure.
#
# jointss_ is copied and then indexed as jointss[hand][joint_index, axis].
# title, when given, is indexed in step with `colors` to build the axes title.
#
# NOTE(review): indentation has been stripped, and every plt.plot call that
# draws a finger chain is cut off after its third coordinate argument. The
# commented-out calls further down suggest the missing arguments were
# `colors[i], )` -- confirm against the original file.
jointss = jointss_.copy()
fig = plt.figure(figsize=[50, 50])
ax = fig.add_subplot(111, projection='3d')
colors = ['b', 'r', "g"]
for i in range(len(jointss)):
joints = jointss[i]
# All joints of this hand are plotted with the 'yo' format string.
plt.plot(joints[:, 0], joints[:, 1], joints[:, 2], 'yo')
# One polyline per finger, each starting at joint 0.
plt.plot(joints[:5, 0], joints[:5, 1],
joints[:5, 2],
plt.plot(joints[[0, 5, 6, 7, 8, ], 0], joints[[0, 5, 6, 7, 8, ], 1],
joints[[0, 5, 6, 7, 8, ], 2],
plt.plot(joints[[0, 9, 10, 11, 12, ], 0], joints[[0, 9, 10, 11, 12], 1],
joints[[0, 9, 10, 11, 12], 2],
plt.plot(joints[[0, 13, 14, 15, 16], 0], joints[[0, 13, 14, 15, 16], 1],
joints[[0, 13, 14, 15, 16], 2],
plt.plot(joints[[0, 17, 18, 19, 20], 0], joints[[0, 17, 18, 19, 20], 1],
joints[[0, 17, 18, 19, 20], 2],
# plt.plot(joints[:1, 0], joints[:1, 1],
# joints[:1, 2],
# colors[i],
# )
# plt.plot(joints[[0, 5, ], 0], joints[[0, 5, ], 1],
# joints[[0, 5, ], 2],
# colors[i],
# )
# plt.plot(joints[[0, 9, ], 0], joints[[0, 9, ], 1],
# joints[[0, 9,], 2],
# colors[i],
# )
# plt.plot(joints[[0, 13, ], 0], joints[[0, 13, ], 1],
# joints[[0, 13, ], 2],
# colors[i],
# )
# plt.plot(joints[[0, 17, ], 0], joints[[0, 17, ], 1],
# joints[[0, 17, ], 2],
# colors[i],
# )
# snap convention
# Joints 4, 8, 12, 16 and 20 get their own red markers ('rD' for joint 4, 'ro'
# for the others).
plt.plot(joints[4][0], joints[4][1], joints[4][2], 'rD')
plt.plot(joints[8][0], joints[8][1], joints[8][2], 'ro', )
plt.plot(joints[12][0], joints[12][1], joints[12][2], 'ro', )
plt.plot(joints[16][0], joints[16][1], joints[16][2], 'ro', )
plt.plot(joints[20][0], joints[20][1], joints[20][2], 'ro', )
# plt.plot(joints [1:, 0], joints [1:, 1], joints [1:, 2], 'o')
# ax.view_init(330, 110)
# Fixed camera orientation for the 3D view.
ax.view_init(-90, -90)
# The title pairs each colour code with the matching entry of `title`.
if title:
title_ = ""
for i in range(len(title)):
title_ += "{}: {} ".format(colors[i], title[i])
ax.set_title(title_, fontsize=12, color='black')
# NOTE(review): presumably under an "else:" that is missing from this extract;
# as shown it would overwrite the title built above.
ax.set_title("None", fontsize=12, color='black')
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
想要评论请 注册