Commit bc20c001 authored by Eric.Lee2021

create first dpcas demo

Parent 6415fc2f
# DpCas-Light
### dpcas (Deep Learning Componentized Application System): a deep-learning componentized application system, built to integrate existing models quickly into working applications.
### This is the first complete-pipeline demo: a local gesture-interaction application. A web-based gesture-interaction version will follow.
## Project Overview
### Project 1: Gesture interaction (local version)
* Implemented with Python multiprocessing, 100% Python code.
* 1. Single-hand click: a click is registered when the thumb and index finger pinch together (a minimal sketch of this idea follows the list).
* 2. Two-hand clicks select a target region.
* 3. Building on point 2, the recognition architecture is extensible (no object-recognition model is included yet; one will be added later).
* 4. IOU-based hand tracking.
* 5. Audio extensions are supported.
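The pinch click in item 1 can be illustrated with a short sketch (an illustration only, not the repository's implementation, which lives under lib/hand_lib/cores): treat the gesture as a click when the thumb tip and index-finger tip are close relative to the overall hand size. The keypoint indices (4 = thumb tip, 8 = index-finger tip) follow the common 21-keypoint hand convention and are assumptions here.
```
import math

def is_pinch_click(pts, ratio_thres=0.25):
    # pts: list of 21 (x, y) hand keypoints in pixels
    thumb_tip, index_tip = pts[4], pts[8]
    dist = math.hypot(thumb_tip[0] - index_tip[0], thumb_tip[1] - index_tip[1])
    xs = [p[0] for p in pts]
    ys = [p[1] for p in pts]
    hand_size = max(max(xs) - min(xs), max(ys) - min(ys)) + 1e-6
    return (dist / hand_size) < ratio_thres
```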
## Requirements
### 1. Software
* Python 3.7
* PyTorch >= 1.5.1
* opencv-python
* playsound
### 2. Hardware
* An ordinary USB color (RGB) webcam
## Related Projects
### 1. Hand detection (yolo_v3)
* Project: https://codechina.csdn.net/EricLee/yolo_v3
* [Pretrained model download (Baidu Netdisk, password: 7mk0)](https://pan.baidu.com/s/1hqzvz0MeFX0EdpWXUV6aFg)
### 2. 21-keypoint hand regression (handpose_x)
* https://codechina.csdn.net/EricLee/handpose_x
* [Pretrained model download (Baidu Netdisk, password: 99f3)](https://pan.baidu.com/s/1Ur6Ikp31XGEuA3hQjYzwIw)
## Usage
### Project 1: Gesture interaction (local version)
### 1. Download the hand-detection model and the 21-keypoint regression model.
### 2. Make sure the camera is connected and working.
### 3. Open the configuration file lib/hand_lib/cfg/handpose.cfg and set the parameters listed below; please read them carefully (normally only the model paths and model architectures need changing). A minimal sketch of how such a file can be parsed follows the block.
```
detect_model_path=./components/hand_detect/weights/latest_416.pt # path to the hand detection model
detect_model_arch=yolo_v3 # detection model type: yolo or yolo-tiny
detect_conf_thres=0.5 # detection confidence threshold
detect_nms_thres=0.45 # detection NMS threshold
handpose_x_model_path=./components/hand_keypoints/weights/ReXNetV1-size-256-wingloss102-0.1063.pth # path to the 21-keypoint hand regression model
handpose_x_model_arch=rexnetv1 # regression model architecture
camera_id = 0 # camera ID, usually 0; change it if your camera differs
vis_gesture_lines = True # True: visualize the click trajectory, False: do not visualize it
charge_cycle_step = 32 # click-stability counter (the "charge ring" shown while a click stabilizes)
```
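The configuration file is a plain key=value list with trailing `#` comments; `parse_data_cfg` in lib/hand_lib/utils/utils.py reads it into a dict of strings. A minimal parser with the assumed behavior (an illustrative sketch, not the project's exact code) could look like this:
```
def parse_cfg(path):
    config = {}
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.split("#", 1)[0].strip()  # drop trailing comments
            if not line or "=" not in line:
                continue
            key, value = line.split("=", 1)
            config[key.strip()] = value.strip()   # values stay strings
    return config

# cfg = parse_cfg("./lib/hand_lib/cfg/handpose.cfg")
# conf_thres = float(cfg["detect_conf_thres"])
# camera_id = int(cfg["camera_id"])
```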
### 4. From the repository root, run: python main.py
## Contact
* E-mails: 305141918@qq.com
#-*-coding:utf-8-*-
'''
DpCas-Light
|||| ||||| |||| || |||||||
|| || || || || || |||| || ||
|| || || || || || || || ||
|| || || || || ||====|| ||||||
|| || ||||| || || ||======|| ||
|| || || || || || || || ||
|||| || |||| || || |||||||
/--------------------- HandPose_X ---------------------/
'''
# date:2021-03-12
# Author: Eric.Lee
# function: handpose demo
import os
import time
import random
from multiprocessing import Process
from multiprocessing import Manager
import cv2
import numpy as np
# load model components
from hand_detect.yolo_v3_hand import yolo_v3_hand_model
from hand_keypoints.handpose_x import handpose_x_model
# load utility libraries
import sys
sys.path.append("./lib/hand_lib/")
from cores.handpose_fuction import handpose_track_keypoints21_pipeline
from cores.handpose_fuction import hand_tracking,audio_recognize,judge_click_stabel,draw_click_lines
from utils.utils import parse_data_cfg
from playsound import playsound
def audio_process_dw_edge_cnt(info_dict):
while (info_dict["handpose_procss_ready"] == False): # wait for the models to load
time.sleep(2)
gesture_names = ["click"]
gesture_dict = {}
for k_ in gesture_names:
gesture_dict[k_] = None
# time.sleep(1)
# playsound("./materials/audio/sentences/WelcomeAR.mp3")
# time.sleep(0.01)
# playsound("./materials/audio/sentences/MorningEric.mp3")
# time.sleep(1)
reg_cnt = 0
while True:
time.sleep(0.01)
try:
reg_cnt = info_dict["click_dw_cnt"]
for i in range(reg_cnt):
# playsound("./materials/audio/cue/winwin-1.mp3")
playsound("./materials/audio/sentences/welldone.mp3")
info_dict["click_dw_cnt"] = info_dict["click_dw_cnt"] - reg_cnt
except Exception as inst:
print(type(inst),inst) # exception instance
if info_dict["break"] == True:
break
def audio_process_up_edge_cnt(info_dict):
while (info_dict["handpose_procss_ready"] == False): # wait for the models to load
time.sleep(2)
gesture_names = ["click"]
gesture_dict = {}
for k_ in gesture_names:
gesture_dict[k_] = None
reg_cnt = 0
while True:
time.sleep(0.01)
# print(" --->>> audio_process")
try:
reg_cnt = info_dict["click_up_cnt"]
for i in range(reg_cnt):
# playsound("./materials/audio/cue/m2-0.mp3")
playsound("./materials/audio/sentences/Click.mp3")
info_dict["click_up_cnt"] = info_dict["click_up_cnt"] - reg_cnt
except Exception as inst:
print(type(inst),inst) # the exception instance
if info_dict["break"] == True:
break
def audio_process_dw_edge(info_dict):
while (info_dict["handpose_procss_ready"] == False): # wait for the models to load
time.sleep(2)
gesture_names = ["click"]
gesture_dict = {}
for k_ in gesture_names:
gesture_dict[k_] = None
while True:
time.sleep(0.01)
# print(" --->>> audio_process")
try:
for g_ in gesture_names:
if gesture_dict[g_] is None:
gesture_dict[g_] = info_dict[g_]
else:
if ("click"==g_):
if (info_dict[g_]^gesture_dict[g_]) and info_dict[g_]==False:# falling edge of the Click signal: the click action has ended
playsound("./materials/audio/cue/winwin.mp3")
# playsound("./materials/audio/sentences/welldone.mp3")
gesture_dict[g_] = info_dict[g_]
except Exception as inst:
print(type(inst),inst) # the exception instance
if info_dict["break"] == True:
break
def audio_process_up_edge(info_dict):
while (info_dict["handpose_procss_ready"] == False): # wait for the models to load
time.sleep(2)
gesture_names = ["click"]
gesture_dict = {}
for k_ in gesture_names:
gesture_dict[k_] = None
while True:
time.sleep(0.01)
# print(" --->>> audio_process")
try:
for g_ in gesture_names:
if gesture_dict[g_] is None:
gesture_dict[g_] = info_dict[g_]
else:
if ("click"==g_):
if (info_dict[g_]^gesture_dict[g_]) and info_dict[g_]==True:# rising edge of the Click signal: the click action has started
playsound("./materials/audio/cue/m2.mp3")
# playsound("./materials/audio/sentences/clik_quick.mp3")
gesture_dict[g_] = info_dict[g_]
except Exception as inst:
print(type(inst),inst) # the exception instance
if info_dict["break"] == True:
break
'''
Start the recognition-audio process
'''
def audio_process_recognize_up_edge(info_dict):
while (info_dict["handpose_procss_ready"] == False): # wait for the models to load
time.sleep(2)
gesture_names = ["double_en_pts"]
gesture_dict = {}
for k_ in gesture_names:
gesture_dict[k_] = None
while True:
time.sleep(0.01)
# print(" --->>> audio_process")
try:
for g_ in gesture_names:
if gesture_dict[g_] is None:
gesture_dict[g_] = info_dict[g_]
else:
if ("double_en_pts"==g_):
if (info_dict[g_]^gesture_dict[g_]) and info_dict[g_]==True:# rising edge of the double_en_pts signal: start recognizing the selected target
playsound("./materials/audio/sentences/IdentifyingObjectsWait.mp3")
playsound("./materials/audio/sentences/ObjectMayBeIdentified.mp3")
gesture_dict[g_] = info_dict[g_]
except Exception as inst:
print(type(inst),inst) # exception instance
if info_dict["break"] == True:
break
'''
/*****************************************/
   Algorithm pipeline
/*****************************************/
'''
def handpose_x_process(info_dict,config):
# model initialization
print("load model component ...")
# initialize the yolo v3 hand detection model
hand_detect_model = yolo_v3_hand_model(conf_thres=float(config["detect_conf_thres"]),nms_thres=float(config["detect_nms_thres"]),
model_arch = config["detect_model_arch"],model_path = config["detect_model_path"])
# initialize the handpose_x 21-keypoint regression model
handpose_model = handpose_x_model(model_arch = config["handpose_x_model_arch"],model_path = config["handpose_x_model_path"])
#
gesture_model = None # not used yet
#
object_recognize_model = None # object classification model, not used yet
#
img_reco_crop = None
cap = cv2.VideoCapture(int(config["camera_id"])) # open the camera
cap.set(cv2.CAP_PROP_EXPOSURE, -8) # set camera exposure (note: not effective on all cameras)
# url="http://admin:admin@192.168.43.1:8081"
# cap=cv2.VideoCapture(url)
print("start handpose process ~")
info_dict["handpose_procss_ready"] = True # start-up synchronization signal between processes
gesture_lines_dict = {} # trajectory points while the click gesture is active
hands_dict = {} # per-hand information
hands_click_dict = {} # per-hand click counters
track_index = 0 # global tracking index
while True:
ret, img = cap.read()# read a camera frame
if ret:# frame read successfully
# img = cv2.flip(img,-1)
algo_img = img.copy()
st_ = time.time()
#------
hand_bbox =hand_detect_model.predict(img,vis = True) # detect hands and get their bounding boxes
hands_dict,track_index = hand_tracking(data = hand_bbox,hands_dict = hands_dict,track_index = track_index) # hand tracking, currently IOU-based
# estimate the 21 keypoints and related information for each hand
handpose_list = handpose_track_keypoints21_pipeline(img,hands_dict = hands_dict,hands_click_dict = hands_click_dict,track_index = track_index,algo_img = algo_img,
handpose_model = handpose_model,gesture_model = gesture_model,
icon = None,vis = True)
et_ = time.time()
fps_ = 1./(et_-st_+1e-8)
#------------------------------------------ maintain information for tracked hands
#------------------ collect the IDs of the tracked hands
id_list = []
for i in range(len(handpose_list)):
_,_,_,dict_ = handpose_list[i]
id_list.append(dict_["id"])
# print(id_list)
#----------------- collect the hand IDs that should be removed
id_del_list = []
for k_ in gesture_lines_dict.keys():
if k_ not in id_list:# drop trajectories of hands whose tracking has already been lost
id_del_list.append(k_)
#----------------- delete information for hands that can no longer be tracked
for k_ in id_del_list:
del gesture_lines_dict[k_]
del hands_click_dict[k_]
#----------------- update hand trajectories and the rising/falling edge signals of the click gesture
double_en_pts = []
for i in range(len(handpose_list)):
_,_,_,dict_ = handpose_list[i]
id_ = dict_["id"]
if dict_["click"]:
if id_ not in gesture_lines_dict.keys():
gesture_lines_dict[id_] = {}
gesture_lines_dict[id_]["pts"]=[]
gesture_lines_dict[id_]["line_color"] = (random.randint(100,255),random.randint(100,255),random.randint(100,255))
gesture_lines_dict[id_]["click"] = None
# check for a rising edge
if gesture_lines_dict[id_]["click"] is not None:
if gesture_lines_dict[id_]["click"] == False:# rising-edge counter
info_dict["click_up_cnt"] += 1
# record the click state
gesture_lines_dict[id_]["click"] = True
#--- record the coordinate
gesture_lines_dict[id_]["pts"].append(dict_["choose_pt"])
double_en_pts.append(dict_["choose_pt"])
else:
if id_ not in gesture_lines_dict.keys():
gesture_lines_dict[id_] = {}
gesture_lines_dict[id_]["pts"]=[]
gesture_lines_dict[id_]["line_color"] = (random.randint(100,255),random.randint(100,255),random.randint(100,255))
gesture_lines_dict[id_]["click"] = None
elif id_ in gesture_lines_dict.keys():
gesture_lines_dict[id_]["pts"]=[]# clear the trajectory
# check for a falling edge
if gesture_lines_dict[id_]["click"] == True:# falling-edge counter
info_dict["click_dw_cnt"] += 1
# update the click state
gesture_lines_dict[id_]["click"] = False
# draw the trajectory of the thumb/index midpoint while a hand is in the click state
draw_click_lines(img,gesture_lines_dict,vis = bool(config["vis_gesture_lines"]))
# check whether each hand's click state is stable and has reached the configured charge threshold
flag_click_stable = judge_click_stabel(img,handpose_list,int(config["charge_cycle_step"]))
# decide whether to trigger the recognition audio and recognize the selected target region
img_reco_crop = audio_recognize(img,algo_img,img_reco_crop,object_recognize_model,info_dict,double_en_pts,flag_click_stable)
cv2.putText(img, 'HandNum:[{}]'.format(len(hand_bbox)), (5,25),cv2.FONT_HERSHEY_COMPLEX, 0.7, (255, 0, 0),5)
cv2.putText(img, 'HandNum:[{}]'.format(len(hand_bbox)), (5,25),cv2.FONT_HERSHEY_COMPLEX, 0.7, (0, 0, 255))
cv2.namedWindow("image",0)
cv2.imshow("image",img)
if cv2.waitKey(1) == 27:
info_dict["break"] = True
break
else:
break
cap.release()
cv2.destroyAllWindows()
def main_handpose_x(cfg_file):
config = parse_data_cfg(cfg_file)
print("\n/---------------------- main_handpose_x config ------------------------/\n")
for k_ in config.keys():
print("{} : {}".format(k_,config[k_]))
print("\n/------------------------------------------------------------------------/\n")
print(" loading handpose_x local demo ...")
g_info_dict = Manager().dict()# shared dict for key:value communication between processes
g_info_dict["handpose_procss_ready"] = False # start-up synchronization signal between processes
g_info_dict["break"] = False # exit synchronization signal between processes
g_info_dict["double_en_pts"] = False # enable signal for the two-hand selection gesture
g_info_dict["click_up_cnt"] = 0
g_info_dict["click_dw_cnt"] = 0
print(" multiprocessing dict key:\n")
for key_ in g_info_dict.keys():
print( " -> ",key_)
print()
#-------------------------------------------------- initialize the processes
process_list = []
t = Process(target=handpose_x_process,args=(g_info_dict,config,))
process_list.append(t)
t = Process(target=audio_process_recognize_up_edge,args=(g_info_dict,)) # play audio on the rising edge
process_list.append(t)
# t = Process(target=audio_process_dw_edge_cnt,args=(g_info_dict,)) # play audio on the falling edge
# process_list.append(t)
# t = Process(target=audio_process_up_edge_cnt,args=(g_info_dict,)) # play audio on the rising edge
# process_list.append(t)
for i in range(len(process_list)):
process_list[i].start()
for i in range(len(process_list)):
process_list[i].join()# the main process waits for the child processes to finish
del process_list
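# --- Hedged sketch (not part of the original commit): a minimal entry point in the
# spirit of the README's "python main.py"; it assumes this module is what main.py
# wraps and that the cfg path below matches your checkout. ---
if __name__ == "__main__":
    main_handpose_x(cfg_file="./lib/hand_lib/cfg/handpose.cfg")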
import torch
import torch.nn as nn
import torchvision
import time
import numpy as np
import sys
def get_model_op(model_,print_flag = False):
# print('/********************* modules *******************/')
op_dict = {}
idx = 0
for m in model_.modules():
idx += 1
if isinstance(m, nn.Conv2d):
if 'Conv2d' not in op_dict.keys():
op_dict['Conv2d'] = 1
else:
op_dict['Conv2d'] += 1
if print_flag:
print('{}) {}'.format(idx,m))
pass
elif isinstance(m, nn.BatchNorm2d):
if 'BatchNorm2d' not in op_dict.keys():
op_dict['BatchNorm2d'] = 1
else:
op_dict['BatchNorm2d'] += 1
if print_flag:
print('{}) {}'.format(idx,m))
pass
elif isinstance(m, nn.Linear):
if 'Linear' not in op_dict.keys():
op_dict['Linear'] = 1
else:
op_dict['Linear'] += 1
if print_flag:
print('{}) {}'.format(idx,m))
pass
elif isinstance(m, nn.Sequential):
if print_flag:
print('*******************{}) {}'.format(idx,m))
for n in m:
if print_flag:
print('{}) {}'.format(idx,n))
if 'Conv2d' not in op_dict.keys():
op_dict['Conv2d'] = 1
else:
op_dict['Conv2d'] += 1
if 'BatchNorm2d' not in op_dict.keys():
op_dict['BatchNorm2d'] = 1
else:
op_dict['BatchNorm2d'] += 1
if 'Linear' not in op_dict.keys():
op_dict['Linear'] = 1
else:
op_dict['Linear'] += 1
if 'ReLU6' not in op_dict.keys():
op_dict['ReLU6'] = 1
else:
op_dict['ReLU6'] += 1
pass
elif isinstance(m, nn.ReLU6):
if print_flag:
print('{}) {}'.format(idx,m))
if 'ReLU6' not in op_dict.keys():
op_dict['ReLU6'] = 1
else:
op_dict['ReLU6'] += 1
pass
elif isinstance(m, nn.Module):
if print_flag:
print('{}) {}'.format(idx,m))
for n in m.modules():
if isinstance(n, nn.Conv2d):
if print_flag:
print('{}) {}'.format(idx,n))
if 'Conv2d' not in op_dict.keys():
op_dict['Conv2d'] = 1
else:
op_dict['Conv2d'] += 1
if 'BatchNorm2d' not in op_dict.keys():
op_dict['BatchNorm2d'] = 1
else:
op_dict['BatchNorm2d'] += 1
if 'Linear' not in op_dict.keys():
op_dict['Linear'] = 1
else:
op_dict['Linear'] += 1
if 'ReLU6' not in op_dict.keys():
op_dict['ReLU6'] = 1
else:
op_dict['ReLU6'] += 1
pass
pass
else:
if print_flag:
print('{}) {}'.format(idx,m))
pass
# print('\n/********************** {} ********************/\n'.format(ops.network))
for key in op_dict.keys():
if print_flag:
print(' operation - {} : {}'.format(key,op_dict[key]))
class DummyModule(nn.Module):
def __init__(self):
super(DummyModule, self).__init__()
def forward(self, x):
return x
def fuse(conv, bn):
# https://tehnokv.com/posts/fusing-batchnorm-and-conv/
with torch.no_grad():
# init
if isinstance(conv, nn.Conv2d):
fusedconv = torch.nn.Conv2d(conv.in_channels,
conv.out_channels,
kernel_size=conv.kernel_size,
stride=conv.stride,
padding=conv.padding,
bias=True)
elif isinstance(conv, nn.ConvTranspose2d): # note: fusing nn.ConvTranspose2d is not fully supported
fusedconv = nn.ConvTranspose2d(
conv.in_channels,
conv.out_channels,
kernel_size=conv.kernel_size,
stride=conv.stride,
padding=conv.padding,
output_padding=conv.output_padding,
bias=True)
else:
print("error")
exit()
# prepare filters
w_conv = conv.weight.clone().view(conv.out_channels, -1)
w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var)))
fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.size()))
# prepare spatial bias
if conv.bias is not None:
b_conv = conv.bias
#b_conv = conv.bias.mul(bn.weight.div(torch.sqrt(bn.running_var + bn.eps))) # arguably the conv bias should be scaled like this instead
else:
b_conv = torch.zeros(conv.weight.size(0))
b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps))
fusedconv.bias.copy_(b_conv + b_bn)
return fusedconv
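# Math behind fuse(): for BatchNorm parameters (gamma, beta, running_mean,
# running_var, eps) that follow a convolution with weight W and bias b, the
# equivalent single convolution is
#     W_fused = diag(gamma / sqrt(running_var + eps)) @ W
#     b_fused = beta + gamma * (b - running_mean) / sqrt(running_var + eps)
# Note the code above adds the conv bias b unscaled (b_conv + b_bn), which is exact
# only when the conv has no bias (the usual Conv+BN case); the commented-out line
# hints at the fully scaled alternative.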
# idxx = 0
def fuse_module(m):
# global idxx
children = list(m.named_children())
c = None
cn = None
for name, child in children:
# idxx += 1
# print('-------------->>',idxx)
# if idxx%10==0:
# continue
# print("name {}, child {}".format(name, child))
if isinstance(child, nn.BatchNorm2d) and c is not None:
bc = fuse(c, child)
m._modules[cn] = bc
# print('DummyModule() : ',DummyModule())
m._modules[name] = DummyModule()
c = None
elif isinstance(child, nn.Conv2d):
c = child
cn = name
else:
fuse_module(child)
def test_net(ops,m):
use_cuda = torch.cuda.is_available()
use_cpu = False
if ops.force_cpu or use_cuda == False:
p = torch.randn([1, 3, 256, 256])
device = torch.device("cpu")
use_cpu = True
else:
p = torch.randn([1, 3, 256, 256]).cuda()
device = torch.device("cuda:0")
count = 50
time_org = []
m_o = m.to(device)
get_model_op(m_o)
# print(m)
for i in range(count):
s1 = time.time()
if use_cpu:
o_output = m_o(p)
else:
o_output = m_o(p).cpu()
s2 = time.time()
time_org.append(s2 - s1)
print("Original time: ", s2 - s1)
print('------------------------------------>>>>')
fuse_module(m.to(torch.device("cpu")))
# print(m)
m_f = m.to(device)
get_model_op(m_f)
time_fuse = []
for i in range(count):
s1 = time.time()
if use_cpu:
f_output = m_f(p)
else:
f_output = m_f(p).cpu()
s2 = time.time()
time_fuse.append(s2 - s1)
print("Fused time: ", s2 - s1)
print("-" * 50)
print("org time:", np.mean(time_org))
print("fuse time:", np.mean(time_fuse))
for o in o_output:
print("org size:", o.size())
for o in f_output:
print("fuse size:", o.size())
for i in range(len(o_output)):
assert o_output[i].size()==f_output[i].size()
print("output[{}] max abs diff: {}".format(i, (o_output[i] - f_output[i]).abs().max().item()))
print("output[{}] MSE diff: {}".format(i, nn.MSELoss()(o_output[i], f_output[i]).item()))
def acc_model(ops,m):
# print('\n-------------------------------->>> before acc model')
get_model_op(m)
fuse_module(m)
# print('\n-------------------------------->>> after acc model')
get_model_op(m)
return m
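# --- Hedged usage sketch (not part of the original file): fuse the Conv+BN pairs of a
# torchvision ResNet-18 in eval mode and check that the outputs barely change. ---
if __name__ == "__main__":
    model = torchvision.models.resnet18().eval()
    x = torch.randn(1, 3, 224, 224)
    with torch.no_grad():
        y_ref = model(x)
        fused = acc_model('', model)  # the first argument (ops) is unused by acc_model
        y_fused = fused(x)
    print("max abs diff after Conv+BN fusion:", (y_ref - y_fused).abs().max().item())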
(This diff has been collapsed.)
import glob
import math
import os
import random
import shutil
from pathlib import Path
from PIL import Image
from tqdm import tqdm
import cv2
import numpy as np
import torch
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
def xyxy2xywh(x):
# Convert bounding box format from [x1, y1, x2, y2] to [x, y, w, h]
y = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x)
y[:, 0] = (x[:, 0] + x[:, 2]) / 2
y[:, 1] = (x[:, 1] + x[:, 3]) / 2
y[:, 2] = x[:, 2] - x[:, 0]
y[:, 3] = x[:, 3] - x[:, 1]
return y
def xywh2xyxy(x):
# Convert bounding box format from [x, y, w, h] to [x1, y1, x2, y2]
y = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x)
y[:, 0] = x[:, 0] - x[:, 2] / 2
y[:, 1] = x[:, 1] - x[:, 3] / 2
y[:, 2] = x[:, 0] + x[:, 2] / 2
y[:, 3] = x[:, 1] + x[:, 3] / 2
return y
class LoadImages: # for inference
def __init__(self, path, img_size=416):
self.height = img_size
img_formats = ['.jpg', '.jpeg', '.png', '.tif']
vid_formats = ['.mov', '.avi', '.mp4']
files = []
if os.path.isdir(path):
files = sorted(glob.glob('%s/*.*' % path))
elif os.path.isfile(path):
files = [path]
images = [x for x in files if os.path.splitext(x)[-1].lower() in img_formats]
videos = [x for x in files if os.path.splitext(x)[-1].lower() in vid_formats]
nI, nV = len(images), len(videos)
self.files = images + videos
self.nF = nI + nV # number of files
self.video_flag = [False] * nI + [True] * nV
self.mode = 'images'
if any(videos):
self.new_video(videos[0]) # new video
else:
self.cap = None
assert self.nF > 0, 'No images or videos found in ' + path
def __iter__(self):
self.count = 0
return self
def __next__(self):
if self.count == self.nF:
raise StopIteration
path = self.files[self.count]
if self.video_flag[self.count]:
# Read video
self.mode = 'video'
ret_val, img0 = self.cap.read()
if not ret_val:
self.count += 1
self.cap.release()
if self.count == self.nF: # last video
raise StopIteration
else:
path = self.files[self.count]
self.new_video(path)
ret_val, img0 = self.cap.read()
self.frame += 1
print('video %g/%g (%g/%g) %s: ' % (self.count + 1, self.nF, self.frame, self.nframes, path), end='')
else:
# Read image
self.count += 1
img0 = cv2.imread(path) # BGR
assert img0 is not None, 'File Not Found ' + path
print('image %g/%g %s: ' % (self.count, self.nF, path), end='')
# Padded resize
img, _, _, _ = letterbox(img0, height=self.height)
# Normalize RGB
img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB
img = np.ascontiguousarray(img, dtype=np.float32) # uint8 to float32
img /= 255.0 # 0 - 255 to 0.0 - 1.0
# cv2.imwrite(path + '.letterbox.jpg', 255 * img.transpose((1, 2, 0))[:, :, ::-1]) # save letterbox image
return path, img, img0, self.cap
def new_video(self, path):
self.frame = 0
self.cap = cv2.VideoCapture(path)
self.nframes = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
def __len__(self):
return self.nF # number of files
class LoadWebcam: # for inference
def __init__(self, img_size=416):
self.cam = cv2.VideoCapture(0)
self.height = img_size
def __iter__(self):
self.count = -1
return self
def __next__(self):
self.count += 1
if cv2.waitKey(1) == 27: # esc to quit
cv2.destroyAllWindows()
raise StopIteration
# Read image
ret_val, img0 = self.cam.read()
assert ret_val, 'Webcam Error'
img_path = 'webcam_%g.jpg' % self.count
img0 = cv2.flip(img0, 1) # flip left-right
# Padded resize
img, _, _, _ = letterbox(img0, height=self.height)
# Normalize RGB
img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB
img = np.ascontiguousarray(img, dtype=np.float32) # uint8 to float32
img /= 255.0 # 0 - 255 to 0.0 - 1.0
return img_path, img, img0, self.cam
def __len__(self):
return 0
class LoadImagesAndLabels(Dataset): # for training/testing
def __init__(self, path, batch_size, img_size=416, augment=True, multi_scale=False):
print('LoadImagesAndLabels init : ',path)
with open(path, 'r') as file:
img_files = file.read().splitlines()
img_files = list(filter(lambda x: len(x) > 0, img_files))
np.random.shuffle(img_files) # shuffle img_list
print("shuffle image...")
self.img_files = img_files
assert len(self.img_files) > 0, 'No images found in %s' % path
self.img_size = img_size
self.batch_size = batch_size
self.multi_scale = multi_scale
self.augment = augment
self.scale_index = 0
if self.multi_scale:
self.img_size = img_size # initiate with maximum multi_scale size, in case of out of memory
print("Multi scale images training, init img_size", self.img_size)
else:
print("Fixed scale images, img_size", self.img_size)
self.label_files = [
x.replace('images', 'labels').replace("JPEGImages", 'labels').replace('.bmp', '.txt').replace('.jpg', '.txt').replace('.png', '.txt')
for x in self.img_files]
# print('self.img_files : ',self.img_files[1])
# print('self.label_files : ',self.label_files[1])
def __len__(self):
return len(self.img_files)
def __getitem__(self, index):
# if self.multi_scale and (index % self.batch_size == 0) and index != 0:
if self.multi_scale and (self.scale_index % self.batch_size == 0)and self.scale_index != 0:
self.img_size = random.choice(range(11, 18)) * 32
# print("++++++ change img_size, index:", self.img_size, index)
if self.multi_scale:
self.scale_index += 1
if self.scale_index >= (100*self.batch_size):
self.scale_index = 0
img_path = self.img_files[index]
label_path = self.label_files[index]
img = cv2.imread(img_path) # BGR
assert img is not None, 'File Not Found ' + img_path
augment_hsv = random.random() < 0.5 # hsv_aug prob = 0.5
if self.augment and augment_hsv:
# SV augmentation by 50%
fraction = 0.50 # must be < 1.0
img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
S = img_hsv[:, :, 1].astype(np.float32)
V = img_hsv[:, :, 2].astype(np.float32)
a = (random.random() * 2 - 1) * fraction + 1 # a in [0.5, 1.5]
S *= a
if a > 1:
np.clip(S, None, 255, out=S)
a = (random.random() * 2 - 1) * fraction + 1
V *= a
if a > 1:
np.clip(V, None, 255, out=V)
img_hsv[:, :, 1] = S # .astype(np.uint8)
img_hsv[:, :, 2] = V # .astype(np.uint8)
cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)
h, w, _ = img.shape
img, ratio, padw, padh = letterbox(img, height=self.img_size, augment=self.augment)
# Load labels
labels = []
if os.path.isfile(label_path):
with open(label_path, 'r') as file:
lines = file.read().splitlines()
x = np.array([x.split() for x in lines], dtype=np.float32)
if x.size > 0:
# Normalized xywh to pixel xyxy format
labels = x.copy()
labels[:, 1] = ratio * w * (x[:, 1] - x[:, 3] / 2) + padw
labels[:, 2] = ratio * h * (x[:, 2] - x[:, 4] / 2) + padh
labels[:, 3] = ratio * w * (x[:, 1] + x[:, 3] / 2) + padw
labels[:, 4] = ratio * h * (x[:, 2] + x[:, 4] / 2) + padh
# Augment image and labels
if self.augment:
img, labels = random_affine(img, labels, degrees=(-10, 10), translate=(0.10, 0.10), scale=(0.9, 1.1))
nL = len(labels) # number of labels
if nL:
# convert xyxy to xywh
labels[:, 1:5] = xyxy2xywh(labels[:, 1:5]) / self.img_size # convert format and normalize
if self.augment:
# random left-right flip
lr_flip = True
if lr_flip and random.random() > 0.5:
img = np.fliplr(img)
if nL:
labels[:, 1] = 1 - labels[:, 1]
# random up-down flip
ud_flip = False
if ud_flip and random.random() > 0.5:
img = np.flipud(img)
if nL:
labels[:, 2] = 1 - labels[:, 2]
labels_out = torch.zeros((nL, 6))# one extra column for the batch-image index
if nL:
labels_out[:, 1:] = torch.from_numpy(labels)
# Normalize
img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416
img = np.ascontiguousarray(img, dtype=np.float32) # uint8 to float32
img /= 255.0 # 0 - 255 to 0.0 - 1.0
return torch.from_numpy(img), labels_out, img_path, (h, w)
@staticmethod
def collate_fn(batch):
img, label, path, hw = list(zip(*batch)) # transposed
for i, l in enumerate(label):
l[:, 0] = i # index of the image each object belongs to within the batch
return torch.stack(img, 0), torch.cat(label, 0), path, hw
def letterbox(img, height=416, augment=False, color=(127.5, 127.5, 127.5)):
# Resize a rectangular image to a padded square
shape = img.shape[:2] # shape = [height, width]
ratio = float(height) / max(shape) # ratio = old / new
new_shape = (round(shape[1] * ratio), round(shape[0] * ratio))
dw = (height - new_shape[0]) / 2 # width padding
dh = (height - new_shape[1]) / 2 # height padding
top, bottom = round(dh - 0.1), round(dh + 0.1)
left, right = round(dw - 0.1), round(dw + 0.1)
# resize img
if augment:
interpolation = np.random.choice([None, cv2.INTER_NEAREST, cv2.INTER_LINEAR,
None, cv2.INTER_NEAREST, cv2.INTER_LINEAR,
cv2.INTER_AREA, cv2.INTER_CUBIC, cv2.INTER_LANCZOS4])
if interpolation is None:
img = cv2.resize(img, new_shape)
else:
img = cv2.resize(img, new_shape, interpolation=interpolation)
else:
img = cv2.resize(img, new_shape, interpolation=cv2.INTER_NEAREST)
# print("resize time:",time.time()-s1)
img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # padded square
return img, ratio, dw, dh
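# Worked example for letterbox(): a 720x1280 BGR frame with height=416 gives
# ratio = 416/1280 = 0.325, is resized to 416x234, and is padded with 91 gray
# (127.5) rows on top and bottom to reach 416x416; the returned (ratio, dw, dh)
# can later be used to map detections back to the original image.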
def random_affine(img, targets=(), degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-2, 2),
borderValue=(127.5, 127.5, 127.5)):
# torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))
# https://medium.com/uruvideo/dataset-augmentation-with-random-homographies-a8f4b44830d4
if targets is None:
targets = []
border = 0 # width of added border (optional)
height = max(img.shape[0], img.shape[1]) + border * 2
# Rotation and Scale
R = np.eye(3)
a = random.random() * (degrees[1] - degrees[0]) + degrees[0]
# a += random.choice([-180, -90, 0, 90]) # 90deg rotations added to small rotations
s = random.random() * (scale[1] - scale[0]) + scale[0]
R[:2] = cv2.getRotationMatrix2D(angle=a, center=(img.shape[1] / 2, img.shape[0] / 2), scale=s)
# Translation
T = np.eye(3)
T[0, 2] = (random.random() * 2 - 1) * translate[0] * img.shape[0] + border # x translation (pixels)
T[1, 2] = (random.random() * 2 - 1) * translate[1] * img.shape[1] + border # y translation (pixels)
# Shear
S = np.eye(3)
S[0, 1] = math.tan((random.random() * (shear[1] - shear[0]) + shear[0]) * math.pi / 180) # x shear (deg)
S[1, 0] = math.tan((random.random() * (shear[1] - shear[0]) + shear[0]) * math.pi / 180) # y shear (deg)
M = S @ T @ R # Combined rotation matrix. ORDER IS IMPORTANT HERE!!
imw = cv2.warpPerspective(img, M, dsize=(height, height), flags=cv2.INTER_LINEAR,
borderValue=borderValue) # BGR order borderValue
# Return warped points also
if len(targets) > 0:
n = targets.shape[0]
points = targets[:, 1:5].copy()
area0 = (points[:, 2] - points[:, 0]) * (points[:, 3] - points[:, 1])
# warp points
xy = np.ones((n * 4, 3))
xy[:, :2] = points[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(n * 4, 2) # x1y1, x2y2, x1y2, x2y1
xy = (xy @ M.T)[:, :2].reshape(n, 8)
# create new boxes
x = xy[:, [0, 2, 4, 6]]
y = xy[:, [1, 3, 5, 7]]
xy = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T
# apply angle-based reduction of bounding boxes
radians = a * math.pi / 180
reduction = max(abs(math.sin(radians)), abs(math.cos(radians))) ** 0.5
x = (xy[:, 2] + xy[:, 0]) / 2
y = (xy[:, 3] + xy[:, 1]) / 2
w = (xy[:, 2] - xy[:, 0]) * reduction
h = (xy[:, 3] - xy[:, 1]) * reduction
xy = np.concatenate((x - w / 2, y - h / 2, x + w / 2, y + h / 2)).reshape(4, n).T
# reject warped points outside of image
np.clip(xy, 0, height, out=xy)
w = xy[:, 2] - xy[:, 0]
h = xy[:, 3] - xy[:, 1]
area = w * h
ar = np.maximum(w / (h + 1e-16), h / (w + 1e-16))
i = (w > 4) & (h > 4) & (area / (area0 + 1e-16) > 0.1) & (ar < 10)
targets = targets[i]
targets[:, 1:5] = xy[i]
return imw, targets
def convert_images2bmp():
# cv2.imread() jpg at 230 img/s, *.bmp at 400 img/s
for path in ['../coco/images/val2014/', '../coco/images/train2014/']:
folder = os.sep + Path(path).name
output = path.replace(folder, folder + 'bmp')
if os.path.exists(output):
shutil.rmtree(output) # delete output folder
os.makedirs(output) # make new output folder
for f in tqdm(glob.glob('%s*.jpg' % path)):
save_name = f.replace('.jpg', '.bmp').replace(folder, folder + 'bmp')
cv2.imwrite(save_name, cv2.imread(f))
for label_path in ['../coco/trainvalno5k.txt', '../coco/5k.txt']:
with open(label_path, 'r') as file:
lines = file.read()
lines = lines.replace('2014/', '2014bmp/').replace('.jpg', '.bmp').replace(
'/Users/glennjocher/PycharmProjects/', '../')
with open(label_path.replace('5k', '5k_bmp'), 'w') as file:
file.write(lines)
import torch
def init_seeds(seed=0):
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
def select_device(force_cpu=False):
if force_cpu:
cuda = False
device = torch.device('cpu')
else:
cuda = torch.cuda.is_available()
device = torch.device('cuda:0' if cuda else 'cpu')
if torch.cuda.device_count() > 1:
device = torch.device('cuda' if cuda else 'cpu')
# print('Found %g GPUs' % torch.cuda.device_count())
# print('Multi-GPU Issue: https://github.com/ultralytics/yolov3/issues/21')
# torch.cuda.set_device(0) # OPTIONAL: Set your GPU if multiple available
# print('Using ', torch.cuda.device_count(), ' GPUs')
# print('Using %s %s\n' % (device.type, torch.cuda.get_device_properties(0) if cuda else ''))
return device
import glob
import random
import time
from collections import defaultdict
import cv2
import numpy as np
import torch
import torch.nn as nn
from dp_models.light_pose.modules.keypoints import BODY_PARTS_KPT_IDS, BODY_PARTS_PAF_IDS
# Set printoptions
torch.set_printoptions(linewidth=1320, precision=5, profile='long')
np.set_printoptions(linewidth=320, formatter={'float_kind': '{:11.5g}'.format}) # format short g, %precision=5
# Prevent OpenCV from multithreading (to use PyTorch DataLoader)
cv2.setNumThreads(0)
def float3(x): # format floats to 3 decimals
return float(format(x, '.3f'))
def init_seeds(seed=0):
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
if torch.cuda.is_available():
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
def load_classes(path):
# Loads class labels at 'path'
fp = open(path, 'r')
names = fp.read().split('\n')
return list(filter(None, names)) # filter removes empty strings (such as last line)
def model_info(model):
# Plots a line-by-line description of a PyTorch model
n_p = sum(x.numel() for x in model.parameters()) # number parameters
n_g = sum(x.numel() for x in model.parameters() if x.requires_grad) # number gradients
print('\n%5s %60s %9s %12s %20s %10s %10s' % ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', 'sigma'))
for i, (name, p) in enumerate(model.named_parameters()):
# name = name.replace('module_list.', '')
print('%5g %60s %9s %12g %20s %10.3g %10.3g' % (
i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std()))
print('Model Summary: %g layers, %g parameters, %g gradients' % (i + 1, n_p, n_g))
def weights_init_normal(m):
classname = m.__class__.__name__
if classname.find('Conv') != -1:
torch.nn.init.normal_(m.weight.data, 0.0, 0.03)
elif classname.find('BatchNorm2d') != -1:
torch.nn.init.normal_(m.weight.data, 1.0, 0.03)
torch.nn.init.constant_(m.bias.data, 0.0)
def xyxy2xywh(x):
# Convert bounding box format from [x1, y1, x2, y2] to [x, y, w, h]
y = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x)
y[:, 0] = (x[:, 0] + x[:, 2]) / 2
y[:, 1] = (x[:, 1] + x[:, 3]) / 2
y[:, 2] = x[:, 2] - x[:, 0]
y[:, 3] = x[:, 3] - x[:, 1]
return y
def xywh2xyxy(x):
# Convert bounding box format from [x, y, w, h] to [x1, y1, x2, y2]
y = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x)
y[:, 0] = x[:, 0] - x[:, 2] / 2
y[:, 1] = x[:, 1] - x[:, 3] / 2
y[:, 2] = x[:, 0] + x[:, 2] / 2
y[:, 3] = x[:, 1] + x[:, 3] / 2
return y
def scale_coords(img_size, coords, img0_shape):# rescale from the network input size back to the original image size
# Rescale x1, y1, x2, y2 from 416 to image size
# print('coords : ',coords)
# print('img0_shape : ',img0_shape)
gain = float(img_size) / max(img0_shape) # gain = old / new
# print('gain : ',gain)
pad_x = (img_size - img0_shape[1] * gain) / 2 # width padding
pad_y = (img_size - img0_shape[0] * gain) / 2 # height padding
# print('pad_xpad_y : ',pad_x,pad_y)
coords[:, [0, 2]] -= pad_x
coords[:, [1, 3]] -= pad_y
coords[:, :4] /= gain
coords[:, :4] = torch.clamp(coords[:, :4], min=0)# clamp so coordinates are not negative
return coords
def ap_per_class(tp, conf, pred_cls, target_cls):
""" Compute the average precision, given the recall and precision curves.
Source: https://github.com/rafaelpadilla/Object-Detection-Metrics.
# Arguments
tp: True positives (list).
conf: Objectness value from 0-1 (list).
pred_cls: Predicted object classes (list).
target_cls: True object classes (list).
# Returns
The average precision as computed in py-faster-rcnn.
"""
# Sort by objectness
i = np.argsort(-conf)
tp, conf, pred_cls = tp[i], conf[i], pred_cls[i]
# Find unique classes
unique_classes = np.unique(target_cls)
# Create Precision-Recall curve and compute AP for each class
ap, p, r = [], [], []
for c in unique_classes:
i = pred_cls == c
n_gt = (target_cls == c).sum() # Number of ground truth objects
n_p = i.sum() # Number of predicted objects
if n_p == 0 and n_gt == 0:
continue
elif n_p == 0 or n_gt == 0:
ap.append(0)
r.append(0)
p.append(0)
else:
# Accumulate FPs and TPs
fpc = (1 - tp[i]).cumsum()
tpc = (tp[i]).cumsum()
# Recall
recall_curve = tpc / (n_gt + 1e-16)
r.append(recall_curve[-1])
# Precision
precision_curve = tpc / (tpc + fpc)
p.append(precision_curve[-1])
# AP from recall-precision curve
ap.append(compute_ap(recall_curve, precision_curve))
# Plot
# plt.plot(recall_curve, precision_curve)
# Compute F1 score (harmonic mean of precision and recall)
p, r, ap = np.array(p), np.array(r), np.array(ap)
f1 = 2 * p * r / (p + r + 1e-16)
return p, r, ap, f1, unique_classes.astype('int32')
def compute_ap(recall, precision):
""" Compute the average precision, given the recall and precision curves.
Source: https://github.com/rbgirshick/py-faster-rcnn.
# Arguments
recall: The recall curve (list).
precision: The precision curve (list).
# Returns
The average precision as computed in py-faster-rcnn.
"""
# correct AP calculation
# first append sentinel values at the end
mrec = np.concatenate(([0.], recall, [1.]))
mpre = np.concatenate(([0.], precision, [0.]))
# compute the precision envelope
for i in range(mpre.size - 1, 0, -1):
mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
# to calculate area under PR curve, look for points
# where X axis (recall) changes value
i = np.where(mrec[1:] != mrec[:-1])[0]
# and sum (\Delta recall) * prec
ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
return ap
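# Tiny worked example for compute_ap(): recall = [0.5, 1.0], precision = [1.0, 0.5]
# gives mrec = [0, 0.5, 1, 1] and mpre = [0, 1, 0.5, 0]; the envelope pass turns
# mpre into [1, 1, 0.5, 0], and summing (delta recall) * precision where recall
# changes yields AP = 0.5 * 1.0 + 0.5 * 0.5 = 0.75.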
def bbox_iou(box1, box2, x1y1x2y2=True):
# Returns the IoU of box1 to box2. box1 is 4, box2 is nx4
box2 = box2.t()
# Get the coordinates of bounding boxes
if x1y1x2y2:
# x1, y1, x2, y2 = box1
b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]
else:
# x, y, w, h = box1
b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2
b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2
b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2
b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2
# Intersection area
inter_area = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * \
(torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0)
# Union Area
union_area = ((b1_x2 - b1_x1) * (b1_y2 - b1_y1) + 1e-16) + \
(b2_x2 - b2_x1) * (b2_y2 - b2_y1) - inter_area
return inter_area / union_area # iou
def wh_iou(box1, box2):
box2 = box2.t()
# w, h = box1
w1, h1 = box1[0], box1[1]
w2, h2 = box2[0], box2[1]
# Intersection area
inter_area = torch.min(w1, w2) * torch.min(h1, h2)
# Union Area
union_area = (w1 * h1 + 1e-16) + w2 * h2 - inter_area
return inter_area / union_area # iou
def compute_loss(p, targets): # predictions, targets
FT = torch.cuda.FloatTensor if p[0].is_cuda else torch.FloatTensor
lxy, lwh, lcls, lconf = FT([0]), FT([0]), FT([0]), FT([0]) # initialize losses to 0
txy, twh, tcls, indices = targets
MSE = nn.MSELoss()
CE = nn.CrossEntropyLoss()
BCE = nn.BCEWithLogitsLoss()# used for multi-label targets, e.g. [1,1,0]
# Compute losses
for i, pi0 in enumerate(p): # layer i predictions, i
b, a, gj, gi = indices[i] # image_idx, anchor_idx, gridx, gridy
# print(i,') b, a, gj, gi : ')
# print('b', b)
# print('a', a)
# print('gj', gj)
# print('gi', gi)
tconf = torch.zeros_like(pi0[..., 0]) # conf
# print('tconf: ',tconf.size())
# Compute losses
k = 1 # nT / bs
if len(b) > 0:
pi = pi0[b, a, gj, gi] # predictions closest to anchors
tconf[b, a, gj, gi] = 1 # conf
lxy += (k * 8) * MSE(torch.sigmoid(pi[..., 0:2]), txy[i]) # xy loss
lwh += (k * 4) * MSE(pi[..., 2:4], twh[i]) # wh loss
lcls += (k * 1) * CE(pi[..., 5:], tcls[i]) # class_conf loss
lconf += (k * 64) * BCE(pi0[..., 4], tconf) # obj_conf loss
loss = lxy + lwh + lconf + lcls
# Add to dictionary
d = defaultdict(float)
losses = [loss.item(), lxy.item(), lwh.item(), lconf.item(), lcls.item()]
for name, x in zip(['total', 'xy', 'wh', 'conf', 'cls'], losses):
d[name] = x
return loss, d
def build_targets(model, targets):
# targets = [image, class, x, y, w, h]
if isinstance(model, nn.parallel.DistributedDataParallel):
model = model.module
txy, twh, tcls, indices = [], [], [], []
for i, layer in enumerate(get_yolo_layers(model)):# iterate over the 3 yolo layers
# print(i,'layer ',model.module_list[layer])
layer = model.module_list[layer][0]
# iou of targets-anchors
gwh = targets[:, 4:6] * layer.nG # wh in grid units
iou = [wh_iou(x, gwh) for x in layer.anchor_vec]
iou, a = torch.stack(iou, 0).max(0) # best iou and anchor
# reject below threshold ious (OPTIONAL, increases P, lowers R)
reject = True
if reject:
j = iou > 0.10
t, a, gwh = targets[j], a[j], gwh[j]
else:
t = targets
# Indices
b, c = t[:, :2].long().t() # target image, class
gxy = t[:, 2:4] * layer.nG
gi, gj = gxy.long().t() # grid_i, grid_j
indices.append((b, a, gj, gi)) # img_index , anchor_index , grid_x , grid_y
# print('b, a, gj, gi : ')
# print('b', b)
# print('a', a)
# print('gj', gj)
# print('gi', gi)
# print('class c',c)
# XY coordinates
txy.append(gxy - gxy.floor())# convert to grid-relative coordinates
# Width and height
twh.append(torch.log(gwh / layer.anchor_vec[a])) # yolo method (log)
# twh.append(torch.sqrt(gwh / layer.anchor_vec[a]) / 2) # power method
# Class
tcls.append(c)
# try:
# print('c.max,layer.nC: ',c.max().item() ,layer.nC)
# except:
# pass
if c.shape[0]:
assert c.max().item() <= layer.nC, 'Target classes exceed model classes'
return txy, twh, tcls, indices
# @profile
def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4):
"""
Removes detections with lower object confidence score than 'conf_thres'
Non-Maximum Suppression to further filter detections.
Returns detections with shape:
(x1, y1, x2, y2, object_conf, class_conf, class)
"""
min_wh = 2 # (pixels) minimum box width and height
output = [None] * len(prediction)
for image_i, pred in enumerate(prediction):
# Experiment: Prior class size rejection
# x, y, w, h = pred[:, 0], pred[:, 1], pred[:, 2], pred[:, 3]
# a = w * h # area
# ar = w / (h + 1e-16) # aspect ratio
# n = len(w)
# log_w, log_h, log_a, log_ar = torch.log(w), torch.log(h), torch.log(a), torch.log(ar)
# shape_likelihood = np.zeros((n, 60), dtype=np.float32)
# x = np.concatenate((log_w.reshape(-1, 1), log_h.reshape(-1, 1)), 1)
# from scipy.stats import multivariate_normal
# for c in range(60):
# shape_likelihood[:, c] =
# multivariate_normal.pdf(x, mean=mat['class_mu'][c, :2], cov=mat['class_cov'][c, :2, :2])
# Filter out confidence scores below threshold
class_conf, class_pred = pred[:, 5:].max(1) # max class_conf, index
pred[:, 4] *= class_conf # final conf = obj_conf * class_conf
i = (pred[:, 4] > conf_thres) & (pred[:, 2] > min_wh) & (pred[:, 3] > min_wh)
# s2=time.time()
pred2 = pred[i]
# print("++++++pred2 = pred[i]",time.time()-s2, pred2)
# If none are remaining => process next image
if len(pred2) == 0:
continue
# Select predicted classes
class_conf = class_conf[i]
class_pred = class_pred[i].unsqueeze(1).float()
# Box (center x, center y, width, height) to (x1, y1, x2, y2)
pred2[:, :4] = xywh2xyxy(pred2[:, :4])
# pred[:, 4] *= class_conf # improves mAP from 0.549 to 0.551
# Detections ordered as (x1y1x2y2, obj_conf, class_conf, class_pred)
pred2 = torch.cat((pred2[:, :5], class_conf.unsqueeze(1), class_pred), 1)
# Get detections sorted by decreasing confidence scores
pred2 = pred2[(-pred2[:, 4]).argsort()]
det_max = []
nms_style = 'MERGE' # 'OR' (default), 'AND', 'MERGE' (experimental)
for c in pred2[:, -1].unique():
dc = pred2[pred2[:, -1] == c] # select class c
dc = dc[:min(len(dc), 100)] # limit to first 100 boxes
# Non-maximum suppression
if nms_style == 'OR': # default
# METHOD1
# ind = list(range(len(dc)))
# while len(ind):
# j = ind[0]
# det_max.append(dc[j:j + 1]) # save highest conf detection
# reject = (bbox_iou(dc[j], dc[ind]) > nms_thres).nonzero()
# [ind.pop(i) for i in reversed(reject)]
# METHOD2
while dc.shape[0]:
det_max.append(dc[:1]) # save highest conf detection
if len(dc) == 1: # Stop if we're at the last detection
break
iou = bbox_iou(dc[0], dc[1:]) # iou with other boxes
dc = dc[1:][iou < nms_thres] # remove ious > threshold
elif nms_style == 'AND': # requires overlap, single boxes erased
while len(dc) > 1:
iou = bbox_iou(dc[0], dc[1:]) # iou with other boxes
if iou.max() > 0.5:
det_max.append(dc[:1])
dc = dc[1:][iou < nms_thres] # remove ious > threshold
elif nms_style == 'MERGE': # weighted mixture box
while len(dc):
i = bbox_iou(dc[0], dc) > nms_thres # iou with other boxes
weights = dc[i, 4:5]
dc[0, :4] = (weights * dc[i, :4]).sum(0) / weights.sum()
det_max.append(dc[:1])
dc = dc[i == 0]
if len(det_max):
det_max = torch.cat(det_max) # concatenate
output[image_i] = det_max[(-det_max[:, 4]).argsort()] # sort
return output
def get_yolo_layers(model):
yolo_layer_index = []
for index, l in enumerate(model.module_list):
try:
a = l[0].img_size and l[0].nG # only yolo layer need img_size and nG
# print("---"*50)
# print(l, index)
yolo_layer_index.append(index)
except:
pass
assert len(yolo_layer_index) > 0, "can not find yolo layer"
return yolo_layer_index
#-*-coding:utf-8-*-
# date:2021-03-09
# Author: Eric.Lee
# function: yolo v3 hand detect
import os
import cv2
import numpy as np
import time
import torch
from hand_detect.yolov3 import Yolov3, Yolov3Tiny
from hand_detect.utils.torch_utils import select_device
from hand_detect.acc_model import acc_model
import torch.backends.cudnn as cudnn
import torch.nn.functional as F
import random
def show_model_param(model):
params = list(model.parameters())
k = 0
for i in params:
l = 1
for j in i.size():
l *= j
print("layer shape: {}, parameter count: {}".format(str(list(i.size())), str(l)))
k = k + l
print("----------------------")
print("total number of parameters: " + str(k))
def process_data(img, img_size=416):# image preprocessing
img, _, _, _ = letterbox(img, height=img_size)
# Normalize RGB
img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB
img = np.ascontiguousarray(img, dtype=np.float32) # uint8 to float32
img /= 255.0 # 0 - 255 to 0.0 - 1.0
return img
def plot_one_box(x, img, color=None, label=None, line_thickness=None):
# Plots one bounding box on image img
tl = line_thickness or round(0.002 * max(img.shape[0:2])) + 1 # line thickness
color = color or [random.randint(0, 255) for _ in range(3)]
c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
cv2.rectangle(img, c1, c2, color, thickness=tl)
if label:
tf = max(tl - 1, 1) # font thickness
t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
cv2.rectangle(img, c1, c2, color, -1) # filled
cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 3, [255, 55,90], thickness=tf, lineType=cv2.LINE_AA)
def bbox_iou(box1, box2, x1y1x2y2=True):
# Returns the IoU of box1 to box2. box1 is 4, box2 is nx4
box2 = box2.t()
# Get the coordinates of bounding boxes
if x1y1x2y2:
# x1, y1, x2, y2 = box1
b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]
else:
# x, y, w, h = box1
b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2
b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2
b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2
b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2
# Intersection area
inter_area = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * \
(torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0)
# Union Area
union_area = ((b1_x2 - b1_x1) * (b1_y2 - b1_y1) + 1e-16) + \
(b2_x2 - b2_x1) * (b2_y2 - b2_y1) - inter_area
return inter_area / union_area # iou
def xywh2xyxy(x):
# Convert bounding box format from [x, y, w, h] to [x1, y1, x2, y2]
y = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x)
y[:, 0] = x[:, 0] - x[:, 2] / 2
y[:, 1] = x[:, 1] - x[:, 3] / 2
y[:, 2] = x[:, 0] + x[:, 2] / 2
y[:, 3] = x[:, 1] + x[:, 3] / 2
return y
def scale_coords(img_size, coords, img0_shape):# rescale from the network input size back to the original image size
# Rescale x1, y1, x2, y2 from 416 to image size
# print('coords : ',coords)
# print('img0_shape : ',img0_shape)
gain = float(img_size) / max(img0_shape) # gain = old / new
# print('gain : ',gain)
pad_x = (img_size - img0_shape[1] * gain) / 2 # width padding
pad_y = (img_size - img0_shape[0] * gain) / 2 # height padding
# print('pad_xpad_y : ',pad_x,pad_y)
coords[:, [0, 2]] -= pad_x
coords[:, [1, 3]] -= pad_y
coords[:, :4] /= gain
coords[:, :4] = torch.clamp(coords[:, :4], min=0)# clamp so coordinates are not negative
return coords
def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4):
"""
Removes detections with lower object confidence score than 'conf_thres'
Non-Maximum Suppression to further filter detections.
Returns detections with shape:
(x1, y1, x2, y2, object_conf, class_conf, class)
"""
min_wh = 2 # (pixels) minimum box width and height
output = [None] * len(prediction)
for image_i, pred in enumerate(prediction):
# Experiment: Prior class size rejection
# x, y, w, h = pred[:, 0], pred[:, 1], pred[:, 2], pred[:, 3]
# a = w * h # area
# ar = w / (h + 1e-16) # aspect ratio
# n = len(w)
# log_w, log_h, log_a, log_ar = torch.log(w), torch.log(h), torch.log(a), torch.log(ar)
# shape_likelihood = np.zeros((n, 60), dtype=np.float32)
# x = np.concatenate((log_w.reshape(-1, 1), log_h.reshape(-1, 1)), 1)
# from scipy.stats import multivariate_normal
# for c in range(60):
# shape_likelihood[:, c] =
# multivariate_normal.pdf(x, mean=mat['class_mu'][c, :2], cov=mat['class_cov'][c, :2, :2])
# Filter out confidence scores below threshold
class_conf, class_pred = pred[:, 5:].max(1) # max class_conf, index
pred[:, 4] *= class_conf # final conf = obj_conf * class_conf
i = (pred[:, 4] > conf_thres) & (pred[:, 2] > min_wh) & (pred[:, 3] > min_wh)
# s2=time.time()
pred2 = pred[i]
# print("++++++pred2 = pred[i]",time.time()-s2, pred2)
# If none are remaining => process next image
if len(pred2) == 0:
continue
# Select predicted classes
class_conf = class_conf[i]
class_pred = class_pred[i].unsqueeze(1).float()
# Box (center x, center y, width, height) to (x1, y1, x2, y2)
pred2[:, :4] = xywh2xyxy(pred2[:, :4])
# pred[:, 4] *= class_conf # improves mAP from 0.549 to 0.551
# Detections ordered as (x1y1x2y2, obj_conf, class_conf, class_pred)
pred2 = torch.cat((pred2[:, :5], class_conf.unsqueeze(1), class_pred), 1)
# Get detections sorted by decreasing confidence scores
pred2 = pred2[(-pred2[:, 4]).argsort()]
det_max = []
nms_style = 'MERGE' # 'OR' (default), 'AND', 'MERGE' (experimental)
for c in pred2[:, -1].unique():
dc = pred2[pred2[:, -1] == c] # select class c
dc = dc[:min(len(dc), 100)] # limit to first 100 boxes
# Non-maximum suppression
if nms_style == 'OR': # default
# METHOD1
# ind = list(range(len(dc)))
# while len(ind):
# j = ind[0]
# det_max.append(dc[j:j + 1]) # save highest conf detection
# reject = (bbox_iou(dc[j], dc[ind]) > nms_thres).nonzero()
# [ind.pop(i) for i in reversed(reject)]
# METHOD2
while dc.shape[0]:
det_max.append(dc[:1]) # save highest conf detection
if len(dc) == 1: # Stop if we're at the last detection
break
iou = bbox_iou(dc[0], dc[1:]) # iou with other boxes
dc = dc[1:][iou < nms_thres] # remove ious > threshold
elif nms_style == 'AND': # requires overlap, single boxes erased
while len(dc) > 1:
iou = bbox_iou(dc[0], dc[1:]) # iou with other boxes
if iou.max() > 0.5:
det_max.append(dc[:1])
dc = dc[1:][iou < nms_thres] # remove ious > threshold
elif nms_style == 'MERGE': # weighted mixture box
while len(dc):
i = bbox_iou(dc[0], dc) > nms_thres # iou with other boxes
weights = dc[i, 4:5]
dc[0, :4] = (weights * dc[i, :4]).sum(0) / weights.sum()
det_max.append(dc[:1])
dc = dc[i == 0]
if len(det_max):
det_max = torch.cat(det_max) # concatenate
output[image_i] = det_max[(-det_max[:, 4]).argsort()] # sort
return output
def letterbox(img, height=416, augment=False, color=(127.5, 127.5, 127.5)):
# Resize a rectangular image to a padded square
shape = img.shape[:2] # shape = [height, width]
ratio = float(height) / max(shape) # ratio = old / new
new_shape = (round(shape[1] * ratio), round(shape[0] * ratio))
dw = (height - new_shape[0]) / 2 # width padding
dh = (height - new_shape[1]) / 2 # height padding
top, bottom = round(dh - 0.1), round(dh + 0.1)
left, right = round(dw - 0.1), round(dw + 0.1)
# resize img
if augment:
interpolation = np.random.choice([None, cv2.INTER_NEAREST, cv2.INTER_LINEAR,
None, cv2.INTER_NEAREST, cv2.INTER_LINEAR,
cv2.INTER_AREA, cv2.INTER_CUBIC, cv2.INTER_LANCZOS4])
if interpolation is None:
img = cv2.resize(img, new_shape)
else:
img = cv2.resize(img, new_shape, interpolation=interpolation)
else:
img = cv2.resize(img, new_shape, interpolation=cv2.INTER_NEAREST)
# print("resize time:",time.time()-s1)
img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # padded square
return img, ratio, dw, dh
#---------------------------------------------------------
# model_path = './coco_model/yolov3_coco.pt' # detection model path
# root_path = './test_images/'# test image folder
# model_arch = 'yolov3' # model type
# voc_config = 'cfg/voc.data' # model config file
# img_size = 416 # image size
# conf_thres = 0.35# detection confidence threshold
# nms_thres = 0.5 # nms threshold
class yolo_v3_hand_model(object):
def __init__(self,
model_path = './components/hand_detect/weights/latest_416-2021-02-19.pt',
model_arch = 'yolov3',
img_size=416,
conf_thres=0.16,
nms_thres=0.4,):
print("yolo v3 hand_model loading : {}".format(model_path))
self.use_cuda = torch.cuda.is_available()
self.device = torch.device("cuda:0" if self.use_cuda else "cpu")
self.img_size = img_size
self.classes = ["Hand"]
self.num_classes = len(self.classes)
self.conf_thres = conf_thres
self.nms_thres = nms_thres
#-----------------------------------------------------------------------
weights = model_path
if "-tiny" in model_arch:
a_scalse = 416./img_size
anchors=[(10, 14), (23, 27), (37, 58), (81, 82), (135, 169), (344, 319)]
anchors_new = [ (int(anchors[j][0]/a_scalse),int(anchors[j][1]/a_scalse)) for j in range(len(anchors)) ]
model = Yolov3Tiny(self.num_classes,anchors = anchors_new)
else:
a_scalse = 416./img_size
anchors=[(10,13), (16,30), (33,23), (30,61), (62,45), (59,119), (116,90), (156,198), (373,326)]
anchors_new = [ (int(anchors[j][0]/a_scalse),int(anchors[j][1]/a_scalse)) for j in range(len(anchors)) ]
model = Yolov3(self.num_classes,anchors = anchors_new)
#-----------------------------------------------------------------------
self.model = model
# show_model_param(self.model)# print the model parameters
# print('num_classes : ',self.num_classes)
self.device = select_device() # choose the device to run on
self.use_cuda = torch.cuda.is_available()
# Load weights
if os.access(weights,os.F_OK):# check whether the model file exists
self.model.load_state_dict(torch.load(weights, map_location=lambda storage, loc: storage)['model'])
else:
print('------- >>> error : model file does not exist')
return False
#
self.model.eval()# set the model to eval mode
acc_model('',self.model)
self.model = self.model.to(self.device)
def predict(self, img_,vis):
with torch.no_grad():
t = time.time()
img = process_data(img_, self.img_size)
t1 = time.time()
img = torch.from_numpy(img).unsqueeze(0).to(self.device)
pred, _ = self.model(img)# run detection on the image
t2 = time.time()
detections = non_max_suppression(pred, self.conf_thres, self.nms_thres)[0] # nms
t3 = time.time()
# print("t3 time:", t3)
if (detections is None) or len(detections) == 0:
return []
# Rescale boxes from 416 to true image size
detections[:, :4] = scale_coords(self.img_size, detections[:, :4], img_.shape).round()
# draw detection results
dets_for_landmarks = []
colors = [(v // 32 * 64 + 64, (v // 8) % 4 * 64, v % 8 * 32) for v in range(1, 10 + 1)][::-1]
output_dict_ = []
for *xyxy, conf, cls_conf, cls in detections:
label = '%s %.2f' % (self.classes[0], conf)
x1,y1,x2,y2 = xyxy
output_dict_.append((float(x1),float(y1),float(x2),float(y2),float(conf.item())))
if vis:
plot_one_box(xyxy, img_, label=label, color=(0,175,255), line_thickness = 2)
return output_dict_
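# --- Hedged usage sketch (not part of the original file): run hand detection on a
# single image; the weight path follows the README and the test image is hypothetical. ---
if __name__ == "__main__":
    detector = yolo_v3_hand_model(
        model_path="./components/hand_detect/weights/latest_416.pt",
        model_arch="yolov3", img_size=416, conf_thres=0.5, nms_thres=0.45)
    image = cv2.imread("./test.jpg")  # hypothetical test image
    if image is not None:
        boxes = detector.predict(image, vis=True)  # [(x1, y1, x2, y2, conf), ...]
        print("hands:", boxes)
        cv2.imwrite("./test_result.jpg", image)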
(This diff has been collapsed.)
#-*-coding:utf-8-*-
# date:2021-03-09
# Author: Eric.Lee
# function: handpose_x 21 keypoints 2D
import os
import torch
import cv2
import numpy as np
import json
import torch.nn as nn
import time
import math
from datetime import datetime
from hand_keypoints.models.resnet import resnet18,resnet34,resnet50,resnet101
from hand_keypoints.models.squeezenet import squeezenet1_1,squeezenet1_0
from hand_keypoints.models.shufflenetv2 import ShuffleNetV2
from hand_keypoints.models.shufflenet import ShuffleNet
from hand_keypoints.models.mobilenetv2 import MobileNetV2
from torchvision.models import shufflenet_v2_x1_5 ,shufflenet_v2_x1_0 , shufflenet_v2_x2_0
from hand_keypoints.models.rexnetv1 import ReXNetV1
from hand_keypoints.utils.common_utils import *
def draw_bd_handpose_c(img_,hand_,x,y,thick = 3):
# thick = 2
colors = [(0,215,255),(255,115,55),(5,255,55),(25,15,255),(225,15,55)]
#
cv2.line(img_, (int(hand_['0']['x']+x), int(hand_['0']['y']+y)),(int(hand_['1']['x']+x), int(hand_['1']['y']+y)), colors[0], thick)
cv2.line(img_, (int(hand_['1']['x']+x), int(hand_['1']['y']+y)),(int(hand_['2']['x']+x), int(hand_['2']['y']+y)), colors[0], thick)
cv2.line(img_, (int(hand_['2']['x']+x), int(hand_['2']['y']+y)),(int(hand_['3']['x']+x), int(hand_['3']['y']+y)), colors[0], thick)
cv2.line(img_, (int(hand_['3']['x']+x), int(hand_['3']['y']+y)),(int(hand_['4']['x']+x), int(hand_['4']['y']+y)), colors[0], thick)
cv2.line(img_, (int(hand_['0']['x']+x), int(hand_['0']['y']+y)),(int(hand_['5']['x']+x), int(hand_['5']['y']+y)), colors[1], thick)
cv2.line(img_, (int(hand_['5']['x']+x), int(hand_['5']['y']+y)),(int(hand_['6']['x']+x), int(hand_['6']['y']+y)), colors[1], thick)
cv2.line(img_, (int(hand_['6']['x']+x), int(hand_['6']['y']+y)),(int(hand_['7']['x']+x), int(hand_['7']['y']+y)), colors[1], thick)
cv2.line(img_, (int(hand_['7']['x']+x), int(hand_['7']['y']+y)),(int(hand_['8']['x']+x), int(hand_['8']['y']+y)), colors[1], thick)
cv2.line(img_, (int(hand_['0']['x']+x), int(hand_['0']['y']+y)),(int(hand_['9']['x']+x), int(hand_['9']['y']+y)), colors[2], thick)
cv2.line(img_, (int(hand_['9']['x']+x), int(hand_['9']['y']+y)),(int(hand_['10']['x']+x), int(hand_['10']['y']+y)), colors[2], thick)
cv2.line(img_, (int(hand_['10']['x']+x), int(hand_['10']['y']+y)),(int(hand_['11']['x']+x), int(hand_['11']['y']+y)), colors[2], thick)
cv2.line(img_, (int(hand_['11']['x']+x), int(hand_['11']['y']+y)),(int(hand_['12']['x']+x), int(hand_['12']['y']+y)), colors[2], thick)
cv2.line(img_, (int(hand_['0']['x']+x), int(hand_['0']['y']+y)),(int(hand_['13']['x']+x), int(hand_['13']['y']+y)), colors[3], thick)
cv2.line(img_, (int(hand_['13']['x']+x), int(hand_['13']['y']+y)),(int(hand_['14']['x']+x), int(hand_['14']['y']+y)), colors[3], thick)
cv2.line(img_, (int(hand_['14']['x']+x), int(hand_['14']['y']+y)),(int(hand_['15']['x']+x), int(hand_['15']['y']+y)), colors[3], thick)
cv2.line(img_, (int(hand_['15']['x']+x), int(hand_['15']['y']+y)),(int(hand_['16']['x']+x), int(hand_['16']['y']+y)), colors[3], thick)
cv2.line(img_, (int(hand_['0']['x']+x), int(hand_['0']['y']+y)),(int(hand_['17']['x']+x), int(hand_['17']['y']+y)), colors[4], thick)
cv2.line(img_, (int(hand_['17']['x']+x), int(hand_['17']['y']+y)),(int(hand_['18']['x']+x), int(hand_['18']['y']+y)), colors[4], thick)
cv2.line(img_, (int(hand_['18']['x']+x), int(hand_['18']['y']+y)),(int(hand_['19']['x']+x), int(hand_['19']['y']+y)), colors[4], thick)
cv2.line(img_, (int(hand_['19']['x']+x), int(hand_['19']['y']+y)),(int(hand_['20']['x']+x), int(hand_['20']['y']+y)), colors[4], thick)
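A hedged usage sketch for draw_bd_handpose_c, assuming it runs in this module: the hand dict maps keypoint indices '0'..'20' to crop-relative pixel coordinates, and (x, y) is the crop's top-left offset in the full frame. The keypoints below are fabricated, purely for illustration.
```
import numpy as np

# fabricated 21 keypoints laid out on a diagonal
hand = {str(i): {"x": 10.0 * i, "y": 8.0 * i} for i in range(21)}
canvas = np.zeros((256, 256, 3), dtype=np.uint8)
draw_bd_handpose_c(canvas, hand, x=0, y=0, thick=2)
```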
#
class handpose_x_model(object):
def __init__(self,
model_path = './components/hand_keypoints/weights/ReXNetV1-size-256-wingloss102-0.1063.pth',
img_size= 256,
num_classes = 42,# 手部关键点个数 * 2 : 21*2
model_arch = "rexnetv1",
):
# print("handpose_x loading : ",model_path)
self.use_cuda = torch.cuda.is_available()
self.device = torch.device("cuda:0" if self.use_cuda else "cpu") # 可选的设备类型及序号
self.img_size = img_size
#-----------------------------------------------------------------------
if model_arch == 'resnet_50':
model_ = resnet50(num_classes = num_classes,img_size = self.img_size)
elif model_arch == 'resnet_18':
model_ = resnet18(num_classes = num_classes,img_size = self.img_size)
elif model_arch == 'resnet_34':
model_ = resnet34(num_classes = num_classes,img_size = self.img_size)
elif model_arch == 'resnet_101':
model_ = resnet101(num_classes = num_classes,img_size = self.img_size)
elif model_arch == "squeezenet1_0":
model_ = squeezenet1_0(pretrained=True, num_classes=num_classes)
elif model_arch == "squeezenet1_1":
model_ = squeezenet1_1(pretrained=True, num_classes=num_classes)
elif model_arch == "shufflenetv2":
model_ = ShuffleNetV2(ratio=1., num_classes=num_classes)
elif model_arch == "shufflenet_v2_x1_5":
model_ = shufflenet_v2_x1_5(pretrained=False,num_classes=num_classes)
elif model_arch == "shufflenet_v2_x1_0":
model_ = shufflenet_v2_x1_0(pretrained=False,num_classes=num_classes)
elif model_arch == "shufflenet_v2_x2_0":
model_ = shufflenet_v2_x2_0(pretrained=False,num_classes=num_classes)
elif model_arch == "shufflenet":
model_ = ShuffleNet(num_blocks = [2,4,2], num_classes=num_classes, groups=3)
elif model_arch == "mobilenetv2":
model_ = MobileNetV2(num_classes=num_classes)
elif model_arch == "rexnetv1":
model_ = ReXNetV1(num_classes=num_classes)
else:
raise ValueError("unsupported model_arch : {}".format(model_arch))
#-----------------------------------------------------------------------
model_ = model_.to(self.device)
model_.eval() # 设置为前向推断模式
# 加载测试模型
if os.access(model_path,os.F_OK):# load checkpoint if it exists
chkpt = torch.load(model_path, map_location=self.device)
model_.load_state_dict(chkpt)
print('handpose_x model loading : {}'.format(model_path))
else:
print('handpose_x model not found, using random weights : {}'.format(model_path))
self.model_handpose = model_
def predict(self, img, vis = False):
with torch.no_grad():
if not((img.shape[0] == self.img_size) and (img.shape[1] == self.img_size)):
img = cv2.resize(img, (self.img_size,self.img_size), interpolation = cv2.INTER_CUBIC)
img_ = img.astype(np.float32)
img_ = (img_-128.)/256.
img_ = img_.transpose(2, 0, 1)
img_ = torch.from_numpy(img_)
img_ = img_.unsqueeze_(0)
if self.use_cuda:
img_ = img_.cuda() # (bs, 3, h, w)
pre_ = self.model_handpose(img_.float())
output = pre_.cpu().detach().numpy()
output = np.squeeze(output)
return output
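A decoding sketch (an assumption, not code from this file): predict() returns 42 values, and downstream code in this project treats them as 21 (x, y) pairs normalised to [0, 1] relative to the input crop, so mapping them back to pixels looks roughly like this; random values stand in for a real prediction.
```
import numpy as np

output = np.random.rand(42).astype(np.float32)  # stand-in for handpose_x_model().predict(crop)
crop_w, crop_h = 256, 256                       # assumed crop size fed to the model
pts = [(float(output[2 * i + 0]) * crop_w,
        float(output[2 * i + 1]) * crop_h) for i in range(21)]
print(pts[0])  # keypoint 0 (wrist) in crop pixel coordinates
```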
"""mobilenetv2 in pytorch
[1] Mark Sandler, Andrew Howard, Menglong Zhu, Andrey Zhmoginov, Liang-Chieh Chen
MobileNetV2: Inverted Residuals and Linear Bottlenecks
https://arxiv.org/abs/1801.04381
"""
import torch
import torch.nn as nn
import torch.nn.functional as F
class LinearBottleNeck(nn.Module):
def __init__(self, in_channels, out_channels, stride, t=6, class_num=100):
super().__init__()
self.residual = nn.Sequential(
nn.Conv2d(in_channels, in_channels * t, 1),
nn.BatchNorm2d(in_channels * t),
nn.ReLU6(inplace=True),
nn.Conv2d(in_channels * t, in_channels * t, 3, stride=stride, padding=1, groups=in_channels * t),
nn.BatchNorm2d(in_channels * t),
nn.ReLU6(inplace=True),
nn.Conv2d(in_channels * t, out_channels, 1),
nn.BatchNorm2d(out_channels)
)
self.stride = stride
self.in_channels = in_channels
self.out_channels = out_channels
def forward(self, x):
residual = self.residual(x)
if self.stride == 1 and self.in_channels == self.out_channels:
residual += x
return residual
class MobileNetV2(nn.Module):
def __init__(self, num_classes=100,dropout_factor = 1.0):
super().__init__()
self.pre = nn.Sequential(
nn.Conv2d(3, 32, 1, padding=1),
nn.BatchNorm2d(32),
nn.ReLU6(inplace=True)
)
self.stage1 = LinearBottleNeck(32, 16, 1, 1)
self.stage2 = self._make_stage(2, 16, 24, 2, 6)
self.stage3 = self._make_stage(3, 24, 32, 2, 6)
self.stage4 = self._make_stage(4, 32, 64, 2, 6)
self.stage5 = self._make_stage(3, 64, 96, 1, 6)
self.stage6 = self._make_stage(3, 96, 160, 1, 6)
self.stage7 = LinearBottleNeck(160, 320, 1, 6)
self.conv1 = nn.Sequential(
nn.Conv2d(320, 1280, 1),
nn.BatchNorm2d(1280),
nn.ReLU6(inplace=True)
)
self.conv2 = nn.Conv2d(1280, num_classes, 1)
self.dropout = nn.Dropout(dropout_factor)
def forward(self, x):
x = self.pre(x)
x = self.stage1(x)
x = self.stage2(x)
x = self.stage3(x)
x = self.stage4(x)
x = self.stage5(x)
x = self.stage6(x)
x = self.stage7(x)
x = self.conv1(x)
x = F.adaptive_avg_pool2d(x, 1)
x = self.dropout(x)
x = self.conv2(x)
x = x.view(x.size(0), -1)
return x
def _make_stage(self, repeat, in_channels, out_channels, stride, t):
layers = []
layers.append(LinearBottleNeck(in_channels, out_channels, stride, t))
while repeat - 1:
layers.append(LinearBottleNeck(out_channels, out_channels, 1, t))
repeat -= 1
return nn.Sequential(*layers)
def mobilenetv2():
return MobileNetV2()
#-*-coding:utf-8-*-
# date:2020-08-08
# Author: X.L.Eric
# function: my model
import torch
import torch.nn as nn
import torch.nn.functional as F
class MY_Net(nn.Module):
def __init__(self,num_classes):# op 初始化
super(MY_Net, self).__init__()
self.cov = nn.Conv2d(3, 32, 3)
self.relu = nn.ReLU(inplace=True)
layers1 = []
# Conv2d : in_channels, out_channels, kernel_size, stride, padding
layers1.append(nn.Conv2d(in_channels = 32, out_channels = 64, kernel_size = 3,stride=1,padding = 0))
layers1.append(nn.BatchNorm2d(64,affine=True))
layers1.append(nn.ReLU(inplace=True))
layers1.append(nn.AvgPool2d(kernel_size=3, stride=2, padding=1))
self.layers1 = nn.Sequential(*layers1)
layers2 = []
layers2.append(nn.Conv2d(64, 128, 3))
layers2.append(nn.BatchNorm2d(128,affine=True))
layers2.append(nn.ReLU(inplace=True))
layers2.append(nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
self.layers2 = nn.Sequential(*layers2)
layers3 = []
layers3.append(nn.Conv2d(128, 256, 3,stride=2))
layers3.append(nn.BatchNorm2d(256,affine=True))
layers3.append(nn.ReLU(inplace=True))
layers3.append(nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
self.layers3 = nn.Sequential(*layers3)
layers4 = []
layers4.append(nn.Conv2d(256, 512, 3,stride=2))
layers4.append(nn.BatchNorm2d(512,affine=True))
layers4.append(nn.ReLU(inplace=True))
layers4.append(nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
layers4.append(nn.Conv2d(512, 512, 1,stride=1))
self.layers4 = nn.Sequential(*layers4)
self.avgpool = nn.AdaptiveAvgPool2d((1, 1))# 自适应均值池化
self.fc = nn.Linear(in_features = 512 , out_features = num_classes)# 全连接 fc
def forward(self, x):# 模型前向推断
x = self.cov(x)
x = self.relu(x)
x = self.layers1(x)
x = self.layers2(x)
x = self.layers3(x)
x = self.layers4(x)
x = self.avgpool(x)
x = x.reshape(x.size(0), -1)
x = self.fc(x)
return x
if __name__ == "__main__":
#输入批次图片(batchsize,channel,height,width):8 ,3*256*256
dummy_input = torch.randn([8, 3, 256,256])
model = MY_Net(num_classes = 100)# 分类数为 100 类
print('model:\n',model)# 打印模型op
output = model(dummy_input)# 模型前向推断
# 模型前向推断输出特征尺寸
print('model inference feature size: ',output.size())
print(output)
output_ = F.softmax(output,dim = 1)
#
print(output_)
import torch
import torch.nn as nn
import math
import torch.utils.model_zoo as model_zoo
__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
'resnet152']
model_urls = {
'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
}
def conv3x3(in_planes, out_planes, stride=1):
"""3x3 convolution with padding"""
return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
padding=1, bias=False)
class BasicBlock(nn.Module):
expansion = 1
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(BasicBlock, self).__init__()
self.conv1 = conv3x3(inplanes, planes, stride)
self.bn1 = nn.BatchNorm2d(planes)
self.relu = nn.ReLU(inplace=True)
self.conv2 = conv3x3(planes, planes)
self.bn2 = nn.BatchNorm2d(planes)
self.downsample = downsample
self.stride = stride
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = self.relu(out)
return out
class Bottleneck(nn.Module):
expansion = 4
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(Bottleneck, self).__init__()
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(planes)
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(planes)
self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
self.bn3 = nn.BatchNorm2d(planes * 4)
self.relu = nn.ReLU(inplace=True)
self.downsample = downsample
self.stride = stride
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = self.relu(out)
return out
class ResNet(nn.Module):
def __init__(self, block, layers, num_classes=1000, img_size=224,dropout_factor = 1.):
self.inplanes = 64
self.dropout_factor = dropout_factor
super(ResNet, self).__init__()
# 26
# 586 train_sequence
self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
bias=False)
self.bn1 = nn.BatchNorm2d(64)
self.relu = nn.ReLU(inplace=True)
# see this issue: https://github.com/xxradon/PytorchToCaffe/issues/16
# self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)
self.layer1 = self._make_layer(block, 64, layers[0])
self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
assert img_size % 32 == 0
pool_kernel = int(img_size / 32)
self.avgpool = nn.AvgPool2d(pool_kernel, stride=1, ceil_mode=True)
self.dropout = nn.Dropout(self.dropout_factor)
self.fc = nn.Linear(512 * block.expansion, num_classes)
for m in self.modules():
if isinstance(m, nn.Conv2d):
n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
m.weight.data.normal_(0, math.sqrt(2. / n))
elif isinstance(m, nn.BatchNorm2d):
m.weight.data.fill_(1)
m.bias.data.zero_()
def _make_layer(self, block, planes, blocks, stride=1):
downsample = None
if stride != 1 or self.inplanes != planes * block.expansion:
downsample = nn.Sequential(
nn.Conv2d(self.inplanes, planes * block.expansion,
kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(planes * block.expansion),
)
layers = []
layers.append(block(self.inplanes, planes, stride, downsample))
self.inplanes = planes * block.expansion
for i in range(1, blocks):
layers.append(block(self.inplanes, planes))
return nn.Sequential(*layers)
def forward(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
x = self.avgpool(x)
x = x.view(x.size(0), -1)
x = self.dropout(x)
x = self.fc(x)
return x
def load_model(model, pretrained_state_dict):
model_dict = model.state_dict()
pretrained_dict = {k: v for k, v in pretrained_state_dict.items() if
k in model_dict and model_dict[k].size() == pretrained_state_dict[k].size()}
model.load_state_dict(pretrained_dict, strict=False)
if len(pretrained_dict) == 0:
print("[INFO] No params were loaded ...")
else:
for k, v in pretrained_state_dict.items():
if k in pretrained_dict:
print("==>> Load {} {}".format(k, v.size()))
else:
print("[INFO] Skip {} {}".format(k, v.size()))
return model
def resnet18(pretrained=False, **kwargs):
"""Constructs a ResNet-18 model.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
"""
model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
if pretrained:
# model.load_state_dict(model_zoo.load_url(model_urls['resnet18']))
print("Load pretrained model from {}".format(model_urls['resnet18']))
pretrained_state_dict = model_zoo.load_url(model_urls['resnet18'])
model = load_model(model, pretrained_state_dict)
return model
def resnet34(pretrained=False, **kwargs):
"""Constructs a ResNet-34 model.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
"""
model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs)
if pretrained:
# model.load_state_dict(model_zoo.load_url(model_urls['resnet34']))
print("Load pretrained model from {}".format(model_urls['resnet34']))
pretrained_state_dict = model_zoo.load_url(model_urls['resnet34'])
model = load_model(model, pretrained_state_dict)
return model
def resnet50(pretrained=False, **kwargs):
"""Constructs a ResNet-50 model.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
"""
model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
if pretrained:
# model.load_state_dict(model_zoo.load_url(model_urls['resnet50']))
print("Load pretrained model from {}".format(model_urls['resnet50']))
pretrained_state_dict = model_zoo.load_url(model_urls['resnet50'])
model = load_model(model, pretrained_state_dict)
return model
def resnet101(pretrained=False, **kwargs):
"""Constructs a ResNet-101 model.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
"""
model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs)
if pretrained:
# model.load_state_dict(model_zoo.load_url(model_urls['resnet101']))
print("Load pretrained model from {}".format(model_urls['resnet101']))
pretrained_state_dict = model_zoo.load_url(model_urls['resnet101'])
model = load_model(model, pretrained_state_dict)
return model
def resnet152(pretrained=False, **kwargs):
"""Constructs a ResNet-152 model.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
"""
model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs)
if pretrained:
# model.load_state_dict(model_zoo.load_url(model_urls['resnet152']))
print("Load pretrained model from {}".format(model_urls['resnet152']))
pretrained_state_dict = model_zoo.load_url(model_urls['resnet152'])
model = load_model(model, pretrained_state_dict)
return model
if __name__ == "__main__":
input = torch.randn([32, 3, 256,256])
model = resnet34(False, num_classes=2, img_size=256)
output = model(input)
print(output.size())
import torch
import torch.nn as nn
__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
'resnet152']
model_urls = {
'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
}
def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
"""3x3 convolution with padding"""
return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
padding=dilation, groups=groups, bias=False, dilation=dilation)
def conv1x1(in_planes, out_planes, stride=1):
"""1x1 convolution"""
return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
class Bottleneck(nn.Module):
expansion = 4
def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
base_width=64, dilation=1, norm_layer=None):
super(Bottleneck, self).__init__()
if norm_layer is None:
norm_layer = nn.BatchNorm2d
width = int(planes * (base_width / 64.)) * groups
# Both self.conv2 and self.downsample layers downsample the input when stride != 1
self.conv1 = conv1x1(inplanes, width)
self.bn1 = norm_layer(width)
self.conv2 = conv3x3(width, width, stride, groups, dilation)
self.bn2 = norm_layer(width)
self.conv3 = conv1x1(width, planes * self.expansion)
self.bn3 = norm_layer(planes * self.expansion)
self.relu = nn.ReLU(inplace=True)
self.downsample = downsample
self.stride = stride
def forward(self, x):
identity = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
if self.downsample is not None:
identity = self.downsample(x)
out += identity
out = self.relu(out)
return out
class ResNet(nn.Module):
def __init__(self, block, layers, num_classes=1000,dropout_factor = 1., zero_init_residual=False,
groups=1, width_per_group=64, replace_stride_with_dilation=None,
norm_layer=nn.BatchNorm2d):
super(ResNet, self).__init__()
if norm_layer is None:
print('BatchNorm2d')
norm_layer = nn.BatchNorm2d
self._norm_layer = norm_layer
self.inplanes = 64
self.dilation = 1
if replace_stride_with_dilation is None:
# each element in the tuple indicates if we should replace
# the 2x2 stride with a dilated convolution instead
replace_stride_with_dilation = [False, False, False]
if len(replace_stride_with_dilation) != 3:
raise ValueError("replace_stride_with_dilation should be None "
"or a 3-element tuple, got {}".format(replace_stride_with_dilation))
self.groups = groups
self.base_width = width_per_group
self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3,
bias=False)
self.bn1 = norm_layer(self.inplanes)
self.relu = nn.ReLU(inplace=True)
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
self.layer1 = self._make_layer(block, 64, layers[0])
self.layer2 = self._make_layer(block, 128, layers[1], stride=2,
dilate=replace_stride_with_dilation[0])
self.layer3 = self._make_layer(block, 256, layers[2], stride=2,
dilate=replace_stride_with_dilation[1])
self.layer4 = self._make_layer(block, 512, layers[3], stride=2,
dilate=replace_stride_with_dilation[2])
self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
self.dropout = nn.Dropout(dropout_factor)
self.fc = nn.Linear(512 * block.expansion, num_classes)
# ----------------------------------------------------------------------------------
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
# Zero-initialize the last BN in each residual branch,
# so that the residual branch starts with zeros, and each residual block behaves like an identity.
# This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
if zero_init_residual:
for m in self.modules():
if isinstance(m, Bottleneck):
nn.init.constant_(m.bn3.weight, 0)
elif isinstance(m, BasicBlock):
nn.init.constant_(m.bn2.weight, 0)
def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
norm_layer = self._norm_layer
downsample = None
previous_dilation = self.dilation
if dilate:
self.dilation *= stride
stride = 1
if stride != 1 or self.inplanes != planes * block.expansion:
downsample = nn.Sequential(
conv1x1(self.inplanes, planes * block.expansion, stride),
norm_layer(planes * block.expansion),
)
layers = []
layers.append(block(self.inplanes, planes, stride, downsample, self.groups,
self.base_width, previous_dilation, norm_layer))
self.inplanes = planes * block.expansion
for _ in range(1, blocks):
layers.append(block(self.inplanes, planes, groups=self.groups,
base_width=self.base_width, dilation=self.dilation,
norm_layer=norm_layer))
return nn.Sequential(*layers)
def forward(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
x = self.avgpool(x)
x = x.reshape(x.size(0), -1)
x = self.dropout(x)
x = self.fc(x)
return x
def _resnet(arch, block, layers, **kwargs):
model = ResNet(block, layers, **kwargs)
return model
def resnet50(**kwargs):
r"""ResNet-50 model from
`"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_
"""
print('Bottleneck:{}'.format(Bottleneck))
return _resnet('resnet50', Bottleneck, [3, 4, 6, 3],**kwargs)
if __name__ == "__main__":
dummy_input = torch.randn([32, 3, 128,128])
num_classes = 100
model = resnet50(num_classes = num_classes,dropout_factor=0.5)
print(model)
output = model(dummy_input)
print(output.size())
"""
ReXNet
Copyright (c) 2020-present NAVER Corp.
MIT license
"""
import torch
import torch.nn as nn
from math import ceil
# Memory-efficient Swish using torch.jit.script borrowed from the code in (https://twitter.com/jeremyphoward/status/1188251041835315200)
# Currently use memory-efficient Swish as default:
USE_MEMORY_EFFICIENT_SWISH = True
if USE_MEMORY_EFFICIENT_SWISH:
@torch.jit.script
def swish_fwd(x):
return x.mul(torch.sigmoid(x))
@torch.jit.script
def swish_bwd(x, grad_output):
x_sigmoid = torch.sigmoid(x)
return grad_output * (x_sigmoid * (1. + x * (1. - x_sigmoid)))
class SwishJitImplementation(torch.autograd.Function):
@staticmethod
def forward(ctx, x):
ctx.save_for_backward(x)
return swish_fwd(x)
@staticmethod
def backward(ctx, grad_output):
x = ctx.saved_tensors[0]
return swish_bwd(x, grad_output)
def swish(x, inplace=False):
return SwishJitImplementation.apply(x)
else:
def swish(x, inplace=False):
return x.mul_(x.sigmoid()) if inplace else x.mul(x.sigmoid())
class Swish(nn.Module):
def __init__(self, inplace=True):
super(Swish, self).__init__()
self.inplace = inplace
def forward(self, x):
return swish(x, self.inplace)
def ConvBNAct(out, in_channels, channels, kernel=1, stride=1, pad=0,
num_group=1, active=True, relu6=False):
out.append(nn.Conv2d(in_channels, channels, kernel,
stride, pad, groups=num_group, bias=False))
out.append(nn.BatchNorm2d(channels))
if active:
out.append(nn.ReLU6(inplace=True) if relu6 else nn.ReLU(inplace=True))
def ConvBNSwish(out, in_channels, channels, kernel=1, stride=1, pad=0, num_group=1):
out.append(nn.Conv2d(in_channels, channels, kernel,
stride, pad, groups=num_group, bias=False))
out.append(nn.BatchNorm2d(channels))
out.append(Swish())
class SE(nn.Module):
def __init__(self, in_channels, channels, se_ratio=12):
super(SE, self).__init__()
self.avg_pool = nn.AdaptiveAvgPool2d(1)
self.fc = nn.Sequential(
nn.Conv2d(in_channels, channels // se_ratio, kernel_size=1, padding=0),
nn.BatchNorm2d(channels // se_ratio),
nn.ReLU(inplace=True),
nn.Conv2d(channels // se_ratio, channels, kernel_size=1, padding=0),
nn.Sigmoid()
)
def forward(self, x):
y = self.avg_pool(x)
y = self.fc(y)
return x * y
class LinearBottleneck(nn.Module):
def __init__(self, in_channels, channels, t, stride, use_se=True, se_ratio=12,
**kwargs):
super(LinearBottleneck, self).__init__(**kwargs)
self.use_shortcut = stride == 1 and in_channels <= channels
self.in_channels = in_channels
self.out_channels = channels
out = []
if t != 1:
dw_channels = in_channels * t
ConvBNSwish(out, in_channels=in_channels, channels=dw_channels)
else:
dw_channels = in_channels
ConvBNAct(out, in_channels=dw_channels, channels=dw_channels, kernel=3, stride=stride, pad=1,
num_group=dw_channels, active=False)
if use_se:
out.append(SE(dw_channels, dw_channels, se_ratio))
out.append(nn.ReLU6())
ConvBNAct(out, in_channels=dw_channels, channels=channels, active=False, relu6=True)
self.out = nn.Sequential(*out)
def forward(self, x):
out = self.out(x)
if self.use_shortcut:
out[:, 0:self.in_channels] += x
return out
class ReXNetV1(nn.Module):
def __init__(self, input_ch=16, final_ch=180, width_mult=1.0, depth_mult=1.0, num_classes=1000,
use_se=True,
se_ratio=12,
dropout_factor=0.2,
bn_momentum=0.9):
super(ReXNetV1, self).__init__()
layers = [1, 2, 2, 3, 3, 5]
strides = [1, 2, 2, 2, 1, 2]
use_ses = [False, False, True, True, True, True]
layers = [ceil(element * depth_mult) for element in layers]
strides = sum([[element] + [1] * (layers[idx] - 1)
for idx, element in enumerate(strides)], [])
if use_se:
use_ses = sum([[element] * layers[idx] for idx, element in enumerate(use_ses)], [])
else:
use_ses = [False] * sum(layers[:])
ts = [1] * layers[0] + [6] * sum(layers[1:])
self.depth = sum(layers[:]) * 3
stem_channel = 32 / width_mult if width_mult < 1.0 else 32
inplanes = input_ch / width_mult if width_mult < 1.0 else input_ch
features = []
in_channels_group = []
channels_group = []
# The following channel configuration is a simple instance to make each layer become an expand layer.
for i in range(self.depth // 3):
if i == 0:
in_channels_group.append(int(round(stem_channel * width_mult)))
channels_group.append(int(round(inplanes * width_mult)))
else:
in_channels_group.append(int(round(inplanes * width_mult)))
inplanes += final_ch / (self.depth // 3 * 1.0)
channels_group.append(int(round(inplanes * width_mult)))
ConvBNSwish(features, 3, int(round(stem_channel * width_mult)), kernel=3, stride=2, pad=1)
for block_idx, (in_c, c, t, s, se) in enumerate(zip(in_channels_group, channels_group, ts, strides, use_ses)):
features.append(LinearBottleneck(in_channels=in_c,
channels=c,
t=t,
stride=s,
use_se=se, se_ratio=se_ratio))
pen_channels = int(1280 * width_mult)
ConvBNSwish(features, c, pen_channels)
features.append(nn.AdaptiveAvgPool2d(1))
self.features = nn.Sequential(*features)
self.output = nn.Sequential(
nn.Dropout(dropout_factor),
nn.Conv2d(pen_channels, num_classes, 1, bias=True))
def forward(self, x):
x = self.features(x)
x = self.output(x).squeeze()
return x
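A quick shape check for the ReXNetV1 regressor as configured in this project (num_classes=42, 256x256 input); note that forward() ends with squeeze(), so a batch of one collapses to a flat 42-dim vector. A sketch assuming it runs in this module.
```
import torch

model = ReXNetV1(num_classes=42)
model.eval()
with torch.no_grad():
    out = model(torch.randn(2, 3, 256, 256))
print(out.shape)  # torch.Size([2, 42]); with batch size 1 it would be [42]
```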
"""shufflenet in pytorch
[1] Xiangyu Zhang, Xinyu Zhou, Mengxiao Lin, Jian Sun.
ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices
https://arxiv.org/abs/1707.01083v2
"""
from functools import partial
import torch
import torch.nn as nn
class BasicConv2d(nn.Module):
def __init__(self, input_channels, output_channels, kernel_size, **kwargs):
super().__init__()
self.conv = nn.Conv2d(input_channels, output_channels, kernel_size, **kwargs)
self.bn = nn.BatchNorm2d(output_channels)
self.relu = nn.ReLU(inplace=True)
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
x = self.relu(x)
return x
class ChannelShuffle(nn.Module):
def __init__(self, groups):
super().__init__()
self.groups = groups
def forward(self, x):
batchsize, channels, height, width = x.data.size()
channels_per_group = int(channels / self.groups)
#"""suppose a convolutional layer with g groups whose output has
#g x n channels; we first reshape the output channel dimension
#into (g, n)"""
x = x.view(batchsize, self.groups, channels_per_group, height, width)
#"""transposing and then flattening it back as the input of next layer."""
x = x.transpose(1, 2).contiguous()
x = x.view(batchsize, -1, height, width)
return x
class DepthwiseConv2d(nn.Module):
def __init__(self, input_channels, output_channels, kernel_size, **kwargs):
super().__init__()
self.depthwise = nn.Sequential(
nn.Conv2d(input_channels, output_channels, kernel_size, **kwargs),
nn.BatchNorm2d(output_channels)
)
def forward(self, x):
return self.depthwise(x)
class PointwiseConv2d(nn.Module):
def __init__(self, input_channels, output_channels, **kwargs):
super().__init__()
self.pointwise = nn.Sequential(
nn.Conv2d(input_channels, output_channels, 1, **kwargs),
nn.BatchNorm2d(output_channels)
)
def forward(self, x):
return self.pointwise(x)
class ShuffleNetUnit(nn.Module):
def __init__(self, input_channels, output_channels, stage, stride, groups):
super().__init__()
#"""Similar to [9], we set the number of bottleneck channels to 1/4
#of the output channels for each ShuffleNet unit."""
self.bottlneck = nn.Sequential(
PointwiseConv2d(
input_channels,
int(output_channels / 4),
groups=groups
),
nn.ReLU(inplace=True)
)
#"""Note that for Stage 2, we do not apply group convolution on the first pointwise
#layer because the number of input channels is relatively small."""
if stage == 2:
self.bottlneck = nn.Sequential(
PointwiseConv2d(
input_channels,
int(output_channels / 4),
groups=groups
),
nn.ReLU(inplace=True)
)
self.channel_shuffle = ChannelShuffle(groups)
self.depthwise = DepthwiseConv2d(
int(output_channels / 4),
int(output_channels / 4),
3,
groups=int(output_channels / 4),
stride=stride,
padding=1
)
self.expand = PointwiseConv2d(
int(output_channels / 4),
output_channels,
groups=groups
)
self.relu = nn.ReLU(inplace=True)
self.fusion = self._add
self.shortcut = nn.Sequential()
#"""As for the case where ShuffleNet is applied with stride,
#we simply make two modifications (see Fig 2 (c)):
#(i) add a 3 × 3 average pooling on the shortcut path;
#(ii) replace the element-wise addition with channel concatenation,
#which makes it easy to enlarge channel dimension with little extra
#computation cost.
if stride != 1 or input_channels != output_channels:
self.shortcut = nn.AvgPool2d(3, stride=2, padding=1)
self.expand = PointwiseConv2d(
int(output_channels / 4),
output_channels - input_channels,
groups=groups
)
self.fusion = self._cat
def _add(self, x, y):
return torch.add(x, y)
def _cat(self, x, y):
return torch.cat([x, y], dim=1)
def forward(self, x):
shortcut = self.shortcut(x)
shuffled = self.bottlneck(x)
shuffled = self.channel_shuffle(shuffled)
shuffled = self.depthwise(shuffled)
shuffled = self.expand(shuffled)
output = self.fusion(shortcut, shuffled)
output = self.relu(output)
return output
class ShuffleNet(nn.Module):
def __init__(self, num_blocks = [2,4,2], num_classes=100, groups=3, dropout_factor = 1.0):
super().__init__()
if groups == 1:
out_channels = [24, 144, 288, 567]
elif groups == 2:
out_channels = [24, 200, 400, 800]
elif groups == 3:
out_channels = [24, 240, 480, 960]
elif groups == 4:
out_channels = [24, 272, 544, 1088]
elif groups == 8:
out_channels = [24, 384, 768, 1536]
self.conv1 = BasicConv2d(3, out_channels[0], 3, padding=1, stride=1)
self.input_channels = out_channels[0]
self.stage2 = self._make_stage(
ShuffleNetUnit,
num_blocks[0],
out_channels[1],
stride=2,
stage=2,
groups=groups
)
self.stage3 = self._make_stage(
ShuffleNetUnit,
num_blocks[1],
out_channels[2],
stride=2,
stage=3,
groups=groups
)
self.stage4 = self._make_stage(
ShuffleNetUnit,
num_blocks[2],
out_channels[3],
stride=2,
stage=4,
groups=groups
)
self.avg = nn.AdaptiveAvgPool2d((1, 1))
self.fc = nn.Linear(out_channels[3], num_classes)
self.dropout = nn.Dropout(dropout_factor)
def forward(self, x):
x = self.conv1(x)
x = self.stage2(x)
x = self.stage3(x)
x = self.stage4(x)
x = self.avg(x)
x = x.view(x.size(0), -1)
x = self.dropout(x)
x = self.fc(x)
return x
def _make_stage(self, block, num_blocks, output_channels, stride, stage, groups):
"""make shufflenet stage
Args:
block: block type, shuffle unit
out_channels: output depth channel number of this stage
num_blocks: how many blocks per stage
stride: the stride of the first block of this stage
stage: stage index
groups: group number of group convolution
Return:
return a shuffle net stage
"""
strides = [stride] + [1] * (num_blocks - 1)
units = []  # use a separate name so the integer stage index is not shadowed by this list
for s in strides:
units.append(
block(
self.input_channels,
output_channels,
stride=s,
stage=stage,
groups=groups
)
)
self.input_channels = output_channels
return nn.Sequential(*units)
def shufflenet():
return ShuffleNet([4, 8, 4])
"""shufflenetv2 in pytorch
[1] Ningning Ma, Xiangyu Zhang, Hai-Tao Zheng, Jian Sun
ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design
https://arxiv.org/abs/1807.11164
"""
import torch
import torch.nn as nn
import torch.nn.functional as F
def channel_split(x, split):
"""split a tensor into two pieces along channel dimension
Args:
x: input tensor
split:(int) channel size for each pieces
"""
assert x.size(1) == split * 2
return torch.split(x, split, dim=1)
def channel_shuffle(x, groups):
"""channel shuffle operation
Args:
x: input tensor
groups: input branch number
"""
batch_size, channels, height, width = x.size()
channels_per_group = int(channels // groups)
x = x.view(batch_size, groups, channels_per_group, height, width)
x = x.transpose(1, 2).contiguous()
x = x.view(batch_size, -1, height, width)
return x
class ShuffleUnit(nn.Module):
def __init__(self, in_channels, out_channels, stride):
super().__init__()
self.stride = stride
self.in_channels = in_channels
self.out_channels = out_channels
if stride != 1 or in_channels != out_channels:
self.residual = nn.Sequential(
nn.Conv2d(in_channels, in_channels, 1),
nn.BatchNorm2d(in_channels),
nn.ReLU(inplace=True),
nn.Conv2d(in_channels, in_channels, 3, stride=stride, padding=1, groups=in_channels),
nn.BatchNorm2d(in_channels),
nn.Conv2d(in_channels, int(out_channels / 2), 1),
nn.BatchNorm2d(int(out_channels / 2)),
nn.ReLU(inplace=True)
)
self.shortcut = nn.Sequential(
nn.Conv2d(in_channels, in_channels, 3, stride=stride, padding=1, groups=in_channels),
nn.BatchNorm2d(in_channels),
nn.Conv2d(in_channels, int(out_channels / 2), 1),
nn.BatchNorm2d(int(out_channels / 2)),
nn.ReLU(inplace=True)
)
else:
self.shortcut = nn.Sequential()
in_channels = int(in_channels / 2)
self.residual = nn.Sequential(
nn.Conv2d(in_channels, in_channels, 1),
nn.BatchNorm2d(in_channels),
nn.ReLU(inplace=True),
nn.Conv2d(in_channels, in_channels, 3, stride=stride, padding=1, groups=in_channels),
nn.BatchNorm2d(in_channels),
nn.Conv2d(in_channels, in_channels, 1),
nn.BatchNorm2d(in_channels),
nn.ReLU(inplace=True)
)
def forward(self, x):
if self.stride == 1 and self.out_channels == self.in_channels:
shortcut, residual = channel_split(x, int(self.in_channels / 2))
else:
shortcut = x
residual = x
shortcut = self.shortcut(shortcut)
residual = self.residual(residual)
x = torch.cat([shortcut, residual], dim=1)
x = channel_shuffle(x, 2)
return x
class ShuffleNetV2(nn.Module):
def __init__(self, ratio=1., num_classes=100, dropout_factor = 1.0):
super().__init__()
if ratio == 0.5:
out_channels = [48, 96, 192, 1024]
elif ratio == 1:
out_channels = [116, 232, 464, 1024]
elif ratio == 1.5:
out_channels = [176, 352, 704, 1024]
elif ratio == 2:
out_channels = [244, 488, 976, 2048]
else:
raise ValueError('unsupported ratio number')
self.pre = nn.Sequential(
nn.Conv2d(3, 24, 3, padding=1),
nn.BatchNorm2d(24)
)
self.stage2 = self._make_stage(24, out_channels[0], 3)
self.stage3 = self._make_stage(out_channels[0], out_channels[1], 7)
self.stage4 = self._make_stage(out_channels[1], out_channels[2], 3)
self.conv5 = nn.Sequential(
nn.Conv2d(out_channels[2], out_channels[3], 1),
nn.BatchNorm2d(out_channels[3]),
nn.ReLU(inplace=True)
)
self.fc = nn.Linear(out_channels[3], num_classes)
self.dropout = nn.Dropout(dropout_factor)
def forward(self, x):
x = self.pre(x)
x = self.stage2(x)
x = self.stage3(x)
x = self.stage4(x)
x = self.conv5(x)
x = F.adaptive_avg_pool2d(x, 1)
x = x.view(x.size(0), -1)
x = self.dropout(x)
x = self.fc(x)
return x
def _make_stage(self, in_channels, out_channels, repeat):
layers = []
layers.append(ShuffleUnit(in_channels, out_channels, 2))
while repeat:
layers.append(ShuffleUnit(out_channels, out_channels, 1))
repeat -= 1
return nn.Sequential(*layers)
def shufflenetv2():
return ShuffleNetV2()
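A similar sanity check for ShuffleNetV2; dropout_factor defaults to 1.0 here, so the sketch passes a smaller factor and puts the model in eval mode before the forward pass. Assumed to run in this module.
```
import torch

model = ShuffleNetV2(ratio=1., num_classes=42, dropout_factor=0.5)
model.eval()
with torch.no_grad():
    out = model(torch.randn(2, 3, 256, 256))
print(out.shape)  # torch.Size([2, 42])
```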
import math
import numpy as np
import torch
import torch.nn as nn
import torch.nn.init as init
import torch.utils.model_zoo as model_zoo
__all__ = ['SqueezeNet', 'squeezenet1_0', 'squeezenet1_1']
model_urls = {
'squeezenet1_0': 'https://download.pytorch.org/models/squeezenet1_0-a815701f.pth',
'squeezenet1_1': 'https://download.pytorch.org/models/squeezenet1_1-f364aa15.pth',
}
class Fire(nn.Module):
def __init__(self, inplanes, squeeze_planes,
expand1x1_planes, expand3x3_planes):
super(Fire, self).__init__()
self.inplanes = inplanes
self.squeeze = nn.Conv2d(inplanes, squeeze_planes, kernel_size=1)
self.squeeze_activation = nn.ReLU(inplace=True)
self.expand1x1 = nn.Conv2d(squeeze_planes, expand1x1_planes,
kernel_size=1)
self.expand1x1_activation = nn.ReLU(inplace=True)
self.expand3x3 = nn.Conv2d(squeeze_planes, expand3x3_planes,
kernel_size=3, padding=1)
self.expand3x3_activation = nn.ReLU(inplace=True)
def forward(self, x):
x = self.squeeze_activation(self.squeeze(x))
return torch.cat([
self.expand1x1_activation(self.expand1x1(x)),
self.expand3x3_activation(self.expand3x3(x))
], 1)
class SqueezeNet(nn.Module):
def __init__(self, version=1.0, num_classes=1000,dropout_factor = 1.):
super(SqueezeNet, self).__init__()
if version not in [1.0, 1.1]:
raise ValueError("Unsupported SqueezeNet version {version}:"
"1.0 or 1.1 expected".format(version=version))
self.num_classes = num_classes
if version == 1.0:
self.features = nn.Sequential(
nn.Conv2d(3, 96, kernel_size=7, stride=2),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
Fire(96, 16, 64, 64),
Fire(128, 16, 64, 64),
Fire(128, 32, 128, 128),
nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
Fire(256, 32, 128, 128),
Fire(256, 48, 192, 192),
Fire(384, 48, 192, 192),
Fire(384, 64, 256, 256),
nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
Fire(512, 64, 256, 256),
)
else:
self.features = nn.Sequential(
nn.Conv2d(3, 64, kernel_size=3, stride=2),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
Fire(64, 16, 64, 64),
Fire(128, 16, 64, 64),
nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
Fire(128, 32, 128, 128),
Fire(256, 32, 128, 128),
nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
Fire(256, 48, 192, 192),
Fire(384, 48, 192, 192),
Fire(384, 64, 256, 256),
Fire(512, 64, 256, 256),
)
# Final convolution is initialized differently from the rest
final_conv = nn.Conv2d(512, self.num_classes, kernel_size=1)
self.classifier = nn.Sequential(
nn.Dropout(p=dropout_factor),
final_conv,
nn.ReLU(inplace=True),
nn.AdaptiveAvgPool2d(1)
)
for m in self.modules():
if isinstance(m, nn.Conv2d):
if m is final_conv:
init.normal_(m.weight.data, mean=0.0, std=0.01)
else:
init.kaiming_uniform_(m.weight.data)
if m.bias is not None:
m.bias.data.zero_()
def forward(self, x):
x = self.features(x)
# print("features(x):", x.size())
x = self.classifier(x)
# print("features(x):", x.size())
return x.view(x.size(0), self.num_classes)
def squeezenet1_0(pretrained=False, **kwargs):
r"""SqueezeNet model architecture from the `"SqueezeNet: AlexNet-level
accuracy with 50x fewer parameters and <0.5MB model size"
<https://arxiv.org/abs/1602.07360>`_ paper.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
"""
model = SqueezeNet(version=1.0, **kwargs)
model_dict = model.state_dict()
if pretrained:
pretrained_state_dict = model_zoo.load_url(model_urls['squeezenet1_0'])
pretrained_dict = {k: v for k, v in pretrained_state_dict.items() if
k in model_dict and model_dict[k].size() == pretrained_state_dict[k].size()}
model.load_state_dict(pretrained_dict,strict=False)
return model
def squeezenet1_1(pretrained=False, **kwargs):
r"""SqueezeNet 1.1 model from the `official SqueezeNet repo
<https://github.com/DeepScale/SqueezeNet/tree/master/SqueezeNet_v1.1>`_.
SqueezeNet 1.1 has 2.4x less computation and slightly fewer parameters
than SqueezeNet 1.0, without sacrificing accuracy.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
"""
model = SqueezeNet(version=1.1, **kwargs)
model_dict = model.state_dict()
if pretrained:
pretrained_state_dict = model_zoo.load_url(model_urls['squeezenet1_1'])
pretrained_dict = {k: v for k, v in pretrained_state_dict.items() if
k in model_dict and model_dict[k].size() == pretrained_state_dict[k].size()}
model.load_state_dict(pretrained_dict,strict=False)
return model
if __name__ == "__main__":
from thop import profile
dummy = torch.from_numpy(np.random.random([16, 3, 256, 256]).astype(np.float32))
model = squeezenet1_0(pretrained=True, num_classes=42,dropout_factor = 0.5)
print(model)
flops, params = profile(model, inputs=(dummy, ))
model.eval()
output = model(dummy)
print(output.size())
print("flops: {}, params: {}".format(flops, params))
#-*-coding:utf-8-*-
# date:2020-04-11
# Author: Eric.Lee
# function: common utils
import os
import shutil
import cv2
import numpy as np
import json
import random
def mkdir_(path, flag_rm=False):
if os.path.exists(path):
if flag_rm == True:
shutil.rmtree(path)
os.mkdir(path)
print('remove {} done ~ '.format(path))
else:
os.mkdir(path)
def plot_box(bbox, img, color=None, label=None, line_thickness=None):
tl = line_thickness or round(0.002 * max(img.shape[0:2])) + 1
color = color or [random.randint(0, 255) for _ in range(3)]
c1, c2 = (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3]))
cv2.rectangle(img, c1, c2, color, thickness=tl)# 目标的bbox
if label:
tf = max(tl - 2, 1)
t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0] # label size
c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3 # 字体的bbox
cv2.rectangle(img, c1, c2, color, -1) # label 矩形填充
# 文本绘制
cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 4, [225, 255, 255],thickness=tf, lineType=cv2.LINE_AA)
class JSON_Encoder(json.JSONEncoder):
def default(self, obj):
if isinstance(obj, np.integer):
return int(obj)
elif isinstance(obj, np.floating):
return float(obj)
elif isinstance(obj, np.ndarray):
return obj.tolist()
else:
return super(JSON_Encoder, self).default(obj)
def draw_landmarks(img,output,draw_circle):
img_width = img.shape[1]
img_height = img.shape[0]
dict_landmarks = {}
for i in range(int(output.shape[0]/2)):
x = output[i*2+0]*float(img_width)
y = output[i*2+1]*float(img_height)
if 41>= i >=33:
if 'left_eyebrow' not in dict_landmarks.keys():
dict_landmarks['left_eyebrow'] = []
dict_landmarks['left_eyebrow'].append([int(x),int(y),(0,255,0)])
if draw_circle:
cv2.circle(img, (int(x),int(y)), 2, (0,255,0),-1)
elif 50>= i >=42:
if 'right_eyebrow' not in dict_landmarks.keys():
dict_landmarks['right_eyebrow'] = []
dict_landmarks['right_eyebrow'].append([int(x),int(y),(0,255,0)])
if draw_circle:
cv2.circle(img, (int(x),int(y)), 2, (0,255,0),-1)
elif 67>= i >=60:
if 'left_eye' not in dict_landmarks.keys():
dict_landmarks['left_eye'] = []
dict_landmarks['left_eye'].append([int(x),int(y),(255,0,255)])
if draw_circle:
cv2.circle(img, (int(x),int(y)), 2, (255,0,255),-1)
elif 75>= i >=68:
if 'right_eye' not in dict_landmarks.keys():
dict_landmarks['right_eye'] = []
dict_landmarks['right_eye'].append([int(x),int(y),(255,0,255)])
if draw_circle:
cv2.circle(img, (int(x),int(y)), 2, (255,0,255),-1)
elif 97>= i >=96:
cv2.circle(img, (int(x),int(y)), 2, (0,0,255),-1)
elif 54>= i >=51:
if 'bridge_nose' not in dict_landmarks.keys():
dict_landmarks['bridge_nose'] = []
dict_landmarks['bridge_nose'].append([int(x),int(y),(0,170,255)])
if draw_circle:
cv2.circle(img, (int(x),int(y)), 2, (0,170,255),-1)
elif 32>= i >=0:
if 'basin' not in dict_landmarks.keys():
dict_landmarks['basin'] = []
dict_landmarks['basin'].append([int(x),int(y),(255,30,30)])
if draw_circle:
cv2.circle(img, (int(x),int(y)), 2, (255,30,30),-1)
elif 59>= i >=55:
if 'wing_nose' not in dict_landmarks.keys():
dict_landmarks['wing_nose'] = []
dict_landmarks['wing_nose'].append([int(x),int(y),(0,255,255)])
if draw_circle:
cv2.circle(img, (int(x),int(y)), 2, (0,255,255),-1)
elif 87>= i >=76:
if 'out_lip' not in dict_landmarks.keys():
dict_landmarks['out_lip'] = []
dict_landmarks['out_lip'].append([int(x),int(y),(255,255,0)])
if draw_circle:
cv2.circle(img, (int(x),int(y)), 2, (255,255,0),-1)
elif 95>= i >=88:
if 'in_lip' not in dict_landmarks.keys():
dict_landmarks['in_lip'] = []
dict_landmarks['in_lip'].append([int(x),int(y),(50,220,255)])
if draw_circle:
cv2.circle(img, (int(x),int(y)), 2, (50,220,255),-1)
else:
if draw_circle:
cv2.circle(img, (int(x),int(y)), 2, (255,0,255),-1)
return dict_landmarks
def draw_contour(image,dict):
for key in dict.keys():
# print(key)
_,_,color = dict[key][0]
if 'basin' == key or 'wing_nose' == key:
pts = np.array([[dict[key][i][0],dict[key][i][1]] for i in range(len(dict[key]))],np.int32)
# print(pts)
cv2.polylines(image,[pts],False,color)
else:
points_array = np.zeros((1,len(dict[key]),2),dtype = np.int32)
for i in range(len(dict[key])):
x,y,_ = dict[key][i]
points_array[0,i,0] = x
points_array[0,i,1] = y
# cv2.fillPoly(image, points_array, color)
cv2.drawContours(image,points_array,-1,color,thickness=1)
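A hedged usage sketch for draw_landmarks/draw_contour: these helpers expect a flattened face-landmark vector normalised to [0, 1]; the 196 random values below simply stand in for a 98-point prediction. Assumed to run in this module.
```
import numpy as np

img = np.zeros((256, 256, 3), dtype=np.uint8)
output = np.random.rand(196).astype(np.float32)   # fabricated 98 normalised landmarks
landmarks = draw_landmarks(img, output, draw_circle=True)
draw_contour(img, landmarks)
```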
#-*-coding:utf-8-*-
# date:2020-04-11
# Author: Eric.Lee
# function: model utils
import os
import numpy as np
import torch
import torch.backends.cudnn as cudnn
import random
def get_acc(output, label):
total = output.shape[0]
_, pred_label = output.max(1)
num_correct = (pred_label == label).sum().item()
return num_correct / float(total)
def set_learning_rate(optimizer, lr):
for param_group in optimizer.param_groups:
param_group['lr'] = lr
def set_seed(seed = 666):
np.random.seed(seed)
random.seed(seed)
torch.manual_seed(seed)
if torch.cuda.is_available():
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
cudnn.deterministic = True
def split_trainval_datasets(ops):
print(' --------------->>> split_trainval_datasets ')
train_split_datasets = []
train_split_datasets_label = []
val_split_datasets = []
val_split_datasets_label = []
for idx,doc in enumerate(sorted(os.listdir(ops.train_path), key=lambda x:int(x.split('.')[0]), reverse=False)):
# print(' %s label is %s \n'%(doc,idx))
data_list = os.listdir(ops.train_path+doc)
random.shuffle(data_list)
cal_split_num = int(len(data_list)*ops.val_factor)
for i,file in enumerate(data_list):
if '.jpg' in file:
if i < cal_split_num:
val_split_datasets.append(ops.train_path+doc + '/' + file)
val_split_datasets_label.append(idx)
else:
train_split_datasets.append(ops.train_path+doc + '/' + file)
train_split_datasets_label.append(idx)
print(ops.train_path+doc + '/' + file,idx)
print('\n')
print('train_split_datasets len {}'.format(len(train_split_datasets)))
print('val_split_datasets len {}'.format(len(val_split_datasets)))
return train_split_datasets,train_split_datasets_label,val_split_datasets,val_split_datasets_label
detect_model_path=./components/hand_detect/weights/latest_416-2021-02-19.pt
detect_model_arch=yolo_v3
detect_conf_thres=0.5
detect_nms_thres=0.45
handpose_x_model_path=./components/hand_keypoints/weights/ReXNetV1-size-256-wingloss102-0.1063.pth
handpose_x_model_arch=rexnetv1
camera_id = 0
vis_gesture_lines = True
charge_cycle_step = 32
#-*-coding:utf-8-*-
'''
DpCas-Light
|||| ||||| |||| || |||||||
|| || || || || || |||| || ||
|| || || || || || || || ||
|| || || || || ||====|| ||||||
|| || ||||| || || ||======|| ||
|| || || || || || || || ||
|||| || |||| || || |||||||
/--------------------- HandPose_X ---------------------/
'''
# date:2019-12-10
# Author: Eric.Lee
# function: handpose :rotation & translation
import cv2
import numpy as np
# 人脸外轮廓
def get_face_outline(img_crop,face_crop_region,obj_crop_points,face_w,face_h):
face_mask = np.zeros((1,27,2),dtype = np.int32)
for m in range(obj_crop_points.shape[0]):
if m <=16:
x = int(face_crop_region[0]+obj_crop_points[m][0]*face_w)
y = int(face_crop_region[1]+obj_crop_points[m][1]*face_h)
# face_mask.append((x,y))
face_mask[0,m,0]=x
face_mask[0,m,1]=y
for k in range(16,26):
m = 42-k
x = int(face_crop_region[0]+obj_crop_points[m][0]*face_w)
y = int(face_crop_region[1]+obj_crop_points[m][1]*face_h)
# face_mask.append((x,y))
face_mask[0,k+1,0]=x
face_mask[0,k+1,1]=y
# print(x,y)
return face_mask
# 3D reference coordinates of a generic hand model (palm centre, wrist, finger roots)
object_pts = np.float32([
[0., 0.4,0.],#掌心
[0., 5.,0.],#hand 根部
# [-2, 2.5,0.],#thumb 第一指节
# [-4, 0.5,0.],#thumb 第二指节
[-2.7, -4.5, 0.],# index 根部
[0., -5., 0.],# middle 根部
[2.6, -4., 0.], # ring 根部
[5.2, -3., 0.],# pink 根部
]
)
# object_pts = np.float32([[-2.5, -7.45, 0.5],# pink 根部
#
# [-1.2, -7.45, 0.5], # ring 根部
#
#
# [1.2, -7.5, 0.5],# middle 根部
#
# [2.5, -7.45, 0.5],# index 根部
# [4.2, -3.45, 0.5],# thumb 第二指节
# [2.5, -2.0, 0.5],# thumb 根部
# [0.00, -0.0,0.5],#hand 根部
# ]
# )
# xyz 立体矩形框
# reprojectsrc = np.float32([[3.0, 11.0, 2.0],
# [3.0, 11.0, -4.0],
# [3.0, -7.0, -4.0],
# [3.0, -7.0, 2.0],
# [-3.0, 11.0, 2.0],
# [-3.0, 11.0, -4.0],
# [-3.0, -7.0, -4.0],
# [-3.0, -7.0, 2.0]])
reprojectsrc = np.float32([[5.0, 8.0, 2.0],
[5.0, 8.0, -2.0],
[5.0, -8.0, -2.0],
[5.0, -8.0, 2.0],
[-5.0, 8.0, 2.0],
[-5.0, 8.0, -2.0],
[-5.0, -8.0, -2.0],
[-5.0, -8.0, 2.0]])
# reprojectsrc = np.float32([[6.0, 4.0, 2.0],
# [6.0, 4.0, -4.0],
# [6.0, -3.0, -4.0],
# [6.0, -3.0, 2.0],
# [-6.0, 4.0, 2.0],
# [-6.0, 4.0, -4.0],
# [-6.0, -3.0, -4.0],
# [-6.0, -3.0, 2.0]])
# reprojectsrc = np.float32([[6.0, 6.0, 6.0],
# [6.0, 6.0, -6.0],
# [6.0, -6.0, -6.0],
# [6.0, -6.0, 6.0],
# [-6.0, 6.0, 6.0],
# [-6.0, 6.0, -6.0],
# [-6.0, -6.0, -6.0],
# [-6.0, -6.0, 6.0]])
# 立体矩形框连线,连接组合
line_pairs = [[0, 1], [1, 2], [2, 3], [3, 0],
[4, 5], [5, 6], [6, 7], [7, 4],
[0, 4], [1, 5], [2, 6], [3, 7]]
def get_hand_pose(shape,img,vis = True):
h,w,_=img.shape
K = [w, 0.0, w//2,
0.0, w, h//2,
0.0, 0.0, 1.0]
# Assuming no lens distortion
D = [0, 0, 0.0, 0.0, 0]
cam_matrix = np.array(K).reshape(3, 3).astype(np.float32)# 相机矩阵
# dist_coeffs = np.array(D).reshape(5, 1).astype(np.float32)#相机畸变矩阵,默认无畸变
dist_coeffs = np.float32([0.0, 0.0, 0.0, 0.0, 0.0])
# 2D image coordinates of the selected hand keypoints
# image_pts = np.float32([shape[17], shape[21], shape[22], shape[26], shape[36],
# shape[39], shape[42], shape[45],
# shape[27],shape[31], shape[35],shape[30],shape[33]])
image_pts = np.float32([shape[0], shape[1], shape[2], shape[3], shape[4], shape[5]
]
)
# PNP 计算图像二维和三维实际关系,获得旋转和偏移矩阵
_, rotation_vec, translation_vec = cv2.solvePnP(object_pts, image_pts, cam_matrix, dist_coeffs)
# _, rotation_vec, translation_vec = cv2.solvePnPRansac(object_pts, image_pts, cam_matrix, dist_coeffs)
# print("translation_vec:",translation_vec)
#print('translation_vec : {}'.format(translation_vec))
# 映射矩形框
reprojectdst, _ = cv2.projectPoints(reprojectsrc, rotation_vec, translation_vec, cam_matrix,dist_coeffs)
reprojectdst = tuple(map(tuple, reprojectdst.reshape(8, 2)))
# calc euler angle
rotation_mat, _ = cv2.Rodrigues(rotation_vec)#旋转向量转为旋转矩阵
pose_mat = cv2.hconcat((rotation_mat, translation_vec))# 拼接操作 旋转 + 偏移
_, _, _, _, _, _, euler_angle = cv2.decomposeProjectionMatrix(pose_mat)#欧拉角估计
if vis:
for i,line_pair in enumerate(line_pairs):# 显示立体矩形框
x1 = int(reprojectdst[line_pair[0]][0])
y1 = int(reprojectdst[line_pair[0]][1])
x2 = int(reprojectdst[line_pair[1]][0])
y2 = int(reprojectdst[line_pair[1]][1])
if line_pair[0] in [0,3,4,7] and line_pair[1] in [0,3,4,7]:
cv2.line(img,(x1,y1),(x2,y2),(255,0,0),2)
elif line_pair[0] in [1,2,5,6] and line_pair[1] in [1,2,5,6]:
cv2.line(img,(x1,y1),(x2,y2),(250,150,0),2)
else:
cv2.line(img,(x1,y1),(x2,y2),(0,90,255),2)
return reprojectdst, euler_angle,translation_vec
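A hypothetical call sketch for get_hand_pose: the six fabricated 2D points below follow the order of object_pts above (palm centre, wrist, then index/middle/ring/pinky roots) on a 640x480 frame; in the pipeline these coordinates would come from the 21-keypoint model. Assumed to run in this module.
```
import numpy as np

frame = np.zeros((480, 640, 3), dtype=np.uint8)
shape = np.float32([[320, 250],    # palm centre
                    [320, 380],    # wrist / hand root
                    [250, 160],    # index finger root
                    [320, 140],    # middle finger root
                    [380, 150],    # ring finger root
                    [430, 175]])   # pinky finger root
reprojectdst, euler_angle, translation_vec = get_hand_pose(shape, frame, vis=False)
print(euler_angle.flatten())       # rough pitch / yaw / roll estimate
```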
#-*-coding:utf-8-*-
'''
DpCas-Light
|||| ||||| |||| || |||||||
|| || || || || || |||| || ||
|| || || || || || || || ||
|| || || || || ||====|| ||||||
|| || ||||| || || ||======|| ||
|| || || || || || || || ||
|||| || |||| || || |||||||
/--------------------- HandPose_X ---------------------/
'''
import copy
def compute_iou_tk(rec1, rec2):
"""
computing IoU
:param rec1: (y0, x0, y1, x1), which reflects
(top, left, bottom, right)
:param rec2: (y0, x0, y1, x1)
:return: scala value of IoU
"""
# computing area of each rectangles
S_rec1 = (rec1[2] - rec1[0]) * (rec1[3] - rec1[1])
S_rec2 = (rec2[2] - rec2[0]) * (rec2[3] - rec2[1])
# computing the sum_area
sum_area = S_rec1 + S_rec2
# find the each edge of intersect rectangle
left_line = max(rec1[1], rec2[1])
right_line = min(rec1[3], rec2[3])
top_line = max(rec1[0], rec2[0])
bottom_line = min(rec1[2], rec2[2])
# judge if there is an intersect
if left_line >= right_line or top_line >= bottom_line:
return 0.
else:
intersect = (right_line - left_line) * (bottom_line - top_line)
return (intersect / (sum_area - intersect)) * 1.0
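A worked example: rectangles are given in (y0, x0, y1, x1) order, so two 10x10 boxes offset by 5 pixels in each direction overlap in a 5x5 patch and the IoU is 25 / (100 + 100 - 25) ≈ 0.143.
```
rec_a = (0, 0, 10, 10)
rec_b = (5, 5, 15, 15)
print(compute_iou_tk(rec_a, rec_b))   # ≈ 0.1428
```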
def tracking_bbox(data,hand_dict,index,iou_thr = 0.5):
track_index = index
reg_dict = {}
Flag_ = True if hand_dict else False
if Flag_ == False:
# print("------------------->>. False")
for bbox in data:
x_min,y_min,x_max,y_max,score = bbox
reg_dict[track_index] = (x_min,y_min,x_max,y_max,score,0.,1,1)
track_index += 1
if track_index >= 65535:
track_index = 0
else:
# print("------------------->>. True ")
for bbox in data:
xa0,ya0,xa1,ya1,score = bbox
is_track = False
for k_ in hand_dict.keys():
xb0,yb0,xb1,yb1,_,_,cnt_,bbox_stanbel_cnt = hand_dict[k_]
iou_ = compute_iou_tk((ya0,xa0,ya1,xa1),(yb0,xb0,yb1,xb1))
# print((ya0,xa0,ya1,xa1),(yb0,xb0,yb1,xb1))
# print("iou : ",iou_)
if iou_ > iou_thr: # 跟踪成功目标
UI_CNT = 1
if iou_ > 0.888:
UI_CNT = bbox_stanbel_cnt + 1
reg_dict[k_] = (xa0,ya0,xa1,ya1,score,iou_,cnt_ + 1,UI_CNT)
is_track = True
# print("is_track : " ,cnt_ + 1)
if is_track == False: # 新目标
reg_dict[track_index] = (xa0,ya0,xa1,ya1,score,0.,1,1)
track_index += 1
if track_index >=65535: #索引越界归零
track_index = 0
if track_index>=100:
track_index = 0
hand_dict = copy.deepcopy(reg_dict)
# print("a:",hand_dict)
return hand_dict,track_index
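A two-frame sketch of the IoU tracker with fabricated boxes: the second detection overlaps the first well above iou_thr, so it keeps track id 0 and its hit counter increases instead of being assigned a new id. Assumed to run in this module.
```
hand_dict, next_id = tracking_bbox([(100, 100, 200, 200, 0.90)], {}, 0)
hand_dict, next_id = tracking_bbox([(105, 102, 205, 203, 0.88)], hand_dict, next_id)
print(hand_dict)   # {0: (105, 102, 205, 203, 0.88, <iou>, 2, 1)}
```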
import os
"""Parses the data configuration file"""
def parse_data_cfg(path):
print('data_cfg : ',path)
options = dict()
with open(path, 'r') as fp:
lines = fp.readlines()
for line in lines:
line = line.strip()
if line == '' or line.startswith('#'):
continue
key, value = line.split('=')
options[key.strip()] = value.strip()
return options
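A minimal usage sketch, assuming a cfg file with the key=value lines shown earlier in this listing is saved at a hypothetical path; parse_data_cfg returns every value as a string, so numeric and boolean options still need explicit conversion.
```
cfg = parse_data_cfg("handpose.cfg")                 # hypothetical path
detect_conf_thres = float(cfg["detect_conf_thres"])  # values come back as strings
camera_id = int(cfg["camera_id"])
vis_gesture_lines = cfg["vis_gesture_lines"] == "True"
print(cfg["detect_model_arch"], detect_conf_thres, camera_id, vis_gesture_lines)
```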