From 4b6c84295e7aef502fd1e5e1c35d6f14e0d437dd Mon Sep 17 00:00:00 2001 From: "Eric.Lee2021" <305141918@qq.com> Date: Sat, 19 Jun 2021 20:14:17 +0800 Subject: [PATCH] add inference components --- components/hand_detect/acc_model.py | 243 +++++++++ components/hand_detect/utils/__init__.py | 0 components/hand_detect/utils/torch_utils.py | 24 + components/hand_detect/yolo_v3_hand.py | 329 ++++++++++++ components/hand_detect/yolov3.py | 505 ++++++++++++++++++ components/hand_keypoints/handpose_x.py | 146 +++++ .../hand_keypoints/models/mobilenetv2.py | 105 ++++ components/hand_keypoints/models/my_model.py | 67 +++ components/hand_keypoints/models/resnet.py | 263 +++++++++ components/hand_keypoints/models/resnet_50.py | 194 +++++++ components/hand_keypoints/models/rexnetv1.py | 183 +++++++ .../hand_keypoints/models/shufflenet.py | 254 +++++++++ .../hand_keypoints/models/shufflenetv2.py | 157 ++++++ .../hand_keypoints/models/squeezenet.py | 153 ++++++ .../hand_keypoints/utils/common_utils.py | 132 +++++ .../hand_keypoints/utils/model_utils.py | 61 +++ 16 files changed, 2816 insertions(+) create mode 100644 components/hand_detect/acc_model.py create mode 100644 components/hand_detect/utils/__init__.py create mode 100644 components/hand_detect/utils/torch_utils.py create mode 100644 components/hand_detect/yolo_v3_hand.py create mode 100644 components/hand_detect/yolov3.py create mode 100644 components/hand_keypoints/handpose_x.py create mode 100644 components/hand_keypoints/models/mobilenetv2.py create mode 100644 components/hand_keypoints/models/my_model.py create mode 100644 components/hand_keypoints/models/resnet.py create mode 100644 components/hand_keypoints/models/resnet_50.py create mode 100644 components/hand_keypoints/models/rexnetv1.py create mode 100644 components/hand_keypoints/models/shufflenet.py create mode 100644 components/hand_keypoints/models/shufflenetv2.py create mode 100644 components/hand_keypoints/models/squeezenet.py create mode 100644 components/hand_keypoints/utils/common_utils.py create mode 100644 components/hand_keypoints/utils/model_utils.py diff --git a/components/hand_detect/acc_model.py b/components/hand_detect/acc_model.py new file mode 100644 index 0000000..f2a8658 --- /dev/null +++ b/components/hand_detect/acc_model.py @@ -0,0 +1,243 @@ +import torch +import torch.nn as nn +import torchvision +import time +import numpy as np +import sys + +def get_model_op(model_,print_flag = False): + # print('/********************* modules *******************/') + op_dict = {} + idx = 0 + for m in model_.modules(): + idx += 1 + if isinstance(m, nn.Conv2d): + if 'Conv2d' not in op_dict.keys(): + op_dict['Conv2d'] = 1 + else: + op_dict['Conv2d'] += 1 + if print_flag: + print('{}) {}'.format(idx,m)) + pass + elif isinstance(m, nn.BatchNorm2d): + if 'BatchNorm2d' not in op_dict.keys(): + op_dict['BatchNorm2d'] = 1 + else: + op_dict['BatchNorm2d'] += 1 + if print_flag: + print('{}) {}'.format(idx,m)) + pass + elif isinstance(m, nn.Linear): + if 'Linear' not in op_dict.keys(): + op_dict['Linear'] = 1 + else: + op_dict['Linear'] += 1 + if print_flag: + print('{}) {}'.format(idx,m)) + pass + elif isinstance(m, nn.Sequential): + if print_flag: + print('*******************{}) {}'.format(idx,m)) + for n in m: + if print_flag: + print('{}) {}'.format(idx,n)) + if 'Conv2d' not in op_dict.keys(): + op_dict['Conv2d'] = 1 + else: + op_dict['Conv2d'] += 1 + if 'BatchNorm2d' not in op_dict.keys(): + op_dict['BatchNorm2d'] = 1 + else: + op_dict['BatchNorm2d'] += 1 + if 'Linear' not in 
op_dict.keys(): + op_dict['Linear'] = 1 + else: + op_dict['Linear'] += 1 + if 'ReLU6' not in op_dict.keys(): + op_dict['ReLU6'] = 1 + else: + op_dict['ReLU6'] += 1 + pass + elif isinstance(m, nn.ReLU6): + if print_flag: + print('{}) {}'.format(idx,m)) + if 'ReLU6' not in op_dict.keys(): + op_dict['ReLU6'] = 1 + else: + op_dict['ReLU6'] += 1 + pass + elif isinstance(m, nn.Module): + if print_flag: + print('{}) {}'.format(idx,m)) + for n in m.modules(): + if isinstance(n, nn.Conv2d): + if print_flag: + print('{}) {}'.format(idx,n)) + if 'Conv2d' not in op_dict.keys(): + op_dict['Conv2d'] = 1 + else: + op_dict['Conv2d'] += 1 + if 'BatchNorm2d' not in op_dict.keys(): + op_dict['BatchNorm2d'] = 1 + else: + op_dict['BatchNorm2d'] += 1 + if 'Linear' not in op_dict.keys(): + op_dict['Linear'] = 1 + else: + op_dict['Linear'] += 1 + if 'ReLU6' not in op_dict.keys(): + op_dict['ReLU6'] = 1 + else: + op_dict['ReLU6'] += 1 + pass + pass + + else: + if print_flag: + print('{}) {}'.format(idx,m)) + pass + + # print('\n/********************** {} ********************/\n'.format(ops.network)) + for key in op_dict.keys(): + if print_flag: + print(' operation - {} : {}'.format(key,op_dict[key])) + +class DummyModule(nn.Module): + def __init__(self): + super(DummyModule, self).__init__() + + def forward(self, x): + return x + +def fuse(conv, bn): + # https://tehnokv.com/posts/fusing-batchnorm-and-conv/ + with torch.no_grad(): + # init + if isinstance(conv, nn.Conv2d): + fusedconv = torch.nn.Conv2d(conv.in_channels, + conv.out_channels, + kernel_size=conv.kernel_size, + stride=conv.stride, + padding=conv.padding, + bias=True) + elif isinstance(conv, nn.ConvTranspose2d): # not supprot nn.ConvTranspose2d + fusedconv = nn.ConvTranspose2d( + conv.in_channels, + conv.out_channels, + kernel_size=conv.kernel_size, + stride=conv.stride, + padding=conv.padding, + output_padding=conv.output_padding, + bias=True) + else: + print("error") + exit() + + # prepare filters + w_conv = conv.weight.clone().view(conv.out_channels, -1) + w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var))) + fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.size())) + + # prepare spatial bias + if conv.bias is not None: + b_conv = conv.bias + #b_conv = conv.bias.mul(bn.weight.div(torch.sqrt(bn.running_var + bn.eps))) # maybe, you should this one ? 
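+            # Note: folding BN into the conv gives W_fused = diag(gamma / sqrt(var + eps)) @ W and
+            # b_fused = gamma * (b - mean) / sqrt(var + eps) + beta. b_bn below only contributes
+            # beta - gamma * mean / sqrt(var + eps), so keeping the raw conv bias here is exact only
+            # when that bias is zero (the usual case for a conv directly followed by BN); otherwise
+            # the scaled variant in the commented-out line above is the mathematically exact choice.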
+ else: + b_conv = torch.zeros(conv.weight.size(0)) + b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps)) + fusedconv.bias.copy_(b_conv + b_bn) + + return fusedconv + +# idxx = 0 +def fuse_module(m): + # global idxx + children = list(m.named_children()) + c = None + cn = None + + for name, child in children: + # idxx += 1 + # print('-------------->>',idxx) + # if idxx%10==0: + # continue + # print("name {}, child {}".format(name, child)) + if isinstance(child, nn.BatchNorm2d) and c is not None: + bc = fuse(c, child) + m._modules[cn] = bc + # print('DummyModule() : ',DummyModule()) + m._modules[name] = DummyModule() + c = None + elif isinstance(child, nn.Conv2d): + c = child + cn = name + else: + fuse_module(child) + +def test_net(ops,m): + + use_cuda = torch.cuda.is_available() + use_cpu = False + if ops.force_cpu or use_cuda == False: + p = torch.randn([1, 3, 256, 256]) + device = torch.device("cpu") + use_cpu = True + else: + p = torch.randn([1, 3, 256, 256]).cuda() + device = torch.device("cuda:0") + + count = 50 + time_org = [] + m_o = m.to(device) + get_model_op(m_o) + # print(m) + for i in range(count): + s1 = time.time() + if use_cpu: + o_output = m_o(p) + else: + o_output = m_o(p).cpu() + s2 = time.time() + time_org.append(s2 - s1) + print("Original time: ", s2 - s1) + print('------------------------------------>>>>') + + fuse_module(m.to(torch.device("cpu"))) + + # print(m) + + m_f = m.to(device) + get_model_op(m_f) + + time_fuse = [] + for i in range(count): + s1 = time.time() + if use_cpu: + f_output = m_f(p) + else: + f_output = m_f(p).cpu() + s2 = time.time() + time_fuse.append(s2 - s1) + print("Fused time: ", s2 - s1) + + print("-" * 50) + print("org time:", np.mean(time_org)) + print("fuse time:", np.mean(time_fuse)) + for o in o_output: + print("org size:", o.size()) + for o in f_output: + print("fuse size:", o.size()) + for i in range(len(o_output)): + assert o_output[i].size()==f_output[i].size() + print("output[{}] max abs diff: {}".format(i, (o_output[i] - f_output[i]).abs().max().item())) + print("output[{}] MSE diff: {}".format(i, nn.MSELoss()(o_output[i], f_output[i]).item())) + + +def acc_model(ops,m): + # print('\n-------------------------------->>> before acc model') + get_model_op(m) + fuse_module(m) + # print('\n-------------------------------->>> after acc model') + get_model_op(m) + + return m diff --git a/components/hand_detect/utils/__init__.py b/components/hand_detect/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/components/hand_detect/utils/torch_utils.py b/components/hand_detect/utils/torch_utils.py new file mode 100644 index 0000000..2c7352e --- /dev/null +++ b/components/hand_detect/utils/torch_utils.py @@ -0,0 +1,24 @@ +import torch + +def init_seeds(seed=0): + torch.manual_seed(seed) + torch.cuda.manual_seed(seed) + torch.cuda.manual_seed_all(seed) + +def select_device(force_cpu=False): + if force_cpu: + cuda = False + device = torch.device('cpu') + else: + cuda = torch.cuda.is_available() + device = torch.device('cuda:0' if cuda else 'cpu') + + if torch.cuda.device_count() > 1: + device = torch.device('cuda' if cuda else 'cpu') + # print('Found %g GPUs' % torch.cuda.device_count()) + # print('Multi-GPU Issue: https://github.com/ultralytics/yolov3/issues/21') + # torch.cuda.set_device(0) # OPTIONAL: Set your GPU if multiple available + # print('Using ', torch.cuda.device_count(), ' GPUs') + + # print('Using %s %s\n' % (device.type, torch.cuda.get_device_properties(0) if cuda else '')) + 
return device diff --git a/components/hand_detect/yolo_v3_hand.py b/components/hand_detect/yolo_v3_hand.py new file mode 100644 index 0000000..99d9255 --- /dev/null +++ b/components/hand_detect/yolo_v3_hand.py @@ -0,0 +1,329 @@ +#-*-coding:utf-8-*- +# date:2021-03-09 +# Author: Eric.Lee +# function: yolo v3 hand detect + +import os +import cv2 +import numpy as np +import time + +import torch + +from hand_detect.yolov3 import Yolov3, Yolov3Tiny +from hand_detect.utils.torch_utils import select_device +from hand_detect.acc_model import acc_model + +import torch.backends.cudnn as cudnn +import torch.nn.functional as F + + +import random + +def show_model_param(model): + params = list(model.parameters()) + k = 0 + for i in params: + l = 1 + for j in i.size(): + l *= j + print("该层的结构: {}, 参数和: {}".format(str(list(i.size())), str(l))) + k = k + l + print("----------------------") + print("总参数数量和: " + str(k)) + +def process_data(img, img_size=416):# 图像预处理 + img, _, _, _ = letterbox(img, height=img_size) + # Normalize RG25 + img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB + img = np.ascontiguousarray(img, dtype=np.float32) # uint8 to float32 + img /= 255.0 # 0 - 255 to 0.0 - 1.0 + return img + +def plot_one_box(x, img, color=None, label=None, line_thickness=None): + # Plots one bounding box on image img + tl = line_thickness or round(0.002 * max(img.shape[0:2])) + 1 # line thickness + color = color or [random.randint(0, 255) for _ in range(3)] + c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3])) + cv2.rectangle(img, c1, c2, color, thickness=tl) + if label: + tf = max(tl - 1, 1) # font thickness + t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0] + c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3 + cv2.rectangle(img, c1, c2, color, -1) # filled + cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 3, [255, 55,90], thickness=tf, lineType=cv2.LINE_AA) + +def bbox_iou(box1, box2, x1y1x2y2=True): + # Returns the IoU of box1 to box2. 
box1 is 4, box2 is nx4 + box2 = box2.t() + + # Get the coordinates of bounding boxes + if x1y1x2y2: + # x1, y1, x2, y2 = box1 + b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3] + b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3] + else: + # x, y, w, h = box1 + b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2 + b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2 + b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2 + b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2 + + # Intersection area + inter_area = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * \ + (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0) + + # Union Area + union_area = ((b1_x2 - b1_x1) * (b1_y2 - b1_y1) + 1e-16) + \ + (b2_x2 - b2_x1) * (b2_y2 - b2_y1) - inter_area + + return inter_area / union_area # iou + +def xywh2xyxy(x): + # Convert bounding box format from [x, y, w, h] to [x1, y1, x2, y2] + y = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x) + y[:, 0] = x[:, 0] - x[:, 2] / 2 + y[:, 1] = x[:, 1] - x[:, 3] / 2 + y[:, 2] = x[:, 0] + x[:, 2] / 2 + y[:, 3] = x[:, 1] + x[:, 3] / 2 + return y + + +def scale_coords(img_size, coords, img0_shape):# image size 转为 原图尺寸 + # Rescale x1, y1, x2, y2 from 416 to image size + # print('coords : ',coords) + # print('img0_shape : ',img0_shape) + gain = float(img_size) / max(img0_shape) # gain = old / new + # print('gain : ',gain) + pad_x = (img_size - img0_shape[1] * gain) / 2 # width padding + pad_y = (img_size - img0_shape[0] * gain) / 2 # height padding + # print('pad_xpad_y : ',pad_x,pad_y) + coords[:, [0, 2]] -= pad_x + coords[:, [1, 3]] -= pad_y + coords[:, :4] /= gain + coords[:, :4] = torch.clamp(coords[:, :4], min=0)# 夹紧区间最小值不为负数 + return coords + +def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4): + """ + Removes detections with lower object confidence score than 'conf_thres' + Non-Maximum Suppression to further filter detections. 
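+    Boxes whose score (obj_conf * class_conf) falls below 'conf_thres', or whose width/height
+    is not above the 2 px minimum, are dropped first; the survivors are then suppressed per class.
+    The default 'MERGE' style replaces each kept box with the confidence-weighted average of all
+    boxes whose IoU with it exceeds 'nms_thres'.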
+ Returns detections with shape: + (x1, y1, x2, y2, object_conf, class_conf, class) + """ + + min_wh = 2 # (pixels) minimum box width and height + + output = [None] * len(prediction) + for image_i, pred in enumerate(prediction): + # Experiment: Prior class size rejection + # x, y, w, h = pred[:, 0], pred[:, 1], pred[:, 2], pred[:, 3] + # a = w * h # area + # ar = w / (h + 1e-16) # aspect ratio + # n = len(w) + # log_w, log_h, log_a, log_ar = torch.log(w), torch.log(h), torch.log(a), torch.log(ar) + # shape_likelihood = np.zeros((n, 60), dtype=np.float32) + # x = np.concatenate((log_w.reshape(-1, 1), log_h.reshape(-1, 1)), 1) + # from scipy.stats import multivariate_normal + # for c in range(60): + # shape_likelihood[:, c] = + # multivariate_normal.pdf(x, mean=mat['class_mu'][c, :2], cov=mat['class_cov'][c, :2, :2]) + + # Filter out confidence scores below threshold + class_conf, class_pred = pred[:, 5:].max(1) # max class_conf, index + pred[:, 4] *= class_conf # finall conf = obj_conf * class_conf + + i = (pred[:, 4] > conf_thres) & (pred[:, 2] > min_wh) & (pred[:, 3] > min_wh) + # s2=time.time() + pred2 = pred[i] + # print("++++++pred2 = pred[i]",time.time()-s2, pred2) + + # If none are remaining => process next image + if len(pred2) == 0: + continue + + # Select predicted classes + class_conf = class_conf[i] + class_pred = class_pred[i].unsqueeze(1).float() + + # Box (center x, center y, width, height) to (x1, y1, x2, y2) + pred2[:, :4] = xywh2xyxy(pred2[:, :4]) + # pred[:, 4] *= class_conf # improves mAP from 0.549 to 0.551 + + # Detections ordered as (x1y1x2y2, obj_conf, class_conf, class_pred) + pred2 = torch.cat((pred2[:, :5], class_conf.unsqueeze(1), class_pred), 1) + + # Get detections sorted by decreasing confidence scores + pred2 = pred2[(-pred2[:, 4]).argsort()] + + det_max = [] + nms_style = 'MERGE' # 'OR' (default), 'AND', 'MERGE' (experimental) + for c in pred2[:, -1].unique(): + dc = pred2[pred2[:, -1] == c] # select class c + dc = dc[:min(len(dc), 100)] # limit to first 100 boxes + + # Non-maximum suppression + if nms_style == 'OR': # default + # METHOD1 + # ind = list(range(len(dc))) + # while len(ind): + # j = ind[0] + # det_max.append(dc[j:j + 1]) # save highest conf detection + # reject = (bbox_iou(dc[j], dc[ind]) > nms_thres).nonzero() + # [ind.pop(i) for i in reversed(reject)] + + # METHOD2 + while dc.shape[0]: + det_max.append(dc[:1]) # save highest conf detection + if len(dc) == 1: # Stop if we're at the last detection + break + iou = bbox_iou(dc[0], dc[1:]) # iou with other boxes + dc = dc[1:][iou < nms_thres] # remove ious > threshold + + elif nms_style == 'AND': # requires overlap, single boxes erased + while len(dc) > 1: + iou = bbox_iou(dc[0], dc[1:]) # iou with other boxes + if iou.max() > 0.5: + det_max.append(dc[:1]) + dc = dc[1:][iou < nms_thres] # remove ious > threshold + + elif nms_style == 'MERGE': # weighted mixture box + while len(dc): + i = bbox_iou(dc[0], dc) > nms_thres # iou with other boxes + weights = dc[i, 4:5] + dc[0, :4] = (weights * dc[i, :4]).sum(0) / weights.sum() + det_max.append(dc[:1]) + dc = dc[i == 0] + + if len(det_max): + det_max = torch.cat(det_max) # concatenate + output[image_i] = det_max[(-det_max[:, 4]).argsort()] # sort + return output + +def letterbox(img, height=416, augment=False, color=(127.5, 127.5, 127.5)): + # Resize a rectangular image to a padded square + shape = img.shape[:2] # shape = [height, width] + ratio = float(height) / max(shape) # ratio = old / new + new_shape = (round(shape[1] * ratio), round(shape[0] * 
ratio)) + dw = (height - new_shape[0]) / 2 # width padding + dh = (height - new_shape[1]) / 2 # height padding + top, bottom = round(dh - 0.1), round(dh + 0.1) + left, right = round(dw - 0.1), round(dw + 0.1) + # resize img + if augment: + interpolation = np.random.choice([None, cv2.INTER_NEAREST, cv2.INTER_LINEAR, + None, cv2.INTER_NEAREST, cv2.INTER_LINEAR, + cv2.INTER_AREA, cv2.INTER_CUBIC, cv2.INTER_LANCZOS4]) + if interpolation is None: + img = cv2.resize(img, new_shape) + else: + img = cv2.resize(img, new_shape, interpolation=interpolation) + else: + img = cv2.resize(img, new_shape, interpolation=cv2.INTER_NEAREST) + # print("resize time:",time.time()-s1) + + img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # padded square + return img, ratio, dw, dh +#--------------------------------------------------------- +# model_path = './coco_model/yolov3_coco.pt' # 检测模型路径 +# root_path = './test_images/'# 测试文件夹 +# model_arch = 'yolov3' # 模型类型 +# voc_config = 'cfg/voc.data' # 模型相关配置文件 +# img_size = 416 # 图像尺寸 +# conf_thres = 0.35# 检测置信度 +# nms_thres = 0.5 # nms 阈值 +class yolo_v3_hand_model(object): + def __init__(self, + model_path = './components/hand_detect/weights/hand_416-20210606.pt', + model_arch = 'yolov3', + yolo_anchor_scale = 1., + img_size=416, + conf_thres=0.55, + nms_thres=0.4, + model_half = False, + ): + print("yolo v3 hand_model loading : {}".format(model_path)) + self.use_cuda = torch.cuda.is_available() + self.device = torch.device("cuda:0" if self.use_cuda else "cpu") + self.img_size = img_size + self.classes = ["Hand"] + self.num_classes = len(self.classes) + self.conf_thres = conf_thres + self.nms_thres = nms_thres + self.model_half = model_half + #----------------------------------------------------------------------- + weights = model_path + if "tiny" in model_arch: + a_scalse = 416./img_size*yolo_anchor_scale + anchors=[(10, 14), (23, 27), (37, 58), (81, 82), (135, 169), (344, 319)] + anchors_new = [ (int(anchors[j][0]/a_scalse),int(anchors[j][1]/a_scalse)) for j in range(len(anchors)) ] + + model = Yolov3Tiny(self.num_classes,anchors = anchors_new) + else: + a_scalse = 416./img_size + anchors=[(10,13), (16,30), (33,23), (30,61), (62,45), (59,119), (116,90), (156,198), (373,326)] + anchors_new = [ (int(anchors[j][0]/a_scalse),int(anchors[j][1]/a_scalse)) for j in range(len(anchors)) ] + model = Yolov3(self.num_classes,anchors = anchors_new) + #----------------------------------------------------------------------- + + self.model = model + # show_model_param(self.model)# 显示模型参数 + + # print('num_classes : ',self.num_classes) + + self.device = select_device() # 运行硬件选择 + self.use_cuda = torch.cuda.is_available() + # Load weights + if os.access(weights,os.F_OK):# 判断模型文件是否存在 + self.model.load_state_dict(torch.load(weights, map_location=lambda storage, loc: storage)['model']) + else: + print('------- >>> error : model not exists') + return False + # + self.model.eval()#模型设置为 eval + acc_model('',self.model) + self.model = self.model.to(self.device) + + if model_half: + self.model = self.model.to(self.device).half() + + if self.use_cuda: + self.model = self.model.cuda() + + def predict(self, img_,vis): + with torch.no_grad(): + t = time.time() + img = process_data(img_, self.img_size) + t1 = time.time() + img = torch.from_numpy(img).unsqueeze(0).to(self.device) + if self.model_half: + img=img.type(torch.HalfTensor) + if self.use_cuda: + img = img.cuda() + pred, _ = self.model(img)#图片检测 + + t2 = time.time() + # detections = 
non_max_suppression(pred, self.conf_thres, self.nms_thres)[0] # nms + detections = non_max_suppression(pred.float(), self.conf_thres, self.nms_thres)[0] # nms + t3 = time.time() + # print("t3 time:", t3) + + if (detections is None) or len(detections) == 0: + return [] + # Rescale boxes from 416 to true image size + detections[:, :4] = scale_coords(self.img_size, detections[:, :4], img_.shape).round() + # 绘制检测结果 :detect reslut + dets_for_landmarks = [] + colors = [(v // 32 * 64 + 64, (v // 8) % 4 * 64, v % 8 * 32) for v in range(1, 10 + 1)][::-1] + + output_dict_ = [] + for *xyxy, conf, cls_conf, cls in detections: + label = '%s %.2f' % (self.classes[0], conf) + x1,y1,x2,y2 = xyxy + output_dict_.append((float(x1),float(y1),float(x2),float(y2),float(conf.item()))) + if vis: + plot_one_box(xyxy, img_, label=label, color=(0,175,255), line_thickness = 2) + if vis: + cv2.namedWindow("yoloV3",0) + cv2.imshow("yoloV3",img_) + return output_dict_ diff --git a/components/hand_detect/yolov3.py b/components/hand_detect/yolov3.py new file mode 100644 index 0000000..519ae7e --- /dev/null +++ b/components/hand_detect/yolov3.py @@ -0,0 +1,505 @@ +import os +import numpy as np +from collections import OrderedDict + +import torch +import torch.nn.functional as F +import torch.nn as nn + + +# reference: +# https://github.com/ultralytics/yolov3/blob/master/models.py +# https://github.com/TencentYoutuResearch/ObjectDetection-OneStageDet/blob/master/yolo/vedanet/network/backbone/brick/darknet53.py +# network structure https://blog.csdn.net/u010397980/article/details/85058630 + +flag_yolo_structure = False # True 查看 相关的网络 log + +class Conv2dBatchLeaky(nn.Module): + def __init__(self, in_channels, out_channels, kernel_size, stride, leaky_slope=0.1): + super(Conv2dBatchLeaky, self).__init__() + + self.in_channels = in_channels + self.out_channels = out_channels + self.kernel_size = kernel_size + self.stride = stride + if isinstance(kernel_size, (list, tuple)): + self.padding = [int(ii/2) for ii in kernel_size] + if flag_yolo_structure: + print('------------------->>>> Conv2dBatchLeaky isinstance') + else: + self.padding = int(kernel_size/2) + + self.leaky_slope = leaky_slope + # Layer + # LeakyReLU : y = max(0, x) + leaky_slope*min(0,x) + self.layers = nn.Sequential( + nn.Conv2d(self.in_channels, self.out_channels, self.kernel_size, self.stride, self.padding, bias=False), + nn.BatchNorm2d(self.out_channels), + nn.LeakyReLU(self.leaky_slope, inplace=True) + ) + + def forward(self, x): + x = self.layers(x) + return x + +class ResBlockSum(nn.Module): + def __init__(self, nchannels): + super().__init__() + self.block = nn.Sequential( + Conv2dBatchLeaky(nchannels, int(nchannels/2), 1, 1), + Conv2dBatchLeaky(int(nchannels/2), nchannels, 3, 1) + ) + + def forward(self, x): + return x + self.block(x) + +class HeadBody(nn.Module): + def __init__(self, in_channels, out_channels): + super(HeadBody, self).__init__() + + self.layer = nn.Sequential( + Conv2dBatchLeaky(in_channels, out_channels, 1, 1), + Conv2dBatchLeaky(out_channels, out_channels*2, 3, 1), + Conv2dBatchLeaky(out_channels*2, out_channels, 1, 1), + Conv2dBatchLeaky(out_channels, out_channels*2, 3, 1), + Conv2dBatchLeaky(out_channels*2, out_channels, 1, 1) + ) + + def forward(self, x): + x = self.layer(x) + return x + +class Upsample(nn.Module): + # Custom Upsample layer (nn.Upsample gives deprecated warning message) + + def __init__(self, scale_factor=1, mode='nearest'): + super(Upsample, self).__init__() + self.scale_factor = scale_factor + self.mode = mode + + 
def forward(self, x): + return F.interpolate(x, scale_factor=self.scale_factor, mode=self.mode) + +# default anchors=[(10,13), (16,30), (33,23), (30,61), (62,45), (59,119), (116,90), (156,198), (373,326)] +class YOLOLayer(nn.Module): + def __init__(self, anchors, nC): + super(YOLOLayer, self).__init__() + + self.anchors = torch.FloatTensor(anchors) + self.nA = len(anchors) # number of anchors (3) + self.nC = nC # number of classes + self.img_size = 0 + if flag_yolo_structure: + print('init YOLOLayer ------ >>> ') + print('anchors : ',self.anchors) + print('nA : ',self.nA) + print('nC : ',self.nC) + print('img_size : ',self.img_size) + + def forward(self, p, img_size, var=None):# p : feature map + bs, nG = p.shape[0], p.shape[-1] # batch_size , grid + if flag_yolo_structure: + print('bs, nG --->>> ',bs, nG) + if self.img_size != img_size: + create_grids(self, img_size, nG, p.device) + + # p.view(bs, 255, 13, 13) -- > (bs, 3, 13, 13, 85) # (bs, anchors, grid, grid, xywh + confidence + classes) + p = p.view(bs, self.nA, self.nC + 5, nG, nG).permute(0, 1, 3, 4, 2).contiguous() # prediction + + if self.training: + return p + else: # inference + io = p.clone() # inference output + io[..., 0:2] = torch.sigmoid(io[..., 0:2]) + self.grid_xy # xy + io[..., 2:4] = torch.exp(io[..., 2:4]) * self.anchor_wh # wh yolo method + io[..., 4:] = torch.sigmoid(io[..., 4:]) # p_conf, p_cls + io[..., :4] *= self.stride + if self.nC == 1: + io[..., 5] = 1 # single-class model + # flatten prediction, reshape from [bs, nA, nG, nG, nC] to [bs, nA * nG * nG, nC] + return io.view(bs, -1, 5 + self.nC), p + +def create_grids(self, img_size, nG, device='cpu'): + # self.nA : len(anchors) # number of anchors (3) + # self.nC : nC # number of classes + # nG : feature map grid 13*13 26*26 52*52 + self.img_size = img_size + self.stride = img_size / nG + if flag_yolo_structure: + print('create_grids stride : ',self.stride) + + # build xy offsets + grid_x = torch.arange(nG).repeat((nG, 1)).view((1, 1, nG, nG)).float() + grid_y = grid_x.permute(0, 1, 3, 2) + self.grid_xy = torch.stack((grid_x, grid_y), 4).to(device) + if flag_yolo_structure: + print('grid_x : ',grid_x.size(),grid_x) + print('grid_y : ',grid_y.size(),grid_y) + print('grid_xy : ',self.grid_xy.size(),self.grid_xy) + + # build wh gains + self.anchor_vec = self.anchors.to(device) / self.stride # 基于 stride 的归一化 + # print('self.anchor_vecself.anchor_vecself.anchor_vec:',self.anchor_vec) + self.anchor_wh = self.anchor_vec.view(1, self.nA, 1, 1, 2).to(device) + self.nG = torch.FloatTensor([nG]).to(device) + + +def get_yolo_layer_index(module_list): + yolo_layer_index = [] + for index, l in enumerate(module_list): + try: + a = l[0].img_size and l[0].nG # only yolo layer need img_size and nG + yolo_layer_index.append(index) + except: + pass + assert len(yolo_layer_index) > 0, "can not find yolo layer" + return yolo_layer_index + + +# ----------------------yolov3------------------------ + +class Yolov3(nn.Module): + def __init__(self, num_classes=80, anchors=[(10,13), (16,30), (33,23), (30,61), (62,45), (59,119), (116,90), (156,198), (373,326)]): + super().__init__() + anchor_mask1 = [i for i in range(2 * len(anchors) // 3, len(anchors), 1)] # [6, 7, 8] + anchor_mask2 = [i for i in range(len(anchors) // 3, 2 * len(anchors) // 3, 1)] # [3, 4, 5] + anchor_mask3 = [i for i in range(0, len(anchors) // 3, 1)] # [0, 1, 2] + if flag_yolo_structure: + print('anchor_mask1 : ',anchor_mask1) # 大物体 anchor + print('anchor_mask2 : ',anchor_mask2) # 中物体 anchor + print('anchor_mask3 : 
',anchor_mask3) # 小物体 anchor + + # Network + # OrderedDict 是 dict 的子类,其最大特征是,它可以“维护”添加 key-value 对的顺序 + layer_list = [] + + ''' + ****** Conv2dBatchLeaky ***** + op : Conv2d,BatchNorm2d,LeakyReLU + inputs : in_channels, out_channels, kernel_size, stride, leaky_slope + ''' + + ''' + ****** ResBlockSum ****** + op : Conv2dBatchLeaky * 2 + x + inputs : nchannels + ''' + # list 0 + layer_list.append(OrderedDict([ + ('0_stage1_conv', Conv2dBatchLeaky(3, 32, 3, 1, 1)), # 416 x 416 x 32 # Convolutional + + ("0_stage2_conv", Conv2dBatchLeaky(32, 64, 3, 2)), # 208 x 208 x 64 # Convolutional + ("0_stage2_ressum1", ResBlockSum(64)), # Convolutional*2 + Resiudal + + ("0_stage3_conv", Conv2dBatchLeaky(64, 128, 3, 2)), # 104 x 104 128 # Convolutional + ("0_stage3_ressum1", ResBlockSum(128)), + ("0_stage3_ressum2", ResBlockSum(128)), # (Convolutional*2 + Resiudal)**2 + + ("0_stage4_conv", Conv2dBatchLeaky(128, 256, 3, 2)), # 52 x 52 x 256 # Convolutional + ("0_stage4_ressum1", ResBlockSum(256)), + ("0_stage4_ressum2", ResBlockSum(256)), + ("0_stage4_ressum3", ResBlockSum(256)), + ("0_stage4_ressum4", ResBlockSum(256)), + ("0_stage4_ressum5", ResBlockSum(256)), + ("0_stage4_ressum6", ResBlockSum(256)), + ("0_stage4_ressum7", ResBlockSum(256)), + ("0_stage4_ressum8", ResBlockSum(256)), # 52 x 52 x 256 output_feature_0 (Convolutional*2 + Resiudal)**8 + ])) + # list 1 + layer_list.append(OrderedDict([ + ("1_stage5_conv", Conv2dBatchLeaky(256, 512, 3, 2)), # 26 x 26 x 512 # Convolutional + ("1_stage5_ressum1", ResBlockSum(512)), + ("1_stage5_ressum2", ResBlockSum(512)), + ("1_stage5_ressum3", ResBlockSum(512)), + ("1_stage5_ressum4", ResBlockSum(512)), + ("1_stage5_ressum5", ResBlockSum(512)), + ("1_stage5_ressum6", ResBlockSum(512)), + ("1_stage5_ressum7", ResBlockSum(512)), + ("1_stage5_ressum8", ResBlockSum(512)), # 26 x 26 x 512 output_feature_1 # (Convolutional*2 + Resiudal)**8 + ])) + + ''' + ****** HeadBody ****** + op : Conv2dBatchLeaky * 5 + inputs : in_channels, out_channels + ''' + # list 2 + layer_list.append(OrderedDict([ + ("2_stage6_conv", Conv2dBatchLeaky(512, 1024, 3, 2)), # 13 x 13 x 1024 # Convolutional + ("2_stage6_ressum1", ResBlockSum(1024)), + ("2_stage6_ressum2", ResBlockSum(1024)), + ("2_stage6_ressum3", ResBlockSum(1024)), + ("2_stage6_ressum4", ResBlockSum(1024)), # 13 x 13 x 1024 output_feature_2 # (Convolutional*2 + Resiudal)**4 + ("2_headbody1", HeadBody(in_channels=1024, out_channels=512)), # 13 x 13 x 512 # Convalutional Set = Conv2dBatchLeaky * 5 + ])) + # list 3 + layer_list.append(OrderedDict([ + ("3_conv_1", Conv2dBatchLeaky(in_channels=512, out_channels=1024, kernel_size=3, stride=1)), + ("3_conv_2", nn.Conv2d(in_channels=1024, out_channels=len(anchor_mask1) * (num_classes + 5), kernel_size=1, stride=1, padding=0, bias=True)), + ])) # predict one + # list 4 + layer_list.append(OrderedDict([ + ("4_yolo", YOLOLayer([anchors[i] for i in anchor_mask1], num_classes)) + ])) # 3*((x, y, w, h, confidence) + classes ) + + # list 5 + layer_list.append(OrderedDict([ + ("5_conv", Conv2dBatchLeaky(512, 256, 1, 1)), + ("5_upsample", Upsample(scale_factor=2)), + ])) + # list 6 + layer_list.append(OrderedDict([ + ("6_head_body2", HeadBody(in_channels=768, out_channels=256)) # Convalutional Set = Conv2dBatchLeaky * 5 + ])) + # list 7 + layer_list.append(OrderedDict([ + ("7_conv_1", Conv2dBatchLeaky(in_channels=256, out_channels=512, kernel_size=3, stride=1)), + ("7_conv_2", nn.Conv2d(in_channels=512, out_channels=len(anchor_mask2) * (num_classes + 5), kernel_size=1, stride=1, padding=0, 
bias=True)), + ])) # predict two + # list 8 + layer_list.append(OrderedDict([ + ("8_yolo", YOLOLayer([anchors[i] for i in anchor_mask2], num_classes)) + ])) # 3*((x, y, w, h, confidence) + classes ) + # list 9 + layer_list.append(OrderedDict([ + ("9_conv", Conv2dBatchLeaky(256, 128, 1, 1)), + ("9_upsample", Upsample(scale_factor=2)), + ])) + # list 10 + layer_list.append(OrderedDict([ + ("10_head_body3", HeadBody(in_channels=384, out_channels=128)) # Convalutional Set = Conv2dBatchLeaky * 5 + ])) + # list 11 + layer_list.append(OrderedDict([ + ("11_conv_1", Conv2dBatchLeaky(in_channels=128, out_channels=256, kernel_size=3, stride=1)), + ("11_conv_2", nn.Conv2d(in_channels=256, out_channels=len(anchor_mask3) * (num_classes + 5), kernel_size=1, stride=1, padding=0, bias=True)), + ])) # predict three + # list 12 + layer_list.append(OrderedDict([ + ("12_yolo", YOLOLayer([anchors[i] for i in anchor_mask3], num_classes)) + ])) # 3*((x, y, w, h, confidence) + classes ) + # nn.ModuleList类似于pytho中的list类型,只是将一系列层装入列表,并没有实现forward()方法,因此也不会有网络模型产生的副作用 + self.module_list = nn.ModuleList([nn.Sequential(i) for i in layer_list]) + self.yolo_layer_index = get_yolo_layer_index(self.module_list) + if flag_yolo_structure: + print('yolo_layer : ',len(layer_list),'\n') + print(self.module_list[4]) + print(self.module_list[8]) + print(self.module_list[12]) + + # print('self.module_list -------->>> ',self.module_list) + # print('self.yolo_layer_index -------->>> ',self.yolo_layer_index) + + def forward(self, x): + img_size = x.shape[-1] + if flag_yolo_structure: + print('forward img_size : ',img_size,x.shape) + output = [] + + x = self.module_list[0](x) + x_route1 = x + x = self.module_list[1](x) + x_route2 = x + x = self.module_list[2](x) + + yolo_head = self.module_list[3](x) + if flag_yolo_structure: + print('mask1 yolo_head : ',yolo_head.size()) + yolo_head_out_13x13 = self.module_list[4][0](yolo_head, img_size) + output.append(yolo_head_out_13x13) + + x = self.module_list[5](x) + x = torch.cat([x, x_route2], 1) + x = self.module_list[6](x) + + yolo_head = self.module_list[7](x) + if flag_yolo_structure: + print('mask2 yolo_head : ',yolo_head.size()) + yolo_head_out_26x26 = self.module_list[8][0](yolo_head, img_size) + output.append(yolo_head_out_26x26) + + x = self.module_list[9](x) + x = torch.cat([x, x_route1], 1) + x = self.module_list[10](x) + + yolo_head = self.module_list[11](x) + if flag_yolo_structure: + print('mask3 yolo_head : ',yolo_head.size()) + yolo_head_out_52x52 = self.module_list[12][0](yolo_head, img_size) + output.append(yolo_head_out_52x52) + + if self.training: + return output + else: + io, p = list(zip(*output)) # inference output, training output + return torch.cat(io, 1), p + + +# ----------------------yolov3 tiny------------------------ + +class EmptyLayer(nn.Module): + """Placeholder for 'route' and 'shortcut' layers""" + def __init__(self): + super(EmptyLayer, self).__init__() + + def forward(self, x): + return x + + +class Yolov3Tiny(nn.Module): + def __init__(self, num_classes=80, anchors=[(10, 14), (23, 27), (37, 58), (81, 82), (135, 169), (344, 319)]): + super(Yolov3Tiny, self).__init__() + + anchor_mask1 = [i for i in range(len(anchors) // 2, len(anchors), 1)] # [3, 4, 5] + anchor_mask2 = [i for i in range(0, len(anchors) // 2, 1)] # [0, 1, 2] + + layer_list = [] + layer_list.append(OrderedDict([ + # layer 0 + ("conv_0", nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, stride=1, padding=1, bias=False)), + ("batch_norm_0", nn.BatchNorm2d(16)), + ("leaky_0", 
nn.LeakyReLU(0.1)), + # layer 1 + ("maxpool_1", nn.MaxPool2d(kernel_size=2, stride=2, padding=0)), + # layer 2 + ("conv_2", nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=1, padding=1, bias=False)), + ("batch_norm_2", nn.BatchNorm2d(32)), + ("leaky_2", nn.LeakyReLU(0.1)), + # layer 3 + ("maxpool_3", nn.MaxPool2d(kernel_size=2, stride=2, padding=0)), + # layer 4 + ("conv_4", nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1, bias=False)), + ("batch_norm_4", nn.BatchNorm2d(64)), + ("leaky_4", nn.LeakyReLU(0.1)), + # layer 5 + ("maxpool_5", nn.MaxPool2d(kernel_size=2, stride=2, padding=0)), + # layer 6 + ("conv_6", nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1, bias=False)), + ("batch_norm_6", nn.BatchNorm2d(128)), + ("leaky_6", nn.LeakyReLU(0.1)), + # layer 7 + ("maxpool_7", nn.MaxPool2d(kernel_size=2, stride=2, padding=0)), + # layer 8 + ("conv_8", nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1, bias=False)), + ("batch_norm_8", nn.BatchNorm2d(256)), + ("leaky_8", nn.LeakyReLU(0.1)), + ])) + + layer_list.append(OrderedDict([ + # layer 9 + ("maxpool_9", nn.MaxPool2d(kernel_size=2, stride=2, padding=0)), + # layer 10 + ("conv_10", nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=1, padding=1, bias=False)), + ("batch_norm_10", nn.BatchNorm2d(512)), + ("leaky_10", nn.LeakyReLU(0.1)), + # layer 11 + ('_debug_padding_11', nn.ZeroPad2d((0, 1, 0, 1))), + ("maxpool_11", nn.MaxPool2d(kernel_size=2, stride=1, padding=0)), + # layer 12 + ("conv_12", nn.Conv2d(in_channels=512, out_channels=1024, kernel_size=3, stride=1, padding=1, bias=False)), + ("batch_norm_12", nn.BatchNorm2d(1024)), + ("leaky_12", nn.LeakyReLU(0.1)), + # layer 13 + ("conv_13", nn.Conv2d(in_channels=1024, out_channels=256, kernel_size=1, stride=1, padding=0, bias=False)), + ("batch_norm_13", nn.BatchNorm2d(256)), + ("leaky_13", nn.LeakyReLU(0.1)), + ])) + + layer_list.append(OrderedDict([ + # layer 14 + ("conv_14", nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=1, padding=1, bias=False)), + ("batch_norm_14", nn.BatchNorm2d(512)), + ("leaky_14", nn.LeakyReLU(0.1)), + # layer 15 + ("conv_15", + nn.Conv2d(in_channels=512, out_channels=len(anchor_mask1) * (num_classes + 5), kernel_size=1, stride=1, padding=0, bias=True)), + ])) + + # layer 16 + anchor_tmp1 = [anchors[i] for i in anchor_mask1] + layer_list.append(OrderedDict([("yolo_16", YOLOLayer(anchor_tmp1, num_classes))])) + + # layer 17 + layer_list.append(OrderedDict([("route_17", EmptyLayer())])) + + layer_list.append(OrderedDict([ + # layer 18 + ("conv_18", nn.Conv2d(in_channels=256, out_channels=128, kernel_size=1, stride=1, padding=0, bias=False)), + ("batch_norm_18", nn.BatchNorm2d(128)), + ("leaky_18", nn.LeakyReLU(0.1)), + # layer 19 + ("upsample_19", Upsample(scale_factor=2)), + ])) + + # layer 20 + layer_list.append(OrderedDict([('route_20', EmptyLayer())])) + + layer_list.append(OrderedDict([ + # layer 21 + ("conv_21", nn.Conv2d(in_channels=384, out_channels=256, kernel_size=3, stride=1, padding=1, bias=False)), + ("batch_norm_21", nn.BatchNorm2d(256)), + ("leaky_21", nn.LeakyReLU(0.1)), + # layer 22 + ("conv_22", + nn.Conv2d(in_channels=256, out_channels=len(anchor_mask2) * (num_classes + 5), kernel_size=1, stride=1, padding=0, bias=True)), + ])) + + # layer 23 + anchor_tmp2 = [anchors[i] for i in anchor_mask2] + layer_list.append(OrderedDict([("yolo_23", YOLOLayer(anchor_tmp2, num_classes))])) + + self.module_list = 
nn.ModuleList([nn.Sequential(layer) for layer in layer_list]) + self.yolo_layer_index = get_yolo_layer_index(self.module_list) + + def forward(self, x): + img_size = x.shape[-1] + output = [] + + x = self.module_list[0](x) # layer0 to layer8 + x_route8 = x + x = self.module_list[1](x) # layer9 to layer13 + x_route13 = x + x = self.module_list[2](x) # layer14, layer15 + x = self.module_list[3][0](x, img_size) # yolo_16 + output.append(x) + x = self.module_list[5](x_route13) # layer18, layer19 + x = torch.cat([x, x_route8], 1) # route + x = self.module_list[7](x) # layer21, layer22 + x = self.module_list[8][0](x, img_size) # yolo_23 + output.append(x) + + if self.training: + return output + else: + io, p = list(zip(*output)) # inference output, training output + return torch.cat(io, 1), p + + +if __name__ == "__main__": + dummy_input = torch.Tensor(5, 3, 416, 416) + model = Yolov3(num_classes=80) + params = list(model.parameters()) + k = 0 + for i in params: + l = 1 + for j in i.size(): + l *= j + # print("该层的结构: {}, 参数和: {}".format(str(list(i.size())), str(l))) + k = k + l + print("----------------------") + print("总参数数量和: " + str(k)) + print("-----------yolo layer") + for index in model.yolo_layer_index: + print(model.module_list[index]) + + print("-----------train") + model.train() + for res in model(dummy_input): + print("res:", np.shape(res)) + + print("-----------eval") + model.eval() + inference_out, train_out = model(dummy_input) + print("inference_out:", np.shape(inference_out)) + for o in train_out: + print("train_out:", np.shape(o)) diff --git a/components/hand_keypoints/handpose_x.py b/components/hand_keypoints/handpose_x.py new file mode 100644 index 0000000..72f30ce --- /dev/null +++ b/components/hand_keypoints/handpose_x.py @@ -0,0 +1,146 @@ +#-*-coding:utf-8-*- +# date:2021-03-09 +# Author: Eric.Lee +# function: handpose_x 21 keypoints 2D + +import os +import torch +import cv2 +import numpy as np +import json + +import torch +import torch.nn as nn + +import time +import math +from datetime import datetime + +from hand_keypoints.models.resnet import resnet18,resnet34,resnet50,resnet101 +from hand_keypoints.models.squeezenet import squeezenet1_1,squeezenet1_0 + +from hand_keypoints.models.resnet import resnet18,resnet34,resnet50,resnet101 +from hand_keypoints.models.squeezenet import squeezenet1_1,squeezenet1_0 +from hand_keypoints.models.shufflenetv2 import ShuffleNetV2 +from hand_keypoints.models.shufflenet import ShuffleNet +from hand_keypoints.models.mobilenetv2 import MobileNetV2 +from torchvision.models import shufflenet_v2_x1_5 ,shufflenet_v2_x1_0 , shufflenet_v2_x2_0 +from hand_keypoints.models.rexnetv1 import ReXNetV1 + + +from hand_keypoints.utils.common_utils import * + +def draw_bd_handpose_c(img_,hand_,x,y,thick = 3): + # thick = 2 + colors = [(0,215,255),(255,115,55),(5,255,55),(25,15,255),(225,15,55)] + # + cv2.line(img_, (int(hand_['0']['x']+x), int(hand_['0']['y']+y)),(int(hand_['1']['x']+x), int(hand_['1']['y']+y)), colors[0], thick) + cv2.line(img_, (int(hand_['1']['x']+x), int(hand_['1']['y']+y)),(int(hand_['2']['x']+x), int(hand_['2']['y']+y)), colors[0], thick) + cv2.line(img_, (int(hand_['2']['x']+x), int(hand_['2']['y']+y)),(int(hand_['3']['x']+x), int(hand_['3']['y']+y)), colors[0], thick) + cv2.line(img_, (int(hand_['3']['x']+x), int(hand_['3']['y']+y)),(int(hand_['4']['x']+x), int(hand_['4']['y']+y)), colors[0], thick) + + cv2.line(img_, (int(hand_['0']['x']+x), int(hand_['0']['y']+y)),(int(hand_['5']['x']+x), int(hand_['5']['y']+y)), colors[1], 
thick) + cv2.line(img_, (int(hand_['5']['x']+x), int(hand_['5']['y']+y)),(int(hand_['6']['x']+x), int(hand_['6']['y']+y)), colors[1], thick) + cv2.line(img_, (int(hand_['6']['x']+x), int(hand_['6']['y']+y)),(int(hand_['7']['x']+x), int(hand_['7']['y']+y)), colors[1], thick) + cv2.line(img_, (int(hand_['7']['x']+x), int(hand_['7']['y']+y)),(int(hand_['8']['x']+x), int(hand_['8']['y']+y)), colors[1], thick) + + cv2.line(img_, (int(hand_['0']['x']+x), int(hand_['0']['y']+y)),(int(hand_['9']['x']+x), int(hand_['9']['y']+y)), colors[2], thick) + cv2.line(img_, (int(hand_['9']['x']+x), int(hand_['9']['y']+y)),(int(hand_['10']['x']+x), int(hand_['10']['y']+y)), colors[2], thick) + cv2.line(img_, (int(hand_['10']['x']+x), int(hand_['10']['y']+y)),(int(hand_['11']['x']+x), int(hand_['11']['y']+y)), colors[2], thick) + cv2.line(img_, (int(hand_['11']['x']+x), int(hand_['11']['y']+y)),(int(hand_['12']['x']+x), int(hand_['12']['y']+y)), colors[2], thick) + + cv2.line(img_, (int(hand_['0']['x']+x), int(hand_['0']['y']+y)),(int(hand_['13']['x']+x), int(hand_['13']['y']+y)), colors[3], thick) + cv2.line(img_, (int(hand_['13']['x']+x), int(hand_['13']['y']+y)),(int(hand_['14']['x']+x), int(hand_['14']['y']+y)), colors[3], thick) + cv2.line(img_, (int(hand_['14']['x']+x), int(hand_['14']['y']+y)),(int(hand_['15']['x']+x), int(hand_['15']['y']+y)), colors[3], thick) + cv2.line(img_, (int(hand_['15']['x']+x), int(hand_['15']['y']+y)),(int(hand_['16']['x']+x), int(hand_['16']['y']+y)), colors[3], thick) + + cv2.line(img_, (int(hand_['0']['x']+x), int(hand_['0']['y']+y)),(int(hand_['17']['x']+x), int(hand_['17']['y']+y)), colors[4], thick) + cv2.line(img_, (int(hand_['17']['x']+x), int(hand_['17']['y']+y)),(int(hand_['18']['x']+x), int(hand_['18']['y']+y)), colors[4], thick) + cv2.line(img_, (int(hand_['18']['x']+x), int(hand_['18']['y']+y)),(int(hand_['19']['x']+x), int(hand_['19']['y']+y)), colors[4], thick) + cv2.line(img_, (int(hand_['19']['x']+x), int(hand_['19']['y']+y)),(int(hand_['20']['x']+x), int(hand_['20']['y']+y)), colors[4], thick) + +# +class handpose_x_model(object): + def __init__(self, + # model_path = './components/hand_keypoints/weights/ReXNetV1-size-256-wingloss102-0.1063.pth', + model_path = './components/hand_keypoints/weights/resnet_50-size-256-wingloss102-0.119.pth', + img_size= 256, + num_classes = 42,# 手部关键点个数 * 2 : 21*2 + # model_arch = "rexnetv1", + model_arch = "resnet_50", + model_half = False, + ): + # print("handpose_x loading : ",model_path) + self.use_cuda = torch.cuda.is_available() + self.device = torch.device("cuda:0" if self.use_cuda else "cpu") # 可选的设备类型及序号 + self.img_size = img_size + self.model_half = model_half + #----------------------------------------------------------------------- + + if model_arch == 'resnet_50': + model_ = resnet50(num_classes = num_classes,img_size = self.img_size) + elif model_arch == 'resnet_18': + model_ = resnet18(num_classes = num_classes,img_size = self.img_size) + elif model_arch == 'resnet_34': + model_ = resnet34(num_classes = num_classes,img_size = self.img_size) + elif model_arch == 'resnet_101': + model_ = resnet101(num_classes = num_classes,img_size = self.img_size) + elif model_arch == "squeezenet1_0": + model_ = squeezenet1_0(pretrained=True, num_classes=num_classes) + elif model_arch == "squeezenet1_1": + model_ = squeezenet1_1(pretrained=True, num_classes=num_classes) + elif model_arch == "shufflenetv2": + model_ = ShuffleNetV2(ratio=1., num_classes=num_classes) + elif model_arch == "shufflenet_v2_x1_5": + model_ = 
shufflenet_v2_x1_5(pretrained=False,num_classes=num_classes) + elif model_arch == "shufflenet_v2_x1_0": + model_ = shufflenet_v2_x1_0(pretrained=False,num_classes=num_classes) + elif model_arch == "shufflenet_v2_x2_0": + model_ = shufflenet_v2_x2_0(pretrained=False,num_classes=num_classes) + elif model_arch == "shufflenet": + model_ = ShuffleNet(num_blocks = [2,4,2], num_classes=num_classes, groups=3) + elif model_arch == "mobilenetv2": + model_ = MobileNetV2(num_classes=num_classes) + elif model_arch == "rexnetv1": + model_ = ReXNetV1(num_classes=num_classes,width_mult=1., depth_mult=1.) + else: + print(" no support the model") + #----------------------------------------------------------------------- + model_ = model_.to(self.device) + model_.eval() # 设置为前向推断模式 + + # 加载测试模型 + if os.access(model_path,os.F_OK):# checkpoint + chkpt = torch.load(model_path, map_location=self.device) + model_.load_state_dict(chkpt) + print('handpose_x model loading : {}'.format(model_path)) + + self.model_handpose = model_ + if model_half: + self.model_handpose = self.model_handpose.half() + + + + def predict(self, img, vis = False): + with torch.no_grad(): + + if not((img.shape[0] == self.img_size) and (img.shape[1] == self.img_size)): + img = cv2.resize(img, (self.img_size,self.img_size), interpolation = cv2.INTER_CUBIC) + + img_ = img.astype(np.float32) + img_ = (img_-128.)/256. + + img_ = img_.transpose(2, 0, 1) + img_ = torch.from_numpy(img_) + img_ = img_.unsqueeze_(0).float() + + if self.model_half: + img_=img_.type(torch.HalfTensor) + if self.use_cuda: + img_ = img_.cuda() # (bs, 3, h, w) + + pre_ = self.model_handpose(img_) + output = pre_.cpu().detach().numpy() + output = np.squeeze(output) + + return output diff --git a/components/hand_keypoints/models/mobilenetv2.py b/components/hand_keypoints/models/mobilenetv2.py new file mode 100644 index 0000000..7cd6062 --- /dev/null +++ b/components/hand_keypoints/models/mobilenetv2.py @@ -0,0 +1,105 @@ +"""mobilenetv2 in pytorch + + + +[1] Mark Sandler, Andrew Howard, Menglong Zhu, Andrey Zhmoginov, Liang-Chieh Chen + + MobileNetV2: Inverted Residuals and Linear Bottlenecks + https://arxiv.org/abs/1801.04381 +""" + +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class LinearBottleNeck(nn.Module): + + def __init__(self, in_channels, out_channels, stride, t=6, class_num=100): + super().__init__() + + self.residual = nn.Sequential( + nn.Conv2d(in_channels, in_channels * t, 1), + nn.BatchNorm2d(in_channels * t), + nn.ReLU6(inplace=True), + + nn.Conv2d(in_channels * t, in_channels * t, 3, stride=stride, padding=1, groups=in_channels * t), + nn.BatchNorm2d(in_channels * t), + nn.ReLU6(inplace=True), + + nn.Conv2d(in_channels * t, out_channels, 1), + nn.BatchNorm2d(out_channels) + ) + + self.stride = stride + self.in_channels = in_channels + self.out_channels = out_channels + + def forward(self, x): + + residual = self.residual(x) + + if self.stride == 1 and self.in_channels == self.out_channels: + residual += x + + return residual + +class MobileNetV2(nn.Module): + + def __init__(self, num_classes=100,dropout_factor = 1.0): + super().__init__() + + self.pre = nn.Sequential( + nn.Conv2d(3, 32, 1, padding=1), + nn.BatchNorm2d(32), + nn.ReLU6(inplace=True) + ) + + self.stage1 = LinearBottleNeck(32, 16, 1, 1) + self.stage2 = self._make_stage(2, 16, 24, 2, 6) + self.stage3 = self._make_stage(3, 24, 32, 2, 6) + self.stage4 = self._make_stage(4, 32, 64, 2, 6) + self.stage5 = self._make_stage(3, 64, 96, 1, 6) + self.stage6 = self._make_stage(3, 
96, 160, 1, 6) + self.stage7 = LinearBottleNeck(160, 320, 1, 6) + + self.conv1 = nn.Sequential( + nn.Conv2d(320, 1280, 1), + nn.BatchNorm2d(1280), + nn.ReLU6(inplace=True) + ) + + self.conv2 = nn.Conv2d(1280, num_classes, 1) + + self.dropout = nn.Dropout(dropout_factor) + + def forward(self, x): + x = self.pre(x) + x = self.stage1(x) + x = self.stage2(x) + x = self.stage3(x) + x = self.stage4(x) + x = self.stage5(x) + x = self.stage6(x) + x = self.stage7(x) + x = self.conv1(x) + x = F.adaptive_avg_pool2d(x, 1) + x = self.dropout(x) + x = self.conv2(x) + x = x.view(x.size(0), -1) + + return x + + def _make_stage(self, repeat, in_channels, out_channels, stride, t): + + layers = [] + layers.append(LinearBottleNeck(in_channels, out_channels, stride, t)) + + while repeat - 1: + layers.append(LinearBottleNeck(out_channels, out_channels, 1, t)) + repeat -= 1 + + return nn.Sequential(*layers) + +def mobilenetv2(): + return MobileNetV2() diff --git a/components/hand_keypoints/models/my_model.py b/components/hand_keypoints/models/my_model.py new file mode 100644 index 0000000..d161357 --- /dev/null +++ b/components/hand_keypoints/models/my_model.py @@ -0,0 +1,67 @@ +#-*-coding:utf-8-*- +# date:2020-08-08 +# Author: X.L.Eric +# function: my model + +import torch +import torch.nn as nn +import torch.nn.functional as F +class MY_Net(nn.Module): + def __init__(self,num_classes):# op 初始化 + super(MY_Net, self).__init__() + self.cov = nn.Conv2d(3, 32, 3) + self.relu = nn.ReLU(inplace=True) + layers1 = [] + # Conv2d : in_channels, out_channels, kernel_size, stride, padding + layers1.append(nn.Conv2d(in_channels = 32, out_channels = 64, kernel_size = 3,stride=1,padding = 0)) + layers1.append(nn.BatchNorm2d(64,affine=True)) + layers1.append(nn.ReLU(inplace=True)) + layers1.append(nn.AvgPool2d(kernel_size=3, stride=2, padding=1)) + self.layers1 = nn.Sequential(*layers1) + layers2 = [] + layers2.append(nn.Conv2d(64, 128, 3)) + layers2.append(nn.BatchNorm2d(128,affine=True)) + layers2.append(nn.ReLU(inplace=True)) + layers2.append(nn.MaxPool2d(kernel_size=3, stride=2, padding=1)) + self.layers2 = nn.Sequential(*layers2) + layers3 = [] + layers3.append(nn.Conv2d(128, 256, 3,stride=2)) + layers3.append(nn.BatchNorm2d(256,affine=True)) + layers3.append(nn.ReLU(inplace=True)) + layers3.append(nn.MaxPool2d(kernel_size=3, stride=2, padding=1)) + self.layers3 = nn.Sequential(*layers3) + layers4 = [] + layers4.append(nn.Conv2d(256, 512, 3,stride=2)) + layers4.append(nn.BatchNorm2d(512,affine=True)) + layers4.append(nn.ReLU(inplace=True)) + layers4.append(nn.MaxPool2d(kernel_size=3, stride=2, padding=1)) + layers4.append(nn.Conv2d(512, 512, 1,stride=1)) + self.layers4 = nn.Sequential(*layers4) + self.avgpool = nn.AdaptiveAvgPool2d((1, 1))# 自适应均值池化 + self.fc = nn.Linear(in_features = 512 , out_features = num_classes)# 全连接 fc + + def forward(self, x):# 模型前向推断 + x = self.cov(x) + x = self.relu(x) + x = self.layers1(x) + x = self.layers2(x) + x = self.layers3(x) + x = self.layers4(x) + x = self.avgpool(x) + x = x.reshape(x.size(0), -1) + x = self.fc(x) + return x + +if __name__ == "__main__": + #输入批次图片(batchsize,channel,height,width):8 ,3*256*256 + dummy_input = torch.randn([8, 3, 256,256]) + model = MY_Net(num_classes = 100)# 分类数为 100 类 + print('model:\n',model)# 打印模型op + output = model(dummy_input)# 模型前向推断 + # 模型前向推断输出特征尺寸 + print('model inference feature size: ',output.size()) + print(output) + + output_ = F.softmax(output,dim = 1) + # + print(output_) diff --git a/components/hand_keypoints/models/resnet.py 
b/components/hand_keypoints/models/resnet.py new file mode 100644 index 0000000..eda0241 --- /dev/null +++ b/components/hand_keypoints/models/resnet.py @@ -0,0 +1,263 @@ +import torch +import torch.nn as nn +import math +import torch.utils.model_zoo as model_zoo + +__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', + 'resnet152'] + + +model_urls = { + 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', + 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', + 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', + 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', + 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', +} + + +def conv3x3(in_planes, out_planes, stride=1): + """3x3 convolution with padding""" + return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, + padding=1, bias=False) + + +class BasicBlock(nn.Module): + expansion = 1 + + def __init__(self, inplanes, planes, stride=1, downsample=None): + super(BasicBlock, self).__init__() + self.conv1 = conv3x3(inplanes, planes, stride) + self.bn1 = nn.BatchNorm2d(planes) + self.relu = nn.ReLU(inplace=True) + self.conv2 = conv3x3(planes, planes) + self.bn2 = nn.BatchNorm2d(planes) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu(out) + + return out + +class Bottleneck(nn.Module): + expansion = 4 + + def __init__(self, inplanes, planes, stride=1, downsample=None): + super(Bottleneck, self).__init__() + self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) + self.bn1 = nn.BatchNorm2d(planes) + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, + padding=1, bias=False) + self.bn2 = nn.BatchNorm2d(planes) + self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) + self.bn3 = nn.BatchNorm2d(planes * 4) + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu(out) + + return out + + +class ResNet(nn.Module): + + def __init__(self, block, layers, num_classes=1000, img_size=224,dropout_factor = 1.): + self.inplanes = 64 + self.dropout_factor = dropout_factor + super(ResNet, self).__init__() + # 26 + # 586 train_sequence + self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, + bias=False) + self.bn1 = nn.BatchNorm2d(64) + self.relu = nn.ReLU(inplace=True) + # see this issue: https://github.com/xxradon/PytorchToCaffe/issues/16 + # self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True) + self.layer1 = self._make_layer(block, 64, layers[0]) + self.layer2 = self._make_layer(block, 128, layers[1], stride=2) + self.layer3 = self._make_layer(block, 256, layers[2], stride=2) + self.layer4 = self._make_layer(block, 512, layers[3], stride=2) + + assert img_size % 32 == 0 + pool_kernel = int(img_size / 32) + self.avgpool = nn.AvgPool2d(pool_kernel, stride=1, 
ceil_mode=True) + + self.dropout = nn.Dropout(self.dropout_factor) + + self.fc = nn.Linear(512 * block.expansion, num_classes) + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + m.weight.data.normal_(0, math.sqrt(2. / n)) + elif isinstance(m, nn.BatchNorm2d): + m.weight.data.fill_(1) + m.bias.data.zero_() + + def _make_layer(self, block, planes, blocks, stride=1): + downsample = None + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = nn.Sequential( + nn.Conv2d(self.inplanes, planes * block.expansion, + kernel_size=1, stride=stride, bias=False), + nn.BatchNorm2d(planes * block.expansion), + ) + + layers = [] + layers.append(block(self.inplanes, planes, stride, downsample)) + self.inplanes = planes * block.expansion + for i in range(1, blocks): + layers.append(block(self.inplanes, planes)) + + return nn.Sequential(*layers) + + def forward(self, x): + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.maxpool(x) + + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + + x = self.avgpool(x) + x = x.view(x.size(0), -1) + + x = self.dropout(x) + + x = self.fc(x) + + return x + + +def load_model(model, pretrained_state_dict): + model_dict = model.state_dict() + pretrained_dict = {k: v for k, v in pretrained_state_dict.items() if + k in model_dict and model_dict[k].size() == pretrained_state_dict[k].size()} + model.load_state_dict(pretrained_dict, strict=False) + if len(pretrained_dict) == 0: + print("[INFO] No params were loaded ...") + else: + for k, v in pretrained_state_dict.items(): + if k in pretrained_dict: + print("==>> Load {} {}".format(k, v.size())) + else: + print("[INFO] Skip {} {}".format(k, v.size())) + return model + + +def resnet18(pretrained=False, **kwargs): + """Constructs a ResNet-18 model. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs) + if pretrained: + # model.load_state_dict(model_zoo.load_url(model_urls['resnet18'])) + print("Load pretrained model from {}".format(model_urls['resnet18'])) + pretrained_state_dict = model_zoo.load_url(model_urls['resnet18']) + model = load_model(model, pretrained_state_dict) + return model + + +def resnet34(pretrained=False, **kwargs): + """Constructs a ResNet-34 model. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs) + if pretrained: + # model.load_state_dict(model_zoo.load_url(model_urls['resnet34'])) + print("Load pretrained model from {}".format(model_urls['resnet34'])) + pretrained_state_dict = model_zoo.load_url(model_urls['resnet34']) + model = load_model(model, pretrained_state_dict) + return model + + +def resnet50(pretrained=False, **kwargs): + """Constructs a ResNet-50 model. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) + if pretrained: + # model.load_state_dict(model_zoo.load_url(model_urls['resnet50'])) + print("Load pretrained model from {}".format(model_urls['resnet50'])) + pretrained_state_dict = model_zoo.load_url(model_urls['resnet50']) + model = load_model(model, pretrained_state_dict) + return model + + +def resnet101(pretrained=False, **kwargs): + """Constructs a ResNet-101 model. 
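+
+    ImageNet weights are restored through load_model(), which copies only parameters whose names
+    and shapes match the current model, so a classifier head resized via num_classes is skipped.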
+ + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) + if pretrained: + # model.load_state_dict(model_zoo.load_url(model_urls['resnet101'])) + print("Load pretrained model from {}".format(model_urls['resnet101'])) + pretrained_state_dict = model_zoo.load_url(model_urls['resnet101']) + model = load_model(model, pretrained_state_dict) + return model + + +def resnet152(pretrained=False, **kwargs): + """Constructs a ResNet-152 model. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs) + if pretrained: + # model.load_state_dict(model_zoo.load_url(model_urls['resnet152'])) + print("Load pretrained model from {}".format(model_urls['resnet152'])) + pretrained_state_dict = model_zoo.load_url(model_urls['resnet152']) + model = load_model(model, pretrained_state_dict) + return model + +if __name__ == "__main__": + input = torch.randn([32, 3, 256,256]) + model = resnet34(False, num_classes=2, img_size=256) + output = model(input) + print(output.size()) diff --git a/components/hand_keypoints/models/resnet_50.py b/components/hand_keypoints/models/resnet_50.py new file mode 100644 index 0000000..20783e4 --- /dev/null +++ b/components/hand_keypoints/models/resnet_50.py @@ -0,0 +1,194 @@ +import torch +import torch.nn as nn + +__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', + 'resnet152'] + + +model_urls = { + 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', + 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', + 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', + 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', + 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', +} + +def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1): + """3x3 convolution with padding""" + return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, + padding=dilation, groups=groups, bias=False, dilation=dilation) + + +def conv1x1(in_planes, out_planes, stride=1): + """1x1 convolution""" + return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) + + +class Bottleneck(nn.Module): + expansion = 4 + + def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1, + base_width=64, dilation=1, norm_layer=None): + super(Bottleneck, self).__init__() + if norm_layer is None: + norm_layer = nn.BatchNorm2d + width = int(planes * (base_width / 64.)) * groups + # Both self.conv2 and self.downsample layers downsample the input when stride != 1 + self.conv1 = conv1x1(inplanes, width) + self.bn1 = norm_layer(width) + self.conv2 = conv3x3(width, width, stride, groups, dilation) + self.bn2 = norm_layer(width) + self.conv3 = conv1x1(width, planes * self.expansion) + self.bn3 = norm_layer(planes * self.expansion) + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + identity = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + out = self.relu(out) + + return out + +class ResNet(nn.Module): + + def __init__(self, block, layers, num_classes=1000,dropout_factor = 1., zero_init_residual=False, + 
groups=1, width_per_group=64, replace_stride_with_dilation=None, + norm_layer=nn.BatchNorm2d): + super(ResNet, self).__init__() + if norm_layer is None: + print('BatchNorm2d') + norm_layer = nn.BatchNorm2d + self._norm_layer = norm_layer + + self.inplanes = 64 + self.dilation = 1 + if replace_stride_with_dilation is None: + # each element in the tuple indicates if we should replace + # the 2x2 stride with a dilated convolution instead + replace_stride_with_dilation = [False, False, False] + if len(replace_stride_with_dilation) != 3: + raise ValueError("replace_stride_with_dilation should be None " + "or a 3-element tuple, got {}".format(replace_stride_with_dilation)) + self.groups = groups + self.base_width = width_per_group + self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3, + bias=False) + self.bn1 = norm_layer(self.inplanes) + self.relu = nn.ReLU(inplace=True) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + self.layer1 = self._make_layer(block, 64, layers[0]) + self.layer2 = self._make_layer(block, 128, layers[1], stride=2, + dilate=replace_stride_with_dilation[0]) + self.layer3 = self._make_layer(block, 256, layers[2], stride=2, + dilate=replace_stride_with_dilation[1]) + self.layer4 = self._make_layer(block, 512, layers[3], stride=2, + dilate=replace_stride_with_dilation[2]) + self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) + + self.dropout = nn.Dropout(dropout_factor) + + self.fc = nn.Linear(512 * block.expansion, num_classes) + + + # ---------------------------------------------------------------------------------- + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') + elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + + # Zero-initialize the last BN in each residual branch, + # so that the residual branch starts with zeros, and each residual block behaves like an identity. 
+ # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677 + if zero_init_residual: + for m in self.modules(): + if isinstance(m, Bottleneck): + nn.init.constant_(m.bn3.weight, 0) + elif isinstance(m, BasicBlock): + nn.init.constant_(m.bn2.weight, 0) + + def _make_layer(self, block, planes, blocks, stride=1, dilate=False): + norm_layer = self._norm_layer + downsample = None + previous_dilation = self.dilation + if dilate: + self.dilation *= stride + stride = 1 + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = nn.Sequential( + conv1x1(self.inplanes, planes * block.expansion, stride), + norm_layer(planes * block.expansion), + ) + + layers = [] + layers.append(block(self.inplanes, planes, stride, downsample, self.groups, + self.base_width, previous_dilation, norm_layer)) + self.inplanes = planes * block.expansion + for _ in range(1, blocks): + layers.append(block(self.inplanes, planes, groups=self.groups, + base_width=self.base_width, dilation=self.dilation, + norm_layer=norm_layer)) + + return nn.Sequential(*layers) + + def forward(self, x): + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.maxpool(x) + + x = self.layer1(x) + + x = self.layer2(x) + + x = self.layer3(x) + + x = self.layer4(x) + + x = self.avgpool(x) + + x = x.reshape(x.size(0), -1) + + x = self.dropout(x) + + x = self.fc(x) + + return x + +def _resnet(arch, block, layers, **kwargs): + model = ResNet(block, layers, **kwargs) + return model + +def resnet50(**kwargs): + r"""ResNet-50 model from + `"Deep Residual Learning for Image Recognition" `_ + """ + print('Bottleneck:{}'.format(Bottleneck)) + return _resnet('resnet50', Bottleneck, [3, 4, 6, 3],**kwargs) +if __name__ == "__main__": + dummy_input = torch.randn([32, 3, 128,128]) + num_classes = 100 + model = resnet50(num_classes = num_classes,dropout_factor=0.5) + + print(model) + output = model(dummy_input) + print(output.size()) diff --git a/components/hand_keypoints/models/rexnetv1.py b/components/hand_keypoints/models/rexnetv1.py new file mode 100644 index 0000000..664b9d8 --- /dev/null +++ b/components/hand_keypoints/models/rexnetv1.py @@ -0,0 +1,183 @@ +""" +ReXNet +Copyright (c) 2020-present NAVER Corp. +MIT license +""" + +import torch +import torch.nn as nn +from math import ceil + +# Memory-efficient Siwsh using torch.jit.script borrowed from the code in (https://twitter.com/jeremyphoward/status/1188251041835315200) +# Currently use memory-efficient Swish as default: +USE_MEMORY_EFFICIENT_SWISH = True + +if USE_MEMORY_EFFICIENT_SWISH: + @torch.jit.script + def swish_fwd(x): + return x.mul(torch.sigmoid(x)) + + + @torch.jit.script + def swish_bwd(x, grad_output): + x_sigmoid = torch.sigmoid(x) + return grad_output * (x_sigmoid * (1. + x * (1. 
- x_sigmoid))) + + + class SwishJitImplementation(torch.autograd.Function): + @staticmethod + def forward(ctx, x): + ctx.save_for_backward(x) + return swish_fwd(x) + + @staticmethod + def backward(ctx, grad_output): + x = ctx.saved_tensors[0] + return swish_bwd(x, grad_output) + + + def swish(x, inplace=False): + return SwishJitImplementation.apply(x) + +else: + def swish(x, inplace=False): + return x.mul_(x.sigmoid()) if inplace else x.mul(x.sigmoid()) + + +class Swish(nn.Module): + def __init__(self, inplace=True): + super(Swish, self).__init__() + self.inplace = inplace + + def forward(self, x): + return swish(x, self.inplace) + + +def ConvBNAct(out, in_channels, channels, kernel=1, stride=1, pad=0, + num_group=1, active=True, relu6=False): + out.append(nn.Conv2d(in_channels, channels, kernel, + stride, pad, groups=num_group, bias=False)) + out.append(nn.BatchNorm2d(channels)) + if active: + out.append(nn.ReLU6(inplace=True) if relu6 else nn.ReLU(inplace=True)) + + +def ConvBNSwish(out, in_channels, channels, kernel=1, stride=1, pad=0, num_group=1): + out.append(nn.Conv2d(in_channels, channels, kernel, + stride, pad, groups=num_group, bias=False)) + out.append(nn.BatchNorm2d(channels)) + out.append(Swish()) + + +class SE(nn.Module): + def __init__(self, in_channels, channels, se_ratio=12): + super(SE, self).__init__() + self.avg_pool = nn.AdaptiveAvgPool2d(1) + self.fc = nn.Sequential( + nn.Conv2d(in_channels, channels // se_ratio, kernel_size=1, padding=0), + nn.BatchNorm2d(channels // se_ratio), + nn.ReLU(inplace=True), + nn.Conv2d(channels // se_ratio, channels, kernel_size=1, padding=0), + nn.Sigmoid() + ) + + def forward(self, x): + y = self.avg_pool(x) + y = self.fc(y) + return x * y + + +class LinearBottleneck(nn.Module): + def __init__(self, in_channels, channels, t, stride, use_se=True, se_ratio=12, + **kwargs): + super(LinearBottleneck, self).__init__(**kwargs) + self.use_shortcut = stride == 1 and in_channels <= channels + self.in_channels = in_channels + self.out_channels = channels + + out = [] + if t != 1: + dw_channels = in_channels * t + ConvBNSwish(out, in_channels=in_channels, channels=dw_channels) + else: + dw_channels = in_channels + + ConvBNAct(out, in_channels=dw_channels, channels=dw_channels, kernel=3, stride=stride, pad=1, + num_group=dw_channels, active=False) + + if use_se: + out.append(SE(dw_channels, dw_channels, se_ratio)) + + out.append(nn.ReLU6()) + ConvBNAct(out, in_channels=dw_channels, channels=channels, active=False, relu6=True) + self.out = nn.Sequential(*out) + + def forward(self, x): + out = self.out(x) + if self.use_shortcut: + out[:, 0:self.in_channels] += x + + return out + + +class ReXNetV1(nn.Module): + def __init__(self, input_ch=16, final_ch=180, width_mult=1.0, depth_mult=1.0, num_classes=1000, + use_se=True, + se_ratio=12, + dropout_factor=0.2, + bn_momentum=0.9): + super(ReXNetV1, self).__init__() + + layers = [1, 2, 2, 3, 3, 5] + strides = [1, 2, 2, 2, 1, 2] + use_ses = [False, False, True, True, True, True] + + layers = [ceil(element * depth_mult) for element in layers] + strides = sum([[element] + [1] * (layers[idx] - 1) + for idx, element in enumerate(strides)], []) + if use_se: + use_ses = sum([[element] * layers[idx] for idx, element in enumerate(use_ses)], []) + else: + use_ses = [False] * sum(layers[:]) + ts = [1] * layers[0] + [6] * sum(layers[1:]) + + self.depth = sum(layers[:]) * 3 + stem_channel = 32 / width_mult if width_mult < 1.0 else 32 + inplanes = input_ch / width_mult if width_mult < 1.0 else input_ch + + features = [] 
+ in_channels_group = [] + channels_group = [] + + # The following channel configuration is a simple instance to make each layer become an expand layer. + for i in range(self.depth // 3): + if i == 0: + in_channels_group.append(int(round(stem_channel * width_mult))) + channels_group.append(int(round(inplanes * width_mult))) + else: + in_channels_group.append(int(round(inplanes * width_mult))) + inplanes += final_ch / (self.depth // 3 * 1.0) + channels_group.append(int(round(inplanes * width_mult))) + + ConvBNSwish(features, 3, int(round(stem_channel * width_mult)), kernel=3, stride=2, pad=1) + + for block_idx, (in_c, c, t, s, se) in enumerate(zip(in_channels_group, channels_group, ts, strides, use_ses)): + features.append(LinearBottleneck(in_channels=in_c, + channels=c, + t=t, + stride=s, + use_se=se, se_ratio=se_ratio)) + + pen_channels = int(1280 * width_mult) + ConvBNSwish(features, c, pen_channels) + + features.append(nn.AdaptiveAvgPool2d(1)) + self.features = nn.Sequential(*features) + self.output = nn.Sequential( + nn.Dropout(dropout_factor), + nn.Conv2d(pen_channels, num_classes, 1, bias=True)) + + def forward(self, x): + x = self.features(x) + x = self.output(x).squeeze() + return x diff --git a/components/hand_keypoints/models/shufflenet.py b/components/hand_keypoints/models/shufflenet.py new file mode 100644 index 0000000..0cd8d2c --- /dev/null +++ b/components/hand_keypoints/models/shufflenet.py @@ -0,0 +1,254 @@ +"""shufflenet in pytorch + + + +[1] Xiangyu Zhang, Xinyu Zhou, Mengxiao Lin, Jian Sun. + + ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices + https://arxiv.org/abs/1707.01083v2 +""" + +from functools import partial + +import torch +import torch.nn as nn + + +class BasicConv2d(nn.Module): + + def __init__(self, input_channels, output_channels, kernel_size, **kwargs): + super().__init__() + self.conv = nn.Conv2d(input_channels, output_channels, kernel_size, **kwargs) + self.bn = nn.BatchNorm2d(output_channels) + self.relu = nn.ReLU(inplace=True) + + def forward(self, x): + x = self.conv(x) + x = self.bn(x) + x = self.relu(x) + return x + +class ChannelShuffle(nn.Module): + + def __init__(self, groups): + super().__init__() + self.groups = groups + + def forward(self, x): + batchsize, channels, height, width = x.data.size() + channels_per_group = int(channels / self.groups) + + #"""suppose a convolutional layer with g groups whose output has + #g x n channels; we first reshape the output channel dimension + #into (g, n)""" + x = x.view(batchsize, self.groups, channels_per_group, height, width) + + #"""transposing and then flattening it back as the input of next layer.""" + x = x.transpose(1, 2).contiguous() + x = x.view(batchsize, -1, height, width) + + return x + +class DepthwiseConv2d(nn.Module): + + def __init__(self, input_channels, output_channels, kernel_size, **kwargs): + super().__init__() + self.depthwise = nn.Sequential( + nn.Conv2d(input_channels, output_channels, kernel_size, **kwargs), + nn.BatchNorm2d(output_channels) + ) + + def forward(self, x): + return self.depthwise(x) + +class PointwiseConv2d(nn.Module): + def __init__(self, input_channels, output_channels, **kwargs): + super().__init__() + self.pointwise = nn.Sequential( + nn.Conv2d(input_channels, output_channels, 1, **kwargs), + nn.BatchNorm2d(output_channels) + ) + + def forward(self, x): + return self.pointwise(x) + +class ShuffleNetUnit(nn.Module): + + def __init__(self, input_channels, output_channels, stage, stride, groups): + super().__init__() + + #"""Similar 
to [9], we set the number of bottleneck channels to 1/4 + #of the output channels for each ShuffleNet unit.""" + self.bottlneck = nn.Sequential( + PointwiseConv2d( + input_channels, + int(output_channels / 4), + groups=groups + ), + nn.ReLU(inplace=True) + ) + + #"""Note that for Stage 2, we do not apply group convolution on the first pointwise + #layer because the number of input channels is relatively small.""" + if stage == 2: + self.bottlneck = nn.Sequential( + PointwiseConv2d( + input_channels, + int(output_channels / 4), + groups=groups + ), + nn.ReLU(inplace=True) + ) + + self.channel_shuffle = ChannelShuffle(groups) + + self.depthwise = DepthwiseConv2d( + int(output_channels / 4), + int(output_channels / 4), + 3, + groups=int(output_channels / 4), + stride=stride, + padding=1 + ) + + self.expand = PointwiseConv2d( + int(output_channels / 4), + output_channels, + groups=groups + ) + + self.relu = nn.ReLU(inplace=True) + self.fusion = self._add + self.shortcut = nn.Sequential() + + #"""As for the case where ShuffleNet is applied with stride, + #we simply make two modifications (see Fig 2 (c)): + #(i) add a 3 × 3 average pooling on the shortcut path; + #(ii) replace the element-wise addition with channel concatenation, + #which makes it easy to enlarge channel dimension with little extra + #computation cost. + if stride != 1 or input_channels != output_channels: + self.shortcut = nn.AvgPool2d(3, stride=2, padding=1) + + self.expand = PointwiseConv2d( + int(output_channels / 4), + output_channels - input_channels, + groups=groups + ) + + self.fusion = self._cat + + def _add(self, x, y): + return torch.add(x, y) + + def _cat(self, x, y): + return torch.cat([x, y], dim=1) + + def forward(self, x): + shortcut = self.shortcut(x) + + shuffled = self.bottlneck(x) + shuffled = self.channel_shuffle(shuffled) + shuffled = self.depthwise(shuffled) + shuffled = self.expand(shuffled) + + output = self.fusion(shortcut, shuffled) + output = self.relu(output) + + return output + +class ShuffleNet(nn.Module): + + def __init__(self, num_blocks = [2,4,2], num_classes=100, groups=3, dropout_factor = 1.0): + super().__init__() + + if groups == 1: + out_channels = [24, 144, 288, 567] + elif groups == 2: + out_channels = [24, 200, 400, 800] + elif groups == 3: + out_channels = [24, 240, 480, 960] + elif groups == 4: + out_channels = [24, 272, 544, 1088] + elif groups == 8: + out_channels = [24, 384, 768, 1536] + + self.conv1 = BasicConv2d(3, out_channels[0], 3, padding=1, stride=1) + self.input_channels = out_channels[0] + + self.stage2 = self._make_stage( + ShuffleNetUnit, + num_blocks[0], + out_channels[1], + stride=2, + stage=2, + groups=groups + ) + + self.stage3 = self._make_stage( + ShuffleNetUnit, + num_blocks[1], + out_channels[2], + stride=2, + stage=3, + groups=groups + ) + + self.stage4 = self._make_stage( + ShuffleNetUnit, + num_blocks[2], + out_channels[3], + stride=2, + stage=4, + groups=groups + ) + + self.avg = nn.AdaptiveAvgPool2d((1, 1)) + self.fc = nn.Linear(out_channels[3], num_classes) + self.dropout = nn.Dropout(dropout_factor) + + def forward(self, x): + x = self.conv1(x) + x = self.stage2(x) + x = self.stage3(x) + x = self.stage4(x) + x = self.avg(x) + x = x.view(x.size(0), -1) + x = self.dropout(x) + x = self.fc(x) + + return x + + def _make_stage(self, block, num_blocks, output_channels, stride, stage, groups): + """make shufflenet stage + + Args: + block: block type, shuffle unit + out_channels: output depth channel number of this stage + num_blocks: how many blocks per stage + 
stride: the stride of the first block of this stage + stage: stage index + groups: group number of group convolution + Return: + return a shuffle net stage + """ + strides = [stride] + [1] * (num_blocks - 1) + + stage = [] + + for stride in strides: + stage.append( + block( + self.input_channels, + output_channels, + stride=stride, + stage=stage, + groups=groups + ) + ) + self.input_channels = output_channels + + return nn.Sequential(*stage) + +def shufflenet(): + return ShuffleNet([4, 8, 4]) diff --git a/components/hand_keypoints/models/shufflenetv2.py b/components/hand_keypoints/models/shufflenetv2.py new file mode 100644 index 0000000..a94f9a7 --- /dev/null +++ b/components/hand_keypoints/models/shufflenetv2.py @@ -0,0 +1,157 @@ +"""shufflenetv2 in pytorch + + + +[1] Ningning Ma, Xiangyu Zhang, Hai-Tao Zheng, Jian Sun + + ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design + https://arxiv.org/abs/1807.11164 +""" + +import torch +import torch.nn as nn +import torch.nn.functional as F + + +def channel_split(x, split): + """split a tensor into two pieces along channel dimension + Args: + x: input tensor + split:(int) channel size for each pieces + """ + assert x.size(1) == split * 2 + return torch.split(x, split, dim=1) + +def channel_shuffle(x, groups): + """channel shuffle operation + Args: + x: input tensor + groups: input branch number + """ + + batch_size, channels, height, width = x.size() + channels_per_group = int(channels // groups) + + x = x.view(batch_size, groups, channels_per_group, height, width) + x = x.transpose(1, 2).contiguous() + x = x.view(batch_size, -1, height, width) + + return x + +class ShuffleUnit(nn.Module): + + def __init__(self, in_channels, out_channels, stride): + super().__init__() + + self.stride = stride + self.in_channels = in_channels + self.out_channels = out_channels + + if stride != 1 or in_channels != out_channels: + self.residual = nn.Sequential( + nn.Conv2d(in_channels, in_channels, 1), + nn.BatchNorm2d(in_channels), + nn.ReLU(inplace=True), + nn.Conv2d(in_channels, in_channels, 3, stride=stride, padding=1, groups=in_channels), + nn.BatchNorm2d(in_channels), + nn.Conv2d(in_channels, int(out_channels / 2), 1), + nn.BatchNorm2d(int(out_channels / 2)), + nn.ReLU(inplace=True) + ) + + self.shortcut = nn.Sequential( + nn.Conv2d(in_channels, in_channels, 3, stride=stride, padding=1, groups=in_channels), + nn.BatchNorm2d(in_channels), + nn.Conv2d(in_channels, int(out_channels / 2), 1), + nn.BatchNorm2d(int(out_channels / 2)), + nn.ReLU(inplace=True) + ) + else: + self.shortcut = nn.Sequential() + + in_channels = int(in_channels / 2) + self.residual = nn.Sequential( + nn.Conv2d(in_channels, in_channels, 1), + nn.BatchNorm2d(in_channels), + nn.ReLU(inplace=True), + nn.Conv2d(in_channels, in_channels, 3, stride=stride, padding=1, groups=in_channels), + nn.BatchNorm2d(in_channels), + nn.Conv2d(in_channels, in_channels, 1), + nn.BatchNorm2d(in_channels), + nn.ReLU(inplace=True) + ) + + + def forward(self, x): + + if self.stride == 1 and self.out_channels == self.in_channels: + shortcut, residual = channel_split(x, int(self.in_channels / 2)) + else: + shortcut = x + residual = x + + shortcut = self.shortcut(shortcut) + residual = self.residual(residual) + x = torch.cat([shortcut, residual], dim=1) + x = channel_shuffle(x, 2) + + return x + +class ShuffleNetV2(nn.Module): + + def __init__(self, ratio=1., num_classes=100, dropout_factor = 1.0): + super().__init__() + if ratio == 0.5: + out_channels = [48, 96, 192, 1024] + elif ratio == 1: + 
out_channels = [116, 232, 464, 1024] + elif ratio == 1.5: + out_channels = [176, 352, 704, 1024] + elif ratio == 2: + out_channels = [244, 488, 976, 2048] + else: + ValueError('unsupported ratio number') + + self.pre = nn.Sequential( + nn.Conv2d(3, 24, 3, padding=1), + nn.BatchNorm2d(24) + ) + + self.stage2 = self._make_stage(24, out_channels[0], 3) + self.stage3 = self._make_stage(out_channels[0], out_channels[1], 7) + self.stage4 = self._make_stage(out_channels[1], out_channels[2], 3) + self.conv5 = nn.Sequential( + nn.Conv2d(out_channels[2], out_channels[3], 1), + nn.BatchNorm2d(out_channels[3]), + nn.ReLU(inplace=True) + ) + + self.fc = nn.Linear(out_channels[3], num_classes) + + self.dropout = nn.Dropout(dropout_factor) + + def forward(self, x): + x = self.pre(x) + x = self.stage2(x) + x = self.stage3(x) + x = self.stage4(x) + x = self.conv5(x) + x = F.adaptive_avg_pool2d(x, 1) + x = x.view(x.size(0), -1) + x = self.dropout(x) + x = self.fc(x) + + return x + + def _make_stage(self, in_channels, out_channels, repeat): + layers = [] + layers.append(ShuffleUnit(in_channels, out_channels, 2)) + + while repeat: + layers.append(ShuffleUnit(out_channels, out_channels, 1)) + repeat -= 1 + + return nn.Sequential(*layers) + +def shufflenetv2(): + return ShuffleNetV2() diff --git a/components/hand_keypoints/models/squeezenet.py b/components/hand_keypoints/models/squeezenet.py new file mode 100644 index 0000000..377fe4f --- /dev/null +++ b/components/hand_keypoints/models/squeezenet.py @@ -0,0 +1,153 @@ +import math +import numpy as np +import torch +import torch.nn as nn +import torch.nn.init as init +import torch.utils.model_zoo as model_zoo + + +__all__ = ['SqueezeNet', 'squeezenet1_0', 'squeezenet1_1'] + + +model_urls = { + 'squeezenet1_0': 'https://download.pytorch.org/models/squeezenet1_0-a815701f.pth', + 'squeezenet1_1': 'https://download.pytorch.org/models/squeezenet1_1-f364aa15.pth', +} + + +class Fire(nn.Module): + + def __init__(self, inplanes, squeeze_planes, + expand1x1_planes, expand3x3_planes): + super(Fire, self).__init__() + self.inplanes = inplanes + self.squeeze = nn.Conv2d(inplanes, squeeze_planes, kernel_size=1) + self.squeeze_activation = nn.ReLU(inplace=True) + self.expand1x1 = nn.Conv2d(squeeze_planes, expand1x1_planes, + kernel_size=1) + self.expand1x1_activation = nn.ReLU(inplace=True) + self.expand3x3 = nn.Conv2d(squeeze_planes, expand3x3_planes, + kernel_size=3, padding=1) + self.expand3x3_activation = nn.ReLU(inplace=True) + + def forward(self, x): + x = self.squeeze_activation(self.squeeze(x)) + return torch.cat([ + self.expand1x1_activation(self.expand1x1(x)), + self.expand3x3_activation(self.expand3x3(x)) + ], 1) + + +class SqueezeNet(nn.Module): + + def __init__(self, version=1.0, num_classes=1000,dropout_factor = 1.): + super(SqueezeNet, self).__init__() + if version not in [1.0, 1.1]: + raise ValueError("Unsupported SqueezeNet version {version}:" + "1.0 or 1.1 expected".format(version=version)) + self.num_classes = num_classes + if version == 1.0: + self.features = nn.Sequential( + nn.Conv2d(3, 96, kernel_size=7, stride=2), + nn.ReLU(inplace=True), + nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True), + Fire(96, 16, 64, 64), + Fire(128, 16, 64, 64), + Fire(128, 32, 128, 128), + nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True), + Fire(256, 32, 128, 128), + Fire(256, 48, 192, 192), + Fire(384, 48, 192, 192), + Fire(384, 64, 256, 256), + nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True), + Fire(512, 64, 256, 256), + ) + else: + self.features = 
nn.Sequential(
+                nn.Conv2d(3, 64, kernel_size=3, stride=2),
+                nn.ReLU(inplace=True),
+                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
+                Fire(64, 16, 64, 64),
+                Fire(128, 16, 64, 64),
+                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
+                Fire(128, 32, 128, 128),
+                Fire(256, 32, 128, 128),
+                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
+                Fire(256, 48, 192, 192),
+                Fire(384, 48, 192, 192),
+                Fire(384, 64, 256, 256),
+                Fire(512, 64, 256, 256),
+            )
+        # Final convolution is initialized differently from the rest
+        final_conv = nn.Conv2d(512, self.num_classes, kernel_size=1)
+        self.classifier = nn.Sequential(
+            nn.Dropout(p=dropout_factor),
+            final_conv,
+            nn.ReLU(inplace=True),
+            nn.AdaptiveAvgPool2d(1)
+        )
+
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d):
+                if m is final_conv:
+                    init.normal_(m.weight.data, mean=0.0, std=0.01)
+                else:
+                    init.kaiming_uniform_(m.weight.data)
+                if m.bias is not None:
+                    m.bias.data.zero_()
+
+    def forward(self, x):
+        x = self.features(x)
+        # print("features(x):", x.size())
+        x = self.classifier(x)
+        # print("features(x):", x.size())
+        return x.view(x.size(0), self.num_classes)
+
+
+def squeezenet1_0(pretrained=False, **kwargs):
+    r"""SqueezeNet model architecture from the `"SqueezeNet: AlexNet-level
+    accuracy with 50x fewer parameters and <0.5MB model size"
+    <https://arxiv.org/abs/1602.07360>`_ paper.
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+    """
+    model = SqueezeNet(version=1.0, **kwargs)
+    model_dict = model.state_dict()
+    if pretrained:
+        pretrained_state_dict = model_zoo.load_url(model_urls['squeezenet1_0'])
+        pretrained_dict = {k: v for k, v in pretrained_state_dict.items() if
+                           k in model_dict and model_dict[k].size() == pretrained_state_dict[k].size()}
+        model.load_state_dict(pretrained_dict,strict=False)
+    return model
+
+
+def squeezenet1_1(pretrained=False, **kwargs):
+    r"""SqueezeNet 1.1 model from the `official SqueezeNet repo
+    <https://github.com/DeepScale/SqueezeNet/tree/master/SqueezeNet_v1.1>`_.
+    SqueezeNet 1.1 has 2.4x less computation and slightly fewer parameters
+    than SqueezeNet 1.0, without sacrificing accuracy.
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+    """
+    model = SqueezeNet(version=1.1, **kwargs)
+    model_dict = model.state_dict()
+    if pretrained:
+        pretrained_state_dict = model_zoo.load_url(model_urls['squeezenet1_1'])
+        pretrained_dict = {k: v for k, v in pretrained_state_dict.items() if
+                           k in model_dict and model_dict[k].size() == pretrained_state_dict[k].size()}
+        model.load_state_dict(pretrained_dict,strict=False)
+    return model
+
+
+if __name__ == "__main__":
+    from thop import profile
+    dummy = torch.from_numpy(np.random.random([16, 3, 256, 256]).astype(np.float32))
+    model = squeezenet1_0(pretrained=True, num_classes=42,dropout_factor = 0.5)
+    print(model)
+    flops, params = profile(model, inputs=(dummy, ))
+    model.eval()
+    output = model(dummy)
+    print(output.size())
+    print("flops: {}, params: {}".format(flops, params))
diff --git a/components/hand_keypoints/utils/common_utils.py b/components/hand_keypoints/utils/common_utils.py
new file mode 100644
index 0000000..1e5b172
--- /dev/null
+++ b/components/hand_keypoints/utils/common_utils.py
@@ -0,0 +1,132 @@
+#-*-coding:utf-8-*-
+# date:2020-04-11
+# Author: Eric.Lee
+# function: common utils
+
+import os, random
+import shutil
+import cv2
+import numpy as np
+import json
+
+def mkdir_(path, flag_rm=False):
+    if os.path.exists(path):
+        if flag_rm == True:
+            shutil.rmtree(path)
+            os.mkdir(path)
+            print('remove {} done ~ '.format(path))
+    else:
+        os.mkdir(path)
+
+def plot_box(bbox, img, color=None, label=None, line_thickness=None):
+    tl = line_thickness or round(0.002 * max(img.shape[0:2])) + 1
+    color = color or [random.randint(0, 255) for _ in range(3)]
+    c1, c2 = (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3]))
+    cv2.rectangle(img, c1, c2, color, thickness=tl)# bbox of the detected object
+    if label:
+        tf = max(tl - 2, 1)
+        t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0] # label size
+        c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3 # bbox of the label text
+        cv2.rectangle(img, c1, c2, color, -1) # filled rectangle behind the label
+        # draw the label text
+        cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 4, [225, 255, 255],thickness=tf, lineType=cv2.LINE_AA)
+
+class JSON_Encoder(json.JSONEncoder):
+    def default(self, obj):
+        if isinstance(obj, np.integer):
+            return int(obj)
+        elif isinstance(obj, np.floating):
+            return float(obj)
+        elif isinstance(obj, np.ndarray):
+            return obj.tolist()
+        else:
+            return super(JSON_Encoder, self).default(obj)
+
+def draw_landmarks(img,output,draw_circle):
+    img_width = img.shape[1]
+    img_height = img.shape[0]
+    dict_landmarks = {}
+    for i in range(int(output.shape[0]/2)):
+        x = output[i*2+0]*float(img_width)
+        y = output[i*2+1]*float(img_height)
+        if 41>= i >=33:
+            if 'left_eyebrow' not in dict_landmarks.keys():
+                dict_landmarks['left_eyebrow'] = []
+            dict_landmarks['left_eyebrow'].append([int(x),int(y),(0,255,0)])
+            if draw_circle:
+                cv2.circle(img, (int(x),int(y)), 2, (0,255,0),-1)
+        elif 50>= i >=42:
+            if 'right_eyebrow' not in dict_landmarks.keys():
+                dict_landmarks['right_eyebrow'] = []
+            dict_landmarks['right_eyebrow'].append([int(x),int(y),(0,255,0)])
+            if draw_circle:
+                cv2.circle(img, (int(x),int(y)), 2, (0,255,0),-1)
+        elif 67>= i >=60:
+            if 'left_eye' not in dict_landmarks.keys():
+                dict_landmarks['left_eye'] = []
+            dict_landmarks['left_eye'].append([int(x),int(y),(255,0,255)])
+            if draw_circle:
+                cv2.circle(img, (int(x),int(y)), 2, (255,0,255),-1)
+        elif 75>= i >=68:
+            if 'right_eye' not in dict_landmarks.keys():
+                dict_landmarks['right_eye'] = []
+
dict_landmarks['right_eye'].append([int(x),int(y),(255,0,255)]) + if draw_circle: + cv2.circle(img, (int(x),int(y)), 2, (255,0,255),-1) + elif 97>= i >=96: + cv2.circle(img, (int(x),int(y)), 2, (0,0,255),-1) + elif 54>= i >=51: + if 'bridge_nose' not in dict_landmarks.keys(): + dict_landmarks['bridge_nose'] = [] + dict_landmarks['bridge_nose'].append([int(x),int(y),(0,170,255)]) + if draw_circle: + cv2.circle(img, (int(x),int(y)), 2, (0,170,255),-1) + elif 32>= i >=0: + if 'basin' not in dict_landmarks.keys(): + dict_landmarks['basin'] = [] + dict_landmarks['basin'].append([int(x),int(y),(255,30,30)]) + if draw_circle: + cv2.circle(img, (int(x),int(y)), 2, (255,30,30),-1) + elif 59>= i >=55: + if 'wing_nose' not in dict_landmarks.keys(): + dict_landmarks['wing_nose'] = [] + dict_landmarks['wing_nose'].append([int(x),int(y),(0,255,255)]) + if draw_circle: + cv2.circle(img, (int(x),int(y)), 2, (0,255,255),-1) + elif 87>= i >=76: + if 'out_lip' not in dict_landmarks.keys(): + dict_landmarks['out_lip'] = [] + dict_landmarks['out_lip'].append([int(x),int(y),(255,255,0)]) + if draw_circle: + cv2.circle(img, (int(x),int(y)), 2, (255,255,0),-1) + elif 95>= i >=88: + if 'in_lip' not in dict_landmarks.keys(): + dict_landmarks['in_lip'] = [] + dict_landmarks['in_lip'].append([int(x),int(y),(50,220,255)]) + if draw_circle: + cv2.circle(img, (int(x),int(y)), 2, (50,220,255),-1) + else: + if draw_circle: + cv2.circle(img, (int(x),int(y)), 2, (255,0,255),-1) + + return dict_landmarks + +def draw_contour(image,dict): + for key in dict.keys(): + # print(key) + _,_,color = dict[key][0] + + if 'basin' == key or 'wing_nose' == key: + pts = np.array([[dict[key][i][0],dict[key][i][1]] for i in range(len(dict[key]))],np.int32) + # print(pts) + cv2.polylines(image,[pts],False,color) + + else: + points_array = np.zeros((1,len(dict[key]),2),dtype = np.int32) + for i in range(len(dict[key])): + x,y,_ = dict[key][i] + points_array[0,i,0] = x + points_array[0,i,1] = y + + # cv2.fillPoly(image, points_array, color) + cv2.drawContours(image,points_array,-1,color,thickness=1) diff --git a/components/hand_keypoints/utils/model_utils.py b/components/hand_keypoints/utils/model_utils.py new file mode 100644 index 0000000..48cc3d3 --- /dev/null +++ b/components/hand_keypoints/utils/model_utils.py @@ -0,0 +1,61 @@ +#-*-coding:utf-8-*- +# date:2020-04-11 +# Author: Eric.Lee +# function: model utils + +import os +import numpy as np +import torch +import torch.backends.cudnn as cudnn +import random + +def get_acc(output, label): + total = output.shape[0] + _, pred_label = output.max(1) + num_correct = (pred_label == label).sum().item() + return num_correct / float(total) + +def set_learning_rate(optimizer, lr): + for param_group in optimizer.param_groups: + param_group['lr'] = lr + +def set_seed(seed = 666): + np.random.seed(seed) + random.seed(seed) + torch.manual_seed(seed) + if torch.cuda.is_available(): + torch.cuda.manual_seed(seed) + torch.cuda.manual_seed_all(seed) + cudnn.deterministic = True + +def split_trainval_datasets(ops): + print(' --------------->>> split_trainval_datasets ') + train_split_datasets = [] + train_split_datasets_label = [] + + val_split_datasets = [] + val_split_datasets_label = [] + for idx,doc in enumerate(sorted(os.listdir(ops.train_path), key=lambda x:int(x.split('.')[0]), reverse=False)): + # print(' %s label is %s \n'%(doc,idx)) + + data_list = os.listdir(ops.train_path+doc) + random.shuffle(data_list) + + cal_split_num = int(len(data_list)*ops.val_factor) + + for i,file in 
enumerate(data_list): + if '.jpg' in file: + if i < cal_split_num: + val_split_datasets.append(ops.train_path+doc + '/' + file) + val_split_datasets_label.append(idx) + else: + train_split_datasets.append(ops.train_path+doc + '/' + file) + train_split_datasets_label.append(idx) + + print(ops.train_path+doc + '/' + file,idx) + + print('\n') + print('train_split_datasets len {}'.format(len(train_split_datasets))) + print('val_split_datasets len {}'.format(len(val_split_datasets))) + + return train_split_datasets,train_split_datasets_label,val_split_datasets,val_split_datasets_label -- GitLab
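
Usage note (not part of the patch): resnet.py, resnet_50.py and squeezenet.py above each close with a small __main__ smoke test, but rexnetv1.py and shufflenetv2.py do not. The snippet below is a minimal, hypothetical smoke test for those two backbones, assuming the repository root is on PYTHONPATH so the components package resolves; the 256x256 input and the 42-way output head are illustrative values, not project defaults.

import torch

from components.hand_keypoints.models.rexnetv1 import ReXNetV1
from components.hand_keypoints.models.shufflenetv2 import ShuffleNetV2

if __name__ == "__main__":
    dummy = torch.randn(2, 3, 256, 256)    # illustrative batch only

    rexnet = ReXNetV1(num_classes=42, dropout_factor=0.5)
    rexnet.eval()                           # disable dropout for the shape check
    with torch.no_grad():
        print(rexnet(dummy).size())         # expected: torch.Size([2, 42])

    shuffle_v2 = ShuffleNetV2(ratio=1., num_classes=42, dropout_factor=0.5)
    shuffle_v2.eval()
    with torch.no_grad():
        print(shuffle_v2(dummy).size())     # expected: torch.Size([2, 42])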
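model_utils.py exposes a few small training helpers (get_acc, set_learning_rate, set_seed, split_trainval_datasets). A short, hypothetical illustration of the two that need no dataset on disk, again assuming the repository root is on PYTHONPATH; the tensor shapes are placeholders chosen for the example.

import torch

from components.hand_keypoints.utils.model_utils import get_acc, set_seed

if __name__ == "__main__":
    set_seed(666)                         # seeds python random, numpy and torch (plus CUDA when available)

    logits = torch.randn(8, 42)           # stand-in for a network output: batch of 8, 42-way head
    labels = torch.randint(0, 42, (8,))   # stand-in ground-truth class indices
    print("top-1 accuracy on random data:", get_acc(logits, labels))

split_trainval_datasets and the drawing helpers in common_utils.py expect image folders on disk, so they are not exercised in this sketch.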