import torch
import torch.nn as nn
import torchvision
import time
import numpy as np
import sys
def get_model_op(model_, print_flag=False):
    """Count the tracked layer types contained in a model.

    ``model_.modules()`` already yields the model itself and every nested
    sub-module exactly once, so each module is classified once by its own
    type. Containers are not expanded a second time, which previously
    inflated the totals.

    Args:
        model_: the ``nn.Module`` to inspect.
        print_flag: when true, print every visited module (``nn.Sequential``
            containers are prefixed with a row of asterisks) followed by the
            per-type totals.

    Returns:
        dict mapping a layer type name (``'Conv2d'``, ``'BatchNorm2d'``,
        ``'Linear'``, ``'ReLU6'``) to the number of instances found. Types
        that do not occur are absent from the dict.
    """
    tracked_ops = (
        ('Conv2d', nn.Conv2d),
        ('BatchNorm2d', nn.BatchNorm2d),
        ('Linear', nn.Linear),
        ('ReLU6', nn.ReLU6),
    )
    op_dict = {}
    for idx, m in enumerate(model_.modules(), start=1):
        if print_flag:
            if isinstance(m, nn.Sequential):
                print('*******************{}) {}'.format(idx, m))
            else:
                print('{}) {}'.format(idx, m))
        for op_name, op_type in tracked_ops:
            if isinstance(m, op_type):
                # First sighting starts at 1, later ones increment.
                op_dict[op_name] = op_dict.get(op_name, 0) + 1
                break
    if print_flag:
        for key, count in op_dict.items():
            print(' operation - {} : {}'.format(key, count))
    return op_dict
class DummyModule(nn.Module):
    """Pass-through module: ``forward`` hands its input back untouched.

    ``fuse_module`` installs it in the slot of a BatchNorm2d layer that has
    been folded into the preceding convolution.
    """

    def __init__(self):
        super().__init__()

    def forward(self, x):
        return x
def fuse(conv, bn):
    """Fold a BatchNorm2d layer into the convolution that feeds it.

    In inference mode batch norm is the per-channel affine map
    ``y = (x - mean) / sqrt(var + eps) * gamma + beta``. With
    ``scale = gamma / sqrt(var + eps)`` the fused layer therefore uses
    ``W' = W * scale`` and ``b' = (b - mean) * scale + beta``.

    Args:
        conv: ``nn.Conv2d`` or ``nn.ConvTranspose2d`` whose output feeds ``bn``.
        bn: ``nn.BatchNorm2d`` with tracked running statistics.

    Returns:
        A new layer of the same kind as ``conv`` (always with a bias) on the
        same device/dtype as ``conv.weight``. Its output equals
        ``bn(conv(x))`` when ``bn`` is in eval mode.

    Raises:
        TypeError: ``conv`` is neither Conv2d nor ConvTranspose2d.
        ValueError: ``bn`` has no running statistics.
    """
    # https://tehnokv.com/posts/fusing-batchnorm-and-conv/
    if bn.running_mean is None or bn.running_var is None:
        raise ValueError("fuse() requires a BatchNorm2d with running statistics")
    with torch.no_grad():
        # init
        if isinstance(conv, nn.Conv2d):
            transposed = False
            fusedconv = nn.Conv2d(conv.in_channels,
                                  conv.out_channels,
                                  kernel_size=conv.kernel_size,
                                  stride=conv.stride,
                                  padding=conv.padding,
                                  dilation=conv.dilation,
                                  groups=conv.groups,
                                  bias=True,
                                  padding_mode=conv.padding_mode)
        elif isinstance(conv, nn.ConvTranspose2d):
            transposed = True
            fusedconv = nn.ConvTranspose2d(conv.in_channels,
                                           conv.out_channels,
                                           kernel_size=conv.kernel_size,
                                           stride=conv.stride,
                                           padding=conv.padding,
                                           output_padding=conv.output_padding,
                                           groups=conv.groups,
                                           bias=True,
                                           dilation=conv.dilation)
        else:
            raise TypeError(
                "fuse() supports nn.Conv2d and nn.ConvTranspose2d, got {}".format(type(conv).__name__))
        # Keep the fused layer where the original lives so it can be swapped in directly.
        fusedconv = fusedconv.to(device=conv.weight.device, dtype=conv.weight.dtype)

        # per-output-channel BN scale; affine=False means gamma=1, beta=0
        std = torch.sqrt(bn.running_var + bn.eps)
        gamma = bn.weight if bn.weight is not None else torch.ones_like(std)
        beta = bn.bias if bn.bias is not None else torch.zeros_like(std)
        scale = gamma / std

        # prepare filters
        if transposed:
            # ConvTranspose2d weight is (in, out // groups, kH, kW): the output
            # channel sits on dim 1 and is offset by the group of the input row.
            groups = conv.groups
            grouped = conv.weight.reshape(groups,
                                          conv.in_channels // groups,
                                          conv.out_channels // groups,
                                          *conv.weight.shape[2:])
            grouped = grouped * scale.view(groups, 1, -1, 1, 1)
            fusedconv.weight.copy_(grouped.reshape(conv.weight.shape))
        else:
            # Conv2d weight is (out, in // groups, kH, kW): scale along dim 0.
            fusedconv.weight.copy_(conv.weight * scale.view(-1, 1, 1, 1))

        # prepare spatial bias: the conv bias passes through BN as well, so it
        # must be shifted by the running mean and scaled, not just added.
        if conv.bias is not None:
            b_conv = conv.bias
        else:
            b_conv = torch.zeros_like(bn.running_mean)
        fusedconv.bias.copy_((b_conv - bn.running_mean) * scale + beta)
        return fusedconv
# idxx = 0
def fuse_module(m):
    """Fold every directly adjacent Conv2d -> BatchNorm2d pair inside ``m``.

    The direct children of ``m`` are scanned in registration order. When a
    BatchNorm2d immediately follows a Conv2d, the conv slot is replaced by the
    fused layer returned by ``fuse`` and the batch-norm slot by a
    ``DummyModule``. Any other child is searched recursively.

    The module is modified in place and nothing is returned. Fusing uses the
    BN running statistics, so it only reproduces the network's behaviour in
    eval mode.

    Args:
        m: the ``nn.Module`` to rewrite.
    """
    pending_conv = None
    pending_name = None
    # Snapshot the children first: the loop rewrites m._modules.
    for name, child in list(m.named_children()):
        if isinstance(child, nn.BatchNorm2d) and pending_conv is not None:
            m._modules[pending_name] = fuse(pending_conv, child)
            m._modules[name] = DummyModule()
            pending_conv = None
        elif isinstance(child, nn.Conv2d):
            pending_conv = child
            pending_name = name
        else:
            # Something sits between the conv and a later BN (activation,
            # pooling, nested block, ...): folding across it would change the
            # result, so forget the conv and look inside the child instead.
            pending_conv = None
            fuse_module(child)
def test_net(ops, m):
    """Time ``m`` before and after Conv/BN fusion and report the output drift.

    Runs 50 forward passes on a random ``[1, 3, 256, 256]`` input, fuses the
    model in place with ``fuse_module``, runs 50 more passes, then prints the
    mean timings, the output sizes and the max-abs / MSE difference between
    the last unfused and last fused outputs.

    Args:
        ops: options object; only ``ops.force_cpu`` is read.
        m: the model to benchmark. It is moved to the selected device and is
            left fused afterwards.

    NOTE(review): the model is not switched to eval mode here. Fusion only
    matches the original network when BatchNorm uses its running statistics,
    so the caller presumably passes a model already in eval mode; confirm.
    """
    use_cpu = bool(ops.force_cpu) or not torch.cuda.is_available()
    if use_cpu:
        device = torch.device("cpu")
    else:
        device = torch.device("cuda:0")
    p = torch.randn([1, 3, 256, 256]).to(device)
    count = 50

    def timed_runs(model, label):
        # Run `count` forward passes; return the durations and the last output.
        durations = []
        result = None
        for _ in range(count):
            s1 = time.time()
            if use_cpu:
                result = model(p)
            else:
                # Copying back to the host also waits for the GPU to finish.
                result = model(p).cpu()
            s2 = time.time()
            durations.append(s2 - s1)
            print(label, s2 - s1)
        return durations, result

    with torch.no_grad():
        m_o = m.to(device)
        time_org, o_output = timed_runs(m_o, "Original time: ")
        # Without this step both timing loops would measure the same network.
        fuse_module(m)
        m_f = m.to(device)
        time_fuse, f_output = timed_runs(m_f, "Fused time: ")

    print("-" * 50)
    print("org time:", np.mean(time_org))
    print("fuse time:", np.mean(time_fuse))
    for o in o_output:
        print("org size:", o.size())
    for o in f_output:
        print("fuse size:", o.size())
    for i in range(len(o_output)):
        assert o_output[i].size() == f_output[i].size()
        print("output[{}] max abs diff: {}".format(i, (o_output[i] - f_output[i]).abs().max().item()))
        print("output[{}] MSE diff: {}".format(i, nn.MSELoss()(o_output[i], f_output[i]).item()))
def acc_model(ops, m):
    """Return the model prepared for accelerated inference.

    Args:
        ops: options object; not read by the visible code.
        m: the model.

    Returns:
        ``m`` itself.
    """
    # NOTE(review): as visible here this is a pass-through; no fusion call is
    # present between the two "before/after acc model" markers. Confirm whether
    # a fuse_module(m) call was intended.
    return m
import torch
def init_seeds(seed=0):
    """Seed PyTorch's random number generators.

    Args:
        seed: integer seed applied to the CPU generator and to every CUDA
            device generator.
    """
    torch.manual_seed(seed)
    # Safe without a GPU: the call is a no-op until CUDA is initialised.
    torch.cuda.manual_seed_all(seed)
def select_device(force_cpu=False):
    """Pick the torch device to run on.

    Args:
        force_cpu: when true, always return the CPU device, even if CUDA is
            available.

    Returns:
        ``torch.device('cpu')`` when forced or when CUDA is unavailable,
        ``torch.device('cuda')`` when more than one GPU is visible, otherwise
        ``torch.device('cuda:0')``.
    """
    if force_cpu:
        # Must return here: falling through would overwrite the forced choice.
        return torch.device('cpu')
    cuda = torch.cuda.is_available()
    if cuda and torch.cuda.device_count() > 1:
        return torch.device('cuda')
    return torch.device('cuda:0' if cuda else 'cpu')
# date:2021-03-09
# Author: Eric.Lee
# function: yolo v3 hand detect
import os
import cv2
import numpy as np
import time
import torch
from hand_detect.yolov3 import Yolov3, Yolov3Tiny
from hand_detect.utils.torch_utils import select_device
from hand_detect.acc_model import acc_model
import torch.backends.cudnn as cudnn
import torch.nn.functional as F
import random
def show_model_param(model):
    """Print the shape and element count of every parameter, then the total.

    Args:
        model: object exposing ``parameters()`` (an ``nn.Module``).
    """
    total = 0
    for param in model.parameters():
        dims = list(param.size())
        # numel() is the product of all dimensions.
        count = param.numel()
        print("该层的结构: {}, 参数和: {}".format(str(dims), str(count)))
        total += count
    print("总参数数量和: " + str(total))
def process_data(img, img_size=416):
    """Image preprocessing: letterbox, reorder channels/axes and scale to [0, 1].

    Args:
        img: image array indexed as (row, column, channel); the channel axis
            is reversed, which the original comment describes as BGR to RGB.
        img_size: side length passed to ``letterbox`` as ``height``.

    Returns:
        Contiguous float32 array with the channel axis first and values
        divided by 255.
    """
    padded, _ratio, _dw, _dh = letterbox(img, height=img_size)
    # Reverse the channel axis, then move it to the front (HWC -> CHW).
    channels_first = padded[:, :, ::-1].transpose(2, 0, 1)
    scaled = np.ascontiguousarray(channels_first, dtype=np.float32)  # uint8 to float32
    scaled /= 255.0  # 0 - 255 to 0.0 - 1.0
    return scaled
def plot_one_box(x, img, color=None, label=None, line_thickness=None):
    """Draw one bounding box, and optionally a filled label tag, on ``img``.

    Args:
        x: indexable holding x1, y1, x2, y2 at positions 0..3.
        img: image array drawn on in place.
        color: box colour; a random colour is picked when falsy.
        label: text drawn above the top-left corner when truthy.
        line_thickness: box line width; derived from the image size when falsy.
    """
    thickness = line_thickness or round(0.002 * max(img.shape[0:2])) + 1
    box_color = color or [random.randint(0, 255) for _ in range(3)]
    top_left = (int(x[0]), int(x[1]))
    bottom_right = (int(x[2]), int(x[3]))
    cv2.rectangle(img, top_left, bottom_right, box_color, thickness=thickness)
    if not label:
        return
    font_thickness = max(thickness - 1, 1)
    font_scale = thickness / 3
    text_size = cv2.getTextSize(label, 0, fontScale=font_scale, thickness=font_thickness)[0]
    # The tag sits directly above the box corner, sized to the text.
    tag_corner = (top_left[0] + text_size[0], top_left[1] - text_size[1] - 3)
    cv2.rectangle(img, top_left, tag_corner, box_color, -1)  # filled
    cv2.putText(img, label, (top_left[0], top_left[1] - 2), 0, font_scale, [255, 55, 90],
                thickness=font_thickness, lineType=cv2.LINE_AA)
def bbox_iou(box1, box2, x1y1x2y2=True):
    """Return the IoU of ``box1`` against every box in ``box2``.

    Args:
        box1: tensor of 4 values describing one box.
        box2: tensor of shape (n, 4).
        x1y1x2y2: when true the boxes are corner coordinates
            (x1, y1, x2, y2); when false they are (center x, center y,
            width, height).

    Returns:
        Tensor of n IoU values.
    """
    box2 = box2.t()

    # Get the coordinates of bounding boxes
    if x1y1x2y2:
        # x1, y1, x2, y2 = box1
        b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
        b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]
    else:
        # x, y, w, h = box1
        b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2
        b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2
        b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2
        b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2

    # Intersection area; clamp(0) zeroes the overlap of disjoint boxes
    inter_area = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * \
                 (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0)

    # Union Area; the 1e-16 term guards against division by zero
    union_area = ((b1_x2 - b1_x1) * (b1_y2 - b1_y1) + 1e-16) + \
                 (b2_x2 - b2_x1) * (b2_y2 - b2_y1) - inter_area

    return inter_area / union_area  # iou
def xywh2xyxy(x):
    """Convert boxes from [x, y, w, h] (centre + size) to [x1, y1, x2, y2].

    Args:
        x: 2-D ``torch.Tensor`` or numpy array whose first four columns are
            centre x, centre y, width, height.

    Returns:
        A new array/tensor shaped like ``x``; columns 0..3 hold the corner
        coordinates and any further columns are zero.
    """
    if isinstance(x, torch.Tensor):
        corners = torch.zeros_like(x)
    else:
        corners = np.zeros_like(x)
    half_w = x[:, 2] / 2
    half_h = x[:, 3] / 2
    corners[:, 0] = x[:, 0] - half_w
    corners[:, 1] = x[:, 1] - half_h
    corners[:, 2] = x[:, 0] + half_w
    corners[:, 3] = x[:, 1] + half_h
    return corners
def scale_coords(img_size, coords, img0_shape):
    """Map box corners from the letterboxed square back to the source image.

    Args:
        img_size: side length of the square network input.
        coords: tensor whose columns 0..3 are x1, y1, x2, y2; modified in place.
        img0_shape: shape of the original image, (height, width, ...).

    Returns:
        ``coords`` itself, with padding removed, divided by the resize gain
        and clamped so no coordinate is negative.
    """
    gain = float(img_size) / max(img0_shape)  # gain = old / new
    half_pad_w = (img_size - img0_shape[1] * gain) / 2  # width padding
    half_pad_h = (img_size - img0_shape[0] * gain) / 2  # height padding
    coords[:, [0, 2]] -= half_pad_w
    coords[:, [1, 3]] -= half_pad_h
    coords[:, :4] /= gain
    # Clamp so the minimum coordinate is never negative.
    coords[:, :4] = coords[:, :4].clamp(min=0)
    return coords
def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4):
    """Filter raw detections by confidence, then apply non-maximum suppression.

    Removes detections with lower object confidence score than 'conf_thres'
    and boxes whose width or height is not above 2 pixels, then suppresses
    overlapping boxes per class.

    Args:
        prediction: per-image predictions; each row is
            (center x, center y, width, height, object_conf, class scores...).
            Column 4 is multiplied by the best class score in place.
        conf_thres: minimum combined confidence for a box to be kept.
        nms_thres: IoU above which boxes are treated as the same object.

    Returns:
        List with one entry per image: ``None`` when nothing survives,
        otherwise a tensor of rows
        (x1, y1, x2, y2, object_conf, class_conf, class) sorted by
        decreasing confidence.
    """
    min_wh = 2  # (pixels) minimum box width and height

    output = [None] * len(prediction)
    for image_i, pred in enumerate(prediction):
        # Filter out confidence scores below threshold
        class_conf, class_pred = pred[:, 5:].max(1)  # max class_conf, index
        pred[:, 4] *= class_conf  # final conf = obj_conf * class_conf

        i = (pred[:, 4] > conf_thres) & (pred[:, 2] > min_wh) & (pred[:, 3] > min_wh)
        pred2 = pred[i]

        # If none are remaining => process next image
        if len(pred2) == 0:
            continue

        # Select predicted classes
        class_conf = class_conf[i]
        class_pred = class_pred[i].unsqueeze(1).float()

        # Box (center x, center y, width, height) to (x1, y1, x2, y2)
        pred2[:, :4] = xywh2xyxy(pred2[:, :4])

        # Detections ordered as (x1y1x2y2, obj_conf, class_conf, class_pred)
        pred2 = torch.cat((pred2[:, :5], class_conf.unsqueeze(1), class_pred), 1)

        # Get detections sorted by decreasing confidence scores
        pred2 = pred2[(-pred2[:, 4]).argsort()]

        det_max = []
        nms_style = 'MERGE'  # 'OR' (default), 'AND', 'MERGE' (experimental)
        for c in pred2[:, -1].unique():
            dc = pred2[pred2[:, -1] == c]  # select class c
            dc = dc[:min(len(dc), 100)]  # limit to first 100 boxes

            # Non-maximum suppression
            if nms_style == 'OR':  # default
                while dc.shape[0]:
                    det_max.append(dc[:1])  # save highest conf detection
                    if len(dc) == 1:  # Stop if we're at the last detection
                        break
                    iou = bbox_iou(dc[0], dc[1:])  # iou with other boxes
                    dc = dc[1:][iou < nms_thres]  # remove ious > threshold

            elif nms_style == 'AND':  # requires overlap, single boxes erased
                while len(dc) > 1:
                    iou = bbox_iou(dc[0], dc[1:])  # iou with other boxes
                    if iou.max() > 0.5:
                        det_max.append(dc[:1])
                    dc = dc[1:][iou < nms_thres]  # remove ious > threshold

            elif nms_style == 'MERGE':  # weighted mixture box
                while len(dc):
                    i = bbox_iou(dc[0], dc) > nms_thres  # iou with other boxes
                    # The lead box always belongs to its own cluster. Forcing
                    # this guarantees the loop shrinks dc even when the
                    # self-IoU test fails (nms_thres >= 1 or NaN coordinates).
                    i[0] = True
                    weights = dc[i, 4:5]
                    dc[0, :4] = (weights * dc[i, :4]).sum(0) / weights.sum()
                    det_max.append(dc[:1])
                    dc = dc[i == 0]

        if len(det_max):
            det_max = torch.cat(det_max)  # concatenate
            output[image_i] = det_max[(-det_max[:, 4]).argsort()]  # sort

    return output
def letterbox(img, height=416, augment=False, color=(127.5, 127.5, 127.5)):
    """Resize a rectangular image to a padded square.

    The longer side is scaled to ``height`` and the shorter side is padded
    symmetrically with ``color``.

    Args:
        img: image array indexed as (row, column, ...).
        height: side length of the output square.
        augment: when true, pick the resize interpolation at random; when
            false use nearest-neighbour.
        color: border fill value.

    Returns:
        Tuple ``(img, ratio, dw, dh)``: the padded image, the resize ratio,
        and the (possibly fractional) padding per side along width/height.
    """
    shape = img.shape[:2]  # shape = [height, width]
    ratio = float(height) / max(shape)  # ratio = old / new
    new_shape = (round(shape[1] * ratio), round(shape[0] * ratio))  # (width, height) for cv2
    dw = (height - new_shape[0]) / 2  # width padding
    dh = (height - new_shape[1]) / 2  # height padding
    # The -0.1 / +0.1 nudges put an odd padding pixel on the bottom/right side.
    top, bottom = round(dh - 0.1), round(dh + 0.1)
    left, right = round(dw - 0.1), round(dw + 0.1)

    # resize img exactly once, with the interpolation chosen below
    if augment:
        # NOTE(review): only these leading candidates are visible in the
        # source; the original list may have continued. Confirm.
        candidates = [None, cv2.INTER_NEAREST, cv2.INTER_LINEAR]
        interpolation = candidates[np.random.choice(len(candidates))]
        if interpolation is None:
            img = cv2.resize(img, new_shape)
        else:
            img = cv2.resize(img, new_shape, interpolation=interpolation)
    else:
        img = cv2.resize(img, new_shape, interpolation=cv2.INTER_NEAREST)

    img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # padded square
    return img, ratio, dw, dh
# model_path = './coco_model/yolov3_coco.pt' # path of the detection model
# root_path = './test_images/' # folder with test images
# model_arch = 'yolov3' # model architecture
# voc_config = 'cfg/voc.data' # model configuration file
# img_size = 416 # input image size
# conf_thres = 0.35 # detection confidence threshold
# nms_thres = 0.5 # NMS threshold
# Wrapper around a Yolov3 / Yolov3Tiny network used as a single-class
# ("Hand") detector: builds the network, loads weights from model_path and
# exposes predict() for one image.
class yolo_v3_hand_model(object):
# NOTE(review): the parameter list looks truncated in this view. img_size,
# conf_thres and nms_thres are read below but are not declared here, and the
# closing "):" of the signature is not visible. Confirm against the real file.
def __init__(self,
model_path = './components/hand_detect/weights/hand_416-20210606.pt',
model_arch = 'yolov3',
yolo_anchor_scale = 1.,
model_half = False,
print("yolo v3 hand_model loading : {}".format(model_path))
self.use_cuda = torch.cuda.is_available()
self.device = torch.device("cuda:0" if self.use_cuda else "cpu")
self.img_size = img_size
# Single-class detector: every detection is labelled "Hand".
self.classes = ["Hand"]
self.num_classes = len(self.classes)
self.conf_thres = conf_thres
self.nms_thres = nms_thres
self.model_half = model_half
weights = model_path
# Anchors are given for a 416 input and divided by a_scalse (416 / img_size)
# so they follow the configured input size.
# NOTE(review): no "else:" is visible between the tiny and the full branch,
# so as shown the full Yolov3 model always replaces the tiny one. Confirm.
if "tiny" in model_arch:
a_scalse = 416./img_size*yolo_anchor_scale
anchors=[(10, 14), (23, 27), (37, 58), (81, 82), (135, 169), (344, 319)]
anchors_new = [ (int(anchors[j][0]/a_scalse),int(anchors[j][1]/a_scalse)) for j in range(len(anchors)) ]
model = Yolov3Tiny(self.num_classes,anchors = anchors_new)
# yolo_anchor_scale is only applied in the tiny branch above.
a_scalse = 416./img_size
anchors=[(10,13), (16,30), (33,23), (30,61), (62,45), (59,119), (116,90), (156,198), (373,326)]
anchors_new = [ (int(anchors[j][0]/a_scalse),int(anchors[j][1]/a_scalse)) for j in range(len(anchors)) ]
model = Yolov3(self.num_classes,anchors = anchors_new)
self.model = model
# show_model_param(self.model)# print the model parameters
# print('num_classes : ',self.num_classes)
self.device = select_device() # choose the device to run on
self.use_cuda = torch.cuda.is_available()
# Load weights
# The checkpoint is loaded onto CPU storage and its 'model' entry is used
# as the state dict.
if os.access(weights,os.F_OK):# check whether the weight file exists
self.model.load_state_dict(torch.load(weights, map_location=lambda storage, loc: storage)['model'])
# NOTE(review): no "else:" is visible before the two lines below, so as shown
# they run even after a successful load. Returning a non-None value from
# __init__ raises TypeError at construction time. Confirm intended handling.
print('------- >>> error : model not exists')
return False
self.model.eval()# put the model in eval mode
self.model = self.model.to(self.device)
# Optional half-precision weights.
if model_half:
self.model = self.model.to(self.device).half()
if self.use_cuda:
self.model = self.model.cuda()
# Run detection on one image.
# img_: image array; it is passed to process_data and, when vis is truthy,
#       boxes are drawn onto it.
# vis:  truthy to draw the detections.
# Returns [] when nothing is detected, otherwise output_dict_.
def predict(self, img_,vis):
with torch.no_grad():
# t, t1, t2, t3 are timestamps around the stages; they are not used further
# in the visible code.
t = time.time()
img = process_data(img_, self.img_size)
t1 = time.time()
# Add the batch dimension and move the input to the model's device.
img = torch.from_numpy(img).unsqueeze(0).to(self.device)
# NOTE(review): the body of "if self.model_half:" is not visible here;
# presumably the input is cast to half precision. Confirm.
if self.model_half:
if self.use_cuda:
img = img.cuda()
pred, _ = self.model(img)# run detection on the image
t2 = time.time()
# detections = non_max_suppression(pred, self.conf_thres, self.nms_thres)[0] # nms
# pred is cast to float before NMS; only the first (single) image is used.
detections = non_max_suppression(pred.float(), self.conf_thres, self.nms_thres)[0] # nms
t3 = time.time()
# print("t3 time:", t3)
if (detections is None) or len(detections) == 0:
return []
# Rescale boxes from 416 to true image size
detections[:, :4] = scale_coords(self.img_size, detections[:, :4], img_.shape).round()
# draw the detection results
# dets_for_landmarks and colors are assigned but not read in the visible code.
dets_for_landmarks = []
colors = [(v // 32 * 64 + 64, (v // 8) % 4 * 64, v % 8 * 32) for v in range(1, 10 + 1)][::-1]
output_dict_ = []
# Each detection row unpacks to four box coordinates, conf, cls_conf, cls.
for *xyxy, conf, cls_conf, cls in detections:
label = '%s %.2f' % (self.classes[0], conf)
x1,y1,x2,y2 = xyxy
# NOTE(review): nothing is appended to output_dict_ in the visible code, so
# as shown the method returns an empty list even when boxes were found.
if vis:
plot_one_box(xyxy, img_, label=label, color=(0,175,255), line_thickness = 2)
# NOTE(review): this second "if vis:" has no visible body (presumably a
# display call). Confirm.
if vis:
return output_dict_
import os
import numpy as np
from collections import OrderedDict
import torch
import torch.nn.functional as F
import torch.nn as nn
# reference:
# https://github.com/ultralytics/yolov3/blob/master/models.py
# https://github.com/TencentYoutuResearch/ObjectDetection-OneStageDet/blob/master/yolo/vedanet/network/backbone/brick/darknet53.py
# network structure https://blog.csdn.net/u010397980/article/details/85058630
flag_yolo_structure = False  # set to True to print network-structure debug logs


class Conv2dBatchLeaky(nn.Module):
    """Conv2d -> BatchNorm2d -> LeakyReLU building block.

    Args:
        in_channels: number of input channels.
        out_channels: number of output channels.
        kernel_size: int, or list/tuple with one size per spatial dimension.
        stride: convolution stride.
        leaky_slope: negative slope of the LeakyReLU.

    The padding is ``kernel_size // 2`` per dimension, which keeps the
    spatial size unchanged for odd kernels at stride 1.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride, leaky_slope=0.1):
        super(Conv2dBatchLeaky, self).__init__()

        # Parameters
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.stride = stride
        if isinstance(kernel_size, (list, tuple)):
            self.padding = [int(ii/2) for ii in kernel_size]
            if flag_yolo_structure:
                print('------------------->>>> Conv2dBatchLeaky isinstance')
        else:
            self.padding = int(kernel_size/2)
        self.leaky_slope = leaky_slope

        # Layer
        # LeakyReLU : y = max(0, x) + leaky_slope*min(0,x)
        # The conv has no bias because the BatchNorm that follows supplies one.
        self.layers = nn.Sequential(
            nn.Conv2d(self.in_channels, self.out_channels, self.kernel_size, self.stride, self.padding, bias=False),
            nn.BatchNorm2d(self.out_channels),
            nn.LeakyReLU(self.leaky_slope, inplace=True)
        )

    def forward(self, x):
        x = self.layers(x)
        return x
class ResBlockSum(nn.Module):
    """Residual block: ``x + block(x)``.

    ``block`` is a 1x1 Conv2dBatchLeaky that halves the channels followed by
    a 3x3 Conv2dBatchLeaky that restores them, so input and output shapes
    match.

    Args:
        nchannels: number of input (and output) channels.
    """

    def __init__(self, nchannels):
        # nn.Module must be initialised before sub-modules can be assigned.
        super(ResBlockSum, self).__init__()
        self.block = nn.Sequential(
            Conv2dBatchLeaky(nchannels, int(nchannels/2), 1, 1),
            Conv2dBatchLeaky(int(nchannels/2), nchannels, 3, 1)
        )

    def forward(self, x):
        return x + self.block(x)
class HeadBody(nn.Module):
    """Stack of five Conv2dBatchLeaky blocks used before each detection head.

    The kernels alternate 1x1 / 3x3 and the channel count alternates between
    ``out_channels`` and ``out_channels * 2``, ending at ``out_channels``.

    Args:
        in_channels: channels of the incoming feature map.
        out_channels: channels of the produced feature map.
    """

    def __init__(self, in_channels, out_channels):
        super(HeadBody, self).__init__()

        self.layer = nn.Sequential(
            Conv2dBatchLeaky(in_channels, out_channels, 1, 1),
            Conv2dBatchLeaky(out_channels, out_channels*2, 3, 1),
            Conv2dBatchLeaky(out_channels*2, out_channels, 1, 1),
            Conv2dBatchLeaky(out_channels, out_channels*2, 3, 1),
            Conv2dBatchLeaky(out_channels*2, out_channels, 1, 1)
        )

    def forward(self, x):
        x = self.layer(x)
        return x
class Upsample(nn.Module):
    """Custom upsample layer built on ``F.interpolate``.

    (The original comment notes that nn.Upsample gives a deprecation warning.)

    Args:
        scale_factor: multiplier handed to ``F.interpolate``.
        mode: interpolation mode handed to ``F.interpolate``.
    """

    def __init__(self, scale_factor=1, mode='nearest'):
        super().__init__()
        self.scale_factor = scale_factor
        self.mode = mode

    def forward(self, x):
        resized = F.interpolate(x, scale_factor=self.scale_factor, mode=self.mode)
        return resized
# default anchors=[(10,13), (16,30), (33,23), (30,61), (62,45), (59,119), (116,90), (156,198), (373,326)]
class YOLOLayer(nn.Module):
    """Detection head that decodes a raw feature map into box predictions.

    Args:
        anchors: (width, height) anchor pairs handled by this head.
        nC: number of classes.
    """

    def __init__(self, anchors, nC):
        super().__init__()

        self.anchors = torch.FloatTensor(anchors)
        self.nA = len(anchors)  # number of anchors (3)
        self.nC = nC  # number of classes
        # 0 means "grids not built yet"; the first forward() builds them.
        self.img_size = 0
        if flag_yolo_structure:
            print('init YOLOLayer ------ >>> ')
            print('anchors : ',self.anchors)
            print('nA : ',self.nA)
            print('nC : ',self.nC)
            print('img_size : ',self.img_size)

    def forward(self, p, img_size, var=None):
        """Decode feature map ``p`` produced for an input of side ``img_size``.

        Returns the reshaped raw prediction in training mode; in eval mode
        returns ``(decoded, raw)`` where ``decoded`` is flattened to
        ``[bs, nA * nG * nG, 5 + nC]``.
        """
        batch = p.shape[0]  # batch_size
        nG = p.shape[-1]  # grid
        if flag_yolo_structure:
            print('bs, nG --->>> ',batch, nG)

        # Rebuild the cached grids only when the input size changes.
        if self.img_size != img_size:
            create_grids(self, img_size, nG, p.device)

        # p.view(bs, 255, 13, 13) -- > (bs, 3, 13, 13, 85)
        # (bs, anchors, grid, grid, xywh + confidence + classes)
        p = p.view(batch, self.nA, self.nC + 5, nG, nG)
        p = p.permute(0, 1, 3, 4, 2).contiguous()  # prediction

        if self.training:
            return p

        # inference
        decoded = p.clone()  # inference output
        decoded[..., 0:2] = torch.sigmoid(decoded[..., 0:2]) + self.grid_xy  # xy
        decoded[..., 2:4] = torch.exp(decoded[..., 2:4]) * self.anchor_wh  # wh yolo method
        decoded[..., 4:] = torch.sigmoid(decoded[..., 4:])  # p_conf, p_cls
        decoded[..., :4] *= self.stride  # grid units -> pixels
        if self.nC == 1:
            decoded[..., 5] = 1  # single-class model

        # flatten prediction, reshape from [bs, nA, nG, nG, nC] to [bs, nA * nG * nG, nC]
        flattened = decoded.view(batch, -1, 5 + self.nC)
        return flattened, p
def create_grids(self, img_size, nG, device='cpu'):
    """Build and cache the grid offsets and scaled anchors on a YOLO layer.

    Sets ``img_size``, ``stride``, ``grid_xy``, ``anchor_vec``, ``anchor_wh``
    and ``nG`` on ``self``.

    Args:
        self: layer object providing ``anchors`` and ``nA``.
        img_size: side length of the network input.
        nG: side length of the feature-map grid (e.g. 13, 26, 52).
        device: device the cached tensors are moved to.
    """
    self.img_size = img_size
    self.stride = img_size / nG
    if flag_yolo_structure:
        print('create_grids stride : ',self.stride)

    # build xy offsets: grid_x holds the column index of every cell,
    # grid_y (its transpose) the row index
    column_index = torch.arange(nG).repeat((nG, 1))
    grid_x = column_index.view((1, 1, nG, nG)).float()
    grid_y = grid_x.permute(0, 1, 3, 2)
    stacked = torch.stack((grid_x, grid_y), 4)
    self.grid_xy = stacked.to(device)
    if flag_yolo_structure:
        print('grid_x : ',grid_x.size(),grid_x)
        print('grid_y : ',grid_y.size(),grid_y)
        print('grid_xy : ',self.grid_xy.size(),self.grid_xy)

    # build wh gains: anchors expressed in grid cells (normalised by stride)
    self.anchor_vec = self.anchors.to(device) / self.stride
    self.anchor_wh = self.anchor_vec.view(1, self.nA, 1, 1, 2).to(device)
    self.nG = torch.FloatTensor([nG]).to(device)
def get_yolo_layer_index(module_list):
    """Return the positions of the YOLO detection layers in ``module_list``.

    An entry counts as a YOLO layer when its first sub-module exposes
    ``img_size`` (and ``nG`` once ``img_size`` is non-zero); other layers do
    not have these attributes.

    Args:
        module_list: iterable of indexable containers (``nn.Sequential``).

    Returns:
        List of indices of the entries that hold a YOLO layer.

    Raises:
        AssertionError: no YOLO layer was found.
    """
    yolo_layer_index = []
    for index, l in enumerate(module_list):
        try:
            head = l[0]
            # only yolo layer need img_size and nG
            _ = head.img_size and head.nG
        except (AttributeError, IndexError, TypeError):
            # Not a YOLO layer (or an empty / non-indexable entry): skip it.
            continue
        yolo_layer_index.append(index)
    if not yolo_layer_index:
        # Raised explicitly so the check survives ``python -O``.
        raise AssertionError("can not find yolo layer")
    return yolo_layer_index
# ----------------------yolov3------------------------
class Yolov3(nn.Module):
    """YOLOv3 detector with three detection heads.

    Building blocks:
        Conv2dBatchLeaky: Conv2d, BatchNorm2d, LeakyReLU
            (in_channels, out_channels, kernel_size, stride, leaky_slope)
        ResBlockSum: Conv2dBatchLeaky * 2 + x (nchannels)
        HeadBody: Conv2dBatchLeaky * 5 (in_channels, out_channels)

    Args:
        num_classes: number of object classes.
        anchors: (width, height) anchor pairs, ordered small to large. The
            last third goes to the coarsest head, the first third to the
            finest.

    The sub-module names below determine the ``state_dict`` keys, so they
    must stay as they are for existing checkpoints to load.
    """

    def __init__(self, num_classes=80, anchors=[(10,13), (16,30), (33,23), (30,61), (62,45), (59,119), (116,90), (156,198), (373,326)]):
        super(Yolov3, self).__init__()
        anchor_mask1 = [i for i in range(2 * len(anchors) // 3, len(anchors), 1)]  # [6, 7, 8]
        anchor_mask2 = [i for i in range(len(anchors) // 3, 2 * len(anchors) // 3, 1)]  # [3, 4, 5]
        anchor_mask3 = [i for i in range(0, len(anchors) // 3, 1)]  # [0, 1, 2]
        if flag_yolo_structure:
            print('anchor_mask1 : ',anchor_mask1)  # anchors for large objects
            print('anchor_mask2 : ',anchor_mask2)  # anchors for medium objects
            print('anchor_mask3 : ',anchor_mask3)  # anchors for small objects

        def res_blocks(stage, nchannels, count):
            # Named residual blocks "<stage>_ressum1" .. "<stage>_ressum<count>".
            return [("{}_ressum{}".format(stage, n), ResBlockSum(nchannels)) for n in range(1, count + 1)]

        # Network
        # OrderedDict keeps the insertion order of the named layers.
        layer_list = []

        # list 0
        # NOTE(review): the fifth positional argument of the first block is
        # leaky_slope, so this block uses slope 1 (LeakyReLU acts as identity).
        # Kept as found; presumably existing weights were trained this way.
        layer_list.append(OrderedDict([
            ('0_stage1_conv', Conv2dBatchLeaky(3, 32, 3, 1, 1)),  # 416 x 416 x 32
            ("0_stage2_conv", Conv2dBatchLeaky(32, 64, 3, 2)),  # 208 x 208 x 64
            *res_blocks("0_stage2", 64, 1),
            ("0_stage3_conv", Conv2dBatchLeaky(64, 128, 3, 2)),  # 104 x 104 x 128
            *res_blocks("0_stage3", 128, 2),
            ("0_stage4_conv", Conv2dBatchLeaky(128, 256, 3, 2)),  # 52 x 52 x 256
            *res_blocks("0_stage4", 256, 8),  # 52 x 52 x 256 output_feature_0
        ]))

        # list 1
        layer_list.append(OrderedDict([
            ("1_stage5_conv", Conv2dBatchLeaky(256, 512, 3, 2)),  # 26 x 26 x 512
            *res_blocks("1_stage5", 512, 8),  # 26 x 26 x 512 output_feature_1
        ]))

        # list 2
        layer_list.append(OrderedDict([
            ("2_stage6_conv", Conv2dBatchLeaky(512, 1024, 3, 2)),  # 13 x 13 x 1024
            *res_blocks("2_stage6", 1024, 4),  # 13 x 13 x 1024 output_feature_2
            ("2_headbody1", HeadBody(in_channels=1024, out_channels=512)),  # 13 x 13 x 512
        ]))

        # list 3
        layer_list.append(OrderedDict([
            ("3_conv_1", Conv2dBatchLeaky(in_channels=512, out_channels=1024, kernel_size=3, stride=1)),
            ("3_conv_2", nn.Conv2d(in_channels=1024, out_channels=len(anchor_mask1) * (num_classes + 5), kernel_size=1, stride=1, padding=0, bias=True)),
        ]))  # predict one

        # list 4
        layer_list.append(OrderedDict([
            ("4_yolo", YOLOLayer([anchors[i] for i in anchor_mask1], num_classes))
        ]))  # 3*((x, y, w, h, confidence) + classes )

        # list 5
        layer_list.append(OrderedDict([
            ("5_conv", Conv2dBatchLeaky(512, 256, 1, 1)),
            ("5_upsample", Upsample(scale_factor=2)),
        ]))

        # list 6 (input: 256 upsampled + 512 routed channels)
        layer_list.append(OrderedDict([
            ("6_head_body2", HeadBody(in_channels=768, out_channels=256))
        ]))

        # list 7
        layer_list.append(OrderedDict([
            ("7_conv_1", Conv2dBatchLeaky(in_channels=256, out_channels=512, kernel_size=3, stride=1)),
            ("7_conv_2", nn.Conv2d(in_channels=512, out_channels=len(anchor_mask2) * (num_classes + 5), kernel_size=1, stride=1, padding=0, bias=True)),
        ]))  # predict two

        # list 8
        layer_list.append(OrderedDict([
            ("8_yolo", YOLOLayer([anchors[i] for i in anchor_mask2], num_classes))
        ]))  # 3*((x, y, w, h, confidence) + classes )

        # list 9
        layer_list.append(OrderedDict([
            ("9_conv", Conv2dBatchLeaky(256, 128, 1, 1)),
            ("9_upsample", Upsample(scale_factor=2)),
        ]))

        # list 10 (input: 128 upsampled + 256 routed channels)
        layer_list.append(OrderedDict([
            ("10_head_body3", HeadBody(in_channels=384, out_channels=128))
        ]))

        # list 11
        layer_list.append(OrderedDict([
            ("11_conv_1", Conv2dBatchLeaky(in_channels=128, out_channels=256, kernel_size=3, stride=1)),
            ("11_conv_2", nn.Conv2d(in_channels=256, out_channels=len(anchor_mask3) * (num_classes + 5), kernel_size=1, stride=1, padding=0, bias=True)),
        ]))  # predict three

        # list 12
        layer_list.append(OrderedDict([
            ("12_yolo", YOLOLayer([anchors[i] for i in anchor_mask3], num_classes))
        ]))  # 3*((x, y, w, h, confidence) + classes )

        # nn.ModuleList only stores the groups; forward() wires them together.
        self.module_list = nn.ModuleList([nn.Sequential(i) for i in layer_list])
        self.yolo_layer_index = get_yolo_layer_index(self.module_list)
        if flag_yolo_structure:
            print('yolo_layer : ',len(layer_list),'\n')

    def forward(self, x):
        """Run the network.

        Returns:
            Training mode: list with the raw output of each of the three
            heads. Eval mode: ``(inference_out, train_out)`` where
            ``inference_out`` concatenates the decoded predictions of all
            heads along dim 1 and ``train_out`` is the tuple of raw outputs.
        """
        # The input side length is taken from the last dimension.
        img_size = x.shape[-1]
        if flag_yolo_structure:
            print('forward img_size : ',img_size,x.shape)
        output = []

        x = self.module_list[0](x)
        x_route1 = x
        x = self.module_list[1](x)
        x_route2 = x
        x = self.module_list[2](x)

        yolo_head = self.module_list[3](x)
        if flag_yolo_structure:
            print('mask1 yolo_head : ',yolo_head.size())
        yolo_head_out_13x13 = self.module_list[4][0](yolo_head, img_size)
        output.append(yolo_head_out_13x13)

        x = self.module_list[5](x)
        x = torch.cat([x, x_route2], 1)
        x = self.module_list[6](x)

        yolo_head = self.module_list[7](x)
        if flag_yolo_structure:
            print('mask2 yolo_head : ',yolo_head.size())
        yolo_head_out_26x26 = self.module_list[8][0](yolo_head, img_size)
        output.append(yolo_head_out_26x26)

        x = self.module_list[9](x)
        x = torch.cat([x, x_route1], 1)
        x = self.module_list[10](x)

        yolo_head = self.module_list[11](x)
        if flag_yolo_structure:
            print('mask3 yolo_head : ',yolo_head.size())
        yolo_head_out_52x52 = self.module_list[12][0](yolo_head, img_size)
        output.append(yolo_head_out_52x52)

        if self.training:
            return output
        io, p = list(zip(*output))  # inference output, training output
        return torch.cat(io, 1), p
# ----------------------yolov3 tiny------------------------
class EmptyLayer(nn.Module):
    """Placeholder for 'route' and 'shortcut' layers; forwards its input as is."""

    def __init__(self):
        super().__init__()

    def forward(self, x):
        # Nothing to compute: the routing itself happens in the owning model.
        return x
class Yolov3Tiny(nn.Module):
    """YOLOv3-tiny detector with two detection heads.

    Args:
        num_classes: number of object classes.
        anchors: (width, height) anchor pairs, ordered small to large. The
            second half goes to the coarse head, the first half to the fine
            head.

    The sub-module names determine the ``state_dict`` keys, so they must stay
    as they are for existing checkpoints to load.
    NOTE(review): the names "conv_15" and "conv_22" are not visible in this
    view of the source and were inferred from the layer numbering. Confirm.
    """

    def __init__(self, num_classes=80, anchors=[(10, 14), (23, 27), (37, 58), (81, 82), (135, 169), (344, 319)]):
        super(Yolov3Tiny, self).__init__()

        anchor_mask1 = [i for i in range(len(anchors) // 2, len(anchors), 1)]  # [3, 4, 5]
        anchor_mask2 = [i for i in range(0, len(anchors) // 2, 1)]  # [0, 1, 2]

        def conv_bn_leaky(idx, c_in, c_out, kernel_size):
            # conv_<idx> / batch_norm_<idx> / leaky_<idx>, stride 1, "same" padding.
            return [
                ("conv_{}".format(idx), nn.Conv2d(in_channels=c_in, out_channels=c_out, kernel_size=kernel_size,
                                                  stride=1, padding=kernel_size // 2, bias=False)),
                ("batch_norm_{}".format(idx), nn.BatchNorm2d(c_out)),
                ("leaky_{}".format(idx), nn.LeakyReLU(0.1)),
            ]

        def maxpool(idx):
            # maxpool_<idx>: halves the spatial resolution.
            return ("maxpool_{}".format(idx), nn.MaxPool2d(kernel_size=2, stride=2, padding=0))

        def detect_conv(idx, c_in, n_anchors):
            # 1x1 conv producing (num_classes + 5) values per anchor.
            return ("conv_{}".format(idx), nn.Conv2d(in_channels=c_in, out_channels=n_anchors * (num_classes + 5),
                                                     kernel_size=1, stride=1, padding=0, bias=True))

        layer_list = []

        # layer 0 to layer 8
        layer_list.append(OrderedDict([
            *conv_bn_leaky(0, 3, 16, 3),
            maxpool(1),
            *conv_bn_leaky(2, 16, 32, 3),
            maxpool(3),
            *conv_bn_leaky(4, 32, 64, 3),
            maxpool(5),
            *conv_bn_leaky(6, 64, 128, 3),
            maxpool(7),
            *conv_bn_leaky(8, 128, 256, 3),
        ]))

        # layer 9 to layer 13
        layer_list.append(OrderedDict([
            maxpool(9),
            *conv_bn_leaky(10, 256, 512, 3),
            # layer 11: pad right/bottom by one so the stride-1 pool keeps the size
            ('_debug_padding_11', nn.ZeroPad2d((0, 1, 0, 1))),
            ("maxpool_11", nn.MaxPool2d(kernel_size=2, stride=1, padding=0)),
            *conv_bn_leaky(12, 512, 1024, 3),
            *conv_bn_leaky(13, 1024, 256, 1),
        ]))

        # layer 14, layer 15
        layer_list.append(OrderedDict([
            *conv_bn_leaky(14, 256, 512, 3),
            detect_conv(15, 512, len(anchor_mask1)),
        ]))

        # layer 16
        anchor_tmp1 = [anchors[i] for i in anchor_mask1]
        layer_list.append(OrderedDict([("yolo_16", YOLOLayer(anchor_tmp1, num_classes))]))

        # layer 17
        layer_list.append(OrderedDict([("route_17", EmptyLayer())]))

        # layer 18, layer 19
        layer_list.append(OrderedDict([
            *conv_bn_leaky(18, 256, 128, 1),
            ("upsample_19", Upsample(scale_factor=2)),
        ]))

        # layer 20
        layer_list.append(OrderedDict([('route_20', EmptyLayer())]))

        # layer 21, layer 22 (input: 128 upsampled + 256 routed channels)
        layer_list.append(OrderedDict([
            *conv_bn_leaky(21, 384, 256, 3),
            detect_conv(22, 256, len(anchor_mask2)),
        ]))

        # layer 23
        anchor_tmp2 = [anchors[i] for i in anchor_mask2]
        layer_list.append(OrderedDict([("yolo_23", YOLOLayer(anchor_tmp2, num_classes))]))

        self.module_list = nn.ModuleList([nn.Sequential(layer) for layer in layer_list])
        self.yolo_layer_index = get_yolo_layer_index(self.module_list)

    def forward(self, x):
        """Run the network.

        Returns:
            Training mode: list with the raw output of both heads. Eval mode:
            ``(inference_out, train_out)`` where ``inference_out``
            concatenates the decoded predictions of both heads along dim 1.
        """
        img_size = x.shape[-1]
        output = []

        x = self.module_list[0](x)  # layer0 to layer8
        x_route8 = x
        x = self.module_list[1](x)  # layer9 to layer13
        x_route13 = x

        x = self.module_list[2](x)  # layer14, layer15
        x = self.module_list[3][0](x, img_size)  # yolo_16
        output.append(x)

        x = self.module_list[5](x_route13)  # layer18, layer19
        x = torch.cat([x, x_route8], 1)  # route
        x = self.module_list[7](x)  # layer21, layer22
        x = self.module_list[8][0](x, img_size)  # yolo_23
        output.append(x)

        if self.training:
            return output
        io, p = list(zip(*output))  # inference output, training output
        return torch.cat(io, 1), p
if __name__ == "__main__":
    # Smoke test: build Yolov3, report its size, and run one forward pass in
    # training mode and one in eval mode.
    # Random data instead of torch.Tensor(...), which returns uninitialised
    # memory that may contain NaN/inf.
    dummy_input = torch.rand(5, 3, 416, 416)
    model = Yolov3(num_classes=80)

    # Total number of parameter elements.
    k = sum(param.numel() for param in model.parameters())
    print("总参数数量和: " + str(k))

    print("-----------yolo layer")
    # NOTE(review): the body of this loop is not visible in the source;
    # printing the detection layer is presumed. Confirm.
    for index in model.yolo_layer_index:
        print(model.module_list[index])

    # Training mode returns one raw tensor per detection head.
    model.train()
    for res in model(dummy_input):
        print("res:", np.shape(res))

    # Eval mode returns (inference_out, train_out); unpacking two values only
    # works in this mode.
    model.eval()
    with torch.no_grad():
        inference_out, train_out = model(dummy_input)
    print("inference_out:", np.shape(inference_out))
    for o in train_out:
        print("train_out:", np.shape(o))
# date:2021-03-09
# Author: Eric.Lee
# function: handpose_x 21 keypoints 2D
import os
import torch
import cv2
import numpy as np
import json
import torch
import torch.nn as nn
import time
import math
from datetime import datetime
from hand_keypoints.models.resnet import resnet18,resnet34,resnet50,resnet101
from hand_keypoints.models.squeezenet import squeezenet1_1,squeezenet1_0
from hand_keypoints.models.resnet import resnet18,resnet34,resnet50,resnet101
from hand_keypoints.models.squeezenet import squeezenet1_1,squeezenet1_0
from hand_keypoints.models.shufflenetv2 import ShuffleNetV2
from hand_keypoints.models.shufflenet import ShuffleNet
from hand_keypoints.models.mobilenetv2 import MobileNetV2
from torchvision.models import shufflenet_v2_x1_5 ,shufflenet_v2_x1_0 , shufflenet_v2_x2_0
from hand_keypoints.models.rexnetv1 import ReXNetV1
from hand_keypoints.utils.common_utils import *
def draw_bd_handpose_c(img_, hand_, x, y, thick=3):
    """Draw the hand skeleton described by ``hand_`` onto ``img_`` in place.

    ``hand_`` maps the string keys ``'0'`` .. ``'20'`` to dicts holding ``'x'``
    and ``'y'``. Five chains of four segments are drawn, each starting at
    keypoint ``'0'`` and each using its own colour.

    Args:
        img_: image passed straight to ``cv2.line``.
        hand_: keypoint mapping as described above.
        x: offset added to every keypoint x before truncating to int.
        y: offset added to every keypoint y before truncating to int.
        thick: line thickness forwarded to ``cv2.line``.
    """
    palette = [(0,215,255),(255,115,55),(5,255,55),(25,15,255),(225,15,55)]
    chains = [
        (0, 1, 2, 3, 4),
        (0, 5, 6, 7, 8),
        (0, 9, 10, 11, 12),
        (0, 13, 14, 15, 16),
        (0, 17, 18, 19, 20),
    ]

    def shifted(idx):
        # Offset one keypoint and truncate to integer pixel coordinates.
        joint = hand_[str(idx)]
        return int(joint['x'] + x), int(joint['y'] + y)

    for colour, chain in zip(palette, chains):
        for start, end in zip(chain, chain[1:]):
            cv2.line(img_, shifted(start), shifted(end), colour, thick)
class handpose_x_model(object):
    """Inference wrapper around a hand-keypoint regression network.

    The constructor builds the backbone named by ``model_arch``, moves it to
    the GPU when one is available, loads ``model_path`` if that file exists,
    and stores the network as ``self.model_handpose``.
    """

    def __init__(self,
        # model_path = './components/hand_keypoints/weights/ReXNetV1-size-256-wingloss102-0.1063.pth',
        model_path = './components/hand_keypoints/weights/resnet_50-size-256-wingloss102-0.119.pth',
        img_size= 256,
        num_classes = 42,  # number of hand keypoints * 2 : 21*2
        # model_arch = "rexnetv1",
        model_arch = "resnet_50",
        model_half = False,
        ):
        """Build the network and load its weights.

        Args:
            model_path: checkpoint file; loaded only if it exists.
            img_size: square input resolution used by ``predict``.
            num_classes: size of the network output vector.
            model_arch: one of the architecture names handled below.
            model_half: when true, the network and its inputs are cast to
                half precision.

        Raises:
            ValueError: if ``model_arch`` is not a supported name.
        """
        self.use_cuda = torch.cuda.is_available()
        self.device = torch.device("cuda:0" if self.use_cuda else "cpu")  # device type and index
        self.img_size = img_size
        self.model_half = model_half

        # Lazy builders so only the requested architecture is constructed.
        builders = {
            'resnet_50': lambda: resnet50(num_classes = num_classes,img_size = self.img_size),
            'resnet_18': lambda: resnet18(num_classes = num_classes,img_size = self.img_size),
            'resnet_34': lambda: resnet34(num_classes = num_classes,img_size = self.img_size),
            'resnet_101': lambda: resnet101(num_classes = num_classes,img_size = self.img_size),
            'squeezenet1_0': lambda: squeezenet1_0(pretrained=True, num_classes=num_classes),
            'squeezenet1_1': lambda: squeezenet1_1(pretrained=True, num_classes=num_classes),
            'shufflenetv2': lambda: ShuffleNetV2(ratio=1., num_classes=num_classes),
            'shufflenet_v2_x1_5': lambda: shufflenet_v2_x1_5(pretrained=False,num_classes=num_classes),
            'shufflenet_v2_x1_0': lambda: shufflenet_v2_x1_0(pretrained=False,num_classes=num_classes),
            'shufflenet_v2_x2_0': lambda: shufflenet_v2_x2_0(pretrained=False,num_classes=num_classes),
            'shufflenet': lambda: ShuffleNet(num_blocks = [2,4,2], num_classes=num_classes, groups=3),
            'mobilenetv2': lambda: MobileNetV2(num_classes=num_classes),
            'rexnetv1': lambda: ReXNetV1(num_classes=num_classes,width_mult=1., depth_mult=1.),
        }
        if model_arch not in builders:
            # Fail with a clear error instead of hitting an unbound `model_`.
            print(" no support the model")
            raise ValueError("unsupported model_arch: {!r}".format(model_arch))

        model_ = builders[model_arch]()
        model_ = model_.to(self.device)
        model_.eval()  # inference mode

        # Load the checkpoint when it is present.
        # NOTE: torch.load unpickles the file; only load trusted checkpoints.
        if os.access(model_path,os.F_OK):
            chkpt = torch.load(model_path, map_location=self.device)
            model_.load_state_dict(chkpt)  # actually apply the loaded weights
            print('handpose_x model loading : {}'.format(model_path))
        else:
            print('handpose_x model not found, weights left uninitialised : {}'.format(model_path))

        self.model_handpose = model_
        if model_half:
            self.model_handpose = self.model_handpose.half()

    def predict(self, img, vis = False):
        """Run the network on one H x W x C image array.

        The image is resized to ``img_size`` x ``img_size`` when needed,
        normalised as ``(pixel - 128) / 256``, converted to a 1 x C x H x W
        tensor and fed to the network.

        Args:
            img: numpy image array with channels last.
            vis: unused.

        Returns:
            The network output as a numpy array with size-1 axes removed.
        """
        with torch.no_grad():
            if not((img.shape[0] == self.img_size) and (img.shape[1] == self.img_size)):
                img = cv2.resize(img, (self.img_size,self.img_size), interpolation = cv2.INTER_CUBIC)

            img_ = img.astype(np.float32)
            img_ = (img_-128.)/256.
            img_ = img_.transpose(2, 0, 1)
            img_ = torch.from_numpy(img_)
            img_ = img_.unsqueeze_(0).float()

            if self.use_cuda:
                img_ = img_.cuda()  # (bs, 3, h, w)
            if self.model_half:
                # Match the half-precision weights set up in __init__.
                img_ = img_.half()

            pre_ = self.model_handpose(img_)
            output = pre_.cpu().detach().numpy()
            output = np.squeeze(output)
            return output
"""mobilenetv2 in pytorch
[1] Mark Sandler, Andrew Howard, Menglong Zhu, Andrey Zhmoginov, Liang-Chieh Chen
    MobileNetV2: Inverted Residuals and Linear Bottlenecks
    https://arxiv.org/abs/1801.04381
"""
import torch
import torch.nn as nn
import torch.nn.functional as F
class LinearBottleNeck(nn.Module):
    """Inverted-residual block: 1x1 expand, 3x3 depthwise, 1x1 linear project.

    The input is added to the output when ``stride == 1`` and the channel
    count is unchanged.

    Args:
        in_channels: channels of the input feature map.
        out_channels: channels of the output feature map.
        stride: stride of the depthwise 3x3 convolution.
        t: expansion factor; the hidden width is ``in_channels * t``.
        class_num: unused.
    """

    def __init__(self, in_channels, out_channels, stride, t=6, class_num=100):
        # nn.Module must be initialised before sub-modules are assigned.
        super().__init__()

        hidden = in_channels * t
        self.residual = nn.Sequential(
            nn.Conv2d(in_channels, hidden, 1),
            nn.BatchNorm2d(hidden),
            nn.ReLU6(inplace=True),

            nn.Conv2d(hidden, hidden, 3, stride=stride, padding=1, groups=hidden),
            nn.BatchNorm2d(hidden),
            nn.ReLU6(inplace=True),

            # Linear projection: deliberately no activation after this stage.
            nn.Conv2d(hidden, out_channels, 1),
            nn.BatchNorm2d(out_channels),
        )

        self.stride = stride
        self.in_channels = in_channels
        self.out_channels = out_channels

    def forward(self, x):
        """Apply the block; add the skip connection when shapes allow it."""
        residual = self.residual(x)

        if self.stride == 1 and self.in_channels == self.out_channels:
            residual += x

        return residual
class MobileNetV2(nn.Module):
    """MobileNetV2-style classifier built from ``LinearBottleNeck`` stages.

    Args:
        num_classes: number of output logits.
        dropout_factor: probability passed to ``nn.Dropout`` before the
            final 1x1 classifier convolution.
            NOTE(review): the default of 1.0 makes ``nn.Dropout`` zero every
            activation in training mode - confirm this default is intended.
    """

    def __init__(self, num_classes=100,dropout_factor = 1.0):
        # nn.Module must be initialised before sub-modules are assigned.
        super().__init__()

        self.pre = nn.Sequential(
            nn.Conv2d(3, 32, 1, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU6(inplace=True),
        )

        self.stage1 = LinearBottleNeck(32, 16, 1, 1)
        self.stage2 = self._make_stage(2, 16, 24, 2, 6)
        self.stage3 = self._make_stage(3, 24, 32, 2, 6)
        self.stage4 = self._make_stage(4, 32, 64, 2, 6)
        self.stage5 = self._make_stage(3, 64, 96, 1, 6)
        self.stage6 = self._make_stage(3, 96, 160, 1, 6)
        self.stage7 = LinearBottleNeck(160, 320, 1, 6)

        self.conv1 = nn.Sequential(
            nn.Conv2d(320, 1280, 1),
            nn.BatchNorm2d(1280),
            nn.ReLU6(inplace=True),
        )

        self.conv2 = nn.Conv2d(1280, num_classes, 1)
        self.dropout = nn.Dropout(dropout_factor)

    def forward(self, x):
        """Return logits of shape ``(batch, num_classes)``."""
        x = self.pre(x)
        x = self.stage1(x)
        x = self.stage2(x)
        x = self.stage3(x)
        x = self.stage4(x)
        x = self.stage5(x)
        x = self.stage6(x)
        x = self.stage7(x)
        x = self.conv1(x)
        x = F.adaptive_avg_pool2d(x, 1)
        x = self.dropout(x)
        x = self.conv2(x)
        x = x.view(x.size(0), -1)

        return x

    def _make_stage(self, repeat, in_channels, out_channels, stride, t):
        """Stack ``repeat`` bottlenecks; only the first one applies ``stride``."""
        layers = [LinearBottleNeck(in_channels, out_channels, stride, t)]

        # A bounded loop: the old `while repeat - 1` never ended for repeat < 1.
        for _ in range(repeat - 1):
            layers.append(LinearBottleNeck(out_channels, out_channels, 1, t))

        return nn.Sequential(*layers)
def mobilenetv2():
    """Build a ``MobileNetV2`` using its default constructor arguments."""
    net = MobileNetV2()
    return net
# date:2020-08-08
# Author: X.L.Eric
# function: my model
import torch
import torch.nn as nn
import torch.nn.functional as F
class MY_Net(nn.Module):
    """Small convolutional classifier: a stem conv, four stages, pooling, FC.

    Args:
        num_classes: number of output logits.
    """

    def __init__(self,num_classes):
        super(MY_Net, self).__init__()
        # Stem.
        self.cov = nn.Conv2d(3, 32, 3)
        self.relu = nn.ReLU(inplace=True)

        # Conv2d : in_channels, out_channels, kernel_size, stride, padding
        self.layers1 = nn.Sequential(
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=0),
            nn.AvgPool2d(kernel_size=3, stride=2, padding=1),
        )
        self.layers2 = nn.Sequential(
            nn.Conv2d(64, 128, 3),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
        )
        self.layers3 = nn.Sequential(
            nn.Conv2d(128, 256, 3, stride=2),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
        )
        self.layers4 = nn.Sequential(
            nn.Conv2d(256, 512, 3, stride=2),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
            nn.Conv2d(512, 512, 1, stride=1),
        )

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))  # adaptive average pooling
        self.fc = nn.Linear(in_features=512, out_features=num_classes)  # fully connected head

    def forward(self, x):
        """Return logits of shape ``(batch, num_classes)``."""
        x = self.relu(self.cov(x))
        for stage in (self.layers1, self.layers2, self.layers3, self.layers4):
            x = stage(x)
        x = self.avgpool(x)
        flat = x.reshape(x.size(0), -1)
        return self.fc(flat)
# Smoke test: build MY_Net and push one random batch through it.
if __name__ == "__main__":
# Input batch (batchsize, channel, height, width): 8, 3*256*256
dummy_input = torch.randn([8, 3, 256,256])
model = MY_Net(num_classes = 100)# 100 output classes
print('model:\n',model)# print the model's ops
output = model(dummy_input)# forward pass
# Size of the forward-pass output
print('model inference feature size: ',output.size())
# Softmax over the class dimension; the result is not used afterwards.
output_ = F.softmax(output,dim = 1)
import torch
import torch.nn as nn
import math
import torch.utils.model_zoo as model_zoo
# Public names of the ResNet section; the list literal is now terminated.
__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
           'resnet152']

# Download locations of the ImageNet checkpoints, keyed by architecture name.
model_urls = {
    'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
    'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
    'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
    'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
    'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
}
def conv3x3(in_planes, out_planes, stride=1):
    """Return a bias-free 3x3 ``nn.Conv2d`` with padding 1."""
    conv_kwargs = dict(kernel_size=3, stride=stride, padding=1, bias=False)
    return nn.Conv2d(in_planes, out_planes, **conv_kwargs)
class BasicBlock(nn.Module):
    """Two 3x3 conv/BN layers with a residual connection.

    Args:
        inplanes: input channels.
        planes: output channels.
        stride: stride of the first convolution.
        downsample: optional module applied to the input before the addition.
    """

    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Return ``relu(main_path(x) + shortcut(x))``."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        shortcut = x if self.downsample is None else self.downsample(x)
        out += shortcut
        return self.relu(out)
class Bottleneck(nn.Module):
    """1x1 -> 3x3 -> 1x1 residual block whose output has ``planes * 4`` channels.

    Args:
        inplanes: input channels.
        planes: width of the first two convolutions.
        stride: stride of the 3x3 convolution.
        downsample: optional module applied to the input before the addition.
    """

    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        out_planes = planes * 4
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(
            planes, planes, kernel_size=3, stride=stride, padding=1, bias=False
        )
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Return ``relu(main_path(x) + shortcut(x))``."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        shortcut = x if self.downsample is None else self.downsample(x)
        out += shortcut
        return self.relu(out)
class ResNet(nn.Module):
    """ResNet backbone with a fixed-size average pool and a linear head.

    Args:
        block: residual block class exposing an ``expansion`` attribute and
            accepting ``(inplanes, planes, stride, downsample)``.
        layers: number of blocks in each of the four stages.
        num_classes: number of output logits.
        img_size: expected square input size; must be a multiple of 32
            because the final pooling kernel is ``img_size // 32``.
        dropout_factor: probability passed to ``nn.Dropout`` before the head.
            NOTE(review): the default of 1. makes ``nn.Dropout`` zero every
            activation in training mode - confirm this default is intended.
    """

    def __init__(self, block, layers, num_classes=1000, img_size=224,dropout_factor = 1.):
        super(ResNet, self).__init__()
        self.inplanes = 64
        self.dropout_factor = dropout_factor

        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        # see this issue: https://github.com/xxradon/PytorchToCaffe/issues/16
        # self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

        assert img_size % 32 == 0
        pool_kernel = img_size // 32
        self.avgpool = nn.AvgPool2d(pool_kernel, stride=1, ceil_mode=True)

        self.dropout = nn.Dropout(self.dropout_factor)
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                # Normal init scaled by the fan-out of the kernel.
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                # Start every BatchNorm as the identity transform.
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1):
        """Build one stage of ``blocks`` blocks; only the first uses ``stride``."""
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            # Project the shortcut when spatial size or channel count changes.
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = [block(self.inplanes, planes, stride, downsample)]
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def forward(self, x):
        """Return logits of shape ``(batch, num_classes)``."""
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.dropout(x)
        x = self.fc(x)

        return x
def load_model(model, pretrained_state_dict):
    """Copy every name- and shape-compatible tensor into ``model``.

    Entries of ``pretrained_state_dict`` whose key is missing from the model,
    or whose size differs from the model's tensor, are skipped. One line is
    printed per entry saying whether it was loaded or skipped.

    Args:
        model: module to update in place.
        pretrained_state_dict: mapping from parameter name to tensor.

    Returns:
        The same ``model`` object.
    """
    model_dict = model.state_dict()
    pretrained_dict = {
        k: v for k, v in pretrained_state_dict.items()
        if k in model_dict and model_dict[k].size() == v.size()
    }
    model.load_state_dict(pretrained_dict, strict=False)

    if not pretrained_dict:
        print("[INFO] No params were loaded ...")
    else:
        for k, v in pretrained_state_dict.items():
            if k in pretrained_dict:
                print("==>> Load {} {}".format(k, v.size()))
            else:
                # Only report a skip for entries that were really left out.
                print("[INFO] Skip {} {}".format(k, v.size()))
    return model
def resnet18(pretrained=False, **kwargs):
    """Constructs a ResNet-18 model.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        **kwargs: forwarded to the ``ResNet`` constructor.
    """
    model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
    if pretrained:
        # load_model is used instead of load_state_dict so that tensors whose
        # name or shape does not match the model are skipped.
        print("Load pretrained model from {}".format(model_urls['resnet18']))
        pretrained_state_dict = model_zoo.load_url(model_urls['resnet18'])
        model = load_model(model, pretrained_state_dict)
    return model
def resnet34(pretrained=False, **kwargs):
    """Constructs a ResNet-34 model.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        **kwargs: forwarded to the ``ResNet`` constructor.
    """
    model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs)
    if pretrained:
        # load_model is used instead of load_state_dict so that tensors whose
        # name or shape does not match the model are skipped.
        print("Load pretrained model from {}".format(model_urls['resnet34']))
        pretrained_state_dict = model_zoo.load_url(model_urls['resnet34'])
        model = load_model(model, pretrained_state_dict)
    return model
def resnet50(pretrained=False, **kwargs):
    """Constructs a ResNet-50 model.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        **kwargs: forwarded to the ``ResNet`` constructor.
    """
    model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
    if pretrained:
        # load_model is used instead of load_state_dict so that tensors whose
        # name or shape does not match the model are skipped.
        print("Load pretrained model from {}".format(model_urls['resnet50']))
        pretrained_state_dict = model_zoo.load_url(model_urls['resnet50'])
        model = load_model(model, pretrained_state_dict)
    return model
def resnet101(pretrained=False, **kwargs):
    """Constructs a ResNet-101 model.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        **kwargs: forwarded to the ``ResNet`` constructor.
    """
    model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs)
    if pretrained:
        # load_model is used instead of load_state_dict so that tensors whose
        # name or shape does not match the model are skipped.
        print("Load pretrained model from {}".format(model_urls['resnet101']))
        pretrained_state_dict = model_zoo.load_url(model_urls['resnet101'])
        model = load_model(model, pretrained_state_dict)
    return model
def resnet152(pretrained=False, **kwargs):
    """Constructs a ResNet-152 model.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        **kwargs: forwarded to the ``ResNet`` constructor.
    """
    model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs)
    if pretrained:
        # load_model is used instead of load_state_dict so that tensors whose
        # name or shape does not match the model are skipped.
        print("Load pretrained model from {}".format(model_urls['resnet152']))
        pretrained_state_dict = model_zoo.load_url(model_urls['resnet152'])
        model = load_model(model, pretrained_state_dict)
    return model
# Smoke test: run one random batch through a 2-class ResNet-34.
if __name__ == "__main__":
# NOTE(review): `input` shadows the Python builtin of the same name.
input = torch.randn([32, 3, 256,256])
# img_size matches the 256x256 batch created above.
model = resnet34(False, num_classes=2, img_size=256)
output = model(input)
import torch
import torch.nn as nn
# Public names of the ResNet section; the list literal is now terminated.
# NOTE(review): only ResNet and resnet50 are defined in the section that
# follows - confirm which names should really be exported.
__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
           'resnet152']

# Download locations of the ImageNet checkpoints, keyed by architecture name.
model_urls = {
    'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
    'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
    'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
    'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
    'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
}
def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
    """Return a bias-free 3x3 ``nn.Conv2d`` whose padding equals ``dilation``."""
    return nn.Conv2d(
        in_planes,
        out_planes,
        3,
        stride=stride,
        padding=dilation,
        dilation=dilation,
        groups=groups,
        bias=False,
    )
def conv1x1(in_planes, out_planes, stride=1):
    """Return a bias-free 1x1 ``nn.Conv2d``."""
    conv_kwargs = dict(kernel_size=1, stride=stride, bias=False)
    return nn.Conv2d(in_planes, out_planes, **conv_kwargs)
class Bottleneck(nn.Module):
    """1x1 -> 3x3 -> 1x1 residual block with configurable width and norm layer.

    Args:
        inplanes: input channels.
        planes: base width; the output has ``planes * expansion`` channels.
        stride: stride of the 3x3 convolution.
        downsample: optional module applied to the input before the addition.
        groups: groups of the 3x3 convolution.
        base_width: per-group width; 64 leaves ``planes`` unscaled.
        dilation: dilation of the 3x3 convolution.
        norm_layer: normalisation layer factory; ``nn.BatchNorm2d`` if None.
    """

    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
                 base_width=64, dilation=1, norm_layer=None):
        super(Bottleneck, self).__init__()
        make_norm = nn.BatchNorm2d if norm_layer is None else norm_layer
        width = int(planes * (base_width / 64.)) * groups
        out_planes = planes * self.expansion
        # Both self.conv2 and self.downsample layers downsample the input when stride != 1
        self.conv1 = conv1x1(inplanes, width)
        self.bn1 = make_norm(width)
        self.conv2 = conv3x3(width, width, stride, groups, dilation)
        self.bn2 = make_norm(width)
        self.conv3 = conv1x1(width, out_planes)
        self.bn3 = make_norm(out_planes)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Return ``relu(main_path(x) + shortcut(x))``."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        identity = x if self.downsample is None else self.downsample(x)
        out += identity
        return self.relu(out)
class ResNet(nn.Module):
    """ResNet backbone with adaptive pooling, dropout and a linear head.

    Args:
        block: residual block class exposing an ``expansion`` attribute.
        layers: number of blocks in each of the four stages.
        num_classes: number of output logits.
        dropout_factor: probability passed to ``nn.Dropout`` before the head.
            NOTE(review): the default of 1. makes ``nn.Dropout`` zero every
            activation in training mode - confirm this default is intended.
        zero_init_residual: zero the last norm weight of every residual block.
        groups: forwarded to every block.
        width_per_group: forwarded to every block as ``base_width``.
        replace_stride_with_dilation: three booleans, one per strided stage
            (layer2..layer4), selecting dilation instead of stride.
        norm_layer: normalisation layer factory; ``nn.BatchNorm2d`` if None.

    Raises:
        ValueError: if ``replace_stride_with_dilation`` does not have three
            elements.
    """

    def __init__(self, block, layers, num_classes=1000,dropout_factor = 1., zero_init_residual=False,
                 groups=1, width_per_group=64, replace_stride_with_dilation=None,
                 norm_layer=None):
        super(ResNet, self).__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        self._norm_layer = norm_layer

        self.inplanes = 64
        self.dilation = 1
        if replace_stride_with_dilation is None:
            # each element in the tuple indicates if we should replace
            # the 2x2 stride with a dilated convolution instead
            replace_stride_with_dilation = [False, False, False]
        if len(replace_stride_with_dilation) != 3:
            raise ValueError("replace_stride_with_dilation should be None "
                             "or a 3-element tuple, got {}".format(replace_stride_with_dilation))
        self.groups = groups
        self.base_width = width_per_group
        self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = norm_layer(self.inplanes)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2,
                                       dilate=replace_stride_with_dilation[0])
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2,
                                       dilate=replace_stride_with_dilation[1])
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2,
                                       dilate=replace_stride_with_dilation[2])
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.dropout = nn.Dropout(dropout_factor)
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

        # Zero-initialize the last BN in each residual branch,
        # so that the residual branch starts with zeros, and each residual block behaves like an identity.
        # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
        if zero_init_residual:
            for m in self.modules():
                if isinstance(m, Bottleneck):
                    nn.init.constant_(m.bn3.weight, 0)
                elif isinstance(m, BasicBlock):
                    nn.init.constant_(m.bn2.weight, 0)

    def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
        """Build one stage of ``blocks`` blocks.

        When ``dilate`` is true the stage keeps its resolution: the running
        dilation is multiplied by ``stride`` and the stride is reset to 1.
        """
        norm_layer = self._norm_layer
        downsample = None
        previous_dilation = self.dilation
        if dilate:
            self.dilation *= stride
            stride = 1
        if stride != 1 or self.inplanes != planes * block.expansion:
            # Project the shortcut when spatial size or channel count changes.
            downsample = nn.Sequential(
                conv1x1(self.inplanes, planes * block.expansion, stride),
                norm_layer(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample, self.groups,
                            self.base_width, previous_dilation, norm_layer))
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes, groups=self.groups,
                                base_width=self.base_width, dilation=self.dilation,
                                norm_layer=norm_layer))

        return nn.Sequential(*layers)

    def forward(self, x):
        """Return logits of shape ``(batch, num_classes)``."""
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = x.reshape(x.size(0), -1)
        x = self.dropout(x)
        x = self.fc(x)

        return x
def _resnet(arch, block, layers, **kwargs):
    """Build a ``ResNet`` from ``block`` and ``layers``; ``arch`` is unused."""
    return ResNet(block, layers, **kwargs)
def resnet50(**kwargs):
    r"""ResNet-50 model from
    `"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_

    Args:
        **kwargs: forwarded to the ``ResNet`` constructor.
    """
    return _resnet('resnet50', Bottleneck, [3, 4, 6, 3],**kwargs)
# Smoke test: run one random batch through a 100-class ResNet-50.
if __name__ == "__main__":
dummy_input = torch.randn([32, 3, 128,128])
num_classes = 100
# dropout_factor is passed through to the ResNet constructor.
model = resnet50(num_classes = num_classes,dropout_factor=0.5)
output = model(dummy_input)
"""
Copyright (c) 2020-present NAVER Corp.
MIT license
"""
import torch
import torch.nn as nn
from math import ceil
# Memory-efficient Swish using torch.jit.script, borrowed from the code in (https://twitter.com/jeremyphoward/status/1188251041835315200)
# Currently use memory-efficient Swish as default:
def swish_fwd(x):
    """Return ``x * sigmoid(x)`` as a new tensor."""
    gate = torch.sigmoid(x)
    return x.mul(gate)
def swish_bwd(x, grad_output):
    """Return ``grad_output`` scaled by the derivative of ``x * sigmoid(x)``."""
    sig = torch.sigmoid(x)
    local_grad = sig * (1. + x * (1. - sig))
    return grad_output * local_grad
class SwishJitImplementation(torch.autograd.Function):
    """Custom autograd function for Swish built on ``swish_fwd``/``swish_bwd``.

    Only the input is saved for the backward pass.
    """

    @staticmethod
    def forward(ctx, x):
        """Save ``x`` for backward and return ``swish_fwd(x)``."""
        # backward() reads ctx.saved_tensors[0], so x must be stored here.
        ctx.save_for_backward(x)
        return swish_fwd(x)

    @staticmethod
    def backward(ctx, grad_output):
        """Return the gradient with respect to the saved input."""
        x = ctx.saved_tensors[0]
        return swish_bwd(x, grad_output)
# Swish routed through the custom autograd function; `inplace` is accepted
# but ignored here.
# NOTE(review): a second `def swish` follows immediately in this file and
# rebinds the name, so this definition is shadowed at import time - confirm
# whether the two were meant to be alternatives (e.g. a try/except fallback).
def swish(x, inplace=False):
return SwishJitImplementation.apply(x)
def swish(x, inplace=False):
    """Return ``x * sigmoid(x)``; with ``inplace`` the result overwrites ``x``."""
    gate = x.sigmoid()
    if inplace:
        return x.mul_(gate)
    return x.mul(gate)
class Swish(nn.Module):
    """Module wrapper around ``swish``.

    Args:
        inplace: forwarded to ``swish`` on every call.
    """

    def __init__(self, inplace=True):
        super(Swish, self).__init__()
        self.inplace = inplace

    def forward(self, x):
        """Apply ``swish`` to ``x``."""
        result = swish(x, self.inplace)
        return result
def ConvBNAct(out, in_channels, channels, kernel=1, stride=1, pad=0,
              num_group=1, active=True, relu6=False):
    """Append a conv, a BatchNorm and an optional ReLU to the list ``out``.

    Args:
        out: list of modules, extended in place.
        in_channels: input channels of the convolution.
        channels: output channels of the convolution and BatchNorm width.
        kernel: convolution kernel size.
        stride: convolution stride.
        pad: convolution padding.
        num_group: convolution groups.
        active: append an activation when true.
        relu6: use ``nn.ReLU6`` instead of ``nn.ReLU`` for the activation.
    """
    out.append(nn.Conv2d(in_channels, channels, kernel,
                         stride, pad, groups=num_group, bias=False))
    # The conv is bias-free, so BatchNorm supplies the affine shift.
    out.append(nn.BatchNorm2d(channels))
    if active:
        out.append(nn.ReLU6(inplace=True) if relu6 else nn.ReLU(inplace=True))
def ConvBNSwish(out, in_channels, channels, kernel=1, stride=1, pad=0, num_group=1):
    """Append a conv, a BatchNorm and a ``Swish`` to the list ``out``.

    Args:
        out: list of modules, extended in place.
        in_channels: input channels of the convolution.
        channels: output channels of the convolution and BatchNorm width.
        kernel: convolution kernel size.
        stride: convolution stride.
        pad: convolution padding.
        num_group: convolution groups.
    """
    out.append(nn.Conv2d(in_channels, channels, kernel,
                         stride, pad, groups=num_group, bias=False))
    # The conv is bias-free, so BatchNorm supplies the affine shift.
    out.append(nn.BatchNorm2d(channels))
    out.append(Swish())
class SE(nn.Module):
    """Squeeze-and-excitation gate: rescales each channel by a value in (0, 1).

    Args:
        in_channels: channels of the incoming feature map.
        channels: channels produced by the gate; must match the input
            channels for the final multiplication to broadcast.
        se_ratio: reduction ratio of the hidden 1x1 convolution.
    """

    def __init__(self, in_channels, channels, se_ratio=12):
        super(SE, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Sequential(
            nn.Conv2d(in_channels, channels // se_ratio, kernel_size=1, padding=0),
            nn.BatchNorm2d(channels // se_ratio),
            nn.ReLU(inplace=True),
            nn.Conv2d(channels // se_ratio, channels, kernel_size=1, padding=0),
            # Sigmoid keeps the per-channel scale bounded.
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Return ``x`` multiplied by its per-channel gate."""
        y = self.avg_pool(x)
        y = self.fc(y)
        return x * y
class LinearBottleneck(nn.Module):
    """ReXNet bottleneck: optional 1x1 expand, 3x3 depthwise, optional SE, 1x1 project.

    A partial shortcut is used when ``stride == 1`` and
    ``in_channels <= channels``: the input is added onto the first
    ``in_channels`` channels of the output.

    Args:
        in_channels: input channels.
        channels: output channels.
        t: expansion factor; ``1`` skips the expansion convolution.
        stride: stride of the depthwise convolution.
        use_se: insert an ``SE`` gate after the depthwise convolution.
        se_ratio: reduction ratio of the ``SE`` gate.
        **kwargs: forwarded to ``nn.Module.__init__``.
    """

    def __init__(self, in_channels, channels, t, stride, use_se=True, se_ratio=12,
                 **kwargs):
        super(LinearBottleneck, self).__init__(**kwargs)
        self.use_shortcut = stride == 1 and in_channels <= channels
        self.in_channels = in_channels
        self.out_channels = channels

        out = []
        if t != 1:
            dw_channels = in_channels * t
            ConvBNSwish(out, in_channels=in_channels, channels=dw_channels)
        else:
            # No expansion: the depthwise conv runs at the input width.
            dw_channels = in_channels

        ConvBNAct(out, in_channels=dw_channels, channels=dw_channels, kernel=3, stride=stride, pad=1,
                  num_group=dw_channels, active=False)

        if use_se:
            out.append(SE(dw_channels, dw_channels, se_ratio))

        # Activation between the depthwise stage and the linear projection,
        # as in the reference ReXNet implementation.
        out.append(nn.ReLU6())
        ConvBNAct(out, in_channels=dw_channels, channels=channels, active=False, relu6=True)
        self.out = nn.Sequential(*out)

    def forward(self, x):
        """Apply the block and, when enabled, the partial shortcut."""
        out = self.out(x)
        if self.use_shortcut:
            out[:, 0:self.in_channels] += x

        return out
# ReXNetV1 classifier: a strided stem, a sequence of bottleneck blocks whose
# channel width grows linearly, a 1x1 "pen-ultimate" conv and a 1x1 conv head.
class ReXNetV1(nn.Module):
# NOTE(review): the signature below ends on a comma with no closing `):`.
# The body reads `use_se` and `se_ratio`, which are not among the visible
# parameters - confirm the remaining parameter names and defaults.
def __init__(self, input_ch=16, final_ch=180, width_mult=1.0, depth_mult=1.0, num_classes=1000,
super(ReXNetV1, self).__init__()
# Per-stage base configuration: block count, first-block stride, SE flag.
layers = [1, 2, 2, 3, 3, 5]
strides = [1, 2, 2, 2, 1, 2]
use_ses = [False, False, True, True, True, True]
# Scale the block count of every stage by depth_mult, rounding up.
layers = [ceil(element * depth_mult) for element in layers]
# Expand to one stride per block: the stage stride first, then 1s.
strides = sum([[element] + [1] * (layers[idx] - 1)
for idx, element in enumerate(strides)], [])
# NOTE(review): the two assignments below both run as written, so the second
# always wins; this looks like an if/else whose `else:` line is missing.
if use_se:
use_ses = sum([[element] * layers[idx] for idx, element in enumerate(use_ses)], [])
use_ses = [False] * sum(layers[:])
# Expansion factor per block: 1 for the first stage, 6 for all others.
ts = [1] * layers[0] + [6] * sum(layers[1:])
self.depth = sum(layers[:]) * 3
# For width_mult < 1 the division here cancels the later multiplication by
# width_mult, keeping the stem and first block at their base widths.
stem_channel = 32 / width_mult if width_mult < 1.0 else 32
inplanes = input_ch / width_mult if width_mult < 1.0 else input_ch
features = []
in_channels_group = []
channels_group = []
# The following channel configuration is a simple instance to make each layer become an expand layer.
for i in range(self.depth // 3):
# NOTE(review): as written every iteration appends twice to each list; this
# looks like an if/else (first block vs. the rest) with `else:` missing.
if i == 0:
in_channels_group.append(int(round(stem_channel * width_mult)))
channels_group.append(int(round(inplanes * width_mult)))
in_channels_group.append(int(round(inplanes * width_mult)))
# Width grows by an equal share of final_ch per block.
inplanes += final_ch / (self.depth // 3 * 1.0)
channels_group.append(int(round(inplanes * width_mult)))
# Stem: 3x3 stride-2 conv block on the 3-channel input.
ConvBNSwish(features, 3, int(round(stem_channel * width_mult)), kernel=3, stride=2, pad=1)
# NOTE(review): the loop body is truncated - only the tail of a call
# (`use_se=se, se_ratio=se_ratio))`) is visible; presumably it appends one
# bottleneck block per tuple. Confirm against the intended implementation.
for block_idx, (in_c, c, t, s, se) in enumerate(zip(in_channels_group, channels_group, ts, strides, use_ses)):
use_se=se, se_ratio=se_ratio))
pen_channels = int(1280 * width_mult)
# `c` is the output width of the last block from the loop above.
ConvBNSwish(features, c, pen_channels)
self.features = nn.Sequential(*features)
# NOTE(review): no spatial pooling or dropout is visible before the 1x1 conv
# head - confirm whether lines are missing here.
self.output = nn.Sequential(
nn.Conv2d(pen_channels, num_classes, 1, bias=True))
def forward(self, x):
x = self.features(x)
# squeeze() drops every size-1 dimension, including the batch dimension
# when the batch size is 1.
x = self.output(x).squeeze()
return x
"""shufflenet in pytorch
[1] Xiangyu Zhang, Xinyu Zhou, Mengxiao Lin, Jian Sun.
    ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices
    https://arxiv.org/abs/1707.01083
"""
from functools import partial
import torch
import torch.nn as nn
class BasicConv2d(nn.Module):
    """Convolution followed by BatchNorm and ReLU.

    Args:
        input_channels: input channels of the convolution.
        output_channels: output channels of the convolution.
        kernel_size: convolution kernel size.
        **kwargs: forwarded to ``nn.Conv2d``.
    """

    def __init__(self, input_channels, output_channels, kernel_size, **kwargs):
        # nn.Module must be initialised before sub-modules are assigned.
        super().__init__()
        self.conv = nn.Conv2d(input_channels, output_channels, kernel_size, **kwargs)
        self.bn = nn.BatchNorm2d(output_channels)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """Return ``relu(bn(conv(x)))``."""
        x = self.conv(x)
        x = self.bn(x)
        x = self.relu(x)
        return x
class ChannelShuffle(nn.Module):
    """Interleave the channels of ``groups`` equally sized channel groups.

    Args:
        groups: number of channel groups; must divide the channel count.
    """

    def __init__(self, groups):
        # nn.Module must be initialised before attributes are used by torch.
        super().__init__()
        self.groups = groups

    def forward(self, x):
        """Return ``x`` with its channels shuffled across groups."""
        batchsize, channels, height, width = x.size()
        channels_per_group = channels // self.groups

        # suppose a convolutional layer with g groups whose output has
        # g x n channels; we first reshape the output channel dimension
        # into (g, n)
        x = x.view(batchsize, self.groups, channels_per_group, height, width)

        # transposing and then flattening it back as the input of next layer.
        x = x.transpose(1, 2).contiguous()
        x = x.view(batchsize, -1, height, width)

        return x
class DepthwiseConv2d(nn.Module):
    """Convolution followed by BatchNorm, stored as ``self.depthwise``.

    The depthwise behaviour comes from the ``groups`` value the caller
    passes through ``**kwargs``.

    Args:
        input_channels: input channels of the convolution.
        output_channels: output channels of the convolution.
        kernel_size: convolution kernel size.
        **kwargs: forwarded to ``nn.Conv2d``.
    """

    def __init__(self, input_channels, output_channels, kernel_size, **kwargs):
        # nn.Module must be initialised before sub-modules are assigned.
        super().__init__()
        self.depthwise = nn.Sequential(
            nn.Conv2d(input_channels, output_channels, kernel_size, **kwargs),
            nn.BatchNorm2d(output_channels),
        )

    def forward(self, x):
        """Apply the convolution and BatchNorm."""
        return self.depthwise(x)
class PointwiseConv2d(nn.Module):
    """1x1 convolution followed by BatchNorm, stored as ``self.pointwise``.

    Args:
        input_channels: input channels of the convolution.
        output_channels: output channels of the convolution.
        **kwargs: forwarded to ``nn.Conv2d``.
    """

    def __init__(self, input_channels, output_channels, **kwargs):
        # nn.Module must be initialised before sub-modules are assigned.
        super().__init__()
        self.pointwise = nn.Sequential(
            nn.Conv2d(input_channels, output_channels, 1, **kwargs),
            nn.BatchNorm2d(output_channels),
        )

    def forward(self, x):
        """Apply the 1x1 convolution and BatchNorm."""
        return self.pointwise(x)
# One ShuffleNet unit: bottleneck -> channel shuffle -> depthwise -> expand,
# fused with a shortcut by addition or by channel concatenation.
# NOTE(review): several constructor calls in __init__ are visibly truncated
# (argument lists and closing parentheses are missing) and no
# super().__init__() call is visible - restore them before use.
class ShuffleNetUnit(nn.Module):
def __init__(self, input_channels, output_channels, stage, stride, groups):
# Similar to [9], we set the number of bottleneck channels to 1/4
# of the output channels for each ShuffleNet unit.
# NOTE(review): only one argument of this nn.Sequential(...) is visible.
self.bottlneck = nn.Sequential(
int(output_channels / 4),
# Note that for Stage 2, we do not apply group convolution on the first pointwise
# layer because the number of input channels is relatively small.
if stage == 2:
# NOTE(review): truncated in the same way as the assignment above.
self.bottlneck = nn.Sequential(
int(output_channels / 4),
self.channel_shuffle = ChannelShuffle(groups)
# Depthwise stage at the bottleneck width (groups == channel count).
# NOTE(review): kernel size, stride and padding arguments are not visible.
self.depthwise = DepthwiseConv2d(
int(output_channels / 4),
int(output_channels / 4),
groups=int(output_channels / 4),
# NOTE(review): only the input width of the expand conv is visible here.
self.expand = PointwiseConv2d(
int(output_channels / 4),
self.relu = nn.ReLU(inplace=True)
# Default fusion: element-wise addition with an identity shortcut.
self.fusion = self._add
self.shortcut = nn.Sequential()
# As for the case where ShuffleNet is applied with stride,
# we simply make two modifications (see Fig 2 (c)):
# (i) add a 3 × 3 average pooling on the shortcut path;
# (ii) replace the element-wise addition with channel concatenation,
# which makes it easy to enlarge channel dimension with little extra
# computation cost.
if stride != 1 or input_channels != output_channels:
self.shortcut = nn.AvgPool2d(3, stride=2, padding=1)
# The expand conv emits output_channels - input_channels so that the
# concatenation with the shortcut yields output_channels in total.
self.expand = PointwiseConv2d(
int(output_channels / 4),
output_channels - input_channels,
self.fusion = self._cat
# Element-wise sum of the shortcut and the main path.
def _add(self, x, y):
return torch.add(x, y)
# Channel-wise concatenation of the shortcut and the main path.
def _cat(self, x, y):
return torch.cat([x, y], dim=1)
# Runs both paths, fuses them with self.fusion and applies ReLU.
def forward(self, x):
shortcut = self.shortcut(x)
shuffled = self.bottlneck(x)
shuffled = self.channel_shuffle(shuffled)
shuffled = self.depthwise(shuffled)
shuffled = self.expand(shuffled)
output = self.fusion(shortcut, shuffled)
output = self.relu(output)
return output
# ShuffleNet classifier: stem conv, three stages, global pooling, dropout, FC.
# NOTE(review): no super().__init__() call is visible and the three
# _make_stage(...) calls are truncated - restore them before use.
class ShuffleNet(nn.Module):
# NOTE(review): num_blocks uses a mutable list as its default; it is not
# mutated in the visible code.
# NOTE(review): dropout_factor defaults to 1.0, which makes nn.Dropout zero
# every activation in training mode - confirm this default is intended.
def __init__(self, num_blocks = [2,4,2], num_classes=100, groups=3, dropout_factor = 1.0):
# Stage widths depend on the number of groups.
# NOTE(review): the ShuffleNet paper lists 576 for g=1, so 567 looks like a
# typo - confirm. Any other `groups` value leaves out_channels unbound.
if groups == 1:
out_channels = [24, 144, 288, 567]
elif groups == 2:
out_channels = [24, 200, 400, 800]
elif groups == 3:
out_channels = [24, 240, 480, 960]
elif groups == 4:
out_channels = [24, 272, 544, 1088]
elif groups == 8:
out_channels = [24, 384, 768, 1536]
self.conv1 = BasicConv2d(3, out_channels[0], 3, padding=1, stride=1)
# Running input width; updated by _make_stage.
self.input_channels = out_channels[0]
# NOTE(review): the arguments of these three calls are not visible.
self.stage2 = self._make_stage(
self.stage3 = self._make_stage(
self.stage4 = self._make_stage(
self.avg = nn.AdaptiveAvgPool2d((1, 1))
self.fc = nn.Linear(out_channels[3], num_classes)
self.dropout = nn.Dropout(dropout_factor)
# Stem -> stages -> global average pool -> flatten -> dropout -> FC.
def forward(self, x):
x = self.conv1(x)
x = self.stage2(x)
x = self.stage3(x)
x = self.stage4(x)
x = self.avg(x)
x = x.view(x.size(0), -1)
x = self.dropout(x)
x = self.fc(x)
return x
def _make_stage(self, block, num_blocks, output_channels, stride, stage, groups):
"""Make one ShuffleNet stage.

Args:
    block: block type, shuffle unit
    num_blocks: how many blocks per stage
    output_channels: output depth channel number of this stage
    stride: the stride of the first block of this stage
    stage: stage index
    groups: group number of group convolution

Returns:
    a shuffle net stage
"""
# The first block uses the stage stride, every later block uses stride 1.
strides = [stride] + [1] * (num_blocks - 1)
# NOTE(review): this rebinds the `stage` parameter to an empty list, so the
# stage index is no longer available below - confirm this is intended.
stage = []
# NOTE(review): no append to `stage` is visible in the loop body, so the
# returned Sequential is empty as written.
for stride in strides:
self.input_channels = output_channels
return nn.Sequential(*stage)
def shufflenet():
    """Build a ``ShuffleNet`` with per-stage block counts ``[4, 8, 4]``.

    All other constructor arguments keep their defaults.
    """
    blocks_per_stage = [4, 8, 4]
    return ShuffleNet(blocks_per_stage)
"""shufflenetv2 in pytorch
[1] Ningning Ma, Xiangyu Zhang, Hai-Tao Zheng, Jian Sun
ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design
import torch
import torch.nn as nn
import torch.nn.functional as F
def channel_split(x, split):
    """Split a tensor into two pieces along the channel dimension.

    Args:
        x: input tensor; its dimension 1 must hold exactly ``2 * split``
            entries (checked with ``assert``).
        split: (int) channel size of each piece.

    Returns:
        The chunks produced by ``torch.split(x, split, dim=1)``.
    """
    channel_count = x.size(1)
    assert channel_count == split * 2
    pieces = torch.split(x, split, dim=1)
    return pieces
def channel_shuffle(x, groups):
    """Channel shuffle operation.

    Args:
        x: input tensor whose ``size()`` unpacks into four values
            (batch, channels, height, width).
        groups: input branch number; channels are regrouped as
            ``(groups, channels // groups)`` and the two axes are swapped.

    Returns:
        A tensor of the same shape with its channels interleaved across groups.
    """
    n, c, h, w = x.size()
    per_group = int(c // groups)

    # (n, groups, per_group, h, w) -> swap the two channel axes -> flatten back.
    grouped = x.view(n, groups, per_group, h, w)
    swapped = grouped.transpose(1, 2).contiguous()
    return swapped.view(n, -1, h, w)
# ShuffleNetV2 building block with a `shortcut` branch and a `residual`
# branch whose outputs are concatenated on dim 1 and then channel-shuffled.
# NOTE(review): indentation and several lines are missing from this listing
# (closing ')' of every nn.Sequential(, at least one `else:` in __init__ and
# in forward, and a super().__init__() call). Restore them before use.
class ShuffleUnit(nn.Module):
def __init__(self, in_channels, out_channels, stride):
# Kept so forward() can decide whether to split the input.
self.stride = stride
self.in_channels = in_channels
self.out_channels = out_channels
# Case 1: the unit changes resolution or width. Both branches see the full
# input and each ends in out_channels / 2 channels.
if stride != 1 or in_channels != out_channels:
# 1x1 conv -> 3x3 depthwise conv (groups=in_channels) with `stride` -> 1x1 conv -> BN.
self.residual = nn.Sequential(
nn.Conv2d(in_channels, in_channels, 1),
nn.Conv2d(in_channels, in_channels, 3, stride=stride, padding=1, groups=in_channels),
nn.Conv2d(in_channels, int(out_channels / 2), 1),
nn.BatchNorm2d(int(out_channels / 2)),
# 3x3 depthwise conv with `stride` -> 1x1 conv -> BN.
self.shortcut = nn.Sequential(
nn.Conv2d(in_channels, in_channels, 3, stride=stride, padding=1, groups=in_channels),
nn.Conv2d(in_channels, int(out_channels / 2), 1),
nn.BatchNorm2d(int(out_channels / 2)),
# NOTE(review): the lines below presumably belong to a missing `else:`
# (stride 1, equal widths): identity shortcut, residual on half the channels.
self.shortcut = nn.Sequential()
in_channels = int(in_channels / 2)
self.residual = nn.Sequential(
nn.Conv2d(in_channels, in_channels, 1),
nn.Conv2d(in_channels, in_channels, 3, stride=stride, padding=1, groups=in_channels),
nn.Conv2d(in_channels, in_channels, 1),
def forward(self, x):
# Same-shape unit: split the input in half, one half per branch.
if self.stride == 1 and self.out_channels == self.in_channels:
shortcut, residual = channel_split(x, int(self.in_channels / 2))
# NOTE(review): an `else:` appears to be missing here; as listed, the two
# assignments below would overwrite the split result.
shortcut = x
residual = x
shortcut = self.shortcut(shortcut)
residual = self.residual(residual)
# Concatenate the branches, then mix channels between the two halves.
x = torch.cat([shortcut, residual], dim=1)
x = channel_shuffle(x, 2)
return x
# ShuffleNetV2 classifier: `pre` stem, three stages of ShuffleUnit blocks,
# a 1x1 `conv5`, global average pooling, dropout and a linear head.
# NOTE(review): indentation and several lines are missing from this listing
# (closing ')' of the nn.Sequential( calls, an `else:` before the ValueError
# line, and a super().__init__() call). Restore them before use.
class ShuffleNetV2(nn.Module):
def __init__(self, ratio=1., num_classes=100, dropout_factor = 1.0):
# Channel table chosen by `ratio`: indices 0-2 are the stage widths and
# index 3 is the conv5 / fc input width.
if ratio == 0.5:
out_channels = [48, 96, 192, 1024]
elif ratio == 1:
out_channels = [116, 232, 464, 1024]
elif ratio == 1.5:
out_channels = [176, 352, 704, 1024]
elif ratio == 2:
out_channels = [244, 488, 976, 2048]
# NOTE(review): the exception below is constructed but never raised (no
# `raise`), so an unsupported ratio is not rejected at this line.
ValueError('unsupported ratio number')
self.pre = nn.Sequential(
nn.Conv2d(3, 24, 3, padding=1),
# Each stage is one stride-2 unit followed by `repeat` stride-1 units.
self.stage2 = self._make_stage(24, out_channels[0], 3)
self.stage3 = self._make_stage(out_channels[0], out_channels[1], 7)
self.stage4 = self._make_stage(out_channels[1], out_channels[2], 3)
self.conv5 = nn.Sequential(
nn.Conv2d(out_channels[2], out_channels[3], 1),
self.fc = nn.Linear(out_channels[3], num_classes)
# NOTE(review): `dropout_factor` is passed as nn.Dropout's probability `p`;
# the default 1.0 zeroes every activation in training mode - confirm intent.
self.dropout = nn.Dropout(dropout_factor)
# Forward pass: pre -> stage2 -> stage3 -> stage4 -> conv5 -> global average
# pool -> flatten -> dropout -> fc.
def forward(self, x):
x = self.pre(x)
x = self.stage2(x)
x = self.stage3(x)
x = self.stage4(x)
x = self.conv5(x)
x = F.adaptive_avg_pool2d(x, 1)
# Flatten everything after the batch dimension.
x = x.view(x.size(0), -1)
x = self.dropout(x)
x = self.fc(x)
return x
# Build one stage: a stride-2 ShuffleUnit that changes the width, then
# `repeat` stride-1 units at the output width.
def _make_stage(self, in_channels, out_channels, repeat):
layers = []
layers.append(ShuffleUnit(in_channels, out_channels, 2))
# Counts `repeat` down to zero, adding one same-shape unit per step.
while repeat:
layers.append(ShuffleUnit(out_channels, out_channels, 1))
repeat -= 1
return nn.Sequential(*layers)
def shufflenetv2():
    """Build a ``ShuffleNetV2`` using the constructor's default arguments."""
    network = ShuffleNetV2()
    return network
import math
import numpy as np
import torch
import torch.nn as nn
import torch.nn.init as init
import torch.utils.model_zoo as model_zoo
# Names exported by a wildcard import of this module.
__all__ = ['SqueezeNet', 'squeezenet1_0', 'squeezenet1_1']
# Download locations of the pre-trained SqueezeNet checkpoints, keyed by
# variant name. The keys are the ones the squeezenet1_0 / squeezenet1_1
# factory functions look up.
model_urls = {
    'squeezenet1_0': 'https://download.pytorch.org/models/squeezenet1_0-a815701f.pth',
    'squeezenet1_1': 'https://download.pytorch.org/models/squeezenet1_1-f364aa15.pth',
}
class Fire(nn.Module):
    """SqueezeNet Fire module.

    A 1x1 "squeeze" convolution reduces the input to ``squeeze_planes``
    channels. Two parallel "expand" convolutions (1x1 and 3x3 with padding 1)
    then widen it again, and their ReLU-activated outputs are concatenated on
    dim 1. The output therefore has
    ``expand1x1_planes + expand3x3_planes`` channels and the same spatial size
    as the input.

    Args:
        inplanes: number of input channels.
        squeeze_planes: output channels of the squeeze convolution.
        expand1x1_planes: output channels of the 1x1 expand convolution.
        expand3x3_planes: output channels of the 3x3 expand convolution.
    """

    def __init__(self, inplanes, squeeze_planes,
                 expand1x1_planes, expand3x3_planes):
        super(Fire, self).__init__()
        self.inplanes = inplanes
        self.squeeze = nn.Conv2d(inplanes, squeeze_planes, kernel_size=1)
        self.squeeze_activation = nn.ReLU(inplace=True)
        self.expand1x1 = nn.Conv2d(squeeze_planes, expand1x1_planes,
                                   kernel_size=1)
        self.expand1x1_activation = nn.ReLU(inplace=True)
        # padding=1 keeps the 3x3 branch the same spatial size as the 1x1
        # branch so the two can be concatenated.
        self.expand3x3 = nn.Conv2d(squeeze_planes, expand3x3_planes,
                                   kernel_size=3, padding=1)
        self.expand3x3_activation = nn.ReLU(inplace=True)

    def forward(self, x):
        """Squeeze ``x``, expand it through both branches and concatenate."""
        x = self.squeeze_activation(self.squeeze(x))
        return torch.cat([
            self.expand1x1_activation(self.expand1x1(x)),
            self.expand3x3_activation(self.expand3x3(x)),
        ], 1)
# SqueezeNet classifier: a `features` stack (stem conv, max-pools and Fire
# modules; layout depends on `version`) followed by a `classifier` head that
# ends in a 1x1 convolution with `num_classes` output channels.
# NOTE(review): indentation and several lines are missing from this listing
# (closing ')' of each nn.Sequential(, the `else:` between the two feature
# stacks, the contents of `classifier`, and the bodies of the init branches).
# `dropout_factor` is not referenced in the visible lines - presumably it is
# used inside the missing classifier definition; confirm.
class SqueezeNet(nn.Module):
def __init__(self, version=1.0, num_classes=1000,dropout_factor = 1.):
super(SqueezeNet, self).__init__()
# Only the two published variants are accepted.
if version not in [1.0, 1.1]:
raise ValueError("Unsupported SqueezeNet version {version}:"
"1.0 or 1.1 expected".format(version=version))
# Stored because forward() reshapes the output to (batch, num_classes).
self.num_classes = num_classes
# Version 1.0 layout: 7x7 stride-2 stem with 96 channels.
if version == 1.0:
self.features = nn.Sequential(
nn.Conv2d(3, 96, kernel_size=7, stride=2),
nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
Fire(96, 16, 64, 64),
Fire(128, 16, 64, 64),
Fire(128, 32, 128, 128),
nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
Fire(256, 32, 128, 128),
Fire(256, 48, 192, 192),
Fire(384, 48, 192, 192),
Fire(384, 64, 256, 256),
nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
Fire(512, 64, 256, 256),
# NOTE(review): an `else:` is presumably missing here. The stack below is the
# other layout (3x3 stride-2 stem with 64 channels, earlier max-pools).
self.features = nn.Sequential(
nn.Conv2d(3, 64, kernel_size=3, stride=2),
nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
Fire(64, 16, 64, 64),
Fire(128, 16, 64, 64),
nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
Fire(128, 32, 128, 128),
Fire(256, 32, 128, 128),
nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
Fire(256, 48, 192, 192),
Fire(384, 48, 192, 192),
Fire(384, 64, 256, 256),
Fire(512, 64, 256, 256),
# The final convolution is initialized differently from the rest
final_conv = nn.Conv2d(512, self.num_classes, kernel_size=1)
# NOTE(review): the classifier's layer list is missing from this listing.
self.classifier = nn.Sequential(
# Weight initialisation pass over every Conv2d; `final_conv` gets a normal
# distribution with std 0.01.
# NOTE(review): the non-final branch and the bias handling are missing, and
# `init.normal` (no trailing underscore) is the deprecated spelling of
# `init.normal_` - confirm against the installed torch version.
for m in self.modules():
if isinstance(m, nn.Conv2d):
if m is final_conv:
init.normal(m.weight.data, mean=0.0, std=0.01)
if m.bias is not None:
# Forward pass: features -> classifier -> reshape to (batch, num_classes).
def forward(self, x):
x = self.features(x)
# print("features(x):", x.size())
x = self.classifier(x)
# print("features(x):", x.size())
# The view only succeeds if the classifier output holds exactly
# num_classes values per sample.
return x.view(x.size(0), self.num_classes)
def squeezenet1_0(pretrained=False, **kwargs):
    r"""SqueezeNet model architecture from the `"SqueezeNet: AlexNet-level
    accuracy with 50x fewer parameters and <0.5MB model size"
    <https://arxiv.org/abs/1602.07360>`_ paper.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        **kwargs: forwarded unchanged to the ``SqueezeNet`` constructor.

    Returns:
        A ``SqueezeNet`` built with ``version=1.0``. With ``pretrained=True``
        every checkpoint tensor whose name and size match the freshly built
        model is loaded; tensors that do not match (for example a classifier
        built with a different ``num_classes``) keep their initial values.
    """
    model = SqueezeNet(version=1.0, **kwargs)
    if pretrained:
        model_dict = model.state_dict()
        pretrained_state_dict = model_zoo.load_url(model_urls['squeezenet1_0'])
        # Keep only tensors that exist in this model with an identical shape.
        pretrained_dict = {k: v for k, v in pretrained_state_dict.items()
                           if k in model_dict and model_dict[k].size() == v.size()}
        # strict=False: the filtered dict may not cover every parameter.
        model.load_state_dict(pretrained_dict, strict=False)
    return model
def squeezenet1_1(pretrained=False, **kwargs):
    r"""SqueezeNet 1.1 model from the official SqueezeNet repo.

    SqueezeNet 1.1 has 2.4x less computation and slightly fewer parameters
    than SqueezeNet 1.0, without sacrificing accuracy.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        **kwargs: forwarded unchanged to the ``SqueezeNet`` constructor.

    Returns:
        A ``SqueezeNet`` built with ``version=1.1``. With ``pretrained=True``
        every checkpoint tensor whose name and size match the freshly built
        model is loaded; tensors that do not match keep their initial values.
    """
    model = SqueezeNet(version=1.1, **kwargs)
    if pretrained:
        model_dict = model.state_dict()
        # Use the 1.1 checkpoint: the 1.0 file has a different stem and would
        # be dropped almost entirely by the size filter below.
        pretrained_state_dict = model_zoo.load_url(model_urls['squeezenet1_1'])
        # Keep only tensors that exist in this model with an identical shape.
        pretrained_dict = {k: v for k, v in pretrained_state_dict.items()
                           if k in model_dict and model_dict[k].size() == v.size()}
        # strict=False: the filtered dict may not cover every parameter.
        model.load_state_dict(pretrained_dict, strict=False)
    return model
if __name__ == "__main__":
    # Smoke test: profile a pre-trained SqueezeNet 1.0 on a random batch.
    # thop is imported here because only this script section needs it.
    from thop import profile

    dummy = torch.from_numpy(
        np.random.random([16, 3, 256, 256]).astype(np.float32)
    )
    model = squeezenet1_0(pretrained=True, num_classes=42, dropout_factor=0.5)

    flops, params = profile(model, inputs=(dummy,))
    output = model(dummy)
    print("flops: {}, params: {}".format(flops, params))
# date:2020-04-11
# Author: Eric.Lee
# function: common utils
import os
import shutil
import cv2
import numpy as np
import json
def mkdir_(path, flag_rm=False):
    """Make sure the directory ``path`` exists, optionally wiping it first.

    Args:
        path: directory to create.
        flag_rm: when true and ``path`` already exists, delete it with all
            of its contents and recreate it empty. When false, an existing
            directory is left untouched.

    Raises:
        OSError: if the directory cannot be removed or created.
    """
    if os.path.exists(path):
        if not flag_rm:
            return
        shutil.rmtree(path)
        print('remove {} done ~ '.format(path))
    # Reached when the path was missing or has just been removed.
    os.makedirs(path, exist_ok=True)
def plot_box(bbox, img, color=None, label=None, line_thickness=None):
    """Draw a bounding box, and optionally a filled label tag, onto ``img``.

    Args:
        bbox: indexable whose first four items are x1, y1, x2, y2.
        img: image array; drawn on through the cv2 calls, nothing is returned.
        color: drawing colour; a random 3-value colour is used when falsy.
        label: optional text drawn in a filled rectangle above the top-left corner.
        line_thickness: line thickness; derived from the larger of the first
            two image dimensions when falsy.
    """
    if line_thickness:
        tl = line_thickness
    else:
        tl = round(0.002 * max(img.shape[0:2])) + 1
    if not color:
        color = [random.randint(0, 255) for _ in range(3)]

    top_left = (int(bbox[0]), int(bbox[1]))
    bottom_right = (int(bbox[2]), int(bbox[3]))
    cv2.rectangle(img, top_left, bottom_right, color, thickness=tl)  # object bbox

    if not label:
        return

    tf = max(tl - 2, 1)
    text_w, text_h = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]  # label size
    tag_corner = top_left[0] + text_w, top_left[1] - text_h - 3  # text bbox
    cv2.rectangle(img, top_left, tag_corner, color, -1)  # filled label background
    # Draw the text itself.
    cv2.putText(img, label, (top_left[0], top_left[1] - 2), 0, tl / 4,
                [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA)
class JSON_Encoder(json.JSONEncoder):
    """JSON encoder that also serialises NumPy integers, floats and arrays."""

    def default(self, obj):
        """Convert NumPy values to built-in types; defer everything else.

        ``np.integer`` becomes ``int``, ``np.floating`` becomes ``float`` and
        ``np.ndarray`` becomes a (nested) list. Any other object is passed to
        ``json.JSONEncoder.default``, which raises ``TypeError``.
        """
        converters = (
            (np.integer, int),
            (np.floating, float),
            (np.ndarray, lambda array: array.tolist()),
        )
        for numpy_type, convert in converters:
            if isinstance(obj, numpy_type):
                return convert(obj)
        return super().default(obj)
# Scale interleaved landmark coordinates to pixel positions, bucket them by
# landmark index into named facial parts and, when `draw_circle` is true,
# draw a filled circle of radius 2 for each one on `img`.
# `output` is read as [x0, y0, x1, y1, ...]; x is multiplied by the image
# width and y by the image height (presumably normalised to [0, 1] - confirm).
# Returns `dict_landmarks`, keyed by part name.
# NOTE(review): indentation and several lines are missing from this listing.
# No visible line appends a point to any list, so as listed every entry stays
# empty; draw_contour unpacks each entry as an (x, y, color) triple, so the
# appends were presumably of that form - confirm. An `else:` before the last
# `if draw_circle:` also appears to be missing.
def draw_landmarks(img,output,draw_circle):
img_width = img.shape[1]
img_height = img.shape[0]
dict_landmarks = {}
# One iteration per landmark (two values each).
for i in range(int(output.shape[0]/2)):
x = output[i*2+0]*float(img_width)
y = output[i*2+1]*float(img_height)
# Indices 33-41: left eyebrow.
if 41>= i >=33:
if 'left_eyebrow' not in dict_landmarks.keys():
dict_landmarks['left_eyebrow'] = []
if draw_circle:
cv2.circle(img, (int(x),int(y)), 2, (0,255,0),-1)
# Indices 42-50: right eyebrow.
elif 50>= i >=42:
if 'right_eyebrow' not in dict_landmarks.keys():
dict_landmarks['right_eyebrow'] = []
if draw_circle:
cv2.circle(img, (int(x),int(y)), 2, (0,255,0),-1)
# Indices 60-67: left eye.
elif 67>= i >=60:
if 'left_eye' not in dict_landmarks.keys():
dict_landmarks['left_eye'] = []
if draw_circle:
cv2.circle(img, (int(x),int(y)), 2, (255,0,255),-1)
# Indices 68-75: right eye.
elif 75>= i >=68:
if 'right_eye' not in dict_landmarks.keys():
dict_landmarks['right_eye'] = []
if draw_circle:
cv2.circle(img, (int(x),int(y)), 2, (255,0,255),-1)
# Indices 96-97: drawn with no dictionary entry and, as listed, regardless
# of `draw_circle`.
elif 97>= i >=96:
cv2.circle(img, (int(x),int(y)), 2, (0,0,255),-1)
# Indices 51-54: nose bridge.
elif 54>= i >=51:
if 'bridge_nose' not in dict_landmarks.keys():
dict_landmarks['bridge_nose'] = []
if draw_circle:
cv2.circle(img, (int(x),int(y)), 2, (0,170,255),-1)
# Indices 0-32: stored under the key 'basin'.
elif 32>= i >=0:
if 'basin' not in dict_landmarks.keys():
dict_landmarks['basin'] = []
if draw_circle:
cv2.circle(img, (int(x),int(y)), 2, (255,30,30),-1)
# Indices 55-59: nose wings.
elif 59>= i >=55:
if 'wing_nose' not in dict_landmarks.keys():
dict_landmarks['wing_nose'] = []
if draw_circle:
cv2.circle(img, (int(x),int(y)), 2, (0,255,255),-1)
# Indices 76-87: outer lip.
elif 87>= i >=76:
if 'out_lip' not in dict_landmarks.keys():
dict_landmarks['out_lip'] = []
if draw_circle:
cv2.circle(img, (int(x),int(y)), 2, (255,255,0),-1)
# Indices 88-95: inner lip.
elif 95>= i >=88:
if 'in_lip' not in dict_landmarks.keys():
dict_landmarks['in_lip'] = []
if draw_circle:
cv2.circle(img, (int(x),int(y)), 2, (50,220,255),-1)
# Presumably the fallback for indices outside every range above - confirm.
if draw_circle:
cv2.circle(img, (int(x),int(y)), 2, (255,0,255),-1)
return dict_landmarks
# Walk the landmark groups in `dict` and build integer point arrays for each.
# Each group is indexed as a sequence of (x, y, color) triples; the colour is
# taken from the group's first triple.
# NOTE(review): the parameter name `dict` shadows the builtin (kept because it
# is part of the signature). Indentation and lines are missing from this
# listing: no drawing call remains, so `image`, `color`, `pts` and
# `points_array` are unused as listed, and an `else:` presumably separated
# the `pts` branch from the `points_array` branch - confirm.
def draw_contour(image,dict):
for key in dict.keys():
# print(key)
# Fails with IndexError if a group is empty.
_,_,color = dict[key][0]
# 'basin' and 'wing_nose' get an (N, 2) int32 array of their points.
if 'basin' == key or 'wing_nose' == key:
pts = np.array([[dict[key][i][0],dict[key][i][1]] for i in range(len(dict[key]))],np.int32)
# print(pts)
# A (1, N, 2) int32 array holding the same x/y pairs.
points_array = np.zeros((1,len(dict[key]),2),dtype = np.int32)
for i in range(len(dict[key])):
x,y,_ = dict[key][i]
points_array[0,i,0] = x
points_array[0,i,1] = y
# cv2.fillPoly(image, points_array, color)
# date:2020-04-11
# Author: Eric.Lee
# function: model utils
import os
import numpy as np
import torch
import torch.backends.cudnn as cudnn
import random
def get_acc(output, label):
    """Return the fraction of rows of ``output`` that are predicted correctly.

    The prediction for a row is the index of its maximum along dim 1; the
    count of predictions equal to ``label`` is divided by ``output.shape[0]``.
    """
    predictions = output.max(1)[1]
    hits = (predictions == label).sum().item()
    return hits / float(output.shape[0])
def set_learning_rate(optimizer, lr):
    """Overwrite the ``'lr'`` entry of every group in ``optimizer.param_groups``."""
    for group in optimizer.param_groups:
        group.update({'lr': lr})
def set_seed(seed = 666):
    """Seed every random number generator this module has in scope.

    Seeds Python's ``random``, NumPy's global generator and PyTorch's CPU
    generator. When CUDA is available it also seeds all CUDA devices and sets
    ``cudnn.deterministic``.

    Args:
        seed: integer seed applied to all generators.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
        cudnn.deterministic = True
def split_trainval_datasets(ops):
    """Split per-class image folders into training and validation lists.

    Every entry of ``ops.train_path`` is treated as one class folder. Folders
    are sorted by the integer before the first ``'.'`` in their name, and a
    class's label is its position in that order. Within a folder, ``.jpg``
    files whose ``os.listdir`` position is below
    ``int(len(entries) * ops.val_factor)`` go to the validation split; the
    remaining ``.jpg`` files go to the training split. Each sample lands in
    exactly one split, with its label recorded next to it.

    Args:
        ops: object with ``train_path`` (directory prefix; it is concatenated
            directly with the folder name, so it must end with a path
            separator) and ``val_factor`` (fraction of each folder's entries
            reserved for validation).

    Returns:
        ``(train_paths, train_labels, val_paths, val_labels)``.

    Raises:
        ValueError: if a folder name does not start with an integer.
    """
    print(' --------------->>> split_trainval_datasets ')
    train_split_datasets = []
    train_split_datasets_label = []
    val_split_datasets = []
    val_split_datasets_label = []

    class_dirs = sorted(os.listdir(ops.train_path), key=lambda x: int(x.split('.')[0]))
    for idx, doc in enumerate(class_dirs):
        data_list = os.listdir(ops.train_path + doc)
        # The threshold counts every directory entry, not only the .jpg files.
        cal_split_num = int(len(data_list) * ops.val_factor)
        for i, file in enumerate(data_list):
            if '.jpg' not in file:
                continue
            sample = ops.train_path + doc + '/' + file
            if i < cal_split_num:
                val_split_datasets.append(sample)
                val_split_datasets_label.append(idx)
            else:
                train_split_datasets.append(sample)
                train_split_datasets_label.append(idx)
            print(sample, idx)

    print('train_split_datasets len {}'.format(len(train_split_datasets)))
    print('val_split_datasets len {}'.format(len(val_split_datasets)))
    return train_split_datasets,train_split_datasets_label,val_split_datasets,val_split_datasets_label
