from yolov3 import Yolov3, Yolov3Tiny
from utils.parse_config import parse_data_cfg
from utils.torch_utils import select_device
import torch
from torch.utils.data import DataLoader
from utils.datasets import LoadImagesAndLabels
from utils.utils import *
import os
import numpy as np
def set_learning_rate(optimizer, lr):
    """Set the learning rate of every parameter group of ``optimizer``.

    Args:
        optimizer: object exposing ``param_groups``, an iterable of dicts
            (the layout used by ``torch.optim`` optimizers).
        lr: value written to the ``'lr'`` key of each parameter group.
    """
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
def train(data_cfg='cfg/voc.data',
          accumulate=1):
    """Train a YOLOv3 or YOLOv3-tiny model from a ``.data`` config file.

    The config returned by ``parse_data_cfg`` must provide the keys read
    below: ``gpus``, ``num_workers``, ``cfg_model``, ``train``, ``valid``,
    ``classes``, ``finetune_model``, ``batch_size``, ``img_size``,
    ``multi_scale``, ``epochs``, ``lr_step`` and ``lr0``.

    After every epoch the latest checkpoint is saved under
    ``./weights-yolov3-<name>[-tiny]/`` where ``<name>`` is the base name of
    ``data_cfg`` without ``.data``. Every 5th epoch a numbered backup is
    saved as well (and a "best" checkpoint while ``best_loss == test_loss``).

    Args:
        data_cfg: path of the ``.data`` configuration file.
        accumulate: number of batches whose gradients are accumulated
            before each optimizer step.
    """
    # Imported locally so the function does not depend on the star import
    # from ``utils.utils`` to provide these names.
    import time
    from collections import defaultdict

    device = select_device()

    # Configure run
    cfg = parse_data_cfg(data_cfg)  # training configuration, as a dict
    gpus = cfg['gpus']
    num_workers = int(cfg['num_workers'])
    cfg_model = cfg['cfg_model']
    train_path = cfg['train']
    valid_path = cfg['valid']
    num_classes = int(cfg['classes'])
    finetune_model = cfg['finetune_model']
    batch_size = int(cfg['batch_size'])
    img_size = int(cfg['img_size'])
    # The config stores the flag as text; only the exact string 'True' enables it.
    multi_scale = cfg['multi_scale'] == 'True'
    epochs = int(cfg['epochs'])
    lr_step = str(cfg['lr_step'])
    lr0 = float(cfg['lr0'])

    print('data_cfg : ', data_cfg)
    print('voc.data config len : ', len(cfg))
    print('gpus : ', gpus)
    print('num_workers : ', num_workers)
    print('model : ', cfg_model)
    print('finetune_model : ', finetune_model)
    print('train_path : ', train_path)
    print('valid_ptah : ', valid_path)
    print('num_classes : ', num_classes)
    print('batch_size : ', batch_size)
    print('img_size : ', img_size)
    print('multi_scale : ', multi_scale)
    print('lr0 : ', lr0)
    print('lr_step : ', lr_step)

    # Load model
    pattern_data_ = data_cfg.split("/")[-1].replace(".data", "")
    # The reference anchors are defined for a 416x416 input; rescale them
    # to the configured image size.
    a_scale = 416. / img_size
    if "-tiny" in cfg_model:
        anchors = [(10, 14), (23, 27), (37, 58), (81, 82), (135, 169), (344, 319)]
        model_cls = Yolov3Tiny
        weights = './weights-yolov3-{}-tiny/'.format(pattern_data_)
    else:
        anchors = [(10, 13), (16, 30), (33, 23), (30, 61), (62, 45), (59, 119),
                   (116, 90), (156, 198), (373, 326)]
        model_cls = Yolov3
        weights = './weights-yolov3-{}/'.format(pattern_data_)
    anchors_new = [(int(w / a_scale), int(h / a_scale)) for w, h in anchors]
    model = model_cls(num_classes, anchors=anchors_new)

    # Create the directory the checkpoints are saved into
    os.makedirs(weights, exist_ok=True)

    model = model.to(device)
    latest = weights + 'latest_{}.pt'.format(img_size)
    best = weights + 'best_{}.pt'.format(img_size)

    # Optimizer
    optimizer = torch.optim.SGD(model.parameters(), lr=lr0, momentum=0.9, weight_decay=0.0005)

    start_epoch = 0
    if os.access(finetune_model, os.F_OK):  # load retrain/finetune_model
        print('loading yolo-v3 finetune_model ~~~~~~', finetune_model)
        not_load_filters = 3 * (80 + 5)  # voc: 3*(20+5), coco: 3*(80+5)=255
        # NOTE: torch.load unpickles the file; only load checkpoints from a trusted source.
        chkpt = torch.load(finetune_model, map_location=device)
        # Skip scalar entries and tensors whose first dimension matches the
        # pretrained detection-head width, so the remaining weights can be
        # reused with a different number of classes.
        model.load_state_dict(
            {k: v for k, v in chkpt['model'].items()
             if v.numel() > 1 and v.shape[0] != not_load_filters},
            strict=False)
        if 'coco' not in finetune_model:
            start_epoch = chkpt['epoch']
            if chkpt['optimizer'] is not None:
                # NOTE(review): restoring the optimizer state is inferred from
                # the check above; MultiStepLR with last_epoch != -1 needs the
                # 'initial_lr' entries a saved optimizer state carries.
                optimizer.load_state_dict(chkpt['optimizer'])
                best_loss = chkpt['best_loss']

    # Set scheduler: the learning rate is multiplied by gamma at each milestone epoch
    milestones = [int(i) for i in lr_step.split(",")]
    print('milestones : ', milestones)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=milestones, gamma=0.1,
                                                     last_epoch=start_epoch - 1)

    # Dataset
    print('multi_scale : ', multi_scale)
    dataset = LoadImagesAndLabels(train_path, batch_size=batch_size, img_size=img_size,
                                  augment=True, multi_scale=multi_scale)
    print('--------------->>> imge num : ', len(dataset))

    # Dataloader
    # NOTE(review): only ``dataset`` and ``drop_last`` were visible in the
    # damaged call; batch_size / num_workers come from the config, and
    # shuffling plus the dataset's own collate_fn (when it defines one) are
    # assumptions to confirm against LoadImagesAndLabels.
    dataloader = DataLoader(dataset,
                            batch_size=batch_size,
                            num_workers=num_workers,
                            shuffle=True,
                            drop_last=False,
                            collate_fn=getattr(dataset, 'collate_fn', None))

    # Start training
    t = time.time()
    nB = len(dataloader)
    n_burnin = min(round(nB / 5 + 1), 1000)  # burn-in batches

    # NOTE(review): no validation runs in this function, so test_loss stays
    # at inf; best_loss is reset here as in the original, which discards the
    # value read from the checkpoint above.
    best_loss = float('inf')
    test_loss = float('inf')

    model.train()
    flag_start = False
    # NOTE(review): the loop always starts at epoch 0; start_epoch only
    # positions the scheduler.
    for epoch in range(0, epochs):
        print(' ~~~~')

        # Step the scheduler once per epoch, starting after the first epoch
        if flag_start:
            scheduler.step()
        flag_start = True

        mloss = defaultdict(float)  # mean loss
        for i, (imgs, targets, img_path_, _) in enumerate(dataloader):
            multi_size = imgs.size()
            imgs = imgs.to(device)
            targets = targets.to(device)

            nt = len(targets)
            if nt == 0:  # if no targets continue
                continue

            # SGD burn-in: ramp the learning rate up during the first batches
            if epoch == 0 and i <= n_burnin:
                lr = lr0 * (i / n_burnin) ** 4
                set_learning_rate(optimizer, lr)

            # Run model
            pred = model(imgs)

            # Build targets
            target_list = build_targets(model, targets)

            # Compute loss
            loss, loss_dict = compute_loss(pred, target_list)

            # Compute gradient
            loss.backward()

            # Accumulate gradient for x batches before optimizing
            if (i + 1) % accumulate == 0 or (i + 1) == nB:
                optimizer.step()
                optimizer.zero_grad()

            # Running epoch-means of tracked metrics
            for key, val in loss_dict.items():
                mloss[key] = (mloss[key] * i + val) / (i + 1)

            # Report the learning rate actually held by the optimizer, which
            # during burn-in differs from the scheduler's value.
            current_lr = optimizer.param_groups[0]['lr']
            print(' Epoch {:3d}/{:3d}, Batch {:6d}/{:6d}, Img_size {}x{}, nTargets {}, lr {:.6f}, loss: xy {:.3f}, wh {:.3f}, '
                  'conf {:.3f}, cls {:.3f}, total {:.3f}, time {:.3f}s'.format(
                      epoch, epochs - 1, i, nB - 1, multi_size[2], multi_size[3],
                      nt, current_lr, mloss['xy'], mloss['wh'], mloss['conf'],
                      mloss['cls'], mloss['total'], time.time() - t),
                  end='\r')
            t = time.time()

        # Create checkpoint
        is_ddp = isinstance(model, torch.nn.parallel.DistributedDataParallel)
        chkpt = {'epoch': epoch,
                 'best_loss': best_loss,
                 'model': model.module.state_dict() if is_ddp else model.state_dict(),
                 'optimizer': optimizer.state_dict()}

        # Save latest checkpoint
        torch.save(chkpt, latest)

        # Save best checkpoint
        if best_loss == test_loss and epoch % 5 == 0:
            torch.save(chkpt, best)

        # Save a numbered backup every 5 epochs
        if epoch > 0 and epoch % 5 == 0:
            torch.save(chkpt, weights + 'yoloV3_{}_epoch_{}.pt'.format(img_size, epoch))

        # Delete checkpoint
        del chkpt
if __name__ == '__main__':
    # Run training with the default configuration before reporting completion.
    # NOTE(review): the call was missing from the script; confirm which
    # data_cfg should be used here.
    train()
    print('well done ~ ')
import os
import numpy as np
from collections import OrderedDict
import torch
import torch.nn.functional as F
import torch.nn as nn
# reference:
# https://github.com/ultralytics/yolov3/blob/master/models.py
# https://github.com/TencentYoutuResearch/ObjectDetection-OneStageDet/blob/master/yolo/vedanet/network/backbone/brick/darknet53.py
# network structure https://blog.csdn.net/u010397980/article/details/85058630
flag_yolo_structure = False  # set to True to print network-structure debug logs
class Conv2dBatchLeaky(nn.Module):
    """Conv2d -> BatchNorm2d -> LeakyReLU building block.

    Padding is half the kernel size (per dimension when ``kernel_size`` is a
    list or tuple), so an odd kernel with stride 1 keeps the spatial size.

    Args:
        in_channels: number of input channels.
        out_channels: number of output channels.
        kernel_size: int, or list/tuple with one entry per spatial dimension.
        stride: convolution stride.
        leaky_slope: negative slope of the LeakyReLU.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride, leaky_slope=0.1):
        super(Conv2dBatchLeaky, self).__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.stride = stride
        if isinstance(kernel_size, (list, tuple)):
            self.padding = [int(ii / 2) for ii in kernel_size]
            if flag_yolo_structure:
                print('------------------->>>> Conv2dBatchLeaky isinstance')
        else:
            self.padding = int(kernel_size / 2)
        self.leaky_slope = leaky_slope

        # Layer
        # LeakyReLU : y = max(0, x) + leaky_slope*min(0,x)
        # The convolution has no bias because BatchNorm2d supplies its own shift.
        self.layers = nn.Sequential(
            nn.Conv2d(self.in_channels, self.out_channels, self.kernel_size, self.stride, self.padding, bias=False),
            nn.BatchNorm2d(self.out_channels),
            nn.LeakyReLU(self.leaky_slope, inplace=True),
        )

    def forward(self, x):
        """Apply convolution, batch normalisation and LeakyReLU to ``x``."""
        x = self.layers(x)
        return x
class ResBlockSum(nn.Module):
    """Residual block: a 1x1 bottleneck and a 3x3 convolution added to the input.

    Args:
        nchannels: channel count of both the input and the output; the
            1x1 convolution halves it and the 3x3 convolution restores it.
    """

    def __init__(self, nchannels):
        # nn.Module must be initialised before sub-modules can be assigned.
        super(ResBlockSum, self).__init__()
        self.block = nn.Sequential(
            Conv2dBatchLeaky(nchannels, int(nchannels / 2), 1, 1),
            Conv2dBatchLeaky(int(nchannels / 2), nchannels, 3, 1),
        )

    def forward(self, x):
        """Return ``x`` plus the output of the two-convolution branch."""
        return x + self.block(x)
class HeadBody(nn.Module):
    """Stack of five Conv2dBatchLeaky layers alternating 1x1 and 3x3 kernels.

    The channel count goes ``in_channels -> out_channels`` and then
    alternates between ``out_channels * 2`` (3x3) and ``out_channels`` (1x1),
    ending at ``out_channels``.

    Args:
        in_channels: number of input channels.
        out_channels: number of output channels.
    """

    def __init__(self, in_channels, out_channels):
        super(HeadBody, self).__init__()
        self.layer = nn.Sequential(
            Conv2dBatchLeaky(in_channels, out_channels, 1, 1),
            Conv2dBatchLeaky(out_channels, out_channels * 2, 3, 1),
            Conv2dBatchLeaky(out_channels * 2, out_channels, 1, 1),
            Conv2dBatchLeaky(out_channels, out_channels * 2, 3, 1),
            Conv2dBatchLeaky(out_channels * 2, out_channels, 1, 1),
        )

    def forward(self, x):
        """Run ``x`` through the five convolution blocks."""
        x = self.layer(x)
        return x
class Upsample(nn.Module):
    """Upsampling layer built on ``F.interpolate``.

    Custom layer used instead of ``nn.Upsample``, which gave a deprecation
    warning.

    Args:
        scale_factor: multiplier applied to the spatial size.
        mode: interpolation mode passed to ``F.interpolate``.
    """

    def __init__(self, scale_factor=1, mode='nearest'):
        super(Upsample, self).__init__()
        self.scale_factor = scale_factor
        self.mode = mode

    def forward(self, x):
        """Return ``x`` resized by ``scale_factor`` using ``mode``."""
        return F.interpolate(x, scale_factor=self.scale_factor, mode=self.mode)
# default anchors=[(10,13), (16,30), (33,23), (30,61), (62,45), (59,119), (116,90), (156,198), (373,326)]
class YOLOLayer(nn.Module):
    """Detection layer that reshapes a head feature map into box predictions.

    Args:
        anchors: sequence of (width, height) anchor pairs for this layer.
        nC: number of classes.
    """

    def __init__(self, anchors, nC):
        super(YOLOLayer, self).__init__()
        self.anchors = torch.FloatTensor(anchors)
        self.nA = len(anchors)  # number of anchors (3)
        self.nC = nC  # number of classes
        self.img_size = 0  # 0 until create_grids() has run for an input size
        if flag_yolo_structure:
            print('init YOLOLayer ------ >>> ')
            print('anchors : ', self.anchors)
            print('nA : ', self.nA)
            print('nC : ', self.nC)
            print('img_size : ', self.img_size)

    def forward(self, p, img_size, var=None):
        """Convert the head feature map ``p`` into predictions.

        Args:
            p: feature map; its first dimension is the batch size, its last
                dimension the grid size, and it is viewed as
                ``(bs, nA, nC + 5, nG, nG)``.
            img_size: input image size; grids are rebuilt when it changes.
            var: unused.

        Returns:
            In training mode, ``p`` reshaped to ``(bs, nA, nG, nG, nC + 5)``.
            Otherwise a tuple ``(io, p)`` where ``io`` holds the decoded
            predictions flattened to ``(bs, nA * nG * nG, nC + 5)``.
        """
        bs, nG = p.shape[0], p.shape[-1]  # batch_size, grid
        if flag_yolo_structure:
            print('bs, nG --->>> ', bs, nG)
        if self.img_size != img_size:
            create_grids(self, img_size, nG, p.device)

        # p.view(bs, 255, 13, 13) -- > (bs, 3, 13, 13, 85)
        # (bs, anchors, grid, grid, xywh + confidence + classes)
        p = p.view(bs, self.nA, self.nC + 5, nG, nG).permute(0, 1, 3, 4, 2).contiguous()  # prediction

        if self.training:
            return p

        # inference
        io = p.clone()  # inference output
        io[..., 0:2] = torch.sigmoid(io[..., 0:2]) + self.grid_xy  # xy
        io[..., 2:4] = torch.exp(io[..., 2:4]) * self.anchor_wh  # wh yolo method
        io[..., 4:] = torch.sigmoid(io[..., 4:])  # p_conf, p_cls
        io[..., :4] *= self.stride  # scale boxes from grid units by the stride
        if self.nC == 1:
            io[..., 5] = 1  # single-class model
        # flatten prediction, reshape from [bs, nA, nG, nG, nC + 5] to [bs, nA * nG * nG, nC + 5]
        return io.view(bs, -1, 5 + self.nC), p
def create_grids(self, img_size, nG, device='cpu'):
    """Build and store the grid offsets and scaled anchors on a YOLO layer.

    Sets ``img_size``, ``stride``, ``grid_xy``, ``anchor_vec``, ``anchor_wh``
    and ``nG`` on ``self``, reading ``self.anchors`` and ``self.nA``.

    Args:
        self: layer object to update (it is passed explicitly; this is a
            module-level function).
        img_size: input image size.
        nG: feature-map grid size, e.g. 13, 26 or 52.
        device: device the created tensors are moved to.
    """
    self.img_size = img_size
    self.stride = img_size / nG
    if flag_yolo_structure:
        print('create_grids stride : ', self.stride)

    # build xy offsets: grid_x holds the column index, grid_y the row index
    grid_x = torch.arange(nG).repeat((nG, 1)).view((1, 1, nG, nG)).float()
    grid_y = grid_x.permute(0, 1, 3, 2)
    self.grid_xy = torch.stack((grid_x, grid_y), 4).to(device)
    if flag_yolo_structure:
        print('grid_x : ', grid_x.size(), grid_x)
        print('grid_y : ', grid_y.size(), grid_y)
        print('grid_xy : ', self.grid_xy.size(), self.grid_xy)

    # build wh gains: anchors normalised by the stride
    self.anchor_vec = self.anchors.to(device) / self.stride
    self.anchor_wh = self.anchor_vec.view(1, self.nA, 1, 1, 2).to(device)
    self.nG = torch.FloatTensor([nG]).to(device)
def get_yolo_layer_index(module_list):
    """Return the indices of the entries of ``module_list`` that hold a YOLO layer.

    An entry counts as a YOLO layer when its first element exposes
    ``img_size`` (and ``nG`` once ``img_size`` is non-zero).

    Args:
        module_list: indexable sequence of layer groups.

    Returns:
        List of matching indices, in order.

    Raises:
        AssertionError: if no YOLO layer is found.
    """
    yolo_layer_index = []
    for index, layers in enumerate(module_list):
        try:
            # only yolo layer need img_size and nG; any other layer raises here
            _ = layers[0].img_size and layers[0].nG
        except (AttributeError, IndexError, TypeError):
            continue
        yolo_layer_index.append(index)
    # Raised explicitly so the check also runs under ``python -O``.
    if not yolo_layer_index:
        raise AssertionError("can not find yolo layer")
    return yolo_layer_index
# ----------------------yolov3------------------------
class Yolov3(nn.Module):
    """YOLOv3 detector: a residual backbone with three detection heads.

    The network is stored as ``self.module_list``, an ``nn.ModuleList`` of 13
    ``nn.Sequential`` groups ("list 0" .. "list 12" below) that ``forward``
    wires together.

    Args:
        num_classes: number of object classes.
        anchors: sequence of (width, height) anchors; the last third is used
            by the first head, the middle third by the second and the first
            third by the third. The default list is only read, never mutated.
    """

    def __init__(self, num_classes=80, anchors=[(10,13), (16,30), (33,23), (30,61), (62,45), (59,119), (116,90), (156,198), (373,326)]):
        super(Yolov3, self).__init__()
        anchor_mask1 = [i for i in range(2 * len(anchors) // 3, len(anchors), 1)]  # [6, 7, 8]
        anchor_mask2 = [i for i in range(len(anchors) // 3, 2 * len(anchors) // 3, 1)]  # [3, 4, 5]
        anchor_mask3 = [i for i in range(0, len(anchors) // 3, 1)]  # [0, 1, 2]
        if flag_yolo_structure:
            print('anchor_mask1 : ', anchor_mask1)  # anchors for large objects
            print('anchor_mask2 : ', anchor_mask2)  # anchors for medium objects
            print('anchor_mask3 : ', anchor_mask3)  # anchors for small objects

        # Network
        # OrderedDict keeps the insertion order of its key-value pairs; the
        # keys become the sub-module names (and therefore state_dict keys).
        layer_list = []

        # ****** Conv2dBatchLeaky *****
        # op     : Conv2d, BatchNorm2d, LeakyReLU
        # inputs : in_channels, out_channels, kernel_size, stride, leaky_slope
        #
        # ****** ResBlockSum ******
        # op     : Conv2dBatchLeaky * 2 + x
        # inputs : nchannels

        # list 0
        layer_list.append(OrderedDict([
            # NOTE(review): the fifth positional argument is leaky_slope, so
            # this first layer uses a slope of 1; confirm this is intended.
            ('0_stage1_conv', Conv2dBatchLeaky(3, 32, 3, 1, 1)),  # 416 x 416 x 32 # Convolutional
            ("0_stage2_conv", Conv2dBatchLeaky(32, 64, 3, 2)),  # 208 x 208 x 64 # Convolutional
            ("0_stage2_ressum1", ResBlockSum(64)),  # Convolutional*2 + Residual
            ("0_stage3_conv", Conv2dBatchLeaky(64, 128, 3, 2)),  # 104 x 104 x 128 # Convolutional
            ("0_stage3_ressum1", ResBlockSum(128)),
            ("0_stage3_ressum2", ResBlockSum(128)),  # (Convolutional*2 + Residual)**2
            ("0_stage4_conv", Conv2dBatchLeaky(128, 256, 3, 2)),  # 52 x 52 x 256 # Convolutional
            ("0_stage4_ressum1", ResBlockSum(256)),
            ("0_stage4_ressum2", ResBlockSum(256)),
            ("0_stage4_ressum3", ResBlockSum(256)),
            ("0_stage4_ressum4", ResBlockSum(256)),
            ("0_stage4_ressum5", ResBlockSum(256)),
            ("0_stage4_ressum6", ResBlockSum(256)),
            ("0_stage4_ressum7", ResBlockSum(256)),
            ("0_stage4_ressum8", ResBlockSum(256)),  # 52 x 52 x 256 output_feature_0 (Convolutional*2 + Residual)**8
        ]))

        # list 1
        layer_list.append(OrderedDict([
            ("1_stage5_conv", Conv2dBatchLeaky(256, 512, 3, 2)),  # 26 x 26 x 512 # Convolutional
            ("1_stage5_ressum1", ResBlockSum(512)),
            ("1_stage5_ressum2", ResBlockSum(512)),
            ("1_stage5_ressum3", ResBlockSum(512)),
            ("1_stage5_ressum4", ResBlockSum(512)),
            ("1_stage5_ressum5", ResBlockSum(512)),
            ("1_stage5_ressum6", ResBlockSum(512)),
            ("1_stage5_ressum7", ResBlockSum(512)),
            ("1_stage5_ressum8", ResBlockSum(512)),  # 26 x 26 x 512 output_feature_1 # (Convolutional*2 + Residual)**8
        ]))

        # ****** HeadBody ******
        # op     : Conv2dBatchLeaky * 5
        # inputs : in_channels, out_channels

        # list 2
        layer_list.append(OrderedDict([
            ("2_stage6_conv", Conv2dBatchLeaky(512, 1024, 3, 2)),  # 13 x 13 x 1024 # Convolutional
            ("2_stage6_ressum1", ResBlockSum(1024)),
            ("2_stage6_ressum2", ResBlockSum(1024)),
            ("2_stage6_ressum3", ResBlockSum(1024)),
            ("2_stage6_ressum4", ResBlockSum(1024)),  # 13 x 13 x 1024 output_feature_2 # (Convolutional*2 + Residual)**4
            ("2_headbody1", HeadBody(in_channels=1024, out_channels=512)),  # 13 x 13 x 512 # Convolutional Set = Conv2dBatchLeaky * 5
        ]))

        # list 3
        layer_list.append(OrderedDict([
            ("3_conv_1", Conv2dBatchLeaky(in_channels=512, out_channels=1024, kernel_size=3, stride=1)),
            ("3_conv_2", nn.Conv2d(in_channels=1024, out_channels=len(anchor_mask1) * (num_classes + 5), kernel_size=1, stride=1, padding=0, bias=True)),
        ]))  # predict one

        # list 4
        layer_list.append(OrderedDict([
            ("4_yolo", YOLOLayer([anchors[i] for i in anchor_mask1], num_classes))
        ]))  # 3*((x, y, w, h, confidence) + classes )

        # list 5
        layer_list.append(OrderedDict([
            ("5_conv", Conv2dBatchLeaky(512, 256, 1, 1)),
            ("5_upsample", Upsample(scale_factor=2)),
        ]))

        # list 6 (input: list 5 output concatenated with the list 1 output, 256 + 512 channels)
        layer_list.append(OrderedDict([
            ("6_head_body2", HeadBody(in_channels=768, out_channels=256))  # Convolutional Set = Conv2dBatchLeaky * 5
        ]))

        # list 7
        layer_list.append(OrderedDict([
            ("7_conv_1", Conv2dBatchLeaky(in_channels=256, out_channels=512, kernel_size=3, stride=1)),
            ("7_conv_2", nn.Conv2d(in_channels=512, out_channels=len(anchor_mask2) * (num_classes + 5), kernel_size=1, stride=1, padding=0, bias=True)),
        ]))  # predict two

        # list 8
        layer_list.append(OrderedDict([
            ("8_yolo", YOLOLayer([anchors[i] for i in anchor_mask2], num_classes))
        ]))  # 3*((x, y, w, h, confidence) + classes )

        # list 9
        layer_list.append(OrderedDict([
            ("9_conv", Conv2dBatchLeaky(256, 128, 1, 1)),
            ("9_upsample", Upsample(scale_factor=2)),
        ]))

        # list 10 (input: list 9 output concatenated with the list 0 output, 128 + 256 channels)
        layer_list.append(OrderedDict([
            ("10_head_body3", HeadBody(in_channels=384, out_channels=128))  # Convolutional Set = Conv2dBatchLeaky * 5
        ]))

        # list 11
        layer_list.append(OrderedDict([
            ("11_conv_1", Conv2dBatchLeaky(in_channels=128, out_channels=256, kernel_size=3, stride=1)),
            ("11_conv_2", nn.Conv2d(in_channels=256, out_channels=len(anchor_mask3) * (num_classes + 5), kernel_size=1, stride=1, padding=0, bias=True)),
        ]))  # predict three

        # list 12
        layer_list.append(OrderedDict([
            ("12_yolo", YOLOLayer([anchors[i] for i in anchor_mask3], num_classes))
        ]))  # 3*((x, y, w, h, confidence) + classes )

        # nn.ModuleList behaves like a Python list that registers its
        # modules; it has no forward() of its own.
        self.module_list = nn.ModuleList([nn.Sequential(i) for i in layer_list])
        self.yolo_layer_index = get_yolo_layer_index(self.module_list)
        if flag_yolo_structure:
            print('yolo_layer : ', len(layer_list), '\n')

    def forward(self, x):
        """Run the network on the image batch ``x``.

        The image size is taken from the last dimension of ``x``.

        Returns:
            In training mode, a list with the three YOLO-layer outputs.
            Otherwise ``(inference_output, training_outputs)`` where the
            inference outputs of the three heads are concatenated along
            dimension 1.
        """
        img_size = x.shape[-1]
        if flag_yolo_structure:
            print('forward img_size : ', img_size, x.shape)
        output = []

        x = self.module_list[0](x)
        x_route1 = x
        x = self.module_list[1](x)
        x_route2 = x
        x = self.module_list[2](x)

        # First detection head
        yolo_head = self.module_list[3](x)
        if flag_yolo_structure:
            print('mask1 yolo_head : ', yolo_head.size())
        yolo_head_out_13x13 = self.module_list[4][0](yolo_head, img_size)
        output.append(yolo_head_out_13x13)

        # Second detection head: upsample and merge with the list 1 features
        x = self.module_list[5](x)
        x = torch.cat([x, x_route2], 1)
        x = self.module_list[6](x)
        yolo_head = self.module_list[7](x)
        if flag_yolo_structure:
            print('mask2 yolo_head : ', yolo_head.size())
        yolo_head_out_26x26 = self.module_list[8][0](yolo_head, img_size)
        output.append(yolo_head_out_26x26)

        # Third detection head: upsample and merge with the list 0 features
        x = self.module_list[9](x)
        x = torch.cat([x, x_route1], 1)
        x = self.module_list[10](x)
        yolo_head = self.module_list[11](x)
        if flag_yolo_structure:
            print('mask3 yolo_head : ', yolo_head.size())
        yolo_head_out_52x52 = self.module_list[12][0](yolo_head, img_size)
        output.append(yolo_head_out_52x52)

        if self.training:
            return output

        io, p = list(zip(*output))  # inference output, training output
        return torch.cat(io, 1), p
# ----------------------yolov3 tiny------------------------
class EmptyLayer(nn.Module):
    """Placeholder for 'route' and 'shortcut' layers; returns its input unchanged."""

    def __init__(self):
        super(EmptyLayer, self).__init__()

    def forward(self, x):
        """Return ``x`` as is."""
        return x
class Yolov3Tiny(nn.Module):
    """YOLOv3-tiny detector with two detection heads.

    The network is stored as ``self.module_list``, an ``nn.ModuleList`` of 9
    ``nn.Sequential`` groups: layers 0-8, layers 9-13, layers 14-15,
    yolo_16, route_17, layers 18-19, route_20, layers 21-22 and yolo_23.

    Args:
        num_classes: number of object classes.
        anchors: sequence of (width, height) anchors; the second half is
            used by the first head and the first half by the second. The
            default list is only read, never mutated.
    """

    def __init__(self, num_classes=80, anchors=[(10, 14), (23, 27), (37, 58), (81, 82), (135, 169), (344, 319)]):
        super(Yolov3Tiny, self).__init__()
        anchor_mask1 = [i for i in range(len(anchors) // 2, len(anchors), 1)]  # [3, 4, 5]
        anchor_mask2 = [i for i in range(0, len(anchors) // 2, 1)]  # [0, 1, 2]

        layer_list = []

        # module 0: layer 0 to layer 8
        layer_list.append(OrderedDict([
            # layer 0
            ("conv_0", nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, stride=1, padding=1, bias=False)),
            ("batch_norm_0", nn.BatchNorm2d(16)),
            ("leaky_0", nn.LeakyReLU(0.1)),
            # layer 1
            ("maxpool_1", nn.MaxPool2d(kernel_size=2, stride=2, padding=0)),
            # layer 2
            ("conv_2", nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=1, padding=1, bias=False)),
            ("batch_norm_2", nn.BatchNorm2d(32)),
            ("leaky_2", nn.LeakyReLU(0.1)),
            # layer 3
            ("maxpool_3", nn.MaxPool2d(kernel_size=2, stride=2, padding=0)),
            # layer 4
            ("conv_4", nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1, bias=False)),
            ("batch_norm_4", nn.BatchNorm2d(64)),
            ("leaky_4", nn.LeakyReLU(0.1)),
            # layer 5
            ("maxpool_5", nn.MaxPool2d(kernel_size=2, stride=2, padding=0)),
            # layer 6
            ("conv_6", nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1, bias=False)),
            ("batch_norm_6", nn.BatchNorm2d(128)),
            ("leaky_6", nn.LeakyReLU(0.1)),
            # layer 7
            ("maxpool_7", nn.MaxPool2d(kernel_size=2, stride=2, padding=0)),
            # layer 8
            ("conv_8", nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1, bias=False)),
            ("batch_norm_8", nn.BatchNorm2d(256)),
            ("leaky_8", nn.LeakyReLU(0.1)),
        ]))

        # module 1: layer 9 to layer 13
        layer_list.append(OrderedDict([
            # layer 9
            ("maxpool_9", nn.MaxPool2d(kernel_size=2, stride=2, padding=0)),
            # layer 10
            ("conv_10", nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=1, padding=1, bias=False)),
            ("batch_norm_10", nn.BatchNorm2d(512)),
            ("leaky_10", nn.LeakyReLU(0.1)),
            # layer 11: pad right/bottom by one so the stride-1 pool keeps the size
            ('_debug_padding_11', nn.ZeroPad2d((0, 1, 0, 1))),
            ("maxpool_11", nn.MaxPool2d(kernel_size=2, stride=1, padding=0)),
            # layer 12
            ("conv_12", nn.Conv2d(in_channels=512, out_channels=1024, kernel_size=3, stride=1, padding=1, bias=False)),
            ("batch_norm_12", nn.BatchNorm2d(1024)),
            ("leaky_12", nn.LeakyReLU(0.1)),
            # layer 13
            ("conv_13", nn.Conv2d(in_channels=1024, out_channels=256, kernel_size=1, stride=1, padding=0, bias=False)),
            ("batch_norm_13", nn.BatchNorm2d(256)),
            ("leaky_13", nn.LeakyReLU(0.1)),
        ]))

        # module 2: layer 14, layer 15
        layer_list.append(OrderedDict([
            # layer 14
            ("conv_14", nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=1, padding=1, bias=False)),
            ("batch_norm_14", nn.BatchNorm2d(512)),
            ("leaky_14", nn.LeakyReLU(0.1)),
            # layer 15
            # NOTE(review): the name "conv_15" follows the file's naming
            # pattern; confirm it matches existing checkpoints.
            ("conv_15",
             nn.Conv2d(in_channels=512, out_channels=len(anchor_mask1) * (num_classes + 5), kernel_size=1, stride=1, padding=0, bias=True)),
        ]))

        # layer 16
        anchor_tmp1 = [anchors[i] for i in anchor_mask1]
        layer_list.append(OrderedDict([("yolo_16", YOLOLayer(anchor_tmp1, num_classes))]))

        # layer 17
        layer_list.append(OrderedDict([("route_17", EmptyLayer())]))

        # module 5: layer 18, layer 19
        layer_list.append(OrderedDict([
            # layer 18
            ("conv_18", nn.Conv2d(in_channels=256, out_channels=128, kernel_size=1, stride=1, padding=0, bias=False)),
            ("batch_norm_18", nn.BatchNorm2d(128)),
            ("leaky_18", nn.LeakyReLU(0.1)),
            # layer 19
            ("upsample_19", Upsample(scale_factor=2)),
        ]))

        # layer 20
        layer_list.append(OrderedDict([('route_20', EmptyLayer())]))

        # module 7: layer 21, layer 22
        layer_list.append(OrderedDict([
            # layer 21 (input: layer 19 output concatenated with layer 8 output, 128 + 256 channels)
            ("conv_21", nn.Conv2d(in_channels=384, out_channels=256, kernel_size=3, stride=1, padding=1, bias=False)),
            ("batch_norm_21", nn.BatchNorm2d(256)),
            ("leaky_21", nn.LeakyReLU(0.1)),
            # layer 22
            # NOTE(review): the name "conv_22" follows the file's naming
            # pattern; confirm it matches existing checkpoints.
            ("conv_22",
             nn.Conv2d(in_channels=256, out_channels=len(anchor_mask2) * (num_classes + 5), kernel_size=1, stride=1, padding=0, bias=True)),
        ]))

        # layer 23
        anchor_tmp2 = [anchors[i] for i in anchor_mask2]
        layer_list.append(OrderedDict([("yolo_23", YOLOLayer(anchor_tmp2, num_classes))]))

        self.module_list = nn.ModuleList([nn.Sequential(layer) for layer in layer_list])
        self.yolo_layer_index = get_yolo_layer_index(self.module_list)

    def forward(self, x):
        """Run the network on the image batch ``x``.

        The image size is taken from the last dimension of ``x``.

        Returns:
            In training mode, a list with the two YOLO-layer outputs.
            Otherwise ``(inference_output, training_outputs)`` where the
            inference outputs of both heads are concatenated along
            dimension 1.
        """
        img_size = x.shape[-1]
        output = []

        x = self.module_list[0](x)  # layer0 to layer8
        x_route8 = x
        x = self.module_list[1](x)  # layer9 to layer13
        x_route13 = x

        x = self.module_list[2](x)  # layer14, layer15
        x = self.module_list[3][0](x, img_size)  # yolo_16
        output.append(x)

        x = self.module_list[5](x_route13)  # layer18, layer19
        x = torch.cat([x, x_route8], 1)  # route
        x = self.module_list[7](x)  # layer21, layer22
        x = self.module_list[8][0](x, img_size)  # yolo_23
        output.append(x)

        if self.training:
            return output

        io, p = list(zip(*output))  # inference output, training output
        return torch.cat(io, 1), p
if __name__ == "__main__":
    # Smoke test: build the model, count its parameters and print the
    # output shapes in training and in inference mode.
    # Zeros are used instead of uninitialised memory from torch.Tensor(...).
    dummy_input = torch.zeros(5, 3, 416, 416)
    model = Yolov3(num_classes=80)

    k = sum(param.numel() for param in model.parameters())
    print("总参数数量和: " + str(k))

    print("-----------yolo layer")
    for index in model.yolo_layer_index:
        # NOTE(review): the loop body was missing; printing the layer group
        # is an assumption.
        print(model.module_list[index])

    # Training mode returns one raw prediction tensor per YOLO layer
    model.train()
    for res in model(dummy_input):
        print("res:", np.shape(res))

    # Inference mode returns (inference output, training outputs); eval()
    # is required for the two-value unpacking below to work.
    model.eval()
    with torch.no_grad():
        inference_out, train_out = model(dummy_input)
    print("inference_out:", np.shape(inference_out))
    for o in train_out:
        print("train_out:", np.shape(o))
