From d018bd5f7bb1faca9426c056d95bae4e27c2687d Mon Sep 17 00:00:00 2001
From: "Eric.Lee2021" <305141918@qq.com>
Date: Tue, 2 Feb 2021 19:29:54 +0800
Subject: [PATCH] create pro

---
 train.py  | 208 ++++++++++++++++++++++
 yolov3.py | 505 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 713 insertions(+)
 create mode 100644 train.py
 create mode 100644 yolov3.py

diff --git a/train.py b/train.py
new file mode 100644
index 0000000..6522311
--- /dev/null
+++ b/train.py
@@ -0,0 +1,208 @@
+# coding:utf-8
+import os
+import time
+from collections import defaultdict
+
+import numpy as np
+import torch
+import torch.nn as nn
+from torch.utils.data import DataLoader
+
+from yolov3 import Yolov3, Yolov3Tiny
+from utils.parse_config import parse_data_cfg
+from utils.torch_utils import select_device
+from utils.datasets import LoadImagesAndLabels
+from utils.utils import *
+
+
+def set_learning_rate(optimizer, lr):
+    for param_group in optimizer.param_groups:
+        param_group['lr'] = lr
+
+
+def train(data_cfg='cfg/voc.data', accumulate=1):
+    device = select_device()
+    # Parse the training configuration (returned as a dict)
+    get_data_cfg = parse_data_cfg(data_cfg)
+
+    gpus = get_data_cfg['gpus']
+    num_workers = int(get_data_cfg['num_workers'])
+    cfg_model = get_data_cfg['cfg_model']
+    train_path = get_data_cfg['train']
+    valid_path = get_data_cfg['valid']
+    num_classes = int(get_data_cfg['classes'])
+    finetune_model = get_data_cfg['finetune_model']
+    batch_size = int(get_data_cfg['batch_size'])
+    img_size = int(get_data_cfg['img_size'])
+    multi_scale = get_data_cfg['multi_scale'] == 'True'
+    epochs = int(get_data_cfg['epochs'])
+    lr_step = str(get_data_cfg['lr_step'])
+    lr0 = float(get_data_cfg['lr0'])
+
+    print('data_cfg       : ', data_cfg)
+    print('config len     : ', len(get_data_cfg))
+    print('gpus           : ', gpus)
+    print('num_workers    : ', num_workers)
+    print('model          : ', cfg_model)
+    print('finetune_model : ', finetune_model)
+    print('train_path     : ', train_path)
+    print('valid_path     : ', valid_path)
+    print('num_classes    : ', num_classes)
+    print('batch_size     : ', batch_size)
+    print('img_size       : ', img_size)
+    print('multi_scale    : ', multi_scale)
+    print('lr0            : ', lr0)
+    print('lr_step        : ', lr_step)
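+    # For reference, parse_data_cfg() reads a plain key=value file. A minimal
+    # sketch of what cfg/hand.data could contain, using the keys read above
+    # (all values here are illustrative placeholders, not from the repo):
+    #
+    #   cfg_model=yolo-v3
+    #   gpus=0
+    #   num_workers=4
+    #   classes=1
+    #   train=./datasets/hand_train.txt
+    #   valid=./datasets/hand_valid.txt
+    #   finetune_model=./weights/yolov3_coco.pt
+    #   batch_size=16
+    #   img_size=416
+    #   multi_scale=False
+    #   epochs=100
+    #   lr0=0.001
+    #   lr_step=30,50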
+    # Load the model; the variant is inferred from the cfg_model field
+    pattern_data = data_cfg.split("/")[-1].replace(".data", "")
+    if "-tiny" in cfg_model:
+        a_scale = 416. / img_size
+        anchors = [(10, 14), (23, 27), (37, 58), (81, 82), (135, 169), (344, 319)]
+        anchors_new = [(int(a[0] / a_scale), int(a[1] / a_scale)) for a in anchors]
+        model = Yolov3Tiny(num_classes, anchors=anchors_new)
+        weights = './weights-yolov3-{}-tiny/'.format(pattern_data)
+    else:
+        a_scale = 416. / img_size
+        anchors = [(10, 13), (16, 30), (33, 23), (30, 61), (62, 45), (59, 119), (116, 90), (156, 198), (373, 326)]
+        anchors_new = [(int(a[0] / a_scale), int(a[1] / a_scale)) for a in anchors]
+        model = Yolov3(num_classes, anchors=anchors_new)
+        weights = './weights-yolov3-{}/'.format(pattern_data)
+    # Create the directory that checkpoints are saved to
+    if not os.path.exists(weights):
+        os.mkdir(weights)
+
+    model = model.to(device)
+    latest = weights + 'latest_{}.pt'.format(img_size)
+    best = weights + 'best_{}.pt'.format(img_size)
+    # Optimizer
+    optimizer = torch.optim.SGD(model.parameters(), lr=lr0, momentum=0.9, weight_decay=0.0005)
+
+    start_epoch = 0
+    best_loss = float('inf')
+
+    if os.access(finetune_model, os.F_OK):  # load a retrain/finetune model
+        print('loading yolo-v3 finetune_model : ', finetune_model)
+        # The final detection convs have 3*(num_classes+5) output filters,
+        # e.g. VOC: 3*(20+5)=75, COCO: 3*(80+5)=255, so filters of this size
+        # are skipped when the class counts differ
+        not_load_filters = 3 * (80 + 5)
+        chkpt = torch.load(finetune_model, map_location=device)
+        model.load_state_dict({k: v for k, v in chkpt['model'].items()
+                               if v.numel() > 1 and v.shape[0] != not_load_filters}, strict=False)
+        # model.load_state_dict(chkpt['model'])
+        if 'coco' not in finetune_model:
+            start_epoch = chkpt['epoch']
+            if chkpt['optimizer'] is not None:
+                optimizer.load_state_dict(chkpt['optimizer'])
+                best_loss = chkpt['best_loss']
+
+    # Scheduler: multiply the learning rate by gamma=0.1 at each milestone epoch
+    milestones = [int(i) for i in lr_step.split(",")]
+    print('milestones : ', milestones)
+    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=milestones, gamma=0.1,
+                                                     last_epoch=start_epoch - 1)
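+    # A worked example of the resulting schedule, assuming the illustrative
+    # values lr0=0.001 and lr_step="30,50": the lr stays at 1e-3 until epoch
+    # 30, drops to 1e-4 at epoch 30 and to 1e-5 at epoch 50 (gamma=0.1 per
+    # milestone). Independently, the first epoch ramps the lr up from ~0 with
+    # the quartic burn-in below (lr = lr0 * (i / n_burnin) ** 4), which keeps
+    # early SGD updates from diverging.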
+    # Dataset
+    print('multi_scale : ', multi_scale)
+    dataset = LoadImagesAndLabels(train_path, batch_size=batch_size, img_size=img_size,
+                                  augment=True, multi_scale=multi_scale)
+    print('--------------->>> image num : ', dataset.__len__())
+    # Dataloader
+    dataloader = DataLoader(dataset,
+                            batch_size=batch_size,
+                            num_workers=num_workers,
+                            shuffle=True,
+                            pin_memory=False,
+                            drop_last=False,
+                            collate_fn=dataset.collate_fn)
+
+    # Start training
+    t = time.time()
+    # model_info(model)  # print model information
+    nB = len(dataloader)
+    n_burnin = min(round(nB / 5 + 1), 1000)  # burn-in batches
+
+    flag_start = False
+
+    for epoch in range(0, epochs):
+        model.train()
+
+        # Step the scheduler at the start of every epoch except the first,
+        # so that optimizer.step() always precedes scheduler.step()
+        if flag_start:
+            scheduler.step()
+        flag_start = True
+
+        mloss = defaultdict(float)  # mean loss
+        for i, (imgs, targets, img_path_, _) in enumerate(dataloader):
+            multi_size = imgs.size()
+            imgs = imgs.to(device)
+            targets = targets.to(device)
+
+            nt = len(targets)
+            if nt == 0:  # skip batches without targets
+                continue
+
+            # SGD burn-in
+            if epoch == 0 and i <= n_burnin:
+                lr = lr0 * (i / n_burnin) ** 4
+                for x in optimizer.param_groups:
+                    x['lr'] = lr
+
+            # Run model
+            pred = model(imgs)
+
+            # Build targets
+            target_list = build_targets(model, targets)
+
+            # Compute loss
+            loss, loss_dict = compute_loss(pred, target_list)
+
+            # Compute gradient
+            loss.backward()
+
+            # Accumulate gradients for `accumulate` batches before optimizing
+            if (i + 1) % accumulate == 0 or (i + 1) == nB:
+                optimizer.step()
+                optimizer.zero_grad()
+
+            # Running epoch-means of tracked metrics
+            for key, val in loss_dict.items():
+                mloss[key] = (mloss[key] * i + val) / (i + 1)
+
+            print('  Epoch {:3d}/{:3d}, Batch {:6d}/{:6d}, Img_size {}x{}, nTargets {}, lr {:.6f}, '
+                  'loss: xy {:.3f}, wh {:.3f}, conf {:.3f}, cls {:.3f}, total {:.3f}, time {:.3f}s'.format(
+                      epoch, epochs - 1, i, nB - 1, multi_size[2], multi_size[3], nt,
+                      scheduler.get_lr()[0], mloss['xy'], mloss['wh'], mloss['conf'],
+                      mloss['cls'], mloss['total'], time.time() - t),
+                  end='\r')
+            t = time.time()
+        print()
+
+        # Create checkpoint
+        chkpt = {'epoch': epoch,
+                 'best_loss': best_loss,
+                 'model': model.module.state_dict() if type(
+                     model) is nn.parallel.DistributedDataParallel else model.state_dict(),
+                 'optimizer': optimizer.state_dict()}
+
+        # Save latest checkpoint
+        torch.save(chkpt, latest)
+
+        # Save best checkpoint (no separate validation pass is run here, so
+        # the epoch-mean training loss is used as the selection metric)
+        if mloss['total'] < best_loss:
+            best_loss = mloss['total']
+            chkpt['best_loss'] = best_loss
+            torch.save(chkpt, best)
+
+        # Save a backup every 5 epochs (optional)
+        if epoch > 0 and epoch % 5 == 0:
+            torch.save(chkpt, weights + 'yoloV3_{}_epoch_{}.pt'.format(img_size, epoch))
+
+        # Delete checkpoint
+        del chkpt
+
+# -------------------------------------------------------------------------------
+if __name__ == '__main__':
+    train(data_cfg='cfg/hand.data')
+    print('well done ~ ')

diff --git a/yolov3.py b/yolov3.py
new file mode 100644
index 0000000..519ae7e
--- /dev/null
+++ b/yolov3.py
@@ -0,0 +1,505 @@
+import os
+import numpy as np
+from collections import OrderedDict
+
+import torch
+import torch.nn.functional as F
+import torch.nn as nn
+
+
+# references:
+# https://github.com/ultralytics/yolov3/blob/master/models.py
+# https://github.com/TencentYoutuResearch/ObjectDetection-OneStageDet/blob/master/yolo/vedanet/network/backbone/brick/darknet53.py
+# network structure: https://blog.csdn.net/u010397980/article/details/85058630
+
+flag_yolo_structure = False  # set True to log the network structure
+
+class Conv2dBatchLeaky(nn.Module):
+    def __init__(self, in_channels, out_channels, kernel_size, stride, leaky_slope=0.1):
+        super(Conv2dBatchLeaky, self).__init__()
+
+        self.in_channels = in_channels
+        self.out_channels = out_channels
+        self.kernel_size = kernel_size
+        self.stride = stride
+        if isinstance(kernel_size, (list, tuple)):
+            self.padding = [ii // 2 for ii in kernel_size]
+            if flag_yolo_structure:
+                print('------------------->>>> Conv2dBatchLeaky isinstance')
+        else:
+            self.padding = kernel_size // 2
+
+        self.leaky_slope = leaky_slope
+        # Layers
+        # LeakyReLU: y = max(0, x) + leaky_slope * min(0, x)
+        self.layers = nn.Sequential(
+            nn.Conv2d(self.in_channels, self.out_channels, self.kernel_size, self.stride, self.padding, bias=False),
+            nn.BatchNorm2d(self.out_channels),
+            nn.LeakyReLU(self.leaky_slope, inplace=True)
+        )
+
+    def forward(self, x):
+        x = self.layers(x)
+        return x
+
+class ResBlockSum(nn.Module):
+    def __init__(self, nchannels):
+        super().__init__()
+        self.block = nn.Sequential(
+            Conv2dBatchLeaky(nchannels, nchannels // 2, 1, 1),
+            Conv2dBatchLeaky(nchannels // 2, nchannels, 3, 1)
+        )
+
+    def forward(self, x):
+        return x + self.block(x)
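+# A quick shape check of ResBlockSum, assuming a hypothetical 64-channel
+# input: the 1x1 conv halves the channels (64 -> 32), the 3x3 conv restores
+# them (32 -> 64), and the skip connection adds the input back, so both the
+# channel count and the spatial size are preserved:
+#
+#   block = ResBlockSum(64)
+#   y = block(torch.randn(1, 64, 208, 208))  # y.shape == (1, 64, 208, 208)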
+class HeadBody(nn.Module):
+    def __init__(self, in_channels, out_channels):
+        super(HeadBody, self).__init__()
+
+        self.layer = nn.Sequential(
+            Conv2dBatchLeaky(in_channels, out_channels, 1, 1),
+            Conv2dBatchLeaky(out_channels, out_channels * 2, 3, 1),
+            Conv2dBatchLeaky(out_channels * 2, out_channels, 1, 1),
+            Conv2dBatchLeaky(out_channels, out_channels * 2, 3, 1),
+            Conv2dBatchLeaky(out_channels * 2, out_channels, 1, 1)
+        )
+
+    def forward(self, x):
+        x = self.layer(x)
+        return x
+
+class Upsample(nn.Module):
+    # Custom Upsample layer (nn.Upsample gives a deprecation warning)
+
+    def __init__(self, scale_factor=1, mode='nearest'):
+        super(Upsample, self).__init__()
+        self.scale_factor = scale_factor
+        self.mode = mode
+
+    def forward(self, x):
+        return F.interpolate(x, scale_factor=self.scale_factor, mode=self.mode)
+
+# default anchors = [(10,13), (16,30), (33,23), (30,61), (62,45), (59,119), (116,90), (156,198), (373,326)]
+class YOLOLayer(nn.Module):
+    def __init__(self, anchors, nC):
+        super(YOLOLayer, self).__init__()
+
+        self.anchors = torch.FloatTensor(anchors)
+        self.nA = len(anchors)  # number of anchors (3)
+        self.nC = nC  # number of classes
+        self.img_size = 0
+        if flag_yolo_structure:
+            print('init YOLOLayer ------ >>> ')
+            print('anchors  : ', self.anchors)
+            print('nA       : ', self.nA)
+            print('nC       : ', self.nC)
+            print('img_size : ', self.img_size)
+
+    def forward(self, p, img_size, var=None):  # p: feature map
+        bs, nG = p.shape[0], p.shape[-1]  # batch size, grid size
+        if flag_yolo_structure:
+            print('bs, nG --->>> ', bs, nG)
+        if self.img_size != img_size:
+            create_grids(self, img_size, nG, p.device)
+
+        # p.view(bs, 255, 13, 13) --> (bs, 3, 13, 13, 85)  # (bs, anchors, grid, grid, xywh + confidence + classes)
+        p = p.view(bs, self.nA, self.nC + 5, nG, nG).permute(0, 1, 3, 4, 2).contiguous()  # prediction
+
+        if self.training:
+            return p
+        else:  # inference
+            io = p.clone()  # inference output
+            io[..., 0:2] = torch.sigmoid(io[..., 0:2]) + self.grid_xy  # xy
+            io[..., 2:4] = torch.exp(io[..., 2:4]) * self.anchor_wh  # wh yolo method
+            io[..., 4:] = torch.sigmoid(io[..., 4:])  # p_conf, p_cls
+            io[..., :4] *= self.stride
+            if self.nC == 1:
+                io[..., 5] = 1  # single-class model
+            # flatten prediction: reshape from (bs, nA, nG, nG, 5 + nC) to (bs, nA * nG * nG, 5 + nC)
+            return io.view(bs, -1, 5 + self.nC), p
+
+def create_grids(self, img_size, nG, device='cpu'):
+    # self.nA : number of anchors (3)
+    # self.nC : number of classes
+    # nG      : feature map grid size, 13x13 / 26x26 / 52x52
+    self.img_size = img_size
+    self.stride = img_size / nG
+    if flag_yolo_structure:
+        print('create_grids stride : ', self.stride)
+
+    # build xy offsets
+    grid_x = torch.arange(nG).repeat((nG, 1)).view((1, 1, nG, nG)).float()
+    grid_y = grid_x.permute(0, 1, 3, 2)
+    self.grid_xy = torch.stack((grid_x, grid_y), 4).to(device)
+    if flag_yolo_structure:
+        print('grid_x  : ', grid_x.size(), grid_x)
+        print('grid_y  : ', grid_y.size(), grid_y)
+        print('grid_xy : ', self.grid_xy.size(), self.grid_xy)
+
+    # build wh gains (anchors normalised by the stride)
+    self.anchor_vec = self.anchors.to(device) / self.stride
+    self.anchor_wh = self.anchor_vec.view(1, self.nA, 1, 1, 2).to(device)
+    self.nG = torch.FloatTensor([nG]).to(device)
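+# A worked example of the decode step in YOLOLayer.forward, assuming a
+# 416x416 input and the 13x13 head, so stride = 416 / 13 = 32. A raw
+# prediction (tx, ty, tw, th) in grid cell (cx, cy) with anchor (aw, ah)
+# becomes, in input pixels:
+#
+#   x = (sigmoid(tx) + cx) * 32
+#   y = (sigmoid(ty) + cy) * 32
+#   w = exp(tw) * (aw / 32) * 32 = exp(tw) * aw
+#   h = exp(th) * (ah / 32) * 32 = exp(th) * ah
+#
+# i.e. the standard YOLOv3 box parameterisation.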
("0_stage3_ressum1", ResBlockSum(128)), + ("0_stage3_ressum2", ResBlockSum(128)), # (Convolutional*2 + Resiudal)**2 + + ("0_stage4_conv", Conv2dBatchLeaky(128, 256, 3, 2)), # 52 x 52 x 256 # Convolutional + ("0_stage4_ressum1", ResBlockSum(256)), + ("0_stage4_ressum2", ResBlockSum(256)), + ("0_stage4_ressum3", ResBlockSum(256)), + ("0_stage4_ressum4", ResBlockSum(256)), + ("0_stage4_ressum5", ResBlockSum(256)), + ("0_stage4_ressum6", ResBlockSum(256)), + ("0_stage4_ressum7", ResBlockSum(256)), + ("0_stage4_ressum8", ResBlockSum(256)), # 52 x 52 x 256 output_feature_0 (Convolutional*2 + Resiudal)**8 + ])) + # list 1 + layer_list.append(OrderedDict([ + ("1_stage5_conv", Conv2dBatchLeaky(256, 512, 3, 2)), # 26 x 26 x 512 # Convolutional + ("1_stage5_ressum1", ResBlockSum(512)), + ("1_stage5_ressum2", ResBlockSum(512)), + ("1_stage5_ressum3", ResBlockSum(512)), + ("1_stage5_ressum4", ResBlockSum(512)), + ("1_stage5_ressum5", ResBlockSum(512)), + ("1_stage5_ressum6", ResBlockSum(512)), + ("1_stage5_ressum7", ResBlockSum(512)), + ("1_stage5_ressum8", ResBlockSum(512)), # 26 x 26 x 512 output_feature_1 # (Convolutional*2 + Resiudal)**8 + ])) + + ''' + ****** HeadBody ****** + op : Conv2dBatchLeaky * 5 + inputs : in_channels, out_channels + ''' + # list 2 + layer_list.append(OrderedDict([ + ("2_stage6_conv", Conv2dBatchLeaky(512, 1024, 3, 2)), # 13 x 13 x 1024 # Convolutional + ("2_stage6_ressum1", ResBlockSum(1024)), + ("2_stage6_ressum2", ResBlockSum(1024)), + ("2_stage6_ressum3", ResBlockSum(1024)), + ("2_stage6_ressum4", ResBlockSum(1024)), # 13 x 13 x 1024 output_feature_2 # (Convolutional*2 + Resiudal)**4 + ("2_headbody1", HeadBody(in_channels=1024, out_channels=512)), # 13 x 13 x 512 # Convalutional Set = Conv2dBatchLeaky * 5 + ])) + # list 3 + layer_list.append(OrderedDict([ + ("3_conv_1", Conv2dBatchLeaky(in_channels=512, out_channels=1024, kernel_size=3, stride=1)), + ("3_conv_2", nn.Conv2d(in_channels=1024, out_channels=len(anchor_mask1) * (num_classes + 5), kernel_size=1, stride=1, padding=0, bias=True)), + ])) # predict one + # list 4 + layer_list.append(OrderedDict([ + ("4_yolo", YOLOLayer([anchors[i] for i in anchor_mask1], num_classes)) + ])) # 3*((x, y, w, h, confidence) + classes ) + + # list 5 + layer_list.append(OrderedDict([ + ("5_conv", Conv2dBatchLeaky(512, 256, 1, 1)), + ("5_upsample", Upsample(scale_factor=2)), + ])) + # list 6 + layer_list.append(OrderedDict([ + ("6_head_body2", HeadBody(in_channels=768, out_channels=256)) # Convalutional Set = Conv2dBatchLeaky * 5 + ])) + # list 7 + layer_list.append(OrderedDict([ + ("7_conv_1", Conv2dBatchLeaky(in_channels=256, out_channels=512, kernel_size=3, stride=1)), + ("7_conv_2", nn.Conv2d(in_channels=512, out_channels=len(anchor_mask2) * (num_classes + 5), kernel_size=1, stride=1, padding=0, bias=True)), + ])) # predict two + # list 8 + layer_list.append(OrderedDict([ + ("8_yolo", YOLOLayer([anchors[i] for i in anchor_mask2], num_classes)) + ])) # 3*((x, y, w, h, confidence) + classes ) + # list 9 + layer_list.append(OrderedDict([ + ("9_conv", Conv2dBatchLeaky(256, 128, 1, 1)), + ("9_upsample", Upsample(scale_factor=2)), + ])) + # list 10 + layer_list.append(OrderedDict([ + ("10_head_body3", HeadBody(in_channels=384, out_channels=128)) # Convalutional Set = Conv2dBatchLeaky * 5 + ])) + # list 11 + layer_list.append(OrderedDict([ + ("11_conv_1", Conv2dBatchLeaky(in_channels=128, out_channels=256, kernel_size=3, stride=1)), + ("11_conv_2", nn.Conv2d(in_channels=256, out_channels=len(anchor_mask3) * (num_classes + 5), 
+    def forward(self, x):
+        img_size = x.shape[-1]
+        if flag_yolo_structure:
+            print('forward img_size : ', img_size, x.shape)
+        output = []
+
+        x = self.module_list[0](x)
+        x_route1 = x
+        x = self.module_list[1](x)
+        x_route2 = x
+        x = self.module_list[2](x)
+
+        yolo_head = self.module_list[3](x)
+        if flag_yolo_structure:
+            print('mask1 yolo_head : ', yolo_head.size())
+        yolo_head_out_13x13 = self.module_list[4][0](yolo_head, img_size)
+        output.append(yolo_head_out_13x13)
+
+        x = self.module_list[5](x)
+        x = torch.cat([x, x_route2], 1)
+        x = self.module_list[6](x)
+
+        yolo_head = self.module_list[7](x)
+        if flag_yolo_structure:
+            print('mask2 yolo_head : ', yolo_head.size())
+        yolo_head_out_26x26 = self.module_list[8][0](yolo_head, img_size)
+        output.append(yolo_head_out_26x26)
+
+        x = self.module_list[9](x)
+        x = torch.cat([x, x_route1], 1)
+        x = self.module_list[10](x)
+
+        yolo_head = self.module_list[11](x)
+        if flag_yolo_structure:
+            print('mask3 yolo_head : ', yolo_head.size())
+        yolo_head_out_52x52 = self.module_list[12][0](yolo_head, img_size)
+        output.append(yolo_head_out_52x52)
+
+        if self.training:
+            return output
+        else:
+            io, p = list(zip(*output))  # inference output, training output
+            return torch.cat(io, 1), p
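+# At inference time the three heads are concatenated along dim 1. For a
+# 416x416 input the grids are 13x13, 26x26 and 52x52, so the model returns
+# (13*13 + 26*26 + 52*52) * 3 = 3549 * 3 = 10647 predictions per image,
+# each of length 5 + num_classes.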
+# ---------------------- yolov3 tiny ------------------------
+
+class EmptyLayer(nn.Module):
+    """Placeholder for 'route' and 'shortcut' layers"""
+    def __init__(self):
+        super(EmptyLayer, self).__init__()
+
+    def forward(self, x):
+        return x
+
+
+class Yolov3Tiny(nn.Module):
+    def __init__(self, num_classes=80, anchors=[(10, 14), (23, 27), (37, 58), (81, 82), (135, 169), (344, 319)]):
+        super(Yolov3Tiny, self).__init__()
+
+        anchor_mask1 = [i for i in range(len(anchors) // 2, len(anchors), 1)]  # [3, 4, 5]
+        anchor_mask2 = [i for i in range(0, len(anchors) // 2, 1)]  # [0, 1, 2]
+
+        layer_list = []
+        layer_list.append(OrderedDict([
+            # layer 0
+            ("conv_0", nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, stride=1, padding=1, bias=False)),
+            ("batch_norm_0", nn.BatchNorm2d(16)),
+            ("leaky_0", nn.LeakyReLU(0.1)),
+            # layer 1
+            ("maxpool_1", nn.MaxPool2d(kernel_size=2, stride=2, padding=0)),
+            # layer 2
+            ("conv_2", nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=1, padding=1, bias=False)),
+            ("batch_norm_2", nn.BatchNorm2d(32)),
+            ("leaky_2", nn.LeakyReLU(0.1)),
+            # layer 3
+            ("maxpool_3", nn.MaxPool2d(kernel_size=2, stride=2, padding=0)),
+            # layer 4
+            ("conv_4", nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1, bias=False)),
+            ("batch_norm_4", nn.BatchNorm2d(64)),
+            ("leaky_4", nn.LeakyReLU(0.1)),
+            # layer 5
+            ("maxpool_5", nn.MaxPool2d(kernel_size=2, stride=2, padding=0)),
+            # layer 6
+            ("conv_6", nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1, bias=False)),
+            ("batch_norm_6", nn.BatchNorm2d(128)),
+            ("leaky_6", nn.LeakyReLU(0.1)),
+            # layer 7
+            ("maxpool_7", nn.MaxPool2d(kernel_size=2, stride=2, padding=0)),
+            # layer 8
+            ("conv_8", nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1, bias=False)),
+            ("batch_norm_8", nn.BatchNorm2d(256)),
+            ("leaky_8", nn.LeakyReLU(0.1)),
+        ]))
+
+        layer_list.append(OrderedDict([
+            # layer 9
+            ("maxpool_9", nn.MaxPool2d(kernel_size=2, stride=2, padding=0)),
+            # layer 10
+            ("conv_10", nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=1, padding=1, bias=False)),
+            ("batch_norm_10", nn.BatchNorm2d(512)),
+            ("leaky_10", nn.LeakyReLU(0.1)),
+            # layer 11
+            ("_debug_padding_11", nn.ZeroPad2d((0, 1, 0, 1))),
+            ("maxpool_11", nn.MaxPool2d(kernel_size=2, stride=1, padding=0)),
+            # layer 12
+            ("conv_12", nn.Conv2d(in_channels=512, out_channels=1024, kernel_size=3, stride=1, padding=1, bias=False)),
+            ("batch_norm_12", nn.BatchNorm2d(1024)),
+            ("leaky_12", nn.LeakyReLU(0.1)),
+            # layer 13
+            ("conv_13", nn.Conv2d(in_channels=1024, out_channels=256, kernel_size=1, stride=1, padding=0, bias=False)),
+            ("batch_norm_13", nn.BatchNorm2d(256)),
+            ("leaky_13", nn.LeakyReLU(0.1)),
+        ]))
+
+        layer_list.append(OrderedDict([
+            # layer 14
+            ("conv_14", nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=1, padding=1, bias=False)),
+            ("batch_norm_14", nn.BatchNorm2d(512)),
+            ("leaky_14", nn.LeakyReLU(0.1)),
+            # layer 15
+            ("conv_15",
+             nn.Conv2d(in_channels=512, out_channels=len(anchor_mask1) * (num_classes + 5), kernel_size=1, stride=1, padding=0, bias=True)),
+        ]))
+
+        # layer 16
+        anchor_tmp1 = [anchors[i] for i in anchor_mask1]
+        layer_list.append(OrderedDict([("yolo_16", YOLOLayer(anchor_tmp1, num_classes))]))
+
+        # layer 17
+        layer_list.append(OrderedDict([("route_17", EmptyLayer())]))
+
+        layer_list.append(OrderedDict([
+            # layer 18
+            ("conv_18", nn.Conv2d(in_channels=256, out_channels=128, kernel_size=1, stride=1, padding=0, bias=False)),
+            ("batch_norm_18", nn.BatchNorm2d(128)),
+            ("leaky_18", nn.LeakyReLU(0.1)),
+            # layer 19
+            ("upsample_19", Upsample(scale_factor=2)),
+        ]))
+
+        # layer 20
+        layer_list.append(OrderedDict([("route_20", EmptyLayer())]))
+
+        layer_list.append(OrderedDict([
+            # layer 21
+            ("conv_21", nn.Conv2d(in_channels=384, out_channels=256, kernel_size=3, stride=1, padding=1, bias=False)),
+            ("batch_norm_21", nn.BatchNorm2d(256)),
+            ("leaky_21", nn.LeakyReLU(0.1)),
+            # layer 22
+            ("conv_22",
+             nn.Conv2d(in_channels=256, out_channels=len(anchor_mask2) * (num_classes + 5), kernel_size=1, stride=1, padding=0, bias=True)),
+        ]))
+
+        # layer 23
+        anchor_tmp2 = [anchors[i] for i in anchor_mask2]
+        layer_list.append(OrderedDict([("yolo_23", YOLOLayer(anchor_tmp2, num_classes))]))
+
+        self.module_list = nn.ModuleList([nn.Sequential(layer) for layer in layer_list])
+        self.yolo_layer_index = get_yolo_layer_index(self.module_list)
+
+    def forward(self, x):
+        img_size = x.shape[-1]
+        output = []
+
+        x = self.module_list[0](x)  # layer0 to layer8
+        x_route8 = x
+        x = self.module_list[1](x)  # layer9 to layer13
+        x_route13 = x
+        x = self.module_list[2](x)  # layer14, layer15
+        x = self.module_list[3][0](x, img_size)  # yolo_16
+        output.append(x)
+        x = self.module_list[5](x_route13)  # layer18, layer19
+        x = torch.cat([x, x_route8], 1)  # route
+        x = self.module_list[7](x)  # layer21, layer22
+        x = self.module_list[8][0](x, img_size)  # yolo_23
+        output.append(x)
+
+        if self.training:
+            return output
+        else:
+            io, p = list(zip(*output))  # inference output, training output
+            return torch.cat(io, 1), p
+
+
+if __name__ == "__main__":
+    dummy_input = torch.randn(5, 3, 416, 416)
+    model = Yolov3(num_classes=80)
+    params = list(model.parameters())
+    k = 0
+    for i in params:
+        l = 1
+        for j in i.size():
+            l *= j
+        # print("layer shape: {}, parameter count: {}".format(str(list(i.size())), str(l)))
+        k = k + l
+    print("----------------------")
+    print("total parameter count: " + str(k))
+    print("-----------yolo layer")
+    for index in model.yolo_layer_index:
+        print(model.module_list[index])
+
+    print("-----------train")
+    model.train()
+    for res in model(dummy_input):
+        print("res:", np.shape(res))
+
+    print("-----------eval")
+    model.eval()
+    inference_out, train_out = model(dummy_input)
+    print("inference_out:", np.shape(inference_out))
+    for o in train_out:
+        print("train_out:", np.shape(o))
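+    # A matching smoke test for the tiny variant: a minimal sketch assuming
+    # the default 80-class configuration, mirroring the Yolov3 checks above.
+    # With two heads at 13x13 and 26x26, eval mode should return
+    # (13*13 + 26*26) * 3 = 845 * 3 = 2535 predictions per image.
+    print("-----------yolov3-tiny eval")
+    tiny_model = Yolov3Tiny(num_classes=80)
+    tiny_model.eval()
+    tiny_inference_out, tiny_train_out = tiny_model(dummy_input)
+    print("tiny inference_out:", np.shape(tiny_inference_out))  # (5, 2535, 85)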
--
GitLab