提交 d018bd5f 编写于 作者: Eric.Lee2021 🚴🏻

create pro

上级 13e1ab1c
#coding:utf-8
from yolov3 import Yolov3, Yolov3Tiny
from utils.parse_config import parse_data_cfg
from utils.torch_utils import select_device
import torch
from torch.utils.data import DataLoader
from utils.datasets import LoadImagesAndLabels
from utils.utils import *
import os
import numpy as np
def set_learning_rate(optimizer, lr):
    """Set the learning rate of every parameter group in *optimizer* to *lr*."""
    for group in optimizer.param_groups:
        group['lr'] = lr
def train(data_cfg ='cfg/voc.data',
        accumulate = 1):
    """Train a Yolov3 / Yolov3Tiny model using the settings in a .data config file.

    Args:
        data_cfg: path to a *.data file parsed by parse_data_cfg(); must define
            gpus, num_workers, cfg_model, train, valid, classes, finetune_model,
            batch_size, img_size, multi_scale, epochs, lr_step and lr0.
        accumulate: number of batches over which gradients are accumulated
            before each optimizer step.
    """
    device = select_device()
    # Configure run
    get_data_cfg = parse_data_cfg(data_cfg)  # returns the training configuration as a dict
    gpus = get_data_cfg['gpus']
    num_workers = int(get_data_cfg['num_workers'])
    cfg_model = get_data_cfg['cfg_model']
    train_path = get_data_cfg['train']
    valid_ptah = get_data_cfg['valid']  # NOTE(review): typo of "valid_path"; value is only printed below
    num_classes = int(get_data_cfg['classes'])
    finetune_model = get_data_cfg['finetune_model']
    batch_size = int(get_data_cfg['batch_size'])
    img_size = int(get_data_cfg['img_size'])
    multi_scale = get_data_cfg['multi_scale']
    epochs = int(get_data_cfg['epochs'])
    lr_step = str(get_data_cfg['lr_step'])
    lr0 = float(get_data_cfg['lr0'])
    # Config values arrive as strings; convert the multi_scale flag to a real bool.
    if multi_scale == 'True':
        multi_scale = True
    else:
        multi_scale = False
    # Echo the effective configuration.
    print('data_cfg : ',data_cfg)
    print('voc.data config len : ',len(get_data_cfg))
    print('gpus : ',gpus)
    print('num_workers : ',num_workers)
    print('model : ',cfg_model)
    print('finetune_model : ',finetune_model)
    print('train_path : ',train_path)
    print('valid_ptah : ',valid_ptah)
    print('num_classes : ',num_classes)
    print('batch_size : ',batch_size)
    print('img_size : ',img_size)
    print('multi_scale : ',multi_scale)
    print('lr0 : ',lr0)
    print('lr_step : ',lr_step)
    # load model
    # Dataset tag taken from the config file name, e.g. "cfg/hand.data" -> "hand".
    pattern_data_ = data_cfg.split("/")[-1:][0].replace(".data","")
    if "-tiny" in cfg_model:
        # Rescale the default 416-based anchors to the configured input size.
        a_scalse = 416./img_size
        anchors=[(10, 14), (23, 27), (37, 58), (81, 82), (135, 169), (344, 319)]
        anchors_new = [ (int(anchors[j][0]/a_scalse),int(anchors[j][1]/a_scalse)) for j in range(len(anchors)) ]
        model = Yolov3Tiny(num_classes,anchors = anchors_new)
        # weights = './weights-yolov3-person-tiny/'
        weights = './weights-yolov3-{}-tiny/'.format(pattern_data_)
    else:
        # Same anchor rescaling for the full model.
        a_scalse = 416./img_size
        anchors=[(10,13), (16,30), (33,23), (30,61), (62,45), (59,119), (116,90), (156,198), (373,326)]
        anchors_new = [ (int(anchors[j][0]/a_scalse),int(anchors[j][1]/a_scalse)) for j in range(len(anchors)) ]
        model = Yolov3(num_classes,anchors = anchors_new)
        weights = './weights-yolov3-{}/'.format(pattern_data_)
    # mkdir save model document
    if not os.path.exists(weights):
        os.mkdir(weights)
    model = model.to(device)
    latest = weights + 'latest_{}.pt'.format(img_size)
    best = weights + 'best_{}.pt'.format(img_size)
    # Optimizer
    optimizer = torch.optim.SGD(model.parameters(), lr=lr0, momentum=0.9, weight_decay=0.0005)
    start_epoch = 0
    if os.access(finetune_model,os.F_OK):# load retrain/finetune_model
        print('loading yolo-v3 finetune_model ~~~~~~',finetune_model)
        # Skip detection-head filters whose channel count depends on the class
        # count of the pretrained checkpoint.
        not_load_filters = 3*(80+5) # voc: 3*(20+5), coco: 3*(80+5)=255
        chkpt = torch.load(finetune_model, map_location=device)
        model.load_state_dict({k: v for k, v in chkpt['model'].items() if v.numel() > 1 and v.shape[0] != not_load_filters}, strict=False)
        # model.load_state_dict(chkpt['model'])
        if 'coco' not in finetune_model:
            # Resume epoch/optimizer state only for non-COCO checkpoints.
            start_epoch = chkpt['epoch']
            if chkpt['optimizer'] is not None:
                optimizer.load_state_dict(chkpt['optimizer'])
                best_loss = chkpt['best_loss']
                # NOTE(review): best_loss is unconditionally reset to inf below,
                # so this restored value is discarded — confirm intent.
    # Set scheduler (reduce lr at epochs 218, 245, i.e. batches 400k, 450k)
    # gamma: multiplicative factor applied to the learning rate at each milestone
    milestones=[int(i) for i in lr_step.split(",")]
    print('milestones : ',milestones)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[int(i) for i in lr_step.split(",")], gamma=0.1,
                                                     last_epoch=start_epoch - 1)
    # Dataset
    print('multi_scale : ',multi_scale)
    dataset = LoadImagesAndLabels(train_path, batch_size=batch_size, img_size=img_size, augment=True, multi_scale=multi_scale)
    print('--------------->>> imge num : ',dataset.__len__())
    # Dataloader
    dataloader = DataLoader(dataset,
                            batch_size=batch_size,
                            num_workers=num_workers,
                            shuffle=True,
                            pin_memory=False,
                            drop_last = False,
                            collate_fn=dataset.collate_fn)
    # Start training
    t = time.time()
    # model_info(model)  # print model information
    nB = len(dataloader)
    n_burnin = min(round(nB / 5 + 1), 1000)  # burn-in batches
    best_loss = float('inf')
    test_loss = float('inf')
    flag_start = False
    for epoch in range(0, epochs):
        print(' ~~~~')
        model.train()
        # Step the LR scheduler once per epoch, skipping the very first epoch.
        if flag_start:
            scheduler.step()
        flag_start = True
        mloss = defaultdict(float)  # mean loss
        for i, (imgs, targets, img_path_, _) in enumerate(dataloader):
            multi_size = imgs.size()
            imgs = imgs.to(device)
            targets = targets.to(device)
            nt = len(targets)
            if nt == 0:  # if no targets continue
                continue
            # SGD burn-in: quartic ramp of the learning rate over the first batches.
            if epoch == 0 and i <= n_burnin:
                lr = lr0 * (i / n_burnin) ** 4
                for x in optimizer.param_groups:
                    x['lr'] = lr
            # Run model
            pred = model(imgs)
            # Build targets
            target_list = build_targets(model, targets)
            # Compute loss
            loss, loss_dict = compute_loss(pred, target_list)
            # Compute gradient
            loss.backward()
            # Accumulate gradient for x batches before optimizing
            if (i + 1) % accumulate == 0 or (i + 1) == nB:
                optimizer.step()
                optimizer.zero_grad()
            # Running epoch-means of tracked metrics
            for key, val in loss_dict.items():
                mloss[key] = (mloss[key] * i + val) / (i + 1)
            print(' Epoch {:3d}/{:3d}, Batch {:6d}/{:6d}, Img_size {}x{}, nTargets {}, lr {:.6f}, loss: xy {:.3f}, wh {:.3f}, '
                  'conf {:.3f}, cls {:.3f}, total {:.3f}, time {:.3f}s'.format(epoch, epochs - 1, i, nB - 1, multi_size[2], multi_size[3]
                  , nt, scheduler.get_lr()[0], mloss['xy'], mloss['wh'], mloss['conf'], mloss['cls'], mloss['total'], time.time() - t),
                  end = '\r')
            # NOTE(review): `s` is built but never used — presumably a leftover log line.
            s = ('%8s%12s' + '%10.3g' * 7) % ('%g/%g' % (epoch, epochs - 1), '%g/%g' % (i, nB - 1), mloss['xy'],
                 mloss['wh'], mloss['conf'], mloss['cls'], mloss['total'], nt, time.time() - t)
            t = time.time()
        print()
        # Create checkpoint
        chkpt = {'epoch': epoch,
                 'best_loss': best_loss,
                 'model': model.module.state_dict() if type(
                     model) is nn.parallel.DistributedDataParallel else model.state_dict(),
                 'optimizer': optimizer.state_dict()}
        # Save latest checkpoint
        torch.save(chkpt, latest)
        # Save best checkpoint
        # NOTE(review): best_loss and test_loss are both inf and never updated, so
        # this condition is always true on every 5th epoch — confirm intent.
        if best_loss == test_loss and epoch%5 == 0:
            torch.save(chkpt, best)
        # Save backup every 10 epochs (optional)
        if epoch > 0 and epoch % 5 == 0:
            torch.save(chkpt, weights + 'yoloV3_{}_epoch_{}.pt'.format(img_size,epoch))
        # Delete checkpoint
        del chkpt
#-------------------------------------------------------------------------------
if __name__ == '__main__':
    # Script entry point: train on the hand-detection config.
    train(data_cfg='cfg/hand.data')
    print('well done ~ ')
import os
import numpy as np
from collections import OrderedDict
import torch
import torch.nn.functional as F
import torch.nn as nn
# reference:
# https://github.com/ultralytics/yolov3/blob/master/models.py
# https://github.com/TencentYoutuResearch/ObjectDetection-OneStageDet/blob/master/yolo/vedanet/network/backbone/brick/darknet53.py
# network structure https://blog.csdn.net/u010397980/article/details/85058630
flag_yolo_structure = False # set True to print verbose network-structure logs
class Conv2dBatchLeaky(nn.Module):
    """Conv2d -> BatchNorm2d -> LeakyReLU with half-kernel ('same'-style) padding."""

    def __init__(self, in_channels, out_channels, kernel_size, stride, leaky_slope=0.1):
        super(Conv2dBatchLeaky, self).__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.stride = stride
        self.leaky_slope = leaky_slope
        # Padding is half the kernel size, computed per axis for tuple kernels.
        if isinstance(kernel_size, (list, tuple)):
            self.padding = [int(k / 2) for k in kernel_size]
            if flag_yolo_structure:
                print('------------------->>>> Conv2dBatchLeaky isinstance')
        else:
            self.padding = int(kernel_size / 2)
        # Layer
        # LeakyReLU : y = max(0, x) + leaky_slope*min(0,x)
        self.layers = nn.Sequential(
            nn.Conv2d(self.in_channels, self.out_channels, self.kernel_size,
                      self.stride, self.padding, bias=False),
            nn.BatchNorm2d(self.out_channels),
            nn.LeakyReLU(self.leaky_slope, inplace=True),
        )

    def forward(self, x):
        """Apply conv + batch-norm + leaky-ReLU to *x*."""
        return self.layers(x)
class ResBlockSum(nn.Module):
    """Darknet residual block: 1x1 squeeze + 3x3 expand, summed with the input."""

    def __init__(self, nchannels):
        super().__init__()
        half = int(nchannels / 2)
        self.block = nn.Sequential(
            Conv2dBatchLeaky(nchannels, half, 1, 1),
            Conv2dBatchLeaky(half, nchannels, 3, 1),
        )

    def forward(self, x):
        # Identity skip connection: element-wise sum of input and conv branch.
        return x + self.block(x)
class HeadBody(nn.Module):
    """Darknet 'convolutional set': five alternating 1x1 / 3x3 Conv2dBatchLeaky layers."""

    def __init__(self, in_channels, out_channels):
        super(HeadBody, self).__init__()
        mid = out_channels * 2
        self.layer = nn.Sequential(
            Conv2dBatchLeaky(in_channels, out_channels, 1, 1),
            Conv2dBatchLeaky(out_channels, mid, 3, 1),
            Conv2dBatchLeaky(mid, out_channels, 1, 1),
            Conv2dBatchLeaky(out_channels, mid, 3, 1),
            Conv2dBatchLeaky(mid, out_channels, 1, 1),
        )

    def forward(self, x):
        """Run the five-conv set over *x*."""
        return self.layer(x)
class Upsample(nn.Module):
    # Custom Upsample layer (nn.Upsample gives deprecated warning message)
    def __init__(self, scale_factor=1, mode='nearest'):
        """Store the interpolation settings used at forward time."""
        super(Upsample, self).__init__()
        self.scale_factor, self.mode = scale_factor, mode

    def forward(self, x):
        """Resize *x* spatially by ``scale_factor`` using ``mode`` interpolation."""
        out = F.interpolate(x, scale_factor=self.scale_factor, mode=self.mode)
        return out
# default anchors=[(10,13), (16,30), (33,23), (30,61), (62,45), (59,119), (116,90), (156,198), (373,326)]
class YOLOLayer(nn.Module):
    """One YOLO detection head.

    Reshapes a raw prediction feature map into per-anchor box predictions and,
    in eval mode, decodes them into image-space boxes using cached grid offsets
    and anchor sizes (see the module-level create_grids()).
    """
    def __init__(self, anchors, nC):
        super(YOLOLayer, self).__init__()
        self.anchors = torch.FloatTensor(anchors)
        self.nA = len(anchors) # number of anchors (3)
        self.nC = nC # number of classes
        self.img_size = 0  # 0 forces create_grids() on the first forward pass
        if flag_yolo_structure:
            print('init YOLOLayer ------ >>> ')
            print('anchors : ',self.anchors)
            print('nA : ',self.nA)
            print('nC : ',self.nC)
            print('img_size : ',self.img_size)
    def forward(self, p, img_size, var=None):# p : feature map
        bs, nG = p.shape[0], p.shape[-1] # batch_size , grid
        if flag_yolo_structure:
            print('bs, nG --->>> ',bs, nG)
        # Rebuild grid offsets/anchor scales whenever the input size changes.
        if self.img_size != img_size:
            create_grids(self, img_size, nG, p.device)
        # p.view(bs, 255, 13, 13) -- > (bs, 3, 13, 13, 85) # (bs, anchors, grid, grid, xywh + confidence + classes)
        p = p.view(bs, self.nA, self.nC + 5, nG, nG).permute(0, 1, 3, 4, 2).contiguous() # prediction
        if self.training:
            return p
        else: # inference: decode raw predictions into image-space boxes
            io = p.clone() # inference output
            io[..., 0:2] = torch.sigmoid(io[..., 0:2]) + self.grid_xy # xy
            io[..., 2:4] = torch.exp(io[..., 2:4]) * self.anchor_wh # wh yolo method
            io[..., 4:] = torch.sigmoid(io[..., 4:]) # p_conf, p_cls
            io[..., :4] *= self.stride  # grid units -> pixels
            if self.nC == 1:
                io[..., 5] = 1 # single-class model
            # flatten prediction, reshape from [bs, nA, nG, nG, nC] to [bs, nA * nG * nG, nC]
            return io.view(bs, -1, 5 + self.nC), p
def create_grids(self, img_size, nG, device='cpu'):
    """Precompute grid offsets and anchor scales for one YOLO layer.

    *self* is a YOLOLayer instance; this caches img_size, stride, grid_xy,
    anchor_vec, anchor_wh and nG on it for use during inference decoding.
    """
    # self.nA : len(anchors) # number of anchors (3)
    # self.nC : nC # number of classes
    # nG : feature map grid 13*13 26*26 52*52
    self.img_size = img_size
    self.stride = img_size / nG  # pixels per grid cell
    if flag_yolo_structure:
        print('create_grids stride : ',self.stride)
    # build xy offsets
    grid_x = torch.arange(nG).repeat((nG, 1)).view((1, 1, nG, nG)).float()
    grid_y = grid_x.permute(0, 1, 3, 2)  # transpose of grid_x
    self.grid_xy = torch.stack((grid_x, grid_y), 4).to(device)
    if flag_yolo_structure:
        print('grid_x : ',grid_x.size(),grid_x)
        print('grid_y : ',grid_y.size(),grid_y)
        print('grid_xy : ',self.grid_xy.size(),self.grid_xy)
    # build wh gains
    self.anchor_vec = self.anchors.to(device) / self.stride # anchors normalized by the stride
    # print('self.anchor_vecself.anchor_vecself.anchor_vec:',self.anchor_vec)
    self.anchor_wh = self.anchor_vec.view(1, self.nA, 1, 1, 2).to(device)
    self.nG = torch.FloatTensor([nG]).to(device)
def get_yolo_layer_index(module_list):
    """Return the indices of the entries in *module_list* that wrap a YOLO layer.

    Detection is by duck-typing: only a YOLOLayer exposes both ``img_size`` and
    ``nG`` on its first sub-module.

    Raises:
        AssertionError: if no YOLO layer is found.
    """
    yolo_layer_index = []
    for index, l in enumerate(module_list):
        try:
            # Touch both attributes; anything that is not a YOLO layer raises.
            l[0].img_size and l[0].nG # only yolo layer need img_size and nG
        except (AttributeError, TypeError, IndexError, KeyError):
            # Narrowed from a bare `except:` so unrelated bugs are not silenced.
            continue
        yolo_layer_index.append(index)
    assert len(yolo_layer_index) > 0, "can not find yolo layer"
    return yolo_layer_index
# ----------------------yolov3------------------------
class Yolov3(nn.Module):
    """Full YOLOv3 detector: Darknet-53 backbone plus three detection heads
    (13x13, 26x26 and 52x52 for a 416x416 input), stored as 13 sequential
    stages in a ModuleList and wired together (with two route concats) in
    forward().
    """
    def __init__(self, num_classes=80, anchors=[(10,13), (16,30), (33,23), (30,61), (62,45), (59,119), (116,90), (156,198), (373,326)]):
        # NOTE(review): mutable default argument (list) — shared across calls;
        # appears to be read-only here, but confirm it is never mutated.
        super().__init__()
        # Split the 9 anchors into three scale groups (index lists into `anchors`).
        anchor_mask1 = [i for i in range(2 * len(anchors) // 3, len(anchors), 1)] # [6, 7, 8]
        anchor_mask2 = [i for i in range(len(anchors) // 3, 2 * len(anchors) // 3, 1)] # [3, 4, 5]
        anchor_mask3 = [i for i in range(0, len(anchors) // 3, 1)] # [0, 1, 2]
        if flag_yolo_structure:
            print('anchor_mask1 : ',anchor_mask1) # anchors for large objects
            print('anchor_mask2 : ',anchor_mask2) # anchors for medium objects
            print('anchor_mask3 : ',anchor_mask3) # anchors for small objects
        # Network
        # OrderedDict is a dict subclass whose key feature is that it preserves
        # the insertion order of key-value pairs.
        layer_list = []
        '''
        ****** Conv2dBatchLeaky *****
        op : Conv2d,BatchNorm2d,LeakyReLU
        inputs : in_channels, out_channels, kernel_size, stride, leaky_slope
        '''
        '''
        ****** ResBlockSum ******
        op : Conv2dBatchLeaky * 2 + x
        inputs : nchannels
        '''
        # list 0
        layer_list.append(OrderedDict([
            ('0_stage1_conv', Conv2dBatchLeaky(3, 32, 3, 1, 1)), # 416 x 416 x 32 # Convolutional
            ("0_stage2_conv", Conv2dBatchLeaky(32, 64, 3, 2)), # 208 x 208 x 64 # Convolutional
            ("0_stage2_ressum1", ResBlockSum(64)), # Convolutional*2 + Resiudal
            ("0_stage3_conv", Conv2dBatchLeaky(64, 128, 3, 2)), # 104 x 104 128 # Convolutional
            ("0_stage3_ressum1", ResBlockSum(128)),
            ("0_stage3_ressum2", ResBlockSum(128)), # (Convolutional*2 + Resiudal)**2
            ("0_stage4_conv", Conv2dBatchLeaky(128, 256, 3, 2)), # 52 x 52 x 256 # Convolutional
            ("0_stage4_ressum1", ResBlockSum(256)),
            ("0_stage4_ressum2", ResBlockSum(256)),
            ("0_stage4_ressum3", ResBlockSum(256)),
            ("0_stage4_ressum4", ResBlockSum(256)),
            ("0_stage4_ressum5", ResBlockSum(256)),
            ("0_stage4_ressum6", ResBlockSum(256)),
            ("0_stage4_ressum7", ResBlockSum(256)),
            ("0_stage4_ressum8", ResBlockSum(256)), # 52 x 52 x 256 output_feature_0 (Convolutional*2 + Resiudal)**8
        ]))
        # list 1
        layer_list.append(OrderedDict([
            ("1_stage5_conv", Conv2dBatchLeaky(256, 512, 3, 2)), # 26 x 26 x 512 # Convolutional
            ("1_stage5_ressum1", ResBlockSum(512)),
            ("1_stage5_ressum2", ResBlockSum(512)),
            ("1_stage5_ressum3", ResBlockSum(512)),
            ("1_stage5_ressum4", ResBlockSum(512)),
            ("1_stage5_ressum5", ResBlockSum(512)),
            ("1_stage5_ressum6", ResBlockSum(512)),
            ("1_stage5_ressum7", ResBlockSum(512)),
            ("1_stage5_ressum8", ResBlockSum(512)), # 26 x 26 x 512 output_feature_1 # (Convolutional*2 + Resiudal)**8
        ]))
        '''
        ****** HeadBody ******
        op : Conv2dBatchLeaky * 5
        inputs : in_channels, out_channels
        '''
        # list 2
        layer_list.append(OrderedDict([
            ("2_stage6_conv", Conv2dBatchLeaky(512, 1024, 3, 2)), # 13 x 13 x 1024 # Convolutional
            ("2_stage6_ressum1", ResBlockSum(1024)),
            ("2_stage6_ressum2", ResBlockSum(1024)),
            ("2_stage6_ressum3", ResBlockSum(1024)),
            ("2_stage6_ressum4", ResBlockSum(1024)), # 13 x 13 x 1024 output_feature_2 # (Convolutional*2 + Resiudal)**4
            ("2_headbody1", HeadBody(in_channels=1024, out_channels=512)), # 13 x 13 x 512 # Convalutional Set = Conv2dBatchLeaky * 5
        ]))
        # list 3
        layer_list.append(OrderedDict([
            ("3_conv_1", Conv2dBatchLeaky(in_channels=512, out_channels=1024, kernel_size=3, stride=1)),
            ("3_conv_2", nn.Conv2d(in_channels=1024, out_channels=len(anchor_mask1) * (num_classes + 5), kernel_size=1, stride=1, padding=0, bias=True)),
        ])) # predict one
        # list 4
        layer_list.append(OrderedDict([
            ("4_yolo", YOLOLayer([anchors[i] for i in anchor_mask1], num_classes))
        ])) # 3*((x, y, w, h, confidence) + classes )
        # list 5
        layer_list.append(OrderedDict([
            ("5_conv", Conv2dBatchLeaky(512, 256, 1, 1)),
            ("5_upsample", Upsample(scale_factor=2)),
        ]))
        # list 6
        layer_list.append(OrderedDict([
            ("6_head_body2", HeadBody(in_channels=768, out_channels=256)) # Convalutional Set = Conv2dBatchLeaky * 5
        ]))
        # list 7
        layer_list.append(OrderedDict([
            ("7_conv_1", Conv2dBatchLeaky(in_channels=256, out_channels=512, kernel_size=3, stride=1)),
            ("7_conv_2", nn.Conv2d(in_channels=512, out_channels=len(anchor_mask2) * (num_classes + 5), kernel_size=1, stride=1, padding=0, bias=True)),
        ])) # predict two
        # list 8
        layer_list.append(OrderedDict([
            ("8_yolo", YOLOLayer([anchors[i] for i in anchor_mask2], num_classes))
        ])) # 3*((x, y, w, h, confidence) + classes )
        # list 9
        layer_list.append(OrderedDict([
            ("9_conv", Conv2dBatchLeaky(256, 128, 1, 1)),
            ("9_upsample", Upsample(scale_factor=2)),
        ]))
        # list 10
        layer_list.append(OrderedDict([
            ("10_head_body3", HeadBody(in_channels=384, out_channels=128)) # Convalutional Set = Conv2dBatchLeaky * 5
        ]))
        # list 11
        layer_list.append(OrderedDict([
            ("11_conv_1", Conv2dBatchLeaky(in_channels=128, out_channels=256, kernel_size=3, stride=1)),
            ("11_conv_2", nn.Conv2d(in_channels=256, out_channels=len(anchor_mask3) * (num_classes + 5), kernel_size=1, stride=1, padding=0, bias=True)),
        ])) # predict three
        # list 12
        layer_list.append(OrderedDict([
            ("12_yolo", YOLOLayer([anchors[i] for i in anchor_mask3], num_classes))
        ])) # 3*((x, y, w, h, confidence) + classes )
        # nn.ModuleList is similar to a Python list: it just stores a series of
        # layers; it does not implement forward() and has no forward side effects.
        self.module_list = nn.ModuleList([nn.Sequential(i) for i in layer_list])
        self.yolo_layer_index = get_yolo_layer_index(self.module_list)
        if flag_yolo_structure:
            print('yolo_layer : ',len(layer_list),'\n')
            print(self.module_list[4])
            print(self.module_list[8])
            print(self.module_list[12])
            # print('self.module_list -------->>> ',self.module_list)
            # print('self.yolo_layer_index -------->>> ',self.yolo_layer_index)
    def forward(self, x):
        # NOTE(review): img_size is taken from the last dim only — assumes square input.
        img_size = x.shape[-1]
        if flag_yolo_structure:
            print('forward img_size : ',img_size,x.shape)
        output = []
        x = self.module_list[0](x)  # backbone stages 1-4
        x_route1 = x                # 52x52 feature, routed to the small-object head
        x = self.module_list[1](x)  # stage 5
        x_route2 = x                # 26x26 feature, routed to the medium-object head
        x = self.module_list[2](x)  # stage 6 + head body
        yolo_head = self.module_list[3](x)
        if flag_yolo_structure:
            print('mask1 yolo_head : ',yolo_head.size())
        # [4][0] unwraps the Sequential to call the YOLOLayer with (x, img_size).
        yolo_head_out_13x13 = self.module_list[4][0](yolo_head, img_size)
        output.append(yolo_head_out_13x13)
        x = self.module_list[5](x)       # 1x1 conv + upsample to 26x26
        x = torch.cat([x, x_route2], 1)  # route concat with stage-5 feature
        x = self.module_list[6](x)
        yolo_head = self.module_list[7](x)
        if flag_yolo_structure:
            print('mask2 yolo_head : ',yolo_head.size())
        yolo_head_out_26x26 = self.module_list[8][0](yolo_head, img_size)
        output.append(yolo_head_out_26x26)
        x = self.module_list[9](x)       # 1x1 conv + upsample to 52x52
        x = torch.cat([x, x_route1], 1)  # route concat with stage-4 feature
        x = self.module_list[10](x)
        yolo_head = self.module_list[11](x)
        if flag_yolo_structure:
            print('mask3 yolo_head : ',yolo_head.size())
        yolo_head_out_52x52 = self.module_list[12][0](yolo_head, img_size)
        output.append(yolo_head_out_52x52)
        if self.training:
            return output
        else:
            io, p = list(zip(*output)) # inference output, training output
            return torch.cat(io, 1), p
# ----------------------yolov3 tiny------------------------
class EmptyLayer(nn.Module):
    """Placeholder for 'route' and 'shortcut' layers"""

    def __init__(self):
        super(EmptyLayer, self).__init__()

    def forward(self, x):
        # Identity: pass the input through unchanged.
        return x
class Yolov3Tiny(nn.Module):
    """YOLOv3-tiny detector: a small conv/maxpool backbone with two detection
    heads (13x13 and 26x26 for a 416x416 input) joined by one upsample route.

    The original darknet layer numbering is kept in the sub-module names; the
    'route' placeholders (module_list[4] and [6]) are EmptyLayers that are
    skipped in forward(), where the routing is done explicitly.
    """
    def __init__(self, num_classes=80, anchors=[(10, 14), (23, 27), (37, 58), (81, 82), (135, 169), (344, 319)]):
        # NOTE(review): mutable default argument (list) — shared across calls;
        # appears to be read-only here.
        super(Yolov3Tiny, self).__init__()
        # Split the 6 anchors into two scale groups (index lists into `anchors`).
        anchor_mask1 = [i for i in range(len(anchors) // 2, len(anchors), 1)] # [3, 4, 5]
        anchor_mask2 = [i for i in range(0, len(anchors) // 2, 1)] # [0, 1, 2]
        layer_list = []
        layer_list.append(OrderedDict([
            # layer 0
            ("conv_0", nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, stride=1, padding=1, bias=False)),
            ("batch_norm_0", nn.BatchNorm2d(16)),
            ("leaky_0", nn.LeakyReLU(0.1)),
            # layer 1
            ("maxpool_1", nn.MaxPool2d(kernel_size=2, stride=2, padding=0)),
            # layer 2
            ("conv_2", nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=1, padding=1, bias=False)),
            ("batch_norm_2", nn.BatchNorm2d(32)),
            ("leaky_2", nn.LeakyReLU(0.1)),
            # layer 3
            ("maxpool_3", nn.MaxPool2d(kernel_size=2, stride=2, padding=0)),
            # layer 4
            ("conv_4", nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1, bias=False)),
            ("batch_norm_4", nn.BatchNorm2d(64)),
            ("leaky_4", nn.LeakyReLU(0.1)),
            # layer 5
            ("maxpool_5", nn.MaxPool2d(kernel_size=2, stride=2, padding=0)),
            # layer 6
            ("conv_6", nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1, bias=False)),
            ("batch_norm_6", nn.BatchNorm2d(128)),
            ("leaky_6", nn.LeakyReLU(0.1)),
            # layer 7
            ("maxpool_7", nn.MaxPool2d(kernel_size=2, stride=2, padding=0)),
            # layer 8 -- output routed to the 26x26 head below
            ("conv_8", nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1, bias=False)),
            ("batch_norm_8", nn.BatchNorm2d(256)),
            ("leaky_8", nn.LeakyReLU(0.1)),
        ]))
        layer_list.append(OrderedDict([
            # layer 9
            ("maxpool_9", nn.MaxPool2d(kernel_size=2, stride=2, padding=0)),
            # layer 10
            ("conv_10", nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=1, padding=1, bias=False)),
            ("batch_norm_10", nn.BatchNorm2d(512)),
            ("leaky_10", nn.LeakyReLU(0.1)),
            # layer 11 -- zero-pad + stride-1 maxpool keeps the spatial size
            ('_debug_padding_11', nn.ZeroPad2d((0, 1, 0, 1))),
            ("maxpool_11", nn.MaxPool2d(kernel_size=2, stride=1, padding=0)),
            # layer 12
            ("conv_12", nn.Conv2d(in_channels=512, out_channels=1024, kernel_size=3, stride=1, padding=1, bias=False)),
            ("batch_norm_12", nn.BatchNorm2d(1024)),
            ("leaky_12", nn.LeakyReLU(0.1)),
            # layer 13 -- output routed to the upsample branch below
            ("conv_13", nn.Conv2d(in_channels=1024, out_channels=256, kernel_size=1, stride=1, padding=0, bias=False)),
            ("batch_norm_13", nn.BatchNorm2d(256)),
            ("leaky_13", nn.LeakyReLU(0.1)),
        ]))
        layer_list.append(OrderedDict([
            # layer 14
            ("conv_14", nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=1, padding=1, bias=False)),
            ("batch_norm_14", nn.BatchNorm2d(512)),
            ("leaky_14", nn.LeakyReLU(0.1)),
            # layer 15 -- prediction conv for the 13x13 head
            ("conv_15",
             nn.Conv2d(in_channels=512, out_channels=len(anchor_mask1) * (num_classes + 5), kernel_size=1, stride=1, padding=0, bias=True)),
        ]))
        # layer 16
        anchor_tmp1 = [anchors[i] for i in anchor_mask1]
        layer_list.append(OrderedDict([("yolo_16", YOLOLayer(anchor_tmp1, num_classes))]))
        # layer 17 -- route placeholder, skipped in forward()
        layer_list.append(OrderedDict([("route_17", EmptyLayer())]))
        layer_list.append(OrderedDict([
            # layer 18
            ("conv_18", nn.Conv2d(in_channels=256, out_channels=128, kernel_size=1, stride=1, padding=0, bias=False)),
            ("batch_norm_18", nn.BatchNorm2d(128)),
            ("leaky_18", nn.LeakyReLU(0.1)),
            # layer 19
            ("upsample_19", Upsample(scale_factor=2)),
        ]))
        # layer 20 -- route placeholder, skipped in forward()
        layer_list.append(OrderedDict([('route_20', EmptyLayer())]))
        layer_list.append(OrderedDict([
            # layer 21
            ("conv_21", nn.Conv2d(in_channels=384, out_channels=256, kernel_size=3, stride=1, padding=1, bias=False)),
            ("batch_norm_21", nn.BatchNorm2d(256)),
            ("leaky_21", nn.LeakyReLU(0.1)),
            # layer 22 -- prediction conv for the 26x26 head
            ("conv_22",
             nn.Conv2d(in_channels=256, out_channels=len(anchor_mask2) * (num_classes + 5), kernel_size=1, stride=1, padding=0, bias=True)),
        ]))
        # layer 23
        anchor_tmp2 = [anchors[i] for i in anchor_mask2]
        layer_list.append(OrderedDict([("yolo_23", YOLOLayer(anchor_tmp2, num_classes))]))
        self.module_list = nn.ModuleList([nn.Sequential(layer) for layer in layer_list])
        self.yolo_layer_index = get_yolo_layer_index(self.module_list)
    def forward(self, x):
        # NOTE(review): img_size is taken from the last dim only — assumes square input.
        img_size = x.shape[-1]
        output = []
        x = self.module_list[0](x) # layer0 to layer8
        x_route8 = x
        x = self.module_list[1](x) # layer9 to layer13
        x_route13 = x
        x = self.module_list[2](x) # layer14, layer15
        # [3][0] unwraps the Sequential to call the YOLOLayer with (x, img_size).
        x = self.module_list[3][0](x, img_size) # yolo_16
        output.append(x)
        x = self.module_list[5](x_route13) # layer18, layer19
        x = torch.cat([x, x_route8], 1) # route
        x = self.module_list[7](x) # layer21, layer22
        x = self.module_list[8][0](x, img_size) # yolo_23
        output.append(x)
        if self.training:
            return output
        else:
            io, p = list(zip(*output)) # inference output, training output
            return torch.cat(io, 1), p
if __name__ == "__main__":
dummy_input = torch.Tensor(5, 3, 416, 416)
model = Yolov3(num_classes=80)
params = list(model.parameters())
k = 0
for i in params:
l = 1
for j in i.size():
l *= j
# print("该层的结构: {}, 参数和: {}".format(str(list(i.size())), str(l)))
k = k + l
print("----------------------")
print("总参数数量和: " + str(k))
print("-----------yolo layer")
for index in model.yolo_layer_index:
print(model.module_list[index])
print("-----------train")
model.train()
for res in model(dummy_input):
print("res:", np.shape(res))
print("-----------eval")
model.eval()
inference_out, train_out = model(dummy_input)
print("inference_out:", np.shape(inference_out))
for o in train_out:
print("train_out:", np.shape(o))
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册