From 6c4b51d53811aee909495efb478597d39da80399 Mon Sep 17 00:00:00 2001
From: "Eric.Lee2021" <305141918@qq.com>
Date: Tue, 8 Jun 2021 16:47:48 +0800
Subject: [PATCH] Add a training mode for VOC-format datasets; so far only
 single-class datasets have been verified
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
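
A rough usage sketch (paths below are placeholders, not part of this
patch): point `train_voc` in the .data config at a flat directory of
paired .jpg/.xml files, optionally preview the annotations, then train.

    # cfg/hand.data
    train_voc=/path/to/hand_voc_dataset/

    python show_yolo_anno_voc.py   # visual sanity check of the boxes
    python train_voc.py            # trains with data_cfg="cfg/hand.data"
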
---
 cfg/hand.data         |  15 +-
 cfg/hand.names        |   2 +-
 show_yolo_anno_voc.py |  55 +++++++
 train_voc.py          | 215 +++++++++++++++++++++++++++
 utils/datasets_voc.py | 329 ++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 608 insertions(+), 8 deletions(-)
 create mode 100644 show_yolo_anno_voc.py
 create mode 100644 train_voc.py
 create mode 100644 utils/datasets_voc.py

diff --git a/cfg/hand.data b/cfg/hand.data
index d3236a4..ffb6563 100644
--- a/cfg/hand.data
+++ b/cfg/hand.data
@@ -1,17 +1,18 @@
 cfg_model=yolo
 classes=1
 gpus = 0
-num_workers = 12
-batch_size = 8
+num_workers = 6
+batch_size = 9
 img_size = 416
 multi_scale = True
-epochs = 100
+epochs = 320
 train=D:/m_cc/yolov3_pytorch/datasets_fusion_hand_train/anno/train.txt
 valid=D:/m_cc/yolov3_pytorch/datasets_fusion_hand_train/anno/train.txt
 names=./cfg/hand.names
 #finetune_model=./finetune_model/yolov3_coco.pt
 #finetune_model = ./weights-yolov3/latest.pt
-finetune_model = ./weights-yolov3-hand/latest_416.pt
-#finetune_model = ./weights-yolov3-face-tiny/latest_416.pt
-lr_step = 20,50,80
-lr0 = 0.0001
+finetune_model = ./weights-yolov3-hand/yoloV3_416_epoch_12.pt
+#finetune_model = ./weights-yolov3-face-tiny/hand_416-2021-01-29.pt
+lr_step = 120,220,280
+lr0 = 0.00002
+train_voc=G:/tvcoco-hand/
diff --git a/cfg/hand.names b/cfg/hand.names
index d87e264..4eb9b0e 100644
--- a/cfg/hand.names
+++ b/cfg/hand.names
@@ -1 +1 @@
-Hand
+hand
diff --git a/show_yolo_anno_voc.py b/show_yolo_anno_voc.py
new file mode 100644
index 0000000..7109b89
--- /dev/null
+++ b/show_yolo_anno_voc.py
@@ -0,0 +1,55 @@
+#-*-coding:utf-8-*-
+# date:2021-05
+# Author: Eric.Lee
+# function: visualize VOC-format annotations for YOLO training data
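+#
+# The script expects, next to each .jpg, an .xml file in the usual Pascal
+# VOC layout. Minimal illustrative example (values are hypothetical):
+#
+#   <annotation>
+#     <object>
+#       <name>hand</name>
+#       <bndbox>
+#         <xmin>48</xmin><ymin>240</ymin>
+#         <xmax>195</xmax><ymax>371</ymax>
+#       </bndbox>
+#     </object>
+#   </annotation>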
+
+import cv2
+import os
+import numpy as np
+import xml.etree.cElementTree as et
+
+if __name__ == "__main__":
+
+    path = 'G:/hand_detect_datasets-0/'
+    path_voc_names = './cfg/hand.names'
+
+    with open(path_voc_names, 'r') as f:
+        label_map = f.readlines()
+    label_dict = {}
+    for i in range(len(label_map)):
+        label_map[i] = label_map[i].strip()
+        print(i, ') ', label_map[i])
+        label_dict[label_map[i]] = i
+
+    print("label_dict : {}".format(label_dict))
+
+    for file in os.listdir(path):
+        if ".jpg" in file:
+            path_img = path + file
+            path_label = path_img.replace(".jpg", ".xml")
+            if not os.access(path_label, os.F_OK):
+                continue
+            img = cv2.imread(path_img)
+
+            tree = et.parse(path_label)
+            root = tree.getroot()
+            for Object in root.findall('object'):
+                name = Object.find('name').text
+
+                bndbox = Object.find('bndbox')
+                x1 = np.float32(bndbox.find('xmin').text)
+                y1 = np.float32(bndbox.find('ymin').text)
+                x2 = np.float32(bndbox.find('xmax').text)
+                y2 = np.float32(bndbox.find('ymax').text)
+
+                cv2.rectangle(img, (int(x1), int(y1)), (int(x2), int(y2)), (255, 100, 100), 2)
+
+                cv2.putText(img, "{}".format(name), (int(x1), int(y1)),
+                            cv2.FONT_HERSHEY_PLAIN, 2.5, (0, 55, 255), 6)
+                cv2.putText(img, "{}".format(name), (int(x1), int(y1)),
+                            cv2.FONT_HERSHEY_PLAIN, 2.5, (0, 155, 255), 2)
+
+            cv2.namedWindow('image', 0)
+            cv2.imshow('image', img)
+            if cv2.waitKey(30) == 27:  # Esc to quit
+                break
+    cv2.destroyAllWindows()
diff --git a/train_voc.py b/train_voc.py
new file mode 100644
index 0000000..2dadf59
--- /dev/null
+++ b/train_voc.py
@@ -0,0 +1,215 @@
+#coding:utf-8
+# date:2021-06
+# Author: Eric.Lee
+# function: train on VOC-format data
+import os
+import time
+from collections import defaultdict
+
+import numpy as np
+import torch
+import torch.nn as nn
+from torch.utils.data import DataLoader
+
+from yolov3 import Yolov3, Yolov3Tiny
+from utils.parse_config import parse_data_cfg
+from utils.torch_utils import select_device
+from utils.datasets_voc import LoadImagesAndLabels
+from utils.utils import *
+
+def set_learning_rate(optimizer, lr):
+    for param_group in optimizer.param_groups:
+        param_group['lr'] = lr
+
+def train(data_cfg='cfg/voc.data', accumulate=1):
+    # Configure run
+    get_data_cfg = parse_data_cfg(data_cfg)  # returns the training config as a dict
+
+    gpus = get_data_cfg['gpus']
+    voc_names = get_data_cfg['names']
+
+    num_workers = int(get_data_cfg['num_workers'])
+    cfg_model = get_data_cfg['cfg_model']
+    train_path = get_data_cfg['train_voc']
+    num_classes = int(get_data_cfg['classes'])
+    finetune_model = get_data_cfg['finetune_model']
+    batch_size = int(get_data_cfg['batch_size'])
+    img_size = int(get_data_cfg['img_size'])
+    multi_scale = get_data_cfg['multi_scale']
+    epochs = int(get_data_cfg['epochs'])
+    lr_step = str(get_data_cfg['lr_step'])
+    lr0 = float(get_data_cfg['lr0'])
+
+    os.environ['CUDA_VISIBLE_DEVICES'] = gpus
+    device = select_device()
+
+    multi_scale = (multi_scale == 'True')
+
+    print('data_cfg        : ', data_cfg)
+    print('config entries  : ', len(get_data_cfg))
+    print('gpus            : ', gpus)
+    print('num_workers     : ', num_workers)
+    print('model           : ', cfg_model)
+    print('finetune_model  : ', finetune_model)
+    print('train_voc_path  : ', train_path)
+    print('num_classes     : ', num_classes)
+    print('batch_size      : ', batch_size)
+    print('img_size        : ', img_size)
+    print('multi_scale     : ', multi_scale)
+    print('lr_step         : ', lr_step)
+    print('lr0             : ', lr0)
+
+    # Load model; anchors are rescaled from the 416-px defaults to img_size
+    pattern_data_ = data_cfg.split("/")[-1].replace(".data", "")
+    if "-tiny" in cfg_model:
+        a_scale = 416. / img_size
+        anchors = [(10, 14), (23, 27), (37, 58), (81, 82), (135, 169), (344, 319)]
+        anchors_new = [(int(w / a_scale), int(h / a_scale)) for (w, h) in anchors]
+
+        model = Yolov3Tiny(num_classes, anchors=anchors_new)
+        weights = './weights-yolov3-{}-tiny/'.format(pattern_data_)
+    else:
+        a_scale = 416. / img_size
+        anchors = [(10, 13), (16, 30), (33, 23), (30, 61), (62, 45), (59, 119), (116, 90), (156, 198), (373, 326)]
+        anchors_new = [(int(w / a_scale), int(h / a_scale)) for (w, h) in anchors]
+        model = Yolov3(num_classes, anchors=anchors_new)
+        weights = './weights-yolov3-{}/'.format(pattern_data_)
+    # Create the checkpoint directory if needed
+    if not os.path.exists(weights):
+        os.mkdir(weights)
+
+    model = model.to(device)
+    latest = weights + 'latest_{}.pt'.format(img_size)
+    best = weights + 'best_{}.pt'.format(img_size)
+    # Optimizer
+    optimizer = torch.optim.SGD(model.parameters(), lr=lr0, momentum=0.9, weight_decay=0.0005)
+
+    start_epoch = 0
+    best_loss = float('inf')
+
+    if os.access(finetune_model, os.F_OK):  # load retrain/finetune model
+        print('loading yolo-v3 finetune_model : ', finetune_model)
+        not_load_filters = 3 * (80 + 5)  # voc: 3*(20+5), coco: 3*(80+5)=255
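+        # Why 3*(num_classes+5): each detection head predicts 3 anchors, and
+        # each anchor needs 4 box offsets + 1 objectness + num_classes class
+        # scores. For COCO that is 3*(80+5)=255, so conv filters of size 255
+        # are skipped below when fine-tuning to a different class count.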
+        chkpt = torch.load(finetune_model, map_location=device)
+        model.load_state_dict({k: v for k, v in chkpt['model'].items()
+                               if v.numel() > 1 and v.shape[0] != not_load_filters}, strict=False)
+        # model.load_state_dict(chkpt['model'])
+        if 'coco' not in finetune_model:
+            start_epoch = chkpt['epoch']
+            if chkpt['optimizer'] is not None:
+                optimizer.load_state_dict(chkpt['optimizer'])
+                best_loss = chkpt['best_loss']
+
+    # Set scheduler; gamma is the multiplicative factor of learning-rate decay
+    milestones = [int(i) for i in lr_step.split(",")]
+    print('milestones : ', milestones)
+    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=milestones, gamma=0.1,
+                                                     last_epoch=start_epoch - 1)
+
+    # Dataset
+    print('multi_scale : ', multi_scale)
+    dataset = LoadImagesAndLabels(train_path, voc_names=voc_names, batch_size=batch_size,
+                                  img_size=img_size, augment=True, multi_scale=multi_scale)
+    print('--------------->>> image num : ', dataset.__len__())
+    # Dataloader
+    dataloader = DataLoader(dataset,
+                            batch_size=batch_size,
+                            num_workers=num_workers,
+                            shuffle=True,
+                            pin_memory=False,
+                            drop_last=False,
+                            collate_fn=dataset.collate_fn)
+
+    # Start training
+    t = time.time()
+    # model_info(model)  # print model summary
+    nB = len(dataloader)
+    n_burnin = min(round(nB / 5 + 3), 1000)  # burn-in batches
+
+    flag_start = False
+
+    for epoch in range(0, epochs):
+
+        print(' ~~~~')
+        model.train()
+
+        if flag_start:  # skip the scheduler step on the first epoch
+            scheduler.step()
+        flag_start = True
+
+        mloss = defaultdict(float)  # mean loss
+        for i, (imgs, targets, img_path_, _) in enumerate(dataloader):
+            multi_size = imgs.size()
+            imgs = imgs.to(device)
+            targets = targets.to(device)
+
+            nt = len(targets)
+            if nt == 0:  # skip batches with no targets
+                continue
+
+            # SGD burn-in
+            if epoch == 0 and i <= n_burnin:
+                lr = lr0 * (i / n_burnin) ** 4
+                for x in optimizer.param_groups:
+                    x['lr'] = lr
+
+            # Run model
+            pred = model(imgs)
+
+            # Build targets
+            target_list = build_targets(model, targets)
+
+            # Compute loss
+            loss, loss_dict = compute_loss(pred, target_list)
+
+            # Compute gradient
+            loss.backward()
+
+            # Accumulate gradient for x batches before optimizing
+            if (i + 1) % accumulate == 0 or (i + 1) == nB:
+                optimizer.step()
+                optimizer.zero_grad()
+
+            # Running epoch-means of tracked metrics
+            for key, val in loss_dict.items():
+                mloss[key] = (mloss[key] * i + val) / (i + 1)
+
+            print(' Epoch {:3d}/{:3d}, Batch {:6d}/{:6d}, Img_size {}x{}, nTargets {}, lr {:.6f}, loss: xy {:.3f}, wh {:.3f}, '
+                  'conf {:.3f}, cls {:.3f}, total {:.3f}, time {:.3f}s'.format(
+                      epoch, epochs - 1, i, nB - 1, multi_size[2], multi_size[3], nt,
+                      scheduler.get_lr()[0], mloss['xy'], mloss['wh'], mloss['conf'],
+                      mloss['cls'], mloss['total'], time.time() - t),
+                  end='\r')
+
+        t = time.time()
+        print()
+
+        # Track the best (lowest) mean epoch loss
+        if mloss['total'] < best_loss:
+            best_loss = mloss['total']
+
+        # Create checkpoint
+        chkpt = {'epoch': epoch,
+                 'best_loss': best_loss,
+                 'model': model.module.state_dict() if type(
+                     model) is nn.parallel.DistributedDataParallel else model.state_dict(),
+                 'optimizer': optimizer.state_dict()}
+
+        # Save latest checkpoint
+        torch.save(chkpt, latest)
+
+        # Save best checkpoint whenever this epoch set a new best loss
+        if best_loss == mloss['total']:
+            torch.save(chkpt, best)
+
+        # Save a named backup checkpoint every epoch
+        if True:  # epoch > 0 and epoch % 5 == 0:
+            torch.save(chkpt, weights + 'yoloV3_{}_epoch_{}.pt'.format(img_size, epoch + 1))
+
+        # Delete checkpoint
+        del chkpt
+#-------------------------------------------------------------------------------
+if __name__ == '__main__':
+
+    train(data_cfg="cfg/hand.data")
+    # train(data_cfg = "cfg/face.data")
+    # train(data_cfg = "cfg/person.data")
+    # train(data_cfg = "cfg/helmet.data")
+    # train(data_cfg = "cfg/transport.data")
+
+    print('well done ~ ')
diff --git a/utils/datasets_voc.py b/utils/datasets_voc.py
new file mode 100644
index 0000000..b94bef2
--- /dev/null
+++ b/utils/datasets_voc.py
@@ -0,0 +1,329 @@
+# date:2021-06
+# Author: Eric.Lee
+# function: dataloader for VOC-format data
+
+import math
+import os
+import random
+import xml.etree.cElementTree as et
+
+import cv2
+import numpy as np
+import torch
+from torch.utils.data import Dataset
+
+def xyxy2xywh(x):
+    # Convert bounding box format from [x1, y1, x2, y2] to [x_center, y_center, w, h]
+    y = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x)
+    y[:, 0] = (x[:, 0] + x[:, 2]) / 2
+    y[:, 1] = (x[:, 1] + x[:, 3]) / 2
+    y[:, 2] = x[:, 2] - x[:, 0]
+    y[:, 3] = x[:, 3] - x[:, 1]
+    return y
+
+
+def xywh2xyxy(x):
+    # Convert bounding box format from [x_center, y_center, w, h] to [x1, y1, x2, y2]
+    y = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x)
+    y[:, 0] = x[:, 0] - x[:, 2] / 2
+    y[:, 1] = x[:, 1] - x[:, 3] / 2
+    y[:, 2] = x[:, 0] + x[:, 2] / 2
+    y[:, 3] = x[:, 1] + x[:, 3] / 2
+    return y
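+
+# Quick worked example of the two converters above (hypothetical values):
+#   xyxy2xywh(np.array([[10., 20., 50., 80.]])) -> [[30., 50., 40., 60.]]
+#   xywh2xyxy(np.array([[30., 50., 40., 60.]])) -> [[10., 20., 50., 80.]]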
+
+def read_label_xml(path_label):
+    # Count the objects annotated in a VOC xml file
+    tree = et.parse(path_label)
+    root = tree.getroot()
+    return len(root.findall('object'))
+
+class LoadImagesAndLabels(Dataset):  # for training/testing
+    def __init__(self, path, voc_names, batch_size, img_size=416, augment=True, multi_scale=False):
+        print('LoadImagesAndLabels init : ', path)
+        # Parse the class names file
+        with open(voc_names, 'r') as f:
+            label_map = f.readlines()
+        label_voc_dict = {}  # maps class name -> numeric label
+        obj_num_sum = 0  # total number of annotated objects
+        for i in range(len(label_map)):
+            label_map[i] = label_map[i].strip()
+            print(i, ') ', label_map[i])
+            label_voc_dict[label_map[i]] = i
+
+        print("label_voc_dict : {}".format(label_voc_dict))
+        img_files = []
+        label_files = []
+        for file in os.listdir(path):
+            if ".jpg" in file:
+                path_img = path + file
+                path_label = path_img.replace(".jpg", ".xml")
+                if not os.access(path_label, os.F_OK):
+                    continue
+                obj_num = read_label_xml(path_label)
+                if obj_num == 0:  # skip empty xml annotation files
+                    continue
+                obj_num_sum += obj_num
+                img_files.append(path_img)
+                label_files.append(path_label)
+
+        self.label_voc_dict = label_voc_dict
+        self.img_files = img_files
+        assert len(self.img_files) > 0, 'No images found in %s' % path
+        self.img_size = img_size
+        self.batch_size = batch_size
+        self.multi_scale = multi_scale
+        self.augment = augment
+        self.scale_index = 0
+        if self.multi_scale:
+            self.img_size = img_size  # initiate with maximum multi-scale size, in case of out-of-memory
+            print("Multi scale images training, init img_size", self.img_size)
+        else:
+            print("Fixed scale images, img_size", self.img_size)
+        self.label_files = label_files
+
+        print("init voc data_iter done ~")
+        print("obj_num_sum : {}".format(obj_num_sum))
+
+    def __len__(self):
+        return len(self.img_files)
+
+    def __getitem__(self, index):
+
+        # Re-sample the training resolution once per batch-worth of samples
+        if self.multi_scale and (self.scale_index % self.batch_size == 0) and self.scale_index != 0:
+            # self.img_size = random.choice(range(11, 18)) * 32
+            self.img_size = random.choice(range(12, 15)) * 32
+            # print("++++++ change img_size, index:", self.img_size, index)
+        if self.multi_scale:
+            self.scale_index += 1
+            if self.scale_index >= (100 * self.batch_size):
+                self.scale_index = 0
+
+        img_path = self.img_files[index]
+        label_path = self.label_files[index]
+
+        img = cv2.imread(img_path)  # BGR
+        assert img is not None, 'File Not Found ' + img_path
+
+        augment_hsv = random.random() < 0.5  # hsv_aug prob = 0.5
+        if self.augment and augment_hsv:
+            # SV augmentation by 50%
+            fraction = 0.50  # must be < 1.0
+            img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
+            S = img_hsv[:, :, 1].astype(np.float32)
+            V = img_hsv[:, :, 2].astype(np.float32)
+
+            a = (random.random() * 2 - 1) * fraction + 1  # a in [0.5, 1.5]
+            S *= a
+            if a > 1:
+                np.clip(S, None, 255, out=S)
+
+            a = (random.random() * 2 - 1) * fraction + 1
+            V *= a
+            if a > 1:
+                np.clip(V, None, 255, out=V)
+
+            img_hsv[:, :, 1] = S  # .astype(np.uint8)
+            img_hsv[:, :, 2] = V  # .astype(np.uint8)
+            cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)
+
+        h, w, _ = img.shape
+        img, ratio, padw, padh = letterbox(img, height=self.img_size, augment=self.augment)
+
+        # Load labels from the VOC xml annotation
+        tree = et.parse(label_path)
+        root = tree.getroot()
+        labels = []
+        x = []
+        for Object in root.findall('object'):
+            name_ = Object.find('name').text
+
+            bndbox = Object.find('bndbox')
+            xmin = np.float32(bndbox.find('xmin').text)
+            ymin = np.float32(bndbox.find('ymin').text)
+            xmax = np.float32(bndbox.find('xmax').text)
+            ymax = np.float32(bndbox.find('ymax').text)
+
+            xmin = np.clip(xmin, 0, w - 1)
+            ymin = np.clip(ymin, 0, h - 1)
+            xmax = np.clip(xmax, 0, w - 1)
+            ymax = np.clip(ymax, 0, h - 1)
+
+            # Normalized center/size relative to the original image
+            x_mid = (xmax + xmin) / 2. / float(w)
+            y_mid = (ymax + ymin) / 2. / float(h)
+            w_box = (xmax - xmin) / float(w)
+            h_box = (ymax - ymin) / float(h)
+
+            x.append((self.label_voc_dict[name_], x_mid, y_mid, w_box, h_box))
+        x = np.array(x, dtype=np.float32)
+        if x.size > 0:
+            # Map normalized xywh to pixel xyxy in the letterboxed image
+            labels = x.copy()
+            labels[:, 1] = ratio * w * (x[:, 1] - x[:, 3] / 2) + padw
+            labels[:, 2] = ratio * h * (x[:, 2] - x[:, 4] / 2) + padh
+            labels[:, 3] = ratio * w * (x[:, 1] + x[:, 3] / 2) + padw
+            labels[:, 4] = ratio * h * (x[:, 2] + x[:, 4] / 2) + padh
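+            # Worked example of the mapping (hypothetical numbers): a 640x480
+            # image letterboxed to 416 gives ratio=416/640=0.65, padw=0 and
+            # padh=(416-312)/2=52, so a normalized x of 0.5 maps to
+            # 0.65*640*0.5 + 0 = 208 px in the padded image.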
+
+        # Augment image and labels
+        if self.augment:
+            img, labels = random_affine(img, labels, degrees=(-30, 30), translate=(0.10, 0.10), scale=(0.9, 1.1))
+
+        nL = len(labels)  # number of labels
+        if nL:
+            # convert pixel xyxy back to normalized xywh
+            labels[:, 1:5] = xyxy2xywh(labels[:, 1:5]) / self.img_size
+
+        if self.augment:
+            # random left-right flip
+            lr_flip = True
+            if lr_flip and random.random() > 0.5:
+                img = np.fliplr(img)
+                if nL:
+                    labels[:, 1] = 1 - labels[:, 1]
+
+            # random up-down flip
+            ud_flip = True
+            if ud_flip and random.random() > 0.5:
+                img = np.flipud(img)
+                if nL:
+                    labels[:, 2] = 1 - labels[:, 2]
+
+        labels_out = torch.zeros((nL, 6))  # extra leading column holds the image index within the batch
+        if nL:
+            labels_out[:, 1:] = torch.from_numpy(labels)
+
+        # Normalize
+        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
+        img = np.ascontiguousarray(img, dtype=np.float32)  # uint8 to float32
+        img /= 255.0  # 0 - 255 to 0.0 - 1.0
+
+        return torch.from_numpy(img), labels_out, img_path, (h, w)
+
+    @staticmethod
+    def collate_fn(batch):
+        img, label, path, hw = list(zip(*batch))  # transposed
+        for i, l in enumerate(label):
+            l[:, 0] = i  # record which image in the batch each object belongs to
+        return torch.stack(img, 0), torch.cat(label, 0), path, hw
+
+
+def letterbox(img, height=416, augment=False, color=(127.5, 127.5, 127.5)):
+    # Resize a rectangular image to a padded square
+    shape = img.shape[:2]  # shape = [height, width]
+    ratio = float(height) / max(shape)  # ratio = new / old
+    new_shape = (round(shape[1] * ratio), round(shape[0] * ratio))
+    dw = (height - new_shape[0]) / 2  # width padding
+    dh = (height - new_shape[1]) / 2  # height padding
+    top, bottom = round(dh - 0.1), round(dh + 0.1)
+    left, right = round(dw - 0.1), round(dw + 0.1)
+    # resize img
+    if augment:
+        interpolation = np.random.choice([None, cv2.INTER_NEAREST, cv2.INTER_LINEAR,
+                                          None, cv2.INTER_NEAREST, cv2.INTER_LINEAR,
+                                          cv2.INTER_AREA, cv2.INTER_CUBIC, cv2.INTER_LANCZOS4])
+        if interpolation is None:
+            img = cv2.resize(img, new_shape)
+        else:
+            img = cv2.resize(img, new_shape, interpolation=interpolation)
+    else:
+        img = cv2.resize(img, new_shape, interpolation=cv2.INTER_NEAREST)
+
+    img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # padded square
+    return img, ratio, dw, dh
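+
+
+# Letterbox worked example (hypothetical numbers): a 1280x720 frame with
+# height=416 gives ratio=416/1280=0.325, a 416x234 resize, dw=0 and
+# dh=(416-234)/2=91, i.e. 91 px of gray padding on top and bottom.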
+
+def random_affine(img, targets=(), degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-2, 2),
+                  borderValue=(127.5, 127.5, 127.5)):
+    # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))
+    # https://medium.com/uruvideo/dataset-augmentation-with-random-homographies-a8f4b44830d4
+
+    if targets is None:
+        targets = []
+    border = 0  # width of added border (optional)
+    height = max(img.shape[0], img.shape[1]) + border * 2
+
+    # Rotation and Scale
+    R = np.eye(3)
+    a = random.random() * (degrees[1] - degrees[0]) + degrees[0]
+    # a += random.choice([-180, -90, 0, 90])  # 90deg rotations added to small rotations
+    s = random.random() * (scale[1] - scale[0]) + scale[0]
+    R[:2] = cv2.getRotationMatrix2D(angle=a, center=(img.shape[1] / 2, img.shape[0] / 2), scale=s)
+
+    # Translation
+    T = np.eye(3)
+    T[0, 2] = (random.random() * 2 - 1) * translate[0] * img.shape[0] + border  # x translation (pixels)
+    T[1, 2] = (random.random() * 2 - 1) * translate[1] * img.shape[1] + border  # y translation (pixels)
+
+    # Shear
+    S = np.eye(3)
+    S[0, 1] = math.tan((random.random() * (shear[1] - shear[0]) + shear[0]) * math.pi / 180)  # x shear (deg)
+    S[1, 0] = math.tan((random.random() * (shear[1] - shear[0]) + shear[0]) * math.pi / 180)  # y shear (deg)
+
+    M = S @ T @ R  # Combined rotation matrix. ORDER IS IMPORTANT HERE!!
+    imw = cv2.warpPerspective(img, M, dsize=(height, height), flags=cv2.INTER_LINEAR,
+                              borderValue=borderValue)  # BGR order borderValue
+
+    # Return warped points also
+    if len(targets) > 0:
+        n = targets.shape[0]
+        points = targets[:, 1:5].copy()
+        area0 = (points[:, 2] - points[:, 0]) * (points[:, 3] - points[:, 1])
+
+        # warp points
+        xy = np.ones((n * 4, 3))
+        xy[:, :2] = points[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(n * 4, 2)  # x1y1, x2y2, x1y2, x2y1
+        xy = (xy @ M.T)[:, :2].reshape(n, 8)
+
+        # create new boxes
+        x = xy[:, [0, 2, 4, 6]]
+        y = xy[:, [1, 3, 5, 7]]
+        xy = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T
+
+        # apply angle-based reduction of bounding boxes
+        radians = a * math.pi / 180
+        reduction = max(abs(math.sin(radians)), abs(math.cos(radians))) ** 0.5
+        x = (xy[:, 2] + xy[:, 0]) / 2
+        y = (xy[:, 3] + xy[:, 1]) / 2
+        w = (xy[:, 2] - xy[:, 0]) * reduction
+        h = (xy[:, 3] - xy[:, 1]) * reduction
+        xy = np.concatenate((x - w / 2, y - h / 2, x + w / 2, y + h / 2)).reshape(4, n).T
+
+        # reject warped points outside of image
+        np.clip(xy, 0, height, out=xy)
+        w = xy[:, 2] - xy[:, 0]
+        h = xy[:, 3] - xy[:, 1]
+        area = w * h
+        ar = np.maximum(w / (h + 1e-16), h / (w + 1e-16))
+        i = (w > 4) & (h > 4) & (area / (area0 + 1e-16) > 0.1) & (ar < 10)
+
+        targets = targets[i]
+        targets[:, 1:5] = xy[i]
+
+    return imw, targets
-- 
GitLab