From 6c4b51d53811aee909495efb478597d39da80399 Mon Sep 17 00:00:00 2001
From: "Eric.Lee2021" <305141918@qq.com>
Date: Tue, 8 Jun 2021 16:47:48 +0800
Subject: [PATCH] Add a training mode for VOC-format datasets; so far only
 single-class datasets have been verified
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
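
A rough usage sketch (paths below are placeholders, not part of this
patch): point `train_voc` in the .data config at a flat directory of
paired .jpg/.xml files, optionally preview the annotations, then train.

    # cfg/hand.data
    train_voc=/path/to/hand_voc_dataset/

    python show_yolo_anno_voc.py   # visual sanity check of the boxes
    python train_voc.py            # trains with data_cfg="cfg/hand.data"
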
---
 cfg/hand.data         |  15 +-
 cfg/hand.names        |   2 +-
 show_yolo_anno_voc.py |  55 +++++++
 train_voc.py          | 215 +++++++++++++++++++++++++++
 utils/datasets_voc.py | 329 ++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 608 insertions(+), 8 deletions(-)
 create mode 100644 show_yolo_anno_voc.py
 create mode 100644 train_voc.py
 create mode 100644 utils/datasets_voc.py

diff --git a/cfg/hand.data b/cfg/hand.data
index d3236a4..ffb6563 100644
--- a/cfg/hand.data
+++ b/cfg/hand.data
@@ -1,17 +1,18 @@
 cfg_model=yolo
 classes=1
 gpus = 0
-num_workers = 12
-batch_size = 8
+num_workers = 6
+batch_size = 9
 img_size = 416
 multi_scale = True
-epochs = 100
+epochs = 320
 train=D:/m_cc/yolov3_pytorch/datasets_fusion_hand_train/anno/train.txt
 valid=D:/m_cc/yolov3_pytorch/datasets_fusion_hand_train/anno/train.txt
 names=./cfg/hand.names
 #finetune_model=./finetune_model/yolov3_coco.pt
 #finetune_model = ./weights-yolov3/latest.pt
-finetune_model = ./weights-yolov3-hand/latest_416.pt
-#finetune_model = ./weights-yolov3-face-tiny/latest_416.pt
-lr_step = 20,50,80
-lr0 = 0.0001
+finetune_model = ./weights-yolov3-hand/yoloV3_416_epoch_12.pt
+#finetune_model = ./weights-yolov3-face-tiny/hand_416-2021-01-29.pt
+lr_step = 120,220,280
+lr0 = 0.00002
+train_voc=G:/tvcoco-hand/
diff --git a/cfg/hand.names b/cfg/hand.names
index d87e264..4eb9b0e 100644
--- a/cfg/hand.names
+++ b/cfg/hand.names
@@ -1 +1 @@
-Hand
+hand
diff --git a/show_yolo_anno_voc.py b/show_yolo_anno_voc.py
new file mode 100644
index 0000000..7109b89
--- /dev/null
+++ b/show_yolo_anno_voc.py
@@ -0,0 +1,55 @@
+#-*-coding:utf-8-*-
+# date:2021-05
+# Author: Eric.Lee
+# function: visualize VOC-format annotations for YOLO training data
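+#
+# The script expects, next to each .jpg, an .xml file in the usual Pascal
+# VOC layout. Minimal illustrative example (values are hypothetical):
+#
+#   <annotation>
+#     <object>
+#       <name>hand</name>
+#       <bndbox>
+#         <xmin>48</xmin><ymin>240</ymin>
+#         <xmax>195</xmax><ymax>371</ymax>
+#       </bndbox>
+#     </object>
+#   </annotation>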
+
+import cv2
+import os
+import numpy as np
+import xml.etree.cElementTree as et
+
+if __name__ == "__main__":
+
+    path = 'G:/hand_detect_datasets-0/'
+    path_voc_names = './cfg/hand.names'
+
+    with open(path_voc_names, 'r') as f:
+        label_map = f.readlines()
+    label_dict = {}
+    for i in range(len(label_map)):
+        label_map[i] = label_map[i].strip()
+        print(i, ') ', label_map[i])
+        label_dict[label_map[i]] = i
+
+    print("label_dict : {}".format(label_dict))
+
+    for file in os.listdir(path):
+        if ".jpg" in file:
+            path_img = path + file
+            path_label = path_img.replace(".jpg", ".xml")
+            if not os.access(path_label, os.F_OK):
+                continue
+            img = cv2.imread(path_img)
+
+            tree = et.parse(path_label)
+            root = tree.getroot()
+            for Object in root.findall('object'):
+                name = Object.find('name').text
+
+                bndbox = Object.find('bndbox')
+                x1 = np.float32(bndbox.find('xmin').text)
+                y1 = np.float32(bndbox.find('ymin').text)
+                x2 = np.float32(bndbox.find('xmax').text)
+                y2 = np.float32(bndbox.find('ymax').text)
+
+                cv2.rectangle(img, (int(x1), int(y1)), (int(x2), int(y2)), (255, 100, 100), 2)
+
+                cv2.putText(img, "{}".format(name), (int(x1), int(y1)),
+                            cv2.FONT_HERSHEY_PLAIN, 2.5, (0, 55, 255), 6)
+                cv2.putText(img, "{}".format(name), (int(x1), int(y1)),
+                            cv2.FONT_HERSHEY_PLAIN, 2.5, (0, 155, 255), 2)
+
+            cv2.namedWindow('image', 0)
+            cv2.imshow('image', img)
+            if cv2.waitKey(30) == 27:  # Esc to quit
+                break
+    cv2.destroyAllWindows()
diff --git a/train_voc.py b/train_voc.py
new file mode 100644
index 0000000..2dadf59
--- /dev/null
+++ b/train_voc.py
@@ -0,0 +1,215 @@
+#coding:utf-8
+# date:2021-06
+# Author: Eric.Lee
+# function: train on VOC-format data
+import os
+import time
+from collections import defaultdict
+
+import numpy as np
+import torch
+import torch.nn as nn
+from torch.utils.data import DataLoader
+
+from yolov3 import Yolov3, Yolov3Tiny
+from utils.parse_config import parse_data_cfg
+from utils.torch_utils import select_device
+from utils.datasets_voc import LoadImagesAndLabels
+from utils.utils import *
+
+def set_learning_rate(optimizer, lr):
+    for param_group in optimizer.param_groups:
+        param_group['lr'] = lr
+
+def train(data_cfg='cfg/voc.data', accumulate=1):
+    # Configure run
+    get_data_cfg = parse_data_cfg(data_cfg)  # returns the training config as a dict
+
+    gpus = get_data_cfg['gpus']
+    voc_names = get_data_cfg['names']
+
+    num_workers = int(get_data_cfg['num_workers'])
+    cfg_model = get_data_cfg['cfg_model']
+    train_path = get_data_cfg['train_voc']
+    num_classes = int(get_data_cfg['classes'])
+    finetune_model = get_data_cfg['finetune_model']
+    batch_size = int(get_data_cfg['batch_size'])
+    img_size = int(get_data_cfg['img_size'])
+    multi_scale = get_data_cfg['multi_scale']
+    epochs = int(get_data_cfg['epochs'])
+    lr_step = str(get_data_cfg['lr_step'])
+    lr0 = float(get_data_cfg['lr0'])
+
+    os.environ['CUDA_VISIBLE_DEVICES'] = gpus
+    device = select_device()
+
+    multi_scale = (multi_scale == 'True')
+
+    print('data_cfg        : ', data_cfg)
+    print('config entries  : ', len(get_data_cfg))
+    print('gpus            : ', gpus)
+    print('num_workers     : ', num_workers)
+    print('model           : ', cfg_model)
+    print('finetune_model  : ', finetune_model)
+    print('train_voc_path  : ', train_path)
+    print('num_classes     : ', num_classes)
+    print('batch_size      : ', batch_size)
+    print('img_size        : ', img_size)
+    print('multi_scale     : ', multi_scale)
+    print('lr_step         : ', lr_step)
+    print('lr0             : ', lr0)
+
+    # Load model; anchors are rescaled from the 416-px defaults to img_size
+    pattern_data_ = data_cfg.split("/")[-1].replace(".data", "")
+    if "-tiny" in cfg_model:
+        a_scale = 416. / img_size
+        anchors = [(10, 14), (23, 27), (37, 58), (81, 82), (135, 169), (344, 319)]
+        anchors_new = [(int(w / a_scale), int(h / a_scale)) for (w, h) in anchors]
+
+        model = Yolov3Tiny(num_classes, anchors=anchors_new)
+        weights = './weights-yolov3-{}-tiny/'.format(pattern_data_)
+    else:
+        a_scale = 416. / img_size
+        anchors = [(10, 13), (16, 30), (33, 23), (30, 61), (62, 45), (59, 119), (116, 90), (156, 198), (373, 326)]
+        anchors_new = [(int(w / a_scale), int(h / a_scale)) for (w, h) in anchors]
+        model = Yolov3(num_classes, anchors=anchors_new)
+        weights = './weights-yolov3-{}/'.format(pattern_data_)
+    # Create the checkpoint directory if needed
+    if not os.path.exists(weights):
+        os.mkdir(weights)
+
+    model = model.to(device)
+    latest = weights + 'latest_{}.pt'.format(img_size)
+    best = weights + 'best_{}.pt'.format(img_size)
+    # Optimizer
+    optimizer = torch.optim.SGD(model.parameters(), lr=lr0, momentum=0.9, weight_decay=0.0005)
+
+    start_epoch = 0
+    best_loss = float('inf')
+
+    if os.access(finetune_model, os.F_OK):  # load retrain/finetune model
+        print('loading yolo-v3 finetune_model : ', finetune_model)
+        not_load_filters = 3 * (80 + 5)  # voc: 3*(20+5), coco: 3*(80+5)=255
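+        # Why 3*(num_classes+5): each detection head predicts 3 anchors, and
+        # each anchor needs 4 box offsets + 1 objectness + num_classes class
+        # scores. For COCO that is 3*(80+5)=255, so conv filters of size 255
+        # are skipped below when fine-tuning to a different class count.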
+        chkpt = torch.load(finetune_model, map_location=device)
+        model.load_state_dict({k: v for k, v in chkpt['model'].items()
+                               if v.numel() > 1 and v.shape[0] != not_load_filters}, strict=False)
+        # model.load_state_dict(chkpt['model'])
+        if 'coco' not in finetune_model:
+            start_epoch = chkpt['epoch']
+            if chkpt['optimizer'] is not None:
+                optimizer.load_state_dict(chkpt['optimizer'])
+                best_loss = chkpt['best_loss']
+
+    # Set scheduler; gamma is the multiplicative factor of learning-rate decay
+    milestones = [int(i) for i in lr_step.split(",")]
+    print('milestones : ', milestones)
+    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=milestones, gamma=0.1,
+                                                     last_epoch=start_epoch - 1)
+
+    # Dataset
+    print('multi_scale : ', multi_scale)
+    dataset = LoadImagesAndLabels(train_path, voc_names=voc_names, batch_size=batch_size,
+                                  img_size=img_size, augment=True, multi_scale=multi_scale)
+    print('--------------->>> image num : ', dataset.__len__())
+    # Dataloader
+    dataloader = DataLoader(dataset,
+                            batch_size=batch_size,
+                            num_workers=num_workers,
+                            shuffle=True,
+                            pin_memory=False,
+                            drop_last=False,
+                            collate_fn=dataset.collate_fn)
+
+    # Start training
+    t = time.time()
+    # model_info(model)  # print model summary
+    nB = len(dataloader)
+    n_burnin = min(round(nB / 5 + 3), 1000)  # burn-in batches
+
+    flag_start = False
+
+    for epoch in range(0, epochs):
+
+        print(' ~~~~')
+        model.train()
+
+        if flag_start:  # skip the scheduler step on the first epoch
+            scheduler.step()
+        flag_start = True
+
+        mloss = defaultdict(float)  # mean loss
+        for i, (imgs, targets, img_path_, _) in enumerate(dataloader):
+            multi_size = imgs.size()
+            imgs = imgs.to(device)
+            targets = targets.to(device)
+
+            nt = len(targets)
+            if nt == 0:  # skip batches with no targets
+                continue
+
+            # SGD burn-in
+            if epoch == 0 and i <= n_burnin:
+                lr = lr0 * (i / n_burnin) ** 4
+                for x in optimizer.param_groups:
+                    x['lr'] = lr
+
+            # Run model
+            pred = model(imgs)
+
+            # Build targets
+            target_list = build_targets(model, targets)
+
+            # Compute loss
+            loss, loss_dict = compute_loss(pred, target_list)
+
+            # Compute gradient
+            loss.backward()
+
+            # Accumulate gradient for x batches before optimizing
+            if (i + 1) % accumulate == 0 or (i + 1) == nB:
+                optimizer.step()
+                optimizer.zero_grad()
+
+            # Running epoch-means of tracked metrics
+            for key, val in loss_dict.items():
+                mloss[key] = (mloss[key] * i + val) / (i + 1)
+
+            print(' Epoch {:3d}/{:3d}, Batch {:6d}/{:6d}, Img_size {}x{}, nTargets {}, lr {:.6f}, loss: xy {:.3f}, wh {:.3f}, '
+                  'conf {:.3f}, cls {:.3f}, total {:.3f}, time {:.3f}s'.format(
+                      epoch, epochs - 1, i, nB - 1, multi_size[2], multi_size[3], nt,
+                      scheduler.get_lr()[0], mloss['xy'], mloss['wh'], mloss['conf'],
+                      mloss['cls'], mloss['total'], time.time() - t),
+                  end='\r')
+
+        t = time.time()
+        print()
+
+        # Track the best (lowest) mean epoch loss
+        if mloss['total'] < best_loss:
+            best_loss = mloss['total']
+
+        # Create checkpoint
+        chkpt = {'epoch': epoch,
+                 'best_loss': best_loss,
+                 'model': model.module.state_dict() if type(
+                     model) is nn.parallel.DistributedDataParallel else model.state_dict(),
+                 'optimizer': optimizer.state_dict()}
+
+        # Save latest checkpoint
+        torch.save(chkpt, latest)
+
+        # Save best checkpoint whenever this epoch set a new best loss
+        if best_loss == mloss['total']:
+            torch.save(chkpt, best)
+
+        # Save a named backup checkpoint every epoch
+        if True:  # epoch > 0 and epoch % 5 == 0:
+            torch.save(chkpt, weights + 'yoloV3_{}_epoch_{}.pt'.format(img_size, epoch + 1))
+
+        # Delete checkpoint
+        del chkpt
+#-------------------------------------------------------------------------------
+if __name__ == '__main__':
+
+    train(data_cfg="cfg/hand.data")
+    # train(data_cfg = "cfg/face.data")
+    # train(data_cfg = "cfg/person.data")
+    # train(data_cfg = "cfg/helmet.data")
+    # train(data_cfg = "cfg/transport.data")
+
+    print('well done ~ ')
diff --git a/utils/datasets_voc.py b/utils/datasets_voc.py
new file mode 100644
index 0000000..b94bef2
--- /dev/null
+++ b/utils/datasets_voc.py
@@ -0,0 +1,329 @@
+# date:2021-06
+# Author: Eric.Lee
+# function: dataloader for VOC-format data
+
+import math
+import os
+import random
+import xml.etree.cElementTree as et
+
+import cv2
+import numpy as np
+import torch
+from torch.utils.data import Dataset
+
+def xyxy2xywh(x):
+    # Convert bounding box format from [x1, y1, x2, y2] to [x_center, y_center, w, h]
+    y = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x)
+    y[:, 0] = (x[:, 0] + x[:, 2]) / 2
+    y[:, 1] = (x[:, 1] + x[:, 3]) / 2
+    y[:, 2] = x[:, 2] - x[:, 0]
+    y[:, 3] = x[:, 3] - x[:, 1]
+    return y
+
+
+def xywh2xyxy(x):
+    # Convert bounding box format from [x_center, y_center, w, h] to [x1, y1, x2, y2]
+    y = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x)
+    y[:, 0] = x[:, 0] - x[:, 2] / 2
+    y[:, 1] = x[:, 1] - x[:, 3] / 2
+    y[:, 2] = x[:, 0] + x[:, 2] / 2
+    y[:, 3] = x[:, 1] + x[:, 3] / 2
+    return y
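+
+# Quick worked example of the two converters above (hypothetical values):
+#   xyxy2xywh(np.array([[10., 20., 50., 80.]])) -> [[30., 50., 40., 60.]]
+#   xywh2xyxy(np.array([[30., 50., 40., 60.]])) -> [[10., 20., 50., 80.]]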
+
+def read_label_xml(path_label):
+    # Count the objects annotated in a VOC xml file
+    tree = et.parse(path_label)
+    root = tree.getroot()
+    return len(root.findall('object'))
+
+class LoadImagesAndLabels(Dataset):  # for training/testing
+    def __init__(self, path, voc_names, batch_size, img_size=416, augment=True, multi_scale=False):
+        print('LoadImagesAndLabels init : ', path)
+        # Parse the class names file
+        with open(voc_names, 'r') as f:
+            label_map = f.readlines()
+        label_voc_dict = {}  # maps class name -> numeric label
+        obj_num_sum = 0  # total number of annotated objects
+        for i in range(len(label_map)):
+            label_map[i] = label_map[i].strip()
+            print(i, ') ', label_map[i])
+            label_voc_dict[label_map[i]] = i
+
+        print("label_voc_dict : {}".format(label_voc_dict))
+        img_files = []
+        label_files = []
+        for file in os.listdir(path):
+            if ".jpg" in file:
+                path_img = path + file
+                path_label = path_img.replace(".jpg", ".xml")
+                if not os.access(path_label, os.F_OK):
+                    continue
+                obj_num = read_label_xml(path_label)
+                if obj_num == 0:  # skip empty xml annotation files
+                    continue
+                obj_num_sum += obj_num
+                img_files.append(path_img)
+                label_files.append(path_label)
+
+        self.label_voc_dict = label_voc_dict
+        self.img_files = img_files
+        assert len(self.img_files) > 0, 'No images found in %s' % path
+        self.img_size = img_size
+        self.batch_size = batch_size
+        self.multi_scale = multi_scale
+        self.augment = augment
+        self.scale_index = 0
+        if self.multi_scale:
+            self.img_size = img_size  # initiate with maximum multi-scale size, in case of out-of-memory
+            print("Multi scale images training, init img_size", self.img_size)
+        else:
+            print("Fixed scale images, img_size", self.img_size)
+        self.label_files = label_files
+
+        print("init voc data_iter done ~")
+        print("obj_num_sum : {}".format(obj_num_sum))
+
+    def __len__(self):
+        return len(self.img_files)
+
+    def __getitem__(self, index):
+
+        # Re-sample the training resolution once per batch-worth of samples
+        if self.multi_scale and (self.scale_index % self.batch_size == 0) and self.scale_index != 0:
+            # self.img_size = random.choice(range(11, 18)) * 32
+            self.img_size = random.choice(range(12, 15)) * 32
+            # print("++++++ change img_size, index:", self.img_size, index)
+        if self.multi_scale:
+            self.scale_index += 1
+            if self.scale_index >= (100 * self.batch_size):
+                self.scale_index = 0
+
+        img_path = self.img_files[index]
+        label_path = self.label_files[index]
+
+        img = cv2.imread(img_path)  # BGR
+        assert img is not None, 'File Not Found ' + img_path
+
+        augment_hsv = random.random() < 0.5  # hsv_aug prob = 0.5
+        if self.augment and augment_hsv:
+            # SV augmentation by 50%
+            fraction = 0.50  # must be < 1.0
+            img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
+            S = img_hsv[:, :, 1].astype(np.float32)
+            V = img_hsv[:, :, 2].astype(np.float32)
+
+            a = (random.random() * 2 - 1) * fraction + 1  # a in [0.5, 1.5]
+            S *= a
+            if a > 1:
+                np.clip(S, None, 255, out=S)
+
+            a = (random.random() * 2 - 1) * fraction + 1
+            V *= a
+            if a > 1:
+                np.clip(V, None, 255, out=V)
+
+            img_hsv[:, :, 1] = S  # .astype(np.uint8)
+            img_hsv[:, :, 2] = V  # .astype(np.uint8)
+            cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)
+
+        h, w, _ = img.shape
+        img, ratio, padw, padh = letterbox(img, height=self.img_size, augment=self.augment)
+
+        # Load labels from the VOC xml annotation
+        tree = et.parse(label_path)
+        root = tree.getroot()
+        labels = []
+        x = []
+        for Object in root.findall('object'):
+            name_ = Object.find('name').text
+
+            bndbox = Object.find('bndbox')
+            xmin = np.float32(bndbox.find('xmin').text)
+            ymin = np.float32(bndbox.find('ymin').text)
+            xmax = np.float32(bndbox.find('xmax').text)
+            ymax = np.float32(bndbox.find('ymax').text)
+
+            xmin = np.clip(xmin, 0, w - 1)
+            ymin = np.clip(ymin, 0, h - 1)
+            xmax = np.clip(xmax, 0, w - 1)
+            ymax = np.clip(ymax, 0, h - 1)
+
+            # Normalized center/size relative to the original image
+            x_mid = (xmax + xmin) / 2. / float(w)
+            y_mid = (ymax + ymin) / 2. / float(h)
+            w_box = (xmax - xmin) / float(w)
+            h_box = (ymax - ymin) / float(h)
+
+            x.append((self.label_voc_dict[name_], x_mid, y_mid, w_box, h_box))
+        x = np.array(x, dtype=np.float32)
+        if x.size > 0:
+            # Map normalized xywh to pixel xyxy in the letterboxed image
+            labels = x.copy()
+            labels[:, 1] = ratio * w * (x[:, 1] - x[:, 3] / 2) + padw
+            labels[:, 2] = ratio * h * (x[:, 2] - x[:, 4] / 2) + padh
+            labels[:, 3] = ratio * w * (x[:, 1] + x[:, 3] / 2) + padw
+            labels[:, 4] = ratio * h * (x[:, 2] + x[:, 4] / 2) + padh
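+            # Worked example of the mapping (hypothetical numbers): a 640x480
+            # image letterboxed to 416 gives ratio=416/640=0.65, padw=0 and
+            # padh=(416-312)/2=52, so a normalized x of 0.5 maps to
+            # 0.65*640*0.5 + 0 = 208 px in the padded image.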
+
+        # Augment image and labels
+        if self.augment:
+            img, labels = random_affine(img, labels, degrees=(-30, 30), translate=(0.10, 0.10), scale=(0.9, 1.1))
+
+        nL = len(labels)  # number of labels
+        if nL:
+            # convert pixel xyxy back to normalized xywh
+            labels[:, 1:5] = xyxy2xywh(labels[:, 1:5]) / self.img_size
+
+        if self.augment:
+            # random left-right flip
+            lr_flip = True
+            if lr_flip and random.random() > 0.5:
+                img = np.fliplr(img)
+                if nL:
+                    labels[:, 1] = 1 - labels[:, 1]
+
+            # random up-down flip
+            ud_flip = True
+            if ud_flip and random.random() > 0.5:
+                img = np.flipud(img)
+                if nL:
+                    labels[:, 2] = 1 - labels[:, 2]
+
+        labels_out = torch.zeros((nL, 6))  # extra leading column holds the image index within the batch
+        if nL:
+            labels_out[:, 1:] = torch.from_numpy(labels)
+
+        # Normalize
+        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
+        img = np.ascontiguousarray(img, dtype=np.float32)  # uint8 to float32
+        img /= 255.0  # 0 - 255 to 0.0 - 1.0
+
+        return torch.from_numpy(img), labels_out, img_path, (h, w)
+
+    @staticmethod
+    def collate_fn(batch):
+        img, label, path, hw = list(zip(*batch))  # transposed
+        for i, l in enumerate(label):
+            l[:, 0] = i  # record which image in the batch each object belongs to
+        return torch.stack(img, 0), torch.cat(label, 0), path, hw
+
+
+def letterbox(img, height=416, augment=False, color=(127.5, 127.5, 127.5)):
+    # Resize a rectangular image to a padded square
+    shape = img.shape[:2]  # shape = [height, width]
+    ratio = float(height) / max(shape)  # ratio = new / old
+    new_shape = (round(shape[1] * ratio), round(shape[0] * ratio))
+    dw = (height - new_shape[0]) / 2  # width padding
+    dh = (height - new_shape[1]) / 2  # height padding
+    top, bottom = round(dh - 0.1), round(dh + 0.1)
+    left, right = round(dw - 0.1), round(dw + 0.1)
+    # resize img
+    if augment:
+        interpolation = np.random.choice([None, cv2.INTER_NEAREST, cv2.INTER_LINEAR,
+                                          None, cv2.INTER_NEAREST, cv2.INTER_LINEAR,
+                                          cv2.INTER_AREA, cv2.INTER_CUBIC, cv2.INTER_LANCZOS4])
+        if interpolation is None:
+            img = cv2.resize(img, new_shape)
+        else:
+            img = cv2.resize(img, new_shape, interpolation=interpolation)
+    else:
+        img = cv2.resize(img, new_shape, interpolation=cv2.INTER_NEAREST)
+
+    img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # padded square
+    return img, ratio, dw, dh
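+
+
+# Letterbox worked example (hypothetical numbers): a 1280x720 frame with
+# height=416 gives ratio=416/1280=0.325, a 416x234 resize, dw=0 and
+# dh=(416-234)/2=91, i.e. 91 px of gray padding on top and bottom.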
+
+def random_affine(img, targets=(), degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-2, 2),
+                  borderValue=(127.5, 127.5, 127.5)):
+    # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))
+    # https://medium.com/uruvideo/dataset-augmentation-with-random-homographies-a8f4b44830d4
+
+    if targets is None:
+        targets = []
+    border = 0  # width of added border (optional)
+    height = max(img.shape[0], img.shape[1]) + border * 2
+
+    # Rotation and Scale
+    R = np.eye(3)
+    a = random.random() * (degrees[1] - degrees[0]) + degrees[0]
+    # a += random.choice([-180, -90, 0, 90])  # 90deg rotations added to small rotations
+    s = random.random() * (scale[1] - scale[0]) + scale[0]
+    R[:2] = cv2.getRotationMatrix2D(angle=a, center=(img.shape[1] / 2, img.shape[0] / 2), scale=s)
+
+    # Translation
+    T = np.eye(3)
+    T[0, 2] = (random.random() * 2 - 1) * translate[0] * img.shape[0] + border  # x translation (pixels)
+    T[1, 2] = (random.random() * 2 - 1) * translate[1] * img.shape[1] + border  # y translation (pixels)
+
+    # Shear
+    S = np.eye(3)
+    S[0, 1] = math.tan((random.random() * (shear[1] - shear[0]) + shear[0]) * math.pi / 180)  # x shear (deg)
+    S[1, 0] = math.tan((random.random() * (shear[1] - shear[0]) + shear[0]) * math.pi / 180)  # y shear (deg)
+
+    M = S @ T @ R  # Combined rotation matrix. ORDER IS IMPORTANT HERE!!
+    imw = cv2.warpPerspective(img, M, dsize=(height, height), flags=cv2.INTER_LINEAR,
+                              borderValue=borderValue)  # BGR order borderValue
+
+    # Return warped points also
+    if len(targets) > 0:
+        n = targets.shape[0]
+        points = targets[:, 1:5].copy()
+        area0 = (points[:, 2] - points[:, 0]) * (points[:, 3] - points[:, 1])
+
+        # warp points
+        xy = np.ones((n * 4, 3))
+        xy[:, :2] = points[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(n * 4, 2)  # x1y1, x2y2, x1y2, x2y1
+        xy = (xy @ M.T)[:, :2].reshape(n, 8)
+
+        # create new boxes
+        x = xy[:, [0, 2, 4, 6]]
+        y = xy[:, [1, 3, 5, 7]]
+        xy = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T
+
+        # apply angle-based reduction of bounding boxes
+        radians = a * math.pi / 180
+        reduction = max(abs(math.sin(radians)), abs(math.cos(radians))) ** 0.5
+        x = (xy[:, 2] + xy[:, 0]) / 2
+        y = (xy[:, 3] + xy[:, 1]) / 2
+        w = (xy[:, 2] - xy[:, 0]) * reduction
+        h = (xy[:, 3] - xy[:, 1]) * reduction
+        xy = np.concatenate((x - w / 2, y - h / 2, x + w / 2, y + h / 2)).reshape(4, n).T
+
+        # reject warped points outside of image
+        np.clip(xy, 0, height, out=xy)
+        w = xy[:, 2] - xy[:, 0]
+        h = xy[:, 3] - xy[:, 1]
+        area = w * h
+        ar = np.maximum(w / (h + 1e-16), h / (w + 1e-16))
+        i = (w > 4) & (h > 4) & (area / (area0 + 1e-16) > 0.1) & (ar < 10)
+
+        targets = targets[i]
+        targets[:, 1:5] = xy[i]
+
+    return imw, targets
-- 
GitLab