From 304235b6d3625f13013d01148f141be20b942c67 Mon Sep 17 00:00:00 2001 From: "Eric.Lee2021" <305141918@qq.com> Date: Tue, 2 Feb 2021 19:33:46 +0800 Subject: [PATCH] update --- README.md | 1 + cfg/hand.data | 17 ++ cfg/hand.names | 1 + predict.py | 188 ++++++++++++++++++ utils/__init__.py | 0 utils/datasets.py | 373 ++++++++++++++++++++++++++++++++++ utils/parse_config.py | 36 ++++ utils/torch_utils.py | 26 +++ utils/utils.py | 452 ++++++++++++++++++++++++++++++++++++++++++ 9 files changed, 1094 insertions(+) create mode 100644 README.md create mode 100644 cfg/hand.data create mode 100644 cfg/hand.names create mode 100644 predict.py create mode 100644 utils/__init__.py create mode 100644 utils/datasets.py create mode 100644 utils/parse_config.py create mode 100644 utils/torch_utils.py create mode 100644 utils/utils.py diff --git a/README.md b/README.md new file mode 100644 index 0000000..9691ab3 --- /dev/null +++ b/README.md @@ -0,0 +1 @@ +# YOLO V3 diff --git a/cfg/hand.data b/cfg/hand.data new file mode 100644 index 0000000..d3236a4 --- /dev/null +++ b/cfg/hand.data @@ -0,0 +1,17 @@ +cfg_model=yolo +classes=1 +gpus = 0 +num_workers = 12 +batch_size = 8 +img_size = 416 +multi_scale = True +epochs = 100 +train=D:/m_cc/yolov3_pytorch/datasets_fusion_hand_train/anno/train.txt +valid=D:/m_cc/yolov3_pytorch/datasets_fusion_hand_train/anno/train.txt +names=./cfg/hand.names +#finetune_model=./finetune_model/yolov3_coco.pt +#finetune_model = ./weights-yolov3/latest.pt +finetune_model = ./weights-yolov3-hand/latest_416.pt +#finetune_model = ./weights-yolov3-face-tiny/latest_416.pt +lr_step = 20,50,80 +lr0 = 0.0001 diff --git a/cfg/hand.names b/cfg/hand.names new file mode 100644 index 0000000..d87e264 --- /dev/null +++ b/cfg/hand.names @@ -0,0 +1 @@ +Hand diff --git a/predict.py b/predict.py new file mode 100644 index 0000000..909c045 --- /dev/null +++ b/predict.py @@ -0,0 +1,188 @@ +#coding:utf-8 +# date:2019-08 +# Author: Eric.Lee +# function: predict camera +import argparse +import time +import os +import torch +from utils.datasets import * +from utils.utils import * +from utils.parse_config import parse_data_cfg +from yolov3 import Yolov3, Yolov3Tiny +from utils.torch_utils import select_device +# os.environ['CUDA_VISIBLE_DEVICES'] = "0" + +def process_data(img, img_size=416):# 图像预处理 + img, _, _, _ = letterbox(img, height=img_size) + # Normalize RGB + img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB + img = np.ascontiguousarray(img, dtype=np.float32) # uint8 to float32 + img /= 255.0 # 0 - 255 to 0.0 - 1.0 + return img + +def show_model_param(model): + params = list(model.parameters()) + k = 0 + for i in params: + l = 1 + for j in i.size(): + l *= j + print("该层的结构: {}, 参数和: {}".format(str(list(i.size())), str(l))) + k = k + l + print("----------------------") + print("总参数数量和: " + str(k)) +def refine_hand_bbox(bbox,img_shape): + height,width,_ = img_shape + + x1,y1,x2,y2 = bbox + + expand_w = (x2-x1) + expand_h = (y2-y1) + + x1 -= expand_w*0.06 + y1 -= expand_h*0.1 + x2 += expand_w*0.06 + y2 += expand_h*0.1 + + x1,y1,x2,y2 = int(x1),int(y1),int(x2),int(y2) + + x1 = int(max(0,x1)) + y1 = int(max(0,y1)) + x2 = int(min(x2,width-1)) + y2 = int(min(y2,height-1)) + + return (x1,y1,x2,y2) +def detect( + model_path, + root_path, + cfg, + data_cfg, + img_size=416, + conf_thres=0.5, + nms_thres=0.5, +): + classes = load_classes(parse_data_cfg(data_cfg)['names']) + num_classes = len(classes) + + # Initialize model + weights = model_path + if "-tiny" in cfg: + a_scalse = 416./img_size + anchors=[(10, 
14), (23, 27), (37, 58), (81, 82), (135, 169), (344, 319)] + anchors_new = [ (int(anchors[j][0]/a_scalse),int(anchors[j][1]/a_scalse)) for j in range(len(anchors)) ] + + model = Yolov3Tiny(num_classes,anchors = anchors_new) + + else: + a_scalse = 416./img_size + anchors=[(10,13), (16,30), (33,23), (30,61), (62,45), (59,119), (116,90), (156,198), (373,326)] + anchors_new = [ (int(anchors[j][0]/a_scalse),int(anchors[j][1]/a_scalse)) for j in range(len(anchors)) ] + model = Yolov3(num_classes,anchors = anchors_new) + + show_model_param(model)# 显示模型参数 + + device = select_device() # 运行硬件选择 + use_cuda = torch.cuda.is_available() + # Load weights + if os.access(weights,os.F_OK):# 判断模型文件是否存在 + model.load_state_dict(torch.load(weights, map_location=device)['model']) + else: + print('error model not exists') + return False + model.to(device).eval()#模型模式设置为 eval + + colors = [(v // 32 * 64 + 64, (v // 8) % 4 * 64, v % 8 * 32) for v in range(1, num_classes + 1)][::-1] + + video_capture = cv2.VideoCapture(0) + #------------------------------------------------- + while True: + ret, im0 = video_capture.read() + if ret: + t = time.time() + img = process_data(im0, img_size) + if use_cuda: + torch.cuda.synchronize() + t1 = time.time() + print("process time:", t1-t) + img = torch.from_numpy(img).unsqueeze(0).to(device) + + pred, _ = model(img)#图片检测 + if use_cuda: + torch.cuda.synchronize() + t2 = time.time() + print("inference time:", t2-t1) + detections = non_max_suppression(pred, conf_thres, nms_thres)[0] # nms + if use_cuda: + torch.cuda.synchronize() + t3 = time.time() + print("get res time:", t3-t2) + if detections is None or len(detections) == 0: + cv2.namedWindow('image',0) + cv2.imshow("image", im0) + key = cv2.waitKey(1) + if key == 27: + break + continue + # Rescale boxes from 416 to true image size + detections[:, :4] = scale_coords(img_size, detections[:, :4], im0.shape).round() + result = [] + for res in detections: + result.append((classes[int(res[-1])], float(res[4]), [int(res[0]), int(res[1]), int(res[2]), int(res[3])])) + if use_cuda: + torch.cuda.synchronize() + + # print(result) + + for r in result: + print(r) + + # Draw bounding boxes and labels of detections + for *xyxy, conf, cls_conf, cls in detections: + label = '%s %.2f' % (classes[int(cls)], conf) + # xyxy = refine_hand_bbox(xyxy,im0.shape) + plot_one_box(xyxy, im0, label=label, color=(255,255,0)) + + s2 = time.time() + print("detect time: {} \n".format(s2 - t)) + + str_fps = ("{:.2f} Fps".format(1./(s2 - t+0.00001))) + cv2.putText(im0, str_fps, (5,im0.shape[0]-3),cv2.FONT_HERSHEY_DUPLEX, 0.9, (255, 0, 255),4) + cv2.putText(im0, str_fps, (5,im0.shape[0]-3),cv2.FONT_HERSHEY_DUPLEX, 0.9, (255, 255, 0),1) + + cv2.namedWindow('image',0) + cv2.imshow("image", im0) + key = cv2.waitKey(1) + if key == 27: + break + else: + break + + cv2.destroyAllWindows() + +if __name__ == '__main__': + pattern = 'yolo' + if "-tiny" in pattern: + model_path = './weights-yolov3-person-tiny/latest_320.pt' # 检测模型路径 + root_path = './test_images/'# 测试文件夹 + model_cfg = pattern # 模型类型 + else: + model_path = './weights-yolov3-hand/latest_416.pt' # 检测模型路径 + root_path = './test_images/'# 测试文件夹 + model_cfg = 'yolov3' # 模型类型 + + voc_config = 'cfg/hand.data' # 模型相关配置文件 + img_size = 416 # 图像尺寸 + conf_thres = 0.25# 检测置信度 + nms_thres = 0.45 # nms 阈值 + + with torch.no_grad():#设置无梯度运行 + detect( + model_path = model_path, + root_path = root_path, + cfg = model_cfg, + data_cfg = voc_config, + img_size=img_size, + conf_thres=conf_thres, + nms_thres=nms_thres, + ) diff --git 
a/utils/__init__.py b/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/utils/datasets.py b/utils/datasets.py new file mode 100644 index 0000000..35fe7d4 --- /dev/null +++ b/utils/datasets.py @@ -0,0 +1,373 @@ +import glob +import math +import os +import random +import shutil +from pathlib import Path +from PIL import Image +# import matplotlib.pyplot as plt +from tqdm import tqdm +import cv2 +import numpy as np +import torch +from torch.utils.data import Dataset +from torch.utils.data import DataLoader + +def xyxy2xywh(x): + # Convert bounding box format from [x1, y1, x2, y2] to [x, y, w, h] + y = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x) + y[:, 0] = (x[:, 0] + x[:, 2]) / 2 + y[:, 1] = (x[:, 1] + x[:, 3]) / 2 + y[:, 2] = x[:, 2] - x[:, 0] + y[:, 3] = x[:, 3] - x[:, 1] + return y + + +def xywh2xyxy(x): + # Convert bounding box format from [x, y, w, h] to [x1, y1, x2, y2] + y = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x) + y[:, 0] = x[:, 0] - x[:, 2] / 2 + y[:, 1] = x[:, 1] - x[:, 3] / 2 + y[:, 2] = x[:, 0] + x[:, 2] / 2 + y[:, 3] = x[:, 1] + x[:, 3] / 2 + return y + + +class LoadImages: # for inference + def __init__(self, path, img_size=416): + self.height = img_size + img_formats = ['.jpg', '.jpeg', '.png', '.tif'] + vid_formats = ['.mov', '.avi', '.mp4'] + + files = [] + if os.path.isdir(path): + files = sorted(glob.glob('%s/*.*' % path)) + elif os.path.isfile(path): + files = [path] + + images = [x for x in files if os.path.splitext(x)[-1].lower() in img_formats] + videos = [x for x in files if os.path.splitext(x)[-1].lower() in vid_formats] + nI, nV = len(images), len(videos) + + self.files = images + videos + self.nF = nI + nV # number of files + self.video_flag = [False] * nI + [True] * nV + self.mode = 'images' + if any(videos): + self.new_video(videos[0]) # new video + else: + self.cap = None + assert self.nF > 0, 'No images or videos found in ' + path + + def __iter__(self): + self.count = 0 + return self + + def __next__(self): + if self.count == self.nF: + raise StopIteration + path = self.files[self.count] + + if self.video_flag[self.count]: + # Read video + self.mode = 'video' + ret_val, img0 = self.cap.read() + if not ret_val: + self.count += 1 + self.cap.release() + if self.count == self.nF: # last video + raise StopIteration + else: + path = self.files[self.count] + self.new_video(path) + ret_val, img0 = self.cap.read() + + self.frame += 1 + print('video %g/%g (%g/%g) %s: ' % (self.count + 1, self.nF, self.frame, self.nframes, path), end='') + + else: + # Read image + self.count += 1 + img0 = cv2.imread(path) # BGR + assert img0 is not None, 'File Not Found ' + path + print('image %g/%g %s: ' % (self.count, self.nF, path), end='') + + # Padded resize + img, _, _, _ = letterbox(img0, height=self.height) + + # Normalize RGB + img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB + img = np.ascontiguousarray(img, dtype=np.float32) # uint8 to float32 + img /= 255.0 # 0 - 255 to 0.0 - 1.0 + + # cv2.imwrite(path + '.letterbox.jpg', 255 * img.transpose((1, 2, 0))[:, :, ::-1]) # save letterbox image + return path, img, img0, self.cap + + def new_video(self, path): + self.frame = 0 + self.cap = cv2.VideoCapture(path) + self.nframes = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT)) + + def __len__(self): + return self.nF # number of files + + +class LoadWebcam: # for inference + def __init__(self, img_size=416): + self.cam = cv2.VideoCapture(0) + self.height = img_size + + def __iter__(self): + 
self.count = -1 + return self + + def __next__(self): + self.count += 1 + if cv2.waitKey(1) == 27: # esc to quit + cv2.destroyAllWindows() + raise StopIteration + + # Read image + ret_val, img0 = self.cam.read() + assert ret_val, 'Webcam Error' + img_path = 'webcam_%g.jpg' % self.count + img0 = cv2.flip(img0, 1) # flip left-right + + # Padded resize + img, _, _, _ = letterbox(img0, height=self.height) + + # Normalize RGB + img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB + img = np.ascontiguousarray(img, dtype=np.float32) # uint8 to float32 + img /= 255.0 # 0 - 255 to 0.0 - 1.0 + + return img_path, img, img0, self.cam + + def __len__(self): + return 0 + + +class LoadImagesAndLabels(Dataset): # for training/testing + def __init__(self, path, batch_size, img_size=416, augment=True, multi_scale=False): + print('LoadImagesAndLabels init : ',path) + with open(path, 'r') as file: + img_files = file.read().splitlines() + img_files = list(filter(lambda x: len(x) > 0, img_files)) + np.random.shuffle(img_files) # shuffle img_list + print("shuffle image...") + self.img_files = img_files + assert len(self.img_files) > 0, 'No images found in %s' % path + self.img_size = img_size + self.batch_size = batch_size + self.multi_scale = multi_scale + self.augment = augment + self.scale_index = 0 + if self.multi_scale: + self.img_size = img_size # initiate with maximum multi_scale size, in case of out of memory + print("Multi scale images training, init img_size", self.img_size) + else: + print("Fixed scale images, img_size", self.img_size) + self.label_files = [ + x.replace('images', 'labels').replace("JPEGImages", 'labels').replace('.bmp', '.txt').replace('.jpg', '.txt').replace('.png', '.txt') + for x in self.img_files] + + def __len__(self): + return len(self.img_files) + + def __getitem__(self, index): + + # if self.multi_scale and (index % self.batch_size == 0) and index != 0: + if self.multi_scale and (self.scale_index % self.batch_size == 0)and self.scale_index != 0: + # self.img_size = random.choice(range(11, 18)) * 32 + self.img_size = random.choice(range(11, 16)) * 32 + # print("++++++ change img_size, index:", self.img_size, index) + if self.multi_scale: + self.scale_index += 1 + if self.scale_index >= (100*self.batch_size): + self.scale_index = 0 + + + img_path = self.img_files[index] + label_path = self.label_files[index] + + img = cv2.imread(img_path) # BGR + # print("img shape",img.shape) + assert img is not None, 'File Not Found ' + img_path + + augment_hsv = random.random() < 0.5 # hsv_aug prob = 0.5 + if self.augment and augment_hsv: + # SV augmentation by 50% + fraction = 0.50 # must be < 1.0 + img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV) + S = img_hsv[:, :, 1].astype(np.float32) + V = img_hsv[:, :, 2].astype(np.float32) + + a = (random.random() * 2 - 1) * fraction + 1 # a in [-0,5, 1.5] + S *= a + if a > 1: + np.clip(S, None, 255, out=S) + + a = (random.random() * 2 - 1) * fraction + 1 + V *= a + if a > 1: + np.clip(V, None, 255, out=V) + + img_hsv[:, :, 1] = S # .astype(np.uint8) + img_hsv[:, :, 2] = V # .astype(np.uint8) + cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img) + + h, w, _ = img.shape + img, ratio, padw, padh = letterbox(img, height=self.img_size, augment=self.augment) + + # Load labels + labels = [] + if os.path.isfile(label_path): + with open(label_path, 'r') as file: + lines = file.read().splitlines() + + x = np.array([x.split() for x in lines], dtype=np.float32) + if x.size > 0: + # Normalized xywh to pixel xyxy format + labels = x.copy() + labels[:, 1] = ratio * w 
* (x[:, 1] - x[:, 3] / 2) + padw + labels[:, 2] = ratio * h * (x[:, 2] - x[:, 4] / 2) + padh + labels[:, 3] = ratio * w * (x[:, 1] + x[:, 3] / 2) + padw + labels[:, 4] = ratio * h * (x[:, 2] + x[:, 4] / 2) + padh + + # Augment image and labels + if self.augment: + img, labels = random_affine(img, labels, degrees=(-30, 30), translate=(0.10, 0.10), scale=(0.9, 1.1)) + + nL = len(labels) # number of labels + if nL: + # convert xyxy to xywh + labels[:, 1:5] = xyxy2xywh(labels[:, 1:5]) / self.img_size # 转化 格式 ,且 归一化 + + if self.augment: + # random left-right flip + lr_flip = True + if lr_flip and random.random() > 0.5: + img = np.fliplr(img) + if nL: + labels[:, 1] = 1 - labels[:, 1] + + # random up-down flip + ud_flip = False + if ud_flip and random.random() > 0.5: + img = np.flipud(img) + if nL: + labels[:, 2] = 1 - labels[:, 2] + + labels_out = torch.zeros((nL, 6))# 加了 一个 batch size + if nL: + labels_out[:, 1:] = torch.from_numpy(labels) + + # Normalize + img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416 + img = np.ascontiguousarray(img, dtype=np.float32) # uint8 to float32 + img /= 255.0 # 0 - 255 to 0.0 - 1.0 + + return torch.from_numpy(img), labels_out, img_path, (h, w) + + @staticmethod + def collate_fn(batch): + img, label, path, hw = list(zip(*batch)) # transposed + for i, l in enumerate(label): + l[:, 0] = i # 获取 物体的 归属于 图片 的 index + return torch.stack(img, 0), torch.cat(label, 0), path, hw + + +def letterbox(img, height=416, augment=False, color=(127.5, 127.5, 127.5)): + # Resize a rectangular image to a padded square + shape = img.shape[:2] # shape = [height, width] + ratio = float(height) / max(shape) # ratio = old / new + new_shape = (round(shape[1] * ratio), round(shape[0] * ratio)) + dw = (height - new_shape[0]) / 2 # width padding + dh = (height - new_shape[1]) / 2 # height padding + top, bottom = round(dh - 0.1), round(dh + 0.1) + left, right = round(dw - 0.1), round(dw + 0.1) + # resize img + if augment: + interpolation = np.random.choice([None, cv2.INTER_NEAREST, cv2.INTER_LINEAR, + None, cv2.INTER_NEAREST, cv2.INTER_LINEAR, + cv2.INTER_AREA, cv2.INTER_CUBIC, cv2.INTER_LANCZOS4]) + if interpolation is None: + img = cv2.resize(img, new_shape) + else: + img = cv2.resize(img, new_shape, interpolation=interpolation) + else: + img = cv2.resize(img, new_shape, interpolation=cv2.INTER_NEAREST) + # print("resize time:",time.time()-s1) + + img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # padded square + return img, ratio, dw, dh + + +def random_affine(img, targets=(), degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-2, 2), + borderValue=(127.5, 127.5, 127.5)): + # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10)) + # https://medium.com/uruvideo/dataset-augmentation-with-random-homographies-a8f4b44830d4 + + if targets is None: + targets = [] + border = 0 # width of added border (optional) + height = max(img.shape[0], img.shape[1]) + border * 2 + + # Rotation and Scale + R = np.eye(3) + a = random.random() * (degrees[1] - degrees[0]) + degrees[0] + # a += random.choice([-180, -90, 0, 90]) # 90deg rotations added to small rotations + s = random.random() * (scale[1] - scale[0]) + scale[0] + R[:2] = cv2.getRotationMatrix2D(angle=a, center=(img.shape[1] / 2, img.shape[0] / 2), scale=s) + + # Translation + T = np.eye(3) + T[0, 2] = (random.random() * 2 - 1) * translate[0] * img.shape[0] + border # x translation (pixels) + T[1, 2] = (random.random() * 2 - 1) 
* translate[1] * img.shape[1] + border # y translation (pixels) + + # Shear + S = np.eye(3) + S[0, 1] = math.tan((random.random() * (shear[1] - shear[0]) + shear[0]) * math.pi / 180) # x shear (deg) + S[1, 0] = math.tan((random.random() * (shear[1] - shear[0]) + shear[0]) * math.pi / 180) # y shear (deg) + + M = S @ T @ R # Combined rotation matrix. ORDER IS IMPORTANT HERE!! + imw = cv2.warpPerspective(img, M, dsize=(height, height), flags=cv2.INTER_LINEAR, + borderValue=borderValue) # BGR order borderValue + + # Return warped points also + if len(targets) > 0: + n = targets.shape[0] + points = targets[:, 1:5].copy() + area0 = (points[:, 2] - points[:, 0]) * (points[:, 3] - points[:, 1]) + + # warp points + xy = np.ones((n * 4, 3)) + xy[:, :2] = points[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(n * 4, 2) # x1y1, x2y2, x1y2, x2y1 + xy = (xy @ M.T)[:, :2].reshape(n, 8) + + # create new boxes + x = xy[:, [0, 2, 4, 6]] + y = xy[:, [1, 3, 5, 7]] + xy = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T + + # apply angle-based reduction of bounding boxes + radians = a * math.pi / 180 + reduction = max(abs(math.sin(radians)), abs(math.cos(radians))) ** 0.5 + x = (xy[:, 2] + xy[:, 0]) / 2 + y = (xy[:, 3] + xy[:, 1]) / 2 + w = (xy[:, 2] - xy[:, 0]) * reduction + h = (xy[:, 3] - xy[:, 1]) * reduction + xy = np.concatenate((x - w / 2, y - h / 2, x + w / 2, y + h / 2)).reshape(4, n).T + + # reject warped points outside of image + np.clip(xy, 0, height, out=xy) + w = xy[:, 2] - xy[:, 0] + h = xy[:, 3] - xy[:, 1] + area = w * h + ar = np.maximum(w / (h + 1e-16), h / (w + 1e-16)) + i = (w > 4) & (h > 4) & (area / (area0 + 1e-16) > 0.1) & (ar < 10) + + targets = targets[i] + targets[:, 1:5] = xy[i] + + return imw, targets diff --git a/utils/parse_config.py b/utils/parse_config.py new file mode 100644 index 0000000..0aca2eb --- /dev/null +++ b/utils/parse_config.py @@ -0,0 +1,36 @@ +def parse_model_cfg(path): + """Parses the yolo-v3 layer configuration file and returns module definitions""" + file = open(path, 'r') + lines = file.read().split('\n') + lines = [x for x in lines if x and not x.startswith('#')] + lines = [x.rstrip().lstrip() for x in lines] # get rid of fringe whitespaces + module_defs = [] + for line in lines: + if line.startswith('['): # This marks the start of a new block + module_defs.append({}) + module_defs[-1]['type'] = line[1:-1].rstrip() + if module_defs[-1]['type'] == 'convolutional': + module_defs[-1]['batch_normalize'] = 0 + else: + key, value = line.split("=") + value = value.strip() + module_defs[-1][key.rstrip()] = value.strip() + + return module_defs + + +def parse_data_cfg(path): + """Parses the data configuration file""" + print('data_cfg : ',path) + options = dict() + # options['gpus'] = '0,1,2,3' + # options['num_workers'] = '10' + with open(path, 'r') as fp: + lines = fp.readlines() + for line in lines: + line = line.strip() + if line == '' or line.startswith('#'): + continue + key, value = line.split('=') + options[key.strip()] = value.strip() + return options diff --git a/utils/torch_utils.py b/utils/torch_utils.py new file mode 100644 index 0000000..a4a26fd --- /dev/null +++ b/utils/torch_utils.py @@ -0,0 +1,26 @@ +import torch + + +def init_seeds(seed=0): + torch.manual_seed(seed) + torch.cuda.manual_seed(seed) + torch.cuda.manual_seed_all(seed) + + +def select_device(force_cpu=False): + if force_cpu: + cuda = False + device = torch.device('cpu') + else: + cuda = torch.cuda.is_available() + device = torch.device('cuda:0' if cuda else 'cpu') + + if 
torch.cuda.device_count() > 1: + device = torch.device('cuda' if cuda else 'cpu') + print('Found %g GPUs' % torch.cuda.device_count()) + # print('Multi-GPU Issue: https://github.com/ultralytics/yolov3/issues/21') + # torch.cuda.set_device(0) # OPTIONAL: Set your GPU if multiple available + # print('Using ', torch.cuda.device_count(), ' GPUs') + + print('Using %s %s\n' % (device.type, torch.cuda.get_device_properties(0) if cuda else '')) + return device diff --git a/utils/utils.py b/utils/utils.py new file mode 100644 index 0000000..8ea2f1d --- /dev/null +++ b/utils/utils.py @@ -0,0 +1,452 @@ +import glob +import random +import time +from collections import defaultdict +import cv2 +import numpy as np +import torch +import torch.nn as nn + + +# Set printoptions +torch.set_printoptions(linewidth=1320, precision=5, profile='long') +np.set_printoptions(linewidth=320, formatter={'float_kind': '{:11.5g}'.format}) # format short g, %precision=5 + +# Prevent OpenCV from multithreading (to use PyTorch DataLoader) +cv2.setNumThreads(0) + + +def float3(x): # format floats to 3 decimals + return float(format(x, '.3f')) + + +def init_seeds(seed=0): + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + + if torch.cuda.is_available(): + torch.cuda.manual_seed(seed) + torch.cuda.manual_seed_all(seed) + else: + torch.manual_seed(seed) + torch.manual_seed_all(seed) + + +def load_classes(path): + # Loads class labels at 'path' + fp = open(path, 'r') + names = fp.read().split('\n') + return list(filter(None, names)) # filter removes empty strings (such as last line) + + +def model_info(model): + # Plots a line-by-line description of a PyTorch model + n_p = sum(x.numel() for x in model.parameters()) # number parameters + n_g = sum(x.numel() for x in model.parameters() if x.requires_grad) # number gradients + print('\n%5s %60s %9s %12s %20s %10s %10s' % ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', 'sigma')) + for i, (name, p) in enumerate(model.named_parameters()): + # name = name.replace('module_list.', '') + print('%5g %60s %9s %12g %20s %10.3g %10.3g' % ( + i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std())) + print('Model Summary: %g layers, %g parameters, %g gradients' % (i + 1, n_p, n_g)) + + + +def plot_one_box(x, img, color=None, label=None, line_thickness=None): + # Plots one bounding box on image img + tl = line_thickness or round(0.002 * max(img.shape[0:2])) + 1 # line thickness + color = color or [random.randint(0, 255) for _ in range(3)] + c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3])) + cv2.rectangle(img, c1, c2, color, thickness=tl) + if label: + tf = max(tl - 1, 1) # font thickness + t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0] + c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3 + cv2.rectangle(img, c1, c2, color, -1) # filled + cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA) + + +def weights_init_normal(m): + classname = m.__class__.__name__ + if classname.find('Conv') != -1: + torch.nn.init.normal_(m.weight.data, 0.0, 0.03) + elif classname.find('BatchNorm2d') != -1: + torch.nn.init.normal_(m.weight.data, 1.0, 0.03) + torch.nn.init.constant_(m.bias.data, 0.0) + + +def xyxy2xywh(x): + # Convert bounding box format from [x1, y1, x2, y2] to [x, y, w, h] + y = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x) + y[:, 0] = (x[:, 0] + x[:, 2]) / 2 + y[:, 1] = (x[:, 1] + x[:, 3]) / 2 + y[:, 2] = x[:, 2] - x[:, 0] + y[:, 3] = x[:, 3] - x[:, 
1] + return y + + +def xywh2xyxy(x): + # Convert bounding box format from [x, y, w, h] to [x1, y1, x2, y2] + y = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x) + y[:, 0] = x[:, 0] - x[:, 2] / 2 + y[:, 1] = x[:, 1] - x[:, 3] / 2 + y[:, 2] = x[:, 0] + x[:, 2] / 2 + y[:, 3] = x[:, 1] + x[:, 3] / 2 + return y + + +def scale_coords(img_size, coords, img0_shape):# image size 转为 原图尺寸 + # Rescale x1, y1, x2, y2 from 416 to image size + # print('coords : ',coords) + # print('img0_shape : ',img0_shape) + gain = float(img_size) / max(img0_shape) # gain = old / new + # print('gain : ',gain) + pad_x = (img_size - img0_shape[1] * gain) / 2 # width padding + pad_y = (img_size - img0_shape[0] * gain) / 2 # height padding + # print('pad_xpad_y : ',pad_x,pad_y) + coords[:, [0, 2]] -= pad_x + coords[:, [1, 3]] -= pad_y + coords[:, :4] /= gain + coords[:, :4] = torch.clamp(coords[:, :4], min=0)# 夹紧区间最小值不为负数 + return coords + + +def ap_per_class(tp, conf, pred_cls, target_cls): + """ Compute the average precision, given the recall and precision curves. + Source: https://github.com/rafaelpadilla/Object-Detection-Metrics. + # Arguments + tp: True positives (list). + conf: Objectness value from 0-1 (list). + pred_cls: Predicted object classes (list). + target_cls: True object classes (list). + # Returns + The average precision as computed in py-faster-rcnn. + """ + + # Sort by objectness + i = np.argsort(-conf) + tp, conf, pred_cls = tp[i], conf[i], pred_cls[i] + + # Find unique classes + unique_classes = np.unique(target_cls) + + # Create Precision-Recall curve and compute AP for each class + ap, p, r = [], [], [] + for c in unique_classes: + i = pred_cls == c + n_gt = (target_cls == c).sum() # Number of ground truth objects + n_p = i.sum() # Number of predicted objects + + if n_p == 0 and n_gt == 0: + continue + elif n_p == 0 or n_gt == 0: + ap.append(0) + r.append(0) + p.append(0) + else: + # Accumulate FPs and TPs + fpc = (1 - tp[i]).cumsum() + tpc = (tp[i]).cumsum() + + # Recall + recall_curve = tpc / (n_gt + 1e-16) + r.append(recall_curve[-1]) + + # Precision + precision_curve = tpc / (tpc + fpc) + p.append(precision_curve[-1]) + + # AP from recall-precision curve + ap.append(compute_ap(recall_curve, precision_curve)) + + # Plot + # plt.plot(recall_curve, precision_curve) + + # Compute F1 score (harmonic mean of precision and recall) + p, r, ap = np.array(p), np.array(r), np.array(ap) + f1 = 2 * p * r / (p + r + 1e-16) + + return p, r, ap, f1, unique_classes.astype('int32') + + +def compute_ap(recall, precision): + """ Compute the average precision, given the recall and precision curves. + Source: https://github.com/rbgirshick/py-faster-rcnn. + # Arguments + recall: The recall curve (list). + precision: The precision curve (list). + # Returns + The average precision as computed in py-faster-rcnn. + """ + # correct AP calculation + # first append sentinel values at the end + + mrec = np.concatenate(([0.], recall, [1.])) + mpre = np.concatenate(([0.], precision, [0.])) + + # compute the precision envelope + for i in range(mpre.size - 1, 0, -1): + mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) + + # to calculate area under PR curve, look for points + # where X axis (recall) changes value + i = np.where(mrec[1:] != mrec[:-1])[0] + + # and sum (\Delta recall) * prec + ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) + return ap + + +def bbox_iou(box1, box2, x1y1x2y2=True): + # Returns the IoU of box1 to box2. 
box1 is 4, box2 is nx4 + box2 = box2.t() + + # Get the coordinates of bounding boxes + if x1y1x2y2: + # x1, y1, x2, y2 = box1 + b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3] + b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3] + else: + # x, y, w, h = box1 + b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2 + b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2 + b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2 + b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2 + + # Intersection area + inter_area = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * \ + (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0) + + # Union Area + union_area = ((b1_x2 - b1_x1) * (b1_y2 - b1_y1) + 1e-16) + \ + (b2_x2 - b2_x1) * (b2_y2 - b2_y1) - inter_area + + return inter_area / union_area # iou + + +def wh_iou(box1, box2): + + box2 = box2.t() + + # w, h = box1 + w1, h1 = box1[0], box1[1] + w2, h2 = box2[0], box2[1] + + # Intersection area + inter_area = torch.min(w1, w2) * torch.min(h1, h2) + + # Union Area + union_area = (w1 * h1 + 1e-16) + w2 * h2 - inter_area + + return inter_area / union_area # iou + + +def compute_loss(p, targets): # predictions, targets + FT = torch.cuda.FloatTensor if p[0].is_cuda else torch.FloatTensor + lxy, lwh, lcls, lconf = FT([0]), FT([0]), FT([0]), FT([0]) # losses 初始化 为 0 + txy, twh, tcls, indices = targets + MSE = nn.MSELoss() + CE = nn.CrossEntropyLoss() + BCE = nn.BCEWithLogitsLoss()# 多标签分类时 使用 如 [1,1,0], + + # Compute losses + for i, pi0 in enumerate(p): # layer i predictions, i + b, a, gj, gi = indices[i] # image_idx, anchor_idx, gridx, gridy + + # print(i,') b, a, gj, gi : ') + # print('b', b) + # print('a', a) + # print('gj', gj) + # print('gi', gi) + + tconf = torch.zeros_like(pi0[..., 0]) # conf + + # print('tconf: ',tconf.size()) + # Compute losses + k = 1 # nT / bs + if len(b) > 0: + pi = pi0[b, a, gj, gi] # predictions closest to anchors + tconf[b, a, gj, gi] = 1 # conf + + lxy += (k * 8) * MSE(torch.sigmoid(pi[..., 0:2]), txy[i]) # xy loss + lwh += (k * 4) * MSE(pi[..., 2:4], twh[i]) # wh loss + lcls += (k * 1) * CE(pi[..., 5:], tcls[i]) # class_conf loss + + lconf += (k * 64) * BCE(pi0[..., 4], tconf) # obj_conf loss + loss = lxy + lwh + lconf + lcls + + # Add to dictionary + d = defaultdict(float) + losses = [loss.item(), lxy.item(), lwh.item(), lconf.item(), lcls.item()] + for name, x in zip(['total', 'xy', 'wh', 'conf', 'cls'], losses): + d[name] = x + + return loss, d + + +def build_targets(model, targets): + # targets = [image, class, x, y, w, h] + if isinstance(model, nn.parallel.DistributedDataParallel): + model = model.module + + txy, twh, tcls, indices = [], [], [], [] + for i, layer in enumerate(get_yolo_layers(model)):# 遍历 3 个 yolo layer + # print(i,'layer ',model.module_list[layer]) + layer = model.module_list[layer][0] + + # iou of targets-anchors + gwh = targets[:, 4:6] * layer.nG # 以 grid 为单位的 wh + iou = [wh_iou(x, gwh) for x in layer.anchor_vec] + iou, a = torch.stack(iou, 0).max(0) # best iou and anchor + + # reject below threshold ious (OPTIONAL, increases P, lowers R) + reject = True + if reject: + j = iou > 0.10 + t, a, gwh = targets[j], a[j], gwh[j] + else: + t = targets + + # Indices + b, c = t[:, :2].long().t() # target image, class + gxy = t[:, 2:4] * layer.nG + gi, gj = gxy.long().t() # grid_i, grid_j + indices.append((b, a, gj, gi)) # img_index , anchor_index , grid_x , grid_y + + # print('b, a, gj, gi : ') + # print('b', b) + # print('a', a) + # 
print('gj', gj) + # print('gi', gi) + # print('class c',c) + + # XY coordinates + txy.append(gxy - gxy.floor())#转化为grid相对坐标 + + # Width and height + twh.append(torch.log(gwh / layer.anchor_vec[a])) # yolo method 对数 + # twh.append(torch.sqrt(gwh / layer.anchor_vec[a]) / 2) # power method + + # Class + tcls.append(c) + # try: + # print('c.max,layer.nC: ',c.max().item() ,layer.nC) + # except: + # pass + if c.shape[0]: + assert c.max().item() <= layer.nC, 'Target classes exceed model classes' + + return txy, twh, tcls, indices + + +# @profile +def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4): + """ + Removes detections with lower object confidence score than 'conf_thres' + Non-Maximum Suppression to further filter detections. + Returns detections with shape: + (x1, y1, x2, y2, object_conf, class_conf, class) + """ + + min_wh = 2 # (pixels) minimum box width and height + + output = [None] * len(prediction) + for image_i, pred in enumerate(prediction): + # Experiment: Prior class size rejection + # x, y, w, h = pred[:, 0], pred[:, 1], pred[:, 2], pred[:, 3] + # a = w * h # area + # ar = w / (h + 1e-16) # aspect ratio + # n = len(w) + # log_w, log_h, log_a, log_ar = torch.log(w), torch.log(h), torch.log(a), torch.log(ar) + # shape_likelihood = np.zeros((n, 60), dtype=np.float32) + # x = np.concatenate((log_w.reshape(-1, 1), log_h.reshape(-1, 1)), 1) + # from scipy.stats import multivariate_normal + # for c in range(60): + # shape_likelihood[:, c] = + # multivariate_normal.pdf(x, mean=mat['class_mu'][c, :2], cov=mat['class_cov'][c, :2, :2]) + + # Filter out confidence scores below threshold + class_conf, class_pred = pred[:, 5:].max(1) # max class_conf, index + pred[:, 4] *= class_conf # finall conf = obj_conf * class_conf + + i = (pred[:, 4] > conf_thres) & (pred[:, 2] > min_wh) & (pred[:, 3] > min_wh) + # s2=time.time() + pred2 = pred[i] + # print("++++++pred2 = pred[i]",time.time()-s2, pred2) + + # If none are remaining => process next image + if len(pred2) == 0: + continue + + # Select predicted classes + class_conf = class_conf[i] + class_pred = class_pred[i].unsqueeze(1).float() + + # Box (center x, center y, width, height) to (x1, y1, x2, y2) + pred2[:, :4] = xywh2xyxy(pred2[:, :4]) + # pred[:, 4] *= class_conf # improves mAP from 0.549 to 0.551 + + # Detections ordered as (x1y1x2y2, obj_conf, class_conf, class_pred) + pred2 = torch.cat((pred2[:, :5], class_conf.unsqueeze(1), class_pred), 1) + + # Get detections sorted by decreasing confidence scores + pred2 = pred2[(-pred2[:, 4]).argsort()] + + det_max = [] + nms_style = 'MERGE' # 'OR' (default), 'AND', 'MERGE' (experimental) + for c in pred2[:, -1].unique(): + dc = pred2[pred2[:, -1] == c] # select class c + dc = dc[:min(len(dc), 100)] # limit to first 100 boxes + + # Non-maximum suppression + if nms_style == 'OR': # default + # METHOD1 + # ind = list(range(len(dc))) + # while len(ind): + # j = ind[0] + # det_max.append(dc[j:j + 1]) # save highest conf detection + # reject = (bbox_iou(dc[j], dc[ind]) > nms_thres).nonzero() + # [ind.pop(i) for i in reversed(reject)] + + # METHOD2 + while dc.shape[0]: + det_max.append(dc[:1]) # save highest conf detection + if len(dc) == 1: # Stop if we're at the last detection + break + iou = bbox_iou(dc[0], dc[1:]) # iou with other boxes + dc = dc[1:][iou < nms_thres] # remove ious > threshold + + elif nms_style == 'AND': # requires overlap, single boxes erased + while len(dc) > 1: + iou = bbox_iou(dc[0], dc[1:]) # iou with other boxes + if iou.max() > 0.5: + det_max.append(dc[:1]) + dc 
= dc[1:][iou < nms_thres] # remove ious > threshold + + elif nms_style == 'MERGE': # weighted mixture box + while len(dc): + i = bbox_iou(dc[0], dc) > nms_thres # iou with other boxes + weights = dc[i, 4:5] + dc[0, :4] = (weights * dc[i, :4]).sum(0) / weights.sum() + det_max.append(dc[:1]) + dc = dc[i == 0] + + if len(det_max): + det_max = torch.cat(det_max) # concatenate + output[image_i] = det_max[(-det_max[:, 4]).argsort()] # sort + return output + + +def get_yolo_layers(model): + yolo_layer_index = [] + for index, l in enumerate(model.module_list): + try: + a = l[0].img_size and l[0].nG # only yolo layer need img_size and nG + # print("---"*50) + # print(l, index) + yolo_layer_index.append(index) + except: + pass + assert len(yolo_layer_index) > 0, "can not find yolo layer" + return yolo_layer_index -- GitLab
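
A few usage sketches for the modules added in this patch follow; every concrete value below (image sizes, boxes, anchor sets, label rows) is an illustrative assumption unless it appears in the diff above. First, the option parsing: cfg/hand.data is a plain key=value file read by parse_data_cfg(), which returns a flat dict of strings, so the caller does the casting.

# Minimal usage sketch of the config parsing added in utils/parse_config.py,
# using the cfg/hand.data and cfg/hand.names files from this patch.
from utils.parse_config import parse_data_cfg
from utils.utils import load_classes

opts = parse_data_cfg('cfg/hand.data')            # key=value file -> dict of strings
classes = load_classes(opts['names'])             # ['Hand']
num_classes = int(opts['classes'])                # 1
img_size = int(opts['img_size'])                  # 416
lr_steps = [int(s) for s in opts['lr_step'].split(',')]   # [20, 50, 80]
print(num_classes, img_size, lr_steps, classes)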
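detect() rescales its hard-coded anchors whenever img_size differs from 416: a_scalse = 416/img_size, and each anchor is divided by it, i.e. multiplied by img_size/416. A quick sketch of that arithmetic; img_size = 320 is a hypothetical value, not one used in this patch.

# Sketch of the anchor rescaling in detect(): the anchors are defined for a
# 416x416 input, so for another input size they are scaled by img_size / 416.
img_size = 320                                    # hypothetical input size
a_scalse = 416. / img_size                        # same variable name as predict.py
anchors = [(10, 13), (16, 30), (33, 23), (30, 61), (62, 45), (59, 119),
           (116, 90), (156, 198), (373, 326)]
anchors_new = [(int(w / a_scalse), int(h / a_scalse)) for (w, h) in anchors]
print(anchors_new)                                # anchors expressed at the 320 scale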
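letterbox() resizes the long side of the frame to the model input size and pads the short side symmetrically; scale_coords() applies the inverse mapping so detections land back on the original frame. A round-trip sketch in which the 640x480 frame and the detection box are made-up values.

# Round-trip sketch of letterbox() and scale_coords() from this patch.
import numpy as np
import torch
from utils.datasets import letterbox
from utils.utils import scale_coords

img0 = np.zeros((480, 640, 3), dtype=np.uint8)    # fake camera frame (h, w, c)
img, ratio, dw, dh = letterbox(img0, height=416)
print(ratio, dw, dh)                              # 0.65, 0.0, 52.0 for this shape

det = torch.tensor([[100.0, 150.0, 200.0, 250.0]])        # x1, y1, x2, y2 on the 416 input
det_orig = scale_coords(416, det.clone(), img0.shape)     # back to 640x480 pixel coords
print(det_orig)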
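The label transform in LoadImagesAndLabels.__getitem__ uses the same ratio/padw/padh to map a normalised YOLO row [class, x_center, y_center, w, h] into pixel x1y1x2y2 on the letterboxed image before augmentation. A worked numeric example; the label row is invented and the letterbox values match the 640x480 -> 416 example above.

# Worked example of the label transform in LoadImagesAndLabels.__getitem__.
import numpy as np

h, w = 480, 640                                   # original image size (assumed)
ratio, padw, padh = 0.65, 0.0, 52.0               # letterbox output for a 416 target
x = np.array([[0, 0.5, 0.5, 0.25, 0.5]], dtype=np.float32)   # one centred 'Hand' box

labels = x.copy()
labels[:, 1] = ratio * w * (x[:, 1] - x[:, 3] / 2) + padw    # x1
labels[:, 2] = ratio * h * (x[:, 2] - x[:, 4] / 2) + padh    # y1
labels[:, 3] = ratio * w * (x[:, 1] + x[:, 3] / 2) + padw    # x2
labels[:, 4] = ratio * h * (x[:, 2] + x[:, 4] / 2) + padh    # y2
print(labels)                                     # x1,y1,x2,y2 = 156, 130, 260, 286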
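With multi_scale=True, __getitem__ re-draws a new square training resolution once per batch via random.choice(range(11, 16)) * 32, so the set of possible sizes is small and fixed:

# The multi-scale resolutions implied by range(11, 16) * 32 in datasets.py.
sizes = [g * 32 for g in range(11, 16)]
print(sizes)                                      # [352, 384, 416, 448, 480]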
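build_targets() assigns every ground-truth box to the anchor with the highest width/height IoU (wh_iou) and rejects matches with IoU below 0.10. A sketch of that matching; anchor_vec corresponds to the stride-32 anchors (116,90), (156,198), (373,326) divided by 32, and the two target sizes are invented.

# Sketch of the anchor matching step inside build_targets().
import torch
from utils.utils import wh_iou

anchor_vec = torch.tensor([[3.625, 2.8125],
                           [4.875, 6.1875],
                           [11.65625, 10.1875]])  # stride-32 anchors in grid units
gwh = torch.tensor([[4.0, 3.0],
                    [1.0, 9.0]])                  # target w,h in grid units (assumed)

iou = [wh_iou(a, gwh) for a in anchor_vec]        # IoU of all targets against each anchor
iou, best_a = torch.stack(iou, 0).max(0)          # best anchor index per target
keep = iou > 0.10                                 # same rejection threshold as build_targets
print(best_a, keep)                               # tensor([0, 1]) tensor([True, True])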
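non_max_suppression() expects, per image, raw rows of [x, y, w, h, obj_conf, class scores...] and returns x1y1x2y2 detections after confidence filtering and the hard-coded MERGE-style NMS. A standalone run on fake predictions: one image, three candidate boxes, one class; the first two boxes overlap heavily, so two detections survive.

# Standalone sketch of non_max_suppression() on invented raw predictions.
import torch
from utils.utils import non_max_suppression

pred = torch.tensor([[[200., 200., 100., 100., 0.9, 0.8],
                      [205., 202.,  98., 102., 0.7, 0.7],
                      [ 60.,  50.,  40.,  40., 0.6, 0.9]]])
det = non_max_suppression(pred, conf_thres=0.25, nms_thres=0.45)[0]
print(det)        # rows of (x1, y1, x2, y2, obj*cls conf, class_conf, class)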