Commit 304235b6 authored by Eric.Lee2021

update

Parent d018bd5f
cfg_model=yolo
classes=1
gpus = 0
num_workers = 12
batch_size = 8
img_size = 416
multi_scale = True
epochs = 100
train=D:/m_cc/yolov3_pytorch/datasets_fusion_hand_train/anno/train.txt
valid=D:/m_cc/yolov3_pytorch/datasets_fusion_hand_train/anno/train.txt
names=./cfg/hand.names
#finetune_model=./finetune_model/yolov3_coco.pt
#finetune_model = ./weights-yolov3/latest.pt
finetune_model = ./weights-yolov3-hand/latest_416.pt
#finetune_model = ./weights-yolov3-face-tiny/latest_416.pt
lr_step = 20,50,80
lr0 = 0.0001
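# Note (added for clarity, based on the fields above): train/valid are annotation list
# files with one image path per line (here both point to the same train.txt);
# finetune_model selects the checkpoint to resume from; lr_step appears to list the
# epochs at which the initial learning rate lr0 is decayed.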
#coding:utf-8
# date:2019-08
# Author: Eric.Lee
# function: predict camera
import argparse
import time
import os
import torch
from utils.datasets import *
from utils.utils import *
from utils.parse_config import parse_data_cfg
from yolov3 import Yolov3, Yolov3Tiny
from utils.torch_utils import select_device
# os.environ['CUDA_VISIBLE_DEVICES'] = "0"
def process_data(img, img_size=416):  # image preprocessing
img, _, _, _ = letterbox(img, height=img_size)
# Normalize RGB
img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB
img = np.ascontiguousarray(img, dtype=np.float32) # uint8 to float32
img /= 255.0 # 0 - 255 to 0.0 - 1.0
return img
def show_model_param(model):
params = list(model.parameters())
k = 0
for i in params:
l = 1
for j in i.size():
l *= j
print("该层的结构: {}, 参数和: {}".format(str(list(i.size())), str(l)))
k = k + l
print("----------------------")
print("总参数数量和: " + str(k))
def refine_hand_bbox(bbox,img_shape):
height,width,_ = img_shape
x1,y1,x2,y2 = bbox
expand_w = (x2-x1)
expand_h = (y2-y1)
x1 -= expand_w*0.06
y1 -= expand_h*0.1
x2 += expand_w*0.06
y2 += expand_h*0.1
x1,y1,x2,y2 = int(x1),int(y1),int(x2),int(y2)
x1 = int(max(0,x1))
y1 = int(max(0,y1))
x2 = int(min(x2,width-1))
y2 = int(min(y2,height-1))
return (x1,y1,x2,y2)
def detect(
model_path,
root_path,
cfg,
data_cfg,
img_size=416,
conf_thres=0.5,
nms_thres=0.5,
):
classes = load_classes(parse_data_cfg(data_cfg)['names'])
num_classes = len(classes)
# Initialize model
weights = model_path
if "-tiny" in cfg:
a_scalse = 416./img_size
anchors=[(10, 14), (23, 27), (37, 58), (81, 82), (135, 169), (344, 319)]
anchors_new = [ (int(anchors[j][0]/a_scalse),int(anchors[j][1]/a_scalse)) for j in range(len(anchors)) ]
model = Yolov3Tiny(num_classes,anchors = anchors_new)
else:
a_scalse = 416./img_size
anchors=[(10,13), (16,30), (33,23), (30,61), (62,45), (59,119), (116,90), (156,198), (373,326)]
anchors_new = [ (int(anchors[j][0]/a_scalse),int(anchors[j][1]/a_scalse)) for j in range(len(anchors)) ]
model = Yolov3(num_classes,anchors = anchors_new)
    show_model_param(model)  # print per-layer parameter counts
    device = select_device()  # choose CPU or GPU
use_cuda = torch.cuda.is_available()
# Load weights
    if os.access(weights, os.F_OK):  # check that the model file exists
        model.load_state_dict(torch.load(weights, map_location=device)['model'])
    else:
        print('error: model file does not exist')
        return False
    model.to(device).eval()  # set model to eval mode
colors = [(v // 32 * 64 + 64, (v // 8) % 4 * 64, v % 8 * 32) for v in range(1, num_classes + 1)][::-1]
video_capture = cv2.VideoCapture(0)
#-------------------------------------------------
while True:
ret, im0 = video_capture.read()
if ret:
t = time.time()
img = process_data(im0, img_size)
if use_cuda:
torch.cuda.synchronize()
t1 = time.time()
print("process time:", t1-t)
img = torch.from_numpy(img).unsqueeze(0).to(device)
            pred, _ = model(img)  # forward pass / detection
if use_cuda:
torch.cuda.synchronize()
t2 = time.time()
print("inference time:", t2-t1)
detections = non_max_suppression(pred, conf_thres, nms_thres)[0] # nms
if use_cuda:
torch.cuda.synchronize()
t3 = time.time()
print("get res time:", t3-t2)
if detections is None or len(detections) == 0:
cv2.namedWindow('image',0)
cv2.imshow("image", im0)
key = cv2.waitKey(1)
if key == 27:
break
continue
# Rescale boxes from 416 to true image size
detections[:, :4] = scale_coords(img_size, detections[:, :4], im0.shape).round()
result = []
for res in detections:
result.append((classes[int(res[-1])], float(res[4]), [int(res[0]), int(res[1]), int(res[2]), int(res[3])]))
if use_cuda:
torch.cuda.synchronize()
# print(result)
for r in result:
print(r)
# Draw bounding boxes and labels of detections
for *xyxy, conf, cls_conf, cls in detections:
label = '%s %.2f' % (classes[int(cls)], conf)
# xyxy = refine_hand_bbox(xyxy,im0.shape)
plot_one_box(xyxy, im0, label=label, color=(255,255,0))
s2 = time.time()
print("detect time: {} \n".format(s2 - t))
str_fps = ("{:.2f} Fps".format(1./(s2 - t+0.00001)))
cv2.putText(im0, str_fps, (5,im0.shape[0]-3),cv2.FONT_HERSHEY_DUPLEX, 0.9, (255, 0, 255),4)
cv2.putText(im0, str_fps, (5,im0.shape[0]-3),cv2.FONT_HERSHEY_DUPLEX, 0.9, (255, 255, 0),1)
cv2.namedWindow('image',0)
cv2.imshow("image", im0)
key = cv2.waitKey(1)
if key == 27:
break
else:
break
cv2.destroyAllWindows()
if __name__ == '__main__':
pattern = 'yolo'
if "-tiny" in pattern:
        model_path = './weights-yolov3-person-tiny/latest_320.pt'  # detection model path
        root_path = './test_images/'  # test image folder
        model_cfg = pattern  # model type
    else:
        model_path = './weights-yolov3-hand/latest_416.pt'  # detection model path
        root_path = './test_images/'  # test image folder
        model_cfg = 'yolov3'  # model type
    voc_config = 'cfg/hand.data'  # data config file
    img_size = 416  # input image size
    conf_thres = 0.25  # detection confidence threshold
    nms_thres = 0.45  # NMS IoU threshold
    with torch.no_grad():  # run inference without gradient tracking
detect(
model_path = model_path,
root_path = root_path,
cfg = model_cfg,
data_cfg = voc_config,
img_size=img_size,
conf_thres=conf_thres,
nms_thres=nms_thres,
)
import glob
import math
import os
import random
import shutil
from pathlib import Path
from PIL import Image
# import matplotlib.pyplot as plt
from tqdm import tqdm
import cv2
import numpy as np
import torch
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
def xyxy2xywh(x):
# Convert bounding box format from [x1, y1, x2, y2] to [x, y, w, h]
y = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x)
y[:, 0] = (x[:, 0] + x[:, 2]) / 2
y[:, 1] = (x[:, 1] + x[:, 3]) / 2
y[:, 2] = x[:, 2] - x[:, 0]
y[:, 3] = x[:, 3] - x[:, 1]
return y
def xywh2xyxy(x):
# Convert bounding box format from [x, y, w, h] to [x1, y1, x2, y2]
y = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x)
y[:, 0] = x[:, 0] - x[:, 2] / 2
y[:, 1] = x[:, 1] - x[:, 3] / 2
y[:, 2] = x[:, 0] + x[:, 2] / 2
y[:, 3] = x[:, 1] + x[:, 3] / 2
return y
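
def _demo_box_roundtrip():
    # Illustrative sketch, not in the original file: the two converters above are inverses.
    boxes = np.array([[10., 20., 50., 80.]])  # x1, y1, x2, y2
    xywh = xyxy2xywh(boxes)                   # -> [[30., 50., 40., 60.]] = cx, cy, w, h
    assert np.allclose(xywh2xyxy(xywh), boxes)  # round-trip recovers the input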
class LoadImages: # for inference
def __init__(self, path, img_size=416):
self.height = img_size
img_formats = ['.jpg', '.jpeg', '.png', '.tif']
vid_formats = ['.mov', '.avi', '.mp4']
files = []
if os.path.isdir(path):
files = sorted(glob.glob('%s/*.*' % path))
elif os.path.isfile(path):
files = [path]
images = [x for x in files if os.path.splitext(x)[-1].lower() in img_formats]
videos = [x for x in files if os.path.splitext(x)[-1].lower() in vid_formats]
nI, nV = len(images), len(videos)
self.files = images + videos
self.nF = nI + nV # number of files
self.video_flag = [False] * nI + [True] * nV
self.mode = 'images'
if any(videos):
self.new_video(videos[0]) # new video
else:
self.cap = None
assert self.nF > 0, 'No images or videos found in ' + path
def __iter__(self):
self.count = 0
return self
def __next__(self):
if self.count == self.nF:
raise StopIteration
path = self.files[self.count]
if self.video_flag[self.count]:
# Read video
self.mode = 'video'
ret_val, img0 = self.cap.read()
if not ret_val:
self.count += 1
self.cap.release()
if self.count == self.nF: # last video
raise StopIteration
else:
path = self.files[self.count]
self.new_video(path)
ret_val, img0 = self.cap.read()
self.frame += 1
print('video %g/%g (%g/%g) %s: ' % (self.count + 1, self.nF, self.frame, self.nframes, path), end='')
else:
# Read image
self.count += 1
img0 = cv2.imread(path) # BGR
assert img0 is not None, 'File Not Found ' + path
print('image %g/%g %s: ' % (self.count, self.nF, path), end='')
# Padded resize
img, _, _, _ = letterbox(img0, height=self.height)
# Normalize RGB
img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB
img = np.ascontiguousarray(img, dtype=np.float32) # uint8 to float32
img /= 255.0 # 0 - 255 to 0.0 - 1.0
# cv2.imwrite(path + '.letterbox.jpg', 255 * img.transpose((1, 2, 0))[:, :, ::-1]) # save letterbox image
return path, img, img0, self.cap
def new_video(self, path):
self.frame = 0
self.cap = cv2.VideoCapture(path)
self.nframes = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
def __len__(self):
return self.nF # number of files
class LoadWebcam: # for inference
def __init__(self, img_size=416):
self.cam = cv2.VideoCapture(0)
self.height = img_size
def __iter__(self):
self.count = -1
return self
def __next__(self):
self.count += 1
if cv2.waitKey(1) == 27: # esc to quit
cv2.destroyAllWindows()
raise StopIteration
# Read image
ret_val, img0 = self.cam.read()
assert ret_val, 'Webcam Error'
img_path = 'webcam_%g.jpg' % self.count
img0 = cv2.flip(img0, 1) # flip left-right
# Padded resize
img, _, _, _ = letterbox(img0, height=self.height)
# Normalize RGB
img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB
img = np.ascontiguousarray(img, dtype=np.float32) # uint8 to float32
img /= 255.0 # 0 - 255 to 0.0 - 1.0
return img_path, img, img0, self.cam
def __len__(self):
return 0
class LoadImagesAndLabels(Dataset): # for training/testing
def __init__(self, path, batch_size, img_size=416, augment=True, multi_scale=False):
print('LoadImagesAndLabels init : ',path)
with open(path, 'r') as file:
img_files = file.read().splitlines()
img_files = list(filter(lambda x: len(x) > 0, img_files))
np.random.shuffle(img_files) # shuffle img_list
print("shuffle image...")
self.img_files = img_files
assert len(self.img_files) > 0, 'No images found in %s' % path
self.img_size = img_size
self.batch_size = batch_size
self.multi_scale = multi_scale
self.augment = augment
self.scale_index = 0
        if self.multi_scale:
            self.img_size = img_size  # start at the maximum multi-scale size, to avoid running out of memory later
            print("Multi-scale training, initial img_size", self.img_size)
        else:
            print("Fixed-scale training, img_size", self.img_size)
self.label_files = [
x.replace('images', 'labels').replace("JPEGImages", 'labels').replace('.bmp', '.txt').replace('.jpg', '.txt').replace('.png', '.txt')
for x in self.img_files]
def __len__(self):
return len(self.img_files)
def __getitem__(self, index):
# if self.multi_scale and (index % self.batch_size == 0) and index != 0:
if self.multi_scale and (self.scale_index % self.batch_size == 0)and self.scale_index != 0:
# self.img_size = random.choice(range(11, 18)) * 32
self.img_size = random.choice(range(11, 16)) * 32
# print("++++++ change img_size, index:", self.img_size, index)
if self.multi_scale:
self.scale_index += 1
if self.scale_index >= (100*self.batch_size):
self.scale_index = 0
img_path = self.img_files[index]
label_path = self.label_files[index]
img = cv2.imread(img_path) # BGR
# print("img shape",img.shape)
assert img is not None, 'File Not Found ' + img_path
augment_hsv = random.random() < 0.5 # hsv_aug prob = 0.5
if self.augment and augment_hsv:
# SV augmentation by 50%
fraction = 0.50 # must be < 1.0
img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
S = img_hsv[:, :, 1].astype(np.float32)
V = img_hsv[:, :, 2].astype(np.float32)
            a = (random.random() * 2 - 1) * fraction + 1  # a in [0.5, 1.5]
S *= a
if a > 1:
np.clip(S, None, 255, out=S)
a = (random.random() * 2 - 1) * fraction + 1
V *= a
if a > 1:
np.clip(V, None, 255, out=V)
img_hsv[:, :, 1] = S # .astype(np.uint8)
img_hsv[:, :, 2] = V # .astype(np.uint8)
cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)
h, w, _ = img.shape
img, ratio, padw, padh = letterbox(img, height=self.img_size, augment=self.augment)
# Load labels
labels = []
if os.path.isfile(label_path):
with open(label_path, 'r') as file:
lines = file.read().splitlines()
x = np.array([x.split() for x in lines], dtype=np.float32)
if x.size > 0:
# Normalized xywh to pixel xyxy format
labels = x.copy()
labels[:, 1] = ratio * w * (x[:, 1] - x[:, 3] / 2) + padw
labels[:, 2] = ratio * h * (x[:, 2] - x[:, 4] / 2) + padh
labels[:, 3] = ratio * w * (x[:, 1] + x[:, 3] / 2) + padw
labels[:, 4] = ratio * h * (x[:, 2] + x[:, 4] / 2) + padh
# Augment image and labels
if self.augment:
img, labels = random_affine(img, labels, degrees=(-30, 30), translate=(0.10, 0.10), scale=(0.9, 1.1))
nL = len(labels) # number of labels
if nL:
# convert xyxy to xywh
            labels[:, 1:5] = xyxy2xywh(labels[:, 1:5]) / self.img_size  # convert format and normalize to [0, 1]
if self.augment:
# random left-right flip
lr_flip = True
if lr_flip and random.random() > 0.5:
img = np.fliplr(img)
if nL:
labels[:, 1] = 1 - labels[:, 1]
# random up-down flip
ud_flip = False
if ud_flip and random.random() > 0.5:
img = np.flipud(img)
if nL:
labels[:, 2] = 1 - labels[:, 2]
        labels_out = torch.zeros((nL, 6))  # extra leading column holds the image index within the batch
if nL:
labels_out[:, 1:] = torch.from_numpy(labels)
# Normalize
img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416
img = np.ascontiguousarray(img, dtype=np.float32) # uint8 to float32
img /= 255.0 # 0 - 255 to 0.0 - 1.0
return torch.from_numpy(img), labels_out, img_path, (h, w)
@staticmethod
def collate_fn(batch):
img, label, path, hw = list(zip(*batch)) # transposed
for i, l in enumerate(label):
            l[:, 0] = i  # record which image in the batch each object belongs to
return torch.stack(img, 0), torch.cat(label, 0), path, hw
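
# Hypothetical usage sketch (not in the original file): collate_fn is needed because each
# image carries a variable number of labels; they are concatenated into one (n, 6) tensor
# whose first column is the image index set above. The train_txt path is an assumption.
def _demo_dataloader(train_txt='anno/train.txt'):
    dataset = LoadImagesAndLabels(train_txt, batch_size=8, img_size=416, augment=True, multi_scale=True)
    loader = DataLoader(dataset, batch_size=8, shuffle=True, num_workers=0, collate_fn=dataset.collate_fn)
    for imgs, labels, paths, hw in loader:
        # imgs: (8, 3, H, W) floats in [0, 1]; labels: (n, 6) = [img_idx, cls, x, y, w, h]
        break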
def letterbox(img, height=416, augment=False, color=(127.5, 127.5, 127.5)):
# Resize a rectangular image to a padded square
shape = img.shape[:2] # shape = [height, width]
ratio = float(height) / max(shape) # ratio = old / new
new_shape = (round(shape[1] * ratio), round(shape[0] * ratio))
dw = (height - new_shape[0]) / 2 # width padding
dh = (height - new_shape[1]) / 2 # height padding
top, bottom = round(dh - 0.1), round(dh + 0.1)
left, right = round(dw - 0.1), round(dw + 0.1)
# resize img
if augment:
interpolation = np.random.choice([None, cv2.INTER_NEAREST, cv2.INTER_LINEAR,
None, cv2.INTER_NEAREST, cv2.INTER_LINEAR,
cv2.INTER_AREA, cv2.INTER_CUBIC, cv2.INTER_LANCZOS4])
if interpolation is None:
img = cv2.resize(img, new_shape)
else:
img = cv2.resize(img, new_shape, interpolation=interpolation)
else:
img = cv2.resize(img, new_shape, interpolation=cv2.INTER_NEAREST)
# print("resize time:",time.time()-s1)
img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # padded square
return img, ratio, dw, dh
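
def _demo_letterbox():
    # Hypothetical sketch (not in the original file): a 640x480 BGR frame letterboxed to 416.
    img0 = np.zeros((480, 640, 3), dtype=np.uint8)
    img, ratio, dw, dh = letterbox(img0, height=416)
    # ratio = 416/640 = 0.65, content resized to 416x312, padded by 52 px top and bottom
    assert img.shape == (416, 416, 3) and ratio == 0.65 and dh == 52.0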
def random_affine(img, targets=(), degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-2, 2),
borderValue=(127.5, 127.5, 127.5)):
# torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))
# https://medium.com/uruvideo/dataset-augmentation-with-random-homographies-a8f4b44830d4
if targets is None:
targets = []
border = 0 # width of added border (optional)
height = max(img.shape[0], img.shape[1]) + border * 2
# Rotation and Scale
R = np.eye(3)
a = random.random() * (degrees[1] - degrees[0]) + degrees[0]
# a += random.choice([-180, -90, 0, 90]) # 90deg rotations added to small rotations
s = random.random() * (scale[1] - scale[0]) + scale[0]
R[:2] = cv2.getRotationMatrix2D(angle=a, center=(img.shape[1] / 2, img.shape[0] / 2), scale=s)
# Translation
T = np.eye(3)
T[0, 2] = (random.random() * 2 - 1) * translate[0] * img.shape[0] + border # x translation (pixels)
T[1, 2] = (random.random() * 2 - 1) * translate[1] * img.shape[1] + border # y translation (pixels)
# Shear
S = np.eye(3)
S[0, 1] = math.tan((random.random() * (shear[1] - shear[0]) + shear[0]) * math.pi / 180) # x shear (deg)
S[1, 0] = math.tan((random.random() * (shear[1] - shear[0]) + shear[0]) * math.pi / 180) # y shear (deg)
M = S @ T @ R # Combined rotation matrix. ORDER IS IMPORTANT HERE!!
imw = cv2.warpPerspective(img, M, dsize=(height, height), flags=cv2.INTER_LINEAR,
borderValue=borderValue) # BGR order borderValue
# Return warped points also
if len(targets) > 0:
n = targets.shape[0]
points = targets[:, 1:5].copy()
area0 = (points[:, 2] - points[:, 0]) * (points[:, 3] - points[:, 1])
# warp points
xy = np.ones((n * 4, 3))
xy[:, :2] = points[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(n * 4, 2) # x1y1, x2y2, x1y2, x2y1
xy = (xy @ M.T)[:, :2].reshape(n, 8)
# create new boxes
x = xy[:, [0, 2, 4, 6]]
y = xy[:, [1, 3, 5, 7]]
xy = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T
# apply angle-based reduction of bounding boxes
radians = a * math.pi / 180
reduction = max(abs(math.sin(radians)), abs(math.cos(radians))) ** 0.5
x = (xy[:, 2] + xy[:, 0]) / 2
y = (xy[:, 3] + xy[:, 1]) / 2
w = (xy[:, 2] - xy[:, 0]) * reduction
h = (xy[:, 3] - xy[:, 1]) * reduction
xy = np.concatenate((x - w / 2, y - h / 2, x + w / 2, y + h / 2)).reshape(4, n).T
# reject warped points outside of image
np.clip(xy, 0, height, out=xy)
w = xy[:, 2] - xy[:, 0]
h = xy[:, 3] - xy[:, 1]
area = w * h
ar = np.maximum(w / (h + 1e-16), h / (w + 1e-16))
i = (w > 4) & (h > 4) & (area / (area0 + 1e-16) > 0.1) & (ar < 10)
targets = targets[i]
targets[:, 1:5] = xy[i]
return imw, targets
def parse_model_cfg(path):
"""Parses the yolo-v3 layer configuration file and returns module definitions"""
file = open(path, 'r')
lines = file.read().split('\n')
lines = [x for x in lines if x and not x.startswith('#')]
lines = [x.rstrip().lstrip() for x in lines] # get rid of fringe whitespaces
module_defs = []
for line in lines:
if line.startswith('['): # This marks the start of a new block
module_defs.append({})
module_defs[-1]['type'] = line[1:-1].rstrip()
if module_defs[-1]['type'] == 'convolutional':
module_defs[-1]['batch_normalize'] = 0
else:
key, value = line.split("=")
value = value.strip()
module_defs[-1][key.rstrip()] = value.strip()
return module_defs
def parse_data_cfg(path):
"""Parses the data configuration file"""
print('data_cfg : ',path)
options = dict()
# options['gpus'] = '0,1,2,3'
# options['num_workers'] = '10'
with open(path, 'r') as fp:
lines = fp.readlines()
for line in lines:
line = line.strip()
if line == '' or line.startswith('#'):
continue
key, value = line.split('=')
options[key.strip()] = value.strip()
return options
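
def _demo_parse_data_cfg():
    # Hypothetical sketch (not in the original file): every value is returned as a string,
    # so numeric fields must be cast by the caller.
    opts = parse_data_cfg('cfg/hand.data')
    num_classes = int(opts['classes'])  # '1' -> 1
    names_path = opts['names']          # './cfg/hand.names'
    return num_classes, names_path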
import torch
def init_seeds(seed=0):
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
def select_device(force_cpu=False):
if force_cpu:
cuda = False
device = torch.device('cpu')
else:
cuda = torch.cuda.is_available()
device = torch.device('cuda:0' if cuda else 'cpu')
if torch.cuda.device_count() > 1:
device = torch.device('cuda' if cuda else 'cpu')
print('Found %g GPUs' % torch.cuda.device_count())
# print('Multi-GPU Issue: https://github.com/ultralytics/yolov3/issues/21')
# torch.cuda.set_device(0) # OPTIONAL: Set your GPU if multiple available
# print('Using ', torch.cuda.device_count(), ' GPUs')
print('Using %s %s\n' % (device.type, torch.cuda.get_device_properties(0) if cuda else ''))
return device
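
# Minimal usage sketch (not in the original file): pick the device once, then move the
# model and input tensors onto it, e.g.
#   device = select_device()          # or select_device(force_cpu=True)
#   model.to(device)
#   img = img.to(device)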
import glob
import random
import time
from collections import defaultdict
import cv2
import numpy as np
import torch
import torch.nn as nn
# Set printoptions
torch.set_printoptions(linewidth=1320, precision=5, profile='long')
np.set_printoptions(linewidth=320, formatter={'float_kind': '{:11.5g}'.format}) # format short g, %precision=5
# Prevent OpenCV from multithreading (to use PyTorch DataLoader)
cv2.setNumThreads(0)
def float3(x): # format floats to 3 decimals
return float(format(x, '.3f'))
def init_seeds(seed=0):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)  # seeds the CPU generator; no CUDA-specific call needed otherwise
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
def load_classes(path):
# Loads class labels at 'path'
fp = open(path, 'r')
names = fp.read().split('\n')
return list(filter(None, names)) # filter removes empty strings (such as last line)
def model_info(model):
# Plots a line-by-line description of a PyTorch model
n_p = sum(x.numel() for x in model.parameters()) # number parameters
n_g = sum(x.numel() for x in model.parameters() if x.requires_grad) # number gradients
print('\n%5s %60s %9s %12s %20s %10s %10s' % ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', 'sigma'))
for i, (name, p) in enumerate(model.named_parameters()):
# name = name.replace('module_list.', '')
print('%5g %60s %9s %12g %20s %10.3g %10.3g' % (
i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std()))
print('Model Summary: %g layers, %g parameters, %g gradients' % (i + 1, n_p, n_g))
def plot_one_box(x, img, color=None, label=None, line_thickness=None):
# Plots one bounding box on image img
tl = line_thickness or round(0.002 * max(img.shape[0:2])) + 1 # line thickness
color = color or [random.randint(0, 255) for _ in range(3)]
c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
cv2.rectangle(img, c1, c2, color, thickness=tl)
if label:
tf = max(tl - 1, 1) # font thickness
t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
cv2.rectangle(img, c1, c2, color, -1) # filled
cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA)
def weights_init_normal(m):
classname = m.__class__.__name__
if classname.find('Conv') != -1:
torch.nn.init.normal_(m.weight.data, 0.0, 0.03)
elif classname.find('BatchNorm2d') != -1:
torch.nn.init.normal_(m.weight.data, 1.0, 0.03)
torch.nn.init.constant_(m.bias.data, 0.0)
def xyxy2xywh(x):
# Convert bounding box format from [x1, y1, x2, y2] to [x, y, w, h]
y = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x)
y[:, 0] = (x[:, 0] + x[:, 2]) / 2
y[:, 1] = (x[:, 1] + x[:, 3]) / 2
y[:, 2] = x[:, 2] - x[:, 0]
y[:, 3] = x[:, 3] - x[:, 1]
return y
def xywh2xyxy(x):
# Convert bounding box format from [x, y, w, h] to [x1, y1, x2, y2]
y = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x)
y[:, 0] = x[:, 0] - x[:, 2] / 2
y[:, 1] = x[:, 1] - x[:, 3] / 2
y[:, 2] = x[:, 0] + x[:, 2] / 2
y[:, 3] = x[:, 1] + x[:, 3] / 2
return y
def scale_coords(img_size, coords, img0_shape):  # map boxes from network input size back to the original image size
# Rescale x1, y1, x2, y2 from 416 to image size
# print('coords : ',coords)
# print('img0_shape : ',img0_shape)
gain = float(img_size) / max(img0_shape) # gain = old / new
# print('gain : ',gain)
pad_x = (img_size - img0_shape[1] * gain) / 2 # width padding
pad_y = (img_size - img0_shape[0] * gain) / 2 # height padding
# print('pad_xpad_y : ',pad_x,pad_y)
coords[:, [0, 2]] -= pad_x
coords[:, [1, 3]] -= pad_y
coords[:, :4] /= gain
    coords[:, :4] = torch.clamp(coords[:, :4], min=0)  # clamp so coordinates are non-negative
return coords
def ap_per_class(tp, conf, pred_cls, target_cls):
""" Compute the average precision, given the recall and precision curves.
Source: https://github.com/rafaelpadilla/Object-Detection-Metrics.
# Arguments
tp: True positives (list).
conf: Objectness value from 0-1 (list).
pred_cls: Predicted object classes (list).
target_cls: True object classes (list).
# Returns
The average precision as computed in py-faster-rcnn.
"""
# Sort by objectness
i = np.argsort(-conf)
tp, conf, pred_cls = tp[i], conf[i], pred_cls[i]
# Find unique classes
unique_classes = np.unique(target_cls)
# Create Precision-Recall curve and compute AP for each class
ap, p, r = [], [], []
for c in unique_classes:
i = pred_cls == c
n_gt = (target_cls == c).sum() # Number of ground truth objects
n_p = i.sum() # Number of predicted objects
if n_p == 0 and n_gt == 0:
continue
elif n_p == 0 or n_gt == 0:
ap.append(0)
r.append(0)
p.append(0)
else:
# Accumulate FPs and TPs
fpc = (1 - tp[i]).cumsum()
tpc = (tp[i]).cumsum()
# Recall
recall_curve = tpc / (n_gt + 1e-16)
r.append(recall_curve[-1])
# Precision
precision_curve = tpc / (tpc + fpc)
p.append(precision_curve[-1])
# AP from recall-precision curve
ap.append(compute_ap(recall_curve, precision_curve))
# Plot
# plt.plot(recall_curve, precision_curve)
# Compute F1 score (harmonic mean of precision and recall)
p, r, ap = np.array(p), np.array(r), np.array(ap)
f1 = 2 * p * r / (p + r + 1e-16)
return p, r, ap, f1, unique_classes.astype('int32')
def compute_ap(recall, precision):
""" Compute the average precision, given the recall and precision curves.
Source: https://github.com/rbgirshick/py-faster-rcnn.
# Arguments
recall: The recall curve (list).
precision: The precision curve (list).
# Returns
The average precision as computed in py-faster-rcnn.
"""
# correct AP calculation
# first append sentinel values at the end
mrec = np.concatenate(([0.], recall, [1.]))
mpre = np.concatenate(([0.], precision, [0.]))
# compute the precision envelope
for i in range(mpre.size - 1, 0, -1):
mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
# to calculate area under PR curve, look for points
# where X axis (recall) changes value
i = np.where(mrec[1:] != mrec[:-1])[0]
# and sum (\Delta recall) * prec
ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
return ap
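
def _demo_compute_ap():
    # Hypothetical sketch (not in the original file): AP for a toy precision-recall curve.
    recall = np.array([0.0, 0.5, 1.0])
    precision = np.array([1.0, 1.0, 0.5])
    return compute_ap(recall, precision)  # 0.5 * 1.0 + 0.5 * 0.5 = 0.75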
def bbox_iou(box1, box2, x1y1x2y2=True):
# Returns the IoU of box1 to box2. box1 is 4, box2 is nx4
box2 = box2.t()
# Get the coordinates of bounding boxes
if x1y1x2y2:
# x1, y1, x2, y2 = box1
b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]
else:
# x, y, w, h = box1
b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2
b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2
b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2
b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2
# Intersection area
inter_area = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * \
(torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0)
# Union Area
union_area = ((b1_x2 - b1_x1) * (b1_y2 - b1_y1) + 1e-16) + \
(b2_x2 - b2_x1) * (b2_y2 - b2_y1) - inter_area
return inter_area / union_area # iou
def wh_iou(box1, box2):
box2 = box2.t()
# w, h = box1
w1, h1 = box1[0], box1[1]
w2, h2 = box2[0], box2[1]
# Intersection area
inter_area = torch.min(w1, w2) * torch.min(h1, h2)
# Union Area
union_area = (w1 * h1 + 1e-16) + w2 * h2 - inter_area
return inter_area / union_area # iou
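
def _demo_bbox_iou():
    # Hypothetical sketch (not in the original file): IoU of two offset 10x10 squares.
    box1 = torch.tensor([0., 0., 10., 10.])
    boxes = torch.tensor([[5., 5., 15., 15.]])
    return bbox_iou(box1, boxes)  # inter = 25, union = 175, iou ~= 0.143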
def compute_loss(p, targets): # predictions, targets
FT = torch.cuda.FloatTensor if p[0].is_cuda else torch.FloatTensor
    lxy, lwh, lcls, lconf = FT([0]), FT([0]), FT([0]), FT([0])  # initialize losses to 0
txy, twh, tcls, indices = targets
MSE = nn.MSELoss()
CE = nn.CrossEntropyLoss()
    BCE = nn.BCEWithLogitsLoss()  # suited to multi-label targets such as [1, 1, 0]
# Compute losses
for i, pi0 in enumerate(p): # layer i predictions, i
b, a, gj, gi = indices[i] # image_idx, anchor_idx, gridx, gridy
# print(i,') b, a, gj, gi : ')
# print('b', b)
# print('a', a)
# print('gj', gj)
# print('gi', gi)
tconf = torch.zeros_like(pi0[..., 0]) # conf
# print('tconf: ',tconf.size())
# Compute losses
k = 1 # nT / bs
if len(b) > 0:
pi = pi0[b, a, gj, gi] # predictions closest to anchors
tconf[b, a, gj, gi] = 1 # conf
lxy += (k * 8) * MSE(torch.sigmoid(pi[..., 0:2]), txy[i]) # xy loss
lwh += (k * 4) * MSE(pi[..., 2:4], twh[i]) # wh loss
lcls += (k * 1) * CE(pi[..., 5:], tcls[i]) # class_conf loss
lconf += (k * 64) * BCE(pi0[..., 4], tconf) # obj_conf loss
loss = lxy + lwh + lconf + lcls
# Add to dictionary
d = defaultdict(float)
losses = [loss.item(), lxy.item(), lwh.item(), lconf.item(), lcls.item()]
for name, x in zip(['total', 'xy', 'wh', 'conf', 'cls'], losses):
d[name] = x
return loss, d
def build_targets(model, targets):
# targets = [image, class, x, y, w, h]
if isinstance(model, nn.parallel.DistributedDataParallel):
model = model.module
txy, twh, tcls, indices = [], [], [], []
    for i, layer in enumerate(get_yolo_layers(model)):  # iterate over the 3 yolo layers
# print(i,'layer ',model.module_list[layer])
layer = model.module_list[layer][0]
# iou of targets-anchors
        gwh = targets[:, 4:6] * layer.nG  # wh in grid units
iou = [wh_iou(x, gwh) for x in layer.anchor_vec]
iou, a = torch.stack(iou, 0).max(0) # best iou and anchor
# reject below threshold ious (OPTIONAL, increases P, lowers R)
reject = True
if reject:
j = iou > 0.10
t, a, gwh = targets[j], a[j], gwh[j]
else:
t = targets
# Indices
b, c = t[:, :2].long().t() # target image, class
gxy = t[:, 2:4] * layer.nG
gi, gj = gxy.long().t() # grid_i, grid_j
indices.append((b, a, gj, gi)) # img_index , anchor_index , grid_x , grid_y
# print('b, a, gj, gi : ')
# print('b', b)
# print('a', a)
# print('gj', gj)
# print('gi', gi)
# print('class c',c)
# XY coordinates
        txy.append(gxy - gxy.floor())  # xy offset within the grid cell
# Width and height
        twh.append(torch.log(gwh / layer.anchor_vec[a]))  # yolo method: log-space wh relative to anchor
# twh.append(torch.sqrt(gwh / layer.anchor_vec[a]) / 2) # power method
# Class
tcls.append(c)
# try:
# print('c.max,layer.nC: ',c.max().item() ,layer.nC)
# except:
# pass
if c.shape[0]:
assert c.max().item() <= layer.nC, 'Target classes exceed model classes'
return txy, twh, tcls, indices
# @profile
def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4):
"""
Removes detections with lower object confidence score than 'conf_thres'
Non-Maximum Suppression to further filter detections.
Returns detections with shape:
(x1, y1, x2, y2, object_conf, class_conf, class)
"""
min_wh = 2 # (pixels) minimum box width and height
output = [None] * len(prediction)
for image_i, pred in enumerate(prediction):
# Experiment: Prior class size rejection
# x, y, w, h = pred[:, 0], pred[:, 1], pred[:, 2], pred[:, 3]
# a = w * h # area
# ar = w / (h + 1e-16) # aspect ratio
# n = len(w)
# log_w, log_h, log_a, log_ar = torch.log(w), torch.log(h), torch.log(a), torch.log(ar)
# shape_likelihood = np.zeros((n, 60), dtype=np.float32)
# x = np.concatenate((log_w.reshape(-1, 1), log_h.reshape(-1, 1)), 1)
# from scipy.stats import multivariate_normal
# for c in range(60):
# shape_likelihood[:, c] =
# multivariate_normal.pdf(x, mean=mat['class_mu'][c, :2], cov=mat['class_cov'][c, :2, :2])
# Filter out confidence scores below threshold
class_conf, class_pred = pred[:, 5:].max(1) # max class_conf, index
        pred[:, 4] *= class_conf  # final conf = obj_conf * class_conf
i = (pred[:, 4] > conf_thres) & (pred[:, 2] > min_wh) & (pred[:, 3] > min_wh)
# s2=time.time()
pred2 = pred[i]
# print("++++++pred2 = pred[i]",time.time()-s2, pred2)
# If none are remaining => process next image
if len(pred2) == 0:
continue
# Select predicted classes
class_conf = class_conf[i]
class_pred = class_pred[i].unsqueeze(1).float()
# Box (center x, center y, width, height) to (x1, y1, x2, y2)
pred2[:, :4] = xywh2xyxy(pred2[:, :4])
# pred[:, 4] *= class_conf # improves mAP from 0.549 to 0.551
# Detections ordered as (x1y1x2y2, obj_conf, class_conf, class_pred)
pred2 = torch.cat((pred2[:, :5], class_conf.unsqueeze(1), class_pred), 1)
# Get detections sorted by decreasing confidence scores
pred2 = pred2[(-pred2[:, 4]).argsort()]
det_max = []
nms_style = 'MERGE' # 'OR' (default), 'AND', 'MERGE' (experimental)
for c in pred2[:, -1].unique():
dc = pred2[pred2[:, -1] == c] # select class c
dc = dc[:min(len(dc), 100)] # limit to first 100 boxes
# Non-maximum suppression
if nms_style == 'OR': # default
# METHOD1
# ind = list(range(len(dc)))
# while len(ind):
# j = ind[0]
# det_max.append(dc[j:j + 1]) # save highest conf detection
# reject = (bbox_iou(dc[j], dc[ind]) > nms_thres).nonzero()
# [ind.pop(i) for i in reversed(reject)]
# METHOD2
while dc.shape[0]:
det_max.append(dc[:1]) # save highest conf detection
if len(dc) == 1: # Stop if we're at the last detection
break
iou = bbox_iou(dc[0], dc[1:]) # iou with other boxes
dc = dc[1:][iou < nms_thres] # remove ious > threshold
elif nms_style == 'AND': # requires overlap, single boxes erased
while len(dc) > 1:
iou = bbox_iou(dc[0], dc[1:]) # iou with other boxes
if iou.max() > 0.5:
det_max.append(dc[:1])
dc = dc[1:][iou < nms_thres] # remove ious > threshold
elif nms_style == 'MERGE': # weighted mixture box
while len(dc):
i = bbox_iou(dc[0], dc) > nms_thres # iou with other boxes
weights = dc[i, 4:5]
dc[0, :4] = (weights * dc[i, :4]).sum(0) / weights.sum()
det_max.append(dc[:1])
dc = dc[i == 0]
if len(det_max):
det_max = torch.cat(det_max) # concatenate
output[image_i] = det_max[(-det_max[:, 4]).argsort()] # sort
return output
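
# Usage sketch (added for illustration), mirroring detect() above:
#   detections = non_max_suppression(pred, conf_thres=0.25, nms_thres=0.45)[0]
#   # -> rows of (x1, y1, x2, y2, obj_conf, class_conf, class), or None if nothing passed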
def get_yolo_layers(model):
yolo_layer_index = []
for index, l in enumerate(model.module_list):
try:
            a = l[0].img_size and l[0].nG  # only yolo layers have img_size and nG
# print("---"*50)
# print(l, index)
yolo_layer_index.append(index)
except:
pass
assert len(yolo_layer_index) > 0, "can not find yolo layer"
return yolo_layer_index