Commit c3731591 authored by Glenn Jocher

Initial commit

Parent
# Auto detect text files and perform LF normalization
* text=auto
# Repo-specific GitIgnore ----------------------------------------------------------------------------------------------
*.jpg
*.png
*.bmp
*.tif
*.heic
*.JPG
*.PNG
*.TIF
*.HEIC
*.weights
*.pt
*.tif.txt
!zidane_result.jpg
!coco_training_loss.png
!images/*
checkpoints
temp-plot.html
# MATLAB GitIgnore -----------------------------------------------------------------------------------------------------
*.m~
*.mat
!targets*.mat
# GitHub Python GitIgnore ----------------------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# dotenv
.env
# virtualenv
.venv
venv/
ENV/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
# https://github.com/github/gitignore/blob/master/Global/macOS.gitignore -----------------------------------------------
# General
.DS_Store
.AppleDouble
.LSOverride
# Icon must end with two \r
Icon
Icon?
# Thumbnails
._*
# Files that might appear in the root of a volume
.DocumentRevisions-V100
.fseventsd
.Spotlight-V100
.TemporaryItems
.Trashes
.VolumeIcon.icns
.com.apple.timemachine.donotpresent
# Directories potentially created on remote AFP share
.AppleDB
.AppleDesktop
Network Trash Folder
Temporary Items
.apdisk
# https://github.com/github/gitignore/blob/master/Global/JetBrains.gitignore
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
# User-specific stuff:
.idea/*
.idea/**/workspace.xml
.idea/**/tasks.xml
.idea/dictionaries
.html # Bokeh Plots
.pg # TensorFlow Frozen Graphs
.avi # videos
# Sensitive or high-churn files:
.idea/**/dataSources/
.idea/**/dataSources.ids
.idea/**/dataSources.local.xml
.idea/**/sqlDataSources.xml
.idea/**/dynamic.xml
.idea/**/uiDesigner.xml
# Gradle:
.idea/**/gradle.xml
.idea/**/libraries
# CMake
cmake-build-debug/
cmake-build-release/
# Mongo Explorer plugin:
.idea/**/mongoSettings.xml
## File-based project format:
*.iws
## Plugin-specific files:
# IntelliJ
out/
# mpeltonen/sbt-idea plugin
.idea_modules/
# JIRA plugin
atlassian-ide-plugin.xml
# Cursive Clojure plugin
.idea/replstate.xml
# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
fabric.properties
(This diff has been collapsed.)
<img src="https://storage.googleapis.com/ultralytics/UltralyticsLogoName1000×676.png" width="200">
# Introduction
This directory contains software developed by Ultralytics LLC. For more information on Ultralytics projects please visit:
http://www.ultralytics.com  
# Description
The https://github.com/ultralytics/yolov3 repo contains code to train YOLOv3 on the COCO dataset: https://cocodataset.org/#home. Credit to P.J. Reddie for YOLO (https://pjreddie.com/darknet/yolo/) and to Erik Lindernoren for the PyTorch implementation this repo is based on (https://github.com/eriklindernoren/PyTorch-YOLOv3).
# Requirements
Python 3.6 or later with the following packages, installed via `pip3 install -U -r requirements.txt`:
- `numpy`
- `torch`
- `opencv-python`
# Running
Run `train.py` to begin training. Each epoch trains on 120,000 images from the train and validation sets, and validates on 5,000 images from the validation set. An Nvidia GTX 1080 Ti runs about 16 epochs per day. Loss plots for the bounding boxes, objectness and class confidence should appear similar to the results shown here.
![Alt](https://github.com/ultralytics/yolov3/blob/master/data/xview_training_loss.png "training loss")
Checkpoints are saved in the `checkpoints/` directory. Run `detect.py` to apply the trained weights to an image, such as `zidane.jpg` from the `data/samples` folder, shown here.
![Alt](https://github.com/ultralytics/yolov3/blob/master/data/zidane_result.jpg "example")
Run `test.py` to test the latest checkpoint on the 5000 validation images. Joseph Redmon's official YOLOv3 weights produce an mAP of .581 with this method, compared to .579 in his paper.
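For programmatic use, the same pipeline can be driven from Python. The sketch below is a minimal, non-authoritative example mirroring `detect.py`, assuming a darknet-format checkpoint has been downloaded to `checkpoints/yolov3.weights`; `Darknet`, `ImageFolder`, `load_classes` and `non_max_suppression` are the helpers defined in `models.py` and `utils/` that `detect.py` and `test.py` use.

```python
# Minimal inference sketch (assumes checkpoints/yolov3.weights exists locally)
import torch
from models import Darknet
from utils.datasets import ImageFolder
from utils.utils import load_classes, non_max_suppression

model = Darknet('cfg/yolov3.cfg', img_size=416)
model.load_weights('checkpoints/yolov3.weights')  # darknet format; use torch.load() for .pt checkpoints
model.eval()

classes = load_classes('data/coco.names')
for img_paths, img in ImageFolder('data/samples', batch_size=1, img_size=416):
    with torch.no_grad():
        pred = model(torch.from_numpy(img).unsqueeze(0))
        pred = pred[pred[:, :, 4] > 0.8]  # objectness threshold
        if len(pred) > 0:
            detections = non_max_suppression(pred.unsqueeze(0), 0.8, 0.5)[0]
            if detections is not None:
                print(img_paths[0], '%g detections' % len(detections))
```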
# Contact
For questions or comments please contact Glenn Jocher at glenn.jocher@ultralytics.com or visit us at http://www.ultralytics.com/contact
classes=80
train=/Users/glennjocher/Downloads/DATA/coco/trainvalno5k.txt
valid=/Users/glennjocher/Downloads/DATA/coco/5k.txt
names=data/coco.names
backup=backup/
eval=coco
[net]
# Testing
#batch=1
#subdivisions=1
# Training
batch=16
subdivisions=1
width=416
height=416
channels=3
momentum=0.9
decay=0.0005
angle=0
saturation = 1.5
exposure = 1.5
hue=.1
learning_rate=0.001
burn_in=1000
max_batches = 500200
policy=steps
steps=400000,450000
scales=.1,.1
[convolutional]
batch_normalize=1
filters=32
size=3
stride=1
pad=1
activation=leaky
# Downsample
[convolutional]
batch_normalize=1
filters=64
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=32
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
# Downsample
[convolutional]
batch_normalize=1
filters=128
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
# Downsample
[convolutional]
batch_normalize=1
filters=256
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
# Downsample
[convolutional]
batch_normalize=1
filters=512
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
# Downsample
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
######################
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[convolutional]
size=1
stride=1
pad=1
filters=255
activation=linear
[yolo]
mask = 6,7,8
anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
classes=80
num=9
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1
[route]
layers = -4
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[upsample]
stride=2
[route]
layers = -1, 61
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky
[convolutional]
size=1
stride=1
pad=1
filters=255
activation=linear
[yolo]
mask = 3,4,5
anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
classes=80
num=9
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1
[route]
layers = -4
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[upsample]
stride=2
[route]
layers = -1, 36
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky
[convolutional]
size=1
stride=1
pad=1
filters=255
activation=linear
[yolo]
mask = 0,1,2
anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
classes=80
num=9
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1
person
bicycle
car
motorbike
aeroplane
bus
train
truck
boat
traffic light
fire hydrant
stop sign
parking meter
bench
bird
cat
dog
horse
sheep
cow
elephant
bear
zebra
giraffe
backpack
umbrella
handbag
tie
suitcase
frisbee
skis
snowboard
sports ball
kite
baseball bat
baseball glove
skateboard
surfboard
tennis racket
bottle
wine glass
cup
fork
knife
spoon
bowl
banana
apple
sandwich
orange
broccoli
carrot
hot dog
pizza
donut
cake
chair
sofa
pottedplant
bed
diningtable
toilet
tvmonitor
laptop
mouse
remote
keyboard
cell phone
microwave
oven
toaster
sink
refrigerator
book
clock
vase
scissors
teddy bear
hair drier
toothbrush
#!/bin/bash
# CREDIT: https://github.com/pjreddie/darknet/tree/master/scripts/get_coco_dataset.sh
# Clone COCO API
git clone https://github.com/pdollar/coco
cd coco
mkdir images
cd images
# Download Images
wget -c https://pjreddie.com/media/files/train2014.zip
wget -c https://pjreddie.com/media/files/val2014.zip
# Unzip
unzip -q train2014.zip
unzip -q val2014.zip
cd ..
# Download COCO Metadata
wget -c https://pjreddie.com/media/files/instances_train-val2014.zip
wget -c https://pjreddie.com/media/files/coco/5k.part
wget -c https://pjreddie.com/media/files/coco/trainvalno5k.part
wget -c https://pjreddie.com/media/files/coco/labels.tgz
tar xzf labels.tgz
unzip -q instances_train-val2014.zip
# Set Up Image Lists
#paste <(awk "{print \"$PWD\"}" <5k.part) 5k.part | tr -d '\t' > 5k.txt
#paste <(awk "{print \"$PWD\"}" <trainvalno5k.part) trainvalno5k.part | tr -d '\t' > trainvalno5k.txt
sudo shutdown
# get xview training data
# wget -O train_images.tgz 'https://d307kc0mrhucc3.cloudfront.net/train_images.tgz?Expires=1530124049&Signature=JrQoxipmsETvb7eQHCfDFUO-QEHJGAayUv0i-ParmS-1hn7hl9D~bzGuHWG82imEbZSLUARTtm0wOJ7EmYMGmG5PtLKz9H5qi6DjoSUuFc13NQ-~6yUhE~NfPaTnehUdUMCa3On2wl1h1ZtRG~0Jq1P-AJbpe~oQxbyBrs1KccaMa7FK4F4oMM6sMnNgoXx8-3O77kYw~uOpTMFmTaQdHln6EztW0Lx17i57kK3ogbSUpXgaUTqjHCRA1dWIl7PY1ngQnLslkLhZqmKcaL-BvWf0ZGjHxCDQBpnUjIlvMu5NasegkwD9Jjc0ClgTxsttSkmbapVqaVC8peR0pO619Q__&Key-Pair-Id=APKAIKGDJB5C3XUL2DXQ'
# tar -xvzf train_images.tgz
# sudo rm -rf train_images/._*
# lastly convert each .tif to a .bmp for faster loading in cv2
import argparse
import time
from models import *
from utils.datasets import *
from utils.utils import *
cuda = torch.cuda.is_available()
device = torch.device('cuda:0' if cuda else 'cpu')
parser = argparse.ArgumentParser()
# Get data configuration
# cd yolo && python3 detect.py -secondary_classifier 1
parser.add_argument('-image_folder', type=str, default='data/samples', help='path to images')
parser.add_argument('-output_folder', type=str, default='output', help='path to outputs')
parser.add_argument('-plot_flag', type=bool, default=True)
parser.add_argument('-txt_out', type=bool, default=False)
parser.add_argument('-cfg', type=str, default='cfg/yolov3.cfg', help='cfg file path')
parser.add_argument('-class_path', type=str, default='data/coco.names', help='path to class label file')
parser.add_argument('-conf_thres', type=float, default=0.8, help='object confidence threshold')
parser.add_argument('-nms_thres', type=float, default=0.5, help='iou threshold for non-maximum suppression')
parser.add_argument('-batch_size', type=int, default=1, help='size of the batches')
parser.add_argument('-img_size', type=int, default=32 * 13, help='size of each image dimension')
opt = parser.parse_args()
print(opt)
def detect(opt):
os.system('rm -rf ' + opt.output_folder)
os.makedirs(opt.output_folder, exist_ok=True)
# Load model
model = Darknet(opt.cfg, opt.img_size)
weights_path = 'checkpoints/yolov3.weights'
if weights_path.endswith('.weights'): # saved in darknet format
load_weights(model, weights_path)
else: # endswith('.pt'), saved in pytorch format
checkpoint = torch.load(weights_path, map_location='cpu')
model.load_state_dict(checkpoint['model'])
del checkpoint
# current = model.state_dict()
# saved = checkpoint['model']
# # 1. filter out unnecessary keys
# saved = {k: v for k, v in saved.items() if ((k in current) and (current[k].shape == v.shape))}
# # 2. overwrite entries in the existing state dict
# current.update(saved)
# # 3. load the new state dict
# model.load_state_dict(current)
# model.to(device).eval()
# del checkpoint, current, saved
model.to(device).eval()
# Set Dataloader
classes = load_classes(opt.class_path) # Extracts class labels from file
dataloader = ImageFolder(opt.image_folder, batch_size=opt.batch_size, img_size=opt.img_size)
imgs = [] # Stores image paths
img_detections = [] # Stores detections for each image index
prev_time = time.time()
detections = None
for batch_i, (img_paths, img) in enumerate(dataloader):
print(batch_i, img.shape, end=' ')
preds = []
# Get detections
with torch.no_grad():
# Normal orientation
chip = torch.from_numpy(img).unsqueeze(0).to(device)
pred = model(chip)
pred = pred[pred[:, :, 4] > opt.conf_thres]
if len(pred) > 0:
preds.append(pred.unsqueeze(0))
if len(preds) > 0:
detections = non_max_suppression(torch.cat(preds, 1), opt.conf_thres, opt.nms_thres)
img_detections.extend(detections)
imgs.extend(img_paths)
print('Batch %d... (Done %.3fs)' % (batch_i, time.time() - prev_time))
prev_time = time.time()
# Bounding-box colors
color_list = [[random.randint(0, 255), random.randint(0, 255), random.randint(0, 255)] for _ in range(len(classes))]
if len(img_detections) == 0:
return
# Iterate through images and save plot of detections
for img_i, (path, detections) in enumerate(zip(imgs, img_detections)):
print("image %g: '%s'" % (img_i, path))
if opt.plot_flag:
img = cv2.imread(path)
# The amount of padding that was added
pad_x = max(img.shape[0] - img.shape[1], 0) * (opt.img_size / max(img.shape))
pad_y = max(img.shape[1] - img.shape[0], 0) * (opt.img_size / max(img.shape))
# Image height and width after padding is removed
unpad_h = opt.img_size - pad_y
unpad_w = opt.img_size - pad_x
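# Example of the arithmetic above: a 1280x720 source image letterboxed to img_size 416 gives
# pad_x = 0 and pad_y = (1280 - 720) * 416 / 1280 = 182, so unpad_h = 234 and unpad_w = 416;
# the rescaling below divides box coordinates by these unpadded extents before multiplying by the original shape.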
# Draw bounding boxes and labels of detections
if detections is not None:
unique_classes = detections[:, -1].cpu().unique()
bbox_colors = random.sample(color_list, len(unique_classes))
# write results to .txt file
results_img_path = os.path.join(opt.output_folder, path.split('/')[-1])
results_txt_path = results_img_path + '.txt'
if os.path.isfile(results_txt_path):
os.remove(results_txt_path)
for i in unique_classes:
n = (detections[:, -1].cpu() == i).sum()
print('%g %ss' % (n, classes[int(i)]))
for x1, y1, x2, y2, conf, cls_conf, cls_pred in detections:
# Rescale coordinates to original dimensions
box_h = ((y2 - y1) / unpad_h) * img.shape[0]
box_w = ((x2 - x1) / unpad_w) * img.shape[1]
y1 = (((y1 - pad_y // 2) / unpad_h) * img.shape[0]).round().item()
x1 = (((x1 - pad_x // 2) / unpad_w) * img.shape[1]).round().item()
x2 = (x1 + box_w).round().item()
y2 = (y1 + box_h).round().item()
x1, y1, x2, y2 = max(x1, 0), max(y1, 0), max(x2, 0), max(y2, 0)
# write to file
if opt.txt_out:
with open(results_txt_path, 'a') as file:
file.write(('%g %g %g %g %g %g \n') % (x1, y1, x2, y2, cls_pred, cls_conf * conf))
if opt.plot_flag:
# Add the bbox to the plot
label = '%s %.2f' % (classes[int(cls_pred)], cls_conf) if cls_conf > 0.05 else None
color = bbox_colors[int(np.where(unique_classes == int(cls_pred))[0])]
plot_one_box([x1, y1, x2, y2], img, label=label, color=color, line_thickness=3)
if opt.plot_flag:
# Save generated image with detections
cv2.imwrite(results_img_path.replace('.bmp', '.jpg').replace('.tif', '.jpg'), img)
if __name__ == '__main__':
torch.cuda.empty_cache()
detect(opt)
from collections import defaultdict
import torch.nn as nn
from utils.utils import *
from utils.parse_config import *
def create_modules(module_defs):
"""
Constructs module list of layer blocks from module configuration in module_defs
"""
hyperparams = module_defs.pop(0)
output_filters = [int(hyperparams['channels'])]
module_list = nn.ModuleList()
for i, module_def in enumerate(module_defs):
modules = nn.Sequential()
if module_def['type'] == 'convolutional':
bn = int(module_def['batch_normalize'])
filters = int(module_def['filters'])
kernel_size = int(module_def['size'])
pad = (kernel_size - 1) // 2 if int(module_def['pad']) else 0
modules.add_module('conv_%d' % i, nn.Conv2d(in_channels=output_filters[-1],
out_channels=filters,
kernel_size=kernel_size,
stride=int(module_def['stride']),
padding=pad,
bias=not bn))
if bn:
modules.add_module('batch_norm_%d' % i, nn.BatchNorm2d(filters))
if module_def['activation'] == 'leaky':
modules.add_module('leaky_%d' % i, nn.LeakyReLU(0.1))
elif module_def['type'] == 'upsample':
upsample = nn.Upsample(scale_factor=int(module_def['stride']), mode='nearest')
modules.add_module('upsample_%d' % i, upsample)
elif module_def['type'] == 'route':
layers = [int(x) for x in module_def["layers"].split(',')]
filters = sum([output_filters[layer_i] for layer_i in layers])
modules.add_module('route_%d' % i, EmptyLayer())
elif module_def['type'] == 'shortcut':
filters = output_filters[int(module_def['from'])]
modules.add_module("shortcut_%d" % i, EmptyLayer())
elif module_def["type"] == "yolo":
anchor_idxs = [int(x) for x in module_def["mask"].split(",")]
# Extract anchors
anchors = [float(x) for x in module_def["anchors"].split(",")]
anchors = [(anchors[i], anchors[i + 1]) for i in range(0, len(anchors), 2)]
anchors = [anchors[i] for i in anchor_idxs]
num_classes = int(module_def['classes'])
img_height = int(hyperparams['height'])
# Define detection layer
yolo_layer = YOLOLayer(anchors, num_classes, img_height, anchor_idxs)
modules.add_module('yolo_%d' % i, yolo_layer)
# Register module list and number of output filters
module_list.append(modules)
output_filters.append(filters)
return hyperparams, module_list
class EmptyLayer(nn.Module):
"""Placeholder for 'route' and 'shortcut' layers"""
def __init__(self):
super(EmptyLayer, self).__init__()
class YOLOLayer(nn.Module):
# YOLO Layer 0
def __init__(self, anchors, nC, img_dim, anchor_idxs):
super(YOLOLayer, self).__init__()
anchors = [(a_w, a_h) for a_w, a_h in anchors] # (pixels)
nA = len(anchors)
self.anchors = anchors
self.nA = nA # number of anchors (3)
self.nC = nC # number of classes (60)
self.bbox_attrs = 5 + nC
self.img_dim = img_dim # from hyperparams in cfg file, NOT from parser
if anchor_idxs[0] == (nA * 2): # 6
stride = 32
elif anchor_idxs[0] == nA: # 3
stride = 16
else:
stride = 8
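# With the 9 COCO anchors in yolov3.cfg: mask 6,7,8 (anchor_idxs[0] == 6 == nA*2) selects stride 32,
# mask 3,4,5 selects stride 16 and mask 0,1,2 selects stride 8, i.e. 13x13, 26x26 and 52x52 grids at 416x416.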
# Build anchor grids
nG = int(self.img_dim / stride)
self.grid_x = torch.arange(nG).repeat(nG, 1).view([1, 1, nG, nG]).float()
self.grid_y = torch.arange(nG).repeat(nG, 1).t().view([1, 1, nG, nG]).float()
self.scaled_anchors = torch.FloatTensor([(a_w / stride, a_h / stride) for a_w, a_h in anchors])
self.anchor_w = self.scaled_anchors[:, 0:1].view((1, nA, 1, 1))
self.anchor_h = self.scaled_anchors[:, 1:2].view((1, nA, 1, 1))
def forward(self, p, targets=None, requestPrecision=False, epoch=None):
FT = torch.cuda.FloatTensor if p.is_cuda else torch.FloatTensor
# device = torch.device('cuda:0' if p.is_cuda else 'cpu')
bs = p.shape[0]
nG = p.shape[2]
stride = self.img_dim / nG
if p.is_cuda and not self.grid_x.is_cuda:
self.grid_x, self.grid_y = self.grid_x.cuda(), self.grid_y.cuda()
self.anchor_w, self.anchor_h = self.anchor_w.cuda(), self.anchor_h.cuda()
# self.scaled_anchors = self.scaled_anchors.cuda()
# x.view(4, 650, 19, 19) -- > (4, 10, 19, 19, 65) # (bs, anchors, grid, grid, classes + xywh)
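# For the COCO cfg in this repo the incoming conv has 255 channels = nA(3) * (5 + nC(80)),
# so the view/permute below yields (bs, 3, nG, nG, 85).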
p = p.view(bs, self.nA, self.bbox_attrs, nG, nG).permute(0, 1, 3, 4, 2).contiguous() # prediction
# Get outputs
x = torch.sigmoid(p[..., 0]) # Center x
y = torch.sigmoid(p[..., 1]) # Center y
w = p[..., 2] # Width
h = p[..., 3] # Height
width = torch.exp(w.data) * self.anchor_w
height = torch.exp(h.data) * self.anchor_h
# Add offset and scale with anchors (in grid space, i.e. 0-13)
pred_boxes = FT(bs, self.nA, nG, nG, 4)
pred_conf = p[..., 4] # Conf
pred_cls = p[..., 5:] # Class
# Training
if targets is not None:
BCEWithLogitsLoss1 = nn.BCEWithLogitsLoss(size_average=False) # version 0.4.0
BCEWithLogitsLoss0 = nn.BCEWithLogitsLoss()
# BCEWithLogitsLoss2 = nn.BCEWithLogitsLoss(size_average=True)
MSELoss = nn.MSELoss(size_average=False) # version 0.4.0
CrossEntropyLoss = nn.CrossEntropyLoss()
if requestPrecision:
gx = self.grid_x[:, :, :nG, :nG]
gy = self.grid_y[:, :, :nG, :nG]
pred_boxes[..., 0] = x.data + gx - width / 2
pred_boxes[..., 1] = y.data + gy - height / 2
pred_boxes[..., 2] = x.data + gx + width / 2
pred_boxes[..., 3] = y.data + gy + height / 2
tx, ty, tw, th, mask, tcls, TP, FP, FN, TC = \
build_targets(pred_boxes, pred_conf, pred_cls, targets, self.scaled_anchors, self.nA, self.nC, nG,
requestPrecision)
tcls = tcls[mask]
if x.is_cuda:
tx, ty, tw, th, mask, tcls = tx.cuda(), ty.cuda(), tw.cuda(), th.cuda(), mask.cuda(), tcls.cuda()
# Mask outputs to ignore non-existing objects (but keep confidence predictions)
nM = mask.sum().float()
nGT = sum([len(x) for x in targets])
if nM > 0:
lx = 5 * MSELoss(x[mask], tx[mask])
ly = 5 * MSELoss(y[mask], ty[mask])
lw = 5 * MSELoss(w[mask], tw[mask])
lh = 5 * MSELoss(h[mask], th[mask])
lconf = 1.5 * BCEWithLogitsLoss1(pred_conf[mask], mask[mask].float())
lcls = nM * CrossEntropyLoss(pred_cls[mask], torch.argmax(tcls, 1))
# lcls = BCEWithLogitsLoss1(pred_cls[mask], tcls.float())
else:
lx, ly, lw, lh, lcls, lconf = FT([0]), FT([0]), FT([0]), FT([0]), FT([0]), FT([0])
lconf += nM * BCEWithLogitsLoss0(pred_conf[~mask], mask[~mask].float())
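# The combined loss below sums 5x-weighted coordinate MSE on matched anchors, objectness BCE
# (positives weighted 1.5x, negatives scaled by nM above) and a class cross-entropy scaled by nM.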
loss = lx + ly + lw + lh + lconf + lcls
i = torch.sigmoid(pred_conf[~mask]) > 0.99
FPe = torch.zeros(self.nC)
if i.sum() > 0:
FP_classes = torch.argmax(pred_cls[~mask][i], 1)
for c in FP_classes:
FPe[c] += 1
return loss, loss.item(), lx.item(), ly.item(), lw.item(), lh.item(), lconf.item(), lcls.item(), \
nGT, TP, FP, FPe, FN, TC
else:
pred_boxes[..., 0] = x.data + self.grid_x
pred_boxes[..., 1] = y.data + self.grid_y
pred_boxes[..., 2] = width
pred_boxes[..., 3] = height
# If not in training phase return predictions
output = torch.cat((pred_boxes.view(bs, -1, 4) * stride,
torch.sigmoid(pred_conf.view(bs, -1, 1)), pred_cls.view(bs, -1, self.nC)), -1)
return output.data
class Darknet(nn.Module):
"""YOLOv3 object detection model"""
def __init__(self, config_path, img_size=416):
super(Darknet, self).__init__()
self.module_defs = parse_model_config(config_path)
self.module_defs[0]['height'] = img_size
self.hyperparams, self.module_list = create_modules(self.module_defs)
self.img_size = img_size
self.loss_names = ['loss', 'x', 'y', 'w', 'h', 'conf', 'cls', 'nGT', 'TP', 'FP', 'FPe', 'FN', 'TC']
def forward(self, x, targets=None, requestPrecision=False, epoch=None):
is_training = targets is not None
output = []
self.losses = defaultdict(float)
layer_outputs = []
for i, (module_def, module) in enumerate(zip(self.module_defs, self.module_list)):
if module_def['type'] in ['convolutional', 'upsample']:
x = module(x)
elif module_def['type'] == 'route':
layer_i = [int(x) for x in module_def['layers'].split(',')]
x = torch.cat([layer_outputs[i] for i in layer_i], 1)
elif module_def['type'] == 'shortcut':
layer_i = int(module_def['from'])
x = layer_outputs[-1] + layer_outputs[layer_i]
elif module_def['type'] == 'yolo':
# Train phase: get loss
if is_training:
x, *losses = module[0](x, targets, requestPrecision, epoch)
for name, loss in zip(self.loss_names, losses):
self.losses[name] += loss
# Test phase: Get detections
else:
x = module(x)
output.append(x)
layer_outputs.append(x)
if is_training:
self.losses['nGT'] /= 3
self.losses['TC'] /= 3
metrics = torch.zeros(4, len(self.losses['FPe'])) # TP, FP, FN, target_count
ui = np.unique(self.losses['TC'])[1:]
for i in ui:
j = self.losses['TC'] == float(i)
metrics[0, i] = (self.losses['TP'][j] > 0).sum().float() # TP
metrics[1, i] = (self.losses['FP'][j] > 0).sum().float() # FP
metrics[2, i] = (self.losses['FN'][j] == 3).sum().float() # FN
metrics[3] = metrics.sum(0)
metrics[1] += self.losses['FPe']
self.losses['TP'] = metrics[0].sum()
self.losses['FP'] = metrics[1].sum()
self.losses['FN'] = metrics[2].sum()
self.losses['TC'] = 0
self.losses['metrics'] = metrics
return sum(output) if is_training else torch.cat(output, 1)
def load_weights(self, weights_path):
"""Parses and loads the weights stored in 'weights_path'"""
# Open the weights file
fp = open(weights_path, "rb")
header = np.fromfile(fp, dtype=np.int32, count=5) # First five are header values
# Needed to write header when saving weights
self.header_info = header
self.seen = header[3]
weights = np.fromfile(fp, dtype=np.float32) # The rest are weights
fp.close()
ptr = 0
for i, (module_def, module) in enumerate(zip(self.module_defs, self.module_list)):
if module_def['type'] == 'convolutional':
conv_layer = module[0]
if module_def['batch_normalize']:
# Load BN bias, weights, running mean and running variance
bn_layer = module[1]
num_b = bn_layer.bias.numel() # Number of biases
# Bias
bn_b = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.bias)
bn_layer.bias.data.copy_(bn_b)
ptr += num_b
# Weight
bn_w = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.weight)
bn_layer.weight.data.copy_(bn_w)
ptr += num_b
# Running Mean
bn_rm = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.running_mean)
bn_layer.running_mean.data.copy_(bn_rm)
ptr += num_b
# Running Var
bn_rv = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.running_var)
bn_layer.running_var.data.copy_(bn_rv)
ptr += num_b
else:
# Load conv. bias
num_b = conv_layer.bias.numel()
conv_b = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(conv_layer.bias)
conv_layer.bias.data.copy_(conv_b)
ptr += num_b
# Load conv. weights
num_w = conv_layer.weight.numel()
conv_w = torch.from_numpy(weights[ptr:ptr + num_w]).view_as(conv_layer.weight)
conv_layer.weight.data.copy_(conv_w)
ptr += num_w
"""
@:param path - path of the new weights file
@:param cutoff - save layers between 0 and cutoff (cutoff = -1 -> all are saved)
"""
def save_weights(self, path, cutoff=-1):
fp = open(path, 'wb')
self.header_info[3] = self.seen
self.header_info.tofile(fp)
# Iterate through layers
for i, (module_def, module) in enumerate(zip(self.module_defs[:cutoff], self.module_list[:cutoff])):
if module_def['type'] == 'convolutional':
conv_layer = module[0]
# If batch norm, load bn first
if module_def['batch_normalize']:
bn_layer = module[1]
bn_layer.bias.data.cpu().numpy().tofile(fp)
bn_layer.weight.data.cpu().numpy().tofile(fp)
bn_layer.running_mean.data.cpu().numpy().tofile(fp)
bn_layer.running_var.data.cpu().numpy().tofile(fp)
# Load conv bias
else:
conv_layer.bias.data.cpu().numpy().tofile(fp)
# Load conv weights
conv_layer.weight.data.cpu().numpy().tofile(fp)
fp.close()
# pip3 install -U -r requirements.txt
numpy
scipy
opencv-python
torch
matplotlib
tqdm
h5py
(This diff has been collapsed.)
import argparse
from models import *
from utils.datasets import *
from utils.utils import *
parser = argparse.ArgumentParser()
parser.add_argument('--epochs', type=int, default=200, help='number of epochs')
parser.add_argument('--batch_size', type=int, default=32, help='size of each image batch')
parser.add_argument('--model_config_path', type=str, default='cfg/yolov3.cfg', help='path to model config file')
parser.add_argument('--data_config_path', type=str, default='cfg/coco.data', help='path to data config file')
parser.add_argument('--weights_path', type=str, default='checkpoints/yolov3.weights', help='path to weights file')
parser.add_argument('--class_path', type=str, default='data/coco.names', help='path to class label file')
parser.add_argument('--iou_thres', type=float, default=0.5, help='iou threshold required to qualify as detected')
parser.add_argument('--conf_thres', type=float, default=0.5, help='object confidence threshold')
parser.add_argument('--nms_thres', type=float, default=0.45, help='iou threshold for non-maximum suppression')
parser.add_argument('--n_cpu', type=int, default=0, help='number of cpu threads to use during batch generation')
parser.add_argument('--img_size', type=int, default=416, help='size of each image dimension')
parser.add_argument('--use_cuda', type=bool, default=True, help='whether to use cuda if available')
opt = parser.parse_args()
print(opt)
cuda = torch.cuda.is_available() and opt.use_cuda
device = torch.device('cuda:0' if cuda else 'cpu')
# Get data configuration
data_config = parse_data_config(opt.data_config_path)
test_path = data_config['valid']
num_classes = int(data_config['classes'])
# Initiate model
model = Darknet(opt.model_config_path, opt.img_size)
# Load weights
weights_path = 'checkpoints/yolov3.pt'
if weights_path.endswith('.weights'): # darknet format
load_weights(model, weights_path)
elif weights_path.endswith('.pt'): # pytorch format
checkpoint = torch.load(weights_path, map_location='cpu')
model.load_state_dict(checkpoint['model'])
del checkpoint
model.to(device).eval()
# Get dataloader
# dataset = ListDataset(test_path)
# dataloader = torch.utils.data.DataLoader(dataset, batch_size=opt.batch_size, shuffle=False, num_workers=opt.n_cpu)
dataloader = ListDataset(test_path, batch_size=opt.batch_size, img_size=opt.img_size)
Tensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor
n_gt = 0
correct = 0
print('Compute mAP...')
outputs = []
targets = None
APs = []
for batch_i, (imgs, targets) in enumerate(dataloader):
imgs = imgs.to(device)
with torch.no_grad():
output = model(imgs)
output = non_max_suppression(output, conf_thres=opt.conf_thres, nms_thres=opt.nms_thres)
# Compute average precision for each sample
for sample_i in range(len(targets)):
correct = []
# Get labels for sample where width is not zero (dummies)
annotations = targets[sample_i]
# Extract detections
detections = output[sample_i]
if detections is None:
# If there are no detections but there are annotations mask as zero AP
if annotations.size(0) != 0:
APs.append(0)
continue
# Get detections sorted by decreasing confidence scores
detections = detections[np.argsort(-detections[:, 4])]
# If no annotations add number of detections as incorrect
if annotations.size(0) == 0:
correct.extend([0 for _ in range(len(detections))])
else:
# Extract target boxes as (x1, y1, x2, y2)
target_boxes = torch.FloatTensor(annotations[:, 1:].shape)
target_boxes[:, 0] = (annotations[:, 1] - annotations[:, 3] / 2)
target_boxes[:, 1] = (annotations[:, 2] - annotations[:, 4] / 2)
target_boxes[:, 2] = (annotations[:, 1] + annotations[:, 3] / 2)
target_boxes[:, 3] = (annotations[:, 2] + annotations[:, 4] / 2)
target_boxes *= opt.img_size
detected = []
for *pred_bbox, conf, obj_conf, obj_pred in detections:
pred_bbox = torch.FloatTensor(pred_bbox).view(1, -1)
# Compute iou with target boxes
iou = bbox_iou(pred_bbox, target_boxes)
# Extract index of largest overlap
best_i = np.argmax(iou)
# If overlap exceeds threshold and classification is correct mark as correct
if iou[best_i] > opt.iou_thres and obj_pred == annotations[best_i, 0] and best_i not in detected:
correct.append(1)
detected.append(best_i)
else:
correct.append(0)
# Extract true and false positives
true_positives = np.array(correct)
false_positives = 1 - true_positives
# Compute cumulative false positives and true positives
false_positives = np.cumsum(false_positives)
true_positives = np.cumsum(true_positives)
# Compute recall and precision at all ranks
recall = true_positives / annotations.size(0) if annotations.size(0) else true_positives
precision = true_positives / np.maximum(true_positives + false_positives, np.finfo(np.float64).eps)
# Compute average precision
AP = compute_ap(recall, precision)
APs.append(AP)
print("+ Sample [%d/%d] AP: %.4f (%.4f)" % (len(APs), len(dataloader) * opt.batch_size, AP, np.mean(APs)))
print("Mean Average Precision: %.4f" % np.mean(APs))
import argparse
import time
from sys import platform
from models import *
from utils.datasets import *
from utils.utils import *
parser = argparse.ArgumentParser()
parser.add_argument('-epochs', type=int, default=999, help='number of epochs')
parser.add_argument('-batch_size', type=int, default=12, help='size of each image batch')
parser.add_argument('-data_config_path', type=str, default='cfg/coco.data', help='data config file path')
parser.add_argument('-cfg', type=str, default='cfg/yolov3.cfg', help='cfg file path')
parser.add_argument('-img_size', type=int, default=32 * 13, help='size of each image dimension')
parser.add_argument('-resume', default=False, help='resume training flag')
opt = parser.parse_args()
print(opt)
cuda = torch.cuda.is_available()
device = torch.device('cuda:0' if cuda else 'cpu')
random.seed(0)
np.random.seed(0)
torch.manual_seed(0)
if cuda:
torch.cuda.manual_seed(0)
torch.cuda.manual_seed_all(0)
torch.backends.cudnn.benchmark = True
def main(opt):
os.makedirs('checkpoints', exist_ok=True)
# Configure run
data_config = parse_data_config(opt.data_config_path)
num_classes = int(data_config['classes'])
if platform == 'darwin': # macos
train_path = data_config['valid']
else: # linux (gcp cloud)
train_path = '../coco/trainvalno5k.txt'
# Initialize model
model = Darknet(opt.cfg, opt.img_size)
# Get dataloader
dataloader = ListDataset(train_path, batch_size=opt.batch_size, img_size=opt.img_size)
# reload saved optimizer state
start_epoch = 0
best_loss = float('inf')
if opt.resume:
checkpoint = torch.load('checkpoints/latest.pt', map_location='cpu')
model.load_state_dict(checkpoint['model'])
if torch.cuda.device_count() > 1:
print('Using ', torch.cuda.device_count(), ' GPUs')
model = nn.DataParallel(model)
model.to(device).train()
# # Transfer learning
# for i, (name, p) in enumerate(model.named_parameters()):
# #name = name.replace('module_list.', '')
# #print('%4g %70s %9s %12g %20s %12g %12g' % (
# # i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std()))
# if p.shape[0] != 650: # not YOLO layer
# p.requires_grad = False
# Set optimizer
# optimizer = torch.optim.SGD(model.parameters(), lr=.001, momentum=.9, weight_decay=0.0005 * 0, nesterov=True)
# optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()))
optimizer = torch.optim.Adam(model.parameters())
optimizer.load_state_dict(checkpoint['optimizer'])
start_epoch = checkpoint['epoch'] + 1
best_loss = checkpoint['best_loss']
del checkpoint # current, saved
else:
if torch.cuda.device_count() > 1:
print('Using ', torch.cuda.device_count(), ' GPUs')
model = nn.DataParallel(model)
model.to(device).train()
optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=1e-4, weight_decay=5e-4)
# Set scheduler
# scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, 24, eta_min=0.00001, last_epoch=-1)
# y = 0.001 * exp(-0.00921 * x) # 1e-4 @ 250, 1e-5 @ 500
# scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.99082, last_epoch=start_epoch - 1)
modelinfo(model)
t0, t1 = time.time(), time.time()
print('%10s' * 16 % (
'Epoch', 'Batch', 'x', 'y', 'w', 'h', 'conf', 'cls', 'total', 'P', 'R', 'nGT', 'TP', 'FP', 'FN', 'time'))
for epoch in range(opt.epochs):
epoch += start_epoch
# img_size = random.choice([19, 20, 21, 22, 23, 24, 25]) * 32
# dataloader = ListDataset(train_path, batch_size=opt.batch_size, img_size=img_size, targets_path=targets_path)
# print('Running image size %g' % img_size)
# Update scheduler
# if epoch % 25 == 0:
# scheduler.last_epoch = -1 # for cosine annealing, restart every 25 epochs
# scheduler.step()
# if epoch <= 100:
# for g in optimizer.param_groups:
# g['lr'] = 0.0005 * (0.992 ** epoch) # 1/10 th every 250 epochs
# g['lr'] = 0.001 * (0.9773 ** epoch) # 1/10 th every 100 epochs
# g['lr'] = 0.0005 * (0.955 ** epoch) # 1/10 th every 50 epochs
# g['lr'] = 0.0005 * (0.926 ** epoch) # 1/10 th every 30 epochs
ui = -1
rloss = defaultdict(float) # running loss
metrics = torch.zeros(4, num_classes)
for i, (imgs, targets) in enumerate(dataloader):
n = opt.batch_size # number of pictures at a time
for j in range(int(len(imgs) / n)):
targets_j = targets[j * n:j * n + n]
nGT = sum([len(x) for x in targets_j])
if nGT < 1:
continue
loss = model(imgs[j * n:j * n + n].to(device), targets_j, requestPrecision=True, epoch=epoch)
optimizer.zero_grad()
loss.backward()
optimizer.step()
ui += 1
metrics += model.losses['metrics']
for key, val in model.losses.items():
rloss[key] = (rloss[key] * ui + val) / (ui + 1)
# Precision
precision = metrics[0] / (metrics[0] + metrics[1] + 1e-16)
k = (metrics[0] + metrics[1]) > 0
if k.sum() > 0:
mean_precision = precision[k].mean()
else:
mean_precision = 0
# Recall
recall = metrics[0] / (metrics[0] + metrics[2] + 1e-16)
k = (metrics[0] + metrics[2]) > 0
if k.sum() > 0:
mean_recall = recall[k].mean()
else:
mean_recall = 0
s = ('%10s%10s' + '%10.3g' * 14) % (
'%g/%g' % (epoch, opt.epochs - 1), '%g/%g' % (i, len(dataloader) - 1), rloss['x'],
rloss['y'], rloss['w'], rloss['h'], rloss['conf'], rloss['cls'],
rloss['loss'], mean_precision, mean_recall, model.losses['nGT'], model.losses['TP'],
model.losses['FP'], model.losses['FN'], time.time() - t1)
t1 = time.time()
print(s)
# if i == 1:
# return
# Write epoch results
with open('results.txt', 'a') as file:
file.write(s + '\n')
# Update best loss
loss_per_target = rloss['loss'] / rloss['nGT']
if loss_per_target < best_loss:
best_loss = loss_per_target
# Save latest checkpoint
checkpoint = {'epoch': epoch,
'best_loss': best_loss,
'model': model.state_dict(),
'optimizer': optimizer.state_dict()}
torch.save(checkpoint, 'checkpoints/latest.pt')
# Save best checkpoint
if best_loss == loss_per_target:
os.system('cp checkpoints/latest.pt checkpoints/best.pt')
# Save backup checkpoint
if (epoch > 0) & (epoch % 100 == 0):
os.system('cp checkpoints/latest.pt checkpoints/backup' + str(epoch) + '.pt')
# Save final model
dt = time.time() - t0
print('Finished %g epochs in %.2fs (%.2fs/epoch)' % (epoch, dt, dt / (epoch + 1)))
if __name__ == '__main__':
torch.cuda.empty_cache()
main(opt)
torch.cuda.empty_cache()
import glob
import math
import os
import random
import cv2
import numpy as np
import torch
# from torch.utils.data import Dataset
from utils.utils import xyxy2xywh
class ImageFolder(): # for eval-only
def __init__(self, path, batch_size=1, img_size=416):
if os.path.isdir(path):
self.files = sorted(glob.glob('%s/*.*' % path))
elif os.path.isfile(path):
self.files = [path]
self.nF = len(self.files) # number of image files
self.nB = math.ceil(self.nF / batch_size) # number of batches
self.batch_size = batch_size
self.height = img_size
assert self.nF > 0, 'No images found in path %s' % path
# RGB normalization values
# self.rgb_mean = np.array([60.134, 49.697, 40.746], dtype=np.float32).reshape((3, 1, 1))
# self.rgb_std = np.array([29.99, 24.498, 22.046], dtype=np.float32).reshape((3, 1, 1))
def __iter__(self):
self.count = -1
return self
def __next__(self):
self.count += 1
if self.count == self.nB:
raise StopIteration
img_path = self.files[self.count]
# Read image
img = cv2.imread(img_path) # BGR
# Padded resize
img, _, _, _ = resize_square(img, height=self.height, color=(127.5, 127.5, 127.5))
# Normalize RGB
img = img[:, :, ::-1].transpose(2, 0, 1)
img = np.ascontiguousarray(img, dtype=np.float32)
# img -= self.rgb_mean
# img /= self.rgb_std
img /= 255.0
return [img_path], img
def __len__(self):
return self.nB # number of batches
class ListDataset(): # for training
def __init__(self, path, batch_size=1, img_size=608):
self.path = path
#self.img_files = sorted(glob.glob('%s/*.*' % path))
with open(path, 'r') as file:
self.img_files = file.readlines()
self.img_files = [path.replace('\n', '').replace('/images','/Users/glennjocher/Downloads/DATA/coco/images') for path in self.img_files]
self.label_files = [path.replace('images', 'labels').replace('.png', '.txt').replace('.jpg', '.txt') for path in
self.img_files]
self.nF = len(self.img_files) # number of image files
self.nB = math.ceil(self.nF / batch_size) # number of batches
self.batch_size = batch_size
#assert self.nB > 0, 'No images found in path %s' % path
self.height = img_size
# RGB normalization values
# self.rgb_mean = np.array([60.134, 49.697, 40.746], dtype=np.float32).reshape((1, 3, 1, 1))
# self.rgb_std = np.array([29.99, 24.498, 22.046], dtype=np.float32).reshape((1, 3, 1, 1))
def __iter__(self):
self.count = -1
# self.shuffled_vector = np.random.permutation(self.nF) # shuffled vector
self.shuffled_vector = np.arange(self.nF)
return self
def __next__(self):
self.count += 1
if self.count == self.nB:
raise StopIteration
ia = self.count * self.batch_size
ib = min((self.count + 1) * self.batch_size, self.nF)
height = self.height
img_all = []
labels_all = []
for index, files_index in enumerate(range(ia, ib)):
img_path = self.img_files[self.shuffled_vector[files_index]]
label_path = self.label_files[self.shuffled_vector[files_index]]
img = cv2.imread(img_path) # BGR
if img is None:
continue
augment_hsv = False
if augment_hsv:
# SV augmentation by 50%
fraction = 0.50
img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
S = img_hsv[:, :, 1].astype(np.float32)
V = img_hsv[:, :, 2].astype(np.float32)
a = (random.random() * 2 - 1) * fraction + 1
S *= a
if a > 1:
np.clip(S, a_min=0, a_max=255, out=S)
a = (random.random() * 2 - 1) * fraction + 1
V *= a
if a > 1:
np.clip(V, a_min=0, a_max=255, out=V)
img_hsv[:, :, 1] = S.astype(np.uint8)
img_hsv[:, :, 2] = V.astype(np.uint8)
cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)
h, w, _ = img.shape
img, ratio, padw, padh = resize_square(img, height=height, color=(127.5, 127.5, 127.5))
# Load labels
if os.path.isfile(label_path):
labels0 = np.loadtxt(label_path, dtype=np.float32).reshape(-1, 5)
# Normalized xywh to pixel xyxy format
labels = labels0.copy()
labels[:, 1] = ratio * w * (labels0[:, 1] - labels0[:, 3] / 2) + padw
labels[:, 2] = ratio * h * (labels0[:, 2] - labels0[:, 4] / 2) + padh
labels[:, 3] = ratio * w * (labels0[:, 1] + labels0[:, 3] / 2) + padw
labels[:, 4] = ratio * h * (labels0[:, 2] + labels0[:, 4] / 2) + padh
else:
labels = np.array([])
# Augment image and labels
# img, labels, M = random_affine(img, targets=labels, degrees=(-5, 5), translate=(0.1, 0.1), scale=(0.8, 1.2)) # RGB
plotFlag = False
if plotFlag:
import matplotlib.pyplot as plt
plt.subplot(4, 4, index + 1).imshow(img[:, :, ::-1])
plt.plot(labels[:, [1, 3, 3, 1, 1]].T, labels[:, [2, 2, 4, 4, 2]].T, '.-')
nL = len(labels)
if nL > 0:
# convert xyxy to xywh
labels[:, 1:5] = xyxy2xywh(labels[:, 1:5].copy()) / height
# random left-right flip
lr_flip = False
if lr_flip & (random.random() > 0.5):
img = np.fliplr(img)
if nL > 0:
labels[:, 1] = 1 - labels[:, 1]
# random up-down flip
ud_flip = False
if ud_flip & (random.random() > 0.5):
img = np.flipud(img)
if nL > 0:
labels[:, 2] = 1 - labels[:, 2]
img_all.append(img)
labels_all.append(torch.from_numpy(labels))
# Normalize
img_all = np.stack(img_all)[:, :, :, ::-1].transpose(0, 3, 1, 2) # BGR to RGB and cv2 to pytorch
img_all = np.ascontiguousarray(img_all, dtype=np.float32)
# img_all -= self.rgb_mean
# img_all /= self.rgb_std
img_all /= 255.0
return torch.from_numpy(img_all), labels_all
def __len__(self):
return self.nB # number of batches
def resize_square(img, height=416, color=(0, 0, 0)): # resize a rectangular image to a padded square
shape = img.shape[:2] # shape = [height, width]
ratio = float(height) / max(shape)
new_shape = [round(shape[0] * ratio), round(shape[1] * ratio)]
dw = height - new_shape[1] # width padding
dh = height - new_shape[0] # height padding
top, bottom = dh // 2, dh - (dh // 2)
left, right = dw // 2, dw - (dw // 2)
img = cv2.resize(img, (new_shape[1], new_shape[0]), interpolation=cv2.INTER_AREA)
return cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color), ratio, dw // 2, dh // 2
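# Example: a 1280x720 image at height=416 resizes to 416x234 and is padded 91 px on top and bottom,
# returning (padded image, ratio 0.325, dw//2 = 0, dh//2 = 91).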
def random_affine(img, targets=None, degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-3, 3),
borderValue=(0, 0, 0)):
# torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))
# https://medium.com/uruvideo/dataset-augmentation-with-random-homographies-a8f4b44830d4
border = 0 # width of added border (optional)
height = max(img.shape[0], img.shape[1]) + border * 2
# Rotation and Scale
R = np.eye(3)
a = random.random() * (degrees[1] - degrees[0]) + degrees[0]
# a += random.choice([-180, -90, 0, 90]) # random 90deg rotations added to small rotations
s = random.random() * (scale[1] - scale[0]) + scale[0]
R[:2] = cv2.getRotationMatrix2D(angle=a, center=(img.shape[1] / 2, img.shape[0] / 2), scale=s)
# Translation
T = np.eye(3)
T[0, 2] = (random.random() * 2 - 1) * translate[0] * img.shape[0] + border # x translation (pixels)
T[1, 2] = (random.random() * 2 - 1) * translate[1] * img.shape[1] + border # y translation (pixels)
# Shear
S = np.eye(3)
S[0, 1] = math.tan((random.random() * (shear[1] - shear[0]) + shear[0]) * math.pi / 180) # x shear (deg)
S[1, 0] = math.tan((random.random() * (shear[1] - shear[0]) + shear[0]) * math.pi / 180) # y shear (deg)
M = S @ T @ R # ORDER IS IMPORTANT HERE!!
imw = cv2.warpPerspective(img, M, dsize=(height, height), flags=cv2.INTER_LINEAR,
borderValue=borderValue) # BGR order (YUV-equalized BGR means)
# Return warped points also
if targets is not None:
if len(targets) > 0:
n = targets.shape[0]
points = targets[:, 1:5].copy()
area0 = (points[:, 2] - points[:, 0]) * (points[:, 3] - points[:, 1])
# warp points
xy = np.ones((n * 4, 3))
xy[:, :2] = points[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(n * 4, 2) # x1y1, x2y2, x1y2, x2y1
xy = (xy @ M.T)[:, :2].reshape(n, 8)
# create new boxes
x = xy[:, [0, 2, 4, 6]]
y = xy[:, [1, 3, 5, 7]]
xy = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T
# apply angle-based reduction
radians = a * math.pi / 180
reduction = max(abs(math.sin(radians)), abs(math.cos(radians))) ** 0.5
x = (xy[:, 2] + xy[:, 0]) / 2
y = (xy[:, 3] + xy[:, 1]) / 2
w = (xy[:, 2] - xy[:, 0]) * reduction
h = (xy[:, 3] - xy[:, 1]) * reduction
xy = np.concatenate((x - w / 2, y - h / 2, x + w / 2, y + h / 2)).reshape(4, n).T
# reject warped points outside of image
np.clip(xy, 0, height, out=xy)
w = xy[:, 2] - xy[:, 0]
h = xy[:, 3] - xy[:, 1]
area = w * h
ar = np.maximum(w / (h + 1e-16), h / (w + 1e-16))
i = (w > 4) & (h > 4) & (area / area0 > 0.1) & (ar < 10)
targets = targets[i]
targets[:, 1:5] = xy[i]
return imw, targets, M
else:
return imw
def convert_tif2bmp(p='/Users/glennjocher/Downloads/DATA/xview/val_images_bmp'):
import glob
import cv2
files = sorted(glob.glob('%s/*.tif' % p))
for i, f in enumerate(files):
print('%g/%g' % (i + 1, len(files)))
img = cv2.imread(f)
cv2.imwrite(f.replace('.tif', '.bmp'), img)
os.system('rm -rf ' + f)
#!/usr/bin/env bash
# Start
sudo rm -rf yolov3 && git clone https://github.com/ultralytics/yolov3 && cd yolov3 && python3 train.py -img_size 416 -epochs 999
# Resume
cd yolov3 && python3 train.py -img_size 416 -resume 1
# Detect
gsutil cp gs://ultralytics/fresh9_5_e201.pt yolov3/checkpoints
cd yolov3 && python3 detect.py
def parse_model_config(path):
"""Parses the yolo-v3 layer configuration file and returns module definitions"""
file = open(path, 'r')
lines = file.read().split('\n')
lines = [x for x in lines if x and not x.startswith('#')]
lines = [x.rstrip().lstrip() for x in lines] # get rid of fringe whitespaces
module_defs = []
for line in lines:
if line.startswith('['): # This marks the start of a new block
module_defs.append({})
module_defs[-1]['type'] = line[1:-1].rstrip()
if module_defs[-1]['type'] == 'convolutional':
module_defs[-1]['batch_normalize'] = 0
else:
key, value = line.split("=")
value = value.strip()
module_defs[-1][key.rstrip()] = value.strip()
return module_defs
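# Example (for cfg/yolov3.cfg): module_defs starts roughly as
# [{'type': 'net', 'batch': '16', 'width': '416', ...},
#  {'type': 'convolutional', 'batch_normalize': '1', 'filters': '32', 'size': '3', ...}, ...]
# Note that all values are kept as strings and converted by the caller (create_modules).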
def parse_data_config(path):
"""Parses the data configuration file"""
options = dict()
options['gpus'] = '0,1,2,3'
options['num_workers'] = '10'
with open(path, 'r') as fp:
lines = fp.readlines()
for line in lines:
line = line.strip()
if line == '' or line.startswith('#'):
continue
key, value = line.split('=')
options[key.strip()] = value.strip()
return options
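# Example (for cfg/coco.data): returns a dict with string values, e.g.
# {'gpus': '0,1,2,3', 'num_workers': '10', 'classes': '80', 'train': <path>, 'valid': <path>,
#  'names': 'data/coco.names', 'backup': 'backup/', 'eval': 'coco'}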
import random
import cv2
import numpy as np
import torch
import torch.nn.functional as F
# set printoptions
torch.set_printoptions(linewidth=1320, precision=5, profile='long')
np.set_printoptions(linewidth=320, formatter={'float_kind': '{11.5g}'.format}) # format short g, %precision=5
def load_classes(path):
"""
Loads class labels at 'path'
"""
fp = open(path, "r")
names = fp.read().split("\n")[:-1]
return names
def modelinfo(model):
nparams = sum(x.numel() for x in model.parameters())
ngradients = sum(x.numel() for x in model.parameters() if x.requires_grad)
print('\n%4s %70s %9s %12s %20s %12s %12s' % ('', 'name', 'gradient', 'parameters', 'shape', 'mu', 'sigma'))
for i, (name, p) in enumerate(model.named_parameters()):
name = name.replace('module_list.', '')
print('%4g %70s %9s %12g %20s %12g %12g' % (
i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std()))
print('\n%g layers, %g parameters, %g gradients' % (i + 1, nparams, ngradients))
def xview_class_weights(indices): # weights of each class in the training set, normalized to mu = 1
weights = 1 / torch.FloatTensor(
[74, 364, 713, 71, 2925, 209767, 6925, 1101, 3612, 12134, 5871, 3640, 860, 4062, 895, 149, 174, 17, 1624, 1846,
125, 122, 124, 662, 1452, 697, 222, 190, 786, 200, 450, 295, 79, 205, 156, 181, 70, 64, 337, 1352, 336, 78,
628, 841, 287, 83, 702, 1177, 313865, 195, 1081, 882, 1059, 4175, 123, 1700, 2317, 1579, 368, 85])
weights /= weights.sum()
return weights[indices]
def plot_one_box(x, im, color=None, label=None, line_thickness=None):
tl = line_thickness or round(0.003 * max(im.shape[0:2])) # line thickness
color = color or [random.randint(0, 255) for _ in range(3)]
c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
cv2.rectangle(im, c1, c2, color, thickness=tl)
if label:
tf = max(tl - 1, 1) # font thickness
t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
cv2.rectangle(im, c1, c2, color, -1) # filled
cv2.putText(im, label, (c1[0], c1[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA)
def weights_init_normal(m):
classname = m.__class__.__name__
if classname.find('Conv') != -1:
torch.nn.init.normal_(m.weight.data, 0.0, 0.03)
elif classname.find('BatchNorm2d') != -1:
torch.nn.init.normal_(m.weight.data, 1.0, 0.03)
torch.nn.init.constant_(m.bias.data, 0.0)
def xyxy2xywh(box):
xywh = np.zeros(box.shape)
xywh[:, 0] = (box[:, 0] + box[:, 2]) / 2
xywh[:, 1] = (box[:, 1] + box[:, 3]) / 2
xywh[:, 2] = box[:, 2] - box[:, 0]
xywh[:, 3] = box[:, 3] - box[:, 1]
return xywh
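# Example: xyxy2xywh(np.array([[0., 0., 10., 20.]])) -> [[5., 10., 10., 20.]] (center x, center y, width, height)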
def compute_ap(recall, precision):
""" Compute the average precision, given the recall and precision curves.
Code originally from https://github.com/rbgirshick/py-faster-rcnn.
# Arguments
recall: The recall curve (list).
precision: The precision curve (list).
# Returns
The average precision as computed in py-faster-rcnn.
"""
# correct AP calculation
# first append sentinel values at the end
mrec = np.concatenate(([0.], recall, [1.]))
mpre = np.concatenate(([0.], precision, [0.]))
# compute the precision envelope
for i in range(mpre.size - 1, 0, -1):
mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
# to calculate area under PR curve, look for points
# where X axis (recall) changes value
i = np.where(mrec[1:] != mrec[:-1])[0]
# and sum (\Delta recall) * prec
ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
return ap
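# Example: compute_ap([0.5, 1.0], [1.0, 0.5]) == 0.75
# (the precision envelope is 1.0 up to recall 0.5 and 0.5 from recall 0.5 to 1.0).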
def bbox_iou(box1, box2, x1y1x2y2=True):
# if len(box1.shape) == 1:
# box1 = box1.reshape(1, 4)
"""
Returns the IoU of two bounding boxes
"""
if x1y1x2y2:
# Get the coordinates of bounding boxes
b1_x1, b1_y1, b1_x2, b1_y2 = box1[:, 0], box1[:, 1], box1[:, 2], box1[:, 3]
b2_x1, b2_y1, b2_x2, b2_y2 = box2[:, 0], box2[:, 1], box2[:, 2], box2[:, 3]
else:
# Transform from center and width to exact coordinates
b1_x1, b1_x2 = box1[:, 0] - box1[:, 2] / 2, box1[:, 0] + box1[:, 2] / 2
b1_y1, b1_y2 = box1[:, 1] - box1[:, 3] / 2, box1[:, 1] + box1[:, 3] / 2
b2_x1, b2_x2 = box2[:, 0] - box2[:, 2] / 2, box2[:, 0] + box2[:, 2] / 2
b2_y1, b2_y2 = box2[:, 1] - box2[:, 3] / 2, box2[:, 1] + box2[:, 3] / 2
# Get the coordinates of the intersection rectangle
inter_rect_x1 = torch.max(b1_x1, b2_x1)
inter_rect_y1 = torch.max(b1_y1, b2_y1)
inter_rect_x2 = torch.min(b1_x2, b2_x2)
inter_rect_y2 = torch.min(b1_y2, b2_y2)
# Intersection area
inter_area = torch.clamp(inter_rect_x2 - inter_rect_x1, 0) * torch.clamp(inter_rect_y2 - inter_rect_y1, 0)
# Union Area
b1_area = (b1_x2 - b1_x1) * (b1_y2 - b1_y1)
b2_area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1)
return inter_area / (b1_area + b2_area - inter_area + 1e-16)
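# Worked example (illustrative): two 10x10 boxes overlapping in a 5x5 region share
# intersection 25 and union 175, so IoU = 25/175 = 0.143 (approximately):
#   bbox_iou(torch.Tensor([[0., 0., 10., 10.]]), torch.Tensor([[5., 5., 15., 15.]]))  # -> tensor([0.1429])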
def build_targets(pred_boxes, pred_conf, pred_cls, target, anchor_wh, nA, nC, nG, requestPrecision):
"""
returns tx, ty, tw, th, tconf, tcls, TP, FP, FN, TC
"""
nB = len(target) # target.shape[0]
nT = [len(x) for x in target] # torch.argmin(target[:, :, 4], 1) # targets per image
tx = torch.zeros(nB, nA, nG, nG) # batch size (4), number of anchors (3), number of grid points (13)
ty = torch.zeros(nB, nA, nG, nG)
tw = torch.zeros(nB, nA, nG, nG)
th = torch.zeros(nB, nA, nG, nG)
tconf = torch.ByteTensor(nB, nA, nG, nG).fill_(0)
tcls = torch.ByteTensor(nB, nA, nG, nG, nC).fill_(0) # nC = number of classes
TP = torch.ByteTensor(nB, max(nT)).fill_(0)
FP = torch.ByteTensor(nB, max(nT)).fill_(0)
FN = torch.ByteTensor(nB, max(nT)).fill_(0)
TC = torch.ShortTensor(nB, max(nT)).fill_(-1) # target category
for b in range(nB):
nTb = nT[b] # number of targets
if nTb == 0:
continue
t = target[b]
FN[b, :nTb] = 1
# Convert to position relative to box
TC[b, :nTb], gx, gy, gw, gh = t[:, 0].long(), t[:, 1] * nG, t[:, 2] * nG, t[:, 3] * nG, t[:, 4] * nG
# Get grid box indices and prevent overflows (i.e. 13.01 on a 13 x 13 grid)
gi = torch.clamp(gx.long(), min=0, max=nG - 1)
gj = torch.clamp(gy.long(), min=0, max=nG - 1)
# iou of targets-anchors (using wh only)
box1 = t[:, 3:5] * nG
# box2 = anchor_grid_wh[:, gj, gi]
box2 = anchor_wh.unsqueeze(1).repeat(1, nTb, 1)
inter_area = torch.min(box1, box2).prod(2)
iou_anch = inter_area / (gw * gh + box2.prod(2) - inter_area + 1e-16)
# Select best iou_pred and anchor
iou_anch_best, a = iou_anch.max(0) # best anchor [0-2] for each target
# Two targets cannot claim the same anchor
if nTb > 1:
iou_order = np.argsort(-iou_anch_best) # best to worst
# u = torch.cat((gi, gj, a), 0).view(3, -1).numpy()
# _, first_unique = np.unique(u[:, iou_order], axis=1, return_index=True) # first unique indices
u = gi.float() * 0.4361538773074043 + gj.float() * 0.28012496588736746 + a.float() * 0.6627147212460307
_, first_unique = np.unique(u[iou_order], return_index=True) # first unique indices
# print(((np.sort(first_unique) - np.sort(first_unique2)) ** 2).sum())
i = iou_order[first_unique]
# best anchor must share significant commonality (iou) with target
i = i[iou_anch_best[i] > 0.10]
if len(i) == 0:
continue
a, gj, gi, t = a[i], gj[i], gi[i], t[i]
if len(t.shape) == 1:
t = t.view(1, 5)
else:
if iou_anch_best < 0.10:
continue
i = 0
tc, gx, gy, gw, gh = t[:, 0].long(), t[:, 1] * nG, t[:, 2] * nG, t[:, 3] * nG, t[:, 4] * nG
# Coordinates
tx[b, a, gj, gi] = gx - gi.float()
ty[b, a, gj, gi] = gy - gj.float()
# Width and height (sqrt method)
# tw[b, a, gj, gi] = torch.sqrt(gw / anchor_wh[a, 0]) / 2
# th[b, a, gj, gi] = torch.sqrt(gh / anchor_wh[a, 1]) / 2
# Width and height (yolov3 method)
tw[b, a, gj, gi] = torch.log(gw / anchor_wh[a, 0] + 1e-16)
th[b, a, gj, gi] = torch.log(gh / anchor_wh[a, 1] + 1e-16)
# One-hot encoding of label
tcls[b, a, gj, gi, tc] = 1
tconf[b, a, gj, gi] = 1
if requestPrecision:
# predicted classes and confidence
tb = torch.cat((gx - gw / 2, gy - gh / 2, gx + gw / 2, gy + gh / 2)).view(4, -1).t() # target boxes
pcls = torch.argmax(pred_cls[b, a, gj, gi], 1).cpu()
pconf = torch.sigmoid(pred_conf[b, a, gj, gi]).cpu()
iou_pred = bbox_iou(tb, pred_boxes[b, a, gj, gi].cpu())
TP[b, i] = (pconf > 0.99) & (iou_pred > 0.5) & (pcls == tc)
FP[b, i] = (pconf > 0.99) & (TP[b, i] == 0) # coordinates or class are wrong
FN[b, i] = pconf <= 0.99 # confidence score is too low (set to zero)
return tx, ty, tw, th, tconf, tcls, TP, FP, FN, TC
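# Note on expected inputs (as implied by the indexing above): 'target' is a list of nB per-image tensors,
# each of shape (n_targets, 5) with rows [class, x_center, y_center, w, h] in 0-1 normalized units;
# the returned tx/ty/tw/th/tconf grids all have shape (nB, nA, nG, nG).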
def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4):
"""
Removes detections with an object confidence score below 'conf_thres' and performs
Non-Maximum Suppression to further filter the remaining detections.
Returns detections with shape:
(x1, y1, x2, y2, object_conf, class_score, class_pred)
"""
prediction = prediction.cpu()
output = [None for _ in range(len(prediction))]
for image_i, pred in enumerate(prediction):
# Filter out confidence scores below threshold
# Get score and class with highest confidence
# cross-class NMS
cross_class_nms = False
if cross_class_nms:
thresh = 0.85
a = pred.clone()
a = a[np.argsort(-a[:, 4])] # sort best to worst
radius = 30  # pixel radius within which to search for cross-class ious
for i in range(len(a)):
if i >= len(a) - 1:
break
close = (np.abs(a[i, 0] - a[i + 1:, 0]) < radius) & (np.abs(a[i, 1] - a[i + 1:, 1]) < radius)
close = close.nonzero()
if len(close) > 0:
close = close + i + 1
iou = bbox_iou(a[i:i + 1, :4], a[close.squeeze(), :4].reshape(-1, 4), x1y1x2y2=False)
bad = close[iou > thresh]
if len(bad) > 0:
mask = torch.ones(len(a)).type(torch.ByteTensor)
mask[bad] = 0
a = a[mask]
pred = a
x, y, w, h = pred[:, 0].numpy(), pred[:, 1].numpy(), pred[:, 2].numpy(), pred[:, 3].numpy()
a = w * h # area
ar = w / (h + 1e-16) # aspect ratio
log_w, log_h, log_a, log_ar = np.log(w), np.log(h), np.log(a), np.log(ar)
# n = len(w)
# shape_likelihood = np.zeros((n, 60), dtype=np.float32)
# x = np.concatenate((log_w.reshape(-1, 1), log_h.reshape(-1, 1)), 1)
# from scipy.stats import multivariate_normal
# for c in range(60):
# shape_likelihood[:, c] = multivariate_normal.pdf(x, mean=mat['class_mu'][c, :2], cov=mat['class_cov'][c, :2, :2])
class_prob, class_pred = torch.max(F.softmax(pred[:, 5:], 1), 1)
v = ((pred[:, 4] > conf_thres) & (class_prob > .3)).numpy()
v = v.nonzero()
pred = pred[v]
class_prob = class_prob[v]
class_pred = class_pred[v]
# If none are remaining => process next image
nP = pred.shape[0]
if not nP:
continue
# From (center x, center y, width, height) to (x1, y1, x2, y2)
box_corner = pred.new(nP, 4)
xy = pred[:, 0:2]
wh = pred[:, 2:4] / 2
box_corner[:, 0:2] = xy - wh
box_corner[:, 2:4] = xy + wh
pred[:, :4] = box_corner
# Detections ordered as (x1, y1, x2, y2, obj_conf, class_prob, class_pred)
detections = torch.cat((pred[:, :5], class_prob.float().unsqueeze(1), class_pred.float().unsqueeze(1)), 1)
# Iterate through all predicted classes
unique_labels = detections[:, -1].cpu().unique()
if prediction.is_cuda:
unique_labels = unique_labels.cuda()
nms_style = 'OR' # 'AND' or 'OR' (classical)
for c in unique_labels:
# Get the detections with the particular class
detections_class = detections[detections[:, -1] == c]
# Sort the detections by maximum objectness confidence
_, conf_sort_index = torch.sort(detections_class[:, 4], descending=True)
detections_class = detections_class[conf_sort_index]
# Perform non-maximum suppression
max_detections = []
if nms_style == 'OR': # Classical NMS
while detections_class.shape[0]:
# Get detection with highest confidence and save as max detection
max_detections.append(detections_class[0].unsqueeze(0))
# Stop if we're at the last detection
if len(detections_class) == 1:
break
# Get the IOUs for all boxes with lower confidence
ious = bbox_iou(max_detections[-1], detections_class[1:])
# Remove detections with IoU >= NMS threshold
detections_class = detections_class[1:][ious < nms_thres]
elif nms_style == 'AND': # 'AND'-style NMS, at least two boxes must share commonality to pass, single boxes erased
while detections_class.shape[0]:
if len(detections_class) == 1:
break
ious = bbox_iou(detections_class[:1], detections_class[1:])
if ious.max() > 0.5:
max_detections.append(detections_class[0].unsqueeze(0))
# Remove detections with IoU >= NMS threshold
detections_class = detections_class[1:][ious < nms_thres]
if len(max_detections) > 0:
max_detections = torch.cat(max_detections).data
# Add max detections to outputs
output[image_i] = max_detections if output[image_i] is None else torch.cat(
(output[image_i], max_detections))
return output
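# Illustrative usage: 'prediction' is the raw network output of shape (batch, n_boxes, 5 + n_classes)
# in xywh form; the call below (argument values are examples only) returns one (n, 7) tensor per image:
#   detections = non_max_suppression(pred, conf_thres=0.5, nms_thres=0.4)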
def strip_optimizer_from_checkpoint(filename='checkpoints/best.pt'):
# Strip the optimizer state from *.pt checkpoint files for lighter files (roughly 2/3 smaller)
import torch
a = torch.load(filename, map_location='cpu')
a['optimizer'] = []
torch.save(a, filename.replace('.pt', '_lite.pt'))
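# Illustrative usage: writes a copy of the checkpoint without optimizer state alongside the original:
#   strip_optimizer_from_checkpoint('checkpoints/best.pt')  # -> checkpoints/best_lite.pt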
def plotResults():
# Plot YOLO training results file "results.txt"
import numpy as np
import matplotlib.pyplot as plt
plt.figure(figsize=(18, 9))
s = ['x', 'y', 'w', 'h', 'conf', 'cls', 'loss', 'prec', 'recall']
for f in ('results.txt',):
results = np.loadtxt(f, usecols=[2, 3, 4, 5, 6, 7, 8, 9, 10]).T
for i in range(9):
plt.subplot(2, 5, i + 1)
plt.plot(results[i, :3000], marker='.', label=f)
plt.title(s[i])
plt.legend()
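# Illustrative usage: run after (or during) training, once 'results.txt' exists in the working directory:
#   plotResults()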