feat(data): support cache ram of COCO dataset (#1562)

feat(data): support cache ram of COCO dataset

feat(data): support cache ram of COCO dataset (#1562)
feat(data): support cache ram of COCO dataset
33f48a92 · Yuang Peng · GitHub · 11c2a1f8 · 33f48a92 · 33f48a92
7 changed files
--- a/requirements.txt
+++ b/requirements.txt
@@ -8,6 +8,7 @@ torchvision
 thop
 ninja
 tabulate
+psutil

 # verified versions
 # pycocotools corresponds to https://github.com/ppwwyyxx/cocoapi

--- a/setup.cfg
+++ b/setup.cfg
@@ -3,7 +3,7 @@ line_length = 100
 multi_line_output = 3
 balanced_wrapping = True
 known_standard_library = setuptools
-known_third_party = tqdm,loguru,tabulate
+known_third_party = tqdm,loguru,tabulate,psutil
 known_data_processing = cv2,numpy,scipy,PIL,matplotlib
 known_datasets = pycocotools
 known_deeplearning = torch,torchvision,caffe2,onnx,apex,timm,thop,torch2trt,tensorrt,openvino,onnxruntime

--- a/tools/train.py
+++ b/tools/train.py
@@ -67,10 +67,10 @@ def make_parser():
    )
    parser.add_argument(
        "--cache",
-        dest="cache",
-        default=False,
-        action="store_true",
-        help="Caching imgs to RAM for fast training.",
+        type=str,
+        nargs="?",
+        const="ram",
+        help="Caching imgs to ram/disk for fast training.",
    )
    parser.add_argument(
        "-o",
@@ -130,6 +130,9 @@ if __name__ == "__main__":
    num_gpu = get_num_devices() if args.devices is None else args.devices
    assert num_gpu <= get_num_devices()

+    if args.cache is not None:
+        exp.create_cache_dataset(args.cache)
+
    dist_url = "auto" if args.dist_url is None else args.dist_url
    launch(
        main,

--- a/yolox/core/trainer.py
+++ b/yolox/core/trainer.py
@@ -26,6 +26,7 @@ from yolox.utils import (
    gpu_mem_usage,
    is_parallel,
    load_ckpt,
+    mem_usage,
    occupy_mem,
    save_checkpoint,
    setup_logger,
@@ -250,10 +251,12 @@ class Trainer:
                ["{}: {:.3f}s".format(k, v.avg) for k, v in time_meter.items()]
            )

+            mem_str = "gpu mem: {:.0f}Mb, mem: {:.1f}Gb".format(gpu_mem_usage(), mem_usage())
+
            logger.info(
-                "{}, mem: {:.0f}Mb, {}, {}, lr: {:.3e}".format(
+                "{}, {}, {}, {}, lr: {:.3e}".format(
                    progress_str,
-                    gpu_mem_usage(),
+                    mem_str,
                    time_str,
                    loss_str,
                    self.meter["lr"].latest,

--- a/yolox/data/datasets/coco.py
+++ b/yolox/data/datasets/coco.py
 #!/usr/bin/env python3
 # -*- coding:utf-8 -*-
 # Copyright (c) Megvii, Inc. and its affiliates.
-
+import copy
 import os
+import random
+from multiprocessing.pool import ThreadPool
+import psutil
 from loguru import logger
+from tqdm import tqdm

 import cv2
 import numpy as np
@@ -45,6 +49,7 @@ class COCODataset(Dataset):
        img_size=(416, 416),
        preproc=None,
        cache=False,
+        cache_type="ram",
    ):
        """
        COCO dataset initialization. Annotation data are read into memory by COCO API.
@@ -64,74 +69,95 @@ class COCODataset(Dataset):
        self.coco = COCO(os.path.join(self.data_dir, "annotations", self.json_file))
        remove_useless_info(self.coco)
        self.ids = self.coco.getImgIds()
+        self.num_imgs = len(self.ids)
        self.class_ids = sorted(self.coco.getCatIds())
        self.cats = self.coco.loadCats(self.coco.getCatIds())
        self._classes = tuple([c["name"] for c in self.cats])
-        self.imgs = None
        self.name = name
        self.img_size = img_size
        self.preproc = preproc
        self.annotations = self._load_coco_annotations()
-        if cache:
+        self.imgs = None
+        self.cache = cache
+        self.cache_type = cache_type
+
+        if self.cache:
            self._cache_images()

-    def __len__(self):
-        return len(self.ids)
+    def _cache_images(self):
+        mem = psutil.virtual_memory()
+        mem_required = self.cal_cache_ram()
+        gb = 1 << 30

-    def __del__(self):
-        del self.imgs
+        if self.cache_type == "ram" and mem_required > mem.available:
+            self.cache = False
+        else:
+            logger.info(
+                f"{mem_required / gb:.1f}GB RAM required, "
+                f"{mem.available / gb:.1f}/{mem.total / gb:.1f}GB RAM available, "
+                f"Since the first thing we do is cache, "
+                f"there is no guarantee that the remaining memory space is sufficient"
+            )

-    def _load_coco_annotations(self):
-        return [self.load_anno_from_ids(_ids) for _ids in self.ids]
+        if self.cache and self.imgs is None:
+            if self.cache_type == 'ram':
+                self.imgs = [None] * self.num_imgs
+                logger.info("You are using cached images in RAM to accelerate training!")
+            else:   # 'disk'
+                self.cache_dir = os.path.join(
+                    self.data_dir,
+                    f"{self.name}_cache{self.img_size[0]}x{self.img_size[1]}"
+                )
+                if not os.path.exists(self.cache_dir):
+                    os.mkdir(self.cache_dir)
+                    logger.warning(
+                        f"\n*******************************************************************\n"
+                        f"You are using cached images in DISK to accelerate training.\n"
+                        f"This requires large DISK space.\n"
+                        f"Make sure you have {mem_required / gb:.1f} "
+                        f"available DISK space for training COCO.\n"
+                        f"*******************************************************************\\n"
+                    )
+                else:
+                    logger.info("Found disk cache!")
+                    return

-    def _cache_images(self):
-        logger.warning(
-            "\n********************************************************************************\n"
-            "You are using cached images in RAM to accelerate training.\n"
-            "This requires large system RAM.\n"
-            "Make sure you have 200G+ RAM and 136G available disk space for training COCO.\n"
-            "********************************************************************************\n"
-        )
-        max_h = self.img_size[0]
-        max_w = self.img_size[1]
-        cache_file = os.path.join(self.data_dir, f"img_resized_cache_{self.name}.array")
-        if not os.path.exists(cache_file):
            logger.info(
-                "Caching images for the first time. This might take about 20 minutes for COCO"
+                "Caching images for the first time. "
+                "This might take about 15 minutes for COCO"
            )
-            self.imgs = np.memmap(
-                cache_file,
-                shape=(len(self.ids), max_h, max_w, 3),
-                dtype=np.uint8,
-                mode="w+",
-            )
-            from tqdm import tqdm
-            from multiprocessing.pool import ThreadPool

-            NUM_THREADs = min(8, os.cpu_count())
-            loaded_images = ThreadPool(NUM_THREADs).imap(
-                lambda x: self.load_resized_img(x),
-                range(len(self.annotations)),
-            )
-            pbar = tqdm(enumerate(loaded_images), total=len(self.annotations))
-            for k, out in pbar:
-                self.imgs[k][: out.shape[0], : out.shape[1], :] = out.copy()
-            self.imgs.flush()
+            num_threads = min(8, max(1, os.cpu_count() - 1))
+            b = 0
+            load_imgs = ThreadPool(num_threads).imap(self.load_resized_img, range(self.num_imgs))
+            pbar = tqdm(enumerate(load_imgs), total=self.num_imgs)
+            for i, x in pbar:   # x = self.load_resized_img(i)
+                if self.cache_type == 'ram':
+                    self.imgs[i] = x
+                else:   # 'disk'
+                    cache_filename = f'{self.annotations[i]["filename"].split(".")[0]}.npy'
+                    np.save(os.path.join(self.cache_dir, cache_filename), x)
+                b += x.nbytes
+                pbar.desc = f'Caching images ({b / gb:.1f}/{mem_required / gb:.1f}GB {self.cache})'
            pbar.close()
-        else:
-            logger.warning(
-                "You are using cached imgs! Make sure your dataset is not changed!!\n"
-                "Everytime the self.input_size is changed in your exp file, you need to delete\n"
-                "the cached data and re-generate them.\n"
-            )

-        logger.info("Loading cached imgs...")
-        self.imgs = np.memmap(
-            cache_file,
-            shape=(len(self.ids), max_h, max_w, 3),
-            dtype=np.uint8,
-            mode="r+",
-        )
+    def cal_cache_ram(self):
+        cache_bytes = 0
+        num_samples = min(self.num_imgs, 32)
+        for _ in range(num_samples):
+            img = self.load_resized_img(random.randint(0, self.num_imgs - 1))
+            cache_bytes += img.nbytes
+        mem_required = cache_bytes * self.num_imgs / num_samples
+        return mem_required
+
+    def __len__(self):
+        return self.num_imgs
+
+    def __del__(self):
+        del self.imgs
+
+    def _load_coco_annotations(self):
+        return [self.load_anno_from_ids(_ids) for _ids in self.ids]

    def load_anno_from_ids(self, id_):
        im_ann = self.coco.loadImgs(id_)[0]
@@ -152,7 +178,6 @@ class COCODataset(Dataset):
        num_objs = len(objs)

        res = np.zeros((num_objs, 5))
-
        for ix, obj in enumerate(objs):
            cls = self.class_ids.index(obj["category_id"])
            res[ix, 0:4] = obj["clean_bbox"]
@@ -197,15 +222,16 @@ class COCODataset(Dataset):

    def pull_item(self, index):
        id_ = self.ids[index]
+        label, origin_image_size, _, filename = self.annotations[index]

-        res, img_info, resized_info, _ = self.annotations[index]
-        if self.imgs is not None:
-            pad_img = self.imgs[index]
-            img = pad_img[: resized_info[0], : resized_info[1], :].copy()
+        if self.cache_type == 'ram':
+            img = self.imgs[index]
+        elif self.cache_type == 'disk':
+            img = np.load(os.path.join(self.cache_dir, f"{filename.split('.')[0]}.npy"))
        else:
            img = self.load_resized_img(index)

-        return img, res.copy(), img_info, np.array([id_])
+        return copy.deepcopy(img), copy.deepcopy(label), origin_image_size, np.array([id_])

    @Dataset.mosaic_getitem
    def __getitem__(self, index):

--- a/yolox/exp/yolox_base.py
+++ b/yolox/exp/yolox_base.py
@@ -106,6 +106,23 @@ class Exp(BaseExp):
        self.test_conf = 0.01
        # nms threshold
        self.nmsthre = 0.65
+        self.cache_dataset = None
+        self.dataset = None
+
+    def create_cache_dataset(self, cache_type: str = "ram"):
+        from yolox.data import COCODataset, TrainTransform
+        self.cache_dataset = COCODataset(
+            data_dir=self.data_dir,
+            json_file=self.train_ann,
+            img_size=self.input_size,
+            preproc=TrainTransform(
+                max_labels=50,
+                flip_prob=self.flip_prob,
+                hsv_prob=self.hsv_prob
+            ),
+            cache=True,
+            cache_type=cache_type,
+        )

    def get_model(self):
        from yolox.models import YOLOX, YOLOPAFPN, YOLOXHead
@@ -127,7 +144,16 @@ class Exp(BaseExp):
        self.model.train()
        return self.model

-    def get_data_loader(self, batch_size, is_distributed, no_aug=False, cache_img=False):
+    def get_data_loader(self, batch_size, is_distributed, no_aug=False, cache_img: str = None):
+        """
+        Get dataloader according to cache_img parameter.
+        Args:
+            no_aug (bool, optional): Whether to turn off mosaic data enhancement. Defaults to False.
+            cache_img (str, optional): cache_img is equivalent to cache_type. Defaults to None.
+                "ram" : Caching imgs to ram for fast training.
+                "disk": Caching imgs to disk for fast training.
+                None: Do not use cache, in this case cache_dataset is also None.
+        """
        from yolox.data import (
            COCODataset,
            TrainTransform,
@@ -140,18 +166,23 @@ class Exp(BaseExp):
        from yolox.utils import wait_for_the_master

        with wait_for_the_master():
-            dataset = COCODataset(
-                data_dir=self.data_dir,
-                json_file=self.train_ann,
-                img_size=self.input_size,
-                preproc=TrainTransform(
-                    max_labels=50,
-                    flip_prob=self.flip_prob,
-                    hsv_prob=self.hsv_prob),
-                cache=cache_img,
-            )
+            if self.cache_dataset is None:
+                assert cache_img is None, "cache is True, but cache_dataset is None"
+                dataset = COCODataset(
+                    data_dir=self.data_dir,
+                    json_file=self.train_ann,
+                    img_size=self.input_size,
+                    preproc=TrainTransform(
+                        max_labels=50,
+                        flip_prob=self.flip_prob,
+                        hsv_prob=self.hsv_prob),
+                    cache=False,
+                    cache_type=cache_img,
+                )
+            else:
+                dataset = self.cache_dataset

-        dataset = MosaicDetection(
+        self.dataset = MosaicDetection(
            dataset,
            mosaic=not no_aug,
            img_size=self.input_size,
@@ -169,8 +200,6 @@ class Exp(BaseExp):
            mixup_prob=self.mixup_prob,
        )

-        self.dataset = dataset
-
        if is_distributed:
            batch_size = batch_size // dist.get_world_size()


--- a/yolox/utils/metric.py
+++ b/yolox/utils/metric.py
@@ -5,6 +5,7 @@ import functools
 import os
 import time
 from collections import defaultdict, deque
+import psutil

 import numpy as np

@@ -16,6 +17,7 @@ __all__ = [
    "get_total_and_free_memory_in_Mb",
    "occupy_mem",
    "gpu_mem_usage",
+    "mem_usage"
 ]


@@ -51,6 +53,15 @@ def gpu_mem_usage():
    return mem_usage_bytes / (1024 * 1024)


+def mem_usage():
+    """
+    Compute the memory usage for the current machine (GB).
+    """
+    gb = 1 << 30
+    mem = psutil.virtual_memory()
+    return mem.used / gb
+
+
 class AverageMeter:
    """Track a series of values and provide access to smoothed values over a
    window or the global series average.