import os
import os.path
from collections import OrderedDict

import numpy as np
import pandas

from ltr.data.image_loader import default_image_loader
from .base_dataset import BaseDataset
from ltr.admin.environment import env_settings


def list_sequences(root, set_ids):
    """ Lists all the videos in the input set_ids. Returns a list of tuples (set_id, video_name)

    args:
        root: Root directory to TrackingNet
        set_ids: Sets (0-11) which are to be used

    returns:
        list - list of tuples (set_id, video_name) containing the set_id and video_name for each sequence
    """
    sequence_list = []

    for set_id in set_ids:
        anno_dir = os.path.join(root, "TRAIN_" + str(set_id), "anno")

        # One sequence per ground-truth annotation file in this set.
        sequence_list.extend(
            (set_id, os.path.splitext(anno_file)[0])
            for anno_file in os.listdir(anno_dir)
            if anno_file.endswith('.txt'))

    return sequence_list


class TrackingNet(BaseDataset):
    """ TrackingNet dataset.

    Publication:
        TrackingNet: A Large-Scale Dataset and Benchmark for Object Tracking in the Wild.
        Matthias Mueller, Adel Bibi, Silvio Giancola, Salman Al-Subaihi and Bernard Ghanem
        ECCV, 2018
        https://ivul.kaust.edu.sa/Documents/Publications/2018/TrackingNet%20A%20Large%20Scale%20Dataset%20and%20Benchmark%20for%20Object%20Tracking%20in%20the%20Wild.pdf

    Download the dataset using the toolkit https://github.com/SilvioGiancola/TrackingNet-devkit.
    """

    def __init__(self, root=None, image_loader=default_image_loader, set_ids=None):
        """
        args:
            root - The path to the TrackingNet folder, containing the training sets.
            image_loader (jpeg4py_loader) - The function to read the images. jpeg4py (https://github.com/ajkxyz/jpeg4py)
                                            is used by default.
            set_ids (None) - List containing the ids of the TrackingNet sets to be used for training. If None, all the
                             sets (0 - 11) will be used.
        """
        if root is None:
            root = env_settings().trackingnet_dir
        super().__init__(root, image_loader)

        if set_ids is None:
            set_ids = list(range(12))

        self.set_ids = set_ids

        # All videos available for training, stored as (set_id, video_name)
        # tuples so each sequence can be located on disk later.
        self.sequence_list = list_sequences(self.root, self.set_ids)

    def get_name(self):
        """Return the dataset identifier used by the training framework."""
        return 'trackingnet'

    def _read_anno(self, seq_id):
        """Load the ground-truth boxes of one sequence as a float32 ndarray."""
        set_id, vid_name = self.sequence_list[seq_id]
        anno_file = os.path.join(self.root, "TRAIN_" + str(set_id), "anno", vid_name + ".txt")
        # na_filter/low_memory are disabled for a faster single-pass parse.
        boxes = pandas.read_csv(anno_file, delimiter=',', header=None, dtype=np.float32,
                                na_filter=False, low_memory=False).values
        return np.array(boxes)

    def get_sequence_info(self, seq_id):
        """Return (anno, target_visible) for the sequence with index seq_id."""
        anno = self._read_anno(seq_id)
        # The target counts as visible whenever its box has positive width and height.
        visible = (anno[:, 2] > 0) & (anno[:, 3] > 0)
        return anno, visible

    def _get_frame(self, seq_id, frame_id):
        """Load a single frame image; frames are stored as <frame_id>.jpg."""
        set_id, vid_name = self.sequence_list[seq_id]
        frame_path = os.path.join(self.root, "TRAIN_" + str(set_id), "frames",
                                  vid_name, str(frame_id) + ".jpg")
        return self.image_loader(frame_path)

    def get_frames(self, seq_id, frame_ids, anno=None):
        """Return the requested frames, their boxes, and (empty) object metadata."""
        frame_list = [self._get_frame(seq_id, f_id) for f_id in frame_ids]

        if anno is None:
            anno = self._read_anno(seq_id)

        # One box (as a row slice) per requested frame.
        anno_frames = [anno[f_id, :] for f_id in frame_ids]

        # TrackingNet provides no per-object class/motion labels.
        object_meta = OrderedDict({
            'object_class': None,
            'motion_class': None,
            'major_class': None,
            'root_class': None,
            'motion_adverb': None
        })

        return frame_list, anno_frames, object_meta