diff --git a/datasets/imdb.py b/datasets/imdb.py
index cabf03ddde9a22b7e328f0b6c2f0421ed665e440..7ce66512086e0c9fd994e514571c2568079edd03 100644
--- a/datasets/imdb.py
+++ b/datasets/imdb.py
@@ -1,4 +1,5 @@
 import os
+import PIL.Image
 
 class imdb(object):
     def __init__(self, name):
@@ -63,3 +64,31 @@ class imdb(object):
         all_boxes[class][image] = [] or np.array of shape #dets x 5
         """
         raise NotImplementedError
+
+    def append_flipped_roidb(self):
+        """Append a horizontally flipped copy of every roidb entry.
+
+        Every existing entry is tagged 'flipped': False. For each one a
+        new entry with mirrored box x-coordinates and 'flipped': True is
+        appended to self.roidb, and self._image_index is repeated twice
+        so the appended entries line up with the same images.
+        """
+        num_images = len(self.image_index)
+        # Only the image width is needed: x' = width - x - 1 mirrors x.
+        widths = [PIL.Image.open(self.image_path_at(i)).size[0]
+                  for i in xrange(num_images)]
+        for i in xrange(num_images):
+            self.roidb[i]['flipped'] = False
+            boxes = self.roidb[i]['boxes'].copy()
+            oldx1 = boxes[:, 0].copy()
+            oldx2 = boxes[:, 2].copy()
+            # x1 and x2 trade places under mirroring so x1 <= x2 still holds.
+            boxes[:, 0] = widths[i] - oldx2 - 1
+            boxes[:, 2] = widths[i] - oldx1 - 1
+            assert (boxes[:, 2] >= boxes[:, 0]).all()
+            entry = {'boxes' : boxes,
+                     'gt_overlaps' : self.roidb[i]['gt_overlaps'],
+                     'gt_classes' : self.roidb[i]['gt_classes'],
+                     'flipped' : True}
+            self.roidb.append(entry)
+        self._image_index = self._image_index * 2
diff --git a/datasets/pascal_voc.py b/datasets/pascal_voc.py
index d35f83d9bc9f2170eb0d059b0c493c4d58d20b48..68200a59d6908fcd00aee3856bcf0de5a2541b8f 100644
--- a/datasets/pascal_voc.py
+++ b/datasets/pascal_voc.py
@@ -58,7 +58,7 @@ class pascal_voc(datasets.imdb):
                                       self._image_set + '.txt')
         assert os.path.exists(image_set_file)
         with open(image_set_file) as f:
-            image_index = tuple([x.strip() for x in f.readlines()])
+            image_index = [x.strip() for x in f.readlines()]
         return image_index
 
     def _get_default_path(self):
diff --git a/fast_rcnn_train.py b/fast_rcnn_train.py
index 35ff83857547a6f9100c296ed7ebe82195b2ee1d..ed6150f8b6f37e717ad513b4ffbec30d301ef7ea 100755
--- a/fast_rcnn_train.py
+++ b/fast_rcnn_train.py
@@ -156,6 +156,9 @@ if __name__ == '__main__':
 
     imdb_train = datasets.pascal_voc('trainval', '2007')
 
+    # enhance roidb to contain flipped examples
+    imdb_train.append_flipped_roidb()
+
     # enhance roidb to contain some useful derived quanties
     roidb_train = training_roidb(imdb_train)
 
diff --git a/finetuning.py b/finetuning.py
index c81c6ba1034c26f8572998c8bffee6c7dac2c10f..b7a83c5c19a7b20b7b6d716264c2d3d052ccc05f 100644
--- a/finetuning.py
+++ b/finetuning.py
@@ -129,6 +129,8 @@ def _get_image_blob(roidb, scale_inds):
     im_scale_factors = []
     for i in xrange(num_images):
         im = cv2.imread(roidb[i]['image'])
+        if roidb[i]['flipped']:
+            im = im[:, ::-1, :]
         im = im.astype(np.float32, copy=False)
         im -= conf.PIXEL_MEANS
         im_shape = im.shape