From a578b574252b141e39002ebc8a9cba59f29e1cac Mon Sep 17 00:00:00 2001
From: Ross Girshick
Date: Wed, 4 Mar 2015 15:01:35 -0800
Subject: [PATCH] remove conf.NUM_CLASSES

---
 bbox_regression_targets.py | 17 +++++++----------
 fast_rcnn_config.py        | 15 +++++++++++----
 fast_rcnn_train.py         |  4 ++--
 finetuning.py              | 10 ++++------
 4 files changed, 24 insertions(+), 22 deletions(-)

diff --git a/bbox_regression_targets.py b/bbox_regression_targets.py
index 6c0942a..bf2bb43 100644
--- a/bbox_regression_targets.py
+++ b/bbox_regression_targets.py
@@ -46,6 +46,8 @@ def _compute_targets(rois, overlaps, labels):
 
 def append_bbox_regression_targets(roidb):
     num_images = len(roidb)
+    # Infer number of classes from the number of columns in gt_overlaps
+    num_classes = roidb[0]['gt_overlaps'].shape[1]
     for im_i in xrange(num_images):
         rois = roidb[im_i]['boxes']
         max_overlaps = roidb[im_i]['max_overlaps']
@@ -55,12 +57,12 @@
 
     # Compute values needed for means and stds
     # var(x) = E(x^2) - E(x)^2
-    class_counts = np.zeros((conf.NUM_CLASSES, 1)) + conf.EPS
-    sums = np.zeros((conf.NUM_CLASSES, 4))
-    squared_sums = np.zeros((conf.NUM_CLASSES, 4))
+    class_counts = np.zeros((num_classes, 1)) + conf.EPS
+    sums = np.zeros((num_classes, 4))
+    squared_sums = np.zeros((num_classes, 4))
     for im_i in xrange(num_images):
         targets = roidb[im_i]['bbox_targets']
-        for cls in xrange(1, conf.NUM_CLASSES):
+        for cls in xrange(1, num_classes):
             cls_inds = np.where(targets[:, 0] == cls)[0]
             if cls_inds.size > 0:
                 class_counts[cls] += cls_inds.size
@@ -73,18 +75,13 @@
     # Normalize targets
     for im_i in xrange(num_images):
         targets = roidb[im_i]['bbox_targets']
-        for cls in xrange(1, conf.NUM_CLASSES):
+        for cls in xrange(1, num_classes):
             cls_inds = np.where(targets[:, 0] == cls)[0]
             roidb[im_i]['bbox_targets'][cls_inds, 1:] \
                 -= means[cls, :]
             roidb[im_i]['bbox_targets'][cls_inds, 1:] \
                 /= stds[cls, :]
 
-    # TODO(rbg) remove this when everything is in python
-    import scipy.io
-    scipy.io.savemat('../rcnn/data/voc_2007_means_stds.mat',
-                     {'means': means, 'stds': stds})
-
     # These values will be needed for making predictions
     # (the predicts will need to be unnormalized and uncentered)
     return means, stds
diff --git a/fast_rcnn_config.py b/fast_rcnn_config.py
index 0a7b144..c26dd4f 100644
--- a/fast_rcnn_config.py
+++ b/fast_rcnn_config.py
@@ -1,33 +1,40 @@
 import os
 import sys
+import numpy as np
 caffe_path = os.path.abspath(os.path.join('..', 'caffe', 'python'))
 sys.path.insert(0, caffe_path)
 
-import numpy as np
-
 # Scales used in the SPP-net paper
 # SCALES = (480, 576, 688, 864, 1200)
 SCALES = (480, 576, 600)
+
 # Max pixel size of a scaled input image
 # MAX_SIZE = 2000
 MAX_SIZE = 1000
+
 # Images per batch
 IMS_PER_BATCH = 2 # 4
+
 # Minibatch size
 BATCH_SIZE = 128 # 128
+
 # Fraction of minibatch that is foreground labeled (class > 0)
 FG_FRACTION = 0.25
+
 # Overlap threshold for a ROI to be considered foreground (if >= FG_THRESH)
 FG_THRESH = 0.5
+
 # Overlap threshold for a ROI to be considered background (class = 0 if
 # overlap in [0.1, 0.5))
 BG_THRESH_HI = 0.5
 BG_THRESH_LO = 0.1
+
 # Pixel mean values (BGR order) as a (1, 1, 3) array
 PIXEL_MEANS = np.array([[[102.9801, 115.9465, 122.7717]]])
-# Stride in input image pixels at ROI pooling level
+
+# Stride in input image pixels at ROI pooling level (network specific)
+# 16 is true for AlexNet and VGG-16
 FEAT_STRIDE = 16
-NUM_CLASSES = 21
 BBOX_THRESH = 0.5
 EPS = 1e-14
 SNAPSHOT_ITERS = 10000
diff --git a/fast_rcnn_train.py b/fast_rcnn_train.py
index 14745c8..0847e0a 100755
--- a/fast_rcnn_train.py
+++ b/fast_rcnn_train.py
@@ -93,9 +93,9 @@ def train_model(sw, roidb, max_epochs=100):
         sw.solver.net.blobs['rois'].reshape(num_rois, 5, 1, 1)
         sw.solver.net.blobs['labels'].reshape(num_rois, 1, 1, 1)
         sw.solver.net.blobs['bbox_targets'] \
-            .reshape(num_rois, 4 * conf.NUM_CLASSES, 1, 1)
+            .reshape(num_rois, bbox_targets_blob.shape[1], 1, 1)
         sw.solver.net.blobs['bbox_loss_weights'] \
-            .reshape(num_rois, 4 * conf.NUM_CLASSES, 1, 1)
+            .reshape(num_rois, bbox_loss_weights_blob.shape[1], 1, 1)
         # Copy data into net's input blobs
         sw.solver.net.blobs['data'].data[...] = \
             im_blob.astype(np.float32, copy=False)
diff --git a/finetuning.py b/finetuning.py
index 029195a..9036b38 100644
--- a/finetuning.py
+++ b/finetuning.py
@@ -5,6 +5,8 @@ import fast_rcnn_config as conf
 
 def get_minibatch(roidb):
     num_images = len(roidb)
+    # Infer number of classes from the number of columns in gt_overlaps
+    num_classes = roidb[0]['gt_overlaps'].shape[1]
     # Sample random scales to use for each image in this batch
     random_scale_inds = \
         np.random.randint(0, high=len(conf.SCALES), size=num_images)
@@ -20,7 +22,7 @@
     # Now, build the region of interest and label blobs
     rois_blob = np.zeros((0, 5), dtype=np.float32)
     labels_blob = np.zeros((0), dtype=np.float32)
-    bbox_targets_blob = np.zeros((0, 4 * conf.NUM_CLASSES), dtype=np.float32)
+    bbox_targets_blob = np.zeros((0, 4 * num_classes), dtype=np.float32)
     bbox_loss_weights_blob = np.zeros(bbox_targets_blob.shape, dtype=np.float32)
     all_overlaps = []
     for im_i in xrange(num_images):
@@ -29,10 +31,6 @@
                                          fg_rois_per_image, rois_per_image)
         feat_rois = _map_im_rois_to_feat_rois(im_rois, im_scale_factors[im_i])
-        # Assert various bounds
-        assert((feat_rois[:, 2] >= feat_rois[:, 0]).all())
-        assert((feat_rois[:, 3] >= feat_rois[:, 1]).all())
-        assert((feat_rois >= 0).all())
 
         rois_blob_this_image = \
             np.append(im_i * np.ones((feat_rois.shape[0], 1)),
                       feat_rois, axis=1)
@@ -50,7 +48,7 @@ def _get_bbox_regression_labels(bbox_target_data):
     # Return (N, K * 4, 1, 1) blob of regression targets
     # Return (N, K * 4, 1, 1) blob of Euclidean loss weights
     clss = bbox_target_data[:, 0]
-    bbox_targets = np.zeros((clss.size, 4 * conf.NUM_CLASSES), dtype=np.float32)
+    bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32)
     bbox_loss_weights = np.zeros(bbox_targets.shape, dtype=np.float32)
     inds = np.where(clss > 0)[0]
     for ind in inds:
--
GitLab