From a578b574252b141e39002ebc8a9cba59f29e1cac Mon Sep 17 00:00:00 2001
From: Ross Girshick
Date: Wed, 4 Mar 2015 15:01:35 -0800
Subject: [PATCH] remove conf.NUM_CLASSES

---
 bbox_regression_targets.py | 17 +++++++----------
 fast_rcnn_config.py        | 15 +++++++++++----
 fast_rcnn_train.py         |  4 ++--
 finetuning.py              | 10 ++++------
 4 files changed, 24 insertions(+), 22 deletions(-)

diff --git a/bbox_regression_targets.py b/bbox_regression_targets.py
index 6c0942a..bf2bb43 100644
--- a/bbox_regression_targets.py
+++ b/bbox_regression_targets.py
@@ -46,6 +46,8 @@ def _compute_targets(rois, overlaps, labels):
 
 def append_bbox_regression_targets(roidb):
     num_images = len(roidb)
+    # Infer number of classes from the number of columns in gt_overlaps
+    num_classes = roidb[0]['gt_overlaps'].shape[1]
     for im_i in xrange(num_images):
         rois = roidb[im_i]['boxes']
         max_overlaps = roidb[im_i]['max_overlaps']
@@ -55,12 +57,12 @@
 
     # Compute values needed for means and stds
     # var(x) = E(x^2) - E(x)^2
-    class_counts = np.zeros((conf.NUM_CLASSES, 1)) + conf.EPS
-    sums = np.zeros((conf.NUM_CLASSES, 4))
-    squared_sums = np.zeros((conf.NUM_CLASSES, 4))
+    class_counts = np.zeros((num_classes, 1)) + conf.EPS
+    sums = np.zeros((num_classes, 4))
+    squared_sums = np.zeros((num_classes, 4))
     for im_i in xrange(num_images):
         targets = roidb[im_i]['bbox_targets']
-        for cls in xrange(1, conf.NUM_CLASSES):
+        for cls in xrange(1, num_classes):
             cls_inds = np.where(targets[:, 0] == cls)[0]
             if cls_inds.size > 0:
                 class_counts[cls] += cls_inds.size
@@ -73,18 +75,13 @@
     # Normalize targets
     for im_i in xrange(num_images):
         targets = roidb[im_i]['bbox_targets']
-        for cls in xrange(1, conf.NUM_CLASSES):
+        for cls in xrange(1, num_classes):
             cls_inds = np.where(targets[:, 0] == cls)[0]
             roidb[im_i]['bbox_targets'][cls_inds, 1:] \
                 -= means[cls, :]
             roidb[im_i]['bbox_targets'][cls_inds, 1:] \
                 /= stds[cls, :]
 
-    # TODO(rbg) remove this when everything is in python
-    import scipy.io
-    scipy.io.savemat('../rcnn/data/voc_2007_means_stds.mat',
-                     {'means': means, 'stds': stds})
-
     # These values will be needed for making predictions
     # (the predicts will need to be unnormalized and uncentered)
     return means, stds
diff --git a/fast_rcnn_config.py b/fast_rcnn_config.py
index 0a7b144..c26dd4f 100644
--- a/fast_rcnn_config.py
+++ b/fast_rcnn_config.py
@@ -1,33 +1,40 @@
 import os
 import sys
+import numpy as np
 caffe_path = os.path.abspath(os.path.join('..', 'caffe', 'python'))
 sys.path.insert(0, caffe_path)
 
-import numpy as np
-
 # Scales used in the SPP-net paper
 # SCALES = (480, 576, 688, 864, 1200)
 SCALES = (480, 576, 600)
+
 # Max pixel size of a scaled input image
 # MAX_SIZE = 2000
 MAX_SIZE = 1000
+
 # Images per batch
 IMS_PER_BATCH = 2 # 4
+
 # Minibatch size
 BATCH_SIZE = 128 # 128
+
 # Fraction of minibatch that is foreground labeled (class > 0)
 FG_FRACTION = 0.25
+
 # Overlap threshold for a ROI to be considered foreground (if >= FG_THRESH)
 FG_THRESH = 0.5
+
 # Overlap threshold for a ROI to be considered background (class = 0 if
 # overlap in [0.1, 0.5))
 BG_THRESH_HI = 0.5
 BG_THRESH_LO = 0.1
+
 # Pixel mean values (BGR order) as a (1, 1, 3) array
 PIXEL_MEANS = np.array([[[102.9801, 115.9465, 122.7717]]])
-# Stride in input image pixels at ROI pooling level
+
+# Stride in input image pixels at ROI pooling level (network specific)
+# 16 is true for AlexNet and VGG-16
 FEAT_STRIDE = 16
-NUM_CLASSES = 21
 BBOX_THRESH = 0.5
 EPS = 1e-14
 SNAPSHOT_ITERS = 10000
diff --git a/fast_rcnn_train.py b/fast_rcnn_train.py
index 14745c8..0847e0a 100755
--- a/fast_rcnn_train.py
+++ b/fast_rcnn_train.py
@@ -93,9 +93,9 @@ def train_model(sw, roidb, max_epochs=100):
         sw.solver.net.blobs['rois'].reshape(num_rois, 5, 1, 1)
         sw.solver.net.blobs['labels'].reshape(num_rois, 1, 1, 1)
         sw.solver.net.blobs['bbox_targets'] \
-            .reshape(num_rois, 4 * conf.NUM_CLASSES, 1, 1)
+            .reshape(num_rois, bbox_targets_blob.shape[1], 1, 1)
         sw.solver.net.blobs['bbox_loss_weights'] \
-            .reshape(num_rois, 4 * conf.NUM_CLASSES, 1, 1)
+            .reshape(num_rois, bbox_loss_weights_blob.shape[1], 1, 1)
         # Copy data into net's input blobs
         sw.solver.net.blobs['data'].data[...] = \
             im_blob.astype(np.float32, copy=False)
diff --git a/finetuning.py b/finetuning.py
index 029195a..9036b38 100644
--- a/finetuning.py
+++ b/finetuning.py
@@ -5,6 +5,8 @@ import fast_rcnn_config as conf
 
 def get_minibatch(roidb):
     num_images = len(roidb)
+    # Infer number of classes from the number of columns in gt_overlaps
+    num_classes = roidb[0]['gt_overlaps'].shape[1]
     # Sample random scales to use for each image in this batch
     random_scale_inds = \
         np.random.randint(0, high=len(conf.SCALES), size=num_images)
@@ -20,7 +22,7 @@
     # Now, build the region of interest and label blobs
     rois_blob = np.zeros((0, 5), dtype=np.float32)
     labels_blob = np.zeros((0), dtype=np.float32)
-    bbox_targets_blob = np.zeros((0, 4 * conf.NUM_CLASSES), dtype=np.float32)
+    bbox_targets_blob = np.zeros((0, 4 * num_classes), dtype=np.float32)
     bbox_loss_weights_blob = np.zeros(bbox_targets_blob.shape, dtype=np.float32)
     all_overlaps = []
     for im_i in xrange(num_images):
@@ -29,10 +31,6 @@
                                          fg_rois_per_image, rois_per_image)
         feat_rois = _map_im_rois_to_feat_rois(im_rois, im_scale_factors[im_i])
-        # Assert various bounds
-        assert((feat_rois[:, 2] >= feat_rois[:, 0]).all())
-        assert((feat_rois[:, 3] >= feat_rois[:, 1]).all())
-        assert((feat_rois >= 0).all())
 
         rois_blob_this_image = \
             np.append(im_i * np.ones((feat_rois.shape[0], 1)),
                       feat_rois, axis=1)
@@ -50,7 +48,7 @@ def _get_bbox_regression_labels(bbox_target_data):
     # Return (N, K * 4, 1, 1) blob of regression targets
     # Return (N, K * 4, 1, 1) blob of Euclidean loss weights
     clss = bbox_target_data[:, 0]
-    bbox_targets = np.zeros((clss.size, 4 * conf.NUM_CLASSES), dtype=np.float32)
+    bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32)
     bbox_loss_weights = np.zeros(bbox_targets.shape, dtype=np.float32)
     inds = np.where(clss > 0)[0]
     for ind in inds:
--
GitLab