remove conf.NUM_CLASSES

a578b574 · Ross Girshick · c1189b8b · a578b574 · a578b574 · a578b574
Showing 4 changed files with 24 additions and 22 deletions

bbox_regression_targets.py bbox_regression_targets.py +7 -10

fast_rcnn_config.py fast_rcnn_config.py +11 -4

fast_rcnn_train.py fast_rcnn_train.py +2 -2

finetuning.py finetuning.py +4 -6

未找到文件。
--- a/bbox_regression_targets.py
+++ b/bbox_regression_targets.py
@@ -46,6 +46,8 @@ def _compute_targets(rois, overlaps, labels):

 def append_bbox_regression_targets(roidb):
    num_images = len(roidb)
+    # Infer number of classes from the number of columns in gt_overlaps
+    num_classes = roidb[0]['gt_overlaps'].shape[1]
    for im_i in xrange(num_images):
        rois = roidb[im_i]['boxes']
        max_overlaps = roidb[im_i]['max_overlaps']
@@ -55,12 +57,12 @@ def append_bbox_regression_targets(roidb):

    # Compute values needed for means and stds
    # var(x) = E(x^2) - E(x)^2
-    class_counts = np.zeros((conf.NUM_CLASSES, 1)) + conf.EPS
-    sums = np.zeros((conf.NUM_CLASSES, 4))
-    squared_sums = np.zeros((conf.NUM_CLASSES, 4))
+    class_counts = np.zeros((num_classes, 1)) + conf.EPS
+    sums = np.zeros((num_classes, 4))
+    squared_sums = np.zeros((num_classes, 4))
    for im_i in xrange(num_images):
        targets = roidb[im_i]['bbox_targets']
-        for cls in xrange(1, conf.NUM_CLASSES):
+        for cls in xrange(1, num_classes):
            cls_inds = np.where(targets[:, 0] == cls)[0]
            if cls_inds.size > 0:
                class_counts[cls] += cls_inds.size
@@ -73,18 +75,13 @@ def append_bbox_regression_targets(roidb):
    # Normalize targets
    for im_i in xrange(num_images):
        targets = roidb[im_i]['bbox_targets']
-        for cls in xrange(1, conf.NUM_CLASSES):
+        for cls in xrange(1, num_classes):
            cls_inds = np.where(targets[:, 0] == cls)[0]
            roidb[im_i]['bbox_targets'][cls_inds, 1:] \
                    -= means[cls, :]
            roidb[im_i]['bbox_targets'][cls_inds, 1:] \
                    /= stds[cls, :]

-    # TODO(rbg) remove this when everything is in python
-    import scipy.io
-    scipy.io.savemat('../rcnn/data/voc_2007_means_stds.mat',
-                     {'means': means, 'stds': stds})
-
    # These values will be needed for making predictions
    # (the predicts will need to be unnormalized and uncentered)
    return means, stds
--- a/fast_rcnn_config.py
+++ b/fast_rcnn_config.py
 import os
 import sys
+import numpy as np
 caffe_path = os.path.abspath(os.path.join('..', 'caffe', 'python'))
 sys.path.insert(0, caffe_path)

-import numpy as np
-
 # Scales used in the SPP-net paper
 # SCALES          = (480, 576, 688, 864, 1200)
 SCALES          = (480, 576, 600)
+
 # Max pixel size of a scaled input image
 # MAX_SIZE        = 2000
 MAX_SIZE        = 1000
+
 # Images per batch
 IMS_PER_BATCH   = 2 # 4
+
 # Minibatch size
 BATCH_SIZE      = 128 # 128
+
 # Fraction of minibatch that is foreground labeled (class > 0)
 FG_FRACTION     = 0.25
+
 # Overlap threshold for a ROI to be considered foreground (if >= FG_THRESH)
 FG_THRESH       = 0.5
+
 # Overlap threshold for a ROI to be considered background (class = 0 if
 # overlap in [0.1, 0.5))
 BG_THRESH_HI    = 0.5
 BG_THRESH_LO    = 0.1
+
 # Pixel mean values (BGR order) as a (1, 1, 3) array
 PIXEL_MEANS     = np.array([[[102.9801, 115.9465, 122.7717]]])
-# Stride in input image pixels at ROI pooling level
+
+# Stride in input image pixels at ROI pooling level (network specific)
+# 16 is true for AlexNet and VGG-16
 FEAT_STRIDE     = 16
-NUM_CLASSES     = 21
 BBOX_THRESH     = 0.5
 EPS             = 1e-14
 SNAPSHOT_ITERS  = 10000

--- a/fast_rcnn_train.py
+++ b/fast_rcnn_train.py
@@ -93,9 +93,9 @@ def train_model(sw, roidb, max_epochs=100):
            sw.solver.net.blobs['rois'].reshape(num_rois, 5, 1, 1)
            sw.solver.net.blobs['labels'].reshape(num_rois, 1, 1, 1)
            sw.solver.net.blobs['bbox_targets'] \
-                .reshape(num_rois, 4 * conf.NUM_CLASSES, 1, 1)
+                .reshape(num_rois, bbox_targets_blob.shape[1], 1, 1)
            sw.solver.net.blobs['bbox_loss_weights'] \
-                .reshape(num_rois, 4 * conf.NUM_CLASSES, 1, 1)
+                .reshape(num_rois, bbox_loss_weights_blob.shape[1], 1, 1)
            # Copy data into net's input blobs
            sw.solver.net.blobs['data'].data[...] = \
                im_blob.astype(np.float32, copy=False)

--- a/finetuning.py
+++ b/finetuning.py
@@ -5,6 +5,8 @@ import fast_rcnn_config as conf

 def get_minibatch(roidb):
    num_images = len(roidb)
+    # Infer number of classes from the number of columns in gt_overlaps
+    num_classes = roidb[0]['gt_overlaps'].shape[1]
    # Sample random scales to use for each image in this batch
    random_scale_inds = \
        np.random.randint(0, high=len(conf.SCALES), size=num_images)
@@ -20,7 +22,7 @@ def get_minibatch(roidb):
    # Now, build the region of interest and label blobs
    rois_blob = np.zeros((0, 5), dtype=np.float32)
    labels_blob = np.zeros((0), dtype=np.float32)
-    bbox_targets_blob = np.zeros((0, 4 * conf.NUM_CLASSES), dtype=np.float32)
+    bbox_targets_blob = np.zeros((0, 4 * num_classes), dtype=np.float32)
    bbox_loss_weights_blob = np.zeros(bbox_targets_blob.shape, dtype=np.float32)
    all_overlaps = []
    for im_i in xrange(num_images):
@@ -29,10 +31,6 @@ def get_minibatch(roidb):
                          fg_rois_per_image,
                          rois_per_image)
        feat_rois = _map_im_rois_to_feat_rois(im_rois, im_scale_factors[im_i])
-        # Assert various bounds
-        assert((feat_rois[:, 2] >= feat_rois[:, 0]).all())
-        assert((feat_rois[:, 3] >= feat_rois[:, 1]).all())
-        assert((feat_rois >= 0).all())
        rois_blob_this_image = \
            np.append(im_i * np.ones((feat_rois.shape[0], 1)), feat_rois,
                      axis=1)
@@ -50,7 +48,7 @@ def _get_bbox_regression_labels(bbox_target_data):
    # Return (N, K * 4, 1, 1) blob of regression targets
    # Return (N, K * 4, 1, 1) blob of Euclidean loss weights
    clss = bbox_target_data[:, 0]
-    bbox_targets = np.zeros((clss.size, 4 * conf.NUM_CLASSES), dtype=np.float32)
+    bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32)
    bbox_loss_weights = np.zeros(bbox_targets.shape, dtype=np.float32)
    inds = np.where(clss > 0)[0]
    for ind in inds: