#!/usr/bin/env python
"""Run a Fast R-CNN style Caffe network over an image database and evaluate.

For every image the network scores the image's ROIs and regresses one box per
class; the per-class detections are thresholded, pickled to ``dets.pkl``,
written out in the PASCAL VOC results format and finally handed to a MATLAB
evaluation script.
"""

import os
import subprocess
import sys

# The caffe python bindings are located relative to this script, so the path
# has to be registered before ``import caffe`` below.
caffe_path = '../caffe/python'
sys.path.insert(0, caffe_path)

import argparse
from utils.timer import Timer
import numpy as np
import matplotlib.pyplot as plt
import cv2
import caffe
import fast_rcnn_config as conf
import utils.cython_nms
import datasets.pascal_voc
import cPickle
import heapq


def _get_image_blob(im):
    """Build the multi-scale network input blob for one image.

    The image is converted to float32, ``conf.PIXEL_MEANS`` is subtracted and
    one resized copy is produced per entry of ``conf.TEST_SCALES``. Each scale
    targets the shorter image side, but is reduced if the longer side would
    exceed ``conf.TEST_MAX_SIZE``.

    Returns:
        (blob, im_scale_factors): ``blob`` is a float32 array laid out as
        (scale index, channel, height, width), zero padded to the largest
        resized image; ``im_scale_factors`` is an array holding the scale
        applied for each entry along the first axis.
    """
    im_orig = im.astype(np.float32, copy=True)
    im_orig -= conf.PIXEL_MEANS

    im_shape = im_orig.shape
    im_size_min = np.min(im_shape[0:2])
    im_size_max = np.max(im_shape[0:2])

    max_shape = (0, 0, 0)
    processed_ims = []
    im_scale_factors = []

    for target_size in conf.TEST_SCALES:
        im_scale = float(target_size) / float(im_size_min)
        # Prevent the biggest axis from being more than MAX_SIZE
        if np.round(im_scale * im_size_max) > conf.TEST_MAX_SIZE:
            im_scale = float(conf.TEST_MAX_SIZE) / float(im_size_max)
        im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale,
                        interpolation=cv2.INTER_LINEAR)
        im_scale_factors.append(im_scale)
        processed_ims.append(im)
        max_shape = np.maximum(max_shape, im.shape)

    num_images = len(processed_ims)
    blob = np.zeros((num_images, max_shape[0], max_shape[1], max_shape[2]),
                    dtype=np.float32)
    for i in xrange(num_images):
        im = processed_ims[i]
        # Smaller scales occupy the top-left corner; the rest stays zero.
        blob[i, 0:im.shape[0], 0:im.shape[1], :] = im
    # Move channels (axis 3) to axis 1
    # Axis order will become: (batch elem, channel, height, width)
    channel_swap = (0, 3, 1, 2)
    blob = blob.transpose(channel_swap)
    return blob, np.array(im_scale_factors)


def _get_rois_blob(im_rois, im_scale_factors):
    """Convert image-space ROIs into the network's ``rois`` input blob.

    Each output row is the chosen scale level followed by the ROI coordinates
    mapped by ``_map_im_rois_to_feat_rois``; two trailing singleton axes are
    appended and the result is returned as float32.
    """
    feat_rois, levels = _map_im_rois_to_feat_rois(im_rois, im_scale_factors)
    rois_blob = np.hstack((levels, feat_rois))[:, :, np.newaxis, np.newaxis]
    return rois_blob.astype(np.float32, copy=False)


def _map_im_rois_to_feat_rois(im_rois, scales):
    """Assign each ROI to a scale level and map it to feature coordinates.

    For every ROI the level is the index into ``scales`` at which the scaled
    ROI area is closest to 227 * 227. The ROI is then multiplied by that
    scale, divided by ``conf.FEAT_STRIDE`` and rounded.

    Returns:
        (feat_rois, levels): the mapped ROIs and a column vector of the
        selected level index per ROI.
    """
    # np.float was only an alias for the builtin float (i.e. float64) and has
    # been removed from recent NumPy releases; np.float64 is equivalent.
    im_rois = im_rois.astype(np.float64, copy=False)

    widths = im_rois[:, 2] - im_rois[:, 0] + 1
    heights = im_rois[:, 3] - im_rois[:, 1] + 1

    areas = widths * heights
    scaled_areas = areas[:, np.newaxis] * (scales[np.newaxis, :] ** 2)
    # TODO(rbg): 227 or 224? or blah/....
    diff_areas = np.abs(scaled_areas - 227 * 227)
    levels = diff_areas.argmin(axis=1)[:, np.newaxis]

    feat_rois = np.round(im_rois * scales[levels] / conf.FEAT_STRIDE)
    return feat_rois, levels


def _get_blobs(im, rois):
    """Assemble the ``data`` and ``rois`` input blobs for one image.

    Returns:
        (blobs, im_scale_factors): ``blobs`` is a dict with keys ``'data'``
        and ``'rois'``; ``im_scale_factors`` are the scales used for the
        image pyramid.
    """
    blobs = {'data' : None, 'rois' : None}
    blobs['data'], im_scale_factors = _get_image_blob(im)
    blobs['rois'] = _get_rois_blob(rois, im_scale_factors)
    return blobs, im_scale_factors


def _bbox_pred(boxes, box_deltas):
    """Apply regression deltas to boxes.

    ``box_deltas`` holds repeating groups of four columns (dx, dy, dw, dh).
    dx/dy shift the box centre in units of the box width/height, dw/dh scale
    the width/height through ``exp``.

    Returns:
        An array shaped like ``box_deltas`` with (x1, y1, x2, y2) per group
        of four columns. For an empty ``boxes`` input an array with zero rows
        and ``box_deltas.shape[1]`` columns is returned.
    """
    if boxes.shape[0] == 0:
        return np.zeros((0, box_deltas.shape[1]))

    # np.float64 is what the removed np.float alias resolved to.
    boxes = boxes.astype(np.float64, copy=False)
    # conf.EPS is added to the raw coordinate differences.
    widths = boxes[:, 2] - boxes[:, 0] + conf.EPS
    heights = boxes[:, 3] - boxes[:, 1] + conf.EPS
    ctr_x = boxes[:, 0] + 0.5 * widths
    ctr_y = boxes[:, 1] + 0.5 * heights

    dx = box_deltas[:, 0::4]
    dy = box_deltas[:, 1::4]
    dw = box_deltas[:, 2::4]
    dh = box_deltas[:, 3::4]

    pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis]
    pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis]
    pred_w = np.exp(dw) * widths[:, np.newaxis]
    pred_h = np.exp(dh) * heights[:, np.newaxis]

    pred_boxes = np.zeros(box_deltas.shape)
    # x1
    pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w
    # y1
    pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h
    # x2
    pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w
    # y2
    pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h

    return pred_boxes


def _clip_boxes(boxes, im_shape):
    """Clip boxes to the image boundaries, in place.

    ``boxes`` uses repeating (x1, y1, x2, y2) column groups; ``im_shape`` is
    indexed as (height, width, ...). The modified ``boxes`` array is also
    returned.
    """
    # x1 >= 0
    boxes[:, 0::4] = np.maximum(boxes[:, 0::4], 0)
    # y1 >= 0
    boxes[:, 1::4] = np.maximum(boxes[:, 1::4], 0)
    # x2 < im_shape[1]
    boxes[:, 2::4] = np.minimum(boxes[:, 2::4], im_shape[1] - 1)
    # y2 < im_shape[0]
    boxes[:, 3::4] = np.minimum(boxes[:, 3::4], im_shape[0] - 1)
    return boxes


def im_detect(net, im, boxes):
    """Score the given ROIs of an image and regress per-class boxes.

    ROIs that map to the same (level, feature box) row are forwarded through
    the network only once; the results are expanded back to one row per
    input box before returning.

    Args:
        net: the caffe network; its ``data`` and ``rois`` input blobs are
            reshaped for this image.
        im: the image as loaded by ``cv2.imread``.
        boxes: array of ROIs with (x1, y1, x2, y2) columns.

    Returns:
        (scores, pred_boxes): ``scores`` are the ``fc8_pascal`` outputs with
        the column-0 (background) score subtracted from every column;
        ``pred_boxes`` are the regressed boxes, clipped to the image.
    """
    blobs, im_scale_factors = _get_blobs(im, boxes)

    # Remove duplicate ROI rows: collapse the five ROI columns into a single
    # number per row and let np.unique find the distinct rows. inv_index maps
    # every original row to its unique representative.
    hashes = (blobs['rois'][:, :, 0, 0] *
              np.array([[1, 1e3, 1e6, 1e9, 1e12]])).sum(axis=1)
    _, index, inv_index = np.unique(hashes, return_index=True,
                                    return_inverse=True)
    blobs['rois'] = blobs['rois'][index, :, :, :]
    boxes = boxes[index, :]

    # reshape network inputs
    base_shape = blobs['data'].shape
    num_rois = blobs['rois'].shape[0]
    net.blobs['data'].reshape(base_shape[0], base_shape[1],
                              base_shape[2], base_shape[3])
    net.blobs['rois'].reshape(num_rois, 5, 1, 1)
    blobs_out = net.forward(data=blobs['data'].astype(np.float32, copy=False),
                            rois=blobs['rois'].astype(np.float32, copy=False))

    scores = blobs_out['fc8_pascal'][:, :, 0, 0]
    # Return scores as fg - bg
    scores = scores - scores[:, 0][:, np.newaxis]

    box_deltas = blobs_out['fc8_pascal_bbox'][:, :, 0, 0]
    pred_boxes = _bbox_pred(boxes, box_deltas)
    pred_boxes = _clip_boxes(pred_boxes, im.shape)

    # Expand the de-duplicated results back to the original ROI order.
    scores = scores[inv_index, :]
    pred_boxes = pred_boxes[inv_index, :]

    # TODO(rbg): try variant where we predict boxes and then score those.
    # Need to compute all cls_rois (one _get_rois_blob per class from
    # pred_boxes[:, i*4:(i+1)*4]), deduplicate them, and re-run the net from
    # 'roi_pool5' to obtain scores[:, i] = cls_scores[:, i] - cls_scores[:, 0].

    return scores, pred_boxes


def _vis_detections(im, class_name, dets):
    """Show up to the first 10 detections that have a positive score.

    Each detection row starts with (x1, y1, x2, y2) and ends with the score.
    Every qualifying detection is drawn on its own and displayed for half a
    second.
    """
    # Reverse the channel order for matplotlib display.
    im = im[:, :, (2, 1, 0)]
    for i in xrange(np.minimum(10, dets.shape[0])):
        bbox = dets[i, :4]
        score = dets[i, -1]
        if score > 0:
            plt.cla()
            plt.imshow(im)
            plt.gca().add_patch(
                plt.Rectangle((bbox[0], bbox[1]),
                              bbox[2] - bbox[0],
                              bbox[3] - bbox[1], fill=False,
                              edgecolor='g', linewidth=3)
                )
            plt.title('{}  {:.3f}'.format(class_name, score))
            plt.pause(0.5)


def _write_voc_results_file(imdb, all_boxes):
    """Write one VOC-format detection results file per object class.

    NMS with a 0.3 threshold is applied per image and class before writing.
    The files are tagged with the id of the current process.

    Args:
        imdb: image database providing ``classes`` and ``image_index``.
        all_boxes: ``all_boxes[cls][image]`` is an N x 5 array of
            (x1, y1, x2, y2, score) rows, or an empty list.

    Returns:
        The process id that was embedded in the result file names.
    """
    pid = os.getpid()
    # Example of a resulting path:
    # /data/VOC2007/VOCdevkit/results/VOC2007/Main/comp4-44503_det_test_aeroplane.txt
    base_path = './datasets/VOCdevkit2007/results/VOC2007/Main/comp4-{}_'.format(pid)
    for cls_ind, cls in enumerate(imdb.classes):
        if cls == '__background__':
            continue
        file_name = base_path + 'det_test_' + cls + '.txt'
        with open(file_name, 'wt') as f:
            for im_ind, index in enumerate(imdb.image_index):
                dets = all_boxes[cls_ind][im_ind]
                # Covers both the initial [] placeholder and an ndarray with
                # zero rows; comparing an ndarray against [] is not a
                # well-defined truth value.
                if len(dets) == 0:
                    continue
                keep = utils.cython_nms.nms(dets, 0.3)
                if len(keep) == 0:
                    continue
                # Indexing with the keep list yields a copy, so the in-place
                # shift below does not modify all_boxes.
                dets = dets[keep, :]
                # the VOCdevkit expects 1-based indices
                dets[:, :4] += 1
                for k in xrange(dets.shape[0]):
                    f.write('{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'.format(
                        index, dets[k, -1],
                        dets[k, 0], dets[k, 1], dets[k, 2], dets[k, 3]))
    print('Evaluate comp4-{}'.format(pid))
    return pid


def _do_matlab_eval(pid):
    """Run the MATLAB VOC evaluation for the results tagged with ``pid``.

    Returns:
        The exit status of the shell command.
    """
    cmd = 'cd ../rcnn;'
    cmd += 'matlab -nodisplay -nodesktop '
    cmd += '-r "load imdb/cache/imdb_voc_2007_test.mat; '
    cmd += 'imdb_eval_voc_py(imdb, {});"'.format(pid)
    # The command relies on shell features (cd and ';'), hence shell=True.
    status = subprocess.call(cmd, shell=True)
    return status


def fast_rcnn_test(net, imdb):
    """Detect objects in every image of ``imdb`` and evaluate the results.

    Detections are collected per class and image, limited per image and
    across the whole set, saved to ``dets.pkl``, written as VOC results files
    and evaluated through MATLAB.
    """
    num_images = len(imdb.image_index)
    # heuristic: keep an average of 40 detections per class per images prior
    # to NMS
    max_per_set = 40 * num_images
    # heuristic: keep at most 100 detection per class per image prior to NMS
    max_per_image = 100
    # detection threshold for each class (this is adaptively set based on the
    # max_per_set constraint)
    thresh = -np.inf * np.ones(imdb.num_classes)
    # top_scores will hold one minheap of scores per class (used to enforce
    # the max_per_set constraint)
    top_scores = [[] for _ in xrange(imdb.num_classes)]
    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]

    # timers
    _t = {'im_detect' : Timer(), 'misc' : Timer()}

    roidb = imdb.roidb
    for i in xrange(num_images):
        im = cv2.imread(imdb.image_path_at(i))
        _t['im_detect'].tic()
        scores, boxes = im_detect(net, im, roidb[i]['boxes'])
        _t['im_detect'].toc()

        _t['misc'].tic()
        for j in xrange(1, imdb.num_classes):
            # Only ROIs whose gt_classes entry is 0 are considered.
            inds = np.where((scores[:, j] > thresh[j]) &
                            (roidb[i]['gt_classes'] == 0))[0]
            cls_scores = scores[inds, j]
            cls_boxes = boxes[inds, j*4:(j+1)*4]
            top_inds = np.argsort(-cls_scores)[:max_per_image]
            cls_scores = cls_scores[top_inds]
            cls_boxes = cls_boxes[top_inds, :]
            # push new scores onto the minheap
            for val in cls_scores:
                heapq.heappush(top_scores[j], val)
            # if we've collected more than the max number of detection,
            # then pop items off the minheap and update the class threshold
            if len(top_scores[j]) > max_per_set:
                while len(top_scores[j]) > max_per_set:
                    heapq.heappop(top_scores[j])
                thresh[j] = top_scores[j][0]

            all_boxes[j][i] = \
                np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
                .astype(np.float32, copy=False)

            # Debug switch: set to 1 to visualize detections after NMS.
            if 0:
                keep = utils.cython_nms.nms(all_boxes[j][i], 0.3)
                _vis_detections(im, imdb.classes[j], all_boxes[j][i][keep, :])
        _t['misc'].toc()

        print('im_detect: {:d}/{:d} {:.3f}s {:.3f}s'
              .format(i + 1, num_images, _t['im_detect'].average_time,
                      _t['misc'].average_time))

    # The thresholds kept rising while images were processed, so apply the
    # final per-class threshold to everything collected earlier.
    for j in xrange(1, imdb.num_classes):
        for i in xrange(num_images):
            inds = np.where(all_boxes[j][i][:, -1] > thresh[j])[0]
            all_boxes[j][i] = all_boxes[j][i][inds, :]

    with open('dets.pkl', 'wb') as f:
        cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)

    # Write results file and call matlab to evaluate
    pid = _write_voc_results_file(imdb, all_boxes)
    _do_matlab_eval(pid)


if __name__ == '__main__':
    prototxt = 'model-defs/vgg16_pyramid_forward_only_bbox_reg.prototxt'
    caffemodel = '/home/rbg/working/pyramid-rcnn/fast-rcnn/snapshots/vgg16_finetune_all_joint_bbox_reg_smoothL1_roidb2_iter_40000.caffemodel'
    caffe.set_phase_test()
    caffe.set_mode_gpu()
    GPU_ID = 2
    if GPU_ID is not None:
        caffe.set_device(GPU_ID)
    net = caffe.Net(prototxt, caffemodel)

    imdb = datasets.pascal_voc('test', '2007')

    fast_rcnn_test(net, imdb)