From ef0fa051591923e237ebdd860f59dd72f626c6e4 Mon Sep 17 00:00:00 2001
From: chenyuntc <chenyuntc@163.com>
Date: Thu, 21 Dec 2017 22:13:09 +0800
Subject: [PATCH] Use Caffe-style preprocessing (BGR, 0-255, mean-subtracted) and local VGG16 weights

---
 data/dataset.py            | 20 +++++++-------------
 model/faster_rcnn_vgg16.py |  3 ++-
 model/utils/rpn_tools.py   |  2 --
 train.py                   | 34 +++++++++++++++++-----------------
 4 files changed, 26 insertions(+), 33 deletions(-)

diff --git a/data/dataset.py b/data/dataset.py
index dcd2855..59236b1 100644
--- a/data/dataset.py
+++ b/data/dataset.py
@@ -4,6 +4,7 @@ from skimage import transform as sktsf
 from torchvision import transforms as tvtsf
 from . import util
 from util import array_tool as at
+ 
 
 
 def preprocess(img, min_size=600, max_size=1000):
@@ -32,20 +33,13 @@ def preprocess(img, min_size=600, max_size=1000):
     scale = min(scale1, scale2)
     # both the longer and shorter should be less than
     # max_size and min_size
-    img = img / 256.
+    img = img / 255.
+    img = img[[2,1,0],:,:] #RGB-BGR
     img = sktsf.resize(img, (C, H * scale, W * scale), mode='reflect')
-    normalize = tvtsf.Normalize(mean=[0.485, 0.456, 0.406],
-                                std=[0.229, 0.224, 0.225])
-
-    img = normalize(t.from_numpy(img))
-    return img.numpy()
-    # unNOTE: original implementation in chainer:
-    # mean=np.array([122.7717, 115.9465, 102.9801],
-    # img = (img - self.mean).astype(np.float32, copy=False)
-    # Answer: https://github.com/pytorch/vision/issues/223
-    # the input of vgg16 in pytorch:
-    # rgb 0 to 1, instead of bgr 0 to 255
-
+    img = img*255
+    mean=np.array([122.7717, 115.9465, 102.9801]).reshape(3,1,1)
+    img = (img - mean).astype(np.float32, copy=True)
+    return img
 
 class Transform(object):
 
diff --git a/model/faster_rcnn_vgg16.py b/model/faster_rcnn_vgg16.py
index 8695c70..07755fa 100644
--- a/model/faster_rcnn_vgg16.py
+++ b/model/faster_rcnn_vgg16.py
@@ -12,7 +12,8 @@ from config import opt
 
 def decom_vgg16(pretrained=True):
     # the 30th layer of features is relu of conv5_3
-    model = vgg16(pretrained)
+    model = vgg16(pretrained=False)
+    model.load_state_dict(t.load('/home/a/code/pytorch/faster-rcnn/pytorch-faster-rcnn/data/imagenet_weights/vgg16.pth'))  # load_state_dict takes a state dict, not a path; assumes torch is imported as `t` here - TODO confirm
     features = list(model.features)[:30]
     classifier = model.classifier
 
diff --git a/model/utils/rpn_tools.py b/model/utils/rpn_tools.py
index 54d82d1..24772f1 100644
--- a/model/utils/rpn_tools.py
+++ b/model/utils/rpn_tools.py
@@ -54,7 +54,6 @@ class ProposalCreator():
                  n_train_post_nms=2000,
                  n_test_pre_nms=6000,
                  n_test_post_nms=300,
-                 force_cpu_nms=False,
                  min_size=16
                  ):
         self.parent_model = parent_model
@@ -63,7 +62,6 @@ class ProposalCreator():
         self.n_train_post_nms = n_train_post_nms
         self.n_test_pre_nms = n_test_pre_nms
         self.n_test_post_nms = n_test_post_nms
-        self.force_cpu_nms = force_cpu_nms
         self.min_size = min_size
 
     def __call__(self, loc, score,
diff --git a/train.py b/train.py
index 961923c..3882af0 100644
--- a/train.py
+++ b/train.py
@@ -62,7 +62,7 @@ def train(**kwargs):
         trainer.load(opt.load_path)
         print('load pretrained model from %s' % opt.load_path)
 
-    trainer.optimizer = trainer.faster_rcnn.get_great_optimizer()
+    # trainer.optimizer = trainer.faster_rcnn.get_great_optimizer()
     trainer.vis.text(dataset.db.label_names, win='labels')
     best_map = 0
     for epoch in range(opt.epoch):
@@ -80,20 +80,20 @@ def train(**kwargs):
                 # plot loss
                 trainer.vis.plot_many(trainer.get_meter_data())
 
-                # plot groud truth bboxes
-                ori_img_ = (img * 0.225 + 0.45).clamp(min=0, max=1) * 255
-                gt_img = visdom_bbox(at.tonumpy(ori_img_)[0], 
-                                    at.tonumpy(bbox_)[0], 
-                                    label_[0].numpy())
-                trainer.vis.img('gt_img', gt_img)
-
-                # plot predicti bboxes
-                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(ori_img,visualize=True)
-                pred_img = visdom_bbox( at.tonumpy(ori_img[0]), 
-                                        at.tonumpy(_bboxes[0]),
-                                        at.tonumpy(_labels[0]).reshape(-1), 
-                                        at.tonumpy(_scores[0]))
-                trainer.vis.img('pred_img', pred_img)
+                # # plot ground truth bboxes
+                # ori_img_ = (img * 0.225 + 0.45).clamp(min=0, max=1) * 255
+                # gt_img = visdom_bbox(at.tonumpy(ori_img_)[0], 
+                #                     at.tonumpy(bbox_)[0], 
+                #                     label_[0].numpy())
+                # trainer.vis.img('gt_img', gt_img)
+
+                # # plot predicted bboxes
+                # _bboxes, _labels, _scores = trainer.faster_rcnn.predict(ori_img,visualize=True)
+                # pred_img = visdom_bbox( at.tonumpy(ori_img[0]), 
+                #                         at.tonumpy(_bboxes[0]),
+                #                         at.tonumpy(_labels[0]).reshape(-1), 
+                #                         at.tonumpy(_scores[0]))
+                # trainer.vis.img('pred_img', pred_img)
 
                 # rpn confusion matrix(meter)
                 trainer.vis.text(str(trainer.rpn_cm.value().tolist()), win='rpn_cm')
@@ -110,8 +110,8 @@ def train(**kwargs):
         if epoch==8:
             trainer.load(best_path)
             trainer.faster_rcnn.scale_lr(opt.lr_decay)
-        if epoch ==0:
-            trainer.optimizer = trainer.faster_rcnn.get_optimizer()
+        # if epoch ==0:
+        #     trainer.optimizer = trainer.faster_rcnn.get_optimizer()
 
         trainer.vis.plot('test_map', eval_result['map'])
         lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
-- 
GitLab