From ef0fa051591923e237ebdd860f59dd72f626c6e4 Mon Sep 17 00:00:00 2001 From: chenyuntc Date: Thu, 21 Dec 2017 22:13:09 +0800 Subject: [PATCH] for caffe --- data/dataset.py | 20 +++++++------------- model/faster_rcnn_vgg16.py | 3 ++- model/utils/rpn_tools.py | 2 -- train.py | 34 +++++++++++++++++----------------- 4 files changed, 26 insertions(+), 33 deletions(-) diff --git a/data/dataset.py b/data/dataset.py index dcd2855..59236b1 100644 --- a/data/dataset.py +++ b/data/dataset.py @@ -4,6 +4,7 @@ from skimage import transform as sktsf from torchvision import transforms as tvtsf from . import util from util import array_tool as at + def preprocess(img, min_size=600, max_size=1000): @@ -32,20 +33,13 @@ def preprocess(img, min_size=600, max_size=1000): scale = min(scale1, scale2) # both the longer and shorter should be less than # max_size and min_size - img = img / 256. + img = img / 255. + img = img[[2,1,0],:,:] #RGB-BGR img = sktsf.resize(img, (C, H * scale, W * scale), mode='reflect') - normalize = tvtsf.Normalize(mean=[0.485, 0.456, 0.406], - std=[0.229, 0.224, 0.225]) - - img = normalize(t.from_numpy(img)) - return img.numpy() - # unNOTE: original implementation in chainer: - # mean=np.array([122.7717, 115.9465, 102.9801], - # img = (img - self.mean).astype(np.float32, copy=False) - # Answer: https://github.com/pytorch/vision/issues/223 - # the input of vgg16 in pytorch: - # rgb 0 to 1, instead of bgr 0 to 255 - + img = img*255 + mean=np.array([122.7717, 115.9465, 102.9801]).reshape(3,1,1) + img = (img - mean).astype(np.float32, copy=True) + return img class Transform(object): diff --git a/model/faster_rcnn_vgg16.py b/model/faster_rcnn_vgg16.py index 8695c70..07755fa 100644 --- a/model/faster_rcnn_vgg16.py +++ b/model/faster_rcnn_vgg16.py @@ -12,7 +12,8 @@ from config import opt def decom_vgg16(pretrained=True): # the 30th layer of features is relu of conv5_3 - model = vgg16(pretrained) + model = vgg16(pretrained=False) + model.load_state_dict('/home/a/code/pytorch/faster-rcnn/pytorch-faster-rcnn/data/imagenet_weights/vgg16.pth') features = list(model.features)[:30] classifier = model.classifier diff --git a/model/utils/rpn_tools.py b/model/utils/rpn_tools.py index 54d82d1..24772f1 100644 --- a/model/utils/rpn_tools.py +++ b/model/utils/rpn_tools.py @@ -54,7 +54,6 @@ class ProposalCreator(): n_train_post_nms=2000, n_test_pre_nms=6000, n_test_post_nms=300, - force_cpu_nms=False, min_size=16 ): self.parent_model = parent_model @@ -63,7 +62,6 @@ class ProposalCreator(): self.n_train_post_nms = n_train_post_nms self.n_test_pre_nms = n_test_pre_nms self.n_test_post_nms = n_test_post_nms - self.force_cpu_nms = force_cpu_nms self.min_size = min_size def __call__(self, loc, score, diff --git a/train.py b/train.py index 961923c..3882af0 100644 --- a/train.py +++ b/train.py @@ -62,7 +62,7 @@ def train(**kwargs): trainer.load(opt.load_path) print('load pretrained model from %s' % opt.load_path) - trainer.optimizer = trainer.faster_rcnn.get_great_optimizer() + # trainer.optimizer = trainer.faster_rcnn.get_great_optimizer() trainer.vis.text(dataset.db.label_names, win='labels') best_map = 0 for epoch in range(opt.epoch): @@ -80,20 +80,20 @@ def train(**kwargs): # plot loss trainer.vis.plot_many(trainer.get_meter_data()) - # plot groud truth bboxes - ori_img_ = (img * 0.225 + 0.45).clamp(min=0, max=1) * 255 - gt_img = visdom_bbox(at.tonumpy(ori_img_)[0], - at.tonumpy(bbox_)[0], - label_[0].numpy()) - trainer.vis.img('gt_img', gt_img) - - # plot predicti bboxes - _bboxes, _labels, _scores = trainer.faster_rcnn.predict(ori_img,visualize=True) - pred_img = visdom_bbox( at.tonumpy(ori_img[0]), - at.tonumpy(_bboxes[0]), - at.tonumpy(_labels[0]).reshape(-1), - at.tonumpy(_scores[0])) - trainer.vis.img('pred_img', pred_img) + # # plot groud truth bboxes + # ori_img_ = (img * 0.225 + 0.45).clamp(min=0, max=1) * 255 + # gt_img = visdom_bbox(at.tonumpy(ori_img_)[0], + # at.tonumpy(bbox_)[0], + # label_[0].numpy()) + # trainer.vis.img('gt_img', gt_img) + + # # plot predicti bboxes + # _bboxes, _labels, _scores = trainer.faster_rcnn.predict(ori_img,visualize=True) + # pred_img = visdom_bbox( at.tonumpy(ori_img[0]), + # at.tonumpy(_bboxes[0]), + # at.tonumpy(_labels[0]).reshape(-1), + # at.tonumpy(_scores[0])) + # trainer.vis.img('pred_img', pred_img) # rpn confusion matrix(meter) trainer.vis.text(str(trainer.rpn_cm.value().tolist()), win='rpn_cm') @@ -110,8 +110,8 @@ def train(**kwargs): if epoch==8: trainer.load(best_path) trainer.faster_rcnn.scale_lr(opt.lr_decay) - if epoch ==0: - trainer.optimizer = trainer.faster_rcnn.get_optimizer() + # if epoch ==0: + # trainer.optimizer = trainer.faster_rcnn.get_optimizer() trainer.vis.plot('test_map', eval_result['map']) lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr'] -- GitLab