diff --git a/data/dataset.py b/data/dataset.py index dcd285536024b2748fff6f29fb11e7616cb63367..59236b1a3eb7304ec801fbdb19a5f973d6c9443c 100644 --- a/data/dataset.py +++ b/data/dataset.py @@ -4,6 +4,7 @@ from skimage import transform as sktsf from torchvision import transforms as tvtsf from . import util from util import array_tool as at + def preprocess(img, min_size=600, max_size=1000): @@ -32,20 +33,13 @@ def preprocess(img, min_size=600, max_size=1000): scale = min(scale1, scale2) # both the longer and shorter should be less than # max_size and min_size - img = img / 256. + img = img / 255. + img = img[[2,1,0],:,:] #RGB-BGR img = sktsf.resize(img, (C, H * scale, W * scale), mode='reflect') - normalize = tvtsf.Normalize(mean=[0.485, 0.456, 0.406], - std=[0.229, 0.224, 0.225]) - - img = normalize(t.from_numpy(img)) - return img.numpy() - # unNOTE: original implementation in chainer: - # mean=np.array([122.7717, 115.9465, 102.9801], - # img = (img - self.mean).astype(np.float32, copy=False) - # Answer: https://github.com/pytorch/vision/issues/223 - # the input of vgg16 in pytorch: - # rgb 0 to 1, instead of bgr 0 to 255 - + img = img*255 + mean=np.array([122.7717, 115.9465, 102.9801]).reshape(3,1,1) + img = (img - mean).astype(np.float32, copy=True) + return img class Transform(object): diff --git a/model/faster_rcnn_vgg16.py b/model/faster_rcnn_vgg16.py index 8695c709dfb88089267091237227e012b9e4063c..07755fa688ac8f84793be55fdbbf491b04ebc01a 100644 --- a/model/faster_rcnn_vgg16.py +++ b/model/faster_rcnn_vgg16.py @@ -12,7 +12,8 @@ from config import opt def decom_vgg16(pretrained=True): # the 30th layer of features is relu of conv5_3 - model = vgg16(pretrained) + model = vgg16(pretrained=False) + model.load_state_dict('/home/a/code/pytorch/faster-rcnn/pytorch-faster-rcnn/data/imagenet_weights/vgg16.pth') features = list(model.features)[:30] classifier = model.classifier diff --git a/model/utils/rpn_tools.py b/model/utils/rpn_tools.py index 54d82d1390ac76963cfc6debe88fbcc19683ca23..24772f153b2cb9e12595af5cfe83f51474c6fdc0 100644 --- a/model/utils/rpn_tools.py +++ b/model/utils/rpn_tools.py @@ -54,7 +54,6 @@ class ProposalCreator(): n_train_post_nms=2000, n_test_pre_nms=6000, n_test_post_nms=300, - force_cpu_nms=False, min_size=16 ): self.parent_model = parent_model @@ -63,7 +62,6 @@ class ProposalCreator(): self.n_train_post_nms = n_train_post_nms self.n_test_pre_nms = n_test_pre_nms self.n_test_post_nms = n_test_post_nms - self.force_cpu_nms = force_cpu_nms self.min_size = min_size def __call__(self, loc, score, diff --git a/train.py b/train.py index 961923c911c30455d9548fbf8f58e214c77d5a3f..3882af007176941ef500f321dcdeffbe5c78188e 100644 --- a/train.py +++ b/train.py @@ -62,7 +62,7 @@ def train(**kwargs): trainer.load(opt.load_path) print('load pretrained model from %s' % opt.load_path) - trainer.optimizer = trainer.faster_rcnn.get_great_optimizer() + # trainer.optimizer = trainer.faster_rcnn.get_great_optimizer() trainer.vis.text(dataset.db.label_names, win='labels') best_map = 0 for epoch in range(opt.epoch): @@ -80,20 +80,20 @@ def train(**kwargs): # plot loss trainer.vis.plot_many(trainer.get_meter_data()) - # plot groud truth bboxes - ori_img_ = (img * 0.225 + 0.45).clamp(min=0, max=1) * 255 - gt_img = visdom_bbox(at.tonumpy(ori_img_)[0], - at.tonumpy(bbox_)[0], - label_[0].numpy()) - trainer.vis.img('gt_img', gt_img) - - # plot predicti bboxes - _bboxes, _labels, _scores = trainer.faster_rcnn.predict(ori_img,visualize=True) - pred_img = visdom_bbox( at.tonumpy(ori_img[0]), - at.tonumpy(_bboxes[0]), - at.tonumpy(_labels[0]).reshape(-1), - at.tonumpy(_scores[0])) - trainer.vis.img('pred_img', pred_img) + # # plot groud truth bboxes + # ori_img_ = (img * 0.225 + 0.45).clamp(min=0, max=1) * 255 + # gt_img = visdom_bbox(at.tonumpy(ori_img_)[0], + # at.tonumpy(bbox_)[0], + # label_[0].numpy()) + # trainer.vis.img('gt_img', gt_img) + + # # plot predicti bboxes + # _bboxes, _labels, _scores = trainer.faster_rcnn.predict(ori_img,visualize=True) + # pred_img = visdom_bbox( at.tonumpy(ori_img[0]), + # at.tonumpy(_bboxes[0]), + # at.tonumpy(_labels[0]).reshape(-1), + # at.tonumpy(_scores[0])) + # trainer.vis.img('pred_img', pred_img) # rpn confusion matrix(meter) trainer.vis.text(str(trainer.rpn_cm.value().tolist()), win='rpn_cm') @@ -110,8 +110,8 @@ def train(**kwargs): if epoch==8: trainer.load(best_path) trainer.faster_rcnn.scale_lr(opt.lr_decay) - if epoch ==0: - trainer.optimizer = trainer.faster_rcnn.get_optimizer() + # if epoch ==0: + # trainer.optimizer = trainer.faster_rcnn.get_optimizer() trainer.vis.plot('test_map', eval_result['map']) lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']