Commit caaa7fdd authored by chenyuntc

refactor code2

Parent 0f7d507e
@@ -5,15 +5,15 @@ from torchvision import transforms as tvtsf
 from . import util
 import numpy as np
 from config import opt
 from util import array_tool as at


 def inverse_normalize(img):
     if opt.caffe_pretrain:
-        img = img + (np.array([122.7717, 115.9465, 102.9801]).reshape(3,1,1))
+        img = img + (np.array([122.7717, 115.9465, 102.9801]).reshape(3, 1, 1))
         return img[::-1, :, :]
     # approximate un-normalize for visualize
-    return (img*0.225+0.45).clip(min=0,max=1)*255
+    return (img * 0.225 + 0.45).clip(min=0, max=1) * 255


 def pytorch_normalze(img):
     """
@@ -21,25 +21,28 @@ def pytorch_normalze(img):
     return appr -1~1 RGB
     """
     normalize = tvtsf.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
     img = normalize(t.from_numpy(img))
     return img.numpy()


 def caffe_normalize(img):
     """
     return appr -125~125 BGR
     """
-    img = img[[2,1,0],:,:] #RGB-BGR
-    img = img*255
-    mean=np.array([122.7717, 115.9465, 102.9801]).reshape(3,1,1)
+    img = img[[2, 1, 0], :, :]  # RGB-BGR
+    img = img * 255
+    mean = np.array([122.7717, 115.9465, 102.9801]).reshape(3, 1, 1)
     img = (img - mean).astype(np.float32, copy=True)
     return img
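When `opt.caffe_pretrain` is set, `inverse_normalize` above is the exact inverse of `caffe_normalize`: it adds the channel means back and flips BGR to RGB. A minimal sanity-check sketch (not part of this commit, and assuming the caffe branch is active):

```python
import numpy as np

img = np.random.rand(3, 32, 32).astype(np.float32)  # 0~1 RGB, CHW
restored = inverse_normalize(caffe_normalize(img))  # back to 0~255 RGB
assert np.allclose(restored, img * 255, atol=1e-3)
```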


 def preprocess(img, min_size=600, max_size=1000):
     """Preprocess an image for feature extraction.

     The length of the shorter edge is scaled to :obj:`self.min_size`.
     After the scaling, if the length of the longer edge is longer than
-    :param min_size:
     :obj:`self.max_size`, the image is scaled to fit the longer edge
     to :obj:`self.max_size`.
@@ -49,6 +52,8 @@ def preprocess(img, min_size=600, max_size=1000):
     Args:
         img (~numpy.ndarray): An image. This is in CHW and RGB format.
             The range of its value is :math:`[0, 255]`.

     Returns:
         ~numpy.ndarray:
@@ -69,6 +74,7 @@ def preprocess(img, min_size=600, max_size=1000):
         normalize = pytorch_normalze
     return normalize(img)
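The short-edge/long-edge rule in the docstring reduces to a single scale factor. A sketch of that computation (derived from the description; the actual resize call is elided from this hunk):

```python
def compute_scale(H, W, min_size=600, max_size=1000):
    # Fit the short edge to min_size unless that would push the long edge
    # past max_size; in that case fit the long edge to max_size instead.
    return min(min_size / min(H, W), max_size / max(H, W))

compute_scale(375, 500)  # 1.6, so a 375x500 image is resized to 600x800
```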


 class Transform(object):
     def __init__(self, min_size=600, max_size=1000):
@@ -92,7 +98,7 @@ class Transform(object):
         return img, bbox, label, scale


-class Dataset():
+class Dataset:
     def __init__(self, opt):
         self.opt = opt
         self.db = VOCBboxDataset(opt.voc_data_dir)
@@ -110,16 +116,15 @@ class Dataset():
         return len(self.db)


-class TestDataset():
-    def __init__(self, opt,split='test',use_difficult=True):
+class TestDataset:
+    def __init__(self, opt, split='test', use_difficult=True):
         self.opt = opt
-        self.db = testset = VOCBboxDataset(opt.voc_data_dir, split=split, use_difficult=use_difficult)
+        self.db = VOCBboxDataset(opt.voc_data_dir, split=split, use_difficult=use_difficult)

     def __getitem__(self, idx):
         ori_img, bbox, label, difficult = self.db.get_example(idx)
         img = preprocess(ori_img)
-        return (img), ori_img.shape[1:], bbox, label, difficult
+        return img, ori_img.shape[1:], bbox, label, difficult

     def __len__(self):
         return len(self.db)
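Hypothetical usage of the two dataset classes above (the `num_workers` value and the `shuffle`/`pin_memory` flags are assumptions, not part of this diff); `batch_size` stays 1 because the trainer only supports single-image batches:

```python
from torch.utils import data as data_

dataloader = data_.DataLoader(Dataset(opt), batch_size=1, shuffle=True,
                              num_workers=8)
test_dataloader = data_.DataLoader(TestDataset(opt), batch_size=1,
                                   shuffle=False, num_workers=8,
                                   pin_memory=True)
img, bbox, label, scale = next(iter(dataloader))
```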
@@ -18,14 +18,15 @@
 # https://github.com/rbgirshick/py-faster-rcnn
 # ----------------------------------------------------

-import numpy as np
 import os
 import warnings
 import xml.etree.ElementTree as ET

+import numpy as np

 from .util import read_image


-class VOCBboxDataset():
+class VOCBboxDataset:
     """Bounding box dataset for PASCAL `VOC`_.

     .. _`VOC`: http://host.robots.ox.ac.uk/pascal/VOC/voc2012/
......
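Usage sketch for the class (the `get_example` signature follows how `TestDataset.__getitem__` calls it above; the split name and the box-coordinate convention are assumptions):

```python
db = VOCBboxDataset(opt.voc_data_dir, split='trainval')
# img: CHW RGB float32 in [0, 255]; bbox: (R, 4) array, assumed to follow the
# (ymin, xmin, ymax, xmax) convention; label/difficult: (R,) arrays
img, bbox, label, difficult = db.get_example(0)
```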
@@ -108,7 +108,6 @@ def test_roi_module():
     rois = t.autograd.Variable(bottom_rois)
     output = module(x, rois)
     output.sum().backward()
-    grad_x = x.grad.cpu().data.numpy()

     def t2c(variable):
         npa = variable.data.cpu().numpy()
@@ -130,4 +129,3 @@ def test_roi_module():
     F.sum(o_cn).backward()
     test_eq(x.grad, x_cn.grad, 'backward')
     print('test pass')
@@ -44,7 +44,6 @@ class RegionProposalNetwork(nn.Module):
     def __init__(
             self, in_channels=512, mid_channels=512, ratios=[0.5, 1, 2],
             anchor_scales=[8, 16, 32], feat_stride=16,
-            initialW=None,
             proposal_creator_params=dict(),
     ):
         super(RegionProposalNetwork, self).__init__()
......
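With `initialW` gone from the signature, construction now looks like the sketch below (default values copied from the signature above; weight initialization is presumably handled inside the module after this change):

```python
rpn = RegionProposalNetwork(
    in_channels=512, mid_channels=512,  # VGG16 conv5_3 feature width
    ratios=[0.5, 1, 2], anchor_scales=[8, 16, 32],
    feat_stride=16,                     # VGG16 downsamples by 16
    proposal_creator_params=dict(),
)
```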
@@ -288,7 +288,7 @@ def _get_inside_index(anchor, H, W):
     return index_inside


-class ProposalCreator():
+class ProposalCreator:
     # unNOTE: I'll make it undifferential
     # unTODO: make sure it's ok
     # It's ok
@@ -415,7 +415,6 @@ class ProposalCreator():
         if n_pre_nms > 0:
             order = order[:n_pre_nms]
         roi = roi[order, :]
-        score = score[order]

         # Apply nms (e.g. threshold = 0.7).
         # Take after_nms_topN (e.g. 300).
......
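For context, the (mostly elided) proposal selection around the removed line boils down to the following sketch; the `argsort` line, the `n_post_nms` handling, and the `non_maximum_suppression` helper are assumptions, not shown in this diff:

```python
order = score.ravel().argsort()[::-1]  # proposal indices by descending score
if n_pre_nms > 0:
    order = order[:n_pre_nms]          # keep the top-n before NMS
roi = roi[order, :]
# score = score[order] is no longer needed: roi is already sorted by score,
# and the NMS step below only consumes the boxes.
keep = non_maximum_suppression(roi, thresh=0.7)  # hypothetical NMS helper
if n_post_nms > 0:
    keep = keep[:n_post_nms]           # keep the top-n survivors
roi = roi[keep]
```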
@@ -5,7 +5,7 @@ import matplotlib
 from tqdm import tqdm

 from config import opt
-from data.dataset import Dataset, TestDataset,inverse_normalize
+from data.dataset import Dataset, TestDataset, inverse_normalize
 from model import FasterRCNNVGG16
 from torch.autograd import Variable
 from torch.utils import data as data_
@@ -17,11 +17,13 @@ from util.eval_tool import eval_detection_voc

 # fix for ulimit
 # https://github.com/pytorch/pytorch/issues/973#issuecomment-346405667
 import resource

 rlimit = resource.getrlimit(resource.RLIMIT_NOFILE)
 resource.setrlimit(resource.RLIMIT_NOFILE, (20480, rlimit[1]))

 matplotlib.use('agg')


 def eval(dataloader, faster_rcnn, test_num=10000):
     pred_bboxes, pred_labels, pred_scores = list(), list(), list()
     gt_bboxes, gt_labels, gt_difficults = list(), list(), list()
@@ -86,17 +88,17 @@ def train(**kwargs):
             # plot ground truth bboxes
             ori_img_ = inverse_normalize(at.tonumpy(img[0]))
             gt_img = visdom_bbox(ori_img_,
                                  at.tonumpy(bbox_[0]),
                                  at.tonumpy(label_[0]))
             trainer.vis.img('gt_img', gt_img)

             # plot predicted bboxes
-            _bboxes, _labels, _scores = trainer.faster_rcnn.predict([ori_img_],visualize=True)
-            pred_img = visdom_bbox( ori_img_,
-                                    at.tonumpy(_bboxes[0]),
-                                    at.tonumpy(_labels[0]).reshape(-1),
-                                    at.tonumpy(_scores[0]))
+            _bboxes, _labels, _scores = trainer.faster_rcnn.predict([ori_img_], visualize=True)
+            pred_img = visdom_bbox(ori_img_,
+                                   at.tonumpy(_bboxes[0]),
+                                   at.tonumpy(_labels[0]).reshape(-1),
+                                   at.tonumpy(_scores[0]))
             trainer.vis.img('pred_img', pred_img)

             # rpn confusion matrix(meter)
@@ -108,19 +110,21 @@ def train(**kwargs):
         if eval_result['map'] > best_map:
             best_map = eval_result['map']
             best_path = trainer.save(best_map=best_map)
-        if epoch==9:
+        if epoch == 9:
             trainer.load(best_path)
             trainer.faster_rcnn.scale_lr(opt.lr_decay)

         trainer.vis.plot('test_map', eval_result['map'])
         lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
         log_info = 'lr:{}, map:{},loss:{}'.format(str(lr_),
                                                   str(eval_result['map']),
                                                   str(trainer.get_meter_data()))
         trainer.vis.log(log_info)
-        if epoch == 13: break
+        if epoch == 13:
+            break


 if __name__ == '__main__':
     import fire

     fire.Fire()
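`fire.Fire()` exposes every top-level callable of the module on the command line, so training is launched roughly as follows (flag names come from `config.opt`; the values here are placeholders):

```python
# $ python train.py train --env='fasterrcnn' --plot-every=100
# fire maps this invocation onto the function call:
#     train(env='fasterrcnn', plot_every=100)
```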
@@ -7,7 +7,7 @@ from torch import nn
 import torch as t
 from torch.autograd import Variable

 from util import array_tool as at
 from util.vis_tool import Visualizer
 from config import opt
 from torchnet.meter import ConfusionMeter, AverageValueMeter
@@ -104,7 +104,7 @@ class FasterRCNNTrainer(nn.Module):
         rpn_score = rpn_scores[0]
         rpn_loc = rpn_locs[0]
         roi = rois

         # Sample RoIs and forward
         # it's fine to break the computation graph of rois,
         # consider them as constant input
@@ -114,7 +114,6 @@ class FasterRCNNTrainer(nn.Module):
             at.tonumpy(label),
             self.loc_normalize_mean,
             self.loc_normalize_std)
-        self.sample_roi, self.gt_roi_label = sample_roi, gt_roi_label
         # NOTE it's all zero because it only supports batch=1 now
         sample_roi_index = t.zeros(len(sample_roi))
         roi_cls_loc, roi_score = self.faster_rcnn.head(
@@ -124,16 +123,16 @@ class FasterRCNNTrainer(nn.Module):
         # ------------------ RPN losses -------------------#
         gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
             at.tonumpy(bbox),
             anchor,
             img_size)
         gt_rpn_label = at.tovariable(gt_rpn_label).long()
         gt_rpn_loc = at.tovariable(gt_rpn_loc)
         rpn_loc_loss = _fast_rcnn_loc_loss(
             rpn_loc,
             gt_rpn_loc,
             gt_rpn_label.data,
             self.rpn_sigma)

         # NOTE: default value of ignore_index is -100 ...
         rpn_cls_loss = F.cross_entropy(rpn_score, gt_rpn_label.cuda(), ignore_index=-1)
@@ -145,15 +144,15 @@ class FasterRCNNTrainer(nn.Module):
         n_sample = roi_cls_loc.shape[0]
         roi_cls_loc = roi_cls_loc.view(n_sample, -1, 4)
         roi_loc = roi_cls_loc[t.arange(0, n_sample).long().cuda(),
                               at.totensor(gt_roi_label).long()]
         gt_roi_label = at.tovariable(gt_roi_label).long()
         gt_roi_loc = at.tovariable(gt_roi_loc)
         roi_loc_loss = _fast_rcnn_loc_loss(
             roi_loc.contiguous(),
             gt_roi_loc,
             gt_roi_label.data,
             self.roi_sigma)

         roi_cls_loss = nn.CrossEntropyLoss()(roi_score, gt_roi_label.cuda())
@@ -197,14 +196,14 @@ class FasterRCNNTrainer(nn.Module):
         if save_path is None:
             timestr = time.strftime('%m%d%H%M')
             save_path = 'checkpoints/fasterrcnn_%s' % timestr
-            for k_,v_ in kwargs.items():
-                save_path += '_%s' %v_
+            for k_, v_ in kwargs.items():
+                save_path += '_%s' % v_

         t.save(save_dict, save_path)
         self.vis.save([self.vis.env])
         return save_path

-    def load(self, path, load_optimizer=True, parse_opt=False,):
+    def load(self, path, load_optimizer=True, parse_opt=False, ):
         state_dict = t.load(path)
         if 'model' in state_dict:
             self.faster_rcnn.load_state_dict(state_dict['model'])
@@ -235,12 +234,12 @@ class FasterRCNNTrainer(nn.Module):

 def _smooth_l1_loss(x, t, in_weight, sigma):
     sigma2 = sigma ** 2
     diff = in_weight * (x - t)
-    abs_diff = (diff).abs()
+    abs_diff = diff.abs()
     flag = (abs_diff.data < (1. / sigma2)).float()
     flag = Variable(flag)
     y = (flag * (sigma2 / 2.) * (diff ** 2) +
          (1 - flag) * (abs_diff - 0.5 / sigma2))
-    return (y).sum()
+    return y.sum()
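Per element, with x = in_weight * (x - t), the function above computes the standard smooth L1 loss, summed over all elements:

```latex
\mathrm{smooth}_{L_1}(x) =
\begin{cases}
  \frac{\sigma^2}{2}\,x^2 & \text{if } |x| < \frac{1}{\sigma^2},\\
  |x| - \frac{1}{2\sigma^2} & \text{otherwise.}
\end{cases}
```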


 def _fast_rcnn_loc_loss(pred_loc, gt_loc, gt_label, sigma):
@@ -251,5 +250,5 @@ def _fast_rcnn_loc_loss(pred_loc, gt_loc, gt_label, sigma):
     in_weight[(gt_label > 0).view(-1, 1).expand_as(in_weight).cuda()] = 1
     loc_loss = _smooth_l1_loss(pred_loc, gt_loc, Variable(in_weight), sigma)
     # Normalize by total number of negative and positive rois.
-    loc_loss /= (gt_label >= 0).sum() # ignore gt_label==-1 for rpn_loss
+    loc_loss /= (gt_label >= 0).sum()  # ignore gt_label==-1 for rpn_loss
     return loc_loss
@@ -167,32 +167,33 @@ def visdom_bbox(*args, **kwargs):


 class Visualizer(object):
-    '''
-    封装了visdom的基本操作,但是你仍然可以通过`self.vis.function`
-    调用原生的visdom接口
-    '''
+    """
+    wrapper for visdom
+    you can still access native visdom functions via
+    self.line, self.scatter, self._send, etc.,
+    due to the implementation of `__getattr__`
+    """

     def __init__(self, env='default', **kwargs):
         self.vis = visdom.Visdom(env=env, **kwargs)
         self._vis_kw = kwargs

-        # 画的第几个数,相当于横座标
-        # 保存(’loss',23) 即loss的第23个点
+        # e.g. ('loss', 23) is the 23rd recorded value of loss
         self.index = {}
         self.log_text = ''

     def reinit(self, env='default', **kwargs):
-        '''
-        修改visdom的配置
-        '''
+        """
+        change the config of visdom
+        """
         self.vis = visdom.Visdom(env=env, **kwargs)
         return self

     def plot_many(self, d):
-        '''
-        一次plot多个
+        """
+        plot multiple values at once
         @params d: dict (name,value) i.e. ('loss',0.11)
-        '''
+        """
         for k, v in d.items():
             if v is not None:
                 self.plot(k, v)
@@ -202,12 +203,12 @@ class Visualizer(object):
                 self.img(k, v)

     def plot(self, name, y, **kwargs):
-        '''
+        """
         self.plot('loss',1.00)
-        '''
+        """
         x = self.index.get(name, 0)
         self.vis.line(Y=np.array([y]), X=np.array([x]),
-                      win=(name),
+                      win=name,
                       opts=dict(title=name),
                       update=None if x == 0 else 'append',
                       **kwargs
@@ -215,23 +216,23 @@ class Visualizer(object):
         self.index[name] = x + 1

     def img(self, name, img_, **kwargs):
-        '''
+        """
         self.img('input_img',t.Tensor(64,64))
         self.img('input_imgs',t.Tensor(3,64,64))
         self.img('input_imgs',t.Tensor(100,1,64,64))
         self.img('input_imgs',t.Tensor(100,3,64,64),nrows=10)
         !!!don't ~~self.img('input_imgs',t.Tensor(100,64,64),nrows=10)~~!!!
-        '''
+        """
         self.vis.images(t.Tensor(img_).cpu().numpy(),
-                        win=(name),
+                        win=name,
                         opts=dict(title=name),
                         **kwargs
                         )

     def log(self, info, win='log_text'):
-        '''
+        """
         self.log({'loss':1,'lr':0.0001})
-        '''
+        """
         self.log_text += ('[{time}] {info} <br>'.format(
             time=time.strftime('%m%d_%H%M%S'),
             info=info))
@@ -252,4 +253,4 @@ class Visualizer(object):
         self.vis = visdom.Visdom(env=d.get('env', self.vis.env), **(d.get('vis_kw', dict())))
         self.log_text = d.get('log_text', '')
         self.index = d.get('index', dict())
-        return self
\ No newline at end of file
+        return self
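A usage sketch for the wrapper, mirroring the examples in its docstrings (the env name is a placeholder):

```python
vis = Visualizer(env='faster-rcnn')
vis.plot('loss', 0.11)                     # one point; the x index advances automatically
vis.img('input_img', t.Tensor(3, 64, 64))  # a single CHW image
vis.log({'loss': 0.11, 'lr': 1e-4})        # appended to the 'log_text' pane
```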