Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
weixin_47816946
simple-faster-rcnn-pytorch
提交
caaa7fdd
S
simple-faster-rcnn-pytorch
项目概览
weixin_47816946
/
simple-faster-rcnn-pytorch
与 Fork 源项目一致
从无法访问的项目Fork
通知
8
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
S
simple-faster-rcnn-pytorch
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
提交
caaa7fdd
编写于
12月 26, 2017
作者:
C
chenyuntc
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
refactor code2
上级
0f7d507e
变更
8
隐藏空白更改
内联
并排
Showing
8 changed file
with
82 addition
and
76 deletion
+82
-76
data/dataset.py
data/dataset.py
+18
-13
data/voc_dataset.py
data/voc_dataset.py
+4
-3
model/ROIModule.py
model/ROIModule.py
+0
-2
model/region_proposal_network.py
model/region_proposal_network.py
+0
-1
model/utils/creator_tool.py
model/utils/creator_tool.py
+1
-2
train.py
train.py
+17
-13
trainer.py
trainer.py
+20
-21
util/vis_tool.py
util/vis_tool.py
+22
-21
未找到文件。
data/dataset.py
浏览文件 @
caaa7fdd
...
...
@@ -5,15 +5,15 @@ from torchvision import transforms as tvtsf
from
.
import
util
import
numpy
as
np
from
config
import
opt
from
util
import
array_tool
as
at
def
inverse_normalize
(
img
):
if
opt
.
caffe_pretrain
:
img
=
img
+
(
np
.
array
([
122.7717
,
115.9465
,
102.9801
]).
reshape
(
3
,
1
,
1
))
img
=
img
+
(
np
.
array
([
122.7717
,
115.9465
,
102.9801
]).
reshape
(
3
,
1
,
1
))
return
img
[::
-
1
,
:,
:]
# approximate un-normalize for visualize
return
(
img
*
0.225
+
0.45
).
clip
(
min
=
0
,
max
=
1
)
*
255
return
(
img
*
0.225
+
0.45
).
clip
(
min
=
0
,
max
=
1
)
*
255
def
pytorch_normalze
(
img
):
"""
...
...
@@ -21,25 +21,28 @@ def pytorch_normalze(img):
return appr -1~1 RGB
"""
normalize
=
tvtsf
.
Normalize
(
mean
=
[
0.485
,
0.456
,
0.406
],
std
=
[
0.229
,
0.224
,
0.225
])
std
=
[
0.229
,
0.224
,
0.225
])
img
=
normalize
(
t
.
from_numpy
(
img
))
return
img
.
numpy
()
def
caffe_normalize
(
img
):
"""
return appr -125-125 BGR
"""
img
=
img
[[
2
,
1
,
0
],:,:]
#
RGB-BGR
img
=
img
*
255
mean
=
np
.
array
([
122.7717
,
115.9465
,
102.9801
]).
reshape
(
3
,
1
,
1
)
img
=
img
[[
2
,
1
,
0
],
:,
:]
#
RGB-BGR
img
=
img
*
255
mean
=
np
.
array
([
122.7717
,
115.9465
,
102.9801
]).
reshape
(
3
,
1
,
1
)
img
=
(
img
-
mean
).
astype
(
np
.
float32
,
copy
=
True
)
return
img
def
preprocess
(
img
,
min_size
=
600
,
max_size
=
1000
):
"""Preprocess an image for feature extraction.
The length of the shorter edge is scaled to :obj:`self.min_size`.
After the scaling, if the length of the longer edge is longer than
:param min_size:
:obj:`self.max_size`, the image is scaled to fit the longer edge
to :obj:`self.max_size`.
...
...
@@ -49,6 +52,8 @@ def preprocess(img, min_size=600, max_size=1000):
Args:
img (~numpy.ndarray): An image. This is in CHW and RGB format.
The range of its value is :math:`[0, 255]`.
(~numpy.ndarray): An image. This is in CHW and RGB format.
The range of its value is :math:`[0, 255]`.
Returns:
~numpy.ndarray:
...
...
@@ -69,6 +74,7 @@ def preprocess(img, min_size=600, max_size=1000):
normalize
=
pytorch_normalze
return
normalize
(
img
)
class
Transform
(
object
):
def
__init__
(
self
,
min_size
=
600
,
max_size
=
1000
):
...
...
@@ -92,7 +98,7 @@ class Transform(object):
return
img
,
bbox
,
label
,
scale
class
Dataset
()
:
class
Dataset
:
def
__init__
(
self
,
opt
):
self
.
opt
=
opt
self
.
db
=
VOCBboxDataset
(
opt
.
voc_data_dir
)
...
...
@@ -110,16 +116,15 @@ class Dataset():
return
len
(
self
.
db
)
class
TestDataset
()
:
def
__init__
(
self
,
opt
,
split
=
'test'
,
use_difficult
=
True
):
class
TestDataset
:
def
__init__
(
self
,
opt
,
split
=
'test'
,
use_difficult
=
True
):
self
.
opt
=
opt
self
.
db
=
testset
=
VOCBboxDataset
(
opt
.
voc_data_dir
,
split
=
split
,
use_difficult
=
use_difficult
)
self
.
db
=
VOCBboxDataset
(
opt
.
voc_data_dir
,
split
=
split
,
use_difficult
=
use_difficult
)
def
__getitem__
(
self
,
idx
):
ori_img
,
bbox
,
label
,
difficult
=
self
.
db
.
get_example
(
idx
)
img
=
preprocess
(
ori_img
)
return
(
img
)
,
ori_img
.
shape
[
1
:],
bbox
,
label
,
difficult
return
img
,
ori_img
.
shape
[
1
:],
bbox
,
label
,
difficult
def
__len__
(
self
):
return
len
(
self
.
db
)
data/voc_dataset.py
浏览文件 @
caaa7fdd
...
...
@@ -18,14 +18,15 @@
# https://github.com/rbgirshick/py-faster-rcnn
# ----------------------------------------------------
import
numpy
as
np
import
os
import
warnings
import
xml.etree.ElementTree
as
ET
import
numpy
as
np
from
.util
import
read_image
class
VOCBboxDataset
()
:
class
VOCBboxDataset
:
"""Bounding box dataset for PASCAL `VOC`_.
.. _`VOC`: http://host.robots.ox.ac.uk/pascal/VOC/voc2012/
...
...
model/ROIModule.py
浏览文件 @
caaa7fdd
...
...
@@ -108,7 +108,6 @@ def test_roi_module():
rois
=
t
.
autograd
.
Variable
(
bottom_rois
)
output
=
module
(
x
,
rois
)
output
.
sum
().
backward
()
grad_x
=
x
.
grad
.
cpu
().
data
.
numpy
()
def
t2c
(
variable
):
npa
=
variable
.
data
.
cpu
().
numpy
()
...
...
@@ -130,4 +129,3 @@ def test_roi_module():
F
.
sum
(
o_cn
).
backward
()
test_eq
(
x
.
grad
,
x_cn
.
grad
,
'backward'
)
print
(
'test pass'
)
model/region_proposal_network.py
浏览文件 @
caaa7fdd
...
...
@@ -44,7 +44,6 @@ class RegionProposalNetwork(nn.Module):
def
__init__
(
self
,
in_channels
=
512
,
mid_channels
=
512
,
ratios
=
[
0.5
,
1
,
2
],
anchor_scales
=
[
8
,
16
,
32
],
feat_stride
=
16
,
initialW
=
None
,
proposal_creator_params
=
dict
(),
):
super
(
RegionProposalNetwork
,
self
).
__init__
()
...
...
model/utils/creator_tool.py
浏览文件 @
caaa7fdd
...
...
@@ -288,7 +288,7 @@ def _get_inside_index(anchor, H, W):
return
index_inside
class
ProposalCreator
()
:
class
ProposalCreator
:
# unNOTE: I'll make it undifferential
# unTODO: make sure it's ok
# It's ok
...
...
@@ -415,7 +415,6 @@ class ProposalCreator():
if
n_pre_nms
>
0
:
order
=
order
[:
n_pre_nms
]
roi
=
roi
[
order
,
:]
score
=
score
[
order
]
# Apply nms (e.g. threshold = 0.7).
# Take after_nms_topN (e.g. 300).
...
...
train.py
浏览文件 @
caaa7fdd
...
...
@@ -5,7 +5,7 @@ import matplotlib
from
tqdm
import
tqdm
from
config
import
opt
from
data.dataset
import
Dataset
,
TestDataset
,
inverse_normalize
from
data.dataset
import
Dataset
,
TestDataset
,
inverse_normalize
from
model
import
FasterRCNNVGG16
from
torch.autograd
import
Variable
from
torch.utils
import
data
as
data_
...
...
@@ -17,11 +17,13 @@ from util.eval_tool import eval_detection_voc
# fix for ulimit
# https://github.com/pytorch/pytorch/issues/973#issuecomment-346405667
import
resource
rlimit
=
resource
.
getrlimit
(
resource
.
RLIMIT_NOFILE
)
resource
.
setrlimit
(
resource
.
RLIMIT_NOFILE
,
(
20480
,
rlimit
[
1
]))
matplotlib
.
use
(
'agg'
)
def
eval
(
dataloader
,
faster_rcnn
,
test_num
=
10000
):
pred_bboxes
,
pred_labels
,
pred_scores
=
list
(),
list
(),
list
()
gt_bboxes
,
gt_labels
,
gt_difficults
=
list
(),
list
(),
list
()
...
...
@@ -86,17 +88,17 @@ def train(**kwargs):
# plot groud truth bboxes
ori_img_
=
inverse_normalize
(
at
.
tonumpy
(
img
[
0
]))
gt_img
=
visdom_bbox
(
ori_img_
,
at
.
tonumpy
(
bbox_
[
0
]),
at
.
tonumpy
(
label_
[
0
]))
gt_img
=
visdom_bbox
(
ori_img_
,
at
.
tonumpy
(
bbox_
[
0
]),
at
.
tonumpy
(
label_
[
0
]))
trainer
.
vis
.
img
(
'gt_img'
,
gt_img
)
# plot predicti bboxes
_bboxes
,
_labels
,
_scores
=
trainer
.
faster_rcnn
.
predict
([
ori_img_
],
visualize
=
True
)
pred_img
=
visdom_bbox
(
ori_img_
,
at
.
tonumpy
(
_bboxes
[
0
]),
at
.
tonumpy
(
_labels
[
0
]).
reshape
(
-
1
),
at
.
tonumpy
(
_scores
[
0
]))
_bboxes
,
_labels
,
_scores
=
trainer
.
faster_rcnn
.
predict
([
ori_img_
],
visualize
=
True
)
pred_img
=
visdom_bbox
(
ori_img_
,
at
.
tonumpy
(
_bboxes
[
0
]),
at
.
tonumpy
(
_labels
[
0
]).
reshape
(
-
1
),
at
.
tonumpy
(
_scores
[
0
]))
trainer
.
vis
.
img
(
'pred_img'
,
pred_img
)
# rpn confusion matrix(meter)
...
...
@@ -108,19 +110,21 @@ def train(**kwargs):
if
eval_result
[
'map'
]
>
best_map
:
best_map
=
eval_result
[
'map'
]
best_path
=
trainer
.
save
(
best_map
=
best_map
)
if
epoch
==
9
:
if
epoch
==
9
:
trainer
.
load
(
best_path
)
trainer
.
faster_rcnn
.
scale_lr
(
opt
.
lr_decay
)
trainer
.
vis
.
plot
(
'test_map'
,
eval_result
[
'map'
])
lr_
=
trainer
.
faster_rcnn
.
optimizer
.
param_groups
[
0
][
'lr'
]
log_info
=
'lr:{}, map:{},loss:{}'
.
format
(
str
(
lr_
),
str
(
eval_result
[
'map'
]),
str
(
trainer
.
get_meter_data
()))
str
(
eval_result
[
'map'
]),
str
(
trainer
.
get_meter_data
()))
trainer
.
vis
.
log
(
log_info
)
if
epoch
==
13
:
break
if
epoch
==
13
:
break
if
__name__
==
'__main__'
:
import
fire
fire
.
Fire
()
trainer.py
浏览文件 @
caaa7fdd
...
...
@@ -7,7 +7,7 @@ from torch import nn
import
torch
as
t
from
torch.autograd
import
Variable
from
util
import
array_tool
as
at
from
util.vis_tool
import
Visualizer
from
util.vis_tool
import
Visualizer
from
config
import
opt
from
torchnet.meter
import
ConfusionMeter
,
AverageValueMeter
...
...
@@ -104,7 +104,7 @@ class FasterRCNNTrainer(nn.Module):
rpn_score
=
rpn_scores
[
0
]
rpn_loc
=
rpn_locs
[
0
]
roi
=
rois
# Sample RoIs and forward
# it's fine to break the computation graph of rois,
# consider them as constant input
...
...
@@ -114,7 +114,6 @@ class FasterRCNNTrainer(nn.Module):
at
.
tonumpy
(
label
),
self
.
loc_normalize_mean
,
self
.
loc_normalize_std
)
self
.
sample_roi
,
self
.
gt_roi_label
=
sample_roi
,
gt_roi_label
# NOTE it's all zero because now it only support for batch=1 now
sample_roi_index
=
t
.
zeros
(
len
(
sample_roi
))
roi_cls_loc
,
roi_score
=
self
.
faster_rcnn
.
head
(
...
...
@@ -124,16 +123,16 @@ class FasterRCNNTrainer(nn.Module):
# ------------------ RPN losses -------------------#
gt_rpn_loc
,
gt_rpn_label
=
self
.
anchor_target_creator
(
at
.
tonumpy
(
bbox
),
anchor
,
img_size
)
at
.
tonumpy
(
bbox
),
anchor
,
img_size
)
gt_rpn_label
=
at
.
tovariable
(
gt_rpn_label
).
long
()
gt_rpn_loc
=
at
.
tovariable
(
gt_rpn_loc
)
rpn_loc_loss
=
_fast_rcnn_loc_loss
(
rpn_loc
,
gt_rpn_loc
,
gt_rpn_label
.
data
,
self
.
rpn_sigma
)
rpn_loc
,
gt_rpn_loc
,
gt_rpn_label
.
data
,
self
.
rpn_sigma
)
# NOTE: default value of ignore_index is -100 ...
rpn_cls_loss
=
F
.
cross_entropy
(
rpn_score
,
gt_rpn_label
.
cuda
(),
ignore_index
=-
1
)
...
...
@@ -145,15 +144,15 @@ class FasterRCNNTrainer(nn.Module):
n_sample
=
roi_cls_loc
.
shape
[
0
]
roi_cls_loc
=
roi_cls_loc
.
view
(
n_sample
,
-
1
,
4
)
roi_loc
=
roi_cls_loc
[
t
.
arange
(
0
,
n_sample
).
long
().
cuda
(),
\
at
.
totensor
(
gt_roi_label
).
long
()]
at
.
totensor
(
gt_roi_label
).
long
()]
gt_roi_label
=
at
.
tovariable
(
gt_roi_label
).
long
()
gt_roi_loc
=
at
.
tovariable
(
gt_roi_loc
)
roi_loc_loss
=
_fast_rcnn_loc_loss
(
roi_loc
.
contiguous
(),
gt_roi_loc
,
gt_roi_label
.
data
,
self
.
roi_sigma
)
roi_loc
.
contiguous
(),
gt_roi_loc
,
gt_roi_label
.
data
,
self
.
roi_sigma
)
roi_cls_loss
=
nn
.
CrossEntropyLoss
()(
roi_score
,
gt_roi_label
.
cuda
())
...
...
@@ -197,14 +196,14 @@ class FasterRCNNTrainer(nn.Module):
if
save_path
is
None
:
timestr
=
time
.
strftime
(
'%m%d%H%M'
)
save_path
=
'checkpoints/fasterrcnn_%s'
%
timestr
for
k_
,
v_
in
kwargs
.
items
():
save_path
+=
'_%s'
%
v_
for
k_
,
v_
in
kwargs
.
items
():
save_path
+=
'_%s'
%
v_
t
.
save
(
save_dict
,
save_path
)
self
.
vis
.
save
([
self
.
vis
.
env
])
return
save_path
def
load
(
self
,
path
,
load_optimizer
=
True
,
parse_opt
=
False
,):
def
load
(
self
,
path
,
load_optimizer
=
True
,
parse_opt
=
False
,
):
state_dict
=
t
.
load
(
path
)
if
'model'
in
state_dict
:
self
.
faster_rcnn
.
load_state_dict
(
state_dict
[
'model'
])
...
...
@@ -235,12 +234,12 @@ class FasterRCNNTrainer(nn.Module):
def
_smooth_l1_loss
(
x
,
t
,
in_weight
,
sigma
):
sigma2
=
sigma
**
2
diff
=
in_weight
*
(
x
-
t
)
abs_diff
=
(
diff
)
.
abs
()
abs_diff
=
diff
.
abs
()
flag
=
(
abs_diff
.
data
<
(
1.
/
sigma2
)).
float
()
flag
=
Variable
(
flag
)
y
=
(
flag
*
(
sigma2
/
2.
)
*
(
diff
**
2
)
+
(
1
-
flag
)
*
(
abs_diff
-
0.5
/
sigma2
))
return
(
y
)
.
sum
()
return
y
.
sum
()
def
_fast_rcnn_loc_loss
(
pred_loc
,
gt_loc
,
gt_label
,
sigma
):
...
...
@@ -251,5 +250,5 @@ def _fast_rcnn_loc_loss(pred_loc, gt_loc, gt_label, sigma):
in_weight
[(
gt_label
>
0
).
view
(
-
1
,
1
).
expand_as
(
in_weight
).
cuda
()]
=
1
loc_loss
=
_smooth_l1_loss
(
pred_loc
,
gt_loc
,
Variable
(
in_weight
),
sigma
)
# Normalize by total number of negtive and positive rois.
loc_loss
/=
(
gt_label
>=
0
).
sum
()
# ignore gt_label==-1 for rpn_loss
loc_loss
/=
(
gt_label
>=
0
).
sum
()
# ignore gt_label==-1 for rpn_loss
return
loc_loss
util/vis_tool.py
浏览文件 @
caaa7fdd
...
...
@@ -167,32 +167,33 @@ def visdom_bbox(*args, **kwargs):
class
Visualizer
(
object
):
'''
封装了visdom的基本操作,但是你仍然可以通过`self.vis.function`
调用原生的visdom接口
'''
"""
wrapper for visdom
you can still access naive visdom function by
self.line, self.scater,self._send,etc.
due to the implementation of `__getattr__`
"""
def
__init__
(
self
,
env
=
'default'
,
**
kwargs
):
self
.
vis
=
visdom
.
Visdom
(
env
=
env
,
**
kwargs
)
self
.
_vis_kw
=
kwargs
# 画的第几个数,相当于横座标
# 保存(’loss',23) 即loss的第23个点
# e.g.(’loss',23) the 23th value of loss
self
.
index
=
{}
self
.
log_text
=
''
def
reinit
(
self
,
env
=
'default'
,
**
kwargs
):
'''
修改visdom的配置
'''
"""
change the config of visdom
"""
self
.
vis
=
visdom
.
Visdom
(
env
=
env
,
**
kwargs
)
return
self
def
plot_many
(
self
,
d
):
'''
一次plot多个
"""
plot multi values
@params d: dict (name,value) i.e. ('loss',0.11)
'''
"""
for
k
,
v
in
d
.
items
():
if
v
is
not
None
:
self
.
plot
(
k
,
v
)
...
...
@@ -202,12 +203,12 @@ class Visualizer(object):
self
.
img
(
k
,
v
)
def
plot
(
self
,
name
,
y
,
**
kwargs
):
'''
"""
self.plot('loss',1.00)
'''
"""
x
=
self
.
index
.
get
(
name
,
0
)
self
.
vis
.
line
(
Y
=
np
.
array
([
y
]),
X
=
np
.
array
([
x
]),
win
=
(
name
)
,
win
=
name
,
opts
=
dict
(
title
=
name
),
update
=
None
if
x
==
0
else
'append'
,
**
kwargs
...
...
@@ -215,23 +216,23 @@ class Visualizer(object):
self
.
index
[
name
]
=
x
+
1
def
img
(
self
,
name
,
img_
,
**
kwargs
):
'''
"""
self.img('input_img',t.Tensor(64,64))
self.img('input_imgs',t.Tensor(3,64,64))
self.img('input_imgs',t.Tensor(100,1,64,64))
self.img('input_imgs',t.Tensor(100,3,64,64),nrows=10)
!!!don‘t ~~self.img('input_imgs',t.Tensor(100,64,64),nrows=10)~~!!!
'''
"""
self
.
vis
.
images
(
t
.
Tensor
(
img_
).
cpu
().
numpy
(),
win
=
(
name
)
,
win
=
name
,
opts
=
dict
(
title
=
name
),
**
kwargs
)
def
log
(
self
,
info
,
win
=
'log_text'
):
'''
"""
self.log({'loss':1,'lr':0.0001})
'''
"""
self
.
log_text
+=
(
'[{time}] {info} <br>'
.
format
(
time
=
time
.
strftime
(
'%m%d_%H%M%S'
),
\
info
=
info
))
...
...
@@ -252,4 +253,4 @@ class Visualizer(object):
self
.
vis
=
visdom
.
Visdom
(
env
=
d
.
get
(
'env'
,
self
.
vis
.
env
),
**
(
self
.
d
.
get
(
'vis_kw'
)))
self
.
log_text
=
d
.
get
(
'log_text'
,
''
)
self
.
index
=
d
.
get
(
'index'
,
dict
())
return
self
\ No newline at end of file
return
self
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录