未验证 提交 6095dd07 编写于 作者: J jerrywgz 提交者: GitHub

add config for Faster RCNN (#1309)

* add config for Faster RCNN

* adaptive value for lr_steps
上级 d507a7a7
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from edict import AttrDict
import six
import numpy as np
_C = AttrDict()
cfg = _C
#
# Training options
#
_C.TRAIN = AttrDict()
# scales an image's shortest side
_C.TRAIN.scales = [800]
# max size of longest side
_C.TRAIN.max_size = 1333
# images per GPU in minibatch
_C.TRAIN.im_per_batch = 1
# roi minibatch size per image
_C.TRAIN.batch_size_per_im = 512
# target fraction of foreground roi minibatch
_C.TRAIN.fg_fractrion = 0.25
# overlap threshold for a foreground roi
_C.TRAIN.fg_thresh = 0.5
# overlap threshold for a background roi
_C.TRAIN.bg_thresh_hi = 0.5
_C.TRAIN.bg_thresh_lo = 0.0
# If False, only resize image and not pad, image shape is different between
# GPUs in one mini-batch. If True, image shape is the same in one mini-batch.
_C.TRAIN.padding_minibatch = False
# Snapshot period
_C.TRAIN.snapshot_iter = 10000
# number of RPN proposals to keep before NMS
_C.TRAIN.rpn_pre_nms_top_n = 12000
# number of RPN proposals to keep after NMS
_C.TRAIN.rpn_post_nms_top_n = 2000
# NMS threshold used on RPN proposals
_C.TRAIN.rpn_nms_thresh = 0.7
# min size in RPN proposals
_C.TRAIN.rpn_min_size = 0.0
# eta for adaptive NMS in RPN
_C.TRAIN.rpn_eta = 1.0
# number of RPN examples per image
_C.TRAIN.rpn_batch_size_per_im = 256
# remove anchors out of the image
_C.TRAIN.rpn_straddle_thresh = 0.
# target fraction of foreground examples pre RPN minibatch
_C.TRAIN.rpn_fg_fraction = 0.5
# min overlap between anchor and gt box to be a positive examples
_C.TRAIN.rpn_positive_overlap = 0.7
# max overlap between anchor and gt box to be a negative examples
_C.TRAIN.rpn_negative_overlap = 0.3
# stopgrad at a specified stage
_C.TRAIN.freeze_at = 2
# min area of ground truth box
_C.TRAIN.gt_min_area = -1
#
# Inference options
#
_C.TEST = AttrDict()
# scales an image's shortest side
_C.TEST.scales = [800]
# max size of longest side
_C.TEST.max_size = 1333
# eta for adaptive NMS in RPN
_C.TEST.rpn_eta = 1.0
# min score threshold to infer
_C.TEST.score_thresh = 0.05
# overlap threshold used for NMS
_C.TEST.nms_thresh = 0.5
# number of RPN proposals to keep before NMS
_C.TEST.rpn_pre_nms_top_n = 6000
# number of RPN proposals to keep after NMS
_C.TEST.rpn_post_nms_top_n = 1000
# min size in RPN proposals
_C.TEST.rpn_min_size = 0.0
# max number of detections
_C.TEST.detectiions_per_im = 100
# NMS threshold used on RPN proposals
_C.TEST.rpn_nms_thresh = 0.7
#
# Model options
#
# weight for bbox regression targets
_C.bbox_reg_weights = [0.1, 0.1, 0.2, 0.2]
# RPN anchor sizes
_C.anchor_sizes = [32, 64, 128, 256, 512]
# RPN anchor ratio
_C.aspect_ratio = [0.5, 1, 2]
# variance of anchors
_C.variances = [1., 1., 1., 1.]
# stride of feature map
_C.rpn_stride = [16.0, 16.0]
#
# SOLVER options
#
# derived learning rate the to get the final learning rate.
_C.learning_rate = 0.01
# maximum number of iterations
_C.max_iter = 180000
# warm up to learning rate
_C.warm_up_iter = 500
_C.warm_up_factor = 1. / 3.
# lr steps_with_decay
_C.lr_steps = [120000, 160000]
_C.lr_gamma = 0.1
# L2 regularization hyperparameter
_C.weight_decay = 0.0001
# momentum with SGD
_C.momentum = 0.9
#
# ENV options
#
# support both CPU and GPU
_C.use_gpu = True
# Whether use parallel
_C.parallel = True
# Class number
_C.class_num = 81
# support pyreader
_C.use_pyreader = True
# pixel mean values
_C.pixel_means = [102.9801, 115.9465, 122.7717]
# clip box to prevent overflowing
_C.bbox_clip = np.log(1000. / 16.)
# dataset path
_C.train_file_list = 'annotations/instances_train2017.json'
_C.train_data_dir = 'train2017'
_C.val_file_list = 'annotations/instances_val2017.json'
_C.val_data_dir = 'val2017'
def merge_cfg_from_args(args, mode):
"""Merge config keys, values in args into the global config."""
if mode == 'train':
sub_d = _C.TRAIN
else:
sub_d = _C.TEST
for k, v in sorted(six.iteritems(vars(args))):
d = _C
try:
value = eval(v)
except:
value = v
if k in sub_d:
sub_d[k] = value
else:
d[k] = value
......@@ -27,21 +27,27 @@ from __future__ import unicode_literals
import cv2
import numpy as np
from config import cfg
def get_image_blob(roidb, settings):
def get_image_blob(roidb, mode):
"""Builds an input blob from the images in the roidb at the specified
scales.
"""
scale_ind = np.random.randint(0, high=len(settings.scales))
if mode == 'train':
scales = cfg.TRAIN.scales
scale_ind = np.random.randint(0, high=len(scales))
target_size = scales[scale_ind]
max_size = cfg.TRAIN.max_size
else:
target_size = cfg.TEST.scales[0]
max_size = cfg.TEST.max_size
im = cv2.imread(roidb['image'])
assert im is not None, \
'Failed to read image \'{}\''.format(roidb['image'])
if roidb['flipped']:
im = im[:, ::-1, :]
target_size = settings.scales[scale_ind]
im, im_scale = prep_im_for_blob(im, settings.mean_value, target_size,
settings.max_size)
im, im_scale = prep_im_for_blob(im, cfg.pixel_means, target_size, max_size)
return im, im_scale
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
class AttrDict(dict):
def __init__(self, *args, **kwargs):
super(AttrDict, self).__init__(*args, **kwargs)
def __getattr__(self, name):
if name in self.__dict__:
return self.__dict__[name]
elif name in self:
return self[name]
else:
raise AttributeError(name)
def __setattr__(self, name, value):
if name in self.__dict__:
self.__dict__[name] = value
else:
self[name] = value
......@@ -29,18 +29,20 @@ import models.resnet as resnet
import json
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval, Params
from config import cfg
def eval(cfg):
def eval():
if '2014' in cfg.dataset:
test_list = 'annotations/instances_val2014.json'
elif '2017' in cfg.dataset:
test_list = 'annotations/instances_val2017.json'
image_shape = [3, cfg.max_size, cfg.max_size]
image_shape = [3, cfg.TEST.max_size, cfg.TEST.max_size]
class_nums = cfg.class_num
batch_size = cfg.batch_size
devices = os.getenv("CUDA_VISIBLE_DEVICES") or ""
devices_num = len(devices.split(","))
total_batch_size = devices_num * cfg.TRAIN.im_per_batch
cocoGt = COCO(os.path.join(cfg.data_dir, test_list))
numId_to_catId_map = {i + 1: v for i, v in enumerate(cocoGt.getCatIds())}
category_ids = cocoGt.getCatIds()
......@@ -51,7 +53,6 @@ def eval(cfg):
label_list[0] = ['background']
model = model_builder.FasterRCNN(
cfg=cfg,
add_conv_body_func=resnet.add_ResNet50_conv4_body,
add_roi_box_head_func=resnet.add_ResNet_roi_conv5_head,
use_pyreader=False,
......@@ -66,7 +67,7 @@ def eval(cfg):
return os.path.exists(os.path.join(cfg.pretrained_model, var.name))
fluid.io.load_vars(exe, cfg.pretrained_model, predicate=if_exist)
# yapf: enable
test_reader = reader.test(cfg, batch_size)
test_reader = reader.test(total_batch_size)
feeder = fluid.DataFeeder(place=place, feed_list=model.feeds())
dts_res = []
......@@ -80,11 +81,11 @@ def eval(cfg):
fetch_list=[v.name for v in fetch_list],
feed=feeder.feed(batch_data),
return_numpy=False)
new_lod, nmsed_out = get_nmsed_box(cfg, rpn_rois_v, confs_v, locs_v,
new_lod, nmsed_out = get_nmsed_box(rpn_rois_v, confs_v, locs_v,
class_nums, im_info,
numId_to_catId_map)
dts_res += get_dt_res(batch_size, new_lod, nmsed_out, batch_data)
dts_res += get_dt_res(total_batch_size, new_lod, nmsed_out, batch_data)
end = time.time()
print('batch id: {}, time: {}'.format(batch_id, end - start))
with open("detection_result.json", 'w') as outfile:
......@@ -100,6 +101,4 @@ def eval(cfg):
if __name__ == '__main__':
args = parse_args()
print_arguments(args)
data_args = reader.Settings(args)
eval(data_args)
eval()
......@@ -20,6 +20,7 @@ import box_utils
from PIL import Image
from PIL import ImageDraw
from PIL import ImageFont
from config import cfg
def box_decoder(target_box, prior_box, prior_box_var):
......@@ -31,10 +32,8 @@ def box_decoder(target_box, prior_box, prior_box_var):
prior_box_loc[:, 3] = (prior_box[:, 3] + prior_box[:, 1]) / 2
pred_bbox = np.zeros_like(target_box, dtype=np.float32)
for i in range(prior_box.shape[0]):
dw = np.minimum(prior_box_var[2] * target_box[i, 2::4],
np.log(1000. / 16.))
dh = np.minimum(prior_box_var[3] * target_box[i, 3::4],
np.log(1000. / 16.))
dw = np.minimum(prior_box_var[2] * target_box[i, 2::4], cfg.bbox_clip)
dh = np.minimum(prior_box_var[3] * target_box[i, 3::4], cfg.bbox_clip)
pred_bbox[i, 0::4] = prior_box_var[0] * target_box[
i, 0::4] * prior_box_loc[i, 0] + prior_box_loc[i, 2]
pred_bbox[i, 1::4] = prior_box_var[1] * target_box[
......@@ -67,11 +66,11 @@ def clip_tiled_boxes(boxes, im_shape):
return boxes
def get_nmsed_box(args, rpn_rois, confs, locs, class_nums, im_info,
def get_nmsed_box(rpn_rois, confs, locs, class_nums, im_info,
numId_to_catId_map):
lod = rpn_rois.lod()[0]
rpn_rois_v = np.array(rpn_rois)
variance_v = np.array([0.1, 0.1, 0.2, 0.2])
variance_v = np.array(cfg.bbox_reg_weights)
confs_v = np.array(confs)
locs_v = np.array(locs)
rois = box_decoder(locs_v, rpn_rois_v, variance_v)
......@@ -89,12 +88,12 @@ def get_nmsed_box(args, rpn_rois, confs, locs, class_nums, im_info,
cls_boxes = [[] for _ in range(class_nums)]
scores_n = confs_v[start:end, :]
for j in range(1, class_nums):
inds = np.where(scores_n[:, j] > args.score_threshold)[0]
inds = np.where(scores_n[:, j] > cfg.TEST.score_thresh)[0]
scores_j = scores_n[inds, j]
rois_j = rois_n[inds, j * 4:(j + 1) * 4]
dets_j = np.hstack((rois_j, scores_j[:, np.newaxis])).astype(
np.float32, copy=False)
keep = box_utils.nms(dets_j, args.nms_threshold)
keep = box_utils.nms(dets_j, cfg.TEST.nms_thresh)
nms_dets = dets_j[keep, :]
#add labels
cat_id = numId_to_catId_map[j]
......@@ -105,8 +104,8 @@ def get_nmsed_box(args, rpn_rois, confs, locs, class_nums, im_info,
# Limit to max_per_image detections **over all classes**
image_scores = np.hstack(
[cls_boxes[j][:, -2] for j in range(1, class_nums)])
if len(image_scores) > 100:
image_thresh = np.sort(image_scores)[-100]
if len(image_scores) > cfg.TEST.detectiions_per_im:
image_thresh = np.sort(image_scores)[-cfg.TEST.detectiions_per_im]
for j in range(1, class_nums):
keep = np.where(cls_boxes[j][:, -2] >= image_thresh)[0]
cls_boxes[j] = cls_boxes[j][keep, :]
......
import os
import time
import numpy as np
from eval_helper import get_nmsed_box
from eval_helper import get_dt_res
from eval_helper import draw_bounding_box_on_image
import paddle
import paddle.fluid as fluid
import reader
from utility import print_arguments, parse_args
import models.model_builder as model_builder
import models.resnet as resnet
import json
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval, Params
from config import cfg
def infer():
if '2014' in cfg.dataset:
test_list = 'annotations/instances_val2014.json'
elif '2017' in cfg.dataset:
test_list = 'annotations/instances_val2017.json'
cocoGt = COCO(os.path.join(cfg.data_dir, test_list))
numId_to_catId_map = {i + 1: v for i, v in enumerate(cocoGt.getCatIds())}
category_ids = cocoGt.getCatIds()
label_list = {
item['id']: item['name']
for item in cocoGt.loadCats(category_ids)
}
label_list[0] = ['background']
image_shape = [3, cfg.TEST.max_size, cfg.TEST.max_size]
class_nums = cfg.class_num
model = model_builder.FasterRCNN(
add_conv_body_func=resnet.add_ResNet50_conv4_body,
add_roi_box_head_func=resnet.add_ResNet_roi_conv5_head,
use_pyreader=False,
is_train=False)
model.build_model(image_shape)
rpn_rois, confs, locs = model.eval_out()
place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place)
# yapf: disable
if cfg.pretrained_model:
def if_exist(var):
return os.path.exists(os.path.join(cfg.pretrained_model, var.name))
fluid.io.load_vars(exe, cfg.pretrained_model, predicate=if_exist)
# yapf: enable
infer_reader = reader.infer()
feeder = fluid.DataFeeder(place=place, feed_list=model.feeds())
dts_res = []
fetch_list = [rpn_rois, confs, locs]
data = next(infer_reader())
im_info = [data[0][1]]
rpn_rois_v, confs_v, locs_v = exe.run(
fetch_list=[v.name for v in fetch_list],
feed=feeder.feed(data),
return_numpy=False)
new_lod, nmsed_out = get_nmsed_box(rpn_rois_v, confs_v, locs_v, class_nums,
im_info, numId_to_catId_map)
path = os.path.join(cfg.image_path, cfg.image_name)
draw_bounding_box_on_image(path, nmsed_out, cfg.draw_threshold, label_list)
if __name__ == '__main__':
args = parse_args()
print_arguments(args)
infer()
......@@ -17,11 +17,11 @@ from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.initializer import Constant
from paddle.fluid.initializer import Normal
from paddle.fluid.regularizer import L2Decay
from config import cfg
class FasterRCNN(object):
def __init__(self,
cfg=None,
add_conv_body_func=None,
add_roi_box_head_func=None,
is_train=True,
......@@ -29,7 +29,6 @@ class FasterRCNN(object):
use_random=True):
self.add_conv_body_func = add_conv_body_func
self.add_roi_box_head_func = add_roi_box_head_func
self.cfg = cfg
self.is_train = is_train
self.use_pyreader = use_pyreader
self.use_random = use_random
......@@ -111,10 +110,10 @@ class FasterRCNN(object):
name="conv_rpn_b", learning_rate=2., regularizer=L2Decay(0.)))
self.anchor, self.var = fluid.layers.anchor_generator(
input=rpn_conv,
anchor_sizes=self.cfg.anchor_sizes,
aspect_ratios=self.cfg.aspect_ratios,
variance=self.cfg.variance,
stride=[16.0, 16.0])
anchor_sizes=cfg.anchor_sizes,
aspect_ratios=cfg.aspect_ratio,
variance=cfg.variances,
stride=cfg.rpn_stride)
num_anchor = self.anchor.shape[2]
# Proposal classification scores
self.rpn_cls_score = fluid.layers.conv2d(
......@@ -152,8 +151,12 @@ class FasterRCNN(object):
rpn_cls_score_prob = fluid.layers.sigmoid(
self.rpn_cls_score, name='rpn_cls_score_prob')
pre_nms_top_n = 12000 if self.is_train else 6000
post_nms_top_n = 2000 if self.is_train else 1000
param_obj = cfg.TRAIN if self.is_train else cfg.TEST
pre_nms_top_n = param_obj.rpn_pre_nms_top_n
post_nms_top_n = param_obj.rpn_post_nms_top_n
nms_thresh = param_obj.rpn_nms_thresh
min_size = param_obj.rpn_min_size
eta = param_obj.rpn_eta
rpn_rois, rpn_roi_probs = fluid.layers.generate_proposals(
scores=rpn_cls_score_prob,
bbox_deltas=self.rpn_bbox_pred,
......@@ -162,9 +165,9 @@ class FasterRCNN(object):
variances=self.var,
pre_nms_top_n=pre_nms_top_n,
post_nms_top_n=post_nms_top_n,
nms_thresh=0.7,
min_size=0.0,
eta=1.0)
nms_thresh=nms_thresh,
min_size=min_size,
eta=eta)
self.rpn_rois = rpn_rois
if self.is_train:
outs = fluid.layers.generate_proposal_labels(
......@@ -173,13 +176,13 @@ class FasterRCNN(object):
is_crowd=self.is_crowd,
gt_boxes=self.gt_box,
im_info=self.im_info,
batch_size_per_im=self.cfg.batch_size_per_im,
fg_fraction=0.25,
fg_thresh=0.5,
bg_thresh_hi=0.5,
bg_thresh_lo=0.0,
bbox_reg_weights=[0.1, 0.1, 0.2, 0.2],
class_nums=self.cfg.class_num,
batch_size_per_im=cfg.TRAIN.batch_size_per_im,
fg_fraction=cfg.TRAIN.fg_fractrion,
fg_thresh=cfg.TRAIN.fg_thresh,
bg_thresh_hi=cfg.TRAIN.bg_thresh_hi,
bg_thresh_lo=cfg.TRAIN.bg_thresh_lo,
bbox_reg_weights=cfg.bbox_reg_weights,
class_nums=cfg.class_num,
use_random=self.use_random)
self.rois = outs[0]
......@@ -201,7 +204,7 @@ class FasterRCNN(object):
spatial_scale=0.0625)
rcnn_out = self.add_roi_box_head_func(pool)
self.cls_score = fluid.layers.fc(input=rcnn_out,
size=self.cfg.class_num,
size=cfg.class_num,
act=None,
name='cls_score',
param_attr=ParamAttr(
......@@ -213,7 +216,7 @@ class FasterRCNN(object):
learning_rate=2.,
regularizer=L2Decay(0.)))
self.bbox_pred = fluid.layers.fc(input=rcnn_out,
size=4 * self.cfg.class_num,
size=4 * cfg.class_num,
act=None,
name='bbox_pred',
param_attr=ParamAttr(
......@@ -257,7 +260,6 @@ class FasterRCNN(object):
x=rpn_cls_score_reshape, shape=(0, -1, 1))
rpn_bbox_pred_reshape = fluid.layers.reshape(
x=rpn_bbox_pred_reshape, shape=(0, -1, 4))
score_pred, loc_pred, score_tgt, loc_tgt = \
fluid.layers.rpn_target_assign(
bbox_pred=rpn_bbox_pred_reshape,
......@@ -267,11 +269,11 @@ class FasterRCNN(object):
gt_boxes=self.gt_box,
is_crowd=self.is_crowd,
im_info=self.im_info,
rpn_batch_size_per_im=256,
rpn_straddle_thresh=0.0,
rpn_fg_fraction=0.5,
rpn_positive_overlap=0.7,
rpn_negative_overlap=0.3,
rpn_batch_size_per_im=cfg.TRAIN.rpn_batch_size_per_im,
rpn_straddle_thresh=cfg.TRAIN.rpn_straddle_thresh,
rpn_fg_fraction=cfg.TRAIN.rpn_fg_fraction,
rpn_positive_overlap=cfg.TRAIN.rpn_positive_overlap,
rpn_negative_overlap=cfg.TRAIN.rpn_negative_overlap,
use_random=self.use_random)
score_tgt = fluid.layers.cast(x=score_tgt, dtype='float32')
rpn_cls_loss = fluid.layers.sigmoid_cross_entropy_with_logits(
......
......@@ -16,6 +16,7 @@ import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.initializer import Constant
from paddle.fluid.regularizer import L2Decay
from config import cfg
def conv_bn_layer(input,
......@@ -137,7 +138,7 @@ ResNet_cfg = {
}
def add_ResNet50_conv4_body(body_input, freeze_at=2):
def add_ResNet50_conv4_body(body_input):
stages, block_func = ResNet_cfg[50]
stages = stages[0:3]
conv1 = conv_affine_layer(
......@@ -149,13 +150,13 @@ def add_ResNet50_conv4_body(body_input, freeze_at=2):
pool_stride=2,
pool_padding=1)
res2 = layer_warp(block_func, pool1, 64, stages[0], 1, name="res2")
if freeze_at == 2:
if cfg.TRAIN.freeze_at == 2:
res2.stop_gradient = True
res3 = layer_warp(block_func, res2, 128, stages[1], 2, name="res3")
if freeze_at == 3:
if cfg.TRAIN.freeze_at == 3:
res3.stop_gradient = True
res4 = layer_warp(block_func, res3, 256, stages[2], 2, name="res4")
if freeze_at == 4:
if cfg.TRAIN.freeze_at == 4:
res4.stop_gradient = True
return res4
......
......@@ -26,19 +26,18 @@ import paddle.fluid.profiler as profiler
import models.model_builder as model_builder
import models.resnet as resnet
from learning_rate import exponential_with_warmup_decay
from config import cfg
def train(cfg):
batch_size = cfg.batch_size
def train():
learning_rate = cfg.learning_rate
image_shape = [3, cfg.max_size, cfg.max_size]
image_shape = [3, cfg.TRAIN.max_size, cfg.TRAIN.max_size]
num_iterations = cfg.max_iter
devices = os.getenv("CUDA_VISIBLE_DEVICES") or ""
devices_num = len(devices.split(","))
total_batch_size = devices_num * cfg.TRAIN.im_per_batch
model = model_builder.FasterRCNN(
cfg=cfg,
add_conv_body_func=resnet.add_ResNet50_conv4_body,
add_roi_box_head_func=resnet.add_ResNet_roi_conv5_head,
use_pyreader=cfg.use_pyreader,
......@@ -51,8 +50,10 @@ def train(cfg):
rpn_reg_loss.persistable = True
loss = loss_cls + loss_bbox + rpn_cls_loss + rpn_reg_loss
boundaries = [120000, 160000]
values = [learning_rate, learning_rate * 0.1, learning_rate * 0.01]
boundaries = cfg.lr_steps
gamma = cfg.lr_gamma
step_num = len(lr_steps)
values = [learning_rate * (gamma**i) for i in range(step_num + 1)]
optimizer = fluid.optimizer.Momentum(
learning_rate=exponential_with_warmup_decay(
......@@ -82,22 +83,16 @@ def train(cfg):
train_exe = fluid.ParallelExecutor(
use_cuda=bool(cfg.use_gpu), loss_name=loss.name)
assert cfg.batch_size % devices_num == 0, \
"batch_size = %d, devices_num = %d" %(cfg.batch_size, devices_num)
batch_size_per_dev = cfg.batch_size / devices_num
if cfg.use_pyreader:
train_reader = reader.train(
cfg,
batch_size=batch_size_per_dev,
total_batch_size=cfg.batch_size,
padding_total=cfg.padding_minibatch,
batch_size=cfg.TRAIN.im_per_batch,
total_batch_size=total_batch_size,
padding_total=cfg.TRAIN.padding_minibatch,
shuffle=False)
py_reader = model.py_reader
py_reader.decorate_paddle_reader(train_reader)
else:
train_reader = reader.train(
cfg, batch_size=cfg.batch_size, shuffle=False)
train_reader = reader.train(batch_size=total_batch_size, shuffle=False)
feeder = fluid.DataFeeder(place=place, feed_list=model.feeds())
fetch_list = [loss, loss_cls, loss_bbox, rpn_cls_loss, rpn_reg_loss]
......@@ -163,8 +158,7 @@ def train(cfg):
run_func(2)
# profiling
start = time.time()
use_profile = False
if use_profile:
if cfg.use_profile:
with profiler.profiler('GPU', 'total', '/tmp/profile_file'):
reader_time, run_time, total_images = run_func(num_iterations)
else:
......@@ -181,6 +175,4 @@ def train(cfg):
if __name__ == '__main__':
args = parse_args()
print_arguments(args)
data_args = reader.Settings(args)
train(data_args)
train()
......@@ -26,58 +26,45 @@ from collections import deque
from roidbs import JsonDataset
import data_utils
from config import cfg
class Settings(object):
def __init__(self, args=None):
for arg, value in sorted(six.iteritems(vars(args))):
setattr(self, arg, value)
if 'coco2014' in args.dataset:
self.class_nums = 81
self.train_file_list = 'annotations/instances_train2014.json'
self.train_data_dir = 'train2014'
self.val_file_list = 'annotations/instances_val2014.json'
self.val_data_dir = 'val2014'
elif 'coco2017' in args.dataset:
self.class_nums = 81
self.train_file_list = 'annotations/instances_train2017.json'
self.train_data_dir = 'train2017'
self.val_file_list = 'annotations/instances_val2017.json'
self.val_data_dir = 'val2017'
else:
raise NotImplementedError('Dataset {} not supported'.format(
self.dataset))
self.mean_value = np.array(self.mean_value)[
np.newaxis, np.newaxis, :].astype('float32')
def coco(settings,
mode,
def coco(mode,
batch_size=None,
total_batch_size=None,
padding_total=False,
shuffle=False):
if 'coco2014' in cfg.dataset:
cfg.train_file_list = 'annotations/instances_train2014.json'
cfg.train_data_dir = 'train2014'
cfg.val_file_list = 'annotations/instances_val2014.json'
cfg.val_data_dir = 'val2014'
elif 'coco2017' in cfg.dataset:
cfg.train_file_list = 'annotations/instances_train2017.json'
cfg.train_data_dir = 'train2017'
cfg.val_file_list = 'annotations/instances_val2017.json'
cfg.val_data_dir = 'val2017'
else:
raise NotImplementedError('Dataset {} not supported'.format(
cfg.dataset))
cfg.mean_value = np.array(cfg.pixel_means)[np.newaxis,
np.newaxis, :].astype('float32')
total_batch_size = total_batch_size if total_batch_size else batch_size
if mode != 'infer':
assert total_batch_size % batch_size == 0
if mode == 'train':
settings.train_file_list = os.path.join(settings.data_dir,
settings.train_file_list)
settings.train_data_dir = os.path.join(settings.data_dir,
settings.train_data_dir)
cfg.train_file_list = os.path.join(cfg.data_dir, cfg.train_file_list)
cfg.train_data_dir = os.path.join(cfg.data_dir, cfg.train_data_dir)
elif mode == 'test' or mode == 'infer':
settings.val_file_list = os.path.join(settings.data_dir,
settings.val_file_list)
settings.val_data_dir = os.path.join(settings.data_dir,
settings.val_data_dir)
json_dataset = JsonDataset(settings, train=(mode == 'train'))
cfg.val_file_list = os.path.join(cfg.data_dir, cfg.val_file_list)
cfg.val_data_dir = os.path.join(cfg.data_dir, cfg.val_data_dir)
json_dataset = JsonDataset(train=(mode == 'train'))
roidbs = json_dataset.get_roidb()
print("{} on {} with {} roidbs".format(mode, settings.dataset, len(roidbs)))
print("{} on {} with {} roidbs".format(mode, cfg.dataset, len(roidbs)))
def roidb_reader(roidb, mode):
im, im_scales = data_utils.get_image_blob(roidb, settings)
im, im_scales = data_utils.get_image_blob(roidb, mode)
im_id = roidb['id']
im_height = np.round(roidb['height'] * im_scales)
im_width = np.round(roidb['width'] * im_scales)
......@@ -150,7 +137,7 @@ def coco(settings,
else:
for roidb in roidbs:
if settings.image_name not in roidb['image']:
if cfg.image_name not in roidb['image']:
continue
im, im_info, im_id = roidb_reader(roidb, mode)
batch_out = [(im, im_info, im_id)]
......@@ -159,23 +146,14 @@ def coco(settings,
return reader
def train(settings,
batch_size,
total_batch_size=None,
padding_total=False,
shuffle=True):
def train(batch_size, total_batch_size=None, padding_total=False, shuffle=True):
return coco(
settings,
'train',
batch_size,
total_batch_size,
padding_total,
shuffle=shuffle)
'train', batch_size, total_batch_size, padding_total, shuffle=shuffle)
def test(settings, batch_size, total_batch_size=None, padding_total=False):
return coco(settings, 'test', batch_size, total_batch_size, shuffle=False)
def test(batch_size, total_batch_size=None, padding_total=False):
return coco('test', batch_size, total_batch_size, shuffle=False)
def infer(settings):
return coco(settings, 'infer')
def infer():
return coco('infer')
......@@ -36,6 +36,7 @@ import matplotlib
matplotlib.use('Agg')
from pycocotools.coco import COCO
import box_utils
from config import cfg
logger = logging.getLogger(__name__)
......@@ -43,16 +44,16 @@ logger = logging.getLogger(__name__)
class JsonDataset(object):
"""A class representing a COCO json dataset."""
def __init__(self, args, train=False):
print('Creating: {}'.format(args.dataset))
self.name = args.dataset
def __init__(self, train=False):
print('Creating: {}'.format(cfg.dataset))
self.name = cfg.dataset
self.is_train = train
if self.is_train:
data_dir = args.train_data_dir
file_list = args.train_file_list
data_dir = cfg.train_data_dir
file_list = cfg.train_file_list
else:
data_dir = args.val_data_dir
file_list = args.val_file_list
data_dir = cfg.val_data_dir
file_list = cfg.val_file_list
self.image_directory = data_dir
self.COCO = COCO(file_list)
# Set up dataset classes
......@@ -90,7 +91,6 @@ class JsonDataset(object):
end_time = time.time()
print('_add_gt_annotations took {:.3f}s'.format(end_time -
start_time))
print('Appending horizontally-flipped training examples...')
self._extend_with_flipped_entries(roidb)
print('Loaded dataset: {:s}'.format(self.name))
......@@ -129,7 +129,7 @@ class JsonDataset(object):
width = entry['width']
height = entry['height']
for obj in objs:
if obj['area'] < -1: #cfg.TRAIN.GT_MIN_AREA:
if obj['area'] < cfg.TRAIN.gt_min_area:
continue
if 'ignore' in obj and obj['ignore'] == 1:
continue
......
......@@ -28,11 +28,12 @@ import reader
import models.model_builder as model_builder
import models.resnet as resnet
from learning_rate import exponential_with_warmup_decay
from config import cfg
def train(cfg):
def train():
learning_rate = cfg.learning_rate
image_shape = [3, cfg.max_size, cfg.max_size]
image_shape = [3, cfg.TRAIN.max_size, cfg.TRAIN.max_size]
if cfg.debug:
fluid.default_startup_program().random_seed = 1000
......@@ -43,9 +44,9 @@ def train(cfg):
devices = os.getenv("CUDA_VISIBLE_DEVICES") or ""
devices_num = len(devices.split(","))
total_batch_size = devices_num * cfg.TRAIN.im_per_batch
model = model_builder.FasterRCNN(
cfg=cfg,
add_conv_body_func=resnet.add_ResNet50_conv4_body,
add_roi_box_head_func=resnet.add_ResNet_roi_conv5_head,
use_pyreader=cfg.use_pyreader,
......@@ -58,18 +59,20 @@ def train(cfg):
rpn_reg_loss.persistable = True
loss = loss_cls + loss_bbox + rpn_cls_loss + rpn_reg_loss
boundaries = [120000, 160000]
values = [learning_rate, learning_rate * 0.1, learning_rate * 0.01]
boundaries = cfg.lr_steps
gamma = cfg.lr_gamma
step_num = len(lr_steps)
values = [learning_rate * (gamma**i) for i in range(step_num + 1)]
optimizer = fluid.optimizer.Momentum(
learning_rate=exponential_with_warmup_decay(
learning_rate=learning_rate,
boundaries=boundaries,
values=values,
warmup_iter=500,
warmup_factor=1.0 / 3.0),
regularization=fluid.regularizer.L2Decay(0.0001),
momentum=0.9)
warmup_iter=cfg.warm_up_iter,
warmup_factor=cfg.warm_up_factor),
regularization=fluid.regularizer.L2Decay(cfg.weight_decay),
momentum=cfg.momentum)
optimizer.minimize(loss)
fluid.memory_optimize(fluid.default_main_program())
......@@ -89,20 +92,16 @@ def train(cfg):
train_exe = fluid.ParallelExecutor(
use_cuda=bool(cfg.use_gpu), loss_name=loss.name)
assert cfg.batch_size % devices_num == 0
batch_size_per_dev = cfg.batch_size / devices_num
if cfg.use_pyreader:
train_reader = reader.train(
cfg,
batch_size=batch_size_per_dev,
total_batch_size=cfg.batch_size,
padding_total=cfg.padding_minibatch,
batch_size=cfg.TRAIN.im_per_batch,
total_batch_size=total_batch_size,
padding_total=cfg.TRAIN.padding_minibatch,
shuffle=True)
py_reader = model.py_reader
py_reader.decorate_paddle_reader(train_reader)
else:
train_reader = reader.train(
cfg, batch_size=cfg.batch_size, shuffle=True)
train_reader = reader.train(batch_size=total_batch_size, shuffle=True)
feeder = fluid.DataFeeder(place=place, feed_list=model.feeds())
def save_model(postfix):
......@@ -133,7 +132,7 @@ def train(cfg):
smoothed_loss.get_median_value(
), start_time - prev_start_time))
sys.stdout.flush()
if (iter_id + 1) % cfg.snapshot_stride == 0:
if (iter_id + 1) % cfg.TRAIN.snapshot_iter == 0:
save_model("model_iter{}".format(iter_id))
except fluid.core.EOFException:
py_reader.reset()
......@@ -159,7 +158,7 @@ def train(cfg):
iter_id, lr[0],
smoothed_loss.get_median_value(), start_time - prev_start_time))
sys.stdout.flush()
if (iter_id + 1) % cfg.snapshot_stride == 0:
if (iter_id + 1) % cfg.TRAIN.snapshot_iter == 0:
save_model("model_iter{}".format(iter_id))
if (iter_id + 1) == cfg.max_iter:
break
......@@ -175,6 +174,4 @@ def train(cfg):
if __name__ == '__main__':
args = parse_args()
print_arguments(args)
data_args = reader.Settings(args)
train(data_args)
train()
......@@ -18,7 +18,7 @@ Contains common utility functions.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import sys
import distutils.util
import numpy as np
import six
......@@ -26,6 +26,7 @@ from collections import deque
from paddle.fluid import core
import argparse
import functools
from config import *
def print_arguments(args):
......@@ -96,31 +97,33 @@ def parse_args():
add_arg('model_save_dir', str, 'output', "The path to save model.")
add_arg('pretrained_model', str, 'imagenet_resnet50_fusebn', "The init model path.")
add_arg('dataset', str, 'coco2017', "coco2014, coco2017.")
add_arg('data_dir', str, 'data/COCO17', "The data root path.")
add_arg('class_num', int, 81, "Class number.")
add_arg('data_dir', str, 'data/COCO17', "The data root path.")
add_arg('use_pyreader', bool, True, "Use pyreader.")
add_arg('use_profile', bool, False, "Whether use profiler.")
add_arg('padding_minibatch',bool, False,
"If False, only resize image and not pad, image shape is different between"
" GPUs in one mini-batch. If True, image shape is the same in one mini-batch.")
#SOLVER
add_arg('learning_rate', float, 0.01, "Learning rate.")
add_arg('max_iter', int, 180000, "Iter number.")
add_arg('log_window', int, 1, "Log smooth window, set 1 for debug, set 20 for train.")
add_arg('snapshot_stride', int, 10000, "save model every snapshot stride.")
add_arg('log_window', int, 20, "Log smooth window, set 1 for debug, set 20 for train.")
# FAST RCNN
# RPN
add_arg('anchor_sizes', int, [32,64,128,256,512], "The size of anchors.")
add_arg('aspect_ratios', float, [0.5,1.0,2.0], "The ratio of anchors.")
add_arg('variance', float, [1.,1.,1.,1.], "The variance of anchors.")
add_arg('rpn_stride', float, 16., "Stride of the feature map that RPN is attached.")
# FAST RCNN
add_arg('rpn_stride', float, [16.,16.], "Stride of the feature map that RPN is attached.")
add_arg('rpn_nms_thresh', float, 0.7, "NMS threshold used on RPN proposals")
# TRAIN TEST INFER
add_arg('batch_size', int, 1, "Minibatch size.")
add_arg('im_per_batch', int, 1, "Minibatch size.")
add_arg('max_size', int, 1333, "The resized image height.")
add_arg('scales', int, [800], "The resized image height.")
add_arg('batch_size_per_im',int, 512, "fast rcnn head batch size")
add_arg('mean_value', float, [102.9801, 115.9465, 122.7717], "pixel mean")
add_arg('nms_threshold', float, 0.5, "NMS threshold.")
add_arg('score_threshold', float, 0.05, "score threshold for NMS.")
add_arg('pixel_means', float, [102.9801, 115.9465, 122.7717], "pixel mean")
add_arg('nms_thresh', float, 0.5, "NMS threshold.")
add_arg('score_thresh', float, 0.05, "score threshold for NMS.")
add_arg('snapshot_stride', int, 10000, "save model every snapshot stride.")
add_arg('debug', bool, False, "Debug mode")
# SINGLE EVAL AND DRAW
add_arg('draw_threshold', float, 0.8, "Confidence threshold to draw bbox.")
......@@ -128,4 +131,9 @@ def parse_args():
add_arg('image_name', str, '', "The single image used to inference and visualize.")
# yapf: enable
args = parser.parse_args()
file_name = sys.argv[0]
if 'train' in file_name or 'profile' in file_name:
merge_cfg_from_args(args, 'train')
else:
merge_cfg_from_args(args, 'test')
return args
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册