add config for Faster RCNN (#1309)

* add config for Faster RCNN * adaptive value for lr_steps

add config for Faster RCNN (#1309)
* add config for Faster RCNN * adaptive value for lr_steps
6095dd07 · jerrywgz · GitHub · d507a7a7 · 6095dd07 · 6095dd07
13 changed files
--- a/fluid/faster_rcnn/config.py
+++ b/fluid/faster_rcnn/config.py
+#  Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#    http://www.apache.org/licenses/LICENSE-2.0
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License. 
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+from edict import AttrDict
+import six
+import numpy as np
+
+_C = AttrDict()
+cfg = _C
+
+#
+# Training options
+#
+_C.TRAIN = AttrDict()
+
+# scales an image's shortest side
+_C.TRAIN.scales = [800]
+
+# max size of longest side
+_C.TRAIN.max_size = 1333
+
+# images per GPU in minibatch
+_C.TRAIN.im_per_batch = 1
+
+# roi minibatch size per image
+_C.TRAIN.batch_size_per_im = 512
+
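+# target fraction of foreground rois in the roi minibatch
+# NOTE(review): the key below is spelled "fg_fractrion" (sic); models/model_builder.py reads cfg.TRAIN.fg_fractrion, so any rename must change both places.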
+_C.TRAIN.fg_fractrion = 0.25
+
+# overlap threshold for a foreground roi
+_C.TRAIN.fg_thresh = 0.5
+
+# overlap threshold for a background roi
+_C.TRAIN.bg_thresh_hi = 0.5
+_C.TRAIN.bg_thresh_lo = 0.0
+
+# If False, only resize image and not pad, image shape is different between
+# GPUs in one mini-batch. If True, image shape is the same in one mini-batch.
+_C.TRAIN.padding_minibatch = False
+
+# Snapshot period
+_C.TRAIN.snapshot_iter = 10000
+
+# number of RPN proposals to keep before NMS
+_C.TRAIN.rpn_pre_nms_top_n = 12000
+
+# number of RPN proposals to keep after NMS
+_C.TRAIN.rpn_post_nms_top_n = 2000
+
+# NMS threshold used on RPN proposals
+_C.TRAIN.rpn_nms_thresh = 0.7
+
+# min size in RPN proposals
+_C.TRAIN.rpn_min_size = 0.0
+
+# eta for adaptive NMS in RPN
+_C.TRAIN.rpn_eta = 1.0
+
+# number of RPN examples per image
+_C.TRAIN.rpn_batch_size_per_im = 256
+
+# remove anchors out of the image
+_C.TRAIN.rpn_straddle_thresh = 0.
+
+# target fraction of foreground examples per RPN minibatch
+_C.TRAIN.rpn_fg_fraction = 0.5
+
+# min overlap between anchor and gt box for the anchor to be a positive example
+_C.TRAIN.rpn_positive_overlap = 0.7
+
+# max overlap between anchor and gt box for the anchor to be a negative example
+_C.TRAIN.rpn_negative_overlap = 0.3
+
+# stopgrad at a specified stage
+_C.TRAIN.freeze_at = 2
+
+# min area of ground truth box
+_C.TRAIN.gt_min_area = -1
+
+#
+# Inference options
+#
+_C.TEST = AttrDict()
+
+# scales an image's shortest side
+_C.TEST.scales = [800]
+
+# max size of longest side
+_C.TEST.max_size = 1333
+
+# eta for adaptive NMS in RPN
+_C.TEST.rpn_eta = 1.0
+
+# min score threshold to infer
+_C.TEST.score_thresh = 0.05
+
+# overlap threshold used for NMS
+_C.TEST.nms_thresh = 0.5
+
+# number of RPN proposals to keep before NMS
+_C.TEST.rpn_pre_nms_top_n = 6000
+
+# number of RPN proposals to keep after NMS
+_C.TEST.rpn_post_nms_top_n = 1000
+
+# min size in RPN proposals
+_C.TEST.rpn_min_size = 0.0
+
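+# max number of detections kept per image
+# NOTE(review): the key below is spelled "detectiions_per_im" (sic); eval_helper.py reads cfg.TEST.detectiions_per_im, so any rename must change both places.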
+_C.TEST.detectiions_per_im = 100
+
+# NMS threshold used on RPN proposals
+_C.TEST.rpn_nms_thresh = 0.7
+
+#
+# Model options
+#
+
+# weight for bbox regression targets
+_C.bbox_reg_weights = [0.1, 0.1, 0.2, 0.2]
+
+# RPN anchor sizes
+_C.anchor_sizes = [32, 64, 128, 256, 512]
+
+# RPN anchor ratio
+_C.aspect_ratio = [0.5, 1, 2]
+
+# variance of anchors
+_C.variances = [1., 1., 1., 1.]
+
+# stride of feature map
+_C.rpn_stride = [16.0, 16.0]
+
+#
+# SOLVER options
+#
+
+# base learning rate from which the per-step learning rates are derived
+_C.learning_rate = 0.01
+
+# maximum number of iterations
+_C.max_iter = 180000
+
+# warm up to learning rate 
+_C.warm_up_iter = 500
+_C.warm_up_factor = 1. / 3.
+
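+# iterations at which the learning rate is decayed (lr_steps) and the
+# factor it is multiplied by at each of those steps (lr_gamma)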
+_C.lr_steps = [120000, 160000]
+_C.lr_gamma = 0.1
+
+# L2 regularization hyperparameter
+_C.weight_decay = 0.0001
+
+# momentum with SGD
+_C.momentum = 0.9
+
+#
+# ENV options
+#
+
+# support both CPU and GPU
+_C.use_gpu = True
+
+# Whether to use parallel
+_C.parallel = True
+
+# Class number
+_C.class_num = 81
+
+# support pyreader
+_C.use_pyreader = True
+
+# pixel mean values
+_C.pixel_means = [102.9801, 115.9465, 122.7717]
+
+# clip box to prevent overflowing
+_C.bbox_clip = np.log(1000. / 16.)
+
+# dataset path
+_C.train_file_list = 'annotations/instances_train2017.json'
+_C.train_data_dir = 'train2017'
+_C.val_file_list = 'annotations/instances_val2017.json'
+_C.val_data_dir = 'val2017'
+
+
+def merge_cfg_from_args(args, mode):
+    """Merge config keys, values in args into the global config.
+
+    Args:
+        args: object whose attributes (read through ``vars``) are the
+            option names and values, e.g. an ``argparse.Namespace``.
+        mode: 'train' selects ``_C.TRAIN`` as the mode-specific section;
+            any other value selects ``_C.TEST``.
+
+    A key that already exists in the selected section is overwritten
+    there; every other key is stored at the top level of ``_C``.
+    String values that spell a Python literal ('[800]', 'True', '0.01')
+    are converted to that literal; all other values are stored unchanged.
+    """
+    # Imported locally so this function does not rely on a new
+    # file-level import.
+    import ast
+
+    sub_d = _C.TRAIN if mode == 'train' else _C.TEST
+    for k, v in sorted(six.iteritems(vars(args))):
+        value = v
+        if isinstance(v, six.string_types):
+            try:
+                # literal_eval accepts only literals.  Unlike eval() it
+                # never executes code taken from the command line and
+                # never turns a plain word such as 'max' or 'id' into
+                # the builtin of that name.  Arithmetic expressions
+                # (e.g. '1./3.') are therefore kept as strings.
+                value = ast.literal_eval(v)
+            except (ValueError, SyntaxError, TypeError):
+                # Not a literal (e.g. a path or dataset name): keep the
+                # raw string.
+                value = v
+        if k in sub_d:
+            sub_d[k] = value
+        else:
+            _C[k] = value
--- a/fluid/faster_rcnn/data_utils.py
+++ b/fluid/faster_rcnn/data_utils.py
@@ -27,21 +27,27 @@ from __future__ import unicode_literals

 import cv2
 import numpy as np
+from config import cfg


-def get_image_blob(roidb, settings):
+def get_image_blob(roidb, mode):
    """Builds an input blob from the images in the roidb at the specified
    scales.
    """
-    scale_ind = np.random.randint(0, high=len(settings.scales))
+    if mode == 'train':
+        scales = cfg.TRAIN.scales
+        scale_ind = np.random.randint(0, high=len(scales))
+        target_size = scales[scale_ind]
+        max_size = cfg.TRAIN.max_size
+    else:
+        target_size = cfg.TEST.scales[0]
+        max_size = cfg.TEST.max_size
    im = cv2.imread(roidb['image'])
    assert im is not None, \
        'Failed to read image \'{}\''.format(roidb['image'])
    if roidb['flipped']:
        im = im[:, ::-1, :]
-    target_size = settings.scales[scale_ind]
-    im, im_scale = prep_im_for_blob(im, settings.mean_value, target_size,
-                                    settings.max_size)
+    im, im_scale = prep_im_for_blob(im, cfg.pixel_means, target_size, max_size)

    return im, im_scale


--- a/fluid/faster_rcnn/edict.py
+++ b/fluid/faster_rcnn/edict.py
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+
+class AttrDict(dict):
+    """A dict whose items can also be read and written as attributes."""
+
+    def __init__(self, *args, **kwargs):
+        # Accepts exactly what dict() accepts.
+        super(AttrDict, self).__init__(*args, **kwargs)
+
+    def __getattr__(self, name):
+        # Only reached when normal attribute lookup has failed.
+        instance_attrs = self.__dict__
+        if name in instance_attrs:
+            return instance_attrs[name]
+        if name not in self:
+            raise AttributeError(name)
+        return self[name]
+
+    def __setattr__(self, name, value):
+        # Names already present in the instance __dict__ stay there;
+        # everything else is stored as a dict item.
+        target = self.__dict__ if name in self.__dict__ else self
+        target[name] = value
--- a/fluid/faster_rcnn/eval_coco_map.py
+++ b/fluid/faster_rcnn/eval_coco_map.py
@@ -29,18 +29,20 @@ import models.resnet as resnet
 import json
 from pycocotools.coco import COCO
 from pycocotools.cocoeval import COCOeval, Params
+from config import cfg


-def eval(cfg):
-
+def eval():
    if '2014' in cfg.dataset:
        test_list = 'annotations/instances_val2014.json'
    elif '2017' in cfg.dataset:
        test_list = 'annotations/instances_val2017.json'

-    image_shape = [3, cfg.max_size, cfg.max_size]
+    image_shape = [3, cfg.TEST.max_size, cfg.TEST.max_size]
    class_nums = cfg.class_num
-    batch_size = cfg.batch_size
+    devices = os.getenv("CUDA_VISIBLE_DEVICES") or ""
+    devices_num = len(devices.split(","))
+    total_batch_size = devices_num * cfg.TRAIN.im_per_batch
    cocoGt = COCO(os.path.join(cfg.data_dir, test_list))
    numId_to_catId_map = {i + 1: v for i, v in enumerate(cocoGt.getCatIds())}
    category_ids = cocoGt.getCatIds()
@@ -51,7 +53,6 @@ def eval(cfg):
    label_list[0] = ['background']

    model = model_builder.FasterRCNN(
-        cfg=cfg,
        add_conv_body_func=resnet.add_ResNet50_conv4_body,
        add_roi_box_head_func=resnet.add_ResNet_roi_conv5_head,
        use_pyreader=False,
@@ -66,7 +67,7 @@ def eval(cfg):
            return os.path.exists(os.path.join(cfg.pretrained_model, var.name))
        fluid.io.load_vars(exe, cfg.pretrained_model, predicate=if_exist)
    # yapf: enable
-    test_reader = reader.test(cfg, batch_size)
+    test_reader = reader.test(total_batch_size)
    feeder = fluid.DataFeeder(place=place, feed_list=model.feeds())

    dts_res = []
@@ -80,11 +81,11 @@ def eval(cfg):
            fetch_list=[v.name for v in fetch_list],
            feed=feeder.feed(batch_data),
            return_numpy=False)
-        new_lod, nmsed_out = get_nmsed_box(cfg, rpn_rois_v, confs_v, locs_v,
+        new_lod, nmsed_out = get_nmsed_box(rpn_rois_v, confs_v, locs_v,
                                           class_nums, im_info,
                                           numId_to_catId_map)

-        dts_res += get_dt_res(batch_size, new_lod, nmsed_out, batch_data)
+        dts_res += get_dt_res(total_batch_size, new_lod, nmsed_out, batch_data)
        end = time.time()
        print('batch id: {}, time: {}'.format(batch_id, end - start))
    with open("detection_result.json", 'w') as outfile:
@@ -100,6 +101,4 @@ def eval(cfg):
 if __name__ == '__main__':
    args = parse_args()
    print_arguments(args)
-
-    data_args = reader.Settings(args)
-    eval(data_args)
+    eval()
--- a/fluid/faster_rcnn/eval_helper.py
+++ b/fluid/faster_rcnn/eval_helper.py
@@ -20,6 +20,7 @@ import box_utils
 from PIL import Image
 from PIL import ImageDraw
 from PIL import ImageFont
+from config import cfg


 def box_decoder(target_box, prior_box, prior_box_var):
@@ -31,10 +32,8 @@ def box_decoder(target_box, prior_box, prior_box_var):
    prior_box_loc[:, 3] = (prior_box[:, 3] + prior_box[:, 1]) / 2
    pred_bbox = np.zeros_like(target_box, dtype=np.float32)
    for i in range(prior_box.shape[0]):
-        dw = np.minimum(prior_box_var[2] * target_box[i, 2::4],
-                        np.log(1000. / 16.))
-        dh = np.minimum(prior_box_var[3] * target_box[i, 3::4],
-                        np.log(1000. / 16.))
+        dw = np.minimum(prior_box_var[2] * target_box[i, 2::4], cfg.bbox_clip)
+        dh = np.minimum(prior_box_var[3] * target_box[i, 3::4], cfg.bbox_clip)
        pred_bbox[i, 0::4] = prior_box_var[0] * target_box[
            i, 0::4] * prior_box_loc[i, 0] + prior_box_loc[i, 2]
        pred_bbox[i, 1::4] = prior_box_var[1] * target_box[
@@ -67,11 +66,11 @@ def clip_tiled_boxes(boxes, im_shape):
    return boxes


-def get_nmsed_box(args, rpn_rois, confs, locs, class_nums, im_info,
+def get_nmsed_box(rpn_rois, confs, locs, class_nums, im_info,
                  numId_to_catId_map):
    lod = rpn_rois.lod()[0]
    rpn_rois_v = np.array(rpn_rois)
-    variance_v = np.array([0.1, 0.1, 0.2, 0.2])
+    variance_v = np.array(cfg.bbox_reg_weights)
    confs_v = np.array(confs)
    locs_v = np.array(locs)
    rois = box_decoder(locs_v, rpn_rois_v, variance_v)
@@ -89,12 +88,12 @@ def get_nmsed_box(args, rpn_rois, confs, locs, class_nums, im_info,
        cls_boxes = [[] for _ in range(class_nums)]
        scores_n = confs_v[start:end, :]
        for j in range(1, class_nums):
-            inds = np.where(scores_n[:, j] > args.score_threshold)[0]
+            inds = np.where(scores_n[:, j] > cfg.TEST.score_thresh)[0]
            scores_j = scores_n[inds, j]
            rois_j = rois_n[inds, j * 4:(j + 1) * 4]
            dets_j = np.hstack((rois_j, scores_j[:, np.newaxis])).astype(
                np.float32, copy=False)
-            keep = box_utils.nms(dets_j, args.nms_threshold)
+            keep = box_utils.nms(dets_j, cfg.TEST.nms_thresh)
            nms_dets = dets_j[keep, :]
            #add labels
            cat_id = numId_to_catId_map[j]
@@ -105,8 +104,8 @@ def get_nmsed_box(args, rpn_rois, confs, locs, class_nums, im_info,
    # Limit to max_per_image detections **over all classes**
        image_scores = np.hstack(
            [cls_boxes[j][:, -2] for j in range(1, class_nums)])
-        if len(image_scores) > 100:
-            image_thresh = np.sort(image_scores)[-100]
+        if len(image_scores) > cfg.TEST.detectiions_per_im:
+            image_thresh = np.sort(image_scores)[-cfg.TEST.detectiions_per_im]
            for j in range(1, class_nums):
                keep = np.where(cls_boxes[j][:, -2] >= image_thresh)[0]
                cls_boxes[j] = cls_boxes[j][keep, :]

--- a/fluid/faster_rcnn/infer.py
+++ b/fluid/faster_rcnn/infer.py
+import os
+import time
+import numpy as np
+from eval_helper import get_nmsed_box
+from eval_helper import get_dt_res
+from eval_helper import draw_bounding_box_on_image
+import paddle
+import paddle.fluid as fluid
+import reader
+from utility import print_arguments, parse_args
+import models.model_builder as model_builder
+import models.resnet as resnet
+import json
+from pycocotools.coco import COCO
+from pycocotools.cocoeval import COCOeval, Params
+from config import cfg
+
+
+def infer():
+    """Run the detector on one reader batch and draw the resulting boxes.
+
+    All settings are read from the global ``cfg`` (dataset, data_dir,
+    TEST.max_size, class_num, use_gpu, pretrained_model, image_path,
+    image_name, draw_threshold).  Nothing is returned; the result is
+    handed to ``draw_bounding_box_on_image``.
+    """
+
+    # Pick the validation annotation file from the dataset name.
+    # NOTE(review): if cfg.dataset contains neither '2014' nor '2017',
+    # test_list is never assigned and the COCO(...) line below fails.
+    if '2014' in cfg.dataset:
+        test_list = 'annotations/instances_val2014.json'
+    elif '2017' in cfg.dataset:
+        test_list = 'annotations/instances_val2017.json'
+
+    cocoGt = COCO(os.path.join(cfg.data_dir, test_list))
+    # 1-based contiguous index -> COCO category id.
+    numId_to_catId_map = {i + 1: v for i, v in enumerate(cocoGt.getCatIds())}
+    category_ids = cocoGt.getCatIds()
+    # COCO category id -> category name.
+    label_list = {
+        item['id']: item['name']
+        for item in cocoGt.loadCats(category_ids)
+    }
+    # NOTE(review): key 0 gets a one-element list, while the other entries
+    # hold item['name'] directly - confirm the drawing helper expects this.
+    label_list[0] = ['background']
+    image_shape = [3, cfg.TEST.max_size, cfg.TEST.max_size]
+    class_nums = cfg.class_num
+
+    # Build the network in inference mode (is_train=False, no pyreader).
+    model = model_builder.FasterRCNN(
+        add_conv_body_func=resnet.add_ResNet50_conv4_body,
+        add_roi_box_head_func=resnet.add_ResNet_roi_conv5_head,
+        use_pyreader=False,
+        is_train=False)
+    model.build_model(image_shape)
+    rpn_rois, confs, locs = model.eval_out()
+    place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace()
+    exe = fluid.Executor(place)
+    # yapf: disable
+    if cfg.pretrained_model:
+        # Load only the variables that have a file of the same name in
+        # the pretrained_model directory.
+        def if_exist(var):
+            return os.path.exists(os.path.join(cfg.pretrained_model, var.name))
+        fluid.io.load_vars(exe, cfg.pretrained_model, predicate=if_exist)
+    # yapf: enable
+    infer_reader = reader.infer()
+    feeder = fluid.DataFeeder(place=place, feed_list=model.feeds())
+
+    # NOTE(review): dts_res is not used again in this function.
+    dts_res = []
+    fetch_list = [rpn_rois, confs, locs]
+    # Only the first batch produced by the reader is processed.
+    data = next(infer_reader())
+    # Second field of the first sample in the batch.
+    im_info = [data[0][1]]
+    rpn_rois_v, confs_v, locs_v = exe.run(
+        fetch_list=[v.name for v in fetch_list],
+        feed=feeder.feed(data),
+        return_numpy=False)
+    new_lod, nmsed_out = get_nmsed_box(rpn_rois_v, confs_v, locs_v, class_nums,
+                                       im_info, numId_to_catId_map)
+    path = os.path.join(cfg.image_path, cfg.image_name)
+    draw_bounding_box_on_image(path, nmsed_out, cfg.draw_threshold, label_list)
+
+
+# Script entry point.
+# NOTE(review): infer() reads the global cfg rather than args; presumably
+# parse_args() merges the parsed arguments into cfg - confirm in utility.py.
+if __name__ == '__main__':
+    args = parse_args()
+    print_arguments(args)
+    infer()
--- a/fluid/faster_rcnn/models/model_builder.py
+++ b/fluid/faster_rcnn/models/model_builder.py
@@ -17,11 +17,11 @@ from paddle.fluid.param_attr import ParamAttr
 from paddle.fluid.initializer import Constant
 from paddle.fluid.initializer import Normal
 from paddle.fluid.regularizer import L2Decay
+from config import cfg


 class FasterRCNN(object):
    def __init__(self,
-                 cfg=None,
                 add_conv_body_func=None,
                 add_roi_box_head_func=None,
                 is_train=True,
@@ -29,7 +29,6 @@ class FasterRCNN(object):
                 use_random=True):
        self.add_conv_body_func = add_conv_body_func
        self.add_roi_box_head_func = add_roi_box_head_func
-        self.cfg = cfg
        self.is_train = is_train
        self.use_pyreader = use_pyreader
        self.use_random = use_random
@@ -111,10 +110,10 @@ class FasterRCNN(object):
                name="conv_rpn_b", learning_rate=2., regularizer=L2Decay(0.)))
        self.anchor, self.var = fluid.layers.anchor_generator(
            input=rpn_conv,
-            anchor_sizes=self.cfg.anchor_sizes,
-            aspect_ratios=self.cfg.aspect_ratios,
-            variance=self.cfg.variance,
-            stride=[16.0, 16.0])
+            anchor_sizes=cfg.anchor_sizes,
+            aspect_ratios=cfg.aspect_ratio,
+            variance=cfg.variances,
+            stride=cfg.rpn_stride)
        num_anchor = self.anchor.shape[2]
        # Proposal classification scores
        self.rpn_cls_score = fluid.layers.conv2d(
@@ -152,8 +151,12 @@ class FasterRCNN(object):
        rpn_cls_score_prob = fluid.layers.sigmoid(
            self.rpn_cls_score, name='rpn_cls_score_prob')

-        pre_nms_top_n = 12000 if self.is_train else 6000
-        post_nms_top_n = 2000 if self.is_train else 1000
+        param_obj = cfg.TRAIN if self.is_train else cfg.TEST
+        pre_nms_top_n = param_obj.rpn_pre_nms_top_n
+        post_nms_top_n = param_obj.rpn_post_nms_top_n
+        nms_thresh = param_obj.rpn_nms_thresh
+        min_size = param_obj.rpn_min_size
+        eta = param_obj.rpn_eta
        rpn_rois, rpn_roi_probs = fluid.layers.generate_proposals(
            scores=rpn_cls_score_prob,
            bbox_deltas=self.rpn_bbox_pred,
@@ -162,9 +165,9 @@ class FasterRCNN(object):
            variances=self.var,
            pre_nms_top_n=pre_nms_top_n,
            post_nms_top_n=post_nms_top_n,
-            nms_thresh=0.7,
-            min_size=0.0,
-            eta=1.0)
+            nms_thresh=nms_thresh,
+            min_size=min_size,
+            eta=eta)
        self.rpn_rois = rpn_rois
        if self.is_train:
            outs = fluid.layers.generate_proposal_labels(
@@ -173,13 +176,13 @@ class FasterRCNN(object):
                is_crowd=self.is_crowd,
                gt_boxes=self.gt_box,
                im_info=self.im_info,
-                batch_size_per_im=self.cfg.batch_size_per_im,
-                fg_fraction=0.25,
-                fg_thresh=0.5,
-                bg_thresh_hi=0.5,
-                bg_thresh_lo=0.0,
-                bbox_reg_weights=[0.1, 0.1, 0.2, 0.2],
-                class_nums=self.cfg.class_num,
+                batch_size_per_im=cfg.TRAIN.batch_size_per_im,
+                fg_fraction=cfg.TRAIN.fg_fractrion,
+                fg_thresh=cfg.TRAIN.fg_thresh,
+                bg_thresh_hi=cfg.TRAIN.bg_thresh_hi,
+                bg_thresh_lo=cfg.TRAIN.bg_thresh_lo,
+                bbox_reg_weights=cfg.bbox_reg_weights,
+                class_nums=cfg.class_num,
                use_random=self.use_random)

            self.rois = outs[0]
@@ -201,7 +204,7 @@ class FasterRCNN(object):
            spatial_scale=0.0625)
        rcnn_out = self.add_roi_box_head_func(pool)
        self.cls_score = fluid.layers.fc(input=rcnn_out,
-                                         size=self.cfg.class_num,
+                                         size=cfg.class_num,
                                         act=None,
                                         name='cls_score',
                                         param_attr=ParamAttr(
@@ -213,7 +216,7 @@ class FasterRCNN(object):
                                             learning_rate=2.,
                                             regularizer=L2Decay(0.)))
        self.bbox_pred = fluid.layers.fc(input=rcnn_out,
-                                         size=4 * self.cfg.class_num,
+                                         size=4 * cfg.class_num,
                                         act=None,
                                         name='bbox_pred',
                                         param_attr=ParamAttr(
@@ -257,7 +260,6 @@ class FasterRCNN(object):
            x=rpn_cls_score_reshape, shape=(0, -1, 1))
        rpn_bbox_pred_reshape = fluid.layers.reshape(
            x=rpn_bbox_pred_reshape, shape=(0, -1, 4))
-
        score_pred, loc_pred, score_tgt, loc_tgt = \
            fluid.layers.rpn_target_assign(
                bbox_pred=rpn_bbox_pred_reshape,
@@ -267,11 +269,11 @@ class FasterRCNN(object):
                gt_boxes=self.gt_box,
                is_crowd=self.is_crowd,
                im_info=self.im_info,
-                rpn_batch_size_per_im=256,
-                rpn_straddle_thresh=0.0,
-                rpn_fg_fraction=0.5,
-                rpn_positive_overlap=0.7,
-                rpn_negative_overlap=0.3,
+                rpn_batch_size_per_im=cfg.TRAIN.rpn_batch_size_per_im,
+                rpn_straddle_thresh=cfg.TRAIN.rpn_straddle_thresh,
+                rpn_fg_fraction=cfg.TRAIN.rpn_fg_fraction,
+                rpn_positive_overlap=cfg.TRAIN.rpn_positive_overlap,
+                rpn_negative_overlap=cfg.TRAIN.rpn_negative_overlap,
                use_random=self.use_random)
        score_tgt = fluid.layers.cast(x=score_tgt, dtype='float32')
        rpn_cls_loss = fluid.layers.sigmoid_cross_entropy_with_logits(

--- a/fluid/faster_rcnn/models/resnet.py
+++ b/fluid/faster_rcnn/models/resnet.py
@@ -16,6 +16,7 @@ import paddle.fluid as fluid
 from paddle.fluid.param_attr import ParamAttr
 from paddle.fluid.initializer import Constant
 from paddle.fluid.regularizer import L2Decay
+from config import cfg


 def conv_bn_layer(input,
@@ -137,7 +138,7 @@ ResNet_cfg = {
 }


-def add_ResNet50_conv4_body(body_input, freeze_at=2):
+def add_ResNet50_conv4_body(body_input):
    stages, block_func = ResNet_cfg[50]
    stages = stages[0:3]
    conv1 = conv_affine_layer(
@@ -149,13 +150,13 @@ def add_ResNet50_conv4_body(body_input, freeze_at=2):
        pool_stride=2,
        pool_padding=1)
    res2 = layer_warp(block_func, pool1, 64, stages[0], 1, name="res2")
-    if freeze_at == 2:
+    if cfg.TRAIN.freeze_at == 2:
        res2.stop_gradient = True
    res3 = layer_warp(block_func, res2, 128, stages[1], 2, name="res3")
-    if freeze_at == 3:
+    if cfg.TRAIN.freeze_at == 3:
        res3.stop_gradient = True
    res4 = layer_warp(block_func, res3, 256, stages[2], 2, name="res4")
-    if freeze_at == 4:
+    if cfg.TRAIN.freeze_at == 4:
        res4.stop_gradient = True
    return res4


--- a/fluid/faster_rcnn/profile.py
+++ b/fluid/faster_rcnn/profile.py
@@ -26,19 +26,18 @@ import paddle.fluid.profiler as profiler
 import models.model_builder as model_builder
 import models.resnet as resnet
 from learning_rate import exponential_with_warmup_decay
+from config import cfg


-def train(cfg):
-    batch_size = cfg.batch_size
+def train():
    learning_rate = cfg.learning_rate
-    image_shape = [3, cfg.max_size, cfg.max_size]
+    image_shape = [3, cfg.TRAIN.max_size, cfg.TRAIN.max_size]
    num_iterations = cfg.max_iter

    devices = os.getenv("CUDA_VISIBLE_DEVICES") or ""
    devices_num = len(devices.split(","))
-
+    total_batch_size = devices_num * cfg.TRAIN.im_per_batch
    model = model_builder.FasterRCNN(
-        cfg=cfg,
        add_conv_body_func=resnet.add_ResNet50_conv4_body,
        add_roi_box_head_func=resnet.add_ResNet_roi_conv5_head,
        use_pyreader=cfg.use_pyreader,
@@ -51,8 +50,10 @@ def train(cfg):
    rpn_reg_loss.persistable = True
    loss = loss_cls + loss_bbox + rpn_cls_loss + rpn_reg_loss

-    boundaries = [120000, 160000]
-    values = [learning_rate, learning_rate * 0.1, learning_rate * 0.01]
+    boundaries = cfg.lr_steps
+    gamma = cfg.lr_gamma
+    step_num = len(lr_steps)
+    values = [learning_rate * (gamma**i) for i in range(step_num + 1)]

    optimizer = fluid.optimizer.Momentum(
        learning_rate=exponential_with_warmup_decay(
@@ -82,22 +83,16 @@ def train(cfg):
        train_exe = fluid.ParallelExecutor(
            use_cuda=bool(cfg.use_gpu), loss_name=loss.name)

-    assert cfg.batch_size % devices_num == 0, \
-        "batch_size = %d, devices_num = %d" %(cfg.batch_size, devices_num)
-
-    batch_size_per_dev = cfg.batch_size / devices_num
    if cfg.use_pyreader:
        train_reader = reader.train(
-            cfg,
-            batch_size=batch_size_per_dev,
-            total_batch_size=cfg.batch_size,
-            padding_total=cfg.padding_minibatch,
+            batch_size=cfg.TRAIN.im_per_batch,
+            total_batch_size=total_batch_size,
+            padding_total=cfg.TRAIN.padding_minibatch,
            shuffle=False)
        py_reader = model.py_reader
        py_reader.decorate_paddle_reader(train_reader)
    else:
-        train_reader = reader.train(
-            cfg, batch_size=cfg.batch_size, shuffle=False)
+        train_reader = reader.train(batch_size=total_batch_size, shuffle=False)
        feeder = fluid.DataFeeder(place=place, feed_list=model.feeds())

    fetch_list = [loss, loss_cls, loss_bbox, rpn_cls_loss, rpn_reg_loss]
@@ -163,8 +158,7 @@ def train(cfg):
    run_func(2)
    # profiling
    start = time.time()
-    use_profile = False
-    if use_profile:
+    if cfg.use_profile:
        with profiler.profiler('GPU', 'total', '/tmp/profile_file'):
            reader_time, run_time, total_images = run_func(num_iterations)
    else:
@@ -181,6 +175,4 @@ def train(cfg):
 if __name__ == '__main__':
    args = parse_args()
    print_arguments(args)
-
-    data_args = reader.Settings(args)
-    train(data_args)
+    train()
--- a/fluid/faster_rcnn/reader.py
+++ b/fluid/faster_rcnn/reader.py
@@ -26,58 +26,45 @@ from collections import deque

 from roidbs import JsonDataset
 import data_utils
+from config import cfg


-class Settings(object):
-    def __init__(self, args=None):
-        for arg, value in sorted(six.iteritems(vars(args))):
-            setattr(self, arg, value)
-
-        if 'coco2014' in args.dataset:
-            self.class_nums = 81
-            self.train_file_list = 'annotations/instances_train2014.json'
-            self.train_data_dir = 'train2014'
-            self.val_file_list = 'annotations/instances_val2014.json'
-            self.val_data_dir = 'val2014'
-        elif 'coco2017' in args.dataset:
-            self.class_nums = 81
-            self.train_file_list = 'annotations/instances_train2017.json'
-            self.train_data_dir = 'train2017'
-            self.val_file_list = 'annotations/instances_val2017.json'
-            self.val_data_dir = 'val2017'
-        else:
-            raise NotImplementedError('Dataset {} not supported'.format(
-                self.dataset))
-        self.mean_value = np.array(self.mean_value)[
-            np.newaxis, np.newaxis, :].astype('float32')
-
-
-def coco(settings,
-         mode,
+def coco(mode,
         batch_size=None,
         total_batch_size=None,
         padding_total=False,
         shuffle=False):
+    if 'coco2014' in cfg.dataset:
+        cfg.train_file_list = 'annotations/instances_train2014.json'
+        cfg.train_data_dir = 'train2014'
+        cfg.val_file_list = 'annotations/instances_val2014.json'
+        cfg.val_data_dir = 'val2014'
+    elif 'coco2017' in cfg.dataset:
+        cfg.train_file_list = 'annotations/instances_train2017.json'
+        cfg.train_data_dir = 'train2017'
+        cfg.val_file_list = 'annotations/instances_val2017.json'
+        cfg.val_data_dir = 'val2017'
+    else:
+        raise NotImplementedError('Dataset {} not supported'.format(
+            cfg.dataset))
+    cfg.mean_value = np.array(cfg.pixel_means)[np.newaxis,
+                                               np.newaxis, :].astype('float32')
    total_batch_size = total_batch_size if total_batch_size else batch_size
    if mode != 'infer':
        assert total_batch_size % batch_size == 0
    if mode == 'train':
-        settings.train_file_list = os.path.join(settings.data_dir,
-                                                settings.train_file_list)
-        settings.train_data_dir = os.path.join(settings.data_dir,
-                                               settings.train_data_dir)
+        cfg.train_file_list = os.path.join(cfg.data_dir, cfg.train_file_list)
+        cfg.train_data_dir = os.path.join(cfg.data_dir, cfg.train_data_dir)
    elif mode == 'test' or mode == 'infer':
-        settings.val_file_list = os.path.join(settings.data_dir,
-                                              settings.val_file_list)
-        settings.val_data_dir = os.path.join(settings.data_dir,
-                                             settings.val_data_dir)
-    json_dataset = JsonDataset(settings, train=(mode == 'train'))
+        cfg.val_file_list = os.path.join(cfg.data_dir, cfg.val_file_list)
+        cfg.val_data_dir = os.path.join(cfg.data_dir, cfg.val_data_dir)
+    json_dataset = JsonDataset(train=(mode == 'train'))
    roidbs = json_dataset.get_roidb()

-    print("{} on {} with {} roidbs".format(mode, settings.dataset, len(roidbs)))
+    print("{} on {} with {} roidbs".format(mode, cfg.dataset, len(roidbs)))

    def roidb_reader(roidb, mode):
-        im, im_scales = data_utils.get_image_blob(roidb, settings)
+        im, im_scales = data_utils.get_image_blob(roidb, mode)
        im_id = roidb['id']
        im_height = np.round(roidb['height'] * im_scales)
        im_width = np.round(roidb['width'] * im_scales)
@@ -150,7 +137,7 @@ def coco(settings,

        else:
            for roidb in roidbs:
-                if settings.image_name not in roidb['image']:
+                if cfg.image_name not in roidb['image']:
                    continue
                im, im_info, im_id = roidb_reader(roidb, mode)
                batch_out = [(im, im_info, im_id)]
@@ -159,23 +146,14 @@ def coco(settings,
    return reader


-def train(settings,
-          batch_size,
-          total_batch_size=None,
-          padding_total=False,
-          shuffle=True):
+def train(batch_size, total_batch_size=None, padding_total=False, shuffle=True):
    return coco(
-        settings,
-        'train',
-        batch_size,
-        total_batch_size,
-        padding_total,
-        shuffle=shuffle)
+        'train', batch_size, total_batch_size, padding_total, shuffle=shuffle)


-def test(settings, batch_size, total_batch_size=None, padding_total=False):
-    return coco(settings, 'test', batch_size, total_batch_size, shuffle=False)
+def test(batch_size, total_batch_size=None, padding_total=False):
+    return coco('test', batch_size, total_batch_size, shuffle=False)


-def infer(settings):
-    return coco(settings, 'infer')
+def infer():
+    return coco('infer')
--- a/fluid/faster_rcnn/roidbs.py
+++ b/fluid/faster_rcnn/roidbs.py
@@ -36,6 +36,7 @@ import matplotlib
 matplotlib.use('Agg')
 from pycocotools.coco import COCO
 import box_utils
+from config import cfg

 logger = logging.getLogger(__name__)

@@ -43,16 +44,16 @@ logger = logging.getLogger(__name__)
 class JsonDataset(object):
    """A class representing a COCO json dataset."""

-    def __init__(self, args, train=False):
-        print('Creating: {}'.format(args.dataset))
-        self.name = args.dataset
+    def __init__(self, train=False):
+        print('Creating: {}'.format(cfg.dataset))
+        self.name = cfg.dataset
        self.is_train = train
        if self.is_train:
-            data_dir = args.train_data_dir
-            file_list = args.train_file_list
+            data_dir = cfg.train_data_dir
+            file_list = cfg.train_file_list
        else:
-            data_dir = args.val_data_dir
-            file_list = args.val_file_list
+            data_dir = cfg.val_data_dir
+            file_list = cfg.val_file_list
        self.image_directory = data_dir
        self.COCO = COCO(file_list)
        # Set up dataset classes
@@ -90,7 +91,6 @@ class JsonDataset(object):
            end_time = time.time()
            print('_add_gt_annotations took {:.3f}s'.format(end_time -
                                                            start_time))
-
            print('Appending horizontally-flipped training examples...')
            self._extend_with_flipped_entries(roidb)
        print('Loaded dataset: {:s}'.format(self.name))
@@ -129,7 +129,7 @@ class JsonDataset(object):
        width = entry['width']
        height = entry['height']
        for obj in objs:
-            if obj['area'] < -1:  #cfg.TRAIN.GT_MIN_AREA:
+            if obj['area'] < cfg.TRAIN.gt_min_area:
                continue
            if 'ignore' in obj and obj['ignore'] == 1:
                continue

--- a/fluid/faster_rcnn/train.py
+++ b/fluid/faster_rcnn/train.py
@@ -28,11 +28,12 @@ import reader
 import models.model_builder as model_builder
 import models.resnet as resnet
 from learning_rate import exponential_with_warmup_decay
+from config import cfg


-def train(cfg):
+def train():
    learning_rate = cfg.learning_rate
-    image_shape = [3, cfg.max_size, cfg.max_size]
+    image_shape = [3, cfg.TRAIN.max_size, cfg.TRAIN.max_size]

    if cfg.debug:
        fluid.default_startup_program().random_seed = 1000
@@ -43,9 +44,9 @@ def train(cfg):

    devices = os.getenv("CUDA_VISIBLE_DEVICES") or ""
    devices_num = len(devices.split(","))
+    total_batch_size = devices_num * cfg.TRAIN.im_per_batch

    model = model_builder.FasterRCNN(
-        cfg=cfg,
        add_conv_body_func=resnet.add_ResNet50_conv4_body,
        add_roi_box_head_func=resnet.add_ResNet_roi_conv5_head,
        use_pyreader=cfg.use_pyreader,
@@ -58,18 +59,20 @@ def train(cfg):
    rpn_reg_loss.persistable = True
    loss = loss_cls + loss_bbox + rpn_cls_loss + rpn_reg_loss

-    boundaries = [120000, 160000]
-    values = [learning_rate, learning_rate * 0.1, learning_rate * 0.01]
+    boundaries = cfg.lr_steps
+    gamma = cfg.lr_gamma
+    step_num = len(boundaries)  # number of LR decay boundaries
+    values = [learning_rate * (gamma**i) for i in range(step_num + 1)]

    optimizer = fluid.optimizer.Momentum(
        learning_rate=exponential_with_warmup_decay(
            learning_rate=learning_rate,
            boundaries=boundaries,
            values=values,
-            warmup_iter=500,
-            warmup_factor=1.0 / 3.0),
-        regularization=fluid.regularizer.L2Decay(0.0001),
-        momentum=0.9)
+            warmup_iter=cfg.warm_up_iter,
+            warmup_factor=cfg.warm_up_factor),
+        regularization=fluid.regularizer.L2Decay(cfg.weight_decay),
+        momentum=cfg.momentum)
    optimizer.minimize(loss)

    fluid.memory_optimize(fluid.default_main_program())
@@ -89,20 +92,16 @@ def train(cfg):
        train_exe = fluid.ParallelExecutor(
            use_cuda=bool(cfg.use_gpu), loss_name=loss.name)

-    assert cfg.batch_size % devices_num == 0
-    batch_size_per_dev = cfg.batch_size / devices_num
    if cfg.use_pyreader:
        train_reader = reader.train(
-            cfg,
-            batch_size=batch_size_per_dev,
-            total_batch_size=cfg.batch_size,
-            padding_total=cfg.padding_minibatch,
+            batch_size=cfg.TRAIN.im_per_batch,
+            total_batch_size=total_batch_size,
+            padding_total=cfg.TRAIN.padding_minibatch,
            shuffle=True)
        py_reader = model.py_reader
        py_reader.decorate_paddle_reader(train_reader)
    else:
-        train_reader = reader.train(
-            cfg, batch_size=cfg.batch_size, shuffle=True)
+        train_reader = reader.train(batch_size=total_batch_size, shuffle=True)
        feeder = fluid.DataFeeder(place=place, feed_list=model.feeds())

    def save_model(postfix):
@@ -133,7 +132,7 @@ def train(cfg):
                    smoothed_loss.get_median_value(
                    ), start_time - prev_start_time))
                sys.stdout.flush()
-                if (iter_id + 1) % cfg.snapshot_stride == 0:
+                if (iter_id + 1) % cfg.TRAIN.snapshot_iter == 0:
                    save_model("model_iter{}".format(iter_id))
        except fluid.core.EOFException:
            py_reader.reset()
@@ -159,7 +158,7 @@ def train(cfg):
                iter_id, lr[0],
                smoothed_loss.get_median_value(), start_time - prev_start_time))
            sys.stdout.flush()
-            if (iter_id + 1) % cfg.snapshot_stride == 0:
+            if (iter_id + 1) % cfg.TRAIN.snapshot_iter == 0:
                save_model("model_iter{}".format(iter_id))
            if (iter_id + 1) == cfg.max_iter:
                break
@@ -175,6 +174,4 @@ def train(cfg):
 if __name__ == '__main__':
    args = parse_args()
    print_arguments(args)
-
-    data_args = reader.Settings(args)
-    train(data_args)
+    train()
--- a/fluid/faster_rcnn/utility.py
+++ b/fluid/faster_rcnn/utility.py
@@ -18,7 +18,7 @@ Contains common utility functions.
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
-
+import sys
 import distutils.util
 import numpy as np
 import six
@@ -26,6 +26,7 @@ from collections import deque
 from paddle.fluid import core
 import argparse
 import functools
+from config import *


 def print_arguments(args):
@@ -96,31 +97,33 @@ def parse_args():
    add_arg('model_save_dir',   str,    'output',     "The path to save model.")
    add_arg('pretrained_model', str,    'imagenet_resnet50_fusebn', "The init model path.")
    add_arg('dataset',          str,   'coco2017',  "coco2014, coco2017.")
-    add_arg('data_dir',         str,   'data/COCO17',        "The data root path.")
    add_arg('class_num',        int,   81,          "Class number.")
+    add_arg('data_dir',         str,   'data/COCO17',        "The data root path.")
    add_arg('use_pyreader',     bool,   True,           "Use pyreader.")
+    add_arg('use_profile',         bool,   False,       "Whether use profiler.")
    add_arg('padding_minibatch',bool,   False,
        "If False, only resize image and not pad, image shape is different between"
        " GPUs in one mini-batch. If True, image shape is the same in one mini-batch.")
    #SOLVER
    add_arg('learning_rate',    float,  0.01,     "Learning rate.")
    add_arg('max_iter',         int,    180000,   "Iter number.")
-    add_arg('log_window',       int,    1,        "Log smooth window, set 1 for debug, set 20 for train.")
-    add_arg('snapshot_stride',  int,    10000,    "save model every snapshot stride.")
+    add_arg('log_window',       int,    20,        "Log smooth window, set 1 for debug, set 20 for train.")
+    # FAST RCNN
    # RPN
    add_arg('anchor_sizes',     int,    [32,64,128,256,512],  "The size of anchors.")
    add_arg('aspect_ratios',    float,  [0.5,1.0,2.0],    "The ratio of anchors.")
    add_arg('variance',         float,  [1.,1.,1.,1.],    "The variance of anchors.")
-    add_arg('rpn_stride',       float,  16.,    "Stride of the feature map that RPN is attached.")
-    # FAST RCNN
+    add_arg('rpn_stride',       float,  [16.,16.],    "Stride of the feature map that RPN is attached.")
+    add_arg('rpn_nms_thresh',    float,   0.7,          "NMS threshold used on RPN proposals")
    # TRAIN TEST INFER
-    add_arg('batch_size',       int,   1,        "Minibatch size.")
+    add_arg('im_per_batch',       int,   1,        "Minibatch size.")
    add_arg('max_size',         int,   1333,    "The resized image height.")
    add_arg('scales', int,  [800],    "The resized image height.")
    add_arg('batch_size_per_im',int,    512,    "fast rcnn head batch size")
-    add_arg('mean_value',     float,   [102.9801, 115.9465, 122.7717], "pixel mean")
-    add_arg('nms_threshold',    float, 0.5,    "NMS threshold.")
-    add_arg('score_threshold',    float, 0.05,    "score threshold for NMS.")
+    add_arg('pixel_means',     float,   [102.9801, 115.9465, 122.7717], "pixel mean")
+    add_arg('nms_thresh',    float, 0.5,    "NMS threshold.")
+    add_arg('score_thresh',    float, 0.05,    "score threshold for NMS.")
+    add_arg('snapshot_stride',  int,    10000,    "save model every snapshot stride.")
    add_arg('debug',            bool,   False,   "Debug mode")
    # SINGLE EVAL AND DRAW
    add_arg('draw_threshold',  float, 0.8,    "Confidence threshold to draw bbox.")
@@ -128,4 +131,9 @@ def parse_args():
    add_arg('image_name',        str,    '',       "The single image used to inference and visualize.")
    # yapf: enable
    args = parser.parse_args()
+    file_name = sys.argv[0]
+    if 'train' in file_name or 'profile' in file_name:
+        merge_cfg_from_args(args, 'train')
+    else:
+        merge_cfg_from_args(args, 'test')
    return args