Commit b2389e38 authored by: D dengkaipeng

fit for train.

Parent 9da63d61
...
@@ -24,10 +24,6 @@ cfg = _C
 # Training options
 #
 
-# batch
-_C.batch = 8
-
 # Snapshot period
 _C.snapshot_iter = 2000
 
@@ -72,6 +68,9 @@ _C.pixel_stds = [0.229, 0.224, 0.225]
 # SOLVER options
 #
 
+# batch size
+_C.batch_size = 64
+
 # derived learning rate the to get the final learning rate.
 _C.learning_rate = 0.001
 
@@ -92,9 +91,7 @@ _C.weight_decay = 0.0005
 # momentum with SGD
 _C.momentum = 0.9
 
-# decay
-_C.decay = 0.0005
 #
 # ENV options
 #
...
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
 #
 #Licensed under the Apache License, Version 2.0 (the "License");
 #you may not use this file except in compliance with the License.
 #You may obtain a copy of the License at
 #
 #    http://www.apache.org/licenses/LICENSE-2.0
 #
 #Unless required by applicable law or agreed to in writing, software
 #distributed under the License is distributed on an "AS IS" BASIS,
 #WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 #See the License for the specific language governing permissions and
 #limitations under the License.
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 from __future__ import unicode_literals
 
-LAYER_TYPES = [
-    "net",
-    "convolutional",
-    "shortcut",
-    "route",
-    "upsample",
-    "maxpool",
-    "yolo",
-]
-
-class ConfigPaser(object):
-    def __init__(self, config_path):
-        self.config_path = config_path
-
-    def parse(self):
-        with open(self.config_path) as cfg_file:
-            model_defs = []
-            for line in cfg_file.readlines():
-                line = line.strip()
-                if len(line) == 0:
-                    continue
-                if line.startswith('#'):
-                    continue
-                if line.startswith('['):
-                    layer_type = line[1:-1].strip()
-                    if layer_type not in LAYER_TYPES:
-                        print("Unknow config layer type: ", layer_type)
-                        return None
-                    model_defs.append({})
-                    model_defs[-1]['type'] = layer_type
-                else:
-                    key, value = line.split('=')
-                    model_defs[-1][key.strip()] = value.strip()
-
-            return model_defs
+class AttrDict(dict):
+    def __init__(self, *args, **kwargs):
+        super(AttrDict, self).__init__(*args, **kwargs)
+
+    def __getattr__(self, name):
+        if name in self.__dict__:
+            return self.__dict__[name]
+        elif name in self:
+            return self[name]
+        else:
+            raise AttributeError(name)
+
+    def __setattr__(self, name, value):
+        if name in self.__dict__:
+            self.__dict__[name] = value
+        else:
+            self[name] = value
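A brief usage sketch of the `AttrDict` added above (illustrative only, not part of the commit): attribute-style reads and writes fall through to the underlying dict, which is how `config.py` exposes options such as `cfg.batch_size`.

```python
# Usage sketch; assumes the AttrDict class shown above is in scope.
_C = AttrDict()

_C.batch_size = 64          # stored via __setattr__ -> plain dict item
_C.learning_rate = 0.001

print(_C['batch_size'])     # 64   -- still a dict underneath
print(_C.learning_rate)     # 0.001 -- read back via __getattr__
_C['momentum'] = 0.9        # dict-style writes work as well
print(_C.momentum)          # 0.9
```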
@@ -17,13 +17,13 @@ from __future__ import division
 from __future__ import print_function
 import os
 import time
+import json
 import numpy as np
 import paddle
 import paddle.fluid as fluid
 import reader
-import models.yolov3 as models
+from models.yolov3 import YOLOv3
 from utility import print_arguments, parse_args
-import json
 from pycocotools.coco import COCO
 from pycocotools.cocoeval import COCOeval, Params
 from config import cfg
@@ -39,11 +39,9 @@ def eval():
     if not os.path.exists('output'):
         os.mkdir('output')
 
-    model = models.YOLOv3(cfg.model_cfg_path, is_train=False)
+    model = YOLOv3(cfg.model_cfg_path, is_train=False)
     model.build_model()
     outputs = model.get_pred()
-    yolo_anchors = model.get_yolo_anchors()
-    yolo_classes = model.get_yolo_classes()
     place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace()
     exe = fluid.Executor(place)
     # yapf: disable
@@ -52,7 +50,7 @@ def eval():
         return os.path.exists(os.path.join(cfg.weights, var.name))
     fluid.io.load_vars(exe, cfg.weights, predicate=if_exist)
     # yapf: enable
-    input_size = model.get_input_size()
+    input_size = cfg.input_size
     test_reader = reader.test(input_size, 1)
     label_names, label_ids = reader.get_label_infos()
     if cfg.debug:
...
@@ -6,9 +6,7 @@ import paddle.fluid as fluid
 import box_utils
 import reader
 from utility import print_arguments, parse_args
-import models.yolov3 as models
-# from coco_reader import load_label_names
-import json
+from models.yolov3 import YOLOv3
 from pycocotools.coco import COCO
 from pycocotools.cocoeval import COCOeval, Params
 from config import cfg
@@ -19,12 +17,10 @@ def infer():
     if not os.path.exists('output'):
         os.mkdir('output')
 
-    model = models.YOLOv3(cfg.model_cfg_path, is_train=False)
+    model = YOLOv3(cfg.model_cfg_path, is_train=False)
     model.build_model()
     outputs = model.get_pred()
-    input_size = model.get_input_size()
-    yolo_anchors = model.get_yolo_anchors()
-    yolo_classes = model.get_yolo_classes()
+    input_size = cfg.input_size
     place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace()
     exe = fluid.Executor(place)
     # yapf: disable
...
@@ -22,7 +22,7 @@ from paddle.fluid.layers import control_flow
 
 def exponential_with_warmup_decay(learning_rate, boundaries, values,
-                                  warmup_iter, warmup_factor, start_step):
+                                  warmup_iter, warmup_factor):
     global_step = lr_scheduler._decay_step_counter()
 
     lr = fluid.layers.create_global_var(
...
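The `start_step` argument is dropped from `exponential_with_warmup_decay` above. As a framework-free sketch of what such a schedule typically computes (an assumption about the intended behavior, not the fluid implementation): ramp linearly from `warmup_factor * lr` to `lr` over `warmup_iter` steps, then hold piecewise-constant values at the given boundaries.

```python
# Hypothetical pure-Python sketch of a warmup + piecewise-constant schedule,
# mirroring the signature above minus start_step. Not the fluid implementation.
def lr_at(step, learning_rate, boundaries, values, warmup_iter, warmup_factor):
    if step < warmup_iter:
        # linear warmup from warmup_factor * lr up to lr
        alpha = step / float(warmup_iter)
        factor = warmup_factor * (1 - alpha) + alpha
        return learning_rate * factor
    # piecewise-constant decay afterwards
    for boundary, value in zip(boundaries, values):
        if step < boundary:
            return value
    return values[-1]

# Example values as train.py builds them: lr * gamma**i per boundary crossed.
lr, gamma, boundaries = 0.001, 0.1, [400000, 450000]
values = [lr * (gamma ** i) for i in range(len(boundaries) + 1)]
print(lr_at(100, lr, boundaries, values, warmup_iter=4000, warmup_factor=0.))
print(lr_at(420000, lr, boundaries, values, warmup_iter=4000, warmup_factor=0.))
```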
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
from __future__ import division
from __future__ import print_function
import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.initializer import Constant
from paddle.fluid.initializer import Normal
from paddle.fluid.regularizer import L2Decay
import box_utils
from config.config_parser import ConfigPaser
from config.config import cfg
def conv_bn_layer(input,
ch_out,
filter_size,
stride,
padding,
act=None,
bn=False,
name=None,
is_train=True):
if bn:
out = fluid.layers.conv2d(
input=input,
num_filters=ch_out,
filter_size=filter_size,
stride=stride,
padding=padding,
act=None,
param_attr=ParamAttr(initializer=fluid.initializer.Normal(0., 0.02),
name=name + "_weights"),
bias_attr=False,
name=name + '.conv2d.output.1')
bn_name = "bn" + name[4:]
out = fluid.layers.batch_norm(input=out,
act=None,
is_test=not is_train,
param_attr=ParamAttr(
initializer=fluid.initializer.Normal(0., 0.02),
regularizer=L2Decay(0.),
name=bn_name + '_scale'),
bias_attr=ParamAttr(
initializer=fluid.initializer.Constant(0.0),
regularizer=L2Decay(0.),
name=bn_name + '_offset'),
moving_mean_name=bn_name+'_mean',
moving_variance_name=bn_name+'_var',
name=bn_name+'.output')
else:
out = fluid.layers.conv2d(
input=input,
num_filters=ch_out,
filter_size=filter_size,
stride=stride,
padding=padding,
act=None,
param_attr=ParamAttr(initializer=fluid.initializer.Normal(0., 0.02),
name=name + "_weights"),
bias_attr=ParamAttr(initializer=fluid.initializer.Constant(0.0),
regularizer=L2Decay(0.),
name=name + "_bias"),
name=name + '.conv2d.output.1')
if act == 'relu':
out = fluid.layers.relu(x=out)
if act == 'leaky':
out = fluid.layers.leaky_relu(x=out, alpha=0.1)
return out
class YOLOv3(object):
def __init__(self,
model_cfg_path,
is_train=True,
use_pyreader=True,
use_random=True):
self.model_cfg_path = model_cfg_path
self.config_parser = ConfigPaser(model_cfg_path)
self.is_train = is_train
self.use_pyreader = use_pyreader
self.use_random = use_random
self.outputs = []
self.losses = []
self.boxes = []
self.scores = []
self.downsample = 32
def build_model(self):
model_defs = self.config_parser.parse()
if model_defs is None:
return None
self.hyperparams = model_defs.pop(0)
assert self.hyperparams['type'].lower() == "net", \
"net config params should be given in the first segment named 'net'"
self.img_height = cfg.input_size
self.img_width = cfg.input_size
self.build_input()
out = self.image
layer_outputs = []
self.yolo_layer_defs = []
self.yolo_anchors = []
self.yolo_classes = []
self.outputs = []
for i, layer_def in enumerate(model_defs):
if layer_def['type'] == 'convolutional':
bn = layer_def.get('batch_normalize', 0)
ch_out = int(layer_def['filters'])
filter_size = int(layer_def['size'])
stride = int(layer_def['stride'])
padding = (filter_size - 1) // 2 if int(layer_def['pad']) else 0
act = layer_def['activation']
out = conv_bn_layer(
input=out,
ch_out=ch_out,
filter_size=filter_size,
stride=stride,
padding=padding,
act=act,
bn=bool(bn),
name="conv"+str(i),
is_train=self.is_train)
elif layer_def['type'] == 'shortcut':
layer_from = int(layer_def['from'])
out = fluid.layers.elementwise_add(
x=out,
y=layer_outputs[layer_from],
name="res"+str(i))
elif layer_def['type'] == 'route':
layers = map(int, layer_def['layers'].split(","))
out = fluid.layers.concat(
input=[layer_outputs[i] for i in layers],
axis=1)
elif layer_def['type'] == 'upsample':
scale = int(layer_def['stride'])
# get dynamic upsample output shape
shape_nchw = fluid.layers.shape(out)
shape_hw = fluid.layers.slice(shape_nchw, axes=[0], \
starts=[2], ends=[4])
shape_hw.stop_gradient = True
in_shape = fluid.layers.cast(shape_hw, dtype='int32')
out_shape = in_shape * scale
out_shape.stop_gradient = True
# reisze by actual_shape
out = fluid.layers.resize_nearest(
input=out,
scale=scale,
actual_shape=out_shape,
name="upsample"+str(i))
elif layer_def['type'] == 'maxpool':
pool_size = int(layer_def['size'])
pool_stride = int(layer_def['stride'])
pool_padding = 0
if pool_stride == 1 and pool_size == 2:
pool_padding = 1
out = fluid.layers.pool2d(
input=out,
pool_type='max',
pool_size=pool_size,
pool_stride=pool_stride,
pool_padding=pool_padding)
elif layer_def['type'] == 'yolo':
self.yolo_layer_defs.append(layer_def)
self.outputs.append(out)
anchor_mask = map(int, layer_def['mask'].split(','))
anchors = map(int, layer_def['anchors'].split(','))
mask_anchors = []
for m in anchor_mask:
mask_anchors.append(anchors[2 * m])
mask_anchors.append(anchors[2 * m + 1])
self.yolo_anchors.append(mask_anchors)
class_num = int(layer_def['classes'])
self.yolo_classes.append(class_num)
if self.is_train:
ignore_thresh = float(layer_def['ignore_thresh'])
loss = fluid.layers.yolov3_loss(
x=out,
gtbox=self.gtbox,
gtlabel=self.gtlabel,
gtscore=self.gtscore,
anchors=anchors,
anchor_mask=anchor_mask,
class_num=class_num,
ignore_thresh=ignore_thresh,
downsample_ratio=self.downsample,
use_label_smooth=cfg.label_smooth,
name="yolo_loss"+str(i))
self.losses.append(fluid.layers.reduce_mean(loss))
else:
boxes, scores = fluid.layers.yolo_box(
x=out,
img_size=self.im_shape,
anchors=mask_anchors,
class_num=class_num,
conf_thresh=cfg.valid_thresh,
downsample_ratio=self.downsample,
name="yolo_box"+str(i))
self.boxes.append(boxes)
self.scores.append(fluid.layers.transpose(scores, perm=[0, 2, 1]))
self.downsample //= 2
layer_outputs.append(out)
def loss(self):
return sum(self.losses)
def get_pred(self):
yolo_boxes = fluid.layers.concat(self.boxes, axis=1)
yolo_scores = fluid.layers.concat(self.scores, axis=2)
return fluid.layers.multiclass_nms(
bboxes=yolo_boxes,
scores=yolo_scores,
score_threshold=cfg.valid_thresh,
nms_top_k=cfg.nms_topk,
keep_top_k=cfg.nms_posk,
nms_threshold=cfg.nms_thresh,
background_label=-1,
name="multiclass_nms")
def get_yolo_anchors(self):
return self.yolo_anchors
def get_yolo_classes(self):
return self.yolo_classes
def build_input(self):
self.image_shape = [3, self.img_height, self.img_width]
if self.use_pyreader and self.is_train:
self.py_reader = fluid.layers.py_reader(
capacity=64,
shapes = [[-1] + self.image_shape, [-1, cfg.max_box_num, 4], [-1, cfg.max_box_num], [-1, cfg.max_box_num]],
lod_levels=[0, 0, 0, 0],
dtypes=['float32'] * 2 + ['int32'] + ['float32'],
use_double_buffer=True)
self.image, self.gtbox, self.gtlabel, self.gtscore = fluid.layers.read_file(self.py_reader)
else:
self.image = fluid.layers.data(
name='image', shape=self.image_shape, dtype='float32'
)
self.gtbox = fluid.layers.data(
name='gtbox', shape=[cfg.max_box_num, 4], dtype='float32'
)
self.gtlabel = fluid.layers.data(
name='gtlabel', shape=[cfg.max_box_num], dtype='int32'
)
self.gtscore = fluid.layers.data(
name='gtscore', shape=[cfg.max_box_num], dtype='float32'
)
self.im_shape = fluid.layers.data(
name="im_shape", shape=[2], dtype='int32')
self.im_id = fluid.layers.data(
name="im_id", shape=[1], dtype='int32')
def feeds(self):
if not self.is_train:
return [self.image, self.im_id, self.im_shape]
return [self.image, self.gtbox, self.gtlabel, self.gtscore]
def get_hyperparams(self):
return self.hyperparams
def get_input_size(self):
return cfg.input_size
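For context on the cfg-driven `YOLOv3` variant above: `ConfigPaser.parse()` turns a darknet-style `.cfg` file into a list of dicts, and `build_model()` pops the leading `net` block and then dispatches on each dict's `type`. A small illustrative sketch of that data shape (the keys and values here are invented for the example, not a real YOLOv3 config):

```python
# Illustrative model_defs list, shaped like what ConfigPaser.parse() returns
# for a tiny .cfg file (values are examples only).
model_defs = [
    {'type': 'net', 'height': '608', 'width': '608'},          # hyperparams
    {'type': 'convolutional', 'batch_normalize': '1',
     'filters': '32', 'size': '3', 'stride': '1', 'pad': '1',
     'activation': 'leaky'},
    {'type': 'shortcut', 'from': '-3', 'activation': 'linear'},
]

hyperparams = model_defs.pop(0)          # build_model() pops the 'net' block first
for layer_def in model_defs:
    print(layer_def['type'])             # then dispatches on 'type', as above
```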
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
# #
#Licensed under the Apache License, Version 2.0 (the "License"); #Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License. #you may not use this file except in compliance with the License.
#You may obtain a copy of the License at #You may obtain a copy of the License at
# #
# http://www.apache.org/licenses/LICENSE-2.0 # http://www.apache.org/licenses/LICENSE-2.0
# #
#Unless required by applicable law or agreed to in writing, software #Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS, #distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and #See the License for the specific language governing permissions and
#limitations under the License. #limitations under the License.
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.initializer import Constant from paddle.fluid.initializer import Constant
from paddle.fluid.regularizer import L2Decay from paddle.fluid.regularizer import L2Decay
from config import cfg
def conv_bn_layer(input,
def conv_bn_layer(input, ch_out,
ch_out, filter_size,
filter_size, stride,
stride, padding,
padding, act='leaky',
act='leaky', is_test=True,
i=0): name=None):
conv1 = fluid.layers.conv2d( conv1 = fluid.layers.conv2d(
input=input, input=input,
num_filters=ch_out, num_filters=ch_out,
filter_size=filter_size, filter_size=filter_size,
stride=stride, stride=stride,
padding=padding, padding=padding,
act=None, act=None,
param_attr=ParamAttr(initializer=fluid.initializer.Normal(0., 0.02), param_attr=ParamAttr(initializer=fluid.initializer.Normal(0., 0.02),
name="conv" + str(i)+"_weights"), name=name+".conv.weights"),
bias_attr=False) bias_attr=False)
bn_name = "bn" + str(i) bn_name = name + ".bn"
out = fluid.layers.batch_norm(
out = fluid.layers.batch_norm( input=conv1,
input=conv1, act=None,
act=None, is_test=is_test,
is_test=True, param_attr=ParamAttr(
param_attr=ParamAttr( initializer=fluid.initializer.Normal(0., 0.02),
initializer=fluid.initializer.Normal(0., 0.02), regularizer=L2Decay(0.),
regularizer=L2Decay(0.), name=bn_name + '.scale'),
name=bn_name + '_scale'), bias_attr=ParamAttr(
bias_attr=ParamAttr( initializer=fluid.initializer.Constant(0.0),
initializer=fluid.initializer.Constant(0.0), regularizer=L2Decay(0.),
regularizer=L2Decay(0.), name=bn_name + '.offset'),
name=bn_name + '_offset'), moving_mean_name=bn_name + '.mean',
moving_mean_name=bn_name + '_mean', moving_variance_name=bn_name + '.var')
moving_variance_name=bn_name + '_var') if act == 'leaky':
if act == 'leaky': out = fluid.layers.leaky_relu(x=out, alpha=0.1)
out = fluid.layers.leaky_relu(x=out, alpha=0.1) return out
return out
def downsample(input, ch_out, filter_size=3, stride=2, padding=1, is_test=True, name=None):
def basicblock(input, ch_out, stride,i): return conv_bn_layer(input,
""" ch_out=ch_out,
channel: convolution channels for 1x1 conv filter_size=filter_size,
""" stride=stride,
conv1 = conv_bn_layer(input, ch_out, 1, 1, 0, i=i) padding=padding,
conv2 = conv_bn_layer(conv1, ch_out*2, 3, 1, 1, i=i+1) is_test=is_test,
out = fluid.layers.elementwise_add(x=input, y=conv2, act=None,name="res"+str(i+2)) name=name)
return out
def basicblock(input, ch_out, is_test=True, name=None):
def layer_warp(block_func, input, ch_out, count, stride,i): conv1 = conv_bn_layer(input, ch_out, 1, 1, 0, is_test=is_test, name=name+".0")
res_out = block_func(input, ch_out, stride, i=i) conv2 = conv_bn_layer(conv1, ch_out*2, 3, 1, 1, is_test=is_test, name=name+".1")
for j in range(1, count): out = fluid.layers.elementwise_add(x=input, y=conv2, act=None)
res_out = block_func(res_out, ch_out, 1 ,i=i+j*3) return out
return res_out
def layer_warp(block_func, input, ch_out, count, is_test=True, name=None):
DarkNet_cfg = { res_out = block_func(input, ch_out, is_test=is_test, name='{}.0'.format(name))
53: ([1,2,8,8,4],basicblock) for j in range(1, count):
} res_out = block_func(res_out, ch_out, is_test=is_test, name='{}.{}'.format(name, j))
return res_out
# num_filters = [32, 64, 128, 256, 512, 1024]
DarkNet_cfg = {
def add_DarkNet53_conv_body(body_input): 53: ([1,2,8,8,4],basicblock)
}
stages, block_func = DarkNet_cfg[53]
stages = stages[0:5] def add_DarkNet53_conv_body(body_input, is_test=True):
conv1 = conv_bn_layer( stages, block_func = DarkNet_cfg[53]
body_input, ch_out=32, filter_size=3, stride=1, padding=1, act="leaky",i=0) stages = stages[0:5]
conv2 = conv_bn_layer( conv1 = conv_bn_layer(
conv1, ch_out=64, filter_size=3, stride=2, padding=1, act="leaky", i=1) body_input, ch_out=32, filter_size=3, stride=1, padding=1, is_test=is_test, name="yolo_input")
block3 = layer_warp(block_func, conv2, 32, stages[0], 1, i=2) downsample_ = downsample(conv1, ch_out=conv1.shape[1]*2, is_test=is_test, name="yolo_input.downsample")
downsample3 = conv_bn_layer( index = 2
block3, ch_out=128, filter_size=3, stride=2, padding=1, i=5) blocks = []
block4 = layer_warp(block_func, downsample3, 64, stages[1], 1, i=6) for i, stage in enumerate(stages):
downsample4 = conv_bn_layer( block = layer_warp(block_func, downsample_, 32 *(2**i), stage, is_test=is_test, name="stage.{}".format(i))
block4, ch_out=256, filter_size=3, stride=2, padding=1, i=12) blocks.append(block)
block5 = layer_warp(block_func, downsample4, 128, stages[2], 1,i=13) index += 3 * stage
downsample5 = conv_bn_layer( if i < len(stages) - 1: # do not downsaple in the last stage
block5, ch_out=512, filter_size=3, stride=2, padding=1, i=37) downsample_ = downsample(block, ch_out=block.shape[1]*2, is_test=is_test, name="stage.{}.downsample".format(i))
block6 = layer_warp(block_func, downsample5, 256, stages[3], 1, i=38) index += 1
downsample6 = conv_bn_layer( return blocks[-1:-4:-1]
block6, ch_out=1024, filter_size=3, stride=2, padding=1, i=62)
block7 = layer_warp(block_func, downsample6, 512, stages[4], 1,i=63)
return block7,block6,block5
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. # Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
# #
#Licensed under the Apache License, Version 2.0 (the "License"); #Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License. #you may not use this file except in compliance with the License.
#You may obtain a copy of the License at #You may obtain a copy of the License at
# #
# http://www.apache.org/licenses/LICENSE-2.0 # http://www.apache.org/licenses/LICENSE-2.0
# #
#Unless required by applicable law or agreed to in writing, software #Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS, #distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and #See the License for the specific language governing permissions and
#limitations under the License. #limitations under the License.
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.initializer import Constant from paddle.fluid.initializer import Constant
from paddle.fluid.initializer import Normal from paddle.fluid.initializer import Normal
from paddle.fluid.regularizer import L2Decay from paddle.fluid.regularizer import L2Decay
from config_parser import ConfigPaser from config import cfg
from config import cfg
from .darknet import add_DarkNet53_conv_body
from darknet import add_DarkNet53_conv_body from .darknet import conv_bn_layer
from darknet import conv_bn_layer
def yolo_detection_block(input, channel, is_test=True, name=None):
def yolo_detection_block(input, channel,i): assert channel % 2 == 0, "channel {} cannot be divided by 2".format(channel)
assert channel % 2 == 0, "channel {} cannot be divided by 2".format(channel) conv = input
conv1 = input for j in range(2):
for j in range(2): conv = conv_bn_layer(conv, channel, filter_size=1, stride=1, padding=0, is_test=is_test, name='{}.{}.0'.format(name, j))
conv1 = conv_bn_layer(conv1, channel, filter_size=1, stride=1, padding=0,i=i+j*2) conv = conv_bn_layer(conv, channel*2, filter_size=3, stride=1, padding=1, is_test=is_test, name='{}.{}.1'.format(name, j))
conv1 = conv_bn_layer(conv1, channel*2, filter_size=3, stride=1, padding=1,i=i+j*2+1) route = conv_bn_layer(conv, channel, filter_size=1, stride=1, padding=0, is_test=is_test, name='{}.2'.format(name))
route = conv_bn_layer(conv1, channel, filter_size=1, stride=1, padding=0,i=i+4) tip = conv_bn_layer(route,channel*2, filter_size=3, stride=1, padding=1, is_test=is_test, name='{}.tip'.format(name))
tip = conv_bn_layer(route,channel*2, filter_size=3, stride=1, padding=1,i=i+5) return route, tip
return route, tip
def upsample(input, scale=2,name=None):
def upsample(out, stride=2,name=None): # get dynamic upsample output shape
out = out shape_nchw = fluid.layers.shape(input)
scale = stride shape_hw = fluid.layers.slice(shape_nchw, axes=[0], starts=[2], ends=[4])
# get dynamic upsample output shape shape_hw.stop_gradient = True
shape_nchw = fluid.layers.shape(out) in_shape = fluid.layers.cast(shape_hw, dtype='int32')
shape_hw = fluid.layers.slice(shape_nchw, axes=[0], starts=[2], ends=[4]) out_shape = in_shape * scale
shape_hw.stop_gradient = True out_shape.stop_gradient = True
in_shape = fluid.layers.cast(shape_hw, dtype='int32')
out_shape = in_shape * scale # reisze by actual_shape
out_shape.stop_gradient = True out = fluid.layers.resize_nearest(
input=input,
# reisze by actual_shape scale=scale,
out = fluid.layers.resize_nearest( actual_shape=out_shape,
input=out, name=name)
scale=scale, return out
actual_shape=out_shape,
name=name) class YOLOv3(object):
return out def __init__(self,
model_cfg_path,
class YOLOv3(object): is_train=True,
def __init__(self, use_pyreader=True,
model_cfg_path, use_random=True):
is_train=True, self.model_cfg_path = model_cfg_path
use_pyreader=True, self.is_train = is_train
use_random=True): self.use_pyreader = use_pyreader
self.model_cfg_path = model_cfg_path self.use_random = use_random
self.config_parser = ConfigPaser(model_cfg_path) self.outputs = []
self.is_train = is_train self.losses = []
self.use_pyreader = use_pyreader self.downsample = 32
self.use_random = use_random self.ignore_thresh = .7
self.outputs = [] self.class_num = 80
self.losses = []
self.downsample = 32 def build_model(self):
self.ignore_thresh = .7
self.class_num = 80 self.img_height = cfg.input_size
self.img_width = cfg.input_size
def build_model(self):
self.build_input()
self.img_height = cfg.input_size
self.img_width = cfg.input_size self.outputs = []
self.boxes = []
self.build_input() self.scores = []
out = self.image blocks = add_DarkNet53_conv_body(self.image, not self.is_train)
for i, block in enumerate(blocks):
self.yolo_anchors = [] if i > 0:
self.yolo_classes = [] block = fluid.layers.concat(
self.outputs = [] input=[route, block],
self.boxes = [] axis=1)
self.scores = [] route, tip = yolo_detection_block(block, channel=512//(2**i),
is_test=(not self.is_train),
name="yolo_block.{}".format(i))
scale1,scale2,scale3 = add_DarkNet53_conv_body(out) block_out = fluid.layers.conv2d(
input=tip,
# 13*13 scale output num_filters=255,
route1, tip1 = yolo_detection_block(scale1, channel=512,i=75) filter_size=1,
# scale1 output stride=1,
scale1_out = fluid.layers.conv2d( padding=0,
input=tip1, act=None,
num_filters=255, param_attr=ParamAttr(initializer=fluid.initializer.Normal(0., 0.02),
filter_size=1, name="yolo_output.{}.conv.weights".format(i)),
stride=1, bias_attr=ParamAttr(initializer=fluid.initializer.Constant(0.0),
padding=0, regularizer=L2Decay(0.),
act=None, name="yolo_output.{}.conv.bias".format(i)))
param_attr=ParamAttr(initializer=fluid.initializer.Normal(0., 0.02), self.outputs.append(block_out)
name="conv81_weights"),
bias_attr=ParamAttr(initializer=fluid.initializer.Constant(0.0), if i < len(blocks) - 1:
regularizer=L2Decay(0.), route = conv_bn_layer(
name="conv81_bias")) input=route,
ch_out=256//(2**i),
self.outputs.append(scale1_out) filter_size=1,
stride=1,
route1 = conv_bn_layer( padding=0,
input=route1, is_test=(not self.is_train),
ch_out=256, name="yolo_transition.{}".format(i))
filter_size=1, # upsample
stride=1, route = upsample(route)
padding=0,
i=84)
# upsample anchor_mask = [6,7,8,3,4,5,0,1,2]
route1 = upsample(route1) anchors = [10, 13, 16, 30, 33, 23, 30, 61, 62, 45, 59, 119, 116, 90, 156, 198, 373, 326]
for i,out in enumerate(self.outputs):
# concat mask = anchor_mask[i*3 : (i+1)*3]
route1 = fluid.layers.concat( mask_anchors=[]
input=[route1,scale2],
axis=1) for m in mask:
mask_anchors.append(anchors[2 * m])
# 26*26 scale output mask_anchors.append(anchors[2 * m + 1])
route2, tip2 = yolo_detection_block(route1, channel=256,i=87) class_num = int(self.class_num)
# scale2 output if self.is_train:
scale2_out = fluid.layers.conv2d( ignore_thresh = float(self.ignore_thresh)
input=tip2, loss = fluid.layers.yolov3_loss(
num_filters=255, x=out,
filter_size=1, gtbox=self.gtbox,
stride=1, gtlabel=self.gtlabel,
padding=0, gtscore=self.gtscore,
act=None, anchors=anchors,
param_attr=ParamAttr(name="conv93_weights"), anchor_mask=mask,
bias_attr=ParamAttr(name="conv93_bias")) class_num=class_num,
ignore_thresh=ignore_thresh,
self.outputs.append(scale2_out) downsample_ratio=self.downsample,
use_label_smooth=cfg.label_smooth,
route2 = conv_bn_layer( name="yolo_loss"+str(i))
input=route2, self.losses.append(fluid.layers.reduce_mean(loss))
ch_out=128, else:
filter_size=1, boxes, scores = fluid.layers.yolo_box(
stride=1, x=out,
padding=0, img_size=self.im_shape,
i=96) anchors=mask_anchors,
# upsample class_num=class_num,
route2 = upsample(route2) conf_thresh=cfg.valid_thresh,
downsample_ratio=self.downsample,
# concat name="yolo_box"+str(i))
route2 = fluid.layers.concat( self.boxes.append(boxes)
input=[route2,scale3], self.scores.append(fluid.layers.transpose(scores, perm=[0, 2, 1]))
axis=1)
self.downsample //= 2
# 52*52 scale output
route3, tip3 = yolo_detection_block(route2, channel=128, i=99)
def loss(self):
# scale3 output return sum(self.losses)
scale3_out = fluid.layers.conv2d(
input=tip3, def get_pred(self):
num_filters=255, yolo_boxes = fluid.layers.concat(self.boxes, axis=1)
filter_size=1, yolo_scores = fluid.layers.concat(self.scores, axis=2)
stride=1, return fluid.layers.multiclass_nms(
padding=0, bboxes=yolo_boxes,
act=None, scores=yolo_scores,
param_attr=ParamAttr(name="conv105_weights"), score_threshold=cfg.valid_thresh,
bias_attr=ParamAttr(name="conv105_bias")) nms_top_k=cfg.nms_topk,
keep_top_k=cfg.nms_posk,
nms_threshold=cfg.nms_thresh,
self.outputs.append(scale3_out) background_label=-1,
# yolo name="multiclass_nms")
anchor_mask = [6,7,8,3,4,5,0,1,2] def build_input(self):
anchors = [10, 13, 16, 30, 33, 23, 30, 61, 62, 45, 59, 119, 116, 90, 156, 198, 373, 326] self.image_shape = [3, self.img_height, self.img_width]
for i,out in enumerate(self.outputs): if self.use_pyreader and self.is_train:
mask = anchor_mask[i*3 : (i+1)*3] self.py_reader = fluid.layers.py_reader(
mask_anchors=[] capacity=64,
shapes = [[-1] + self.image_shape, [-1, cfg.max_box_num, 4], [-1, cfg.max_box_num], [-1, cfg.max_box_num]],
for m in mask: lod_levels=[0, 0, 0, 0],
mask_anchors.append(anchors[2 * m]) dtypes=['float32'] * 2 + ['int32'] + ['float32'],
mask_anchors.append(anchors[2 * m + 1]) use_double_buffer=True)
self.yolo_anchors.append(mask_anchors) self.image, self.gtbox, self.gtlabel, self.gtscore = fluid.layers.read_file(self.py_reader)
class_num = int(self.class_num) else:
self.yolo_classes.append(class_num) self.image = fluid.layers.data(
name='image', shape=self.image_shape, dtype='float32'
if self.is_train: )
ignore_thresh = float(self.ignore_thresh) self.gtbox = fluid.layers.data(
loss = fluid.layers.yolov3_loss( name='gtbox', shape=[cfg.max_box_num, 4], dtype='float32'
x=out, )
gtbox=self.gtbox, self.gtlabel = fluid.layers.data(
gtlabel=self.gtlabel, name='gtlabel', shape=[cfg.max_box_num], dtype='int32'
# gtscore=self.gtscore, )
anchors=anchors, self.gtscore = fluid.layers.data(
anchor_mask=mask, name='gtscore', shape=[cfg.max_box_num], dtype='float32'
class_num=class_num, )
ignore_thresh=ignore_thresh, self.im_shape = fluid.layers.data(
downsample_ratio=self.downsample, name="im_shape", shape=[2], dtype='int32')
# use_label_smooth=False, self.im_id = fluid.layers.data(
name="yolo_loss"+str(i)) name="im_id", shape=[1], dtype='int32')
self.losses.append(fluid.layers.reduce_mean(loss))
else: def feeds(self):
boxes, scores = fluid.layers.yolo_box( if not self.is_train:
x=out, return [self.image, self.im_id, self.im_shape]
img_size=self.im_shape, return [self.image, self.gtbox, self.gtlabel, self.gtscore]
anchors=mask_anchors,
class_num=class_num,
conf_thresh=cfg.valid_thresh,
downsample_ratio=self.downsample,
name="yolo_box"+str(i))
self.boxes.append(boxes)
self.scores.append(fluid.layers.transpose(scores, perm=[0, 2, 1]))
self.downsample //= 2
def loss(self):
return sum(self.losses)
def get_pred(self):
# return self.outputs
yolo_boxes = fluid.layers.concat(self.boxes, axis=1)
yolo_scores = fluid.layers.concat(self.scores, axis=2)
return fluid.layers.multiclass_nms(
bboxes=yolo_boxes,
scores=yolo_scores,
score_threshold=cfg.valid_thresh,
nms_top_k=cfg.nms_topk,
keep_top_k=cfg.nms_posk,
nms_threshold=cfg.nms_thresh,
background_label=-1,
name="multiclass_nms")
def get_yolo_anchors(self):
return self.yolo_anchors
def get_yolo_classes(self):
return self.yolo_classes
def build_input(self):
self.image_shape = [3, self.img_height, self.img_width]
if self.use_pyreader and self.is_train:
self.py_reader = fluid.layers.py_reader(
capacity=64,
shapes = [[-1] + self.image_shape, [-1, cfg.max_box_num, 4], [-1, cfg.max_box_num], [-1, cfg.max_box_num]],
lod_levels=[0, 0, 0, 0],
dtypes=['float32'] * 2 + ['int32'] + ['float32'],
use_double_buffer=True)
self.image, self.gtbox, self.gtlabel, self.gtscore = fluid.layers.read_file(self.py_reader)
else:
self.image = fluid.layers.data(
name='image', shape=self.image_shape, dtype='float32'
)
self.gtbox = fluid.layers.data(
name='gtbox', shape=[cfg.max_box_num, 4], dtype='float32'
)
self.gtlabel = fluid.layers.data(
name='gtlabel', shape=[cfg.max_box_num], dtype='int32'
)
self.gtscore = fluid.layers.data(
name='gtscore', shape=[cfg.max_box_num], dtype='float32'
)
self.im_shape = fluid.layers.data(
name="im_shape", shape=[2], dtype='int32')
self.im_id = fluid.layers.data(
name="im_id", shape=[1], dtype='int32')
def feeds(self):
if not self.is_train:
return [self.image, self.im_id, self.im_shape]
return [self.image, self.gtbox, self.gtlabel, self.gtscore]
def get_input_size(self):
return cfg.input_size
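Both versions of `models/yolov3.py` shown above expand a per-scale anchor mask into the flat anchor list before calling `yolov3_loss` / `yolo_box`. A standalone sketch of that slicing, using the same literals that appear in the diff:

```python
# Standalone sketch of the mask -> mask_anchors expansion used per YOLO scale.
anchor_mask = [6, 7, 8, 3, 4, 5, 0, 1, 2]
anchors = [10, 13, 16, 30, 33, 23, 30, 61, 62, 45,
           59, 119, 116, 90, 156, 198, 373, 326]

for i in range(3):                        # three detection scales
    mask = anchor_mask[i * 3:(i + 1) * 3]
    mask_anchors = []
    for m in mask:
        mask_anchors.append(anchors[2 * m])
        mask_anchors.append(anchors[2 * m + 1])
    print(i, mask, mask_anchors)
# scale 0 -> [116, 90, 156, 198, 373, 326], the largest anchors (13x13 grid)
```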
@@ -255,8 +255,8 @@ def train(size=416,
           random_sizes=[],
           interval=10,
           pyreader_num=1,
-          num_workers=16,
-          max_queue=32,
+          num_workers=2,
+          max_queue=4,
           use_multiprocessing=True):
     generator = dsr.get_reader('train', size, batch_size, shuffle, int(mixup_iter/pyreader_num), random_sizes)
...
@@ -26,7 +26,7 @@ from utility import parse_args, print_arguments, SmoothedValue
 import paddle
 import paddle.fluid as fluid
 import reader
-import models.yolov3 as models
+from models.yolov3 import YOLOv3
 from learning_rate import exponential_with_warmup_decay
 from config import cfg
 
@@ -42,27 +42,21 @@ def train():
     if not os.path.exists(cfg.model_save_dir):
         os.makedirs(cfg.model_save_dir)
 
-    model = models.YOLOv3(cfg.model_cfg_path, use_pyreader=cfg.use_pyreader)
+    model = YOLOv3(cfg.model_cfg_path, use_pyreader=cfg.use_pyreader)
     model.build_model()
-    input_size = model.get_input_size()
+    input_size = cfg.input_size
     loss = model.loss()
     loss.persistable = True
-    print("cfg.learning",cfg.learning_rate)
-    print("cfg.decay",cfg.decay)
 
     devices = os.getenv("CUDA_VISIBLE_DEVICES") or ""
    devices_num = len(devices.split(","))
     print("Found {} CUDA devices.".format(devices_num))
 
-    learning_rate = float(cfg.learning_rate)
+    learning_rate = cfg.learning_rate
     boundaries = cfg.lr_steps
     gamma = cfg.lr_gamma
     step_num = len(cfg.lr_steps)
-    if isinstance(gamma, list):
-        values = [learning_rate * g for g in gamma]
-    else:
-        values = [learning_rate * (gamma**i) for i in range(step_num + 1)]
+    values = [learning_rate * (gamma**i) for i in range(step_num + 1)]
 
     optimizer = fluid.optimizer.Momentum(
         learning_rate=exponential_with_warmup_decay(
@@ -70,10 +64,9 @@ def train():
             boundaries=boundaries,
             values=values,
             warmup_iter=cfg.warm_up_iter,
-            warmup_factor=cfg.warm_up_factor,
-            start_step=cfg.start_iter),
-        regularization=fluid.regularizer.L2Decay(float(cfg.decay)),
-        momentum=float(cfg.momentum))
+            warmup_factor=cfg.warm_up_factor),
+        regularization=fluid.regularizer.L2Decay(cfg.weight_decay),
+        momentum=cfg.momentum)
     optimizer.minimize(loss)
 
     fluid.memory_optimize(fluid.default_main_program())
 
@@ -98,11 +91,11 @@ def train():
     mixup_iter = cfg.max_iter - cfg.start_iter - cfg.no_mixup_iter
     if cfg.use_pyreader:
-        train_reader = reader.train(input_size, batch_size=int(cfg.batch)/devices_num, shuffle=True, mixup_iter=mixup_iter*devices_num, random_sizes=random_sizes, interval=10, pyreader_num=devices_num, use_multiprocessing=cfg.use_multiprocess)
+        train_reader = reader.train(input_size, batch_size=cfg.batch_size/devices_num, shuffle=True, mixup_iter=mixup_iter*devices_num, random_sizes=random_sizes, interval=10, pyreader_num=devices_num, use_multiprocessing=cfg.use_multiprocess)
         py_reader = model.py_reader
         py_reader.decorate_paddle_reader(train_reader)
     else:
-        train_reader = reader.train(input_size, batch_size=int(cfg.batch), shuffle=True, mixup_iter=mixup_iter, random_sizes=random_sizes, use_multiprocessing=cfg.use_multiprocess)
+        train_reader = reader.train(input_size, batch_size=cfg.batch_size, shuffle=True, mixup_iter=mixup_iter, random_sizes=random_sizes, use_multiprocessing=cfg.use_multiprocess)
         feeder = fluid.DataFeeder(place=place, feed_list=model.feeds())
 
     def save_model(postfix):
...
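The reader call above splits the global `cfg.batch_size` across the visible devices on the `py_reader` path. A rough, self-contained sketch of that arithmetic (the integer `//` is my choice for clarity; the diff itself uses `/`):

```python
# Sketch of the per-device batch computation behind the reader.train(...) call.
import os

batch_size = 64                                # cfg.batch_size
devices = os.getenv("CUDA_VISIBLE_DEVICES") or ""
devices_num = len(devices.split(","))          # unset env -> 1 "device"
per_device_batch = batch_size // devices_num   # assumed integer split per device
mixup_iter = 500200 - 0 - 40000                # max_iter - start_iter - no_mixup_iter
print(devices_num, per_device_batch, mixup_iter * devices_num)
```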
@@ -108,14 +108,15 @@ def parse_args():
     add_arg('start_iter', int, 0, "Start iteration.")
     add_arg('use_multiprocess', bool, True, "add multiprocess.")
     #SOLVER
+    add_arg('batch_size', int, 64, "Learning rate.")
     add_arg('learning_rate', float, 0.001, "Learning rate.")
     add_arg('max_iter', int, 500200, "Iter number.")
     add_arg('snapshot_iter', int, 2000, "Save model every snapshot stride.")
+    add_arg('label_smooth', bool, True, "Use label smooth in class label.")
+    add_arg('no_mixup_iter', int, 40000, "Disable mixup in last N iter.")
     # TRAIN TEST INFER
     add_arg('input_size', int, 608, "Image input size of YOLOv3.")
     add_arg('random_shape', bool, True, "Resize to random shape for train reader.")
-    add_arg('label_smooth', bool, True, "Use label smooth in class label.")
-    add_arg('no_mixup_iter', int, 40000, "Disable mixup in last N iter.")
     add_arg('valid_thresh', float, 0.005, "Valid confidence score for NMS.")
     add_arg('nms_thresh', float, 0.45, "NMS threshold.")
     add_arg('nms_topk', int, 400, "The number of boxes to perform NMS.")
...
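`add_arg` itself is defined in `utility.py` and is not part of this diff; the stand-in below is an assumption about its shape (a thin `argparse` wrapper), included only so the flags added above can be exercised in isolation.

```python
# Hypothetical stand-in for utility.add_arg -- a thin argparse wrapper.
# The real helper lives in utility.py and is not shown in this commit.
import argparse
import distutils.util

parser = argparse.ArgumentParser()

def add_arg(name, type, default, help):
    # bool flags need a string->bool parser so "--label_smooth False" works
    arg_type = distutils.util.strtobool if type == bool else type
    parser.add_argument("--" + name, type=arg_type, default=default, help=help)

add_arg('batch_size',    int,   64,    "Mini-batch size.")
add_arg('learning_rate', float, 0.001, "Learning rate.")
add_arg('label_smooth',  bool,  True,  "Use label smooth in class label.")

print(parser.parse_args([]).batch_size)   # 64
```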