add yolov3 ce (#2312)

99e7dd5e · u010070587 · Kaipeng Deng · 27730332 · 99e7dd5e · 99e7dd5e
14 changed files
--- a/PaddleCV/yolov3/.run_ce.sh
+++ b/PaddleCV/yolov3/.run_ce.sh
+#!/bin/bash
+#This file is only used for continuous evaluation.
+# Run 1: expose a single GPU (device 0) and train for 200 iterations,
+# snapshotting every 100. train.py's stdout is piped into _ce.py, which
+# reads the whole log from stdin and extracts the KPI records.
+export CUDA_VISIBLE_DEVICES=0
+python train.py --enable_ce True --use_multiprocess False --snapshot_iter 100 --max_iter 200 | python _ce.py
+# Run 2: the same command with eight GPUs (devices 0-7) visible.
+export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
+python train.py --enable_ce True --use_multiprocess False --snapshot_iter 100 --max_iter 200 | python _ce.py
--- a/PaddleCV/yolov3/README.md
+++ b/PaddleCV/yolov3/README.md
@@ -181,4 +181,3 @@ Visualization of infer result is shown as below:
 <img src="image/000000515077.png" height=300 width=400 hspace='10'/> <br />
 YOLOv3 Visualization Examples
 </p>
--- a/PaddleCV/yolov3/README_cn.md
+++ b/PaddleCV/yolov3/README_cn.md
@@ -182,4 +182,3 @@ Train Loss
 <img src="image/000000515077.png" height=300 width=400 hspace='10'/> <br />
 YOLOv3 预测可视化
 </p>
--- a/PaddleCV/yolov3/_ce.py
+++ b/PaddleCV/yolov3/_ce.py
+### This file is only used for continuous evaluation test!
+from __future__ import print_function
+from __future__ import division
+from __future__ import absolute_import
+import os
+import sys
+sys.path.append(os.environ['ceroot'])
+from kpi import CostKpi
+from kpi import DurationKpi
+# KPI objects for the continuous-evaluation run: one cost KPI and one
+# duration KPI for each of the "1card" and "8card" names.
+# NOTE(review): the second and third positional arguments (0.02 / 0.1 and 0)
+# are presumably a tolerance and a skip count -- their meaning is defined by
+# the `kpi` module loaded from $ceroot and should be confirmed there.
+train_cost_1card_kpi = CostKpi(
+    'train_cost_1card', 0.02, 0, actived=True, desc='train cost')
+train_duration_1card_kpi = DurationKpi(
+    'train_duration_1card', 0.1, 0, actived=True, desc='train duration')
+train_cost_8card_kpi = CostKpi(
+    'train_cost_8card', 0.02, 0, actived=True, desc='train cost')
+train_duration_8card_kpi = DurationKpi(
+    'train_duration_8card', 0.1, 0, actived=True, desc='train duration')
+# log_to_ce() indexes these KPIs by their `name` attribute, so only KPI names
+# listed here can be recorded.
+tracking_kpis = [
+    train_cost_1card_kpi, train_duration_1card_kpi, train_cost_8card_kpi,
+    train_duration_8card_kpi
+]
+def parse_log(log):
+    """Yield ``(kpi_name, kpi_value)`` pairs found in a training log.
+
+    A KPI record is a line that, once stripped and split on tabs, has
+    exactly three fields and whose first field is the literal ``kpis``:
+    ``kpis<TAB>name<TAB>value``. All other lines are ignored.
+
+    Args:
+        log: the complete log text as a single string.
+
+    Yields:
+        Tuples of the KPI name (str) and its value converted with
+        ``float``; a non-numeric third field raises ``ValueError``.
+    """
+    for line in log.split('\n'):
+        fs = line.strip().split('\t')
+        # Echo the split fields of every log line (not only KPI lines).
+        print(fs)
+        if len(fs) == 3 and fs[0] == 'kpis':
+            print("-----%s" % fs)
+            kpi_name = fs[1]
+            kpi_value = float(fs[2])
+            yield kpi_name, kpi_value
+def log_to_ce(log):
+    """Record every KPI found in ``log`` on its tracked KPI object.
+
+    Args:
+        log: the complete training log text as a single string.
+
+    Raises:
+        KeyError: if the log contains a KPI name that is not the ``name``
+            of any entry in ``tracking_kpis``.
+    """
+    # Index the tracked KPI objects by name for lookup while parsing.
+    kpi_tracker = {}
+    for kpi in tracking_kpis:
+        kpi_tracker[kpi.name] = kpi
+    for (kpi_name, kpi_value) in parse_log(log):
+        print(kpi_name, kpi_value)
+        # persist() is called after every record, not once at the end.
+        kpi_tracker[kpi_name].add_record(kpi_value)
+        kpi_tracker[kpi_name].persist()
+# Script entry point: read the whole training log from stdin (the script is
+# used at the receiving end of a pipe) and record the KPIs it contains.
+if __name__ == '__main__':
+    log = sys.stdin.read()
+    log_to_ce(log)
--- a/PaddleCV/yolov3/box_utils.py
+++ b/PaddleCV/yolov3/box_utils.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
@@ -47,6 +46,7 @@ def coco_anno_box_to_center_relative(box, img_height, img_width):
    return np.array([x, y, w, h])
 def clip_relative_box_in_image(x, y, w, h):
    """Clip relative box coordinates x, y, w, h to [0, 1]"""
    x1 = max(x - w / 2, 0.)
@@ -58,6 +58,7 @@ def clip_relative_box_in_image(x, y, w, h):
    w = x2 - x1
    h = y2 - y1
 def box_xywh_to_xyxy(box):
    shape = box.shape
    assert shape[-1] == 4, "Box shape[-1] should be 4."
@@ -68,6 +69,7 @@ def box_xywh_to_xyxy(box):
    box = box.reshape(shape)
    return box
 def box_iou_xywh(box1, box2):
    assert box1.shape[-1] == 4, "Box1 shape[-1] should be 4."
    assert box2.shape[-1] == 4, "Box2 shape[-1] should be 4."
@@ -92,6 +94,7 @@ def box_iou_xywh(box1, box2):
    return inter_area / (b1_area + b2_area - inter_area)
 def box_iou_xyxy(box1, box2):
    assert box1.shape[-1] == 4, "Box1 shape[-1] should be 4."
    assert box2.shape[-1] == 4, "Box2 shape[-1] should be 4."
@@ -114,17 +117,21 @@ def box_iou_xyxy(box1, box2):
    return inter_area / (b1_area + b2_area - inter_area)
 def box_crop(boxes, labels, scores, crop, img_shape):
    x, y, w, h = map(float, crop)
    im_w, im_h = map(float, img_shape)
    boxes = boxes.copy()
-    boxes[:, 0], boxes[:, 2] = (boxes[:, 0] - boxes[:, 2] / 2) * im_w, (boxes[:, 0] + boxes[:, 2] / 2) * im_w
+    boxes[:, 0], boxes[:, 2] = (boxes[:, 0] - boxes[:, 2] / 2) * im_w, (
-    boxes[:, 1], boxes[:, 3] = (boxes[:, 1] - boxes[:, 3] / 2) * im_h, (boxes[:, 1] + boxes[:, 3] / 2) * im_h
+        boxes[:, 0] + boxes[:, 2] / 2) * im_w
+    boxes[:, 1], boxes[:, 3] = (boxes[:, 1] - boxes[:, 3] / 2) * im_h, (
+        boxes[:, 1] + boxes[:, 3] / 2) * im_h
    crop_box = np.array([x, y, x + w, y + h])
    centers = (boxes[:, :2] + boxes[:, 2:]) / 2.0
-    mask = np.logical_and(crop_box[:2] <= centers, centers <= crop_box[2:]).all(axis=1)
+    mask = np.logical_and(crop_box[:2] <= centers, centers <= crop_box[2:]).all(
+        axis=1)
    boxes[:, :2] = np.maximum(boxes[:, :2], crop_box[:2])
    boxes[:, 2:] = np.minimum(boxes[:, 2:], crop_box[2:])
@@ -135,12 +142,20 @@ def box_crop(boxes, labels, scores, crop, img_shape):
    boxes = boxes * np.expand_dims(mask.astype('float32'), axis=1)
    labels = labels * mask.astype('float32')
    scores = scores * mask.astype('float32')
-    boxes[:, 0], boxes[:, 2] = (boxes[:, 0] + boxes[:, 2]) / 2 / w, (boxes[:, 2] - boxes[:, 0]) / w
+    boxes[:, 0], boxes[:, 2] = (boxes[:, 0] + boxes[:, 2]) / 2 / w, (
-    boxes[:, 1], boxes[:, 3] = (boxes[:, 1] + boxes[:, 3]) / 2 / h, (boxes[:, 3] - boxes[:, 1]) / h
+        boxes[:, 2] - boxes[:, 0]) / w
+    boxes[:, 1], boxes[:, 3] = (boxes[:, 1] + boxes[:, 3]) / 2 / h, (
+        boxes[:, 3] - boxes[:, 1]) / h
    return boxes, labels, scores, mask.sum()
-def draw_boxes_on_image(image_path, boxes, scores, labels, label_names, score_thresh=0.5):
+def draw_boxes_on_image(image_path,
+                        boxes,
+                        scores,
+                        labels,
+                        label_names,
+                        score_thresh=0.5):
    image = np.array(Image.open(image_path))
    plt.figure()
    _, ax = plt.subplots(1)
@@ -158,22 +173,33 @@ def draw_boxes_on_image(image_path, boxes, scores, labels, label_names, score_th
        if label not in colors:
            colors[label] = plt.get_cmap('hsv')(label / len(label_names))
        x1, y1, x2, y2 = box[0], box[1], box[2], box[3]
-        rect = plt.Rectangle((x1, y1), x2 - x1, y2 - y1, 
+        rect = plt.Rectangle(
-                            fill=False, linewidth=2.0, 
+            (x1, y1),
+            x2 - x1,
+            y2 - y1,
+            fill=False,
+            linewidth=2.0,
            edgecolor=colors[label])
        ax.add_patch(rect)
-        ax.text(x1, y1, '{} {:.4f}'.format(label_names[label], score), 
+        ax.text(
-                verticalalignment='bottom', horizontalalignment='left',
+            x1,
-                bbox={'facecolor': colors[label], 'alpha': 0.5, 'pad': 0},
+            y1,
-                fontsize=8, color='white')
+            '{} {:.4f}'.format(label_names[label], score),
-        print("\t {:15s} at {:25} score: {:.5f}".format(
+            verticalalignment='bottom',
-                label_names[int(label)], str(list(map(int, list(box)))), score))
+            horizontalalignment='left',
+            bbox={'facecolor': colors[label],
+                  'alpha': 0.5,
+                  'pad': 0},
+            fontsize=8,
+            color='white')
+        print("\t {:15s} at {:25} score: {:.5f}".format(label_names[int(
+            label)], str(list(map(int, list(box)))), score))
    image_name = image_name.replace('jpg', 'png')
    plt.axis('off')
    plt.gca().xaxis.set_major_locator(plt.NullLocator())
    plt.gca().yaxis.set_major_locator(plt.NullLocator())
-    plt.savefig("./output/{}".format(image_name), bbox_inches='tight', pad_inches=0.0)
+    plt.savefig(
+        "./output/{}".format(image_name), bbox_inches='tight', pad_inches=0.0)
    print("Detect result save at ./output/{}\n".format(image_name))
    plt.cla()
    plt.close('all')
--- a/PaddleCV/yolov3/config.py
+++ b/PaddleCV/yolov3/config.py
@@ -33,7 +33,6 @@ _C.gt_min_area = -1
 # max target box number in an image
 _C.max_box_num = 50
 #
 # Training options
 #
@@ -53,7 +52,6 @@ _C.nms_posk = 100
 # score threshold for draw box in debug mode
 _C.draw_thresh = 0.5
 #
 # Model options
 #
@@ -65,7 +63,9 @@ _C.pixel_means = [0.485, 0.456, 0.406]
 _C.pixel_stds = [0.229, 0.224, 0.225]
 # anchor box width and height
-_C.anchors = [10, 13, 16, 30, 33, 23, 30, 61, 62, 45, 59, 119, 116, 90, 156, 198, 373, 326]
+_C.anchors = [
+    10, 13, 16, 30, 33, 23, 30, 61, 62, 45, 59, 119, 116, 90, 156, 198, 373, 326
+]
 # anchor mask of each yolo layer
 _C.anchor_masks = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
@@ -73,7 +73,6 @@ _C.anchor_masks = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
 # IoU threshold to ignore objectness loss of pred box
 _C.ignore_thresh = .7
 #
 # SOLVER options
 #

--- a/PaddleCV/yolov3/eval.py
+++ b/PaddleCV/yolov3/eval.py
@@ -79,8 +79,7 @@ def eval():
    total_time = 0
    for batch_id, batch_data in enumerate(test_reader()):
        start_time = time.time()
-        batch_outputs = exe.run(
+        batch_outputs = exe.run(fetch_list=[v.name for v in fetch_list],
-            fetch_list=[v.name for v in fetch_list],
                                feed=feeder.feed(batch_data),
                                return_numpy=False,
                                use_program_cache=True)

--- a/PaddleCV/yolov3/image_utils.py
+++ b/PaddleCV/yolov3/image_utils.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
@@ -63,13 +62,8 @@ def random_crop(img,
        return img, boxes
    if not constraints:
-        constraints = [
+        constraints = [(0.1, 1.0), (0.3, 1.0), (0.5, 1.0), (0.7, 1.0),
-                (0.1, 1.0),
+                       (0.9, 1.0), (0.0, 1.0)]
-                (0.3, 1.0),
-                (0.5, 1.0),
-                (0.7, 1.0),
-                (0.9, 1.0),
-                (0.0, 1.0)]
    img = Image.fromarray(img)
    w, h = img.size
@@ -83,12 +77,9 @@ def random_crop(img,
            crop_w = int(w * scale * np.sqrt(aspect_ratio))
            crop_x = random.randrange(w - crop_w)
            crop_y = random.randrange(h - crop_h)
-            crop_box = np.array([[
+            crop_box = np.array([[(crop_x + crop_w / 2.0) / w,
-                (crop_x + crop_w / 2.0) / w,
                                  (crop_y + crop_h / 2.0) / h,
-                crop_w / float(w),
+                                  crop_w / float(w), crop_h / float(h)]])
-                crop_h /float(h)
-                ]])
            iou = box_utils.box_iou_xywh(crop_box, boxes)
            if min_iou <= iou.min() and max_iou >= iou.max():
@@ -108,12 +99,14 @@ def random_crop(img,
    img = np.asarray(img)
    return img, boxes, labels, scores
 def random_flip(img, gtboxes, thresh=0.5):
    if random.random() > thresh:
        img = img[:, ::-1, :]
        gtboxes[:, 0] = 1.0 - gtboxes[:, 0]
    return img, gtboxes
 def random_interp(img, size, interp=None):
    interp_method = [
        cv2.INTER_NEAREST,
@@ -127,10 +120,11 @@ def random_interp(img, size, interp=None):
    h, w, _ = img.shape
    im_scale_x = size / float(w)
    im_scale_y = size / float(h)
-    img = cv2.resize(img, None, None, fx=im_scale_x, fy=im_scale_y, 
+    img = cv2.resize(
-                     interpolation=interp)
+        img, None, None, fx=im_scale_x, fy=im_scale_y, interpolation=interp)
    return img
 def random_expand(img,
                  gtboxes,
                  max_ratio=4.,
@@ -151,15 +145,15 @@ def random_expand(img,
        ratio_y = random.uniform(1, max_ratio)
    oh = int(h * ratio_y)
    ow = int(w * ratio_x)
-    off_x = random.randint(0, ow -w)
+    off_x = random.randint(0, ow - w)
-    off_y = random.randint(0, oh -h)
+    off_y = random.randint(0, oh - h)
    out_img = np.zeros((oh, ow, c))
    if fill and len(fill) == c:
        for i in range(c):
            out_img[:, :, i] = fill[i] * 255.0
-    out_img[off_y: off_y + h, off_x: off_x + w, :] = img
+    out_img[off_y:off_y + h, off_x:off_x + w, :] = img
    gtboxes[:, 0] = ((gtboxes[:, 0] * w) + off_x) / float(ow)
    gtboxes[:, 1] = ((gtboxes[:, 1] * h) + off_y) / float(oh)
    gtboxes[:, 2] = gtboxes[:, 2] / ratio_x
@@ -167,21 +161,17 @@ def random_expand(img,
    return out_img.astype('uint8'), gtboxes
 def shuffle_gtbox(gtbox, gtlabel, gtscore):
-    gt = np.concatenate([gtbox, gtlabel[:, np.newaxis], 
+    gt = np.concatenate(
-                         gtscore[:, np.newaxis]], axis=1)
+        [gtbox, gtlabel[:, np.newaxis], gtscore[:, np.newaxis]], axis=1)
    idx = np.arange(gt.shape[0])
    np.random.shuffle(idx)
    gt = gt[idx, :]
    return gt[:, :4], gt[:, 4], gt[:, 5]
-def image_mixup(img1, 
-                gtboxes1, 
+def image_mixup(img1, gtboxes1, gtlabels1, gtscores1, img2, gtboxes2, gtlabels2,
-                gtlabels1, 
-                gtscores1, 
-                img2, 
-                gtboxes2, 
-                gtlabels2, 
                gtscores2):
    factor = np.random.beta(1.5, 1.5)
    factor = max(0.0, min(1.0, factor))
@@ -229,6 +219,7 @@ def image_mixup(img1,
    gtscores[:gt_num] = gtscores_all[:gt_num]
    return img.astype('uint8'), gtboxes, gtlabels, gtscores
 def image_augment(img, gtboxes, gtlabels, gtscores, size, means=None):
    img = random_distort(img)
    img, gtboxes = random_expand(img, gtboxes, fill=means)
@@ -240,4 +231,3 @@ def image_augment(img, gtboxes, gtlabels, gtscores,  size, means=None):
    return img.astype('float32'), gtboxes.astype('float32'), \
            gtlabels.astype('int32'), gtscores.astype('float32')
--- a/PaddleCV/yolov3/infer.py
+++ b/PaddleCV/yolov3/infer.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import os
 import time
 import numpy as np
@@ -54,12 +53,12 @@ def infer():
            if image_name.split('.')[-1] in ['jpg', 'png']:
                image_names.append(image_name)
    for image_name in image_names:
-        infer_reader = reader.infer(input_size, os.path.join(cfg.image_path, image_name))
+        infer_reader = reader.infer(input_size,
+                                    os.path.join(cfg.image_path, image_name))
        label_names, _ = reader.get_label_infos()
        data = next(infer_reader())
        im_shape = data[0][2]
-        outputs = exe.run(
+        outputs = exe.run(fetch_list=[v.name for v in fetch_list],
-            fetch_list=[v.name for v in fetch_list],
                          feed=feeder.feed(data),
                          return_numpy=False)
        bboxes = np.array(outputs[0])
@@ -71,7 +70,8 @@ def infer():
        boxes = bboxes[:, 2:].astype('float32')
        path = os.path.join(cfg.image_path, image_name)
-        box_utils.draw_boxes_on_image(path, boxes, scores, labels, label_names, cfg.draw_thresh)
+        box_utils.draw_boxes_on_image(path, boxes, scores, labels, label_names,
+                                      cfg.draw_thresh)
 if __name__ == '__main__':

--- a/PaddleCV/yolov3/models/darknet.py
+++ b/PaddleCV/yolov3/models/darknet.py
@@ -17,6 +17,7 @@ from paddle.fluid.param_attr import ParamAttr
 from paddle.fluid.initializer import Constant
 from paddle.fluid.regularizer import L2Decay
 def conv_bn_layer(input,
                  ch_out,
                  filter_size,
@@ -32,8 +33,9 @@ def conv_bn_layer(input,
        stride=stride,
        padding=padding,
        act=None,
-        param_attr=ParamAttr(initializer=fluid.initializer.Normal(0., 0.02),
+        param_attr=ParamAttr(
-                name=name+".conv.weights"),
+            initializer=fluid.initializer.Normal(0., 0.02),
+            name=name + ".conv.weights"),
        bias_attr=False)
    bn_name = name + ".bn"
@@ -55,6 +57,7 @@ def conv_bn_layer(input,
        out = fluid.layers.leaky_relu(x=out, alpha=0.1)
    return out
 def downsample(input,
               ch_out,
               filter_size=3,
@@ -62,7 +65,8 @@ def downsample(input,
               padding=1,
               is_test=True,
               name=None):
-    return conv_bn_layer(input, 
+    return conv_bn_layer(
+        input,
        ch_out=ch_out,
        filter_size=filter_size,
        stride=stride,
@@ -70,44 +74,58 @@ def downsample(input,
        is_test=is_test,
        name=name)
 def basicblock(input, ch_out, is_test=True, name=None):
-    conv1 = conv_bn_layer(input, ch_out, 1, 1, 0, 
+    conv1 = conv_bn_layer(
-                          is_test=is_test, name=name+".0")
+        input, ch_out, 1, 1, 0, is_test=is_test, name=name + ".0")
-    conv2 = conv_bn_layer(conv1, ch_out*2, 3, 1, 1, 
+    conv2 = conv_bn_layer(
-                          is_test=is_test, name=name+".1")
+        conv1, ch_out * 2, 3, 1, 1, is_test=is_test, name=name + ".1")
    out = fluid.layers.elementwise_add(x=input, y=conv2, act=None)
    return out
 def layer_warp(block_func, input, ch_out, count, is_test=True, name=None):
-    res_out = block_func(input, ch_out, is_test=is_test, 
+    res_out = block_func(
-                         name='{}.0'.format(name))
+        input, ch_out, is_test=is_test, name='{}.0'.format(name))
    for j in range(1, count):
-        res_out = block_func(res_out, ch_out, is_test=is_test, 
+        res_out = block_func(
-                             name='{}.{}'.format(name, j))
+            res_out, ch_out, is_test=is_test, name='{}.{}'.format(name, j))
    return res_out
-DarkNet_cfg = {
-        53: ([1,2,8,8,4],basicblock)
+DarkNet_cfg = {53: ([1, 2, 8, 8, 4], basicblock)}
-}
 def add_DarkNet53_conv_body(body_input, is_test=True):
    stages, block_func = DarkNet_cfg[53]
    stages = stages[0:5]
-    conv1 = conv_bn_layer(body_input, ch_out=32, filter_size=3, 
+    conv1 = conv_bn_layer(
-                          stride=1, padding=1, is_test=is_test, 
+        body_input,
+        ch_out=32,
+        filter_size=3,
+        stride=1,
+        padding=1,
+        is_test=is_test,
        name="yolo_input")
-    downsample_ = downsample(conv1, ch_out=conv1.shape[1]*2, 
+    downsample_ = downsample(
+        conv1,
+        ch_out=conv1.shape[1] * 2,
        is_test=is_test,
        name="yolo_input.downsample")
    blocks = []
    for i, stage in enumerate(stages):
-        block = layer_warp(block_func, downsample_, 32 *(2**i), 
+        block = layer_warp(
-                           stage, is_test=is_test, 
+            block_func,
+            downsample_,
+            32 * (2**i),
+            stage,
+            is_test=is_test,
            name="stage.{}".format(i))
        blocks.append(block)
        if i < len(stages) - 1:  # do not downsaple in the last stage
-            downsample_ = downsample(block, ch_out=block.shape[1]*2, 
+            downsample_ = downsample(
+                block,
+                ch_out=block.shape[1] * 2,
                is_test=is_test,
                name="stage.{}.downsample".format(i))
    return blocks[-1:-4:-1]
--- a/PaddleCV/yolov3/models/yolov3.py
+++ b/PaddleCV/yolov3/models/yolov3.py
@@ -26,26 +26,48 @@ from config import cfg
 from .darknet import add_DarkNet53_conv_body
 from .darknet import conv_bn_layer
 def yolo_detection_block(input, channel, is_test=True, name=None):
    assert channel % 2 == 0, \
            "channel {} cannot be divided by 2".format(channel)
    conv = input
    for j in range(2):
-        conv = conv_bn_layer(conv, channel, filter_size=1, 
+        conv = conv_bn_layer(
-                             stride=1, padding=0, is_test=is_test, 
+            conv,
+            channel,
+            filter_size=1,
+            stride=1,
+            padding=0,
+            is_test=is_test,
            name='{}.{}.0'.format(name, j))
-        conv = conv_bn_layer(conv, channel*2, filter_size=3, 
+        conv = conv_bn_layer(
-                             stride=1, padding=1, is_test=is_test, 
+            conv,
+            channel * 2,
+            filter_size=3,
+            stride=1,
+            padding=1,
+            is_test=is_test,
            name='{}.{}.1'.format(name, j))
-    route = conv_bn_layer(conv, channel, filter_size=1, stride=1, 
+    route = conv_bn_layer(
-                          padding=0, is_test=is_test, 
+        conv,
+        channel,
+        filter_size=1,
+        stride=1,
+        padding=0,
+        is_test=is_test,
        name='{}.2'.format(name))
-    tip = conv_bn_layer(route,channel*2, filter_size=3, stride=1, 
+    tip = conv_bn_layer(
-                        padding=1, is_test=is_test, 
+        route,
+        channel * 2,
+        filter_size=3,
+        stride=1,
+        padding=1,
+        is_test=is_test,
        name='{}.tip'.format(name))
    return route, tip
-def upsample(input, scale=2,name=None):
+def upsample(input, scale=2, name=None):
    # get dynamic upsample output shape
    shape_nchw = fluid.layers.shape(input)
    shape_hw = fluid.layers.slice(shape_nchw, axes=[0], starts=[2], ends=[4])
@@ -56,16 +78,12 @@ def upsample(input, scale=2,name=None):
     # resize by actual_shape
    out = fluid.layers.resize_nearest(
-        input=input,
+        input=input, scale=scale, actual_shape=out_shape, name=name)
-        scale=scale,
-        actual_shape=out_shape,
-        name=name)
    return out
 class YOLOv3(object):
-    def __init__(self, 
+    def __init__(self, is_train=True, use_random=True):
-                is_train=True,
-                use_random=True):
        self.is_train = is_train
        self.use_random = use_random
        self.outputs = []
@@ -77,10 +95,8 @@ class YOLOv3(object):
        if self.is_train:
            self.py_reader = fluid.layers.py_reader(
                capacity=64,
-                shapes = [[-1] + self.image_shape, 
+                shapes=[[-1] + self.image_shape, [-1, cfg.max_box_num, 4],
-                          [-1, cfg.max_box_num, 4], 
+                        [-1, cfg.max_box_num], [-1, cfg.max_box_num]],
-                          [-1, cfg.max_box_num], 
-                          [-1, cfg.max_box_num]],
                lod_levels=[0, 0, 0, 0],
                dtypes=['float32'] * 2 + ['int32'] + ['float32'],
                use_double_buffer=True)
@@ -88,8 +104,7 @@ class YOLOv3(object):
                    fluid.layers.read_file(self.py_reader)
        else:
            self.image = fluid.layers.data(
-                    name='image', shape=self.image_shape, dtype='float32'
+                name='image', shape=self.image_shape, dtype='float32')
-                    )
            self.im_shape = fluid.layers.data(
                name="im_shape", shape=[2], dtype='int32')
            self.im_id = fluid.layers.data(
@@ -110,10 +125,10 @@ class YOLOv3(object):
        blocks = add_DarkNet53_conv_body(self.image, not self.is_train)
        for i, block in enumerate(blocks):
            if i > 0:
-                block = fluid.layers.concat(
+                block = fluid.layers.concat(input=[route, block], axis=1)
-                    input=[route, block],
+            route, tip = yolo_detection_block(
-                    axis=1)
+                block,
-            route, tip = yolo_detection_block(block, channel=512//(2**i), 
+                channel=512 // (2**i),
                is_test=(not self.is_train),
                name="yolo_block.{}".format(i))
@@ -126,9 +141,11 @@ class YOLOv3(object):
                stride=1,
                padding=0,
                act=None,
-                param_attr=ParamAttr(initializer=fluid.initializer.Normal(0., 0.02),
+                param_attr=ParamAttr(
+                    initializer=fluid.initializer.Normal(0., 0.02),
                    name="yolo_output.{}.conv.weights".format(i)),
-                bias_attr=ParamAttr(initializer=fluid.initializer.Constant(0.0),
+                bias_attr=ParamAttr(
+                    initializer=fluid.initializer.Constant(0.0),
                    regularizer=L2Decay(0.),
                    name="yolo_output.{}.conv.bias".format(i)))
            self.outputs.append(block_out)
@@ -136,7 +153,7 @@ class YOLOv3(object):
            if i < len(blocks) - 1:
                route = conv_bn_layer(
                    input=route,
-                    ch_out=256//(2**i),
+                    ch_out=256 // (2**i),
                    filter_size=1,
                    stride=1,
                    padding=0,
@@ -145,7 +162,6 @@ class YOLOv3(object):
                # upsample
                route = upsample(route)
        for i, out in enumerate(self.outputs):
            anchor_mask = cfg.anchor_masks[i]
@@ -161,10 +177,10 @@ class YOLOv3(object):
                    ignore_thresh=cfg.ignore_thresh,
                    downsample_ratio=self.downsample,
                    use_label_smooth=cfg.label_smooth,
-                        name="yolo_loss"+str(i))
+                    name="yolo_loss" + str(i))
                self.losses.append(fluid.layers.reduce_mean(loss))
            else:
-                mask_anchors=[]
+                mask_anchors = []
                for m in anchor_mask:
                    mask_anchors.append(cfg.anchors[2 * m])
                    mask_anchors.append(cfg.anchors[2 * m + 1])
@@ -175,13 +191,14 @@ class YOLOv3(object):
                    class_num=cfg.class_num,
                    conf_thresh=cfg.valid_thresh,
                    downsample_ratio=self.downsample,
-                        name="yolo_box"+str(i))
+                    name="yolo_box" + str(i))
                self.boxes.append(boxes)
-                self.scores.append(fluid.layers.transpose(scores, perm=[0, 2, 1]))
+                self.scores.append(
+                    fluid.layers.transpose(
+                        scores, perm=[0, 2, 1]))
            self.downsample //= 2
    def loss(self):
        return sum(self.losses)
@@ -197,4 +214,3 @@ class YOLOv3(object):
            nms_threshold=cfg.nms_thresh,
            background_label=-1,
            name="multiclass_nms")
--- a/PaddleCV/yolov3/reader.py
+++ b/PaddleCV/yolov3/reader.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
@@ -55,19 +54,15 @@ class DataSetReader(object):
        if mode == 'train':
            cfg.train_file_list = os.path.join(cfg.data_dir,
                                               cfg.train_file_list)
-            cfg.train_data_dir = os.path.join(cfg.data_dir, 
+            cfg.train_data_dir = os.path.join(cfg.data_dir, cfg.train_data_dir)
-                                              cfg.train_data_dir)
            self.COCO = COCO(cfg.train_file_list)
            self.img_dir = cfg.train_data_dir
        elif mode == 'test' or mode == 'infer':
-            cfg.val_file_list = os.path.join(cfg.data_dir, 
+            cfg.val_file_list = os.path.join(cfg.data_dir, cfg.val_file_list)
-                                             cfg.val_file_list)
+            cfg.val_data_dir = os.path.join(cfg.data_dir, cfg.val_data_dir)
-            cfg.val_data_dir = os.path.join(cfg.data_dir, 
-                                            cfg.val_data_dir)
            self.COCO = COCO(cfg.val_file_list)
            self.img_dir = cfg.val_data_dir
    def _parse_dataset_catagory(self):
        self.categories = self.COCO.loadCats(self.COCO.getCatIds())
        self.num_category = len(self.categories)
@@ -76,10 +71,7 @@ class DataSetReader(object):
        for category in self.categories:
            self.label_names.append(category['name'])
            self.label_ids.append(int(category['id']))
-        self.category_to_id_map = {
+        self.category_to_id_map = {v: i for i, v in enumerate(self.label_ids)}
-            v: i
-            for i, v in enumerate(self.label_ids)
-        }
        print("Load in {} categories.".format(self.num_category))
        self.has_parsed_categpry = True
@@ -93,7 +85,8 @@ class DataSetReader(object):
        img_height = img['height']
        img_width = img['width']
        anno = self.COCO.loadAnns(
-                self.COCO.getAnnIds(imgIds=img['id'], iscrowd=None))
+            self.COCO.getAnnIds(
+                imgIds=img['id'], iscrowd=None))
        gt_index = 0
        for target in anno:
            if target['area'] < cfg.gt_min_area:
@@ -141,7 +134,7 @@ class DataSetReader(object):
        if mode == 'infer':
            return []
        else:
-            return self._parse_images(is_train=(mode=='train'))
+            return self._parse_images(is_train=(mode == 'train'))
    def get_reader(self,
                   mode,
@@ -166,8 +159,12 @@ class DataSetReader(object):
            h, w, _ = im.shape
            im_scale_x = size / float(w)
            im_scale_y = size / float(h)
-            out_img = cv2.resize(im, None, None, 
+            out_img = cv2.resize(
-                                 fx=im_scale_x, fy=im_scale_y, 
+                im,
+                None,
+                None,
+                fx=im_scale_x,
+                fy=im_scale_y,
                interpolation=cv2.INTER_CUBIC)
            mean = np.array(mean).reshape((1, 1, -1))
            std = np.array(std).reshape((1, 1, -1))
@@ -230,7 +227,8 @@ class DataSetReader(object):
                img_size = get_img_size(size, random_sizes)
                while True:
                    img = imgs[read_cnt % len(imgs)]
-                    mixup_img = get_mixup_img(imgs, mixup_iter, total_iter, read_cnt)
+                    mixup_img = get_mixup_img(imgs, mixup_iter, total_iter,
+                                              read_cnt)
                    read_cnt += 1
                    if read_cnt % len(imgs) == 0 and shuffle:
                        np.random.shuffle(imgs)
@@ -249,8 +247,7 @@ class DataSetReader(object):
                imgs = self._parse_images_by_mode(mode)
                batch_out = []
                for img in imgs:
-                    im, im_id, im_shape = img_reader(img, size, 
+                    im, im_id, im_shape = img_reader(img, size, cfg.pixel_means,
-                                                     cfg.pixel_means, 
                                                     cfg.pixel_stds)
                    batch_out.append((im, im_id, im_shape))
                    if len(batch_out) == batch_size:
@@ -262,8 +259,7 @@ class DataSetReader(object):
                img = {}
                img['image'] = image
                img['id'] = 0
-                im, im_id, im_shape = img_reader(img, size, 
+                im, im_id, im_shape = img_reader(img, size, cfg.pixel_means,
-                                                 cfg.pixel_means, 
                                                 cfg.pixel_stds)
                batch_out = [(im, im_id, im_shape)]
                yield batch_out
@@ -273,6 +269,7 @@ class DataSetReader(object):
 dsr = DataSetReader()
 def train(size=416,
          batch_size=64,
          shuffle=True,
@@ -283,7 +280,7 @@ def train(size=416,
          max_queue=32,
          use_multiprocessing=True):
    generator = dsr.get_reader('train', size, batch_size, shuffle,
-                               int(mixup_iter/num_workers), random_sizes)
+                               int(mixup_iter / num_workers), random_sizes)
    if not use_multiprocessing:
        return generator
@@ -319,12 +316,14 @@ def train(size=416,
    return reader
 def test(size=416, batch_size=1):
    return dsr.get_reader('test', size, batch_size)
 def infer(size=416, image=None):
    return dsr.get_reader('infer', size, image=image)
 def get_label_infos():
    return dsr.get_label_infos()
--- a/PaddleCV/yolov3/train.py
+++ b/PaddleCV/yolov3/train.py
@@ -33,7 +33,7 @@ from config import cfg
 def train():
-    if cfg.debug:
+    if cfg.debug or args.enable_ce:
        fluid.default_startup_program().random_seed = 1000
        fluid.default_main_program().random_seed = 1000
        random.seed(0)
@@ -76,15 +76,17 @@ def train():
    if cfg.pretrain:
        if not os.path.exists(cfg.pretrain):
            print("Pretrain weights not found: {}".format(cfg.pretrain))
        def if_exist(var):
            return os.path.exists(os.path.join(cfg.pretrain, var.name))
        fluid.io.load_vars(exe, cfg.pretrain, predicate=if_exist)
-    build_strategy= fluid.BuildStrategy()
+    build_strategy = fluid.BuildStrategy()
    build_strategy.memory_optimize = True
    build_strategy.sync_batch_norm = cfg.syncbn
-    compile_program = fluid.compiler.CompiledProgram(
+    compile_program = fluid.compiler.CompiledProgram(fluid.default_main_program(
-            fluid.default_main_program()).with_data_parallel(
+    )).with_data_parallel(
        loss_name=loss.name, build_strategy=build_strategy)
    random_sizes = [cfg.input_size]
@@ -93,11 +95,15 @@ def train():
    total_iter = cfg.max_iter - cfg.start_iter
    mixup_iter = total_iter - cfg.no_mixup_iter
-    train_reader = reader.train(input_size, 
+    shuffle = True
+    if args.enable_ce:
+        shuffle = False
+    train_reader = reader.train(
+        input_size,
        batch_size=cfg.batch_size,
-                                shuffle=True, 
+        shuffle=shuffle,
-                                total_iter=total_iter*devices_num, 
+        total_iter=total_iter * devices_num,
-                                mixup_iter=mixup_iter*devices_num, 
+        mixup_iter=mixup_iter * devices_num,
        random_sizes=random_sizes,
        use_multiprocessing=cfg.use_multiprocess)
    py_reader = model.py_reader
@@ -130,16 +136,26 @@ def train():
                          .get_tensor())
            print("Iter {:d}, lr {:.6f}, loss {:.6f}, time {:.5f}".format(
                iter_id, lr[0],
-                  smoothed_loss.get_mean_value(), 
+                smoothed_loss.get_mean_value(), start_time - prev_start_time))
-                  start_time - prev_start_time))
            sys.stdout.flush()
            if (iter_id + 1) % cfg.snapshot_iter == 0:
                save_model("model_iter{}".format(iter_id))
                print("Snapshot {} saved, average loss: {}, \
                      average time: {}".format(
-                      iter_id + 1, 
+                    iter_id + 1, snapshot_loss / float(cfg.snapshot_iter),
-                      snapshot_loss / float(cfg.snapshot_iter), 
                    snapshot_time / float(cfg.snapshot_iter)))
+                if args.enable_ce and iter_id == cfg.max_iter - 1:
+                    if devices_num == 1:
+                        print("kpis\ttrain_cost_1card\t%f" %
+                              (snapshot_loss / float(cfg.snapshot_iter)))
+                        print("kpis\ttrain_duration_1card\t%f" %
+                              (snapshot_time / float(cfg.snapshot_iter)))
+                    else:
+                        print("kpis\ttrain_cost_8card\t%f" %
+                              (snapshot_loss / float(cfg.snapshot_iter)))
+                        print("kpis\ttrain_duration_8card\t%f" %
+                              (snapshot_time / float(cfg.snapshot_iter)))
                snapshot_loss = 0
                snapshot_time = 0
    except fluid.core.EOFException:

--- a/PaddleCV/yolov3/utility.py
+++ b/PaddleCV/yolov3/utility.py
@@ -126,6 +126,7 @@ def parse_args():
            "The single image used to inference and visualize. None to inference all images in image_path")
    add_arg('draw_thresh',      float,  0.5,
            "Confidence score threshold to draw prediction box in image in debug mode")
+    add_arg('enable_ce',        bool,  False,                "If set True, enable continuous evaluation job.")
    # yapf: enable
    args = parser.parse_args()
    file_name = sys.argv[0]