Commit 6552fcf2 authored by root

Merge branch 'develop' of https://github.com/PaddlePaddle/models into ce_image_classification2

...@@ -165,10 +165,10 @@ python widerface_eval.py --infer=True --confs_threshold=0.15
```
The following figures visualize the model's predictions:
<p align="center">
<img src="images/0_Parade_marchingband_1_356.jpg" height=400 width=400 hspace='10'/>
<img src="images/28_Sports_Fan_Sports_Fan_28_770.jpg" height=400 width=400 hspace='10'/>
<img src="images/4_Dancing_Dancing_4_194.jpg" height=400 width=400 hspace='10'/>
<img src="images/2_Demonstration_Demonstration_Or_Protest_2_58.jpg" height=400 width=400 hspace='10'/> <br />
Pyramidbox prediction visualization
</p>
...
...@@ -2,6 +2,7 @@
# This file is only used for continuous evaluation.
export ce_mode=1
rm -rf *_factor.txt
python train.py --use_gpu=True --random_mirror=False --random_scaling=False 1> log
cat log | python _ce.py
...@@ -7,8 +7,8 @@ from kpi import CostKpi, DurationKpi, AccKpi
# NOTE kpi.py should be shared across models in some way!!!!
train_cost_kpi = CostKpi('train_cost', 0.05, 0, actived=True)
train_duration_kpi = DurationKpi('train_duration', 0.06, 0, actived=True)
tracking_kpis = [
    train_cost_kpi,
...
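For context, the `kpis <name> <value>` lines that the training scripts print are what `_ce.py` reads from the piped log. A minimal sketch of that protocol, assuming a simple regex-based parser (the real `kpi.py` lives in the CE framework and is not part of this diff):

```
import re
import sys

# Matches the "kpis <name> <value>" lines emitted by train.py.
KPI_PATTERN = re.compile(r"^kpis\s+(\w+)\s+([-+0-9.eE]+)\s*$")

def parse_kpi_log(lines):
    """Collect kpi name -> list of values from a training log."""
    records = {}
    for line in lines:
        match = KPI_PATTERN.match(line.strip())
        if match:
            records.setdefault(match.group(1), []).append(float(match.group(2)))
    return records

if __name__ == "__main__":
    for name, values in parse_kpi_log(sys.stdin).items():
        print("%s: last value %s" % (name, values[-1]))
```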
"""Reader for Cityscape dataset. """Reader for Cityscape dataset.
""" """
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import cv2
import numpy as np
...@@ -173,8 +176,8 @@ class DataGenerater:
        """
        Scale label according to factor.
        """
        h = label.shape[0] // factor
        w = label.shape[1] // factor
        return cv2.resize(
            label, (h, w), interpolation=cv2.INTER_NEAREST)[:, :, np.newaxis]
...
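The `/` to `//` changes above (and throughout this commit) are Python 3 compatibility fixes: with true division, `/` returns a float even for integer operands, which breaks size arithmetic such as the `cv2.resize` call. A minimal illustration:

```
from __future__ import division  # gives Python 2 the same semantics as Python 3

h = 1024
print(h / 32)   # 32.0 -- a float, which size-taking APIs like cv2.resize reject
print(h // 32)  # 32   -- floor division keeps the old integer behavior
```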
...@@ -64,7 +64,7 @@ def eval(args):
    exe.run(fluid.default_startup_program())
    assert os.path.exists(args.model_path)
    fluid.io.load_params(exe, args.model_path)
    print("loaded model from: %s" % args.model_path)
    sys.stdout.flush()

    fetch_vars = [iou, out_w, out_r]
...@@ -80,11 +80,10 @@ def eval(args):
            fetch_list=fetch_vars)
        out_wrong += result[1]
        out_right += result[2]
        sys.stdout.flush()
    iou = cal_mean_iou(out_wrong, out_right)
    print("\nmean iou: %.3f" % iou)
    print("kpis test_acc %f" % iou)

def main():
...
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle.fluid as fluid
import numpy as np
import sys
...@@ -20,8 +23,8 @@ def conv(input,
    if padding == "SAME":
        padding_h = max(k_h - s_h, 0)
        padding_w = max(k_w - s_w, 0)
        padding_top = padding_h // 2
        padding_left = padding_w // 2
        padding_bottom = padding_h - padding_top
        padding_right = padding_w - padding_left
        padding = [
...@@ -57,8 +60,8 @@ def atrous_conv(input,
    if padding == "SAME":
        padding_h = max(k_h - s_h, 0)
        padding_w = max(k_w - s_w, 0)
        padding_top = padding_h // 2
        padding_left = padding_w // 2
        padding_bottom = padding_h - padding_top
        padding_right = padding_w - padding_left
        padding = [
...@@ -141,15 +144,15 @@ def dilation_convs(input):
def pyramis_pooling(input, input_shape):
    shape = np.ceil(input_shape // 32).astype("int32")
    h, w = shape
    pool1 = avg_pool(input, h, w, h, w)
    pool1_interp = interp(pool1, shape)
    pool2 = avg_pool(input, h // 2, w // 2, h // 2, w // 2)
    pool2_interp = interp(pool2, shape)
    pool3 = avg_pool(input, h // 3, w // 3, h // 3, w // 3)
    pool3_interp = interp(pool3, shape)
    pool4 = avg_pool(input, h // 4, w // 4, h // 4, w // 4)
    pool4_interp = interp(pool4, shape)
    conv5_3_sum = input + pool4_interp + pool3_interp + pool2_interp + pool1_interp
    return conv5_3_sum
...@@ -172,14 +175,14 @@ def shared_convs(image):
def res_block(input, filter_num, padding=0, dilation=None, name=None):
    tmp = conv(input, 1, 1, filter_num // 4, 1, 1, name=name + "_1_1_reduce")
    tmp = bn(tmp, relu=True)
    tmp = zero_padding(tmp, padding=padding)
    if dilation is None:
        tmp = conv(tmp, 3, 3, filter_num // 4, 1, 1, name=name + "_3_3")
    else:
        tmp = atrous_conv(
            tmp, 3, 3, filter_num // 4, dilation, name=name + "_3_3")
    tmp = bn(tmp, relu=True)
    tmp = conv(tmp, 1, 1, filter_num, 1, 1, name=name + "_1_1_increase")
    tmp = bn(tmp, relu=False)
...@@ -195,7 +198,7 @@ def proj_block(input, filter_num, padding=0, dilation=None, stride=1,
    proj_bn = bn(proj, relu=False)
    tmp = conv(
        input, 1, 1, filter_num // 4, stride, stride, name=name + "_1_1_reduce")
    tmp = bn(tmp, relu=True)
    tmp = zero_padding(tmp, padding=padding)
...@@ -208,7 +211,7 @@ def proj_block(input, filter_num, padding=0, dilation=None, stride=1,
            tmp,
            3,
            3,
            filter_num // 4,
            1,
            1,
            padding=padding,
...@@ -218,7 +221,7 @@ def proj_block(input, filter_num, padding=0, dilation=None, stride=1,
            tmp,
            3,
            3,
            filter_num // 4,
            dilation,
            padding=padding,
            name=name + "_3_3")
...@@ -232,12 +235,12 @@ def proj_block(input, filter_num, padding=0, dilation=None, stride=1,
def sub_net_4(input, input_shape):
    tmp = interp(input, out_shape=np.ceil(input_shape // 32))
    tmp = dilation_convs(tmp)
    tmp = pyramis_pooling(tmp, input_shape)
    tmp = conv(tmp, 1, 1, 256, 1, 1, name="conv5_4_k1")
    tmp = bn(tmp, relu=True)
    tmp = interp(tmp, input_shape // 16)
    return tmp
...@@ -265,7 +268,7 @@ def CCF24(sub2_out, sub4_out, input_shape):
    tmp = bn(tmp, relu=False)
    tmp = tmp + sub2_out
    tmp = fluid.layers.relu(tmp)
    tmp = interp(tmp, input_shape // 8)
    return tmp
...@@ -275,7 +278,7 @@ def CCF124(sub1_out, sub24_out, input_shape):
    tmp = bn(tmp, relu=False)
    tmp = tmp + sub1_out
    tmp = fluid.layers.relu(tmp)
    tmp = interp(tmp, input_shape // 4)
    return tmp
...
"""Infer for ICNet model.""" """Infer for ICNet model."""
from __future__ import print_function
import cityscape
import argparse
import functools
...@@ -101,7 +102,7 @@ def infer(args):
    exe.run(fluid.default_startup_program())
    assert os.path.exists(args.model_path)
    fluid.io.load_params(exe, args.model_path)
    print("loaded model from: %s" % args.model_path)
    sys.stdout.flush()

    if not os.path.isdir(args.out_path):
...
"""Trainer for ICNet model.""" """Trainer for ICNet model."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from icnet import icnet
import cityscape
import argparse
import functools
import sys
import os
import time
import paddle.fluid as fluid
import numpy as np
...@@ -11,9 +15,8 @@ from utils import add_arguments, print_arguments, get_feeder_data
from paddle.fluid.layers.learning_rate_scheduler import _decay_step_counter
from paddle.fluid.initializer import init_on_cpu

if 'ce_mode' in os.environ:
    np.random.seed(10)

parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser)
...@@ -87,10 +90,14 @@ def train(args):
    if args.use_gpu:
        place = fluid.CUDAPlace(0)
    exe = fluid.Executor(place)

    if 'ce_mode' in os.environ:
        fluid.default_startup_program().random_seed = 90

    exe.run(fluid.default_startup_program())

    if args.init_model is not None:
        print("load model from: %s" % args.init_model)
        sys.stdout.flush()
        fluid.io.load_params(exe, args.init_model)
...@@ -107,7 +114,7 @@ def train(args):
    for data in train_reader():
        if iter_id > TOTAL_STEP:
            end_time = time.time()
            print("kpis train_duration %f" % (end_time - start_time))
            return
        iter_id += 1
        results = exe.run(
...@@ -119,10 +126,10 @@ def train(args):
        sub124_loss += results[3]
        # training log
        if iter_id % LOG_PERIOD == 0:
            print("Iter[%d]; train loss: %.3f; sub4_loss: %.3f; sub24_loss: %.3f; sub124_loss: %.3f" % (
                iter_id, t_loss / LOG_PERIOD, sub4_loss / LOG_PERIOD,
                sub24_loss / LOG_PERIOD, sub124_loss / LOG_PERIOD))
            print("kpis train_cost %f" % (t_loss / LOG_PERIOD))
            t_loss = 0.
            sub4_loss = 0.
...@@ -133,7 +140,7 @@ def train(args):
        if iter_id % CHECKPOINT_PERIOD == 0 and args.checkpoint_path is not None:
            dir_name = args.checkpoint_path + "/" + str(iter_id)
            fluid.io.save_persistables(exe, dirname=dir_name)
            print("Saved checkpoint: %s" % (dir_name))

def main():
...
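The pattern introduced above fixes the random seeds only when `run_ce.sh` exports `ce_mode`, so continuous-evaluation runs are reproducible while ordinary training keeps its randomness. Pulled out on its own (a sketch; the seed values follow the diff above):

```
import os
import numpy as np
import paddle.fluid as fluid

if 'ce_mode' in os.environ:
    np.random.seed(10)  # fix numpy-side randomness (augmentation, shuffling)
    # must be set before the startup program runs so parameter init is fixed
    fluid.default_startup_program().random_seed = 90
```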
...@@ -19,6 +19,7 @@ from __future__ import print_function
import distutils.util
import numpy as np
from paddle.fluid import core
import six

def print_arguments(args):
...@@ -37,7 +38,7 @@ def print_arguments(args):
    :type args: argparse.Namespace
    """
    print("----------- Configuration Arguments -----------")
    for arg, value in sorted(six.iteritems(vars(args))):
        print("%s: %s" % (arg, value))
    print("------------------------------------------------")
...
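`dict.iteritems()` no longer exists in Python 3; `six.iteritems()` dispatches to `iteritems()` on Python 2 and `items()` on Python 3, which is why `print_arguments` now goes through `six`. For example:

```
import six

config = {'batch_size': 64, 'use_gpu': True}
# Works unchanged on Python 2 and Python 3.
for arg, value in sorted(six.iteritems(config)):
    print("%s: %s" % (arg, value))
```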
...@@ -5,6 +5,6 @@ cudaid=${object_detection_cudaid:=0}
export CUDA_VISIBLE_DEVICES=$cudaid
python train.py --batch_size=64 --num_epochs=10 --total_images=6149 --enable_ce=True | python _ce.py

cudaid=${object_detection_cudaid_m:=0, 1, 2, 3}
export CUDA_VISIBLE_DEVICES=$cudaid
python train.py --batch_size=64 --num_epochs=10 --total_images=6149 --enable_ce=True | python _ce.py
...@@ -45,7 +45,7 @@ def calc_diff(f1, f2):
        sq_df = np.mean(df * df)
        return max_df, sq_df
    except Exception as e:
        return 1.0, 1.0

def compare(path1, path2, no_exception):
...
...@@ -245,10 +245,18 @@ class Network(object):
    @layer
    def prelu(self, input, channel_shared, name):
        fluid = import_fluid()
        if channel_shared:
            mode = 'all'
        else:
            mode = 'channel'
        prefix = name + '_'
        output = fluid.layers.prelu(
            input,
            mode=mode,
            param_attr=fluid.ParamAttr(name=prefix + 'negslope'))
        return output

    def pool(self, pool_type, input, k_h, k_w, s_h, s_w, ceil_mode, padding,
             name):
...
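The new `prelu` layer maps Caffe's `channel_shared` flag onto fluid's modes: `'all'` learns a single negative slope shared across the whole tensor, `'channel'` learns one slope per channel. A usage sketch against the `fluid.layers.prelu` API assumed by this change (layer names and sizes here are illustrative):

```
import paddle.fluid as fluid

image = fluid.layers.data(name='image', shape=[3, 32, 32], dtype='float32')
conv = fluid.layers.conv2d(input=image, num_filters=16, filter_size=3)
# channel_shared=False in Caffe corresponds to mode='channel' here.
act = fluid.layers.prelu(
    conv,
    mode='channel',
    param_attr=fluid.ParamAttr(name='conv1_negslope'))
```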
...@@ -176,6 +176,7 @@ class DataReshaper(object):
            del node.reshaped_data
        return graph

class CropFuser(object):
    '''
    Crop is to return a scalar output Blob for an input Blob of arbitrary size.
...@@ -197,7 +198,8 @@ class CropFuser(object):
            cls._traced_names[fname] = []
        cls._traced_names[fname].append(tname)

    def __init__(self,
                 allowed_parent_types=[NodeKind.Input, NodeKind.DummyData]):
        self.allowed_parent_types = allowed_parent_types

    def __call__(self, graph):
...@@ -232,7 +234,11 @@ class CropFuser(object):
    def merge(self, parent, child):
        '''Merge the parent node into the child.'''
        child.metadata['shape'] = [
            parent.output_shape.batch_size, parent.output_shape.channels,
            parent.output_shape.height, parent.output_shape.width
        ]

class SubNodeFuser(object):
    '''
...@@ -395,6 +401,8 @@ class ParameterNamer(object):
                names = ('scale', )
                if getattr(node.parameters, 'bias_term', False):
                    names = ('scale', 'offset')
            elif node.kind == NodeKind.PReLU:
                names = ('negslope', )
            elif node.kind == "Normalize":
                names = ('scale', )
            else:
...
...@@ -18,6 +18,7 @@ from __future__ import print_function
import numpy as np
import os
import six

import paddle
import paddle.fluid as fluid
...@@ -102,7 +103,7 @@ def infer():
        init_recursive_seq_lens, place)

    # Feed dict for inference
    feed_dict = feeder.feed([[x[0]] for x in data])
    feed_dict['init_ids'] = init_ids
    feed_dict['init_scores'] = init_scores
...@@ -115,7 +116,7 @@ def infer():
    lod_level_1 = fetch_outs[0].lod()[1]
    token_array = np.array(fetch_outs[0])
    result = []
    for i in six.moves.xrange(len(lod_level_1) - 1):
        sentence_list = [
            trg_dict[token]
            for token in token_array[lod_level_1[i]:lod_level_1[i + 1]]
...@@ -125,7 +126,7 @@ def infer():
    lod_level_0 = fetch_outs[0].lod()[0]
    paragraphs = [
        result[lod_level_0[i]:lod_level_0[i + 1]]
        for i in six.moves.xrange(len(lod_level_0) - 1)
    ]

    for paragraph in paragraphs:
...
...@@ -7,7 +7,7 @@ from kpi import CostKpi, DurationKpi, AccKpi
#### NOTE kpi.py should be shared across models in some way!!!!
train_cost_card1_kpi = CostKpi('train_cost_card1', 0.02, 0, actived=True)
test_cost_card1_kpi = CostKpi('test_cost_card1', 0.005, 0, actived=True)
train_duration_card1_kpi = DurationKpi(
    'train_duration_card1', 0.06, 0, actived=True)
...
...@@ -14,6 +14,6 @@ cudaid=${object_detection_cudaid:=0}
export CUDA_VISIBLE_DEVICES=$cudaid
FLAGS_benchmark=true python train.py --enable_ce=True --batch_size=64 --num_passes=2 --data_dir=/root/.cache/paddle/dataset/pascalvoc/ | python _ce.py

cudaid=${object_detection_cudaid_m:=0,1,2,3}
export CUDA_VISIBLE_DEVICES=$cudaid
FLAGS_benchmark=true python train.py --enable_ce=True --batch_size=64 --num_passes=2 --data_dir=/root/.cache/paddle/dataset/pascalvoc/ | python _ce.py
...@@ -8,8 +8,8 @@ from kpi import CostKpi, DurationKpi, AccKpi
#### NOTE kpi.py should be shared across models in some way!!!!
train_cost_kpi = CostKpi('train_cost', 0.02, 0, actived=True)
test_acc_kpi = AccKpi('test_acc', 0.01, 0, actived=False)
train_speed_kpi = AccKpi('train_speed', 0.2, 0, actived=False)
train_cost_card4_kpi = CostKpi('train_cost_card4', 0.02, 0, actived=True)
test_acc_card4_kpi = AccKpi('test_acc_card4', 0.01, 0, actived=True)
train_speed_card4_kpi = AccKpi('train_speed_card4', 0.2, 0, actived=True)
...
...@@ -22,6 +22,7 @@ import xml.etree.ElementTree
import os
import time
import copy
import six

class Settings(object):
...@@ -151,7 +152,7 @@ def preprocess(img, bbox_labels, mode, settings):
        mirror = int(random.uniform(0, 2))
        if mirror == 1:
            img = img[:, ::-1, :]
            for i in six.moves.xrange(len(sampled_labels)):
                tmp = sampled_labels[i][1]
                sampled_labels[i][1] = 1 - sampled_labels[i][3]
                sampled_labels[i][3] = 1 - tmp
...
...@@ -65,7 +65,6 @@ def train(args,
        name='gt_label', shape=[1], dtype='int32', lod_level=1)
    difficult = fluid.layers.data(
        name='gt_difficult', shape=[1], dtype='int32', lod_level=1)
    locs, confs, box, box_var = mobile_net(num_classes, image, image_shape)
    nmsed_out = fluid.layers.detection_output(
        locs, confs, box, box_var, nms_threshold=args.nms_threshold)
...@@ -88,16 +87,16 @@ def train(args,
    if 'coco' in data_args.dataset:
        # learning rate decay in 12, 19 pass, respectively
        if '2014' in train_file_list:
            epocs = 82783 // batch_size
            boundaries = [epocs * 12, epocs * 19]
        elif '2017' in train_file_list:
            epocs = 118287 // batch_size
            boundaries = [epocs * 12, epocs * 19]
        values = [
            learning_rate, learning_rate * 0.5, learning_rate * 0.25
        ]
    elif 'pascalvoc' in data_args.dataset:
        epocs = 19200 // batch_size
        boundaries = [epocs * 40, epocs * 60, epocs * 80, epocs * 100]
        values = [
            learning_rate, learning_rate * 0.5, learning_rate * 0.25,
...@@ -126,6 +125,9 @@ def train(args,
        train_reader = paddle.batch(
            reader.train(data_args, train_file_list), batch_size=batch_size)
    else:
        import random
        random.seed(0)
        np.random.seed(0)
        train_reader = paddle.batch(
            reader.train(data_args, train_file_list, False), batch_size=batch_size)
    test_reader = paddle.batch(
...@@ -137,7 +139,7 @@ def train(args,
        model_path = os.path.join(model_save_dir, postfix)
        if os.path.isdir(model_path):
            shutil.rmtree(model_path)
        print('save models to %s' % (model_path))
        fluid.io.save_persistables(exe, model_path)

    best_map = 0.
...@@ -166,8 +168,6 @@ def train(args,
        start_time = time.time()
        prev_start_time = start_time
        every_pass_loss = []
        for batch_id, data in enumerate(train_reader()):
            prev_start_time = start_time
            start_time = time.time()
...@@ -193,15 +193,15 @@ def train(args,
        total_time += end_time - start_time
        train_avg_loss = np.mean(every_pass_loss)
        if devices_num == 1:
            print("kpis train_cost %s" % train_avg_loss)
            print("kpis test_acc %s" % mean_map)
            print("kpis train_speed %s" % (total_time / epoch_idx))
        else:
            print("kpis train_cost_card%s %s" %
                  (devices_num, train_avg_loss))
            print("kpis test_acc_card%s %s" %
                  (devices_num, mean_map))
            print("kpis train_speed_card%s %f" %
                  (devices_num, total_time / epoch_idx))
...
...@@ -16,8 +16,10 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import distutils.util
import numpy as np
import six
from paddle.fluid import core
...@@ -37,7 +39,7 @@ def print_arguments(args):
    :type args: argparse.Namespace
    """
    print("----------- Configuration Arguments -----------")
    for arg, value in sorted(six.iteritems(vars(args))):
        print("%s: %s" % (arg, value))
    print("------------------------------------------------")
...
export ce_mode=1
python train.py --batch_size=32 --total_step=1 --eval_period=1 --log_period=1 --use_gpu=True 1> ./tmp.log
cat tmp.log | python _ce.py
rm tmp.log
...@@ -5,8 +5,9 @@
## Code Structure
```
├── ctc_reader.py # Downloads, reads, and preprocesses the data.
├── crnn_ctc_model.py # Defines the network structure of the OCR CTC model.
├── attention_model.py # Defines the network structure of the OCR attention model.
├── train.py # Trains the model.
├── infer.py # Loads a trained model and runs prediction on new data.
├── eval.py # Evaluates the model on a given dataset.
└── utils.py # Common utility functions.
```
...@@ -15,9 +16,16 @@
## Introduction
The task in this chapter is to recognize single lines of English text in images. We tackle it with two different models: a CTC model and an attention model.

The two models share the same encoder (sketched below): convolutions first turn the image into a feature map, the `im2sequence op` then converts the feature map into a sequence, and a `bidirectional GRU` learns features over that sequence.

The decoders and loss functions of the two models differ as follows:

- CTC model: training uses the CTC (Connectionist Temporal Classification) loss; inference uses a greedy strategy with CTC decoding.
- Attention model: training uses an attention-based decoding strategy with the cross-entropy loss; inference uses beam search.

For both models, the evaluation metric is the sample-level error rate.
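A rough sketch of the shared encoder described above (layer counts and sizes here are illustrative; the real definitions live in `crnn_ctc_model.py` and `attention_model.py`):

```
import paddle.fluid as fluid

def encoder_sketch(images, rnn_hidden_size=200):
    # convolutions turn the image into a feature map
    conv = fluid.layers.conv2d(
        input=images, num_filters=16, filter_size=3, padding=1, act='relu')
    # im2sequence makes each feature-map column one time step
    seq = fluid.layers.im2sequence(
        input=conv, stride=[1, 1], filter_size=[conv.shape[2], 1])
    fc = fluid.layers.fc(input=seq, size=rnn_hidden_size * 3, bias_attr=False)
    # bidirectional GRU over the sequence of columns
    gru_fwd = fluid.layers.dynamic_gru(input=fc, size=rnn_hidden_size)
    gru_bwd = fluid.layers.dynamic_gru(
        input=fc, size=rnn_hidden_size, is_reverse=True)
    return fluid.layers.concat(input=[gru_fwd, gru_bwd], axis=1)
```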
## Data
...@@ -124,15 +132,23 @@ env OMP_NUM_THREADS=<num_of_physical_cores> python ctc_train.py --use_gpu False
env CUDA_VISIBLE_DEVICES=0,1,2,3 python ctc_train.py --parallel=True
```

The `CTC model` is used by default; switch to the `attention model` with the option `--model="attention"`.

Run `python ctc_train.py --help` to see more usage information and detailed parameter descriptions.

Figure 2 shows the convergence curve of the `CTC model` trained on the default dataset with default parameters. The horizontal axis is the number of training iterations and the vertical axis is the sample-level error rate; the blue line is the error rate on the training set and the red line the error rate on the test set. The lowest error rate on the test set is 22.0%.

<p align="center">
<img src="images/train.jpg" width="400" hspace='10'/> <br/>
<strong>Figure 2</strong>
</p>

Figure 3 shows the convergence curve of the `attention model` trained on the default dataset with default parameters. The horizontal axis is the number of training iterations and the vertical axis is the sample-level error rate; the blue line is the error rate on the training set and the red line the error rate on the test set. The lowest error rate on the test set is 16.25%.

<p align="center">
<img src="images/train_attention.jpg" width="400" hspace='10'/> <br/>
<strong>Figure 3</strong>
</p>

## Testing
...
...@@ -7,7 +7,7 @@ from kpi import CostKpi, DurationKpi, AccKpi ...@@ -7,7 +7,7 @@ from kpi import CostKpi, DurationKpi, AccKpi
# NOTE kpi.py should shared in models in some way!!!! # NOTE kpi.py should shared in models in some way!!!!
train_cost_kpi = CostKpi('train_cost', 0.02, 0, actived=True) train_cost_kpi = CostKpi('train_cost', 0.05, 0, actived=True)
test_acc_kpi = AccKpi('test_acc', 0.005, 0, actived=True) test_acc_kpi = AccKpi('test_acc', 0.005, 0, actived=True)
train_duration_kpi = DurationKpi('train_duration', 0.06, 0, actived=True) train_duration_kpi = DurationKpi('train_duration', 0.06, 0, actived=True)
train_acc_kpi = AccKpi('train_acc', 0.005, 0, actived=True) train_acc_kpi = AccKpi('train_acc', 0.005, 0, actived=True)
......
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle.fluid as fluid
import six
decoder_size = 128
word_vector_dim = 128
max_length = 100
sos = 0
eos = 1
gradient_clip = 10
LR = 1.0
beam_size = 2
learning_rate_decay = None
def conv_bn_pool(input,
group,
out_ch,
act="relu",
is_test=False,
pool=True,
use_cudnn=True):
tmp = input
    for i in six.moves.xrange(group):
filter_size = 3
conv_std = (2.0 / (filter_size**2 * tmp.shape[1]))**0.5
conv_param = fluid.ParamAttr(
initializer=fluid.initializer.Normal(0.0, conv_std))
tmp = fluid.layers.conv2d(
input=tmp,
num_filters=out_ch[i],
filter_size=3,
padding=1,
bias_attr=False,
param_attr=conv_param,
act=None, # LinearActivation
use_cudnn=use_cudnn)
tmp = fluid.layers.batch_norm(input=tmp, act=act, is_test=is_test)
    if pool:
tmp = fluid.layers.pool2d(
input=tmp,
pool_size=2,
pool_type='max',
pool_stride=2,
use_cudnn=use_cudnn,
ceil_mode=True)
return tmp
def ocr_convs(input, is_test=False, use_cudnn=True):
tmp = input
tmp = conv_bn_pool(tmp, 2, [16, 16], is_test=is_test, use_cudnn=use_cudnn)
tmp = conv_bn_pool(tmp, 2, [32, 32], is_test=is_test, use_cudnn=use_cudnn)
tmp = conv_bn_pool(tmp, 2, [64, 64], is_test=is_test, use_cudnn=use_cudnn)
tmp = conv_bn_pool(
tmp, 2, [128, 128], is_test=is_test, pool=False, use_cudnn=use_cudnn)
return tmp
def encoder_net(images, rnn_hidden_size=200, is_test=False, use_cudnn=True):
conv_features = ocr_convs(images, is_test=is_test, use_cudnn=use_cudnn)
sliced_feature = fluid.layers.im2sequence(
input=conv_features,
stride=[1, 1],
filter_size=[conv_features.shape[2], 1])
para_attr = fluid.ParamAttr(initializer=fluid.initializer.Normal(0.0, 0.02))
bias_attr = fluid.ParamAttr(
initializer=fluid.initializer.Normal(0.0, 0.02), learning_rate=2.0)
fc_1 = fluid.layers.fc(input=sliced_feature,
size=rnn_hidden_size * 3,
param_attr=para_attr,
bias_attr=False)
fc_2 = fluid.layers.fc(input=sliced_feature,
size=rnn_hidden_size * 3,
param_attr=para_attr,
bias_attr=False)
gru_forward = fluid.layers.dynamic_gru(
input=fc_1,
size=rnn_hidden_size,
param_attr=para_attr,
bias_attr=bias_attr,
candidate_activation='relu')
gru_backward = fluid.layers.dynamic_gru(
input=fc_2,
size=rnn_hidden_size,
is_reverse=True,
param_attr=para_attr,
bias_attr=bias_attr,
candidate_activation='relu')
encoded_vector = fluid.layers.concat(
input=[gru_forward, gru_backward], axis=1)
encoded_proj = fluid.layers.fc(input=encoded_vector,
size=decoder_size,
bias_attr=False)
return gru_backward, encoded_vector, encoded_proj
def gru_decoder_with_attention(target_embedding, encoder_vec, encoder_proj,
decoder_boot, decoder_size, num_classes):
def simple_attention(encoder_vec, encoder_proj, decoder_state):
decoder_state_proj = fluid.layers.fc(input=decoder_state,
size=decoder_size,
bias_attr=False)
decoder_state_expand = fluid.layers.sequence_expand(
x=decoder_state_proj, y=encoder_proj)
concated = encoder_proj + decoder_state_expand
concated = fluid.layers.tanh(x=concated)
attention_weights = fluid.layers.fc(input=concated,
size=1,
act=None,
bias_attr=False)
attention_weights = fluid.layers.sequence_softmax(
input=attention_weights)
        weights_reshape = fluid.layers.reshape(x=attention_weights, shape=[-1])
        scaled = fluid.layers.elementwise_mul(
            x=encoder_vec, y=weights_reshape, axis=0)
context = fluid.layers.sequence_pool(input=scaled, pool_type='sum')
return context
rnn = fluid.layers.DynamicRNN()
with rnn.block():
current_word = rnn.step_input(target_embedding)
encoder_vec = rnn.static_input(encoder_vec)
encoder_proj = rnn.static_input(encoder_proj)
hidden_mem = rnn.memory(init=decoder_boot, need_reorder=True)
context = simple_attention(encoder_vec, encoder_proj, hidden_mem)
fc_1 = fluid.layers.fc(input=context,
size=decoder_size * 3,
bias_attr=False)
fc_2 = fluid.layers.fc(input=current_word,
size=decoder_size * 3,
bias_attr=False)
decoder_inputs = fc_1 + fc_2
h, _, _ = fluid.layers.gru_unit(
input=decoder_inputs, hidden=hidden_mem, size=decoder_size * 3)
rnn.update_memory(hidden_mem, h)
out = fluid.layers.fc(input=h,
size=num_classes + 2,
bias_attr=True,
act='softmax')
rnn.output(out)
return rnn()
def attention_train_net(args, data_shape, num_classes):
images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32')
label_in = fluid.layers.data(
name='label_in', shape=[1], dtype='int32', lod_level=1)
label_out = fluid.layers.data(
name='label_out', shape=[1], dtype='int32', lod_level=1)
gru_backward, encoded_vector, encoded_proj = encoder_net(images)
backward_first = fluid.layers.sequence_pool(
input=gru_backward, pool_type='first')
decoder_boot = fluid.layers.fc(input=backward_first,
size=decoder_size,
bias_attr=False,
act="relu")
label_in = fluid.layers.cast(x=label_in, dtype='int64')
trg_embedding = fluid.layers.embedding(
input=label_in,
size=[num_classes + 2, word_vector_dim],
dtype='float32')
prediction = gru_decoder_with_attention(trg_embedding, encoded_vector,
encoded_proj, decoder_boot,
decoder_size, num_classes)
fluid.clip.set_gradient_clip(fluid.clip.GradientClipByValue(gradient_clip))
label_out = fluid.layers.cast(x=label_out, dtype='int64')
_, maxid = fluid.layers.topk(input=prediction, k=1)
error_evaluator = fluid.evaluator.EditDistance(
input=maxid, label=label_out, ignored_tokens=[sos, eos])
inference_program = fluid.default_main_program().clone(for_test=True)
cost = fluid.layers.cross_entropy(input=prediction, label=label_out)
sum_cost = fluid.layers.reduce_sum(cost)
if learning_rate_decay == "piecewise_decay":
learning_rate = fluid.layers.piecewise_decay([50000], [LR, LR * 0.01])
else:
learning_rate = LR
optimizer = fluid.optimizer.Adadelta(
learning_rate=learning_rate, epsilon=1.0e-6, rho=0.9)
optimizer.minimize(sum_cost)
model_average = None
if args.average_window > 0:
model_average = fluid.optimizer.ModelAverage(
args.average_window,
min_average_window=args.min_average_window,
max_average_window=args.max_average_window)
return sum_cost, error_evaluator, inference_program, model_average
def simple_attention(encoder_vec, encoder_proj, decoder_state, decoder_size):
decoder_state_proj = fluid.layers.fc(input=decoder_state,
size=decoder_size,
bias_attr=False)
decoder_state_expand = fluid.layers.sequence_expand(
x=decoder_state_proj, y=encoder_proj)
concated = fluid.layers.elementwise_add(encoder_proj, decoder_state_expand)
concated = fluid.layers.tanh(x=concated)
attention_weights = fluid.layers.fc(input=concated,
size=1,
act=None,
bias_attr=False)
attention_weights = fluid.layers.sequence_softmax(input=attention_weights)
    weights_reshape = fluid.layers.reshape(x=attention_weights, shape=[-1])
    scaled = fluid.layers.elementwise_mul(
        x=encoder_vec, y=weights_reshape, axis=0)
context = fluid.layers.sequence_pool(input=scaled, pool_type='sum')
return context
def attention_infer(images, num_classes, use_cudnn=True):
max_length = 20
gru_backward, encoded_vector, encoded_proj = encoder_net(
images, is_test=True, use_cudnn=use_cudnn)
backward_first = fluid.layers.sequence_pool(
input=gru_backward, pool_type='first')
decoder_boot = fluid.layers.fc(input=backward_first,
size=decoder_size,
bias_attr=False,
act="relu")
init_state = decoder_boot
array_len = fluid.layers.fill_constant(
shape=[1], dtype='int64', value=max_length)
counter = fluid.layers.zeros(shape=[1], dtype='int64', force_cpu=True)
# fill the first element with init_state
state_array = fluid.layers.create_array('float32')
fluid.layers.array_write(init_state, array=state_array, i=counter)
# ids, scores as memory
ids_array = fluid.layers.create_array('int64')
scores_array = fluid.layers.create_array('float32')
init_ids = fluid.layers.data(
name="init_ids", shape=[1], dtype="int64", lod_level=2)
init_scores = fluid.layers.data(
name="init_scores", shape=[1], dtype="float32", lod_level=2)
fluid.layers.array_write(init_ids, array=ids_array, i=counter)
fluid.layers.array_write(init_scores, array=scores_array, i=counter)
cond = fluid.layers.less_than(x=counter, y=array_len)
while_op = fluid.layers.While(cond=cond)
with while_op.block():
pre_ids = fluid.layers.array_read(array=ids_array, i=counter)
pre_state = fluid.layers.array_read(array=state_array, i=counter)
pre_score = fluid.layers.array_read(array=scores_array, i=counter)
pre_ids_emb = fluid.layers.embedding(
input=pre_ids,
size=[num_classes + 2, word_vector_dim],
dtype='float32')
context = simple_attention(encoded_vector, encoded_proj, pre_state,
decoder_size)
# expand the recursive_sequence_lengths of pre_state to be the same with pre_score
pre_state_expanded = fluid.layers.sequence_expand(pre_state, pre_score)
context_expanded = fluid.layers.sequence_expand(context, pre_score)
fc_1 = fluid.layers.fc(input=context_expanded,
size=decoder_size * 3,
bias_attr=False)
fc_2 = fluid.layers.fc(input=pre_ids_emb,
size=decoder_size * 3,
bias_attr=False)
decoder_inputs = fc_1 + fc_2
current_state, _, _ = fluid.layers.gru_unit(
input=decoder_inputs,
hidden=pre_state_expanded,
size=decoder_size * 3)
current_state_with_lod = fluid.layers.lod_reset(
x=current_state, y=pre_score)
# use score to do beam search
current_score = fluid.layers.fc(input=current_state_with_lod,
size=num_classes + 2,
bias_attr=True,
act='softmax')
topk_scores, topk_indices = fluid.layers.topk(
current_score, k=beam_size)
# calculate accumulated scores after topk to reduce computation cost
accu_scores = fluid.layers.elementwise_add(
x=fluid.layers.log(topk_scores),
y=fluid.layers.reshape(
pre_score, shape=[-1]),
axis=0)
selected_ids, selected_scores = fluid.layers.beam_search(
pre_ids,
pre_score,
topk_indices,
accu_scores,
beam_size,
1, # end_id
#level=0
)
fluid.layers.increment(x=counter, value=1, in_place=True)
# update the memories
fluid.layers.array_write(current_state, array=state_array, i=counter)
fluid.layers.array_write(selected_ids, array=ids_array, i=counter)
fluid.layers.array_write(selected_scores, array=scores_array, i=counter)
# update the break condition: up to the max length or all candidates of
# source sentences have ended.
length_cond = fluid.layers.less_than(x=counter, y=array_len)
finish_cond = fluid.layers.logical_not(
fluid.layers.is_empty(x=selected_ids))
fluid.layers.logical_and(x=length_cond, y=finish_cond, out=cond)
ids, scores = fluid.layers.beam_search_decode(ids_array, scores_array,
beam_size, eos)
return ids
def attention_eval(data_shape, num_classes):
images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32')
label_in = fluid.layers.data(
name='label_in', shape=[1], dtype='int32', lod_level=1)
label_out = fluid.layers.data(
name='label_out', shape=[1], dtype='int32', lod_level=1)
label_out = fluid.layers.cast(x=label_out, dtype='int64')
label_in = fluid.layers.cast(x=label_in, dtype='int64')
gru_backward, encoded_vector, encoded_proj = encoder_net(
images, is_test=True)
backward_first = fluid.layers.sequence_pool(
input=gru_backward, pool_type='first')
decoder_boot = fluid.layers.fc(input=backward_first,
size=decoder_size,
bias_attr=False,
act="relu")
trg_embedding = fluid.layers.embedding(
input=label_in,
size=[num_classes + 2, word_vector_dim],
dtype='float32')
prediction = gru_decoder_with_attention(trg_embedding, encoded_vector,
encoded_proj, decoder_boot,
decoder_size, num_classes)
_, maxid = fluid.layers.topk(input=prediction, k=1)
error_evaluator = fluid.evaluator.EditDistance(
input=maxid, label=label_out, ignored_tokens=[sos, eos])
cost = fluid.layers.cross_entropy(input=prediction, label=label_out)
sum_cost = fluid.layers.reduce_sum(cost)
return error_evaluator, sum_cost
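For orientation, this is roughly how `train.py` elsewhere in this commit wires up `attention_train_net`; the argument values here are placeholders, not the project's defaults:

```
import argparse
import paddle.fluid as fluid
from attention_model import attention_train_net

# Placeholder model-averaging arguments; train.py defines the real flags.
args = argparse.Namespace(
    average_window=0.15, min_average_window=10000, max_average_window=12500)
sum_cost, error_evaluator, inference_program, model_average = \
    attention_train_net(args, data_shape=[1, 48, 512], num_classes=95)

exe = fluid.Executor(fluid.CPUPlace())
exe.run(fluid.default_startup_program())
```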
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle.fluid as fluid
from paddle.fluid.layers.learning_rate_scheduler import _decay_step_counter
from paddle.fluid.initializer import init_on_cpu
import math
import six

def conv_bn_pool(input,
...@@ -15,7 +19,7 @@ def conv_bn_pool(input,
                 pooling=True,
                 use_cudnn=False):
    tmp = input
    for i in six.moves.xrange(group):
        tmp = fluid.layers.conv2d(
            input=tmp,
            num_filters=out_ch[i],
...@@ -166,13 +170,16 @@ def encoder_net(images,
    return fc_out

def ctc_train_net(args, data_shape, num_classes):
    L2_RATE = 0.0004
    LR = 1.0e-3
    MOMENTUM = 0.9
    learning_rate_decay = None
    regularizer = fluid.regularizer.L2Decay(L2_RATE)

    images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32')
    label = fluid.layers.data(
        name='label', shape=[1], dtype='int32', lod_level=1)
    fc_out = encoder_net(
        images,
        num_classes,
...@@ -189,7 +196,7 @@ def ctc_train_net(images, label, args, num_classes):
    inference_program = fluid.default_main_program().clone(for_test=True)
    if learning_rate_decay == "piecewise_decay":
        learning_rate = fluid.layers.piecewise_decay([
            args.total_step // 4, args.total_step // 2, args.total_step * 3 // 4
        ], [LR, LR * 0.1, LR * 0.01, LR * 0.001])
    else:
        learning_rate = LR
...@@ -211,7 +218,10 @@ def ctc_infer(images, num_classes, use_cudnn):
    return fluid.layers.ctc_greedy_decoder(input=fc_out, blank=num_classes)

def ctc_eval(data_shape, num_classes, use_cudnn):
    images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32')
    label = fluid.layers.data(
        name='label', shape=[1], dtype='int32', lod_level=1)
    fc_out = encoder_net(images, num_classes, is_test=True, use_cudnn=use_cudnn)
    decoded_out = fluid.layers.ctc_greedy_decoder(
        input=fc_out, blank=num_classes)
...
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import cv2
import tarfile
import numpy as np
from PIL import Image
from os import path
from paddle.dataset.image import load_image
import paddle

SOS = 0
EOS = 1
NUM_CLASSES = 95
DATA_SHAPE = [1, 48, 512]
...@@ -22,8 +27,8 @@ TEST_LIST_FILE_NAME = "test.list"
class DataGenerator(object):
    def __init__(self, model="crnn_ctc"):
        self.model = model

    def train_reader(self,
                     img_root_dir,
...@@ -65,11 +70,11 @@ class DataGenerator(object):
            batchsize
        ) + "; i++) print $(4*i+1)\" \"$(4*i+2)\" \"$(4*i+3)\" \"$(4*i+4);}}' > " + to_file
        os.system(cmd)
        print("finish batch shuffle")
        img_label_lines = open(to_file, 'r').readlines()

        def reader():
            sizes = len(img_label_lines) // batchsize
            if sizes == 0:
                raise ValueError('Batch size is bigger than the dataset size.')
            while True:
...@@ -89,7 +94,10 @@ class DataGenerator(object):
                    img = img.resize((sz[0], sz[1]))
                    img = np.array(img) - 127.5
                    img = img[np.newaxis, ...]
                    if self.model == "crnn_ctc":
                        result.append([img, label])
                    else:
                        result.append([img, [SOS] + label, label + [EOS]])
                yield result
                if not cycle:
                    break
...@@ -117,7 +125,10 @@ class DataGenerator(object):
                    'L')
                img = np.array(img) - 127.5
                img = img[np.newaxis, ...]
                if self.model == "crnn_ctc":
                    yield img, label
                else:
                    yield img, [SOS] + label, label + [EOS]

        return reader
...@@ -185,8 +196,12 @@ def data_shape():
    return DATA_SHAPE

def train(batch_size,
          train_images_dir=None,
          train_list_file=None,
          cycle=False,
          model="crnn_ctc"):
    generator = DataGenerator(model)
    if train_images_dir is None:
        data_dir = download_data()
        train_images_dir = path.join(data_dir, TRAIN_DATA_DIR_NAME)
...@@ -199,8 +214,11 @@ def train(batch_size, train_images_dir=None, train_list_file=None, cycle=False):
        train_images_dir, train_list_file, batch_size, cycle, shuffle=shuffle)

def test(batch_size=1,
         test_images_dir=None,
         test_list_file=None,
         model="crnn_ctc"):
    generator = DataGenerator(model)
    if test_images_dir is None:
        data_dir = download_data()
        test_images_dir = path.join(data_dir, TEST_DATA_DIR_NAME)
...@@ -213,8 +231,9 @@ def test(batch_size=1, test_images_dir=None, test_list_file=None):
def inference(batch_size=1,
              infer_images_dir=None,
              infer_list_file=None,
              cycle=False,
              model="crnn_ctc"):
    generator = DataGenerator(model)
    return paddle.batch(
        generator.infer_reader(infer_images_dir, infer_list_file, cycle),
        batch_size)
...
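The `model` switch above changes what each sample carries: `crnn_ctc` yields `(image, label)`, while `attention` yields `(image, [SOS] + label, label + [EOS])` so the decoder gets a shifted input/target pair. A usage sketch (directory arguments are omitted, so this would fall back to downloading the default dataset):

```
import data_reader

# CTC: each sample is [img, label]
ctc_reader = data_reader.train(batch_size=32, model="crnn_ctc")
# Attention: each sample is [img, [SOS] + label, label + [EOS]]
att_reader = data_reader.train(batch_size=32, model="attention")
for batch in att_reader():
    img, label_in, label_out = batch[0]
    break
```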
import paddle.v2 as paddle
import paddle.fluid as fluid
from utility import add_arguments, print_arguments, to_lodtensor, get_ctc_feeder_data, get_attention_feeder_data
from attention_model import attention_eval
from crnn_ctc_model import ctc_eval
import data_reader
import argparse
import functools
import os
...@@ -11,27 +11,34 @@ import os
parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser)
# yapf: disable
add_arg('model', str, "crnn_ctc", "Which type of network to be used. 'crnn_ctc' or 'attention'")
add_arg('model_path', str, "", "The model path to be used for inference.")
add_arg('input_images_dir', str, None, "The directory of images.")
add_arg('input_images_list', str, None, "The list file of images.")
add_arg('use_gpu', bool, True, "Whether use GPU to eval.")
# yapf: enable

def evaluate(args):
    """OCR evaluation"""
    if args.model == "crnn_ctc":
        eval = ctc_eval
        get_feeder_data = get_ctc_feeder_data
    else:
        eval = attention_eval
        get_feeder_data = get_attention_feeder_data

    num_classes = data_reader.num_classes()
    data_shape = data_reader.data_shape()
    # define network
    evaluator, cost = eval(data_shape, num_classes)

    # data reader
    test_reader = data_reader.test(
        test_images_dir=args.input_images_dir,
        test_list_file=args.input_images_list,
        model=args.model)

    # prepare environment
    place = fluid.CPUPlace()
...@@ -48,7 +55,7 @@ def evaluate(args, eval=ctc_eval, data_reader=ctc_reader):
        model_dir = os.path.dirname(args.model_path)
        model_file_name = os.path.basename(args.model_path)
        fluid.io.load_params(exe, dirname=model_dir, filename=model_file_name)
        print("Init model from: %s." % args.model_path)

    evaluator.reset(exe)
    count = 0
...@@ -56,14 +63,14 @@ def evaluate(args, eval=ctc_eval, data_reader=ctc_reader):
        count += 1
        exe.run(fluid.default_main_program(), feed=get_feeder_data(data, place))
    avg_distance, avg_seq_error = evaluator.eval(exe)
    print("Read %d samples; avg_distance: %s; avg_seq_error: %s" % (
        count, avg_distance, avg_seq_error))

def main():
    args = parser.parse_args()
    print_arguments(args)
    evaluate(args)

if __name__ == "__main__":
...
from __future__ import print_function
import paddle.v2 as paddle import paddle.v2 as paddle
import paddle.fluid as fluid import paddle.fluid as fluid
from utility import add_arguments, print_arguments, to_lodtensor, get_ctc_feeder_data, get_attention_feeder_for_infer
import paddle.fluid.profiler as profiler import paddle.fluid.profiler as profiler
from utility import add_arguments, print_arguments, to_lodtensor, get_feeder_data
from crnn_ctc_model import ctc_infer from crnn_ctc_model import ctc_infer
from attention_model import attention_infer
import numpy as np import numpy as np
import ctc_reader import data_reader
import argparse import argparse
import functools import functools
import os import os
...@@ -13,6 +15,7 @@ import time ...@@ -13,6 +15,7 @@ import time
parser = argparse.ArgumentParser(description=__doc__) parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser) add_arg = functools.partial(add_arguments, argparser=parser)
# yapf: disable # yapf: disable
add_arg('model', str, "crnn_ctc", "Which type of network to use: 'crnn_ctc' or 'attention'.")
add_arg('model_path', str, None, "The model path to be used for inference.") add_arg('model_path', str, None, "The model path to be used for inference.")
add_arg('input_images_dir', str, None, "The directory of images.") add_arg('input_images_dir', str, None, "The directory of images.")
add_arg('input_images_list', str, None, "The list file of images.") add_arg('input_images_list', str, None, "The list file of images.")
...@@ -25,20 +28,28 @@ add_arg('batch_size', int, 1, "The minibatch size.") ...@@ -25,20 +28,28 @@ add_arg('batch_size', int, 1, "The minibatch size.")
# yapf: enable # yapf: enable
def inference(args, infer=ctc_infer, data_reader=ctc_reader): def inference(args):
"""OCR inference""" """OCR inference"""
if args.model == "crnn_ctc":
infer = ctc_infer
get_feeder_data = get_ctc_feeder_data
else:
infer = attention_infer
get_feeder_data = get_attention_feeder_for_infer
eos = 1
sos = 0
num_classes = data_reader.num_classes() num_classes = data_reader.num_classes()
data_shape = data_reader.data_shape() data_shape = data_reader.data_shape()
# define network # define network
images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32') images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32')
sequence = infer( ids = infer(images, num_classes, use_cudnn=True if args.use_gpu else False)
images, num_classes, use_cudnn=True if args.use_gpu else False)
# data reader # data reader
infer_reader = data_reader.inference( infer_reader = data_reader.inference(
batch_size=args.batch_size, batch_size=args.batch_size,
infer_images_dir=args.input_images_dir, infer_images_dir=args.input_images_dir,
infer_list_file=args.input_images_list, infer_list_file=args.input_images_list,
cycle=True if args.iterations > 0 else False) cycle=True if args.iterations > 0 else False,
model=args.model)
# prepare environment # prepare environment
place = fluid.CPUPlace() place = fluid.CPUPlace()
if args.use_gpu: if args.use_gpu:
...@@ -54,7 +65,7 @@ def inference(args, infer=ctc_infer, data_reader=ctc_reader): ...@@ -54,7 +65,7 @@ def inference(args, infer=ctc_infer, data_reader=ctc_reader):
with open(args.dict) as dict_file: with open(args.dict) as dict_file:
for i, word in enumerate(dict_file): for i, word in enumerate(dict_file):
dict_map[i] = word.strip() dict_map[i] = word.strip()
print "Loaded dict from %s" % args.dict print("Loaded dict from %s" % args.dict)
# load init model # load init model
model_dir = args.model_path model_dir = args.model_path
...@@ -63,11 +74,12 @@ def inference(args, infer=ctc_infer, data_reader=ctc_reader): ...@@ -63,11 +74,12 @@ def inference(args, infer=ctc_infer, data_reader=ctc_reader):
model_dir = os.path.dirname(args.model_path) model_dir = os.path.dirname(args.model_path)
model_file_name = os.path.basename(args.model_path) model_file_name = os.path.basename(args.model_path)
fluid.io.load_params(exe, dirname=model_dir, filename=model_file_name) fluid.io.load_params(exe, dirname=model_dir, filename=model_file_name)
print "Init model from: %s." % args.model_path print("Init model from: %s." % args.model_path)
batch_times = [] batch_times = []
iters = 0 iters = 0
for data in infer_reader(): for data in infer_reader():
feed_dict = get_feeder_data(data, place)
if args.iterations > 0 and iters == args.iterations + args.skip_batch_num: if args.iterations > 0 and iters == args.iterations + args.skip_batch_num:
break break
if iters < args.skip_batch_num: if iters < args.skip_batch_num:
...@@ -77,26 +89,25 @@ def inference(args, infer=ctc_infer, data_reader=ctc_reader): ...@@ -77,26 +89,25 @@ def inference(args, infer=ctc_infer, data_reader=ctc_reader):
start = time.time() start = time.time()
result = exe.run(fluid.default_main_program(), result = exe.run(fluid.default_main_program(),
feed=get_feeder_data( feed=feed_dict,
data, place, need_label=False), fetch_list=[ids],
fetch_list=[sequence],
return_numpy=False) return_numpy=False)
indexes = prune(np.array(result[0]).flatten(), 0, 1)
batch_time = time.time() - start batch_time = time.time() - start
fps = args.batch_size / batch_time fps = args.batch_size / batch_time
batch_times.append(batch_time) batch_times.append(batch_time)
indexes = np.array(result[0]).flatten()
if dict_map is not None: if dict_map is not None:
print "Iteration %d, latency: %.5f s, fps: %f, result: %s" % ( print("Iteration %d, latency: %.5f s, fps: %f, result: %s" % (
iters, iters,
batch_time, batch_time,
fps, fps,
[dict_map[index] for index in indexes], ) [dict_map[index] for index in indexes], ))
else: else:
print "Iteration %d, latency: %.5f s, fps: %f, result: %s" % ( print("Iteration %d, latency: %.5f s, fps: %f, result: %s" % (
iters, iters,
batch_time, batch_time,
fps, fps,
indexes, ) indexes, ))
iters += 1 iters += 1
...@@ -114,18 +125,29 @@ def inference(args, infer=ctc_infer, data_reader=ctc_reader): ...@@ -114,18 +125,29 @@ def inference(args, infer=ctc_infer, data_reader=ctc_reader):
print('average fps: %.5f, fps for 99pc latency: %.5f' % (fps_avg, fps_pc99)) print('average fps: %.5f, fps for 99pc latency: %.5f' % (fps_avg, fps_pc99))
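The benchmark summary above prints an average fps and an fps at 99th-percentile latency, but the computation itself is elided by this hunk. A plausible reconstruction, consistent with the postprocessing shown later in the trainer (the names `latency_pc99` and `fps_pc99` are inferred, not confirmed by the diff):

```python
import numpy as np

# Hypothetical reconstruction of the elided benchmark postprocessing:
latencies = batch_times[args.skip_batch_num:]   # drop warm-up batches
latency_avg = np.average(latencies)             # mean seconds per batch
latency_pc99 = np.percentile(latencies, 99)     # 99th-percentile latency
fps_avg = args.batch_size / latency_avg         # throughput at mean latency
fps_pc99 = args.batch_size / latency_pc99       # throughput at tail latency
```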
def prune(words, sos, eos):
"""Remove unused tokens in prediction result."""
start_index = 0
end_index = len(words)
if sos in words:
start_index = np.where(words == sos)[0][0] + 1
if eos in words:
end_index = np.where(words == eos)[0][0]
return words[start_index:end_index]
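A quick illustration of how `prune` behaves on a toy decoded sequence, using the `sos = 0` and `eos = 1` constants set earlier in `inference`:

```python
import numpy as np

words = np.array([0, 5, 3, 7, 1, 0])  # sos, ids..., eos, padding
# Keeps everything after the first sos and before the first eos:
print(prune(words, 0, 1))  # -> [5 3 7]
```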
def main(): def main():
args = parser.parse_args() args = parser.parse_args()
print_arguments(args) print_arguments(args)
if args.profile: if args.profile:
if args.use_gpu: if args.use_gpu:
with profiler.cuda_profiler("cuda_profiler.txt", 'csv') as nvprof: with profiler.cuda_profiler("cuda_profiler.txt", 'csv') as nvprof:
inference(args, data_reader=ctc_reader) inference(args)
else: else:
with profiler.profiler("CPU", sorted_key='total') as cpuprof: with profiler.profiler("CPU", sorted_key='total') as cpuprof:
inference(args, data_reader=ctc_reader) inference(args)
else: else:
inference(args, data_reader=ctc_reader) inference(args)
if __name__ == "__main__": if __name__ == "__main__":
......
"""Trainer for OCR CTC model.""" """Trainer for OCR CTC or attention model."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle.fluid as fluid import paddle.fluid as fluid
from utility import add_arguments, print_arguments, to_lodtensor, get_ctc_feeder_data, get_attention_feeder_data
import paddle.fluid.profiler as profiler import paddle.fluid.profiler as profiler
from utility import add_arguments, print_arguments, to_lodtensor, get_feeder_data
from crnn_ctc_model import ctc_train_net from crnn_ctc_model import ctc_train_net
import ctc_reader from attention_model import attention_train_net
import data_reader
import argparse import argparse
import functools import functools
import sys import sys
...@@ -20,6 +24,7 @@ add_arg('log_period', int, 1000, "Log period.") ...@@ -20,6 +24,7 @@ add_arg('log_period', int, 1000, "Log period.")
add_arg('save_model_period', int, 15000, "How often (in iterations) to save the model; '-1' disables saving.") add_arg('save_model_period', int, 15000, "How often (in iterations) to save the model; '-1' disables saving.")
add_arg('eval_period', int, 15000, "How often (in iterations) to evaluate the model; '-1' disables evaluation.") add_arg('eval_period', int, 15000, "How often (in iterations) to evaluate the model; '-1' disables evaluation.")
add_arg('save_model_dir', str, "./models", "The directory the model to be saved to.") add_arg('save_model_dir', str, "./models", "The directory the model to be saved to.")
add_arg('model', str, "crnn_ctc", "Which type of network to use: 'crnn_ctc' or 'attention'.")
add_arg('init_model', str, None, "The init model file of directory.") add_arg('init_model', str, None, "The init model file of directory.")
add_arg('use_gpu', bool, True, "Whether to use GPU for training.") add_arg('use_gpu', bool, True, "Whether to use GPU for training.")
add_arg('min_average_window',int, 10000, "Min average window.") add_arg('min_average_window',int, 10000, "Min average window.")
...@@ -32,8 +37,16 @@ add_arg('skip_test', bool, False, "Whether to skip test phase.") ...@@ -32,8 +37,16 @@ add_arg('skip_test', bool, False, "Whether to skip test phase.")
# yapf: enable # yapf: enable
def train(args, data_reader=ctc_reader): def train(args):
"""OCR CTC training""" """OCR training"""
if args.model == "crnn_ctc":
train_net = ctc_train_net
get_feeder_data = get_ctc_feeder_data
else:
train_net = attention_train_net
get_feeder_data = get_attention_feeder_data
num_classes = None num_classes = None
train_images = None train_images = None
train_list = None train_list = None
...@@ -43,20 +56,18 @@ def train(args, data_reader=ctc_reader): ...@@ -43,20 +56,18 @@ def train(args, data_reader=ctc_reader):
) if num_classes is None else num_classes ) if num_classes is None else num_classes
data_shape = data_reader.data_shape() data_shape = data_reader.data_shape()
# define network # define network
images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32') sum_cost, error_evaluator, inference_program, model_average = train_net(
label = fluid.layers.data( args, data_shape, num_classes)
name='label', shape=[1], dtype='int32', lod_level=1)
sum_cost, error_evaluator, inference_program, model_average = ctc_train_net(
images, label, args, num_classes)
# data reader # data reader
train_reader = data_reader.train( train_reader = data_reader.train(
args.batch_size, args.batch_size,
train_images_dir=train_images, train_images_dir=train_images,
train_list_file=train_list, train_list_file=train_list,
cycle=args.total_step > 0) cycle=args.total_step > 0,
model=args.model)
test_reader = data_reader.test( test_reader = data_reader.test(
test_images_dir=test_images, test_list_file=test_list) test_images_dir=test_images, test_list_file=test_list, model=args.model)
# prepare environment # prepare environment
place = fluid.CPUPlace() place = fluid.CPUPlace()
...@@ -77,7 +88,7 @@ def train(args, data_reader=ctc_reader): ...@@ -77,7 +88,7 @@ def train(args, data_reader=ctc_reader):
model_dir = os.path.dirname(args.init_model) model_dir = os.path.dirname(args.init_model)
model_file_name = os.path.basename(args.init_model) model_file_name = os.path.basename(args.init_model)
fluid.io.load_params(exe, dirname=model_dir, filename=model_file_name) fluid.io.load_params(exe, dirname=model_dir, filename=model_file_name)
print "Init model from: %s." % args.init_model print("Init model from: %s." % args.init_model)
train_exe = exe train_exe = exe
error_evaluator.reset(exe) error_evaluator.reset(exe)
...@@ -104,18 +115,18 @@ def train(args, data_reader=ctc_reader): ...@@ -104,18 +115,18 @@ def train(args, data_reader=ctc_reader):
for data in test_reader(): for data in test_reader():
exe.run(inference_program, feed=get_feeder_data(data, place)) exe.run(inference_program, feed=get_feeder_data(data, place))
_, test_seq_error = error_evaluator.eval(exe) _, test_seq_error = error_evaluator.eval(exe)
print "\nTime: %s; Iter[%d]; Test seq error: %s.\n" % ( print("\nTime: %s; Iter[%d]; Test seq error: %s.\n" % (
time.time(), iter_num, str(test_seq_error[0])) time.time(), iter_num, str(test_seq_error[0])))
#Note: The following logs are used only for CE monitoring. #Note: The following logs are used only for CE monitoring.
#They can be ignored in all other situations. #They can be ignored in all other situations.
print "kpis test_acc %f" % (1 - test_seq_error[0]) print("kpis test_acc %f" % (1 - test_seq_error[0]))
def save_model(args, exe, iter_num): def save_model(args, exe, iter_num):
filename = "model_%05d" % iter_num filename = "model_%05d" % iter_num
fluid.io.save_params( fluid.io.save_params(
exe, dirname=args.save_model_dir, filename=filename) exe, dirname=args.save_model_dir, filename=filename)
print "Saved model to: %s/%s." % (args.save_model_dir, filename) print("Saved model to: %s/%s." % (args.save_model_dir, filename))
iter_num = 0 iter_num = 0
stop = False stop = False
...@@ -144,18 +155,18 @@ def train(args, data_reader=ctc_reader): ...@@ -144,18 +155,18 @@ def train(args, data_reader=ctc_reader):
iter_num += 1 iter_num += 1
# training log # training log
if iter_num % args.log_period == 0: if iter_num % args.log_period == 0:
print "\nTime: %s; Iter[%d]; Avg Warp-CTC loss: %.3f; Avg seq err: %.3f" % ( print("\nTime: %s; Iter[%d]; Avg loss: %.3f; Avg seq err: %.3f" % (
time.time(), iter_num, time.time(), iter_num,
total_loss / (args.log_period * args.batch_size), total_loss / (args.log_period * args.batch_size),
total_seq_error / (args.log_period * args.batch_size)) total_seq_error / (args.log_period * args.batch_size)))
print "kpis train_cost %f" % (total_loss / (args.log_period * print("kpis train_cost %f" % (total_loss / (args.log_period *
args.batch_size)) args.batch_size)))
print "kpis train_acc %f" % ( print("kpis train_acc %f" % (
1 - total_seq_error / (args.log_period * args.batch_size)) 1 - total_seq_error / (args.log_period * args.batch_size)))
total_loss = 0.0 total_loss = 0.0
total_seq_error = 0.0 total_seq_error = 0.0
# evaluate # evaluate
if not args.skip_test and iter_num % args.eval_period == 0: if not args.skip_test and iter_num % args.eval_period == 0:
if model_average: if model_average:
with model_average.apply(exe): with model_average.apply(exe):
...@@ -171,7 +182,7 @@ def train(args, data_reader=ctc_reader): ...@@ -171,7 +182,7 @@ def train(args, data_reader=ctc_reader):
else: else:
save_model(args, exe, iter_num) save_model(args, exe, iter_num)
end_time = time.time() end_time = time.time()
print "kpis train_duration %f" % (end_time - start_time) print("kpis train_duration %f" % (end_time - start_time))
# Postprocess benchmark data # Postprocess benchmark data
latencies = batch_times[args.skip_batch_num:] latencies = batch_times[args.skip_batch_num:]
latency_avg = np.average(latencies) latency_avg = np.average(latencies)
...@@ -195,12 +206,12 @@ def main(): ...@@ -195,12 +206,12 @@ def main():
if args.profile: if args.profile:
if args.use_gpu: if args.use_gpu:
with profiler.cuda_profiler("cuda_profiler.txt", 'csv') as nvprof: with profiler.cuda_profiler("cuda_profiler.txt", 'csv') as nvprof:
train(args, data_reader=ctc_reader) train(args)
else: else:
with profiler.profiler("CPU", sorted_key='total') as cpuprof: with profiler.profiler("CPU", sorted_key='total') as cpuprof:
train(args, data_reader=ctc_reader) train(args)
else: else:
train(args, data_reader=ctc_reader) train(args)
if __name__ == "__main__": if __name__ == "__main__":
......
...@@ -19,6 +19,8 @@ from __future__ import print_function ...@@ -19,6 +19,8 @@ from __future__ import print_function
import distutils.util import distutils.util
import numpy as np import numpy as np
from paddle.fluid import core from paddle.fluid import core
import paddle.fluid as fluid
import six
def print_arguments(args): def print_arguments(args):
...@@ -37,7 +39,7 @@ def print_arguments(args): ...@@ -37,7 +39,7 @@ def print_arguments(args):
:type args: argparse.Namespace :type args: argparse.Namespace
""" """
print("----------- Configuration Arguments -----------") print("----------- Configuration Arguments -----------")
for arg, value in sorted(vars(args).iteritems()): for arg, value in sorted(six.iteritems(vars(args))):
print("%s: %s" % (arg, value)) print("%s: %s" % (arg, value))
print("------------------------------------------------") print("------------------------------------------------")
...@@ -77,14 +79,58 @@ def to_lodtensor(data, place): ...@@ -77,14 +79,58 @@ def to_lodtensor(data, place):
return res return res
def get_feeder_data(data, place, need_label=True): def get_ctc_feeder_data(data, place, need_label=True):
pixel_tensor = core.LoDTensor() pixel_tensor = core.LoDTensor()
pixel_data = None pixel_data = None
pixel_data = np.concatenate( pixel_data = np.concatenate(
map(lambda x: x[0][np.newaxis, :], data), axis=0).astype("float32") list(map(lambda x: x[0][np.newaxis, :], data)), axis=0).astype("float32")
pixel_tensor.set(pixel_data, place) pixel_tensor.set(pixel_data, place)
label_tensor = to_lodtensor(map(lambda x: x[1], data), place) label_tensor = to_lodtensor(list(map(lambda x: x[1], data)), place)
if need_label: if need_label:
return {"pixel": pixel_tensor, "label": label_tensor} return {"pixel": pixel_tensor, "label": label_tensor}
else: else:
return {"pixel": pixel_tensor} return {"pixel": pixel_tensor}
def get_attention_feeder_data(data, place, need_label=True):
pixel_tensor = core.LoDTensor()
pixel_data = None
pixel_data = np.concatenate(
list(map(lambda x: x[0][np.newaxis, :], data)), axis=0).astype("float32")
pixel_tensor.set(pixel_data, place)
label_in_tensor = to_lodtensor(list(map(lambda x: x[1], data)), place)
label_out_tensor = to_lodtensor(list(map(lambda x: x[2], data)), place)
if need_label:
return {
"pixel": pixel_tensor,
"label_in": label_in_tensor,
"label_out": label_out_tensor
}
else:
return {"pixel": pixel_tensor}
def get_attention_feeder_for_infer(data, place):
batch_size = len(data)
init_ids_data = np.array([0 for _ in range(batch_size)], dtype='int64')
init_scores_data = np.array(
[1. for _ in range(batch_size)], dtype='float32')
init_ids_data = init_ids_data.reshape((batch_size, 1))
init_scores_data = init_scores_data.reshape((batch_size, 1))
init_recursive_seq_lens = [1] * batch_size
init_recursive_seq_lens = [init_recursive_seq_lens, init_recursive_seq_lens]
init_ids = fluid.create_lod_tensor(init_ids_data, init_recursive_seq_lens,
place)
init_scores = fluid.create_lod_tensor(init_scores_data,
init_recursive_seq_lens, place)
pixel_tensor = core.LoDTensor()
pixel_data = None
pixel_data = np.concatenate(
list(map(lambda x: x[0][np.newaxis, :], data)), axis=0).astype("float32")
pixel_tensor.set(pixel_data, place)
return {
"pixel": pixel_tensor,
"init_ids": init_ids,
"init_scores": init_scores
}
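Concretely, for a batch of three the beam-search bootstrap built above looks like this (values grounded in the function body: every sample starts from id 0 with score 1.0 and a one-entry sequence at both LoD levels):

```python
import numpy as np

batch_size = 3
init_ids_data = np.zeros((batch_size, 1), dtype="int64")      # all start ids = 0
init_scores_data = np.ones((batch_size, 1), dtype="float32")  # all start scores = 1.0
# Two identical LoD levels: one sentence per sample, one candidate per sentence.
init_recursive_seq_lens = [[1, 1, 1], [1, 1, 1]]
```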
...@@ -7,8 +7,8 @@ from kpi import CostKpi, DurationKpi, AccKpi ...@@ -7,8 +7,8 @@ from kpi import CostKpi, DurationKpi, AccKpi
#### NOTE kpi.py should shared in models in some way!!!! #### NOTE kpi.py should shared in models in some way!!!!
train_acc_kpi = AccKpi('train_precision', 0.005, actived=True) train_acc_kpi = AccKpi('train_precision', 0.005, actived=False)
test_acc_kpi = CostKpi('test_precision', 0.005, actived=True) test_acc_kpi = CostKpi('test_precision', 0.005, actived=False)
train_duration_kpi = DurationKpi('train_duration', 0.05, actived=True) train_duration_kpi = DurationKpi('train_duration', 0.05, actived=True)
tracking_kpis = [ tracking_kpis = [
......
...@@ -8,7 +8,7 @@ from kpi import CostKpi, DurationKpi, AccKpi ...@@ -8,7 +8,7 @@ from kpi import CostKpi, DurationKpi, AccKpi
#### NOTE kpi.py should shared in models in some way!!!! #### NOTE kpi.py should shared in models in some way!!!!
train_acc_kpi = AccKpi('train_acc', 0.005, actived=True) train_acc_kpi = AccKpi('train_acc', 0.005, actived=True)
train_cost_kpi = CostKpi('train_cost', 0.005, actived=True) train_cost_kpi = CostKpi('train_cost', 0.005, actived=False)
train_duration_kpi = DurationKpi('train_duration', 0.05, actived=True) train_duration_kpi = DurationKpi('train_duration', 0.05, actived=True)
tracking_kpis = [ tracking_kpis = [
......
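These `_ce.py` files consume the `kpis ...` lines printed by the trainers above; a minimal sketch of such a scraper (the real `kpi.py` API is not shown here, so this parser and its names are assumptions):

```python
import sys

def parse_kpi_log(lines):
    """Collect {kpi_name: [values]} from lines like 'kpis train_cost 0.123'."""
    records = {}
    for line in lines:
        parts = line.strip().split()
        if len(parts) == 3 and parts[0] == "kpis":
            try:
                value = float(parts[2])
            except ValueError:
                continue
            records.setdefault(parts[1], []).append(value)
    return records

if __name__ == "__main__":
    print(parse_kpi_log(sys.stdin))
```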
...@@ -3,6 +3,7 @@ import contextlib ...@@ -3,6 +3,7 @@ import contextlib
import paddle import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
import numpy as np import numpy as np
import six
import sys import sys
import time import time
import os import os
...@@ -46,8 +47,8 @@ def data2tensor(data, place): ...@@ -46,8 +47,8 @@ def data2tensor(data, place):
""" """
data2tensor data2tensor
""" """
input_seq = to_lodtensor(map(lambda x: x[0], data), place) input_seq = to_lodtensor([x[0] for x in data], place)
y_data = np.array(map(lambda x: x[1], data)).astype("int64") y_data = np.array([x[1] for x in data]).astype("int64")
y_data = y_data.reshape([-1, 1]) y_data = y_data.reshape([-1, 1])
return {"words": input_seq, "label": y_data} return {"words": input_seq, "label": y_data}
...@@ -56,8 +57,8 @@ def data2pred(data, place): ...@@ -56,8 +57,8 @@ def data2pred(data, place):
""" """
data2pred data2pred
""" """
input_seq = to_lodtensor(map(lambda x: x[0], data), place) input_seq = to_lodtensor([x[0] for x in data], place)
y_data = np.array(map(lambda x: x[1], data)).astype("int64") y_data = np.array([x[1] for x in data]).astype("int64")
y_data = y_data.reshape([-1, 1]) y_data = y_data.reshape([-1, 1])
return {"words": input_seq} return {"words": input_seq}
...@@ -79,7 +80,7 @@ def save_dict(word_dict, vocab): ...@@ -79,7 +80,7 @@ def save_dict(word_dict, vocab):
Save dict into file Save dict into file
""" """
with open(vocab, "w") as fout: with open(vocab, "w") as fout:
for k, v in word_dict.iteritems(): for k, v in six.iteritems(word_dict):
outstr = ("%s\t%s\n" % (k, v)).encode("gb18030") outstr = ("%s\t%s\n" % (k, v)).encode("gb18030")
fout.write(outstr) fout.write(outstr)
...@@ -163,7 +164,7 @@ def scdb_train_data(train_dir="scdb_data/train_set/corpus.train.seg", ...@@ -163,7 +164,7 @@ def scdb_train_data(train_dir="scdb_data/train_set/corpus.train.seg",
def scdb_test_data(test_file, w_dict): def scdb_test_data(test_file, w_dict):
""" """
test_set=["car", "lbs", "spot", "weibo", test_set=["car", "lbs", "spot", "weibo",
"baby", "toutiao", "3c", "movie", "haogan"] "baby", "toutiao", "3c", "movie", "haogan"]
""" """
return data_reader(test_file, w_dict) return data_reader(test_file, w_dict)
...@@ -424,7 +425,7 @@ def start_train(train_reader, ...@@ -424,7 +425,7 @@ def start_train(train_reader,
start_exe.run(fluid.default_startup_program()) start_exe.run(fluid.default_startup_program())
exe = fluid.ParallelExecutor(use_cuda, loss_name=cost.name) exe = fluid.ParallelExecutor(use_cuda, loss_name=cost.name)
for pass_id in xrange(pass_num): for pass_id in six.moves.xrange(pass_num):
total_acc, total_cost, total_count, avg_cost, avg_acc = 0.0, 0.0, 0.0, 0.0, 0.0 total_acc, total_cost, total_count, avg_cost, avg_acc = 0.0, 0.0, 0.0, 0.0, 0.0
for data in train_reader(): for data in train_reader():
cost_val, acc_val = exe.run(feed=feeder.feed(data), cost_val, acc_val = exe.run(feed=feeder.feed(data),
...@@ -452,7 +453,7 @@ def train_net(vocab="./thirdparty/train.vocab", ...@@ -452,7 +453,7 @@ def train_net(vocab="./thirdparty/train.vocab",
""" """
w_dict = scdb_word_dict(vocab=vocab) w_dict = scdb_word_dict(vocab=vocab)
test_files = [ "./thirdparty" + os.sep + f for f in test_list] test_files = [ "./thirdparty" + os.sep + f for f in test_list]
train_reader = paddle.batch( train_reader = paddle.batch(
scdb_train_data(train_dir, w_dict), scdb_train_data(train_dir, w_dict),
batch_size = 256) batch_size = 256)
......
...@@ -3,6 +3,7 @@ import contextlib ...@@ -3,6 +3,7 @@ import contextlib
import paddle import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
import numpy as np import numpy as np
import six
import sys import sys
import time import time
import os import os
...@@ -46,8 +47,8 @@ def data2tensor(data, place): ...@@ -46,8 +47,8 @@ def data2tensor(data, place):
""" """
data2tensor data2tensor
""" """
input_seq = to_lodtensor(map(lambda x: x[0], data), place) input_seq = to_lodtensor([x[0] for x in data], place)
y_data = np.array(map(lambda x: x[1], data)).astype("int64") y_data = np.array([x[1] for x in data]).astype("int64")
y_data = y_data.reshape([-1, 1]) y_data = y_data.reshape([-1, 1])
return {"words": input_seq, "label": y_data} return {"words": input_seq, "label": y_data}
...@@ -56,8 +57,8 @@ def data2pred(data, place): ...@@ -56,8 +57,8 @@ def data2pred(data, place):
""" """
data2pred data2pred
""" """
input_seq = to_lodtensor(map(lambda x: x[0], data), place) input_seq = to_lodtensor([x[0] for x in data], place)
y_data = np.array(map(lambda x: x[1], data)).astype("int64") y_data = np.array([x[1] for x in data]).astype("int64")
y_data = y_data.reshape([-1, 1]) y_data = y_data.reshape([-1, 1])
return {"words": input_seq} return {"words": input_seq}
...@@ -79,7 +80,7 @@ def save_dict(word_dict, vocab): ...@@ -79,7 +80,7 @@ def save_dict(word_dict, vocab):
Save dict into file Save dict into file
""" """
with open(vocab, "w") as fout: with open(vocab, "w") as fout:
for k, v in word_dict.iteritems(): for k, v in six.iteritems(word_dict):
outstr = ("%s\t%s\n" % (k, v)).encode("gb18030") outstr = ("%s\t%s\n" % (k, v)).encode("gb18030")
fout.write(outstr) fout.write(outstr)
...@@ -163,7 +164,7 @@ def scdb_train_data(train_dir="scdb_data/train_set/corpus.train.seg", ...@@ -163,7 +164,7 @@ def scdb_train_data(train_dir="scdb_data/train_set/corpus.train.seg",
def scdb_test_data(test_file, w_dict): def scdb_test_data(test_file, w_dict):
""" """
test_set=["car", "lbs", "spot", "weibo", test_set=["car", "lbs", "spot", "weibo",
"baby", "toutiao", "3c", "movie", "haogan"] "baby", "toutiao", "3c", "movie", "haogan"]
""" """
return data_reader(test_file, w_dict) return data_reader(test_file, w_dict)
...@@ -422,7 +423,7 @@ def start_train(train_reader, ...@@ -422,7 +423,7 @@ def start_train(train_reader,
feeder = fluid.DataFeeder(feed_list=[data, label], place=place) feeder = fluid.DataFeeder(feed_list=[data, label], place=place)
exe.run(fluid.default_startup_program()) exe.run(fluid.default_startup_program())
for pass_id in xrange(pass_num): for pass_id in six.moves.xrange(pass_num):
data_size, data_count, total_acc, total_cost = 0, 0, 0.0, 0.0 data_size, data_count, total_acc, total_cost = 0, 0, 0.0, 0.0
for data in train_reader(): for data in train_reader():
avg_cost_np, avg_acc_np = exe.run(fluid.default_main_program(), avg_cost_np, avg_acc_np = exe.run(fluid.default_main_program(),
......
import os import os
import six
import sys import sys
import time import time
import unittest import unittest
...@@ -58,7 +59,7 @@ def train(train_reader, ...@@ -58,7 +59,7 @@ def train(train_reader,
if "CE_MODE_X" in os.environ: if "CE_MODE_X" in os.environ:
fluid.default_startup_program().random_seed = 110 fluid.default_startup_program().random_seed = 110
exe.run(fluid.default_startup_program()) exe.run(fluid.default_startup_program())
for pass_id in xrange(pass_num): for pass_id in six.moves.xrange(pass_num):
pass_start = time.time() pass_start = time.time()
data_size, data_count, total_acc, total_cost = 0, 0, 0.0, 0.0 data_size, data_count, total_acc, total_cost = 0, 0, 0.0, 0.0
for data in train_reader(): for data in train_reader():
......
...@@ -43,8 +43,8 @@ def data2tensor(data, place): ...@@ -43,8 +43,8 @@ def data2tensor(data, place):
""" """
data2tensor data2tensor
""" """
input_seq = to_lodtensor(map(lambda x: x[0], data), place) input_seq = to_lodtensor([x[0] for x in data], place)
y_data = np.array(map(lambda x: x[1], data)).astype("int64") y_data = np.array([x[1] for x in data]).astype("int64")
y_data = y_data.reshape([-1, 1]) y_data = y_data.reshape([-1, 1])
return {"words": input_seq, "label": y_data} return {"words": input_seq, "label": y_data}
......