Commit 26ba6680 authored by: W wanghaoshuang

Merge branch 'develop' of https://github.com/PaddlePaddle/models into fix_params_grad

......@@ -17,7 +17,7 @@ addons:
- python-pip
- python2.7-dev
- clang-format-3.8
ssh_known_hosts: 52.76.173.135
ssh_known_hosts: 13.229.163.131
before_install:
- if [[ "$JOB" == "PRE_COMMIT" ]]; then sudo ln -s /usr/bin/clang-format-3.8 /usr/bin/clang-format; fi
- sudo pip install -U virtualenv pre-commit pip
......
......@@ -15,9 +15,7 @@ from multiprocessing import Manager, Process
import data_utils.augmentor.trans_mean_variance_norm as trans_mean_variance_norm
import data_utils.augmentor.trans_add_delta as trans_add_delta
from data_utils.util import suppress_complaints, suppress_signal
from data_utils.util import SharedNDArray, SharedMemoryPoolManager
from data_utils.util import DaemonProcessGroup, batch_to_ndarray
from data_utils.util import CriticalException, ForceExitWrapper, EpochEndSignal
from data_utils.util import CriticalException, ForceExitWrapper
class SampleInfo(object):
......@@ -32,11 +30,12 @@ class SampleInfo(object):
label_bin_path (str): File containing the label data.
label_size (int): Byte count of the sample's label data.
label_frame_num (int): Number of label frames in the sample.
sample_name (str): Key of the sample.
"""
def __init__(self, feature_bin_path, feature_start, feature_size,
feature_frame_num, feature_dim, label_bin_path, label_start,
label_size, label_frame_num):
label_size, label_frame_num, sample_name):
self.feature_bin_path = feature_bin_path
self.feature_start = feature_start
self.feature_size = feature_size
......@@ -47,6 +46,7 @@ class SampleInfo(object):
self.label_start = label_start
self.label_size = label_size
self.label_frame_num = label_frame_num
self.sample_name = sample_name
class SampleInfoBucket(object):
......@@ -69,8 +69,8 @@ class SampleInfoBucket(object):
split_sentence_threshold(int): Sentences whose length is larger than
this value will trigger the split operation.
split_sub_sentence_len(int): sub-sentence length is equal to
(split_sub_sentence_len + \
rand() % split_perturb).
(split_sub_sentence_len
+ rand() % split_perturb).
"""
def __init__(self,
......@@ -104,24 +104,33 @@ class SampleInfoBucket(object):
feature_bin_path = self._feature_bin_paths[block_idx]
feature_desc_path = self._feature_desc_paths[block_idx]
label_desc_lines = open(label_desc_path).readlines()
feature_desc_lines = open(feature_desc_path).readlines()
sample_num = int(label_desc_lines[0].split()[1])
assert sample_num == int(feature_desc_lines[0].split()[1])
label_desc_lines = []
if label_desc_path != "":
label_desc_lines = open(label_desc_path).readlines()
sample_num = int(feature_desc_lines[0].split()[1])
if label_desc_path != "":
assert sample_num == int(label_desc_lines[0].split()[1])
for i in xrange(sample_num):
feature_desc_split = feature_desc_lines[i + 1].split()
sample_name = feature_desc_split[0]
feature_start = int(feature_desc_split[2])
feature_size = int(feature_desc_split[3])
feature_frame_num = int(feature_desc_split[4])
feature_dim = int(feature_desc_split[5])
label_desc_split = label_desc_lines[i + 1].split()
label_start = int(label_desc_split[2])
label_size = int(label_desc_split[3])
label_frame_num = int(label_desc_split[4])
assert feature_frame_num == label_frame_num
label_start = -1
label_size = -1
label_frame_num = feature_frame_num
if label_desc_path != "":
label_desc_split = label_desc_lines[i + 1].split()
label_start = int(label_desc_split[2])
label_size = int(label_desc_split[3])
label_frame_num = int(label_desc_split[4])
assert feature_frame_num == label_frame_num
if self._split_sentence_threshold == -1 or \
self._split_perturb == -1 or \
......@@ -131,7 +140,7 @@ class SampleInfoBucket(object):
SampleInfo(feature_bin_path, feature_start,
feature_size, feature_frame_num, feature_dim,
label_bin_path, label_start, label_size,
label_frame_num))
label_frame_num, sample_name))
# split sentence
else:
cur_frame_pos = 0
......@@ -152,16 +161,19 @@ class SampleInfoBucket(object):
* feature_dim * 4, cur_frame_len * feature_dim *
4, cur_frame_len, feature_dim, label_bin_path,
label_start + cur_frame_pos * 4, cur_frame_len *
4, cur_frame_len))
4, cur_frame_len, sample_name))
remain_frame_num -= cur_frame_len
cur_frame_pos += cur_frame_len
if remain_frame_num <= 0:
break
return sample_info_list
class EpochEndSignal():
pass
class AsyncDataReader(object):
"""DataReader provides basic audio sample preprocessing pipeline including
data loading and data augmentation.
......@@ -190,7 +202,7 @@ class AsyncDataReader(object):
def __init__(self,
feature_file_list,
label_file_list,
label_file_list="",
drop_frame_len=512,
proc_num=10,
sample_buffer_size=1024,
......@@ -213,25 +225,30 @@ class AsyncDataReader(object):
self._sample_info_buffer_size = sample_info_buffer_size
self._batch_buffer_size = batch_buffer_size
self._proc_num = proc_num
if self._proc_num <= 2:
raise ValueError("Value of `proc_num` should be greater than 2.")
self._sample_proc_num = self._proc_num - 2
self._verbose = verbose
self._force_exit = ForceExitWrapper(self._manager.Value('b', False))
def generate_bucket_list(self, is_shuffle):
if self._block_info_list is None:
block_feature_info_lines = open(self._feature_file_list).readlines()
block_label_info_lines = open(self._label_file_list).readlines()
assert len(block_feature_info_lines) == len(block_label_info_lines)
self._block_info_list = []
for i in xrange(0, len(block_feature_info_lines), 2):
block_info = (block_feature_info_lines[i],
block_feature_info_lines[i + 1],
block_label_info_lines[i],
block_label_info_lines[i + 1])
self._block_info_list.append(
map(lambda line: line.strip(), block_info))
if self._label_file_list != "":
block_label_info_lines = open(self._label_file_list).readlines()
assert len(block_feature_info_lines) == len(
block_label_info_lines)
for i in xrange(0, len(block_feature_info_lines), 2):
block_info = (block_feature_info_lines[i],
block_feature_info_lines[i + 1],
block_label_info_lines[i],
block_label_info_lines[i + 1])
self._block_info_list.append(
map(lambda line: line.strip(), block_info))
else:
for i in xrange(0, len(block_feature_info_lines), 2):
block_info = (block_feature_info_lines[i],
block_feature_info_lines[i + 1], "", "")
self._block_info_list.append(
map(lambda line: line.strip(), block_info))
if is_shuffle:
self._rng.shuffle(self._block_info_list)
......@@ -251,23 +268,13 @@ class AsyncDataReader(object):
def set_transformers(self, transformers):
self._transformers = transformers
def recycle(self, *args):
for shared_ndarray in args:
if not isinstance(shared_ndarray, SharedNDArray):
raise ValueError("Only support recycle SharedNDArray object.")
shared_ndarray.recycle(self._pool_manager.pool)
def _start_async_processing(self):
def _sample_generator(self):
sample_info_queue = self._manager.Queue(self._sample_info_buffer_size)
sample_queue = self._manager.Queue(self._sample_buffer_size)
self._order_id = 0
@suppress_complaints(verbose=self._verbose, notify=self._force_exit)
def ordered_feeding_task(sample_info_queue):
if self._verbose == 0:
signal.signal(signal.SIGTERM, suppress_signal)
signal.signal(signal.SIGINT, suppress_signal)
for sample_info_bucket in self._bucket_list:
try:
sample_info_list = \
......@@ -280,12 +287,13 @@ class AsyncDataReader(object):
sample_info_queue.put((sample_info, self._order_id))
self._order_id += 1
for i in xrange(self._sample_proc_num):
for i in xrange(self._proc_num):
sample_info_queue.put(EpochEndSignal())
feeding_proc = DaemonProcessGroup(
proc_num=1, target=ordered_feeding_task, args=(sample_info_queue, ))
feeding_proc.start_all()
feeding_thread = Thread(
target=ordered_feeding_task, args=(sample_info_queue, ))
feeding_thread.daemon = True
feeding_thread.start()
@suppress_complaints(verbose=self._verbose, notify=self._force_exit)
def ordered_processing_task(sample_info_queue, sample_queue, out_order):
......@@ -313,25 +321,32 @@ class AsyncDataReader(object):
sample_info.feature_size)
assert sample_info.feature_frame_num \
* sample_info.feature_dim * 4 == len(feature_bytes), \
(sample_info.feature_bin_path,
sample_info.feature_frame_num,
sample_info.feature_dim,
len(feature_bytes))
label_bytes = read_bytes(sample_info.label_bin_path,
sample_info.label_start,
sample_info.label_size)
assert sample_info.label_frame_num * 4 == len(label_bytes), (
sample_info.label_bin_path, sample_info.label_array,
len(label_bytes))
label_array = struct.unpack('I' * sample_info.label_frame_num,
label_bytes)
label_data = np.array(
label_array, dtype='int64').reshape(
(sample_info.label_frame_num, 1))
* sample_info.feature_dim * 4 \
== len(feature_bytes), \
(sample_info.feature_bin_path,
sample_info.feature_frame_num,
sample_info.feature_dim,
len(feature_bytes))
label_data = None
if sample_info.label_bin_path != "":
label_bytes = read_bytes(sample_info.label_bin_path,
sample_info.label_start,
sample_info.label_size)
assert sample_info.label_frame_num * 4 == len(
label_bytes), (sample_info.label_bin_path,
sample_info.label_frame_num,
len(label_bytes))
label_array = struct.unpack(
'I' * sample_info.label_frame_num, label_bytes)
label_data = np.array(
label_array, dtype='int64').reshape(
(sample_info.label_frame_num, 1))
else:
label_data = np.zeros(
(sample_info.label_frame_num, 1), dtype='int64')
feature_frame_num = sample_info.feature_frame_num
feature_dim = sample_info.feature_dim
......@@ -341,12 +356,11 @@ class AsyncDataReader(object):
feature_data = np.array(
feature_array, dtype='float32').reshape((
sample_info.feature_frame_num, sample_info.feature_dim))
sample_data = (feature_data, label_data)
sample_data = (feature_data, label_data,
sample_info.sample_name)
for transformer in self._transformers:
# @TODO(pkuyym) to make transformer only accept feature_data
sample_data = transformer.perform_trans(sample_data)
while order_id != out_order[0]:
time.sleep(0.001)
......@@ -362,74 +376,77 @@ class AsyncDataReader(object):
out_order = self._manager.list([0])
args = (sample_info_queue, sample_queue, out_order)
sample_proc = DaemonProcessGroup(
proc_num=self._sample_proc_num,
target=ordered_processing_task,
args=args)
sample_proc.start_all()
workers = [
Process(
target=ordered_processing_task, args=args)
for _ in xrange(self._proc_num)
]
return sample_queue
for w in workers:
w.daemon = True
w.start()
def batch_iterator(self, batch_size, minimum_batch_size):
@suppress_complaints(verbose=self._verbose, notify=self._force_exit)
def batch_assembling_task(sample_queue, batch_queue, pool):
def conv_to_shared(ndarray):
while self._force_exit == False:
try:
(name, shared_ndarray) = pool.popitem()
except Exception as e:
time.sleep(0.001)
finished_proc_num = 0
while self._force_exit == False:
try:
sample = sample_queue.get_nowait()
except Queue.Empty:
time.sleep(0.001)
else:
if isinstance(sample, EpochEndSignal):
finished_proc_num += 1
if finished_proc_num >= self._proc_num:
break
else:
shared_ndarray.copy(ndarray)
return shared_ndarray
continue
if self._verbose == 0:
signal.signal(signal.SIGTERM, suppress_signal)
signal.signal(signal.SIGINT, suppress_signal)
yield sample
def batch_iterator(self, batch_size, minimum_batch_size):
def batch_to_ndarray(batch_samples, lod):
assert len(batch_samples)
frame_dim = batch_samples[0][0].shape[1]
batch_feature = np.zeros((lod[-1], frame_dim), dtype="float32")
batch_label = np.zeros((lod[-1], 1), dtype="int64")
start = 0
name_lst = []
for sample in batch_samples:
frame_num = sample[0].shape[0]
batch_feature[start:start + frame_num, :] = sample[0]
batch_label[start:start + frame_num, :] = sample[1]
start += frame_num
name_lst.append(sample[2])
return (batch_feature, batch_label, name_lst)
@suppress_complaints(verbose=self._verbose, notify=self._force_exit)
def batch_assembling_task(sample_generator, batch_queue):
batch_samples = []
lod = [0]
done_num = 0
while done_num < self._sample_proc_num:
sample = sample_queue.get()
if isinstance(sample, EpochEndSignal):
done_num += 1
else:
batch_samples.append(sample)
lod.append(lod[-1] + sample[0].shape[0])
if len(batch_samples) == batch_size:
feature, label = batch_to_ndarray(batch_samples, lod)
feature = conv_to_shared(feature)
label = conv_to_shared(label)
lod = conv_to_shared(np.array(lod).astype('int64'))
batch_queue.put((feature, label, lod))
batch_samples = []
lod = [0]
for sample in sample_generator():
batch_samples.append(sample)
lod.append(lod[-1] + sample[0].shape[0])
if len(batch_samples) == batch_size:
(batch_feature, batch_label, name_lst) = batch_to_ndarray(
batch_samples, lod)
batch_queue.put((batch_feature, batch_label, lod, name_lst))
batch_samples = []
lod = [0]
if len(batch_samples) >= minimum_batch_size:
(feature, label) = batch_to_ndarray(batch_samples, lod)
feature = conv_to_shared(feature)
label = conv_to_shared(label)
lod = conv_to_shared(np.array(lod).astype('int64'))
batch_queue.put((feature, label, lod))
(batch_feature, batch_label, name_lst) = batch_to_ndarray(
batch_samples, lod)
batch_queue.put((batch_feature, batch_label, lod, name_lst))
batch_queue.put(EpochEndSignal())
sample_queue = self._start_async_processing()
batch_queue = self._manager.Queue(self._batch_buffer_size)
batch_queue = Queue.Queue(self._batch_buffer_size)
self._pool_manager = SharedMemoryPoolManager(self._batch_buffer_size *
3, self._manager)
assembling_proc = DaemonProcessGroup(
proc_num=1,
assembling_thread = Thread(
target=batch_assembling_task,
args=(sample_queue, batch_queue, self._pool_manager.pool))
assembling_proc.start_all()
args=(self._sample_generator, batch_queue))
assembling_thread.daemon = True
assembling_thread.start()
while self._force_exit == False:
try:
......@@ -440,6 +457,3 @@ class AsyncDataReader(object):
if isinstance(batch_data, EpochEndSignal):
break
yield batch_data
# clean the shared memory
del self._pool_manager
......@@ -22,7 +22,7 @@ class TestTransMeanVarianceNorm(unittest.TestCase):
feature = np.zeros((2, 120), dtype="float32")
feature.fill(1)
trans = trans_mean_variance_norm.TransMeanVarianceNorm(self._file_path)
(feature1, label1) = trans.perform_trans((feature, None))
(feature1, label1, name) = trans.perform_trans((feature, None, None))
(mean, var) = trans.get_mean_var()
feature_flat1 = feature1.flatten()
feature_flat = feature.flatten()
......@@ -70,7 +70,7 @@ class TestTransAddDelta(unittest.TestCase):
feature[2, 0:40].fill(3)
feature[3, 0:40].fill(4)
trans = trans_add_delta.TransAddDelta()
(feature, label) = trans.perform_trans((feature, None))
(feature, label, name) = trans.perform_trans((feature, None, None))
self.assertAlmostEqual(feature.shape[0], 4)
self.assertAlmostEqual(feature.shape[1], 120)
self.assertAlmostEqual(1.0, feature[0][0])
......@@ -93,7 +93,7 @@ class TestTransSplict(unittest.TestCase):
feature[i, :].fill(i)
trans = trans_splice.TransSplice()
(feature, label) = trans.perform_trans((feature, None))
(feature, label, name) = trans.perform_trans((feature, None, None))
self.assertEqual(feature.shape[1], 110)
for i in xrange(8):
......
......@@ -32,9 +32,9 @@ class TransAddDelta(object):
Args:
sample(object,tuple): contain feature numpy and label numpy
Returns:
(feature, label)
(feature, label, name)
"""
(feature, label) = sample
(feature, label, name) = sample
frame_dim = feature.shape[1]
d_frame_dim = frame_dim * 3
head_filled = 5
......@@ -64,7 +64,7 @@ class TransAddDelta(object):
start * d_frame_dim + 2 * frame_dim, frame_dim, nframe,
d_frame_dim)
mat.shape = tmp_shape
return (mat[head_filled:mat.shape[0] - tail_filled, :], label)
return (mat[head_filled:mat.shape[0] - tail_filled, :], label, name)
def _regress(self, data_in, start_in, data_out, start_out, size, n, step):
""" regress
......
......@@ -53,9 +53,9 @@ class TransMeanVarianceNorm(object):
Args:
sample(object):input sample, contain feature numpy and label numpy
Returns:
(feature, label)
(feature, label, name)
"""
(feature, label) = sample
(feature, label, name) = sample
shape = feature.shape
assert len(shape) == 2
nfeature_len = shape[0] * shape[1]
......@@ -68,4 +68,4 @@ class TransMeanVarianceNorm(object):
feature[ncur_idx:ncur_idx + self._nLen] = block
ncur_idx += self._nLen
feature = feature.reshape(shape)
return (feature, label)
return (feature, label, name)
......@@ -30,9 +30,9 @@ class TransSplice(object):
Args:
sample(object): input sample(feature, label)
Return:
(feature, label)
(feature, label, name)
"""
(feature, label) = sample
(feature, label, name) = sample
nframe_num = feature.shape[0]
nframe_dim = feature.shape[1]
nnew_frame_dim = nframe_dim * (
......@@ -61,4 +61,4 @@ class TransSplice(object):
np.copyto(ret[i * nnew_frame_dim:(i + 1) * nnew_frame_dim],
mat[i * nframe_dim:i * nframe_dim + nnew_frame_dim])
ret = ret.reshape((nframe_num, nnew_frame_dim))
return (ret, label)
return (ret, label, name)
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import sys, time
import sys
from six import reraise
from tblib import Traceback
from multiprocessing import Manager, Process
import posix_ipc, mmap
import numpy as np
......@@ -37,19 +35,6 @@ def lodtensor_to_ndarray(lod_tensor):
return ret, lod_tensor.lod()
def batch_to_ndarray(batch_samples, lod):
frame_dim = batch_samples[0][0].shape[1]
batch_feature = np.zeros((lod[-1], frame_dim), dtype="float32")
batch_label = np.zeros((lod[-1], 1), dtype="int64")
start = 0
for sample in batch_samples:
frame_num = sample[0].shape[0]
batch_feature[start:start + frame_num, :] = sample[0]
batch_label[start:start + frame_num, :] = sample[1]
start += frame_num
return (batch_feature, batch_label)
def split_infer_result(infer_seq, lod):
infer_batch = []
for i in xrange(0, len(lod[0]) - 1):
......@@ -57,127 +42,10 @@ def split_infer_result(infer_seq, lod):
return infer_batch
class DaemonProcessGroup(object):
def __init__(self, proc_num, target, args):
self._proc_num = proc_num
self._workers = [
Process(
target=target, args=args) for _ in xrange(self._proc_num)
]
def start_all(self):
for w in self._workers:
w.daemon = True
w.start()
@property
def proc_num(self):
return self._proc_num
class EpochEndSignal(object):
pass
class CriticalException(Exception):
pass
class SharedNDArray(object):
"""SharedNDArray utilizes shared memory to avoid data serialization when
data object shared among different processes. We can reconstruct the
`ndarray` when memory address, shape and dtype provided.
Args:
name (str): Address name of shared memory.
whether_verify (bool): Whether to validate the writing operation.
"""
def __init__(self, name, whether_verify=False):
self._name = name
self._shm = None
self._buf = None
self._array = np.zeros(1, dtype=np.float32)
self._inited = False
self._whether_verify = whether_verify
def zeros_like(self, shape, dtype):
size = int(np.prod(shape)) * np.dtype(dtype).itemsize
if self._inited:
self._shm = posix_ipc.SharedMemory(self._name)
else:
self._shm = posix_ipc.SharedMemory(
self._name, posix_ipc.O_CREAT, size=size)
self._buf = mmap.mmap(self._shm.fd, size)
self._array = np.ndarray(shape, dtype, self._buf, order='C')
def copy(self, ndarray):
size = int(np.prod(ndarray.shape)) * np.dtype(ndarray.dtype).itemsize
self.zeros_like(ndarray.shape, ndarray.dtype)
self._array[:] = ndarray
self._buf.flush()
self._inited = True
if self._whether_verify:
shm = posix_ipc.SharedMemory(self._name)
buf = mmap.mmap(shm.fd, size)
array = np.ndarray(ndarray.shape, ndarray.dtype, buf, order='C')
np.testing.assert_array_equal(array, ndarray)
@property
def ndarray(self):
return self._array
def recycle(self, pool):
self._buf.close()
self._shm.close_fd()
self._inited = False
pool[self._name] = self
def __getstate__(self):
return (self._name, self._array.shape, self._array.dtype, self._inited,
self._whether_verify)
def __setstate__(self, state):
self._name = state[0]
self._inited = state[3]
self.zeros_like(state[1], state[2])
self._whether_verify = state[4]
class SharedMemoryPoolManager(object):
"""SharedMemoryPoolManager maintains a multiprocessing.Manager.dict object.
All available addresses are allocated once and will be reused. Though this
class is not process-safe, the pool can be shared between processes. All
shared memory should be unlinked before the main process exits.
Args:
pool_size (int): Size of shared memory pool.
manager (Manager): A multiprocessing.Manager object; the pool is
maintained by the proxy process.
name_prefix (str): Address prefix of shared memory.
"""
def __init__(self, pool_size, manager, name_prefix='/deep_asr'):
self._names = []
self._dict = manager.dict()
self._time_prefix = time.strftime('%Y%m%d%H%M%S')
for i in xrange(pool_size):
name = name_prefix + '_' + self._time_prefix + '_' + str(i)
self._dict[name] = SharedNDArray(name)
self._names.append(name)
@property
def pool(self):
return self._dict
def __del__(self):
for name in self._names:
# have to unlink the shared memory
posix_ipc.unlink_shared_memory(name)
def suppress_signal(signo, stack_frame):
pass
......
......@@ -21,14 +21,15 @@ using fst::StdArc;
Decoder::Decoder(std::string word_syms_filename,
std::string fst_in_filename,
std::string logprior_rxfilename) {
std::string logprior_rxfilename,
kaldi::BaseFloat acoustic_scale) {
const char* usage =
"Decode, reading log-likelihoods (of transition-ids or whatever symbol "
"is on the graph) as matrices.";
kaldi::ParseOptions po(usage);
binary = true;
acoustic_scale = 1.5;
this->acoustic_scale = acoustic_scale;
allow_partial = true;
kaldi::FasterDecoderOptions decoder_opts;
decoder_opts.Register(&po, true); // true == include obscure settings.
......
......@@ -29,7 +29,8 @@ class Decoder {
public:
Decoder(std::string word_syms_filename,
std::string fst_in_filename,
std::string logprior_rxfilename);
std::string logprior_rxfilename,
kaldi::BaseFloat acoustic_scale);
~Decoder();
// Interface to accept the scores read from specifier and return
......
......@@ -23,7 +23,7 @@ PYBIND11_MODULE(post_decode_faster, m) {
m.doc() = "Decoder for Deep ASR model";
py::class_<Decoder>(m, "Decoder")
.def(py::init<std::string, std::string, std::string>())
.def(py::init<std::string, std::string, std::string, kaldi::BaseFloat>())
.def("decode",
(std::vector<std::string> (Decoder::*)(std::string)) &
Decoder::decode,
......
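With the extra `kaldi::BaseFloat` argument exposed through pybind11, the Python-side `Decoder` constructor now takes four positional arguments. A minimal sketch of how the updated binding might be called, assuming the module is importable as `post_decode_faster` and that `decode(key, scores)` accepts a numpy array of per-frame scores as in `infer_by_ckpt.py`; the word-symbol and graph paths below are hypothetical, only `decoder/logprior` and the 0.2 scale come from this diff:

import numpy as np
from post_decode_faster import Decoder

# word symbol table, decoding graph, log priors, and the new acoustic_scale
# (0.2 matches the --acoustic_scale default added to infer_by_ckpt.py).
decoder = Decoder("decoder/words.txt", "decoder/graph.fst", "decoder/logprior", 0.2)

# Fake per-frame scores for one utterance: (frame_num, class_num).
scores = np.random.rand(120, 101).astype("float32")
hyp = decoder.decode("utt_0001", scores)  # same two-argument call as in infer_by_ckpt.py
print(hyp)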
data_dir=~/.cache/paddle/dataset/speech/deep_asr_data/aishell
data_url='http://deep-asr-data.gz.bcebos.com/aishell_data.tar.gz'
lst_url='http://deep-asr-data.gz.bcebos.com/aishell_lst.tar.gz'
md5=e017d858d9e509c8a84b73f673f08b9a
if [ ! -e $data_dir ]; then
mkdir -p $data_dir
fi
if [ ! -e $data_dir/aishell_data.tar.gz ]; then
echo "Download $data_dir/aishell_data.tar.gz ..."
wget -c -P $data_dir $data_url
else
echo "Skip downloading for $data_dir/aishell_data.tar.gz has already existed!"
fi
echo "Checking md5 sum ..."
md5sum_tmp=`md5sum $data_dir/aishell_data.tar.gz | cut -d ' ' -f1`
if [ $md5sum_tmp != $md5 ]; then
echo "Md5sum check failed, please remove and redownload "
"$data_dir/aishell_data.tar.gz"
exit 1
fi
echo "Untar aishell_data.tar.gz ..."
tar xzf $data_dir/aishell_data.tar.gz -C $data_dir
if [ ! -e data ]; then
mkdir data
fi
echo "Download and untar lst files ..."
wget -c -P data $lst_url
tar xvf data/aishell_lst.tar.gz -C data
ln -s $data_dir data/aishell
export CUDA_VISIBLE_DEVICES=2,3,4,5
python -u ../../train.py --train_feature_lst data/train_feature.lst \
--train_label_lst data/train_label.lst \
--val_feature_lst data/val_feature.lst \
--val_label_lst data/val_label.lst \
--mean_var data/aishell/global_mean_var \
--checkpoints checkpoints \
--frame_dim 2640 \
--class_num 101 \
--infer_models '' \
--batch_size 128 \
--learning_rate 0.00016 \
--parallel
......@@ -8,7 +8,7 @@ import paddle.fluid as fluid
import data_utils.augmentor.trans_mean_variance_norm as trans_mean_variance_norm
import data_utils.augmentor.trans_add_delta as trans_add_delta
import data_utils.augmentor.trans_splice as trans_splice
import data_utils.data_reader as reader
import data_utils.async_data_reader as reader
from data_utils.util import lodtensor_to_ndarray
from data_utils.util import split_infer_result
......@@ -79,12 +79,13 @@ def infer(args):
trans_splice.TransSplice()
]
infer_data_reader = reader.DataReader(args.infer_feature_lst,
args.infer_label_lst)
infer_data_reader = reader.AsyncDataReader(args.infer_feature_lst,
args.infer_label_lst)
infer_data_reader.set_transformers(ltrans)
feature_t = fluid.LoDTensor()
one_batch = infer_data_reader.batch_iterator(args.batch_size, 1).next()
(features, labels, lod) = one_batch
feature_t.set(features, place)
feature_t.set_lod([lod])
......
......@@ -17,6 +17,7 @@ from decoder.post_decode_faster import Decoder
from data_utils.util import lodtensor_to_ndarray
from model_utils.model import stacked_lstmp_model
from data_utils.util import split_infer_result
from tools.error_rate import char_errors
def parse_args():
......@@ -86,6 +87,11 @@ def parse_args():
type=str,
default='data/infer_label.lst',
help='The label list path for inference. (default: %(default)s)')
parser.add_argument(
'--ref_txt',
type=str,
default='data/text.test',
help='The reference text for decoding. (default: %(default)s)')
parser.add_argument(
'--checkpoint',
type=str,
......@@ -106,6 +112,16 @@ def parse_args():
type=str,
default="./decoder/logprior",
help="The log prior probs for training data. (default: %(default)s)")
parser.add_argument(
'--acoustic_scale',
type=float,
default=0.2,
help="Scaling factor for acoustic likelihoods. (default: %(default)f)")
parser.add_argument(
'--target_trans',
type=str,
default="./decoder/target_trans.txt",
help="The path to target transcription. (default: %(default)s)")
args = parser.parse_args()
return args
......@@ -117,6 +133,18 @@ def print_arguments(args):
print('------------------------------------------------')
def get_trg_trans(args):
trans_dict = {}
with open(args.target_trans) as trg_trans:
line = trg_trans.readline()
while line:
items = line.strip().split()
key = items[0]
trans_dict[key] = ''.join(items[1:])
line = trg_trans.readline()
return trans_dict
def infer_from_ckpt(args):
"""Inference by using checkpoint."""
......@@ -140,9 +168,14 @@ def infer_from_ckpt(args):
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
trg_trans = get_trg_trans(args)
# load checkpoint.
fluid.io.load_persistables(exe, args.checkpoint)
# init decoder
decoder = Decoder(args.vocabulary, args.graphs, args.log_prior,
args.acoustic_scale)
ltrans = [
trans_add_delta.TransAddDelta(2, 2),
trans_mean_variance_norm.TransMeanVarianceNorm(args.mean_var),
......@@ -157,17 +190,16 @@ def infer_from_ckpt(args):
args.infer_label_lst)
infer_data_reader.set_transformers(ltrans)
infer_costs, infer_accs = [], []
total_edit_dist, total_ref_len = 0.0, 0
for batch_id, batch_data in enumerate(
infer_data_reader.batch_iterator(args.batch_size,
args.minimum_batch_size)):
# load_data
(features, labels, lod) = batch_data
feature_t.set(features.ndarray, place)
feature_t.set_lod([lod.ndarray])
label_t.set(labels.ndarray, place)
label_t.set_lod([lod.ndarray])
infer_data_reader.recycle(features, labels, lod)
(features, labels, lod, name_lst) = batch_data
feature_t.set(features, place)
feature_t.set_lod([lod])
label_t.set(labels, place)
label_t.set_lod([lod])
results = exe.run(infer_program,
feed={"feature": feature_t,
......@@ -179,11 +211,19 @@ def infer_from_ckpt(args):
probs, lod = lodtensor_to_ndarray(results[0])
infer_batch = split_infer_result(probs, lod)
for index, sample in enumerate(infer_batch):
key = "utter#%d" % (batch_id * args.batch_size + index)
print(key, ": ", decoder.decode(key, sample), "\n")
print(np.mean(infer_costs), np.mean(infer_accs))
for index, sample in enumerate(infer_batch):
key = name_lst[index]
ref = trg_trans[key]
hyp = decoder.decode(key, sample)
edit_dist, ref_len = char_errors(ref.decode("utf8"), hyp)
total_edit_dist += edit_dist
total_ref_len += ref_len
print(key + "|Ref:", ref)
print(key + "|Hyp:", hyp.encode("utf8"))
print("Instance CER: ", edit_dist / ref_len)
print("Total CER = %f" % (total_edit_dist / total_ref_len))
if __name__ == '__main__':
......
# -*- coding: utf-8 -*-
"""This module provides functions to calculate error rate in different level.
e.g. wer for word-level, cer for char-level.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
def _levenshtein_distance(ref, hyp):
"""Levenshtein distance is a string metric for measuring the difference
between two sequences. Informally, the Levenshtein distance is defined as
the minimum number of single-character edits (substitutions, insertions or
deletions) required to change one word into the other. We can naturally
extend the edits to word level when calculating the Levenshtein distance for
two sentences.
"""
m = len(ref)
n = len(hyp)
# special case
if ref == hyp:
return 0
if m == 0:
return n
if n == 0:
return m
if m < n:
ref, hyp = hyp, ref
m, n = n, m
# use O(min(m, n)) space
distance = np.zeros((2, n + 1), dtype=np.int32)
# initialize distance matrix
for j in xrange(n + 1):
distance[0][j] = j
# calculate levenshtein distance
for i in xrange(1, m + 1):
prev_row_idx = (i - 1) % 2
cur_row_idx = i % 2
distance[cur_row_idx][0] = i
for j in xrange(1, n + 1):
if ref[i - 1] == hyp[j - 1]:
distance[cur_row_idx][j] = distance[prev_row_idx][j - 1]
else:
s_num = distance[prev_row_idx][j - 1] + 1
i_num = distance[cur_row_idx][j - 1] + 1
d_num = distance[prev_row_idx][j] + 1
distance[cur_row_idx][j] = min(s_num, i_num, d_num)
return distance[m % 2][n]
def word_errors(reference, hypothesis, ignore_case=False, delimiter=' '):
"""Compute the levenshtein distance between reference sequence and
hypothesis sequence in word-level.
:param reference: The reference sentence.
:type reference: basestring
:param hypothesis: The hypothesis sentence.
:type hypothesis: basestring
:param ignore_case: Whether to perform a case-insensitive comparison.
:type ignore_case: bool
:param delimiter: Delimiter of input sentences.
:type delimiter: char
:return: Levenshtein distance and word number of reference sentence.
:rtype: list
"""
if ignore_case == True:
reference = reference.lower()
hypothesis = hypothesis.lower()
ref_words = filter(None, reference.split(delimiter))
hyp_words = filter(None, hypothesis.split(delimiter))
edit_distance = _levenshtein_distance(ref_words, hyp_words)
return float(edit_distance), len(ref_words)
def char_errors(reference, hypothesis, ignore_case=False, remove_space=False):
"""Compute the levenshtein distance between reference sequence and
hypothesis sequence in char-level.
:param reference: The reference sentence.
:type reference: basestring
:param hypothesis: The hypothesis sentence.
:type hypothesis: basestring
:param ignore_case: Whether to perform a case-insensitive comparison.
:type ignore_case: bool
:param remove_space: Whether to remove internal space characters.
:type remove_space: bool
:return: Levenshtein distance and length of reference sentence.
:rtype: list
"""
if ignore_case == True:
reference = reference.lower()
hypothesis = hypothesis.lower()
join_char = ' '
if remove_space == True:
join_char = ''
reference = join_char.join(filter(None, reference.split(' ')))
hypothesis = join_char.join(filter(None, hypothesis.split(' ')))
edit_distance = _levenshtein_distance(reference, hypothesis)
return float(edit_distance), len(reference)
def wer(reference, hypothesis, ignore_case=False, delimiter=' '):
"""Calculate word error rate (WER). WER compares reference text and
hypothesis text in word-level. WER is defined as:
.. math::
WER = (Sw + Dw + Iw) / Nw
where
.. code-block:: text
Sw is the number of words substituted,
Dw is the number of words deleted,
Iw is the number of words inserted,
Nw is the number of words in the reference
We can use the Levenshtein distance to calculate WER. Please note that empty
items will be removed when splitting sentences by the delimiter.
:param reference: The reference sentence.
:type reference: basestring
:param hypothesis: The hypothesis sentence.
:type hypothesis: basestring
:param ignore_case: Whether to perform a case-insensitive comparison.
:type ignore_case: bool
:param delimiter: Delimiter of input sentences.
:type delimiter: char
:return: Word error rate.
:rtype: float
:raises ValueError: If word number of reference is zero.
"""
edit_distance, ref_len = word_errors(reference, hypothesis, ignore_case,
delimiter)
if ref_len == 0:
raise ValueError("Reference's word number should be greater than 0.")
wer = float(edit_distance) / ref_len
return wer
def cer(reference, hypothesis, ignore_case=False, remove_space=False):
"""Calculate charactor error rate (CER). CER compares reference text and
hypothesis text in char-level. CER is defined as:
.. math::
CER = (Sc + Dc + Ic) / Nc
where
.. code-block:: text
Sc is the number of characters substituted,
Dc is the number of characters deleted,
Ic is the number of characters inserted
Nc is the number of characters in the reference
We can use the Levenshtein distance to calculate CER. Chinese input should be
encoded as unicode. Please note that leading and trailing space characters
will be truncated and multiple consecutive space characters in a sentence
will be replaced by one space character.
:param reference: The reference sentence.
:type reference: basestring
:param hypothesis: The hypothesis sentence.
:type hypothesis: basestring
:param ignore_case: Whether to perform a case-insensitive comparison.
:type ignore_case: bool
:param remove_space: Whether to remove internal space characters.
:type remove_space: bool
:return: Character error rate.
:rtype: float
:raises ValueError: If the reference length is zero.
"""
edit_distance, ref_len = char_errors(reference, hypothesis, ignore_case,
remove_space)
if ref_len == 0:
raise ValueError("Length of reference should be greater than 0.")
cer = float(edit_distance) / ref_len
return cer
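For reference, a minimal usage sketch of the two metrics defined above; the import path matches the `from tools.error_rate import char_errors` line added to `infer_by_ckpt.py`:

from tools.error_rate import wer, cer

# Word level: one of the two reference words is substituted.
print(wer('hello world', 'hallo world'))  # 1 / 2 = 0.5
# Char level: one of the 11 reference characters is substituted.
print(cer('hello world', 'hallo world'))  # 1 / 11, roughly 0.091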
......@@ -168,15 +168,13 @@ def profile(args):
start_time = time.time()
frames_seen = 0
# load_data
(features, labels, lod) = batch_data
feature_t.set(features.ndarray, place)
feature_t.set_lod([lod.ndarray])
label_t.set(labels.ndarray, place)
label_t.set_lod([lod.ndarray])
(features, labels, lod, _) = batch_data
feature_t.set(features, place)
feature_t.set_lod([lod])
label_t.set(labels, place)
label_t.set_lod([lod])
frames_seen += lod.ndarray[-1]
data_reader.recycle(features, labels, lod)
frames_seen += lod[-1]
outs = exe.run(fluid.default_main_program(),
feed={"feature": feature_t,
......
......@@ -192,13 +192,11 @@ def train(args):
test_data_reader.batch_iterator(args.batch_size,
args.minimum_batch_size)):
# load_data
(features, labels, lod) = batch_data
feature_t.set(features.ndarray, place)
feature_t.set_lod([lod.ndarray])
label_t.set(labels.ndarray, place)
label_t.set_lod([lod.ndarray])
test_data_reader.recycle(features, labels, lod)
(features, labels, lod, _) = batch_data
feature_t.set(features, place)
feature_t.set_lod([lod])
label_t.set(labels, place)
label_t.set_lod([lod])
cost, acc = exe.run(test_program,
feed={"feature": feature_t,
......@@ -212,6 +210,7 @@ def train(args):
# train data reader
train_data_reader = reader.AsyncDataReader(args.train_feature_lst,
args.train_label_lst, -1)
train_data_reader.set_transformers(ltrans)
# train
for pass_id in xrange(args.pass_num):
......@@ -220,13 +219,11 @@ def train(args):
train_data_reader.batch_iterator(args.batch_size,
args.minimum_batch_size)):
# load_data
(features, labels, lod) = batch_data
feature_t.set(features.ndarray, place)
feature_t.set_lod([lod.ndarray])
label_t.set(labels.ndarray, place)
label_t.set_lod([lod.ndarray])
train_data_reader.recycle(features, labels, lod)
(features, labels, lod, name_lst) = batch_data
feature_t.set(features, place)
feature_t.set_lod([lod])
label_t.set(labels, place)
label_t.set_lod([lod])
to_print = batch_id > 0 and (batch_id % args.print_per_batches == 0)
outs = exe.run(fluid.default_main_program(),
......
......@@ -4,10 +4,109 @@ The minimum PaddlePaddle version needed for the code sample in this directory is
# Advbox
Advbox is a Python toolbox to create adversarial examples that fool neural networks. It requires Python and paddle.
Advbox is a toolbox for generating adversarial examples that fool neural networks, and it can benchmark the robustness of machine learning models.
## How to use
Advbox is based on [PaddlePaddle](https://github.com/PaddlePaddle/Paddle) Fluid and is under continual development, always welcoming contributions of the latest methods of adversarial attacks and defenses.
1. train a model and save it's parameters. (like fluid_mnist.py)
2. load the parameters which is trained in step1, then reconstruct the model.(like mnist_tutorial_fgsm.py)
3. use advbox to generate the adversarial sample.
## Overview
[Szegedy et al.](https://arxiv.org/abs/1312.6199) were the first to discover an intriguing property of deep neural networks in the context of image classification. They showed that, despite their state-of-the-art performance, deep networks are surprisingly susceptible to adversarial attacks in the form of small perturbations to images that remain (almost) imperceptible to the human visual system. These perturbations are found by optimizing the input to maximize the prediction error, and the images modified by these perturbations are called `adversarial examples`. The profound implications of these results triggered wide interest among researchers in adversarial attacks and their defenses for deep learning in general.
Advbox is similar to [Foolbox](https://github.com/bethgelab/foolbox) and [CleverHans](https://github.com/tensorflow/cleverhans). CleverHans only supports the TensorFlow framework, while Foolbox interfaces with many popular machine learning frameworks such as PyTorch, Keras, TensorFlow, Theano, Lasagne and MXNet. However, these two great libraries don't support PaddlePaddle, an easy-to-use, efficient, flexible and scalable deep learning platform originally developed by Baidu scientists and engineers for the purpose of applying deep learning to many products at Baidu.
## Usage
Advbox provides many stable reference implementations of modern methods for generating adversarial examples, such as FGSM, DeepFool and JSMA. When you want to benchmark the robustness of your neural networks, you can use Advbox to generate adversarial examples and benchmark the networks. Some tips on using Advbox (a minimal sketch of the workflow follows the list):
1. Train a model and save the parameters.
2. Load the trained parameters, then reconstruct the model.
3. Use Advbox to generate the adversarial samples.
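A minimal sketch of these three steps for the MNIST case, mirroring `tutorials/mnist_tutorial_fgsm.py`; it assumes the checkpoint from step 1 has been saved under `./mnist/`:

```python
import paddle.fluid as fluid
import paddle.v2 as paddle
from advbox.adversary import Adversary
from advbox.attacks.gradient_method import FGSM
from advbox.models.paddle import PaddleModel
from tutorials.mnist_model import mnist_cnn_model

# Step 2: reconstruct the model and load the parameters trained in step 1.
img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
img.stop_gradient = False  # gradients w.r.t. the input are needed for the attack
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
logits = mnist_cnn_model(img)
avg_cost = fluid.layers.mean(x=fluid.layers.cross_entropy(input=logits, label=label))

exe = fluid.Executor(fluid.CPUPlace())
fluid.io.load_params(exe, "./mnist/", main_program=fluid.default_main_program())

# Step 3: wrap the program in a PaddleModel and run an FGSM attack on one sample.
m = PaddleModel(fluid.default_main_program(), 'img', 'label',
                logits.name, avg_cost.name, (-1, 1))
attack = FGSM(m)
image, true_label = next(paddle.dataset.mnist.test()())
adversary = attack(Adversary(image, true_label))
print(adversary.is_successful())
```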
#### Dependencies
* PaddlePaddle: [the latest develop branch](http://www.paddlepaddle.org/docs/develop/documentation/en/build_and_install/pip_install_en.html)
* Python 2.x
#### Structure
Network models, attack method implementations and the criterion that defines adversarial examples are the three essential elements for generating adversarial examples. For brevity, misclassification is adopted as the adversarial criterion in Advbox.
The structure of the Advbox module is as follows:
.
├── advbox
| ├── __init__.py
| ├── attack
| ├── __init__.py
| ├── base.py
| ├── deepfool.py
| ├── gradient_method.py
| ├── lbfgs.py
| └── saliency.py
| ├── models
| ├── __init__.py
| ├── base.py
| └── paddle.py
| └── adversary.py
├── tutorials
| ├── __init__.py
| ├── mnist_model.py
| ├── mnist_tutorial_lbfgs.py
| ├── mnist_tutorial_fgsm.py
| ├── mnist_tutorial_bim.py
| ├── mnist_tutorial_ilcm.py
| ├── mnist_tutorial_mifgsm.py
| ├── mnist_tutorial_jsma.py
| └── mnist_tutorial_deepfool.py
└── README.md
**advbox.attack**
Advbox implements several popular adversarial attacks which search for adversarial examples. Each attack method uses a distance measure (L1, L2, etc.) to quantify the size of adversarial perturbations. Crafting adversarial examples with Advbox is easy, as some attack methods can perform internal hyperparameter tuning to find the minimum perturbation.
**advbox.model**
Advbox implements interfaces to PaddlePaddle. Additionally, interfaces to other deep learning frameworks such as TensorFlow can also be defined and employed. The module is used to compute predictions and gradients for given inputs in a specific framework.
**advbox.adversary**
Adversary contains the original object, the target and the adversarial examples. It provides misclassification as the criterion for accepting an adversarial example.
## Tutorials
The `./tutorials/` folder provides some tutorials for generating adversarial examples on the MNIST dataset. You can slightly modify the code to apply it to other datasets. These attack methods are supported in Advbox:
* [L-BFGS](https://arxiv.org/abs/1312.6199)
* [FGSM](https://arxiv.org/abs/1412.6572)
* [BIM](https://arxiv.org/abs/1607.02533)
* [ILCM](https://arxiv.org/abs/1607.02533)
* [MI-FGSM](https://arxiv.org/pdf/1710.06081.pdf)
* [JSMA](https://arxiv.org/pdf/1511.07528)
* [DeepFool](https://arxiv.org/abs/1511.04599)
## Testing
Benchmarks on a vanilla CNN model.
> MNIST
| adversarial attacks | fooling rate (non-targeted) | fooling rate (targeted) | max_epsilon | iterations | Strength |
|:-----:| :----: | :---: | :----: | :----: | :----: |
|L-BFGS| --- | 89.2% | --- | One shot | *** |
|FGSM| 57.8% | 26.55% | 0.3 | One shot| *** |
|BIM| 97.4% | --- | 0.1 | 100 | **** |
|ILCM| --- | 100.0% | 0.1 | 100 | **** |
|MI-FGSM| 94.4% | 100.0% | 0.1 | 100 | **** |
|JSMA| 96.8% | 90.4%| 0.1 | 2000 | *** |
|DeepFool| 97.7% | 51.3% | --- | 100 | **** |
* The strength (more asterisks indicate a stronger attack) is based on the impression from the reviewed literature.
---
## References
* [Intriguing properties of neural networks](https://arxiv.org/abs/1312.6199), C. Szegedy et al., arxiv 2014
* [Explaining and Harnessing Adversarial Examples](https://arxiv.org/abs/1412.6572), I. Goodfellow et al., ICLR 2015
* [Adversarial Examples In The Physical World](https://arxiv.org/pdf/1607.02533v3.pdf), A. Kurakin et al., ICLR workshop 2017
* [Boosting Adversarial Attacks with Momentum](https://arxiv.org/abs/1710.06081), Yinpeng Dong et al., arxiv 2018
* [The Limitations of Deep Learning in Adversarial Settings](https://arxiv.org/abs/1511.07528), N. Papernot et al., ESSP 2016
* [DeepFool: a simple and accurate method to fool deep neural networks](https://arxiv.org/abs/1511.04599), S. Moosavi-Dezfooli et al., CVPR 2016
* [Foolbox: A Python toolbox to benchmark the robustness of machine learning models](https://arxiv.org/abs/1707.04131), Jonas Rauber et al., arxiv 2018
* [CleverHans: An adversarial example library for constructing attacks, building defenses, and benchmarking both](https://github.com/tensorflow/cleverhans#setting-up-cleverhans)
* [Threat of Adversarial Attacks on Deep Learning in Computer Vision: A Survey](https://arxiv.org/abs/1801.00553), Naveed Akhtar, Ajmal Mian, arxiv 2018
......@@ -14,7 +14,8 @@ __all__ = [
'GradientMethodAttack', 'FastGradientSignMethodAttack', 'FGSM',
'FastGradientSignMethodTargetedAttack', 'FGSMT',
'BasicIterativeMethodAttack', 'BIM',
'IterativeLeastLikelyClassMethodAttack', 'ILCM'
'IterativeLeastLikelyClassMethodAttack', 'ILCM', 'MomentumIteratorAttack',
'MIFGSM'
]
......@@ -32,7 +33,12 @@ class GradientMethodAttack(Attack):
super(GradientMethodAttack, self).__init__(model)
self.support_targeted = support_targeted
def _apply(self, adversary, norm_ord=np.inf, epsilons=0.01, steps=100):
def _apply(self,
adversary,
norm_ord=np.inf,
epsilons=0.01,
steps=1,
epsilon_steps=100):
"""
Apply the gradient attack method.
:param adversary(Adversary):
......@@ -41,8 +47,11 @@ class GradientMethodAttack(Attack):
Order of the norm, such as np.inf, 1, 2, etc. It can't be 0.
:param epsilons(list|tuple|int):
Attack step size (input variation).
Largest step size if epsilons is not iterable.
:param steps:
The number of iterator steps.
The number of attack iterations.
:param epsilon_steps:
The number of epsilon values to try; the attack runs `steps` iterations for each epsilon.
:return:
adversary(Adversary): The Adversary object.
"""
......@@ -55,7 +64,7 @@ class GradientMethodAttack(Attack):
"This attack method doesn't support targeted attack!")
if not isinstance(epsilons, Iterable):
epsilons = np.linspace(epsilons, epsilons + 1e-10, num=steps)
epsilons = np.linspace(0, epsilons, num=epsilon_steps)
pre_label = adversary.original_label
min_, max_ = self.model.bounds()
......@@ -65,30 +74,33 @@ class GradientMethodAttack(Attack):
self.model.channel_axis() == adversary.original.shape[0] or
self.model.channel_axis() == adversary.original.shape[-1])
step = 1
adv_img = adversary.original
for epsilon in epsilons[:steps]:
for epsilon in epsilons[:]:
step = 1
adv_img = adversary.original
if epsilon == 0.0:
continue
if adversary.is_targeted_attack:
gradient = -self.model.gradient(adv_img, adversary.target_label)
else:
gradient = self.model.gradient(adv_img,
adversary.original_label)
if norm_ord == np.inf:
gradient_norm = np.sign(gradient)
else:
gradient_norm = gradient / self._norm(gradient, ord=norm_ord)
adv_img = adv_img + epsilon * gradient_norm * (max_ - min_)
adv_img = np.clip(adv_img, min_, max_)
adv_label = np.argmax(self.model.predict(adv_img))
logging.info('step={}, epsilon = {:.5f}, pre_label = {}, '
'adv_label={}'.format(step, epsilon, pre_label,
adv_label))
if adversary.try_accept_the_example(adv_img, adv_label):
return adversary
step += 1
for i in range(steps):
if adversary.is_targeted_attack:
gradient = -self.model.gradient(adv_img,
adversary.target_label)
else:
gradient = self.model.gradient(adv_img,
adversary.original_label)
if norm_ord == np.inf:
gradient_norm = np.sign(gradient)
else:
gradient_norm = gradient / self._norm(
gradient, ord=norm_ord)
adv_img = adv_img + epsilon * gradient_norm * (max_ - min_)
adv_img = np.clip(adv_img, min_, max_)
adv_label = np.argmax(self.model.predict(adv_img))
logging.info('step={}, epsilon = {:.5f}, pre_label = {}, '
'adv_label={}'.format(step, epsilon, pre_label,
adv_label))
if adversary.try_accept_the_example(adv_img, adv_label):
return adversary
step += 1
return adversary
@staticmethod
......@@ -113,7 +125,7 @@ class FastGradientSignMethodTargetedAttack(GradientMethodAttack):
Paper link: https://arxiv.org/abs/1412.6572
"""
def _apply(self, adversary, epsilons=0.03):
def _apply(self, adversary, epsilons=0.01):
return GradientMethodAttack._apply(
self,
adversary=adversary,
......@@ -144,7 +156,7 @@ class IterativeLeastLikelyClassMethodAttack(GradientMethodAttack):
Paper link: https://arxiv.org/abs/1607.02533
"""
def _apply(self, adversary, epsilons=0.001, steps=1000):
def _apply(self, adversary, epsilons=0.01, steps=1000):
return GradientMethodAttack._apply(
self,
adversary=adversary,
......@@ -164,7 +176,103 @@ class BasicIterativeMethodAttack(IterativeLeastLikelyClassMethodAttack):
super(BasicIterativeMethodAttack, self).__init__(model, False)
class MomentumIteratorAttack(GradientMethodAttack):
"""
The Momentum Iterative Fast Gradient Sign Method (Dong et al. 2017).
This method won first place in both the NIPS 2017 Non-targeted Adversarial
Attack and Targeted Adversarial Attack competitions. The original paper used
hard labels for this attack (no label smoothing) and the inf norm.
Paper link: https://arxiv.org/pdf/1710.06081.pdf
"""
def __init__(self, model, support_targeted=True):
"""
:param model(model): The model to be attacked.
:param support_targeted(bool): Whether this attack method supports targeted attacks.
"""
super(MomentumIteratorAttack, self).__init__(model)
self.support_targeted = support_targeted
def _apply(self,
adversary,
norm_ord=np.inf,
epsilons=0.1,
steps=100,
epsilon_steps=100,
decay_factor=1):
"""
Apply the momentum iterative gradient attack method.
:param adversary(Adversary):
The Adversary object.
:param norm_ord(int):
Order of the norm, such as np.inf, 1, 2, etc. It can't be 0.
:param epsilons(list|tuple|float):
Attack step size (input variation).
Largest step size if epsilons is not iterable.
:param epsilon_steps:
The number of epsilon values to try; the attack runs `steps` iterations for each epsilon.
:param steps:
The number of attack iterations.
:param decay_factor:
The decay factor for the momentum term.
:return:
adversary(Adversary): The Adversary object.
"""
if norm_ord == 0:
raise ValueError("L0 norm is not supported!")
if not self.support_targeted:
if adversary.is_targeted_attack:
raise ValueError(
"This attack method doesn't support targeted attack!")
assert self.model.channel_axis() == adversary.original.ndim
assert (self.model.channel_axis() == 1 or
self.model.channel_axis() == adversary.original.shape[0] or
self.model.channel_axis() == adversary.original.shape[-1])
if not isinstance(epsilons, Iterable):
epsilons = np.linspace(0, epsilons, num=epsilon_steps)
min_, max_ = self.model.bounds()
pre_label = adversary.original_label
for epsilon in epsilons[:]:
if epsilon == 0.0:
continue
step = 1
adv_img = adversary.original
momentum = 0
for i in range(steps):
if adversary.is_targeted_attack:
gradient = -self.model.gradient(adv_img,
adversary.target_label)
else:
gradient = self.model.gradient(adv_img, pre_label)
# normalize gradient
velocity = gradient / self._norm(gradient, ord=1)
momentum = decay_factor * momentum + velocity
if norm_ord == np.inf:
normalized_grad = np.sign(momentum)
else:
normalized_grad = self._norm(momentum, ord=norm_ord)
perturbation = epsilon * normalized_grad
adv_img = adv_img + perturbation
adv_img = np.clip(adv_img, min_, max_)
adv_label = np.argmax(self.model.predict(adv_img))
logging.info(
'step={}, epsilon = {:.5f}, pre_label = {}, adv_label={}'
.format(step, epsilon, pre_label, adv_label))
if adversary.try_accept_the_example(adv_img, adv_label):
return adversary
step += 1
return adversary
FGSM = FastGradientSignMethodAttack
FGSMT = FastGradientSignMethodTargetedAttack
BIM = BasicIterativeMethodAttack
ILCM = IterativeLeastLikelyClassMethodAttack
MIFGSM = MomentumIteratorAttack
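With `MomentumIteratorAttack` now exported as `MIFGSM`, it can be driven like the other gradient attacks. A minimal sketch, assuming `m` is a `PaddleModel` wrapping a trained network built exactly as in the tutorials below; the keyword names follow the `_apply()` signature added above:

import numpy as np
import paddle.v2 as paddle
from advbox.adversary import Adversary
from advbox.attacks.gradient_method import MIFGSM

# `m` is assumed to be a PaddleModel instance (see the tutorials below).
attack = MIFGSM(m)
attack_config = {"norm_ord": np.inf, "epsilons": 0.1, "steps": 100,
                 "epsilon_steps": 100, "decay_factor": 1}

# Take one MNIST sample and attack it.
image, true_label = next(paddle.dataset.mnist.test()())
adversary = Adversary(image, true_label)
# adversary.set_target(is_targeted_attack=True, target_label=0)  # optional targeted attack
adversary = attack(adversary, **attack_config)
print(adversary.is_successful())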
"""
FGSM demos on mnist using advbox tool.
"""
import matplotlib.pyplot as plt
import paddle.v2 as paddle
import paddle.fluid as fluid
from advbox.adversary import Adversary
from advbox.attacks.gradient_method import FGSM
from advbox.models.paddle import PaddleModel
def cnn_model(img):
"""
Mnist cnn model
Args:
img(Variable): the input image to be recognized
Returns:
Variable: the label prediction
"""
# conv1 = fluid.nets.conv2d()
conv_pool_1 = fluid.nets.simple_img_conv_pool(
input=img,
num_filters=20,
filter_size=5,
pool_size=2,
pool_stride=2,
act='relu')
conv_pool_2 = fluid.nets.simple_img_conv_pool(
input=conv_pool_1,
num_filters=50,
filter_size=5,
pool_size=2,
pool_stride=2,
act='relu')
logits = fluid.layers.fc(input=conv_pool_2, size=10, act='softmax')
return logits
def main():
"""
Advbox demo which demonstrate how to use advbox.
"""
IMG_NAME = 'img'
LABEL_NAME = 'label'
img = fluid.layers.data(name=IMG_NAME, shape=[1, 28, 28], dtype='float32')
# gradient should flow
img.stop_gradient = False
label = fluid.layers.data(name=LABEL_NAME, shape=[1], dtype='int64')
logits = cnn_model(img)
cost = fluid.layers.cross_entropy(input=logits, label=label)
avg_cost = fluid.layers.mean(x=cost)
place = fluid.CPUPlace()
exe = fluid.Executor(place)
BATCH_SIZE = 1
train_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.mnist.train(), buf_size=500),
batch_size=BATCH_SIZE)
feeder = fluid.DataFeeder(
feed_list=[IMG_NAME, LABEL_NAME],
place=place,
program=fluid.default_main_program())
fluid.io.load_params(
exe, "./mnist/", main_program=fluid.default_main_program())
# advbox demo
m = PaddleModel(fluid.default_main_program(), IMG_NAME, LABEL_NAME,
logits.name, avg_cost.name, (-1, 1))
att = FGSM(m)
for data in train_reader():
# fgsm attack
adversary = att(Adversary(data[0][0], data[0][1]))
if adversary.is_successful():
plt.imshow(adversary.target, cmap='Greys_r')
plt.show()
# np.save('adv_img', adversary.target)
break
if __name__ == '__main__':
main()
"""
JSMA demo on mnist using advbox tool.
"""
import matplotlib.pyplot as plt
import paddle.v2 as paddle
import paddle.fluid as fluid
import numpy as np
from advbox import Adversary
from advbox.attacks.saliency import SaliencyMapAttack
from advbox.models.paddle import PaddleModel
def cnn_model(img):
"""
Mnist cnn model
Args:
img(Variable): the input image to be recognized
Returns:
Variable: the label prediction
"""
# conv1 = fluid.nets.conv2d()
conv_pool_1 = fluid.nets.simple_img_conv_pool(
input=img,
num_filters=20,
filter_size=5,
pool_size=2,
pool_stride=2,
act='relu')
conv_pool_2 = fluid.nets.simple_img_conv_pool(
input=conv_pool_1,
num_filters=50,
filter_size=5,
pool_size=2,
pool_stride=2,
act='relu')
logits = fluid.layers.fc(input=conv_pool_2, size=10, act='softmax')
return logits
def main():
"""
Advbox demo which demonstrates how to use Advbox.
"""
IMG_NAME = 'img'
LABEL_NAME = 'label'
img = fluid.layers.data(name=IMG_NAME, shape=[1, 28, 28], dtype='float32')
# gradient should flow
img.stop_gradient = False
label = fluid.layers.data(name=LABEL_NAME, shape=[1], dtype='int64')
logits = cnn_model(img)
cost = fluid.layers.cross_entropy(input=logits, label=label)
avg_cost = fluid.layers.mean(x=cost)
place = fluid.CPUPlace()
exe = fluid.Executor(place)
BATCH_SIZE = 1
train_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.mnist.train(), buf_size=500),
batch_size=BATCH_SIZE)
feeder = fluid.DataFeeder(
feed_list=[IMG_NAME, LABEL_NAME],
place=place,
program=fluid.default_main_program())
fluid.io.load_params(
exe, "./mnist/", main_program=fluid.default_main_program())
# advbox demo
m = PaddleModel(fluid.default_main_program(), IMG_NAME, LABEL_NAME,
logits.name, avg_cost.name, (-1, 1))
attack = SaliencyMapAttack(m)
total_num = 0
success_num = 0
for data in train_reader():
total_num += 1
# adversary.set_target(True, target_label=target_label)
jsma_attack = attack(Adversary(data[0][0], data[0][1]))
if jsma_attack is not None and jsma_attack.is_successful():
# plt.imshow(jsma_attack.target, cmap='Greys_r')
# plt.show()
success_num += 1
print('original_label=%d, adversarial example label=%d' %
(data[0][1], jsma_attack.adversarial_label))
# np.save('adv_img', jsma_attack.adversarial_example)
print('total num = %d, success num = %d ' % (total_num, success_num))
if total_num == 100:
break
if __name__ == '__main__':
main()
"""
A set of tutorials for generating adversarial examples with advbox.
"""
\ No newline at end of file
......@@ -30,8 +30,9 @@ def mnist_cnn_model(img):
pool_size=2,
pool_stride=2,
act='relu')
fc = fluid.layers.fc(input=conv_pool_2, size=50, act='relu')
logits = fluid.layers.fc(input=conv_pool_2, size=10, act='softmax')
logits = fluid.layers.fc(input=fc, size=10, act='softmax')
return logits
......@@ -60,7 +61,10 @@ def main():
paddle.dataset.mnist.train(), buf_size=500),
batch_size=BATCH_SIZE)
# use CPU
place = fluid.CPUPlace()
# use GPU
# place = fluid.CUDAPlace(0)
exe = fluid.Executor(place)
feeder = fluid.DataFeeder(feed_list=[img, label], place=place)
exe.run(fluid.default_startup_program())
......@@ -74,9 +78,11 @@ def main():
feed=feeder.feed(data),
fetch_list=[avg_cost, batch_acc, batch_size])
pass_acc.add(value=acc, weight=b_size)
pass_acc_val = pass_acc.eval()[0]
print("pass_id=" + str(pass_id) + " acc=" + str(acc[0]) +
" pass_acc=" + str(pass_acc.eval()[0]))
if loss < LOSS_THRESHOLD and pass_acc > ACC_THRESHOLD:
" pass_acc=" + str(pass_acc_val))
if loss < LOSS_THRESHOLD and pass_acc_val > ACC_THRESHOLD:
# early stop
break
print("pass_id=" + str(pass_id) + " pass_acc=" + str(pass_acc.eval()[
......
"""
BIM tutorial on mnist using advbox tool.
The BIM method iteratively takes multiple small steps, adjusting the direction after each step.
It only supports non-targeted attacks.
"""
import sys
sys.path.append("..")
import matplotlib.pyplot as plt
import paddle.fluid as fluid
import paddle.v2 as paddle
from advbox.adversary import Adversary
from advbox.attacks.gradient_method import BIM
from advbox.models.paddle import PaddleModel
from tutorials.mnist_model import mnist_cnn_model
def main():
"""
Advbox demo which demonstrate how to use advbox.
"""
TOTAL_NUM = 500
IMG_NAME = 'img'
LABEL_NAME = 'label'
img = fluid.layers.data(name=IMG_NAME, shape=[1, 28, 28], dtype='float32')
# gradient should flow
img.stop_gradient = False
label = fluid.layers.data(name=LABEL_NAME, shape=[1], dtype='int64')
logits = mnist_cnn_model(img)
cost = fluid.layers.cross_entropy(input=logits, label=label)
avg_cost = fluid.layers.mean(x=cost)
# use CPU
place = fluid.CPUPlace()
# use GPU
# place = fluid.CUDAPlace(0)
exe = fluid.Executor(place)
BATCH_SIZE = 1
train_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.mnist.train(), buf_size=128 * 10),
batch_size=BATCH_SIZE)
test_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.mnist.test(), buf_size=128 * 10),
batch_size=BATCH_SIZE)
fluid.io.load_params(
exe, "./mnist/", main_program=fluid.default_main_program())
# advbox demo
m = PaddleModel(
fluid.default_main_program(),
IMG_NAME,
LABEL_NAME,
logits.name,
avg_cost.name, (-1, 1),
channel_axis=1)
attack = BIM(m)
attack_config = {"epsilons": 0.1, "steps": 100}
# use train data to generate adversarial examples
total_count = 0
fooling_count = 0
for data in train_reader():
total_count += 1
adversary = Adversary(data[0][0], data[0][1])
# BIM non-targeted attack
adversary = attack(adversary, **attack_config)
if adversary.is_successful():
fooling_count += 1
print(
'attack success, original_label=%d, adversarial_label=%d, count=%d'
% (data[0][1], adversary.adversarial_label, total_count))
# plt.imshow(adversary.target, cmap='Greys_r')
# plt.show()
# np.save('adv_img', adversary.target)
else:
print('attack failed, original_label=%d, count=%d' %
(data[0][1], total_count))
if total_count >= TOTAL_NUM:
print(
"[TRAIN_DATASET]: fooling_count=%d, total_count=%d, fooling_rate=%f"
% (fooling_count, total_count,
float(fooling_count) / total_count))
break
# use test data to generate adversarial examples
total_count = 0
fooling_count = 0
for data in test_reader():
total_count += 1
adversary = Adversary(data[0][0], data[0][1])
# BIM non-targeted attack
adversary = attack(adversary, **attack_config)
if adversary.is_successful():
fooling_count += 1
print(
'attack success, original_label=%d, adversarial_label=%d, count=%d'
% (data[0][1], adversary.adversarial_label, total_count))
# plt.imshow(adversary.target, cmap='Greys_r')
# plt.show()
# np.save('adv_img', adversary.target)
else:
print('attack failed, original_label=%d, count=%d' %
(data[0][1], total_count))
if total_count >= TOTAL_NUM:
print(
"[TEST_DATASET]: fooling_count=%d, total_count=%d, fooling_rate=%f"
% (fooling_count, total_count,
float(fooling_count) / total_count))
break
print("bim attack done")
if __name__ == '__main__':
main()
"""
DeepFool tutorial on mnist using advbox tool.
DeepFool is a simple and accurate adversarial attack method.
It supports both targeted attack and non-targeted attack.
"""
import sys
sys.path.append("..")
import matplotlib.pyplot as plt
import paddle.fluid as fluid
import paddle.v2 as paddle
from advbox.adversary import Adversary
from advbox.attacks.deepfool import DeepFoolAttack
from advbox.models.paddle import PaddleModel
from tutorials.mnist_model import mnist_cnn_model
def main():
"""
    Advbox demo which demonstrates how to use advbox.
"""
TOTAL_NUM = 500
IMG_NAME = 'img'
LABEL_NAME = 'label'
img = fluid.layers.data(name=IMG_NAME, shape=[1, 28, 28], dtype='float32')
# gradient should flow
img.stop_gradient = False
label = fluid.layers.data(name=LABEL_NAME, shape=[1], dtype='int64')
logits = mnist_cnn_model(img)
cost = fluid.layers.cross_entropy(input=logits, label=label)
avg_cost = fluid.layers.mean(x=cost)
# use CPU
place = fluid.CPUPlace()
# use GPU
# place = fluid.CUDAPlace(0)
exe = fluid.Executor(place)
BATCH_SIZE = 1
train_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.mnist.train(), buf_size=128 * 10),
batch_size=BATCH_SIZE)
test_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.mnist.test(), buf_size=128 * 10),
batch_size=BATCH_SIZE)
fluid.io.load_params(
exe, "./mnist/", main_program=fluid.default_main_program())
# advbox demo
m = PaddleModel(
fluid.default_main_program(),
IMG_NAME,
LABEL_NAME,
logits.name,
avg_cost.name, (-1, 1),
channel_axis=1)
attack = DeepFoolAttack(m)
attack_config = {"iterations": 100, "overshoot": 9}
# use train data to generate adversarial examples
total_count = 0
fooling_count = 0
for data in train_reader():
total_count += 1
adversary = Adversary(data[0][0], data[0][1])
# DeepFool non-targeted attack
adversary = attack(adversary, **attack_config)
# DeepFool targeted attack
# tlabel = 0
# adversary.set_target(is_targeted_attack=True, target_label=tlabel)
# adversary = attack(adversary, **attack_config)
if adversary.is_successful():
fooling_count += 1
print(
'attack success, original_label=%d, adversarial_label=%d, count=%d'
% (data[0][1], adversary.adversarial_label, total_count))
# plt.imshow(adversary.target, cmap='Greys_r')
# plt.show()
# np.save('adv_img', adversary.target)
else:
print('attack failed, original_label=%d, count=%d' %
(data[0][1], total_count))
if total_count >= TOTAL_NUM:
print(
"[TRAIN_DATASET]: fooling_count=%d, total_count=%d, fooling_rate=%f"
% (fooling_count, total_count,
float(fooling_count) / total_count))
break
# use test data to generate adversarial examples
total_count = 0
fooling_count = 0
for data in test_reader():
total_count += 1
adversary = Adversary(data[0][0], data[0][1])
# DeepFool non-targeted attack
adversary = attack(adversary, **attack_config)
# DeepFool targeted attack
# tlabel = 0
# adversary.set_target(is_targeted_attack=True, target_label=tlabel)
# adversary = attack(adversary, **attack_config)
if adversary.is_successful():
fooling_count += 1
print(
'attack success, original_label=%d, adversarial_label=%d, count=%d'
% (data[0][1], adversary.adversarial_label, total_count))
# plt.imshow(adversary.target, cmap='Greys_r')
# plt.show()
# np.save('adv_img', adversary.target)
else:
print('attack failed, original_label=%d, count=%d' %
(data[0][1], total_count))
if total_count >= TOTAL_NUM:
print(
"[TEST_DATASET]: fooling_count=%d, total_count=%d, fooling_rate=%f"
% (fooling_count, total_count,
float(fooling_count) / total_count))
break
print("deelfool attack done")
if __name__ == '__main__':
main()
"""
FGSM tutorial on mnist using advbox tool.
The FGSM method is a non-targeted attack, while FGSMT is a targeted attack.
"""
import sys
sys.path.append("..")
import matplotlib.pyplot as plt
import numpy as np
import paddle.fluid as fluid
import paddle.v2 as paddle
from advbox.adversary import Adversary
from advbox.attacks.gradient_method import FGSM
from advbox.attacks.gradient_method import FGSMT
from advbox.models.paddle import PaddleModel
from tutorials.mnist_model import mnist_cnn_model
def main():
"""
    Advbox demo which demonstrates how to use advbox.
"""
TOTAL_NUM = 500
IMG_NAME = 'img'
LABEL_NAME = 'label'
img = fluid.layers.data(name=IMG_NAME, shape=[1, 28, 28], dtype='float32')
# gradient should flow
img.stop_gradient = False
label = fluid.layers.data(name=LABEL_NAME, shape=[1], dtype='int64')
logits = mnist_cnn_model(img)
cost = fluid.layers.cross_entropy(input=logits, label=label)
avg_cost = fluid.layers.mean(x=cost)
# use CPU
place = fluid.CPUPlace()
# use GPU
# place = fluid.CUDAPlace(0)
exe = fluid.Executor(place)
BATCH_SIZE = 1
train_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.mnist.train(), buf_size=128 * 10),
batch_size=BATCH_SIZE)
test_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.mnist.test(), buf_size=128 * 10),
batch_size=BATCH_SIZE)
fluid.io.load_params(
exe, "./mnist/", main_program=fluid.default_main_program())
# advbox demo
m = PaddleModel(
fluid.default_main_program(),
IMG_NAME,
LABEL_NAME,
logits.name,
avg_cost.name, (-1, 1),
channel_axis=1)
attack = FGSM(m)
# attack = FGSMT(m)
attack_config = {"epsilons": 0.3}
# use train data to generate adversarial examples
total_count = 0
fooling_count = 0
for data in train_reader():
total_count += 1
adversary = Adversary(data[0][0], data[0][1])
# FGSM non-targeted attack
adversary = attack(adversary, **attack_config)
# FGSMT targeted attack
# tlabel = 0
# adversary.set_target(is_targeted_attack=True, target_label=tlabel)
# adversary = attack(adversary, **attack_config)
if adversary.is_successful():
fooling_count += 1
print(
'attack success, original_label=%d, adversarial_label=%d, count=%d'
% (data[0][1], adversary.adversarial_label, total_count))
# plt.imshow(adversary.target, cmap='Greys_r')
# plt.show()
# np.save('adv_img', adversary.target)
else:
print('attack failed, original_label=%d, count=%d' %
(data[0][1], total_count))
if total_count >= TOTAL_NUM:
print(
"[TRAIN_DATASET]: fooling_count=%d, total_count=%d, fooling_rate=%f"
% (fooling_count, total_count,
float(fooling_count) / total_count))
break
# use test data to generate adversarial examples
total_count = 0
fooling_count = 0
for data in test_reader():
total_count += 1
adversary = Adversary(data[0][0], data[0][1])
# FGSM non-targeted attack
adversary = attack(adversary, **attack_config)
# FGSMT targeted attack
# tlabel = 0
# adversary.set_target(is_targeted_attack=True, target_label=tlabel)
# adversary = attack(adversary, **attack_config)
if adversary.is_successful():
fooling_count += 1
print(
'attack success, original_label=%d, adversarial_label=%d, count=%d'
% (data[0][1], adversary.adversarial_label, total_count))
# plt.imshow(adversary.target, cmap='Greys_r')
# plt.show()
# np.save('adv_img', adversary.target)
else:
print('attack failed, original_label=%d, count=%d' %
(data[0][1], total_count))
if total_count >= TOTAL_NUM:
print(
"[TEST_DATASET]: fooling_count=%d, total_count=%d, fooling_rate=%f"
% (fooling_count, total_count,
float(fooling_count) / total_count))
break
print("fgsm attack done")
if __name__ == '__main__':
main()
"""
ILCM tutorial on mnist using advbox tool.
The ILCM method extends BIM to support targeted attacks.
"""
import sys
sys.path.append("..")
import matplotlib.pyplot as plt
import paddle.fluid as fluid
import paddle.v2 as paddle
from advbox.adversary import Adversary
from advbox.attacks.gradient_method import ILCM
from advbox.models.paddle import PaddleModel
from tutorials.mnist_model import mnist_cnn_model
def main():
"""
    Advbox demo which demonstrates how to use advbox.
"""
TOTAL_NUM = 500
IMG_NAME = 'img'
LABEL_NAME = 'label'
img = fluid.layers.data(name=IMG_NAME, shape=[1, 28, 28], dtype='float32')
# gradient should flow
img.stop_gradient = False
label = fluid.layers.data(name=LABEL_NAME, shape=[1], dtype='int64')
logits = mnist_cnn_model(img)
cost = fluid.layers.cross_entropy(input=logits, label=label)
avg_cost = fluid.layers.mean(x=cost)
# use CPU
place = fluid.CPUPlace()
# use GPU
# place = fluid.CUDAPlace(0)
exe = fluid.Executor(place)
BATCH_SIZE = 1
train_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.mnist.train(), buf_size=128 * 10),
batch_size=BATCH_SIZE)
test_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.mnist.test(), buf_size=128 * 10),
batch_size=BATCH_SIZE)
fluid.io.load_params(
exe, "./mnist/", main_program=fluid.default_main_program())
# advbox demo
m = PaddleModel(
fluid.default_main_program(),
IMG_NAME,
LABEL_NAME,
logits.name,
avg_cost.name, (-1, 1),
channel_axis=1)
attack = ILCM(m)
attack_config = {"epsilons": 0.1, "steps": 100}
# use train data to generate adversarial examples
total_count = 0
fooling_count = 0
for data in train_reader():
total_count += 1
adversary = Adversary(data[0][0], data[0][1])
tlabel = 0
adversary.set_target(is_targeted_attack=True, target_label=tlabel)
# ILCM targeted attack
adversary = attack(adversary, **attack_config)
if adversary.is_successful():
fooling_count += 1
print(
'attack success, original_label=%d, adversarial_label=%d, count=%d'
% (data[0][1], adversary.adversarial_label, total_count))
# plt.imshow(adversary.target, cmap='Greys_r')
# plt.show()
# np.save('adv_img', adversary.target)
else:
print('attack failed, original_label=%d, count=%d' %
(data[0][1], total_count))
if total_count >= TOTAL_NUM:
print(
"[TRAIN_DATASET]: fooling_count=%d, total_count=%d, fooling_rate=%f"
% (fooling_count, total_count,
float(fooling_count) / total_count))
break
# use test data to generate adversarial examples
total_count = 0
fooling_count = 0
for data in test_reader():
total_count += 1
adversary = Adversary(data[0][0], data[0][1])
tlabel = 0
adversary.set_target(is_targeted_attack=True, target_label=tlabel)
# ILCM targeted attack
adversary = attack(adversary, **attack_config)
if adversary.is_successful():
fooling_count += 1
print(
'attack success, original_label=%d, adversarial_label=%d, count=%d'
% (data[0][1], adversary.adversarial_label, total_count))
# plt.imshow(adversary.target, cmap='Greys_r')
# plt.show()
# np.save('adv_img', adversary.target)
else:
print('attack failed, original_label=%d, count=%d' %
(data[0][1], total_count))
if total_count >= TOTAL_NUM:
print(
"[TEST_DATASET]: fooling_count=%d, total_count=%d, fooling_rate=%f"
% (fooling_count, total_count,
float(fooling_count) / total_count))
break
print("ilcm attack done")
if __name__ == '__main__':
main()
"""
JSMA tutorial on mnist using advbox tool.
The JSMA method supports both targeted and non-targeted attacks.
"""
import sys
sys.path.append("..")
import matplotlib.pyplot as plt
import paddle.fluid as fluid
import paddle.v2 as paddle
from advbox.adversary import Adversary
from advbox.attacks.saliency import JSMA
from advbox.models.paddle import PaddleModel
from tutorials.mnist_model import mnist_cnn_model
def main():
"""
    Advbox demo which demonstrates how to use advbox.
"""
TOTAL_NUM = 500
IMG_NAME = 'img'
LABEL_NAME = 'label'
img = fluid.layers.data(name=IMG_NAME, shape=[1, 28, 28], dtype='float32')
# gradient should flow
img.stop_gradient = False
label = fluid.layers.data(name=LABEL_NAME, shape=[1], dtype='int64')
logits = mnist_cnn_model(img)
cost = fluid.layers.cross_entropy(input=logits, label=label)
avg_cost = fluid.layers.mean(x=cost)
# use CPU
place = fluid.CPUPlace()
# use GPU
# place = fluid.CUDAPlace(0)
exe = fluid.Executor(place)
BATCH_SIZE = 1
train_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.mnist.train(), buf_size=128 * 10),
batch_size=BATCH_SIZE)
test_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.mnist.test(), buf_size=128 * 10),
batch_size=BATCH_SIZE)
fluid.io.load_params(
exe, "./mnist/", main_program=fluid.default_main_program())
# advbox demo
m = PaddleModel(
fluid.default_main_program(),
IMG_NAME,
LABEL_NAME,
logits.name,
avg_cost.name, (-1, 1),
channel_axis=1)
attack = JSMA(m)
attack_config = {
"max_iter": 2000,
"theta": 0.1,
"max_perturbations_per_pixel": 7
}
# use train data to generate adversarial examples
total_count = 0
fooling_count = 0
for data in train_reader():
total_count += 1
adversary = Adversary(data[0][0], data[0][1])
# JSMA non-targeted attack
adversary = attack(adversary, **attack_config)
# JSMA targeted attack
# tlabel = 0
# adversary.set_target(is_targeted_attack=True, target_label=tlabel)
# adversary = attack(adversary, **attack_config)
# JSMA may return None
if adversary is not None and adversary.is_successful():
fooling_count += 1
print(
'attack success, original_label=%d, adversarial_label=%d, count=%d'
% (data[0][1], adversary.adversarial_label, total_count))
# plt.imshow(adversary.target, cmap='Greys_r')
# plt.show()
# np.save('adv_img', adversary.target)
else:
print('attack failed, original_label=%d, count=%d' %
(data[0][1], total_count))
if total_count >= TOTAL_NUM:
print(
"[TRAIN_DATASET]: fooling_count=%d, total_count=%d, fooling_rate=%f"
% (fooling_count, total_count,
float(fooling_count) / total_count))
break
# use test data to generate adversarial examples
total_count = 0
fooling_count = 0
for data in test_reader():
total_count += 1
adversary = Adversary(data[0][0], data[0][1])
# JSMA non-targeted attack
adversary = attack(adversary, **attack_config)
# JSMA targeted attack
# tlabel = 0
# adversary.set_target(is_targeted_attack=True, target_label=tlabel)
# adversary = attack(adversary, **attack_config)
# JSMA may return None
if adversary is not None and adversary.is_successful():
fooling_count += 1
print(
'attack success, original_label=%d, adversarial_label=%d, count=%d'
% (data[0][1], adversary.adversarial_label, total_count))
# plt.imshow(adversary.target, cmap='Greys_r')
# plt.show()
# np.save('adv_img', adversary.target)
else:
print('attack failed, original_label=%d, count=%d' %
(data[0][1], total_count))
if total_count >= TOTAL_NUM:
print(
"[TEST_DATASET]: fooling_count=%d, total_count=%d, fooling_rate=%f"
% (fooling_count, total_count,
float(fooling_count) / total_count))
break
print("jsma attack done")
if __name__ == '__main__':
main()
"""
LBFGS tutorial on mnist using advbox tool.
The LBFGS method supports only targeted attacks.
"""
import sys
sys.path.append("..")
import matplotlib.pyplot as plt
import paddle.fluid as fluid
import paddle.v2 as paddle
from advbox.adversary import Adversary
from advbox.attacks.lbfgs import LBFGS
from advbox.models.paddle import PaddleModel
from tutorials.mnist_model import mnist_cnn_model
def main():
"""
    Advbox demo which demonstrates how to use advbox.
"""
TOTAL_NUM = 500
IMG_NAME = 'img'
LABEL_NAME = 'label'
img = fluid.layers.data(name=IMG_NAME, shape=[1, 28, 28], dtype='float32')
# gradient should flow
img.stop_gradient = False
label = fluid.layers.data(name=LABEL_NAME, shape=[1], dtype='int64')
logits = mnist_cnn_model(img)
cost = fluid.layers.cross_entropy(input=logits, label=label)
avg_cost = fluid.layers.mean(x=cost)
# use CPU
place = fluid.CPUPlace()
# use GPU
# place = fluid.CUDAPlace(0)
exe = fluid.Executor(place)
BATCH_SIZE = 1
train_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.mnist.train(), buf_size=128 * 10),
batch_size=BATCH_SIZE)
test_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.mnist.test(), buf_size=128 * 10),
batch_size=BATCH_SIZE)
fluid.io.load_params(
exe, "./mnist/", main_program=fluid.default_main_program())
# advbox demo
m = PaddleModel(
fluid.default_main_program(),
IMG_NAME,
LABEL_NAME,
logits.name,
avg_cost.name, (-1, 1),
channel_axis=1)
attack = LBFGS(m)
attack_config = {"epsilon": 0.001, }
# use train data to generate adversarial examples
total_count = 0
fooling_count = 0
for data in train_reader():
total_count += 1
adversary = Adversary(data[0][0], data[0][1])
# LBFGS targeted attack
tlabel = 0
adversary.set_target(is_targeted_attack=True, target_label=tlabel)
adversary = attack(adversary, **attack_config)
if adversary.is_successful():
fooling_count += 1
print(
'attack success, original_label=%d, adversarial_label=%d, count=%d'
% (data[0][1], adversary.adversarial_label, total_count))
# plt.imshow(adversary.target, cmap='Greys_r')
# plt.show()
# np.save('adv_img', adversary.target)
else:
print('attack failed, original_label=%d, count=%d' %
(data[0][1], total_count))
if total_count >= TOTAL_NUM:
print(
"[TRAIN_DATASET]: fooling_count=%d, total_count=%d, fooling_rate=%f"
% (fooling_count, total_count,
float(fooling_count) / total_count))
break
# use test data to generate adversarial examples
total_count = 0
fooling_count = 0
for data in test_reader():
total_count += 1
adversary = Adversary(data[0][0], data[0][1])
# LBFGS targeted attack
tlabel = 0
adversary.set_target(is_targeted_attack=True, target_label=tlabel)
adversary = attack(adversary, **attack_config)
if adversary.is_successful():
fooling_count += 1
print(
'attack success, original_label=%d, adversarial_label=%d, count=%d'
% (data[0][1], adversary.adversarial_label, total_count))
# plt.imshow(adversary.target, cmap='Greys_r')
# plt.show()
# np.save('adv_img', adversary.target)
else:
print('attack failed, original_label=%d, count=%d' %
(data[0][1], total_count))
if total_count >= TOTAL_NUM:
print(
"[TEST_DATASET]: fooling_count=%d, total_count=%d, fooling_rate=%f"
% (fooling_count, total_count,
float(fooling_count) / total_count))
break
print("lbfgs attack done")
if __name__ == '__main__':
main()
"""
MIFGSM tutorial on mnist using advbox tool.
MIFGSM is a momentum iterative gradient-based method based on FGSM.
It supports both non-targeted and targeted attacks.
"""
import sys
sys.path.append("..")
import matplotlib.pyplot as plt
import numpy as np
import paddle.fluid as fluid
import paddle.v2 as paddle
from advbox.adversary import Adversary
from advbox.attacks.gradient_method import MIFGSM
from advbox.models.paddle import PaddleModel
from tutorials.mnist_model import mnist_cnn_model
def main():
"""
    Advbox demo which demonstrates how to use advbox.
"""
TOTAL_NUM = 500
IMG_NAME = 'img'
LABEL_NAME = 'label'
img = fluid.layers.data(name=IMG_NAME, shape=[1, 28, 28], dtype='float32')
# gradient should flow
img.stop_gradient = False
label = fluid.layers.data(name=LABEL_NAME, shape=[1], dtype='int64')
logits = mnist_cnn_model(img)
cost = fluid.layers.cross_entropy(input=logits, label=label)
avg_cost = fluid.layers.mean(x=cost)
# use CPU
place = fluid.CPUPlace()
# use GPU
# place = fluid.CUDAPlace(0)
exe = fluid.Executor(place)
BATCH_SIZE = 1
train_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.mnist.train(), buf_size=128 * 10),
batch_size=BATCH_SIZE)
test_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.mnist.test(), buf_size=128 * 10),
batch_size=BATCH_SIZE)
fluid.io.load_params(
exe, "./mnist/", main_program=fluid.default_main_program())
# advbox demo
m = PaddleModel(
fluid.default_main_program(),
IMG_NAME,
LABEL_NAME,
logits.name,
avg_cost.name, (-1, 1),
channel_axis=1)
attack = MIFGSM(m)
attack_config = {
"norm_ord": np.inf,
"epsilons": 0.1,
"steps": 100,
"decay_factor": 1
}
# use train data to generate adversarial examples
total_count = 0
fooling_count = 0
for data in train_reader():
total_count += 1
adversary = Adversary(data[0][0], data[0][1])
# MIFGSM non-targeted attack
adversary = attack(adversary, **attack_config)
# MIFGSM targeted attack
# tlabel = 0
# adversary.set_target(is_targeted_attack=True, target_label=tlabel)
# adversary = attack(adversary, **attack_config)
if adversary.is_successful():
fooling_count += 1
print(
'attack success, original_label=%d, adversarial_label=%d, count=%d'
% (data[0][1], adversary.adversarial_label, total_count))
# plt.imshow(adversary.target, cmap='Greys_r')
# plt.show()
# np.save('adv_img', adversary.target)
else:
print('attack failed, original_label=%d, count=%d' %
(data[0][1], total_count))
if total_count >= TOTAL_NUM:
print(
"[TRAIN_DATASET]: fooling_count=%d, total_count=%d, fooling_rate=%f"
% (fooling_count, total_count,
float(fooling_count) / total_count))
break
# use test data to generate adversarial examples
total_count = 0
fooling_count = 0
for data in test_reader():
total_count += 1
adversary = Adversary(data[0][0], data[0][1])
# MIFGSM non-targeted attack
adversary = attack(adversary, **attack_config)
# MIFGSM targeted attack
# tlabel = 0
# adversary.set_target(is_targeted_attack=True, target_label=tlabel)
# adversary = attack(adversary, **attack_config)
if adversary.is_successful():
fooling_count += 1
print(
'attack success, original_label=%d, adversarial_label=%d, count=%d'
% (data[0][1], adversary.adversarial_label, total_count))
# plt.imshow(adversary.target, cmap='Greys_r')
# plt.show()
# np.save('adv_img', adversary.target)
else:
print('attack failed, original_label=%d, count=%d' %
(data[0][1], total_count))
if total_count >= TOTAL_NUM:
print(
"[TEST_DATASET]: fooling_count=%d, total_count=%d, fooling_rate=%f"
% (fooling_count, total_count,
float(fooling_count) / total_count))
break
print("mifgsm attack done")
if __name__ == '__main__':
main()
### Caffe2Fluid
This tool is used to convert a Caffe model to a Fluid model
### HowTo
1. Prepare caffepb.py in ./proto if your python has no 'pycaffe' module; two options are provided here:
- Generate caffepb.py from caffe.proto
```
bash ./proto/compile.sh
```
- Download one from github directly
```
cd proto/ && wget https://github.com/ethereon/caffe-tensorflow/blob/master/kaffe/caffe/caffepb.py
```
2. Convert the Caffe model to Fluid model
- Generate fluid code and weight file
```
python convert.py alexnet.prototxt \
--caffemodel alexnet.caffemodel \
--data-output-path alexnet.npy \
--code-output-path alexnet.py
```
- Save weights as fluid model file
```
python alexnet.py alexnet.npy ./fluid #only infer the last layer's result
python alexnet.py alexnet.npy ./fluid fc8,prob #infer these 2 layer's result
```
3. Use the converted model to infer
- See more details in '*examples/imagenet/run.sh*'
4. Compare the inference results with caffe
- See more details in '*examples/imagenet/diff.sh*'
### How to convert custom layer
1. Implement your custom layer in a file under '*kaffe/custom_layers*', eg: mylayer.py
- Implement ```shape_func(input_shape, [other_caffe_params])``` to calculate the output shape
- Implement ```layer_func(inputs, name, [other_caffe_params])``` to construct a fluid layer
- Register these two functions ```register(kind='MyType', shape=shape_func, layer=layer_func)```
- Notes: more examples can be found in '*kaffe/custom_layers*'; a minimal sketch is also shown after this list
2. Add ```import mylayer``` to '*kaffe/custom_layers/\_\_init__.py*'
3. Prepare your pycaffe as your customized version (same environment preparation as above)
- (option1) replace 'proto/caffe.proto' with your own caffe.proto and compile it
- (option2) change your pycaffe to the customized version
4. Convert the Caffe model to Fluid model
5. Set env $CAFFE2FLUID_CUSTOM_LAYERS to the parent directory of 'custom_layers'
```
export CAFFE2FLUID_CUSTOM_LAYERS=/path/to/caffe2fluid/kaffe
```
6. Use the converted model when loading the model in 'xxxnet.py' and 'xxxnet.npy' (no need if the model is already in 'fluid/model' and 'fluid/params')
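A minimal sketch of such a custom layer file is shown below (hypothetical '*mylayer.py*'; the kind name 'MyType', the extra `scale` parameter and the use of `fluid.layers.scale` are assumptions made for illustration, the real examples live in '*kaffe/custom_layers*'):
```
# mylayer.py -- hypothetical sketch of a caffe2fluid custom layer
from .register import register


def mylayer_shape(input_shape):
    """ elementwise layer, so the output shape equals the input shape """
    return list(input_shape)


def mylayer_layer(input, name, scale=1.0):
    """ build the layer with fluid; 'scale' would be read from the caffe parameters """
    import paddle.fluid as fluid
    return fluid.layers.scale(x=input, scale=float(scale))


# register the shape and layer functions under the caffe layer type 'MyType',
# then add 'import mylayer' to kaffe/custom_layers/__init__.py (step 2 above)
register(kind='MyType', shape=mylayer_shape, layer=mylayer_layer)
```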
### Tested models
- Lenet:
[model addr](https://github.com/ethereon/caffe-tensorflow/blob/master/examples/mnist)
- ResNets:(ResNet-50, ResNet-101, ResNet-152)
[model addr](https://onedrive.live.com/?authkey=%21AAFW2-FVoxeVRck&id=4006CBB8476FF777%2117887&cid=4006CBB8476FF777)
- GoogleNet:
[model addr](https://gist.github.com/jimmie33/7ea9f8ac0da259866b854460f4526034)
- VGG:
[model addr](https://gist.github.com/ksimonyan/211839e770f7b538e2d8)
- AlexNet:
[model addr](https://github.com/BVLC/caffe/tree/master/models/bvlc_alexnet)
### Notes
Some of this code comes from here: [caffe-tensorflow](https://github.com/ethereon/caffe-tensorflow)
......@@ -43,11 +43,17 @@ def convert(def_path, caffemodel_path, data_output_path, code_output_path,
print_stderr('Saving source...')
with open(code_output_path, 'wb') as src_out:
src_out.write(transformer.transform_source())
print_stderr('set env variable before using converted model '\
'if used custom_layers:')
custom_pk_path = os.path.dirname(os.path.abspath(__file__))
custom_pk_path = os.path.join(custom_pk_path, 'kaffe')
print_stderr('export CAFFE2FLUID_CUSTOM_LAYERS=%s' % (custom_pk_path))
print_stderr('Done.')
return 0
except KaffeError as err:
fatal_error('Error encountered: {}'.format(err))
return 0
return 1
def main():
......
A demo to show converting caffe models on 'imagenet' using caffe2fluid
---
# How to use
1. Prepare python environment
2. Download caffe model to "models.caffe/xxx" which contains "xxx.caffemodel" and "xxx.prototxt"
3. Convert the Caffe model to Fluid model
- generate fluid code and weight file
<pre><code>python convert.py alexnet.prototxt \
--caffemodel alexnet.caffemodel \
--data-output-path alexnet.npy \
--code-output-path alexnet.py
</code></pre>
- save weights as fluid model file
<pre><code>python alexnet.py alexnet.npy ./fluid_model
</code></pre>
4. Do inference
<pre><code>python infer.py infer ./fluid_model data/65.jpeg
</code></pre>
5. Convert model and do inference together
<pre><code>bash ./run.sh alexnet ./models.caffe/alexnet ./models/alexnet
</code></pre>
The Caffe model is stored in './models.caffe/alexnet/alexnet.prototxt|caffemodel'
and the Fluid model will be saved in './models/alexnet/alexnet.py|npy'
6. Test the difference with caffe's results (pycaffe needs to be installed)
<pre><code>bash ./diff.sh resnet
</code></pre>
Make sure your caffemodel is stored in './models.caffe/resnet'.
The results will be stored in './results/resnet.paddle|caffe'
#!/usr/bin/python
#
#a tool to compare tensors in two files or two directories
#
import sys
import os
def walk_dir(rootdir):
for subdir, dirs, files in os.walk(rootdir):
for file in files:
yield file
def calc_diff(f1, f2):
import numpy as np
d1 = np.load(f1).flatten()
d2 = np.load(f2).flatten()
d1_num = reduce(lambda x, y: x * y, d1.shape)
d2_num = reduce(lambda x, y: x * y, d2.shape)
if d1_num != d2_num:
        print(d1.shape)
        print(d2.shape)
assert (d1_num == d2_num), "their shape is not consistent"
try:
df = np.abs(d1 - d2)
max_df = np.max(df)
sq_df = np.mean(df * df)
return max_df, sq_df
except Exception as e:
return -1.0, -1.0
def compare(path1, path2):
def diff(f1, f2):
max_df, sq_df = calc_diff(f1, f2)
print('compare %s <=> %s with result[max_df:%.4e, sq_df:%.4e]' %
(f1, f2, max_df, sq_df))
assert (max_df < 1e-5), \
'max_df is too large with value[%.6e]' % (max_df)
assert (sq_df < 1e-10), \
'sq_df is too large with value[%.6e]' % (sq_df)
if os.path.exists(path1) is False:
print('not found %s' % (path1))
return 1
elif os.path.exists(path2) is False:
print('not found %s' % (path2))
return 1
if path1.find('.npy') > 0 and path2.find('.npy') > 0:
diff(path1, path2)
return
for f in walk_dir(path2):
if f.find('.npy') < 0:
continue
f1 = os.path.join(path1, f)
f2 = os.path.join(path2, f)
diff(f1, f2)
    print('all checks passed')
return 0
if __name__ == "__main__":
if len(sys.argv) == 1:
path1 = 'lenet.tf/results'
path2 = 'lenet.paddle/results'
elif len(sys.argv) == 3:
path1 = sys.argv[1]
path2 = sys.argv[2]
else:
print('usage:')
print(' %s [path1] [path2]' % (sys.argv[0]))
exit(1)
print('compare inner result in %s %s' % (path1, path2))
exit(compare(path1, path2))
#!/bin/bash
#
#function:
# a tool used to check the difference of models' results generated by caffe model and paddle model
#
#howto:
# bash diff.sh resnet50 #when this has been finished, you can get the difference in precision
#
#notes:
# 0, in order to infer using caffe, we need pycaffe installed
# 1, prepare your caffe model in 'models.caffe/', eg: 'model.caffe/resnet101/resnet101.[prototxt|caffemodel]'
# 2, converted paddle model will be in 'models'
# 3, results of layers will be stored in 'results/${model_name}.[paddle|caffe]'
# 4, only the last layer will be checked by default
model_name="resnet50"
results_root="results/"
if [[ -n $1 ]];then
if [ $1 = "-h" ];then
echo "usage:"
echo " bash $0 [model_name]"
echo " eg:bash $0 resnet50"
exit 0
fi
model_name=$1
fi
mkdir -p $results_root
model_prototxt="models.caffe/$model_name/${model_name}.prototxt"
model_caffemodel="models.caffe/${model_name}/${model_name}.caffemodel"
#1, dump layers' results from paddle
paddle_results="$results_root/${model_name}.paddle"
rm -rf $paddle_results
rm -rf "results.paddle"
bash run.sh $model_name ./models.caffe/$model_name ./models/$model_name
if [[ $? -ne 0 ]] || [[ ! -e "results.paddle" ]];then
echo "not found paddle's results, maybe failed to convert"
exit 1
fi
mv results.paddle $paddle_results
#2, dump layers' results from caffe
caffe_results="$results_root/${model_name}.caffe"
rm -rf $caffe_results
rm -rf "results.caffe"
cfpython ./infer.py caffe $model_prototxt $model_caffemodel $paddle_results/data.npy
if [[ $? -ne 0 ]] || [[ ! -e "results.caffe" ]];then
echo "not found caffe's results, maybe failed to do inference with caffe"
exit 1
fi
mv results.caffe $caffe_results
#3, extract layer names
cat $model_prototxt | grep name | perl -ne 'if(/^\s*name:\s+\"([^\"]+)/){ print $1."\n";}' >.layer_names
#4, compare one by one
for i in $(cat ".layer_names" | tail -n1);do
echo "process $i"
python compare.py $caffe_results/${i}.npy $paddle_results/${i}.npy
done
......@@ -10,8 +10,11 @@ import os
import sys
import inspect
import numpy as np
import paddle.v2 as paddle
import paddle.v2.fluid as fluid
def import_fluid():
import paddle.fluid as fluid
return fluid
def load_data(imgfile, shape):
......@@ -40,7 +43,7 @@ def build_model(net_file, net_name):
(net_file, net_name))
net_path = os.path.dirname(net_file)
module_name = os.path.basename(net_file).rstrip('.py')
module_name = os.path.splitext(os.path.basename(net_file))[0]
if net_path not in sys.path:
sys.path.insert(0, net_path)
......@@ -48,23 +51,25 @@ def build_model(net_file, net_name):
m = __import__(module_name, fromlist=[net_name])
MyNet = getattr(m, net_name)
except Exception as e:
print('failed to load module[%s]' % (module_name))
print('failed to load module[%s.%s]' % (module_name, net_name))
print(e)
return None
input_name = 'data'
input_shape = MyNet.input_shapes()[input_name]
images = fluid.layers.data(name='image', shape=input_shape, dtype='float32')
fluid = import_fluid()
inputs_dict = MyNet.input_shapes()
input_name = inputs_dict.keys()[0]
input_shape = inputs_dict[input_name]
images = fluid.layers.data(
name=input_name, shape=input_shape, dtype='float32')
#label = fluid.layers.data(name='label', shape=[1], dtype='int64')
net = MyNet({input_name: images})
input_shape = MyNet.input_shapes()[input_name]
return net, input_shape
return net, inputs_dict
def dump_results(results, names, root):
if os.path.exists(root) is False:
os.path.mkdir(root)
os.mkdir(root)
for i in range(len(names)):
n = names[i]
......@@ -73,23 +78,27 @@ def dump_results(results, names, root):
np.save(filename + '.npy', res)
def infer(net_file, net_name, model_file, imgfile, debug=False):
""" do inference using a model which consist 'xxx.py' and 'xxx.npy'
def load_model(exe, place, net_file, net_name, net_weight, debug):
""" load model using xxxnet.py and xxxnet.npy
"""
fluid = import_fluid()
#1, build model
net, input_shape = build_model(net_file, net_name)
net, input_map = build_model(net_file, net_name)
feed_names = input_map.keys()
feed_shapes = [v for k, v in input_map.items()]
prediction = net.get_output()
#2, load weights for this model
place = fluid.CPUPlace()
exe = fluid.Executor(place)
startup_program = fluid.default_startup_program()
exe.run(startup_program)
if model_file.find('.npy') > 0:
net.load(data_path=model_file, exe=exe, place=place)
#place = fluid.CPUPlace()
if net_weight.find('.npy') > 0:
net.load(data_path=net_weight, exe=exe, place=place)
else:
net.load(data_path=model_file, exe=exe)
raise ValueError('not found weight file')
#3, test this model
test_program = fluid.default_main_program().clone()
......@@ -103,18 +112,116 @@ def infer(net_file, net_name, model_file, imgfile, debug=False):
fetch_list_var.append(v)
fetch_list_name.append(k)
return {
'program': test_program,
'feed_names': feed_names,
'fetch_vars': fetch_list_var,
'fetch_names': fetch_list_name,
'feed_shapes': feed_shapes
}
def get_shape(fluid, program, name):
for var in program.list_vars():
if var.name == 'data':
return list(var.shape[1:])
raise ValueError('not found shape for input layer[%s], '
'you can specify by yourself' % (name))
def load_inference_model(dirname, exe):
""" load fluid's inference model
"""
fluid = import_fluid()
model_fn = 'model'
params_fn = 'params'
if os.path.exists(os.path.join(dirname, model_fn)) \
and os.path.exists(os.path.join(dirname, params_fn)):
program, feed_names, fetch_targets = fluid.io.load_inference_model(\
dirname, exe, model_fn, params_fn)
else:
        raise ValueError('not found model files in directory[%s]' % (dirname))
#print fluid.global_scope().find_var(feed_names[0])
input_shape = get_shape(fluid, program, feed_names[0])
feed_shapes = [input_shape]
return program, feed_names, fetch_targets, feed_shapes
def infer(model_path, imgfile, net_file=None, net_name=None, debug=True):
""" do inference using a model which consist 'xxx.py' and 'xxx.npy'
"""
fluid = import_fluid()
place = fluid.CPUPlace()
exe = fluid.Executor(place)
try:
ret = load_inference_model(model_path, exe)
program, feed_names, fetch_targets, feed_shapes = ret
debug = False
        print('found an inference model for fluid')
except ValueError as e:
print('try to load model using net file and weight file')
net_weight = model_path
ret = load_model(exe, place, net_file, net_name, net_weight, debug)
program = ret['program']
feed_names = ret['feed_names']
fetch_targets = ret['fetch_vars']
fetch_list_name = ret['fetch_names']
feed_shapes = ret['feed_shapes']
input_name = feed_names[0]
input_shape = feed_shapes[0]
np_images = load_data(imgfile, input_shape)
results = exe.run(program=test_program,
feed={'image': np_images},
fetch_list=fetch_list_var)
results = exe.run(program=program,
feed={input_name: np_images},
fetch_list=fetch_targets)
if debug is True:
dump_path = 'results.layers'
dump_path = 'results.paddle'
dump_results(results, fetch_list_name, dump_path)
print('all results dumped to [%s]' % (dump_path))
        print('all results of layers dumped to [%s]' % (dump_path))
else:
result = results[0]
print('predicted class:', np.argmax(result))
print('succeed infer with results[class:%d]' % (np.argmax(result)))
return 0
def caffe_infer(prototxt, caffemodel, datafile):
""" do inference using pycaffe for debug,
        all intermediate results will be dumped to 'results.caffe'
"""
import caffe
net = caffe.Net(prototxt, caffemodel, caffe.TEST)
input_layer = net.blobs.keys()[0]
print('got name of input layer is:%s' % (input_layer))
input_shape = list(net.blobs[input_layer].data.shape[1:])
if '.npy' in datafile:
np_images = np.load(datafile)
else:
np_images = load_data(datafile, input_shape)
inputs = {input_layer: np_images}
net.forward_all(**inputs)
results = []
names = []
for k, v in net.blobs.items():
k = k.rstrip('_output')
k = k.replace('/', '_')
names.append(k)
results.append(v.data.copy())
dump_path = 'results.caffe'
dump_results(results, names, dump_path)
    print('all results of layers dumped to [%s]' % (dump_path))
return 0
if __name__ == "__main__":
......@@ -122,21 +229,50 @@ if __name__ == "__main__":
"""
net_file = 'models/resnet50/resnet50.py'
weight_file = 'models/resnet50/resnet50.npy'
imgfile = 'data/65.jpeg'
datafile = 'data/65.jpeg'
net_name = 'ResNet50'
model_file = 'models/resnet50/fluid'
ret = None
if len(sys.argv) <= 2:
pass
elif sys.argv[1] == 'caffe':
if len(sys.argv) != 5:
print('usage:')
print('\tpython %s caffe [prototxt] [caffemodel] [datafile]' %
(sys.argv[0]))
sys.exit(1)
prototxt = sys.argv[2]
caffemodel = sys.argv[3]
datafile = sys.argv[4]
ret = caffe_infer(prototxt, caffemodel, datafile)
elif sys.argv[1] == 'infer':
if len(sys.argv) != 4:
print('usage:')
print('\tpython %s infer [fluid_model] [datafile]' % (sys.argv[0]))
sys.exit(1)
model_path = sys.argv[2]
datafile = sys.argv[3]
ret = infer(model_path, datafile)
elif sys.argv[1] == 'dump':
if len(sys.argv) != 6:
print('usage:')
print('\tpython %s dump [net_file] [weight_file] [datafile] [net_name]' \
% (sys.argv[0]))
print('\teg:python dump %s %s %s %s %s' % (sys.argv[0],\
net_file, weight_file, datafile, net_name))
sys.exit(1)
net_file = sys.argv[2]
weight_file = sys.argv[3]
datafile = sys.argv[4]
net_name = sys.argv[5]
ret = infer(weight_file, datafile, net_file, net_name)
argc = len(sys.argv)
if argc == 5:
net_file = sys.argv[1]
weight_file = sys.argv[2]
imgfile = sys.argv[3]
net_name = sys.argv[4]
elif argc > 1:
if ret is None:
print('usage:')
print('\tpython %s [net_file] [weight_file] [imgfile] [net_name]' %
(sys.argv[0]))
print('\teg:python %s %s %s %s %s' % (sys.argv[0], net_file,
weight_file, imgfile, net_name))
print(' python %s [infer] [fluid_model] [imgfile]' % (sys.argv[0]))
print(' eg:python %s infer %s %s' % (sys.argv[0], model_file, datafile))
sys.exit(1)
infer(net_file, net_name, weight_file, imgfile)
sys.exit(ret)
......@@ -3,7 +3,7 @@
#function:
# a tool used to:
# 1, convert a caffe model
# 2, do inference using this model
# 2, do inference(only in fluid) using this model
#
#usage:
# bash run.sh resnet50 ./models.caffe/resnet50 ./models/resnet50
......@@ -65,8 +65,13 @@ if [[ -z $only_convert ]];then
PYTHON=`which python`
fi
imgfile="data/65.jpeg"
net_name=`grep "name" $proto_file | head -n1 | perl -ne 'if(/\"([^\"]+)\"/){ print $1."\n";}'`
$PYTHON ./infer.py $net_file $weight_file $imgfile $net_name
#FIX ME:
# only look the first line in prototxt file for the name of this network, maybe not correct
net_name=`grep "name" $proto_file | head -n1 | perl -ne 'if(/^name\s*:\s*\"([^\"]+)\"/){ print $1."\n";}'`
if [[ -z $net_name ]];then
net_name="MyNet"
fi
$PYTHON ./infer.py dump $net_file $weight_file $imgfile $net_name
ret=$?
fi
exit $ret
......@@ -7,8 +7,8 @@
import sys
import os
import numpy as np
import paddle.fluid as fluid
import paddle.v2 as paddle
import paddle.v2.fluid as fluid
def test_model(exe, test_program, fetch_list, test_reader, feeder):
......@@ -34,9 +34,6 @@ def evaluate(net_file, model_file):
from lenet import LeNet as MyNet
with_gpu = False
paddle.init(use_gpu=with_gpu)
#1, define network topology
images = fluid.layers.data(name='image', shape=[1, 28, 28], dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
......@@ -45,7 +42,7 @@ def evaluate(net_file, model_file):
prediction = net.layers['prob']
acc = fluid.layers.accuracy(input=prediction, label=label)
place = fluid.CUDAPlace(0) if with_gpu is True else fluid.CPUPlace()
place = fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
......
File mode changed from 100644 to 100755
"""
"""
from .register import get_registered_layers
#custom layer import begins
import axpy
import flatten
import argmax
#custom layer import ends
custom_layers = get_registered_layers()
def set_args(f, params):
""" set args for function 'f' using the parameters in node.layer.parameters
Args:
f (function): a python function object
        params (object): an object that contains the attributes needed by f's arguments
Returns:
arg_names (list): a list of argument names
        kwargs (dict): a dict that contains the needed arguments
"""
argc = f.__code__.co_argcount
arg_list = f.__code__.co_varnames[0:argc]
kwargs = {}
for arg_name in arg_list:
try:
v = getattr(params, arg_name, None)
except Exception as e:
#maybe failed to extract caffe's parameters
v = None
if v is not None:
kwargs[arg_name] = v
return arg_list, kwargs
def has_layer(kind):
""" test whether this layer exists in custom layer
"""
return kind in custom_layers
def compute_output_shape(kind, node):
assert kind in custom_layers, "layer[%s] not exist in custom layers" % (
kind)
shape_func = custom_layers[kind]['shape']
parents = node.parents
inputs = [list(p.output_shape) for p in parents]
arg_names, kwargs = set_args(shape_func, node.layer.parameters)
if len(inputs) == 1:
inputs = inputs[0]
return shape_func(inputs, **kwargs)
def make_node(template, kind, node):
""" make a TensorFlowNode for custom layer which means construct
a piece of code to define a layer implemented in 'custom_layers'
Args:
        @template (TensorFlowNode): a factory to create an instance of TensorFlowNode
@kind (str): type of custom layer
@node (graph.Node): a layer in the net
Returns:
instance of TensorFlowNode
"""
assert kind in custom_layers, "layer[%s] not exist in custom layers" % (
kind)
layer_func = custom_layers[kind]['layer']
#construct arguments needed by custom layer function from node's parameters
arg_names, kwargs = set_args(layer_func, node.layer.parameters)
return template('custom_layer', kind, **kwargs)
def make_custom_layer(kind, inputs, name, *args, **kwargs):
""" execute a custom layer which is implemented by users
Args:
@kind (str): type name of this layer
@inputs (vars): variable list created by fluid
        @name (str): name for this layer
@args (tuple): other positional arguments
@kwargs (dict): other kv arguments
Returns:
output (var): output variable for this layer
"""
assert kind in custom_layers, "layer[%s] not exist in custom layers" % (
kind)
layer_func = custom_layers[kind]['layer']
return layer_func(inputs, name, *args, **kwargs)
""" a custom layer for 'argmax', maybe we should implement this in standard way.
more info can be found here: http://caffe.berkeleyvision.org/tutorial/layers/argmax.html
"""
from .register import register
def import_fluid():
import paddle.fluid as fluid
return fluid
def argmax_shape(input_shape, out_max_val=False, top_k=1, axis=-1):
""" calculate the output shape of this layer using input shape
Args:
@input_shape (list of num): a list of number which represents the input shape
@out_max_val (bool): parameter from caffe's ArgMax layer
@top_k (int): parameter from caffe's ArgMax layer
@axis (int): parameter from caffe's ArgMax layer
Returns:
@output_shape (list of num): a list of numbers represent the output shape
"""
input_shape = list(input_shape)
if axis < 0:
axis += len(input_shape)
assert (axis + 1 == len(input_shape)
), 'only can be applied on the last dimension[axis:%d, %s] now,'\
'make sure you have set axis param in xxx.prototxt file' \
% (axis, str(input_shape))
output_shape = input_shape
output_shape[-1] = top_k
if out_max_val is True:
output_shape[-1] *= 2
return output_shape
def argmax_layer(input, name, out_max_val=False, top_k=1, axis=-1):
""" build a layer of type 'ArgMax' using fluid
Args:
@input (variable): input fluid variable for this layer
@name (str): name for this layer
@out_max_val (bool): parameter from caffe's ArgMax layer
@top_k (int): parameter from caffe's ArgMax layer
@axis (int): parameter from caffe's ArgMax layer
Returns:
output (variable): output variable for this layer
"""
fluid = import_fluid()
if axis < 0:
axis += len(input.shape)
topk_var, index_var = fluid.layers.topk(input=input, k=top_k)
if out_max_val is True:
index_var = fluid.layers.cast(index_var, dtype=topk_var.dtype)
output = fluid.layers.concat([index_var, topk_var], axis=axis)
else:
output = index_var
return output
register(kind='ArgMax', shape=argmax_shape, layer=argmax_layer)
""" A custom layer for 'axpy' which receives 3 tensors and output 1 tensor.
    the function performed is (the multiplication and addition are elementwise):
output = inputs[0] * inputs[1] + inputs[2]
"""
from .register import register
def axpy_shape(input_shapes):
""" calculate the output shape of this layer using input shapes
Args:
@input_shapes (list of tuples): a list of input shapes
Returns:
@output_shape (list of num): a list of numbers represent the output shape
"""
assert len(input_shapes) == 3, "not valid input shape for axpy layer"
assert len(input_shapes[0]) == len(input_shapes[1]), 'should have same dims'
output_shape = input_shapes[1]
assert (input_shapes[2] == output_shape),\
"shape not consistent for axpy[%s <--> %s]" \
% (str(output_shape), str(input_shapes[2]))
return output_shape
def axpy_layer(inputs, name):
""" build a layer of type 'Axpy' using fluid
Args:
@inputs (list of variables): input fluid variables for this layer
@name (str): name for this layer
Returns:
output (variable): output variable for this layer
"""
import paddle.fluid as fluid
assert len(inputs) == 3, "invalid inputs for axpy[%s]" % (name)
alpha = inputs[0]
x = inputs[1]
y = inputs[2]
output = fluid.layers.elementwise_mul(x, alpha, axis=0)
output = fluid.layers.elementwise_add(output, y)
return output
register(kind='Axpy', shape=axpy_shape, layer=axpy_layer)
""" a custom layer for 'flatten', maybe we should implement this in standard way.
more info can be found here: http://caffe.berkeleyvision.org/tutorial/layers/flatten.html
"""
from .register import register
def import_fluid():
import paddle.fluid as fluid
return fluid
def flatten_shape(input_shape, axis=1, end_axis=-1):
""" calculate the output shape of this layer using input shape
Args:
@input_shape (list of num): a list of number which represents the input shape
@axis (int): parameter from caffe's Flatten layer
@end_axis (int): parameter from caffe's Flatten layer
Returns:
@output_shape (list of num): a list of numbers represent the output shape
"""
start_axis = axis
end_axis = end_axis
input_shape = list(input_shape)
if start_axis < 0:
start_axis += len(input_shape)
if end_axis < 0:
end_axis += len(input_shape)
assert start_axis <= end_axis, 'invalid axis[%d] or end_axis[%d] params'\
% (start_axis, end_axis)
output_shape = input_shape[0:start_axis]
    # caffe's end_axis is inclusive, so the flattened block covers [start_axis, end_axis]
    flat_sz = reduce(lambda a, b: a * b, input_shape[start_axis:end_axis + 1])
    output_shape += [flat_sz]
    output_shape += input_shape[end_axis + 1:]
return output_shape
def flatten_layer(input, name, axis=1, end_axis=-1):
""" build a layer of type 'Flatten' using fluid
Args:
@input (variable): input fluid variable for this layer
@name (str): name for this layer
@axis (int): parameter from caffe's Flatten layer
@end_axis (int): parameter from caffe's Flatten layer
Returns:
output (variable): output variable for this layer
"""
fluid = import_fluid()
input_shape = list(input.shape)
dims = len(input_shape)
start_axis = axis if axis >= 0 else axis + dims
end_axis = end_axis if end_axis >= 0 else end_axis + dims
assert start_axis <= end_axis, 'invalid axis or end_axis params'
output_shape = input_shape[0:start_axis]
    # caffe's end_axis is inclusive, so the flattened block covers [start_axis, end_axis]
    flat_sz = reduce(lambda a, b: a * b, input_shape[start_axis:end_axis + 1])
    output_shape += [flat_sz]
    output_shape += input_shape[end_axis + 1:]
output = fluid.layers.reshape(input, shape=output_shape, name=name)
return output
register(kind='Flatten', shape=flatten_shape, layer=flatten_layer)
""" this module provides 'register' for registering customized layers
"""
g_custom_layers = {}
def register(kind, shape, layer):
""" register a custom layer or a list of custom layers
Args:
@kind (str or list): type name of the layer
@shape (function): a function to generate the shape of layer's output
        @layer (function): a function to build the layer using fluid
Returns:
None
"""
assert type(shape).__name__ == 'function', 'shape should be a function'
assert type(layer).__name__ == 'function', 'layer should be a function'
if type(kind) is str:
kind = [kind]
else:
assert type(
kind) is list, 'invalid param "kind" for register, not a list or str'
for k in kind:
assert type(
k) is str, 'invalid param "kind" for register, not a list of str'
assert k not in g_custom_layers, 'this type[%s] has already been registered' % (
k)
print('register layer[%s]' % (k))
g_custom_layers[k] = {'shape': shape, 'layer': layer}
def get_registered_layers():
return g_custom_layers
......@@ -3,7 +3,7 @@ from google.protobuf import text_format
from .caffe import get_caffe_resolver
from .errors import KaffeError, print_stderr
from .layers import LayerAdapter, LayerType, NodeKind, NodeDispatch
from .shapes import TensorShape
from .shapes import make_tensor
class Node(object):
......@@ -52,7 +52,10 @@ class Graph(object):
def __init__(self, nodes=None, name=None):
self.nodes = nodes or []
self.node_lut = {node.name: node for node in self.nodes}
self.name = name
if name is None or name == '':
self.name = 'MyNet'
else:
self.name = name
def add_node(self, node):
self.nodes.append(node)
......@@ -95,7 +98,7 @@ class Graph(object):
def compute_output_shapes(self):
sorted_nodes = self.topologically_sorted()
for node in sorted_nodes:
node.output_shape = TensorShape(
node.output_shape = make_tensor(
*NodeKind.compute_output_shape(node))
def replaced(self, new_nodes):
......@@ -108,6 +111,7 @@ class Graph(object):
if graph is None:
raise KaffeError('Transformer failed: {}'.format(transformer))
assert isinstance(graph, Graph)
return graph
def __contains__(self, key):
......@@ -120,10 +124,18 @@ class Graph(object):
for node in self.topologically_sorted():
# If the node has learned parameters, display the first one's shape.
# In case of convolutions, this corresponds to the weights.
data_shape = node.data[0].shape if node.data else '--'
out_shape = node.output_shape or '--'
s.append('{:<20} {:<30} {:>20} {:>20}'.format(
node.kind, node.name, data_shape, tuple(out_shape)))
if node.data is None:
data_shape = '--'
out_shape = node.output_shape or '--'
s.append('{:<20} {:<30} {:>20} {:>20}'.format(
node.kind, node.name, data_shape, tuple(out_shape)))
else:
for d in node.data:
#data_shape = node.data[0].shape if node.data else '--'
data_shape = d.shape
out_shape = node.output_shape or '--'
s.append('{:<20} {:<30} {:>20} {:>20}'.format(
node.kind, node.name, data_shape, tuple(out_shape)))
return '\n'.join(s)
......@@ -234,6 +246,7 @@ class GraphBuilder(object):
if (parent_node is None) or (parent_node == node):
parent_node = graph.get_node(input_name)
node.add_parent(parent_node)
if len(layer.top) > 1:
raise KaffeError('Multiple top nodes are not supported.')
......
......@@ -2,6 +2,7 @@ import re
import numbers
from collections import namedtuple
import custom_layers
from .shapes import *
LAYER_DESCRIPTORS = {
......@@ -116,6 +117,9 @@ def get_v1_layer_map():
class NodeKind(LayerType):
@staticmethod
def map_raw_kind(kind):
if custom_layers.has_layer(kind):
return kind
if kind in LAYER_TYPES:
return kind
......@@ -127,6 +131,9 @@ class NodeKind(LayerType):
@staticmethod
def compute_output_shape(node):
if custom_layers.has_layer(node.kind):
return custom_layers.compute_output_shape(node.kind, node)
try:
val = LAYER_DESCRIPTORS[node.kind](node)
return val
......@@ -137,14 +144,13 @@ class NodeKind(LayerType):
class NodeDispatchError(KaffeError):
pass
class NodeDispatch(object):
@staticmethod
def get_handler_name(node_kind):
if len(node_kind) <= 4:
if len(node_kind) <= 6:
# A catch-all for things like ReLU and tanh
return node_kind.lower()
# Convert from CamelCase to under_scored
......@@ -152,6 +158,9 @@ class NodeDispatch(object):
return re.sub('([a-z0-9])([A-Z])', r'\1_\2', name).lower()
def get_handler(self, node_kind, prefix):
if custom_layers.has_layer(node_kind):
return getattr(self, 'map_custom')
name = self.get_handler_name(node_kind)
name = '_'.join((prefix, name))
try:
......@@ -174,8 +183,10 @@ class LayerAdapter(object):
try:
return getattr(self.layer, name)
except AttributeError:
print(dir(self.layer))
raise NodeDispatchError(
'Caffe parameters not found for layer kind: %s' % (self.kind))
'Caffe parameters not found attr[%s] for layer kind[%s]' %
(name, self.kind))
@staticmethod
def get_kernel_value(scalar, repeated, idx, default=None):
......
""" this module is used as a template for generating sub class of Network
"""
class MyNet(object):
### automatically generated by caffe2fluid ###
inputs_info = "INPUTS_INFO"
custom_layers_path = "CAFFE2FLUID_CUSTOM_LAYERS"
def custom_layer_factory(self):
import os
pk_paths = []
default = os.path.dirname(os.path.abspath(__file__))
location = os.environ.get('CAFFE2FLUID_CUSTOM_LAYERS', default)
pk_name = 'custom_layers'
pk_dir = os.path.join(location, pk_name)
pk_paths.append((location, pk_dir))
location = MyNet.custom_layers_path
pk_dir = os.path.join(MyNet.custom_layers_path, pk_name)
pk_paths.append((location, pk_dir))
for loc, pk_dir in pk_paths:
if os.path.exists(pk_dir):
if loc not in sys.path:
sys.path.insert(0, loc)
break
try:
from custom_layers import make_custom_layer
return make_custom_layer
except Exception as e:
print('maybe you should set $CAFFE2FLUID_CUSTOM_LAYERS first')
raise e
@classmethod
def input_shapes(cls):
return cls.inputs_info
@classmethod
def convert(cls, npy_model, fluid_path, outputs=None):
fluid = import_fluid()
shapes = cls.input_shapes()
input_name = shapes.keys()[0]
feed_data = {}
for name, shape in shapes.items():
data_layer = fluid.layers.data(
name=name, shape=shape, dtype="float32")
feed_data[name] = data_layer
net = cls(feed_data)
place = fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
net.load(data_path=npy_model, exe=exe, place=place)
output_vars = []
if outputs is None:
output_vars.append(net.get_output())
else:
if type(outputs) is list:
for n in outputs:
assert n in net.layers, 'not found layer with this name[%s]' % (
n)
output_vars.append(net.layers[n])
fluid.io.save_inference_model(
fluid_path, [input_name],
output_vars,
exe,
main_program=None,
model_filename='model',
params_filename='params')
return 0
def main():
""" a tool used to convert caffe model to fluid
"""
import sys
import os
filename = os.path.splitext(os.path.basename(sys.argv[0]))[0]
if len(sys.argv) < 3:
print('usage:')
        print(' python %s %s.npy [save_dir] [layer names separated by comma]' \
% (sys.argv[0], filename))
print(' eg: python %s %s.npy ./fluid' % (sys.argv[0], filename))
print(' eg: python %s %s.npy ./fluid layer_name1,layer_name2' \
% (sys.argv[0], filename))
return 1
npy_weight = sys.argv[1]
fluid_model = sys.argv[2]
outputs = None
if len(sys.argv) >= 4:
outputs = sys.argv[3].split(',')
ret = MyNet.convert(npy_weight, fluid_model, outputs)
if ret == 0:
outputs = 'last output layer' if outputs is None else outputs
print('succeed to convert to fluid format with output layers[%s]'
' in directory[%s]' % (outputs, fluid_model))
else:
print('failed to convert model to fluid format')
return ret
def generate_net_code(net_name, inputs_info):
""" generate framework of a custom net code which represent a subclass of Network
Args:
@net_name (str): class name for this net
@inputs_info (str): a str which represents a dict, eg: '{"data": [3, 32, 32]}'
Returns:
net_codes (str): codes for this subclass
"""
import os
import inspect
net_codes = str(inspect.getsource(MyNet))
net_codes = net_codes.replace('MyNet(object)', '%s(Network)' % net_name)
net_codes = net_codes.replace('"INPUTS_INFO"', inputs_info)
custom_layer_dir = os.path.dirname(os.path.abspath(__file__))
net_codes = net_codes.replace('CAFFE2FLUID_CUSTOM_LAYERS', custom_layer_dir)
return net_codes
def generate_main_code(net_name):
""" generate a piece of code for 'main' function
Args:
@net_name (str): class name for this net
Returns:
main_codes (str): codes for this main function
"""
import inspect
main_codes = str(inspect.getsource(main))
main_codes = main_codes.replace('MyNet', net_name)
return main_codes
if __name__ == "__main__":
""" just for testing
"""
print generate_net_code('Attribute', "{'data': [3, 277, 277]}")
print generate_main_code('Attribute')
import math
import sys
import os
import math
import numpy as np
def import_fluid():
import paddle.v2.fluid as fluid
import paddle.fluid as fluid
return fluid
......@@ -64,7 +65,7 @@ class Network(object):
if os.path.isdir(data_path):
assert (exe is not None), \
                'must provide an executor to load fluid model'
fluid.io.load_persistables_if_exist(executor=exe, dirname=data_path)
fluid.io.load_persistables(executor=exe, dirname=data_path)
return True
#load model from a npy file
......@@ -161,57 +162,53 @@ class Network(object):
output = fluid.layers.relu(x=input)
return output
def _adjust_pad_if_needed(self, i_hw, k_hw, s_hw, p_hw):
#adjust the padding if needed
i_h, i_w = i_hw
k_h, k_w = k_hw
s_h, s_w = s_hw
p_h, p_w = p_hw
def is_consistent(i, k, s, p):
o = i + 2 * p - k
if o % s == 0:
return True
else:
return False
real_p_h = 0
real_p_w = 0
if is_consistent(i_h, k_h, s_h, p_h) is False:
real_p_h = int(k_h / 2)
if is_consistent(i_w, k_w, s_w, p_w) is False:
real_p_w = int(k_w / 2)
return [real_p_h, real_p_w]
def pool(self, pool_type, input, k_h, k_w, s_h, s_w, name, padding):
def pool(self, pool_type, input, k_h, k_w, s_h, s_w, ceil_mode, padding,
name):
# Get the number of channels in the input
in_hw = input.shape[2:]
k_hw = [k_h, k_w]
s_hw = [s_h, s_w]
if padding is None:
#fix bug about the difference between conv and pool
#more info: https://github.com/BVLC/caffe/issues/1318
padding = self._adjust_pad_if_needed(in_hw, k_hw, s_hw, [0, 0])
fluid = import_fluid()
output = fluid.layers.pool2d(
input=input,
pool_size=k_hw,
pool_stride=s_hw,
pool_padding=padding,
ceil_mode=ceil_mode,
pool_type=pool_type)
return output
@layer
def max_pool(self, input, k_h, k_w, s_h, s_w, name, padding=None):
return self.pool('max', input, k_h, k_w, s_h, s_w, name, padding)
def max_pool(self,
input,
k_h,
k_w,
s_h,
s_w,
ceil_mode,
padding=[0, 0],
name=None):
return self.pool('max', input, k_h, k_w, s_h, s_w, ceil_mode, padding,
name)
@layer
def avg_pool(self,
input,
k_h,
k_w,
s_h,
s_w,
ceil_mode,
padding=[0, 0],
name=None):
return self.pool('avg', input, k_h, k_w, s_h, s_w, ceil_mode, padding,
name)
@layer
def avg_pool(self, input, k_h, k_w, s_h, s_w, name, padding=None):
return self.pool('avg', input, k_h, k_w, s_h, s_w, name, padding)
def sigmoid(self, input, name):
fluid = import_fluid()
return fluid.layers.sigmoid(input)
@layer
def lrn(self, input, radius, alpha, beta, name, bias=1.0):
......@@ -258,7 +255,12 @@ class Network(object):
return output
@layer
def batch_normalization(self, input, name, scale_offset=True, relu=False):
def batch_normalization(self,
input,
name,
scale_offset=True,
eps=1e-5,
relu=False):
# NOTE: Currently, only inference is supported
fluid = import_fluid()
prefix = name + '_'
......@@ -276,7 +278,7 @@ class Network(object):
bias_attr=bias_attr,
moving_mean_name=mean_name,
moving_variance_name=variance_name,
epsilon=1e-5,
epsilon=eps,
act='relu' if relu is True else None)
return output
......@@ -287,3 +289,16 @@ class Network(object):
output = fluid.layers.dropout(
input, dropout_prob=drop_prob, is_test=is_test, name=name)
return output
def custom_layer_factory(self):
""" get a custom layer maker provided by subclass
"""
raise NotImplementedError(
'[custom_layer_factory] must be implemented by the subclass.')
@layer
def custom_layer(self, inputs, kind, name, *args, **kwargs):
""" make custom layer
"""
layer_factory = self.custom_layer_factory()
return layer_factory(kind, inputs, name, *args, **kwargs)
......@@ -109,9 +109,17 @@ class TensorFlowMapper(NodeMapper):
# Stochastic pooling, for instance.
raise KaffeError('Unsupported pooling type.')
(kernel_params, padding) = self.get_kernel_params(node)
ceil_mode = getattr(node.layer.parameters, 'ceil_mode', True)
return TensorFlowNode(pool_op, kernel_params.kernel_h,
kernel_params.kernel_w, kernel_params.stride_h,
kernel_params.stride_w, **padding)
kernel_params.stride_w, ceil_mode, **padding)
def map_sigmoid(self, node):
return TensorFlowNode('sigmoid')
def map_custom(self, node):
from .. import custom_layers
return custom_layers.make_node(TensorFlowNode, node.kind, node)
def map_inner_product(self, node):
#TODO: Axis
......@@ -142,7 +150,13 @@ class TensorFlowMapper(NodeMapper):
def map_batch_norm(self, node):
scale_offset = len(node.data) == 4
kwargs = {} if scale_offset else {'scale_offset': False}
#this default value comes from caffe's param in batch_norm
default_eps = 1e-5
kwargs = {'scale_offset': scale_offset}
if node.parameters.eps != default_eps:
kwargs['eps'] = node.parameters.eps
return MaybeActivated(
node, default=False)('batch_normalization', **kwargs)
......@@ -184,18 +198,10 @@ class TensorFlowEmitter(object):
codes.append(network_source + '\n')
return self.statement('\n'.join(codes))
def emit_class_def(self, name):
return self.statement('class %s(Network):' % (name))
def emit_setup_def(self):
return self.statement('def setup(self):')
def emit_shape_def(self, input_nodes):
self.outdent()
func_def = self.statement('@classmethod')
func_def += self.statement('def input_shapes(cls):')
self.indent()
def get_inputs_info(self, input_nodes):
input_shapes = {}
for n in input_nodes:
name = n.name
......@@ -204,42 +210,7 @@ class TensorFlowEmitter(object):
input_shapes[name] = ', '.join(shape)
input_shapes = ['"%s": [%s]' % (n, l) for n, l in input_shapes.items()]
shape_str = ','.join(input_shapes)
func_def += self.statement('return {%s}' % (shape_str))
return '\n\n' + func_def
def emit_convert_def(self, input_nodes):
codes = []
inputs = {}
codes.append('shapes = cls.input_shapes()')
for n in input_nodes:
name = n.name
layer_var = name + '_layer'
layer_def = '%s = fluid.layers.data(name="%s", shape=shapes["%s"],'\
' dtype="float32")' % (layer_var, name, name)
#layer_var, layer_def = data_layer_def(n.name, n.output_shape)
codes.append(layer_def)
inputs[name] = layer_var
input_dict = ','.join(['"%s": %s' % (n, l) for n, l in inputs.items()])
codes.append('feed_data = {' + input_dict + '}')
codes.append('net = cls(feed_data)')
codes.append("place = fluid.CPUPlace()")
codes.append("exe = fluid.Executor(place)")
codes.append("exe.run(fluid.default_startup_program())")
codes.append("net.load(data_path=npy_model, exe=exe, place=place)")
codes.append(
"fluid.io.save_persistables(executor=exe, dirname=fluid_path)")
self.outdent()
func_def = self.statement('@classmethod')
func_def += self.statement('def convert(cls, npy_model, fluid_path):')
self.indent()
func_def += self.statement('import paddle.v2.fluid as fluid')
for l in codes:
func_def += self.statement(l)
return '\n' + func_def
return '{%s}' % (shape_str)
def emit_main_def(self, name):
if name is None:
......@@ -248,13 +219,7 @@ class TensorFlowEmitter(object):
self.prefix = ''
main_def = self.statement('if __name__ == "__main__":')
self.indent()
main_def += self.statement("#usage: python xxxnet.py xxx.npy ./model\n")
main_def += self.statement("import sys")
main_def += self.statement("npy_weight = sys.argv[1]")
main_def += self.statement("fluid_model = sys.argv[2]")
main_def += self.statement("%s.convert(npy_weight, fluid_model)" %
(name))
main_def += self.statement("exit(0)")
main_def += self.statement('exit(main())')
return '\n\n' + main_def
def emit_parents(self, chain):
......@@ -269,10 +234,17 @@ class TensorFlowEmitter(object):
return self.statement('self.' + node.emit())
def emit(self, name, chains, input_nodes=None):
from ..net_template import generate_net_code
from ..net_template import generate_main_code
self.net_name = name
inputs_info = self.get_inputs_info(input_nodes)
s = self.emit_imports()
s += self.emit_class_def(name)
s += generate_net_code(name, inputs_info) + '\n'
self.indent()
# define the net using api
s += self.emit_setup_def()
self.indent()
blocks = []
......@@ -283,8 +255,9 @@ class TensorFlowEmitter(object):
b += self.emit_node(node)
blocks.append(b[:-1])
s = s + '\n\n'.join(blocks)
s += self.emit_shape_def(input_nodes)
s += self.emit_convert_def(input_nodes)
# define the main function
s += '\n\n\n' + generate_main_code(name)
s += self.emit_main_def(name)
return s
......@@ -323,6 +296,7 @@ class Transformer(object):
# (Caffe's GoogLeNet implementation uses slashes)
NodeRenamer(lambda node: node.name.replace('/', '_'))
]
self.graph = graph.transformed(transformers)
# Display the graph
......@@ -334,9 +308,6 @@ class Transformer(object):
transformers = [
# Reshape the parameters to TensorFlow's ordering
DataReshaper({
# (c_o, c_i, h, w) -> (h, w, c_i, c_o) for TF
NodeKind.Convolution: (0, 1, 2, 3),
# (c_o, c_i) -> (c_i, c_o)
NodeKind.InnerProduct: (1, 0)
}),
......
......@@ -3,8 +3,24 @@ from collections import namedtuple
from .errors import KaffeError
TensorShape = namedtuple('TensorShape',
['batch_size', 'channels', 'height', 'width'])
Tensor4DShape = namedtuple('Tensor4DShape',
['batch_size', 'channels', 'height', 'width'])
Tensor2DShape = namedtuple('Tensor2DShape', ['batch_size', 'data'])
ScalarShape = namedtuple('ScalarShape', ['batch_size'])
def make_tensor(batch_size, d1=None, d2=None, d3=None):
if d3 is not None:
return Tensor4DShape(batch_size, d1, d2, d3)
elif d1 is not None and d2 is None:
return Tensor2DShape(batch_size, d1)
elif d1 is None and d2 is None and d3 is None:
return ScalarShape(batch_size)
else:
raise NotImplementedError('invalid params for make_tensor %s' \
% (str((batch_size, d1, d2, d3))))
def get_filter_output_shape(i_h, i_w, params, round_func):
......@@ -23,7 +39,7 @@ def get_strided_kernel_output_shape(node, round_func):
params = node.layer.parameters
has_c_o = hasattr(params, 'num_output')
c = params.num_output if has_c_o else input_shape.channels
return TensorShape(input_shape.batch_size, c, o_h, o_w)
return make_tensor(input_shape.batch_size, c, o_h, o_w)
def shape_not_implemented(node):
......@@ -36,7 +52,7 @@ def shape_identity(node):
def shape_scalar(node):
return TensorShape(1, 1, 1, 1)
return make_tensor(1, 1, 1, 1)
def shape_data(node):
......@@ -59,7 +75,7 @@ def shape_data(node):
def shape_mem_data(node):
params = node.parameters
return TensorShape(params.batch_size, params.channels, params.height,
return make_tensor(params.batch_size, params.channels, params.height,
params.width)
......@@ -79,10 +95,15 @@ def shape_convolution(node):
def shape_pool(node):
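    # Caffe rounds the pooled output size up (ceil) by default; ceil_mode=False
    # switches to floor rounding. E.g. input 6, kernel 3, stride 2, pad 0:
    # ceil((6 - 3) / 2) + 1 = 3 outputs, while floor((6 - 3) / 2) + 1 = 2.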
return get_strided_kernel_output_shape(node, math.ceil)
ceil_mode = getattr(node.layer.parameters, 'ceil_mode', True)
if ceil_mode is True:
method = math.ceil
else:
method = math.floor
return get_strided_kernel_output_shape(node, method)
def shape_inner_product(node):
input_shape = node.get_only_parent().output_shape
return TensorShape(input_shape.batch_size, node.layer.parameters.num_output,
1, 1)
return make_tensor(input_shape.batch_size, node.layer.parameters.num_output)
......@@ -66,12 +66,14 @@ class DataInjector(object):
def adjust_parameters(self, node, data):
if not self.did_use_pb:
return data
# When using the protobuf-backend, each parameter initially has four dimensions.
# In certain cases (like FC layers), we want to eliminate the singleton dimensions.
# This implementation takes care of the common cases. However, it does leave the
# potential for future issues.
# The Caffe-backend does not suffer from this problem.
data = list(data)
squeeze_indices = [1] # Squeeze biases.
if node.kind == NodeKind.InnerProduct:
squeeze_indices.append(0) # Squeeze FC.
......@@ -80,8 +82,22 @@ class DataInjector(object):
if idx >= len(data):
continue
shape_old = data[idx].shape
data[idx] = np.squeeze(data[idx])
d = data[idx]
assert len(
d.shape
) == 4, 'invalid shape[%s] from caffe when adjust_parameters' % (
str(d.shape))
shape_old = d.shape
sq_axis = None
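            # every protobuf blob arrives as 4-D: weights (idx 0) keep the last
            # two axes (squeeze axes 0 and 1), biases (idx 1) keep only the last
            # axis (squeeze axes 0, 1 and 2)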
if idx == 0:
sq_axis = (0, 1)
elif idx == 1:
sq_axis = (0, 1, 2)
else:
continue
data[idx] = np.squeeze(d, axis=sq_axis)
shape_new = data[idx].shape
            if len(shape_old) != len(shape_new):
debug('squeeze idx:%d, with kind:%s,name:%s' % \
......@@ -113,7 +129,10 @@ class DataReshaper(object):
try:
parent = node.get_only_parent()
s = parent.output_shape
return s.height > 1 or s.width > 1
if len(s) == 4:
return s.height > 1 or s.width > 1
else:
return False
except KaffeError:
return False
......@@ -121,25 +140,26 @@ class DataReshaper(object):
try:
return self.mapping[node_kind]
except KeyError:
raise
#raise KaffeError('Ordering not found for node kind: {}'.format(node_kind))
raise KaffeError('Ordering not found for node kind: {}'.format(
node_kind))
def __call__(self, graph):
for node in graph.nodes:
if node.data is None:
continue
if node.kind not in self.reshaped_node_types:
# Check for 2+ dimensional data
if any(len(tensor.shape) > 1 for tensor in node.data):
                    notice('parameters not reshaped for node: {}'.format(node))
continue
transpose_order = self.map(node.kind)
weights = node.data[0]
if (node.kind == NodeKind.InnerProduct
) and self.has_spatial_parent(node):
if node.kind == NodeKind.InnerProduct:
# The FC layer connected to the spatial layer needs to be
# re-wired to match the new spatial ordering.
in_shape = node.get_only_parent().output_shape
#in_shape = node.get_only_parent().output_shape
fc_shape = weights.shape
output_channels = fc_shape[0]
weights = weights.reshape((output_channels, -1))
......@@ -178,7 +198,8 @@ class SubNodeFuser(object):
continue
# Rewrite the fused node's children to its parent.
for child in node.children:
child.parents.remove(node)
pos = child.parents.index(node)
child.parents[pos] = parent
parent.add_child(child)
# Disconnect the fused node from the graph.
parent.children.remove(node)
......
File mode changed from 100644 to 100755
import os
import paddle.v2 as paddle
import paddle.fluid as fluid
import reader
def conv_bn_layer(input, num_filters, filter_size, stride=1, groups=1,
......@@ -65,20 +63,44 @@ def bottleneck_block(input, num_filters, stride, cardinality, reduction_ratio):
return fluid.layers.elementwise_add(x=short, y=scale, act='relu')
def SE_ResNeXt(input, class_dim, infer=False):
cardinality = 64
reduction_ratio = 16
depth = [3, 8, 36, 3]
num_filters = [128, 256, 512, 1024]
conv = conv_bn_layer(
input=input, num_filters=64, filter_size=3, stride=2, act='relu')
conv = conv_bn_layer(
input=conv, num_filters=64, filter_size=3, stride=1, act='relu')
conv = conv_bn_layer(
input=conv, num_filters=128, filter_size=3, stride=1, act='relu')
conv = fluid.layers.pool2d(
input=conv, pool_size=3, pool_stride=2, pool_padding=1, pool_type='max')
def SE_ResNeXt(input, class_dim, infer=False, layers=50):
supported_layers = [50, 152]
if layers not in supported_layers:
print("supported layers are", supported_layers, "but input layer is",
layers)
exit()
if layers == 50:
cardinality = 32
reduction_ratio = 16
depth = [3, 4, 6, 3]
num_filters = [128, 256, 512, 1024]
conv = conv_bn_layer(
input=input, num_filters=64, filter_size=7, stride=2, act='relu')
conv = fluid.layers.pool2d(
input=conv,
pool_size=3,
pool_stride=2,
pool_padding=1,
pool_type='max')
elif layers == 152:
cardinality = 64
reduction_ratio = 16
depth = [3, 8, 36, 3]
num_filters = [128, 256, 512, 1024]
conv = conv_bn_layer(
input=input, num_filters=64, filter_size=3, stride=2, act='relu')
conv = conv_bn_layer(
input=conv, num_filters=64, filter_size=3, stride=1, act='relu')
conv = conv_bn_layer(
input=conv, num_filters=128, filter_size=3, stride=1, act='relu')
conv = fluid.layers.pool2d(
input=conv,
pool_size=3,
pool_stride=2,
pool_padding=1,
pool_type='max')
for block in range(len(depth)):
for i in range(depth[block]):
......@@ -97,93 +119,3 @@ def SE_ResNeXt(input, class_dim, infer=False):
drop = pool
out = fluid.layers.fc(input=drop, size=class_dim, act='softmax')
return out
def train(learning_rate,
batch_size,
num_passes,
init_model=None,
model_save_dir='model',
parallel=True):
class_dim = 1000
image_shape = [3, 224, 224]
image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
if parallel:
places = fluid.layers.get_places()
pd = fluid.layers.ParallelDo(places)
with pd.do():
image_ = pd.read_input(image)
label_ = pd.read_input(label)
out = SE_ResNeXt(input=image_, class_dim=class_dim)
cost = fluid.layers.cross_entropy(input=out, label=label_)
avg_cost = fluid.layers.mean(x=cost)
accuracy = fluid.layers.accuracy(input=out, label=label_)
pd.write_output(avg_cost)
pd.write_output(accuracy)
avg_cost, accuracy = pd()
avg_cost = fluid.layers.mean(x=avg_cost)
accuracy = fluid.layers.mean(x=accuracy)
else:
out = SE_ResNeXt(input=image, class_dim=class_dim)
cost = fluid.layers.cross_entropy(input=out, label=label)
avg_cost = fluid.layers.mean(x=cost)
accuracy = fluid.layers.accuracy(input=out, label=label)
optimizer = fluid.optimizer.Momentum(
learning_rate=learning_rate,
momentum=0.9,
regularization=fluid.regularizer.L2Decay(1e-4))
opts = optimizer.minimize(avg_cost)
inference_program = fluid.default_main_program().clone()
with fluid.program_guard(inference_program):
inference_program = fluid.io.get_inference_program([avg_cost, accuracy])
place = fluid.CUDAPlace(0)
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
if init_model is not None:
fluid.io.load_persistables(exe, init_model)
train_reader = paddle.batch(reader.train(), batch_size=batch_size)
test_reader = paddle.batch(reader.test(), batch_size=batch_size)
feeder = fluid.DataFeeder(place=place, feed_list=[image, label])
for pass_id in range(num_passes):
for batch_id, data in enumerate(train_reader()):
loss = exe.run(fluid.default_main_program(),
feed=feeder.feed(data),
fetch_list=[avg_cost])
print("Pass {0}, batch {1}, loss {2}".format(pass_id, batch_id,
float(loss[0])))
total_loss = 0.0
total_acc = 0.0
total_batch = 0
for data in test_reader():
loss, acc = exe.run(inference_program,
feed=feeder.feed(data),
fetch_list=[avg_cost, accuracy])
total_loss += float(loss)
total_acc += float(acc)
total_batch += 1
print("End pass {0}, test_loss {1}, test_acc {2}".format(
pass_id, total_loss / total_batch, total_acc / total_batch))
model_path = os.path.join(model_save_dir, str(pass_id))
fluid.io.save_inference_model(model_path, ['image'], [out], exe)
if __name__ == '__main__':
train(
learning_rate=0.1,
batch_size=8,
num_passes=100,
init_model=None,
parallel=False)
import os
import numpy as np
import time
import sys
import paddle.v2 as paddle
import paddle.fluid as fluid
from se_resnext import SE_ResNeXt
import reader
import argparse
import functools
from utility import add_arguments, print_arguments
parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser)
# yapf: disable
add_arg('batch_size', int, 256, "Minibatch size.")
add_arg('num_layers', int, 50, "How many layers for SE-ResNeXt model.")
add_arg('with_mem_opt', bool, True, "Whether to use memory optimization or not.")
add_arg('parallel_exe', bool, True, "Whether to use ParallelExecutor to train or not.")
# yapf: enable
def train_parallel_do(args,
learning_rate,
batch_size,
num_passes,
init_model=None,
model_save_dir='model',
parallel=True,
use_nccl=True,
lr_strategy=None,
layers=50):
class_dim = 1000
image_shape = [3, 224, 224]
image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
if parallel:
places = fluid.layers.get_places()
pd = fluid.layers.ParallelDo(places, use_nccl=use_nccl)
with pd.do():
image_ = pd.read_input(image)
label_ = pd.read_input(label)
out = SE_ResNeXt(input=image_, class_dim=class_dim, layers=layers)
cost = fluid.layers.cross_entropy(input=out, label=label_)
avg_cost = fluid.layers.mean(x=cost)
acc_top1 = fluid.layers.accuracy(input=out, label=label_, k=1)
acc_top5 = fluid.layers.accuracy(input=out, label=label_, k=5)
pd.write_output(avg_cost)
pd.write_output(acc_top1)
pd.write_output(acc_top5)
avg_cost, acc_top1, acc_top5 = pd()
avg_cost = fluid.layers.mean(x=avg_cost)
acc_top1 = fluid.layers.mean(x=acc_top1)
acc_top5 = fluid.layers.mean(x=acc_top5)
else:
out = SE_ResNeXt(input=image, class_dim=class_dim, layers=layers)
cost = fluid.layers.cross_entropy(input=out, label=label)
avg_cost = fluid.layers.mean(x=cost)
acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
inference_program = fluid.default_main_program().clone(for_test=True)
if lr_strategy is None:
optimizer = fluid.optimizer.Momentum(
learning_rate=learning_rate,
momentum=0.9,
regularization=fluid.regularizer.L2Decay(1e-4))
else:
bd = lr_strategy["bd"]
lr = lr_strategy["lr"]
optimizer = fluid.optimizer.Momentum(
learning_rate=fluid.layers.piecewise_decay(
boundaries=bd, values=lr),
momentum=0.9,
regularization=fluid.regularizer.L2Decay(1e-4))
opts = optimizer.minimize(avg_cost)
if args.with_mem_opt:
fluid.memory_optimize(fluid.default_main_program())
place = fluid.CUDAPlace(0)
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
if init_model is not None:
fluid.io.load_persistables(exe, init_model)
train_reader = paddle.batch(reader.train(), batch_size=batch_size)
test_reader = paddle.batch(reader.test(), batch_size=batch_size)
feeder = fluid.DataFeeder(place=place, feed_list=[image, label])
for pass_id in range(num_passes):
train_info = [[], [], []]
test_info = [[], [], []]
for batch_id, data in enumerate(train_reader()):
t1 = time.time()
loss, acc1, acc5 = exe.run(
fluid.default_main_program(),
feed=feeder.feed(data),
fetch_list=[avg_cost, acc_top1, acc_top5])
t2 = time.time()
period = t2 - t1
train_info[0].append(loss[0])
train_info[1].append(acc1[0])
train_info[2].append(acc5[0])
if batch_id % 10 == 0:
print("Pass {0}, trainbatch {1}, loss {2}, \
acc1 {3}, acc5 {4} time {5}"
.format(pass_id, \
batch_id, loss[0], acc1[0], acc5[0], \
"%2.2f sec" % period))
sys.stdout.flush()
train_loss = np.array(train_info[0]).mean()
train_acc1 = np.array(train_info[1]).mean()
train_acc5 = np.array(train_info[2]).mean()
for data in test_reader():
t1 = time.time()
loss, acc1, acc5 = exe.run(
inference_program,
feed=feeder.feed(data),
fetch_list=[avg_cost, acc_top1, acc_top5])
t2 = time.time()
period = t2 - t1
test_info[0].append(loss[0])
test_info[1].append(acc1[0])
test_info[2].append(acc5[0])
if batch_id % 10 == 0:
print("Pass {0},testbatch {1},loss {2}, \
acc1 {3},acc5 {4},time {5}"
.format(pass_id, \
batch_id, loss[0], acc1[0], acc5[0], \
"%2.2f sec" % period))
sys.stdout.flush()
test_loss = np.array(test_info[0]).mean()
test_acc1 = np.array(test_info[1]).mean()
test_acc5 = np.array(test_info[2]).mean()
print("End pass {0}, train_loss {1}, train_acc1 {2}, train_acc5 {3}, \
test_loss {4}, test_acc1 {5}, test_acc5 {6}"
.format(pass_id, \
train_loss, train_acc1, train_acc5, test_loss, test_acc1, \
test_acc5))
sys.stdout.flush()
model_path = os.path.join(model_save_dir, str(pass_id))
if not os.path.isdir(model_path):
os.makedirs(model_path)
fluid.io.save_persistables(exe, model_path)
def train_parallel_exe(args,
learning_rate,
batch_size,
num_passes,
init_model=None,
model_save_dir='model',
parallel=True,
use_nccl=True,
lr_strategy=None,
layers=50):
class_dim = 1000
image_shape = [3, 224, 224]
image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
out = SE_ResNeXt(input=image, class_dim=class_dim, layers=layers)
cost = fluid.layers.cross_entropy(input=out, label=label)
acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
avg_cost = fluid.layers.mean(x=cost)
test_program = fluid.default_main_program().clone(for_test=True)
if lr_strategy is None:
optimizer = fluid.optimizer.Momentum(
learning_rate=learning_rate,
momentum=0.9,
regularization=fluid.regularizer.L2Decay(1e-4))
else:
bd = lr_strategy["bd"]
lr = lr_strategy["lr"]
optimizer = fluid.optimizer.Momentum(
learning_rate=fluid.layers.piecewise_decay(
boundaries=bd, values=lr),
momentum=0.9,
regularization=fluid.regularizer.L2Decay(1e-4))
opts = optimizer.minimize(avg_cost)
if args.with_mem_opt:
fluid.memory_optimize(fluid.default_main_program())
place = fluid.CUDAPlace(0)
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
if init_model is not None:
fluid.io.load_persistables(exe, init_model)
train_reader = paddle.batch(reader.train(), batch_size=batch_size)
test_reader = paddle.batch(reader.test(), batch_size=batch_size)
feeder = fluid.DataFeeder(place=place, feed_list=[image, label])
train_exe = fluid.ParallelExecutor(use_cuda=True, loss_name=avg_cost.name)
test_exe = fluid.ParallelExecutor(
use_cuda=True, main_program=test_program, share_vars_from=train_exe)
fetch_list = [avg_cost.name, acc_top1.name, acc_top5.name]
for pass_id in range(num_passes):
train_info = [[], [], []]
test_info = [[], [], []]
for batch_id, data in enumerate(train_reader()):
t1 = time.time()
loss, acc1, acc5 = train_exe.run(fetch_list,
feed_dict=feeder.feed(data))
t2 = time.time()
period = t2 - t1
loss = np.mean(np.array(loss))
acc1 = np.mean(np.array(acc1))
acc5 = np.mean(np.array(acc5))
train_info[0].append(loss)
train_info[1].append(acc1)
train_info[2].append(acc5)
if batch_id % 10 == 0:
print("Pass {0}, trainbatch {1}, loss {2}, \
acc1 {3}, acc5 {4} time {5}"
.format(pass_id, \
batch_id, loss, acc1, acc5, \
"%2.2f sec" % period))
sys.stdout.flush()
train_loss = np.array(train_info[0]).mean()
train_acc1 = np.array(train_info[1]).mean()
train_acc5 = np.array(train_info[2]).mean()
for data in test_reader():
t1 = time.time()
loss, acc1, acc5 = test_exe.run(fetch_list,
feed_dict=feeder.feed(data))
t2 = time.time()
period = t2 - t1
loss = np.mean(np.array(loss))
acc1 = np.mean(np.array(acc1))
acc5 = np.mean(np.array(acc5))
test_info[0].append(loss)
test_info[1].append(acc1)
test_info[2].append(acc5)
if batch_id % 10 == 0:
print("Pass {0},testbatch {1},loss {2}, \
acc1 {3},acc5 {4},time {5}"
.format(pass_id, \
batch_id, loss, acc1, acc5, \
"%2.2f sec" % period))
sys.stdout.flush()
test_loss = np.array(test_info[0]).mean()
test_acc1 = np.array(test_info[1]).mean()
test_acc5 = np.array(test_info[2]).mean()
print("End pass {0}, train_loss {1}, train_acc1 {2}, train_acc5 {3}, \
test_loss {4}, test_acc1 {5}, test_acc5 {6}"
.format(pass_id, \
train_loss, train_acc1, train_acc5, test_loss, test_acc1, \
test_acc5))
sys.stdout.flush()
model_path = os.path.join(model_save_dir, str(pass_id))
if not os.path.isdir(model_path):
os.makedirs(model_path)
fluid.io.save_persistables(exe, model_path)
if __name__ == '__main__':
args = parser.parse_args()
print_arguments(args)
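    # piecewise learning-rate decay: divide the learning rate by 10 after epochs
    # 30, 60 and 90, expressed as iteration boundaries for piecewise_decay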
epoch_points = [30, 60, 90]
total_images = 1281167
batch_size = args.batch_size
step = int(total_images / batch_size + 1)
bd = [e * step for e in epoch_points]
lr = [0.1, 0.01, 0.001, 0.0001]
lr_strategy = {"bd": bd, "lr": lr}
use_nccl = True
# layers: 50, 152
layers = args.num_layers
method = train_parallel_exe if args.parallel_exe else train_parallel_do
method(
args,
learning_rate=0.1,
batch_size=batch_size,
num_passes=120,
init_model=None,
parallel=True,
use_nccl=True,
lr_strategy=lr_strategy,
layers=layers)
"""A dummy reader for test."""
"""Contains common utility functions."""
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
......@@ -13,40 +13,50 @@
#See the License for the specific language governing permissions and
#limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import distutils.util
import numpy as np
import paddle.v2 as paddle
from paddle.fluid import core
DATA_SHAPE = [1, 512, 512]
NUM_CLASSES = 20
def _read_creater(num_sample=1024, min_seq_len=1, max_seq_len=10):
    def reader():
        for i in range(num_sample):
            sequence_len = np.random.randint(min_seq_len, max_seq_len)
            x = np.random.uniform(0.1, 1, DATA_SHAPE).astype("float32")
            y = np.random.randint(0, NUM_CLASSES + 1,
                                  [sequence_len]).astype("int32")
            yield x, y
    return reader
def train(batch_size, num_sample=128):
    """Get train dataset reader."""
    return paddle.batch(_read_creater(num_sample=num_sample), batch_size)
def test(batch_size=1, num_sample=16):
    """Get test dataset reader."""
    return paddle.batch(_read_creater(num_sample=num_sample), batch_size)
def data_shape():
    """Get image shape in CHW order."""
    return DATA_SHAPE
def num_classes():
    """Get number of total classes."""
    return NUM_CLASSES
def print_arguments(args):
    """Print argparse's arguments.
    Usage:
    .. code-block:: python
        parser = argparse.ArgumentParser()
        parser.add_argument("name", default="Jonh", type=str, help="User name.")
        args = parser.parse_args()
        print_arguments(args)
    :param args: Input argparse.Namespace for printing.
    :type args: argparse.Namespace
    """
    print("----------- Configuration Arguments -----------")
    for arg, value in sorted(vars(args).iteritems()):
        print("%s: %s" % (arg, value))
    print("------------------------------------------------")
def add_arguments(argname, type, default, help, argparser, **kwargs):
    """Add argparse's argument.
    Usage:
    .. code-block:: python
        parser = argparse.ArgumentParser()
        add_argument("name", str, "Jonh", "User name.", parser)
        args = parser.parse_args()
    """
    type = distutils.util.strtobool if type == bool else type
    argparser.add_argument(
        "--" + argname,
        default=default,
        type=type,
        help=help + ' Default: %(default)s.',
        **kwargs)
# Language Model
The following is a brief overview of this example's directory structure:
```text
.
├── README.md            # documentation
├── train.py             # training script
├── infer.py             # inference script
└── utils.py             # common utility functions
```
## Introduction
For an introduction to recurrent neural network language models, see the paper [Recurrent Neural Network Regularization](https://arxiv.org/abs/1409.2329). In this example we implement a GRU-based RNN language model.
## Training
Run `python train.py` to start training the model:
```python
python train.py
```
The currently supported parameters can be found in the `train_net` function of [train.py](./train.py):
```python
vocab, train_reader, test_reader = utils.prepare_data(
batch_size=20, # batch size
buffer_size=1000, # buffer size, default value is OK
word_freq_threshold=0) # vocabulary related parameter, and words with frequency below this value will be filtered
train(train_reader=train_reader,
vocab=vocab,
network=network,
hid_size=200, # embedding and hidden size
base_lr=1.0, # base learning rate
batch_size=20, # batch size, the same as that in prepare_data
pass_num=12, # the number of passes for training
use_cuda=True, # whether to use GPU card
parallel=False, # whether to be parallel
model_dir="model", # directory to save model
init_low_bound=-0.1, # uniform parameter initialization lower bound
init_high_bound=0.1) # uniform parameter initialization upper bound
```
## Customizing the Network Structure
The network structure can be adjusted in the `network` function of [train.py](./train.py); the current structure is as follows:
```python
emb = fluid.layers.embedding(input=src, size=[vocab_size, hid_size],
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(low=init_low_bound, high=init_high_bound),
learning_rate=emb_lr_x),
is_sparse=True)
fc0 = fluid.layers.fc(input=emb, size=hid_size * 3,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(low=init_low_bound, high=init_high_bound),
learning_rate=gru_lr_x))
gru_h0 = fluid.layers.dynamic_gru(input=fc0, size=hid_size,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(low=init_low_bound, high=init_high_bound),
learning_rate=gru_lr_x))
fc = fluid.layers.fc(input=gru_h0, size=vocab_size, act='softmax',
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(low=init_low_bound, high=init_high_bound),
learning_rate=fc_lr_x))
cost = fluid.layers.cross_entropy(input=fc, label=dst)
```
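Note that `fc0` projects the embedding to `hid_size * 3` because `fluid.layers.dynamic_gru` with `size=hid_size` expects its input to already contain the three gate projections (update gate, reset gate and candidate state) of the GRU.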
## Sample Training Results
The training log on a single Tesla K40m GPU is shown below:
```text
epoch_1 start
step:100 ppl:771.053
step:200 ppl:449.597
step:300 ppl:642.654
step:400 ppl:458.128
step:500 ppl:510.912
step:600 ppl:451.545
step:700 ppl:364.404
step:800 ppl:324.272
step:900 ppl:360.797
step:1000 ppl:275.761
step:1100 ppl:294.599
step:1200 ppl:335.877
step:1300 ppl:185.262
step:1400 ppl:241.744
step:1500 ppl:211.507
step:1600 ppl:233.431
step:1700 ppl:298.767
step:1800 ppl:203.403
step:1900 ppl:158.828
step:2000 ppl:171.148
step:2100 ppl:280.884
epoch:1 num_steps:2104 time_cost(s):47.478780
model saved in model/epoch_1
epoch_2 start
step:100 ppl:238.099
step:200 ppl:136.527
step:300 ppl:204.184
step:400 ppl:252.886
step:500 ppl:177.377
step:600 ppl:197.688
step:700 ppl:131.650
step:800 ppl:223.906
step:900 ppl:144.785
step:1000 ppl:176.286
step:1100 ppl:148.158
step:1200 ppl:203.581
step:1300 ppl:168.208
step:1400 ppl:159.412
step:1500 ppl:114.032
step:1600 ppl:157.985
step:1700 ppl:147.743
step:1800 ppl:88.676
step:1900 ppl:141.962
step:2000 ppl:106.087
step:2100 ppl:122.709
epoch:2 num_steps:2104 time_cost(s):47.583789
model saved in model/epoch_2
...
```
## Inference
Run `python infer.py model_dir start_epoch last_epoch(inclusive)` to run inference, where start_epoch is the first epoch to evaluate and last_epoch is the last (inclusive), for example:
```python
python infer.py model 1 12 # prediction from epoch 1 to epoch 12
```
## Sample Inference Results
```text
model:model/epoch_1 ppl:254.540 time_cost(s):3.29
model:model/epoch_2 ppl:177.671 time_cost(s):3.27
model:model/epoch_3 ppl:156.251 time_cost(s):3.27
model:model/epoch_4 ppl:139.036 time_cost(s):3.27
model:model/epoch_5 ppl:132.661 time_cost(s):3.27
model:model/epoch_6 ppl:130.092 time_cost(s):3.28
model:model/epoch_7 ppl:128.751 time_cost(s):3.27
model:model/epoch_8 ppl:125.411 time_cost(s):3.27
model:model/epoch_9 ppl:124.604 time_cost(s):3.28
model:model/epoch_10 ppl:124.754 time_cost(s):3.29
model:model/epoch_11 ppl:125.421 time_cost(s):3.27
model:model/epoch_12 ppl:125.676 time_cost(s):3.27
```
......@@ -60,4 +60,5 @@ def prepare_filelist(devkit_dir, years, output_dir):
ftest.write(item[0] + ' ' + item[1] + '\n')
prepare_filelist(devkit_dir, years, '.')
if __name__ == '__main__':
prepare_filelist(devkit_dir, years, '.')